1/*	$OpenBSD: pf_lb.c,v 1.74 2023/05/10 22:42:51 sashan Exp $ */
2
3/*
4 * Copyright (c) 2001 Daniel Hartmeier
5 * Copyright (c) 2002 - 2008 Henning Brauer
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 *    - Redistributions of source code must retain the above copyright
13 *      notice, this list of conditions and the following disclaimer.
14 *    - Redistributions in binary form must reproduce the above
15 *      copyright notice, this list of conditions and the following
16 *      disclaimer in the documentation and/or other materials provided
17 *      with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 *
32 * Effort sponsored in part by the Defense Advanced Research Projects
33 * Agency (DARPA) and Air Force Research Laboratory, Air Force
34 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35 *
36 */
37
38#include "bpfilter.h"
39#include "pflog.h"
40#include "pfsync.h"
41#include "pflow.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/mbuf.h>
46#include <sys/filio.h>
47#include <sys/socket.h>
48#include <sys/socketvar.h>
49#include <sys/kernel.h>
50#include <sys/time.h>
51#include <sys/pool.h>
52#include <sys/rwlock.h>
53#include <sys/syslog.h>
54#include <sys/stdint.h>
55
56#include <crypto/siphash.h>
57
58#include <net/if.h>
59#include <net/bpf.h>
60#include <net/route.h>
61
62#include <netinet/in.h>
63#include <netinet/ip.h>
64#include <netinet/in_pcb.h>
65#include <netinet/ip_var.h>
66#include <netinet/ip_icmp.h>
67#include <netinet/icmp_var.h>
68#include <netinet/tcp.h>
69#include <netinet/tcp_seq.h>
70#include <netinet/tcp_timer.h>
71#include <netinet/udp.h>
72#include <netinet/udp_var.h>
73#include <netinet/if_ether.h>
74
75#ifdef INET6
76#include <netinet/ip6.h>
77#include <netinet/icmp6.h>
78#endif /* INET6 */
79
80#include <net/pfvar.h>
81#include <net/pfvar_priv.h>
82
83#if NPFLOG > 0
84#include <net/if_pflog.h>
85#endif	/* NPFLOG > 0 */
86
87#if NPFLOW > 0
88#include <net/if_pflow.h>
89#endif	/* NPFLOW > 0 */
90
91#if NPFSYNC > 0
92#include <net/if_pfsync.h>
93#endif /* NPFSYNC > 0 */
94
/*
 * Prototypes of helpers defined further down, listed in definition order.
 */

/* keyed hash of an address, used by source-hash pools */
u_int64_t		 pf_hash(struct pf_addr *, struct pf_addr *,
			    struct pf_poolhashkey *, sa_family_t);
/* pick a translation address and an unused source port */
int			 pf_get_sport(struct pf_pdesc *, struct pf_rule *,
			    struct pf_addr *, u_int16_t *, u_int16_t,
			    u_int16_t, struct pf_src_node **);
/* reuse the address remembered by a sticky-address source node */
int			 pf_map_addr_sticky(sa_family_t, struct pf_rule *,
			    struct pf_addr *, struct pf_addr *,
			    struct pf_src_node **, struct pf_pool *,
			    enum pf_sn_types);
/* bump the per-address state count of a table backed pool */
int			 pf_map_addr_states_increase(sa_family_t,
				struct pf_pool *, struct pf_addr *);
/* address family translating variant of pf_get_transaddr() */
int			 pf_get_transaddr_af(struct pf_rule *,
			    struct pf_pdesc *, struct pf_src_node **);
108
109u_int64_t
110pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
111    struct pf_poolhashkey *key, sa_family_t af)
112{
113	uint64_t res = 0;
114#ifdef INET6
115	union {
116		uint64_t hash64;
117		uint32_t hash32[2];
118	} h;
119#endif	/* INET6 */
120
121	switch (af) {
122	case AF_INET:
123		res = SipHash24((SIPHASH_KEY *)key,
124		    &inaddr->addr32[0], sizeof(inaddr->addr32[0]));
125		hash->addr32[0] = res;
126		break;
127#ifdef INET6
128	case AF_INET6:
129		res = SipHash24((SIPHASH_KEY *)key, &inaddr->addr32[0],
130		    4 * sizeof(inaddr->addr32[0]));
131		h.hash64 = res;
132		hash->addr32[0] = h.hash32[0];
133		hash->addr32[1] = h.hash32[1];
134		/*
135		 * siphash isn't big enough, but flipping it around is
136		 * good enough here.
137		 */
138		hash->addr32[2] = ~h.hash32[1];
139		hash->addr32[3] = ~h.hash32[0];
140		break;
141#endif /* INET6 */
142	default:
143		unhandled_af(af);
144	}
145	return (res);
146}
147
148int
149pf_get_sport(struct pf_pdesc *pd, struct pf_rule *r,
150    struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
151    struct pf_src_node **sn)
152{
153	struct pf_state_key_cmp	key;
154	struct pf_addr		init_addr;
155	u_int16_t		cut;
156	int			dir = (pd->dir == PF_IN) ? PF_OUT : PF_IN;
157	int			sidx = pd->sidx;
158	int			didx = pd->didx;
159
160	memset(&init_addr, 0, sizeof(init_addr));
161	if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr, &init_addr, sn, &r->nat,
162	    PF_SN_NAT))
163		return (1);
164
165	if (pd->proto == IPPROTO_ICMP) {
166		if (pd->ndport == htons(ICMP_ECHO)) {
167			low = 1;
168			high = 65535;
169		} else
170			return (0);	/* Don't try to modify non-echo ICMP */
171	}
172#ifdef INET6
173	if (pd->proto == IPPROTO_ICMPV6) {
174		if (pd->ndport == htons(ICMP6_ECHO_REQUEST)) {
175			low = 1;
176			high = 65535;
177		} else
178			return (0);	/* Don't try to modify non-echo ICMP */
179	}
180#endif /* INET6 */
181
182	do {
183		key.af = pd->naf;
184		key.proto = pd->proto;
185		key.rdomain = pd->rdomain;
186		pf_addrcpy(&key.addr[didx], &pd->ndaddr, key.af);
187		pf_addrcpy(&key.addr[sidx], naddr, key.af);
188		key.port[didx] = pd->ndport;
189
190		/*
191		 * port search; start random, step;
192		 * similar 2 portloop in in_pcbbind
193		 */
194		if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP ||
195		    pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6)) {
196			/* XXX bug: icmp states dont use the id on both
197			 * XXX sides (traceroute -I through nat) */
198			key.port[sidx] = pd->nsport;
199			key.hash = pf_pkt_hash(key.af, key.proto, &key.addr[0],
200			    &key.addr[1], key.port[0], key.port[1]);
201			if (pf_find_state_all(&key, dir, NULL) == NULL) {
202				*nport = pd->nsport;
203				return (0);
204			}
205		} else if (low == 0 && high == 0) {
206			key.port[sidx] = pd->nsport;
207			key.hash = pf_pkt_hash(key.af, key.proto, &key.addr[0],
208			    &key.addr[1], key.port[0], key.port[1]);
209			if (pf_find_state_all(&key, dir, NULL) == NULL) {
210				*nport = pd->nsport;
211				return (0);
212			}
213		} else if (low == high) {
214			key.port[sidx] = htons(low);
215			key.hash = pf_pkt_hash(key.af, key.proto, &key.addr[0],
216			    &key.addr[1], key.port[0], key.port[1]);
217			if (pf_find_state_all(&key, dir, NULL) == NULL) {
218				*nport = htons(low);
219				return (0);
220			}
221		} else {
222			u_int32_t tmp;
223
224			if (low > high) {
225				tmp = low;
226				low = high;
227				high = tmp;
228			}
229			/* low < high */
230			cut = arc4random_uniform(1 + high - low) + low;
231			/* low <= cut <= high */
232			for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) {
233				key.port[sidx] = htons(tmp);
234				key.hash = pf_pkt_hash(key.af, key.proto,
235				    &key.addr[0], &key.addr[1], key.port[0],
236				    key.port[1]);
237				if (pf_find_state_all(&key, dir, NULL) ==
238				    NULL && !in_baddynamic(tmp, pd->proto)) {
239					*nport = htons(tmp);
240					return (0);
241				}
242			}
243			tmp = cut;
244			for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) {
245				key.port[sidx] = htons(tmp);
246				key.hash = pf_pkt_hash(key.af, key.proto,
247				    &key.addr[0], &key.addr[1], key.port[0],
248				    key.port[1]);
249				if (pf_find_state_all(&key, dir, NULL) ==
250				    NULL && !in_baddynamic(tmp, pd->proto)) {
251					*nport = htons(tmp);
252					return (0);
253				}
254			}
255		}
256
257		switch (r->nat.opts & PF_POOL_TYPEMASK) {
258		case PF_POOL_RANDOM:
259		case PF_POOL_ROUNDROBIN:
260		case PF_POOL_LEASTSTATES:
261			/*
262			 * pick a different source address since we're out
263			 * of free port choices for the current one.
264			 */
265			if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr,
266			    &init_addr, sn, &r->nat, PF_SN_NAT))
267				return (1);
268			break;
269		case PF_POOL_NONE:
270		case PF_POOL_SRCHASH:
271		case PF_POOL_BITMASK:
272		default:
273			return (1);
274		}
275	} while (! PF_AEQ(&init_addr, naddr, pd->naf) );
276	return (1);					/* none available */
277}
278
279int
280pf_map_addr_sticky(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
281    struct pf_addr *naddr, struct pf_src_node **sns, struct pf_pool *rpool,
282    enum pf_sn_types type)
283{
284	struct pf_addr		*raddr, *rmask, *cached;
285	struct pf_state		*s;
286	struct pf_src_node	 k;
287	int			 valid;
288
289	k.af = af;
290	k.type = type;
291	pf_addrcpy(&k.addr, saddr, af);
292	k.rule.ptr = r;
293	pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
294	sns[type] = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
295	if (sns[type] == NULL)
296		return (-1);
297
298	/* check if the cached entry is still valid */
299	cached = &(sns[type])->raddr;
300	valid = 0;
301	if (PF_AZERO(cached, af)) {
302		valid = 1;
303	} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
304		if (pfr_kentry_byaddr(rpool->addr.p.dyn->pfid_kt, cached,
305		    af, 0))
306			valid = 1;
307	} else if (rpool->addr.type == PF_ADDR_TABLE) {
308		if (pfr_kentry_byaddr(rpool->addr.p.tbl, cached, af, 0))
309			valid = 1;
310	} else if (rpool->addr.type != PF_ADDR_NOROUTE) {
311		raddr = &rpool->addr.v.a.addr;
312		rmask = &rpool->addr.v.a.mask;
313		valid = pf_match_addr(0, raddr, rmask, cached, af);
314	}
315	if (!valid) {
316		if (pf_status.debug >= LOG_DEBUG) {
317			log(LOG_DEBUG, "pf: pf_map_addr: "
318			    "stale src tracking (%u) ", type);
319			pf_print_host(&k.addr, 0, af);
320			addlog(" to ");
321			pf_print_host(cached, 0, af);
322			addlog("\n");
323		}
324		if (sns[type]->states != 0) {
325			/* XXX expensive */
326			RBT_FOREACH(s, pf_state_tree_id, &tree_id)
327				pf_state_rm_src_node(s, sns[type]);
328		}
329		sns[type]->expire = 1;
330		pf_remove_src_node(sns[type]);
331		sns[type] = NULL;
332		return (-1);
333	}
334
335
336	if (!PF_AZERO(cached, af)) {
337		pf_addrcpy(naddr, cached, af);
338		if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_LEASTSTATES &&
339		    pf_map_addr_states_increase(af, rpool, cached) == -1)
340			return (-1);
341	}
342	if (pf_status.debug >= LOG_DEBUG) {
343		log(LOG_DEBUG, "pf: pf_map_addr: "
344		    "src tracking (%u) maps ", type);
345		pf_print_host(&k.addr, 0, af);
346		addlog(" to ");
347		pf_print_host(naddr, 0, af);
348		addlog("\n");
349	}
350
351	if (sns[type]->kif != NULL)
352		rpool->kif = sns[type]->kif;
353
354	return (0);
355}
356
/*
 * Return a random value, in network byte order, for the bits of one 32-bit
 * address word that the netmask word `mask' (network byte order) leaves
 * clear.  The value is drawn uniformly from 0 .. ~mask (host byte order).
 */
uint32_t
pf_rand_addr(uint32_t mask)
{
	uint32_t hostbits = ~ntohl(mask);

	/*
	 * An all-zero mask leaves the whole word free.  hostbits + 1 would
	 * wrap around to 0 and arc4random_uniform(0) always yields 0, so
	 * take a full 32-bit random number instead.
	 */
	if (hostbits == 0xffffffff)
		return (htonl(arc4random()));

	return (htonl(arc4random_uniform(hostbits + 1)));
}
367
368int
369pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
370    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sns,
371    struct pf_pool *rpool, enum pf_sn_types type)
372{
373	struct pf_addr		 hash;
374	struct pf_addr		 faddr;
375	struct pf_addr		*raddr = &rpool->addr.v.a.addr;
376	struct pf_addr		*rmask = &rpool->addr.v.a.mask;
377	struct pfr_ktable	*kt;
378	struct pfi_kif		*kif;
379	u_int64_t		 states;
380	u_int16_t		 weight;
381	u_int64_t		 load;
382	u_int64_t		 cload;
383	u_int64_t		 hashidx;
384	int			 cnt;
385
386	if (sns[type] == NULL && rpool->opts & PF_POOL_STICKYADDR &&
387	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE &&
388	    pf_map_addr_sticky(af, r, saddr, naddr, sns, rpool, type) == 0)
389		return (0);
390
391	if (rpool->addr.type == PF_ADDR_NOROUTE)
392		return (1);
393	if (rpool->addr.type == PF_ADDR_DYNIFTL) {
394		switch (af) {
395		case AF_INET:
396			if (rpool->addr.p.dyn->pfid_acnt4 < 1 &&
397			    !PF_POOL_DYNTYPE(rpool->opts))
398				return (1);
399			raddr = &rpool->addr.p.dyn->pfid_addr4;
400			rmask = &rpool->addr.p.dyn->pfid_mask4;
401			break;
402#ifdef INET6
403		case AF_INET6:
404			if (rpool->addr.p.dyn->pfid_acnt6 < 1 &&
405			    !PF_POOL_DYNTYPE(rpool->opts))
406				return (1);
407			raddr = &rpool->addr.p.dyn->pfid_addr6;
408			rmask = &rpool->addr.p.dyn->pfid_mask6;
409			break;
410#endif /* INET6 */
411		default:
412			unhandled_af(af);
413		}
414	} else if (rpool->addr.type == PF_ADDR_TABLE) {
415		if (!PF_POOL_DYNTYPE(rpool->opts))
416			return (1); /* unsupported */
417	} else {
418		raddr = &rpool->addr.v.a.addr;
419		rmask = &rpool->addr.v.a.mask;
420	}
421
422	switch (rpool->opts & PF_POOL_TYPEMASK) {
423	case PF_POOL_NONE:
424		pf_addrcpy(naddr, raddr, af);
425		break;
426	case PF_POOL_BITMASK:
427		pf_poolmask(naddr, raddr, rmask, saddr, af);
428		break;
429	case PF_POOL_RANDOM:
430		if (rpool->addr.type == PF_ADDR_TABLE ||
431		    rpool->addr.type == PF_ADDR_DYNIFTL) {
432			if (rpool->addr.type == PF_ADDR_TABLE)
433				kt = rpool->addr.p.tbl;
434			else
435				kt = rpool->addr.p.dyn->pfid_kt;
436			kt = pfr_ktable_select_active(kt);
437			if (kt == NULL)
438				return (1);
439
440			cnt = kt->pfrkt_cnt;
441			if (cnt == 0)
442				rpool->tblidx = 0;
443			else
444				rpool->tblidx = (int)arc4random_uniform(cnt);
445			memset(&rpool->counter, 0, sizeof(rpool->counter));
446			if (pfr_pool_get(rpool, &raddr, &rmask, af))
447				return (1);
448			pf_addrcpy(naddr, &rpool->counter, af);
449		} else if (init_addr != NULL && PF_AZERO(init_addr, af)) {
450			switch (af) {
451			case AF_INET:
452				rpool->counter.addr32[0] = pf_rand_addr(
453				    rmask->addr32[0]);
454				break;
455#ifdef INET6
456			case AF_INET6:
457				if (rmask->addr32[3] != 0xffffffff)
458					rpool->counter.addr32[3] = pf_rand_addr(
459					    rmask->addr32[3]);
460				else
461					break;
462				if (rmask->addr32[2] != 0xffffffff)
463					rpool->counter.addr32[2] = pf_rand_addr(
464					    rmask->addr32[2]);
465				else
466					break;
467				if (rmask->addr32[1] != 0xffffffff)
468					rpool->counter.addr32[1] = pf_rand_addr(
469					    rmask->addr32[1]);
470				else
471					break;
472				if (rmask->addr32[0] != 0xffffffff)
473					rpool->counter.addr32[0] = pf_rand_addr(
474					    rmask->addr32[0]);
475				break;
476#endif /* INET6 */
477			default:
478				unhandled_af(af);
479			}
480			pf_poolmask(naddr, raddr, rmask, &rpool->counter, af);
481			pf_addrcpy(init_addr, naddr, af);
482
483		} else {
484			pf_addr_inc(&rpool->counter, af);
485			pf_poolmask(naddr, raddr, rmask, &rpool->counter, af);
486		}
487		break;
488	case PF_POOL_SRCHASH:
489		hashidx = pf_hash(saddr, &hash, &rpool->key, af);
490
491		if (rpool->addr.type == PF_ADDR_TABLE ||
492		    rpool->addr.type == PF_ADDR_DYNIFTL) {
493			if (rpool->addr.type == PF_ADDR_TABLE)
494				kt = rpool->addr.p.tbl;
495			else
496				kt = rpool->addr.p.dyn->pfid_kt;
497			kt = pfr_ktable_select_active(kt);
498			if (kt == NULL)
499				return (1);
500
501			cnt = kt->pfrkt_cnt;
502			if (cnt == 0)
503				rpool->tblidx = 0;
504			else
505				rpool->tblidx = (int)(hashidx % cnt);
506			memset(&rpool->counter, 0, sizeof(rpool->counter));
507			if (pfr_pool_get(rpool, &raddr, &rmask, af))
508				return (1);
509			pf_addrcpy(naddr, &rpool->counter, af);
510		} else {
511			pf_poolmask(naddr, raddr, rmask, &hash, af);
512		}
513		break;
514	case PF_POOL_ROUNDROBIN:
515		if (rpool->addr.type == PF_ADDR_TABLE ||
516		    rpool->addr.type == PF_ADDR_DYNIFTL) {
517			if (pfr_pool_get(rpool, &raddr, &rmask, af)) {
518				/*
519				 * reset counter in case its value
520				 * has been removed from the pool.
521				 */
522				memset(&rpool->counter, 0,
523				    sizeof(rpool->counter));
524				if (pfr_pool_get(rpool, &raddr, &rmask, af))
525					return (1);
526			}
527		} else if (PF_AZERO(&rpool->counter, af)) {
528			/*
529			 * fall back to POOL_NONE if there is a single host
530			 * address in pool.
531			 */
532			if (af == AF_INET &&
533			    rmask->addr32[0] == INADDR_BROADCAST) {
534				pf_addrcpy(naddr, raddr, af);
535				break;
536			}
537#ifdef INET6
538			if (af == AF_INET6 &&
539			    IN6_ARE_ADDR_EQUAL(&rmask->v6, &in6mask128)) {
540				pf_addrcpy(naddr, raddr, af);
541				break;
542			}
543#endif
544		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
545			return (1);
546
547		/* iterate over table if it contains entries which are weighted */
548		if ((rpool->addr.type == PF_ADDR_TABLE &&
549		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
550		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
551		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0)) {
552			do {
553				if (rpool->addr.type == PF_ADDR_TABLE ||
554				    rpool->addr.type == PF_ADDR_DYNIFTL) {
555					if (pfr_pool_get(rpool,
556					    &raddr, &rmask, af))
557						return (1);
558				} else {
559					log(LOG_ERR, "pf: pf_map_addr: "
560					    "weighted RR failure");
561					return (1);
562				}
563				if (rpool->weight >= rpool->curweight)
564					break;
565				pf_addr_inc(&rpool->counter, af);
566			} while (1);
567
568			weight = rpool->weight;
569		}
570
571		pf_poolmask(naddr, raddr, rmask, &rpool->counter, af);
572		if (init_addr != NULL && PF_AZERO(init_addr, af))
573			pf_addrcpy(init_addr, &rpool->counter, af);
574		pf_addr_inc(&rpool->counter, af);
575		break;
576	case PF_POOL_LEASTSTATES:
577		/* retrieve an address first */
578		if (rpool->addr.type == PF_ADDR_TABLE ||
579		    rpool->addr.type == PF_ADDR_DYNIFTL) {
580			if (pfr_pool_get(rpool, &raddr, &rmask, af)) {
581				/* see PF_POOL_ROUNDROBIN */
582				memset(&rpool->counter, 0,
583				    sizeof(rpool->counter));
584				if (pfr_pool_get(rpool, &raddr, &rmask, af))
585					return (1);
586			}
587		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
588			return (1);
589
590		states = rpool->states;
591		weight = rpool->weight;
592		kif = rpool->kif;
593
594		if ((rpool->addr.type == PF_ADDR_TABLE &&
595		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
596		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
597		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
598			load = ((UINT16_MAX * rpool->states) / rpool->weight);
599		else
600			load = states;
601
602		pf_addrcpy(&faddr, &rpool->counter, af);
603
604		pf_addrcpy(naddr, &rpool->counter, af);
605		if (init_addr != NULL && PF_AZERO(init_addr, af))
606			pf_addrcpy(init_addr, naddr, af);
607
608		/*
609		 * iterate *once* over whole table and find destination with
610		 * least connection
611		 */
612		do  {
613			pf_addr_inc(&rpool->counter, af);
614			if (rpool->addr.type == PF_ADDR_TABLE ||
615			    rpool->addr.type == PF_ADDR_DYNIFTL) {
616				if (pfr_pool_get(rpool, &raddr, &rmask, af))
617					return (1);
618			} else if (pf_match_addr(0, raddr, rmask,
619			    &rpool->counter, af))
620				return (1);
621
622			if ((rpool->addr.type == PF_ADDR_TABLE &&
623			    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
624			    (rpool->addr.type == PF_ADDR_DYNIFTL &&
625			    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
626				cload = ((UINT16_MAX * rpool->states)
627					/ rpool->weight);
628			else
629				cload = rpool->states;
630
631			/* find lc minimum */
632			if (cload < load) {
633				states = rpool->states;
634				weight = rpool->weight;
635				kif = rpool->kif;
636				load = cload;
637
638				pf_addrcpy(naddr, &rpool->counter, af);
639				if (init_addr != NULL &&
640				    PF_AZERO(init_addr, af))
641				    pf_addrcpy(init_addr, naddr, af);
642			}
643		} while (pf_match_addr(1, &faddr, rmask, &rpool->counter, af) &&
644		    (states > 0));
645
646		if (pf_map_addr_states_increase(af, rpool, naddr) == -1)
647			return (1);
648		/* revert the kif which was set by pfr_pool_get() */
649		rpool->kif = kif;
650		break;
651	}
652
653	if (rpool->opts & PF_POOL_STICKYADDR) {
654		if (sns[type] != NULL) {
655			pf_remove_src_node(sns[type]);
656			sns[type] = NULL;
657		}
658		if (pf_insert_src_node(&sns[type], r, type, af, saddr, naddr,
659		    rpool->kif))
660			return (1);
661	}
662
663	if (pf_status.debug >= LOG_INFO &&
664	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
665		log(LOG_INFO, "pf: pf_map_addr: selected address ");
666		pf_print_host(naddr, 0, af);
667		if ((rpool->opts & PF_POOL_TYPEMASK) ==
668		    PF_POOL_LEASTSTATES)
669			addlog(" with state count %llu", states);
670		if ((rpool->addr.type == PF_ADDR_TABLE &&
671		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
672		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
673		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
674			addlog(" with weight %u", weight);
675		addlog("\n");
676	}
677
678	return (0);
679}
680
681int
682pf_map_addr_states_increase(sa_family_t af, struct pf_pool *rpool,
683    struct pf_addr *naddr)
684{
685	if (rpool->addr.type == PF_ADDR_TABLE) {
686		if (pfr_states_increase(rpool->addr.p.tbl,
687		    naddr, af) == -1) {
688			if (pf_status.debug >= LOG_DEBUG) {
689				log(LOG_DEBUG,
690				    "pf: pf_map_addr_states_increase: "
691				    "selected address ");
692				pf_print_host(naddr, 0, af);
693				addlog(". Failed to increase count!\n");
694			}
695			return (-1);
696		}
697	} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
698		if (pfr_states_increase(rpool->addr.p.dyn->pfid_kt,
699		    naddr, af) == -1) {
700			if (pf_status.debug >= LOG_DEBUG) {
701				log(LOG_DEBUG,
702				    "pf: pf_map_addr_states_increase: "
703				    "selected address ");
704				pf_print_host(naddr, 0, af);
705				addlog(". Failed to increase count!\n");
706			}
707			return (-1);
708		}
709	}
710	return (0);
711}
712
713int
714pf_get_transaddr(struct pf_rule *r, struct pf_pdesc *pd,
715    struct pf_src_node **sns, struct pf_rule **nr)
716{
717	struct pf_addr	naddr;
718	u_int16_t	nport;
719
720#ifdef INET6
721	if (pd->af != pd->naf)
722		return (pf_get_transaddr_af(r, pd, sns));
723#endif /* INET6 */
724
725	if (r->nat.addr.type != PF_ADDR_NONE) {
726		/* XXX is this right? what if rtable is changed at the same
727		 * XXX time? where do I need to figure out the sport? */
728		nport = 0;
729		if (pf_get_sport(pd, r, &naddr, &nport,
730		    r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
731			DPFPRINTF(LOG_NOTICE,
732			    "pf: NAT proxy port allocation (%u-%u) failed",
733			    r->nat.proxy_port[0],
734			    r->nat.proxy_port[1]);
735			return (-1);
736		}
737		*nr = r;
738		pf_addrcpy(&pd->nsaddr, &naddr, pd->af);
739		pd->nsport = nport;
740	}
741	if (r->rdr.addr.type != PF_ADDR_NONE) {
742		if (pf_map_addr(pd->af, r, &pd->nsaddr, &naddr, NULL, sns,
743		    &r->rdr, PF_SN_RDR))
744			return (-1);
745		if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
746			pf_poolmask(&naddr, &naddr,  &r->rdr.addr.v.a.mask,
747			    &pd->ndaddr, pd->af);
748
749		nport = 0;
750		if (r->rdr.proxy_port[1]) {
751			u_int32_t	tmp_nport;
752			u_int16_t	div;
753
754			div = r->rdr.proxy_port[1] - r->rdr.proxy_port[0] + 1;
755			div = (div == 0) ? 1 : div;
756
757			tmp_nport = ((ntohs(pd->ndport) - ntohs(r->dst.port[0])) % div) +
758			    r->rdr.proxy_port[0];
759
760			/* wrap around if necessary */
761			if (tmp_nport > 65535)
762				tmp_nport -= 65535;
763			nport = htons((u_int16_t)tmp_nport);
764		} else if (r->rdr.proxy_port[0])
765			nport = htons(r->rdr.proxy_port[0]);
766		*nr = r;
767		pf_addrcpy(&pd->ndaddr, &naddr, pd->af);
768		if (nport)
769			pd->ndport = nport;
770	}
771
772	return (0);
773}
774
775#ifdef INET6
776int
777pf_get_transaddr_af(struct pf_rule *r, struct pf_pdesc *pd,
778    struct pf_src_node **sns)
779{
780	struct pf_addr	ndaddr, nsaddr, naddr;
781	u_int16_t	nport;
782	int		prefixlen = 96;
783
784	if (pf_status.debug >= LOG_INFO) {
785		log(LOG_INFO, "pf: af-to %s %s, ",
786		    pd->naf == AF_INET ? "inet" : "inet6",
787		    r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr");
788		pf_print_host(&pd->nsaddr, pd->nsport, pd->af);
789		addlog(" -> ");
790		pf_print_host(&pd->ndaddr, pd->ndport, pd->af);
791		addlog("\n");
792	}
793
794	if (r->nat.addr.type == PF_ADDR_NONE)
795		panic("pf_get_transaddr_af: no nat pool for source address");
796
797	/* get source address and port */
798	nport = 0;
799	if (pf_get_sport(pd, r, &nsaddr, &nport,
800	    r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
801		DPFPRINTF(LOG_NOTICE,
802		    "pf: af-to NAT proxy port allocation (%u-%u) failed",
803		    r->nat.proxy_port[0],
804		    r->nat.proxy_port[1]);
805		return (-1);
806	}
807	pd->nsport = nport;
808
809	if (pd->proto == IPPROTO_ICMPV6 && pd->naf == AF_INET) {
810		if (pd->dir == PF_IN) {
811			pd->ndport = ntohs(pd->ndport);
812			if (pd->ndport == ICMP6_ECHO_REQUEST)
813				pd->ndport = ICMP_ECHO;
814			else if (pd->ndport == ICMP6_ECHO_REPLY)
815				pd->ndport = ICMP_ECHOREPLY;
816			pd->ndport = htons(pd->ndport);
817		} else {
818			pd->nsport = ntohs(pd->nsport);
819			if (pd->nsport == ICMP6_ECHO_REQUEST)
820				pd->nsport = ICMP_ECHO;
821			else if (pd->nsport == ICMP6_ECHO_REPLY)
822				pd->nsport = ICMP_ECHOREPLY;
823			pd->nsport = htons(pd->nsport);
824		}
825	} else if (pd->proto == IPPROTO_ICMP && pd->naf == AF_INET6) {
826		if (pd->dir == PF_IN) {
827			pd->ndport = ntohs(pd->ndport);
828			if (pd->ndport == ICMP_ECHO)
829				pd->ndport = ICMP6_ECHO_REQUEST;
830			else if (pd->ndport == ICMP_ECHOREPLY)
831				pd->ndport = ICMP6_ECHO_REPLY;
832			pd->ndport = htons(pd->ndport);
833		} else {
834			pd->nsport = ntohs(pd->nsport);
835			if (pd->nsport == ICMP_ECHO)
836				pd->nsport = ICMP6_ECHO_REQUEST;
837			else if (pd->nsport == ICMP_ECHOREPLY)
838				pd->nsport = ICMP6_ECHO_REPLY;
839			pd->nsport = htons(pd->nsport);
840		}
841	}
842
843	/* get the destination address and port */
844	if (r->rdr.addr.type != PF_ADDR_NONE) {
845		if (pf_map_addr(pd->naf, r, &nsaddr, &naddr, NULL, sns,
846		    &r->rdr, PF_SN_RDR))
847			return (-1);
848		if (r->rdr.proxy_port[0])
849			pd->ndport = htons(r->rdr.proxy_port[0]);
850
851		if (pd->naf == AF_INET) {
852			/* The prefix is the IPv4 rdr address */
853			prefixlen = in_mask2len((struct in_addr *)
854			    &r->rdr.addr.v.a.mask);
855			inet_nat46(pd->naf, &pd->ndaddr,
856			    &ndaddr, &naddr, prefixlen);
857		} else {
858			/* The prefix is the IPv6 rdr address */
859			prefixlen =
860			    in6_mask2len((struct in6_addr *)
861			    &r->rdr.addr.v.a.mask, NULL);
862			inet_nat64(pd->naf, &pd->ndaddr,
863			    &ndaddr, &naddr, prefixlen);
864		}
865	} else {
866		if (pd->naf == AF_INET) {
867			/* The prefix is the IPv6 dst address */
868			prefixlen =
869			    in6_mask2len((struct in6_addr *)
870			    &r->dst.addr.v.a.mask, NULL);
871			if (prefixlen < 32)
872				prefixlen = 96;
873			inet_nat64(pd->naf, &pd->ndaddr,
874			    &ndaddr, &pd->ndaddr, prefixlen);
875		} else {
876			/*
877			 * The prefix is the IPv6 nat address
878			 * (that was stored in pd->nsaddr)
879			 */
880			prefixlen = in6_mask2len((struct in6_addr *)
881			    &r->nat.addr.v.a.mask, NULL);
882			if (prefixlen > 96)
883				prefixlen = 96;
884			inet_nat64(pd->naf, &pd->ndaddr,
885			    &ndaddr, &nsaddr, prefixlen);
886		}
887	}
888
889	pf_addrcpy(&pd->nsaddr, &nsaddr, pd->naf);
890	pf_addrcpy(&pd->ndaddr, &ndaddr, pd->naf);
891
892	if (pf_status.debug >= LOG_INFO) {
893		log(LOG_INFO, "pf: af-to %s %s done, prefixlen %d, ",
894		    pd->naf == AF_INET ? "inet" : "inet6",
895		    r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr",
896		    prefixlen);
897		pf_print_host(&pd->nsaddr, pd->nsport, pd->naf);
898		addlog(" -> ");
899		pf_print_host(&pd->ndaddr, pd->ndport, pd->naf);
900		addlog("\n");
901	}
902
903	return (0);
904}
905#endif /* INET6 */
906
907int
908pf_postprocess_addr(struct pf_state *cur)
909{
910	struct pf_rule		*nr;
911	struct pf_state_key	*sks;
912	struct pf_pool		 rpool;
913	struct pf_addr		 lookup_addr;
914	int			 slbcount = -1;
915
916	nr = cur->natrule.ptr;
917
918	if (nr == NULL)
919		return (0);
920
921	/* decrease counter */
922
923	sks = cur->key[PF_SK_STACK];
924
925	/* check for outgoing or ingoing balancing */
926	if (nr->rt == PF_ROUTETO)
927		lookup_addr = cur->rt_addr;
928	else if (sks != NULL)
929		lookup_addr = sks->addr[1];
930	else {
931		if (pf_status.debug >= LOG_DEBUG) {
932			log(LOG_DEBUG, "pf: %s: unable to obtain address",
933			    __func__);
934		}
935		return (1);
936	}
937
938	/* check for appropriate pool */
939	if (nr->rdr.addr.type != PF_ADDR_NONE)
940		rpool = nr->rdr;
941	else if (nr->nat.addr.type != PF_ADDR_NONE)
942		rpool = nr->nat;
943	else if (nr->route.addr.type != PF_ADDR_NONE)
944		rpool = nr->route;
945	else
946		return (0);
947
948	if (((rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_LEASTSTATES))
949		return (0);
950
951	if (rpool.addr.type == PF_ADDR_TABLE) {
952		if ((slbcount = pfr_states_decrease(
953		    rpool.addr.p.tbl,
954		    &lookup_addr, sks->af)) == -1) {
955			if (pf_status.debug >= LOG_DEBUG) {
956				log(LOG_DEBUG, "pf: %s: selected address ",
957				    __func__);
958				pf_print_host(&lookup_addr,
959				    sks->port[0], sks->af);
960				addlog(". Failed to "
961				    "decrease count!\n");
962			}
963			return (1);
964		}
965	} else if (rpool.addr.type == PF_ADDR_DYNIFTL) {
966		if ((slbcount = pfr_states_decrease(
967		    rpool.addr.p.dyn->pfid_kt,
968		    &lookup_addr, sks->af)) == -1) {
969			if (pf_status.debug >= LOG_DEBUG) {
970				log(LOG_DEBUG, "pf: %s: selected address ",
971				    __func__);
972				pf_print_host(&lookup_addr,
973				    sks->port[0], sks->af);
974				addlog(". Failed to "
975				    "decrease count!\n");
976			}
977			return (1);
978		}
979	}
980	if (slbcount > -1) {
981		if (pf_status.debug >= LOG_INFO) {
982			log(LOG_INFO, "pf: %s: selected address ", __func__);
983			pf_print_host(&lookup_addr, sks->port[0],
984			    sks->af);
985			addlog(" decreased state count to %u\n",
986			    slbcount);
987		}
988	}
989	return (0);
990}
991