pf_lb.c revision 1.50
1/*	$OpenBSD: pf_lb.c,v 1.50 2015/10/13 19:32:31 sashan Exp $ */
2
3/*
4 * Copyright (c) 2001 Daniel Hartmeier
5 * Copyright (c) 2002 - 2008 Henning Brauer
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 *    - Redistributions of source code must retain the above copyright
13 *      notice, this list of conditions and the following disclaimer.
14 *    - Redistributions in binary form must reproduce the above
15 *      copyright notice, this list of conditions and the following
16 *      disclaimer in the documentation and/or other materials provided
17 *      with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 *
32 * Effort sponsored in part by the Defense Advanced Research Projects
33 * Agency (DARPA) and Air Force Research Laboratory, Air Force
34 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35 *
36 */
37
38#include "bpfilter.h"
39#include "pflog.h"
40#include "pfsync.h"
41#include "pflow.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/mbuf.h>
46#include <sys/filio.h>
47#include <sys/socket.h>
48#include <sys/socketvar.h>
49#include <sys/kernel.h>
50#include <sys/time.h>
51#include <sys/pool.h>
52#include <sys/rwlock.h>
53#include <sys/syslog.h>
54#include <sys/stdint.h>
55
56#include <crypto/siphash.h>
57
58#include <net/if.h>
59#include <net/if_types.h>
60#include <net/bpf.h>
61#include <net/route.h>
62
63#include <netinet/in.h>
64#include <netinet/ip.h>
65#include <netinet/ip_var.h>
66#include <netinet/tcp.h>
67#include <netinet/tcp_seq.h>
68#include <netinet/udp.h>
69#include <netinet/ip_icmp.h>
70#include <netinet/tcp_timer.h>
71#include <netinet/udp_var.h>
72#include <netinet/icmp_var.h>
73#include <netinet/if_ether.h>
74#include <netinet/in_pcb.h>
75
76#include <net/pfvar.h>
77
78#if NPFLOG > 0
79#include <net/if_pflog.h>
80#endif	/* NPFLOG > 0 */
81
82#if NPFLOW > 0
83#include <net/if_pflow.h>
84#endif	/* NPFLOW > 0 */
85
86#if NPFSYNC > 0
87#include <net/if_pfsync.h>
88#endif /* NPFSYNC > 0 */
89
90#ifdef INET6
91#include <netinet/ip6.h>
92#include <netinet/icmp6.h>
93#endif /* INET6 */
94
95
96/*
97 * Global variables
98 */
99
100u_int64_t		 pf_hash(struct pf_addr *, struct pf_addr *,
101			    struct pf_poolhashkey *, sa_family_t);
102int			 pf_get_sport(struct pf_pdesc *, struct pf_rule *,
103			    struct pf_addr *, u_int16_t *, u_int16_t,
104			    u_int16_t, struct pf_src_node **);
105int			 pf_get_transaddr_af(struct pf_rule *,
106			    struct pf_pdesc *, struct pf_src_node **);
107int			 pf_map_addr_sticky(sa_family_t, struct pf_rule *,
108			    struct pf_addr *, struct pf_addr *,
109			    struct pf_src_node **, struct pf_pool *,
110			    enum pf_sn_types);
111
112u_int64_t
113pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
114    struct pf_poolhashkey *key, sa_family_t af)
115{
116	uint64_t res = 0;
117#ifdef INET6
118	union {
119		uint64_t hash64;
120		uint32_t hash32[2];
121	} h;
122#endif	/* INET6 */
123
124	switch (af) {
125	case AF_INET:
126		res = SipHash24((SIPHASH_KEY *)key,
127		    &inaddr->addr32[0], sizeof(inaddr->addr32[0]));
128		hash->addr32[0] = res;
129		break;
130#ifdef INET6
131	case AF_INET6:
132		res = SipHash24((SIPHASH_KEY *)key, &inaddr->addr32[0],
133		    4 * sizeof(inaddr->addr32[0]));
134		h.hash64 = res;
135		hash->addr32[0] = h.hash32[0];
136		hash->addr32[1] = h.hash32[1];
137		/*
138		 * siphash isn't big enough, but flipping it around is
139		 * good enough here.
140		 */
141		hash->addr32[2] = ~h.hash32[1];
142		hash->addr32[3] = ~h.hash32[0];
143		break;
144#endif /* INET6 */
145	default:
146		unhandled_af(af);
147	}
148	return (res);
149}
150
151int
152pf_get_sport(struct pf_pdesc *pd, struct pf_rule *r,
153    struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
154    struct pf_src_node **sn)
155{
156	struct pf_state_key_cmp	key;
157	struct pf_addr		init_addr;
158	u_int16_t		cut;
159
160	bzero(&init_addr, sizeof(init_addr));
161	if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr, &init_addr, sn, &r->nat,
162	    PF_SN_NAT))
163		return (1);
164
165	if (pd->proto == IPPROTO_ICMP) {
166		if (pd->ndport == htons(ICMP_ECHO)) {
167			low = 1;
168			high = 65535;
169		} else
170			return (0);	/* Don't try to modify non-echo ICMP */
171	}
172#ifdef INET6
173	if (pd->proto == IPPROTO_ICMPV6) {
174		if (pd->ndport == htons(ICMP6_ECHO_REQUEST)) {
175			low = 1;
176			high = 65535;
177		} else
178			return (0);	/* Don't try to modify non-echo ICMP */
179	}
180#endif /* INET6 */
181
182	do {
183		key.af = pd->naf;
184		key.proto = pd->proto;
185		key.rdomain = pd->rdomain;
186		PF_ACPY(&key.addr[0], &pd->ndaddr, key.af);
187		PF_ACPY(&key.addr[1], naddr, key.af);
188		key.port[0] = pd->ndport;
189
190		/*
191		 * port search; start random, step;
192		 * similar 2 portloop in in_pcbbind
193		 */
194		if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP ||
195		    pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6)) {
196			/* XXX bug: icmp states dont use the id on both
197			 * XXX sides (traceroute -I through nat) */
198			key.port[1] = pd->nsport;
199			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
200				*nport = pd->nsport;
201				return (0);
202			}
203		} else if (low == 0 && high == 0) {
204			key.port[1] = pd->nsport;
205			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
206				*nport = pd->nsport;
207				return (0);
208			}
209		} else if (low == high) {
210			key.port[1] = htons(low);
211			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
212				*nport = htons(low);
213				return (0);
214			}
215		} else {
216			u_int16_t tmp;
217
218			if (low > high) {
219				tmp = low;
220				low = high;
221				high = tmp;
222			}
223			/* low < high */
224			cut = arc4random_uniform(1 + high - low) + low;
225			/* low <= cut <= high */
226			for (tmp = cut; tmp <= high; ++(tmp)) {
227				key.port[1] = htons(tmp);
228				if (pf_find_state_all(&key, PF_IN, NULL) ==
229				    NULL && !in_baddynamic(tmp, pd->proto)) {
230					*nport = htons(tmp);
231					return (0);
232				}
233			}
234			for (tmp = cut - 1; tmp >= low; --(tmp)) {
235				key.port[1] = htons(tmp);
236				if (pf_find_state_all(&key, PF_IN, NULL) ==
237				    NULL && !in_baddynamic(tmp, pd->proto)) {
238					*nport = htons(tmp);
239					return (0);
240				}
241			}
242		}
243
244		switch (r->nat.opts & PF_POOL_TYPEMASK) {
245		case PF_POOL_RANDOM:
246		case PF_POOL_ROUNDROBIN:
247		case PF_POOL_LEASTSTATES:
248			/*
249			 * pick a different source address since we're out
250			 * of free port choices for the current one.
251			 */
252			if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr,
253			    &init_addr, sn, &r->nat, PF_SN_NAT))
254				return (1);
255			break;
256		case PF_POOL_NONE:
257		case PF_POOL_SRCHASH:
258		case PF_POOL_BITMASK:
259		default:
260			return (1);
261		}
262	} while (! PF_AEQ(&init_addr, naddr, pd->naf) );
263	return (1);					/* none available */
264}
265
266int
267pf_map_addr_sticky(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
268    struct pf_addr *naddr, struct pf_src_node **sns, struct pf_pool *rpool,
269    enum pf_sn_types type)
270{
271	struct pf_addr		*raddr, *rmask, *cached;
272	struct pf_state		*s;
273	struct pf_src_node	 k;
274	int			 valid;
275
276	k.af = af;
277	k.type = type;
278	PF_ACPY(&k.addr, saddr, af);
279	k.rule.ptr = r;
280	pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
281	sns[type] = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
282	if (sns[type] == NULL)
283		return (-1);
284
285	/* check if the cached entry is still valid */
286	cached = &(sns[type])->raddr;
287	valid = 0;
288	if (PF_AZERO(cached, af)) {
289		valid = 1;
290	} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
291		if (pfr_kentry_byaddr(rpool->addr.p.dyn->pfid_kt, cached,
292		    af, 0))
293			valid = 1;
294	} else if (rpool->addr.type == PF_ADDR_TABLE) {
295		if (pfr_kentry_byaddr(rpool->addr.p.tbl, cached, af, 0))
296			valid = 1;
297	} else if (rpool->addr.type != PF_ADDR_NOROUTE) {
298		raddr = &rpool->addr.v.a.addr;
299		rmask = &rpool->addr.v.a.mask;
300		valid = pf_match_addr(0, raddr, rmask, cached, af);
301	}
302	if (!valid) {
303		if (pf_status.debug >= LOG_DEBUG) {
304			log(LOG_DEBUG, "pf: pf_map_addr: "
305			    "stale src tracking (%u) ", type);
306			pf_print_host(&k.addr, 0, af);
307			addlog(" to ");
308			pf_print_host(cached, 0, af);
309			addlog("\n");
310		}
311		if (sns[type]->states != 0) {
312			/* XXX expensive */
313			RB_FOREACH(s, pf_state_tree_id,
314			   &tree_id)
315				pf_state_rm_src_node(s,
316				    sns[type]);
317		}
318		sns[type]->expire = 1;
319		pf_remove_src_node(sns[type]);
320		sns[type] = NULL;
321		return (-1);
322	}
323	if (!PF_AZERO(cached, af))
324		PF_ACPY(naddr, cached, af);
325	if (pf_status.debug >= LOG_DEBUG) {
326		log(LOG_DEBUG, "pf: pf_map_addr: "
327		    "src tracking (%u) maps ", type);
328		pf_print_host(&k.addr, 0, af);
329		addlog(" to ");
330		pf_print_host(naddr, 0, af);
331		addlog("\n");
332	}
333	return (0);
334}
335
336int
337pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
338    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sns,
339    struct pf_pool *rpool, enum pf_sn_types type)
340{
341	unsigned char		 hash[16];
342	struct pf_addr		 faddr;
343	struct pf_addr		*raddr = &rpool->addr.v.a.addr;
344	struct pf_addr		*rmask = &rpool->addr.v.a.mask;
345	u_int64_t		 states;
346	u_int16_t		 weight;
347	u_int64_t		 load;
348	u_int64_t		 cload;
349	u_int64_t		 hashidx;
350	int			 cnt;
351
352	if (sns[type] == NULL && rpool->opts & PF_POOL_STICKYADDR &&
353	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE &&
354	    pf_map_addr_sticky(af, r, saddr, naddr, sns, rpool, type) == 0)
355		return (0);
356
357	if (rpool->addr.type == PF_ADDR_NOROUTE)
358		return (1);
359	if (rpool->addr.type == PF_ADDR_DYNIFTL) {
360		switch (af) {
361		case AF_INET:
362			if (rpool->addr.p.dyn->pfid_acnt4 < 1 &&
363			    !PF_POOL_DYNTYPE(rpool->opts))
364				return (1);
365			raddr = &rpool->addr.p.dyn->pfid_addr4;
366			rmask = &rpool->addr.p.dyn->pfid_mask4;
367			break;
368#ifdef INET6
369		case AF_INET6:
370			if (rpool->addr.p.dyn->pfid_acnt6 < 1 &&
371			    !PF_POOL_DYNTYPE(rpool->opts))
372				return (1);
373			raddr = &rpool->addr.p.dyn->pfid_addr6;
374			rmask = &rpool->addr.p.dyn->pfid_mask6;
375			break;
376#endif /* INET6 */
377		default:
378			unhandled_af(af);
379		}
380	} else if (rpool->addr.type == PF_ADDR_TABLE) {
381		if (!PF_POOL_DYNTYPE(rpool->opts))
382			return (1); /* unsupported */
383	} else {
384		raddr = &rpool->addr.v.a.addr;
385		rmask = &rpool->addr.v.a.mask;
386	}
387
388	switch (rpool->opts & PF_POOL_TYPEMASK) {
389	case PF_POOL_NONE:
390		PF_ACPY(naddr, raddr, af);
391		break;
392	case PF_POOL_BITMASK:
393		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
394		break;
395	case PF_POOL_RANDOM:
396		if (rpool->addr.type == PF_ADDR_TABLE) {
397			cnt = rpool->addr.p.tbl->pfrkt_cnt;
398			if (cnt == 0)
399				rpool->tblidx = 0;
400			else
401				rpool->tblidx = (int)arc4random_uniform(cnt);
402			memset(&rpool->counter, 0, sizeof(rpool->counter));
403			if (pfr_pool_get(rpool, &raddr, &rmask, af))
404				return (1);
405			PF_ACPY(naddr, &rpool->counter, af);
406		} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
407			cnt = rpool->addr.p.dyn->pfid_kt->pfrkt_cnt;
408			if (cnt == 0)
409				rpool->tblidx = 0;
410			else
411				rpool->tblidx = (int)arc4random_uniform(cnt);
412			memset(&rpool->counter, 0, sizeof(rpool->counter));
413			if (pfr_pool_get(rpool, &raddr, &rmask, af))
414				return (1);
415			PF_ACPY(naddr, &rpool->counter, af);
416		} else if (init_addr != NULL && PF_AZERO(init_addr, af)) {
417			switch (af) {
418			case AF_INET:
419				rpool->counter.addr32[0] = htonl(arc4random());
420				break;
421#ifdef INET6
422			case AF_INET6:
423				if (rmask->addr32[3] != 0xffffffff)
424					rpool->counter.addr32[3] =
425					    htonl(arc4random());
426				else
427					break;
428				if (rmask->addr32[2] != 0xffffffff)
429					rpool->counter.addr32[2] =
430					    htonl(arc4random());
431				else
432					break;
433				if (rmask->addr32[1] != 0xffffffff)
434					rpool->counter.addr32[1] =
435					    htonl(arc4random());
436				else
437					break;
438				if (rmask->addr32[0] != 0xffffffff)
439					rpool->counter.addr32[0] =
440					    htonl(arc4random());
441				break;
442#endif /* INET6 */
443			default:
444				unhandled_af(af);
445			}
446			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
447			PF_ACPY(init_addr, naddr, af);
448
449		} else {
450			PF_AINC(&rpool->counter, af);
451			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
452		}
453		break;
454	case PF_POOL_SRCHASH:
455		hashidx =
456		    pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
457		if (rpool->addr.type == PF_ADDR_TABLE) {
458			cnt = rpool->addr.p.tbl->pfrkt_cnt;
459			if (cnt == 0)
460				rpool->tblidx = 0;
461			else
462				rpool->tblidx = (int)(hashidx % cnt);
463			memset(&rpool->counter, 0, sizeof(rpool->counter));
464			if (pfr_pool_get(rpool, &raddr, &rmask, af))
465				return (1);
466			PF_ACPY(naddr, &rpool->counter, af);
467		} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
468			cnt = rpool->addr.p.dyn->pfid_kt->pfrkt_cnt;
469			if (cnt == 0)
470				rpool->tblidx = 0;
471			else
472				rpool->tblidx = (int)(hashidx % cnt);
473			memset(&rpool->counter, 0, sizeof(rpool->counter));
474			if (pfr_pool_get(rpool, &raddr, &rmask, af))
475				return (1);
476			PF_ACPY(naddr, &rpool->counter, af);
477		} else {
478			PF_POOLMASK(naddr, raddr, rmask,
479			    (struct pf_addr *)&hash, af);
480		}
481		break;
482	case PF_POOL_ROUNDROBIN:
483		if (rpool->addr.type == PF_ADDR_TABLE ||
484		    rpool->addr.type == PF_ADDR_DYNIFTL) {
485			if (pfr_pool_get(rpool, &raddr, &rmask, af)) {
486				/*
487				 * reset counter in case its value
488				 * has been removed from the pool.
489				 */
490				bzero(&rpool->counter, sizeof(rpool->counter));
491				if (pfr_pool_get(rpool, &raddr, &rmask, af))
492					return (1);
493			}
494		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
495			return (1);
496
497		/* iterate over table if it contains entries which are weighted */
498		if ((rpool->addr.type == PF_ADDR_TABLE &&
499		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
500		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
501		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0)) {
502			do {
503				if (rpool->addr.type == PF_ADDR_TABLE ||
504				    rpool->addr.type == PF_ADDR_DYNIFTL) {
505					if (pfr_pool_get(rpool,
506					    &raddr, &rmask, af))
507						return (1);
508				} else {
509					log(LOG_ERR, "pf: pf_map_addr: "
510					    "weighted RR failure");
511					return (1);
512				}
513				if (rpool->weight >= rpool->curweight)
514					break;
515				PF_AINC(&rpool->counter, af);
516			} while (1);
517
518			weight = rpool->weight;
519		}
520
521		PF_ACPY(naddr, &rpool->counter, af);
522		if (init_addr != NULL && PF_AZERO(init_addr, af))
523			PF_ACPY(init_addr, naddr, af);
524		PF_AINC(&rpool->counter, af);
525		break;
526	case PF_POOL_LEASTSTATES:
527		/* retrieve an address first */
528		if (rpool->addr.type == PF_ADDR_TABLE ||
529		    rpool->addr.type == PF_ADDR_DYNIFTL) {
530			if (pfr_pool_get(rpool, &raddr, &rmask, af)) {
531				/* see PF_POOL_ROUNDROBIN */
532				bzero(&rpool->counter, sizeof(rpool->counter));
533				if (pfr_pool_get(rpool, &raddr, &rmask, af))
534					return (1);
535			}
536		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
537			return (1);
538
539		states = rpool->states;
540		weight = rpool->weight;
541
542		if ((rpool->addr.type == PF_ADDR_TABLE &&
543		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
544		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
545		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
546			load = ((UINT16_MAX * rpool->states) / rpool->weight);
547		else
548			load = states;
549
550		PF_ACPY(&faddr, &rpool->counter, af);
551
552		PF_ACPY(naddr, &rpool->counter, af);
553		if (init_addr != NULL && PF_AZERO(init_addr, af))
554			PF_ACPY(init_addr, naddr, af);
555
556		/*
557		 * iterate *once* over whole table and find destination with
558		 * least connection
559		 */
560		do  {
561			PF_AINC(&rpool->counter, af);
562			if (rpool->addr.type == PF_ADDR_TABLE ||
563			    rpool->addr.type == PF_ADDR_DYNIFTL) {
564				if (pfr_pool_get(rpool, &raddr, &rmask, af))
565					return (1);
566			} else if (pf_match_addr(0, raddr, rmask,
567			    &rpool->counter, af))
568				return (1);
569
570			if ((rpool->addr.type == PF_ADDR_TABLE &&
571			    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
572			    (rpool->addr.type == PF_ADDR_DYNIFTL &&
573			    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
574				cload = ((UINT16_MAX * rpool->states)
575					/ rpool->weight);
576			else
577				cload = rpool->states;
578
579			/* find lc minimum */
580			if (cload < load) {
581				states = rpool->states;
582				weight = rpool->weight;
583				load = cload;
584
585				PF_ACPY(naddr, &rpool->counter, af);
586				if (init_addr != NULL &&
587				    PF_AZERO(init_addr, af))
588				    PF_ACPY(init_addr, naddr, af);
589			}
590		} while (pf_match_addr(1, &faddr, rmask, &rpool->counter, af) &&
591		    (states > 0));
592
593		if (rpool->addr.type == PF_ADDR_TABLE) {
594			if (pfr_states_increase(rpool->addr.p.tbl,
595			    naddr, af) == -1) {
596				if (pf_status.debug >= LOG_DEBUG) {
597					log(LOG_DEBUG,"pf: pf_map_addr: "
598					    "selected address ");
599					pf_print_host(naddr, 0, af);
600					addlog(". Failed to increase count!\n");
601				}
602				return (1);
603			}
604		} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
605			if (pfr_states_increase(rpool->addr.p.dyn->pfid_kt,
606			    naddr, af) == -1) {
607				if (pf_status.debug >= LOG_DEBUG) {
608					log(LOG_DEBUG, "pf: pf_map_addr: "
609					    "selected address ");
610					pf_print_host(naddr, 0, af);
611					addlog(". Failed to increase count!\n");
612				}
613				return (1);
614			}
615		}
616		break;
617	}
618
619	if (rpool->opts & PF_POOL_STICKYADDR) {
620		if (sns[type] != NULL) {
621			pf_remove_src_node(sns[type]);
622			sns[type] = NULL;
623		}
624		if (pf_insert_src_node(&sns[type], r, type, af, saddr, naddr))
625			return (1);
626	}
627
628	if (pf_status.debug >= LOG_NOTICE &&
629	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
630		log(LOG_NOTICE, "pf: pf_map_addr: selected address ");
631		pf_print_host(naddr, 0, af);
632		if ((rpool->opts & PF_POOL_TYPEMASK) ==
633		    PF_POOL_LEASTSTATES)
634			addlog(" with state count %llu", states);
635		if ((rpool->addr.type == PF_ADDR_TABLE &&
636		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
637		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
638		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
639			addlog(" with weight %u", weight);
640		addlog("\n");
641	}
642
643	return (0);
644}
645
646int
647pf_get_transaddr(struct pf_rule *r, struct pf_pdesc *pd,
648    struct pf_src_node **sns, struct pf_rule **nr)
649{
650	struct pf_addr	naddr;
651	u_int16_t	nport = 0;
652
653#ifdef INET6
654	if (pd->af != pd->naf)
655		return (pf_get_transaddr_af(r, pd, sns));
656#endif /* INET6 */
657
658	if (r->nat.addr.type != PF_ADDR_NONE) {
659		/* XXX is this right? what if rtable is changed at the same
660		 * XXX time? where do I need to figure out the sport? */
661		if (pf_get_sport(pd, r, &naddr, &nport,
662		    r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
663			DPFPRINTF(LOG_NOTICE,
664			    "pf: NAT proxy port allocation (%u-%u) failed",
665			    r->nat.proxy_port[0],
666			    r->nat.proxy_port[1]);
667			return (-1);
668		}
669		*nr = r;
670		PF_ACPY(&pd->nsaddr, &naddr, pd->af);
671		pd->nsport = nport;
672	}
673	if (r->rdr.addr.type != PF_ADDR_NONE) {
674		if (pf_map_addr(pd->af, r, &pd->nsaddr, &naddr, NULL, sns,
675		    &r->rdr, PF_SN_RDR))
676			return (-1);
677		if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
678			PF_POOLMASK(&naddr, &naddr,  &r->rdr.addr.v.a.mask,
679			    &pd->ndaddr, pd->af);
680
681		if (r->rdr.proxy_port[1]) {
682			u_int32_t	tmp_nport;
683
684			tmp_nport = ((ntohs(pd->ndport) -
685			    ntohs(r->dst.port[0])) %
686			    (r->rdr.proxy_port[1] -
687			    r->rdr.proxy_port[0] + 1)) +
688			    r->rdr.proxy_port[0];
689
690			/* wrap around if necessary */
691			if (tmp_nport > 65535)
692				tmp_nport -= 65535;
693			nport = htons((u_int16_t)tmp_nport);
694		} else if (r->rdr.proxy_port[0])
695			nport = htons(r->rdr.proxy_port[0]);
696		*nr = r;
697		PF_ACPY(&pd->ndaddr, &naddr, pd->af);
698		if (nport)
699			pd->ndport = nport;
700	}
701
702	return (0);
703}
704
705#ifdef INET6
706int
707pf_get_transaddr_af(struct pf_rule *r, struct pf_pdesc *pd,
708    struct pf_src_node **sns)
709{
710	struct pf_addr	ndaddr, nsaddr, naddr;
711	u_int16_t	nport = 0;
712	int		prefixlen = 96;
713
714	if (pf_status.debug >= LOG_NOTICE) {
715		log(LOG_NOTICE, "pf: af-to %s %s, ",
716		    pd->naf == AF_INET ? "inet" : "inet6",
717		    r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr");
718		pf_print_host(&pd->nsaddr, pd->nsport, pd->af);
719		addlog(" -> ");
720		pf_print_host(&pd->ndaddr, pd->ndport, pd->af);
721		addlog("\n");
722	}
723
724	if (r->nat.addr.type == PF_ADDR_NONE)
725		panic("pf_get_transaddr_af: no nat pool for source address");
726
727	/* get source address and port */
728	if (pf_get_sport(pd, r, &nsaddr, &nport,
729	    r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
730		DPFPRINTF(LOG_NOTICE,
731		    "pf: af-to NAT proxy port allocation (%u-%u) failed",
732		    r->nat.proxy_port[0],
733		    r->nat.proxy_port[1]);
734		return (-1);
735	}
736	pd->nsport = nport;
737
738	if (pd->proto == IPPROTO_ICMPV6 && pd->naf == AF_INET) {
739		if (pd->dir == PF_IN) {
740			pd->ndport = ntohs(pd->ndport);
741			if (pd->ndport == ICMP6_ECHO_REQUEST)
742				pd->ndport = ICMP_ECHO;
743			else if (pd->ndport == ICMP6_ECHO_REPLY)
744				pd->ndport = ICMP_ECHOREPLY;
745			pd->ndport = htons(pd->ndport);
746		} else {
747			pd->nsport = ntohs(pd->nsport);
748			if (pd->nsport == ICMP6_ECHO_REQUEST)
749				pd->nsport = ICMP_ECHO;
750			else if (pd->nsport == ICMP6_ECHO_REPLY)
751				pd->nsport = ICMP_ECHOREPLY;
752			pd->nsport = htons(pd->nsport);
753		}
754	} else if (pd->proto == IPPROTO_ICMP && pd->naf == AF_INET6) {
755		if (pd->dir == PF_IN) {
756			pd->ndport = ntohs(pd->ndport);
757			if (pd->ndport == ICMP_ECHO)
758				pd->ndport = ICMP6_ECHO_REQUEST;
759			else if (pd->ndport == ICMP_ECHOREPLY)
760				pd->ndport = ICMP6_ECHO_REPLY;
761			pd->ndport = htons(pd->ndport);
762		} else {
763			pd->nsport = ntohs(pd->nsport);
764			if (pd->nsport == ICMP_ECHO)
765				pd->nsport = ICMP6_ECHO_REQUEST;
766			else if (pd->nsport == ICMP_ECHOREPLY)
767				pd->nsport = ICMP6_ECHO_REPLY;
768			pd->nsport = htons(pd->nsport);
769		}
770	}
771
772	/* get the destination address and port */
773	if (r->rdr.addr.type != PF_ADDR_NONE) {
774		if (pf_map_addr(pd->naf, r, &nsaddr, &naddr, NULL, sns,
775		    &r->rdr, PF_SN_RDR))
776			return (-1);
777		if (r->rdr.proxy_port[0])
778			pd->ndport = htons(r->rdr.proxy_port[0]);
779
780		if (pd->naf == AF_INET) {
781			/* The prefix is the IPv4 rdr address */
782			prefixlen = in_mask2len((struct in_addr *)
783			    &r->rdr.addr.v.a.mask);
784			inet_nat46(pd->naf, &pd->ndaddr,
785			    &ndaddr, &naddr, prefixlen);
786		} else {
787			/* The prefix is the IPv6 rdr address */
788			prefixlen =
789			    in6_mask2len((struct in6_addr *)
790			    &r->rdr.addr.v.a.mask, NULL);
791			inet_nat64(pd->naf, &pd->ndaddr,
792			    &ndaddr, &naddr, prefixlen);
793		}
794	} else {
795		if (pd->naf == AF_INET) {
796			/* The prefix is the IPv6 dst address */
797			prefixlen =
798			    in6_mask2len((struct in6_addr *)
799			    &r->dst.addr.v.a.mask, NULL);
800			if (prefixlen < 32)
801				prefixlen = 96;
802			inet_nat64(pd->naf, &pd->ndaddr,
803			    &ndaddr, &pd->ndaddr, prefixlen);
804		} else {
805			/*
806			 * The prefix is the IPv6 nat address
807			 * (that was stored in pd->nsaddr)
808			 */
809			prefixlen = in6_mask2len((struct in6_addr *)
810			    &r->nat.addr.v.a.mask, NULL);
811			if (prefixlen > 96)
812				prefixlen = 96;
813			inet_nat64(pd->naf, &pd->ndaddr,
814			    &ndaddr, &nsaddr, prefixlen);
815		}
816	}
817
818	PF_ACPY(&pd->nsaddr, &nsaddr, pd->naf);
819	PF_ACPY(&pd->ndaddr, &ndaddr, pd->naf);
820
821	if (pf_status.debug >= LOG_NOTICE) {
822		log(LOG_NOTICE, "pf: af-to %s %s done, prefixlen %d, ",
823		    pd->naf == AF_INET ? "inet" : "inet6",
824		    r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr",
825		    prefixlen);
826		pf_print_host(&pd->nsaddr, pd->nsport, pd->naf);
827		addlog(" -> ");
828		pf_print_host(&pd->ndaddr, pd->ndport, pd->naf);
829		addlog("\n");
830	}
831
832	return (0);
833}
834#endif /* INET6 */
835
836int
837pf_postprocess_addr(struct pf_state *cur)
838{
839	struct pf_rule		*nr;
840	struct pf_state_key	*sks;
841	struct pf_pool		 rpool;
842	struct pf_addr		 lookup_addr;
843	int			 slbcount = -1;
844
845	nr = cur->natrule.ptr;
846
847	if (nr == NULL)
848		return (0);
849
850	/* decrease counter */
851
852	sks = cur ? cur->key[PF_SK_STACK] : NULL;
853
854	/* check for outgoing or ingoing balancing */
855	if (nr->rt == PF_ROUTETO)
856		lookup_addr = cur->rt_addr;
857	else if (sks != NULL)
858		lookup_addr = sks->addr[1];
859	else {
860		if (pf_status.debug >= LOG_DEBUG) {
861			log(LOG_DEBUG, "pf: %s: unable to obtain address",
862			    __func__);
863		}
864		return (1);
865	}
866
867	/* check for appropriate pool */
868	if (nr->rdr.addr.type != PF_ADDR_NONE)
869		rpool = nr->rdr;
870	else if (nr->nat.addr.type != PF_ADDR_NONE)
871		rpool = nr->nat;
872	else if (nr->route.addr.type != PF_ADDR_NONE)
873		rpool = nr->route;
874	else
875		return (0);
876
877	if (((rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_LEASTSTATES))
878		return (0);
879
880	if (rpool.addr.type == PF_ADDR_TABLE) {
881		if ((slbcount = pfr_states_decrease(
882		    rpool.addr.p.tbl,
883		    &lookup_addr, sks->af)) == -1) {
884			if (pf_status.debug >= LOG_DEBUG) {
885				log(LOG_DEBUG, "pf: %s: selected address ",
886				    __func__);
887				pf_print_host(&lookup_addr,
888				    sks->port[0], sks->af);
889				addlog(". Failed to "
890				    "decrease count!\n");
891			}
892			return (1);
893		}
894	} else if (rpool.addr.type == PF_ADDR_DYNIFTL) {
895		if ((slbcount = pfr_states_decrease(
896		    rpool.addr.p.dyn->pfid_kt,
897		    &lookup_addr, sks->af)) == -1) {
898			if (pf_status.debug >= LOG_DEBUG) {
899				log(LOG_DEBUG, "pf: %s: selected address ",
900				    __func__);
901				pf_print_host(&lookup_addr,
902				    sks->port[0], sks->af);
903				addlog(". Failed to "
904				    "decrease count!\n");
905			}
906			return (1);
907		}
908	}
909	if (slbcount > -1) {
910		if (pf_status.debug >= LOG_NOTICE) {
911			log(LOG_NOTICE, "pf: %s: selected address ", __func__);
912			pf_print_host(&lookup_addr, sks->port[0],
913			    sks->af);
914			addlog(" decreased state count to %u\n",
915			    slbcount);
916		}
917	}
918	return (0);
919}
920