/*	$OpenBSD: pf_table.c,v 1.145 2023/08/10 16:44:04 sashan Exp $	*/

/*
 * Copyright (c) 2002 Cedric Berger
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/proc.h>

#include <net/if.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_ipsp.h>
#include <netinet/ip_icmp.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#ifdef INET6
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#endif /* INET6 */

#include <net/pfvar.h>
#include <net/pfvar_priv.h>

#define ACCEPT_FLAGS(flags, oklist)		\
	do {					\
		if ((flags & ~(oklist)) &	\
		    PFR_FLAG_ALLMASK)		\
			return (EINVAL);	\
	} while (0)

#define COPYIN(from, to, size, flags)		\
	((flags & PFR_FLAG_USERIOCTL) ?		\
	copyin((from), (to), (size)) :		\
	(bcopy((from), (to), (size)), 0))

#define COPYOUT(from, to, size, flags)		\
	((flags & PFR_FLAG_USERIOCTL) ?		\
	copyout((from), (to), (size)) :		\
	(bcopy((from), (to), (size)), 0))

#define YIELD(ok)				\
	do {					\
		if (ok)				\
			sched_pause(preempt);	\
	} while (0)

#define	FILLIN_SIN(sin, addr)			\
	do {					\
		(sin).sin_len = sizeof(sin);	\
		(sin).sin_family = AF_INET;	\
		(sin).sin_addr = (addr);	\
	} while (0)

#define	FILLIN_SIN6(sin6, addr)			\
	do {					\
		(sin6).sin6_len = sizeof(sin6);	\
		(sin6).sin6_family = AF_INET6;	\
		(sin6).sin6_addr = (addr);	\
	} while (0)

#define SWAP(type, a1, a2)			\
	do {					\
		type tmp = a1;			\
		a1 = a2;			\
		a2 = tmp;			\
	} while (0)

#define SUNION2PF(su, af) (((af)==AF_INET) ?	\
    (struct pf_addr *)&(su)->sin.sin_addr :	\
    (struct pf_addr *)&(su)->sin6.sin6_addr)

#define	AF_BITS(af)		(((af)==AF_INET)?32:128)
#define	ADDR_NETWORK(ad)	((ad)->pfra_net < AF_BITS((ad)->pfra_af))
#define	KENTRY_NETWORK(ke)	((ke)->pfrke_net < AF_BITS((ke)->pfrke_af))

#define NO_ADDRESSES		(-1)
#define ENQUEUE_UNMARKED_ONLY	(1)
#define INVERT_NEG_FLAG		(1)

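/*
 * Context for pfr_walktree(): pfrw_op selects what is done for each
 * radix node visited, and the union carries the per-operation cursor
 * (output buffer, work queue, pool candidate or dynaddr to update).
 */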
struct pfr_walktree {
	enum pfrw_op {
		PFRW_MARK,
		PFRW_SWEEP,
		PFRW_ENQUEUE,
		PFRW_GET_ADDRS,
		PFRW_GET_ASTATS,
		PFRW_POOL_GET,
		PFRW_DYNADDR_UPDATE
	}	 pfrw_op;
	union {
		struct pfr_addr		*pfrw1_addr;
		struct pfr_astats	*pfrw1_astats;
		struct pfr_kentryworkq	*pfrw1_workq;
		struct pfr_kentry	*pfrw1_kentry;
		struct pfi_dynaddr	*pfrw1_dyn;
	}	 pfrw_1;
	int	 pfrw_free;
	int	 pfrw_flags;
};
#define pfrw_addr	pfrw_1.pfrw1_addr
#define pfrw_astats	pfrw_1.pfrw1_astats
#define pfrw_workq	pfrw_1.pfrw1_workq
#define pfrw_kentry	pfrw_1.pfrw1_kentry
#define pfrw_dyn	pfrw_1.pfrw1_dyn
#define pfrw_cnt	pfrw_free

#define senderr(e)	do { rv = (e); goto _bad; } while (0)

struct pool		 pfr_ktable_pl;
struct pool		 pfr_kentry_pl[PFRKE_MAX];
struct pool		 pfr_kcounters_pl;
union sockaddr_union	 pfr_mask;
struct pf_addr		 pfr_ffaddr;

int			 pfr_gcd(int, int);
void			 pfr_copyout_addr(struct pfr_addr *,
			    struct pfr_kentry *ke);
int			 pfr_validate_addr(struct pfr_addr *);
void			 pfr_enqueue_addrs(struct pfr_ktable *,
			    struct pfr_kentryworkq *, int *, int);
void			 pfr_mark_addrs(struct pfr_ktable *);
struct pfr_kentry	*pfr_lookup_addr(struct pfr_ktable *,
			    struct pfr_addr *, int);
struct pfr_kentry	*pfr_lookup_kentry(struct pfr_ktable *,
			    struct pfr_kentry *, int);
struct pfr_kentry	*pfr_create_kentry(struct pfr_addr *);
struct pfr_kentry	*pfr_create_kentry_unlocked(struct pfr_addr *, int);
void			 pfr_kentry_kif_ref(struct pfr_kentry *);
void			 pfr_destroy_kentries(struct pfr_kentryworkq *);
void			 pfr_destroy_ioq(struct pfr_kentryworkq *, int);
void			 pfr_destroy_kentry(struct pfr_kentry *);
void			 pfr_insert_kentries(struct pfr_ktable *,
			    struct pfr_kentryworkq *, time_t);
void			 pfr_remove_kentries(struct pfr_ktable *,
			    struct pfr_kentryworkq *);
void			 pfr_clstats_kentries(struct pfr_kentryworkq *, time_t,
			    int);
void			 pfr_reset_feedback(struct pfr_addr *, int, int);
void			 pfr_prepare_network(union sockaddr_union *, int, int);
int			 pfr_route_kentry(struct pfr_ktable *,
			    struct pfr_kentry *);
int			 pfr_unroute_kentry(struct pfr_ktable *,
			    struct pfr_kentry *);
int			 pfr_walktree(struct radix_node *, void *, u_int);
int			 pfr_validate_table(struct pfr_table *, int, int);
int			 pfr_fix_anchor(char *);
void			 pfr_commit_ktable(struct pfr_ktable *, time_t);
void			 pfr_insert_ktables(struct pfr_ktableworkq *);
void			 pfr_insert_ktable(struct pfr_ktable *);
void			 pfr_setflags_ktables(struct pfr_ktableworkq *);
void			 pfr_setflags_ktable(struct pfr_ktable *, int);
void			 pfr_clstats_ktables(struct pfr_ktableworkq *, time_t,
			    int);
void			 pfr_clstats_ktable(struct pfr_ktable *, time_t, int);
struct pfr_ktable	*pfr_create_ktable(struct pfr_table *, time_t, int,
			    int);
void			 pfr_destroy_ktables(struct pfr_ktableworkq *, int);
void			 pfr_destroy_ktables_aux(struct pfr_ktableworkq *);
void			 pfr_destroy_ktable(struct pfr_ktable *, int);
int			 pfr_ktable_compare(struct pfr_ktable *,
			    struct pfr_ktable *);
void			 pfr_ktable_winfo_update(struct pfr_ktable *,
			    struct pfr_kentry *);
struct pfr_ktable	*pfr_lookup_table(struct pfr_table *);
void			 pfr_clean_node_mask(struct pfr_ktable *,
			    struct pfr_kentryworkq *);
int			 pfr_table_count(struct pfr_table *, int);
int			 pfr_skip_table(struct pfr_table *,
			    struct pfr_ktable *, int);
struct pfr_kentry	*pfr_kentry_byidx(struct pfr_ktable *, int, int);
int			 pfr_islinklocal(sa_family_t, struct pf_addr *);

RB_PROTOTYPE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare);
RB_GENERATE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare);

struct pfr_ktablehead	 pfr_ktables;
struct pfr_table	 pfr_nulltable;
int			 pfr_ktable_cnt;

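/*
 * Euclid's algorithm; used to maintain pfrkt_gcdweight, the gcd of all
 * entry weights in a table with cost (weighted) entries.
 */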
int
pfr_gcd(int m, int n)
{
	int t;

	while (m > 0) {
		t = n % m;
		n = m;
		m = t;
	}
	return (n);
}

void
pfr_initialize(void)
{
	rn_init(sizeof(struct sockaddr_in6));

	pool_init(&pfr_ktable_pl, sizeof(struct pfr_ktable),
	    0, IPL_SOFTNET, 0, "pfrktable", NULL);
	pool_init(&pfr_kentry_pl[PFRKE_PLAIN], sizeof(struct pfr_kentry),
	    0, IPL_SOFTNET, 0, "pfrke_plain", NULL);
	pool_init(&pfr_kentry_pl[PFRKE_ROUTE], sizeof(struct pfr_kentry_route),
	    0, IPL_SOFTNET, 0, "pfrke_route", NULL);
	pool_init(&pfr_kentry_pl[PFRKE_COST], sizeof(struct pfr_kentry_cost),
	    0, IPL_SOFTNET, 0, "pfrke_cost", NULL);
	pool_init(&pfr_kcounters_pl, sizeof(struct pfr_kcounters),
	    0, IPL_SOFTNET, 0, "pfrkcounters", NULL);

	memset(&pfr_ffaddr, 0xff, sizeof(pfr_ffaddr));
}

int
pfr_clr_addrs(struct pfr_table *tbl, int *ndel, int flags)
{
	struct pfr_ktable	*kt;
	struct pfr_kentryworkq	 workq;

	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
	if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL))
		return (EINVAL);
	kt = pfr_lookup_table(tbl);
	if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
		return (ESRCH);
	if (kt->pfrkt_flags & PFR_TFLAG_CONST)
		return (EPERM);
	pfr_enqueue_addrs(kt, &workq, ndel, 0);

	if (!(flags & PFR_FLAG_DUMMY)) {
		pfr_remove_kentries(kt, &workq);
		if (kt->pfrkt_cnt) {
			DPFPRINTF(LOG_NOTICE,
			    "pfr_clr_addrs: corruption detected (%d).",
			    kt->pfrkt_cnt);
			kt->pfrkt_cnt = 0;
		}
	}
	return (0);
}

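/*
 * Fill a pfr_addr from a kernel entry, including the per-entry
 * feedback code, for copyout to userland by pfr_add_addrs().
 */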
void
pfr_fill_feedback(struct pfr_kentry_all *ke, struct pfr_addr *ad)
{
	ad->pfra_type = ke->pfrke_type;

	switch (ke->pfrke_type) {
	case PFRKE_PLAIN:
		break;
	case PFRKE_COST:
		((struct pfr_kentry_cost *)ke)->weight = ad->pfra_weight;
		/* FALLTHROUGH */
	case PFRKE_ROUTE:
		if (ke->pfrke_rifname[0])
			strlcpy(ad->pfra_ifname, ke->pfrke_rifname, IFNAMSIZ);
		break;
	}

	switch (ke->pfrke_af) {
	case AF_INET:
		ad->pfra_ip4addr = ke->pfrke_sa.sin.sin_addr;
		break;
#ifdef	INET6
	case AF_INET6:
		ad->pfra_ip6addr = ke->pfrke_sa.sin6.sin6_addr;
		break;
#endif	/* INET6 */
	default:
		unhandled_af(ke->pfrke_af);
	}
	ad->pfra_weight = ((struct pfr_kentry_cost *)ke)->weight;
	ad->pfra_af = ke->pfrke_af;
	ad->pfra_net = ke->pfrke_net;
	if (ke->pfrke_flags & PFRKE_FLAG_NOT)
		ad->pfra_not = 1;
	ad->pfra_fback = ke->pfrke_fb;
}

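/*
 * Three phases: pre-allocate and validate entries without any lock
 * held (ioq), insert the new ones into the table under the net and pf
 * locks (workq), then copy the feedback codes back out to the caller.
 */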
int
pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
    int *nadd, int flags)
{
	struct pfr_ktable	*kt, *tmpkt;
	struct pfr_kentryworkq	 workq, ioq;
	struct pfr_kentry	*p, *q, *ke;
	struct pfr_addr		 ad;
	int			 i, rv, xadd = 0;
	time_t			 tzero = gettime();

	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK);
	if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL))
		return (EINVAL);
	tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0,
	    (flags & PFR_FLAG_USERIOCTL? PR_WAITOK : PR_NOWAIT));
	if (tmpkt == NULL)
		return (ENOMEM);
	SLIST_INIT(&workq);
	SLIST_INIT(&ioq);
	for (i = 0; i < size; i++) {
		YIELD(flags & PFR_FLAG_USERIOCTL);
		if (COPYIN(addr+i, &ad, sizeof(ad), flags))
			senderr(EFAULT);
		if (pfr_validate_addr(&ad))
			senderr(EINVAL);

		ke = pfr_create_kentry_unlocked(&ad, flags);
		if (ke == NULL)
			senderr(ENOMEM);
		ke->pfrke_fb = PFR_FB_NONE;
		SLIST_INSERT_HEAD(&ioq, ke, pfrke_ioq);
	}

	NET_LOCK();
	PF_LOCK();
	kt = pfr_lookup_table(tbl);
	if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) {
		PF_UNLOCK();
		NET_UNLOCK();
		senderr(ESRCH);
	}
	if (kt->pfrkt_flags & PFR_TFLAG_CONST) {
		PF_UNLOCK();
		NET_UNLOCK();
		senderr(EPERM);
	}
	SLIST_FOREACH(ke, &ioq, pfrke_ioq) {
		pfr_kentry_kif_ref(ke);
		p = pfr_lookup_kentry(kt, ke, 1);
		q = pfr_lookup_kentry(tmpkt, ke, 1);
		if (flags & PFR_FLAG_FEEDBACK) {
			if (q != NULL)
				ke->pfrke_fb = PFR_FB_DUPLICATE;
			else if (p == NULL)
				ke->pfrke_fb = PFR_FB_ADDED;
			else if ((p->pfrke_flags & PFRKE_FLAG_NOT) !=
			    (ke->pfrke_flags & PFRKE_FLAG_NOT))
				ke->pfrke_fb = PFR_FB_CONFLICT;
			else
				ke->pfrke_fb = PFR_FB_NONE;
		}
		if (p == NULL && q == NULL) {
			if (pfr_route_kentry(tmpkt, ke)) {
				/* defer destruction until feedback is processed */
				ke->pfrke_fb = PFR_FB_NONE;
			} else {
				/*
				 * mark the entry as added to the table, so we
				 * won't destroy it with the rest of the ioq
				 */
				ke->pfrke_fb = PFR_FB_ADDED;
				SLIST_INSERT_HEAD(&workq, ke, pfrke_workq);
				xadd++;
			}
		}
	}
	/* remove the entries we are about to insert from tmpkt */
	pfr_clean_node_mask(tmpkt, &workq);
	if (!(flags & PFR_FLAG_DUMMY))
		pfr_insert_kentries(kt, &workq, tzero);

	PF_UNLOCK();
	NET_UNLOCK();

	if (flags & PFR_FLAG_FEEDBACK) {
		i = 0;
		while ((ke = SLIST_FIRST(&ioq)) != NULL) {
			YIELD(flags & PFR_FLAG_USERIOCTL);
			pfr_fill_feedback((struct pfr_kentry_all *)ke, &ad);
			if (COPYOUT(&ad, addr+i, sizeof(ad), flags))
				senderr(EFAULT);
			i++;
			SLIST_REMOVE_HEAD(&ioq, pfrke_ioq);
			switch (ke->pfrke_fb) {
			case PFR_FB_CONFLICT:
			case PFR_FB_DUPLICATE:
			case PFR_FB_NONE:
				pfr_destroy_kentry(ke);
				break;
			case PFR_FB_ADDED:
				if (flags & PFR_FLAG_DUMMY)
					pfr_destroy_kentry(ke);
			}
		}
	} else
		pfr_destroy_ioq(&ioq, flags);

	if (nadd != NULL)
		*nadd = xadd;

	pfr_destroy_ktable(tmpkt, 0);
	return (0);
_bad:
	pfr_destroy_ioq(&ioq, flags);
	if (flags & PFR_FLAG_FEEDBACK)
		pfr_reset_feedback(addr, size, flags);
	pfr_destroy_ktable(tmpkt, 0);
	return (rv);
}

int
pfr_del_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
    int *ndel, int flags)
{
	struct pfr_ktable	*kt;
	struct pfr_kentryworkq	 workq;
	struct pfr_kentry	*p;
	struct pfr_addr		 ad;
	int			 i, rv, xdel = 0, log = 1;

	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK);
	if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL))
		return (EINVAL);
	kt = pfr_lookup_table(tbl);
	if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
		return (ESRCH);
	if (kt->pfrkt_flags & PFR_TFLAG_CONST)
		return (EPERM);
	/*
	 * there are two algorithms to choose from here.
	 * with:
	 *   n: number of addresses to delete
	 *   N: number of addresses in the table
	 *
	 * one is O(N) and is better for large 'n'
	 * one is O(n*LOG(N)) and is better for small 'n'
	 *
	 * the following code tries to decide which one is best.
	 */
	for (i = kt->pfrkt_cnt; i > 0; i >>= 1)
		log++;
	if (size > kt->pfrkt_cnt/log) {
		/* full table scan */
		pfr_mark_addrs(kt);
	} else {
		/* iterate over addresses to delete */
		for (i = 0; i < size; i++) {
			YIELD(flags & PFR_FLAG_USERIOCTL);
			if (COPYIN(addr+i, &ad, sizeof(ad), flags))
				return (EFAULT);
			if (pfr_validate_addr(&ad))
				return (EINVAL);
			p = pfr_lookup_addr(kt, &ad, 1);
			if (p != NULL)
				p->pfrke_flags &= ~PFRKE_FLAG_MARK;
		}
	}
	SLIST_INIT(&workq);
	for (i = 0; i < size; i++) {
		YIELD(flags & PFR_FLAG_USERIOCTL);
		if (COPYIN(addr+i, &ad, sizeof(ad), flags))
			senderr(EFAULT);
		if (pfr_validate_addr(&ad))
			senderr(EINVAL);
		p = pfr_lookup_addr(kt, &ad, 1);
		if (flags & PFR_FLAG_FEEDBACK) {
			if (p == NULL)
				ad.pfra_fback = PFR_FB_NONE;
			else if ((p->pfrke_flags & PFRKE_FLAG_NOT) !=
			    ad.pfra_not)
				ad.pfra_fback = PFR_FB_CONFLICT;
			else if (p->pfrke_flags & PFRKE_FLAG_MARK)
				ad.pfra_fback = PFR_FB_DUPLICATE;
			else
				ad.pfra_fback = PFR_FB_DELETED;
		}
		if (p != NULL &&
		    (p->pfrke_flags & PFRKE_FLAG_NOT) == ad.pfra_not &&
		    !(p->pfrke_flags & PFRKE_FLAG_MARK)) {
			p->pfrke_flags |= PFRKE_FLAG_MARK;
			SLIST_INSERT_HEAD(&workq, p, pfrke_workq);
			xdel++;
		}
		if (flags & PFR_FLAG_FEEDBACK)
			if (COPYOUT(&ad, addr+i, sizeof(ad), flags))
				senderr(EFAULT);
	}
	if (!(flags & PFR_FLAG_DUMMY)) {
		pfr_remove_kentries(kt, &workq);
	}
	if (ndel != NULL)
		*ndel = xdel;
	return (0);
_bad:
	if (flags & PFR_FLAG_FEEDBACK)
		pfr_reset_feedback(addr, size, flags);
	return (rv);
}

int
pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
    int *size2, int *nadd, int *ndel, int *nchange, int flags,
    u_int32_t ignore_pfrt_flags)
{
	struct pfr_ktable	*kt, *tmpkt;
	struct pfr_kentryworkq	 addq, delq, changeq;
	struct pfr_kentry	*p, *q;
	struct pfr_addr		 ad;
	int			 i, rv, xadd = 0, xdel = 0, xchange = 0;
	time_t			 tzero = gettime();

	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK);
	if (pfr_validate_table(tbl, ignore_pfrt_flags, flags &
	    PFR_FLAG_USERIOCTL))
		return (EINVAL);
	kt = pfr_lookup_table(tbl);
	if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
		return (ESRCH);
	if (kt->pfrkt_flags & PFR_TFLAG_CONST)
		return (EPERM);
	tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0,
	    (flags & PFR_FLAG_USERIOCTL? PR_WAITOK : PR_NOWAIT));
	if (tmpkt == NULL)
		return (ENOMEM);
	pfr_mark_addrs(kt);
	SLIST_INIT(&addq);
	SLIST_INIT(&delq);
	SLIST_INIT(&changeq);
	for (i = 0; i < size; i++) {
		YIELD(flags & PFR_FLAG_USERIOCTL);
		if (COPYIN(addr+i, &ad, sizeof(ad), flags))
			senderr(EFAULT);
		if (pfr_validate_addr(&ad))
			senderr(EINVAL);
		ad.pfra_fback = PFR_FB_NONE;
		p = pfr_lookup_addr(kt, &ad, 1);
		if (p != NULL) {
			if (p->pfrke_flags & PFRKE_FLAG_MARK) {
				ad.pfra_fback = PFR_FB_DUPLICATE;
				goto _skip;
			}
			p->pfrke_flags |= PFRKE_FLAG_MARK;
			if ((p->pfrke_flags & PFRKE_FLAG_NOT) != ad.pfra_not) {
				SLIST_INSERT_HEAD(&changeq, p, pfrke_workq);
				ad.pfra_fback = PFR_FB_CHANGED;
				xchange++;
			}
		} else {
			q = pfr_lookup_addr(tmpkt, &ad, 1);
			if (q != NULL) {
				ad.pfra_fback = PFR_FB_DUPLICATE;
				goto _skip;
			}
			p = pfr_create_kentry(&ad);
			if (p == NULL)
				senderr(ENOMEM);
			if (pfr_route_kentry(tmpkt, p)) {
				pfr_destroy_kentry(p);
				ad.pfra_fback = PFR_FB_NONE;
				goto _skip;
			}
			SLIST_INSERT_HEAD(&addq, p, pfrke_workq);
			ad.pfra_fback = PFR_FB_ADDED;
			xadd++;
			if (p->pfrke_type == PFRKE_COST)
				kt->pfrkt_refcntcost++;
			pfr_ktable_winfo_update(kt, p);
		}
_skip:
		if (flags & PFR_FLAG_FEEDBACK)
			if (COPYOUT(&ad, addr+i, sizeof(ad), flags))
				senderr(EFAULT);
	}
	pfr_enqueue_addrs(kt, &delq, &xdel, ENQUEUE_UNMARKED_ONLY);
	if ((flags & PFR_FLAG_FEEDBACK) && *size2) {
		if (*size2 < size+xdel) {
			*size2 = size+xdel;
			senderr(0);
		}
		i = 0;
		SLIST_FOREACH(p, &delq, pfrke_workq) {
			pfr_copyout_addr(&ad, p);
			ad.pfra_fback = PFR_FB_DELETED;
			if (COPYOUT(&ad, addr+size+i, sizeof(ad), flags))
				senderr(EFAULT);
			i++;
		}
	}
	pfr_clean_node_mask(tmpkt, &addq);
	if (!(flags & PFR_FLAG_DUMMY)) {
		pfr_insert_kentries(kt, &addq, tzero);
		pfr_remove_kentries(kt, &delq);
		pfr_clstats_kentries(&changeq, tzero, INVERT_NEG_FLAG);
	} else
		pfr_destroy_kentries(&addq);
	if (nadd != NULL)
		*nadd = xadd;
	if (ndel != NULL)
		*ndel = xdel;
	if (nchange != NULL)
		*nchange = xchange;
	if ((flags & PFR_FLAG_FEEDBACK) && size2)
		*size2 = size+xdel;
	pfr_destroy_ktable(tmpkt, 0);
	return (0);
_bad:
	pfr_clean_node_mask(tmpkt, &addq);
	pfr_destroy_kentries(&addq);
	if (flags & PFR_FLAG_FEEDBACK)
		pfr_reset_feedback(addr, size, flags);
	pfr_destroy_ktable(tmpkt, 0);
	return (rv);
}

int
pfr_tst_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
	int *nmatch, int flags)
{
	struct pfr_ktable	*kt;
	struct pfr_kentry	*p;
	struct pfr_addr		 ad;
	int			 i, xmatch = 0;

	ACCEPT_FLAGS(flags, PFR_FLAG_REPLACE);
	if (pfr_validate_table(tbl, 0, 0))
		return (EINVAL);
	kt = pfr_lookup_table(tbl);
	if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
		return (ESRCH);

	for (i = 0; i < size; i++) {
		YIELD(flags & PFR_FLAG_USERIOCTL);
		if (COPYIN(addr+i, &ad, sizeof(ad), flags))
			return (EFAULT);
		if (pfr_validate_addr(&ad))
			return (EINVAL);
		if (ADDR_NETWORK(&ad))
			return (EINVAL);
		p = pfr_lookup_addr(kt, &ad, 0);
		if (flags & PFR_FLAG_REPLACE)
			pfr_copyout_addr(&ad, p);
		ad.pfra_fback = (p == NULL) ? PFR_FB_NONE :
		    ((p->pfrke_flags & PFRKE_FLAG_NOT) ?
		    PFR_FB_NOTMATCH : PFR_FB_MATCH);
		if (p != NULL && !(p->pfrke_flags & PFRKE_FLAG_NOT))
			xmatch++;
		if (COPYOUT(&ad, addr+i, sizeof(ad), flags))
			return (EFAULT);
	}
	if (nmatch != NULL)
		*nmatch = xmatch;
	return (0);
}

int
pfr_get_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int *size,
	int flags)
{
	struct pfr_ktable	*kt;
	struct pfr_walktree	 w;
	int			 rv;

	ACCEPT_FLAGS(flags, 0);
	if (pfr_validate_table(tbl, 0, 0))
		return (EINVAL);
	kt = pfr_lookup_table(tbl);
	if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
		return (ESRCH);
	if (kt->pfrkt_cnt > *size) {
		*size = kt->pfrkt_cnt;
		return (0);
	}

	bzero(&w, sizeof(w));
	w.pfrw_op = PFRW_GET_ADDRS;
	w.pfrw_addr = addr;
	w.pfrw_free = kt->pfrkt_cnt;
	w.pfrw_flags = flags;
	rv = rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w);
	if (!rv)
		rv = rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w);
	if (rv)
		return (rv);

	if (w.pfrw_free) {
		DPFPRINTF(LOG_ERR,
		    "pfr_get_addrs: corruption detected (%d)", w.pfrw_free);
		return (ENOTTY);
	}
	*size = kt->pfrkt_cnt;
	return (0);
}

int
pfr_get_astats(struct pfr_table *tbl, struct pfr_astats *addr, int *size,
	int flags)
{
	struct pfr_ktable	*kt;
	struct pfr_walktree	 w;
	struct pfr_kentryworkq	 workq;
	int			 rv;
	time_t			 tzero = gettime();

	if (pfr_validate_table(tbl, 0, 0))
		return (EINVAL);
	kt = pfr_lookup_table(tbl);
	if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
		return (ESRCH);
	if (kt->pfrkt_cnt > *size) {
		*size = kt->pfrkt_cnt;
		return (0);
	}

	bzero(&w, sizeof(w));
	w.pfrw_op = PFRW_GET_ASTATS;
	w.pfrw_astats = addr;
	w.pfrw_free = kt->pfrkt_cnt;
	w.pfrw_flags = flags;
	rv = rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w);
	if (!rv)
		rv = rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w);
	if (!rv && (flags & PFR_FLAG_CLSTATS)) {
		pfr_enqueue_addrs(kt, &workq, NULL, 0);
		pfr_clstats_kentries(&workq, tzero, 0);
	}
	if (rv)
		return (rv);

	if (w.pfrw_free) {
		DPFPRINTF(LOG_ERR,
		    "pfr_get_astats: corruption detected (%d)", w.pfrw_free);
		return (ENOTTY);
	}
	*size = kt->pfrkt_cnt;
	return (0);
}

int
pfr_clr_astats(struct pfr_table *tbl, struct pfr_addr *addr, int size,
    int *nzero, int flags)
{
	struct pfr_ktable	*kt;
	struct pfr_kentryworkq	 workq;
	struct pfr_kentry	*p;
	struct pfr_addr		 ad;
	int			 i, rv, xzero = 0;

	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_FEEDBACK);
	if (pfr_validate_table(tbl, 0, 0))
		return (EINVAL);
	kt = pfr_lookup_table(tbl);
	if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
		return (ESRCH);
	SLIST_INIT(&workq);
	for (i = 0; i < size; i++) {
		YIELD(flags & PFR_FLAG_USERIOCTL);
		if (COPYIN(addr+i, &ad, sizeof(ad), flags))
			senderr(EFAULT);
		if (pfr_validate_addr(&ad))
			senderr(EINVAL);
		p = pfr_lookup_addr(kt, &ad, 1);
		if (flags & PFR_FLAG_FEEDBACK) {
			ad.pfra_fback = (p != NULL) ?
			    PFR_FB_CLEARED : PFR_FB_NONE;
			if (COPYOUT(&ad, addr+i, sizeof(ad), flags))
				senderr(EFAULT);
		}
		if (p != NULL) {
			SLIST_INSERT_HEAD(&workq, p, pfrke_workq);
			xzero++;
		}
	}

	if (!(flags & PFR_FLAG_DUMMY)) {
		pfr_clstats_kentries(&workq, gettime(), 0);
	}
	if (nzero != NULL)
		*nzero = xzero;
	return (0);
_bad:
	if (flags & PFR_FLAG_FEEDBACK)
		pfr_reset_feedback(addr, size, flags);
	return (rv);
}

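/*
 * Sanity-check an address from userland: the prefix length must fit
 * the address family, all bits beyond the prefix must be zero, and the
 * feedback and type fields must hold acceptable initial values.
 */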
int
pfr_validate_addr(struct pfr_addr *ad)
{
	int i;

	switch (ad->pfra_af) {
	case AF_INET:
		if (ad->pfra_net > 32)
			return (-1);
		break;
#ifdef INET6
	case AF_INET6:
		if (ad->pfra_net > 128)
			return (-1);
		break;
#endif /* INET6 */
	default:
		return (-1);
	}
	if (ad->pfra_net < 128 &&
		(((caddr_t)ad)[ad->pfra_net/8] & (0xFF >> (ad->pfra_net%8))))
			return (-1);
	for (i = (ad->pfra_net+7)/8; i < sizeof(ad->pfra_u); i++)
		if (((caddr_t)ad)[i])
			return (-1);
	if (ad->pfra_not && ad->pfra_not != 1)
		return (-1);
	if (ad->pfra_fback != PFR_FB_NONE)
		return (-1);
	if (ad->pfra_type >= PFRKE_MAX)
		return (-1);
	return (0);
}

void
pfr_enqueue_addrs(struct pfr_ktable *kt, struct pfr_kentryworkq *workq,
	int *naddr, int sweep)
{
	struct pfr_walktree	w;

	SLIST_INIT(workq);
	bzero(&w, sizeof(w));
	w.pfrw_op = sweep ? PFRW_SWEEP : PFRW_ENQUEUE;
	w.pfrw_workq = workq;
	if (kt->pfrkt_ip4 != NULL)
		if (rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w))
			DPFPRINTF(LOG_ERR,
			    "pfr_enqueue_addrs: IPv4 walktree failed.");
	if (kt->pfrkt_ip6 != NULL)
		if (rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w))
			DPFPRINTF(LOG_ERR,
			    "pfr_enqueue_addrs: IPv6 walktree failed.");
	if (naddr != NULL)
		*naddr = w.pfrw_cnt;
}

void
pfr_mark_addrs(struct pfr_ktable *kt)
{
	struct pfr_walktree	w;

	bzero(&w, sizeof(w));
	w.pfrw_op = PFRW_MARK;
	if (rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w))
		DPFPRINTF(LOG_ERR,
		    "pfr_mark_addrs: IPv4 walktree failed.");
	if (rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w))
		DPFPRINTF(LOG_ERR,
		    "pfr_mark_addrs: IPv6 walktree failed.");
}

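/*
 * Look up an address in the table's radix tree for its address family.
 * Network entries are found via rn_lookup() with an explicit mask;
 * host entries use rn_match(), and `exact' rejects network matches.
 */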
struct pfr_kentry *
pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact)
{
	union sockaddr_union	 sa, mask;
	struct radix_node_head	*head;
	struct pfr_kentry	*ke;

	bzero(&sa, sizeof(sa));
	switch (ad->pfra_af) {
	case AF_INET:
		FILLIN_SIN(sa.sin, ad->pfra_ip4addr);
		head = kt->pfrkt_ip4;
		break;
#ifdef	INET6
	case AF_INET6:
		FILLIN_SIN6(sa.sin6, ad->pfra_ip6addr);
		head = kt->pfrkt_ip6;
		break;
#endif	/* INET6 */
	default:
		unhandled_af(ad->pfra_af);
	}
	if (ADDR_NETWORK(ad)) {
		pfr_prepare_network(&mask, ad->pfra_af, ad->pfra_net);
		ke = (struct pfr_kentry *)rn_lookup(&sa, &mask, head);
	} else {
		ke = (struct pfr_kentry *)rn_match(&sa, head);
		if (exact && ke && KENTRY_NETWORK(ke))
			ke = NULL;
	}
	return (ke);
}

struct pfr_kentry *
pfr_lookup_kentry(struct pfr_ktable *kt, struct pfr_kentry *key, int exact)
{
	union sockaddr_union	 mask;
	struct radix_node_head	*head;
	struct pfr_kentry	*ke;

	switch (key->pfrke_af) {
	case AF_INET:
		head = kt->pfrkt_ip4;
		break;
#ifdef	INET6
	case AF_INET6:
		head = kt->pfrkt_ip6;
		break;
#endif	/* INET6 */
	default:
		unhandled_af(key->pfrke_af);
	}
	if (KENTRY_NETWORK(key)) {
		pfr_prepare_network(&mask, key->pfrke_af, key->pfrke_net);
		ke = (struct pfr_kentry *)rn_lookup(&key->pfrke_sa, &mask,
		    head);
	} else {
		ke = (struct pfr_kentry *)rn_match(&key->pfrke_sa, head);
		if (exact && ke && KENTRY_NETWORK(ke))
			ke = NULL;
	}
	return (ke);
}

struct pfr_kentry *
pfr_create_kentry(struct pfr_addr *ad)
{
	struct pfr_kentry_all	*ke;

	if (ad->pfra_type >= PFRKE_MAX)
		panic("unknown pfra_type %d", ad->pfra_type);

	ke = pool_get(&pfr_kentry_pl[ad->pfra_type], PR_NOWAIT | PR_ZERO);
	if (ke == NULL)
		return (NULL);

	ke->pfrke_type = ad->pfra_type;

	/* set weight allowing implicit weights */
	if (ad->pfra_weight == 0)
		ad->pfra_weight = 1;

	switch (ke->pfrke_type) {
	case PFRKE_PLAIN:
		break;
	case PFRKE_COST:
		((struct pfr_kentry_cost *)ke)->weight = ad->pfra_weight;
		/* FALLTHROUGH */
	case PFRKE_ROUTE:
		if (ad->pfra_ifname[0])
			ke->pfrke_rkif = pfi_kif_get(ad->pfra_ifname, NULL);
		if (ke->pfrke_rkif)
			pfi_kif_ref(ke->pfrke_rkif, PFI_KIF_REF_ROUTE);
		break;
	}

	switch (ad->pfra_af) {
	case AF_INET:
		FILLIN_SIN(ke->pfrke_sa.sin, ad->pfra_ip4addr);
		break;
#ifdef	INET6
	case AF_INET6:
		FILLIN_SIN6(ke->pfrke_sa.sin6, ad->pfra_ip6addr);
		break;
#endif	/* INET6 */
	default:
		unhandled_af(ad->pfra_af);
	}
	ke->pfrke_af = ad->pfra_af;
	ke->pfrke_net = ad->pfra_net;
	if (ad->pfra_not)
		ke->pfrke_flags |= PFRKE_FLAG_NOT;
	return ((struct pfr_kentry *)ke);
}

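/*
 * Variant of pfr_create_kentry() that is safe without the pf lock:
 * the allocation may sleep for ioctl callers and the interface is
 * recorded by name only; pfr_kentry_kif_ref() resolves it later.
 */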
struct pfr_kentry *
pfr_create_kentry_unlocked(struct pfr_addr *ad, int flags)
{
	struct pfr_kentry_all	*ke;
	int mflags = PR_ZERO;

	if (ad->pfra_type >= PFRKE_MAX)
		panic("unknown pfra_type %d", ad->pfra_type);

	if (flags & PFR_FLAG_USERIOCTL)
		mflags |= PR_WAITOK;
	else
		mflags |= PR_NOWAIT;

	ke = pool_get(&pfr_kentry_pl[ad->pfra_type], mflags);
	if (ke == NULL)
		return (NULL);

	ke->pfrke_type = ad->pfra_type;

	/* set weight allowing implicit weights */
	if (ad->pfra_weight == 0)
		ad->pfra_weight = 1;

	switch (ke->pfrke_type) {
	case PFRKE_PLAIN:
		break;
	case PFRKE_COST:
		((struct pfr_kentry_cost *)ke)->weight = ad->pfra_weight;
		/* FALLTHROUGH */
	case PFRKE_ROUTE:
		if (ad->pfra_ifname[0])
			(void) strlcpy(ke->pfrke_rifname, ad->pfra_ifname,
			    IFNAMSIZ);
		break;
	}

	switch (ad->pfra_af) {
	case AF_INET:
		FILLIN_SIN(ke->pfrke_sa.sin, ad->pfra_ip4addr);
		break;
#ifdef	INET6
	case AF_INET6:
		FILLIN_SIN6(ke->pfrke_sa.sin6, ad->pfra_ip6addr);
		break;
#endif	/* INET6 */
	default:
		unhandled_af(ad->pfra_af);
	}
	ke->pfrke_af = ad->pfra_af;
	ke->pfrke_net = ad->pfra_net;
	if (ad->pfra_not)
		ke->pfrke_flags |= PFRKE_FLAG_NOT;
	return ((struct pfr_kentry *)ke);
}

void
pfr_kentry_kif_ref(struct pfr_kentry *ke_all)
{
	struct pfr_kentry_all	*ke = (struct pfr_kentry_all *)ke_all;

	NET_ASSERT_LOCKED();
	switch (ke->pfrke_type) {
	case PFRKE_PLAIN:
		break;
	case PFRKE_COST:
	case PFRKE_ROUTE:
		if (ke->pfrke_rifname[0])
			ke->pfrke_rkif = pfi_kif_get(ke->pfrke_rifname, NULL);
		if (ke->pfrke_rkif)
			pfi_kif_ref(ke->pfrke_rkif, PFI_KIF_REF_ROUTE);
		break;
	}
}

void
pfr_destroy_kentries(struct pfr_kentryworkq *workq)
{
	struct pfr_kentry	*p;

	while ((p = SLIST_FIRST(workq)) != NULL) {
		YIELD(1);
		SLIST_REMOVE_HEAD(workq, pfrke_workq);
		pfr_destroy_kentry(p);
	}
}

void
pfr_destroy_ioq(struct pfr_kentryworkq *ioq, int flags)
{
	struct pfr_kentry	*p;

	while ((p = SLIST_FIRST(ioq)) != NULL) {
		YIELD(flags & PFR_FLAG_USERIOCTL);
		SLIST_REMOVE_HEAD(ioq, pfrke_ioq);
		/*
		 * only destroy entries which did not make it into the
		 * table
		 */
		if ((p->pfrke_fb != PFR_FB_ADDED) || (flags & PFR_FLAG_DUMMY))
			pfr_destroy_kentry(p);
	}
}

void
pfr_destroy_kentry(struct pfr_kentry *ke)
{
	if (ke->pfrke_counters)
		pool_put(&pfr_kcounters_pl, ke->pfrke_counters);
	if (ke->pfrke_type == PFRKE_COST || ke->pfrke_type == PFRKE_ROUTE)
		pfi_kif_unref(((struct pfr_kentry_all *)ke)->pfrke_rkif,
		    PFI_KIF_REF_ROUTE);
	pool_put(&pfr_kentry_pl[ke->pfrke_type], ke);
}

void
pfr_insert_kentries(struct pfr_ktable *kt,
    struct pfr_kentryworkq *workq, time_t tzero)
{
	struct pfr_kentry	*p;
	int			 rv, n = 0;

	SLIST_FOREACH(p, workq, pfrke_workq) {
		rv = pfr_route_kentry(kt, p);
		if (rv) {
			DPFPRINTF(LOG_ERR,
			    "pfr_insert_kentries: cannot route entry "
			    "(code=%d).", rv);
			break;
		}
		p->pfrke_tzero = tzero;
		++n;
		if (p->pfrke_type == PFRKE_COST)
			kt->pfrkt_refcntcost++;
		pfr_ktable_winfo_update(kt, p);
		YIELD(1);
	}
	kt->pfrkt_cnt += n;
}

int
pfr_insert_kentry(struct pfr_ktable *kt, struct pfr_addr *ad, time_t tzero)
{
	struct pfr_kentry	*p;
	int			 rv;

	p = pfr_lookup_addr(kt, ad, 1);
	if (p != NULL)
		return (0);
	p = pfr_create_kentry(ad);
	if (p == NULL)
		return (EINVAL);

	rv = pfr_route_kentry(kt, p);
	if (rv)
		return (rv);

	p->pfrke_tzero = tzero;
	if (p->pfrke_type == PFRKE_COST)
		kt->pfrkt_refcntcost++;
	kt->pfrkt_cnt++;
	pfr_ktable_winfo_update(kt, p);

	return (0);
}

void
pfr_remove_kentries(struct pfr_ktable *kt,
    struct pfr_kentryworkq *workq)
{
	struct pfr_kentry	*p;
	struct pfr_kentryworkq   addrq;
	int			 n = 0;

	SLIST_FOREACH(p, workq, pfrke_workq) {
		pfr_unroute_kentry(kt, p);
		++n;
		YIELD(1);
		if (p->pfrke_type == PFRKE_COST)
			kt->pfrkt_refcntcost--;
	}
	kt->pfrkt_cnt -= n;
	pfr_destroy_kentries(workq);

	/* update maxweight and gcd for load balancing */
	if (kt->pfrkt_refcntcost > 0) {
		kt->pfrkt_gcdweight = 0;
		kt->pfrkt_maxweight = 1;
		pfr_enqueue_addrs(kt, &addrq, NULL, 0);
		SLIST_FOREACH(p, &addrq, pfrke_workq)
			pfr_ktable_winfo_update(kt, p);
	}
}

void
pfr_clean_node_mask(struct pfr_ktable *kt,
    struct pfr_kentryworkq *workq)
{
	struct pfr_kentry	*p;

	SLIST_FOREACH(p, workq, pfrke_workq) {
		pfr_unroute_kentry(kt, p);
	}
}

void
pfr_clstats_kentries(struct pfr_kentryworkq *workq, time_t tzero, int negchange)
{
	struct pfr_kentry	*p;

	SLIST_FOREACH(p, workq, pfrke_workq) {
		if (negchange)
			p->pfrke_flags ^= PFRKE_FLAG_NOT;
		if (p->pfrke_counters) {
			pool_put(&pfr_kcounters_pl, p->pfrke_counters);
			p->pfrke_counters = NULL;
		}
		p->pfrke_tzero = tzero;
	}
}

void
pfr_reset_feedback(struct pfr_addr *addr, int size, int flags)
{
	struct pfr_addr	ad;
	int		i;

	for (i = 0; i < size; i++) {
		YIELD(flags & PFR_FLAG_USERIOCTL);
		if (COPYIN(addr+i, &ad, sizeof(ad), flags))
			break;
		ad.pfra_fback = PFR_FB_NONE;
		if (COPYOUT(&ad, addr+i, sizeof(ad), flags))
			break;
	}
}

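/*
 * Build a netmask sockaddr for the given address family and prefix
 * length, as used for radix tree lookups, insertions and deletions.
 */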
void
pfr_prepare_network(union sockaddr_union *sa, int af, int net)
{
#ifdef	INET6
	int	i;
#endif	/* INET6 */

	bzero(sa, sizeof(*sa));
	switch (af) {
	case AF_INET:
		sa->sin.sin_len = sizeof(sa->sin);
		sa->sin.sin_family = AF_INET;
		sa->sin.sin_addr.s_addr = net ? htonl(-1 << (32-net)) : 0;
		break;
#ifdef	INET6
	case AF_INET6:
		sa->sin6.sin6_len = sizeof(sa->sin6);
		sa->sin6.sin6_family = AF_INET6;
		for (i = 0; i < 4; i++) {
			if (net <= 32) {
				sa->sin6.sin6_addr.s6_addr32[i] =
				    net ? htonl(-1 << (32-net)) : 0;
				break;
			}
			sa->sin6.sin6_addr.s6_addr32[i] = 0xFFFFFFFF;
			net -= 32;
		}
		break;
#endif	/* INET6 */
	default:
		unhandled_af(af);
	}
}

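/*
 * Insert an entry into the table's radix tree, with an explicit mask
 * for network entries; returns -1 if rn_addroute() refuses the node
 * (e.g. a duplicate key).
 */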
int
pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke)
{
	union sockaddr_union	 mask;
	struct radix_node	*rn;
	struct radix_node_head	*head;

	bzero(ke->pfrke_node, sizeof(ke->pfrke_node));
	switch (ke->pfrke_af) {
	case AF_INET:
		head = kt->pfrkt_ip4;
		break;
#ifdef	INET6
	case AF_INET6:
		head = kt->pfrkt_ip6;
		break;
#endif	/* INET6 */
	default:
		unhandled_af(ke->pfrke_af);
	}

	if (KENTRY_NETWORK(ke)) {
		pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net);
		rn = rn_addroute(&ke->pfrke_sa, &mask, head, ke->pfrke_node, 0);
	} else
		rn = rn_addroute(&ke->pfrke_sa, NULL, head, ke->pfrke_node, 0);

	return (rn == NULL ? -1 : 0);
}

int
pfr_unroute_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke)
{
	union sockaddr_union	 mask;
	struct radix_node	*rn;
	struct radix_node_head	*head;

	switch (ke->pfrke_af) {
	case AF_INET:
		head = kt->pfrkt_ip4;
		break;
#ifdef	INET6
	case AF_INET6:
		head = kt->pfrkt_ip6;
		break;
#endif	/* INET6 */
	default:
		unhandled_af(ke->pfrke_af);
	}

	if (KENTRY_NETWORK(ke)) {
		pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net);
		rn = rn_delete(&ke->pfrke_sa, &mask, head, NULL);
	} else
		rn = rn_delete(&ke->pfrke_sa, NULL, head, NULL);

	if (rn == NULL) {
		DPFPRINTF(LOG_ERR, "pfr_unroute_kentry: delete failed.");
		return (-1);
	}
	return (0);
}

void
pfr_copyout_addr(struct pfr_addr *ad, struct pfr_kentry *ke)
{
	bzero(ad, sizeof(*ad));
	if (ke == NULL)
		return;
	ad->pfra_af = ke->pfrke_af;
	ad->pfra_net = ke->pfrke_net;
	ad->pfra_type = ke->pfrke_type;
	if (ke->pfrke_flags & PFRKE_FLAG_NOT)
		ad->pfra_not = 1;

	switch (ad->pfra_af) {
	case AF_INET:
		ad->pfra_ip4addr = ke->pfrke_sa.sin.sin_addr;
		break;
#ifdef	INET6
	case AF_INET6:
		ad->pfra_ip6addr = ke->pfrke_sa.sin6.sin6_addr;
		break;
#endif	/* INET6 */
	default:
		unhandled_af(ad->pfra_af);
	}
	if (ke->pfrke_counters != NULL)
		ad->pfra_states = ke->pfrke_counters->states;
	switch (ke->pfrke_type) {
	case PFRKE_COST:
		ad->pfra_weight = ((struct pfr_kentry_cost *)ke)->weight;
		/* FALLTHROUGH */
	case PFRKE_ROUTE:
		if (((struct pfr_kentry_route *)ke)->kif != NULL)
			strlcpy(ad->pfra_ifname,
			    ((struct pfr_kentry_route *)ke)->kif->pfik_name,
			    IFNAMSIZ);
		break;
	default:
		break;
	}
}

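/*
 * rn_walktree() callback; dispatches on pfrw_op to mark, collect,
 * copy out or otherwise process each entry of a table.
 */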
int
pfr_walktree(struct radix_node *rn, void *arg, u_int id)
{
	struct pfr_kentry	*ke = (struct pfr_kentry *)rn;
	struct pfr_walktree	*w = arg;
	union sockaddr_union	 mask;
	int			 flags = w->pfrw_flags;

	switch (w->pfrw_op) {
	case PFRW_MARK:
		ke->pfrke_flags &= ~PFRKE_FLAG_MARK;
		break;
	case PFRW_SWEEP:
		if (ke->pfrke_flags & PFRKE_FLAG_MARK)
			break;
		/* FALLTHROUGH */
	case PFRW_ENQUEUE:
		SLIST_INSERT_HEAD(w->pfrw_workq, ke, pfrke_workq);
		w->pfrw_cnt++;
		break;
	case PFRW_GET_ADDRS:
		if (w->pfrw_free-- > 0) {
			struct pfr_addr ad;

			pfr_copyout_addr(&ad, ke);
			if (copyout(&ad, w->pfrw_addr, sizeof(ad)))
				return (EFAULT);
			w->pfrw_addr++;
		}
		break;
	case PFRW_GET_ASTATS:
		if (w->pfrw_free-- > 0) {
			struct pfr_astats as;

			pfr_copyout_addr(&as.pfras_a, ke);

			if (ke->pfrke_counters) {
				bcopy(ke->pfrke_counters->pfrkc_packets,
				    as.pfras_packets, sizeof(as.pfras_packets));
				bcopy(ke->pfrke_counters->pfrkc_bytes,
				    as.pfras_bytes, sizeof(as.pfras_bytes));
			} else {
				bzero(as.pfras_packets,
				    sizeof(as.pfras_packets));
				bzero(as.pfras_bytes, sizeof(as.pfras_bytes));
				as.pfras_a.pfra_fback = PFR_FB_NOCOUNT;
			}
			as.pfras_tzero = ke->pfrke_tzero;

			if (COPYOUT(&as, w->pfrw_astats, sizeof(as), flags))
				return (EFAULT);
			w->pfrw_astats++;
		}
		break;
	case PFRW_POOL_GET:
		if (ke->pfrke_flags & PFRKE_FLAG_NOT)
			break; /* negative entries are ignored */
		if (!w->pfrw_cnt--) {
			w->pfrw_kentry = ke;
			return (1); /* finish search */
		}
		break;
	case PFRW_DYNADDR_UPDATE:
		switch (ke->pfrke_af) {
		case AF_INET:
			if (w->pfrw_dyn->pfid_acnt4++ > 0)
				break;
			pfr_prepare_network(&mask, AF_INET, ke->pfrke_net);
			w->pfrw_dyn->pfid_addr4 = *SUNION2PF(
			    &ke->pfrke_sa, AF_INET);
			w->pfrw_dyn->pfid_mask4 = *SUNION2PF(
			    &mask, AF_INET);
			break;
#ifdef	INET6
		case AF_INET6:
			if (w->pfrw_dyn->pfid_acnt6++ > 0)
				break;
			pfr_prepare_network(&mask, AF_INET6, ke->pfrke_net);
			w->pfrw_dyn->pfid_addr6 = *SUNION2PF(
			    &ke->pfrke_sa, AF_INET6);
			w->pfrw_dyn->pfid_mask6 = *SUNION2PF(
			    &mask, AF_INET6);
			break;
#endif	/* INET6 */
		default:
			unhandled_af(ke->pfrke_af);
		}
		break;
	}
	return (0);
}

int
pfr_clr_tables(struct pfr_table *filter, int *ndel, int flags)
{
	struct pfr_ktableworkq	 workq;
	struct pfr_ktable	*p;
	int			 xdel = 0;

	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ALLRSETS);
	if (pfr_fix_anchor(filter->pfrt_anchor))
		return (EINVAL);
	if (pfr_table_count(filter, flags) < 0)
		return (ENOENT);

	SLIST_INIT(&workq);
	RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
		if (pfr_skip_table(filter, p, flags))
			continue;
		if (!strcmp(p->pfrkt_anchor, PF_RESERVED_ANCHOR))
			continue;
		if (!(p->pfrkt_flags & PFR_TFLAG_ACTIVE))
			continue;
		p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_ACTIVE;
		SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
		xdel++;
	}
	if (!(flags & PFR_FLAG_DUMMY)) {
		pfr_setflags_ktables(&workq);
	}
	if (ndel != NULL)
		*ndel = xdel;
	return (0);
}

int
pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags)
{
	struct pfr_ktableworkq	 addq, changeq, auxq;
	struct pfr_ktable	*p, *q, *r, *n, *w, key;
	int			 i, rv, xadd = 0;
	time_t			 tzero = gettime();

	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
	SLIST_INIT(&addq);
	SLIST_INIT(&changeq);
	SLIST_INIT(&auxq);
	/* pre-allocate all memory outside of locks */
	for (i = 0; i < size; i++) {
		YIELD(flags & PFR_FLAG_USERIOCTL);
		if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags))
			senderr(EFAULT);
		if (pfr_validate_table(&key.pfrkt_t, PFR_TFLAG_USRMASK,
		    flags & PFR_FLAG_USERIOCTL))
			senderr(EINVAL);
		key.pfrkt_flags |= PFR_TFLAG_ACTIVE;
		p = pfr_create_ktable(&key.pfrkt_t, tzero, 0,
		    (flags & PFR_FLAG_USERIOCTL? PR_WAITOK : PR_NOWAIT));
		if (p == NULL)
			senderr(ENOMEM);

		/*
		 * Note: we also pre-allocate a root table here. We keep it
		 * at ->pfrkt_root, which we must not forget about.
		 */
		key.pfrkt_flags = 0;
		memset(key.pfrkt_anchor, 0, sizeof(key.pfrkt_anchor));
		p->pfrkt_root = pfr_create_ktable(&key.pfrkt_t, 0, 0,
		    (flags & PFR_FLAG_USERIOCTL? PR_WAITOK : PR_NOWAIT));
		if (p->pfrkt_root == NULL) {
			pfr_destroy_ktable(p, 0);
			senderr(ENOMEM);
		}

		SLIST_FOREACH(q, &auxq, pfrkt_workq) {
			if (!pfr_ktable_compare(p, q)) {
				/*
				 * We need no lock here: `p` is empty, with
				 * no rules or shadow tables attached.
				 */
				pfr_destroy_ktable(p->pfrkt_root, 0);
				p->pfrkt_root = NULL;
				pfr_destroy_ktable(p, 0);
				p = NULL;
				break;
			}
		}
		if (q != NULL)
			continue;

		SLIST_INSERT_HEAD(&auxq, p, pfrkt_workq);
	}

	/*
	 * auxq contains freshly allocated tables with no dups.
	 * also note that no rulesets are attached yet, because
	 * the attach operation requires PF_LOCK().
	 */
	NET_LOCK();
	PF_LOCK();
	SLIST_FOREACH_SAFE(n, &auxq, pfrkt_workq, w) {
		p = RB_FIND(pfr_ktablehead, &pfr_ktables, n);
		if (p == NULL) {
			SLIST_REMOVE(&auxq, n, pfr_ktable, pfrkt_workq);
			SLIST_INSERT_HEAD(&addq, n, pfrkt_workq);
			xadd++;
		} else if (!(flags & PFR_FLAG_DUMMY) &&
		    !(p->pfrkt_flags & PFR_TFLAG_ACTIVE)) {
			p->pfrkt_nflags =
			    (p->pfrkt_flags & ~PFR_TFLAG_USRMASK) |
			    (n->pfrkt_flags & PFR_TFLAG_USRMASK) |
			    PFR_TFLAG_ACTIVE;
			SLIST_INSERT_HEAD(&changeq, p, pfrkt_workq);
		}
	}

	if (!(flags & PFR_FLAG_DUMMY)) {
		/*
		 * addq contains tables we have to insert and attach rules
		 * to,
		 *
		 * changeq contains tables we need to update,
		 *
		 * auxq contains pre-allocated tables we won't use and must
		 * free.
		 */
		SLIST_FOREACH_SAFE(p, &addq, pfrkt_workq, w) {
			p->pfrkt_rs = pf_find_or_create_ruleset(
			    p->pfrkt_anchor);
			if (p->pfrkt_rs == NULL) {
				xadd--;
				SLIST_REMOVE(&addq, p, pfr_ktable, pfrkt_workq);
				SLIST_INSERT_HEAD(&auxq, p, pfrkt_workq);
				continue;
			}
			p->pfrkt_rs->tables++;

			if (!p->pfrkt_anchor[0]) {
				q = p->pfrkt_root;
				p->pfrkt_root = NULL;
				SLIST_INSERT_HEAD(&auxq, q, pfrkt_workq);
				continue;
			}

			/* use pre-allocated root table as a key */
			q = p->pfrkt_root;
			p->pfrkt_root = NULL;
			r = RB_FIND(pfr_ktablehead, &pfr_ktables, q);
			if (r != NULL) {
				p->pfrkt_root = r;
				SLIST_INSERT_HEAD(&auxq, q, pfrkt_workq);
				continue;
			}
			/*
			 * we may have created the root table in an earlier
			 * iteration; if so, it can only exist in addq.
			 */
			SLIST_FOREACH(r, &addq, pfrkt_workq) {
				if (!pfr_ktable_compare(r, q)) {
					/*
					 * `r` is the root table we created
					 * earlier; `q` can be dropped.
					 */
					p->pfrkt_root = r;
					SLIST_INSERT_HEAD(&auxq, q,
					    pfrkt_workq);
					break;
				}
			}
			if (r != NULL)
				continue;

			q->pfrkt_rs = pf_find_or_create_ruleset(q->pfrkt_anchor);
			/*
			 * root tables are attached to the main ruleset,
			 * because ->pfrkt_anchor[0] == '\0'
			 */
			KASSERT(q->pfrkt_rs == &pf_main_ruleset);
			q->pfrkt_rs->tables++;
			p->pfrkt_root = q;
			SLIST_INSERT_HEAD(&addq, q, pfrkt_workq);
		}

		pfr_insert_ktables(&addq);
		pfr_setflags_ktables(&changeq);
	}
	PF_UNLOCK();
	NET_UNLOCK();

	pfr_destroy_ktables_aux(&auxq);
	if (flags & PFR_FLAG_DUMMY)
		pfr_destroy_ktables_aux(&addq);

	if (nadd != NULL)
		*nadd = xadd;
	return (0);
_bad:
	pfr_destroy_ktables_aux(&auxq);
	return (rv);
}

int
pfr_del_tables(struct pfr_table *tbl, int size, int *ndel, int flags)
{
	struct pfr_ktableworkq	 workq;
	struct pfr_ktable	*p, *q, key;
	int			 i, xdel = 0;

	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
	SLIST_INIT(&workq);
	for (i = 0; i < size; i++) {
		YIELD(flags & PFR_FLAG_USERIOCTL);
		if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags))
			return (EFAULT);
		if (pfr_validate_table(&key.pfrkt_t, 0,
		    flags & PFR_FLAG_USERIOCTL))
			return (EINVAL);
		p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
		if (p != NULL && (p->pfrkt_flags & PFR_TFLAG_ACTIVE)) {
			SLIST_FOREACH(q, &workq, pfrkt_workq)
				if (!pfr_ktable_compare(p, q))
					goto _skip;
			p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_ACTIVE;
			SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
			xdel++;
		}
_skip:
	;
	}

	if (!(flags & PFR_FLAG_DUMMY)) {
		pfr_setflags_ktables(&workq);
	}
	if (ndel != NULL)
		*ndel = xdel;
	return (0);
}

int
pfr_get_tables(struct pfr_table *filter, struct pfr_table *tbl, int *size,
	int flags)
{
	struct pfr_ktable	*p;
	int			 n, nn;

	ACCEPT_FLAGS(flags, PFR_FLAG_ALLRSETS);
	if (pfr_fix_anchor(filter->pfrt_anchor))
		return (EINVAL);
	n = nn = pfr_table_count(filter, flags);
	if (n < 0)
		return (ENOENT);
	if (n > *size) {
		*size = n;
		return (0);
	}
	RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
		if (pfr_skip_table(filter, p, flags))
			continue;
		if (n-- <= 0)
			continue;
		if (COPYOUT(&p->pfrkt_t, tbl++, sizeof(*tbl), flags))
			return (EFAULT);
	}
	if (n) {
		DPFPRINTF(LOG_ERR,
		    "pfr_get_tables: corruption detected (%d).", n);
		return (ENOTTY);
	}
	*size = nn;
	return (0);
}

int
pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size,
	int flags)
{
	struct pfr_ktable	*p;
	struct pfr_ktableworkq	 workq;
	int			 n, nn;
	time_t			 tzero = gettime();

	/* XXX PFR_FLAG_CLSTATS disabled */
	ACCEPT_FLAGS(flags, PFR_FLAG_ALLRSETS);
	if (pfr_fix_anchor(filter->pfrt_anchor))
		return (EINVAL);
	n = nn = pfr_table_count(filter, flags);
	if (n < 0)
		return (ENOENT);
	if (n > *size) {
		*size = n;
		return (0);
	}
	SLIST_INIT(&workq);
	RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
		if (pfr_skip_table(filter, p, flags))
			continue;
		if (n-- <= 0)
			continue;
		if (COPYOUT(&p->pfrkt_ts, tbl++, sizeof(*tbl), flags))
			return (EFAULT);
		SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
	}
	if (flags & PFR_FLAG_CLSTATS)
		pfr_clstats_ktables(&workq, tzero,
		    flags & PFR_FLAG_ADDRSTOO);
	if (n) {
		DPFPRINTF(LOG_ERR,
		    "pfr_get_tstats: corruption detected (%d).", n);
		return (ENOTTY);
	}
	*size = nn;
	return (0);
}

int
pfr_clr_tstats(struct pfr_table *tbl, int size, int *nzero, int flags)
{
	struct pfr_ktableworkq	 workq;
	struct pfr_ktable	*p, key;
	int			 i, xzero = 0;
	time_t			 tzero = gettime();

	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ADDRSTOO);
	SLIST_INIT(&workq);
	for (i = 0; i < size; i++) {
		YIELD(flags & PFR_FLAG_USERIOCTL);
		if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags))
			return (EFAULT);
		if (pfr_validate_table(&key.pfrkt_t, 0, 0))
			return (EINVAL);
		p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
		if (p != NULL) {
			SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
			xzero++;
		}
	}
	if (!(flags & PFR_FLAG_DUMMY)) {
		pfr_clstats_ktables(&workq, tzero, flags & PFR_FLAG_ADDRSTOO);
	}
	if (nzero != NULL)
		*nzero = xzero;
	return (0);
}

int
pfr_set_tflags(struct pfr_table *tbl, int size, int setflag, int clrflag,
	int *nchange, int *ndel, int flags)
{
	struct pfr_ktableworkq	 workq;
	struct pfr_ktable	*p, *q, key;
	int			 i, xchange = 0, xdel = 0;

	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
	if ((setflag & ~PFR_TFLAG_USRMASK) ||
	    (clrflag & ~PFR_TFLAG_USRMASK) ||
	    (setflag & clrflag))
		return (EINVAL);
	SLIST_INIT(&workq);
	for (i = 0; i < size; i++) {
		YIELD(flags & PFR_FLAG_USERIOCTL);
		if (COPYIN(tbl+i, &key.pfrkt_t, sizeof(key.pfrkt_t), flags))
			return (EFAULT);
		if (pfr_validate_table(&key.pfrkt_t, 0,
		    flags & PFR_FLAG_USERIOCTL))
			return (EINVAL);
		p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
		if (p != NULL && (p->pfrkt_flags & PFR_TFLAG_ACTIVE)) {
			p->pfrkt_nflags = (p->pfrkt_flags | setflag) &
			    ~clrflag;
			if (p->pfrkt_nflags == p->pfrkt_flags)
				goto _skip;
			SLIST_FOREACH(q, &workq, pfrkt_workq)
				if (!pfr_ktable_compare(p, q))
					goto _skip;
			SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
			if ((p->pfrkt_flags & PFR_TFLAG_PERSIST) &&
			    (clrflag & PFR_TFLAG_PERSIST) &&
			    !(p->pfrkt_flags & PFR_TFLAG_REFERENCED))
				xdel++;
			else
				xchange++;
		}
_skip:
	;
	}
	if (!(flags & PFR_FLAG_DUMMY)) {
		pfr_setflags_ktables(&workq);
	}
	if (nchange != NULL)
		*nchange = xchange;
	if (ndel != NULL)
		*ndel = xdel;
	return (0);
}

int
pfr_ina_begin(struct pfr_table *trs, u_int32_t *ticket, int *ndel, int flags)
{
	struct pfr_ktableworkq	 workq;
	struct pfr_ktable	*p;
	struct pf_ruleset	*rs;
	int			 xdel = 0;

	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
	rs = pf_find_or_create_ruleset(trs->pfrt_anchor);
	if (rs == NULL)
		return (ENOMEM);
	SLIST_INIT(&workq);
	RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
		if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) ||
		    pfr_skip_table(trs, p, 0))
			continue;
		p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_INACTIVE;
		SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
		xdel++;
	}
	if (!(flags & PFR_FLAG_DUMMY)) {
		pfr_setflags_ktables(&workq);
		if (ticket != NULL)
			*ticket = ++rs->tticket;
		rs->topen = 1;
	} else
		pf_remove_if_empty_ruleset(rs);
	if (ndel != NULL)
		*ndel = xdel;
	return (0);
}

int
pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size,
    int *nadd, int *naddr, u_int32_t ticket, int flags)
{
	struct pfr_ktableworkq	 tableq;
	struct pfr_kentryworkq	 addrq;
	struct pfr_ktable	*kt, *rt, *shadow, key;
	struct pfr_kentry	*p;
	struct pfr_addr		 ad;
	struct pf_ruleset	*rs;
	int			 i, rv, xadd = 0, xaddr = 0;

	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ADDRSTOO);
	if (size && !(flags & PFR_FLAG_ADDRSTOO))
		return (EINVAL);
	if (pfr_validate_table(tbl, PFR_TFLAG_USRMASK,
	    flags & PFR_FLAG_USERIOCTL))
		return (EINVAL);
	rs = pf_find_ruleset(tbl->pfrt_anchor);
	if (rs == NULL || !rs->topen || ticket != rs->tticket)
		return (EBUSY);
	tbl->pfrt_flags |= PFR_TFLAG_INACTIVE;
	SLIST_INIT(&tableq);
	kt = RB_FIND(pfr_ktablehead, &pfr_ktables, (struct pfr_ktable *)tbl);
	if (kt == NULL) {
		kt = pfr_create_ktable(tbl, 0, 1,
		    (flags & PFR_FLAG_USERIOCTL? PR_WAITOK : PR_NOWAIT));
		if (kt == NULL)
			return (ENOMEM);
		SLIST_INSERT_HEAD(&tableq, kt, pfrkt_workq);
		xadd++;
		if (!tbl->pfrt_anchor[0])
			goto _skip;

		/* find or create root table */
		bzero(&key, sizeof(key));
		strlcpy(key.pfrkt_name, tbl->pfrt_name, sizeof(key.pfrkt_name));
		rt = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
		if (rt != NULL) {
			kt->pfrkt_root = rt;
			goto _skip;
		}
		rt = pfr_create_ktable(&key.pfrkt_t, 0, 1,
		    (flags & PFR_FLAG_USERIOCTL? PR_WAITOK : PR_NOWAIT));
		if (rt == NULL) {
			pfr_destroy_ktables(&tableq, 0);
			return (ENOMEM);
		}
		SLIST_INSERT_HEAD(&tableq, rt, pfrkt_workq);
		kt->pfrkt_root = rt;
	} else if (!(kt->pfrkt_flags & PFR_TFLAG_INACTIVE))
		xadd++;
_skip:
	shadow = pfr_create_ktable(tbl, 0, 0,
	    (flags & PFR_FLAG_USERIOCTL? PR_WAITOK : PR_NOWAIT));
	if (shadow == NULL) {
		pfr_destroy_ktables(&tableq, 0);
		return (ENOMEM);
	}
	SLIST_INIT(&addrq);
	for (i = 0; i < size; i++) {
		YIELD(flags & PFR_FLAG_USERIOCTL);
		if (COPYIN(addr+i, &ad, sizeof(ad), flags))
			senderr(EFAULT);
		if (pfr_validate_addr(&ad))
			senderr(EINVAL);
		if (pfr_lookup_addr(shadow, &ad, 1) != NULL)
			continue;
		p = pfr_create_kentry(&ad);
		if (p == NULL)
			senderr(ENOMEM);
		if (pfr_route_kentry(shadow, p)) {
			pfr_destroy_kentry(p);
			continue;
		}
		SLIST_INSERT_HEAD(&addrq, p, pfrke_workq);
		xaddr++;
		if (p->pfrke_type == PFRKE_COST)
			kt->pfrkt_refcntcost++;
		pfr_ktable_winfo_update(kt, p);
	}
	if (!(flags & PFR_FLAG_DUMMY)) {
		if (kt->pfrkt_shadow != NULL)
			pfr_destroy_ktable(kt->pfrkt_shadow, 1);
		kt->pfrkt_flags |= PFR_TFLAG_INACTIVE;
		pfr_insert_ktables(&tableq);
		shadow->pfrkt_cnt = (flags & PFR_FLAG_ADDRSTOO) ?
		    xaddr : NO_ADDRESSES;
		kt->pfrkt_shadow = shadow;
	} else {
		pfr_clean_node_mask(shadow, &addrq);
		pfr_destroy_ktable(shadow, 0);
		pfr_destroy_ktables(&tableq, 0);
		pfr_destroy_kentries(&addrq);
	}
	if (nadd != NULL)
		*nadd = xadd;
	if (naddr != NULL)
		*naddr = xaddr;
	return (0);
_bad:
	pfr_destroy_ktable(shadow, 0);
	pfr_destroy_ktables(&tableq, 0);
	pfr_destroy_kentries(&addrq);
	return (rv);
}

int
pfr_ina_rollback(struct pfr_table *trs, u_int32_t ticket, int *ndel, int flags)
{
	struct pfr_ktableworkq	 workq;
	struct pfr_ktable	*p;
	struct pf_ruleset	*rs;
	int			 xdel = 0;

	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
	rs = pf_find_ruleset(trs->pfrt_anchor);
	if (rs == NULL || !rs->topen || ticket != rs->tticket)
		return (0);
	SLIST_INIT(&workq);
	RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
		if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) ||
		    pfr_skip_table(trs, p, 0))
			continue;
		p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_INACTIVE;
		SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
		xdel++;
	}
	if (!(flags & PFR_FLAG_DUMMY)) {
		pfr_setflags_ktables(&workq);
		rs->topen = 0;
		pf_remove_if_empty_ruleset(rs);
	}
	if (ndel != NULL)
		*ndel = xdel;
	return (0);
}

int
pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd,
    int *nchange, int flags)
{
	struct pfr_ktable	*p, *q;
	struct pfr_ktableworkq	 workq;
	struct pf_ruleset	*rs;
	int			 xadd = 0, xchange = 0;
	time_t			 tzero = gettime();

	ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY);
	rs = pf_find_ruleset(trs->pfrt_anchor);
	if (rs == NULL || !rs->topen || ticket != rs->tticket)
		return (EBUSY);

	SLIST_INIT(&workq);
	RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) {
		if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) ||
		    pfr_skip_table(trs, p, 0))
			continue;
		SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
		if (p->pfrkt_flags & PFR_TFLAG_ACTIVE)
			xchange++;
		else
			xadd++;
	}

	if (!(flags & PFR_FLAG_DUMMY)) {
		SLIST_FOREACH_SAFE(p, &workq, pfrkt_workq, q) {
			pfr_commit_ktable(p, tzero);
		}
		rs->topen = 0;
		pf_remove_if_empty_ruleset(rs);
	}
	if (nadd != NULL)
		*nadd = xadd;
	if (nchange != NULL)
		*nchange = xchange;

	return (0);
}

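/*
 * Fold a committed shadow table into its active counterpart.  Three
 * cases, matching the branches below: the shadow carries no address
 * list (NO_ADDRESSES), so only flags and statistics change; the table
 * is already ACTIVE, so the address lists are merged entry by entry
 * to preserve per-entry statistics; or the table is not active yet,
 * so the radix heads and counts can simply be exchanged with SWAP().
 */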
void
pfr_commit_ktable(struct pfr_ktable *kt, time_t tzero)
{
	struct pfr_ktable	*shadow = kt->pfrkt_shadow;
	int			 nflags;

	if (shadow->pfrkt_cnt == NO_ADDRESSES) {
		if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
			pfr_clstats_ktable(kt, tzero, 1);
	} else if (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) {
		/* kt might contain addresses */
		struct pfr_kentryworkq	 addrq, addq, changeq, delq, garbageq;
		struct pfr_kentry	*p, *q;
		struct pfr_addr		 ad;

		pfr_enqueue_addrs(shadow, &addrq, NULL, 0);
		pfr_mark_addrs(kt);
		SLIST_INIT(&addq);
		SLIST_INIT(&changeq);
		SLIST_INIT(&delq);
		SLIST_INIT(&garbageq);
		pfr_clean_node_mask(shadow, &addrq);
		while ((p = SLIST_FIRST(&addrq)) != NULL) {
			SLIST_REMOVE_HEAD(&addrq, pfrke_workq);
			pfr_copyout_addr(&ad, p);
			q = pfr_lookup_addr(kt, &ad, 1);
			if (q != NULL) {
				if ((q->pfrke_flags & PFRKE_FLAG_NOT) !=
				    (p->pfrke_flags & PFRKE_FLAG_NOT))
					SLIST_INSERT_HEAD(&changeq, q,
					    pfrke_workq);
				q->pfrke_flags |= PFRKE_FLAG_MARK;
				SLIST_INSERT_HEAD(&garbageq, p, pfrke_workq);
			} else {
				p->pfrke_tzero = tzero;
				SLIST_INSERT_HEAD(&addq, p, pfrke_workq);
			}
		}
		pfr_enqueue_addrs(kt, &delq, NULL, ENQUEUE_UNMARKED_ONLY);
		pfr_insert_kentries(kt, &addq, tzero);
		pfr_remove_kentries(kt, &delq);
		pfr_clstats_kentries(&changeq, tzero, INVERT_NEG_FLAG);
		pfr_destroy_kentries(&garbageq);
	} else {
		/* kt cannot contain addresses */
		SWAP(struct radix_node_head *, kt->pfrkt_ip4,
		    shadow->pfrkt_ip4);
		SWAP(struct radix_node_head *, kt->pfrkt_ip6,
		    shadow->pfrkt_ip6);
		SWAP(int, kt->pfrkt_cnt, shadow->pfrkt_cnt);
		pfr_clstats_ktable(kt, tzero, 1);
	}
	nflags = ((shadow->pfrkt_flags & PFR_TFLAG_USRMASK) |
	    (kt->pfrkt_flags & PFR_TFLAG_SETMASK) | PFR_TFLAG_ACTIVE)
		& ~PFR_TFLAG_INACTIVE;
	pfr_destroy_ktable(shadow, 0);
	kt->pfrkt_shadow = NULL;
	pfr_setflags_ktable(kt, nflags);
}

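/*
 * A table name must be non-empty, NUL-terminated and zero-padded up
 * to PF_TABLE_NAME_SIZE, and its flags must stay within
 * `allowedflags'.  For example, a name with stale bytes after the
 * terminating NUL is rejected here even though plain string
 * comparisons would accept it.
 */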
int
pfr_validate_table(struct pfr_table *tbl, int allowedflags, int no_reserved)
{
	int i;

	if (!tbl->pfrt_name[0])
		return (-1);
	if (no_reserved && !strcmp(tbl->pfrt_anchor, PF_RESERVED_ANCHOR))
		return (-1);
	if (tbl->pfrt_name[PF_TABLE_NAME_SIZE-1])
		return (-1);
	for (i = strlen(tbl->pfrt_name); i < PF_TABLE_NAME_SIZE; i++)
		if (tbl->pfrt_name[i])
			return (-1);
	if (pfr_fix_anchor(tbl->pfrt_anchor))
		return (-1);
	if (tbl->pfrt_flags & ~allowedflags)
		return (-1);
	return (0);
}

/*
 * Rewrite anchors referenced by tables to remove slashes
 * and check for validity.
 */
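/*
 * For example, an anchor of "//foo/bar" is shifted in place to
 * "foo/bar" and the vacated tail is zeroed; anchors that are not
 * NUL-terminated and zero-padded within MAXPATHLEN are rejected.
 */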
int
pfr_fix_anchor(char *anchor)
{
	size_t siz = MAXPATHLEN;
	int i;

	if (anchor[0] == '/') {
		char *path;
		int off;

		path = anchor;
		off = 1;
		while (*++path == '/')
			off++;
		bcopy(path, anchor, siz - off);
		memset(anchor + siz - off, 0, off);
	}
	if (anchor[siz - 1])
		return (-1);
	for (i = strlen(anchor); i < siz; i++)
		if (anchor[i])
			return (-1);
	return (0);
}

int
pfr_table_count(struct pfr_table *filter, int flags)
{
	struct pf_ruleset *rs;

	if (flags & PFR_FLAG_ALLRSETS)
		return (pfr_ktable_cnt);
	if (filter->pfrt_anchor[0]) {
		rs = pf_find_ruleset(filter->pfrt_anchor);
		return ((rs != NULL) ? rs->tables : -1);
	}
	return (pf_main_ruleset.tables);
}

int
pfr_skip_table(struct pfr_table *filter, struct pfr_ktable *kt, int flags)
{
	if (flags & PFR_FLAG_ALLRSETS)
		return (0);
	if (strcmp(filter->pfrt_anchor, kt->pfrkt_anchor))
		return (1);
	return (0);
}

void
pfr_insert_ktables(struct pfr_ktableworkq *workq)
{
	struct pfr_ktable	*p;

	SLIST_FOREACH(p, workq, pfrkt_workq)
		pfr_insert_ktable(p);
}

void
pfr_insert_ktable(struct pfr_ktable *kt)
{
	RB_INSERT(pfr_ktablehead, &pfr_ktables, kt);
	pfr_ktable_cnt++;
	if (kt->pfrkt_root != NULL)
		if (!kt->pfrkt_root->pfrkt_refcnt[PFR_REFCNT_ANCHOR]++)
			pfr_setflags_ktable(kt->pfrkt_root,
			    kt->pfrkt_root->pfrkt_flags|PFR_TFLAG_REFDANCHOR);
}

void
pfr_setflags_ktables(struct pfr_ktableworkq *workq)
{
	struct pfr_ktable	*p, *q;

	SLIST_FOREACH_SAFE(p, workq, pfrkt_workq, q) {
		pfr_setflags_ktable(p, p->pfrkt_nflags);
	}
}

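/*
 * Apply a new flag set to a table and handle the resulting life-cycle
 * transitions: a table no longer referenced by a rule, by an anchor
 * or by the PERSIST flag loses ACTIVE; a table that is not ACTIVE
 * loses its user-settable flags; and a table with no flags left in
 * SETMASK is removed from the tree and destroyed outright.
 */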
void
pfr_setflags_ktable(struct pfr_ktable *kt, int newf)
{
	struct pfr_kentryworkq	addrq;

	if (!(newf & PFR_TFLAG_REFERENCED) &&
	    !(newf & PFR_TFLAG_REFDANCHOR) &&
	    !(newf & PFR_TFLAG_PERSIST))
		newf &= ~PFR_TFLAG_ACTIVE;
	if (!(newf & PFR_TFLAG_ACTIVE))
		newf &= ~PFR_TFLAG_USRMASK;
	if (!(newf & PFR_TFLAG_SETMASK)) {
		RB_REMOVE(pfr_ktablehead, &pfr_ktables, kt);
		if (kt->pfrkt_root != NULL)
			if (!--kt->pfrkt_root->pfrkt_refcnt[PFR_REFCNT_ANCHOR])
				pfr_setflags_ktable(kt->pfrkt_root,
				    kt->pfrkt_root->pfrkt_flags &
					~PFR_TFLAG_REFDANCHOR);
		pfr_destroy_ktable(kt, 1);
		pfr_ktable_cnt--;
		return;
	}
	if (!(newf & PFR_TFLAG_ACTIVE) && kt->pfrkt_cnt) {
		pfr_enqueue_addrs(kt, &addrq, NULL, 0);
		pfr_remove_kentries(kt, &addrq);
	}
	if (!(newf & PFR_TFLAG_INACTIVE) && kt->pfrkt_shadow != NULL) {
		pfr_destroy_ktable(kt->pfrkt_shadow, 1);
		kt->pfrkt_shadow = NULL;
	}
	kt->pfrkt_flags = newf;
}

void
pfr_clstats_ktables(struct pfr_ktableworkq *workq, time_t tzero, int recurse)
{
	struct pfr_ktable	*p;

	SLIST_FOREACH(p, workq, pfrkt_workq)
		pfr_clstats_ktable(p, tzero, recurse);
}

void
pfr_clstats_ktable(struct pfr_ktable *kt, time_t tzero, int recurse)
{
	struct pfr_kentryworkq	 addrq;

	if (recurse) {
		pfr_enqueue_addrs(kt, &addrq, NULL, 0);
		pfr_clstats_kentries(&addrq, tzero, 0);
	}
	bzero(kt->pfrkt_packets, sizeof(kt->pfrkt_packets));
	bzero(kt->pfrkt_bytes, sizeof(kt->pfrkt_bytes));
	kt->pfrkt_match = kt->pfrkt_nomatch = 0;
	kt->pfrkt_tzero = tzero;
}

struct pfr_ktable *
pfr_create_ktable(struct pfr_table *tbl, time_t tzero, int attachruleset,
    int wait)
{
	struct pfr_ktable	*kt;
	struct pf_ruleset	*rs;

	kt = pool_get(&pfr_ktable_pl, wait|PR_ZERO|PR_LIMITFAIL);
	if (kt == NULL)
		return (NULL);
	kt->pfrkt_t = *tbl;

	if (attachruleset) {
		PF_ASSERT_LOCKED();
		rs = pf_find_or_create_ruleset(tbl->pfrt_anchor);
		if (!rs) {
			pfr_destroy_ktable(kt, 0);
			return (NULL);
		}
		kt->pfrkt_rs = rs;
		rs->tables++;
	}

	if (!rn_inithead((void **)&kt->pfrkt_ip4,
	    offsetof(struct sockaddr_in, sin_addr)) ||
	    !rn_inithead((void **)&kt->pfrkt_ip6,
	    offsetof(struct sockaddr_in6, sin6_addr))) {
		pfr_destroy_ktable(kt, 0);
		return (NULL);
	}
	kt->pfrkt_tzero = tzero;
	kt->pfrkt_refcntcost = 0;
	kt->pfrkt_gcdweight = 0;
	kt->pfrkt_maxweight = 1;

	return (kt);
}

void
pfr_destroy_ktables(struct pfr_ktableworkq *workq, int flushaddr)
{
	struct pfr_ktable	*p;

	while ((p = SLIST_FIRST(workq)) != NULL) {
		SLIST_REMOVE_HEAD(workq, pfrkt_workq);
		pfr_destroy_ktable(p, flushaddr);
	}
}

void
pfr_destroy_ktables_aux(struct pfr_ktableworkq *auxq)
{
	struct pfr_ktable	*p;

	while ((p = SLIST_FIRST(auxq)) != NULL) {
		SLIST_REMOVE_HEAD(auxq, pfrkt_workq);
		/*
		 * There must be no extra data (rules, shadow tables, ...)
		 * attached, because auxq holds just empty memory waiting
		 * to be initialized.  Therefore we can also be called
		 * without any lock.
		 */
		if (p->pfrkt_root != NULL) {
			KASSERT(p->pfrkt_root->pfrkt_rs == NULL);
			KASSERT(p->pfrkt_root->pfrkt_shadow == NULL);
			KASSERT(p->pfrkt_root->pfrkt_root == NULL);
			pfr_destroy_ktable(p->pfrkt_root, 0);
			p->pfrkt_root = NULL;
		}
		KASSERT(p->pfrkt_rs == NULL);
		KASSERT(p->pfrkt_shadow == NULL);
		pfr_destroy_ktable(p, 0);
	}
}

void
pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr)
{
	struct pfr_kentryworkq	 addrq;

	if (flushaddr) {
		pfr_enqueue_addrs(kt, &addrq, NULL, 0);
		pfr_clean_node_mask(kt, &addrq);
		pfr_destroy_kentries(&addrq);
	}
	if (kt->pfrkt_ip4 != NULL)
		free(kt->pfrkt_ip4, M_RTABLE, sizeof(*kt->pfrkt_ip4));
	if (kt->pfrkt_ip6 != NULL)
		free(kt->pfrkt_ip6, M_RTABLE, sizeof(*kt->pfrkt_ip6));
	if (kt->pfrkt_shadow != NULL)
		pfr_destroy_ktable(kt->pfrkt_shadow, flushaddr);
	if (kt->pfrkt_rs != NULL) {
		kt->pfrkt_rs->tables--;
		pf_remove_if_empty_ruleset(kt->pfrkt_rs);
	}
	pool_put(&pfr_ktable_pl, kt);
}

int
pfr_ktable_compare(struct pfr_ktable *p, struct pfr_ktable *q)
{
	int d;

	if ((d = strncmp(p->pfrkt_name, q->pfrkt_name, PF_TABLE_NAME_SIZE)))
		return (d);
	return (strcmp(p->pfrkt_anchor, q->pfrkt_anchor));
}

struct pfr_ktable *
pfr_lookup_table(struct pfr_table *tbl)
{
	/*
	 * struct pfr_ktable starts with a struct pfr_table,
	 * so the cast below is safe.
	 */
	return (RB_FIND(pfr_ktablehead, &pfr_ktables,
	    (struct pfr_ktable *)tbl));
}

int
pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af)
{
	struct pfr_kentry	*ke = NULL;
	int			 match;

	ke = pfr_kentry_byaddr(kt, a, af, 0);

	match = (ke && !(ke->pfrke_flags & PFRKE_FLAG_NOT));
	if (match)
		kt->pfrkt_match++;
	else
		kt->pfrkt_nomatch++;

	return (match);
}

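/*
 * Longest-prefix lookup of `a' in the table's radix tree.  With
 * `exact' set, a match on a network entry is discarded and only host
 * entries count: e.g. looking up 192.0.2.1 in a table holding only
 * 192.0.2.0/24 returns the network entry when exact == 0 and NULL
 * when exact == 1 (illustrative addresses).
 */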
struct pfr_kentry *
pfr_kentry_byaddr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af,
    int exact)
{
	struct pfr_kentry	*ke = NULL;
	struct sockaddr_in	 tmp4;
#ifdef INET6
	struct sockaddr_in6	 tmp6;
#endif /* INET6 */

	kt = pfr_ktable_select_active(kt);
	if (kt == NULL)
		return (NULL);

	switch (af) {
	case AF_INET:
		bzero(&tmp4, sizeof(tmp4));
		tmp4.sin_len = sizeof(tmp4);
		tmp4.sin_family = AF_INET;
		tmp4.sin_addr.s_addr = a->addr32[0];
		ke = (struct pfr_kentry *)rn_match(&tmp4, kt->pfrkt_ip4);
		break;
#ifdef INET6
	case AF_INET6:
		bzero(&tmp6, sizeof(tmp6));
		tmp6.sin6_len = sizeof(tmp6);
		tmp6.sin6_family = AF_INET6;
		bcopy(a, &tmp6.sin6_addr, sizeof(tmp6.sin6_addr));
		ke = (struct pfr_kentry *)rn_match(&tmp6, kt->pfrkt_ip6);
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}
	if (exact && ke && KENTRY_NETWORK(ke))
		ke = NULL;

	return (ke);
}

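/*
 * Account a packet against the table and, with PFR_TFLAG_COUNTERS set,
 * against the matching entry.  Counters form a small matrix indexed as
 * [direction][operation]: dir_idx is 0 for inbound and 1 for outbound
 * (pd->dir == PF_OUT), op_idx is one of PFR_OP_PASS, PFR_OP_MATCH,
 * PFR_OP_BLOCK or, when the lookup disagrees with the rule's verdict,
 * PFR_OP_XPASS.
 */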
void
pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, struct pf_pdesc *pd,
    int op, int notrule)
{
	struct pfr_kentry	*ke = NULL;
	struct sockaddr_in	 tmp4;
#ifdef INET6
	struct sockaddr_in6	 tmp6;
#endif /* INET6 */
	sa_family_t		 af = pd->af;
	u_int64_t		 len = pd->tot_len;
	int			 dir_idx = (pd->dir == PF_OUT);
	int			 op_idx;

	kt = pfr_ktable_select_active(kt);
	if (kt == NULL)
		return;

	switch (af) {
	case AF_INET:
		bzero(&tmp4, sizeof(tmp4));
		tmp4.sin_len = sizeof(tmp4);
		tmp4.sin_family = AF_INET;
		tmp4.sin_addr.s_addr = a->addr32[0];
		ke = (struct pfr_kentry *)rn_match(&tmp4, kt->pfrkt_ip4);
		break;
#ifdef INET6
	case AF_INET6:
		bzero(&tmp6, sizeof(tmp6));
		tmp6.sin6_len = sizeof(tmp6);
		tmp6.sin6_family = AF_INET6;
		bcopy(a, &tmp6.sin6_addr, sizeof(tmp6.sin6_addr));
		ke = (struct pfr_kentry *)rn_match(&tmp6, kt->pfrkt_ip6);
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}

	switch (op) {
	case PF_PASS:
		op_idx = PFR_OP_PASS;
		break;
	case PF_MATCH:
		op_idx = PFR_OP_MATCH;
		break;
	case PF_DROP:
		op_idx = PFR_OP_BLOCK;
		break;
	default:
		panic("unhandled op");
	}

	if ((ke == NULL || (ke->pfrke_flags & PFRKE_FLAG_NOT)) != notrule) {
		if (op_idx != PFR_OP_PASS)
			DPFPRINTF(LOG_DEBUG,
			    "pfr_update_stats: assertion failed.");
		op_idx = PFR_OP_XPASS;
	}
	kt->pfrkt_packets[dir_idx][op_idx]++;
	kt->pfrkt_bytes[dir_idx][op_idx] += len;
	if (ke != NULL && op_idx != PFR_OP_XPASS &&
	    (kt->pfrkt_flags & PFR_TFLAG_COUNTERS)) {
		if (ke->pfrke_counters == NULL)
			ke->pfrke_counters = pool_get(&pfr_kcounters_pl,
			    PR_NOWAIT | PR_ZERO);
		if (ke->pfrke_counters != NULL) {
			ke->pfrke_counters->pfrkc_packets[dir_idx][op_idx]++;
			ke->pfrke_counters->pfrkc_bytes[dir_idx][op_idx] += len;
		}
	}
}

struct pfr_ktable *
pfr_attach_table(struct pf_ruleset *rs, char *name, int wait)
{
	struct pfr_ktable	*kt, *rt;
	struct pfr_table	 tbl;
	struct pf_anchor	*ac = rs->anchor;

	bzero(&tbl, sizeof(tbl));
	strlcpy(tbl.pfrt_name, name, sizeof(tbl.pfrt_name));
	if (ac != NULL)
		strlcpy(tbl.pfrt_anchor, ac->path, sizeof(tbl.pfrt_anchor));
	kt = pfr_lookup_table(&tbl);
	if (kt == NULL) {
		kt = pfr_create_ktable(&tbl, gettime(), 1, wait);
		if (kt == NULL)
			return (NULL);
		if (ac != NULL) {
			bzero(tbl.pfrt_anchor, sizeof(tbl.pfrt_anchor));
			rt = pfr_lookup_table(&tbl);
			if (rt == NULL) {
				rt = pfr_create_ktable(&tbl, 0, 1, wait);
				if (rt == NULL) {
					pfr_destroy_ktable(kt, 0);
					return (NULL);
				}
				pfr_insert_ktable(rt);
			}
			kt->pfrkt_root = rt;
		}
		pfr_insert_ktable(kt);
	}
	if (!kt->pfrkt_refcnt[PFR_REFCNT_RULE]++)
		pfr_setflags_ktable(kt, kt->pfrkt_flags|PFR_TFLAG_REFERENCED);
	return (kt);
}

void
pfr_detach_table(struct pfr_ktable *kt)
{
	if (kt->pfrkt_refcnt[PFR_REFCNT_RULE] <= 0)
		DPFPRINTF(LOG_NOTICE, "pfr_detach_table: refcount = %d.",
		    kt->pfrkt_refcnt[PFR_REFCNT_RULE]);
	else if (!--kt->pfrkt_refcnt[PFR_REFCNT_RULE])
		pfr_setflags_ktable(kt, kt->pfrkt_flags&~PFR_TFLAG_REFERENCED);
}

int
pfr_islinklocal(sa_family_t af, struct pf_addr *addr)
{
#ifdef	INET6
	if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr->v6))
		return (1);
#endif	/* INET6 */
	return (0);
}

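/*
 * Pick an address from a table- or interface-backed pool for the
 * round-robin family of pool options.  rpool->tblidx and
 * rpool->counter remember the current block and the last address
 * handed out; nested (more specific) blocks are stepped over; and for
 * weighted round-robin, rpool->curweight is reduced by the table's
 * gcd weight whenever the scan wraps to index 0.  Returns 0 on
 * success, 1 if no usable address was found and -1 on error.
 */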
int
pfr_pool_get(struct pf_pool *rpool, struct pf_addr **raddr,
    struct pf_addr **rmask, sa_family_t af)
{
	struct pfr_ktable	*kt;
	struct pfr_kentry	*ke, *ke2;
	struct pf_addr		*addr, *counter;
	union sockaddr_union	 mask;
	struct sockaddr_in	 tmp4;
#ifdef INET6
	struct sockaddr_in6	 tmp6;
#endif
	int			 startidx, idx = -1, loop = 0, use_counter = 0;

	switch (af) {
	case AF_INET:
		bzero(&tmp4, sizeof(tmp4));
		tmp4.sin_len = sizeof(tmp4);
		tmp4.sin_family = AF_INET;
		addr = (struct pf_addr *)&tmp4.sin_addr;
		break;
#ifdef	INET6
	case AF_INET6:
		bzero(&tmp6, sizeof(tmp6));
		tmp6.sin6_len = sizeof(tmp6);
		tmp6.sin6_family = AF_INET6;
		addr = (struct pf_addr *)&tmp6.sin6_addr;
		break;
#endif	/* INET6 */
	default:
		unhandled_af(af);
	}

	if (rpool->addr.type == PF_ADDR_TABLE)
		kt = rpool->addr.p.tbl;
	else if (rpool->addr.type == PF_ADDR_DYNIFTL)
		kt = rpool->addr.p.dyn->pfid_kt;
	else
		return (-1);
	kt = pfr_ktable_select_active(kt);
	if (kt == NULL)
		return (-1);

	counter = &rpool->counter;
	idx = rpool->tblidx;
	if (idx < 0 || idx >= kt->pfrkt_cnt)
		idx = 0;
	else
		use_counter = 1;
	startidx = idx;

 _next_block:
	if (loop && startidx == idx) {
		kt->pfrkt_nomatch++;
		return (1);
	}

	ke = pfr_kentry_byidx(kt, idx, af);
	if (ke == NULL) {
		/* we don't have this idx, try looping */
		if (loop || (ke = pfr_kentry_byidx(kt, 0, af)) == NULL) {
			kt->pfrkt_nomatch++;
			return (1);
		}
		idx = 0;
		loop++;
	}

	/* Get current weight for weighted round-robin */
	if (idx == 0 && use_counter == 1 && kt->pfrkt_refcntcost > 0) {
		rpool->curweight = rpool->curweight - kt->pfrkt_gcdweight;

		if (rpool->curweight < 1)
			rpool->curweight = kt->pfrkt_maxweight;
	}

	pfr_prepare_network(&pfr_mask, af, ke->pfrke_net);
	*raddr = SUNION2PF(&ke->pfrke_sa, af);
	*rmask = SUNION2PF(&pfr_mask, af);

	if (use_counter && !PF_AZERO(counter, af)) {
		/* is supplied address within block? */
		if (!pf_match_addr(0, *raddr, *rmask, counter, af)) {
			/* no, go to next block in table */
			idx++;
			use_counter = 0;
			goto _next_block;
		}
		pf_addrcpy(addr, counter, af);
	} else {
		/* use first address of block */
		pf_addrcpy(addr, *raddr, af);
	}

	if (!KENTRY_NETWORK(ke)) {
		/* this is a single IP address - no possible nested block */
		if (rpool->addr.type == PF_ADDR_DYNIFTL &&
		    pfr_islinklocal(af, addr)) {
			idx++;
			goto _next_block;
		}
		pf_addrcpy(counter, addr, af);
		rpool->tblidx = idx;
		kt->pfrkt_match++;
		rpool->states = 0;
		if (ke->pfrke_counters != NULL)
			rpool->states = ke->pfrke_counters->states;
		switch (ke->pfrke_type) {
		case PFRKE_COST:
			rpool->weight = ((struct pfr_kentry_cost *)ke)->weight;
			/* FALLTHROUGH */
		case PFRKE_ROUTE:
			rpool->kif = ((struct pfr_kentry_route *)ke)->kif;
			break;
		default:
			rpool->weight = 1;
			break;
		}
		return (0);
	}
	for (;;) {
		/* we don't want to use a nested block */
		switch (af) {
		case AF_INET:
			ke2 = (struct pfr_kentry *)rn_match(&tmp4,
			    kt->pfrkt_ip4);
			break;
#ifdef	INET6
		case AF_INET6:
			ke2 = (struct pfr_kentry *)rn_match(&tmp6,
			    kt->pfrkt_ip6);
			break;
#endif	/* INET6 */
		default:
			unhandled_af(af);
		}
		if (ke2 == ke) {
			/* lookup returned the same block - perfect */
			if (rpool->addr.type == PF_ADDR_DYNIFTL &&
			    pfr_islinklocal(af, addr))
				goto _next_entry;
			pf_addrcpy(counter, addr, af);
			rpool->tblidx = idx;
			kt->pfrkt_match++;
			rpool->states = 0;
			if (ke->pfrke_counters != NULL)
				rpool->states = ke->pfrke_counters->states;
			switch (ke->pfrke_type) {
			case PFRKE_COST:
				rpool->weight =
				    ((struct pfr_kentry_cost *)ke)->weight;
				/* FALLTHROUGH */
			case PFRKE_ROUTE:
				rpool->kif = ((struct pfr_kentry_route *)ke)->kif;
				break;
			default:
				rpool->weight = 1;
				break;
			}
			return (0);
		}
_next_entry:
		/* we need to increase the counter past the nested block */
		pfr_prepare_network(&mask, af, ke2->pfrke_net);
		pf_poolmask(addr, addr, SUNION2PF(&mask, af), &pfr_ffaddr, af);
		pf_addr_inc(addr, af);
		if (!pf_match_addr(0, *raddr, *rmask, addr, af)) {
			/* ok, we reached the end of our main block */
			/* go to next block in table */
			idx++;
			use_counter = 0;
			goto _next_block;
		}
	}
}

struct pfr_kentry *
pfr_kentry_byidx(struct pfr_ktable *kt, int idx, int af)
{
	struct pfr_walktree	w;

	bzero(&w, sizeof(w));
	w.pfrw_op = PFRW_POOL_GET;
	w.pfrw_cnt = idx;

	switch (af) {
	case AF_INET:
		rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w);
		return (w.pfrw_kentry);
#ifdef INET6
	case AF_INET6:
		rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w);
		return (w.pfrw_kentry);
#endif /* INET6 */
	default:
		return (NULL);
	}
}

/* Added for load balancing state counter use. */
int
pfr_states_increase(struct pfr_ktable *kt, struct pf_addr *addr, int af)
{
	struct pfr_kentry *ke;

	ke = pfr_kentry_byaddr(kt, addr, af, 1);
	if (ke == NULL)
		return (-1);

	if (ke->pfrke_counters == NULL)
		ke->pfrke_counters = pool_get(&pfr_kcounters_pl,
		    PR_NOWAIT | PR_ZERO);
	if (ke->pfrke_counters == NULL)
		return (-1);

	ke->pfrke_counters->states++;
	return (ke->pfrke_counters->states);
}

/* Added for load balancing state counter use. */
int
pfr_states_decrease(struct pfr_ktable *kt, struct pf_addr *addr, int af)
{
	struct pfr_kentry *ke;

	ke = pfr_kentry_byaddr(kt, addr, af, 1);
	if (ke == NULL)
		return (-1);

	if (ke->pfrke_counters == NULL)
		ke->pfrke_counters = pool_get(&pfr_kcounters_pl,
		    PR_NOWAIT | PR_ZERO);
	if (ke->pfrke_counters == NULL)
		return (-1);

	if (ke->pfrke_counters->states > 0)
		ke->pfrke_counters->states--;
	else
		DPFPRINTF(LOG_DEBUG,
		    "pfr_states_decrease: states-- when states <= 0");

	return (ke->pfrke_counters->states);
}

void
pfr_dynaddr_update(struct pfr_ktable *kt, struct pfi_dynaddr *dyn)
{
	struct pfr_walktree	w;

	bzero(&w, sizeof(w));
	w.pfrw_op = PFRW_DYNADDR_UPDATE;
	w.pfrw_dyn = dyn;

	dyn->pfid_acnt4 = 0;
	dyn->pfid_acnt6 = 0;
	switch (dyn->pfid_af) {
	case AF_UNSPEC:	/* look up both IPv4 and IPv6 addresses */
		rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w);
		rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w);
		break;
	case AF_INET:
		rn_walktree(kt->pfrkt_ip4, pfr_walktree, &w);
		break;
#ifdef	INET6
	case AF_INET6:
		rn_walktree(kt->pfrkt_ip6, pfr_walktree, &w);
		break;
#endif	/* INET6 */
	default:
		unhandled_af(dyn->pfid_af);
	}
}

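/*
 * Maintain the weight info used by weighted round-robin: the gcd and
 * the maximum of all entry weights in the table.  Worked example
 * (sketch): inserting entries with weights 4 and 6 leaves
 * pfrkt_gcdweight = gcd(4, 6) = 2 and pfrkt_maxweight = 6;
 * pfr_pool_get() then steps rpool->curweight down from 6 by 2 towards
 * 1 and wraps back to the maximum.
 */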
void
pfr_ktable_winfo_update(struct pfr_ktable *kt, struct pfr_kentry *p)
{
	/*
	 * If the cost flag is set, gcdweight is needed for weighted
	 * round-robin.
	 */
	if (kt->pfrkt_refcntcost > 0) {
		u_int16_t weight;

		weight = (p->pfrke_type == PFRKE_COST) ?
		    ((struct pfr_kentry_cost *)p)->weight : 1;

		if (kt->pfrkt_gcdweight == 0)
			kt->pfrkt_gcdweight = weight;

		kt->pfrkt_gcdweight =
		    pfr_gcd(weight, kt->pfrkt_gcdweight);

		if (kt->pfrkt_maxweight < weight)
			kt->pfrkt_maxweight = weight;
	}
}

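/*
 * Resolve a table reference to the instance lookups should use: a
 * table attached in an anchor but not (yet) ACTIVE falls back to its
 * root table of the same name; if neither is active there is nothing
 * to match against and NULL is returned.
 */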
struct pfr_ktable *
pfr_ktable_select_active(struct pfr_ktable *kt)
{
	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
		kt = kt->pfrkt_root;
	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
		return (NULL);

	return (kt);
}