1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2001 Daniel Hartmeier
5 * Copyright (c) 2002,2003 Henning Brauer
6 * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 *    - Redistributions of source code must retain the above copyright
14 *      notice, this list of conditions and the following disclaimer.
15 *    - Redistributions in binary form must reproduce the above
16 *      copyright notice, this list of conditions and the following
17 *      disclaimer in the documentation and/or other materials provided
18 *      with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Effort sponsored in part by the Defense Advanced Research Projects
34 * Agency (DARPA) and Air Force Research Laboratory, Air Force
35 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
36 *
37 *	$OpenBSD: pf_ioctl.c,v 1.213 2009/02/15 21:46:12 mbalmer Exp $
38 */
39
40#include <sys/cdefs.h>
41__FBSDID("$FreeBSD$");
42
43#include "opt_inet.h"
44#include "opt_inet6.h"
45#include "opt_bpf.h"
46#include "opt_pf.h"
47
48#include <sys/param.h>
49#include <sys/_bitset.h>
50#include <sys/bitset.h>
51#include <sys/bus.h>
52#include <sys/conf.h>
53#include <sys/endian.h>
54#include <sys/fcntl.h>
55#include <sys/filio.h>
56#include <sys/hash.h>
57#include <sys/interrupt.h>
58#include <sys/jail.h>
59#include <sys/kernel.h>
60#include <sys/kthread.h>
61#include <sys/lock.h>
62#include <sys/mbuf.h>
63#include <sys/module.h>
64#include <sys/nv.h>
65#include <sys/proc.h>
66#include <sys/sdt.h>
67#include <sys/smp.h>
68#include <sys/socket.h>
69#include <sys/sysctl.h>
70#include <sys/md5.h>
71#include <sys/ucred.h>
72
73#include <net/if.h>
74#include <net/if_var.h>
75#include <net/vnet.h>
76#include <net/route.h>
77#include <net/pfil.h>
78#include <net/pfvar.h>
79#include <net/if_pfsync.h>
80#include <net/if_pflog.h>
81
82#include <netinet/in.h>
83#include <netinet/ip.h>
84#include <netinet/ip_var.h>
85#include <netinet6/ip6_var.h>
86#include <netinet/ip_icmp.h>
87#include <netpfil/pf/pf_nv.h>
88
89#ifdef INET6
90#include <netinet/ip6.h>
91#endif /* INET6 */
92
93#ifdef ALTQ
94#include <net/altq/altq.h>
95#endif
96
97SDT_PROVIDER_DECLARE(pf);
98SDT_PROBE_DEFINE3(pf, ioctl, ioctl, error, "int", "int", "int");
99SDT_PROBE_DEFINE3(pf, ioctl, function, error, "char *", "int", "int");
100SDT_PROBE_DEFINE2(pf, ioctl, addrule, error, "int", "int");
101SDT_PROBE_DEFINE2(pf, ioctl, nvchk, error, "int", "int");
102
103static struct pf_kpool	*pf_get_kpool(char *, u_int32_t, u_int8_t, u_int32_t,
104			    u_int8_t, u_int8_t, u_int8_t);
105
106static void		 pf_mv_kpool(struct pf_kpalist *, struct pf_kpalist *);
107static void		 pf_empty_kpool(struct pf_kpalist *);
108static int		 pfioctl(struct cdev *, u_long, caddr_t, int,
109			    struct thread *);
110#ifdef ALTQ
111static int		 pf_begin_altq(u_int32_t *);
112static int		 pf_rollback_altq(u_int32_t);
113static int		 pf_commit_altq(u_int32_t);
114static int		 pf_enable_altq(struct pf_altq *);
115static int		 pf_disable_altq(struct pf_altq *);
116static u_int32_t	 pf_qname2qid(char *);
117static void		 pf_qid_unref(u_int32_t);
118#endif /* ALTQ */
119static int		 pf_begin_rules(u_int32_t *, int, const char *);
120static int		 pf_rollback_rules(u_int32_t, int, char *);
121static int		 pf_setup_pfsync_matching(struct pf_kruleset *);
122static void		 pf_hash_rule(MD5_CTX *, struct pf_krule *);
123static void		 pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *);
124static int		 pf_commit_rules(u_int32_t, int, char *);
125static int		 pf_addr_setup(struct pf_kruleset *,
126			    struct pf_addr_wrap *, sa_family_t);
127static void		 pf_addr_copyout(struct pf_addr_wrap *);
128static void		 pf_src_node_copy(const struct pf_ksrc_node *,
129			    struct pf_src_node *);
130#ifdef ALTQ
131static int		 pf_export_kaltq(struct pf_altq *,
132			    struct pfioc_altq_v1 *, size_t);
133static int		 pf_import_kaltq(struct pfioc_altq_v1 *,
134			    struct pf_altq *, size_t);
135#endif /* ALTQ */
136
137VNET_DEFINE(struct pf_krule,	pf_default_rule);
138
139#ifdef ALTQ
140VNET_DEFINE_STATIC(int,		pf_altq_running);
141#define	V_pf_altq_running	VNET(pf_altq_running)
142#endif
143
144#define	TAGID_MAX	 50000
145struct pf_tagname {
146	TAILQ_ENTRY(pf_tagname)	namehash_entries;
147	TAILQ_ENTRY(pf_tagname)	taghash_entries;
148	char			name[PF_TAG_NAME_SIZE];
149	uint16_t		tag;
150	int			ref;
151};
152
153struct pf_tagset {
154	TAILQ_HEAD(, pf_tagname)	*namehash;
155	TAILQ_HEAD(, pf_tagname)	*taghash;
156	unsigned int			 mask;
157	uint32_t			 seed;
158	BITSET_DEFINE(, TAGID_MAX)	 avail;
159};
160
161VNET_DEFINE(struct pf_tagset, pf_tags);
162#define	V_pf_tags	VNET(pf_tags)
163static unsigned int	pf_rule_tag_hashsize;
164#define	PF_RULE_TAG_HASH_SIZE_DEFAULT	128
165SYSCTL_UINT(_net_pf, OID_AUTO, rule_tag_hashsize, CTLFLAG_RDTUN,
166    &pf_rule_tag_hashsize, PF_RULE_TAG_HASH_SIZE_DEFAULT,
167    "Size of pf(4) rule tag hashtable");
168
169#ifdef ALTQ
170VNET_DEFINE(struct pf_tagset, pf_qids);
171#define	V_pf_qids	VNET(pf_qids)
172static unsigned int	pf_queue_tag_hashsize;
173#define	PF_QUEUE_TAG_HASH_SIZE_DEFAULT	128
174SYSCTL_UINT(_net_pf, OID_AUTO, queue_tag_hashsize, CTLFLAG_RDTUN,
175    &pf_queue_tag_hashsize, PF_QUEUE_TAG_HASH_SIZE_DEFAULT,
176    "Size of pf(4) queue tag hashtable");
177#endif
178VNET_DEFINE(uma_zone_t,	 pf_tag_z);
179#define	V_pf_tag_z		 VNET(pf_tag_z)
180static MALLOC_DEFINE(M_PFALTQ, "pf_altq", "pf(4) altq configuration db");
181static MALLOC_DEFINE(M_PFRULE, "pf_rule", "pf(4) rules");
182
183#if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
184#error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
185#endif
186
187static void		 pf_init_tagset(struct pf_tagset *, unsigned int *,
188			    unsigned int);
189static void		 pf_cleanup_tagset(struct pf_tagset *);
190static uint16_t		 tagname2hashindex(const struct pf_tagset *, const char *);
191static uint16_t		 tag2hashindex(const struct pf_tagset *, uint16_t);
192static u_int16_t	 tagname2tag(struct pf_tagset *, char *);
193static u_int16_t	 pf_tagname2tag(char *);
194static void		 tag_unref(struct pf_tagset *, u_int16_t);
195
196#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x
197
198struct cdev *pf_dev;
199
200/*
201 * XXX - These are new and need to be checked when moveing to a new version
202 */
203static void		 pf_clear_all_states(void);
204static unsigned int	 pf_clear_states(const struct pf_kstate_kill *);
205static int		 pf_killstates(struct pf_kstate_kill *,
206			    unsigned int *);
207static int		 pf_killstates_row(struct pf_kstate_kill *,
208			    struct pf_idhash *);
209static int		 pf_killstates_nv(struct pfioc_nv *);
210static int		 pf_clearstates_nv(struct pfioc_nv *);
211static int		 pf_getstate(struct pfioc_nv *);
212static int		 pf_getstates(struct pfioc_nv *);
213static int		 pf_clear_tables(void);
214static void		 pf_clear_srcnodes(struct pf_ksrc_node *);
215static void		 pf_kill_srcnodes(struct pfioc_src_node_kill *);
216static int		 pf_keepcounters(struct pfioc_nv *);
217static void		 pf_tbladdr_copyout(struct pf_addr_wrap *);
218
219/*
220 * Wrapper functions for pfil(9) hooks
221 */
222#ifdef INET
223static pfil_return_t pf_check_in(struct mbuf **m, struct ifnet *ifp,
224    int flags, void *ruleset __unused, struct inpcb *inp);
225static pfil_return_t pf_check_out(struct mbuf **m, struct ifnet *ifp,
226    int flags, void *ruleset __unused, struct inpcb *inp);
227#endif
228#ifdef INET6
229static pfil_return_t pf_check6_in(struct mbuf **m, struct ifnet *ifp,
230    int flags, void *ruleset __unused, struct inpcb *inp);
231static pfil_return_t pf_check6_out(struct mbuf **m, struct ifnet *ifp,
232    int flags, void *ruleset __unused, struct inpcb *inp);
233#endif
234
235static void		hook_pf(void);
236static void		dehook_pf(void);
237static int		shutdown_pf(void);
238static int		pf_load(void);
239static void		pf_unload(void);
240
241static struct cdevsw pf_cdevsw = {
242	.d_ioctl =	pfioctl,
243	.d_name =	PF_NAME,
244	.d_version =	D_VERSION,
245};
246
247volatile VNET_DEFINE_STATIC(int, pf_pfil_hooked);
248#define V_pf_pfil_hooked	VNET(pf_pfil_hooked)
249
250/*
251 * We need a flag that is neither hooked nor running to know when
252 * the VNET is "valid".  We primarily need this to control (global)
253 * external event, e.g., eventhandlers.
254 */
255VNET_DEFINE(int, pf_vnet_active);
256#define V_pf_vnet_active	VNET(pf_vnet_active)
257
258int pf_end_threads;
259struct proc *pf_purge_proc;
260
261struct rmlock			pf_rules_lock;
262struct sx			pf_ioctl_lock;
263struct sx			pf_end_lock;
264
265/* pfsync */
266VNET_DEFINE(pfsync_state_import_t *, pfsync_state_import_ptr);
267VNET_DEFINE(pfsync_insert_state_t *, pfsync_insert_state_ptr);
268VNET_DEFINE(pfsync_update_state_t *, pfsync_update_state_ptr);
269VNET_DEFINE(pfsync_delete_state_t *, pfsync_delete_state_ptr);
270VNET_DEFINE(pfsync_clear_states_t *, pfsync_clear_states_ptr);
271VNET_DEFINE(pfsync_defer_t *, pfsync_defer_ptr);
272pfsync_detach_ifnet_t *pfsync_detach_ifnet_ptr;
273
274/* pflog */
275pflog_packet_t			*pflog_packet_ptr = NULL;
276
277extern u_long	pf_ioctl_maxcount;
278
279static void
280pfattach_vnet(void)
281{
282	u_int32_t *my_timeout = V_pf_default_rule.timeout;
283
284	pf_initialize();
285	pfr_initialize();
286	pfi_initialize_vnet();
287	pf_normalize_init();
288
289	V_pf_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT;
290	V_pf_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT;
291
292	RB_INIT(&V_pf_anchors);
293	pf_init_kruleset(&pf_main_ruleset);
294
295	/* default rule should never be garbage collected */
296	V_pf_default_rule.entries.tqe_prev = &V_pf_default_rule.entries.tqe_next;
297#ifdef PF_DEFAULT_TO_DROP
298	V_pf_default_rule.action = PF_DROP;
299#else
300	V_pf_default_rule.action = PF_PASS;
301#endif
302	V_pf_default_rule.nr = -1;
303	V_pf_default_rule.rtableid = -1;
304
305	V_pf_default_rule.evaluations = counter_u64_alloc(M_WAITOK);
306	for (int i = 0; i < 2; i++) {
307		V_pf_default_rule.packets[i] = counter_u64_alloc(M_WAITOK);
308		V_pf_default_rule.bytes[i] = counter_u64_alloc(M_WAITOK);
309	}
310	V_pf_default_rule.states_cur = counter_u64_alloc(M_WAITOK);
311	V_pf_default_rule.states_tot = counter_u64_alloc(M_WAITOK);
312	V_pf_default_rule.src_nodes = counter_u64_alloc(M_WAITOK);
313
314	/* initialize default timeouts */
315	my_timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL;
316	my_timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL;
317	my_timeout[PFTM_TCP_ESTABLISHED] = PFTM_TCP_ESTABLISHED_VAL;
318	my_timeout[PFTM_TCP_CLOSING] = PFTM_TCP_CLOSING_VAL;
319	my_timeout[PFTM_TCP_FIN_WAIT] = PFTM_TCP_FIN_WAIT_VAL;
320	my_timeout[PFTM_TCP_CLOSED] = PFTM_TCP_CLOSED_VAL;
321	my_timeout[PFTM_UDP_FIRST_PACKET] = PFTM_UDP_FIRST_PACKET_VAL;
322	my_timeout[PFTM_UDP_SINGLE] = PFTM_UDP_SINGLE_VAL;
323	my_timeout[PFTM_UDP_MULTIPLE] = PFTM_UDP_MULTIPLE_VAL;
324	my_timeout[PFTM_ICMP_FIRST_PACKET] = PFTM_ICMP_FIRST_PACKET_VAL;
325	my_timeout[PFTM_ICMP_ERROR_REPLY] = PFTM_ICMP_ERROR_REPLY_VAL;
326	my_timeout[PFTM_OTHER_FIRST_PACKET] = PFTM_OTHER_FIRST_PACKET_VAL;
327	my_timeout[PFTM_OTHER_SINGLE] = PFTM_OTHER_SINGLE_VAL;
328	my_timeout[PFTM_OTHER_MULTIPLE] = PFTM_OTHER_MULTIPLE_VAL;
329	my_timeout[PFTM_FRAG] = PFTM_FRAG_VAL;
330	my_timeout[PFTM_INTERVAL] = PFTM_INTERVAL_VAL;
331	my_timeout[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL;
332	my_timeout[PFTM_TS_DIFF] = PFTM_TS_DIFF_VAL;
333	my_timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START;
334	my_timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END;
335
336	bzero(&V_pf_status, sizeof(V_pf_status));
337	V_pf_status.debug = PF_DEBUG_URGENT;
338
339	V_pf_pfil_hooked = 0;
340
341	/* XXX do our best to avoid a conflict */
342	V_pf_status.hostid = arc4random();
343
344	for (int i = 0; i < PFRES_MAX; i++)
345		V_pf_status.counters[i] = counter_u64_alloc(M_WAITOK);
346	for (int i = 0; i < LCNT_MAX; i++)
347		V_pf_status.lcounters[i] = counter_u64_alloc(M_WAITOK);
348	for (int i = 0; i < FCNT_MAX; i++)
349		V_pf_status.fcounters[i] = counter_u64_alloc(M_WAITOK);
350	for (int i = 0; i < SCNT_MAX; i++)
351		V_pf_status.scounters[i] = counter_u64_alloc(M_WAITOK);
352
353	if (swi_add(&V_pf_swi_ie, "pf send", pf_intr, curvnet, SWI_NET,
354	    INTR_MPSAFE, &V_pf_swi_cookie) != 0)
355		/* XXXGL: leaked all above. */
356		return;
357}
358
359static struct pf_kpool *
360pf_get_kpool(char *anchor, u_int32_t ticket, u_int8_t rule_action,
361    u_int32_t rule_number, u_int8_t r_last, u_int8_t active,
362    u_int8_t check_ticket)
363{
364	struct pf_kruleset	*ruleset;
365	struct pf_krule		*rule;
366	int			 rs_num;
367
368	ruleset = pf_find_kruleset(anchor);
369	if (ruleset == NULL)
370		return (NULL);
371	rs_num = pf_get_ruleset_number(rule_action);
372	if (rs_num >= PF_RULESET_MAX)
373		return (NULL);
374	if (active) {
375		if (check_ticket && ticket !=
376		    ruleset->rules[rs_num].active.ticket)
377			return (NULL);
378		if (r_last)
379			rule = TAILQ_LAST(ruleset->rules[rs_num].active.ptr,
380			    pf_krulequeue);
381		else
382			rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr);
383	} else {
384		if (check_ticket && ticket !=
385		    ruleset->rules[rs_num].inactive.ticket)
386			return (NULL);
387		if (r_last)
388			rule = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr,
389			    pf_krulequeue);
390		else
391			rule = TAILQ_FIRST(ruleset->rules[rs_num].inactive.ptr);
392	}
393	if (!r_last) {
394		while ((rule != NULL) && (rule->nr != rule_number))
395			rule = TAILQ_NEXT(rule, entries);
396	}
397	if (rule == NULL)
398		return (NULL);
399
400	return (&rule->rpool);
401}
402
403static void
404pf_mv_kpool(struct pf_kpalist *poola, struct pf_kpalist *poolb)
405{
406	struct pf_kpooladdr	*mv_pool_pa;
407
408	while ((mv_pool_pa = TAILQ_FIRST(poola)) != NULL) {
409		TAILQ_REMOVE(poola, mv_pool_pa, entries);
410		TAILQ_INSERT_TAIL(poolb, mv_pool_pa, entries);
411	}
412}
413
414static void
415pf_empty_kpool(struct pf_kpalist *poola)
416{
417	struct pf_kpooladdr *pa;
418
419	while ((pa = TAILQ_FIRST(poola)) != NULL) {
420		switch (pa->addr.type) {
421		case PF_ADDR_DYNIFTL:
422			pfi_dynaddr_remove(pa->addr.p.dyn);
423			break;
424		case PF_ADDR_TABLE:
425			/* XXX: this could be unfinished pooladdr on pabuf */
426			if (pa->addr.p.tbl != NULL)
427				pfr_detach_table(pa->addr.p.tbl);
428			break;
429		}
430		if (pa->kif)
431			pfi_kkif_unref(pa->kif);
432		TAILQ_REMOVE(poola, pa, entries);
433		free(pa, M_PFRULE);
434	}
435}
436
437static void
438pf_unlink_rule(struct pf_krulequeue *rulequeue, struct pf_krule *rule)
439{
440
441	PF_RULES_WASSERT();
442
443	TAILQ_REMOVE(rulequeue, rule, entries);
444
445	PF_UNLNKDRULES_LOCK();
446	rule->rule_ref |= PFRULE_REFS;
447	TAILQ_INSERT_TAIL(&V_pf_unlinked_rules, rule, entries);
448	PF_UNLNKDRULES_UNLOCK();
449}
450
451void
452pf_free_rule(struct pf_krule *rule)
453{
454
455	PF_RULES_WASSERT();
456
457	if (rule->tag)
458		tag_unref(&V_pf_tags, rule->tag);
459	if (rule->match_tag)
460		tag_unref(&V_pf_tags, rule->match_tag);
461#ifdef ALTQ
462	if (rule->pqid != rule->qid)
463		pf_qid_unref(rule->pqid);
464	pf_qid_unref(rule->qid);
465#endif
466	switch (rule->src.addr.type) {
467	case PF_ADDR_DYNIFTL:
468		pfi_dynaddr_remove(rule->src.addr.p.dyn);
469		break;
470	case PF_ADDR_TABLE:
471		pfr_detach_table(rule->src.addr.p.tbl);
472		break;
473	}
474	switch (rule->dst.addr.type) {
475	case PF_ADDR_DYNIFTL:
476		pfi_dynaddr_remove(rule->dst.addr.p.dyn);
477		break;
478	case PF_ADDR_TABLE:
479		pfr_detach_table(rule->dst.addr.p.tbl);
480		break;
481	}
482	if (rule->overload_tbl)
483		pfr_detach_table(rule->overload_tbl);
484	if (rule->kif)
485		pfi_kkif_unref(rule->kif);
486	pf_kanchor_remove(rule);
487	pf_empty_kpool(&rule->rpool.list);
488
489	pf_krule_free(rule);
490}
491
492static void
493pf_init_tagset(struct pf_tagset *ts, unsigned int *tunable_size,
494    unsigned int default_size)
495{
496	unsigned int i;
497	unsigned int hashsize;
498
499	if (*tunable_size == 0 || !powerof2(*tunable_size))
500		*tunable_size = default_size;
501
502	hashsize = *tunable_size;
503	ts->namehash = mallocarray(hashsize, sizeof(*ts->namehash), M_PFHASH,
504	    M_WAITOK);
505	ts->taghash = mallocarray(hashsize, sizeof(*ts->taghash), M_PFHASH,
506	    M_WAITOK);
507	ts->mask = hashsize - 1;
508	ts->seed = arc4random();
509	for (i = 0; i < hashsize; i++) {
510		TAILQ_INIT(&ts->namehash[i]);
511		TAILQ_INIT(&ts->taghash[i]);
512	}
513	BIT_FILL(TAGID_MAX, &ts->avail);
514}
515
516static void
517pf_cleanup_tagset(struct pf_tagset *ts)
518{
519	unsigned int i;
520	unsigned int hashsize;
521	struct pf_tagname *t, *tmp;
522
523	/*
524	 * Only need to clean up one of the hashes as each tag is hashed
525	 * into each table.
526	 */
527	hashsize = ts->mask + 1;
528	for (i = 0; i < hashsize; i++)
529		TAILQ_FOREACH_SAFE(t, &ts->namehash[i], namehash_entries, tmp)
530			uma_zfree(V_pf_tag_z, t);
531
532	free(ts->namehash, M_PFHASH);
533	free(ts->taghash, M_PFHASH);
534}
535
536static uint16_t
537tagname2hashindex(const struct pf_tagset *ts, const char *tagname)
538{
539	size_t len;
540
541	len = strnlen(tagname, PF_TAG_NAME_SIZE - 1);
542	return (murmur3_32_hash(tagname, len, ts->seed) & ts->mask);
543}
544
545static uint16_t
546tag2hashindex(const struct pf_tagset *ts, uint16_t tag)
547{
548
549	return (tag & ts->mask);
550}
551
552static u_int16_t
553tagname2tag(struct pf_tagset *ts, char *tagname)
554{
555	struct pf_tagname	*tag;
556	u_int32_t		 index;
557	u_int16_t		 new_tagid;
558
559	PF_RULES_WASSERT();
560
561	index = tagname2hashindex(ts, tagname);
562	TAILQ_FOREACH(tag, &ts->namehash[index], namehash_entries)
563		if (strcmp(tagname, tag->name) == 0) {
564			tag->ref++;
565			return (tag->tag);
566		}
567
568	/*
569	 * new entry
570	 *
571	 * to avoid fragmentation, we do a linear search from the beginning
572	 * and take the first free slot we find.
573	 */
574	new_tagid = BIT_FFS(TAGID_MAX, &ts->avail);
575	/*
576	 * Tags are 1-based, with valid tags in the range [1..TAGID_MAX].
577	 * BIT_FFS() returns a 1-based bit number, with 0 indicating no bits
578	 * set.  It may also return a bit number greater than TAGID_MAX due
579	 * to rounding of the number of bits in the vector up to a multiple
580	 * of the vector word size at declaration/allocation time.
581	 */
582	if ((new_tagid == 0) || (new_tagid > TAGID_MAX))
583		return (0);
584
585	/* Mark the tag as in use.  Bits are 0-based for BIT_CLR() */
586	BIT_CLR(TAGID_MAX, new_tagid - 1, &ts->avail);
587
588	/* allocate and fill new struct pf_tagname */
589	tag = uma_zalloc(V_pf_tag_z, M_NOWAIT);
590	if (tag == NULL)
591		return (0);
592	strlcpy(tag->name, tagname, sizeof(tag->name));
593	tag->tag = new_tagid;
594	tag->ref = 1;
595
596	/* Insert into namehash */
597	TAILQ_INSERT_TAIL(&ts->namehash[index], tag, namehash_entries);
598
599	/* Insert into taghash */
600	index = tag2hashindex(ts, new_tagid);
601	TAILQ_INSERT_TAIL(&ts->taghash[index], tag, taghash_entries);
602
603	return (tag->tag);
604}
605
606static void
607tag_unref(struct pf_tagset *ts, u_int16_t tag)
608{
609	struct pf_tagname	*t;
610	uint16_t		 index;
611
612	PF_RULES_WASSERT();
613
614	index = tag2hashindex(ts, tag);
615	TAILQ_FOREACH(t, &ts->taghash[index], taghash_entries)
616		if (tag == t->tag) {
617			if (--t->ref == 0) {
618				TAILQ_REMOVE(&ts->taghash[index], t,
619				    taghash_entries);
620				index = tagname2hashindex(ts, t->name);
621				TAILQ_REMOVE(&ts->namehash[index], t,
622				    namehash_entries);
623				/* Bits are 0-based for BIT_SET() */
624				BIT_SET(TAGID_MAX, tag - 1, &ts->avail);
625				uma_zfree(V_pf_tag_z, t);
626			}
627			break;
628		}
629}
630
631static u_int16_t
632pf_tagname2tag(char *tagname)
633{
634	return (tagname2tag(&V_pf_tags, tagname));
635}
636
637#ifdef ALTQ
638static u_int32_t
639pf_qname2qid(char *qname)
640{
641	return ((u_int32_t)tagname2tag(&V_pf_qids, qname));
642}
643
644static void
645pf_qid_unref(u_int32_t qid)
646{
647	tag_unref(&V_pf_qids, (u_int16_t)qid);
648}
649
650static int
651pf_begin_altq(u_int32_t *ticket)
652{
653	struct pf_altq	*altq, *tmp;
654	int		 error = 0;
655
656	PF_RULES_WASSERT();
657
658	/* Purge the old altq lists */
659	TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) {
660		if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
661			/* detach and destroy the discipline */
662			error = altq_remove(altq);
663		}
664		free(altq, M_PFALTQ);
665	}
666	TAILQ_INIT(V_pf_altq_ifs_inactive);
667	TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) {
668		pf_qid_unref(altq->qid);
669		free(altq, M_PFALTQ);
670	}
671	TAILQ_INIT(V_pf_altqs_inactive);
672	if (error)
673		return (error);
674	*ticket = ++V_ticket_altqs_inactive;
675	V_altqs_inactive_open = 1;
676	return (0);
677}
678
679static int
680pf_rollback_altq(u_int32_t ticket)
681{
682	struct pf_altq	*altq, *tmp;
683	int		 error = 0;
684
685	PF_RULES_WASSERT();
686
687	if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive)
688		return (0);
689	/* Purge the old altq lists */
690	TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) {
691		if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
692			/* detach and destroy the discipline */
693			error = altq_remove(altq);
694		}
695		free(altq, M_PFALTQ);
696	}
697	TAILQ_INIT(V_pf_altq_ifs_inactive);
698	TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) {
699		pf_qid_unref(altq->qid);
700		free(altq, M_PFALTQ);
701	}
702	TAILQ_INIT(V_pf_altqs_inactive);
703	V_altqs_inactive_open = 0;
704	return (error);
705}
706
707static int
708pf_commit_altq(u_int32_t ticket)
709{
710	struct pf_altqqueue	*old_altqs, *old_altq_ifs;
711	struct pf_altq		*altq, *tmp;
712	int			 err, error = 0;
713
714	PF_RULES_WASSERT();
715
716	if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive)
717		return (EBUSY);
718
719	/* swap altqs, keep the old. */
720	old_altqs = V_pf_altqs_active;
721	old_altq_ifs = V_pf_altq_ifs_active;
722	V_pf_altqs_active = V_pf_altqs_inactive;
723	V_pf_altq_ifs_active = V_pf_altq_ifs_inactive;
724	V_pf_altqs_inactive = old_altqs;
725	V_pf_altq_ifs_inactive = old_altq_ifs;
726	V_ticket_altqs_active = V_ticket_altqs_inactive;
727
728	/* Attach new disciplines */
729	TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) {
730		if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
731			/* attach the discipline */
732			error = altq_pfattach(altq);
733			if (error == 0 && V_pf_altq_running)
734				error = pf_enable_altq(altq);
735			if (error != 0)
736				return (error);
737		}
738	}
739
740	/* Purge the old altq lists */
741	TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) {
742		if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
743			/* detach and destroy the discipline */
744			if (V_pf_altq_running)
745				error = pf_disable_altq(altq);
746			err = altq_pfdetach(altq);
747			if (err != 0 && error == 0)
748				error = err;
749			err = altq_remove(altq);
750			if (err != 0 && error == 0)
751				error = err;
752		}
753		free(altq, M_PFALTQ);
754	}
755	TAILQ_INIT(V_pf_altq_ifs_inactive);
756	TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) {
757		pf_qid_unref(altq->qid);
758		free(altq, M_PFALTQ);
759	}
760	TAILQ_INIT(V_pf_altqs_inactive);
761
762	V_altqs_inactive_open = 0;
763	return (error);
764}
765
766static int
767pf_enable_altq(struct pf_altq *altq)
768{
769	struct ifnet		*ifp;
770	struct tb_profile	 tb;
771	int			 error = 0;
772
773	if ((ifp = ifunit(altq->ifname)) == NULL)
774		return (EINVAL);
775
776	if (ifp->if_snd.altq_type != ALTQT_NONE)
777		error = altq_enable(&ifp->if_snd);
778
779	/* set tokenbucket regulator */
780	if (error == 0 && ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
781		tb.rate = altq->ifbandwidth;
782		tb.depth = altq->tbrsize;
783		error = tbr_set(&ifp->if_snd, &tb);
784	}
785
786	return (error);
787}
788
789static int
790pf_disable_altq(struct pf_altq *altq)
791{
792	struct ifnet		*ifp;
793	struct tb_profile	 tb;
794	int			 error;
795
796	if ((ifp = ifunit(altq->ifname)) == NULL)
797		return (EINVAL);
798
799	/*
800	 * when the discipline is no longer referenced, it was overridden
801	 * by a new one.  if so, just return.
802	 */
803	if (altq->altq_disc != ifp->if_snd.altq_disc)
804		return (0);
805
806	error = altq_disable(&ifp->if_snd);
807
808	if (error == 0) {
809		/* clear tokenbucket regulator */
810		tb.rate = 0;
811		error = tbr_set(&ifp->if_snd, &tb);
812	}
813
814	return (error);
815}
816
817static int
818pf_altq_ifnet_event_add(struct ifnet *ifp, int remove, u_int32_t ticket,
819    struct pf_altq *altq)
820{
821	struct ifnet	*ifp1;
822	int		 error = 0;
823
824	/* Deactivate the interface in question */
825	altq->local_flags &= ~PFALTQ_FLAG_IF_REMOVED;
826	if ((ifp1 = ifunit(altq->ifname)) == NULL ||
827	    (remove && ifp1 == ifp)) {
828		altq->local_flags |= PFALTQ_FLAG_IF_REMOVED;
829	} else {
830		error = altq_add(ifp1, altq);
831
832		if (ticket != V_ticket_altqs_inactive)
833			error = EBUSY;
834
835		if (error)
836			free(altq, M_PFALTQ);
837	}
838
839	return (error);
840}
841
842void
843pf_altq_ifnet_event(struct ifnet *ifp, int remove)
844{
845	struct pf_altq	*a1, *a2, *a3;
846	u_int32_t	 ticket;
847	int		 error = 0;
848
849	/*
850	 * No need to re-evaluate the configuration for events on interfaces
851	 * that do not support ALTQ, as it's not possible for such
852	 * interfaces to be part of the configuration.
853	 */
854	if (!ALTQ_IS_READY(&ifp->if_snd))
855		return;
856
857	/* Interrupt userland queue modifications */
858	if (V_altqs_inactive_open)
859		pf_rollback_altq(V_ticket_altqs_inactive);
860
861	/* Start new altq ruleset */
862	if (pf_begin_altq(&ticket))
863		return;
864
865	/* Copy the current active set */
866	TAILQ_FOREACH(a1, V_pf_altq_ifs_active, entries) {
867		a2 = malloc(sizeof(*a2), M_PFALTQ, M_NOWAIT);
868		if (a2 == NULL) {
869			error = ENOMEM;
870			break;
871		}
872		bcopy(a1, a2, sizeof(struct pf_altq));
873
874		error = pf_altq_ifnet_event_add(ifp, remove, ticket, a2);
875		if (error)
876			break;
877
878		TAILQ_INSERT_TAIL(V_pf_altq_ifs_inactive, a2, entries);
879	}
880	if (error)
881		goto out;
882	TAILQ_FOREACH(a1, V_pf_altqs_active, entries) {
883		a2 = malloc(sizeof(*a2), M_PFALTQ, M_NOWAIT);
884		if (a2 == NULL) {
885			error = ENOMEM;
886			break;
887		}
888		bcopy(a1, a2, sizeof(struct pf_altq));
889
890		if ((a2->qid = pf_qname2qid(a2->qname)) == 0) {
891			error = EBUSY;
892			free(a2, M_PFALTQ);
893			break;
894		}
895		a2->altq_disc = NULL;
896		TAILQ_FOREACH(a3, V_pf_altq_ifs_inactive, entries) {
897			if (strncmp(a3->ifname, a2->ifname,
898				IFNAMSIZ) == 0) {
899				a2->altq_disc = a3->altq_disc;
900				break;
901			}
902		}
903		error = pf_altq_ifnet_event_add(ifp, remove, ticket, a2);
904		if (error)
905			break;
906
907		TAILQ_INSERT_TAIL(V_pf_altqs_inactive, a2, entries);
908	}
909
910out:
911	if (error != 0)
912		pf_rollback_altq(ticket);
913	else
914		pf_commit_altq(ticket);
915}
916#endif /* ALTQ */
917
918static int
919pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor)
920{
921	struct pf_kruleset	*rs;
922	struct pf_krule		*rule;
923
924	PF_RULES_WASSERT();
925
926	if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
927		return (EINVAL);
928	rs = pf_find_or_create_kruleset(anchor);
929	if (rs == NULL)
930		return (EINVAL);
931	while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) {
932		pf_unlink_rule(rs->rules[rs_num].inactive.ptr, rule);
933		rs->rules[rs_num].inactive.rcount--;
934	}
935	*ticket = ++rs->rules[rs_num].inactive.ticket;
936	rs->rules[rs_num].inactive.open = 1;
937	return (0);
938}
939
940static int
941pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor)
942{
943	struct pf_kruleset	*rs;
944	struct pf_krule		*rule;
945
946	PF_RULES_WASSERT();
947
948	if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
949		return (EINVAL);
950	rs = pf_find_kruleset(anchor);
951	if (rs == NULL || !rs->rules[rs_num].inactive.open ||
952	    rs->rules[rs_num].inactive.ticket != ticket)
953		return (0);
954	while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) {
955		pf_unlink_rule(rs->rules[rs_num].inactive.ptr, rule);
956		rs->rules[rs_num].inactive.rcount--;
957	}
958	rs->rules[rs_num].inactive.open = 0;
959	return (0);
960}
961
962#define PF_MD5_UPD(st, elm)						\
963		MD5Update(ctx, (u_int8_t *) &(st)->elm, sizeof((st)->elm))
964
965#define PF_MD5_UPD_STR(st, elm)						\
966		MD5Update(ctx, (u_int8_t *) (st)->elm, strlen((st)->elm))
967
968#define PF_MD5_UPD_HTONL(st, elm, stor) do {				\
969		(stor) = htonl((st)->elm);				\
970		MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int32_t));\
971} while (0)
972
973#define PF_MD5_UPD_HTONS(st, elm, stor) do {				\
974		(stor) = htons((st)->elm);				\
975		MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int16_t));\
976} while (0)
977
978static void
979pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr)
980{
981	PF_MD5_UPD(pfr, addr.type);
982	switch (pfr->addr.type) {
983		case PF_ADDR_DYNIFTL:
984			PF_MD5_UPD(pfr, addr.v.ifname);
985			PF_MD5_UPD(pfr, addr.iflags);
986			break;
987		case PF_ADDR_TABLE:
988			PF_MD5_UPD(pfr, addr.v.tblname);
989			break;
990		case PF_ADDR_ADDRMASK:
991			/* XXX ignore af? */
992			PF_MD5_UPD(pfr, addr.v.a.addr.addr32);
993			PF_MD5_UPD(pfr, addr.v.a.mask.addr32);
994			break;
995	}
996
997	PF_MD5_UPD(pfr, port[0]);
998	PF_MD5_UPD(pfr, port[1]);
999	PF_MD5_UPD(pfr, neg);
1000	PF_MD5_UPD(pfr, port_op);
1001}
1002
1003static void
1004pf_hash_rule(MD5_CTX *ctx, struct pf_krule *rule)
1005{
1006	u_int16_t x;
1007	u_int32_t y;
1008
1009	pf_hash_rule_addr(ctx, &rule->src);
1010	pf_hash_rule_addr(ctx, &rule->dst);
1011	for (int i = 0; i < PF_RULE_MAX_LABEL_COUNT; i++)
1012		PF_MD5_UPD_STR(rule, label[i]);
1013	PF_MD5_UPD_STR(rule, ifname);
1014	PF_MD5_UPD_STR(rule, match_tagname);
1015	PF_MD5_UPD_HTONS(rule, match_tag, x); /* dup? */
1016	PF_MD5_UPD_HTONL(rule, os_fingerprint, y);
1017	PF_MD5_UPD_HTONL(rule, prob, y);
1018	PF_MD5_UPD_HTONL(rule, uid.uid[0], y);
1019	PF_MD5_UPD_HTONL(rule, uid.uid[1], y);
1020	PF_MD5_UPD(rule, uid.op);
1021	PF_MD5_UPD_HTONL(rule, gid.gid[0], y);
1022	PF_MD5_UPD_HTONL(rule, gid.gid[1], y);
1023	PF_MD5_UPD(rule, gid.op);
1024	PF_MD5_UPD_HTONL(rule, rule_flag, y);
1025	PF_MD5_UPD(rule, action);
1026	PF_MD5_UPD(rule, direction);
1027	PF_MD5_UPD(rule, af);
1028	PF_MD5_UPD(rule, quick);
1029	PF_MD5_UPD(rule, ifnot);
1030	PF_MD5_UPD(rule, match_tag_not);
1031	PF_MD5_UPD(rule, natpass);
1032	PF_MD5_UPD(rule, keep_state);
1033	PF_MD5_UPD(rule, proto);
1034	PF_MD5_UPD(rule, type);
1035	PF_MD5_UPD(rule, code);
1036	PF_MD5_UPD(rule, flags);
1037	PF_MD5_UPD(rule, flagset);
1038	PF_MD5_UPD(rule, allow_opts);
1039	PF_MD5_UPD(rule, rt);
1040	PF_MD5_UPD(rule, tos);
1041}
1042
1043static bool
1044pf_krule_compare(struct pf_krule *a, struct pf_krule *b)
1045{
1046	MD5_CTX		ctx[2];
1047	u_int8_t	digest[2][PF_MD5_DIGEST_LENGTH];
1048
1049	MD5Init(&ctx[0]);
1050	MD5Init(&ctx[1]);
1051	pf_hash_rule(&ctx[0], a);
1052	pf_hash_rule(&ctx[1], b);
1053	MD5Final(digest[0], &ctx[0]);
1054	MD5Final(digest[1], &ctx[1]);
1055
1056	return (memcmp(digest[0], digest[1], PF_MD5_DIGEST_LENGTH) == 0);
1057}
1058
1059static int
1060pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor)
1061{
1062	struct pf_kruleset	*rs;
1063	struct pf_krule		*rule, **old_array, *tail;
1064	struct pf_krulequeue	*old_rules;
1065	int			 error;
1066	u_int32_t		 old_rcount;
1067
1068	PF_RULES_WASSERT();
1069
1070	if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
1071		return (EINVAL);
1072	rs = pf_find_kruleset(anchor);
1073	if (rs == NULL || !rs->rules[rs_num].inactive.open ||
1074	    ticket != rs->rules[rs_num].inactive.ticket)
1075		return (EBUSY);
1076
1077	/* Calculate checksum for the main ruleset */
1078	if (rs == &pf_main_ruleset) {
1079		error = pf_setup_pfsync_matching(rs);
1080		if (error != 0)
1081			return (error);
1082	}
1083
1084	/* Swap rules, keep the old. */
1085	old_rules = rs->rules[rs_num].active.ptr;
1086	old_rcount = rs->rules[rs_num].active.rcount;
1087	old_array = rs->rules[rs_num].active.ptr_array;
1088
1089	rs->rules[rs_num].active.ptr =
1090	    rs->rules[rs_num].inactive.ptr;
1091	rs->rules[rs_num].active.ptr_array =
1092	    rs->rules[rs_num].inactive.ptr_array;
1093	rs->rules[rs_num].active.rcount =
1094	    rs->rules[rs_num].inactive.rcount;
1095
1096	/* Attempt to preserve counter information. */
1097	if (V_pf_status.keep_counters) {
1098		TAILQ_FOREACH(rule, rs->rules[rs_num].active.ptr,
1099		    entries) {
1100			tail = TAILQ_FIRST(old_rules);
1101			while ((tail != NULL) && ! pf_krule_compare(tail, rule))
1102				tail = TAILQ_NEXT(tail, entries);
1103			if (tail != NULL) {
1104				counter_u64_add(rule->evaluations,
1105				    counter_u64_fetch(tail->evaluations));
1106				counter_u64_add(rule->packets[0],
1107				    counter_u64_fetch(tail->packets[0]));
1108				counter_u64_add(rule->packets[1],
1109				    counter_u64_fetch(tail->packets[1]));
1110				counter_u64_add(rule->bytes[0],
1111				    counter_u64_fetch(tail->bytes[0]));
1112				counter_u64_add(rule->bytes[1],
1113				    counter_u64_fetch(tail->bytes[1]));
1114			}
1115		}
1116	}
1117
1118	rs->rules[rs_num].inactive.ptr = old_rules;
1119	rs->rules[rs_num].inactive.ptr_array = old_array;
1120	rs->rules[rs_num].inactive.rcount = old_rcount;
1121
1122	rs->rules[rs_num].active.ticket =
1123	    rs->rules[rs_num].inactive.ticket;
1124	pf_calc_skip_steps(rs->rules[rs_num].active.ptr);
1125
1126	/* Purge the old rule list. */
1127	while ((rule = TAILQ_FIRST(old_rules)) != NULL)
1128		pf_unlink_rule(old_rules, rule);
1129	if (rs->rules[rs_num].inactive.ptr_array)
1130		free(rs->rules[rs_num].inactive.ptr_array, M_TEMP);
1131	rs->rules[rs_num].inactive.ptr_array = NULL;
1132	rs->rules[rs_num].inactive.rcount = 0;
1133	rs->rules[rs_num].inactive.open = 0;
1134	pf_remove_if_empty_kruleset(rs);
1135
1136	return (0);
1137}
1138
1139static int
1140pf_setup_pfsync_matching(struct pf_kruleset *rs)
1141{
1142	MD5_CTX			 ctx;
1143	struct pf_krule		*rule;
1144	int			 rs_cnt;
1145	u_int8_t		 digest[PF_MD5_DIGEST_LENGTH];
1146
1147	MD5Init(&ctx);
1148	for (rs_cnt = 0; rs_cnt < PF_RULESET_MAX; rs_cnt++) {
1149		/* XXX PF_RULESET_SCRUB as well? */
1150		if (rs_cnt == PF_RULESET_SCRUB)
1151			continue;
1152
1153		if (rs->rules[rs_cnt].inactive.ptr_array)
1154			free(rs->rules[rs_cnt].inactive.ptr_array, M_TEMP);
1155		rs->rules[rs_cnt].inactive.ptr_array = NULL;
1156
1157		if (rs->rules[rs_cnt].inactive.rcount) {
1158			rs->rules[rs_cnt].inactive.ptr_array =
1159			    malloc(sizeof(caddr_t) *
1160			    rs->rules[rs_cnt].inactive.rcount,
1161			    M_TEMP, M_NOWAIT);
1162
1163			if (!rs->rules[rs_cnt].inactive.ptr_array)
1164				return (ENOMEM);
1165		}
1166
1167		TAILQ_FOREACH(rule, rs->rules[rs_cnt].inactive.ptr,
1168		    entries) {
1169			pf_hash_rule(&ctx, rule);
1170			(rs->rules[rs_cnt].inactive.ptr_array)[rule->nr] = rule;
1171		}
1172	}
1173
1174	MD5Final(digest, &ctx);
1175	memcpy(V_pf_status.pf_chksum, digest, sizeof(V_pf_status.pf_chksum));
1176	return (0);
1177}
1178
1179static int
1180pf_addr_setup(struct pf_kruleset *ruleset, struct pf_addr_wrap *addr,
1181    sa_family_t af)
1182{
1183	int error = 0;
1184
1185	switch (addr->type) {
1186	case PF_ADDR_TABLE:
1187		addr->p.tbl = pfr_attach_table(ruleset, addr->v.tblname);
1188		if (addr->p.tbl == NULL)
1189			error = ENOMEM;
1190		break;
1191	case PF_ADDR_DYNIFTL:
1192		error = pfi_dynaddr_setup(addr, af);
1193		break;
1194	}
1195
1196	return (error);
1197}
1198
1199static void
1200pf_addr_copyout(struct pf_addr_wrap *addr)
1201{
1202
1203	switch (addr->type) {
1204	case PF_ADDR_DYNIFTL:
1205		pfi_dynaddr_copyout(addr);
1206		break;
1207	case PF_ADDR_TABLE:
1208		pf_tbladdr_copyout(addr);
1209		break;
1210	}
1211}
1212
1213static void
1214pf_src_node_copy(const struct pf_ksrc_node *in, struct pf_src_node *out)
1215{
1216	int	secs = time_uptime, diff;
1217
1218	bzero(out, sizeof(struct pf_src_node));
1219
1220	bcopy(&in->addr, &out->addr, sizeof(struct pf_addr));
1221	bcopy(&in->raddr, &out->raddr, sizeof(struct pf_addr));
1222
1223	if (in->rule.ptr != NULL)
1224		out->rule.nr = in->rule.ptr->nr;
1225
1226	for (int i = 0; i < 2; i++) {
1227		out->bytes[i] = counter_u64_fetch(in->bytes[i]);
1228		out->packets[i] = counter_u64_fetch(in->packets[i]);
1229	}
1230
1231	out->states = in->states;
1232	out->conn = in->conn;
1233	out->af = in->af;
1234	out->ruletype = in->ruletype;
1235
1236	out->creation = secs - in->creation;
1237	if (out->expire > secs)
1238		out->expire -= secs;
1239	else
1240		out->expire = 0;
1241
1242	/* Adjust the connection rate estimate. */
1243	diff = secs - in->conn_rate.last;
1244	if (diff >= in->conn_rate.seconds)
1245		out->conn_rate.count = 0;
1246	else
1247		out->conn_rate.count -=
1248		    in->conn_rate.count * diff /
1249		    in->conn_rate.seconds;
1250}
1251
1252#ifdef ALTQ
1253/*
1254 * Handle export of struct pf_kaltq to user binaries that may be using any
1255 * version of struct pf_altq.
1256 */
1257static int
1258pf_export_kaltq(struct pf_altq *q, struct pfioc_altq_v1 *pa, size_t ioc_size)
1259{
1260	u_int32_t version;
1261
1262	if (ioc_size == sizeof(struct pfioc_altq_v0))
1263		version = 0;
1264	else
1265		version = pa->version;
1266
1267	if (version > PFIOC_ALTQ_VERSION)
1268		return (EINVAL);
1269
1270#define ASSIGN(x) exported_q->x = q->x
1271#define COPY(x) \
1272	bcopy(&q->x, &exported_q->x, min(sizeof(q->x), sizeof(exported_q->x)))
1273#define SATU16(x) (u_int32_t)uqmin((x), USHRT_MAX)
1274#define SATU32(x) (u_int32_t)uqmin((x), UINT_MAX)
1275
1276	switch (version) {
1277	case 0: {
1278		struct pf_altq_v0 *exported_q =
1279		    &((struct pfioc_altq_v0 *)pa)->altq;
1280
1281		COPY(ifname);
1282
1283		ASSIGN(scheduler);
1284		ASSIGN(tbrsize);
1285		exported_q->tbrsize = SATU16(q->tbrsize);
1286		exported_q->ifbandwidth = SATU32(q->ifbandwidth);
1287
1288		COPY(qname);
1289		COPY(parent);
1290		ASSIGN(parent_qid);
1291		exported_q->bandwidth = SATU32(q->bandwidth);
1292		ASSIGN(priority);
1293		ASSIGN(local_flags);
1294
1295		ASSIGN(qlimit);
1296		ASSIGN(flags);
1297
1298		if (q->scheduler == ALTQT_HFSC) {
1299#define ASSIGN_OPT(x) exported_q->pq_u.hfsc_opts.x = q->pq_u.hfsc_opts.x
1300#define ASSIGN_OPT_SATU32(x) exported_q->pq_u.hfsc_opts.x = \
1301			    SATU32(q->pq_u.hfsc_opts.x)
1302
1303			ASSIGN_OPT_SATU32(rtsc_m1);
1304			ASSIGN_OPT(rtsc_d);
1305			ASSIGN_OPT_SATU32(rtsc_m2);
1306
1307			ASSIGN_OPT_SATU32(lssc_m1);
1308			ASSIGN_OPT(lssc_d);
1309			ASSIGN_OPT_SATU32(lssc_m2);
1310
1311			ASSIGN_OPT_SATU32(ulsc_m1);
1312			ASSIGN_OPT(ulsc_d);
1313			ASSIGN_OPT_SATU32(ulsc_m2);
1314
1315			ASSIGN_OPT(flags);
1316
1317#undef ASSIGN_OPT
1318#undef ASSIGN_OPT_SATU32
1319		} else
1320			COPY(pq_u);
1321
1322		ASSIGN(qid);
1323		break;
1324	}
1325	case 1:	{
1326		struct pf_altq_v1 *exported_q =
1327		    &((struct pfioc_altq_v1 *)pa)->altq;
1328
1329		COPY(ifname);
1330
1331		ASSIGN(scheduler);
1332		ASSIGN(tbrsize);
1333		ASSIGN(ifbandwidth);
1334
1335		COPY(qname);
1336		COPY(parent);
1337		ASSIGN(parent_qid);
1338		ASSIGN(bandwidth);
1339		ASSIGN(priority);
1340		ASSIGN(local_flags);
1341
1342		ASSIGN(qlimit);
1343		ASSIGN(flags);
1344		COPY(pq_u);
1345
1346		ASSIGN(qid);
1347		break;
1348	}
1349	default:
1350		panic("%s: unhandled struct pfioc_altq version", __func__);
1351		break;
1352	}
1353
1354#undef ASSIGN
1355#undef COPY
1356#undef SATU16
1357#undef SATU32
1358
1359	return (0);
1360}
1361
1362/*
1363 * Handle import to struct pf_kaltq of struct pf_altq from user binaries
1364 * that may be using any version of it.
1365 */
1366static int
1367pf_import_kaltq(struct pfioc_altq_v1 *pa, struct pf_altq *q, size_t ioc_size)
1368{
1369	u_int32_t version;
1370
1371	if (ioc_size == sizeof(struct pfioc_altq_v0))
1372		version = 0;
1373	else
1374		version = pa->version;
1375
1376	if (version > PFIOC_ALTQ_VERSION)
1377		return (EINVAL);
1378
1379#define ASSIGN(x) q->x = imported_q->x
1380#define COPY(x) \
1381	bcopy(&imported_q->x, &q->x, min(sizeof(imported_q->x), sizeof(q->x)))
1382
1383	switch (version) {
1384	case 0: {
1385		struct pf_altq_v0 *imported_q =
1386		    &((struct pfioc_altq_v0 *)pa)->altq;
1387
1388		COPY(ifname);
1389
1390		ASSIGN(scheduler);
1391		ASSIGN(tbrsize); /* 16-bit -> 32-bit */
1392		ASSIGN(ifbandwidth); /* 32-bit -> 64-bit */
1393
1394		COPY(qname);
1395		COPY(parent);
1396		ASSIGN(parent_qid);
1397		ASSIGN(bandwidth); /* 32-bit -> 64-bit */
1398		ASSIGN(priority);
1399		ASSIGN(local_flags);
1400
1401		ASSIGN(qlimit);
1402		ASSIGN(flags);
1403
1404		if (imported_q->scheduler == ALTQT_HFSC) {
1405#define ASSIGN_OPT(x) q->pq_u.hfsc_opts.x = imported_q->pq_u.hfsc_opts.x
1406
1407			/*
1408			 * The m1 and m2 parameters are being copied from
1409			 * 32-bit to 64-bit.
1410			 */
1411			ASSIGN_OPT(rtsc_m1);
1412			ASSIGN_OPT(rtsc_d);
1413			ASSIGN_OPT(rtsc_m2);
1414
1415			ASSIGN_OPT(lssc_m1);
1416			ASSIGN_OPT(lssc_d);
1417			ASSIGN_OPT(lssc_m2);
1418
1419			ASSIGN_OPT(ulsc_m1);
1420			ASSIGN_OPT(ulsc_d);
1421			ASSIGN_OPT(ulsc_m2);
1422
1423			ASSIGN_OPT(flags);
1424
1425#undef ASSIGN_OPT
1426		} else
1427			COPY(pq_u);
1428
1429		ASSIGN(qid);
1430		break;
1431	}
1432	case 1: {
1433		struct pf_altq_v1 *imported_q =
1434		    &((struct pfioc_altq_v1 *)pa)->altq;
1435
1436		COPY(ifname);
1437
1438		ASSIGN(scheduler);
1439		ASSIGN(tbrsize);
1440		ASSIGN(ifbandwidth);
1441
1442		COPY(qname);
1443		COPY(parent);
1444		ASSIGN(parent_qid);
1445		ASSIGN(bandwidth);
1446		ASSIGN(priority);
1447		ASSIGN(local_flags);
1448
1449		ASSIGN(qlimit);
1450		ASSIGN(flags);
1451		COPY(pq_u);
1452
1453		ASSIGN(qid);
1454		break;
1455	}
1456	default:
1457		panic("%s: unhandled struct pfioc_altq version", __func__);
1458		break;
1459	}
1460
1461#undef ASSIGN
1462#undef COPY
1463
1464	return (0);
1465}
1466
1467static struct pf_altq *
1468pf_altq_get_nth_active(u_int32_t n)
1469{
1470	struct pf_altq		*altq;
1471	u_int32_t		 nr;
1472
1473	nr = 0;
1474	TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) {
1475		if (nr == n)
1476			return (altq);
1477		nr++;
1478	}
1479
1480	TAILQ_FOREACH(altq, V_pf_altqs_active, entries) {
1481		if (nr == n)
1482			return (altq);
1483		nr++;
1484	}
1485
1486	return (NULL);
1487}
1488#endif /* ALTQ */
1489
1490void
1491pf_krule_free(struct pf_krule *rule)
1492{
1493	if (rule == NULL)
1494		return;
1495
1496	counter_u64_free(rule->evaluations);
1497	for (int i = 0; i < 2; i++) {
1498		counter_u64_free(rule->packets[i]);
1499		counter_u64_free(rule->bytes[i]);
1500	}
1501	counter_u64_free(rule->states_cur);
1502	counter_u64_free(rule->states_tot);
1503	counter_u64_free(rule->src_nodes);
1504	free(rule, M_PFRULE);
1505}
1506
1507static void
1508pf_kpooladdr_to_pooladdr(const struct pf_kpooladdr *kpool,
1509    struct pf_pooladdr *pool)
1510{
1511
1512	bzero(pool, sizeof(*pool));
1513	bcopy(&kpool->addr, &pool->addr, sizeof(pool->addr));
1514	strlcpy(pool->ifname, kpool->ifname, sizeof(pool->ifname));
1515}
1516
1517static void
1518pf_pooladdr_to_kpooladdr(const struct pf_pooladdr *pool,
1519    struct pf_kpooladdr *kpool)
1520{
1521
1522	bzero(kpool, sizeof(*kpool));
1523	bcopy(&pool->addr, &kpool->addr, sizeof(kpool->addr));
1524	strlcpy(kpool->ifname, pool->ifname, sizeof(kpool->ifname));
1525}
1526
1527static void
1528pf_kpool_to_pool(const struct pf_kpool *kpool, struct pf_pool *pool)
1529{
1530	bzero(pool, sizeof(*pool));
1531
1532	bcopy(&kpool->key, &pool->key, sizeof(pool->key));
1533	bcopy(&kpool->counter, &pool->counter, sizeof(pool->counter));
1534
1535	pool->tblidx = kpool->tblidx;
1536	pool->proxy_port[0] = kpool->proxy_port[0];
1537	pool->proxy_port[1] = kpool->proxy_port[1];
1538	pool->opts = kpool->opts;
1539}
1540
1541static int
1542pf_pool_to_kpool(const struct pf_pool *pool, struct pf_kpool *kpool)
1543{
1544	_Static_assert(sizeof(pool->key) == sizeof(kpool->key), "");
1545	_Static_assert(sizeof(pool->counter) == sizeof(kpool->counter), "");
1546
1547	bzero(kpool, sizeof(*kpool));
1548
1549	bcopy(&pool->key, &kpool->key, sizeof(kpool->key));
1550	bcopy(&pool->counter, &kpool->counter, sizeof(kpool->counter));
1551
1552	kpool->tblidx = pool->tblidx;
1553	kpool->proxy_port[0] = pool->proxy_port[0];
1554	kpool->proxy_port[1] = pool->proxy_port[1];
1555	kpool->opts = pool->opts;
1556
1557	return (0);
1558}
1559
1560static void
1561pf_krule_to_rule(const struct pf_krule *krule, struct pf_rule *rule)
1562{
1563
1564	bzero(rule, sizeof(*rule));
1565
1566	bcopy(&krule->src, &rule->src, sizeof(rule->src));
1567	bcopy(&krule->dst, &rule->dst, sizeof(rule->dst));
1568
1569	for (int i = 0; i < PF_SKIP_COUNT; ++i) {
1570		if (rule->skip[i].ptr == NULL)
1571			rule->skip[i].nr = -1;
1572		else
1573			rule->skip[i].nr = krule->skip[i].ptr->nr;
1574	}
1575
1576	strlcpy(rule->label, krule->label[0], sizeof(rule->label));
1577	strlcpy(rule->ifname, krule->ifname, sizeof(rule->ifname));
1578	strlcpy(rule->qname, krule->qname, sizeof(rule->qname));
1579	strlcpy(rule->pqname, krule->pqname, sizeof(rule->pqname));
1580	strlcpy(rule->tagname, krule->tagname, sizeof(rule->tagname));
1581	strlcpy(rule->match_tagname, krule->match_tagname,
1582	    sizeof(rule->match_tagname));
1583	strlcpy(rule->overload_tblname, krule->overload_tblname,
1584	    sizeof(rule->overload_tblname));
1585
1586	pf_kpool_to_pool(&krule->rpool, &rule->rpool);
1587
1588	rule->evaluations = counter_u64_fetch(krule->evaluations);
1589	for (int i = 0; i < 2; i++) {
1590		rule->packets[i] = counter_u64_fetch(krule->packets[i]);
1591		rule->bytes[i] = counter_u64_fetch(krule->bytes[i]);
1592	}
1593
1594	/* kif, anchor, overload_tbl are not copied over. */
1595
1596	rule->os_fingerprint = krule->os_fingerprint;
1597
1598	rule->rtableid = krule->rtableid;
1599	bcopy(krule->timeout, rule->timeout, sizeof(krule->timeout));
1600	rule->max_states = krule->max_states;
1601	rule->max_src_nodes = krule->max_src_nodes;
1602	rule->max_src_states = krule->max_src_states;
1603	rule->max_src_conn = krule->max_src_conn;
1604	rule->max_src_conn_rate.limit = krule->max_src_conn_rate.limit;
1605	rule->max_src_conn_rate.seconds = krule->max_src_conn_rate.seconds;
1606	rule->qid = krule->qid;
1607	rule->pqid = krule->pqid;
1608	rule->nr = krule->nr;
1609	rule->prob = krule->prob;
1610	rule->cuid = krule->cuid;
1611	rule->cpid = krule->cpid;
1612
1613	rule->return_icmp = krule->return_icmp;
1614	rule->return_icmp6 = krule->return_icmp6;
1615	rule->max_mss = krule->max_mss;
1616	rule->tag = krule->tag;
1617	rule->match_tag = krule->match_tag;
1618	rule->scrub_flags = krule->scrub_flags;
1619
1620	bcopy(&krule->uid, &rule->uid, sizeof(krule->uid));
1621	bcopy(&krule->gid, &rule->gid, sizeof(krule->gid));
1622
1623	rule->rule_flag = krule->rule_flag;
1624	rule->action = krule->action;
1625	rule->direction = krule->direction;
1626	rule->log = krule->log;
1627	rule->logif = krule->logif;
1628	rule->quick = krule->quick;
1629	rule->ifnot = krule->ifnot;
1630	rule->match_tag_not = krule->match_tag_not;
1631	rule->natpass = krule->natpass;
1632
1633	rule->keep_state = krule->keep_state;
1634	rule->af = krule->af;
1635	rule->proto = krule->proto;
1636	rule->type = krule->type;
1637	rule->code = krule->code;
1638	rule->flags = krule->flags;
1639	rule->flagset = krule->flagset;
1640	rule->min_ttl = krule->min_ttl;
1641	rule->allow_opts = krule->allow_opts;
1642	rule->rt = krule->rt;
1643	rule->return_ttl = krule->return_ttl;
1644	rule->tos = krule->tos;
1645	rule->set_tos = krule->set_tos;
1646	rule->anchor_relative = krule->anchor_relative;
1647	rule->anchor_wildcard = krule->anchor_wildcard;
1648
1649	rule->flush = krule->flush;
1650	rule->prio = krule->prio;
1651	rule->set_prio[0] = krule->set_prio[0];
1652	rule->set_prio[1] = krule->set_prio[1];
1653
1654	bcopy(&krule->divert, &rule->divert, sizeof(krule->divert));
1655
1656	rule->u_states_cur = counter_u64_fetch(krule->states_cur);
1657	rule->u_states_tot = counter_u64_fetch(krule->states_tot);
1658	rule->u_src_nodes = counter_u64_fetch(krule->src_nodes);
1659}
1660
1661static int
1662pf_rule_to_krule(const struct pf_rule *rule, struct pf_krule *krule)
1663{
1664	int ret;
1665
1666#ifndef INET
1667	if (rule->af == AF_INET) {
1668		return (EAFNOSUPPORT);
1669	}
1670#endif /* INET */
1671#ifndef INET6
1672	if (rule->af == AF_INET6) {
1673		return (EAFNOSUPPORT);
1674	}
1675#endif /* INET6 */
1676
1677	ret = pf_check_rule_addr(&rule->src);
1678	if (ret != 0)
1679		return (ret);
1680	ret = pf_check_rule_addr(&rule->dst);
1681	if (ret != 0)
1682		return (ret);
1683
1684	bzero(krule, sizeof(*krule));
1685
1686	bcopy(&rule->src, &krule->src, sizeof(rule->src));
1687	bcopy(&rule->dst, &krule->dst, sizeof(rule->dst));
1688
1689	strlcpy(krule->label[0], rule->label, sizeof(rule->label));
1690	strlcpy(krule->ifname, rule->ifname, sizeof(rule->ifname));
1691	strlcpy(krule->qname, rule->qname, sizeof(rule->qname));
1692	strlcpy(krule->pqname, rule->pqname, sizeof(rule->pqname));
1693	strlcpy(krule->tagname, rule->tagname, sizeof(rule->tagname));
1694	strlcpy(krule->match_tagname, rule->match_tagname,
1695	    sizeof(rule->match_tagname));
1696	strlcpy(krule->overload_tblname, rule->overload_tblname,
1697	    sizeof(rule->overload_tblname));
1698
1699	ret = pf_pool_to_kpool(&rule->rpool, &krule->rpool);
1700	if (ret != 0)
1701		return (ret);
1702
1703	/* Don't allow userspace to set evaulations, packets or bytes. */
1704	/* kif, anchor, overload_tbl are not copied over. */
1705
1706	krule->os_fingerprint = rule->os_fingerprint;
1707
1708	krule->rtableid = rule->rtableid;
1709	bcopy(rule->timeout, krule->timeout, sizeof(krule->timeout));
1710	krule->max_states = rule->max_states;
1711	krule->max_src_nodes = rule->max_src_nodes;
1712	krule->max_src_states = rule->max_src_states;
1713	krule->max_src_conn = rule->max_src_conn;
1714	krule->max_src_conn_rate.limit = rule->max_src_conn_rate.limit;
1715	krule->max_src_conn_rate.seconds = rule->max_src_conn_rate.seconds;
1716	krule->qid = rule->qid;
1717	krule->pqid = rule->pqid;
1718	krule->nr = rule->nr;
1719	krule->prob = rule->prob;
1720	krule->cuid = rule->cuid;
1721	krule->cpid = rule->cpid;
1722
1723	krule->return_icmp = rule->return_icmp;
1724	krule->return_icmp6 = rule->return_icmp6;
1725	krule->max_mss = rule->max_mss;
1726	krule->tag = rule->tag;
1727	krule->match_tag = rule->match_tag;
1728	krule->scrub_flags = rule->scrub_flags;
1729
1730	bcopy(&rule->uid, &krule->uid, sizeof(krule->uid));
1731	bcopy(&rule->gid, &krule->gid, sizeof(krule->gid));
1732
1733	krule->rule_flag = rule->rule_flag;
1734	krule->action = rule->action;
1735	krule->direction = rule->direction;
1736	krule->log = rule->log;
1737	krule->logif = rule->logif;
1738	krule->quick = rule->quick;
1739	krule->ifnot = rule->ifnot;
1740	krule->match_tag_not = rule->match_tag_not;
1741	krule->natpass = rule->natpass;
1742
1743	krule->keep_state = rule->keep_state;
1744	krule->af = rule->af;
1745	krule->proto = rule->proto;
1746	krule->type = rule->type;
1747	krule->code = rule->code;
1748	krule->flags = rule->flags;
1749	krule->flagset = rule->flagset;
1750	krule->min_ttl = rule->min_ttl;
1751	krule->allow_opts = rule->allow_opts;
1752	krule->rt = rule->rt;
1753	krule->return_ttl = rule->return_ttl;
1754	krule->tos = rule->tos;
1755	krule->set_tos = rule->set_tos;
1756	krule->anchor_relative = rule->anchor_relative;
1757	krule->anchor_wildcard = rule->anchor_wildcard;
1758
1759	krule->flush = rule->flush;
1760	krule->prio = rule->prio;
1761	krule->set_prio[0] = rule->set_prio[0];
1762	krule->set_prio[1] = rule->set_prio[1];
1763
1764	bcopy(&rule->divert, &krule->divert, sizeof(krule->divert));
1765
1766	return (0);
1767}
1768
1769static bool
1770pf_label_match(const struct pf_krule *rule, const char *label)
1771{
1772	int i = 0;
1773
1774	while (*rule->label[i]) {
1775		if (strcmp(rule->label[i], label) == 0)
1776			return (true);
1777		i++;
1778	}
1779
1780	return (false);
1781}
1782
1783static unsigned int
1784pf_kill_matching_state(struct pf_state_key_cmp *key, int dir)
1785{
1786	struct pf_state *match;
1787	int more = 0;
1788	unsigned int killed = 0;
1789
1790	/* Call with unlocked hashrow */
1791
1792	match = pf_find_state_all(key, dir, &more);
1793	if (match && !more) {
1794		pf_unlink_state(match, 0);
1795		killed++;
1796	}
1797
1798	return (killed);
1799}
1800
1801static int
1802pf_killstates_row(struct pf_kstate_kill *psk, struct pf_idhash *ih)
1803{
1804	struct pf_state		*s;
1805	struct pf_state_key	*sk;
1806	struct pf_addr		*srcaddr, *dstaddr;
1807	struct pf_state_key_cmp	 match_key;
1808	int			 idx, killed = 0;
1809	unsigned int		 dir;
1810	u_int16_t		 srcport, dstport;
1811	struct pfi_kkif		*kif;
1812
1813relock_DIOCKILLSTATES:
1814	PF_HASHROW_LOCK(ih);
1815	LIST_FOREACH(s, &ih->states, entry) {
1816		/* For floating states look at the original kif. */
1817		kif = s->kif == V_pfi_all ? s->orig_kif : s->kif;
1818
1819		sk = s->key[PF_SK_WIRE];
1820		if (s->direction == PF_OUT) {
1821			srcaddr = &sk->addr[1];
1822			dstaddr = &sk->addr[0];
1823			srcport = sk->port[1];
1824			dstport = sk->port[0];
1825		} else {
1826			srcaddr = &sk->addr[0];
1827			dstaddr = &sk->addr[1];
1828			srcport = sk->port[0];
1829			dstport = sk->port[1];
1830		}
1831
1832		if (psk->psk_af && sk->af != psk->psk_af)
1833			continue;
1834
1835		if (psk->psk_proto && psk->psk_proto != sk->proto)
1836			continue;
1837
1838		if (! PF_MATCHA(psk->psk_src.neg, &psk->psk_src.addr.v.a.addr,
1839		    &psk->psk_src.addr.v.a.mask, srcaddr, sk->af))
1840			continue;
1841
1842		if (! PF_MATCHA(psk->psk_dst.neg, &psk->psk_dst.addr.v.a.addr,
1843		    &psk->psk_dst.addr.v.a.mask, dstaddr, sk->af))
1844			continue;
1845
1846		if (!  PF_MATCHA(psk->psk_rt_addr.neg,
1847		    &psk->psk_rt_addr.addr.v.a.addr,
1848		    &psk->psk_rt_addr.addr.v.a.mask,
1849		    &s->rt_addr, sk->af))
1850			continue;
1851
1852		if (psk->psk_src.port_op != 0 &&
1853		    ! pf_match_port(psk->psk_src.port_op,
1854		    psk->psk_src.port[0], psk->psk_src.port[1], srcport))
1855			continue;
1856
1857		if (psk->psk_dst.port_op != 0 &&
1858		    ! pf_match_port(psk->psk_dst.port_op,
1859		    psk->psk_dst.port[0], psk->psk_dst.port[1], dstport))
1860			continue;
1861
1862		if (psk->psk_label[0] &&
1863		    ! pf_label_match(s->rule.ptr, psk->psk_label))
1864			continue;
1865
1866		if (psk->psk_ifname[0] && strcmp(psk->psk_ifname,
1867		    kif->pfik_name))
1868			continue;
1869
1870		if (psk->psk_kill_match) {
1871			/* Create the key to find matching states, with lock
1872			 * held. */
1873
1874			bzero(&match_key, sizeof(match_key));
1875
1876			if (s->direction == PF_OUT) {
1877				dir = PF_IN;
1878				idx = PF_SK_STACK;
1879			} else {
1880				dir = PF_OUT;
1881				idx = PF_SK_WIRE;
1882			}
1883
1884			match_key.af = s->key[idx]->af;
1885			match_key.proto = s->key[idx]->proto;
1886			PF_ACPY(&match_key.addr[0],
1887			    &s->key[idx]->addr[1], match_key.af);
1888			match_key.port[0] = s->key[idx]->port[1];
1889			PF_ACPY(&match_key.addr[1],
1890			    &s->key[idx]->addr[0], match_key.af);
1891			match_key.port[1] = s->key[idx]->port[0];
1892		}
1893
1894		pf_unlink_state(s, PF_ENTER_LOCKED);
1895		killed++;
1896
1897		if (psk->psk_kill_match)
1898			killed += pf_kill_matching_state(&match_key, dir);
1899
1900		goto relock_DIOCKILLSTATES;
1901	}
1902	PF_HASHROW_UNLOCK(ih);
1903
1904	return (killed);
1905}
1906
1907static int
1908pf_state_kill_to_kstate_kill(const struct pfioc_state_kill *psk,
1909    struct pf_kstate_kill *kill)
1910{
1911	bzero(kill, sizeof(*kill));
1912
1913	bcopy(&psk->psk_pfcmp, &kill->psk_pfcmp, sizeof(kill->psk_pfcmp));
1914	kill->psk_af = psk->psk_af;
1915	kill->psk_proto = psk->psk_proto;
1916	bcopy(&psk->psk_src, &kill->psk_src, sizeof(kill->psk_src));
1917	bcopy(&psk->psk_dst, &kill->psk_dst, sizeof(kill->psk_dst));
1918	strlcpy(kill->psk_ifname, psk->psk_ifname, sizeof(kill->psk_ifname));
1919	strlcpy(kill->psk_label, psk->psk_label, sizeof(kill->psk_label));
1920
1921	return (0);
1922}
1923
1924static int
1925pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket,
1926    uint32_t pool_ticket, const char *anchor, const char *anchor_call,
1927    struct thread *td)
1928{
1929	struct pf_kruleset	*ruleset;
1930	struct pf_krule		*tail;
1931	struct pf_kpooladdr	*pa;
1932	struct pfi_kkif		*kif = NULL;
1933	int			 rs_num;
1934	int			 error = 0;
1935
1936	if ((rule->return_icmp >> 8) > ICMP_MAXTYPE) {
1937		error = EINVAL;
1938		goto errout_unlocked;
1939	}
1940
1941#define	ERROUT(x)	ERROUT_FUNCTION(errout, x)
1942
1943	if (rule->ifname[0])
1944		kif = pf_kkif_create(M_WAITOK);
1945	rule->evaluations = counter_u64_alloc(M_WAITOK);
1946	for (int i = 0; i < 2; i++) {
1947		rule->packets[i] = counter_u64_alloc(M_WAITOK);
1948		rule->bytes[i] = counter_u64_alloc(M_WAITOK);
1949	}
1950	rule->states_cur = counter_u64_alloc(M_WAITOK);
1951	rule->states_tot = counter_u64_alloc(M_WAITOK);
1952	rule->src_nodes = counter_u64_alloc(M_WAITOK);
1953	rule->cuid = td->td_ucred->cr_ruid;
1954	rule->cpid = td->td_proc ? td->td_proc->p_pid : 0;
1955	TAILQ_INIT(&rule->rpool.list);
1956
1957	PF_RULES_WLOCK();
1958	ruleset = pf_find_kruleset(anchor);
1959	if (ruleset == NULL)
1960		ERROUT(EINVAL);
1961	rs_num = pf_get_ruleset_number(rule->action);
1962	if (rs_num >= PF_RULESET_MAX)
1963		ERROUT(EINVAL);
1964	if (ticket != ruleset->rules[rs_num].inactive.ticket) {
1965		DPFPRINTF(PF_DEBUG_MISC,
1966		    ("ticket: %d != [%d]%d\n", ticket, rs_num,
1967		    ruleset->rules[rs_num].inactive.ticket));
1968		ERROUT(EBUSY);
1969	}
1970	if (pool_ticket != V_ticket_pabuf) {
1971		DPFPRINTF(PF_DEBUG_MISC,
1972		    ("pool_ticket: %d != %d\n", pool_ticket,
1973		    V_ticket_pabuf));
1974		ERROUT(EBUSY);
1975	}
1976
1977	tail = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr,
1978	    pf_krulequeue);
1979	if (tail)
1980		rule->nr = tail->nr + 1;
1981	else
1982		rule->nr = 0;
1983	if (rule->ifname[0]) {
1984		rule->kif = pfi_kkif_attach(kif, rule->ifname);
1985		kif = NULL;
1986		pfi_kkif_ref(rule->kif);
1987	} else
1988		rule->kif = NULL;
1989
1990	if (rule->rtableid > 0 && rule->rtableid >= rt_numfibs)
1991		error = EBUSY;
1992
1993#ifdef ALTQ
1994	/* set queue IDs */
1995	if (rule->qname[0] != 0) {
1996		if ((rule->qid = pf_qname2qid(rule->qname)) == 0)
1997			error = EBUSY;
1998		else if (rule->pqname[0] != 0) {
1999			if ((rule->pqid =
2000			    pf_qname2qid(rule->pqname)) == 0)
2001				error = EBUSY;
2002		} else
2003			rule->pqid = rule->qid;
2004	}
2005#endif
2006	if (rule->tagname[0])
2007		if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0)
2008			error = EBUSY;
2009	if (rule->match_tagname[0])
2010		if ((rule->match_tag =
2011		    pf_tagname2tag(rule->match_tagname)) == 0)
2012			error = EBUSY;
2013	if (rule->rt && !rule->direction)
2014		error = EINVAL;
2015	if (!rule->log)
2016		rule->logif = 0;
2017	if (rule->logif >= PFLOGIFS_MAX)
2018		error = EINVAL;
2019	if (pf_addr_setup(ruleset, &rule->src.addr, rule->af))
2020		error = ENOMEM;
2021	if (pf_addr_setup(ruleset, &rule->dst.addr, rule->af))
2022		error = ENOMEM;
2023	if (pf_kanchor_setup(rule, ruleset, anchor_call))
2024		error = EINVAL;
2025	if (rule->scrub_flags & PFSTATE_SETPRIO &&
2026	    (rule->set_prio[0] > PF_PRIO_MAX ||
2027	    rule->set_prio[1] > PF_PRIO_MAX))
2028		error = EINVAL;
2029	TAILQ_FOREACH(pa, &V_pf_pabuf, entries)
2030		if (pa->addr.type == PF_ADDR_TABLE) {
2031			pa->addr.p.tbl = pfr_attach_table(ruleset,
2032			    pa->addr.v.tblname);
2033			if (pa->addr.p.tbl == NULL)
2034				error = ENOMEM;
2035		}
2036
2037	rule->overload_tbl = NULL;
2038	if (rule->overload_tblname[0]) {
2039		if ((rule->overload_tbl = pfr_attach_table(ruleset,
2040		    rule->overload_tblname)) == NULL)
2041			error = EINVAL;
2042		else
2043			rule->overload_tbl->pfrkt_flags |=
2044			    PFR_TFLAG_ACTIVE;
2045	}
2046
2047	pf_mv_kpool(&V_pf_pabuf, &rule->rpool.list);
2048	if (((((rule->action == PF_NAT) || (rule->action == PF_RDR) ||
2049	    (rule->action == PF_BINAT)) && rule->anchor == NULL) ||
2050	    (rule->rt > PF_NOPFROUTE)) &&
2051	    (TAILQ_FIRST(&rule->rpool.list) == NULL))
2052		error = EINVAL;
2053
2054	if (error) {
2055		pf_free_rule(rule);
2056		rule = NULL;
2057		ERROUT(error);
2058	}
2059
2060	rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list);
2061	counter_u64_zero(rule->evaluations);
2062	for (int i = 0; i < 2; i++) {
2063		counter_u64_zero(rule->packets[i]);
2064		counter_u64_zero(rule->bytes[i]);
2065	}
2066	TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr,
2067	    rule, entries);
2068	ruleset->rules[rs_num].inactive.rcount++;
2069	PF_RULES_WUNLOCK();
2070
2071	return (0);
2072
2073#undef ERROUT
2074errout:
2075	PF_RULES_WUNLOCK();
2076errout_unlocked:
2077	pf_kkif_free(kif);
2078	pf_krule_free(rule);
2079	return (error);
2080}
2081
2082static int
2083pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td)
2084{
2085	int			 error = 0;
2086	PF_RULES_RLOCK_TRACKER;
2087
2088#define	ERROUT_IOCTL(target, x)					\
2089    do {								\
2090	    error = (x);						\
2091	    SDT_PROBE3(pf, ioctl, ioctl, error, cmd, error, __LINE__);	\
2092	    goto target;						\
2093    } while (0)
2094
2095
2096	/* XXX keep in sync with switch() below */
2097	if (securelevel_gt(td->td_ucred, 2))
2098		switch (cmd) {
2099		case DIOCGETRULES:
2100		case DIOCGETRULE:
2101		case DIOCGETRULENV:
2102		case DIOCGETADDRS:
2103		case DIOCGETADDR:
2104		case DIOCGETSTATE:
2105		case DIOCGETSTATENV:
2106		case DIOCSETSTATUSIF:
2107		case DIOCGETSTATUS:
2108		case DIOCCLRSTATUS:
2109		case DIOCNATLOOK:
2110		case DIOCSETDEBUG:
2111		case DIOCGETSTATES:
2112		case DIOCGETSTATESNV:
2113		case DIOCGETTIMEOUT:
2114		case DIOCCLRRULECTRS:
2115		case DIOCGETLIMIT:
2116		case DIOCGETALTQSV0:
2117		case DIOCGETALTQSV1:
2118		case DIOCGETALTQV0:
2119		case DIOCGETALTQV1:
2120		case DIOCGETQSTATSV0:
2121		case DIOCGETQSTATSV1:
2122		case DIOCGETRULESETS:
2123		case DIOCGETRULESET:
2124		case DIOCRGETTABLES:
2125		case DIOCRGETTSTATS:
2126		case DIOCRCLRTSTATS:
2127		case DIOCRCLRADDRS:
2128		case DIOCRADDADDRS:
2129		case DIOCRDELADDRS:
2130		case DIOCRSETADDRS:
2131		case DIOCRGETADDRS:
2132		case DIOCRGETASTATS:
2133		case DIOCRCLRASTATS:
2134		case DIOCRTSTADDRS:
2135		case DIOCOSFPGET:
2136		case DIOCGETSRCNODES:
2137		case DIOCCLRSRCNODES:
2138		case DIOCIGETIFACES:
2139		case DIOCGIFSPEEDV0:
2140		case DIOCGIFSPEEDV1:
2141		case DIOCSETIFFLAG:
2142		case DIOCCLRIFFLAG:
2143			break;
2144		case DIOCRCLRTABLES:
2145		case DIOCRADDTABLES:
2146		case DIOCRDELTABLES:
2147		case DIOCRSETTFLAGS:
2148			if (((struct pfioc_table *)addr)->pfrio_flags &
2149			    PFR_FLAG_DUMMY)
2150				break; /* dummy operation ok */
2151			return (EPERM);
2152		default:
2153			return (EPERM);
2154		}
2155
2156	if (!(flags & FWRITE))
2157		switch (cmd) {
2158		case DIOCGETRULES:
2159		case DIOCGETADDRS:
2160		case DIOCGETADDR:
2161		case DIOCGETSTATE:
2162		case DIOCGETSTATENV:
2163		case DIOCGETSTATUS:
2164		case DIOCGETSTATES:
2165		case DIOCGETSTATESNV:
2166		case DIOCGETTIMEOUT:
2167		case DIOCGETLIMIT:
2168		case DIOCGETALTQSV0:
2169		case DIOCGETALTQSV1:
2170		case DIOCGETALTQV0:
2171		case DIOCGETALTQV1:
2172		case DIOCGETQSTATSV0:
2173		case DIOCGETQSTATSV1:
2174		case DIOCGETRULESETS:
2175		case DIOCGETRULESET:
2176		case DIOCNATLOOK:
2177		case DIOCRGETTABLES:
2178		case DIOCRGETTSTATS:
2179		case DIOCRGETADDRS:
2180		case DIOCRGETASTATS:
2181		case DIOCRTSTADDRS:
2182		case DIOCOSFPGET:
2183		case DIOCGETSRCNODES:
2184		case DIOCIGETIFACES:
2185		case DIOCGIFSPEEDV1:
2186		case DIOCGIFSPEEDV0:
2187		case DIOCGETRULENV:
2188			break;
2189		case DIOCRCLRTABLES:
2190		case DIOCRADDTABLES:
2191		case DIOCRDELTABLES:
2192		case DIOCRCLRTSTATS:
2193		case DIOCRCLRADDRS:
2194		case DIOCRADDADDRS:
2195		case DIOCRDELADDRS:
2196		case DIOCRSETADDRS:
2197		case DIOCRSETTFLAGS:
2198			if (((struct pfioc_table *)addr)->pfrio_flags &
2199			    PFR_FLAG_DUMMY) {
2200				flags |= FWRITE; /* need write lock for dummy */
2201				break; /* dummy operation ok */
2202			}
2203			return (EACCES);
2204		case DIOCGETRULE:
2205			if (((struct pfioc_rule *)addr)->action ==
2206			    PF_GET_CLR_CNTR)
2207				return (EACCES);
2208			break;
2209		default:
2210			return (EACCES);
2211		}
2212
2213	CURVNET_SET(TD_TO_VNET(td));
2214
2215	switch (cmd) {
2216	case DIOCSTART:
2217		sx_xlock(&pf_ioctl_lock);
2218		if (V_pf_status.running)
2219			error = EEXIST;
2220		else {
2221			int cpu;
2222
2223			hook_pf();
2224			V_pf_status.running = 1;
2225			V_pf_status.since = time_second;
2226
2227			CPU_FOREACH(cpu)
2228				V_pf_stateid[cpu] = time_second;
2229
2230			DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n"));
2231		}
2232		break;
2233
2234	case DIOCSTOP:
2235		sx_xlock(&pf_ioctl_lock);
2236		if (!V_pf_status.running)
2237			error = ENOENT;
2238		else {
2239			V_pf_status.running = 0;
2240			dehook_pf();
2241			V_pf_status.since = time_second;
2242			DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n"));
2243		}
2244		break;
2245
2246	case DIOCADDRULENV: {
2247		struct pfioc_nv	*nv = (struct pfioc_nv *)addr;
2248		nvlist_t	*nvl = NULL;
2249		void		*nvlpacked = NULL;
2250		struct pf_krule	*rule = NULL;
2251		const char	*anchor = "", *anchor_call = "";
2252		uint32_t	 ticket = 0, pool_ticket = 0;
2253
2254#define	ERROUT(x)	ERROUT_IOCTL(DIOCADDRULENV_error, x)
2255
2256		if (nv->len > pf_ioctl_maxcount)
2257			ERROUT(ENOMEM);
2258
2259		nvlpacked = malloc(nv->len, M_TEMP, M_WAITOK);
2260		error = copyin(nv->data, nvlpacked, nv->len);
2261		if (error)
2262			ERROUT(error);
2263
2264		nvl = nvlist_unpack(nvlpacked, nv->len, 0);
2265		if (nvl == NULL)
2266			ERROUT(EBADMSG);
2267
2268		if (! nvlist_exists_number(nvl, "ticket"))
2269			ERROUT(EINVAL);
2270		ticket = nvlist_get_number(nvl, "ticket");
2271
2272		if (! nvlist_exists_number(nvl, "pool_ticket"))
2273			ERROUT(EINVAL);
2274		pool_ticket = nvlist_get_number(nvl, "pool_ticket");
2275
2276		if (! nvlist_exists_nvlist(nvl, "rule"))
2277			ERROUT(EINVAL);
2278
2279		rule = malloc(sizeof(*rule), M_PFRULE, M_WAITOK | M_ZERO);
2280		error = pf_nvrule_to_krule(nvlist_get_nvlist(nvl, "rule"),
2281		    rule);
2282		if (error)
2283			ERROUT(error);
2284
2285		if (nvlist_exists_string(nvl, "anchor"))
2286			anchor = nvlist_get_string(nvl, "anchor");
2287		if (nvlist_exists_string(nvl, "anchor_call"))
2288			anchor_call = nvlist_get_string(nvl, "anchor_call");
2289
2290		if ((error = nvlist_error(nvl)))
2291			ERROUT(error);
2292
2293		/* Frees rule on error */
2294		error = pf_ioctl_addrule(rule, ticket, pool_ticket, anchor,
2295		    anchor_call, td);
2296
2297		nvlist_destroy(nvl);
2298		free(nvlpacked, M_TEMP);
2299		break;
2300#undef ERROUT
2301DIOCADDRULENV_error:
2302		pf_krule_free(rule);
2303		nvlist_destroy(nvl);
2304		free(nvlpacked, M_TEMP);
2305
2306		break;
2307	}
2308	case DIOCADDRULE: {
2309		struct pfioc_rule	*pr = (struct pfioc_rule *)addr;
2310		struct pf_krule		*rule;
2311
2312		rule = malloc(sizeof(*rule), M_PFRULE, M_WAITOK);
2313		error = pf_rule_to_krule(&pr->rule, rule);
2314		if (error != 0) {
2315			free(rule, M_PFRULE);
2316			break;
2317		}
2318
2319		pr->anchor[sizeof(pr->anchor) - 1] = 0;
2320
2321		/* Frees rule on error */
2322		error = pf_ioctl_addrule(rule, pr->ticket, pr->pool_ticket,
2323		    pr->anchor, pr->anchor_call, td);
2324		break;
2325	}
2326
2327	case DIOCGETRULES: {
2328		struct pfioc_rule	*pr = (struct pfioc_rule *)addr;
2329		struct pf_kruleset	*ruleset;
2330		struct pf_krule		*tail;
2331		int			 rs_num;
2332
2333		PF_RULES_WLOCK();
2334		pr->anchor[sizeof(pr->anchor) - 1] = 0;
2335		ruleset = pf_find_kruleset(pr->anchor);
2336		if (ruleset == NULL) {
2337			PF_RULES_WUNLOCK();
2338			error = EINVAL;
2339			break;
2340		}
2341		rs_num = pf_get_ruleset_number(pr->rule.action);
2342		if (rs_num >= PF_RULESET_MAX) {
2343			PF_RULES_WUNLOCK();
2344			error = EINVAL;
2345			break;
2346		}
2347		tail = TAILQ_LAST(ruleset->rules[rs_num].active.ptr,
2348		    pf_krulequeue);
2349		if (tail)
2350			pr->nr = tail->nr + 1;
2351		else
2352			pr->nr = 0;
2353		pr->ticket = ruleset->rules[rs_num].active.ticket;
2354		PF_RULES_WUNLOCK();
2355		break;
2356	}
2357
2358	case DIOCGETRULE: {
2359		struct pfioc_rule	*pr = (struct pfioc_rule *)addr;
2360		struct pf_kruleset	*ruleset;
2361		struct pf_krule		*rule;
2362		int			 rs_num;
2363
2364		PF_RULES_WLOCK();
2365		pr->anchor[sizeof(pr->anchor) - 1] = 0;
2366		ruleset = pf_find_kruleset(pr->anchor);
2367		if (ruleset == NULL) {
2368			PF_RULES_WUNLOCK();
2369			error = EINVAL;
2370			break;
2371		}
2372		rs_num = pf_get_ruleset_number(pr->rule.action);
2373		if (rs_num >= PF_RULESET_MAX) {
2374			PF_RULES_WUNLOCK();
2375			error = EINVAL;
2376			break;
2377		}
2378		if (pr->ticket != ruleset->rules[rs_num].active.ticket) {
2379			PF_RULES_WUNLOCK();
2380			error = EBUSY;
2381			break;
2382		}
2383		rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr);
2384		while ((rule != NULL) && (rule->nr != pr->nr))
2385			rule = TAILQ_NEXT(rule, entries);
2386		if (rule == NULL) {
2387			PF_RULES_WUNLOCK();
2388			error = EBUSY;
2389			break;
2390		}
2391
2392		pf_krule_to_rule(rule, &pr->rule);
2393
2394		if (pf_kanchor_copyout(ruleset, rule, pr)) {
2395			PF_RULES_WUNLOCK();
2396			error = EBUSY;
2397			break;
2398		}
2399		pf_addr_copyout(&pr->rule.src.addr);
2400		pf_addr_copyout(&pr->rule.dst.addr);
2401
2402		if (pr->action == PF_GET_CLR_CNTR) {
2403			counter_u64_zero(rule->evaluations);
2404			for (int i = 0; i < 2; i++) {
2405				counter_u64_zero(rule->packets[i]);
2406				counter_u64_zero(rule->bytes[i]);
2407			}
2408			counter_u64_zero(rule->states_tot);
2409		}
2410		PF_RULES_WUNLOCK();
2411		break;
2412	}
2413
2414	case DIOCGETRULENV: {
2415		struct pfioc_nv		*nv = (struct pfioc_nv *)addr;
2416		nvlist_t		*nvrule = NULL;
2417		nvlist_t		*nvl = NULL;
2418		struct pf_kruleset	*ruleset;
2419		struct pf_krule		*rule;
2420		void			*nvlpacked = NULL;
2421		int			 rs_num, nr;
2422		bool			 clear_counter = false;
2423
2424#define	ERROUT(x)	ERROUT_IOCTL(DIOCGETRULENV_error, x)
2425
2426		if (nv->len > pf_ioctl_maxcount)
2427			ERROUT(ENOMEM);
2428
2429		/* Copy the request in */
2430		nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
2431		if (nvlpacked == NULL)
2432			ERROUT(ENOMEM);
2433
2434		error = copyin(nv->data, nvlpacked, nv->len);
2435		if (error)
2436			ERROUT(error);
2437
2438		nvl = nvlist_unpack(nvlpacked, nv->len, 0);
2439		if (nvl == NULL)
2440			ERROUT(EBADMSG);
2441
2442		if (! nvlist_exists_string(nvl, "anchor"))
2443			ERROUT(EBADMSG);
2444		if (! nvlist_exists_number(nvl, "ruleset"))
2445			ERROUT(EBADMSG);
2446		if (! nvlist_exists_number(nvl, "ticket"))
2447			ERROUT(EBADMSG);
2448		if (! nvlist_exists_number(nvl, "nr"))
2449			ERROUT(EBADMSG);
2450
2451		if (nvlist_exists_bool(nvl, "clear_counter"))
2452			clear_counter = nvlist_get_bool(nvl, "clear_counter");
2453
2454		if (clear_counter && !(flags & FWRITE))
2455			ERROUT(EACCES);
2456
2457		nr = nvlist_get_number(nvl, "nr");
2458
2459		PF_RULES_WLOCK();
2460		ruleset = pf_find_kruleset(nvlist_get_string(nvl, "anchor"));
2461		if (ruleset == NULL) {
2462			PF_RULES_WUNLOCK();
2463			ERROUT(ENOENT);
2464		}
2465
2466		rs_num = pf_get_ruleset_number(nvlist_get_number(nvl, "ruleset"));
2467		if (rs_num >= PF_RULESET_MAX) {
2468			PF_RULES_WUNLOCK();
2469			ERROUT(EINVAL);
2470		}
2471
2472		if (nvlist_get_number(nvl, "ticket") !=
2473		    ruleset->rules[rs_num].active.ticket) {
2474			PF_RULES_WUNLOCK();
2475			ERROUT(EBUSY);
2476		}
2477
2478		if ((error = nvlist_error(nvl))) {
2479			PF_RULES_WUNLOCK();
2480			ERROUT(error);
2481		}
2482
2483		rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr);
2484		while ((rule != NULL) && (rule->nr != nr))
2485			rule = TAILQ_NEXT(rule, entries);
2486		if (rule == NULL) {
2487			PF_RULES_WUNLOCK();
2488			ERROUT(EBUSY);
2489		}
2490
2491		nvrule = pf_krule_to_nvrule(rule);
2492
2493		nvlist_destroy(nvl);
2494		nvl = nvlist_create(0);
2495		if (nvl == NULL) {
2496			PF_RULES_WUNLOCK();
2497			ERROUT(ENOMEM);
2498		}
2499		nvlist_add_number(nvl, "nr", nr);
2500		nvlist_add_nvlist(nvl, "rule", nvrule);
2501		nvlist_destroy(nvrule);
2502		nvrule = NULL;
2503		if (pf_kanchor_nvcopyout(ruleset, rule, nvl)) {
2504			PF_RULES_WUNLOCK();
2505			ERROUT(EBUSY);
2506		}
2507
2508		free(nvlpacked, M_NVLIST);
2509		nvlpacked = nvlist_pack(nvl, &nv->len);
2510		if (nvlpacked == NULL) {
2511			PF_RULES_WUNLOCK();
2512			ERROUT(ENOMEM);
2513		}
2514
2515		if (nv->size == 0) {
2516			PF_RULES_WUNLOCK();
2517			ERROUT(0);
2518		}
2519		else if (nv->size < nv->len) {
2520			PF_RULES_WUNLOCK();
2521			ERROUT(ENOSPC);
2522		}
2523
2524		if (clear_counter) {
2525			counter_u64_zero(rule->evaluations);
2526			for (int i = 0; i < 2; i++) {
2527				counter_u64_zero(rule->packets[i]);
2528				counter_u64_zero(rule->bytes[i]);
2529			}
2530			counter_u64_zero(rule->states_tot);
2531		}
2532		PF_RULES_WUNLOCK();
2533
2534		error = copyout(nvlpacked, nv->data, nv->len);
2535
2536#undef ERROUT
2537DIOCGETRULENV_error:
2538		free(nvlpacked, M_NVLIST);
2539		nvlist_destroy(nvrule);
2540		nvlist_destroy(nvl);
2541
2542		break;
2543	}
2544
2545	case DIOCCHANGERULE: {
2546		struct pfioc_rule	*pcr = (struct pfioc_rule *)addr;
2547		struct pf_kruleset	*ruleset;
2548		struct pf_krule		*oldrule = NULL, *newrule = NULL;
2549		struct pfi_kkif		*kif = NULL;
2550		struct pf_kpooladdr	*pa;
2551		u_int32_t		 nr = 0;
2552		int			 rs_num;
2553
2554		if (pcr->action < PF_CHANGE_ADD_HEAD ||
2555		    pcr->action > PF_CHANGE_GET_TICKET) {
2556			error = EINVAL;
2557			break;
2558		}
2559		if (pcr->rule.return_icmp >> 8 > ICMP_MAXTYPE) {
2560			error = EINVAL;
2561			break;
2562		}
2563
2564		if (pcr->action != PF_CHANGE_REMOVE) {
2565			newrule = malloc(sizeof(*newrule), M_PFRULE, M_WAITOK);
2566			error = pf_rule_to_krule(&pcr->rule, newrule);
2567			if (error != 0) {
2568				free(newrule, M_PFRULE);
2569				break;
2570			}
2571
2572			if (newrule->ifname[0])
2573				kif = pf_kkif_create(M_WAITOK);
2574			newrule->evaluations = counter_u64_alloc(M_WAITOK);
2575			for (int i = 0; i < 2; i++) {
2576				newrule->packets[i] =
2577				    counter_u64_alloc(M_WAITOK);
2578				newrule->bytes[i] =
2579				    counter_u64_alloc(M_WAITOK);
2580			}
2581			newrule->states_cur = counter_u64_alloc(M_WAITOK);
2582			newrule->states_tot = counter_u64_alloc(M_WAITOK);
2583			newrule->src_nodes = counter_u64_alloc(M_WAITOK);
2584			newrule->cuid = td->td_ucred->cr_ruid;
2585			newrule->cpid = td->td_proc ? td->td_proc->p_pid : 0;
2586			TAILQ_INIT(&newrule->rpool.list);
2587		}
2588#define	ERROUT(x)	{ error = (x); goto DIOCCHANGERULE_error; }
2589
2590		PF_RULES_WLOCK();
2591		if (!(pcr->action == PF_CHANGE_REMOVE ||
2592		    pcr->action == PF_CHANGE_GET_TICKET) &&
2593		    pcr->pool_ticket != V_ticket_pabuf)
2594			ERROUT(EBUSY);
2595
2596		ruleset = pf_find_kruleset(pcr->anchor);
2597		if (ruleset == NULL)
2598			ERROUT(EINVAL);
2599
2600		rs_num = pf_get_ruleset_number(pcr->rule.action);
2601		if (rs_num >= PF_RULESET_MAX)
2602			ERROUT(EINVAL);
2603
2604		if (pcr->action == PF_CHANGE_GET_TICKET) {
2605			pcr->ticket = ++ruleset->rules[rs_num].active.ticket;
2606			ERROUT(0);
2607		} else if (pcr->ticket !=
2608			    ruleset->rules[rs_num].active.ticket)
2609				ERROUT(EINVAL);
2610
2611		if (pcr->action != PF_CHANGE_REMOVE) {
2612			if (newrule->ifname[0]) {
2613				newrule->kif = pfi_kkif_attach(kif,
2614				    newrule->ifname);
2615				kif = NULL;
2616				pfi_kkif_ref(newrule->kif);
2617			} else
2618				newrule->kif = NULL;
2619
2620			if (newrule->rtableid > 0 &&
2621			    newrule->rtableid >= rt_numfibs)
2622				error = EBUSY;
2623
2624#ifdef ALTQ
2625			/* set queue IDs */
2626			if (newrule->qname[0] != 0) {
2627				if ((newrule->qid =
2628				    pf_qname2qid(newrule->qname)) == 0)
2629					error = EBUSY;
2630				else if (newrule->pqname[0] != 0) {
2631					if ((newrule->pqid =
2632					    pf_qname2qid(newrule->pqname)) == 0)
2633						error = EBUSY;
2634				} else
2635					newrule->pqid = newrule->qid;
2636			}
2637#endif /* ALTQ */
2638			if (newrule->tagname[0])
2639				if ((newrule->tag =
2640				    pf_tagname2tag(newrule->tagname)) == 0)
2641					error = EBUSY;
2642			if (newrule->match_tagname[0])
2643				if ((newrule->match_tag = pf_tagname2tag(
2644				    newrule->match_tagname)) == 0)
2645					error = EBUSY;
2646			if (newrule->rt && !newrule->direction)
2647				error = EINVAL;
2648			if (!newrule->log)
2649				newrule->logif = 0;
2650			if (newrule->logif >= PFLOGIFS_MAX)
2651				error = EINVAL;
2652			if (pf_addr_setup(ruleset, &newrule->src.addr, newrule->af))
2653				error = ENOMEM;
2654			if (pf_addr_setup(ruleset, &newrule->dst.addr, newrule->af))
2655				error = ENOMEM;
2656			if (pf_kanchor_setup(newrule, ruleset, pcr->anchor_call))
2657				error = EINVAL;
2658			TAILQ_FOREACH(pa, &V_pf_pabuf, entries)
2659				if (pa->addr.type == PF_ADDR_TABLE) {
2660					pa->addr.p.tbl =
2661					    pfr_attach_table(ruleset,
2662					    pa->addr.v.tblname);
2663					if (pa->addr.p.tbl == NULL)
2664						error = ENOMEM;
2665				}
2666
2667			newrule->overload_tbl = NULL;
2668			if (newrule->overload_tblname[0]) {
2669				if ((newrule->overload_tbl = pfr_attach_table(
2670				    ruleset, newrule->overload_tblname)) ==
2671				    NULL)
2672					error = EINVAL;
2673				else
2674					newrule->overload_tbl->pfrkt_flags |=
2675					    PFR_TFLAG_ACTIVE;
2676			}
2677
2678			pf_mv_kpool(&V_pf_pabuf, &newrule->rpool.list);
2679			if (((((newrule->action == PF_NAT) ||
2680			    (newrule->action == PF_RDR) ||
2681			    (newrule->action == PF_BINAT) ||
2682			    (newrule->rt > PF_NOPFROUTE)) &&
2683			    !newrule->anchor)) &&
2684			    (TAILQ_FIRST(&newrule->rpool.list) == NULL))
2685				error = EINVAL;
2686
2687			if (error) {
2688				pf_free_rule(newrule);
2689				PF_RULES_WUNLOCK();
2690				break;
2691			}
2692
2693			newrule->rpool.cur = TAILQ_FIRST(&newrule->rpool.list);
2694		}
2695		pf_empty_kpool(&V_pf_pabuf);
2696
2697		if (pcr->action == PF_CHANGE_ADD_HEAD)
2698			oldrule = TAILQ_FIRST(
2699			    ruleset->rules[rs_num].active.ptr);
2700		else if (pcr->action == PF_CHANGE_ADD_TAIL)
2701			oldrule = TAILQ_LAST(
2702			    ruleset->rules[rs_num].active.ptr, pf_krulequeue);
2703		else {
2704			oldrule = TAILQ_FIRST(
2705			    ruleset->rules[rs_num].active.ptr);
2706			while ((oldrule != NULL) && (oldrule->nr != pcr->nr))
2707				oldrule = TAILQ_NEXT(oldrule, entries);
2708			if (oldrule == NULL) {
2709				if (newrule != NULL)
2710					pf_free_rule(newrule);
2711				PF_RULES_WUNLOCK();
2712				error = EINVAL;
2713				break;
2714			}
2715		}
2716
2717		if (pcr->action == PF_CHANGE_REMOVE) {
2718			pf_unlink_rule(ruleset->rules[rs_num].active.ptr,
2719			    oldrule);
2720			ruleset->rules[rs_num].active.rcount--;
2721		} else {
2722			if (oldrule == NULL)
2723				TAILQ_INSERT_TAIL(
2724				    ruleset->rules[rs_num].active.ptr,
2725				    newrule, entries);
2726			else if (pcr->action == PF_CHANGE_ADD_HEAD ||
2727			    pcr->action == PF_CHANGE_ADD_BEFORE)
2728				TAILQ_INSERT_BEFORE(oldrule, newrule, entries);
2729			else
2730				TAILQ_INSERT_AFTER(
2731				    ruleset->rules[rs_num].active.ptr,
2732				    oldrule, newrule, entries);
2733			ruleset->rules[rs_num].active.rcount++;
2734		}
2735
2736		nr = 0;
2737		TAILQ_FOREACH(oldrule,
2738		    ruleset->rules[rs_num].active.ptr, entries)
2739			oldrule->nr = nr++;
2740
2741		ruleset->rules[rs_num].active.ticket++;
2742
2743		pf_calc_skip_steps(ruleset->rules[rs_num].active.ptr);
2744		pf_remove_if_empty_kruleset(ruleset);
2745
2746		PF_RULES_WUNLOCK();
2747		break;
2748
2749#undef ERROUT
2750DIOCCHANGERULE_error:
2751		PF_RULES_WUNLOCK();
2752		pf_krule_free(newrule);
2753		pf_kkif_free(kif);
2754		break;
2755	}
2756
2757	case DIOCCLRSTATES: {
2758		struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr;
2759		struct pf_kstate_kill	 kill;
2760
2761		error = pf_state_kill_to_kstate_kill(psk, &kill);
2762		if (error)
2763			break;
2764
2765		psk->psk_killed = pf_clear_states(&kill);
2766		break;
2767	}
2768
2769	case DIOCCLRSTATESNV: {
2770		error = pf_clearstates_nv((struct pfioc_nv *)addr);
2771		break;
2772	}
2773
2774	case DIOCKILLSTATES: {
2775		struct pfioc_state_kill	*psk = (struct pfioc_state_kill *)addr;
2776		struct pf_kstate_kill	 kill;
2777
2778		error = pf_state_kill_to_kstate_kill(psk, &kill);
2779		if (error)
2780			break;
2781
2782		psk->psk_killed = 0;
2783		error = pf_killstates(&kill, &psk->psk_killed);
2784		break;
2785	}
2786
2787	case DIOCKILLSTATESNV: {
2788		error = pf_killstates_nv((struct pfioc_nv *)addr);
2789		break;
2790	}
2791
2792	case DIOCADDSTATE: {
2793		struct pfioc_state	*ps = (struct pfioc_state *)addr;
2794		struct pfsync_state	*sp = &ps->state;
2795
2796		if (sp->timeout >= PFTM_MAX) {
2797			error = EINVAL;
2798			break;
2799		}
2800		if (V_pfsync_state_import_ptr != NULL) {
2801			PF_RULES_RLOCK();
2802			error = V_pfsync_state_import_ptr(sp, PFSYNC_SI_IOCTL);
2803			PF_RULES_RUNLOCK();
2804		} else
2805			error = EOPNOTSUPP;
2806		break;
2807	}
2808
2809	case DIOCGETSTATE: {
2810		struct pfioc_state	*ps = (struct pfioc_state *)addr;
2811		struct pf_state		*s;
2812
2813		s = pf_find_state_byid(ps->state.id, ps->state.creatorid);
2814		if (s == NULL) {
2815			error = ENOENT;
2816			break;
2817		}
2818
2819		pfsync_state_export(&ps->state, s);
2820		PF_STATE_UNLOCK(s);
2821		break;
2822	}
2823
2824	case DIOCGETSTATENV: {
2825		error = pf_getstate((struct pfioc_nv *)addr);
2826		break;
2827	}
2828
2829	case DIOCGETSTATES: {
2830		struct pfioc_states	*ps = (struct pfioc_states *)addr;
2831		struct pf_state		*s;
2832		struct pfsync_state	*pstore, *p;
2833		int i, nr;
2834
2835		if (ps->ps_len <= 0) {
2836			nr = uma_zone_get_cur(V_pf_state_z);
2837			ps->ps_len = sizeof(struct pfsync_state) * nr;
2838			break;
2839		}
2840
2841		p = pstore = malloc(ps->ps_len, M_TEMP, M_WAITOK | M_ZERO);
2842		nr = 0;
2843
2844		for (i = 0; i <= pf_hashmask; i++) {
2845			struct pf_idhash *ih = &V_pf_idhash[i];
2846
2847			PF_HASHROW_LOCK(ih);
2848			LIST_FOREACH(s, &ih->states, entry) {
2849				if (s->timeout == PFTM_UNLINKED)
2850					continue;
2851
2852				if ((nr+1) * sizeof(*p) > ps->ps_len) {
2853					PF_HASHROW_UNLOCK(ih);
2854					goto DIOCGETSTATES_full;
2855				}
2856				pfsync_state_export(p, s);
2857				p++;
2858				nr++;
2859			}
2860			PF_HASHROW_UNLOCK(ih);
2861		}
2862DIOCGETSTATES_full:
2863		error = copyout(pstore, ps->ps_states,
2864		    sizeof(struct pfsync_state) * nr);
2865		if (error) {
2866			free(pstore, M_TEMP);
2867			break;
2868		}
2869		ps->ps_len = sizeof(struct pfsync_state) * nr;
2870		free(pstore, M_TEMP);
2871
2872		break;
2873	}
2874
2875	case DIOCGETSTATESNV: {
2876		error = pf_getstates((struct pfioc_nv *)addr);
2877		break;
2878	}
2879
2880	case DIOCGETSTATUS: {
2881		struct pf_status *s = (struct pf_status *)addr;
2882
2883		PF_RULES_RLOCK();
2884		s->running = V_pf_status.running;
2885		s->since   = V_pf_status.since;
2886		s->debug   = V_pf_status.debug;
2887		s->hostid  = V_pf_status.hostid;
2888		s->states  = V_pf_status.states;
2889		s->src_nodes = V_pf_status.src_nodes;
2890
2891		for (int i = 0; i < PFRES_MAX; i++)
2892			s->counters[i] =
2893			    counter_u64_fetch(V_pf_status.counters[i]);
2894		for (int i = 0; i < LCNT_MAX; i++)
2895			s->lcounters[i] =
2896			    counter_u64_fetch(V_pf_status.lcounters[i]);
2897		for (int i = 0; i < FCNT_MAX; i++)
2898			s->fcounters[i] =
2899			    counter_u64_fetch(V_pf_status.fcounters[i]);
2900		for (int i = 0; i < SCNT_MAX; i++)
2901			s->scounters[i] =
2902			    counter_u64_fetch(V_pf_status.scounters[i]);
2903
2904		bcopy(V_pf_status.ifname, s->ifname, IFNAMSIZ);
2905		bcopy(V_pf_status.pf_chksum, s->pf_chksum,
2906		    PF_MD5_DIGEST_LENGTH);
2907
2908		pfi_update_status(s->ifname, s);
2909		PF_RULES_RUNLOCK();
2910		break;
2911	}
2912
2913	case DIOCSETSTATUSIF: {
2914		struct pfioc_if	*pi = (struct pfioc_if *)addr;
2915
2916		if (pi->ifname[0] == 0) {
2917			bzero(V_pf_status.ifname, IFNAMSIZ);
2918			break;
2919		}
2920		PF_RULES_WLOCK();
2921		strlcpy(V_pf_status.ifname, pi->ifname, IFNAMSIZ);
2922		PF_RULES_WUNLOCK();
2923		break;
2924	}
2925
2926	case DIOCCLRSTATUS: {
2927		PF_RULES_WLOCK();
2928		for (int i = 0; i < PFRES_MAX; i++)
2929			counter_u64_zero(V_pf_status.counters[i]);
2930		for (int i = 0; i < FCNT_MAX; i++)
2931			counter_u64_zero(V_pf_status.fcounters[i]);
2932		for (int i = 0; i < SCNT_MAX; i++)
2933			counter_u64_zero(V_pf_status.scounters[i]);
2934		for (int i = 0; i < LCNT_MAX; i++)
2935			counter_u64_zero(V_pf_status.lcounters[i]);
2936		V_pf_status.since = time_second;
2937		if (*V_pf_status.ifname)
2938			pfi_update_status(V_pf_status.ifname, NULL);
2939		PF_RULES_WUNLOCK();
2940		break;
2941	}
2942
2943	case DIOCNATLOOK: {
2944		struct pfioc_natlook	*pnl = (struct pfioc_natlook *)addr;
2945		struct pf_state_key	*sk;
2946		struct pf_state		*state;
2947		struct pf_state_key_cmp	 key;
2948		int			 m = 0, direction = pnl->direction;
2949		int			 sidx, didx;
2950
2951		/* NATLOOK src and dst are reversed, so reverse sidx/didx */
2952		sidx = (direction == PF_IN) ? 1 : 0;
2953		didx = (direction == PF_IN) ? 0 : 1;
2954
2955		if (!pnl->proto ||
2956		    PF_AZERO(&pnl->saddr, pnl->af) ||
2957		    PF_AZERO(&pnl->daddr, pnl->af) ||
2958		    ((pnl->proto == IPPROTO_TCP ||
2959		    pnl->proto == IPPROTO_UDP) &&
2960		    (!pnl->dport || !pnl->sport)))
2961			error = EINVAL;
2962		else {
2963			bzero(&key, sizeof(key));
2964			key.af = pnl->af;
2965			key.proto = pnl->proto;
2966			PF_ACPY(&key.addr[sidx], &pnl->saddr, pnl->af);
2967			key.port[sidx] = pnl->sport;
2968			PF_ACPY(&key.addr[didx], &pnl->daddr, pnl->af);
2969			key.port[didx] = pnl->dport;
2970
2971			state = pf_find_state_all(&key, direction, &m);
2972
2973			if (m > 1)
2974				error = E2BIG;	/* more than one state */
2975			else if (state != NULL) {
2976				/* XXXGL: not locked read */
2977				sk = state->key[sidx];
2978				PF_ACPY(&pnl->rsaddr, &sk->addr[sidx], sk->af);
2979				pnl->rsport = sk->port[sidx];
2980				PF_ACPY(&pnl->rdaddr, &sk->addr[didx], sk->af);
2981				pnl->rdport = sk->port[didx];
2982			} else
2983				error = ENOENT;
2984		}
2985		break;
2986	}
2987
2988	case DIOCSETTIMEOUT: {
2989		struct pfioc_tm	*pt = (struct pfioc_tm *)addr;
2990		int		 old;
2991
2992		if (pt->timeout < 0 || pt->timeout >= PFTM_MAX ||
2993		    pt->seconds < 0) {
2994			error = EINVAL;
2995			break;
2996		}
2997		PF_RULES_WLOCK();
2998		old = V_pf_default_rule.timeout[pt->timeout];
2999		if (pt->timeout == PFTM_INTERVAL && pt->seconds == 0)
3000			pt->seconds = 1;
3001		V_pf_default_rule.timeout[pt->timeout] = pt->seconds;
3002		if (pt->timeout == PFTM_INTERVAL && pt->seconds < old)
3003			wakeup(pf_purge_thread);
3004		pt->seconds = old;
3005		PF_RULES_WUNLOCK();
3006		break;
3007	}
3008
3009	case DIOCGETTIMEOUT: {
3010		struct pfioc_tm	*pt = (struct pfioc_tm *)addr;
3011
3012		if (pt->timeout < 0 || pt->timeout >= PFTM_MAX) {
3013			error = EINVAL;
3014			break;
3015		}
3016		PF_RULES_RLOCK();
3017		pt->seconds = V_pf_default_rule.timeout[pt->timeout];
3018		PF_RULES_RUNLOCK();
3019		break;
3020	}
3021
3022	case DIOCGETLIMIT: {
3023		struct pfioc_limit	*pl = (struct pfioc_limit *)addr;
3024
3025		if (pl->index < 0 || pl->index >= PF_LIMIT_MAX) {
3026			error = EINVAL;
3027			break;
3028		}
3029		PF_RULES_RLOCK();
3030		pl->limit = V_pf_limits[pl->index].limit;
3031		PF_RULES_RUNLOCK();
3032		break;
3033	}
3034
3035	case DIOCSETLIMIT: {
3036		struct pfioc_limit	*pl = (struct pfioc_limit *)addr;
3037		int			 old_limit;
3038
3039		PF_RULES_WLOCK();
3040		if (pl->index < 0 || pl->index >= PF_LIMIT_MAX ||
3041		    V_pf_limits[pl->index].zone == NULL) {
3042			PF_RULES_WUNLOCK();
3043			error = EINVAL;
3044			break;
3045		}
3046		uma_zone_set_max(V_pf_limits[pl->index].zone, pl->limit);
3047		old_limit = V_pf_limits[pl->index].limit;
3048		V_pf_limits[pl->index].limit = pl->limit;
3049		pl->limit = old_limit;
3050		PF_RULES_WUNLOCK();
3051		break;
3052	}
3053
3054	case DIOCSETDEBUG: {
3055		u_int32_t	*level = (u_int32_t *)addr;
3056
3057		PF_RULES_WLOCK();
3058		V_pf_status.debug = *level;
3059		PF_RULES_WUNLOCK();
3060		break;
3061	}
3062
3063	case DIOCCLRRULECTRS: {
3064		/* obsoleted by DIOCGETRULE with action=PF_GET_CLR_CNTR */
3065		struct pf_kruleset	*ruleset = &pf_main_ruleset;
3066		struct pf_krule		*rule;
3067
3068		PF_RULES_WLOCK();
3069		TAILQ_FOREACH(rule,
3070		    ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) {
3071			counter_u64_zero(rule->evaluations);
3072			for (int i = 0; i < 2; i++) {
3073				counter_u64_zero(rule->packets[i]);
3074				counter_u64_zero(rule->bytes[i]);
3075			}
3076		}
3077		PF_RULES_WUNLOCK();
3078		break;
3079	}
3080
3081	case DIOCGIFSPEEDV0:
3082	case DIOCGIFSPEEDV1: {
3083		struct pf_ifspeed_v1	*psp = (struct pf_ifspeed_v1 *)addr;
3084		struct pf_ifspeed_v1	ps;
3085		struct ifnet		*ifp;
3086
3087		if (psp->ifname[0] != 0) {
3088			/* Can we completely trust user-land? */
3089			strlcpy(ps.ifname, psp->ifname, IFNAMSIZ);
3090			ifp = ifunit(ps.ifname);
3091			if (ifp != NULL) {
3092				psp->baudrate32 =
3093				    (u_int32_t)uqmin(ifp->if_baudrate, UINT_MAX);
3094				if (cmd == DIOCGIFSPEEDV1)
3095					psp->baudrate = ifp->if_baudrate;
3096			} else
3097				error = EINVAL;
3098		} else
3099			error = EINVAL;
3100		break;
3101	}
3102
3103#ifdef ALTQ
3104	case DIOCSTARTALTQ: {
3105		struct pf_altq		*altq;
3106
3107		PF_RULES_WLOCK();
3108		/* enable all altq interfaces on active list */
3109		TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) {
3110			if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
3111				error = pf_enable_altq(altq);
3112				if (error != 0)
3113					break;
3114			}
3115		}
3116		if (error == 0)
3117			V_pf_altq_running = 1;
3118		PF_RULES_WUNLOCK();
3119		DPFPRINTF(PF_DEBUG_MISC, ("altq: started\n"));
3120		break;
3121	}
3122
3123	case DIOCSTOPALTQ: {
3124		struct pf_altq		*altq;
3125
3126		PF_RULES_WLOCK();
3127		/* disable all altq interfaces on active list */
3128		TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) {
3129			if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
3130				error = pf_disable_altq(altq);
3131				if (error != 0)
3132					break;
3133			}
3134		}
3135		if (error == 0)
3136			V_pf_altq_running = 0;
3137		PF_RULES_WUNLOCK();
3138		DPFPRINTF(PF_DEBUG_MISC, ("altq: stopped\n"));
3139		break;
3140	}
3141
3142	case DIOCADDALTQV0:
3143	case DIOCADDALTQV1: {
3144		struct pfioc_altq_v1	*pa = (struct pfioc_altq_v1 *)addr;
3145		struct pf_altq		*altq, *a;
3146		struct ifnet		*ifp;
3147
3148		altq = malloc(sizeof(*altq), M_PFALTQ, M_WAITOK | M_ZERO);
3149		error = pf_import_kaltq(pa, altq, IOCPARM_LEN(cmd));
3150		if (error)
3151			break;
3152		altq->local_flags = 0;
3153
3154		PF_RULES_WLOCK();
3155		if (pa->ticket != V_ticket_altqs_inactive) {
3156			PF_RULES_WUNLOCK();
3157			free(altq, M_PFALTQ);
3158			error = EBUSY;
3159			break;
3160		}
3161
3162		/*
3163		 * if this is for a queue, find the discipline and
3164		 * copy the necessary fields
3165		 */
3166		if (altq->qname[0] != 0) {
3167			if ((altq->qid = pf_qname2qid(altq->qname)) == 0) {
3168				PF_RULES_WUNLOCK();
3169				error = EBUSY;
3170				free(altq, M_PFALTQ);
3171				break;
3172			}
3173			altq->altq_disc = NULL;
3174			TAILQ_FOREACH(a, V_pf_altq_ifs_inactive, entries) {
3175				if (strncmp(a->ifname, altq->ifname,
3176				    IFNAMSIZ) == 0) {
3177					altq->altq_disc = a->altq_disc;
3178					break;
3179				}
3180			}
3181		}
3182
3183		if ((ifp = ifunit(altq->ifname)) == NULL)
3184			altq->local_flags |= PFALTQ_FLAG_IF_REMOVED;
3185		else
3186			error = altq_add(ifp, altq);
3187
3188		if (error) {
3189			PF_RULES_WUNLOCK();
3190			free(altq, M_PFALTQ);
3191			break;
3192		}
3193
3194		if (altq->qname[0] != 0)
3195			TAILQ_INSERT_TAIL(V_pf_altqs_inactive, altq, entries);
3196		else
3197			TAILQ_INSERT_TAIL(V_pf_altq_ifs_inactive, altq, entries);
3198		/* version error check done on import above */
3199		pf_export_kaltq(altq, pa, IOCPARM_LEN(cmd));
3200		PF_RULES_WUNLOCK();
3201		break;
3202	}
3203
3204	case DIOCGETALTQSV0:
3205	case DIOCGETALTQSV1: {
3206		struct pfioc_altq_v1	*pa = (struct pfioc_altq_v1 *)addr;
3207		struct pf_altq		*altq;
3208
3209		PF_RULES_RLOCK();
3210		pa->nr = 0;
3211		TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries)
3212			pa->nr++;
3213		TAILQ_FOREACH(altq, V_pf_altqs_active, entries)
3214			pa->nr++;
3215		pa->ticket = V_ticket_altqs_active;
3216		PF_RULES_RUNLOCK();
3217		break;
3218	}
3219
3220	case DIOCGETALTQV0:
3221	case DIOCGETALTQV1: {
3222		struct pfioc_altq_v1	*pa = (struct pfioc_altq_v1 *)addr;
3223		struct pf_altq		*altq;
3224
3225		PF_RULES_RLOCK();
3226		if (pa->ticket != V_ticket_altqs_active) {
3227			PF_RULES_RUNLOCK();
3228			error = EBUSY;
3229			break;
3230		}
3231		altq = pf_altq_get_nth_active(pa->nr);
3232		if (altq == NULL) {
3233			PF_RULES_RUNLOCK();
3234			error = EBUSY;
3235			break;
3236		}
3237		pf_export_kaltq(altq, pa, IOCPARM_LEN(cmd));
3238		PF_RULES_RUNLOCK();
3239		break;
3240	}
3241
3242	case DIOCCHANGEALTQV0:
3243	case DIOCCHANGEALTQV1:
3244		/* CHANGEALTQ not supported yet! */
3245		error = ENODEV;
3246		break;
3247
3248	case DIOCGETQSTATSV0:
3249	case DIOCGETQSTATSV1: {
3250		struct pfioc_qstats_v1	*pq = (struct pfioc_qstats_v1 *)addr;
3251		struct pf_altq		*altq;
3252		int			 nbytes;
3253		u_int32_t		 version;
3254
3255		PF_RULES_RLOCK();
3256		if (pq->ticket != V_ticket_altqs_active) {
3257			PF_RULES_RUNLOCK();
3258			error = EBUSY;
3259			break;
3260		}
3261		nbytes = pq->nbytes;
3262		altq = pf_altq_get_nth_active(pq->nr);
3263		if (altq == NULL) {
3264			PF_RULES_RUNLOCK();
3265			error = EBUSY;
3266			break;
3267		}
3268
3269		if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) != 0) {
3270			PF_RULES_RUNLOCK();
3271			error = ENXIO;
3272			break;
3273		}
3274		PF_RULES_RUNLOCK();
3275		if (cmd == DIOCGETQSTATSV0)
3276			version = 0;  /* DIOCGETQSTATSV0 means stats struct v0 */
3277		else
3278			version = pq->version;
3279		error = altq_getqstats(altq, pq->buf, &nbytes, version);
3280		if (error == 0) {
3281			pq->scheduler = altq->scheduler;
3282			pq->nbytes = nbytes;
3283		}
3284		break;
3285	}
3286#endif /* ALTQ */
3287
3288	case DIOCBEGINADDRS: {
3289		struct pfioc_pooladdr	*pp = (struct pfioc_pooladdr *)addr;
3290
3291		PF_RULES_WLOCK();
3292		pf_empty_kpool(&V_pf_pabuf);
3293		pp->ticket = ++V_ticket_pabuf;
3294		PF_RULES_WUNLOCK();
3295		break;
3296	}
3297
3298	case DIOCADDADDR: {
3299		struct pfioc_pooladdr	*pp = (struct pfioc_pooladdr *)addr;
3300		struct pf_kpooladdr	*pa;
3301		struct pfi_kkif		*kif = NULL;
3302
3303#ifndef INET
3304		if (pp->af == AF_INET) {
3305			error = EAFNOSUPPORT;
3306			break;
3307		}
3308#endif /* INET */
3309#ifndef INET6
3310		if (pp->af == AF_INET6) {
3311			error = EAFNOSUPPORT;
3312			break;
3313		}
3314#endif /* INET6 */
3315		if (pp->addr.addr.type != PF_ADDR_ADDRMASK &&
3316		    pp->addr.addr.type != PF_ADDR_DYNIFTL &&
3317		    pp->addr.addr.type != PF_ADDR_TABLE) {
3318			error = EINVAL;
3319			break;
3320		}
3321		if (pp->addr.addr.p.dyn != NULL) {
3322			error = EINVAL;
3323			break;
3324		}
3325		pa = malloc(sizeof(*pa), M_PFRULE, M_WAITOK);
3326		pf_pooladdr_to_kpooladdr(&pp->addr, pa);
3327		if (pa->ifname[0])
3328			kif = pf_kkif_create(M_WAITOK);
3329		PF_RULES_WLOCK();
3330		if (pp->ticket != V_ticket_pabuf) {
3331			PF_RULES_WUNLOCK();
3332			if (pa->ifname[0])
3333				pf_kkif_free(kif);
3334			free(pa, M_PFRULE);
3335			error = EBUSY;
3336			break;
3337		}
3338		if (pa->ifname[0]) {
3339			pa->kif = pfi_kkif_attach(kif, pa->ifname);
3340			kif = NULL;
3341			pfi_kkif_ref(pa->kif);
3342		} else
3343			pa->kif = NULL;
3344		if (pa->addr.type == PF_ADDR_DYNIFTL && ((error =
3345		    pfi_dynaddr_setup(&pa->addr, pp->af)) != 0)) {
3346			if (pa->ifname[0])
3347				pfi_kkif_unref(pa->kif);
3348			PF_RULES_WUNLOCK();
3349			free(pa, M_PFRULE);
3350			break;
3351		}
3352		TAILQ_INSERT_TAIL(&V_pf_pabuf, pa, entries);
3353		PF_RULES_WUNLOCK();
3354		break;
3355	}
3356
3357	case DIOCGETADDRS: {
3358		struct pfioc_pooladdr	*pp = (struct pfioc_pooladdr *)addr;
3359		struct pf_kpool		*pool;
3360		struct pf_kpooladdr	*pa;
3361
3362		PF_RULES_RLOCK();
3363		pp->nr = 0;
3364		pool = pf_get_kpool(pp->anchor, pp->ticket, pp->r_action,
3365		    pp->r_num, 0, 1, 0);
3366		if (pool == NULL) {
3367			PF_RULES_RUNLOCK();
3368			error = EBUSY;
3369			break;
3370		}
3371		TAILQ_FOREACH(pa, &pool->list, entries)
3372			pp->nr++;
3373		PF_RULES_RUNLOCK();
3374		break;
3375	}
3376
3377	case DIOCGETADDR: {
3378		struct pfioc_pooladdr	*pp = (struct pfioc_pooladdr *)addr;
3379		struct pf_kpool		*pool;
3380		struct pf_kpooladdr	*pa;
3381		u_int32_t		 nr = 0;
3382
3383		PF_RULES_RLOCK();
3384		pool = pf_get_kpool(pp->anchor, pp->ticket, pp->r_action,
3385		    pp->r_num, 0, 1, 1);
3386		if (pool == NULL) {
3387			PF_RULES_RUNLOCK();
3388			error = EBUSY;
3389			break;
3390		}
3391		pa = TAILQ_FIRST(&pool->list);
3392		while ((pa != NULL) && (nr < pp->nr)) {
3393			pa = TAILQ_NEXT(pa, entries);
3394			nr++;
3395		}
3396		if (pa == NULL) {
3397			PF_RULES_RUNLOCK();
3398			error = EBUSY;
3399			break;
3400		}
3401		pf_kpooladdr_to_pooladdr(pa, &pp->addr);
3402		pf_addr_copyout(&pp->addr.addr);
3403		PF_RULES_RUNLOCK();
3404		break;
3405	}
3406
3407	case DIOCCHANGEADDR: {
3408		struct pfioc_pooladdr	*pca = (struct pfioc_pooladdr *)addr;
3409		struct pf_kpool		*pool;
3410		struct pf_kpooladdr	*oldpa = NULL, *newpa = NULL;
3411		struct pf_kruleset	*ruleset;
3412		struct pfi_kkif		*kif = NULL;
3413
3414		if (pca->action < PF_CHANGE_ADD_HEAD ||
3415		    pca->action > PF_CHANGE_REMOVE) {
3416			error = EINVAL;
3417			break;
3418		}
3419		if (pca->addr.addr.type != PF_ADDR_ADDRMASK &&
3420		    pca->addr.addr.type != PF_ADDR_DYNIFTL &&
3421		    pca->addr.addr.type != PF_ADDR_TABLE) {
3422			error = EINVAL;
3423			break;
3424		}
3425		if (pca->addr.addr.p.dyn != NULL) {
3426			error = EINVAL;
3427			break;
3428		}
3429
3430		if (pca->action != PF_CHANGE_REMOVE) {
3431#ifndef INET
3432			if (pca->af == AF_INET) {
3433				error = EAFNOSUPPORT;
3434				break;
3435			}
3436#endif /* INET */
3437#ifndef INET6
3438			if (pca->af == AF_INET6) {
3439				error = EAFNOSUPPORT;
3440				break;
3441			}
3442#endif /* INET6 */
3443			newpa = malloc(sizeof(*newpa), M_PFRULE, M_WAITOK);
3444			bcopy(&pca->addr, newpa, sizeof(struct pf_pooladdr));
3445			if (newpa->ifname[0])
3446				kif = pf_kkif_create(M_WAITOK);
3447			newpa->kif = NULL;
3448		}
3449#define	ERROUT(x)	ERROUT_IOCTL(DIOCCHANGEADDR_error, x)
3450		PF_RULES_WLOCK();
3451		ruleset = pf_find_kruleset(pca->anchor);
3452		if (ruleset == NULL)
3453			ERROUT(EBUSY);
3454
3455		pool = pf_get_kpool(pca->anchor, pca->ticket, pca->r_action,
3456		    pca->r_num, pca->r_last, 1, 1);
3457		if (pool == NULL)
3458			ERROUT(EBUSY);
3459
3460		if (pca->action != PF_CHANGE_REMOVE) {
3461			if (newpa->ifname[0]) {
3462				newpa->kif = pfi_kkif_attach(kif, newpa->ifname);
3463				pfi_kkif_ref(newpa->kif);
3464				kif = NULL;
3465			}
3466
3467			switch (newpa->addr.type) {
3468			case PF_ADDR_DYNIFTL:
3469				error = pfi_dynaddr_setup(&newpa->addr,
3470				    pca->af);
3471				break;
3472			case PF_ADDR_TABLE:
3473				newpa->addr.p.tbl = pfr_attach_table(ruleset,
3474				    newpa->addr.v.tblname);
3475				if (newpa->addr.p.tbl == NULL)
3476					error = ENOMEM;
3477				break;
3478			}
3479			if (error)
3480				goto DIOCCHANGEADDR_error;
3481		}
3482
3483		switch (pca->action) {
3484		case PF_CHANGE_ADD_HEAD:
3485			oldpa = TAILQ_FIRST(&pool->list);
3486			break;
3487		case PF_CHANGE_ADD_TAIL:
3488			oldpa = TAILQ_LAST(&pool->list, pf_kpalist);
3489			break;
3490		default:
3491			oldpa = TAILQ_FIRST(&pool->list);
3492			for (int i = 0; oldpa && i < pca->nr; i++)
3493				oldpa = TAILQ_NEXT(oldpa, entries);
3494
3495			if (oldpa == NULL)
3496				ERROUT(EINVAL);
3497		}
3498
3499		if (pca->action == PF_CHANGE_REMOVE) {
3500			TAILQ_REMOVE(&pool->list, oldpa, entries);
3501			switch (oldpa->addr.type) {
3502			case PF_ADDR_DYNIFTL:
3503				pfi_dynaddr_remove(oldpa->addr.p.dyn);
3504				break;
3505			case PF_ADDR_TABLE:
3506				pfr_detach_table(oldpa->addr.p.tbl);
3507				break;
3508			}
3509			if (oldpa->kif)
3510				pfi_kkif_unref(oldpa->kif);
3511			free(oldpa, M_PFRULE);
3512		} else {
3513			if (oldpa == NULL)
3514				TAILQ_INSERT_TAIL(&pool->list, newpa, entries);
3515			else if (pca->action == PF_CHANGE_ADD_HEAD ||
3516			    pca->action == PF_CHANGE_ADD_BEFORE)
3517				TAILQ_INSERT_BEFORE(oldpa, newpa, entries);
3518			else
3519				TAILQ_INSERT_AFTER(&pool->list, oldpa,
3520				    newpa, entries);
3521		}
3522
3523		pool->cur = TAILQ_FIRST(&pool->list);
3524		PF_ACPY(&pool->counter, &pool->cur->addr.v.a.addr, pca->af);
3525		PF_RULES_WUNLOCK();
3526		break;
3527
3528#undef ERROUT
3529DIOCCHANGEADDR_error:
3530		if (newpa != NULL) {
3531			if (newpa->kif)
3532				pfi_kkif_unref(newpa->kif);
3533			free(newpa, M_PFRULE);
3534		}
3535		PF_RULES_WUNLOCK();
3536		pf_kkif_free(kif);
3537		break;
3538	}
3539
3540	case DIOCGETRULESETS: {
3541		struct pfioc_ruleset	*pr = (struct pfioc_ruleset *)addr;
3542		struct pf_kruleset	*ruleset;
3543		struct pf_kanchor	*anchor;
3544
3545		PF_RULES_RLOCK();
3546		pr->path[sizeof(pr->path) - 1] = 0;
3547		if ((ruleset = pf_find_kruleset(pr->path)) == NULL) {
3548			PF_RULES_RUNLOCK();
3549			error = ENOENT;
3550			break;
3551		}
3552		pr->nr = 0;
3553		if (ruleset->anchor == NULL) {
3554			/* XXX kludge for pf_main_ruleset */
3555			RB_FOREACH(anchor, pf_kanchor_global, &V_pf_anchors)
3556				if (anchor->parent == NULL)
3557					pr->nr++;
3558		} else {
3559			RB_FOREACH(anchor, pf_kanchor_node,
3560			    &ruleset->anchor->children)
3561				pr->nr++;
3562		}
3563		PF_RULES_RUNLOCK();
3564		break;
3565	}
3566
3567	case DIOCGETRULESET: {
3568		struct pfioc_ruleset	*pr = (struct pfioc_ruleset *)addr;
3569		struct pf_kruleset	*ruleset;
3570		struct pf_kanchor	*anchor;
3571		u_int32_t		 nr = 0;
3572
3573		PF_RULES_RLOCK();
3574		pr->path[sizeof(pr->path) - 1] = 0;
3575		if ((ruleset = pf_find_kruleset(pr->path)) == NULL) {
3576			PF_RULES_RUNLOCK();
3577			error = ENOENT;
3578			break;
3579		}
3580		pr->name[0] = 0;
3581		if (ruleset->anchor == NULL) {
3582			/* XXX kludge for pf_main_ruleset */
3583			RB_FOREACH(anchor, pf_kanchor_global, &V_pf_anchors)
3584				if (anchor->parent == NULL && nr++ == pr->nr) {
3585					strlcpy(pr->name, anchor->name,
3586					    sizeof(pr->name));
3587					break;
3588				}
3589		} else {
3590			RB_FOREACH(anchor, pf_kanchor_node,
3591			    &ruleset->anchor->children)
3592				if (nr++ == pr->nr) {
3593					strlcpy(pr->name, anchor->name,
3594					    sizeof(pr->name));
3595					break;
3596				}
3597		}
3598		if (!pr->name[0])
3599			error = EBUSY;
3600		PF_RULES_RUNLOCK();
3601		break;
3602	}
3603
3604	case DIOCRCLRTABLES: {
3605		struct pfioc_table *io = (struct pfioc_table *)addr;
3606
3607		if (io->pfrio_esize != 0) {
3608			error = ENODEV;
3609			break;
3610		}
3611		PF_RULES_WLOCK();
3612		error = pfr_clr_tables(&io->pfrio_table, &io->pfrio_ndel,
3613		    io->pfrio_flags | PFR_FLAG_USERIOCTL);
3614		PF_RULES_WUNLOCK();
3615		break;
3616	}
3617
3618	case DIOCRADDTABLES: {
3619		struct pfioc_table *io = (struct pfioc_table *)addr;
3620		struct pfr_table *pfrts;
3621		size_t totlen;
3622
3623		if (io->pfrio_esize != sizeof(struct pfr_table)) {
3624			error = ENODEV;
3625			break;
3626		}
3627
3628		if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount ||
3629		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_table))) {
3630			error = ENOMEM;
3631			break;
3632		}
3633
3634		totlen = io->pfrio_size * sizeof(struct pfr_table);
3635		pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
3636		    M_TEMP, M_WAITOK);
3637		error = copyin(io->pfrio_buffer, pfrts, totlen);
3638		if (error) {
3639			free(pfrts, M_TEMP);
3640			break;
3641		}
3642		PF_RULES_WLOCK();
3643		error = pfr_add_tables(pfrts, io->pfrio_size,
3644		    &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL);
3645		PF_RULES_WUNLOCK();
3646		free(pfrts, M_TEMP);
3647		break;
3648	}
3649
3650	case DIOCRDELTABLES: {
3651		struct pfioc_table *io = (struct pfioc_table *)addr;
3652		struct pfr_table *pfrts;
3653		size_t totlen;
3654
3655		if (io->pfrio_esize != sizeof(struct pfr_table)) {
3656			error = ENODEV;
3657			break;
3658		}
3659
3660		if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount ||
3661		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_table))) {
3662			error = ENOMEM;
3663			break;
3664		}
3665
3666		totlen = io->pfrio_size * sizeof(struct pfr_table);
3667		pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
3668		    M_TEMP, M_WAITOK);
3669		error = copyin(io->pfrio_buffer, pfrts, totlen);
3670		if (error) {
3671			free(pfrts, M_TEMP);
3672			break;
3673		}
3674		PF_RULES_WLOCK();
3675		error = pfr_del_tables(pfrts, io->pfrio_size,
3676		    &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
3677		PF_RULES_WUNLOCK();
3678		free(pfrts, M_TEMP);
3679		break;
3680	}
3681
3682	case DIOCRGETTABLES: {
3683		struct pfioc_table *io = (struct pfioc_table *)addr;
3684		struct pfr_table *pfrts;
3685		size_t totlen;
3686		int n;
3687
3688		if (io->pfrio_esize != sizeof(struct pfr_table)) {
3689			error = ENODEV;
3690			break;
3691		}
3692		PF_RULES_RLOCK();
3693		n = pfr_table_count(&io->pfrio_table, io->pfrio_flags);
3694		if (n < 0) {
3695			PF_RULES_RUNLOCK();
3696			error = EINVAL;
3697			break;
3698		}
3699		io->pfrio_size = min(io->pfrio_size, n);
3700
3701		totlen = io->pfrio_size * sizeof(struct pfr_table);
3702
3703		pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
3704		    M_TEMP, M_NOWAIT);
3705		if (pfrts == NULL) {
3706			error = ENOMEM;
3707			PF_RULES_RUNLOCK();
3708			break;
3709		}
3710		error = pfr_get_tables(&io->pfrio_table, pfrts,
3711		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
3712		PF_RULES_RUNLOCK();
3713		if (error == 0)
3714			error = copyout(pfrts, io->pfrio_buffer, totlen);
3715		free(pfrts, M_TEMP);
3716		break;
3717	}
3718
3719	case DIOCRGETTSTATS: {
3720		struct pfioc_table *io = (struct pfioc_table *)addr;
3721		struct pfr_tstats *pfrtstats;
3722		size_t totlen;
3723		int n;
3724
3725		if (io->pfrio_esize != sizeof(struct pfr_tstats)) {
3726			error = ENODEV;
3727			break;
3728		}
3729		PF_RULES_WLOCK();
3730		n = pfr_table_count(&io->pfrio_table, io->pfrio_flags);
3731		if (n < 0) {
3732			PF_RULES_WUNLOCK();
3733			error = EINVAL;
3734			break;
3735		}
3736		io->pfrio_size = min(io->pfrio_size, n);
3737
3738		totlen = io->pfrio_size * sizeof(struct pfr_tstats);
3739		pfrtstats = mallocarray(io->pfrio_size,
3740		    sizeof(struct pfr_tstats), M_TEMP, M_NOWAIT);
3741		if (pfrtstats == NULL) {
3742			error = ENOMEM;
3743			PF_RULES_WUNLOCK();
3744			break;
3745		}
3746		error = pfr_get_tstats(&io->pfrio_table, pfrtstats,
3747		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
3748		PF_RULES_WUNLOCK();
3749		if (error == 0)
3750			error = copyout(pfrtstats, io->pfrio_buffer, totlen);
3751		free(pfrtstats, M_TEMP);
3752		break;
3753	}
3754
3755	case DIOCRCLRTSTATS: {
3756		struct pfioc_table *io = (struct pfioc_table *)addr;
3757		struct pfr_table *pfrts;
3758		size_t totlen;
3759
3760		if (io->pfrio_esize != sizeof(struct pfr_table)) {
3761			error = ENODEV;
3762			break;
3763		}
3764
3765		if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount ||
3766		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_table))) {
3767			/* We used to count tables and use the minimum required
3768			 * size, so we didn't fail on overly large requests.
3769			 * Keep doing so. */
3770			io->pfrio_size = pf_ioctl_maxcount;
3771			break;
3772		}
3773
3774		totlen = io->pfrio_size * sizeof(struct pfr_table);
3775		pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
3776		    M_TEMP, M_WAITOK);
3777		error = copyin(io->pfrio_buffer, pfrts, totlen);
3778		if (error) {
3779			free(pfrts, M_TEMP);
3780			break;
3781		}
3782
3783		PF_RULES_WLOCK();
3784		error = pfr_clr_tstats(pfrts, io->pfrio_size,
3785		    &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL);
3786		PF_RULES_WUNLOCK();
3787		free(pfrts, M_TEMP);
3788		break;
3789	}
3790
3791	case DIOCRSETTFLAGS: {
3792		struct pfioc_table *io = (struct pfioc_table *)addr;
3793		struct pfr_table *pfrts;
3794		size_t totlen;
3795		int n;
3796
3797		if (io->pfrio_esize != sizeof(struct pfr_table)) {
3798			error = ENODEV;
3799			break;
3800		}
3801
3802		PF_RULES_RLOCK();
3803		n = pfr_table_count(&io->pfrio_table, io->pfrio_flags);
3804		if (n < 0) {
3805			PF_RULES_RUNLOCK();
3806			error = EINVAL;
3807			break;
3808		}
3809
3810		io->pfrio_size = min(io->pfrio_size, n);
3811		PF_RULES_RUNLOCK();
3812
3813		totlen = io->pfrio_size * sizeof(struct pfr_table);
3814		pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
3815		    M_TEMP, M_WAITOK);
3816		error = copyin(io->pfrio_buffer, pfrts, totlen);
3817		if (error) {
3818			free(pfrts, M_TEMP);
3819			break;
3820		}
3821		PF_RULES_WLOCK();
3822		error = pfr_set_tflags(pfrts, io->pfrio_size,
3823		    io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange,
3824		    &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
3825		PF_RULES_WUNLOCK();
3826		free(pfrts, M_TEMP);
3827		break;
3828	}
3829
3830	case DIOCRCLRADDRS: {
3831		struct pfioc_table *io = (struct pfioc_table *)addr;
3832
3833		if (io->pfrio_esize != 0) {
3834			error = ENODEV;
3835			break;
3836		}
3837		PF_RULES_WLOCK();
3838		error = pfr_clr_addrs(&io->pfrio_table, &io->pfrio_ndel,
3839		    io->pfrio_flags | PFR_FLAG_USERIOCTL);
3840		PF_RULES_WUNLOCK();
3841		break;
3842	}
3843
3844	case DIOCRADDADDRS: {
3845		struct pfioc_table *io = (struct pfioc_table *)addr;
3846		struct pfr_addr *pfras;
3847		size_t totlen;
3848
3849		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
3850			error = ENODEV;
3851			break;
3852		}
3853		if (io->pfrio_size < 0 ||
3854		    io->pfrio_size > pf_ioctl_maxcount ||
3855		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
3856			error = EINVAL;
3857			break;
3858		}
3859		totlen = io->pfrio_size * sizeof(struct pfr_addr);
3860		pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
3861		    M_TEMP, M_WAITOK);
3862		error = copyin(io->pfrio_buffer, pfras, totlen);
3863		if (error) {
3864			free(pfras, M_TEMP);
3865			break;
3866		}
3867		PF_RULES_WLOCK();
3868		error = pfr_add_addrs(&io->pfrio_table, pfras,
3869		    io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags |
3870		    PFR_FLAG_USERIOCTL);
3871		PF_RULES_WUNLOCK();
3872		if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
3873			error = copyout(pfras, io->pfrio_buffer, totlen);
3874		free(pfras, M_TEMP);
3875		break;
3876	}
3877
3878	case DIOCRDELADDRS: {
3879		struct pfioc_table *io = (struct pfioc_table *)addr;
3880		struct pfr_addr *pfras;
3881		size_t totlen;
3882
3883		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
3884			error = ENODEV;
3885			break;
3886		}
3887		if (io->pfrio_size < 0 ||
3888		    io->pfrio_size > pf_ioctl_maxcount ||
3889		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
3890			error = EINVAL;
3891			break;
3892		}
3893		totlen = io->pfrio_size * sizeof(struct pfr_addr);
3894		pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
3895		    M_TEMP, M_WAITOK);
3896		error = copyin(io->pfrio_buffer, pfras, totlen);
3897		if (error) {
3898			free(pfras, M_TEMP);
3899			break;
3900		}
3901		PF_RULES_WLOCK();
3902		error = pfr_del_addrs(&io->pfrio_table, pfras,
3903		    io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags |
3904		    PFR_FLAG_USERIOCTL);
3905		PF_RULES_WUNLOCK();
3906		if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
3907			error = copyout(pfras, io->pfrio_buffer, totlen);
3908		free(pfras, M_TEMP);
3909		break;
3910	}
3911
3912	case DIOCRSETADDRS: {
3913		struct pfioc_table *io = (struct pfioc_table *)addr;
3914		struct pfr_addr *pfras;
3915		size_t totlen, count;
3916
3917		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
3918			error = ENODEV;
3919			break;
3920		}
3921		if (io->pfrio_size < 0 || io->pfrio_size2 < 0) {
3922			error = EINVAL;
3923			break;
3924		}
3925		count = max(io->pfrio_size, io->pfrio_size2);
3926		if (count > pf_ioctl_maxcount ||
3927		    WOULD_OVERFLOW(count, sizeof(struct pfr_addr))) {
3928			error = EINVAL;
3929			break;
3930		}
3931		totlen = count * sizeof(struct pfr_addr);
3932		pfras = mallocarray(count, sizeof(struct pfr_addr), M_TEMP,
3933		    M_WAITOK);
3934		error = copyin(io->pfrio_buffer, pfras, totlen);
3935		if (error) {
3936			free(pfras, M_TEMP);
3937			break;
3938		}
3939		PF_RULES_WLOCK();
3940		error = pfr_set_addrs(&io->pfrio_table, pfras,
3941		    io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd,
3942		    &io->pfrio_ndel, &io->pfrio_nchange, io->pfrio_flags |
3943		    PFR_FLAG_USERIOCTL, 0);
3944		PF_RULES_WUNLOCK();
3945		if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
3946			error = copyout(pfras, io->pfrio_buffer, totlen);
3947		free(pfras, M_TEMP);
3948		break;
3949	}
3950
3951	case DIOCRGETADDRS: {
3952		struct pfioc_table *io = (struct pfioc_table *)addr;
3953		struct pfr_addr *pfras;
3954		size_t totlen;
3955
3956		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
3957			error = ENODEV;
3958			break;
3959		}
3960		if (io->pfrio_size < 0 ||
3961		    io->pfrio_size > pf_ioctl_maxcount ||
3962		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
3963			error = EINVAL;
3964			break;
3965		}
3966		totlen = io->pfrio_size * sizeof(struct pfr_addr);
3967		pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
3968		    M_TEMP, M_WAITOK);
3969		PF_RULES_RLOCK();
3970		error = pfr_get_addrs(&io->pfrio_table, pfras,
3971		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
3972		PF_RULES_RUNLOCK();
3973		if (error == 0)
3974			error = copyout(pfras, io->pfrio_buffer, totlen);
3975		free(pfras, M_TEMP);
3976		break;
3977	}
3978
3979	case DIOCRGETASTATS: {
3980		struct pfioc_table *io = (struct pfioc_table *)addr;
3981		struct pfr_astats *pfrastats;
3982		size_t totlen;
3983
3984		if (io->pfrio_esize != sizeof(struct pfr_astats)) {
3985			error = ENODEV;
3986			break;
3987		}
3988		if (io->pfrio_size < 0 ||
3989		    io->pfrio_size > pf_ioctl_maxcount ||
3990		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_astats))) {
3991			error = EINVAL;
3992			break;
3993		}
3994		totlen = io->pfrio_size * sizeof(struct pfr_astats);
3995		pfrastats = mallocarray(io->pfrio_size,
3996		    sizeof(struct pfr_astats), M_TEMP, M_WAITOK);
3997		PF_RULES_RLOCK();
3998		error = pfr_get_astats(&io->pfrio_table, pfrastats,
3999		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
4000		PF_RULES_RUNLOCK();
4001		if (error == 0)
4002			error = copyout(pfrastats, io->pfrio_buffer, totlen);
4003		free(pfrastats, M_TEMP);
4004		break;
4005	}
4006
4007	case DIOCRCLRASTATS: {
4008		struct pfioc_table *io = (struct pfioc_table *)addr;
4009		struct pfr_addr *pfras;
4010		size_t totlen;
4011
4012		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
4013			error = ENODEV;
4014			break;
4015		}
4016		if (io->pfrio_size < 0 ||
4017		    io->pfrio_size > pf_ioctl_maxcount ||
4018		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
4019			error = EINVAL;
4020			break;
4021		}
4022		totlen = io->pfrio_size * sizeof(struct pfr_addr);
4023		pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
4024		    M_TEMP, M_WAITOK);
4025		error = copyin(io->pfrio_buffer, pfras, totlen);
4026		if (error) {
4027			free(pfras, M_TEMP);
4028			break;
4029		}
4030		PF_RULES_WLOCK();
4031		error = pfr_clr_astats(&io->pfrio_table, pfras,
4032		    io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags |
4033		    PFR_FLAG_USERIOCTL);
4034		PF_RULES_WUNLOCK();
4035		if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
4036			error = copyout(pfras, io->pfrio_buffer, totlen);
4037		free(pfras, M_TEMP);
4038		break;
4039	}
4040
4041	case DIOCRTSTADDRS: {
4042		struct pfioc_table *io = (struct pfioc_table *)addr;
4043		struct pfr_addr *pfras;
4044		size_t totlen;
4045
4046		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
4047			error = ENODEV;
4048			break;
4049		}
4050		if (io->pfrio_size < 0 ||
4051		    io->pfrio_size > pf_ioctl_maxcount ||
4052		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
4053			error = EINVAL;
4054			break;
4055		}
4056		totlen = io->pfrio_size * sizeof(struct pfr_addr);
4057		pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
4058		    M_TEMP, M_WAITOK);
4059		error = copyin(io->pfrio_buffer, pfras, totlen);
4060		if (error) {
4061			free(pfras, M_TEMP);
4062			break;
4063		}
4064		PF_RULES_RLOCK();
4065		error = pfr_tst_addrs(&io->pfrio_table, pfras,
4066		    io->pfrio_size, &io->pfrio_nmatch, io->pfrio_flags |
4067		    PFR_FLAG_USERIOCTL);
4068		PF_RULES_RUNLOCK();
4069		if (error == 0)
4070			error = copyout(pfras, io->pfrio_buffer, totlen);
4071		free(pfras, M_TEMP);
4072		break;
4073	}
4074
4075	case DIOCRINADEFINE: {
4076		struct pfioc_table *io = (struct pfioc_table *)addr;
4077		struct pfr_addr *pfras;
4078		size_t totlen;
4079
4080		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
4081			error = ENODEV;
4082			break;
4083		}
4084		if (io->pfrio_size < 0 ||
4085		    io->pfrio_size > pf_ioctl_maxcount ||
4086		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
4087			error = EINVAL;
4088			break;
4089		}
4090		totlen = io->pfrio_size * sizeof(struct pfr_addr);
4091		pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
4092		    M_TEMP, M_WAITOK);
4093		error = copyin(io->pfrio_buffer, pfras, totlen);
4094		if (error) {
4095			free(pfras, M_TEMP);
4096			break;
4097		}
4098		PF_RULES_WLOCK();
4099		error = pfr_ina_define(&io->pfrio_table, pfras,
4100		    io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr,
4101		    io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL);
4102		PF_RULES_WUNLOCK();
4103		free(pfras, M_TEMP);
4104		break;
4105	}
4106
4107	case DIOCOSFPADD: {
4108		struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr;
4109		PF_RULES_WLOCK();
4110		error = pf_osfp_add(io);
4111		PF_RULES_WUNLOCK();
4112		break;
4113	}
4114
4115	case DIOCOSFPGET: {
4116		struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr;
4117		PF_RULES_RLOCK();
4118		error = pf_osfp_get(io);
4119		PF_RULES_RUNLOCK();
4120		break;
4121	}
4122
4123	case DIOCXBEGIN: {
4124		struct pfioc_trans	*io = (struct pfioc_trans *)addr;
4125		struct pfioc_trans_e	*ioes, *ioe;
4126		size_t			 totlen;
4127		int			 i;
4128
4129		if (io->esize != sizeof(*ioe)) {
4130			error = ENODEV;
4131			break;
4132		}
4133		if (io->size < 0 ||
4134		    io->size > pf_ioctl_maxcount ||
4135		    WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) {
4136			error = EINVAL;
4137			break;
4138		}
4139		totlen = sizeof(struct pfioc_trans_e) * io->size;
4140		ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e),
4141		    M_TEMP, M_WAITOK);
4142		error = copyin(io->array, ioes, totlen);
4143		if (error) {
4144			free(ioes, M_TEMP);
4145			break;
4146		}
4147		PF_RULES_WLOCK();
4148		for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
4149			switch (ioe->rs_num) {
4150#ifdef ALTQ
4151			case PF_RULESET_ALTQ:
4152				if (ioe->anchor[0]) {
4153					PF_RULES_WUNLOCK();
4154					free(ioes, M_TEMP);
4155					error = EINVAL;
4156					goto fail;
4157				}
4158				if ((error = pf_begin_altq(&ioe->ticket))) {
4159					PF_RULES_WUNLOCK();
4160					free(ioes, M_TEMP);
4161					goto fail;
4162				}
4163				break;
4164#endif /* ALTQ */
4165			case PF_RULESET_TABLE:
4166			    {
4167				struct pfr_table table;
4168
4169				bzero(&table, sizeof(table));
4170				strlcpy(table.pfrt_anchor, ioe->anchor,
4171				    sizeof(table.pfrt_anchor));
4172				if ((error = pfr_ina_begin(&table,
4173				    &ioe->ticket, NULL, 0))) {
4174					PF_RULES_WUNLOCK();
4175					free(ioes, M_TEMP);
4176					goto fail;
4177				}
4178				break;
4179			    }
4180			default:
4181				if ((error = pf_begin_rules(&ioe->ticket,
4182				    ioe->rs_num, ioe->anchor))) {
4183					PF_RULES_WUNLOCK();
4184					free(ioes, M_TEMP);
4185					goto fail;
4186				}
4187				break;
4188			}
4189		}
4190		PF_RULES_WUNLOCK();
4191		error = copyout(ioes, io->array, totlen);
4192		free(ioes, M_TEMP);
4193		break;
4194	}
4195
4196	case DIOCXROLLBACK: {
4197		struct pfioc_trans	*io = (struct pfioc_trans *)addr;
4198		struct pfioc_trans_e	*ioe, *ioes;
4199		size_t			 totlen;
4200		int			 i;
4201
4202		if (io->esize != sizeof(*ioe)) {
4203			error = ENODEV;
4204			break;
4205		}
4206		if (io->size < 0 ||
4207		    io->size > pf_ioctl_maxcount ||
4208		    WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) {
4209			error = EINVAL;
4210			break;
4211		}
4212		totlen = sizeof(struct pfioc_trans_e) * io->size;
4213		ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e),
4214		    M_TEMP, M_WAITOK);
4215		error = copyin(io->array, ioes, totlen);
4216		if (error) {
4217			free(ioes, M_TEMP);
4218			break;
4219		}
4220		PF_RULES_WLOCK();
4221		for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
4222			switch (ioe->rs_num) {
4223#ifdef ALTQ
4224			case PF_RULESET_ALTQ:
4225				if (ioe->anchor[0]) {
4226					PF_RULES_WUNLOCK();
4227					free(ioes, M_TEMP);
4228					error = EINVAL;
4229					goto fail;
4230				}
4231				if ((error = pf_rollback_altq(ioe->ticket))) {
4232					PF_RULES_WUNLOCK();
4233					free(ioes, M_TEMP);
4234					goto fail; /* really bad */
4235				}
4236				break;
4237#endif /* ALTQ */
4238			case PF_RULESET_TABLE:
4239			    {
4240				struct pfr_table table;
4241
4242				bzero(&table, sizeof(table));
4243				strlcpy(table.pfrt_anchor, ioe->anchor,
4244				    sizeof(table.pfrt_anchor));
4245				if ((error = pfr_ina_rollback(&table,
4246				    ioe->ticket, NULL, 0))) {
4247					PF_RULES_WUNLOCK();
4248					free(ioes, M_TEMP);
4249					goto fail; /* really bad */
4250				}
4251				break;
4252			    }
4253			default:
4254				if ((error = pf_rollback_rules(ioe->ticket,
4255				    ioe->rs_num, ioe->anchor))) {
4256					PF_RULES_WUNLOCK();
4257					free(ioes, M_TEMP);
4258					goto fail; /* really bad */
4259				}
4260				break;
4261			}
4262		}
4263		PF_RULES_WUNLOCK();
4264		free(ioes, M_TEMP);
4265		break;
4266	}
4267
4268	case DIOCXCOMMIT: {
4269		struct pfioc_trans	*io = (struct pfioc_trans *)addr;
4270		struct pfioc_trans_e	*ioe, *ioes;
4271		struct pf_kruleset	*rs;
4272		size_t			 totlen;
4273		int			 i;
4274
4275		if (io->esize != sizeof(*ioe)) {
4276			error = ENODEV;
4277			break;
4278		}
4279
4280		if (io->size < 0 ||
4281		    io->size > pf_ioctl_maxcount ||
4282		    WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) {
4283			error = EINVAL;
4284			break;
4285		}
4286
4287		totlen = sizeof(struct pfioc_trans_e) * io->size;
4288		ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e),
4289		    M_TEMP, M_WAITOK);
4290		error = copyin(io->array, ioes, totlen);
4291		if (error) {
4292			free(ioes, M_TEMP);
4293			break;
4294		}
4295		PF_RULES_WLOCK();
4296		/* First makes sure everything will succeed. */
4297		for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
4298			switch (ioe->rs_num) {
4299#ifdef ALTQ
4300			case PF_RULESET_ALTQ:
4301				if (ioe->anchor[0]) {
4302					PF_RULES_WUNLOCK();
4303					free(ioes, M_TEMP);
4304					error = EINVAL;
4305					goto fail;
4306				}
4307				if (!V_altqs_inactive_open || ioe->ticket !=
4308				    V_ticket_altqs_inactive) {
4309					PF_RULES_WUNLOCK();
4310					free(ioes, M_TEMP);
4311					error = EBUSY;
4312					goto fail;
4313				}
4314				break;
4315#endif /* ALTQ */
4316			case PF_RULESET_TABLE:
4317				rs = pf_find_kruleset(ioe->anchor);
4318				if (rs == NULL || !rs->topen || ioe->ticket !=
4319				    rs->tticket) {
4320					PF_RULES_WUNLOCK();
4321					free(ioes, M_TEMP);
4322					error = EBUSY;
4323					goto fail;
4324				}
4325				break;
4326			default:
4327				if (ioe->rs_num < 0 || ioe->rs_num >=
4328				    PF_RULESET_MAX) {
4329					PF_RULES_WUNLOCK();
4330					free(ioes, M_TEMP);
4331					error = EINVAL;
4332					goto fail;
4333				}
4334				rs = pf_find_kruleset(ioe->anchor);
4335				if (rs == NULL ||
4336				    !rs->rules[ioe->rs_num].inactive.open ||
4337				    rs->rules[ioe->rs_num].inactive.ticket !=
4338				    ioe->ticket) {
4339					PF_RULES_WUNLOCK();
4340					free(ioes, M_TEMP);
4341					error = EBUSY;
4342					goto fail;
4343				}
4344				break;
4345			}
4346		}
4347		/* Now do the commit - no errors should happen here. */
4348		for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
4349			switch (ioe->rs_num) {
4350#ifdef ALTQ
4351			case PF_RULESET_ALTQ:
4352				if ((error = pf_commit_altq(ioe->ticket))) {
4353					PF_RULES_WUNLOCK();
4354					free(ioes, M_TEMP);
4355					goto fail; /* really bad */
4356				}
4357				break;
4358#endif /* ALTQ */
4359			case PF_RULESET_TABLE:
4360			    {
4361				struct pfr_table table;
4362
4363				bzero(&table, sizeof(table));
4364				strlcpy(table.pfrt_anchor, ioe->anchor,
4365				    sizeof(table.pfrt_anchor));
4366				if ((error = pfr_ina_commit(&table,
4367				    ioe->ticket, NULL, NULL, 0))) {
4368					PF_RULES_WUNLOCK();
4369					free(ioes, M_TEMP);
4370					goto fail; /* really bad */
4371				}
4372				break;
4373			    }
4374			default:
4375				if ((error = pf_commit_rules(ioe->ticket,
4376				    ioe->rs_num, ioe->anchor))) {
4377					PF_RULES_WUNLOCK();
4378					free(ioes, M_TEMP);
4379					goto fail; /* really bad */
4380				}
4381				break;
4382			}
4383		}
4384		PF_RULES_WUNLOCK();
4385		free(ioes, M_TEMP);
4386		break;
4387	}
4388
4389	case DIOCGETSRCNODES: {
4390		struct pfioc_src_nodes	*psn = (struct pfioc_src_nodes *)addr;
4391		struct pf_srchash	*sh;
4392		struct pf_ksrc_node	*n;
4393		struct pf_src_node	*p, *pstore;
4394		uint32_t		 i, nr = 0;
4395
4396		for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask;
4397				i++, sh++) {
4398			PF_HASHROW_LOCK(sh);
4399			LIST_FOREACH(n, &sh->nodes, entry)
4400				nr++;
4401			PF_HASHROW_UNLOCK(sh);
4402		}
4403
4404		psn->psn_len = min(psn->psn_len,
4405		    sizeof(struct pf_src_node) * nr);
4406
4407		if (psn->psn_len == 0) {
4408			psn->psn_len = sizeof(struct pf_src_node) * nr;
4409			break;
4410		}
4411
4412		nr = 0;
4413
4414		p = pstore = malloc(psn->psn_len, M_TEMP, M_WAITOK | M_ZERO);
4415		for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask;
4416		    i++, sh++) {
4417		    PF_HASHROW_LOCK(sh);
4418		    LIST_FOREACH(n, &sh->nodes, entry) {
4419
4420			if ((nr + 1) * sizeof(*p) > (unsigned)psn->psn_len)
4421				break;
4422
4423			pf_src_node_copy(n, p);
4424
4425			p++;
4426			nr++;
4427		    }
4428		    PF_HASHROW_UNLOCK(sh);
4429		}
4430		error = copyout(pstore, psn->psn_src_nodes,
4431		    sizeof(struct pf_src_node) * nr);
4432		if (error) {
4433			free(pstore, M_TEMP);
4434			break;
4435		}
4436		psn->psn_len = sizeof(struct pf_src_node) * nr;
4437		free(pstore, M_TEMP);
4438		break;
4439	}
4440
4441	case DIOCCLRSRCNODES: {
4442		pf_clear_srcnodes(NULL);
4443		pf_purge_expired_src_nodes();
4444		break;
4445	}
4446
4447	case DIOCKILLSRCNODES:
4448		pf_kill_srcnodes((struct pfioc_src_node_kill *)addr);
4449		break;
4450
4451	case DIOCKEEPCOUNTERS:
4452		error = pf_keepcounters((struct pfioc_nv *)addr);
4453		break;
4454
4455	case DIOCSETHOSTID: {
4456		u_int32_t	*hostid = (u_int32_t *)addr;
4457
4458		PF_RULES_WLOCK();
4459		if (*hostid == 0)
4460			V_pf_status.hostid = arc4random();
4461		else
4462			V_pf_status.hostid = *hostid;
4463		PF_RULES_WUNLOCK();
4464		break;
4465	}
4466
4467	case DIOCOSFPFLUSH:
4468		PF_RULES_WLOCK();
4469		pf_osfp_flush();
4470		PF_RULES_WUNLOCK();
4471		break;
4472
4473	case DIOCIGETIFACES: {
4474		struct pfioc_iface *io = (struct pfioc_iface *)addr;
4475		struct pfi_kif *ifstore;
4476		size_t bufsiz;
4477
4478		if (io->pfiio_esize != sizeof(struct pfi_kif)) {
4479			error = ENODEV;
4480			break;
4481		}
4482
4483		if (io->pfiio_size < 0 ||
4484		    io->pfiio_size > pf_ioctl_maxcount ||
4485		    WOULD_OVERFLOW(io->pfiio_size, sizeof(struct pfi_kif))) {
4486			error = EINVAL;
4487			break;
4488		}
4489
4490		bufsiz = io->pfiio_size * sizeof(struct pfi_kif);
4491		ifstore = mallocarray(io->pfiio_size, sizeof(struct pfi_kif),
4492		    M_TEMP, M_WAITOK);
4493
4494		PF_RULES_RLOCK();
4495		pfi_get_ifaces(io->pfiio_name, ifstore, &io->pfiio_size);
4496		PF_RULES_RUNLOCK();
4497		error = copyout(ifstore, io->pfiio_buffer, bufsiz);
4498		free(ifstore, M_TEMP);
4499		break;
4500	}
4501
4502	case DIOCSETIFFLAG: {
4503		struct pfioc_iface *io = (struct pfioc_iface *)addr;
4504
4505		PF_RULES_WLOCK();
4506		error = pfi_set_flags(io->pfiio_name, io->pfiio_flags);
4507		PF_RULES_WUNLOCK();
4508		break;
4509	}
4510
4511	case DIOCCLRIFFLAG: {
4512		struct pfioc_iface *io = (struct pfioc_iface *)addr;
4513
4514		PF_RULES_WLOCK();
4515		error = pfi_clear_flags(io->pfiio_name, io->pfiio_flags);
4516		PF_RULES_WUNLOCK();
4517		break;
4518	}
4519
4520	default:
4521		error = ENODEV;
4522		break;
4523	}
4524fail:
4525	if (sx_xlocked(&pf_ioctl_lock))
4526		sx_xunlock(&pf_ioctl_lock);
4527	CURVNET_RESTORE();
4528
4529#undef ERROUT_IOCTL
4530
4531	return (error);
4532}
4533
4534void
4535pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
4536{
4537	bzero(sp, sizeof(struct pfsync_state));
4538
4539	/* copy from state key */
4540	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
4541	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
4542	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
4543	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
4544	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
4545	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
4546	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
4547	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
4548	sp->proto = st->key[PF_SK_WIRE]->proto;
4549	sp->af = st->key[PF_SK_WIRE]->af;
4550
4551	/* copy from state */
4552	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
4553	bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
4554	sp->creation = htonl(time_uptime - st->creation);
4555	sp->expire = pf_state_expires(st);
4556	if (sp->expire <= time_uptime)
4557		sp->expire = htonl(0);
4558	else
4559		sp->expire = htonl(sp->expire - time_uptime);
4560
4561	sp->direction = st->direction;
4562	sp->log = st->log;
4563	sp->timeout = st->timeout;
4564	sp->state_flags = st->state_flags;
4565	if (st->src_node)
4566		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
4567	if (st->nat_src_node)
4568		sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE;
4569
4570	sp->id = st->id;
4571	sp->creatorid = st->creatorid;
4572	pf_state_peer_hton(&st->src, &sp->src);
4573	pf_state_peer_hton(&st->dst, &sp->dst);
4574
4575	if (st->rule.ptr == NULL)
4576		sp->rule = htonl(-1);
4577	else
4578		sp->rule = htonl(st->rule.ptr->nr);
4579	if (st->anchor.ptr == NULL)
4580		sp->anchor = htonl(-1);
4581	else
4582		sp->anchor = htonl(st->anchor.ptr->nr);
4583	if (st->nat_rule.ptr == NULL)
4584		sp->nat_rule = htonl(-1);
4585	else
4586		sp->nat_rule = htonl(st->nat_rule.ptr->nr);
4587
4588	pf_state_counter_hton(counter_u64_fetch(st->packets[0]),
4589	    sp->packets[0]);
4590	pf_state_counter_hton(counter_u64_fetch(st->packets[1]),
4591	    sp->packets[1]);
4592	pf_state_counter_hton(counter_u64_fetch(st->bytes[0]), sp->bytes[0]);
4593	pf_state_counter_hton(counter_u64_fetch(st->bytes[1]), sp->bytes[1]);
4594
4595}
4596
4597static void
4598pf_tbladdr_copyout(struct pf_addr_wrap *aw)
4599{
4600	struct pfr_ktable *kt;
4601
4602	KASSERT(aw->type == PF_ADDR_TABLE, ("%s: type %u", __func__, aw->type));
4603
4604	kt = aw->p.tbl;
4605	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
4606		kt = kt->pfrkt_root;
4607	aw->p.tbl = NULL;
4608	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
4609		kt->pfrkt_cnt : -1;
4610}
4611
4612/*
4613 * XXX - Check for version missmatch!!!
4614 */
4615static void
4616pf_clear_all_states(void)
4617{
4618	struct pf_state	*s;
4619	u_int i;
4620
4621	for (i = 0; i <= pf_hashmask; i++) {
4622		struct pf_idhash *ih = &V_pf_idhash[i];
4623relock:
4624		PF_HASHROW_LOCK(ih);
4625		LIST_FOREACH(s, &ih->states, entry) {
4626			s->timeout = PFTM_PURGE;
4627			/* Don't send out individual delete messages. */
4628			s->state_flags |= PFSTATE_NOSYNC;
4629			pf_unlink_state(s, PF_ENTER_LOCKED);
4630			goto relock;
4631		}
4632		PF_HASHROW_UNLOCK(ih);
4633	}
4634}
4635
4636static int
4637pf_clear_tables(void)
4638{
4639	struct pfioc_table io;
4640	int error;
4641
4642	bzero(&io, sizeof(io));
4643
4644	error = pfr_clr_tables(&io.pfrio_table, &io.pfrio_ndel,
4645	    io.pfrio_flags);
4646
4647	return (error);
4648}
4649
4650static void
4651pf_clear_srcnodes(struct pf_ksrc_node *n)
4652{
4653	struct pf_state *s;
4654	int i;
4655
4656	for (i = 0; i <= pf_hashmask; i++) {
4657		struct pf_idhash *ih = &V_pf_idhash[i];
4658
4659		PF_HASHROW_LOCK(ih);
4660		LIST_FOREACH(s, &ih->states, entry) {
4661			if (n == NULL || n == s->src_node)
4662				s->src_node = NULL;
4663			if (n == NULL || n == s->nat_src_node)
4664				s->nat_src_node = NULL;
4665		}
4666		PF_HASHROW_UNLOCK(ih);
4667	}
4668
4669	if (n == NULL) {
4670		struct pf_srchash *sh;
4671
4672		for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask;
4673		    i++, sh++) {
4674			PF_HASHROW_LOCK(sh);
4675			LIST_FOREACH(n, &sh->nodes, entry) {
4676				n->expire = 1;
4677				n->states = 0;
4678			}
4679			PF_HASHROW_UNLOCK(sh);
4680		}
4681	} else {
4682		/* XXX: hash slot should already be locked here. */
4683		n->expire = 1;
4684		n->states = 0;
4685	}
4686}
4687
4688static void
4689pf_kill_srcnodes(struct pfioc_src_node_kill *psnk)
4690{
4691	struct pf_ksrc_node_list	 kill;
4692
4693	LIST_INIT(&kill);
4694	for (int i = 0; i <= pf_srchashmask; i++) {
4695		struct pf_srchash *sh = &V_pf_srchash[i];
4696		struct pf_ksrc_node *sn, *tmp;
4697
4698		PF_HASHROW_LOCK(sh);
4699		LIST_FOREACH_SAFE(sn, &sh->nodes, entry, tmp)
4700			if (PF_MATCHA(psnk->psnk_src.neg,
4701			      &psnk->psnk_src.addr.v.a.addr,
4702			      &psnk->psnk_src.addr.v.a.mask,
4703			      &sn->addr, sn->af) &&
4704			    PF_MATCHA(psnk->psnk_dst.neg,
4705			      &psnk->psnk_dst.addr.v.a.addr,
4706			      &psnk->psnk_dst.addr.v.a.mask,
4707			      &sn->raddr, sn->af)) {
4708				pf_unlink_src_node(sn);
4709				LIST_INSERT_HEAD(&kill, sn, entry);
4710				sn->expire = 1;
4711			}
4712		PF_HASHROW_UNLOCK(sh);
4713	}
4714
4715	for (int i = 0; i <= pf_hashmask; i++) {
4716		struct pf_idhash *ih = &V_pf_idhash[i];
4717		struct pf_state *s;
4718
4719		PF_HASHROW_LOCK(ih);
4720		LIST_FOREACH(s, &ih->states, entry) {
4721			if (s->src_node && s->src_node->expire == 1)
4722				s->src_node = NULL;
4723			if (s->nat_src_node && s->nat_src_node->expire == 1)
4724				s->nat_src_node = NULL;
4725		}
4726		PF_HASHROW_UNLOCK(ih);
4727	}
4728
4729	psnk->psnk_killed = pf_free_src_nodes(&kill);
4730}
4731
4732static int
4733pf_keepcounters(struct pfioc_nv *nv)
4734{
4735	nvlist_t	*nvl = NULL;
4736	void		*nvlpacked = NULL;
4737	int		 error = 0;
4738
4739#define	ERROUT(x)	ERROUT_FUNCTION(on_error, x)
4740
4741	if (nv->len > pf_ioctl_maxcount)
4742		ERROUT(ENOMEM);
4743
4744	nvlpacked = malloc(nv->len, M_TEMP, M_WAITOK);
4745	if (nvlpacked == NULL)
4746		ERROUT(ENOMEM);
4747
4748	error = copyin(nv->data, nvlpacked, nv->len);
4749	if (error)
4750		ERROUT(error);
4751
4752	nvl = nvlist_unpack(nvlpacked, nv->len, 0);
4753	if (nvl == NULL)
4754		ERROUT(EBADMSG);
4755
4756	if (! nvlist_exists_bool(nvl, "keep_counters"))
4757		ERROUT(EBADMSG);
4758
4759	V_pf_status.keep_counters = nvlist_get_bool(nvl, "keep_counters");
4760
4761on_error:
4762	nvlist_destroy(nvl);
4763	free(nvlpacked, M_TEMP);
4764	return (error);
4765}
4766
4767static unsigned int
4768pf_clear_states(const struct pf_kstate_kill *kill)
4769{
4770	struct pf_state_key_cmp	 match_key;
4771	struct pf_state	*s;
4772	struct pfi_kkif	*kif;
4773	int		 idx;
4774	unsigned int	 killed = 0, dir;
4775
4776	for (unsigned int i = 0; i <= pf_hashmask; i++) {
4777		struct pf_idhash *ih = &V_pf_idhash[i];
4778
4779relock_DIOCCLRSTATES:
4780		PF_HASHROW_LOCK(ih);
4781		LIST_FOREACH(s, &ih->states, entry) {
4782			/* For floating states look at the original kif. */
4783			kif = s->kif == V_pfi_all ? s->orig_kif : s->kif;
4784
4785			if (kill->psk_ifname[0] &&
4786			    strcmp(kill->psk_ifname,
4787			    kif->pfik_name))
4788				continue;
4789
4790			if (kill->psk_kill_match) {
4791				bzero(&match_key, sizeof(match_key));
4792
4793				if (s->direction == PF_OUT) {
4794					dir = PF_IN;
4795					idx = PF_SK_STACK;
4796				} else {
4797					dir = PF_OUT;
4798					idx = PF_SK_WIRE;
4799				}
4800
4801				match_key.af = s->key[idx]->af;
4802				match_key.proto = s->key[idx]->proto;
4803				PF_ACPY(&match_key.addr[0],
4804				    &s->key[idx]->addr[1], match_key.af);
4805				match_key.port[0] = s->key[idx]->port[1];
4806				PF_ACPY(&match_key.addr[1],
4807				    &s->key[idx]->addr[0], match_key.af);
4808				match_key.port[1] = s->key[idx]->port[0];
4809			}
4810
4811			/*
4812			 * Don't send out individual
4813			 * delete messages.
4814			 */
4815			s->state_flags |= PFSTATE_NOSYNC;
4816			pf_unlink_state(s, PF_ENTER_LOCKED);
4817			killed++;
4818
4819			if (kill->psk_kill_match)
4820				killed += pf_kill_matching_state(&match_key,
4821				    dir);
4822
4823			goto relock_DIOCCLRSTATES;
4824		}
4825		PF_HASHROW_UNLOCK(ih);
4826	}
4827
4828	if (V_pfsync_clear_states_ptr != NULL)
4829		V_pfsync_clear_states_ptr(V_pf_status.hostid, kill->psk_ifname);
4830
4831	return (killed);
4832}
4833
4834static int
4835pf_killstates(struct pf_kstate_kill *kill, unsigned int *killed)
4836{
4837	struct pf_state		*s;
4838
4839	if (kill->psk_pfcmp.id) {
4840		if (kill->psk_pfcmp.creatorid == 0)
4841			kill->psk_pfcmp.creatorid = V_pf_status.hostid;
4842		if ((s = pf_find_state_byid(kill->psk_pfcmp.id,
4843		    kill->psk_pfcmp.creatorid))) {
4844			pf_unlink_state(s, PF_ENTER_LOCKED);
4845			*killed = 1;
4846		}
4847		return (0);
4848	}
4849
4850	for (unsigned int i = 0; i <= pf_hashmask; i++)
4851		*killed += pf_killstates_row(kill, &V_pf_idhash[i]);
4852
4853	return (0);
4854}
4855
4856static int
4857pf_killstates_nv(struct pfioc_nv *nv)
4858{
4859	struct pf_kstate_kill	 kill;
4860	nvlist_t		*nvl = NULL;
4861	void			*nvlpacked = NULL;
4862	int			 error = 0;
4863	unsigned int		 killed = 0;
4864
4865#define ERROUT(x)	ERROUT_FUNCTION(on_error, x)
4866
4867	if (nv->len > pf_ioctl_maxcount)
4868		ERROUT(ENOMEM);
4869
4870	nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
4871	if (nvlpacked == NULL)
4872		ERROUT(ENOMEM);
4873
4874	error = copyin(nv->data, nvlpacked, nv->len);
4875	if (error)
4876		ERROUT(error);
4877
4878	nvl = nvlist_unpack(nvlpacked, nv->len, 0);
4879	if (nvl == NULL)
4880		ERROUT(EBADMSG);
4881
4882	error = pf_nvstate_kill_to_kstate_kill(nvl, &kill);
4883	if (error)
4884		ERROUT(error);
4885
4886	error = pf_killstates(&kill, &killed);
4887
4888	free(nvlpacked, M_NVLIST);
4889	nvlpacked = NULL;
4890	nvlist_destroy(nvl);
4891	nvl = nvlist_create(0);
4892	if (nvl == NULL)
4893		ERROUT(ENOMEM);
4894
4895	nvlist_add_number(nvl, "killed", killed);
4896
4897	nvlpacked = nvlist_pack(nvl, &nv->len);
4898	if (nvlpacked == NULL)
4899		ERROUT(ENOMEM);
4900
4901	if (nv->size == 0)
4902		ERROUT(0);
4903	else if (nv->size < nv->len)
4904		ERROUT(ENOSPC);
4905
4906	error = copyout(nvlpacked, nv->data, nv->len);
4907
4908on_error:
4909	nvlist_destroy(nvl);
4910	free(nvlpacked, M_NVLIST);
4911	return (error);
4912}
4913
4914static int
4915pf_clearstates_nv(struct pfioc_nv *nv)
4916{
4917	struct pf_kstate_kill	 kill;
4918	nvlist_t		*nvl = NULL;
4919	void			*nvlpacked = NULL;
4920	int			 error = 0;
4921	unsigned int		 killed;
4922
4923#define ERROUT(x)	ERROUT_FUNCTION(on_error, x)
4924
4925	if (nv->len > pf_ioctl_maxcount)
4926		ERROUT(ENOMEM);
4927
4928	nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
4929	if (nvlpacked == NULL)
4930		ERROUT(ENOMEM);
4931
4932	error = copyin(nv->data, nvlpacked, nv->len);
4933	if (error)
4934		ERROUT(error);
4935
4936	nvl = nvlist_unpack(nvlpacked, nv->len, 0);
4937	if (nvl == NULL)
4938		ERROUT(EBADMSG);
4939
4940	error = pf_nvstate_kill_to_kstate_kill(nvl, &kill);
4941	if (error)
4942		ERROUT(error);
4943
4944	killed = pf_clear_states(&kill);
4945
4946	free(nvlpacked, M_NVLIST);
4947	nvlpacked = NULL;
4948	nvlist_destroy(nvl);
4949	nvl = nvlist_create(0);
4950	if (nvl == NULL)
4951		ERROUT(ENOMEM);
4952
4953	nvlist_add_number(nvl, "killed", killed);
4954
4955	nvlpacked = nvlist_pack(nvl, &nv->len);
4956	if (nvlpacked == NULL)
4957		ERROUT(ENOMEM);
4958
4959	if (nv->size == 0)
4960		ERROUT(0);
4961	else if (nv->size < nv->len)
4962		ERROUT(ENOSPC);
4963
4964	error = copyout(nvlpacked, nv->data, nv->len);
4965
4966#undef ERROUT
4967on_error:
4968	nvlist_destroy(nvl);
4969	free(nvlpacked, M_NVLIST);
4970	return (error);
4971}
4972
4973static int
4974pf_getstate(struct pfioc_nv *nv)
4975{
4976	nvlist_t	*nvl = NULL, *nvls;
4977	void		*nvlpacked = NULL;
4978	struct pf_state	*s = NULL;
4979	int		 error = 0;
4980	uint64_t	 id, creatorid;
4981
4982#define ERROUT(x)	ERROUT_FUNCTION(errout, x)
4983
4984	if (nv->len > pf_ioctl_maxcount)
4985		ERROUT(ENOMEM);
4986
4987	nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
4988	if (nvlpacked == NULL)
4989		ERROUT(ENOMEM);
4990
4991	error = copyin(nv->data, nvlpacked, nv->len);
4992	if (error)
4993		ERROUT(error);
4994
4995	nvl = nvlist_unpack(nvlpacked, nv->len, 0);
4996	if (nvl == NULL)
4997		ERROUT(EBADMSG);
4998
4999	PFNV_CHK(pf_nvuint64(nvl, "id", &id));
5000	PFNV_CHK(pf_nvuint64(nvl, "creatorid", &creatorid));
5001
5002	s = pf_find_state_byid(id, creatorid);
5003	if (s == NULL)
5004		ERROUT(ENOENT);
5005
5006	free(nvlpacked, M_NVLIST);
5007	nvlpacked = NULL;
5008	nvlist_destroy(nvl);
5009	nvl = nvlist_create(0);
5010	if (nvl == NULL)
5011		ERROUT(ENOMEM);
5012
5013	nvls = pf_state_to_nvstate(s);
5014	if (nvls == NULL)
5015		ERROUT(ENOMEM);
5016
5017	nvlist_add_nvlist(nvl, "state", nvls);
5018	nvlist_destroy(nvls);
5019
5020	nvlpacked = nvlist_pack(nvl, &nv->len);
5021	if (nvlpacked == NULL)
5022		ERROUT(ENOMEM);
5023
5024	if (nv->size == 0)
5025		ERROUT(0);
5026	else if (nv->size < nv->len)
5027		ERROUT(ENOSPC);
5028
5029	error = copyout(nvlpacked, nv->data, nv->len);
5030
5031#undef ERROUT
5032errout:
5033	if (s != NULL)
5034		PF_STATE_UNLOCK(s);
5035	free(nvlpacked, M_NVLIST);
5036	nvlist_destroy(nvl);
5037	return (error);
5038}
5039
5040static int
5041pf_getstates(struct pfioc_nv *nv)
5042{
5043	nvlist_t	*nvl = NULL, *nvls;
5044	void		*nvlpacked = NULL;
5045	struct pf_state	*s = NULL;
5046	int		 error = 0;
5047	uint64_t	 count = 0;
5048
5049#define ERROUT(x)	ERROUT_FUNCTION(errout, x)
5050
5051	nvl = nvlist_create(0);
5052	if (nvl == NULL)
5053		ERROUT(ENOMEM);
5054
5055	nvlist_add_number(nvl, "count", uma_zone_get_cur(V_pf_state_z));
5056
5057	for (int i = 0; i < pf_hashmask; i++) {
5058		struct pf_idhash *ih = &V_pf_idhash[i];
5059
5060		PF_HASHROW_LOCK(ih);
5061		LIST_FOREACH(s, &ih->states, entry) {
5062			if (s->timeout == PFTM_UNLINKED)
5063				continue;
5064
5065			nvls = pf_state_to_nvstate(s);
5066			if (nvls == NULL) {
5067				PF_HASHROW_UNLOCK(ih);
5068				ERROUT(ENOMEM);
5069			}
5070			if ((nvlist_size(nvl) + nvlist_size(nvls)) > nv->size) {
5071				/* We've run out of room for more states. */
5072				nvlist_destroy(nvls);
5073				PF_HASHROW_UNLOCK(ih);
5074				goto DIOCGETSTATESNV_full;
5075			}
5076			nvlist_append_nvlist_array(nvl, "states", nvls);
5077			nvlist_destroy(nvls);
5078			count++;
5079		}
5080		PF_HASHROW_UNLOCK(ih);
5081	}
5082
5083	/* We've managed to put them all the available space. Let's make sure
5084	 * 'count' matches our array (that's racy, because we don't hold a lock
5085	 * over all states, only over each row individually. */
5086	(void)nvlist_take_number(nvl, "count");
5087	nvlist_add_number(nvl, "count", count);
5088
5089DIOCGETSTATESNV_full:
5090
5091	nvlpacked = nvlist_pack(nvl, &nv->len);
5092	if (nvlpacked == NULL)
5093		ERROUT(ENOMEM);
5094
5095	if (nv->size == 0)
5096		ERROUT(0);
5097	else if (nv->size < nv->len)
5098		ERROUT(ENOSPC);
5099
5100	error = copyout(nvlpacked, nv->data, nv->len);
5101
5102#undef ERROUT
5103errout:
5104	free(nvlpacked, M_NVLIST);
5105	nvlist_destroy(nvl);
5106	return (error);
5107}
5108
5109/*
5110 * XXX - Check for version missmatch!!!
5111 */
5112
5113/*
5114 * Duplicate pfctl -Fa operation to get rid of as much as we can.
5115 */
5116static int
5117shutdown_pf(void)
5118{
5119	int error = 0;
5120	u_int32_t t[5];
5121	char nn = '\0';
5122
5123	do {
5124		if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn))
5125		    != 0) {
5126			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: SCRUB\n"));
5127			break;
5128		}
5129		if ((error = pf_begin_rules(&t[1], PF_RULESET_FILTER, &nn))
5130		    != 0) {
5131			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: FILTER\n"));
5132			break;		/* XXX: rollback? */
5133		}
5134		if ((error = pf_begin_rules(&t[2], PF_RULESET_NAT, &nn))
5135		    != 0) {
5136			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: NAT\n"));
5137			break;		/* XXX: rollback? */
5138		}
5139		if ((error = pf_begin_rules(&t[3], PF_RULESET_BINAT, &nn))
5140		    != 0) {
5141			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: BINAT\n"));
5142			break;		/* XXX: rollback? */
5143		}
5144		if ((error = pf_begin_rules(&t[4], PF_RULESET_RDR, &nn))
5145		    != 0) {
5146			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: RDR\n"));
5147			break;		/* XXX: rollback? */
5148		}
5149
5150		/* XXX: these should always succeed here */
5151		pf_commit_rules(t[0], PF_RULESET_SCRUB, &nn);
5152		pf_commit_rules(t[1], PF_RULESET_FILTER, &nn);
5153		pf_commit_rules(t[2], PF_RULESET_NAT, &nn);
5154		pf_commit_rules(t[3], PF_RULESET_BINAT, &nn);
5155		pf_commit_rules(t[4], PF_RULESET_RDR, &nn);
5156
5157		if ((error = pf_clear_tables()) != 0)
5158			break;
5159
5160#ifdef ALTQ
5161		if ((error = pf_begin_altq(&t[0])) != 0) {
5162			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: ALTQ\n"));
5163			break;
5164		}
5165		pf_commit_altq(t[0]);
5166#endif
5167
5168		pf_clear_all_states();
5169
5170		pf_clear_srcnodes(NULL);
5171
5172		/* status does not use malloced mem so no need to cleanup */
5173		/* fingerprints and interfaces have their own cleanup code */
5174	} while(0);
5175
5176	return (error);
5177}
5178
5179static pfil_return_t
5180pf_check_return(int chk, struct mbuf **m)
5181{
5182
5183	switch (chk) {
5184	case PF_PASS:
5185		if (*m == NULL)
5186			return (PFIL_CONSUMED);
5187		else
5188			return (PFIL_PASS);
5189		break;
5190	default:
5191		if (*m != NULL) {
5192			m_freem(*m);
5193			*m = NULL;
5194		}
5195		return (PFIL_DROPPED);
5196	}
5197}
5198
5199#ifdef INET
5200static pfil_return_t
5201pf_check_in(struct mbuf **m, struct ifnet *ifp, int flags,
5202    void *ruleset __unused, struct inpcb *inp)
5203{
5204	int chk;
5205
5206	chk = pf_test(PF_IN, flags, ifp, m, inp);
5207
5208	return (pf_check_return(chk, m));
5209}
5210
5211static pfil_return_t
5212pf_check_out(struct mbuf **m, struct ifnet *ifp, int flags,
5213    void *ruleset __unused,  struct inpcb *inp)
5214{
5215	int chk;
5216
5217	chk = pf_test(PF_OUT, flags, ifp, m, inp);
5218
5219	return (pf_check_return(chk, m));
5220}
5221#endif
5222
5223#ifdef INET6
5224static pfil_return_t
5225pf_check6_in(struct mbuf **m, struct ifnet *ifp, int flags,
5226    void *ruleset __unused,  struct inpcb *inp)
5227{
5228	int chk;
5229
5230	/*
5231	 * In case of loopback traffic IPv6 uses the real interface in
5232	 * order to support scoped addresses. In order to support stateful
5233	 * filtering we have change this to lo0 as it is the case in IPv4.
5234	 */
5235	CURVNET_SET(ifp->if_vnet);
5236	chk = pf_test6(PF_IN, flags, (*m)->m_flags & M_LOOP ? V_loif : ifp, m, inp);
5237	CURVNET_RESTORE();
5238
5239	return (pf_check_return(chk, m));
5240}
5241
5242static pfil_return_t
5243pf_check6_out(struct mbuf **m, struct ifnet *ifp, int flags,
5244    void *ruleset __unused,  struct inpcb *inp)
5245{
5246	int chk;
5247
5248	CURVNET_SET(ifp->if_vnet);
5249	chk = pf_test6(PF_OUT, flags, ifp, m, inp);
5250	CURVNET_RESTORE();
5251
5252	return (pf_check_return(chk, m));
5253}
5254#endif /* INET6 */
5255
5256#ifdef INET
5257VNET_DEFINE_STATIC(pfil_hook_t, pf_ip4_in_hook);
5258VNET_DEFINE_STATIC(pfil_hook_t, pf_ip4_out_hook);
5259#define	V_pf_ip4_in_hook	VNET(pf_ip4_in_hook)
5260#define	V_pf_ip4_out_hook	VNET(pf_ip4_out_hook)
5261#endif
5262#ifdef INET6
5263VNET_DEFINE_STATIC(pfil_hook_t, pf_ip6_in_hook);
5264VNET_DEFINE_STATIC(pfil_hook_t, pf_ip6_out_hook);
5265#define	V_pf_ip6_in_hook	VNET(pf_ip6_in_hook)
5266#define	V_pf_ip6_out_hook	VNET(pf_ip6_out_hook)
5267#endif
5268
5269static void
5270hook_pf(void)
5271{
5272	struct pfil_hook_args pha;
5273	struct pfil_link_args pla;
5274	int ret;
5275
5276	if (V_pf_pfil_hooked)
5277		return;
5278
5279	pha.pa_version = PFIL_VERSION;
5280	pha.pa_modname = "pf";
5281	pha.pa_ruleset = NULL;
5282
5283	pla.pa_version = PFIL_VERSION;
5284
5285#ifdef INET
5286	pha.pa_type = PFIL_TYPE_IP4;
5287	pha.pa_func = pf_check_in;
5288	pha.pa_flags = PFIL_IN;
5289	pha.pa_rulname = "default-in";
5290	V_pf_ip4_in_hook = pfil_add_hook(&pha);
5291	pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR;
5292	pla.pa_head = V_inet_pfil_head;
5293	pla.pa_hook = V_pf_ip4_in_hook;
5294	ret = pfil_link(&pla);
5295	MPASS(ret == 0);
5296	pha.pa_func = pf_check_out;
5297	pha.pa_flags = PFIL_OUT;
5298	pha.pa_rulname = "default-out";
5299	V_pf_ip4_out_hook = pfil_add_hook(&pha);
5300	pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
5301	pla.pa_head = V_inet_pfil_head;
5302	pla.pa_hook = V_pf_ip4_out_hook;
5303	ret = pfil_link(&pla);
5304	MPASS(ret == 0);
5305#endif
5306#ifdef INET6
5307	pha.pa_type = PFIL_TYPE_IP6;
5308	pha.pa_func = pf_check6_in;
5309	pha.pa_flags = PFIL_IN;
5310	pha.pa_rulname = "default-in6";
5311	V_pf_ip6_in_hook = pfil_add_hook(&pha);
5312	pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR;
5313	pla.pa_head = V_inet6_pfil_head;
5314	pla.pa_hook = V_pf_ip6_in_hook;
5315	ret = pfil_link(&pla);
5316	MPASS(ret == 0);
5317	pha.pa_func = pf_check6_out;
5318	pha.pa_rulname = "default-out6";
5319	pha.pa_flags = PFIL_OUT;
5320	V_pf_ip6_out_hook = pfil_add_hook(&pha);
5321	pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
5322	pla.pa_head = V_inet6_pfil_head;
5323	pla.pa_hook = V_pf_ip6_out_hook;
5324	ret = pfil_link(&pla);
5325	MPASS(ret == 0);
5326#endif
5327
5328	V_pf_pfil_hooked = 1;
5329}
5330
5331static void
5332dehook_pf(void)
5333{
5334
5335	if (V_pf_pfil_hooked == 0)
5336		return;
5337
5338#ifdef INET
5339	pfil_remove_hook(V_pf_ip4_in_hook);
5340	pfil_remove_hook(V_pf_ip4_out_hook);
5341#endif
5342#ifdef INET6
5343	pfil_remove_hook(V_pf_ip6_in_hook);
5344	pfil_remove_hook(V_pf_ip6_out_hook);
5345#endif
5346
5347	V_pf_pfil_hooked = 0;
5348}
5349
5350static void
5351pf_load_vnet(void)
5352{
5353	V_pf_tag_z = uma_zcreate("pf tags", sizeof(struct pf_tagname),
5354	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
5355
5356	pf_init_tagset(&V_pf_tags, &pf_rule_tag_hashsize,
5357	    PF_RULE_TAG_HASH_SIZE_DEFAULT);
5358#ifdef ALTQ
5359	pf_init_tagset(&V_pf_qids, &pf_queue_tag_hashsize,
5360	    PF_QUEUE_TAG_HASH_SIZE_DEFAULT);
5361#endif
5362
5363	pfattach_vnet();
5364	V_pf_vnet_active = 1;
5365}
5366
5367static int
5368pf_load(void)
5369{
5370	int error;
5371
5372	rm_init(&pf_rules_lock, "pf rulesets");
5373	sx_init(&pf_ioctl_lock, "pf ioctl");
5374	sx_init(&pf_end_lock, "pf end thread");
5375
5376	pf_mtag_initialize();
5377
5378	pf_dev = make_dev(&pf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, PF_NAME);
5379	if (pf_dev == NULL)
5380		return (ENOMEM);
5381
5382	pf_end_threads = 0;
5383	error = kproc_create(pf_purge_thread, NULL, &pf_purge_proc, 0, 0, "pf purge");
5384	if (error != 0)
5385		return (error);
5386
5387	pfi_initialize();
5388
5389	return (0);
5390}
5391
5392static void
5393pf_unload_vnet(void)
5394{
5395	int ret;
5396
5397	V_pf_vnet_active = 0;
5398	V_pf_status.running = 0;
5399	dehook_pf();
5400
5401	PF_RULES_WLOCK();
5402	shutdown_pf();
5403	PF_RULES_WUNLOCK();
5404
5405	ret = swi_remove(V_pf_swi_cookie);
5406	MPASS(ret == 0);
5407	ret = intr_event_destroy(V_pf_swi_ie);
5408	MPASS(ret == 0);
5409
5410	pf_unload_vnet_purge();
5411
5412	pf_normalize_cleanup();
5413	PF_RULES_WLOCK();
5414	pfi_cleanup_vnet();
5415	PF_RULES_WUNLOCK();
5416	pfr_cleanup();
5417	pf_osfp_flush();
5418	pf_cleanup();
5419	if (IS_DEFAULT_VNET(curvnet))
5420		pf_mtag_cleanup();
5421
5422	pf_cleanup_tagset(&V_pf_tags);
5423#ifdef ALTQ
5424	pf_cleanup_tagset(&V_pf_qids);
5425#endif
5426	uma_zdestroy(V_pf_tag_z);
5427
5428	/* Free counters last as we updated them during shutdown. */
5429	counter_u64_free(V_pf_default_rule.evaluations);
5430	for (int i = 0; i < 2; i++) {
5431		counter_u64_free(V_pf_default_rule.packets[i]);
5432		counter_u64_free(V_pf_default_rule.bytes[i]);
5433	}
5434	counter_u64_free(V_pf_default_rule.states_cur);
5435	counter_u64_free(V_pf_default_rule.states_tot);
5436	counter_u64_free(V_pf_default_rule.src_nodes);
5437
5438	for (int i = 0; i < PFRES_MAX; i++)
5439		counter_u64_free(V_pf_status.counters[i]);
5440	for (int i = 0; i < LCNT_MAX; i++)
5441		counter_u64_free(V_pf_status.lcounters[i]);
5442	for (int i = 0; i < FCNT_MAX; i++)
5443		counter_u64_free(V_pf_status.fcounters[i]);
5444	for (int i = 0; i < SCNT_MAX; i++)
5445		counter_u64_free(V_pf_status.scounters[i]);
5446}
5447
5448static void
5449pf_unload(void)
5450{
5451
5452	sx_xlock(&pf_end_lock);
5453	pf_end_threads = 1;
5454	while (pf_end_threads < 2) {
5455		wakeup_one(pf_purge_thread);
5456		sx_sleep(pf_purge_proc, &pf_end_lock, 0, "pftmo", 0);
5457	}
5458	sx_xunlock(&pf_end_lock);
5459
5460	if (pf_dev != NULL)
5461		destroy_dev(pf_dev);
5462
5463	pfi_cleanup();
5464
5465	rm_destroy(&pf_rules_lock);
5466	sx_destroy(&pf_ioctl_lock);
5467	sx_destroy(&pf_end_lock);
5468}
5469
5470static void
5471vnet_pf_init(void *unused __unused)
5472{
5473
5474	pf_load_vnet();
5475}
5476VNET_SYSINIT(vnet_pf_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD,
5477    vnet_pf_init, NULL);
5478
5479static void
5480vnet_pf_uninit(const void *unused __unused)
5481{
5482
5483	pf_unload_vnet();
5484}
5485SYSUNINIT(pf_unload, SI_SUB_PROTO_FIREWALL, SI_ORDER_SECOND, pf_unload, NULL);
5486VNET_SYSUNINIT(vnet_pf_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD,
5487    vnet_pf_uninit, NULL);
5488
5489static int
5490pf_modevent(module_t mod, int type, void *data)
5491{
5492	int error = 0;
5493
5494	switch(type) {
5495	case MOD_LOAD:
5496		error = pf_load();
5497		break;
5498	case MOD_UNLOAD:
5499		/* Handled in SYSUNINIT(pf_unload) to ensure it's done after
5500		 * the vnet_pf_uninit()s */
5501		break;
5502	default:
5503		error = EINVAL;
5504		break;
5505	}
5506
5507	return (error);
5508}
5509
5510static moduledata_t pf_mod = {
5511	"pf",
5512	pf_modevent,
5513	0
5514};
5515
5516DECLARE_MODULE(pf, pf_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_SECOND);
5517MODULE_VERSION(pf, PF_MODVER);
5518