/*-
 * Copyright (c) 2002 Michael Shalayeff
 * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $
 *
 * Revisions picked from OpenBSD after revision 1.110 import:
 * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input()
 * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates
 * 1.120, 1.175 - use monotonic time_uptime
 * 1.122 - reduce number of updates for non-TCP sessions
 * 1.125, 1.127 - rewrite merge or stale processing
 * 1.128 - cleanups
 * 1.146 - bzero() mbuf before sparsely filling it with data
 * 1.170 - SIOCSIFMTU checks
 * 1.126, 1.142 - deferred packets processing
 * 1.173 - correct expire time processing
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/netpfil/pf/if_pfsync.c 345439 2019-03-23 07:07:41Z kp $");

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_pf.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/protosw.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>

#include <net/bpf.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_clone.h>
#include <net/if_types.h>
#include <net/vnet.h>
#include <net/pfvar.h>
#include <net/if_pfsync.h>

#include <netinet/if_ether.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_carp.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>

#define PFSYNC_MINPKT ( \
	sizeof(struct ip) + \
	sizeof(struct pfsync_header) + \
	sizeof(struct pfsync_subheader) )
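
/*
 * PFSYNC_MINPKT above is the size of an "empty" pfsync datagram: an IPv4
 * header, the pfsync header and a single subheader with no payload.  The
 * per-bucket length accounting below starts from this floor.
 */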

struct pfsync_bucket;

struct pfsync_pkt {
	struct ip *ip;
	struct in_addr src;
	u_int8_t flags;
};

static int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
		    struct pfsync_state_peer *);
static int	pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int);

static int (*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = {
	pfsync_in_clr,			/* PFSYNC_ACT_CLR */
	pfsync_in_ins,			/* PFSYNC_ACT_INS */
	pfsync_in_iack,			/* PFSYNC_ACT_INS_ACK */
	pfsync_in_upd,			/* PFSYNC_ACT_UPD */
	pfsync_in_upd_c,		/* PFSYNC_ACT_UPD_C */
	pfsync_in_ureq,			/* PFSYNC_ACT_UPD_REQ */
	pfsync_in_del,			/* PFSYNC_ACT_DEL */
	pfsync_in_del_c,		/* PFSYNC_ACT_DEL_C */
	pfsync_in_error,		/* PFSYNC_ACT_INS_F */
	pfsync_in_error,		/* PFSYNC_ACT_DEL_F */
	pfsync_in_bus,			/* PFSYNC_ACT_BUS */
	pfsync_in_tdb,			/* PFSYNC_ACT_TDB */
	pfsync_in_eof			/* PFSYNC_ACT_EOF */
};
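
/*
 * The table above maps wire actions to input routines; pfsync_input()
 * dispatches each subheader through it using subh.action as the index.
 * Actions >= PFSYNC_ACT_MAX are rejected before the lookup, and the two
 * unsupported fragment actions map to pfsync_in_error().
 */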

struct pfsync_q {
	void		(*write)(struct pf_state *, void *);
	size_t		len;
	u_int8_t	action;
};

/* we have one of these for every PFSYNC_S_ */
static void	pfsync_out_state(struct pf_state *, void *);
static void	pfsync_out_iack(struct pf_state *, void *);
static void	pfsync_out_upd_c(struct pf_state *, void *);
static void	pfsync_out_del(struct pf_state *, void *);

static struct pfsync_q pfsync_qs[] = {
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD },
	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C }
};
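
/*
 * The rows above are indexed by the local PFSYNC_S_* queue number and give,
 * per queue, the routine that serializes one state, the on-wire record size
 * used for length accounting, and the wire action that pfsync_sendout()
 * emits in the subheader.
 */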

static void	pfsync_q_ins(struct pf_state *, int, bool);
static void	pfsync_q_del(struct pf_state *, bool, struct pfsync_bucket *);

static void	pfsync_update_state(struct pf_state *);

struct pfsync_upd_req_item {
	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
	struct pfsync_upd_req			ur_msg;
};

struct pfsync_deferral {
	struct pfsync_softc		*pd_sc;
	TAILQ_ENTRY(pfsync_deferral)	pd_entry;
	u_int				pd_refs;
	struct callout			pd_tmo;

	struct pf_state			*pd_st;
	struct mbuf			*pd_m;
};
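
/*
 * A deferral holds back the packet that created a state until the peer has
 * had a chance to learn about that state: the mbuf is released either by
 * pfsync_undefer(), when the peer acknowledges the insert or the state is
 * updated or deleted, or by pfsync_defer_tmo(), when the short (10 tick)
 * callout armed in pfsync_defer() fires first and transmits the packet.
 */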
struct pfsync_softc;

struct pfsync_bucket
{
	int			b_id;
	struct pfsync_softc	*b_sc;
	struct mtx		b_mtx;
	struct callout		b_tmo;
	int			b_flags;
#define	PFSYNCF_BUCKET_PUSH	0x00000001

	size_t			b_len;
	TAILQ_HEAD(, pf_state)			b_qs[PFSYNC_S_COUNT];
	TAILQ_HEAD(, pfsync_upd_req_item)	b_upd_req_list;
	TAILQ_HEAD(, pfsync_deferral)		b_deferrals;
	u_int			b_deferred;
	void			*b_plus;
	size_t			b_pluslen;

	struct  ifaltq b_snd;
};
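
/*
 * Queued work is sharded across pfsync_buckets buckets, each with its own
 * mutex, state queues, deferral list and send queue, so that concurrent
 * state updates mostly contend on per-bucket locks rather than on the
 * single softc lock.
 */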

struct pfsync_softc {
	/* Configuration */
	struct ifnet		*sc_ifp;
	struct ifnet		*sc_sync_if;
	struct ip_moptions	sc_imo;
	struct in_addr		sc_sync_peer;
	uint32_t		sc_flags;
#define	PFSYNCF_OK		0x00000001
#define	PFSYNCF_DEFER		0x00000002
	uint8_t			sc_maxupdates;
	struct ip		sc_template;
	struct mtx		sc_mtx;

	/* Queued data */
	struct pfsync_bucket	*sc_buckets;

	/* Bulk update info */
	struct mtx		sc_bulk_mtx;
	uint32_t		sc_ureq_sent;
	int			sc_bulk_tries;
	uint32_t		sc_ureq_received;
	int			sc_bulk_hashid;
	uint64_t		sc_bulk_stateid;
	uint32_t		sc_bulk_creatorid;
	struct callout		sc_bulk_tmo;
	struct callout		sc_bulkfail_tmo;
};

#define	PFSYNC_LOCK(sc)		mtx_lock(&(sc)->sc_mtx)
#define	PFSYNC_UNLOCK(sc)	mtx_unlock(&(sc)->sc_mtx)
#define	PFSYNC_LOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_mtx, MA_OWNED)

#define PFSYNC_BUCKET_LOCK(b)		mtx_lock(&(b)->b_mtx)
#define PFSYNC_BUCKET_UNLOCK(b)		mtx_unlock(&(b)->b_mtx)
#define PFSYNC_BUCKET_LOCK_ASSERT(b)	mtx_assert(&(b)->b_mtx, MA_OWNED)

#define	PFSYNC_BLOCK(sc)	mtx_lock(&(sc)->sc_bulk_mtx)
#define	PFSYNC_BUNLOCK(sc)	mtx_unlock(&(sc)->sc_bulk_mtx)
#define	PFSYNC_BLOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED)

static const char pfsyncname[] = "pfsync";
static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data");
static VNET_DEFINE(struct pfsync_softc	*, pfsyncif) = NULL;
#define	V_pfsyncif		VNET(pfsyncif)
static VNET_DEFINE(void *, pfsync_swi_cookie) = NULL;
#define	V_pfsync_swi_cookie	VNET(pfsync_swi_cookie)
static VNET_DEFINE(struct pfsyncstats, pfsyncstats);
#define	V_pfsyncstats		VNET(pfsyncstats)
static VNET_DEFINE(int, pfsync_carp_adj) = CARP_MAXSKEW;
#define	V_pfsync_carp_adj	VNET(pfsync_carp_adj)

static void	pfsync_timeout(void *);
static void	pfsync_push(struct pfsync_bucket *);
static void	pfsync_push_all(struct pfsync_softc *);
static void	pfsyncintr(void *);
static int	pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *,
		    void *);
static void	pfsync_multicast_cleanup(struct pfsync_softc *);
static void	pfsync_pointers_init(void);
static void	pfsync_pointers_uninit(void);
static int	pfsync_init(void);
static void	pfsync_uninit(void);

static unsigned long pfsync_buckets;

SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC");
SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(pfsyncstats), pfsyncstats,
    "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_RW,
    &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment");
SYSCTL_ULONG(_net_pfsync, OID_AUTO, pfsync_buckets, CTLFLAG_RDTUN,
    &pfsync_buckets, 0, "Number of pfsync hash buckets");
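
/*
 * The bucket count is a read-only tunable, so it can only be set before the
 * module initializes, e.g. (an illustrative loader.conf setting):
 *
 *	net.pfsync.pfsync_buckets="8"
 *
 * When left unset, pfsync_clone_create() below defaults it to twice the
 * number of CPUs.
 */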

static int	pfsync_clone_create(struct if_clone *, int, caddr_t);
static void	pfsync_clone_destroy(struct ifnet *);
static int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
		    struct pf_state_peer *);
static int	pfsyncoutput(struct ifnet *, struct mbuf *,
		    const struct sockaddr *, struct route *);
static int	pfsyncioctl(struct ifnet *, u_long, caddr_t);

static int	pfsync_defer(struct pf_state *, struct mbuf *);
static void	pfsync_undefer(struct pfsync_deferral *, int);
static void	pfsync_undefer_state(struct pf_state *, int);
static void	pfsync_defer_tmo(void *);

static void	pfsync_request_update(u_int32_t, u_int64_t);
static bool	pfsync_update_state_req(struct pf_state *);

static void	pfsync_drop(struct pfsync_softc *);
static void	pfsync_sendout(int, int);
static void	pfsync_send_plus(void *, size_t);

static void	pfsync_bulk_start(void);
static void	pfsync_bulk_status(u_int8_t);
static void	pfsync_bulk_update(void *);
static void	pfsync_bulk_fail(void *);

static void	pfsync_detach_ifnet(struct ifnet *);
#ifdef IPSEC
static void	pfsync_update_net_tdb(struct pfsync_tdb *);
#endif
static struct pfsync_bucket	*pfsync_get_bucket(struct pfsync_softc *,
		    struct pf_state *);

#define PFSYNC_MAX_BULKTRIES	12

VNET_DEFINE(struct if_clone *, pfsync_cloner);
#define	V_pfsync_cloner	VNET(pfsync_cloner)

static int
pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param)
{
	struct pfsync_softc *sc;
	struct ifnet *ifp;
	struct pfsync_bucket *b;
	int c, q;

	if (unit != 0)
		return (EINVAL);

	if (pfsync_buckets == 0)
		pfsync_buckets = mp_ncpus * 2;

	sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO);
	sc->sc_flags |= PFSYNCF_OK;
	sc->sc_maxupdates = 128;

	ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC);
	if (ifp == NULL) {
		free(sc, M_PFSYNC);
		return (ENOSPC);
	}
	if_initname(ifp, pfsyncname, unit);
	ifp->if_softc = sc;
	ifp->if_ioctl = pfsyncioctl;
	ifp->if_output = pfsyncoutput;
	ifp->if_type = IFT_PFSYNC;
	ifp->if_hdrlen = sizeof(struct pfsync_header);
	ifp->if_mtu = ETHERMTU;
	mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF);
	mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF);
	callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0);
	callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0);

	if_attach(ifp);

	bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);

	sc->sc_buckets = mallocarray(pfsync_buckets, sizeof(*sc->sc_buckets),
	    M_PFSYNC, M_ZERO | M_WAITOK);
	for (c = 0; c < pfsync_buckets; c++) {
		b = &sc->sc_buckets[c];
		mtx_init(&b->b_mtx, "pfsync bucket", NULL, MTX_DEF);

		b->b_id = c;
		b->b_sc = sc;
		b->b_len = PFSYNC_MINPKT;

		for (q = 0; q < PFSYNC_S_COUNT; q++)
			TAILQ_INIT(&b->b_qs[q]);

		TAILQ_INIT(&b->b_upd_req_list);
		TAILQ_INIT(&b->b_deferrals);

		callout_init(&b->b_tmo, 1);

		b->b_snd.ifq_maxlen = ifqmaxlen;
	}

	V_pfsyncif = sc;

	return (0);
}

static void
pfsync_clone_destroy(struct ifnet *ifp)
{
	struct pfsync_softc *sc = ifp->if_softc;
	struct pfsync_bucket *b;
	int c;

	for (c = 0; c < pfsync_buckets; c++) {
		b = &sc->sc_buckets[c];
		/*
		 * At this stage, everything should have already been
		 * cleared by pfsync_uninit(), and we have only to
		 * drain callouts.
		 */
		while (b->b_deferred > 0) {
			struct pfsync_deferral *pd =
			    TAILQ_FIRST(&b->b_deferrals);

			TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry);
			b->b_deferred--;
			if (callout_stop(&pd->pd_tmo) > 0) {
				pf_release_state(pd->pd_st);
				m_freem(pd->pd_m);
				free(pd, M_PFSYNC);
			} else {
				pd->pd_refs++;
				callout_drain(&pd->pd_tmo);
				free(pd, M_PFSYNC);
			}
		}

		callout_drain(&b->b_tmo);
	}

	callout_drain(&sc->sc_bulkfail_tmo);
	callout_drain(&sc->sc_bulk_tmo);

	if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
		(*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy");
	bpfdetach(ifp);
	if_detach(ifp);

	pfsync_drop(sc);

	if_free(ifp);
	if (sc->sc_imo.imo_membership)
		pfsync_multicast_cleanup(sc);
	mtx_destroy(&sc->sc_mtx);
	mtx_destroy(&sc->sc_bulk_mtx);

	free(sc->sc_buckets, M_PFSYNC);
	free(sc, M_PFSYNC);

	V_pfsyncif = NULL;
}

static int
pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
    struct pf_state_peer *d)
{
	if (s->scrub.scrub_flag && d->scrub == NULL) {
		d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO);
		if (d->scrub == NULL)
			return (ENOMEM);
	}

	return (0);
}

static int
pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
{
	struct pfsync_softc *sc = V_pfsyncif;
#ifndef	__NO_STRICT_ALIGNMENT
	struct pfsync_state_key key[2];
#endif
	struct pfsync_state_key *kw, *ks;
	struct pf_state	*st = NULL;
	struct pf_state_key *skw = NULL, *sks = NULL;
	struct pf_rule *r = NULL;
	struct pfi_kif	*kif;
	int error;

	PF_RULES_RASSERT();

	if (sp->creatorid == 0) {
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("%s: invalid creator id: %08x\n", __func__,
			    ntohl(sp->creatorid));
		return (EINVAL);
	}

	if ((kif = pfi_kif_find(sp->ifname)) == NULL) {
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("%s: unknown interface: %s\n", __func__,
			    sp->ifname);
		if (flags & PFSYNC_SI_IOCTL)
			return (EINVAL);
		return (0);	/* skip this state */
	}

	/*
	 * If the ruleset checksums match or the state is coming from the ioctl,
	 * it's safe to associate the state with the rule of that number.
	 */
	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
	    pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount)
		r = pf_main_ruleset.rules[
		    PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
	else
		r = &V_pf_default_rule;

	if (r->max_states &&
	    counter_u64_fetch(r->states_cur) >= r->max_states)
		goto cleanup;
	/*
	 * XXXGL: consider using M_WAITOK in the ioctl path later.
	 */
	if ((st = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO)) == NULL)
		goto cleanup;

	if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL)
		goto cleanup;

#ifndef	__NO_STRICT_ALIGNMENT
	bcopy(&sp->key, key, sizeof(struct pfsync_state_key) * 2);
	kw = &key[PF_SK_WIRE];
	ks = &key[PF_SK_STACK];
#else
	kw = &sp->key[PF_SK_WIRE];
	ks = &sp->key[PF_SK_STACK];
#endif

	if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->af) ||
	    PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->af) ||
	    kw->port[0] != ks->port[0] ||
	    kw->port[1] != ks->port[1]) {
		sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
		if (sks == NULL)
			goto cleanup;
	} else
		sks = skw;

	/* allocate memory for scrub info */
	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
		goto cleanup;

	/* Copy to state key(s). */
	skw->addr[0] = kw->addr[0];
	skw->addr[1] = kw->addr[1];
	skw->port[0] = kw->port[0];
	skw->port[1] = kw->port[1];
	skw->proto = sp->proto;
	skw->af = sp->af;
	if (sks != skw) {
		sks->addr[0] = ks->addr[0];
		sks->addr[1] = ks->addr[1];
		sks->port[0] = ks->port[0];
		sks->port[1] = ks->port[1];
		sks->proto = sp->proto;
		sks->af = sp->af;
	}

	/* copy to state */
	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
	st->creation = time_uptime - ntohl(sp->creation);
	st->expire = time_uptime;
	if (sp->expire) {
		uint32_t timeout;

		timeout = r->timeout[sp->timeout];
		if (!timeout)
			timeout = V_pf_default_rule.timeout[sp->timeout];

		/* sp->expire may have been adaptively scaled by export. */
		st->expire -= timeout - ntohl(sp->expire);
	}
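	/*
	 * Worked example: with a 60 second rule timeout, a peer that
	 * exported 45 seconds of remaining lifetime implies the state has
	 * already been idle for 15 seconds, so the local expire stamp
	 * becomes time_uptime - 15.
	 */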

	st->direction = sp->direction;
	st->log = sp->log;
	st->timeout = sp->timeout;
	st->state_flags = sp->state_flags;

	st->id = sp->id;
	st->creatorid = sp->creatorid;
	pf_state_peer_ntoh(&sp->src, &st->src);
	pf_state_peer_ntoh(&sp->dst, &st->dst);

	st->rule.ptr = r;
	st->nat_rule.ptr = NULL;
	st->anchor.ptr = NULL;
	st->rt_kif = NULL;

	st->pfsync_time = time_uptime;
	st->sync_state = PFSYNC_S_NONE;

	if (!(flags & PFSYNC_SI_IOCTL))
		st->state_flags |= PFSTATE_NOSYNC;

	if ((error = pf_state_insert(kif, skw, sks, st)) != 0)
		goto cleanup_state;

	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
	counter_u64_add(r->states_cur, 1);
	counter_u64_add(r->states_tot, 1);

	if (!(flags & PFSYNC_SI_IOCTL)) {
		st->state_flags &= ~PFSTATE_NOSYNC;
		if (st->state_flags & PFSTATE_ACK) {
			pfsync_q_ins(st, PFSYNC_S_IACK, true);
			pfsync_push_all(sc);
		}
	}
	st->state_flags &= ~PFSTATE_ACK;
	PF_STATE_UNLOCK(st);

	return (0);

cleanup:
	error = ENOMEM;
	if (skw == sks)
		sks = NULL;
	if (skw != NULL)
		uma_zfree(V_pf_state_key_z, skw);
	if (sks != NULL)
		uma_zfree(V_pf_state_key_z, sks);

cleanup_state:	/* pf_state_insert() frees the state keys. */
	if (st) {
		if (st->dst.scrub)
			uma_zfree(V_pf_state_scrub_z, st->dst.scrub);
		if (st->src.scrub)
			uma_zfree(V_pf_state_scrub_z, st->src.scrub);
		uma_zfree(V_pf_state_z, st);
	}
	return (error);
}

static int
pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_pkt pkt;
	struct mbuf *m = *mp;
	struct ip *ip = mtod(m, struct ip *);
	struct pfsync_header *ph;
	struct pfsync_subheader subh;

	int offset, len;
	int rv;
	uint16_t count;

	PF_RULES_RLOCK_TRACKER;

	*mp = NULL;
	V_pfsyncstats.pfsyncs_ipackets++;

	/* Verify that we have a sync interface configured. */
	if (!sc || !sc->sc_sync_if || !V_pf_status.running ||
	    (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
		V_pfsyncstats.pfsyncs_badif++;
		goto done;
	}

	if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1);
	if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
	/* verify that the IP TTL is 255. */
	if (ip->ip_ttl != PFSYNC_DFLTTL) {
		V_pfsyncstats.pfsyncs_badttl++;
		goto done;
	}

	offset = ip->ip_hl << 2;
	if (m->m_pkthdr.len < offset + sizeof(*ph)) {
		V_pfsyncstats.pfsyncs_hdrops++;
		goto done;
	}

	if (offset + sizeof(*ph) > m->m_len) {
		if (m_pullup(m, offset + sizeof(*ph)) == NULL) {
			V_pfsyncstats.pfsyncs_hdrops++;
			return (IPPROTO_DONE);
		}
		ip = mtod(m, struct ip *);
	}
	ph = (struct pfsync_header *)((char *)ip + offset);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		V_pfsyncstats.pfsyncs_badver++;
		goto done;
	}

	len = ntohs(ph->len) + offset;
	if (m->m_pkthdr.len < len) {
		V_pfsyncstats.pfsyncs_badlen++;
		goto done;
	}

	/* Cheaper to grab this now than having to mess with mbufs later */
	pkt.ip = ip;
	pkt.src = ip->ip_src;
	pkt.flags = 0;

	/*
	 * Trusting pf_chksum during packet processing, as well as seeking
	 * in interface name tree, require holding PF_RULES_RLOCK().
	 */
	PF_RULES_RLOCK();
	if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
		pkt.flags |= PFSYNC_SI_CKSUM;

	offset += sizeof(*ph);
	while (offset <= len - sizeof(subh)) {
		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
		offset += sizeof(subh);

		if (subh.action >= PFSYNC_ACT_MAX) {
			V_pfsyncstats.pfsyncs_badact++;
			PF_RULES_RUNLOCK();
			goto done;
		}

		count = ntohs(subh.count);
		V_pfsyncstats.pfsyncs_iacts[subh.action] += count;
		rv = (*pfsync_acts[subh.action])(&pkt, m, offset, count);
		if (rv == -1) {
			PF_RULES_RUNLOCK();
			return (IPPROTO_DONE);
		}

		offset += rv;
	}
	PF_RULES_RUNLOCK();

done:
	m_freem(m);
	return (IPPROTO_DONE);
}

static int
pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_clr *clr;
	struct mbuf *mp;
	int len = sizeof(*clr) * count;
	int i, offp;
	u_int32_t creatorid;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	clr = (struct pfsync_clr *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		creatorid = clr[i].creatorid;

		if (clr[i].ifname[0] != '\0' &&
		    pfi_kif_find(clr[i].ifname) == NULL)
			continue;

		for (int i = 0; i <= pf_hashmask; i++) {
			struct pf_idhash *ih = &V_pf_idhash[i];
			struct pf_state *s;
relock:
			PF_HASHROW_LOCK(ih);
			LIST_FOREACH(s, &ih->states, entry) {
				if (s->creatorid == creatorid) {
					s->state_flags |= PFSTATE_NOSYNC;
					pf_unlink_state(s, PF_ENTER_LOCKED);
					goto relock;
				}
			}
			PF_HASHROW_UNLOCK(ih);
		}
	}

	return (len);
}

static int
pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct mbuf *mp;
	struct pfsync_state *sa, *sp;
	int len = sizeof(*sp) * count;
	int i, offp;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_state *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		sp = &sa[i];

		/* Check for invalid values. */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST ||
		    sp->direction > PF_OUT ||
		    (sp->af != AF_INET && sp->af != AF_INET6)) {
			if (V_pf_status.debug >= PF_DEBUG_MISC)
				printf("%s: invalid value\n", __func__);
			V_pfsyncstats.pfsyncs_badval++;
			continue;
		}

		if (pfsync_state_import(sp, pkt->flags) == ENOMEM)
			/* Drop out, but process the rest of the actions. */
			break;
	}

	return (len);
}

static int
pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_ins_ack *ia, *iaa;
	struct pf_state *st;

	struct mbuf *mp;
	int len = count * sizeof(*ia);
	int offp, i;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	iaa = (struct pfsync_ins_ack *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		ia = &iaa[i];

		st = pf_find_state_byid(ia->id, ia->creatorid);
		if (st == NULL)
			continue;

		if (st->state_flags & PFSTATE_ACK) {
			pfsync_undefer_state(st, 0);
		}
		PF_STATE_UNLOCK(st);
	}

	return (len);
}

static int
pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
    struct pfsync_state_peer *dst)
{
	int sync = 0;

	PF_STATE_LOCK_ASSERT(st);

	/*
	 * The state should never go backwards except
	 * for syn-proxy states.  Neither should the
	 * sequence window slide backwards.
	 */
	if ((st->src.state > src->state &&
	    (st->src.state < PF_TCPS_PROXY_SRC ||
	    src->state >= PF_TCPS_PROXY_SRC)) ||

	    (st->src.state == src->state &&
	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(src, &st->src);

	if ((st->dst.state > dst->state) ||

	    (st->dst.state >= TCPS_SYN_SENT &&
	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(dst, &st->dst);

	return (sync);
}
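
/*
 * The return value of pfsync_upd_tcp() above counts the peers (0-2) whose
 * incoming update was rejected as stale.  Callers merge the expiry data
 * when fewer than two sides are stale, and readvertise the local state via
 * pfsync_update_state() whenever at least one side is.
 */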

static int
pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_state *sa, *sp;
	struct pf_state *st;
	int sync;

	struct mbuf *mp;
	int len = count * sizeof(*sp);
	int offp, i;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_state *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		sp = &sa[i];

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST) {
			if (V_pf_status.debug >= PF_DEBUG_MISC) {
				printf("pfsync_input: PFSYNC_ACT_UPD: "
				    "invalid value\n");
			}
			V_pfsyncstats.pfsyncs_badval++;
			continue;
		}

		st = pf_find_state_byid(sp->id, sp->creatorid);
		if (st == NULL) {
			/* insert the update */
			if (pfsync_state_import(sp, pkt->flags))
				V_pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		if (st->state_flags & PFSTATE_ACK) {
			pfsync_undefer_state(st, 1);
		}

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
		else {
			sync = 0;

			/*
			 * Non-TCP protocol state machines always go
			 * forward.
			 */
			if (st->src.state > sp->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->src, &st->src);
			if (st->dst.state > sp->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->dst, &st->dst);
		}
		if (sync < 2) {
			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
			pf_state_peer_ntoh(&sp->dst, &st->dst);
			st->expire = time_uptime;
			st->timeout = sp->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			V_pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			PF_STATE_UNLOCK(st);
			pfsync_push_all(sc);
			continue;
		}
		PF_STATE_UNLOCK(st);
	}

	return (len);
}

static int
pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_upd_c *ua, *up;
	struct pf_state *st;
	int len = count * sizeof(*up);
	int sync;
	struct mbuf *mp;
	int offp, i;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	ua = (struct pfsync_upd_c *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		up = &ua[i];

		/* check for invalid values */
		if (up->timeout >= PFTM_MAX ||
		    up->src.state > PF_TCPS_PROXY_DST ||
		    up->dst.state > PF_TCPS_PROXY_DST) {
			if (V_pf_status.debug >= PF_DEBUG_MISC) {
				printf("pfsync_input: "
				    "PFSYNC_ACT_UPD_C: "
				    "invalid value\n");
			}
			V_pfsyncstats.pfsyncs_badval++;
			continue;
		}

		st = pf_find_state_byid(up->id, up->creatorid);
		if (st == NULL) {
			/* We don't have this state. Ask for it. */
			PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]);
			pfsync_request_update(up->creatorid, up->id);
			PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]);
			continue;
		}

		if (st->state_flags & PFSTATE_ACK) {
			pfsync_undefer_state(st, 1);
		}

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
		else {
			sync = 0;

			/*
			 * Non-TCP protocol state machines always go
			 * forward.
			 */
			if (st->src.state > up->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->src, &st->src);
			if (st->dst.state > up->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->dst, &st->dst);
		}
		if (sync < 2) {
			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
			pf_state_peer_ntoh(&up->dst, &st->dst);
			st->expire = time_uptime;
			st->timeout = up->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			V_pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			PF_STATE_UNLOCK(st);
			pfsync_push_all(sc);
			continue;
		}
		PF_STATE_UNLOCK(st);
	}

	return (len);
}

static int
pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_upd_req *ur, *ura;
	struct mbuf *mp;
	int len = count * sizeof(*ur);
	int i, offp;

	struct pf_state *st;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	ura = (struct pfsync_upd_req *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		ur = &ura[i];

		if (ur->id == 0 && ur->creatorid == 0)
			pfsync_bulk_start();
		else {
			st = pf_find_state_byid(ur->id, ur->creatorid);
			if (st == NULL) {
				V_pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			if (st->state_flags & PFSTATE_NOSYNC) {
				PF_STATE_UNLOCK(st);
				continue;
			}

			pfsync_update_state_req(st);
			PF_STATE_UNLOCK(st);
		}
	}

	return (len);
}

static int
pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct mbuf *mp;
	struct pfsync_state *sa, *sp;
	struct pf_state *st;
	int len = count * sizeof(*sp);
	int offp, i;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_state *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		sp = &sa[i];

		st = pf_find_state_byid(sp->id, sp->creatorid);
		if (st == NULL) {
			V_pfsyncstats.pfsyncs_badstate++;
			continue;
		}
		st->state_flags |= PFSTATE_NOSYNC;
		pf_unlink_state(st, PF_ENTER_LOCKED);
	}

	return (len);
}

static int
pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct mbuf *mp;
	struct pfsync_del_c *sa, *sp;
	struct pf_state *st;
	int len = count * sizeof(*sp);
	int offp, i;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_del_c *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		sp = &sa[i];

		st = pf_find_state_byid(sp->id, sp->creatorid);
		if (st == NULL) {
			V_pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		st->state_flags |= PFSTATE_NOSYNC;
		pf_unlink_state(st, PF_ENTER_LOCKED);
	}

	return (len);
}

static int
pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_bus *bus;
	struct mbuf *mp;
	int len = count * sizeof(*bus);
	int offp;

	PFSYNC_BLOCK(sc);

	/* If we're not waiting for a bulk update, who cares. */
	if (sc->sc_ureq_sent == 0) {
		PFSYNC_BUNLOCK(sc);
		return (len);
	}

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		PFSYNC_BUNLOCK(sc);
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	bus = (struct pfsync_bus *)(mp->m_data + offp);

	switch (bus->status) {
	case PFSYNC_BUS_START:
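		/*
		 * Arm the failure timeout as four seconds plus one tick
		 * for every full packet worth of states the peer may
		 * have to send during the bulk update.
		 */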
		callout_reset(&sc->sc_bulkfail_tmo, 4 * hz +
		    V_pf_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) /
		    sizeof(struct pfsync_state)),
		    pfsync_bulk_fail, sc);
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync: received bulk update start\n");
		break;

	case PFSYNC_BUS_END:
		if (time_uptime - ntohl(bus->endtime) >=
		    sc->sc_ureq_sent) {
			/* that's it, we're happy */
			sc->sc_ureq_sent = 0;
			sc->sc_bulk_tries = 0;
			callout_stop(&sc->sc_bulkfail_tmo);
			if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
				(*carp_demote_adj_p)(-V_pfsync_carp_adj,
				    "pfsync bulk done");
			sc->sc_flags |= PFSYNCF_OK;
			if (V_pf_status.debug >= PF_DEBUG_MISC)
				printf("pfsync: received valid "
				    "bulk update end\n");
		} else {
			if (V_pf_status.debug >= PF_DEBUG_MISC)
				printf("pfsync: received invalid "
				    "bulk update end: bad timestamp\n");
		}
		break;
	}
	PFSYNC_BUNLOCK(sc);

	return (len);
}

static int
pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	int len = count * sizeof(struct pfsync_tdb);

#if defined(IPSEC)
	struct pfsync_tdb *tp;
	struct mbuf *mp;
	int offp;
	int i;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	tp = (struct pfsync_tdb *)(mp->m_data + offp);

	for (i = 0; i < count; i++)
		pfsync_update_net_tdb(&tp[i]);
#endif

	return (len);
}

#if defined(IPSEC)
/* Update an in-kernel tdb. Silently fail if no tdb is found. */
static void
pfsync_update_net_tdb(struct pfsync_tdb *pt)
{
	struct tdb		*tdb;

	/* check for invalid values */
	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
	    (pt->dst.sa.sa_family != AF_INET &&
	    pt->dst.sa.sa_family != AF_INET6))
		goto bad;

	tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
	if (tdb) {
		pt->rpl = ntohl(pt->rpl);
		pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes);

		/* Neither replay nor byte counter should ever decrease. */
		if (pt->rpl < tdb->tdb_rpl ||
		    pt->cur_bytes < tdb->tdb_cur_bytes) {
			goto bad;
		}

		tdb->tdb_rpl = pt->rpl;
		tdb->tdb_cur_bytes = pt->cur_bytes;
	}
	return;

bad:
	if (V_pf_status.debug >= PF_DEBUG_MISC)
		printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: "
		    "invalid value\n");
	V_pfsyncstats.pfsyncs_badstate++;
	return;
}
#endif

static int
pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	/* check if we are at the right place in the packet */
	if (offset != m->m_pkthdr.len)
		V_pfsyncstats.pfsyncs_badlen++;

	/* we're done. free and let the caller return */
	m_freem(m);
	return (-1);
}

static int
pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	V_pfsyncstats.pfsyncs_badact++;

	m_freem(m);
	return (-1);
}

static int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
	struct route *rt)
{
	m_freem(m);
	return (0);
}

/* ARGSUSED */
static int
pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct pfsync_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct pfsyncreq pfsyncr;
	int error;
	int c;

	switch (cmd) {
	case SIOCSIFFLAGS:
		PFSYNC_LOCK(sc);
		if (ifp->if_flags & IFF_UP) {
			ifp->if_drv_flags |= IFF_DRV_RUNNING;
			PFSYNC_UNLOCK(sc);
			pfsync_pointers_init();
		} else {
			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
			PFSYNC_UNLOCK(sc);
			pfsync_pointers_uninit();
		}
		break;
	case SIOCSIFMTU:
		if (!sc->sc_sync_if ||
		    ifr->ifr_mtu <= PFSYNC_MINPKT ||
		    ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
			return (EINVAL);
		if (ifr->ifr_mtu < ifp->if_mtu) {
			for (c = 0; c < pfsync_buckets; c++) {
				PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]);
				if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT)
					pfsync_sendout(1, c);
				PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]);
			}
		}
		ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCGETPFSYNC:
		bzero(&pfsyncr, sizeof(pfsyncr));
		PFSYNC_LOCK(sc);
		if (sc->sc_sync_if) {
			strlcpy(pfsyncr.pfsyncr_syncdev,
			    sc->sc_sync_if->if_xname, IFNAMSIZ);
		}
		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
		pfsyncr.pfsyncr_defer = (PFSYNCF_DEFER ==
		    (sc->sc_flags & PFSYNCF_DEFER));
		PFSYNC_UNLOCK(sc);
		return (copyout(&pfsyncr, ifr_data_get_ptr(ifr),
		    sizeof(pfsyncr)));

	case SIOCSETPFSYNC:
	    {
		struct ip_moptions *imo = &sc->sc_imo;
		struct ifnet *sifp;
		struct ip *ip;
		void *mship = NULL;

		if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0)
			return (error);
		if ((error = copyin(ifr_data_get_ptr(ifr), &pfsyncr,
		    sizeof(pfsyncr))))
			return (error);

		if (pfsyncr.pfsyncr_maxupdates > 255)
			return (EINVAL);

		if (pfsyncr.pfsyncr_syncdev[0] == 0)
			sifp = NULL;
		else if ((sifp = ifunit_ref(pfsyncr.pfsyncr_syncdev)) == NULL)
			return (EINVAL);

		if (sifp != NULL && (
		    pfsyncr.pfsyncr_syncpeer.s_addr == 0 ||
		    pfsyncr.pfsyncr_syncpeer.s_addr ==
		    htonl(INADDR_PFSYNC_GROUP)))
			mship = malloc((sizeof(struct in_multi *) *
			    IP_MIN_MEMBERSHIPS), M_PFSYNC, M_WAITOK | M_ZERO);

		PFSYNC_LOCK(sc);
		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
			sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
		else
			sc->sc_sync_peer.s_addr =
			    pfsyncr.pfsyncr_syncpeer.s_addr;

		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
		if (pfsyncr.pfsyncr_defer) {
			sc->sc_flags |= PFSYNCF_DEFER;
			V_pfsync_defer_ptr = pfsync_defer;
		} else {
			sc->sc_flags &= ~PFSYNCF_DEFER;
			V_pfsync_defer_ptr = NULL;
		}

		if (sifp == NULL) {
			if (sc->sc_sync_if)
				if_rele(sc->sc_sync_if);
			sc->sc_sync_if = NULL;
			if (imo->imo_membership)
				pfsync_multicast_cleanup(sc);
			PFSYNC_UNLOCK(sc);
			break;
		}

		for (c = 0; c < pfsync_buckets; c++) {
			PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]);
			if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT &&
			    (sifp->if_mtu < sc->sc_ifp->if_mtu ||
			    (sc->sc_sync_if != NULL &&
			    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
			    sifp->if_mtu < MCLBYTES - sizeof(struct ip)))
				pfsync_sendout(1, c);
			PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]);
		}

		if (imo->imo_membership)
			pfsync_multicast_cleanup(sc);

		if (sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
			error = pfsync_multicast_setup(sc, sifp, mship);
			if (error) {
				if_rele(sifp);
				free(mship, M_PFSYNC);
				PFSYNC_UNLOCK(sc);
				return (error);
			}
		}
		if (sc->sc_sync_if)
			if_rele(sc->sc_sync_if);
		sc->sc_sync_if = sifp;

		ip = &sc->sc_template;
		bzero(ip, sizeof(*ip));
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(sc->sc_template) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		/* len and id are set later. */
		ip->ip_off = htons(IP_DF);
		ip->ip_ttl = PFSYNC_DFLTTL;
		ip->ip_p = IPPROTO_PFSYNC;
		ip->ip_src.s_addr = INADDR_ANY;
		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;

		/* Request a full state table update. */
		if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
			(*carp_demote_adj_p)(V_pfsync_carp_adj,
			    "pfsync bulk start");
		sc->sc_flags &= ~PFSYNCF_OK;
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync: requesting bulk update\n");
		PFSYNC_UNLOCK(sc);
		PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]);
		pfsync_request_update(0, 0);
		PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]);
		PFSYNC_BLOCK(sc);
		sc->sc_ureq_sent = time_uptime;
		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail,
		    sc);
		PFSYNC_BUNLOCK(sc);

		break;
	    }
	default:
		return (ENOTTY);
	}

	return (0);
}

static void
pfsync_out_state(struct pf_state *st, void *buf)
{
	struct pfsync_state *sp = buf;

	pfsync_state_export(sp, st);
}

static void
pfsync_out_iack(struct pf_state *st, void *buf)
{
	struct pfsync_ins_ack *iack = buf;

	iack->id = st->id;
	iack->creatorid = st->creatorid;
}

static void
pfsync_out_upd_c(struct pf_state *st, void *buf)
{
	struct pfsync_upd_c *up = buf;

	bzero(up, sizeof(*up));
	up->id = st->id;
	pf_state_peer_hton(&st->src, &up->src);
	pf_state_peer_hton(&st->dst, &up->dst);
	up->creatorid = st->creatorid;
	up->timeout = st->timeout;
}

static void
pfsync_out_del(struct pf_state *st, void *buf)
{
	struct pfsync_del_c *dp = buf;

	dp->id = st->id;
	dp->creatorid = st->creatorid;
	st->state_flags |= PFSTATE_NOSYNC;
}

static void
pfsync_drop(struct pfsync_softc *sc)
{
	struct pf_state *st, *next;
	struct pfsync_upd_req_item *ur;
	struct pfsync_bucket *b;
	int c, q;

	for (c = 0; c < pfsync_buckets; c++) {
		b = &sc->sc_buckets[c];
		for (q = 0; q < PFSYNC_S_COUNT; q++) {
			if (TAILQ_EMPTY(&b->b_qs[q]))
				continue;

			TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, next) {
				KASSERT(st->sync_state == q,
					("%s: st->sync_state == q",
						__func__));
				st->sync_state = PFSYNC_S_NONE;
				pf_release_state(st);
			}
			TAILQ_INIT(&b->b_qs[q]);
		}

		while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) {
			TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry);
			free(ur, M_PFSYNC);
		}

		b->b_len = PFSYNC_MINPKT;
		b->b_plus = NULL;
	}
}

static void
pfsync_sendout(int schedswi, int c)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct ifnet *ifp = sc->sc_ifp;
	struct mbuf *m;
	struct ip *ip;
	struct pfsync_header *ph;
	struct pfsync_subheader *subh;
	struct pf_state *st, *st_next;
	struct pfsync_upd_req_item *ur;
	struct pfsync_bucket *b = &sc->sc_buckets[c];
	int offset;
	int q, count = 0;

	KASSERT(sc != NULL, ("%s: null sc", __func__));
	KASSERT(b->b_len > PFSYNC_MINPKT,
	    ("%s: sc_len %zu", __func__, b->b_len));
	PFSYNC_BUCKET_LOCK_ASSERT(b);

	if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) {
		pfsync_drop(sc);
		return;
	}

	m = m_get2(max_linkhdr + b->b_len, M_NOWAIT, MT_DATA, M_PKTHDR);
	if (m == NULL) {
		if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
		V_pfsyncstats.pfsyncs_onomem++;
		return;
	}
	m->m_data += max_linkhdr;
	m->m_len = m->m_pkthdr.len = b->b_len;

	/* build the ip header */
	ip = (struct ip *)m->m_data;
	bcopy(&sc->sc_template, ip, sizeof(*ip));
	offset = sizeof(*ip);

	ip->ip_len = htons(m->m_pkthdr.len);
	ip_fillid(ip);

	/* build the pfsync header */
	ph = (struct pfsync_header *)(m->m_data + offset);
	bzero(ph, sizeof(*ph));
	offset += sizeof(*ph);

	ph->version = PFSYNC_VERSION;
	ph->len = htons(b->b_len - sizeof(*ip));
	bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);

	/* walk the queues */
	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&b->b_qs[q]))
			continue;

		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, st_next) {
			KASSERT(st->sync_state == q,
				("%s: st->sync_state == q",
					__func__));
			/*
			 * XXXGL: some of the write methods do unlocked
			 * reads of state data :(
			 */
			pfsync_qs[q].write(st, m->m_data + offset);
			offset += pfsync_qs[q].len;
			st->sync_state = PFSYNC_S_NONE;
			pf_release_state(st);
			count++;
		}
		TAILQ_INIT(&b->b_qs[q]);

		bzero(subh, sizeof(*subh));
		subh->action = pfsync_qs[q].action;
		subh->count = htons(count);
		V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count;
	}

	if (!TAILQ_EMPTY(&b->b_upd_req_list)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) {
			TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry);

			bcopy(&ur->ur_msg, m->m_data + offset,
			    sizeof(ur->ur_msg));
			offset += sizeof(ur->ur_msg);
			free(ur, M_PFSYNC);
			count++;
		}

		bzero(subh, sizeof(*subh));
		subh->action = PFSYNC_ACT_UPD_REQ;
		subh->count = htons(count);
		V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count;
	}

	/* has someone built a custom region for us to add? */
	if (b->b_plus != NULL) {
		bcopy(b->b_plus, m->m_data + offset, b->b_pluslen);
		offset += b->b_pluslen;

		b->b_plus = NULL;
	}

	subh = (struct pfsync_subheader *)(m->m_data + offset);
	offset += sizeof(*subh);

	bzero(subh, sizeof(*subh));
	subh->action = PFSYNC_ACT_EOF;
	subh->count = htons(1);
	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++;

	/* we're done, let's put it on the wire */
	if (ifp->if_bpf) {
		m->m_data += sizeof(*ip);
		m->m_len = m->m_pkthdr.len = b->b_len - sizeof(*ip);
		BPF_MTAP(ifp, m);
		m->m_data -= sizeof(*ip);
		m->m_len = m->m_pkthdr.len = b->b_len;
	}

	if (sc->sc_sync_if == NULL) {
		b->b_len = PFSYNC_MINPKT;
		m_freem(m);
		return;
	}

	if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1);
	if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
	b->b_len = PFSYNC_MINPKT;

	if (!_IF_QFULL(&b->b_snd))
		_IF_ENQUEUE(&b->b_snd, m);
	else {
		m_freem(m);
		if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1);
	}
	if (schedswi)
		swi_sched(V_pfsync_swi_cookie, 0);
}

static void
pfsync_insert_state(struct pf_state *st)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);

	if (st->state_flags & PFSTATE_NOSYNC)
		return;

	if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) ||
	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
		st->state_flags |= PFSTATE_NOSYNC;
		return;
	}

	KASSERT(st->sync_state == PFSYNC_S_NONE,
		("%s: st->sync_state %u", __func__, st->sync_state));

	PFSYNC_BUCKET_LOCK(b);
	if (b->b_len == PFSYNC_MINPKT)
		callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);

	pfsync_q_ins(st, PFSYNC_S_INS, true);
	PFSYNC_BUCKET_UNLOCK(b);

	st->sync_updates = 0;
}

static int
pfsync_defer(struct pf_state *st, struct mbuf *m)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_deferral *pd;
	struct pfsync_bucket *b;

	if (m->m_flags & (M_BCAST|M_MCAST))
		return (0);

	if (sc == NULL)
		return (0);

	PFSYNC_LOCK(sc);
	if (!(sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
	    !(sc->sc_flags & PFSYNCF_DEFER)) {
		PFSYNC_UNLOCK(sc);
		return (0);
	}
	PFSYNC_UNLOCK(sc);

	b = pfsync_get_bucket(sc, st);
	PFSYNC_BUCKET_LOCK(b);

	if (b->b_deferred >= 128)
		pfsync_undefer(TAILQ_FIRST(&b->b_deferrals), 0);

	pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT);
	if (pd == NULL) {
		PFSYNC_BUCKET_UNLOCK(b);
		return (0);
	}
	b->b_deferred++;

	m->m_flags |= M_SKIP_FIREWALL;
	st->state_flags |= PFSTATE_ACK;

	pd->pd_sc = sc;
	pd->pd_refs = 0;
	pd->pd_st = st;
	pf_ref_state(st);
	pd->pd_m = m;

	TAILQ_INSERT_TAIL(&b->b_deferrals, pd, pd_entry);
	callout_init_mtx(&pd->pd_tmo, &b->b_mtx, CALLOUT_RETURNUNLOCKED);
	callout_reset(&pd->pd_tmo, 10, pfsync_defer_tmo, pd);

	pfsync_push(b);
	PFSYNC_BUCKET_UNLOCK(b);

	return (1);
}

static void
pfsync_undefer(struct pfsync_deferral *pd, int drop)
{
	struct pfsync_softc *sc = pd->pd_sc;
	struct mbuf *m = pd->pd_m;
	struct pf_state *st = pd->pd_st;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);

	PFSYNC_BUCKET_LOCK_ASSERT(b);

	TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry);
	b->b_deferred--;
	pd->pd_st->state_flags &= ~PFSTATE_ACK;	/* XXX: locking! */
	free(pd, M_PFSYNC);
	pf_release_state(st);

	if (drop)
		m_freem(m);
	else {
		_IF_ENQUEUE(&b->b_snd, m);
		pfsync_push(b);
	}
}

static void
pfsync_defer_tmo(void *arg)
{
	struct pfsync_deferral *pd = arg;
	struct pfsync_softc *sc = pd->pd_sc;
	struct mbuf *m = pd->pd_m;
	struct pf_state *st = pd->pd_st;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);

	PFSYNC_BUCKET_LOCK_ASSERT(b);

	CURVNET_SET(m->m_pkthdr.rcvif->if_vnet);

	TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry);
	b->b_deferred--;
	pd->pd_st->state_flags &= ~PFSTATE_ACK;	/* XXX: locking! */
	if (pd->pd_refs == 0)
		free(pd, M_PFSYNC);
	PFSYNC_BUCKET_UNLOCK(b);

	ip_output(m, NULL, NULL, 0, NULL, NULL);

	pf_release_state(st);

	CURVNET_RESTORE();
}

static void
pfsync_undefer_state(struct pf_state *st, int drop)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_deferral *pd;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);

	PFSYNC_BUCKET_LOCK(b);

	TAILQ_FOREACH(pd, &b->b_deferrals, pd_entry) {
		 if (pd->pd_st == st) {
			if (callout_stop(&pd->pd_tmo) > 0)
				pfsync_undefer(pd, drop);

			PFSYNC_BUCKET_UNLOCK(b);
			return;
		}
	}
	PFSYNC_BUCKET_UNLOCK(b);

	panic("%s: unable to find deferred state", __func__);
}

static struct pfsync_bucket *
pfsync_get_bucket(struct pfsync_softc *sc, struct pf_state *st)
{
	int c = PF_IDHASH(st) % pfsync_buckets;

	return (&sc->sc_buckets[c]);
}
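
/*
 * PF_IDHASH() is stable for the lifetime of a state, so a state always maps
 * to the same bucket and its queue membership is protected by that single
 * bucket mutex.
 */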

static void
pfsync_update_state(struct pf_state *st)
{
	struct pfsync_softc *sc = V_pfsyncif;
	bool sync = false, ref = true;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);

	PF_STATE_LOCK_ASSERT(st);
	PFSYNC_BUCKET_LOCK(b);

	if (st->state_flags & PFSTATE_ACK)
		pfsync_undefer_state(st, 0);
	if (st->state_flags & PFSTATE_NOSYNC) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st, true, b);
		PFSYNC_BUCKET_UNLOCK(b);
		return;
	}

	if (b->b_len == PFSYNC_MINPKT)
		callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_INS:
		/* we're already handling it */

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
			st->sync_updates++;
			if (st->sync_updates >= sc->sc_maxupdates)
				sync = true;
		}
		break;

	case PFSYNC_S_IACK:
		pfsync_q_del(st, false, b);
		ref = false;
		/* FALLTHROUGH */

	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD_C, ref);
		st->sync_updates = 0;
		break;

	default:
		panic("%s: unexpected sync state %d", __func__, st->sync_state);
	}

	if (sync || (time_uptime - st->pfsync_time) < 2)
		pfsync_push(b);

	PFSYNC_BUCKET_UNLOCK(b);
}

static void
pfsync_request_update(u_int32_t creatorid, u_int64_t id)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_bucket *b = &sc->sc_buckets[0];
	struct pfsync_upd_req_item *item;
	size_t nlen = sizeof(struct pfsync_upd_req);

	PFSYNC_BUCKET_LOCK_ASSERT(b);

	/*
	 * This code tries to prevent multiple update requests for the same
	 * state from being generated.  It searches the current subheader
	 * queue, but does not look into the queue of already-packed
	 * datagrams.
	 */
1936	TAILQ_FOREACH(item, &b->b_upd_req_list, ur_entry)
1937		if (item->ur_msg.id == id &&
1938		    item->ur_msg.creatorid == creatorid)
1939			return;
1940
1941	item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT);
1942	if (item == NULL)
1943		return; /* XXX stats */
1944
1945	item->ur_msg.id = id;
1946	item->ur_msg.creatorid = creatorid;
1947
1948	if (TAILQ_EMPTY(&b->b_upd_req_list))
1949		nlen += sizeof(struct pfsync_subheader);
1950
1951	if (b->b_len + nlen > sc->sc_ifp->if_mtu) {
1952		pfsync_sendout(1, 0);
1953
1954		nlen = sizeof(struct pfsync_subheader) +
1955		    sizeof(struct pfsync_upd_req);
1956	}
1957
1958	TAILQ_INSERT_TAIL(&b->b_upd_req_list, item, ur_entry);
1959	b->b_len += nlen;
1960}
1961
static bool
pfsync_update_state_req(struct pf_state *st)
{
	struct pfsync_softc *sc = V_pfsyncif;
	bool ref = true, full = false;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);

	PF_STATE_LOCK_ASSERT(st);
	PFSYNC_BUCKET_LOCK(b);

	if (st->state_flags & PFSTATE_NOSYNC) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st, true, b);
		PFSYNC_BUCKET_UNLOCK(b);
		return (full);
	}

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_IACK:
		pfsync_q_del(st, false, b);
		ref = false;
		/* FALLTHROUGH */

	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD, ref);
		pfsync_push(b);
		break;

	case PFSYNC_S_INS:
	case PFSYNC_S_UPD:
	case PFSYNC_S_DEL:
		/* we're already handling it */
		break;

	default:
		panic("%s: unexpected sync state %d", __func__, st->sync_state);
	}

	if ((sc->sc_ifp->if_mtu - b->b_len) < sizeof(struct pfsync_state))
		full = true;

	PFSYNC_BUCKET_UNLOCK(b);

	return (full);
}

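/*
 * Queue a deletion notice for a state.  If the state was still waiting
 * in the insert queue, the peers never heard of it, so it is simply
 * dropped from the queue instead.
 */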
static void
pfsync_delete_state(struct pf_state *st)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
	bool ref = true;

	PFSYNC_BUCKET_LOCK(b);
	if (st->state_flags & PFSTATE_ACK)
		pfsync_undefer_state(st, 1);
	if (st->state_flags & PFSTATE_NOSYNC) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st, true, b);
		PFSYNC_BUCKET_UNLOCK(b);
		return;
	}

	if (b->b_len == PFSYNC_MINPKT)
		callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);

	switch (st->sync_state) {
	case PFSYNC_S_INS:
		/* We never got to tell the world so just forget about it. */
		pfsync_q_del(st, true, b);
		break;

	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_IACK:
		pfsync_q_del(st, false, b);
		ref = false;
		/* FALLTHROUGH */

	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_DEL, ref);
		break;

	default:
		panic("%s: unexpected sync state %d", __func__, st->sync_state);
	}

	PFSYNC_BUCKET_UNLOCK(b);
}

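/*
 * Tell the peers to flush all states matching the given creator ID and
 * interface name.
 */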
static void
pfsync_clear_states(u_int32_t creatorid, const char *ifname)
{
	struct {
		struct pfsync_subheader subh;
		struct pfsync_clr clr;
	} __packed r;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_CLR;
	r.subh.count = htons(1);
	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++;

	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
	r.clr.creatorid = creatorid;

	pfsync_send_plus(&r, sizeof(r));
}

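/*
 * Append a state to queue q of its bucket.  Starting a new queue costs
 * an extra subheader; if the addition would overflow the interface
 * MTU, the pending packet is sent out first.
 */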
static void
pfsync_q_ins(struct pf_state *st, int q, bool ref)
{
	struct pfsync_softc *sc = V_pfsyncif;
	size_t nlen = pfsync_qs[q].len;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);

	PFSYNC_BUCKET_LOCK_ASSERT(b);

	KASSERT(st->sync_state == PFSYNC_S_NONE,
	    ("%s: st->sync_state %u", __func__, st->sync_state));
	KASSERT(b->b_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu",
	    b->b_len));

	if (TAILQ_EMPTY(&b->b_qs[q]))
		nlen += sizeof(struct pfsync_subheader);

	if (b->b_len + nlen > sc->sc_ifp->if_mtu) {
		pfsync_sendout(1, b->b_id);

		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
	}

	b->b_len += nlen;
	TAILQ_INSERT_TAIL(&b->b_qs[q], st, sync_list);
	st->sync_state = q;
	if (ref)
		pf_ref_state(st);
}

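/*
 * Remove a state from whatever queue it is on, giving back the
 * subheader length when the queue becomes empty and dropping the
 * queue's reference on the state if requested.
 */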
static void
pfsync_q_del(struct pf_state *st, bool unref, struct pfsync_bucket *b)
{
	int q = st->sync_state;

	PFSYNC_BUCKET_LOCK_ASSERT(b);
	KASSERT(st->sync_state != PFSYNC_S_NONE,
	    ("%s: st->sync_state != PFSYNC_S_NONE", __func__));

	b->b_len -= pfsync_qs[q].len;
	TAILQ_REMOVE(&b->b_qs[q], st, sync_list);
	st->sync_state = PFSYNC_S_NONE;
	if (unref)
		pf_release_state(st);

	if (TAILQ_EMPTY(&b->b_qs[q]))
		b->b_len -= sizeof(struct pfsync_subheader);
}

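/*
 * Begin answering a peer's bulk update request: record when the
 * request arrived, reset the iteration cursor, announce the start of
 * the transfer and schedule pfsync_bulk_update() to walk the states.
 */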
static void
pfsync_bulk_start(void)
{
	struct pfsync_softc *sc = V_pfsyncif;

	if (V_pf_status.debug >= PF_DEBUG_MISC)
		printf("pfsync: received bulk update request\n");

	PFSYNC_BLOCK(sc);

	sc->sc_ureq_received = time_uptime;
	sc->sc_bulk_hashid = 0;
	sc->sc_bulk_stateid = 0;
	pfsync_bulk_status(PFSYNC_BUS_START);
	callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc);
	PFSYNC_BUNLOCK(sc);
}

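/*
 * Callout that walks the state hash and queues a full update for every
 * state that is not already being synced.  When the pending packet
 * fills up, the current position is saved and the callout reschedules
 * itself to continue from there.
 */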
static void
pfsync_bulk_update(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct pf_state *s;
	int i, sent = 0;

	PFSYNC_BLOCK_ASSERT(sc);
	CURVNET_SET(sc->sc_ifp->if_vnet);

	/*
	 * Start with the last state from the previous invocation.
	 * It may have gone away in the meantime, in which case we
	 * start from the saved hash slot.
	 */
	s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid);

	if (s != NULL)
		i = PF_IDHASH(s);
	else
		i = sc->sc_bulk_hashid;

	for (; i <= pf_hashmask; i++) {
		struct pf_idhash *ih = &V_pf_idhash[i];

		if (s != NULL)
			PF_HASHROW_ASSERT(ih);
		else {
			PF_HASHROW_LOCK(ih);
			s = LIST_FIRST(&ih->states);
		}

		for (; s; s = LIST_NEXT(s, entry)) {
			if (s->sync_state == PFSYNC_S_NONE &&
			    s->timeout < PFTM_MAX &&
			    s->pfsync_time <= sc->sc_ureq_received) {
				if (pfsync_update_state_req(s)) {
					/* We've filled a packet. */
					sc->sc_bulk_hashid = i;
					sc->sc_bulk_stateid = s->id;
					sc->sc_bulk_creatorid = s->creatorid;
					PF_HASHROW_UNLOCK(ih);
					callout_reset(&sc->sc_bulk_tmo, 1,
					    pfsync_bulk_update, sc);
					goto full;
				}
				sent++;
			}
		}
		PF_HASHROW_UNLOCK(ih);
	}

	/* We're done. */
	pfsync_bulk_status(PFSYNC_BUS_END);
full:
	CURVNET_RESTORE();
}

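/*
 * Send a bulk update status message, marking the start or the end of a
 * bulk transfer.
 */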
static void
pfsync_bulk_status(u_int8_t status)
{
	struct {
		struct pfsync_subheader subh;
		struct pfsync_bus bus;
	} __packed r;

	struct pfsync_softc *sc = V_pfsyncif;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_BUS;
	r.subh.count = htons(1);
	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++;

	r.bus.creatorid = V_pf_status.hostid;
	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
	r.bus.status = status;

	pfsync_send_plus(&r, sizeof(r));
}

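/*
 * Callout fired when a bulk update we requested has not completed in
 * time.  Retry up to PFSYNC_MAX_BULKTRIES times, then give up and
 * pretend the transfer succeeded so that any carp demotion is lifted.
 */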
static void
pfsync_bulk_fail(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct pfsync_bucket *b = &sc->sc_buckets[0];

	CURVNET_SET(sc->sc_ifp->if_vnet);

	PFSYNC_BLOCK_ASSERT(sc);

	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again */
		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
		    pfsync_bulk_fail, V_pfsyncif);
		PFSYNC_BUCKET_LOCK(b);
		pfsync_request_update(0, 0);
		PFSYNC_BUCKET_UNLOCK(b);
	} else {
		/* Pretend like the transfer was ok. */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
		PFSYNC_LOCK(sc);
		if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
			(*carp_demote_adj_p)(-V_pfsync_carp_adj,
			    "pfsync bulk fail");
		sc->sc_flags |= PFSYNCF_OK;
		PFSYNC_UNLOCK(sc);
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync: failed to receive bulk update\n");
	}

	CURVNET_RESTORE();
}

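/*
 * Attach an extra chunk of data (a clear or bulk status message) to
 * the pending packet on bucket 0 and send it out immediately.
 */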
static void
pfsync_send_plus(void *plus, size_t pluslen)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_bucket *b = &sc->sc_buckets[0];

	PFSYNC_BUCKET_LOCK(b);

	if (b->b_len + pluslen > sc->sc_ifp->if_mtu)
		pfsync_sendout(1, b->b_id);

	b->b_plus = plus;
	b->b_len += (b->b_pluslen = pluslen);

	pfsync_sendout(1, b->b_id);
	PFSYNC_BUCKET_UNLOCK(b);
}

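/*
 * Per-bucket callout that pushes out whatever has accumulated since
 * the first addition to an empty packet.
 */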
static void
pfsync_timeout(void *arg)
{
	struct pfsync_bucket *b = arg;

	CURVNET_SET(b->b_sc->sc_ifp->if_vnet);
	PFSYNC_BUCKET_LOCK(b);
	pfsync_push(b);
	PFSYNC_BUCKET_UNLOCK(b);
	CURVNET_RESTORE();
}

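/*
 * Mark a bucket as ready to send and kick the software interrupt that
 * does the actual transmission.
 */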
static void
pfsync_push(struct pfsync_bucket *b)
{

	PFSYNC_BUCKET_LOCK_ASSERT(b);

	b->b_flags |= PFSYNCF_BUCKET_PUSH;
	swi_sched(V_pfsync_swi_cookie, 0);
}

static void
pfsync_push_all(struct pfsync_softc *sc)
{
	int c;
	struct pfsync_bucket *b;

	for (c = 0; c < pfsync_buckets; c++) {
		b = &sc->sc_buckets[c];

		PFSYNC_BUCKET_LOCK(b);
		pfsync_push(b);
		PFSYNC_BUCKET_UNLOCK(b);
	}
}

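/*
 * Software interrupt handler: for every bucket, serialize the pending
 * packet if a push was requested, then transmit the queued mbufs with
 * the bucket lock dropped.
 */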
static void
pfsyncintr(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct pfsync_bucket *b;
	struct mbuf *m, *n;
	int c;

	CURVNET_SET(sc->sc_ifp->if_vnet);

	for (c = 0; c < pfsync_buckets; c++) {
		b = &sc->sc_buckets[c];

		PFSYNC_BUCKET_LOCK(b);
		if ((b->b_flags & PFSYNCF_BUCKET_PUSH) && b->b_len > PFSYNC_MINPKT) {
			pfsync_sendout(0, b->b_id);
			b->b_flags &= ~PFSYNCF_BUCKET_PUSH;
		}
		_IF_DEQUEUE_ALL(&b->b_snd, m);
		PFSYNC_BUCKET_UNLOCK(b);

		for (; m != NULL; m = n) {
			n = m->m_nextpkt;
			m->m_nextpkt = NULL;

			/*
			 * We distinguish between a deferral packet and our
			 * own pfsync packet based on the M_SKIP_FIREWALL
			 * flag.  This is XXX.
			 */
			if (m->m_flags & M_SKIP_FIREWALL)
				ip_output(m, NULL, NULL, 0, NULL, NULL);
			else if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
			    NULL) == 0)
				V_pfsyncstats.pfsyncs_opackets++;
			else
				V_pfsyncstats.pfsyncs_oerrors++;
		}
	}
	CURVNET_RESTORE();
}

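/*
 * Join the pfsync multicast group on the sync interface and initialize
 * the multicast options used for outgoing packets.
 */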
static int
pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, void *mship)
{
	struct ip_moptions *imo = &sc->sc_imo;
	int error;

	if (!(ifp->if_flags & IFF_MULTICAST))
		return (EADDRNOTAVAIL);

	imo->imo_membership = (struct in_multi **)mship;
	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
	imo->imo_multicast_vif = -1;

	if ((error = in_joingroup(ifp, &sc->sc_sync_peer, NULL,
	    &imo->imo_membership[0])) != 0) {
		imo->imo_membership = NULL;
		return (error);
	}
	imo->imo_num_memberships++;
	imo->imo_multicast_ifp = ifp;
	imo->imo_multicast_ttl = PFSYNC_DFLTTL;
	imo->imo_multicast_loop = 0;

	return (0);
}

static void
pfsync_multicast_cleanup(struct pfsync_softc *sc)
{
	struct ip_moptions *imo = &sc->sc_imo;

	in_leavegroup(imo->imo_membership[0], NULL);
	free(imo->imo_membership, M_PFSYNC);
	imo->imo_membership = NULL;
	imo->imo_multicast_ifp = NULL;
}

void
pfsync_detach_ifnet(struct ifnet *ifp)
{
	struct pfsync_softc *sc = V_pfsyncif;

	if (sc == NULL)
		return;

	PFSYNC_LOCK(sc);

	if (sc->sc_sync_if == ifp) {
		/* We don't need multicast cleanup here, because the interface
		 * is going away. We do need to ensure we don't try to do
		 * cleanup later.
		 */
		sc->sc_imo.imo_membership = NULL;
		sc->sc_imo.imo_multicast_ifp = NULL;
		sc->sc_sync_if = NULL;
	}

	PFSYNC_UNLOCK(sc);
}

#ifdef INET
extern struct domain inetdomain;
static struct protosw in_pfsync_protosw = {
	.pr_type =		SOCK_RAW,
	.pr_domain =		&inetdomain,
	.pr_protocol =		IPPROTO_PFSYNC,
	.pr_flags =		PR_ATOMIC|PR_ADDR,
	.pr_input =		pfsync_input,
	.pr_output =		rip_output,
	.pr_ctloutput =		rip_ctloutput,
	.pr_usrreqs =		&rip_usrreqs
};
#endif

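/*
 * Hook the pfsync entry points into pf(4).  The function pointers are
 * protected by the pf rules lock.
 */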
static void
pfsync_pointers_init(void)
{

	PF_RULES_WLOCK();
	V_pfsync_state_import_ptr = pfsync_state_import;
	V_pfsync_insert_state_ptr = pfsync_insert_state;
	V_pfsync_update_state_ptr = pfsync_update_state;
	V_pfsync_delete_state_ptr = pfsync_delete_state;
	V_pfsync_clear_states_ptr = pfsync_clear_states;
	V_pfsync_defer_ptr = pfsync_defer;
	PF_RULES_WUNLOCK();
}

static void
pfsync_pointers_uninit(void)
{

	PF_RULES_WLOCK();
	V_pfsync_state_import_ptr = NULL;
	V_pfsync_insert_state_ptr = NULL;
	V_pfsync_update_state_ptr = NULL;
	V_pfsync_delete_state_ptr = NULL;
	V_pfsync_clear_states_ptr = NULL;
	V_pfsync_defer_ptr = NULL;
	PF_RULES_WUNLOCK();
}

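/*
 * Per-vnet initialization: register the interface cloner, set up the
 * software interrupt used for transmission and install the pf(4)
 * hooks.
 */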
static void
vnet_pfsync_init(const void *unused __unused)
{
	int error;

	V_pfsync_cloner = if_clone_simple(pfsyncname,
	    pfsync_clone_create, pfsync_clone_destroy, 1);
	error = swi_add(NULL, pfsyncname, pfsyncintr, V_pfsyncif,
	    SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie);
	if (error) {
		if_clone_detach(V_pfsync_cloner);
		log(LOG_INFO, "swi_add() failed in %s\n", __func__);
	}

	pfsync_pointers_init();
}
VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY,
    vnet_pfsync_init, NULL);

static void
vnet_pfsync_uninit(const void *unused __unused)
{

	pfsync_pointers_uninit();

	if_clone_detach(V_pfsync_cloner);
	swi_remove(V_pfsync_swi_cookie);
}

VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH,
    vnet_pfsync_uninit, NULL);

static int
pfsync_init(void)
{
#ifdef INET
	int error;

	pfsync_detach_ifnet_ptr = pfsync_detach_ifnet;

	error = pf_proto_register(PF_INET, &in_pfsync_protosw);
	if (error)
		return (error);
	error = ipproto_register(IPPROTO_PFSYNC);
	if (error) {
		pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW);
		return (error);
	}
#endif

	return (0);
}

static void
pfsync_uninit(void)
{
	pfsync_detach_ifnet_ptr = NULL;

#ifdef INET
	ipproto_unregister(IPPROTO_PFSYNC);
	pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW);
#endif
}

static int
pfsync_modevent(module_t mod, int type, void *data)
{
	int error = 0;

	switch (type) {
	case MOD_LOAD:
		error = pfsync_init();
		break;
	case MOD_UNLOAD:
		pfsync_uninit();
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}

static moduledata_t pfsync_mod = {
	pfsyncname,
	pfsync_modevent,
	0
};

#define PFSYNC_MODVER 1

/* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */
DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY);
MODULE_VERSION(pfsync, PFSYNC_MODVER);
MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER);