1/*-
2 * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND ISC)
3 *
4 * Copyright (c) 2002 Michael Shalayeff
5 * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27 * THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30/*-
31 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
32 *
33 * Permission to use, copy, modify, and distribute this software for any
34 * purpose with or without fee is hereby granted, provided that the above
35 * copyright notice and this permission notice appear in all copies.
36 *
37 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
38 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
39 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
40 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
41 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
42 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
43 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
44 */
45
46/*
47 * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $
48 *
49 * Revisions picked from OpenBSD after revision 1.110 import:
50 * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input()
51 * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates
52 * 1.120, 1.175 - use monotonic time_uptime
53 * 1.122 - reduce number of updates for non-TCP sessions
54 * 1.125, 1.127 - rewrite merge or stale processing
55 * 1.128 - cleanups
56 * 1.146 - bzero() mbuf before sparsely filling it with data
57 * 1.170 - SIOCSIFMTU checks
58 * 1.126, 1.142 - deferred packets processing
59 * 1.173 - correct expire time processing
60 */
61
62#include <sys/cdefs.h>
63__FBSDID("$FreeBSD$");
64
65#include "opt_inet.h"
66#include "opt_inet6.h"
67#include "opt_pf.h"
68
69#include <sys/param.h>
70#include <sys/bus.h>
71#include <sys/endian.h>
72#include <sys/interrupt.h>
73#include <sys/kernel.h>
74#include <sys/lock.h>
75#include <sys/mbuf.h>
76#include <sys/module.h>
77#include <sys/mutex.h>
78#include <sys/priv.h>
79#include <sys/protosw.h>
80#include <sys/smp.h>
81#include <sys/socket.h>
82#include <sys/sockio.h>
83#include <sys/sysctl.h>
84#include <sys/syslog.h>
85
86#include <net/bpf.h>
87#include <net/if.h>
88#include <net/if_var.h>
89#include <net/if_clone.h>
90#include <net/if_types.h>
91#include <net/vnet.h>
92#include <net/pfvar.h>
93#include <net/if_pfsync.h>
94
95#include <netinet/if_ether.h>
96#include <netinet/in.h>
97#include <netinet/in_var.h>
98#include <netinet/ip.h>
99#include <netinet/ip_carp.h>
100#include <netinet/ip_var.h>
101#include <netinet/tcp.h>
102#include <netinet/tcp_fsm.h>
103#include <netinet/tcp_seq.h>
104
105#define PFSYNC_MINPKT ( \
106	sizeof(struct ip) + \
107	sizeof(struct pfsync_header) + \
108	sizeof(struct pfsync_subheader) )
109
110struct pfsync_bucket;
111
112struct pfsync_pkt {
113	struct ip *ip;
114	struct in_addr src;
115	u_int8_t flags;
116};
117
118static int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
119		    struct pfsync_state_peer *);
120static int	pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int);
121static int	pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int);
122static int	pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int);
123static int	pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int);
124static int	pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int);
125static int	pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int);
126static int	pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int);
127static int	pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int);
128static int	pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int);
129static int	pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int);
130static int	pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int);
131static int	pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int);
132
133static int (*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = {
134	pfsync_in_clr,			/* PFSYNC_ACT_CLR */
135	pfsync_in_ins,			/* PFSYNC_ACT_INS */
136	pfsync_in_iack,			/* PFSYNC_ACT_INS_ACK */
137	pfsync_in_upd,			/* PFSYNC_ACT_UPD */
138	pfsync_in_upd_c,		/* PFSYNC_ACT_UPD_C */
139	pfsync_in_ureq,			/* PFSYNC_ACT_UPD_REQ */
140	pfsync_in_del,			/* PFSYNC_ACT_DEL */
141	pfsync_in_del_c,		/* PFSYNC_ACT_DEL_C */
142	pfsync_in_error,		/* PFSYNC_ACT_INS_F */
143	pfsync_in_error,		/* PFSYNC_ACT_DEL_F */
144	pfsync_in_bus,			/* PFSYNC_ACT_BUS */
145	pfsync_in_tdb,			/* PFSYNC_ACT_TDB */
146	pfsync_in_eof			/* PFSYNC_ACT_EOF */
147};
148
149struct pfsync_q {
150	void		(*write)(struct pf_state *, void *);
151	size_t		len;
152	u_int8_t	action;
153};
154
155/* we have one of these for every PFSYNC_S_ */
156static void	pfsync_out_state(struct pf_state *, void *);
157static void	pfsync_out_iack(struct pf_state *, void *);
158static void	pfsync_out_upd_c(struct pf_state *, void *);
159static void	pfsync_out_del(struct pf_state *, void *);
160
161static struct pfsync_q pfsync_qs[] = {
162	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
163	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
164	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD },
165	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
166	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C }
167};
168
169static void	pfsync_q_ins(struct pf_state *, int, bool);
170static void	pfsync_q_del(struct pf_state *, bool, struct pfsync_bucket *);
171
172static void	pfsync_update_state(struct pf_state *);
173
174struct pfsync_upd_req_item {
175	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
176	struct pfsync_upd_req			ur_msg;
177};
178
179struct pfsync_deferral {
180	struct pfsync_softc		*pd_sc;
181	TAILQ_ENTRY(pfsync_deferral)	pd_entry;
182	u_int				pd_refs;
183	struct callout			pd_tmo;
184
185	struct pf_state			*pd_st;
186	struct mbuf			*pd_m;
187};
188
189struct pfsync_sofct;
190
191struct pfsync_bucket
192{
193	int			b_id;
194	struct pfsync_softc	*b_sc;
195	struct mtx		b_mtx;
196	struct callout		b_tmo;
197	int			b_flags;
198#define	PFSYNCF_BUCKET_PUSH	0x00000001
199
200	size_t			b_len;
201	TAILQ_HEAD(, pf_state)			b_qs[PFSYNC_S_COUNT];
202	TAILQ_HEAD(, pfsync_upd_req_item)	b_upd_req_list;
203	TAILQ_HEAD(, pfsync_deferral)		b_deferrals;
204	u_int			b_deferred;
205	void			*b_plus;
206	size_t			b_pluslen;
207
208	struct  ifaltq b_snd;
209};
210
211struct pfsync_softc {
212	/* Configuration */
213	struct ifnet		*sc_ifp;
214	struct ifnet		*sc_sync_if;
215	struct ip_moptions	sc_imo;
216	struct in_addr		sc_sync_peer;
217	uint32_t		sc_flags;
218	uint8_t			sc_maxupdates;
219	struct ip		sc_template;
220	struct mtx		sc_mtx;
221
222	/* Queued data */
223	struct pfsync_bucket	*sc_buckets;
224
225	/* Bulk update info */
226	struct mtx		sc_bulk_mtx;
227	uint32_t		sc_ureq_sent;
228	int			sc_bulk_tries;
229	uint32_t		sc_ureq_received;
230	int			sc_bulk_hashid;
231	uint64_t		sc_bulk_stateid;
232	uint32_t		sc_bulk_creatorid;
233	struct callout		sc_bulk_tmo;
234	struct callout		sc_bulkfail_tmo;
235};
236
237#define	PFSYNC_LOCK(sc)		mtx_lock(&(sc)->sc_mtx)
238#define	PFSYNC_UNLOCK(sc)	mtx_unlock(&(sc)->sc_mtx)
239#define	PFSYNC_LOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_mtx, MA_OWNED)
240
241#define PFSYNC_BUCKET_LOCK(b)		mtx_lock(&(b)->b_mtx)
242#define PFSYNC_BUCKET_UNLOCK(b)		mtx_unlock(&(b)->b_mtx)
243#define PFSYNC_BUCKET_LOCK_ASSERT(b)	mtx_assert(&(b)->b_mtx, MA_OWNED)
244
245#define	PFSYNC_BLOCK(sc)	mtx_lock(&(sc)->sc_bulk_mtx)
246#define	PFSYNC_BUNLOCK(sc)	mtx_unlock(&(sc)->sc_bulk_mtx)
247#define	PFSYNC_BLOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED)
248
249static const char pfsyncname[] = "pfsync";
250static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data");
251VNET_DEFINE_STATIC(struct pfsync_softc	*, pfsyncif) = NULL;
252#define	V_pfsyncif		VNET(pfsyncif)
253VNET_DEFINE_STATIC(void *, pfsync_swi_cookie) = NULL;
254#define	V_pfsync_swi_cookie	VNET(pfsync_swi_cookie)
255VNET_DEFINE_STATIC(struct intr_event *, pfsync_swi_ie);
256#define	V_pfsync_swi_ie		VNET(pfsync_swi_ie)
257VNET_DEFINE_STATIC(struct pfsyncstats, pfsyncstats);
258#define	V_pfsyncstats		VNET(pfsyncstats)
259VNET_DEFINE_STATIC(int, pfsync_carp_adj) = CARP_MAXSKEW;
260#define	V_pfsync_carp_adj	VNET(pfsync_carp_adj)
261
262static void	pfsync_timeout(void *);
263static void	pfsync_push(struct pfsync_bucket *);
264static void	pfsync_push_all(struct pfsync_softc *);
265static void	pfsyncintr(void *);
266static int	pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *,
267		    struct in_mfilter *imf);
268static void	pfsync_multicast_cleanup(struct pfsync_softc *);
269static void	pfsync_pointers_init(void);
270static void	pfsync_pointers_uninit(void);
271static int	pfsync_init(void);
272static void	pfsync_uninit(void);
273
274static unsigned long pfsync_buckets;
275
276SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC");
277SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW,
278    &VNET_NAME(pfsyncstats), pfsyncstats,
279    "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
280SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_RW,
281    &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment");
282SYSCTL_ULONG(_net_pfsync, OID_AUTO, pfsync_buckets, CTLFLAG_RDTUN,
283    &pfsync_buckets, 0, "Number of pfsync hash buckets");
284
285static int	pfsync_clone_create(struct if_clone *, int, caddr_t);
286static void	pfsync_clone_destroy(struct ifnet *);
287static int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
288		    struct pf_state_peer *);
289static int	pfsyncoutput(struct ifnet *, struct mbuf *,
290		    const struct sockaddr *, struct route *);
291static int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
292
293static int	pfsync_defer(struct pf_state *, struct mbuf *);
294static void	pfsync_undefer(struct pfsync_deferral *, int);
295static void	pfsync_undefer_state(struct pf_state *, int);
296static void	pfsync_defer_tmo(void *);
297
298static void	pfsync_request_update(u_int32_t, u_int64_t);
299static bool	pfsync_update_state_req(struct pf_state *);
300
301static void	pfsync_drop(struct pfsync_softc *);
302static void	pfsync_sendout(int, int);
303static void	pfsync_send_plus(void *, size_t);
304
305static void	pfsync_bulk_start(void);
306static void	pfsync_bulk_status(u_int8_t);
307static void	pfsync_bulk_update(void *);
308static void	pfsync_bulk_fail(void *);
309
310static void	pfsync_detach_ifnet(struct ifnet *);
311#ifdef IPSEC
312static void	pfsync_update_net_tdb(struct pfsync_tdb *);
313#endif
314static struct pfsync_bucket	*pfsync_get_bucket(struct pfsync_softc *,
315		    struct pf_state *);
316
317
318#define PFSYNC_MAX_BULKTRIES	12
319
320VNET_DEFINE(struct if_clone *, pfsync_cloner);
321#define	V_pfsync_cloner	VNET(pfsync_cloner)
322
323static int
324pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param)
325{
326	struct pfsync_softc *sc;
327	struct ifnet *ifp;
328	struct pfsync_bucket *b;
329	int c, q;
330
331	if (unit != 0)
332		return (EINVAL);
333
334	if (! pfsync_buckets)
335		pfsync_buckets = mp_ncpus * 2;
336
337	sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO);
338	sc->sc_flags |= PFSYNCF_OK;
339	sc->sc_maxupdates = 128;
340
341	ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC);
342	if (ifp == NULL) {
343		free(sc, M_PFSYNC);
344		return (ENOSPC);
345	}
346	if_initname(ifp, pfsyncname, unit);
347	ifp->if_softc = sc;
348	ifp->if_ioctl = pfsyncioctl;
349	ifp->if_output = pfsyncoutput;
350	ifp->if_type = IFT_PFSYNC;
351	ifp->if_hdrlen = sizeof(struct pfsync_header);
352	ifp->if_mtu = ETHERMTU;
353	mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF);
354	mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF);
355	callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0);
356	callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0);
357
358	if_attach(ifp);
359
360	bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
361
362	sc->sc_buckets = mallocarray(pfsync_buckets, sizeof(*sc->sc_buckets),
363	    M_PFSYNC, M_ZERO | M_WAITOK);
364	for (c = 0; c < pfsync_buckets; c++) {
365		b = &sc->sc_buckets[c];
366		mtx_init(&b->b_mtx, "pfsync bucket", NULL, MTX_DEF);
367
368		b->b_id = c;
369		b->b_sc = sc;
370		b->b_len = PFSYNC_MINPKT;
371
372		for (q = 0; q < PFSYNC_S_COUNT; q++)
373			TAILQ_INIT(&b->b_qs[q]);
374
375		TAILQ_INIT(&b->b_upd_req_list);
376		TAILQ_INIT(&b->b_deferrals);
377
378		callout_init(&b->b_tmo, 1);
379
380		b->b_snd.ifq_maxlen = ifqmaxlen;
381	}
382
383	V_pfsyncif = sc;
384
385	return (0);
386}
387
388static void
389pfsync_clone_destroy(struct ifnet *ifp)
390{
391	struct pfsync_softc *sc = ifp->if_softc;
392	struct pfsync_bucket *b;
393	int c;
394
395	for (c = 0; c < pfsync_buckets; c++) {
396		b = &sc->sc_buckets[c];
397		/*
398		 * At this stage, everything should have already been
399		 * cleared by pfsync_uninit(), and we have only to
400		 * drain callouts.
401		 */
402		while (b->b_deferred > 0) {
403			struct pfsync_deferral *pd =
404			    TAILQ_FIRST(&b->b_deferrals);
405
406			TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry);
407			b->b_deferred--;
408			if (callout_stop(&pd->pd_tmo) > 0) {
409				pf_release_state(pd->pd_st);
410				m_freem(pd->pd_m);
411				free(pd, M_PFSYNC);
412			} else {
413				pd->pd_refs++;
414				callout_drain(&pd->pd_tmo);
415				free(pd, M_PFSYNC);
416			}
417		}
418
419		callout_drain(&b->b_tmo);
420	}
421
422	callout_drain(&sc->sc_bulkfail_tmo);
423	callout_drain(&sc->sc_bulk_tmo);
424
425	if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
426		(*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy");
427	bpfdetach(ifp);
428	if_detach(ifp);
429
430	pfsync_drop(sc);
431
432	if_free(ifp);
433	pfsync_multicast_cleanup(sc);
434	mtx_destroy(&sc->sc_mtx);
435	mtx_destroy(&sc->sc_bulk_mtx);
436
437	free(sc->sc_buckets, M_PFSYNC);
438	free(sc, M_PFSYNC);
439
440	V_pfsyncif = NULL;
441}
442
443static int
444pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
445    struct pf_state_peer *d)
446{
447	if (s->scrub.scrub_flag && d->scrub == NULL) {
448		d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO);
449		if (d->scrub == NULL)
450			return (ENOMEM);
451	}
452
453	return (0);
454}
455
456
457static int
458pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
459{
460	struct pfsync_softc *sc = V_pfsyncif;
461#ifndef	__NO_STRICT_ALIGNMENT
462	struct pfsync_state_key key[2];
463#endif
464	struct pfsync_state_key *kw, *ks;
465	struct pf_state	*st = NULL;
466	struct pf_state_key *skw = NULL, *sks = NULL;
467	struct pf_krule *r = NULL;
468	struct pfi_kkif	*kif;
469	int error;
470
471	PF_RULES_RASSERT();
472
473	if (sp->creatorid == 0) {
474		if (V_pf_status.debug >= PF_DEBUG_MISC)
475			printf("%s: invalid creator id: %08x\n", __func__,
476			    ntohl(sp->creatorid));
477		return (EINVAL);
478	}
479
480	if ((kif = pfi_kkif_find(sp->ifname)) == NULL) {
481		if (V_pf_status.debug >= PF_DEBUG_MISC)
482			printf("%s: unknown interface: %s\n", __func__,
483			    sp->ifname);
484		if (flags & PFSYNC_SI_IOCTL)
485			return (EINVAL);
486		return (0);	/* skip this state */
487	}
488
489	/*
490	 * If the ruleset checksums match or the state is coming from the ioctl,
491	 * it's safe to associate the state with the rule of that number.
492	 */
493	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
494	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
495	    pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount)
496		r = pf_main_ruleset.rules[
497		    PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
498	else
499		r = &V_pf_default_rule;
500
501	if ((r->max_states &&
502	    counter_u64_fetch(r->states_cur) >= r->max_states))
503		goto cleanup;
504
505	/*
506	 * XXXGL: consider M_WAITOK in ioctl path after.
507	 */
508	if ((st = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO)) == NULL)
509		goto cleanup;
510
511	for (int i = 0; i < 2; i++) {
512		st->packets[i] = counter_u64_alloc(M_NOWAIT);
513		st->bytes[i] = counter_u64_alloc(M_NOWAIT);
514		if (st->packets[i] == NULL || st->bytes[i] == NULL)
515			goto cleanup;
516	}
517
518	if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL)
519		goto cleanup;
520
521#ifndef	__NO_STRICT_ALIGNMENT
522	bcopy(&sp->key, key, sizeof(struct pfsync_state_key) * 2);
523	kw = &key[PF_SK_WIRE];
524	ks = &key[PF_SK_STACK];
525#else
526	kw = &sp->key[PF_SK_WIRE];
527	ks = &sp->key[PF_SK_STACK];
528#endif
529
530	if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->af) ||
531	    PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->af) ||
532	    kw->port[0] != ks->port[0] ||
533	    kw->port[1] != ks->port[1]) {
534		sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
535		if (sks == NULL)
536			goto cleanup;
537	} else
538		sks = skw;
539
540	/* allocate memory for scrub info */
541	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
542	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
543		goto cleanup;
544
545	/* Copy to state key(s). */
546	skw->addr[0] = kw->addr[0];
547	skw->addr[1] = kw->addr[1];
548	skw->port[0] = kw->port[0];
549	skw->port[1] = kw->port[1];
550	skw->proto = sp->proto;
551	skw->af = sp->af;
552	if (sks != skw) {
553		sks->addr[0] = ks->addr[0];
554		sks->addr[1] = ks->addr[1];
555		sks->port[0] = ks->port[0];
556		sks->port[1] = ks->port[1];
557		sks->proto = sp->proto;
558		sks->af = sp->af;
559	}
560
561	/* copy to state */
562	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
563	st->creation = time_uptime - ntohl(sp->creation);
564	st->expire = time_uptime;
565	if (sp->expire) {
566		uint32_t timeout;
567
568		timeout = r->timeout[sp->timeout];
569		if (!timeout)
570			timeout = V_pf_default_rule.timeout[sp->timeout];
571
572		/* sp->expire may have been adaptively scaled by export. */
573		st->expire -= timeout - ntohl(sp->expire);
574	}
575
576	st->direction = sp->direction;
577	st->log = sp->log;
578	st->timeout = sp->timeout;
579	st->state_flags = sp->state_flags;
580
581	st->id = sp->id;
582	st->creatorid = sp->creatorid;
583	pf_state_peer_ntoh(&sp->src, &st->src);
584	pf_state_peer_ntoh(&sp->dst, &st->dst);
585
586	st->rule.ptr = r;
587	st->nat_rule.ptr = NULL;
588	st->anchor.ptr = NULL;
589	st->rt_kif = NULL;
590
591	st->pfsync_time = time_uptime;
592	st->sync_state = PFSYNC_S_NONE;
593
594	if (!(flags & PFSYNC_SI_IOCTL))
595		st->state_flags |= PFSTATE_NOSYNC;
596
597	if ((error = pf_state_insert(kif, kif, skw, sks, st)) != 0)
598		goto cleanup_state;
599
600	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
601	counter_u64_add(r->states_cur, 1);
602	counter_u64_add(r->states_tot, 1);
603
604	if (!(flags & PFSYNC_SI_IOCTL)) {
605		st->state_flags &= ~PFSTATE_NOSYNC;
606		if (st->state_flags & PFSTATE_ACK) {
607			pfsync_q_ins(st, PFSYNC_S_IACK, true);
608			pfsync_push_all(sc);
609		}
610	}
611	st->state_flags &= ~PFSTATE_ACK;
612	PF_STATE_UNLOCK(st);
613
614	return (0);
615
616cleanup:
617	error = ENOMEM;
618	if (skw == sks)
619		sks = NULL;
620	if (skw != NULL)
621		uma_zfree(V_pf_state_key_z, skw);
622	if (sks != NULL)
623		uma_zfree(V_pf_state_key_z, sks);
624
625cleanup_state:	/* pf_state_insert() frees the state keys. */
626	if (st) {
627		for (int i = 0; i < 2; i++) {
628			counter_u64_free(st->packets[i]);
629			counter_u64_free(st->bytes[i]);
630		}
631		if (st->dst.scrub)
632			uma_zfree(V_pf_state_scrub_z, st->dst.scrub);
633		if (st->src.scrub)
634			uma_zfree(V_pf_state_scrub_z, st->src.scrub);
635		uma_zfree(V_pf_state_z, st);
636	}
637	return (error);
638}
639
640static int
641pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused)
642{
643	struct pfsync_softc *sc = V_pfsyncif;
644	struct pfsync_pkt pkt;
645	struct mbuf *m = *mp;
646	struct ip *ip = mtod(m, struct ip *);
647	struct pfsync_header *ph;
648	struct pfsync_subheader subh;
649
650	int offset, len;
651	int rv;
652	uint16_t count;
653
654	PF_RULES_RLOCK_TRACKER;
655
656	*mp = NULL;
657	V_pfsyncstats.pfsyncs_ipackets++;
658
659	/* Verify that we have a sync interface configured. */
660	if (!sc || !sc->sc_sync_if || !V_pf_status.running ||
661	    (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
662		goto done;
663
664	/* verify that the packet came in on the right interface */
665	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
666		V_pfsyncstats.pfsyncs_badif++;
667		goto done;
668	}
669
670	if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1);
671	if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
672	/* verify that the IP TTL is 255. */
673	if (ip->ip_ttl != PFSYNC_DFLTTL) {
674		V_pfsyncstats.pfsyncs_badttl++;
675		goto done;
676	}
677
678	offset = ip->ip_hl << 2;
679	if (m->m_pkthdr.len < offset + sizeof(*ph)) {
680		V_pfsyncstats.pfsyncs_hdrops++;
681		goto done;
682	}
683
684	if (offset + sizeof(*ph) > m->m_len) {
685		if (m_pullup(m, offset + sizeof(*ph)) == NULL) {
686			V_pfsyncstats.pfsyncs_hdrops++;
687			return (IPPROTO_DONE);
688		}
689		ip = mtod(m, struct ip *);
690	}
691	ph = (struct pfsync_header *)((char *)ip + offset);
692
693	/* verify the version */
694	if (ph->version != PFSYNC_VERSION) {
695		V_pfsyncstats.pfsyncs_badver++;
696		goto done;
697	}
698
699	len = ntohs(ph->len) + offset;
700	if (m->m_pkthdr.len < len) {
701		V_pfsyncstats.pfsyncs_badlen++;
702		goto done;
703	}
704
705	/* Cheaper to grab this now than having to mess with mbufs later */
706	pkt.ip = ip;
707	pkt.src = ip->ip_src;
708	pkt.flags = 0;
709
710	/*
711	 * Trusting pf_chksum during packet processing, as well as seeking
712	 * in interface name tree, require holding PF_RULES_RLOCK().
713	 */
714	PF_RULES_RLOCK();
715	if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
716		pkt.flags |= PFSYNC_SI_CKSUM;
717
718	offset += sizeof(*ph);
719	while (offset <= len - sizeof(subh)) {
720		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
721		offset += sizeof(subh);
722
723		if (subh.action >= PFSYNC_ACT_MAX) {
724			V_pfsyncstats.pfsyncs_badact++;
725			PF_RULES_RUNLOCK();
726			goto done;
727		}
728
729		count = ntohs(subh.count);
730		V_pfsyncstats.pfsyncs_iacts[subh.action] += count;
731		rv = (*pfsync_acts[subh.action])(&pkt, m, offset, count);
732		if (rv == -1) {
733			PF_RULES_RUNLOCK();
734			return (IPPROTO_DONE);
735		}
736
737		offset += rv;
738	}
739	PF_RULES_RUNLOCK();
740
741done:
742	m_freem(m);
743	return (IPPROTO_DONE);
744}
745
746static int
747pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
748{
749	struct pfsync_clr *clr;
750	struct mbuf *mp;
751	int len = sizeof(*clr) * count;
752	int i, offp;
753	u_int32_t creatorid;
754
755	mp = m_pulldown(m, offset, len, &offp);
756	if (mp == NULL) {
757		V_pfsyncstats.pfsyncs_badlen++;
758		return (-1);
759	}
760	clr = (struct pfsync_clr *)(mp->m_data + offp);
761
762	for (i = 0; i < count; i++) {
763		creatorid = clr[i].creatorid;
764
765		if (clr[i].ifname[0] != '\0' &&
766		    pfi_kkif_find(clr[i].ifname) == NULL)
767			continue;
768
769		for (int i = 0; i <= pf_hashmask; i++) {
770			struct pf_idhash *ih = &V_pf_idhash[i];
771			struct pf_state *s;
772relock:
773			PF_HASHROW_LOCK(ih);
774			LIST_FOREACH(s, &ih->states, entry) {
775				if (s->creatorid == creatorid) {
776					s->state_flags |= PFSTATE_NOSYNC;
777					pf_unlink_state(s, PF_ENTER_LOCKED);
778					goto relock;
779				}
780			}
781			PF_HASHROW_UNLOCK(ih);
782		}
783	}
784
785	return (len);
786}
787
788static int
789pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
790{
791	struct mbuf *mp;
792	struct pfsync_state *sa, *sp;
793	int len = sizeof(*sp) * count;
794	int i, offp;
795
796	mp = m_pulldown(m, offset, len, &offp);
797	if (mp == NULL) {
798		V_pfsyncstats.pfsyncs_badlen++;
799		return (-1);
800	}
801	sa = (struct pfsync_state *)(mp->m_data + offp);
802
803	for (i = 0; i < count; i++) {
804		sp = &sa[i];
805
806		/* Check for invalid values. */
807		if (sp->timeout >= PFTM_MAX ||
808		    sp->src.state > PF_TCPS_PROXY_DST ||
809		    sp->dst.state > PF_TCPS_PROXY_DST ||
810		    sp->direction > PF_OUT ||
811		    (sp->af != AF_INET && sp->af != AF_INET6)) {
812			if (V_pf_status.debug >= PF_DEBUG_MISC)
813				printf("%s: invalid value\n", __func__);
814			V_pfsyncstats.pfsyncs_badval++;
815			continue;
816		}
817
818		if (pfsync_state_import(sp, pkt->flags) == ENOMEM)
819			/* Drop out, but process the rest of the actions. */
820			break;
821	}
822
823	return (len);
824}
825
826static int
827pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
828{
829	struct pfsync_ins_ack *ia, *iaa;
830	struct pf_state *st;
831
832	struct mbuf *mp;
833	int len = count * sizeof(*ia);
834	int offp, i;
835
836	mp = m_pulldown(m, offset, len, &offp);
837	if (mp == NULL) {
838		V_pfsyncstats.pfsyncs_badlen++;
839		return (-1);
840	}
841	iaa = (struct pfsync_ins_ack *)(mp->m_data + offp);
842
843	for (i = 0; i < count; i++) {
844		ia = &iaa[i];
845
846		st = pf_find_state_byid(ia->id, ia->creatorid);
847		if (st == NULL)
848			continue;
849
850		if (st->state_flags & PFSTATE_ACK) {
851			pfsync_undefer_state(st, 0);
852		}
853		PF_STATE_UNLOCK(st);
854	}
855	/*
856	 * XXX this is not yet implemented, but we know the size of the
857	 * message so we can skip it.
858	 */
859
860	return (count * sizeof(struct pfsync_ins_ack));
861}
862
863static int
864pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
865    struct pfsync_state_peer *dst)
866{
867	int sync = 0;
868
869	PF_STATE_LOCK_ASSERT(st);
870
871	/*
872	 * The state should never go backwards except
873	 * for syn-proxy states.  Neither should the
874	 * sequence window slide backwards.
875	 */
876	if ((st->src.state > src->state &&
877	    (st->src.state < PF_TCPS_PROXY_SRC ||
878	    src->state >= PF_TCPS_PROXY_SRC)) ||
879
880	    (st->src.state == src->state &&
881	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
882		sync++;
883	else
884		pf_state_peer_ntoh(src, &st->src);
885
886	if ((st->dst.state > dst->state) ||
887
888	    (st->dst.state >= TCPS_SYN_SENT &&
889	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
890		sync++;
891	else
892		pf_state_peer_ntoh(dst, &st->dst);
893
894	return (sync);
895}
896
897static int
898pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
899{
900	struct pfsync_softc *sc = V_pfsyncif;
901	struct pfsync_state *sa, *sp;
902	struct pf_state *st;
903	int sync;
904
905	struct mbuf *mp;
906	int len = count * sizeof(*sp);
907	int offp, i;
908
909	mp = m_pulldown(m, offset, len, &offp);
910	if (mp == NULL) {
911		V_pfsyncstats.pfsyncs_badlen++;
912		return (-1);
913	}
914	sa = (struct pfsync_state *)(mp->m_data + offp);
915
916	for (i = 0; i < count; i++) {
917		sp = &sa[i];
918
919		/* check for invalid values */
920		if (sp->timeout >= PFTM_MAX ||
921		    sp->src.state > PF_TCPS_PROXY_DST ||
922		    sp->dst.state > PF_TCPS_PROXY_DST) {
923			if (V_pf_status.debug >= PF_DEBUG_MISC) {
924				printf("pfsync_input: PFSYNC_ACT_UPD: "
925				    "invalid value\n");
926			}
927			V_pfsyncstats.pfsyncs_badval++;
928			continue;
929		}
930
931		st = pf_find_state_byid(sp->id, sp->creatorid);
932		if (st == NULL) {
933			/* insert the update */
934			if (pfsync_state_import(sp, pkt->flags))
935				V_pfsyncstats.pfsyncs_badstate++;
936			continue;
937		}
938
939		if (st->state_flags & PFSTATE_ACK) {
940			pfsync_undefer_state(st, 1);
941		}
942
943		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
944			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
945		else {
946			sync = 0;
947
948			/*
949			 * Non-TCP protocol state machine always go
950			 * forwards
951			 */
952			if (st->src.state > sp->src.state)
953				sync++;
954			else
955				pf_state_peer_ntoh(&sp->src, &st->src);
956			if (st->dst.state > sp->dst.state)
957				sync++;
958			else
959				pf_state_peer_ntoh(&sp->dst, &st->dst);
960		}
961		if (sync < 2) {
962			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
963			pf_state_peer_ntoh(&sp->dst, &st->dst);
964			st->expire = time_uptime;
965			st->timeout = sp->timeout;
966		}
967		st->pfsync_time = time_uptime;
968
969		if (sync) {
970			V_pfsyncstats.pfsyncs_stale++;
971
972			pfsync_update_state(st);
973			PF_STATE_UNLOCK(st);
974			pfsync_push_all(sc);
975			continue;
976		}
977		PF_STATE_UNLOCK(st);
978	}
979
980	return (len);
981}
982
983static int
984pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
985{
986	struct pfsync_softc *sc = V_pfsyncif;
987	struct pfsync_upd_c *ua, *up;
988	struct pf_state *st;
989	int len = count * sizeof(*up);
990	int sync;
991	struct mbuf *mp;
992	int offp, i;
993
994	mp = m_pulldown(m, offset, len, &offp);
995	if (mp == NULL) {
996		V_pfsyncstats.pfsyncs_badlen++;
997		return (-1);
998	}
999	ua = (struct pfsync_upd_c *)(mp->m_data + offp);
1000
1001	for (i = 0; i < count; i++) {
1002		up = &ua[i];
1003
1004		/* check for invalid values */
1005		if (up->timeout >= PFTM_MAX ||
1006		    up->src.state > PF_TCPS_PROXY_DST ||
1007		    up->dst.state > PF_TCPS_PROXY_DST) {
1008			if (V_pf_status.debug >= PF_DEBUG_MISC) {
1009				printf("pfsync_input: "
1010				    "PFSYNC_ACT_UPD_C: "
1011				    "invalid value\n");
1012			}
1013			V_pfsyncstats.pfsyncs_badval++;
1014			continue;
1015		}
1016
1017		st = pf_find_state_byid(up->id, up->creatorid);
1018		if (st == NULL) {
1019			/* We don't have this state. Ask for it. */
1020			PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]);
1021			pfsync_request_update(up->creatorid, up->id);
1022			PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]);
1023			continue;
1024		}
1025
1026		if (st->state_flags & PFSTATE_ACK) {
1027			pfsync_undefer_state(st, 1);
1028		}
1029
1030		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
1031			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
1032		else {
1033			sync = 0;
1034
1035			/*
1036			 * Non-TCP protocol state machine always go
1037			 * forwards
1038			 */
1039			if (st->src.state > up->src.state)
1040				sync++;
1041			else
1042				pf_state_peer_ntoh(&up->src, &st->src);
1043			if (st->dst.state > up->dst.state)
1044				sync++;
1045			else
1046				pf_state_peer_ntoh(&up->dst, &st->dst);
1047		}
1048		if (sync < 2) {
1049			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
1050			pf_state_peer_ntoh(&up->dst, &st->dst);
1051			st->expire = time_uptime;
1052			st->timeout = up->timeout;
1053		}
1054		st->pfsync_time = time_uptime;
1055
1056		if (sync) {
1057			V_pfsyncstats.pfsyncs_stale++;
1058
1059			pfsync_update_state(st);
1060			PF_STATE_UNLOCK(st);
1061			pfsync_push_all(sc);
1062			continue;
1063		}
1064		PF_STATE_UNLOCK(st);
1065	}
1066
1067	return (len);
1068}
1069
1070static int
1071pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1072{
1073	struct pfsync_upd_req *ur, *ura;
1074	struct mbuf *mp;
1075	int len = count * sizeof(*ur);
1076	int i, offp;
1077
1078	struct pf_state *st;
1079
1080	mp = m_pulldown(m, offset, len, &offp);
1081	if (mp == NULL) {
1082		V_pfsyncstats.pfsyncs_badlen++;
1083		return (-1);
1084	}
1085	ura = (struct pfsync_upd_req *)(mp->m_data + offp);
1086
1087	for (i = 0; i < count; i++) {
1088		ur = &ura[i];
1089
1090		if (ur->id == 0 && ur->creatorid == 0)
1091			pfsync_bulk_start();
1092		else {
1093			st = pf_find_state_byid(ur->id, ur->creatorid);
1094			if (st == NULL) {
1095				V_pfsyncstats.pfsyncs_badstate++;
1096				continue;
1097			}
1098			if (st->state_flags & PFSTATE_NOSYNC) {
1099				PF_STATE_UNLOCK(st);
1100				continue;
1101			}
1102
1103			pfsync_update_state_req(st);
1104			PF_STATE_UNLOCK(st);
1105		}
1106	}
1107
1108	return (len);
1109}
1110
1111static int
1112pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1113{
1114	struct mbuf *mp;
1115	struct pfsync_state *sa, *sp;
1116	struct pf_state *st;
1117	int len = count * sizeof(*sp);
1118	int offp, i;
1119
1120	mp = m_pulldown(m, offset, len, &offp);
1121	if (mp == NULL) {
1122		V_pfsyncstats.pfsyncs_badlen++;
1123		return (-1);
1124	}
1125	sa = (struct pfsync_state *)(mp->m_data + offp);
1126
1127	for (i = 0; i < count; i++) {
1128		sp = &sa[i];
1129
1130		st = pf_find_state_byid(sp->id, sp->creatorid);
1131		if (st == NULL) {
1132			V_pfsyncstats.pfsyncs_badstate++;
1133			continue;
1134		}
1135		st->state_flags |= PFSTATE_NOSYNC;
1136		pf_unlink_state(st, PF_ENTER_LOCKED);
1137	}
1138
1139	return (len);
1140}
1141
1142static int
1143pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1144{
1145	struct mbuf *mp;
1146	struct pfsync_del_c *sa, *sp;
1147	struct pf_state *st;
1148	int len = count * sizeof(*sp);
1149	int offp, i;
1150
1151	mp = m_pulldown(m, offset, len, &offp);
1152	if (mp == NULL) {
1153		V_pfsyncstats.pfsyncs_badlen++;
1154		return (-1);
1155	}
1156	sa = (struct pfsync_del_c *)(mp->m_data + offp);
1157
1158	for (i = 0; i < count; i++) {
1159		sp = &sa[i];
1160
1161		st = pf_find_state_byid(sp->id, sp->creatorid);
1162		if (st == NULL) {
1163			V_pfsyncstats.pfsyncs_badstate++;
1164			continue;
1165		}
1166
1167		st->state_flags |= PFSTATE_NOSYNC;
1168		pf_unlink_state(st, PF_ENTER_LOCKED);
1169	}
1170
1171	return (len);
1172}
1173
1174static int
1175pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1176{
1177	struct pfsync_softc *sc = V_pfsyncif;
1178	struct pfsync_bus *bus;
1179	struct mbuf *mp;
1180	int len = count * sizeof(*bus);
1181	int offp;
1182
1183	PFSYNC_BLOCK(sc);
1184
1185	/* If we're not waiting for a bulk update, who cares. */
1186	if (sc->sc_ureq_sent == 0) {
1187		PFSYNC_BUNLOCK(sc);
1188		return (len);
1189	}
1190
1191	mp = m_pulldown(m, offset, len, &offp);
1192	if (mp == NULL) {
1193		PFSYNC_BUNLOCK(sc);
1194		V_pfsyncstats.pfsyncs_badlen++;
1195		return (-1);
1196	}
1197	bus = (struct pfsync_bus *)(mp->m_data + offp);
1198
1199	switch (bus->status) {
1200	case PFSYNC_BUS_START:
1201		callout_reset(&sc->sc_bulkfail_tmo, 4 * hz +
1202		    V_pf_limits[PF_LIMIT_STATES].limit /
1203		    ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) /
1204		    sizeof(struct pfsync_state)),
1205		    pfsync_bulk_fail, sc);
1206		if (V_pf_status.debug >= PF_DEBUG_MISC)
1207			printf("pfsync: received bulk update start\n");
1208		break;
1209
1210	case PFSYNC_BUS_END:
1211		if (time_uptime - ntohl(bus->endtime) >=
1212		    sc->sc_ureq_sent) {
1213			/* that's it, we're happy */
1214			sc->sc_ureq_sent = 0;
1215			sc->sc_bulk_tries = 0;
1216			callout_stop(&sc->sc_bulkfail_tmo);
1217			if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
1218				(*carp_demote_adj_p)(-V_pfsync_carp_adj,
1219				    "pfsync bulk done");
1220			sc->sc_flags |= PFSYNCF_OK;
1221			if (V_pf_status.debug >= PF_DEBUG_MISC)
1222				printf("pfsync: received valid "
1223				    "bulk update end\n");
1224		} else {
1225			if (V_pf_status.debug >= PF_DEBUG_MISC)
1226				printf("pfsync: received invalid "
1227				    "bulk update end: bad timestamp\n");
1228		}
1229		break;
1230	}
1231	PFSYNC_BUNLOCK(sc);
1232
1233	return (len);
1234}
1235
1236static int
1237pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1238{
1239	int len = count * sizeof(struct pfsync_tdb);
1240
1241#if defined(IPSEC)
1242	struct pfsync_tdb *tp;
1243	struct mbuf *mp;
1244	int offp;
1245	int i;
1246	int s;
1247
1248	mp = m_pulldown(m, offset, len, &offp);
1249	if (mp == NULL) {
1250		V_pfsyncstats.pfsyncs_badlen++;
1251		return (-1);
1252	}
1253	tp = (struct pfsync_tdb *)(mp->m_data + offp);
1254
1255	for (i = 0; i < count; i++)
1256		pfsync_update_net_tdb(&tp[i]);
1257#endif
1258
1259	return (len);
1260}
1261
1262#if defined(IPSEC)
1263/* Update an in-kernel tdb. Silently fail if no tdb is found. */
1264static void
1265pfsync_update_net_tdb(struct pfsync_tdb *pt)
1266{
1267	struct tdb		*tdb;
1268	int			 s;
1269
1270	/* check for invalid values */
1271	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
1272	    (pt->dst.sa.sa_family != AF_INET &&
1273	    pt->dst.sa.sa_family != AF_INET6))
1274		goto bad;
1275
1276	tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
1277	if (tdb) {
1278		pt->rpl = ntohl(pt->rpl);
1279		pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes);
1280
1281		/* Neither replay nor byte counter should ever decrease. */
1282		if (pt->rpl < tdb->tdb_rpl ||
1283		    pt->cur_bytes < tdb->tdb_cur_bytes) {
1284			goto bad;
1285		}
1286
1287		tdb->tdb_rpl = pt->rpl;
1288		tdb->tdb_cur_bytes = pt->cur_bytes;
1289	}
1290	return;
1291
1292bad:
1293	if (V_pf_status.debug >= PF_DEBUG_MISC)
1294		printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: "
1295		    "invalid value\n");
1296	V_pfsyncstats.pfsyncs_badstate++;
1297	return;
1298}
1299#endif
1300
1301
1302static int
1303pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1304{
1305	/* check if we are at the right place in the packet */
1306	if (offset != m->m_pkthdr.len)
1307		V_pfsyncstats.pfsyncs_badlen++;
1308
1309	/* we're done. free and let the caller return */
1310	m_freem(m);
1311	return (-1);
1312}
1313
1314static int
1315pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1316{
1317	V_pfsyncstats.pfsyncs_badact++;
1318
1319	m_freem(m);
1320	return (-1);
1321}
1322
1323static int
1324pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
1325	struct route *rt)
1326{
1327	m_freem(m);
1328	return (0);
1329}
1330
1331/* ARGSUSED */
1332static int
1333pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1334{
1335	struct pfsync_softc *sc = ifp->if_softc;
1336	struct ifreq *ifr = (struct ifreq *)data;
1337	struct pfsyncreq pfsyncr;
1338	int error;
1339	int c;
1340
1341	switch (cmd) {
1342	case SIOCSIFFLAGS:
1343		PFSYNC_LOCK(sc);
1344		if (ifp->if_flags & IFF_UP) {
1345			ifp->if_drv_flags |= IFF_DRV_RUNNING;
1346			PFSYNC_UNLOCK(sc);
1347			pfsync_pointers_init();
1348		} else {
1349			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1350			PFSYNC_UNLOCK(sc);
1351			pfsync_pointers_uninit();
1352		}
1353		break;
1354	case SIOCSIFMTU:
1355		if (!sc->sc_sync_if ||
1356		    ifr->ifr_mtu <= PFSYNC_MINPKT ||
1357		    ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
1358			return (EINVAL);
1359		if (ifr->ifr_mtu < ifp->if_mtu) {
1360			for (c = 0; c < pfsync_buckets; c++) {
1361				PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]);
1362				if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT)
1363					pfsync_sendout(1, c);
1364				PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]);
1365			}
1366		}
1367		ifp->if_mtu = ifr->ifr_mtu;
1368		break;
1369	case SIOCGETPFSYNC:
1370		bzero(&pfsyncr, sizeof(pfsyncr));
1371		PFSYNC_LOCK(sc);
1372		if (sc->sc_sync_if) {
1373			strlcpy(pfsyncr.pfsyncr_syncdev,
1374			    sc->sc_sync_if->if_xname, IFNAMSIZ);
1375		}
1376		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
1377		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
1378		pfsyncr.pfsyncr_defer = sc->sc_flags;
1379		PFSYNC_UNLOCK(sc);
1380		return (copyout(&pfsyncr, ifr_data_get_ptr(ifr),
1381		    sizeof(pfsyncr)));
1382
1383	case SIOCSETPFSYNC:
1384	    {
1385		struct in_mfilter *imf = NULL;
1386		struct ifnet *sifp;
1387		struct ip *ip;
1388
1389		if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0)
1390			return (error);
1391		if ((error = copyin(ifr_data_get_ptr(ifr), &pfsyncr,
1392		    sizeof(pfsyncr))))
1393			return (error);
1394
1395		if (pfsyncr.pfsyncr_maxupdates > 255)
1396			return (EINVAL);
1397
1398		if (pfsyncr.pfsyncr_syncdev[0] == 0)
1399			sifp = NULL;
1400		else if ((sifp = ifunit_ref(pfsyncr.pfsyncr_syncdev)) == NULL)
1401			return (EINVAL);
1402
1403		if (sifp != NULL && (
1404		    pfsyncr.pfsyncr_syncpeer.s_addr == 0 ||
1405		    pfsyncr.pfsyncr_syncpeer.s_addr ==
1406		    htonl(INADDR_PFSYNC_GROUP)))
1407			imf = ip_mfilter_alloc(M_WAITOK, 0, 0);
1408
1409		PFSYNC_LOCK(sc);
1410		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
1411			sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
1412		else
1413			sc->sc_sync_peer.s_addr =
1414			    pfsyncr.pfsyncr_syncpeer.s_addr;
1415
1416		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
1417		if (pfsyncr.pfsyncr_defer) {
1418			sc->sc_flags |= PFSYNCF_DEFER;
1419			V_pfsync_defer_ptr = pfsync_defer;
1420		} else {
1421			sc->sc_flags &= ~PFSYNCF_DEFER;
1422			V_pfsync_defer_ptr = NULL;
1423		}
1424
1425		if (sifp == NULL) {
1426			if (sc->sc_sync_if)
1427				if_rele(sc->sc_sync_if);
1428			sc->sc_sync_if = NULL;
1429			pfsync_multicast_cleanup(sc);
1430			PFSYNC_UNLOCK(sc);
1431			break;
1432		}
1433
1434		for (c = 0; c < pfsync_buckets; c++) {
1435			PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]);
1436			if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT &&
1437			    (sifp->if_mtu < sc->sc_ifp->if_mtu ||
1438			    (sc->sc_sync_if != NULL &&
1439			    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
1440			    sifp->if_mtu < MCLBYTES - sizeof(struct ip)))
1441				pfsync_sendout(1, c);
1442			PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]);
1443		}
1444
1445		pfsync_multicast_cleanup(sc);
1446
1447		if (sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
1448			error = pfsync_multicast_setup(sc, sifp, imf);
1449			if (error) {
1450				if_rele(sifp);
1451				ip_mfilter_free(imf);
1452				PFSYNC_UNLOCK(sc);
1453				return (error);
1454			}
1455		}
1456		if (sc->sc_sync_if)
1457			if_rele(sc->sc_sync_if);
1458		sc->sc_sync_if = sifp;
1459
1460		ip = &sc->sc_template;
1461		bzero(ip, sizeof(*ip));
1462		ip->ip_v = IPVERSION;
1463		ip->ip_hl = sizeof(sc->sc_template) >> 2;
1464		ip->ip_tos = IPTOS_LOWDELAY;
1465		/* len and id are set later. */
1466		ip->ip_off = htons(IP_DF);
1467		ip->ip_ttl = PFSYNC_DFLTTL;
1468		ip->ip_p = IPPROTO_PFSYNC;
1469		ip->ip_src.s_addr = INADDR_ANY;
1470		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;
1471
1472		/* Request a full state table update. */
1473		if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
1474			(*carp_demote_adj_p)(V_pfsync_carp_adj,
1475			    "pfsync bulk start");
1476		sc->sc_flags &= ~PFSYNCF_OK;
1477		if (V_pf_status.debug >= PF_DEBUG_MISC)
1478			printf("pfsync: requesting bulk update\n");
1479		PFSYNC_UNLOCK(sc);
1480		PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]);
1481		pfsync_request_update(0, 0);
1482		PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]);
1483		PFSYNC_BLOCK(sc);
1484		sc->sc_ureq_sent = time_uptime;
1485		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail,
1486		    sc);
1487		PFSYNC_BUNLOCK(sc);
1488
1489		break;
1490	    }
1491	default:
1492		return (ENOTTY);
1493	}
1494
1495	return (0);
1496}
1497
1498static void
1499pfsync_out_state(struct pf_state *st, void *buf)
1500{
1501	struct pfsync_state *sp = buf;
1502
1503	pfsync_state_export(sp, st);
1504}
1505
1506static void
1507pfsync_out_iack(struct pf_state *st, void *buf)
1508{
1509	struct pfsync_ins_ack *iack = buf;
1510
1511	iack->id = st->id;
1512	iack->creatorid = st->creatorid;
1513}
1514
1515static void
1516pfsync_out_upd_c(struct pf_state *st, void *buf)
1517{
1518	struct pfsync_upd_c *up = buf;
1519
1520	bzero(up, sizeof(*up));
1521	up->id = st->id;
1522	pf_state_peer_hton(&st->src, &up->src);
1523	pf_state_peer_hton(&st->dst, &up->dst);
1524	up->creatorid = st->creatorid;
1525	up->timeout = st->timeout;
1526}
1527
1528static void
1529pfsync_out_del(struct pf_state *st, void *buf)
1530{
1531	struct pfsync_del_c *dp = buf;
1532
1533	dp->id = st->id;
1534	dp->creatorid = st->creatorid;
1535	st->state_flags |= PFSTATE_NOSYNC;
1536}
1537
1538static void
1539pfsync_drop(struct pfsync_softc *sc)
1540{
1541	struct pf_state *st, *next;
1542	struct pfsync_upd_req_item *ur;
1543	struct pfsync_bucket *b;
1544	int c, q;
1545
1546	for (c = 0; c < pfsync_buckets; c++) {
1547		b = &sc->sc_buckets[c];
1548		for (q = 0; q < PFSYNC_S_COUNT; q++) {
1549			if (TAILQ_EMPTY(&b->b_qs[q]))
1550				continue;
1551
1552			TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, next) {
1553				KASSERT(st->sync_state == q,
1554					("%s: st->sync_state == q",
1555						__func__));
1556				st->sync_state = PFSYNC_S_NONE;
1557				pf_release_state(st);
1558			}
1559			TAILQ_INIT(&b->b_qs[q]);
1560		}
1561
1562		while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) {
1563			TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry);
1564			free(ur, M_PFSYNC);
1565		}
1566
1567		b->b_len = PFSYNC_MINPKT;
1568		b->b_plus = NULL;
1569	}
1570}
1571
1572static void
1573pfsync_sendout(int schedswi, int c)
1574{
1575	struct pfsync_softc *sc = V_pfsyncif;
1576	struct ifnet *ifp = sc->sc_ifp;
1577	struct mbuf *m;
1578	struct ip *ip;
1579	struct pfsync_header *ph;
1580	struct pfsync_subheader *subh;
1581	struct pf_state *st, *st_next;
1582	struct pfsync_upd_req_item *ur;
1583	struct pfsync_bucket *b = &sc->sc_buckets[c];
1584	int offset;
1585	int q, count = 0;
1586
1587	KASSERT(sc != NULL, ("%s: null sc", __func__));
1588	KASSERT(b->b_len > PFSYNC_MINPKT,
1589	    ("%s: sc_len %zu", __func__, b->b_len));
1590	PFSYNC_BUCKET_LOCK_ASSERT(b);
1591
1592	if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) {
1593		pfsync_drop(sc);
1594		return;
1595	}
1596
1597	m = m_get2(max_linkhdr + b->b_len, M_NOWAIT, MT_DATA, M_PKTHDR);
1598	if (m == NULL) {
1599		if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
1600		V_pfsyncstats.pfsyncs_onomem++;
1601		return;
1602	}
1603	m->m_data += max_linkhdr;
1604	m->m_len = m->m_pkthdr.len = b->b_len;
1605
1606	/* build the ip header */
1607	ip = (struct ip *)m->m_data;
1608	bcopy(&sc->sc_template, ip, sizeof(*ip));
1609	offset = sizeof(*ip);
1610
1611	ip->ip_len = htons(m->m_pkthdr.len);
1612	ip_fillid(ip);
1613
1614	/* build the pfsync header */
1615	ph = (struct pfsync_header *)(m->m_data + offset);
1616	bzero(ph, sizeof(*ph));
1617	offset += sizeof(*ph);
1618
1619	ph->version = PFSYNC_VERSION;
1620	ph->len = htons(b->b_len - sizeof(*ip));
1621	bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
1622
1623	/* walk the queues */
1624	for (q = 0; q < PFSYNC_S_COUNT; q++) {
1625		if (TAILQ_EMPTY(&b->b_qs[q]))
1626			continue;
1627
1628		subh = (struct pfsync_subheader *)(m->m_data + offset);
1629		offset += sizeof(*subh);
1630
1631		count = 0;
1632		TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, st_next) {
1633			KASSERT(st->sync_state == q,
1634				("%s: st->sync_state == q",
1635					__func__));
1636			/*
1637			 * XXXGL: some of write methods do unlocked reads
1638			 * of state data :(
1639			 */
1640			pfsync_qs[q].write(st, m->m_data + offset);
1641			offset += pfsync_qs[q].len;
1642			st->sync_state = PFSYNC_S_NONE;
1643			pf_release_state(st);
1644			count++;
1645		}
1646		TAILQ_INIT(&b->b_qs[q]);
1647
1648		bzero(subh, sizeof(*subh));
1649		subh->action = pfsync_qs[q].action;
1650		subh->count = htons(count);
1651		V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count;
1652	}
1653
1654	if (!TAILQ_EMPTY(&b->b_upd_req_list)) {
1655		subh = (struct pfsync_subheader *)(m->m_data + offset);
1656		offset += sizeof(*subh);
1657
1658		count = 0;
1659		while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) {
1660			TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry);
1661
1662			bcopy(&ur->ur_msg, m->m_data + offset,
1663			    sizeof(ur->ur_msg));
1664			offset += sizeof(ur->ur_msg);
1665			free(ur, M_PFSYNC);
1666			count++;
1667		}
1668
1669		bzero(subh, sizeof(*subh));
1670		subh->action = PFSYNC_ACT_UPD_REQ;
1671		subh->count = htons(count);
1672		V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count;
1673	}
1674
1675	/* has someone built a custom region for us to add? */
1676	if (b->b_plus != NULL) {
1677		bcopy(b->b_plus, m->m_data + offset, b->b_pluslen);
1678		offset += b->b_pluslen;
1679
1680		b->b_plus = NULL;
1681	}
1682
1683	subh = (struct pfsync_subheader *)(m->m_data + offset);
1684	offset += sizeof(*subh);
1685
1686	bzero(subh, sizeof(*subh));
1687	subh->action = PFSYNC_ACT_EOF;
1688	subh->count = htons(1);
1689	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++;
1690
1691	/* we're done, let's put it on the wire */
1692	if (ifp->if_bpf) {
1693		m->m_data += sizeof(*ip);
1694		m->m_len = m->m_pkthdr.len = b->b_len - sizeof(*ip);
1695		BPF_MTAP(ifp, m);
1696		m->m_data -= sizeof(*ip);
1697		m->m_len = m->m_pkthdr.len = b->b_len;
1698	}
1699
1700	if (sc->sc_sync_if == NULL) {
1701		b->b_len = PFSYNC_MINPKT;
1702		m_freem(m);
1703		return;
1704	}
1705
1706	if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1);
1707	if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
1708	b->b_len = PFSYNC_MINPKT;
1709
1710	if (!_IF_QFULL(&b->b_snd))
1711		_IF_ENQUEUE(&b->b_snd, m);
1712	else {
1713		m_freem(m);
1714		if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1);
1715	}
1716	if (schedswi)
1717		swi_sched(V_pfsync_swi_cookie, 0);
1718}
1719
1720static void
1721pfsync_insert_state(struct pf_state *st)
1722{
1723	struct pfsync_softc *sc = V_pfsyncif;
1724	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
1725
1726	if (st->state_flags & PFSTATE_NOSYNC)
1727		return;
1728
1729	if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) ||
1730	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
1731		st->state_flags |= PFSTATE_NOSYNC;
1732		return;
1733	}
1734
1735	KASSERT(st->sync_state == PFSYNC_S_NONE,
1736		("%s: st->sync_state %u", __func__, st->sync_state));
1737
1738	PFSYNC_BUCKET_LOCK(b);
1739	if (b->b_len == PFSYNC_MINPKT)
1740		callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);
1741
1742	pfsync_q_ins(st, PFSYNC_S_INS, true);
1743	PFSYNC_BUCKET_UNLOCK(b);
1744
1745	st->sync_updates = 0;
1746}
1747
1748static int
1749pfsync_defer(struct pf_state *st, struct mbuf *m)
1750{
1751	struct pfsync_softc *sc = V_pfsyncif;
1752	struct pfsync_deferral *pd;
1753	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
1754
1755	if (m->m_flags & (M_BCAST|M_MCAST))
1756		return (0);
1757
1758	PFSYNC_LOCK(sc);
1759
1760	if (sc == NULL || !(sc->sc_ifp->if_flags & IFF_DRV_RUNNING) ||
1761	    !(sc->sc_flags & PFSYNCF_DEFER)) {
1762		PFSYNC_UNLOCK(sc);
1763		return (0);
1764	}
1765
1766	if (b->b_deferred >= 128)
1767		pfsync_undefer(TAILQ_FIRST(&b->b_deferrals), 0);
1768
1769	pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT);
1770	if (pd == NULL)
1771		return (0);
1772	b->b_deferred++;
1773
1774	m->m_flags |= M_SKIP_FIREWALL;
1775	st->state_flags |= PFSTATE_ACK;
1776
1777	pd->pd_sc = sc;
1778	pd->pd_refs = 0;
1779	pd->pd_st = st;
1780	pf_ref_state(st);
1781	pd->pd_m = m;
1782
1783	TAILQ_INSERT_TAIL(&b->b_deferrals, pd, pd_entry);
1784	callout_init_mtx(&pd->pd_tmo, &b->b_mtx, CALLOUT_RETURNUNLOCKED);
1785	callout_reset(&pd->pd_tmo, 10, pfsync_defer_tmo, pd);
1786
1787	pfsync_push(b);
1788
1789	return (1);
1790}
1791
1792static void
1793pfsync_undefer(struct pfsync_deferral *pd, int drop)
1794{
1795	struct pfsync_softc *sc = pd->pd_sc;
1796	struct mbuf *m = pd->pd_m;
1797	struct pf_state *st = pd->pd_st;
1798	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
1799
1800	PFSYNC_BUCKET_LOCK_ASSERT(b);
1801
1802	TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry);
1803	b->b_deferred--;
1804	pd->pd_st->state_flags &= ~PFSTATE_ACK;	/* XXX: locking! */
1805	free(pd, M_PFSYNC);
1806	pf_release_state(st);
1807
1808	if (drop)
1809		m_freem(m);
1810	else {
1811		_IF_ENQUEUE(&b->b_snd, m);
1812		pfsync_push(b);
1813	}
1814}
1815
1816static void
1817pfsync_defer_tmo(void *arg)
1818{
1819	struct pfsync_deferral *pd = arg;
1820	struct pfsync_softc *sc = pd->pd_sc;
1821	struct mbuf *m = pd->pd_m;
1822	struct pf_state *st = pd->pd_st;
1823	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
1824
1825	PFSYNC_BUCKET_LOCK_ASSERT(b);
1826
1827	CURVNET_SET(m->m_pkthdr.rcvif->if_vnet);
1828
1829	TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry);
1830	b->b_deferred--;
1831	pd->pd_st->state_flags &= ~PFSTATE_ACK;	/* XXX: locking! */
1832	if (pd->pd_refs == 0)
1833		free(pd, M_PFSYNC);
1834	PFSYNC_UNLOCK(sc);
1835
1836	ip_output(m, NULL, NULL, 0, NULL, NULL);
1837
1838	pf_release_state(st);
1839
1840	CURVNET_RESTORE();
1841}
1842
1843static void
1844pfsync_undefer_state(struct pf_state *st, int drop)
1845{
1846	struct pfsync_softc *sc = V_pfsyncif;
1847	struct pfsync_deferral *pd;
1848	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
1849
1850	PFSYNC_BUCKET_LOCK(b);
1851
1852	TAILQ_FOREACH(pd, &b->b_deferrals, pd_entry) {
1853		 if (pd->pd_st == st) {
1854			if (callout_stop(&pd->pd_tmo) > 0)
1855				pfsync_undefer(pd, drop);
1856
1857			PFSYNC_BUCKET_UNLOCK(b);
1858			return;
1859		}
1860	}
1861	PFSYNC_BUCKET_UNLOCK(b);
1862
1863	panic("%s: unable to find deferred state", __func__);
1864}
1865
1866static struct pfsync_bucket*
1867pfsync_get_bucket(struct pfsync_softc *sc, struct pf_state *st)
1868{
1869	int c = PF_IDHASH(st) % pfsync_buckets;
1870	return &sc->sc_buckets[c];
1871}
1872
1873static void
1874pfsync_update_state(struct pf_state *st)
1875{
1876	struct pfsync_softc *sc = V_pfsyncif;
1877	bool sync = false, ref = true;
1878	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
1879
1880	PF_STATE_LOCK_ASSERT(st);
1881	PFSYNC_BUCKET_LOCK(b);
1882
1883	if (st->state_flags & PFSTATE_ACK)
1884		pfsync_undefer_state(st, 0);
1885	if (st->state_flags & PFSTATE_NOSYNC) {
1886		if (st->sync_state != PFSYNC_S_NONE)
1887			pfsync_q_del(st, true, b);
1888		PFSYNC_BUCKET_UNLOCK(b);
1889		return;
1890	}
1891
1892	if (b->b_len == PFSYNC_MINPKT)
1893		callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);
1894
1895	switch (st->sync_state) {
1896	case PFSYNC_S_UPD_C:
1897	case PFSYNC_S_UPD:
1898	case PFSYNC_S_INS:
1899		/* we're already handling it */
1900
1901		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
1902			st->sync_updates++;
1903			if (st->sync_updates >= sc->sc_maxupdates)
1904				sync = true;
1905		}
1906		break;
1907
1908	case PFSYNC_S_IACK:
1909		pfsync_q_del(st, false, b);
1910		ref = false;
1911		/* FALLTHROUGH */
1912
1913	case PFSYNC_S_NONE:
1914		pfsync_q_ins(st, PFSYNC_S_UPD_C, ref);
1915		st->sync_updates = 0;
1916		break;
1917
1918	default:
1919		panic("%s: unexpected sync state %d", __func__, st->sync_state);
1920	}
1921
1922	if (sync || (time_uptime - st->pfsync_time) < 2)
1923		pfsync_push(b);
1924
1925	PFSYNC_BUCKET_UNLOCK(b);
1926}
1927
1928static void
1929pfsync_request_update(u_int32_t creatorid, u_int64_t id)
1930{
1931	struct pfsync_softc *sc = V_pfsyncif;
1932	struct pfsync_bucket *b = &sc->sc_buckets[0];
1933	struct pfsync_upd_req_item *item;
1934	size_t nlen = sizeof(struct pfsync_upd_req);
1935
1936	PFSYNC_BUCKET_LOCK_ASSERT(b);
1937
1938	/*
1939	 * This code does a bit to prevent multiple update requests for the
1940	 * same state being generated. It searches current subheader queue,
1941	 * but it doesn't lookup into queue of already packed datagrams.
1942	 */
1943	TAILQ_FOREACH(item, &b->b_upd_req_list, ur_entry)
1944		if (item->ur_msg.id == id &&
1945		    item->ur_msg.creatorid == creatorid)
1946			return;
1947
1948	item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT);
1949	if (item == NULL)
1950		return; /* XXX stats */
1951
1952	item->ur_msg.id = id;
1953	item->ur_msg.creatorid = creatorid;
1954
1955	if (TAILQ_EMPTY(&b->b_upd_req_list))
1956		nlen += sizeof(struct pfsync_subheader);
1957
1958	if (b->b_len + nlen > sc->sc_ifp->if_mtu) {
1959		pfsync_sendout(0, 0);
1960
1961		nlen = sizeof(struct pfsync_subheader) +
1962		    sizeof(struct pfsync_upd_req);
1963	}
1964
1965	TAILQ_INSERT_TAIL(&b->b_upd_req_list, item, ur_entry);
1966	b->b_len += nlen;
1967
1968	pfsync_push(b);
1969}
1970
static bool
pfsync_update_state_req(struct pf_state *st)
{
	struct pfsync_softc *sc = V_pfsyncif;
	bool ref = true, full = false;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);

	PF_STATE_LOCK_ASSERT(st);
	PFSYNC_BUCKET_LOCK(b);

	if (st->state_flags & PFSTATE_NOSYNC) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st, true, b);
		PFSYNC_BUCKET_UNLOCK(b);
		return (full);
	}

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_IACK:
		pfsync_q_del(st, false, b);
		ref = false;
		/* FALLTHROUGH */

	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD, ref);
		pfsync_push(b);
		break;

	case PFSYNC_S_INS:
	case PFSYNC_S_UPD:
	case PFSYNC_S_DEL:
		/* we're already handling it */
		break;

	default:
		panic("%s: unexpected sync state %d", __func__, st->sync_state);
	}

	if ((sc->sc_ifp->if_mtu - b->b_len) < sizeof(struct pfsync_state))
		full = true;

	PFSYNC_BUCKET_UNLOCK(b);

	return (full);
}

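/*
 * Announce the deletion of a state to our peers, unless it was never
 * announced in the first place (still queued for insert), in which case
 * it is quietly dropped.  Hooked into pf via V_pfsync_delete_state_ptr.
 */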
static void
pfsync_delete_state(struct pf_state *st)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
	bool ref = true;

	PFSYNC_BUCKET_LOCK(b);
	if (st->state_flags & PFSTATE_ACK)
		pfsync_undefer_state(st, 1);
	if (st->state_flags & PFSTATE_NOSYNC) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st, true, b);
		PFSYNC_BUCKET_UNLOCK(b);
		return;
	}

	if (b->b_len == PFSYNC_MINPKT)
		callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);

	switch (st->sync_state) {
	case PFSYNC_S_INS:
		/* We never got to tell the world so just forget about it. */
		pfsync_q_del(st, true, b);
		break;

	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_IACK:
		pfsync_q_del(st, false, b);
		ref = false;
		/* FALLTHROUGH */

	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_DEL, ref);
		break;

	default:
		panic("%s: unexpected sync state %d", __func__, st->sync_state);
	}

	PFSYNC_BUCKET_UNLOCK(b);
}

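/*
 * Tell our peers to flush all states created by the given creator id on
 * the named interface (PFSYNC_ACT_CLR).
 */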
static void
pfsync_clear_states(u_int32_t creatorid, const char *ifname)
{
	struct {
		struct pfsync_subheader subh;
		struct pfsync_clr clr;
	} __packed r;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_CLR;
	r.subh.count = htons(1);
	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++;

	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
	r.clr.creatorid = creatorid;

	pfsync_send_plus(&r, sizeof(r));
}

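/*
 * Append a state to the given per-bucket queue, accounting for the added
 * packet length and flushing the pending packet first if it would exceed
 * the interface MTU.
 */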
static void
pfsync_q_ins(struct pf_state *st, int q, bool ref)
{
	struct pfsync_softc *sc = V_pfsyncif;
	size_t nlen = pfsync_qs[q].len;
	struct pfsync_bucket *b = pfsync_get_bucket(sc, st);

	PFSYNC_BUCKET_LOCK_ASSERT(b);

	KASSERT(st->sync_state == PFSYNC_S_NONE,
		("%s: st->sync_state %u", __func__, st->sync_state));
	KASSERT(b->b_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu",
	    b->b_len));

	if (TAILQ_EMPTY(&b->b_qs[q]))
		nlen += sizeof(struct pfsync_subheader);

	if (b->b_len + nlen > sc->sc_ifp->if_mtu) {
		pfsync_sendout(1, b->b_id);

		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
	}

	b->b_len += nlen;
	TAILQ_INSERT_TAIL(&b->b_qs[q], st, sync_list);
	st->sync_state = q;
	if (ref)
		pf_ref_state(st);
}

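/*
 * Remove a state from its per-bucket queue and subtract its contribution
 * from the pending packet length.
 */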
static void
pfsync_q_del(struct pf_state *st, bool unref, struct pfsync_bucket *b)
{
	int q = st->sync_state;

	PFSYNC_BUCKET_LOCK_ASSERT(b);
	KASSERT(st->sync_state != PFSYNC_S_NONE,
		("%s: st->sync_state != PFSYNC_S_NONE", __func__));

	b->b_len -= pfsync_qs[q].len;
	TAILQ_REMOVE(&b->b_qs[q], st, sync_list);
	st->sync_state = PFSYNC_S_NONE;
	if (unref)
		pf_release_state(st);

	if (TAILQ_EMPTY(&b->b_qs[q]))
		b->b_len -= sizeof(struct pfsync_subheader);
}

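/*
 * Start servicing a bulk update request from a peer: record the request
 * time, reset the walk position and schedule the first pass of
 * pfsync_bulk_update().
 */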
static void
pfsync_bulk_start(void)
{
	struct pfsync_softc *sc = V_pfsyncif;

	if (V_pf_status.debug >= PF_DEBUG_MISC)
		printf("pfsync: received bulk update request\n");

	PFSYNC_BLOCK(sc);

	sc->sc_ureq_received = time_uptime;
	sc->sc_bulk_hashid = 0;
	sc->sc_bulk_stateid = 0;
	pfsync_bulk_status(PFSYNC_BUS_START);
	callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc);
	PFSYNC_BUNLOCK(sc);
}

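/*
 * One pass of a bulk update: walk the state table, queueing a full update
 * for every state not synced since the request arrived.  When the outgoing
 * packet fills up, remember where we stopped and reschedule ourselves;
 * once the walk completes, signal the end of the bulk update.
 */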
static void
pfsync_bulk_update(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct pf_state *s;
	int i, sent = 0;

	PFSYNC_BLOCK_ASSERT(sc);
	CURVNET_SET(sc->sc_ifp->if_vnet);

	/*
	 * Start with the last state from the previous invocation.
	 * It may have gone away since then, in which case start
	 * over from the saved hash slot.
	 */
	s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid);

	if (s != NULL)
		i = PF_IDHASH(s);
	else
		i = sc->sc_bulk_hashid;

	for (; i <= pf_hashmask; i++) {
		struct pf_idhash *ih = &V_pf_idhash[i];

		if (s != NULL)
			PF_HASHROW_ASSERT(ih);
		else {
			PF_HASHROW_LOCK(ih);
			s = LIST_FIRST(&ih->states);
		}

		for (; s; s = LIST_NEXT(s, entry)) {
			if (s->sync_state == PFSYNC_S_NONE &&
			    s->timeout < PFTM_MAX &&
			    s->pfsync_time <= sc->sc_ureq_received) {
				if (pfsync_update_state_req(s)) {
					/* We've filled a packet. */
					sc->sc_bulk_hashid = i;
					sc->sc_bulk_stateid = s->id;
					sc->sc_bulk_creatorid = s->creatorid;
					PF_HASHROW_UNLOCK(ih);
					callout_reset(&sc->sc_bulk_tmo, 1,
					    pfsync_bulk_update, sc);
					goto full;
				}
				sent++;
			}
		}
		PF_HASHROW_UNLOCK(ih);
	}

	/* We're done. */
	pfsync_bulk_status(PFSYNC_BUS_END);
full:
	CURVNET_RESTORE();
}

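/*
 * Send a bulk update status message (PFSYNC_ACT_BUS) announcing either the
 * start or the end of a bulk update.
 */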
static void
pfsync_bulk_status(u_int8_t status)
{
	struct {
		struct pfsync_subheader subh;
		struct pfsync_bus bus;
	} __packed r;

	struct pfsync_softc *sc = V_pfsyncif;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_BUS;
	r.subh.count = htons(1);
	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++;

	r.bus.creatorid = V_pf_status.hostid;
	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
	r.bus.status = status;

	pfsync_send_plus(&r, sizeof(r));
}

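/*
 * Callout handler for an outstanding bulk update request of our own: retry
 * the request up to PFSYNC_MAX_BULKTRIES times, then give up and carry on
 * as if the bulk transfer had completed.
 */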
static void
pfsync_bulk_fail(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct pfsync_bucket *b = &sc->sc_buckets[0];

	CURVNET_SET(sc->sc_ifp->if_vnet);

	PFSYNC_BLOCK_ASSERT(sc);

	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again. */
		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
		    pfsync_bulk_fail, V_pfsyncif);
		PFSYNC_BUCKET_LOCK(b);
		pfsync_request_update(0, 0);
		PFSYNC_BUCKET_UNLOCK(b);
	} else {
		/* Pretend the transfer was OK. */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
		PFSYNC_LOCK(sc);
		if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
			(*carp_demote_adj_p)(-V_pfsync_carp_adj,
			    "pfsync bulk fail");
		sc->sc_flags |= PFSYNCF_OK;
		PFSYNC_UNLOCK(sc);
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync: failed to receive bulk update\n");
	}

	CURVNET_RESTORE();
}

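/*
 * Tack a pre-built chunk (a clear or bulk status message) onto the pending
 * packet on bucket 0 and transmit it immediately.
 */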
static void
pfsync_send_plus(void *plus, size_t pluslen)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_bucket *b = &sc->sc_buckets[0];

	PFSYNC_BUCKET_LOCK(b);

	if (b->b_len + pluslen > sc->sc_ifp->if_mtu)
		pfsync_sendout(1, b->b_id);

	b->b_plus = plus;
	b->b_len += (b->b_pluslen = pluslen);

	pfsync_sendout(1, b->b_id);
	PFSYNC_BUCKET_UNLOCK(b);
}

static void
pfsync_timeout(void *arg)
{
	struct pfsync_bucket *b = arg;

	CURVNET_SET(b->b_sc->sc_ifp->if_vnet);
	PFSYNC_BUCKET_LOCK(b);
	pfsync_push(b);
	PFSYNC_BUCKET_UNLOCK(b);
	CURVNET_RESTORE();
}

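/*
 * Mark a bucket for transmission and schedule the pfsync software
 * interrupt handler.
 */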
static void
pfsync_push(struct pfsync_bucket *b)
{

	PFSYNC_BUCKET_LOCK_ASSERT(b);

	b->b_flags |= PFSYNCF_BUCKET_PUSH;
	swi_sched(V_pfsync_swi_cookie, 0);
}

static void
pfsync_push_all(struct pfsync_softc *sc)
{
	int c;
	struct pfsync_bucket *b;

	for (c = 0; c < pfsync_buckets; c++) {
		b = &sc->sc_buckets[c];

		PFSYNC_BUCKET_LOCK(b);
		pfsync_push(b);
		PFSYNC_BUCKET_UNLOCK(b);
	}
}

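/*
 * Software interrupt handler: send out the pending packet of every bucket
 * that was marked for a push, then transmit any mbufs queued on the
 * buckets' send queues (deferred packets and prebuilt pfsync packets).
 */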
static void
pfsyncintr(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct pfsync_bucket *b;
	struct mbuf *m, *n;
	int c;

	CURVNET_SET(sc->sc_ifp->if_vnet);

	for (c = 0; c < pfsync_buckets; c++) {
		b = &sc->sc_buckets[c];

		PFSYNC_BUCKET_LOCK(b);
		if ((b->b_flags & PFSYNCF_BUCKET_PUSH) && b->b_len > PFSYNC_MINPKT) {
			pfsync_sendout(0, b->b_id);
			b->b_flags &= ~PFSYNCF_BUCKET_PUSH;
		}
		_IF_DEQUEUE_ALL(&b->b_snd, m);
		PFSYNC_BUCKET_UNLOCK(b);

		for (; m != NULL; m = n) {
			n = m->m_nextpkt;
			m->m_nextpkt = NULL;

			/*
			 * We distinguish between a deferral packet and our
			 * own pfsync packet based on the M_SKIP_FIREWALL
			 * flag.  This is XXX.
			 */
			if (m->m_flags & M_SKIP_FIREWALL)
				ip_output(m, NULL, NULL, 0, NULL, NULL);
			else if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
			    NULL) == 0)
				V_pfsyncstats.pfsyncs_opackets++;
			else
				V_pfsyncstats.pfsyncs_oerrors++;
		}
	}
	CURVNET_RESTORE();
}

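/*
 * Join the pfsync multicast group on the sync interface and set up the
 * multicast options used for transmission.
 */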
static int
pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp,
    struct in_mfilter *imf)
{
	struct ip_moptions *imo = &sc->sc_imo;
	int error;

	if (!(ifp->if_flags & IFF_MULTICAST))
		return (EADDRNOTAVAIL);

	imo->imo_multicast_vif = -1;

	if ((error = in_joingroup(ifp, &sc->sc_sync_peer, NULL,
	    &imf->imf_inm)) != 0)
		return (error);

	ip_mfilter_init(&imo->imo_head);
	ip_mfilter_insert(&imo->imo_head, imf);
	imo->imo_multicast_ifp = ifp;
	imo->imo_multicast_ttl = PFSYNC_DFLTTL;
	imo->imo_multicast_loop = 0;

	return (0);
}

static void
pfsync_multicast_cleanup(struct pfsync_softc *sc)
{
	struct ip_moptions *imo = &sc->sc_imo;
	struct in_mfilter *imf;

	while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) {
		ip_mfilter_remove(&imo->imo_head, imf);
		in_leavegroup(imf->imf_inm, NULL);
		ip_mfilter_free(imf);
	}
	imo->imo_multicast_ifp = NULL;
}

void
pfsync_detach_ifnet(struct ifnet *ifp)
{
	struct pfsync_softc *sc = V_pfsyncif;

	if (sc == NULL)
		return;

	PFSYNC_LOCK(sc);

	if (sc->sc_sync_if == ifp) {
		/*
		 * We don't need multicast cleanup here, because the
		 * interface is going away.  We do need to ensure we don't
		 * try to do cleanup later.
		 */
		ip_mfilter_init(&sc->sc_imo.imo_head);
		sc->sc_imo.imo_multicast_ifp = NULL;
		sc->sc_sync_if = NULL;
	}

	PFSYNC_UNLOCK(sc);
}

#ifdef INET
extern  struct domain inetdomain;
static struct protosw in_pfsync_protosw = {
	.pr_type =		SOCK_RAW,
	.pr_domain =		&inetdomain,
	.pr_protocol =		IPPROTO_PFSYNC,
	.pr_flags =		PR_ATOMIC|PR_ADDR,
	.pr_input =		pfsync_input,
	.pr_output =		rip_output,
	.pr_ctloutput =		rip_ctloutput,
	.pr_usrreqs =		&rip_usrreqs
};
#endif

static void
pfsync_pointers_init()
{

	PF_RULES_WLOCK();
	V_pfsync_state_import_ptr = pfsync_state_import;
	V_pfsync_insert_state_ptr = pfsync_insert_state;
	V_pfsync_update_state_ptr = pfsync_update_state;
	V_pfsync_delete_state_ptr = pfsync_delete_state;
	V_pfsync_clear_states_ptr = pfsync_clear_states;
	V_pfsync_defer_ptr = pfsync_defer;
	PF_RULES_WUNLOCK();
}

static void
pfsync_pointers_uninit()
{

	PF_RULES_WLOCK();
	V_pfsync_state_import_ptr = NULL;
	V_pfsync_insert_state_ptr = NULL;
	V_pfsync_update_state_ptr = NULL;
	V_pfsync_delete_state_ptr = NULL;
	V_pfsync_clear_states_ptr = NULL;
	V_pfsync_defer_ptr = NULL;
	PF_RULES_WUNLOCK();
}

static void
vnet_pfsync_init(const void *unused __unused)
{
	int error;

	V_pfsync_cloner = if_clone_simple(pfsyncname,
	    pfsync_clone_create, pfsync_clone_destroy, 1);
	error = swi_add(&V_pfsync_swi_ie, pfsyncname, pfsyncintr, V_pfsyncif,
	    SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie);
	if (error) {
		if_clone_detach(V_pfsync_cloner);
		log(LOG_INFO, "swi_add() failed in %s\n", __func__);
	}

	pfsync_pointers_init();
}
VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY,
    vnet_pfsync_init, NULL);

static void
vnet_pfsync_uninit(const void *unused __unused)
{
	int ret;

	pfsync_pointers_uninit();

	if_clone_detach(V_pfsync_cloner);
	ret = swi_remove(V_pfsync_swi_cookie);
	MPASS(ret == 0);
	ret = intr_event_destroy(V_pfsync_swi_ie);
	MPASS(ret == 0);
}

VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH,
    vnet_pfsync_uninit, NULL);

static int
pfsync_init()
{
#ifdef INET
	int error;

	pfsync_detach_ifnet_ptr = pfsync_detach_ifnet;

	error = pf_proto_register(PF_INET, &in_pfsync_protosw);
	if (error)
		return (error);
	error = ipproto_register(IPPROTO_PFSYNC);
	if (error) {
		pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW);
		return (error);
	}
#endif

	return (0);
}

static void
pfsync_uninit()
{
	pfsync_detach_ifnet_ptr = NULL;

#ifdef INET
	ipproto_unregister(IPPROTO_PFSYNC);
	pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW);
#endif
}

static int
pfsync_modevent(module_t mod, int type, void *data)
{
	int error = 0;

	switch (type) {
	case MOD_LOAD:
		error = pfsync_init();
		break;
	case MOD_UNLOAD:
		pfsync_uninit();
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}

static moduledata_t pfsync_mod = {
	pfsyncname,
	pfsync_modevent,
	0
};

#define PFSYNC_MODVER 1

/* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */
DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY);
MODULE_VERSION(pfsync, PFSYNC_MODVER);
MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER);