if_pfsync.c revision 230265
1/*	$OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $	*/
2
3/*
4 * Copyright (c) 2002 Michael Shalayeff
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
25 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
31 *
32 * Permission to use, copy, modify, and distribute this software for any
33 * purpose with or without fee is hereby granted, provided that the above
34 * copyright notice and this permission notice appear in all copies.
35 *
36 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
37 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
38 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
39 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
40 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
41 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
42 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
43 */
44
45/*
46 * Revisions picked from OpenBSD after revision 1.110 import:
47 * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates
48 * 1.120, 1.175 - use monotonic time_uptime
49 * 1.122 - reduce number of updates for non-TCP sessions
50 * 1.128 - cleanups
51 * 1.170 - SIOCSIFMTU checks
52 */
53
54#ifdef __FreeBSD__
55#include "opt_inet.h"
56#include "opt_inet6.h"
57#include "opt_pf.h"
58
59#include <sys/cdefs.h>
60__FBSDID("$FreeBSD: head/sys/contrib/pf/net/if_pfsync.c 230265 2012-01-17 12:14:26Z glebius $");
61
62#define	NBPFILTER	1
63#endif /* __FreeBSD__ */
64
65#include <sys/param.h>
66#include <sys/kernel.h>
67#ifdef __FreeBSD__
68#include <sys/bus.h>
69#include <sys/interrupt.h>
70#include <sys/priv.h>
71#endif
72#include <sys/proc.h>
73#include <sys/systm.h>
74#include <sys/time.h>
75#include <sys/mbuf.h>
76#include <sys/socket.h>
77#ifdef __FreeBSD__
78#include <sys/endian.h>
79#include <sys/malloc.h>
80#include <sys/module.h>
81#include <sys/sockio.h>
82#include <sys/taskqueue.h>
83#include <sys/lock.h>
84#include <sys/mutex.h>
85#include <sys/protosw.h>
86#else
87#include <sys/ioctl.h>
88#include <sys/timeout.h>
89#endif
90#include <sys/sysctl.h>
91#ifndef __FreeBSD__
92#include <sys/pool.h>
93#endif
94
95#include <net/if.h>
96#ifdef __FreeBSD__
97#include <net/if_clone.h>
98#endif
99#include <net/if_types.h>
100#include <net/route.h>
101#include <net/bpf.h>
102#include <net/netisr.h>
103#ifdef __FreeBSD__
104#include <net/vnet.h>
105#endif
106
107#include <netinet/in.h>
108#include <netinet/if_ether.h>
109#include <netinet/tcp.h>
110#include <netinet/tcp_seq.h>
111
112#ifdef	INET
113#include <netinet/in_systm.h>
114#include <netinet/in_var.h>
115#include <netinet/ip.h>
116#include <netinet/ip_var.h>
117#endif
118
119#ifdef INET6
120#include <netinet6/nd6.h>
121#endif /* INET6 */
122
123#ifdef __FreeBSD__
124#include <netinet/ip_carp.h>
125#else
126#include "carp.h"
127#if NCARP > 0
128#include <netinet/ip_carp.h>
129#endif
130#endif
131
132#include <net/pfvar.h>
133#include <net/if_pfsync.h>
134
135#ifndef __FreeBSD__
136#include "bpfilter.h"
137#include "pfsync.h"
138#endif
139
140#define PFSYNC_MINPKT ( \
141	sizeof(struct ip) + \
142	sizeof(struct pfsync_header) + \
143	sizeof(struct pfsync_subheader) + \
144	sizeof(struct pfsync_eof))
145
146struct pfsync_pkt {
147	struct ip *ip;
148	struct in_addr src;
149	u_int8_t flags;
150};
151
152int	pfsync_input_hmac(struct mbuf *, int);
153
154int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
155	    struct pfsync_state_peer *);
156
157int	pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int);
158int	pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int);
159int	pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int);
160int	pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int);
161int	pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int);
162int	pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int);
163int	pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int);
164int	pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int);
165int	pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int);
166int	pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int);
167int	pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int);
168
169int	pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int);
170
171int	(*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = {
172	pfsync_in_clr,			/* PFSYNC_ACT_CLR */
173	pfsync_in_ins,			/* PFSYNC_ACT_INS */
174	pfsync_in_iack,			/* PFSYNC_ACT_INS_ACK */
175	pfsync_in_upd,			/* PFSYNC_ACT_UPD */
176	pfsync_in_upd_c,		/* PFSYNC_ACT_UPD_C */
177	pfsync_in_ureq,			/* PFSYNC_ACT_UPD_REQ */
178	pfsync_in_del,			/* PFSYNC_ACT_DEL */
179	pfsync_in_del_c,		/* PFSYNC_ACT_DEL_C */
180	pfsync_in_error,		/* PFSYNC_ACT_INS_F */
181	pfsync_in_error,		/* PFSYNC_ACT_DEL_F */
182	pfsync_in_bus,			/* PFSYNC_ACT_BUS */
183	pfsync_in_tdb,			/* PFSYNC_ACT_TDB */
184	pfsync_in_eof			/* PFSYNC_ACT_EOF */
185};
186
187struct pfsync_q {
188	int		(*write)(struct pf_state *, struct mbuf *, int);
189	size_t		len;
190	u_int8_t	action;
191};
192
193/* we have one of these for every PFSYNC_S_ */
194int	pfsync_out_state(struct pf_state *, struct mbuf *, int);
195int	pfsync_out_iack(struct pf_state *, struct mbuf *, int);
196int	pfsync_out_upd_c(struct pf_state *, struct mbuf *, int);
197int	pfsync_out_del(struct pf_state *, struct mbuf *, int);
198
199struct pfsync_q pfsync_qs[] = {
200	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
201	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
202	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD },
203	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
204	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C }
205};
206
207void	pfsync_q_ins(struct pf_state *, int);
208void	pfsync_q_del(struct pf_state *);
209
210struct pfsync_upd_req_item {
211	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
212	struct pfsync_upd_req			ur_msg;
213};
214TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);
215
216struct pfsync_deferral {
217	TAILQ_ENTRY(pfsync_deferral)		 pd_entry;
218	struct pf_state				*pd_st;
219	struct mbuf				*pd_m;
220#ifdef __FreeBSD__
221	struct callout				 pd_tmo;
222#else
223	struct timeout				 pd_tmo;
224#endif
225};
226TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);
227
228#define PFSYNC_PLSIZE	MAX(sizeof(struct pfsync_upd_req_item), \
229			    sizeof(struct pfsync_deferral))
230
231#ifdef notyet
232int	pfsync_out_tdb(struct tdb *, struct mbuf *, int);
233#endif
234
235struct pfsync_softc {
236#ifdef __FreeBSD__
237	struct ifnet		*sc_ifp;
238#else
239	struct ifnet		 sc_if;
240#endif
241	struct ifnet		*sc_sync_if;
242
243#ifdef __FreeBSD__
244	uma_zone_t		 sc_pool;
245#else
246	struct pool		 sc_pool;
247#endif
248
249	struct ip_moptions	 sc_imo;
250
251	struct in_addr		 sc_sync_peer;
252	u_int8_t		 sc_maxupdates;
253#ifdef __FreeBSD__
254	int			 pfsync_sync_ok;
255#endif
256
257	struct ip		 sc_template;
258
259	struct pf_state_queue	 sc_qs[PFSYNC_S_COUNT];
260	size_t			 sc_len;
261
262	struct pfsync_upd_reqs	 sc_upd_req_list;
263
264	struct pfsync_deferrals	 sc_deferrals;
265	u_int			 sc_deferred;
266
267	void			*sc_plus;
268	size_t			 sc_pluslen;
269
270	u_int32_t		 sc_ureq_sent;
271	int			 sc_bulk_tries;
272#ifdef __FreeBSD__
273	struct callout		 sc_bulkfail_tmo;
274#else
275	struct timeout		 sc_bulkfail_tmo;
276#endif
277
278	u_int32_t		 sc_ureq_received;
279	struct pf_state		*sc_bulk_next;
280	struct pf_state		*sc_bulk_last;
281#ifdef __FreeBSD__
282	struct callout		 sc_bulk_tmo;
283#else
284	struct timeout		 sc_bulk_tmo;
285#endif
286
287	TAILQ_HEAD(, tdb)	 sc_tdb_q;
288
289#ifdef __FreeBSD__
290	struct callout		 sc_tmo;
291#else
292	struct timeout		 sc_tmo;
293#endif
294};
295
296#ifdef __FreeBSD__
297static MALLOC_DEFINE(M_PFSYNC, "pfsync", "pfsync data");
298static VNET_DEFINE(struct pfsync_softc	*, pfsyncif) = NULL;
299#define	V_pfsyncif		VNET(pfsyncif)
300static VNET_DEFINE(void *, pfsync_swi_cookie) = NULL;
301#define	V_pfsync_swi_cookie	VNET(pfsync_swi_cookie)
302static VNET_DEFINE(struct pfsyncstats, pfsyncstats);
303#define	V_pfsyncstats		VNET(pfsyncstats)
304static VNET_DEFINE(int, pfsync_carp_adj) = CARP_MAXSKEW;
305#define	V_pfsync_carp_adj	VNET(pfsync_carp_adj)
306
307static void	pfsyncintr(void *);
308static int	pfsync_multicast_setup(struct pfsync_softc *);
309static void	pfsync_multicast_cleanup(struct pfsync_softc *);
310static int	pfsync_init(void);
311static void	pfsync_uninit(void);
312static void	pfsync_sendout1(int);
313
314#define	schednetisr(NETISR_PFSYNC)	swi_sched(V_pfsync_swi_cookie, 0)
315
316SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC");
317SYSCTL_VNET_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_RW,
318    &VNET_NAME(pfsyncstats), pfsyncstats,
319    "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
320SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_RW,
321    &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment");
322#else
323struct pfsync_softc	*pfsyncif = NULL;
324struct pfsyncstats	 pfsyncstats;
325#define	V_pfsyncstats	 pfsyncstats
326#endif
327
328void	pfsyncattach(int);
329#ifdef __FreeBSD__
330int	pfsync_clone_create(struct if_clone *, int, caddr_t);
331void	pfsync_clone_destroy(struct ifnet *);
332#else
333int	pfsync_clone_create(struct if_clone *, int);
334int	pfsync_clone_destroy(struct ifnet *);
335#endif
336int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
337	    struct pf_state_peer *);
338void	pfsync_update_net_tdb(struct pfsync_tdb *);
339int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
340#ifdef __FreeBSD__
341	    struct route *);
342#else
343	    struct rtentry *);
344#endif
345int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
346void	pfsyncstart(struct ifnet *);
347
348struct mbuf *pfsync_if_dequeue(struct ifnet *);
349
350void	pfsync_deferred(struct pf_state *, int);
351void	pfsync_undefer(struct pfsync_deferral *, int);
352void	pfsync_defer_tmo(void *);
353
354void	pfsync_request_update(u_int32_t, u_int64_t);
355void	pfsync_update_state_req(struct pf_state *);
356
357void	pfsync_drop(struct pfsync_softc *);
358void	pfsync_sendout(void);
359void	pfsync_send_plus(void *, size_t);
360void	pfsync_timeout(void *);
361void	pfsync_tdb_timeout(void *);
362
363void	pfsync_bulk_start(void);
364void	pfsync_bulk_status(u_int8_t);
365void	pfsync_bulk_update(void *);
366void	pfsync_bulk_fail(void *);
367
368#ifdef __FreeBSD__
369/* XXX: ugly */
370#define	betoh64		(unsigned long long)be64toh
371#define	timeout_del	callout_stop
372#endif
373
374#define PFSYNC_MAX_BULKTRIES	12
375#ifndef __FreeBSD__
376int	pfsync_sync_ok;
377#endif
378
379#ifdef __FreeBSD__
380VNET_DEFINE(struct ifc_simple_data, pfsync_cloner_data);
381VNET_DEFINE(struct if_clone, pfsync_cloner);
382#define	V_pfsync_cloner_data	VNET(pfsync_cloner_data)
383#define	V_pfsync_cloner		VNET(pfsync_cloner)
384IFC_SIMPLE_DECLARE(pfsync, 1);
385#else
386struct if_clone	pfsync_cloner =
387    IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);
388#endif
389
390void
391pfsyncattach(int npfsync)
392{
393	if_clone_attach(&pfsync_cloner);
394}
395int
396#ifdef __FreeBSD__
397pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param)
398#else
399pfsync_clone_create(struct if_clone *ifc, int unit)
400#endif
401{
402	struct pfsync_softc *sc;
403	struct ifnet *ifp;
404	int q;
405
406	if (unit != 0)
407		return (EINVAL);
408
409#ifdef __FreeBSD__
410	sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO);
411	sc->pfsync_sync_ok = 1;
412#else
413	pfsync_sync_ok = 1;
414	sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_NOWAIT | M_ZERO);
415#endif
416
417	for (q = 0; q < PFSYNC_S_COUNT; q++)
418		TAILQ_INIT(&sc->sc_qs[q]);
419
420#ifdef __FreeBSD__
421	sc->sc_pool = uma_zcreate("pfsync", PFSYNC_PLSIZE, NULL, NULL, NULL,
422	    NULL, UMA_ALIGN_PTR, 0);
423#else
424	pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, 0, 0, "pfsync", NULL);
425#endif
426	TAILQ_INIT(&sc->sc_upd_req_list);
427	TAILQ_INIT(&sc->sc_deferrals);
428	sc->sc_deferred = 0;
429
430	TAILQ_INIT(&sc->sc_tdb_q);
431
432	sc->sc_len = PFSYNC_MINPKT;
433	sc->sc_maxupdates = 128;
434
435#ifndef __FreeBSD__
436	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
437	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
438	    M_WAITOK | M_ZERO);
439	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
440#endif
441
442#ifdef __FreeBSD__
443	ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC);
444	if (ifp == NULL) {
445		uma_zdestroy(sc->sc_pool);
446		free(sc, M_PFSYNC);
447		return (ENOSPC);
448	}
449	if_initname(ifp, ifc->ifc_name, unit);
450#else
451	ifp = &sc->sc_if;
452	snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
453#endif
454	ifp->if_softc = sc;
455	ifp->if_ioctl = pfsyncioctl;
456	ifp->if_output = pfsyncoutput;
457	ifp->if_start = pfsyncstart;
458	ifp->if_type = IFT_PFSYNC;
459	ifp->if_snd.ifq_maxlen = ifqmaxlen;
460	ifp->if_hdrlen = sizeof(struct pfsync_header);
461	ifp->if_mtu = ETHERMTU;
462#ifdef __FreeBSD__
463	callout_init(&sc->sc_tmo, CALLOUT_MPSAFE);
464	callout_init_mtx(&sc->sc_bulk_tmo, &pf_task_mtx, 0);
465	callout_init(&sc->sc_bulkfail_tmo, CALLOUT_MPSAFE);
466#else
467	timeout_set(&sc->sc_tmo, pfsync_timeout, sc);
468	timeout_set(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
469	timeout_set(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);
470#endif
471
472	if_attach(ifp);
473#ifndef __FreeBSD__
474	if_alloc_sadl(ifp);
475
476#if NCARP > 0
477	if_addgroup(ifp, "carp");
478#endif
479#endif
480
481#if NBPFILTER > 0
482#ifdef __FreeBSD__
483	bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
484#else
485	bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
486#endif
487#endif
488
489#ifdef __FreeBSD__
490	V_pfsyncif = sc;
491#else
492	pfsyncif = sc;
493#endif
494
495	return (0);
496}
497
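/*
 * Tear down the pfsync interface: stop the timeouts, undo any CARP
 * demotion left over from an unfinished bulk update, detach bpf and the
 * ifnet, drop all queued work and release the softc.
 */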
498#ifdef __FreeBSD__
499void
500#else
501int
502#endif
503pfsync_clone_destroy(struct ifnet *ifp)
504{
505	struct pfsync_softc *sc = ifp->if_softc;
506
507#ifdef __FreeBSD__
508	PF_LOCK();
509#endif
510	timeout_del(&sc->sc_bulkfail_tmo);
511	timeout_del(&sc->sc_bulk_tmo);
512	timeout_del(&sc->sc_tmo);
513#ifdef __FreeBSD__
514	PF_UNLOCK();
515	if (!sc->pfsync_sync_ok && carp_demote_adj_p)
516		(*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy");
517#else
518#if NCARP > 0
519	if (!pfsync_sync_ok)
520		carp_group_demote_adj(&sc->sc_if, -1);
521#endif
522#endif
523#if NBPFILTER > 0
524	bpfdetach(ifp);
525#endif
526	if_detach(ifp);
527
528	pfsync_drop(sc);
529
530	while (sc->sc_deferred > 0)
531		pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);
532
533#ifdef __FreeBSD__
534	UMA_DESTROY(sc->sc_pool);
535#else
536	pool_destroy(&sc->sc_pool);
537#endif
538#ifdef __FreeBSD__
539	if_free(ifp);
540	if (sc->sc_imo.imo_membership)
541		pfsync_multicast_cleanup(sc);
542	free(sc, M_PFSYNC);
543#else
544	free(sc->sc_imo.imo_membership, M_IPMOPTS);
545	free(sc, M_DEVBUF);
546#endif
547
548#ifdef __FreeBSD__
549	V_pfsyncif = NULL;
550#else
551	pfsyncif = NULL;
552#endif
553
554#ifndef __FreeBSD__
555	return (0);
556#endif
557}
558
559struct mbuf *
560pfsync_if_dequeue(struct ifnet *ifp)
561{
562	struct mbuf *m;
563#ifndef __FreeBSD__
564	int s;
565#endif
566
567#ifdef __FreeBSD__
568	IF_LOCK(&ifp->if_snd);
569	_IF_DROP(&ifp->if_snd);
570	_IF_DEQUEUE(&ifp->if_snd, m);
571	IF_UNLOCK(&ifp->if_snd);
572#else
573	s = splnet();
574	IF_DEQUEUE(&ifp->if_snd, m);
575	splx(s);
576#endif
577
578	return (m);
579}
580
581/*
582 * Start output on the pfsync interface.
583 */
584void
585pfsyncstart(struct ifnet *ifp)
586{
587	struct mbuf *m;
588
589	while ((m = pfsync_if_dequeue(ifp)) != NULL) {
590#ifndef __FreeBSD__
591		IF_DROP(&ifp->if_snd);
592#endif
593		m_freem(m);
594	}
595}
596
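/*
 * Allocate scrub state for a peer when the incoming state carries scrub
 * information and none has been allocated locally yet.
 */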
597int
598pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
599    struct pf_state_peer *d)
600{
601	if (s->scrub.scrub_flag && d->scrub == NULL) {
602#ifdef __FreeBSD__
603		d->scrub = pool_get(&V_pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
604#else
605		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
606#endif
607		if (d->scrub == NULL)
608			return (ENOMEM);
609	}
610
611	return (0);
612}
613
614#ifndef __FreeBSD__
615void
616pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
617{
618	bzero(sp, sizeof(struct pfsync_state));
619
620	/* copy from state key */
621	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
622	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
623	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
624	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
625	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
626	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
627	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
628	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
629	sp->proto = st->key[PF_SK_WIRE]->proto;
630	sp->af = st->key[PF_SK_WIRE]->af;
631
632	/* copy from state */
633	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
634	bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
635	sp->creation = htonl(time_uptime - st->creation);
636	sp->expire = pf_state_expires(st);
637	if (sp->expire <= time_second)
638		sp->expire = htonl(0);
639	else
640		sp->expire = htonl(sp->expire - time_second);
641
642	sp->direction = st->direction;
643	sp->log = st->log;
644	sp->timeout = st->timeout;
645	sp->state_flags = st->state_flags;
646	if (st->src_node)
647		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
648	if (st->nat_src_node)
649		sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE;
650
651	bcopy(&st->id, &sp->id, sizeof(sp->id));
652	sp->creatorid = st->creatorid;
653	pf_state_peer_hton(&st->src, &sp->src);
654	pf_state_peer_hton(&st->dst, &sp->dst);
655
656	if (st->rule.ptr == NULL)
657		sp->rule = htonl(-1);
658	else
659		sp->rule = htonl(st->rule.ptr->nr);
660	if (st->anchor.ptr == NULL)
661		sp->anchor = htonl(-1);
662	else
663		sp->anchor = htonl(st->anchor.ptr->nr);
664	if (st->nat_rule.ptr == NULL)
665		sp->nat_rule = htonl(-1);
666	else
667		sp->nat_rule = htonl(st->nat_rule.ptr->nr);
668
669	pf_state_counter_hton(st->packets[0], sp->packets[0]);
670	pf_state_counter_hton(st->packets[1], sp->packets[1]);
671	pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
672	pf_state_counter_hton(st->bytes[1], sp->bytes[1]);
673
674}
675#endif
676
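/*
 * Install a state received from a peer (or via the ioctl interface):
 * validate the creator id and interface, bind the state to a rule when the
 * ruleset checksums agree, allocate state keys and scrub memory, convert
 * the wire representation and insert the state into the local tables.
 */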
677int
678pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
679{
680	struct pf_state	*st = NULL;
681	struct pf_state_key *skw = NULL, *sks = NULL;
682	struct pf_rule *r = NULL;
683	struct pfi_kif	*kif;
684	int pool_flags;
685	int error;
686
687#ifdef __FreeBSD__
688	PF_LOCK_ASSERT();
689
690	if (sp->creatorid == 0 && V_pf_status.debug >= PF_DEBUG_MISC) {
691#else
692	if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) {
693#endif
694		printf("pfsync_state_import: invalid creator id:"
695		    " %08x\n", ntohl(sp->creatorid));
696		return (EINVAL);
697	}
698
699	if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
700#ifdef __FreeBSD__
701		if (V_pf_status.debug >= PF_DEBUG_MISC)
702#else
703		if (pf_status.debug >= PF_DEBUG_MISC)
704#endif
705			printf("pfsync_state_import: "
706			    "unknown interface: %s\n", sp->ifname);
707		if (flags & PFSYNC_SI_IOCTL)
708			return (EINVAL);
709		return (0);	/* skip this state */
710	}
711
712	/*
713	 * If the ruleset checksums match or the state is coming from the ioctl,
714	 * it's safe to associate the state with the rule of that number.
715	 */
716	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
717	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
718	    pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount)
719		r = pf_main_ruleset.rules[
720		    PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
721	else
722#ifdef __FreeBSD__
723		r = &V_pf_default_rule;
724#else
725		r = &pf_default_rule;
726#endif
727
728	if ((r->max_states && r->states_cur >= r->max_states))
729		goto cleanup;
730
731#ifdef __FreeBSD__
732	if (flags & PFSYNC_SI_IOCTL)
733		pool_flags = PR_WAITOK | PR_ZERO;
734	else
735		pool_flags = PR_NOWAIT | PR_ZERO;
736
737	if ((st = pool_get(&V_pf_state_pl, pool_flags)) == NULL)
738		goto cleanup;
739#else
740	if (flags & PFSYNC_SI_IOCTL)
741		pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
742	else
743		pool_flags = PR_LIMITFAIL | PR_ZERO;
744
745	if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
746		goto cleanup;
747#endif
748
749	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
750		goto cleanup;
751
752	if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
753	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
754	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
755	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
756	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
757	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1]) {
758		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
759			goto cleanup;
760	} else
761		sks = skw;
762
763	/* allocate memory for scrub info */
764	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
765	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
766		goto cleanup;
767
768	/* copy to state key(s) */
769	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
770	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
771	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
772	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
773	skw->proto = sp->proto;
774	skw->af = sp->af;
775	if (sks != skw) {
776		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
777		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
778		sks->port[0] = sp->key[PF_SK_STACK].port[0];
779		sks->port[1] = sp->key[PF_SK_STACK].port[1];
780		sks->proto = sp->proto;
781		sks->af = sp->af;
782	}
783
784	/* copy to state */
785	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
786	st->creation = time_uptime - ntohl(sp->creation);
787	st->expire = time_second;
788	if (sp->expire) {
789		/* XXX No adaptive scaling. */
790		st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire);
791	}
792
793	st->expire = ntohl(sp->expire) + time_second;

794	st->direction = sp->direction;
795	st->log = sp->log;
796	st->timeout = sp->timeout;
797	st->state_flags = sp->state_flags;
798
799	bcopy(sp->id, &st->id, sizeof(st->id));
800	st->creatorid = sp->creatorid;
801	pf_state_peer_ntoh(&sp->src, &st->src);
802	pf_state_peer_ntoh(&sp->dst, &st->dst);
803
804	st->rule.ptr = r;
805	st->nat_rule.ptr = NULL;
806	st->anchor.ptr = NULL;
807	st->rt_kif = NULL;
808
809	st->pfsync_time = time_uptime;
810	st->sync_state = PFSYNC_S_NONE;
811
812	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
813	r->states_cur++;
814	r->states_tot++;
815
816	if (!ISSET(flags, PFSYNC_SI_IOCTL))
817		SET(st->state_flags, PFSTATE_NOSYNC);
818
819	if ((error = pf_state_insert(kif, skw, sks, st)) != 0) {
820		/* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */
821		r->states_cur--;
822		goto cleanup_state;
823	}
824
825	if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
826		CLR(st->state_flags, PFSTATE_NOSYNC);
827		if (ISSET(st->state_flags, PFSTATE_ACK)) {
828			pfsync_q_ins(st, PFSYNC_S_IACK);
829			schednetisr(NETISR_PFSYNC);
830		}
831	}
832	CLR(st->state_flags, PFSTATE_ACK);
833
834	return (0);
835
836cleanup:
837	error = ENOMEM;
838	if (skw == sks)
839		sks = NULL;
840#ifdef __FreeBSD__
841	if (skw != NULL)
842		pool_put(&V_pf_state_key_pl, skw);
843	if (sks != NULL)
844		pool_put(&V_pf_state_key_pl, sks);
845#else
846	if (skw != NULL)
847		pool_put(&pf_state_key_pl, skw);
848	if (sks != NULL)
849		pool_put(&pf_state_key_pl, sks);
850#endif
851
852cleanup_state:	/* pf_state_insert frees the state keys */
853	if (st) {
854#ifdef __FreeBSD__
855		if (st->dst.scrub)
856			pool_put(&V_pf_state_scrub_pl, st->dst.scrub);
857		if (st->src.scrub)
858			pool_put(&V_pf_state_scrub_pl, st->src.scrub);
859		pool_put(&V_pf_state_pl, st);
860#else
861		if (st->dst.scrub)
862			pool_put(&pf_state_scrub_pl, st->dst.scrub);
863		if (st->src.scrub)
864			pool_put(&pf_state_scrub_pl, st->src.scrub);
865		pool_put(&pf_state_pl, st);
866#endif
867	}
868	return (error);
869}
870
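/*
 * Input path: verify the packet arrived on the configured sync interface
 * with TTL 255 and a matching pfsync version, then walk the subheaders and
 * dispatch each action to its pfsync_in_*() handler.
 */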
871void
872#ifdef __FreeBSD__
873pfsync_input(struct mbuf *m, __unused int off)
874#else
875pfsync_input(struct mbuf *m, ...)
876#endif
877{
878#ifdef __FreeBSD__
879	struct pfsync_softc *sc = V_pfsyncif;
880#else
881	struct pfsync_softc *sc = pfsyncif;
882#endif
883	struct pfsync_pkt pkt;
884	struct ip *ip = mtod(m, struct ip *);
885	struct pfsync_header *ph;
886	struct pfsync_subheader subh;
887
888	int offset;
889	int rv;
890
891	V_pfsyncstats.pfsyncs_ipackets++;
892
893	/* verify that we have a sync interface configured */
894#ifdef __FreeBSD__
895	if (!sc || !sc->sc_sync_if || !V_pf_status.running)
896#else
897	if (!sc || !sc->sc_sync_if || !pf_status.running)
898#endif
899		goto done;
900
901	/* verify that the packet came in on the right interface */
902	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
903		V_pfsyncstats.pfsyncs_badif++;
904		goto done;
905	}
906
907#ifdef __FreeBSD__
908	sc->sc_ifp->if_ipackets++;
909	sc->sc_ifp->if_ibytes += m->m_pkthdr.len;
910#else
911	sc->sc_if.if_ipackets++;
912	sc->sc_if.if_ibytes += m->m_pkthdr.len;
913#endif
914	/* verify that the IP TTL is 255. */
915	if (ip->ip_ttl != PFSYNC_DFLTTL) {
916		V_pfsyncstats.pfsyncs_badttl++;
917		goto done;
918	}
919
920	offset = ip->ip_hl << 2;
921	if (m->m_pkthdr.len < offset + sizeof(*ph)) {
922		V_pfsyncstats.pfsyncs_hdrops++;
923		goto done;
924	}
925
926	if (offset + sizeof(*ph) > m->m_len) {
927		if (m_pullup(m, offset + sizeof(*ph)) == NULL) {
928			V_pfsyncstats.pfsyncs_hdrops++;
929			return;
930		}
931		ip = mtod(m, struct ip *);
932	}
933	ph = (struct pfsync_header *)((char *)ip + offset);
934
935	/* verify the version */
936	if (ph->version != PFSYNC_VERSION) {
937		V_pfsyncstats.pfsyncs_badver++;
938		goto done;
939	}
940
941#if 0
942	if (pfsync_input_hmac(m, offset) != 0) {
943		/* XXX stats */
944		goto done;
945	}
946#endif
947
948	/* Cheaper to grab this now than having to mess with mbufs later */
949	pkt.ip = ip;
950	pkt.src = ip->ip_src;
951	pkt.flags = 0;
952
953#ifdef __FreeBSD__
954	if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
955#else
956	if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
957#endif
958		pkt.flags |= PFSYNC_SI_CKSUM;
959
960	offset += sizeof(*ph);
961	for (;;) {
962		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
963		offset += sizeof(subh);
964
965		if (subh.action >= PFSYNC_ACT_MAX) {
966			V_pfsyncstats.pfsyncs_badact++;
967			goto done;
968		}
969
970		rv = (*pfsync_acts[subh.action])(&pkt, m, offset,
971		    ntohs(subh.count));
972		if (rv == -1)
973			return;
974
975		offset += rv;
976	}
977
978done:
979	m_freem(m);
980}
981
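/*
 * Handle a clear request from a peer: mark every state with a matching
 * creator id as NOSYNC and unlink it, so the deletion is not echoed back.
 */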
982int
983pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
984{
985	struct pfsync_clr *clr;
986	struct mbuf *mp;
987	int len = sizeof(*clr) * count;
988	int i, offp;
989
990	struct pf_state *st, *nexts;
991	struct pf_state_key *sk, *nextsk;
992	struct pf_state_item *si;
993	u_int32_t creatorid;
994	int s;
995
996	mp = m_pulldown(m, offset, len, &offp);
997	if (mp == NULL) {
998		V_pfsyncstats.pfsyncs_badlen++;
999		return (-1);
1000	}
1001	clr = (struct pfsync_clr *)(mp->m_data + offp);
1002
1003	s = splsoftnet();
1004#ifdef __FreeBSD__
1005	PF_LOCK();
1006#endif
1007	for (i = 0; i < count; i++) {
1008		creatorid = clr[i].creatorid;
1009
1010		if (clr[i].ifname[0] == '\0') {
1011#ifdef __FreeBSD__
1012			for (st = RB_MIN(pf_state_tree_id, &V_tree_id);
1013			    st; st = nexts) {
1014				nexts = RB_NEXT(pf_state_tree_id, &V_tree_id, st);
1015#else
1016			for (st = RB_MIN(pf_state_tree_id, &tree_id);
1017			    st; st = nexts) {
1018				nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
1019#endif
1020				if (st->creatorid == creatorid) {
1021					SET(st->state_flags, PFSTATE_NOSYNC);
1022					pf_unlink_state(st);
1023				}
1024			}
1025		} else {
1026			if (pfi_kif_get(clr[i].ifname) == NULL)
1027				continue;
1028
1029			/* XXX correct? */
1030#ifdef __FreeBSD__
1031			for (sk = RB_MIN(pf_state_tree, &V_pf_statetbl);
1032#else
1033			for (sk = RB_MIN(pf_state_tree, &pf_statetbl);
1034#endif
1035			    sk; sk = nextsk) {
1036				nextsk = RB_NEXT(pf_state_tree,
1037#ifdef __FreeBSD__
1038				    &V_pf_statetbl, sk);
1039#else
1040				    &pf_statetbl, sk);
1041#endif
1042				TAILQ_FOREACH(si, &sk->states, entry) {
1043					if (si->s->creatorid == creatorid) {
1044						SET(si->s->state_flags,
1045						    PFSTATE_NOSYNC);
1046						pf_unlink_state(si->s);
1047					}
1048				}
1049			}
1050		}
1051	}
1052#ifdef __FreeBSD__
1053	PF_UNLOCK();
1054#endif
1055	splx(s);
1056
1057	return (len);
1058}
1059
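/*
 * Handle full state inserts from a peer: validate each entry and import
 * it; an ENOMEM from the import aborts the rest of this message.
 */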
1060int
1061pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1062{
1063	struct mbuf *mp;
1064	struct pfsync_state *sa, *sp;
1065	int len = sizeof(*sp) * count;
1066	int i, offp;
1067
1068	int s;
1069
1070	mp = m_pulldown(m, offset, len, &offp);
1071	if (mp == NULL) {
1072		V_pfsyncstats.pfsyncs_badlen++;
1073		return (-1);
1074	}
1075	sa = (struct pfsync_state *)(mp->m_data + offp);
1076
1077	s = splsoftnet();
1078#ifdef __FreeBSD__
1079	PF_LOCK();
1080#endif
1081	for (i = 0; i < count; i++) {
1082		sp = &sa[i];
1083
1084		/* check for invalid values */
1085		if (sp->timeout >= PFTM_MAX ||
1086		    sp->src.state > PF_TCPS_PROXY_DST ||
1087		    sp->dst.state > PF_TCPS_PROXY_DST ||
1088		    sp->direction > PF_OUT ||
1089		    (sp->af != AF_INET && sp->af != AF_INET6)) {
1090#ifdef __FreeBSD__
1091			if (V_pf_status.debug >= PF_DEBUG_MISC) {
1092#else
1093			if (pf_status.debug >= PF_DEBUG_MISC) {
1094#endif
1095				printf("pfsync_input: PFSYNC5_ACT_INS: "
1096				    "invalid value\n");
1097			}
1098			V_pfsyncstats.pfsyncs_badval++;
1099			continue;
1100		}
1101
1102		if (pfsync_state_import(sp, pkt->flags) == ENOMEM) {
1103			/* drop out, but process the rest of the actions */
1104			break;
1105		}
1106	}
1107#ifdef __FreeBSD__
1108	PF_UNLOCK();
1109#endif
1110	splx(s);
1111
1112	return (len);
1113}
1114
1115int
1116pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1117{
1118	struct pfsync_ins_ack *ia, *iaa;
1119	struct pf_state_cmp id_key;
1120	struct pf_state *st;
1121
1122	struct mbuf *mp;
1123	int len = count * sizeof(*ia);
1124	int offp, i;
1125	int s;
1126
1127	mp = m_pulldown(m, offset, len, &offp);
1128	if (mp == NULL) {
1129		V_pfsyncstats.pfsyncs_badlen++;
1130		return (-1);
1131	}
1132	iaa = (struct pfsync_ins_ack *)(mp->m_data + offp);
1133
1134	s = splsoftnet();
1135#ifdef __FreeBSD__
1136	PF_LOCK();
1137#endif
1138	for (i = 0; i < count; i++) {
1139		ia = &iaa[i];
1140
1141		bcopy(&ia->id, &id_key.id, sizeof(id_key.id));
1142		id_key.creatorid = ia->creatorid;
1143
1144		st = pf_find_state_byid(&id_key);
1145		if (st == NULL)
1146			continue;
1147
1148		if (ISSET(st->state_flags, PFSTATE_ACK))
1149			pfsync_deferred(st, 0);
1150	}
1151#ifdef __FreeBSD__
1152	PF_UNLOCK();
1153#endif
1154	splx(s);
1155	/*
1156	 * XXX this is not yet implemented, but we know the size of the
1157	 * message so we can skip it.
1158	 */
1159
1160	return (count * sizeof(struct pfsync_ins_ack));
1161}
1162
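/*
 * Sanity-check a TCP state update against our local copy; returns a
 * non-zero reason code when the peer's view would move the connection
 * state or sequence window backwards.
 */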
1163int
1164pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
1165    struct pfsync_state_peer *dst)
1166{
1167	int sfail = 0;
1168
1169	/*
1170	 * The state should never go backwards except
1171	 * for syn-proxy states.  Neither should the
1172	 * sequence window slide backwards.
1173	 */
1174	if (st->src.state > src->state &&
1175	    (st->src.state < PF_TCPS_PROXY_SRC ||
1176	    src->state >= PF_TCPS_PROXY_SRC))
1177		sfail = 1;
1178	else if (SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))
1179		sfail = 3;
1180	else if (st->dst.state > dst->state) {
1181		/* There might still be useful
1182		 * information about the src state here,
1183		 * so import that part of the update,
1184		 * then "fail" so we send the updated
1185		 * state back to the peer who is missing
1186		 * what we know. */
1187		pf_state_peer_ntoh(src, &st->src);
1188		/* XXX do anything with timeouts? */
1189		sfail = 7;
1190	} else if (st->dst.state >= TCPS_SYN_SENT &&
1191	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))
1192		sfail = 4;
1193
1194	return (sfail);
1195}
1196
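/*
 * Handle full state updates: import states we do not know, otherwise merge
 * the peer's view; a stale update is answered by queueing our own copy of
 * the state for transmission.
 */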
1197int
1198pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1199{
1200	struct pfsync_state *sa, *sp;
1201	struct pf_state_cmp id_key;
1202	struct pf_state_key *sk;
1203	struct pf_state *st;
1204	int sfail;
1205
1206	struct mbuf *mp;
1207	int len = count * sizeof(*sp);
1208	int offp, i;
1209	int s;
1210
1211	mp = m_pulldown(m, offset, len, &offp);
1212	if (mp == NULL) {
1213		V_pfsyncstats.pfsyncs_badlen++;
1214		return (-1);
1215	}
1216	sa = (struct pfsync_state *)(mp->m_data + offp);
1217
1218	s = splsoftnet();
1219#ifdef __FreeBSD__
1220	PF_LOCK();
1221#endif
1222	for (i = 0; i < count; i++) {
1223		sp = &sa[i];
1224
1225		/* check for invalid values */
1226		if (sp->timeout >= PFTM_MAX ||
1227		    sp->src.state > PF_TCPS_PROXY_DST ||
1228		    sp->dst.state > PF_TCPS_PROXY_DST) {
1229#ifdef __FreeBSD__
1230			if (V_pf_status.debug >= PF_DEBUG_MISC) {
1231#else
1232			if (pf_status.debug >= PF_DEBUG_MISC) {
1233#endif
1234				printf("pfsync_input: PFSYNC_ACT_UPD: "
1235				    "invalid value\n");
1236			}
1237			V_pfsyncstats.pfsyncs_badval++;
1238			continue;
1239		}
1240
1241		bcopy(sp->id, &id_key.id, sizeof(id_key.id));
1242		id_key.creatorid = sp->creatorid;
1243
1244		st = pf_find_state_byid(&id_key);
1245		if (st == NULL) {
1246			/* insert the update */
1247			if (pfsync_state_import(sp, 0))
1248				V_pfsyncstats.pfsyncs_badstate++;
1249			continue;
1250		}
1251
1252		if (ISSET(st->state_flags, PFSTATE_ACK))
1253			pfsync_deferred(st, 1);
1254
1255		sk = st->key[PF_SK_WIRE];	/* XXX right one? */
1256		sfail = 0;
1257		if (sk->proto == IPPROTO_TCP)
1258			sfail = pfsync_upd_tcp(st, &sp->src, &sp->dst);
1259		else {
1260			/*
1261			 * Non-TCP protocol state machines always go
1262			 * forward.
1263			 */
1264			if (st->src.state > sp->src.state)
1265				sfail = 5;
1266			else if (st->dst.state > sp->dst.state)
1267				sfail = 6;
1268		}
1269
1270		if (sfail) {
1271#ifdef __FreeBSD__
1272			if (V_pf_status.debug >= PF_DEBUG_MISC) {
1273#else
1274			if (pf_status.debug >= PF_DEBUG_MISC) {
1275#endif
1276				printf("pfsync: %s stale update (%d)"
1277				    " id: %016llx creatorid: %08x\n",
1278				    (sfail < 7 ?  "ignoring" : "partial"),
1279				    sfail, betoh64(st->id),
1280				    ntohl(st->creatorid));
1281			}
1282			V_pfsyncstats.pfsyncs_stale++;
1283
1284			pfsync_update_state(st);
1285			schednetisr(NETISR_PFSYNC);
1286			continue;
1287		}
1288		pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
1289		pf_state_peer_ntoh(&sp->src, &st->src);
1290		pf_state_peer_ntoh(&sp->dst, &st->dst);
1291		st->expire = ntohl(sp->expire) + time_second;
1292		st->timeout = sp->timeout;
1293		st->pfsync_time = time_uptime;
1294	}
1295#ifdef __FreeBSD__
1296	PF_UNLOCK();
1297#endif
1298	splx(s);
1299
1300	return (len);
1301}
1302
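/*
 * Handle compressed state updates: these carry only the id, creator id and
 * peer data for states we are expected to know already; an unknown state
 * triggers an update request back to the peer.
 */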
1303int
1304pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1305{
1306	struct pfsync_upd_c *ua, *up;
1307	struct pf_state_key *sk;
1308	struct pf_state_cmp id_key;
1309	struct pf_state *st;
1310
1311	int len = count * sizeof(*up);
1312	int sfail;
1313
1314	struct mbuf *mp;
1315	int offp, i;
1316	int s;
1317
1318	mp = m_pulldown(m, offset, len, &offp);
1319	if (mp == NULL) {
1320		V_pfsyncstats.pfsyncs_badlen++;
1321		return (-1);
1322	}
1323	ua = (struct pfsync_upd_c *)(mp->m_data + offp);
1324
1325	s = splsoftnet();
1326#ifdef __FreeBSD__
1327	PF_LOCK();
1328#endif
1329	for (i = 0; i < count; i++) {
1330		up = &ua[i];
1331
1332		/* check for invalid values */
1333		if (up->timeout >= PFTM_MAX ||
1334		    up->src.state > PF_TCPS_PROXY_DST ||
1335		    up->dst.state > PF_TCPS_PROXY_DST) {
1336#ifdef __FreeBSD__
1337			if (V_pf_status.debug >= PF_DEBUG_MISC) {
1338#else
1339			if (pf_status.debug >= PF_DEBUG_MISC) {
1340#endif
1341				printf("pfsync_input: "
1342				    "PFSYNC_ACT_UPD_C: "
1343				    "invalid value\n");
1344			}
1345			V_pfsyncstats.pfsyncs_badval++;
1346			continue;
1347		}
1348
1349		bcopy(&up->id, &id_key.id, sizeof(id_key.id));
1350		id_key.creatorid = up->creatorid;
1351
1352		st = pf_find_state_byid(&id_key);
1353		if (st == NULL) {
1354			/* We don't have this state. Ask for it. */
1355			pfsync_request_update(id_key.creatorid, id_key.id);
1356			continue;
1357		}
1358
1359		if (ISSET(st->state_flags, PFSTATE_ACK))
1360			pfsync_deferred(st, 1);
1361
1362		sk = st->key[PF_SK_WIRE]; /* XXX right one? */
1363		sfail = 0;
1364		if (sk->proto == IPPROTO_TCP)
1365			sfail = pfsync_upd_tcp(st, &up->src, &up->dst);
1366		else {
1367			/*
1368			 * Non-TCP protocol state machines always go forward
1369			 */
1370			if (st->src.state > up->src.state)
1371				sfail = 5;
1372			else if (st->dst.state > up->dst.state)
1373				sfail = 6;
1374		}
1375
1376		if (sfail) {
1377#ifdef __FreeBSD__
1378			if (V_pf_status.debug >= PF_DEBUG_MISC) {
1379#else
1380			if (pf_status.debug >= PF_DEBUG_MISC) {
1381#endif
1382				printf("pfsync: ignoring stale update "
1383				    "(%d) id: %016llx "
1384				    "creatorid: %08x\n", sfail,
1385				    betoh64(st->id),
1386				    ntohl(st->creatorid));
1387			}
1388			V_pfsyncstats.pfsyncs_stale++;
1389
1390			pfsync_update_state(st);
1391			schednetisr(NETISR_PFSYNC);
1392			continue;
1393		}
1394		pfsync_alloc_scrub_memory(&up->dst, &st->dst);
1395		pf_state_peer_ntoh(&up->src, &st->src);
1396		pf_state_peer_ntoh(&up->dst, &st->dst);
1397		st->expire = ntohl(up->expire) + time_second;
1398		st->timeout = up->timeout;
1399		st->pfsync_time = time_uptime;
1400	}
1401#ifdef __FreeBSD__
1402	PF_UNLOCK();
1403#endif
1404	splx(s);
1405
1406	return (len);
1407}
1408
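/*
 * Handle update requests: a zero id/creator id asks for a full bulk
 * update, otherwise the requested state is queued for an update message.
 */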
1409int
1410pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1411{
1412	struct pfsync_upd_req *ur, *ura;
1413	struct mbuf *mp;
1414	int len = count * sizeof(*ur);
1415	int i, offp;
1416
1417	struct pf_state_cmp id_key;
1418	struct pf_state *st;
1419
1420	mp = m_pulldown(m, offset, len, &offp);
1421	if (mp == NULL) {
1422		V_pfsyncstats.pfsyncs_badlen++;
1423		return (-1);
1424	}
1425	ura = (struct pfsync_upd_req *)(mp->m_data + offp);
1426
1427#ifdef __FreeBSD__
1428	PF_LOCK();
1429#endif
1430	for (i = 0; i < count; i++) {
1431		ur = &ura[i];
1432
1433		bcopy(&ur->id, &id_key.id, sizeof(id_key.id));
1434		id_key.creatorid = ur->creatorid;
1435
1436		if (id_key.id == 0 && id_key.creatorid == 0)
1437			pfsync_bulk_start();
1438		else {
1439			st = pf_find_state_byid(&id_key);
1440			if (st == NULL) {
1441				V_pfsyncstats.pfsyncs_badstate++;
1442				continue;
1443			}
1444			if (ISSET(st->state_flags, PFSTATE_NOSYNC))
1445				continue;
1446
1447			pfsync_update_state_req(st);
1448		}
1449	}
1450#ifdef __FreeBSD__
1451	PF_UNLOCK();
1452#endif
1453
1454	return (len);
1455}
1456
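/*
 * Handle full-state delete messages: mark each listed state as NOSYNC and
 * unlink it so the deletion is not announced again.
 */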
1457int
1458pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1459{
1460	struct mbuf *mp;
1461	struct pfsync_state *sa, *sp;
1462	struct pf_state_cmp id_key;
1463	struct pf_state *st;
1464	int len = count * sizeof(*sp);
1465	int offp, i;
1466	int s;
1467
1468	mp = m_pulldown(m, offset, len, &offp);
1469	if (mp == NULL) {
1470		V_pfsyncstats.pfsyncs_badlen++;
1471		return (-1);
1472	}
1473	sa = (struct pfsync_state *)(mp->m_data + offp);
1474
1475	s = splsoftnet();
1476#ifdef __FreeBSD__
1477	PF_LOCK();
1478#endif
1479	for (i = 0; i < count; i++) {
1480		sp = &sa[i];
1481
1482		bcopy(sp->id, &id_key.id, sizeof(id_key.id));
1483		id_key.creatorid = sp->creatorid;
1484
1485		st = pf_find_state_byid(&id_key);
1486		if (st == NULL) {
1487			V_pfsyncstats.pfsyncs_badstate++;
1488			continue;
1489		}
1490		SET(st->state_flags, PFSTATE_NOSYNC);
1491		pf_unlink_state(st);
1492	}
1493#ifdef __FreeBSD__
1494	PF_UNLOCK();
1495#endif
1496	splx(s);
1497
1498	return (len);
1499}
1500
1501int
1502pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1503{
1504	struct mbuf *mp;
1505	struct pfsync_del_c *sa, *sp;
1506	struct pf_state_cmp id_key;
1507	struct pf_state *st;
1508	int len = count * sizeof(*sp);
1509	int offp, i;
1510	int s;
1511
1512	mp = m_pulldown(m, offset, len, &offp);
1513	if (mp == NULL) {
1514		V_pfsyncstats.pfsyncs_badlen++;
1515		return (-1);
1516	}
1517	sa = (struct pfsync_del_c *)(mp->m_data + offp);
1518
1519	s = splsoftnet();
1520#ifdef __FreeBSD__
1521	PF_LOCK();
1522#endif
1523	for (i = 0; i < count; i++) {
1524		sp = &sa[i];
1525
1526		bcopy(&sp->id, &id_key.id, sizeof(id_key.id));
1527		id_key.creatorid = sp->creatorid;
1528
1529		st = pf_find_state_byid(&id_key);
1530		if (st == NULL) {
1531			V_pfsyncstats.pfsyncs_badstate++;
1532			continue;
1533		}
1534
1535		SET(st->state_flags, PFSTATE_NOSYNC);
1536		pf_unlink_state(st);
1537	}
1538#ifdef __FreeBSD__
1539	PF_UNLOCK();
1540#endif
1541	splx(s);
1542
1543	return (len);
1544}
1545
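/*
 * Handle bulk update status messages: on start, arm the failure timeout
 * based on the state limit and MTU; on a valid end, stop the timeout and
 * undo the CARP demotion applied while we were out of sync.
 */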
1546int
1547pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1548{
1549#ifdef __FreeBSD__
1550	struct pfsync_softc *sc = V_pfsyncif;
1551#else
1552	struct pfsync_softc *sc = pfsyncif;
1553#endif
1554	struct pfsync_bus *bus;
1555	struct mbuf *mp;
1556	int len = count * sizeof(*bus);
1557	int offp;
1558
1559	/* If we're not waiting for a bulk update, who cares. */
1560	if (sc->sc_ureq_sent == 0)
1561		return (len);
1562
1563	mp = m_pulldown(m, offset, len, &offp);
1564	if (mp == NULL) {
1565		V_pfsyncstats.pfsyncs_badlen++;
1566		return (-1);
1567	}
1568	bus = (struct pfsync_bus *)(mp->m_data + offp);
1569
1570	switch (bus->status) {
1571	case PFSYNC_BUS_START:
1572#ifdef __FreeBSD__
1573		callout_reset(&sc->sc_bulkfail_tmo, 4 * hz +
1574		    V_pf_pool_limits[PF_LIMIT_STATES].limit /
1575		    ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) /
1576		    sizeof(struct pfsync_state)),
1577		    pfsync_bulk_fail, V_pfsyncif);
1578#else
1579		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
1580		    pf_pool_limits[PF_LIMIT_STATES].limit /
1581		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
1582		    sizeof(struct pfsync_state)));
1583#endif
1584#ifdef __FreeBSD__
1585		if (V_pf_status.debug >= PF_DEBUG_MISC)
1586#else
1587		if (pf_status.debug >= PF_DEBUG_MISC)
1588#endif
1589			printf("pfsync: received bulk update start\n");
1590		break;
1591
1592	case PFSYNC_BUS_END:
1593		if (time_uptime - ntohl(bus->endtime) >=
1594		    sc->sc_ureq_sent) {
1595			/* that's it, we're happy */
1596			sc->sc_ureq_sent = 0;
1597			sc->sc_bulk_tries = 0;
1598			timeout_del(&sc->sc_bulkfail_tmo);
1599#ifdef __FreeBSD__
1600			if (!sc->pfsync_sync_ok && carp_demote_adj_p)
1601				(*carp_demote_adj_p)(-V_pfsync_carp_adj,
1602				    "pfsync bulk done");
1603			sc->pfsync_sync_ok = 1;
1604#else
1605#if NCARP > 0
1606			if (!pfsync_sync_ok)
1607				carp_group_demote_adj(&sc->sc_if, -1);
1608#endif
1609			pfsync_sync_ok = 1;
1610#endif
1611#ifdef __FreeBSD__
1612			if (V_pf_status.debug >= PF_DEBUG_MISC)
1613#else
1614			if (pf_status.debug >= PF_DEBUG_MISC)
1615#endif
1616				printf("pfsync: received valid "
1617				    "bulk update end\n");
1618		} else {
1619#ifdef __FreeBSD__
1620			if (V_pf_status.debug >= PF_DEBUG_MISC)
1621#else
1622			if (pf_status.debug >= PF_DEBUG_MISC)
1623#endif
1624				printf("pfsync: received invalid "
1625				    "bulk update end: bad timestamp\n");
1626		}
1627		break;
1628	}
1629
1630	return (len);
1631}
1632
1633int
1634pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1635{
1636	int len = count * sizeof(struct pfsync_tdb);
1637
1638#if defined(IPSEC)
1639	struct pfsync_tdb *tp;
1640	struct mbuf *mp;
1641	int offp;
1642	int i;
1643	int s;
1644
1645	mp = m_pulldown(m, offset, len, &offp);
1646	if (mp == NULL) {
1647		V_pfsyncstats.pfsyncs_badlen++;
1648		return (-1);
1649	}
1650	tp = (struct pfsync_tdb *)(mp->m_data + offp);
1651
1652	s = splsoftnet();
1653#ifdef __FreeBSD__
1654	PF_LOCK();
1655#endif
1656	for (i = 0; i < count; i++)
1657		pfsync_update_net_tdb(&tp[i]);
1658#ifdef __FreeBSD__
1659	PF_UNLOCK();
1660#endif
1661	splx(s);
1662#endif
1663
1664	return (len);
1665}
1666
1667#if defined(IPSEC)
1668/* Update an in-kernel tdb. Silently fail if no tdb is found. */
1669void
1670pfsync_update_net_tdb(struct pfsync_tdb *pt)
1671{
1672	struct tdb		*tdb;
1673	int			 s;
1674
1675	/* check for invalid values */
1676	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
1677	    (pt->dst.sa.sa_family != AF_INET &&
1678	     pt->dst.sa.sa_family != AF_INET6))
1679		goto bad;
1680
1681	s = spltdb();
1682	tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
1683	if (tdb) {
1684		pt->rpl = ntohl(pt->rpl);
1685		pt->cur_bytes = betoh64(pt->cur_bytes);
1686
1687		/* Neither replay nor byte counter should ever decrease. */
1688		if (pt->rpl < tdb->tdb_rpl ||
1689		    pt->cur_bytes < tdb->tdb_cur_bytes) {
1690			splx(s);
1691			goto bad;
1692		}
1693
1694		tdb->tdb_rpl = pt->rpl;
1695		tdb->tdb_cur_bytes = pt->cur_bytes;
1696	}
1697	splx(s);
1698	return;
1699
1700bad:
1701#ifdef __FreeBSD__
1702	if (V_pf_status.debug >= PF_DEBUG_MISC)
1703#else
1704	if (pf_status.debug >= PF_DEBUG_MISC)
1705#endif
1706		printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: "
1707		    "invalid value\n");
1708	V_pfsyncstats.pfsyncs_badstate++;
1709	return;
1710}
1711#endif
1712
1713
1714int
1715pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1716{
1717	/* check if we are at the right place in the packet */
1718	if (offset != m->m_pkthdr.len - sizeof(struct pfsync_eof))
1719		V_pfsyncstats.pfsyncs_badact++;
1720
1721	/* we're done. free and let the caller return */
1722	m_freem(m);
1723	return (-1);
1724}
1725
1726int
1727pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1728{
1729	V_pfsyncstats.pfsyncs_badact++;
1730
1731	m_freem(m);
1732	return (-1);
1733}
1734
1735int
1736pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
1737#ifdef __FreeBSD__
1738	struct route *rt)
1739#else
1740	struct rtentry *rt)
1741#endif
1742{
1743	m_freem(m);
1744	return (0);
1745}
1746
1747/* ARGSUSED */
1748int
1749pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1750{
1751#ifndef __FreeBSD__
1752	struct proc *p = curproc;
1753#endif
1754	struct pfsync_softc *sc = ifp->if_softc;
1755	struct ifreq *ifr = (struct ifreq *)data;
1756	struct ip_moptions *imo = &sc->sc_imo;
1757	struct pfsyncreq pfsyncr;
1758	struct ifnet    *sifp;
1759	struct ip *ip;
1760	int s, error;
1761
1762	switch (cmd) {
1763#if 0
1764	case SIOCSIFADDR:
1765	case SIOCAIFADDR:
1766	case SIOCSIFDSTADDR:
1767#endif
1768	case SIOCSIFFLAGS:
1769#ifdef __FreeBSD__
1770		if (ifp->if_flags & IFF_UP)
1771			ifp->if_drv_flags |= IFF_DRV_RUNNING;
1772		else
1773			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1774#else
1775		if (ifp->if_flags & IFF_UP)
1776			ifp->if_flags |= IFF_RUNNING;
1777		else
1778			ifp->if_flags &= ~IFF_RUNNING;
1779#endif
1780		break;
1781	case SIOCSIFMTU:
1782		if (!sc->sc_sync_if ||
1783		    ifr->ifr_mtu <= PFSYNC_MINPKT ||
1784		    ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
1785			return (EINVAL);
1786		if (ifr->ifr_mtu < ifp->if_mtu) {
1787			s = splnet();
1788#ifdef __FreeBSD__
1789			PF_LOCK();
1790#endif
1791			pfsync_sendout();
1792#ifdef __FreeBSD__
1793			PF_UNLOCK();
1794#endif
1795			splx(s);
1796		}
1797		ifp->if_mtu = ifr->ifr_mtu;
1798		break;
1799	case SIOCGETPFSYNC:
1800		bzero(&pfsyncr, sizeof(pfsyncr));
1801		if (sc->sc_sync_if) {
1802			strlcpy(pfsyncr.pfsyncr_syncdev,
1803			    sc->sc_sync_if->if_xname, IFNAMSIZ);
1804		}
1805		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
1806		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
1807		return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));
1808
1809	case SIOCSETPFSYNC:
1810#ifdef __FreeBSD__
1811		if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0)
1812#else
1813		if ((error = suser(p, p->p_acflag)) != 0)
1814#endif
1815			return (error);
1816		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
1817			return (error);
1818
1819#ifdef __FreeBSD__
1820		PF_LOCK();
1821#endif
1822		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
1823#ifdef __FreeBSD__
1824			sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
1825#else
1826			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
1827#endif
1828		else
1829			sc->sc_sync_peer.s_addr =
1830			    pfsyncr.pfsyncr_syncpeer.s_addr;
1831
1832		if (pfsyncr.pfsyncr_maxupdates > 255)
1833#ifdef __FreeBSD__
1834		{
1835			PF_UNLOCK();
1836#endif
1837			return (EINVAL);
1838#ifdef __FreeBSD__
1839		}
1840#endif
1841		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
1842
1843		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
1844			sc->sc_sync_if = NULL;
1845#ifdef __FreeBSD__
1846			PF_UNLOCK();
1847			if (imo->imo_membership)
1848				pfsync_multicast_cleanup(sc);
1849#else
1850			if (imo->imo_num_memberships > 0) {
1851				in_delmulti(imo->imo_membership[
1852				    --imo->imo_num_memberships]);
1853				imo->imo_multicast_ifp = NULL;
1854			}
1855#endif
1856			break;
1857		}
1858
1859#ifdef __FreeBSD__
1860		PF_UNLOCK();
1861#endif
1862		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL)
1863			return (EINVAL);
1864
1865#ifdef __FreeBSD__
1866		PF_LOCK();
1867#endif
1868		s = splnet();
1869#ifdef __FreeBSD__
1870		if (sifp->if_mtu < sc->sc_ifp->if_mtu ||
1871#else
1872		if (sifp->if_mtu < sc->sc_if.if_mtu ||
1873#endif
1874		    (sc->sc_sync_if != NULL &&
1875		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
1876		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
1877			pfsync_sendout();
1878		sc->sc_sync_if = sifp;
1879
1880#ifdef __FreeBSD__
1881		if (imo->imo_membership) {
1882			PF_UNLOCK();
1883			pfsync_multicast_cleanup(sc);
1884			PF_LOCK();
1885		}
1886#else
1887		if (imo->imo_num_memberships > 0) {
1888			in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1889			imo->imo_multicast_ifp = NULL;
1890		}
1891#endif
1892
1893#ifdef __FreeBSD__
1894		if (sc->sc_sync_if &&
1895		    sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
1896			PF_UNLOCK();
1897			error = pfsync_multicast_setup(sc);
1898			if (error)
1899				return (error);
1900			PF_LOCK();
1901		}
1902#else
1903		if (sc->sc_sync_if &&
1904		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
1905			struct in_addr addr;
1906
1907			if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
1908				sc->sc_sync_if = NULL;
1909				splx(s);
1910				return (EADDRNOTAVAIL);
1911			}
1912
1913			addr.s_addr = INADDR_PFSYNC_GROUP;
1914
1915			if ((imo->imo_membership[0] =
1916			    in_addmulti(&addr, sc->sc_sync_if)) == NULL) {
1917				sc->sc_sync_if = NULL;
1918				splx(s);
1919				return (ENOBUFS);
1920			}
1921			imo->imo_num_memberships++;
1922			imo->imo_multicast_ifp = sc->sc_sync_if;
1923			imo->imo_multicast_ttl = PFSYNC_DFLTTL;
1924			imo->imo_multicast_loop = 0;
1925		}
1926#endif	/* !__FreeBSD__ */
1927
1928		ip = &sc->sc_template;
1929		bzero(ip, sizeof(*ip));
1930		ip->ip_v = IPVERSION;
1931		ip->ip_hl = sizeof(sc->sc_template) >> 2;
1932		ip->ip_tos = IPTOS_LOWDELAY;
1933		/* len and id are set later */
1934#ifdef __FreeBSD__
1935		ip->ip_off = IP_DF;
1936#else
1937		ip->ip_off = htons(IP_DF);
1938#endif
1939		ip->ip_ttl = PFSYNC_DFLTTL;
1940		ip->ip_p = IPPROTO_PFSYNC;
1941		ip->ip_src.s_addr = INADDR_ANY;
1942		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;
1943
1944		if (sc->sc_sync_if) {
1945			/* Request a full state table update. */
1946			sc->sc_ureq_sent = time_uptime;
1947#ifdef __FreeBSD__
1948			if (sc->pfsync_sync_ok && carp_demote_adj_p)
1949				(*carp_demote_adj_p)(V_pfsync_carp_adj,
1950				    "pfsync bulk start");
1951			sc->pfsync_sync_ok = 0;
1952#else
1953#if NCARP > 0
1954			if (pfsync_sync_ok)
1955				carp_group_demote_adj(&sc->sc_if, 1);
1956#endif
1957			pfsync_sync_ok = 0;
1958#endif
1959#ifdef __FreeBSD__
1960			if (V_pf_status.debug >= PF_DEBUG_MISC)
1961#else
1962			if (pf_status.debug >= PF_DEBUG_MISC)
1963#endif
1964				printf("pfsync: requesting bulk update\n");
1965#ifdef __FreeBSD__
1966			callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
1967			    pfsync_bulk_fail, V_pfsyncif);
1968#else
1969			timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
1970#endif
1971			pfsync_request_update(0, 0);
1972		}
1973#ifdef __FreeBSD__
1974		PF_UNLOCK();
1975#endif
1976		splx(s);
1977
1978		break;
1979
1980	default:
1981		return (ENOTTY);
1982	}
1983
1984	return (0);
1985}
1986
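/*
 * Per-queue writers used by pfsync_sendout() to serialize queued states
 * into the outgoing packet (see pfsync_qs[]); each returns the number of
 * bytes written at the given offset.
 */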
1987int
1988pfsync_out_state(struct pf_state *st, struct mbuf *m, int offset)
1989{
1990	struct pfsync_state *sp = (struct pfsync_state *)(m->m_data + offset);
1991
1992	pfsync_state_export(sp, st);
1993
1994	return (sizeof(*sp));
1995}
1996
1997int
1998pfsync_out_iack(struct pf_state *st, struct mbuf *m, int offset)
1999{
2000	struct pfsync_ins_ack *iack =
2001	    (struct pfsync_ins_ack *)(m->m_data + offset);
2002
2003	iack->id = st->id;
2004	iack->creatorid = st->creatorid;
2005
2006	return (sizeof(*iack));
2007}
2008
2009int
2010pfsync_out_upd_c(struct pf_state *st, struct mbuf *m, int offset)
2011{
2012	struct pfsync_upd_c *up = (struct pfsync_upd_c *)(m->m_data + offset);
2013
2014	up->id = st->id;
2015	pf_state_peer_hton(&st->src, &up->src);
2016	pf_state_peer_hton(&st->dst, &up->dst);
2017	up->creatorid = st->creatorid;
2018
2019	up->expire = pf_state_expires(st);
2020	if (up->expire <= time_second)
2021		up->expire = htonl(0);
2022	else
2023		up->expire = htonl(up->expire - time_second);
2024	up->timeout = st->timeout;
2025
2026	bzero(up->_pad, sizeof(up->_pad)); /* XXX */
2027
2028	return (sizeof(*up));
2029}
2030
2031int
2032pfsync_out_del(struct pf_state *st, struct mbuf *m, int offset)
2033{
2034	struct pfsync_del_c *dp = (struct pfsync_del_c *)(m->m_data + offset);
2035
2036	dp->id = st->id;
2037	dp->creatorid = st->creatorid;
2038
2039	SET(st->state_flags, PFSTATE_NOSYNC);
2040
2041	return (sizeof(*dp));
2042}
2043
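/*
 * Discard everything queued for transmission (states, update requests and
 * the optional extra region) and reset sc_len to the empty-packet size.
 */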
2044void
2045pfsync_drop(struct pfsync_softc *sc)
2046{
2047	struct pf_state *st;
2048	struct pfsync_upd_req_item *ur;
2049#ifdef notyet
2050	struct tdb *t;
2051#endif
2052	int q;
2053
2054	for (q = 0; q < PFSYNC_S_COUNT; q++) {
2055		if (TAILQ_EMPTY(&sc->sc_qs[q]))
2056			continue;
2057
2058		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
2059#ifdef PFSYNC_DEBUG
2060#ifdef __FreeBSD__
2061			KASSERT(st->sync_state == q,
2062				("%s: st->sync_state == q",
2063					__FUNCTION__));
2064#else
2065			KASSERT(st->sync_state == q);
2066#endif
2067#endif
2068			st->sync_state = PFSYNC_S_NONE;
2069		}
2070		TAILQ_INIT(&sc->sc_qs[q]);
2071	}
2072
2073	while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
2074		TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
2075		pool_put(&sc->sc_pool, ur);
2076	}
2077
2078	sc->sc_plus = NULL;
2079
2080#ifdef notyet
2081	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
2082		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry)
2083			CLR(t->tdb_flags, TDBF_PFSYNC);
2084
2085		TAILQ_INIT(&sc->sc_tdb_q);
2086	}
2087#endif
2088
2089	sc->sc_len = PFSYNC_MINPKT;
2090}
2091
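/*
 * Assemble a single pfsync packet from the IP template, the pfsync header
 * and all queued subheaders (states, update requests, the optional "plus"
 * region) and hand it to the sync interface for transmission.
 */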
2092#ifdef __FreeBSD__
2093void pfsync_sendout()
2094{
2095	pfsync_sendout1(1);
2096}
2097
2098static void
2099pfsync_sendout1(int schedswi)
2100{
2101	struct pfsync_softc *sc = V_pfsyncif;
2102#else
2103void
2104pfsync_sendout(void)
2105{
2106	struct pfsync_softc *sc = pfsyncif;
2107#endif
2108#if NBPFILTER > 0
2109#ifdef __FreeBSD__
2110	struct ifnet *ifp = sc->sc_ifp;
2111#else
2112	struct ifnet *ifp = &sc->sc_if;
2113#endif
2114#endif
2115	struct mbuf *m;
2116	struct ip *ip;
2117	struct pfsync_header *ph;
2118	struct pfsync_subheader *subh;
2119	struct pf_state *st;
2120	struct pfsync_upd_req_item *ur;
2121#ifdef notyet
2122	struct tdb *t;
2123#endif
2124	int offset;
2125	int q, count = 0;
2126
2127#ifdef __FreeBSD__
2128	PF_LOCK_ASSERT();
2129#else
2130	splassert(IPL_NET);
2131#endif
2132
2133	if (sc == NULL || sc->sc_len == PFSYNC_MINPKT)
2134		return;
2135
2136#if NBPFILTER > 0
2137	if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) {
2138#else
2139	if (sc->sc_sync_if == NULL) {
2140#endif
2141		pfsync_drop(sc);
2142		return;
2143	}
2144
2145#ifdef __FreeBSD__
2146	m = m_get2(M_NOWAIT, MT_DATA, M_PKTHDR, max_linkhdr + sc->sc_len);
2147	if (m == NULL) {
2148		sc->sc_ifp->if_oerrors++;
2149		V_pfsyncstats.pfsyncs_onomem++;
2150		return;
2151	}
2152#else
2153	MGETHDR(m, M_DONTWAIT, MT_DATA);
2154	if (m == NULL) {
2155		sc->sc_if.if_oerrors++;
2156		pfsyncstats.pfsyncs_onomem++;
2157		pfsync_drop(sc);
2158		return;
2159	}
2160
2161	if (max_linkhdr + sc->sc_len > MHLEN) {
2162		MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len);
2163		if (!ISSET(m->m_flags, M_EXT)) {
2164			m_free(m);
2165			sc->sc_if.if_oerrors++;
2166			pfsyncstats.pfsyncs_onomem++;
2167			pfsync_drop(sc);
2168			return;
2169		}
2170	}
2171#endif
2172	m->m_data += max_linkhdr;
2173	m->m_len = m->m_pkthdr.len = sc->sc_len;
2174
2175	/* build the ip header */
2176	ip = (struct ip *)m->m_data;
2177	bcopy(&sc->sc_template, ip, sizeof(*ip));
2178	offset = sizeof(*ip);
2179
2180#ifdef __FreeBSD__
2181	ip->ip_len = m->m_pkthdr.len;
2182#else
2183	ip->ip_len = htons(m->m_pkthdr.len);
2184#endif
2185	ip->ip_id = htons(ip_randomid());
2186
2187	/* build the pfsync header */
2188	ph = (struct pfsync_header *)(m->m_data + offset);
2189	bzero(ph, sizeof(*ph));
2190	offset += sizeof(*ph);
2191
2192	ph->version = PFSYNC_VERSION;
2193	ph->len = htons(sc->sc_len - sizeof(*ip));
2194#ifdef __FreeBSD__
2195	bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
2196#else
2197	bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
2198#endif
2199
2200	/* walk the queues */
2201	for (q = 0; q < PFSYNC_S_COUNT; q++) {
2202		if (TAILQ_EMPTY(&sc->sc_qs[q]))
2203			continue;
2204
2205		subh = (struct pfsync_subheader *)(m->m_data + offset);
2206		offset += sizeof(*subh);
2207
2208		count = 0;
2209		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
2210#ifdef PFSYNC_DEBUG
2211#ifdef __FreeBSD__
2212			KASSERT(st->sync_state == q,
2213				("%s: st->sync_state != q",
2214					__FUNCTION__));
2215#else
2216			KASSERT(st->sync_state == q);
2217#endif
2218#endif
2219
2220			offset += pfsync_qs[q].write(st, m, offset);
2221			st->sync_state = PFSYNC_S_NONE;
2222			count++;
2223		}
2224		TAILQ_INIT(&sc->sc_qs[q]);
2225
2226		bzero(subh, sizeof(*subh));
2227		subh->action = pfsync_qs[q].action;
2228		subh->count = htons(count);
2229	}
2230
2231	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
2232		subh = (struct pfsync_subheader *)(m->m_data + offset);
2233		offset += sizeof(*subh);
2234
2235		count = 0;
2236		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
2237			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
2238
2239			bcopy(&ur->ur_msg, m->m_data + offset,
2240			    sizeof(ur->ur_msg));
2241			offset += sizeof(ur->ur_msg);
2242
2243			pool_put(&sc->sc_pool, ur);
2244
2245			count++;
2246		}
2247
2248		bzero(subh, sizeof(*subh));
2249		subh->action = PFSYNC_ACT_UPD_REQ;
2250		subh->count = htons(count);
2251	}
2252
2253	/* has someone built a custom region for us to add? */
2254	if (sc->sc_plus != NULL) {
2255		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
2256		offset += sc->sc_pluslen;
2257
2258		sc->sc_plus = NULL;
2259	}
2260
2261#ifdef notyet
2262	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
2263		subh = (struct pfsync_subheader *)(m->m_data + offset);
2264		offset += sizeof(*subh);
2265
2266		count = 0;
2267		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) {
2268			offset += pfsync_out_tdb(t, m, offset);
2269			CLR(t->tdb_flags, TDBF_PFSYNC);
2270
2271			count++;
2272		}
2273		TAILQ_INIT(&sc->sc_tdb_q);
2274
2275		bzero(subh, sizeof(*subh));
2276		subh->action = PFSYNC_ACT_TDB;
2277		subh->count = htons(count);
2278	}
2279#endif
2280
2281	subh = (struct pfsync_subheader *)(m->m_data + offset);
2282	offset += sizeof(*subh);
2283
2284	bzero(subh, sizeof(*subh));
2285	subh->action = PFSYNC_ACT_EOF;
2286	subh->count = htons(1);
2287
2288	/* XXX write checksum in EOF here */
2289
2290	/* we're done, let's put it on the wire */
2291#if NBPFILTER > 0
2292	if (ifp->if_bpf) {
2293		m->m_data += sizeof(*ip);
2294		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
2295#ifdef __FreeBSD__
2296		BPF_MTAP(ifp, m);
2297#else
2298		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
2299#endif
2300		m->m_data -= sizeof(*ip);
2301		m->m_len = m->m_pkthdr.len = sc->sc_len;
2302	}
2303
2304	if (sc->sc_sync_if == NULL) {
2305		sc->sc_len = PFSYNC_MINPKT;
2306		m_freem(m);
2307		return;
2308	}
2309#endif
2310
2311#ifdef __FreeBSD__
2312	sc->sc_ifp->if_opackets++;
2313	sc->sc_ifp->if_obytes += m->m_pkthdr.len;
2314	sc->sc_len = PFSYNC_MINPKT;
2315
2316	if (!_IF_QFULL(&sc->sc_ifp->if_snd))
2317		_IF_ENQUEUE(&sc->sc_ifp->if_snd, m);
2318	else {
2319		m_freem(m);
2320		sc->sc_ifp->if_snd.ifq_drops++;
2321	}
2322	if (schedswi)
2323		swi_sched(V_pfsync_swi_cookie, 0);
2324#else
2325	sc->sc_if.if_opackets++;
2326	sc->sc_if.if_obytes += m->m_pkthdr.len;
2327
2328	if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL) == 0)
2329		pfsyncstats.pfsyncs_opackets++;
2330	else
2331		pfsyncstats.pfsyncs_oerrors++;
2332
2333	/* start again */
2334	sc->sc_len = PFSYNC_MINPKT;
2335#endif
2336}
2337
2338void
2339pfsync_insert_state(struct pf_state *st)
2340{
2341#ifdef __FreeBSD__
2342	struct pfsync_softc *sc = V_pfsyncif;
2343#else
2344	struct pfsync_softc *sc = pfsyncif;
2345#endif
2346
2347#ifdef __FreeBSD__
2348	PF_LOCK_ASSERT();
2349#else
2350	splassert(IPL_SOFTNET);
2351#endif
2352
2353	if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
2354	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
2355		SET(st->state_flags, PFSTATE_NOSYNC);
2356		return;
2357	}
2358
2359	if (sc == NULL || ISSET(st->state_flags, PFSTATE_NOSYNC))
2360		return;
2361
2362#ifdef PFSYNC_DEBUG
2363#ifdef __FreeBSD__
2364	KASSERT(st->sync_state == PFSYNC_S_NONE,
2365		("%s: st->sync_state != PFSYNC_S_NONE", __FUNCTION__));
2366#else
2367	KASSERT(st->sync_state == PFSYNC_S_NONE);
2368#endif
2369#endif
2370
2371	if (sc->sc_len == PFSYNC_MINPKT)
2372#ifdef __FreeBSD__
2373		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout,
2374		    V_pfsyncif);
2375#else
2376		timeout_add_sec(&sc->sc_tmo, 1);
2377#endif
2378
2379	pfsync_q_ins(st, PFSYNC_S_INS);
2380
2381	if (ISSET(st->state_flags, PFSTATE_ACK))
2382		schednetisr(NETISR_PFSYNC);
2383	else
2384		st->sync_updates = 0;
2385}
2386
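/* number of ticks to hold a deferred packet before releasing it */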
2387int defer = 10;
2388
2389int
2390pfsync_defer(struct pf_state *st, struct mbuf *m)
2391{
2392#ifdef __FreeBSD__
2393	struct pfsync_softc *sc = V_pfsyncif;
2394#else
2395	struct pfsync_softc *sc = pfsyncif;
2396#endif
2397	struct pfsync_deferral *pd;
2398
2399#ifdef __FreeBSD__
2400	PF_LOCK_ASSERT();
2401#else
2402	splassert(IPL_SOFTNET);
2403#endif
2404
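	/* too many packets deferred already; release (transmit) the oldest */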
2405	if (sc->sc_deferred >= 128)
2406		pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);
2407
2408	pd = pool_get(&sc->sc_pool, M_NOWAIT);
2409	if (pd == NULL)
2410		return (0);
2411	sc->sc_deferred++;
2412
2413#ifdef __FreeBSD__
2414	m->m_flags |= M_SKIP_FIREWALL;
2415#else
2416	m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
2417#endif
2418	SET(st->state_flags, PFSTATE_ACK);
2419
2420	pd->pd_st = st;
2421	pd->pd_m = m;
2422
2423	TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
2424#ifdef __FreeBSD__
2425	callout_init(&pd->pd_tmo, CALLOUT_MPSAFE);
2426	callout_reset(&pd->pd_tmo, defer, pfsync_defer_tmo,
2427		pd);
2428#else
2429	timeout_set(&pd->pd_tmo, pfsync_defer_tmo, pd);
2430	timeout_add(&pd->pd_tmo, defer);
2431#endif
2432
2433	return (1);
2434}
2435
2436void
2437pfsync_undefer(struct pfsync_deferral *pd, int drop)
2438{
2439#ifdef __FreeBSD__
2440	struct pfsync_softc *sc = V_pfsyncif;
2441#else
2442	struct pfsync_softc *sc = pfsyncif;
2443#endif
2444	int s;
2445
2446#ifdef __FreeBSD__
2447	PF_LOCK_ASSERT();
2448#else
2449	splassert(IPL_SOFTNET);
2450#endif
2451
2452	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
2453	sc->sc_deferred--;
2454
2455	CLR(pd->pd_st->state_flags, PFSTATE_ACK);
2456	timeout_del(&pd->pd_tmo); /* bah */
2457	if (drop)
2458		m_freem(pd->pd_m);
2459	else {
2460		s = splnet();
2461#ifdef __FreeBSD__
2462		/* XXX: use pf_deferred?! */
2463		PF_UNLOCK();
2464#endif
2465		ip_output(pd->pd_m, (void *)NULL, (void *)NULL, 0,
2466		    (void *)NULL, (void *)NULL);
2467#ifdef __FreeBSD__
2468		PF_LOCK();
2469#endif
2470		splx(s);
2471	}
2472
2473	pool_put(&sc->sc_pool, pd);
2474}
2475
2476void
2477pfsync_defer_tmo(void *arg)
2478{
2479#if defined(__FreeBSD__) && defined(VIMAGE)
2480	struct pfsync_deferral *pd = arg;
2481#endif
2482	int s;
2483
2484	s = splsoftnet();
2485#ifdef __FreeBSD__
2486	CURVNET_SET(pd->pd_m->m_pkthdr.rcvif->if_vnet); /* XXX */
2487	PF_LOCK();
2488#endif
2489	pfsync_undefer(arg, 0);
2490#ifdef __FreeBSD__
2491	PF_UNLOCK();
2492	CURVNET_RESTORE();
2493#endif
2494	splx(s);
2495}
2496
2497void
2498pfsync_deferred(struct pf_state *st, int drop)
2499{
2500#ifdef __FreeBSD__
2501	struct pfsync_softc *sc = V_pfsyncif;
2502#else
2503	struct pfsync_softc *sc = pfsyncif;
2504#endif
2505	struct pfsync_deferral *pd;
2506
2507	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
2508		if (pd->pd_st == st) {
2509			pfsync_undefer(pd, drop);
2510			return;
2511		}
2512	}
2513
2514	panic("pfsync_deferred: unable to find deferred state");
2515}
2516
2517u_int pfsync_upds = 0;
2518
2519void
2520pfsync_update_state(struct pf_state *st)
2521{
2522#ifdef __FreeBSD__
2523	struct pfsync_softc *sc = V_pfsyncif;
2524#else
2525	struct pfsync_softc *sc = pfsyncif;
2526#endif
2527	int sync = 0;
2528
2529#ifdef __FreeBSD__
2530	PF_LOCK_ASSERT();
2531#else
2532	splassert(IPL_SOFTNET);
2533#endif
2534
2535	if (sc == NULL)
2536		return;
2537
2538	if (ISSET(st->state_flags, PFSTATE_ACK))
2539		pfsync_deferred(st, 0);
2540	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
2541		if (st->sync_state != PFSYNC_S_NONE)
2542			pfsync_q_del(st);
2543		return;
2544	}
2545
2546	if (sc->sc_len == PFSYNC_MINPKT)
2547#ifdef __FreeBSD__
2548		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout,
2549		    V_pfsyncif);
2550#else
2551		timeout_add_sec(&sc->sc_tmo, 1);
2552#endif
2553
2554	switch (st->sync_state) {
2555	case PFSYNC_S_UPD_C:
2556	case PFSYNC_S_UPD:
2557	case PFSYNC_S_INS:
2558		/* we're already handling it */
2559
2560		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
2561			st->sync_updates++;
2562			if (st->sync_updates >= sc->sc_maxupdates)
2563				sync = 1;
2564		}
2565		break;
2566
2567	case PFSYNC_S_IACK:
2568		pfsync_q_del(st);
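		/* FALLTHROUGH */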
2569	case PFSYNC_S_NONE:
2570		pfsync_q_ins(st, PFSYNC_S_UPD_C);
2571		st->sync_updates = 0;
2572		break;
2573
2574	default:
2575		panic("pfsync_update_state: unexpected sync state %d",
2576		    st->sync_state);
2577	}
2578
2579	if (sync || (time_uptime - st->pfsync_time) < 2) {
2580		pfsync_upds++;
2581		schednetisr(NETISR_PFSYNC);
2582	}
2583}
2584
2585void
2586pfsync_request_update(u_int32_t creatorid, u_int64_t id)
2587{
2588#ifdef __FreeBSD__
2589	struct pfsync_softc *sc = V_pfsyncif;
2590#else
2591	struct pfsync_softc *sc = pfsyncif;
2592#endif
2593	struct pfsync_upd_req_item *item;
2594	size_t nlen = sizeof(struct pfsync_upd_req);
2595	int s;
2596
2597	PF_LOCK_ASSERT();
2598
2599	/*
2600	 * this code does nothing to prevent multiple update requests for the
2601	 * same state being generated.
2602	 */
2603
2604	item = pool_get(&sc->sc_pool, PR_NOWAIT);
2605	if (item == NULL) {
2606		/* XXX stats */
2607		return;
2608	}
2609
2610	item->ur_msg.id = id;
2611	item->ur_msg.creatorid = creatorid;
2612
2613	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
2614		nlen += sizeof(struct pfsync_subheader);
2615
2616#ifdef __FreeBSD__
2617	if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
2618#else
2619	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
2620#endif
2621		s = splnet();
2622		pfsync_sendout();
2623		splx(s);
2624
2625		nlen = sizeof(struct pfsync_subheader) +
2626		    sizeof(struct pfsync_upd_req);
2627	}
2628
2629	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
2630	sc->sc_len += nlen;
2631
2632	schednetisr(NETISR_PFSYNC);
2633}
2634
2635void
2636pfsync_update_state_req(struct pf_state *st)
2637{
2638#ifdef __FreeBSD__
2639	struct pfsync_softc *sc = V_pfsyncif;
2640#else
2641	struct pfsync_softc *sc = pfsyncif;
2642#endif
2643
2644	PF_LOCK_ASSERT();
2645
2646	if (sc == NULL)
2647		panic("pfsync_update_state_req: nonexistent instance");
2648
2649	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
2650		if (st->sync_state != PFSYNC_S_NONE)
2651			pfsync_q_del(st);
2652		return;
2653	}
2654
2655	switch (st->sync_state) {
2656	case PFSYNC_S_UPD_C:
2657	case PFSYNC_S_IACK:
2658		pfsync_q_del(st);
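		/* FALLTHROUGH */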
2659	case PFSYNC_S_NONE:
2660		pfsync_q_ins(st, PFSYNC_S_UPD);
2661		schednetisr(NETISR_PFSYNC);
2662		return;
2663
2664	case PFSYNC_S_INS:
2665	case PFSYNC_S_UPD:
2666	case PFSYNC_S_DEL:
2667		/* we're already handling it */
2668		return;
2669
2670	default:
2671		panic("pfsync_update_state_req: unexpected sync state %d",
2672		    st->sync_state);
2673	}
2674}
2675
2676void
2677pfsync_delete_state(struct pf_state *st)
2678{
2679#ifdef __FreeBSD__
2680	struct pfsync_softc *sc = V_pfsyncif;
2681#else
2682	struct pfsync_softc *sc = pfsyncif;
2683#endif
2684
2685#ifdef __FreeBSD__
2686	PF_LOCK_ASSERT();
2687#else
2688	splassert(IPL_SOFTNET);
2689#endif
2690
2691	if (sc == NULL)
2692		return;
2693
2694	if (ISSET(st->state_flags, PFSTATE_ACK))
2695		pfsync_deferred(st, 1);
2696	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
2697		if (st->sync_state != PFSYNC_S_NONE)
2698			pfsync_q_del(st);
2699		return;
2700	}
2701
2702	if (sc->sc_len == PFSYNC_MINPKT)
2703#ifdef __FreeBSD__
2704		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout,
2705		    V_pfsyncif);
2706#else
2707		timeout_add_sec(&sc->sc_tmo, 1);
2708#endif
2709
2710	switch (st->sync_state) {
2711	case PFSYNC_S_INS:
2712		/* we never got to tell the world so just forget about it */
2713		pfsync_q_del(st);
2714		return;
2715
2716	case PFSYNC_S_UPD_C:
2717	case PFSYNC_S_UPD:
2718	case PFSYNC_S_IACK:
2719		pfsync_q_del(st);
2720		/* FALLTHROUGH to putting it on the del list */
2721
2722	case PFSYNC_S_NONE:
2723		pfsync_q_ins(st, PFSYNC_S_DEL);
2724		return;
2725
2726	default:
2727		panic("pfsync_delete_state: unexpected sync state %d",
2728		    st->sync_state);
2729	}
2730}
2731
2732void
2733pfsync_clear_states(u_int32_t creatorid, const char *ifname)
2734{
2735	struct {
2736		struct pfsync_subheader subh;
2737		struct pfsync_clr clr;
2738	} __packed r;
2739
2740#ifdef __FreeBSD__
2741	struct pfsync_softc *sc = V_pfsyncif;
2742#else
2743	struct pfsync_softc *sc = pfsyncif;
2744#endif
2745
2746#ifdef __FreeBSD__
2747	PF_LOCK_ASSERT();
2748#else
2749	splassert(IPL_SOFTNET);
2750#endif
2751
2752	if (sc == NULL)
2753		return;
2754
2755	bzero(&r, sizeof(r));
2756
2757	r.subh.action = PFSYNC_ACT_CLR;
2758	r.subh.count = htons(1);
2759
2760	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
2761	r.clr.creatorid = creatorid;
2762
2763	pfsync_send_plus(&r, sizeof(r));
2764}
2765
2766void
2767pfsync_q_ins(struct pf_state *st, int q)
2768{
2769#ifdef __FreeBSD__
2770	struct pfsync_softc *sc = V_pfsyncif;
2771#else
2772	struct pfsync_softc *sc = pfsyncif;
2773#endif
2774	size_t nlen = pfsync_qs[q].len;
2775	int s;
2776
2777	PF_LOCK_ASSERT();
2778
2779#ifdef __FreeBSD__
2780	KASSERT(st->sync_state == PFSYNC_S_NONE,
2781		("%s: st->sync_state != PFSYNC_S_NONE", __FUNCTION__));
2782#else
2783	KASSERT(st->sync_state == PFSYNC_S_NONE);
2784#endif
2785
2786#if 1 || defined(PFSYNC_DEBUG)
2787	if (sc->sc_len < PFSYNC_MINPKT)
2788#ifdef __FreeBSD__
2789		panic("pfsync pkt len is too low %zu", sc->sc_len);
2790#else
2791		panic("pfsync pkt len is too low %d", sc->sc_len);
2792#endif
2793#endif
2794	if (TAILQ_EMPTY(&sc->sc_qs[q]))
2795		nlen += sizeof(struct pfsync_subheader);
2796
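	/* flush the pending packet if this entry would not fit in it */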
2797#ifdef __FreeBSD__
2798	if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
2799#else
2800	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
2801#endif
2802		s = splnet();
2803		pfsync_sendout();
2804		splx(s);
2805
2806		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
2807	}
2808
2809	sc->sc_len += nlen;
2810	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
2811	st->sync_state = q;
2812}
2813
2814void
2815pfsync_q_del(struct pf_state *st)
2816{
2817#ifdef __FreeBSD__
2818	struct pfsync_softc *sc = V_pfsyncif;
2819#else
2820	struct pfsync_softc *sc = pfsyncif;
2821#endif
2822	int q = st->sync_state;
2823
2824#ifdef __FreeBSD__
2825	KASSERT(st->sync_state != PFSYNC_S_NONE,
2826		("%s: st->sync_state == PFSYNC_S_NONE", __FUNCTION__));
2827#else
2828	KASSERT(st->sync_state != PFSYNC_S_NONE);
2829#endif
2830
2831	sc->sc_len -= pfsync_qs[q].len;
2832	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
2833	st->sync_state = PFSYNC_S_NONE;
2834
2835	if (TAILQ_EMPTY(&sc->sc_qs[q]))
2836		sc->sc_len -= sizeof(struct pfsync_subheader);
2837}
2838
2839#ifdef notyet
2840void
2841pfsync_update_tdb(struct tdb *t, int output)
2842{
2843#ifdef __FreeBSD__
2844	struct pfsync_softc *sc = V_pfsyncif;
2845#else
2846	struct pfsync_softc *sc = pfsyncif;
2847#endif
2848	size_t nlen = sizeof(struct pfsync_tdb);
2849	int s;
2850
2851	if (sc == NULL)
2852		return;
2853
2854	if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
2855		if (TAILQ_EMPTY(&sc->sc_tdb_q))
2856			nlen += sizeof(struct pfsync_subheader);
2857
2858		if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
2859			s = splnet();
2860			PF_LOCK();
2861			pfsync_sendout();
2862			PF_UNLOCK();
2863			splx(s);
2864
2865			nlen = sizeof(struct pfsync_subheader) +
2866			    sizeof(struct pfsync_tdb);
2867		}
2868
2869		sc->sc_len += nlen;
2870		TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
2871		SET(t->tdb_flags, TDBF_PFSYNC);
2872		t->tdb_updates = 0;
2873	} else {
2874		if (++t->tdb_updates >= sc->sc_maxupdates)
2875			schednetisr(NETISR_PFSYNC);
2876	}
2877
2878	if (output)
2879		SET(t->tdb_flags, TDBF_PFSYNC_RPL);
2880	else
2881		CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
2882}
2883
2884void
2885pfsync_delete_tdb(struct tdb *t)
2886{
2887#ifdef __FreeBSD__
2888	struct pfsync_softc *sc = V_pfsyncif;
2889#else
2890	struct pfsync_softc *sc = pfsyncif;
2891#endif
2892
2893	if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
2894		return;
2895
2896	sc->sc_len -= sizeof(struct pfsync_tdb);
2897	TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
2898	CLR(t->tdb_flags, TDBF_PFSYNC);
2899
2900	if (TAILQ_EMPTY(&sc->sc_tdb_q))
2901		sc->sc_len -= sizeof(struct pfsync_subheader);
2902}
2903
2904int
2905pfsync_out_tdb(struct tdb *t, struct mbuf *m, int offset)
2906{
2907	struct pfsync_tdb *ut = (struct pfsync_tdb *)(m->m_data + offset);
2908
2909	bzero(ut, sizeof(*ut));
2910	ut->spi = t->tdb_spi;
2911	bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
2912	/*
2913	 * When a failover happens, the master's rpl is probably above
2914	 * what we see here (we may be up to a second late), so
2915	 * increase it a bit for outbound tdbs to manage most such
2916	 * situations.
2917	 *
2918	 * For now, just add an offset that is likely to be larger
2919	 * than the number of packets we can see in one second. The RFC
2920	 * just says the next packet must have a higher seq value.
2921	 *
2922	 * XXX What is a good algorithm for this? We could use
2923	 * a rate-determined increase, but to know it, we would have
2924	 * to extend struct tdb.
2925	 * XXX pt->rpl can wrap over MAXINT, but if so the real tdb
2926	 * XXX t->tdb_rpl can wrap over MAXINT, but if so the real tdb
2927	 * this edge case.
2928	 */
2929#define RPL_INCR 16384
2930	ut->rpl = htonl(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
2931	    RPL_INCR : 0));
2932	ut->cur_bytes = htobe64(t->tdb_cur_bytes);
2933	ut->sproto = t->tdb_sproto;
2934
2935	return (sizeof(*ut));
2936}
2937#endif
2938
2939void
2940pfsync_bulk_start(void)
2941{
2942#ifdef __FreeBSD__
2943	struct pfsync_softc *sc = V_pfsyncif;
2944#else
2945	struct pfsync_softc *sc = pfsyncif;
2946#endif
2947
2948#ifdef __FreeBSD__
2949	if (V_pf_status.debug >= PF_DEBUG_MISC)
2950#else
2951	if (pf_status.debug >= PF_DEBUG_MISC)
2952#endif
2953		printf("pfsync: received bulk update request\n");
2954
2955#ifdef __FreeBSD__
2956	PF_LOCK_ASSERT();
2957	if (TAILQ_EMPTY(&V_state_list))
2958#else
2959	if (TAILQ_EMPTY(&state_list))
2960#endif
2961		pfsync_bulk_status(PFSYNC_BUS_END);
2962	else {
2963		sc->sc_ureq_received = time_uptime;
2964		if (sc->sc_bulk_next == NULL)
2965#ifdef __FreeBSD__
2966			sc->sc_bulk_next = TAILQ_FIRST(&V_state_list);
2967#else
2968			sc->sc_bulk_next = TAILQ_FIRST(&state_list);
2969#endif
2970		sc->sc_bulk_last = sc->sc_bulk_next;
2971
2972		pfsync_bulk_status(PFSYNC_BUS_START);
2973		callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc);
2974	}
2975}
2976
2977void
2978pfsync_bulk_update(void *arg)
2979{
2980	struct pfsync_softc *sc = arg;
2981	struct pf_state *st = sc->sc_bulk_next;
2982	int i = 0;
2983	int s;
2984
2985	PF_LOCK_ASSERT();
2986
2987	s = splsoftnet();
2988#ifdef __FreeBSD__
2989	CURVNET_SET(sc->sc_ifp->if_vnet);
2990#endif
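	/*
	 * Walk the state list from where the previous run left off, queueing
	 * updates for eligible states.  Finish when we wrap around to
	 * sc_bulk_last, or reschedule once the packet is nearly full.
	 */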
2991	for (;;) {
2992		if (st->sync_state == PFSYNC_S_NONE &&
2993		    st->timeout < PFTM_MAX &&
2994		    st->pfsync_time <= sc->sc_ureq_received) {
2995			pfsync_update_state_req(st);
2996			i++;
2997		}
2998
2999		st = TAILQ_NEXT(st, entry_list);
3000		if (st == NULL)
3001#ifdef __FreeBSD__
3002			st = TAILQ_FIRST(&V_state_list);
3003#else
3004			st = TAILQ_FIRST(&state_list);
3005#endif
3006
3007		if (st == sc->sc_bulk_last) {
3008			/* we're done */
3009			sc->sc_bulk_next = NULL;
3010			sc->sc_bulk_last = NULL;
3011			pfsync_bulk_status(PFSYNC_BUS_END);
3012			break;
3013		}
3014
3015#ifdef __FreeBSD__
3016		if (i > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) <
3017#else
3018		if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) <
3019#endif
3020		    sizeof(struct pfsync_state)) {
3021			/* we've filled a packet */
3022			sc->sc_bulk_next = st;
3023#ifdef __FreeBSD__
3024			callout_reset(&sc->sc_bulk_tmo, 1,
3025			    pfsync_bulk_update, sc);
3026#else
3027			timeout_add(&sc->sc_bulk_tmo, 1);
3028#endif
3029			break;
3030		}
3031	}
3032
3033#ifdef __FreeBSD__
3034	CURVNET_RESTORE();
3035#endif
3036	splx(s);
3037}
3038
3039void
3040pfsync_bulk_status(u_int8_t status)
3041{
3042	struct {
3043		struct pfsync_subheader subh;
3044		struct pfsync_bus bus;
3045	} __packed r;
3046
3047#ifdef __FreeBSD__
3048	struct pfsync_softc *sc = V_pfsyncif;
3049#else
3050	struct pfsync_softc *sc = pfsyncif;
3051#endif
3052
3053	PF_LOCK_ASSERT();
3054
3055	bzero(&r, sizeof(r));
3056
3057	r.subh.action = PFSYNC_ACT_BUS;
3058	r.subh.count = htons(1);
3059
3060#ifdef __FreeBSD__
3061	r.bus.creatorid = V_pf_status.hostid;
3062#else
3063	r.bus.creatorid = pf_status.hostid;
3064#endif
3065	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
3066	r.bus.status = status;
3067
3068	pfsync_send_plus(&r, sizeof(r));
3069}
3070
3071void
3072pfsync_bulk_fail(void *arg)
3073{
3074	struct pfsync_softc *sc = arg;
3075
3076#ifdef __FreeBSD__
3077	CURVNET_SET(sc->sc_ifp->if_vnet);
3078#endif
3079
3080	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
3081		/* Try again */
3082#ifdef __FreeBSD__
3083		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
3084		    pfsync_bulk_fail, V_pfsyncif);
3085#else
3086		timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
3087#endif
3088		PF_LOCK();
3089		pfsync_request_update(0, 0);
3090		PF_UNLOCK();
3091	} else {
3092		/* Pretend like the transfer was ok */
3093		sc->sc_ureq_sent = 0;
3094		sc->sc_bulk_tries = 0;
3095#ifdef __FreeBSD__
3096		if (!sc->pfsync_sync_ok && carp_demote_adj_p)
3097			(*carp_demote_adj_p)(-V_pfsync_carp_adj,
3098			    "pfsync bulk fail");
3099		sc->pfsync_sync_ok = 1;
3100#else
3101#if NCARP > 0
3102		if (!pfsync_sync_ok)
3103			carp_group_demote_adj(&sc->sc_if, -1);
3104#endif
3105		pfsync_sync_ok = 1;
3106#endif
3107#ifdef __FreeBSD__
3108		if (V_pf_status.debug >= PF_DEBUG_MISC)
3109#else
3110		if (pf_status.debug >= PF_DEBUG_MISC)
3111#endif
3112			printf("pfsync: failed to receive bulk update\n");
3113	}
3114
3115#ifdef __FreeBSD__
3116	CURVNET_RESTORE();
3117#endif
3118}
3119
3120void
3121pfsync_send_plus(void *plus, size_t pluslen)
3122{
3123#ifdef __FreeBSD__
3124	struct pfsync_softc *sc = V_pfsyncif;
3125#else
3126	struct pfsync_softc *sc = pfsyncif;
3127#endif
3128	int s;
3129
3130	PF_LOCK_ASSERT();
3131
3132#ifdef __FreeBSD__
3133	if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu) {
3134#else
3135	if (sc->sc_len + pluslen > sc->sc_if.if_mtu) {
3136#endif
3137		s = splnet();
3138		pfsync_sendout();
3139		splx(s);
3140	}
3141
3142	sc->sc_plus = plus;
3143	sc->sc_len += (sc->sc_pluslen = pluslen);
3144
3145	s = splnet();
3146	pfsync_sendout();
3147	splx(s);
3148}
3149
3150int
3151pfsync_up(void)
3152{
3153#ifdef __FreeBSD__
3154	struct pfsync_softc *sc = V_pfsyncif;
3155#else
3156	struct pfsync_softc *sc = pfsyncif;
3157#endif
3158
3159#ifdef __FreeBSD__
3160	if (sc == NULL || !ISSET(sc->sc_ifp->if_flags, IFF_DRV_RUNNING))
3161#else
3162	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
3163#endif
3164		return (0);
3165
3166	return (1);
3167}
3168
3169int
3170pfsync_state_in_use(struct pf_state *st)
3171{
3172#ifdef __FreeBSD__
3173	struct pfsync_softc *sc = V_pfsyncif;
3174#else
3175	struct pfsync_softc *sc = pfsyncif;
3176#endif
3177
3178	if (sc == NULL)
3179		return (0);
3180
3181	if (st->sync_state != PFSYNC_S_NONE ||
3182	    st == sc->sc_bulk_next ||
3183	    st == sc->sc_bulk_last)
3184		return (1);
3185
3186	return (0);
3187}
3188
3189u_int pfsync_ints;
3190u_int pfsync_tmos;
3191
3192void
3193pfsync_timeout(void *arg)
3194{
3195#if defined(__FreeBSD__) && defined(VIMAGE)
3196	struct pfsync_softc *sc = arg;
3197#endif
3198	int s;
3199
3200#ifdef __FreeBSD__
3201	CURVNET_SET(sc->sc_ifp->if_vnet);
3202#endif
3203
3204	pfsync_tmos++;
3205
3206	s = splnet();
3207#ifdef __FreeBSD__
3208	PF_LOCK();
3209#endif
3210	pfsync_sendout();
3211#ifdef __FreeBSD__
3212	PF_UNLOCK();
3213#endif
3214	splx(s);
3215
3216#ifdef __FreeBSD__
3217	CURVNET_RESTORE();
3218#endif
3219}
3220
3221/* this is a softnet/netisr handler */
3222void
3223#ifdef __FreeBSD__
3224pfsyncintr(void *arg)
3225{
3226	struct pfsync_softc *sc = arg;
3227	struct mbuf *m, *n;
3228
3229	CURVNET_SET(sc->sc_ifp->if_vnet);
3230	pfsync_ints++;
3231
3232	PF_LOCK();
3233	if (sc->sc_len > PFSYNC_MINPKT)
3234		pfsync_sendout1(0);
3235	_IF_DEQUEUE_ALL(&sc->sc_ifp->if_snd, m);
3236	PF_UNLOCK();
3237
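	/* transmit the dequeued packets; the pf lock is not held here */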
3238	for (; m != NULL; m = n) {
3239
3240		n = m->m_nextpkt;
3241		m->m_nextpkt = NULL;
3242		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)
3243		    == 0)
3244			V_pfsyncstats.pfsyncs_opackets++;
3245		else
3246			V_pfsyncstats.pfsyncs_oerrors++;
3247	}
3248	CURVNET_RESTORE();
3249}
3250#else
3251pfsyncintr(void)
3252{
3253	int s;
3254
3255	pfsync_ints++;
3256
3257	s = splnet();
3258	pfsync_sendout();
3259	splx(s);
3260}
3261#endif
3262
3263int
3264pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
3265    size_t newlen)
3266{
3267
3268#ifdef notyet
3269	/* All sysctl names at this level are terminal. */
3270	if (namelen != 1)
3271		return (ENOTDIR);
3272
3273	switch (name[0]) {
3274	case PFSYNCCTL_STATS:
3275		if (newp != NULL)
3276			return (EPERM);
3277		return (sysctl_struct(oldp, oldlenp, newp, newlen,
3278		    &V_pfsyncstats, sizeof(V_pfsyncstats)));
3279	}
3280#endif
3281	return (ENOPROTOOPT);
3282}
3283
3284#ifdef __FreeBSD__
3285static int
3286pfsync_multicast_setup(struct pfsync_softc *sc)
3287{
3288	struct ip_moptions *imo = &sc->sc_imo;
3289	int error;
3290
3291	if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
3292		sc->sc_sync_if = NULL;
3293		return (EADDRNOTAVAIL);
3294	}
3295
3296	imo->imo_membership = (struct in_multi **)malloc(
3297	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_PFSYNC,
3298	    M_WAITOK | M_ZERO);
3299	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
3300	imo->imo_multicast_vif = -1;
3301
3302	if ((error = in_joingroup(sc->sc_sync_if, &sc->sc_sync_peer, NULL,
3303	    &imo->imo_membership[0])) != 0) {
3304		free(imo->imo_membership, M_PFSYNC);
3305		return (error);
3306	}
3307	imo->imo_num_memberships++;
3308	imo->imo_multicast_ifp = sc->sc_sync_if;
3309	imo->imo_multicast_ttl = PFSYNC_DFLTTL;
3310	imo->imo_multicast_loop = 0;
3311
3312	return (0);
3313}
3314
3315static void
3316pfsync_multicast_cleanup(struct pfsync_softc *sc)
3317{
3318	struct ip_moptions *imo = &sc->sc_imo;
3319
3320	in_leavegroup(imo->imo_membership[0], NULL);
3321	free(imo->imo_membership, M_PFSYNC);
3322	imo->imo_membership = NULL;
3323	imo->imo_multicast_ifp = NULL;
3324}
3325
3326#ifdef INET
3327extern  struct domain inetdomain;
3328static struct protosw in_pfsync_protosw = {
3329	.pr_type =		SOCK_RAW,
3330	.pr_domain =		&inetdomain,
3331	.pr_protocol =		IPPROTO_PFSYNC,
3332	.pr_flags =		PR_ATOMIC|PR_ADDR,
3333	.pr_input =		pfsync_input,
3334	.pr_output =		(pr_output_t *)rip_output,
3335	.pr_ctloutput =		rip_ctloutput,
3336	.pr_usrreqs =		&rip_usrreqs
3337};
3338#endif
3339
3340static int
3341pfsync_init(void)
3342{
3343	VNET_ITERATOR_DECL(vnet_iter);
3344	int error = 0;
3345
3346	VNET_LIST_RLOCK();
3347	VNET_FOREACH(vnet_iter) {
3348		CURVNET_SET(vnet_iter);
3349		V_pfsync_cloner = pfsync_cloner;
3350		V_pfsync_cloner_data = pfsync_cloner_data;
3351		V_pfsync_cloner.ifc_data = &V_pfsync_cloner_data;
3352		if_clone_attach(&V_pfsync_cloner);
3353		error = swi_add(NULL, "pfsync", pfsyncintr, V_pfsyncif,
3354		    SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie);
3355		CURVNET_RESTORE();
3356		if (error)
3357			goto fail_locked;
3358	}
3359	VNET_LIST_RUNLOCK();
3360#ifdef INET
3361	error = pf_proto_register(PF_INET, &in_pfsync_protosw);
3362	if (error)
3363		goto fail;
3364	error = ipproto_register(IPPROTO_PFSYNC);
3365	if (error) {
3366		pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW);
3367		goto fail;
3368	}
3369#endif
3370	PF_LOCK();
3371	pfsync_state_import_ptr = pfsync_state_import;
3372	pfsync_up_ptr = pfsync_up;
3373	pfsync_insert_state_ptr = pfsync_insert_state;
3374	pfsync_update_state_ptr = pfsync_update_state;
3375	pfsync_delete_state_ptr = pfsync_delete_state;
3376	pfsync_clear_states_ptr = pfsync_clear_states;
3377	pfsync_state_in_use_ptr = pfsync_state_in_use;
3378	pfsync_defer_ptr = pfsync_defer;
3379	PF_UNLOCK();
3380
3381	return (0);
3382
3383fail:
3384	VNET_LIST_RLOCK();
3385fail_locked:
3386	VNET_FOREACH(vnet_iter) {
3387		CURVNET_SET(vnet_iter);
3388		if (V_pfsync_swi_cookie) {
3389			swi_remove(V_pfsync_swi_cookie);
3390			if_clone_detach(&V_pfsync_cloner);
3391		}
3392		CURVNET_RESTORE();
3393	}
3394	VNET_LIST_RUNLOCK();
3395
3396	return (error);
3397}
3398
3399static void
3400pfsync_uninit(void)
3401{
3402	VNET_ITERATOR_DECL(vnet_iter);
3403
3404	PF_LOCK();
3405	pfsync_state_import_ptr = NULL;
3406	pfsync_up_ptr = NULL;
3407	pfsync_insert_state_ptr = NULL;
3408	pfsync_update_state_ptr = NULL;
3409	pfsync_delete_state_ptr = NULL;
3410	pfsync_clear_states_ptr = NULL;
3411	pfsync_state_in_use_ptr = NULL;
3412	pfsync_defer_ptr = NULL;
3413	PF_UNLOCK();
3414
3415	ipproto_unregister(IPPROTO_PFSYNC);
3416	pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW);
3417	VNET_LIST_RLOCK();
3418	VNET_FOREACH(vnet_iter) {
3419		CURVNET_SET(vnet_iter);
3420		swi_remove(V_pfsync_swi_cookie);
3421		if_clone_detach(&V_pfsync_cloner);
3422		CURVNET_RESTORE();
3423	}
3424	VNET_LIST_RUNLOCK();
3425}
3426
3427static int
3428pfsync_modevent(module_t mod, int type, void *data)
3429{
3430	int error = 0;
3431
3432	switch (type) {
3433	case MOD_LOAD:
3434		error = pfsync_init();
3435		break;
3436	case MOD_QUIESCE:
3437		/*
3438		 * Module should not be unloaded due to race conditions.
3439		 */
3440		error = EPERM;
3441		break;
3442	case MOD_UNLOAD:
3443		pfsync_uninit();
3444		break;
3445	default:
3446		error = EINVAL;
3447		break;
3448	}
3449
3450	return (error);
3451}
3452
3453static moduledata_t pfsync_mod = {
3454	"pfsync",
3455	pfsync_modevent,
3456	0
3457};
3458
3459#define PFSYNC_MODVER 1
3460
3461DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);
3462MODULE_VERSION(pfsync, PFSYNC_MODVER);
3463MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER);
3464#endif /* __FreeBSD__ */
3465