1/*	$OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $	*/
2
3/*
4 * Copyright (c) 2002 Michael Shalayeff
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
25 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
31 *
32 * Permission to use, copy, modify, and distribute this software for any
33 * purpose with or without fee is hereby granted, provided that the above
34 * copyright notice and this permission notice appear in all copies.
35 *
36 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
37 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
38 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
39 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
40 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
41 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
42 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
43 */
44
45/*
46 * Revisions picked from OpenBSD after revision 1.110 import:
47 * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates
48 * 1.120, 1.175 - use monotonic time_uptime
49 * 1.122 - reduce number of updates for non-TCP sessions
50 * 1.128 - cleanups
51 * 1.146 - bzero() mbuf before sparsely filling it with data
52 * 1.170 - SIOCSIFMTU checks
53 */
54
55#ifdef __FreeBSD__
56#include "opt_inet.h"
57#include "opt_inet6.h"
58#include "opt_pf.h"
59
60#include <sys/cdefs.h>
61__FBSDID("$FreeBSD$");
62
63#define	NBPFILTER	1
64
65#ifdef DEV_CARP
66#define	NCARP		DEV_CARP
67#else
68#define	NCARP		0
69#endif
70#endif /* __FreeBSD__ */
71
72#include <sys/param.h>
73#include <sys/kernel.h>
74#ifdef __FreeBSD__
75#include <sys/bus.h>
76#include <sys/interrupt.h>
77#include <sys/priv.h>
78#endif
79#include <sys/proc.h>
80#include <sys/systm.h>
81#include <sys/time.h>
82#include <sys/mbuf.h>
83#include <sys/socket.h>
84#ifdef __FreeBSD__
85#include <sys/endian.h>
86#include <sys/malloc.h>
87#include <sys/module.h>
88#include <sys/sockio.h>
89#include <sys/taskqueue.h>
90#include <sys/lock.h>
91#include <sys/mutex.h>
92#include <sys/protosw.h>
93#else
94#include <sys/ioctl.h>
95#include <sys/timeout.h>
96#endif
97#include <sys/sysctl.h>
98#ifndef __FreeBSD__
99#include <sys/pool.h>
100#endif
101
102#include <net/if.h>
103#ifdef __FreeBSD__
104#include <net/if_clone.h>
105#endif
106#include <net/if_types.h>
107#include <net/route.h>
108#include <net/bpf.h>
109#include <net/netisr.h>
110#ifdef __FreeBSD__
111#include <net/vnet.h>
112#endif
113
114#include <netinet/in.h>
115#include <netinet/if_ether.h>
116#include <netinet/tcp.h>
117#include <netinet/tcp_seq.h>
118
119#ifdef	INET
120#include <netinet/in_systm.h>
121#include <netinet/in_var.h>
122#include <netinet/ip.h>
123#include <netinet/ip_var.h>
124#endif
125
126#ifdef INET6
127#include <netinet6/nd6.h>
128#endif /* INET6 */
129
130#ifndef __FreeBSD__
131#include "carp.h"
132#endif
133#if NCARP > 0
134#include <netinet/ip_carp.h>
135#endif
136
137#include <net/pfvar.h>
138#include <net/if_pfsync.h>
139
140#ifndef __FreeBSD__
141#include "bpfilter.h"
142#include "pfsync.h"
143#endif
144
/*
 * Smallest possible pfsync packet: IP header + pfsync header + one
 * subheader + the trailing EOF message.  sc_len is reset to this
 * size whenever the pending output packet is empty.
 */
#define PFSYNC_MINPKT ( \
	sizeof(struct ip) + \
	sizeof(struct pfsync_header) + \
	sizeof(struct pfsync_subheader) + \
	sizeof(struct pfsync_eof))

/* Per-packet context handed to each pfsync_in_* input handler. */
struct pfsync_pkt {
	struct ip *ip;		/* IP header of the packet being parsed */
	struct in_addr src;	/* IP source address of the packet */
	u_int8_t flags;		/* PFSYNC_SI_* flags (e.g. checksum matched) */
};
156
157int	pfsync_input_hmac(struct mbuf *, int);
158
159int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
160	    struct pfsync_state_peer *);
161
162int	pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int);
163int	pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int);
164int	pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int);
165int	pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int);
166int	pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int);
167int	pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int);
168int	pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int);
169int	pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int);
170int	pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int);
171int	pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int);
172int	pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int);
173
174int	pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int);
175
/*
 * Dispatch table for incoming pfsync messages, indexed by the action
 * code in each subheader.  Entries must stay in the same order as the
 * PFSYNC_ACT_* constants.  Handlers return the number of payload bytes
 * consumed, or -1 if they disposed of the mbuf themselves.
 */
int	(*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = {
	pfsync_in_clr,			/* PFSYNC_ACT_CLR */
	pfsync_in_ins,			/* PFSYNC_ACT_INS */
	pfsync_in_iack,			/* PFSYNC_ACT_INS_ACK */
	pfsync_in_upd,			/* PFSYNC_ACT_UPD */
	pfsync_in_upd_c,		/* PFSYNC_ACT_UPD_C */
	pfsync_in_ureq,			/* PFSYNC_ACT_UPD_REQ */
	pfsync_in_del,			/* PFSYNC_ACT_DEL */
	pfsync_in_del_c,		/* PFSYNC_ACT_DEL_C */
	pfsync_in_error,		/* PFSYNC_ACT_INS_F */
	pfsync_in_error,		/* PFSYNC_ACT_DEL_F */
	pfsync_in_bus,			/* PFSYNC_ACT_BUS */
	pfsync_in_tdb,			/* PFSYNC_ACT_TDB */
	pfsync_in_eof			/* PFSYNC_ACT_EOF */
};
191
/*
 * Description of one outgoing state queue: the function that
 * serializes a queued state into the output mbuf, the on-wire size
 * of one such message, and the action code for its subheader.
 */
struct pfsync_q {
	int		(*write)(struct pf_state *, struct mbuf *, int);
	size_t		len;		/* on-wire size of one message */
	u_int8_t	action;		/* PFSYNC_ACT_* for the subheader */
};
197
198/* we have one of these for every PFSYNC_S_ */
199int	pfsync_out_state(struct pf_state *, struct mbuf *, int);
200int	pfsync_out_iack(struct pf_state *, struct mbuf *, int);
201int	pfsync_out_upd_c(struct pf_state *, struct mbuf *, int);
202int	pfsync_out_del(struct pf_state *, struct mbuf *, int);
203
/*
 * One entry per PFSYNC_S_* output queue (see sc_qs[]); the order must
 * match the PFSYNC_S_* constants used as indices.
 */
struct pfsync_q pfsync_qs[] = {
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD },
	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C }
};
211
212void	pfsync_q_ins(struct pf_state *, int);
213void	pfsync_q_del(struct pf_state *);
214
/* A queued request (to the peer) for a full update of one state. */
struct pfsync_upd_req_item {
	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
	struct pfsync_upd_req			ur_msg;	/* on-wire request */
};
TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);

/*
 * A deferred packet: the packet that created a new state is held back
 * until the peer acknowledges the state insert or the timeout fires
 * (see pfsync_defer_tmo/pfsync_undefer).
 */
struct pfsync_deferral {
	TAILQ_ENTRY(pfsync_deferral)		 pd_entry;
	struct pf_state				*pd_st;	/* state awaiting the ack */
	struct mbuf				*pd_m;	/* the held-back packet */
#ifdef __FreeBSD__
	struct callout				 pd_tmo;
#else
	struct timeout				 pd_tmo;
#endif
};
TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);
232
233#define PFSYNC_PLSIZE	MAX(sizeof(struct pfsync_upd_req_item), \
234			    sizeof(struct pfsync_deferral))
235
236#ifdef notyet
237int	pfsync_out_tdb(struct tdb *, struct mbuf *, int);
238#endif
239
/*
 * Softc for the (single) pfsync interface; reachable through
 * [V_]pfsyncif.  Holds the queues of states waiting to be sent,
 * deferred packets, and bulk-update bookkeeping.
 */
struct pfsync_softc {
#ifdef __FreeBSD__
	struct ifnet		*sc_ifp;
#else
	struct ifnet		 sc_if;
#endif
	struct ifnet		*sc_sync_if;	/* interface we sync over */

#ifdef __FreeBSD__
	/* pool of PFSYNC_PLSIZE items: update requests and deferrals */
	uma_zone_t		 sc_pool;
#else
	struct pool		 sc_pool;
#endif

	struct ip_moptions	 sc_imo;	/* multicast membership state */

	struct in_addr		 sc_sync_peer;	/* configured sync peer address */
	u_int8_t		 sc_maxupdates;	/* max updates per state (default 128) */
#ifdef __FreeBSD__
	int			 pfsync_sync_ok;
#endif

	struct ip		 sc_template;	/* prototype IP header for output */

	struct pf_state_queue	 sc_qs[PFSYNC_S_COUNT];	/* per-action state queues */
	size_t			 sc_len;	/* size of the pending output packet */

	struct pfsync_upd_reqs	 sc_upd_req_list;	/* queued update requests */

	struct pfsync_deferrals	 sc_deferrals;	/* packets held for peer ack */
	u_int			 sc_deferred;	/* length of sc_deferrals */

	void			*sc_plus;	/* extra payload for next packet */
	size_t			 sc_pluslen;	/* length of sc_plus */

	/* bulk update we requested from a peer */
	u_int32_t		 sc_ureq_sent;	/* presumably time request was sent — confirm */
	int			 sc_bulk_tries;	/* retries, capped at PFSYNC_MAX_BULKTRIES */
#ifdef __FreeBSD__
	struct callout		 sc_bulkfail_tmo;
#else
	struct timeout		 sc_bulkfail_tmo;
#endif

	/* bulk update a peer requested from us */
	u_int32_t		 sc_ureq_received;
	struct pf_state		*sc_bulk_next;	/* next state to send */
	struct pf_state		*sc_bulk_last;	/* stop marker for the run */
#ifdef __FreeBSD__
	struct callout		 sc_bulk_tmo;
#else
	struct timeout		 sc_bulk_tmo;
#endif

	TAILQ_HEAD(, tdb)	 sc_tdb_q;	/* IPsec tdbs queued for sync */

#ifdef __FreeBSD__
	struct callout		 sc_tmo;
#else
	struct timeout		 sc_tmo;
#endif
};
300
301#ifdef __FreeBSD__
302static MALLOC_DEFINE(M_PFSYNC, "pfsync", "pfsync data");
303static VNET_DEFINE(struct pfsync_softc	*, pfsyncif) = NULL;
304#define	V_pfsyncif		VNET(pfsyncif)
305static VNET_DEFINE(void *, pfsync_swi_cookie) = NULL;
306#define	V_pfsync_swi_cookie	VNET(pfsync_swi_cookie)
307static VNET_DEFINE(struct pfsyncstats, pfsyncstats);
308#define	V_pfsyncstats		VNET(pfsyncstats)
309
310static void	pfsyncintr(void *);
311static int	pfsync_multicast_setup(struct pfsync_softc *);
312static void	pfsync_multicast_cleanup(struct pfsync_softc *);
313static int	pfsync_init(void);
314static void	pfsync_uninit(void);
315static void	pfsync_sendout1(int);
316
317#define	schednetisr(NETISR_PFSYNC)	swi_sched(V_pfsync_swi_cookie, 0)
318
319SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC");
320SYSCTL_VNET_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_RW,
321    &VNET_NAME(pfsyncstats), pfsyncstats,
322    "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
323#else
324struct pfsync_softc	*pfsyncif = NULL;
325struct pfsyncstats	 pfsyncstats;
326#define	V_pfsyncstats	 pfsyncstats
327#endif
328
329void	pfsyncattach(int);
330#ifdef __FreeBSD__
331int	pfsync_clone_create(struct if_clone *, int, caddr_t);
332void	pfsync_clone_destroy(struct ifnet *);
333#else
334int	pfsync_clone_create(struct if_clone *, int);
335int	pfsync_clone_destroy(struct ifnet *);
336#endif
337int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
338	    struct pf_state_peer *);
339void	pfsync_update_net_tdb(struct pfsync_tdb *);
340int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
341#ifdef __FreeBSD__
342	    struct route *);
343#else
344	    struct rtentry *);
345#endif
346int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
347void	pfsyncstart(struct ifnet *);
348
349struct mbuf *pfsync_if_dequeue(struct ifnet *);
350
351void	pfsync_deferred(struct pf_state *, int);
352void	pfsync_undefer(struct pfsync_deferral *, int);
353void	pfsync_defer_tmo(void *);
354
355void	pfsync_request_update(u_int32_t, u_int64_t);
356void	pfsync_update_state_req(struct pf_state *);
357
358void	pfsync_drop(struct pfsync_softc *);
359void	pfsync_sendout(void);
360void	pfsync_send_plus(void *, size_t);
361void	pfsync_timeout(void *);
362void	pfsync_tdb_timeout(void *);
363
364void	pfsync_bulk_start(void);
365void	pfsync_bulk_status(u_int8_t);
366void	pfsync_bulk_update(void *);
367void	pfsync_bulk_fail(void *);
368
369#ifdef __FreeBSD__
370/* XXX: ugly */
371#define	betoh64		(unsigned long long)be64toh
372#define	timeout_del	callout_stop
373#endif
374
375#define PFSYNC_MAX_BULKTRIES	12
376#ifndef __FreeBSD__
377int	pfsync_sync_ok;
378#endif
379
380#ifdef __FreeBSD__
381VNET_DEFINE(struct ifc_simple_data, pfsync_cloner_data);
382VNET_DEFINE(struct if_clone, pfsync_cloner);
383#define	V_pfsync_cloner_data	VNET(pfsync_cloner_data)
384#define	V_pfsync_cloner		VNET(pfsync_cloner)
385IFC_SIMPLE_DECLARE(pfsync, 1);
386#else
387struct if_clone	pfsync_cloner =
388    IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);
389#endif
390
/*
 * Attach hook: register the "pfsync" interface cloner.  The npfsync
 * count is unused; interfaces are created on demand by the cloner.
 */
void
pfsyncattach(int npfsync)
{
	if_clone_attach(&pfsync_cloner);
}
396int
397#ifdef __FreeBSD__
398pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param)
399#else
400pfsync_clone_create(struct if_clone *ifc, int unit)
401#endif
402{
403	struct pfsync_softc *sc;
404	struct ifnet *ifp;
405	int q;
406
407	if (unit != 0)
408		return (EINVAL);
409
410#ifdef __FreeBSD__
411	sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO);
412	sc->pfsync_sync_ok = 1;
413#else
414	pfsync_sync_ok = 1;
415	sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_NOWAIT | M_ZERO);
416#endif
417
418	for (q = 0; q < PFSYNC_S_COUNT; q++)
419		TAILQ_INIT(&sc->sc_qs[q]);
420
421#ifdef __FreeBSD__
422	sc->sc_pool = uma_zcreate("pfsync", PFSYNC_PLSIZE, NULL, NULL, NULL,
423	    NULL, UMA_ALIGN_PTR, 0);
424#else
425	pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, 0, 0, "pfsync", NULL);
426#endif
427	TAILQ_INIT(&sc->sc_upd_req_list);
428	TAILQ_INIT(&sc->sc_deferrals);
429	sc->sc_deferred = 0;
430
431	TAILQ_INIT(&sc->sc_tdb_q);
432
433	sc->sc_len = PFSYNC_MINPKT;
434	sc->sc_maxupdates = 128;
435
436#ifndef __FreeBSD__
437	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
438	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
439	    M_WAITOK | M_ZERO);
440	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
441#endif
442
443#ifdef __FreeBSD__
444	ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC);
445	if (ifp == NULL) {
446		uma_zdestroy(sc->sc_pool);
447		free(sc, M_PFSYNC);
448		return (ENOSPC);
449	}
450	if_initname(ifp, ifc->ifc_name, unit);
451#else
452	ifp = &sc->sc_if;
453	snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
454#endif
455	ifp->if_softc = sc;
456	ifp->if_ioctl = pfsyncioctl;
457	ifp->if_output = pfsyncoutput;
458	ifp->if_start = pfsyncstart;
459	ifp->if_type = IFT_PFSYNC;
460	ifp->if_snd.ifq_maxlen = ifqmaxlen;
461	ifp->if_hdrlen = sizeof(struct pfsync_header);
462	ifp->if_mtu = ETHERMTU;
463#ifdef __FreeBSD__
464	callout_init(&sc->sc_tmo, CALLOUT_MPSAFE);
465	callout_init_mtx(&sc->sc_bulk_tmo, &pf_task_mtx, 0);
466	callout_init(&sc->sc_bulkfail_tmo, CALLOUT_MPSAFE);
467#else
468	timeout_set(&sc->sc_tmo, pfsync_timeout, sc);
469	timeout_set(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
470	timeout_set(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);
471#endif
472
473	if_attach(ifp);
474#ifndef __FreeBSD__
475	if_alloc_sadl(ifp);
476#endif
477
478#if NCARP > 0
479	if_addgroup(ifp, "carp");
480#endif
481
482#if NBPFILTER > 0
483#ifdef __FreeBSD__
484	bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
485#else
486	bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
487#endif
488#endif
489
490#ifdef __FreeBSD__
491	V_pfsyncif = sc;
492#else
493	pfsyncif = sc;
494#endif
495
496	return (0);
497}
498
/*
 * Tear down the pfsync interface: stop all timers, detach from bpf and
 * the network stack, discard queued sync data, release any deferred
 * packets, and free the softc.  OpenBSD's cloner expects an int
 * return; FreeBSD's expects void.
 */
#ifdef __FreeBSD__
void
#else
int
#endif
pfsync_clone_destroy(struct ifnet *ifp)
{
	struct pfsync_softc *sc = ifp->if_softc;

#ifdef __FreeBSD__
	PF_LOCK();
#endif
	/* stop the callouts before freeing what their handlers touch */
	timeout_del(&sc->sc_bulkfail_tmo);
	timeout_del(&sc->sc_bulk_tmo);
	timeout_del(&sc->sc_tmo);
#ifdef __FreeBSD__
	PF_UNLOCK();
#endif
#if NCARP > 0
#ifdef notyet
#ifdef __FreeBSD__
	if (!sc->pfsync_sync_ok)
#else
	if (!pfsync_sync_ok)
#endif
		carp_group_demote_adj(&sc->sc_if, -1);
#endif
#endif
#if NBPFILTER > 0
	bpfdetach(ifp);
#endif
	if_detach(ifp);

	/* throw away everything queued for transmission */
	pfsync_drop(sc);

	/* release deferred packets (second arg 0: do not drop them) */
	while (sc->sc_deferred > 0)
		pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);

#ifdef __FreeBSD__
	UMA_DESTROY(sc->sc_pool);
#else
	pool_destroy(&sc->sc_pool);
#endif
#ifdef __FreeBSD__
	if_free(ifp);
	if (sc->sc_imo.imo_membership)
		pfsync_multicast_cleanup(sc);
	free(sc, M_PFSYNC);
#else
	free(sc->sc_imo.imo_membership, M_IPMOPTS);
	free(sc, M_DEVBUF);
#endif

#ifdef __FreeBSD__
	V_pfsyncif = NULL;
#else
	pfsyncif = NULL;
#endif

#ifndef __FreeBSD__
	return (0);
#endif
}
562
/*
 * Dequeue one mbuf from the interface send queue with the proper
 * synchronization for each platform.  Returns NULL if the queue is
 * empty.
 */
struct mbuf *
pfsync_if_dequeue(struct ifnet *ifp)
{
	struct mbuf *m;
#ifndef __FreeBSD__
	int s;
#endif

#ifdef __FreeBSD__
	IF_LOCK(&ifp->if_snd);
	/*
	 * NOTE(review): _IF_DROP only bumps the drop counter; every mbuf
	 * dequeued here is freed by pfsyncstart(), so this accounts each
	 * dequeued packet as a drop — confirm that is the intent.
	 */
	_IF_DROP(&ifp->if_snd);
	_IF_DEQUEUE(&ifp->if_snd, m);
	IF_UNLOCK(&ifp->if_snd);
#else
	s = splnet();
	IF_DEQUEUE(&ifp->if_snd, m);
	splx(s);
#endif

	return (m);
}
584
585/*
586 * Start output on the pfsync interface.
587 */
588void
589pfsyncstart(struct ifnet *ifp)
590{
591	struct mbuf *m;
592
593	while ((m = pfsync_if_dequeue(ifp)) != NULL) {
594#ifndef __FreeBSD__
595		IF_DROP(&ifp->if_snd);
596#endif
597		m_freem(m);
598	}
599}
600
601int
602pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
603    struct pf_state_peer *d)
604{
605	if (s->scrub.scrub_flag && d->scrub == NULL) {
606#ifdef __FreeBSD__
607		d->scrub = pool_get(&V_pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
608#else
609		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
610#endif
611		if (d->scrub == NULL)
612			return (ENOMEM);
613	}
614
615	return (0);
616}
617
#ifndef __FreeBSD__
/*
 * Serialize a pf_state into its wire format (struct pfsync_state),
 * converting multi-byte fields to network byte order.  Creation and
 * expiry are sent as relative times (age and seconds remaining) so
 * the peer can reconstruct them against its own clock.
 */
void
pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	/* start from zeroes so sparsely-filled padding carries no garbage */
	bzero(sp, sizeof(struct pfsync_state));

	/* copy from state key */
	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
	sp->proto = st->key[PF_SK_WIRE]->proto;
	sp->af = st->key[PF_SK_WIRE]->af;

	/* copy from state */
	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
	bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
	sp->creation = htonl(time_uptime - st->creation);
	/* send seconds remaining until expiry, clamped at zero */
	sp->expire = pf_state_expires(st);
	if (sp->expire <= time_second)
		sp->expire = htonl(0);
	else
		sp->expire = htonl(sp->expire - time_second);

	sp->direction = st->direction;
	sp->log = st->log;
	sp->timeout = st->timeout;
	sp->state_flags = st->state_flags;
	if (st->src_node)
		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
	if (st->nat_src_node)
		sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE;

	bcopy(&st->id, &sp->id, sizeof(sp->id));
	sp->creatorid = st->creatorid;
	pf_state_peer_hton(&st->src, &sp->src);
	pf_state_peer_hton(&st->dst, &sp->dst);

	/* rule numbers: -1 on the wire means "no rule" */
	if (st->rule.ptr == NULL)
		sp->rule = htonl(-1);
	else
		sp->rule = htonl(st->rule.ptr->nr);
	if (st->anchor.ptr == NULL)
		sp->anchor = htonl(-1);
	else
		sp->anchor = htonl(st->anchor.ptr->nr);
	if (st->nat_rule.ptr == NULL)
		sp->nat_rule = htonl(-1);
	else
		sp->nat_rule = htonl(st->nat_rule.ptr->nr);

	pf_state_counter_hton(st->packets[0], sp->packets[0]);
	pf_state_counter_hton(st->packets[1], sp->packets[1]);
	pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
	pf_state_counter_hton(st->bytes[1], sp->bytes[1]);

}
#endif
680
681int
682pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
683{
684	struct pf_state	*st = NULL;
685	struct pf_state_key *skw = NULL, *sks = NULL;
686	struct pf_rule *r = NULL;
687	struct pfi_kif	*kif;
688	int pool_flags;
689	int error;
690
691#ifdef __FreeBSD__
692	PF_LOCK_ASSERT();
693
694	if (sp->creatorid == 0 && V_pf_status.debug >= PF_DEBUG_MISC) {
695#else
696	if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) {
697#endif
698		printf("pfsync_state_import: invalid creator id:"
699		    " %08x\n", ntohl(sp->creatorid));
700		return (EINVAL);
701	}
702
703	if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
704#ifdef __FreeBSD__
705		if (V_pf_status.debug >= PF_DEBUG_MISC)
706#else
707		if (pf_status.debug >= PF_DEBUG_MISC)
708#endif
709			printf("pfsync_state_import: "
710			    "unknown interface: %s\n", sp->ifname);
711		if (flags & PFSYNC_SI_IOCTL)
712			return (EINVAL);
713		return (0);	/* skip this state */
714	}
715
716	/*
717	 * If the ruleset checksums match or the state is coming from the ioctl,
718	 * it's safe to associate the state with the rule of that number.
719	 */
720	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
721	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
722	    pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount)
723		r = pf_main_ruleset.rules[
724		    PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
725	else
726#ifdef __FreeBSD__
727		r = &V_pf_default_rule;
728#else
729		r = &pf_default_rule;
730#endif
731
732	if ((r->max_states && r->states_cur >= r->max_states))
733		goto cleanup;
734
735#ifdef __FreeBSD__
736	if (flags & PFSYNC_SI_IOCTL)
737		pool_flags = PR_WAITOK | PR_ZERO;
738	else
739		pool_flags = PR_NOWAIT | PR_ZERO;
740
741	if ((st = pool_get(&V_pf_state_pl, pool_flags)) == NULL)
742		goto cleanup;
743#else
744	if (flags & PFSYNC_SI_IOCTL)
745		pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
746	else
747		pool_flags = PR_LIMITFAIL | PR_ZERO;
748
749	if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
750		goto cleanup;
751#endif
752
753	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
754		goto cleanup;
755
756	if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
757	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
758	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
759	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
760	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
761	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1]) {
762		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
763			goto cleanup;
764	} else
765		sks = skw;
766
767	/* allocate memory for scrub info */
768	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
769	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
770		goto cleanup;
771
772	/* copy to state key(s) */
773	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
774	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
775	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
776	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
777	skw->proto = sp->proto;
778	skw->af = sp->af;
779	if (sks != skw) {
780		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
781		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
782		sks->port[0] = sp->key[PF_SK_STACK].port[0];
783		sks->port[1] = sp->key[PF_SK_STACK].port[1];
784		sks->proto = sp->proto;
785		sks->af = sp->af;
786	}
787
788	/* copy to state */
789	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
790	st->creation = time_uptime - ntohl(sp->creation);
791	st->expire = time_second;
792	if (sp->expire) {
793		/* XXX No adaptive scaling. */
794		st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire);
795	}
796
797	st->expire = ntohl(sp->expire) + time_second;
798	st->direction = sp->direction;
799	st->log = sp->log;
800	st->timeout = sp->timeout;
801	st->state_flags = sp->state_flags;
802
803	bcopy(sp->id, &st->id, sizeof(st->id));
804	st->creatorid = sp->creatorid;
805	pf_state_peer_ntoh(&sp->src, &st->src);
806	pf_state_peer_ntoh(&sp->dst, &st->dst);
807
808	st->rule.ptr = r;
809	st->nat_rule.ptr = NULL;
810	st->anchor.ptr = NULL;
811	st->rt_kif = NULL;
812
813	st->pfsync_time = time_uptime;
814	st->sync_state = PFSYNC_S_NONE;
815
816	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
817	r->states_cur++;
818	r->states_tot++;
819
820	if (!ISSET(flags, PFSYNC_SI_IOCTL))
821		SET(st->state_flags, PFSTATE_NOSYNC);
822
823	if ((error = pf_state_insert(kif, skw, sks, st)) != 0) {
824		/* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */
825		r->states_cur--;
826		goto cleanup_state;
827	}
828
829	if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
830		CLR(st->state_flags, PFSTATE_NOSYNC);
831		if (ISSET(st->state_flags, PFSTATE_ACK)) {
832			pfsync_q_ins(st, PFSYNC_S_IACK);
833			schednetisr(NETISR_PFSYNC);
834		}
835	}
836	CLR(st->state_flags, PFSTATE_ACK);
837
838	return (0);
839
840cleanup:
841	error = ENOMEM;
842	if (skw == sks)
843		sks = NULL;
844#ifdef __FreeBSD__
845	if (skw != NULL)
846		pool_put(&V_pf_state_key_pl, skw);
847	if (sks != NULL)
848		pool_put(&V_pf_state_key_pl, sks);
849#else
850	if (skw != NULL)
851		pool_put(&pf_state_key_pl, skw);
852	if (sks != NULL)
853		pool_put(&pf_state_key_pl, sks);
854#endif
855
856cleanup_state:	/* pf_state_insert frees the state keys */
857	if (st) {
858#ifdef __FreeBSD__
859		if (st->dst.scrub)
860			pool_put(&V_pf_state_scrub_pl, st->dst.scrub);
861		if (st->src.scrub)
862			pool_put(&V_pf_state_scrub_pl, st->src.scrub);
863		pool_put(&V_pf_state_pl, st);
864#else
865		if (st->dst.scrub)
866			pool_put(&pf_state_scrub_pl, st->dst.scrub);
867		if (st->src.scrub)
868			pool_put(&pf_state_scrub_pl, st->src.scrub);
869		pool_put(&pf_state_pl, st);
870#endif
871	}
872	return (error);
873}
874
/*
 * Input handler for received pfsync packets.  Validates the packet
 * (sync interface, TTL, header length, protocol version), then walks
 * the chain of subheaders, dispatching each to its pfsync_acts[]
 * handler.  Handlers return the number of payload bytes they
 * consumed, or -1 when they have already disposed of the mbuf.
 */
void
#ifdef __FreeBSD__
pfsync_input(struct mbuf *m, __unused int off)
#else
pfsync_input(struct mbuf *m, ...)
#endif
{
#ifdef __FreeBSD__
	struct pfsync_softc *sc = V_pfsyncif;
#else
	struct pfsync_softc *sc = pfsyncif;
#endif
	struct pfsync_pkt pkt;
	struct ip *ip = mtod(m, struct ip *);
	struct pfsync_header *ph;
	struct pfsync_subheader subh;

	int offset;
	int rv;

	V_pfsyncstats.pfsyncs_ipackets++;

	/* verify that we have a sync interface configured */
#ifdef __FreeBSD__
	if (!sc || !sc->sc_sync_if || !V_pf_status.running)
#else
	if (!sc || !sc->sc_sync_if || !pf_status.running)
#endif
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
		V_pfsyncstats.pfsyncs_badif++;
		goto done;
	}

#ifdef __FreeBSD__
	sc->sc_ifp->if_ipackets++;
	sc->sc_ifp->if_ibytes += m->m_pkthdr.len;
#else
	sc->sc_if.if_ipackets++;
	sc->sc_if.if_ibytes += m->m_pkthdr.len;
#endif
	/* verify that the IP TTL is 255. */
	if (ip->ip_ttl != PFSYNC_DFLTTL) {
		V_pfsyncstats.pfsyncs_badttl++;
		goto done;
	}

	offset = ip->ip_hl << 2;
	if (m->m_pkthdr.len < offset + sizeof(*ph)) {
		V_pfsyncstats.pfsyncs_hdrops++;
		goto done;
	}

	if (offset + sizeof(*ph) > m->m_len) {
		/* on m_pullup failure the mbuf is already freed: plain return */
		if (m_pullup(m, offset + sizeof(*ph)) == NULL) {
			V_pfsyncstats.pfsyncs_hdrops++;
			return;
		}
		ip = mtod(m, struct ip *);
	}
	ph = (struct pfsync_header *)((char *)ip + offset);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		V_pfsyncstats.pfsyncs_badver++;
		goto done;
	}

#if 0
	if (pfsync_input_hmac(m, offset) != 0) {
		/* XXX stats */
		goto done;
	}
#endif

	/* Cheaper to grab this now than having to mess with mbufs later */
	pkt.ip = ip;
	pkt.src = ip->ip_src;
	pkt.flags = 0;

	/* a matching ruleset checksum lets handlers trust rule numbers */
#ifdef __FreeBSD__
	if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
#else
	if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
#endif
		pkt.flags |= PFSYNC_SI_CKSUM;

	offset += sizeof(*ph);
	/*
	 * NOTE(review): the walk ends only via a handler returning -1
	 * (presumably the PFSYNC_ACT_EOF handler, which frees the mbuf);
	 * there is no bounds check before each m_copydata — confirm a
	 * packet lacking an EOF message cannot run past the data.
	 */
	for (;;) {
		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
		offset += sizeof(subh);

		if (subh.action >= PFSYNC_ACT_MAX) {
			V_pfsyncstats.pfsyncs_badact++;
			goto done;
		}

		/* -1 means the handler consumed (freed) the mbuf */
		rv = (*pfsync_acts[subh.action])(&pkt, m, offset,
		    ntohs(subh.count));
		if (rv == -1)
			return;

		offset += rv;
	}

done:
	m_freem(m);
}
985
/*
 * PFSYNC_ACT_CLR: a peer cleared (part of) its state table.  Mirror
 * that locally by unlinking every state with a matching creator id —
 * across the whole table when no interface name is given, otherwise
 * only if the named interface is known.  States are flagged
 * PFSTATE_NOSYNC first so their removal is not echoed back.
 * Returns the number of payload bytes consumed, or -1 on a short
 * packet (mbuf already freed by m_pulldown).
 */
int
pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_clr *clr;
	struct mbuf *mp;
	int len = sizeof(*clr) * count;
	int i, offp;

	struct pf_state *st, *nexts;
	struct pf_state_key *sk, *nextsk;
	struct pf_state_item *si;
	u_int32_t creatorid;
	int s;

	/* make the clr messages contiguous */
	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	clr = (struct pfsync_clr *)(mp->m_data + offp);

	s = splsoftnet();
#ifdef __FreeBSD__
	PF_LOCK();
#endif
	for (i = 0; i < count; i++) {
		creatorid = clr[i].creatorid;

		if (clr[i].ifname[0] == '\0') {
			/* no interface given: clear by creator id only */
#ifdef __FreeBSD__
			for (st = RB_MIN(pf_state_tree_id, &V_tree_id);
			    st; st = nexts) {
				nexts = RB_NEXT(pf_state_tree_id, &V_tree_id, st);
#else
			for (st = RB_MIN(pf_state_tree_id, &tree_id);
			    st; st = nexts) {
				nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
#endif
				if (st->creatorid == creatorid) {
					SET(st->state_flags, PFSTATE_NOSYNC);
					pf_unlink_state(st);
				}
			}
		} else {
			if (pfi_kif_get(clr[i].ifname) == NULL)
				continue;

			/* XXX correct? */
#ifdef __FreeBSD__
			for (sk = RB_MIN(pf_state_tree, &V_pf_statetbl);
#else
			for (sk = RB_MIN(pf_state_tree, &pf_statetbl);
#endif
			    sk; sk = nextsk) {
				/* grab the successor before possible unlink */
				nextsk = RB_NEXT(pf_state_tree,
#ifdef __FreeBSD__
				    &V_pf_statetbl, sk);
#else
				    &pf_statetbl, sk);
#endif
				TAILQ_FOREACH(si, &sk->states, entry) {
					if (si->s->creatorid == creatorid) {
						SET(si->s->state_flags,
						    PFSTATE_NOSYNC);
						pf_unlink_state(si->s);
					}
				}
			}
		}
	}
#ifdef __FreeBSD__
	PF_UNLOCK();
#endif
	splx(s);

	return (len);
}
1063
/*
 * PFSYNC_ACT_INS: import a batch of full state records.  Each record
 * is sanity-checked and handed to pfsync_state_import(); on ENOMEM
 * the rest of this batch is abandoned, but the caller continues with
 * the remainder of the packet.  Returns bytes consumed, or -1 on a
 * short packet (mbuf already freed by m_pulldown).
 */
int
pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct mbuf *mp;
	struct pfsync_state *sa, *sp;
	int len = sizeof(*sp) * count;
	int i, offp;

	int s;

	/* make the state records contiguous */
	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_state *)(mp->m_data + offp);

	s = splsoftnet();
#ifdef __FreeBSD__
	PF_LOCK();
#endif
	for (i = 0; i < count; i++) {
		sp = &sa[i];

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST ||
		    sp->direction > PF_OUT ||
		    (sp->af != AF_INET && sp->af != AF_INET6)) {
#ifdef __FreeBSD__
			if (V_pf_status.debug >= PF_DEBUG_MISC) {
#else
			if (pf_status.debug >= PF_DEBUG_MISC) {
#endif
				printf("pfsync_input: PFSYNC5_ACT_INS: "
				    "invalid value\n");
			}
			V_pfsyncstats.pfsyncs_badval++;
			continue;
		}

		if (pfsync_state_import(sp, pkt->flags) == ENOMEM) {
			/* drop out, but process the rest of the actions */
			break;
		}
	}
#ifdef __FreeBSD__
	PF_UNLOCK();
#endif
	splx(s);

	return (len);
}
1118
/*
 * Handle a batch of insert-acknowledgement (INS_ACK) messages from the
 * peer.  For every acked state that is still deferred (PFSTATE_ACK set),
 * release the deferral and let the held packet go out.
 * Returns the number of bytes consumed from the packet, or -1 if the
 * mbuf chain is too short to contain `count' entries.
 */
int
pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_ins_ack *ia, *iaa;
	struct pf_state_cmp id_key;
	struct pf_state *st;

	struct mbuf *mp;
	int len = count * sizeof(*ia);
	int offp, i;
	int s;

	/* make the ack array contiguous so it can be indexed directly */
	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	iaa = (struct pfsync_ins_ack *)(mp->m_data + offp);

	s = splsoftnet();
#ifdef __FreeBSD__
	PF_LOCK();
#endif
	for (i = 0; i < count; i++) {
		ia = &iaa[i];

		/* look the state up by its (id, creatorid) pair */
		bcopy(&ia->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = ia->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL)
			continue;	/* unknown state: nothing to ack */

		/* second arg 0: send the deferred packet, don't drop it */
		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 0);
	}
#ifdef __FreeBSD__
	PF_UNLOCK();
#endif
	splx(s);
	/*
	 * XXX this is not yet implemented, but we know the size of the
	 * message so we can skip it.
	 */

	return (count * sizeof(struct pfsync_ins_ack));
}
1166
1167int
1168pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
1169    struct pfsync_state_peer *dst)
1170{
1171	int sfail = 0;
1172
1173	/*
1174	 * The state should never go backwards except
1175	 * for syn-proxy states.  Neither should the
1176	 * sequence window slide backwards.
1177	 */
1178	if (st->src.state > src->state &&
1179	    (st->src.state < PF_TCPS_PROXY_SRC ||
1180	    src->state >= PF_TCPS_PROXY_SRC))
1181		sfail = 1;
1182	else if (SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))
1183		sfail = 3;
1184	else if (st->dst.state > dst->state) {
1185		/* There might still be useful
1186		 * information about the src state here,
1187		 * so import that part of the update,
1188		 * then "fail" so we send the updated
1189		 * state back to the peer who is missing
1190		 * our what we know. */
1191		pf_state_peer_ntoh(src, &st->src);
1192		/* XXX do anything with timeouts? */
1193		sfail = 7;
1194	} else if (st->dst.state >= TCPS_SYN_SENT &&
1195	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))
1196		sfail = 4;
1197
1198	return (sfail);
1199}
1200
/*
 * Handle a batch of full-state update (UPD) messages.  Unknown states
 * are imported as new; known states are merged unless the update is
 * stale, in which case we re-advertise our own copy to the peer.
 * Returns the number of bytes consumed, or -1 on a short packet.
 */
int
pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_state *sa, *sp;
	struct pf_state_cmp id_key;
	struct pf_state_key *sk;
	struct pf_state *st;
	int sfail;

	struct mbuf *mp;
	int len = count * sizeof(*sp);
	int offp, i;
	int s;

	/* make the state array contiguous so it can be indexed directly */
	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_state *)(mp->m_data + offp);

	s = splsoftnet();
#ifdef __FreeBSD__
	PF_LOCK();
#endif
	for (i = 0; i < count; i++) {
		sp = &sa[i];

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST) {
#ifdef __FreeBSD__
			if (V_pf_status.debug >= PF_DEBUG_MISC) {
#else
			if (pf_status.debug >= PF_DEBUG_MISC) {
#endif
				printf("pfsync_input: PFSYNC_ACT_UPD: "
				    "invalid value\n");
			}
			V_pfsyncstats.pfsyncs_badval++;
			continue;
		}

		bcopy(sp->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			/* insert the update */
			if (pfsync_state_import(sp, 0))
				V_pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		/* second arg 1: drop the deferred packet, update supersedes it */
		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		sk = st->key[PF_SK_WIRE];	/* XXX right one? */
		sfail = 0;
		if (sk->proto == IPPROTO_TCP)
			sfail = pfsync_upd_tcp(st, &sp->src, &sp->dst);
		else {
			/*
			 * Non-TCP protocol state machine always go
			 * forwards
			 */
			if (st->src.state > sp->src.state)
				sfail = 5;
			else if (st->dst.state > sp->dst.state)
				sfail = 6;
		}

		if (sfail) {
#ifdef __FreeBSD__
			if (V_pf_status.debug >= PF_DEBUG_MISC) {
#else
			if (pf_status.debug >= PF_DEBUG_MISC) {
#endif
				printf("pfsync: %s stale update (%d)"
				    " id: %016llx creatorid: %08x\n",
				    (sfail < 7 ?  "ignoring" : "partial"),
				    sfail, betoh64(st->id),
				    ntohl(st->creatorid));
			}
			V_pfsyncstats.pfsyncs_stale++;

			/* stale: queue our newer copy back to the peer */
			pfsync_update_state(st);
			schednetisr(NETISR_PFSYNC);
			continue;
		}
		/* accept the update: merge peer info into our state */
		pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
		pf_state_peer_ntoh(&sp->src, &st->src);
		pf_state_peer_ntoh(&sp->dst, &st->dst);
		st->expire = ntohl(sp->expire) + time_second;
		st->timeout = sp->timeout;
		st->pfsync_time = time_uptime;
	}
#ifdef __FreeBSD__
	PF_UNLOCK();
#endif
	splx(s);

	return (len);
}
1306
/*
 * Handle a batch of compressed update (UPD_C) messages.  These carry
 * only the peer halves and timers of a state, not the full state, so
 * a state we don't already know about must be requested from the peer.
 * Stale updates cause us to re-advertise our own copy.
 * Returns the number of bytes consumed, or -1 on a short packet.
 */
int
pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_upd_c *ua, *up;
	struct pf_state_key *sk;
	struct pf_state_cmp id_key;
	struct pf_state *st;

	int len = count * sizeof(*up);
	int sfail;

	struct mbuf *mp;
	int offp, i;
	int s;

	/* make the update array contiguous so it can be indexed directly */
	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	ua = (struct pfsync_upd_c *)(mp->m_data + offp);

	s = splsoftnet();
#ifdef __FreeBSD__
	PF_LOCK();
#endif
	for (i = 0; i < count; i++) {
		up = &ua[i];

		/* check for invalid values */
		if (up->timeout >= PFTM_MAX ||
		    up->src.state > PF_TCPS_PROXY_DST ||
		    up->dst.state > PF_TCPS_PROXY_DST) {
#ifdef __FreeBSD__
			if (V_pf_status.debug >= PF_DEBUG_MISC) {
#else
			if (pf_status.debug >= PF_DEBUG_MISC) {
#endif
				printf("pfsync_input: "
				    "PFSYNC_ACT_UPD_C: "
				    "invalid value\n");
			}
			V_pfsyncstats.pfsyncs_badval++;
			continue;
		}

		bcopy(&up->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = up->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			/* We don't have this state. Ask for it. */
			pfsync_request_update(id_key.creatorid, id_key.id);
			continue;
		}

		/* second arg 1: drop the deferred packet, update supersedes it */
		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		sk = st->key[PF_SK_WIRE]; /* XXX right one? */
		sfail = 0;
		if (sk->proto == IPPROTO_TCP)
			sfail = pfsync_upd_tcp(st, &up->src, &up->dst);
		else {
			/*
			 * Non-TCP protocol state machine always go forwards
			 */
			if (st->src.state > up->src.state)
				sfail = 5;
			else if (st->dst.state > up->dst.state)
				sfail = 6;
		}

		if (sfail) {
#ifdef __FreeBSD__
			if (V_pf_status.debug >= PF_DEBUG_MISC) {
#else
			if (pf_status.debug >= PF_DEBUG_MISC) {
#endif
				printf("pfsync: ignoring stale update "
				    "(%d) id: %016llx "
				    "creatorid: %08x\n", sfail,
				    betoh64(st->id),
				    ntohl(st->creatorid));
			}
			V_pfsyncstats.pfsyncs_stale++;

			/* stale: queue our newer copy back to the peer */
			pfsync_update_state(st);
			schednetisr(NETISR_PFSYNC);
			continue;
		}
		/* accept the update: merge peer info into our state */
		pfsync_alloc_scrub_memory(&up->dst, &st->dst);
		pf_state_peer_ntoh(&up->src, &st->src);
		pf_state_peer_ntoh(&up->dst, &st->dst);
		st->expire = ntohl(up->expire) + time_second;
		st->timeout = up->timeout;
		st->pfsync_time = time_uptime;
	}
#ifdef __FreeBSD__
	PF_UNLOCK();
#endif
	splx(s);

	return (len);
}
1412
/*
 * Handle a batch of update-request (UPD_REQ) messages.  A request with
 * id == 0 and creatorid == 0 asks for a full bulk update; any other
 * request asks us to re-send one specific state.
 * Returns the number of bytes consumed, or -1 on a short packet.
 *
 * NOTE(review): unlike the other input handlers this one takes no
 * splsoftnet() — presumably intentional upstream, but worth confirming.
 */
int
pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_upd_req *ur, *ura;
	struct mbuf *mp;
	int len = count * sizeof(*ur);
	int i, offp;

	struct pf_state_cmp id_key;
	struct pf_state *st;

	/* make the request array contiguous so it can be indexed directly */
	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	ura = (struct pfsync_upd_req *)(mp->m_data + offp);

#ifdef __FreeBSD__
	PF_LOCK();
#endif
	for (i = 0; i < count; i++) {
		ur = &ura[i];

		bcopy(&ur->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = ur->creatorid;

		/* the all-zero key is the "send me everything" request */
		if (id_key.id == 0 && id_key.creatorid == 0)
			pfsync_bulk_start();
		else {
			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				V_pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			/* never advertise states marked no-sync */
			if (ISSET(st->state_flags, PFSTATE_NOSYNC))
				continue;

			pfsync_update_state_req(st);
		}
	}
#ifdef __FreeBSD__
	PF_UNLOCK();
#endif

	return (len);
}
1460
/*
 * Handle a batch of full-state delete (DEL) messages: unlink each named
 * state locally.  PFSTATE_NOSYNC is set first so the unlink does not
 * get advertised back to the peer that told us about it.
 * Returns the number of bytes consumed, or -1 on a short packet.
 */
int
pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct mbuf *mp;
	struct pfsync_state *sa, *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int len = count * sizeof(*sp);
	int offp, i;
	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_state *)(mp->m_data + offp);

	s = splsoftnet();
#ifdef __FreeBSD__
	PF_LOCK();
#endif
	for (i = 0; i < count; i++) {
		sp = &sa[i];

		bcopy(sp->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			V_pfsyncstats.pfsyncs_badstate++;
			continue;
		}
		/* suppress re-advertising the deletion to the peer */
		SET(st->state_flags, PFSTATE_NOSYNC);
		pf_unlink_state(st);
	}
#ifdef __FreeBSD__
	PF_UNLOCK();
#endif
	splx(s);

	return (len);
}
1504
/*
 * Handle a batch of compressed delete (DEL_C) messages — identical to
 * pfsync_in_del() except the wire format carries only (id, creatorid).
 * Returns the number of bytes consumed, or -1 on a short packet.
 */
int
pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct mbuf *mp;
	struct pfsync_del_c *sa, *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int len = count * sizeof(*sp);
	int offp, i;
	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_del_c *)(mp->m_data + offp);

	s = splsoftnet();
#ifdef __FreeBSD__
	PF_LOCK();
#endif
	for (i = 0; i < count; i++) {
		sp = &sa[i];

		bcopy(&sp->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			V_pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		/* suppress re-advertising the deletion to the peer */
		SET(st->state_flags, PFSTATE_NOSYNC);
		pf_unlink_state(st);
	}
#ifdef __FreeBSD__
	PF_UNLOCK();
#endif
	splx(s);

	return (len);
}
1549
/*
 * Handle a bulk-update-status (BUS) message.  BUS_START arms the
 * bulk-failure timeout (scaled by how many states fit per packet);
 * BUS_END with a timestamp covering our request completes the bulk
 * update and re-enables preemption readiness (pfsync_sync_ok).
 * Returns the number of bytes consumed, or -1 on a short packet.
 */
int
pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
#ifdef __FreeBSD__
	struct pfsync_softc *sc = V_pfsyncif;
#else
	struct pfsync_softc *sc = pfsyncif;
#endif
	struct pfsync_bus *bus;
	struct mbuf *mp;
	int len = count * sizeof(*bus);
	int offp;

	/* If we're not waiting for a bulk update, who cares. */
	if (sc->sc_ureq_sent == 0)
		return (len);

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	bus = (struct pfsync_bus *)(mp->m_data + offp);

	switch (bus->status) {
	case PFSYNC_BUS_START:
		/* allow 4s plus time to stream the whole state table */
#ifdef __FreeBSD__
		callout_reset(&sc->sc_bulkfail_tmo, 4 * hz +
		    V_pf_pool_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) /
		    sizeof(struct pfsync_state)),
		    pfsync_bulk_fail, V_pfsyncif);
#else
		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
		    pf_pool_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
		    sizeof(struct pfsync_state)));
#endif
#ifdef __FreeBSD__
		if (V_pf_status.debug >= PF_DEBUG_MISC)
#else
		if (pf_status.debug >= PF_DEBUG_MISC)
#endif
			printf("pfsync: received bulk update start\n");
		break;

	case PFSYNC_BUS_END:
		/* only accept an end that covers the time we asked */
		if (time_uptime - ntohl(bus->endtime) >=
		    sc->sc_ureq_sent) {
			/* that's it, we're happy */
			sc->sc_ureq_sent = 0;
			sc->sc_bulk_tries = 0;
			timeout_del(&sc->sc_bulkfail_tmo);
#if NCARP > 0
#ifdef notyet
#ifdef __FreeBSD__
			if (!sc->pfsync_sync_ok)
#else
			if (!pfsync_sync_ok)
#endif
				carp_group_demote_adj(&sc->sc_if, -1);
#endif
#endif
#ifdef __FreeBSD__
			sc->pfsync_sync_ok = 1;
#else
			pfsync_sync_ok = 1;
#endif
#ifdef __FreeBSD__
			if (V_pf_status.debug >= PF_DEBUG_MISC)
#else
			if (pf_status.debug >= PF_DEBUG_MISC)
#endif
				printf("pfsync: received valid "
				    "bulk update end\n");
		} else {
#ifdef __FreeBSD__
			if (V_pf_status.debug >= PF_DEBUG_MISC)
#else
			if (pf_status.debug >= PF_DEBUG_MISC)
#endif
				printf("pfsync: received invalid "
				    "bulk update end: bad timestamp\n");
		}
		break;
	}

	return (len);
}
1639
/*
 * Handle a batch of TDB (IPsec SA replay/byte counter) update messages.
 * Without the IPSEC option the payload is simply skipped.
 * Returns the number of bytes consumed, or -1 on a short packet.
 */
int
pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	int len = count * sizeof(struct pfsync_tdb);

#if defined(IPSEC)
	struct pfsync_tdb *tp;
	struct mbuf *mp;
	int offp;
	int i;
	int s;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	tp = (struct pfsync_tdb *)(mp->m_data + offp);

	s = splsoftnet();
#ifdef __FreeBSD__
	PF_LOCK();
#endif
	for (i = 0; i < count; i++)
		pfsync_update_net_tdb(&tp[i]);
#ifdef __FreeBSD__
	PF_UNLOCK();
#endif
	splx(s);
#endif

	return (len);
}
1673
#if defined(IPSEC)
/* Update an in-kernel tdb. Silently fail if no tdb is found. */
void
pfsync_update_net_tdb(struct pfsync_tdb *pt)
{
	struct tdb		*tdb;
	int			 s;

	/* check for invalid values */
	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
	    (pt->dst.sa.sa_family != AF_INET &&
	     pt->dst.sa.sa_family != AF_INET6))
		goto bad;

	s = spltdb();
	tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
	if (tdb) {
		/* convert wire values in place before comparing */
		pt->rpl = ntohl(pt->rpl);
		pt->cur_bytes = betoh64(pt->cur_bytes);

		/* Neither replay nor byte counter should ever decrease. */
		if (pt->rpl < tdb->tdb_rpl ||
		    pt->cur_bytes < tdb->tdb_cur_bytes) {
			splx(s);
			goto bad;
		}

		tdb->tdb_rpl = pt->rpl;
		tdb->tdb_cur_bytes = pt->cur_bytes;
	}
	splx(s);
	return;

bad:
#ifdef __FreeBSD__
	if (V_pf_status.debug >= PF_DEBUG_MISC)
#else
	if (pf_status.debug >= PF_DEBUG_MISC)
#endif
		printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: "
		    "invalid value\n");
	V_pfsyncstats.pfsyncs_badstate++;
	return;
}
#endif
1719
1720
1721int
1722pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1723{
1724	/* check if we are at the right place in the packet */
1725	if (offset != m->m_pkthdr.len - sizeof(struct pfsync_eof))
1726		V_pfsyncstats.pfsyncs_badact++;
1727
1728	/* we're done. free and let the caller return */
1729	m_freem(m);
1730	return (-1);
1731}
1732
1733int
1734pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1735{
1736	V_pfsyncstats.pfsyncs_badact++;
1737
1738	m_freem(m);
1739	return (-1);
1740}
1741
/*
 * if_output handler for the pfsync pseudo-interface.  pfsync builds and
 * sends its own packets internally, so anything handed to the interface
 * through the normal output path is silently discarded.
 */
int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
#ifdef __FreeBSD__
	struct route *rt)
#else
	struct rtentry *rt)
#endif
{
	m_freem(m);
	return (0);
}
1753
/* ARGSUSED */
/*
 * ioctl handler for the pfsync interface.  Handles interface up/down,
 * MTU changes, and the pfsync-specific SIOCGETPFSYNC/SIOCSETPFSYNC
 * requests that read/configure the sync device, sync peer address and
 * max-updates setting.  SIOCSETPFSYNC also (re)joins the multicast
 * group and kicks off a bulk update request on the new sync interface.
 */
int
pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
#ifndef __FreeBSD__
	struct proc *p = curproc;
#endif
	struct pfsync_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ip_moptions *imo = &sc->sc_imo;
	struct pfsyncreq pfsyncr;
	struct ifnet    *sifp;
	struct ip *ip;
	int s, error;

	switch (cmd) {
#if 0
	case SIOCSIFADDR:
	case SIOCAIFADDR:
	case SIOCSIFDSTADDR:
#endif
	case SIOCSIFFLAGS:
		/* mirror IFF_UP into the running flag */
#ifdef __FreeBSD__
		if (ifp->if_flags & IFF_UP)
			ifp->if_drv_flags |= IFF_DRV_RUNNING;
		else
			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
#else
		if (ifp->if_flags & IFF_UP)
			ifp->if_flags |= IFF_RUNNING;
		else
			ifp->if_flags &= ~IFF_RUNNING;
#endif
		break;
	case SIOCSIFMTU:
		/* MTU must fit in one packet on the sync interface */
		if (!sc->sc_sync_if ||
		    ifr->ifr_mtu <= PFSYNC_MINPKT ||
		    ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
			return (EINVAL);
		if (ifr->ifr_mtu < ifp->if_mtu) {
			/* flush the pending packet before shrinking */
			s = splnet();
#ifdef __FreeBSD__
			PF_LOCK();
#endif
			pfsync_sendout();
#ifdef __FreeBSD__
			PF_UNLOCK();
#endif
			splx(s);
		}
		ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCGETPFSYNC:
		/* report current configuration to userland */
		bzero(&pfsyncr, sizeof(pfsyncr));
		if (sc->sc_sync_if) {
			strlcpy(pfsyncr.pfsyncr_syncdev,
			    sc->sc_sync_if->if_xname, IFNAMSIZ);
		}
		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
		return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));

	case SIOCSETPFSYNC:
		/* privileged: reconfigure the sync device/peer */
#ifdef __FreeBSD__
		if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0)
#else
		if ((error = suser(p, p->p_acflag)) != 0)
#endif
			return (error);
		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
			return (error);

#ifdef __FreeBSD__
		PF_LOCK();
#endif
		/* unset peer means "use the pfsync multicast group" */
		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
#ifdef __FreeBSD__
			sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
#else
			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
#endif
		else
			sc->sc_sync_peer.s_addr =
			    pfsyncr.pfsyncr_syncpeer.s_addr;

		if (pfsyncr.pfsyncr_maxupdates > 255)
#ifdef __FreeBSD__
		{
			PF_UNLOCK();
#endif
			return (EINVAL);
#ifdef __FreeBSD__
		}
#endif
		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;

		/* empty syncdev name detaches the sync interface */
		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
			sc->sc_sync_if = NULL;
#ifdef __FreeBSD__
			PF_UNLOCK();
			if (imo->imo_membership)
				pfsync_multicast_cleanup(sc);
#else
			if (imo->imo_num_memberships > 0) {
				in_delmulti(imo->imo_membership[
				    --imo->imo_num_memberships]);
				imo->imo_multicast_ifp = NULL;
			}
#endif
			break;
		}

#ifdef __FreeBSD__
		PF_UNLOCK();
#endif
		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL)
			return (EINVAL);

#ifdef __FreeBSD__
		PF_LOCK();
#endif
		s = splnet();
		/* flush pending data if the new device has a smaller MTU */
#ifdef __FreeBSD__
		if (sifp->if_mtu < sc->sc_ifp->if_mtu ||
#else
		if (sifp->if_mtu < sc->sc_if.if_mtu ||
#endif
		    (sc->sc_sync_if != NULL &&
		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
			pfsync_sendout();
		sc->sc_sync_if = sifp;

		/* leave the old multicast group before joining the new one */
#ifdef __FreeBSD__
		if (imo->imo_membership) {
			PF_UNLOCK();
			pfsync_multicast_cleanup(sc);
			PF_LOCK();
		}
#else
		if (imo->imo_num_memberships > 0) {
			in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
			imo->imo_multicast_ifp = NULL;
		}
#endif

		/* join the pfsync multicast group on the new sync device */
#ifdef __FreeBSD__
		if (sc->sc_sync_if &&
		    sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
			PF_UNLOCK();
			error = pfsync_multicast_setup(sc);
			if (error)
				return (error);
			PF_LOCK();
		}
#else
		if (sc->sc_sync_if &&
		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
			struct in_addr addr;

			if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
				sc->sc_sync_if = NULL;
				splx(s);
				return (EADDRNOTAVAIL);
			}

			addr.s_addr = INADDR_PFSYNC_GROUP;

			if ((imo->imo_membership[0] =
			    in_addmulti(&addr, sc->sc_sync_if)) == NULL) {
				sc->sc_sync_if = NULL;
				splx(s);
				return (ENOBUFS);
			}
			imo->imo_num_memberships++;
			imo->imo_multicast_ifp = sc->sc_sync_if;
			imo->imo_multicast_ttl = PFSYNC_DFLTTL;
			imo->imo_multicast_loop = 0;
		}
#endif	/* !__FreeBSD__ */

		/* pre-build the IP header template used for every packet */
		ip = &sc->sc_template;
		bzero(ip, sizeof(*ip));
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(sc->sc_template) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		/* len and id are set later */
#ifdef __FreeBSD__
		ip->ip_off = IP_DF;
#else
		ip->ip_off = htons(IP_DF);
#endif
		ip->ip_ttl = PFSYNC_DFLTTL;
		ip->ip_p = IPPROTO_PFSYNC;
		ip->ip_src.s_addr = INADDR_ANY;
		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;

		if (sc->sc_sync_if) {
			/* Request a full state table update. */
			sc->sc_ureq_sent = time_uptime;
#if NCARP > 0
#ifdef notyet
#ifdef __FreeBSD__
			if (sc->pfsync_sync_ok)
#else
			if (pfsync_sync_ok)
#endif
				carp_group_demote_adj(&sc->sc_if, 1);
#endif
#endif
#ifdef __FreeBSD__
			sc->pfsync_sync_ok = 0;
#else
			pfsync_sync_ok = 0;
#endif
#ifdef __FreeBSD__
			if (V_pf_status.debug >= PF_DEBUG_MISC)
#else
			if (pf_status.debug >= PF_DEBUG_MISC)
#endif
				printf("pfsync: requesting bulk update\n");
			/* arm a 5s failure timeout for the bulk request */
#ifdef __FreeBSD__
			callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
			    pfsync_bulk_fail, V_pfsyncif);
#else
			timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
#endif
			pfsync_request_update(0, 0);
		}
#ifdef __FreeBSD__
		PF_UNLOCK();
#endif
		splx(s);

		break;

	default:
		return (ENOTTY);
	}

	return (0);
}
1996
/*
 * Queue writer: serialize a full pf state into the packet being built
 * at `offset'.  Returns the number of bytes written.
 */
int
pfsync_out_state(struct pf_state *st, struct mbuf *m, int offset)
{
	struct pfsync_state *sp = (struct pfsync_state *)(m->m_data + offset);

	pfsync_state_export(sp, st);

	return (sizeof(*sp));
}
2006
/*
 * Queue writer: serialize an insert-acknowledgement for state `st'
 * (id and creatorid are already in network byte order in the state).
 * Returns the number of bytes written.
 */
int
pfsync_out_iack(struct pf_state *st, struct mbuf *m, int offset)
{
	struct pfsync_ins_ack *iack =
	    (struct pfsync_ins_ack *)(m->m_data + offset);

	iack->id = st->id;
	iack->creatorid = st->creatorid;

	return (sizeof(*iack));
}
2018
/*
 * Queue writer: serialize a compressed update (peer halves + timers)
 * for state `st'.  Returns the number of bytes written.
 */
int
pfsync_out_upd_c(struct pf_state *st, struct mbuf *m, int offset)
{
	struct pfsync_upd_c *up = (struct pfsync_upd_c *)(m->m_data + offset);

	bzero(up, sizeof(*up));
	up->id = st->id;
	pf_state_peer_hton(&st->src, &up->src);
	pf_state_peer_hton(&st->dst, &up->dst);
	up->creatorid = st->creatorid;

	/*
	 * expire is computed in host byte order, compared against the
	 * wall clock, then converted to a network-order time-remaining.
	 */
	up->expire = pf_state_expires(st);
	if (up->expire <= time_second)
		up->expire = htonl(0);
	else
		up->expire = htonl(up->expire - time_second);
	up->timeout = st->timeout;

	return (sizeof(*up));
}
2039
/*
 * Queue writer: serialize a compressed delete for state `st'.  Also
 * marks the state PFSTATE_NOSYNC so no further sync traffic is
 * generated for it.  Returns the number of bytes written.
 */
int
pfsync_out_del(struct pf_state *st, struct mbuf *m, int offset)
{
	struct pfsync_del_c *dp = (struct pfsync_del_c *)(m->m_data + offset);

	dp->id = st->id;
	dp->creatorid = st->creatorid;

	SET(st->state_flags, PFSTATE_NOSYNC);

	return (sizeof(*dp));
}
2052
/*
 * Discard everything queued for transmission: clear the per-action
 * state queues, free pending update requests, drop any custom region,
 * and reset the accumulated packet length back to the empty size.
 */
void
pfsync_drop(struct pfsync_softc *sc)
{
	struct pf_state *st;
	struct pfsync_upd_req_item *ur;
#ifdef notyet
	struct tdb *t;
#endif
	int q;

	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		/* detach each state from its queue before emptying it */
		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
#ifdef PFSYNC_DEBUG
#ifdef __FreeBSD__
			KASSERT(st->sync_state == q,
				("%s: st->sync_state == q",
					__FUNCTION__));
#else
			KASSERT(st->sync_state == q);
#endif
#endif
			st->sync_state = PFSYNC_S_NONE;
		}
		TAILQ_INIT(&sc->sc_qs[q]);
	}

	/* free queued update requests back to the pool */
	while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
		TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
		pool_put(&sc->sc_pool, ur);
	}

	sc->sc_plus = NULL;

#ifdef notyet
	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry)
			CLR(t->tdb_flags, TDBF_PFSYNC);

		TAILQ_INIT(&sc->sc_tdb_q);
	}
#endif

	/* back to an empty packet */
	sc->sc_len = PFSYNC_MINPKT;
}
2100
#ifdef __FreeBSD__
/* FreeBSD wrapper: always schedule the software interrupt for output. */
void pfsync_sendout()
{
	pfsync_sendout1(1);
}

/*
 * Build one pfsync packet from everything currently queued (per-action
 * state queues, update requests, optional custom region, EOF) and hand
 * it off for transmission.  On FreeBSD the packet is enqueued on the
 * interface send queue and optionally kicked via swi; on OpenBSD it is
 * sent directly with ip_output().
 */
static void
pfsync_sendout1(int schedswi)
{
	struct pfsync_softc *sc = V_pfsyncif;
#else
void
pfsync_sendout(void)
{
	struct pfsync_softc *sc = pfsyncif;
#endif
#if NBPFILTER > 0
#ifdef __FreeBSD__
	struct ifnet *ifp = sc->sc_ifp;
#else
	struct ifnet *ifp = &sc->sc_if;
#endif
#endif
	struct mbuf *m;
	struct ip *ip;
	struct pfsync_header *ph;
	struct pfsync_subheader *subh;
	struct pf_state *st;
	struct pfsync_upd_req_item *ur;
#ifdef notyet
	struct tdb *t;
#endif
#ifdef __FreeBSD__
	size_t pktlen;
#endif
	int offset;
	int q, count = 0;

#ifdef __FreeBSD__
	PF_LOCK_ASSERT();
#else
	splassert(IPL_NET);
#endif

	/* nothing queued beyond the bare headers: nothing to send */
	if (sc == NULL || sc->sc_len == PFSYNC_MINPKT)
		return;

#if NBPFILTER > 0
	if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) {
#else
	if (sc->sc_sync_if == NULL) {
#endif
		/* no consumer for the packet: throw the queues away */
		pfsync_drop(sc);
		return;
	}

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL) {
#ifdef __FreeBSD__
		sc->sc_ifp->if_oerrors++;
#else
		sc->sc_if.if_oerrors++;
#endif
		V_pfsyncstats.pfsyncs_onomem++;
		pfsync_drop(sc);
		return;
	}

	/* attach external storage if the packet won't fit in the mbuf */
#ifdef __FreeBSD__
	pktlen = max_linkhdr + sc->sc_len;
	if (pktlen > MHLEN) {
		/* Find the right pool to allocate from. */
		/* XXX: This is ugly. */
		m_cljget(m, M_DONTWAIT, pktlen <= MCLBYTES ? MCLBYTES :
#if MJUMPAGESIZE != MCLBYTES
			pktlen <= MJUMPAGESIZE ? MJUMPAGESIZE :
#endif
			pktlen <= MJUM9BYTES ? MJUM9BYTES : MJUM16BYTES);
#else
	if (max_linkhdr + sc->sc_len > MHLEN) {
		MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len);
#endif
		if (!ISSET(m->m_flags, M_EXT)) {
			m_free(m);
#ifdef __FreeBSD__
			sc->sc_ifp->if_oerrors++;
#else
			sc->sc_if.if_oerrors++;
#endif
			V_pfsyncstats.pfsyncs_onomem++;
			pfsync_drop(sc);
			return;
		}
	}
	m->m_data += max_linkhdr;
	m->m_len = m->m_pkthdr.len = sc->sc_len;

	/* build the ip header */
	ip = (struct ip *)m->m_data;
	bcopy(&sc->sc_template, ip, sizeof(*ip));
	offset = sizeof(*ip);

#ifdef __FreeBSD__
	ip->ip_len = m->m_pkthdr.len;
#else
	ip->ip_len = htons(m->m_pkthdr.len);
#endif
	ip->ip_id = htons(ip_randomid());

	/* build the pfsync header */
	ph = (struct pfsync_header *)(m->m_data + offset);
	bzero(ph, sizeof(*ph));
	offset += sizeof(*ph);

	ph->version = PFSYNC_VERSION;
	ph->len = htons(sc->sc_len - sizeof(*ip));
#ifdef __FreeBSD__
	bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
#else
	bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
#endif

	/* walk the queues */
	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		/* subheader is filled in after the entries are counted */
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
#ifdef PFSYNC_DEBUG
#ifdef __FreeBSD__
			KASSERT(st->sync_state == q,
				("%s: st->sync_state == q",
					__FUNCTION__));
#else
			KASSERT(st->sync_state == q);
#endif
#endif

			/* per-queue writer serializes the state */
			offset += pfsync_qs[q].write(st, m, offset);
			st->sync_state = PFSYNC_S_NONE;
			count++;
		}
		TAILQ_INIT(&sc->sc_qs[q]);

		bzero(subh, sizeof(*subh));
		subh->action = pfsync_qs[q].action;
		subh->count = htons(count);
	}

	/* append queued update requests, if any */
	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);

			bcopy(&ur->ur_msg, m->m_data + offset,
			    sizeof(ur->ur_msg));
			offset += sizeof(ur->ur_msg);

			pool_put(&sc->sc_pool, ur);

			count++;
		}

		bzero(subh, sizeof(*subh));
		subh->action = PFSYNC_ACT_UPD_REQ;
		subh->count = htons(count);
	}

	/* has someone built a custom region for us to add? */
	if (sc->sc_plus != NULL) {
		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
		offset += sc->sc_pluslen;

		sc->sc_plus = NULL;
	}

#ifdef notyet
	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) {
			offset += pfsync_out_tdb(t, m, offset);
			CLR(t->tdb_flags, TDBF_PFSYNC);

			count++;
		}
		TAILQ_INIT(&sc->sc_tdb_q);

		bzero(subh, sizeof(*subh));
		subh->action = PFSYNC_ACT_TDB;
		subh->count = htons(count);
	}
#endif

	/* terminate the packet with the EOF subheader */
	subh = (struct pfsync_subheader *)(m->m_data + offset);
	offset += sizeof(*subh);

	bzero(subh, sizeof(*subh));
	subh->action = PFSYNC_ACT_EOF;
	subh->count = htons(1);

	/* XXX write checksum in EOF here */

	/* we're done, let's put it on the wire */
#if NBPFILTER > 0
	if (ifp->if_bpf) {
		/* tap the packet without its IP header, then restore */
		m->m_data += sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
#ifdef __FreeBSD__
		BPF_MTAP(ifp, m);
#else
		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
#endif
		m->m_data -= sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len;
	}

	if (sc->sc_sync_if == NULL) {
		/* bpf-only consumer: packet was tapped, nothing to send */
		sc->sc_len = PFSYNC_MINPKT;
		m_freem(m);
		return;
	}
#endif

#ifdef __FreeBSD__
	sc->sc_ifp->if_opackets++;
	sc->sc_ifp->if_obytes += m->m_pkthdr.len;
	sc->sc_len = PFSYNC_MINPKT;

	/* hand off to the send queue; the swi does the actual output */
	if (!_IF_QFULL(&sc->sc_ifp->if_snd))
		_IF_ENQUEUE(&sc->sc_ifp->if_snd, m);
	else {
		m_freem(m);
                sc->sc_ifp->if_snd.ifq_drops++;
	}
	if (schedswi)
		swi_sched(V_pfsync_swi_cookie, 0);
#else
	sc->sc_if.if_opackets++;
	sc->sc_if.if_obytes += m->m_pkthdr.len;

	if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL) == 0)
		pfsyncstats.pfsyncs_opackets++;
	else
		pfsyncstats.pfsyncs_oerrors++;

	/* start again */
	sc->sc_len = PFSYNC_MINPKT;
#endif
}
2360
/*
 * Called by pf when a new state is created: queue it for an INS
 * message unless it (or its rule) is marked no-sync or it describes
 * pfsync's own traffic.  Arms the per-second flush timeout when this
 * is the first data added to an empty packet.
 */
void
pfsync_insert_state(struct pf_state *st)
{
#ifdef __FreeBSD__
	struct pfsync_softc *sc = V_pfsyncif;
#else
	struct pfsync_softc *sc = pfsyncif;
#endif

#ifdef __FreeBSD__
	PF_LOCK_ASSERT();
#else
	splassert(IPL_SOFTNET);
#endif

	/* never sync pfsync's own states or no-sync rules */
	if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
		SET(st->state_flags, PFSTATE_NOSYNC);
		return;
	}

	if (sc == NULL || ISSET(st->state_flags, PFSTATE_NOSYNC))
		return;

#ifdef PFSYNC_DEBUG
#ifdef __FreeBSD__
	KASSERT(st->sync_state == PFSYNC_S_NONE,
		("%s: st->sync_state == PFSYNC_S_NONE", __FUNCTION__));
#else
	KASSERT(st->sync_state == PFSYNC_S_NONE);
#endif
#endif

	/* first entry into an empty packet: arm the 1s flush timer */
	if (sc->sc_len == PFSYNC_MINPKT)
#ifdef __FreeBSD__
		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout,
		    V_pfsyncif);
#else
		timeout_add_sec(&sc->sc_tmo, 1);
#endif

	pfsync_q_ins(st, PFSYNC_S_INS);

	/* deferred states need the insert to go out immediately */
	if (ISSET(st->state_flags, PFSTATE_ACK))
		schednetisr(NETISR_PFSYNC);
	else
		st->sync_updates = 0;
}
2409
/* deferral timeout, in clock ticks (passed to callout_reset/timeout_add) */
int defer = 10;
2411
/*
 * Hold back (defer) the packet that created state `st' until the peer
 * acks the state or the deferral timer fires.  At most 128 deferrals
 * are kept; the oldest is released to make room.  Returns 1 if the
 * packet was deferred (caller must not transmit it), 0 otherwise.
 */
int
pfsync_defer(struct pf_state *st, struct mbuf *m)
{
#ifdef __FreeBSD__
	struct pfsync_softc *sc = V_pfsyncif;
#else
	struct pfsync_softc *sc = pfsyncif;
#endif
	struct pfsync_deferral *pd;

#ifdef __FreeBSD__
	PF_LOCK_ASSERT();
#else
	splassert(IPL_SOFTNET);
#endif

	/* cap the deferral list: release (send) the oldest entry */
	if (sc->sc_deferred >= 128)
		pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);

	pd = pool_get(&sc->sc_pool, M_NOWAIT);
	if (pd == NULL)
		return (0);	/* no memory: let the packet through */
	sc->sc_deferred++;

	/* mark the packet so pf doesn't inspect it again on release */
#ifdef __FreeBSD__
	m->m_flags |= M_SKIP_FIREWALL;
#else
	m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
#endif
	SET(st->state_flags, PFSTATE_ACK);

	pd->pd_st = st;
	pd->pd_m = m;

	TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
#ifdef __FreeBSD__
	callout_init(&pd->pd_tmo, CALLOUT_MPSAFE);
	callout_reset(&pd->pd_tmo, defer, pfsync_defer_tmo,
		pd);
#else
	timeout_set(&pd->pd_tmo, pfsync_defer_tmo, pd);
	timeout_add(&pd->pd_tmo, defer);
#endif

	return (1);
}
2458
/*
 * Remove deferral 'pd' from the list and either drop the held packet
 * (drop != 0) or transmit it now.  Frees the deferral entry.
 */
void
pfsync_undefer(struct pfsync_deferral *pd, int drop)
{
#ifdef __FreeBSD__
	struct pfsync_softc *sc = V_pfsyncif;
#else
	struct pfsync_softc *sc = pfsyncif;
#endif
	int s;

#ifdef __FreeBSD__
	PF_LOCK_ASSERT();
#else
	splassert(IPL_SOFTNET);
#endif

	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
	sc->sc_deferred--;

	/* The state no longer waits for a peer acknowledgement. */
	CLR(pd->pd_st->state_flags, PFSTATE_ACK);
	timeout_del(&pd->pd_tmo); /* bah */
	if (drop)
		m_freem(pd->pd_m);
	else {
		s = splnet();
#ifdef __FreeBSD__
		/* XXX: use pf_defered?! */
		/*
		 * NOTE(review): the pf lock is dropped around ip_output
		 * — presumably to avoid holding it across the IP stack;
		 * confirm the deferral list stays consistent meanwhile.
		 */
		PF_UNLOCK();
#endif
		ip_output(pd->pd_m, (void *)NULL, (void *)NULL, 0,
		    (void *)NULL, (void *)NULL);
#ifdef __FreeBSD__
		PF_LOCK();
#endif
		splx(s);
	}

	pool_put(&sc->sc_pool, pd);
}
2498
/*
 * Deferral timeout handler: the peer did not acknowledge the state in
 * time, so release the held packet and send it.
 */
void
pfsync_defer_tmo(void *arg)
{
#if defined(__FreeBSD__) && defined(VIMAGE)
	/*
	 * 'pd' is only needed to locate the vnet.  NOTE(review): the
	 * CURVNET_SET below references 'pd' under plain __FreeBSD__ —
	 * this relies on CURVNET_SET expanding to nothing when VIMAGE
	 * is not defined; confirm.
	 */
	struct pfsync_deferral *pd = arg;
#endif
	int s;

	s = splsoftnet();
#ifdef __FreeBSD__
	CURVNET_SET(pd->pd_m->m_pkthdr.rcvif->if_vnet); /* XXX */
	PF_LOCK();
#endif
	pfsync_undefer(arg, 0);
#ifdef __FreeBSD__
	PF_UNLOCK();
	CURVNET_RESTORE();
#endif
	splx(s);
}
2519
2520void
2521pfsync_deferred(struct pf_state *st, int drop)
2522{
2523#ifdef __FreeBSD__
2524	struct pfsync_softc *sc = V_pfsyncif;
2525#else
2526	struct pfsync_softc *sc = pfsyncif;
2527#endif
2528	struct pfsync_deferral *pd;
2529
2530	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
2531		 if (pd->pd_st == st) {
2532			pfsync_undefer(pd, drop);
2533			return;
2534		}
2535	}
2536
2537	panic("pfsync_send_deferred: unable to find deferred state");
2538}
2539
/* Statistics: number of immediate transmits triggered by updates. */
u_int pfsync_upds = 0;

/*
 * Note a change to an existing state.  Updates are batched as
 * compressed updates (UPD_C); an immediate transmit is scheduled only
 * when a TCP state has accumulated sc_maxupdates updates, or when the
 * state's last sync is less than two seconds old.
 */
void
pfsync_update_state(struct pf_state *st)
{
#ifdef __FreeBSD__
	struct pfsync_softc *sc = V_pfsyncif;
#else
	struct pfsync_softc *sc = pfsyncif;
#endif
	int sync = 0;

#ifdef __FreeBSD__
	PF_LOCK_ASSERT();
#else
	splassert(IPL_SOFTNET);
#endif

	if (sc == NULL)
		return;

	/* Flush any packet still deferred on this state. */
	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 0);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	/* Empty pending packet: arm the one-second flush timeout. */
	if (sc->sc_len == PFSYNC_MINPKT)
#ifdef __FreeBSD__
		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout,
		    V_pfsyncif);
#else
		timeout_add_sec(&sc->sc_tmo, 1);
#endif

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_INS:
		/* we're already handling it */

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
			st->sync_updates++;
			if (st->sync_updates >= sc->sc_maxupdates)
				sync = 1;
		}
		break;

	case PFSYNC_S_IACK:
		/* Drop the pending IACK; queue a fresh update instead. */
		pfsync_q_del(st);
		/* FALLTHROUGH */
	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD_C);
		st->sync_updates = 0;
		break;

	default:
		panic("pfsync_update_state: unexpected sync state %d",
		    st->sync_state);
	}

	if (sync || (time_uptime - st->pfsync_time) < 2) {
		pfsync_upds++;
		schednetisr(NETISR_PFSYNC);
	}
}
2607
/*
 * Queue a pfsync_upd_req asking the peer to resend the full state
 * identified by (creatorid, id).  The request is linked on
 * sc_upd_req_list and picked up by the next sendout.
 */
void
pfsync_request_update(u_int32_t creatorid, u_int64_t id)
{
#ifdef __FreeBSD__
	struct pfsync_softc *sc = V_pfsyncif;
#else
	struct pfsync_softc *sc = pfsyncif;
#endif
	struct pfsync_upd_req_item *item;
	size_t nlen = sizeof(struct pfsync_upd_req);
	int s;

	PF_LOCK_ASSERT();

	/*
	 * this code does nothing to prevent multiple update requests for the
	 * same state being generated.
	 */

	item = pool_get(&sc->sc_pool, PR_NOWAIT);
	if (item == NULL) {
		/* XXX stats */
		return;
	}

	item->ur_msg.id = id;
	item->ur_msg.creatorid = creatorid;

	/* First request in the batch also needs a subheader. */
	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
		nlen += sizeof(struct pfsync_subheader);

	/* Flush the pending packet first if this would overflow the MTU. */
#ifdef __FreeBSD__
	if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
#else
	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
#endif
		s = splnet();
		pfsync_sendout();
		splx(s);

		/* The flushed packet is empty again: re-add the subheader. */
		nlen = sizeof(struct pfsync_subheader) +
		    sizeof(struct pfsync_upd_req);
	}

	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
	sc->sc_len += nlen;

	schednetisr(NETISR_PFSYNC);
}
2657
2658void
2659pfsync_update_state_req(struct pf_state *st)
2660{
2661#ifdef __FreeBSD__
2662	struct pfsync_softc *sc = V_pfsyncif;
2663#else
2664	struct pfsync_softc *sc = pfsyncif;
2665#endif
2666
2667	PF_LOCK_ASSERT();
2668
2669	if (sc == NULL)
2670		panic("pfsync_update_state_req: nonexistant instance");
2671
2672	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
2673		if (st->sync_state != PFSYNC_S_NONE)
2674			pfsync_q_del(st);
2675		return;
2676	}
2677
2678	switch (st->sync_state) {
2679	case PFSYNC_S_UPD_C:
2680	case PFSYNC_S_IACK:
2681		pfsync_q_del(st);
2682	case PFSYNC_S_NONE:
2683		pfsync_q_ins(st, PFSYNC_S_UPD);
2684		schednetisr(NETISR_PFSYNC);
2685		return;
2686
2687	case PFSYNC_S_INS:
2688	case PFSYNC_S_UPD:
2689	case PFSYNC_S_DEL:
2690		/* we're already handling it */
2691		return;
2692
2693	default:
2694		panic("pfsync_update_state_req: unexpected sync state %d",
2695		    st->sync_state);
2696	}
2697}
2698
/*
 * Queue a deletion notice for 'st'.  Any deferred packet for the
 * state is dropped, and any pending insert is simply cancelled (the
 * peer never learned about the state).
 */
void
pfsync_delete_state(struct pf_state *st)
{
#ifdef __FreeBSD__
	struct pfsync_softc *sc = V_pfsyncif;
#else
	struct pfsync_softc *sc = pfsyncif;
#endif

#ifdef __FreeBSD__
	PF_LOCK_ASSERT();
#else
	splassert(IPL_SOFTNET);
#endif

	if (sc == NULL)
		return;

	/* Drop (not send) any packet still deferred on this state. */
	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 1);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	/* Empty pending packet: arm the one-second flush timeout. */
	if (sc->sc_len == PFSYNC_MINPKT)
#ifdef __FreeBSD__
		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout,
		    V_pfsyncif);
#else
		timeout_add_sec(&sc->sc_tmo, 1);
#endif

	switch (st->sync_state) {
	case PFSYNC_S_INS:
		/* we never got to tell the world so just forget about it */
		pfsync_q_del(st);
		return;

	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/* FALLTHROUGH to putting it on the del list */

	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_DEL);
		return;

	default:
		panic("pfsync_delete_state: unexpected sync state %d",
		    st->sync_state);
	}
}
2754
/*
 * Tell the peer to clear all states matching 'creatorid' and, when
 * non-empty, the interface named 'ifname'.  Builds a CLR message and
 * transmits it immediately via pfsync_send_plus().
 */
void
pfsync_clear_states(u_int32_t creatorid, const char *ifname)
{
	/* Subheader plus payload, laid out exactly as on the wire. */
	struct {
		struct pfsync_subheader subh;
		struct pfsync_clr clr;
	} __packed r;

#ifdef __FreeBSD__
	struct pfsync_softc *sc = V_pfsyncif;
#else
	struct pfsync_softc *sc = pfsyncif;
#endif

#ifdef __FreeBSD__
	PF_LOCK_ASSERT();
#else
	splassert(IPL_SOFTNET);
#endif

	if (sc == NULL)
		return;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_CLR;
	r.subh.count = htons(1);

	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
	r.clr.creatorid = creatorid;

	pfsync_send_plus(&r, sizeof(r));
}
2788
/*
 * Append state 'st' to output queue 'q' and account for its wire
 * length in sc_len.  If the record would overflow the interface MTU,
 * the pending packet is flushed first.  The state must not already be
 * queued (sync_state == PFSYNC_S_NONE).
 */
void
pfsync_q_ins(struct pf_state *st, int q)
{
#ifdef __FreeBSD__
	struct pfsync_softc *sc = V_pfsyncif;
#else
	struct pfsync_softc *sc = pfsyncif;
#endif
	size_t nlen = pfsync_qs[q].len;
	int s;

	PF_LOCK_ASSERT();

#ifdef __FreeBSD__
	KASSERT(st->sync_state == PFSYNC_S_NONE,
		("%s: st->sync_state == PFSYNC_S_NONE", __FUNCTION__));
#else
	KASSERT(st->sync_state == PFSYNC_S_NONE);
#endif

	/* Sanity check, currently forced on (the "1 ||"). */
#if 1 || defined(PFSYNC_DEBUG)
	if (sc->sc_len < PFSYNC_MINPKT)
#ifdef __FreeBSD__
		panic("pfsync pkt len is too low %zu", sc->sc_len);
#else
		panic("pfsync pkt len is too low %d", sc->sc_len);
#endif
#endif
	/* First record in this queue also needs a subheader. */
	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		nlen += sizeof(struct pfsync_subheader);

#ifdef __FreeBSD__
	if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
#else
	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
#endif
		s = splnet();
		pfsync_sendout();
		splx(s);

		/* Queue is empty after the flush: re-add the subheader. */
		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
	}

	sc->sc_len += nlen;
	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
	st->sync_state = q;
}
2836
/*
 * Remove state 'st' from whatever output queue it sits on and deduct
 * its wire length (plus the subheader, if the queue is now empty)
 * from sc_len.  The state must currently be queued.
 */
void
pfsync_q_del(struct pf_state *st)
{
#ifdef __FreeBSD__
	struct pfsync_softc *sc = V_pfsyncif;
#else
	struct pfsync_softc *sc = pfsyncif;
#endif
	int q = st->sync_state;

#ifdef __FreeBSD__
	KASSERT(st->sync_state != PFSYNC_S_NONE,
		("%s: st->sync_state != PFSYNC_S_NONE", __FUNCTION__));
#else
	KASSERT(st->sync_state != PFSYNC_S_NONE);
#endif

	sc->sc_len -= pfsync_qs[q].len;
	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
	st->sync_state = PFSYNC_S_NONE;

	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		sc->sc_len -= sizeof(struct pfsync_subheader);
}
2861
2862#ifdef notyet
/*
 * (notyet) Queue an IPsec tdb replay-counter update for sync.  On the
 * first update the tdb is linked on sc_tdb_q; subsequent updates only
 * bump tdb_updates and force a transmit once sc_maxupdates is hit.
 * 'output' selects whether the replay counter gets the outbound bump
 * (TDBF_PFSYNC_RPL) when serialized.
 */
void
pfsync_update_tdb(struct tdb *t, int output)
{
#ifdef __FreeBSD__
	struct pfsync_softc *sc = V_pfsyncif;
#else
	struct pfsync_softc *sc = pfsyncif;
#endif
	size_t nlen = sizeof(struct pfsync_tdb);
	int s;

	if (sc == NULL)
		return;

	if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
		/* First record in the tdb queue also needs a subheader. */
		if (TAILQ_EMPTY(&sc->sc_tdb_q))
			nlen += sizeof(struct pfsync_subheader);

		/* Flush the pending packet if this would overflow the MTU. */
		if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
			s = splnet();
			PF_LOCK();
			pfsync_sendout();
			PF_UNLOCK();
			splx(s);

			nlen = sizeof(struct pfsync_subheader) +
			    sizeof(struct pfsync_tdb);
		}

		sc->sc_len += nlen;
		TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
		SET(t->tdb_flags, TDBF_PFSYNC);
		t->tdb_updates = 0;
	} else {
		if (++t->tdb_updates >= sc->sc_maxupdates)
			schednetisr(NETISR_PFSYNC);
	}

	if (output)
		SET(t->tdb_flags, TDBF_PFSYNC_RPL);
	else
		CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
}
2906
/*
 * (notyet) Drop a tdb from the sync queue, deducting its wire length
 * (plus the subheader when the queue becomes empty) from sc_len.
 * No-op if the tdb was never queued.
 */
void
pfsync_delete_tdb(struct tdb *t)
{
#ifdef __FreeBSD__
	struct pfsync_softc *sc = V_pfsyncif;
#else
	struct pfsync_softc *sc = pfsyncif;
#endif

	if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
		return;

	sc->sc_len -= sizeof(struct pfsync_tdb);
	TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
	CLR(t->tdb_flags, TDBF_PFSYNC);

	if (TAILQ_EMPTY(&sc->sc_tdb_q))
		sc->sc_len -= sizeof(struct pfsync_subheader);
}
2926
/*
 * (notyet) Serialize tdb 't' into the outgoing packet at 'offset'.
 * Returns the number of bytes written (sizeof(struct pfsync_tdb)).
 */
int
pfsync_out_tdb(struct tdb *t, struct mbuf *m, int offset)
{
	struct pfsync_tdb *ut = (struct pfsync_tdb *)(m->m_data + offset);

	bzero(ut, sizeof(*ut));
	ut->spi = t->tdb_spi;
	bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
	/*
	 * When a failover happens, the master's rpl is probably above
	 * what we see here (we may be up to a second late), so
	 * increase it a bit for outbound tdbs to manage most such
	 * situations.
	 *
	 * For now, just add an offset that is likely to be larger
	 * than the number of packets we can see in one second. The RFC
	 * just says the next packet must have a higher seq value.
	 *
	 * XXX What is a good algorithm for this? We could use
	 * a rate-determined increase, but to know it, we would have
	 * to extend struct tdb.
	 * XXX pt->rpl can wrap over MAXINT, but if so the real tdb
	 * will soon be replaced anyway. For now, just don't handle
	 * this edge case.
	 */
#define RPL_INCR 16384
	ut->rpl = htonl(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
	    RPL_INCR : 0));
	ut->cur_bytes = htobe64(t->tdb_cur_bytes);
	ut->sproto = t->tdb_sproto;

	return (sizeof(*ut));
}
2960#endif
2961
/*
 * Begin servicing a peer's bulk update request: announce BUS_START,
 * remember where the walk begins, and kick off pfsync_bulk_update().
 * With no states at all, answer BUS_END immediately.
 */
void
pfsync_bulk_start(void)
{
#ifdef __FreeBSD__
	struct pfsync_softc *sc = V_pfsyncif;
#else
	struct pfsync_softc *sc = pfsyncif;
#endif

#ifdef __FreeBSD__
	if (V_pf_status.debug >= PF_DEBUG_MISC)
#else
	if (pf_status.debug >= PF_DEBUG_MISC)
#endif
		printf("pfsync: received bulk update request\n");

#ifdef __FreeBSD__
	PF_LOCK_ASSERT();
	if (TAILQ_EMPTY(&V_state_list))
#else
	if (TAILQ_EMPTY(&state_list))
#endif
		pfsync_bulk_status(PFSYNC_BUS_END);
	else {
		sc->sc_ureq_received = time_uptime;
		/* Resume an in-progress walk, or start from the head. */
		if (sc->sc_bulk_next == NULL)
#ifdef __FreeBSD__
			sc->sc_bulk_next = TAILQ_FIRST(&V_state_list);
#else
			sc->sc_bulk_next = TAILQ_FIRST(&state_list);
#endif
		/* Remember where we started so the walk knows when to stop. */
		sc->sc_bulk_last = sc->sc_bulk_next;

		pfsync_bulk_status(PFSYNC_BUS_START);
		callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc);
	}
}
2999
/*
 * One round of a bulk transfer: walk the state list from sc_bulk_next,
 * queueing a full update for every eligible state, wrapping to the
 * list head at the end.  Stops with BUS_END when the walk returns to
 * sc_bulk_last, or reschedules itself when the outgoing packet fills.
 */
void
pfsync_bulk_update(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct pf_state *st = sc->sc_bulk_next;
	int i = 0;
	int s;

	PF_LOCK_ASSERT();

	s = splsoftnet();
#ifdef __FreeBSD__
	CURVNET_SET(sc->sc_ifp->if_vnet);
#endif
	for (;;) {
		/*
		 * Skip states that are already queued, expired, or
		 * modified after the bulk request arrived (those have
		 * been / will be synced anyway).
		 */
		if (st->sync_state == PFSYNC_S_NONE &&
		    st->timeout < PFTM_MAX &&
		    st->pfsync_time <= sc->sc_ureq_received) {
			pfsync_update_state_req(st);
			i++;
		}

		st = TAILQ_NEXT(st, entry_list);
		if (st == NULL)
#ifdef __FreeBSD__
			st = TAILQ_FIRST(&V_state_list);
#else
			st = TAILQ_FIRST(&state_list);
#endif

		if (st == sc->sc_bulk_last) {
			/* we're done */
			sc->sc_bulk_next = NULL;
			sc->sc_bulk_last = NULL;
			pfsync_bulk_status(PFSYNC_BUS_END);
			break;
		}

#ifdef __FreeBSD__
		if (i > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) <
#else
		if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) <
#endif
		    sizeof(struct pfsync_state)) {
			/* we've filled a packet */
			sc->sc_bulk_next = st;
#ifdef __FreeBSD__
			callout_reset(&sc->sc_bulk_tmo, 1,
			    pfsync_bulk_update, sc);
#else
			timeout_add(&sc->sc_bulk_tmo, 1);
#endif
			break;
		}
	}

#ifdef __FreeBSD__
	CURVNET_RESTORE();
#endif
	splx(s);
}
3061
/*
 * Send a bulk update status message (PFSYNC_BUS_START / _END) to the
 * peer, stamped with our host id and the time since the request.
 */
void
pfsync_bulk_status(u_int8_t status)
{
	/* Subheader plus payload, laid out exactly as on the wire. */
	struct {
		struct pfsync_subheader subh;
		struct pfsync_bus bus;
	} __packed r;

#ifdef __FreeBSD__
	struct pfsync_softc *sc = V_pfsyncif;
#else
	struct pfsync_softc *sc = pfsyncif;
#endif

	PF_LOCK_ASSERT();

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_BUS;
	r.subh.count = htons(1);

#ifdef __FreeBSD__
	r.bus.creatorid = V_pf_status.hostid;
#else
	r.bus.creatorid = pf_status.hostid;
#endif
	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
	r.bus.status = status;

	pfsync_send_plus(&r, sizeof(r));
}
3093
/*
 * Bulk-request retry timeout.  Re-issue the request up to
 * PFSYNC_MAX_BULKTRIES times; after that, give up and behave as if
 * the transfer had completed (so carp demotion is lifted).
 */
void
pfsync_bulk_fail(void *arg)
{
	struct pfsync_softc *sc = arg;

#ifdef __FreeBSD__
	CURVNET_SET(sc->sc_ifp->if_vnet);
#endif

	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again */
#ifdef __FreeBSD__
		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
		    pfsync_bulk_fail, V_pfsyncif);
#else
		timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
#endif
		/* creatorid/id 0 requests a full bulk update. */
		PF_LOCK();
		pfsync_request_update(0, 0);
		PF_UNLOCK();
	} else {
		/* Pretend like the transfer was ok */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
#if NCARP > 0
#ifdef notyet
#ifdef __FreeBSD__
		if (!sc->pfsync_sync_ok)
#else
		if (!pfsync_sync_ok)
#endif
			carp_group_demote_adj(&sc->sc_if, -1);
#endif
#endif
#ifdef __FreeBSD__
		sc->pfsync_sync_ok = 1;
#else
		pfsync_sync_ok = 1;
#endif
#ifdef __FreeBSD__
		if (V_pf_status.debug >= PF_DEBUG_MISC)
#else
		if (pf_status.debug >= PF_DEBUG_MISC)
#endif
			printf("pfsync: failed to receive bulk update\n");
	}

#ifdef __FreeBSD__
	CURVNET_RESTORE();
#endif
}
3145
/*
 * Attach an out-of-band payload ('plus', typically a caller's stack
 * struct) to the pending packet and transmit immediately.  The
 * unconditional sendout at the end guarantees sc_plus never outlives
 * the caller's buffer.
 */
void
pfsync_send_plus(void *plus, size_t pluslen)
{
#ifdef __FreeBSD__
	struct pfsync_softc *sc = V_pfsyncif;
#else
	struct pfsync_softc *sc = pfsyncif;
#endif
	int s;

	PF_LOCK_ASSERT();

	/* Flush first if the payload would overflow the MTU. */
#ifdef __FreeBSD__
	if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu) {
#else
	if (sc->sc_len + pluslen > sc->sc_if.if_mtu) {
#endif
		s = splnet();
		pfsync_sendout();
		splx(s);
	}

	sc->sc_plus = plus;
	sc->sc_len += (sc->sc_pluslen = pluslen);

	s = splnet();
	pfsync_sendout();
	splx(s);
}
3175
3176int
3177pfsync_up(void)
3178{
3179#ifdef __FreeBSD__
3180	struct pfsync_softc *sc = V_pfsyncif;
3181#else
3182	struct pfsync_softc *sc = pfsyncif;
3183#endif
3184
3185#ifdef __FreeBSD__
3186	if (sc == NULL || !ISSET(sc->sc_ifp->if_flags, IFF_DRV_RUNNING))
3187#else
3188	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
3189#endif
3190		return (0);
3191
3192	return (1);
3193}
3194
3195int
3196pfsync_state_in_use(struct pf_state *st)
3197{
3198#ifdef __FreeBSD__
3199	struct pfsync_softc *sc = V_pfsyncif;
3200#else
3201	struct pfsync_softc *sc = pfsyncif;
3202#endif
3203
3204	if (sc == NULL)
3205		return (0);
3206
3207	if (st->sync_state != PFSYNC_S_NONE ||
3208	    st == sc->sc_bulk_next ||
3209	    st == sc->sc_bulk_last)
3210		return (1);
3211
3212	return (0);
3213}
3214
/* Statistics: softint runs and flush-timeout runs, respectively. */
u_int pfsync_ints;
u_int pfsync_tmos;

/*
 * One-second flush timeout: push out whatever has accumulated in the
 * pending packet.
 */
void
pfsync_timeout(void *arg)
{
#if defined(__FreeBSD__) && defined(VIMAGE)
	/*
	 * NOTE(review): 'sc' is referenced by CURVNET_SET under plain
	 * __FreeBSD__ but declared only with VIMAGE — this relies on
	 * CURVNET_SET expanding to nothing otherwise; confirm.
	 */
	struct pfsync_softc *sc = arg;
#endif
	int s;

#ifdef __FreeBSD__
	CURVNET_SET(sc->sc_ifp->if_vnet);
#endif

	pfsync_tmos++;

	s = splnet();
#ifdef __FreeBSD__
	PF_LOCK();
#endif
	pfsync_sendout();
#ifdef __FreeBSD__
	PF_UNLOCK();
#endif
	splx(s);

#ifdef __FreeBSD__
	CURVNET_RESTORE();
#endif
}
3246
/* this is a softnet/netisr handler */
/*
 * Software-interrupt handler: build the pending packet (FreeBSD path
 * queues it on if_snd first, then transmits outside the pf lock) and
 * push it out via ip_output().
 */
void
#ifdef __FreeBSD__
pfsyncintr(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct mbuf *m, *n;

	CURVNET_SET(sc->sc_ifp->if_vnet);
	pfsync_ints++;

	PF_LOCK();
	if (sc->sc_len > PFSYNC_MINPKT)
		pfsync_sendout1(0);
	/* Grab the whole send queue, then transmit without the pf lock. */
	_IF_DEQUEUE_ALL(&sc->sc_ifp->if_snd, m);
	PF_UNLOCK();

	for (; m != NULL; m = n) {

		n = m->m_nextpkt;
		m->m_nextpkt = NULL;
		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)
		    == 0)
			V_pfsyncstats.pfsyncs_opackets++;
		else
			V_pfsyncstats.pfsyncs_oerrors++;
	}
	CURVNET_RESTORE();
}
#else
pfsyncintr(void)
{
	int s;

	pfsync_ints++;

	s = splnet();
	pfsync_sendout();
	splx(s);
}
#endif
3288
/*
 * sysctl handler for pfsync statistics.  The body is currently
 * disabled (#ifdef notyet), so every request answers ENOPROTOOPT.
 */
int
pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{

#ifdef notyet
	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case PFSYNCCTL_STATS:
		if (newp != NULL)
			return (EPERM);
		return (sysctl_struct(oldp, oldlenp, newp, newlen,
		    &V_pfsyncstats, sizeof(V_pfsyncstats)));
	}
#endif
	return (ENOPROTOOPT);
}
3309
3310#ifdef __FreeBSD__
/*
 * Join the pfsync multicast group on the configured sync interface
 * and set up the ip_moptions used for outgoing packets.  Returns 0 on
 * success or an errno value.
 */
static int
pfsync_multicast_setup(struct pfsync_softc *sc)
{
	struct ip_moptions *imo = &sc->sc_imo;
	int error;

	if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
		sc->sc_sync_if = NULL;
		return (EADDRNOTAVAIL);
	}

	imo->imo_membership = (struct in_multi **)malloc(
	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_PFSYNC,
	    M_WAITOK | M_ZERO);
	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
	imo->imo_multicast_vif = -1;

	if ((error = in_joingroup(sc->sc_sync_if, &sc->sc_sync_peer, NULL,
	    &imo->imo_membership[0])) != 0) {
		/*
		 * NOTE(review): imo_membership is freed but not set to
		 * NULL, and sc_sync_if stays set — a later
		 * pfsync_multicast_cleanup() would touch freed memory;
		 * confirm callers cannot reach that path after failure.
		 */
		free(imo->imo_membership, M_PFSYNC);
		return (error);
	}
	imo->imo_num_memberships++;
	imo->imo_multicast_ifp = sc->sc_sync_if;
	imo->imo_multicast_ttl = PFSYNC_DFLTTL;
	imo->imo_multicast_loop = 0;

	return (0);
}
3340
/*
 * Leave the pfsync multicast group and release the membership array
 * allocated by pfsync_multicast_setup().
 */
static void
pfsync_multicast_cleanup(struct pfsync_softc *sc)
{
	struct ip_moptions *imo = &sc->sc_imo;

	in_leavegroup(imo->imo_membership[0], NULL);
	free(imo->imo_membership, M_PFSYNC);
	imo->imo_membership = NULL;
	imo->imo_multicast_ifp = NULL;
}
3351
#ifdef INET
extern  struct domain inetdomain;
/* Raw-socket protocol switch entry registering IPPROTO_PFSYNC input. */
static struct protosw in_pfsync_protosw = {
	.pr_type =		SOCK_RAW,
	.pr_domain =		&inetdomain,
	.pr_protocol =		IPPROTO_PFSYNC,
	.pr_flags =		PR_ATOMIC|PR_ADDR,
	.pr_input =		pfsync_input,
	.pr_output =		(pr_output_t *)rip_output,
	.pr_ctloutput =		rip_ctloutput,
	.pr_usrreqs =		&rip_usrreqs
};
#endif
3365
3366static int
3367pfsync_init()
3368{
3369	VNET_ITERATOR_DECL(vnet_iter);
3370	int error = 0;
3371
3372	VNET_LIST_RLOCK();
3373	VNET_FOREACH(vnet_iter) {
3374		CURVNET_SET(vnet_iter);
3375		V_pfsync_cloner = pfsync_cloner;
3376		V_pfsync_cloner_data = pfsync_cloner_data;
3377		V_pfsync_cloner.ifc_data = &V_pfsync_cloner_data;
3378		if_clone_attach(&V_pfsync_cloner);
3379		error = swi_add(NULL, "pfsync", pfsyncintr, V_pfsyncif,
3380		    SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie);
3381		CURVNET_RESTORE();
3382		if (error)
3383			goto fail_locked;
3384	}
3385	VNET_LIST_RUNLOCK();
3386#ifdef INET
3387	error = pf_proto_register(PF_INET, &in_pfsync_protosw);
3388	if (error)
3389		goto fail;
3390	error = ipproto_register(IPPROTO_PFSYNC);
3391	if (error) {
3392		pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW);
3393		goto fail;
3394	}
3395#endif
3396	PF_LOCK();
3397	pfsync_state_import_ptr = pfsync_state_import;
3398	pfsync_up_ptr = pfsync_up;
3399	pfsync_insert_state_ptr = pfsync_insert_state;
3400	pfsync_update_state_ptr = pfsync_update_state;
3401	pfsync_delete_state_ptr = pfsync_delete_state;
3402	pfsync_clear_states_ptr = pfsync_clear_states;
3403	pfsync_state_in_use_ptr = pfsync_state_in_use;
3404	pfsync_defer_ptr = pfsync_defer;
3405	PF_UNLOCK();
3406
3407	return (0);
3408
3409fail:
3410	VNET_LIST_RLOCK();
3411fail_locked:
3412	VNET_FOREACH(vnet_iter) {
3413		CURVNET_SET(vnet_iter);
3414		if (V_pfsync_swi_cookie) {
3415			swi_remove(V_pfsync_swi_cookie);
3416			if_clone_detach(&V_pfsync_cloner);
3417		}
3418		CURVNET_RESTORE();
3419	}
3420	VNET_LIST_RUNLOCK();
3421
3422	return (error);
3423}
3424
3425static void
3426pfsync_uninit()
3427{
3428	VNET_ITERATOR_DECL(vnet_iter);
3429
3430	PF_LOCK();
3431	pfsync_state_import_ptr = NULL;
3432	pfsync_up_ptr = NULL;
3433	pfsync_insert_state_ptr = NULL;
3434	pfsync_update_state_ptr = NULL;
3435	pfsync_delete_state_ptr = NULL;
3436	pfsync_clear_states_ptr = NULL;
3437	pfsync_state_in_use_ptr = NULL;
3438	pfsync_defer_ptr = NULL;
3439	PF_UNLOCK();
3440
3441	ipproto_unregister(IPPROTO_PFSYNC);
3442	pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW);
3443	VNET_LIST_RLOCK();
3444	VNET_FOREACH(vnet_iter) {
3445		CURVNET_SET(vnet_iter);
3446		swi_remove(V_pfsync_swi_cookie);
3447		if_clone_detach(&V_pfsync_cloner);
3448		CURVNET_RESTORE();
3449	}
3450	VNET_LIST_RUNLOCK();
3451}
3452
/*
 * Kernel module event handler: init on load, tear down on unload,
 * refuse quiesce so the module cannot be unloaded out from under pf.
 */
static int
pfsync_modevent(module_t mod, int type, void *data)
{
	int error = 0;

	switch (type) {
	case MOD_LOAD:
		error = pfsync_init();
		break;
	case MOD_QUIESCE:
		/*
		 * Module should not be unloaded due to race conditions.
		 */
		error = EPERM;
		break;
	case MOD_UNLOAD:
		pfsync_uninit();
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}
3478
/* Module glue: register pfsync and declare its dependency on pf. */
static moduledata_t pfsync_mod = {
	"pfsync",
	pfsync_modevent,
	0
};

#define PFSYNC_MODVER 1

DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);
MODULE_VERSION(pfsync, PFSYNC_MODVER);
MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER);
#endif /* __FreeBSD__ */
3491