if_pfsync.c revision 230868
1/*	$OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $	*/
2
3/*
4 * Copyright (c) 2002 Michael Shalayeff
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
25 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
31 *
32 * Permission to use, copy, modify, and distribute this software for any
33 * purpose with or without fee is hereby granted, provided that the above
34 * copyright notice and this permission notice appear in all copies.
35 *
36 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
37 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
38 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
39 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
40 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
41 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
42 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
43 */
44
45/*
46 * Revisions picked from OpenBSD after revision 1.110 import:
47 * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates
48 * 1.120, 1.175 - use monotonic time_uptime
49 * 1.122 - reduce number of updates for non-TCP sessions
50 * 1.128 - cleanups
51 * 1.170 - SIOCSIFMTU checks
52 */
53
54#ifdef __FreeBSD__
55#include "opt_inet.h"
56#include "opt_inet6.h"
57#include "opt_pf.h"
58
59#include <sys/cdefs.h>
60__FBSDID("$FreeBSD: stable/9/sys/contrib/pf/net/if_pfsync.c 230868 2012-02-01 15:57:49Z glebius $");
61
62#define	NBPFILTER	1
63
64#ifdef DEV_CARP
65#define	NCARP		DEV_CARP
66#else
67#define	NCARP		0
68#endif
69#endif /* __FreeBSD__ */
70
71#include <sys/param.h>
72#include <sys/kernel.h>
73#ifdef __FreeBSD__
74#include <sys/bus.h>
75#include <sys/interrupt.h>
76#include <sys/priv.h>
77#endif
78#include <sys/proc.h>
79#include <sys/systm.h>
80#include <sys/time.h>
81#include <sys/mbuf.h>
82#include <sys/socket.h>
83#ifdef __FreeBSD__
84#include <sys/endian.h>
85#include <sys/malloc.h>
86#include <sys/module.h>
87#include <sys/sockio.h>
88#include <sys/taskqueue.h>
89#include <sys/lock.h>
90#include <sys/mutex.h>
91#include <sys/protosw.h>
92#else
93#include <sys/ioctl.h>
94#include <sys/timeout.h>
95#endif
96#include <sys/sysctl.h>
97#ifndef __FreeBSD__
98#include <sys/pool.h>
99#endif
100
101#include <net/if.h>
102#ifdef __FreeBSD__
103#include <net/if_clone.h>
104#endif
105#include <net/if_types.h>
106#include <net/route.h>
107#include <net/bpf.h>
108#include <net/netisr.h>
109#ifdef __FreeBSD__
110#include <net/vnet.h>
111#endif
112
113#include <netinet/in.h>
114#include <netinet/if_ether.h>
115#include <netinet/tcp.h>
116#include <netinet/tcp_seq.h>
117
118#ifdef	INET
119#include <netinet/in_systm.h>
120#include <netinet/in_var.h>
121#include <netinet/ip.h>
122#include <netinet/ip_var.h>
123#endif
124
125#ifdef INET6
126#include <netinet6/nd6.h>
127#endif /* INET6 */
128
129#ifndef __FreeBSD__
130#include "carp.h"
131#endif
132#if NCARP > 0
133#include <netinet/ip_carp.h>
134#endif
135
136#include <net/pfvar.h>
137#include <net/if_pfsync.h>
138
139#ifndef __FreeBSD__
140#include "bpfilter.h"
141#include "pfsync.h"
142#endif
143
144#define PFSYNC_MINPKT ( \
145	sizeof(struct ip) + \
146	sizeof(struct pfsync_header) + \
147	sizeof(struct pfsync_subheader) + \
148	sizeof(struct pfsync_eof))
149
150struct pfsync_pkt {
151	struct ip *ip;
152	struct in_addr src;
153	u_int8_t flags;
154};
155
156int	pfsync_input_hmac(struct mbuf *, int);
157
158int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
159	    struct pfsync_state_peer *);
160
161int	pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int);
162int	pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int);
163int	pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int);
164int	pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int);
165int	pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int);
166int	pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int);
167int	pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int);
168int	pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int);
169int	pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int);
170int	pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int);
171int	pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int);
172
173int	pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int);
174
175int	(*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = {
176	pfsync_in_clr,			/* PFSYNC_ACT_CLR */
177	pfsync_in_ins,			/* PFSYNC_ACT_INS */
178	pfsync_in_iack,			/* PFSYNC_ACT_INS_ACK */
179	pfsync_in_upd,			/* PFSYNC_ACT_UPD */
180	pfsync_in_upd_c,		/* PFSYNC_ACT_UPD_C */
181	pfsync_in_ureq,			/* PFSYNC_ACT_UPD_REQ */
182	pfsync_in_del,			/* PFSYNC_ACT_DEL */
183	pfsync_in_del_c,		/* PFSYNC_ACT_DEL_C */
184	pfsync_in_error,		/* PFSYNC_ACT_INS_F */
185	pfsync_in_error,		/* PFSYNC_ACT_DEL_F */
186	pfsync_in_bus,			/* PFSYNC_ACT_BUS */
187	pfsync_in_tdb,			/* PFSYNC_ACT_TDB */
188	pfsync_in_eof			/* PFSYNC_ACT_EOF */
189};
190
191struct pfsync_q {
192	int		(*write)(struct pf_state *, struct mbuf *, int);
193	size_t		len;
194	u_int8_t	action;
195};
196
197/* we have one of these for every PFSYNC_S_ */
198int	pfsync_out_state(struct pf_state *, struct mbuf *, int);
199int	pfsync_out_iack(struct pf_state *, struct mbuf *, int);
200int	pfsync_out_upd_c(struct pf_state *, struct mbuf *, int);
201int	pfsync_out_del(struct pf_state *, struct mbuf *, int);
202
203struct pfsync_q pfsync_qs[] = {
204	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
205	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
206	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD },
207	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
208	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C }
209};
210
211void	pfsync_q_ins(struct pf_state *, int);
212void	pfsync_q_del(struct pf_state *);
213
214struct pfsync_upd_req_item {
215	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
216	struct pfsync_upd_req			ur_msg;
217};
218TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);
219
220struct pfsync_deferral {
221	TAILQ_ENTRY(pfsync_deferral)		 pd_entry;
222	struct pf_state				*pd_st;
223	struct mbuf				*pd_m;
224#ifdef __FreeBSD__
225	struct callout				 pd_tmo;
226#else
227	struct timeout				 pd_tmo;
228#endif
229};
230TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);
231
232#define PFSYNC_PLSIZE	MAX(sizeof(struct pfsync_upd_req_item), \
233			    sizeof(struct pfsync_deferral))
234
235#ifdef notyet
236int	pfsync_out_tdb(struct tdb *, struct mbuf *, int);
237#endif
238
239struct pfsync_softc {
240#ifdef __FreeBSD__
241	struct ifnet		*sc_ifp;
242#else
243	struct ifnet		 sc_if;
244#endif
245	struct ifnet		*sc_sync_if;
246
247#ifdef __FreeBSD__
248	uma_zone_t		 sc_pool;
249#else
250	struct pool		 sc_pool;
251#endif
252
253	struct ip_moptions	 sc_imo;
254
255	struct in_addr		 sc_sync_peer;
256	u_int8_t		 sc_maxupdates;
257#ifdef __FreeBSD__
258	int			 pfsync_sync_ok;
259#endif
260
261	struct ip		 sc_template;
262
263	struct pf_state_queue	 sc_qs[PFSYNC_S_COUNT];
264	size_t			 sc_len;
265
266	struct pfsync_upd_reqs	 sc_upd_req_list;
267
268	struct pfsync_deferrals	 sc_deferrals;
269	u_int			 sc_deferred;
270
271	void			*sc_plus;
272	size_t			 sc_pluslen;
273
274	u_int32_t		 sc_ureq_sent;
275	int			 sc_bulk_tries;
276#ifdef __FreeBSD__
277	struct callout		 sc_bulkfail_tmo;
278#else
279	struct timeout		 sc_bulkfail_tmo;
280#endif
281
282	u_int32_t		 sc_ureq_received;
283	struct pf_state		*sc_bulk_next;
284	struct pf_state		*sc_bulk_last;
285#ifdef __FreeBSD__
286	struct callout		 sc_bulk_tmo;
287#else
288	struct timeout		 sc_bulk_tmo;
289#endif
290
291	TAILQ_HEAD(, tdb)	 sc_tdb_q;
292
293#ifdef __FreeBSD__
294	struct callout		 sc_tmo;
295#else
296	struct timeout		 sc_tmo;
297#endif
298};
299
300#ifdef __FreeBSD__
301static MALLOC_DEFINE(M_PFSYNC, "pfsync", "pfsync data");
302static VNET_DEFINE(struct pfsync_softc	*, pfsyncif) = NULL;
303#define	V_pfsyncif		VNET(pfsyncif)
304static VNET_DEFINE(void *, pfsync_swi_cookie) = NULL;
305#define	V_pfsync_swi_cookie	VNET(pfsync_swi_cookie)
306static VNET_DEFINE(struct pfsyncstats, pfsyncstats);
307#define	V_pfsyncstats		VNET(pfsyncstats)
308
309static void	pfsyncintr(void *);
310static int	pfsync_multicast_setup(struct pfsync_softc *);
311static void	pfsync_multicast_cleanup(struct pfsync_softc *);
312static int	pfsync_init(void);
313static void	pfsync_uninit(void);
314static void	pfsync_sendout1(int);
315
316#define	schednetisr(NETISR_PFSYNC)	swi_sched(V_pfsync_swi_cookie, 0)
317
318SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC");
319SYSCTL_VNET_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_RW,
320    &VNET_NAME(pfsyncstats), pfsyncstats,
321    "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
322#else
323struct pfsync_softc	*pfsyncif = NULL;
324struct pfsyncstats	 pfsyncstats;
325#define	V_pfsyncstats	 pfsyncstats
326#endif
327
328void	pfsyncattach(int);
329#ifdef __FreeBSD__
330int	pfsync_clone_create(struct if_clone *, int, caddr_t);
331void	pfsync_clone_destroy(struct ifnet *);
332#else
333int	pfsync_clone_create(struct if_clone *, int);
334int	pfsync_clone_destroy(struct ifnet *);
335#endif
336int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
337	    struct pf_state_peer *);
338void	pfsync_update_net_tdb(struct pfsync_tdb *);
339int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
340#ifdef __FreeBSD__
341	    struct route *);
342#else
343	    struct rtentry *);
344#endif
345int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
346void	pfsyncstart(struct ifnet *);
347
348struct mbuf *pfsync_if_dequeue(struct ifnet *);
349
350void	pfsync_deferred(struct pf_state *, int);
351void	pfsync_undefer(struct pfsync_deferral *, int);
352void	pfsync_defer_tmo(void *);
353
354void	pfsync_request_update(u_int32_t, u_int64_t);
355void	pfsync_update_state_req(struct pf_state *);
356
357void	pfsync_drop(struct pfsync_softc *);
358void	pfsync_sendout(void);
359void	pfsync_send_plus(void *, size_t);
360void	pfsync_timeout(void *);
361void	pfsync_tdb_timeout(void *);
362
363void	pfsync_bulk_start(void);
364void	pfsync_bulk_status(u_int8_t);
365void	pfsync_bulk_update(void *);
366void	pfsync_bulk_fail(void *);
367
368#ifdef __FreeBSD__
369/* XXX: ugly */
370#define	betoh64		(unsigned long long)be64toh
371#define	timeout_del	callout_stop
372#endif
373
374#define PFSYNC_MAX_BULKTRIES	12
375#ifndef __FreeBSD__
376int	pfsync_sync_ok;
377#endif
378
379#ifdef __FreeBSD__
380VNET_DEFINE(struct ifc_simple_data, pfsync_cloner_data);
381VNET_DEFINE(struct if_clone, pfsync_cloner);
382#define	V_pfsync_cloner_data	VNET(pfsync_cloner_data)
383#define	V_pfsync_cloner		VNET(pfsync_cloner)
384IFC_SIMPLE_DECLARE(pfsync, 1);
385#else
386struct if_clone	pfsync_cloner =
387    IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);
388#endif
389
390void
391pfsyncattach(int npfsync)
392{
393	if_clone_attach(&pfsync_cloner);
394}
395int
396#ifdef __FreeBSD__
397pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param)
398#else
399pfsync_clone_create(struct if_clone *ifc, int unit)
400#endif
401{
402	struct pfsync_softc *sc;
403	struct ifnet *ifp;
404	int q;
405
406	if (unit != 0)
407		return (EINVAL);
408
409#ifdef __FreeBSD__
410	sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO);
411	sc->pfsync_sync_ok = 1;
412#else
413	pfsync_sync_ok = 1;
414	sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_NOWAIT | M_ZERO);
415#endif
416
417	for (q = 0; q < PFSYNC_S_COUNT; q++)
418		TAILQ_INIT(&sc->sc_qs[q]);
419
420#ifdef __FreeBSD__
421	sc->sc_pool = uma_zcreate("pfsync", PFSYNC_PLSIZE, NULL, NULL, NULL,
422	    NULL, UMA_ALIGN_PTR, 0);
423#else
424	pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, 0, 0, "pfsync", NULL);
425#endif
426	TAILQ_INIT(&sc->sc_upd_req_list);
427	TAILQ_INIT(&sc->sc_deferrals);
428	sc->sc_deferred = 0;
429
430	TAILQ_INIT(&sc->sc_tdb_q);
431
432	sc->sc_len = PFSYNC_MINPKT;
433	sc->sc_maxupdates = 128;
434
435#ifndef __FreeBSD__
436	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
437	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
438	    M_WAITOK | M_ZERO);
439	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
440#endif
441
442#ifdef __FreeBSD__
443	ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC);
444	if (ifp == NULL) {
445		uma_zdestroy(sc->sc_pool);
446		free(sc, M_PFSYNC);
447		return (ENOSPC);
448	}
449	if_initname(ifp, ifc->ifc_name, unit);
450#else
451	ifp = &sc->sc_if;
452	snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
453#endif
454	ifp->if_softc = sc;
455	ifp->if_ioctl = pfsyncioctl;
456	ifp->if_output = pfsyncoutput;
457	ifp->if_start = pfsyncstart;
458	ifp->if_type = IFT_PFSYNC;
459	ifp->if_snd.ifq_maxlen = ifqmaxlen;
460	ifp->if_hdrlen = sizeof(struct pfsync_header);
461	ifp->if_mtu = ETHERMTU;
462#ifdef __FreeBSD__
463	callout_init(&sc->sc_tmo, CALLOUT_MPSAFE);
464	callout_init_mtx(&sc->sc_bulk_tmo, &pf_task_mtx, 0);
465	callout_init(&sc->sc_bulkfail_tmo, CALLOUT_MPSAFE);
466#else
467	timeout_set(&sc->sc_tmo, pfsync_timeout, sc);
468	timeout_set(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
469	timeout_set(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);
470#endif
471
472	if_attach(ifp);
473#ifndef __FreeBSD__
474	if_alloc_sadl(ifp);
475#endif
476
477#if NCARP > 0
478	if_addgroup(ifp, "carp");
479#endif
480
481#if NBPFILTER > 0
482#ifdef __FreeBSD__
483	bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
484#else
485	bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
486#endif
487#endif
488
489#ifdef __FreeBSD__
490	V_pfsyncif = sc;
491#else
492	pfsyncif = sc;
493#endif
494
495	return (0);
496}
497
498#ifdef __FreeBSD__
499void
500#else
501int
502#endif
503pfsync_clone_destroy(struct ifnet *ifp)
504{
505	struct pfsync_softc *sc = ifp->if_softc;
506
507#ifdef __FreeBSD__
508	PF_LOCK();
509#endif
510	timeout_del(&sc->sc_bulkfail_tmo);
511	timeout_del(&sc->sc_bulk_tmo);
512	timeout_del(&sc->sc_tmo);
513#ifdef __FreeBSD__
514	PF_UNLOCK();
515#endif
516#if NCARP > 0
517#ifdef notyet
518#ifdef __FreeBSD__
519	if (!sc->pfsync_sync_ok)
520#else
521	if (!pfsync_sync_ok)
522#endif
523		carp_group_demote_adj(&sc->sc_if, -1);
524#endif
525#endif
526#if NBPFILTER > 0
527	bpfdetach(ifp);
528#endif
529	if_detach(ifp);
530
531	pfsync_drop(sc);
532
533	while (sc->sc_deferred > 0)
534		pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);
535
536#ifdef __FreeBSD__
537	UMA_DESTROY(sc->sc_pool);
538#else
539	pool_destroy(&sc->sc_pool);
540#endif
541#ifdef __FreeBSD__
542	if_free(ifp);
543	if (sc->sc_imo.imo_membership)
544		pfsync_multicast_cleanup(sc);
545	free(sc, M_PFSYNC);
546#else
547	free(sc->sc_imo.imo_membership, M_IPMOPTS);
548	free(sc, M_DEVBUF);
549#endif
550
551#ifdef __FreeBSD__
552	V_pfsyncif = NULL;
553#else
554	pfsyncif = NULL;
555#endif
556
557#ifndef __FreeBSD__
558	return (0);
559#endif
560}
561
562struct mbuf *
563pfsync_if_dequeue(struct ifnet *ifp)
564{
565	struct mbuf *m;
566#ifndef __FreeBSD__
567	int s;
568#endif
569
570#ifdef __FreeBSD__
571	IF_LOCK(&ifp->if_snd);
572	_IF_DROP(&ifp->if_snd);
573	_IF_DEQUEUE(&ifp->if_snd, m);
574	IF_UNLOCK(&ifp->if_snd);
575#else
576	s = splnet();
577	IF_DEQUEUE(&ifp->if_snd, m);
578	splx(s);
579#endif
580
581	return (m);
582}
583
584/*
585 * Start output on the pfsync interface.
586 */
587void
588pfsyncstart(struct ifnet *ifp)
589{
590	struct mbuf *m;
591
592	while ((m = pfsync_if_dequeue(ifp)) != NULL) {
593#ifndef __FreeBSD__
594		IF_DROP(&ifp->if_snd);
595#endif
596		m_freem(m);
597	}
598}
599
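/* Allocate local scrub memory when an incoming peer update carries scrub info. */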
600int
601pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
602    struct pf_state_peer *d)
603{
604	if (s->scrub.scrub_flag && d->scrub == NULL) {
605#ifdef __FreeBSD__
606		d->scrub = pool_get(&V_pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
607#else
608		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
609#endif
610		if (d->scrub == NULL)
611			return (ENOMEM);
612	}
613
614	return (0);
615}
616
617#ifndef __FreeBSD__
618void
619pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
620{
621	bzero(sp, sizeof(struct pfsync_state));
622
623	/* copy from state key */
624	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
625	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
626	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
627	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
628	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
629	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
630	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
631	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
632	sp->proto = st->key[PF_SK_WIRE]->proto;
633	sp->af = st->key[PF_SK_WIRE]->af;
634
635	/* copy from state */
636	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
637	bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
638	sp->creation = htonl(time_uptime - st->creation);
639	sp->expire = pf_state_expires(st);
640	if (sp->expire <= time_second)
641		sp->expire = htonl(0);
642	else
643		sp->expire = htonl(sp->expire - time_second);
644
645	sp->direction = st->direction;
646	sp->log = st->log;
647	sp->timeout = st->timeout;
648	sp->state_flags = st->state_flags;
649	if (st->src_node)
650		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
651	if (st->nat_src_node)
652		sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE;
653
654	bcopy(&st->id, &sp->id, sizeof(sp->id));
655	sp->creatorid = st->creatorid;
656	pf_state_peer_hton(&st->src, &sp->src);
657	pf_state_peer_hton(&st->dst, &sp->dst);
658
659	if (st->rule.ptr == NULL)
660		sp->rule = htonl(-1);
661	else
662		sp->rule = htonl(st->rule.ptr->nr);
663	if (st->anchor.ptr == NULL)
664		sp->anchor = htonl(-1);
665	else
666		sp->anchor = htonl(st->anchor.ptr->nr);
667	if (st->nat_rule.ptr == NULL)
668		sp->nat_rule = htonl(-1);
669	else
670		sp->nat_rule = htonl(st->nat_rule.ptr->nr);
671
672	pf_state_counter_hton(st->packets[0], sp->packets[0]);
673	pf_state_counter_hton(st->packets[1], sp->packets[1]);
674	pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
675	pf_state_counter_hton(st->bytes[1], sp->bytes[1]);
676
677}
678#endif
679
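/*
 * Build and insert a local pf state from a pfsync_state message,
 * either received from a peer or passed in via the ioctl interface.
 */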
680int
681pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
682{
683	struct pf_state	*st = NULL;
684	struct pf_state_key *skw = NULL, *sks = NULL;
685	struct pf_rule *r = NULL;
686	struct pfi_kif	*kif;
687	int pool_flags;
688	int error;
689
690#ifdef __FreeBSD__
691	PF_LOCK_ASSERT();
692
693	if (sp->creatorid == 0 && V_pf_status.debug >= PF_DEBUG_MISC) {
694#else
695	if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) {
696#endif
697		printf("pfsync_state_import: invalid creator id:"
698		    " %08x\n", ntohl(sp->creatorid));
699		return (EINVAL);
700	}
701
702	if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
703#ifdef __FreeBSD__
704		if (V_pf_status.debug >= PF_DEBUG_MISC)
705#else
706		if (pf_status.debug >= PF_DEBUG_MISC)
707#endif
708			printf("pfsync_state_import: "
709			    "unknown interface: %s\n", sp->ifname);
710		if (flags & PFSYNC_SI_IOCTL)
711			return (EINVAL);
712		return (0);	/* skip this state */
713	}
714
715	/*
716	 * If the ruleset checksums match or the state is coming from the ioctl,
717	 * it's safe to associate the state with the rule of that number.
718	 */
719	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
720	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
721	    pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount)
722		r = pf_main_ruleset.rules[
723		    PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
724	else
725#ifdef __FreeBSD__
726		r = &V_pf_default_rule;
727#else
728		r = &pf_default_rule;
729#endif
730
731	if ((r->max_states && r->states_cur >= r->max_states))
732		goto cleanup;
733
734#ifdef __FreeBSD__
735	if (flags & PFSYNC_SI_IOCTL)
736		pool_flags = PR_WAITOK | PR_ZERO;
737	else
738		pool_flags = PR_NOWAIT | PR_ZERO;
739
740	if ((st = pool_get(&V_pf_state_pl, pool_flags)) == NULL)
741		goto cleanup;
742#else
743	if (flags & PFSYNC_SI_IOCTL)
744		pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
745	else
746		pool_flags = PR_LIMITFAIL | PR_ZERO;
747
748	if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
749		goto cleanup;
750#endif
751
752	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
753		goto cleanup;
754
755	if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
756	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
757	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
758	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
759	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
760	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1]) {
761		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
762			goto cleanup;
763	} else
764		sks = skw;
765
766	/* allocate memory for scrub info */
767	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
768	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
769		goto cleanup;
770
771	/* copy to state key(s) */
772	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
773	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
774	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
775	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
776	skw->proto = sp->proto;
777	skw->af = sp->af;
778	if (sks != skw) {
779		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
780		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
781		sks->port[0] = sp->key[PF_SK_STACK].port[0];
782		sks->port[1] = sp->key[PF_SK_STACK].port[1];
783		sks->proto = sp->proto;
784		sks->af = sp->af;
785	}
786
787	/* copy to state */
788	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
789	st->creation = time_uptime - ntohl(sp->creation);
790	st->expire = time_second;
791	if (sp->expire) {
792		/* XXX No adaptive scaling. */
793		st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire);
794	}
795
796	st->expire = ntohl(sp->expire) + time_second;
797	st->direction = sp->direction;
798	st->log = sp->log;
799	st->timeout = sp->timeout;
800	st->state_flags = sp->state_flags;
801
802	bcopy(sp->id, &st->id, sizeof(st->id));
803	st->creatorid = sp->creatorid;
804	pf_state_peer_ntoh(&sp->src, &st->src);
805	pf_state_peer_ntoh(&sp->dst, &st->dst);
806
807	st->rule.ptr = r;
808	st->nat_rule.ptr = NULL;
809	st->anchor.ptr = NULL;
810	st->rt_kif = NULL;
811
812	st->pfsync_time = time_uptime;
813	st->sync_state = PFSYNC_S_NONE;
814
815	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
816	r->states_cur++;
817	r->states_tot++;
818
819	if (!ISSET(flags, PFSYNC_SI_IOCTL))
820		SET(st->state_flags, PFSTATE_NOSYNC);
821
822	if ((error = pf_state_insert(kif, skw, sks, st)) != 0) {
823		/* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */
824		r->states_cur--;
825		goto cleanup_state;
826	}
827
828	if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
829		CLR(st->state_flags, PFSTATE_NOSYNC);
830		if (ISSET(st->state_flags, PFSTATE_ACK)) {
831			pfsync_q_ins(st, PFSYNC_S_IACK);
832			schednetisr(NETISR_PFSYNC);
833		}
834	}
835	CLR(st->state_flags, PFSTATE_ACK);
836
837	return (0);
838
839cleanup:
840	error = ENOMEM;
841	if (skw == sks)
842		sks = NULL;
843#ifdef __FreeBSD__
844	if (skw != NULL)
845		pool_put(&V_pf_state_key_pl, skw);
846	if (sks != NULL)
847		pool_put(&V_pf_state_key_pl, sks);
848#else
849	if (skw != NULL)
850		pool_put(&pf_state_key_pl, skw);
851	if (sks != NULL)
852		pool_put(&pf_state_key_pl, sks);
853#endif
854
855cleanup_state:	/* pf_state_insert frees the state keys */
856	if (st) {
857#ifdef __FreeBSD__
858		if (st->dst.scrub)
859			pool_put(&V_pf_state_scrub_pl, st->dst.scrub);
860		if (st->src.scrub)
861			pool_put(&V_pf_state_scrub_pl, st->src.scrub);
862		pool_put(&V_pf_state_pl, st);
863#else
864		if (st->dst.scrub)
865			pool_put(&pf_state_scrub_pl, st->dst.scrub);
866		if (st->src.scrub)
867			pool_put(&pf_state_scrub_pl, st->src.scrub);
868		pool_put(&pf_state_pl, st);
869#endif
870	}
871	return (error);
872}
873
874void
875#ifdef __FreeBSD__
876pfsync_input(struct mbuf *m, __unused int off)
877#else
878pfsync_input(struct mbuf *m, ...)
879#endif
880{
881#ifdef __FreeBSD__
882	struct pfsync_softc *sc = V_pfsyncif;
883#else
884	struct pfsync_softc *sc = pfsyncif;
885#endif
886	struct pfsync_pkt pkt;
887	struct ip *ip = mtod(m, struct ip *);
888	struct pfsync_header *ph;
889	struct pfsync_subheader subh;
890
891	int offset;
892	int rv;
893
894	V_pfsyncstats.pfsyncs_ipackets++;
895
896	/* verify that we have a sync interface configured */
897#ifdef __FreeBSD__
898	if (!sc || !sc->sc_sync_if || !V_pf_status.running)
899#else
900	if (!sc || !sc->sc_sync_if || !pf_status.running)
901#endif
902		goto done;
903
904	/* verify that the packet came in on the right interface */
905	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
906		V_pfsyncstats.pfsyncs_badif++;
907		goto done;
908	}
909
910#ifdef __FreeBSD__
911	sc->sc_ifp->if_ipackets++;
912	sc->sc_ifp->if_ibytes += m->m_pkthdr.len;
913#else
914	sc->sc_if.if_ipackets++;
915	sc->sc_if.if_ibytes += m->m_pkthdr.len;
916#endif
917	/* verify that the IP TTL is 255. */
918	if (ip->ip_ttl != PFSYNC_DFLTTL) {
919		V_pfsyncstats.pfsyncs_badttl++;
920		goto done;
921	}
922
923	offset = ip->ip_hl << 2;
924	if (m->m_pkthdr.len < offset + sizeof(*ph)) {
925		V_pfsyncstats.pfsyncs_hdrops++;
926		goto done;
927	}
928
929	if (offset + sizeof(*ph) > m->m_len) {
930		if (m_pullup(m, offset + sizeof(*ph)) == NULL) {
931			V_pfsyncstats.pfsyncs_hdrops++;
932			return;
933		}
934		ip = mtod(m, struct ip *);
935	}
936	ph = (struct pfsync_header *)((char *)ip + offset);
937
938	/* verify the version */
939	if (ph->version != PFSYNC_VERSION) {
940		V_pfsyncstats.pfsyncs_badver++;
941		goto done;
942	}
943
944#if 0
945	if (pfsync_input_hmac(m, offset) != 0) {
946		/* XXX stats */
947		goto done;
948	}
949#endif
950
951	/* Cheaper to grab this now than having to mess with mbufs later */
952	pkt.ip = ip;
953	pkt.src = ip->ip_src;
954	pkt.flags = 0;
955
956#ifdef __FreeBSD__
957	if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
958#else
959	if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
960#endif
961		pkt.flags |= PFSYNC_SI_CKSUM;
962
963	offset += sizeof(*ph);
964	for (;;) {
965		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
966		offset += sizeof(subh);
967
968		if (subh.action >= PFSYNC_ACT_MAX) {
969			V_pfsyncstats.pfsyncs_badact++;
970			goto done;
971		}
972
973		rv = (*pfsync_acts[subh.action])(&pkt, m, offset,
974		    ntohs(subh.count));
975		if (rv == -1)
976			return;
977
978		offset += rv;
979	}
980
981done:
982	m_freem(m);
983}
984
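/* PFSYNC_ACT_CLR: clear (unlink) all states matching the given creator id. */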
985int
986pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
987{
988	struct pfsync_clr *clr;
989	struct mbuf *mp;
990	int len = sizeof(*clr) * count;
991	int i, offp;
992
993	struct pf_state *st, *nexts;
994	struct pf_state_key *sk, *nextsk;
995	struct pf_state_item *si;
996	u_int32_t creatorid;
997	int s;
998
999	mp = m_pulldown(m, offset, len, &offp);
1000	if (mp == NULL) {
1001		V_pfsyncstats.pfsyncs_badlen++;
1002		return (-1);
1003	}
1004	clr = (struct pfsync_clr *)(mp->m_data + offp);
1005
1006	s = splsoftnet();
1007#ifdef __FreeBSD__
1008	PF_LOCK();
1009#endif
1010	for (i = 0; i < count; i++) {
1011		creatorid = clr[i].creatorid;
1012
1013		if (clr[i].ifname[0] == '\0') {
1014#ifdef __FreeBSD__
1015			for (st = RB_MIN(pf_state_tree_id, &V_tree_id);
1016			    st; st = nexts) {
1017				nexts = RB_NEXT(pf_state_tree_id, &V_tree_id, st);
1018#else
1019			for (st = RB_MIN(pf_state_tree_id, &tree_id);
1020			    st; st = nexts) {
1021				nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
1022#endif
1023				if (st->creatorid == creatorid) {
1024					SET(st->state_flags, PFSTATE_NOSYNC);
1025					pf_unlink_state(st);
1026				}
1027			}
1028		} else {
1029			if (pfi_kif_get(clr[i].ifname) == NULL)
1030				continue;
1031
1032			/* XXX correct? */
1033#ifdef __FreeBSD__
1034			for (sk = RB_MIN(pf_state_tree, &V_pf_statetbl);
1035#else
1036			for (sk = RB_MIN(pf_state_tree, &pf_statetbl);
1037#endif
1038			    sk; sk = nextsk) {
1039				nextsk = RB_NEXT(pf_state_tree,
1040#ifdef __FreeBSD__
1041				    &V_pf_statetbl, sk);
1042#else
1043				    &pf_statetbl, sk);
1044#endif
1045				TAILQ_FOREACH(si, &sk->states, entry) {
1046					if (si->s->creatorid == creatorid) {
1047						SET(si->s->state_flags,
1048						    PFSTATE_NOSYNC);
1049						pf_unlink_state(si->s);
1050					}
1051				}
1052			}
1053		}
1054	}
1055#ifdef __FreeBSD__
1056	PF_UNLOCK();
1057#endif
1058	splx(s);
1059
1060	return (len);
1061}
1062
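/* PFSYNC_ACT_INS: import full state inserts announced by the peer. */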
1063int
1064pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1065{
1066	struct mbuf *mp;
1067	struct pfsync_state *sa, *sp;
1068	int len = sizeof(*sp) * count;
1069	int i, offp;
1070
1071	int s;
1072
1073	mp = m_pulldown(m, offset, len, &offp);
1074	if (mp == NULL) {
1075		V_pfsyncstats.pfsyncs_badlen++;
1076		return (-1);
1077	}
1078	sa = (struct pfsync_state *)(mp->m_data + offp);
1079
1080	s = splsoftnet();
1081#ifdef __FreeBSD__
1082	PF_LOCK();
1083#endif
1084	for (i = 0; i < count; i++) {
1085		sp = &sa[i];
1086
1087		/* check for invalid values */
1088		if (sp->timeout >= PFTM_MAX ||
1089		    sp->src.state > PF_TCPS_PROXY_DST ||
1090		    sp->dst.state > PF_TCPS_PROXY_DST ||
1091		    sp->direction > PF_OUT ||
1092		    (sp->af != AF_INET && sp->af != AF_INET6)) {
1093#ifdef __FreeBSD__
1094			if (V_pf_status.debug >= PF_DEBUG_MISC) {
1095#else
1096			if (pf_status.debug >= PF_DEBUG_MISC) {
1097#endif
1098				printf("pfsync_input: PFSYNC5_ACT_INS: "
1099				    "invalid value\n");
1100			}
1101			V_pfsyncstats.pfsyncs_badval++;
1102			continue;
1103		}
1104
1105		if (pfsync_state_import(sp, pkt->flags) == ENOMEM) {
1106			/* drop out, but process the rest of the actions */
1107			break;
1108		}
1109	}
1110#ifdef __FreeBSD__
1111	PF_UNLOCK();
1112#endif
1113	splx(s);
1114
1115	return (len);
1116}
1117
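/*
 * PFSYNC_ACT_INS_ACK: the peer has acknowledged our inserts; release
 * any packets we deferred while waiting for that acknowledgement.
 */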
1118int
1119pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1120{
1121	struct pfsync_ins_ack *ia, *iaa;
1122	struct pf_state_cmp id_key;
1123	struct pf_state *st;
1124
1125	struct mbuf *mp;
1126	int len = count * sizeof(*ia);
1127	int offp, i;
1128	int s;
1129
1130	mp = m_pulldown(m, offset, len, &offp);
1131	if (mp == NULL) {
1132		V_pfsyncstats.pfsyncs_badlen++;
1133		return (-1);
1134	}
1135	iaa = (struct pfsync_ins_ack *)(mp->m_data + offp);
1136
1137	s = splsoftnet();
1138#ifdef __FreeBSD__
1139	PF_LOCK();
1140#endif
1141	for (i = 0; i < count; i++) {
1142		ia = &iaa[i];
1143
1144		bcopy(&ia->id, &id_key.id, sizeof(id_key.id));
1145		id_key.creatorid = ia->creatorid;
1146
1147		st = pf_find_state_byid(&id_key);
1148		if (st == NULL)
1149			continue;
1150
1151		if (ISSET(st->state_flags, PFSTATE_ACK))
1152			pfsync_deferred(st, 0);
1153	}
1154#ifdef __FreeBSD__
1155	PF_UNLOCK();
1156#endif
1157	splx(s);
1158	/*
1159	 * XXX this is not yet implemented, but we know the size of the
1160	 * message so we can skip it.
1161	 */
1162
1163	return (count * sizeof(struct pfsync_ins_ack));
1164}
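/*
 * Sanity-check a TCP state update against the local state.  Returns 0
 * if the update may be applied, or a nonzero "sfail" code identifying
 * why it looks stale.
 */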
1165
1166int
1167pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
1168    struct pfsync_state_peer *dst)
1169{
1170	int sfail = 0;
1171
1172	/*
1173	 * The state should never go backwards except
1174	 * for syn-proxy states.  Neither should the
1175	 * sequence window slide backwards.
1176	 */
1177	if (st->src.state > src->state &&
1178	    (st->src.state < PF_TCPS_PROXY_SRC ||
1179	    src->state >= PF_TCPS_PROXY_SRC))
1180		sfail = 1;
1181	else if (SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))
1182		sfail = 3;
1183	else if (st->dst.state > dst->state) {
1184		/* There might still be useful
1185		 * information about the src state here,
1186		 * so import that part of the update,
1187		 * then "fail" so we send the updated
1188		 * state back to the peer who is missing
1189		 * what we know. */
1190		pf_state_peer_ntoh(src, &st->src);
1191		/* XXX do anything with timeouts? */
1192		sfail = 7;
1193	} else if (st->dst.state >= TCPS_SYN_SENT &&
1194	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))
1195		sfail = 4;
1196
1197	return (sfail);
1198}
1199
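/* PFSYNC_ACT_UPD: merge full state updates; import states we do not know. */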
1200int
1201pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1202{
1203	struct pfsync_state *sa, *sp;
1204	struct pf_state_cmp id_key;
1205	struct pf_state_key *sk;
1206	struct pf_state *st;
1207	int sfail;
1208
1209	struct mbuf *mp;
1210	int len = count * sizeof(*sp);
1211	int offp, i;
1212	int s;
1213
1214	mp = m_pulldown(m, offset, len, &offp);
1215	if (mp == NULL) {
1216		V_pfsyncstats.pfsyncs_badlen++;
1217		return (-1);
1218	}
1219	sa = (struct pfsync_state *)(mp->m_data + offp);
1220
1221	s = splsoftnet();
1222#ifdef __FreeBSD__
1223	PF_LOCK();
1224#endif
1225	for (i = 0; i < count; i++) {
1226		sp = &sa[i];
1227
1228		/* check for invalid values */
1229		if (sp->timeout >= PFTM_MAX ||
1230		    sp->src.state > PF_TCPS_PROXY_DST ||
1231		    sp->dst.state > PF_TCPS_PROXY_DST) {
1232#ifdef __FreeBSD__
1233			if (V_pf_status.debug >= PF_DEBUG_MISC) {
1234#else
1235			if (pf_status.debug >= PF_DEBUG_MISC) {
1236#endif
1237				printf("pfsync_input: PFSYNC_ACT_UPD: "
1238				    "invalid value\n");
1239			}
1240			V_pfsyncstats.pfsyncs_badval++;
1241			continue;
1242		}
1243
1244		bcopy(sp->id, &id_key.id, sizeof(id_key.id));
1245		id_key.creatorid = sp->creatorid;
1246
1247		st = pf_find_state_byid(&id_key);
1248		if (st == NULL) {
1249			/* insert the update */
1250			if (pfsync_state_import(sp, 0))
1251				V_pfsyncstats.pfsyncs_badstate++;
1252			continue;
1253		}
1254
1255		if (ISSET(st->state_flags, PFSTATE_ACK))
1256			pfsync_deferred(st, 1);
1257
1258		sk = st->key[PF_SK_WIRE];	/* XXX right one? */
1259		sfail = 0;
1260		if (sk->proto == IPPROTO_TCP)
1261			sfail = pfsync_upd_tcp(st, &sp->src, &sp->dst);
1262		else {
1263			/*
1264			 * Non-TCP protocol state machines always go
1265			 * forwards.
1266			 */
1267			if (st->src.state > sp->src.state)
1268				sfail = 5;
1269			else if (st->dst.state > sp->dst.state)
1270				sfail = 6;
1271		}
1272
1273		if (sfail) {
1274#ifdef __FreeBSD__
1275			if (V_pf_status.debug >= PF_DEBUG_MISC) {
1276#else
1277			if (pf_status.debug >= PF_DEBUG_MISC) {
1278#endif
1279				printf("pfsync: %s stale update (%d)"
1280				    " id: %016llx creatorid: %08x\n",
1281				    (sfail < 7 ?  "ignoring" : "partial"),
1282				    sfail, betoh64(st->id),
1283				    ntohl(st->creatorid));
1284			}
1285			V_pfsyncstats.pfsyncs_stale++;
1286
1287			pfsync_update_state(st);
1288			schednetisr(NETISR_PFSYNC);
1289			continue;
1290		}
1291		pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
1292		pf_state_peer_ntoh(&sp->src, &st->src);
1293		pf_state_peer_ntoh(&sp->dst, &st->dst);
1294		st->expire = ntohl(sp->expire) + time_second;
1295		st->timeout = sp->timeout;
1296		st->pfsync_time = time_uptime;
1297	}
1298#ifdef __FreeBSD__
1299	PF_UNLOCK();
1300#endif
1301	splx(s);
1302
1303	return (len);
1304}
1305
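/*
 * PFSYNC_ACT_UPD_C: merge compressed state updates; if the state is
 * unknown, ask the peer for a full copy.
 */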
1306int
1307pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1308{
1309	struct pfsync_upd_c *ua, *up;
1310	struct pf_state_key *sk;
1311	struct pf_state_cmp id_key;
1312	struct pf_state *st;
1313
1314	int len = count * sizeof(*up);
1315	int sfail;
1316
1317	struct mbuf *mp;
1318	int offp, i;
1319	int s;
1320
1321	mp = m_pulldown(m, offset, len, &offp);
1322	if (mp == NULL) {
1323		V_pfsyncstats.pfsyncs_badlen++;
1324		return (-1);
1325	}
1326	ua = (struct pfsync_upd_c *)(mp->m_data + offp);
1327
1328	s = splsoftnet();
1329#ifdef __FreeBSD__
1330	PF_LOCK();
1331#endif
1332	for (i = 0; i < count; i++) {
1333		up = &ua[i];
1334
1335		/* check for invalid values */
1336		if (up->timeout >= PFTM_MAX ||
1337		    up->src.state > PF_TCPS_PROXY_DST ||
1338		    up->dst.state > PF_TCPS_PROXY_DST) {
1339#ifdef __FreeBSD__
1340			if (V_pf_status.debug >= PF_DEBUG_MISC) {
1341#else
1342			if (pf_status.debug >= PF_DEBUG_MISC) {
1343#endif
1344				printf("pfsync_input: "
1345				    "PFSYNC_ACT_UPD_C: "
1346				    "invalid value\n");
1347			}
1348			V_pfsyncstats.pfsyncs_badval++;
1349			continue;
1350		}
1351
1352		bcopy(&up->id, &id_key.id, sizeof(id_key.id));
1353		id_key.creatorid = up->creatorid;
1354
1355		st = pf_find_state_byid(&id_key);
1356		if (st == NULL) {
1357			/* We don't have this state. Ask for it. */
1358			pfsync_request_update(id_key.creatorid, id_key.id);
1359			continue;
1360		}
1361
1362		if (ISSET(st->state_flags, PFSTATE_ACK))
1363			pfsync_deferred(st, 1);
1364
1365		sk = st->key[PF_SK_WIRE]; /* XXX right one? */
1366		sfail = 0;
1367		if (sk->proto == IPPROTO_TCP)
1368			sfail = pfsync_upd_tcp(st, &up->src, &up->dst);
1369		else {
1370			/*
1371			 * Non-TCP protocol state machines always go forwards.
1372			 */
1373			if (st->src.state > up->src.state)
1374				sfail = 5;
1375			else if (st->dst.state > up->dst.state)
1376				sfail = 6;
1377		}
1378
1379		if (sfail) {
1380#ifdef __FreeBSD__
1381			if (V_pf_status.debug >= PF_DEBUG_MISC) {
1382#else
1383			if (pf_status.debug >= PF_DEBUG_MISC) {
1384#endif
1385				printf("pfsync: ignoring stale update "
1386				    "(%d) id: %016llx "
1387				    "creatorid: %08x\n", sfail,
1388				    betoh64(st->id),
1389				    ntohl(st->creatorid));
1390			}
1391			V_pfsyncstats.pfsyncs_stale++;
1392
1393			pfsync_update_state(st);
1394			schednetisr(NETISR_PFSYNC);
1395			continue;
1396		}
1397		pfsync_alloc_scrub_memory(&up->dst, &st->dst);
1398		pf_state_peer_ntoh(&up->src, &st->src);
1399		pf_state_peer_ntoh(&up->dst, &st->dst);
1400		st->expire = ntohl(up->expire) + time_second;
1401		st->timeout = up->timeout;
1402		st->pfsync_time = time_uptime;
1403	}
1404#ifdef __FreeBSD__
1405	PF_UNLOCK();
1406#endif
1407	splx(s);
1408
1409	return (len);
1410}
1411
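/*
 * PFSYNC_ACT_UPD_REQ: the peer wants updates; an id/creatorid of zero
 * requests a full bulk update.
 */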
1412int
1413pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1414{
1415	struct pfsync_upd_req *ur, *ura;
1416	struct mbuf *mp;
1417	int len = count * sizeof(*ur);
1418	int i, offp;
1419
1420	struct pf_state_cmp id_key;
1421	struct pf_state *st;
1422
1423	mp = m_pulldown(m, offset, len, &offp);
1424	if (mp == NULL) {
1425		V_pfsyncstats.pfsyncs_badlen++;
1426		return (-1);
1427	}
1428	ura = (struct pfsync_upd_req *)(mp->m_data + offp);
1429
1430#ifdef __FreeBSD__
1431	PF_LOCK();
1432#endif
1433	for (i = 0; i < count; i++) {
1434		ur = &ura[i];
1435
1436		bcopy(&ur->id, &id_key.id, sizeof(id_key.id));
1437		id_key.creatorid = ur->creatorid;
1438
1439		if (id_key.id == 0 && id_key.creatorid == 0)
1440			pfsync_bulk_start();
1441		else {
1442			st = pf_find_state_byid(&id_key);
1443			if (st == NULL) {
1444				V_pfsyncstats.pfsyncs_badstate++;
1445				continue;
1446			}
1447			if (ISSET(st->state_flags, PFSTATE_NOSYNC))
1448				continue;
1449
1450			pfsync_update_state_req(st);
1451		}
1452	}
1453#ifdef __FreeBSD__
1454	PF_UNLOCK();
1455#endif
1456
1457	return (len);
1458}
1459
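/* PFSYNC_ACT_DEL: unlink the listed states (full state format). */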
1460int
1461pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1462{
1463	struct mbuf *mp;
1464	struct pfsync_state *sa, *sp;
1465	struct pf_state_cmp id_key;
1466	struct pf_state *st;
1467	int len = count * sizeof(*sp);
1468	int offp, i;
1469	int s;
1470
1471	mp = m_pulldown(m, offset, len, &offp);
1472	if (mp == NULL) {
1473		V_pfsyncstats.pfsyncs_badlen++;
1474		return (-1);
1475	}
1476	sa = (struct pfsync_state *)(mp->m_data + offp);
1477
1478	s = splsoftnet();
1479#ifdef __FreeBSD__
1480	PF_LOCK();
1481#endif
1482	for (i = 0; i < count; i++) {
1483		sp = &sa[i];
1484
1485		bcopy(sp->id, &id_key.id, sizeof(id_key.id));
1486		id_key.creatorid = sp->creatorid;
1487
1488		st = pf_find_state_byid(&id_key);
1489		if (st == NULL) {
1490			V_pfsyncstats.pfsyncs_badstate++;
1491			continue;
1492		}
1493		SET(st->state_flags, PFSTATE_NOSYNC);
1494		pf_unlink_state(st);
1495	}
1496#ifdef __FreeBSD__
1497	PF_UNLOCK();
1498#endif
1499	splx(s);
1500
1501	return (len);
1502}
1503
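/* PFSYNC_ACT_DEL_C: unlink the listed states (compressed format). */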
1504int
1505pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1506{
1507	struct mbuf *mp;
1508	struct pfsync_del_c *sa, *sp;
1509	struct pf_state_cmp id_key;
1510	struct pf_state *st;
1511	int len = count * sizeof(*sp);
1512	int offp, i;
1513	int s;
1514
1515	mp = m_pulldown(m, offset, len, &offp);
1516	if (mp == NULL) {
1517		V_pfsyncstats.pfsyncs_badlen++;
1518		return (-1);
1519	}
1520	sa = (struct pfsync_del_c *)(mp->m_data + offp);
1521
1522	s = splsoftnet();
1523#ifdef __FreeBSD__
1524	PF_LOCK();
1525#endif
1526	for (i = 0; i < count; i++) {
1527		sp = &sa[i];
1528
1529		bcopy(&sp->id, &id_key.id, sizeof(id_key.id));
1530		id_key.creatorid = sp->creatorid;
1531
1532		st = pf_find_state_byid(&id_key);
1533		if (st == NULL) {
1534			V_pfsyncstats.pfsyncs_badstate++;
1535			continue;
1536		}
1537
1538		SET(st->state_flags, PFSTATE_NOSYNC);
1539		pf_unlink_state(st);
1540	}
1541#ifdef __FreeBSD__
1542	PF_UNLOCK();
1543#endif
1544	splx(s);
1545
1546	return (len);
1547}
1548
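/*
 * PFSYNC_ACT_BUS: bulk update status.  START re-arms the failure timeout,
 * END finishes the bulk update if the timestamp is plausible.
 */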
1549int
1550pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1551{
1552#ifdef __FreeBSD__
1553	struct pfsync_softc *sc = V_pfsyncif;
1554#else
1555	struct pfsync_softc *sc = pfsyncif;
1556#endif
1557	struct pfsync_bus *bus;
1558	struct mbuf *mp;
1559	int len = count * sizeof(*bus);
1560	int offp;
1561
1562	/* If we're not waiting for a bulk update, who cares. */
1563	if (sc->sc_ureq_sent == 0)
1564		return (len);
1565
1566	mp = m_pulldown(m, offset, len, &offp);
1567	if (mp == NULL) {
1568		V_pfsyncstats.pfsyncs_badlen++;
1569		return (-1);
1570	}
1571	bus = (struct pfsync_bus *)(mp->m_data + offp);
1572
1573	switch (bus->status) {
1574	case PFSYNC_BUS_START:
1575#ifdef __FreeBSD__
1576		callout_reset(&sc->sc_bulkfail_tmo, 4 * hz +
1577		    V_pf_pool_limits[PF_LIMIT_STATES].limit /
1578		    ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) /
1579		    sizeof(struct pfsync_state)),
1580		    pfsync_bulk_fail, V_pfsyncif);
1581#else
1582		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
1583		    pf_pool_limits[PF_LIMIT_STATES].limit /
1584		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
1585		    sizeof(struct pfsync_state)));
1586#endif
1587#ifdef __FreeBSD__
1588		if (V_pf_status.debug >= PF_DEBUG_MISC)
1589#else
1590		if (pf_status.debug >= PF_DEBUG_MISC)
1591#endif
1592			printf("pfsync: received bulk update start\n");
1593		break;
1594
1595	case PFSYNC_BUS_END:
1596		if (time_uptime - ntohl(bus->endtime) >=
1597		    sc->sc_ureq_sent) {
1598			/* that's it, we're happy */
1599			sc->sc_ureq_sent = 0;
1600			sc->sc_bulk_tries = 0;
1601			timeout_del(&sc->sc_bulkfail_tmo);
1602#if NCARP > 0
1603#ifdef notyet
1604#ifdef __FreeBSD__
1605			if (!sc->pfsync_sync_ok)
1606#else
1607			if (!pfsync_sync_ok)
1608#endif
1609				carp_group_demote_adj(&sc->sc_if, -1);
1610#endif
1611#endif
1612#ifdef __FreeBSD__
1613			sc->pfsync_sync_ok = 1;
1614#else
1615			pfsync_sync_ok = 1;
1616#endif
1617#ifdef __FreeBSD__
1618			if (V_pf_status.debug >= PF_DEBUG_MISC)
1619#else
1620			if (pf_status.debug >= PF_DEBUG_MISC)
1621#endif
1622				printf("pfsync: received valid "
1623				    "bulk update end\n");
1624		} else {
1625#ifdef __FreeBSD__
1626			if (V_pf_status.debug >= PF_DEBUG_MISC)
1627#else
1628			if (pf_status.debug >= PF_DEBUG_MISC)
1629#endif
1630				printf("pfsync: received invalid "
1631				    "bulk update end: bad timestamp\n");
1632		}
1633		break;
1634	}
1635
1636	return (len);
1637}
1638
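/* PFSYNC_ACT_TDB: update IPsec TDB replay counters (only with IPSEC). */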
1639int
1640pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1641{
1642	int len = count * sizeof(struct pfsync_tdb);
1643
1644#if defined(IPSEC)
1645	struct pfsync_tdb *tp;
1646	struct mbuf *mp;
1647	int offp;
1648	int i;
1649	int s;
1650
1651	mp = m_pulldown(m, offset, len, &offp);
1652	if (mp == NULL) {
1653		V_pfsyncstats.pfsyncs_badlen++;
1654		return (-1);
1655	}
1656	tp = (struct pfsync_tdb *)(mp->m_data + offp);
1657
1658	s = splsoftnet();
1659#ifdef __FreeBSD__
1660	PF_LOCK();
1661#endif
1662	for (i = 0; i < count; i++)
1663		pfsync_update_net_tdb(&tp[i]);
1664#ifdef __FreeBSD__
1665	PF_UNLOCK();
1666#endif
1667	splx(s);
1668#endif
1669
1670	return (len);
1671}
1672
1673#if defined(IPSEC)
1674/* Update an in-kernel tdb. Silently fail if no tdb is found. */
1675void
1676pfsync_update_net_tdb(struct pfsync_tdb *pt)
1677{
1678	struct tdb		*tdb;
1679	int			 s;
1680
1681	/* check for invalid values */
1682	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
1683	    (pt->dst.sa.sa_family != AF_INET &&
1684	     pt->dst.sa.sa_family != AF_INET6))
1685		goto bad;
1686
1687	s = spltdb();
1688	tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
1689	if (tdb) {
1690		pt->rpl = ntohl(pt->rpl);
1691		pt->cur_bytes = betoh64(pt->cur_bytes);
1692
1693		/* Neither replay nor byte counter should ever decrease. */
1694		if (pt->rpl < tdb->tdb_rpl ||
1695		    pt->cur_bytes < tdb->tdb_cur_bytes) {
1696			splx(s);
1697			goto bad;
1698		}
1699
1700		tdb->tdb_rpl = pt->rpl;
1701		tdb->tdb_cur_bytes = pt->cur_bytes;
1702	}
1703	splx(s);
1704	return;
1705
1706bad:
1707#ifdef __FreeBSD__
1708	if (V_pf_status.debug >= PF_DEBUG_MISC)
1709#else
1710	if (pf_status.debug >= PF_DEBUG_MISC)
1711#endif
1712		printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: "
1713		    "invalid value\n");
1714	V_pfsyncstats.pfsyncs_badstate++;
1715	return;
1716}
1717#endif
1718
1719
1720int
1721pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1722{
1723	/* check if we are at the right place in the packet */
1724	if (offset != m->m_pkthdr.len - sizeof(struct pfsync_eof))
1725		V_pfsyncstats.pfsyncs_badact++;
1726
1727	/* we're done. free and let the caller return */
1728	m_freem(m);
1729	return (-1);
1730}
1731
1732int
1733pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
1734{
1735	V_pfsyncstats.pfsyncs_badact++;
1736
1737	m_freem(m);
1738	return (-1);
1739}
1740
1741int
1742pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
1743#ifdef __FreeBSD__
1744	struct route *rt)
1745#else
1746	struct rtentry *rt)
1747#endif
1748{
1749	m_freem(m);
1750	return (0);
1751}
1752
1753/* ARGSUSED */
1754int
1755pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1756{
1757#ifndef __FreeBSD__
1758	struct proc *p = curproc;
1759#endif
1760	struct pfsync_softc *sc = ifp->if_softc;
1761	struct ifreq *ifr = (struct ifreq *)data;
1762	struct ip_moptions *imo = &sc->sc_imo;
1763	struct pfsyncreq pfsyncr;
1764	struct ifnet    *sifp;
1765	struct ip *ip;
1766	int s, error;
1767
1768	switch (cmd) {
1769#if 0
1770	case SIOCSIFADDR:
1771	case SIOCAIFADDR:
1772	case SIOCSIFDSTADDR:
1773#endif
1774	case SIOCSIFFLAGS:
1775#ifdef __FreeBSD__
1776		if (ifp->if_flags & IFF_UP)
1777			ifp->if_drv_flags |= IFF_DRV_RUNNING;
1778		else
1779			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1780#else
1781		if (ifp->if_flags & IFF_UP)
1782			ifp->if_flags |= IFF_RUNNING;
1783		else
1784			ifp->if_flags &= ~IFF_RUNNING;
1785#endif
1786		break;
1787	case SIOCSIFMTU:
1788		if (!sc->sc_sync_if ||
1789		    ifr->ifr_mtu <= PFSYNC_MINPKT ||
1790		    ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
1791			return (EINVAL);
1792		if (ifr->ifr_mtu < ifp->if_mtu) {
1793			s = splnet();
1794#ifdef __FreeBSD__
1795			PF_LOCK();
1796#endif
1797			pfsync_sendout();
1798#ifdef __FreeBSD__
1799			PF_UNLOCK();
1800#endif
1801			splx(s);
1802		}
1803		ifp->if_mtu = ifr->ifr_mtu;
1804		break;
1805	case SIOCGETPFSYNC:
1806		bzero(&pfsyncr, sizeof(pfsyncr));
1807		if (sc->sc_sync_if) {
1808			strlcpy(pfsyncr.pfsyncr_syncdev,
1809			    sc->sc_sync_if->if_xname, IFNAMSIZ);
1810		}
1811		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
1812		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
1813		return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));
1814
1815	case SIOCSETPFSYNC:
1816#ifdef __FreeBSD__
1817		if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0)
1818#else
1819		if ((error = suser(p, p->p_acflag)) != 0)
1820#endif
1821			return (error);
1822		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
1823			return (error);
1824
1825#ifdef __FreeBSD__
1826		PF_LOCK();
1827#endif
1828		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
1829#ifdef __FreeBSD__
1830			sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
1831#else
1832			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
1833#endif
1834		else
1835			sc->sc_sync_peer.s_addr =
1836			    pfsyncr.pfsyncr_syncpeer.s_addr;
1837
1838		if (pfsyncr.pfsyncr_maxupdates > 255)
1839#ifdef __FreeBSD__
1840		{
1841			PF_UNLOCK();
1842#endif
1843			return (EINVAL);
1844#ifdef __FreeBSD__
1845		}
1846#endif
1847		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
1848
1849		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
1850			sc->sc_sync_if = NULL;
1851#ifdef __FreeBSD__
1852			PF_UNLOCK();
1853			if (imo->imo_membership)
1854				pfsync_multicast_cleanup(sc);
1855#else
1856			if (imo->imo_num_memberships > 0) {
1857				in_delmulti(imo->imo_membership[
1858				    --imo->imo_num_memberships]);
1859				imo->imo_multicast_ifp = NULL;
1860			}
1861#endif
1862			break;
1863		}
1864
1865#ifdef __FreeBSD__
1866		PF_UNLOCK();
1867#endif
1868		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL)
1869			return (EINVAL);
1870
1871#ifdef __FreeBSD__
1872		PF_LOCK();
1873#endif
1874		s = splnet();
1875#ifdef __FreeBSD__
1876		if (sifp->if_mtu < sc->sc_ifp->if_mtu ||
1877#else
1878		if (sifp->if_mtu < sc->sc_if.if_mtu ||
1879#endif
1880		    (sc->sc_sync_if != NULL &&
1881		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
1882		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
1883			pfsync_sendout();
1884		sc->sc_sync_if = sifp;
1885
1886#ifdef __FreeBSD__
1887		if (imo->imo_membership) {
1888			PF_UNLOCK();
1889			pfsync_multicast_cleanup(sc);
1890			PF_LOCK();
1891		}
1892#else
1893		if (imo->imo_num_memberships > 0) {
1894			in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1895			imo->imo_multicast_ifp = NULL;
1896		}
1897#endif
1898
1899#ifdef __FreeBSD__
1900		if (sc->sc_sync_if &&
1901		    sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
1902			PF_UNLOCK();
1903			error = pfsync_multicast_setup(sc);
1904			if (error)
1905				return (error);
1906			PF_LOCK();
1907		}
1908#else
1909		if (sc->sc_sync_if &&
1910		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
1911			struct in_addr addr;
1912
1913			if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
1914				sc->sc_sync_if = NULL;
1915				splx(s);
1916				return (EADDRNOTAVAIL);
1917			}
1918
1919			addr.s_addr = INADDR_PFSYNC_GROUP;
1920
1921			if ((imo->imo_membership[0] =
1922			    in_addmulti(&addr, sc->sc_sync_if)) == NULL) {
1923				sc->sc_sync_if = NULL;
1924				splx(s);
1925				return (ENOBUFS);
1926			}
1927			imo->imo_num_memberships++;
1928			imo->imo_multicast_ifp = sc->sc_sync_if;
1929			imo->imo_multicast_ttl = PFSYNC_DFLTTL;
1930			imo->imo_multicast_loop = 0;
1931		}
1932#endif	/* !__FreeBSD__ */
1933
1934		ip = &sc->sc_template;
1935		bzero(ip, sizeof(*ip));
1936		ip->ip_v = IPVERSION;
1937		ip->ip_hl = sizeof(sc->sc_template) >> 2;
1938		ip->ip_tos = IPTOS_LOWDELAY;
1939		/* len and id are set later */
1940#ifdef __FreeBSD__
1941		ip->ip_off = IP_DF;
1942#else
1943		ip->ip_off = htons(IP_DF);
1944#endif
1945		ip->ip_ttl = PFSYNC_DFLTTL;
1946		ip->ip_p = IPPROTO_PFSYNC;
1947		ip->ip_src.s_addr = INADDR_ANY;
1948		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;
1949
1950		if (sc->sc_sync_if) {
1951			/* Request a full state table update. */
1952			sc->sc_ureq_sent = time_uptime;
1953#if NCARP > 0
1954#ifdef notyet
1955#ifdef __FreeBSD__
1956			if (sc->pfsync_sync_ok)
1957#else
1958			if (pfsync_sync_ok)
1959#endif
1960				carp_group_demote_adj(&sc->sc_if, 1);
1961#endif
1962#endif
1963#ifdef __FreeBSD__
1964			sc->pfsync_sync_ok = 0;
1965#else
1966			pfsync_sync_ok = 0;
1967#endif
1968#ifdef __FreeBSD__
1969			if (V_pf_status.debug >= PF_DEBUG_MISC)
1970#else
1971			if (pf_status.debug >= PF_DEBUG_MISC)
1972#endif
1973				printf("pfsync: requesting bulk update\n");
1974#ifdef __FreeBSD__
1975			callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
1976			    pfsync_bulk_fail, V_pfsyncif);
1977#else
1978			timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
1979#endif
1980			pfsync_request_update(0, 0);
1981		}
1982#ifdef __FreeBSD__
1983		PF_UNLOCK();
1984#endif
1985		splx(s);
1986
1987		break;
1988
1989	default:
1990		return (ENOTTY);
1991	}
1992
1993	return (0);
1994}
1995
1996int
1997pfsync_out_state(struct pf_state *st, struct mbuf *m, int offset)
1998{
1999	struct pfsync_state *sp = (struct pfsync_state *)(m->m_data + offset);
2000
2001	pfsync_state_export(sp, st);
2002
2003	return (sizeof(*sp));
2004}
2005
2006int
2007pfsync_out_iack(struct pf_state *st, struct mbuf *m, int offset)
2008{
2009	struct pfsync_ins_ack *iack =
2010	    (struct pfsync_ins_ack *)(m->m_data + offset);
2011
2012	iack->id = st->id;
2013	iack->creatorid = st->creatorid;
2014
2015	return (sizeof(*iack));
2016}
2017
2018int
2019pfsync_out_upd_c(struct pf_state *st, struct mbuf *m, int offset)
2020{
2021	struct pfsync_upd_c *up = (struct pfsync_upd_c *)(m->m_data + offset);
2022
2023	up->id = st->id;
2024	pf_state_peer_hton(&st->src, &up->src);
2025	pf_state_peer_hton(&st->dst, &up->dst);
2026	up->creatorid = st->creatorid;
2027
2028	up->expire = pf_state_expires(st);
2029	if (up->expire <= time_second)
2030		up->expire = htonl(0);
2031	else
2032		up->expire = htonl(up->expire - time_second);
2033	up->timeout = st->timeout;
2034
2035	bzero(up->_pad, sizeof(up->_pad)); /* XXX */
2036
2037	return (sizeof(*up));
2038}
2039
2040int
2041pfsync_out_del(struct pf_state *st, struct mbuf *m, int offset)
2042{
2043	struct pfsync_del_c *dp = (struct pfsync_del_c *)(m->m_data + offset);
2044
2045	dp->id = st->id;
2046	dp->creatorid = st->creatorid;
2047
2048	SET(st->state_flags, PFSTATE_NOSYNC);
2049
2050	return (sizeof(*dp));
2051}
2052
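/* Throw away everything queued for transmission and reset the packet length. */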
2053void
2054pfsync_drop(struct pfsync_softc *sc)
2055{
2056	struct pf_state *st;
2057	struct pfsync_upd_req_item *ur;
2058#ifdef notyet
2059	struct tdb *t;
2060#endif
2061	int q;
2062
2063	for (q = 0; q < PFSYNC_S_COUNT; q++) {
2064		if (TAILQ_EMPTY(&sc->sc_qs[q]))
2065			continue;
2066
2067		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
2068#ifdef PFSYNC_DEBUG
2069#ifdef __FreeBSD__
2070			KASSERT(st->sync_state == q,
2071				("%s: st->sync_state == q",
2072					__FUNCTION__));
2073#else
2074			KASSERT(st->sync_state == q);
2075#endif
2076#endif
2077			st->sync_state = PFSYNC_S_NONE;
2078		}
2079		TAILQ_INIT(&sc->sc_qs[q]);
2080	}
2081
2082	while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
2083		TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
2084		pool_put(&sc->sc_pool, ur);
2085	}
2086
2087	sc->sc_plus = NULL;
2088
2089#ifdef notyet
2090	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
2091		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry)
2092			CLR(t->tdb_flags, TDBF_PFSYNC);
2093
2094		TAILQ_INIT(&sc->sc_tdb_q);
2095	}
2096#endif
2097
2098	sc->sc_len = PFSYNC_MINPKT;
2099}
2100
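/* Build a single pfsync packet from everything currently queued for transmission. */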
2101#ifdef __FreeBSD__
2102void pfsync_sendout()
2103{
2104	pfsync_sendout1(1);
2105}
2106
2107static void
2108pfsync_sendout1(int schedswi)
2109{
2110	struct pfsync_softc *sc = V_pfsyncif;
2111#else
2112void
2113pfsync_sendout(void)
2114{
2115	struct pfsync_softc *sc = pfsyncif;
2116#endif
2117#if NBPFILTER > 0
2118#ifdef __FreeBSD__
2119	struct ifnet *ifp = sc->sc_ifp;
2120#else
2121	struct ifnet *ifp = &sc->sc_if;
2122#endif
2123#endif
2124	struct mbuf *m;
2125	struct ip *ip;
2126	struct pfsync_header *ph;
2127	struct pfsync_subheader *subh;
2128	struct pf_state *st;
2129	struct pfsync_upd_req_item *ur;
2130#ifdef notyet
2131	struct tdb *t;
2132#endif
2133#ifdef __FreeBSD__
2134	size_t pktlen;
2135#endif
2136	int offset;
2137	int q, count = 0;
2138
2139#ifdef __FreeBSD__
2140	PF_LOCK_ASSERT();
2141#else
2142	splassert(IPL_NET);
2143#endif
2144
2145	if (sc == NULL || sc->sc_len == PFSYNC_MINPKT)
2146		return;
2147
2148#if NBPFILTER > 0
2149	if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) {
2150#else
2151	if (sc->sc_sync_if == NULL) {
2152#endif
2153		pfsync_drop(sc);
2154		return;
2155	}
2156
2157	MGETHDR(m, M_DONTWAIT, MT_DATA);
2158	if (m == NULL) {
2159#ifdef __FreeBSD__
2160		sc->sc_ifp->if_oerrors++;
2161#else
2162		sc->sc_if.if_oerrors++;
2163#endif
2164		V_pfsyncstats.pfsyncs_onomem++;
2165		pfsync_drop(sc);
2166		return;
2167	}
2168
2169#ifdef __FreeBSD__
2170	pktlen = max_linkhdr + sc->sc_len;
2171	if (pktlen > MHLEN) {
2172		/* Find the right pool to allocate from. */
2173		/* XXX: This is ugly. */
2174		m_cljget(m, M_DONTWAIT, pktlen <= MCLBYTES ? MCLBYTES :
2175#if MJUMPAGESIZE != MCLBYTES
2176			pktlen <= MJUMPAGESIZE ? MJUMPAGESIZE :
2177#endif
2178			pktlen <= MJUM9BYTES ? MJUM9BYTES : MJUM16BYTES);
2179#else
2180	if (max_linkhdr + sc->sc_len > MHLEN) {
2181		MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len);
2182#endif
2183		if (!ISSET(m->m_flags, M_EXT)) {
2184			m_free(m);
2185#ifdef __FreeBSD__
2186			sc->sc_ifp->if_oerrors++;
2187#else
2188			sc->sc_if.if_oerrors++;
2189#endif
2190			V_pfsyncstats.pfsyncs_onomem++;
2191			pfsync_drop(sc);
2192			return;
2193		}
2194	}
2195	m->m_data += max_linkhdr;
2196	m->m_len = m->m_pkthdr.len = sc->sc_len;
2197
2198	/* build the ip header */
2199	ip = (struct ip *)m->m_data;
2200	bcopy(&sc->sc_template, ip, sizeof(*ip));
2201	offset = sizeof(*ip);
2202
2203#ifdef __FreeBSD__
2204	ip->ip_len = m->m_pkthdr.len;
2205#else
2206	ip->ip_len = htons(m->m_pkthdr.len);
2207#endif
2208	ip->ip_id = htons(ip_randomid());
2209
2210	/* build the pfsync header */
2211	ph = (struct pfsync_header *)(m->m_data + offset);
2212	bzero(ph, sizeof(*ph));
2213	offset += sizeof(*ph);
2214
2215	ph->version = PFSYNC_VERSION;
2216	ph->len = htons(sc->sc_len - sizeof(*ip));
2217#ifdef __FreeBSD__
2218	bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
2219#else
2220	bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
2221#endif
2222
2223	/* walk the queues */
2224	for (q = 0; q < PFSYNC_S_COUNT; q++) {
2225		if (TAILQ_EMPTY(&sc->sc_qs[q]))
2226			continue;
2227
2228		subh = (struct pfsync_subheader *)(m->m_data + offset);
2229		offset += sizeof(*subh);
2230
2231		count = 0;
2232		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
2233#ifdef PFSYNC_DEBUG
2234#ifdef __FreeBSD__
2235			KASSERT(st->sync_state == q,
2236				("%s: st->sync_state == q",
2237					__FUNCTION__));
2238#else
2239			KASSERT(st->sync_state == q);
2240#endif
2241#endif
2242
2243			offset += pfsync_qs[q].write(st, m, offset);
2244			st->sync_state = PFSYNC_S_NONE;
2245			count++;
2246		}
2247		TAILQ_INIT(&sc->sc_qs[q]);
2248
2249		bzero(subh, sizeof(*subh));
2250		subh->action = pfsync_qs[q].action;
2251		subh->count = htons(count);
2252	}
2253
2254	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
2255		subh = (struct pfsync_subheader *)(m->m_data + offset);
2256		offset += sizeof(*subh);
2257
2258		count = 0;
2259		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
2260			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
2261
2262			bcopy(&ur->ur_msg, m->m_data + offset,
2263			    sizeof(ur->ur_msg));
2264			offset += sizeof(ur->ur_msg);
2265
2266			pool_put(&sc->sc_pool, ur);
2267
2268			count++;
2269		}
2270
2271		bzero(subh, sizeof(*subh));
2272		subh->action = PFSYNC_ACT_UPD_REQ;
2273		subh->count = htons(count);
2274	}
2275
2276	/* has someone built a custom region for us to add? */
2277	if (sc->sc_plus != NULL) {
2278		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
2279		offset += sc->sc_pluslen;
2280
2281		sc->sc_plus = NULL;
2282	}
2283
2284#ifdef notyet
2285	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
2286		subh = (struct pfsync_subheader *)(m->m_data + offset);
2287		offset += sizeof(*subh);
2288
2289		count = 0;
2290		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) {
2291			offset += pfsync_out_tdb(t, m, offset);
2292			CLR(t->tdb_flags, TDBF_PFSYNC);
2293
2294			count++;
2295		}
2296		TAILQ_INIT(&sc->sc_tdb_q);
2297
2298		bzero(subh, sizeof(*subh));
2299		subh->action = PFSYNC_ACT_TDB;
2300		subh->count = htons(count);
2301	}
2302#endif
2303
2304	subh = (struct pfsync_subheader *)(m->m_data + offset);
2305	offset += sizeof(*subh);
2306
2307	bzero(subh, sizeof(*subh));
2308	subh->action = PFSYNC_ACT_EOF;
2309	subh->count = htons(1);
2310
2311	/* XXX write checksum in EOF here */
2312
2313	/* we're done, let's put it on the wire */
2314#if NBPFILTER > 0
2315	if (ifp->if_bpf) {
2316		m->m_data += sizeof(*ip);
2317		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
2318#ifdef __FreeBSD__
2319		BPF_MTAP(ifp, m);
2320#else
2321		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
2322#endif
2323		m->m_data -= sizeof(*ip);
2324		m->m_len = m->m_pkthdr.len = sc->sc_len;
2325	}
2326
2327	if (sc->sc_sync_if == NULL) {
2328		sc->sc_len = PFSYNC_MINPKT;
2329		m_freem(m);
2330		return;
2331	}
2332#endif
2333
2334#ifdef __FreeBSD__
2335	sc->sc_ifp->if_opackets++;
2336	sc->sc_ifp->if_obytes += m->m_pkthdr.len;
2337	sc->sc_len = PFSYNC_MINPKT;
2338
2339	if (!_IF_QFULL(&sc->sc_ifp->if_snd))
2340		_IF_ENQUEUE(&sc->sc_ifp->if_snd, m);
2341	else {
2342		m_freem(m);
2343		sc->sc_ifp->if_snd.ifq_drops++;
2344	}
2345	if (schedswi)
2346		swi_sched(V_pfsync_swi_cookie, 0);
2347#else
2348	sc->sc_if.if_opackets++;
2349	sc->sc_if.if_obytes += m->m_pkthdr.len;
2350
2351	if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL) == 0)
2352		pfsyncstats.pfsyncs_opackets++;
2353	else
2354		pfsyncstats.pfsyncs_oerrors++;
2355
2356	/* start again */
2357	sc->sc_len = PFSYNC_MINPKT;
2358#endif
2359}
2360
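/*
 * Called by pf when a new state has been created.  States created by NOSYNC
 * rules or by pfsync traffic itself are never announced; otherwise an insert
 * message is queued and, if the packet was empty, the one-second transmit
 * timeout is armed.
 */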
2361void
2362pfsync_insert_state(struct pf_state *st)
2363{
2364#ifdef __FreeBSD__
2365	struct pfsync_softc *sc = V_pfsyncif;
2366#else
2367	struct pfsync_softc *sc = pfsyncif;
2368#endif
2369
2370#ifdef __FreeBSD__
2371	PF_LOCK_ASSERT();
2372#else
2373	splassert(IPL_SOFTNET);
2374#endif
2375
2376	if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
2377	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
2378		SET(st->state_flags, PFSTATE_NOSYNC);
2379		return;
2380	}
2381
2382	if (sc == NULL || ISSET(st->state_flags, PFSTATE_NOSYNC))
2383		return;
2384
2385#ifdef PFSYNC_DEBUG
2386#ifdef __FreeBSD__
2387	KASSERT(st->sync_state == PFSYNC_S_NONE,
2388		("%s: st->sync_state == PFSYNC_S_NONE", __FUNCTION__));
2389#else
2390	KASSERT(st->sync_state == PFSYNC_S_NONE);
2391#endif
2392#endif
2393
2394	if (sc->sc_len == PFSYNC_MINPKT)
2395#ifdef __FreeBSD__
2396		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout,
2397		    V_pfsyncif);
2398#else
2399		timeout_add_sec(&sc->sc_tmo, 1);
2400#endif
2401
2402	pfsync_q_ins(st, PFSYNC_S_INS);
2403
2404	if (ISSET(st->state_flags, PFSTATE_ACK))
2405		schednetisr(NETISR_PFSYNC);
2406	else
2407		st->sync_updates = 0;
2408}
2409
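/*
 * Packet deferral: when pf creates a state from the first packet of a
 * connection, pfsync_defer() can hold that packet (and mark the state
 * PFSTATE_ACK) so the peer has a chance to learn about the new state before
 * the packet is released.  "defer" is the hold time in ticks; at most 128
 * deferrals are outstanding at once, beyond that the oldest one is released.
 * A deferral is undone either explicitly via pfsync_deferred() or when the
 * timeout fires.
 */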
2410int defer = 10;
2411
2412int
2413pfsync_defer(struct pf_state *st, struct mbuf *m)
2414{
2415#ifdef __FreeBSD__
2416	struct pfsync_softc *sc = V_pfsyncif;
2417#else
2418	struct pfsync_softc *sc = pfsyncif;
2419#endif
2420	struct pfsync_deferral *pd;
2421
2422#ifdef __FreeBSD__
2423	PF_LOCK_ASSERT();
2424#else
2425	splassert(IPL_SOFTNET);
2426#endif
2427
2428	if (sc->sc_deferred >= 128)
2429		pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);
2430
2431	pd = pool_get(&sc->sc_pool, M_NOWAIT);
2432	if (pd == NULL)
2433		return (0);
2434	sc->sc_deferred++;
2435
2436#ifdef __FreeBSD__
2437	m->m_flags |= M_SKIP_FIREWALL;
2438#else
2439	m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
2440#endif
2441	SET(st->state_flags, PFSTATE_ACK);
2442
2443	pd->pd_st = st;
2444	pd->pd_m = m;
2445
2446	TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
2447#ifdef __FreeBSD__
2448	callout_init(&pd->pd_tmo, CALLOUT_MPSAFE);
2449	callout_reset(&pd->pd_tmo, defer, pfsync_defer_tmo,
2450		pd);
2451#else
2452	timeout_set(&pd->pd_tmo, pfsync_defer_tmo, pd);
2453	timeout_add(&pd->pd_tmo, defer);
2454#endif
2455
2456	return (1);
2457}
2458
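/*
 * Release a deferral: unlink it, clear PFSTATE_ACK and either drop the held
 * packet or push it out through ip_output().
 */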
2459void
2460pfsync_undefer(struct pfsync_deferral *pd, int drop)
2461{
2462#ifdef __FreeBSD__
2463	struct pfsync_softc *sc = V_pfsyncif;
2464#else
2465	struct pfsync_softc *sc = pfsyncif;
2466#endif
2467	int s;
2468
2469#ifdef __FreeBSD__
2470	PF_LOCK_ASSERT();
2471#else
2472	splassert(IPL_SOFTNET);
2473#endif
2474
2475	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
2476	sc->sc_deferred--;
2477
2478	CLR(pd->pd_st->state_flags, PFSTATE_ACK);
2479	timeout_del(&pd->pd_tmo); /* bah */
2480	if (drop)
2481		m_freem(pd->pd_m);
2482	else {
2483		s = splnet();
2484#ifdef __FreeBSD__
2485		/* XXX: use pf_deferred?! */
2486		PF_UNLOCK();
2487#endif
2488		ip_output(pd->pd_m, (void *)NULL, (void *)NULL, 0,
2489		    (void *)NULL, (void *)NULL);
2490#ifdef __FreeBSD__
2491		PF_LOCK();
2492#endif
2493		splx(s);
2494	}
2495
2496	pool_put(&sc->sc_pool, pd);
2497}
2498
2499void
2500pfsync_defer_tmo(void *arg)
2501{
2502#if defined(__FreeBSD__) && defined(VIMAGE)
2503	struct pfsync_deferral *pd = arg;
2504#endif
2505	int s;
2506
2507	s = splsoftnet();
2508#ifdef __FreeBSD__
2509	CURVNET_SET(pd->pd_m->m_pkthdr.rcvif->if_vnet); /* XXX */
2510	PF_LOCK();
2511#endif
2512	pfsync_undefer(arg, 0);
2513#ifdef __FreeBSD__
2514	PF_UNLOCK();
2515	CURVNET_RESTORE();
2516#endif
2517	splx(s);
2518}
2519
2520void
2521pfsync_deferred(struct pf_state *st, int drop)
2522{
2523#ifdef __FreeBSD__
2524	struct pfsync_softc *sc = V_pfsyncif;
2525#else
2526	struct pfsync_softc *sc = pfsyncif;
2527#endif
2528	struct pfsync_deferral *pd;
2529
2530	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
2531		if (pd->pd_st == st) {
2532			pfsync_undefer(pd, drop);
2533			return;
2534		}
2535	}
2536
2537	panic("pfsync_deferred: unable to find deferred state");
2538}
2539
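/*
 * Called by pf whenever a state changes.  A state that is already queued
 * stays in its queue; for TCP the pending packet is pushed out once
 * sc_maxupdates updates have accumulated.  States not yet queued get a
 * compressed update (PFSYNC_S_UPD_C).  The netisr is also kicked when the
 * state saw pfsync activity less than two seconds ago.
 */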
2540u_int pfsync_upds = 0;
2541
2542void
2543pfsync_update_state(struct pf_state *st)
2544{
2545#ifdef __FreeBSD__
2546	struct pfsync_softc *sc = V_pfsyncif;
2547#else
2548	struct pfsync_softc *sc = pfsyncif;
2549#endif
2550	int sync = 0;
2551
2552#ifdef __FreeBSD__
2553	PF_LOCK_ASSERT();
2554#else
2555	splassert(IPL_SOFTNET);
2556#endif
2557
2558	if (sc == NULL)
2559		return;
2560
2561	if (ISSET(st->state_flags, PFSTATE_ACK))
2562		pfsync_deferred(st, 0);
2563	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
2564		if (st->sync_state != PFSYNC_S_NONE)
2565			pfsync_q_del(st);
2566		return;
2567	}
2568
2569	if (sc->sc_len == PFSYNC_MINPKT)
2570#ifdef __FreeBSD__
2571		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout,
2572		    V_pfsyncif);
2573#else
2574		timeout_add_sec(&sc->sc_tmo, 1);
2575#endif
2576
2577	switch (st->sync_state) {
2578	case PFSYNC_S_UPD_C:
2579	case PFSYNC_S_UPD:
2580	case PFSYNC_S_INS:
2581		/* we're already handling it */
2582
2583		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
2584			st->sync_updates++;
2585			if (st->sync_updates >= sc->sc_maxupdates)
2586				sync = 1;
2587		}
2588		break;
2589
2590	case PFSYNC_S_IACK:
2591		pfsync_q_del(st);
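		/* FALLTHROUGH */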
2592	case PFSYNC_S_NONE:
2593		pfsync_q_ins(st, PFSYNC_S_UPD_C);
2594		st->sync_updates = 0;
2595		break;
2596
2597	default:
2598		panic("pfsync_update_state: unexpected sync state %d",
2599		    st->sync_state);
2600	}
2601
2602	if (sync || (time_uptime - st->pfsync_time) < 2) {
2603		pfsync_upds++;
2604		schednetisr(NETISR_PFSYNC);
2605	}
2606}
2607
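/*
 * Queue a PFSYNC_ACT_UPD_REQ asking the peer to resend the full state
 * identified by (creatorid, id).  A request with both fields zero asks for a
 * bulk update of all states.
 */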
2608void
2609pfsync_request_update(u_int32_t creatorid, u_int64_t id)
2610{
2611#ifdef __FreeBSD__
2612	struct pfsync_softc *sc = V_pfsyncif;
2613#else
2614	struct pfsync_softc *sc = pfsyncif;
2615#endif
2616	struct pfsync_upd_req_item *item;
2617	size_t nlen = sizeof(struct pfsync_upd_req);
2618	int s;
2619
2620	PF_LOCK_ASSERT();
2621
2622	/*
2623	 * this code does nothing to prevent multiple update requests for the
2624	 * same state being generated.
2625	 */
2626
2627	item = pool_get(&sc->sc_pool, PR_NOWAIT);
2628	if (item == NULL) {
2629		/* XXX stats */
2630		return;
2631	}
2632
2633	item->ur_msg.id = id;
2634	item->ur_msg.creatorid = creatorid;
2635
2636	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
2637		nlen += sizeof(struct pfsync_subheader);
2638
2639#ifdef __FreeBSD__
2640	if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
2641#else
2642	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
2643#endif
2644		s = splnet();
2645		pfsync_sendout();
2646		splx(s);
2647
2648		nlen = sizeof(struct pfsync_subheader) +
2649		    sizeof(struct pfsync_upd_req);
2650	}
2651
2652	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
2653	sc->sc_len += nlen;
2654
2655	schednetisr(NETISR_PFSYNC);
2656}
2657
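/*
 * Queue a full state update (PFSYNC_S_UPD), used when answering a peer's
 * update request; unlike pfsync_update_state() this always sends the
 * complete state.
 */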
2658void
2659pfsync_update_state_req(struct pf_state *st)
2660{
2661#ifdef __FreeBSD__
2662	struct pfsync_softc *sc = V_pfsyncif;
2663#else
2664	struct pfsync_softc *sc = pfsyncif;
2665#endif
2666
2667	PF_LOCK_ASSERT();
2668
2669	if (sc == NULL)
2670		panic("pfsync_update_state_req: nonexistent instance");
2671
2672	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
2673		if (st->sync_state != PFSYNC_S_NONE)
2674			pfsync_q_del(st);
2675		return;
2676	}
2677
2678	switch (st->sync_state) {
2679	case PFSYNC_S_UPD_C:
2680	case PFSYNC_S_IACK:
2681		pfsync_q_del(st);
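		/* FALLTHROUGH */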
2682	case PFSYNC_S_NONE:
2683		pfsync_q_ins(st, PFSYNC_S_UPD);
2684		schednetisr(NETISR_PFSYNC);
2685		return;
2686
2687	case PFSYNC_S_INS:
2688	case PFSYNC_S_UPD:
2689	case PFSYNC_S_DEL:
2690		/* we're already handling it */
2691		return;
2692
2693	default:
2694		panic("pfsync_update_state_req: unexpected sync state %d",
2695		    st->sync_state);
2696	}
2697}
2698
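/*
 * Called by pf when a state is removed.  States that were never announced
 * are simply dequeued; everything else is moved to the delete queue
 * (PFSYNC_S_DEL).
 */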
2699void
2700pfsync_delete_state(struct pf_state *st)
2701{
2702#ifdef __FreeBSD__
2703	struct pfsync_softc *sc = V_pfsyncif;
2704#else
2705	struct pfsync_softc *sc = pfsyncif;
2706#endif
2707
2708#ifdef __FreeBSD__
2709	PF_LOCK_ASSERT();
2710#else
2711	splassert(IPL_SOFTNET);
2712#endif
2713
2714	if (sc == NULL)
2715		return;
2716
2717	if (ISSET(st->state_flags, PFSTATE_ACK))
2718		pfsync_deferred(st, 1);
2719	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
2720		if (st->sync_state != PFSYNC_S_NONE)
2721			pfsync_q_del(st);
2722		return;
2723	}
2724
2725	if (sc->sc_len == PFSYNC_MINPKT)
2726#ifdef __FreeBSD__
2727		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout,
2728		    V_pfsyncif);
2729#else
2730		timeout_add_sec(&sc->sc_tmo, 1);
2731#endif
2732
2733	switch (st->sync_state) {
2734	case PFSYNC_S_INS:
2735		/* we never got to tell the world so just forget about it */
2736		pfsync_q_del(st);
2737		return;
2738
2739	case PFSYNC_S_UPD_C:
2740	case PFSYNC_S_UPD:
2741	case PFSYNC_S_IACK:
2742		pfsync_q_del(st);
2743		/* FALLTHROUGH to putting it on the del list */
2744
2745	case PFSYNC_S_NONE:
2746		pfsync_q_ins(st, PFSYNC_S_DEL);
2747		return;
2748
2749	default:
2750		panic("pfsync_delete_state: unexpected sync state %d",
2751		    st->sync_state);
2752	}
2753}
2754
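/*
 * Announce that all states created by "creatorid" on "ifname" have been
 * flushed, using the "plus" region so the message goes out immediately.
 */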
2755void
2756pfsync_clear_states(u_int32_t creatorid, const char *ifname)
2757{
2758	struct {
2759		struct pfsync_subheader subh;
2760		struct pfsync_clr clr;
2761	} __packed r;
2762
2763#ifdef __FreeBSD__
2764	struct pfsync_softc *sc = V_pfsyncif;
2765#else
2766	struct pfsync_softc *sc = pfsyncif;
2767#endif
2768
2769#ifdef __FreeBSD__
2770	PF_LOCK_ASSERT();
2771#else
2772	splassert(IPL_SOFTNET);
2773#endif
2774
2775	if (sc == NULL)
2776		return;
2777
2778	bzero(&r, sizeof(r));
2779
2780	r.subh.action = PFSYNC_ACT_CLR;
2781	r.subh.count = htons(1);
2782
2783	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
2784	r.clr.creatorid = creatorid;
2785
2786	pfsync_send_plus(&r, sizeof(r));
2787}
2788
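/*
 * pfsync_q_ins()/pfsync_q_del() keep sc_len in step with the queues:
 * queueing a state adds pfsync_qs[q].len plus a subheader when the queue was
 * empty, and the pending packet is flushed first whenever the addition would
 * exceed the interface MTU.
 */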
2789void
2790pfsync_q_ins(struct pf_state *st, int q)
2791{
2792#ifdef __FreeBSD__
2793	struct pfsync_softc *sc = V_pfsyncif;
2794#else
2795	struct pfsync_softc *sc = pfsyncif;
2796#endif
2797	size_t nlen = pfsync_qs[q].len;
2798	int s;
2799
2800	PF_LOCK_ASSERT();
2801
2802#ifdef __FreeBSD__
2803	KASSERT(st->sync_state == PFSYNC_S_NONE,
2804		("%s: st->sync_state == PFSYNC_S_NONE", __FUNCTION__));
2805#else
2806	KASSERT(st->sync_state == PFSYNC_S_NONE);
2807#endif
2808
2809#if 1 || defined(PFSYNC_DEBUG)
2810	if (sc->sc_len < PFSYNC_MINPKT)
2811#ifdef __FreeBSD__
2812		panic("pfsync pkt len is too low %zu", sc->sc_len);
2813#else
2814		panic("pfsync pkt len is too low %d", sc->sc_len);
2815#endif
2816#endif
2817	if (TAILQ_EMPTY(&sc->sc_qs[q]))
2818		nlen += sizeof(struct pfsync_subheader);
2819
2820#ifdef __FreeBSD__
2821	if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
2822#else
2823	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
2824#endif
2825		s = splnet();
2826		pfsync_sendout();
2827		splx(s);
2828
2829		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
2830	}
2831
2832	sc->sc_len += nlen;
2833	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
2834	st->sync_state = q;
2835}
2836
2837void
2838pfsync_q_del(struct pf_state *st)
2839{
2840#ifdef __FreeBSD__
2841	struct pfsync_softc *sc = V_pfsyncif;
2842#else
2843	struct pfsync_softc *sc = pfsyncif;
2844#endif
2845	int q = st->sync_state;
2846
2847#ifdef __FreeBSD__
2848	KASSERT(st->sync_state != PFSYNC_S_NONE,
2849		("%s: st->sync_state != PFSYNC_S_NONE", __FUNCTION__));
2850#else
2851	KASSERT(st->sync_state != PFSYNC_S_NONE);
2852#endif
2853
2854	sc->sc_len -= pfsync_qs[q].len;
2855	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
2856	st->sync_state = PFSYNC_S_NONE;
2857
2858	if (TAILQ_EMPTY(&sc->sc_qs[q]))
2859		sc->sc_len -= sizeof(struct pfsync_subheader);
2860}
2861
2862#ifdef notyet
2863void
2864pfsync_update_tdb(struct tdb *t, int output)
2865{
2866#ifdef __FreeBSD__
2867	struct pfsync_softc *sc = V_pfsyncif;
2868#else
2869	struct pfsync_softc *sc = pfsyncif;
2870#endif
2871	size_t nlen = sizeof(struct pfsync_tdb);
2872	int s;
2873
2874	if (sc == NULL)
2875		return;
2876
2877	if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
2878		if (TAILQ_EMPTY(&sc->sc_tdb_q))
2879			nlen += sizeof(struct pfsync_subheader);
2880
2881		if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
2882			s = splnet();
2883			PF_LOCK();
2884			pfsync_sendout();
2885			PF_UNLOCK();
2886			splx(s);
2887
2888			nlen = sizeof(struct pfsync_subheader) +
2889			    sizeof(struct pfsync_tdb);
2890		}
2891
2892		sc->sc_len += nlen;
2893		TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
2894		SET(t->tdb_flags, TDBF_PFSYNC);
2895		t->tdb_updates = 0;
2896	} else {
2897		if (++t->tdb_updates >= sc->sc_maxupdates)
2898			schednetisr(NETISR_PFSYNC);
2899	}
2900
2901	if (output)
2902		SET(t->tdb_flags, TDBF_PFSYNC_RPL);
2903	else
2904		CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
2905}
2906
2907void
2908pfsync_delete_tdb(struct tdb *t)
2909{
2910#ifdef __FreeBSD__
2911	struct pfsync_softc *sc = V_pfsyncif;
2912#else
2913	struct pfsync_softc *sc = pfsyncif;
2914#endif
2915
2916	if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
2917		return;
2918
2919	sc->sc_len -= sizeof(struct pfsync_tdb);
2920	TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
2921	CLR(t->tdb_flags, TDBF_PFSYNC);
2922
2923	if (TAILQ_EMPTY(&sc->sc_tdb_q))
2924		sc->sc_len -= sizeof(struct pfsync_subheader);
2925}
2926
2927int
2928pfsync_out_tdb(struct tdb *t, struct mbuf *m, int offset)
2929{
2930	struct pfsync_tdb *ut = (struct pfsync_tdb *)(m->m_data + offset);
2931
2932	bzero(ut, sizeof(*ut));
2933	ut->spi = t->tdb_spi;
2934	bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
2935	/*
2936	 * When a failover happens, the master's rpl is probably above
2937	 * what we see here (we may be up to a second late), so
2938	 * increase it a bit for outbound tdbs to manage most such
2939	 * situations.
2940	 *
2941	 * For now, just add an offset that is likely to be larger
2942	 * than the number of packets we can see in one second. The RFC
2943	 * just says the next packet must have a higher seq value.
2944	 *
2945	 * XXX What is a good algorithm for this? We could use
2946	 * a rate-determined increase, but to know it, we would have
2947	 * to extend struct tdb.
2948	 * XXX t->tdb_rpl can wrap over MAXINT, but if so the real tdb
2949	 * will soon be replaced anyway. For now, just don't handle
2950	 * this edge case.
2951	 */
2952#define RPL_INCR 16384
2953	ut->rpl = htonl(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
2954	    RPL_INCR : 0));
2955	ut->cur_bytes = htobe64(t->tdb_cur_bytes);
2956	ut->sproto = t->tdb_sproto;
2957
2958	return (sizeof(*ut));
2959}
2960#endif
2961
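/*
 * Bulk update: on receipt of a bulk request the sender emits
 * PFSYNC_BUS_START, then walks the state list from repeated callouts,
 * queueing a full update for every eligible state and stopping to flush
 * whenever a packet fills up, and finishes with PFSYNC_BUS_END.  The
 * requesting side arms sc_bulkfail_tmo and retries the request from
 * pfsync_bulk_fail() up to PFSYNC_MAX_BULKTRIES times before giving up.
 */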
2962void
2963pfsync_bulk_start(void)
2964{
2965#ifdef __FreeBSD__
2966	struct pfsync_softc *sc = V_pfsyncif;
2967#else
2968	struct pfsync_softc *sc = pfsyncif;
2969#endif
2970
2971#ifdef __FreeBSD__
2972	if (V_pf_status.debug >= PF_DEBUG_MISC)
2973#else
2974	if (pf_status.debug >= PF_DEBUG_MISC)
2975#endif
2976		printf("pfsync: received bulk update request\n");
2977
2978#ifdef __FreeBSD__
2979	PF_LOCK_ASSERT();
2980	if (TAILQ_EMPTY(&V_state_list))
2981#else
2982	if (TAILQ_EMPTY(&state_list))
2983#endif
2984		pfsync_bulk_status(PFSYNC_BUS_END);
2985	else {
2986		sc->sc_ureq_received = time_uptime;
2987		if (sc->sc_bulk_next == NULL)
2988#ifdef __FreeBSD__
2989			sc->sc_bulk_next = TAILQ_FIRST(&V_state_list);
2990#else
2991			sc->sc_bulk_next = TAILQ_FIRST(&state_list);
2992#endif
2993		sc->sc_bulk_last = sc->sc_bulk_next;
2994
2995		pfsync_bulk_status(PFSYNC_BUS_START);
2996		callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc);
2997	}
2998}
2999
3000void
3001pfsync_bulk_update(void *arg)
3002{
3003	struct pfsync_softc *sc = arg;
3004	struct pf_state *st = sc->sc_bulk_next;
3005	int i = 0;
3006	int s;
3007
3008	PF_LOCK_ASSERT();
3009
3010	s = splsoftnet();
3011#ifdef __FreeBSD__
3012	CURVNET_SET(sc->sc_ifp->if_vnet);
3013#endif
3014	for (;;) {
3015		if (st->sync_state == PFSYNC_S_NONE &&
3016		    st->timeout < PFTM_MAX &&
3017		    st->pfsync_time <= sc->sc_ureq_received) {
3018			pfsync_update_state_req(st);
3019			i++;
3020		}
3021
3022		st = TAILQ_NEXT(st, entry_list);
3023		if (st == NULL)
3024#ifdef __FreeBSD__
3025			st = TAILQ_FIRST(&V_state_list);
3026#else
3027			st = TAILQ_FIRST(&state_list);
3028#endif
3029
3030		if (st == sc->sc_bulk_last) {
3031			/* we're done */
3032			sc->sc_bulk_next = NULL;
3033			sc->sc_bulk_last = NULL;
3034			pfsync_bulk_status(PFSYNC_BUS_END);
3035			break;
3036		}
3037
3038#ifdef __FreeBSD__
3039		if (i > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) <
3040#else
3041		if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) <
3042#endif
3043		    sizeof(struct pfsync_state)) {
3044			/* we've filled a packet */
3045			sc->sc_bulk_next = st;
3046#ifdef __FreeBSD__
3047			callout_reset(&sc->sc_bulk_tmo, 1,
3048			    pfsync_bulk_update, sc);
3049#else
3050			timeout_add(&sc->sc_bulk_tmo, 1);
3051#endif
3052			break;
3053		}
3054	}
3055
3056#ifdef __FreeBSD__
3057	CURVNET_RESTORE();
3058#endif
3059	splx(s);
3060}
3061
3062void
3063pfsync_bulk_status(u_int8_t status)
3064{
3065	struct {
3066		struct pfsync_subheader subh;
3067		struct pfsync_bus bus;
3068	} __packed r;
3069
3070#ifdef __FreeBSD__
3071	struct pfsync_softc *sc = V_pfsyncif;
3072#else
3073	struct pfsync_softc *sc = pfsyncif;
3074#endif
3075
3076	PF_LOCK_ASSERT();
3077
3078	bzero(&r, sizeof(r));
3079
3080	r.subh.action = PFSYNC_ACT_BUS;
3081	r.subh.count = htons(1);
3082
3083#ifdef __FreeBSD__
3084	r.bus.creatorid = V_pf_status.hostid;
3085#else
3086	r.bus.creatorid = pf_status.hostid;
3087#endif
3088	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
3089	r.bus.status = status;
3090
3091	pfsync_send_plus(&r, sizeof(r));
3092}
3093
3094void
3095pfsync_bulk_fail(void *arg)
3096{
3097	struct pfsync_softc *sc = arg;
3098
3099#ifdef __FreeBSD__
3100	CURVNET_SET(sc->sc_ifp->if_vnet);
3101#endif
3102
3103	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
3104		/* Try again */
3105#ifdef __FreeBSD__
3106		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
3107		    pfsync_bulk_fail, V_pfsyncif);
3108#else
3109		timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
3110#endif
3111		PF_LOCK();
3112		pfsync_request_update(0, 0);
3113		PF_UNLOCK();
3114	} else {
3115		/* Pretend like the transfer was ok */
3116		sc->sc_ureq_sent = 0;
3117		sc->sc_bulk_tries = 0;
3118#if NCARP > 0
3119#ifdef notyet
3120#ifdef __FreeBSD__
3121		if (!sc->pfsync_sync_ok)
3122#else
3123		if (!pfsync_sync_ok)
3124#endif
3125			carp_group_demote_adj(&sc->sc_if, -1);
3126#endif
3127#endif
3128#ifdef __FreeBSD__
3129		sc->pfsync_sync_ok = 1;
3130#else
3131		pfsync_sync_ok = 1;
3132#endif
3133#ifdef __FreeBSD__
3134		if (V_pf_status.debug >= PF_DEBUG_MISC)
3135#else
3136		if (pf_status.debug >= PF_DEBUG_MISC)
3137#endif
3138			printf("pfsync: failed to receive bulk update\n");
3139	}
3140
3141#ifdef __FreeBSD__
3142	CURVNET_RESTORE();
3143#endif
3144}
3145
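/*
 * Attach a caller-built subheader + payload to the packet (flushing first if
 * it would not fit) and send it out right away.
 */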
3146void
3147pfsync_send_plus(void *plus, size_t pluslen)
3148{
3149#ifdef __FreeBSD__
3150	struct pfsync_softc *sc = V_pfsyncif;
3151#else
3152	struct pfsync_softc *sc = pfsyncif;
3153#endif
3154	int s;
3155
3156	PF_LOCK_ASSERT();
3157
3158#ifdef __FreeBSD__
3159	if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu) {
3160#else
3161	if (sc->sc_len + pluslen > sc->sc_if.if_mtu) {
3162#endif
3163		s = splnet();
3164		pfsync_sendout();
3165		splx(s);
3166	}
3167
3168	sc->sc_plus = plus;
3169	sc->sc_len += (sc->sc_pluslen = pluslen);
3170
3171	s = splnet();
3172	pfsync_sendout();
3173	splx(s);
3174}
3175
3176int
3177pfsync_up(void)
3178{
3179#ifdef __FreeBSD__
3180	struct pfsync_softc *sc = V_pfsyncif;
3181#else
3182	struct pfsync_softc *sc = pfsyncif;
3183#endif
3184
3185#ifdef __FreeBSD__
3186	if (sc == NULL || !ISSET(sc->sc_ifp->if_flags, IFF_DRV_RUNNING))
3187#else
3188	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
3189#endif
3190		return (0);
3191
3192	return (1);
3193}
3194
3195int
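/*
 * Report whether pfsync still references this state: it is queued for
 * transmission or is one of the bulk-update cursors.
 */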
3196pfsync_state_in_use(struct pf_state *st)
3197{
3198#ifdef __FreeBSD__
3199	struct pfsync_softc *sc = V_pfsyncif;
3200#else
3201	struct pfsync_softc *sc = pfsyncif;
3202#endif
3203
3204	if (sc == NULL)
3205		return (0);
3206
3207	if (st->sync_state != PFSYNC_S_NONE ||
3208	    st == sc->sc_bulk_next ||
3209	    st == sc->sc_bulk_last)
3210		return (1);
3211
3212	return (0);
3213}
3214
3215u_int pfsync_ints;
3216u_int pfsync_tmos;
3217
3218void
3219pfsync_timeout(void *arg)
3220{
3221#if defined(__FreeBSD__) && defined(VIMAGE)
3222	struct pfsync_softc *sc = arg;
3223#endif
3224	int s;
3225
3226#ifdef __FreeBSD__
3227	CURVNET_SET(sc->sc_ifp->if_vnet);
3228#endif
3229
3230	pfsync_tmos++;
3231
3232	s = splnet();
3233#ifdef __FreeBSD__
3234	PF_LOCK();
3235#endif
3236	pfsync_sendout();
3237#ifdef __FreeBSD__
3238	PF_UNLOCK();
3239#endif
3240	splx(s);
3241
3242#ifdef __FreeBSD__
3243	CURVNET_RESTORE();
3244#endif
3245}
3246
3247/* this is a softnet/netisr handler */
3248void
3249#ifdef __FreeBSD__
3250pfsyncintr(void *arg)
3251{
3252	struct pfsync_softc *sc = arg;
3253	struct mbuf *m, *n;
3254
3255	CURVNET_SET(sc->sc_ifp->if_vnet);
3256	pfsync_ints++;
3257
3258	PF_LOCK();
3259	if (sc->sc_len > PFSYNC_MINPKT)
3260		pfsync_sendout1(0);
3261	_IF_DEQUEUE_ALL(&sc->sc_ifp->if_snd, m);
3262	PF_UNLOCK();
3263
3264	for (; m != NULL; m = n) {
3265
3266		n = m->m_nextpkt;
3267		m->m_nextpkt = NULL;
3268		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)
3269		    == 0)
3270			V_pfsyncstats.pfsyncs_opackets++;
3271		else
3272			V_pfsyncstats.pfsyncs_oerrors++;
3273	}
3274	CURVNET_RESTORE();
3275}
3276#else
3277pfsyncintr(void)
3278{
3279	int s;
3280
3281	pfsync_ints++;
3282
3283	s = splnet();
3284	pfsync_sendout();
3285	splx(s);
3286}
3287#endif
3288
3289int
3290pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
3291    size_t newlen)
3292{
3293
3294#ifdef notyet
3295	/* All sysctl names at this level are terminal. */
3296	if (namelen != 1)
3297		return (ENOTDIR);
3298
3299	switch (name[0]) {
3300	case PFSYNCCTL_STATS:
3301		if (newp != NULL)
3302			return (EPERM);
3303		return (sysctl_struct(oldp, oldlenp, newp, newlen,
3304		    &V_pfsyncstats, sizeof(V_pfsyncstats)));
3305	}
3306#endif
3307	return (ENOPROTOOPT);
3308}
3309
3310#ifdef __FreeBSD__
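/*
 * Join the configured sync peer's multicast group on the sync interface,
 * with TTL PFSYNC_DFLTTL and multicast loopback disabled.
 */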
3311static int
3312pfsync_multicast_setup(struct pfsync_softc *sc)
3313{
3314	struct ip_moptions *imo = &sc->sc_imo;
3315	int error;
3316
3317	if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
3318		sc->sc_sync_if = NULL;
3319		return (EADDRNOTAVAIL);
3320	}
3321
3322	imo->imo_membership = (struct in_multi **)malloc(
3323	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_PFSYNC,
3324	    M_WAITOK | M_ZERO);
3325	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
3326	imo->imo_multicast_vif = -1;
3327
3328	if ((error = in_joingroup(sc->sc_sync_if, &sc->sc_sync_peer, NULL,
3329	    &imo->imo_membership[0])) != 0) {
3330		free(imo->imo_membership, M_PFSYNC);
3331		return (error);
3332	}
3333	imo->imo_num_memberships++;
3334	imo->imo_multicast_ifp = sc->sc_sync_if;
3335	imo->imo_multicast_ttl = PFSYNC_DFLTTL;
3336	imo->imo_multicast_loop = 0;
3337
3338	return (0);
3339}
3340
3341static void
3342pfsync_multicast_cleanup(struct pfsync_softc *sc)
3343{
3344	struct ip_moptions *imo = &sc->sc_imo;
3345
3346	in_leavegroup(imo->imo_membership[0], NULL);
3347	free(imo->imo_membership, M_PFSYNC);
3348	imo->imo_membership = NULL;
3349	imo->imo_multicast_ifp = NULL;
3350}
3351
3352#ifdef INET
3353extern  struct domain inetdomain;
3354static struct protosw in_pfsync_protosw = {
3355	.pr_type =		SOCK_RAW,
3356	.pr_domain =		&inetdomain,
3357	.pr_protocol =		IPPROTO_PFSYNC,
3358	.pr_flags =		PR_ATOMIC|PR_ADDR,
3359	.pr_input =		pfsync_input,
3360	.pr_output =		(pr_output_t *)rip_output,
3361	.pr_ctloutput =		rip_ctloutput,
3362	.pr_usrreqs =		&rip_usrreqs
3363};
3364#endif
3365
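/*
 * Module load: attach the interface cloner and a software interrupt handler
 * in every vnet, register the IPPROTO_PFSYNC input path, and install the
 * function pointers through which pf calls into pfsync.  Errors unwind the
 * per-vnet pieces that were already set up.
 */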
3366static int
3367pfsync_init(void)
3368{
3369	VNET_ITERATOR_DECL(vnet_iter);
3370	int error = 0;
3371
3372	VNET_LIST_RLOCK();
3373	VNET_FOREACH(vnet_iter) {
3374		CURVNET_SET(vnet_iter);
3375		V_pfsync_cloner = pfsync_cloner;
3376		V_pfsync_cloner_data = pfsync_cloner_data;
3377		V_pfsync_cloner.ifc_data = &V_pfsync_cloner_data;
3378		if_clone_attach(&V_pfsync_cloner);
3379		error = swi_add(NULL, "pfsync", pfsyncintr, V_pfsyncif,
3380		    SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie);
3381		CURVNET_RESTORE();
3382		if (error)
3383			goto fail_locked;
3384	}
3385	VNET_LIST_RUNLOCK();
3386#ifdef INET
3387	error = pf_proto_register(PF_INET, &in_pfsync_protosw);
3388	if (error)
3389		goto fail;
3390	error = ipproto_register(IPPROTO_PFSYNC);
3391	if (error) {
3392		pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW);
3393		goto fail;
3394	}
3395#endif
3396	PF_LOCK();
3397	pfsync_state_import_ptr = pfsync_state_import;
3398	pfsync_up_ptr = pfsync_up;
3399	pfsync_insert_state_ptr = pfsync_insert_state;
3400	pfsync_update_state_ptr = pfsync_update_state;
3401	pfsync_delete_state_ptr = pfsync_delete_state;
3402	pfsync_clear_states_ptr = pfsync_clear_states;
3403	pfsync_state_in_use_ptr = pfsync_state_in_use;
3404	pfsync_defer_ptr = pfsync_defer;
3405	PF_UNLOCK();
3406
3407	return (0);
3408
3409fail:
3410	VNET_LIST_RLOCK();
3411fail_locked:
3412	VNET_FOREACH(vnet_iter) {
3413		CURVNET_SET(vnet_iter);
3414		if (V_pfsync_swi_cookie) {
3415			swi_remove(V_pfsync_swi_cookie);
3416			if_clone_detach(&V_pfsync_cloner);
3417		}
3418		CURVNET_RESTORE();
3419	}
3420	VNET_LIST_RUNLOCK();
3421
3422	return (error);
3423}
3424
3425static void
3426pfsync_uninit()
3427{
3428	VNET_ITERATOR_DECL(vnet_iter);
3429
3430	PF_LOCK();
3431	pfsync_state_import_ptr = NULL;
3432	pfsync_up_ptr = NULL;
3433	pfsync_insert_state_ptr = NULL;
3434	pfsync_update_state_ptr = NULL;
3435	pfsync_delete_state_ptr = NULL;
3436	pfsync_clear_states_ptr = NULL;
3437	pfsync_state_in_use_ptr = NULL;
3438	pfsync_defer_ptr = NULL;
3439	PF_UNLOCK();
3440
3441	ipproto_unregister(IPPROTO_PFSYNC);
3442	pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW);
3443	VNET_LIST_RLOCK();
3444	VNET_FOREACH(vnet_iter) {
3445		CURVNET_SET(vnet_iter);
3446		swi_remove(V_pfsync_swi_cookie);
3447		if_clone_detach(&V_pfsync_cloner);
3448		CURVNET_RESTORE();
3449	}
3450	VNET_LIST_RUNLOCK();
3451}
3452
3453static int
3454pfsync_modevent(module_t mod, int type, void *data)
3455{
3456	int error = 0;
3457
3458	switch (type) {
3459	case MOD_LOAD:
3460		error = pfsync_init();
3461		break;
3462	case MOD_QUIESCE:
3463		/*
3464		 * Module should not be unloaded due to race conditions.
3465		 */
3466		error = EPERM;
3467		break;
3468	case MOD_UNLOAD:
3469		pfsync_uninit();
3470		break;
3471	default:
3472		error = EINVAL;
3473		break;
3474	}
3475
3476	return (error);
3477}
3478
3479static moduledata_t pfsync_mod = {
3480	"pfsync",
3481	pfsync_modevent,
3482	0
3483};
3484
3485#define PFSYNC_MODVER 1
3486
3487DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);
3488MODULE_VERSION(pfsync, PFSYNC_MODVER);
3489MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER);
3490#endif /* __FreeBSD__ */
3491