if_pfsync.c revision 164033
1/*	$FreeBSD: head/sys/contrib/pf/net/if_pfsync.c 164033 2006-11-06 13:42:10Z rwatson $	*/
2/*	$OpenBSD: if_pfsync.c,v 1.46 2005/02/20 15:58:38 mcbride Exp $	*/
3
4/*
5 * Copyright (c) 2002 Michael Shalayeff
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27 * THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#ifdef __FreeBSD__
31#include "opt_inet.h"
32#include "opt_inet6.h"
33#endif
34
35#ifndef __FreeBSD__
36#include "bpfilter.h"
37#include "pfsync.h"
38#elif __FreeBSD__ >= 5
39#include "opt_bpf.h"
40#include "opt_pf.h"
41
42#ifdef DEV_BPF
43#define	NBPFILTER	DEV_BPF
44#else
45#define	NBPFILTER	0
46#endif
47
48#ifdef DEV_PFSYNC
49#define	NPFSYNC		DEV_PFSYNC
50#else
51#define	NPFSYNC		0
52#endif
53
54#endif
55
56#include <sys/param.h>
57#ifdef __FreeBSD__
58#include <sys/priv.h>
59#endif
60#include <sys/proc.h>
61#include <sys/systm.h>
62#include <sys/time.h>
63#include <sys/mbuf.h>
64#include <sys/socket.h>
65#include <sys/kernel.h>
66#ifdef __FreeBSD__
67#include <sys/endian.h>
68#include <sys/malloc.h>
69#include <sys/module.h>
70#include <sys/sockio.h>
71#include <sys/lock.h>
72#include <sys/mutex.h>
73#include <sys/sysctl.h>
74#else
75#include <sys/ioctl.h>
76#include <sys/timeout.h>
77#endif
78
79#include <net/if.h>
80#if defined(__FreeBSD__)
81#include <net/if_clone.h>
82#endif
83#include <net/if_types.h>
84#include <net/route.h>
85#include <net/bpf.h>
86#include <netinet/tcp.h>
87#include <netinet/tcp_seq.h>
88
89#ifdef	INET
90#include <netinet/in.h>
91#include <netinet/in_systm.h>
92#include <netinet/in_var.h>
93#include <netinet/ip.h>
94#include <netinet/ip_var.h>
95#endif
96
97#ifdef INET6
98#ifndef INET
99#include <netinet/in.h>
100#endif
101#include <netinet6/nd6.h>
102#endif /* INET6 */
103
104#ifdef __FreeBSD__
105#include "opt_carp.h"
106#ifdef DEV_CARP
107#define	NCARP	1
108#else
109#define	NCARP	0
110#endif
111#else
112#include "carp.h"
113#endif
114#if NCARP > 0
115extern int carp_suppress_preempt;
116#endif
117
118#include <net/pfvar.h>
119#include <net/if_pfsync.h>
120
121#ifdef __FreeBSD__
122#define	PFSYNCNAME	"pfsync"
123#endif
124
125#define PFSYNC_MINMTU	\
126    (sizeof(struct pfsync_header) + sizeof(struct pf_state))
127
128#ifdef PFSYNCDEBUG
129#define DPRINTF(x)    do { if (pfsyncdebug) printf x ; } while (0)
130int pfsyncdebug;
131#else
132#define DPRINTF(x)
133#endif
134
135#ifndef __FreeBSD__
136struct pfsync_softc	pfsyncif;
137#endif
138struct pfsyncstats	pfsyncstats;
139#ifdef __FreeBSD__
140SYSCTL_DECL(_net_inet_pfsync);
141SYSCTL_STRUCT(_net_inet_pfsync, 0, stats, CTLFLAG_RW,
142    &pfsyncstats, pfsyncstats,
143    "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
144
145/*
146 * Locking notes:
147 * Whenever we really touch/look at the state table we have to hold the
148 * PF_LOCK. Functions that do just the interface handling, grab the per
149 * softc lock instead.
150 *
151 */
152
153static void	pfsync_clone_destroy(struct ifnet *);
154static int	pfsync_clone_create(struct if_clone *, int, caddr_t params);
155static void	pfsync_senddef(void *);
156#else
157void	pfsyncattach(int);
158#endif
159void	pfsync_setmtu(struct pfsync_softc *, int);
160int	pfsync_insert_net_state(struct pfsync_state *);
161int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
162	    struct rtentry *);
163int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
164void	pfsyncstart(struct ifnet *);
165
166struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **);
167int	pfsync_request_update(struct pfsync_state_upd *, struct in_addr *);
168int	pfsync_sendout(struct pfsync_softc *);
169void	pfsync_timeout(void *);
170void	pfsync_send_bus(struct pfsync_softc *, u_int8_t);
171void	pfsync_bulk_update(void *);
172void	pfsync_bulkfail(void *);
173
174int	pfsync_sync_ok;
175#ifndef __FreeBSD__
176extern int ifqmaxlen;
177extern struct timeval time;
178extern struct timeval mono_time;
179extern int hz;
180#endif
181
182#ifdef __FreeBSD__
183static MALLOC_DEFINE(M_PFSYNC, PFSYNCNAME, "Packet Filter State Sync. Interface");
184static LIST_HEAD(pfsync_list, pfsync_softc) pfsync_list;
185#define	SCP2IFP(sc)		((sc)->sc_ifp)
186IFC_SIMPLE_DECLARE(pfsync, 1);
187
188static void
189pfsync_clone_destroy(struct ifnet *ifp)
190{
191        struct pfsync_softc *sc;
192
193	sc = ifp->if_softc;
194	callout_stop(&sc->sc_tmo);
195	callout_stop(&sc->sc_bulk_tmo);
196	callout_stop(&sc->sc_bulkfail_tmo);
197
198	callout_stop(&sc->sc_send_tmo);
199
200#if NBPFILTER > 0
201        bpfdetach(ifp);
202#endif
203        if_detach(ifp);
204	if_free(ifp);
205        LIST_REMOVE(sc, sc_next);
206        free(sc->sc_imo.imo_membership, M_PFSYNC);
207        free(sc, M_PFSYNC);
208}
209
210static int
211#ifdef __FreeBSD__
212pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t params)
213#else
214pfsync_clone_create(struct if_clone *ifc, int unit)
215#endif
216{
217	struct pfsync_softc *sc;
218	struct ifnet *ifp;
219
220	MALLOC(sc, struct pfsync_softc *, sizeof(*sc), M_PFSYNC,
221	    M_WAITOK|M_ZERO);
222	ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC);
223	if (ifp == NULL) {
224		free(sc, M_PFSYNC);
225		return (ENOSPC);
226	}
227
228	pfsync_sync_ok = 1;
229	sc->sc_mbuf = NULL;
230	sc->sc_mbuf_net = NULL;
231	sc->sc_statep.s = NULL;
232	sc->sc_statep_net.s = NULL;
233	sc->sc_maxupdates = 128;
234	sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
235	sc->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP);
236	sc->sc_ureq_received = 0;
237	sc->sc_ureq_sent = 0;
238	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
239	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_PFSYNC,
240	    M_WAITOK);
241	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
242
243	ifp = SCP2IFP(sc);
244	if_initname(ifp, ifc->ifc_name, unit);
245	ifp->if_ioctl = pfsyncioctl;
246	ifp->if_output = pfsyncoutput;
247	ifp->if_start = pfsyncstart;
248	ifp->if_snd.ifq_maxlen = ifqmaxlen;
249	ifp->if_hdrlen = PFSYNC_HDRLEN;
250	ifp->if_baudrate = IF_Mbps(100);
251	ifp->if_softc = sc;
252	pfsync_setmtu(sc, MCLBYTES);
253	callout_init(&sc->sc_tmo, NET_CALLOUT_MPSAFE);
254	callout_init(&sc->sc_bulk_tmo, NET_CALLOUT_MPSAFE);
255	callout_init(&sc->sc_bulkfail_tmo, NET_CALLOUT_MPSAFE);
256	callout_init(&sc->sc_send_tmo, NET_CALLOUT_MPSAFE);
257	sc->sc_ifq.ifq_maxlen = ifqmaxlen;
258	mtx_init(&sc->sc_ifq.ifq_mtx, ifp->if_xname, "pfsync send queue",
259	    MTX_DEF);
260	if_attach(ifp);
261
262	LIST_INSERT_HEAD(&pfsync_list, sc, sc_next);
263#if NBPFILTER > 0
264	bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
265#endif
266
267	return (0);
268}
269#else /* !__FreeBSD__ */
270void
271pfsyncattach(int npfsync)
272{
273	struct ifnet *ifp;
274
275	pfsync_sync_ok = 1;
276	bzero(&pfsyncif, sizeof(pfsyncif));
277	pfsyncif.sc_mbuf = NULL;
278	pfsyncif.sc_mbuf_net = NULL;
279	pfsyncif.sc_statep.s = NULL;
280	pfsyncif.sc_statep_net.s = NULL;
281	pfsyncif.sc_maxupdates = 128;
282	pfsyncif.sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
283	pfsyncif.sc_sendaddr.s_addr = INADDR_PFSYNC_GROUP;
284	pfsyncif.sc_ureq_received = 0;
285	pfsyncif.sc_ureq_sent = 0;
286	ifp = &pfsyncif.sc_if;
287	strlcpy(ifp->if_xname, "pfsync0", sizeof ifp->if_xname);
288	ifp->if_softc = &pfsyncif;
289	ifp->if_ioctl = pfsyncioctl;
290	ifp->if_output = pfsyncoutput;
291	ifp->if_start = pfsyncstart;
292	ifp->if_type = IFT_PFSYNC;
293	ifp->if_snd.ifq_maxlen = ifqmaxlen;
294	ifp->if_hdrlen = PFSYNC_HDRLEN;
295	pfsync_setmtu(&pfsyncif, MCLBYTES);
296	timeout_set(&pfsyncif.sc_tmo, pfsync_timeout, &pfsyncif);
297	timeout_set(&pfsyncif.sc_bulk_tmo, pfsync_bulk_update, &pfsyncif);
298	timeout_set(&pfsyncif.sc_bulkfail_tmo, pfsync_bulkfail, &pfsyncif);
299	if_attach(ifp);
300	if_alloc_sadl(ifp);
301
302#if NBPFILTER > 0
303	bpfattach(&pfsyncif.sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
304#endif
305}
306#endif
307
308/*
309 * Start output on the pfsync interface.
310 */
311void
312pfsyncstart(struct ifnet *ifp)
313{
314#ifdef __FreeBSD__
315	IF_LOCK(&ifp->if_snd);
316	_IF_DROP(&ifp->if_snd);
317	_IF_DRAIN(&ifp->if_snd);
318	IF_UNLOCK(&ifp->if_snd);
319#else
320	struct mbuf *m;
321	int s;
322
323	for (;;) {
324		s = splimp();
325		IF_DROP(&ifp->if_snd);
326		IF_DEQUEUE(&ifp->if_snd, m);
327		splx(s);
328
329		if (m == NULL)
330			return;
331		else
332			m_freem(m);
333	}
334#endif
335}
336
337int
338pfsync_insert_net_state(struct pfsync_state *sp)
339{
340	struct pf_state	*st = NULL;
341	struct pf_rule *r = NULL;
342	struct pfi_kif	*kif;
343
344#ifdef __FreeBSD__
345	PF_ASSERT(MA_OWNED);
346#endif
347	if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) {
348		printf("pfsync_insert_net_state: invalid creator id:"
349		    " %08x\n", ntohl(sp->creatorid));
350		return (EINVAL);
351	}
352
353	kif = pfi_lookup_create(sp->ifname);
354	if (kif == NULL) {
355		if (pf_status.debug >= PF_DEBUG_MISC)
356			printf("pfsync_insert_net_state: "
357			    "unknown interface: %s\n", sp->ifname);
358		/* skip this state */
359		return (0);
360	}
361
362	/*
363	 * Just use the default rule until we have infrastructure to find the
364	 * best matching rule.
365	 */
366	r = &pf_default_rule;
367
368	if (!r->max_states || r->states < r->max_states)
369		st = pool_get(&pf_state_pl, PR_NOWAIT);
370	if (st == NULL) {
371		pfi_maybe_destroy(kif);
372		return (ENOMEM);
373	}
374	bzero(st, sizeof(*st));
375
376	st->rule.ptr = r;
377	/* XXX get pointers to nat_rule and anchor */
378
379	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
380	r->states++;
381
382	/* fill in the rest of the state entry */
383	pf_state_host_ntoh(&sp->lan, &st->lan);
384	pf_state_host_ntoh(&sp->gwy, &st->gwy);
385	pf_state_host_ntoh(&sp->ext, &st->ext);
386
387	pf_state_peer_ntoh(&sp->src, &st->src);
388	pf_state_peer_ntoh(&sp->dst, &st->dst);
389
390	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
391	st->creation = time_second - ntohl(sp->creation);
392	st->expire = ntohl(sp->expire) + time_second;
393
394	st->af = sp->af;
395	st->proto = sp->proto;
396	st->direction = sp->direction;
397	st->log = sp->log;
398	st->timeout = sp->timeout;
399	st->allow_opts = sp->allow_opts;
400
401	bcopy(sp->id, &st->id, sizeof(st->id));
402	st->creatorid = sp->creatorid;
403	st->sync_flags = PFSTATE_FROMSYNC;
404
405
406	if (pf_insert_state(kif, st)) {
407		pfi_maybe_destroy(kif);
408		/* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */
409		r->states--;
410		pool_put(&pf_state_pl, st);
411		return (EINVAL);
412	}
413
414	return (0);
415}
416
417void
418#ifdef __FreeBSD__
419pfsync_input(struct mbuf *m, __unused int off)
420#else
421pfsync_input(struct mbuf *m, ...)
422#endif
423{
424	struct ip *ip = mtod(m, struct ip *);
425	struct pfsync_header *ph;
426#ifdef __FreeBSD__
427	struct pfsync_softc *sc = LIST_FIRST(&pfsync_list);
428#else
429	struct pfsync_softc *sc = &pfsyncif;
430#endif
431	struct pf_state *st, key;
432	struct pfsync_state *sp;
433	struct pfsync_state_upd *up;
434	struct pfsync_state_del *dp;
435	struct pfsync_state_clr *cp;
436	struct pfsync_state_upd_req *rup;
437	struct pfsync_state_bus *bus;
438	struct in_addr src;
439	struct mbuf *mp;
440	int iplen, action, error, i, s, count, offp, sfail, stale = 0;
441
442	pfsyncstats.pfsyncs_ipackets++;
443
444	/* verify that we have a sync interface configured */
445	if (!sc->sc_sync_ifp || !pf_status.running) /* XXX PF_LOCK? */
446		goto done;
447
448	/* verify that the packet came in on the right interface */
449	if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) {
450		pfsyncstats.pfsyncs_badif++;
451		goto done;
452	}
453
454	/* verify that the IP TTL is 255.  */
455	if (ip->ip_ttl != PFSYNC_DFLTTL) {
456		pfsyncstats.pfsyncs_badttl++;
457		goto done;
458	}
459
460	iplen = ip->ip_hl << 2;
461
462	if (m->m_pkthdr.len < iplen + sizeof(*ph)) {
463		pfsyncstats.pfsyncs_hdrops++;
464		goto done;
465	}
466
467	if (iplen + sizeof(*ph) > m->m_len) {
468		if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) {
469			pfsyncstats.pfsyncs_hdrops++;
470			goto done;
471		}
472		ip = mtod(m, struct ip *);
473	}
474	ph = (struct pfsync_header *)((char *)ip + iplen);
475
476	/* verify the version */
477	if (ph->version != PFSYNC_VERSION) {
478		pfsyncstats.pfsyncs_badver++;
479		goto done;
480	}
481
482	action = ph->action;
483	count = ph->count;
484
485	/* make sure it's a valid action code */
486	if (action >= PFSYNC_ACT_MAX) {
487		pfsyncstats.pfsyncs_badact++;
488		goto done;
489	}
490
491	/* Cheaper to grab this now than having to mess with mbufs later */
492	src = ip->ip_src;
493
494	switch (action) {
495	case PFSYNC_ACT_CLR: {
496		struct pf_state *nexts;
497		struct pfi_kif	*kif;
498		u_int32_t creatorid;
499		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
500		    sizeof(*cp), &offp)) == NULL) {
501			pfsyncstats.pfsyncs_badlen++;
502			return;
503		}
504		cp = (struct pfsync_state_clr *)(mp->m_data + offp);
505		creatorid = cp->creatorid;
506
507		s = splsoftnet();
508#ifdef __FreeBSD__
509		PF_LOCK();
510#endif
511		if (cp->ifname[0] == '\0') {
512			for (st = RB_MIN(pf_state_tree_id, &tree_id);
513			    st; st = nexts) {
514                		nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
515				if (st->creatorid == creatorid) {
516					st->timeout = PFTM_PURGE;
517					pf_purge_expired_state(st);
518				}
519			}
520		} else {
521			kif = pfi_lookup_if(cp->ifname);
522			if (kif == NULL) {
523				if (pf_status.debug >= PF_DEBUG_MISC)
524					printf("pfsync_input: PFSYNC_ACT_CLR "
525					    "bad interface: %s\n", cp->ifname);
526				splx(s);
527#ifdef __FreeBSD__
528				PF_UNLOCK();
529#endif
530				goto done;
531			}
532			for (st = RB_MIN(pf_state_tree_lan_ext,
533			    &kif->pfik_lan_ext); st; st = nexts) {
534				nexts = RB_NEXT(pf_state_tree_lan_ext,
535				    &kif->pfik_lan_ext, st);
536				if (st->creatorid == creatorid) {
537					st->timeout = PFTM_PURGE;
538					pf_purge_expired_state(st);
539				}
540			}
541		}
542#ifdef __FreeBSD__
543		PF_UNLOCK();
544#endif
545		splx(s);
546
547		break;
548	}
549	case PFSYNC_ACT_INS:
550		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
551		    count * sizeof(*sp), &offp)) == NULL) {
552			pfsyncstats.pfsyncs_badlen++;
553			return;
554		}
555
556		s = splsoftnet();
557#ifdef __FreeBSD__
558		PF_LOCK();
559#endif
560		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
561		    i < count; i++, sp++) {
562			/* check for invalid values */
563			if (sp->timeout >= PFTM_MAX ||
564			    sp->src.state > PF_TCPS_PROXY_DST ||
565			    sp->dst.state > PF_TCPS_PROXY_DST ||
566			    sp->direction > PF_OUT ||
567			    (sp->af != AF_INET && sp->af != AF_INET6)) {
568				if (pf_status.debug >= PF_DEBUG_MISC)
569					printf("pfsync_insert: PFSYNC_ACT_INS: "
570					    "invalid value\n");
571				pfsyncstats.pfsyncs_badstate++;
572				continue;
573			}
574
575			if ((error = pfsync_insert_net_state(sp))) {
576				if (error == ENOMEM) {
577					splx(s);
578#ifdef __FreeBSD__
579					PF_UNLOCK();
580#endif
581					goto done;
582				}
583				continue;
584			}
585		}
586#ifdef __FreeBSD__
587		PF_UNLOCK();
588#endif
589		splx(s);
590		break;
591	case PFSYNC_ACT_UPD:
592		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
593		    count * sizeof(*sp), &offp)) == NULL) {
594			pfsyncstats.pfsyncs_badlen++;
595			return;
596		}
597
598		s = splsoftnet();
599#ifdef __FreeBSD__
600		PF_LOCK();
601#endif
602		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
603		    i < count; i++, sp++) {
604			int flags = PFSYNC_FLAG_STALE;
605
606			/* check for invalid values */
607			if (sp->timeout >= PFTM_MAX ||
608			    sp->src.state > PF_TCPS_PROXY_DST ||
609			    sp->dst.state > PF_TCPS_PROXY_DST) {
610				if (pf_status.debug >= PF_DEBUG_MISC)
611					printf("pfsync_insert: PFSYNC_ACT_UPD: "
612					    "invalid value\n");
613				pfsyncstats.pfsyncs_badstate++;
614				continue;
615			}
616
617			bcopy(sp->id, &key.id, sizeof(key.id));
618			key.creatorid = sp->creatorid;
619
620			st = pf_find_state_byid(&key);
621			if (st == NULL) {
622				/* insert the update */
623				if (pfsync_insert_net_state(sp))
624					pfsyncstats.pfsyncs_badstate++;
625				continue;
626			}
627			sfail = 0;
628			if (st->proto == IPPROTO_TCP) {
629				/*
630				 * The state should never go backwards except
631				 * for syn-proxy states.  Neither should the
632				 * sequence window slide backwards.
633				 */
634				if (st->src.state > sp->src.state &&
635				    (st->src.state < PF_TCPS_PROXY_SRC ||
636				    sp->src.state >= PF_TCPS_PROXY_SRC))
637					sfail = 1;
638				else if (SEQ_GT(st->src.seqlo,
639				    ntohl(sp->src.seqlo)))
640					sfail = 3;
641				else if (st->dst.state > sp->dst.state) {
642					/* There might still be useful
643					 * information about the src state here,
644					 * so import that part of the update,
645					 * then "fail" so we send the updated
646					 * state back to the peer who is missing
647					 * our what we know. */
648					pf_state_peer_ntoh(&sp->src, &st->src);
649					/* XXX do anything with timeouts? */
650					sfail = 7;
651					flags = 0;
652				} else if (st->dst.state >= TCPS_SYN_SENT &&
653				    SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo)))
654					sfail = 4;
655			} else {
656				/*
657				 * Non-TCP protocol state machine always go
658				 * forwards
659				 */
660				if (st->src.state > sp->src.state)
661					sfail = 5;
662				else if ( st->dst.state > sp->dst.state)
663					sfail = 6;
664			}
665			if (sfail) {
666				if (pf_status.debug >= PF_DEBUG_MISC)
667					printf("pfsync: %s stale update "
668					    "(%d) id: %016llx "
669					    "creatorid: %08x\n",
670					    (sfail < 7 ?  "ignoring"
671					     : "partial"), sfail,
672#ifdef __FreeBSD__
673					    (unsigned long long)be64toh(st->id),
674#else
675					    betoh64(st->id),
676#endif
677					    ntohl(st->creatorid));
678				pfsyncstats.pfsyncs_badstate++;
679
680				if (!(sp->sync_flags & PFSTATE_STALE)) {
681					/* we have a better state, send it */
682					if (sc->sc_mbuf != NULL && !stale)
683						pfsync_sendout(sc);
684					stale++;
685					if (!st->sync_flags)
686						pfsync_pack_state(
687						    PFSYNC_ACT_UPD, st, flags);
688				}
689				continue;
690			}
691			pf_state_peer_ntoh(&sp->src, &st->src);
692			pf_state_peer_ntoh(&sp->dst, &st->dst);
693			st->expire = ntohl(sp->expire) + time_second;
694			st->timeout = sp->timeout;
695		}
696		if (stale && sc->sc_mbuf != NULL)
697			pfsync_sendout(sc);
698#ifdef __FreeBSD__
699		PF_UNLOCK();
700#endif
701		splx(s);
702		break;
703	/*
704	 * It's not strictly necessary for us to support the "uncompressed"
705	 * delete action, but it's relatively simple and maintains consistency.
706	 */
707	case PFSYNC_ACT_DEL:
708		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
709		    count * sizeof(*sp), &offp)) == NULL) {
710			pfsyncstats.pfsyncs_badlen++;
711			return;
712		}
713
714		s = splsoftnet();
715#ifdef __FreeBSD__
716		PF_LOCK();
717#endif
718		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
719		    i < count; i++, sp++) {
720			bcopy(sp->id, &key.id, sizeof(key.id));
721			key.creatorid = sp->creatorid;
722
723			st = pf_find_state_byid(&key);
724			if (st == NULL) {
725				pfsyncstats.pfsyncs_badstate++;
726				continue;
727			}
728			st->timeout = PFTM_PURGE;
729			st->sync_flags |= PFSTATE_FROMSYNC;
730			pf_purge_expired_state(st);
731		}
732#ifdef __FreeBSD__
733		PF_UNLOCK();
734#endif
735		splx(s);
736		break;
737	case PFSYNC_ACT_UPD_C: {
738		int update_requested = 0;
739
740		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
741		    count * sizeof(*up), &offp)) == NULL) {
742			pfsyncstats.pfsyncs_badlen++;
743			return;
744		}
745
746		s = splsoftnet();
747#ifdef __FreeBSD__
748		PF_LOCK();
749#endif
750		for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp);
751		    i < count; i++, up++) {
752			/* check for invalid values */
753			if (up->timeout >= PFTM_MAX ||
754			    up->src.state > PF_TCPS_PROXY_DST ||
755			    up->dst.state > PF_TCPS_PROXY_DST) {
756				if (pf_status.debug >= PF_DEBUG_MISC)
757					printf("pfsync_insert: "
758					    "PFSYNC_ACT_UPD_C: "
759					    "invalid value\n");
760				pfsyncstats.pfsyncs_badstate++;
761				continue;
762			}
763
764			bcopy(up->id, &key.id, sizeof(key.id));
765			key.creatorid = up->creatorid;
766
767			st = pf_find_state_byid(&key);
768			if (st == NULL) {
769				/* We don't have this state. Ask for it. */
770				error = pfsync_request_update(up, &src);
771				if (error == ENOMEM) {
772					splx(s);
773					goto done;
774				}
775				update_requested = 1;
776				pfsyncstats.pfsyncs_badstate++;
777				continue;
778			}
779			sfail = 0;
780			if (st->proto == IPPROTO_TCP) {
781				/*
782				 * The state should never go backwards except
783				 * for syn-proxy states.  Neither should the
784				 * sequence window slide backwards.
785				 */
786				if (st->src.state > up->src.state &&
787				    (st->src.state < PF_TCPS_PROXY_SRC ||
788				    up->src.state >= PF_TCPS_PROXY_SRC))
789					sfail = 1;
790				else if (st->dst.state > up->dst.state)
791					sfail = 2;
792				else if (SEQ_GT(st->src.seqlo,
793				    ntohl(up->src.seqlo)))
794					sfail = 3;
795				else if (st->dst.state >= TCPS_SYN_SENT &&
796				    SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo)))
797					sfail = 4;
798			} else {
799				/*
800				 * Non-TCP protocol state machine always go
801				 * forwards
802				 */
803				if (st->src.state > up->src.state)
804					sfail = 5;
805				else if (st->dst.state > up->dst.state)
806					sfail = 6;
807			}
808			if (sfail) {
809				if (pf_status.debug >= PF_DEBUG_MISC)
810					printf("pfsync: ignoring stale update "
811					    "(%d) id: %016llx "
812					    "creatorid: %08x\n", sfail,
813#ifdef __FreeBSD__
814					    (unsigned long long)be64toh(st->id),
815#else
816					    betoh64(st->id),
817#endif
818					    ntohl(st->creatorid));
819				pfsyncstats.pfsyncs_badstate++;
820
821				/* we have a better state, send it out */
822				if ((!stale || update_requested) &&
823				    sc->sc_mbuf != NULL) {
824					pfsync_sendout(sc);
825					update_requested = 0;
826				}
827				stale++;
828				if (!st->sync_flags)
829					pfsync_pack_state(PFSYNC_ACT_UPD, st,
830					    PFSYNC_FLAG_STALE);
831				continue;
832			}
833			pf_state_peer_ntoh(&up->src, &st->src);
834			pf_state_peer_ntoh(&up->dst, &st->dst);
835			st->expire = ntohl(up->expire) + time_second;
836			st->timeout = up->timeout;
837		}
838		if ((update_requested || stale) && sc->sc_mbuf)
839			pfsync_sendout(sc);
840#ifdef __FreeBSD__
841		PF_UNLOCK();
842#endif
843		splx(s);
844		break;
845	}
846	case PFSYNC_ACT_DEL_C:
847		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
848		    count * sizeof(*dp), &offp)) == NULL) {
849			pfsyncstats.pfsyncs_badlen++;
850			return;
851		}
852
853		s = splsoftnet();
854#ifdef __FreeBSD__
855		PF_LOCK();
856#endif
857		for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp);
858		    i < count; i++, dp++) {
859			bcopy(dp->id, &key.id, sizeof(key.id));
860			key.creatorid = dp->creatorid;
861
862			st = pf_find_state_byid(&key);
863			if (st == NULL) {
864				pfsyncstats.pfsyncs_badstate++;
865				continue;
866			}
867			st->timeout = PFTM_PURGE;
868			st->sync_flags |= PFSTATE_FROMSYNC;
869			pf_purge_expired_state(st);
870		}
871#ifdef __FreeBSD__
872		PF_UNLOCK();
873#endif
874		splx(s);
875		break;
876	case PFSYNC_ACT_INS_F:
877	case PFSYNC_ACT_DEL_F:
878		/* not implemented */
879		break;
880	case PFSYNC_ACT_UREQ:
881		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
882		    count * sizeof(*rup), &offp)) == NULL) {
883			pfsyncstats.pfsyncs_badlen++;
884			return;
885		}
886
887		s = splsoftnet();
888#ifdef __FreeBSD__
889		PF_LOCK();
890#endif
891		if (sc->sc_mbuf != NULL)
892			pfsync_sendout(sc);
893		for (i = 0,
894		    rup = (struct pfsync_state_upd_req *)(mp->m_data + offp);
895		    i < count; i++, rup++) {
896			bcopy(rup->id, &key.id, sizeof(key.id));
897			key.creatorid = rup->creatorid;
898
899			if (key.id == 0 && key.creatorid == 0) {
900				sc->sc_ureq_received = time_uptime;
901				if (pf_status.debug >= PF_DEBUG_MISC)
902					printf("pfsync: received "
903					    "bulk update request\n");
904				pfsync_send_bus(sc, PFSYNC_BUS_START);
905#ifdef __FreeBSD__
906				callout_reset(&sc->sc_bulk_tmo, 1 * hz,
907				    pfsync_bulk_update,
908				    LIST_FIRST(&pfsync_list));
909#else
910				timeout_add(&sc->sc_bulk_tmo, 1 * hz);
911#endif
912			} else {
913				st = pf_find_state_byid(&key);
914				if (st == NULL) {
915					pfsyncstats.pfsyncs_badstate++;
916					continue;
917				}
918				if (!st->sync_flags)
919					pfsync_pack_state(PFSYNC_ACT_UPD,
920					    st, 0);
921			}
922		}
923		if (sc->sc_mbuf != NULL)
924			pfsync_sendout(sc);
925#ifdef __FreeBSD__
926		PF_UNLOCK();
927#endif
928		splx(s);
929		break;
930	case PFSYNC_ACT_BUS:
931		/* If we're not waiting for a bulk update, who cares. */
932		if (sc->sc_ureq_sent == 0)
933			break;
934
935		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
936		    sizeof(*bus), &offp)) == NULL) {
937			pfsyncstats.pfsyncs_badlen++;
938			return;
939		}
940		bus = (struct pfsync_state_bus *)(mp->m_data + offp);
941		switch (bus->status) {
942		case PFSYNC_BUS_START:
943#ifdef __FreeBSD__
944			callout_reset(&sc->sc_bulkfail_tmo,
945			    pf_pool_limits[PF_LIMIT_STATES].limit /
946			    (PFSYNC_BULKPACKETS * sc->sc_maxcount),
947			    pfsync_bulkfail, LIST_FIRST(&pfsync_list));
948#else
949			timeout_add(&sc->sc_bulkfail_tmo,
950			    pf_pool_limits[PF_LIMIT_STATES].limit /
951			    (PFSYNC_BULKPACKETS * sc->sc_maxcount));
952#endif
953			if (pf_status.debug >= PF_DEBUG_MISC)
954				printf("pfsync: received bulk "
955				    "update start\n");
956			break;
957		case PFSYNC_BUS_END:
958			if (time_uptime - ntohl(bus->endtime) >=
959			    sc->sc_ureq_sent) {
960				/* that's it, we're happy */
961				sc->sc_ureq_sent = 0;
962				sc->sc_bulk_tries = 0;
963#ifdef __FreeBSD__
964				callout_stop(&sc->sc_bulkfail_tmo);
965#else
966				timeout_del(&sc->sc_bulkfail_tmo);
967#endif
968#if NCARP > 0	/* XXX_IMPORT */
969				if (!pfsync_sync_ok)
970					carp_suppress_preempt--;
971#endif
972				pfsync_sync_ok = 1;
973				if (pf_status.debug >= PF_DEBUG_MISC)
974					printf("pfsync: received valid "
975					    "bulk update end\n");
976			} else {
977				if (pf_status.debug >= PF_DEBUG_MISC)
978					printf("pfsync: received invalid "
979					    "bulk update end: bad timestamp\n");
980			}
981			break;
982		}
983		break;
984	}
985
986done:
987	if (m)
988		m_freem(m);
989}
990
/*
 * if_output method of the pfsync interface.
 *
 * Nothing may be sent through the interface by way of the normal output
 * path: the packet is freed and success is reported.  `ifp', `dst' and
 * `rt' are not used.
 */
int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    struct rtentry *rt)
{

	m_freem(m);

	return (0);
}
998
999/* ARGSUSED */
1000int
1001pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1002{
1003#ifndef __FreeBSD__
1004	struct proc *p = curproc;
1005#endif
1006	struct pfsync_softc *sc = ifp->if_softc;
1007	struct ifreq *ifr = (struct ifreq *)data;
1008	struct ip_moptions *imo = &sc->sc_imo;
1009	struct pfsyncreq pfsyncr;
1010	struct ifnet    *sifp;
1011	int s, error;
1012
1013	switch (cmd) {
1014	case SIOCSIFADDR:
1015	case SIOCAIFADDR:
1016	case SIOCSIFDSTADDR:
1017	case SIOCSIFFLAGS:
1018#ifdef __FreeBSD__
1019		if (ifp->if_flags & IFF_UP)
1020			ifp->if_drv_flags |= IFF_DRV_RUNNING;
1021		else
1022			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1023#else
1024		if (ifp->if_flags & IFF_UP)
1025			ifp->if_flags |= IFF_RUNNING;
1026		else
1027			ifp->if_flags &= ~IFF_RUNNING;
1028#endif
1029		break;
1030	case SIOCSIFMTU:
1031		if (ifr->ifr_mtu < PFSYNC_MINMTU)
1032			return (EINVAL);
1033		if (ifr->ifr_mtu > MCLBYTES)
1034			ifr->ifr_mtu = MCLBYTES;
1035		s = splnet();
1036#ifdef __FreeBSD__
1037		PF_LOCK();
1038#endif
1039		if (ifr->ifr_mtu < ifp->if_mtu) {
1040			pfsync_sendout(sc);
1041		}
1042		pfsync_setmtu(sc, ifr->ifr_mtu);
1043#ifdef __FreeBSD__
1044		PF_UNLOCK();
1045#endif
1046		splx(s);
1047		break;
1048	case SIOCGETPFSYNC:
1049#ifdef __FreeBSD__
1050		/* XXX: read unlocked */
1051#endif
1052		bzero(&pfsyncr, sizeof(pfsyncr));
1053		if (sc->sc_sync_ifp)
1054			strlcpy(pfsyncr.pfsyncr_syncdev,
1055			    sc->sc_sync_ifp->if_xname, IFNAMSIZ);
1056		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
1057		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
1058		if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))))
1059			return (error);
1060		break;
1061	case SIOCSETPFSYNC:
1062#ifdef __FreeBSD__
1063		if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0)
1064#else
1065		if ((error = suser(p, p->p_acflag)) != 0)
1066#endif
1067			return (error);
1068		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
1069			return (error);
1070
1071		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
1072#ifdef __FreeBSD__
1073			sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
1074#else
1075			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
1076#endif
1077		else
1078			sc->sc_sync_peer.s_addr =
1079			    pfsyncr.pfsyncr_syncpeer.s_addr;
1080
1081		if (pfsyncr.pfsyncr_maxupdates > 255)
1082			return (EINVAL);
1083#ifdef __FreeBSD__
1084		callout_drain(&sc->sc_send_tmo);
1085		PF_LOCK();
1086#endif
1087		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
1088
1089		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
1090			sc->sc_sync_ifp = NULL;
1091			if (sc->sc_mbuf_net != NULL) {
1092				/* Don't keep stale pfsync packets around. */
1093				s = splnet();
1094				m_freem(sc->sc_mbuf_net);
1095				sc->sc_mbuf_net = NULL;
1096				sc->sc_statep_net.s = NULL;
1097				splx(s);
1098			}
1099			if (imo->imo_num_memberships > 0) {
1100				in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1101				imo->imo_multicast_ifp = NULL;
1102			}
1103#ifdef __FreeBSD__
1104			PF_UNLOCK();
1105#endif
1106			break;
1107		}
1108
1109		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) {
1110#ifdef __FreeBSD__
1111			PF_UNLOCK();
1112#endif
1113			return (EINVAL);
1114		}
1115
1116		s = splnet();
1117#ifdef __FreeBSD__
1118		if (sifp->if_mtu < SCP2IFP(sc)->if_mtu ||
1119#else
1120		if (sifp->if_mtu < sc->sc_if.if_mtu ||
1121#endif
1122		    (sc->sc_sync_ifp != NULL &&
1123		    sifp->if_mtu < sc->sc_sync_ifp->if_mtu) ||
1124		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
1125			pfsync_sendout(sc);
1126		sc->sc_sync_ifp = sifp;
1127
1128#ifdef __FreeBSD__
1129		pfsync_setmtu(sc, SCP2IFP(sc)->if_mtu);
1130#else
1131		pfsync_setmtu(sc, sc->sc_if.if_mtu);
1132#endif
1133
1134		if (imo->imo_num_memberships > 0) {
1135			in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1136			imo->imo_multicast_ifp = NULL;
1137		}
1138
1139		if (sc->sc_sync_ifp &&
1140#ifdef __FreeBSD__
1141		    sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
1142#else
1143		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
1144#endif
1145			struct in_addr addr;
1146
1147			if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) {
1148				sc->sc_sync_ifp = NULL;
1149#ifdef __FreeBSD__
1150				PF_UNLOCK();
1151#endif
1152				splx(s);
1153				return (EADDRNOTAVAIL);
1154			}
1155#ifdef __FreeBSD__
1156			PF_UNLOCK();		/* addmulti mallocs w/ WAITOK */
1157			addr.s_addr = htonl(INADDR_PFSYNC_GROUP);
1158#else
1159			addr.s_addr = INADDR_PFSYNC_GROUP;
1160#endif
1161
1162			if ((imo->imo_membership[0] =
1163			    in_addmulti(&addr, sc->sc_sync_ifp)) == NULL) {
1164				sc->sc_sync_ifp = NULL;
1165				splx(s);
1166				return (ENOBUFS);
1167			}
1168			imo->imo_num_memberships++;
1169			imo->imo_multicast_ifp = sc->sc_sync_ifp;
1170			imo->imo_multicast_ttl = PFSYNC_DFLTTL;
1171			imo->imo_multicast_loop = 0;
1172#ifdef __FreeBSD__
1173			PF_LOCK();
1174#endif
1175		}
1176
1177		if (sc->sc_sync_ifp ||
1178#ifdef __FreeBSD__
1179		    sc->sc_sendaddr.s_addr != htonl(INADDR_PFSYNC_GROUP)) {
1180#else
1181		    sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) {
1182#endif
1183			/* Request a full state table update. */
1184			sc->sc_ureq_sent = time_uptime;
1185#if NCARP > 0
1186			if (pfsync_sync_ok)
1187				carp_suppress_preempt++;
1188#endif
1189			pfsync_sync_ok = 0;
1190			if (pf_status.debug >= PF_DEBUG_MISC)
1191				printf("pfsync: requesting bulk update\n");
1192#ifdef __FreeBSD__
1193			callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
1194			    pfsync_bulkfail, LIST_FIRST(&pfsync_list));
1195#else
1196			timeout_add(&sc->sc_bulkfail_tmo, 5 * hz);
1197#endif
1198			error = pfsync_request_update(NULL, NULL);
1199			if (error == ENOMEM) {
1200#ifdef __FreeBSD__
1201				PF_UNLOCK();
1202#endif
1203				splx(s);
1204				return (ENOMEM);
1205			}
1206			pfsync_sendout(sc);
1207		}
1208#ifdef __FreeBSD__
1209		PF_UNLOCK();
1210#endif
1211		splx(s);
1212
1213		break;
1214
1215	default:
1216		return (ENOTTY);
1217	}
1218
1219	return (0);
1220}
1221
1222void
1223pfsync_setmtu(struct pfsync_softc *sc, int mtu_req)
1224{
1225	int mtu;
1226
1227	if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req)
1228		mtu = sc->sc_sync_ifp->if_mtu;
1229	else
1230		mtu = mtu_req;
1231
1232	sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) /
1233	    sizeof(struct pfsync_state);
1234	if (sc->sc_maxcount > 254)
1235	    sc->sc_maxcount = 254;
1236#ifdef __FreeBSD__
1237	SCP2IFP(sc)->if_mtu = sizeof(struct pfsync_header) +
1238	    sc->sc_maxcount * sizeof(struct pfsync_state);
1239#else
1240	sc->sc_if.if_mtu = sizeof(struct pfsync_header) +
1241	    sc->sc_maxcount * sizeof(struct pfsync_state);
1242#endif
1243}
1244
1245struct mbuf *
1246pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp)
1247{
1248	struct pfsync_header *h;
1249	struct mbuf *m;
1250	int len;
1251
1252#ifdef __FreeBSD__
1253	PF_ASSERT(MA_OWNED);
1254#endif
1255	MGETHDR(m, M_DONTWAIT, MT_DATA);
1256	if (m == NULL) {
1257#ifdef __FreeBSD__
1258		SCP2IFP(sc)->if_oerrors++;
1259#else
1260		sc->sc_if.if_oerrors++;
1261#endif
1262		return (NULL);
1263	}
1264
1265	switch (action) {
1266	case PFSYNC_ACT_CLR:
1267		len = sizeof(struct pfsync_header) +
1268		    sizeof(struct pfsync_state_clr);
1269		break;
1270	case PFSYNC_ACT_UPD_C:
1271		len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd)) +
1272		    sizeof(struct pfsync_header);
1273		break;
1274	case PFSYNC_ACT_DEL_C:
1275		len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) +
1276		    sizeof(struct pfsync_header);
1277		break;
1278	case PFSYNC_ACT_UREQ:
1279		len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) +
1280		    sizeof(struct pfsync_header);
1281		break;
1282	case PFSYNC_ACT_BUS:
1283		len = sizeof(struct pfsync_header) +
1284		    sizeof(struct pfsync_state_bus);
1285		break;
1286	default:
1287		len = (sc->sc_maxcount * sizeof(struct pfsync_state)) +
1288		    sizeof(struct pfsync_header);
1289		break;
1290	}
1291
1292	if (len > MHLEN) {
1293		MCLGET(m, M_DONTWAIT);
1294		if ((m->m_flags & M_EXT) == 0) {
1295			m_free(m);
1296#ifdef __FreeBSD__
1297			SCP2IFP(sc)->if_oerrors++;
1298#else
1299			sc->sc_if.if_oerrors++;
1300#endif
1301			return (NULL);
1302		}
1303		m->m_data += (MCLBYTES - len) &~ (sizeof(long) - 1);
1304	} else
1305		MH_ALIGN(m, len);
1306
1307	m->m_pkthdr.rcvif = NULL;
1308	m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header);
1309	h = mtod(m, struct pfsync_header *);
1310	h->version = PFSYNC_VERSION;
1311	h->af = 0;
1312	h->count = 0;
1313	h->action = action;
1314
1315	*sp = (void *)((char *)h + PFSYNC_HDRLEN);
1316#ifdef __FreeBSD__
1317	callout_reset(&sc->sc_tmo, hz, pfsync_timeout,
1318	    LIST_FIRST(&pfsync_list));
1319#else
1320	timeout_add(&sc->sc_tmo, hz);
1321#endif
1322	return (m);
1323}
1324
/*
 * Queue the pf state `st' into the pending pfsync packet for `action'.
 *
 * The full pfsync_state record goes into sc_mbuf.  When a sync interface
 * is configured and PFSYNC_FLAG_COMPRESS is set in `flags', UPD and DEL
 * actions are additionally mirrored as compressed UPD_C/DEL_C records in
 * sc_mbuf_net.
 *
 * Returns 0 on success (including the case where nobody is listening and
 * nothing is queued), EINVAL if `action' is >= PFSYNC_ACT_MAX, ENOMEM if
 * no mbuf could be obtained, or the result of pfsync_sendout() when the
 * packet was flushed.
 */
1325int
1326pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags)
1327{
1328#ifdef __FreeBSD__
1329	struct ifnet *ifp = SCP2IFP(LIST_FIRST(&pfsync_list));
1330#else
1331	struct ifnet *ifp = &pfsyncif.sc_if;
1332#endif
1333	struct pfsync_softc *sc = ifp->if_softc;
1334	struct pfsync_header *h, *h_net;
1335	struct pfsync_state *sp = NULL;
1336	struct pfsync_state_upd *up = NULL;
1337	struct pfsync_state_del *dp = NULL;
1338	struct pf_rule *r;
1339	u_long secs;
1340	int s, ret = 0;
	/* i == 255 means "no existing record for this state in the packet". */
1341	u_int8_t i = 255, newaction = 0;
1342
1343#ifdef __FreeBSD__
1344	PF_ASSERT(MA_OWNED);
1345#endif
1346	/*
1347	 * If a packet falls in the forest and there's nobody around to
1348	 * hear, does it make a sound?
1349	 */
	/* No bpf listener, no sync interface and no explicit sync peer. */
1350	if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL &&
1351#ifdef __FreeBSD__
1352	    sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
1353#else
1354	    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
1355#endif
1356		/* Don't leave any stale pfsync packets hanging around. */
1357		if (sc->sc_mbuf != NULL) {
1358			m_freem(sc->sc_mbuf);
1359			sc->sc_mbuf = NULL;
1360			sc->sc_statep.s = NULL;
1361		}
1362		return (0);
1363	}
1364
1365	if (action >= PFSYNC_ACT_MAX)
1366		return (EINVAL);
1367
1368	s = splnet();
	/*
	 * Make sure sc_mbuf holds a packet for this action; a pending
	 * packet for a different action is sent out first.
	 */
1369	if (sc->sc_mbuf == NULL) {
1370		if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
1371		    (void *)&sc->sc_statep.s)) == NULL) {
1372			splx(s);
1373			return (ENOMEM);
1374		}
1375		h = mtod(sc->sc_mbuf, struct pfsync_header *);
1376	} else {
1377		h = mtod(sc->sc_mbuf, struct pfsync_header *);
1378		if (h->action != action) {
1379			pfsync_sendout(sc);
1380			if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
1381			    (void *)&sc->sc_statep.s)) == NULL) {
1382				splx(s);
1383				return (ENOMEM);
1384			}
1385			h = mtod(sc->sc_mbuf, struct pfsync_header *);
1386		} else {
1387			/*
1388			 * If it's an update, look in the packet to see if
1389			 * we already have an update for the state.
1390			 */
1391			if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) {
1392				struct pfsync_state *usp =
1393				    (void *)((char *)h + PFSYNC_HDRLEN);
1394
				/* On a match, i is the record's index. */
1395				for (i = 0; i < h->count; i++) {
1396					if (!memcmp(usp->id, &st->id,
1397					    PFSYNC_ID_LEN) &&
1398					    usp->creatorid == st->creatorid) {
1399						sp = usp;
1400						sp->updates++;
1401						break;
1402					}
1403					usp++;
1404				}
1405			}
1406		}
1407	}
1408
1409	secs = time_second;
1410
	/* Stamp the state and move it to the tail of state_updates. */
1411	st->pfsync_time = time_uptime;
1412	TAILQ_REMOVE(&state_updates, st, u.s.entry_updates);
1413	TAILQ_INSERT_TAIL(&state_updates, st, u.s.entry_updates);
1414
1415	if (sp == NULL) {
1416		/* not a "duplicate" update */
1417		i = 255;
		/* Claim the next record slot and grow the packet by one record. */
1418		sp = sc->sc_statep.s++;
1419		sc->sc_mbuf->m_pkthdr.len =
1420		    sc->sc_mbuf->m_len += sizeof(struct pfsync_state);
1421		h->count++;
1422		bzero(sp, sizeof(*sp));
1423
1424		bcopy(&st->id, sp->id, sizeof(sp->id));
1425		sp->creatorid = st->creatorid;
1426
1427		strlcpy(sp->ifname, st->u.s.kif->pfik_name, sizeof(sp->ifname));
1428		pf_state_host_hton(&st->lan, &sp->lan);
1429		pf_state_host_hton(&st->gwy, &sp->gwy);
1430		pf_state_host_hton(&st->ext, &sp->ext);
1431
1432		bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
1433
		/* Creation is sent as an age relative to now, not a timestamp. */
1434		sp->creation = htonl(secs - st->creation);
1435		sp->packets[0] = htonl(st->packets[0]);
1436		sp->packets[1] = htonl(st->packets[1]);
1437		sp->bytes[0] = htonl(st->bytes[0]);
1438		sp->bytes[1] = htonl(st->bytes[1]);
		/* A missing rule or anchor is encoded as -1. */
1439		if ((r = st->rule.ptr) == NULL)
1440			sp->rule = htonl(-1);
1441		else
1442			sp->rule = htonl(r->nr);
1443		if ((r = st->anchor.ptr) == NULL)
1444			sp->anchor = htonl(-1);
1445		else
1446			sp->anchor = htonl(r->nr);
1447		sp->af = st->af;
1448		sp->proto = st->proto;
1449		sp->direction = st->direction;
1450		sp->log = st->log;
1451		sp->allow_opts = st->allow_opts;
1452		sp->timeout = st->timeout;
1453
1454		if (flags & PFSYNC_FLAG_STALE)
1455			sp->sync_flags |= PFSTATE_STALE;
1456	}
1457
	/* Peer data and expiry are refreshed for new and duplicate records. */
1458	pf_state_peer_hton(&st->src, &sp->src);
1459	pf_state_peer_hton(&st->dst, &sp->dst);
1460
	/* Expiry is sent as seconds remaining; 0 if already expired. */
1461	if (st->expire <= secs)
1462		sp->expire = htonl(0);
1463	else
1464		sp->expire = htonl(st->expire - secs);
1465
1466	/* do we need to build "compressed" actions for network transfer? */
1467	if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) {
1468		switch (action) {
1469		case PFSYNC_ACT_UPD:
1470			newaction = PFSYNC_ACT_UPD_C;
1471			break;
1472		case PFSYNC_ACT_DEL:
1473			newaction = PFSYNC_ACT_DEL_C;
1474			break;
1475		default:
1476			/* by default we just send the uncompressed states */
1477			break;
1478		}
1479	}
1480
1481	if (newaction) {
1482		if (sc->sc_mbuf_net == NULL) {
1483			if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction,
1484			    (void *)&sc->sc_statep_net.s)) == NULL) {
1485				splx(s);
1486				return (ENOMEM);
1487			}
1488		}
1489		h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *);
1490
1491		switch (newaction) {
1492		case PFSYNC_ACT_UPD_C:
			/*
			 * NOTE(review): the index found in the full packet is
			 * reused to address the compressed packet.  Presumably
			 * both packets always hold the same states in the same
			 * order; confirm that callers never mix compressed and
			 * uncompressed updates within one pending packet, since
			 * sc_mbuf_net may have been freshly allocated just above.
			 */
1493			if (i != 255) {
1494				up = (void *)((char *)h_net +
1495				    PFSYNC_HDRLEN + (i * sizeof(*up)));
1496				up->updates++;
1497			} else {
1498				h_net->count++;
1499				sc->sc_mbuf_net->m_pkthdr.len =
1500				    sc->sc_mbuf_net->m_len += sizeof(*up);
1501				up = sc->sc_statep_net.u++;
1502
1503				bzero(up, sizeof(*up));
1504				bcopy(&st->id, up->id, sizeof(up->id));
1505				up->creatorid = st->creatorid;
1506			}
			/* expire, src and dst are copied already in wire format. */
1507			up->timeout = st->timeout;
1508			up->expire = sp->expire;
1509			up->src = sp->src;
1510			up->dst = sp->dst;
1511			break;
1512		case PFSYNC_ACT_DEL_C:
1513			sc->sc_mbuf_net->m_pkthdr.len =
1514			    sc->sc_mbuf_net->m_len += sizeof(*dp);
1515			dp = sc->sc_statep_net.d++;
1516			h_net->count++;
1517
1518			bzero(dp, sizeof(*dp));
1519			bcopy(&st->id, dp->id, sizeof(dp->id));
1520			dp->creatorid = st->creatorid;
1521			break;
1522		}
1523	}
1524
	/*
	 * Flush when the packet is full, or when this record has been
	 * merged sc_maxupdates times.
	 */
1525	if (h->count == sc->sc_maxcount ||
1526	    (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates)))
1527		ret = pfsync_sendout(sc);
1528
1529	splx(s);
1530	return (ret);
1531}
1532
1533/* This must be called in splnet() */
1534int
1535pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src)
1536{
1537#ifdef __FreeBSD__
1538	struct ifnet *ifp = SCP2IFP(LIST_FIRST(&pfsync_list));
1539#else
1540	struct ifnet *ifp = &pfsyncif.sc_if;
1541#endif
1542	struct pfsync_header *h;
1543	struct pfsync_softc *sc = ifp->if_softc;
1544	struct pfsync_state_upd_req *rup;
1545	int ret = 0;
1546
1547#ifdef __FreeBSD__
1548	PF_ASSERT(MA_OWNED);
1549#endif
1550	if (sc->sc_mbuf == NULL) {
1551		if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
1552		    (void *)&sc->sc_statep.s)) == NULL)
1553			return (ENOMEM);
1554		h = mtod(sc->sc_mbuf, struct pfsync_header *);
1555	} else {
1556		h = mtod(sc->sc_mbuf, struct pfsync_header *);
1557		if (h->action != PFSYNC_ACT_UREQ) {
1558			pfsync_sendout(sc);
1559			if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
1560			    (void *)&sc->sc_statep.s)) == NULL)
1561				return (ENOMEM);
1562			h = mtod(sc->sc_mbuf, struct pfsync_header *);
1563		}
1564	}
1565
1566	if (src != NULL)
1567		sc->sc_sendaddr = *src;
1568	sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup);
1569	h->count++;
1570	rup = sc->sc_statep.r++;
1571	bzero(rup, sizeof(*rup));
1572	if (up != NULL) {
1573		bcopy(up->id, rup->id, sizeof(rup->id));
1574		rup->creatorid = up->creatorid;
1575	}
1576
1577	if (h->count == sc->sc_maxcount)
1578		ret = pfsync_sendout(sc);
1579
1580	return (ret);
1581}
1582
1583int
1584pfsync_clear_states(u_int32_t creatorid, char *ifname)
1585{
1586#ifdef __FreeBSD__
1587	struct ifnet *ifp = SCP2IFP(LIST_FIRST(&pfsync_list));
1588#else
1589	struct ifnet *ifp = &pfsyncif.sc_if;
1590#endif
1591	struct pfsync_softc *sc = ifp->if_softc;
1592	struct pfsync_state_clr *cp;
1593	int s, ret;
1594
1595	s = splnet();
1596#ifdef __FreeBSD__
1597	PF_ASSERT(MA_OWNED);
1598#endif
1599	if (sc->sc_mbuf != NULL)
1600		pfsync_sendout(sc);
1601	if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR,
1602	    (void *)&sc->sc_statep.c)) == NULL) {
1603		splx(s);
1604		return (ENOMEM);
1605	}
1606	sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp);
1607	cp = sc->sc_statep.c;
1608	cp->creatorid = creatorid;
1609	if (ifname != NULL)
1610		strlcpy(cp->ifname, ifname, IFNAMSIZ);
1611
1612	ret = (pfsync_sendout(sc));
1613	splx(s);
1614	return (ret);
1615}
1616
/*
 * Timer handler: send out whatever pfsync packet is currently pending.
 * `v' is the pfsync softc.
 */
void
pfsync_timeout(void *v)
{
	struct pfsync_softc *sc = (struct pfsync_softc *)v;
	int spl = splnet();

#ifdef __FreeBSD__
	/* pfsync_sendout() asserts that the pf lock is held. */
	PF_LOCK();
	pfsync_sendout(sc);
	PF_UNLOCK();
#else
	pfsync_sendout(sc);
#endif
	splx(spl);
}
1633
1634/* This must be called in splnet() */
1635void
1636pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status)
1637{
1638	struct pfsync_state_bus *bus;
1639
1640#ifdef __FreeBSD__
1641	PF_ASSERT(MA_OWNED);
1642#endif
1643	if (sc->sc_mbuf != NULL)
1644		pfsync_sendout(sc);
1645
1646	if (pfsync_sync_ok &&
1647	    (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS,
1648	    (void *)&sc->sc_statep.b)) != NULL) {
1649		sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus);
1650		bus = sc->sc_statep.b;
1651		bus->creatorid = pf_status.hostid;
1652		bus->status = status;
1653		bus->endtime = htonl(time_uptime - sc->sc_ureq_received);
1654		pfsync_sendout(sc);
1655	}
1656}
1657
1658void
1659pfsync_bulk_update(void *v)
1660{
1661	struct pfsync_softc *sc = v;
1662	int s, i = 0;
1663	struct pf_state *state;
1664
1665#ifdef __FreeBSD__
1666	PF_LOCK();
1667#endif
1668	s = splnet();
1669	if (sc->sc_mbuf != NULL)
1670		pfsync_sendout(sc);
1671
1672	/*
1673	 * Grab at most PFSYNC_BULKPACKETS worth of states which have not
1674	 * been sent since the latest request was made.
1675	 */
1676	while ((state = TAILQ_FIRST(&state_updates)) != NULL &&
1677	    ++i < (sc->sc_maxcount * PFSYNC_BULKPACKETS)) {
1678		if (state->pfsync_time > sc->sc_ureq_received) {
1679			/* we're done */
1680			pfsync_send_bus(sc, PFSYNC_BUS_END);
1681			sc->sc_ureq_received = 0;
1682#ifdef __FreeBSD__
1683			callout_stop(&sc->sc_bulk_tmo);
1684#else
1685			timeout_del(&sc->sc_bulk_tmo);
1686#endif
1687			if (pf_status.debug >= PF_DEBUG_MISC)
1688				printf("pfsync: bulk update complete\n");
1689			break;
1690		} else {
1691			/* send an update and move to end of list */
1692			if (!state->sync_flags)
1693				pfsync_pack_state(PFSYNC_ACT_UPD, state, 0);
1694			state->pfsync_time = time_uptime;
1695			TAILQ_REMOVE(&state_updates, state, u.s.entry_updates);
1696			TAILQ_INSERT_TAIL(&state_updates, state,
1697			    u.s.entry_updates);
1698
1699			/* look again for more in a bit */
1700#ifdef __FreeBSD__
1701			callout_reset(&sc->sc_bulk_tmo, 1, pfsync_timeout,
1702			    LIST_FIRST(&pfsync_list));
1703#else
1704			timeout_add(&sc->sc_bulk_tmo, 1);
1705#endif
1706		}
1707	}
1708	if (sc->sc_mbuf != NULL)
1709		pfsync_sendout(sc);
1710	splx(s);
1711#ifdef __FreeBSD__
1712	PF_UNLOCK();
1713#endif
1714}
1715
1716void
1717pfsync_bulkfail(void *v)
1718{
1719	struct pfsync_softc *sc = v;
1720	int s, error;
1721
1722#ifdef __FreeBSD__
1723	PF_LOCK();
1724#endif
1725	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
1726		/* Try again in a bit */
1727#ifdef __FreeBSD__
1728		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail,
1729		    LIST_FIRST(&pfsync_list));
1730#else
1731		timeout_add(&sc->sc_bulkfail_tmo, 5 * hz);
1732#endif
1733		s = splnet();
1734		error = pfsync_request_update(NULL, NULL);
1735		if (error == ENOMEM) {
1736			if (pf_status.debug >= PF_DEBUG_MISC)
1737				printf("pfsync: cannot allocate mbufs for "
1738				    "bulk update\n");
1739		} else
1740			pfsync_sendout(sc);
1741		splx(s);
1742	} else {
1743		/* Pretend like the transfer was ok */
1744		sc->sc_ureq_sent = 0;
1745		sc->sc_bulk_tries = 0;
1746#if NCARP > 0
1747		if (!pfsync_sync_ok)
1748			carp_suppress_preempt--;
1749#endif
1750		pfsync_sync_ok = 1;
1751		if (pf_status.debug >= PF_DEBUG_MISC)
1752			printf("pfsync: failed to receive "
1753			    "bulk update status\n");
1754#ifdef __FreeBSD__
1755		callout_stop(&sc->sc_bulkfail_tmo);
1756#else
1757		timeout_del(&sc->sc_bulkfail_tmo);
1758#endif
1759	}
1760#ifdef __FreeBSD__
1761	PF_UNLOCK();
1762#endif
1763}
1764
1765/* This must be called in splnet() */
1766int
1767pfsync_sendout(sc)
1768	struct pfsync_softc *sc;
1769{
1770#if NBPFILTER > 0
1771# ifdef __FreeBSD__
1772	struct ifnet *ifp = SCP2IFP(sc);
1773# else
1774	struct ifnet *ifp = &sc->if_sc;
1775# endif
1776#endif
1777	struct mbuf *m;
1778
1779#ifdef __FreeBSD__
1780	PF_ASSERT(MA_OWNED);
1781	callout_stop(&sc->sc_tmo);
1782#else
1783	timeout_del(&sc->sc_tmo);
1784#endif
1785
1786	if (sc->sc_mbuf == NULL)
1787		return (0);
1788	m = sc->sc_mbuf;
1789	sc->sc_mbuf = NULL;
1790	sc->sc_statep.s = NULL;
1791
1792#ifdef __FreeBSD__
1793	KASSERT(m != NULL, ("pfsync_sendout: null mbuf"));
1794#endif
1795#if NBPFILTER > 0
1796	if (ifp->if_bpf)
1797		bpf_mtap(ifp->if_bpf, m);
1798#endif
1799
1800	if (sc->sc_mbuf_net) {
1801		m_freem(m);
1802		m = sc->sc_mbuf_net;
1803		sc->sc_mbuf_net = NULL;
1804		sc->sc_statep_net.s = NULL;
1805	}
1806
1807#ifdef __FreeBSD__
1808	if (sc->sc_sync_ifp ||
1809	    sc->sc_sync_peer.s_addr != htonl(INADDR_PFSYNC_GROUP)) {
1810#else
1811	if (sc->sc_sync_ifp ||sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) {
1812#endif
1813		struct ip *ip;
1814		struct sockaddr sa;
1815
1816		M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
1817		if (m == NULL) {
1818			pfsyncstats.pfsyncs_onomem++;
1819			return (0);
1820		}
1821		ip = mtod(m, struct ip *);
1822		ip->ip_v = IPVERSION;
1823		ip->ip_hl = sizeof(*ip) >> 2;
1824		ip->ip_tos = IPTOS_LOWDELAY;
1825#ifdef __FreeBSD__
1826		ip->ip_len = m->m_pkthdr.len;
1827#else
1828		ip->ip_len = htons(m->m_pkthdr.len);
1829#endif
1830		ip->ip_id = htons(ip_randomid());
1831#ifdef __FreeBSD__
1832		ip->ip_off = IP_DF;
1833#else
1834		ip->ip_off = htons(IP_DF);
1835#endif
1836		ip->ip_ttl = PFSYNC_DFLTTL;
1837		ip->ip_p = IPPROTO_PFSYNC;
1838		ip->ip_sum = 0;
1839
1840		bzero(&sa, sizeof(sa));
1841		ip->ip_src.s_addr = INADDR_ANY;
1842
1843#ifdef __FreeBSD__
1844		if (sc->sc_sendaddr.s_addr == htonl(INADDR_PFSYNC_GROUP))
1845#else
1846		if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP)
1847#endif
1848			m->m_flags |= M_MCAST;
1849		ip->ip_dst = sc->sc_sendaddr;
1850		sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr;
1851
1852		pfsyncstats.pfsyncs_opackets++;
1853#ifdef __FreeBSD__
1854		if (!IF_HANDOFF(&sc->sc_ifq, m, NULL))
1855			pfsyncstats.pfsyncs_oerrors++;
1856		callout_reset(&sc->sc_send_tmo, 1, pfsync_senddef, sc);
1857#else
1858		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
1859			pfsyncstats.pfsyncs_oerrors++;
1860#endif
1861	} else
1862		m_freem(m);
1863
1864	return (0);
1865}
1866
1867#ifdef __FreeBSD__
1868static void
1869pfsync_senddef(void *arg)
1870{
1871	struct pfsync_softc *sc = (struct pfsync_softc *)arg;
1872	struct mbuf *m;
1873
1874	for(;;) {
1875		IF_DEQUEUE(&sc->sc_ifq, m);
1876		if (m == NULL)
1877			break;
1878		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
1879			pfsyncstats.pfsyncs_oerrors++;
1880	}
1881}
1882
1883static int
1884pfsync_modevent(module_t mod, int type, void *data)
1885{
1886	int error = 0;
1887
1888	switch (type) {
1889	case MOD_LOAD:
1890		LIST_INIT(&pfsync_list);
1891		if_clone_attach(&pfsync_cloner);
1892		break;
1893
1894	case MOD_UNLOAD:
1895		if_clone_detach(&pfsync_cloner);
1896		break;
1897
1898	default:
1899		error = EINVAL;
1900		break;
1901	}
1902
1903	return error;
1904}
1905
/*
 * Module descriptor: module name, event handler, and a third field that
 * is left as 0.
 */
1906static moduledata_t pfsync_mod = {
1907	"pfsync",
1908	pfsync_modevent,
1909	0
1910};
1911
/* Version number passed to MODULE_VERSION() below. */
1912#define PFSYNC_MODVER 1
1913
/* Declare the pfsync module and its version via the kernel module macros. */
1914DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
1915MODULE_VERSION(pfsync, PFSYNC_MODVER);
1916#endif /* __FreeBSD__ */
1917