if_pfsync.c revision 151266
1/*	$FreeBSD: head/sys/contrib/pf/net/if_pfsync.c 151266 2005-10-12 19:52:16Z thompsa $	*/
2/*	$OpenBSD: if_pfsync.c,v 1.46 2005/02/20 15:58:38 mcbride Exp $	*/
3
4/*
5 * Copyright (c) 2002 Michael Shalayeff
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27 * THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#ifdef __FreeBSD__
31#include "opt_inet.h"
32#include "opt_inet6.h"
33#endif
34
35#ifndef __FreeBSD__
36#include "bpfilter.h"
37#include "pfsync.h"
38#elif __FreeBSD__ >= 5
39#include "opt_bpf.h"
40#include "opt_pf.h"
41#define	NBPFILTER	DEV_BPF
42#define	NPFSYNC		DEV_PFSYNC
43#endif
44
45#include <sys/param.h>
46#include <sys/proc.h>
47#include <sys/systm.h>
48#include <sys/time.h>
49#include <sys/mbuf.h>
50#include <sys/socket.h>
51#include <sys/kernel.h>
52#ifdef __FreeBSD__
53#include <sys/endian.h>
54#include <sys/malloc.h>
55#include <sys/module.h>
56#include <sys/sockio.h>
57#include <sys/lock.h>
58#include <sys/mutex.h>
59#include <sys/sysctl.h>
60#else
61#include <sys/ioctl.h>
62#include <sys/timeout.h>
63#endif
64
65#include <net/if.h>
66#if defined(__FreeBSD__)
67#include <net/if_clone.h>
68#endif
69#include <net/if_types.h>
70#include <net/route.h>
71#include <net/bpf.h>
72#include <netinet/tcp.h>
73#include <netinet/tcp_seq.h>
74
75#ifdef	INET
76#include <netinet/in.h>
77#include <netinet/in_systm.h>
78#include <netinet/in_var.h>
79#include <netinet/ip.h>
80#include <netinet/ip_var.h>
81#endif
82
83#ifdef INET6
84#ifndef INET
85#include <netinet/in.h>
86#endif
87#include <netinet6/nd6.h>
88#endif /* INET6 */
89
90#ifdef __FreeBSD__
91#include "opt_carp.h"
92#ifdef DEV_CARP
93#define	NCARP	1
94#endif
95#else
96#include "carp.h"
97#endif
98#if NCARP > 0
99extern int carp_suppress_preempt;
100#endif
101
102#include <net/pfvar.h>
103#include <net/if_pfsync.h>
104
105#ifdef __FreeBSD__
106#define	PFSYNCNAME	"pfsync"
107#endif
108
109#define PFSYNC_MINMTU	\
110    (sizeof(struct pfsync_header) + sizeof(struct pf_state))
111
112#ifdef PFSYNCDEBUG
113#define DPRINTF(x)    do { if (pfsyncdebug) printf x ; } while (0)
114int pfsyncdebug;
115#else
116#define DPRINTF(x)
117#endif
118
119#ifndef __FreeBSD__
120struct pfsync_softc	pfsyncif;
121#endif
122struct pfsyncstats	pfsyncstats;
123#ifdef __FreeBSD__
124SYSCTL_DECL(_net_inet_pfsync);
125SYSCTL_STRUCT(_net_inet_pfsync, 0, stats, CTLFLAG_RW,
126    &pfsyncstats, pfsyncstats,
127    "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
128
/*
 * Locking notes:
 * Whenever we touch, or even just look at, the state table we have to
 * hold the PF_LOCK.  Functions that only do interface handling grab the
 * per-softc lock instead.
 */
136
137static void	pfsync_clone_destroy(struct ifnet *);
138static int	pfsync_clone_create(struct if_clone *, int);
139static void	pfsync_senddef(void *);
140#else
141void	pfsyncattach(int);
142#endif
143void	pfsync_setmtu(struct pfsync_softc *, int);
144int	pfsync_insert_net_state(struct pfsync_state *);
145int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
146	    struct rtentry *);
147int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
148void	pfsyncstart(struct ifnet *);
149
150struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **);
151int	pfsync_request_update(struct pfsync_state_upd *, struct in_addr *);
152int	pfsync_sendout(struct pfsync_softc *);
153void	pfsync_timeout(void *);
154void	pfsync_send_bus(struct pfsync_softc *, u_int8_t);
155void	pfsync_bulk_update(void *);
156void	pfsync_bulkfail(void *);
157
158int	pfsync_sync_ok;
159#ifndef __FreeBSD__
160extern int ifqmaxlen;
161extern struct timeval time;
162extern struct timeval mono_time;
163extern int hz;
164#endif
165
166#ifdef __FreeBSD__
167static MALLOC_DEFINE(M_PFSYNC, PFSYNCNAME, "Packet Filter State Sync. Interface");
168static LIST_HEAD(pfsync_list, pfsync_softc) pfsync_list;
169#define	SCP2IFP(sc)		((sc)->sc_ifp)
170IFC_SIMPLE_DECLARE(pfsync, 1);
171
172static void
173pfsync_clone_destroy(struct ifnet *ifp)
174{
175        struct pfsync_softc *sc;
176
177	sc = ifp->if_softc;
178	callout_stop(&sc->sc_tmo);
179	callout_stop(&sc->sc_bulk_tmo);
180	callout_stop(&sc->sc_bulkfail_tmo);
181
182	callout_stop(&sc->sc_send_tmo);
183
184#if NBPFILTER > 0
185        bpfdetach(ifp);
186#endif
187        if_detach(ifp);
188	if_free(ifp);
189        LIST_REMOVE(sc, sc_next);
190        free(sc, M_PFSYNC);
191}
192
193static int
194pfsync_clone_create(struct if_clone *ifc, int unit)
195{
196	struct pfsync_softc *sc;
197	struct ifnet *ifp;
198
199	MALLOC(sc, struct pfsync_softc *, sizeof(*sc), M_PFSYNC,
200	    M_WAITOK|M_ZERO);
201	ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC);
202	if (ifp == NULL) {
203		free(sc, M_PFSYNC);
204		return (ENOSPC);
205	}
206
207	pfsync_sync_ok = 1;
208	sc->sc_mbuf = NULL;
209	sc->sc_mbuf_net = NULL;
210	sc->sc_statep.s = NULL;
211	sc->sc_statep_net.s = NULL;
212	sc->sc_maxupdates = 128;
213	sc->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP);
214	sc->sc_ureq_received = 0;
215	sc->sc_ureq_sent = 0;
216
217	ifp = SCP2IFP(sc);
218	if_initname(ifp, ifc->ifc_name, unit);
219	ifp->if_ioctl = pfsyncioctl;
220	ifp->if_output = pfsyncoutput;
221	ifp->if_start = pfsyncstart;
222	ifp->if_snd.ifq_maxlen = ifqmaxlen;
223	ifp->if_hdrlen = PFSYNC_HDRLEN;
224	ifp->if_baudrate = IF_Mbps(100);
225	ifp->if_softc = sc;
226	pfsync_setmtu(sc, MCLBYTES);
227	callout_init(&sc->sc_tmo, NET_CALLOUT_MPSAFE);
228	callout_init(&sc->sc_bulk_tmo, NET_CALLOUT_MPSAFE);
229	callout_init(&sc->sc_bulkfail_tmo, NET_CALLOUT_MPSAFE);
230	callout_init(&sc->sc_send_tmo, NET_CALLOUT_MPSAFE);
231	sc->sc_ifq.ifq_maxlen = ifqmaxlen;
232	mtx_init(&sc->sc_ifq.ifq_mtx, ifp->if_xname, "pfsync send queue",
233	    MTX_DEF);
234	if_attach(ifp);
235
236	LIST_INSERT_HEAD(&pfsync_list, sc, sc_next);
237#if NBPFILTER > 0
238	bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
239#endif
240
241	return (0);
242}
243#else /* !__FreeBSD__ */
244void
245pfsyncattach(int npfsync)
246{
247	struct ifnet *ifp;
248
249	pfsync_sync_ok = 1;
250	bzero(&pfsyncif, sizeof(pfsyncif));
251	pfsyncif.sc_mbuf = NULL;
252	pfsyncif.sc_mbuf_net = NULL;
253	pfsyncif.sc_statep.s = NULL;
254	pfsyncif.sc_statep_net.s = NULL;
255	pfsyncif.sc_maxupdates = 128;
256	pfsyncif.sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
257	pfsyncif.sc_sendaddr.s_addr = INADDR_PFSYNC_GROUP;
258	pfsyncif.sc_ureq_received = 0;
259	pfsyncif.sc_ureq_sent = 0;
260	ifp = &pfsyncif.sc_if;
261	strlcpy(ifp->if_xname, "pfsync0", sizeof ifp->if_xname);
262	ifp->if_softc = &pfsyncif;
263	ifp->if_ioctl = pfsyncioctl;
264	ifp->if_output = pfsyncoutput;
265	ifp->if_start = pfsyncstart;
266	ifp->if_type = IFT_PFSYNC;
267	ifp->if_snd.ifq_maxlen = ifqmaxlen;
268	ifp->if_hdrlen = PFSYNC_HDRLEN;
269	pfsync_setmtu(&pfsyncif, MCLBYTES);
270	timeout_set(&pfsyncif.sc_tmo, pfsync_timeout, &pfsyncif);
271	timeout_set(&pfsyncif.sc_bulk_tmo, pfsync_bulk_update, &pfsyncif);
272	timeout_set(&pfsyncif.sc_bulkfail_tmo, pfsync_bulkfail, &pfsyncif);
273	if_attach(ifp);
274	if_alloc_sadl(ifp);
275
276#if NBPFILTER > 0
277	bpfattach(&pfsyncif.sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
278#endif
279}
280#endif
281
282/*
283 * Start output on the pfsync interface.
284 */
285void
286pfsyncstart(struct ifnet *ifp)
287{
288#ifdef __FreeBSD__
289	IF_LOCK(&ifp->if_snd);
290	_IF_DROP(&ifp->if_snd);
291	_IF_DRAIN(&ifp->if_snd);
292	IF_UNLOCK(&ifp->if_snd);
293#else
294	struct mbuf *m;
295	int s;
296
297	for (;;) {
298		s = splimp();
299		IF_DROP(&ifp->if_snd);
300		IF_DEQUEUE(&ifp->if_snd, m);
301		splx(s);
302
303		if (m == NULL)
304			return;
305		else
306			m_freem(m);
307	}
308#endif
309}
310
311int
312pfsync_insert_net_state(struct pfsync_state *sp)
313{
314	struct pf_state	*st = NULL;
315	struct pf_rule *r = NULL;
316	struct pfi_kif	*kif;
317
318#ifdef __FreeBSD__
319	PF_ASSERT(MA_OWNED);
320#endif
321	if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) {
322		printf("pfsync_insert_net_state: invalid creator id:"
323		    " %08x\n", ntohl(sp->creatorid));
324		return (EINVAL);
325	}
326
327	kif = pfi_lookup_create(sp->ifname);
328	if (kif == NULL) {
329		if (pf_status.debug >= PF_DEBUG_MISC)
330			printf("pfsync_insert_net_state: "
331			    "unknown interface: %s\n", sp->ifname);
332		/* skip this state */
333		return (0);
334	}
335
336	/*
337	 * Just use the default rule until we have infrastructure to find the
338	 * best matching rule.
339	 */
340	r = &pf_default_rule;
341
342	if (!r->max_states || r->states < r->max_states)
343		st = pool_get(&pf_state_pl, PR_NOWAIT);
344	if (st == NULL) {
345		pfi_maybe_destroy(kif);
346		return (ENOMEM);
347	}
348	bzero(st, sizeof(*st));
349
350	st->rule.ptr = r;
351	/* XXX get pointers to nat_rule and anchor */
352
353	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
354	r->states++;
355
356	/* fill in the rest of the state entry */
357	pf_state_host_ntoh(&sp->lan, &st->lan);
358	pf_state_host_ntoh(&sp->gwy, &st->gwy);
359	pf_state_host_ntoh(&sp->ext, &st->ext);
360
361	pf_state_peer_ntoh(&sp->src, &st->src);
362	pf_state_peer_ntoh(&sp->dst, &st->dst);
363
364	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
365	st->creation = time_second - ntohl(sp->creation);
366	st->expire = ntohl(sp->expire) + time_second;
367
368	st->af = sp->af;
369	st->proto = sp->proto;
370	st->direction = sp->direction;
371	st->log = sp->log;
372	st->timeout = sp->timeout;
373	st->allow_opts = sp->allow_opts;
374
375	bcopy(sp->id, &st->id, sizeof(st->id));
376	st->creatorid = sp->creatorid;
377	st->sync_flags = PFSTATE_FROMSYNC;
378
379
380	if (pf_insert_state(kif, st)) {
381		pfi_maybe_destroy(kif);
382		/* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */
383		r->states--;
384		pool_put(&pf_state_pl, st);
385		return (EINVAL);
386	}
387
388	return (0);
389}
390
391void
392#ifdef __FreeBSD__
393pfsync_input(struct mbuf *m, __unused int off)
394#else
395pfsync_input(struct mbuf *m, ...)
396#endif
397{
398	struct ip *ip = mtod(m, struct ip *);
399	struct pfsync_header *ph;
400#ifdef __FreeBSD__
401	struct pfsync_softc *sc = LIST_FIRST(&pfsync_list);
402#else
403	struct pfsync_softc *sc = &pfsyncif;
404#endif
405	struct pf_state *st, key;
406	struct pfsync_state *sp;
407	struct pfsync_state_upd *up;
408	struct pfsync_state_del *dp;
409	struct pfsync_state_clr *cp;
410	struct pfsync_state_upd_req *rup;
411	struct pfsync_state_bus *bus;
412	struct in_addr src;
413	struct mbuf *mp;
414	int iplen, action, error, i, s, count, offp, sfail, stale = 0;
415
416	pfsyncstats.pfsyncs_ipackets++;
417
418	/* verify that we have a sync interface configured */
419	if (!sc->sc_sync_ifp || !pf_status.running) /* XXX PF_LOCK? */
420		goto done;
421
422	/* verify that the packet came in on the right interface */
423	if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) {
424		pfsyncstats.pfsyncs_badif++;
425		goto done;
426	}
427
428	/* verify that the IP TTL is 255.  */
429	if (ip->ip_ttl != PFSYNC_DFLTTL) {
430		pfsyncstats.pfsyncs_badttl++;
431		goto done;
432	}
433
434	iplen = ip->ip_hl << 2;
435
436	if (m->m_pkthdr.len < iplen + sizeof(*ph)) {
437		pfsyncstats.pfsyncs_hdrops++;
438		goto done;
439	}
440
441	if (iplen + sizeof(*ph) > m->m_len) {
442		if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) {
443			pfsyncstats.pfsyncs_hdrops++;
444			goto done;
445		}
446		ip = mtod(m, struct ip *);
447	}
448	ph = (struct pfsync_header *)((char *)ip + iplen);
449
450	/* verify the version */
451	if (ph->version != PFSYNC_VERSION) {
452		pfsyncstats.pfsyncs_badver++;
453		goto done;
454	}
455
456	action = ph->action;
457	count = ph->count;
458
459	/* make sure it's a valid action code */
460	if (action >= PFSYNC_ACT_MAX) {
461		pfsyncstats.pfsyncs_badact++;
462		goto done;
463	}
464
465	/* Cheaper to grab this now than having to mess with mbufs later */
466	src = ip->ip_src;
467
468	switch (action) {
469	case PFSYNC_ACT_CLR: {
470		struct pf_state *nexts;
471		struct pfi_kif	*kif;
472		u_int32_t creatorid;
473		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
474		    sizeof(*cp), &offp)) == NULL) {
475			pfsyncstats.pfsyncs_badlen++;
476			return;
477		}
478		cp = (struct pfsync_state_clr *)(mp->m_data + offp);
479		creatorid = cp->creatorid;
480
481		s = splsoftnet();
482#ifdef __FreeBSD__
483		PF_LOCK();
484#endif
485		if (cp->ifname[0] == '\0') {
486			for (st = RB_MIN(pf_state_tree_id, &tree_id);
487			    st; st = nexts) {
488                		nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
489				if (st->creatorid == creatorid) {
490					st->timeout = PFTM_PURGE;
491					pf_purge_expired_state(st);
492				}
493			}
494		} else {
495			kif = pfi_lookup_if(cp->ifname);
496			if (kif == NULL) {
497				if (pf_status.debug >= PF_DEBUG_MISC)
498					printf("pfsync_input: PFSYNC_ACT_CLR "
499					    "bad interface: %s\n", cp->ifname);
500				splx(s);
501#ifdef __FreeBSD__
502				PF_UNLOCK();
503#endif
504				goto done;
505			}
506			for (st = RB_MIN(pf_state_tree_lan_ext,
507			    &kif->pfik_lan_ext); st; st = nexts) {
508				nexts = RB_NEXT(pf_state_tree_lan_ext,
509				    &kif->pfik_lan_ext, st);
510				if (st->creatorid == creatorid) {
511					st->timeout = PFTM_PURGE;
512					pf_purge_expired_state(st);
513				}
514			}
515		}
516#ifdef __FreeBSD__
517		PF_UNLOCK();
518#endif
519		splx(s);
520
521		break;
522	}
523	case PFSYNC_ACT_INS:
524		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
525		    count * sizeof(*sp), &offp)) == NULL) {
526			pfsyncstats.pfsyncs_badlen++;
527			return;
528		}
529
530		s = splsoftnet();
531#ifdef __FreeBSD__
532		PF_LOCK();
533#endif
534		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
535		    i < count; i++, sp++) {
536			/* check for invalid values */
537			if (sp->timeout >= PFTM_MAX ||
538			    sp->src.state > PF_TCPS_PROXY_DST ||
539			    sp->dst.state > PF_TCPS_PROXY_DST ||
540			    sp->direction > PF_OUT ||
541			    (sp->af != AF_INET && sp->af != AF_INET6)) {
542				if (pf_status.debug >= PF_DEBUG_MISC)
543					printf("pfsync_insert: PFSYNC_ACT_INS: "
544					    "invalid value\n");
545				pfsyncstats.pfsyncs_badstate++;
546				continue;
547			}
548
549			if ((error = pfsync_insert_net_state(sp))) {
550				if (error == ENOMEM) {
551					splx(s);
552#ifdef __FreeBSD__
553					PF_UNLOCK();
554#endif
555					goto done;
556				}
557				continue;
558			}
559		}
560#ifdef __FreeBSD__
561		PF_UNLOCK();
562#endif
563		splx(s);
564		break;
565	case PFSYNC_ACT_UPD:
566		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
567		    count * sizeof(*sp), &offp)) == NULL) {
568			pfsyncstats.pfsyncs_badlen++;
569			return;
570		}
571
572		s = splsoftnet();
573#ifdef __FreeBSD__
574		PF_LOCK();
575#endif
576		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
577		    i < count; i++, sp++) {
578			int flags = PFSYNC_FLAG_STALE;
579
580			/* check for invalid values */
581			if (sp->timeout >= PFTM_MAX ||
582			    sp->src.state > PF_TCPS_PROXY_DST ||
583			    sp->dst.state > PF_TCPS_PROXY_DST) {
584				if (pf_status.debug >= PF_DEBUG_MISC)
585					printf("pfsync_insert: PFSYNC_ACT_UPD: "
586					    "invalid value\n");
587				pfsyncstats.pfsyncs_badstate++;
588				continue;
589			}
590
591			bcopy(sp->id, &key.id, sizeof(key.id));
592			key.creatorid = sp->creatorid;
593
594			st = pf_find_state_byid(&key);
595			if (st == NULL) {
596				/* insert the update */
597				if (pfsync_insert_net_state(sp))
598					pfsyncstats.pfsyncs_badstate++;
599				continue;
600			}
601			sfail = 0;
602			if (st->proto == IPPROTO_TCP) {
603				/*
604				 * The state should never go backwards except
605				 * for syn-proxy states.  Neither should the
606				 * sequence window slide backwards.
607				 */
608				if (st->src.state > sp->src.state &&
609				    (st->src.state < PF_TCPS_PROXY_SRC ||
610				    sp->src.state >= PF_TCPS_PROXY_SRC))
611					sfail = 1;
612				else if (SEQ_GT(st->src.seqlo,
613				    ntohl(sp->src.seqlo)))
614					sfail = 3;
615				else if (st->dst.state > sp->dst.state) {
616					/* There might still be useful
617					 * information about the src state here,
618					 * so import that part of the update,
619					 * then "fail" so we send the updated
620					 * state back to the peer who is missing
621					 * our what we know. */
622					pf_state_peer_ntoh(&sp->src, &st->src);
623					/* XXX do anything with timeouts? */
624					sfail = 7;
625					flags = 0;
626				} else if (st->dst.state >= TCPS_SYN_SENT &&
627				    SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo)))
628					sfail = 4;
629			} else {
630				/*
631				 * Non-TCP protocol state machine always go
632				 * forwards
633				 */
634				if (st->src.state > sp->src.state)
635					sfail = 5;
636				else if ( st->dst.state > sp->dst.state)
637					sfail = 6;
638			}
639			if (sfail) {
640				if (pf_status.debug >= PF_DEBUG_MISC)
641					printf("pfsync: %s stale update "
642					    "(%d) id: %016llx "
643					    "creatorid: %08x\n",
644					    (sfail < 7 ?  "ignoring"
645					     : "partial"), sfail,
646#ifdef __FreeBSD__
647					    (unsigned long long)be64toh(st->id),
648#else
649					    betoh64(st->id),
650#endif
651					    ntohl(st->creatorid));
652				pfsyncstats.pfsyncs_badstate++;
653
654				if (!(sp->sync_flags & PFSTATE_STALE)) {
655					/* we have a better state, send it */
656					if (sc->sc_mbuf != NULL && !stale)
657						pfsync_sendout(sc);
658					stale++;
659					if (!st->sync_flags)
660						pfsync_pack_state(
661						    PFSYNC_ACT_UPD, st, flags);
662				}
663				continue;
664			}
665			pf_state_peer_ntoh(&sp->src, &st->src);
666			pf_state_peer_ntoh(&sp->dst, &st->dst);
667			st->expire = ntohl(sp->expire) + time_second;
668			st->timeout = sp->timeout;
669		}
670		if (stale && sc->sc_mbuf != NULL)
671			pfsync_sendout(sc);
672#ifdef __FreeBSD__
673		PF_UNLOCK();
674#endif
675		splx(s);
676		break;
677	/*
678	 * It's not strictly necessary for us to support the "uncompressed"
679	 * delete action, but it's relatively simple and maintains consistency.
680	 */
681	case PFSYNC_ACT_DEL:
682		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
683		    count * sizeof(*sp), &offp)) == NULL) {
684			pfsyncstats.pfsyncs_badlen++;
685			return;
686		}
687
688		s = splsoftnet();
689#ifdef __FreeBSD__
690		PF_LOCK();
691#endif
692		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
693		    i < count; i++, sp++) {
694			bcopy(sp->id, &key.id, sizeof(key.id));
695			key.creatorid = sp->creatorid;
696
697			st = pf_find_state_byid(&key);
698			if (st == NULL) {
699				pfsyncstats.pfsyncs_badstate++;
700				continue;
701			}
702			st->timeout = PFTM_PURGE;
703			st->sync_flags |= PFSTATE_FROMSYNC;
704			pf_purge_expired_state(st);
705		}
706#ifdef __FreeBSD__
707		PF_UNLOCK();
708#endif
709		splx(s);
710		break;
711	case PFSYNC_ACT_UPD_C: {
712		int update_requested = 0;
713
714		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
715		    count * sizeof(*up), &offp)) == NULL) {
716			pfsyncstats.pfsyncs_badlen++;
717			return;
718		}
719
720		s = splsoftnet();
721#ifdef __FreeBSD__
722		PF_LOCK();
723#endif
724		for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp);
725		    i < count; i++, up++) {
726			/* check for invalid values */
727			if (up->timeout >= PFTM_MAX ||
728			    up->src.state > PF_TCPS_PROXY_DST ||
729			    up->dst.state > PF_TCPS_PROXY_DST) {
730				if (pf_status.debug >= PF_DEBUG_MISC)
731					printf("pfsync_insert: "
732					    "PFSYNC_ACT_UPD_C: "
733					    "invalid value\n");
734				pfsyncstats.pfsyncs_badstate++;
735				continue;
736			}
737
738			bcopy(up->id, &key.id, sizeof(key.id));
739			key.creatorid = up->creatorid;
740
741			st = pf_find_state_byid(&key);
742			if (st == NULL) {
743				/* We don't have this state. Ask for it. */
744				error = pfsync_request_update(up, &src);
745				if (error == ENOMEM) {
746					splx(s);
747					goto done;
748				}
749				update_requested = 1;
750				pfsyncstats.pfsyncs_badstate++;
751				continue;
752			}
753			sfail = 0;
754			if (st->proto == IPPROTO_TCP) {
755				/*
756				 * The state should never go backwards except
757				 * for syn-proxy states.  Neither should the
758				 * sequence window slide backwards.
759				 */
760				if (st->src.state > up->src.state &&
761				    (st->src.state < PF_TCPS_PROXY_SRC ||
762				    up->src.state >= PF_TCPS_PROXY_SRC))
763					sfail = 1;
764				else if (st->dst.state > up->dst.state)
765					sfail = 2;
766				else if (SEQ_GT(st->src.seqlo,
767				    ntohl(up->src.seqlo)))
768					sfail = 3;
769				else if (st->dst.state >= TCPS_SYN_SENT &&
770				    SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo)))
771					sfail = 4;
772			} else {
773				/*
774				 * Non-TCP protocol state machine always go
775				 * forwards
776				 */
777				if (st->src.state > up->src.state)
778					sfail = 5;
779				else if (st->dst.state > up->dst.state)
780					sfail = 6;
781			}
782			if (sfail) {
783				if (pf_status.debug >= PF_DEBUG_MISC)
784					printf("pfsync: ignoring stale update "
785					    "(%d) id: %016llx "
786					    "creatorid: %08x\n", sfail,
787#ifdef __FreeBSD__
788					    (unsigned long long)be64toh(st->id),
789#else
790					    betoh64(st->id),
791#endif
792					    ntohl(st->creatorid));
793				pfsyncstats.pfsyncs_badstate++;
794
795				/* we have a better state, send it out */
796				if ((!stale || update_requested) &&
797				    sc->sc_mbuf != NULL) {
798					pfsync_sendout(sc);
799					update_requested = 0;
800				}
801				stale++;
802				if (!st->sync_flags)
803					pfsync_pack_state(PFSYNC_ACT_UPD, st,
804					    PFSYNC_FLAG_STALE);
805				continue;
806			}
807			pf_state_peer_ntoh(&up->src, &st->src);
808			pf_state_peer_ntoh(&up->dst, &st->dst);
809			st->expire = ntohl(up->expire) + time_second;
810			st->timeout = up->timeout;
811		}
812		if ((update_requested || stale) && sc->sc_mbuf)
813			pfsync_sendout(sc);
814#ifdef __FreeBSD__
815		PF_UNLOCK();
816#endif
817		splx(s);
818		break;
819	}
820	case PFSYNC_ACT_DEL_C:
821		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
822		    count * sizeof(*dp), &offp)) == NULL) {
823			pfsyncstats.pfsyncs_badlen++;
824			return;
825		}
826
827		s = splsoftnet();
828#ifdef __FreeBSD__
829		PF_LOCK();
830#endif
831		for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp);
832		    i < count; i++, dp++) {
833			bcopy(dp->id, &key.id, sizeof(key.id));
834			key.creatorid = dp->creatorid;
835
836			st = pf_find_state_byid(&key);
837			if (st == NULL) {
838				pfsyncstats.pfsyncs_badstate++;
839				continue;
840			}
841			st->timeout = PFTM_PURGE;
842			st->sync_flags |= PFSTATE_FROMSYNC;
843			pf_purge_expired_state(st);
844		}
845#ifdef __FreeBSD__
846		PF_UNLOCK();
847#endif
848		splx(s);
849		break;
850	case PFSYNC_ACT_INS_F:
851	case PFSYNC_ACT_DEL_F:
852		/* not implemented */
853		break;
854	case PFSYNC_ACT_UREQ:
855		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
856		    count * sizeof(*rup), &offp)) == NULL) {
857			pfsyncstats.pfsyncs_badlen++;
858			return;
859		}
860
861		s = splsoftnet();
862#ifdef __FreeBSD__
863		PF_LOCK();
864#endif
865		if (sc->sc_mbuf != NULL)
866			pfsync_sendout(sc);
867		for (i = 0,
868		    rup = (struct pfsync_state_upd_req *)(mp->m_data + offp);
869		    i < count; i++, rup++) {
870			bcopy(rup->id, &key.id, sizeof(key.id));
871			key.creatorid = rup->creatorid;
872
873			if (key.id == 0 && key.creatorid == 0) {
874				sc->sc_ureq_received = time_uptime;
875				if (pf_status.debug >= PF_DEBUG_MISC)
876					printf("pfsync: received "
877					    "bulk update request\n");
878				pfsync_send_bus(sc, PFSYNC_BUS_START);
879#ifdef __FreeBSD__
880				callout_reset(&sc->sc_bulk_tmo, 1 * hz,
881				    pfsync_bulk_update,
882				    LIST_FIRST(&pfsync_list));
883#else
884				timeout_add(&sc->sc_bulk_tmo, 1 * hz);
885#endif
886			} else {
887				st = pf_find_state_byid(&key);
888				if (st == NULL) {
889					pfsyncstats.pfsyncs_badstate++;
890					continue;
891				}
892				if (!st->sync_flags)
893					pfsync_pack_state(PFSYNC_ACT_UPD,
894					    st, 0);
895			}
896		}
897		if (sc->sc_mbuf != NULL)
898			pfsync_sendout(sc);
899#ifdef __FreeBSD__
900		PF_UNLOCK();
901#endif
902		splx(s);
903		break;
904	case PFSYNC_ACT_BUS:
905		/* If we're not waiting for a bulk update, who cares. */
906		if (sc->sc_ureq_sent == 0)
907			break;
908
909		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
910		    sizeof(*bus), &offp)) == NULL) {
911			pfsyncstats.pfsyncs_badlen++;
912			return;
913		}
914		bus = (struct pfsync_state_bus *)(mp->m_data + offp);
915		switch (bus->status) {
916		case PFSYNC_BUS_START:
917#ifdef __FreeBSD__
918			callout_reset(&sc->sc_bulkfail_tmo,
919			    pf_pool_limits[PF_LIMIT_STATES].limit /
920			    (PFSYNC_BULKPACKETS * sc->sc_maxcount),
921			    pfsync_bulkfail, LIST_FIRST(&pfsync_list));
922#else
923			timeout_add(&sc->sc_bulkfail_tmo,
924			    pf_pool_limits[PF_LIMIT_STATES].limit /
925			    (PFSYNC_BULKPACKETS * sc->sc_maxcount));
926#endif
927			if (pf_status.debug >= PF_DEBUG_MISC)
928				printf("pfsync: received bulk "
929				    "update start\n");
930			break;
931		case PFSYNC_BUS_END:
932			if (time_uptime - ntohl(bus->endtime) >=
933			    sc->sc_ureq_sent) {
934				/* that's it, we're happy */
935				sc->sc_ureq_sent = 0;
936				sc->sc_bulk_tries = 0;
937#ifdef __FreeBSD__
938				callout_stop(&sc->sc_bulkfail_tmo);
939#else
940				timeout_del(&sc->sc_bulkfail_tmo);
941#endif
942#if NCARP > 0	/* XXX_IMPORT */
943				if (!pfsync_sync_ok)
944					carp_suppress_preempt--;
945#endif
946				pfsync_sync_ok = 1;
947				if (pf_status.debug >= PF_DEBUG_MISC)
948					printf("pfsync: received valid "
949					    "bulk update end\n");
950			} else {
951				if (pf_status.debug >= PF_DEBUG_MISC)
952					printf("pfsync: received invalid "
953					    "bulk update end: bad timestamp\n");
954			}
955			break;
956		}
957		break;
958	}
959
960done:
961	if (m)
962		m_freem(m);
963}
964
/*
 * if_output handler for the pfsync interface.
 *
 * The packet is discarded and success is reported; ifp, dst and rt are
 * not used.
 */
int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
	struct rtentry *rt)
{
	/* Nothing is ever sent this way: free the chain and claim success. */
	m_freem(m);

	return (0);
}
972
973/* ARGSUSED */
974int
975pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
976{
977#ifndef __FreeBSD__
978	struct proc *p = curproc;
979#endif
980	struct pfsync_softc *sc = ifp->if_softc;
981	struct ifreq *ifr = (struct ifreq *)data;
982	struct ip_moptions *imo = &sc->sc_imo;
983	struct pfsyncreq pfsyncr;
984	struct ifnet    *sifp;
985	int s, error;
986
987	switch (cmd) {
988	case SIOCSIFADDR:
989	case SIOCAIFADDR:
990	case SIOCSIFDSTADDR:
991	case SIOCSIFFLAGS:
992#ifdef __FreeBSD__
993		if (ifp->if_flags & IFF_UP)
994			ifp->if_drv_flags |= IFF_DRV_RUNNING;
995		else
996			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
997#else
998		if (ifp->if_flags & IFF_UP)
999			ifp->if_flags |= IFF_RUNNING;
1000		else
1001			ifp->if_flags &= ~IFF_RUNNING;
1002#endif
1003		break;
1004	case SIOCSIFMTU:
1005		if (ifr->ifr_mtu < PFSYNC_MINMTU)
1006			return (EINVAL);
1007		if (ifr->ifr_mtu > MCLBYTES)
1008			ifr->ifr_mtu = MCLBYTES;
1009		s = splnet();
1010#ifdef __FreeBSD__
1011		PF_LOCK();
1012#endif
1013		if (ifr->ifr_mtu < ifp->if_mtu) {
1014			pfsync_sendout(sc);
1015		}
1016		pfsync_setmtu(sc, ifr->ifr_mtu);
1017#ifdef __FreeBSD__
1018		PF_UNLOCK();
1019#endif
1020		splx(s);
1021		break;
1022	case SIOCGETPFSYNC:
1023#ifdef __FreeBSD__
1024		/* XXX: read unlocked */
1025#endif
1026		bzero(&pfsyncr, sizeof(pfsyncr));
1027		if (sc->sc_sync_ifp)
1028			strlcpy(pfsyncr.pfsyncr_syncdev,
1029			    sc->sc_sync_ifp->if_xname, IFNAMSIZ);
1030		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
1031		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
1032		if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))))
1033			return (error);
1034		break;
1035	case SIOCSETPFSYNC:
1036#ifdef __FreeBSD__
1037		if ((error = suser(curthread)) != 0)
1038#else
1039		if ((error = suser(p, p->p_acflag)) != 0)
1040#endif
1041			return (error);
1042		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
1043			return (error);
1044
1045		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
1046			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
1047		else
1048			sc->sc_sync_peer.s_addr =
1049			    pfsyncr.pfsyncr_syncpeer.s_addr;
1050
1051		if (pfsyncr.pfsyncr_maxupdates > 255)
1052			return (EINVAL);
1053#ifdef __FreeBSD__
1054		callout_drain(&sc->sc_send_tmo);
1055		PF_LOCK();
1056#endif
1057		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
1058
1059		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
1060			sc->sc_sync_ifp = NULL;
1061			if (sc->sc_mbuf_net != NULL) {
1062				/* Don't keep stale pfsync packets around. */
1063				s = splnet();
1064				m_freem(sc->sc_mbuf_net);
1065				sc->sc_mbuf_net = NULL;
1066				sc->sc_statep_net.s = NULL;
1067				splx(s);
1068			}
1069			if (imo->imo_num_memberships > 0) {
1070				in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1071				imo->imo_multicast_ifp = NULL;
1072			}
1073#ifdef __FreeBSD__
1074			PF_UNLOCK();
1075#endif
1076			break;
1077		}
1078
1079		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) {
1080#ifdef __FreeBSD__
1081			PF_UNLOCK();
1082#endif
1083			return (EINVAL);
1084		}
1085
1086		s = splnet();
1087#ifdef __FreeBSD__
1088		if (sifp->if_mtu < SCP2IFP(sc)->if_mtu ||
1089#else
1090		if (sifp->if_mtu < sc->sc_if.if_mtu ||
1091#endif
1092		    (sc->sc_sync_ifp != NULL &&
1093		    sifp->if_mtu < sc->sc_sync_ifp->if_mtu) ||
1094		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
1095			pfsync_sendout(sc);
1096		sc->sc_sync_ifp = sifp;
1097
1098#ifdef __FreeBSD__
1099		pfsync_setmtu(sc, SCP2IFP(sc)->if_mtu);
1100#else
1101		pfsync_setmtu(sc, sc->sc_if.if_mtu);
1102#endif
1103
1104		if (imo->imo_num_memberships > 0) {
1105			in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1106			imo->imo_multicast_ifp = NULL;
1107		}
1108
1109		if (sc->sc_sync_ifp &&
1110		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
1111			struct in_addr addr;
1112
1113			if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) {
1114				sc->sc_sync_ifp = NULL;
1115#ifdef __FreeBSD__
1116				PF_UNLOCK();
1117#endif
1118				splx(s);
1119				return (EADDRNOTAVAIL);
1120			}
1121#ifdef __FreeBSD__
1122			PF_UNLOCK();		/* addmulti mallocs w/ WAITOK */
1123			addr.s_addr = htonl(INADDR_PFSYNC_GROUP);
1124#else
1125			addr.s_addr = INADDR_PFSYNC_GROUP;
1126#endif
1127
1128			if ((imo->imo_membership[0] =
1129			    in_addmulti(&addr, sc->sc_sync_ifp)) == NULL) {
1130				sc->sc_sync_ifp = NULL;
1131				splx(s);
1132				return (ENOBUFS);
1133			}
1134			imo->imo_num_memberships++;
1135			imo->imo_multicast_ifp = sc->sc_sync_ifp;
1136			imo->imo_multicast_ttl = PFSYNC_DFLTTL;
1137			imo->imo_multicast_loop = 0;
1138#ifdef __FreeBSD__
1139			PF_LOCK();
1140#endif
1141		}
1142
1143		if (sc->sc_sync_ifp ||
1144		    sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) {
1145			/* Request a full state table update. */
1146			sc->sc_ureq_sent = time_uptime;
1147#if NCARP > 0
1148			if (pfsync_sync_ok)
1149				carp_suppress_preempt++;
1150#endif
1151			pfsync_sync_ok = 0;
1152			if (pf_status.debug >= PF_DEBUG_MISC)
1153				printf("pfsync: requesting bulk update\n");
1154#ifdef __FreeBSD__
1155			callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
1156			    pfsync_bulkfail, LIST_FIRST(&pfsync_list));
1157#else
1158			timeout_add(&sc->sc_bulkfail_tmo, 5 * hz);
1159#endif
1160			error = pfsync_request_update(NULL, NULL);
1161			if (error == ENOMEM) {
1162#ifdef __FreeBSD__
1163				PF_UNLOCK();
1164#endif
1165				splx(s);
1166				return (ENOMEM);
1167			}
1168			pfsync_sendout(sc);
1169		}
1170#ifdef __FreeBSD__
1171		PF_UNLOCK();
1172#endif
1173		splx(s);
1174
1175		break;
1176
1177	default:
1178		return (ENOTTY);
1179	}
1180
1181	return (0);
1182}
1183
1184void
1185pfsync_setmtu(struct pfsync_softc *sc, int mtu_req)
1186{
1187	int mtu;
1188
1189	if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req)
1190		mtu = sc->sc_sync_ifp->if_mtu;
1191	else
1192		mtu = mtu_req;
1193
1194	sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) /
1195	    sizeof(struct pfsync_state);
1196	if (sc->sc_maxcount > 254)
1197	    sc->sc_maxcount = 254;
1198#ifdef __FreeBSD__
1199	SCP2IFP(sc)->if_mtu = sizeof(struct pfsync_header) +
1200	    sc->sc_maxcount * sizeof(struct pfsync_state);
1201#else
1202	sc->sc_if.if_mtu = sizeof(struct pfsync_header) +
1203	    sc->sc_maxcount * sizeof(struct pfsync_state);
1204#endif
1205}
1206
1207struct mbuf *
1208pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp)
1209{
1210	struct pfsync_header *h;
1211	struct mbuf *m;
1212	int len;
1213
1214#ifdef __FreeBSD__
1215	PF_ASSERT(MA_OWNED);
1216#endif
1217	MGETHDR(m, M_DONTWAIT, MT_DATA);
1218	if (m == NULL) {
1219#ifdef __FreeBSD__
1220		SCP2IFP(sc)->if_oerrors++;
1221#else
1222		sc->sc_if.if_oerrors++;
1223#endif
1224		return (NULL);
1225	}
1226
1227	switch (action) {
1228	case PFSYNC_ACT_CLR:
1229		len = sizeof(struct pfsync_header) +
1230		    sizeof(struct pfsync_state_clr);
1231		break;
1232	case PFSYNC_ACT_UPD_C:
1233		len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd)) +
1234		    sizeof(struct pfsync_header);
1235		break;
1236	case PFSYNC_ACT_DEL_C:
1237		len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) +
1238		    sizeof(struct pfsync_header);
1239		break;
1240	case PFSYNC_ACT_UREQ:
1241		len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) +
1242		    sizeof(struct pfsync_header);
1243		break;
1244	case PFSYNC_ACT_BUS:
1245		len = sizeof(struct pfsync_header) +
1246		    sizeof(struct pfsync_state_bus);
1247		break;
1248	default:
1249		len = (sc->sc_maxcount * sizeof(struct pfsync_state)) +
1250		    sizeof(struct pfsync_header);
1251		break;
1252	}
1253
1254	if (len > MHLEN) {
1255		MCLGET(m, M_DONTWAIT);
1256		if ((m->m_flags & M_EXT) == 0) {
1257			m_free(m);
1258#ifdef __FreeBSD__
1259			SCP2IFP(sc)->if_oerrors++;
1260#else
1261			sc->sc_if.if_oerrors++;
1262#endif
1263			return (NULL);
1264		}
1265		m->m_data += (MCLBYTES - len) &~ (sizeof(long) - 1);
1266	} else
1267		MH_ALIGN(m, len);
1268
1269	m->m_pkthdr.rcvif = NULL;
1270	m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header);
1271	h = mtod(m, struct pfsync_header *);
1272	h->version = PFSYNC_VERSION;
1273	h->af = 0;
1274	h->count = 0;
1275	h->action = action;
1276
1277	*sp = (void *)((char *)h + PFSYNC_HDRLEN);
1278#ifdef __FreeBSD__
1279	callout_reset(&sc->sc_tmo, hz, pfsync_timeout,
1280	    LIST_FIRST(&pfsync_list));
1281#else
1282	timeout_add(&sc->sc_tmo, hz);
1283#endif
1284	return (m);
1285}
1286
1287int
1288pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags)
1289{
1290#ifdef __FreeBSD__
1291	struct ifnet *ifp = SCP2IFP(LIST_FIRST(&pfsync_list));
1292#else
1293	struct ifnet *ifp = &pfsyncif.sc_if;
1294#endif
1295	struct pfsync_softc *sc = ifp->if_softc;
1296	struct pfsync_header *h, *h_net;
1297	struct pfsync_state *sp = NULL;
1298	struct pfsync_state_upd *up = NULL;
1299	struct pfsync_state_del *dp = NULL;
1300	struct pf_rule *r;
1301	u_long secs;
1302	int s, ret = 0;
1303	u_int8_t i = 255, newaction = 0;
1304
1305#ifdef __FreeBSD__
1306	PF_ASSERT(MA_OWNED);
1307#endif
1308	/*
1309	 * If a packet falls in the forest and there's nobody around to
1310	 * hear, does it make a sound?
1311	 */
1312	if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL &&
1313	    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
1314		/* Don't leave any stale pfsync packets hanging around. */
1315		if (sc->sc_mbuf != NULL) {
1316			m_freem(sc->sc_mbuf);
1317			sc->sc_mbuf = NULL;
1318			sc->sc_statep.s = NULL;
1319		}
1320		return (0);
1321	}
1322
1323	if (action >= PFSYNC_ACT_MAX)
1324		return (EINVAL);
1325
1326	s = splnet();
1327	if (sc->sc_mbuf == NULL) {
1328		if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
1329		    (void *)&sc->sc_statep.s)) == NULL) {
1330			splx(s);
1331			return (ENOMEM);
1332		}
1333		h = mtod(sc->sc_mbuf, struct pfsync_header *);
1334	} else {
1335		h = mtod(sc->sc_mbuf, struct pfsync_header *);
1336		if (h->action != action) {
1337			pfsync_sendout(sc);
1338			if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
1339			    (void *)&sc->sc_statep.s)) == NULL) {
1340				splx(s);
1341				return (ENOMEM);
1342			}
1343			h = mtod(sc->sc_mbuf, struct pfsync_header *);
1344		} else {
1345			/*
1346			 * If it's an update, look in the packet to see if
1347			 * we already have an update for the state.
1348			 */
1349			if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) {
1350				struct pfsync_state *usp =
1351				    (void *)((char *)h + PFSYNC_HDRLEN);
1352
1353				for (i = 0; i < h->count; i++) {
1354					if (!memcmp(usp->id, &st->id,
1355					    PFSYNC_ID_LEN) &&
1356					    usp->creatorid == st->creatorid) {
1357						sp = usp;
1358						sp->updates++;
1359						break;
1360					}
1361					usp++;
1362				}
1363			}
1364		}
1365	}
1366
1367	secs = time_second;
1368
1369	st->pfsync_time = time_uptime;
1370	TAILQ_REMOVE(&state_updates, st, u.s.entry_updates);
1371	TAILQ_INSERT_TAIL(&state_updates, st, u.s.entry_updates);
1372
1373	if (sp == NULL) {
1374		/* not a "duplicate" update */
1375		i = 255;
1376		sp = sc->sc_statep.s++;
1377		sc->sc_mbuf->m_pkthdr.len =
1378		    sc->sc_mbuf->m_len += sizeof(struct pfsync_state);
1379		h->count++;
1380		bzero(sp, sizeof(*sp));
1381
1382		bcopy(&st->id, sp->id, sizeof(sp->id));
1383		sp->creatorid = st->creatorid;
1384
1385		strlcpy(sp->ifname, st->u.s.kif->pfik_name, sizeof(sp->ifname));
1386		pf_state_host_hton(&st->lan, &sp->lan);
1387		pf_state_host_hton(&st->gwy, &sp->gwy);
1388		pf_state_host_hton(&st->ext, &sp->ext);
1389
1390		bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
1391
1392		sp->creation = htonl(secs - st->creation);
1393		sp->packets[0] = htonl(st->packets[0]);
1394		sp->packets[1] = htonl(st->packets[1]);
1395		sp->bytes[0] = htonl(st->bytes[0]);
1396		sp->bytes[1] = htonl(st->bytes[1]);
1397		if ((r = st->rule.ptr) == NULL)
1398			sp->rule = htonl(-1);
1399		else
1400			sp->rule = htonl(r->nr);
1401		if ((r = st->anchor.ptr) == NULL)
1402			sp->anchor = htonl(-1);
1403		else
1404			sp->anchor = htonl(r->nr);
1405		sp->af = st->af;
1406		sp->proto = st->proto;
1407		sp->direction = st->direction;
1408		sp->log = st->log;
1409		sp->allow_opts = st->allow_opts;
1410		sp->timeout = st->timeout;
1411
1412		if (flags & PFSYNC_FLAG_STALE)
1413			sp->sync_flags |= PFSTATE_STALE;
1414	}
1415
1416	pf_state_peer_hton(&st->src, &sp->src);
1417	pf_state_peer_hton(&st->dst, &sp->dst);
1418
1419	if (st->expire <= secs)
1420		sp->expire = htonl(0);
1421	else
1422		sp->expire = htonl(st->expire - secs);
1423
1424	/* do we need to build "compressed" actions for network transfer? */
1425	if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) {
1426		switch (action) {
1427		case PFSYNC_ACT_UPD:
1428			newaction = PFSYNC_ACT_UPD_C;
1429			break;
1430		case PFSYNC_ACT_DEL:
1431			newaction = PFSYNC_ACT_DEL_C;
1432			break;
1433		default:
1434			/* by default we just send the uncompressed states */
1435			break;
1436		}
1437	}
1438
1439	if (newaction) {
1440		if (sc->sc_mbuf_net == NULL) {
1441			if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction,
1442			    (void *)&sc->sc_statep_net.s)) == NULL) {
1443				splx(s);
1444				return (ENOMEM);
1445			}
1446		}
1447		h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *);
1448
1449		switch (newaction) {
1450		case PFSYNC_ACT_UPD_C:
1451			if (i != 255) {
1452				up = (void *)((char *)h_net +
1453				    PFSYNC_HDRLEN + (i * sizeof(*up)));
1454				up->updates++;
1455			} else {
1456				h_net->count++;
1457				sc->sc_mbuf_net->m_pkthdr.len =
1458				    sc->sc_mbuf_net->m_len += sizeof(*up);
1459				up = sc->sc_statep_net.u++;
1460
1461				bzero(up, sizeof(*up));
1462				bcopy(&st->id, up->id, sizeof(up->id));
1463				up->creatorid = st->creatorid;
1464			}
1465			up->timeout = st->timeout;
1466			up->expire = sp->expire;
1467			up->src = sp->src;
1468			up->dst = sp->dst;
1469			break;
1470		case PFSYNC_ACT_DEL_C:
1471			sc->sc_mbuf_net->m_pkthdr.len =
1472			    sc->sc_mbuf_net->m_len += sizeof(*dp);
1473			dp = sc->sc_statep_net.d++;
1474			h_net->count++;
1475
1476			bzero(dp, sizeof(*dp));
1477			bcopy(&st->id, dp->id, sizeof(dp->id));
1478			dp->creatorid = st->creatorid;
1479			break;
1480		}
1481	}
1482
1483	if (h->count == sc->sc_maxcount ||
1484	    (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates)))
1485		ret = pfsync_sendout(sc);
1486
1487	splx(s);
1488	return (ret);
1489}
1490
1491/* This must be called in splnet() */
1492int
1493pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src)
1494{
1495#ifdef __FreeBSD__
1496	struct ifnet *ifp = SCP2IFP(LIST_FIRST(&pfsync_list));
1497#else
1498	struct ifnet *ifp = &pfsyncif.sc_if;
1499#endif
1500	struct pfsync_header *h;
1501	struct pfsync_softc *sc = ifp->if_softc;
1502	struct pfsync_state_upd_req *rup;
1503	int ret = 0;
1504
1505#ifdef __FreeBSD__
1506	PF_ASSERT(MA_OWNED);
1507#endif
1508	if (sc->sc_mbuf == NULL) {
1509		if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
1510		    (void *)&sc->sc_statep.s)) == NULL)
1511			return (ENOMEM);
1512		h = mtod(sc->sc_mbuf, struct pfsync_header *);
1513	} else {
1514		h = mtod(sc->sc_mbuf, struct pfsync_header *);
1515		if (h->action != PFSYNC_ACT_UREQ) {
1516			pfsync_sendout(sc);
1517			if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
1518			    (void *)&sc->sc_statep.s)) == NULL)
1519				return (ENOMEM);
1520			h = mtod(sc->sc_mbuf, struct pfsync_header *);
1521		}
1522	}
1523
1524	if (src != NULL)
1525		sc->sc_sendaddr = *src;
1526	sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup);
1527	h->count++;
1528	rup = sc->sc_statep.r++;
1529	bzero(rup, sizeof(*rup));
1530	if (up != NULL) {
1531		bcopy(up->id, rup->id, sizeof(rup->id));
1532		rup->creatorid = up->creatorid;
1533	}
1534
1535	if (h->count == sc->sc_maxcount)
1536		ret = pfsync_sendout(sc);
1537
1538	return (ret);
1539}
1540
1541int
1542pfsync_clear_states(u_int32_t creatorid, char *ifname)
1543{
1544#ifdef __FreeBSD__
1545	struct ifnet *ifp = SCP2IFP(LIST_FIRST(&pfsync_list));
1546#else
1547	struct ifnet *ifp = &pfsyncif.sc_if;
1548#endif
1549	struct pfsync_softc *sc = ifp->if_softc;
1550	struct pfsync_state_clr *cp;
1551	int s, ret;
1552
1553	s = splnet();
1554#ifdef __FreeBSD__
1555	PF_ASSERT(MA_OWNED);
1556#endif
1557	if (sc->sc_mbuf != NULL)
1558		pfsync_sendout(sc);
1559	if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR,
1560	    (void *)&sc->sc_statep.c)) == NULL) {
1561		splx(s);
1562		return (ENOMEM);
1563	}
1564	sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp);
1565	cp = sc->sc_statep.c;
1566	cp->creatorid = creatorid;
1567	if (ifname != NULL)
1568		strlcpy(cp->ifname, ifname, IFNAMSIZ);
1569
1570	ret = (pfsync_sendout(sc));
1571	splx(s);
1572	return (ret);
1573}
1574
/*
 * Timeout handler that flushes whatever pfsync packet is pending.
 * On FreeBSD it is armed by pfsync_get_mbuf() via the sc_tmo callout and
 * takes the pf lock itself.
 */
void
pfsync_timeout(void *v)
{
	struct pfsync_softc *softc = v;
	int spl;

	spl = splnet();
#ifdef __FreeBSD__
	PF_LOCK();
#endif
	pfsync_sendout(softc);
#ifdef __FreeBSD__
	PF_UNLOCK();
#endif
	splx(spl);
}
1591
1592/* This must be called in splnet() */
1593void
1594pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status)
1595{
1596	struct pfsync_state_bus *bus;
1597
1598#ifdef __FreeBSD__
1599	PF_ASSERT(MA_OWNED);
1600#endif
1601	if (sc->sc_mbuf != NULL)
1602		pfsync_sendout(sc);
1603
1604	if (pfsync_sync_ok &&
1605	    (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS,
1606	    (void *)&sc->sc_statep.b)) != NULL) {
1607		sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus);
1608		bus = sc->sc_statep.b;
1609		bus->creatorid = pf_status.hostid;
1610		bus->status = status;
1611		bus->endtime = htonl(time_uptime - sc->sc_ureq_received);
1612		pfsync_sendout(sc);
1613	}
1614}
1615
1616void
1617pfsync_bulk_update(void *v)
1618{
1619	struct pfsync_softc *sc = v;
1620	int s, i = 0;
1621	struct pf_state *state;
1622
1623#ifdef __FreeBSD__
1624	PF_LOCK();
1625#endif
1626	s = splnet();
1627	if (sc->sc_mbuf != NULL)
1628		pfsync_sendout(sc);
1629
1630	/*
1631	 * Grab at most PFSYNC_BULKPACKETS worth of states which have not
1632	 * been sent since the latest request was made.
1633	 */
1634	while ((state = TAILQ_FIRST(&state_updates)) != NULL &&
1635	    ++i < (sc->sc_maxcount * PFSYNC_BULKPACKETS)) {
1636		if (state->pfsync_time > sc->sc_ureq_received) {
1637			/* we're done */
1638			pfsync_send_bus(sc, PFSYNC_BUS_END);
1639			sc->sc_ureq_received = 0;
1640#ifdef __FreeBSD__
1641			callout_stop(&sc->sc_bulk_tmo);
1642#else
1643			timeout_del(&sc->sc_bulk_tmo);
1644#endif
1645			if (pf_status.debug >= PF_DEBUG_MISC)
1646				printf("pfsync: bulk update complete\n");
1647			break;
1648		} else {
1649			/* send an update and move to end of list */
1650			if (!state->sync_flags)
1651				pfsync_pack_state(PFSYNC_ACT_UPD, state, 0);
1652			state->pfsync_time = time_uptime;
1653			TAILQ_REMOVE(&state_updates, state, u.s.entry_updates);
1654			TAILQ_INSERT_TAIL(&state_updates, state,
1655			    u.s.entry_updates);
1656
1657			/* look again for more in a bit */
1658#ifdef __FreeBSD__
1659			callout_reset(&sc->sc_bulk_tmo, 1, pfsync_timeout,
1660			    LIST_FIRST(&pfsync_list));
1661#else
1662			timeout_add(&sc->sc_bulk_tmo, 1);
1663#endif
1664		}
1665	}
1666	if (sc->sc_mbuf != NULL)
1667		pfsync_sendout(sc);
1668	splx(s);
1669#ifdef __FreeBSD__
1670	PF_UNLOCK();
1671#endif
1672}
1673
1674void
1675pfsync_bulkfail(void *v)
1676{
1677	struct pfsync_softc *sc = v;
1678	int s, error;
1679
1680#ifdef __FreeBSD__
1681	PF_LOCK();
1682#endif
1683	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
1684		/* Try again in a bit */
1685#ifdef __FreeBSD__
1686		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail,
1687		    LIST_FIRST(&pfsync_list));
1688#else
1689		timeout_add(&sc->sc_bulkfail_tmo, 5 * hz);
1690#endif
1691		s = splnet();
1692		error = pfsync_request_update(NULL, NULL);
1693		if (error == ENOMEM) {
1694			if (pf_status.debug >= PF_DEBUG_MISC)
1695				printf("pfsync: cannot allocate mbufs for "
1696				    "bulk update\n");
1697		} else
1698			pfsync_sendout(sc);
1699		splx(s);
1700	} else {
1701		/* Pretend like the transfer was ok */
1702		sc->sc_ureq_sent = 0;
1703		sc->sc_bulk_tries = 0;
1704#if NCARP > 0
1705		if (!pfsync_sync_ok)
1706			carp_suppress_preempt--;
1707#endif
1708		pfsync_sync_ok = 1;
1709		if (pf_status.debug >= PF_DEBUG_MISC)
1710			printf("pfsync: failed to receive "
1711			    "bulk update status\n");
1712#ifdef __FreeBSD__
1713		callout_stop(&sc->sc_bulkfail_tmo);
1714#else
1715		timeout_del(&sc->sc_bulkfail_tmo);
1716#endif
1717	}
1718#ifdef __FreeBSD__
1719	PF_UNLOCK();
1720#endif
1721}
1722
1723/* This must be called in splnet() */
1724int
1725pfsync_sendout(sc)
1726	struct pfsync_softc *sc;
1727{
1728#if NBPFILTER > 0
1729# ifdef __FreeBSD__
1730	struct ifnet *ifp = SCP2IFP(sc);
1731# else
1732	struct ifnet *ifp = &sc->if_sc;
1733# endif
1734#endif
1735	struct mbuf *m;
1736
1737#ifdef __FreeBSD__
1738	PF_ASSERT(MA_OWNED);
1739	callout_stop(&sc->sc_tmo);
1740#else
1741	timeout_del(&sc->sc_tmo);
1742#endif
1743
1744	if (sc->sc_mbuf == NULL)
1745		return (0);
1746	m = sc->sc_mbuf;
1747	sc->sc_mbuf = NULL;
1748	sc->sc_statep.s = NULL;
1749
1750#ifdef __FreeBSD__
1751	KASSERT(m != NULL, ("pfsync_sendout: null mbuf"));
1752#endif
1753#if NBPFILTER > 0
1754	if (ifp->if_bpf)
1755		bpf_mtap(ifp->if_bpf, m);
1756#endif
1757
1758	if (sc->sc_mbuf_net) {
1759		m_freem(m);
1760		m = sc->sc_mbuf_net;
1761		sc->sc_mbuf_net = NULL;
1762		sc->sc_statep_net.s = NULL;
1763	}
1764
1765	if (sc->sc_sync_ifp || sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) {
1766		struct ip *ip;
1767		struct sockaddr sa;
1768
1769		M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
1770		if (m == NULL) {
1771			pfsyncstats.pfsyncs_onomem++;
1772			return (0);
1773		}
1774		ip = mtod(m, struct ip *);
1775		ip->ip_v = IPVERSION;
1776		ip->ip_hl = sizeof(*ip) >> 2;
1777		ip->ip_tos = IPTOS_LOWDELAY;
1778#ifdef __FreeBSD__
1779		ip->ip_len = m->m_pkthdr.len;
1780#else
1781		ip->ip_len = htons(m->m_pkthdr.len);
1782#endif
1783		ip->ip_id = htons(ip_randomid());
1784#ifdef __FreeBSD__
1785		ip->ip_off = IP_DF;
1786#else
1787		ip->ip_off = htons(IP_DF);
1788#endif
1789		ip->ip_ttl = PFSYNC_DFLTTL;
1790		ip->ip_p = IPPROTO_PFSYNC;
1791		ip->ip_sum = 0;
1792
1793		bzero(&sa, sizeof(sa));
1794		ip->ip_src.s_addr = INADDR_ANY;
1795
1796#ifdef __FreeBSD__
1797		if (sc->sc_sendaddr.s_addr == htonl(INADDR_PFSYNC_GROUP))
1798#else
1799		if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP)
1800#endif
1801			m->m_flags |= M_MCAST;
1802		ip->ip_dst = sc->sc_sendaddr;
1803#ifdef __FreeBSD__
1804		/* XXX_IMPORT */
1805		sc->sc_sendaddr.s_addr = htonl(sc->sc_sync_peer.s_addr);
1806#else
1807		sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr;
1808#endif
1809
1810		pfsyncstats.pfsyncs_opackets++;
1811#ifdef __FreeBSD__
1812		if (!IF_HANDOFF(&sc->sc_ifq, m, NULL))
1813			pfsyncstats.pfsyncs_oerrors++;
1814		callout_reset(&sc->sc_send_tmo, 1, pfsync_senddef, sc);
1815#else
1816		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
1817			pfsyncstats.pfsyncs_oerrors++;
1818#endif
1819	} else
1820		m_freem(m);
1821
1822	return (0);
1823}
1824
1825#ifdef __FreeBSD__
1826static void
1827pfsync_senddef(void *arg)
1828{
1829	struct pfsync_softc *sc = (struct pfsync_softc *)arg;
1830	struct mbuf *m;
1831
1832	for(;;) {
1833		IF_DEQUEUE(&sc->sc_ifq, m);
1834		if (m == NULL)
1835			break;
1836		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
1837			pfsyncstats.pfsyncs_oerrors++;
1838	}
1839}
1840
1841static int
1842pfsync_modevent(module_t mod, int type, void *data)
1843{
1844	int error = 0;
1845
1846	switch (type) {
1847	case MOD_LOAD:
1848		LIST_INIT(&pfsync_list);
1849		if_clone_attach(&pfsync_cloner);
1850		break;
1851
1852	case MOD_UNLOAD:
1853		if_clone_detach(&pfsync_cloner);
1854		while (!LIST_EMPTY(&pfsync_list))
1855			ifc_simple_destroy(&pfsync_cloner,
1856			    SCP2IFP(LIST_FIRST(&pfsync_list)));
1857		break;
1858
1859	default:
1860		error = EINVAL;
1861		break;
1862	}
1863
1864	return error;
1865}
1866
1867static moduledata_t pfsync_mod = {
1868	"pfsync",
1869	pfsync_modevent,
1870	0
1871};
1872
1873#define PFSYNC_MODVER 1
1874
1875DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
1876MODULE_VERSION(pfsync, PFSYNC_MODVER);
1877#endif /* __FreeBSD__ */
1878