if_pfsync.c revision 147261
1/*	$FreeBSD: head/sys/contrib/pf/net/if_pfsync.c 147261 2005-06-10 17:23:49Z mlaier $	*/
2/*	$OpenBSD: if_pfsync.c,v 1.46 2005/02/20 15:58:38 mcbride Exp $	*/
3
4/*
5 * Copyright (c) 2002 Michael Shalayeff
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27 * THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#ifdef __FreeBSD__
31#include "opt_inet.h"
32#include "opt_inet6.h"
33#endif
34
35#ifndef __FreeBSD__
36#include "bpfilter.h"
37#include "pfsync.h"
38#elif __FreeBSD__ >= 5
39#include "opt_bpf.h"
40#include "opt_pf.h"
41#define	NBPFILTER	DEV_BPF
42#define	NPFSYNC		DEV_PFSYNC
43#endif
44
45#include <sys/param.h>
46#include <sys/proc.h>
47#include <sys/systm.h>
48#include <sys/time.h>
49#include <sys/mbuf.h>
50#include <sys/socket.h>
51#include <sys/kernel.h>
52#ifdef __FreeBSD__
53#include <sys/endian.h>
54#include <sys/malloc.h>
55#include <sys/module.h>
56#include <sys/sockio.h>
57#include <sys/lock.h>
58#include <sys/mutex.h>
59#else
60#include <sys/ioctl.h>
61#include <sys/timeout.h>
62#endif
63
64#include <net/if.h>
65#if defined(__FreeBSD__)
66#include <net/if_clone.h>
67#endif
68#include <net/if_types.h>
69#include <net/route.h>
70#include <net/bpf.h>
71#include <netinet/tcp.h>
72#include <netinet/tcp_seq.h>
73
74#ifdef	INET
75#include <netinet/in.h>
76#include <netinet/in_systm.h>
77#include <netinet/in_var.h>
78#include <netinet/ip.h>
79#include <netinet/ip_var.h>
80#endif
81
82#ifdef INET6
83#ifndef INET
84#include <netinet/in.h>
85#endif
86#include <netinet6/nd6.h>
87#endif /* INET6 */
88
89#ifdef __FreeBSD__
90#include "opt_carp.h"
91#ifdef DEV_CARP
92#define	NCARP	1
93#endif
94#else
95#include "carp.h"
96#endif
97#if NCARP > 0
98extern int carp_suppress_preempt;
99#endif
100
101#include <net/pfvar.h>
102#include <net/if_pfsync.h>
103
104#ifdef __FreeBSD__
105#define	PFSYNCNAME	"pfsync"
106#endif
107
108#define PFSYNC_MINMTU	\
109    (sizeof(struct pfsync_header) + sizeof(struct pf_state))
110
111#ifdef PFSYNCDEBUG
112#define DPRINTF(x)    do { if (pfsyncdebug) printf x ; } while (0)
113int pfsyncdebug;
114#else
115#define DPRINTF(x)
116#endif
117
118#ifndef __FreeBSD__
119struct pfsync_softc	pfsyncif;
120#endif
121struct pfsyncstats	pfsyncstats;
122
123#ifdef __FreeBSD__
124
/*
 * Locking notes:
 * Whenever we really touch or look at the state table we have to hold the
 * PF_LOCK.  Functions that only do interface handling grab the per-softc
 * lock instead.
 */
132
133static void	pfsync_clone_destroy(struct ifnet *);
134static int	pfsync_clone_create(struct if_clone *, int);
135static void	pfsync_senddef(void *);
136#else
137void	pfsyncattach(int);
138#endif
139void	pfsync_setmtu(struct pfsync_softc *, int);
140int	pfsync_insert_net_state(struct pfsync_state *);
141int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
142	    struct rtentry *);
143int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
144void	pfsyncstart(struct ifnet *);
145
146struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **);
147int	pfsync_request_update(struct pfsync_state_upd *, struct in_addr *);
148int	pfsync_sendout(struct pfsync_softc *);
149void	pfsync_timeout(void *);
150void	pfsync_send_bus(struct pfsync_softc *, u_int8_t);
151void	pfsync_bulk_update(void *);
152void	pfsync_bulkfail(void *);
153
154int	pfsync_sync_ok;
155#ifndef __FreeBSD__
156extern int ifqmaxlen;
157extern struct timeval time;
158extern struct timeval mono_time;
159extern int hz;
160#endif
161
162#ifdef __FreeBSD__
163static MALLOC_DEFINE(M_PFSYNC, PFSYNCNAME, "Packet Filter State Sync. Interface");
164static LIST_HEAD(pfsync_list, pfsync_softc) pfsync_list;
165#define	SCP2IFP(sc)		((sc)->sc_ifp)
166IFC_SIMPLE_DECLARE(pfsync, 1);
167
168static void
169pfsync_clone_destroy(struct ifnet *ifp)
170{
171        struct pfsync_softc *sc;
172
173	sc = ifp->if_softc;
174	callout_stop(&sc->sc_tmo);
175	callout_stop(&sc->sc_bulk_tmo);
176	callout_stop(&sc->sc_bulkfail_tmo);
177
178	callout_stop(&sc->sc_send_tmo);
179
180#if NBPFILTER > 0
181        bpfdetach(ifp);
182#endif
183        if_detach(ifp);
184	if_free(ifp);
185        LIST_REMOVE(sc, sc_next);
186        free(sc, M_PFSYNC);
187}
188
189static int
190pfsync_clone_create(struct if_clone *ifc, int unit)
191{
192	struct pfsync_softc *sc;
193	struct ifnet *ifp;
194
195	MALLOC(sc, struct pfsync_softc *, sizeof(*sc), M_PFSYNC,
196	    M_WAITOK|M_ZERO);
197	ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC);
198	if (ifp == NULL) {
199		free(sc, M_PFSYNC);
200		return (ENOSPC);
201	}
202
203	pfsync_sync_ok = 1;
204	sc->sc_mbuf = NULL;
205	sc->sc_mbuf_net = NULL;
206	sc->sc_statep.s = NULL;
207	sc->sc_statep_net.s = NULL;
208	sc->sc_maxupdates = 128;
209	sc->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP);
210	sc->sc_ureq_received = 0;
211	sc->sc_ureq_sent = 0;
212
213	ifp = SCP2IFP(sc);
214	if_initname(ifp, ifc->ifc_name, unit);
215	ifp->if_ioctl = pfsyncioctl;
216	ifp->if_output = pfsyncoutput;
217	ifp->if_start = pfsyncstart;
218	ifp->if_snd.ifq_maxlen = ifqmaxlen;
219	ifp->if_hdrlen = PFSYNC_HDRLEN;
220	ifp->if_baudrate = IF_Mbps(100);
221	ifp->if_softc = sc;
222	pfsync_setmtu(sc, MCLBYTES);
223	/*
224	 * XXX
225	 *  The 2nd arg. 0 to callout_init(9) shoule be set to CALLOUT_MPSAFE
226	 * if Gaint lock is removed from the network stack.
227	 */
228	callout_init(&sc->sc_tmo, 0);
229	callout_init(&sc->sc_bulk_tmo, 0);
230	callout_init(&sc->sc_bulkfail_tmo, 0);
231	callout_init(&sc->sc_send_tmo, 0);
232	mtx_init(&sc->sc_ifq.ifq_mtx, ifp->if_xname, "pfsync send queue",
233	    MTX_DEF);
234	if_attach(ifp);
235
236	LIST_INSERT_HEAD(&pfsync_list, sc, sc_next);
237#if NBPFILTER > 0
238	bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
239#endif
240
241	return (0);
242}
243#else /* !__FreeBSD__ */
244void
245pfsyncattach(int npfsync)
246{
247	struct ifnet *ifp;
248
249	pfsync_sync_ok = 1;
250	bzero(&pfsyncif, sizeof(pfsyncif));
251	pfsyncif.sc_mbuf = NULL;
252	pfsyncif.sc_mbuf_net = NULL;
253	pfsyncif.sc_statep.s = NULL;
254	pfsyncif.sc_statep_net.s = NULL;
255	pfsyncif.sc_maxupdates = 128;
256	pfsyncif.sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
257	pfsyncif.sc_sendaddr.s_addr = INADDR_PFSYNC_GROUP;
258	pfsyncif.sc_ureq_received = 0;
259	pfsyncif.sc_ureq_sent = 0;
260	ifp = &pfsyncif.sc_if;
261	strlcpy(ifp->if_xname, "pfsync0", sizeof ifp->if_xname);
262	ifp->if_softc = &pfsyncif;
263	ifp->if_ioctl = pfsyncioctl;
264	ifp->if_output = pfsyncoutput;
265	ifp->if_start = pfsyncstart;
266	ifp->if_type = IFT_PFSYNC;
267	ifp->if_snd.ifq_maxlen = ifqmaxlen;
268	ifp->if_hdrlen = PFSYNC_HDRLEN;
269	pfsync_setmtu(&pfsyncif, MCLBYTES);
270	timeout_set(&pfsyncif.sc_tmo, pfsync_timeout, &pfsyncif);
271	timeout_set(&pfsyncif.sc_bulk_tmo, pfsync_bulk_update, &pfsyncif);
272	timeout_set(&pfsyncif.sc_bulkfail_tmo, pfsync_bulkfail, &pfsyncif);
273	if_attach(ifp);
274	if_alloc_sadl(ifp);
275
276#if NBPFILTER > 0
277	bpfattach(&pfsyncif.sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
278#endif
279}
280#endif
281
282/*
283 * Start output on the pfsync interface.
284 */
285void
286pfsyncstart(struct ifnet *ifp)
287{
288#ifdef __FreeBSD__
289	IF_LOCK(&ifp->if_snd);
290	_IF_DROP(&ifp->if_snd);
291	_IF_DRAIN(&ifp->if_snd);
292	IF_UNLOCK(&ifp->if_snd);
293#else
294	struct mbuf *m;
295	int s;
296
297	for (;;) {
298		s = splimp();
299		IF_DROP(&ifp->if_snd);
300		IF_DEQUEUE(&ifp->if_snd, m);
301		splx(s);
302
303		if (m == NULL)
304			return;
305		else
306			m_freem(m);
307	}
308#endif
309}
310
311int
312pfsync_insert_net_state(struct pfsync_state *sp)
313{
314	struct pf_state	*st = NULL;
315	struct pf_rule *r = NULL;
316	struct pfi_kif	*kif;
317
318#ifdef __FreeBSD__
319	PF_ASSERT(MA_OWNED);
320#endif
321	if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) {
322		printf("pfsync_insert_net_state: invalid creator id:"
323		    " %08x\n", ntohl(sp->creatorid));
324		return (EINVAL);
325	}
326
327	kif = pfi_lookup_create(sp->ifname);
328	if (kif == NULL) {
329		if (pf_status.debug >= PF_DEBUG_MISC)
330			printf("pfsync_insert_net_state: "
331			    "unknown interface: %s\n", sp->ifname);
332		/* skip this state */
333		return (0);
334	}
335
336	/*
337	 * Just use the default rule until we have infrastructure to find the
338	 * best matching rule.
339	 */
340	r = &pf_default_rule;
341
342	if (!r->max_states || r->states < r->max_states)
343		st = pool_get(&pf_state_pl, PR_NOWAIT);
344	if (st == NULL) {
345		pfi_maybe_destroy(kif);
346		return (ENOMEM);
347	}
348	bzero(st, sizeof(*st));
349
350	st->rule.ptr = r;
351	/* XXX get pointers to nat_rule and anchor */
352
353	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
354	r->states++;
355
356	/* fill in the rest of the state entry */
357	pf_state_host_ntoh(&sp->lan, &st->lan);
358	pf_state_host_ntoh(&sp->gwy, &st->gwy);
359	pf_state_host_ntoh(&sp->ext, &st->ext);
360
361	pf_state_peer_ntoh(&sp->src, &st->src);
362	pf_state_peer_ntoh(&sp->dst, &st->dst);
363
364	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
365	st->creation = time_second - ntohl(sp->creation);
366	st->expire = ntohl(sp->expire) + time_second;
367
368	st->af = sp->af;
369	st->proto = sp->proto;
370	st->direction = sp->direction;
371	st->log = sp->log;
372	st->timeout = sp->timeout;
373	st->allow_opts = sp->allow_opts;
374
375	bcopy(sp->id, &st->id, sizeof(st->id));
376	st->creatorid = sp->creatorid;
377	st->sync_flags = PFSTATE_FROMSYNC;
378
379
380	if (pf_insert_state(kif, st)) {
381		pfi_maybe_destroy(kif);
382		/* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */
383		r->states--;
384		pool_put(&pf_state_pl, st);
385		return (EINVAL);
386	}
387
388	return (0);
389}
390
391void
392#ifdef __FreeBSD__
393pfsync_input(struct mbuf *m, __unused int off)
394#else
395pfsync_input(struct mbuf *m, ...)
396#endif
397{
398	struct ip *ip = mtod(m, struct ip *);
399	struct pfsync_header *ph;
400#ifdef __FreeBSD__
401	struct pfsync_softc *sc = LIST_FIRST(&pfsync_list);
402#else
403	struct pfsync_softc *sc = &pfsyncif;
404#endif
405	struct pf_state *st, key;
406	struct pfsync_state *sp;
407	struct pfsync_state_upd *up;
408	struct pfsync_state_del *dp;
409	struct pfsync_state_clr *cp;
410	struct pfsync_state_upd_req *rup;
411	struct pfsync_state_bus *bus;
412	struct in_addr src;
413	struct mbuf *mp;
414	int iplen, action, error, i, s, count, offp, sfail, stale = 0;
415
416	pfsyncstats.pfsyncs_ipackets++;
417
418	/* verify that we have a sync interface configured */
419	if (!sc->sc_sync_ifp || !pf_status.running) /* XXX PF_LOCK? */
420		goto done;
421
422	/* verify that the packet came in on the right interface */
423	if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) {
424		pfsyncstats.pfsyncs_badif++;
425		goto done;
426	}
427
428	/* verify that the IP TTL is 255.  */
429	if (ip->ip_ttl != PFSYNC_DFLTTL) {
430		pfsyncstats.pfsyncs_badttl++;
431		goto done;
432	}
433
434	iplen = ip->ip_hl << 2;
435
436	if (m->m_pkthdr.len < iplen + sizeof(*ph)) {
437		pfsyncstats.pfsyncs_hdrops++;
438		goto done;
439	}
440
441	if (iplen + sizeof(*ph) > m->m_len) {
442		if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) {
443			pfsyncstats.pfsyncs_hdrops++;
444			goto done;
445		}
446		ip = mtod(m, struct ip *);
447	}
448	ph = (struct pfsync_header *)((char *)ip + iplen);
449
450	/* verify the version */
451	if (ph->version != PFSYNC_VERSION) {
452		pfsyncstats.pfsyncs_badver++;
453		goto done;
454	}
455
456	action = ph->action;
457	count = ph->count;
458
459	/* make sure it's a valid action code */
460	if (action >= PFSYNC_ACT_MAX) {
461		pfsyncstats.pfsyncs_badact++;
462		goto done;
463	}
464
465	/* Cheaper to grab this now than having to mess with mbufs later */
466	src = ip->ip_src;
467
468	switch (action) {
469	case PFSYNC_ACT_CLR: {
470		struct pf_state *nexts;
471		struct pfi_kif	*kif;
472		u_int32_t creatorid;
473		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
474		    sizeof(*cp), &offp)) == NULL) {
475			pfsyncstats.pfsyncs_badlen++;
476			return;
477		}
478		cp = (struct pfsync_state_clr *)(mp->m_data + offp);
479		creatorid = cp->creatorid;
480
481		s = splsoftnet();
482#ifdef __FreeBSD__
483		PF_LOCK();
484#endif
485		if (cp->ifname[0] == '\0') {
486			for (st = RB_MIN(pf_state_tree_id, &tree_id);
487			    st; st = nexts) {
488                		nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
489				if (st->creatorid == creatorid) {
490					st->timeout = PFTM_PURGE;
491					pf_purge_expired_state(st);
492				}
493			}
494		} else {
495			kif = pfi_lookup_if(cp->ifname);
496			if (kif == NULL) {
497				if (pf_status.debug >= PF_DEBUG_MISC)
498					printf("pfsync_input: PFSYNC_ACT_CLR "
499					    "bad interface: %s\n", cp->ifname);
500				splx(s);
501#ifdef __FreeBSD__
502				PF_UNLOCK();
503#endif
504				goto done;
505			}
506			for (st = RB_MIN(pf_state_tree_lan_ext,
507			    &kif->pfik_lan_ext); st; st = nexts) {
508				nexts = RB_NEXT(pf_state_tree_lan_ext,
509				    &kif->pfik_lan_ext, st);
510				if (st->creatorid == creatorid) {
511					st->timeout = PFTM_PURGE;
512					pf_purge_expired_state(st);
513				}
514			}
515		}
516#ifdef __FreeBSD__
517		PF_UNLOCK();
518#endif
519		splx(s);
520
521		break;
522	}
523	case PFSYNC_ACT_INS:
524		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
525		    count * sizeof(*sp), &offp)) == NULL) {
526			pfsyncstats.pfsyncs_badlen++;
527			return;
528		}
529
530		s = splsoftnet();
531#ifdef __FreeBSD__
532		PF_LOCK();
533#endif
534		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
535		    i < count; i++, sp++) {
536			/* check for invalid values */
537			if (sp->timeout >= PFTM_MAX ||
538			    sp->src.state > PF_TCPS_PROXY_DST ||
539			    sp->dst.state > PF_TCPS_PROXY_DST ||
540			    sp->direction > PF_OUT ||
541			    (sp->af != AF_INET && sp->af != AF_INET6)) {
542				if (pf_status.debug >= PF_DEBUG_MISC)
543					printf("pfsync_insert: PFSYNC_ACT_INS: "
544					    "invalid value\n");
545				pfsyncstats.pfsyncs_badstate++;
546				continue;
547			}
548
549			if ((error = pfsync_insert_net_state(sp))) {
550				if (error == ENOMEM) {
551					splx(s);
552#ifdef __FreeBSD__
553					PF_UNLOCK();
554#endif
555					goto done;
556				}
557				continue;
558			}
559		}
560#ifdef __FreeBSD__
561		PF_UNLOCK();
562#endif
563		splx(s);
564		break;
565	case PFSYNC_ACT_UPD:
566		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
567		    count * sizeof(*sp), &offp)) == NULL) {
568			pfsyncstats.pfsyncs_badlen++;
569			return;
570		}
571
572		s = splsoftnet();
573#ifdef __FreeBSD__
574		PF_LOCK();
575#endif
576		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
577		    i < count; i++, sp++) {
578			int flags = PFSYNC_FLAG_STALE;
579
580			/* check for invalid values */
581			if (sp->timeout >= PFTM_MAX ||
582			    sp->src.state > PF_TCPS_PROXY_DST ||
583			    sp->dst.state > PF_TCPS_PROXY_DST) {
584				if (pf_status.debug >= PF_DEBUG_MISC)
585					printf("pfsync_insert: PFSYNC_ACT_UPD: "
586					    "invalid value\n");
587				pfsyncstats.pfsyncs_badstate++;
588				continue;
589			}
590
591			bcopy(sp->id, &key.id, sizeof(key.id));
592			key.creatorid = sp->creatorid;
593
594			st = pf_find_state_byid(&key);
595			if (st == NULL) {
596				/* insert the update */
597				if (pfsync_insert_net_state(sp))
598					pfsyncstats.pfsyncs_badstate++;
599				continue;
600			}
601			sfail = 0;
602			if (st->proto == IPPROTO_TCP) {
603				/*
604				 * The state should never go backwards except
605				 * for syn-proxy states.  Neither should the
606				 * sequence window slide backwards.
607				 */
608				if (st->src.state > sp->src.state &&
609				    (st->src.state < PF_TCPS_PROXY_SRC ||
610				    sp->src.state >= PF_TCPS_PROXY_SRC))
611					sfail = 1;
612				else if (SEQ_GT(st->src.seqlo,
613				    ntohl(sp->src.seqlo)))
614					sfail = 3;
615				else if (st->dst.state > sp->dst.state) {
616					/* There might still be useful
617					 * information about the src state here,
618					 * so import that part of the update,
619					 * then "fail" so we send the updated
620					 * state back to the peer who is missing
621					 * our what we know. */
622					pf_state_peer_ntoh(&sp->src, &st->src);
623					/* XXX do anything with timeouts? */
624					sfail = 7;
625					flags = 0;
626				} else if (st->dst.state >= TCPS_SYN_SENT &&
627				    SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo)))
628					sfail = 4;
629			} else {
630				/*
631				 * Non-TCP protocol state machine always go
632				 * forwards
633				 */
634				if (st->src.state > sp->src.state)
635					sfail = 5;
636				else if ( st->dst.state > sp->dst.state)
637					sfail = 6;
638			}
639			if (sfail) {
640				if (pf_status.debug >= PF_DEBUG_MISC)
641					printf("pfsync: %s stale update "
642					    "(%d) id: %016llx "
643					    "creatorid: %08x\n",
644					    (sfail < 7 ?  "ignoring"
645					     : "partial"), sfail,
646#ifdef __FreeBSD__
647					    (unsigned long long)be64toh(st->id),
648#else
649					    betoh64(st->id),
650#endif
651					    ntohl(st->creatorid));
652				pfsyncstats.pfsyncs_badstate++;
653
654				if (!(sp->sync_flags & PFSTATE_STALE)) {
655					/* we have a better state, send it */
656					if (sc->sc_mbuf != NULL && !stale)
657						pfsync_sendout(sc);
658					stale++;
659					if (!st->sync_flags)
660						pfsync_pack_state(
661						    PFSYNC_ACT_UPD, st, flags);
662				}
663				continue;
664			}
665			pf_state_peer_ntoh(&sp->src, &st->src);
666			pf_state_peer_ntoh(&sp->dst, &st->dst);
667			st->expire = ntohl(sp->expire) + time_second;
668			st->timeout = sp->timeout;
669		}
670		if (stale && sc->sc_mbuf != NULL)
671			pfsync_sendout(sc);
672#ifdef __FreeBSD__
673		PF_UNLOCK();
674#endif
675		splx(s);
676		break;
677	/*
678	 * It's not strictly necessary for us to support the "uncompressed"
679	 * delete action, but it's relatively simple and maintains consistency.
680	 */
681	case PFSYNC_ACT_DEL:
682		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
683		    count * sizeof(*sp), &offp)) == NULL) {
684			pfsyncstats.pfsyncs_badlen++;
685			return;
686		}
687
688		s = splsoftnet();
689#ifdef __FreeBSD__
690		PF_LOCK();
691#endif
692		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
693		    i < count; i++, sp++) {
694			bcopy(sp->id, &key.id, sizeof(key.id));
695			key.creatorid = sp->creatorid;
696
697			st = pf_find_state_byid(&key);
698			if (st == NULL) {
699				pfsyncstats.pfsyncs_badstate++;
700				continue;
701			}
702			st->timeout = PFTM_PURGE;
703			st->sync_flags |= PFSTATE_FROMSYNC;
704			pf_purge_expired_state(st);
705		}
706#ifdef __FreeBSD__
707		PF_UNLOCK();
708#endif
709		splx(s);
710		break;
711	case PFSYNC_ACT_UPD_C: {
712		int update_requested = 0;
713
714		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
715		    count * sizeof(*up), &offp)) == NULL) {
716			pfsyncstats.pfsyncs_badlen++;
717			return;
718		}
719
720		s = splsoftnet();
721#ifdef __FreeBSD__
722		PF_LOCK();
723#endif
724		for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp);
725		    i < count; i++, up++) {
726			/* check for invalid values */
727			if (up->timeout >= PFTM_MAX ||
728			    up->src.state > PF_TCPS_PROXY_DST ||
729			    up->dst.state > PF_TCPS_PROXY_DST) {
730				if (pf_status.debug >= PF_DEBUG_MISC)
731					printf("pfsync_insert: "
732					    "PFSYNC_ACT_UPD_C: "
733					    "invalid value\n");
734				pfsyncstats.pfsyncs_badstate++;
735				continue;
736			}
737
738			bcopy(up->id, &key.id, sizeof(key.id));
739			key.creatorid = up->creatorid;
740
741			st = pf_find_state_byid(&key);
742			if (st == NULL) {
743				/* We don't have this state. Ask for it. */
744				error = pfsync_request_update(up, &src);
745				if (error == ENOMEM) {
746					splx(s);
747					goto done;
748				}
749				update_requested = 1;
750				pfsyncstats.pfsyncs_badstate++;
751				continue;
752			}
753			sfail = 0;
754			if (st->proto == IPPROTO_TCP) {
755				/*
756				 * The state should never go backwards except
757				 * for syn-proxy states.  Neither should the
758				 * sequence window slide backwards.
759				 */
760				if (st->src.state > up->src.state &&
761				    (st->src.state < PF_TCPS_PROXY_SRC ||
762				    up->src.state >= PF_TCPS_PROXY_SRC))
763					sfail = 1;
764				else if (st->dst.state > up->dst.state)
765					sfail = 2;
766				else if (SEQ_GT(st->src.seqlo,
767				    ntohl(up->src.seqlo)))
768					sfail = 3;
769				else if (st->dst.state >= TCPS_SYN_SENT &&
770				    SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo)))
771					sfail = 4;
772			} else {
773				/*
774				 * Non-TCP protocol state machine always go
775				 * forwards
776				 */
777				if (st->src.state > up->src.state)
778					sfail = 5;
779				else if (st->dst.state > up->dst.state)
780					sfail = 6;
781			}
782			if (sfail) {
783				if (pf_status.debug >= PF_DEBUG_MISC)
784					printf("pfsync: ignoring stale update "
785					    "(%d) id: %016llx "
786					    "creatorid: %08x\n", sfail,
787#ifdef __FreeBSD__
788					    (unsigned long long)be64toh(st->id),
789#else
790					    betoh64(st->id),
791#endif
792					    ntohl(st->creatorid));
793				pfsyncstats.pfsyncs_badstate++;
794
795				/* we have a better state, send it out */
796				if ((!stale || update_requested) &&
797				    sc->sc_mbuf != NULL) {
798					pfsync_sendout(sc);
799					update_requested = 0;
800				}
801				stale++;
802				if (!st->sync_flags)
803					pfsync_pack_state(PFSYNC_ACT_UPD, st,
804					    PFSYNC_FLAG_STALE);
805				continue;
806			}
807			pf_state_peer_ntoh(&up->src, &st->src);
808			pf_state_peer_ntoh(&up->dst, &st->dst);
809			st->expire = ntohl(up->expire) + time_second;
810			st->timeout = up->timeout;
811		}
812		if ((update_requested || stale) && sc->sc_mbuf)
813			pfsync_sendout(sc);
814#ifdef __FreeBSD__
815		PF_UNLOCK();
816#endif
817		splx(s);
818		break;
819	}
820	case PFSYNC_ACT_DEL_C:
821		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
822		    count * sizeof(*dp), &offp)) == NULL) {
823			pfsyncstats.pfsyncs_badlen++;
824			return;
825		}
826
827		s = splsoftnet();
828#ifdef __FreeBSD__
829		PF_LOCK();
830#endif
831		for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp);
832		    i < count; i++, dp++) {
833			bcopy(dp->id, &key.id, sizeof(key.id));
834			key.creatorid = dp->creatorid;
835
836			st = pf_find_state_byid(&key);
837			if (st == NULL) {
838				pfsyncstats.pfsyncs_badstate++;
839				continue;
840			}
841			st->timeout = PFTM_PURGE;
842			st->sync_flags |= PFSTATE_FROMSYNC;
843			pf_purge_expired_state(st);
844		}
845#ifdef __FreeBSD__
846		PF_UNLOCK();
847#endif
848		splx(s);
849		break;
850	case PFSYNC_ACT_INS_F:
851	case PFSYNC_ACT_DEL_F:
852		/* not implemented */
853		break;
854	case PFSYNC_ACT_UREQ:
855		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
856		    count * sizeof(*rup), &offp)) == NULL) {
857			pfsyncstats.pfsyncs_badlen++;
858			return;
859		}
860
861		s = splsoftnet();
862#ifdef __FreeBSD__
863		PF_LOCK();
864#endif
865		if (sc->sc_mbuf != NULL)
866			pfsync_sendout(sc);
867		for (i = 0,
868		    rup = (struct pfsync_state_upd_req *)(mp->m_data + offp);
869		    i < count; i++, rup++) {
870			bcopy(rup->id, &key.id, sizeof(key.id));
871			key.creatorid = rup->creatorid;
872
873			if (key.id == 0 && key.creatorid == 0) {
874				sc->sc_ureq_received = time_uptime;
875				if (pf_status.debug >= PF_DEBUG_MISC)
876					printf("pfsync: received "
877					    "bulk update request\n");
878				pfsync_send_bus(sc, PFSYNC_BUS_START);
879#ifdef __FreeBSD__
880				callout_reset(&sc->sc_bulk_tmo, 1 * hz,
881				    pfsync_bulk_update,
882				    LIST_FIRST(&pfsync_list));
883#else
884				timeout_add(&sc->sc_bulk_tmo, 1 * hz);
885#endif
886			} else {
887				st = pf_find_state_byid(&key);
888				if (st == NULL) {
889					pfsyncstats.pfsyncs_badstate++;
890					continue;
891				}
892				if (!st->sync_flags)
893					pfsync_pack_state(PFSYNC_ACT_UPD,
894					    st, 0);
895			}
896		}
897		if (sc->sc_mbuf != NULL)
898			pfsync_sendout(sc);
899#ifdef __FreeBSD__
900		PF_UNLOCK();
901#endif
902		splx(s);
903		break;
904	case PFSYNC_ACT_BUS:
905		/* If we're not waiting for a bulk update, who cares. */
906		if (sc->sc_ureq_sent == 0)
907			break;
908
909		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
910		    sizeof(*bus), &offp)) == NULL) {
911			pfsyncstats.pfsyncs_badlen++;
912			return;
913		}
914		bus = (struct pfsync_state_bus *)(mp->m_data + offp);
915		switch (bus->status) {
916		case PFSYNC_BUS_START:
917#ifdef __FreeBSD__
918			callout_reset(&sc->sc_bulkfail_tmo,
919			    pf_pool_limits[PF_LIMIT_STATES].limit /
920			    (PFSYNC_BULKPACKETS * sc->sc_maxcount),
921			    pfsync_bulkfail, LIST_FIRST(&pfsync_list));
922#else
923			timeout_add(&sc->sc_bulkfail_tmo,
924			    pf_pool_limits[PF_LIMIT_STATES].limit /
925			    (PFSYNC_BULKPACKETS * sc->sc_maxcount));
926#endif
927			if (pf_status.debug >= PF_DEBUG_MISC)
928				printf("pfsync: received bulk "
929				    "update start\n");
930			break;
931		case PFSYNC_BUS_END:
932			if (time_uptime - ntohl(bus->endtime) >=
933			    sc->sc_ureq_sent) {
934				/* that's it, we're happy */
935				sc->sc_ureq_sent = 0;
936				sc->sc_bulk_tries = 0;
937#ifdef __FreeBSD__
938				callout_stop(&sc->sc_bulkfail_tmo);
939#else
940				timeout_del(&sc->sc_bulkfail_tmo);
941#endif
942#if NCARP > 0	/* XXX_IMPORT */
943				if (!pfsync_sync_ok)
944					carp_suppress_preempt--;
945#endif
946				pfsync_sync_ok = 1;
947				if (pf_status.debug >= PF_DEBUG_MISC)
948					printf("pfsync: received valid "
949					    "bulk update end\n");
950			} else {
951				if (pf_status.debug >= PF_DEBUG_MISC)
952					printf("pfsync: received invalid "
953					    "bulk update end: bad timestamp\n");
954			}
955			break;
956		}
957		break;
958	}
959
960done:
961	if (m)
962		m_freem(m);
963}
964
/*
 * Output routine of the pfsync interface.
 *
 * The packet handed in is released without being sent anywhere; ifp, dst
 * and rt are not looked at.  Always returns 0.
 */
int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
	struct rtentry *rt)
{
	int error = 0;

	m_freem(m);

	return (error);
}
972
973/* ARGSUSED */
974int
975pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
976{
977#ifndef __FreeBSD__
978	struct proc *p = curproc;
979#endif
980	struct pfsync_softc *sc = ifp->if_softc;
981	struct ifreq *ifr = (struct ifreq *)data;
982	struct ip_moptions *imo = &sc->sc_imo;
983	struct pfsyncreq pfsyncr;
984	struct ifnet    *sifp;
985	int s, error;
986
987	switch (cmd) {
988	case SIOCSIFADDR:
989	case SIOCAIFADDR:
990	case SIOCSIFDSTADDR:
991	case SIOCSIFFLAGS:
992		if (ifp->if_flags & IFF_UP)
993			ifp->if_flags |= IFF_RUNNING;
994		else
995			ifp->if_flags &= ~IFF_RUNNING;
996		break;
997	case SIOCSIFMTU:
998		if (ifr->ifr_mtu < PFSYNC_MINMTU)
999			return (EINVAL);
1000		if (ifr->ifr_mtu > MCLBYTES)
1001			ifr->ifr_mtu = MCLBYTES;
1002		s = splnet();
1003#ifdef __FreeBSD__
1004		PF_LOCK();
1005#endif
1006		if (ifr->ifr_mtu < ifp->if_mtu) {
1007			pfsync_sendout(sc);
1008		}
1009		pfsync_setmtu(sc, ifr->ifr_mtu);
1010#ifdef __FreeBSD__
1011		PF_UNLOCK();
1012#endif
1013		splx(s);
1014		break;
1015	case SIOCGETPFSYNC:
1016#ifdef __FreeBSD__
1017		/* XXX: read unlocked */
1018#endif
1019		bzero(&pfsyncr, sizeof(pfsyncr));
1020		if (sc->sc_sync_ifp)
1021			strlcpy(pfsyncr.pfsyncr_syncdev,
1022			    sc->sc_sync_ifp->if_xname, IFNAMSIZ);
1023		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
1024		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
1025		if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))))
1026			return (error);
1027		break;
1028	case SIOCSETPFSYNC:
1029#ifdef __FreeBSD__
1030		if ((error = suser(curthread)) != 0)
1031#else
1032		if ((error = suser(p, p->p_acflag)) != 0)
1033#endif
1034			return (error);
1035		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
1036			return (error);
1037
1038		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
1039			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
1040		else
1041			sc->sc_sync_peer.s_addr =
1042			    pfsyncr.pfsyncr_syncpeer.s_addr;
1043
1044		if (pfsyncr.pfsyncr_maxupdates > 255)
1045			return (EINVAL);
1046#ifdef __FreeBSD__
1047		callout_drain(&sc->sc_send_tmo);
1048		PF_LOCK();
1049#endif
1050		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
1051
1052		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
1053			sc->sc_sync_ifp = NULL;
1054			if (sc->sc_mbuf_net != NULL) {
1055				/* Don't keep stale pfsync packets around. */
1056				s = splnet();
1057				m_freem(sc->sc_mbuf_net);
1058				sc->sc_mbuf_net = NULL;
1059				sc->sc_statep_net.s = NULL;
1060				splx(s);
1061			}
1062			if (imo->imo_num_memberships > 0) {
1063				in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1064				imo->imo_multicast_ifp = NULL;
1065			}
1066#ifdef __FreeBSD__
1067			PF_UNLOCK();
1068#endif
1069			break;
1070		}
1071
1072		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) {
1073#ifdef __FreeBSD__
1074			PF_UNLOCK();
1075#endif
1076			return (EINVAL);
1077		}
1078
1079		s = splnet();
1080#ifdef __FreeBSD__
1081		if (sifp->if_mtu < SCP2IFP(sc)->if_mtu ||
1082#else
1083		if (sifp->if_mtu < sc->sc_if.if_mtu ||
1084#endif
1085		    (sc->sc_sync_ifp != NULL &&
1086		    sifp->if_mtu < sc->sc_sync_ifp->if_mtu) ||
1087		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
1088			pfsync_sendout(sc);
1089		sc->sc_sync_ifp = sifp;
1090
1091#ifdef __FreeBSD__
1092		pfsync_setmtu(sc, SCP2IFP(sc)->if_mtu);
1093#else
1094		pfsync_setmtu(sc, sc->sc_if.if_mtu);
1095#endif
1096
1097		if (imo->imo_num_memberships > 0) {
1098			in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1099			imo->imo_multicast_ifp = NULL;
1100		}
1101
1102		if (sc->sc_sync_ifp &&
1103		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
1104			struct in_addr addr;
1105
1106			if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) {
1107				sc->sc_sync_ifp = NULL;
1108#ifdef __FreeBSD__
1109				PF_UNLOCK();
1110#endif
1111				splx(s);
1112				return (EADDRNOTAVAIL);
1113			}
1114#ifdef __FreeBSD__
1115			PF_UNLOCK();		/* addmulti mallocs w/ WAITOK */
1116			addr.s_addr = htonl(INADDR_PFSYNC_GROUP);
1117#else
1118			addr.s_addr = INADDR_PFSYNC_GROUP;
1119#endif
1120
1121			if ((imo->imo_membership[0] =
1122			    in_addmulti(&addr, sc->sc_sync_ifp)) == NULL) {
1123				sc->sc_sync_ifp = NULL;
1124				splx(s);
1125				return (ENOBUFS);
1126			}
1127			imo->imo_num_memberships++;
1128			imo->imo_multicast_ifp = sc->sc_sync_ifp;
1129			imo->imo_multicast_ttl = PFSYNC_DFLTTL;
1130			imo->imo_multicast_loop = 0;
1131		}
1132
1133		if (sc->sc_sync_ifp ||
1134		    sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) {
1135			/* Request a full state table update. */
1136#ifdef __FreeBSD__
1137			PF_LOCK();
1138#endif
1139			sc->sc_ureq_sent = time_uptime;
1140#if NCARP > 0
1141			if (pfsync_sync_ok)
1142				carp_suppress_preempt++;
1143#endif
1144			pfsync_sync_ok = 0;
1145			if (pf_status.debug >= PF_DEBUG_MISC)
1146				printf("pfsync: requesting bulk update\n");
1147#ifdef __FreeBSD__
1148			callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
1149			    pfsync_bulkfail, LIST_FIRST(&pfsync_list));
1150#else
1151			timeout_add(&sc->sc_bulkfail_tmo, 5 * hz);
1152#endif
1153			error = pfsync_request_update(NULL, NULL);
1154			if (error == ENOMEM) {
1155#ifdef __FreeBSD__
1156				PF_UNLOCK();
1157#endif
1158				splx(s);
1159				return (ENOMEM);
1160			}
1161			pfsync_sendout(sc);
1162		}
1163#ifdef __FreeBSD__
1164		PF_UNLOCK();
1165#endif
1166		splx(s);
1167
1168		break;
1169
1170	default:
1171		return (ENOTTY);
1172	}
1173
1174	return (0);
1175}
1176
1177void
1178pfsync_setmtu(struct pfsync_softc *sc, int mtu_req)
1179{
1180	int mtu;
1181
1182	if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req)
1183		mtu = sc->sc_sync_ifp->if_mtu;
1184	else
1185		mtu = mtu_req;
1186
1187	sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) /
1188	    sizeof(struct pfsync_state);
1189	if (sc->sc_maxcount > 254)
1190	    sc->sc_maxcount = 254;
1191#ifdef __FreeBSD__
1192	SCP2IFP(sc)->if_mtu = sizeof(struct pfsync_header) +
1193	    sc->sc_maxcount * sizeof(struct pfsync_state);
1194#else
1195	sc->sc_if.if_mtu = sizeof(struct pfsync_header) +
1196	    sc->sc_maxcount * sizeof(struct pfsync_state);
1197#endif
1198}
1199
1200struct mbuf *
1201pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp)
1202{
1203	struct pfsync_header *h;
1204	struct mbuf *m;
1205	int len;
1206
1207#ifdef __FreeBSD__
1208	PF_ASSERT(MA_OWNED);
1209#endif
1210	MGETHDR(m, M_DONTWAIT, MT_DATA);
1211	if (m == NULL) {
1212#ifdef __FreeBSD__
1213		SCP2IFP(sc)->if_oerrors++;
1214#else
1215		sc->sc_if.if_oerrors++;
1216#endif
1217		return (NULL);
1218	}
1219
1220	switch (action) {
1221	case PFSYNC_ACT_CLR:
1222		len = sizeof(struct pfsync_header) +
1223		    sizeof(struct pfsync_state_clr);
1224		break;
1225	case PFSYNC_ACT_UPD_C:
1226		len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd)) +
1227		    sizeof(struct pfsync_header);
1228		break;
1229	case PFSYNC_ACT_DEL_C:
1230		len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) +
1231		    sizeof(struct pfsync_header);
1232		break;
1233	case PFSYNC_ACT_UREQ:
1234		len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) +
1235		    sizeof(struct pfsync_header);
1236		break;
1237	case PFSYNC_ACT_BUS:
1238		len = sizeof(struct pfsync_header) +
1239		    sizeof(struct pfsync_state_bus);
1240		break;
1241	default:
1242		len = (sc->sc_maxcount * sizeof(struct pfsync_state)) +
1243		    sizeof(struct pfsync_header);
1244		break;
1245	}
1246
1247	if (len > MHLEN) {
1248		MCLGET(m, M_DONTWAIT);
1249		if ((m->m_flags & M_EXT) == 0) {
1250			m_free(m);
1251#ifdef __FreeBSD__
1252			SCP2IFP(sc)->if_oerrors++;
1253#else
1254			sc->sc_if.if_oerrors++;
1255#endif
1256			return (NULL);
1257		}
1258		m->m_data += (MCLBYTES - len) &~ (sizeof(long) - 1);
1259	} else
1260		MH_ALIGN(m, len);
1261
1262	m->m_pkthdr.rcvif = NULL;
1263	m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header);
1264	h = mtod(m, struct pfsync_header *);
1265	h->version = PFSYNC_VERSION;
1266	h->af = 0;
1267	h->count = 0;
1268	h->action = action;
1269
1270	*sp = (void *)((char *)h + PFSYNC_HDRLEN);
1271#ifdef __FreeBSD__
1272	callout_reset(&sc->sc_tmo, hz, pfsync_timeout,
1273	    LIST_FIRST(&pfsync_list));
1274#else
1275	timeout_add(&sc->sc_tmo, hz);
1276#endif
1277	return (m);
1278}
1279
1280int
1281pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags)
1282{
1283#ifdef __FreeBSD__
1284	struct ifnet *ifp = SCP2IFP(LIST_FIRST(&pfsync_list));
1285#else
1286	struct ifnet *ifp = &pfsyncif.sc_if;
1287#endif
1288	struct pfsync_softc *sc = ifp->if_softc;
1289	struct pfsync_header *h, *h_net;
1290	struct pfsync_state *sp = NULL;
1291	struct pfsync_state_upd *up = NULL;
1292	struct pfsync_state_del *dp = NULL;
1293	struct pf_rule *r;
1294	u_long secs;
1295	int s, ret = 0;
1296	u_int8_t i = 255, newaction = 0;
1297
1298#ifdef __FreeBSD__
1299	PF_ASSERT(MA_OWNED);
1300#endif
1301	/*
1302	 * If a packet falls in the forest and there's nobody around to
1303	 * hear, does it make a sound?
1304	 */
1305	if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL &&
1306	    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
1307		/* Don't leave any stale pfsync packets hanging around. */
1308		if (sc->sc_mbuf != NULL) {
1309			m_freem(sc->sc_mbuf);
1310			sc->sc_mbuf = NULL;
1311			sc->sc_statep.s = NULL;
1312		}
1313		return (0);
1314	}
1315
1316	if (action >= PFSYNC_ACT_MAX)
1317		return (EINVAL);
1318
1319	s = splnet();
1320	if (sc->sc_mbuf == NULL) {
1321		if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
1322		    (void *)&sc->sc_statep.s)) == NULL) {
1323			splx(s);
1324			return (ENOMEM);
1325		}
1326		h = mtod(sc->sc_mbuf, struct pfsync_header *);
1327	} else {
1328		h = mtod(sc->sc_mbuf, struct pfsync_header *);
1329		if (h->action != action) {
1330			pfsync_sendout(sc);
1331			if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
1332			    (void *)&sc->sc_statep.s)) == NULL) {
1333				splx(s);
1334				return (ENOMEM);
1335			}
1336			h = mtod(sc->sc_mbuf, struct pfsync_header *);
1337		} else {
1338			/*
1339			 * If it's an update, look in the packet to see if
1340			 * we already have an update for the state.
1341			 */
1342			if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) {
1343				struct pfsync_state *usp =
1344				    (void *)((char *)h + PFSYNC_HDRLEN);
1345
1346				for (i = 0; i < h->count; i++) {
1347					if (!memcmp(usp->id, &st->id,
1348					    PFSYNC_ID_LEN) &&
1349					    usp->creatorid == st->creatorid) {
1350						sp = usp;
1351						sp->updates++;
1352						break;
1353					}
1354					usp++;
1355				}
1356			}
1357		}
1358	}
1359
1360	secs = time_second;
1361
1362	st->pfsync_time = time_uptime;
1363	TAILQ_REMOVE(&state_updates, st, u.s.entry_updates);
1364	TAILQ_INSERT_TAIL(&state_updates, st, u.s.entry_updates);
1365
1366	if (sp == NULL) {
1367		/* not a "duplicate" update */
1368		i = 255;
1369		sp = sc->sc_statep.s++;
1370		sc->sc_mbuf->m_pkthdr.len =
1371		    sc->sc_mbuf->m_len += sizeof(struct pfsync_state);
1372		h->count++;
1373		bzero(sp, sizeof(*sp));
1374
1375		bcopy(&st->id, sp->id, sizeof(sp->id));
1376		sp->creatorid = st->creatorid;
1377
1378		strlcpy(sp->ifname, st->u.s.kif->pfik_name, sizeof(sp->ifname));
1379		pf_state_host_hton(&st->lan, &sp->lan);
1380		pf_state_host_hton(&st->gwy, &sp->gwy);
1381		pf_state_host_hton(&st->ext, &sp->ext);
1382
1383		bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
1384
1385		sp->creation = htonl(secs - st->creation);
1386		sp->packets[0] = htonl(st->packets[0]);
1387		sp->packets[1] = htonl(st->packets[1]);
1388		sp->bytes[0] = htonl(st->bytes[0]);
1389		sp->bytes[1] = htonl(st->bytes[1]);
1390		if ((r = st->rule.ptr) == NULL)
1391			sp->rule = htonl(-1);
1392		else
1393			sp->rule = htonl(r->nr);
1394		if ((r = st->anchor.ptr) == NULL)
1395			sp->anchor = htonl(-1);
1396		else
1397			sp->anchor = htonl(r->nr);
1398		sp->af = st->af;
1399		sp->proto = st->proto;
1400		sp->direction = st->direction;
1401		sp->log = st->log;
1402		sp->allow_opts = st->allow_opts;
1403		sp->timeout = st->timeout;
1404
1405		if (flags & PFSYNC_FLAG_STALE)
1406			sp->sync_flags |= PFSTATE_STALE;
1407	}
1408
1409	pf_state_peer_hton(&st->src, &sp->src);
1410	pf_state_peer_hton(&st->dst, &sp->dst);
1411
1412	if (st->expire <= secs)
1413		sp->expire = htonl(0);
1414	else
1415		sp->expire = htonl(st->expire - secs);
1416
1417	/* do we need to build "compressed" actions for network transfer? */
1418	if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) {
1419		switch (action) {
1420		case PFSYNC_ACT_UPD:
1421			newaction = PFSYNC_ACT_UPD_C;
1422			break;
1423		case PFSYNC_ACT_DEL:
1424			newaction = PFSYNC_ACT_DEL_C;
1425			break;
1426		default:
1427			/* by default we just send the uncompressed states */
1428			break;
1429		}
1430	}
1431
1432	if (newaction) {
1433		if (sc->sc_mbuf_net == NULL) {
1434			if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction,
1435			    (void *)&sc->sc_statep_net.s)) == NULL) {
1436				splx(s);
1437				return (ENOMEM);
1438			}
1439		}
1440		h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *);
1441
1442		switch (newaction) {
1443		case PFSYNC_ACT_UPD_C:
1444			if (i != 255) {
1445				up = (void *)((char *)h_net +
1446				    PFSYNC_HDRLEN + (i * sizeof(*up)));
1447				up->updates++;
1448			} else {
1449				h_net->count++;
1450				sc->sc_mbuf_net->m_pkthdr.len =
1451				    sc->sc_mbuf_net->m_len += sizeof(*up);
1452				up = sc->sc_statep_net.u++;
1453
1454				bzero(up, sizeof(*up));
1455				bcopy(&st->id, up->id, sizeof(up->id));
1456				up->creatorid = st->creatorid;
1457			}
1458			up->timeout = st->timeout;
1459			up->expire = sp->expire;
1460			up->src = sp->src;
1461			up->dst = sp->dst;
1462			break;
1463		case PFSYNC_ACT_DEL_C:
1464			sc->sc_mbuf_net->m_pkthdr.len =
1465			    sc->sc_mbuf_net->m_len += sizeof(*dp);
1466			dp = sc->sc_statep_net.d++;
1467			h_net->count++;
1468
1469			bzero(dp, sizeof(*dp));
1470			bcopy(&st->id, dp->id, sizeof(dp->id));
1471			dp->creatorid = st->creatorid;
1472			break;
1473		}
1474	}
1475
1476	if (h->count == sc->sc_maxcount ||
1477	    (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates)))
1478		ret = pfsync_sendout(sc);
1479
1480	splx(s);
1481	return (ret);
1482}
1483
1484/* This must be called in splnet() */
1485int
1486pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src)
1487{
1488#ifdef __FreeBSD__
1489	struct ifnet *ifp = SCP2IFP(LIST_FIRST(&pfsync_list));
1490#else
1491	struct ifnet *ifp = &pfsyncif.sc_if;
1492#endif
1493	struct pfsync_header *h;
1494	struct pfsync_softc *sc = ifp->if_softc;
1495	struct pfsync_state_upd_req *rup;
1496	int ret = 0;
1497
1498#ifdef __FreeBSD__
1499	PF_ASSERT(MA_OWNED);
1500#endif
1501	if (sc->sc_mbuf == NULL) {
1502		if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
1503		    (void *)&sc->sc_statep.s)) == NULL)
1504			return (ENOMEM);
1505		h = mtod(sc->sc_mbuf, struct pfsync_header *);
1506	} else {
1507		h = mtod(sc->sc_mbuf, struct pfsync_header *);
1508		if (h->action != PFSYNC_ACT_UREQ) {
1509			pfsync_sendout(sc);
1510			if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
1511			    (void *)&sc->sc_statep.s)) == NULL)
1512				return (ENOMEM);
1513			h = mtod(sc->sc_mbuf, struct pfsync_header *);
1514		}
1515	}
1516
1517	if (src != NULL)
1518		sc->sc_sendaddr = *src;
1519	sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup);
1520	h->count++;
1521	rup = sc->sc_statep.r++;
1522	bzero(rup, sizeof(*rup));
1523	if (up != NULL) {
1524		bcopy(up->id, rup->id, sizeof(rup->id));
1525		rup->creatorid = up->creatorid;
1526	}
1527
1528	if (h->count == sc->sc_maxcount)
1529		ret = pfsync_sendout(sc);
1530
1531	return (ret);
1532}
1533
1534int
1535pfsync_clear_states(u_int32_t creatorid, char *ifname)
1536{
1537#ifdef __FreeBSD__
1538	struct ifnet *ifp = SCP2IFP(LIST_FIRST(&pfsync_list));
1539#else
1540	struct ifnet *ifp = &pfsyncif.sc_if;
1541#endif
1542	struct pfsync_softc *sc = ifp->if_softc;
1543	struct pfsync_state_clr *cp;
1544	int s, ret;
1545
1546	s = splnet();
1547#ifdef __FreeBSD__
1548	PF_ASSERT(MA_OWNED);
1549#endif
1550	if (sc->sc_mbuf != NULL)
1551		pfsync_sendout(sc);
1552	if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR,
1553	    (void *)&sc->sc_statep.c)) == NULL) {
1554		splx(s);
1555		return (ENOMEM);
1556	}
1557	sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp);
1558	cp = sc->sc_statep.c;
1559	cp->creatorid = creatorid;
1560	if (ifname != NULL)
1561		strlcpy(cp->ifname, ifname, IFNAMSIZ);
1562
1563	ret = (pfsync_sendout(sc));
1564	splx(s);
1565	return (ret);
1566}
1567
/*
 * Timer handler for sc_tmo (armed by pfsync_get_mbuf()): flush whatever
 * packet is pending.  `v' is the pfsync softc.
 */
void
pfsync_timeout(void *v)
{
	struct pfsync_softc *sc = (struct pfsync_softc *)v;
	int spl;

	spl = splnet();
#ifdef __FreeBSD__
	/* pfsync_sendout() asserts that the pf lock is held. */
	PF_LOCK();
#endif
	pfsync_sendout(sc);
#ifdef __FreeBSD__
	PF_UNLOCK();
#endif
	splx(spl);
}
1584
1585/* This must be called in splnet() */
1586void
1587pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status)
1588{
1589	struct pfsync_state_bus *bus;
1590
1591#ifdef __FreeBSD__
1592	PF_ASSERT(MA_OWNED);
1593#endif
1594	if (sc->sc_mbuf != NULL)
1595		pfsync_sendout(sc);
1596
1597	if (pfsync_sync_ok &&
1598	    (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS,
1599	    (void *)&sc->sc_statep.b)) != NULL) {
1600		sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus);
1601		bus = sc->sc_statep.b;
1602		bus->creatorid = pf_status.hostid;
1603		bus->status = status;
1604		bus->endtime = htonl(time_uptime - sc->sc_ureq_received);
1605		pfsync_sendout(sc);
1606	}
1607}
1608
1609void
1610pfsync_bulk_update(void *v)
1611{
1612	struct pfsync_softc *sc = v;
1613	int s, i = 0;
1614	struct pf_state *state;
1615
1616#ifdef __FreeBSD__
1617	PF_LOCK();
1618#endif
1619	s = splnet();
1620	if (sc->sc_mbuf != NULL)
1621		pfsync_sendout(sc);
1622
1623	/*
1624	 * Grab at most PFSYNC_BULKPACKETS worth of states which have not
1625	 * been sent since the latest request was made.
1626	 */
1627	while ((state = TAILQ_FIRST(&state_updates)) != NULL &&
1628	    ++i < (sc->sc_maxcount * PFSYNC_BULKPACKETS)) {
1629		if (state->pfsync_time > sc->sc_ureq_received) {
1630			/* we're done */
1631			pfsync_send_bus(sc, PFSYNC_BUS_END);
1632			sc->sc_ureq_received = 0;
1633#ifdef __FreeBSD__
1634			callout_stop(&sc->sc_bulk_tmo);
1635#else
1636			timeout_del(&sc->sc_bulk_tmo);
1637#endif
1638			if (pf_status.debug >= PF_DEBUG_MISC)
1639				printf("pfsync: bulk update complete\n");
1640			break;
1641		} else {
1642			/* send an update and move to end of list */
1643			if (!state->sync_flags)
1644				pfsync_pack_state(PFSYNC_ACT_UPD, state, 0);
1645			state->pfsync_time = time_uptime;
1646			TAILQ_REMOVE(&state_updates, state, u.s.entry_updates);
1647			TAILQ_INSERT_TAIL(&state_updates, state,
1648			    u.s.entry_updates);
1649
1650			/* look again for more in a bit */
1651#ifdef __FreeBSD__
1652			callout_reset(&sc->sc_bulk_tmo, 1, pfsync_timeout,
1653			    LIST_FIRST(&pfsync_list));
1654#else
1655			timeout_add(&sc->sc_bulk_tmo, 1);
1656#endif
1657		}
1658	}
1659	if (sc->sc_mbuf != NULL)
1660		pfsync_sendout(sc);
1661	splx(s);
1662#ifdef __FreeBSD__
1663	PF_UNLOCK();
1664#endif
1665}
1666
1667void
1668pfsync_bulkfail(void *v)
1669{
1670	struct pfsync_softc *sc = v;
1671	int s, error;
1672
1673#ifdef __FreeBSD__
1674	PF_LOCK();
1675#endif
1676	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
1677		/* Try again in a bit */
1678#ifdef __FreeBSD__
1679		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail,
1680		    LIST_FIRST(&pfsync_list));
1681#else
1682		timeout_add(&sc->sc_bulkfail_tmo, 5 * hz);
1683#endif
1684		s = splnet();
1685		error = pfsync_request_update(NULL, NULL);
1686		if (error == ENOMEM) {
1687			if (pf_status.debug >= PF_DEBUG_MISC)
1688				printf("pfsync: cannot allocate mbufs for "
1689				    "bulk update\n");
1690		} else
1691			pfsync_sendout(sc);
1692		splx(s);
1693	} else {
1694		/* Pretend like the transfer was ok */
1695		sc->sc_ureq_sent = 0;
1696		sc->sc_bulk_tries = 0;
1697#if NCARP > 0
1698		if (!pfsync_sync_ok)
1699			carp_suppress_preempt--;
1700#endif
1701		pfsync_sync_ok = 1;
1702		if (pf_status.debug >= PF_DEBUG_MISC)
1703			printf("pfsync: failed to receive "
1704			    "bulk update status\n");
1705#ifdef __FreeBSD__
1706		callout_stop(&sc->sc_bulkfail_tmo);
1707#else
1708		timeout_del(&sc->sc_bulkfail_tmo);
1709#endif
1710	}
1711#ifdef __FreeBSD__
1712	PF_UNLOCK();
1713#endif
1714}
1715
1716/* This must be called in splnet() */
1717int
1718pfsync_sendout(sc)
1719	struct pfsync_softc *sc;
1720{
1721#if NBPFILTER > 0
1722# ifdef __FreeBSD__
1723	struct ifnet *ifp = SCP2IFP(sc);
1724# else
1725	struct ifnet *ifp = &sc->if_sc;
1726# endif
1727#endif
1728	struct mbuf *m;
1729
1730#ifdef __FreeBSD__
1731	PF_ASSERT(MA_OWNED);
1732	callout_stop(&sc->sc_tmo);
1733#else
1734	timeout_del(&sc->sc_tmo);
1735#endif
1736
1737	if (sc->sc_mbuf == NULL)
1738		return (0);
1739	m = sc->sc_mbuf;
1740	sc->sc_mbuf = NULL;
1741	sc->sc_statep.s = NULL;
1742
1743#ifdef __FreeBSD__
1744	KASSERT(m != NULL, ("pfsync_sendout: null mbuf"));
1745#endif
1746#if NBPFILTER > 0
1747	if (ifp->if_bpf)
1748		bpf_mtap(ifp->if_bpf, m);
1749#endif
1750
1751	if (sc->sc_mbuf_net) {
1752		m_freem(m);
1753		m = sc->sc_mbuf_net;
1754		sc->sc_mbuf_net = NULL;
1755		sc->sc_statep_net.s = NULL;
1756	}
1757
1758	if (sc->sc_sync_ifp || sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) {
1759		struct ip *ip;
1760		struct sockaddr sa;
1761
1762		M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
1763		if (m == NULL) {
1764			pfsyncstats.pfsyncs_onomem++;
1765			return (0);
1766		}
1767		ip = mtod(m, struct ip *);
1768		ip->ip_v = IPVERSION;
1769		ip->ip_hl = sizeof(*ip) >> 2;
1770		ip->ip_tos = IPTOS_LOWDELAY;
1771#ifdef __FreeBSD__
1772		ip->ip_len = m->m_pkthdr.len;
1773#else
1774		ip->ip_len = htons(m->m_pkthdr.len);
1775#endif
1776		ip->ip_id = htons(ip_randomid());
1777#ifdef __FreeBSD__
1778		ip->ip_off = IP_DF;
1779#else
1780		ip->ip_off = htons(IP_DF);
1781#endif
1782		ip->ip_ttl = PFSYNC_DFLTTL;
1783		ip->ip_p = IPPROTO_PFSYNC;
1784		ip->ip_sum = 0;
1785
1786		bzero(&sa, sizeof(sa));
1787		ip->ip_src.s_addr = INADDR_ANY;
1788
1789#ifdef __FreeBSD__
1790		if (sc->sc_sendaddr.s_addr == htonl(INADDR_PFSYNC_GROUP))
1791#else
1792		if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP)
1793#endif
1794			m->m_flags |= M_MCAST;
1795		ip->ip_dst = sc->sc_sendaddr;
1796#ifdef __FreeBSD__
1797		/* XXX_IMPORT */
1798		sc->sc_sendaddr.s_addr = htonl(sc->sc_sync_peer.s_addr);
1799#else
1800		sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr;
1801#endif
1802
1803		pfsyncstats.pfsyncs_opackets++;
1804#ifdef __FreeBSD__
1805		if (IF_HANDOFF(&sc->sc_ifq, m, NULL))
1806			pfsyncstats.pfsyncs_oerrors++;
1807		callout_reset(&sc->sc_send_tmo, 1, pfsync_senddef, sc);
1808#else
1809		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
1810			pfsyncstats.pfsyncs_oerrors++;
1811#endif
1812	} else
1813		m_freem(m);
1814
1815	return (0);
1816}
1817
1818#ifdef __FreeBSD__
1819static void
1820pfsync_senddef(void *arg)
1821{
1822	struct pfsync_softc *sc = (struct pfsync_softc *)arg;
1823	struct mbuf *m;
1824
1825	for(;;) {
1826		IF_DEQUEUE(&sc->sc_ifq, m);
1827		if (m == NULL)
1828			break;
1829		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
1830			pfsyncstats.pfsyncs_oerrors++;
1831	}
1832}
1833
1834static int
1835pfsync_modevent(module_t mod, int type, void *data)
1836{
1837	int error = 0;
1838
1839	switch (type) {
1840	case MOD_LOAD:
1841		LIST_INIT(&pfsync_list);
1842		if_clone_attach(&pfsync_cloner);
1843		break;
1844
1845	case MOD_UNLOAD:
1846		if_clone_detach(&pfsync_cloner);
1847		while (!LIST_EMPTY(&pfsync_list))
1848			pfsync_clone_destroy(
1849			    SCP2IFP(LIST_FIRST(&pfsync_list)));
1850		break;
1851
1852	default:
1853		error = EINVAL;
1854		break;
1855	}
1856
1857	return error;
1858}
1859
1860static moduledata_t pfsync_mod = {
1861	"pfsync",
1862	pfsync_modevent,
1863	0
1864};
1865
1866#define PFSYNC_MODVER 1
1867
1868DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
1869MODULE_VERSION(pfsync, PFSYNC_MODVER);
1870#endif /* __FreeBSD__ */
1871