if_pfsync.c revision 160195
1/*	$FreeBSD: head/sys/contrib/pf/net/if_pfsync.c 160195 2006-07-09 06:04:01Z sam $	*/
2/*	$OpenBSD: if_pfsync.c,v 1.46 2005/02/20 15:58:38 mcbride Exp $	*/
3
4/*
5 * Copyright (c) 2002 Michael Shalayeff
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27 * THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#ifdef __FreeBSD__
31#include "opt_inet.h"
32#include "opt_inet6.h"
33#endif
34
35#ifndef __FreeBSD__
36#include "bpfilter.h"
37#include "pfsync.h"
38#elif __FreeBSD__ >= 5
39#include "opt_bpf.h"
40#include "opt_pf.h"
41
42#ifdef DEV_BPF
43#define	NBPFILTER	DEV_BPF
44#else
45#define	NBPFILTER	0
46#endif
47
48#ifdef DEV_PFSYNC
49#define	NPFSYNC		DEV_PFSYNC
50#else
51#define	NPFSYNC		0
52#endif
53
54#endif
55
56#include <sys/param.h>
57#include <sys/proc.h>
58#include <sys/systm.h>
59#include <sys/time.h>
60#include <sys/mbuf.h>
61#include <sys/socket.h>
62#include <sys/kernel.h>
63#ifdef __FreeBSD__
64#include <sys/endian.h>
65#include <sys/malloc.h>
66#include <sys/module.h>
67#include <sys/sockio.h>
68#include <sys/lock.h>
69#include <sys/mutex.h>
70#include <sys/sysctl.h>
71#else
72#include <sys/ioctl.h>
73#include <sys/timeout.h>
74#endif
75
76#include <net/if.h>
77#if defined(__FreeBSD__)
78#include <net/if_clone.h>
79#endif
80#include <net/if_types.h>
81#include <net/route.h>
82#include <net/bpf.h>
83#include <netinet/tcp.h>
84#include <netinet/tcp_seq.h>
85
86#ifdef	INET
87#include <netinet/in.h>
88#include <netinet/in_systm.h>
89#include <netinet/in_var.h>
90#include <netinet/ip.h>
91#include <netinet/ip_var.h>
92#endif
93
94#ifdef INET6
95#ifndef INET
96#include <netinet/in.h>
97#endif
98#include <netinet6/nd6.h>
99#endif /* INET6 */
100
101#ifdef __FreeBSD__
102#include "opt_carp.h"
103#ifdef DEV_CARP
104#define	NCARP	1
105#else
106#define	NCARP	0
107#endif
108#else
109#include "carp.h"
110#endif
111#if NCARP > 0
112extern int carp_suppress_preempt;
113#endif
114
115#include <net/pfvar.h>
116#include <net/if_pfsync.h>
117
118#ifdef __FreeBSD__
119#define	PFSYNCNAME	"pfsync"
120#endif
121
122#define PFSYNC_MINMTU	\
123    (sizeof(struct pfsync_header) + sizeof(struct pf_state))
124
125#ifdef PFSYNCDEBUG
126#define DPRINTF(x)    do { if (pfsyncdebug) printf x ; } while (0)
127int pfsyncdebug;
128#else
129#define DPRINTF(x)
130#endif
131
132#ifndef __FreeBSD__
133struct pfsync_softc	pfsyncif;
134#endif
135struct pfsyncstats	pfsyncstats;
136#ifdef __FreeBSD__
137SYSCTL_DECL(_net_inet_pfsync);
138SYSCTL_STRUCT(_net_inet_pfsync, 0, stats, CTLFLAG_RW,
139    &pfsyncstats, pfsyncstats,
140    "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
141
142/*
143 * Locking notes:
144 * Whenever we really touch/look at the state table we have to hold the
145 * PF_LOCK. Functions that do just the interface handling, grab the per
146 * softc lock instead.
147 *
148 */
149
150static void	pfsync_clone_destroy(struct ifnet *);
151static int	pfsync_clone_create(struct if_clone *, int, caddr_t params);
152static void	pfsync_senddef(void *);
153#else
154void	pfsyncattach(int);
155#endif
156void	pfsync_setmtu(struct pfsync_softc *, int);
157int	pfsync_insert_net_state(struct pfsync_state *);
158int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
159	    struct rtentry *);
160int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
161void	pfsyncstart(struct ifnet *);
162
163struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **);
164int	pfsync_request_update(struct pfsync_state_upd *, struct in_addr *);
165int	pfsync_sendout(struct pfsync_softc *);
166void	pfsync_timeout(void *);
167void	pfsync_send_bus(struct pfsync_softc *, u_int8_t);
168void	pfsync_bulk_update(void *);
169void	pfsync_bulkfail(void *);
170
171int	pfsync_sync_ok;
172#ifndef __FreeBSD__
173extern int ifqmaxlen;
174extern struct timeval time;
175extern struct timeval mono_time;
176extern int hz;
177#endif
178
179#ifdef __FreeBSD__
180static MALLOC_DEFINE(M_PFSYNC, PFSYNCNAME, "Packet Filter State Sync. Interface");
181static LIST_HEAD(pfsync_list, pfsync_softc) pfsync_list;
182#define	SCP2IFP(sc)		((sc)->sc_ifp)
183IFC_SIMPLE_DECLARE(pfsync, 1);
184
185static void
186pfsync_clone_destroy(struct ifnet *ifp)
187{
188        struct pfsync_softc *sc;
189
190	sc = ifp->if_softc;
191	callout_stop(&sc->sc_tmo);
192	callout_stop(&sc->sc_bulk_tmo);
193	callout_stop(&sc->sc_bulkfail_tmo);
194
195	callout_stop(&sc->sc_send_tmo);
196
197#if NBPFILTER > 0
198        bpfdetach(ifp);
199#endif
200        if_detach(ifp);
201	if_free(ifp);
202        LIST_REMOVE(sc, sc_next);
203        free(sc->sc_imo.imo_membership, M_PFSYNC);
204        free(sc, M_PFSYNC);
205}
206
207static int
208#ifdef __FreeBSD__
209pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t params)
210#else
211pfsync_clone_create(struct if_clone *ifc, int unit)
212#endif
213{
214	struct pfsync_softc *sc;
215	struct ifnet *ifp;
216
217	MALLOC(sc, struct pfsync_softc *, sizeof(*sc), M_PFSYNC,
218	    M_WAITOK|M_ZERO);
219	ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC);
220	if (ifp == NULL) {
221		free(sc, M_PFSYNC);
222		return (ENOSPC);
223	}
224
225	pfsync_sync_ok = 1;
226	sc->sc_mbuf = NULL;
227	sc->sc_mbuf_net = NULL;
228	sc->sc_statep.s = NULL;
229	sc->sc_statep_net.s = NULL;
230	sc->sc_maxupdates = 128;
231	sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
232	sc->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP);
233	sc->sc_ureq_received = 0;
234	sc->sc_ureq_sent = 0;
235	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
236	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_PFSYNC,
237	    M_WAITOK);
238	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
239
240	ifp = SCP2IFP(sc);
241	if_initname(ifp, ifc->ifc_name, unit);
242	ifp->if_ioctl = pfsyncioctl;
243	ifp->if_output = pfsyncoutput;
244	ifp->if_start = pfsyncstart;
245	ifp->if_snd.ifq_maxlen = ifqmaxlen;
246	ifp->if_hdrlen = PFSYNC_HDRLEN;
247	ifp->if_baudrate = IF_Mbps(100);
248	ifp->if_softc = sc;
249	pfsync_setmtu(sc, MCLBYTES);
250	callout_init(&sc->sc_tmo, NET_CALLOUT_MPSAFE);
251	callout_init(&sc->sc_bulk_tmo, NET_CALLOUT_MPSAFE);
252	callout_init(&sc->sc_bulkfail_tmo, NET_CALLOUT_MPSAFE);
253	callout_init(&sc->sc_send_tmo, NET_CALLOUT_MPSAFE);
254	sc->sc_ifq.ifq_maxlen = ifqmaxlen;
255	mtx_init(&sc->sc_ifq.ifq_mtx, ifp->if_xname, "pfsync send queue",
256	    MTX_DEF);
257	if_attach(ifp);
258
259	LIST_INSERT_HEAD(&pfsync_list, sc, sc_next);
260#if NBPFILTER > 0
261	bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
262#endif
263
264	return (0);
265}
266#else /* !__FreeBSD__ */
267void
268pfsyncattach(int npfsync)
269{
270	struct ifnet *ifp;
271
272	pfsync_sync_ok = 1;
273	bzero(&pfsyncif, sizeof(pfsyncif));
274	pfsyncif.sc_mbuf = NULL;
275	pfsyncif.sc_mbuf_net = NULL;
276	pfsyncif.sc_statep.s = NULL;
277	pfsyncif.sc_statep_net.s = NULL;
278	pfsyncif.sc_maxupdates = 128;
279	pfsyncif.sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
280	pfsyncif.sc_sendaddr.s_addr = INADDR_PFSYNC_GROUP;
281	pfsyncif.sc_ureq_received = 0;
282	pfsyncif.sc_ureq_sent = 0;
283	ifp = &pfsyncif.sc_if;
284	strlcpy(ifp->if_xname, "pfsync0", sizeof ifp->if_xname);
285	ifp->if_softc = &pfsyncif;
286	ifp->if_ioctl = pfsyncioctl;
287	ifp->if_output = pfsyncoutput;
288	ifp->if_start = pfsyncstart;
289	ifp->if_type = IFT_PFSYNC;
290	ifp->if_snd.ifq_maxlen = ifqmaxlen;
291	ifp->if_hdrlen = PFSYNC_HDRLEN;
292	pfsync_setmtu(&pfsyncif, MCLBYTES);
293	timeout_set(&pfsyncif.sc_tmo, pfsync_timeout, &pfsyncif);
294	timeout_set(&pfsyncif.sc_bulk_tmo, pfsync_bulk_update, &pfsyncif);
295	timeout_set(&pfsyncif.sc_bulkfail_tmo, pfsync_bulkfail, &pfsyncif);
296	if_attach(ifp);
297	if_alloc_sadl(ifp);
298
299#if NBPFILTER > 0
300	bpfattach(&pfsyncif.sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
301#endif
302}
303#endif
304
305/*
306 * Start output on the pfsync interface.
307 */
308void
309pfsyncstart(struct ifnet *ifp)
310{
311#ifdef __FreeBSD__
312	IF_LOCK(&ifp->if_snd);
313	_IF_DROP(&ifp->if_snd);
314	_IF_DRAIN(&ifp->if_snd);
315	IF_UNLOCK(&ifp->if_snd);
316#else
317	struct mbuf *m;
318	int s;
319
320	for (;;) {
321		s = splimp();
322		IF_DROP(&ifp->if_snd);
323		IF_DEQUEUE(&ifp->if_snd, m);
324		splx(s);
325
326		if (m == NULL)
327			return;
328		else
329			m_freem(m);
330	}
331#endif
332}
333
334int
335pfsync_insert_net_state(struct pfsync_state *sp)
336{
337	struct pf_state	*st = NULL;
338	struct pf_rule *r = NULL;
339	struct pfi_kif	*kif;
340
341#ifdef __FreeBSD__
342	PF_ASSERT(MA_OWNED);
343#endif
344	if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) {
345		printf("pfsync_insert_net_state: invalid creator id:"
346		    " %08x\n", ntohl(sp->creatorid));
347		return (EINVAL);
348	}
349
350	kif = pfi_lookup_create(sp->ifname);
351	if (kif == NULL) {
352		if (pf_status.debug >= PF_DEBUG_MISC)
353			printf("pfsync_insert_net_state: "
354			    "unknown interface: %s\n", sp->ifname);
355		/* skip this state */
356		return (0);
357	}
358
359	/*
360	 * Just use the default rule until we have infrastructure to find the
361	 * best matching rule.
362	 */
363	r = &pf_default_rule;
364
365	if (!r->max_states || r->states < r->max_states)
366		st = pool_get(&pf_state_pl, PR_NOWAIT);
367	if (st == NULL) {
368		pfi_maybe_destroy(kif);
369		return (ENOMEM);
370	}
371	bzero(st, sizeof(*st));
372
373	st->rule.ptr = r;
374	/* XXX get pointers to nat_rule and anchor */
375
376	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
377	r->states++;
378
379	/* fill in the rest of the state entry */
380	pf_state_host_ntoh(&sp->lan, &st->lan);
381	pf_state_host_ntoh(&sp->gwy, &st->gwy);
382	pf_state_host_ntoh(&sp->ext, &st->ext);
383
384	pf_state_peer_ntoh(&sp->src, &st->src);
385	pf_state_peer_ntoh(&sp->dst, &st->dst);
386
387	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
388	st->creation = time_second - ntohl(sp->creation);
389	st->expire = ntohl(sp->expire) + time_second;
390
391	st->af = sp->af;
392	st->proto = sp->proto;
393	st->direction = sp->direction;
394	st->log = sp->log;
395	st->timeout = sp->timeout;
396	st->allow_opts = sp->allow_opts;
397
398	bcopy(sp->id, &st->id, sizeof(st->id));
399	st->creatorid = sp->creatorid;
400	st->sync_flags = PFSTATE_FROMSYNC;
401
402
403	if (pf_insert_state(kif, st)) {
404		pfi_maybe_destroy(kif);
405		/* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */
406		r->states--;
407		pool_put(&pf_state_pl, st);
408		return (EINVAL);
409	}
410
411	return (0);
412}
413
414void
415#ifdef __FreeBSD__
416pfsync_input(struct mbuf *m, __unused int off)
417#else
418pfsync_input(struct mbuf *m, ...)
419#endif
420{
421	struct ip *ip = mtod(m, struct ip *);
422	struct pfsync_header *ph;
423#ifdef __FreeBSD__
424	struct pfsync_softc *sc = LIST_FIRST(&pfsync_list);
425#else
426	struct pfsync_softc *sc = &pfsyncif;
427#endif
428	struct pf_state *st, key;
429	struct pfsync_state *sp;
430	struct pfsync_state_upd *up;
431	struct pfsync_state_del *dp;
432	struct pfsync_state_clr *cp;
433	struct pfsync_state_upd_req *rup;
434	struct pfsync_state_bus *bus;
435	struct in_addr src;
436	struct mbuf *mp;
437	int iplen, action, error, i, s, count, offp, sfail, stale = 0;
438
439	pfsyncstats.pfsyncs_ipackets++;
440
441	/* verify that we have a sync interface configured */
442	if (!sc->sc_sync_ifp || !pf_status.running) /* XXX PF_LOCK? */
443		goto done;
444
445	/* verify that the packet came in on the right interface */
446	if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) {
447		pfsyncstats.pfsyncs_badif++;
448		goto done;
449	}
450
451	/* verify that the IP TTL is 255.  */
452	if (ip->ip_ttl != PFSYNC_DFLTTL) {
453		pfsyncstats.pfsyncs_badttl++;
454		goto done;
455	}
456
457	iplen = ip->ip_hl << 2;
458
459	if (m->m_pkthdr.len < iplen + sizeof(*ph)) {
460		pfsyncstats.pfsyncs_hdrops++;
461		goto done;
462	}
463
464	if (iplen + sizeof(*ph) > m->m_len) {
465		if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) {
466			pfsyncstats.pfsyncs_hdrops++;
467			goto done;
468		}
469		ip = mtod(m, struct ip *);
470	}
471	ph = (struct pfsync_header *)((char *)ip + iplen);
472
473	/* verify the version */
474	if (ph->version != PFSYNC_VERSION) {
475		pfsyncstats.pfsyncs_badver++;
476		goto done;
477	}
478
479	action = ph->action;
480	count = ph->count;
481
482	/* make sure it's a valid action code */
483	if (action >= PFSYNC_ACT_MAX) {
484		pfsyncstats.pfsyncs_badact++;
485		goto done;
486	}
487
488	/* Cheaper to grab this now than having to mess with mbufs later */
489	src = ip->ip_src;
490
491	switch (action) {
492	case PFSYNC_ACT_CLR: {
493		struct pf_state *nexts;
494		struct pfi_kif	*kif;
495		u_int32_t creatorid;
496		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
497		    sizeof(*cp), &offp)) == NULL) {
498			pfsyncstats.pfsyncs_badlen++;
499			return;
500		}
501		cp = (struct pfsync_state_clr *)(mp->m_data + offp);
502		creatorid = cp->creatorid;
503
504		s = splsoftnet();
505#ifdef __FreeBSD__
506		PF_LOCK();
507#endif
508		if (cp->ifname[0] == '\0') {
509			for (st = RB_MIN(pf_state_tree_id, &tree_id);
510			    st; st = nexts) {
511                		nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
512				if (st->creatorid == creatorid) {
513					st->timeout = PFTM_PURGE;
514					pf_purge_expired_state(st);
515				}
516			}
517		} else {
518			kif = pfi_lookup_if(cp->ifname);
519			if (kif == NULL) {
520				if (pf_status.debug >= PF_DEBUG_MISC)
521					printf("pfsync_input: PFSYNC_ACT_CLR "
522					    "bad interface: %s\n", cp->ifname);
523				splx(s);
524#ifdef __FreeBSD__
525				PF_UNLOCK();
526#endif
527				goto done;
528			}
529			for (st = RB_MIN(pf_state_tree_lan_ext,
530			    &kif->pfik_lan_ext); st; st = nexts) {
531				nexts = RB_NEXT(pf_state_tree_lan_ext,
532				    &kif->pfik_lan_ext, st);
533				if (st->creatorid == creatorid) {
534					st->timeout = PFTM_PURGE;
535					pf_purge_expired_state(st);
536				}
537			}
538		}
539#ifdef __FreeBSD__
540		PF_UNLOCK();
541#endif
542		splx(s);
543
544		break;
545	}
546	case PFSYNC_ACT_INS:
547		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
548		    count * sizeof(*sp), &offp)) == NULL) {
549			pfsyncstats.pfsyncs_badlen++;
550			return;
551		}
552
553		s = splsoftnet();
554#ifdef __FreeBSD__
555		PF_LOCK();
556#endif
557		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
558		    i < count; i++, sp++) {
559			/* check for invalid values */
560			if (sp->timeout >= PFTM_MAX ||
561			    sp->src.state > PF_TCPS_PROXY_DST ||
562			    sp->dst.state > PF_TCPS_PROXY_DST ||
563			    sp->direction > PF_OUT ||
564			    (sp->af != AF_INET && sp->af != AF_INET6)) {
565				if (pf_status.debug >= PF_DEBUG_MISC)
566					printf("pfsync_insert: PFSYNC_ACT_INS: "
567					    "invalid value\n");
568				pfsyncstats.pfsyncs_badstate++;
569				continue;
570			}
571
572			if ((error = pfsync_insert_net_state(sp))) {
573				if (error == ENOMEM) {
574					splx(s);
575#ifdef __FreeBSD__
576					PF_UNLOCK();
577#endif
578					goto done;
579				}
580				continue;
581			}
582		}
583#ifdef __FreeBSD__
584		PF_UNLOCK();
585#endif
586		splx(s);
587		break;
588	case PFSYNC_ACT_UPD:
589		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
590		    count * sizeof(*sp), &offp)) == NULL) {
591			pfsyncstats.pfsyncs_badlen++;
592			return;
593		}
594
595		s = splsoftnet();
596#ifdef __FreeBSD__
597		PF_LOCK();
598#endif
599		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
600		    i < count; i++, sp++) {
601			int flags = PFSYNC_FLAG_STALE;
602
603			/* check for invalid values */
604			if (sp->timeout >= PFTM_MAX ||
605			    sp->src.state > PF_TCPS_PROXY_DST ||
606			    sp->dst.state > PF_TCPS_PROXY_DST) {
607				if (pf_status.debug >= PF_DEBUG_MISC)
608					printf("pfsync_insert: PFSYNC_ACT_UPD: "
609					    "invalid value\n");
610				pfsyncstats.pfsyncs_badstate++;
611				continue;
612			}
613
614			bcopy(sp->id, &key.id, sizeof(key.id));
615			key.creatorid = sp->creatorid;
616
617			st = pf_find_state_byid(&key);
618			if (st == NULL) {
619				/* insert the update */
620				if (pfsync_insert_net_state(sp))
621					pfsyncstats.pfsyncs_badstate++;
622				continue;
623			}
624			sfail = 0;
625			if (st->proto == IPPROTO_TCP) {
626				/*
627				 * The state should never go backwards except
628				 * for syn-proxy states.  Neither should the
629				 * sequence window slide backwards.
630				 */
631				if (st->src.state > sp->src.state &&
632				    (st->src.state < PF_TCPS_PROXY_SRC ||
633				    sp->src.state >= PF_TCPS_PROXY_SRC))
634					sfail = 1;
635				else if (SEQ_GT(st->src.seqlo,
636				    ntohl(sp->src.seqlo)))
637					sfail = 3;
638				else if (st->dst.state > sp->dst.state) {
639					/* There might still be useful
640					 * information about the src state here,
641					 * so import that part of the update,
642					 * then "fail" so we send the updated
643					 * state back to the peer who is missing
644					 * our what we know. */
645					pf_state_peer_ntoh(&sp->src, &st->src);
646					/* XXX do anything with timeouts? */
647					sfail = 7;
648					flags = 0;
649				} else if (st->dst.state >= TCPS_SYN_SENT &&
650				    SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo)))
651					sfail = 4;
652			} else {
653				/*
654				 * Non-TCP protocol state machine always go
655				 * forwards
656				 */
657				if (st->src.state > sp->src.state)
658					sfail = 5;
659				else if ( st->dst.state > sp->dst.state)
660					sfail = 6;
661			}
662			if (sfail) {
663				if (pf_status.debug >= PF_DEBUG_MISC)
664					printf("pfsync: %s stale update "
665					    "(%d) id: %016llx "
666					    "creatorid: %08x\n",
667					    (sfail < 7 ?  "ignoring"
668					     : "partial"), sfail,
669#ifdef __FreeBSD__
670					    (unsigned long long)be64toh(st->id),
671#else
672					    betoh64(st->id),
673#endif
674					    ntohl(st->creatorid));
675				pfsyncstats.pfsyncs_badstate++;
676
677				if (!(sp->sync_flags & PFSTATE_STALE)) {
678					/* we have a better state, send it */
679					if (sc->sc_mbuf != NULL && !stale)
680						pfsync_sendout(sc);
681					stale++;
682					if (!st->sync_flags)
683						pfsync_pack_state(
684						    PFSYNC_ACT_UPD, st, flags);
685				}
686				continue;
687			}
688			pf_state_peer_ntoh(&sp->src, &st->src);
689			pf_state_peer_ntoh(&sp->dst, &st->dst);
690			st->expire = ntohl(sp->expire) + time_second;
691			st->timeout = sp->timeout;
692		}
693		if (stale && sc->sc_mbuf != NULL)
694			pfsync_sendout(sc);
695#ifdef __FreeBSD__
696		PF_UNLOCK();
697#endif
698		splx(s);
699		break;
700	/*
701	 * It's not strictly necessary for us to support the "uncompressed"
702	 * delete action, but it's relatively simple and maintains consistency.
703	 */
704	case PFSYNC_ACT_DEL:
705		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
706		    count * sizeof(*sp), &offp)) == NULL) {
707			pfsyncstats.pfsyncs_badlen++;
708			return;
709		}
710
711		s = splsoftnet();
712#ifdef __FreeBSD__
713		PF_LOCK();
714#endif
715		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
716		    i < count; i++, sp++) {
717			bcopy(sp->id, &key.id, sizeof(key.id));
718			key.creatorid = sp->creatorid;
719
720			st = pf_find_state_byid(&key);
721			if (st == NULL) {
722				pfsyncstats.pfsyncs_badstate++;
723				continue;
724			}
725			st->timeout = PFTM_PURGE;
726			st->sync_flags |= PFSTATE_FROMSYNC;
727			pf_purge_expired_state(st);
728		}
729#ifdef __FreeBSD__
730		PF_UNLOCK();
731#endif
732		splx(s);
733		break;
734	case PFSYNC_ACT_UPD_C: {
735		int update_requested = 0;
736
737		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
738		    count * sizeof(*up), &offp)) == NULL) {
739			pfsyncstats.pfsyncs_badlen++;
740			return;
741		}
742
743		s = splsoftnet();
744#ifdef __FreeBSD__
745		PF_LOCK();
746#endif
747		for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp);
748		    i < count; i++, up++) {
749			/* check for invalid values */
750			if (up->timeout >= PFTM_MAX ||
751			    up->src.state > PF_TCPS_PROXY_DST ||
752			    up->dst.state > PF_TCPS_PROXY_DST) {
753				if (pf_status.debug >= PF_DEBUG_MISC)
754					printf("pfsync_insert: "
755					    "PFSYNC_ACT_UPD_C: "
756					    "invalid value\n");
757				pfsyncstats.pfsyncs_badstate++;
758				continue;
759			}
760
761			bcopy(up->id, &key.id, sizeof(key.id));
762			key.creatorid = up->creatorid;
763
764			st = pf_find_state_byid(&key);
765			if (st == NULL) {
766				/* We don't have this state. Ask for it. */
767				error = pfsync_request_update(up, &src);
768				if (error == ENOMEM) {
769					splx(s);
770					goto done;
771				}
772				update_requested = 1;
773				pfsyncstats.pfsyncs_badstate++;
774				continue;
775			}
776			sfail = 0;
777			if (st->proto == IPPROTO_TCP) {
778				/*
779				 * The state should never go backwards except
780				 * for syn-proxy states.  Neither should the
781				 * sequence window slide backwards.
782				 */
783				if (st->src.state > up->src.state &&
784				    (st->src.state < PF_TCPS_PROXY_SRC ||
785				    up->src.state >= PF_TCPS_PROXY_SRC))
786					sfail = 1;
787				else if (st->dst.state > up->dst.state)
788					sfail = 2;
789				else if (SEQ_GT(st->src.seqlo,
790				    ntohl(up->src.seqlo)))
791					sfail = 3;
792				else if (st->dst.state >= TCPS_SYN_SENT &&
793				    SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo)))
794					sfail = 4;
795			} else {
796				/*
797				 * Non-TCP protocol state machine always go
798				 * forwards
799				 */
800				if (st->src.state > up->src.state)
801					sfail = 5;
802				else if (st->dst.state > up->dst.state)
803					sfail = 6;
804			}
805			if (sfail) {
806				if (pf_status.debug >= PF_DEBUG_MISC)
807					printf("pfsync: ignoring stale update "
808					    "(%d) id: %016llx "
809					    "creatorid: %08x\n", sfail,
810#ifdef __FreeBSD__
811					    (unsigned long long)be64toh(st->id),
812#else
813					    betoh64(st->id),
814#endif
815					    ntohl(st->creatorid));
816				pfsyncstats.pfsyncs_badstate++;
817
818				/* we have a better state, send it out */
819				if ((!stale || update_requested) &&
820				    sc->sc_mbuf != NULL) {
821					pfsync_sendout(sc);
822					update_requested = 0;
823				}
824				stale++;
825				if (!st->sync_flags)
826					pfsync_pack_state(PFSYNC_ACT_UPD, st,
827					    PFSYNC_FLAG_STALE);
828				continue;
829			}
830			pf_state_peer_ntoh(&up->src, &st->src);
831			pf_state_peer_ntoh(&up->dst, &st->dst);
832			st->expire = ntohl(up->expire) + time_second;
833			st->timeout = up->timeout;
834		}
835		if ((update_requested || stale) && sc->sc_mbuf)
836			pfsync_sendout(sc);
837#ifdef __FreeBSD__
838		PF_UNLOCK();
839#endif
840		splx(s);
841		break;
842	}
843	case PFSYNC_ACT_DEL_C:
844		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
845		    count * sizeof(*dp), &offp)) == NULL) {
846			pfsyncstats.pfsyncs_badlen++;
847			return;
848		}
849
850		s = splsoftnet();
851#ifdef __FreeBSD__
852		PF_LOCK();
853#endif
854		for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp);
855		    i < count; i++, dp++) {
856			bcopy(dp->id, &key.id, sizeof(key.id));
857			key.creatorid = dp->creatorid;
858
859			st = pf_find_state_byid(&key);
860			if (st == NULL) {
861				pfsyncstats.pfsyncs_badstate++;
862				continue;
863			}
864			st->timeout = PFTM_PURGE;
865			st->sync_flags |= PFSTATE_FROMSYNC;
866			pf_purge_expired_state(st);
867		}
868#ifdef __FreeBSD__
869		PF_UNLOCK();
870#endif
871		splx(s);
872		break;
873	case PFSYNC_ACT_INS_F:
874	case PFSYNC_ACT_DEL_F:
875		/* not implemented */
876		break;
877	case PFSYNC_ACT_UREQ:
878		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
879		    count * sizeof(*rup), &offp)) == NULL) {
880			pfsyncstats.pfsyncs_badlen++;
881			return;
882		}
883
884		s = splsoftnet();
885#ifdef __FreeBSD__
886		PF_LOCK();
887#endif
888		if (sc->sc_mbuf != NULL)
889			pfsync_sendout(sc);
890		for (i = 0,
891		    rup = (struct pfsync_state_upd_req *)(mp->m_data + offp);
892		    i < count; i++, rup++) {
893			bcopy(rup->id, &key.id, sizeof(key.id));
894			key.creatorid = rup->creatorid;
895
896			if (key.id == 0 && key.creatorid == 0) {
897				sc->sc_ureq_received = time_uptime;
898				if (pf_status.debug >= PF_DEBUG_MISC)
899					printf("pfsync: received "
900					    "bulk update request\n");
901				pfsync_send_bus(sc, PFSYNC_BUS_START);
902#ifdef __FreeBSD__
903				callout_reset(&sc->sc_bulk_tmo, 1 * hz,
904				    pfsync_bulk_update,
905				    LIST_FIRST(&pfsync_list));
906#else
907				timeout_add(&sc->sc_bulk_tmo, 1 * hz);
908#endif
909			} else {
910				st = pf_find_state_byid(&key);
911				if (st == NULL) {
912					pfsyncstats.pfsyncs_badstate++;
913					continue;
914				}
915				if (!st->sync_flags)
916					pfsync_pack_state(PFSYNC_ACT_UPD,
917					    st, 0);
918			}
919		}
920		if (sc->sc_mbuf != NULL)
921			pfsync_sendout(sc);
922#ifdef __FreeBSD__
923		PF_UNLOCK();
924#endif
925		splx(s);
926		break;
927	case PFSYNC_ACT_BUS:
928		/* If we're not waiting for a bulk update, who cares. */
929		if (sc->sc_ureq_sent == 0)
930			break;
931
932		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
933		    sizeof(*bus), &offp)) == NULL) {
934			pfsyncstats.pfsyncs_badlen++;
935			return;
936		}
937		bus = (struct pfsync_state_bus *)(mp->m_data + offp);
938		switch (bus->status) {
939		case PFSYNC_BUS_START:
940#ifdef __FreeBSD__
941			callout_reset(&sc->sc_bulkfail_tmo,
942			    pf_pool_limits[PF_LIMIT_STATES].limit /
943			    (PFSYNC_BULKPACKETS * sc->sc_maxcount),
944			    pfsync_bulkfail, LIST_FIRST(&pfsync_list));
945#else
946			timeout_add(&sc->sc_bulkfail_tmo,
947			    pf_pool_limits[PF_LIMIT_STATES].limit /
948			    (PFSYNC_BULKPACKETS * sc->sc_maxcount));
949#endif
950			if (pf_status.debug >= PF_DEBUG_MISC)
951				printf("pfsync: received bulk "
952				    "update start\n");
953			break;
954		case PFSYNC_BUS_END:
955			if (time_uptime - ntohl(bus->endtime) >=
956			    sc->sc_ureq_sent) {
957				/* that's it, we're happy */
958				sc->sc_ureq_sent = 0;
959				sc->sc_bulk_tries = 0;
960#ifdef __FreeBSD__
961				callout_stop(&sc->sc_bulkfail_tmo);
962#else
963				timeout_del(&sc->sc_bulkfail_tmo);
964#endif
965#if NCARP > 0	/* XXX_IMPORT */
966				if (!pfsync_sync_ok)
967					carp_suppress_preempt--;
968#endif
969				pfsync_sync_ok = 1;
970				if (pf_status.debug >= PF_DEBUG_MISC)
971					printf("pfsync: received valid "
972					    "bulk update end\n");
973			} else {
974				if (pf_status.debug >= PF_DEBUG_MISC)
975					printf("pfsync: received invalid "
976					    "bulk update end: bad timestamp\n");
977			}
978			break;
979		}
980		break;
981	}
982
983done:
984	if (m)
985		m_freem(m);
986}
987
/*
 * if_output handler of the pfsync interface.
 *
 * Nothing is sent through this path: the packet handed in is simply
 * freed and success is reported.  ifp, dst and rt are not used.
 */
int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
	struct rtentry *rt)
{
	const int error = 0;

	m_freem(m);

	return (error);
}
995
996/* ARGSUSED */
997int
998pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
999{
1000#ifndef __FreeBSD__
1001	struct proc *p = curproc;
1002#endif
1003	struct pfsync_softc *sc = ifp->if_softc;
1004	struct ifreq *ifr = (struct ifreq *)data;
1005	struct ip_moptions *imo = &sc->sc_imo;
1006	struct pfsyncreq pfsyncr;
1007	struct ifnet    *sifp;
1008	int s, error;
1009
1010	switch (cmd) {
1011	case SIOCSIFADDR:
1012	case SIOCAIFADDR:
1013	case SIOCSIFDSTADDR:
1014	case SIOCSIFFLAGS:
1015#ifdef __FreeBSD__
1016		if (ifp->if_flags & IFF_UP)
1017			ifp->if_drv_flags |= IFF_DRV_RUNNING;
1018		else
1019			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1020#else
1021		if (ifp->if_flags & IFF_UP)
1022			ifp->if_flags |= IFF_RUNNING;
1023		else
1024			ifp->if_flags &= ~IFF_RUNNING;
1025#endif
1026		break;
1027	case SIOCSIFMTU:
1028		if (ifr->ifr_mtu < PFSYNC_MINMTU)
1029			return (EINVAL);
1030		if (ifr->ifr_mtu > MCLBYTES)
1031			ifr->ifr_mtu = MCLBYTES;
1032		s = splnet();
1033#ifdef __FreeBSD__
1034		PF_LOCK();
1035#endif
1036		if (ifr->ifr_mtu < ifp->if_mtu) {
1037			pfsync_sendout(sc);
1038		}
1039		pfsync_setmtu(sc, ifr->ifr_mtu);
1040#ifdef __FreeBSD__
1041		PF_UNLOCK();
1042#endif
1043		splx(s);
1044		break;
1045	case SIOCGETPFSYNC:
1046#ifdef __FreeBSD__
1047		/* XXX: read unlocked */
1048#endif
1049		bzero(&pfsyncr, sizeof(pfsyncr));
1050		if (sc->sc_sync_ifp)
1051			strlcpy(pfsyncr.pfsyncr_syncdev,
1052			    sc->sc_sync_ifp->if_xname, IFNAMSIZ);
1053		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
1054		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
1055		if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))))
1056			return (error);
1057		break;
1058	case SIOCSETPFSYNC:
1059#ifdef __FreeBSD__
1060		if ((error = suser(curthread)) != 0)
1061#else
1062		if ((error = suser(p, p->p_acflag)) != 0)
1063#endif
1064			return (error);
1065		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
1066			return (error);
1067
1068		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
1069#ifdef __FreeBSD__
1070			sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
1071#else
1072			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
1073#endif
1074		else
1075			sc->sc_sync_peer.s_addr =
1076			    pfsyncr.pfsyncr_syncpeer.s_addr;
1077
1078		if (pfsyncr.pfsyncr_maxupdates > 255)
1079			return (EINVAL);
1080#ifdef __FreeBSD__
1081		callout_drain(&sc->sc_send_tmo);
1082		PF_LOCK();
1083#endif
1084		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
1085
1086		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
1087			sc->sc_sync_ifp = NULL;
1088			if (sc->sc_mbuf_net != NULL) {
1089				/* Don't keep stale pfsync packets around. */
1090				s = splnet();
1091				m_freem(sc->sc_mbuf_net);
1092				sc->sc_mbuf_net = NULL;
1093				sc->sc_statep_net.s = NULL;
1094				splx(s);
1095			}
1096			if (imo->imo_num_memberships > 0) {
1097				in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1098				imo->imo_multicast_ifp = NULL;
1099			}
1100#ifdef __FreeBSD__
1101			PF_UNLOCK();
1102#endif
1103			break;
1104		}
1105
1106		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) {
1107#ifdef __FreeBSD__
1108			PF_UNLOCK();
1109#endif
1110			return (EINVAL);
1111		}
1112
1113		s = splnet();
1114#ifdef __FreeBSD__
1115		if (sifp->if_mtu < SCP2IFP(sc)->if_mtu ||
1116#else
1117		if (sifp->if_mtu < sc->sc_if.if_mtu ||
1118#endif
1119		    (sc->sc_sync_ifp != NULL &&
1120		    sifp->if_mtu < sc->sc_sync_ifp->if_mtu) ||
1121		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
1122			pfsync_sendout(sc);
1123		sc->sc_sync_ifp = sifp;
1124
1125#ifdef __FreeBSD__
1126		pfsync_setmtu(sc, SCP2IFP(sc)->if_mtu);
1127#else
1128		pfsync_setmtu(sc, sc->sc_if.if_mtu);
1129#endif
1130
1131		if (imo->imo_num_memberships > 0) {
1132			in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1133			imo->imo_multicast_ifp = NULL;
1134		}
1135
1136		if (sc->sc_sync_ifp &&
1137#ifdef __FreeBSD__
1138		    sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
1139#else
1140		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
1141#endif
1142			struct in_addr addr;
1143
1144			if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) {
1145				sc->sc_sync_ifp = NULL;
1146#ifdef __FreeBSD__
1147				PF_UNLOCK();
1148#endif
1149				splx(s);
1150				return (EADDRNOTAVAIL);
1151			}
1152#ifdef __FreeBSD__
1153			PF_UNLOCK();		/* addmulti mallocs w/ WAITOK */
1154			addr.s_addr = htonl(INADDR_PFSYNC_GROUP);
1155#else
1156			addr.s_addr = INADDR_PFSYNC_GROUP;
1157#endif
1158
1159			if ((imo->imo_membership[0] =
1160			    in_addmulti(&addr, sc->sc_sync_ifp)) == NULL) {
1161				sc->sc_sync_ifp = NULL;
1162				splx(s);
1163				return (ENOBUFS);
1164			}
1165			imo->imo_num_memberships++;
1166			imo->imo_multicast_ifp = sc->sc_sync_ifp;
1167			imo->imo_multicast_ttl = PFSYNC_DFLTTL;
1168			imo->imo_multicast_loop = 0;
1169#ifdef __FreeBSD__
1170			PF_LOCK();
1171#endif
1172		}
1173
1174		if (sc->sc_sync_ifp ||
1175#ifdef __FreeBSD__
1176		    sc->sc_sendaddr.s_addr != htonl(INADDR_PFSYNC_GROUP)) {
1177#else
1178		    sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) {
1179#endif
1180			/* Request a full state table update. */
1181			sc->sc_ureq_sent = time_uptime;
1182#if NCARP > 0
1183			if (pfsync_sync_ok)
1184				carp_suppress_preempt++;
1185#endif
1186			pfsync_sync_ok = 0;
1187			if (pf_status.debug >= PF_DEBUG_MISC)
1188				printf("pfsync: requesting bulk update\n");
1189#ifdef __FreeBSD__
1190			callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
1191			    pfsync_bulkfail, LIST_FIRST(&pfsync_list));
1192#else
1193			timeout_add(&sc->sc_bulkfail_tmo, 5 * hz);
1194#endif
1195			error = pfsync_request_update(NULL, NULL);
1196			if (error == ENOMEM) {
1197#ifdef __FreeBSD__
1198				PF_UNLOCK();
1199#endif
1200				splx(s);
1201				return (ENOMEM);
1202			}
1203			pfsync_sendout(sc);
1204		}
1205#ifdef __FreeBSD__
1206		PF_UNLOCK();
1207#endif
1208		splx(s);
1209
1210		break;
1211
1212	default:
1213		return (ENOTTY);
1214	}
1215
1216	return (0);
1217}
1218
1219void
1220pfsync_setmtu(struct pfsync_softc *sc, int mtu_req)
1221{
1222	int mtu;
1223
1224	if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req)
1225		mtu = sc->sc_sync_ifp->if_mtu;
1226	else
1227		mtu = mtu_req;
1228
1229	sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) /
1230	    sizeof(struct pfsync_state);
1231	if (sc->sc_maxcount > 254)
1232	    sc->sc_maxcount = 254;
1233#ifdef __FreeBSD__
1234	SCP2IFP(sc)->if_mtu = sizeof(struct pfsync_header) +
1235	    sc->sc_maxcount * sizeof(struct pfsync_state);
1236#else
1237	sc->sc_if.if_mtu = sizeof(struct pfsync_header) +
1238	    sc->sc_maxcount * sizeof(struct pfsync_state);
1239#endif
1240}
1241
1242struct mbuf *
1243pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp)
1244{
1245	struct pfsync_header *h;
1246	struct mbuf *m;
1247	int len;
1248
1249#ifdef __FreeBSD__
1250	PF_ASSERT(MA_OWNED);
1251#endif
1252	MGETHDR(m, M_DONTWAIT, MT_DATA);
1253	if (m == NULL) {
1254#ifdef __FreeBSD__
1255		SCP2IFP(sc)->if_oerrors++;
1256#else
1257		sc->sc_if.if_oerrors++;
1258#endif
1259		return (NULL);
1260	}
1261
1262	switch (action) {
1263	case PFSYNC_ACT_CLR:
1264		len = sizeof(struct pfsync_header) +
1265		    sizeof(struct pfsync_state_clr);
1266		break;
1267	case PFSYNC_ACT_UPD_C:
1268		len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd)) +
1269		    sizeof(struct pfsync_header);
1270		break;
1271	case PFSYNC_ACT_DEL_C:
1272		len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) +
1273		    sizeof(struct pfsync_header);
1274		break;
1275	case PFSYNC_ACT_UREQ:
1276		len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) +
1277		    sizeof(struct pfsync_header);
1278		break;
1279	case PFSYNC_ACT_BUS:
1280		len = sizeof(struct pfsync_header) +
1281		    sizeof(struct pfsync_state_bus);
1282		break;
1283	default:
1284		len = (sc->sc_maxcount * sizeof(struct pfsync_state)) +
1285		    sizeof(struct pfsync_header);
1286		break;
1287	}
1288
1289	if (len > MHLEN) {
1290		MCLGET(m, M_DONTWAIT);
1291		if ((m->m_flags & M_EXT) == 0) {
1292			m_free(m);
1293#ifdef __FreeBSD__
1294			SCP2IFP(sc)->if_oerrors++;
1295#else
1296			sc->sc_if.if_oerrors++;
1297#endif
1298			return (NULL);
1299		}
1300		m->m_data += (MCLBYTES - len) &~ (sizeof(long) - 1);
1301	} else
1302		MH_ALIGN(m, len);
1303
1304	m->m_pkthdr.rcvif = NULL;
1305	m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header);
1306	h = mtod(m, struct pfsync_header *);
1307	h->version = PFSYNC_VERSION;
1308	h->af = 0;
1309	h->count = 0;
1310	h->action = action;
1311
1312	*sp = (void *)((char *)h + PFSYNC_HDRLEN);
1313#ifdef __FreeBSD__
1314	callout_reset(&sc->sc_tmo, hz, pfsync_timeout,
1315	    LIST_FIRST(&pfsync_list));
1316#else
1317	timeout_add(&sc->sc_tmo, hz);
1318#endif
1319	return (m);
1320}
1321
1322int
1323pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags)
1324{
1325#ifdef __FreeBSD__
1326	struct ifnet *ifp = SCP2IFP(LIST_FIRST(&pfsync_list));
1327#else
1328	struct ifnet *ifp = &pfsyncif.sc_if;
1329#endif
1330	struct pfsync_softc *sc = ifp->if_softc;
1331	struct pfsync_header *h, *h_net;
1332	struct pfsync_state *sp = NULL;
1333	struct pfsync_state_upd *up = NULL;
1334	struct pfsync_state_del *dp = NULL;
1335	struct pf_rule *r;
1336	u_long secs;
1337	int s, ret = 0;
1338	u_int8_t i = 255, newaction = 0;
1339
1340#ifdef __FreeBSD__
1341	PF_ASSERT(MA_OWNED);
1342#endif
1343	/*
1344	 * If a packet falls in the forest and there's nobody around to
1345	 * hear, does it make a sound?
1346	 */
1347	if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL &&
1348#ifdef __FreeBSD__
1349	    sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
1350#else
1351	    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
1352#endif
1353		/* Don't leave any stale pfsync packets hanging around. */
1354		if (sc->sc_mbuf != NULL) {
1355			m_freem(sc->sc_mbuf);
1356			sc->sc_mbuf = NULL;
1357			sc->sc_statep.s = NULL;
1358		}
1359		return (0);
1360	}
1361
1362	if (action >= PFSYNC_ACT_MAX)
1363		return (EINVAL);
1364
1365	s = splnet();
1366	if (sc->sc_mbuf == NULL) {
1367		if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
1368		    (void *)&sc->sc_statep.s)) == NULL) {
1369			splx(s);
1370			return (ENOMEM);
1371		}
1372		h = mtod(sc->sc_mbuf, struct pfsync_header *);
1373	} else {
1374		h = mtod(sc->sc_mbuf, struct pfsync_header *);
1375		if (h->action != action) {
1376			pfsync_sendout(sc);
1377			if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
1378			    (void *)&sc->sc_statep.s)) == NULL) {
1379				splx(s);
1380				return (ENOMEM);
1381			}
1382			h = mtod(sc->sc_mbuf, struct pfsync_header *);
1383		} else {
1384			/*
1385			 * If it's an update, look in the packet to see if
1386			 * we already have an update for the state.
1387			 */
1388			if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) {
1389				struct pfsync_state *usp =
1390				    (void *)((char *)h + PFSYNC_HDRLEN);
1391
1392				for (i = 0; i < h->count; i++) {
1393					if (!memcmp(usp->id, &st->id,
1394					    PFSYNC_ID_LEN) &&
1395					    usp->creatorid == st->creatorid) {
1396						sp = usp;
1397						sp->updates++;
1398						break;
1399					}
1400					usp++;
1401				}
1402			}
1403		}
1404	}
1405
1406	secs = time_second;
1407
1408	st->pfsync_time = time_uptime;
1409	TAILQ_REMOVE(&state_updates, st, u.s.entry_updates);
1410	TAILQ_INSERT_TAIL(&state_updates, st, u.s.entry_updates);
1411
1412	if (sp == NULL) {
1413		/* not a "duplicate" update */
1414		i = 255;
1415		sp = sc->sc_statep.s++;
1416		sc->sc_mbuf->m_pkthdr.len =
1417		    sc->sc_mbuf->m_len += sizeof(struct pfsync_state);
1418		h->count++;
1419		bzero(sp, sizeof(*sp));
1420
1421		bcopy(&st->id, sp->id, sizeof(sp->id));
1422		sp->creatorid = st->creatorid;
1423
1424		strlcpy(sp->ifname, st->u.s.kif->pfik_name, sizeof(sp->ifname));
1425		pf_state_host_hton(&st->lan, &sp->lan);
1426		pf_state_host_hton(&st->gwy, &sp->gwy);
1427		pf_state_host_hton(&st->ext, &sp->ext);
1428
1429		bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
1430
1431		sp->creation = htonl(secs - st->creation);
1432		sp->packets[0] = htonl(st->packets[0]);
1433		sp->packets[1] = htonl(st->packets[1]);
1434		sp->bytes[0] = htonl(st->bytes[0]);
1435		sp->bytes[1] = htonl(st->bytes[1]);
1436		if ((r = st->rule.ptr) == NULL)
1437			sp->rule = htonl(-1);
1438		else
1439			sp->rule = htonl(r->nr);
1440		if ((r = st->anchor.ptr) == NULL)
1441			sp->anchor = htonl(-1);
1442		else
1443			sp->anchor = htonl(r->nr);
1444		sp->af = st->af;
1445		sp->proto = st->proto;
1446		sp->direction = st->direction;
1447		sp->log = st->log;
1448		sp->allow_opts = st->allow_opts;
1449		sp->timeout = st->timeout;
1450
1451		if (flags & PFSYNC_FLAG_STALE)
1452			sp->sync_flags |= PFSTATE_STALE;
1453	}
1454
1455	pf_state_peer_hton(&st->src, &sp->src);
1456	pf_state_peer_hton(&st->dst, &sp->dst);
1457
1458	if (st->expire <= secs)
1459		sp->expire = htonl(0);
1460	else
1461		sp->expire = htonl(st->expire - secs);
1462
1463	/* do we need to build "compressed" actions for network transfer? */
1464	if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) {
1465		switch (action) {
1466		case PFSYNC_ACT_UPD:
1467			newaction = PFSYNC_ACT_UPD_C;
1468			break;
1469		case PFSYNC_ACT_DEL:
1470			newaction = PFSYNC_ACT_DEL_C;
1471			break;
1472		default:
1473			/* by default we just send the uncompressed states */
1474			break;
1475		}
1476	}
1477
1478	if (newaction) {
1479		if (sc->sc_mbuf_net == NULL) {
1480			if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction,
1481			    (void *)&sc->sc_statep_net.s)) == NULL) {
1482				splx(s);
1483				return (ENOMEM);
1484			}
1485		}
1486		h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *);
1487
1488		switch (newaction) {
1489		case PFSYNC_ACT_UPD_C:
1490			if (i != 255) {
1491				up = (void *)((char *)h_net +
1492				    PFSYNC_HDRLEN + (i * sizeof(*up)));
1493				up->updates++;
1494			} else {
1495				h_net->count++;
1496				sc->sc_mbuf_net->m_pkthdr.len =
1497				    sc->sc_mbuf_net->m_len += sizeof(*up);
1498				up = sc->sc_statep_net.u++;
1499
1500				bzero(up, sizeof(*up));
1501				bcopy(&st->id, up->id, sizeof(up->id));
1502				up->creatorid = st->creatorid;
1503			}
1504			up->timeout = st->timeout;
1505			up->expire = sp->expire;
1506			up->src = sp->src;
1507			up->dst = sp->dst;
1508			break;
1509		case PFSYNC_ACT_DEL_C:
1510			sc->sc_mbuf_net->m_pkthdr.len =
1511			    sc->sc_mbuf_net->m_len += sizeof(*dp);
1512			dp = sc->sc_statep_net.d++;
1513			h_net->count++;
1514
1515			bzero(dp, sizeof(*dp));
1516			bcopy(&st->id, dp->id, sizeof(dp->id));
1517			dp->creatorid = st->creatorid;
1518			break;
1519		}
1520	}
1521
1522	if (h->count == sc->sc_maxcount ||
1523	    (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates)))
1524		ret = pfsync_sendout(sc);
1525
1526	splx(s);
1527	return (ret);
1528}
1529
1530/* This must be called in splnet() */
1531int
1532pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src)
1533{
1534#ifdef __FreeBSD__
1535	struct ifnet *ifp = SCP2IFP(LIST_FIRST(&pfsync_list));
1536#else
1537	struct ifnet *ifp = &pfsyncif.sc_if;
1538#endif
1539	struct pfsync_header *h;
1540	struct pfsync_softc *sc = ifp->if_softc;
1541	struct pfsync_state_upd_req *rup;
1542	int ret = 0;
1543
1544#ifdef __FreeBSD__
1545	PF_ASSERT(MA_OWNED);
1546#endif
1547	if (sc->sc_mbuf == NULL) {
1548		if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
1549		    (void *)&sc->sc_statep.s)) == NULL)
1550			return (ENOMEM);
1551		h = mtod(sc->sc_mbuf, struct pfsync_header *);
1552	} else {
1553		h = mtod(sc->sc_mbuf, struct pfsync_header *);
1554		if (h->action != PFSYNC_ACT_UREQ) {
1555			pfsync_sendout(sc);
1556			if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
1557			    (void *)&sc->sc_statep.s)) == NULL)
1558				return (ENOMEM);
1559			h = mtod(sc->sc_mbuf, struct pfsync_header *);
1560		}
1561	}
1562
1563	if (src != NULL)
1564		sc->sc_sendaddr = *src;
1565	sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup);
1566	h->count++;
1567	rup = sc->sc_statep.r++;
1568	bzero(rup, sizeof(*rup));
1569	if (up != NULL) {
1570		bcopy(up->id, rup->id, sizeof(rup->id));
1571		rup->creatorid = up->creatorid;
1572	}
1573
1574	if (h->count == sc->sc_maxcount)
1575		ret = pfsync_sendout(sc);
1576
1577	return (ret);
1578}
1579
1580int
1581pfsync_clear_states(u_int32_t creatorid, char *ifname)
1582{
1583#ifdef __FreeBSD__
1584	struct ifnet *ifp = SCP2IFP(LIST_FIRST(&pfsync_list));
1585#else
1586	struct ifnet *ifp = &pfsyncif.sc_if;
1587#endif
1588	struct pfsync_softc *sc = ifp->if_softc;
1589	struct pfsync_state_clr *cp;
1590	int s, ret;
1591
1592	s = splnet();
1593#ifdef __FreeBSD__
1594	PF_ASSERT(MA_OWNED);
1595#endif
1596	if (sc->sc_mbuf != NULL)
1597		pfsync_sendout(sc);
1598	if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR,
1599	    (void *)&sc->sc_statep.c)) == NULL) {
1600		splx(s);
1601		return (ENOMEM);
1602	}
1603	sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp);
1604	cp = sc->sc_statep.c;
1605	cp->creatorid = creatorid;
1606	if (ifname != NULL)
1607		strlcpy(cp->ifname, ifname, IFNAMSIZ);
1608
1609	ret = (pfsync_sendout(sc));
1610	splx(s);
1611	return (ret);
1612}
1613
/*
 * Timer handler armed by pfsync_get_mbuf(): flush whatever packet is
 * still pending.  `v` is the pfsync softc.  On FreeBSD the pf lock is
 * taken around the flush.
 */
void
pfsync_timeout(void *v)
{
	struct pfsync_softc *sc = (struct pfsync_softc *)v;
	int s = splnet();

#ifdef __FreeBSD__
	PF_LOCK();
	pfsync_sendout(sc);
	PF_UNLOCK();
#else
	pfsync_sendout(sc);
#endif
	splx(s);
}
1630
1631/* This must be called in splnet() */
1632void
1633pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status)
1634{
1635	struct pfsync_state_bus *bus;
1636
1637#ifdef __FreeBSD__
1638	PF_ASSERT(MA_OWNED);
1639#endif
1640	if (sc->sc_mbuf != NULL)
1641		pfsync_sendout(sc);
1642
1643	if (pfsync_sync_ok &&
1644	    (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS,
1645	    (void *)&sc->sc_statep.b)) != NULL) {
1646		sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus);
1647		bus = sc->sc_statep.b;
1648		bus->creatorid = pf_status.hostid;
1649		bus->status = status;
1650		bus->endtime = htonl(time_uptime - sc->sc_ureq_received);
1651		pfsync_sendout(sc);
1652	}
1653}
1654
1655void
1656pfsync_bulk_update(void *v)
1657{
1658	struct pfsync_softc *sc = v;
1659	int s, i = 0;
1660	struct pf_state *state;
1661
1662#ifdef __FreeBSD__
1663	PF_LOCK();
1664#endif
1665	s = splnet();
1666	if (sc->sc_mbuf != NULL)
1667		pfsync_sendout(sc);
1668
1669	/*
1670	 * Grab at most PFSYNC_BULKPACKETS worth of states which have not
1671	 * been sent since the latest request was made.
1672	 */
1673	while ((state = TAILQ_FIRST(&state_updates)) != NULL &&
1674	    ++i < (sc->sc_maxcount * PFSYNC_BULKPACKETS)) {
1675		if (state->pfsync_time > sc->sc_ureq_received) {
1676			/* we're done */
1677			pfsync_send_bus(sc, PFSYNC_BUS_END);
1678			sc->sc_ureq_received = 0;
1679#ifdef __FreeBSD__
1680			callout_stop(&sc->sc_bulk_tmo);
1681#else
1682			timeout_del(&sc->sc_bulk_tmo);
1683#endif
1684			if (pf_status.debug >= PF_DEBUG_MISC)
1685				printf("pfsync: bulk update complete\n");
1686			break;
1687		} else {
1688			/* send an update and move to end of list */
1689			if (!state->sync_flags)
1690				pfsync_pack_state(PFSYNC_ACT_UPD, state, 0);
1691			state->pfsync_time = time_uptime;
1692			TAILQ_REMOVE(&state_updates, state, u.s.entry_updates);
1693			TAILQ_INSERT_TAIL(&state_updates, state,
1694			    u.s.entry_updates);
1695
1696			/* look again for more in a bit */
1697#ifdef __FreeBSD__
1698			callout_reset(&sc->sc_bulk_tmo, 1, pfsync_timeout,
1699			    LIST_FIRST(&pfsync_list));
1700#else
1701			timeout_add(&sc->sc_bulk_tmo, 1);
1702#endif
1703		}
1704	}
1705	if (sc->sc_mbuf != NULL)
1706		pfsync_sendout(sc);
1707	splx(s);
1708#ifdef __FreeBSD__
1709	PF_UNLOCK();
1710#endif
1711}
1712
1713void
1714pfsync_bulkfail(void *v)
1715{
1716	struct pfsync_softc *sc = v;
1717	int s, error;
1718
1719#ifdef __FreeBSD__
1720	PF_LOCK();
1721#endif
1722	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
1723		/* Try again in a bit */
1724#ifdef __FreeBSD__
1725		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail,
1726		    LIST_FIRST(&pfsync_list));
1727#else
1728		timeout_add(&sc->sc_bulkfail_tmo, 5 * hz);
1729#endif
1730		s = splnet();
1731		error = pfsync_request_update(NULL, NULL);
1732		if (error == ENOMEM) {
1733			if (pf_status.debug >= PF_DEBUG_MISC)
1734				printf("pfsync: cannot allocate mbufs for "
1735				    "bulk update\n");
1736		} else
1737			pfsync_sendout(sc);
1738		splx(s);
1739	} else {
1740		/* Pretend like the transfer was ok */
1741		sc->sc_ureq_sent = 0;
1742		sc->sc_bulk_tries = 0;
1743#if NCARP > 0
1744		if (!pfsync_sync_ok)
1745			carp_suppress_preempt--;
1746#endif
1747		pfsync_sync_ok = 1;
1748		if (pf_status.debug >= PF_DEBUG_MISC)
1749			printf("pfsync: failed to receive "
1750			    "bulk update status\n");
1751#ifdef __FreeBSD__
1752		callout_stop(&sc->sc_bulkfail_tmo);
1753#else
1754		timeout_del(&sc->sc_bulkfail_tmo);
1755#endif
1756	}
1757#ifdef __FreeBSD__
1758	PF_UNLOCK();
1759#endif
1760}
1761
1762/* This must be called in splnet() */
1763int
1764pfsync_sendout(sc)
1765	struct pfsync_softc *sc;
1766{
1767#if NBPFILTER > 0
1768# ifdef __FreeBSD__
1769	struct ifnet *ifp = SCP2IFP(sc);
1770# else
1771	struct ifnet *ifp = &sc->if_sc;
1772# endif
1773#endif
1774	struct mbuf *m;
1775
1776#ifdef __FreeBSD__
1777	PF_ASSERT(MA_OWNED);
1778	callout_stop(&sc->sc_tmo);
1779#else
1780	timeout_del(&sc->sc_tmo);
1781#endif
1782
1783	if (sc->sc_mbuf == NULL)
1784		return (0);
1785	m = sc->sc_mbuf;
1786	sc->sc_mbuf = NULL;
1787	sc->sc_statep.s = NULL;
1788
1789#ifdef __FreeBSD__
1790	KASSERT(m != NULL, ("pfsync_sendout: null mbuf"));
1791#endif
1792#if NBPFILTER > 0
1793	if (ifp->if_bpf)
1794		bpf_mtap(ifp->if_bpf, m);
1795#endif
1796
1797	if (sc->sc_mbuf_net) {
1798		m_freem(m);
1799		m = sc->sc_mbuf_net;
1800		sc->sc_mbuf_net = NULL;
1801		sc->sc_statep_net.s = NULL;
1802	}
1803
1804#ifdef __FreeBSD__
1805	if (sc->sc_sync_ifp ||
1806	    sc->sc_sync_peer.s_addr != htonl(INADDR_PFSYNC_GROUP)) {
1807#else
1808	if (sc->sc_sync_ifp ||sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) {
1809#endif
1810		struct ip *ip;
1811		struct sockaddr sa;
1812
1813		M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
1814		if (m == NULL) {
1815			pfsyncstats.pfsyncs_onomem++;
1816			return (0);
1817		}
1818		ip = mtod(m, struct ip *);
1819		ip->ip_v = IPVERSION;
1820		ip->ip_hl = sizeof(*ip) >> 2;
1821		ip->ip_tos = IPTOS_LOWDELAY;
1822#ifdef __FreeBSD__
1823		ip->ip_len = m->m_pkthdr.len;
1824#else
1825		ip->ip_len = htons(m->m_pkthdr.len);
1826#endif
1827		ip->ip_id = htons(ip_randomid());
1828#ifdef __FreeBSD__
1829		ip->ip_off = IP_DF;
1830#else
1831		ip->ip_off = htons(IP_DF);
1832#endif
1833		ip->ip_ttl = PFSYNC_DFLTTL;
1834		ip->ip_p = IPPROTO_PFSYNC;
1835		ip->ip_sum = 0;
1836
1837		bzero(&sa, sizeof(sa));
1838		ip->ip_src.s_addr = INADDR_ANY;
1839
1840#ifdef __FreeBSD__
1841		if (sc->sc_sendaddr.s_addr == htonl(INADDR_PFSYNC_GROUP))
1842#else
1843		if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP)
1844#endif
1845			m->m_flags |= M_MCAST;
1846		ip->ip_dst = sc->sc_sendaddr;
1847		sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr;
1848
1849		pfsyncstats.pfsyncs_opackets++;
1850#ifdef __FreeBSD__
1851		if (!IF_HANDOFF(&sc->sc_ifq, m, NULL))
1852			pfsyncstats.pfsyncs_oerrors++;
1853		callout_reset(&sc->sc_send_tmo, 1, pfsync_senddef, sc);
1854#else
1855		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
1856			pfsyncstats.pfsyncs_oerrors++;
1857#endif
1858	} else
1859		m_freem(m);
1860
1861	return (0);
1862}
1863
1864#ifdef __FreeBSD__
1865static void
1866pfsync_senddef(void *arg)
1867{
1868	struct pfsync_softc *sc = (struct pfsync_softc *)arg;
1869	struct mbuf *m;
1870
1871	for(;;) {
1872		IF_DEQUEUE(&sc->sc_ifq, m);
1873		if (m == NULL)
1874			break;
1875		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
1876			pfsyncstats.pfsyncs_oerrors++;
1877	}
1878}
1879
1880static int
1881pfsync_modevent(module_t mod, int type, void *data)
1882{
1883	int error = 0;
1884
1885	switch (type) {
1886	case MOD_LOAD:
1887		LIST_INIT(&pfsync_list);
1888		if_clone_attach(&pfsync_cloner);
1889		break;
1890
1891	case MOD_UNLOAD:
1892		if_clone_detach(&pfsync_cloner);
1893		break;
1894
1895	default:
1896		error = EINVAL;
1897		break;
1898	}
1899
1900	return error;
1901}
1902
1903static moduledata_t pfsync_mod = {
1904	"pfsync",
1905	pfsync_modevent,
1906	0
1907};
1908
1909#define PFSYNC_MODVER 1
1910
1911DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
1912MODULE_VERSION(pfsync, PFSYNC_MODVER);
1913#endif /* __FreeBSD__ */
1914