ip_carp.c revision 253087
1228571Sglebius/*-
2228571Sglebius * Copyright (c) 2002 Michael Shalayeff.
3228571Sglebius * Copyright (c) 2003 Ryan McBride.
4228571Sglebius * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org>
5228571Sglebius * All rights reserved.
6142215Sglebius *
7142215Sglebius * Redistribution and use in source and binary forms, with or without
8142215Sglebius * modification, are permitted provided that the following conditions
9142215Sglebius * are met:
10142215Sglebius * 1. Redistributions of source code must retain the above copyright
11142215Sglebius *    notice, this list of conditions and the following disclaimer.
12142215Sglebius * 2. Redistributions in binary form must reproduce the above copyright
13142215Sglebius *    notice, this list of conditions and the following disclaimer in the
14142215Sglebius *    documentation and/or other materials provided with the distribution.
15142215Sglebius *
16142215Sglebius * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17142215Sglebius * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18142215Sglebius * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19142215Sglebius * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
20142215Sglebius * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21142215Sglebius * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22142215Sglebius * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23142215Sglebius * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
24142215Sglebius * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
25142215Sglebius * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26142215Sglebius * THE POSSIBILITY OF SUCH DAMAGE.
27142215Sglebius */
28142215Sglebius
29172467Ssilby#include <sys/cdefs.h>
30172467Ssilby__FBSDID("$FreeBSD: head/sys/netinet/ip_carp.c 253087 2013-07-09 10:02:51Z ae $");
31172467Ssilby
32142215Sglebius#include "opt_bpf.h"
33142215Sglebius#include "opt_inet.h"
34142215Sglebius#include "opt_inet6.h"
35142215Sglebius
36142215Sglebius#include <sys/param.h>
37142215Sglebius#include <sys/systm.h>
38228571Sglebius#include <sys/bus.h>
39228571Sglebius#include <sys/jail.h>
40142215Sglebius#include <sys/kernel.h>
41142215Sglebius#include <sys/limits.h>
42142215Sglebius#include <sys/malloc.h>
43142215Sglebius#include <sys/mbuf.h>
44142215Sglebius#include <sys/module.h>
45164033Srwatson#include <sys/priv.h>
46142215Sglebius#include <sys/proc.h>
47211157Swill#include <sys/protosw.h>
48228571Sglebius#include <sys/socket.h>
49228571Sglebius#include <sys/sockio.h>
50142215Sglebius#include <sys/sysctl.h>
51142215Sglebius#include <sys/syslog.h>
52228736Sglebius#include <sys/taskqueue.h>
53253087Sae#include <sys/counter.h>
54142215Sglebius
55142215Sglebius#include <net/ethernet.h>
56142215Sglebius#include <net/fddi.h>
57142215Sglebius#include <net/if.h>
58152410Sru#include <net/if_dl.h>
59228571Sglebius#include <net/if_llatbl.h>
60142215Sglebius#include <net/if_types.h>
61228571Sglebius#include <net/iso88025.h>
62142215Sglebius#include <net/route.h>
63196019Srwatson#include <net/vnet.h>
64142215Sglebius
65221130Sbz#if defined(INET) || defined(INET6)
66142215Sglebius#include <netinet/in.h>
67142215Sglebius#include <netinet/in_var.h>
68221130Sbz#include <netinet/ip_carp.h>
69221130Sbz#include <netinet/ip.h>
70221130Sbz#include <machine/in_cksum.h>
71221130Sbz#endif
72221130Sbz#ifdef INET
73142215Sglebius#include <netinet/ip_var.h>
74142215Sglebius#include <netinet/if_ether.h>
75142215Sglebius#endif
76142215Sglebius
77142215Sglebius#ifdef INET6
78142215Sglebius#include <netinet/icmp6.h>
79142215Sglebius#include <netinet/ip6.h>
80211157Swill#include <netinet6/ip6protosw.h>
81228571Sglebius#include <netinet6/in6_var.h>
82142215Sglebius#include <netinet6/ip6_var.h>
83148387Sume#include <netinet6/scope6_var.h>
84142215Sglebius#include <netinet6/nd6.h>
85142215Sglebius#endif
86142215Sglebius
87142215Sglebius#include <crypto/sha1.h>
88142215Sglebius
89228571Sglebiusstatic MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses");
90142215Sglebius
91142215Sglebiusstruct carp_softc {
92228571Sglebius	struct ifnet		*sc_carpdev;	/* Pointer to parent ifnet. */
93228571Sglebius	struct ifaddr		**sc_ifas;	/* Our ifaddrs. */
94228571Sglebius	struct sockaddr_dl	sc_addr;	/* Our link level address. */
95228571Sglebius	struct callout		sc_ad_tmo;	/* Advertising timeout. */
96221130Sbz#ifdef INET
97228571Sglebius	struct callout		sc_md_tmo;	/* Master down timeout. */
98221130Sbz#endif
99142215Sglebius#ifdef INET6
100228571Sglebius	struct callout 		sc_md6_tmo;	/* XXX: Master down timeout. */
101228571Sglebius#endif
102228571Sglebius	struct mtx		sc_mtx;
103142215Sglebius
104228571Sglebius	int			sc_vhid;
105228571Sglebius	int			sc_advskew;
106228571Sglebius	int			sc_advbase;
107228571Sglebius
108228571Sglebius	int			sc_naddrs;
109228571Sglebius	int			sc_naddrs6;
110228571Sglebius	int			sc_ifasiz;
111142215Sglebius	enum { INIT = 0, BACKUP, MASTER }	sc_state;
112228571Sglebius	int			sc_suppress;
113228571Sglebius	int			sc_sendad_errors;
114142215Sglebius#define	CARP_SENDAD_MAX_ERRORS	3
115228571Sglebius	int			sc_sendad_success;
116142215Sglebius#define	CARP_SENDAD_MIN_SUCCESS 3
117142215Sglebius
118228571Sglebius	int			sc_init_counter;
119228571Sglebius	uint64_t		sc_counter;
120142215Sglebius
121142215Sglebius	/* authentication */
122228571Sglebius#define	CARP_HMAC_PAD	64
123142215Sglebius	unsigned char sc_key[CARP_KEY_LEN];
124142215Sglebius	unsigned char sc_pad[CARP_HMAC_PAD];
125142215Sglebius	SHA1_CTX sc_sha1;
126142215Sglebius
127228571Sglebius	TAILQ_ENTRY(carp_softc)	sc_list;	/* On the carp_if list. */
128228571Sglebius	LIST_ENTRY(carp_softc)	sc_next;	/* On the global list. */
129142215Sglebius};
130142215Sglebius
131228571Sglebiusstruct carp_if {
132228571Sglebius#ifdef INET
133228571Sglebius	int	cif_naddrs;
134228571Sglebius#endif
135228571Sglebius#ifdef INET6
136228571Sglebius	int	cif_naddrs6;
137228571Sglebius#endif
138228571Sglebius	TAILQ_HEAD(, carp_softc) cif_vrs;
139228571Sglebius#ifdef INET
140228571Sglebius	struct ip_moptions 	 cif_imo;
141228571Sglebius#endif
142228571Sglebius#ifdef INET6
143228571Sglebius	struct ip6_moptions 	 cif_im6o;
144228571Sglebius#endif
145228571Sglebius	struct ifnet	*cif_ifp;
146228571Sglebius	struct mtx	cif_mtx;
147228571Sglebius};
148228571Sglebius
/* Slots of proto_reg[], one per address family; -1 is the initial value. */
#define	CARP_INET	0
#define	CARP_INET6	1
static int proto_reg[] = {
	[CARP_INET] = -1,
	[CARP_INET6] = -1,
};
152228571Sglebius
153228571Sglebius/*
154228571Sglebius * Brief design of carp(4).
155228571Sglebius *
156228571Sglebius * Any carp-capable ifnet may have a list of carp softcs hanging off
157228571Sglebius * its ifp->if_carp pointer. Each softc represents one unique virtual
158228571Sglebius * host id, or vhid. The softc has a back pointer to the ifnet. All
159228571Sglebius * softcs are joined in a global list, which has quite limited use.
160228571Sglebius *
161228571Sglebius * Any interface address that takes part in CARP negotiation has a
162228571Sglebius * pointer to the softc of its vhid, ifa->ifa_carp. That could be either
163228571Sglebius * AF_INET or AF_INET6 address.
164228571Sglebius *
 * Although one could follow the softc's back pointer to the ifnet and
 * traverse its ifp->if_addrhead queue to find all interface addresses
 * involved in CARP, we keep a growable array of ifaddr pointers instead.
 * This allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals
 * that call into the network stack, thus avoiding lock order reversals
 * (LORs).
170228571Sglebius *
171228571Sglebius * Locking:
172228571Sglebius *
173228571Sglebius * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(),
174228571Sglebius * callout-driven events and ioctl()s.
175228571Sglebius *
176228571Sglebius * To traverse the list of softcs on an ifnet we use CIF_LOCK(), to
177228571Sglebius * traverse the global list we use the mutex carp_mtx.
178228571Sglebius *
179228571Sglebius * Known issues with locking:
180228571Sglebius *
181228571Sglebius * - There is no protection for races between two ioctl() requests,
182228571Sglebius *   neither SIOCSVH, nor SIOCAIFADDR & SIOCAIFADDR_IN6. I think that all
183228571Sglebius *   interface ioctl()s should be serialized right in net/if.c.
184228571Sglebius * - Sending ad, we put the pointer to the softc in an mtag, and no reference
185228571Sglebius *   counting is done on the softc.
186228571Sglebius * - On module unload we may race (?) with packet processing thread
187228571Sglebius *   dereferencing our function pointers.
188228571Sglebius */
189228571Sglebius
190228736Sglebiusstatic int carp_allow = 1;		/* Accept incoming CARP packets. */
191228736Sglebiusstatic int carp_preempt = 0;		/* Preempt slower nodes. */
192228736Sglebiusstatic int carp_log = 1;		/* Log level. */
193228736Sglebiusstatic int carp_demotion = 0;		/* Global advskew demotion. */
194228736Sglebiusstatic int carp_senderr_adj = CARP_MAXSKEW;	/* Send error demotion factor */
195228736Sglebiusstatic int carp_ifdown_adj = CARP_MAXSKEW;	/* Iface down demotion factor */
196244681Sglebiusstatic int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS);
197228736Sglebius
198211157SwillSYSCTL_NODE(_net_inet, IPPROTO_CARP,	carp,	CTLFLAG_RW, 0,	"CARP");
199228736SglebiusSYSCTL_INT(_net_inet_carp, OID_AUTO, allow, CTLFLAG_RW, &carp_allow, 0,
200228736Sglebius    "Accept incoming CARP packets");
201228736SglebiusSYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_RW, &carp_preempt, 0,
202228736Sglebius    "High-priority backup preemption mode");
203228736SglebiusSYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_RW, &carp_log, 0,
204228736Sglebius    "CARP log level");
205244681SglebiusSYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion, CTLTYPE_INT|CTLFLAG_RW,
206244681Sglebius    0, 0, carp_demote_adj_sysctl, "I",
207244681Sglebius    "Adjust demotion factor (skew of advskew)");
208228736SglebiusSYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, CTLFLAG_RW,
209228736Sglebius    &carp_senderr_adj, 0, "Send error demotion factor adjustment");
210228736SglebiusSYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, CTLFLAG_RW,
211228736Sglebius    &carp_ifdown_adj, 0, "Interface down demotion factor adjustment");
212142215Sglebius
213253087Saestatic counter_u64_t carpstats[sizeof(struct carpstats) / sizeof(uint64_t)];
214253087Sae#define	CARPSTATS_ADD(name, val)	\
215253087Sae    counter_u64_add(carpstats[offsetof(struct carpstats, name) / \
216253087Sae	sizeof(uint64_t)], (val))
217253087Sae#define	CARPSTATS_INC(name)		CARPSTATS_ADD(name, 1)
218142215Sglebius
219253087Saestatic int
220253087Saecarpstats_sysctl(SYSCTL_HANDLER_ARGS)
221253087Sae{
222253087Sae	struct carpstats s;
223253087Sae
224253087Sae	COUNTER_ARRAY_COPY(carpstats, &s, sizeof(s) / sizeof(uint64_t));
225253087Sae	if (req->newptr)
226253087Sae		COUNTER_ARRAY_ZERO(carpstats, sizeof(s) / sizeof(uint64_t));
227253087Sae	return (SYSCTL_OUT(req, &s, sizeof(s)));
228253087Sae}
229253087SaeSYSCTL_PROC(_net_inet_carp, OID_AUTO, stats, CTLTYPE_OPAQUE | CTLFLAG_RW,
230253087Sae    NULL, 0, carpstats_sysctl, "I",
231253087Sae    "CARP statistics (struct carpstats, netinet/ip_carp.h)");
232253087Sae
/*
 * Per-softc lock (sc_mtx) helpers.
 */
#define	CARP_LOCK_INIT(sc)	mtx_init(&(sc)->sc_mtx, "carp_softc",   \
	NULL, MTX_DEF)
#define	CARP_LOCK_DESTROY(sc)	mtx_destroy(&(sc)->sc_mtx)
#define	CARP_LOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_mtx, MA_OWNED)
#define	CARP_LOCK(sc)		mtx_lock(&(sc)->sc_mtx)
#define	CARP_UNLOCK(sc)		mtx_unlock(&(sc)->sc_mtx)

/*
 * Per-interface lock (cif_mtx) helpers.
 */
#define	CIF_LOCK_INIT(cif)	mtx_init(&(cif)->cif_mtx, "carp_if",   \
	NULL, MTX_DEF)
#define	CIF_LOCK_DESTROY(cif)	mtx_destroy(&(cif)->cif_mtx)
#define	CIF_LOCK_ASSERT(cif)	mtx_assert(&(cif)->cif_mtx, MA_OWNED)
#define	CIF_LOCK(cif)		mtx_lock(&(cif)->cif_mtx)
#define	CIF_UNLOCK(cif)		mtx_unlock(&(cif)->cif_mtx)

/*
 * Release a locked carp_if: free it via carp_free_if() when no softc is
 * left on it, otherwise merely drop the lock.  The caller must hold the
 * CIF lock and must not touch cif afterwards.
 */
#define	CIF_FREE(cif)	do {				\
		CIF_LOCK_ASSERT(cif);			\
		if (TAILQ_EMPTY(&(cif)->cif_vrs))	\
			carp_free_if(cif);		\
		else					\
			CIF_UNLOCK(cif);		\
} while (0)

/* Log at LOG_INFO, with a "carp: " prefix, when carp_log is above 0. */
#define	CARP_LOG(...)	do {				\
	if (carp_log > 0)				\
		log(LOG_INFO, "carp: " __VA_ARGS__);	\
} while (0)

/* Log at LOG_DEBUG when carp_log is above 1. */
#define	CARP_DEBUG(...)	do {				\
	if (carp_log > 1)				\
		log(LOG_DEBUG, __VA_ARGS__);		\
} while (0)

/*
 * Iterators.  Each expands to a lock assertion STATEMENT followed by a
 * loop header, so they must only be used where a statement sequence is
 * allowed (never as the sole body of an unbraced if/else/loop).
 */

/* Visit every address on ifp that has a softc attached (ifa_carp set). */
#define	IFNET_FOREACH_IFA(ifp, ifa)					\
	IF_ADDR_LOCK_ASSERT(ifp);					\
	TAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link)		\
		if ((ifa)->ifa_carp != NULL)

/*
 * Visit the addresses stored in sc->sc_ifas[]; stops at the first NULL
 * slot or after sc_naddrs + sc_naddrs6 entries, whichever comes first.
 */
#define	CARP_FOREACH_IFA(sc, ifa)					\
	CARP_LOCK_ASSERT(sc);						\
	for (int _i = 0;						\
		_i < (sc)->sc_naddrs + (sc)->sc_naddrs6 &&		\
		((ifa) = (sc)->sc_ifas[_i]) != NULL;			\
		++_i)

/* Visit every softc configured on ifp. */
#define	IFNET_FOREACH_CARP(ifp, sc)					\
	CIF_LOCK_ASSERT((ifp)->if_carp);				\
	TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list)

/* Effective advskew: sc_advskew plus global demotion, capped at MAXSKEW. */
#define	DEMOTE_ADVSKEW(sc)					\
    (((sc)->sc_advskew + carp_demotion > CARP_MAXSKEW) ?	\
    CARP_MAXSKEW : ((sc)->sc_advskew + carp_demotion))
282228736Sglebius
283142559Sglebiusstatic void	carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
284228571Sglebiusstatic struct carp_softc
285228571Sglebius		*carp_alloc(struct ifnet *);
286234130Sglebiusstatic void	carp_detach_locked(struct ifaddr *);
287228571Sglebiusstatic void	carp_destroy(struct carp_softc *);
288228571Sglebiusstatic struct carp_if
289228571Sglebius		*carp_alloc_if(struct ifnet *);
290228571Sglebiusstatic void	carp_free_if(struct carp_if *);
291228571Sglebiusstatic void	carp_set_state(struct carp_softc *, int);
292228571Sglebiusstatic void	carp_sc_state(struct carp_softc *);
293228571Sglebiusstatic void	carp_setrun(struct carp_softc *, sa_family_t);
294228571Sglebiusstatic void	carp_master_down(void *);
295228571Sglebiusstatic void	carp_master_down_locked(struct carp_softc *);
296142559Sglebiusstatic void	carp_send_ad(void *);
297142914Sglebiusstatic void	carp_send_ad_locked(struct carp_softc *);
298228571Sglebiusstatic void	carp_addroute(struct carp_softc *);
299230863Sglebiusstatic void	carp_ifa_addroute(struct ifaddr *);
300228571Sglebiusstatic void	carp_delroute(struct carp_softc *);
301230863Sglebiusstatic void	carp_ifa_delroute(struct ifaddr *);
302228736Sglebiusstatic void	carp_send_ad_all(void *, int);
303228736Sglebiusstatic void	carp_demote_adj(int, char *);
304142215Sglebius
305228571Sglebiusstatic LIST_HEAD(, carp_softc) carp_list;
306142911Sglebiusstatic struct mtx carp_mtx;
307228736Sglebiusstatic struct task carp_sendall_task =
308228736Sglebius    TASK_INITIALIZER(0, carp_send_ad_all, NULL);
309142215Sglebius
310142559Sglebiusstatic void
311142215Sglebiuscarp_hmac_prepare(struct carp_softc *sc)
312142215Sglebius{
313228571Sglebius	uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
314228571Sglebius	uint8_t vhid = sc->sc_vhid & 0xff;
315142215Sglebius	struct ifaddr *ifa;
316179490Smlaier	int i, found;
317179490Smlaier#ifdef INET
318179490Smlaier	struct in_addr last, cur, in;
319179490Smlaier#endif
320142215Sglebius#ifdef INET6
321179490Smlaier	struct in6_addr last6, cur6, in6;
322142215Sglebius#endif
323142215Sglebius
324228571Sglebius	CARP_LOCK_ASSERT(sc);
325142914Sglebius
326228571Sglebius	/* Compute ipad from key. */
327142215Sglebius	bzero(sc->sc_pad, sizeof(sc->sc_pad));
328142215Sglebius	bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
329142215Sglebius	for (i = 0; i < sizeof(sc->sc_pad); i++)
330142215Sglebius		sc->sc_pad[i] ^= 0x36;
331142215Sglebius
332228571Sglebius	/* Precompute first part of inner hash. */
333142215Sglebius	SHA1Init(&sc->sc_sha1);
334142215Sglebius	SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
335142215Sglebius	SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
336142215Sglebius	SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
337142215Sglebius	SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
338142215Sglebius#ifdef INET
339179490Smlaier	cur.s_addr = 0;
340179490Smlaier	do {
341179490Smlaier		found = 0;
342179490Smlaier		last = cur;
343179490Smlaier		cur.s_addr = 0xffffffff;
344228571Sglebius		CARP_FOREACH_IFA(sc, ifa) {
345179490Smlaier			in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
346179490Smlaier			if (ifa->ifa_addr->sa_family == AF_INET &&
347179490Smlaier			    ntohl(in.s_addr) > ntohl(last.s_addr) &&
348179490Smlaier			    ntohl(in.s_addr) < ntohl(cur.s_addr)) {
349179490Smlaier				cur.s_addr = in.s_addr;
350179490Smlaier				found++;
351179490Smlaier			}
352179490Smlaier		}
353179490Smlaier		if (found)
354179490Smlaier			SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
355179490Smlaier	} while (found);
356142215Sglebius#endif /* INET */
357142215Sglebius#ifdef INET6
358179490Smlaier	memset(&cur6, 0, sizeof(cur6));
359179490Smlaier	do {
360179490Smlaier		found = 0;
361179490Smlaier		last6 = cur6;
362179490Smlaier		memset(&cur6, 0xff, sizeof(cur6));
363228571Sglebius		CARP_FOREACH_IFA(sc, ifa) {
364142215Sglebius			in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
365179490Smlaier			if (IN6_IS_SCOPE_EMBED(&in6))
366179490Smlaier				in6.s6_addr16[1] = 0;
367179490Smlaier			if (ifa->ifa_addr->sa_family == AF_INET6 &&
368179490Smlaier			    memcmp(&in6, &last6, sizeof(in6)) > 0 &&
369179490Smlaier			    memcmp(&in6, &cur6, sizeof(in6)) < 0) {
370179490Smlaier				cur6 = in6;
371179490Smlaier				found++;
372179490Smlaier			}
373142215Sglebius		}
374179490Smlaier		if (found)
375179490Smlaier			SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
376179490Smlaier	} while (found);
377142215Sglebius#endif /* INET6 */
378142215Sglebius
379142215Sglebius	/* convert ipad to opad */
380142215Sglebius	for (i = 0; i < sizeof(sc->sc_pad); i++)
381142215Sglebius		sc->sc_pad[i] ^= 0x36 ^ 0x5c;
382142215Sglebius}
383142215Sglebius
384142559Sglebiusstatic void
385228571Sglebiuscarp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
386142215Sglebius    unsigned char md[20])
387142215Sglebius{
388142215Sglebius	SHA1_CTX sha1ctx;
389142215Sglebius
390228571Sglebius	CARP_LOCK_ASSERT(sc);
391228571Sglebius
392142215Sglebius	/* fetch first half of inner hash */
393142215Sglebius	bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
394142215Sglebius
395142215Sglebius	SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
396142215Sglebius	SHA1Final(md, &sha1ctx);
397142215Sglebius
398142215Sglebius	/* outer hash */
399142215Sglebius	SHA1Init(&sha1ctx);
400142215Sglebius	SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
401142215Sglebius	SHA1Update(&sha1ctx, md, 20);
402142215Sglebius	SHA1Final(md, &sha1ctx);
403142215Sglebius}
404142215Sglebius
/*
 * Check the HMAC carried by a received advertisement.
 *
 * sc      - softc the packet was matched to.
 * counter - counter words from the packet.
 * md      - 20-byte digest from the packet.
 *
 * Returns 0 when md matches the locally computed digest and non-zero
 * otherwise.  The caller must hold the softc lock.
 *
 * The digest comes straight from the network, so the comparison always
 * examines all 20 bytes instead of stopping at the first mismatch; this
 * avoids leaking, through timing, how many leading bytes of a forged
 * digest were correct.
 */
static int
carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
    unsigned char md[20])
{
	unsigned char md2[20];
	unsigned char diff;
	size_t i;

	CARP_LOCK_ASSERT(sc);

	carp_hmac_generate(sc, counter, md2);

	diff = 0;
	for (i = 0; i < sizeof(md2); i++)
		diff |= md[i] ^ md2[i];

	return (diff != 0);
}
417142215Sglebius
/*
 * Process an input packet.
 * The order of the checks has been rearranged compared to the RFC, either
 * because it seems more efficient this way or because the RFC order is
 * not possible here.
 */
423221130Sbz#ifdef INET
424142215Sglebiusvoid
425142215Sglebiuscarp_input(struct mbuf *m, int hlen)
426142215Sglebius{
427142215Sglebius	struct ip *ip = mtod(m, struct ip *);
428142215Sglebius	struct carp_header *ch;
429142215Sglebius	int iplen, len;
430142215Sglebius
431190968Srwatson	CARPSTATS_INC(carps_ipackets);
432142215Sglebius
433228736Sglebius	if (!carp_allow) {
434142215Sglebius		m_freem(m);
435142215Sglebius		return;
436142215Sglebius	}
437142215Sglebius
438142215Sglebius	/* verify that the IP TTL is 255.  */
439142215Sglebius	if (ip->ip_ttl != CARP_DFLTTL) {
440190968Srwatson		CARPSTATS_INC(carps_badttl);
441228571Sglebius		CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
442142446Sglebius		    ip->ip_ttl,
443142446Sglebius		    m->m_pkthdr.rcvif->if_xname);
444142215Sglebius		m_freem(m);
445142215Sglebius		return;
446142215Sglebius	}
447142215Sglebius
448142215Sglebius	iplen = ip->ip_hl << 2;
449142215Sglebius
450142215Sglebius	if (m->m_pkthdr.len < iplen + sizeof(*ch)) {
451190968Srwatson		CARPSTATS_INC(carps_badlen);
452228571Sglebius		CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) "
453228571Sglebius		    "on %s\n", __func__, m->m_len - sizeof(struct ip),
454195976Sdelphij		    m->m_pkthdr.rcvif->if_xname);
455142215Sglebius		m_freem(m);
456142215Sglebius		return;
457142215Sglebius	}
458142215Sglebius
459142215Sglebius	if (iplen + sizeof(*ch) < m->m_len) {
460142215Sglebius		if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) {
461190968Srwatson			CARPSTATS_INC(carps_hdrops);
462228571Sglebius			CARP_DEBUG("%s: pullup failed\n", __func__);
463142215Sglebius			return;
464142215Sglebius		}
465142215Sglebius		ip = mtod(m, struct ip *);
466142215Sglebius	}
467142215Sglebius	ch = (struct carp_header *)((char *)ip + iplen);
468142215Sglebius
469142215Sglebius	/*
470142215Sglebius	 * verify that the received packet length is
471142215Sglebius	 * equal to the CARP header
472142215Sglebius	 */
473142215Sglebius	len = iplen + sizeof(*ch);
474142215Sglebius	if (len > m->m_pkthdr.len) {
475190968Srwatson		CARPSTATS_INC(carps_badlen);
476228571Sglebius		CARP_DEBUG("%s: packet too short %d on %s\n", __func__,
477142446Sglebius		    m->m_pkthdr.len,
478142446Sglebius		    m->m_pkthdr.rcvif->if_xname);
479142215Sglebius		m_freem(m);
480142215Sglebius		return;
481142215Sglebius	}
482142215Sglebius
483142215Sglebius	if ((m = m_pullup(m, len)) == NULL) {
484190968Srwatson		CARPSTATS_INC(carps_hdrops);
485142215Sglebius		return;
486142215Sglebius	}
487142215Sglebius	ip = mtod(m, struct ip *);
488142215Sglebius	ch = (struct carp_header *)((char *)ip + iplen);
489142215Sglebius
490142215Sglebius	/* verify the CARP checksum */
491142215Sglebius	m->m_data += iplen;
492244683Sglebius	if (in_cksum(m, len - iplen)) {
493190968Srwatson		CARPSTATS_INC(carps_badsum);
494228571Sglebius		CARP_DEBUG("%s: checksum failed on %s\n", __func__,
495142446Sglebius		    m->m_pkthdr.rcvif->if_xname);
496142215Sglebius		m_freem(m);
497142215Sglebius		return;
498142215Sglebius	}
499142215Sglebius	m->m_data -= iplen;
500142215Sglebius
501142446Sglebius	carp_input_c(m, ch, AF_INET);
502142215Sglebius}
503221130Sbz#endif
504142215Sglebius
505142215Sglebius#ifdef INET6
506142215Sglebiusint
507142215Sglebiuscarp6_input(struct mbuf **mp, int *offp, int proto)
508142215Sglebius{
509142215Sglebius	struct mbuf *m = *mp;
510142215Sglebius	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
511142215Sglebius	struct carp_header *ch;
512142215Sglebius	u_int len;
513142215Sglebius
514190968Srwatson	CARPSTATS_INC(carps_ipackets6);
515142215Sglebius
516228736Sglebius	if (!carp_allow) {
517142215Sglebius		m_freem(m);
518142215Sglebius		return (IPPROTO_DONE);
519142215Sglebius	}
520142215Sglebius
521142215Sglebius	/* check if received on a valid carp interface */
522142215Sglebius	if (m->m_pkthdr.rcvif->if_carp == NULL) {
523190968Srwatson		CARPSTATS_INC(carps_badif);
524228571Sglebius		CARP_DEBUG("%s: packet received on non-carp interface: %s\n",
525228571Sglebius		    __func__, m->m_pkthdr.rcvif->if_xname);
526142215Sglebius		m_freem(m);
527142215Sglebius		return (IPPROTO_DONE);
528142215Sglebius	}
529142215Sglebius
530142215Sglebius	/* verify that the IP TTL is 255 */
531142215Sglebius	if (ip6->ip6_hlim != CARP_DFLTTL) {
532190968Srwatson		CARPSTATS_INC(carps_badttl);
533228571Sglebius		CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
534228571Sglebius		    ip6->ip6_hlim, m->m_pkthdr.rcvif->if_xname);
535142215Sglebius		m_freem(m);
536142215Sglebius		return (IPPROTO_DONE);
537142215Sglebius	}
538142215Sglebius
539142215Sglebius	/* verify that we have a complete carp packet */
540142215Sglebius	len = m->m_len;
541142215Sglebius	IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
542142215Sglebius	if (ch == NULL) {
543190968Srwatson		CARPSTATS_INC(carps_badlen);
544228571Sglebius		CARP_DEBUG("%s: packet size %u too small\n", __func__, len);
545142215Sglebius		return (IPPROTO_DONE);
546142215Sglebius	}
547142215Sglebius
548142215Sglebius
549142215Sglebius	/* verify the CARP checksum */
550142215Sglebius	m->m_data += *offp;
551244683Sglebius	if (in_cksum(m, sizeof(*ch))) {
552190968Srwatson		CARPSTATS_INC(carps_badsum);
553228571Sglebius		CARP_DEBUG("%s: checksum failed, on %s\n", __func__,
554142446Sglebius		    m->m_pkthdr.rcvif->if_xname);
555142215Sglebius		m_freem(m);
556142215Sglebius		return (IPPROTO_DONE);
557142215Sglebius	}
558142215Sglebius	m->m_data -= *offp;
559142215Sglebius
560142446Sglebius	carp_input_c(m, ch, AF_INET6);
561142215Sglebius	return (IPPROTO_DONE);
562142215Sglebius}
563142215Sglebius#endif /* INET6 */
564142215Sglebius
565142559Sglebiusstatic void
566142446Sglebiuscarp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
567142215Sglebius{
568142215Sglebius	struct ifnet *ifp = m->m_pkthdr.rcvif;
569228571Sglebius	struct ifaddr *ifa;
570142446Sglebius	struct carp_softc *sc;
571228571Sglebius	uint64_t tmp_counter;
572142215Sglebius	struct timeval sc_tv, ch_tv;
573142215Sglebius
574142215Sglebius	/* verify that the VHID is valid on the receiving interface */
575229621Sjhb	IF_ADDR_RLOCK(ifp);
576228571Sglebius	IFNET_FOREACH_IFA(ifp, ifa)
577228571Sglebius		if (ifa->ifa_addr->sa_family == af &&
578228571Sglebius		    ifa->ifa_carp->sc_vhid == ch->carp_vhid) {
579228571Sglebius			ifa_ref(ifa);
580142215Sglebius			break;
581228571Sglebius		}
582229621Sjhb	IF_ADDR_RUNLOCK(ifp);
583142914Sglebius
584228571Sglebius	if (ifa == NULL) {
585190968Srwatson		CARPSTATS_INC(carps_badvhid);
586142215Sglebius		m_freem(m);
587142215Sglebius		return;
588142215Sglebius	}
589142215Sglebius
590142215Sglebius	/* verify the CARP version. */
591142215Sglebius	if (ch->carp_version != CARP_VERSION) {
592190968Srwatson		CARPSTATS_INC(carps_badver);
593228571Sglebius		CARP_DEBUG("%s: invalid version %d\n", ifp->if_xname,
594142446Sglebius		    ch->carp_version);
595228571Sglebius		ifa_free(ifa);
596142215Sglebius		m_freem(m);
597142215Sglebius		return;
598142215Sglebius	}
599142215Sglebius
600228571Sglebius	sc = ifa->ifa_carp;
601228571Sglebius	CARP_LOCK(sc);
602228571Sglebius	ifa_free(ifa);
603228571Sglebius
604142215Sglebius	if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
605190968Srwatson		CARPSTATS_INC(carps_badauth);
606228571Sglebius		CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__,
607228571Sglebius		    sc->sc_vhid, ifp->if_xname);
608228571Sglebius		goto out;
609142215Sglebius	}
610142215Sglebius
611142215Sglebius	tmp_counter = ntohl(ch->carp_counter[0]);
612142215Sglebius	tmp_counter = tmp_counter<<32;
613142215Sglebius	tmp_counter += ntohl(ch->carp_counter[1]);
614142215Sglebius
615142215Sglebius	/* XXX Replay protection goes here */
616142215Sglebius
617142215Sglebius	sc->sc_init_counter = 0;
618142215Sglebius	sc->sc_counter = tmp_counter;
619142215Sglebius
620142215Sglebius	sc_tv.tv_sec = sc->sc_advbase;
621228736Sglebius	sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256;
622142215Sglebius	ch_tv.tv_sec = ch->carp_advbase;
623142215Sglebius	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
624142215Sglebius
625142215Sglebius	switch (sc->sc_state) {
626142215Sglebius	case INIT:
627142215Sglebius		break;
628142215Sglebius	case MASTER:
629142215Sglebius		/*
630142215Sglebius		 * If we receive an advertisement from a master who's going to
631142215Sglebius		 * be more frequent than us, go into BACKUP state.
632142215Sglebius		 */
633142215Sglebius		if (timevalcmp(&sc_tv, &ch_tv, >) ||
634142215Sglebius		    timevalcmp(&sc_tv, &ch_tv, ==)) {
635142215Sglebius			callout_stop(&sc->sc_ad_tmo);
636228571Sglebius			CARP_LOG("VHID %u@%s: MASTER -> BACKUP "
637228571Sglebius			    "(more frequent advertisement received)\n",
638228571Sglebius			    sc->sc_vhid,
639228571Sglebius			    sc->sc_carpdev->if_xname);
640142215Sglebius			carp_set_state(sc, BACKUP);
641142215Sglebius			carp_setrun(sc, 0);
642228571Sglebius			carp_delroute(sc);
643142215Sglebius		}
644142215Sglebius		break;
645142215Sglebius	case BACKUP:
646142215Sglebius		/*
647142215Sglebius		 * If we're pre-empting masters who advertise slower than us,
648142215Sglebius		 * and this one claims to be slower, treat him as down.
649142215Sglebius		 */
650228736Sglebius		if (carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) {
651228571Sglebius			CARP_LOG("VHID %u@%s: BACKUP -> MASTER "
652142452Sglebius			    "(preempting a slower master)\n",
653228571Sglebius			    sc->sc_vhid,
654228571Sglebius			    sc->sc_carpdev->if_xname);
655142914Sglebius			carp_master_down_locked(sc);
656142215Sglebius			break;
657142215Sglebius		}
658142215Sglebius
659142215Sglebius		/*
660142215Sglebius		 *  If the master is going to advertise at such a low frequency
661142215Sglebius		 *  that he's guaranteed to time out, we'd might as well just
662142215Sglebius		 *  treat him as timed out now.
663142215Sglebius		 */
664142215Sglebius		sc_tv.tv_sec = sc->sc_advbase * 3;
665142215Sglebius		if (timevalcmp(&sc_tv, &ch_tv, <)) {
666228571Sglebius			CARP_LOG("VHID %u@%s: BACKUP -> MASTER "
667142452Sglebius			    "(master timed out)\n",
668228571Sglebius			    sc->sc_vhid,
669228571Sglebius			    sc->sc_carpdev->if_xname);
670142914Sglebius			carp_master_down_locked(sc);
671142215Sglebius			break;
672142215Sglebius		}
673142215Sglebius
674142215Sglebius		/*
675142215Sglebius		 * Otherwise, we reset the counter and wait for the next
676142215Sglebius		 * advertisement.
677142215Sglebius		 */
678142215Sglebius		carp_setrun(sc, af);
679142215Sglebius		break;
680142215Sglebius	}
681142215Sglebius
682228571Sglebiusout:
683228571Sglebius	CARP_UNLOCK(sc);
684142215Sglebius	m_freem(m);
685142215Sglebius}
686142215Sglebius
687142559Sglebiusstatic int
688142215Sglebiuscarp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
689142215Sglebius{
690142215Sglebius	struct m_tag *mtag;
691142215Sglebius
692142215Sglebius	if (sc->sc_init_counter) {
693142215Sglebius		/* this could also be seconds since unix epoch */
694142215Sglebius		sc->sc_counter = arc4random();
695142215Sglebius		sc->sc_counter = sc->sc_counter << 32;
696142215Sglebius		sc->sc_counter += arc4random();
697142215Sglebius	} else
698142215Sglebius		sc->sc_counter++;
699142215Sglebius
700142215Sglebius	ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
701142215Sglebius	ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
702142215Sglebius
703142215Sglebius	carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
704142215Sglebius
705142215Sglebius	/* Tag packet for carp_output */
706228571Sglebius	if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *),
707228571Sglebius	    M_NOWAIT)) == NULL) {
708142215Sglebius		m_freem(m);
709228571Sglebius		CARPSTATS_INC(carps_onomem);
710142215Sglebius		return (ENOMEM);
711142215Sglebius	}
712236310Sglebius	bcopy(&sc, mtag + 1, sizeof(sc));
713142215Sglebius	m_tag_prepend(m, mtag);
714142215Sglebius
715142215Sglebius	return (0);
716142215Sglebius}
717142215Sglebius
718228736Sglebius/*
719228736Sglebius * To avoid LORs and possible recursions this function shouldn't
720228736Sglebius * be called directly, but scheduled via taskqueue.
721228736Sglebius */
722142559Sglebiusstatic void
723228736Sglebiuscarp_send_ad_all(void *ctx __unused, int pending __unused)
724142215Sglebius{
725142911Sglebius	struct carp_softc *sc;
726142215Sglebius
727142911Sglebius	mtx_lock(&carp_mtx);
728228571Sglebius	LIST_FOREACH(sc, &carp_list, sc_next)
729228736Sglebius		if (sc->sc_state == MASTER) {
730228571Sglebius			CARP_LOCK(sc);
731231201Sglebius			CURVNET_SET(sc->sc_carpdev->if_vnet);
732142914Sglebius			carp_send_ad_locked(sc);
733231201Sglebius			CURVNET_RESTORE();
734228571Sglebius			CARP_UNLOCK(sc);
735228571Sglebius		}
736142911Sglebius	mtx_unlock(&carp_mtx);
737142215Sglebius}
738142215Sglebius
739231201Sglebius/* Send a periodic advertisement, executed in callout context. */
740142559Sglebiusstatic void
741142215Sglebiuscarp_send_ad(void *v)
742142215Sglebius{
743142914Sglebius	struct carp_softc *sc = v;
744142914Sglebius
745228571Sglebius	CARP_LOCK_ASSERT(sc);
746231201Sglebius	CURVNET_SET(sc->sc_carpdev->if_vnet);
747142914Sglebius	carp_send_ad_locked(sc);
748231201Sglebius	CURVNET_RESTORE();
749228571Sglebius	CARP_UNLOCK(sc);
750142914Sglebius}
751142914Sglebius
752142914Sglebiusstatic void
753142914Sglebiuscarp_send_ad_locked(struct carp_softc *sc)
754142914Sglebius{
755142215Sglebius	struct carp_header ch;
756142215Sglebius	struct timeval tv;
757228571Sglebius	struct sockaddr sa;
758228571Sglebius	struct ifaddr *ifa;
759142215Sglebius	struct carp_header *ch_ptr;
760142215Sglebius	struct mbuf *m;
761228571Sglebius	int len, advskew;
762142215Sglebius
763228571Sglebius	CARP_LOCK_ASSERT(sc);
764142914Sglebius
765228736Sglebius	advskew = DEMOTE_ADVSKEW(sc);
766228571Sglebius	tv.tv_sec = sc->sc_advbase;
767228571Sglebius	tv.tv_usec = advskew * 1000000 / 256;
768142215Sglebius
769142215Sglebius	ch.carp_version = CARP_VERSION;
770142215Sglebius	ch.carp_type = CARP_ADVERTISEMENT;
771142215Sglebius	ch.carp_vhid = sc->sc_vhid;
772228571Sglebius	ch.carp_advbase = sc->sc_advbase;
773142215Sglebius	ch.carp_advskew = advskew;
774142215Sglebius	ch.carp_authlen = 7;	/* XXX DEFINE */
775142215Sglebius	ch.carp_pad1 = 0;	/* must be zero */
776142215Sglebius	ch.carp_cksum = 0;
777142215Sglebius
778228571Sglebius	/* XXXGL: OpenBSD picks first ifaddr with needed family. */
779228571Sglebius
780142215Sglebius#ifdef INET
781228571Sglebius	if (sc->sc_naddrs) {
782142215Sglebius		struct ip *ip;
783142215Sglebius
784248324Sglebius		m = m_gethdr(M_NOWAIT, MT_DATA);
785142215Sglebius		if (m == NULL) {
786190968Srwatson			CARPSTATS_INC(carps_onomem);
787241043Sglebius			goto resched;
788142215Sglebius		}
789142215Sglebius		len = sizeof(*ip) + sizeof(ch);
790142215Sglebius		m->m_pkthdr.len = len;
791142215Sglebius		m->m_pkthdr.rcvif = NULL;
792142215Sglebius		m->m_len = len;
793142215Sglebius		MH_ALIGN(m, m->m_len);
794142215Sglebius		m->m_flags |= M_MCAST;
795142215Sglebius		ip = mtod(m, struct ip *);
796142215Sglebius		ip->ip_v = IPVERSION;
797142215Sglebius		ip->ip_hl = sizeof(*ip) >> 2;
798142215Sglebius		ip->ip_tos = IPTOS_LOWDELAY;
799241913Sglebius		ip->ip_len = htons(len);
800142215Sglebius		ip->ip_id = ip_newid();
801241913Sglebius		ip->ip_off = htons(IP_DF);
802142215Sglebius		ip->ip_ttl = CARP_DFLTTL;
803142215Sglebius		ip->ip_p = IPPROTO_CARP;
804142215Sglebius		ip->ip_sum = 0;
805228571Sglebius
806228571Sglebius		bzero(&sa, sizeof(sa));
807228571Sglebius		sa.sa_family = AF_INET;
808228571Sglebius		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
809228571Sglebius		if (ifa != NULL) {
810228571Sglebius			ip->ip_src.s_addr =
811228571Sglebius			    ifatoia(ifa)->ia_addr.sin_addr.s_addr;
812228571Sglebius			ifa_free(ifa);
813228571Sglebius		} else
814228571Sglebius			ip->ip_src.s_addr = 0;
815142215Sglebius		ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
816142215Sglebius
817142215Sglebius		ch_ptr = (struct carp_header *)(&ip[1]);
818142215Sglebius		bcopy(&ch, ch_ptr, sizeof(ch));
819142215Sglebius		if (carp_prepare_ad(m, sc, ch_ptr))
820241043Sglebius			goto resched;
821142215Sglebius
822142215Sglebius		m->m_data += sizeof(*ip);
823244683Sglebius		ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip));
824142215Sglebius		m->m_data -= sizeof(*ip);
825142215Sglebius
826190968Srwatson		CARPSTATS_INC(carps_opackets);
827142215Sglebius
828228571Sglebius		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT,
829228571Sglebius		    &sc->sc_carpdev->if_carp->cif_imo, NULL)) {
830142215Sglebius			if (sc->sc_sendad_errors < INT_MAX)
831142215Sglebius				sc->sc_sendad_errors++;
832228736Sglebius			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS)
833228736Sglebius				carp_demote_adj(carp_senderr_adj, "send error");
834142215Sglebius			sc->sc_sendad_success = 0;
835142215Sglebius		} else {
836142215Sglebius			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
837142215Sglebius				if (++sc->sc_sendad_success >=
838142215Sglebius				    CARP_SENDAD_MIN_SUCCESS) {
839228736Sglebius					carp_demote_adj(-carp_senderr_adj,
840228736Sglebius					    "send ok");
841142215Sglebius					sc->sc_sendad_errors = 0;
842142215Sglebius				}
843142215Sglebius			} else
844142215Sglebius				sc->sc_sendad_errors = 0;
845142215Sglebius		}
846142215Sglebius	}
847142215Sglebius#endif /* INET */
848142215Sglebius#ifdef INET6
849228571Sglebius	if (sc->sc_naddrs6) {
850142215Sglebius		struct ip6_hdr *ip6;
851142215Sglebius
852248324Sglebius		m = m_gethdr(M_NOWAIT, MT_DATA);
853142215Sglebius		if (m == NULL) {
854190968Srwatson			CARPSTATS_INC(carps_onomem);
855241043Sglebius			goto resched;
856142215Sglebius		}
857142215Sglebius		len = sizeof(*ip6) + sizeof(ch);
858142215Sglebius		m->m_pkthdr.len = len;
859142215Sglebius		m->m_pkthdr.rcvif = NULL;
860142215Sglebius		m->m_len = len;
861142215Sglebius		MH_ALIGN(m, m->m_len);
862142215Sglebius		m->m_flags |= M_MCAST;
863142215Sglebius		ip6 = mtod(m, struct ip6_hdr *);
864142215Sglebius		bzero(ip6, sizeof(*ip6));
865142215Sglebius		ip6->ip6_vfc |= IPV6_VERSION;
866142215Sglebius		ip6->ip6_hlim = CARP_DFLTTL;
867142215Sglebius		ip6->ip6_nxt = IPPROTO_CARP;
868228571Sglebius		bzero(&sa, sizeof(sa));
869142215Sglebius
870228571Sglebius		/* set the source address */
871228571Sglebius		sa.sa_family = AF_INET6;
872228571Sglebius		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
873228571Sglebius		if (ifa != NULL) {
874228571Sglebius			bcopy(IFA_IN6(ifa), &ip6->ip6_src,
875228571Sglebius			    sizeof(struct in6_addr));
876228571Sglebius			ifa_free(ifa);
877228571Sglebius		} else
878228571Sglebius			/* This should never happen with IPv6. */
879228571Sglebius			bzero(&ip6->ip6_src, sizeof(struct in6_addr));
880228571Sglebius
881228571Sglebius		/* Set the multicast destination. */
882163069Sbz		ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
883142215Sglebius		ip6->ip6_dst.s6_addr8[15] = 0x12;
884163069Sbz		if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
885163069Sbz			m_freem(m);
886200026Sglebius			CARP_DEBUG("%s: in6_setscope failed\n", __func__);
887241043Sglebius			goto resched;
888163069Sbz		}
889142215Sglebius
890142215Sglebius		ch_ptr = (struct carp_header *)(&ip6[1]);
891142215Sglebius		bcopy(&ch, ch_ptr, sizeof(ch));
892142215Sglebius		if (carp_prepare_ad(m, sc, ch_ptr))
893241043Sglebius			goto resched;
894142215Sglebius
895142215Sglebius		m->m_data += sizeof(*ip6);
896244683Sglebius		ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6));
897142215Sglebius		m->m_data -= sizeof(*ip6);
898142215Sglebius
899190968Srwatson		CARPSTATS_INC(carps_opackets6);
900142215Sglebius
901228571Sglebius		if (ip6_output(m, NULL, NULL, 0,
902228571Sglebius		    &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)) {
903142215Sglebius			if (sc->sc_sendad_errors < INT_MAX)
904142215Sglebius				sc->sc_sendad_errors++;
905228736Sglebius			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS)
906228736Sglebius				carp_demote_adj(carp_senderr_adj,
907228736Sglebius				    "send6 error");
908142215Sglebius			sc->sc_sendad_success = 0;
909142215Sglebius		} else {
910142215Sglebius			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
911142215Sglebius				if (++sc->sc_sendad_success >=
912142215Sglebius				    CARP_SENDAD_MIN_SUCCESS) {
913228736Sglebius					carp_demote_adj(-carp_senderr_adj,
914228736Sglebius					    "send6 ok");
915142215Sglebius					sc->sc_sendad_errors = 0;
916142215Sglebius				}
917142215Sglebius			} else
918142215Sglebius				sc->sc_sendad_errors = 0;
919142215Sglebius		}
920142215Sglebius	}
921142215Sglebius#endif /* INET6 */
922142215Sglebius
923241043Sglebiusresched:
924228571Sglebius	callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc);
925228571Sglebius}
926142215Sglebius
927228571Sglebiusstatic void
928228571Sglebiuscarp_addroute(struct carp_softc *sc)
929228571Sglebius{
930228571Sglebius	struct ifaddr *ifa;
931228571Sglebius
932228571Sglebius	CARP_FOREACH_IFA(sc, ifa)
933230863Sglebius		carp_ifa_addroute(ifa);
934230863Sglebius}
935230863Sglebius
936230863Sglebiusstatic void
937230863Sglebiuscarp_ifa_addroute(struct ifaddr *ifa)
938230863Sglebius{
939230863Sglebius
940230863Sglebius	switch (ifa->ifa_addr->sa_family) {
941228571Sglebius#ifdef INET
942230863Sglebius	case AF_INET:
943230863Sglebius		in_addprefix(ifatoia(ifa), RTF_UP);
944230863Sglebius		ifa_add_loopback_route(ifa,
945230863Sglebius		    (struct sockaddr *)&ifatoia(ifa)->ia_addr);
946230863Sglebius		break;
947228571Sglebius#endif
948228571Sglebius#ifdef INET6
949230863Sglebius	case AF_INET6:
950230863Sglebius		ifa_add_loopback_route(ifa,
951230863Sglebius		    (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
952230863Sglebius		in6_ifaddloop(ifa);
953230863Sglebius		break;
954228571Sglebius#endif
955230863Sglebius	}
956142215Sglebius}
957142215Sglebius
958228571Sglebiusstatic void
959228571Sglebiuscarp_delroute(struct carp_softc *sc)
960228571Sglebius{
961228571Sglebius	struct ifaddr *ifa;
962228571Sglebius
963228571Sglebius	CARP_FOREACH_IFA(sc, ifa)
964230863Sglebius		carp_ifa_delroute(ifa);
965230863Sglebius}
966230863Sglebius
967230863Sglebiusstatic void
968230863Sglebiuscarp_ifa_delroute(struct ifaddr *ifa)
969230863Sglebius{
970230863Sglebius
971230863Sglebius	switch (ifa->ifa_addr->sa_family) {
972221130Sbz#ifdef INET
973230863Sglebius	case AF_INET:
974230863Sglebius		ifa_del_loopback_route(ifa,
975230863Sglebius		    (struct sockaddr *)&ifatoia(ifa)->ia_addr);
976230863Sglebius		in_scrubprefix(ifatoia(ifa), LLE_STATIC);
977230863Sglebius		break;
978228571Sglebius#endif
979228571Sglebius#ifdef INET6
980230863Sglebius	case AF_INET6:
981230863Sglebius		ifa_del_loopback_route(ifa,
982230863Sglebius		    (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
983230863Sglebius		in6_ifremloop(ifa);
984230863Sglebius		break;
985228571Sglebius#endif
986230863Sglebius	}
987228571Sglebius}
988228571Sglebius
989246659Sglebiusint
990246659Sglebiuscarp_master(struct ifaddr *ifa)
991246659Sglebius{
992246659Sglebius	struct carp_softc *sc = ifa->ifa_carp;
993246659Sglebius
994246659Sglebius	return (sc->sc_state == MASTER);
995246659Sglebius}
996246659Sglebius
997228571Sglebius#ifdef INET
998142215Sglebius/*
999142215Sglebius * Broadcast a gratuitous ARP request containing
1000142215Sglebius * the virtual router MAC address for each IP address
1001142215Sglebius * associated with the virtual router.
1002142215Sglebius */
1003142559Sglebiusstatic void
1004142215Sglebiuscarp_send_arp(struct carp_softc *sc)
1005142215Sglebius{
1006142215Sglebius	struct ifaddr *ifa;
1007142215Sglebius
1008228571Sglebius	CARP_FOREACH_IFA(sc, ifa)
1009228571Sglebius		if (ifa->ifa_addr->sa_family == AF_INET)
1010228571Sglebius			arp_ifinit2(sc->sc_carpdev, ifa, LLADDR(&sc->sc_addr));
1011228571Sglebius}
1012142215Sglebius
1013228571Sglebiusint
1014228571Sglebiuscarp_iamatch(struct ifaddr *ifa, uint8_t **enaddr)
1015228571Sglebius{
1016228571Sglebius	struct carp_softc *sc = ifa->ifa_carp;
1017142215Sglebius
1018228571Sglebius	if (sc->sc_state == MASTER) {
1019228571Sglebius		*enaddr = LLADDR(&sc->sc_addr);
1020228571Sglebius		return (1);
1021228571Sglebius	}
1022142215Sglebius
1023228571Sglebius	return (0);
1024142215Sglebius}
1025221130Sbz#endif
1026142215Sglebius
1027142215Sglebius#ifdef INET6
1028142559Sglebiusstatic void
1029142215Sglebiuscarp_send_na(struct carp_softc *sc)
1030142215Sglebius{
1031228571Sglebius	static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1032142215Sglebius	struct ifaddr *ifa;
1033142215Sglebius	struct in6_addr *in6;
1034142215Sglebius
1035228571Sglebius	CARP_FOREACH_IFA(sc, ifa) {
1036142215Sglebius		if (ifa->ifa_addr->sa_family != AF_INET6)
1037142215Sglebius			continue;
1038142215Sglebius
1039228571Sglebius		in6 = IFA_IN6(ifa);
1040142564Sglebius		nd6_na_output(sc->sc_carpdev, &mcast, in6,
1041142215Sglebius		    ND_NA_FLAG_OVERRIDE, 1, NULL);
1042142215Sglebius		DELAY(1000);	/* XXX */
1043142215Sglebius	}
1044142215Sglebius}
1045142215Sglebius
1046238769Sbz/*
1047238769Sbz * Returns ifa in case it's a carp address and it is MASTER, or if the address
1048238769Sbz * matches and is not a carp address.  Returns NULL otherwise.
1049238769Sbz */
1050142641Smlaierstruct ifaddr *
1051211157Swillcarp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr)
1052142215Sglebius{
1053142215Sglebius	struct ifaddr *ifa;
1054142215Sglebius
1055238769Sbz	ifa = NULL;
1056229621Sjhb	IF_ADDR_RLOCK(ifp);
1057238769Sbz	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1058238769Sbz		if (ifa->ifa_addr->sa_family != AF_INET6)
1059238769Sbz			continue;
1060238769Sbz		if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa)))
1061238769Sbz			continue;
1062238769Sbz		if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER)
1063238769Sbz			ifa = NULL;
1064238769Sbz		else
1065228571Sglebius			ifa_ref(ifa);
1066238769Sbz		break;
1067238769Sbz	}
1068229621Sjhb	IF_ADDR_RUNLOCK(ifp);
1069228571Sglebius
1070238769Sbz	return (ifa);
1071142215Sglebius}
1072142215Sglebius
1073211157Swillcaddr_t
1074211157Swillcarp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr)
1075142215Sglebius{
1076142215Sglebius	struct ifaddr *ifa;
1077142215Sglebius
1078229621Sjhb	IF_ADDR_RLOCK(ifp);
1079228571Sglebius	IFNET_FOREACH_IFA(ifp, ifa)
1080228571Sglebius		if (ifa->ifa_addr->sa_family == AF_INET6 &&
1081228571Sglebius		    IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) {
1082228571Sglebius			struct carp_softc *sc = ifa->ifa_carp;
1083228571Sglebius			struct m_tag *mtag;
1084142215Sglebius
1085229621Sjhb			IF_ADDR_RUNLOCK(ifp);
1086228571Sglebius
1087228571Sglebius			mtag = m_tag_get(PACKET_TAG_CARP,
1088236297Sglebius			    sizeof(struct carp_softc *), M_NOWAIT);
1089228571Sglebius			if (mtag == NULL)
1090228571Sglebius				/* Better a bit than nothing. */
1091228571Sglebius				return (LLADDR(&sc->sc_addr));
1092228571Sglebius
1093236310Sglebius			bcopy(&sc, mtag + 1, sizeof(sc));
1094228571Sglebius			m_tag_prepend(m, mtag);
1095228571Sglebius
1096228571Sglebius			return (LLADDR(&sc->sc_addr));
1097142215Sglebius		}
1098229621Sjhb	IF_ADDR_RUNLOCK(ifp);
1099142215Sglebius
1100142215Sglebius	return (NULL);
1101142215Sglebius}
1102228571Sglebius#endif /* INET6 */
1103142215Sglebius
1104228571Sglebiusint
1105211157Swillcarp_forus(struct ifnet *ifp, u_char *dhost)
1106142215Sglebius{
1107228571Sglebius	struct carp_softc *sc;
1108228571Sglebius	uint8_t *ena = dhost;
1109142215Sglebius
1110142215Sglebius	if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
1111228571Sglebius		return (0);
1112142215Sglebius
1113228571Sglebius	CIF_LOCK(ifp->if_carp);
1114228571Sglebius	IFNET_FOREACH_CARP(ifp, sc) {
1115228571Sglebius		CARP_LOCK(sc);
1116228571Sglebius		if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr),
1117228571Sglebius		    ETHER_ADDR_LEN)) {
1118228571Sglebius			CARP_UNLOCK(sc);
1119228571Sglebius			CIF_UNLOCK(ifp->if_carp);
1120228571Sglebius			return (1);
1121142215Sglebius		}
1122228571Sglebius		CARP_UNLOCK(sc);
1123228571Sglebius	}
1124228571Sglebius	CIF_UNLOCK(ifp->if_carp);
1125142215Sglebius
1126228571Sglebius	return (0);
1127142215Sglebius}
1128142215Sglebius
1129231201Sglebius/* Master down timeout event, executed in callout context. */
1130142559Sglebiusstatic void
1131142215Sglebiuscarp_master_down(void *v)
1132142215Sglebius{
1133142215Sglebius	struct carp_softc *sc = v;
1134142215Sglebius
1135228571Sglebius	CARP_LOCK_ASSERT(sc);
1136228571Sglebius
1137231201Sglebius	CURVNET_SET(sc->sc_carpdev->if_vnet);
1138228571Sglebius	if (sc->sc_state == BACKUP) {
1139228812Sglebius		CARP_LOG("VHID %u@%s: BACKUP -> MASTER (master down)\n",
1140228571Sglebius		    sc->sc_vhid,
1141228571Sglebius		    sc->sc_carpdev->if_xname);
1142228571Sglebius		carp_master_down_locked(sc);
1143228571Sglebius	}
1144231201Sglebius	CURVNET_RESTORE();
1145228571Sglebius
1146228571Sglebius	CARP_UNLOCK(sc);
1147142914Sglebius}
1148142914Sglebius
1149142914Sglebiusstatic void
1150142914Sglebiuscarp_master_down_locked(struct carp_softc *sc)
1151142914Sglebius{
1152142914Sglebius
1153228571Sglebius	CARP_LOCK_ASSERT(sc);
1154228571Sglebius
1155142215Sglebius	switch (sc->sc_state) {
1156142215Sglebius	case BACKUP:
1157142215Sglebius		carp_set_state(sc, MASTER);
1158142914Sglebius		carp_send_ad_locked(sc);
1159221130Sbz#ifdef INET
1160142215Sglebius		carp_send_arp(sc);
1161221130Sbz#endif
1162142215Sglebius#ifdef INET6
1163142215Sglebius		carp_send_na(sc);
1164228571Sglebius#endif
1165142215Sglebius		carp_setrun(sc, 0);
1166228571Sglebius		carp_addroute(sc);
1167142215Sglebius		break;
1168228571Sglebius	case INIT:
1169228571Sglebius	case MASTER:
1170228571Sglebius#ifdef INVARIANTS
1171228571Sglebius		panic("carp: VHID %u@%s: master_down event in %s state\n",
1172228571Sglebius		    sc->sc_vhid,
1173228571Sglebius		    sc->sc_carpdev->if_xname,
1174228571Sglebius		    sc->sc_state ? "MASTER" : "INIT");
1175228571Sglebius#endif
1176228571Sglebius		break;
1177142215Sglebius	}
1178142215Sglebius}
1179142215Sglebius
1180142215Sglebius/*
1181142215Sglebius * When in backup state, af indicates whether to reset the master down timer
1182142215Sglebius * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1183142215Sglebius */
1184142559Sglebiusstatic void
1185142215Sglebiuscarp_setrun(struct carp_softc *sc, sa_family_t af)
1186142215Sglebius{
1187142215Sglebius	struct timeval tv;
1188142215Sglebius
1189228571Sglebius	CARP_LOCK_ASSERT(sc);
1190142914Sglebius
1191228571Sglebius	if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 ||
1192228571Sglebius	    sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
1193228571Sglebius	    (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0))
1194142215Sglebius		return;
1195142215Sglebius
1196142215Sglebius	switch (sc->sc_state) {
1197142215Sglebius	case INIT:
1198228571Sglebius		CARP_LOG("VHID %u@%s: INIT -> BACKUP\n",
1199228571Sglebius		    sc->sc_vhid,
1200228571Sglebius		    sc->sc_carpdev->if_xname);
1201226367Sglebius		carp_set_state(sc, BACKUP);
1202226367Sglebius		carp_setrun(sc, 0);
1203142215Sglebius		break;
1204142215Sglebius	case BACKUP:
1205142215Sglebius		callout_stop(&sc->sc_ad_tmo);
1206142215Sglebius		tv.tv_sec = 3 * sc->sc_advbase;
1207142215Sglebius		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1208142215Sglebius		switch (af) {
1209142215Sglebius#ifdef INET
1210142215Sglebius		case AF_INET:
1211142215Sglebius			callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
1212142215Sglebius			    carp_master_down, sc);
1213142215Sglebius			break;
1214228571Sglebius#endif
1215142215Sglebius#ifdef INET6
1216142215Sglebius		case AF_INET6:
1217142215Sglebius			callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
1218142215Sglebius			    carp_master_down, sc);
1219142215Sglebius			break;
1220228571Sglebius#endif
1221142215Sglebius		default:
1222228571Sglebius#ifdef INET
1223142215Sglebius			if (sc->sc_naddrs)
1224142215Sglebius				callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
1225142215Sglebius				    carp_master_down, sc);
1226228571Sglebius#endif
1227228571Sglebius#ifdef INET6
1228142215Sglebius			if (sc->sc_naddrs6)
1229142215Sglebius				callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
1230142215Sglebius				    carp_master_down, sc);
1231228571Sglebius#endif
1232142215Sglebius			break;
1233142215Sglebius		}
1234142215Sglebius		break;
1235142215Sglebius	case MASTER:
1236142215Sglebius		tv.tv_sec = sc->sc_advbase;
1237142215Sglebius		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1238142215Sglebius		callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
1239142215Sglebius		    carp_send_ad, sc);
1240142215Sglebius		break;
1241142215Sglebius	}
1242142215Sglebius}
1243142215Sglebius
1244228571Sglebius/*
1245228571Sglebius * Setup multicast structures.
1246228571Sglebius */
1247228571Sglebiusstatic int
1248234130Sglebiuscarp_multicast_setup(struct carp_if *cif, sa_family_t sa)
1249156947Sglebius{
1250234130Sglebius	struct ifnet *ifp = cif->cif_ifp;
1251228571Sglebius	int error = 0;
1252166226Sglebius
1253234130Sglebius	CIF_LOCK_ASSERT(cif);
1254234130Sglebius
1255228571Sglebius	switch (sa) {
1256228571Sglebius#ifdef INET
1257228571Sglebius	case AF_INET:
1258228571Sglebius	    {
1259228571Sglebius		struct ip_moptions *imo = &cif->cif_imo;
1260228571Sglebius		struct in_addr addr;
1261228571Sglebius
1262228571Sglebius		if (imo->imo_membership)
1263228571Sglebius			return (0);
1264228571Sglebius
1265228571Sglebius		imo->imo_membership = (struct in_multi **)malloc(
1266228571Sglebius		    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP,
1267234130Sglebius		    M_NOWAIT);
1268234130Sglebius		if (imo->imo_membership == NULL)
1269234130Sglebius			return (ENOMEM);
1270228571Sglebius		imo->imo_mfilters = NULL;
1271228571Sglebius		imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
1272228571Sglebius		imo->imo_multicast_vif = -1;
1273228571Sglebius
1274228571Sglebius		addr.s_addr = htonl(INADDR_CARP_GROUP);
1275228571Sglebius		if ((error = in_joingroup(ifp, &addr, NULL,
1276228571Sglebius		    &imo->imo_membership[0])) != 0) {
1277228571Sglebius			free(imo->imo_membership, M_CARP);
1278228571Sglebius			break;
1279156947Sglebius		}
1280228571Sglebius		imo->imo_num_memberships++;
1281228571Sglebius		imo->imo_multicast_ifp = ifp;
1282228571Sglebius		imo->imo_multicast_ttl = CARP_DFLTTL;
1283228571Sglebius		imo->imo_multicast_loop = 0;
1284228571Sglebius		break;
1285228571Sglebius	   }
1286228571Sglebius#endif
1287228571Sglebius#ifdef INET6
1288228571Sglebius	case AF_INET6:
1289228571Sglebius	    {
1290228571Sglebius		struct ip6_moptions *im6o = &cif->cif_im6o;
1291228571Sglebius		struct in6_addr in6;
1292228571Sglebius		struct in6_multi *in6m;
1293228571Sglebius
1294228571Sglebius		if (im6o->im6o_membership)
1295228571Sglebius			return (0);
1296228571Sglebius
1297228571Sglebius		im6o->im6o_membership = (struct in6_multi **)malloc(
1298228571Sglebius		    (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP,
1299234130Sglebius		    M_ZERO | M_NOWAIT);
1300234130Sglebius		if (im6o->im6o_membership == NULL)
1301234130Sglebius			return (ENOMEM);
1302228571Sglebius		im6o->im6o_mfilters = NULL;
1303228571Sglebius		im6o->im6o_max_memberships = IPV6_MIN_MEMBERSHIPS;
1304228571Sglebius		im6o->im6o_multicast_hlim = CARP_DFLTTL;
1305228571Sglebius		im6o->im6o_multicast_ifp = ifp;
1306228571Sglebius
1307228571Sglebius		/* Join IPv6 CARP multicast group. */
1308228571Sglebius		bzero(&in6, sizeof(in6));
1309228571Sglebius		in6.s6_addr16[0] = htons(0xff02);
1310228571Sglebius		in6.s6_addr8[15] = 0x12;
1311228571Sglebius		if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
1312228571Sglebius			free(im6o->im6o_membership, M_CARP);
1313228571Sglebius			break;
1314228571Sglebius		}
1315228571Sglebius		in6m = NULL;
1316228571Sglebius		if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) {
1317228571Sglebius			free(im6o->im6o_membership, M_CARP);
1318228571Sglebius			break;
1319228571Sglebius		}
1320228571Sglebius		im6o->im6o_membership[0] = in6m;
1321228571Sglebius		im6o->im6o_num_memberships++;
1322228571Sglebius
1323228571Sglebius		/* Join solicited multicast address. */
1324228571Sglebius		bzero(&in6, sizeof(in6));
1325228571Sglebius		in6.s6_addr16[0] = htons(0xff02);
1326228571Sglebius		in6.s6_addr32[1] = 0;
1327228571Sglebius		in6.s6_addr32[2] = htonl(1);
1328228571Sglebius		in6.s6_addr32[3] = 0;
1329228571Sglebius		in6.s6_addr8[12] = 0xff;
1330228571Sglebius		if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
1331228571Sglebius			in6_mc_leave(im6o->im6o_membership[0], NULL);
1332228571Sglebius			free(im6o->im6o_membership, M_CARP);
1333228571Sglebius			break;
1334228571Sglebius		}
1335228571Sglebius		in6m = NULL;
1336228571Sglebius		if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) {
1337228571Sglebius			in6_mc_leave(im6o->im6o_membership[0], NULL);
1338228571Sglebius			free(im6o->im6o_membership, M_CARP);
1339228571Sglebius			break;
1340228571Sglebius		}
1341228571Sglebius		im6o->im6o_membership[1] = in6m;
1342228571Sglebius		im6o->im6o_num_memberships++;
1343228571Sglebius		break;
1344228571Sglebius	    }
1345228571Sglebius#endif
1346156947Sglebius	}
1347228571Sglebius
1348228571Sglebius	return (error);
1349166423Sglebius}
1350156947Sglebius
1351228571Sglebius/*
1352228571Sglebius * Free multicast structures.
1353228571Sglebius */
1354166423Sglebiusstatic void
1355234130Sglebiuscarp_multicast_cleanup(struct carp_if *cif, sa_family_t sa)
1356166423Sglebius{
1357166423Sglebius
1358234130Sglebius	CIF_LOCK_ASSERT(cif);
1359228571Sglebius	switch (sa) {
1360228571Sglebius#ifdef INET
1361228571Sglebius	case AF_INET:
1362234130Sglebius		if (cif->cif_naddrs == 0) {
1363228571Sglebius			struct ip_moptions *imo = &cif->cif_imo;
1364228571Sglebius
1365228571Sglebius			in_leavegroup(imo->imo_membership[0], NULL);
1366228571Sglebius			KASSERT(imo->imo_mfilters == NULL,
1367228571Sglebius			    ("%s: imo_mfilters != NULL", __func__));
1368228571Sglebius			free(imo->imo_membership, M_CARP);
1369228571Sglebius			imo->imo_membership = NULL;
1370228571Sglebius
1371191672Sbms		}
1372228571Sglebius		break;
1373228571Sglebius#endif
1374228571Sglebius#ifdef INET6
1375228571Sglebius	case AF_INET6:
1376234130Sglebius		if (cif->cif_naddrs6 == 0) {
1377228571Sglebius			struct ip6_moptions *im6o = &cif->cif_im6o;
1378228571Sglebius
1379228571Sglebius			in6_mc_leave(im6o->im6o_membership[0], NULL);
1380228571Sglebius			in6_mc_leave(im6o->im6o_membership[1], NULL);
1381228571Sglebius			KASSERT(im6o->im6o_mfilters == NULL,
1382228571Sglebius			    ("%s: im6o_mfilters != NULL", __func__));
1383228571Sglebius			free(im6o->im6o_membership, M_CARP);
1384228571Sglebius			im6o->im6o_membership = NULL;
1385228571Sglebius		}
1386228571Sglebius		break;
1387228571Sglebius#endif
1388156947Sglebius	}
1389166423Sglebius}
1390156947Sglebius
1391228571Sglebiusint
1392249925Sglebiuscarp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa)
1393142215Sglebius{
1394228571Sglebius	struct m_tag *mtag;
1395228571Sglebius	struct carp_softc *sc;
1396142215Sglebius
1397228571Sglebius	if (!sa)
1398142215Sglebius		return (0);
1399142215Sglebius
1400228571Sglebius	switch (sa->sa_family) {
1401228571Sglebius#ifdef INET
1402228571Sglebius	case AF_INET:
1403228571Sglebius		break;
1404228571Sglebius#endif
1405228571Sglebius#ifdef INET6
1406228571Sglebius	case AF_INET6:
1407228571Sglebius		break;
1408228571Sglebius#endif
1409228571Sglebius	default:
1410228571Sglebius		return (0);
1411142215Sglebius	}
1412142215Sglebius
1413228571Sglebius	mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
1414228571Sglebius	if (mtag == NULL)
1415228571Sglebius		return (0);
1416142215Sglebius
1417236310Sglebius	bcopy(mtag + 1, &sc, sizeof(sc));
1418194951Srwatson
1419228571Sglebius	/* Set the source MAC address to the Virtual Router MAC Address. */
1420228571Sglebius	switch (ifp->if_type) {
1421228571Sglebius	case IFT_ETHER:
1422234084Sglebius	case IFT_BRIDGE:
1423228571Sglebius	case IFT_L2VLAN: {
1424228571Sglebius			struct ether_header *eh;
1425142215Sglebius
1426228571Sglebius			eh = mtod(m, struct ether_header *);
1427228571Sglebius			eh->ether_shost[0] = 0;
1428228571Sglebius			eh->ether_shost[1] = 0;
1429228571Sglebius			eh->ether_shost[2] = 0x5e;
1430228571Sglebius			eh->ether_shost[3] = 0;
1431228571Sglebius			eh->ether_shost[4] = 1;
1432228571Sglebius			eh->ether_shost[5] = sc->sc_vhid;
1433228571Sglebius		}
1434228571Sglebius		break;
1435228571Sglebius	case IFT_FDDI: {
1436228571Sglebius			struct fddi_header *fh;
1437142215Sglebius
1438228571Sglebius			fh = mtod(m, struct fddi_header *);
1439228571Sglebius			fh->fddi_shost[0] = 0;
1440228571Sglebius			fh->fddi_shost[1] = 0;
1441228571Sglebius			fh->fddi_shost[2] = 0x5e;
1442228571Sglebius			fh->fddi_shost[3] = 0;
1443228571Sglebius			fh->fddi_shost[4] = 1;
1444228571Sglebius			fh->fddi_shost[5] = sc->sc_vhid;
1445194951Srwatson		}
1446228571Sglebius		break;
1447228571Sglebius	case IFT_ISO88025: {
1448228571Sglebius 			struct iso88025_header *th;
1449228571Sglebius 			th = mtod(m, struct iso88025_header *);
1450228571Sglebius			th->iso88025_shost[0] = 3;
1451228571Sglebius			th->iso88025_shost[1] = 0;
1452228571Sglebius			th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1);
1453228571Sglebius			th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1);
1454228571Sglebius			th->iso88025_shost[4] = 0;
1455228571Sglebius			th->iso88025_shost[5] = 0;
1456228571Sglebius		}
1457228571Sglebius		break;
1458228571Sglebius	default:
1459228571Sglebius		printf("%s: carp is not supported for the %d interface type\n",
1460228571Sglebius		    ifp->if_xname, ifp->if_type);
1461228571Sglebius		return (EOPNOTSUPP);
1462142215Sglebius	}
1463142215Sglebius
1464228571Sglebius	return (0);
1465228571Sglebius}
1466142215Sglebius
1467228571Sglebiusstatic struct carp_softc*
1468228571Sglebiuscarp_alloc(struct ifnet *ifp)
1469228571Sglebius{
1470228571Sglebius	struct carp_softc *sc;
1471228571Sglebius	struct carp_if *cif;
1472142215Sglebius
1473228571Sglebius	if ((cif = ifp->if_carp) == NULL) {
1474228571Sglebius		cif = carp_alloc_if(ifp);
1475228571Sglebius		if (cif == NULL)
1476228571Sglebius			return (NULL);
1477142215Sglebius	}
1478142215Sglebius
1479228571Sglebius	sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
1480142215Sglebius
1481228571Sglebius	sc->sc_advbase = CARP_DFLTINTV;
1482228571Sglebius	sc->sc_vhid = -1;	/* required setting */
1483228571Sglebius	sc->sc_init_counter = 1;
1484228571Sglebius	sc->sc_state = INIT;
1485142215Sglebius
1486228571Sglebius	sc->sc_ifasiz = sizeof(struct ifaddr *);
1487228571Sglebius	sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO);
1488228571Sglebius	sc->sc_carpdev = ifp;
1489142215Sglebius
1490228571Sglebius	CARP_LOCK_INIT(sc);
1491228571Sglebius#ifdef INET
1492228571Sglebius	callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
1493228571Sglebius#endif
1494228571Sglebius#ifdef INET6
1495228571Sglebius	callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
1496228571Sglebius#endif
1497228571Sglebius	callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
1498142215Sglebius
1499228571Sglebius	CIF_LOCK(cif);
1500228571Sglebius	TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list);
1501228571Sglebius	CIF_UNLOCK(cif);
1502142215Sglebius
1503228571Sglebius	mtx_lock(&carp_mtx);
1504228571Sglebius	LIST_INSERT_HEAD(&carp_list, sc, sc_next);
1505228571Sglebius	mtx_unlock(&carp_mtx);
1506142914Sglebius
1507228571Sglebius	return (sc);
1508142215Sglebius}
1509142215Sglebius
1510142559Sglebiusstatic int
1511228571Sglebiuscarp_grow_ifas(struct carp_softc *sc)
1512142215Sglebius{
1513228571Sglebius	struct ifaddr **new;
1514142215Sglebius
1515228571Sglebius	CARP_LOCK_ASSERT(sc);
1516142215Sglebius
1517228571Sglebius	new = malloc(sc->sc_ifasiz * 2, M_CARP, M_NOWAIT|M_ZERO);
1518228571Sglebius	if (new == NULL)
1519228571Sglebius		return (ENOMEM);
1520228571Sglebius	bcopy(sc->sc_ifas, new, sc->sc_ifasiz);
1521228571Sglebius	free(sc->sc_ifas, M_CARP);
1522228571Sglebius	sc->sc_ifas = new;
1523228571Sglebius	sc->sc_ifasiz *= 2;
1524142215Sglebius
1525228571Sglebius	return (0);
1526142215Sglebius}
1527142215Sglebius
1528228571Sglebiusstatic void
1529228571Sglebiuscarp_destroy(struct carp_softc *sc)
1530142215Sglebius{
1531228571Sglebius	struct ifnet *ifp = sc->sc_carpdev;
1532228571Sglebius	struct carp_if *cif = ifp->if_carp;
1533142215Sglebius
1534234130Sglebius	CIF_LOCK_ASSERT(cif);
1535234130Sglebius
1536228571Sglebius	TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list);
1537191672Sbms
1538228571Sglebius	mtx_lock(&carp_mtx);
1539228571Sglebius	LIST_REMOVE(sc, sc_next);
1540228571Sglebius	mtx_unlock(&carp_mtx);
1541142215Sglebius
1542228571Sglebius	CARP_LOCK(sc);
1543228736Sglebius	if (sc->sc_suppress)
1544228736Sglebius		carp_demote_adj(-carp_ifdown_adj, "vhid removed");
1545228571Sglebius	callout_drain(&sc->sc_ad_tmo);
1546228571Sglebius#ifdef INET
1547228571Sglebius	callout_drain(&sc->sc_md_tmo);
1548228571Sglebius#endif
1549228571Sglebius#ifdef INET6
1550228571Sglebius	callout_drain(&sc->sc_md6_tmo);
1551228571Sglebius#endif
1552228571Sglebius	CARP_LOCK_DESTROY(sc);
1553142215Sglebius
1554228571Sglebius	free(sc->sc_ifas, M_CARP);
1555228571Sglebius	free(sc, M_CARP);
1556228571Sglebius}
1557142215Sglebius
1558228571Sglebiusstatic struct carp_if*
1559228571Sglebiuscarp_alloc_if(struct ifnet *ifp)
1560228571Sglebius{
1561228571Sglebius	struct carp_if *cif;
1562142215Sglebius
1563228571Sglebius	cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO);
1564142215Sglebius
1565228571Sglebius	if (ifpromisc(ifp, 1) != 0)
1566228571Sglebius		goto cleanup;
1567191672Sbms
1568228571Sglebius	CIF_LOCK_INIT(cif);
1569228571Sglebius	cif->cif_ifp = ifp;
1570228571Sglebius	TAILQ_INIT(&cif->cif_vrs);
1571142215Sglebius
1572229621Sjhb	IF_ADDR_WLOCK(ifp);
1573228571Sglebius	ifp->if_carp = cif;
1574228571Sglebius	if_ref(ifp);
1575229621Sjhb	IF_ADDR_WUNLOCK(ifp);
1576142215Sglebius
1577228571Sglebius	return (cif);
1578142215Sglebius
1579228571Sglebiuscleanup:
1580228571Sglebius	free(cif, M_CARP);
1581142215Sglebius
1582228571Sglebius	return (NULL);
1583228571Sglebius}
1584142215Sglebius
1585228571Sglebiusstatic void
1586228571Sglebiuscarp_free_if(struct carp_if *cif)
1587228571Sglebius{
1588228571Sglebius	struct ifnet *ifp = cif->cif_ifp;
1589142215Sglebius
1590228571Sglebius	CIF_LOCK_ASSERT(cif);
1591228571Sglebius	KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty",
1592228571Sglebius	    __func__));
1593142215Sglebius
1594229621Sjhb	IF_ADDR_WLOCK(ifp);
1595228571Sglebius	ifp->if_carp = NULL;
1596228571Sglebius	if_rele(ifp);
1597229621Sjhb	IF_ADDR_WUNLOCK(ifp);
1598142215Sglebius
1599228571Sglebius	CIF_LOCK_DESTROY(cif);
1600142215Sglebius
1601228571Sglebius	ifpromisc(ifp, 0);
1602142215Sglebius
1603228571Sglebius	free(cif, M_CARP);
1604142215Sglebius}
1605142215Sglebius
1606228571Sglebiusstatic void
1607228571Sglebiuscarp_carprcp(struct carpreq *carpr, struct carp_softc *sc, int priv)
1608142215Sglebius{
1609142215Sglebius
1610228571Sglebius	CARP_LOCK(sc);
1611228571Sglebius	carpr->carpr_state = sc->sc_state;
1612228571Sglebius	carpr->carpr_vhid = sc->sc_vhid;
1613228571Sglebius	carpr->carpr_advbase = sc->sc_advbase;
1614228571Sglebius	carpr->carpr_advskew = sc->sc_advskew;
1615228571Sglebius	if (priv)
1616228571Sglebius		bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key));
1617228571Sglebius	else
1618228571Sglebius		bzero(carpr->carpr_key, sizeof(carpr->carpr_key));
1619228571Sglebius	CARP_UNLOCK(sc);
1620142215Sglebius}
1621142215Sglebius
1622228571Sglebiusint
1623228571Sglebiuscarp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td)
1624142215Sglebius{
1625142215Sglebius	struct carpreq carpr;
1626228571Sglebius	struct ifnet *ifp;
1627228571Sglebius	struct carp_softc *sc = NULL;
1628228571Sglebius	int error = 0, locked = 0;
1629142215Sglebius
1630228571Sglebius	if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
1631228571Sglebius		return (error);
1632142215Sglebius
1633228571Sglebius	ifp = ifunit_ref(ifr->ifr_name);
1634228571Sglebius	if (ifp == NULL)
1635228571Sglebius		return (ENXIO);
1636228571Sglebius
1637228571Sglebius	switch (ifp->if_type) {
1638228571Sglebius	case IFT_ETHER:
1639228571Sglebius	case IFT_L2VLAN:
1640234084Sglebius	case IFT_BRIDGE:
1641228571Sglebius	case IFT_FDDI:
1642228571Sglebius	case IFT_ISO88025:
1643142215Sglebius		break;
1644228571Sglebius	default:
1645228571Sglebius		error = EOPNOTSUPP;
1646228571Sglebius		goto out;
1647228571Sglebius	}
1648142215Sglebius
1649228571Sglebius	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
1650228571Sglebius		error = EADDRNOTAVAIL;
1651228571Sglebius		goto out;
1652228571Sglebius	}
1653228571Sglebius
1654228571Sglebius	switch (cmd) {
1655228571Sglebius	case SIOCSVH:
1656228571Sglebius		if ((error = priv_check(td, PRIV_NETINET_CARP)))
1657142215Sglebius			break;
1658228571Sglebius		if (carpr.carpr_vhid <= 0 || carpr.carpr_vhid > CARP_MAXVHID ||
1659228571Sglebius		    carpr.carpr_advbase < 0 || carpr.carpr_advskew < 0) {
1660228571Sglebius			error = EINVAL;
1661142215Sglebius			break;
1662142215Sglebius		}
1663142215Sglebius
1664228571Sglebius		if (ifp->if_carp) {
1665228571Sglebius			CIF_LOCK(ifp->if_carp);
1666228571Sglebius			IFNET_FOREACH_CARP(ifp, sc)
1667228571Sglebius				if (sc->sc_vhid == carpr.carpr_vhid)
1668228571Sglebius					break;
1669228571Sglebius			CIF_UNLOCK(ifp->if_carp);
1670142215Sglebius		}
1671228571Sglebius		if (sc == NULL) {
1672228571Sglebius			sc = carp_alloc(ifp);
1673228571Sglebius			if (sc == NULL) {
1674228571Sglebius				error = EINVAL; /* XXX: ifpromisc failed */
1675228571Sglebius				break;
1676228571Sglebius			}
1677142215Sglebius
1678228571Sglebius			CARP_LOCK(sc);
1679228571Sglebius			sc->sc_vhid = carpr.carpr_vhid;
1680228571Sglebius			LLADDR(&sc->sc_addr)[0] = 0;
1681228571Sglebius			LLADDR(&sc->sc_addr)[1] = 0;
1682228571Sglebius			LLADDR(&sc->sc_addr)[2] = 0x5e;
1683228571Sglebius			LLADDR(&sc->sc_addr)[3] = 0;
1684228571Sglebius			LLADDR(&sc->sc_addr)[4] = 1;
1685228571Sglebius			LLADDR(&sc->sc_addr)[5] = sc->sc_vhid;
1686228571Sglebius		} else
1687228571Sglebius			CARP_LOCK(sc);
1688228571Sglebius		locked = 1;
1689228571Sglebius		if (carpr.carpr_advbase > 0) {
1690228571Sglebius			if (carpr.carpr_advbase > 255 ||
1691228571Sglebius			    carpr.carpr_advbase < CARP_DFLTINTV) {
1692228571Sglebius				error = EINVAL;
1693228571Sglebius				break;
1694228571Sglebius			}
1695228571Sglebius			sc->sc_advbase = carpr.carpr_advbase;
1696142914Sglebius		}
1697228571Sglebius		if (carpr.carpr_advskew > 0) {
1698228571Sglebius			if (carpr.carpr_advskew >= 255) {
1699228571Sglebius				error = EINVAL;
1700228571Sglebius				break;
1701228571Sglebius			}
1702228571Sglebius			sc->sc_advskew = carpr.carpr_advskew;
1703142215Sglebius		}
1704228571Sglebius		if (carpr.carpr_key[0] != '\0') {
1705228571Sglebius			bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
1706228571Sglebius			carp_hmac_prepare(sc);
1707142914Sglebius		}
1708228571Sglebius		if (sc->sc_state != INIT &&
1709228571Sglebius		    carpr.carpr_state != sc->sc_state) {
1710142215Sglebius			switch (carpr.carpr_state) {
1711142215Sglebius			case BACKUP:
1712142215Sglebius				callout_stop(&sc->sc_ad_tmo);
1713142215Sglebius				carp_set_state(sc, BACKUP);
1714142215Sglebius				carp_setrun(sc, 0);
1715228571Sglebius				carp_delroute(sc);
1716142215Sglebius				break;
1717142215Sglebius			case MASTER:
1718142914Sglebius				carp_master_down_locked(sc);
1719142215Sglebius				break;
1720142215Sglebius			default:
1721142215Sglebius				break;
1722142215Sglebius			}
1723142215Sglebius		}
1724228571Sglebius		break;
1725228571Sglebius
1726228571Sglebius	case SIOCGVH:
1727228571Sglebius	    {
1728228571Sglebius		int priveleged;
1729228571Sglebius
1730228571Sglebius		if (carpr.carpr_vhid < 0 || carpr.carpr_vhid > CARP_MAXVHID) {
1731228571Sglebius			error = EINVAL;
1732228571Sglebius			break;
1733228571Sglebius		}
1734228571Sglebius		if (carpr.carpr_count < 1) {
1735228571Sglebius			error = EMSGSIZE;
1736228571Sglebius			break;
1737228571Sglebius		}
1738228571Sglebius		if (ifp->if_carp == NULL) {
1739228571Sglebius			error = ENOENT;
1740228571Sglebius			break;
1741228571Sglebius		}
1742228571Sglebius
1743228571Sglebius		priveleged = (priv_check(td, PRIV_NETINET_CARP) == 0);
1744228571Sglebius		if (carpr.carpr_vhid != 0) {
1745228571Sglebius			CIF_LOCK(ifp->if_carp);
1746228571Sglebius			IFNET_FOREACH_CARP(ifp, sc)
1747228571Sglebius				if (sc->sc_vhid == carpr.carpr_vhid)
1748170373Sglebius					break;
1749228571Sglebius			CIF_UNLOCK(ifp->if_carp);
1750228571Sglebius			if (sc == NULL) {
1751228571Sglebius				error = ENOENT;
1752142215Sglebius				break;
1753142215Sglebius			}
1754228571Sglebius			carp_carprcp(&carpr, sc, priveleged);
1755228571Sglebius			error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
1756228571Sglebius		} else  {
1757228571Sglebius			int i, count;
1758228571Sglebius
1759228571Sglebius			count = 0;
1760228571Sglebius			CIF_LOCK(ifp->if_carp);
1761228571Sglebius			IFNET_FOREACH_CARP(ifp, sc)
1762228571Sglebius				count++;
1763228571Sglebius
1764228571Sglebius			if (count > carpr.carpr_count) {
1765228571Sglebius				CIF_UNLOCK(ifp->if_carp);
1766228571Sglebius				error = EMSGSIZE;
1767142215Sglebius				break;
1768142215Sglebius			}
1769228571Sglebius
1770228571Sglebius			i = 0;
1771228571Sglebius			IFNET_FOREACH_CARP(ifp, sc) {
1772228571Sglebius				carp_carprcp(&carpr, sc, priveleged);
1773228571Sglebius				carpr.carpr_count = count;
1774228571Sglebius				error = copyout(&carpr, ifr->ifr_data +
1775228571Sglebius				    (i * sizeof(carpr)), sizeof(carpr));
1776228571Sglebius				if (error) {
1777228571Sglebius					CIF_UNLOCK(ifp->if_carp);
1778228571Sglebius					break;
1779228571Sglebius				}
1780228571Sglebius				i++;
1781228571Sglebius			}
1782228571Sglebius			CIF_UNLOCK(ifp->if_carp);
1783142215Sglebius		}
1784142215Sglebius		break;
1785228571Sglebius	    }
1786142215Sglebius	default:
1787142215Sglebius		error = EINVAL;
1788142215Sglebius	}
1789142215Sglebius
1790228571Sglebiusout:
1791142914Sglebius	if (locked)
1792228571Sglebius		CARP_UNLOCK(sc);
1793228571Sglebius	if_rele(ifp);
1794142914Sglebius
1795142215Sglebius	return (error);
1796142215Sglebius}
1797142215Sglebius
1798142215Sglebiusstatic int
1799228571Sglebiuscarp_get_vhid(struct ifaddr *ifa)
1800142215Sglebius{
1801147611Sdwmalone
1802228571Sglebius	if (ifa == NULL || ifa->ifa_carp == NULL)
1803228571Sglebius		return (0);
1804142215Sglebius
1805228571Sglebius	return (ifa->ifa_carp->sc_vhid);
1806142215Sglebius}
1807142215Sglebius
1808142215Sglebiusint
1809228571Sglebiuscarp_attach(struct ifaddr *ifa, int vhid)
1810142215Sglebius{
1811228571Sglebius	struct ifnet *ifp = ifa->ifa_ifp;
1812234130Sglebius	struct carp_if *cif = ifp->if_carp;
1813142215Sglebius	struct carp_softc *sc;
1814228571Sglebius	int index, error;
1815142215Sglebius
1816228571Sglebius	if (ifp->if_carp == NULL)
1817228571Sglebius		return (ENOPROTOOPT);
1818142215Sglebius
1819228571Sglebius	switch (ifa->ifa_addr->sa_family) {
1820142215Sglebius#ifdef INET
1821142215Sglebius	case AF_INET:
1822228571Sglebius#endif
1823142215Sglebius#ifdef INET6
1824142215Sglebius	case AF_INET6:
1825228571Sglebius#endif
1826142215Sglebius		break;
1827142215Sglebius	default:
1828228571Sglebius		return (EPROTOTYPE);
1829142215Sglebius	}
1830142215Sglebius
1831234130Sglebius	CIF_LOCK(cif);
1832228571Sglebius	IFNET_FOREACH_CARP(ifp, sc)
1833228571Sglebius		if (sc->sc_vhid == vhid)
1834228571Sglebius			break;
1835234130Sglebius	if (sc == NULL) {
1836234130Sglebius		CIF_UNLOCK(cif);
1837228571Sglebius		return (ENOENT);
1838234130Sglebius	}
1839142215Sglebius
1840228571Sglebius	if (ifa->ifa_carp) {
1841228571Sglebius		if (ifa->ifa_carp->sc_vhid != vhid)
1842234130Sglebius			carp_detach_locked(ifa);
1843234130Sglebius		else {
1844234130Sglebius			CIF_UNLOCK(cif);
1845228571Sglebius			return (0);
1846234130Sglebius		}
1847228571Sglebius	}
1848142215Sglebius
1849234130Sglebius	error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family);
1850234130Sglebius	if (error) {
1851234130Sglebius		CIF_FREE(cif);
1852228571Sglebius		return (error);
1853234130Sglebius	}
1854142215Sglebius
1855228571Sglebius	CARP_LOCK(sc);
1856228571Sglebius	index = sc->sc_naddrs + sc->sc_naddrs6 + 1;
1857228571Sglebius	if (index > sc->sc_ifasiz / sizeof(struct ifaddr *))
1858228571Sglebius		if ((error = carp_grow_ifas(sc)) != 0) {
1859234130Sglebius			carp_multicast_cleanup(cif,
1860228571Sglebius			    ifa->ifa_addr->sa_family);
1861228571Sglebius			CARP_UNLOCK(sc);
1862234130Sglebius			CIF_FREE(cif);
1863228571Sglebius			return (error);
1864142215Sglebius		}
1865142215Sglebius
1866228571Sglebius	switch (ifa->ifa_addr->sa_family) {
1867228571Sglebius#ifdef INET
1868228571Sglebius	case AF_INET:
1869234130Sglebius		cif->cif_naddrs++;
1870228571Sglebius		sc->sc_naddrs++;
1871142215Sglebius		break;
1872228571Sglebius#endif
1873228571Sglebius#ifdef INET6
1874228571Sglebius	case AF_INET6:
1875234130Sglebius		cif->cif_naddrs6++;
1876228571Sglebius		sc->sc_naddrs6++;
1877142215Sglebius		break;
1878228571Sglebius#endif
1879142215Sglebius	}
1880142215Sglebius
1881228571Sglebius	ifa_ref(ifa);
1882228571Sglebius	sc->sc_ifas[index - 1] = ifa;
1883228571Sglebius	ifa->ifa_carp = sc;
1884228571Sglebius
1885228571Sglebius	carp_hmac_prepare(sc);
1886228571Sglebius	carp_sc_state(sc);
1887228571Sglebius
1888228571Sglebius	CARP_UNLOCK(sc);
1889234130Sglebius	CIF_UNLOCK(cif);
1890228571Sglebius
1891142215Sglebius	return (0);
1892142215Sglebius}
1893142215Sglebius
1894228571Sglebiusvoid
1895228571Sglebiuscarp_detach(struct ifaddr *ifa)
1896142215Sglebius{
1897234130Sglebius	struct ifnet *ifp = ifa->ifa_ifp;
1898234130Sglebius	struct carp_if *cif = ifp->if_carp;
1899234130Sglebius
1900234130Sglebius	CIF_LOCK(cif);
1901234130Sglebius	carp_detach_locked(ifa);
1902234130Sglebius	CIF_FREE(cif);
1903234130Sglebius}
1904234130Sglebius
1905234130Sglebiusstatic void
1906234130Sglebiuscarp_detach_locked(struct ifaddr *ifa)
1907234130Sglebius{
1908234130Sglebius	struct ifnet *ifp = ifa->ifa_ifp;
1909234130Sglebius	struct carp_if *cif = ifp->if_carp;
1910228571Sglebius	struct carp_softc *sc = ifa->ifa_carp;
1911228571Sglebius	int i, index;
1912142914Sglebius
1913228571Sglebius	KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa));
1914142914Sglebius
1915234130Sglebius	CIF_LOCK_ASSERT(cif);
1916228571Sglebius	CARP_LOCK(sc);
1917142215Sglebius
1918228571Sglebius	/* Shift array. */
1919228571Sglebius	index = sc->sc_naddrs + sc->sc_naddrs6;
1920228571Sglebius	for (i = 0; i < index; i++)
1921228571Sglebius		if (sc->sc_ifas[i] == ifa)
1922228571Sglebius			break;
1923228571Sglebius	KASSERT(i < index, ("%s: %p no backref", __func__, ifa));
1924228571Sglebius	for (; i < index - 1; i++)
1925228571Sglebius		sc->sc_ifas[i] = sc->sc_ifas[i+1];
1926228571Sglebius	sc->sc_ifas[index - 1] = NULL;
1927228571Sglebius
1928228571Sglebius	switch (ifa->ifa_addr->sa_family) {
1929228571Sglebius#ifdef INET
1930228571Sglebius	case AF_INET:
1931234130Sglebius		cif->cif_naddrs--;
1932228571Sglebius		sc->sc_naddrs--;
1933142215Sglebius		break;
1934228571Sglebius#endif
1935228571Sglebius#ifdef INET6
1936228571Sglebius	case AF_INET6:
1937234130Sglebius		cif->cif_naddrs6--;
1938228571Sglebius		sc->sc_naddrs6--;
1939142215Sglebius		break;
1940228571Sglebius#endif
1941142215Sglebius	}
1942228571Sglebius
1943230863Sglebius	carp_ifa_delroute(ifa);
1944234130Sglebius	carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family);
1945228571Sglebius
1946228571Sglebius	ifa->ifa_carp = NULL;
1947228571Sglebius	ifa_free(ifa);
1948228571Sglebius
1949228571Sglebius	carp_hmac_prepare(sc);
1950228571Sglebius	carp_sc_state(sc);
1951228571Sglebius
1952228571Sglebius	if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) {
1953228571Sglebius		CARP_UNLOCK(sc);
1954228571Sglebius		carp_destroy(sc);
1955228571Sglebius	} else
1956228571Sglebius		CARP_UNLOCK(sc);
1957142215Sglebius}
1958142215Sglebius
1959228571Sglebiusstatic void
1960228571Sglebiuscarp_set_state(struct carp_softc *sc, int state)
1961142215Sglebius{
1962142914Sglebius
1963228571Sglebius	CARP_LOCK_ASSERT(sc);
1964228571Sglebius
1965228571Sglebius	if (sc->sc_state != state) {
1966228571Sglebius		const char *carp_states[] = { CARP_STATES };
1967228571Sglebius		char subsys[IFNAMSIZ+5];
1968228571Sglebius
1969228571Sglebius		sc->sc_state = state;
1970228571Sglebius
1971228571Sglebius		snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid,
1972228571Sglebius		    sc->sc_carpdev->if_xname);
1973228571Sglebius		devctl_notify("CARP", subsys, carp_states[state], NULL);
1974228571Sglebius	}
1975142914Sglebius}
1976142914Sglebius
1977142914Sglebiusstatic void
1978228571Sglebiuscarp_linkstate(struct ifnet *ifp)
1979142914Sglebius{
1980142215Sglebius	struct carp_softc *sc;
1981142215Sglebius
1982228571Sglebius	CIF_LOCK(ifp->if_carp);
1983228571Sglebius	IFNET_FOREACH_CARP(ifp, sc) {
1984228571Sglebius		CARP_LOCK(sc);
1985228571Sglebius		carp_sc_state(sc);
1986228571Sglebius		CARP_UNLOCK(sc);
1987228571Sglebius	}
1988228571Sglebius	CIF_UNLOCK(ifp->if_carp);
1989144329Sglebius}
1990144329Sglebius
1991144329Sglebiusstatic void
1992228571Sglebiuscarp_sc_state(struct carp_softc *sc)
1993144329Sglebius{
1994144329Sglebius
1995228571Sglebius	CARP_LOCK_ASSERT(sc);
1996228571Sglebius
1997144329Sglebius	if (sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
1998144329Sglebius	    !(sc->sc_carpdev->if_flags & IFF_UP)) {
1999144329Sglebius		callout_stop(&sc->sc_ad_tmo);
2000228571Sglebius#ifdef INET
2001144329Sglebius		callout_stop(&sc->sc_md_tmo);
2002228571Sglebius#endif
2003228571Sglebius#ifdef INET6
2004144329Sglebius		callout_stop(&sc->sc_md6_tmo);
2005228571Sglebius#endif
2006144329Sglebius		carp_set_state(sc, INIT);
2007144329Sglebius		carp_setrun(sc, 0);
2008228736Sglebius		if (!sc->sc_suppress)
2009228736Sglebius			carp_demote_adj(carp_ifdown_adj, "interface down");
2010144329Sglebius		sc->sc_suppress = 1;
2011144329Sglebius	} else {
2012144329Sglebius		carp_set_state(sc, INIT);
2013144329Sglebius		carp_setrun(sc, 0);
2014144329Sglebius		if (sc->sc_suppress)
2015228736Sglebius			carp_demote_adj(-carp_ifdown_adj, "interface up");
2016144329Sglebius		sc->sc_suppress = 0;
2017142215Sglebius	}
2018142215Sglebius}
2019142215Sglebius
2020228736Sglebiusstatic void
2021228736Sglebiuscarp_demote_adj(int adj, char *reason)
2022228736Sglebius{
2023244681Sglebius	atomic_add_int(&carp_demotion, adj);
2024228736Sglebius	CARP_LOG("demoted by %d to %d (%s)\n", adj, carp_demotion, reason);
2025228736Sglebius	taskqueue_enqueue(taskqueue_swi, &carp_sendall_task);
2026228736Sglebius}
2027228571Sglebius
2028244681Sglebiusstatic int
2029244681Sglebiuscarp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS)
2030244681Sglebius{
2031244681Sglebius	int new, error;
2032244681Sglebius
2033244681Sglebius	new = carp_demotion;
2034244681Sglebius	error = sysctl_handle_int(oidp, &new, 0, req);
2035244681Sglebius	if (error || !req->newptr)
2036244681Sglebius		return (error);
2037244681Sglebius
2038244681Sglebius	carp_demote_adj(new, "sysctl");
2039244681Sglebius
2040244681Sglebius	return (0);
2041244681Sglebius}
2042244681Sglebius
#ifdef INET
extern  struct domain inetdomain;

/*
 * Protocol switch entry for CARP over IPv4: a raw-socket protocol whose
 * input is handled by carp_input() and whose output, control and user
 * request paths are the raw IP ones.  Registered with PF_INET from
 * carp_mod_load().
 */
static struct protosw in_carp_protosw = {
	.pr_type =		SOCK_RAW,
	.pr_domain =		&inetdomain,
	.pr_protocol =		IPPROTO_CARP,
	.pr_flags =		PR_ATOMIC|PR_ADDR,
	.pr_input =		carp_input,
	.pr_output =		(pr_output_t *)rip_output,
	.pr_ctloutput =		rip_ctloutput,
	.pr_usrreqs =		&rip_usrreqs,
};
#endif
2056211157Swill
#ifdef INET6
extern	struct domain inet6domain;

/*
 * Protocol switch entry for CARP over IPv6: a raw-socket protocol whose
 * input is handled by carp6_input() and whose output, control and user
 * request paths are the raw IPv6 ones.  Registered with PF_INET6 from
 * carp_mod_load().
 */
static struct ip6protosw in6_carp_protosw = {
	.pr_type =		SOCK_RAW,
	.pr_domain =		&inet6domain,
	.pr_protocol =		IPPROTO_CARP,
	.pr_flags =		PR_ATOMIC|PR_ADDR,
	.pr_input =		carp6_input,
	.pr_output =		rip6_output,
	.pr_ctloutput =		rip6_ctloutput,
	.pr_usrreqs =		&rip6_usrreqs,
};
#endif
2070211157Swill
2071211157Swillstatic void
2072211157Swillcarp_mod_cleanup(void)
2073211157Swill{
2074211157Swill
2075211157Swill#ifdef INET
2076211157Swill	if (proto_reg[CARP_INET] == 0) {
2077212266Swill		(void)ipproto_unregister(IPPROTO_CARP);
2078211157Swill		pf_proto_unregister(PF_INET, IPPROTO_CARP, SOCK_RAW);
2079211157Swill		proto_reg[CARP_INET] = -1;
2080211157Swill	}
2081211157Swill	carp_iamatch_p = NULL;
2082211157Swill#endif
2083211157Swill#ifdef INET6
2084211157Swill	if (proto_reg[CARP_INET6] == 0) {
2085212266Swill		(void)ip6proto_unregister(IPPROTO_CARP);
2086211157Swill		pf_proto_unregister(PF_INET6, IPPROTO_CARP, SOCK_RAW);
2087211157Swill		proto_reg[CARP_INET6] = -1;
2088211157Swill	}
2089211157Swill	carp_iamatch6_p = NULL;
2090211157Swill	carp_macmatch6_p = NULL;
2091211157Swill#endif
2092228571Sglebius	carp_ioctl_p = NULL;
2093228571Sglebius	carp_attach_p = NULL;
2094228571Sglebius	carp_detach_p = NULL;
2095228571Sglebius	carp_get_vhid_p = NULL;
2096211157Swill	carp_linkstate_p = NULL;
2097211157Swill	carp_forus_p = NULL;
2098211157Swill	carp_output_p = NULL;
2099228736Sglebius	carp_demote_adj_p = NULL;
2100246659Sglebius	carp_master_p = NULL;
2101228736Sglebius	mtx_unlock(&carp_mtx);
2102228736Sglebius	taskqueue_drain(taskqueue_swi, &carp_sendall_task);
2103211157Swill	mtx_destroy(&carp_mtx);
2104253087Sae	COUNTER_ARRAY_FREE(carpstats,
2105253087Sae	    sizeof(struct carpstats) / sizeof(uint64_t));
2106211157Swill}
2107211157Swill
2108142215Sglebiusstatic int
2109211157Swillcarp_mod_load(void)
2110211157Swill{
2111212266Swill	int err;
2112211157Swill
2113211157Swill	mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF);
2114228571Sglebius	LIST_INIT(&carp_list);
2115253087Sae	COUNTER_ARRAY_ALLOC(carpstats,
2116253087Sae	    sizeof(struct carpstats) / sizeof(uint64_t), M_WAITOK);
2117228571Sglebius	carp_get_vhid_p = carp_get_vhid;
2118211157Swill	carp_forus_p = carp_forus;
2119211157Swill	carp_output_p = carp_output;
2120228571Sglebius	carp_linkstate_p = carp_linkstate;
2121228571Sglebius	carp_ioctl_p = carp_ioctl;
2122228571Sglebius	carp_attach_p = carp_attach;
2123228571Sglebius	carp_detach_p = carp_detach;
2124228736Sglebius	carp_demote_adj_p = carp_demote_adj;
2125246659Sglebius	carp_master_p = carp_master;
2126211157Swill#ifdef INET6
2127211157Swill	carp_iamatch6_p = carp_iamatch6;
2128211157Swill	carp_macmatch6_p = carp_macmatch6;
2129211157Swill	proto_reg[CARP_INET6] = pf_proto_register(PF_INET6,
2130211157Swill	    (struct protosw *)&in6_carp_protosw);
2131228571Sglebius	if (proto_reg[CARP_INET6]) {
2132211157Swill		printf("carp: error %d attaching to PF_INET6\n",
2133211157Swill		    proto_reg[CARP_INET6]);
2134211157Swill		carp_mod_cleanup();
2135212898Sglebius		return (proto_reg[CARP_INET6]);
2136211157Swill	}
2137212266Swill	err = ip6proto_register(IPPROTO_CARP);
2138212266Swill	if (err) {
2139212266Swill		printf("carp: error %d registering with INET6\n", err);
2140212266Swill		carp_mod_cleanup();
2141212898Sglebius		return (err);
2142212266Swill	}
2143211157Swill#endif
2144211157Swill#ifdef INET
2145211157Swill	carp_iamatch_p = carp_iamatch;
2146211157Swill	proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw);
2147228571Sglebius	if (proto_reg[CARP_INET]) {
2148211157Swill		printf("carp: error %d attaching to PF_INET\n",
2149211157Swill		    proto_reg[CARP_INET]);
2150211157Swill		carp_mod_cleanup();
2151212898Sglebius		return (proto_reg[CARP_INET]);
2152211157Swill	}
2153212266Swill	err = ipproto_register(IPPROTO_CARP);
2154212266Swill	if (err) {
2155212266Swill		printf("carp: error %d registering with INET\n", err);
2156212266Swill		carp_mod_cleanup();
2157212898Sglebius		return (err);
2158212266Swill	}
2159211157Swill#endif
2160228571Sglebius	return (0);
2161211157Swill}
2162211157Swill
2163211157Swillstatic int
2164142215Sglebiuscarp_modevent(module_t mod, int type, void *data)
2165142215Sglebius{
2166142215Sglebius	switch (type) {
2167142215Sglebius	case MOD_LOAD:
2168211157Swill		return carp_mod_load();
2169211157Swill		/* NOTREACHED */
2170142215Sglebius	case MOD_UNLOAD:
2171228571Sglebius		mtx_lock(&carp_mtx);
2172228571Sglebius		if (LIST_EMPTY(&carp_list))
2173228571Sglebius			carp_mod_cleanup();
2174228571Sglebius		else {
2175228571Sglebius			mtx_unlock(&carp_mtx);
2176228571Sglebius			return (EBUSY);
2177228571Sglebius		}
2178142215Sglebius		break;
2179142215Sglebius
2180142215Sglebius	default:
2181156947Sglebius		return (EINVAL);
2182142215Sglebius	}
2183142215Sglebius
2184156947Sglebius	return (0);
2185142215Sglebius}
2186142215Sglebius
2187142215Sglebiusstatic moduledata_t carp_mod = {
2188142215Sglebius	"carp",
2189142215Sglebius	carp_modevent,
2190241394Skevlo	0
2191142215Sglebius};
2192142215Sglebius
2193212265SwillDECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);
2194