1228571Sglebius/*-
2228571Sglebius * Copyright (c) 2002 Michael Shalayeff.
3228571Sglebius * Copyright (c) 2003 Ryan McBride.
4228571Sglebius * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org>
5228571Sglebius * All rights reserved.
6142215Sglebius *
7142215Sglebius * Redistribution and use in source and binary forms, with or without
8142215Sglebius * modification, are permitted provided that the following conditions
9142215Sglebius * are met:
10142215Sglebius * 1. Redistributions of source code must retain the above copyright
11142215Sglebius *    notice, this list of conditions and the following disclaimer.
12142215Sglebius * 2. Redistributions in binary form must reproduce the above copyright
13142215Sglebius *    notice, this list of conditions and the following disclaimer in the
14142215Sglebius *    documentation and/or other materials provided with the distribution.
15142215Sglebius *
16142215Sglebius * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17142215Sglebius * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18142215Sglebius * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19142215Sglebius * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
20142215Sglebius * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21142215Sglebius * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22142215Sglebius * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23142215Sglebius * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
24142215Sglebius * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
25142215Sglebius * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26142215Sglebius * THE POSSIBILITY OF SUCH DAMAGE.
27142215Sglebius */
28142215Sglebius
29172467Ssilby#include <sys/cdefs.h>
30172467Ssilby__FBSDID("$FreeBSD$");
31172467Ssilby
32142215Sglebius#include "opt_bpf.h"
33142215Sglebius#include "opt_inet.h"
34142215Sglebius#include "opt_inet6.h"
35142215Sglebius
36142215Sglebius#include <sys/param.h>
37142215Sglebius#include <sys/systm.h>
38228571Sglebius#include <sys/bus.h>
39228571Sglebius#include <sys/jail.h>
40142215Sglebius#include <sys/kernel.h>
41142215Sglebius#include <sys/limits.h>
42142215Sglebius#include <sys/malloc.h>
43142215Sglebius#include <sys/mbuf.h>
44142215Sglebius#include <sys/module.h>
45164033Srwatson#include <sys/priv.h>
46142215Sglebius#include <sys/proc.h>
47211157Swill#include <sys/protosw.h>
48228571Sglebius#include <sys/socket.h>
49228571Sglebius#include <sys/sockio.h>
50142215Sglebius#include <sys/sysctl.h>
51142215Sglebius#include <sys/syslog.h>
52228736Sglebius#include <sys/taskqueue.h>
53253087Sae#include <sys/counter.h>
54142215Sglebius
55142215Sglebius#include <net/ethernet.h>
56142215Sglebius#include <net/fddi.h>
57142215Sglebius#include <net/if.h>
58152410Sru#include <net/if_dl.h>
59228571Sglebius#include <net/if_llatbl.h>
60142215Sglebius#include <net/if_types.h>
61228571Sglebius#include <net/iso88025.h>
62142215Sglebius#include <net/route.h>
63196019Srwatson#include <net/vnet.h>
64142215Sglebius
65221130Sbz#if defined(INET) || defined(INET6)
66142215Sglebius#include <netinet/in.h>
67142215Sglebius#include <netinet/in_var.h>
68221130Sbz#include <netinet/ip_carp.h>
69221130Sbz#include <netinet/ip.h>
70221130Sbz#include <machine/in_cksum.h>
71221130Sbz#endif
72221130Sbz#ifdef INET
73142215Sglebius#include <netinet/ip_var.h>
74142215Sglebius#include <netinet/if_ether.h>
75142215Sglebius#endif
76142215Sglebius
77142215Sglebius#ifdef INET6
78142215Sglebius#include <netinet/icmp6.h>
79142215Sglebius#include <netinet/ip6.h>
80211157Swill#include <netinet6/ip6protosw.h>
81228571Sglebius#include <netinet6/in6_var.h>
82142215Sglebius#include <netinet6/ip6_var.h>
83148387Sume#include <netinet6/scope6_var.h>
84142215Sglebius#include <netinet6/nd6.h>
85142215Sglebius#endif
86142215Sglebius
87142215Sglebius#include <crypto/sha1.h>
88142215Sglebius
89228571Sglebiusstatic MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses");
90142215Sglebius
91142215Sglebiusstruct carp_softc {
92228571Sglebius	struct ifnet		*sc_carpdev;	/* Pointer to parent ifnet. */
93228571Sglebius	struct ifaddr		**sc_ifas;	/* Our ifaddrs. */
94228571Sglebius	struct sockaddr_dl	sc_addr;	/* Our link level address. */
95228571Sglebius	struct callout		sc_ad_tmo;	/* Advertising timeout. */
96221130Sbz#ifdef INET
97228571Sglebius	struct callout		sc_md_tmo;	/* Master down timeout. */
98221130Sbz#endif
99142215Sglebius#ifdef INET6
100228571Sglebius	struct callout 		sc_md6_tmo;	/* XXX: Master down timeout. */
101228571Sglebius#endif
102228571Sglebius	struct mtx		sc_mtx;
103142215Sglebius
104228571Sglebius	int			sc_vhid;
105228571Sglebius	int			sc_advskew;
106228571Sglebius	int			sc_advbase;
107228571Sglebius
108228571Sglebius	int			sc_naddrs;
109228571Sglebius	int			sc_naddrs6;
110228571Sglebius	int			sc_ifasiz;
111142215Sglebius	enum { INIT = 0, BACKUP, MASTER }	sc_state;
112228571Sglebius	int			sc_suppress;
113228571Sglebius	int			sc_sendad_errors;
114142215Sglebius#define	CARP_SENDAD_MAX_ERRORS	3
115228571Sglebius	int			sc_sendad_success;
116142215Sglebius#define	CARP_SENDAD_MIN_SUCCESS 3
117142215Sglebius
118228571Sglebius	int			sc_init_counter;
119228571Sglebius	uint64_t		sc_counter;
120142215Sglebius
121142215Sglebius	/* authentication */
122228571Sglebius#define	CARP_HMAC_PAD	64
123142215Sglebius	unsigned char sc_key[CARP_KEY_LEN];
124142215Sglebius	unsigned char sc_pad[CARP_HMAC_PAD];
125142215Sglebius	SHA1_CTX sc_sha1;
126142215Sglebius
127228571Sglebius	TAILQ_ENTRY(carp_softc)	sc_list;	/* On the carp_if list. */
128228571Sglebius	LIST_ENTRY(carp_softc)	sc_next;	/* On the global list. */
129142215Sglebius};
130142215Sglebius
131228571Sglebiusstruct carp_if {
132228571Sglebius#ifdef INET
133228571Sglebius	int	cif_naddrs;
134228571Sglebius#endif
135228571Sglebius#ifdef INET6
136228571Sglebius	int	cif_naddrs6;
137228571Sglebius#endif
138228571Sglebius	TAILQ_HEAD(, carp_softc) cif_vrs;
139228571Sglebius#ifdef INET
140228571Sglebius	struct ip_moptions 	 cif_imo;
141228571Sglebius#endif
142228571Sglebius#ifdef INET6
143228571Sglebius	struct ip6_moptions 	 cif_im6o;
144228571Sglebius#endif
145228571Sglebius	struct ifnet	*cif_ifp;
146228571Sglebius	struct mtx	cif_mtx;
147228571Sglebius};
148228571Sglebius
/* Slots of proto_reg[], one per address family; -1 is the initial value. */
#define	CARP_INET	0
#define	CARP_INET6	1
static int proto_reg[] = {
	[CARP_INET] = -1,
	[CARP_INET6] = -1,
};
152228571Sglebius
153228571Sglebius/*
154228571Sglebius * Brief design of carp(4).
155228571Sglebius *
156228571Sglebius * Any carp-capable ifnet may have a list of carp softcs hanging off
157228571Sglebius * its ifp->if_carp pointer. Each softc represents one unique virtual
158228571Sglebius * host id, or vhid. The softc has a back pointer to the ifnet. All
159228571Sglebius * softcs are joined in a global list, which has quite limited use.
160228571Sglebius *
161228571Sglebius * Any interface address that takes part in CARP negotiation has a
162228571Sglebius * pointer to the softc of its vhid, ifa->ifa_carp. That could be either
163228571Sglebius * AF_INET or AF_INET6 address.
164228571Sglebius *
 * Although one can follow the softc's back pointer to the ifnet and
 * traverse its ifp->if_addrhead queue to find all interface addresses
 * involved in CARP, we keep a growable array of ifaddr pointers. This
 * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that
 * do calls into the network stack, thus avoiding LORs.
170228571Sglebius *
171228571Sglebius * Locking:
172228571Sglebius *
173228571Sglebius * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(),
174228571Sglebius * callout-driven events and ioctl()s.
175228571Sglebius *
176228571Sglebius * To traverse the list of softcs on an ifnet we use CIF_LOCK(), to
177228571Sglebius * traverse the global list we use the mutex carp_mtx.
178228571Sglebius *
179228571Sglebius * Known issues with locking:
180228571Sglebius *
181228571Sglebius * - There is no protection for races between two ioctl() requests,
182228571Sglebius *   neither SIOCSVH, nor SIOCAIFADDR & SIOCAIFADDR_IN6. I think that all
183228571Sglebius *   interface ioctl()s should be serialized right in net/if.c.
184228571Sglebius * - Sending ad, we put the pointer to the softc in an mtag, and no reference
185228571Sglebius *   counting is done on the softc.
186228571Sglebius * - On module unload we may race (?) with packet processing thread
187228571Sglebius *   dereferencing our function pointers.
188228571Sglebius */
189228571Sglebius
190254292Strociny/* Accept incoming CARP packets. */
191254292Strocinystatic VNET_DEFINE(int, carp_allow) = 1;
192254292Strociny#define	V_carp_allow	VNET(carp_allow)
193254292Strociny
194254292Strociny/* Preempt slower nodes. */
195254292Strocinystatic VNET_DEFINE(int, carp_preempt) = 0;
196254292Strociny#define	V_carp_preempt	VNET(carp_preempt)
197254292Strociny
198254292Strociny/* Log level. */
199254292Strocinystatic VNET_DEFINE(int, carp_log) = 1;
200254292Strociny#define	V_carp_log	VNET(carp_log)
201254292Strociny
202254292Strociny/* Global advskew demotion. */
203254292Strocinystatic VNET_DEFINE(int, carp_demotion) = 0;
204254292Strociny#define	V_carp_demotion	VNET(carp_demotion)
205254292Strociny
206254292Strociny/* Send error demotion factor. */
207254292Strocinystatic VNET_DEFINE(int, carp_senderr_adj) = CARP_MAXSKEW;
208254292Strociny#define	V_carp_senderr_adj	VNET(carp_senderr_adj)
209254292Strociny
210254292Strociny/* Iface down demotion factor. */
211254292Strocinystatic VNET_DEFINE(int, carp_ifdown_adj) = CARP_MAXSKEW;
212254292Strociny#define	V_carp_ifdown_adj	VNET(carp_ifdown_adj)
213254292Strociny
214244681Sglebiusstatic int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS);
215228736Sglebius
216211157SwillSYSCTL_NODE(_net_inet, IPPROTO_CARP,	carp,	CTLFLAG_RW, 0,	"CARP");
217254292StrocinySYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, allow, CTLFLAG_RW,
218254292Strociny    &VNET_NAME(carp_allow), 0, "Accept incoming CARP packets");
219254292StrocinySYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_RW,
220254292Strociny    &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode");
221254292StrocinySYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_RW,
222254292Strociny    &VNET_NAME(carp_log), 0, "CARP log level");
223254292StrocinySYSCTL_VNET_PROC(_net_inet_carp, OID_AUTO, demotion, CTLTYPE_INT|CTLFLAG_RW,
224244681Sglebius    0, 0, carp_demote_adj_sysctl, "I",
225244681Sglebius    "Adjust demotion factor (skew of advskew)");
226254292StrocinySYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, CTLFLAG_RW,
227254292Strociny    &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment");
228254292StrocinySYSCTL_VNET_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, CTLFLAG_RW,
229254292Strociny    &VNET_NAME(carp_ifdown_adj), 0,
230254292Strociny    "Interface down demotion factor adjustment");
231142215Sglebius
232254292StrocinyVNET_PCPUSTAT_DEFINE(struct carpstats, carpstats);
233254292StrocinyVNET_PCPUSTAT_SYSINIT(carpstats);
234254292StrocinyVNET_PCPUSTAT_SYSUNINIT(carpstats);
235254292Strociny
236253087Sae#define	CARPSTATS_ADD(name, val)	\
237254292Strociny    counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \
238253087Sae	sizeof(uint64_t)], (val))
239253087Sae#define	CARPSTATS_INC(name)		CARPSTATS_ADD(name, 1)
240142215Sglebius
241254292StrocinySYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats,
242254292Strociny    carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)");
243253087Sae
/* Per-softc mutex (sc_mtx) helpers. */
#define	CARP_LOCK_INIT(sc)						\
	mtx_init(&(sc)->sc_mtx, "carp_softc", NULL, MTX_DEF)
#define	CARP_LOCK_DESTROY(sc)	mtx_destroy(&(sc)->sc_mtx)
#define	CARP_LOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_mtx, MA_OWNED)
#define	CARP_LOCK(sc)		mtx_lock(&(sc)->sc_mtx)
#define	CARP_UNLOCK(sc)		mtx_unlock(&(sc)->sc_mtx)

/* Per-ifnet container mutex (cif_mtx) helpers. */
#define	CIF_LOCK_INIT(cif)						\
	mtx_init(&(cif)->cif_mtx, "carp_if", NULL, MTX_DEF)
#define	CIF_LOCK_DESTROY(cif)	mtx_destroy(&(cif)->cif_mtx)
#define	CIF_LOCK_ASSERT(cif)	mtx_assert(&(cif)->cif_mtx, MA_OWNED)
#define	CIF_LOCK(cif)		mtx_lock(&(cif)->cif_mtx)
#define	CIF_UNLOCK(cif)		mtx_unlock(&(cif)->cif_mtx)

/*
 * Called with the cif lock held.  While softcs remain on cif_vrs the lock
 * is simply dropped; once the queue is empty the cif is handed to
 * carp_free_if() instead (which presumably disposes of the lock as well --
 * confirm against carp_free_if()).
 */
#define	CIF_FREE(cif)	do {				\
		CIF_LOCK_ASSERT(cif);			\
		if (!TAILQ_EMPTY(&(cif)->cif_vrs))	\
			CIF_UNLOCK(cif);		\
		else					\
			carp_free_if(cif);		\
} while (0)
263142215Sglebius
/*
 * Informational logging, active at log level 1 and above.  The first
 * argument must be a string literal because "carp: " is pasted in front
 * of it.
 */
#define	CARP_LOG(...)	do {				\
	if (V_carp_log >= 1)				\
		log(LOG_INFO, "carp: " __VA_ARGS__);	\
} while (0)

/* Debug logging, active at log level 2 and above; no prefix is added. */
#define	CARP_DEBUG(...)	do {				\
	if (V_carp_log >= 2)				\
		log(LOG_DEBUG, __VA_ARGS__);		\
} while (0)
273142446Sglebius
/*
 * Iteration helpers.
 *
 * Each of the three FOREACH macros expands to a lock assertion statement
 * followed by a loop header, so they must be used where two statements
 * are allowed (i.e. never as the unbraced body of an if/else/loop).
 * Every macro argument is parenthesised in the expansion so that
 * arbitrary pointer expressions may be passed.
 */

/* Visit every address on ifp's if_addrhead that has a softc attached. */
#define	IFNET_FOREACH_IFA(ifp, ifa)					\
	IF_ADDR_LOCK_ASSERT(ifp);					\
	TAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link)		\
		if ((ifa)->ifa_carp != NULL)

/*
 * Visit the ifaddrs stored in sc->sc_ifas.  The walk stops after
 * sc_naddrs + sc_naddrs6 entries or at the first NULL slot, whichever
 * comes first.
 */
#define	CARP_FOREACH_IFA(sc, ifa)					\
	CARP_LOCK_ASSERT(sc);						\
	for (int _i = 0;						\
		_i < (sc)->sc_naddrs + (sc)->sc_naddrs6 &&		\
		((ifa) = (sc)->sc_ifas[_i]) != NULL;			\
		++_i)

/* Visit every softc queued on ifp->if_carp. */
#define	IFNET_FOREACH_CARP(ifp, sc)					\
	CIF_LOCK_ASSERT((ifp)->if_carp);				\
	TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list)

/* The softc's advskew plus the global demotion, capped at CARP_MAXSKEW. */
#define	DEMOTE_ADVSKEW(sc)					\
    (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ?	\
    CARP_MAXSKEW : ((sc)->sc_advskew + V_carp_demotion))
293228736Sglebius
294142559Sglebiusstatic void	carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
295228571Sglebiusstatic struct carp_softc
296228571Sglebius		*carp_alloc(struct ifnet *);
297234130Sglebiusstatic void	carp_detach_locked(struct ifaddr *);
298228571Sglebiusstatic void	carp_destroy(struct carp_softc *);
299228571Sglebiusstatic struct carp_if
300228571Sglebius		*carp_alloc_if(struct ifnet *);
301228571Sglebiusstatic void	carp_free_if(struct carp_if *);
302228571Sglebiusstatic void	carp_set_state(struct carp_softc *, int);
303228571Sglebiusstatic void	carp_sc_state(struct carp_softc *);
304228571Sglebiusstatic void	carp_setrun(struct carp_softc *, sa_family_t);
305228571Sglebiusstatic void	carp_master_down(void *);
306228571Sglebiusstatic void	carp_master_down_locked(struct carp_softc *);
307142559Sglebiusstatic void	carp_send_ad(void *);
308142914Sglebiusstatic void	carp_send_ad_locked(struct carp_softc *);
309228571Sglebiusstatic void	carp_addroute(struct carp_softc *);
310230863Sglebiusstatic void	carp_ifa_addroute(struct ifaddr *);
311228571Sglebiusstatic void	carp_delroute(struct carp_softc *);
312230863Sglebiusstatic void	carp_ifa_delroute(struct ifaddr *);
313228736Sglebiusstatic void	carp_send_ad_all(void *, int);
314228736Sglebiusstatic void	carp_demote_adj(int, char *);
315142215Sglebius
316228571Sglebiusstatic LIST_HEAD(, carp_softc) carp_list;
317142911Sglebiusstatic struct mtx carp_mtx;
318228736Sglebiusstatic struct task carp_sendall_task =
319228736Sglebius    TASK_INITIALIZER(0, carp_send_ad_all, NULL);
320142215Sglebius
321142559Sglebiusstatic void
322142215Sglebiuscarp_hmac_prepare(struct carp_softc *sc)
323142215Sglebius{
324228571Sglebius	uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
325228571Sglebius	uint8_t vhid = sc->sc_vhid & 0xff;
326142215Sglebius	struct ifaddr *ifa;
327179490Smlaier	int i, found;
328179490Smlaier#ifdef INET
329179490Smlaier	struct in_addr last, cur, in;
330179490Smlaier#endif
331142215Sglebius#ifdef INET6
332179490Smlaier	struct in6_addr last6, cur6, in6;
333142215Sglebius#endif
334142215Sglebius
335228571Sglebius	CARP_LOCK_ASSERT(sc);
336142914Sglebius
337228571Sglebius	/* Compute ipad from key. */
338142215Sglebius	bzero(sc->sc_pad, sizeof(sc->sc_pad));
339142215Sglebius	bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
340142215Sglebius	for (i = 0; i < sizeof(sc->sc_pad); i++)
341142215Sglebius		sc->sc_pad[i] ^= 0x36;
342142215Sglebius
343228571Sglebius	/* Precompute first part of inner hash. */
344142215Sglebius	SHA1Init(&sc->sc_sha1);
345142215Sglebius	SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
346142215Sglebius	SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
347142215Sglebius	SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
348142215Sglebius	SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
349142215Sglebius#ifdef INET
350179490Smlaier	cur.s_addr = 0;
351179490Smlaier	do {
352179490Smlaier		found = 0;
353179490Smlaier		last = cur;
354179490Smlaier		cur.s_addr = 0xffffffff;
355228571Sglebius		CARP_FOREACH_IFA(sc, ifa) {
356179490Smlaier			in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
357179490Smlaier			if (ifa->ifa_addr->sa_family == AF_INET &&
358179490Smlaier			    ntohl(in.s_addr) > ntohl(last.s_addr) &&
359179490Smlaier			    ntohl(in.s_addr) < ntohl(cur.s_addr)) {
360179490Smlaier				cur.s_addr = in.s_addr;
361179490Smlaier				found++;
362179490Smlaier			}
363179490Smlaier		}
364179490Smlaier		if (found)
365179490Smlaier			SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
366179490Smlaier	} while (found);
367142215Sglebius#endif /* INET */
368142215Sglebius#ifdef INET6
369179490Smlaier	memset(&cur6, 0, sizeof(cur6));
370179490Smlaier	do {
371179490Smlaier		found = 0;
372179490Smlaier		last6 = cur6;
373179490Smlaier		memset(&cur6, 0xff, sizeof(cur6));
374228571Sglebius		CARP_FOREACH_IFA(sc, ifa) {
375142215Sglebius			in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
376179490Smlaier			if (IN6_IS_SCOPE_EMBED(&in6))
377179490Smlaier				in6.s6_addr16[1] = 0;
378179490Smlaier			if (ifa->ifa_addr->sa_family == AF_INET6 &&
379179490Smlaier			    memcmp(&in6, &last6, sizeof(in6)) > 0 &&
380179490Smlaier			    memcmp(&in6, &cur6, sizeof(in6)) < 0) {
381179490Smlaier				cur6 = in6;
382179490Smlaier				found++;
383179490Smlaier			}
384142215Sglebius		}
385179490Smlaier		if (found)
386179490Smlaier			SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
387179490Smlaier	} while (found);
388142215Sglebius#endif /* INET6 */
389142215Sglebius
390142215Sglebius	/* convert ipad to opad */
391142215Sglebius	for (i = 0; i < sizeof(sc->sc_pad); i++)
392142215Sglebius		sc->sc_pad[i] ^= 0x36 ^ 0x5c;
393142215Sglebius}
394142215Sglebius
395142559Sglebiusstatic void
396228571Sglebiuscarp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
397142215Sglebius    unsigned char md[20])
398142215Sglebius{
399142215Sglebius	SHA1_CTX sha1ctx;
400142215Sglebius
401228571Sglebius	CARP_LOCK_ASSERT(sc);
402228571Sglebius
403142215Sglebius	/* fetch first half of inner hash */
404142215Sglebius	bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
405142215Sglebius
406142215Sglebius	SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
407142215Sglebius	SHA1Final(md, &sha1ctx);
408142215Sglebius
409142215Sglebius	/* outer hash */
410142215Sglebius	SHA1Init(&sha1ctx);
411142215Sglebius	SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
412142215Sglebius	SHA1Update(&sha1ctx, md, 20);
413142215Sglebius	SHA1Final(md, &sha1ctx);
414142215Sglebius}
415142215Sglebius
/*
 * Check the digest carried by a received advertisement.
 *
 * The expected digest is recomputed with carp_hmac_generate() and compared
 * against md.  The comparison always inspects all 20 bytes and folds the
 * differences together, so the time taken does not reveal how many leading
 * bytes of a forged digest were correct.
 *
 * The softc lock must be held.
 *
 * Returns 0 when the digests match and non-zero when they differ.
 */
static int
carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
    unsigned char md[20])
{
	unsigned char md2[20];
	unsigned int diff = 0;
	size_t i;

	CARP_LOCK_ASSERT(sc);

	carp_hmac_generate(sc, counter, md2);

	/* Accumulate instead of returning at the first mismatching byte. */
	for (i = 0; i < sizeof(md2); i++)
		diff |= md[i] ^ md2[i];

	return (diff != 0);
}
428142215Sglebius
/*
 * Process an input packet received over IPv4.
 * The order of the checks has been rearranged compared to the RFC,
 * but it seems more efficient this way or not possible otherwise.
 *
 * The mbuf is consumed on every path: it is either freed here or passed
 * on to carp_input_c().  hlen is not used; the IP header length is taken
 * from the packet itself.
 */
#ifdef INET
void
carp_input(struct mbuf *m, int hlen)
{
	struct ip *ip = mtod(m, struct ip *);
	struct carp_header *ch;
	int iplen, len;

	CARPSTATS_INC(carps_ipackets);

	if (!V_carp_allow) {
		m_freem(m);
		return;
	}

	/* verify that the IP TTL is 255.  */
	if (ip->ip_ttl != CARP_DFLTTL) {
		CARPSTATS_INC(carps_badttl);
		CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
		    ip->ip_ttl,
		    m->m_pkthdr.rcvif->if_xname);
		m_freem(m);
		return;
	}

	iplen = ip->ip_hl << 2;
	/* Bytes that must be present: IP header plus a full CARP header. */
	len = iplen + sizeof(*ch);

	if (m->m_pkthdr.len < len) {
		CARPSTATS_INC(carps_badlen);
		CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) "
		    "on %s\n", __func__, m->m_len - sizeof(struct ip),
		    m->m_pkthdr.rcvif->if_xname);
		m_freem(m);
		return;
	}

	/*
	 * Make both headers contiguous, but only when the first mbuf does
	 * not already hold them.  m_pullup() frees the chain on failure.
	 */
	if (m->m_len < len && (m = m_pullup(m, len)) == NULL) {
		CARPSTATS_INC(carps_hdrops);
		CARP_DEBUG("%s: pullup failed\n", __func__);
		return;
	}
	ip = mtod(m, struct ip *);
	ch = (struct carp_header *)((char *)ip + iplen);

	/*
	 * verify the CARP checksum; m_data is temporarily advanced past
	 * the IP header so that in_cksum() starts at the CARP header.
	 */
	m->m_data += iplen;
	if (in_cksum(m, len - iplen)) {
		CARPSTATS_INC(carps_badsum);
		CARP_DEBUG("%s: checksum failed on %s\n", __func__,
		    m->m_pkthdr.rcvif->if_xname);
		m_freem(m);
		return;
	}
	m->m_data -= iplen;

	carp_input_c(m, ch, AF_INET);
}
#endif
515142215Sglebius
#ifdef INET6
/*
 * Process an input packet received over IPv6.  *offp is the offset of the
 * CARP header inside the packet.  The mbuf is always consumed and
 * IPPROTO_DONE is always returned.
 */
int
carp6_input(struct mbuf **mp, int *offp, int proto)
{
	struct mbuf *m = *mp;
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct carp_header *ch;
	u_int mlen;

	CARPSTATS_INC(carps_ipackets6);

	if (!V_carp_allow)
		goto drop;

	/* The receiving interface must have CARP configured. */
	if (m->m_pkthdr.rcvif->if_carp == NULL) {
		CARPSTATS_INC(carps_badif);
		CARP_DEBUG("%s: packet received on non-carp interface: %s\n",
		    __func__, m->m_pkthdr.rcvif->if_xname);
		goto drop;
	}

	/* The hop limit must be 255. */
	if (ip6->ip6_hlim != CARP_DFLTTL) {
		CARPSTATS_INC(carps_badttl);
		CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
		    ip6->ip6_hlim, m->m_pkthdr.rcvif->if_xname);
		goto drop;
	}

	/*
	 * Get at the complete CARP header.  Remember the first mbuf's
	 * length beforehand for the log message; when the macro fails it
	 * leaves ch NULL and the packet must not be freed again here.
	 */
	mlen = m->m_len;
	IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
	if (ch == NULL) {
		CARPSTATS_INC(carps_badlen);
		CARP_DEBUG("%s: packet size %u too small\n", __func__, mlen);
		return (IPPROTO_DONE);
	}

	/* Checksum the CARP header with m_data temporarily advanced to it. */
	m->m_data += *offp;
	if (in_cksum(m, sizeof(*ch))) {
		CARPSTATS_INC(carps_badsum);
		CARP_DEBUG("%s: checksum failed, on %s\n", __func__,
		    m->m_pkthdr.rcvif->if_xname);
		goto drop;
	}
	m->m_data -= *offp;

	carp_input_c(m, ch, AF_INET6);
	return (IPPROTO_DONE);

drop:
	m_freem(m);
	return (IPPROTO_DONE);
}
#endif /* INET6 */
575142215Sglebius
576142559Sglebiusstatic void
577142446Sglebiuscarp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
578142215Sglebius{
579142215Sglebius	struct ifnet *ifp = m->m_pkthdr.rcvif;
580312444Sjpaetzel	struct ifaddr *ifa;
581142446Sglebius	struct carp_softc *sc;
582228571Sglebius	uint64_t tmp_counter;
583142215Sglebius	struct timeval sc_tv, ch_tv;
584142215Sglebius
585312444Sjpaetzel	/* verify that the VHID is valid on the receiving interface */
586229621Sjhb	IF_ADDR_RLOCK(ifp);
587312444Sjpaetzel	IFNET_FOREACH_IFA(ifp, ifa)
588312444Sjpaetzel		if (ifa->ifa_addr->sa_family == af &&
589312444Sjpaetzel		    ifa->ifa_carp->sc_vhid == ch->carp_vhid) {
590312444Sjpaetzel			ifa_ref(ifa);
591312444Sjpaetzel			break;
592312444Sjpaetzel		}
593229621Sjhb	IF_ADDR_RUNLOCK(ifp);
594142914Sglebius
595228571Sglebius	if (ifa == NULL) {
596312444Sjpaetzel		CARPSTATS_INC(carps_badvhid);
597142215Sglebius		m_freem(m);
598142215Sglebius		return;
599142215Sglebius	}
600142215Sglebius
601142215Sglebius	/* verify the CARP version. */
602142215Sglebius	if (ch->carp_version != CARP_VERSION) {
603190968Srwatson		CARPSTATS_INC(carps_badver);
604228571Sglebius		CARP_DEBUG("%s: invalid version %d\n", ifp->if_xname,
605142446Sglebius		    ch->carp_version);
606228571Sglebius		ifa_free(ifa);
607142215Sglebius		m_freem(m);
608142215Sglebius		return;
609142215Sglebius	}
610142215Sglebius
611228571Sglebius	sc = ifa->ifa_carp;
612228571Sglebius	CARP_LOCK(sc);
613228571Sglebius	ifa_free(ifa);
614228571Sglebius
615142215Sglebius	if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
616190968Srwatson		CARPSTATS_INC(carps_badauth);
617228571Sglebius		CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__,
618228571Sglebius		    sc->sc_vhid, ifp->if_xname);
619228571Sglebius		goto out;
620142215Sglebius	}
621142215Sglebius
622142215Sglebius	tmp_counter = ntohl(ch->carp_counter[0]);
623142215Sglebius	tmp_counter = tmp_counter<<32;
624142215Sglebius	tmp_counter += ntohl(ch->carp_counter[1]);
625142215Sglebius
626142215Sglebius	/* XXX Replay protection goes here */
627142215Sglebius
628142215Sglebius	sc->sc_init_counter = 0;
629142215Sglebius	sc->sc_counter = tmp_counter;
630142215Sglebius
631142215Sglebius	sc_tv.tv_sec = sc->sc_advbase;
632228736Sglebius	sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256;
633142215Sglebius	ch_tv.tv_sec = ch->carp_advbase;
634142215Sglebius	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
635142215Sglebius
636142215Sglebius	switch (sc->sc_state) {
637142215Sglebius	case INIT:
638142215Sglebius		break;
639142215Sglebius	case MASTER:
640142215Sglebius		/*
641142215Sglebius		 * If we receive an advertisement from a master who's going to
642142215Sglebius		 * be more frequent than us, go into BACKUP state.
643142215Sglebius		 */
644142215Sglebius		if (timevalcmp(&sc_tv, &ch_tv, >) ||
645142215Sglebius		    timevalcmp(&sc_tv, &ch_tv, ==)) {
646142215Sglebius			callout_stop(&sc->sc_ad_tmo);
647228571Sglebius			CARP_LOG("VHID %u@%s: MASTER -> BACKUP "
648228571Sglebius			    "(more frequent advertisement received)\n",
649228571Sglebius			    sc->sc_vhid,
650228571Sglebius			    sc->sc_carpdev->if_xname);
651142215Sglebius			carp_set_state(sc, BACKUP);
652142215Sglebius			carp_setrun(sc, 0);
653228571Sglebius			carp_delroute(sc);
654142215Sglebius		}
655142215Sglebius		break;
656142215Sglebius	case BACKUP:
657142215Sglebius		/*
658142215Sglebius		 * If we're pre-empting masters who advertise slower than us,
659142215Sglebius		 * and this one claims to be slower, treat him as down.
660142215Sglebius		 */
661254292Strociny		if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) {
662228571Sglebius			CARP_LOG("VHID %u@%s: BACKUP -> MASTER "
663142452Sglebius			    "(preempting a slower master)\n",
664228571Sglebius			    sc->sc_vhid,
665228571Sglebius			    sc->sc_carpdev->if_xname);
666142914Sglebius			carp_master_down_locked(sc);
667142215Sglebius			break;
668142215Sglebius		}
669142215Sglebius
670142215Sglebius		/*
671142215Sglebius		 *  If the master is going to advertise at such a low frequency
672142215Sglebius		 *  that he's guaranteed to time out, we'd might as well just
673142215Sglebius		 *  treat him as timed out now.
674142215Sglebius		 */
675142215Sglebius		sc_tv.tv_sec = sc->sc_advbase * 3;
676142215Sglebius		if (timevalcmp(&sc_tv, &ch_tv, <)) {
677228571Sglebius			CARP_LOG("VHID %u@%s: BACKUP -> MASTER "
678142452Sglebius			    "(master timed out)\n",
679228571Sglebius			    sc->sc_vhid,
680228571Sglebius			    sc->sc_carpdev->if_xname);
681142914Sglebius			carp_master_down_locked(sc);
682142215Sglebius			break;
683142215Sglebius		}
684142215Sglebius
685142215Sglebius		/*
686142215Sglebius		 * Otherwise, we reset the counter and wait for the next
687142215Sglebius		 * advertisement.
688142215Sglebius		 */
689142215Sglebius		carp_setrun(sc, af);
690142215Sglebius		break;
691142215Sglebius	}
692142215Sglebius
693228571Sglebiusout:
694228571Sglebius	CARP_UNLOCK(sc);
695142215Sglebius	m_freem(m);
696142215Sglebius}
697142215Sglebius
698142559Sglebiusstatic int
699142215Sglebiuscarp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
700142215Sglebius{
701142215Sglebius	struct m_tag *mtag;
702142215Sglebius
703142215Sglebius	if (sc->sc_init_counter) {
704142215Sglebius		/* this could also be seconds since unix epoch */
705142215Sglebius		sc->sc_counter = arc4random();
706142215Sglebius		sc->sc_counter = sc->sc_counter << 32;
707142215Sglebius		sc->sc_counter += arc4random();
708142215Sglebius	} else
709142215Sglebius		sc->sc_counter++;
710142215Sglebius
711142215Sglebius	ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
712142215Sglebius	ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
713142215Sglebius
714142215Sglebius	carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
715142215Sglebius
716142215Sglebius	/* Tag packet for carp_output */
717228571Sglebius	if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *),
718228571Sglebius	    M_NOWAIT)) == NULL) {
719142215Sglebius		m_freem(m);
720228571Sglebius		CARPSTATS_INC(carps_onomem);
721142215Sglebius		return (ENOMEM);
722142215Sglebius	}
723236310Sglebius	bcopy(&sc, mtag + 1, sizeof(sc));
724142215Sglebius	m_tag_prepend(m, mtag);
725142215Sglebius
726142215Sglebius	return (0);
727142215Sglebius}
728142215Sglebius
729228736Sglebius/*
730228736Sglebius * To avoid LORs and possible recursions this function shouldn't
731228736Sglebius * be called directly, but scheduled via taskqueue.
732228736Sglebius */
733142559Sglebiusstatic void
734228736Sglebiuscarp_send_ad_all(void *ctx __unused, int pending __unused)
735142215Sglebius{
736142911Sglebius	struct carp_softc *sc;
737142215Sglebius
738142911Sglebius	mtx_lock(&carp_mtx);
739228571Sglebius	LIST_FOREACH(sc, &carp_list, sc_next)
740228736Sglebius		if (sc->sc_state == MASTER) {
741228571Sglebius			CARP_LOCK(sc);
742231201Sglebius			CURVNET_SET(sc->sc_carpdev->if_vnet);
743142914Sglebius			carp_send_ad_locked(sc);
744231201Sglebius			CURVNET_RESTORE();
745228571Sglebius			CARP_UNLOCK(sc);
746228571Sglebius		}
747142911Sglebius	mtx_unlock(&carp_mtx);
748142215Sglebius}
749142215Sglebius
750231201Sglebius/* Send a periodic advertisement, executed in callout context. */
751142559Sglebiusstatic void
752142215Sglebiuscarp_send_ad(void *v)
753142215Sglebius{
754142914Sglebius	struct carp_softc *sc = v;
755142914Sglebius
756228571Sglebius	CARP_LOCK_ASSERT(sc);
757231201Sglebius	CURVNET_SET(sc->sc_carpdev->if_vnet);
758142914Sglebius	carp_send_ad_locked(sc);
759231201Sglebius	CURVNET_RESTORE();
760228571Sglebius	CARP_UNLOCK(sc);
761142914Sglebius}
762142914Sglebius
763142914Sglebiusstatic void
764264111Sglebiuscarp_send_ad_error(struct carp_softc *sc, int error)
765264111Sglebius{
766264111Sglebius
767264111Sglebius	if (error) {
768264111Sglebius		if (sc->sc_sendad_errors < INT_MAX)
769264111Sglebius			sc->sc_sendad_errors++;
770264111Sglebius		if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
771264111Sglebius			static const char fmt[] = "send error %d on %s";
772264111Sglebius			char msg[sizeof(fmt) + IFNAMSIZ];
773264111Sglebius
774264111Sglebius			sprintf(msg, fmt, error, sc->sc_carpdev->if_xname);
775264111Sglebius			carp_demote_adj(V_carp_senderr_adj, msg);
776264111Sglebius		}
777264111Sglebius		sc->sc_sendad_success = 0;
778264111Sglebius	} else {
779264111Sglebius		if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS &&
780264111Sglebius		    ++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) {
781264111Sglebius			static const char fmt[] = "send ok on %s";
782264111Sglebius			char msg[sizeof(fmt) + IFNAMSIZ];
783264111Sglebius
784264111Sglebius			sprintf(msg, fmt, sc->sc_carpdev->if_xname);
785264111Sglebius			carp_demote_adj(-V_carp_senderr_adj, msg);
786264111Sglebius			sc->sc_sendad_errors = 0;
787264111Sglebius		} else
788264111Sglebius			sc->sc_sendad_errors = 0;
789264111Sglebius	}
790264111Sglebius}
791264111Sglebius
792264111Sglebiusstatic void
793142914Sglebiuscarp_send_ad_locked(struct carp_softc *sc)
794142914Sglebius{
795142215Sglebius	struct carp_header ch;
796142215Sglebius	struct timeval tv;
797312444Sjpaetzel	struct sockaddr sa;
798228571Sglebius	struct ifaddr *ifa;
799142215Sglebius	struct carp_header *ch_ptr;
800142215Sglebius	struct mbuf *m;
801228571Sglebius	int len, advskew;
802142215Sglebius
803228571Sglebius	CARP_LOCK_ASSERT(sc);
804142914Sglebius
805228736Sglebius	advskew = DEMOTE_ADVSKEW(sc);
806228571Sglebius	tv.tv_sec = sc->sc_advbase;
807228571Sglebius	tv.tv_usec = advskew * 1000000 / 256;
808142215Sglebius
809142215Sglebius	ch.carp_version = CARP_VERSION;
810142215Sglebius	ch.carp_type = CARP_ADVERTISEMENT;
811142215Sglebius	ch.carp_vhid = sc->sc_vhid;
812228571Sglebius	ch.carp_advbase = sc->sc_advbase;
813142215Sglebius	ch.carp_advskew = advskew;
814142215Sglebius	ch.carp_authlen = 7;	/* XXX DEFINE */
815142215Sglebius	ch.carp_pad1 = 0;	/* must be zero */
816142215Sglebius	ch.carp_cksum = 0;
817142215Sglebius
818228571Sglebius	/* XXXGL: OpenBSD picks first ifaddr with needed family. */
819228571Sglebius
820142215Sglebius#ifdef INET
821228571Sglebius	if (sc->sc_naddrs) {
822142215Sglebius		struct ip *ip;
823142215Sglebius
824248324Sglebius		m = m_gethdr(M_NOWAIT, MT_DATA);
825142215Sglebius		if (m == NULL) {
826190968Srwatson			CARPSTATS_INC(carps_onomem);
827241043Sglebius			goto resched;
828142215Sglebius		}
829142215Sglebius		len = sizeof(*ip) + sizeof(ch);
830142215Sglebius		m->m_pkthdr.len = len;
831142215Sglebius		m->m_pkthdr.rcvif = NULL;
832142215Sglebius		m->m_len = len;
833142215Sglebius		MH_ALIGN(m, m->m_len);
834142215Sglebius		m->m_flags |= M_MCAST;
835142215Sglebius		ip = mtod(m, struct ip *);
836142215Sglebius		ip->ip_v = IPVERSION;
837142215Sglebius		ip->ip_hl = sizeof(*ip) >> 2;
838142215Sglebius		ip->ip_tos = IPTOS_LOWDELAY;
839241913Sglebius		ip->ip_len = htons(len);
840142215Sglebius		ip->ip_id = ip_newid();
841241913Sglebius		ip->ip_off = htons(IP_DF);
842142215Sglebius		ip->ip_ttl = CARP_DFLTTL;
843142215Sglebius		ip->ip_p = IPPROTO_CARP;
844142215Sglebius		ip->ip_sum = 0;
845228571Sglebius
846312444Sjpaetzel		bzero(&sa, sizeof(sa));
847312444Sjpaetzel		sa.sa_family = AF_INET;
848312444Sjpaetzel		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
849228571Sglebius		if (ifa != NULL) {
850228571Sglebius			ip->ip_src.s_addr =
851228571Sglebius			    ifatoia(ifa)->ia_addr.sin_addr.s_addr;
852228571Sglebius			ifa_free(ifa);
853228571Sglebius		} else
854228571Sglebius			ip->ip_src.s_addr = 0;
855142215Sglebius		ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
856142215Sglebius
857142215Sglebius		ch_ptr = (struct carp_header *)(&ip[1]);
858142215Sglebius		bcopy(&ch, ch_ptr, sizeof(ch));
859142215Sglebius		if (carp_prepare_ad(m, sc, ch_ptr))
860241043Sglebius			goto resched;
861142215Sglebius
862142215Sglebius		m->m_data += sizeof(*ip);
863244683Sglebius		ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip));
864142215Sglebius		m->m_data -= sizeof(*ip);
865142215Sglebius
866190968Srwatson		CARPSTATS_INC(carps_opackets);
867142215Sglebius
868264111Sglebius		carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT,
869264111Sglebius		    &sc->sc_carpdev->if_carp->cif_imo, NULL));
870142215Sglebius	}
871142215Sglebius#endif /* INET */
872142215Sglebius#ifdef INET6
873228571Sglebius	if (sc->sc_naddrs6) {
874142215Sglebius		struct ip6_hdr *ip6;
875142215Sglebius
876248324Sglebius		m = m_gethdr(M_NOWAIT, MT_DATA);
877142215Sglebius		if (m == NULL) {
878190968Srwatson			CARPSTATS_INC(carps_onomem);
879241043Sglebius			goto resched;
880142215Sglebius		}
881142215Sglebius		len = sizeof(*ip6) + sizeof(ch);
882142215Sglebius		m->m_pkthdr.len = len;
883142215Sglebius		m->m_pkthdr.rcvif = NULL;
884142215Sglebius		m->m_len = len;
885142215Sglebius		MH_ALIGN(m, m->m_len);
886142215Sglebius		m->m_flags |= M_MCAST;
887142215Sglebius		ip6 = mtod(m, struct ip6_hdr *);
888142215Sglebius		bzero(ip6, sizeof(*ip6));
889142215Sglebius		ip6->ip6_vfc |= IPV6_VERSION;
890142215Sglebius		ip6->ip6_hlim = CARP_DFLTTL;
891142215Sglebius		ip6->ip6_nxt = IPPROTO_CARP;
892312444Sjpaetzel		bzero(&sa, sizeof(sa));
893142215Sglebius
894228571Sglebius		/* set the source address */
895312444Sjpaetzel		sa.sa_family = AF_INET6;
896312444Sjpaetzel		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
897228571Sglebius		if (ifa != NULL) {
898228571Sglebius			bcopy(IFA_IN6(ifa), &ip6->ip6_src,
899228571Sglebius			    sizeof(struct in6_addr));
900228571Sglebius			ifa_free(ifa);
901228571Sglebius		} else
902228571Sglebius			/* This should never happen with IPv6. */
903228571Sglebius			bzero(&ip6->ip6_src, sizeof(struct in6_addr));
904228571Sglebius
905228571Sglebius		/* Set the multicast destination. */
906163069Sbz		ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
907142215Sglebius		ip6->ip6_dst.s6_addr8[15] = 0x12;
908163069Sbz		if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
909163069Sbz			m_freem(m);
910200026Sglebius			CARP_DEBUG("%s: in6_setscope failed\n", __func__);
911241043Sglebius			goto resched;
912163069Sbz		}
913142215Sglebius
914142215Sglebius		ch_ptr = (struct carp_header *)(&ip6[1]);
915142215Sglebius		bcopy(&ch, ch_ptr, sizeof(ch));
916142215Sglebius		if (carp_prepare_ad(m, sc, ch_ptr))
917241043Sglebius			goto resched;
918142215Sglebius
919142215Sglebius		m->m_data += sizeof(*ip6);
920244683Sglebius		ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6));
921142215Sglebius		m->m_data -= sizeof(*ip6);
922142215Sglebius
923190968Srwatson		CARPSTATS_INC(carps_opackets6);
924142215Sglebius
925264111Sglebius		carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0,
926264111Sglebius		    &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL));
927142215Sglebius	}
928142215Sglebius#endif /* INET6 */
929142215Sglebius
930241043Sglebiusresched:
931228571Sglebius	callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc);
932228571Sglebius}
933142215Sglebius
934228571Sglebiusstatic void
935228571Sglebiuscarp_addroute(struct carp_softc *sc)
936228571Sglebius{
937228571Sglebius	struct ifaddr *ifa;
938228571Sglebius
939228571Sglebius	CARP_FOREACH_IFA(sc, ifa)
940230863Sglebius		carp_ifa_addroute(ifa);
941230863Sglebius}
942230863Sglebius
943230863Sglebiusstatic void
944230863Sglebiuscarp_ifa_addroute(struct ifaddr *ifa)
945230863Sglebius{
946230863Sglebius
947230863Sglebius	switch (ifa->ifa_addr->sa_family) {
948228571Sglebius#ifdef INET
949230863Sglebius	case AF_INET:
950230863Sglebius		in_addprefix(ifatoia(ifa), RTF_UP);
951230863Sglebius		ifa_add_loopback_route(ifa,
952230863Sglebius		    (struct sockaddr *)&ifatoia(ifa)->ia_addr);
953230863Sglebius		break;
954228571Sglebius#endif
955228571Sglebius#ifdef INET6
956230863Sglebius	case AF_INET6:
957230863Sglebius		ifa_add_loopback_route(ifa,
958230863Sglebius		    (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
959230863Sglebius		in6_ifaddloop(ifa);
960230863Sglebius		break;
961228571Sglebius#endif
962230863Sglebius	}
963142215Sglebius}
964142215Sglebius
965228571Sglebiusstatic void
966228571Sglebiuscarp_delroute(struct carp_softc *sc)
967228571Sglebius{
968228571Sglebius	struct ifaddr *ifa;
969228571Sglebius
970228571Sglebius	CARP_FOREACH_IFA(sc, ifa)
971230863Sglebius		carp_ifa_delroute(ifa);
972230863Sglebius}
973230863Sglebius
974230863Sglebiusstatic void
975230863Sglebiuscarp_ifa_delroute(struct ifaddr *ifa)
976230863Sglebius{
977230863Sglebius
978230863Sglebius	switch (ifa->ifa_addr->sa_family) {
979221130Sbz#ifdef INET
980230863Sglebius	case AF_INET:
981230863Sglebius		ifa_del_loopback_route(ifa,
982230863Sglebius		    (struct sockaddr *)&ifatoia(ifa)->ia_addr);
983230863Sglebius		in_scrubprefix(ifatoia(ifa), LLE_STATIC);
984230863Sglebius		break;
985228571Sglebius#endif
986228571Sglebius#ifdef INET6
987230863Sglebius	case AF_INET6:
988230863Sglebius		ifa_del_loopback_route(ifa,
989230863Sglebius		    (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
990230863Sglebius		in6_ifremloop(ifa);
991230863Sglebius		break;
992228571Sglebius#endif
993230863Sglebius	}
994228571Sglebius}
995228571Sglebius
996246659Sglebiusint
997246659Sglebiuscarp_master(struct ifaddr *ifa)
998246659Sglebius{
999246659Sglebius	struct carp_softc *sc = ifa->ifa_carp;
1000246659Sglebius
1001246659Sglebius	return (sc->sc_state == MASTER);
1002246659Sglebius}
1003246659Sglebius
1004228571Sglebius#ifdef INET
1005142215Sglebius/*
1006142215Sglebius * Broadcast a gratuitous ARP request containing
1007142215Sglebius * the virtual router MAC address for each IP address
1008142215Sglebius * associated with the virtual router.
1009142215Sglebius */
1010142559Sglebiusstatic void
1011142215Sglebiuscarp_send_arp(struct carp_softc *sc)
1012142215Sglebius{
1013142215Sglebius	struct ifaddr *ifa;
1014142215Sglebius
1015228571Sglebius	CARP_FOREACH_IFA(sc, ifa)
1016228571Sglebius		if (ifa->ifa_addr->sa_family == AF_INET)
1017228571Sglebius			arp_ifinit2(sc->sc_carpdev, ifa, LLADDR(&sc->sc_addr));
1018228571Sglebius}
1019142215Sglebius
1020228571Sglebiusint
1021228571Sglebiuscarp_iamatch(struct ifaddr *ifa, uint8_t **enaddr)
1022228571Sglebius{
1023228571Sglebius	struct carp_softc *sc = ifa->ifa_carp;
1024142215Sglebius
1025228571Sglebius	if (sc->sc_state == MASTER) {
1026228571Sglebius		*enaddr = LLADDR(&sc->sc_addr);
1027228571Sglebius		return (1);
1028228571Sglebius	}
1029142215Sglebius
1030228571Sglebius	return (0);
1031142215Sglebius}
1032221130Sbz#endif
1033142215Sglebius
1034142215Sglebius#ifdef INET6
1035142559Sglebiusstatic void
1036142215Sglebiuscarp_send_na(struct carp_softc *sc)
1037142215Sglebius{
1038228571Sglebius	static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1039142215Sglebius	struct ifaddr *ifa;
1040142215Sglebius	struct in6_addr *in6;
1041142215Sglebius
1042228571Sglebius	CARP_FOREACH_IFA(sc, ifa) {
1043142215Sglebius		if (ifa->ifa_addr->sa_family != AF_INET6)
1044142215Sglebius			continue;
1045142215Sglebius
1046228571Sglebius		in6 = IFA_IN6(ifa);
1047142564Sglebius		nd6_na_output(sc->sc_carpdev, &mcast, in6,
1048142215Sglebius		    ND_NA_FLAG_OVERRIDE, 1, NULL);
1049142215Sglebius		DELAY(1000);	/* XXX */
1050142215Sglebius	}
1051142215Sglebius}
1052142215Sglebius
1053238769Sbz/*
1054238769Sbz * Returns ifa in case it's a carp address and it is MASTER, or if the address
1055238769Sbz * matches and is not a carp address.  Returns NULL otherwise.
1056238769Sbz */
1057142641Smlaierstruct ifaddr *
1058211157Swillcarp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr)
1059142215Sglebius{
1060142215Sglebius	struct ifaddr *ifa;
1061142215Sglebius
1062238769Sbz	ifa = NULL;
1063229621Sjhb	IF_ADDR_RLOCK(ifp);
1064238769Sbz	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1065238769Sbz		if (ifa->ifa_addr->sa_family != AF_INET6)
1066238769Sbz			continue;
1067238769Sbz		if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa)))
1068238769Sbz			continue;
1069238769Sbz		if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER)
1070238769Sbz			ifa = NULL;
1071238769Sbz		else
1072228571Sglebius			ifa_ref(ifa);
1073238769Sbz		break;
1074238769Sbz	}
1075229621Sjhb	IF_ADDR_RUNLOCK(ifp);
1076228571Sglebius
1077238769Sbz	return (ifa);
1078142215Sglebius}
1079142215Sglebius
1080211157Swillcaddr_t
1081211157Swillcarp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr)
1082142215Sglebius{
1083142215Sglebius	struct ifaddr *ifa;
1084142215Sglebius
1085229621Sjhb	IF_ADDR_RLOCK(ifp);
1086228571Sglebius	IFNET_FOREACH_IFA(ifp, ifa)
1087228571Sglebius		if (ifa->ifa_addr->sa_family == AF_INET6 &&
1088228571Sglebius		    IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) {
1089228571Sglebius			struct carp_softc *sc = ifa->ifa_carp;
1090228571Sglebius			struct m_tag *mtag;
1091142215Sglebius
1092229621Sjhb			IF_ADDR_RUNLOCK(ifp);
1093228571Sglebius
1094228571Sglebius			mtag = m_tag_get(PACKET_TAG_CARP,
1095236297Sglebius			    sizeof(struct carp_softc *), M_NOWAIT);
1096228571Sglebius			if (mtag == NULL)
1097228571Sglebius				/* Better a bit than nothing. */
1098228571Sglebius				return (LLADDR(&sc->sc_addr));
1099228571Sglebius
1100236310Sglebius			bcopy(&sc, mtag + 1, sizeof(sc));
1101228571Sglebius			m_tag_prepend(m, mtag);
1102228571Sglebius
1103228571Sglebius			return (LLADDR(&sc->sc_addr));
1104142215Sglebius		}
1105229621Sjhb	IF_ADDR_RUNLOCK(ifp);
1106142215Sglebius
1107142215Sglebius	return (NULL);
1108142215Sglebius}
1109228571Sglebius#endif /* INET6 */
1110142215Sglebius
1111228571Sglebiusint
1112211157Swillcarp_forus(struct ifnet *ifp, u_char *dhost)
1113142215Sglebius{
1114228571Sglebius	struct carp_softc *sc;
1115228571Sglebius	uint8_t *ena = dhost;
1116142215Sglebius
1117142215Sglebius	if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
1118228571Sglebius		return (0);
1119142215Sglebius
1120228571Sglebius	CIF_LOCK(ifp->if_carp);
1121228571Sglebius	IFNET_FOREACH_CARP(ifp, sc) {
1122228571Sglebius		CARP_LOCK(sc);
1123228571Sglebius		if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr),
1124228571Sglebius		    ETHER_ADDR_LEN)) {
1125228571Sglebius			CARP_UNLOCK(sc);
1126228571Sglebius			CIF_UNLOCK(ifp->if_carp);
1127228571Sglebius			return (1);
1128142215Sglebius		}
1129228571Sglebius		CARP_UNLOCK(sc);
1130228571Sglebius	}
1131228571Sglebius	CIF_UNLOCK(ifp->if_carp);
1132142215Sglebius
1133228571Sglebius	return (0);
1134142215Sglebius}
1135142215Sglebius
1136231201Sglebius/* Master down timeout event, executed in callout context. */
1137142559Sglebiusstatic void
1138142215Sglebiuscarp_master_down(void *v)
1139142215Sglebius{
1140142215Sglebius	struct carp_softc *sc = v;
1141142215Sglebius
1142228571Sglebius	CARP_LOCK_ASSERT(sc);
1143228571Sglebius
1144231201Sglebius	CURVNET_SET(sc->sc_carpdev->if_vnet);
1145228571Sglebius	if (sc->sc_state == BACKUP) {
1146228812Sglebius		CARP_LOG("VHID %u@%s: BACKUP -> MASTER (master down)\n",
1147228571Sglebius		    sc->sc_vhid,
1148228571Sglebius		    sc->sc_carpdev->if_xname);
1149228571Sglebius		carp_master_down_locked(sc);
1150228571Sglebius	}
1151231201Sglebius	CURVNET_RESTORE();
1152228571Sglebius
1153228571Sglebius	CARP_UNLOCK(sc);
1154142914Sglebius}
1155142914Sglebius
1156142914Sglebiusstatic void
1157142914Sglebiuscarp_master_down_locked(struct carp_softc *sc)
1158142914Sglebius{
1159142914Sglebius
1160228571Sglebius	CARP_LOCK_ASSERT(sc);
1161228571Sglebius
1162142215Sglebius	switch (sc->sc_state) {
1163142215Sglebius	case BACKUP:
1164142215Sglebius		carp_set_state(sc, MASTER);
1165142914Sglebius		carp_send_ad_locked(sc);
1166221130Sbz#ifdef INET
1167142215Sglebius		carp_send_arp(sc);
1168221130Sbz#endif
1169142215Sglebius#ifdef INET6
1170142215Sglebius		carp_send_na(sc);
1171228571Sglebius#endif
1172142215Sglebius		carp_setrun(sc, 0);
1173228571Sglebius		carp_addroute(sc);
1174142215Sglebius		break;
1175228571Sglebius	case INIT:
1176228571Sglebius	case MASTER:
1177228571Sglebius#ifdef INVARIANTS
1178228571Sglebius		panic("carp: VHID %u@%s: master_down event in %s state\n",
1179228571Sglebius		    sc->sc_vhid,
1180228571Sglebius		    sc->sc_carpdev->if_xname,
1181228571Sglebius		    sc->sc_state ? "MASTER" : "INIT");
1182228571Sglebius#endif
1183228571Sglebius		break;
1184142215Sglebius	}
1185142215Sglebius}
1186142215Sglebius
1187142215Sglebius/*
1188142215Sglebius * When in backup state, af indicates whether to reset the master down timer
1189142215Sglebius * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1190142215Sglebius */
1191142559Sglebiusstatic void
1192142215Sglebiuscarp_setrun(struct carp_softc *sc, sa_family_t af)
1193142215Sglebius{
1194142215Sglebius	struct timeval tv;
1195142215Sglebius
1196228571Sglebius	CARP_LOCK_ASSERT(sc);
1197142914Sglebius
1198228571Sglebius	if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 ||
1199228571Sglebius	    sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
1200228571Sglebius	    (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0))
1201142215Sglebius		return;
1202142215Sglebius
1203142215Sglebius	switch (sc->sc_state) {
1204142215Sglebius	case INIT:
1205228571Sglebius		CARP_LOG("VHID %u@%s: INIT -> BACKUP\n",
1206228571Sglebius		    sc->sc_vhid,
1207228571Sglebius		    sc->sc_carpdev->if_xname);
1208226367Sglebius		carp_set_state(sc, BACKUP);
1209226367Sglebius		carp_setrun(sc, 0);
1210142215Sglebius		break;
1211142215Sglebius	case BACKUP:
1212142215Sglebius		callout_stop(&sc->sc_ad_tmo);
1213142215Sglebius		tv.tv_sec = 3 * sc->sc_advbase;
1214142215Sglebius		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1215142215Sglebius		switch (af) {
1216142215Sglebius#ifdef INET
1217142215Sglebius		case AF_INET:
1218142215Sglebius			callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
1219142215Sglebius			    carp_master_down, sc);
1220142215Sglebius			break;
1221228571Sglebius#endif
1222142215Sglebius#ifdef INET6
1223142215Sglebius		case AF_INET6:
1224142215Sglebius			callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
1225142215Sglebius			    carp_master_down, sc);
1226142215Sglebius			break;
1227228571Sglebius#endif
1228142215Sglebius		default:
1229228571Sglebius#ifdef INET
1230142215Sglebius			if (sc->sc_naddrs)
1231142215Sglebius				callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
1232142215Sglebius				    carp_master_down, sc);
1233228571Sglebius#endif
1234228571Sglebius#ifdef INET6
1235142215Sglebius			if (sc->sc_naddrs6)
1236142215Sglebius				callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
1237142215Sglebius				    carp_master_down, sc);
1238228571Sglebius#endif
1239142215Sglebius			break;
1240142215Sglebius		}
1241142215Sglebius		break;
1242142215Sglebius	case MASTER:
1243142215Sglebius		tv.tv_sec = sc->sc_advbase;
1244142215Sglebius		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1245142215Sglebius		callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
1246142215Sglebius		    carp_send_ad, sc);
1247142215Sglebius		break;
1248142215Sglebius	}
1249142215Sglebius}
1250142215Sglebius
1251228571Sglebius/*
1252228571Sglebius * Setup multicast structures.
1253228571Sglebius */
1254228571Sglebiusstatic int
1255234130Sglebiuscarp_multicast_setup(struct carp_if *cif, sa_family_t sa)
1256156947Sglebius{
1257234130Sglebius	struct ifnet *ifp = cif->cif_ifp;
1258228571Sglebius	int error = 0;
1259166226Sglebius
1260234130Sglebius	CIF_LOCK_ASSERT(cif);
1261234130Sglebius
1262228571Sglebius	switch (sa) {
1263228571Sglebius#ifdef INET
1264228571Sglebius	case AF_INET:
1265228571Sglebius	    {
1266228571Sglebius		struct ip_moptions *imo = &cif->cif_imo;
1267228571Sglebius		struct in_addr addr;
1268228571Sglebius
1269228571Sglebius		if (imo->imo_membership)
1270228571Sglebius			return (0);
1271228571Sglebius
1272228571Sglebius		imo->imo_membership = (struct in_multi **)malloc(
1273228571Sglebius		    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP,
1274234130Sglebius		    M_NOWAIT);
1275234130Sglebius		if (imo->imo_membership == NULL)
1276234130Sglebius			return (ENOMEM);
1277228571Sglebius		imo->imo_mfilters = NULL;
1278228571Sglebius		imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
1279228571Sglebius		imo->imo_multicast_vif = -1;
1280228571Sglebius
1281228571Sglebius		addr.s_addr = htonl(INADDR_CARP_GROUP);
1282228571Sglebius		if ((error = in_joingroup(ifp, &addr, NULL,
1283228571Sglebius		    &imo->imo_membership[0])) != 0) {
1284228571Sglebius			free(imo->imo_membership, M_CARP);
1285228571Sglebius			break;
1286156947Sglebius		}
1287228571Sglebius		imo->imo_num_memberships++;
1288228571Sglebius		imo->imo_multicast_ifp = ifp;
1289228571Sglebius		imo->imo_multicast_ttl = CARP_DFLTTL;
1290228571Sglebius		imo->imo_multicast_loop = 0;
1291228571Sglebius		break;
1292228571Sglebius	   }
1293228571Sglebius#endif
1294228571Sglebius#ifdef INET6
1295228571Sglebius	case AF_INET6:
1296228571Sglebius	    {
1297228571Sglebius		struct ip6_moptions *im6o = &cif->cif_im6o;
1298228571Sglebius		struct in6_addr in6;
1299228571Sglebius		struct in6_multi *in6m;
1300228571Sglebius
1301228571Sglebius		if (im6o->im6o_membership)
1302228571Sglebius			return (0);
1303228571Sglebius
1304228571Sglebius		im6o->im6o_membership = (struct in6_multi **)malloc(
1305228571Sglebius		    (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP,
1306234130Sglebius		    M_ZERO | M_NOWAIT);
1307234130Sglebius		if (im6o->im6o_membership == NULL)
1308234130Sglebius			return (ENOMEM);
1309228571Sglebius		im6o->im6o_mfilters = NULL;
1310228571Sglebius		im6o->im6o_max_memberships = IPV6_MIN_MEMBERSHIPS;
1311228571Sglebius		im6o->im6o_multicast_hlim = CARP_DFLTTL;
1312228571Sglebius		im6o->im6o_multicast_ifp = ifp;
1313228571Sglebius
1314228571Sglebius		/* Join IPv6 CARP multicast group. */
1315228571Sglebius		bzero(&in6, sizeof(in6));
1316228571Sglebius		in6.s6_addr16[0] = htons(0xff02);
1317228571Sglebius		in6.s6_addr8[15] = 0x12;
1318228571Sglebius		if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
1319228571Sglebius			free(im6o->im6o_membership, M_CARP);
1320228571Sglebius			break;
1321228571Sglebius		}
1322228571Sglebius		in6m = NULL;
1323228571Sglebius		if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) {
1324228571Sglebius			free(im6o->im6o_membership, M_CARP);
1325228571Sglebius			break;
1326228571Sglebius		}
1327228571Sglebius		im6o->im6o_membership[0] = in6m;
1328228571Sglebius		im6o->im6o_num_memberships++;
1329228571Sglebius
1330228571Sglebius		/* Join solicited multicast address. */
1331228571Sglebius		bzero(&in6, sizeof(in6));
1332228571Sglebius		in6.s6_addr16[0] = htons(0xff02);
1333228571Sglebius		in6.s6_addr32[1] = 0;
1334228571Sglebius		in6.s6_addr32[2] = htonl(1);
1335228571Sglebius		in6.s6_addr32[3] = 0;
1336228571Sglebius		in6.s6_addr8[12] = 0xff;
1337228571Sglebius		if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
1338228571Sglebius			in6_mc_leave(im6o->im6o_membership[0], NULL);
1339228571Sglebius			free(im6o->im6o_membership, M_CARP);
1340228571Sglebius			break;
1341228571Sglebius		}
1342228571Sglebius		in6m = NULL;
1343228571Sglebius		if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) {
1344228571Sglebius			in6_mc_leave(im6o->im6o_membership[0], NULL);
1345228571Sglebius			free(im6o->im6o_membership, M_CARP);
1346228571Sglebius			break;
1347228571Sglebius		}
1348228571Sglebius		im6o->im6o_membership[1] = in6m;
1349228571Sglebius		im6o->im6o_num_memberships++;
1350228571Sglebius		break;
1351228571Sglebius	    }
1352228571Sglebius#endif
1353156947Sglebius	}
1354228571Sglebius
1355228571Sglebius	return (error);
1356166423Sglebius}
1357156947Sglebius
1358228571Sglebius/*
1359228571Sglebius * Free multicast structures.
1360228571Sglebius */
1361166423Sglebiusstatic void
1362234130Sglebiuscarp_multicast_cleanup(struct carp_if *cif, sa_family_t sa)
1363166423Sglebius{
1364166423Sglebius
1365234130Sglebius	CIF_LOCK_ASSERT(cif);
1366228571Sglebius	switch (sa) {
1367228571Sglebius#ifdef INET
1368228571Sglebius	case AF_INET:
1369234130Sglebius		if (cif->cif_naddrs == 0) {
1370228571Sglebius			struct ip_moptions *imo = &cif->cif_imo;
1371228571Sglebius
1372228571Sglebius			in_leavegroup(imo->imo_membership[0], NULL);
1373228571Sglebius			KASSERT(imo->imo_mfilters == NULL,
1374228571Sglebius			    ("%s: imo_mfilters != NULL", __func__));
1375228571Sglebius			free(imo->imo_membership, M_CARP);
1376228571Sglebius			imo->imo_membership = NULL;
1377228571Sglebius
1378191672Sbms		}
1379228571Sglebius		break;
1380228571Sglebius#endif
1381228571Sglebius#ifdef INET6
1382228571Sglebius	case AF_INET6:
1383234130Sglebius		if (cif->cif_naddrs6 == 0) {
1384228571Sglebius			struct ip6_moptions *im6o = &cif->cif_im6o;
1385228571Sglebius
1386228571Sglebius			in6_mc_leave(im6o->im6o_membership[0], NULL);
1387228571Sglebius			in6_mc_leave(im6o->im6o_membership[1], NULL);
1388228571Sglebius			KASSERT(im6o->im6o_mfilters == NULL,
1389228571Sglebius			    ("%s: im6o_mfilters != NULL", __func__));
1390228571Sglebius			free(im6o->im6o_membership, M_CARP);
1391228571Sglebius			im6o->im6o_membership = NULL;
1392228571Sglebius		}
1393228571Sglebius		break;
1394228571Sglebius#endif
1395156947Sglebius	}
1396166423Sglebius}
1397156947Sglebius
1398228571Sglebiusint
1399249925Sglebiuscarp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa)
1400142215Sglebius{
1401228571Sglebius	struct m_tag *mtag;
1402228571Sglebius	struct carp_softc *sc;
1403142215Sglebius
1404228571Sglebius	if (!sa)
1405142215Sglebius		return (0);
1406142215Sglebius
1407228571Sglebius	switch (sa->sa_family) {
1408228571Sglebius#ifdef INET
1409228571Sglebius	case AF_INET:
1410228571Sglebius		break;
1411228571Sglebius#endif
1412228571Sglebius#ifdef INET6
1413228571Sglebius	case AF_INET6:
1414228571Sglebius		break;
1415228571Sglebius#endif
1416228571Sglebius	default:
1417228571Sglebius		return (0);
1418142215Sglebius	}
1419142215Sglebius
1420228571Sglebius	mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
1421228571Sglebius	if (mtag == NULL)
1422228571Sglebius		return (0);
1423142215Sglebius
1424236310Sglebius	bcopy(mtag + 1, &sc, sizeof(sc));
1425194951Srwatson
1426228571Sglebius	/* Set the source MAC address to the Virtual Router MAC Address. */
1427228571Sglebius	switch (ifp->if_type) {
1428228571Sglebius	case IFT_ETHER:
1429234084Sglebius	case IFT_BRIDGE:
1430228571Sglebius	case IFT_L2VLAN: {
1431228571Sglebius			struct ether_header *eh;
1432142215Sglebius
1433228571Sglebius			eh = mtod(m, struct ether_header *);
1434228571Sglebius			eh->ether_shost[0] = 0;
1435228571Sglebius			eh->ether_shost[1] = 0;
1436228571Sglebius			eh->ether_shost[2] = 0x5e;
1437228571Sglebius			eh->ether_shost[3] = 0;
1438228571Sglebius			eh->ether_shost[4] = 1;
1439228571Sglebius			eh->ether_shost[5] = sc->sc_vhid;
1440228571Sglebius		}
1441228571Sglebius		break;
1442228571Sglebius	case IFT_FDDI: {
1443228571Sglebius			struct fddi_header *fh;
1444142215Sglebius
1445228571Sglebius			fh = mtod(m, struct fddi_header *);
1446228571Sglebius			fh->fddi_shost[0] = 0;
1447228571Sglebius			fh->fddi_shost[1] = 0;
1448228571Sglebius			fh->fddi_shost[2] = 0x5e;
1449228571Sglebius			fh->fddi_shost[3] = 0;
1450228571Sglebius			fh->fddi_shost[4] = 1;
1451228571Sglebius			fh->fddi_shost[5] = sc->sc_vhid;
1452194951Srwatson		}
1453228571Sglebius		break;
1454228571Sglebius	case IFT_ISO88025: {
1455228571Sglebius 			struct iso88025_header *th;
1456228571Sglebius 			th = mtod(m, struct iso88025_header *);
1457228571Sglebius			th->iso88025_shost[0] = 3;
1458228571Sglebius			th->iso88025_shost[1] = 0;
1459228571Sglebius			th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1);
1460228571Sglebius			th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1);
1461228571Sglebius			th->iso88025_shost[4] = 0;
1462228571Sglebius			th->iso88025_shost[5] = 0;
1463228571Sglebius		}
1464228571Sglebius		break;
1465228571Sglebius	default:
1466228571Sglebius		printf("%s: carp is not supported for the %d interface type\n",
1467228571Sglebius		    ifp->if_xname, ifp->if_type);
1468228571Sglebius		return (EOPNOTSUPP);
1469142215Sglebius	}
1470142215Sglebius
1471228571Sglebius	return (0);
1472228571Sglebius}
1473142215Sglebius
1474228571Sglebiusstatic struct carp_softc*
1475228571Sglebiuscarp_alloc(struct ifnet *ifp)
1476228571Sglebius{
1477228571Sglebius	struct carp_softc *sc;
1478228571Sglebius	struct carp_if *cif;
1479142215Sglebius
1480228571Sglebius	if ((cif = ifp->if_carp) == NULL) {
1481228571Sglebius		cif = carp_alloc_if(ifp);
1482228571Sglebius		if (cif == NULL)
1483228571Sglebius			return (NULL);
1484142215Sglebius	}
1485142215Sglebius
1486228571Sglebius	sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
1487142215Sglebius
1488228571Sglebius	sc->sc_advbase = CARP_DFLTINTV;
1489228571Sglebius	sc->sc_vhid = -1;	/* required setting */
1490228571Sglebius	sc->sc_init_counter = 1;
1491228571Sglebius	sc->sc_state = INIT;
1492142215Sglebius
1493228571Sglebius	sc->sc_ifasiz = sizeof(struct ifaddr *);
1494228571Sglebius	sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO);
1495228571Sglebius	sc->sc_carpdev = ifp;
1496142215Sglebius
1497228571Sglebius	CARP_LOCK_INIT(sc);
1498228571Sglebius#ifdef INET
1499228571Sglebius	callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
1500228571Sglebius#endif
1501228571Sglebius#ifdef INET6
1502228571Sglebius	callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
1503228571Sglebius#endif
1504228571Sglebius	callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
1505142215Sglebius
1506228571Sglebius	CIF_LOCK(cif);
1507228571Sglebius	TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list);
1508228571Sglebius	CIF_UNLOCK(cif);
1509142215Sglebius
1510228571Sglebius	mtx_lock(&carp_mtx);
1511228571Sglebius	LIST_INSERT_HEAD(&carp_list, sc, sc_next);
1512228571Sglebius	mtx_unlock(&carp_mtx);
1513142914Sglebius
1514228571Sglebius	return (sc);
1515142215Sglebius}
1516142215Sglebius
1517142559Sglebiusstatic int
1518228571Sglebiuscarp_grow_ifas(struct carp_softc *sc)
1519142215Sglebius{
1520228571Sglebius	struct ifaddr **new;
1521142215Sglebius
1522228571Sglebius	CARP_LOCK_ASSERT(sc);
1523142215Sglebius
1524228571Sglebius	new = malloc(sc->sc_ifasiz * 2, M_CARP, M_NOWAIT|M_ZERO);
1525228571Sglebius	if (new == NULL)
1526228571Sglebius		return (ENOMEM);
1527228571Sglebius	bcopy(sc->sc_ifas, new, sc->sc_ifasiz);
1528228571Sglebius	free(sc->sc_ifas, M_CARP);
1529228571Sglebius	sc->sc_ifas = new;
1530228571Sglebius	sc->sc_ifasiz *= 2;
1531142215Sglebius
1532228571Sglebius	return (0);
1533142215Sglebius}
1534142215Sglebius
1535228571Sglebiusstatic void
1536228571Sglebiuscarp_destroy(struct carp_softc *sc)
1537142215Sglebius{
1538228571Sglebius	struct ifnet *ifp = sc->sc_carpdev;
1539228571Sglebius	struct carp_if *cif = ifp->if_carp;
1540142215Sglebius
1541234130Sglebius	CIF_LOCK_ASSERT(cif);
1542234130Sglebius
1543228571Sglebius	TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list);
1544191672Sbms
1545228571Sglebius	mtx_lock(&carp_mtx);
1546228571Sglebius	LIST_REMOVE(sc, sc_next);
1547228571Sglebius	mtx_unlock(&carp_mtx);
1548142215Sglebius
1549228571Sglebius	CARP_LOCK(sc);
1550228736Sglebius	if (sc->sc_suppress)
1551254292Strociny		carp_demote_adj(-V_carp_ifdown_adj, "vhid removed");
1552228571Sglebius	callout_drain(&sc->sc_ad_tmo);
1553228571Sglebius#ifdef INET
1554228571Sglebius	callout_drain(&sc->sc_md_tmo);
1555228571Sglebius#endif
1556228571Sglebius#ifdef INET6
1557228571Sglebius	callout_drain(&sc->sc_md6_tmo);
1558228571Sglebius#endif
1559228571Sglebius	CARP_LOCK_DESTROY(sc);
1560142215Sglebius
1561228571Sglebius	free(sc->sc_ifas, M_CARP);
1562228571Sglebius	free(sc, M_CARP);
1563228571Sglebius}
1564142215Sglebius
1565228571Sglebiusstatic struct carp_if*
1566228571Sglebiuscarp_alloc_if(struct ifnet *ifp)
1567228571Sglebius{
1568228571Sglebius	struct carp_if *cif;
1569142215Sglebius
1570228571Sglebius	cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO);
1571142215Sglebius
1572228571Sglebius	if (ifpromisc(ifp, 1) != 0)
1573228571Sglebius		goto cleanup;
1574191672Sbms
1575228571Sglebius	CIF_LOCK_INIT(cif);
1576228571Sglebius	cif->cif_ifp = ifp;
1577228571Sglebius	TAILQ_INIT(&cif->cif_vrs);
1578142215Sglebius
1579229621Sjhb	IF_ADDR_WLOCK(ifp);
1580228571Sglebius	ifp->if_carp = cif;
1581228571Sglebius	if_ref(ifp);
1582229621Sjhb	IF_ADDR_WUNLOCK(ifp);
1583142215Sglebius
1584228571Sglebius	return (cif);
1585142215Sglebius
1586228571Sglebiuscleanup:
1587228571Sglebius	free(cif, M_CARP);
1588142215Sglebius
1589228571Sglebius	return (NULL);
1590228571Sglebius}
1591142215Sglebius
1592228571Sglebiusstatic void
1593228571Sglebiuscarp_free_if(struct carp_if *cif)
1594228571Sglebius{
1595228571Sglebius	struct ifnet *ifp = cif->cif_ifp;
1596142215Sglebius
1597228571Sglebius	CIF_LOCK_ASSERT(cif);
1598228571Sglebius	KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty",
1599228571Sglebius	    __func__));
1600142215Sglebius
1601229621Sjhb	IF_ADDR_WLOCK(ifp);
1602228571Sglebius	ifp->if_carp = NULL;
1603229621Sjhb	IF_ADDR_WUNLOCK(ifp);
1604142215Sglebius
1605228571Sglebius	CIF_LOCK_DESTROY(cif);
1606142215Sglebius
1607228571Sglebius	ifpromisc(ifp, 0);
1608255397Strociny	if_rele(ifp);
1609142215Sglebius
1610228571Sglebius	free(cif, M_CARP);
1611142215Sglebius}
1612142215Sglebius
1613228571Sglebiusstatic void
1614228571Sglebiuscarp_carprcp(struct carpreq *carpr, struct carp_softc *sc, int priv)
1615142215Sglebius{
1616142215Sglebius
1617228571Sglebius	CARP_LOCK(sc);
1618228571Sglebius	carpr->carpr_state = sc->sc_state;
1619228571Sglebius	carpr->carpr_vhid = sc->sc_vhid;
1620228571Sglebius	carpr->carpr_advbase = sc->sc_advbase;
1621228571Sglebius	carpr->carpr_advskew = sc->sc_advskew;
1622228571Sglebius	if (priv)
1623228571Sglebius		bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key));
1624228571Sglebius	else
1625228571Sglebius		bzero(carpr->carpr_key, sizeof(carpr->carpr_key));
1626228571Sglebius	CARP_UNLOCK(sc);
1627142215Sglebius}
1628142215Sglebius
1629228571Sglebiusint
1630228571Sglebiuscarp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td)
1631142215Sglebius{
1632142215Sglebius	struct carpreq carpr;
1633228571Sglebius	struct ifnet *ifp;
1634228571Sglebius	struct carp_softc *sc = NULL;
1635228571Sglebius	int error = 0, locked = 0;
1636142215Sglebius
1637228571Sglebius	if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
1638228571Sglebius		return (error);
1639142215Sglebius
1640228571Sglebius	ifp = ifunit_ref(ifr->ifr_name);
1641228571Sglebius	if (ifp == NULL)
1642228571Sglebius		return (ENXIO);
1643228571Sglebius
1644228571Sglebius	switch (ifp->if_type) {
1645228571Sglebius	case IFT_ETHER:
1646228571Sglebius	case IFT_L2VLAN:
1647234084Sglebius	case IFT_BRIDGE:
1648228571Sglebius	case IFT_FDDI:
1649228571Sglebius	case IFT_ISO88025:
1650142215Sglebius		break;
1651228571Sglebius	default:
1652228571Sglebius		error = EOPNOTSUPP;
1653228571Sglebius		goto out;
1654228571Sglebius	}
1655142215Sglebius
1656228571Sglebius	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
1657228571Sglebius		error = EADDRNOTAVAIL;
1658228571Sglebius		goto out;
1659228571Sglebius	}
1660228571Sglebius
1661228571Sglebius	switch (cmd) {
1662228571Sglebius	case SIOCSVH:
1663228571Sglebius		if ((error = priv_check(td, PRIV_NETINET_CARP)))
1664142215Sglebius			break;
1665228571Sglebius		if (carpr.carpr_vhid <= 0 || carpr.carpr_vhid > CARP_MAXVHID ||
1666228571Sglebius		    carpr.carpr_advbase < 0 || carpr.carpr_advskew < 0) {
1667228571Sglebius			error = EINVAL;
1668142215Sglebius			break;
1669142215Sglebius		}
1670142215Sglebius
1671228571Sglebius		if (ifp->if_carp) {
1672228571Sglebius			CIF_LOCK(ifp->if_carp);
1673228571Sglebius			IFNET_FOREACH_CARP(ifp, sc)
1674228571Sglebius				if (sc->sc_vhid == carpr.carpr_vhid)
1675228571Sglebius					break;
1676228571Sglebius			CIF_UNLOCK(ifp->if_carp);
1677142215Sglebius		}
1678228571Sglebius		if (sc == NULL) {
1679228571Sglebius			sc = carp_alloc(ifp);
1680228571Sglebius			if (sc == NULL) {
1681228571Sglebius				error = EINVAL; /* XXX: ifpromisc failed */
1682228571Sglebius				break;
1683228571Sglebius			}
1684142215Sglebius
1685228571Sglebius			CARP_LOCK(sc);
1686228571Sglebius			sc->sc_vhid = carpr.carpr_vhid;
1687228571Sglebius			LLADDR(&sc->sc_addr)[0] = 0;
1688228571Sglebius			LLADDR(&sc->sc_addr)[1] = 0;
1689228571Sglebius			LLADDR(&sc->sc_addr)[2] = 0x5e;
1690228571Sglebius			LLADDR(&sc->sc_addr)[3] = 0;
1691228571Sglebius			LLADDR(&sc->sc_addr)[4] = 1;
1692228571Sglebius			LLADDR(&sc->sc_addr)[5] = sc->sc_vhid;
1693228571Sglebius		} else
1694228571Sglebius			CARP_LOCK(sc);
1695228571Sglebius		locked = 1;
1696228571Sglebius		if (carpr.carpr_advbase > 0) {
1697228571Sglebius			if (carpr.carpr_advbase > 255 ||
1698228571Sglebius			    carpr.carpr_advbase < CARP_DFLTINTV) {
1699228571Sglebius				error = EINVAL;
1700228571Sglebius				break;
1701228571Sglebius			}
1702228571Sglebius			sc->sc_advbase = carpr.carpr_advbase;
1703142914Sglebius		}
1704278075Sloos		if (carpr.carpr_advskew >= 255) {
1705278075Sloos			error = EINVAL;
1706278075Sloos			break;
1707142215Sglebius		}
1708278075Sloos		sc->sc_advskew = carpr.carpr_advskew;
1709228571Sglebius		if (carpr.carpr_key[0] != '\0') {
1710228571Sglebius			bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
1711228571Sglebius			carp_hmac_prepare(sc);
1712142914Sglebius		}
1713228571Sglebius		if (sc->sc_state != INIT &&
1714228571Sglebius		    carpr.carpr_state != sc->sc_state) {
1715142215Sglebius			switch (carpr.carpr_state) {
1716142215Sglebius			case BACKUP:
1717142215Sglebius				callout_stop(&sc->sc_ad_tmo);
1718142215Sglebius				carp_set_state(sc, BACKUP);
1719142215Sglebius				carp_setrun(sc, 0);
1720228571Sglebius				carp_delroute(sc);
1721142215Sglebius				break;
1722142215Sglebius			case MASTER:
1723142914Sglebius				carp_master_down_locked(sc);
1724142215Sglebius				break;
1725142215Sglebius			default:
1726142215Sglebius				break;
1727142215Sglebius			}
1728142215Sglebius		}
1729228571Sglebius		break;
1730228571Sglebius
1731228571Sglebius	case SIOCGVH:
1732228571Sglebius	    {
1733228571Sglebius		int priveleged;
1734228571Sglebius
1735228571Sglebius		if (carpr.carpr_vhid < 0 || carpr.carpr_vhid > CARP_MAXVHID) {
1736228571Sglebius			error = EINVAL;
1737228571Sglebius			break;
1738228571Sglebius		}
1739228571Sglebius		if (carpr.carpr_count < 1) {
1740228571Sglebius			error = EMSGSIZE;
1741228571Sglebius			break;
1742228571Sglebius		}
1743228571Sglebius		if (ifp->if_carp == NULL) {
1744228571Sglebius			error = ENOENT;
1745228571Sglebius			break;
1746228571Sglebius		}
1747228571Sglebius
1748228571Sglebius		priveleged = (priv_check(td, PRIV_NETINET_CARP) == 0);
1749228571Sglebius		if (carpr.carpr_vhid != 0) {
1750228571Sglebius			CIF_LOCK(ifp->if_carp);
1751228571Sglebius			IFNET_FOREACH_CARP(ifp, sc)
1752228571Sglebius				if (sc->sc_vhid == carpr.carpr_vhid)
1753170373Sglebius					break;
1754228571Sglebius			CIF_UNLOCK(ifp->if_carp);
1755228571Sglebius			if (sc == NULL) {
1756228571Sglebius				error = ENOENT;
1757142215Sglebius				break;
1758142215Sglebius			}
1759228571Sglebius			carp_carprcp(&carpr, sc, priveleged);
1760228571Sglebius			error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
1761228571Sglebius		} else  {
1762228571Sglebius			int i, count;
1763228571Sglebius
1764228571Sglebius			count = 0;
1765228571Sglebius			CIF_LOCK(ifp->if_carp);
1766228571Sglebius			IFNET_FOREACH_CARP(ifp, sc)
1767228571Sglebius				count++;
1768228571Sglebius
1769228571Sglebius			if (count > carpr.carpr_count) {
1770228571Sglebius				CIF_UNLOCK(ifp->if_carp);
1771228571Sglebius				error = EMSGSIZE;
1772142215Sglebius				break;
1773142215Sglebius			}
1774228571Sglebius
1775228571Sglebius			i = 0;
1776228571Sglebius			IFNET_FOREACH_CARP(ifp, sc) {
1777228571Sglebius				carp_carprcp(&carpr, sc, priveleged);
1778228571Sglebius				carpr.carpr_count = count;
1779228571Sglebius				error = copyout(&carpr, ifr->ifr_data +
1780228571Sglebius				    (i * sizeof(carpr)), sizeof(carpr));
1781228571Sglebius				if (error) {
1782228571Sglebius					CIF_UNLOCK(ifp->if_carp);
1783228571Sglebius					break;
1784228571Sglebius				}
1785228571Sglebius				i++;
1786228571Sglebius			}
1787228571Sglebius			CIF_UNLOCK(ifp->if_carp);
1788142215Sglebius		}
1789142215Sglebius		break;
1790228571Sglebius	    }
1791142215Sglebius	default:
1792142215Sglebius		error = EINVAL;
1793142215Sglebius	}
1794142215Sglebius
1795228571Sglebiusout:
1796142914Sglebius	if (locked)
1797228571Sglebius		CARP_UNLOCK(sc);
1798228571Sglebius	if_rele(ifp);
1799142914Sglebius
1800142215Sglebius	return (error);
1801142215Sglebius}
1802142215Sglebius
1803142215Sglebiusstatic int
1804228571Sglebiuscarp_get_vhid(struct ifaddr *ifa)
1805142215Sglebius{
1806147611Sdwmalone
1807228571Sglebius	if (ifa == NULL || ifa->ifa_carp == NULL)
1808228571Sglebius		return (0);
1809142215Sglebius
1810228571Sglebius	return (ifa->ifa_carp->sc_vhid);
1811142215Sglebius}
1812142215Sglebius
1813142215Sglebiusint
1814228571Sglebiuscarp_attach(struct ifaddr *ifa, int vhid)
1815142215Sglebius{
1816228571Sglebius	struct ifnet *ifp = ifa->ifa_ifp;
1817234130Sglebius	struct carp_if *cif = ifp->if_carp;
1818142215Sglebius	struct carp_softc *sc;
1819228571Sglebius	int index, error;
1820142215Sglebius
1821228571Sglebius	if (ifp->if_carp == NULL)
1822228571Sglebius		return (ENOPROTOOPT);
1823142215Sglebius
1824228571Sglebius	switch (ifa->ifa_addr->sa_family) {
1825142215Sglebius#ifdef INET
1826142215Sglebius	case AF_INET:
1827228571Sglebius#endif
1828142215Sglebius#ifdef INET6
1829142215Sglebius	case AF_INET6:
1830228571Sglebius#endif
1831142215Sglebius		break;
1832142215Sglebius	default:
1833228571Sglebius		return (EPROTOTYPE);
1834142215Sglebius	}
1835142215Sglebius
1836234130Sglebius	CIF_LOCK(cif);
1837228571Sglebius	IFNET_FOREACH_CARP(ifp, sc)
1838228571Sglebius		if (sc->sc_vhid == vhid)
1839228571Sglebius			break;
1840234130Sglebius	if (sc == NULL) {
1841234130Sglebius		CIF_UNLOCK(cif);
1842228571Sglebius		return (ENOENT);
1843234130Sglebius	}
1844142215Sglebius
1845228571Sglebius	if (ifa->ifa_carp) {
1846228571Sglebius		if (ifa->ifa_carp->sc_vhid != vhid)
1847234130Sglebius			carp_detach_locked(ifa);
1848234130Sglebius		else {
1849234130Sglebius			CIF_UNLOCK(cif);
1850228571Sglebius			return (0);
1851234130Sglebius		}
1852228571Sglebius	}
1853142215Sglebius
1854234130Sglebius	error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family);
1855234130Sglebius	if (error) {
1856234130Sglebius		CIF_FREE(cif);
1857228571Sglebius		return (error);
1858234130Sglebius	}
1859142215Sglebius
1860228571Sglebius	CARP_LOCK(sc);
1861228571Sglebius	index = sc->sc_naddrs + sc->sc_naddrs6 + 1;
1862228571Sglebius	if (index > sc->sc_ifasiz / sizeof(struct ifaddr *))
1863228571Sglebius		if ((error = carp_grow_ifas(sc)) != 0) {
1864234130Sglebius			carp_multicast_cleanup(cif,
1865228571Sglebius			    ifa->ifa_addr->sa_family);
1866228571Sglebius			CARP_UNLOCK(sc);
1867234130Sglebius			CIF_FREE(cif);
1868228571Sglebius			return (error);
1869142215Sglebius		}
1870142215Sglebius
1871228571Sglebius	switch (ifa->ifa_addr->sa_family) {
1872228571Sglebius#ifdef INET
1873228571Sglebius	case AF_INET:
1874234130Sglebius		cif->cif_naddrs++;
1875228571Sglebius		sc->sc_naddrs++;
1876142215Sglebius		break;
1877228571Sglebius#endif
1878228571Sglebius#ifdef INET6
1879228571Sglebius	case AF_INET6:
1880234130Sglebius		cif->cif_naddrs6++;
1881228571Sglebius		sc->sc_naddrs6++;
1882142215Sglebius		break;
1883228571Sglebius#endif
1884142215Sglebius	}
1885142215Sglebius
1886228571Sglebius	ifa_ref(ifa);
1887228571Sglebius	sc->sc_ifas[index - 1] = ifa;
1888228571Sglebius	ifa->ifa_carp = sc;
1889228571Sglebius
1890228571Sglebius	carp_hmac_prepare(sc);
1891228571Sglebius	carp_sc_state(sc);
1892228571Sglebius
1893228571Sglebius	CARP_UNLOCK(sc);
1894234130Sglebius	CIF_UNLOCK(cif);
1895228571Sglebius
1896142215Sglebius	return (0);
1897142215Sglebius}
1898142215Sglebius
1899228571Sglebiusvoid
1900228571Sglebiuscarp_detach(struct ifaddr *ifa)
1901142215Sglebius{
1902234130Sglebius	struct ifnet *ifp = ifa->ifa_ifp;
1903234130Sglebius	struct carp_if *cif = ifp->if_carp;
1904234130Sglebius
1905234130Sglebius	CIF_LOCK(cif);
1906234130Sglebius	carp_detach_locked(ifa);
1907234130Sglebius	CIF_FREE(cif);
1908234130Sglebius}
1909234130Sglebius
1910234130Sglebiusstatic void
1911234130Sglebiuscarp_detach_locked(struct ifaddr *ifa)
1912234130Sglebius{
1913234130Sglebius	struct ifnet *ifp = ifa->ifa_ifp;
1914234130Sglebius	struct carp_if *cif = ifp->if_carp;
1915228571Sglebius	struct carp_softc *sc = ifa->ifa_carp;
1916228571Sglebius	int i, index;
1917142914Sglebius
1918228571Sglebius	KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa));
1919142914Sglebius
1920234130Sglebius	CIF_LOCK_ASSERT(cif);
1921228571Sglebius	CARP_LOCK(sc);
1922142215Sglebius
1923228571Sglebius	/* Shift array. */
1924228571Sglebius	index = sc->sc_naddrs + sc->sc_naddrs6;
1925228571Sglebius	for (i = 0; i < index; i++)
1926228571Sglebius		if (sc->sc_ifas[i] == ifa)
1927228571Sglebius			break;
1928228571Sglebius	KASSERT(i < index, ("%s: %p no backref", __func__, ifa));
1929228571Sglebius	for (; i < index - 1; i++)
1930228571Sglebius		sc->sc_ifas[i] = sc->sc_ifas[i+1];
1931228571Sglebius	sc->sc_ifas[index - 1] = NULL;
1932228571Sglebius
1933228571Sglebius	switch (ifa->ifa_addr->sa_family) {
1934228571Sglebius#ifdef INET
1935228571Sglebius	case AF_INET:
1936234130Sglebius		cif->cif_naddrs--;
1937228571Sglebius		sc->sc_naddrs--;
1938142215Sglebius		break;
1939228571Sglebius#endif
1940228571Sglebius#ifdef INET6
1941228571Sglebius	case AF_INET6:
1942234130Sglebius		cif->cif_naddrs6--;
1943228571Sglebius		sc->sc_naddrs6--;
1944142215Sglebius		break;
1945228571Sglebius#endif
1946142215Sglebius	}
1947228571Sglebius
1948230863Sglebius	carp_ifa_delroute(ifa);
1949234130Sglebius	carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family);
1950228571Sglebius
1951228571Sglebius	ifa->ifa_carp = NULL;
1952228571Sglebius	ifa_free(ifa);
1953228571Sglebius
1954228571Sglebius	carp_hmac_prepare(sc);
1955228571Sglebius	carp_sc_state(sc);
1956228571Sglebius
1957228571Sglebius	if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) {
1958228571Sglebius		CARP_UNLOCK(sc);
1959228571Sglebius		carp_destroy(sc);
1960228571Sglebius	} else
1961228571Sglebius		CARP_UNLOCK(sc);
1962142215Sglebius}
1963142215Sglebius
1964228571Sglebiusstatic void
1965228571Sglebiuscarp_set_state(struct carp_softc *sc, int state)
1966142215Sglebius{
1967142914Sglebius
1968228571Sglebius	CARP_LOCK_ASSERT(sc);
1969228571Sglebius
1970228571Sglebius	if (sc->sc_state != state) {
1971228571Sglebius		const char *carp_states[] = { CARP_STATES };
1972228571Sglebius		char subsys[IFNAMSIZ+5];
1973228571Sglebius
1974228571Sglebius		sc->sc_state = state;
1975228571Sglebius
1976228571Sglebius		snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid,
1977228571Sglebius		    sc->sc_carpdev->if_xname);
1978228571Sglebius		devctl_notify("CARP", subsys, carp_states[state], NULL);
1979228571Sglebius	}
1980142914Sglebius}
1981142914Sglebius
1982142914Sglebiusstatic void
1983228571Sglebiuscarp_linkstate(struct ifnet *ifp)
1984142914Sglebius{
1985142215Sglebius	struct carp_softc *sc;
1986142215Sglebius
1987228571Sglebius	CIF_LOCK(ifp->if_carp);
1988228571Sglebius	IFNET_FOREACH_CARP(ifp, sc) {
1989228571Sglebius		CARP_LOCK(sc);
1990228571Sglebius		carp_sc_state(sc);
1991228571Sglebius		CARP_UNLOCK(sc);
1992228571Sglebius	}
1993228571Sglebius	CIF_UNLOCK(ifp->if_carp);
1994144329Sglebius}
1995144329Sglebius
1996144329Sglebiusstatic void
1997228571Sglebiuscarp_sc_state(struct carp_softc *sc)
1998144329Sglebius{
1999144329Sglebius
2000228571Sglebius	CARP_LOCK_ASSERT(sc);
2001228571Sglebius
2002144329Sglebius	if (sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
2003144329Sglebius	    !(sc->sc_carpdev->if_flags & IFF_UP)) {
2004144329Sglebius		callout_stop(&sc->sc_ad_tmo);
2005228571Sglebius#ifdef INET
2006144329Sglebius		callout_stop(&sc->sc_md_tmo);
2007228571Sglebius#endif
2008228571Sglebius#ifdef INET6
2009144329Sglebius		callout_stop(&sc->sc_md6_tmo);
2010228571Sglebius#endif
2011144329Sglebius		carp_set_state(sc, INIT);
2012144329Sglebius		carp_setrun(sc, 0);
2013228736Sglebius		if (!sc->sc_suppress)
2014254292Strociny			carp_demote_adj(V_carp_ifdown_adj, "interface down");
2015144329Sglebius		sc->sc_suppress = 1;
2016144329Sglebius	} else {
2017144329Sglebius		carp_set_state(sc, INIT);
2018144329Sglebius		carp_setrun(sc, 0);
2019144329Sglebius		if (sc->sc_suppress)
2020254292Strociny			carp_demote_adj(-V_carp_ifdown_adj, "interface up");
2021144329Sglebius		sc->sc_suppress = 0;
2022142215Sglebius	}
2023142215Sglebius}
2024142215Sglebius
2025228736Sglebiusstatic void
2026228736Sglebiuscarp_demote_adj(int adj, char *reason)
2027228736Sglebius{
2028254292Strociny	atomic_add_int(&V_carp_demotion, adj);
2029254292Strociny	CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason);
2030228736Sglebius	taskqueue_enqueue(taskqueue_swi, &carp_sendall_task);
2031228736Sglebius}
2032228571Sglebius
2033244681Sglebiusstatic int
2034244681Sglebiuscarp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS)
2035244681Sglebius{
2036244681Sglebius	int new, error;
2037244681Sglebius
2038254292Strociny	new = V_carp_demotion;
2039244681Sglebius	error = sysctl_handle_int(oidp, &new, 0, req);
2040244681Sglebius	if (error || !req->newptr)
2041244681Sglebius		return (error);
2042244681Sglebius
2043244681Sglebius	carp_demote_adj(new, "sysctl");
2044244681Sglebius
2045244681Sglebius	return (0);
2046244681Sglebius}
2047244681Sglebius
#ifdef INET
/*
 * Protocol switch entry for IPPROTO_CARP in the inet domain: CARP input
 * goes to carp_input(), everything else uses the raw IP handlers.
 * Registered by carp_mod_load() through pf_proto_register().
 */
extern	struct domain inetdomain;
static struct protosw in_carp_protosw = {
	.pr_type =		SOCK_RAW,
	.pr_domain =		&inetdomain,
	.pr_protocol =		IPPROTO_CARP,
	.pr_flags =		PR_ATOMIC | PR_ADDR,
	.pr_input =		carp_input,
	.pr_output =		(pr_output_t *)rip_output,
	.pr_ctloutput =		rip_ctloutput,
	.pr_usrreqs =		&rip_usrreqs
};
#endif /* INET */
2061211157Swill
#ifdef INET6
/*
 * Protocol switch entry for IPPROTO_CARP in the inet6 domain: CARP input
 * goes to carp6_input(), everything else uses the raw IPv6 handlers.
 * Registered by carp_mod_load() through pf_proto_register().
 */
extern	struct domain inet6domain;
static struct ip6protosw in6_carp_protosw = {
	.pr_type =		SOCK_RAW,
	.pr_domain =		&inet6domain,
	.pr_protocol =		IPPROTO_CARP,
	.pr_flags =		PR_ATOMIC | PR_ADDR,
	.pr_input =		carp6_input,
	.pr_output =		rip6_output,
	.pr_ctloutput =		rip6_ctloutput,
	.pr_usrreqs =		&rip6_usrreqs
};
#endif /* INET6 */
2075211157Swill
2076211157Swillstatic void
2077211157Swillcarp_mod_cleanup(void)
2078211157Swill{
2079211157Swill
2080211157Swill#ifdef INET
2081211157Swill	if (proto_reg[CARP_INET] == 0) {
2082212266Swill		(void)ipproto_unregister(IPPROTO_CARP);
2083211157Swill		pf_proto_unregister(PF_INET, IPPROTO_CARP, SOCK_RAW);
2084211157Swill		proto_reg[CARP_INET] = -1;
2085211157Swill	}
2086211157Swill	carp_iamatch_p = NULL;
2087211157Swill#endif
2088211157Swill#ifdef INET6
2089211157Swill	if (proto_reg[CARP_INET6] == 0) {
2090212266Swill		(void)ip6proto_unregister(IPPROTO_CARP);
2091211157Swill		pf_proto_unregister(PF_INET6, IPPROTO_CARP, SOCK_RAW);
2092211157Swill		proto_reg[CARP_INET6] = -1;
2093211157Swill	}
2094211157Swill	carp_iamatch6_p = NULL;
2095211157Swill	carp_macmatch6_p = NULL;
2096211157Swill#endif
2097228571Sglebius	carp_ioctl_p = NULL;
2098228571Sglebius	carp_attach_p = NULL;
2099228571Sglebius	carp_detach_p = NULL;
2100228571Sglebius	carp_get_vhid_p = NULL;
2101211157Swill	carp_linkstate_p = NULL;
2102211157Swill	carp_forus_p = NULL;
2103211157Swill	carp_output_p = NULL;
2104228736Sglebius	carp_demote_adj_p = NULL;
2105246659Sglebius	carp_master_p = NULL;
2106228736Sglebius	mtx_unlock(&carp_mtx);
2107228736Sglebius	taskqueue_drain(taskqueue_swi, &carp_sendall_task);
2108211157Swill	mtx_destroy(&carp_mtx);
2109211157Swill}
2110211157Swill
2111142215Sglebiusstatic int
2112211157Swillcarp_mod_load(void)
2113211157Swill{
2114212266Swill	int err;
2115211157Swill
2116211157Swill	mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF);
2117228571Sglebius	LIST_INIT(&carp_list);
2118228571Sglebius	carp_get_vhid_p = carp_get_vhid;
2119211157Swill	carp_forus_p = carp_forus;
2120211157Swill	carp_output_p = carp_output;
2121228571Sglebius	carp_linkstate_p = carp_linkstate;
2122228571Sglebius	carp_ioctl_p = carp_ioctl;
2123228571Sglebius	carp_attach_p = carp_attach;
2124228571Sglebius	carp_detach_p = carp_detach;
2125228736Sglebius	carp_demote_adj_p = carp_demote_adj;
2126246659Sglebius	carp_master_p = carp_master;
2127211157Swill#ifdef INET6
2128211157Swill	carp_iamatch6_p = carp_iamatch6;
2129211157Swill	carp_macmatch6_p = carp_macmatch6;
2130211157Swill	proto_reg[CARP_INET6] = pf_proto_register(PF_INET6,
2131211157Swill	    (struct protosw *)&in6_carp_protosw);
2132228571Sglebius	if (proto_reg[CARP_INET6]) {
2133211157Swill		printf("carp: error %d attaching to PF_INET6\n",
2134211157Swill		    proto_reg[CARP_INET6]);
2135211157Swill		carp_mod_cleanup();
2136212898Sglebius		return (proto_reg[CARP_INET6]);
2137211157Swill	}
2138212266Swill	err = ip6proto_register(IPPROTO_CARP);
2139212266Swill	if (err) {
2140212266Swill		printf("carp: error %d registering with INET6\n", err);
2141212266Swill		carp_mod_cleanup();
2142212898Sglebius		return (err);
2143212266Swill	}
2144211157Swill#endif
2145211157Swill#ifdef INET
2146211157Swill	carp_iamatch_p = carp_iamatch;
2147211157Swill	proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw);
2148228571Sglebius	if (proto_reg[CARP_INET]) {
2149211157Swill		printf("carp: error %d attaching to PF_INET\n",
2150211157Swill		    proto_reg[CARP_INET]);
2151211157Swill		carp_mod_cleanup();
2152212898Sglebius		return (proto_reg[CARP_INET]);
2153211157Swill	}
2154212266Swill	err = ipproto_register(IPPROTO_CARP);
2155212266Swill	if (err) {
2156212266Swill		printf("carp: error %d registering with INET\n", err);
2157212266Swill		carp_mod_cleanup();
2158212898Sglebius		return (err);
2159212266Swill	}
2160211157Swill#endif
2161228571Sglebius	return (0);
2162211157Swill}
2163211157Swill
2164211157Swillstatic int
2165142215Sglebiuscarp_modevent(module_t mod, int type, void *data)
2166142215Sglebius{
2167142215Sglebius	switch (type) {
2168142215Sglebius	case MOD_LOAD:
2169211157Swill		return carp_mod_load();
2170211157Swill		/* NOTREACHED */
2171142215Sglebius	case MOD_UNLOAD:
2172228571Sglebius		mtx_lock(&carp_mtx);
2173228571Sglebius		if (LIST_EMPTY(&carp_list))
2174228571Sglebius			carp_mod_cleanup();
2175228571Sglebius		else {
2176228571Sglebius			mtx_unlock(&carp_mtx);
2177228571Sglebius			return (EBUSY);
2178228571Sglebius		}
2179142215Sglebius		break;
2180142215Sglebius
2181142215Sglebius	default:
2182156947Sglebius		return (EINVAL);
2183142215Sglebius	}
2184142215Sglebius
2185156947Sglebius	return (0);
2186142215Sglebius}
2187142215Sglebius
2188142215Sglebiusstatic moduledata_t carp_mod = {
2189142215Sglebius	"carp",
2190142215Sglebius	carp_modevent,
2191241394Skevlo	0
2192142215Sglebius};
2193142215Sglebius
2194212265SwillDECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);
2195