1/*	$NetBSD: ip_carp.c,v 1.47.4.1 2012/04/02 18:25:35 riz Exp $	*/
2/*	$OpenBSD: ip_carp.c,v 1.113 2005/11/04 08:11:54 mcbride Exp $	*/
3
4/*
5 * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
6 * Copyright (c) 2003 Ryan McBride. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27 * THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#include "opt_inet.h"
31
32#include <sys/cdefs.h>
33__KERNEL_RCSID(0, "$NetBSD: ip_carp.c,v 1.47.4.1 2012/04/02 18:25:35 riz Exp $");
34
35/*
36 * TODO:
37 *	- iface reconfigure
38 *	- support for hardware checksum calculations;
39 *
40 */
41
42#include <sys/param.h>
43#include <sys/proc.h>
44#include <sys/mbuf.h>
45#include <sys/socket.h>
46#include <sys/socketvar.h>
47#include <sys/callout.h>
48#include <sys/ioctl.h>
49#include <sys/errno.h>
50#include <sys/device.h>
51#include <sys/time.h>
52#include <sys/kernel.h>
53#include <sys/kauth.h>
54#include <sys/sysctl.h>
55#include <sys/ucred.h>
56#include <sys/syslog.h>
57#include <sys/acct.h>
58#include <sys/cprng.h>
59
60#include <sys/cpu.h>
61
62#include <net/if.h>
63#include <net/pfil.h>
64#include <net/if_types.h>
65#include <net/if_ether.h>
66#include <net/route.h>
67#include <net/netisr.h>
68#include <net/net_stats.h>
69#include <netinet/if_inarp.h>
70
71#if NFDDI > 0
72#include <net/if_fddi.h>
73#endif
74#if NTOKEN > 0
75#include <net/if_token.h>
76#endif
77
78#ifdef INET
79#include <netinet/in.h>
80#include <netinet/in_systm.h>
81#include <netinet/in_var.h>
82#include <netinet/ip.h>
83#include <netinet/ip_var.h>
84
85#include <net/if_dl.h>
86#endif
87
88#ifdef INET6
89#include <netinet/icmp6.h>
90#include <netinet/ip6.h>
91#include <netinet6/ip6_var.h>
92#include <netinet6/nd6.h>
93#include <netinet6/scope6_var.h>
94#endif
95
96#include <net/bpf.h>
97
98#include <sys/sha1.h>
99
100#include <netinet/ip_carp.h>
101
/*
 * Element type of the list headed by carp_softc.carp_mc_listhead.
 * NOTE(review): presumably one entry per link-layer multicast address
 * joined through the carp interface -- confirm against
 * carp_ether_addmulti().
 */
struct carp_mc_entry {
	LIST_ENTRY(carp_mc_entry)	mc_entries;	/* list linkage */
	union {
		struct ether_multi	*mcu_enm;
	} mc_u;
	struct sockaddr_storage		mc_addr;
};
/* Shorthand for the only member of the mc_u union. */
#define	mc_enm	mc_u.mcu_enm
110
/*
 * Per-interface state of one carp(4) pseudo-interface, i.e. one virtual
 * host.  The embedded ethercom/ifnet is what the rest of the network
 * stack sees; if_softc of that ifnet points back to this structure.
 */
struct carp_softc {
	struct ethercom sc_ac;
#define	sc_if		sc_ac.ec_if		/* our own ifnet */
#define	sc_carpdev	sc_ac.ec_if.if_carpdev	/* parent interface, or NULL */
	int ah_cookie;
	int lh_cookie;
	struct ip_moptions sc_imo;	/* multicast options for ip_output() */
#ifdef INET6
	struct ip6_moptions sc_im6o;	/* multicast options for ip6_output() */
#endif /* INET6 */
	TAILQ_ENTRY(carp_softc) sc_list;	/* linkage on carp_if.vhif_vrs */

	enum { INIT = 0, BACKUP, MASTER }	sc_state;

	/* nonzero: this softc holds one count in carp_suppress_preempt */
	int sc_suppress;
	/* nonzero: next advertisement is sent with advbase/advskew 255 */
	int sc_bow_out;

	/* consecutive failed advertisement transmissions */
	int sc_sendad_errors;
#define CARP_SENDAD_MAX_ERRORS	3
	/* successful transmissions since the error limit was reached */
	int sc_sendad_success;
#define CARP_SENDAD_MIN_SUCCESS 3

	int sc_vhid;		/* virtual host id; -1 until configured */
	int sc_advskew;
	int sc_naddrs;		/* nonzero enables IPv4 advertisements */
	int sc_naddrs6;		/* nonzero enables IPv6 advertisements */
	int sc_advbase;		/* seconds */
	/* nonzero: pick a fresh random sc_counter for the next advertisement */
	int sc_init_counter;
	u_int64_t sc_counter;	/* 64-bit counter carried in advertisements */

	/* authentication */
#define CARP_HMAC_PAD	64
	unsigned char sc_key[CARP_KEY_LEN];	/* shared secret */
	/* key XOR opad after carp_hmac_prepare() has run */
	unsigned char sc_pad[CARP_HMAC_PAD];
	SHA1_CTX sc_sha1;	/* precomputed inner-hash state */
	u_int32_t sc_hashkey[2];	/* key material used by carp_hash() */

	struct callout sc_ad_tmo;	/* advertisement timeout */
	struct callout sc_md_tmo;	/* master down timeout */
	/* master down timeout; NOTE(review): presumably the IPv6 one */
	struct callout sc_md6_tmo;

	LIST_HEAD(__carp_mchead, carp_mc_entry)	carp_mc_listhead;
};
154
/*
 * Global preemption-suppression count.  While it is nonzero the
 * advertisement code in this file uses an advskew of at least 240.
 */
int carp_suppress_preempt = 0;
/* Option values indexed by the CARPCTL_* identifiers. */
int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 0, 0 };	/* XXX for now */

/* Per-CPU statistics counters, allocated in carpattach(). */
static percpu_t *carpstat_percpu;

/* Increment the statistics counter selected by CARP_STAT_* index x. */
#define	CARP_STATINC(x)		_NET_STATINC(carpstat_percpu, x)
161
/*
 * State hung off a parent interface's if_carp pointer: the carp
 * softcs currently attached to that interface.
 */
struct carp_if {
	TAILQ_HEAD(, carp_softc) vhif_vrs;	/* attached carp_softcs */
	int vhif_nvrs;				/* number of entries above */

	/* NOTE(review): presumably the parent interface itself -- confirm */
	struct ifnet *vhif_ifp;
};
168
/*
 * Emit an informational log line when the CARPCTL_LOG option is set.
 *
 *   sc  pointer to a carp_softc, or a pointer variable holding NULL;
 *       selects the "<ifname>: " or "carp: " prefix.
 *   s   parenthesised addlog() argument list, e.g. ("bad ttl %d", ttl).
 *
 * The body is wrapped in do { } while (0) so that the macro always
 * expands to exactly one statement and can be used safely as the
 * unbraced body of an if/else.
 */
#define	CARP_LOG(sc, s)							\
	do {								\
		if (carp_opts[CARPCTL_LOG]) {				\
			if (sc)						\
				log(LOG_INFO, "%s: ",			\
				    (sc)->sc_if.if_xname);		\
			else						\
				log(LOG_INFO, "carp: ");		\
			addlog s;					\
			addlog("\n");					\
		}							\
	} while (/*CONSTCOND*/ 0)
179
/* HMAC-SHA1 authentication of advertisements. */
void	carp_hmac_prepare(struct carp_softc *);
void	carp_hmac_generate(struct carp_softc *, u_int32_t *,
	    unsigned char *);
int	carp_hmac_verify(struct carp_softc *, u_int32_t *,
	    unsigned char *);
/* Routing, packet input, attach/detach and advertisement handling. */
void	carp_setroute(struct carp_softc *, int);
void	carp_proto_input_c(struct mbuf *, struct carp_header *, sa_family_t);
void	carpattach(int);
void	carpdetach(struct carp_softc *);
int	carp_prepare_ad(struct mbuf *, struct carp_softc *,
	    struct carp_header *);
void	carp_send_ad_all(void);
void	carp_send_ad(void *);
void	carp_send_arp(struct carp_softc *);
void	carp_master_down(void *);
int	carp_ioctl(struct ifnet *, u_long, void *);
void	carp_start(struct ifnet *);
void	carp_setrun(struct carp_softc *, sa_family_t);
void	carp_set_state(struct carp_softc *, int);
int	carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
/* Selector values for the last argument of carp_addrcount(). */
enum	{ CARP_COUNT_MASTER, CARP_COUNT_RUNNING };

void	carp_multicast_cleanup(struct carp_softc *);
int	carp_set_ifp(struct carp_softc *, struct ifnet *);
void	carp_set_enaddr(struct carp_softc *);
void	carp_addr_updated(void *);
u_int32_t	carp_hash(struct carp_softc *, u_char *);
int	carp_set_addr(struct carp_softc *, struct sockaddr_in *);
int	carp_join_multicast(struct carp_softc *);
#ifdef INET6
void	carp_send_na(struct carp_softc *);
int	carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
int	carp_join_multicast6(struct carp_softc *);
#endif
/* Interface cloning and link-layer multicast bookkeeping. */
int	carp_clone_create(struct if_clone *, int);
int	carp_clone_destroy(struct ifnet *);
int	carp_ether_addmulti(struct carp_softc *, struct ifreq *);
int	carp_ether_delmulti(struct carp_softc *, struct ifreq *);
void	carp_ether_purgemulti(struct carp_softc *);

static void sysctl_net_inet_carp_setup(struct sysctllog **);

/* Cloner registered by carpattach(); creates/destroys "carpN" interfaces. */
struct if_clone carp_cloner =
    IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy);
224
225static __inline u_int16_t
226carp_cksum(struct mbuf *m, int len)
227{
228	return (in_cksum(m, len));
229}
230
231void
232carp_hmac_prepare(struct carp_softc *sc)
233{
234	u_int8_t carp_version = CARP_VERSION, type = CARP_ADVERTISEMENT;
235	u_int8_t vhid = sc->sc_vhid & 0xff;
236	SHA1_CTX sha1ctx;
237	u_int32_t kmd[5];
238	struct ifaddr *ifa;
239	int i, found;
240	struct in_addr last, cur, in;
241#ifdef INET6
242	struct in6_addr last6, cur6, in6;
243#endif /* INET6 */
244
245	/* compute ipad from key */
246	memset(sc->sc_pad, 0, sizeof(sc->sc_pad));
247	memcpy(sc->sc_pad, sc->sc_key, sizeof(sc->sc_key));
248	for (i = 0; i < sizeof(sc->sc_pad); i++)
249		sc->sc_pad[i] ^= 0x36;
250
251	/* precompute first part of inner hash */
252	SHA1Init(&sc->sc_sha1);
253	SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
254	SHA1Update(&sc->sc_sha1, (void *)&carp_version, sizeof(carp_version));
255	SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
256
257	/* generate a key for the arpbalance hash, before the vhid is hashed */
258	memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx));
259	SHA1Final((unsigned char *)kmd, &sha1ctx);
260	sc->sc_hashkey[0] = kmd[0] ^ kmd[1];
261	sc->sc_hashkey[1] = kmd[2] ^ kmd[3];
262
263	/* the rest of the precomputation */
264	SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
265
266	/* Hash the addresses from smallest to largest, not interface order */
267#ifdef INET
268	cur.s_addr = 0;
269	do {
270		found = 0;
271		last = cur;
272		cur.s_addr = 0xffffffff;
273		IFADDR_FOREACH(ifa, &sc->sc_if) {
274			in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
275			if (ifa->ifa_addr->sa_family == AF_INET &&
276			    ntohl(in.s_addr) > ntohl(last.s_addr) &&
277			    ntohl(in.s_addr) < ntohl(cur.s_addr)) {
278				cur.s_addr = in.s_addr;
279				found++;
280			}
281		}
282		if (found)
283			SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
284	} while (found);
285#endif /* INET */
286
287#ifdef INET6
288	memset(&cur6, 0x00, sizeof(cur6));
289	do {
290		found = 0;
291		last6 = cur6;
292		memset(&cur6, 0xff, sizeof(cur6));
293		IFADDR_FOREACH(ifa, &sc->sc_if) {
294			in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
295			if (IN6_IS_ADDR_LINKLOCAL(&in6))
296				in6.s6_addr16[1] = 0;
297			if (ifa->ifa_addr->sa_family == AF_INET6 &&
298			    memcmp(&in6, &last6, sizeof(in6)) > 0 &&
299			    memcmp(&in6, &cur6, sizeof(in6)) < 0) {
300				cur6 = in6;
301				found++;
302			}
303		}
304		if (found)
305			SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
306	} while (found);
307#endif /* INET6 */
308
309	/* convert ipad to opad */
310	for (i = 0; i < sizeof(sc->sc_pad); i++)
311		sc->sc_pad[i] ^= 0x36 ^ 0x5c;
312}
313
314void
315carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
316    unsigned char md[20])
317{
318	SHA1_CTX sha1ctx;
319
320	/* fetch first half of inner hash */
321	memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx));
322
323	SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
324	SHA1Final(md, &sha1ctx);
325
326	/* outer hash */
327	SHA1Init(&sha1ctx);
328	SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
329	SHA1Update(&sha1ctx, md, 20);
330	SHA1Final(md, &sha1ctx);
331}
332
333int
334carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
335    unsigned char md[20])
336{
337	unsigned char md2[20];
338
339	carp_hmac_generate(sc, counter, md2);
340
341	return (memcmp(md, md2, sizeof(md2)));
342}
343
344void
345carp_setroute(struct carp_softc *sc, int cmd)
346{
347	struct ifaddr *ifa;
348	int s;
349
350	KERNEL_LOCK(1, NULL);
351	s = splsoftnet();
352	IFADDR_FOREACH(ifa, &sc->sc_if) {
353		switch (ifa->ifa_addr->sa_family) {
354		case AF_INET: {
355			int count = 0;
356			struct rtentry *rt;
357			int hr_otherif, nr_ourif;
358
359			/*
360			 * Avoid screwing with the routes if there are other
361			 * carp interfaces which are master and have the same
362			 * address.
363			 */
364			if (sc->sc_carpdev != NULL &&
365			    sc->sc_carpdev->if_carp != NULL) {
366				count = carp_addrcount(
367				    (struct carp_if *)sc->sc_carpdev->if_carp,
368				    ifatoia(ifa), CARP_COUNT_MASTER);
369				if ((cmd == RTM_ADD && count != 1) ||
370				    (cmd == RTM_DELETE && count != 0))
371					continue;
372			}
373
374			/* Remove the existing host route, if any */
375			rtrequest(RTM_DELETE, ifa->ifa_addr,
376			    ifa->ifa_addr, ifa->ifa_netmask,
377			    RTF_HOST, NULL);
378
379			rt = NULL;
380			(void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr,
381			    ifa->ifa_netmask, RTF_HOST, &rt);
382			hr_otherif = (rt && rt->rt_ifp != &sc->sc_if &&
383			    rt->rt_flags & (RTF_CLONING|RTF_CLONED));
384			if (rt != NULL) {
385				RTFREE(rt);
386				rt = NULL;
387			}
388
389			/* Check for a network route on our interface */
390
391			rt = NULL;
392			(void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr,
393			    ifa->ifa_netmask, 0, &rt);
394			nr_ourif = (rt && rt->rt_ifp == &sc->sc_if);
395
396			switch (cmd) {
397			case RTM_ADD:
398				if (hr_otherif) {
399					ifa->ifa_rtrequest = NULL;
400					ifa->ifa_flags &= ~RTF_CLONING;
401
402					rtrequest(RTM_ADD, ifa->ifa_addr,
403					    ifa->ifa_addr, ifa->ifa_netmask,
404					    RTF_UP | RTF_HOST, NULL);
405				}
406				if (!hr_otherif || nr_ourif || !rt) {
407					if (nr_ourif && !(rt->rt_flags &
408					    RTF_CLONING))
409						rtrequest(RTM_DELETE,
410						    ifa->ifa_addr,
411						    ifa->ifa_addr,
412						    ifa->ifa_netmask, 0, NULL);
413
414					ifa->ifa_rtrequest = arp_rtrequest;
415					ifa->ifa_flags |= RTF_CLONING;
416
417					if (rtrequest(RTM_ADD, ifa->ifa_addr,
418					    ifa->ifa_addr, ifa->ifa_netmask, 0,
419					    NULL) == 0)
420						ifa->ifa_flags |= IFA_ROUTE;
421				}
422				break;
423			case RTM_DELETE:
424				break;
425			default:
426				break;
427			}
428			if (rt != NULL) {
429				RTFREE(rt);
430				rt = NULL;
431			}
432			break;
433		}
434
435#ifdef INET6
436		case AF_INET6:
437			if (cmd == RTM_ADD)
438				in6_ifaddloop(ifa);
439			else
440				in6_ifremloop(ifa);
441			break;
442#endif /* INET6 */
443		default:
444			break;
445		}
446	}
447	splx(s);
448	KERNEL_UNLOCK_ONE(NULL);
449}
450
451/*
452 * process input packet.
453 * we have rearranged checks order compared to the rfc,
454 * but it seems more efficient this way or not possible otherwise.
455 */
456void
457carp_proto_input(struct mbuf *m, ...)
458{
459	struct ip *ip = mtod(m, struct ip *);
460	struct carp_softc *sc = NULL;
461	struct carp_header *ch;
462	int iplen, len, hlen;
463	va_list ap;
464
465	va_start(ap, m);
466	hlen = va_arg(ap, int);
467	va_end(ap);
468
469	CARP_STATINC(CARP_STAT_IPACKETS);
470
471	if (!carp_opts[CARPCTL_ALLOW]) {
472		m_freem(m);
473		return;
474	}
475
476	/* check if received on a valid carp interface */
477	if (m->m_pkthdr.rcvif->if_type != IFT_CARP) {
478		CARP_STATINC(CARP_STAT_BADIF);
479		CARP_LOG(sc, ("packet received on non-carp interface: %s",
480		    m->m_pkthdr.rcvif->if_xname));
481		m_freem(m);
482		return;
483	}
484
485	/* verify that the IP TTL is 255.  */
486	if (ip->ip_ttl != CARP_DFLTTL) {
487		CARP_STATINC(CARP_STAT_BADTTL);
488		CARP_LOG(sc, ("received ttl %d != %d on %s", ip->ip_ttl,
489		    CARP_DFLTTL, m->m_pkthdr.rcvif->if_xname));
490		m_freem(m);
491		return;
492	}
493
494	/*
495	 * verify that the received packet length is
496	 * equal to the CARP header
497	 */
498	iplen = ip->ip_hl << 2;
499	len = iplen + sizeof(*ch);
500	if (len > m->m_pkthdr.len) {
501		CARP_STATINC(CARP_STAT_BADLEN);
502		CARP_LOG(sc, ("packet too short %d on %s", m->m_pkthdr.len,
503		    m->m_pkthdr.rcvif->if_xname));
504		m_freem(m);
505		return;
506	}
507
508	if ((m = m_pullup(m, len)) == NULL) {
509		CARP_STATINC(CARP_STAT_HDROPS);
510		return;
511	}
512	ip = mtod(m, struct ip *);
513	ch = (struct carp_header *)((char *)ip + iplen);
514	/* verify the CARP checksum */
515	m->m_data += iplen;
516	if (carp_cksum(m, len - iplen)) {
517		CARP_STATINC(CARP_STAT_BADSUM);
518		CARP_LOG(sc, ("checksum failed on %s",
519		    m->m_pkthdr.rcvif->if_xname));
520		m_freem(m);
521		return;
522	}
523	m->m_data -= iplen;
524
525	carp_proto_input_c(m, ch, AF_INET);
526}
527
#ifdef INET6
/*
 * IPv6 input routine for CARP packets.
 *
 *   mp    points at the mbuf chain holding the packet
 *   offp  points at the offset of the CARP header within the packet
 *   proto unused
 *
 * Always returns IPPROTO_DONE; the mbuf is either freed here or
 * passed on to carp_proto_input_c().
 */
int
carp6_proto_input(struct mbuf **mp, int *offp, int proto)
{
	struct mbuf *m = *mp;
	/* stays NULL in this function, so CARP_LOG prefixes with "carp: " */
	struct carp_softc *sc = NULL;
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct carp_header *ch;
	u_int len;

	CARP_STATINC(CARP_STAT_IPACKETS6);

	/* CARP input administratively disabled */
	if (!carp_opts[CARPCTL_ALLOW])
		goto drop;

	/* the packet must have arrived on a carp interface */
	if (m->m_pkthdr.rcvif->if_type != IFT_CARP) {
		CARP_STATINC(CARP_STAT_BADIF);
		CARP_LOG(sc, ("packet received on non-carp interface: %s",
		    m->m_pkthdr.rcvif->if_xname));
		goto drop;
	}

	/* the hop limit must still be CARP_DFLTTL */
	if (ip6->ip6_hlim != CARP_DFLTTL) {
		CARP_STATINC(CARP_STAT_BADTTL);
		CARP_LOG(sc, ("received ttl %d != %d on %s", ip6->ip6_hlim,
		    CARP_DFLTTL, m->m_pkthdr.rcvif->if_xname));
		goto drop;
	}

	/*
	 * Fetch the complete CARP header.  No m_freem() on failure:
	 * this path returns without touching m again.
	 */
	len = m->m_len;
	IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
	if (ch == NULL) {
		CARP_STATINC(CARP_STAT_BADLEN);
		CARP_LOG(sc, ("packet size %u too small", len));
		return (IPPROTO_DONE);
	}

	/* checksum the CARP header with the data pointer moved to it */
	m->m_data += *offp;
	if (carp_cksum(m, sizeof(*ch))) {
		CARP_STATINC(CARP_STAT_BADSUM);
		CARP_LOG(sc, ("checksum failed, on %s",
		    m->m_pkthdr.rcvif->if_xname));
		goto drop;
	}
	m->m_data -= *offp;

	carp_proto_input_c(m, ch, AF_INET6);
	return (IPPROTO_DONE);

drop:
	m_freem(m);
	return (IPPROTO_DONE);
}
#endif /* INET6 */
588
589void
590carp_proto_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
591{
592	struct carp_softc *sc;
593	u_int64_t tmp_counter;
594	struct timeval sc_tv, ch_tv;
595
596	TAILQ_FOREACH(sc, &((struct carp_if *)
597	    m->m_pkthdr.rcvif->if_carpdev->if_carp)->vhif_vrs, sc_list)
598		if (sc->sc_vhid == ch->carp_vhid)
599			break;
600
601	if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
602	    (IFF_UP|IFF_RUNNING)) {
603		CARP_STATINC(CARP_STAT_BADVHID);
604		m_freem(m);
605		return;
606	}
607
608	/*
609	 * Check if our own advertisement was duplicated
610	 * from a non simplex interface.
611	 * XXX If there is no address on our physical interface
612	 * there is no way to distinguish our ads from the ones
613	 * another carp host might have sent us.
614	 */
615	if ((sc->sc_carpdev->if_flags & IFF_SIMPLEX) == 0) {
616		struct sockaddr sa;
617		struct ifaddr *ifa;
618
619		memset(&sa, 0, sizeof(sa));
620		sa.sa_family = af;
621		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
622
623		if (ifa && af == AF_INET) {
624			struct ip *ip = mtod(m, struct ip *);
625			if (ip->ip_src.s_addr ==
626					ifatoia(ifa)->ia_addr.sin_addr.s_addr) {
627				m_freem(m);
628				return;
629			}
630		}
631#ifdef INET6
632		if (ifa && af == AF_INET6) {
633			struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
634			struct in6_addr in6_src, in6_found;
635
636			in6_src = ip6->ip6_src;
637			in6_found = ifatoia6(ifa)->ia_addr.sin6_addr;
638			if (IN6_IS_ADDR_LINKLOCAL(&in6_src))
639				in6_src.s6_addr16[1] = 0;
640			if (IN6_IS_ADDR_LINKLOCAL(&in6_found))
641				in6_found.s6_addr16[1] = 0;
642			if (IN6_ARE_ADDR_EQUAL(&in6_src, &in6_found)) {
643				m_freem(m);
644				return;
645			}
646		}
647#endif /* INET6 */
648	}
649
650	nanotime(&sc->sc_if.if_lastchange);
651	sc->sc_if.if_ipackets++;
652	sc->sc_if.if_ibytes += m->m_pkthdr.len;
653
654	/* verify the CARP version. */
655	if (ch->carp_version != CARP_VERSION) {
656		CARP_STATINC(CARP_STAT_BADVER);
657		sc->sc_if.if_ierrors++;
658		CARP_LOG(sc, ("invalid version %d != %d",
659		    ch->carp_version, CARP_VERSION));
660		m_freem(m);
661		return;
662	}
663
664	/* verify the hash */
665	if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
666		CARP_STATINC(CARP_STAT_BADAUTH);
667		sc->sc_if.if_ierrors++;
668		CARP_LOG(sc, ("incorrect hash"));
669		m_freem(m);
670		return;
671	}
672
673	tmp_counter = ntohl(ch->carp_counter[0]);
674	tmp_counter = tmp_counter<<32;
675	tmp_counter += ntohl(ch->carp_counter[1]);
676
677	/* XXX Replay protection goes here */
678
679	sc->sc_init_counter = 0;
680	sc->sc_counter = tmp_counter;
681
682
683	sc_tv.tv_sec = sc->sc_advbase;
684	if (carp_suppress_preempt && sc->sc_advskew <  240)
685		sc_tv.tv_usec = 240 * 1000000 / 256;
686	else
687		sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
688	ch_tv.tv_sec = ch->carp_advbase;
689	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
690
691	switch (sc->sc_state) {
692	case INIT:
693		break;
694	case MASTER:
695		/*
696		 * If we receive an advertisement from a backup who's going to
697		 * be more frequent than us, go into BACKUP state.
698		 */
699		if (timercmp(&sc_tv, &ch_tv, >) ||
700		    timercmp(&sc_tv, &ch_tv, ==)) {
701			callout_stop(&sc->sc_ad_tmo);
702			CARP_LOG(sc, ("MASTER -> BACKUP (more frequent advertisement received)"));
703			carp_set_state(sc, BACKUP);
704			carp_setrun(sc, 0);
705			carp_setroute(sc, RTM_DELETE);
706		}
707		break;
708	case BACKUP:
709		/*
710		 * If we're pre-empting masters who advertise slower than us,
711		 * and this one claims to be slower, treat him as down.
712		 */
713		if (carp_opts[CARPCTL_PREEMPT] && timercmp(&sc_tv, &ch_tv, <)) {
714			CARP_LOG(sc, ("BACKUP -> MASTER (preempting a slower master)"));
715			carp_master_down(sc);
716			break;
717		}
718
719		/*
720		 *  If the master is going to advertise at such a low frequency
721		 *  that he's guaranteed to time out, we'd might as well just
722		 *  treat him as timed out now.
723		 */
724		sc_tv.tv_sec = sc->sc_advbase * 3;
725		if (timercmp(&sc_tv, &ch_tv, <)) {
726			CARP_LOG(sc, ("BACKUP -> MASTER (master timed out)"));
727			carp_master_down(sc);
728			break;
729		}
730
731		/*
732		 * Otherwise, we reset the counter and wait for the next
733		 * advertisement.
734		 */
735		carp_setrun(sc, af);
736		break;
737	}
738
739	m_freem(m);
740	return;
741}
742
743/*
744 * Interface side of the CARP implementation.
745 */
746
747/* ARGSUSED */
748void
749carpattach(int n)
750{
751	if_clone_attach(&carp_cloner);
752
753	carpstat_percpu = percpu_alloc(sizeof(uint64_t) * CARP_NSTATS);
754}
755
756int
757carp_clone_create(struct if_clone *ifc, int unit)
758{
759	extern int ifqmaxlen;
760	struct carp_softc *sc;
761	struct ifnet *ifp;
762
763	sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO);
764	if (!sc)
765		return (ENOMEM);
766
767	sc->sc_suppress = 0;
768	sc->sc_advbase = CARP_DFLTINTV;
769	sc->sc_vhid = -1;	/* required setting */
770	sc->sc_advskew = 0;
771	sc->sc_init_counter = 1;
772	sc->sc_naddrs = sc->sc_naddrs6 = 0;
773#ifdef INET6
774	sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
775#endif /* INET6 */
776
777	callout_init(&sc->sc_ad_tmo, 0);
778	callout_init(&sc->sc_md_tmo, 0);
779	callout_init(&sc->sc_md6_tmo, 0);
780
781	callout_setfunc(&sc->sc_ad_tmo, carp_send_ad, sc);
782	callout_setfunc(&sc->sc_md_tmo, carp_master_down, sc);
783	callout_setfunc(&sc->sc_md6_tmo, carp_master_down, sc);
784
785	LIST_INIT(&sc->carp_mc_listhead);
786	ifp = &sc->sc_if;
787	ifp->if_softc = sc;
788	snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
789	    unit);
790	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
791	ifp->if_ioctl = carp_ioctl;
792	ifp->if_start = carp_start;
793	ifp->if_output = carp_output;
794	ifp->if_type = IFT_CARP;
795	ifp->if_addrlen = ETHER_ADDR_LEN;
796	ifp->if_hdrlen = ETHER_HDR_LEN;
797	ifp->if_mtu = ETHERMTU;
798	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
799	IFQ_SET_READY(&ifp->if_snd);
800	if_attach(ifp);
801
802	if_alloc_sadl(ifp);
803	ifp->if_broadcastaddr = etherbroadcastaddr;
804	carp_set_enaddr(sc);
805	LIST_INIT(&sc->sc_ac.ec_multiaddrs);
806	bpf_attach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
807	return (0);
808}
809
810int
811carp_clone_destroy(struct ifnet *ifp)
812{
813	struct carp_softc *sc = ifp->if_softc;
814
815	carpdetach(ifp->if_softc);
816	ether_ifdetach(ifp);
817	if_detach(ifp);
818	callout_destroy(&sc->sc_ad_tmo);
819	callout_destroy(&sc->sc_md_tmo);
820	callout_destroy(&sc->sc_md6_tmo);
821	free(ifp->if_softc, M_DEVBUF);
822
823	return (0);
824}
825
826void
827carpdetach(struct carp_softc *sc)
828{
829	struct carp_if *cif;
830	int s;
831
832	callout_stop(&sc->sc_ad_tmo);
833	callout_stop(&sc->sc_md_tmo);
834	callout_stop(&sc->sc_md6_tmo);
835
836	if (sc->sc_suppress)
837		carp_suppress_preempt--;
838	sc->sc_suppress = 0;
839
840	if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
841		carp_suppress_preempt--;
842	sc->sc_sendad_errors = 0;
843
844	carp_set_state(sc, INIT);
845	sc->sc_if.if_flags &= ~IFF_UP;
846	carp_setrun(sc, 0);
847	carp_multicast_cleanup(sc);
848
849	KERNEL_LOCK(1, NULL);
850	s = splnet();
851	if (sc->sc_carpdev != NULL) {
852		/* XXX linkstatehook removal */
853		cif = (struct carp_if *)sc->sc_carpdev->if_carp;
854		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
855		if (!--cif->vhif_nvrs) {
856			ifpromisc(sc->sc_carpdev, 0);
857			sc->sc_carpdev->if_carp = NULL;
858			free(cif, M_IFADDR);
859		}
860	}
861	sc->sc_carpdev = NULL;
862	splx(s);
863	KERNEL_UNLOCK_ONE(NULL);
864}
865
866/* Detach an interface from the carp. */
867void
868carp_ifdetach(struct ifnet *ifp)
869{
870	struct carp_softc *sc, *nextsc;
871	struct carp_if *cif = (struct carp_if *)ifp->if_carp;
872
873	for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
874		nextsc = TAILQ_NEXT(sc, sc_list);
875		carpdetach(sc);
876	}
877}
878
879int
880carp_prepare_ad(struct mbuf *m, struct carp_softc *sc,
881    struct carp_header *ch)
882{
883	if (sc->sc_init_counter) {
884		/* this could also be seconds since unix epoch */
885		sc->sc_counter = cprng_fast64();
886	} else
887		sc->sc_counter++;
888
889	ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
890	ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
891
892	carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
893
894	return (0);
895}
896
897void
898carp_send_ad_all(void)
899{
900	struct ifnet *ifp;
901	struct carp_if *cif;
902	struct carp_softc *vh;
903
904	TAILQ_FOREACH(ifp, &ifnet, if_list) {
905		if (ifp->if_carp == NULL || ifp->if_type == IFT_CARP)
906			continue;
907
908		cif = (struct carp_if *)ifp->if_carp;
909		TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
910			if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
911			    (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER)
912				carp_send_ad(vh);
913		}
914	}
915}
916
917
918void
919carp_send_ad(void *v)
920{
921	struct carp_header ch;
922	struct timeval tv;
923	struct carp_softc *sc = v;
924	struct carp_header *ch_ptr;
925	struct mbuf *m;
926	int error, len, advbase, advskew, s;
927	struct ifaddr *ifa;
928	struct sockaddr sa;
929
930	KERNEL_LOCK(1, NULL);
931	s = splsoftnet();
932
933	advbase = advskew = 0; /* Sssssh compiler */
934	if (sc->sc_carpdev == NULL) {
935		sc->sc_if.if_oerrors++;
936		goto retry_later;
937	}
938
939	/* bow out if we've gone to backup (the carp interface is going down) */
940	if (sc->sc_bow_out) {
941		sc->sc_bow_out = 0;
942		advbase = 255;
943		advskew = 255;
944	} else {
945		advbase = sc->sc_advbase;
946		if (!carp_suppress_preempt || sc->sc_advskew > 240)
947			advskew = sc->sc_advskew;
948		else
949			advskew = 240;
950		tv.tv_sec = advbase;
951		tv.tv_usec = advskew * 1000000 / 256;
952	}
953
954	ch.carp_version = CARP_VERSION;
955	ch.carp_type = CARP_ADVERTISEMENT;
956	ch.carp_vhid = sc->sc_vhid;
957	ch.carp_advbase = advbase;
958	ch.carp_advskew = advskew;
959	ch.carp_authlen = 7;	/* XXX DEFINE */
960	ch.carp_pad1 = 0;	/* must be zero */
961	ch.carp_cksum = 0;
962
963
964#ifdef INET
965	if (sc->sc_naddrs) {
966		struct ip *ip;
967
968		MGETHDR(m, M_DONTWAIT, MT_HEADER);
969		if (m == NULL) {
970			sc->sc_if.if_oerrors++;
971			CARP_STATINC(CARP_STAT_ONOMEM);
972			/* XXX maybe less ? */
973			goto retry_later;
974		}
975		len = sizeof(*ip) + sizeof(ch);
976		m->m_pkthdr.len = len;
977		m->m_pkthdr.rcvif = NULL;
978		m->m_len = len;
979		MH_ALIGN(m, m->m_len);
980		m->m_flags |= M_MCAST;
981		ip = mtod(m, struct ip *);
982		ip->ip_v = IPVERSION;
983		ip->ip_hl = sizeof(*ip) >> 2;
984		ip->ip_tos = IPTOS_LOWDELAY;
985		ip->ip_len = htons(len);
986		ip->ip_id = 0;	/* no need for id, we don't support fragments */
987		ip->ip_off = htons(IP_DF);
988		ip->ip_ttl = CARP_DFLTTL;
989		ip->ip_p = IPPROTO_CARP;
990		ip->ip_sum = 0;
991
992		memset(&sa, 0, sizeof(sa));
993		sa.sa_family = AF_INET;
994		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
995		if (ifa == NULL)
996			ip->ip_src.s_addr = 0;
997		else
998			ip->ip_src.s_addr =
999			    ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1000		ip->ip_dst.s_addr = INADDR_CARP_GROUP;
1001
1002		ch_ptr = (struct carp_header *)(&ip[1]);
1003		memcpy(ch_ptr, &ch, sizeof(ch));
1004		if (carp_prepare_ad(m, sc, ch_ptr))
1005			goto retry_later;
1006
1007		m->m_data += sizeof(*ip);
1008		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
1009		m->m_data -= sizeof(*ip);
1010
1011		nanotime(&sc->sc_if.if_lastchange);
1012		sc->sc_if.if_opackets++;
1013		sc->sc_if.if_obytes += len;
1014		CARP_STATINC(CARP_STAT_OPACKETS);
1015
1016		error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
1017		    NULL);
1018		if (error) {
1019			if (error == ENOBUFS)
1020				CARP_STATINC(CARP_STAT_ONOMEM);
1021			else
1022				CARP_LOG(sc, ("ip_output failed: %d", error));
1023			sc->sc_if.if_oerrors++;
1024			if (sc->sc_sendad_errors < INT_MAX)
1025				sc->sc_sendad_errors++;
1026			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1027				carp_suppress_preempt++;
1028				if (carp_suppress_preempt == 1)
1029					carp_send_ad_all();
1030			}
1031			sc->sc_sendad_success = 0;
1032		} else {
1033			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1034				if (++sc->sc_sendad_success >=
1035				    CARP_SENDAD_MIN_SUCCESS) {
1036					carp_suppress_preempt--;
1037					sc->sc_sendad_errors = 0;
1038				}
1039			} else
1040				sc->sc_sendad_errors = 0;
1041		}
1042	}
1043#endif /* INET */
1044#ifdef INET6
1045	if (sc->sc_naddrs6) {
1046		struct ip6_hdr *ip6;
1047
1048		MGETHDR(m, M_DONTWAIT, MT_HEADER);
1049		if (m == NULL) {
1050			sc->sc_if.if_oerrors++;
1051			CARP_STATINC(CARP_STAT_ONOMEM);
1052			/* XXX maybe less ? */
1053			goto retry_later;
1054		}
1055		len = sizeof(*ip6) + sizeof(ch);
1056		m->m_pkthdr.len = len;
1057		m->m_pkthdr.rcvif = NULL;
1058		m->m_len = len;
1059		MH_ALIGN(m, m->m_len);
1060		m->m_flags |= M_MCAST;
1061		ip6 = mtod(m, struct ip6_hdr *);
1062		memset(ip6, 0, sizeof(*ip6));
1063		ip6->ip6_vfc |= IPV6_VERSION;
1064		ip6->ip6_hlim = CARP_DFLTTL;
1065		ip6->ip6_nxt = IPPROTO_CARP;
1066
1067		/* set the source address */
1068		memset(&sa, 0, sizeof(sa));
1069		sa.sa_family = AF_INET6;
1070		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1071		if (ifa == NULL)	/* This should never happen with IPv6 */
1072			memset(&ip6->ip6_src, 0, sizeof(struct in6_addr));
1073		else
1074			bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr,
1075			    &ip6->ip6_src, sizeof(struct in6_addr));
1076		/* set the multicast destination */
1077
1078		ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
1079		ip6->ip6_dst.s6_addr8[15] = 0x12;
1080		if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
1081			sc->sc_if.if_oerrors++;
1082			m_freem(m);
1083			CARP_LOG(sc, ("in6_setscope failed"));
1084			goto retry_later;
1085		}
1086
1087		ch_ptr = (struct carp_header *)(&ip6[1]);
1088		memcpy(ch_ptr, &ch, sizeof(ch));
1089		if (carp_prepare_ad(m, sc, ch_ptr))
1090			goto retry_later;
1091
1092		m->m_data += sizeof(*ip6);
1093		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
1094		m->m_data -= sizeof(*ip6);
1095
1096		nanotime(&sc->sc_if.if_lastchange);
1097		sc->sc_if.if_opackets++;
1098		sc->sc_if.if_obytes += len;
1099		CARP_STATINC(CARP_STAT_OPACKETS6);
1100
1101		error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL);
1102		if (error) {
1103			if (error == ENOBUFS)
1104				CARP_STATINC(CARP_STAT_ONOMEM);
1105			else
1106				CARP_LOG(sc, ("ip6_output failed: %d", error));
1107			sc->sc_if.if_oerrors++;
1108			if (sc->sc_sendad_errors < INT_MAX)
1109				sc->sc_sendad_errors++;
1110			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1111				carp_suppress_preempt++;
1112				if (carp_suppress_preempt == 1)
1113					carp_send_ad_all();
1114			}
1115			sc->sc_sendad_success = 0;
1116		} else {
1117			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1118				if (++sc->sc_sendad_success >=
1119				    CARP_SENDAD_MIN_SUCCESS) {
1120					carp_suppress_preempt--;
1121					sc->sc_sendad_errors = 0;
1122				}
1123			} else
1124				sc->sc_sendad_errors = 0;
1125		}
1126	}
1127#endif /* INET6 */
1128
1129retry_later:
1130	splx(s);
1131	KERNEL_UNLOCK_ONE(NULL);
1132	if (advbase != 255 || advskew != 255)
1133		callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv));
1134}
1135
1136/*
1137 * Broadcast a gratuitous ARP request containing
1138 * the virtual router MAC address for each IP address
1139 * associated with the virtual router.
1140 */
1141void
1142carp_send_arp(struct carp_softc *sc)
1143{
1144	struct ifaddr *ifa;
1145	struct in_addr *in;
1146	int s;
1147
1148	KERNEL_LOCK(1, NULL);
1149	s = splsoftnet();
1150	IFADDR_FOREACH(ifa, &sc->sc_if) {
1151
1152		if (ifa->ifa_addr->sa_family != AF_INET)
1153			continue;
1154
1155		in = &ifatoia(ifa)->ia_addr.sin_addr;
1156		arprequest(sc->sc_carpdev, in, in, CLLADDR(sc->sc_if.if_sadl));
1157	}
1158	splx(s);
1159	KERNEL_UNLOCK_ONE(NULL);
1160}
1161
1162#ifdef INET6
1163void
1164carp_send_na(struct carp_softc *sc)
1165{
1166	struct ifaddr *ifa;
1167	struct in6_addr *in6;
1168	static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1169	int s;
1170
1171	KERNEL_LOCK(1, NULL);
1172	s = splsoftnet();
1173
1174	IFADDR_FOREACH(ifa, &sc->sc_if) {
1175
1176		if (ifa->ifa_addr->sa_family != AF_INET6)
1177			continue;
1178
1179		in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
1180		nd6_na_output(sc->sc_carpdev, &mcast, in6,
1181		    ND_NA_FLAG_OVERRIDE, 1, NULL);
1182	}
1183	splx(s);
1184	KERNEL_UNLOCK_ONE(NULL);
1185}
1186#endif /* INET6 */
1187
/*
 * Based on bridge_hash() in if_bridge.c
 *
 * mix(a, b, c) scrambles three integer lvalues in place through nine
 * subtract/xor/shift rounds.  Each argument is expanded many times and is
 * not parenthesized, so only plain variables should be passed.
 */
#define	mix(a,b,c) \
	do {						\
		a -= b; a -= c; a ^= (c >> 13);		\
		b -= c; b -= a; b ^= (a << 8);		\
		c -= a; c -= b; c ^= (b >> 13);		\
		a -= b; a -= c; a ^= (c >> 12);		\
		b -= c; b -= a; b ^= (a << 16);		\
		c -= a; c -= b; c ^= (b >> 5);		\
		a -= b; a -= c; a ^= (c >> 3);		\
		b -= c; b -= a; b ^= (a << 10);		\
		c -= a; c -= b; c ^= (b >> 15);		\
	} while (0)
1203
1204u_int32_t
1205carp_hash(struct carp_softc *sc, u_char *src)
1206{
1207	u_int32_t a = 0x9e3779b9, b = sc->sc_hashkey[0], c = sc->sc_hashkey[1];
1208
1209	c += sc->sc_key[3] << 24;
1210	c += sc->sc_key[2] << 16;
1211	c += sc->sc_key[1] << 8;
1212	c += sc->sc_key[0];
1213	b += src[5] << 8;
1214	b += src[4];
1215	a += src[3] << 24;
1216	a += src[2] << 16;
1217	a += src[1] << 8;
1218	a += src[0];
1219
1220	mix(a, b, c);
1221	return (c);
1222}
1223
1224int
1225carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
1226{
1227	struct carp_softc *vh;
1228	struct ifaddr *ifa;
1229	int count = 0;
1230
1231	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1232		if ((type == CARP_COUNT_RUNNING &&
1233		    (vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1234		    (IFF_UP|IFF_RUNNING)) ||
1235		    (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) {
1236			IFADDR_FOREACH(ifa, &vh->sc_if) {
1237				if (ifa->ifa_addr->sa_family == AF_INET &&
1238				    ia->ia_addr.sin_addr.s_addr ==
1239				    ifatoia(ifa)->ia_addr.sin_addr.s_addr)
1240					count++;
1241			}
1242		}
1243	}
1244	return (count);
1245}
1246
1247int
1248carp_iamatch(struct in_ifaddr *ia, u_char *src,
1249    u_int32_t *count, u_int32_t index)
1250{
1251	struct carp_softc *sc = ia->ia_ifp->if_softc;
1252
1253	if (carp_opts[CARPCTL_ARPBALANCE]) {
1254		/*
1255		 * We use the source ip to decide which virtual host should
1256		 * handle the request. If we're master of that virtual host,
1257		 * then we respond, otherwise, just drop the arp packet on
1258		 * the floor.
1259		 */
1260
1261		/* Count the elegible carp interfaces with this address */
1262		if (*count == 0)
1263			*count = carp_addrcount(
1264			    (struct carp_if *)ia->ia_ifp->if_carpdev->if_carp,
1265			    ia, CARP_COUNT_RUNNING);
1266
1267		/* This should never happen, but... */
1268		if (*count == 0)
1269			return (0);
1270
1271		if (carp_hash(sc, src) % *count == index - 1 &&
1272		    sc->sc_state == MASTER) {
1273			return (1);
1274		}
1275	} else {
1276		if (sc->sc_state == MASTER)
1277			return (1);
1278	}
1279
1280	return (0);
1281}
1282
1283#ifdef INET6
1284struct ifaddr *
1285carp_iamatch6(void *v, struct in6_addr *taddr)
1286{
1287	struct carp_if *cif = v;
1288	struct carp_softc *vh;
1289	struct ifaddr *ifa;
1290
1291	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1292		IFADDR_FOREACH(ifa, &vh->sc_if) {
1293			if (IN6_ARE_ADDR_EQUAL(taddr,
1294			    &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1295			    ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1296			    (IFF_UP|IFF_RUNNING)) && vh->sc_state == MASTER)
1297				return (ifa);
1298		}
1299	}
1300
1301	return (NULL);
1302}
1303#endif /* INET6 */
1304
1305struct ifnet *
1306carp_ourether(void *v, struct ether_header *eh, u_char iftype, int src)
1307{
1308	struct carp_if *cif = (struct carp_if *)v;
1309	struct carp_softc *vh;
1310	u_int8_t *ena;
1311
1312	if (src)
1313		ena = (u_int8_t *)&eh->ether_shost;
1314	else
1315		ena = (u_int8_t *)&eh->ether_dhost;
1316
1317	switch (iftype) {
1318	case IFT_ETHER:
1319	case IFT_FDDI:
1320		if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
1321			return (NULL);
1322		break;
1323	case IFT_ISO88025:
1324		if (ena[0] != 3 || ena[1] || ena[4] || ena[5])
1325			return (NULL);
1326		break;
1327	default:
1328		return (NULL);
1329		break;
1330	}
1331
1332	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list)
1333		if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1334		    (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER &&
1335		    !memcmp(ena, CLLADDR(vh->sc_if.if_sadl),
1336		    ETHER_ADDR_LEN)) {
1337			return (&vh->sc_if);
1338		    }
1339
1340	return (NULL);
1341}
1342
1343int
1344carp_input(struct mbuf *m, u_int8_t *shost, u_int8_t *dhost, u_int16_t etype)
1345{
1346	struct ether_header eh;
1347	struct carp_if *cif = (struct carp_if *)m->m_pkthdr.rcvif->if_carp;
1348	struct ifnet *ifp;
1349
1350	memcpy(&eh.ether_shost, shost, sizeof(eh.ether_shost));
1351	memcpy(&eh.ether_dhost, dhost, sizeof(eh.ether_dhost));
1352	eh.ether_type = etype;
1353
1354	if (m->m_flags & (M_BCAST|M_MCAST)) {
1355		struct carp_softc *vh;
1356		struct mbuf *m0;
1357
1358		/*
1359		 * XXX Should really check the list of multicast addresses
1360		 * for each CARP interface _before_ copying.
1361		 */
1362		TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1363			m0 = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
1364			if (m0 == NULL)
1365				continue;
1366			m0->m_pkthdr.rcvif = &vh->sc_if;
1367			ether_input(&vh->sc_if, m0);
1368		}
1369		return (1);
1370	}
1371
1372	ifp = carp_ourether(cif, &eh, m->m_pkthdr.rcvif->if_type, 0);
1373	if (ifp == NULL) {
1374		return (1);
1375	}
1376
1377	m->m_pkthdr.rcvif = ifp;
1378
1379	bpf_mtap(ifp, m);
1380	ifp->if_ipackets++;
1381	ether_input(ifp, m);
1382	return (0);
1383}
1384
1385void
1386carp_master_down(void *v)
1387{
1388	struct carp_softc *sc = v;
1389
1390	switch (sc->sc_state) {
1391	case INIT:
1392		printf("%s: master_down event in INIT state\n",
1393		    sc->sc_if.if_xname);
1394		break;
1395	case MASTER:
1396		break;
1397	case BACKUP:
1398		CARP_LOG(sc, ("INIT -> MASTER (preempting)"));
1399		carp_set_state(sc, MASTER);
1400		carp_send_ad(sc);
1401		carp_send_arp(sc);
1402#ifdef INET6
1403		carp_send_na(sc);
1404#endif /* INET6 */
1405		carp_setrun(sc, 0);
1406		carp_setroute(sc, RTM_ADD);
1407		break;
1408	}
1409}
1410
1411/*
1412 * When in backup state, af indicates whether to reset the master down timer
1413 * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1414 */
1415void
1416carp_setrun(struct carp_softc *sc, sa_family_t af)
1417{
1418	struct timeval tv;
1419
1420	if (sc->sc_carpdev == NULL) {
1421		sc->sc_if.if_flags &= ~IFF_RUNNING;
1422		carp_set_state(sc, INIT);
1423		return;
1424	}
1425
1426	if (sc->sc_if.if_flags & IFF_UP && sc->sc_vhid > 0 &&
1427	    (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) {
1428		sc->sc_if.if_flags |= IFF_RUNNING;
1429	} else {
1430		sc->sc_if.if_flags &= ~IFF_RUNNING;
1431		carp_setroute(sc, RTM_DELETE);
1432		return;
1433	}
1434
1435	switch (sc->sc_state) {
1436	case INIT:
1437		carp_set_state(sc, BACKUP);
1438		carp_setroute(sc, RTM_DELETE);
1439		carp_setrun(sc, 0);
1440		break;
1441	case BACKUP:
1442		callout_stop(&sc->sc_ad_tmo);
1443		tv.tv_sec = 3 * sc->sc_advbase;
1444		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1445		switch (af) {
1446#ifdef INET
1447		case AF_INET:
1448			callout_schedule(&sc->sc_md_tmo, tvtohz(&tv));
1449			break;
1450#endif /* INET */
1451#ifdef INET6
1452		case AF_INET6:
1453			callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv));
1454			break;
1455#endif /* INET6 */
1456		default:
1457			if (sc->sc_naddrs)
1458				callout_schedule(&sc->sc_md_tmo, tvtohz(&tv));
1459			if (sc->sc_naddrs6)
1460				callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv));
1461			break;
1462		}
1463		break;
1464	case MASTER:
1465		tv.tv_sec = sc->sc_advbase;
1466		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1467		callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv));
1468		break;
1469	}
1470}
1471
1472void
1473carp_multicast_cleanup(struct carp_softc *sc)
1474{
1475	struct ip_moptions *imo = &sc->sc_imo;
1476#ifdef INET6
1477	struct ip6_moptions *im6o = &sc->sc_im6o;
1478#endif
1479	u_int16_t n = imo->imo_num_memberships;
1480
1481	/* Clean up our own multicast memberships */
1482	while (n-- > 0) {
1483		if (imo->imo_membership[n] != NULL) {
1484			in_delmulti(imo->imo_membership[n]);
1485			imo->imo_membership[n] = NULL;
1486		}
1487	}
1488	imo->imo_num_memberships = 0;
1489	imo->imo_multicast_ifp = NULL;
1490
1491#ifdef INET6
1492	while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1493		struct in6_multi_mship *imm =
1494		    LIST_FIRST(&im6o->im6o_memberships);
1495
1496		LIST_REMOVE(imm, i6mm_chain);
1497		in6_leavegroup(imm);
1498	}
1499	im6o->im6o_multicast_ifp = NULL;
1500#endif
1501
1502	/* And any other multicast memberships */
1503	carp_ether_purgemulti(sc);
1504}
1505
/*
 * Attach the carp interface sc to the parent interface ifp, or detach it
 * from its current parent when ifp is NULL.
 *
 * Returns 0 on success (also when ifp already is the parent),
 * EADDRNOTAVAIL if ifp lacks IFF_MULTICAST, EINVAL if ifp is itself a carp
 * interface or another virtual host on ifp already uses sc's vhid, ENOBUFS
 * if the per-parent carp_if cannot be allocated, or the error returned by
 * ifpromisc() or one of the multicast join helpers.
 */
int
carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp)
{
	struct carp_if *cif, *ncif = NULL;
	struct carp_softc *vr, *after = NULL;
	int myself = 0, error = 0;
	int s;

	/* Nothing to do when the parent does not change. */
	if (ifp == sc->sc_carpdev)
		return (0);

	if (ifp != NULL) {
		if ((ifp->if_flags & IFF_MULTICAST) == 0)
			return (EADDRNOTAVAIL);

		/* carp cannot be stacked on top of carp */
		if (ifp->if_type == IFT_CARP)
			return (EINVAL);

		if (ifp->if_carp == NULL) {
			/*
			 * First virtual host on this parent: allocate the
			 * carp_if and switch the parent to promiscuous mode.
			 */
			ncif = malloc(sizeof(*cif), M_IFADDR, M_NOWAIT);
			if (ncif == NULL)
				return (ENOBUFS);
			if ((error = ifpromisc(ifp, 1))) {
				free(ncif, M_IFADDR);
				return (error);
			}

			ncif->vhif_ifp = ifp;
			TAILQ_INIT(&ncif->vhif_vrs);
		} else {
			/* A vhid must be unique per parent interface. */
			cif = (struct carp_if *)ifp->if_carp;
			TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
				if (vr != sc && vr->sc_vhid == sc->sc_vhid)
					return (EINVAL);
		}

		/* detach from old interface */
		if (sc->sc_carpdev != NULL)
			carpdetach(sc);

		/*
		 * join multicast groups
		 *
		 * NOTE(review): the join is attempted only when sc_naddrs is
		 * negative, which looks unreachable for an address count --
		 * confirm whether "> 0" was intended.  The error path also
		 * leaves the ifpromisc() call above in effect.
		 */
		if (sc->sc_naddrs < 0 &&
		    (error = carp_join_multicast(sc)) != 0) {
			if (ncif != NULL)
				free(ncif, M_IFADDR);
			return (error);
		}

#ifdef INET6
		/* NOTE(review): same "< 0" condition as above -- confirm. */
		if (sc->sc_naddrs6 < 0 &&
		    (error = carp_join_multicast6(sc)) != 0) {
			if (ncif != NULL)
				free(ncif, M_IFADDR);
			carp_multicast_cleanup(sc);
			return (error);
		}
#endif

		/* attach carp interface to physical interface */
		if (ncif != NULL)
			ifp->if_carp = (void *)ncif;
		sc->sc_carpdev = ifp;
		cif = (struct carp_if *)ifp->if_carp;
		/*
		 * Find out whether sc is already on the list and remember
		 * the last entry whose vhid is smaller than ours.
		 */
		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
			if (vr == sc)
				myself = 1;
			if (vr->sc_vhid < sc->sc_vhid)
				after = vr;
		}

		if (!myself) {
			/*
			 * We're trying to keep things in order
			 *
			 * NOTE(review): with no smaller vhid present sc is
			 * appended at the tail, which does not keep the list
			 * sorted when larger vhids already exist -- confirm.
			 */
			if (after == NULL) {
				TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
			} else {
				TAILQ_INSERT_AFTER(&cif->vhif_vrs, after,
				    sc, sc_list);
			}
			cif->vhif_nvrs++;
		}
		if (sc->sc_naddrs || sc->sc_naddrs6)
			sc->sc_if.if_flags |= IFF_UP;
		/* The virtual MAC depends on the parent's interface type. */
		carp_set_enaddr(sc);
		KERNEL_LOCK(1, NULL);
		s = splnet();
		/* XXX linkstatehooks establish */
		carp_carpdev_state(ifp);
		splx(s);
		KERNEL_UNLOCK_ONE(NULL);
	} else {
		/* ifp == NULL: detach and mark the interface down. */
		carpdetach(sc);
		sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING);
	}
	return (0);
}
1601
1602void
1603carp_set_enaddr(struct carp_softc *sc)
1604{
1605	uint8_t enaddr[ETHER_ADDR_LEN];
1606	if (sc->sc_carpdev && sc->sc_carpdev->if_type == IFT_ISO88025) {
1607		enaddr[0] = 3;
1608		enaddr[1] = 0;
1609		enaddr[2] = 0x40 >> (sc->sc_vhid - 1);
1610		enaddr[3] = 0x40000 >> (sc->sc_vhid - 1);
1611		enaddr[4] = 0;
1612		enaddr[5] = 0;
1613	} else {
1614		enaddr[0] = 0;
1615		enaddr[1] = 0;
1616		enaddr[2] = 0x5e;
1617		enaddr[3] = 0;
1618		enaddr[4] = 1;
1619		enaddr[5] = sc->sc_vhid;
1620	}
1621	if_set_sadl(&sc->sc_if, enaddr, sizeof(enaddr), false);
1622}
1623
1624void
1625carp_addr_updated(void *v)
1626{
1627	struct carp_softc *sc = (struct carp_softc *) v;
1628	struct ifaddr *ifa;
1629	int new_naddrs = 0, new_naddrs6 = 0;
1630
1631	IFADDR_FOREACH(ifa, &sc->sc_if) {
1632		if (ifa->ifa_addr->sa_family == AF_INET)
1633			new_naddrs++;
1634		else if (ifa->ifa_addr->sa_family == AF_INET6)
1635			new_naddrs6++;
1636	}
1637
1638	/* Handle a callback after SIOCDIFADDR */
1639	if (new_naddrs < sc->sc_naddrs || new_naddrs6 < sc->sc_naddrs6) {
1640		struct in_addr mc_addr;
1641		struct in_multi *inm;
1642
1643		sc->sc_naddrs = new_naddrs;
1644		sc->sc_naddrs6 = new_naddrs6;
1645
1646		/* Re-establish multicast membership removed by in_control */
1647		mc_addr.s_addr = INADDR_CARP_GROUP;
1648		IN_LOOKUP_MULTI(mc_addr, &sc->sc_if, inm);
1649		if (inm == NULL) {
1650			memset(&sc->sc_imo, 0, sizeof(sc->sc_imo));
1651
1652			if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0)
1653				carp_join_multicast(sc);
1654		}
1655
1656		if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) {
1657			sc->sc_if.if_flags &= ~IFF_UP;
1658			carp_set_state(sc, INIT);
1659		} else
1660			carp_hmac_prepare(sc);
1661	}
1662
1663	carp_setrun(sc, 0);
1664}
1665
1666int
1667carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
1668{
1669	struct ifnet *ifp = sc->sc_carpdev;
1670	struct in_ifaddr *ia, *ia_if;
1671	int error = 0;
1672
1673	if (sin->sin_addr.s_addr == 0) {
1674		if (!(sc->sc_if.if_flags & IFF_UP))
1675			carp_set_state(sc, INIT);
1676		if (sc->sc_naddrs)
1677			sc->sc_if.if_flags |= IFF_UP;
1678		carp_setrun(sc, 0);
1679		return (0);
1680	}
1681
1682	/* we have to do this by hand to ensure we don't match on ourselves */
1683	ia_if = NULL;
1684	for (ia = TAILQ_FIRST(&in_ifaddrhead); ia;
1685	    ia = TAILQ_NEXT(ia, ia_list)) {
1686
1687		/* and, yeah, we need a multicast-capable iface too */
1688		if (ia->ia_ifp != &sc->sc_if &&
1689		    ia->ia_ifp->if_type != IFT_CARP &&
1690		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1691		    (sin->sin_addr.s_addr & ia->ia_subnetmask) ==
1692		    ia->ia_subnet) {
1693			if (!ia_if)
1694				ia_if = ia;
1695		}
1696	}
1697
1698	if (ia_if) {
1699		ia = ia_if;
1700		if (ifp) {
1701			if (ifp != ia->ia_ifp)
1702				return (EADDRNOTAVAIL);
1703		} else {
1704			ifp = ia->ia_ifp;
1705		}
1706	}
1707
1708	if ((error = carp_set_ifp(sc, ifp)))
1709		return (error);
1710
1711	if (sc->sc_carpdev == NULL)
1712		return (EADDRNOTAVAIL);
1713
1714	if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0)
1715		return (error);
1716
1717	sc->sc_naddrs++;
1718	if (sc->sc_carpdev != NULL)
1719		sc->sc_if.if_flags |= IFF_UP;
1720
1721	carp_set_state(sc, INIT);
1722	carp_setrun(sc, 0);
1723
1724	/*
1725	 * Hook if_addrhooks so that we get a callback after in_ifinit has run,
1726	 * to correct any inappropriate routes that it inserted.
1727	 */
1728	if (sc->ah_cookie == 0) {
1729		/* XXX link address hook */
1730	}
1731
1732	return (0);
1733}
1734
1735int
1736carp_join_multicast(struct carp_softc *sc)
1737{
1738	struct ip_moptions *imo = &sc->sc_imo, tmpimo;
1739	struct in_addr addr;
1740
1741	memset(&tmpimo, 0, sizeof(tmpimo));
1742	addr.s_addr = INADDR_CARP_GROUP;
1743	if ((tmpimo.imo_membership[0] =
1744	    in_addmulti(&addr, &sc->sc_if)) == NULL) {
1745		return (ENOBUFS);
1746	}
1747
1748	imo->imo_membership[0] = tmpimo.imo_membership[0];
1749	imo->imo_num_memberships = 1;
1750	imo->imo_multicast_ifp = &sc->sc_if;
1751	imo->imo_multicast_ttl = CARP_DFLTTL;
1752	imo->imo_multicast_loop = 0;
1753	return (0);
1754}
1755
1756
1757#ifdef INET6
1758int
1759carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1760{
1761	struct ifnet *ifp = sc->sc_carpdev;
1762	struct in6_ifaddr *ia, *ia_if;
1763	int error = 0;
1764
1765	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1766		if (!(sc->sc_if.if_flags & IFF_UP))
1767			carp_set_state(sc, INIT);
1768		if (sc->sc_naddrs6)
1769			sc->sc_if.if_flags |= IFF_UP;
1770		carp_setrun(sc, 0);
1771		return (0);
1772	}
1773
1774	/* we have to do this by hand to ensure we don't match on ourselves */
1775	ia_if = NULL;
1776	for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
1777		int i;
1778
1779		for (i = 0; i < 4; i++) {
1780			if ((sin6->sin6_addr.s6_addr32[i] &
1781			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
1782			    (ia->ia_addr.sin6_addr.s6_addr32[i] &
1783			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
1784				break;
1785		}
1786		/* and, yeah, we need a multicast-capable iface too */
1787		if (ia->ia_ifp != &sc->sc_if &&
1788		    ia->ia_ifp->if_type != IFT_CARP &&
1789		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1790		    (i == 4)) {
1791			if (!ia_if)
1792				ia_if = ia;
1793		}
1794	}
1795
1796	if (ia_if) {
1797		ia = ia_if;
1798		if (sc->sc_carpdev) {
1799			if (sc->sc_carpdev != ia->ia_ifp)
1800				return (EADDRNOTAVAIL);
1801		} else {
1802			ifp = ia->ia_ifp;
1803		}
1804	}
1805
1806	if ((error = carp_set_ifp(sc, ifp)))
1807		return (error);
1808
1809	if (sc->sc_carpdev == NULL)
1810		return (EADDRNOTAVAIL);
1811
1812	if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0)
1813		return (error);
1814
1815	sc->sc_naddrs6++;
1816	if (sc->sc_carpdev != NULL)
1817		sc->sc_if.if_flags |= IFF_UP;
1818	carp_set_state(sc, INIT);
1819	carp_setrun(sc, 0);
1820
1821	return (0);
1822}
1823
1824int
1825carp_join_multicast6(struct carp_softc *sc)
1826{
1827	struct in6_multi_mship *imm, *imm2;
1828	struct ip6_moptions *im6o = &sc->sc_im6o;
1829	struct sockaddr_in6 addr6;
1830	int error;
1831
1832	/* Join IPv6 CARP multicast group */
1833	memset(&addr6, 0, sizeof(addr6));
1834	addr6.sin6_family = AF_INET6;
1835	addr6.sin6_len = sizeof(addr6);
1836	addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1837	addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1838	addr6.sin6_addr.s6_addr8[15] = 0x12;
1839	if ((imm = in6_joingroup(&sc->sc_if,
1840	    &addr6.sin6_addr, &error, 0)) == NULL) {
1841		return (error);
1842	}
1843	/* join solicited multicast address */
1844	memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr));
1845	addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1846	addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1847	addr6.sin6_addr.s6_addr32[1] = 0;
1848	addr6.sin6_addr.s6_addr32[2] = htonl(1);
1849	addr6.sin6_addr.s6_addr32[3] = 0;
1850	addr6.sin6_addr.s6_addr8[12] = 0xff;
1851	if ((imm2 = in6_joingroup(&sc->sc_if,
1852	    &addr6.sin6_addr, &error, 0)) == NULL) {
1853		in6_leavegroup(imm);
1854		return (error);
1855	}
1856
1857	/* apply v6 multicast membership */
1858	im6o->im6o_multicast_ifp = &sc->sc_if;
1859	if (imm)
1860		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm,
1861		    i6mm_chain);
1862	if (imm2)
1863		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2,
1864		    i6mm_chain);
1865
1866	return (0);
1867}
1868
1869#endif /* INET6 */
1870
/*
 * ioctl handler of the carp interface.
 *
 * Handles address initialisation (SIOCINITIFADDR), flag changes
 * (SIOCSIFFLAGS), setting and reading the carp parameters (SIOCSVH /
 * SIOCGVH) and link-level multicast changes (SIOCADDMULTI / SIOCDELMULTI);
 * everything else is passed to ether_ioctl().
 *
 * Returns 0 or an errno value.  carp_hmac_prepare() runs before every
 * return that goes through the end of the function; the early "return"
 * statements inside SIOCSVH skip it.
 */
int
carp_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
	struct lwp *l = curlwp;		/* XXX */
	struct carp_softc *sc = ifp->if_softc, *vr;
	struct carpreq carpr;
	struct ifaddr *ifa;
	struct ifreq *ifr;
	struct ifnet *cdev = NULL;
	int error = 0;

	/* data is interpreted per command; both views alias the same pointer */
	ifa = (struct ifaddr *)data;
	ifr = (struct ifreq *)data;

	switch (cmd) {
	case SIOCINITIFADDR:
		switch (ifa->ifa_addr->sa_family) {
#ifdef INET
		case AF_INET:
			sc->sc_if.if_flags |= IFF_UP;
			/* the destination address mirrors the address itself */
			memcpy(ifa->ifa_dstaddr, ifa->ifa_addr,
			    sizeof(struct sockaddr));
			error = carp_set_addr(sc, satosin(ifa->ifa_addr));
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:
			sc->sc_if.if_flags|= IFF_UP;
			error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
			break;
#endif /* INET6 */
		default:
			error = EAFNOSUPPORT;
			break;
		}
		break;

	case SIOCSIFFLAGS:
		if ((error = ifioctl_common(ifp, cmd, data)) != 0)
			break;
		if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
			/* going down: stop all timers first */
			callout_stop(&sc->sc_ad_tmo);
			callout_stop(&sc->sc_md_tmo);
			callout_stop(&sc->sc_md6_tmo);
			if (sc->sc_state == MASTER) {
				/* we need the interface up to bow out */
				sc->sc_if.if_flags |= IFF_UP;
				sc->sc_bow_out = 1;
				carp_send_ad(sc);
			}
			sc->sc_if.if_flags &= ~IFF_UP;
			carp_set_state(sc, INIT);
			carp_setrun(sc, 0);
		} else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
			/* coming up from INIT: let carp_setrun() start things */
			sc->sc_if.if_flags |= IFF_UP;
			carp_setrun(sc, 0);
		}
		break;

	case SIOCSVH:
		/* without a calling lwp nothing is changed and 0 is returned */
		if (l == NULL)
			break;
		if ((error = kauth_authorize_network(l->l_cred,
		    KAUTH_NETWORK_INTERFACE,
		    KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
		    NULL)) != 0)
			break;
		if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
			break;
		/*
		 * NOTE(review): this 1 is overwritten by the carp_set_ifp()
		 * assignment below, so the "error--" bookkeeping further down
		 * starts from 0 and "error > 0" can no longer trigger --
		 * confirm whether that is intended.
		 */
		error = 1;
		if (carpr.carpr_carpdev[0] != '\0' &&
		    (cdev = ifunit(carpr.carpr_carpdev)) == NULL)
			return (EINVAL);
		/* an empty device name leaves cdev NULL, i.e. detaches */
		if ((error = carp_set_ifp(sc, cdev)))
			return (error);
		/* explicit state change request, only honoured outside INIT */
		if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
			switch (carpr.carpr_state) {
			case BACKUP:
				callout_stop(&sc->sc_ad_tmo);
				carp_set_state(sc, BACKUP);
				carp_setrun(sc, 0);
				carp_setroute(sc, RTM_DELETE);
				break;
			case MASTER:
				carp_master_down(sc);
				break;
			default:
				break;
			}
		}
		if (carpr.carpr_vhid > 0) {
			if (carpr.carpr_vhid > 255) {
				error = EINVAL;
				break;
			}
			/* the vhid must be unique on the parent interface */
			if (sc->sc_carpdev) {
				struct carp_if *cif;
				cif = (struct carp_if *)sc->sc_carpdev->if_carp;
				TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
					if (vr != sc &&
					    vr->sc_vhid == carpr.carpr_vhid)
						return (EINVAL);
			}
			sc->sc_vhid = carpr.carpr_vhid;
			/* the link-level address is derived from the vhid */
			carp_set_enaddr(sc);
			carp_set_state(sc, INIT);
			error--;
		}
		if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
			if (carpr.carpr_advskew > 254) {
				error = EINVAL;
				break;
			}
			if (carpr.carpr_advbase > 255) {
				error = EINVAL;
				break;
			}
			sc->sc_advbase = carpr.carpr_advbase;
			sc->sc_advskew = carpr.carpr_advskew;
			error--;
		}
		/* the key is always taken from the request */
		memcpy(sc->sc_key, carpr.carpr_key, sizeof(sc->sc_key));
		if (error > 0)
			error = EINVAL;
		else {
			error = 0;
			carp_setrun(sc, 0);
		}
		break;

	case SIOCGVH:
		memset(&carpr, 0, sizeof(carpr));
		if (sc->sc_carpdev != NULL)
			strlcpy(carpr.carpr_carpdev, sc->sc_carpdev->if_xname,
			    IFNAMSIZ);
		carpr.carpr_state = sc->sc_state;
		carpr.carpr_vhid = sc->sc_vhid;
		carpr.carpr_advbase = sc->sc_advbase;
		carpr.carpr_advskew = sc->sc_advskew;

		/*
		 * The key is only copied out for callers that pass the
		 * privilege check; a failed check is not reported because
		 * error is overwritten by copyout() below.
		 */
		if ((l != NULL) && (error = kauth_authorize_network(l->l_cred,
		    KAUTH_NETWORK_INTERFACE,
		    KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
		    NULL)) == 0)
			memcpy(carpr.carpr_key, sc->sc_key,
			    sizeof(carpr.carpr_key));
		error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
		break;

	case SIOCADDMULTI:
		error = carp_ether_addmulti(sc, ifr);
		break;

	case SIOCDELMULTI:
		error = carp_ether_delmulti(sc, ifr);
		break;

	default:
		error = ether_ioctl(ifp, cmd, data);
	}

	/* recompute the HMAC state after every command that reaches here */
	carp_hmac_prepare(sc);
	return (error);
}
2035
2036
/*
 * Start output on carp interface. This function should never be called.
 *
 * It does nothing, except for printing a diagnostic that names the
 * interface when the kernel is built with DEBUG.
 */
void
carp_start(struct ifnet *ifp)
{
#ifdef DEBUG
	printf("%s: start called\n", ifp->if_xname);
#endif
}
2047
2048int
2049carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa,
2050    struct rtentry *rt)
2051{
2052	struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc);
2053	KASSERT(KERNEL_LOCKED_P());
2054
2055	if (sc->sc_carpdev != NULL && sc->sc_state == MASTER) {
2056		return (sc->sc_carpdev->if_output(ifp, m, sa, rt));
2057	} else {
2058		m_freem(m);
2059		return (ENETUNREACH);
2060	}
2061}
2062
2063void
2064carp_set_state(struct carp_softc *sc, int state)
2065{
2066	static const char *carp_states[] = { CARP_STATES };
2067	if (sc->sc_state == state)
2068		return;
2069
2070	CARP_LOG(sc, ("state transition from: %s -> to: %s", carp_states[sc->sc_state], carp_states[state]));
2071
2072	sc->sc_state = state;
2073	switch (state) {
2074	case BACKUP:
2075		sc->sc_if.if_link_state = LINK_STATE_DOWN;
2076		break;
2077	case MASTER:
2078		sc->sc_if.if_link_state = LINK_STATE_UP;
2079		break;
2080	default:
2081		sc->sc_if.if_link_state = LINK_STATE_UNKNOWN;
2082		break;
2083	}
2084	rt_ifmsg(&sc->sc_if);
2085}
2086
2087void
2088carp_carpdev_state(void *v)
2089{
2090	struct carp_if *cif;
2091	struct carp_softc *sc;
2092	struct ifnet *ifp = v;
2093
2094	if (ifp->if_type == IFT_CARP)
2095		return;
2096
2097	cif = (struct carp_if *)ifp->if_carp;
2098
2099	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
2100		int suppressed = sc->sc_suppress;
2101
2102		if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN ||
2103		    !(sc->sc_carpdev->if_flags & IFF_UP)) {
2104			sc->sc_if.if_flags &= ~IFF_RUNNING;
2105			callout_stop(&sc->sc_ad_tmo);
2106			callout_stop(&sc->sc_md_tmo);
2107			callout_stop(&sc->sc_md6_tmo);
2108			carp_set_state(sc, INIT);
2109			sc->sc_suppress = 1;
2110			carp_setrun(sc, 0);
2111			if (!suppressed) {
2112				carp_suppress_preempt++;
2113				if (carp_suppress_preempt == 1)
2114					carp_send_ad_all();
2115			}
2116		} else {
2117			carp_set_state(sc, INIT);
2118			sc->sc_suppress = 0;
2119			carp_setrun(sc, 0);
2120			if (suppressed)
2121				carp_suppress_preempt--;
2122		}
2123	}
2124}
2125
2126int
2127carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr)
2128{
2129	const struct sockaddr *sa = ifreq_getaddr(SIOCADDMULTI, ifr);
2130	struct ifnet *ifp;
2131	struct carp_mc_entry *mc;
2132	u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2133	int error;
2134
2135	ifp = sc->sc_carpdev;
2136	if (ifp == NULL)
2137		return (EINVAL);
2138
2139	error = ether_addmulti(sa, &sc->sc_ac);
2140	if (error != ENETRESET)
2141		return (error);
2142
2143	/*
2144	 * This is new multicast address.  We have to tell parent
2145	 * about it.  Also, remember this multicast address so that
2146	 * we can delete them on unconfigure.
2147	 */
2148	mc = malloc(sizeof(struct carp_mc_entry), M_DEVBUF, M_NOWAIT);
2149	if (mc == NULL) {
2150		error = ENOMEM;
2151		goto alloc_failed;
2152	}
2153
2154	/*
2155	 * As ether_addmulti() returns ENETRESET, following two
2156	 * statement shouldn't fail.
2157	 */
2158	(void)ether_multiaddr(sa, addrlo, addrhi);
2159	ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm);
2160	memcpy(&mc->mc_addr, sa, sa->sa_len);
2161	LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries);
2162
2163	error = if_mcast_op(ifp, SIOCADDMULTI, sa);
2164	if (error != 0)
2165		goto ioctl_failed;
2166
2167	return (error);
2168
2169 ioctl_failed:
2170	LIST_REMOVE(mc, mc_entries);
2171	free(mc, M_DEVBUF);
2172 alloc_failed:
2173	(void)ether_delmulti(sa, &sc->sc_ac);
2174
2175	return (error);
2176}
2177
2178int
2179carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr)
2180{
2181	const struct sockaddr *sa = ifreq_getaddr(SIOCDELMULTI, ifr);
2182	struct ifnet *ifp;
2183	struct ether_multi *enm;
2184	struct carp_mc_entry *mc;
2185	u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2186	int error;
2187
2188	ifp = sc->sc_carpdev;
2189	if (ifp == NULL)
2190		return (EINVAL);
2191
2192	/*
2193	 * Find a key to lookup carp_mc_entry.  We have to do this
2194	 * before calling ether_delmulti for obvious reason.
2195	 */
2196	if ((error = ether_multiaddr(sa, addrlo, addrhi)) != 0)
2197		return (error);
2198	ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm);
2199	if (enm == NULL)
2200		return (EINVAL);
2201
2202	LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries)
2203		if (mc->mc_enm == enm)
2204			break;
2205
2206	/* We won't delete entries we didn't add */
2207	if (mc == NULL)
2208		return (EINVAL);
2209
2210	error = ether_delmulti(sa, &sc->sc_ac);
2211	if (error != ENETRESET)
2212		return (error);
2213
2214	/* We no longer use this multicast address.  Tell parent so. */
2215	error = if_mcast_op(ifp, SIOCDELMULTI, sa);
2216	if (error == 0) {
2217		/* And forget about this address. */
2218		LIST_REMOVE(mc, mc_entries);
2219		free(mc, M_DEVBUF);
2220	} else
2221		(void)ether_addmulti(sa, &sc->sc_ac);
2222	return (error);
2223}
2224
2225/*
2226 * Delete any multicast address we have asked to add from parent
2227 * interface.  Called when the carp is being unconfigured.
2228 */
2229void
2230carp_ether_purgemulti(struct carp_softc *sc)
2231{
2232	struct ifnet *ifp = sc->sc_carpdev;		/* Parent. */
2233	struct carp_mc_entry *mc;
2234
2235	if (ifp == NULL)
2236		return;
2237
2238	while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) {
2239		(void)if_mcast_op(ifp, SIOCDELMULTI, sstosa(&mc->mc_addr));
2240		LIST_REMOVE(mc, mc_entries);
2241		free(mc, M_DEVBUF);
2242	}
2243}
2244
/*
 * sysctl handler behind the "stats" node: exports the CARP_NSTATS
 * counters kept in carpstat_percpu through NETSTAT_SYSCTL().
 */
static int
sysctl_net_inet_carp_stats(SYSCTLFN_ARGS)
{

	return (NETSTAT_SYSCTL(carpstat_percpu, CARP_NSTATS));
}
2251
/*
 * Protocol initialisation: registers the net.inet.carp sysctl tree.
 * No sysctl log is kept (NULL is passed).
 */
void
carp_init(void)
{

	sysctl_net_inet_carp_setup(NULL);
}
2258
2259static void
2260sysctl_net_inet_carp_setup(struct sysctllog **clog)
2261{
2262
2263	sysctl_createv(clog, 0, NULL, NULL,
2264		       CTLFLAG_PERMANENT,
2265		       CTLTYPE_NODE, "net", NULL,
2266		       NULL, 0, NULL, 0,
2267		       CTL_NET, CTL_EOL);
2268	sysctl_createv(clog, 0, NULL, NULL,
2269		       CTLFLAG_PERMANENT,
2270		       CTLTYPE_NODE, "inet", NULL,
2271		       NULL, 0, NULL, 0,
2272		       CTL_NET, PF_INET, CTL_EOL);
2273	sysctl_createv(clog, 0, NULL, NULL,
2274		       CTLFLAG_PERMANENT,
2275		       CTLTYPE_NODE, "carp",
2276		       SYSCTL_DESCR("CARP related settings"),
2277		       NULL, 0, NULL, 0,
2278		       CTL_NET, PF_INET, IPPROTO_CARP, CTL_EOL);
2279
2280	sysctl_createv(clog, 0, NULL, NULL,
2281		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2282		       CTLTYPE_INT, "preempt",
2283		       SYSCTL_DESCR("Enable CARP Preempt"),
2284		       NULL, 0, &carp_opts[CARPCTL_PREEMPT], 0,
2285		       CTL_NET, PF_INET, IPPROTO_CARP,
2286		       CTL_CREATE, CTL_EOL);
2287	sysctl_createv(clog, 0, NULL, NULL,
2288		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2289		       CTLTYPE_INT, "arpbalance",
2290		       SYSCTL_DESCR("Enable ARP balancing"),
2291		       NULL, 0, &carp_opts[CARPCTL_ARPBALANCE], 0,
2292		       CTL_NET, PF_INET, IPPROTO_CARP,
2293		       CTL_CREATE, CTL_EOL);
2294	sysctl_createv(clog, 0, NULL, NULL,
2295		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2296		       CTLTYPE_INT, "allow",
2297		       SYSCTL_DESCR("Enable CARP"),
2298		       NULL, 0, &carp_opts[CARPCTL_ALLOW], 0,
2299		       CTL_NET, PF_INET, IPPROTO_CARP,
2300		       CTL_CREATE, CTL_EOL);
2301	sysctl_createv(clog, 0, NULL, NULL,
2302		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2303		       CTLTYPE_INT, "log",
2304		       SYSCTL_DESCR("CARP logging"),
2305		       NULL, 0, &carp_opts[CARPCTL_LOG], 0,
2306		       CTL_NET, PF_INET, IPPROTO_CARP,
2307		       CTL_CREATE, CTL_EOL);
2308	sysctl_createv(clog, 0, NULL, NULL,
2309		       CTLFLAG_PERMANENT,
2310		       CTLTYPE_STRUCT, "stats",
2311		       SYSCTL_DESCR("CARP statistics"),
2312		       sysctl_net_inet_carp_stats, 0, NULL, 0,
2313		       CTL_NET, PF_INET, IPPROTO_CARP, CARPCTL_STATS,
2314		       CTL_EOL);
2315}
2316