mld6.c revision 195727
1/*-
2 * Copyright (c) 2009 Bruce Simpson.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 * 3. The name of the author may not be used to endorse or promote
13 *    products derived from this software without specific prior written
14 *    permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 *	$KAME: mld6.c,v 1.27 2001/04/04 05:17:30 itojun Exp $
29 */
30
31/*-
32 * Copyright (c) 1988 Stephen Deering.
33 * Copyright (c) 1992, 1993
34 *	The Regents of the University of California.  All rights reserved.
35 *
36 * This code is derived from software contributed to Berkeley by
37 * Stephen Deering of Stanford University.
38 *
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
41 * are met:
42 * 1. Redistributions of source code must retain the above copyright
43 *    notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 *    notice, this list of conditions and the following disclaimer in the
46 *    documentation and/or other materials provided with the distribution.
47 * 4. Neither the name of the University nor the names of its contributors
48 *    may be used to endorse or promote products derived from this software
49 *    without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
64 */
65
66#include <sys/cdefs.h>
67__FBSDID("$FreeBSD: head/sys/netinet6/mld6.c 195727 2009-07-16 21:13:04Z rwatson $");
68
69#include "opt_inet.h"
70#include "opt_inet6.h"
71
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/mbuf.h>
75#include <sys/socket.h>
76#include <sys/protosw.h>
77#include <sys/sysctl.h>
78#include <sys/kernel.h>
79#include <sys/callout.h>
80#include <sys/malloc.h>
81#include <sys/module.h>
82#include <sys/vimage.h>
83
84#include <net/if.h>
85#include <net/route.h>
86#include <net/vnet.h>
87
88#include <netinet/in.h>
89#include <netinet/in_var.h>
90#include <netinet6/in6_var.h>
91#include <netinet/ip6.h>
92#include <netinet6/ip6_var.h>
93#include <netinet6/scope6_var.h>
94#include <netinet/icmp6.h>
95#include <netinet6/mld6.h>
96#include <netinet6/mld6_var.h>
97
98#include <security/mac/mac_framework.h>
99
100#ifndef KTR_MLD
101#define KTR_MLD KTR_INET6
102#endif
103
104static struct mld_ifinfo *
105		mli_alloc_locked(struct ifnet *);
106static void	mli_delete_locked(const struct ifnet *);
107static void	mld_dispatch_packet(struct mbuf *);
108static void	mld_dispatch_queue(struct ifqueue *, int);
109static void	mld_final_leave(struct in6_multi *, struct mld_ifinfo *);
110static void	mld_fasttimo_vnet(void);
111static int	mld_handle_state_change(struct in6_multi *,
112		    struct mld_ifinfo *);
113static int	mld_initial_join(struct in6_multi *, struct mld_ifinfo *,
114		    const int);
115#ifdef KTR
116static char *	mld_rec_type_to_str(const int);
117#endif
118static void	mld_set_version(struct mld_ifinfo *, const int);
119static void	mld_slowtimo_vnet(void);
120static void	mld_sysinit(void);
121static void	mld_sysuninit(void);
122static int	mld_v1_input_query(struct ifnet *, const struct ip6_hdr *,
123		    /*const*/ struct mld_hdr *);
124static int	mld_v1_input_report(struct ifnet *, const struct ip6_hdr *,
125		    /*const*/ struct mld_hdr *);
126static void	mld_v1_process_group_timer(struct in6_multi *, const int);
127static void	mld_v1_process_querier_timers(struct mld_ifinfo *);
128static int	mld_v1_transmit_report(struct in6_multi *, const int);
129static void	mld_v1_update_group(struct in6_multi *, const int);
130static void	mld_v2_cancel_link_timers(struct mld_ifinfo *);
131static void	mld_v2_dispatch_general_query(struct mld_ifinfo *);
132static struct mbuf *
133		mld_v2_encap_report(struct ifnet *, struct mbuf *);
134static int	mld_v2_enqueue_filter_change(struct ifqueue *,
135		    struct in6_multi *);
136static int	mld_v2_enqueue_group_record(struct ifqueue *,
137		    struct in6_multi *, const int, const int, const int);
138static int	mld_v2_input_query(struct ifnet *, const struct ip6_hdr *,
139		    struct mbuf *, const int, const int);
140static int	mld_v2_merge_state_changes(struct in6_multi *,
141		    struct ifqueue *);
142static void	mld_v2_process_group_timers(struct mld_ifinfo *,
143		    struct ifqueue *, struct ifqueue *,
144		    struct in6_multi *, const int);
145static int	mld_v2_process_group_query(struct in6_multi *,
146		    struct mld_ifinfo *mli, int, struct mbuf *, const int);
147static int	sysctl_mld_gsr(SYSCTL_HANDLER_ARGS);
148static int	sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS);
149
150static vnet_attach_fn	vnet_mld_iattach;
151static vnet_detach_fn	vnet_mld_idetach;
152
153/*
154 * Normative references: RFC 2710, RFC 3590, RFC 3810.
155 *
156 * Locking:
157 *  * The MLD subsystem lock ends up being system-wide for the moment,
158 *    but could be per-VIMAGE later on.
159 *  * The permitted lock order is: IN6_MULTI_LOCK, MLD_LOCK, IF_ADDR_LOCK.
160 *    Any may be taken independently; if any are held at the same
161 *    time, the above lock order must be followed.
162 *  * IN6_MULTI_LOCK covers in_multi.
163 *  * MLD_LOCK covers per-link state and any global variables in this file.
164 *  * IF_ADDR_LOCK covers if_multiaddrs, which is used for a variety of
165 *    per-link state iterators.
166 *
167 *  XXX LOR PREVENTION
168 *  A special case for IPv6 is the in6_setscope() routine. ip6_output()
169 *  will not accept an ifp; it wants an embedded scope ID, unlike
170 *  ip_output(), which happily takes the ifp given to it. The embedded
171 *  scope ID is only used by MLD to select the outgoing interface.
172 *
173 *  During interface attach and detach, MLD will take MLD_LOCK *after*
174 *  the IF_AFDATA_LOCK.
175 *  As in6_setscope() takes IF_AFDATA_LOCK then SCOPE_LOCK, we can't call
176 *  it with MLD_LOCK held without triggering an LOR. A netisr with indirect
177 *  dispatch could work around this, but we'd rather not do that, as it
178 *  can introduce other races.
179 *
180 *  As such, we exploit the fact that the scope ID is just the interface
181 *  index, and embed it in the IPv6 destination address accordingly.
182 *  This is potentially NOT VALID for MLDv1 reports, as they
183 *  are always sent to the multicast group itself; as MLDv2
184 *  reports are always sent to ff02::16, this is not an issue
185 *  when MLDv2 is in use.
186 *
187 *  This does not however eliminate the LOR when ip6_output() itself
188 *  calls in6_setscope() internally whilst MLD_LOCK is held. This will
189 *  trigger a LOR warning in WITNESS when the ifnet is detached.
190 *
191 *  The right answer is probably to make IF_AFDATA_LOCK an rwlock, given
192 *  how it's used across the network stack. Here we're simply exploiting
193 *  the fact that MLD runs at a similar layer in the stack to scope6.c.
194 *
195 * VIMAGE:
196 *  * Each in6_multi corresponds to an ifp, and each ifp corresponds
197 *    to a vnet in ifp->if_vnet.
198 */
199static struct mtx		 mld_mtx;
200MALLOC_DEFINE(M_MLD, "mld", "mld state");
201
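/*
 * Embed a zone (interface) index in the second 16-bit word of a
 * KAME-format link-scoped address. MLD uses this only to tell
 * ip6_output() which interface to transmit on.
 */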
202#define	MLD_EMBEDSCOPE(pin6, zoneid) \
203	(pin6)->s6_addr16[1] = htons((zoneid) & 0xFFFF)
204
205/*
206 * VIMAGE-wide globals.
207 */
208static VNET_DEFINE(struct timeval, mld_gsrdelay) = {10, 0};
209static VNET_DEFINE(LIST_HEAD(, mld_ifinfo), mli_head);
210static VNET_DEFINE(int, interface_timers_running6);
211static VNET_DEFINE(int, state_change_timers_running6);
212static VNET_DEFINE(int, current_state_timers_running6);
213
214#define	V_mld_gsrdelay			VNET(mld_gsrdelay)
215#define	V_mli_head			VNET(mli_head)
216#define	V_interface_timers_running6	VNET(interface_timers_running6)
217#define	V_state_change_timers_running6	VNET(state_change_timers_running6)
218#define	V_current_state_timers_running6	VNET(current_state_timers_running6)
219
220SYSCTL_DECL(_net_inet6);	/* Note: Not in any common header. */
221
222SYSCTL_NODE(_net_inet6, OID_AUTO, mld, CTLFLAG_RW, 0,
223    "IPv6 Multicast Listener Discovery");
224
225/*
226 * Virtualized sysctls.
227 */
228SYSCTL_VNET_PROC(_net_inet6_mld, OID_AUTO, gsrdelay,
229    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
230    &VNET_NAME(mld_gsrdelay.tv_sec), 0, sysctl_mld_gsr, "I",
231    "Rate limit for MLDv2 Group-and-Source queries in seconds");
232
233/*
234 * Non-virtualized sysctls.
235 */
236SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_MPSAFE,
237    sysctl_mld_ifinfo, "Per-interface MLDv2 state");
238
239static int	mld_v1enable = 1;
240SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RW,
241    &mld_v1enable, 0, "Enable fallback to MLDv1");
242TUNABLE_INT("net.inet6.mld.v1enable", &mld_v1enable);
243
244/*
245 * Packed Router Alert option structure declaration.
246 */
247struct mld_raopt {
248	struct ip6_hbh		hbh;
249	struct ip6_opt		pad;
250	struct ip6_opt_router	ra;
251} __packed;
252
253/*
254 * Router Alert hop-by-hop option header.
255 */
256static struct mld_raopt mld_ra = {
257	.hbh = { 0, 0 },
258	.pad = { .ip6o_type = IP6OPT_PADN, 0 },
259	.ra = {
260	    .ip6or_type = IP6OPT_ROUTER_ALERT,
261	    .ip6or_len = IP6OPT_RTALERT_LEN - 2,
262	    .ip6or_value[0] = ((IP6OPT_RTALERT_MLD >> 8) & 0xFF),
263	    .ip6or_value[1] = (IP6OPT_RTALERT_MLD & 0xFF)
264	}
265};
266static struct ip6_pktopts mld_po;
267
268static __inline void
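/*
 * Stash the ifnet's vnet and interface index in the mbuf packet header
 * (overloading the header and flowid fields) so that deferred dispatch
 * via mld_dispatch_packet() can later recover the output interface.
 */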
269mld_save_context(struct mbuf *m, struct ifnet *ifp)
270{
271
272#ifdef VIMAGE
273	m->m_pkthdr.header = ifp->if_vnet;
274#endif /* VIMAGE */
275	m->m_pkthdr.flowid = ifp->if_index;
276}
277
278static __inline void
279mld_scrub_context(struct mbuf *m)
280{
281
282	m->m_pkthdr.header = NULL;
283	m->m_pkthdr.flowid = 0;
284}
285
286/*
287 * Restore context from a queued output chain.
288 * Return saved ifindex.
289 *
290 * VIMAGE: The assertion is there to make sure that we
291 * actually called CURVNET_SET() with what's in the mbuf chain.
292 */
293static __inline uint32_t
294mld_restore_context(struct mbuf *m)
295{
296
297#if defined(VIMAGE) && defined(INVARIANTS)
298	KASSERT(curvnet == m->m_pkthdr.header,
299	    ("%s: called when curvnet was not restored", __func__));
300#endif
301	return (m->m_pkthdr.flowid);
302}
303
304/*
305 * Retrieve or set threshold between group-source queries in seconds.
306 *
307 * VIMAGE: Assume curvnet set by caller.
308 * SMPng: NOTE: Serialized by MLD lock.
309 */
310static int
311sysctl_mld_gsr(SYSCTL_HANDLER_ARGS)
312{
313	int error;
314	int i;
315
316	error = sysctl_wire_old_buffer(req, sizeof(int));
317	if (error)
318		return (error);
319
320	MLD_LOCK();
321
322	i = V_mld_gsrdelay.tv_sec;
323
324	error = sysctl_handle_int(oidp, &i, 0, req);
325	if (error || !req->newptr)
326		goto out_locked;
327
328	if (i < -1 || i >= 60) {
329		error = EINVAL;
330		goto out_locked;
331	}
332
333	CTR2(KTR_MLD, "change mld_gsrdelay from %d to %d",
334	     V_mld_gsrdelay.tv_sec, i);
335	V_mld_gsrdelay.tv_sec = i;
336
337out_locked:
338	MLD_UNLOCK();
339	return (error);
340}
341
342/*
343 * Expose struct mld_ifinfo to userland, keyed by ifindex.
344 * For use by ifmcstat(8).
345 *
346 * SMPng: NOTE: Does an unlocked ifindex space read.
347 * VIMAGE: Assume curvnet set by caller. The node handler itself
348 * is not directly virtualized.
349 */
350static int
351sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS)
352{
353	int			*name;
354	int			 error;
355	u_int			 namelen;
356	struct ifnet		*ifp;
357	struct mld_ifinfo	*mli;
358
359	name = (int *)arg1;
360	namelen = arg2;
361
362	if (req->newptr != NULL)
363		return (EPERM);
364
365	if (namelen != 1)
366		return (EINVAL);
367
368	error = sysctl_wire_old_buffer(req, sizeof(struct mld_ifinfo));
369	if (error)
370		return (error);
371
372	IN6_MULTI_LOCK();
373	MLD_LOCK();
374
375	if (name[0] <= 0 || name[0] > V_if_index) {
376		error = ENOENT;
377		goto out_locked;
378	}
379
380	error = ENOENT;
381
382	ifp = ifnet_byindex(name[0]);
383	if (ifp == NULL)
384		goto out_locked;
385
386	LIST_FOREACH(mli, &V_mli_head, mli_link) {
387		if (ifp == mli->mli_ifp) {
388			error = SYSCTL_OUT(req, mli,
389			    sizeof(struct mld_ifinfo));
390			break;
391		}
392	}
393
394out_locked:
395	MLD_UNLOCK();
396	IN6_MULTI_UNLOCK();
397	return (error);
398}
399
400/*
401 * Dispatch an entire queue of pending packet chains.
402 * VIMAGE: Assumes the vnet pointer has been set.
403 */
404static void
405mld_dispatch_queue(struct ifqueue *ifq, int limit)
406{
407	struct mbuf *m;
408
409	for (;;) {
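	/* A limit of 0 effectively means no limit: drain the whole queue. */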
410		_IF_DEQUEUE(ifq, m);
411		if (m == NULL)
412			break;
413		CTR3(KTR_MLD, "%s: dispatch %p from %p", __func__, m, ifq);
414		mld_dispatch_packet(m);
415		if (--limit == 0)
416			break;
417	}
418}
419
420/*
421 * Filter outgoing MLD report state by group.
422 *
423 * Reports are ALWAYS suppressed for ALL-HOSTS (ff02::1)
424 * and node-local addresses. However, kernel and socket consumers
425 * always embed the KAME scope ID in the address provided, so strip it
426 * when performing comparison.
427 * Note: This is not the same as the *multicast* scope.
428 *
429 * Return zero if the given group is one for which MLD reports
430 * should be suppressed, or non-zero if reports should be issued.
431 */
432static __inline int
433mld_is_addr_reported(const struct in6_addr *addr)
434{
435
436	KASSERT(IN6_IS_ADDR_MULTICAST(addr), ("%s: not multicast", __func__));
437
438	if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_NODELOCAL)
439		return (0);
440
441	if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_LINKLOCAL) {
442		struct in6_addr tmp = *addr;
443		in6_clearscope(&tmp);
444		if (IN6_ARE_ADDR_EQUAL(&tmp, &in6addr_linklocal_allnodes))
445			return (0);
446	}
447
448	return (1);
449}
450
451/*
452 * Attach MLD when PF_INET6 is attached to an interface.
453 *
454 * SMPng: Normally called with IF_AFDATA_LOCK held.
455 */
456struct mld_ifinfo *
457mld_domifattach(struct ifnet *ifp)
458{
459	struct mld_ifinfo *mli;
460
461	CTR3(KTR_MLD, "%s: called for ifp %p(%s)",
462	    __func__, ifp, ifp->if_xname);
463
464	MLD_LOCK();
465
466	mli = mli_alloc_locked(ifp);
467	if (!(ifp->if_flags & IFF_MULTICAST))
468		mli->mli_flags |= MLIF_SILENT;
469
470	MLD_UNLOCK();
471
472	return (mli);
473}
474
475/*
476 * VIMAGE: assume curvnet set by caller.
477 */
478static struct mld_ifinfo *
479mli_alloc_locked(/*const*/ struct ifnet *ifp)
480{
481	struct mld_ifinfo *mli;
482
483	MLD_LOCK_ASSERT();
484
485	mli = malloc(sizeof(struct mld_ifinfo), M_MLD, M_NOWAIT|M_ZERO);
486	if (mli == NULL)
487		goto out;
488
489	mli->mli_ifp = ifp;
490	mli->mli_version = MLD_VERSION_2;
491	mli->mli_flags = 0;
492	mli->mli_rv = MLD_RV_INIT;
493	mli->mli_qi = MLD_QI_INIT;
494	mli->mli_qri = MLD_QRI_INIT;
495	mli->mli_uri = MLD_URI_INIT;
496
497	SLIST_INIT(&mli->mli_relinmhead);
498
499	/*
500	 * Responses to general queries are subject to bounds.
501	 */
502	IFQ_SET_MAXLEN(&mli->mli_gq, MLD_MAX_RESPONSE_PACKETS);
503
504	LIST_INSERT_HEAD(&V_mli_head, mli, mli_link);
505
506	CTR2(KTR_MLD, "allocate mld_ifinfo for ifp %p(%s)",
507	     ifp, ifp->if_xname);
508
509out:
510	return (mli);
511}
512
513/*
514 * Hook for ifdetach.
515 *
516 * NOTE: Some finalization tasks need to run before the protocol domain
517 * is detached, but also before the link layer does its cleanup.
518 * Run before link-layer cleanup; clean up groups, but do not free MLD state.
519 *
520 * SMPng: Caller must hold IN6_MULTI_LOCK().
521 * Must take IF_ADDR_LOCK() to cover if_multiaddrs iterator.
522 * XXX This routine is also bitten by unlocked ifma_protospec access.
523 */
524void
525mld_ifdetach(struct ifnet *ifp)
526{
527	struct mld_ifinfo	*mli;
528	struct ifmultiaddr	*ifma;
529	struct in6_multi	*inm, *tinm;
530
531	CTR3(KTR_MLD, "%s: called for ifp %p(%s)", __func__, ifp,
532	    ifp->if_xname);
533
534	IN6_MULTI_LOCK_ASSERT();
535	MLD_LOCK();
536
537	mli = MLD_IFINFO(ifp);
538	if (mli->mli_version == MLD_VERSION_2) {
539		IF_ADDR_LOCK(ifp);
540		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
541			if (ifma->ifma_addr->sa_family != AF_INET6 ||
542			    ifma->ifma_protospec == NULL)
543				continue;
544			inm = (struct in6_multi *)ifma->ifma_protospec;
545			if (inm->in6m_state == MLD_LEAVING_MEMBER) {
546				SLIST_INSERT_HEAD(&mli->mli_relinmhead,
547				    inm, in6m_nrele);
548			}
549			in6m_clear_recorded(inm);
550		}
551		IF_ADDR_UNLOCK(ifp);
552		SLIST_FOREACH_SAFE(inm, &mli->mli_relinmhead, in6m_nrele,
553		    tinm) {
554			SLIST_REMOVE_HEAD(&mli->mli_relinmhead, in6m_nrele);
555			in6m_release_locked(inm);
556		}
557	}
558
559	MLD_UNLOCK();
560}
561
562/*
563 * Hook for domifdetach.
564 * Runs after link-layer cleanup; free MLD state.
565 *
566 * SMPng: Normally called with IF_AFDATA_LOCK held.
567 */
568void
569mld_domifdetach(struct ifnet *ifp)
570{
571
572	CTR3(KTR_MLD, "%s: called for ifp %p(%s)",
573	    __func__, ifp, ifp->if_xname);
574
575	MLD_LOCK();
576	mli_delete_locked(ifp);
577	MLD_UNLOCK();
578}
579
580static void
581mli_delete_locked(const struct ifnet *ifp)
582{
583	struct mld_ifinfo *mli, *tmli;
584
585	CTR3(KTR_MLD, "%s: freeing mld_ifinfo for ifp %p(%s)",
586	    __func__, ifp, ifp->if_xname);
587
588	MLD_LOCK_ASSERT();
589
590	LIST_FOREACH_SAFE(mli, &V_mli_head, mli_link, tmli) {
591		if (mli->mli_ifp == ifp) {
592			/*
593			 * Free deferred General Query responses.
594			 */
595			_IF_DRAIN(&mli->mli_gq);
596
597			LIST_REMOVE(mli, mli_link);
598
599			KASSERT(SLIST_EMPTY(&mli->mli_relinmhead),
600			    ("%s: there are dangling in_multi references",
601			    __func__));
602
603			free(mli, M_MLD);
604			return;
605		}
606	}
607#ifdef INVARIANTS
608	panic("%s: mld_ifinfo not found for ifp %p\n", __func__,  ifp);
609#endif
610}
611
612/*
613 * Process a received MLDv1 general or address-specific query.
614 * Assumes that the query header has been pulled up to sizeof(mld_hdr).
615 *
616 * NOTE: Can't be fully const correct as we temporarily embed scope ID in
617 * mld_addr. This is OK as we own the mbuf chain.
618 */
619static int
620mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
621    /*const*/ struct mld_hdr *mld)
622{
623	struct ifmultiaddr	*ifma;
624	struct mld_ifinfo	*mli;
625	struct in6_multi	*inm;
626	int			 is_general_query;
627	uint16_t		 timer;
628#ifdef KTR
629	char			 ip6tbuf[INET6_ADDRSTRLEN];
630#endif
631
632	is_general_query = 0;
633
634	if (!mld_v1enable) {
635		CTR3(KTR_MLD, "ignore v1 query %s on ifp %p(%s)",
636		    ip6_sprintf(ip6tbuf, &mld->mld_addr),
637		    ifp, ifp->if_xname);
638		return (0);
639	}
640
641	/*
642	 * RFC3810 Section 6.2: MLD queries must originate from
643	 * a router's link-local address.
644	 */
645	if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
646		CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)",
647		    ip6_sprintf(ip6tbuf, &ip6->ip6_src),
648		    ifp, ifp->if_xname);
649		return (0);
650	}
651
652	/*
653	 * Do address field validation upfront before we accept
654	 * the query.
655	 */
656	if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
657		/*
658		 * MLDv1 General Query.
659		 * If this was not sent to the all-nodes group, ignore it.
660		 */
661		struct in6_addr		 dst;
662
663		dst = ip6->ip6_dst;
664		in6_clearscope(&dst);
665		if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes))
666			return (EINVAL);
667		is_general_query = 1;
668	} else {
669		/*
670		 * Embed scope ID of receiving interface in MLD query for
671		 * lookup whilst we don't hold other locks.
672		 */
673		in6_setscope(&mld->mld_addr, ifp, NULL);
674	}
675
676	IN6_MULTI_LOCK();
677	MLD_LOCK();
678	IF_ADDR_LOCK(ifp);
679
680	/*
681	 * Switch to MLDv1 host compatibility mode.
682	 */
683	mli = MLD_IFINFO(ifp);
684	KASSERT(mli != NULL, ("%s: no mld_ifinfo for ifp %p", __func__, ifp));
685	mld_set_version(mli, MLD_VERSION_1);
686
687	timer = (ntohs(mld->mld_maxdelay) * PR_FASTHZ) / MLD_TIMER_SCALE;
688	if (timer == 0)
689		timer = 1;
690
691	if (is_general_query) {
692		/*
693		 * For each reporting group joined on this
694		 * interface, kick the report timer.
695		 */
696		CTR2(KTR_MLD, "process v1 general query on ifp %p(%s)",
697		    ifp, ifp->if_xname);
698		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
699			if (ifma->ifma_addr->sa_family != AF_INET6 ||
700			    ifma->ifma_protospec == NULL)
701				continue;
702			inm = (struct in6_multi *)ifma->ifma_protospec;
703			mld_v1_update_group(inm, timer);
704		}
705	} else {
706		/*
707		 * MLDv1 Group-Specific Query.
708		 * If this is a group-specific MLDv1 query, we need only
709		 * look up the single group to process it.
710		 */
711		inm = in6m_lookup_locked(ifp, &mld->mld_addr);
712		if (inm != NULL) {
713			CTR3(KTR_MLD, "process v1 query %s on ifp %p(%s)",
714			    ip6_sprintf(ip6tbuf, &mld->mld_addr),
715			    ifp, ifp->if_xname);
716			mld_v1_update_group(inm, timer);
717		}
718		/* XXX Clear embedded scope ID as userland won't expect it. */
719		in6_clearscope(&mld->mld_addr);
720	}
721
722	IF_ADDR_UNLOCK(ifp);
723	MLD_UNLOCK();
724	IN6_MULTI_UNLOCK();
725
726	return (0);
727}
728
729/*
730 * Update the report timer on a group in response to an MLDv1 query.
731 *
732 * If we are becoming the reporting member for this group, start the timer.
733 * If we already are the reporting member for this group, and timer is
734 * below the threshold, reset it.
735 *
736 * We may be updating the group for the first time since we switched
737 * to MLDv2. If we are, then we must clear any recorded source lists,
738 * and transition to REPORTING state; the group timer is overloaded
739 * for group and group-source query responses.
740 *
741 * Unlike MLDv2, the delay per group should be jittered
742 * to avoid bursts of MLDv1 reports.
743 */
744static void
745mld_v1_update_group(struct in6_multi *inm, const int timer)
746{
747#ifdef KTR
748	char			 ip6tbuf[INET6_ADDRSTRLEN];
749#endif
750
751	CTR4(KTR_MLD, "%s: %s/%s timer=%d", __func__,
752	    ip6_sprintf(ip6tbuf, &inm->in6m_addr),
753	    inm->in6m_ifp->if_xname, timer);
754
755	IN6_MULTI_LOCK_ASSERT();
756
757	switch (inm->in6m_state) {
758	case MLD_NOT_MEMBER:
759	case MLD_SILENT_MEMBER:
760		break;
761	case MLD_REPORTING_MEMBER:
762		if (inm->in6m_timer != 0 &&
763		    inm->in6m_timer <= timer) {
764			CTR1(KTR_MLD, "%s: REPORTING and timer running, "
765			    "skipping.", __func__);
766			break;
767		}
768		/* FALLTHROUGH */
769	case MLD_SG_QUERY_PENDING_MEMBER:
770	case MLD_G_QUERY_PENDING_MEMBER:
771	case MLD_IDLE_MEMBER:
772	case MLD_LAZY_MEMBER:
773	case MLD_AWAKENING_MEMBER:
774		CTR1(KTR_MLD, "%s: ->REPORTING", __func__);
775		inm->in6m_state = MLD_REPORTING_MEMBER;
776		inm->in6m_timer = MLD_RANDOM_DELAY(timer);
777		V_current_state_timers_running6 = 1;
778		break;
779	case MLD_SLEEPING_MEMBER:
780		CTR1(KTR_MLD, "%s: ->AWAKENING", __func__);
781		inm->in6m_state = MLD_AWAKENING_MEMBER;
782		break;
783	case MLD_LEAVING_MEMBER:
784		break;
785	}
786}
787
788/*
789 * Process a received MLDv2 general, group-specific or
790 * group-and-source-specific query.
791 *
792 * Assumes that the query header has been pulled up to sizeof(mldv2_query).
793 *
794 * Return 0 if successful, otherwise an appropriate error code is returned.
795 */
796static int
797mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
798    struct mbuf *m, const int off, const int icmp6len)
799{
800	struct mld_ifinfo	*mli;
801	struct mldv2_query	*mld;
802	struct in6_multi	*inm;
803	uint32_t		 maxdelay, nsrc, qqi;
804	int			 is_general_query;
805	uint16_t		 timer;
806	uint8_t			 qrv;
807#ifdef KTR
808	char			 ip6tbuf[INET6_ADDRSTRLEN];
809#endif
810
811	is_general_query = 0;
812
813	/*
814	 * RFC3810 Section 6.2: MLD queries must originate from
815	 * a router's link-local address.
816	 */
817	if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
818		CTR3(KTR_MLD, "ignore v2 query src %s on ifp %p(%s)",
819		    ip6_sprintf(ip6tbuf, &ip6->ip6_src),
820		    ifp, ifp->if_xname);
821		return (0);
822	}
823
824	CTR2(KTR_MLD, "input v2 query on ifp %p(%s)", ifp, ifp->if_xname);
825
826	mld = (struct mldv2_query *)(mtod(m, uint8_t *) + off);
827
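	/*
	 * Maximum Response Code values >= 32768 encode the delay as a
	 * floating-point mantissa/exponent (RFC 3810, 5.1.3).
	 */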
828	maxdelay = ntohs(mld->mld_maxdelay);	/* in 1/10ths of a second */
829	if (maxdelay >= 32768) {
830		maxdelay = (MLD_MRC_MANT(maxdelay) | 0x1000) <<
831			   (MLD_MRC_EXP(maxdelay) + 3);
832	}
833	timer = (maxdelay * PR_FASTHZ) / MLD_TIMER_SCALE;
834	if (timer == 0)
835		timer = 1;
836
837	qrv = MLD_QRV(mld->mld_misc);
838	if (qrv < 2) {
839		CTR3(KTR_MLD, "%s: clamping qrv %d to %d", __func__,
840		    qrv, MLD_RV_INIT);
841		qrv = MLD_RV_INIT;
842	}
843
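	/*
	 * QQIC values >= 128 likewise use the mantissa/exponent form
	 * (RFC 3810, 5.1.9).
	 */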
844	qqi = mld->mld_qqi;
845	if (qqi >= 128) {
846		qqi = MLD_QQIC_MANT(mld->mld_qqi) <<
847		     (MLD_QQIC_EXP(mld->mld_qqi) + 3);
848	}
849
850	nsrc = ntohs(mld->mld_numsrc);
851	if (nsrc > MLD_MAX_GS_SOURCES)
852		return (EMSGSIZE);
853	if (icmp6len < sizeof(struct mldv2_query) +
854	    (nsrc * sizeof(struct in6_addr)))
855		return (EMSGSIZE);
856
857	/*
858	 * Do further input validation upfront to avoid resetting timers
859	 * should we need to discard this query.
860	 */
861	if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
862		/*
863		 * General Queries SHOULD be directed to ff02::1.
864		 * A general query with a source list has undefined
865		 * behaviour; discard it.
866		 */
867		struct in6_addr		 dst;
868
869		dst = ip6->ip6_dst;
870		in6_clearscope(&dst);
871		if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes) ||
872		    nsrc > 0)
873			return (EINVAL);
874		is_general_query = 1;
875	} else {
876		/*
877		 * Embed scope ID of receiving interface in MLD query for
878		 * lookup whilst we don't hold other locks (due to KAME
879		 * locking lameness). We own this mbuf chain just now.
880		 */
881		in6_setscope(&mld->mld_addr, ifp, NULL);
882	}
883
884	IN6_MULTI_LOCK();
885	MLD_LOCK();
886	IF_ADDR_LOCK(ifp);
887
888	mli = MLD_IFINFO(ifp);
889	KASSERT(mli != NULL, ("%s: no mld_ifinfo for ifp %p", __func__, ifp));
890
891	/*
892	 * Discard the v2 query if we're in Compatibility Mode.
893	 * The RFC is pretty clear that hosts need to stay in MLDv1 mode
894	 * until the Old Version Querier Present timer expires.
895	 */
896	if (mli->mli_version != MLD_VERSION_2)
897		goto out_locked;
898
899	mld_set_version(mli, MLD_VERSION_2);
900	mli->mli_rv = qrv;
901	mli->mli_qi = qqi;
902	mli->mli_qri = maxdelay;
903
904	CTR4(KTR_MLD, "%s: qrv %d qi %d maxdelay %d", __func__, qrv, qqi,
905	    maxdelay);
906
907	if (is_general_query) {
908		/*
909		 * MLDv2 General Query.
910		 *
911		 * Schedule a current-state report on this ifp for
912		 * all groups, possibly containing source lists.
913		 *
914		 * If there is a pending General Query response
915		 * scheduled earlier than the selected delay, do
916		 * not schedule any other reports.
917		 * Otherwise, reset the interface timer.
918		 */
919		CTR2(KTR_MLD, "process v2 general query on ifp %p(%s)",
920		    ifp, ifp->if_xname);
921		if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) {
922			mli->mli_v2_timer = MLD_RANDOM_DELAY(timer);
923			V_interface_timers_running6 = 1;
924		}
925	} else {
926		/*
927		 * MLDv2 Group-specific or Group-and-source-specific Query.
928		 *
929		 * Group-source-specific queries are throttled on
930		 * a per-group basis to defeat denial-of-service attempts.
931		 * Queries for groups we are not a member of on this
932		 * link are simply ignored.
933		 */
934		inm = in6m_lookup_locked(ifp, &mld->mld_addr);
935		if (inm == NULL)
936			goto out_locked;
937		if (nsrc > 0) {
938			if (!ratecheck(&inm->in6m_lastgsrtv,
939			    &V_mld_gsrdelay)) {
940				CTR1(KTR_MLD, "%s: GS query throttled.",
941				    __func__);
942				goto out_locked;
943			}
944		}
945		CTR2(KTR_MLD, "process v2 group query on ifp %p(%s)",
946		     ifp, ifp->if_xname);
947		/*
948		 * If there is a pending General Query response
949		 * scheduled sooner than the selected delay, no
950		 * further report need be scheduled.
951		 * Otherwise, prepare to respond to the
952		 * group-specific or group-and-source query.
953		 */
954		if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer)
955			mld_v2_process_group_query(inm, mli, timer, m, off);
956
957		/* XXX Clear embedded scope ID as userland won't expect it. */
958		in6_clearscope(&mld->mld_addr);
959	}
960
961out_locked:
962	IF_ADDR_UNLOCK(ifp);
963	MLD_UNLOCK();
964	IN6_MULTI_UNLOCK();
965
966	return (0);
967}
968
969/*
970 * Process a received MLDv2 group-specific or group-and-source-specific
971 * query.
972 * Return <0 if any error occurred. Currently this is ignored.
973 */
974static int
975mld_v2_process_group_query(struct in6_multi *inm, struct mld_ifinfo *mli,
976    int timer, struct mbuf *m0, const int off)
977{
978	struct mldv2_query	*mld;
979	int			 retval;
980	uint16_t		 nsrc;
981
982	IN6_MULTI_LOCK_ASSERT();
983	MLD_LOCK_ASSERT();
984
985	retval = 0;
986	mld = (struct mldv2_query *)(mtod(m0, uint8_t *) + off);
987
988	switch (inm->in6m_state) {
989	case MLD_NOT_MEMBER:
990	case MLD_SILENT_MEMBER:
991	case MLD_SLEEPING_MEMBER:
992	case MLD_LAZY_MEMBER:
993	case MLD_AWAKENING_MEMBER:
994	case MLD_IDLE_MEMBER:
995	case MLD_LEAVING_MEMBER:
996		return (retval);
997		break;
998	case MLD_REPORTING_MEMBER:
999	case MLD_G_QUERY_PENDING_MEMBER:
1000	case MLD_SG_QUERY_PENDING_MEMBER:
1001		break;
1002	}
1003
1004	nsrc = ntohs(mld->mld_numsrc);
1005
1006	/*
1007	 * Deal with group-specific queries upfront.
1008	 * If any group query is already pending, purge any recorded
1009	 * source-list state if it exists, and schedule a query response
1010	 * for this group-specific query.
1011	 */
1012	if (nsrc == 0) {
1013		if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
1014		    inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) {
1015			in6m_clear_recorded(inm);
1016			timer = min(inm->in6m_timer, timer);
1017		}
1018		inm->in6m_state = MLD_G_QUERY_PENDING_MEMBER;
1019		inm->in6m_timer = MLD_RANDOM_DELAY(timer);
1020		V_current_state_timers_running6 = 1;
1021		return (retval);
1022	}
1023
1024	/*
1025	 * Deal with the case where a group-and-source-specific query has
1026	 * been received but a group-specific query is already pending.
1027	 */
1028	if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER) {
1029		timer = min(inm->in6m_timer, timer);
1030		inm->in6m_timer = MLD_RANDOM_DELAY(timer);
1031		V_current_state_timers_running6 = 1;
1032		return (retval);
1033	}
1034
1035	/*
1036	 * Finally, deal with the case where a group-and-source-specific
1037	 * query has been received, where a response to a previous g-s-r
1038	 * query exists, or none exists.
1039	 * In this case, we need to parse the source-list which the Querier
1040	 * has provided us with and check if we have any source list filter
1041	 * entries at T1 for these sources. If we do not, there is no need to
1042	 * schedule a report and the query may be dropped.
1043	 * If we do, we must record them and schedule a current-state
1044	 * report for those sources.
1045	 */
1046	if (inm->in6m_nsrc > 0) {
1047		struct mbuf		*m;
1048		uint8_t			*sp;
1049		int			 i, nrecorded;
1050		int			 soff;
1051
1052		m = m0;
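		/* The source list may span several mbufs in the chain. */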
1053		soff = off + sizeof(struct mldv2_query);
1054		nrecorded = 0;
1055		for (i = 0; i < nsrc; i++) {
1056			sp = mtod(m, uint8_t *) + soff;
1057			retval = in6m_record_source(inm,
1058			    (const struct in6_addr *)sp);
1059			if (retval < 0)
1060				break;
1061			nrecorded += retval;
1062			soff += sizeof(struct in6_addr);
1063			if (soff >= m->m_len) {
1064				soff = soff - m->m_len;
1065				m = m->m_next;
1066				if (m == NULL)
1067					break;
1068			}
1069		}
1070		if (nrecorded > 0) {
1071			CTR1(KTR_MLD,
1072			    "%s: schedule response to SG query", __func__);
1073			inm->in6m_state = MLD_SG_QUERY_PENDING_MEMBER;
1074			inm->in6m_timer = MLD_RANDOM_DELAY(timer);
1075			V_current_state_timers_running6 = 1;
1076		}
1077	}
1078
1079	return (retval);
1080}
1081
1082/*
1083 * Process a received MLDv1 host membership report.
1084 * Assumes mld points to mld_hdr in pulled up mbuf chain.
1085 *
1086 * NOTE: Can't be fully const correct as we temporarily embed scope ID in
1087 * mld_addr. This is OK as we own the mbuf chain.
1088 */
1089static int
1090mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
1091    /*const*/ struct mld_hdr *mld)
1092{
1093	struct in6_addr		 src, dst;
1094	struct in6_ifaddr	*ia;
1095	struct in6_multi	*inm;
1096#ifdef KTR
1097	char			 ip6tbuf[INET6_ADDRSTRLEN];
1098#endif
1099
1100	if (!mld_v1enable) {
1101		CTR3(KTR_MLD, "ignore v1 report %s on ifp %p(%s)",
1102		    ip6_sprintf(ip6tbuf, &mld->mld_addr),
1103		    ifp, ifp->if_xname);
1104		return (0);
1105	}
1106
1107	if (ifp->if_flags & IFF_LOOPBACK)
1108		return (0);
1109
1110	/*
1111	 * MLDv1 reports must originate from a host's link-local address,
1112	 * or the unspecified address (when booting).
1113	 */
1114	src = ip6->ip6_src;
1115	in6_clearscope(&src);
1116	if (!IN6_IS_SCOPE_LINKLOCAL(&src) && !IN6_IS_ADDR_UNSPECIFIED(&src)) {
1117		CTR3(KTR_MLD, "ignore v1 report src %s on ifp %p(%s)",
1118		    ip6_sprintf(ip6tbuf, &ip6->ip6_src),
1119		    ifp, ifp->if_xname);
1120		return (EINVAL);
1121	}
1122
1123	/*
1124	 * RFC2710 Section 4: MLDv1 reports must pertain to a multicast
1125	 * group, and must be directed to the group itself.
1126	 */
1127	dst = ip6->ip6_dst;
1128	in6_clearscope(&dst);
1129	if (!IN6_IS_ADDR_MULTICAST(&mld->mld_addr) ||
1130	    !IN6_ARE_ADDR_EQUAL(&mld->mld_addr, &dst)) {
1131		CTR3(KTR_MLD, "ignore v1 report dst %s on ifp %p(%s)",
1132		    ip6_sprintf(ip6tbuf, &ip6->ip6_dst),
1133		    ifp, ifp->if_xname);
1134		return (EINVAL);
1135	}
1136
1137	/*
1138	 * Make sure we don't hear our own membership report, as fast
1139	 * leave requires knowing that we are the only member of a
1140	 * group. Assume we used the link-local address if available,
1141	 * otherwise look for ::.
1142	 *
1143	 * XXX Note that scope ID comparison is needed for the address
1144	 * returned by in6ifa_ifpforlinklocal(), but SHOULD NOT be
1145	 * performed for the on-wire address.
1146	 */
1147	ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
1148	if ((ia && IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, IA6_IN6(ia))) ||
1149	    (ia == NULL && IN6_IS_ADDR_UNSPECIFIED(&src))) {
1150		if (ia != NULL)
1151			ifa_free(&ia->ia_ifa);
1152		return (0);
1153	}
1154	if (ia != NULL)
1155		ifa_free(&ia->ia_ifa);
1156
1157	CTR3(KTR_MLD, "process v1 report %s on ifp %p(%s)",
1158	    ip6_sprintf(ip6tbuf, &mld->mld_addr), ifp, ifp->if_xname);
1159
1160	/*
1161	 * Embed scope ID of receiving interface in MLD query for lookup
1162	 * whilst we don't hold other locks (due to KAME locking lameness).
1163	 */
1164	if (!IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr))
1165		in6_setscope(&mld->mld_addr, ifp, NULL);
1166
1167	IN6_MULTI_LOCK();
1168	MLD_LOCK();
1169	IF_ADDR_LOCK(ifp);
1170
1171	/*
1172	 * MLDv1 report suppression.
1173	 * If we are a member of this group, and our membership should be
1174	 * reported, and our group timer is pending or about to be reset,
1175	 * stop our group timer by transitioning to the 'lazy' state.
1176	 */
1177	inm = in6m_lookup_locked(ifp, &mld->mld_addr);
1178	if (inm != NULL) {
1179		struct mld_ifinfo *mli;
1180
1181		mli = inm->in6m_mli;
1182		KASSERT(mli != NULL,
1183		    ("%s: no mli for ifp %p", __func__, ifp));
1184
1185		/*
1186		 * If we are in MLDv2 host mode, do not allow the
1187		 * other host's MLDv1 report to suppress our reports.
1188		 */
1189		if (mli->mli_version == MLD_VERSION_2)
1190			goto out_locked;
1191
1192		inm->in6m_timer = 0;
1193
1194		switch (inm->in6m_state) {
1195		case MLD_NOT_MEMBER:
1196		case MLD_SILENT_MEMBER:
1197		case MLD_SLEEPING_MEMBER:
1198			break;
1199		case MLD_REPORTING_MEMBER:
1200		case MLD_IDLE_MEMBER:
1201		case MLD_AWAKENING_MEMBER:
1202			CTR3(KTR_MLD,
1203			    "report suppressed for %s on ifp %p(%s)",
1204			    ip6_sprintf(ip6tbuf, &mld->mld_addr),
1205			    ifp, ifp->if_xname);
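			/* FALLTHROUGH */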
1206		case MLD_LAZY_MEMBER:
1207			inm->in6m_state = MLD_LAZY_MEMBER;
1208			break;
1209		case MLD_G_QUERY_PENDING_MEMBER:
1210		case MLD_SG_QUERY_PENDING_MEMBER:
1211		case MLD_LEAVING_MEMBER:
1212			break;
1213		}
1214	}
1215
1216out_locked:
1217	MLD_UNLOCK();
1218	IF_ADDR_UNLOCK(ifp);
1219	IN6_MULTI_UNLOCK();
1220
1221	/* XXX Clear embedded scope ID as userland won't expect it. */
1222	in6_clearscope(&mld->mld_addr);
1223
1224	return (0);
1225}
1226
1227/*
1228 * MLD input path.
1229 *
1230 * Assume query messages which fit in a single ICMPv6 message header
1231 * have been pulled up.
1232 * Assume that userland will want to see the message, even if it
1233 * otherwise fails kernel input validation; do not free it.
1234 * Pullup may however free the mbuf chain m if it fails.
1235 *
1236 * Return IPPROTO_DONE if we freed m. Otherwise, return 0.
1237 */
1238int
1239mld_input(struct mbuf *m, int off, int icmp6len)
1240{
1241	struct ifnet	*ifp;
1242	struct ip6_hdr	*ip6;
1243	struct mld_hdr	*mld;
1244	int		 mldlen;
1245
1246	CTR3(KTR_MLD, "%s: called w/mbuf (%p,%d)", __func__, m, off);
1247
1248	ifp = m->m_pkthdr.rcvif;
1249
1250	ip6 = mtod(m, struct ip6_hdr *);
1251
1252	/* Pullup to appropriate size. */
1253	mld = (struct mld_hdr *)(mtod(m, uint8_t *) + off);
1254	if (mld->mld_type == MLD_LISTENER_QUERY &&
1255	    icmp6len >= sizeof(struct mldv2_query)) {
1256		mldlen = sizeof(struct mldv2_query);
1257	} else {
1258		mldlen = sizeof(struct mld_hdr);
1259	}
1260	IP6_EXTHDR_GET(mld, struct mld_hdr *, m, off, mldlen);
1261	if (mld == NULL) {
1262		ICMP6STAT_INC(icp6s_badlen);
1263		return (IPPROTO_DONE);
1264	}
1265
1266	/*
1267	 * Userland needs to see all of this traffic for implementing
1268	 * the endpoint discovery portion of multicast routing.
1269	 */
1270	switch (mld->mld_type) {
1271	case MLD_LISTENER_QUERY:
1272		icmp6_ifstat_inc(ifp, ifs6_in_mldquery);
1273		if (icmp6len == sizeof(struct mld_hdr)) {
1274			if (mld_v1_input_query(ifp, ip6, mld) != 0)
1275				return (0);
1276		} else if (icmp6len >= sizeof(struct mldv2_query)) {
1277			if (mld_v2_input_query(ifp, ip6, m, off,
1278			    icmp6len) != 0)
1279				return (0);
1280		}
1281		break;
1282	case MLD_LISTENER_REPORT:
1283		icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
1284		if (mld_v1_input_report(ifp, ip6, mld) != 0)
1285			return (0);
1286		break;
1287	case MLDV2_LISTENER_REPORT:
1288		icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
1289		break;
1290	case MLD_LISTENER_DONE:
1291		icmp6_ifstat_inc(ifp, ifs6_in_mlddone);
1292		break;
1293	default:
1294		break;
1295	}
1296
1297	return (0);
1298}
1299
1300/*
1301 * Fast timeout handler (global).
1302 * VIMAGE: Timeout handlers are expected to service all vimages.
1303 */
1304void
1305mld_fasttimo(void)
1306{
1307	VNET_ITERATOR_DECL(vnet_iter);
1308
1309	VNET_LIST_RLOCK();
1310	VNET_FOREACH(vnet_iter) {
1311		CURVNET_SET(vnet_iter);
1312		mld_fasttimo_vnet();
1313		CURVNET_RESTORE();
1314	}
1315	VNET_LIST_RUNLOCK();
1316}
1317
1318/*
1319 * Fast timeout handler (per-vnet).
1320 *
1321 * VIMAGE: Assume caller has set up our curvnet.
1322 */
1323static void
1324mld_fasttimo_vnet(void)
1325{
1326	struct ifqueue		 scq;	/* State-change packets */
1327	struct ifqueue		 qrq;	/* Query response packets */
1328	struct ifnet		*ifp;
1329	struct mld_ifinfo	*mli;
1330	struct ifmultiaddr	*ifma, *tifma;
1331	struct in6_multi	*inm;
1332	int			 uri_fasthz;
1333
1334	uri_fasthz = 0;
1335
1336	/*
1337	 * Quick check to see if any work needs to be done, in order to
1338	 * minimize the overhead of fasttimo processing.
1339	 * SMPng: XXX Unlocked reads.
1340	 */
1341	if (!V_current_state_timers_running6 &&
1342	    !V_interface_timers_running6 &&
1343	    !V_state_change_timers_running6)
1344		return;
1345
1346	IN6_MULTI_LOCK();
1347	MLD_LOCK();
1348
1349	/*
1350	 * MLDv2 General Query response timer processing.
1351	 */
1352	if (V_interface_timers_running6) {
1353		CTR1(KTR_MLD, "%s: interface timers running", __func__);
1354
1355		V_interface_timers_running6 = 0;
1356		LIST_FOREACH(mli, &V_mli_head, mli_link) {
1357			if (mli->mli_v2_timer == 0) {
1358				/* Do nothing. */
1359			} else if (--mli->mli_v2_timer == 0) {
1360				mld_v2_dispatch_general_query(mli);
1361			} else {
1362				V_interface_timers_running6 = 1;
1363			}
1364		}
1365	}
1366
1367	if (!V_current_state_timers_running6 &&
1368	    !V_state_change_timers_running6)
1369		goto out_locked;
1370
1371	V_current_state_timers_running6 = 0;
1372	V_state_change_timers_running6 = 0;
1373
1374	CTR1(KTR_MLD, "%s: state change timers running", __func__);
1375
1376	/*
1377	 * MLD host report and state-change timer processing.
1378	 * Note: Processing a v2 group timer may remove a node.
1379	 */
1380	LIST_FOREACH(mli, &V_mli_head, mli_link) {
1381		ifp = mli->mli_ifp;
1382
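		/*
		 * For an MLDv2 link, compute a jittered Unsolicited Report
		 * Interval (in fast-timeout ticks) used to reload the
		 * state-change retransmit timer, and set up bounded per-link
		 * queues for query responses and state-change reports.
		 */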
1383		if (mli->mli_version == MLD_VERSION_2) {
1384			uri_fasthz = MLD_RANDOM_DELAY(mli->mli_uri *
1385			    PR_FASTHZ);
1386
1387			memset(&qrq, 0, sizeof(struct ifqueue));
1388			IFQ_SET_MAXLEN(&qrq, MLD_MAX_G_GS_PACKETS);
1389
1390			memset(&scq, 0, sizeof(struct ifqueue));
1391			IFQ_SET_MAXLEN(&scq, MLD_MAX_STATE_CHANGE_PACKETS);
1392		}
1393
1394		IF_ADDR_LOCK(ifp);
1395		TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link,
1396		    tifma) {
1397			if (ifma->ifma_addr->sa_family != AF_INET6 ||
1398			    ifma->ifma_protospec == NULL)
1399				continue;
1400			inm = (struct in6_multi *)ifma->ifma_protospec;
1401			switch (mli->mli_version) {
1402			case MLD_VERSION_1:
1403				/*
1404				 * XXX Drop IF_ADDR lock temporarily to
1405				 * avoid recursion caused by a potential
1406				 * call by in6ifa_ifpforlinklocal().
1407				 * rwlock candidate?
1408				 */
1409				IF_ADDR_UNLOCK(ifp);
1410				mld_v1_process_group_timer(inm,
1411				    mli->mli_version);
1412				IF_ADDR_LOCK(ifp);
1413				break;
1414			case MLD_VERSION_2:
1415				mld_v2_process_group_timers(mli, &qrq,
1416				    &scq, inm, uri_fasthz);
1417				break;
1418			}
1419		}
1420		IF_ADDR_UNLOCK(ifp);
1421
1422		if (mli->mli_version == MLD_VERSION_2) {
1423			struct in6_multi		*tinm;
1424
1425			mld_dispatch_queue(&qrq, 0);
1426			mld_dispatch_queue(&scq, 0);
1427
1428			/*
1429			 * Free the in_multi reference(s) for
1430			 * this lifecycle.
1431			 */
1432			SLIST_FOREACH_SAFE(inm, &mli->mli_relinmhead,
1433			    in6m_nrele, tinm) {
1434				SLIST_REMOVE_HEAD(&mli->mli_relinmhead,
1435				    in6m_nrele);
1436				in6m_release_locked(inm);
1437			}
1438		}
1439	}
1440
1441out_locked:
1442	MLD_UNLOCK();
1443	IN6_MULTI_UNLOCK();
1444}
1445
1446/*
1447 * Update host report group timer.
1448 * Will update the global pending timer flags.
1449 */
1450static void
1451mld_v1_process_group_timer(struct in6_multi *inm, const int version)
1452{
1453	int report_timer_expired;
1454
1455	IN6_MULTI_LOCK_ASSERT();
1456	MLD_LOCK_ASSERT();
1457
1458	if (inm->in6m_timer == 0) {
1459		report_timer_expired = 0;
1460	} else if (--inm->in6m_timer == 0) {
1461		report_timer_expired = 1;
1462	} else {
1463		V_current_state_timers_running6 = 1;
1464		return;
1465	}
1466
1467	switch (inm->in6m_state) {
1468	case MLD_NOT_MEMBER:
1469	case MLD_SILENT_MEMBER:
1470	case MLD_IDLE_MEMBER:
1471	case MLD_LAZY_MEMBER:
1472	case MLD_SLEEPING_MEMBER:
1473	case MLD_AWAKENING_MEMBER:
1474		break;
1475	case MLD_REPORTING_MEMBER:
1476		if (report_timer_expired) {
1477			inm->in6m_state = MLD_IDLE_MEMBER;
1478			(void)mld_v1_transmit_report(inm,
1479			     MLD_LISTENER_REPORT);
1480		}
1481		break;
1482	case MLD_G_QUERY_PENDING_MEMBER:
1483	case MLD_SG_QUERY_PENDING_MEMBER:
1484	case MLD_LEAVING_MEMBER:
1485		break;
1486	}
1487}
1488
1489/*
1490 * Update a group's timers for MLDv2.
1491 * Will update the global pending timer flags.
1492 * Note: Unlocked read from mli.
1493 */
1494static void
1495mld_v2_process_group_timers(struct mld_ifinfo *mli,
1496    struct ifqueue *qrq, struct ifqueue *scq,
1497    struct in6_multi *inm, const int uri_fasthz)
1498{
1499	int query_response_timer_expired;
1500	int state_change_retransmit_timer_expired;
1501#ifdef KTR
1502	char ip6tbuf[INET6_ADDRSTRLEN];
1503#endif
1504
1505	IN6_MULTI_LOCK_ASSERT();
1506	MLD_LOCK_ASSERT();
1507
1508	query_response_timer_expired = 0;
1509	state_change_retransmit_timer_expired = 0;
1510
1511	/*
1512	 * During a transition from compatibility mode back to MLDv2,
1513	 * a group record in REPORTING state may still have its group
1514	 * timer active. This is a no-op in this function; it is easier
1515	 * to deal with it here than to complicate the slow-timeout path.
1516	 */
1517	if (inm->in6m_timer == 0) {
1518		query_response_timer_expired = 0;
1519	} else if (--inm->in6m_timer == 0) {
1520		query_response_timer_expired = 1;
1521	} else {
1522		V_current_state_timers_running6 = 1;
1523	}
1524
1525	if (inm->in6m_sctimer == 0) {
1526		state_change_retransmit_timer_expired = 0;
1527	} else if (--inm->in6m_sctimer == 0) {
1528		state_change_retransmit_timer_expired = 1;
1529	} else {
1530		V_state_change_timers_running6 = 1;
1531	}
1532
1533	/* We are in fasttimo, so be quick about it. */
1534	if (!state_change_retransmit_timer_expired &&
1535	    !query_response_timer_expired)
1536		return;
1537
1538	switch (inm->in6m_state) {
1539	case MLD_NOT_MEMBER:
1540	case MLD_SILENT_MEMBER:
1541	case MLD_SLEEPING_MEMBER:
1542	case MLD_LAZY_MEMBER:
1543	case MLD_AWAKENING_MEMBER:
1544	case MLD_IDLE_MEMBER:
1545		break;
1546	case MLD_G_QUERY_PENDING_MEMBER:
1547	case MLD_SG_QUERY_PENDING_MEMBER:
1548		/*
1549		 * Respond to a previously pending Group-Specific
1550		 * or Group-and-Source-Specific query by enqueueing
1551		 * the appropriate Current-State report for
1552		 * immediate transmission.
1553		 */
1554		if (query_response_timer_expired) {
1555			int retval;
1556
1557			retval = mld_v2_enqueue_group_record(qrq, inm, 0, 1,
1558			    (inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER));
1559			CTR2(KTR_MLD, "%s: enqueue record = %d",
1560			    __func__, retval);
1561			inm->in6m_state = MLD_REPORTING_MEMBER;
1562			in6m_clear_recorded(inm);
1563		}
1564		/* FALLTHROUGH */
1565	case MLD_REPORTING_MEMBER:
1566	case MLD_LEAVING_MEMBER:
1567		if (state_change_retransmit_timer_expired) {
1568			/*
1569			 * State-change retransmission timer fired.
1570			 * If there are any further pending retransmissions,
1571			 * set the global pending state-change flag, and
1572			 * reset the timer.
1573			 */
1574			if (--inm->in6m_scrv > 0) {
1575				inm->in6m_sctimer = uri_fasthz;
1576				V_state_change_timers_running6 = 1;
1577			}
1578			/*
1579			 * Retransmit the previously computed state-change
1580			 * report. If there are no further pending
1581			 * retransmissions, the mbuf queue will be consumed.
1582			 * Update T0 state to T1 as we have now sent
1583			 * a state-change.
1584			 */
1585			(void)mld_v2_merge_state_changes(inm, scq);
1586
1587			in6m_commit(inm);
1588			CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
1589			    ip6_sprintf(ip6tbuf, &inm->in6m_addr),
1590			    inm->in6m_ifp->if_xname);
1591
1592			/*
1593			 * If we are leaving the group for good, make sure
1594			 * we release MLD's reference to it.
1595			 * This release must be deferred using a SLIST,
1596			 * as we are called from a loop which traverses
1597			 * the in_ifmultiaddr TAILQ.
1598			 */
1599			if (inm->in6m_state == MLD_LEAVING_MEMBER &&
1600			    inm->in6m_scrv == 0) {
1601				inm->in6m_state = MLD_NOT_MEMBER;
1602				SLIST_INSERT_HEAD(&mli->mli_relinmhead,
1603				    inm, in6m_nrele);
1604			}
1605		}
1606		break;
1607	}
1608}
1609
1610/*
1611 * Switch to a different version on the given interface,
1612 * as per Section 9.12.
1613 */
1614static void
1615mld_set_version(struct mld_ifinfo *mli, const int version)
1616{
1617	int old_version_timer;
1618
1619	MLD_LOCK_ASSERT();
1620
1621	CTR4(KTR_MLD, "%s: switching to v%d on ifp %p(%s)", __func__,
1622	    version, mli->mli_ifp, mli->mli_ifp->if_xname);
1623
1624	if (version == MLD_VERSION_1) {
1625		/*
1626		 * Compute the "Older Version Querier Present" timer as per
1627		 * Section 9.12.
1628		 */
1629		old_version_timer = (mli->mli_rv * mli->mli_qi) + mli->mli_qri;
1630		old_version_timer *= PR_SLOWHZ;
1631		mli->mli_v1_timer = old_version_timer;
1632	}
1633
1634	if (mli->mli_v1_timer > 0 && mli->mli_version != MLD_VERSION_1) {
1635		mli->mli_version = MLD_VERSION_1;
1636		mld_v2_cancel_link_timers(mli);
1637	}
1638}
1639
1640/*
1641 * Cancel pending MLDv2 timers for the given link and all groups
1642 * joined on it; state-change, general-query, and group-query timers.
1643 */
1644static void
1645mld_v2_cancel_link_timers(struct mld_ifinfo *mli)
1646{
1647	struct ifmultiaddr	*ifma;
1648	struct ifnet		*ifp;
1649	struct in6_multi		*inm;
1650
1651	CTR3(KTR_MLD, "%s: cancel v2 timers on ifp %p(%s)", __func__,
1652	    mli->mli_ifp, mli->mli_ifp->if_xname);
1653
1654	IN6_MULTI_LOCK_ASSERT();
1655	MLD_LOCK_ASSERT();
1656
1657	/*
1658	 * Fast-track this potentially expensive operation
1659	 * by checking all the global 'timer pending' flags.
1660	 */
1661	if (!V_interface_timers_running6 &&
1662	    !V_state_change_timers_running6 &&
1663	    !V_current_state_timers_running6)
1664		return;
1665
1666	mli->mli_v2_timer = 0;
1667
1668	ifp = mli->mli_ifp;
1669
1670	IF_ADDR_LOCK(ifp);
1671	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1672		if (ifma->ifma_addr->sa_family != AF_INET6)
1673			continue;
1674		inm = (struct in6_multi *)ifma->ifma_protospec;
1675		switch (inm->in6m_state) {
1676		case MLD_NOT_MEMBER:
1677		case MLD_SILENT_MEMBER:
1678		case MLD_IDLE_MEMBER:
1679		case MLD_LAZY_MEMBER:
1680		case MLD_SLEEPING_MEMBER:
1681		case MLD_AWAKENING_MEMBER:
1682			break;
1683		case MLD_LEAVING_MEMBER:
1684			/*
1685			 * If we are leaving the group and switching
1686			 * version, we need to release the final
1687			 * reference held for issuing the INCLUDE {}.
1688			 *
1689			 * SMPNG: Must drop and re-acquire IF_ADDR_LOCK
1690			 * around in6m_release_locked(), as it is not
1691			 * a recursive mutex.
1692			 */
1693			IF_ADDR_UNLOCK(ifp);
1694			in6m_release_locked(inm);
1695			IF_ADDR_LOCK(ifp);
1696			/* FALLTHROUGH */
1697		case MLD_G_QUERY_PENDING_MEMBER:
1698		case MLD_SG_QUERY_PENDING_MEMBER:
1699			in6m_clear_recorded(inm);
1700			/* FALLTHROUGH */
1701		case MLD_REPORTING_MEMBER:
1702			inm->in6m_sctimer = 0;
1703			inm->in6m_timer = 0;
1704			inm->in6m_state = MLD_REPORTING_MEMBER;
1705			/*
1706			 * Free any pending MLDv2 state-change records.
1707			 */
1708			_IF_DRAIN(&inm->in6m_scq);
1709			break;
1710		}
1711	}
1712	IF_ADDR_UNLOCK(ifp);
1713}
1714
1715/*
1716 * Global slowtimo handler.
1717 * VIMAGE: Timeout handlers are expected to service all vimages.
1718 */
1719void
1720mld_slowtimo(void)
1721{
1722	VNET_ITERATOR_DECL(vnet_iter);
1723
1724	VNET_LIST_RLOCK();
1725	VNET_FOREACH(vnet_iter) {
1726		CURVNET_SET(vnet_iter);
1727		mld_slowtimo_vnet();
1728		CURVNET_RESTORE();
1729	}
1730	VNET_LIST_RUNLOCK();
1731}
1732
1733/*
1734 * Per-vnet slowtimo handler.
1735 */
1736static void
1737mld_slowtimo_vnet(void)
1738{
1739	struct mld_ifinfo *mli;
1740
1741	MLD_LOCK();
1742
1743	LIST_FOREACH(mli, &V_mli_head, mli_link) {
1744		mld_v1_process_querier_timers(mli);
1745	}
1746
1747	MLD_UNLOCK();
1748}
1749
1750/*
1751 * Update the Older Version Querier Present timers for a link.
1752 * See Section 9.12 of RFC 3810.
1753 */
1754static void
1755mld_v1_process_querier_timers(struct mld_ifinfo *mli)
1756{
1757
1758	MLD_LOCK_ASSERT();
1759
1760	if (mli->mli_version != MLD_VERSION_2 && --mli->mli_v1_timer == 0) {
1761		/*
1762		 * MLDv1 Querier Present timer expired; revert to MLDv2.
1763		 */
1764		CTR5(KTR_MLD,
1765		    "%s: transition from v%d -> v%d on %p(%s)",
1766		    __func__, mli->mli_version, MLD_VERSION_2,
1767		    mli->mli_ifp, mli->mli_ifp->if_xname);
1768		mli->mli_version = MLD_VERSION_2;
1769	}
1770}
1771
1772/*
1773 * Transmit an MLDv1 report immediately.
1774 */
1775static int
1776mld_v1_transmit_report(struct in6_multi *in6m, const int type)
1777{
1778	struct ifnet		*ifp;
1779	struct in6_ifaddr	*ia;
1780	struct ip6_hdr		*ip6;
1781	struct mbuf		*mh, *md;
1782	struct mld_hdr		*mld;
1783
1784	IN6_MULTI_LOCK_ASSERT();
1785	MLD_LOCK_ASSERT();
1786
1787	ifp = in6m->in6m_ifp;
1788	ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
1789	/* ia may be NULL if link-local address is tentative. */
1790
1791	MGETHDR(mh, M_DONTWAIT, MT_HEADER);
1792	if (mh == NULL) {
1793		if (ia != NULL)
1794			ifa_free(&ia->ia_ifa);
1795		return (ENOMEM);
1796	}
1797	MGET(md, M_DONTWAIT, MT_DATA);
1798	if (md == NULL) {
1799		m_free(mh);
1800		if (ia != NULL)
1801			ifa_free(&ia->ia_ifa);
1802		return (ENOMEM);
1803	}
1804	mh->m_next = md;
1805
1806	/*
1807	 * FUTURE: Consider increasing alignment by ETHER_HDR_LEN, so
1808	 * that ether_output() does not need to allocate another mbuf
1809	 * for the header in the most common case.
1810	 */
1811	MH_ALIGN(mh, sizeof(struct ip6_hdr));
1812	mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
1813	mh->m_len = sizeof(struct ip6_hdr);
1814
1815	ip6 = mtod(mh, struct ip6_hdr *);
1816	ip6->ip6_flow = 0;
1817	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
1818	ip6->ip6_vfc |= IPV6_VERSION;
1819	ip6->ip6_nxt = IPPROTO_ICMPV6;
1820	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
1821	ip6->ip6_dst = in6m->in6m_addr;
1822
1823	md->m_len = sizeof(struct mld_hdr);
1824	mld = mtod(md, struct mld_hdr *);
1825	mld->mld_type = type;
1826	mld->mld_code = 0;
1827	mld->mld_cksum = 0;
1828	mld->mld_maxdelay = 0;
1829	mld->mld_reserved = 0;
1830	mld->mld_addr = in6m->in6m_addr;
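	/* Strip any embedded KAME scope ID so it is not sent on the wire. */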
1831	in6_clearscope(&mld->mld_addr);
1832	mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
1833	    sizeof(struct ip6_hdr), sizeof(struct mld_hdr));
1834
1835	mld_save_context(mh, ifp);
1836	mh->m_flags |= M_MLDV1;
1837
1838	mld_dispatch_packet(mh);
1839
1840	if (ia != NULL)
1841		ifa_free(&ia->ia_ifa);
1842	return (0);
1843}
1844
1845/*
1846 * Process a state change from the upper layer for the given IPv6 group.
1847 *
1848 * Each socket holds a reference on the in_multi in its own ip_moptions.
1849 * The socket layer will have made the necessary updates to the group
1850 * state; it is now up to MLD to issue a state change report if there
1851 * has been any change between T0 (when the last state-change was issued)
1852 * and T1 (now).
1853 *
1854 * We use the MLDv2 state machine at group level. The MLD module
1855 * however makes the decision as to which MLD protocol version to speak.
1856 * A state change *from* INCLUDE {} always means an initial join.
1857 * A state change *to* INCLUDE {} always means a final leave.
1858 *
1859 * If delay is non-zero, and the state change is an initial multicast
1860 * join, the state change report will be delayed by 'delay' ticks
1861 * in units of PR_FASTHZ if MLDv1 is active on the link; otherwise
1862 * the initial MLDv2 state change report will be delayed by whichever
1863 * is sooner, a pending state-change timer or delay itself.
1864 *
1865 * VIMAGE: curvnet should have been set by caller, as this routine
1866 * is called from the socket option handlers.
1867 */
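/*
 * Concretely, the comparison of iss_fmode at T0 and T1 below is what
 * classifies the request: a transition from MCAST_UNDEFINED to
 * MCAST_INCLUDE or MCAST_EXCLUDE is an initial join, the reverse
 * transition back to MCAST_UNDEFINED is a final leave, and any other
 * change (e.g. a source filter edit which leaves the mode untouched)
 * is handed to mld_handle_state_change().
 */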
1868int
1869mld_change_state(struct in6_multi *inm, const int delay)
1870{
1871	struct mld_ifinfo *mli;
1872	struct ifnet *ifp;
1873	int error;
1874
1875	IN6_MULTI_LOCK_ASSERT();
1876
1877	error = 0;
1878
1879	/*
1880	 * Try to detect if the upper layer just asked us to change state
1881	 * for an interface which has now gone away.
1882	 */
1883	KASSERT(inm->in6m_ifma != NULL, ("%s: no ifma", __func__));
1884	ifp = inm->in6m_ifma->ifma_ifp;
1885	if (ifp != NULL) {
1886		/*
1887		 * Sanity check that netinet6's notion of ifp is the
1888		 * same as net's.
1889		 */
1890		KASSERT(inm->in6m_ifp == ifp, ("%s: bad ifp", __func__));
1891	}
1892
1893	MLD_LOCK();
1894
1895	mli = MLD_IFINFO(ifp);
1896	KASSERT(mli != NULL, ("%s: no mld_ifinfo for ifp %p", __func__, ifp));
1897
1898	/*
1899	 * If we detect a state transition to or from MCAST_UNDEFINED
1900	 * for this group, then we are starting or finishing an MLD
1901	 * life cycle for this group.
1902	 */
1903	if (inm->in6m_st[1].iss_fmode != inm->in6m_st[0].iss_fmode) {
1904		CTR3(KTR_MLD, "%s: inm transition %d -> %d", __func__,
1905		    inm->in6m_st[0].iss_fmode, inm->in6m_st[1].iss_fmode);
1906		if (inm->in6m_st[0].iss_fmode == MCAST_UNDEFINED) {
1907			CTR1(KTR_MLD, "%s: initial join", __func__);
1908			error = mld_initial_join(inm, mli, delay);
1909			goto out_locked;
1910		} else if (inm->in6m_st[1].iss_fmode == MCAST_UNDEFINED) {
1911			CTR1(KTR_MLD, "%s: final leave", __func__);
1912			mld_final_leave(inm, mli);
1913			goto out_locked;
1914		}
1915	} else {
1916		CTR1(KTR_MLD, "%s: filter set change", __func__);
1917	}
1918
1919	error = mld_handle_state_change(inm, mli);
1920
1921out_locked:
1922	MLD_UNLOCK();
1923	return (error);
1924}
1925
1926/*
1927 * Perform the initial join for an MLD group.
1928 *
1929 * When joining a group:
1930 *  If the group should have its MLD traffic suppressed, do nothing.
1931 *  MLDv1 starts sending MLDv1 host membership reports.
1932 *  MLDv2 will schedule an MLDv2 state-change report containing the
1933 *  initial state of the membership.
1934 *
1935 * If the delay argument is non-zero, then we must delay sending the
1936 * initial state change for delay ticks (in units of PR_FASTHZ).
1937 */
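/*
 * Example (illustrative): with MLDv1 active on the link, the report
 * timer is normally armed with a random delay of up to MLD_V1_MAX_RI
 * seconds, expressed in PR_FASTHZ ticks (~200ms each with the stock
 * PR_FASTHZ). A caller-supplied 'delay' can only stretch that: the
 * code below uses max(delay, odelay), so asking for fewer ticks than
 * the random value has no effect.
 */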
1938static int
1939mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli,
1940    const int delay)
1941{
1942	struct ifnet		*ifp;
1943	struct ifqueue		*ifq;
1944	int			 error, retval, syncstates;
1945	int			 odelay;
1946#ifdef KTR
1947	char			 ip6tbuf[INET6_ADDRSTRLEN];
1948#endif
1949
1950	CTR4(KTR_MLD, "%s: initial join %s on ifp %p(%s)",
1951	    __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
1952	    inm->in6m_ifp, inm->in6m_ifp->if_xname);
1953
1954	error = 0;
1955	syncstates = 1;
1956
1957	ifp = inm->in6m_ifp;
1958
1959	IN6_MULTI_LOCK_ASSERT();
1960	MLD_LOCK_ASSERT();
1961
1962	KASSERT(mli && mli->mli_ifp == ifp, ("%s: inconsistent ifp", __func__));
1963
1964	/*
1965	 * Groups joined on loopback or marked as 'not reported' enter
1966	 * the MLD_SILENT_MEMBER state and are never reported in any
1967	 * protocol exchanges.
1968	 * All other groups enter the appropriate state machine
1969	 * for the version in use on this link.
1970	 * A link marked as MLIF_SILENT causes MLD to be completely
1971	 * disabled for the link.
1972	 */
1973	if ((ifp->if_flags & IFF_LOOPBACK) ||
1974	    (mli->mli_flags & MLIF_SILENT) ||
1975	    !mld_is_addr_reported(&inm->in6m_addr)) {
1976		CTR1(KTR_MLD,
1977"%s: not kicking state machine for silent group", __func__);
1978		inm->in6m_state = MLD_SILENT_MEMBER;
1979		inm->in6m_timer = 0;
1980	} else {
1981		/*
1982		 * Deal with overlapping in_multi lifecycle.
1983		 * If this group was LEAVING, then make sure
1984		 * we drop the reference we picked up to keep the
1985		 * group around for the final INCLUDE {} enqueue.
1986		 */
1987		if (mli->mli_version == MLD_VERSION_2 &&
1988		    inm->in6m_state == MLD_LEAVING_MEMBER)
1989			in6m_release_locked(inm);
1990
1991		inm->in6m_state = MLD_REPORTING_MEMBER;
1992
1993		switch (mli->mli_version) {
1994		case MLD_VERSION_1:
1995			/*
1996			 * If a delay was provided, only use it if
1997			 * it is greater than the delay normally
1998			 * used for an MLDv1 state change report,
1999			 * and delay sending the initial MLDv1 report
2000			 * by not transitioning to the IDLE state.
2001			 */
2002			odelay = MLD_RANDOM_DELAY(MLD_V1_MAX_RI * PR_FASTHZ);
2003			if (delay) {
2004				inm->in6m_timer = max(delay, odelay);
2005				V_current_state_timers_running6 = 1;
2006			} else {
2007				inm->in6m_state = MLD_IDLE_MEMBER;
2008				error = mld_v1_transmit_report(inm,
2009				     MLD_LISTENER_REPORT);
2010				if (error == 0) {
2011					inm->in6m_timer = odelay;
2012					V_current_state_timers_running6 = 1;
2013				}
2014			}
2015			break;
2016
2017		case MLD_VERSION_2:
2018			/*
2019			 * Defer update of T0 to T1, until the first copy
2020			 * of the state change has been transmitted.
2021			 */
2022			syncstates = 0;
2023
2024			/*
2025			 * Immediately enqueue a State-Change Report for
2026			 * this interface, freeing any previous reports.
2027			 * Don't kick the timers if there is nothing to do,
2028			 * or if an error occurred.
2029			 */
2030			ifq = &inm->in6m_scq;
2031			_IF_DRAIN(ifq);
2032			retval = mld_v2_enqueue_group_record(ifq, inm, 1,
2033			    0, 0);
2034			CTR2(KTR_MLD, "%s: enqueue record = %d",
2035			    __func__, retval);
2036			if (retval <= 0) {
2037				error = retval * -1;
2038				break;
2039			}
2040
2041			/*
2042			 * Schedule transmission of pending state-change
2043			 * report up to RV times for this link. The timer
2044			 * will fire at the next mld_fasttimo (~200ms),
2045			 * giving us an opportunity to merge the reports.
2046			 *
2047			 * If a delay was provided to this function, only
2048			 * use this delay if sooner than the existing one.
2049			 */
2050			KASSERT(mli->mli_rv > 1,
2051			   ("%s: invalid robustness %d", __func__,
2052			    mli->mli_rv));
2053			inm->in6m_scrv = mli->mli_rv;
2054			if (delay) {
2055				if (inm->in6m_sctimer > 1) {
2056					inm->in6m_sctimer =
2057					    min(inm->in6m_sctimer, delay);
2058				} else
2059					inm->in6m_sctimer = delay;
2060			} else
2061				inm->in6m_sctimer = 1;
2062			V_state_change_timers_running6 = 1;
2063
2064			error = 0;
2065			break;
2066		}
2067	}
2068
2069	/*
2070	 * Only update the T0 state if state change is atomic,
2071	 * i.e. we don't need to wait for a timer to fire before we
2072	 * can consider the state change to have been communicated.
2073	 */
2074	if (syncstates) {
2075		in6m_commit(inm);
2076		CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
2077		    ip6_sprintf(ip6tbuf, &inm->in6m_addr),
2078		    inm->in6m_ifp->if_xname);
2079	}
2080
2081	return (error);
2082}
2083
2084/*
2085 * Issue an intermediate state change during the life-cycle.
2086 */
2087static int
2088mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli)
2089{
2090	struct ifnet		*ifp;
2091	int			 retval;
2092#ifdef KTR
2093	char			 ip6tbuf[INET6_ADDRSTRLEN];
2094#endif
2095
2096	CTR4(KTR_MLD, "%s: state change for %s on ifp %p(%s)",
2097	    __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
2098	    inm->in6m_ifp, inm->in6m_ifp->if_xname);
2099
2100	ifp = inm->in6m_ifp;
2101
2102	IN6_MULTI_LOCK_ASSERT();
2103	MLD_LOCK_ASSERT();
2104
2105	KASSERT(mli && mli->mli_ifp == ifp,
2106	    ("%s: inconsistent ifp", __func__));
2107
2108	if ((ifp->if_flags & IFF_LOOPBACK) ||
2109	    (mli->mli_flags & MLIF_SILENT) ||
2110	    !mld_is_addr_reported(&inm->in6m_addr) ||
2111	    (mli->mli_version != MLD_VERSION_2)) {
2112		if (!mld_is_addr_reported(&inm->in6m_addr)) {
2113			CTR1(KTR_MLD,
2114"%s: not kicking state machine for silent group", __func__);
2115		}
2116		CTR1(KTR_MLD, "%s: nothing to do", __func__);
2117		in6m_commit(inm);
2118		CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
2119		    ip6_sprintf(ip6tbuf, &inm->in6m_addr),
2120		    inm->in6m_ifp->if_xname);
2121		return (0);
2122	}
2123
2124	_IF_DRAIN(&inm->in6m_scq);
2125
2126	retval = mld_v2_enqueue_group_record(&inm->in6m_scq, inm, 1, 0, 0);
2127	CTR2(KTR_MLD, "%s: enqueue record = %d", __func__, retval);
2128	if (retval <= 0)
2129		return (-retval);
2130
2131	/*
2132	 * If record(s) were enqueued, start the state-change
2133	 * report timer for this group.
2134	 */
2135	inm->in6m_scrv = mli->mli_rv;
2136	inm->in6m_sctimer = 1;
2137	V_state_change_timers_running6 = 1;
2138
2139	return (0);
2140}
2141
2142/*
2143 * Perform the final leave for a multicast address.
2144 *
2145 * When leaving a group:
2146 *  MLDv1 sends a DONE message if and only if we are the reporter.
2147 *  MLDv2 enqueues a state-change report containing a transition
2148 *  to INCLUDE {} for immediate transmission.
2149 */
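/*
 * In MLDv2 mode the group is not torn down immediately: an extra
 * reference is taken with in6m_acquire_locked() so the in6_multi
 * stays around while the TO_IN {} report is retransmitted (up to
 * in6m_scrv times), and the group waits in MLD_LEAVING_MEMBER until
 * that counter drains. If the group is re-joined while still
 * LEAVING, mld_initial_join() drops the extra reference.
 */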
2150static void
2151mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli)
2152{
2153	int syncstates;
2154#ifdef KTR
2155	char ip6tbuf[INET6_ADDRSTRLEN];
2156#endif
2157
2158	syncstates = 1;
2159
2160	CTR4(KTR_MLD, "%s: final leave %s on ifp %p(%s)",
2161	    __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
2162	    inm->in6m_ifp, inm->in6m_ifp->if_xname);
2163
2164	IN6_MULTI_LOCK_ASSERT();
2165	MLD_LOCK_ASSERT();
2166
2167	switch (inm->in6m_state) {
2168	case MLD_NOT_MEMBER:
2169	case MLD_SILENT_MEMBER:
2170	case MLD_LEAVING_MEMBER:
2171		/* Already leaving or left; do nothing. */
2172		CTR1(KTR_MLD,
2173"%s: not kicking state machine for silent group", __func__);
2174		break;
2175	case MLD_REPORTING_MEMBER:
2176	case MLD_IDLE_MEMBER:
2177	case MLD_G_QUERY_PENDING_MEMBER:
2178	case MLD_SG_QUERY_PENDING_MEMBER:
2179		if (mli->mli_version == MLD_VERSION_1) {
2180#ifdef INVARIANTS
2181			if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
2182			    inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER)
2183				panic("%s: MLDv2 state reached, not MLDv2 mode",
2184				    __func__);
2185#endif
2186			mld_v1_transmit_report(inm, MLD_LISTENER_DONE);
2187			inm->in6m_state = MLD_NOT_MEMBER;
2188		} else if (mli->mli_version == MLD_VERSION_2) {
2189			/*
2190			 * Stop group timer and all pending reports.
2191			 * Immediately enqueue a state-change report
2192			 * TO_IN {} to be sent on the next fast timeout,
2193			 * giving us an opportunity to merge reports.
2194			 */
2195			_IF_DRAIN(&inm->in6m_scq);
2196			inm->in6m_timer = 0;
2197			inm->in6m_scrv = mli->mli_rv;
2198			CTR4(KTR_MLD, "%s: Leaving %s/%s with %d "
2199			    "pending retransmissions.", __func__,
2200			    ip6_sprintf(ip6tbuf, &inm->in6m_addr),
2201			    inm->in6m_ifp->if_xname, inm->in6m_scrv);
2202			if (inm->in6m_scrv == 0) {
2203				inm->in6m_state = MLD_NOT_MEMBER;
2204				inm->in6m_sctimer = 0;
2205			} else {
2206				int retval;
2207
2208				in6m_acquire_locked(inm);
2209
2210				retval = mld_v2_enqueue_group_record(
2211				    &inm->in6m_scq, inm, 1, 0, 0);
2212				KASSERT(retval != 0,
2213				    ("%s: enqueue record = %d", __func__,
2214				     retval));
2215
2216				inm->in6m_state = MLD_LEAVING_MEMBER;
2217				inm->in6m_sctimer = 1;
2218				V_state_change_timers_running6 = 1;
2219				syncstates = 0;
2220			}
2221			break;
2222		}
2223		break;
2224	case MLD_LAZY_MEMBER:
2225	case MLD_SLEEPING_MEMBER:
2226	case MLD_AWAKENING_MEMBER:
2227		/* Our reports are suppressed; do nothing. */
2228		break;
2229	}
2230
2231	if (syncstates) {
2232		in6m_commit(inm);
2233		CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
2234		    ip6_sprintf(ip6tbuf, &inm->in6m_addr),
2235		    inm->in6m_ifp->if_xname);
2236		inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
2237		CTR3(KTR_MLD, "%s: T1 now MCAST_UNDEFINED for %p/%s",
2238		    __func__, &inm->in6m_addr, inm->in6m_ifp->if_xname);
2239	}
2240}
2241
2242/*
2243 * Enqueue an MLDv2 group record to the given output queue.
2244 *
2245 * If is_state_change is zero, a current-state record is appended.
2246 * If is_state_change is non-zero, a state-change report is appended.
2247 *
2248 * If is_group_query is non-zero, an mbuf packet chain is allocated.
2249 * If is_group_query is zero, and there is a packet at the tail of
2250 * the queue with sufficient free space, the record will be
2251 * appended to it.
2252 * Otherwise a new mbuf packet chain is allocated.
2253 *
2254 * If is_source_query is non-zero, each source is checked to see if
2255 * it was recorded for a Group-Source query, and will be omitted if
2256 * it is not both in-mode and recorded.
2257 *
2258 * The function will attempt to allocate leading space in the packet
2259 * for the IPv6+ICMP headers to be prepended without fragmenting the chain.
2260 *
2261 * If successful the size of all data appended to the queue is returned,
2262 * otherwise an error code less than zero is returned, or zero if
2263 * no record(s) were appended.
2264 */
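/*
 * Typical invocations in this file (illustrative summary):
 *
 *	mld_v2_enqueue_group_record(&inm->in6m_scq, inm, 1, 0, 0)
 *		enqueues a state-change record for a join, leave or
 *		filter change;
 *	mld_v2_enqueue_group_record(&mli->mli_gq, inm, 0, 0, 0)
 *		enqueues a current-state record when answering a
 *		General Query.
 *
 * Callers treat a negative return value as a negated errno
 * (e.g. -ENOMEM) and zero as "nothing was queued".
 */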
2265static int
2266mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
2267    const int is_state_change, const int is_group_query,
2268    const int is_source_query)
2269{
2270	struct mldv2_record	 mr;
2271	struct mldv2_record	*pmr;
2272	struct ifnet		*ifp;
2273	struct ip6_msource	*ims, *nims;
2274	struct mbuf		*m0, *m, *md;
2275	int			 error, is_filter_list_change;
2276	int			 minrec0len, m0srcs, msrcs, nbytes, off;
2277	int			 record_has_sources;
2278	int			 now;
2279	int			 type;
2280	uint8_t			 mode;
2281#ifdef KTR
2282	char			 ip6tbuf[INET6_ADDRSTRLEN];
2283#endif
2284
2285	IN6_MULTI_LOCK_ASSERT();
2286
2287	error = 0;
2288	ifp = inm->in6m_ifp;
2289	is_filter_list_change = 0;
2290	m = NULL;
2291	m0 = NULL;
2292	m0srcs = 0;
2293	msrcs = 0;
2294	nbytes = 0;
2295	nims = NULL;
2296	record_has_sources = 1;
2297	pmr = NULL;
2298	type = MLD_DO_NOTHING;
2299	mode = inm->in6m_st[1].iss_fmode;
2300
2301	/*
2302	 * If we did not transition out of ASM mode during t0->t1,
2303	 * and there are no source nodes to process, we can skip
2304	 * the generation of source records.
2305	 */
2306	if (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0 &&
2307	    inm->in6m_nsrc == 0)
2308		record_has_sources = 0;
2309
2310	if (is_state_change) {
2311		/*
2312		 * Queue a state change record.
2313		 * If the mode did not change, and there are non-ASM
2314		 * listeners or source filters present,
2315		 * we potentially need to issue two records for the group.
2316		 * If we are transitioning to MCAST_UNDEFINED, we need
2317		 * not send any sources.
2318		 * If there are ASM listeners, and there was no filter
2319		 * mode transition of any kind, do nothing.
2320		 */
2321		if (mode != inm->in6m_st[0].iss_fmode) {
2322			if (mode == MCAST_EXCLUDE) {
2323				CTR1(KTR_MLD, "%s: change to EXCLUDE",
2324				    __func__);
2325				type = MLD_CHANGE_TO_EXCLUDE_MODE;
2326			} else {
2327				CTR1(KTR_MLD, "%s: change to INCLUDE",
2328				    __func__);
2329				type = MLD_CHANGE_TO_INCLUDE_MODE;
2330				if (mode == MCAST_UNDEFINED)
2331					record_has_sources = 0;
2332			}
2333		} else {
2334			if (record_has_sources) {
2335				is_filter_list_change = 1;
2336			} else {
2337				type = MLD_DO_NOTHING;
2338			}
2339		}
2340	} else {
2341		/*
2342		 * Queue a current state record.
2343		 */
2344		if (mode == MCAST_EXCLUDE) {
2345			type = MLD_MODE_IS_EXCLUDE;
2346		} else if (mode == MCAST_INCLUDE) {
2347			type = MLD_MODE_IS_INCLUDE;
2348			KASSERT(inm->in6m_st[1].iss_asm == 0,
2349			    ("%s: inm %p is INCLUDE but ASM count is %d",
2350			     __func__, inm, inm->in6m_st[1].iss_asm));
2351		}
2352	}
2353
2354	/*
2355	 * Generate the filter list changes using a separate function.
2356	 */
2357	if (is_filter_list_change)
2358		return (mld_v2_enqueue_filter_change(ifq, inm));
2359
2360	if (type == MLD_DO_NOTHING) {
2361		CTR3(KTR_MLD, "%s: nothing to do for %s/%s",
2362		    __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
2363		    inm->in6m_ifp->if_xname);
2364		return (0);
2365	}
2366
2367	/*
2368	 * If any sources are present, we must be able to fit at least
2369	 * one in the trailing space of the tail packet's mbuf,
2370	 * ideally more.
2371	 */
2372	minrec0len = sizeof(struct mldv2_record);
2373	if (record_has_sources)
2374		minrec0len += sizeof(struct in6_addr);
2375
2376	CTR4(KTR_MLD, "%s: queueing %s for %s/%s", __func__,
2377	    mld_rec_type_to_str(type),
2378	    ip6_sprintf(ip6tbuf, &inm->in6m_addr),
2379	    inm->in6m_ifp->if_xname);
2380
2381	/*
2382	 * Check if we have a packet in the tail of the queue for this
2383	 * group into which the first group record for this group will fit.
2384	 * Otherwise allocate a new packet.
2385	 * Always allocate leading space for IP6+RA+ICMPV6+REPORT.
2386	 * Note: Group records for G/GSR query responses MUST be sent
2387	 * in their own packet.
2388	 */
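	/*
	 * Rough capacity figures (illustrative, assuming a 1500-byte MTU
	 * and the usual sizes: 40-byte IPv6 header, 8-byte router-alert
	 * hop-by-hop option, 8-byte report header, 20-byte group record,
	 * 16 bytes per source): a freshly allocated packet has room for
	 * roughly (1500 - 56 - 20) / 16 = 89 sources in its first group
	 * record.
	 */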
2389	m0 = ifq->ifq_tail;
2390	if (!is_group_query &&
2391	    m0 != NULL &&
2392	    (m0->m_pkthdr.PH_vt.vt_nrecs + 1 <= MLD_V2_REPORT_MAXRECS) &&
2393	    (m0->m_pkthdr.len + minrec0len) <
2394	     (ifp->if_mtu - MLD_MTUSPACE)) {
2395		m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
2396			    sizeof(struct mldv2_record)) /
2397			    sizeof(struct in6_addr);
2398		m = m0;
2399		CTR1(KTR_MLD, "%s: use existing packet", __func__);
2400	} else {
2401		if (_IF_QFULL(ifq)) {
2402			CTR1(KTR_MLD, "%s: outbound queue full", __func__);
2403			return (-ENOMEM);
2404		}
2405		m = NULL;
2406		m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
2407		    sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
2408		if (!is_state_change && !is_group_query)
2409			m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
2410		if (m == NULL)
2411			m = m_gethdr(M_DONTWAIT, MT_DATA);
2412		if (m == NULL)
2413			return (-ENOMEM);
2414
2415		mld_save_context(m, ifp);
2416
2417		CTR1(KTR_MLD, "%s: allocated first packet", __func__);
2418	}
2419
2420	/*
2421	 * Append group record.
2422	 * If we have sources, we don't know how many yet.
2423	 */
2424	mr.mr_type = type;
2425	mr.mr_datalen = 0;
2426	mr.mr_numsrc = 0;
2427	mr.mr_addr = inm->in6m_addr;
2428	in6_clearscope(&mr.mr_addr);
2429	if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
2430		if (m != m0)
2431			m_freem(m);
2432		CTR1(KTR_MLD, "%s: m_append() failed.", __func__);
2433		return (-ENOMEM);
2434	}
2435	nbytes += sizeof(struct mldv2_record);
2436
2437	/*
2438	 * Append as many sources as will fit in the first packet.
2439	 * If we are appending to a new packet, the chain allocation
2440	 * may potentially use clusters; use m_getptr() in this case.
2441	 * If we are appending to an existing packet, we need to obtain
2442	 * a pointer to the group record after m_append(), in case a new
2443	 * mbuf was allocated.
2444	 * Only append sources which are in-mode at t1. If we are
2445	 * transitioning to MCAST_UNDEFINED state on the group, do not
2446	 * include source entries.
2447	 * Only report recorded sources in our filter set when responding
2448	 * to a group-source query.
2449	 */
2450	if (record_has_sources) {
2451		if (m == m0) {
2452			md = m_last(m);
2453			pmr = (struct mldv2_record *)(mtod(md, uint8_t *) +
2454			    md->m_len - nbytes);
2455		} else {
2456			md = m_getptr(m, 0, &off);
2457			pmr = (struct mldv2_record *)(mtod(md, uint8_t *) +
2458			    off);
2459		}
2460		msrcs = 0;
2461		RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs,
2462		    nims) {
2463			CTR2(KTR_MLD, "%s: visit node %s", __func__,
2464			    ip6_sprintf(ip6tbuf, &ims->im6s_addr));
2465			now = im6s_get_mode(inm, ims, 1);
2466			CTR2(KTR_MLD, "%s: node is %d", __func__, now);
2467			if ((now != mode) ||
2468			    (now == mode && mode == MCAST_UNDEFINED)) {
2469				CTR1(KTR_MLD, "%s: skip node", __func__);
2470				continue;
2471			}
2472			if (is_source_query && ims->im6s_stp == 0) {
2473				CTR1(KTR_MLD, "%s: skip unrecorded node",
2474				    __func__);
2475				continue;
2476			}
2477			CTR1(KTR_MLD, "%s: append node", __func__);
2478			if (!m_append(m, sizeof(struct in6_addr),
2479			    (void *)&ims->im6s_addr)) {
2480				if (m != m0)
2481					m_freem(m);
2482				CTR1(KTR_MLD, "%s: m_append() failed.",
2483				    __func__);
2484				return (-ENOMEM);
2485			}
2486			nbytes += sizeof(struct in6_addr);
2487			++msrcs;
2488			if (msrcs == m0srcs)
2489				break;
2490		}
2491		CTR2(KTR_MLD, "%s: msrcs is %d this packet", __func__,
2492		    msrcs);
2493		pmr->mr_numsrc = htons(msrcs);
2494		nbytes += (msrcs * sizeof(struct in6_addr));
2495	}
2496
2497	if (is_source_query && msrcs == 0) {
2498		CTR1(KTR_MLD, "%s: no recorded sources to report", __func__);
2499		if (m != m0)
2500			m_freem(m);
2501		return (0);
2502	}
2503
2504	/*
2505	 * We are good to go with the first packet.
2506	 */
2507	if (m != m0) {
2508		CTR1(KTR_MLD, "%s: enqueueing first packet", __func__);
2509		m->m_pkthdr.PH_vt.vt_nrecs = 1;
2510		_IF_ENQUEUE(ifq, m);
2511	} else
2512		m->m_pkthdr.PH_vt.vt_nrecs++;
2513
2514	/*
2515	 * No further work needed if no source list in packet(s).
2516	 */
2517	if (!record_has_sources)
2518		return (nbytes);
2519
2520	/*
2521	 * Whilst sources remain to be announced, we need to allocate
2522	 * a new packet and fill out as many sources as will fit.
2523	 * Always try for a cluster first.
2524	 */
2525	while (nims != NULL) {
2526		if (_IF_QFULL(ifq)) {
2527			CTR1(KTR_MLD, "%s: outbound queue full", __func__);
2528			return (-ENOMEM);
2529		}
2530		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
2531		if (m == NULL)
2532			m = m_gethdr(M_DONTWAIT, MT_DATA);
2533		if (m == NULL)
2534			return (-ENOMEM);
2535		mld_save_context(m, ifp);
2536		md = m_getptr(m, 0, &off);
2537		pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + off);
2538		CTR1(KTR_MLD, "%s: allocated next packet", __func__);
2539
2540		if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
2541			if (m != m0)
2542				m_freem(m);
2543			CTR1(KTR_MLD, "%s: m_append() failed.", __func__);
2544			return (-ENOMEM);
2545		}
2546		m->m_pkthdr.PH_vt.vt_nrecs = 1;
2547		nbytes += sizeof(struct mldv2_record);
2548
2549		m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
2550		    sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
2551
2552		msrcs = 0;
2553		RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
2554			CTR2(KTR_MLD, "%s: visit node %s",
2555			    __func__, ip6_sprintf(ip6tbuf, &ims->im6s_addr));
2556			now = im6s_get_mode(inm, ims, 1);
2557			if ((now != mode) ||
2558			    (now == mode && mode == MCAST_UNDEFINED)) {
2559				CTR1(KTR_MLD, "%s: skip node", __func__);
2560				continue;
2561			}
2562			if (is_source_query && ims->im6s_stp == 0) {
2563				CTR1(KTR_MLD, "%s: skip unrecorded node",
2564				    __func__);
2565				continue;
2566			}
2567			CTR1(KTR_MLD, "%s: append node", __func__);
2568			if (!m_append(m, sizeof(struct in6_addr),
2569			    (void *)&ims->im6s_addr)) {
2570				if (m != m0)
2571					m_freem(m);
2572				CTR1(KTR_MLD, "%s: m_append() failed.",
2573				    __func__);
2574				return (-ENOMEM);
2575			}
2576			++msrcs;
2577			if (msrcs == m0srcs)
2578				break;
2579		}
2580		pmr->mr_numsrc = htons(msrcs);
2581		nbytes += (msrcs * sizeof(struct in6_addr));
2582
2583		CTR1(KTR_MLD, "%s: enqueueing next packet", __func__);
2584		_IF_ENQUEUE(ifq, m);
2585	}
2586
2587	return (nbytes);
2588}
2589
2590/*
2591 * Type used to mark record pass completion.
2592 * We exploit the fact we can cast to this easily from the
2593 * current filter modes on each ip6_msource node.
2594 */
2595typedef enum {
2596	REC_NONE = 0x00,	/* MCAST_UNDEFINED */
2597	REC_ALLOW = 0x01,	/* MCAST_INCLUDE */
2598	REC_BLOCK = 0x02,	/* MCAST_EXCLUDE */
2599	REC_FULL = REC_ALLOW | REC_BLOCK
2600} rectype_t;
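/*
 * The REC_* values are chosen to line up with MCAST_UNDEFINED (0),
 * MCAST_INCLUDE (1) and MCAST_EXCLUDE (2), which is what makes the
 * direct cast from im6s_get_mode() in mld_v2_enqueue_filter_change()
 * safe: a source whose mode at t1 is INCLUDE maps to REC_ALLOW,
 * EXCLUDE maps to REC_BLOCK, and a source that became undefined at
 * t1 is assigned the record type opposite the group's filter mode
 * via (~mode & REC_FULL).
 */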
2601
2602/*
2603 * Enqueue an MLDv2 filter list change to the given output queue.
2604 *
2605 * Source list filter state is held in an RB-tree. When the filter list
2606 * for a group is changed without changing its mode, we need to compute
2607 * the deltas between T0 and T1 for each source in the filter set,
2608 * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
2609 *
2610 * As we may potentially queue two record types, and the entire RB-tree
2611 * needs to be walked at once, we break this out into its own function
2612 * so we can generate a tightly packed queue of packets.
2613 *
2614 * XXX This could be written to only use one tree walk, although that makes
2615 * serializing into the mbuf chains a bit harder. For now we do two walks
2616 * which makes things easier on us, and it may or may not be harder on
2617 * the L2 cache.
2618 *
2619 * If successful the size of all data appended to the queue is returned,
2620 * otherwise an error code less than zero is returned, or zero if
2621 * no record(s) were appended.
2622 */
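/*
 * Worked example (illustrative): a group in INCLUDE mode whose source
 * list changes from {A, B} at t0 to {B, C} at t1 yields two records,
 * ALLOW_NEW_SOURCES {C} and BLOCK_OLD_SOURCES {A}; B is unchanged and
 * is skipped. On an EXCLUDE-mode group, sources that are in INCLUDE
 * mode at t1 are never reported (see "skip IN src on EX group" below).
 */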
2623static int
2624mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm)
2625{
2626	static const int MINRECLEN =
2627	    sizeof(struct mldv2_record) + sizeof(struct in6_addr);
2628	struct ifnet		*ifp;
2629	struct mldv2_record	 mr;
2630	struct mldv2_record	*pmr;
2631	struct ip6_msource	*ims, *nims;
2632	struct mbuf		*m, *m0, *md;
2633	int			 m0srcs, nbytes, npbytes, off, rsrcs, schanged;
2634	int			 nallow, nblock;
2635	uint8_t			 mode, now, then;
2636	rectype_t		 crt, drt, nrt;
2637#ifdef KTR
2638	char			 ip6tbuf[INET6_ADDRSTRLEN];
2639#endif
2640
2641	IN6_MULTI_LOCK_ASSERT();
2642
2643	if (inm->in6m_nsrc == 0 ||
2644	    (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0))
2645		return (0);
2646
2647	ifp = inm->in6m_ifp;			/* interface */
2648	mode = inm->in6m_st[1].iss_fmode;	/* filter mode at t1 */
2649	crt = REC_NONE;	/* current group record type */
2650	drt = REC_NONE;	/* mask of completed group record types */
2651	nrt = REC_NONE;	/* record type for current node */
2652	m0srcs = 0;	/* # source which will fit in current mbuf chain */
2653	npbytes = 0;	/* # of bytes appended this packet */
2654	nbytes = 0;	/* # of bytes appended to group's state-change queue */
2655	rsrcs = 0;	/* # sources encoded in current record */
2656	schanged = 0;	/* # nodes encoded in overall filter change */
2657	nallow = 0;	/* # of source entries in ALLOW_NEW */
2658	nblock = 0;	/* # of source entries in BLOCK_OLD */
2659	nims = NULL;	/* next tree node pointer */
2660
2661	/*
2662	 * Loop once for each possible filter record mode (ALLOW_NEW,
2663	 * BLOCK_OLD). The first kind of source we encounter tells us
2664	 * which kind of record we start appending.
2665	 * If a node transitioned to UNDEFINED at t1, its mode is treated
2666	 * as the inverse of the group's filter mode.
2667	 */
2668	while (drt != REC_FULL) {
2669		do {
2670			m0 = ifq->ifq_tail;
2671			if (m0 != NULL &&
2672			    (m0->m_pkthdr.PH_vt.vt_nrecs + 1 <=
2673			     MLD_V2_REPORT_MAXRECS) &&
2674			    (m0->m_pkthdr.len + MINRECLEN) <
2675			     (ifp->if_mtu - MLD_MTUSPACE)) {
2676				m = m0;
2677				m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
2678					    sizeof(struct mldv2_record)) /
2679					    sizeof(struct in6_addr);
2680				CTR1(KTR_MLD,
2681				    "%s: use previous packet", __func__);
2682			} else {
2683				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
2684				if (m == NULL)
2685					m = m_gethdr(M_DONTWAIT, MT_DATA);
2686				if (m == NULL) {
2687					CTR1(KTR_MLD,
2688					    "%s: m_get*() failed", __func__);
2689					return (-ENOMEM);
2690				}
2691				m->m_pkthdr.PH_vt.vt_nrecs = 0;
2692				mld_save_context(m, ifp);
2693				m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
2694				    sizeof(struct mldv2_record)) /
2695				    sizeof(struct in6_addr);
2696				npbytes = 0;
2697				CTR1(KTR_MLD,
2698				    "%s: allocated new packet", __func__);
2699			}
2700			/*
2701			 * Append the MLD group record header to the
2702			 * current packet's data area.
2703			 * Recalculate pointer to free space for next
2704			 * group record, in case m_append() allocated
2705			 * a new mbuf or cluster.
2706			 */
2707			memset(&mr, 0, sizeof(mr));
2708			mr.mr_addr = inm->in6m_addr;
2709			in6_clearscope(&mr.mr_addr);
2710			if (!m_append(m, sizeof(mr), (void *)&mr)) {
2711				if (m != m0)
2712					m_freem(m);
2713				CTR1(KTR_MLD,
2714				    "%s: m_append() failed", __func__);
2715				return (-ENOMEM);
2716			}
2717			npbytes += sizeof(struct mldv2_record);
2718			if (m != m0) {
2719				/* new packet; offset in chain */
2720				md = m_getptr(m, npbytes -
2721				    sizeof(struct mldv2_record), &off);
2722				pmr = (struct mldv2_record *)(mtod(md,
2723				    uint8_t *) + off);
2724			} else {
2725				/* current packet; offset from last append */
2726				md = m_last(m);
2727				pmr = (struct mldv2_record *)(mtod(md,
2728				    uint8_t *) + md->m_len -
2729				    sizeof(struct mldv2_record));
2730			}
2731			/*
2732			 * Begin walking the tree for this record type
2733			 * pass, or continue from where we left off
2734			 * previously if we had to allocate a new packet.
2735			 * Only report deltas in-mode at t1.
2736			 * We need not report included sources as allowed
2737			 * if we are in inclusive mode on the group,
2738			 * however the converse is not true.
2739			 */
2740			rsrcs = 0;
2741			if (nims == NULL) {
2742				nims = RB_MIN(ip6_msource_tree,
2743				    &inm->in6m_srcs);
2744			}
2745			RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
2746				CTR2(KTR_MLD, "%s: visit node %s", __func__,
2747				    ip6_sprintf(ip6tbuf, &ims->im6s_addr));
2748				now = im6s_get_mode(inm, ims, 1);
2749				then = im6s_get_mode(inm, ims, 0);
2750				CTR3(KTR_MLD, "%s: mode: t0 %d, t1 %d",
2751				    __func__, then, now);
2752				if (now == then) {
2753					CTR1(KTR_MLD,
2754					    "%s: skip unchanged", __func__);
2755					continue;
2756				}
2757				if (mode == MCAST_EXCLUDE &&
2758				    now == MCAST_INCLUDE) {
2759					CTR1(KTR_MLD,
2760					    "%s: skip IN src on EX group",
2761					    __func__);
2762					continue;
2763				}
2764				nrt = (rectype_t)now;
2765				if (nrt == REC_NONE)
2766					nrt = (rectype_t)(~mode & REC_FULL);
2767				if (schanged++ == 0) {
2768					crt = nrt;
2769				} else if (crt != nrt)
2770					continue;
2771				if (!m_append(m, sizeof(struct in6_addr),
2772				    (void *)&ims->im6s_addr)) {
2773					if (m != m0)
2774						m_freem(m);
2775					CTR1(KTR_MLD,
2776					    "%s: m_append() failed", __func__);
2777					return (-ENOMEM);
2778				}
2779				nallow += !!(crt == REC_ALLOW);
2780				nblock += !!(crt == REC_BLOCK);
2781				if (++rsrcs == m0srcs)
2782					break;
2783			}
2784			/*
2785			 * If we did not append any tree nodes on this
2786			 * pass, back out of allocations.
2787			 */
2788			if (rsrcs == 0) {
2789				npbytes -= sizeof(struct mldv2_record);
2790				if (m != m0) {
2791					CTR1(KTR_MLD,
2792					    "%s: m_free(m)", __func__);
2793					m_freem(m);
2794				} else {
2795					CTR1(KTR_MLD,
2796					    "%s: m_adj(m, -mr)", __func__);
2797					m_adj(m, -((int)sizeof(
2798					    struct mldv2_record)));
2799				}
2800				continue;
2801			}
2802			npbytes += (rsrcs * sizeof(struct in6_addr));
2803			if (crt == REC_ALLOW)
2804				pmr->mr_type = MLD_ALLOW_NEW_SOURCES;
2805			else if (crt == REC_BLOCK)
2806				pmr->mr_type = MLD_BLOCK_OLD_SOURCES;
2807			pmr->mr_numsrc = htons(rsrcs);
2808			/*
2809			 * Count the new group record, and enqueue this
2810			 * packet if it wasn't already queued.
2811			 */
2812			m->m_pkthdr.PH_vt.vt_nrecs++;
2813			if (m != m0)
2814				_IF_ENQUEUE(ifq, m);
2815			nbytes += npbytes;
2816		} while (nims != NULL);
2817		drt |= crt;
2818		crt = (~crt & REC_FULL);
2819	}
2820
2821	CTR3(KTR_MLD, "%s: queued %d ALLOW_NEW, %d BLOCK_OLD", __func__,
2822	    nallow, nblock);
2823
2824	return (nbytes);
2825}
2826
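/*
 * Merge the group's pending state-change queue into the per-interface
 * state-change queue 'ifscq'.
 *
 * If further retransmissions are pending (in6m_scrv > 0), each queued
 * packet is duplicated rather than dequeued, so the original remains
 * available for the next pass. A packet is merged into the tail
 * packet of 'ifscq' only if the combined result stays within
 * MLD_V2_REPORT_MAXRECS group records and the interface's MTU budget;
 * otherwise it is enqueued as a separate packet.
 * Returns 0 on success, or ENOMEM if a copy could not be allocated.
 */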
2827static int
2828mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq)
2829{
2830	struct ifqueue	*gq;
2831	struct mbuf	*m;		/* pending state-change */
2832	struct mbuf	*m0;		/* copy of pending state-change */
2833	struct mbuf	*mt;		/* last state-change in packet */
2834	int		 docopy, domerge;
2835	u_int		 recslen;
2836
2837	docopy = 0;
2838	domerge = 0;
2839	recslen = 0;
2840
2841	IN6_MULTI_LOCK_ASSERT();
2842	MLD_LOCK_ASSERT();
2843
2844	/*
2845	 * If there are further pending retransmissions, make a writable
2846	 * copy of each queued state-change message before merging.
2847	 */
2848	if (inm->in6m_scrv > 0)
2849		docopy = 1;
2850
2851	gq = &inm->in6m_scq;
2852#ifdef KTR
2853	if (gq->ifq_head == NULL) {
2854		CTR2(KTR_MLD, "%s: WARNING: queue for inm %p is empty",
2855		    __func__, inm);
2856	}
2857#endif
2858
2859	m = gq->ifq_head;
2860	while (m != NULL) {
2861		/*
2862		 * Only merge the report into the current packet if
2863		 * there is sufficient space to do so; an MLDv2 report
2864		 * packet may only contain 65,535 group records.
2865		 * Always use a simple mbuf chain concatenation to do this,
2866		 * as large state changes for single groups may have
2867		 * allocated clusters.
2868		 */
2869		domerge = 0;
2870		mt = ifscq->ifq_tail;
2871		if (mt != NULL) {
2872			recslen = m_length(m, NULL);
2873
2874			if ((mt->m_pkthdr.PH_vt.vt_nrecs +
2875			    m->m_pkthdr.PH_vt.vt_nrecs <=
2876			    MLD_V2_REPORT_MAXRECS) &&
2877			    (mt->m_pkthdr.len + recslen <=
2878			    (inm->in6m_ifp->if_mtu - MLD_MTUSPACE)))
2879				domerge = 1;
2880		}
2881
2882		if (!domerge && _IF_QFULL(gq)) {
2883			CTR2(KTR_MLD,
2884			    "%s: outbound queue full, skipping whole packet %p",
2885			    __func__, m);
2886			mt = m->m_nextpkt;
2887			if (!docopy)
2888				m_freem(m);
2889			m = mt;
2890			continue;
2891		}
2892
2893		if (!docopy) {
2894			CTR2(KTR_MLD, "%s: dequeueing %p", __func__, m);
2895			_IF_DEQUEUE(gq, m0);
2896			m = m0->m_nextpkt;
2897		} else {
2898			CTR2(KTR_MLD, "%s: copying %p", __func__, m);
2899			m0 = m_dup(m, M_NOWAIT);
2900			if (m0 == NULL)
2901				return (ENOMEM);
2902			m0->m_nextpkt = NULL;
2903			m = m->m_nextpkt;
2904		}
2905
2906		if (!domerge) {
2907			CTR3(KTR_MLD, "%s: queueing %p to ifscq %p",
2908			    __func__, m0, ifscq);
2909			_IF_ENQUEUE(ifscq, m0);
2910		} else {
2911			struct mbuf *mtl;	/* last mbuf of packet mt */
2912
2913			CTR3(KTR_MLD, "%s: merging %p with ifscq tail %p",
2914			    __func__, m0, mt);
2915
2916			mtl = m_last(mt);
2917			m0->m_flags &= ~M_PKTHDR;
2918			mt->m_pkthdr.len += recslen;
2919			mt->m_pkthdr.PH_vt.vt_nrecs +=
2920			    m0->m_pkthdr.PH_vt.vt_nrecs;
2921
2922			mtl->m_next = m0;
2923		}
2924	}
2925
2926	return (0);
2927}
2928
2929/*
2930 * Respond to a pending MLDv2 General Query.
2931 */
2932static void
2933mld_v2_dispatch_general_query(struct mld_ifinfo *mli)
2934{
2935	struct ifmultiaddr	*ifma, *tifma;
2936	struct ifnet		*ifp;
2937	struct in6_multi	*inm;
2938	int			 retval;
2939
2940	IN6_MULTI_LOCK_ASSERT();
2941	MLD_LOCK_ASSERT();
2942
2943	KASSERT(mli->mli_version == MLD_VERSION_2,
2944	    ("%s: called when version %d", __func__, mli->mli_version));
2945
2946	ifp = mli->mli_ifp;
2947
2948	IF_ADDR_LOCK(ifp);
2949	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, tifma) {
2950		if (ifma->ifma_addr->sa_family != AF_INET6 ||
2951		    ifma->ifma_protospec == NULL)
2952			continue;
2953
2954		inm = (struct in6_multi *)ifma->ifma_protospec;
2955		KASSERT(ifp == inm->in6m_ifp,
2956		    ("%s: inconsistent ifp", __func__));
2957
2958		switch (inm->in6m_state) {
2959		case MLD_NOT_MEMBER:
2960		case MLD_SILENT_MEMBER:
2961			break;
2962		case MLD_REPORTING_MEMBER:
2963		case MLD_IDLE_MEMBER:
2964		case MLD_LAZY_MEMBER:
2965		case MLD_SLEEPING_MEMBER:
2966		case MLD_AWAKENING_MEMBER:
2967			inm->in6m_state = MLD_REPORTING_MEMBER;
2968			retval = mld_v2_enqueue_group_record(&mli->mli_gq,
2969			    inm, 0, 0, 0);
2970			CTR2(KTR_MLD, "%s: enqueue record = %d",
2971			    __func__, retval);
2972			break;
2973		case MLD_G_QUERY_PENDING_MEMBER:
2974		case MLD_SG_QUERY_PENDING_MEMBER:
2975		case MLD_LEAVING_MEMBER:
2976			break;
2977		}
2978	}
2979	IF_ADDR_UNLOCK(ifp);
2980
2981	mld_dispatch_queue(&mli->mli_gq, MLD_MAX_RESPONSE_BURST);
2982
2983	/*
2984	 * Slew transmission of bursts over 500ms intervals.
2985	 */
2986	if (mli->mli_gq.ifq_head != NULL) {
2987		mli->mli_v2_timer = 1 + MLD_RANDOM_DELAY(
2988		    MLD_RESPONSE_BURST_INTERVAL);
2989		V_interface_timers_running6 = 1;
2990	}
2991}
2992
2993/*
2994 * Transmit the next pending message in the output queue.
2995 *
2996 * VIMAGE: Needs to store/restore vnet pointer on a per-mbuf-chain basis.
2997 * MRT: Nothing needs to be done, as MLD traffic is always local to
2998 * a link and uses a link-scope multicast address.
2999 */
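/*
 * Two kinds of chain arrive here: chains flagged M_MLDV1 are complete
 * MLDv1 messages built by mld_v1_transmit_report() and are sent as-is,
 * while anything else is a bare run of MLDv2 group records which still
 * needs the IPv6 and report headers prepended by mld_v2_encap_report()
 * before transmission.
 */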
3000static void
3001mld_dispatch_packet(struct mbuf *m)
3002{
3003	struct ip6_moptions	 im6o;
3004	struct ifnet		*ifp;
3005	struct ifnet		*oifp;
3006	struct mbuf		*m0;
3007	struct mbuf		*md;
3008	struct ip6_hdr		*ip6;
3009	struct mld_hdr		*mld;
3010	int			 error;
3011	int			 off;
3012	int			 type;
3013	uint32_t		 ifindex;
3014
3015	CTR2(KTR_MLD, "%s: transmit %p", __func__, m);
3016
3017	/*
3018	 * Set VNET image pointer from enqueued mbuf chain
3019	 * before doing anything else. Whilst we use interface
3020	 * indexes to guard against interface detach, they are
3021	 * unique to each VIMAGE and must be retrieved.
3022	 */
3023	ifindex = mld_restore_context(m);
3024
3025	/*
3026	 * Check if the ifnet still exists. This limits the scope of
3027	 * any race, in the absence of a global ifp lock, at low cost
3028	 * (an array lookup).
3029	 */
3030	ifp = ifnet_byindex(ifindex);
3031	if (ifp == NULL) {
3032		CTR3(KTR_MLD, "%s: dropped %p as ifindex %u went away.",
3033		    __func__, m, ifindex);
3034		m_freem(m);
3035		IP6STAT_INC(ip6s_noroute);
3036		goto out;
3037	}
3038
3039	im6o.im6o_multicast_hlim  = 1;
3040	im6o.im6o_multicast_loop = (V_ip6_mrouter != NULL);
3041	im6o.im6o_multicast_ifp = ifp;
3042
3043	if (m->m_flags & M_MLDV1) {
3044		m0 = m;
3045	} else {
3046		m0 = mld_v2_encap_report(ifp, m);
3047		if (m0 == NULL) {
3048			CTR2(KTR_MLD, "%s: dropped %p", __func__, m);
3049			m_freem(m);
3050			IP6STAT_INC(ip6s_odropped);
3051			goto out;
3052		}
3053	}
3054
3055	mld_scrub_context(m0);
3056	m->m_flags &= ~(M_PROTOFLAGS);
3057	m0->m_pkthdr.rcvif = V_loif;
3058
3059	ip6 = mtod(m0, struct ip6_hdr *);
3060#if 0
3061	(void)in6_setscope(&ip6->ip6_dst, ifp, NULL);	/* XXX LOR */
3062#else
3063	/*
3064	 * XXX XXX Break some KPI rules to prevent an LOR which would
3065	 * occur if we called in6_setscope() at transmission.
3066	 * See comments at top of file.
3067	 */
3068	MLD_EMBEDSCOPE(&ip6->ip6_dst, ifp->if_index);
3069#endif
3070
3071	/*
3072	 * Retrieve the ICMPv6 type before handoff to ip6_output(),
3073	 * so we can bump the stats.
3074	 */
3075	md = m_getptr(m0, sizeof(struct ip6_hdr), &off);
3076	mld = (struct mld_hdr *)(mtod(md, uint8_t *) + off);
3077	type = mld->mld_type;
3078
3079	error = ip6_output(m0, &mld_po, NULL, IPV6_UNSPECSRC, &im6o,
3080	    &oifp, NULL);
3081	if (error) {
3082		CTR3(KTR_MLD, "%s: ip6_output(%p) = %d", __func__, m0, error);
3083		goto out;
3084	}
3085	ICMP6STAT_INC(icp6s_outhist[type]);
3086	if (oifp != NULL) {
3087		icmp6_ifstat_inc(oifp, ifs6_out_msg);
3088		switch (type) {
3089		case MLD_LISTENER_REPORT:
3090		case MLDV2_LISTENER_REPORT:
3091			icmp6_ifstat_inc(oifp, ifs6_out_mldreport);
3092			break;
3093		case MLD_LISTENER_DONE:
3094			icmp6_ifstat_inc(oifp, ifs6_out_mlddone);
3095			break;
3096		}
3097	}
3098out:
3099	return;
3100}
3101
3102/*
3103 * Encapsulate an MLDv2 report.
3104 *
3105 * KAME IPv6 requires that hop-by-hop options be passed separately,
3106 * and that the IPv6 header be prepended in a separate mbuf.
3107 *
3108 * Returns a pointer to the new mbuf chain head, or NULL if the
3109 * allocation failed.
3110 */
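/*
 * The resulting chain is laid out as:
 *
 *	[mh: IPv6 header | MLDv2 report header] -> [m: group records...]
 *
 * The record count stashed in the payload chain's packet header
 * (vt_nrecs) is folded into the report header, and the ICMPv6 checksum
 * is computed over the report header plus all records. The router
 * alert hop-by-hop option is supplied separately via mld_po at
 * ip6_output() time.
 */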
3111static struct mbuf *
3112mld_v2_encap_report(struct ifnet *ifp, struct mbuf *m)
3113{
3114	struct mbuf		*mh;
3115	struct mldv2_report	*mld;
3116	struct ip6_hdr		*ip6;
3117	struct in6_ifaddr	*ia;
3118	int			 mldreclen;
3119
3120	KASSERT(ifp != NULL, ("%s: null ifp", __func__));
3121	KASSERT((m->m_flags & M_PKTHDR),
3122	    ("%s: mbuf chain %p is !M_PKTHDR", __func__, m));
3123
3124	/*
3125	 * RFC3590: OK to send as :: or tentative during DAD.
3126	 */
3127	ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
3128	if (ia == NULL)
3129		CTR1(KTR_MLD, "%s: warning: ia is NULL", __func__);
3130
3131	MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3132	if (mh == NULL) {
3133		if (ia != NULL)
3134			ifa_free(&ia->ia_ifa);
3135		m_freem(m);
3136		return (NULL);
3137	}
3138	MH_ALIGN(mh, sizeof(struct ip6_hdr) + sizeof(struct mldv2_report));
3139
3140	mldreclen = m_length(m, NULL);
3141	CTR2(KTR_MLD, "%s: mldreclen is %d", __func__, mldreclen);
3142
3143	mh->m_len = sizeof(struct ip6_hdr) + sizeof(struct mldv2_report);
3144	mh->m_pkthdr.len = sizeof(struct ip6_hdr) +
3145	    sizeof(struct mldv2_report) + mldreclen;
3146
3147	ip6 = mtod(mh, struct ip6_hdr *);
3148	ip6->ip6_flow = 0;
3149	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
3150	ip6->ip6_vfc |= IPV6_VERSION;
3151	ip6->ip6_nxt = IPPROTO_ICMPV6;
3152	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
3153	if (ia != NULL)
3154		ifa_free(&ia->ia_ifa);
3155	ip6->ip6_dst = in6addr_linklocal_allv2routers;
3156	/* scope ID will be set in netisr */
3157
3158	mld = (struct mldv2_report *)(ip6 + 1);
3159	mld->mld_type = MLDV2_LISTENER_REPORT;
3160	mld->mld_code = 0;
3161	mld->mld_cksum = 0;
3162	mld->mld_v2_reserved = 0;
3163	mld->mld_v2_numrecs = htons(m->m_pkthdr.PH_vt.vt_nrecs);
3164	m->m_pkthdr.PH_vt.vt_nrecs = 0;
3165
3166	mh->m_next = m;
3167	mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
3168	    sizeof(struct ip6_hdr), sizeof(struct mldv2_report) + mldreclen);
3169	return (mh);
3170}
3171
3172#ifdef KTR
3173static char *
3174mld_rec_type_to_str(const int type)
3175{
3176
3177	switch (type) {
3178		case MLD_CHANGE_TO_EXCLUDE_MODE:
3179			return "TO_EX";
3180			break;
3181		case MLD_CHANGE_TO_INCLUDE_MODE:
3182			return "TO_IN";
3183			break;
3184		case MLD_MODE_IS_EXCLUDE:
3185			return "MODE_EX";
3186			break;
3187		case MLD_MODE_IS_INCLUDE:
3188			return "MODE_IN";
3189			break;
3190		case MLD_ALLOW_NEW_SOURCES:
3191			return "ALLOW_NEW";
3192			break;
3193		case MLD_BLOCK_OLD_SOURCES:
3194			return "BLOCK_OLD";
3195			break;
3196		default:
3197			break;
3198	}
3199	return "unknown";
3200}
3201#endif
3202
3203static void
3204mld_sysinit(void)
3205{
3206
3207	CTR1(KTR_MLD, "%s: initializing", __func__);
3208	MLD_LOCK_INIT();
3209
3210	ip6_initpktopts(&mld_po);
3211	mld_po.ip6po_hlim = 1;
3212	mld_po.ip6po_hbh = &mld_ra.hbh;
3213	mld_po.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
3214	mld_po.ip6po_flags = IP6PO_DONTFRAG;
3215}
3216
3217static void
3218mld_sysuninit(void)
3219{
3220
3221	CTR1(KTR_MLD, "%s: tearing down", __func__);
3222	MLD_LOCK_DESTROY();
3223}
3224
3225/*
3226 * Initialize an MLDv2 instance.
3227 * VIMAGE: Assumes curvnet set by caller and called per vimage.
3228 */
3229static int
3230vnet_mld_iattach(const void *unused __unused)
3231{
3232
3233	CTR1(KTR_MLD, "%s: initializing", __func__);
3234
3235	LIST_INIT(&V_mli_head);
3236
3237	return (0);
3238}
3239
3240static int
3241vnet_mld_idetach(const void *unused __unused)
3242{
3243
3244	CTR1(KTR_MLD, "%s: tearing down", __func__);
3245
3246	KASSERT(LIST_EMPTY(&V_mli_head),
3247	    ("%s: mli list not empty; ifnets not detached?", __func__));
3248
3249	return (0);
3250}
3251
3252#ifdef VIMAGE
3253static vnet_modinfo_t vnet_mld_modinfo = {
3254	.vmi_id		= VNET_MOD_MLD,
3255	.vmi_name	= "mld",
3256	.vmi_dependson	= VNET_MOD_INET6,
3257	.vmi_iattach	= vnet_mld_iattach,
3258	.vmi_idetach	= vnet_mld_idetach
3259};
3260#endif
3261
3262static int
3263mld_modevent(module_t mod, int type, void *unused __unused)
3264{
3265
3266    switch (type) {
3267    case MOD_LOAD:
3268	mld_sysinit();
3269#ifdef VIMAGE
3270	vnet_mod_register(&vnet_mld_modinfo);
3271#else
3272	vnet_mld_iattach(NULL);
3273#endif
3274	break;
3275    case MOD_UNLOAD:
3276#ifdef VIMAGE
3277	vnet_mod_deregister(&vnet_mld_modinfo);
3278#else
3279	vnet_mld_idetach(NULL);
3280#endif
3281	mld_sysuninit();
3282	break;
3283    default:
3284	return (EOPNOTSUPP);
3285    }
3286    return (0);
3287}
3288
3289static moduledata_t mld_mod = {
3290    "mld",
3291    mld_modevent,
3292    0
3293};
3294DECLARE_MODULE(mld, mld_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
3295