1/*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*-
29 * Copyright (c) 2009 Bruce Simpson.
30 *
31 * Redistribution and use in source and binary forms, with or without
32 * modification, are permitted provided that the following conditions
33 * are met:
34 * 1. Redistributions of source code must retain the above copyright
35 *    notice, this list of conditions and the following disclaimer.
36 * 2. Redistributions in binary form must reproduce the above copyright
37 *    notice, this list of conditions and the following disclaimer in the
38 *    documentation and/or other materials provided with the distribution.
39 * 3. The name of the author may not be used to endorse or promote
40 *    products derived from this software without specific prior written
41 *    permission.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 */
55
56/*
57 * Copyright (c) 1988 Stephen Deering.
58 * Copyright (c) 1992, 1993
59 *	The Regents of the University of California.  All rights reserved.
60 *
61 * This code is derived from software contributed to Berkeley by
62 * Stephen Deering of Stanford University.
63 *
64 * Redistribution and use in source and binary forms, with or without
65 * modification, are permitted provided that the following conditions
66 * are met:
67 * 1. Redistributions of source code must retain the above copyright
68 *    notice, this list of conditions and the following disclaimer.
69 * 2. Redistributions in binary form must reproduce the above copyright
70 *    notice, this list of conditions and the following disclaimer in the
71 *    documentation and/or other materials provided with the distribution.
72 * 3. All advertising materials mentioning features or use of this software
73 *    must display the following acknowledgement:
74 *	This product includes software developed by the University of
75 *	California, Berkeley and its contributors.
76 * 4. Neither the name of the University nor the names of its contributors
77 *    may be used to endorse or promote products derived from this software
78 *    without specific prior written permission.
79 *
80 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
81 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
82 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
83 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
84 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
85 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
86 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
87 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
88 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
89 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
90 * SUCH DAMAGE.
91 *
92 *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
93 */
94/*
95 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
96 * support for mandatory and extensible security protections.  This notice
97 * is included in support of clause 2.2 (b) of the Apple Public License,
98 * Version 2.0.
99 */
100
101#include <sys/cdefs.h>
102
103#include <sys/param.h>
104#include <sys/systm.h>
105#include <sys/mbuf.h>
106#include <sys/socket.h>
107#include <sys/protosw.h>
108#include <sys/sysctl.h>
109#include <sys/kernel.h>
110#include <sys/malloc.h>
111#include <sys/mcache.h>
112
113#include <kern/zalloc.h>
114
115#include <net/if.h>
116#include <net/route.h>
117
118#include <netinet/in.h>
119#include <netinet/in_var.h>
120#include <netinet6/in6_var.h>
121#include <netinet/ip6.h>
122#include <netinet6/ip6_var.h>
123#include <netinet6/scope6_var.h>
124#include <netinet/icmp6.h>
125#include <netinet6/mld6.h>
126#include <netinet6/mld6_var.h>
127
128/* Lock group and attribute for mld_mtx */
129static lck_attr_t       *mld_mtx_attr;
130static lck_grp_t        *mld_mtx_grp;
131static lck_grp_attr_t   *mld_mtx_grp_attr;
132
133/*
134 * Locking and reference counting:
135 *
136 * mld_mtx mainly protects mli_head.  In cases where both mld_mtx and
137 * in6_multihead_lock must be held, the former must be acquired first in order
138 * to maintain lock ordering.  It is not a requirement that mld_mtx be
139 * acquired first before in6_multihead_lock, but in case both must be acquired
140 * in succession, the correct lock ordering must be followed.
141 *
142 * Instead of walking the if_multiaddrs list at the interface and returning
143 * the ifma_protospec value of a matching entry, we search the global list
144 * of in6_multi records and find it that way; this is done with in6_multihead
145 * lock held.  Doing so avoids the race condition issues that many other BSDs
146 * suffer from (therefore in our implementation, ifma_protospec will never be
147 * NULL for as long as the in6_multi is valid.)
148 *
149 * The above creates a requirement for the in6_multi to stay in in6_multihead
150 * list even after the final MLD leave (in MLDv2 mode) until no longer needs
151 * be retransmitted (this is not required for MLDv1.)  In order to handle
152 * this, the request and reference counts of the in6_multi are bumped up when
153 * the state changes to MLD_LEAVING_MEMBER, and later dropped in the timeout
154 * handler.  Each in6_multi holds a reference to the underlying mld_ifinfo.
155 *
156 * Thus, the permitted lock oder is:
157 *
158 *	mld_mtx, in6_multihead_lock, inm6_lock, mli_lock
159 *
160 * Any may be taken independently, but if any are held at the same time,
161 * the above lock order must be followed.
162 */
163static decl_lck_mtx_data(, mld_mtx);
164
165SLIST_HEAD(mld_in6m_relhead, in6_multi);
166
167static void	mli_initvar(struct mld_ifinfo *, struct ifnet *, int);
168static struct mld_ifinfo *mli_alloc(int);
169static void	mli_free(struct mld_ifinfo *);
170static void	mli_delete(const struct ifnet *, struct mld_in6m_relhead *);
171static void	mld_dispatch_packet(struct mbuf *);
172static void	mld_final_leave(struct in6_multi *, struct mld_ifinfo *);
173static int	mld_handle_state_change(struct in6_multi *,
174		    struct mld_ifinfo *);
175static int	mld_initial_join(struct in6_multi *, struct mld_ifinfo *,
176		    const int);
177#ifdef MLD_DEBUG
178static const char *	mld_rec_type_to_str(const int);
179#endif
180static void	mld_set_version(struct mld_ifinfo *, const int);
181static void	mld_flush_relq(struct mld_ifinfo *, struct mld_in6m_relhead *);
182static void	mld_dispatch_queue(struct mld_ifinfo *, struct ifqueue *, int);
183static int	mld_v1_input_query(struct ifnet *, const struct ip6_hdr *,
184		    /*const*/ struct mld_hdr *);
185static int	mld_v1_input_report(struct ifnet *, const struct ip6_hdr *,
186		    /*const*/ struct mld_hdr *);
187static void	mld_v1_process_group_timer(struct in6_multi *, const int);
188static void	mld_v1_process_querier_timers(struct mld_ifinfo *);
189static int	mld_v1_transmit_report(struct in6_multi *, const int);
190static void	mld_v1_update_group(struct in6_multi *, const int);
191static void	mld_v2_cancel_link_timers(struct mld_ifinfo *);
192static void	mld_v2_dispatch_general_query(struct mld_ifinfo *);
193static struct mbuf *
194		mld_v2_encap_report(struct ifnet *, struct mbuf *);
195static int	mld_v2_enqueue_filter_change(struct ifqueue *,
196		    struct in6_multi *);
197static int	mld_v2_enqueue_group_record(struct ifqueue *,
198		    struct in6_multi *, const int, const int, const int,
199		    const int);
200static int	mld_v2_input_query(struct ifnet *, const struct ip6_hdr *,
201		    struct mbuf *, const int, const int);
202static int	mld_v2_merge_state_changes(struct in6_multi *,
203		    struct ifqueue *);
204static void	mld_v2_process_group_timers(struct mld_ifinfo *,
205		    struct ifqueue *, struct ifqueue *,
206		    struct in6_multi *, const int);
207static int	mld_v2_process_group_query(struct in6_multi *,
208		    int, struct mbuf *, const int);
209static int	sysctl_mld_gsr SYSCTL_HANDLER_ARGS;
210static int	sysctl_mld_ifinfo SYSCTL_HANDLER_ARGS;
211
212/*
213 * Normative references: RFC 2710, RFC 3590, RFC 3810.
214 *
215 *  XXX LOR PREVENTION
216 *  A special case for IPv6 is the in6_setscope() routine. ip6_output()
217 *  will not accept an ifp; it wants an embedded scope ID, unlike
218 *  ip_output(), which happily takes the ifp given to it. The embedded
219 *  scope ID is only used by MLD to select the outgoing interface.
220 *
221 *  As such, we exploit the fact that the scope ID is just the interface
222 *  index, and embed it in the IPv6 destination address accordingly.
223 *  This is potentially NOT VALID for MLDv1 reports, as they
224 *  are always sent to the multicast group itself; as MLDv2
225 *  reports are always sent to ff02::16, this is not an issue
226 *  when MLDv2 is in use.
227 */
228
229#define	MLD_EMBEDSCOPE(pin6, zoneid) \
230	(pin6)->s6_addr16[1] = htons((zoneid) & 0xFFFF)
231
232static struct timeval mld_gsrdelay = {10, 0};
233static LIST_HEAD(, mld_ifinfo) mli_head;
234
235static int interface_timers_running6;
236static int state_change_timers_running6;
237static int current_state_timers_running6;
238
239#define	MLD_LOCK()			\
240	lck_mtx_lock(&mld_mtx)
241#define	MLD_LOCK_ASSERT_HELD()		\
242	lck_mtx_assert(&mld_mtx, LCK_MTX_ASSERT_OWNED)
243#define	MLD_LOCK_ASSERT_NOTHELD()	\
244	lck_mtx_assert(&mld_mtx, LCK_MTX_ASSERT_NOTOWNED)
245#define	MLD_UNLOCK()			\
246	lck_mtx_unlock(&mld_mtx)
247
248#define	MLD_ADD_DETACHED_IN6M(_head, _in6m) {				\
249	SLIST_INSERT_HEAD(_head, _in6m, in6m_dtle);			\
250}
251
252#define	MLD_REMOVE_DETACHED_IN6M(_head) {				\
253	struct in6_multi *_in6m, *_inm_tmp;				\
254	SLIST_FOREACH_SAFE(_in6m, _head, in6m_dtle, _inm_tmp) {		\
255		SLIST_REMOVE(_head, _in6m, in6_multi, in6m_dtle);	\
256		IN6M_REMREF(_in6m);					\
257	}								\
258	VERIFY(SLIST_EMPTY(_head));					\
259}
260
261#define	MLI_ZONE_MAX		64		/* maximum elements in zone */
262#define	MLI_ZONE_NAME		"mld_ifinfo"	/* zone name */
263
264static unsigned int mli_size;			/* size of zone element */
265static struct zone *mli_zone;			/* zone for mld_ifinfo */
266
267SYSCTL_DECL(_net_inet6);	/* Note: Not in any common header. */
268
269SYSCTL_NODE(_net_inet6, OID_AUTO, mld, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
270    "IPv6 Multicast Listener Discovery");
271SYSCTL_PROC(_net_inet6_mld, OID_AUTO, gsrdelay,
272    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
273    &mld_gsrdelay.tv_sec, 0, sysctl_mld_gsr, "I",
274    "Rate limit for MLDv2 Group-and-Source queries in seconds");
275
276SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_LOCKED,
277   sysctl_mld_ifinfo, "Per-interface MLDv2 state");
278
279static int	mld_v1enable = 1;
280SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RW | CTLFLAG_LOCKED,
281    &mld_v1enable, 0, "Enable fallback to MLDv1");
282
283static int	mld_use_allow = 1;
284SYSCTL_INT(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_RW | CTLFLAG_LOCKED,
285    &mld_use_allow, 0, "Use ALLOW/BLOCK for RFC 4604 SSM joins/leaves");
286
287#ifdef MLD_DEBUG
288int mld_debug = 0;
289SYSCTL_INT(_net_inet6_mld, OID_AUTO,
290	debug, CTLFLAG_RW | CTLFLAG_LOCKED,	&mld_debug, 0, "");
291#endif
292/*
293 * Packed Router Alert option structure declaration.
294 */
295struct mld_raopt {
296	struct ip6_hbh		hbh;
297	struct ip6_opt		pad;
298	struct ip6_opt_router	ra;
299} __packed;
300
301/*
302 * Router Alert hop-by-hop option header.
303 */
304static struct mld_raopt mld_ra = {
305	.hbh = { 0, 0 },
306	.pad = { .ip6o_type = IP6OPT_PADN, 0 },
307	.ra = {
308	    .ip6or_type = (u_int8_t)IP6OPT_ROUTER_ALERT,
309	    .ip6or_len = (u_int8_t)(IP6OPT_RTALERT_LEN - 2),
310	    .ip6or_value =  {((IP6OPT_RTALERT_MLD >> 8) & 0xFF),
311	        (IP6OPT_RTALERT_MLD & 0xFF) }
312	}
313};
314static struct ip6_pktopts mld_po;
315
316/*
317 * Retrieve or set threshold between group-source queries in seconds.
318 */
319static int
320sysctl_mld_gsr SYSCTL_HANDLER_ARGS
321{
322#pragma unused(arg1, arg2)
323	int error;
324	int i;
325
326	MLD_LOCK();
327
328	i = mld_gsrdelay.tv_sec;
329
330	error = sysctl_handle_int(oidp, &i, 0, req);
331	if (error || !req->newptr)
332		goto out_locked;
333
334	if (i < -1 || i >= 60) {
335		error = EINVAL;
336		goto out_locked;
337	}
338
339	mld_gsrdelay.tv_sec = i;
340
341out_locked:
342	MLD_UNLOCK();
343	return (error);
344}
345/*
346 * Expose struct mld_ifinfo to userland, keyed by ifindex.
347 * For use by ifmcstat(8).
348 *
349 */
350static int
351sysctl_mld_ifinfo SYSCTL_HANDLER_ARGS
352{
353#pragma unused(oidp)
354	int			*name;
355	int			 error;
356	u_int			 namelen;
357	struct ifnet		*ifp;
358	struct mld_ifinfo	*mli;
359	struct mld_ifinfo_u	mli_u;
360
361	name = (int *)arg1;
362	namelen = arg2;
363
364	if (req->newptr != USER_ADDR_NULL)
365		return (EPERM);
366
367	if (namelen != 1)
368		return (EINVAL);
369
370	MLD_LOCK();
371
372	if (name[0] <= 0 || name[0] > (u_int)if_index) {
373		error = ENOENT;
374		goto out_locked;
375	}
376
377	error = ENOENT;
378
379	ifnet_head_lock_shared();
380	ifp = ifindex2ifnet[name[0]];
381	ifnet_head_done();
382	if (ifp == NULL)
383		goto out_locked;
384
385	bzero(&mli_u, sizeof (mli_u));
386
387	LIST_FOREACH(mli, &mli_head, mli_link) {
388		MLI_LOCK(mli);
389		if (ifp != mli->mli_ifp) {
390			MLI_UNLOCK(mli);
391			continue;
392		}
393
394		mli_u.mli_ifindex = mli->mli_ifp->if_index;
395		mli_u.mli_version = mli->mli_version;
396		mli_u.mli_v1_timer = mli->mli_v1_timer;
397		mli_u.mli_v2_timer = mli->mli_v2_timer;
398		mli_u.mli_flags = mli->mli_flags;
399		mli_u.mli_rv = mli->mli_rv;
400		mli_u.mli_qi = mli->mli_qi;
401		mli_u.mli_qri = mli->mli_qri;
402		mli_u.mli_uri = mli->mli_uri;
403		MLI_UNLOCK(mli);
404
405		error = SYSCTL_OUT(req, &mli_u, sizeof (mli_u));
406		break;
407	}
408
409out_locked:
410	MLD_UNLOCK();
411	return (error);
412}
413
414/*
415 * Dispatch an entire queue of pending packet chains.
416 *
417 * Must not be called with in6m_lock held.
418 */
419static void
420mld_dispatch_queue(struct mld_ifinfo *mli, struct ifqueue *ifq, int limit)
421{
422	struct mbuf *m;
423
424	if (mli != NULL)
425		MLI_LOCK_ASSERT_HELD(mli);
426
427	for (;;) {
428		IF_DEQUEUE(ifq, m);
429		if (m == NULL)
430			break;
431		MLD_PRINTF(("%s: dispatch %p from %p\n", __func__, ifq, m));
432		if (mli != NULL)
433			MLI_UNLOCK(mli);
434		mld_dispatch_packet(m);
435		if (mli != NULL)
436			MLI_LOCK(mli);
437		if (--limit == 0)
438			break;
439	}
440
441	if (mli != NULL)
442		MLI_LOCK_ASSERT_HELD(mli);
443}
444
445/*
446 * Filter outgoing MLD report state by group.
447 *
448 * Reports are ALWAYS suppressed for ALL-HOSTS (ff02::1)
449 * and node-local addresses. However, kernel and socket consumers
450 * always embed the KAME scope ID in the address provided, so strip it
451 * when performing comparison.
452 * Note: This is not the same as the *multicast* scope.
453 *
454 * Return zero if the given group is one for which MLD reports
455 * should be suppressed, or non-zero if reports should be issued.
456 */
457static __inline__ int
458mld_is_addr_reported(const struct in6_addr *addr)
459{
460
461	VERIFY(IN6_IS_ADDR_MULTICAST(addr));
462
463	if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_NODELOCAL)
464		return (0);
465
466	if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_LINKLOCAL) {
467		struct in6_addr tmp = *addr;
468		in6_clearscope(&tmp);
469		if (IN6_ARE_ADDR_EQUAL(&tmp, &in6addr_linklocal_allnodes))
470			return (0);
471	}
472
473	return (1);
474}
475
476/*
477 * Attach MLD when PF_INET6 is attached to an interface.
478 */
479struct mld_ifinfo *
480mld_domifattach(struct ifnet *ifp, int how)
481{
482	struct mld_ifinfo *mli;
483
484	MLD_PRINTF(("%s: called for ifp %p(%s%d)\n",
485	    __func__, ifp, ifp->if_name, ifp->if_unit));
486
487	mli = mli_alloc(how);
488	if (mli == NULL)
489		return (NULL);
490
491	MLD_LOCK();
492
493	MLI_LOCK(mli);
494	mli_initvar(mli, ifp, 0);
495	mli->mli_debug |= IFD_ATTACHED;
496	MLI_ADDREF_LOCKED(mli); /* hold a reference for mli_head */
497	MLI_ADDREF_LOCKED(mli); /* hold a reference for caller */
498	MLI_UNLOCK(mli);
499	ifnet_lock_shared(ifp);
500	mld6_initsilent(ifp, mli);
501	ifnet_lock_done(ifp);
502
503	LIST_INSERT_HEAD(&mli_head, mli, mli_link);
504
505	MLD_UNLOCK();
506
507	MLD_PRINTF(("allocate mld_ifinfo for ifp %p(%s%d)\n",
508	     ifp, ifp->if_name, ifp->if_unit));
509
510	return (mli);
511}
512
513/*
514 * Attach MLD when PF_INET6 is reattached to an interface.  Caller is
515 * expected to have an outstanding reference to the mli.
516 */
517void
518mld_domifreattach(struct mld_ifinfo *mli)
519{
520	struct ifnet *ifp;
521
522	MLD_LOCK();
523
524	MLI_LOCK(mli);
525	VERIFY(!(mli->mli_debug & IFD_ATTACHED));
526	ifp = mli->mli_ifp;
527	VERIFY(ifp != NULL);
528	mli_initvar(mli, ifp, 1);
529	mli->mli_debug |= IFD_ATTACHED;
530	MLI_ADDREF_LOCKED(mli); /* hold a reference for mli_head */
531	MLI_UNLOCK(mli);
532	ifnet_lock_shared(ifp);
533	mld6_initsilent(ifp, mli);
534	ifnet_lock_done(ifp);
535
536	LIST_INSERT_HEAD(&mli_head, mli, mli_link);
537
538	MLD_UNLOCK();
539
540	MLD_PRINTF(("reattached mld_ifinfo for ifp %p(%s%d)\n",
541	     ifp, ifp->if_name, ifp->if_unit));
542}
543
544/*
545 * Hook for domifdetach.
546 */
547void
548mld_domifdetach(struct ifnet *ifp)
549{
550	SLIST_HEAD(, in6_multi)	in6m_dthead;
551
552	SLIST_INIT(&in6m_dthead);
553
554	MLD_PRINTF(("%s: called for ifp %p(%s%d)\n",
555	    __func__, ifp, ifp->if_name, ifp->if_unit));
556
557	MLD_LOCK();
558	mli_delete(ifp, (struct mld_in6m_relhead *)&in6m_dthead);
559	MLD_UNLOCK();
560
561	/* Now that we're dropped all locks, release detached records */
562	MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
563}
564
565/*
566 * Called at interface detach time.  Note that we only flush all deferred
567 * responses and record releases; all remaining inm records and their source
568 * entries related to this interface are left intact, in order to handle
569 * the reattach case.
570 */
571static void
572mli_delete(const struct ifnet *ifp, struct mld_in6m_relhead *in6m_dthead)
573{
574	struct mld_ifinfo *mli, *tmli;
575
576	MLD_LOCK_ASSERT_HELD();
577
578	LIST_FOREACH_SAFE(mli, &mli_head, mli_link, tmli) {
579		MLI_LOCK(mli);
580		if (mli->mli_ifp == ifp) {
581			/*
582			 * Free deferred General Query responses.
583			 */
584			IF_DRAIN(&mli->mli_gq);
585			IF_DRAIN(&mli->mli_v1q);
586			mld_flush_relq(mli, in6m_dthead);
587			VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
588			mli->mli_debug &= ~IFD_ATTACHED;
589			MLI_UNLOCK(mli);
590
591			LIST_REMOVE(mli, mli_link);
592			MLI_REMREF(mli); /* release mli_head reference */
593			return;
594		}
595		MLI_UNLOCK(mli);
596	}
597	panic("%s: mld_ifinfo not found for ifp %p\n", __func__,  ifp);
598}
599
600__private_extern__ void
601mld6_initsilent(struct ifnet *ifp, struct mld_ifinfo *mli)
602{
603	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
604
605	MLI_LOCK_ASSERT_NOTHELD(mli);
606	MLI_LOCK(mli);
607	if (!(ifp->if_flags & IFF_MULTICAST) &&
608	    (ifp->if_eflags & (IFEF_IPV6_ND6ALT|IFEF_LOCALNET_PRIVATE)))
609		mli->mli_flags |= MLIF_SILENT;
610	else
611		mli->mli_flags &= ~MLIF_SILENT;
612	MLI_UNLOCK(mli);
613}
614
615static void
616mli_initvar(struct mld_ifinfo *mli, struct ifnet *ifp, int reattach)
617{
618	MLI_LOCK_ASSERT_HELD(mli);
619
620	mli->mli_ifp = ifp;
621	mli->mli_version = MLD_VERSION_2;
622	mli->mli_flags = 0;
623	mli->mli_rv = MLD_RV_INIT;
624	mli->mli_qi = MLD_QI_INIT;
625	mli->mli_qri = MLD_QRI_INIT;
626	mli->mli_uri = MLD_URI_INIT;
627
628	if (mld_use_allow)
629		mli->mli_flags |= MLIF_USEALLOW;
630	if (!reattach)
631		SLIST_INIT(&mli->mli_relinmhead);
632
633	/*
634	 * Responses to general queries are subject to bounds.
635	 */
636	mli->mli_gq.ifq_maxlen = MLD_MAX_RESPONSE_PACKETS;
637	mli->mli_v1q.ifq_maxlen = MLD_MAX_RESPONSE_PACKETS;
638}
639
640static struct mld_ifinfo *
641mli_alloc(int how)
642{
643	struct mld_ifinfo *mli;
644
645	mli = (how == M_WAITOK) ? zalloc(mli_zone) : zalloc_noblock(mli_zone);
646	if (mli != NULL) {
647		bzero(mli, mli_size);
648		lck_mtx_init(&mli->mli_lock, mld_mtx_grp, mld_mtx_attr);
649		mli->mli_debug |= IFD_ALLOC;
650	}
651	return (mli);
652}
653
654static void
655mli_free(struct mld_ifinfo *mli)
656{
657	MLI_LOCK(mli);
658	if (mli->mli_debug & IFD_ATTACHED) {
659		panic("%s: attached mli=%p is being freed", __func__, mli);
660		/* NOTREACHED */
661	} else if (mli->mli_ifp != NULL) {
662		panic("%s: ifp not NULL for mli=%p", __func__, mli);
663		/* NOTREACHED */
664	} else if (!(mli->mli_debug & IFD_ALLOC)) {
665		panic("%s: mli %p cannot be freed", __func__, mli);
666		/* NOTREACHED */
667	} else if (mli->mli_refcnt != 0) {
668		panic("%s: non-zero refcnt mli=%p", __func__, mli);
669		/* NOTREACHED */
670	}
671	mli->mli_debug &= ~IFD_ALLOC;
672	MLI_UNLOCK(mli);
673
674	lck_mtx_destroy(&mli->mli_lock, mld_mtx_grp);
675	zfree(mli_zone, mli);
676}
677
678void
679mli_addref(struct mld_ifinfo *mli, int locked)
680{
681	if (!locked)
682		MLI_LOCK_SPIN(mli);
683	else
684		MLI_LOCK_ASSERT_HELD(mli);
685
686	if (++mli->mli_refcnt == 0) {
687		panic("%s: mli=%p wraparound refcnt", __func__, mli);
688		/* NOTREACHED */
689	}
690	if (!locked)
691		MLI_UNLOCK(mli);
692}
693
694void
695mli_remref(struct mld_ifinfo *mli)
696{
697	SLIST_HEAD(, in6_multi)	in6m_dthead;
698	struct ifnet *ifp;
699
700	MLI_LOCK_SPIN(mli);
701
702	if (mli->mli_refcnt == 0) {
703		panic("%s: mli=%p negative refcnt", __func__, mli);
704		/* NOTREACHED */
705	}
706
707	--mli->mli_refcnt;
708	if (mli->mli_refcnt > 0) {
709		MLI_UNLOCK(mli);
710		return;
711	}
712
713	ifp = mli->mli_ifp;
714	mli->mli_ifp = NULL;
715	IF_DRAIN(&mli->mli_gq);
716	IF_DRAIN(&mli->mli_v1q);
717	SLIST_INIT(&in6m_dthead);
718	mld_flush_relq(mli, (struct mld_in6m_relhead *)&in6m_dthead);
719	VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
720	MLI_UNLOCK(mli);
721
722	/* Now that we're dropped all locks, release detached records */
723	MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
724
725	MLD_PRINTF(("%s: freeing mld_ifinfo for ifp %p(%s%d)\n",
726	    __func__, ifp, ifp->if_name, ifp->if_unit));
727
728	mli_free(mli);
729}
730
731/*
732 * Process a received MLDv1 general or address-specific query.
733 * Assumes that the query header has been pulled up to sizeof(mld_hdr).
734 *
735 * NOTE: Can't be fully const correct as we temporarily embed scope ID in
736 * mld_addr. This is OK as we own the mbuf chain.
737 */
738static int
739mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
740    /*const*/ struct mld_hdr *mld)
741{
742	struct mld_ifinfo	*mli;
743	struct in6_multi	*inm;
744	int			 is_general_query;
745	uint16_t		 timer;
746
747	is_general_query = 0;
748
749	if (!mld_v1enable) {
750		MLD_PRINTF(("ignore v1 query %s on ifp %p(%s%d)\n",
751		    ip6_sprintf(&mld->mld_addr),
752		    ifp, ifp->if_name, ifp->if_unit));
753		return (0);
754	}
755
756	/*
757	 * RFC3810 Section 6.2: MLD queries must originate from
758	 * a router's link-local address.
759	 */
760	if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
761		MLD_PRINTF(("ignore v1 query src %s on ifp %p(%s%d)\n",
762		    ip6_sprintf(&ip6->ip6_src),
763		    ifp, ifp->if_name, ifp->if_unit));
764		return (0);
765	}
766
767	/*
768	 * Do address field validation upfront before we accept
769	 * the query.
770	 */
771	if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
772		/*
773		 * MLDv1 General Query.
774		 * If this was not sent to the all-nodes group, ignore it.
775		 */
776		struct in6_addr		 dst;
777
778		dst = ip6->ip6_dst;
779		in6_clearscope(&dst);
780		if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes))
781			return (EINVAL);
782		is_general_query = 1;
783	} else {
784		/*
785		 * Embed scope ID of receiving interface in MLD query for
786		 * lookup whilst we don't hold other locks.
787		 */
788		in6_setscope(&mld->mld_addr, ifp, NULL);
789	}
790
791	/*
792	 * Switch to MLDv1 host compatibility mode.
793	 */
794	mli = MLD_IFINFO(ifp);
795	VERIFY(mli != NULL);
796
797	MLI_LOCK(mli);
798	mld_set_version(mli, MLD_VERSION_1);
799	MLI_UNLOCK(mli);
800
801	timer = (ntohs(mld->mld_maxdelay) * PR_SLOWHZ) / MLD_TIMER_SCALE;
802	if (timer == 0)
803		timer = 1;
804
805	if (is_general_query) {
806		struct in6_multistep step;
807
808		MLD_PRINTF(("process v1 general query on ifp %p(%s%d)\n",
809		    ifp, ifp->if_name, ifp->if_unit));
810		/*
811		 * For each reporting group joined on this
812		 * interface, kick the report timer.
813		 */
814		in6_multihead_lock_shared();
815		IN6_FIRST_MULTI(step, inm);
816		while (inm != NULL) {
817			IN6M_LOCK(inm);
818			if (inm->in6m_ifp == ifp)
819				mld_v1_update_group(inm, timer);
820			IN6M_UNLOCK(inm);
821			IN6_NEXT_MULTI(step, inm);
822		}
823		in6_multihead_lock_done();
824	} else {
825		/*
826		 * MLDv1 Group-Specific Query.
827		 * If this is a group-specific MLDv1 query, we need only
828		 * look up the single group to process it.
829		 */
830		in6_multihead_lock_shared();
831		IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
832		in6_multihead_lock_done();
833
834		if (inm != NULL) {
835			IN6M_LOCK(inm);
836			MLD_PRINTF(("process v1 query %s on ifp %p(%s%d)\n",
837			    ip6_sprintf(&mld->mld_addr),
838			    ifp, ifp->if_name, ifp->if_unit));
839			mld_v1_update_group(inm, timer);
840			IN6M_UNLOCK(inm);
841			IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
842		}
843		/* XXX Clear embedded scope ID as userland won't expect it. */
844		in6_clearscope(&mld->mld_addr);
845	}
846
847	return (0);
848}
849
850/*
851 * Update the report timer on a group in response to an MLDv1 query.
852 *
853 * If we are becoming the reporting member for this group, start the timer.
854 * If we already are the reporting member for this group, and timer is
855 * below the threshold, reset it.
856 *
857 * We may be updating the group for the first time since we switched
858 * to MLDv2. If we are, then we must clear any recorded source lists,
859 * and transition to REPORTING state; the group timer is overloaded
860 * for group and group-source query responses.
861 *
862 * Unlike MLDv2, the delay per group should be jittered
863 * to avoid bursts of MLDv1 reports.
864 */
865static void
866mld_v1_update_group(struct in6_multi *inm, const int timer)
867{
868	IN6M_LOCK_ASSERT_HELD(inm);
869
870	MLD_PRINTF(("%s: %s/%s%d timer=%d\n", __func__,
871	    ip6_sprintf(&inm->in6m_addr),
872	    inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit, timer));
873
874	switch (inm->in6m_state) {
875	case MLD_NOT_MEMBER:
876	case MLD_SILENT_MEMBER:
877		break;
878	case MLD_REPORTING_MEMBER:
879		if (inm->in6m_timer != 0 &&
880		    inm->in6m_timer <= timer) {
881			MLD_PRINTF(("%s: REPORTING and timer running, "
882			    "skipping.\n", __func__));
883			break;
884		}
885		/* FALLTHROUGH */
886	case MLD_SG_QUERY_PENDING_MEMBER:
887	case MLD_G_QUERY_PENDING_MEMBER:
888	case MLD_IDLE_MEMBER:
889	case MLD_LAZY_MEMBER:
890	case MLD_AWAKENING_MEMBER:
891		MLD_PRINTF(("%s: ->REPORTING\n", __func__));
892		inm->in6m_state = MLD_REPORTING_MEMBER;
893		inm->in6m_timer = MLD_RANDOM_DELAY(timer);
894		current_state_timers_running6 = 1;
895		break;
896	case MLD_SLEEPING_MEMBER:
897		MLD_PRINTF(("%s: ->AWAKENING\n", __func__));
898		inm->in6m_state = MLD_AWAKENING_MEMBER;
899		break;
900	case MLD_LEAVING_MEMBER:
901		break;
902	}
903}
904
905/*
906 * Process a received MLDv2 general, group-specific or
907 * group-and-source-specific query.
908 *
909 * Assumes that the query header has been pulled up to sizeof(mldv2_query).
910 *
911 * Return 0 if successful, otherwise an appropriate error code is returned.
912 */
913static int
914mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
915    struct mbuf *m, const int off, const int icmp6len)
916{
917	struct mld_ifinfo	*mli;
918	struct mldv2_query	*mld;
919	struct in6_multi	*inm;
920	uint32_t		 maxdelay, nsrc, qqi;
921	int			 is_general_query;
922	uint16_t		 timer;
923	uint8_t			 qrv;
924
925	is_general_query = 0;
926
927	/*
928	 * RFC3810 Section 6.2: MLD queries must originate from
929	 * a router's link-local address.
930	 */
931	if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
932		MLD_PRINTF(("ignore v1 query src %s on ifp %p(%s%d)\n",
933		    ip6_sprintf(&ip6->ip6_src),
934		    ifp, ifp->if_name, ifp->if_unit));
935		return (0);
936	}
937
938	MLD_PRINTF(("input v2 query on ifp %p(%s%d)\n", ifp, ifp->if_name,
939	    ifp->if_unit));
940
941	mld = (struct mldv2_query *)(mtod(m, uint8_t *) + off);
942
943	maxdelay = ntohs(mld->mld_maxdelay);	/* in 1/10ths of a second */
944	if (maxdelay >= 32678) {
945		maxdelay = (MLD_MRC_MANT(maxdelay) | 0x1000) <<
946			   (MLD_MRC_EXP(maxdelay) + 3);
947	}
948	timer = (maxdelay * PR_SLOWHZ) / MLD_TIMER_SCALE;
949	if (timer == 0)
950		timer = 1;
951
952	qrv = MLD_QRV(mld->mld_misc);
953	if (qrv < 2) {
954		MLD_PRINTF(("%s: clamping qrv %d to %d\n", __func__,
955		    qrv, MLD_RV_INIT));
956		qrv = MLD_RV_INIT;
957	}
958
959	qqi = mld->mld_qqi;
960	if (qqi >= 128) {
961		qqi = MLD_QQIC_MANT(mld->mld_qqi) <<
962		     (MLD_QQIC_EXP(mld->mld_qqi) + 3);
963	}
964
965	nsrc = ntohs(mld->mld_numsrc);
966	if (nsrc > MLD_MAX_GS_SOURCES)
967		return (EMSGSIZE);
968	if (icmp6len < sizeof(struct mldv2_query) +
969	    (nsrc * sizeof(struct in6_addr)))
970		return (EMSGSIZE);
971
972	/*
973	 * Do further input validation upfront to avoid resetting timers
974	 * should we need to discard this query.
975	 */
976	if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
977		/*
978		 * General Queries SHOULD be directed to ff02::1.
979		 * A general query with a source list has undefined
980		 * behaviour; discard it.
981		 */
982		struct in6_addr		 dst;
983
984		dst = ip6->ip6_dst;
985		in6_clearscope(&dst);
986		if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes) ||
987		    nsrc > 0)
988			return (EINVAL);
989		is_general_query = 1;
990	} else {
991		/*
992		 * Embed scope ID of receiving interface in MLD query for
993		 * lookup whilst we don't hold other locks (due to KAME
994		 * locking lameness). We own this mbuf chain just now.
995		 */
996		in6_setscope(&mld->mld_addr, ifp, NULL);
997	}
998
999	mli = MLD_IFINFO(ifp);
1000	VERIFY(mli != NULL);
1001
1002	MLI_LOCK(mli);
1003	/*
1004	 * Discard the v2 query if we're in Compatibility Mode.
1005	 * The RFC is pretty clear that hosts need to stay in MLDv1 mode
1006	 * until the Old Version Querier Present timer expires.
1007	 */
1008	if (mli->mli_version != MLD_VERSION_2) {
1009		MLI_UNLOCK(mli);
1010		return (0);
1011	}
1012
1013	mld_set_version(mli, MLD_VERSION_2);
1014	mli->mli_rv = qrv;
1015	mli->mli_qi = qqi;
1016	mli->mli_qri = maxdelay;
1017
1018	MLD_PRINTF(("%s: qrv %d qi %d maxdelay %d\n", __func__, qrv, qqi,
1019	    maxdelay));
1020
1021	if (is_general_query) {
1022		/*
1023		 * MLDv2 General Query.
1024		 *
1025		 * Schedule a current-state report on this ifp for
1026		 * all groups, possibly containing source lists.
1027		 *
1028		 * If there is a pending General Query response
1029		 * scheduled earlier than the selected delay, do
1030		 * not schedule any other reports.
1031		 * Otherwise, reset the interface timer.
1032		 */
1033		MLD_PRINTF(("process v2 general query on ifp %p(%s%d)\n",
1034		    ifp, ifp->if_name, ifp->if_unit));
1035		if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) {
1036			mli->mli_v2_timer = MLD_RANDOM_DELAY(timer);
1037			interface_timers_running6 = 1;
1038		}
1039		MLI_UNLOCK(mli);
1040	} else {
1041		MLI_UNLOCK(mli);
1042		/*
1043		 * MLDv2 Group-specific or Group-and-source-specific Query.
1044		 *
1045		 * Group-source-specific queries are throttled on
1046		 * a per-group basis to defeat denial-of-service attempts.
1047		 * Queries for groups we are not a member of on this
1048		 * link are simply ignored.
1049		 */
1050		in6_multihead_lock_shared();
1051		IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
1052		in6_multihead_lock_done();
1053		if (inm == NULL)
1054			return (0);
1055
1056		IN6M_LOCK(inm);
1057#ifndef __APPLE__
1058		/* TODO: need ratecheck equivalent */
1059		if (nsrc > 0) {
1060			if (!ratecheck(&inm->in6m_lastgsrtv,
1061			    &mld_gsrdelay)) {
1062				MLD_PRINTF(("%s: GS query throttled.\n",
1063				    __func__));
1064				IN6M_UNLOCK(inm);
1065				IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
1066				return (0);
1067			}
1068		}
1069#endif
1070		MLD_PRINTF(("process v2 group query on ifp %p(%s%d)\n",
1071		     ifp, ifp->if_name, ifp->if_unit));
1072		/*
1073		 * If there is a pending General Query response
1074		 * scheduled sooner than the selected delay, no
1075		 * further report need be scheduled.
1076		 * Otherwise, prepare to respond to the
1077		 * group-specific or group-and-source query.
1078		 */
1079		MLI_LOCK(mli);
1080		if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) {
1081			MLI_UNLOCK(mli);
1082			mld_v2_process_group_query(inm, timer, m, off);
1083		} else {
1084			MLI_UNLOCK(mli);
1085		}
1086		IN6M_UNLOCK(inm);
1087		IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
1088		/* XXX Clear embedded scope ID as userland won't expect it. */
1089		in6_clearscope(&mld->mld_addr);
1090	}
1091
1092	return (0);
1093}
1094
1095/*
1096 * Process a recieved MLDv2 group-specific or group-and-source-specific
1097 * query.
1098 * Return <0 if any error occured. Currently this is ignored.
1099 */
1100static int
1101mld_v2_process_group_query(struct in6_multi *inm, int timer, struct mbuf *m0,
1102    const int off)
1103{
1104	struct mldv2_query	*mld;
1105	int			 retval;
1106	uint16_t		 nsrc;
1107
1108	IN6M_LOCK_ASSERT_HELD(inm);
1109
1110	retval = 0;
1111	mld = (struct mldv2_query *)(mtod(m0, uint8_t *) + off);
1112
1113	switch (inm->in6m_state) {
1114	case MLD_NOT_MEMBER:
1115	case MLD_SILENT_MEMBER:
1116	case MLD_SLEEPING_MEMBER:
1117	case MLD_LAZY_MEMBER:
1118	case MLD_AWAKENING_MEMBER:
1119	case MLD_IDLE_MEMBER:
1120	case MLD_LEAVING_MEMBER:
1121		return (retval);
1122		break;
1123	case MLD_REPORTING_MEMBER:
1124	case MLD_G_QUERY_PENDING_MEMBER:
1125	case MLD_SG_QUERY_PENDING_MEMBER:
1126		break;
1127	}
1128
1129	nsrc = ntohs(mld->mld_numsrc);
1130
1131	/*
1132	 * Deal with group-specific queries upfront.
1133	 * If any group query is already pending, purge any recorded
1134	 * source-list state if it exists, and schedule a query response
1135	 * for this group-specific query.
1136	 */
1137	if (nsrc == 0) {
1138		if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
1139		    inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) {
1140			in6m_clear_recorded(inm);
1141			timer = min(inm->in6m_timer, timer);
1142		}
1143		inm->in6m_state = MLD_G_QUERY_PENDING_MEMBER;
1144		inm->in6m_timer = MLD_RANDOM_DELAY(timer);
1145		current_state_timers_running6 = 1;
1146		return (retval);
1147	}
1148
1149	/*
1150	 * Deal with the case where a group-and-source-specific query has
1151	 * been received but a group-specific query is already pending.
1152	 */
1153	if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER) {
1154		timer = min(inm->in6m_timer, timer);
1155		inm->in6m_timer = MLD_RANDOM_DELAY(timer);
1156		current_state_timers_running6 = 1;
1157		return (retval);
1158	}
1159
1160	/*
1161	 * Finally, deal with the case where a group-and-source-specific
1162	 * query has been received, where a response to a previous g-s-r
1163	 * query exists, or none exists.
1164	 * In this case, we need to parse the source-list which the Querier
1165	 * has provided us with and check if we have any source list filter
1166	 * entries at T1 for these sources. If we do not, there is no need
1167	 * schedule a report and the query may be dropped.
1168	 * If we do, we must record them and schedule a current-state
1169	 * report for those sources.
1170	 */
1171	if (inm->in6m_nsrc > 0) {
1172		struct mbuf		*m;
1173		uint8_t			*sp;
1174		int			 i, nrecorded;
1175		int			 soff;
1176
1177		m = m0;
1178		soff = off + sizeof(struct mldv2_query);
1179		nrecorded = 0;
1180		for (i = 0; i < nsrc; i++) {
1181			sp = mtod(m, uint8_t *) + soff;
1182			retval = in6m_record_source(inm,
1183			    (const struct in6_addr *)(void *)sp);
1184			if (retval < 0)
1185				break;
1186			nrecorded += retval;
1187			soff += sizeof(struct in6_addr);
1188			if (soff >= m->m_len) {
1189				soff = soff - m->m_len;
1190				m = m->m_next;
1191				if (m == NULL)
1192					break;
1193			}
1194		}
1195		if (nrecorded > 0) {
1196			MLD_PRINTF(( "%s: schedule response to SG query\n",
1197			    __func__));
1198			inm->in6m_state = MLD_SG_QUERY_PENDING_MEMBER;
1199			inm->in6m_timer = MLD_RANDOM_DELAY(timer);
1200			current_state_timers_running6 = 1;
1201		}
1202	}
1203
1204	return (retval);
1205}
1206
1207/*
1208 * Process a received MLDv1 host membership report.
1209 * Assumes mld points to mld_hdr in pulled up mbuf chain.
1210 *
1211 * NOTE: Can't be fully const correct as we temporarily embed scope ID in
1212 * mld_addr. This is OK as we own the mbuf chain.
1213 */
1214static int
1215mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
1216    /*const*/ struct mld_hdr *mld)
1217{
1218	struct in6_addr		 src, dst;
1219	struct in6_ifaddr	*ia;
1220	struct in6_multi	*inm;
1221
1222	if (!mld_v1enable) {
1223		MLD_PRINTF(("ignore v1 report %s on ifp %p(%s%d)\n",
1224		    ip6_sprintf(&mld->mld_addr),
1225		    ifp, ifp->if_name, ifp->if_unit));
1226		return (0);
1227	}
1228
1229	if (ifp->if_flags & IFF_LOOPBACK)
1230		return (0);
1231
1232	/*
1233	 * MLDv1 reports must originate from a host's link-local address,
1234	 * or the unspecified address (when booting).
1235	 */
1236	src = ip6->ip6_src;
1237	in6_clearscope(&src);
1238	if (!IN6_IS_SCOPE_LINKLOCAL(&src) && !IN6_IS_ADDR_UNSPECIFIED(&src)) {
1239		MLD_PRINTF(("ignore v1 query src %s on ifp %p(%s%d)\n",
1240		    ip6_sprintf(&ip6->ip6_src),
1241		    ifp, ifp->if_name, ifp->if_unit));
1242		return (EINVAL);
1243	}
1244
1245	/*
1246	 * RFC2710 Section 4: MLDv1 reports must pertain to a multicast
1247	 * group, and must be directed to the group itself.
1248	 */
1249	dst = ip6->ip6_dst;
1250	in6_clearscope(&dst);
1251	if (!IN6_IS_ADDR_MULTICAST(&mld->mld_addr) ||
1252	    !IN6_ARE_ADDR_EQUAL(&mld->mld_addr, &dst)) {
1253		MLD_PRINTF(("ignore v1 query dst %s on ifp %p(%s%d)\n",
1254		    ip6_sprintf(&ip6->ip6_dst),
1255		    ifp, ifp->if_name, ifp->if_unit));
1256		return (EINVAL);
1257	}
1258
1259	/*
1260	 * Make sure we don't hear our own membership report, as fast
1261	 * leave requires knowing that we are the only member of a
1262	 * group. Assume we used the link-local address if available,
1263	 * otherwise look for ::.
1264	 *
1265	 * XXX Note that scope ID comparison is needed for the address
1266	 * returned by in6ifa_ifpforlinklocal(), but SHOULD NOT be
1267	 * performed for the on-wire address.
1268	 */
1269	ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
1270	if (ia != NULL) {
1271		IFA_LOCK(&ia->ia_ifa);
1272		if ((IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, IA6_IN6(ia)))){
1273			IFA_UNLOCK(&ia->ia_ifa);
1274			IFA_REMREF(&ia->ia_ifa);
1275			return (0);
1276		}
1277		IFA_UNLOCK(&ia->ia_ifa);
1278		IFA_REMREF(&ia->ia_ifa);
1279	} else if (IN6_IS_ADDR_UNSPECIFIED(&src)) {
1280		return (0);
1281	}
1282
1283	MLD_PRINTF(("process v1 report %s on ifp %p(%s%d)\n",
1284	    ip6_sprintf(&mld->mld_addr), ifp, ifp->if_name, ifp->if_unit));
1285
1286	/*
1287	 * Embed scope ID of receiving interface in MLD query for lookup
1288	 * whilst we don't hold other locks (due to KAME locking lameness).
1289	 */
1290	if (!IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr))
1291		in6_setscope(&mld->mld_addr, ifp, NULL);
1292
1293	/*
1294	 * MLDv1 report suppression.
1295	 * If we are a member of this group, and our membership should be
1296	 * reported, and our group timer is pending or about to be reset,
1297	 * stop our group timer by transitioning to the 'lazy' state.
1298	 */
1299	in6_multihead_lock_shared();
1300	IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
1301	in6_multihead_lock_done();
1302
1303	if (inm != NULL) {
1304		struct mld_ifinfo *mli;
1305
1306		IN6M_LOCK(inm);
1307		mli = inm->in6m_mli;
1308		VERIFY(mli != NULL);
1309
1310		MLI_LOCK(mli);
1311		/*
1312		 * If we are in MLDv2 host mode, do not allow the
1313		 * other host's MLDv1 report to suppress our reports.
1314		 */
1315		if (mli->mli_version == MLD_VERSION_2) {
1316			MLI_UNLOCK(mli);
1317			IN6M_UNLOCK(inm);
1318			IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
1319			goto out;
1320		}
1321		MLI_UNLOCK(mli);
1322
1323		inm->in6m_timer = 0;
1324
1325		switch (inm->in6m_state) {
1326		case MLD_NOT_MEMBER:
1327		case MLD_SILENT_MEMBER:
1328		case MLD_SLEEPING_MEMBER:
1329			break;
1330		case MLD_REPORTING_MEMBER:
1331		case MLD_IDLE_MEMBER:
1332		case MLD_AWAKENING_MEMBER:
1333			MLD_PRINTF(("report suppressed for %s on ifp %p(%s%d)\n",
1334			    ip6_sprintf(&mld->mld_addr),
1335			    ifp, ifp->if_name, ifp->if_unit));
1336		case MLD_LAZY_MEMBER:
1337			inm->in6m_state = MLD_LAZY_MEMBER;
1338			break;
1339		case MLD_G_QUERY_PENDING_MEMBER:
1340		case MLD_SG_QUERY_PENDING_MEMBER:
1341		case MLD_LEAVING_MEMBER:
1342			break;
1343		}
1344		IN6M_UNLOCK(inm);
1345		IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
1346	}
1347
1348out:
1349	/* XXX Clear embedded scope ID as userland won't expect it. */
1350	in6_clearscope(&mld->mld_addr);
1351
1352	return (0);
1353}
1354
1355/*
1356 * MLD input path.
1357 *
1358 * Assume query messages which fit in a single ICMPv6 message header
1359 * have been pulled up.
1360 * Assume that userland will want to see the message, even if it
1361 * otherwise fails kernel input validation; do not free it.
1362 * Pullup may however free the mbuf chain m if it fails.
1363 *
1364 * Return IPPROTO_DONE if we freed m. Otherwise, return 0.
1365 */
1366int
1367mld_input(struct mbuf *m, int off, int icmp6len)
1368{
1369	struct ifnet	*ifp;
1370	struct ip6_hdr	*ip6;
1371	struct mld_hdr	*mld;
1372	int		 mldlen;
1373
1374	MLD_PRINTF(("%s: called w/mbuf (%p,%d)\n", __func__, m, off));
1375
1376	ifp = m->m_pkthdr.rcvif;
1377
1378	ip6 = mtod(m, struct ip6_hdr *);
1379
1380	/* Pullup to appropriate size. */
1381	mld = (struct mld_hdr *)(mtod(m, uint8_t *) + off);
1382	if (mld->mld_type == MLD_LISTENER_QUERY &&
1383	    icmp6len >= sizeof(struct mldv2_query)) {
1384		mldlen = sizeof(struct mldv2_query);
1385	} else {
1386		mldlen = sizeof(struct mld_hdr);
1387	}
1388	IP6_EXTHDR_GET(mld, struct mld_hdr *, m, off, mldlen);
1389	if (mld == NULL) {
1390		icmp6stat.icp6s_badlen++;
1391		return (IPPROTO_DONE);
1392	}
1393
1394	/*
1395	 * Userland needs to see all of this traffic for implementing
1396	 * the endpoint discovery portion of multicast routing.
1397	 */
1398	switch (mld->mld_type) {
1399	case MLD_LISTENER_QUERY:
1400		icmp6_ifstat_inc(ifp, ifs6_in_mldquery);
1401		if (icmp6len == sizeof(struct mld_hdr)) {
1402			if (mld_v1_input_query(ifp, ip6, mld) != 0)
1403				return (0);
1404		} else if (icmp6len >= sizeof(struct mldv2_query)) {
1405			if (mld_v2_input_query(ifp, ip6, m, off,
1406			    icmp6len) != 0)
1407				return (0);
1408		}
1409		break;
1410	case MLD_LISTENER_REPORT:
1411		icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
1412		if (mld_v1_input_report(ifp, ip6, mld) != 0)
1413			return (0);
1414		break;
1415	case MLDV2_LISTENER_REPORT:
1416		icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
1417		break;
1418	case MLD_LISTENER_DONE:
1419		icmp6_ifstat_inc(ifp, ifs6_in_mlddone);
1420		break;
1421	default:
1422		break;
1423	}
1424
1425	return (0);
1426}
1427
1428/*
1429 * MLD6 slowtimo handler.
1430 * Combiles both the slow and fast timer into one. We loose some responsivness but
1431 * allows the system to avoid having a pr_fasttimo, thus allowing for power savings.
1432 */
1433void
1434mld_slowtimo(void)
1435{
1436	struct ifqueue		 scq;	/* State-change packets */
1437	struct ifqueue		 qrq;	/* Query response packets */
1438	struct ifnet		*ifp;
1439	struct mld_ifinfo	*mli;
1440	struct in6_multi	*inm;
1441	int			 uri_fasthz = 0;
1442	SLIST_HEAD(, in6_multi)	in6m_dthead;
1443
1444	SLIST_INIT(&in6m_dthead);
1445
1446	MLD_LOCK();
1447
1448	LIST_FOREACH(mli, &mli_head, mli_link) {
1449		MLI_LOCK(mli);
1450		mld_v1_process_querier_timers(mli);
1451		MLI_UNLOCK(mli);
1452	}
1453
1454	/*
1455	 * Quick check to see if any work needs to be done, in order to
1456	 * minimize the overhead of fasttimo processing.
1457	 */
1458	if (!current_state_timers_running6 &&
1459	    !interface_timers_running6 &&
1460	    !state_change_timers_running6) {
1461		MLD_UNLOCK();
1462		return;
1463	}
1464
1465	/*
1466	 * MLDv2 General Query response timer processing.
1467	 */
1468	if (interface_timers_running6) {
1469#if 0
1470		MLD_PRINTF(("%s: interface timers running\n", __func__));
1471#endif
1472		interface_timers_running6 = 0;
1473		LIST_FOREACH(mli, &mli_head, mli_link) {
1474			MLI_LOCK(mli);
1475			if (mli->mli_v2_timer == 0) {
1476				/* Do nothing. */
1477			} else if (--mli->mli_v2_timer == 0) {
1478				mld_v2_dispatch_general_query(mli);
1479			} else {
1480				interface_timers_running6 = 1;
1481			}
1482			MLI_UNLOCK(mli);
1483		}
1484	}
1485
1486	if (!current_state_timers_running6 &&
1487	    !state_change_timers_running6)
1488		goto out_locked;
1489
1490	current_state_timers_running6 = 0;
1491	state_change_timers_running6 = 0;
1492#if 0
1493	MLD_PRINTF(("%s: state change timers running\n", __func__));
1494#endif
1495
1496	memset(&qrq, 0, sizeof(struct ifqueue));
1497	qrq.ifq_maxlen = MLD_MAX_G_GS_PACKETS;
1498
1499	memset(&scq, 0, sizeof(struct ifqueue));
1500	scq.ifq_maxlen = MLD_MAX_STATE_CHANGE_PACKETS;
1501
1502	/*
1503	 * MLD host report and state-change timer processing.
1504	 * Note: Processing a v2 group timer may remove a node.
1505	 */
1506	LIST_FOREACH(mli, &mli_head, mli_link) {
1507		struct in6_multistep step;
1508
1509		MLI_LOCK(mli);
1510		ifp = mli->mli_ifp;
1511		uri_fasthz = MLD_RANDOM_DELAY(mli->mli_uri * PR_SLOWHZ);
1512		MLI_UNLOCK(mli);
1513
1514		in6_multihead_lock_shared();
1515		IN6_FIRST_MULTI(step, inm);
1516		while (inm != NULL) {
1517			IN6M_LOCK(inm);
1518			if (inm->in6m_ifp != ifp)
1519				goto next;
1520
1521			MLI_LOCK(mli);
1522			switch (mli->mli_version) {
1523			case MLD_VERSION_1:
1524				mld_v1_process_group_timer(inm,
1525				    mli->mli_version);
1526				break;
1527			case MLD_VERSION_2:
1528				mld_v2_process_group_timers(mli, &qrq,
1529				    &scq, inm, uri_fasthz);
1530				break;
1531			}
1532			MLI_UNLOCK(mli);
1533next:
1534			IN6M_UNLOCK(inm);
1535			IN6_NEXT_MULTI(step, inm);
1536		}
1537		in6_multihead_lock_done();
1538
1539		MLI_LOCK(mli);
1540		if (mli->mli_version == MLD_VERSION_1) {
1541			mld_dispatch_queue(mli, &mli->mli_v1q, 0);
1542		} else if (mli->mli_version == MLD_VERSION_2) {
1543			MLI_UNLOCK(mli);
1544			mld_dispatch_queue(NULL, &qrq, 0);
1545			mld_dispatch_queue(NULL, &scq, 0);
1546			VERIFY(qrq.ifq_len == 0);
1547			VERIFY(scq.ifq_len == 0);
1548			MLI_LOCK(mli);
1549		}
1550		/*
1551		 * In case there are still any pending membership reports
1552		 * which didn't get drained at version change time.
1553		 */
1554		IF_DRAIN(&mli->mli_v1q);
1555		/*
1556		 * Release all deferred inm records, and drain any locally
1557		 * enqueued packets; do it even if the current MLD version
1558		 * for the link is no longer MLDv2, in order to handle the
1559		 * version change case.
1560		 */
1561		mld_flush_relq(mli, (struct mld_in6m_relhead *)&in6m_dthead);
1562		VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
1563		MLI_UNLOCK(mli);
1564
1565		IF_DRAIN(&qrq);
1566		IF_DRAIN(&scq);
1567	}
1568
1569out_locked:
1570	MLD_UNLOCK();
1571
1572	/* Now that we're dropped all locks, release detached records */
1573	MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
1574}
1575
1576/*
1577 * Free the in6_multi reference(s) for this MLD lifecycle.
1578 *
1579 * Caller must be holding mli_lock.
1580 */
1581static void
1582mld_flush_relq(struct mld_ifinfo *mli, struct mld_in6m_relhead *in6m_dthead)
1583{
1584	struct in6_multi *inm;
1585
1586again:
1587	MLI_LOCK_ASSERT_HELD(mli);
1588	inm = SLIST_FIRST(&mli->mli_relinmhead);
1589	if (inm != NULL) {
1590		int lastref;
1591
1592		SLIST_REMOVE_HEAD(&mli->mli_relinmhead, in6m_nrele);
1593		MLI_UNLOCK(mli);
1594
1595		in6_multihead_lock_exclusive();
1596		IN6M_LOCK(inm);
1597		VERIFY(inm->in6m_nrelecnt != 0);
1598		inm->in6m_nrelecnt--;
1599		lastref = in6_multi_detach(inm);
1600		VERIFY(!lastref || (!(inm->in6m_debug & IFD_ATTACHED) &&
1601		    inm->in6m_reqcnt == 0));
1602		IN6M_UNLOCK(inm);
1603		in6_multihead_lock_done();
1604		/* from mli_relinmhead */
1605		IN6M_REMREF(inm);
1606		/* from in6_multihead_list */
1607		if (lastref) {
1608			/*
1609			 * Defer releasing our final reference, as we
1610			 * are holding the MLD lock at this point, and
1611			 * we could end up with locking issues later on
1612			 * (while issuing SIOCDELMULTI) when this is the
1613			 * final reference count.  Let the caller do it
1614			 * when it is safe.
1615			 */
1616			MLD_ADD_DETACHED_IN6M(in6m_dthead, inm);
1617		}
1618		MLI_LOCK(mli);
1619		goto again;
1620	}
1621}
1622
1623/*
1624 * Update host report group timer.
1625 * Will update the global pending timer flags.
1626 */
1627static void
1628mld_v1_process_group_timer(struct in6_multi *inm, const int mld_version)
1629{
1630#pragma unused(mld_version)
1631	int report_timer_expired;
1632
1633	IN6M_LOCK_ASSERT_HELD(inm);
1634	MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
1635
1636	if (inm->in6m_timer == 0) {
1637		report_timer_expired = 0;
1638	} else if (--inm->in6m_timer == 0) {
1639		report_timer_expired = 1;
1640	} else {
1641		current_state_timers_running6 = 1;
1642		return;
1643	}
1644
1645	switch (inm->in6m_state) {
1646	case MLD_NOT_MEMBER:
1647	case MLD_SILENT_MEMBER:
1648	case MLD_IDLE_MEMBER:
1649	case MLD_LAZY_MEMBER:
1650	case MLD_SLEEPING_MEMBER:
1651	case MLD_AWAKENING_MEMBER:
1652		break;
1653	case MLD_REPORTING_MEMBER:
1654		if (report_timer_expired) {
1655			inm->in6m_state = MLD_IDLE_MEMBER;
1656			(void) mld_v1_transmit_report(inm,
1657			     MLD_LISTENER_REPORT);
1658			IN6M_LOCK_ASSERT_HELD(inm);
1659			MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
1660		}
1661		break;
1662	case MLD_G_QUERY_PENDING_MEMBER:
1663	case MLD_SG_QUERY_PENDING_MEMBER:
1664	case MLD_LEAVING_MEMBER:
1665		break;
1666	}
1667}
1668
1669/*
1670 * Update a group's timers for MLDv2.
1671 * Will update the global pending timer flags.
1672 * Note: Unlocked read from mli.
1673 */
1674static void
1675mld_v2_process_group_timers(struct mld_ifinfo *mli,
1676    struct ifqueue *qrq, struct ifqueue *scq,
1677    struct in6_multi *inm, const int uri_fasthz)
1678{
1679	int query_response_timer_expired;
1680	int state_change_retransmit_timer_expired;
1681
1682	IN6M_LOCK_ASSERT_HELD(inm);
1683	MLI_LOCK_ASSERT_HELD(mli);
1684	VERIFY(mli == inm->in6m_mli);
1685
1686	query_response_timer_expired = 0;
1687	state_change_retransmit_timer_expired = 0;
1688
1689	/*
1690	 * During a transition from compatibility mode back to MLDv2,
1691	 * a group record in REPORTING state may still have its group
1692	 * timer active. This is a no-op in this function; it is easier
1693	 * to deal with it here than to complicate the slow-timeout path.
1694	 */
1695	if (inm->in6m_timer == 0) {
1696		query_response_timer_expired = 0;
1697	} else if (--inm->in6m_timer == 0) {
1698		query_response_timer_expired = 1;
1699	} else {
1700		current_state_timers_running6 = 1;
1701	}
1702
1703	if (inm->in6m_sctimer == 0) {
1704		state_change_retransmit_timer_expired = 0;
1705	} else if (--inm->in6m_sctimer == 0) {
1706		state_change_retransmit_timer_expired = 1;
1707	} else {
1708		state_change_timers_running6 = 1;
1709	}
1710
1711	/* We are in fasttimo, so be quick about it. */
1712	if (!state_change_retransmit_timer_expired &&
1713	    !query_response_timer_expired)
1714		return;
1715
1716	switch (inm->in6m_state) {
1717	case MLD_NOT_MEMBER:
1718	case MLD_SILENT_MEMBER:
1719	case MLD_SLEEPING_MEMBER:
1720	case MLD_LAZY_MEMBER:
1721	case MLD_AWAKENING_MEMBER:
1722	case MLD_IDLE_MEMBER:
1723		break;
1724	case MLD_G_QUERY_PENDING_MEMBER:
1725	case MLD_SG_QUERY_PENDING_MEMBER:
1726		/*
1727		 * Respond to a previously pending Group-Specific
1728		 * or Group-and-Source-Specific query by enqueueing
1729		 * the appropriate Current-State report for
1730		 * immediate transmission.
1731		 */
1732		if (query_response_timer_expired) {
1733			int retval;
1734
1735			retval = mld_v2_enqueue_group_record(qrq, inm, 0, 1,
1736			    (inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER),
1737			    0);
1738			MLD_PRINTF(("%s: enqueue record = %d\n",
1739			    __func__, retval));
1740			inm->in6m_state = MLD_REPORTING_MEMBER;
1741			in6m_clear_recorded(inm);
1742		}
1743		/* FALLTHROUGH */
1744	case MLD_REPORTING_MEMBER:
1745	case MLD_LEAVING_MEMBER:
1746		if (state_change_retransmit_timer_expired) {
1747			/*
1748			 * State-change retransmission timer fired.
1749			 * If there are any further pending retransmissions,
1750			 * set the global pending state-change flag, and
1751			 * reset the timer.
1752			 */
1753			if (--inm->in6m_scrv > 0) {
1754				inm->in6m_sctimer = uri_fasthz;
1755				state_change_timers_running6 = 1;
1756			}
1757			/*
1758			 * Retransmit the previously computed state-change
1759			 * report. If there are no further pending
1760			 * retransmissions, the mbuf queue will be consumed.
1761			 * Update T0 state to T1 as we have now sent
1762			 * a state-change.
1763			 */
1764			(void) mld_v2_merge_state_changes(inm, scq);
1765
1766			in6m_commit(inm);
1767			MLD_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__,
1768			    ip6_sprintf(&inm->in6m_addr),
1769			    inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
1770
1771			/*
1772			 * If we are leaving the group for good, make sure
1773			 * we release MLD's reference to it.
1774			 * This release must be deferred using a SLIST,
1775			 * as we are called from a loop which traverses
1776			 * the in_ifmultiaddr TAILQ.
1777			 */
1778			if (inm->in6m_state == MLD_LEAVING_MEMBER &&
1779			    inm->in6m_scrv == 0) {
1780				inm->in6m_state = MLD_NOT_MEMBER;
1781				/*
1782				 * A reference has already been held in
1783				 * mld_final_leave() for this inm, so
1784				 * no need to hold another one.  We also
1785				 * bumped up its request count then, so
1786				 * that it stays in in6_multihead.  Both
1787				 * of them will be released when it is
1788				 * dequeued later on.
1789				 */
1790				VERIFY(inm->in6m_nrelecnt != 0);
1791				SLIST_INSERT_HEAD(&mli->mli_relinmhead,
1792				    inm, in6m_nrele);
1793			}
1794		}
1795		break;
1796	}
1797}
1798
1799/*
1800 * Switch to a different version on the given interface,
1801 * as per Section 9.12.
1802 */
1803static void
1804mld_set_version(struct mld_ifinfo *mli, const int mld_version)
1805{
1806	int old_version_timer;
1807
1808	MLI_LOCK_ASSERT_HELD(mli);
1809
1810	MLD_PRINTF(("%s: switching to v%d on ifp %p(%s%d)\n", __func__,
1811	    mld_version, mli->mli_ifp, mli->mli_ifp->if_name,
1812	    mli->mli_ifp->if_unit));
1813
1814	if (mld_version == MLD_VERSION_1) {
1815		/*
1816		 * Compute the "Older Version Querier Present" timer as per
1817		 * RFC 3810 Section 9.12.
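		 *
		 * As an illustration, with the RFC 3810 defaults (Robustness
		 * Variable 2, Query Interval 125s, Query Response Interval
		 * 10s) this works out to (2 * 125) + 10 = 260 seconds, which
		 * is then scaled below to PR_SLOWHZ slow timeout ticks.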
1818		 */
1819		old_version_timer = (mli->mli_rv * mli->mli_qi) + mli->mli_qri;
1820		old_version_timer *= PR_SLOWHZ;
1821		mli->mli_v1_timer = old_version_timer;
1822	}
1823
1824	if (mli->mli_v1_timer > 0 && mli->mli_version != MLD_VERSION_1) {
1825		mli->mli_version = MLD_VERSION_1;
1826		mld_v2_cancel_link_timers(mli);
1827	}
1828
1829	MLI_LOCK_ASSERT_HELD(mli);
1830}
1831
1832/*
1833 * Cancel pending MLDv2 timers for the given link and all groups
1834 * joined on it; state-change, general-query, and group-query timers.
1835 */
1836static void
1837mld_v2_cancel_link_timers(struct mld_ifinfo *mli)
1838{
1839	struct ifnet		*ifp;
1840	struct in6_multi	*inm;
1841	struct in6_multistep	step;
1842
1843	MLI_LOCK_ASSERT_HELD(mli);
1844
1845	MLD_PRINTF(("%s: cancel v2 timers on ifp %p(%s%d)\n", __func__,
1846	    mli->mli_ifp, mli->mli_ifp->if_name, mli->mli_ifp->if_unit));
1847
1848	/*
1849	 * Fast-track this potentially expensive operation
1850	 * by checking all the global 'timer pending' flags.
1851	 */
1852	if (!interface_timers_running6 &&
1853	    !state_change_timers_running6 &&
1854	    !current_state_timers_running6)
1855		return;
1856
1857	mli->mli_v2_timer = 0;
1858	ifp = mli->mli_ifp;
1859	MLI_UNLOCK(mli);
1860
1861	in6_multihead_lock_shared();
1862	IN6_FIRST_MULTI(step, inm);
1863	while (inm != NULL) {
1864		IN6M_LOCK(inm);
1865		if (inm->in6m_ifp != ifp)
1866			goto next;
1867
1868		switch (inm->in6m_state) {
1869		case MLD_NOT_MEMBER:
1870		case MLD_SILENT_MEMBER:
1871		case MLD_IDLE_MEMBER:
1872		case MLD_LAZY_MEMBER:
1873		case MLD_SLEEPING_MEMBER:
1874		case MLD_AWAKENING_MEMBER:
1875			break;
1876		case MLD_LEAVING_MEMBER:
1877			/*
1878			 * If we are leaving the group and switching
1879			 * version, we need to release the final
1880			 * reference held for issuing the INCLUDE {}.
1881			 * During mld_final_leave(), we bumped up both the
1882			 * request and reference counts.  Since we cannot
1883			 * call in6_multi_detach() here, defer this task to
1884			 * the timer routine.
1885			 */
1886			VERIFY(inm->in6m_nrelecnt != 0);
1887			MLI_LOCK(mli);
1888			SLIST_INSERT_HEAD(&mli->mli_relinmhead, inm,
1889			    in6m_nrele);
1890			MLI_UNLOCK(mli);
1891			/* FALLTHROUGH */
1892		case MLD_G_QUERY_PENDING_MEMBER:
1893		case MLD_SG_QUERY_PENDING_MEMBER:
1894			in6m_clear_recorded(inm);
1895			/* FALLTHROUGH */
1896		case MLD_REPORTING_MEMBER:
1897			inm->in6m_sctimer = 0;
1898			inm->in6m_timer = 0;
1899			inm->in6m_state = MLD_REPORTING_MEMBER;
1900			/*
1901			 * Free any pending MLDv2 state-change records.
1902			 */
1903			IF_DRAIN(&inm->in6m_scq);
1904			break;
1905		}
1906next:
1907		IN6M_UNLOCK(inm);
1908		IN6_NEXT_MULTI(step, inm);
1909	}
1910	in6_multihead_lock_done();
1911
1912	MLI_LOCK(mli);
1913}
1914
1915/*
1916 * Update the Older Version Querier Present timers for a link.
1917 * See Section 9.12 of RFC 3810.
1918 */
1919static void
1920mld_v1_process_querier_timers(struct mld_ifinfo *mli)
1921{
1922	MLI_LOCK_ASSERT_HELD(mli);
1923
1924	if (mli->mli_version != MLD_VERSION_2 && --mli->mli_v1_timer == 0) {
1925		/*
1926		 * MLDv1 Querier Present timer expired; revert to MLDv2.
1927		 */
1928		MLD_PRINTF(("%s: transition from v%d -> v%d on %p(%s%d)\n",
1929		    __func__, mli->mli_version, MLD_VERSION_2,
1930		    mli->mli_ifp, mli->mli_ifp->if_name, mli->mli_ifp->if_unit));
1931		mli->mli_version = MLD_VERSION_2;
1932	}
1933}
1934
1935/*
1936 * Transmit an MLDv1 report immediately.
1937 */
1938static int
1939mld_v1_transmit_report(struct in6_multi *in6m, const int type)
1940{
1941	struct ifnet		*ifp;
1942	struct in6_ifaddr	*ia;
1943	struct ip6_hdr		*ip6;
1944	struct mbuf		*mh, *md;
1945	struct mld_hdr		*mld;
1946	int			error = 0;
1947
1948	IN6M_LOCK_ASSERT_HELD(in6m);
1949	MLI_LOCK_ASSERT_HELD(in6m->in6m_mli);
1950
1951	ifp = in6m->in6m_ifp;
1952	/* ia may be NULL if link-local address is tentative. */
1953	ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
1954
1955	MGETHDR(mh, M_DONTWAIT, MT_HEADER);
1956	if (mh == NULL) {
1957		if (ia != NULL)
1958			IFA_REMREF(&ia->ia_ifa);
1959		return (ENOMEM);
1960	}
1961	MGET(md, M_DONTWAIT, MT_DATA);
1962	if (md == NULL) {
1963		m_free(mh);
1964		if (ia != NULL)
1965			IFA_REMREF(&ia->ia_ifa);
1966		return (ENOMEM);
1967	}
1968	mh->m_next = md;
1969
1970	/*
1971	 * FUTURE: Consider increasing alignment by ETHER_HDR_LEN, so
1972	 * that ether_output() does not need to allocate another mbuf
1973	 * for the header in the most common case.
1974	 */
1975	MH_ALIGN(mh, sizeof(struct ip6_hdr));
1976	mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
1977	mh->m_len = sizeof(struct ip6_hdr);
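	/*
	 * The chain now provides room for the 40-byte IPv6 header in mh and
	 * the 24-byte MLDv1 message in md (sizes per the ip6_hdr and mld_hdr
	 * definitions); both are filled in below.
	 */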
1978
1979	ip6 = mtod(mh, struct ip6_hdr *);
1980	ip6->ip6_flow = 0;
1981	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
1982	ip6->ip6_vfc |= IPV6_VERSION;
1983	ip6->ip6_nxt = IPPROTO_ICMPV6;
1984	if (ia != NULL)
1985		IFA_LOCK(&ia->ia_ifa);
1986	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
1987	if (ia != NULL) {
1988		IFA_UNLOCK(&ia->ia_ifa);
1989		IFA_REMREF(&ia->ia_ifa);
1990		ia = NULL;
1991	}
1992	ip6->ip6_dst = in6m->in6m_addr;
1993
1994	md->m_len = sizeof(struct mld_hdr);
1995	mld = mtod(md, struct mld_hdr *);
1996	mld->mld_type = type;
1997	mld->mld_code = 0;
1998	mld->mld_cksum = 0;
1999	mld->mld_maxdelay = 0;
2000	mld->mld_reserved = 0;
2001	mld->mld_addr = in6m->in6m_addr;
2002	in6_clearscope(&mld->mld_addr);
2003	mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
2004	    sizeof(struct ip6_hdr), sizeof(struct mld_hdr));
2005
2006	mh->m_flags |= M_MLDV1;
2007
2008	/*
2009	 * Due to the fact that at this point we are possibly holding
2010	 * in6_multihead_lock in shared or exclusive mode, we can't call
2011	 * mld_dispatch_packet() here since that will eventually call
2012	 * ip6_output(), which will try to lock in6_multihead_lock and cause
2013	 * a deadlock.
2014	 * Instead we defer the work to the mld_slowtimo() thread, thus
2015	 * avoiding unlocking in6_multihead_lock here.
2016	 */
2017	if (IF_QFULL(&in6m->in6m_mli->mli_v1q)) {
2018		MLD_PRINTF(("%s: v1 outbound queue full\n", __func__));
2019		error = ENOMEM;
2020		m_freem(mh);
2021	} else
2022		IF_ENQUEUE(&in6m->in6m_mli->mli_v1q, mh);
2023
2024	return (error);
2025}
2026
2027/*
2028 * Process a state change from the upper layer for the given IPv6 group.
2029 *
2030 * Each socket holds a reference on the in6_multi in its own ip_moptions.
2031 * The socket layer will have made the necessary updates to the group
2032 * state; it is now up to MLD to issue a state change report if there
2033 * has been any change between T0 (when the last state-change was issued)
2034 * and T1 (now).
2035 *
2036 * We use the MLDv2 state machine at group level. The MLD module,
2037 * however, makes the decision as to which MLD protocol version to speak.
2038 * A state change *from* INCLUDE {} always means an initial join.
2039 * A state change *to* INCLUDE {} always means a final leave.
2040 *
2041 * If delay is non-zero, and the state change is an initial multicast
2042 * join, the state change report will be delayed by 'delay' ticks
2043 * in units of PR_FASTHZ if MLDv1 is active on the link; otherwise
2044 * the initial MLDv2 state change report will be delayed by whichever
2045 * is sooner, a pending state-change timer or delay itself.
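 *
 * For example (illustrative): the first join of a group by any socket
 * moves the group's filter mode from MCAST_UNDEFINED to INCLUDE or
 * EXCLUDE and is handled as an initial join below; the last leave moves
 * it back to MCAST_UNDEFINED and is handled as a final leave.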
2046 */
2047int
2048mld_change_state(struct in6_multi *inm, const int delay)
2049{
2050	struct mld_ifinfo *mli;
2051	struct ifnet *ifp;
2052	int error = 0;
2053
2054	IN6M_LOCK_ASSERT_HELD(inm);
2055	VERIFY(inm->in6m_mli != NULL);
2056	MLI_LOCK_ASSERT_NOTHELD(inm->in6m_mli);
2057
2058	/*
2059	 * Try to detect if the upper layer just asked us to change state
2060	 * for an interface which has now gone away.
2061	 */
2062	VERIFY(inm->in6m_ifma != NULL);
2063	ifp = inm->in6m_ifma->ifma_ifp;
2064	/*
2065	 * Sanity check that netinet6's notion of ifp is the same as net's.
2066	 */
2067	VERIFY(inm->in6m_ifp == ifp);
2068
2069	mli = MLD_IFINFO(ifp);
2070	VERIFY(mli != NULL);
2071
2072	/*
2073	 * If we detect a state transition to or from MCAST_UNDEFINED
2074	 * for this group, then we are starting or finishing an MLD
2075	 * life cycle for this group.
2076	 */
2077	if (inm->in6m_st[1].iss_fmode != inm->in6m_st[0].iss_fmode) {
2078		MLD_PRINTF(("%s: inm transition %d -> %d\n", __func__,
2079		    inm->in6m_st[0].iss_fmode, inm->in6m_st[1].iss_fmode));
2080		if (inm->in6m_st[0].iss_fmode == MCAST_UNDEFINED) {
2081			MLD_PRINTF(("%s: initial join\n", __func__));
2082			error = mld_initial_join(inm, mli, delay);
2083			goto out;
2084		} else if (inm->in6m_st[1].iss_fmode == MCAST_UNDEFINED) {
2085			MLD_PRINTF(("%s: final leave\n", __func__));
2086			mld_final_leave(inm, mli);
2087			goto out;
2088		}
2089	} else {
2090		MLD_PRINTF(("%s: filter set change\n", __func__));
2091	}
2092
2093	error = mld_handle_state_change(inm, mli);
2094
2095out:
2096	return (error);
2097}
2098
2099/*
2100 * Perform the initial join for an MLD group.
2101 *
2102 * When joining a group:
2103 *  If the group should have its MLD traffic suppressed, do nothing.
2104 *  MLDv1 starts sending MLDv1 host membership reports.
2105 *  MLDv2 will schedule an MLDv2 state-change report containing the
2106 *  initial state of the membership.
2107 *
2108 * If the delay argument is non-zero, then we must delay sending the
2109 * initial state change for delay ticks (in units of PR_FASTHZ).
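 * With PR_FASTHZ fast timeouts running roughly five times a second,
 * one such tick corresponds to about 200ms.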
2110 */
2111static int
2112mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli,
2113    const int delay)
2114{
2115	struct ifnet		*ifp;
2116	struct ifqueue		*ifq;
2117	int			 error, retval, syncstates;
2118	int			 odelay;
2119
2120	IN6M_LOCK_ASSERT_HELD(inm);
2121	MLI_LOCK_ASSERT_NOTHELD(mli);
2122
2123	MLD_PRINTF(("%s: initial join %s on ifp %p(%s%d)\n",
2124	    __func__, ip6_sprintf(&inm->in6m_addr),
2125	    inm->in6m_ifp, inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
2126
2127	error = 0;
2128	syncstates = 1;
2129
2130	ifp = inm->in6m_ifp;
2131
2132	MLI_LOCK(mli);
2133	VERIFY(mli->mli_ifp == ifp);
2134
2135	/*
2136	 * Groups joined on loopback or marked as 'not reported'
2137	 * enter the MLD_SILENT_MEMBER state and
2138	 * are never reported in any protocol exchanges.
2139	 * All other groups enter the appropriate state machine
2140	 * for the version in use on this link.
2141	 * A link marked as MLIF_SILENT causes MLD to be completely
2142	 * disabled for the link.
2143	 */
2144	if ((ifp->if_flags & IFF_LOOPBACK) ||
2145	    (mli->mli_flags & MLIF_SILENT) ||
2146	    !mld_is_addr_reported(&inm->in6m_addr)) {
2147		MLD_PRINTF(("%s: not kicking state machine for silent group\n",
2148		    __func__));
2149		inm->in6m_state = MLD_SILENT_MEMBER;
2150		inm->in6m_timer = 0;
2151	} else {
2152		/*
2153		 * Deal with overlapping in6_multi lifecycle.
2154		 * If this group was LEAVING, then make sure
2155		 * we drop the reference we picked up to keep the
2156		 * group around for the final INCLUDE {} enqueue.
2157		 * Since we cannot call in6_multi_detach() here,
2158		 * defer this task to the timer routine.
2159		 */
2160		if (mli->mli_version == MLD_VERSION_2 &&
2161		    inm->in6m_state == MLD_LEAVING_MEMBER) {
2162			VERIFY(inm->in6m_nrelecnt != 0);
2163			SLIST_INSERT_HEAD(&mli->mli_relinmhead, inm,
2164			    in6m_nrele);
2165		}
2166
2167		inm->in6m_state = MLD_REPORTING_MEMBER;
2168
2169		switch (mli->mli_version) {
2170		case MLD_VERSION_1:
2171			/*
2172			 * If a delay was provided, only use it if
2173			 * it is greater than the delay normally
2174			 * used for an MLDv1 state change report,
2175			 * and delay sending the initial MLDv1 report
2176			 * by not transitioning to the IDLE state.
2177			 */
2178			odelay = MLD_RANDOM_DELAY(MLD_V1_MAX_RI * PR_SLOWHZ);
2179			if (delay) {
2180				inm->in6m_timer = max(delay, odelay);
2181				current_state_timers_running6 = 1;
2182			} else {
2183				inm->in6m_state = MLD_IDLE_MEMBER;
2184				error = mld_v1_transmit_report(inm,
2185				     MLD_LISTENER_REPORT);
2186
2187				IN6M_LOCK_ASSERT_HELD(inm);
2188				MLI_LOCK_ASSERT_HELD(mli);
2189
2190				if (error == 0) {
2191					inm->in6m_timer = odelay;
2192					current_state_timers_running6 = 1;
2193				}
2194			}
2195			break;
2196
2197		case MLD_VERSION_2:
2198			/*
2199			 * Defer update of T0 to T1, until the first copy
2200			 * of the state change has been transmitted.
2201			 */
2202			syncstates = 0;
2203
2204			/*
2205			 * Immediately enqueue a State-Change Report for
2206			 * this interface, freeing any previous reports.
2207			 * Don't kick the timers if there is nothing to do,
2208			 * or if an error occurred.
2209			 */
2210			ifq = &inm->in6m_scq;
2211			IF_DRAIN(ifq);
2212			retval = mld_v2_enqueue_group_record(ifq, inm, 1,
2213			    0, 0, (mli->mli_flags & MLIF_USEALLOW));
2214			MLD_PRINTF(("%s: enqueue record = %d\n",
2215			    __func__, retval));
2216			if (retval <= 0) {
2217				error = retval * -1;
2218				break;
2219			}
2220
2221			/*
2222			 * Schedule transmission of pending state-change
2223			 * report up to RV times for this link. The timer
2224			 * will fire at the next mld_fasttimo (~200ms),
2225			 * giving us an opportunity to merge the reports.
2226			 *
2227			 * If a delay was provided to this function, only use
2228			 * it if it is sooner than the existing one.
2229			 */
2230			VERIFY(mli->mli_rv > 1);
2231			inm->in6m_scrv = mli->mli_rv;
2232			if (delay) {
2233				if (inm->in6m_sctimer > 1) {
2234					inm->in6m_sctimer =
2235					    min(inm->in6m_sctimer, delay);
2236				} else
2237					inm->in6m_sctimer = delay;
2238			} else
2239				inm->in6m_sctimer = 1;
2240			state_change_timers_running6 = 1;
2241
2242			error = 0;
2243			break;
2244		}
2245	}
2246	MLI_UNLOCK(mli);
2247
2248	/*
2249	 * Only update the T0 state if state change is atomic,
2250	 * i.e. we don't need to wait for a timer to fire before we
2251	 * can consider the state change to have been communicated.
2252	 */
2253	if (syncstates) {
2254		in6m_commit(inm);
2255		MLD_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__,
2256		    ip6_sprintf(&inm->in6m_addr),
2257		    inm->in6m_ifp->if_name, ifp->if_unit));
2258	}
2259
2260	return (error);
2261}
2262
2263/*
2264 * Issue an intermediate state change during the life-cycle.
2265 */
2266static int
2267mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli)
2268{
2269	struct ifnet		*ifp;
2270	int			 retval;
2271
2272	IN6M_LOCK_ASSERT_HELD(inm);
2273	MLI_LOCK_ASSERT_NOTHELD(mli);
2274
2275	MLD_PRINTF(("%s: state change for %s on ifp %p(%s%d)\n",
2276	    __func__, ip6_sprintf(&inm->in6m_addr),
2277	    inm->in6m_ifp, inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
2278
2279	ifp = inm->in6m_ifp;
2280
2281	MLI_LOCK(mli);
2282	VERIFY(mli->mli_ifp == ifp);
2283
2284	if ((ifp->if_flags & IFF_LOOPBACK) ||
2285	    (mli->mli_flags & MLIF_SILENT) ||
2286	    !mld_is_addr_reported(&inm->in6m_addr) ||
2287	    (mli->mli_version != MLD_VERSION_2)) {
2288		MLI_UNLOCK(mli);
2289		if (!mld_is_addr_reported(&inm->in6m_addr)) {
2290			MLD_PRINTF(("%s: not kicking state machine for silent "
2291			    "group\n", __func__));
2292		}
2293		MLD_PRINTF(("%s: nothing to do\n", __func__));
2294		in6m_commit(inm);
2295		MLD_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__,
2296		    ip6_sprintf(&inm->in6m_addr),
2297		    inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
2298		return (0);
2299	}
2300
2301	IF_DRAIN(&inm->in6m_scq);
2302
2303	retval = mld_v2_enqueue_group_record(&inm->in6m_scq, inm, 1, 0, 0,
2304	    (mli->mli_flags & MLIF_USEALLOW));
2305	MLD_PRINTF(("%s: enqueue record = %d\n", __func__, retval));
2306	if (retval <= 0) {
2307		MLI_UNLOCK(mli);
2308		return (-retval);
2309	}
2310	/*
2311	 * If record(s) were enqueued, start the state-change
2312	 * report timer for this group.
2313	 */
2314	inm->in6m_scrv = mli->mli_rv;
2315	inm->in6m_sctimer = 1;
2316	state_change_timers_running6 = 1;
2317	MLI_UNLOCK(mli);
2318
2319	return (0);
2320}
2321
2322/*
2323 * Perform the final leave for a multicast address.
2324 *
2325 * When leaving a group:
2326 *  MLDv1 sends a DONE message, if and only if we are the reporter.
2327 *  MLDv2 enqueues a state-change report containing a transition
2328 *  to INCLUDE {} for immediate transmission.
2329 */
2330static void
2331mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli)
2332{
2333	int syncstates = 1;
2334
2335	IN6M_LOCK_ASSERT_HELD(inm);
2336	MLI_LOCK_ASSERT_NOTHELD(mli);
2337
2338	MLD_PRINTF(("%s: final leave %s on ifp %p(%s%d)\n",
2339	    __func__, ip6_sprintf(&inm->in6m_addr),
2340	    inm->in6m_ifp, inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
2341
2342	switch (inm->in6m_state) {
2343	case MLD_NOT_MEMBER:
2344	case MLD_SILENT_MEMBER:
2345	case MLD_LEAVING_MEMBER:
2346		/* Already leaving or left; do nothing. */
2347		MLD_PRINTF(("%s: not kicking state machine for silent group\n",
2348		    __func__));
2349		break;
2350	case MLD_REPORTING_MEMBER:
2351	case MLD_IDLE_MEMBER:
2352	case MLD_G_QUERY_PENDING_MEMBER:
2353	case MLD_SG_QUERY_PENDING_MEMBER:
2354		MLI_LOCK(mli);
2355		if (mli->mli_version == MLD_VERSION_1) {
2356			if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
2357			    inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) {
2358				panic("%s: MLDv2 state reached, not MLDv2 "
2359				    "mode\n", __func__);
2360				/* NOTREACHED */
2361			}
2362			mld_v1_transmit_report(inm, MLD_LISTENER_DONE);
2363
2364			IN6M_LOCK_ASSERT_HELD(inm);
2365			MLI_LOCK_ASSERT_HELD(mli);
2366
2367			inm->in6m_state = MLD_NOT_MEMBER;
2368		} else if (mli->mli_version == MLD_VERSION_2) {
2369			/*
2370			 * Stop group timer and all pending reports.
2371			 * Immediately enqueue a state-change report
2372			 * TO_IN {} to be sent on the next fast timeout,
2373			 * giving us an opportunity to merge reports.
2374			 */
2375			IF_DRAIN(&inm->in6m_scq);
2376			inm->in6m_timer = 0;
2377			inm->in6m_scrv = mli->mli_rv;
2378			MLD_PRINTF(("%s: Leaving %s/%s%d with %d "
2379			    "pending retransmissions.\n", __func__,
2380			    ip6_sprintf(&inm->in6m_addr),
2381			    inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit,
2382			    inm->in6m_scrv));
2383			if (inm->in6m_scrv == 0) {
2384				inm->in6m_state = MLD_NOT_MEMBER;
2385				inm->in6m_sctimer = 0;
2386			} else {
2387				int retval;
2388				/*
2389				 * Stick around in the in6_multihead list;
2390				 * the final detach will be issued by
2391				 * mld_v2_process_group_timers() when
2392				 * the retransmit timer expires.
2393				 */
2394				IN6M_ADDREF_LOCKED(inm);
2395				VERIFY(inm->in6m_debug & IFD_ATTACHED);
2396				inm->in6m_reqcnt++;
2397				VERIFY(inm->in6m_reqcnt >= 1);
2398				inm->in6m_nrelecnt++;
2399				VERIFY(inm->in6m_nrelecnt != 0);
2400
2401				retval = mld_v2_enqueue_group_record(
2402				    &inm->in6m_scq, inm, 1, 0, 0,
2403				    (mli->mli_flags & MLIF_USEALLOW));
2404				KASSERT(retval != 0,
2405				    ("%s: enqueue record = %d\n", __func__,
2406				     retval));
2407
2408				inm->in6m_state = MLD_LEAVING_MEMBER;
2409				inm->in6m_sctimer = 1;
2410				state_change_timers_running6 = 1;
2411				syncstates = 0;
2412			}
2413		}
2414		MLI_UNLOCK(mli);
2415		break;
2416	case MLD_LAZY_MEMBER:
2417	case MLD_SLEEPING_MEMBER:
2418	case MLD_AWAKENING_MEMBER:
2419		/* Our reports are suppressed; do nothing. */
2420		break;
2421	}
2422
2423	if (syncstates) {
2424		in6m_commit(inm);
2425		MLD_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__,
2426		    ip6_sprintf(&inm->in6m_addr),
2427		    inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
2428		inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
2429		MLD_PRINTF(("%s: T1 now MCAST_UNDEFINED for %p/%s%d\n",
2430		    __func__, &inm->in6m_addr, inm->in6m_ifp->if_name,
2431		    inm->in6m_ifp->if_unit));
2432	}
2433}
2434
2435/*
2436 * Enqueue an MLDv2 group record to the given output queue.
2437 *
2438 * If is_state_change is zero, a current-state record is appended.
2439 * If is_state_change is non-zero, a state-change report is appended.
2440 *
2441 * If is_group_query is non-zero, an mbuf packet chain is allocated.
2442 * If is_group_query is zero, and if there is a packet with free space
2443 * at the tail of the queue, the record will be appended to that
2444 * packet, provided there is enough free space.
2445 * Otherwise a new mbuf packet chain is allocated.
2446 *
2447 * If is_source_query is non-zero, each source is checked to see if
2448 * it was recorded for a Group-Source query, and will be omitted if
2449 * it is not both in-mode and recorded.
2450 *
2451 * If use_block_allow is non-zero, state change reports for initial join
2452 * and final leave, on an inclusive mode group with a source list, will be
2453 * rewritten to use the ALLOW_NEW and BLOCK_OLD record types, respectively.
2454 *
2455 * The function will attempt to allocate leading space in the packet
2456 * for the IPv6+ICMP headers to be prepended without fragmenting the chain.
2457 *
2458 * If successful the size of all data appended to the queue is returned,
2459 * otherwise an error code less than zero is returned, or zero if
2460 * no record(s) were appended.
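 *
 * For reference, the call patterns used elsewhere in this file are:
 * (qrq, inm, 0, 1, is_sg_query, 0) for group/group-source query
 * responses, (scq, inm, 1, 0, 0, use_block_allow) for state-change
 * enqueues, and (gq, inm, 0, 0, 0, 0) for general-query responses.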
2461 */
2462static int
2463mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
2464    const int is_state_change, const int is_group_query,
2465    const int is_source_query, const int use_block_allow)
2466{
2467	struct mldv2_record	 mr;
2468	struct mldv2_record	*pmr;
2469	struct ifnet		*ifp;
2470	struct ip6_msource	*ims, *nims;
2471	struct mbuf		*m0, *m, *md;
2472	int			 error, is_filter_list_change;
2473	int			 minrec0len, m0srcs, msrcs, nbytes, off;
2474	int			 record_has_sources;
2475	int			 now;
2476	int			 type;
2477	uint8_t			 mode;
2478
2479	IN6M_LOCK_ASSERT_HELD(inm);
2480	MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
2481
2482	error = 0;
2483	ifp = inm->in6m_ifp;
2484	is_filter_list_change = 0;
2485	m = NULL;
2486	m0 = NULL;
2487	m0srcs = 0;
2488	msrcs = 0;
2489	nbytes = 0;
2490	nims = NULL;
2491	record_has_sources = 1;
2492	pmr = NULL;
2493	type = MLD_DO_NOTHING;
2494	mode = inm->in6m_st[1].iss_fmode;
2495
2496	/*
2497	 * If we did not transition out of ASM mode during t0->t1,
2498	 * and there are no source nodes to process, we can skip
2499	 * the generation of source records.
2500	 */
2501	if (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0 &&
2502	    inm->in6m_nsrc == 0)
2503		record_has_sources = 0;
2504
2505	if (is_state_change) {
2506		/*
2507		 * Queue a state change record.
2508		 * If the mode did not change, and there are non-ASM
2509		 * listeners or source filters present,
2510		 * we potentially need to issue two records for the group.
2511		 * If there are ASM listeners, and there was no filter
2512		 * mode transition of any kind, do nothing.
2513		 *
2514		 * If we are transitioning to MCAST_UNDEFINED, we need
2515		 * not send any sources. A transition to/from this state is
2516		 * considered inclusive with some special treatment.
2517		 *
2518		 * If we are rewriting initial joins/leaves to use
2519		 * ALLOW/BLOCK, and the group's membership is inclusive,
2520		 * we need to send sources in all cases.
2521		 */
2522		if (mode != inm->in6m_st[0].iss_fmode) {
2523			if (mode == MCAST_EXCLUDE) {
2524				MLD_PRINTF(("%s: change to EXCLUDE\n",
2525				    __func__));
2526				type = MLD_CHANGE_TO_EXCLUDE_MODE;
2527			} else {
2528				MLD_PRINTF(("%s: change to INCLUDE\n",
2529				    __func__));
2530				if (use_block_allow) {
2531					/*
2532					 * XXX
2533					 * Here we're interested in state
2534					 * edges either direction between
2535					 * MCAST_UNDEFINED and MCAST_INCLUDE.
2536					 * Perhaps we should just check
2537					 * the group state, rather than
2538					 * the filter mode.
2539					 */
2540					if (mode == MCAST_UNDEFINED) {
2541						type = MLD_BLOCK_OLD_SOURCES;
2542					} else {
2543						type = MLD_ALLOW_NEW_SOURCES;
2544					}
2545				} else {
2546					type = MLD_CHANGE_TO_INCLUDE_MODE;
2547					if (mode == MCAST_UNDEFINED)
2548						record_has_sources = 0;
2549				}
2550			}
2551		} else {
2552			if (record_has_sources) {
2553				is_filter_list_change = 1;
2554			} else {
2555				type = MLD_DO_NOTHING;
2556			}
2557		}
2558	} else {
2559		/*
2560		 * Queue a current state record.
2561		 */
2562		if (mode == MCAST_EXCLUDE) {
2563			type = MLD_MODE_IS_EXCLUDE;
2564		} else if (mode == MCAST_INCLUDE) {
2565			type = MLD_MODE_IS_INCLUDE;
2566			VERIFY(inm->in6m_st[1].iss_asm == 0);
2567		}
2568	}
2569
2570	/*
2571	 * Generate the filter list changes using a separate function.
2572	 */
2573	if (is_filter_list_change)
2574		return (mld_v2_enqueue_filter_change(ifq, inm));
2575
2576	if (type == MLD_DO_NOTHING) {
2577		MLD_PRINTF(("%s: nothing to do for %s/%s%d\n",
2578		    __func__, ip6_sprintf(&inm->in6m_addr),
2579		    inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
2580		return (0);
2581	}
2582
2583	/*
2584	 * If any sources are present, we must be able to fit at least
2585	 * one in the trailing space of the tail packet's mbuf,
2586	 * ideally more.
2587	 */
2588	minrec0len = sizeof(struct mldv2_record);
2589	if (record_has_sources)
2590		minrec0len += sizeof(struct in6_addr);
2591	MLD_PRINTF(("%s: queueing %s for %s/%s%d\n", __func__,
2592	    mld_rec_type_to_str(type),
2593	    ip6_sprintf(&inm->in6m_addr),
2594	    inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
2595
2596	/*
2597	 * Check if we have a packet in the tail of the queue for this
2598	 * group into which the first group record for this group will fit.
2599	 * Otherwise allocate a new packet.
2600	 * Always allocate leading space for IP6+RA+ICMPV6+REPORT.
2601	 * Note: Group records for G/GSR query responses MUST be sent
2602	 * in their own packet.
2603	 */
2604	m0 = ifq->ifq_tail;
2605	if (!is_group_query &&
2606	    m0 != NULL &&
2607	    (m0->m_pkthdr.vt_nrecs + 1 <= MLD_V2_REPORT_MAXRECS) &&
2608	    (m0->m_pkthdr.len + minrec0len) <
2609	     (ifp->if_mtu - MLD_MTUSPACE)) {
2610		m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
2611			    sizeof(struct mldv2_record)) /
2612			    sizeof(struct in6_addr);
2613		m = m0;
2614		MLD_PRINTF(("%s: use existing packet\n", __func__));
2615	} else {
2616		if (IF_QFULL(ifq)) {
2617			MLD_PRINTF(("%s: outbound queue full\n", __func__));
2618			return (-ENOMEM);
2619		}
2620		m = NULL;
2621		m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
2622		    sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
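		/*
		 * Illustrative arithmetic (assuming a 1500-byte MTU, a
		 * 20-byte mldv2_record and a 16-byte in6_addr): roughly
		 * (1500 - MLD_MTUSPACE - 20) / 16 source addresses fit in
		 * the first group record of a fresh packet.
		 */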
2623		if (!is_state_change && !is_group_query)
2624			m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
2625		if (m == NULL)
2626			m = m_gethdr(M_DONTWAIT, MT_DATA);
2627		if (m == NULL)
2628			return (-ENOMEM);
2629
2630		MLD_PRINTF(("%s: allocated first packet\n", __func__));
2631	}
2632
2633	/*
2634	 * Append group record.
2635	 * If we have sources, we don't know how many yet.
2636	 */
2637	mr.mr_type = type;
2638	mr.mr_datalen = 0;
2639	mr.mr_numsrc = 0;
2640	mr.mr_addr = inm->in6m_addr;
2641	in6_clearscope(&mr.mr_addr);
2642	if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
2643		if (m != m0)
2644			m_freem(m);
2645		MLD_PRINTF(("%s: m_append() failed.\n", __func__));
2646		return (-ENOMEM);
2647	}
2648	nbytes += sizeof(struct mldv2_record);
2649
2650	/*
2651	 * Append as many sources as will fit in the first packet.
2652	 * If we are appending to a new packet, the chain allocation
2653	 * may potentially use clusters; use m_getptr() in this case.
2654	 * If we are appending to an existing packet, we need to obtain
2655	 * a pointer to the group record after m_append(), in case a new
2656	 * mbuf was allocated.
2657	 *
2658	 * Only append sources which are in-mode at t1. If we are
2659	 * transitioning to MCAST_UNDEFINED state on the group, and
2660	 * use_block_allow is zero, do not include source entries.
2661	 * Otherwise, we need to include this source in the report.
2662	 *
2663	 * Only report recorded sources in our filter set when responding
2664	 * to a group-source query.
2665	 */
2666	if (record_has_sources) {
2667		if (m == m0) {
2668			md = m_last(m);
2669			pmr = (struct mldv2_record *)(mtod(md, uint8_t *) +
2670			    md->m_len - nbytes);
2671		} else {
2672			md = m_getptr(m, 0, &off);
2673			pmr = (struct mldv2_record *)(mtod(md, uint8_t *) +
2674			    off);
2675		}
2676		msrcs = 0;
2677		RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs,
2678		    nims) {
2679			MLD_PRINTF(("%s: visit node %s\n", __func__,
2680			    ip6_sprintf(&ims->im6s_addr)));
2681			now = im6s_get_mode(inm, ims, 1);
2682			MLD_PRINTF(("%s: node is %d\n", __func__, now));
2683			if ((now != mode) ||
2684			    (now == mode &&
2685			     (!use_block_allow && mode == MCAST_UNDEFINED))) {
2686				MLD_PRINTF(("%s: skip node\n", __func__));
2687				continue;
2688			}
2689			if (is_source_query && ims->im6s_stp == 0) {
2690				MLD_PRINTF(("%s: skip unrecorded node\n",
2691				    __func__));
2692				continue;
2693			}
2694			MLD_PRINTF(("%s: append node\n", __func__));
2695			if (!m_append(m, sizeof(struct in6_addr),
2696			    (void *)&ims->im6s_addr)) {
2697				if (m != m0)
2698					m_freem(m);
2699				MLD_PRINTF(("%s: m_append() failed.\n",
2700				    __func__));
2701				return (-ENOMEM);
2702			}
2703			nbytes += sizeof(struct in6_addr);
2704			++msrcs;
2705			if (msrcs == m0srcs)
2706				break;
2707		}
2708		MLD_PRINTF(("%s: msrcs is %d this packet\n", __func__,
2709		    msrcs));
2710		pmr->mr_numsrc = htons(msrcs);
2711		nbytes += (msrcs * sizeof(struct in6_addr));
2712	}
2713
2714	if (is_source_query && msrcs == 0) {
2715		MLD_PRINTF(("%s: no recorded sources to report\n", __func__));
2716		if (m != m0)
2717			m_freem(m);
2718		return (0);
2719	}
2720
2721	/*
2722	 * We are good to go with first packet.
2723	 */
2724	if (m != m0) {
2725		MLD_PRINTF(("%s: enqueueing first packet\n", __func__));
2726		m->m_pkthdr.vt_nrecs = 1;
2727		m->m_pkthdr.rcvif = ifp;
2728		IF_ENQUEUE(ifq, m);
2729	} else {
2730		m->m_pkthdr.vt_nrecs++;
2731	}
2732	/*
2733	 * No further work needed if no source list in packet(s).
2734	 */
2735	if (!record_has_sources)
2736		return (nbytes);
2737
2738	/*
2739	 * Whilst sources remain to be announced, we need to allocate
2740	 * a new packet and fill out as many sources as will fit.
2741	 * Always try for a cluster first.
2742	 */
2743	while (nims != NULL) {
2744		if (IF_QFULL(ifq)) {
2745			MLD_PRINTF(("%s: outbound queue full\n", __func__));
2746			return (-ENOMEM);
2747		}
2748		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
2749		if (m == NULL)
2750			m = m_gethdr(M_DONTWAIT, MT_DATA);
2751		if (m == NULL)
2752			return (-ENOMEM);
2753		md = m_getptr(m, 0, &off);
2754		pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + off);
2755		MLD_PRINTF(("%s: allocated next packet\n", __func__));
2756
2757		if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
2758			if (m != m0)
2759				m_freem(m);
2760			MLD_PRINTF(("%s: m_append() failed.\n", __func__));
2761			return (-ENOMEM);
2762		}
2763		m->m_pkthdr.vt_nrecs = 1;
2764		nbytes += sizeof(struct mldv2_record);
2765
2766		m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
2767		    sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
2768
2769		msrcs = 0;
2770		RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
2771			MLD_PRINTF(("%s: visit node %s\n",
2772			    __func__, ip6_sprintf(&ims->im6s_addr)));
2773			now = im6s_get_mode(inm, ims, 1);
2774			if ((now != mode) ||
2775			    (now == mode &&
2776			     (!use_block_allow && mode == MCAST_UNDEFINED))) {
2777				MLD_PRINTF(("%s: skip node\n", __func__));
2778				continue;
2779			}
2780			if (is_source_query && ims->im6s_stp == 0) {
2781				MLD_PRINTF(("%s: skip unrecorded node\n",
2782				    __func__));
2783				continue;
2784			}
2785			MLD_PRINTF(("%s: append node\n", __func__));
2786			if (!m_append(m, sizeof(struct in6_addr),
2787			    (void *)&ims->im6s_addr)) {
2788				if (m != m0)
2789					m_freem(m);
2790				MLD_PRINTF(("%s: m_append() failed.\n",
2791				    __func__));
2792				return (-ENOMEM);
2793			}
2794			++msrcs;
2795			if (msrcs == m0srcs)
2796				break;
2797		}
2798		pmr->mr_numsrc = htons(msrcs);
2799		nbytes += (msrcs * sizeof(struct in6_addr));
2800
2801		MLD_PRINTF(("%s: enqueueing next packet\n", __func__));
2802		m->m_pkthdr.rcvif = ifp;
2803		IF_ENQUEUE(ifq, m);
2804	}
2805
2806	return (nbytes);
2807}
2808
2809/*
2810 * Type used to mark record pass completion.
2811 * We exploit the fact we can cast to this easily from the
2812 * current filter modes on each ip_msource node.
2813 */
2814typedef enum {
2815	REC_NONE = 0x00,	/* MCAST_UNDEFINED */
2816	REC_ALLOW = 0x01,	/* MCAST_INCLUDE */
2817	REC_BLOCK = 0x02,	/* MCAST_EXCLUDE */
2818	REC_FULL = REC_ALLOW | REC_BLOCK
2819} rectype_t;
2820
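/*
 * With the usual MCAST_UNDEFINED/MCAST_INCLUDE/MCAST_EXCLUDE values of
 * 0/1/2, the cast mentioned above maps an in-mode source node directly
 * to REC_ALLOW and an ex-mode source node to REC_BLOCK.
 */
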
2821/*
2822 * Enqueue an MLDv2 filter list change to the given output queue.
2823 *
2824 * Source list filter state is held in an RB-tree. When the filter list
2825 * for a group is changed without changing its mode, we need to compute
2826 * the deltas between T0 and T1 for each source in the filter set,
2827 * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
2828 *
2829 * As we may potentially queue two record types, and the entire RB-tree
2830 * needs to be walked at once, we break this out into its own function
2831 * so we can generate a tightly packed queue of packets.
2832 *
2833 * XXX This could be written to only use one tree walk, although that makes
2834 * serializing into the mbuf chains a bit harder. For now we do two walks
2835 * which makes things easier on us, and it may or may not be harder on
2836 * the L2 cache.
2837 *
2838 * If successful the size of all data appended to the queue is returned,
2839 * otherwise an error code less than zero is returned, or zero if
2840 * no record(s) were appended.
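 *
 * Illustrative example: a group held in INCLUDE mode whose source list
 * changes from {A, B} at t0 to {B, C} at t1 yields an ALLOW_NEW record
 * listing C and a BLOCK_OLD record listing A.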
2841 */
2842static int
2843mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm)
2844{
2845	static const int MINRECLEN =
2846	    sizeof(struct mldv2_record) + sizeof(struct in6_addr);
2847	struct ifnet		*ifp;
2848	struct mldv2_record	 mr;
2849	struct mldv2_record	*pmr;
2850	struct ip6_msource	*ims, *nims;
2851	struct mbuf		*m, *m0, *md;
2852	int			 m0srcs, nbytes, npbytes, off, rsrcs, schanged;
2853	int			 nallow, nblock;
2854	uint8_t			 mode, now, then;
2855	rectype_t		 crt, drt, nrt;
2856
2857	IN6M_LOCK_ASSERT_HELD(inm);
2858
2859	if (inm->in6m_nsrc == 0 ||
2860	    (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0))
2861		return (0);
2862
2863	ifp = inm->in6m_ifp;			/* interface */
2864	mode = inm->in6m_st[1].iss_fmode;	/* filter mode at t1 */
2865	crt = REC_NONE;	/* current group record type */
2866	drt = REC_NONE;	/* mask of completed group record types */
2867	nrt = REC_NONE;	/* record type for current node */
2868	m0srcs = 0;	/* # source which will fit in current mbuf chain */
2869	npbytes = 0;	/* # of bytes appended this packet */
2870	nbytes = 0;	/* # of bytes appended to group's state-change queue */
2871	rsrcs = 0;	/* # sources encoded in current record */
2872	schanged = 0;	/* # nodes encoded in overall filter change */
2873	nallow = 0;	/* # of source entries in ALLOW_NEW */
2874	nblock = 0;	/* # of source entries in BLOCK_OLD */
2875	nims = NULL;	/* next tree node pointer */
2876
2877	/*
2878	 * For each possible filter record mode.
2879	 * The first kind of source we encounter tells us which
2880	 * is the first kind of record we start appending.
2881	 * If a node transitioned to UNDEFINED at t1, its mode is treated
2882	 * as the inverse of the group's filter mode.
2883	 */
2884	while (drt != REC_FULL) {
2885		do {
2886			m0 = ifq->ifq_tail;
2887			if (m0 != NULL &&
2888			    (m0->m_pkthdr.vt_nrecs + 1 <=
2889			     MLD_V2_REPORT_MAXRECS) &&
2890			    (m0->m_pkthdr.len + MINRECLEN) <
2891			     (ifp->if_mtu - MLD_MTUSPACE)) {
2892				m = m0;
2893				m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
2894					    sizeof(struct mldv2_record)) /
2895					    sizeof(struct in6_addr);
2896				MLD_PRINTF(("%s: use previous packet\n",
2897				    __func__));
2898			} else {
2899				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
2900				if (m == NULL)
2901					m = m_gethdr(M_DONTWAIT, MT_DATA);
2902				if (m == NULL) {
2903					MLD_PRINTF(("%s: m_get*() failed\n",
2904					    __func__));
2905					return (-ENOMEM);
2906				}
2907				m->m_pkthdr.vt_nrecs = 0;
2908				m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
2909				    sizeof(struct mldv2_record)) /
2910				    sizeof(struct in6_addr);
2911				npbytes = 0;
2912				MLD_PRINTF(("%s: allocated new packet\n",
2913				    __func__));
2914			}
2915			/*
2916			 * Append the MLD group record header to the
2917			 * current packet's data area.
2918			 * Recalculate pointer to free space for next
2919			 * group record, in case m_append() allocated
2920			 * a new mbuf or cluster.
2921			 */
2922			memset(&mr, 0, sizeof(mr));
2923			mr.mr_addr = inm->in6m_addr;
2924			in6_clearscope(&mr.mr_addr);
2925			if (!m_append(m, sizeof(mr), (void *)&mr)) {
2926				if (m != m0)
2927					m_freem(m);
2928				MLD_PRINTF(("%s: m_append() failed\n",
2929				    __func__));
2930				return (-ENOMEM);
2931			}
2932			npbytes += sizeof(struct mldv2_record);
2933			if (m != m0) {
2934				/* new packet; offset in chain */
2935				md = m_getptr(m, npbytes -
2936				    sizeof(struct mldv2_record), &off);
2937				pmr = (struct mldv2_record *)(mtod(md,
2938				    uint8_t *) + off);
2939			} else {
2940				/* current packet; offset from last append */
2941				md = m_last(m);
2942				pmr = (struct mldv2_record *)(mtod(md,
2943				    uint8_t *) + md->m_len -
2944				    sizeof(struct mldv2_record));
2945			}
2946			/*
2947			 * Begin walking the tree for this record type
2948			 * pass, or continue from where we left off
2949			 * previously if we had to allocate a new packet.
2950			 * Only report deltas in-mode at t1.
2951			 * We need not report included sources as allowed
2952			 * if we are in inclusive mode on the group,
2953			 * however the converse is not true.
2954			 */
2955			rsrcs = 0;
2956			if (nims == NULL) {
2957				nims = RB_MIN(ip6_msource_tree,
2958				    &inm->in6m_srcs);
2959			}
2960			RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
2961				MLD_PRINTF(("%s: visit node %s\n", __func__,
2962				    ip6_sprintf(&ims->im6s_addr)));
2963				now = im6s_get_mode(inm, ims, 1);
2964				then = im6s_get_mode(inm, ims, 0);
2965				MLD_PRINTF(("%s: mode: t0 %d, t1 %d\n",
2966				    __func__, then, now));
2967				if (now == then) {
2968					MLD_PRINTF(("%s: skip unchanged\n",
2969					    __func__));
2970					continue;
2971				}
2972				if (mode == MCAST_EXCLUDE &&
2973				    now == MCAST_INCLUDE) {
2974					MLD_PRINTF(("%s: skip IN src on EX "
2975					    "group\n", __func__));
2976					continue;
2977				}
2978				nrt = (rectype_t)now;
2979				if (nrt == REC_NONE)
2980					nrt = (rectype_t)(~mode & REC_FULL);
2981				if (schanged++ == 0) {
2982					crt = nrt;
2983				} else if (crt != nrt)
2984					continue;
2985				if (!m_append(m, sizeof(struct in6_addr),
2986				    (void *)&ims->im6s_addr)) {
2987					if (m != m0)
2988						m_freem(m);
2989					MLD_PRINTF(("%s: m_append() failed\n",
2990					    __func__));
2991					return (-ENOMEM);
2992				}
2993				nallow += !!(crt == REC_ALLOW);
2994				nblock += !!(crt == REC_BLOCK);
2995				if (++rsrcs == m0srcs)
2996					break;
2997			}
2998			/*
2999			 * If we did not append any tree nodes on this
3000			 * pass, back out of allocations.
3001			 */
3002			if (rsrcs == 0) {
3003				npbytes -= sizeof(struct mldv2_record);
3004				if (m != m0) {
3005					MLD_PRINTF(("%s: m_free(m)\n",
3006					    __func__));
3007					m_freem(m);
3008				} else {
3009					MLD_PRINTF(("%s: m_adj(m, -mr)\n",
3010					    __func__));
3011					m_adj(m, -((int)sizeof(
3012					    struct mldv2_record)));
3013				}
3014				continue;
3015			}
3016			npbytes += (rsrcs * sizeof(struct in6_addr));
3017			if (crt == REC_ALLOW)
3018				pmr->mr_type = MLD_ALLOW_NEW_SOURCES;
3019			else if (crt == REC_BLOCK)
3020				pmr->mr_type = MLD_BLOCK_OLD_SOURCES;
3021			pmr->mr_numsrc = htons(rsrcs);
3022			/*
3023			 * Count the new group record, and enqueue this
3024			 * packet if it wasn't already queued.
3025			 */
3026			m->m_pkthdr.vt_nrecs++;
3027			m->m_pkthdr.rcvif = ifp;
3028			if (m != m0)
3029				IF_ENQUEUE(ifq, m);
3030			nbytes += npbytes;
3031		} while (nims != NULL);
3032		drt |= crt;
3033		crt = (~crt & REC_FULL);
3034	}
3035
3036	MLD_PRINTF(("%s: queued %d ALLOW_NEW, %d BLOCK_OLD\n", __func__,
3037	    nallow, nblock));
3038
3039	return (nbytes);
3040}
3041
3042static int
3043mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq)
3044{
3045	struct ifqueue	*gq;
3046	struct mbuf	*m;		/* pending state-change */
3047	struct mbuf	*m0;		/* copy of pending state-change */
3048	struct mbuf	*mt;		/* last state-change in packet */
3049	struct mbuf	*n;
3050	int		 docopy, domerge;
3051	u_int		 recslen;
3052
3053	IN6M_LOCK_ASSERT_HELD(inm);
3054
3055	docopy = 0;
3056	domerge = 0;
3057	recslen = 0;
3058
3059	/*
3060	 * If there are further pending retransmissions, make a writable
3061	 * copy of each queued state-change message before merging.
3062	 */
3063	if (inm->in6m_scrv > 0)
3064		docopy = 1;
3065
3066	gq = &inm->in6m_scq;
3067#ifdef MLD_DEBUG
3068	if (gq->ifq_head == NULL) {
3069		MLD_PRINTF(("%s: WARNING: queue for inm %p is empty\n",
3070		    __func__, inm));
3071	}
3072#endif
3073
3074	/*
3075	 * Use IF_REMQUEUE() instead of IF_DEQUEUE() below, since the
3076	 * packet might not always be at the head of the ifqueue.
3077	 */
3078	m = gq->ifq_head;
3079	while (m != NULL) {
3080		/*
3081		 * Only merge the report into the current packet if
3082		 * there is sufficient space to do so; an MLDv2 report
3083		 * packet may only contain 65,535 group records.
3084		 * Always use a simple mbuf chain concatenation to do this,
3085		 * as large state changes for single groups may have
3086		 * allocated clusters.
3087		 */
3088		domerge = 0;
3089		mt = ifscq->ifq_tail;
3090		if (mt != NULL) {
3091			recslen = m_length(m);
3092
3093			if ((mt->m_pkthdr.vt_nrecs +
3094			    m->m_pkthdr.vt_nrecs <=
3095			    MLD_V2_REPORT_MAXRECS) &&
3096			    (mt->m_pkthdr.len + recslen <=
3097			    (inm->in6m_ifp->if_mtu - MLD_MTUSPACE)))
3098				domerge = 1;
3099		}
3100
3101		if (!domerge && IF_QFULL(gq)) {
3102			MLD_PRINTF(("%s: outbound queue full, skipping whole "
3103			    "packet %p\n", __func__, m));
3104			n = m->m_nextpkt;
3105			if (!docopy) {
3106				IF_REMQUEUE(gq, m);
3107				m_freem(m);
3108			}
3109			m = n;
3110			continue;
3111		}
3112
3113		if (!docopy) {
3114			MLD_PRINTF(("%s: dequeueing %p\n", __func__, m));
3115			n = m->m_nextpkt;
3116			IF_REMQUEUE(gq, m);
3117			m0 = m;
3118			m = n;
3119		} else {
3120			MLD_PRINTF(("%s: copying %p\n", __func__, m));
3121			m0 = m_dup(m, M_NOWAIT);
3122			if (m0 == NULL)
3123				return (ENOMEM);
3124			m0->m_nextpkt = NULL;
3125			m = m->m_nextpkt;
3126		}
3127
3128		if (!domerge) {
3129			MLD_PRINTF(("%s: queueing %p to ifscq %p\n",
3130			    __func__, m0, ifscq));
3131			m0->m_pkthdr.rcvif = inm->in6m_ifp;
3132			IF_ENQUEUE(ifscq, m0);
3133		} else {
3134			struct mbuf *mtl;	/* last mbuf of packet mt */
3135
3136			MLD_PRINTF(("%s: merging %p with ifscq tail %p\n",
3137			    __func__, m0, mt));
3138
3139			mtl = m_last(mt);
3140			m0->m_flags &= ~M_PKTHDR;
3141			mt->m_pkthdr.len += recslen;
3142			mt->m_pkthdr.vt_nrecs +=
3143			    m0->m_pkthdr.vt_nrecs;
3144
3145			mtl->m_next = m0;
3146		}
3147	}
3148
3149	return (0);
3150}
3151
3152/*
3153 * Respond to a pending MLDv2 General Query.
3154 */
3155static void
3156mld_v2_dispatch_general_query(struct mld_ifinfo *mli)
3157{
3158	struct ifnet		*ifp;
3159	struct in6_multi	*inm;
3160	struct in6_multistep	step;
3161	int			 retval;
3162
3163	MLI_LOCK_ASSERT_HELD(mli);
3164
3165	VERIFY(mli->mli_version == MLD_VERSION_2);
3166
3167	ifp = mli->mli_ifp;
3168	MLI_UNLOCK(mli);
3169
3170	in6_multihead_lock_shared();
3171	IN6_FIRST_MULTI(step, inm);
3172	while (inm != NULL) {
3173		IN6M_LOCK(inm);
3174		if (inm->in6m_ifp != ifp)
3175			goto next;
3176
3177		switch (inm->in6m_state) {
3178		case MLD_NOT_MEMBER:
3179		case MLD_SILENT_MEMBER:
3180			break;
3181		case MLD_REPORTING_MEMBER:
3182		case MLD_IDLE_MEMBER:
3183		case MLD_LAZY_MEMBER:
3184		case MLD_SLEEPING_MEMBER:
3185		case MLD_AWAKENING_MEMBER:
3186			inm->in6m_state = MLD_REPORTING_MEMBER;
3187			MLI_LOCK(mli);
3188			retval = mld_v2_enqueue_group_record(&mli->mli_gq,
3189			    inm, 0, 0, 0, 0);
3190			MLI_UNLOCK(mli);
3191			MLD_PRINTF(("%s: enqueue record = %d\n",
3192			    __func__, retval));
3193			break;
3194		case MLD_G_QUERY_PENDING_MEMBER:
3195		case MLD_SG_QUERY_PENDING_MEMBER:
3196		case MLD_LEAVING_MEMBER:
3197			break;
3198		}
3199next:
3200		IN6M_UNLOCK(inm);
3201		IN6_NEXT_MULTI(step, inm);
3202	}
3203	in6_multihead_lock_done();
3204
3205	MLI_LOCK(mli);
3206	mld_dispatch_queue(mli, &mli->mli_gq, MLD_MAX_RESPONSE_BURST);
3207	MLI_LOCK_ASSERT_HELD(mli);
3208
3209	/*
3210	 * Slew transmission of bursts over 500ms intervals.
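	 * If reports remain queued after dispatching up to
	 * MLD_MAX_RESPONSE_BURST packets above, re-arm the interface timer
	 * with a random delay so the remainder is sent on a later pass.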
3211	 */
3212	if (mli->mli_gq.ifq_head != NULL) {
3213		mli->mli_v2_timer = 1 + MLD_RANDOM_DELAY(
3214		    MLD_RESPONSE_BURST_INTERVAL);
3215		interface_timers_running6 = 1;
3216	}
3217}
3218
3219/*
3220 * Transmit the next pending message in the output queue.
3221 *
3222 * Must not be called with in6m_lock or mli_lock held.
3223 */
3224static void
3225mld_dispatch_packet(struct mbuf *m)
3226{
3227	struct ip6_moptions	*im6o;
3228	struct ifnet		*ifp;
3229	struct ifnet		*oifp = NULL;
3230	struct mbuf		*m0;
3231	struct mbuf		*md;
3232	struct ip6_hdr		*ip6;
3233	struct mld_hdr		*mld;
3234	int			 error;
3235	int			 off;
3236	int			 type;
3237
3238	MLD_PRINTF(("%s: transmit %p\n", __func__, m));
3239
3240	/*
3241	 * Check if the ifnet is still attached.
3242	 */
3243	ifp = m->m_pkthdr.rcvif;
3244	if (ifp == NULL || !ifnet_is_attached(ifp, 0)) {
3245		MLD_PRINTF(("%s: dropped %p as ifindex %u went away.\n",
3246		    __func__, m, (u_int)if_index));
3247		m_freem(m);
3248		ip6stat.ip6s_noroute++;
3249		return;
3250	}
3251
3252	im6o = ip6_allocmoptions(M_WAITOK);
3253	if (im6o == NULL) {
3254		m_freem(m);
3255		return;
3256	}
3257
3258	im6o->im6o_multicast_hlim = 1;
3259#if MROUTING
3260	im6o->im6o_multicast_loop = (ip6_mrouter != NULL);
3261#else
3262	im6o->im6o_multicast_loop = 0;
3263#endif
3264	im6o->im6o_multicast_ifp = ifp;
3265
3266	if (m->m_flags & M_MLDV1) {
3267		m0 = m;
3268	} else {
3269		m0 = mld_v2_encap_report(ifp, m);
3270		if (m0 == NULL) {
3271			MLD_PRINTF(("%s: dropped %p\n", __func__, m));
3272			/*
3273			 * mld_v2_encap_report() has already freed our mbuf.
3274			 */
3275			IM6O_REMREF(im6o);
3276			ip6stat.ip6s_odropped++;
3277			return;
3278		}
3279	}
3280
3281	m->m_flags &= ~(M_PROTOFLAGS);
3282	m0->m_pkthdr.rcvif = lo_ifp;
3283
3284	ip6 = mtod(m0, struct ip6_hdr *);
3285#if 0
3286	(void) in6_setscope(&ip6->ip6_dst, ifp, NULL);	/* XXX LOR */
3287#else
3288	/*
3289	 * XXX XXX Break some KPI rules to prevent an LOR which would
3290	 * occur if we called in6_setscope() at transmission.
3291	 * See comments at top of file.
3292	 */
3293	MLD_EMBEDSCOPE(&ip6->ip6_dst, ifp->if_index);
3294#endif
3295
3296	/*
3297	 * Retrieve the ICMPv6 type before handoff to ip6_output(),
3298	 * so we can bump the stats.
3299	 */
3300	md = m_getptr(m0, sizeof(struct ip6_hdr), &off);
3301	mld = (struct mld_hdr *)(mtod(md, uint8_t *) + off);
3302	type = mld->mld_type;
3303
3304	if (ifp->if_eflags & IFEF_TXSTART) {
3305		/* Use control service class if the outgoing
3306		 * interface supports transmit-start model.
3307		 */
3308		(void) m_set_service_class(m0, MBUF_SC_CTL);
3309	}
3310
3311	error = ip6_output(m0, &mld_po, NULL, IPV6_UNSPECSRC, im6o,
3312	    &oifp, NULL);
3313
3314	IM6O_REMREF(im6o);
3315
3316	if (error) {
3317		MLD_PRINTF(("%s: ip6_output(%p) = %d\n", __func__, m0, error));
3318		if (oifp != NULL)
3319			ifnet_release(oifp);
3320		return;
3321	}
3322
3323	icmp6stat.icp6s_outhist[type]++;
3324	if (oifp != NULL) {
3325		icmp6_ifstat_inc(oifp, ifs6_out_msg);
3326		switch (type) {
3327		case MLD_LISTENER_REPORT:
3328		case MLDV2_LISTENER_REPORT:
3329			icmp6_ifstat_inc(oifp, ifs6_out_mldreport);
3330			break;
3331		case MLD_LISTENER_DONE:
3332			icmp6_ifstat_inc(oifp, ifs6_out_mlddone);
3333			break;
3334		}
3335		ifnet_release(oifp);
3336	}
3337}
3338
3339/*
3340 * Encapsulate an MLDv2 report.
3341 *
3342 * KAME IPv6 requires that hop-by-hop options be passed separately,
3343 * and that the IPv6 header be prepended in a separate mbuf.
3344 *
3345 * Returns a pointer to the new mbuf chain head, or NULL if the
3346 * allocation failed.
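 *
 * Sketch of the returned chain (header sizes per the structure
 * definitions): a new leading mbuf carrying the 40-byte IPv6 header and
 * the 8-byte MLDv2 report header, followed by the caller's group-record
 * mbufs.  The Router Alert hop-by-hop option is prepended later by
 * ip6_output() from the mld_po packet options.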
3347 */
3348static struct mbuf *
3349mld_v2_encap_report(struct ifnet *ifp, struct mbuf *m)
3350{
3351	struct mbuf		*mh;
3352	struct mldv2_report	*mld;
3353	struct ip6_hdr		*ip6;
3354	struct in6_ifaddr	*ia;
3355	int			 mldreclen;
3356
3357	VERIFY(m->m_flags & M_PKTHDR);
3358
3359	/*
3360	 * RFC3590: OK to send as :: or tentative during DAD.
3361	 */
3362	ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
3363	if (ia == NULL)
3364		MLD_PRINTF(("%s: warning: ia is NULL\n", __func__));
3365
3366	MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3367	if (mh == NULL) {
3368		if (ia != NULL)
3369			IFA_REMREF(&ia->ia_ifa);
3370		m_freem(m);
3371		return (NULL);
3372	}
3373	MH_ALIGN(mh, sizeof(struct ip6_hdr) + sizeof(struct mldv2_report));
3374
3375	mldreclen = m_length(m);
3376	MLD_PRINTF(("%s: mldreclen is %d\n", __func__, mldreclen));
3377
3378	mh->m_len = sizeof(struct ip6_hdr) + sizeof(struct mldv2_report);
3379	mh->m_pkthdr.len = sizeof(struct ip6_hdr) +
3380	    sizeof(struct mldv2_report) + mldreclen;
3381
3382	ip6 = mtod(mh, struct ip6_hdr *);
3383	ip6->ip6_flow = 0;
3384	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
3385	ip6->ip6_vfc |= IPV6_VERSION;
3386	ip6->ip6_nxt = IPPROTO_ICMPV6;
3387	if (ia != NULL)
3388		IFA_LOCK(&ia->ia_ifa);
3389	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
3390	if (ia != NULL) {
3391		IFA_UNLOCK(&ia->ia_ifa);
3392		IFA_REMREF(&ia->ia_ifa);
3393		ia = NULL;
3394	}
3395	ip6->ip6_dst = in6addr_linklocal_allv2routers;
3396	/* scope ID will be set in netisr */
3397
3398	mld = (struct mldv2_report *)(ip6 + 1);
3399	mld->mld_type = MLDV2_LISTENER_REPORT;
3400	mld->mld_code = 0;
3401	mld->mld_cksum = 0;
3402	mld->mld_v2_reserved = 0;
3403	mld->mld_v2_numrecs = htons(m->m_pkthdr.vt_nrecs);
3404	m->m_pkthdr.vt_nrecs = 0;
3405	m->m_flags &= ~M_PKTHDR;
3406
3407	mh->m_next = m;
3408	mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
3409	    sizeof(struct ip6_hdr), sizeof(struct mldv2_report) + mldreclen);
3410	return (mh);
3411}
3412
3413#ifdef MLD_DEBUG
3414static const char *
3415mld_rec_type_to_str(const int type)
3416{
3417	switch (type) {
3418		case MLD_CHANGE_TO_EXCLUDE_MODE:
3419			return "TO_EX";
3420			break;
3421		case MLD_CHANGE_TO_INCLUDE_MODE:
3422			return "TO_IN";
3423			break;
3424		case MLD_MODE_IS_EXCLUDE:
3425			return "MODE_EX";
3426			break;
3427		case MLD_MODE_IS_INCLUDE:
3428			return "MODE_IN";
3429			break;
3430		case MLD_ALLOW_NEW_SOURCES:
3431			return "ALLOW_NEW";
3432			break;
3433		case MLD_BLOCK_OLD_SOURCES:
3434			return "BLOCK_OLD";
3435			break;
3436		default:
3437			break;
3438	}
3439	return "unknown";
3440}
3441#endif
3442
3443void
3444mld_init(void)
3445{
3446
3447	MLD_PRINTF(("%s: initializing\n", __func__));
3448
3449	/* Setup lock group and attribute for mld_mtx */
3450	mld_mtx_grp_attr = lck_grp_attr_alloc_init();
3451	mld_mtx_grp = lck_grp_alloc_init("mld_mtx", mld_mtx_grp_attr);
3452	mld_mtx_attr = lck_attr_alloc_init();
3453	lck_mtx_init(&mld_mtx, mld_mtx_grp, mld_mtx_attr);
3454
3455	ip6_initpktopts(&mld_po);
3456	mld_po.ip6po_hlim = 1;
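	/*
	 * mld_ra holds a pre-built Hop-by-Hop header carrying the IPv6
	 * Router Alert option, which MLD messages are required to carry;
	 * ip6_output() prepends it from these packet options.
	 */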
3457	mld_po.ip6po_hbh = &mld_ra.hbh;
3458	mld_po.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
3459	mld_po.ip6po_flags = IP6PO_DONTFRAG;
3460	LIST_INIT(&mli_head);
3461
3462	mli_size = sizeof (struct mld_ifinfo);
3463	mli_zone = zinit(mli_size, MLI_ZONE_MAX * mli_size,
3464	    0, MLI_ZONE_NAME);
3465	if (mli_zone == NULL) {
3466		panic("%s: failed allocating %s", __func__, MLI_ZONE_NAME);
3467		/* NOTREACHED */
3468	}
3469	zone_change(mli_zone, Z_EXPAND, TRUE);
3470	zone_change(mli_zone, Z_CALLERACCT, FALSE);
3471}
3472