1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 2009 Bruce Simpson.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote
16 *    products derived from this software without specific prior written
17 *    permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32/*
33 * IPv6 multicast socket, group, and socket option processing module.
 * Normative references: RFC 2292, RFC 3493, RFC 3542, RFC 3678, RFC 3810.
35 */
36
37#include <sys/cdefs.h>
38__FBSDID("$FreeBSD$");
39
40#include "opt_inet6.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/ktr.h>
46#include <sys/malloc.h>
47#include <sys/mbuf.h>
48#include <sys/protosw.h>
49#include <sys/socket.h>
50#include <sys/socketvar.h>
51#include <sys/sysctl.h>
52#include <sys/priv.h>
53#include <sys/taskqueue.h>
54#include <sys/tree.h>
55
56#include <net/if.h>
57#include <net/if_var.h>
58#include <net/if_dl.h>
59#include <net/route.h>
60#include <net/vnet.h>
61
62#include <netinet/in.h>
63#include <netinet/udp.h>
64#include <netinet/in_var.h>
65#include <netinet/ip_var.h>
66#include <netinet/udp_var.h>
67#include <netinet6/in6_fib.h>
68#include <netinet6/in6_var.h>
69#include <netinet/ip6.h>
70#include <netinet/icmp6.h>
71#include <netinet6/ip6_var.h>
72#include <netinet/in_pcb.h>
73#include <netinet/tcp_var.h>
74#include <netinet6/nd6.h>
75#include <netinet6/mld6_var.h>
76#include <netinet6/scope6_var.h>
77
78#ifndef KTR_MLD
79#define KTR_MLD KTR_INET6
80#endif
81
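/*
 * Convenience union of the sockaddr flavors handled while parsing
 * multicast socket options.
 */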
82#ifndef __SOCKUNION_DECLARED
83union sockunion {
84	struct sockaddr_storage	ss;
85	struct sockaddr		sa;
86	struct sockaddr_dl	sdl;
87	struct sockaddr_in6	sin6;
88};
89typedef union sockunion sockunion_t;
90#define __SOCKUNION_DECLARED
91#endif /* __SOCKUNION_DECLARED */
92
93static MALLOC_DEFINE(M_IN6MFILTER, "in6_mfilter",
94    "IPv6 multicast PCB-layer source filter");
95MALLOC_DEFINE(M_IP6MADDR, "in6_multi", "IPv6 multicast group");
96static MALLOC_DEFINE(M_IP6MOPTS, "ip6_moptions", "IPv6 multicast options");
97static MALLOC_DEFINE(M_IP6MSOURCE, "ip6_msource",
98    "IPv6 multicast MLD-layer source filter");
99
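/*
 * Red-black tree of source entries, ordered by IPv6 source address
 * (see ip6_msource_cmp()).
 */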
100RB_GENERATE(ip6_msource_tree, ip6_msource, im6s_link, ip6_msource_cmp);
101
102/*
103 * Locking:
104 * - Lock order is: Giant, IN6_MULTI_LOCK, INP_WLOCK,
105 *   IN6_MULTI_LIST_LOCK, MLD_LOCK, IF_ADDR_LOCK.
106 * - The IF_ADDR_LOCK is implicitly taken by in6m_lookup() earlier, however
107 *   it can be taken by code in net/if.c also.
108 * - ip6_moptions and in6_mfilter are covered by the INP_WLOCK.
109 *
110 * struct in6_multi is covered by IN6_MULTI_LOCK. There isn't strictly
111 * any need for in6_multi itself to be virtualized -- it is bound to an ifp
112 * anyway no matter what happens.
113 */
114struct mtx in6_multi_list_mtx;
115MTX_SYSINIT(in6_multi_mtx, &in6_multi_list_mtx, "in6_multi_list_mtx", MTX_DEF);
116
117struct mtx in6_multi_free_mtx;
118MTX_SYSINIT(in6_multi_free_mtx, &in6_multi_free_mtx, "in6_multi_free_mtx", MTX_DEF);
119
120struct sx in6_multi_sx;
SX_SYSINIT(in6_multi_sx, &in6_multi_sx, "in6_multi_sx");

static void	im6f_commit(struct in6_mfilter *);
126static int	im6f_get_source(struct in6_mfilter *imf,
127		    const struct sockaddr_in6 *psin,
128		    struct in6_msource **);
129static struct in6_msource *
130		im6f_graft(struct in6_mfilter *, const uint8_t,
131		    const struct sockaddr_in6 *);
132static void	im6f_leave(struct in6_mfilter *);
133static int	im6f_prune(struct in6_mfilter *, const struct sockaddr_in6 *);
134static void	im6f_purge(struct in6_mfilter *);
135static void	im6f_rollback(struct in6_mfilter *);
136static void	im6f_reap(struct in6_mfilter *);
137static struct in6_mfilter *
138		im6o_match_group(const struct ip6_moptions *,
139		    const struct ifnet *, const struct sockaddr *);
140static struct in6_msource *
141		im6o_match_source(struct in6_mfilter *, const struct sockaddr *);
142static void	im6s_merge(struct ip6_msource *ims,
143		    const struct in6_msource *lims, const int rollback);
144static int	in6_getmulti(struct ifnet *, const struct in6_addr *,
145		    struct in6_multi **);
146static int	in6m_get_source(struct in6_multi *inm,
147		    const struct in6_addr *addr, const int noalloc,
148		    struct ip6_msource **pims);
149#ifdef KTR
150static int	in6m_is_ifp_detached(const struct in6_multi *);
151#endif
152static int	in6m_merge(struct in6_multi *, /*const*/ struct in6_mfilter *);
153static void	in6m_purge(struct in6_multi *);
154static void	in6m_reap(struct in6_multi *);
155static struct ip6_moptions *
156		in6p_findmoptions(struct inpcb *);
157static int	in6p_get_source_filters(struct inpcb *, struct sockopt *);
158static int	in6p_join_group(struct inpcb *, struct sockopt *);
159static int	in6p_leave_group(struct inpcb *, struct sockopt *);
160static struct ifnet *
161		in6p_lookup_mcast_ifp(const struct inpcb *,
162		    const struct sockaddr_in6 *);
163static int	in6p_block_unblock_source(struct inpcb *, struct sockopt *);
164static int	in6p_set_multicast_if(struct inpcb *, struct sockopt *);
165static int	in6p_set_source_filters(struct inpcb *, struct sockopt *);
166static int	sysctl_ip6_mcast_filters(SYSCTL_HANDLER_ARGS);
167
168SYSCTL_DECL(_net_inet6_ip6);	/* XXX Not in any common header. */
169
170static SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, mcast, CTLFLAG_RW, 0,
171    "IPv6 multicast");
172
173static u_long in6_mcast_maxgrpsrc = IPV6_MAX_GROUP_SRC_FILTER;
174SYSCTL_ULONG(_net_inet6_ip6_mcast, OID_AUTO, maxgrpsrc,
175    CTLFLAG_RWTUN, &in6_mcast_maxgrpsrc, 0,
176    "Max source filters per group");
177
178static u_long in6_mcast_maxsocksrc = IPV6_MAX_SOCK_SRC_FILTER;
179SYSCTL_ULONG(_net_inet6_ip6_mcast, OID_AUTO, maxsocksrc,
180    CTLFLAG_RWTUN, &in6_mcast_maxsocksrc, 0,
181    "Max source filters per socket");
182
183/* TODO Virtualize this switch. */
184int in6_mcast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
185SYSCTL_INT(_net_inet6_ip6_mcast, OID_AUTO, loop, CTLFLAG_RWTUN,
186    &in6_mcast_loop, 0, "Loopback multicast datagrams by default");
187
188static SYSCTL_NODE(_net_inet6_ip6_mcast, OID_AUTO, filters,
189    CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip6_mcast_filters,
190    "Per-interface stack-wide source filters");
191
192#ifdef KTR
193/*
194 * Inline function which wraps assertions for a valid ifp.
195 * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp
196 * is detached.
197 */
198static int __inline
199in6m_is_ifp_detached(const struct in6_multi *inm)
200{
201	struct ifnet *ifp;
202
203	KASSERT(inm->in6m_ifma != NULL, ("%s: no ifma", __func__));
204	ifp = inm->in6m_ifma->ifma_ifp;
205	if (ifp != NULL) {
206		/*
207		 * Sanity check that network-layer notion of ifp is the
208		 * same as that of link-layer.
209		 */
210		KASSERT(inm->in6m_ifp == ifp, ("%s: bad ifp", __func__));
211	}
212
213	return (ifp == NULL);
214}
215#endif
216
217/*
218 * Initialize an in6_mfilter structure to a known state at t0, t1
219 * with an empty source filter list.
220 */
221static __inline void
222im6f_init(struct in6_mfilter *imf, const int st0, const int st1)
223{
224	memset(imf, 0, sizeof(struct in6_mfilter));
225	RB_INIT(&imf->im6f_sources);
226	imf->im6f_st[0] = st0;
227	imf->im6f_st[1] = st1;
228}
229
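/*
 * Allocate an in6_mfilter initialized to the given filter modes at
 * t0 and t1.  Returns NULL if the allocation fails.
 */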
230struct in6_mfilter *
231ip6_mfilter_alloc(const int mflags, const int st0, const int st1)
232{
233	struct in6_mfilter *imf;
234
235	imf = malloc(sizeof(*imf), M_IN6MFILTER, mflags);
236
237	if (imf != NULL)
238		im6f_init(imf, st0, st1);
239
240	return (imf);
241}
242
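/*
 * Purge any remaining source entries from a filter and free it.
 */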
243void
244ip6_mfilter_free(struct in6_mfilter *imf)
245{
246
247	im6f_purge(imf);
248	free(imf, M_IN6MFILTER);
249}
250
251/*
252 * Find an IPv6 multicast group entry for this ip6_moptions instance
253 * which matches the specified group, and optionally an interface.
 * Return a pointer to the matching in6_mfilter, or NULL if not found.
255 */
256static struct in6_mfilter *
257im6o_match_group(const struct ip6_moptions *imo, const struct ifnet *ifp,
258    const struct sockaddr *group)
259{
260	const struct sockaddr_in6 *gsin6;
	struct in6_mfilter *imf;
	struct in6_multi *inm;

	gsin6 = (const struct sockaddr_in6 *)group;
265
266	IP6_MFILTER_FOREACH(imf, &imo->im6o_head) {
267		inm = imf->im6f_in6m;
268		if (inm == NULL)
269			continue;
270		if ((ifp == NULL || (inm->in6m_ifp == ifp)) &&
271		    IN6_ARE_ADDR_EQUAL(&inm->in6m_addr,
272		    &gsin6->sin6_addr)) {
273			break;
274		}
275	}
276	return (imf);
277}
278
279/*
280 * Find an IPv6 multicast source entry for this imo which matches
281 * the given group index for this socket, and source address.
282 *
283 * XXX TODO: The scope ID, if present in src, is stripped before
284 * any comparison. We SHOULD enforce scope/zone checks where the source
285 * filter entry has a link scope.
286 *
287 * NOTE: This does not check if the entry is in-mode, merely if
288 * it exists, which may not be the desired behaviour.
289 */
290static struct in6_msource *
291im6o_match_source(struct in6_mfilter *imf, const struct sockaddr *src)
292{
293	struct ip6_msource	 find;
294	struct ip6_msource	*ims;
295	const sockunion_t	*psa;
296
297	KASSERT(src->sa_family == AF_INET6, ("%s: !AF_INET6", __func__));
298
299	psa = (const sockunion_t *)src;
300	find.im6s_addr = psa->sin6.sin6_addr;
301	in6_clearscope(&find.im6s_addr);		/* XXX */
302	ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find);
303
304	return ((struct in6_msource *)ims);
305}
306
307/*
308 * Perform filtering for multicast datagrams on a socket by group and source.
309 *
 * Return MCAST_PASS if the datagram should be allowed through, or
 * MCAST_NOTGMEMBER / MCAST_NOTSMEMBER if the socket was not a member of
 * the group or the source was filtered out, respectively.
312 */
313int
314im6o_mc_filter(const struct ip6_moptions *imo, const struct ifnet *ifp,
315    const struct sockaddr *group, const struct sockaddr *src)
316{
317	struct in6_mfilter *imf;
318	struct in6_msource *ims;
319	int mode;
320
321	KASSERT(ifp != NULL, ("%s: null ifp", __func__));
322
323	imf = im6o_match_group(imo, ifp, group);
324	if (imf == NULL)
325		return (MCAST_NOTGMEMBER);
326
327	/*
328	 * Check if the source was included in an (S,G) join.
329	 * Allow reception on exclusive memberships by default,
330	 * reject reception on inclusive memberships by default.
331	 * Exclude source only if an in-mode exclude filter exists.
332	 * Include source only if an in-mode include filter exists.
333	 * NOTE: We are comparing group state here at MLD t1 (now)
334	 * with socket-layer t0 (since last downcall).
335	 */
336	mode = imf->im6f_st[1];
337	ims = im6o_match_source(imf, src);
338
339	if ((ims == NULL && mode == MCAST_INCLUDE) ||
340	    (ims != NULL && ims->im6sl_st[0] != mode))
341		return (MCAST_NOTSMEMBER);
342
343	return (MCAST_PASS);
344}
345
346/*
347 * Find and return a reference to an in6_multi record for (ifp, group),
348 * and bump its reference count.
349 * If one does not exist, try to allocate it, and update link-layer multicast
350 * filters on ifp to listen for group.
351 * Assumes the IN6_MULTI lock is held across the call.
352 * Return 0 if successful, otherwise return an appropriate error code.
353 */
354static int
355in6_getmulti(struct ifnet *ifp, const struct in6_addr *group,
356    struct in6_multi **pinm)
357{
358	struct epoch_tracker	 et;
359	struct sockaddr_in6	 gsin6;
360	struct ifmultiaddr	*ifma;
361	struct in6_multi	*inm;
362	int			 error;
363
364	error = 0;
365
366	/*
367	 * XXX: Accesses to ifma_protospec must be covered by IF_ADDR_LOCK;
368	 * if_addmulti() takes this mutex itself, so we must drop and
369	 * re-acquire around the call.
370	 */
371	IN6_MULTI_LOCK_ASSERT();
372	IN6_MULTI_LIST_LOCK();
373	IF_ADDR_WLOCK(ifp);
374	NET_EPOCH_ENTER_ET(et);
375	inm = in6m_lookup_locked(ifp, group);
376	NET_EPOCH_EXIT_ET(et);
377
378	if (inm != NULL) {
379		/*
380		 * If we already joined this group, just bump the
381		 * refcount and return it.
382		 */
383		KASSERT(inm->in6m_refcount >= 1,
384		    ("%s: bad refcount %d", __func__, inm->in6m_refcount));
385		in6m_acquire_locked(inm);
386		*pinm = inm;
387		goto out_locked;
388	}
389
390	memset(&gsin6, 0, sizeof(gsin6));
391	gsin6.sin6_family = AF_INET6;
392	gsin6.sin6_len = sizeof(struct sockaddr_in6);
393	gsin6.sin6_addr = *group;
394
395	/*
396	 * Check if a link-layer group is already associated
397	 * with this network-layer group on the given ifnet.
398	 */
399	IN6_MULTI_LIST_UNLOCK();
400	IF_ADDR_WUNLOCK(ifp);
401	error = if_addmulti(ifp, (struct sockaddr *)&gsin6, &ifma);
402	if (error != 0)
403		return (error);
404	IN6_MULTI_LIST_LOCK();
405	IF_ADDR_WLOCK(ifp);
406
407	/*
408	 * If something other than netinet6 is occupying the link-layer
409	 * group, print a meaningful error message and back out of
410	 * the allocation.
411	 * Otherwise, bump the refcount on the existing network-layer
412	 * group association and return it.
413	 */
414	if (ifma->ifma_protospec != NULL) {
415		inm = (struct in6_multi *)ifma->ifma_protospec;
416#ifdef INVARIANTS
417		KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr",
418		    __func__));
419		KASSERT(ifma->ifma_addr->sa_family == AF_INET6,
420		    ("%s: ifma not AF_INET6", __func__));
421		KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__));
422		if (inm->in6m_ifma != ifma || inm->in6m_ifp != ifp ||
423		    !IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, group))
424			panic("%s: ifma %p is inconsistent with %p (%p)",
425			    __func__, ifma, inm, group);
426#endif
427		in6m_acquire_locked(inm);
428		*pinm = inm;
429		goto out_locked;
430	}
431
432	IF_ADDR_WLOCK_ASSERT(ifp);
433
434	/*
435	 * A new in6_multi record is needed; allocate and initialize it.
436	 * We DO NOT perform an MLD join as the in6_ layer may need to
437	 * push an initial source list down to MLD to support SSM.
438	 *
439	 * The initial source filter state is INCLUDE, {} as per the RFC.
440	 * Pending state-changes per group are subject to a bounds check.
441	 */
442	inm = malloc(sizeof(*inm), M_IP6MADDR, M_NOWAIT | M_ZERO);
443	if (inm == NULL) {
444		IN6_MULTI_LIST_UNLOCK();
445		IF_ADDR_WUNLOCK(ifp);
446		if_delmulti_ifma(ifma);
447		return (ENOMEM);
448	}
449	inm->in6m_addr = *group;
450	inm->in6m_ifp = ifp;
451	inm->in6m_mli = MLD_IFINFO(ifp);
452	inm->in6m_ifma = ifma;
453	inm->in6m_refcount = 1;
454	inm->in6m_state = MLD_NOT_MEMBER;
455	mbufq_init(&inm->in6m_scq, MLD_MAX_STATE_CHANGES);
456
457	inm->in6m_st[0].iss_fmode = MCAST_UNDEFINED;
458	inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
459	RB_INIT(&inm->in6m_srcs);
460
461	ifma->ifma_protospec = inm;
462	*pinm = inm;
463
464 out_locked:
465	IN6_MULTI_LIST_UNLOCK();
466	IF_ADDR_WUNLOCK(ifp);
467	return (error);
468}
469
470/*
471 * Drop a reference to an in6_multi record.
472 *
473 * If the refcount drops to 0, free the in6_multi record and
474 * delete the underlying link-layer membership.
475 */
476static void
477in6m_release(struct in6_multi *inm)
478{
479	struct ifmultiaddr *ifma;
480	struct ifnet *ifp;
481
482	CTR2(KTR_MLD, "%s: refcount is %d", __func__, inm->in6m_refcount);
483
484	MPASS(inm->in6m_refcount == 0);
485	CTR2(KTR_MLD, "%s: freeing inm %p", __func__, inm);
486
487	ifma = inm->in6m_ifma;
488	ifp = inm->in6m_ifp;
489	MPASS(ifma->ifma_llifma == NULL);
490
491	/* XXX this access is not covered by IF_ADDR_LOCK */
492	CTR2(KTR_MLD, "%s: purging ifma %p", __func__, ifma);
493	KASSERT(ifma->ifma_protospec == NULL,
494	    ("%s: ifma_protospec != NULL", __func__));
495	if (ifp == NULL)
496		ifp = ifma->ifma_ifp;
497
498	if (ifp != NULL) {
499		CURVNET_SET(ifp->if_vnet);
500		in6m_purge(inm);
501		free(inm, M_IP6MADDR);
502		if_delmulti_ifma_flags(ifma, 1);
503		CURVNET_RESTORE();
504		if_rele(ifp);
505	} else {
506		in6m_purge(inm);
507		free(inm, M_IP6MADDR);
508		if_delmulti_ifma_flags(ifma, 1);
509	}
510}
511
512/*
513 * Interface detach can happen in a taskqueue thread context, so we must use a
514 * dedicated thread to avoid deadlocks when draining in6m_release tasks.
515 */
516TASKQUEUE_DEFINE_THREAD(in6m_free);
517static struct in6_multi_head in6m_free_list = SLIST_HEAD_INITIALIZER();
518static void in6m_release_task(void *arg __unused, int pending __unused);
519static struct task in6m_free_task = TASK_INITIALIZER(0, in6m_release_task, NULL);
520
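/*
 * Queue a list of in6_multi records for deferred release by the
 * in6m_free taskqueue.
 */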
521void
522in6m_release_list_deferred(struct in6_multi_head *inmh)
523{
524	if (SLIST_EMPTY(inmh))
525		return;
526	mtx_lock(&in6_multi_free_mtx);
527	SLIST_CONCAT(&in6m_free_list, inmh, in6_multi, in6m_nrele);
528	mtx_unlock(&in6_multi_free_mtx);
529	taskqueue_enqueue(taskqueue_in6m_free, &in6m_free_task);
530}
531
532void
533in6m_release_wait(void *arg __unused)
534{
535
536	/*
537	 * Make sure all pending multicast addresses are freed before
538	 * the VNET or network device is destroyed:
539	 */
540	taskqueue_drain_all(taskqueue_in6m_free);
541}
542#ifdef VIMAGE
543VNET_SYSUNINIT(in6m_release_wait, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, in6m_release_wait, NULL);
544#endif
545
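/*
 * Detach an in6_multi from its interface: dequeue its ifma (and the
 * underlying link-layer ifma, if this drops the last reference) and
 * drop any memberships recorded on the interface's IPv6 addresses.
 * Caller must hold the IN6_MULTI_LIST lock and the IF_ADDR write lock.
 */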
546void
547in6m_disconnect_locked(struct in6_multi_head *inmh, struct in6_multi *inm)
548{
549	struct ifnet *ifp;
550	struct ifaddr *ifa;
551	struct in6_ifaddr *ifa6;
552	struct in6_multi_mship *imm, *imm_tmp;
553	struct ifmultiaddr *ifma, *ll_ifma;
554
555	IN6_MULTI_LIST_LOCK_ASSERT();
556
557	ifp = inm->in6m_ifp;
558	if (ifp == NULL)
559		return;		/* already called */
560
561	inm->in6m_ifp = NULL;
562	IF_ADDR_WLOCK_ASSERT(ifp);
563	ifma = inm->in6m_ifma;
564	if (ifma == NULL)
565		return;
566
567	if_ref(ifp);
568	if (ifma->ifma_flags & IFMA_F_ENQUEUED) {
569		CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
570		ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
571	}
572	MCDPRINTF("removed ifma: %p from %s\n", ifma, ifp->if_xname);
573	if ((ll_ifma = ifma->ifma_llifma) != NULL) {
574		MPASS(ifma != ll_ifma);
575		ifma->ifma_llifma = NULL;
576		MPASS(ll_ifma->ifma_llifma == NULL);
577		MPASS(ll_ifma->ifma_ifp == ifp);
578		if (--ll_ifma->ifma_refcount == 0) {
579			if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) {
580				CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link);
581				ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
582			}
583			MCDPRINTF("removed ll_ifma: %p from %s\n", ll_ifma, ifp->if_xname);
584			if_freemulti(ll_ifma);
585		}
586	}
587	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
588		if (ifa->ifa_addr->sa_family != AF_INET6)
589			continue;
590		ifa6 = (void *)ifa;
591		LIST_FOREACH_SAFE(imm, &ifa6->ia6_memberships,
592		    i6mm_chain, imm_tmp) {
593			if (inm == imm->i6mm_maddr) {
594				LIST_REMOVE(imm, i6mm_chain);
595				free(imm, M_IP6MADDR);
596				in6m_rele_locked(inmh, inm);
597			}
598		}
599	}
600}
601
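/*
 * Taskqueue handler: drain the deferred-free list and release each
 * in6_multi under the IN6_MULTI lock.
 */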
602static void
603in6m_release_task(void *arg __unused, int pending __unused)
604{
605	struct in6_multi_head in6m_free_tmp;
606	struct in6_multi *inm, *tinm;
607
608	SLIST_INIT(&in6m_free_tmp);
609	mtx_lock(&in6_multi_free_mtx);
610	SLIST_CONCAT(&in6m_free_tmp, &in6m_free_list, in6_multi, in6m_nrele);
611	mtx_unlock(&in6_multi_free_mtx);
612	IN6_MULTI_LOCK();
613	SLIST_FOREACH_SAFE(inm, &in6m_free_tmp, in6m_nrele, tinm) {
614		SLIST_REMOVE_HEAD(&in6m_free_tmp, in6m_nrele);
615		in6m_release(inm);
616	}
617	IN6_MULTI_UNLOCK();
618}
619
620/*
621 * Clear recorded source entries for a group.
 * Used by the MLD code. Caller must hold the IN6_MULTI_LIST lock.
623 * FIXME: Should reap.
624 */
625void
626in6m_clear_recorded(struct in6_multi *inm)
627{
628	struct ip6_msource	*ims;
629
630	IN6_MULTI_LIST_LOCK_ASSERT();
631
632	RB_FOREACH(ims, ip6_msource_tree, &inm->in6m_srcs) {
633		if (ims->im6s_stp) {
634			ims->im6s_stp = 0;
635			--inm->in6m_st[1].iss_rec;
636		}
637	}
638	KASSERT(inm->in6m_st[1].iss_rec == 0,
639	    ("%s: iss_rec %d not 0", __func__, inm->in6m_st[1].iss_rec));
640}
641
642/*
643 * Record a source as pending for a Source-Group MLDv2 query.
644 * This lives here as it modifies the shared tree.
645 *
646 * inm is the group descriptor.
647 * naddr is the address of the source to record in network-byte order.
648 *
649 * If the net.inet6.mld.sgalloc sysctl is non-zero, we will
650 * lazy-allocate a source node in response to an SG query.
651 * Otherwise, no allocation is performed. This saves some memory
652 * with the trade-off that the source will not be reported to the
653 * router if joined in the window between the query response and
654 * the group actually being joined on the local host.
655 *
656 * VIMAGE: XXX: Currently the mld_sgalloc feature has been removed.
657 * This turns off the allocation of a recorded source entry if
658 * the group has not been joined.
659 *
 * Return 0 if the source was already marked as recorded.
661 * Return 1 if the source was marked as recorded by this function.
662 * Return <0 if any error occurred (negated errno code).
663 */
664int
665in6m_record_source(struct in6_multi *inm, const struct in6_addr *addr)
666{
667	struct ip6_msource	 find;
668	struct ip6_msource	*ims, *nims;
669
670	IN6_MULTI_LIST_LOCK_ASSERT();
671
672	find.im6s_addr = *addr;
673	ims = RB_FIND(ip6_msource_tree, &inm->in6m_srcs, &find);
674	if (ims && ims->im6s_stp)
675		return (0);
676	if (ims == NULL) {
677		if (inm->in6m_nsrc == in6_mcast_maxgrpsrc)
678			return (-ENOSPC);
679		nims = malloc(sizeof(struct ip6_msource), M_IP6MSOURCE,
680		    M_NOWAIT | M_ZERO);
681		if (nims == NULL)
682			return (-ENOMEM);
683		nims->im6s_addr = find.im6s_addr;
684		RB_INSERT(ip6_msource_tree, &inm->in6m_srcs, nims);
685		++inm->in6m_nsrc;
686		ims = nims;
687	}
688
689	/*
690	 * Mark the source as recorded and update the recorded
691	 * source count.
692	 */
693	++ims->im6s_stp;
694	++inm->in6m_st[1].iss_rec;
695
696	return (1);
697}
698
699/*
 * Find an in6_msource owned by an in6_mfilter, given its source address,
 * and return it via *plims; returns 0 on success or an errno value.
 * Lazy-allocate if needed. If this is a new entry its filter state is
 * undefined at t0.
 *
 * imf is the filter set being modified.
 * psin is the source address, as a sockaddr_in6.
707 *
708 * SMPng: May be called with locks held; malloc must not block.
709 */
710static int
711im6f_get_source(struct in6_mfilter *imf, const struct sockaddr_in6 *psin,
712    struct in6_msource **plims)
713{
714	struct ip6_msource	 find;
715	struct ip6_msource	*ims, *nims;
716	struct in6_msource	*lims;
717	int			 error;
718
719	error = 0;
720	ims = NULL;
721	lims = NULL;
722
723	find.im6s_addr = psin->sin6_addr;
724	ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find);
725	lims = (struct in6_msource *)ims;
726	if (lims == NULL) {
727		if (imf->im6f_nsrc == in6_mcast_maxsocksrc)
728			return (ENOSPC);
729		nims = malloc(sizeof(struct in6_msource), M_IN6MFILTER,
730		    M_NOWAIT | M_ZERO);
731		if (nims == NULL)
732			return (ENOMEM);
733		lims = (struct in6_msource *)nims;
734		lims->im6s_addr = find.im6s_addr;
735		lims->im6sl_st[0] = MCAST_UNDEFINED;
736		RB_INSERT(ip6_msource_tree, &imf->im6f_sources, nims);
737		++imf->im6f_nsrc;
738	}
739
740	*plims = lims;
741
742	return (error);
743}
744
745/*
746 * Graft a source entry into an existing socket-layer filter set,
747 * maintaining any required invariants and checking allocations.
748 *
749 * The source is marked as being in the new filter mode at t1.
750 *
751 * Return the pointer to the new node, otherwise return NULL.
752 */
753static struct in6_msource *
754im6f_graft(struct in6_mfilter *imf, const uint8_t st1,
755    const struct sockaddr_in6 *psin)
756{
757	struct ip6_msource	*nims;
758	struct in6_msource	*lims;
759
760	nims = malloc(sizeof(struct in6_msource), M_IN6MFILTER,
761	    M_NOWAIT | M_ZERO);
762	if (nims == NULL)
763		return (NULL);
764	lims = (struct in6_msource *)nims;
765	lims->im6s_addr = psin->sin6_addr;
766	lims->im6sl_st[0] = MCAST_UNDEFINED;
767	lims->im6sl_st[1] = st1;
768	RB_INSERT(ip6_msource_tree, &imf->im6f_sources, nims);
769	++imf->im6f_nsrc;
770
771	return (lims);
772}
773
774/*
775 * Prune a source entry from an existing socket-layer filter set,
776 * maintaining any required invariants and checking allocations.
777 *
778 * The source is marked as being left at t1, it is not freed.
779 *
780 * Return 0 if no error occurred, otherwise return an errno value.
781 */
782static int
783im6f_prune(struct in6_mfilter *imf, const struct sockaddr_in6 *psin)
784{
785	struct ip6_msource	 find;
786	struct ip6_msource	*ims;
787	struct in6_msource	*lims;
788
789	find.im6s_addr = psin->sin6_addr;
790	ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find);
791	if (ims == NULL)
792		return (ENOENT);
793	lims = (struct in6_msource *)ims;
794	lims->im6sl_st[1] = MCAST_UNDEFINED;
795	return (0);
796}
797
798/*
799 * Revert socket-layer filter set deltas at t1 to t0 state.
800 */
801static void
802im6f_rollback(struct in6_mfilter *imf)
803{
804	struct ip6_msource	*ims, *tims;
805	struct in6_msource	*lims;
806
807	RB_FOREACH_SAFE(ims, ip6_msource_tree, &imf->im6f_sources, tims) {
808		lims = (struct in6_msource *)ims;
809		if (lims->im6sl_st[0] == lims->im6sl_st[1]) {
810			/* no change at t1 */
811			continue;
812		} else if (lims->im6sl_st[0] != MCAST_UNDEFINED) {
813			/* revert change to existing source at t1 */
814			lims->im6sl_st[1] = lims->im6sl_st[0];
815		} else {
816			/* revert source added t1 */
817			CTR2(KTR_MLD, "%s: free ims %p", __func__, ims);
818			RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, ims);
819			free(ims, M_IN6MFILTER);
820			imf->im6f_nsrc--;
821		}
822	}
823	imf->im6f_st[1] = imf->im6f_st[0];
824}
825
826/*
827 * Mark socket-layer filter set as INCLUDE {} at t1.
828 */
829static void
830im6f_leave(struct in6_mfilter *imf)
831{
832	struct ip6_msource	*ims;
833	struct in6_msource	*lims;
834
835	RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) {
836		lims = (struct in6_msource *)ims;
837		lims->im6sl_st[1] = MCAST_UNDEFINED;
838	}
839	imf->im6f_st[1] = MCAST_INCLUDE;
840}
841
842/*
843 * Mark socket-layer filter set deltas as committed.
844 */
845static void
846im6f_commit(struct in6_mfilter *imf)
847{
848	struct ip6_msource	*ims;
849	struct in6_msource	*lims;
850
851	RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) {
852		lims = (struct in6_msource *)ims;
853		lims->im6sl_st[0] = lims->im6sl_st[1];
854	}
855	imf->im6f_st[0] = imf->im6f_st[1];
856}
857
858/*
859 * Reap unreferenced sources from socket-layer filter set.
860 */
861static void
862im6f_reap(struct in6_mfilter *imf)
863{
864	struct ip6_msource	*ims, *tims;
865	struct in6_msource	*lims;
866
867	RB_FOREACH_SAFE(ims, ip6_msource_tree, &imf->im6f_sources, tims) {
868		lims = (struct in6_msource *)ims;
869		if ((lims->im6sl_st[0] == MCAST_UNDEFINED) &&
870		    (lims->im6sl_st[1] == MCAST_UNDEFINED)) {
871			CTR2(KTR_MLD, "%s: free lims %p", __func__, ims);
872			RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, ims);
873			free(ims, M_IN6MFILTER);
874			imf->im6f_nsrc--;
875		}
876	}
877}
878
879/*
880 * Purge socket-layer filter set.
881 */
882static void
883im6f_purge(struct in6_mfilter *imf)
884{
885	struct ip6_msource	*ims, *tims;
886
887	RB_FOREACH_SAFE(ims, ip6_msource_tree, &imf->im6f_sources, tims) {
888		CTR2(KTR_MLD, "%s: free ims %p", __func__, ims);
889		RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, ims);
890		free(ims, M_IN6MFILTER);
891		imf->im6f_nsrc--;
892	}
893	imf->im6f_st[0] = imf->im6f_st[1] = MCAST_UNDEFINED;
894	KASSERT(RB_EMPTY(&imf->im6f_sources),
895	    ("%s: im6f_sources not empty", __func__));
896}
897
898/*
899 * Look up a source filter entry for a multicast group.
900 *
901 * inm is the group descriptor to work with.
902 * addr is the IPv6 address to look up.
903 * noalloc may be non-zero to suppress allocation of sources.
904 * *pims will be set to the address of the retrieved or allocated source.
905 *
906 * SMPng: NOTE: may be called with locks held.
907 * Return 0 if successful, otherwise return a non-zero error code.
908 */
909static int
910in6m_get_source(struct in6_multi *inm, const struct in6_addr *addr,
911    const int noalloc, struct ip6_msource **pims)
912{
913	struct ip6_msource	 find;
914	struct ip6_msource	*ims, *nims;
915#ifdef KTR
916	char			 ip6tbuf[INET6_ADDRSTRLEN];
917#endif
918
919	find.im6s_addr = *addr;
920	ims = RB_FIND(ip6_msource_tree, &inm->in6m_srcs, &find);
921	if (ims == NULL && !noalloc) {
922		if (inm->in6m_nsrc == in6_mcast_maxgrpsrc)
923			return (ENOSPC);
924		nims = malloc(sizeof(struct ip6_msource), M_IP6MSOURCE,
925		    M_NOWAIT | M_ZERO);
926		if (nims == NULL)
927			return (ENOMEM);
928		nims->im6s_addr = *addr;
929		RB_INSERT(ip6_msource_tree, &inm->in6m_srcs, nims);
930		++inm->in6m_nsrc;
931		ims = nims;
932		CTR3(KTR_MLD, "%s: allocated %s as %p", __func__,
933		    ip6_sprintf(ip6tbuf, addr), ims);
934	}
935
936	*pims = ims;
937	return (0);
938}
939
940/*
941 * Merge socket-layer source into MLD-layer source.
942 * If rollback is non-zero, perform the inverse of the merge.
943 */
944static void
945im6s_merge(struct ip6_msource *ims, const struct in6_msource *lims,
946    const int rollback)
947{
948	int n = rollback ? -1 : 1;
949#ifdef KTR
950	char ip6tbuf[INET6_ADDRSTRLEN];
951
952	ip6_sprintf(ip6tbuf, &lims->im6s_addr);
953#endif
954
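	/*
	 * Remove this socket's t0 contribution from the MLD-layer t1
	 * counters, then apply its t1 contribution; n is negated when
	 * rolling back.
	 */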
955	if (lims->im6sl_st[0] == MCAST_EXCLUDE) {
956		CTR3(KTR_MLD, "%s: t1 ex -= %d on %s", __func__, n, ip6tbuf);
957		ims->im6s_st[1].ex -= n;
958	} else if (lims->im6sl_st[0] == MCAST_INCLUDE) {
959		CTR3(KTR_MLD, "%s: t1 in -= %d on %s", __func__, n, ip6tbuf);
960		ims->im6s_st[1].in -= n;
961	}
962
963	if (lims->im6sl_st[1] == MCAST_EXCLUDE) {
964		CTR3(KTR_MLD, "%s: t1 ex += %d on %s", __func__, n, ip6tbuf);
965		ims->im6s_st[1].ex += n;
966	} else if (lims->im6sl_st[1] == MCAST_INCLUDE) {
967		CTR3(KTR_MLD, "%s: t1 in += %d on %s", __func__, n, ip6tbuf);
968		ims->im6s_st[1].in += n;
969	}
970}
971
972/*
973 * Atomically update the global in6_multi state, when a membership's
974 * filter list is being updated in any way.
975 *
976 * imf is the per-inpcb-membership group filter pointer.
977 * A fake imf may be passed for in-kernel consumers.
978 *
979 * XXX This is a candidate for a set-symmetric-difference style loop
980 * which would eliminate the repeated lookup from root of ims nodes,
981 * as they share the same key space.
982 *
983 * If any error occurred this function will back out of refcounts
984 * and return a non-zero value.
985 */
986static int
987in6m_merge(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf)
988{
989	struct ip6_msource	*ims, *nims;
990	struct in6_msource	*lims;
991	int			 schanged, error;
992	int			 nsrc0, nsrc1;
993
994	schanged = 0;
995	error = 0;
996	nsrc1 = nsrc0 = 0;
997	IN6_MULTI_LIST_LOCK_ASSERT();
998
999	/*
1000	 * Update the source filters first, as this may fail.
1001	 * Maintain count of in-mode filters at t0, t1. These are
1002	 * used to work out if we transition into ASM mode or not.
1003	 * Maintain a count of source filters whose state was
1004	 * actually modified by this operation.
1005	 */
1006	RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) {
1007		lims = (struct in6_msource *)ims;
1008		if (lims->im6sl_st[0] == imf->im6f_st[0]) nsrc0++;
1009		if (lims->im6sl_st[1] == imf->im6f_st[1]) nsrc1++;
1010		if (lims->im6sl_st[0] == lims->im6sl_st[1]) continue;
1011		error = in6m_get_source(inm, &lims->im6s_addr, 0, &nims);
1012		++schanged;
1013		if (error)
1014			break;
1015		im6s_merge(nims, lims, 0);
1016	}
1017	if (error) {
1018		struct ip6_msource *bims;
1019
1020		RB_FOREACH_REVERSE_FROM(ims, ip6_msource_tree, nims) {
1021			lims = (struct in6_msource *)ims;
1022			if (lims->im6sl_st[0] == lims->im6sl_st[1])
1023				continue;
1024			(void)in6m_get_source(inm, &lims->im6s_addr, 1, &bims);
1025			if (bims == NULL)
1026				continue;
1027			im6s_merge(bims, lims, 1);
1028		}
1029		goto out_reap;
1030	}
1031
1032	CTR3(KTR_MLD, "%s: imf filters in-mode: %d at t0, %d at t1",
1033	    __func__, nsrc0, nsrc1);
1034
1035	/* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */
1036	if (imf->im6f_st[0] == imf->im6f_st[1] &&
1037	    imf->im6f_st[1] == MCAST_INCLUDE) {
1038		if (nsrc1 == 0) {
1039			CTR1(KTR_MLD, "%s: --in on inm at t1", __func__);
1040			--inm->in6m_st[1].iss_in;
1041		}
1042	}
1043
1044	/* Handle filter mode transition on socket. */
1045	if (imf->im6f_st[0] != imf->im6f_st[1]) {
1046		CTR3(KTR_MLD, "%s: imf transition %d to %d",
1047		    __func__, imf->im6f_st[0], imf->im6f_st[1]);
1048
1049		if (imf->im6f_st[0] == MCAST_EXCLUDE) {
1050			CTR1(KTR_MLD, "%s: --ex on inm at t1", __func__);
1051			--inm->in6m_st[1].iss_ex;
1052		} else if (imf->im6f_st[0] == MCAST_INCLUDE) {
1053			CTR1(KTR_MLD, "%s: --in on inm at t1", __func__);
1054			--inm->in6m_st[1].iss_in;
1055		}
1056
1057		if (imf->im6f_st[1] == MCAST_EXCLUDE) {
1058			CTR1(KTR_MLD, "%s: ex++ on inm at t1", __func__);
1059			inm->in6m_st[1].iss_ex++;
1060		} else if (imf->im6f_st[1] == MCAST_INCLUDE && nsrc1 > 0) {
1061			CTR1(KTR_MLD, "%s: in++ on inm at t1", __func__);
1062			inm->in6m_st[1].iss_in++;
1063		}
1064	}
1065
1066	/*
1067	 * Track inm filter state in terms of listener counts.
1068	 * If there are any exclusive listeners, stack-wide
1069	 * membership is exclusive.
1070	 * Otherwise, if only inclusive listeners, stack-wide is inclusive.
1071	 * If no listeners remain, state is undefined at t1,
1072	 * and the MLD lifecycle for this group should finish.
1073	 */
1074	if (inm->in6m_st[1].iss_ex > 0) {
1075		CTR1(KTR_MLD, "%s: transition to EX", __func__);
1076		inm->in6m_st[1].iss_fmode = MCAST_EXCLUDE;
1077	} else if (inm->in6m_st[1].iss_in > 0) {
1078		CTR1(KTR_MLD, "%s: transition to IN", __func__);
1079		inm->in6m_st[1].iss_fmode = MCAST_INCLUDE;
1080	} else {
1081		CTR1(KTR_MLD, "%s: transition to UNDEF", __func__);
1082		inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
1083	}
1084
1085	/* Decrement ASM listener count on transition out of ASM mode. */
1086	if (imf->im6f_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
1087		if ((imf->im6f_st[1] != MCAST_EXCLUDE) ||
1088		    (imf->im6f_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) {
1089			CTR1(KTR_MLD, "%s: --asm on inm at t1", __func__);
1090			--inm->in6m_st[1].iss_asm;
1091		}
1092	}
1093
1094	/* Increment ASM listener count on transition to ASM mode. */
1095	if (imf->im6f_st[1] == MCAST_EXCLUDE && nsrc1 == 0) {
1096		CTR1(KTR_MLD, "%s: asm++ on inm at t1", __func__);
1097		inm->in6m_st[1].iss_asm++;
1098	}
1099
1100	CTR3(KTR_MLD, "%s: merged imf %p to inm %p", __func__, imf, inm);
1101	in6m_print(inm);
1102
1103out_reap:
1104	if (schanged > 0) {
1105		CTR1(KTR_MLD, "%s: sources changed; reaping", __func__);
1106		in6m_reap(inm);
1107	}
1108	return (error);
1109}
1110
1111/*
1112 * Mark an in6_multi's filter set deltas as committed.
1113 * Called by MLD after a state change has been enqueued.
1114 */
1115void
1116in6m_commit(struct in6_multi *inm)
1117{
1118	struct ip6_msource	*ims;
1119
1120	CTR2(KTR_MLD, "%s: commit inm %p", __func__, inm);
1121	CTR1(KTR_MLD, "%s: pre commit:", __func__);
1122	in6m_print(inm);
1123
1124	RB_FOREACH(ims, ip6_msource_tree, &inm->in6m_srcs) {
1125		ims->im6s_st[0] = ims->im6s_st[1];
1126	}
1127	inm->in6m_st[0] = inm->in6m_st[1];
1128}
1129
1130/*
1131 * Reap unreferenced nodes from an in6_multi's filter set.
1132 */
1133static void
1134in6m_reap(struct in6_multi *inm)
1135{
1136	struct ip6_msource	*ims, *tims;
1137
1138	RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs, tims) {
1139		if (ims->im6s_st[0].ex > 0 || ims->im6s_st[0].in > 0 ||
1140		    ims->im6s_st[1].ex > 0 || ims->im6s_st[1].in > 0 ||
1141		    ims->im6s_stp != 0)
1142			continue;
1143		CTR2(KTR_MLD, "%s: free ims %p", __func__, ims);
1144		RB_REMOVE(ip6_msource_tree, &inm->in6m_srcs, ims);
1145		free(ims, M_IP6MSOURCE);
1146		inm->in6m_nsrc--;
1147	}
1148}
1149
1150/*
1151 * Purge all source nodes from an in6_multi's filter set.
1152 */
1153static void
1154in6m_purge(struct in6_multi *inm)
1155{
1156	struct ip6_msource	*ims, *tims;
1157
1158	RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs, tims) {
1159		CTR2(KTR_MLD, "%s: free ims %p", __func__, ims);
1160		RB_REMOVE(ip6_msource_tree, &inm->in6m_srcs, ims);
1161		free(ims, M_IP6MSOURCE);
1162		inm->in6m_nsrc--;
1163	}
1164	/* Free state-change requests that might be queued. */
1165	mbufq_drain(&inm->in6m_scq);
1166}
1167
1168/*
1169 * Join a multicast address w/o sources.
1170 * KAME compatibility entry point.
1171 *
1172 * SMPng: Assume no mc locks held by caller.
1173 */
1174int
1175in6_joingroup(struct ifnet *ifp, const struct in6_addr *mcaddr,
1176    /*const*/ struct in6_mfilter *imf, struct in6_multi **pinm,
1177    const int delay)
1178{
1179	int error;
1180
1181	IN6_MULTI_LOCK();
1182	error = in6_joingroup_locked(ifp, mcaddr, NULL, pinm, delay);
1183	IN6_MULTI_UNLOCK();
1184	return (error);
1185}
1186
1187/*
1188 * Join a multicast group; real entry point.
1189 *
1190 * Only preserves atomicity at inm level.
1191 * NOTE: imf argument cannot be const due to sys/tree.h limitations.
1192 *
1193 * If the MLD downcall fails, the group is not joined, and an error
1194 * code is returned.
1195 */
1196int
1197in6_joingroup_locked(struct ifnet *ifp, const struct in6_addr *mcaddr,
1198    /*const*/ struct in6_mfilter *imf, struct in6_multi **pinm,
1199    const int delay)
1200{
1201	struct in6_multi_head    inmh;
1202	struct in6_mfilter	 timf;
1203	struct in6_multi	*inm;
1204	struct ifmultiaddr *ifma;
1205	int			 error;
1206#ifdef KTR
1207	char			 ip6tbuf[INET6_ADDRSTRLEN];
1208#endif
1209
1210#ifdef INVARIANTS
1211	/*
1212	 * Sanity: Check scope zone ID was set for ifp, if and
1213	 * only if group is scoped to an interface.
1214	 */
1215	KASSERT(IN6_IS_ADDR_MULTICAST(mcaddr),
1216	    ("%s: not a multicast address", __func__));
1217	if (IN6_IS_ADDR_MC_LINKLOCAL(mcaddr) ||
1218	    IN6_IS_ADDR_MC_INTFACELOCAL(mcaddr)) {
1219		KASSERT(mcaddr->s6_addr16[1] != 0,
1220		    ("%s: scope zone ID not set", __func__));
1221	}
1222#endif
1223
1224	IN6_MULTI_LOCK_ASSERT();
1225	IN6_MULTI_LIST_UNLOCK_ASSERT();
1226
	CTR4(KTR_MLD, "%s: join %s on %p(%s)", __func__,
1228	    ip6_sprintf(ip6tbuf, mcaddr), ifp, if_name(ifp));
1229
1230	error = 0;
1231	inm = NULL;
1232
1233	/*
1234	 * If no imf was specified (i.e. kernel consumer),
1235	 * fake one up and assume it is an ASM join.
1236	 */
1237	if (imf == NULL) {
1238		im6f_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE);
1239		imf = &timf;
1240	}
1241	error = in6_getmulti(ifp, mcaddr, &inm);
1242	if (error) {
1243		CTR1(KTR_MLD, "%s: in6_getmulti() failure", __func__);
1244		return (error);
1245	}
1246
1247	IN6_MULTI_LIST_LOCK();
1248	CTR1(KTR_MLD, "%s: merge inm state", __func__);
1249	error = in6m_merge(inm, imf);
1250	if (error) {
1251		CTR1(KTR_MLD, "%s: failed to merge inm state", __func__);
1252		goto out_in6m_release;
1253	}
1254
1255	CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
1256	error = mld_change_state(inm, delay);
1257	if (error) {
1258		CTR1(KTR_MLD, "%s: failed to update source", __func__);
1259		goto out_in6m_release;
1260	}
1261
1262out_in6m_release:
1263	SLIST_INIT(&inmh);
1264	if (error) {
1265		CTR2(KTR_MLD, "%s: dropping ref on %p", __func__, inm);
		IF_ADDR_WLOCK(ifp);
1267		CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1268			if (ifma->ifma_protospec == inm) {
1269				ifma->ifma_protospec = NULL;
1270				break;
1271			}
1272		}
1273		in6m_disconnect_locked(&inmh, inm);
1274		in6m_rele_locked(&inmh, inm);
		IF_ADDR_WUNLOCK(ifp);
1276	} else {
1277		*pinm = inm;
1278	}
1279	IN6_MULTI_LIST_UNLOCK();
1280	in6m_release_list_deferred(&inmh);
1281	return (error);
1282}
1283
1284/*
1285 * Leave a multicast group; unlocked entry point.
1286 */
1287int
1288in6_leavegroup(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf)
1289{
1290	int error;
1291
1292	IN6_MULTI_LOCK();
1293	error = in6_leavegroup_locked(inm, imf);
1294	IN6_MULTI_UNLOCK();
1295	return (error);
1296}
1297
1298/*
1299 * Leave a multicast group; real entry point.
1300 * All source filters will be expunged.
1301 *
1302 * Only preserves atomicity at inm level.
1303 *
1304 * Holding the write lock for the INP which contains imf
1305 * is highly advisable. We can't assert for it as imf does not
1306 * contain a back-pointer to the owning inp.
1307 *
1308 * Note: This is not the same as in6m_release(*) as this function also
1309 * makes a state change downcall into MLD.
1310 */
1311int
1312in6_leavegroup_locked(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf)
1313{
1314	struct in6_multi_head	 inmh;
1315	struct in6_mfilter	 timf;
1316	struct ifnet *ifp;
1317	int			 error;
1318#ifdef KTR
1319	char			 ip6tbuf[INET6_ADDRSTRLEN];
1320#endif
1321
1322	error = 0;
1323
1324	IN6_MULTI_LOCK_ASSERT();
1325
1326	CTR5(KTR_MLD, "%s: leave inm %p, %s/%s, imf %p", __func__,
1327	    inm, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
1328	    (in6m_is_ifp_detached(inm) ? "null" : if_name(inm->in6m_ifp)),
1329	    imf);
1330
1331	/*
1332	 * If no imf was specified (i.e. kernel consumer),
1333	 * fake one up and assume it is an ASM join.
1334	 */
1335	if (imf == NULL) {
1336		im6f_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED);
1337		imf = &timf;
1338	}
1339
1340	/*
1341	 * Begin state merge transaction at MLD layer.
1342	 *
1343	 * As this particular invocation should not cause any memory
1344	 * to be allocated, and there is no opportunity to roll back
1345	 * the transaction, it MUST NOT fail.
1346	 */
1347
1348	ifp = inm->in6m_ifp;
1349	IN6_MULTI_LIST_LOCK();
1350	CTR1(KTR_MLD, "%s: merge inm state", __func__);
1351	error = in6m_merge(inm, imf);
1352	KASSERT(error == 0, ("%s: failed to merge inm state", __func__));
1353
1354	CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
1355	error = 0;
1356	if (ifp)
1357		error = mld_change_state(inm, 0);
1358	if (error)
1359		CTR1(KTR_MLD, "%s: failed mld downcall", __func__);
1360
1361	CTR2(KTR_MLD, "%s: dropping ref on %p", __func__, inm);
1362	if (ifp)
1363		IF_ADDR_WLOCK(ifp);
1364
1365	SLIST_INIT(&inmh);
1366	if (inm->in6m_refcount == 1)
1367		in6m_disconnect_locked(&inmh, inm);
1368	in6m_rele_locked(&inmh, inm);
1369	if (ifp)
1370		IF_ADDR_WUNLOCK(ifp);
1371	IN6_MULTI_LIST_UNLOCK();
1372	in6m_release_list_deferred(&inmh);
1373	return (error);
}

/*
1378 * Block or unblock an ASM multicast source on an inpcb.
1379 * This implements the delta-based API described in RFC 3678.
1380 *
1381 * The delta-based API applies only to exclusive-mode memberships.
1382 * An MLD downcall will be performed.
1383 *
1384 * SMPng: NOTE: Must take Giant as a join may create a new ifma.
1385 *
1386 * Return 0 if successful, otherwise return an appropriate error code.
1387 */
1388static int
1389in6p_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
1390{
1391	struct group_source_req		 gsr;
1392	sockunion_t			*gsa, *ssa;
1393	struct ifnet			*ifp;
1394	struct in6_mfilter		*imf;
1395	struct ip6_moptions		*imo;
1396	struct in6_msource		*ims;
1397	struct in6_multi			*inm;
1398	uint16_t			 fmode;
1399	int				 error, doblock;
1400#ifdef KTR
1401	char				 ip6tbuf[INET6_ADDRSTRLEN];
1402#endif
1403
1404	ifp = NULL;
1405	error = 0;
1406	doblock = 0;
1407
1408	memset(&gsr, 0, sizeof(struct group_source_req));
1409	gsa = (sockunion_t *)&gsr.gsr_group;
1410	ssa = (sockunion_t *)&gsr.gsr_source;
1411
1412	switch (sopt->sopt_name) {
1413	case MCAST_BLOCK_SOURCE:
1414	case MCAST_UNBLOCK_SOURCE:
1415		error = sooptcopyin(sopt, &gsr,
1416		    sizeof(struct group_source_req),
1417		    sizeof(struct group_source_req));
1418		if (error)
1419			return (error);
1420
1421		if (gsa->sin6.sin6_family != AF_INET6 ||
1422		    gsa->sin6.sin6_len != sizeof(struct sockaddr_in6))
1423			return (EINVAL);
1424
1425		if (ssa->sin6.sin6_family != AF_INET6 ||
1426		    ssa->sin6.sin6_len != sizeof(struct sockaddr_in6))
1427			return (EINVAL);
1428
1429		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
1430			return (EADDRNOTAVAIL);
1431
1432		ifp = ifnet_byindex(gsr.gsr_interface);
1433
1434		if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
1435			doblock = 1;
1436		break;
1437
1438	default:
1439		CTR2(KTR_MLD, "%s: unknown sopt_name %d",
1440		    __func__, sopt->sopt_name);
1441		return (EOPNOTSUPP);
1442		break;
1443	}
1444
1445	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
1446		return (EINVAL);
1447
1448	(void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
1449
1450	/*
1451	 * Check if we are actually a member of this group.
1452	 */
1453	imo = in6p_findmoptions(inp);
1454	imf = im6o_match_group(imo, ifp, &gsa->sa);
1455	if (imf == NULL) {
1456		error = EADDRNOTAVAIL;
1457		goto out_in6p_locked;
1458	}
1459	inm = imf->im6f_in6m;
1460
1461	/*
1462	 * Attempting to use the delta-based API on an
1463	 * non exclusive-mode membership is an error.
1464	 */
1465	fmode = imf->im6f_st[0];
1466	if (fmode != MCAST_EXCLUDE) {
1467		error = EINVAL;
1468		goto out_in6p_locked;
1469	}
1470
1471	/*
1472	 * Deal with error cases up-front:
1473	 *  Asked to block, but already blocked; or
1474	 *  Asked to unblock, but nothing to unblock.
1475	 * If adding a new block entry, allocate it.
1476	 */
1477	ims = im6o_match_source(imf, &ssa->sa);
1478	if ((ims != NULL && doblock) || (ims == NULL && !doblock)) {
1479		CTR3(KTR_MLD, "%s: source %s %spresent", __func__,
1480		    ip6_sprintf(ip6tbuf, &ssa->sin6.sin6_addr),
1481		    doblock ? "" : "not ");
1482		error = EADDRNOTAVAIL;
1483		goto out_in6p_locked;
1484	}
1485
1486	INP_WLOCK_ASSERT(inp);
1487
1488	/*
1489	 * Begin state merge transaction at socket layer.
1490	 */
1491	if (doblock) {
1492		CTR2(KTR_MLD, "%s: %s source", __func__, "block");
1493		ims = im6f_graft(imf, fmode, &ssa->sin6);
1494		if (ims == NULL)
1495			error = ENOMEM;
1496	} else {
1497		CTR2(KTR_MLD, "%s: %s source", __func__, "allow");
1498		error = im6f_prune(imf, &ssa->sin6);
1499	}
1500
1501	if (error) {
1502		CTR1(KTR_MLD, "%s: merge imf state failed", __func__);
1503		goto out_im6f_rollback;
1504	}
1505
1506	/*
1507	 * Begin state merge transaction at MLD layer.
1508	 */
1509	IN6_MULTI_LIST_LOCK();
1510	CTR1(KTR_MLD, "%s: merge inm state", __func__);
1511	error = in6m_merge(inm, imf);
1512	if (error)
1513		CTR1(KTR_MLD, "%s: failed to merge inm state", __func__);
1514	else {
1515		CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
1516		error = mld_change_state(inm, 0);
1517		if (error)
1518			CTR1(KTR_MLD, "%s: failed mld downcall", __func__);
1519	}
1520
1521	IN6_MULTI_LIST_UNLOCK();
1522
1523out_im6f_rollback:
1524	if (error)
1525		im6f_rollback(imf);
1526	else
1527		im6f_commit(imf);
1528
1529	im6f_reap(imf);
1530
1531out_in6p_locked:
1532	INP_WUNLOCK(inp);
1533	return (error);
1534}
1535
1536/*
1537 * Given an inpcb, return its multicast options structure pointer.  Accepts
1538 * an unlocked inpcb pointer, but will return it locked.  May sleep.
1539 *
1540 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
1541 * SMPng: NOTE: Returns with the INP write lock held.
1542 */
1543static struct ip6_moptions *
1544in6p_findmoptions(struct inpcb *inp)
1545{
1546	struct ip6_moptions	 *imo;
1547
1548	INP_WLOCK(inp);
1549	if (inp->in6p_moptions != NULL)
1550		return (inp->in6p_moptions);
1551
1552	INP_WUNLOCK(inp);
1553
1554	imo = malloc(sizeof(*imo), M_IP6MOPTS, M_WAITOK);
1555
1556	imo->im6o_multicast_ifp = NULL;
1557	imo->im6o_multicast_hlim = V_ip6_defmcasthlim;
1558	imo->im6o_multicast_loop = in6_mcast_loop;
1559	STAILQ_INIT(&imo->im6o_head);
1560
1561	INP_WLOCK(inp);
1562	if (inp->in6p_moptions != NULL) {
1563		free(imo, M_IP6MOPTS);
1564		return (inp->in6p_moptions);
1565	}
1566	inp->in6p_moptions = imo;
1567	return (imo);
1568}
1569
1570/*
1571 * Discard the IPv6 multicast options (and source filters).
1572 *
1573 * SMPng: NOTE: assumes INP write lock is held.
1574 *
1575 * XXX can all be safely deferred to epoch_call
1576 *
1577 */
1578
1579static void
1580inp_gcmoptions(struct ip6_moptions *imo)
1581{
1582	struct in6_mfilter *imf;
1583	struct in6_multi *inm;
1584	struct ifnet *ifp;
1585
1586	while ((imf = ip6_mfilter_first(&imo->im6o_head)) != NULL) {
		ip6_mfilter_remove(&imo->im6o_head, imf);

		im6f_leave(imf);
		if ((inm = imf->im6f_in6m) != NULL) {
			if ((ifp = inm->in6m_ifp) != NULL) {
				CURVNET_SET(ifp->if_vnet);
				(void)in6_leavegroup(inm, imf);
				CURVNET_RESTORE();
			} else {
				(void)in6_leavegroup(inm, imf);
			}
		}
		ip6_mfilter_free(imf);
	}
	free(imo, M_IP6MOPTS);
1602}
1603
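/*
 * Free the per-socket IPv6 multicast options, if any are attached.
 */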
1604void
1605ip6_freemoptions(struct ip6_moptions *imo)
1606{
1607	if (imo == NULL)
1608		return;
1609	inp_gcmoptions(imo);
1610}
1611
1612/*
1613 * Atomically get source filters on a socket for an IPv6 multicast group.
1614 * Called with INP lock held; returns with lock released.
1615 */
1616static int
1617in6p_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
1618{
1619	struct __msfilterreq	 msfr;
1620	sockunion_t		*gsa;
1621	struct ifnet		*ifp;
1622	struct ip6_moptions	*imo;
1623	struct in6_mfilter	*imf;
1624	struct ip6_msource	*ims;
1625	struct in6_msource	*lims;
1626	struct sockaddr_in6	*psin;
1627	struct sockaddr_storage	*ptss;
1628	struct sockaddr_storage	*tss;
1629	int			 error;
1630	size_t			 nsrcs, ncsrcs;
1631
1632	INP_WLOCK_ASSERT(inp);
1633
1634	imo = inp->in6p_moptions;
1635	KASSERT(imo != NULL, ("%s: null ip6_moptions", __func__));
1636
1637	INP_WUNLOCK(inp);
1638
1639	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
1640	    sizeof(struct __msfilterreq));
1641	if (error)
1642		return (error);
1643
1644	if (msfr.msfr_group.ss_family != AF_INET6 ||
1645	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in6))
1646		return (EINVAL);
1647
1648	gsa = (sockunion_t *)&msfr.msfr_group;
1649	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
1650		return (EINVAL);
1651
1652	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
1653		return (EADDRNOTAVAIL);
1654	ifp = ifnet_byindex(msfr.msfr_ifindex);
1655	if (ifp == NULL)
1656		return (EADDRNOTAVAIL);
1657	(void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
1658
1659	INP_WLOCK(inp);
1660
1661	/*
1662	 * Lookup group on the socket.
1663	 */
1664	imf = im6o_match_group(imo, ifp, &gsa->sa);
1665	if (imf == NULL) {
1666		INP_WUNLOCK(inp);
1667		return (EADDRNOTAVAIL);
1668	}
1669
1670	/*
1671	 * Ignore memberships which are in limbo.
1672	 */
1673	if (imf->im6f_st[1] == MCAST_UNDEFINED) {
1674		INP_WUNLOCK(inp);
1675		return (EAGAIN);
1676	}
1677	msfr.msfr_fmode = imf->im6f_st[1];
1678
1679	/*
1680	 * If the user specified a buffer, copy out the source filter
1681	 * entries to userland gracefully.
1682	 * We only copy out the number of entries which userland
1683	 * has asked for, but we always tell userland how big the
1684	 * buffer really needs to be.
1685	 */
1686	if (msfr.msfr_nsrcs > in6_mcast_maxsocksrc)
1687		msfr.msfr_nsrcs = in6_mcast_maxsocksrc;
1688	tss = NULL;
1689	if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
1690		tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
1691		    M_TEMP, M_NOWAIT | M_ZERO);
1692		if (tss == NULL) {
1693			INP_WUNLOCK(inp);
1694			return (ENOBUFS);
1695		}
1696	}
1697
1698	/*
1699	 * Count number of sources in-mode at t0.
1700	 * If buffer space exists and remains, copy out source entries.
1701	 */
1702	nsrcs = msfr.msfr_nsrcs;
1703	ncsrcs = 0;
1704	ptss = tss;
1705	RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) {
1706		lims = (struct in6_msource *)ims;
1707		if (lims->im6sl_st[0] == MCAST_UNDEFINED ||
1708		    lims->im6sl_st[0] != imf->im6f_st[0])
1709			continue;
1710		++ncsrcs;
1711		if (tss != NULL && nsrcs > 0) {
1712			psin = (struct sockaddr_in6 *)ptss;
1713			psin->sin6_family = AF_INET6;
1714			psin->sin6_len = sizeof(struct sockaddr_in6);
1715			psin->sin6_addr = lims->im6s_addr;
1716			psin->sin6_port = 0;
1717			--nsrcs;
1718			++ptss;
1719		}
1720	}
1721
1722	INP_WUNLOCK(inp);
1723
1724	if (tss != NULL) {
1725		error = copyout(tss, msfr.msfr_srcs,
1726		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
1727		free(tss, M_TEMP);
1728		if (error)
1729			return (error);
1730	}
1731
1732	msfr.msfr_nsrcs = ncsrcs;
1733	error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));
1734
1735	return (error);
1736}
1737
1738/*
1739 * Return the IP multicast options in response to user getsockopt().
1740 */
1741int
1742ip6_getmoptions(struct inpcb *inp, struct sockopt *sopt)
1743{
1744	struct ip6_moptions	*im6o;
1745	int			 error;
1746	u_int			 optval;
1747
1748	INP_WLOCK(inp);
1749	im6o = inp->in6p_moptions;
1750	/*
 * If the socket is neither of type SOCK_RAW nor SOCK_DGRAM,
1752	 * or is a divert socket, reject it.
1753	 */
1754	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
1755	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
1756	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
1757		INP_WUNLOCK(inp);
1758		return (EOPNOTSUPP);
1759	}
1760
1761	error = 0;
1762	switch (sopt->sopt_name) {
1763	case IPV6_MULTICAST_IF:
1764		if (im6o == NULL || im6o->im6o_multicast_ifp == NULL) {
1765			optval = 0;
1766		} else {
1767			optval = im6o->im6o_multicast_ifp->if_index;
1768		}
1769		INP_WUNLOCK(inp);
1770		error = sooptcopyout(sopt, &optval, sizeof(u_int));
1771		break;
1772
1773	case IPV6_MULTICAST_HOPS:
1774		if (im6o == NULL)
1775			optval = V_ip6_defmcasthlim;
1776		else
1777			optval = im6o->im6o_multicast_hlim;
1778		INP_WUNLOCK(inp);
1779		error = sooptcopyout(sopt, &optval, sizeof(u_int));
1780		break;
1781
1782	case IPV6_MULTICAST_LOOP:
1783		if (im6o == NULL)
1784			optval = in6_mcast_loop; /* XXX VIMAGE */
1785		else
1786			optval = im6o->im6o_multicast_loop;
1787		INP_WUNLOCK(inp);
1788		error = sooptcopyout(sopt, &optval, sizeof(u_int));
1789		break;
1790
1791	case IPV6_MSFILTER:
1792		if (im6o == NULL) {
1793			error = EADDRNOTAVAIL;
1794			INP_WUNLOCK(inp);
1795		} else {
1796			error = in6p_get_source_filters(inp, sopt);
1797		}
1798		break;
1799
1800	default:
1801		INP_WUNLOCK(inp);
1802		error = ENOPROTOOPT;
1803		break;
1804	}
1805
1806	INP_UNLOCK_ASSERT(inp);
1807
1808	return (error);
1809}
1810
1811/*
1812 * Look up the ifnet to use for a multicast group membership,
1813 * given the address of an IPv6 group.
1814 *
1815 * This routine exists to support legacy IPv6 multicast applications.
1816 *
1817 * Use the socket's current FIB number for any required FIB lookup. Look up the
1818 * group address in the unicast FIB, and use its ifp; usually, this points to
1819 * the default next-hop.  If the FIB lookup fails, return NULL.
1820 *
1821 * FUTURE: Support multiple forwarding tables for IPv6.
1822 *
1823 * Returns NULL if no ifp could be found.
1824 */
1825static struct ifnet *
1826in6p_lookup_mcast_ifp(const struct inpcb *inp, const struct sockaddr_in6 *gsin6)
1827{
1828	struct nhop6_basic	nh6;
1829	struct in6_addr		dst;
1830	uint32_t		scopeid;
1831	uint32_t		fibnum;
1832
1833	KASSERT(gsin6->sin6_family == AF_INET6,
1834	    ("%s: not AF_INET6 group", __func__));
1835
1836	in6_splitscope(&gsin6->sin6_addr, &dst, &scopeid);
1837	fibnum = inp->inp_inc.inc_fibnum;
1838	if (fib6_lookup_nh_basic(fibnum, &dst, scopeid, 0, 0, &nh6) != 0)
1839		return (NULL);
1840
1841	return (nh6.nh_ifp);
1842}
1843
1844/*
1845 * Join an IPv6 multicast group, possibly with a source.
1846 *
1847 * FIXME: The KAME use of the unspecified address (::)
1848 * to join *all* multicast groups is currently unsupported.
1849 */
1850static int
1851in6p_join_group(struct inpcb *inp, struct sockopt *sopt)
1852{
1853	struct in6_multi_head		 inmh;
1854	struct group_source_req		 gsr;
1855	sockunion_t			*gsa, *ssa;
1856	struct ifnet			*ifp;
1857	struct in6_mfilter		*imf;
1858	struct ip6_moptions		*imo;
1859	struct in6_multi		*inm;
1860	struct in6_msource		*lims;
1861	int				 error, is_new;
1862
1863	SLIST_INIT(&inmh);
1864	ifp = NULL;
1865	lims = NULL;
1866	error = 0;
1867
1868	memset(&gsr, 0, sizeof(struct group_source_req));
1869	gsa = (sockunion_t *)&gsr.gsr_group;
1870	gsa->ss.ss_family = AF_UNSPEC;
1871	ssa = (sockunion_t *)&gsr.gsr_source;
1872	ssa->ss.ss_family = AF_UNSPEC;
1873
1874	/*
1875	 * Chew everything into a struct group_source_req.
1876	 * Clear the port field, as the sockaddr being copied in
1877	 * may later be matched with a binary comparison.
1878	 * Ignore any passed-in scope ID.
1879	 */
1880	switch (sopt->sopt_name) {
1881	case IPV6_JOIN_GROUP: {
1882		struct ipv6_mreq mreq;
1883
1884		error = sooptcopyin(sopt, &mreq, sizeof(struct ipv6_mreq),
1885		    sizeof(struct ipv6_mreq));
1886		if (error)
1887			return (error);
1888
1889		gsa->sin6.sin6_family = AF_INET6;
1890		gsa->sin6.sin6_len = sizeof(struct sockaddr_in6);
1891		gsa->sin6.sin6_addr = mreq.ipv6mr_multiaddr;
1892
1893		if (mreq.ipv6mr_interface == 0) {
1894			ifp = in6p_lookup_mcast_ifp(inp, &gsa->sin6);
1895		} else {
1896			if (V_if_index < mreq.ipv6mr_interface)
1897				return (EADDRNOTAVAIL);
1898			ifp = ifnet_byindex(mreq.ipv6mr_interface);
1899		}
1900		CTR3(KTR_MLD, "%s: ipv6mr_interface = %d, ifp = %p",
1901		    __func__, mreq.ipv6mr_interface, ifp);
1902	} break;
1903
1904	case MCAST_JOIN_GROUP:
1905	case MCAST_JOIN_SOURCE_GROUP:
1906		if (sopt->sopt_name == MCAST_JOIN_GROUP) {
1907			error = sooptcopyin(sopt, &gsr,
1908			    sizeof(struct group_req),
1909			    sizeof(struct group_req));
1910		} else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
1911			error = sooptcopyin(sopt, &gsr,
1912			    sizeof(struct group_source_req),
1913			    sizeof(struct group_source_req));
1914		}
1915		if (error)
1916			return (error);
1917
1918		if (gsa->sin6.sin6_family != AF_INET6 ||
1919		    gsa->sin6.sin6_len != sizeof(struct sockaddr_in6))
1920			return (EINVAL);
1921
1922		if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
1923			if (ssa->sin6.sin6_family != AF_INET6 ||
1924			    ssa->sin6.sin6_len != sizeof(struct sockaddr_in6))
1925				return (EINVAL);
1926			if (IN6_IS_ADDR_MULTICAST(&ssa->sin6.sin6_addr))
1927				return (EINVAL);
1928			/*
1929			 * TODO: Validate embedded scope ID in source
1930			 * list entry against passed-in ifp, if and only
1931			 * if source list filter entry is iface or node local.
1932			 */
1933			in6_clearscope(&ssa->sin6.sin6_addr);
1934			ssa->sin6.sin6_port = 0;
1935			ssa->sin6.sin6_scope_id = 0;
1936		}
1937
1938		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
1939			return (EADDRNOTAVAIL);
1940		ifp = ifnet_byindex(gsr.gsr_interface);
1941		break;
1942
1943	default:
1944		CTR2(KTR_MLD, "%s: unknown sopt_name %d",
1945		    __func__, sopt->sopt_name);
1946		return (EOPNOTSUPP);
1947		break;
1948	}
1949
1950	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
1951		return (EINVAL);
1952
1953	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
1954		return (EADDRNOTAVAIL);
1955
1956	gsa->sin6.sin6_port = 0;
1957	gsa->sin6.sin6_scope_id = 0;
1958
1959	/*
1960	 * Always set the scope zone ID on memberships created from userland.
1961	 * Use the passed-in ifp to do this.
1962	 * XXX The in6_setscope() return value is meaningless.
1963	 * XXX SCOPE6_LOCK() is taken by in6_setscope().
1964	 */
1965	(void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
1966
1967	IN6_MULTI_LOCK();
1968
1969	/*
1970	 * Find the membership in the membership list.
1971	 */
1972	imo = in6p_findmoptions(inp);
1973	imf = im6o_match_group(imo, ifp, &gsa->sa);
1974	if (imf == NULL) {
1975		is_new = 1;
1976		inm = NULL;
1977
1978		if (ip6_mfilter_count(&imo->im6o_head) >= IPV6_MAX_MEMBERSHIPS) {
1979			error = ENOMEM;
1980			goto out_in6p_locked;
1981		}
1982	} else {
1983		is_new = 0;
1984		inm = imf->im6f_in6m;
1985
1986		if (ssa->ss.ss_family != AF_UNSPEC) {
1987			/*
1988			 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership
1989			 * is an error. On an existing inclusive membership,
1990			 * it just adds the source to the filter list.
1991			 */
1992			if (imf->im6f_st[1] != MCAST_INCLUDE) {
1993				error = EINVAL;
1994				goto out_in6p_locked;
1995			}
1996			/*
1997			 * Throw out duplicates.
1998			 *
1999			 * XXX FIXME: This makes a naive assumption that
2000			 * even if entries exist for *ssa in this imf,
2001			 * they will be rejected as dupes, even if they
2002			 * are not valid in the current mode (in-mode).
2003			 *
2004			 * in6_msource is transactioned just as for anything
2005			 * else in SSM -- but note naive use of in6m_graft()
2006			 * below for allocating new filter entries.
2007			 *
2008			 * This is only an issue if someone mixes the
2009			 * full-state SSM API with the delta-based API,
2010			 * which is discouraged in the relevant RFCs.
2011			 */
2012			lims = im6o_match_source(imf, &ssa->sa);
2013			if (lims != NULL /*&&
2014			    lims->im6sl_st[1] == MCAST_INCLUDE*/) {
2015				error = EADDRNOTAVAIL;
2016				goto out_in6p_locked;
2017			}
2018		} else {
2019			/*
2020			 * MCAST_JOIN_GROUP alone, on any existing membership,
2021			 * is rejected, to stop the same inpcb tying up
2022			 * multiple refs to the in6_multi.
2023			 * On an existing inclusive membership, this is also
2024			 * an error; if you want to change filter mode,
2025			 * you must use the userland API setsourcefilter().
2026			 * XXX We don't reject this for imf in UNDEFINED
2027			 * state at t1, because allocation of a filter
2028			 * is atomic with allocation of a membership.
2029			 */
2030			error = EADDRINUSE;
2031			goto out_in6p_locked;
2032		}
2033	}
2034
2035	/*
2036	 * Begin state merge transaction at socket layer.
2037	 */
2038	INP_WLOCK_ASSERT(inp);
2039
2040	/*
2041	 * Graft new source into filter list for this inpcb's
2042	 * membership of the group. The in6_multi may not have
2043	 * been allocated yet if this is a new membership; however,
2044	 * the in6_mfilter slot will be allocated and must be initialized.
2045	 *
2046	 * Note: Grafting of exclusive mode filters doesn't happen
2047	 * in this path.
2048	 * XXX: Should check for non-NULL lims (node exists but may
2049	 * not be in-mode) for interop with full-state API.
2050	 */
2051	if (ssa->ss.ss_family != AF_UNSPEC) {
2052		/* Membership starts in IN mode */
2053		if (is_new) {
2054			CTR1(KTR_MLD, "%s: new join w/source", __func__);
2055			imf = ip6_mfilter_alloc(M_NOWAIT, MCAST_UNDEFINED, MCAST_INCLUDE);
2056			if (imf == NULL) {
2057				error = ENOMEM;
2058				goto out_in6p_locked;
2059			}
2060		} else {
2061			CTR2(KTR_MLD, "%s: %s source", __func__, "allow");
2062		}
2063		lims = im6f_graft(imf, MCAST_INCLUDE, &ssa->sin6);
2064		if (lims == NULL) {
2065			CTR1(KTR_MLD, "%s: merge imf state failed",
2066			    __func__);
2067			error = ENOMEM;
2068			goto out_in6p_locked;
2069		}
2070	} else {
2071		/* No address specified; membership starts in EX mode */
2072		if (is_new) {
2073			CTR1(KTR_MLD, "%s: new join w/o source", __func__);
2074			imf = ip6_mfilter_alloc(M_NOWAIT, MCAST_UNDEFINED, MCAST_EXCLUDE);
2075			if (imf == NULL) {
2076				error = ENOMEM;
2077				goto out_in6p_locked;
2078			}
2079		}
2080	}
2081
2082	/*
2083	 * Begin state merge transaction at MLD layer.
2084	 */
2085	if (is_new) {
2086		in_pcbref(inp);
2087		INP_WUNLOCK(inp);
2088
2089		error = in6_joingroup_locked(ifp, &gsa->sin6.sin6_addr, imf,
2090		    &imf->im6f_in6m, 0);
2091
2092		INP_WLOCK(inp);
2093		if (in_pcbrele_wlocked(inp)) {
2094			error = ENXIO;
2095			goto out_in6p_unlocked;
2096		}
2097		if (error) {
2098			goto out_in6p_locked;
2099		}
2100		/*
2101		 * NOTE: Refcount from in6_joingroup_locked()
2102		 * is protecting membership.
2103		 */
2104		ip6_mfilter_insert(&imo->im6o_head, imf);
2105	} else {
2106		CTR1(KTR_MLD, "%s: merge inm state", __func__);
2107		IN6_MULTI_LIST_LOCK();
2108		error = in6m_merge(inm, imf);
2109		if (error) {
2110			CTR1(KTR_MLD, "%s: failed to merge inm state",
2111			    __func__);
2112			IN6_MULTI_LIST_UNLOCK();
2113			im6f_rollback(imf);
2114			im6f_reap(imf);
2115			goto out_in6p_locked;
2116		}
2117		CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
2118		error = mld_change_state(inm, 0);
2119		IN6_MULTI_LIST_UNLOCK();
2120
2121		if (error) {
2122			CTR1(KTR_MLD, "%s: failed mld downcall",
2123			     __func__);
2124			im6f_rollback(imf);
2125			im6f_reap(imf);
2126			goto out_in6p_locked;
2127		}
2128	}
2129
2130	im6f_commit(imf);
2131	imf = NULL;
2132
2133out_in6p_locked:
2134	INP_WUNLOCK(inp);
2135out_in6p_unlocked:
2136	IN6_MULTI_UNLOCK();
2137
2138	if (is_new && imf) {
2139		if (imf->im6f_in6m != NULL) {
2140			struct in6_multi_head inmh;
2141
2142			SLIST_INIT(&inmh);
2143			SLIST_INSERT_HEAD(&inmh, imf->im6f_in6m, in6m_defer);
2144			in6m_release_list_deferred(&inmh);
2145		}
2146		ip6_mfilter_free(imf);
2147	}
2148	return (error);
2149}
2150
2151/*
2152 * Leave an IPv6 multicast group on an inpcb, possibly with a source.
2153 */
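/*
 * Illustrative userland usage (a sketch only): leaving a group joined
 * earlier with MCAST_JOIN_GROUP, reusing a previously filled-in
 * struct group_req 'gr' and socket 's' (both names are assumptions).
 *
 *	if (setsockopt(s, IPPROTO_IPV6, MCAST_LEAVE_GROUP,
 *	    &gr, sizeof(gr)) != 0)
 *		warn("MCAST_LEAVE_GROUP");
 */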
2154static int
2155in6p_leave_group(struct inpcb *inp, struct sockopt *sopt)
2156{
2157	struct ipv6_mreq		 mreq;
2158	struct group_source_req		 gsr;
2159	sockunion_t			*gsa, *ssa;
2160	struct ifnet			*ifp;
2161	struct in6_mfilter		*imf;
2162	struct ip6_moptions		*imo;
2163	struct in6_msource		*ims;
2164	struct in6_multi		*inm;
2165	uint32_t			 ifindex;
2166	int				 error;
2167	bool				 is_final;
2168#ifdef KTR
2169	char				 ip6tbuf[INET6_ADDRSTRLEN];
2170#endif
2171
2172	ifp = NULL;
2173	ifindex = 0;
2174	error = 0;
2175	is_final = true;
2176
2177	memset(&gsr, 0, sizeof(struct group_source_req));
2178	gsa = (sockunion_t *)&gsr.gsr_group;
2179	gsa->ss.ss_family = AF_UNSPEC;
2180	ssa = (sockunion_t *)&gsr.gsr_source;
2181	ssa->ss.ss_family = AF_UNSPEC;
2182
2183	/*
2184	 * Chew everything passed in into a struct group_source_req,
2185	 * as that is easier to process.
2186	 * Note: Any embedded scope ID in the multicast group passed
2187	 * in by userland is ignored; the interface index is the
2188	 * recommended mechanism for specifying an interface (see below).
2189	 */
2190	switch (sopt->sopt_name) {
2191	case IPV6_LEAVE_GROUP:
2192		error = sooptcopyin(sopt, &mreq, sizeof(struct ipv6_mreq),
2193		    sizeof(struct ipv6_mreq));
2194		if (error)
2195			return (error);
2196		gsa->sin6.sin6_family = AF_INET6;
2197		gsa->sin6.sin6_len = sizeof(struct sockaddr_in6);
2198		gsa->sin6.sin6_addr = mreq.ipv6mr_multiaddr;
2199		gsa->sin6.sin6_port = 0;
2200		gsa->sin6.sin6_scope_id = 0;
2201		ifindex = mreq.ipv6mr_interface;
2202		break;
2203
2204	case MCAST_LEAVE_GROUP:
2205	case MCAST_LEAVE_SOURCE_GROUP:
2206		if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
2207			error = sooptcopyin(sopt, &gsr,
2208			    sizeof(struct group_req),
2209			    sizeof(struct group_req));
2210		} else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
2211			error = sooptcopyin(sopt, &gsr,
2212			    sizeof(struct group_source_req),
2213			    sizeof(struct group_source_req));
2214		}
2215		if (error)
2216			return (error);
2217
2218		if (gsa->sin6.sin6_family != AF_INET6 ||
2219		    gsa->sin6.sin6_len != sizeof(struct sockaddr_in6))
2220			return (EINVAL);
2221		if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
2222			if (ssa->sin6.sin6_family != AF_INET6 ||
2223			    ssa->sin6.sin6_len != sizeof(struct sockaddr_in6))
2224				return (EINVAL);
2225			if (IN6_IS_ADDR_MULTICAST(&ssa->sin6.sin6_addr))
2226				return (EINVAL);
2227			/*
2228			 * TODO: Validate embedded scope ID in source
2229			 * list entry against passed-in ifp, if and only
2230			 * if source list filter entry is iface or node local.
2231			 */
2232			in6_clearscope(&ssa->sin6.sin6_addr);
2233		}
2234		gsa->sin6.sin6_port = 0;
2235		gsa->sin6.sin6_scope_id = 0;
2236		ifindex = gsr.gsr_interface;
2237		break;
2238
2239	default:
2240		CTR2(KTR_MLD, "%s: unknown sopt_name %d",
2241		    __func__, sopt->sopt_name);
2242		return (EOPNOTSUPP);
2243		break;
2244	}
2245
2246	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
2247		return (EINVAL);
2248
2249	/*
2250	 * Validate interface index if provided. If no interface index
2251	 * was provided separately, attempt to look the membership up
2252	 * from the default scope as a last resort to disambiguate
2253	 * the membership we are being asked to leave.
2254	 * XXX SCOPE6 lock potentially taken here.
2255	 */
2256	if (ifindex != 0) {
2257		if (V_if_index < ifindex)
2258			return (EADDRNOTAVAIL);
2259		ifp = ifnet_byindex(ifindex);
2260		if (ifp == NULL)
2261			return (EADDRNOTAVAIL);
2262		(void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
2263	} else {
2264		error = sa6_embedscope(&gsa->sin6, V_ip6_use_defzone);
2265		if (error)
2266			return (EADDRNOTAVAIL);
2267		/*
2268		 * Some badly behaved applications don't pass an ifindex
2269		 * or a scope ID, which is an API violation. In this case,
2270		 * perform a lookup as per a v6 join.
2271		 *
2272		 * XXX For now, stomp on zone ID for the corner case.
2273		 * This is not the 'KAME way', but we need to see the ifp
2274		 * directly until such time as this implementation is
2275		 * refactored, assuming the scope IDs are the way to go.
2276		 */
2277		ifindex = ntohs(gsa->sin6.sin6_addr.s6_addr16[1]);
2278		if (ifindex == 0) {
2279			CTR2(KTR_MLD, "%s: warning: no ifindex, looking up "
2280			    "ifp for group %s.", __func__,
2281			    ip6_sprintf(ip6tbuf, &gsa->sin6.sin6_addr));
2282			ifp = in6p_lookup_mcast_ifp(inp, &gsa->sin6);
2283		} else {
2284			ifp = ifnet_byindex(ifindex);
2285		}
2286		if (ifp == NULL)
2287			return (EADDRNOTAVAIL);
2288	}
2289
2290	CTR2(KTR_MLD, "%s: ifp = %p", __func__, ifp);
2291	KASSERT(ifp != NULL, ("%s: ifp did not resolve", __func__));
2292
2293	IN6_MULTI_LOCK();
2294
2295	/*
2296	 * Find the membership in the membership list.
2297	 */
2298	imo = in6p_findmoptions(inp);
2299	imf = im6o_match_group(imo, ifp, &gsa->sa);
2300	if (imf == NULL) {
2301		error = EADDRNOTAVAIL;
2302		goto out_in6p_locked;
2303	}
2304	inm = imf->im6f_in6m;
2305
2306	if (ssa->ss.ss_family != AF_UNSPEC)
2307		is_final = false;
2308
2309	/*
2310	 * Begin state merge transaction at socket layer.
2311	 */
2312	INP_WLOCK_ASSERT(inp);
2313
2314	/*
2315	 * If we were instructed only to leave a given source, just prune it;
2316	 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships.
2317	 */
2318	if (is_final) {
2319		ip6_mfilter_remove(&imo->im6o_head, imf);
2320		im6f_leave(imf);
2321
2322		/*
2323		 * Give up the multicast address record to which
2324		 * the membership points.
2325		 */
2326		(void)in6_leavegroup_locked(inm, imf);
2327	} else {
2328		if (imf->im6f_st[0] == MCAST_EXCLUDE) {
2329			error = EADDRNOTAVAIL;
2330			goto out_in6p_locked;
2331		}
2332		ims = im6o_match_source(imf, &ssa->sa);
2333		if (ims == NULL) {
2334			CTR3(KTR_MLD, "%s: source %s %spresent", __func__,
2335			    ip6_sprintf(ip6tbuf, &ssa->sin6.sin6_addr),
2336			    "not ");
2337			error = EADDRNOTAVAIL;
2338			goto out_in6p_locked;
2339		}
2340		CTR2(KTR_MLD, "%s: %s source", __func__, "block");
2341		error = im6f_prune(imf, &ssa->sin6);
2342		if (error) {
2343			CTR1(KTR_MLD, "%s: merge imf state failed",
2344			    __func__);
2345			goto out_in6p_locked;
2346		}
2347	}
2348
2349	/*
2350	 * Begin state merge transaction at MLD layer.
2351	 */
2352	if (!is_final) {
2353		CTR1(KTR_MLD, "%s: merge inm state", __func__);
2354		IN6_MULTI_LIST_LOCK();
2355		error = in6m_merge(inm, imf);
2356		if (error) {
2357			CTR1(KTR_MLD, "%s: failed to merge inm state",
2358			    __func__);
2359			IN6_MULTI_LIST_UNLOCK();
2360			im6f_rollback(imf);
2361			im6f_reap(imf);
2362			goto out_in6p_locked;
2363		}
2364
2365		CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
2366		error = mld_change_state(inm, 0);
2367		IN6_MULTI_LIST_UNLOCK();
2368		if (error) {
2369			CTR1(KTR_MLD, "%s: failed mld downcall",
2370			     __func__);
2371			im6f_rollback(imf);
2372			im6f_reap(imf);
2373			goto out_in6p_locked;
2374		}
2375	}
2376
2377	im6f_commit(imf);
2378	im6f_reap(imf);
2379
2380out_in6p_locked:
2381	INP_WUNLOCK(inp);
2382
2383	if (is_final && imf)
2384		ip6_mfilter_free(imf);
2385
2386	IN6_MULTI_UNLOCK();
2387	return (error);
2388}
2389
2390/*
2391 * Select the interface for transmitting IPv6 multicast datagrams.
2392 *
2393 * The option value is an interface index (u_int), unlike the IPv4
2394 * equivalent, which also accepts a struct in_addr or struct ip_mreqn.
2395 * An interface index of 0 is used to remove a previous selection.
2396 * When no interface is selected, one is chosen for every send.
2397 */
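/*
 * Illustrative userland usage (a sketch only; the interface name is an
 * assumption): selecting the transmit interface by index.
 *
 *	u_int ifindex = if_nametoindex("em0");
 *
 *	if (setsockopt(s, IPPROTO_IPV6, IPV6_MULTICAST_IF,
 *	    &ifindex, sizeof(ifindex)) != 0)
 *		warn("IPV6_MULTICAST_IF");
 */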
2398static int
2399in6p_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
2400{
2401	struct ifnet		*ifp;
2402	struct ip6_moptions	*imo;
2403	u_int			 ifindex;
2404	int			 error;
2405
2406	if (sopt->sopt_valsize != sizeof(u_int))
2407		return (EINVAL);
2408
2409	error = sooptcopyin(sopt, &ifindex, sizeof(u_int), sizeof(u_int));
2410	if (error)
2411		return (error);
2412	if (V_if_index < ifindex)
2413		return (EINVAL);
2414	if (ifindex == 0)
2415		ifp = NULL;
2416	else {
2417		ifp = ifnet_byindex(ifindex);
2418		if (ifp == NULL)
2419			return (EINVAL);
2420		if ((ifp->if_flags & IFF_MULTICAST) == 0)
2421			return (EADDRNOTAVAIL);
2422	}
2423	imo = in6p_findmoptions(inp);
2424	imo->im6o_multicast_ifp = ifp;
2425	INP_WUNLOCK(inp);
2426
2427	return (0);
2428}
2429
2430/*
2431 * Atomically set source filters on a socket for an IPv6 multicast group.
2432 *
2433 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
2434 */
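/*
 * Illustrative userland usage (a sketch only): applications normally
 * reach IPV6_MSFILTER through the setsourcefilter(3) wrapper rather
 * than by filling in struct __msfilterreq directly. The socket 's',
 * interface index, and addresses below reuse the (assumed) names from
 * the MCAST_JOIN_SOURCE_GROUP example above.
 *
 *	struct sockaddr_storage srcs[1];
 *
 *	memcpy(&srcs[0], src, src->sin6_len);
 *	if (setsourcefilter(s, gsr.gsr_interface,
 *	    (struct sockaddr *)grp, grp->sin6_len,
 *	    MCAST_INCLUDE, 1, srcs) != 0)
 *		warn("setsourcefilter");
 */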
2435static int
2436in6p_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
2437{
2438	struct __msfilterreq	 msfr;
2439	sockunion_t		*gsa;
2440	struct ifnet		*ifp;
2441	struct in6_mfilter	*imf;
2442	struct ip6_moptions	*imo;
2443	struct in6_multi	*inm;
2444	int			 error;
2445
2446	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
2447	    sizeof(struct __msfilterreq));
2448	if (error)
2449		return (error);
2450
2451	if (msfr.msfr_nsrcs > in6_mcast_maxsocksrc)
2452		return (ENOBUFS);
2453
2454	if (msfr.msfr_fmode != MCAST_EXCLUDE &&
2455	    msfr.msfr_fmode != MCAST_INCLUDE)
2456		return (EINVAL);
2457
2458	if (msfr.msfr_group.ss_family != AF_INET6 ||
2459	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in6))
2460		return (EINVAL);
2461
2462	gsa = (sockunion_t *)&msfr.msfr_group;
2463	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
2464		return (EINVAL);
2465
2466	gsa->sin6.sin6_port = 0;	/* ignore port */
2467
2468	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
2469		return (EADDRNOTAVAIL);
2470	ifp = ifnet_byindex(msfr.msfr_ifindex);
2471	if (ifp == NULL)
2472		return (EADDRNOTAVAIL);
2473	(void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
2474
2475	/*
2476	 * Take the INP write lock.
2477	 * Check if this socket is a member of this group.
2478	 */
2479	imo = in6p_findmoptions(inp);
2480	imf = im6o_match_group(imo, ifp, &gsa->sa);
2481	if (imf == NULL) {
2482		error = EADDRNOTAVAIL;
2483		goto out_in6p_locked;
2484	}
2485	inm = imf->im6f_in6m;
2486
2487	/*
2488	 * Begin state merge transaction at socket layer.
2489	 */
2490	INP_WLOCK_ASSERT(inp);
2491
2492	imf->im6f_st[1] = msfr.msfr_fmode;
2493
2494	/*
2495	 * Apply any new source filters, if present.
2496	 * Make a kernel copy of the user-space source vector with
2497	 * a single copyin, so that any page faults are dealt with
2498	 * up-front, before the filter state is modified.
2499	 */
2500	if (msfr.msfr_nsrcs > 0) {
2501		struct in6_msource	*lims;
2502		struct sockaddr_in6	*psin;
2503		struct sockaddr_storage	*kss, *pkss;
2504		int			 i;
2505
2506		INP_WUNLOCK(inp);
2507
2508		CTR2(KTR_MLD, "%s: loading %lu source list entries",
2509		    __func__, (unsigned long)msfr.msfr_nsrcs);
2510		kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
2511		    M_TEMP, M_WAITOK);
2512		error = copyin(msfr.msfr_srcs, kss,
2513		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
2514		if (error) {
2515			free(kss, M_TEMP);
2516			return (error);
2517		}
2518
2519		INP_WLOCK(inp);
2520
2521		/*
2522		 * Mark all source filters as UNDEFINED at t1.
2523		 * Restore new group filter mode, as im6f_leave()
2524		 * will set it to INCLUDE.
2525		 */
2526		im6f_leave(imf);
2527		imf->im6f_st[1] = msfr.msfr_fmode;
2528
2529		/*
2530		 * Update socket layer filters at t1, lazy-allocating
2531		 * new entries. This saves a bunch of memory at the
2532		 * cost of one RB_FIND() per source entry; duplicate
2533		 * entries in the msfr_nsrcs vector are ignored.
2534		 * If we encounter an error, rollback transaction.
2535		 *
2536		 * XXX This too could be replaced with a set-symmetric-
2537		 * difference style loop, to avoid walking from the root
2538		 * every time, as the key space is common.
2539		 */
2540		for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) {
2541			psin = (struct sockaddr_in6 *)pkss;
2542			if (psin->sin6_family != AF_INET6) {
2543				error = EAFNOSUPPORT;
2544				break;
2545			}
2546			if (psin->sin6_len != sizeof(struct sockaddr_in6)) {
2547				error = EINVAL;
2548				break;
2549			}
2550			if (IN6_IS_ADDR_MULTICAST(&psin->sin6_addr)) {
2551				error = EINVAL;
2552				break;
2553			}
2554			/*
2555			 * TODO: Validate embedded scope ID in source
2556			 * list entry against passed-in ifp, if and only
2557			 * if source list filter entry is iface or node local.
2558			 */
2559			in6_clearscope(&psin->sin6_addr);
2560			error = im6f_get_source(imf, psin, &lims);
2561			if (error)
2562				break;
2563			lims->im6sl_st[1] = imf->im6f_st[1];
2564		}
2565		free(kss, M_TEMP);
2566	}
2567
2568	if (error)
2569		goto out_im6f_rollback;
2570
2571	INP_WLOCK_ASSERT(inp);
2572	IN6_MULTI_LIST_LOCK();
2573
2574	/*
2575	 * Begin state merge transaction at MLD layer.
2576	 */
2577	CTR1(KTR_MLD, "%s: merge inm state", __func__);
2578	error = in6m_merge(inm, imf);
2579	if (error)
2580		CTR1(KTR_MLD, "%s: failed to merge inm state", __func__);
2581	else {
2582		CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
2583		error = mld_change_state(inm, 0);
2584		if (error)
2585			CTR1(KTR_MLD, "%s: failed mld downcall", __func__);
2586	}
2587
2588	IN6_MULTI_LIST_UNLOCK();
2589
2590out_im6f_rollback:
2591	if (error)
2592		im6f_rollback(imf);
2593	else
2594		im6f_commit(imf);
2595
2596	im6f_reap(imf);
2597
2598out_in6p_locked:
2599	INP_WUNLOCK(inp);
2600	return (error);
2601}
2602
2603/*
2604 * Set the IPv6 multicast options in response to user setsockopt().
2605 *
2606 * Many of the socket options handled in this function duplicate the
2607 * functionality of socket options in the regular unicast API. However,
2608 * it is not possible to merge the duplicate code, because the idempotence
2609 * of the IPv6 multicast part of the BSD Sockets API must be preserved;
2610 * the effects of these options must be treated as separate and distinct.
2611 *
2612 * SMPng: XXX: Unlocked read of inp_socket believed OK.
2613 */
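/*
 * Illustrative userland usage (a sketch only): setting the multicast
 * hop limit and disabling loopback of locally originated datagrams.
 *
 *	int hlim = 32;
 *	u_int loop = 0;
 *
 *	(void)setsockopt(s, IPPROTO_IPV6, IPV6_MULTICAST_HOPS,
 *	    &hlim, sizeof(hlim));
 *	(void)setsockopt(s, IPPROTO_IPV6, IPV6_MULTICAST_LOOP,
 *	    &loop, sizeof(loop));
 */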
2614int
2615ip6_setmoptions(struct inpcb *inp, struct sockopt *sopt)
2616{
2617	struct ip6_moptions	*im6o;
2618	int			 error;
2619
2620	error = 0;
2621
2622	/*
2623	 * If socket is neither of type SOCK_RAW nor SOCK_DGRAM,
2624	 * or is a divert socket, reject it.
2625	 */
2626	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
2627	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
2628	     inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
2629		return (EOPNOTSUPP);
2630
2631	switch (sopt->sopt_name) {
2632	case IPV6_MULTICAST_IF:
2633		error = in6p_set_multicast_if(inp, sopt);
2634		break;
2635
2636	case IPV6_MULTICAST_HOPS: {
2637		int hlim;
2638
2639		if (sopt->sopt_valsize != sizeof(int)) {
2640			error = EINVAL;
2641			break;
2642		}
2643		error = sooptcopyin(sopt, &hlim, sizeof(hlim), sizeof(int));
2644		if (error)
2645			break;
2646		if (hlim < -1 || hlim > 255) {
2647			error = EINVAL;
2648			break;
2649		} else if (hlim == -1) {
2650			hlim = V_ip6_defmcasthlim;
2651		}
2652		im6o = in6p_findmoptions(inp);
2653		im6o->im6o_multicast_hlim = hlim;
2654		INP_WUNLOCK(inp);
2655		break;
2656	}
2657
2658	case IPV6_MULTICAST_LOOP: {
2659		u_int loop;
2660
2661		/*
2662		 * Set the loopback flag for outgoing multicast packets.
2663		 * Must be zero or one.
2664		 */
2665		if (sopt->sopt_valsize != sizeof(u_int)) {
2666			error = EINVAL;
2667			break;
2668		}
2669		error = sooptcopyin(sopt, &loop, sizeof(u_int), sizeof(u_int));
2670		if (error)
2671			break;
2672		if (loop > 1) {
2673			error = EINVAL;
2674			break;
2675		}
2676		im6o = in6p_findmoptions(inp);
2677		im6o->im6o_multicast_loop = loop;
2678		INP_WUNLOCK(inp);
2679		break;
2680	}
2681
2682	case IPV6_JOIN_GROUP:
2683	case MCAST_JOIN_GROUP:
2684	case MCAST_JOIN_SOURCE_GROUP:
2685		error = in6p_join_group(inp, sopt);
2686		break;
2687
2688	case IPV6_LEAVE_GROUP:
2689	case MCAST_LEAVE_GROUP:
2690	case MCAST_LEAVE_SOURCE_GROUP:
2691		error = in6p_leave_group(inp, sopt);
2692		break;
2693
2694	case MCAST_BLOCK_SOURCE:
2695	case MCAST_UNBLOCK_SOURCE:
2696		error = in6p_block_unblock_source(inp, sopt);
2697		break;
2698
2699	case IPV6_MSFILTER:
2700		error = in6p_set_source_filters(inp, sopt);
2701		break;
2702
2703	default:
2704		error = EOPNOTSUPP;
2705		break;
2706	}
2707
2708	INP_UNLOCK_ASSERT(inp);
2709
2710	return (error);
2711}
2712
2713/*
2714 * Expose MLD's multicast filter mode and source list(s) to userland,
2715 * keyed by (ifindex, group).
2716 * The filter mode is written out as a uint32_t, followed by
2717 * 0..n of struct in6_addr.
2718 * For use by ifmcstat(8).
2719 * SMPng: NOTE: unlocked read of ifindex space.
2720 */
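/*
 * Illustrative userland usage (a sketch only, as done by ifmcstat(8)):
 * append the interface index and the four 32-bit words of the group
 * address to the MIB of this node, then read back a uint32_t filter
 * mode followed by zero or more struct in6_addr. The sysctl node name
 * below is an assumption; the node itself is registered elsewhere in
 * this file.
 *
 *	int mib[CTL_MAXNAME + 5];
 *	size_t miblen = CTL_MAXNAME, len = 0;
 *
 *	(void)sysctlnametomib("net.inet6.ip6.mcast.filters", mib, &miblen);
 *	mib[miblen] = ifindex;
 *	memcpy(&mib[miblen + 1], &grp->sin6_addr, sizeof(struct in6_addr));
 *	(void)sysctl(mib, miblen + 5, NULL, &len, NULL, 0);
 */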
2721static int
2722sysctl_ip6_mcast_filters(SYSCTL_HANDLER_ARGS)
2723{
2724	struct in6_addr			 mcaddr;
2725	struct in6_addr			 src;
2726	struct ifnet			*ifp;
2727	struct ifmultiaddr		*ifma;
2728	struct in6_multi		*inm;
2729	struct ip6_msource		*ims;
2730	int				*name;
2731	int				 retval;
2732	u_int				 namelen;
2733	uint32_t			 fmode, ifindex;
2734#ifdef KTR
2735	char				 ip6tbuf[INET6_ADDRSTRLEN];
2736#endif
2737
2738	name = (int *)arg1;
2739	namelen = arg2;
2740
2741	if (req->newptr != NULL)
2742		return (EPERM);
2743
2744	/* int: ifindex + 4 * 32 bits of IPv6 address */
2745	if (namelen != 5)
2746		return (EINVAL);
2747
2748	ifindex = name[0];
2749	if (ifindex <= 0 || ifindex > V_if_index) {
2750		CTR2(KTR_MLD, "%s: ifindex %u out of range",
2751		    __func__, ifindex);
2752		return (ENOENT);
2753	}
2754
2755	memcpy(&mcaddr, &name[1], sizeof(struct in6_addr));
2756	if (!IN6_IS_ADDR_MULTICAST(&mcaddr)) {
2757		CTR2(KTR_MLD, "%s: group %s is not multicast",
2758		    __func__, ip6_sprintf(ip6tbuf, &mcaddr));
2759		return (EINVAL);
2760	}
2761
2762	ifp = ifnet_byindex(ifindex);
2763	if (ifp == NULL) {
2764		CTR2(KTR_MLD, "%s: no ifp for ifindex %u",
2765		    __func__, ifindex);
2766		return (ENOENT);
2767	}
2768	/*
2769	 * Internal MLD lookups require that scope/zone ID is set.
2770	 */
2771	(void)in6_setscope(&mcaddr, ifp, NULL);
2772
2773	retval = sysctl_wire_old_buffer(req,
2774	    sizeof(uint32_t) + (in6_mcast_maxgrpsrc * sizeof(struct in6_addr)));
2775	if (retval)
2776		return (retval);
2777
2778	IN6_MULTI_LOCK();
2779	IN6_MULTI_LIST_LOCK();
2780	IF_ADDR_RLOCK(ifp);
2781	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2782		inm = in6m_ifmultiaddr_get_inm(ifma);
2783		if (inm == NULL)
2784			continue;
2785		if (!IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, &mcaddr))
2786			continue;
2787		fmode = inm->in6m_st[1].iss_fmode;
2788		retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t));
2789		if (retval != 0)
2790			break;
2791		RB_FOREACH(ims, ip6_msource_tree, &inm->in6m_srcs) {
2792			CTR2(KTR_MLD, "%s: visit node %p", __func__, ims);
2793			/*
2794			 * Only copy-out sources which are in-mode.
2795			 */
2796			if (fmode != im6s_get_mode(inm, ims, 1)) {
2797				CTR1(KTR_MLD, "%s: skip non-in-mode",
2798				    __func__);
2799				continue;
2800			}
2801			src = ims->im6s_addr;
2802			retval = SYSCTL_OUT(req, &src,
2803			    sizeof(struct in6_addr));
2804			if (retval != 0)
2805				break;
2806		}
2807	}
2808	IF_ADDR_RUNLOCK(ifp);
2809
2810	IN6_MULTI_LIST_UNLOCK();
2811	IN6_MULTI_UNLOCK();
2812
2813	return (retval);
2814}
2815
2816#ifdef KTR
2817
2818static const char *in6m_modestrs[] = { "un", "in", "ex" };
2819
2820static const char *
2821in6m_mode_str(const int mode)
2822{
2823
2824	if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE)
2825		return (in6m_modestrs[mode]);
2826	return ("??");
2827}
2828
2829static const char *in6m_statestrs[] = {
2830	"not-member",
2831	"silent",
2832	"idle",
2833	"lazy",
2834	"sleeping",
2835	"awakening",
2836	"query-pending",
2837	"sg-query-pending",
2838	"leaving"
2839};
2840
2841static const char *
2842in6m_state_str(const int state)
2843{
2844
2845	if (state >= MLD_NOT_MEMBER && state <= MLD_LEAVING_MEMBER)
2846		return (in6m_statestrs[state]);
2847	return ("??");
2848}
2849
2850/*
2851 * Dump an in6_multi structure to the console.
2852 */
2853void
2854in6m_print(const struct in6_multi *inm)
2855{
2856	int t;
2857	char ip6tbuf[INET6_ADDRSTRLEN];
2858
2859	if ((ktr_mask & KTR_MLD) == 0)
2860		return;
2861
2862	printf("%s: --- begin in6m %p ---\n", __func__, inm);
2863	printf("addr %s ifp %p(%s) ifma %p\n",
2864	    ip6_sprintf(ip6tbuf, &inm->in6m_addr),
2865	    inm->in6m_ifp,
2866	    if_name(inm->in6m_ifp),
2867	    inm->in6m_ifma);
2868	printf("timer %u state %s refcount %u scq.len %u\n",
2869	    inm->in6m_timer,
2870	    in6m_state_str(inm->in6m_state),
2871	    inm->in6m_refcount,
2872	    mbufq_len(&inm->in6m_scq));
2873	printf("mli %p nsrc %lu sctimer %u scrv %u\n",
2874	    inm->in6m_mli,
2875	    inm->in6m_nsrc,
2876	    inm->in6m_sctimer,
2877	    inm->in6m_scrv);
2878	for (t = 0; t < 2; t++) {
2879		printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t,
2880		    in6m_mode_str(inm->in6m_st[t].iss_fmode),
2881		    inm->in6m_st[t].iss_asm,
2882		    inm->in6m_st[t].iss_ex,
2883		    inm->in6m_st[t].iss_in,
2884		    inm->in6m_st[t].iss_rec);
2885	}
2886	printf("%s: --- end in6m %p ---\n", __func__, inm);
2887}
2888
2889#else /* !KTR */
2890
2891void
2892in6m_print(const struct in6_multi *inm)
2893{
2894
2895}
2896
2897#endif /* KTR */
2898