1/*
2 * Copyright (c) 2010-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright (c) 2009 Bruce Simpson.
30 * All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 *    notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 *    notice, this list of conditions and the following disclaimer in the
39 *    documentation and/or other materials provided with the distribution.
40 * 3. The name of the author may not be used to endorse or promote
41 *    products derived from this software without specific prior written
42 *    permission.
43 *
44 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
45 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
46 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
47 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
48 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
49 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
50 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
51 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
52 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
53 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * SUCH DAMAGE.
55 */
56
57/*
58 * IPv6 multicast socket, group, and socket option processing module.
59 * Normative references: RFC 2292, RFC 3492, RFC 3542, RFC 3678, RFC 3810.
60 */
61
62#include <sys/cdefs.h>
63
64#include <sys/param.h>
65#include <sys/systm.h>
66#include <sys/kernel.h>
67#include <sys/malloc.h>
68#include <sys/mbuf.h>
69#include <sys/protosw.h>
70#include <sys/socket.h>
71#include <sys/socketvar.h>
72#include <sys/protosw.h>
73#include <sys/sysctl.h>
74#include <sys/tree.h>
75#include <sys/mcache.h>
76
77#include <kern/zalloc.h>
78
79#include <pexpert/pexpert.h>
80
81#include <net/if.h>
82#include <net/if_dl.h>
83#include <net/route.h>
84
85#include <netinet/in.h>
86#include <netinet/in_var.h>
87#include <netinet6/in6_var.h>
88#include <netinet/ip6.h>
89#include <netinet/icmp6.h>
90#include <netinet6/ip6_var.h>
91#include <netinet/in_pcb.h>
92#include <netinet/tcp.h>
93#include <netinet/tcp_seq.h>
94#include <netinet/tcp_var.h>
95#include <netinet6/nd6.h>
96#include <netinet6/mld6_var.h>
97#include <netinet6/scope6_var.h>
98
#ifndef __SOCKUNION_DECLARED
/*
 * Convenience union for viewing one sockaddr-shaped buffer as any of
 * the address families this module handles, without pointer casts at
 * every use site.  Guarded so multiple translation units may declare it.
 */
union sockunion {
	struct sockaddr_storage	ss;
	struct sockaddr		sa;
	struct sockaddr_dl	sdl;
	struct sockaddr_in6	sin6;
};
typedef union sockunion sockunion_t;
#define __SOCKUNION_DECLARED
#endif /* __SOCKUNION_DECLARED */
109
110static void	im6f_commit(struct in6_mfilter *);
111static int	im6f_get_source(struct in6_mfilter *imf,
112		    const struct sockaddr_in6 *psin,
113		    struct in6_msource **);
114static struct in6_msource *
115		im6f_graft(struct in6_mfilter *, const uint8_t,
116		    const struct sockaddr_in6 *);
117static int	im6f_prune(struct in6_mfilter *, const struct sockaddr_in6 *);
118static void	im6f_rollback(struct in6_mfilter *);
119static void	im6f_reap(struct in6_mfilter *);
120static int	im6o_grow(struct ip6_moptions *, size_t);
121static size_t	im6o_match_group(const struct ip6_moptions *,
122		    const struct ifnet *, const struct sockaddr *);
123static struct in6_msource *
124		im6o_match_source(const struct ip6_moptions *, const size_t,
125		    const struct sockaddr *);
126static void	im6s_merge(struct ip6_msource *ims,
127		    const struct in6_msource *lims, const int rollback);
128static int	in6_mc_get(struct ifnet *, const struct in6_addr *,
129		    struct in6_multi **);
130static int	in6m_get_source(struct in6_multi *inm,
131		    const struct in6_addr *addr, const int noalloc,
132		    struct ip6_msource **pims);
133static int	in6m_is_ifp_detached(const struct in6_multi *);
134static int	in6m_merge(struct in6_multi *, /*const*/ struct in6_mfilter *);
135static void	in6m_reap(struct in6_multi *);
136static struct ip6_moptions *
137		in6p_findmoptions(struct inpcb *);
138static int	in6p_get_source_filters(struct inpcb *, struct sockopt *);
139static int	in6p_lookup_v4addr(struct ipv6_mreq *, struct ip_mreq *);
140static int	in6p_join_group(struct inpcb *, struct sockopt *);
141static int	in6p_leave_group(struct inpcb *, struct sockopt *);
142static struct ifnet *
143		in6p_lookup_mcast_ifp(const struct inpcb *,
144		    const struct sockaddr_in6 *);
145static int	in6p_block_unblock_source(struct inpcb *, struct sockopt *);
146static int	in6p_set_multicast_if(struct inpcb *, struct sockopt *);
147static int	in6p_set_source_filters(struct inpcb *, struct sockopt *);
148static int	sysctl_ip6_mcast_filters SYSCTL_HANDLER_ARGS;
149static __inline__ int ip6_msource_cmp(const struct ip6_msource *,
150		    const struct ip6_msource *);
151
152SYSCTL_DECL(_net_inet6_ip6);	/* XXX Not in any common header. */
153
154SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, mcast, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IPv6 multicast");
155
156static unsigned long in6_mcast_maxgrpsrc = IPV6_MAX_GROUP_SRC_FILTER;
157SYSCTL_LONG(_net_inet6_ip6_mcast, OID_AUTO, maxgrpsrc,
158    CTLFLAG_RW | CTLFLAG_LOCKED, &in6_mcast_maxgrpsrc,
159    "Max source filters per group");
160
161static unsigned long in6_mcast_maxsocksrc = IPV6_MAX_SOCK_SRC_FILTER;
162SYSCTL_LONG(_net_inet6_ip6_mcast, OID_AUTO, maxsocksrc,
163    CTLFLAG_RW | CTLFLAG_LOCKED, &in6_mcast_maxsocksrc,
164    "Max source filters per socket");
165
166int in6_mcast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
167SYSCTL_INT(_net_inet6_ip6_mcast, OID_AUTO, loop, CTLFLAG_RW | CTLFLAG_LOCKED,
168    &in6_mcast_loop, 0, "Loopback multicast datagrams by default");
169
170SYSCTL_NODE(_net_inet6_ip6_mcast, OID_AUTO, filters,
171    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_ip6_mcast_filters,
172    "Per-interface stack-wide source filters");
173
174RB_GENERATE_PREV(ip6_msource_tree, ip6_msource, im6s_link, ip6_msource_cmp);
175
#define	IN6M_TRACE_HIST_SIZE	32	/* size of trace history */

/* For gdb */
__private_extern__ unsigned int in6m_trace_hist_size = IN6M_TRACE_HIST_SIZE;

/*
 * Debug wrapper around in6_multi: embeds the real record plus circular
 * ref/rele call-trace buffers so leaked or over-released memberships can
 * be diagnosed from a core file.  Only used when in6m_debug is enabled.
 */
struct in6_multi_dbg {
	struct in6_multi	in6m;			/* in6_multi */
	u_int16_t		in6m_refhold_cnt;	/* # of ref */
	u_int16_t		in6m_refrele_cnt;	/* # of rele */
	/*
	 * Circular lists of in6m_addref and in6m_remref callers.
	 */
	ctrace_t		in6m_refhold[IN6M_TRACE_HIST_SIZE];
	ctrace_t		in6m_refrele[IN6M_TRACE_HIST_SIZE];
	/*
	 * Trash list linkage
	 */
	TAILQ_ENTRY(in6_multi_dbg) in6m_trash_link;
};
195
196/* List of trash in6_multi entries protected by in6m_trash_lock */
197static TAILQ_HEAD(, in6_multi_dbg) in6m_trash_head;
198static decl_lck_mtx_data(, in6m_trash_lock);
199
200#if DEBUG
201static unsigned int in6m_debug = 1;		/* debugging (enabled) */
202#else
203static unsigned int in6m_debug;			/* debugging (disabled) */
204#endif /* !DEBUG */
205static unsigned int in6m_size;			/* size of zone element */
206static struct zone *in6m_zone;			/* zone for in6_multi */
207
208#define	IN6M_ZONE_MAX		64		/* maximum elements in zone */
209#define	IN6M_ZONE_NAME		"in6_multi"	/* zone name */
210
211static unsigned int imm_size;			/* size of zone element */
212static struct zone *imm_zone;			/* zone for in6_multi_mship */
213
214#define	IMM_ZONE_MAX		64		/* maximum elements in zone */
215#define	IMM_ZONE_NAME		"in6_multi_mship" /* zone name */
216
217#define	IP6MS_ZONE_MAX		64		/* maximum elements in zone */
218#define	IP6MS_ZONE_NAME		"ip6_msource"	/* zone name */
219
220static unsigned int ip6ms_size;			/* size of zone element */
221static struct zone *ip6ms_zone;			/* zone for ip6_msource */
222
223#define	IN6MS_ZONE_MAX		64		/* maximum elements in zone */
224#define	IN6MS_ZONE_NAME		"in6_msource"	/* zone name */
225
226static unsigned int in6ms_size;			/* size of zone element */
227static struct zone *in6ms_zone;			/* zone for in6_msource */
228
229/* Lock group and attribute for in6_multihead_lock lock */
230static lck_attr_t	*in6_multihead_lock_attr;
231static lck_grp_t	*in6_multihead_lock_grp;
232static lck_grp_attr_t	*in6_multihead_lock_grp_attr;
233
234static decl_lck_rw_data(, in6_multihead_lock);
235struct in6_multihead in6_multihead;
236
237static struct in6_multi *in6_multi_alloc(int);
238static void in6_multi_free(struct in6_multi *);
239static void in6_multi_attach(struct in6_multi *);
240static struct in6_multi_mship *in6_multi_mship_alloc(int);
241static void in6_multi_mship_free(struct in6_multi_mship *);
242static void in6m_trace(struct in6_multi *, int);
243
244static struct ip6_msource *ip6ms_alloc(int);
245static void ip6ms_free(struct ip6_msource *);
246static struct in6_msource *in6ms_alloc(int);
247static void in6ms_free(struct in6_msource *);
248
249/*
250 * IPv6 source tree comparison function.
251 *
252 * An ordered predicate is necessary; bcmp() is not documented to return
253 * an indication of order, memcmp() is, and is an ISO C99 requirement.
254 */
255static __inline int
256ip6_msource_cmp(const struct ip6_msource *a, const struct ip6_msource *b)
257{
258	return (memcmp(&a->im6s_addr, &b->im6s_addr, sizeof(struct in6_addr)));
259}
260
261/*
262 * Inline function which wraps assertions for a valid ifp.
263 */
264static __inline__ int
265in6m_is_ifp_detached(const struct in6_multi *inm)
266{
267	VERIFY(inm->in6m_ifma != NULL);
268	VERIFY(inm->in6m_ifp == inm->in6m_ifma->ifma_ifp);
269
270	return (!ifnet_is_attached(inm->in6m_ifp, 0));
271}
272
273/*
274 * Initialize an in6_mfilter structure to a known state at t0, t1
275 * with an empty source filter list.
276 */
277static __inline__ void
278im6f_init(struct in6_mfilter *imf, const int st0, const int st1)
279{
280	memset(imf, 0, sizeof(struct in6_mfilter));
281	RB_INIT(&imf->im6f_sources);
282	imf->im6f_st[0] = st0;
283	imf->im6f_st[1] = st1;
284}
285
286/*
287 * Resize the ip6_moptions vector to the next power-of-two minus 1.
288 */
289static int
290im6o_grow(struct ip6_moptions *imo, size_t newmax)
291{
292	struct in6_multi	**nmships;
293	struct in6_multi	**omships;
294	struct in6_mfilter	 *nmfilters;
295	struct in6_mfilter	 *omfilters;
296	size_t			  idx;
297	size_t			  oldmax;
298
299	IM6O_LOCK_ASSERT_HELD(imo);
300
301	nmships = NULL;
302	nmfilters = NULL;
303	omships = imo->im6o_membership;
304	omfilters = imo->im6o_mfilters;
305	oldmax = imo->im6o_max_memberships;
306	if (newmax == 0)
307		newmax = ((oldmax + 1) * 2) - 1;
308
309	if (newmax > IPV6_MAX_MEMBERSHIPS)
310		return (ETOOMANYREFS);
311
312	if ((nmships = (struct in6_multi **)_REALLOC(omships,
313	    sizeof (struct in6_multi *) * newmax, M_IP6MOPTS,
314	    M_WAITOK | M_ZERO)) == NULL)
315		return (ENOMEM);
316
317	imo->im6o_membership = nmships;
318
319	if ((nmfilters = (struct in6_mfilter *)_REALLOC(omfilters,
320	    sizeof (struct in6_mfilter) * newmax, M_IN6MFILTER,
321	    M_WAITOK | M_ZERO)) == NULL)
322		return (ENOMEM);
323
324	imo->im6o_mfilters = nmfilters;
325
326	/* Initialize newly allocated source filter heads. */
327	for (idx = oldmax; idx < newmax; idx++)
328		im6f_init(&nmfilters[idx], MCAST_UNDEFINED, MCAST_EXCLUDE);
329
330	imo->im6o_max_memberships = newmax;
331
332	return (0);
333}
334
335/*
336 * Find an IPv6 multicast group entry for this ip6_moptions instance
337 * which matches the specified group, and optionally an interface.
338 * Return its index into the array, or -1 if not found.
339 */
340static size_t
341im6o_match_group(const struct ip6_moptions *imo, const struct ifnet *ifp,
342    const struct sockaddr *group)
343{
344	const struct sockaddr_in6 *gsin6;
345	struct in6_multi *pinm;
346	int		  idx;
347	int		  nmships;
348
349	IM6O_LOCK_ASSERT_HELD(__DECONST(struct ip6_moptions *, imo));
350
351	gsin6 = (struct sockaddr_in6 *)(uintptr_t)(size_t)group;
352
353	/* The im6o_membership array may be lazy allocated. */
354	if (imo->im6o_membership == NULL || imo->im6o_num_memberships == 0)
355		return (-1);
356
357	nmships = imo->im6o_num_memberships;
358	for (idx = 0; idx < nmships; idx++) {
359		pinm = imo->im6o_membership[idx];
360		if (pinm == NULL)
361			continue;
362		IN6M_LOCK(pinm);
363		if ((ifp == NULL || (pinm->in6m_ifp == ifp)) &&
364		    IN6_ARE_ADDR_EQUAL(&pinm->in6m_addr,
365		    &gsin6->sin6_addr)) {
366			IN6M_UNLOCK(pinm);
367			break;
368		}
369		IN6M_UNLOCK(pinm);
370	}
371	if (idx >= nmships)
372		idx = -1;
373
374	return (idx);
375}
376
377/*
378 * Find an IPv6 multicast source entry for this imo which matches
379 * the given group index for this socket, and source address.
380 *
381 * XXX TODO: The scope ID, if present in src, is stripped before
382 * any comparison. We SHOULD enforce scope/zone checks where the source
383 * filter entry has a link scope.
384 *
385 * NOTE: This does not check if the entry is in-mode, merely if
386 * it exists, which may not be the desired behaviour.
387 */
388static struct in6_msource *
389im6o_match_source(const struct ip6_moptions *imo, const size_t gidx,
390    const struct sockaddr *src)
391{
392	struct ip6_msource	 find;
393	struct in6_mfilter	*imf;
394	struct ip6_msource	*ims;
395	const sockunion_t	*psa;
396
397	IM6O_LOCK_ASSERT_HELD(__DECONST(struct ip6_moptions *, imo));
398
399	VERIFY(src->sa_family == AF_INET6);
400	VERIFY(gidx != (size_t)-1 && gidx < imo->im6o_num_memberships);
401
402	/* The im6o_mfilters array may be lazy allocated. */
403	if (imo->im6o_mfilters == NULL)
404		return (NULL);
405	imf = &imo->im6o_mfilters[gidx];
406
407	psa = (sockunion_t *)(uintptr_t)(size_t)src;
408	find.im6s_addr = psa->sin6.sin6_addr;
409	in6_clearscope(&find.im6s_addr);		/* XXX */
410	ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find);
411
412	return ((struct in6_msource *)ims);
413}
414
415/*
416 * Perform filtering for multicast datagrams on a socket by group and source.
417 *
418 * Returns 0 if a datagram should be allowed through, or various error codes
419 * if the socket was not a member of the group, or the source was muted, etc.
420 */
421int
422im6o_mc_filter(const struct ip6_moptions *imo, const struct ifnet *ifp,
423    const struct sockaddr *group, const struct sockaddr *src)
424{
425	size_t gidx;
426	struct in6_msource *ims;
427	int mode;
428
429	IM6O_LOCK_ASSERT_HELD(__DECONST(struct ip6_moptions *, imo));
430	VERIFY(ifp != NULL);
431
432	gidx = im6o_match_group(imo, ifp, group);
433	if (gidx == (size_t)-1)
434		return (MCAST_NOTGMEMBER);
435
436	/*
437	 * Check if the source was included in an (S,G) join.
438	 * Allow reception on exclusive memberships by default,
439	 * reject reception on inclusive memberships by default.
440	 * Exclude source only if an in-mode exclude filter exists.
441	 * Include source only if an in-mode include filter exists.
442	 * NOTE: We are comparing group state here at MLD t1 (now)
443	 * with socket-layer t0 (since last downcall).
444	 */
445	mode = imo->im6o_mfilters[gidx].im6f_st[1];
446	ims = im6o_match_source(imo, gidx, src);
447
448	if ((ims == NULL && mode == MCAST_INCLUDE) ||
449	    (ims != NULL && ims->im6sl_st[0] != mode))
450		return (MCAST_NOTSMEMBER);
451
452	return (MCAST_PASS);
453}
454
455/*
456 * Find and return a reference to an in6_multi record for (ifp, group),
457 * and bump its reference count.
458 * If one does not exist, try to allocate it, and update link-layer multicast
459 * filters on ifp to listen for group.
460 * Assumes the IN6_MULTI lock is held across the call.
461 * Return 0 if successful, otherwise return an appropriate error code.
462 */
463static int
464in6_mc_get(struct ifnet *ifp, const struct in6_addr *group,
465    struct in6_multi **pinm)
466{
467	struct sockaddr_in6	 gsin6;
468	struct ifmultiaddr	*ifma;
469	struct in6_multi	*inm;
470	int			 error;
471
472	*pinm = NULL;
473
474	in6_multihead_lock_shared();
475	IN6_LOOKUP_MULTI(group, ifp, inm);
476	if (inm != NULL) {
477		IN6M_LOCK(inm);
478		VERIFY(inm->in6m_reqcnt >= 1);
479		inm->in6m_reqcnt++;
480		VERIFY(inm->in6m_reqcnt != 0);
481		*pinm = inm;
482		IN6M_UNLOCK(inm);
483		in6_multihead_lock_done();
484		/*
485		 * We already joined this group; return the in6m
486		 * with a refcount held (via lookup) for caller.
487		 */
488		return (0);
489	}
490	in6_multihead_lock_done();
491
492	memset(&gsin6, 0, sizeof(gsin6));
493	gsin6.sin6_family = AF_INET6;
494	gsin6.sin6_len = sizeof(struct sockaddr_in6);
495	gsin6.sin6_addr = *group;
496
497	/*
498	 * Check if a link-layer group is already associated
499	 * with this network-layer group on the given ifnet.
500	 */
501	error = if_addmulti(ifp, (struct sockaddr *)&gsin6, &ifma);
502	if (error != 0)
503		return (error);
504
505	/*
506	 * See comments in in6m_remref() for access to ifma_protospec.
507	 */
508	in6_multihead_lock_exclusive();
509	IFMA_LOCK(ifma);
510	if ((inm = ifma->ifma_protospec) != NULL) {
511		VERIFY(ifma->ifma_addr != NULL);
512		VERIFY(ifma->ifma_addr->sa_family == AF_INET6);
513		IN6M_ADDREF(inm);	/* for caller */
514		IFMA_UNLOCK(ifma);
515		IN6M_LOCK(inm);
516		VERIFY(inm->in6m_ifma == ifma);
517		VERIFY(inm->in6m_ifp == ifp);
518		VERIFY(IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, group));
519		if (inm->in6m_debug & IFD_ATTACHED) {
520			VERIFY(inm->in6m_reqcnt >= 1);
521			inm->in6m_reqcnt++;
522			VERIFY(inm->in6m_reqcnt != 0);
523			*pinm = inm;
524			IN6M_UNLOCK(inm);
525			in6_multihead_lock_done();
526			IFMA_REMREF(ifma);
527			/*
528			 * We lost the race with another thread doing
529			 * in6_mc_get(); since this group has already
530			 * been joined; return the inm with a refcount
531			 * held for caller.
532			 */
533			return (0);
534		}
535		/*
536		 * We lost the race with another thread doing in6_delmulti();
537		 * the inm referring to the ifma has been detached, thus we
538		 * reattach it back to the in6_multihead list, and return the
539		 * inm with a refcount held for the caller.
540		 */
541		in6_multi_attach(inm);
542		VERIFY((inm->in6m_debug &
543		    (IFD_ATTACHED | IFD_TRASHED)) == IFD_ATTACHED);
544		*pinm = inm;
545		IN6M_UNLOCK(inm);
546		in6_multihead_lock_done();
547		IFMA_REMREF(ifma);
548		return (0);
549	}
550	IFMA_UNLOCK(ifma);
551
552	/*
553	 * A new in6_multi record is needed; allocate and initialize it.
554	 * We DO NOT perform an MLD join as the in6_ layer may need to
555	 * push an initial source list down to MLD to support SSM.
556	 *
557	 * The initial source filter state is INCLUDE, {} as per the RFC.
558	 * Pending state-changes per group are subject to a bounds check.
559	 */
560	inm = in6_multi_alloc(M_WAITOK);
561	if (inm == NULL) {
562		in6_multihead_lock_done();
563		IFMA_REMREF(ifma);
564		return (ENOMEM);
565	}
566	IN6M_LOCK(inm);
567	inm->in6m_addr = *group;
568	inm->in6m_ifp = ifp;
569	inm->in6m_mli = MLD_IFINFO(ifp);
570	VERIFY(inm->in6m_mli != NULL);
571	MLI_ADDREF(inm->in6m_mli);
572	inm->in6m_ifma = ifma;		/* keep refcount from if_addmulti() */
573	inm->in6m_state = MLD_NOT_MEMBER;
574	/*
575	 * Pending state-changes per group are subject to a bounds check.
576	 */
577	inm->in6m_scq.ifq_maxlen = MLD_MAX_STATE_CHANGES;
578	inm->in6m_st[0].iss_fmode = MCAST_UNDEFINED;
579	inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
580	RB_INIT(&inm->in6m_srcs);
581	*pinm = inm;
582	in6_multi_attach(inm);
583	VERIFY((inm->in6m_debug &
584	    (IFD_ATTACHED | IFD_TRASHED)) == IFD_ATTACHED);
585	IN6M_ADDREF_LOCKED(inm);	/* for caller */
586	IN6M_UNLOCK(inm);
587
588	IFMA_LOCK(ifma);
589	VERIFY(ifma->ifma_protospec == NULL);
590	ifma->ifma_protospec = inm;
591	IFMA_UNLOCK(ifma);
592	in6_multihead_lock_done();
593
594	return (0);
595}
596
597/*
598 * Clear recorded source entries for a group.
599 * Used by the MLD code. Caller must hold the IN6_MULTI lock.
600 * FIXME: Should reap.
601 */
602void
603in6m_clear_recorded(struct in6_multi *inm)
604{
605	struct ip6_msource	*ims;
606
607	IN6M_LOCK_ASSERT_HELD(inm);
608
609	RB_FOREACH(ims, ip6_msource_tree, &inm->in6m_srcs) {
610		if (ims->im6s_stp) {
611			ims->im6s_stp = 0;
612			--inm->in6m_st[1].iss_rec;
613		}
614	}
615	VERIFY(inm->in6m_st[1].iss_rec == 0);
616}
617
618/*
619 * Record a source as pending for a Source-Group MLDv2 query.
620 * This lives here as it modifies the shared tree.
621 *
622 * inm is the group descriptor.
623 * naddr is the address of the source to record in network-byte order.
624 *
625 * If the net.inet6.mld.sgalloc sysctl is non-zero, we will
626 * lazy-allocate a source node in response to an SG query.
627 * Otherwise, no allocation is performed. This saves some memory
628 * with the trade-off that the source will not be reported to the
629 * router if joined in the window between the query response and
630 * the group actually being joined on the local host.
631 *
632 * VIMAGE: XXX: Currently the mld_sgalloc feature has been removed.
633 * This turns off the allocation of a recorded source entry if
634 * the group has not been joined.
635 *
636 * Return 0 if the source didn't exist or was already marked as recorded.
637 * Return 1 if the source was marked as recorded by this function.
638 * Return <0 if any error occured (negated errno code).
639 */
640int
641in6m_record_source(struct in6_multi *inm, const struct in6_addr *addr)
642{
643	struct ip6_msource	 find;
644	struct ip6_msource	*ims, *nims;
645
646	IN6M_LOCK_ASSERT_HELD(inm);
647
648	find.im6s_addr = *addr;
649	ims = RB_FIND(ip6_msource_tree, &inm->in6m_srcs, &find);
650	if (ims && ims->im6s_stp)
651		return (0);
652	if (ims == NULL) {
653		if (inm->in6m_nsrc == in6_mcast_maxgrpsrc)
654			return (-ENOSPC);
655		nims = ip6ms_alloc(M_WAITOK);
656		if (nims == NULL)
657			return (-ENOMEM);
658		nims->im6s_addr = find.im6s_addr;
659		RB_INSERT(ip6_msource_tree, &inm->in6m_srcs, nims);
660		++inm->in6m_nsrc;
661		ims = nims;
662	}
663
664	/*
665	 * Mark the source as recorded and update the recorded
666	 * source count.
667	 */
668	++ims->im6s_stp;
669	++inm->in6m_st[1].iss_rec;
670
671	return (1);
672}
673
674/*
675 * Return a pointer to an in6_msource owned by an in6_mfilter,
676 * given its source address.
677 * Lazy-allocate if needed. If this is a new entry its filter state is
678 * undefined at t0.
679 *
680 * imf is the filter set being modified.
681 * addr is the source address.
682 *
683 * Caller is expected to be holding im6o_lock.
684 */
685static int
686im6f_get_source(struct in6_mfilter *imf, const struct sockaddr_in6 *psin,
687    struct in6_msource **plims)
688{
689	struct ip6_msource	 find;
690	struct ip6_msource	*ims;
691	struct in6_msource	*lims;
692	int			 error;
693
694	error = 0;
695	ims = NULL;
696	lims = NULL;
697
698	find.im6s_addr = psin->sin6_addr;
699	ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find);
700	lims = (struct in6_msource *)ims;
701	if (lims == NULL) {
702		if (imf->im6f_nsrc == in6_mcast_maxsocksrc)
703			return (ENOSPC);
704		lims = in6ms_alloc(M_WAITOK);
705		if (lims == NULL)
706			return (ENOMEM);
707		lims->im6s_addr = find.im6s_addr;
708		lims->im6sl_st[0] = MCAST_UNDEFINED;
709		RB_INSERT(ip6_msource_tree, &imf->im6f_sources,
710		    (struct ip6_msource *)lims);
711		++imf->im6f_nsrc;
712	}
713
714	*plims = lims;
715
716	return (error);
717}
718
719/*
720 * Graft a source entry into an existing socket-layer filter set,
721 * maintaining any required invariants and checking allocations.
722 *
723 * The source is marked as being in the new filter mode at t1.
724 *
725 * Return the pointer to the new node, otherwise return NULL.
726 *
727 * Caller is expected to be holding im6o_lock.
728 */
729static struct in6_msource *
730im6f_graft(struct in6_mfilter *imf, const uint8_t st1,
731    const struct sockaddr_in6 *psin)
732{
733	struct in6_msource	*lims;
734
735	lims = in6ms_alloc(M_WAITOK);
736	if (lims == NULL)
737		return (NULL);
738	lims->im6s_addr = psin->sin6_addr;
739	lims->im6sl_st[0] = MCAST_UNDEFINED;
740	lims->im6sl_st[1] = st1;
741	RB_INSERT(ip6_msource_tree, &imf->im6f_sources,
742	    (struct ip6_msource *)lims);
743	++imf->im6f_nsrc;
744
745	return (lims);
746}
747
748/*
749 * Prune a source entry from an existing socket-layer filter set,
750 * maintaining any required invariants and checking allocations.
751 *
752 * The source is marked as being left at t1, it is not freed.
753 *
754 * Return 0 if no error occurred, otherwise return an errno value.
755 *
756 * Caller is expected to be holding im6o_lock.
757 */
758static int
759im6f_prune(struct in6_mfilter *imf, const struct sockaddr_in6 *psin)
760{
761	struct ip6_msource	 find;
762	struct ip6_msource	*ims;
763	struct in6_msource	*lims;
764
765	find.im6s_addr = psin->sin6_addr;
766	ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find);
767	if (ims == NULL)
768		return (ENOENT);
769	lims = (struct in6_msource *)ims;
770	lims->im6sl_st[1] = MCAST_UNDEFINED;
771	return (0);
772}
773
774/*
775 * Revert socket-layer filter set deltas at t1 to t0 state.
776 *
777 * Caller is expected to be holding im6o_lock.
778 */
779static void
780im6f_rollback(struct in6_mfilter *imf)
781{
782	struct ip6_msource	*ims, *tims;
783	struct in6_msource	*lims;
784
785	RB_FOREACH_SAFE(ims, ip6_msource_tree, &imf->im6f_sources, tims) {
786		lims = (struct in6_msource *)ims;
787		if (lims->im6sl_st[0] == lims->im6sl_st[1]) {
788			/* no change at t1 */
789			continue;
790		} else if (lims->im6sl_st[0] != MCAST_UNDEFINED) {
791			/* revert change to existing source at t1 */
792			lims->im6sl_st[1] = lims->im6sl_st[0];
793		} else {
794			/* revert source added t1 */
795			MLD_PRINTF(("%s: free in6ms 0x%llx\n", __func__,
796			    (uint64_t)VM_KERNEL_ADDRPERM(lims)));
797			RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, ims);
798			in6ms_free(lims);
799			imf->im6f_nsrc--;
800		}
801	}
802	imf->im6f_st[1] = imf->im6f_st[0];
803}
804
805/*
806 * Mark socket-layer filter set as INCLUDE {} at t1.
807 *
808 * Caller is expected to be holding im6o_lock.
809 */
810void
811im6f_leave(struct in6_mfilter *imf)
812{
813	struct ip6_msource	*ims;
814	struct in6_msource	*lims;
815
816	RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) {
817		lims = (struct in6_msource *)ims;
818		lims->im6sl_st[1] = MCAST_UNDEFINED;
819	}
820	imf->im6f_st[1] = MCAST_INCLUDE;
821}
822
823/*
824 * Mark socket-layer filter set deltas as committed.
825 *
826 * Caller is expected to be holding im6o_lock.
827 */
828static void
829im6f_commit(struct in6_mfilter *imf)
830{
831	struct ip6_msource	*ims;
832	struct in6_msource	*lims;
833
834	RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) {
835		lims = (struct in6_msource *)ims;
836		lims->im6sl_st[0] = lims->im6sl_st[1];
837	}
838	imf->im6f_st[0] = imf->im6f_st[1];
839}
840
841/*
842 * Reap unreferenced sources from socket-layer filter set.
843 *
844 * Caller is expected to be holding im6o_lock.
845 */
846static void
847im6f_reap(struct in6_mfilter *imf)
848{
849	struct ip6_msource	*ims, *tims;
850	struct in6_msource	*lims;
851
852	RB_FOREACH_SAFE(ims, ip6_msource_tree, &imf->im6f_sources, tims) {
853		lims = (struct in6_msource *)ims;
854		if ((lims->im6sl_st[0] == MCAST_UNDEFINED) &&
855		    (lims->im6sl_st[1] == MCAST_UNDEFINED)) {
856			MLD_PRINTF(("%s: free in6ms 0x%llx\n", __func__,
857			    (uint64_t)VM_KERNEL_ADDRPERM(lims)));
858			RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, ims);
859			in6ms_free(lims);
860			imf->im6f_nsrc--;
861		}
862	}
863}
864
865/*
866 * Purge socket-layer filter set.
867 *
868 * Caller is expected to be holding im6o_lock.
869 */
870void
871im6f_purge(struct in6_mfilter *imf)
872{
873	struct ip6_msource	*ims, *tims;
874	struct in6_msource	*lims;
875
876	RB_FOREACH_SAFE(ims, ip6_msource_tree, &imf->im6f_sources, tims) {
877		lims = (struct in6_msource *)ims;
878		MLD_PRINTF(("%s: free in6ms 0x%llx\n", __func__,
879		    (uint64_t)VM_KERNEL_ADDRPERM(lims)));
880		RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, ims);
881		in6ms_free(lims);
882		imf->im6f_nsrc--;
883	}
884	imf->im6f_st[0] = imf->im6f_st[1] = MCAST_UNDEFINED;
885	VERIFY(RB_EMPTY(&imf->im6f_sources));
886}
887
888/*
889 * Look up a source filter entry for a multicast group.
890 *
891 * inm is the group descriptor to work with.
892 * addr is the IPv6 address to look up.
893 * noalloc may be non-zero to suppress allocation of sources.
894 * *pims will be set to the address of the retrieved or allocated source.
895 *
896 * Return 0 if successful, otherwise return a non-zero error code.
897 */
898static int
899in6m_get_source(struct in6_multi *inm, const struct in6_addr *addr,
900    const int noalloc, struct ip6_msource **pims)
901{
902	struct ip6_msource	 find;
903	struct ip6_msource	*ims, *nims;
904
905	IN6M_LOCK_ASSERT_HELD(inm);
906
907	find.im6s_addr = *addr;
908	ims = RB_FIND(ip6_msource_tree, &inm->in6m_srcs, &find);
909	if (ims == NULL && !noalloc) {
910		if (inm->in6m_nsrc == in6_mcast_maxgrpsrc)
911			return (ENOSPC);
912		nims = ip6ms_alloc(M_WAITOK);
913		if (nims == NULL)
914			return (ENOMEM);
915		nims->im6s_addr = *addr;
916		RB_INSERT(ip6_msource_tree, &inm->in6m_srcs, nims);
917		++inm->in6m_nsrc;
918		ims = nims;
919		MLD_PRINTF(("%s: allocated %s as 0x%llx\n", __func__,
920		    ip6_sprintf(addr), (uint64_t)VM_KERNEL_ADDRPERM(ims)));
921	}
922
923	*pims = ims;
924	return (0);
925}
926
927/*
928 * Helper function to derive the filter mode on a source entry
929 * from its internal counters. Predicates are:
930 *  A source is only excluded if all listeners exclude it.
931 *  A source is only included if no listeners exclude it,
932 *  and at least one listener includes it.
933 * May be used by ifmcstat(8).
934 */
935uint8_t
936im6s_get_mode(const struct in6_multi *inm, const struct ip6_msource *ims,
937    uint8_t t)
938{
939	IN6M_LOCK_ASSERT_HELD(__DECONST(struct in6_multi *, inm));
940
941	t = !!t;
942	if (inm->in6m_st[t].iss_ex > 0 &&
943	    inm->in6m_st[t].iss_ex == ims->im6s_st[t].ex)
944		return (MCAST_EXCLUDE);
945	else if (ims->im6s_st[t].in > 0 && ims->im6s_st[t].ex == 0)
946		return (MCAST_INCLUDE);
947	return (MCAST_UNDEFINED);
948}
949
950/*
951 * Merge socket-layer source into MLD-layer source.
952 * If rollback is non-zero, perform the inverse of the merge.
953 */
954static void
955im6s_merge(struct ip6_msource *ims, const struct in6_msource *lims,
956    const int rollback)
957{
958	int n = rollback ? -1 : 1;
959
960	if (lims->im6sl_st[0] == MCAST_EXCLUDE) {
961		MLD_PRINTF(("%s: t1 ex -= %d on %s\n", __func__, n,
962		    ip6_sprintf(&lims->im6s_addr)));
963		ims->im6s_st[1].ex -= n;
964	} else if (lims->im6sl_st[0] == MCAST_INCLUDE) {
965		MLD_PRINTF(("%s: t1 in -= %d on %s\n", __func__, n,
966		    ip6_sprintf(&lims->im6s_addr)));
967		ims->im6s_st[1].in -= n;
968	}
969
970	if (lims->im6sl_st[1] == MCAST_EXCLUDE) {
971		MLD_PRINTF(("%s: t1 ex += %d on %s\n", __func__, n,
972		    ip6_sprintf(&lims->im6s_addr)));
973		ims->im6s_st[1].ex += n;
974	} else if (lims->im6sl_st[1] == MCAST_INCLUDE) {
975		MLD_PRINTF(("%s: t1 in += %d on %s\n", __func__, n,
976		    ip6_sprintf(&lims->im6s_addr)));
977		ims->im6s_st[1].in += n;
978	}
979}
980
981/*
982 * Atomically update the global in6_multi state, when a membership's
983 * filter list is being updated in any way.
984 *
985 * imf is the per-inpcb-membership group filter pointer.
986 * A fake imf may be passed for in-kernel consumers.
987 *
988 * XXX This is a candidate for a set-symmetric-difference style loop
989 * which would eliminate the repeated lookup from root of ims nodes,
990 * as they share the same key space.
991 *
992 * If any error occurred this function will back out of refcounts
993 * and return a non-zero value.
994 */
static int
in6m_merge(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf)
{
	struct ip6_msource	*ims, *nims;
	struct in6_msource	*lims;
	int			 schanged, error;	/* # of sources touched; first error */
	int			 nsrc0, nsrc1;		/* in-mode filter counts at t0, t1 */

	IN6M_LOCK_ASSERT_HELD(inm);

	schanged = 0;
	error = 0;
	nsrc1 = nsrc0 = 0;

	/*
	 * Update the source filters first, as this may fail.
	 * Maintain count of in-mode filters at t0, t1. These are
	 * used to work out if we transition into ASM mode or not.
	 * Maintain a count of source filters whose state was
	 * actually modified by this operation.
	 */
	RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) {
		lims = (struct in6_msource *)ims;
		if (lims->im6sl_st[0] == imf->im6f_st[0]) nsrc0++;
		if (lims->im6sl_st[1] == imf->im6f_st[1]) nsrc1++;
		/* Nothing to merge when the source state is unchanged. */
		if (lims->im6sl_st[0] == lims->im6sl_st[1]) continue;
		/* Find or allocate the matching MLD-layer node (may fail). */
		error = in6m_get_source(inm, &lims->im6s_addr, 0, &nims);
		++schanged;
		if (error)
			break;
		im6s_merge(nims, lims, 0);
	}
	if (error) {
		struct ip6_msource *bims;

		/*
		 * Back out every merge applied so far by walking the
		 * socket-layer tree in reverse from the failing node,
		 * applying the inverse merge (rollback == 1).
		 */
		RB_FOREACH_REVERSE_FROM(ims, ip6_msource_tree, nims) {
			lims = (struct in6_msource *)ims;
			if (lims->im6sl_st[0] == lims->im6sl_st[1])
				continue;
			/* noalloc lookup: skip nodes that were never created. */
			(void) in6m_get_source(inm, &lims->im6s_addr, 1, &bims);
			if (bims == NULL)
				continue;
			im6s_merge(bims, lims, 1);
		}
		goto out_reap;
	}

	MLD_PRINTF(("%s: imf filters in-mode: %d at t0, %d at t1\n",
	    __func__, nsrc0, nsrc1));

	/* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */
	if (imf->im6f_st[0] == imf->im6f_st[1] &&
	    imf->im6f_st[1] == MCAST_INCLUDE) {
		if (nsrc1 == 0) {
			MLD_PRINTF(("%s: --in on inm at t1\n", __func__));
			--inm->in6m_st[1].iss_in;
		}
	}

	/* Handle filter mode transition on socket. */
	if (imf->im6f_st[0] != imf->im6f_st[1]) {
		MLD_PRINTF(("%s: imf transition %d to %d\n",
		    __func__, imf->im6f_st[0], imf->im6f_st[1]));

		/* Withdraw this membership's t0 contribution... */
		if (imf->im6f_st[0] == MCAST_EXCLUDE) {
			MLD_PRINTF(("%s: --ex on inm at t1\n", __func__));
			--inm->in6m_st[1].iss_ex;
		} else if (imf->im6f_st[0] == MCAST_INCLUDE) {
			MLD_PRINTF(("%s: --in on inm at t1\n", __func__));
			--inm->in6m_st[1].iss_in;
		}

		/* ...and add its t1 contribution to the group counters. */
		if (imf->im6f_st[1] == MCAST_EXCLUDE) {
			MLD_PRINTF(("%s: ex++ on inm at t1\n", __func__));
			inm->in6m_st[1].iss_ex++;
		} else if (imf->im6f_st[1] == MCAST_INCLUDE && nsrc1 > 0) {
			MLD_PRINTF(("%s: in++ on inm at t1\n", __func__));
			inm->in6m_st[1].iss_in++;
		}
	}

	/*
	 * Track inm filter state in terms of listener counts.
	 * If there are any exclusive listeners, stack-wide
	 * membership is exclusive.
	 * Otherwise, if only inclusive listeners, stack-wide is inclusive.
	 * If no listeners remain, state is undefined at t1,
	 * and the MLD lifecycle for this group should finish.
	 */
	if (inm->in6m_st[1].iss_ex > 0) {
		MLD_PRINTF(("%s: transition to EX\n", __func__));
		inm->in6m_st[1].iss_fmode = MCAST_EXCLUDE;
	} else if (inm->in6m_st[1].iss_in > 0) {
		MLD_PRINTF(("%s: transition to IN\n", __func__));
		inm->in6m_st[1].iss_fmode = MCAST_INCLUDE;
	} else {
		MLD_PRINTF(("%s: transition to UNDEF\n", __func__));
		inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
	}

	/* Decrement ASM listener count on transition out of ASM mode. */
	if (imf->im6f_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
		if ((imf->im6f_st[1] != MCAST_EXCLUDE) ||
		    (imf->im6f_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) {
			MLD_PRINTF(("%s: --asm on inm at t1\n", __func__));
			--inm->in6m_st[1].iss_asm;
		}
	}

	/* Increment ASM listener count on transition to ASM mode. */
	if (imf->im6f_st[1] == MCAST_EXCLUDE && nsrc1 == 0) {
		MLD_PRINTF(("%s: asm++ on inm at t1\n", __func__));
		inm->in6m_st[1].iss_asm++;
	}

	MLD_PRINTF(("%s: merged imf 0x%llx to inm 0x%llx\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(imf),
	    (uint64_t)VM_KERNEL_ADDRPERM(inm)));
	in6m_print(inm);

out_reap:
	/* Reap any source nodes left with zero references by the merge. */
	if (schanged > 0) {
		MLD_PRINTF(("%s: sources changed; reaping\n", __func__));
		in6m_reap(inm);
	}
	return (error);
}
1122
1123/*
1124 * Mark an in6_multi's filter set deltas as committed.
1125 * Called by MLD after a state change has been enqueued.
1126 */
1127void
1128in6m_commit(struct in6_multi *inm)
1129{
1130	struct ip6_msource	*ims;
1131
1132	IN6M_LOCK_ASSERT_HELD(inm);
1133
1134	MLD_PRINTF(("%s: commit inm 0x%llx\n", __func__,
1135	    (uint64_t)VM_KERNEL_ADDRPERM(inm)));
1136	MLD_PRINTF(("%s: pre commit:\n", __func__));
1137	in6m_print(inm);
1138
1139	RB_FOREACH(ims, ip6_msource_tree, &inm->in6m_srcs) {
1140		ims->im6s_st[0] = ims->im6s_st[1];
1141	}
1142	inm->in6m_st[0] = inm->in6m_st[1];
1143}
1144
1145/*
1146 * Reap unreferenced nodes from an in6_multi's filter set.
1147 */
1148static void
1149in6m_reap(struct in6_multi *inm)
1150{
1151	struct ip6_msource	*ims, *tims;
1152
1153	IN6M_LOCK_ASSERT_HELD(inm);
1154
1155	RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs, tims) {
1156		if (ims->im6s_st[0].ex > 0 || ims->im6s_st[0].in > 0 ||
1157		    ims->im6s_st[1].ex > 0 || ims->im6s_st[1].in > 0 ||
1158		    ims->im6s_stp != 0)
1159			continue;
1160		MLD_PRINTF(("%s: free ims 0x%llx\n", __func__,
1161		    (uint64_t)VM_KERNEL_ADDRPERM(ims)));
1162		RB_REMOVE(ip6_msource_tree, &inm->in6m_srcs, ims);
1163		ip6ms_free(ims);
1164		inm->in6m_nsrc--;
1165	}
1166}
1167
1168/*
1169 * Purge all source nodes from an in6_multi's filter set.
1170 */
1171void
1172in6m_purge(struct in6_multi *inm)
1173{
1174	struct ip6_msource	*ims, *tims;
1175
1176	IN6M_LOCK_ASSERT_HELD(inm);
1177
1178	RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs, tims) {
1179		MLD_PRINTF(("%s: free ims 0x%llx\n", __func__,
1180		    (uint64_t)VM_KERNEL_ADDRPERM(ims)));
1181		RB_REMOVE(ip6_msource_tree, &inm->in6m_srcs, ims);
1182		ip6ms_free(ims);
1183		inm->in6m_nsrc--;
1184	}
1185}
1186
1187/*
1188 * Join a multicast address w/o sources.
1189 * KAME compatibility entry point.
1190 *
1191 */
1192struct in6_multi_mship *
1193in6_joingroup(struct ifnet *ifp, struct in6_addr *mcaddr,
1194    int *errorp, int delay)
1195{
1196	struct in6_multi_mship *imm;
1197	int error;
1198
1199	*errorp = 0;
1200
1201	imm = in6_multi_mship_alloc(M_WAITOK);
1202	if (imm == NULL) {
1203		*errorp = ENOBUFS;
1204		return (NULL);
1205	}
1206
1207	error = in6_mc_join(ifp, mcaddr, NULL, &imm->i6mm_maddr, delay);
1208	if (error) {
1209		*errorp = error;
1210		in6_multi_mship_free(imm);
1211		return (NULL);
1212	}
1213
1214	return (imm);
1215}
1216
1217/*
1218 * Leave a multicast address w/o sources.
1219 * KAME compatibility entry point.
1220 */
1221int
1222in6_leavegroup(struct in6_multi_mship *imm)
1223{
1224	if (imm->i6mm_maddr != NULL) {
1225		in6_mc_leave(imm->i6mm_maddr, NULL);
1226		IN6M_REMREF(imm->i6mm_maddr);
1227		imm->i6mm_maddr = NULL;
1228	}
1229	in6_multi_mship_free(imm);
1230	return 0;
1231}
1232
1233/*
1234 * Join a multicast group; real entry point.
1235 *
1236 * Only preserves atomicity at inm level.
1237 * NOTE: imf argument cannot be const due to sys/tree.h limitations.
1238 *
1239 * If the MLD downcall fails, the group is not joined, and an error
1240 * code is returned.
1241 */
int
in6_mc_join(struct ifnet *ifp, const struct in6_addr *mcaddr,
    /*const*/ struct in6_mfilter *imf, struct in6_multi **pinm,
    const int delay)
{
	struct in6_mfilter	 timf;
	struct in6_multi	*inm = NULL;
	int			 error = 0;
	struct mld_tparams	 mtp;

	/*
	 * Sanity: Check scope zone ID was set for ifp, if and
	 * only if group is scoped to an interface.
	 */
	VERIFY(IN6_IS_ADDR_MULTICAST(mcaddr));
	if (IN6_IS_ADDR_MC_LINKLOCAL(mcaddr) ||
	    IN6_IS_ADDR_MC_INTFACELOCAL(mcaddr)) {
		VERIFY(mcaddr->s6_addr16[1] != 0);
	}

	MLD_PRINTF(("%s: join %s on 0x%llx(%s))\n", __func__,
	    ip6_sprintf(mcaddr), (uint64_t)VM_KERNEL_ADDRPERM(ifp),
	    if_name(ifp)));

	bzero(&mtp, sizeof (mtp));
	*pinm = NULL;

	/*
	 * If no imf was specified (i.e. kernel consumer),
	 * fake one up and assume it is an ASM join.
	 */
	if (imf == NULL) {
		im6f_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE);
		imf = &timf;
	}

	/* Find or create the group; returns with a reference held. */
	error = in6_mc_get(ifp, mcaddr, &inm);
	if (error) {
		MLD_PRINTF(("%s: in6_mc_get() failure\n", __func__));
		return (error);
	}

	MLD_PRINTF(("%s: merge inm state\n", __func__));

	IN6M_LOCK(inm);
	error = in6m_merge(inm, imf);
	if (error) {
		MLD_PRINTF(("%s: failed to merge inm state\n", __func__));
		goto out_in6m_release;
	}

	MLD_PRINTF(("%s: doing mld downcall\n", __func__));
	error = mld_change_state(inm, &mtp, delay);
	if (error) {
		MLD_PRINTF(("%s: failed to update source\n", __func__));
		/* Undo the socket-layer filter changes on downcall failure. */
		im6f_rollback(imf);
		goto out_in6m_release;
	}

out_in6m_release:
	/*
	 * The success path falls through to this label with error == 0:
	 * the in6_mc_get() reference is then handed to the caller via
	 * *pinm.  On error, the reference is dropped here instead.
	 */
	if (error) {
		MLD_PRINTF(("%s: dropping ref on 0x%llx\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(inm)));
		IN6M_UNLOCK(inm);
		IN6M_REMREF(inm);
	} else {
		IN6M_UNLOCK(inm);
		*pinm = inm;	/* keep refcount from in6_mc_get() */
	}

	/* schedule timer now that we've dropped the lock(s) */
	mld_set_timeout(&mtp);

	return (error);
}
1317
1318/*
1319 * Leave a multicast group; real entry point.
1320 * All source filters will be expunged.
1321 *
1322 * Only preserves atomicity at inm level.
1323 *
1324 * Holding the write lock for the INP which contains imf
1325 * is highly advisable. We can't assert for it as imf does not
1326 * contain a back-pointer to the owning inp.
1327 *
1328 * Note: This is not the same as in6m_release(*) as this function also
1329 * makes a state change downcall into MLD.
1330 */
int
in6_mc_leave(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf)
{
	struct in6_mfilter	 timf;
	int			 error, lastref;
	struct mld_tparams	 mtp;

	bzero(&mtp, sizeof (mtp));
	error = 0;

	IN6M_LOCK_ASSERT_NOTHELD(inm);

	/* Lock order: global list lock (exclusive), then the group lock. */
	in6_multihead_lock_exclusive();
	IN6M_LOCK(inm);

	MLD_PRINTF(("%s: leave inm 0x%llx, %s/%s%d, imf 0x%llx\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(inm), ip6_sprintf(&inm->in6m_addr),
	    (in6m_is_ifp_detached(inm) ? "null" : inm->in6m_ifp->if_name),
	    inm->in6m_ifp->if_unit, (uint64_t)VM_KERNEL_ADDRPERM(imf)));

	/*
	 * If no imf was specified (i.e. kernel consumer),
	 * fake one up and assume it is an ASM join.
	 */
	if (imf == NULL) {
		im6f_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED);
		imf = &timf;
	}

	/*
	 * Begin state merge transaction at MLD layer.
	 *
	 * As this particular invocation should not cause any memory
	 * to be allocated, and there is no opportunity to roll back
	 * the transaction, it MUST NOT fail.
	 */
	MLD_PRINTF(("%s: merge inm state\n", __func__));

	error = in6m_merge(inm, imf);
	KASSERT(error == 0, ("%s: failed to merge inm state\n", __func__));

	MLD_PRINTF(("%s: doing mld downcall\n", __func__));
	error = mld_change_state(inm, &mtp, 0);
#if MLD_DEBUG
	if (error)
		MLD_PRINTF(("%s: failed mld downcall\n", __func__));
#endif
	/*
	 * Detach from the global membership list; lastref is non-zero
	 * when this drop takes the group's request count to zero.
	 */
	lastref = in6_multi_detach(inm);
	VERIFY(!lastref || (!(inm->in6m_debug & IFD_ATTACHED) &&
	    inm->in6m_reqcnt == 0));
	IN6M_UNLOCK(inm);
	in6_multihead_lock_done();

	if (lastref)
		IN6M_REMREF(inm);	/* for in6_multihead list */

	/* schedule timer now that we've dropped the lock(s) */
	mld_set_timeout(&mtp);

	return (error);
}
1392
1393/*
1394 * Block or unblock an ASM multicast source on an inpcb.
1395 * This implements the delta-based API described in RFC 3678.
1396 *
1397 * The delta-based API applies only to exclusive-mode memberships.
1398 * An MLD downcall will be performed.
1399 *
1400 * Return 0 if successful, otherwise return an appropriate error code.
1401 */
static int
in6p_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
{
	struct group_source_req		 gsr;
	sockunion_t			*gsa, *ssa;
	struct ifnet			*ifp;
	struct in6_mfilter		*imf;
	struct ip6_moptions		*imo;
	struct in6_msource		*ims;
	struct in6_multi		*inm;
	size_t				 idx;
	uint16_t			 fmode;
	int				 error, doblock;
	struct mld_tparams		 mtp;

	bzero(&mtp, sizeof (mtp));
	ifp = NULL;
	error = 0;
	doblock = 0;	/* 1 == MCAST_BLOCK_SOURCE, 0 == MCAST_UNBLOCK_SOURCE */

	memset(&gsr, 0, sizeof(struct group_source_req));
	gsa = (sockunion_t *)&gsr.gsr_group;
	ssa = (sockunion_t *)&gsr.gsr_source;

	/* Copy in and validate the request; resolve the interface index. */
	switch (sopt->sopt_name) {
	case MCAST_BLOCK_SOURCE:
	case MCAST_UNBLOCK_SOURCE:
		error = sooptcopyin(sopt, &gsr,
		    sizeof(struct group_source_req),
		    sizeof(struct group_source_req));
		if (error)
			return (error);

		if (gsa->sin6.sin6_family != AF_INET6 ||
		    gsa->sin6.sin6_len != sizeof(struct sockaddr_in6))
			return (EINVAL);

		if (ssa->sin6.sin6_family != AF_INET6 ||
		    ssa->sin6.sin6_len != sizeof(struct sockaddr_in6))
			return (EINVAL);

		ifnet_head_lock_shared();
		if (gsr.gsr_interface == 0 ||
		    (u_int)if_index < gsr.gsr_interface) {
			ifnet_head_done();
			return (EADDRNOTAVAIL);
		}

		ifp = ifindex2ifnet[gsr.gsr_interface];
		ifnet_head_done();

		if (ifp == NULL)
			return (EADDRNOTAVAIL);

		if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
			doblock = 1;
		break;

	default:
		MLD_PRINTF(("%s: unknown sopt_name %d\n",
		    __func__, sopt->sopt_name));
		return (EOPNOTSUPP);
		break;
	}

	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
		return (EINVAL);

	/* Embed the zone ID for scoped group addresses. */
	(void) in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);

	/*
	 * Check if we are actually a member of this group.
	 */
	imo = in6p_findmoptions(inp);
	if (imo == NULL)
		return (ENOMEM);

	IM6O_LOCK(imo);
	idx = im6o_match_group(imo, ifp, &gsa->sa);
	if (idx == (size_t)-1 || imo->im6o_mfilters == NULL) {
		error = EADDRNOTAVAIL;
		goto out_imo_locked;
	}

	VERIFY(imo->im6o_mfilters != NULL);
	imf = &imo->im6o_mfilters[idx];
	inm = imo->im6o_membership[idx];

	/*
	 * Attempting to use the delta-based API on an
	 * non exclusive-mode membership is an error.
	 */
	fmode = imf->im6f_st[0];
	if (fmode != MCAST_EXCLUDE) {
		error = EINVAL;
		goto out_imo_locked;
	}

	/*
	 * Deal with error cases up-front:
	 *  Asked to block, but already blocked; or
	 *  Asked to unblock, but nothing to unblock.
	 * If adding a new block entry, allocate it.
	 */
	ims = im6o_match_source(imo, idx, &ssa->sa);
	if ((ims != NULL && doblock) || (ims == NULL && !doblock)) {
		MLD_PRINTF(("%s: source %s %spresent\n", __func__,
		    ip6_sprintf(&ssa->sin6.sin6_addr),
		    doblock ? "" : "not "));
		error = EADDRNOTAVAIL;
		goto out_imo_locked;
	}

	/*
	 * Begin state merge transaction at socket layer.
	 */
	if (doblock) {
		MLD_PRINTF(("%s: %s source\n", __func__, "block"));
		/* Graft a new pending block entry onto the filter. */
		ims = im6f_graft(imf, fmode, &ssa->sin6);
		if (ims == NULL)
			error = ENOMEM;
	} else {
		MLD_PRINTF(("%s: %s source\n", __func__, "allow"));
		/* Mark the existing block entry for removal. */
		error = im6f_prune(imf, &ssa->sin6);
	}

	if (error) {
		MLD_PRINTF(("%s: merge imf state failed\n", __func__));
		goto out_im6f_rollback;
	}

	/*
	 * Begin state merge transaction at MLD layer.
	 */
	IN6M_LOCK(inm);
	MLD_PRINTF(("%s: merge inm state\n", __func__));
	error = in6m_merge(inm, imf);
	if (error) {
		MLD_PRINTF(("%s: failed to merge inm state\n", __func__));
		IN6M_UNLOCK(inm);
		goto out_im6f_rollback;
	}

	MLD_PRINTF(("%s: doing mld downcall\n", __func__));
	error = mld_change_state(inm, &mtp, 0);
	IN6M_UNLOCK(inm);
#if MLD_DEBUG
	if (error)
		MLD_PRINTF(("%s: failed mld downcall\n", __func__));
#endif

out_im6f_rollback:
	/* Commit the socket-layer filter change on success, undo on error. */
	if (error)
		im6f_rollback(imf);
	else
		im6f_commit(imf);

	im6f_reap(imf);

out_imo_locked:
	IM6O_UNLOCK(imo);
	IM6O_REMREF(imo);	/* from in6p_findmoptions() */

	/* schedule timer now that we've dropped the lock(s) */
	mld_set_timeout(&mtp);

	return (error);
}
1570
1571/*
1572 * Given an inpcb, return its multicast options structure pointer.  Accepts
1573 * an unlocked inpcb pointer, but will return it locked.  May sleep.
1574 *
1575 */
static struct ip6_moptions *
in6p_findmoptions(struct inpcb *inp)
{
	struct ip6_moptions	 *imo;
	struct in6_multi	**immp;
	struct in6_mfilter	 *imfp;
	size_t			  idx;

	/* Fast path: options already exist; just take a reference. */
	if ((imo = inp->in6p_moptions) != NULL) {
		IM6O_ADDREF(imo);	/* for caller */
		return (imo);
	}

	imo = ip6_allocmoptions(M_WAITOK);
	if (imo == NULL)
		return (NULL);

	/* Membership and filter arrays start at the minimum size. */
	immp = _MALLOC(sizeof (*immp) * IPV6_MIN_MEMBERSHIPS, M_IP6MOPTS,
	    M_WAITOK | M_ZERO);
	if (immp == NULL) {
		IM6O_REMREF(imo);
		return (NULL);
	}

	imfp = _MALLOC(sizeof (struct in6_mfilter) * IPV6_MIN_MEMBERSHIPS,
	    M_IN6MFILTER, M_WAITOK | M_ZERO);
	if (imfp == NULL) {
		/* Unwind in reverse order of allocation. */
		_FREE(immp, M_IP6MOPTS);
		IM6O_REMREF(imo);
		return (NULL);
	}

	imo->im6o_multicast_ifp = NULL;
	imo->im6o_multicast_hlim = ip6_defmcasthlim;
	imo->im6o_multicast_loop = in6_mcast_loop;
	imo->im6o_num_memberships = 0;
	imo->im6o_max_memberships = IPV6_MIN_MEMBERSHIPS;
	imo->im6o_membership = immp;

	/* Initialize per-group source filters. */
	for (idx = 0; idx < IPV6_MIN_MEMBERSHIPS; idx++)
		im6f_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE);

	imo->im6o_mfilters = imfp;
	inp->in6p_moptions = imo; /* keep reference from ip6_allocmoptions() */
	IM6O_ADDREF(imo);	/* for caller */

	return (imo);
}
1625
1626/*
1627 * Atomically get source filters on a socket for an IPv6 multicast group.
1628 * Called with INP lock held; returns with lock released.
1629 */
1630static int
1631in6p_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
1632{
1633	struct __msfilterreq64	msfr, msfr64;
1634	struct __msfilterreq32	msfr32;
1635	sockunion_t		*gsa;
1636	struct ifnet		*ifp;
1637	struct ip6_moptions	*imo;
1638	struct in6_mfilter	*imf;
1639	struct ip6_msource	*ims;
1640	struct in6_msource	*lims;
1641	struct sockaddr_in6	*psin;
1642	struct sockaddr_storage	*ptss;
1643	struct sockaddr_storage	*tss;
1644	int	 		 error;
1645	size_t		 	 idx, nsrcs, ncsrcs;
1646	user_addr_t 		 tmp_ptr;
1647
1648	imo = inp->in6p_moptions;
1649	VERIFY(imo != NULL);
1650
1651	if (IS_64BIT_PROCESS(current_proc())) {
1652		error = sooptcopyin(sopt, &msfr64,
1653		    sizeof(struct __msfilterreq64),
1654		    sizeof(struct __msfilterreq64));
1655		if (error)
1656			return (error);
1657		/* we never use msfr.msfr_srcs; */
1658		memcpy(&msfr, &msfr64, sizeof(msfr));
1659	} else {
1660		error = sooptcopyin(sopt, &msfr32,
1661		    sizeof(struct __msfilterreq32),
1662		    sizeof(struct __msfilterreq32));
1663		if (error)
1664			return (error);
1665		/* we never use msfr.msfr_srcs; */
1666		memcpy(&msfr, &msfr32, sizeof(msfr));
1667	}
1668
1669	if (msfr.msfr_group.ss_family != AF_INET6 ||
1670	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in6))
1671		return (EINVAL);
1672
1673	gsa = (sockunion_t *)&msfr.msfr_group;
1674	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
1675		return (EINVAL);
1676
1677	ifnet_head_lock_shared();
1678	if (msfr.msfr_ifindex == 0 || (u_int)if_index < msfr.msfr_ifindex) {
1679		ifnet_head_done();
1680		return (EADDRNOTAVAIL);
1681	}
1682	ifp = ifindex2ifnet[msfr.msfr_ifindex];
1683	ifnet_head_done();
1684
1685	if (ifp == NULL)
1686		return (EADDRNOTAVAIL);
1687
1688	if ((size_t) msfr.msfr_nsrcs >
1689	    UINT32_MAX / sizeof(struct sockaddr_storage))
1690		msfr.msfr_nsrcs = UINT32_MAX / sizeof(struct sockaddr_storage);
1691
1692	if (msfr.msfr_nsrcs > in6_mcast_maxsocksrc)
1693		msfr.msfr_nsrcs = in6_mcast_maxsocksrc;
1694
1695	(void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
1696
1697	IM6O_LOCK(imo);
1698	/*
1699	 * Lookup group on the socket.
1700	 */
1701	idx = im6o_match_group(imo, ifp, &gsa->sa);
1702	if (idx == (size_t)-1 || imo->im6o_mfilters == NULL) {
1703		IM6O_UNLOCK(imo);
1704		return (EADDRNOTAVAIL);
1705	}
1706	imf = &imo->im6o_mfilters[idx];
1707
1708	/*
1709	 * Ignore memberships which are in limbo.
1710	 */
1711	if (imf->im6f_st[1] == MCAST_UNDEFINED) {
1712		IM6O_UNLOCK(imo);
1713		return (EAGAIN);
1714	}
1715	msfr.msfr_fmode = imf->im6f_st[1];
1716
1717	/*
1718	 * If the user specified a buffer, copy out the source filter
1719	 * entries to userland gracefully.
1720	 * We only copy out the number of entries which userland
1721	 * has asked for, but we always tell userland how big the
1722	 * buffer really needs to be.
1723	 */
1724	tss = NULL;
1725
1726	if (IS_64BIT_PROCESS(current_proc()))
1727		tmp_ptr = msfr64.msfr_srcs;
1728	else
1729		tmp_ptr = CAST_USER_ADDR_T(msfr32.msfr_srcs);
1730
1731	if (tmp_ptr != USER_ADDR_NULL && msfr.msfr_nsrcs > 0) {
1732		tss = _MALLOC((size_t) msfr.msfr_nsrcs * sizeof(*tss),
1733		    M_TEMP, M_WAITOK | M_ZERO);
1734		if (tss == NULL) {
1735			IM6O_UNLOCK(imo);
1736			return (ENOBUFS);
1737		}
1738		bzero(tss, (size_t) msfr.msfr_nsrcs * sizeof(*tss));
1739	}
1740
1741	/*
1742	 * Count number of sources in-mode at t0.
1743	 * If buffer space exists and remains, copy out source entries.
1744	 */
1745	nsrcs = msfr.msfr_nsrcs;
1746	ncsrcs = 0;
1747	ptss = tss;
1748	RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) {
1749		lims = (struct in6_msource *)ims;
1750		if (lims->im6sl_st[0] == MCAST_UNDEFINED ||
1751		    lims->im6sl_st[0] != imf->im6f_st[0])
1752			continue;
1753		if (tss != NULL && nsrcs > 0) {
1754			psin = (struct sockaddr_in6 *)ptss;
1755			psin->sin6_family = AF_INET6;
1756			psin->sin6_len = sizeof(struct sockaddr_in6);
1757			psin->sin6_addr = lims->im6s_addr;
1758			psin->sin6_port = 0;
1759			--nsrcs;
1760			++ptss;
1761			++ncsrcs;
1762		}
1763	}
1764
1765	IM6O_UNLOCK(imo);
1766
1767	if (tss != NULL) {
1768		error = copyout(tss, tmp_ptr, ncsrcs * sizeof(*tss));
1769		FREE(tss, M_TEMP);
1770		if (error)
1771			return (error);
1772	}
1773
1774	msfr.msfr_nsrcs = ncsrcs;
1775	if (IS_64BIT_PROCESS(current_proc())) {
1776		msfr64.msfr_ifindex = msfr.msfr_ifindex;
1777		msfr64.msfr_fmode   = msfr.msfr_fmode;
1778		msfr64.msfr_nsrcs   = msfr.msfr_nsrcs;
1779		memcpy(&msfr64.msfr_group, &msfr.msfr_group,
1780		    sizeof(struct sockaddr_storage));
1781		error = sooptcopyout(sopt, &msfr64,
1782		    sizeof(struct __msfilterreq64));
1783	} else {
1784		msfr32.msfr_ifindex = msfr.msfr_ifindex;
1785		msfr32.msfr_fmode   = msfr.msfr_fmode;
1786		msfr32.msfr_nsrcs   = msfr.msfr_nsrcs;
1787		memcpy(&msfr64.msfr_group, &msfr.msfr_group,
1788		    sizeof(struct sockaddr_storage));
1789		error = sooptcopyout(sopt, &msfr32,
1790		    sizeof(struct __msfilterreq32));
1791	}
1792
1793	return (error);
1794}
1795
1796/*
1797 * Return the IP multicast options in response to user getsockopt().
1798 */
1799int
1800ip6_getmoptions(struct inpcb *inp, struct sockopt *sopt)
1801{
1802	struct ip6_moptions	*im6o;
1803	int			 error;
1804	u_int			 optval;
1805
1806	im6o = inp->in6p_moptions;
1807	/*
1808	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
1809	 * or is a divert socket, reject it.
1810	 */
1811	if (SOCK_PROTO(inp->inp_socket) == IPPROTO_DIVERT ||
1812	    (SOCK_TYPE(inp->inp_socket) != SOCK_RAW &&
1813	    SOCK_TYPE(inp->inp_socket) != SOCK_DGRAM)) {
1814		return (EOPNOTSUPP);
1815	}
1816
1817	error = 0;
1818	switch (sopt->sopt_name) {
1819	case IPV6_MULTICAST_IF:
1820		if (im6o != NULL)
1821			IM6O_LOCK(im6o);
1822		if (im6o == NULL || im6o->im6o_multicast_ifp == NULL) {
1823			optval = 0;
1824		} else {
1825			optval = im6o->im6o_multicast_ifp->if_index;
1826		}
1827		if (im6o != NULL)
1828			IM6O_UNLOCK(im6o);
1829		error = sooptcopyout(sopt, &optval, sizeof(u_int));
1830		break;
1831
1832	case IPV6_MULTICAST_HOPS:
1833		if (im6o == NULL) {
1834			optval = ip6_defmcasthlim;
1835		} else {
1836			IM6O_LOCK(im6o);
1837			optval = im6o->im6o_multicast_hlim;
1838			IM6O_UNLOCK(im6o);
1839		}
1840		error = sooptcopyout(sopt, &optval, sizeof(u_int));
1841		break;
1842
1843	case IPV6_MULTICAST_LOOP:
1844		if (im6o == NULL) {
1845			optval = in6_mcast_loop; /* XXX VIMAGE */
1846		} else {
1847			IM6O_LOCK(im6o);
1848			optval = im6o->im6o_multicast_loop;
1849			IM6O_UNLOCK(im6o);
1850		}
1851		error = sooptcopyout(sopt, &optval, sizeof(u_int));
1852		break;
1853
1854	case IPV6_MSFILTER:
1855		if (im6o == NULL) {
1856			error = EADDRNOTAVAIL;
1857		} else {
1858			error = in6p_get_source_filters(inp, sopt);
1859		}
1860		break;
1861
1862	default:
1863		error = ENOPROTOOPT;
1864		break;
1865	}
1866
1867	return (error);
1868}
1869
1870/*
1871 * Look up the ifnet to use for a multicast group membership,
1872 * given the address of an IPv6 group.
1873 *
1874 * This routine exists to support legacy IPv6 multicast applications.
1875 *
1876 * If inp is non-NULL and is bound to an interface, use this socket's
1877 * inp_boundif for any required routing table lookup.
1878 *
1879 * If the route lookup fails, return NULL.
1880 *
1881 * FUTURE: Support multiple forwarding tables for IPv6.
1882 *
1883 * Returns NULL if no ifp could be found.
1884 */
1885static struct ifnet *
1886in6p_lookup_mcast_ifp(const struct inpcb *in6p,
1887    const struct sockaddr_in6 *gsin6)
1888{
1889	struct route_in6	 ro6;
1890	struct ifnet		*ifp;
1891	unsigned int		ifscope = IFSCOPE_NONE;
1892
1893	VERIFY(in6p == NULL || (in6p->inp_vflag & INP_IPV6));
1894	VERIFY(gsin6->sin6_family == AF_INET6);
1895	if (IN6_IS_ADDR_MULTICAST(&gsin6->sin6_addr) == 0)
1896		return NULL;
1897
1898	if (in6p != NULL && (in6p->inp_flags & INP_BOUND_IF))
1899		ifscope = in6p->inp_boundifp->if_index;
1900
1901	ifp = NULL;
1902	memset(&ro6, 0, sizeof(struct route_in6));
1903	memcpy(&ro6.ro_dst, gsin6, sizeof(struct sockaddr_in6));
1904	rtalloc_scoped_ign((struct route *)&ro6, 0, ifscope);
1905	if (ro6.ro_rt != NULL) {
1906		ifp = ro6.ro_rt->rt_ifp;
1907		VERIFY(ifp != NULL);
1908	}
1909	ROUTE_RELEASE(&ro6);
1910
1911	return (ifp);
1912}
1913
1914/*
1915 * Since ipv6_mreq contains an ifindex and ip_mreq contains an AF_INET
1916 * address, we need to lookup the AF_INET address when translating an
1917 * ipv6_mreq structure into an ipmreq structure.
1918 * This is used when userland performs multicast setsockopt() on AF_INET6
1919 * sockets with AF_INET multicast addresses (IPv6 v4 mapped addresses).
1920 */
1921static int
1922in6p_lookup_v4addr(struct ipv6_mreq *mreq, struct ip_mreq *v4mreq)
1923{
1924	struct ifnet *ifp;
1925	struct ifaddr *ifa;
1926	struct sockaddr_in *sin;
1927
1928	ifnet_head_lock_shared();
1929	if (mreq->ipv6mr_interface > (unsigned int)if_index) {
1930		ifnet_head_done();
1931		return (EADDRNOTAVAIL);
1932	} else
1933		ifp = ifindex2ifnet[mreq->ipv6mr_interface];
1934	ifnet_head_done();
1935	if (ifp == NULL)
1936		return (EADDRNOTAVAIL);
1937	ifa = ifa_ifpgetprimary(ifp, AF_INET);
1938	if (ifa == NULL)
1939		return (EADDRNOTAVAIL);
1940	sin = (struct sockaddr_in *)(uintptr_t)(size_t)ifa->ifa_addr;
1941	v4mreq->imr_interface.s_addr = sin->sin_addr.s_addr;
1942	IFA_REMREF(ifa);
1943
1944	return (0);
1945}
1946
1947/*
1948 * Join an IPv6 multicast group, possibly with a source.
1949 *
1950 * FIXME: The KAME use of the unspecified address (::)
1951 * to join *all* multicast groups is currently unsupported.
1952 */
1953static int
1954in6p_join_group(struct inpcb *inp, struct sockopt *sopt)
1955{
1956	struct group_source_req		 gsr;
1957	sockunion_t			*gsa, *ssa;
1958	struct ifnet			*ifp;
1959	struct in6_mfilter		*imf;
1960	struct ip6_moptions		*imo;
1961	struct in6_multi		*inm = NULL;
1962	struct in6_msource		*lims = NULL;
1963	size_t				 idx;
1964	int				 error, is_new;
1965	uint32_t			scopeid = 0;
1966	struct mld_tparams		mtp;
1967
1968	bzero(&mtp, sizeof (mtp));
1969	ifp = NULL;
1970	imf = NULL;
1971	error = 0;
1972	is_new = 0;
1973
1974	memset(&gsr, 0, sizeof(struct group_source_req));
1975	gsa = (sockunion_t *)&gsr.gsr_group;
1976	gsa->ss.ss_family = AF_UNSPEC;
1977	ssa = (sockunion_t *)&gsr.gsr_source;
1978	ssa->ss.ss_family = AF_UNSPEC;
1979
1980	/*
1981	 * Chew everything into struct group_source_req.
1982	 * Overwrite the port field if present, as the sockaddr
1983	 * being copied in may be matched with a binary comparison.
1984	 * Ignore passed-in scope ID.
1985	 */
1986	switch (sopt->sopt_name) {
1987	case IPV6_JOIN_GROUP: {
1988		struct ipv6_mreq mreq;
1989    		struct sockaddr_in6 *gsin6;
1990
1991		error = sooptcopyin(sopt, &mreq, sizeof(struct ipv6_mreq),
1992		    sizeof(struct ipv6_mreq));
1993		if (error)
1994			return (error);
1995		if (IN6_IS_ADDR_V4MAPPED(&mreq.ipv6mr_multiaddr)) {
1996			struct ip_mreq v4mreq;
1997			struct sockopt v4sopt;
1998
1999			v4mreq.imr_multiaddr.s_addr =
2000			    mreq.ipv6mr_multiaddr.s6_addr32[3];
2001			if (mreq.ipv6mr_interface == 0)
2002				v4mreq.imr_interface.s_addr = INADDR_ANY;
2003			else
2004				error = in6p_lookup_v4addr(&mreq, &v4mreq);
2005			if (error)
2006				return (error);
2007			v4sopt.sopt_dir     = SOPT_SET;
2008			v4sopt.sopt_level   = sopt->sopt_level;
2009			v4sopt.sopt_name    = IP_ADD_MEMBERSHIP;
2010			v4sopt.sopt_val     = CAST_USER_ADDR_T(&v4mreq);
2011			v4sopt.sopt_valsize = sizeof(v4mreq);
2012			v4sopt.sopt_p       = kernproc;
2013
2014			return (inp_join_group(inp, &v4sopt));
2015		}
2016		gsa->sin6.sin6_family = AF_INET6;
2017		gsa->sin6.sin6_len = sizeof(struct sockaddr_in6);
2018		gsa->sin6.sin6_addr = mreq.ipv6mr_multiaddr;
2019
2020		gsin6 = &gsa->sin6;
2021
2022		/* Only allow IPv6 multicast addresses */
2023		if (IN6_IS_ADDR_MULTICAST(&gsin6->sin6_addr) == 0) {
2024			return (EINVAL);
2025		}
2026
2027		if (mreq.ipv6mr_interface == 0) {
2028			ifp = in6p_lookup_mcast_ifp(inp, gsin6);
2029		} else {
2030			ifnet_head_lock_shared();
2031			if ((u_int)if_index < mreq.ipv6mr_interface) {
2032				ifnet_head_done();
2033				return (EADDRNOTAVAIL);
2034			    }
2035			ifp = ifindex2ifnet[mreq.ipv6mr_interface];
2036			ifnet_head_done();
2037		}
2038		MLD_PRINTF(("%s: ipv6mr_interface = %d, ifp = 0x%llx\n",
2039		    __func__, mreq.ipv6mr_interface,
2040		    (uint64_t)VM_KERNEL_ADDRPERM(ifp)));
2041		break;
2042	}
2043
2044	case MCAST_JOIN_GROUP:
2045	case MCAST_JOIN_SOURCE_GROUP:
2046		if (sopt->sopt_name == MCAST_JOIN_GROUP) {
2047			error = sooptcopyin(sopt, &gsr,
2048			    sizeof(struct group_req),
2049			    sizeof(struct group_req));
2050		} else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
2051			error = sooptcopyin(sopt, &gsr,
2052			    sizeof(struct group_source_req),
2053			    sizeof(struct group_source_req));
2054		}
2055		if (error)
2056			return (error);
2057
2058		if (gsa->sin6.sin6_family != AF_INET6 ||
2059		    gsa->sin6.sin6_len != sizeof(struct sockaddr_in6))
2060			return (EINVAL);
2061
2062		if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
2063			if (ssa->sin6.sin6_family != AF_INET6 ||
2064			    ssa->sin6.sin6_len != sizeof(struct sockaddr_in6))
2065				return (EINVAL);
2066			if (IN6_IS_ADDR_MULTICAST(&ssa->sin6.sin6_addr))
2067				return (EINVAL);
2068			/*
2069			 * TODO: Validate embedded scope ID in source
2070			 * list entry against passed-in ifp, if and only
2071			 * if source list filter entry is iface or node local.
2072			 */
2073			in6_clearscope(&ssa->sin6.sin6_addr);
2074			ssa->sin6.sin6_port = 0;
2075			ssa->sin6.sin6_scope_id = 0;
2076		}
2077
2078		ifnet_head_lock_shared();
2079		if (gsr.gsr_interface == 0 ||
2080		    (u_int)if_index < gsr.gsr_interface) {
2081			ifnet_head_done();
2082			return (EADDRNOTAVAIL);
2083		}
2084		ifp = ifindex2ifnet[gsr.gsr_interface];
2085		ifnet_head_done();
2086		break;
2087
2088	default:
2089		MLD_PRINTF(("%s: unknown sopt_name %d\n",
2090		    __func__, sopt->sopt_name));
2091		return (EOPNOTSUPP);
2092		break;
2093	}
2094
2095	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
2096		return (EINVAL);
2097
2098	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
2099		return (EADDRNOTAVAIL);
2100
2101	gsa->sin6.sin6_port = 0;
2102	gsa->sin6.sin6_scope_id = 0;
2103
2104	/*
2105	 * Always set the scope zone ID on memberships created from userland.
2106	 * Use the passed-in ifp to do this.
2107	 */
2108	(void)in6_setscope(&gsa->sin6.sin6_addr, ifp, &scopeid);
2109	/*
2110	 * Some addresses are not valid without an embedded scopeid.
2111	 * This check must be present because otherwise we will later hit
2112	 * a VERIFY() in in6_mc_join().
2113	 */
2114	if ((IN6_IS_ADDR_MC_LINKLOCAL(&gsa->sin6.sin6_addr) ||
2115	    IN6_IS_ADDR_MC_INTFACELOCAL(&gsa->sin6.sin6_addr)) &&
2116	    (scopeid == 0 || gsa->sin6.sin6_addr.s6_addr16[1] == 0))
2117		return (EINVAL);
2118
2119	imo = in6p_findmoptions(inp);
2120	if (imo == NULL)
2121		return (ENOMEM);
2122
2123	IM6O_LOCK(imo);
2124	idx = im6o_match_group(imo, ifp, &gsa->sa);
2125	if (idx == (size_t)-1) {
2126		is_new = 1;
2127	} else {
2128		inm = imo->im6o_membership[idx];
2129		imf = &imo->im6o_mfilters[idx];
2130		if (ssa->ss.ss_family != AF_UNSPEC) {
2131			/*
2132			 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership
2133			 * is an error. On an existing inclusive membership,
2134			 * it just adds the source to the filter list.
2135			 */
2136			if (imf->im6f_st[1] != MCAST_INCLUDE) {
2137				error = EINVAL;
2138				goto out_imo_locked;
2139			}
2140			/*
2141			 * Throw out duplicates.
2142			 *
2143			 * XXX FIXME: This makes a naive assumption that
2144			 * even if entries exist for *ssa in this imf,
2145			 * they will be rejected as dupes, even if they
2146			 * are not valid in the current mode (in-mode).
2147			 *
2148			 * in6_msource is transactioned just as for anything
2149			 * else in SSM -- but note naive use of in6m_graft()
2150			 * below for allocating new filter entries.
2151			 *
2152			 * This is only an issue if someone mixes the
2153			 * full-state SSM API with the delta-based API,
2154			 * which is discouraged in the relevant RFCs.
2155			 */
2156			lims = im6o_match_source(imo, idx, &ssa->sa);
2157			if (lims != NULL /*&&
2158			    lims->im6sl_st[1] == MCAST_INCLUDE*/) {
2159				error = EADDRNOTAVAIL;
2160				goto out_imo_locked;
2161			}
2162		} else {
2163			/*
2164			 * MCAST_JOIN_GROUP on an existing exclusive
2165			 * membership is an error; return EADDRINUSE
2166			 * to preserve 4.4BSD API idempotence, and
2167			 * avoid tedious detour to code below.
2168			 * NOTE: This is bending RFC 3678 a bit.
2169			 *
2170			 * On an existing inclusive membership, this is also
2171			 * an error; if you want to change filter mode,
2172			 * you must use the userland API setsourcefilter().
2173			 * XXX We don't reject this for imf in UNDEFINED
2174			 * state at t1, because allocation of a filter
2175			 * is atomic with allocation of a membership.
2176			 */
2177			error = EINVAL;
2178			/* See comments above for EADDRINUSE */
2179			if (imf->im6f_st[1] == MCAST_EXCLUDE)
2180				error = EADDRINUSE;
2181			goto out_imo_locked;
2182		}
2183	}
2184
2185	/*
2186	 * Begin state merge transaction at socket layer.
2187	 */
2188
2189	if (is_new) {
2190		if (imo->im6o_num_memberships == imo->im6o_max_memberships) {
2191			error = im6o_grow(imo, 0);
2192			if (error)
2193				goto out_imo_locked;
2194		}
2195		/*
2196		 * Allocate the new slot upfront so we can deal with
2197		 * grafting the new source filter in same code path
2198		 * as for join-source on existing membership.
2199		 */
2200		idx = imo->im6o_num_memberships;
2201		imo->im6o_membership[idx] = NULL;
2202		imo->im6o_num_memberships++;
2203		VERIFY(imo->im6o_mfilters != NULL);
2204		imf = &imo->im6o_mfilters[idx];
2205		VERIFY(RB_EMPTY(&imf->im6f_sources));
2206	}
2207
2208	/*
2209	 * Graft new source into filter list for this inpcb's
2210	 * membership of the group. The in6_multi may not have
2211	 * been allocated yet if this is a new membership, however,
2212	 * the in_mfilter slot will be allocated and must be initialized.
2213	 *
2214	 * Note: Grafting of exclusive mode filters doesn't happen
2215	 * in this path.
2216	 * XXX: Should check for non-NULL lims (node exists but may
2217	 * not be in-mode) for interop with full-state API.
2218	 */
2219	if (ssa->ss.ss_family != AF_UNSPEC) {
2220		/* Membership starts in IN mode */
2221		if (is_new) {
2222			MLD_PRINTF(("%s: new join w/source\n", __func__);
2223			im6f_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE));
2224		} else {
2225			MLD_PRINTF(("%s: %s source\n", __func__, "allow"));
2226		}
2227		lims = im6f_graft(imf, MCAST_INCLUDE, &ssa->sin6);
2228		if (lims == NULL) {
2229			MLD_PRINTF(("%s: merge imf state failed\n",
2230			    __func__));
2231			error = ENOMEM;
2232			goto out_im6o_free;
2233		}
2234	} else {
2235		/* No address specified; Membership starts in EX mode */
2236		if (is_new) {
2237			MLD_PRINTF(("%s: new join w/o source", __func__));
2238			im6f_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE);
2239		}
2240	}
2241
2242	/*
2243	 * Begin state merge transaction at MLD layer.
2244	 */
2245
2246	if (is_new) {
2247		VERIFY(inm == NULL);
2248		error = in6_mc_join(ifp, &gsa->sin6.sin6_addr, imf, &inm, 0);
2249		VERIFY(inm != NULL || error != 0);
2250		if (error)
2251			goto out_im6o_free;
2252		imo->im6o_membership[idx] = inm; /* from in6_mc_join() */
2253	} else {
2254		MLD_PRINTF(("%s: merge inm state\n", __func__));
2255		IN6M_LOCK(inm);
2256		error = in6m_merge(inm, imf);
2257		if (error) {
2258			MLD_PRINTF(("%s: failed to merge inm state\n",
2259			    __func__));
2260			IN6M_UNLOCK(inm);
2261			goto out_im6f_rollback;
2262		}
2263		MLD_PRINTF(("%s: doing mld downcall\n", __func__));
2264		error = mld_change_state(inm, &mtp, 0);
2265		IN6M_UNLOCK(inm);
2266		if (error) {
2267			MLD_PRINTF(("%s: failed mld downcall\n",
2268			    __func__));
2269			goto out_im6f_rollback;
2270		}
2271	}
2272
2273out_im6f_rollback:
2274	if (error) {
2275		im6f_rollback(imf);
2276		if (is_new)
2277			im6f_purge(imf);
2278		else
2279			im6f_reap(imf);
2280	} else {
2281		im6f_commit(imf);
2282	}
2283
2284out_im6o_free:
2285	if (error && is_new) {
2286		VERIFY(inm == NULL);
2287		imo->im6o_membership[idx] = NULL;
2288		--imo->im6o_num_memberships;
2289	}
2290
2291out_imo_locked:
2292	IM6O_UNLOCK(imo);
2293	IM6O_REMREF(imo);	/* from in6p_findmoptions() */
2294
2295	/* schedule timer now that we've dropped the lock(s) */
2296	mld_set_timeout(&mtp);
2297
2298	return (error);
2299}
2300
2301/*
2302 * Leave an IPv6 multicast group on an inpcb, possibly with a source.
2303 */
2304static int
2305in6p_leave_group(struct inpcb *inp, struct sockopt *sopt)
2306{
2307	struct ipv6_mreq		 mreq;
2308	struct group_source_req		 gsr;
2309	sockunion_t			*gsa, *ssa;
2310	struct ifnet			*ifp;
2311	struct in6_mfilter		*imf;
2312	struct ip6_moptions		*imo;
2313	struct in6_msource		*ims;
2314	struct in6_multi		*inm = NULL;
2315	uint32_t			 ifindex = 0;
2316	size_t				 idx;
2317	int				 error, is_final;
2318	struct mld_tparams		 mtp;
2319
2320	bzero(&mtp, sizeof (mtp));
2321	ifp = NULL;
2322	error = 0;
2323	is_final = 1;
2324
2325	memset(&gsr, 0, sizeof(struct group_source_req));
2326	gsa = (sockunion_t *)&gsr.gsr_group;
2327	gsa->ss.ss_family = AF_UNSPEC;
2328	ssa = (sockunion_t *)&gsr.gsr_source;
2329	ssa->ss.ss_family = AF_UNSPEC;
2330
2331	/*
2332	 * Chew everything passed in up into a struct group_source_req
2333	 * as that is easier to process.
2334	 * Note: Any embedded scope ID in the multicast group passed
2335	 * in by userland is ignored, the interface index is the recommended
2336	 * mechanism to specify an interface; see below.
2337	 */
2338	switch (sopt->sopt_name) {
2339	case IPV6_LEAVE_GROUP: {
2340    		struct sockaddr_in6 *gsin6;
2341
2342		error = sooptcopyin(sopt, &mreq, sizeof(struct ipv6_mreq),
2343		    sizeof(struct ipv6_mreq));
2344		if (error)
2345			return (error);
2346		if (IN6_IS_ADDR_V4MAPPED(&mreq.ipv6mr_multiaddr)) {
2347			struct ip_mreq v4mreq;
2348			struct sockopt v4sopt;
2349
2350			v4mreq.imr_multiaddr.s_addr =
2351			    mreq.ipv6mr_multiaddr.s6_addr32[3];
2352			if (mreq.ipv6mr_interface == 0)
2353				v4mreq.imr_interface.s_addr = INADDR_ANY;
2354			else
2355				error = in6p_lookup_v4addr(&mreq, &v4mreq);
2356			if (error)
2357				return (error);
2358			v4sopt.sopt_dir     = SOPT_SET;
2359			v4sopt.sopt_level   = sopt->sopt_level;
2360			v4sopt.sopt_name    = IP_DROP_MEMBERSHIP;
2361			v4sopt.sopt_val     = CAST_USER_ADDR_T(&v4mreq);
2362			v4sopt.sopt_valsize = sizeof(v4mreq);
2363			v4sopt.sopt_p       = kernproc;
2364
2365			return (inp_leave_group(inp, &v4sopt));
2366		}
2367		gsa->sin6.sin6_family = AF_INET6;
2368		gsa->sin6.sin6_len = sizeof(struct sockaddr_in6);
2369		gsa->sin6.sin6_addr = mreq.ipv6mr_multiaddr;
2370		gsa->sin6.sin6_port = 0;
2371		gsa->sin6.sin6_scope_id = 0;
2372		ifindex = mreq.ipv6mr_interface;
2373		gsin6 = &gsa->sin6;
2374		/* Only allow IPv6 multicast addresses */
2375		if (IN6_IS_ADDR_MULTICAST(&gsin6->sin6_addr) == 0) {
2376			return (EINVAL);
2377		}
2378		break;
2379	}
2380
2381	case MCAST_LEAVE_GROUP:
2382	case MCAST_LEAVE_SOURCE_GROUP:
2383		if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
2384			error = sooptcopyin(sopt, &gsr,
2385			    sizeof(struct group_req),
2386			    sizeof(struct group_req));
2387		} else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
2388			error = sooptcopyin(sopt, &gsr,
2389			    sizeof(struct group_source_req),
2390			    sizeof(struct group_source_req));
2391		}
2392		if (error)
2393			return (error);
2394
2395		if (gsa->sin6.sin6_family != AF_INET6 ||
2396		    gsa->sin6.sin6_len != sizeof(struct sockaddr_in6))
2397			return (EINVAL);
2398		if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
2399			if (ssa->sin6.sin6_family != AF_INET6 ||
2400			    ssa->sin6.sin6_len != sizeof(struct sockaddr_in6))
2401				return (EINVAL);
2402			if (IN6_IS_ADDR_MULTICAST(&ssa->sin6.sin6_addr))
2403				return (EINVAL);
2404			/*
2405			 * TODO: Validate embedded scope ID in source
2406			 * list entry against passed-in ifp, if and only
2407			 * if source list filter entry is iface or node local.
2408			 */
2409			in6_clearscope(&ssa->sin6.sin6_addr);
2410		}
2411		gsa->sin6.sin6_port = 0;
2412		gsa->sin6.sin6_scope_id = 0;
2413		ifindex = gsr.gsr_interface;
2414		break;
2415
2416	default:
2417		MLD_PRINTF(("%s: unknown sopt_name %d\n",
2418		    __func__, sopt->sopt_name));
2419		return (EOPNOTSUPP);
2420		break;
2421	}
2422
2423	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
2424		return (EINVAL);
2425
2426	/*
2427	 * Validate interface index if provided. If no interface index
2428	 * was provided separately, attempt to look the membership up
2429	 * from the default scope as a last resort to disambiguate
2430	 * the membership we are being asked to leave.
2431	 * XXX SCOPE6 lock potentially taken here.
2432	 */
2433	if (ifindex != 0) {
2434		ifnet_head_lock_shared();
2435		if ((u_int)if_index < ifindex) {
2436			ifnet_head_done();
2437			return (EADDRNOTAVAIL);
2438		}
2439		ifp = ifindex2ifnet[ifindex];
2440		ifnet_head_done();
2441		if (ifp == NULL)
2442			return (EADDRNOTAVAIL);
2443		(void) in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
2444	} else {
2445		error = sa6_embedscope(&gsa->sin6, ip6_use_defzone);
2446		if (error)
2447			return (EADDRNOTAVAIL);
2448		/*
2449		 * Some badly behaved applications don't pass an ifindex
2450		 * or a scope ID, which is an API violation. In this case,
2451		 * perform a lookup as per a v6 join.
2452		 *
2453		 * XXX For now, stomp on zone ID for the corner case.
2454		 * This is not the 'KAME way', but we need to see the ifp
2455		 * directly until such time as this implementation is
2456		 * refactored, assuming the scope IDs are the way to go.
2457		 */
2458		ifindex = ntohs(gsa->sin6.sin6_addr.s6_addr16[1]);
2459		if (ifindex == 0) {
2460			MLD_PRINTF(("%s: warning: no ifindex, looking up "
2461			    "ifp for group %s.\n", __func__,
2462			    ip6_sprintf(&gsa->sin6.sin6_addr)));
2463			ifp = in6p_lookup_mcast_ifp(inp, &gsa->sin6);
2464		} else {
2465			ifnet_head_lock_shared();
2466			ifp = ifindex2ifnet[ifindex];
2467			ifnet_head_done();
2468		}
2469		if (ifp == NULL)
2470			return (EADDRNOTAVAIL);
2471	}
2472
2473	VERIFY(ifp != NULL);
2474	MLD_PRINTF(("%s: ifp = 0x%llx\n", __func__,
2475	    (uint64_t)VM_KERNEL_ADDRPERM(ifp)));
2476
2477	/*
2478	 * Find the membership in the membership array.
2479	 */
2480	imo = in6p_findmoptions(inp);
2481	if (imo == NULL)
2482		return (ENOMEM);
2483
2484	IM6O_LOCK(imo);
2485	idx = im6o_match_group(imo, ifp, &gsa->sa);
2486	if (idx == (size_t)-1) {
2487		error = EADDRNOTAVAIL;
2488		goto out_locked;
2489	}
2490	inm = imo->im6o_membership[idx];
2491	imf = &imo->im6o_mfilters[idx];
2492
2493	if (ssa->ss.ss_family != AF_UNSPEC)
2494		is_final = 0;
2495
2496	/*
2497	 * Begin state merge transaction at socket layer.
2498	 */
2499
2500	/*
2501	 * If we were instructed only to leave a given source, do so.
2502	 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships.
2503	 */
2504	if (is_final) {
2505		im6f_leave(imf);
2506	} else {
2507		if (imf->im6f_st[0] == MCAST_EXCLUDE) {
2508			error = EADDRNOTAVAIL;
2509			goto out_locked;
2510		}
2511		ims = im6o_match_source(imo, idx, &ssa->sa);
2512		if (ims == NULL) {
2513			MLD_PRINTF(("%s: source %s %spresent\n", __func__,
2514			    ip6_sprintf(&ssa->sin6.sin6_addr),
2515			    "not "));
2516			error = EADDRNOTAVAIL;
2517			goto out_locked;
2518		}
2519		MLD_PRINTF(("%s: %s source\n", __func__, "block"));
2520		error = im6f_prune(imf, &ssa->sin6);
2521		if (error) {
2522			MLD_PRINTF(("%s: merge imf state failed\n",
2523			    __func__));
2524			goto out_locked;
2525		}
2526	}
2527
2528	/*
2529	 * Begin state merge transaction at MLD layer.
2530	 */
2531
2532	if (is_final) {
2533		/*
2534		 * Give up the multicast address record to which
2535		 * the membership points.  Reference held in im6o
2536		 * will be released below.
2537		 */
2538		(void) in6_mc_leave(inm, imf);
2539	} else {
2540		MLD_PRINTF(("%s: merge inm state\n", __func__));
2541		IN6M_LOCK(inm);
2542		error = in6m_merge(inm, imf);
2543		if (error) {
2544			MLD_PRINTF(("%s: failed to merge inm state\n",
2545			    __func__));
2546			IN6M_UNLOCK(inm);
2547			goto out_im6f_rollback;
2548		}
2549
2550		MLD_PRINTF(("%s: doing mld downcall\n", __func__));
2551		error = mld_change_state(inm, &mtp, 0);
2552		if (error) {
2553			MLD_PRINTF(("%s: failed mld downcall\n", __func__));
2554		}
2555		IN6M_UNLOCK(inm);
2556	}
2557
2558out_im6f_rollback:
2559	if (error)
2560		im6f_rollback(imf);
2561	else
2562		im6f_commit(imf);
2563
2564	im6f_reap(imf);
2565
2566	if (is_final) {
2567		/* Remove the gap in the membership array. */
2568		VERIFY(inm == imo->im6o_membership[idx]);
2569		imo->im6o_membership[idx] = NULL;
2570		IN6M_REMREF(inm);
2571		for (++idx; idx < imo->im6o_num_memberships; ++idx) {
2572			imo->im6o_membership[idx-1] = imo->im6o_membership[idx];
2573			imo->im6o_mfilters[idx-1] = imo->im6o_mfilters[idx];
2574		}
2575		imo->im6o_num_memberships--;
2576	}
2577
2578out_locked:
2579	IM6O_UNLOCK(imo);
2580	IM6O_REMREF(imo);	/* from in6p_findmoptions() */
2581
2582	/* schedule timer now that we've dropped the lock(s) */
2583	mld_set_timeout(&mtp);
2584
2585	return (error);
2586}
2587
2588/*
2589 * Select the interface for transmitting IPv6 multicast datagrams.
2590 *
2591 * Either an instance of struct in6_addr or an instance of struct ipv6_mreqn
2592 * may be passed to this socket option. An address of in6addr_any or an
2593 * interface index of 0 is used to remove a previous selection.
2594 * When no interface is selected, one is chosen for every send.
2595 */
2596static int
2597in6p_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
2598{
2599	struct ifnet		*ifp;
2600	struct ip6_moptions	*imo;
2601	u_int			 ifindex;
2602	int			 error;
2603
2604	if (sopt->sopt_valsize != sizeof(u_int))
2605		return (EINVAL);
2606
2607	error = sooptcopyin(sopt, &ifindex, sizeof(u_int), sizeof(u_int));
2608	if (error)
2609		return (error);
2610
2611	ifnet_head_lock_shared();
2612	if ((u_int)if_index < ifindex) {
2613		ifnet_head_done();
2614		return (EINVAL);
2615	}
2616
2617	ifp = ifindex2ifnet[ifindex];
2618	ifnet_head_done();
2619	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
2620		return (EADDRNOTAVAIL);
2621
2622	imo = in6p_findmoptions(inp);
2623	if (imo == NULL)
2624		return (ENOMEM);
2625
2626	IM6O_LOCK(imo);
2627	imo->im6o_multicast_ifp = ifp;
2628	IM6O_UNLOCK(imo);
2629	IM6O_REMREF(imo);	/* from in6p_findmoptions() */
2630
2631	return (0);
2632}
2633
2634/*
2635 * Atomically set source filters on a socket for an IPv6 multicast group.
2636 *
2637 */
2638static int
2639in6p_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
2640{
2641	struct __msfilterreq64	 msfr, msfr64;
2642	struct __msfilterreq32	 msfr32;
2643	sockunion_t		*gsa;
2644	struct ifnet		*ifp;
2645	struct in6_mfilter	*imf;
2646	struct ip6_moptions	*imo;
2647	struct in6_multi	*inm;
2648	size_t			 idx;
2649	int			 error;
2650	user_addr_t		 tmp_ptr;
2651	struct mld_tparams	 mtp;
2652
2653	bzero(&mtp, sizeof (mtp));
2654
2655	if (IS_64BIT_PROCESS(current_proc())) {
2656		error = sooptcopyin(sopt, &msfr64,
2657		    sizeof(struct __msfilterreq64),
2658		    sizeof(struct __msfilterreq64));
2659		if (error)
2660			return (error);
2661		/* we never use msfr.msfr_srcs; */
2662		memcpy(&msfr, &msfr64, sizeof(msfr));
2663	} else {
2664		error = sooptcopyin(sopt, &msfr32,
2665		    sizeof(struct __msfilterreq32),
2666		    sizeof(struct __msfilterreq32));
2667		if (error)
2668			return (error);
2669		/* we never use msfr.msfr_srcs; */
2670		memcpy(&msfr, &msfr32, sizeof(msfr));
2671	}
2672
2673	if ((size_t) msfr.msfr_nsrcs >
2674	    UINT32_MAX / sizeof(struct sockaddr_storage))
2675		msfr.msfr_nsrcs = UINT32_MAX / sizeof(struct sockaddr_storage);
2676
2677	if (msfr.msfr_nsrcs > in6_mcast_maxsocksrc)
2678		return (ENOBUFS);
2679
2680	if (msfr.msfr_fmode != MCAST_EXCLUDE &&
2681	     msfr.msfr_fmode != MCAST_INCLUDE)
2682		return (EINVAL);
2683
2684	if (msfr.msfr_group.ss_family != AF_INET6 ||
2685	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in6))
2686		return (EINVAL);
2687
2688	gsa = (sockunion_t *)&msfr.msfr_group;
2689	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
2690		return (EINVAL);
2691
2692	gsa->sin6.sin6_port = 0;	/* ignore port */
2693
2694	ifnet_head_lock_shared();
2695	if (msfr.msfr_ifindex == 0 || (u_int)if_index < msfr.msfr_ifindex) {
2696		ifnet_head_done();
2697		return (EADDRNOTAVAIL);
2698	}
2699	ifp = ifindex2ifnet[msfr.msfr_ifindex];
2700	ifnet_head_done();
2701	if (ifp == NULL)
2702		return (EADDRNOTAVAIL);
2703
2704	(void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
2705
2706	/*
2707	 * Take the INP write lock.
2708	 * Check if this socket is a member of this group.
2709	 */
2710	imo = in6p_findmoptions(inp);
2711	if (imo == NULL)
2712		return (ENOMEM);
2713
2714	IM6O_LOCK(imo);
2715	idx = im6o_match_group(imo, ifp, &gsa->sa);
2716	if (idx == (size_t)-1 || imo->im6o_mfilters == NULL) {
2717		error = EADDRNOTAVAIL;
2718		goto out_imo_locked;
2719	}
2720	inm = imo->im6o_membership[idx];
2721	imf = &imo->im6o_mfilters[idx];
2722
2723	/*
2724	 * Begin state merge transaction at socket layer.
2725	 */
2726
2727	imf->im6f_st[1] = msfr.msfr_fmode;
2728
2729	/*
2730	 * Apply any new source filters, if present.
2731	 * Make a copy of the user-space source vector so
2732	 * that we may copy them with a single copyin. This
2733	 * allows us to deal with page faults up-front.
2734	 */
2735	if (msfr.msfr_nsrcs > 0) {
2736		struct in6_msource	*lims;
2737		struct sockaddr_in6	*psin;
2738		struct sockaddr_storage	*kss, *pkss;
2739		unsigned int		 i;
2740
2741		if (IS_64BIT_PROCESS(current_proc()))
2742			tmp_ptr = msfr64.msfr_srcs;
2743		else
2744			tmp_ptr = CAST_USER_ADDR_T(msfr32.msfr_srcs);
2745
2746		MLD_PRINTF(("%s: loading %lu source list entries\n",
2747		    __func__, (unsigned long)msfr.msfr_nsrcs));
2748		kss = _MALLOC((size_t) msfr.msfr_nsrcs * sizeof(*kss),
2749		    M_TEMP, M_WAITOK);
2750		if (kss == NULL) {
2751			error = ENOMEM;
2752			goto out_imo_locked;
2753		}
2754
2755		error = copyin(tmp_ptr, kss,
2756		    (size_t) msfr.msfr_nsrcs * sizeof(*kss));
2757		if (error) {
2758			FREE(kss, M_TEMP);
2759			goto out_imo_locked;
2760		}
2761
2762		/*
2763		 * Mark all source filters as UNDEFINED at t1.
2764		 * Restore new group filter mode, as im6f_leave()
2765		 * will set it to INCLUDE.
2766		 */
2767		im6f_leave(imf);
2768		imf->im6f_st[1] = msfr.msfr_fmode;
2769
2770		/*
2771		 * Update socket layer filters at t1, lazy-allocating
2772		 * new entries. This saves a bunch of memory at the
2773		 * cost of one RB_FIND() per source entry; duplicate
2774		 * entries in the msfr_nsrcs vector are ignored.
2775		 * If we encounter an error, rollback transaction.
2776		 *
2777		 * XXX This too could be replaced with a set-symmetric
2778		 * difference like loop to avoid walking from root
2779		 * every time, as the key space is common.
2780		 */
2781		for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) {
2782			psin = (struct sockaddr_in6 *)pkss;
2783			if (psin->sin6_family != AF_INET6) {
2784				error = EAFNOSUPPORT;
2785				break;
2786			}
2787			if (psin->sin6_len != sizeof(struct sockaddr_in6)) {
2788				error = EINVAL;
2789				break;
2790			}
2791			if (IN6_IS_ADDR_MULTICAST(&psin->sin6_addr)) {
2792				error = EINVAL;
2793				break;
2794			}
2795			/*
2796			 * TODO: Validate embedded scope ID in source
2797			 * list entry against passed-in ifp, if and only
2798			 * if source list filter entry is iface or node local.
2799			 */
2800			in6_clearscope(&psin->sin6_addr);
2801			error = im6f_get_source(imf, psin, &lims);
2802			if (error)
2803				break;
2804			lims->im6sl_st[1] = imf->im6f_st[1];
2805		}
2806		FREE(kss, M_TEMP);
2807	}
2808
2809	if (error)
2810		goto out_im6f_rollback;
2811
2812	/*
2813	 * Begin state merge transaction at MLD layer.
2814	 */
2815	IN6M_LOCK(inm);
2816	MLD_PRINTF(("%s: merge inm state\n", __func__));
2817	error = in6m_merge(inm, imf);
2818	if (error) {
2819		MLD_PRINTF(("%s: failed to merge inm state\n", __func__));
2820		IN6M_UNLOCK(inm);
2821		goto out_im6f_rollback;
2822	}
2823
2824	MLD_PRINTF(("%s: doing mld downcall\n", __func__));
2825	error = mld_change_state(inm, &mtp, 0);
2826	IN6M_UNLOCK(inm);
2827#if MLD_DEBUG
2828	if (error)
2829		MLD_PRINTF(("%s: failed mld downcall\n", __func__));
2830#endif
2831
2832out_im6f_rollback:
2833	if (error)
2834		im6f_rollback(imf);
2835	else
2836		im6f_commit(imf);
2837
2838	im6f_reap(imf);
2839
2840out_imo_locked:
2841	IM6O_UNLOCK(imo);
2842	IM6O_REMREF(imo);	/* from in6p_findmoptions() */
2843
2844	/* schedule timer now that we've dropped the lock(s) */
2845	mld_set_timeout(&mtp);
2846
2847	return (error);
2848}
2849
2850/*
2851 * Set the IP multicast options in response to user setsockopt().
2852 *
2853 * Many of the socket options handled in this function duplicate the
2854 * functionality of socket options in the regular unicast API. However,
2855 * it is not possible to merge the duplicate code, because the idempotence
2856 * of the IPv6 multicast part of the BSD Sockets API must be preserved;
2857 * the effects of these options must be treated as separate and distinct.
2858 *
2859 */
2860int
2861ip6_setmoptions(struct inpcb *inp, struct sockopt *sopt)
2862{
2863	struct ip6_moptions	*im6o;
2864	int			 error;
2865
2866	error = 0;
2867
2868	/*
2869	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
2870	 * or is a divert socket, reject it.
2871	 */
2872	if (SOCK_PROTO(inp->inp_socket) == IPPROTO_DIVERT ||
2873	    (SOCK_TYPE(inp->inp_socket) != SOCK_RAW &&
2874	     SOCK_TYPE(inp->inp_socket) != SOCK_DGRAM))
2875		return (EOPNOTSUPP);
2876
2877	switch (sopt->sopt_name) {
2878	case IPV6_MULTICAST_IF:
2879		error = in6p_set_multicast_if(inp, sopt);
2880		break;
2881
2882	case IPV6_MULTICAST_HOPS: {
2883		int hlim;
2884
2885		if (sopt->sopt_valsize != sizeof(int)) {
2886			error = EINVAL;
2887			break;
2888		}
2889		error = sooptcopyin(sopt, &hlim, sizeof(hlim), sizeof(int));
2890		if (error)
2891			break;
2892		if (hlim < -1 || hlim > 255) {
2893			error = EINVAL;
2894			break;
2895		} else if (hlim == -1) {
2896			hlim = ip6_defmcasthlim;
2897		}
2898		im6o = in6p_findmoptions(inp);
2899		if (im6o == NULL) {
2900			error = ENOMEM;
2901			break;
2902		}
2903		IM6O_LOCK(im6o);
2904		im6o->im6o_multicast_hlim = hlim;
2905		IM6O_UNLOCK(im6o);
2906		IM6O_REMREF(im6o);	/* from in6p_findmoptions() */
2907		break;
2908	}
2909
2910	case IPV6_MULTICAST_LOOP: {
2911		u_int loop;
2912
2913		/*
2914		 * Set the loopback flag for outgoing multicast packets.
2915		 * Must be zero or one.
2916		 */
2917		if (sopt->sopt_valsize != sizeof(u_int)) {
2918			error = EINVAL;
2919			break;
2920		}
2921		error = sooptcopyin(sopt, &loop, sizeof(u_int), sizeof(u_int));
2922		if (error)
2923			break;
2924		if (loop > 1) {
2925			error = EINVAL;
2926			break;
2927		}
2928		im6o = in6p_findmoptions(inp);
2929		if (im6o == NULL) {
2930			error = ENOMEM;
2931			break;
2932		}
2933		IM6O_LOCK(im6o);
2934		im6o->im6o_multicast_loop = loop;
2935		IM6O_UNLOCK(im6o);
2936		IM6O_REMREF(im6o);	/* from in6p_findmoptions() */
2937		break;
2938	}
2939
2940	case IPV6_JOIN_GROUP:
2941	case MCAST_JOIN_GROUP:
2942	case MCAST_JOIN_SOURCE_GROUP:
2943		error = in6p_join_group(inp, sopt);
2944		break;
2945
2946	case IPV6_LEAVE_GROUP:
2947	case MCAST_LEAVE_GROUP:
2948	case MCAST_LEAVE_SOURCE_GROUP:
2949		error = in6p_leave_group(inp, sopt);
2950		break;
2951
2952	case MCAST_BLOCK_SOURCE:
2953	case MCAST_UNBLOCK_SOURCE:
2954		error = in6p_block_unblock_source(inp, sopt);
2955		break;
2956
2957	case IPV6_MSFILTER:
2958		error = in6p_set_source_filters(inp, sopt);
2959		break;
2960
2961	default:
2962		error = EOPNOTSUPP;
2963		break;
2964	}
2965
2966	return (error);
2967}
2968/*
2969 * Expose MLD's multicast filter mode and source list(s) to userland,
2970 * keyed by (ifindex, group).
2971 * The filter mode is written out as a uint32_t, followed by
2972 * 0..n of struct in6_addr.
2973 * For use by ifmcstat(8).
2974 */
2975static int
2976sysctl_ip6_mcast_filters SYSCTL_HANDLER_ARGS
2977{
2978#pragma unused(oidp)
2979
2980	struct in6_addr			 mcaddr;
2981	struct in6_addr			 src;
2982	struct ifnet			*ifp;
2983	struct in6_multi		*inm;
2984	struct in6_multistep		step;
2985	struct ip6_msource		*ims;
2986	int				*name;
2987	int				 retval = 0;
2988	u_int				 namelen;
2989	uint32_t			 fmode, ifindex;
2990
2991	name = (int *)arg1;
2992	namelen = arg2;
2993
2994	if (req->newptr != USER_ADDR_NULL)
2995		return (EPERM);
2996
2997	/* int: ifindex + 4 * 32 bits of IPv6 address */
2998	if (namelen != 5)
2999		return (EINVAL);
3000
3001	ifindex = name[0];
3002	ifnet_head_lock_shared();
3003	if (ifindex <= 0 || ifindex > (u_int)if_index) {
3004		MLD_PRINTF(("%s: ifindex %u out of range\n",
3005		    __func__, ifindex));
3006		ifnet_head_done();
3007		return (ENOENT);
3008	}
3009
3010	memcpy(&mcaddr, &name[1], sizeof(struct in6_addr));
3011	if (!IN6_IS_ADDR_MULTICAST(&mcaddr)) {
3012		MLD_PRINTF(("%s: group %s is not multicast\n",
3013		    __func__, ip6_sprintf(&mcaddr)));
3014		ifnet_head_done();
3015		return (EINVAL);
3016	}
3017
3018	ifp = ifindex2ifnet[ifindex];
3019	ifnet_head_done();
3020	if (ifp == NULL) {
3021		MLD_PRINTF(("%s: no ifp for ifindex %u\n", __func__, ifindex));
3022		return (ENOENT);
3023	}
3024	/*
3025	 * Internal MLD lookups require that scope/zone ID is set.
3026	 */
3027	(void)in6_setscope(&mcaddr, ifp, NULL);
3028
3029	in6_multihead_lock_shared();
3030	IN6_FIRST_MULTI(step, inm);
3031	while (inm != NULL) {
3032		IN6M_LOCK(inm);
3033		if (inm->in6m_ifp != ifp)
3034			goto next;
3035
3036		if (!IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, &mcaddr))
3037			goto next;
3038
3039		fmode = inm->in6m_st[1].iss_fmode;
3040		retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t));
3041		if (retval != 0) {
3042			IN6M_UNLOCK(inm);
3043			break;		/* abort */
3044		}
3045		RB_FOREACH(ims, ip6_msource_tree, &inm->in6m_srcs) {
3046			MLD_PRINTF(("%s: visit node 0x%llx\n", __func__,
3047			    (uint64_t)VM_KERNEL_ADDRPERM(ims)));
3048			/*
3049			 * Only copy-out sources which are in-mode.
3050			 */
3051			if (fmode != im6s_get_mode(inm, ims, 1)) {
3052				MLD_PRINTF(("%s: skip non-in-mode\n",
3053				    __func__));
3054				continue; /* process next source */
3055			}
3056			src = ims->im6s_addr;
3057			retval = SYSCTL_OUT(req, &src, sizeof(struct in6_addr));
3058			if (retval != 0)
3059				break;	/* process next inm */
3060		}
3061next:
3062		IN6M_UNLOCK(inm);
3063		IN6_NEXT_MULTI(step, inm);
3064	}
3065	in6_multihead_lock_done();
3066
3067	return (retval);
3068}
3069
/*
 * One-time initialization of the IPv6 multicast subsystem's global
 * state: the in6_multihead lock, the trash list used for leak
 * debugging, and the four zalloc zones backing in6_multi,
 * in6_multi_mship, ip6_msource and in6_msource allocations.
 * Panics if any zone cannot be created.  The statement order is
 * load-bearing: locks and debug flags are set up before the zones
 * that depend on them.
 */
void
in6_multi_init(void)
{
	/* "ifa_debug" boot-arg toggles allocation tracing (in6m_debug). */
	PE_parse_boot_argn("ifa_debug", &in6m_debug, sizeof (in6m_debug));

	/* Setup lock group and attribute for in6_multihead */
	in6_multihead_lock_grp_attr = lck_grp_attr_alloc_init();
	in6_multihead_lock_grp = lck_grp_alloc_init("in6_multihead",
	    in6_multihead_lock_grp_attr);
	in6_multihead_lock_attr = lck_attr_alloc_init();
	lck_rw_init(&in6_multihead_lock, in6_multihead_lock_grp,
	    in6_multihead_lock_attr);

	lck_mtx_init(&in6m_trash_lock, in6_multihead_lock_grp,
	    in6_multihead_lock_attr);
	TAILQ_INIT(&in6m_trash_head);

	/*
	 * With debugging enabled, allocate the larger in6_multi_dbg
	 * records so trace information can be kept per object.
	 */
	in6m_size = (in6m_debug == 0) ? sizeof (struct in6_multi) :
	    sizeof (struct in6_multi_dbg);
	in6m_zone = zinit(in6m_size, IN6M_ZONE_MAX * in6m_size,
	    0, IN6M_ZONE_NAME);
	if (in6m_zone == NULL) {
		panic("%s: failed allocating %s", __func__, IN6M_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(in6m_zone, Z_EXPAND, TRUE);

	imm_size = sizeof (struct in6_multi_mship);
	imm_zone = zinit(imm_size, IMM_ZONE_MAX * imm_size, 0, IMM_ZONE_NAME);
	if (imm_zone == NULL) {
		panic("%s: failed allocating %s", __func__, IMM_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(imm_zone, Z_EXPAND, TRUE);

	ip6ms_size = sizeof (struct ip6_msource);
	ip6ms_zone = zinit(ip6ms_size, IP6MS_ZONE_MAX * ip6ms_size,
	    0, IP6MS_ZONE_NAME);
	if (ip6ms_zone == NULL) {
		panic("%s: failed allocating %s", __func__, IP6MS_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(ip6ms_zone, Z_EXPAND, TRUE);

	in6ms_size = sizeof (struct in6_msource);
	in6ms_zone = zinit(in6ms_size, IN6MS_ZONE_MAX * in6ms_size,
	    0, IN6MS_ZONE_NAME);
	if (in6ms_zone == NULL) {
		panic("%s: failed allocating %s", __func__, IN6MS_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(in6ms_zone, Z_EXPAND, TRUE);
}
3123
3124static struct in6_multi *
3125in6_multi_alloc(int how)
3126{
3127	struct in6_multi *in6m;
3128
3129	in6m = (how == M_WAITOK) ? zalloc(in6m_zone) :
3130	    zalloc_noblock(in6m_zone);
3131	if (in6m != NULL) {
3132		bzero(in6m, in6m_size);
3133		lck_mtx_init(&in6m->in6m_lock, in6_multihead_lock_grp,
3134		    in6_multihead_lock_attr);
3135		in6m->in6m_debug |= IFD_ALLOC;
3136		if (in6m_debug != 0) {
3137			in6m->in6m_debug |= IFD_DEBUG;
3138			in6m->in6m_trace = in6m_trace;
3139		}
3140	}
3141	return (in6m);
3142}
3143
/*
 * Return an in6_multi record to its zone.  The record must be fully
 * disconnected: detached from the global list, no ifma back-pointer,
 * zero refcount and zero reqcnt -- anything else is a fatal
 * consistency error.
 */
static void
in6_multi_free(struct in6_multi *in6m)
{
	IN6M_LOCK(in6m);
	if (in6m->in6m_debug & IFD_ATTACHED) {
		panic("%s: attached in6m=%p is being freed", __func__, in6m);
		/* NOTREACHED */
	} else if (in6m->in6m_ifma != NULL) {
		panic("%s: ifma not NULL for in6m=%p", __func__, in6m);
		/* NOTREACHED */
	} else if (!(in6m->in6m_debug & IFD_ALLOC)) {
		panic("%s: in6m %p cannot be freed", __func__, in6m);
		/* NOTREACHED */
	} else if (in6m->in6m_refcount != 0) {
		panic("%s: non-zero refcount in6m=%p", __func__, in6m);
		/* NOTREACHED */
	} else if (in6m->in6m_reqcnt != 0) {
		panic("%s: non-zero reqcnt in6m=%p", __func__, in6m);
		/* NOTREACHED */
	}

	/* Free any pending MLDv2 state-change records */
	IF_DRAIN(&in6m->in6m_scq);

	in6m->in6m_debug &= ~IFD_ALLOC;
	/* If the record was parked on the trash list, unlink it first */
	if ((in6m->in6m_debug & (IFD_DEBUG | IFD_TRASHED)) ==
	    (IFD_DEBUG | IFD_TRASHED)) {
		lck_mtx_lock(&in6m_trash_lock);
		TAILQ_REMOVE(&in6m_trash_head, (struct in6_multi_dbg *)in6m,
		    in6m_trash_link);
		lck_mtx_unlock(&in6m_trash_lock);
		in6m->in6m_debug &= ~IFD_TRASHED;
	}
	IN6M_UNLOCK(in6m);

	lck_mtx_destroy(&in6m->in6m_lock, in6_multihead_lock_grp);
	zfree(in6m_zone, in6m);
}
3182
/*
 * Attach an in6_multi record to the global in6_multihead list, taking a
 * reference on behalf of the list.  The request count must transition
 * 0 -> 1 here; attaching an already-attached or trashed record panics.
 * Caller must hold in6_multihead_lock exclusively as well as the
 * record's own lock.
 */
static void
in6_multi_attach(struct in6_multi *in6m)
{
	in6_multihead_lock_assert(LCK_RW_ASSERT_EXCLUSIVE);
	IN6M_LOCK_ASSERT_HELD(in6m);

	if (in6m->in6m_debug & IFD_ATTACHED) {
		panic("%s: Attempt to attach an already attached in6m=%p",
		    __func__, in6m);
		/* NOTREACHED */
	} else if (in6m->in6m_debug & IFD_TRASHED) {
		panic("%s: Attempt to reattach a detached in6m=%p",
		    __func__, in6m);
		/* NOTREACHED */
	}

	in6m->in6m_reqcnt++;
	VERIFY(in6m->in6m_reqcnt == 1);
	IN6M_ADDREF_LOCKED(in6m);
	in6m->in6m_debug |= IFD_ATTACHED;
	/*
	 * Reattach case:  If debugging is enabled, take it
	 * out of the trash list and clear IFD_TRASHED.
	 */
	if ((in6m->in6m_debug & (IFD_DEBUG | IFD_TRASHED)) ==
	    (IFD_DEBUG | IFD_TRASHED)) {
		/* Become a regular mutex, just in case */
		IN6M_CONVERT_LOCK(in6m);
		lck_mtx_lock(&in6m_trash_lock);
		TAILQ_REMOVE(&in6m_trash_head, (struct in6_multi_dbg *)in6m,
		    in6m_trash_link);
		lck_mtx_unlock(&in6m_trash_lock);
		in6m->in6m_debug &= ~IFD_TRASHED;
	}

	LIST_INSERT_HEAD(&in6_multihead, in6m, in6m_entry);
}
3220
/*
 * Drop one attach request on an in6_multi record.  Returns 0 while other
 * requests remain; returns 1 once the record has been removed from the
 * global in6_multihead list (the caller then releases the list's ifma
 * reference -- see the IFMA_REMREF note below).  Caller must hold
 * in6_multihead_lock exclusively as well as the record's own lock.
 */
int
in6_multi_detach(struct in6_multi *in6m)
{
	in6_multihead_lock_assert(LCK_RW_ASSERT_EXCLUSIVE);
	IN6M_LOCK_ASSERT_HELD(in6m);

	if (in6m->in6m_reqcnt == 0) {
		panic("%s: in6m=%p negative reqcnt", __func__, in6m);
		/* NOTREACHED */
	}

	--in6m->in6m_reqcnt;
	if (in6m->in6m_reqcnt > 0)
		return (0);

	if (!(in6m->in6m_debug & IFD_ATTACHED)) {
		panic("%s: Attempt to detach an unattached record in6m=%p",
		    __func__, in6m);
		/* NOTREACHED */
	} else if (in6m->in6m_debug & IFD_TRASHED) {
		panic("%s: in6m %p is already in trash list", __func__, in6m);
		/* NOTREACHED */
	}

	/*
	 * NOTE: Caller calls IFMA_REMREF
	 */
	in6m->in6m_debug &= ~IFD_ATTACHED;
	LIST_REMOVE(in6m, in6m_entry);

	/* When debugging, park the record on the trash list for tracing */
	if (in6m->in6m_debug & IFD_DEBUG) {
		/* Become a regular mutex, just in case */
		IN6M_CONVERT_LOCK(in6m);
		lck_mtx_lock(&in6m_trash_lock);
		TAILQ_INSERT_TAIL(&in6m_trash_head,
		    (struct in6_multi_dbg *)in6m, in6m_trash_link);
		lck_mtx_unlock(&in6m_trash_lock);
		in6m->in6m_debug |= IFD_TRASHED;
	}

	return (1);
}
3263
3264void
3265in6m_addref(struct in6_multi *in6m, int locked)
3266{
3267	if (!locked)
3268		IN6M_LOCK_SPIN(in6m);
3269	else
3270		IN6M_LOCK_ASSERT_HELD(in6m);
3271
3272	if (++in6m->in6m_refcount == 0) {
3273		panic("%s: in6m=%p wraparound refcnt", __func__, in6m);
3274		/* NOTREACHED */
3275	} else if (in6m->in6m_trace != NULL) {
3276		(*in6m->in6m_trace)(in6m, TRUE);
3277	}
3278	if (!locked)
3279		IN6M_UNLOCK(in6m);
3280}
3281
/*
 * Release a reference on an in6_multi record; the last release tears the
 * record down (purges its source filters, severs the ifma/ifp/mli links)
 * and frees it.  A caller holding the record's lock must not drop the
 * final reference (that panics below), so the teardown path only runs in
 * the unlocked case.  See the comment in the body for the in6_multihead
 * lock dance required to serialize against in6_mc_get().
 */
void
in6m_remref(struct in6_multi *in6m, int locked)
{
	struct ifmultiaddr *ifma;
	struct mld_ifinfo *mli;

	if (!locked)
		IN6M_LOCK_SPIN(in6m);
	else
		IN6M_LOCK_ASSERT_HELD(in6m);

	if (in6m->in6m_refcount == 0 || (in6m->in6m_refcount == 1 && locked)) {
		panic("%s: in6m=%p negative refcnt", __func__, in6m);
		/* NOTREACHED */
	} else if (in6m->in6m_trace != NULL) {
		(*in6m->in6m_trace)(in6m, FALSE);
	}

	--in6m->in6m_refcount;
	if (in6m->in6m_refcount > 0) {
		if (!locked)
			IN6M_UNLOCK(in6m);
		return;
	}

	/*
	 * Synchronization with in6_mc_get().  In the event the in6m has been
	 * detached, the underlying ifma would still be in the if_multiaddrs
	 * list, and thus can be looked up via if_addmulti().  At that point,
	 * the only way to find this in6m is via ifma_protospec.  To avoid
	 * race conditions between the last in6m_remref() of that in6m and its
	 * use via ifma_protospec, in6_multihead lock is used for serialization.
	 * In order to avoid violating the lock order, we must drop in6m_lock
	 * before acquiring in6_multihead lock.  To prevent the in6m from being
	 * freed prematurely, we hold an extra reference.
	 */
	++in6m->in6m_refcount;
	IN6M_UNLOCK(in6m);
	in6_multihead_lock_shared();
	IN6M_LOCK_SPIN(in6m);
	--in6m->in6m_refcount;
	if (in6m->in6m_refcount > 0) {
		/* We've lost the race, so abort since in6m is still in use */
		IN6M_UNLOCK(in6m);
		in6_multihead_lock_done();
		/* If it was locked, return it as such */
		if (locked)
			IN6M_LOCK(in6m);
		return;
	}
	/* Last reference: purge sources and sever all back-pointers */
	in6m_purge(in6m);
	ifma = in6m->in6m_ifma;
	in6m->in6m_ifma = NULL;
	in6m->in6m_ifp = NULL;
	mli = in6m->in6m_mli;
	in6m->in6m_mli = NULL;
	IN6M_UNLOCK(in6m);
	IFMA_LOCK_SPIN(ifma);
	ifma->ifma_protospec = NULL;
	IFMA_UNLOCK(ifma);
	in6_multihead_lock_done();

	in6_multi_free(in6m);
	if_delmulti_ifma(ifma);
	/* Release reference held to the underlying ifmultiaddr */
	IFMA_REMREF(ifma);

	if (mli != NULL)
		MLI_REMREF(mli);
}
3352
3353static void
3354in6m_trace(struct in6_multi *in6m, int refhold)
3355{
3356	struct in6_multi_dbg *in6m_dbg = (struct in6_multi_dbg *)in6m;
3357	ctrace_t *tr;
3358	u_int32_t idx;
3359	u_int16_t *cnt;
3360
3361	if (!(in6m->in6m_debug & IFD_DEBUG)) {
3362		panic("%s: in6m %p has no debug structure", __func__, in6m);
3363		/* NOTREACHED */
3364	}
3365	if (refhold) {
3366		cnt = &in6m_dbg->in6m_refhold_cnt;
3367		tr = in6m_dbg->in6m_refhold;
3368	} else {
3369		cnt = &in6m_dbg->in6m_refrele_cnt;
3370		tr = in6m_dbg->in6m_refrele;
3371	}
3372
3373	idx = atomic_add_16_ov(cnt, 1) % IN6M_TRACE_HIST_SIZE;
3374	ctrace_record(&tr[idx]);
3375}
3376
3377static struct in6_multi_mship *
3378in6_multi_mship_alloc(int how)
3379{
3380	struct in6_multi_mship *imm;
3381
3382	imm = (how == M_WAITOK) ? zalloc(imm_zone) : zalloc_noblock(imm_zone);
3383	if (imm != NULL)
3384		bzero(imm, imm_size);
3385
3386	return (imm);
3387}
3388
/*
 * Return an in6_multi_mship record to its zone; the record must no
 * longer point at a multicast address record.
 */
static void
in6_multi_mship_free(struct in6_multi_mship *imm)
{
	if (imm->i6mm_maddr != NULL) {
		panic("%s: i6mm_maddr not NULL for imm=%p", __func__, imm);
		/* NOTREACHED */
	}
	zfree(imm_zone, imm);
}
3398
/*
 * Acquire the global IPv6 multicast list lock for writing.
 */
void
in6_multihead_lock_exclusive(void)
{
	lck_rw_lock_exclusive(&in6_multihead_lock);
}
3404
/*
 * Acquire the global IPv6 multicast list lock for reading.
 */
void
in6_multihead_lock_shared(void)
{
	lck_rw_lock_shared(&in6_multihead_lock);
}
3410
/*
 * Assert the global IPv6 multicast list lock state (LCK_RW_ASSERT_*).
 */
void
in6_multihead_lock_assert(int what)
{
	lck_rw_assert(&in6_multihead_lock, what);
}
3416
/*
 * Release the global IPv6 multicast list lock (shared or exclusive).
 */
void
in6_multihead_lock_done(void)
{
	lck_rw_done(&in6_multihead_lock);
}
3422
3423static struct ip6_msource *
3424ip6ms_alloc(int how)
3425{
3426	struct ip6_msource *i6ms;
3427
3428	i6ms = (how == M_WAITOK) ? zalloc(ip6ms_zone) :
3429	    zalloc_noblock(ip6ms_zone);
3430	if (i6ms != NULL)
3431		bzero(i6ms, ip6ms_size);
3432
3433	return (i6ms);
3434}
3435
/*
 * Return an ip6_msource record to its zone.
 */
static void
ip6ms_free(struct ip6_msource *i6ms)
{
	zfree(ip6ms_zone, i6ms);
}
3441
3442static struct in6_msource *
3443in6ms_alloc(int how)
3444{
3445	struct in6_msource *in6ms;
3446
3447	in6ms = (how == M_WAITOK) ? zalloc(in6ms_zone) :
3448	    zalloc_noblock(in6ms_zone);
3449	if (in6ms != NULL)
3450		bzero(in6ms, in6ms_size);
3451
3452	return (in6ms);
3453}
3454
/*
 * Return an in6_msource record to its zone.
 */
static void
in6ms_free(struct in6_msource *in6ms)
{
	zfree(in6ms_zone, in6ms);
}
3460
3461#ifdef MLD_DEBUG
3462
/* Filter-mode names (MCAST_UNDEFINED/INCLUDE/EXCLUDE), printed inline
 * by in6m_print(); must not contain newlines. */
static const char *in6m_modestrs[] = { "un", "in", "ex" };
3464
3465static const char *
3466in6m_mode_str(const int mode)
3467{
3468	if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE)
3469		return (in6m_modestrs[mode]);
3470	return ("??");
3471}
3472
/* MLD per-group state names, indexed by MLD_* state; printed inline by
 * in6m_print(), so entries must not contain newlines. */
static const char *in6m_statestrs[] = {
	"not-member",
	"silent",
	"reporting",
	"idle",
	"lazy",
	"sleeping",
	"awakening",
	"query-pending",
	"sg-query-pending",
	"leaving"
};
3485
3486static const char *
3487in6m_state_str(const int state)
3488{
3489	if (state >= MLD_NOT_MEMBER && state <= MLD_LEAVING_MEMBER)
3490		return (in6m_statestrs[state]);
3491	return ("??");
3492}
3493
3494/*
3495 * Dump an in6_multi structure to the console.
3496 */
3497void
3498in6m_print(const struct in6_multi *inm)
3499{
3500	int t;
3501
3502	IN6M_LOCK_ASSERT_HELD(__DECONST(struct in6_multi *, inm));
3503
3504	if (mld_debug == 0)
3505		return;
3506
3507	printf("%s: --- begin in6m 0x%llx ---\n", __func__,
3508	    (uint64_t)VM_KERNEL_ADDRPERM(inm));
3509	printf("addr %s ifp 0x%llx(%s) ifma 0x%llx\n",
3510	    ip6_sprintf(&inm->in6m_addr),
3511	    (uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_ifp),
3512	    if_name(inm->in6m_ifp),
3513	    (uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_ifma));
3514	printf("timer %u state %s refcount %u scq.len %u\n",
3515	    inm->in6m_timer,
3516	    in6m_state_str(inm->in6m_state),
3517	    inm->in6m_refcount,
3518	    inm->in6m_scq.ifq_len);
3519	printf("mli 0x%llx nsrc %lu sctimer %u scrv %u\n",
3520	    (uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_mli),
3521	    inm->in6m_nsrc,
3522	    inm->in6m_sctimer,
3523	    inm->in6m_scrv);
3524	for (t = 0; t < 2; t++) {
3525		printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t,
3526		    in6m_mode_str(inm->in6m_st[t].iss_fmode),
3527		    inm->in6m_st[t].iss_asm,
3528		    inm->in6m_st[t].iss_ex,
3529		    inm->in6m_st[t].iss_in,
3530		    inm->in6m_st[t].iss_rec);
3531	}
3532	printf("%s: --- end in6m 0x%llx ---\n", __func__,
3533	    (uint64_t)VM_KERNEL_ADDRPERM(inm));
3534}
3535
3536#else
3537
/* MLD_DEBUG disabled: printing is compiled out */
void
in6m_print(__unused const struct in6_multi *inm)
{

}
3543
3544#endif
3545