1/*
2 * Copyright (c) 2010-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright (c) 2009 Bruce Simpson.
30 * All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 *    notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 *    notice, this list of conditions and the following disclaimer in the
39 *    documentation and/or other materials provided with the distribution.
40 * 3. The name of the author may not be used to endorse or promote
41 *    products derived from this software without specific prior written
42 *    permission.
43 *
44 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
45 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
46 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
47 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
48 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
49 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
50 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
51 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
52 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
53 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * SUCH DAMAGE.
55 */
56
57/*
58 * IPv6 multicast socket, group, and socket option processing module.
59 * Normative references: RFC 2292, RFC 3493, RFC 3542, RFC 3678, RFC 3810.
60 */
61
62#include <sys/cdefs.h>
63
64#include <sys/param.h>
65#include <sys/systm.h>
66#include <sys/kernel.h>
67#include <sys/malloc.h>
68#include <sys/mbuf.h>
69#include <sys/protosw.h>
70#include <sys/socket.h>
71#include <sys/socketvar.h>
73#include <sys/sysctl.h>
74#include <sys/tree.h>
75#include <sys/mcache.h>
76
77#include <kern/zalloc.h>
78
79#include <pexpert/pexpert.h>
80
81#include <net/if.h>
82#include <net/if_dl.h>
83#include <net/route.h>
84
85#include <netinet/in.h>
86#include <netinet/in_var.h>
87#include <netinet6/in6_var.h>
88#include <netinet/ip6.h>
89#include <netinet/icmp6.h>
90#include <netinet6/ip6_var.h>
91#include <netinet/in_pcb.h>
92#include <netinet/tcp.h>
93#include <netinet/tcp_seq.h>
94#include <netinet/tcp_var.h>
95#include <netinet6/nd6.h>
96#include <netinet6/mld6_var.h>
97#include <netinet6/scope6_var.h>
98
99#ifndef __SOCKUNION_DECLARED
100union sockunion {
101	struct sockaddr_storage	ss;
102	struct sockaddr		sa;
103	struct sockaddr_dl	sdl;
104	struct sockaddr_in6	sin6;
105};
106typedef union sockunion sockunion_t;
107#define __SOCKUNION_DECLARED
108#endif /* __SOCKUNION_DECLARED */
109
110static void	im6f_commit(struct in6_mfilter *);
111static int	im6f_get_source(struct in6_mfilter *imf,
112		    const struct sockaddr_in6 *psin,
113		    struct in6_msource **);
114static struct in6_msource *
115		im6f_graft(struct in6_mfilter *, const uint8_t,
116		    const struct sockaddr_in6 *);
117static int	im6f_prune(struct in6_mfilter *, const struct sockaddr_in6 *);
118static void	im6f_rollback(struct in6_mfilter *);
119static void	im6f_reap(struct in6_mfilter *);
120static int	im6o_grow(struct ip6_moptions *, size_t);
121static size_t	im6o_match_group(const struct ip6_moptions *,
122		    const struct ifnet *, const struct sockaddr *);
123static struct in6_msource *
124		im6o_match_source(const struct ip6_moptions *, const size_t,
125		    const struct sockaddr *);
126static void	im6s_merge(struct ip6_msource *ims,
127		    const struct in6_msource *lims, const int rollback);
128static int	in6_mc_get(struct ifnet *, const struct in6_addr *,
129		    struct in6_multi **);
130static int	in6m_get_source(struct in6_multi *inm,
131		    const struct in6_addr *addr, const int noalloc,
132		    struct ip6_msource **pims);
133static int	in6m_is_ifp_detached(const struct in6_multi *);
134static int	in6m_merge(struct in6_multi *, /*const*/ struct in6_mfilter *);
135static void	in6m_reap(struct in6_multi *);
136static struct ip6_moptions *
137		in6p_findmoptions(struct inpcb *);
138static int	in6p_get_source_filters(struct inpcb *, struct sockopt *);
139static int	in6p_lookup_v4addr(struct ipv6_mreq *, struct ip_mreq *);
140static int	in6p_join_group(struct inpcb *, struct sockopt *);
141static int	in6p_leave_group(struct inpcb *, struct sockopt *);
142static struct ifnet *
143		in6p_lookup_mcast_ifp(const struct inpcb *,
144		    const struct sockaddr_in6 *);
145static int	in6p_block_unblock_source(struct inpcb *, struct sockopt *);
146static int	in6p_set_multicast_if(struct inpcb *, struct sockopt *);
147static int	in6p_set_source_filters(struct inpcb *, struct sockopt *);
148static int	sysctl_ip6_mcast_filters SYSCTL_HANDLER_ARGS;
149static __inline__ int ip6_msource_cmp(const struct ip6_msource *,
150		    const struct ip6_msource *);
151
152SYSCTL_DECL(_net_inet6_ip6);	/* XXX Not in any common header. */
153
154SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, mcast, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IPv6 multicast");
155
156static unsigned long in6_mcast_maxgrpsrc = IPV6_MAX_GROUP_SRC_FILTER;
157SYSCTL_LONG(_net_inet6_ip6_mcast, OID_AUTO, maxgrpsrc,
158    CTLFLAG_RW | CTLFLAG_LOCKED, &in6_mcast_maxgrpsrc,
159    "Max source filters per group");
160
161static unsigned long in6_mcast_maxsocksrc = IPV6_MAX_SOCK_SRC_FILTER;
162SYSCTL_LONG(_net_inet6_ip6_mcast, OID_AUTO, maxsocksrc,
163    CTLFLAG_RW | CTLFLAG_LOCKED, &in6_mcast_maxsocksrc,
164    "Max source filters per socket");
165
166int in6_mcast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
167SYSCTL_INT(_net_inet6_ip6_mcast, OID_AUTO, loop, CTLFLAG_RW | CTLFLAG_LOCKED,
168    &in6_mcast_loop, 0, "Loopback multicast datagrams by default");
169
170SYSCTL_NODE(_net_inet6_ip6_mcast, OID_AUTO, filters,
171    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_ip6_mcast_filters,
172    "Per-interface stack-wide source filters");
173
174RB_GENERATE_PREV(ip6_msource_tree, ip6_msource, im6s_link, ip6_msource_cmp);
175
176#define	IN6M_TRACE_HIST_SIZE	32	/* size of trace history */
177
178/* For gdb */
179__private_extern__ unsigned int in6m_trace_hist_size = IN6M_TRACE_HIST_SIZE;
180
181struct in6_multi_dbg {
182	struct in6_multi	in6m;			/* in6_multi */
183	u_int16_t		in6m_refhold_cnt;	/* # of ref */
184	u_int16_t		in6m_refrele_cnt;	/* # of rele */
185	/*
186	 * Circular lists of in6m_addref and in6m_remref callers.
187	 */
188	ctrace_t		in6m_refhold[IN6M_TRACE_HIST_SIZE];
189	ctrace_t		in6m_refrele[IN6M_TRACE_HIST_SIZE];
190	/*
191	 * Trash list linkage
192	 */
193	TAILQ_ENTRY(in6_multi_dbg) in6m_trash_link;
194};
195
196/* List of trash in6_multi entries protected by in6m_trash_lock */
197static TAILQ_HEAD(, in6_multi_dbg) in6m_trash_head;
198static decl_lck_mtx_data(, in6m_trash_lock);
199
200#if DEBUG
201static unsigned int in6m_debug = 1;		/* debugging (enabled) */
202#else
203static unsigned int in6m_debug;			/* debugging (disabled) */
204#endif /* !DEBUG */
205static unsigned int in6m_size;			/* size of zone element */
206static struct zone *in6m_zone;			/* zone for in6_multi */
207
208#define	IN6M_ZONE_MAX		64		/* maximum elements in zone */
209#define	IN6M_ZONE_NAME		"in6_multi"	/* zone name */
210
211static unsigned int imm_size;			/* size of zone element */
212static struct zone *imm_zone;			/* zone for in6_multi_mship */
213
214#define	IMM_ZONE_MAX		64		/* maximum elements in zone */
215#define	IMM_ZONE_NAME		"in6_multi_mship" /* zone name */
216
217#define	IP6MS_ZONE_MAX		64		/* maximum elements in zone */
218#define	IP6MS_ZONE_NAME		"ip6_msource"	/* zone name */
219
220static unsigned int ip6ms_size;			/* size of zone element */
221static struct zone *ip6ms_zone;			/* zone for ip6_msource */
222
223#define	IN6MS_ZONE_MAX		64		/* maximum elements in zone */
224#define	IN6MS_ZONE_NAME		"in6_msource"	/* zone name */
225
226static unsigned int in6ms_size;			/* size of zone element */
227static struct zone *in6ms_zone;			/* zone for in6_msource */
228
229/* Lock group and attribute for in6_multihead_lock lock */
230static lck_attr_t	*in6_multihead_lock_attr;
231static lck_grp_t	*in6_multihead_lock_grp;
232static lck_grp_attr_t	*in6_multihead_lock_grp_attr;
233
234static decl_lck_rw_data(, in6_multihead_lock);
235struct in6_multihead in6_multihead;
236
237static struct in6_multi *in6_multi_alloc(int);
238static void in6_multi_free(struct in6_multi *);
239static void in6_multi_attach(struct in6_multi *);
240static struct in6_multi_mship *in6_multi_mship_alloc(int);
241static void in6_multi_mship_free(struct in6_multi_mship *);
242static void in6m_trace(struct in6_multi *, int);
243
244static struct ip6_msource *ip6ms_alloc(int);
245static void ip6ms_free(struct ip6_msource *);
246static struct in6_msource *in6ms_alloc(int);
247static void in6ms_free(struct in6_msource *);
248
249#define	IM6O_CAST_TO_NONCONST(x) ((struct ip6_moptions *)(void *)(uintptr_t)x)
250#define	IN6M_CAST_TO_NONCONST(x) ((struct in6_multi *)(void *)(uintptr_t)x)
251
252/*
253 * IPv6 source tree comparison function.
254 *
255 * An ordered predicate is necessary; bcmp() is not documented to return
256 * an indication of order, whereas memcmp() is required to by ISO C99.
257 */
258static __inline int
259ip6_msource_cmp(const struct ip6_msource *a, const struct ip6_msource *b)
260{
261	return (memcmp(&a->im6s_addr, &b->im6s_addr, sizeof(struct in6_addr)));
262}
263
264/*
265 * Inline function which wraps assertions for a valid ifp.
266 */
267static __inline__ int
268in6m_is_ifp_detached(const struct in6_multi *inm)
269{
270	VERIFY(inm->in6m_ifma != NULL);
271	VERIFY(inm->in6m_ifp == inm->in6m_ifma->ifma_ifp);
272
273	return (!ifnet_is_attached(inm->in6m_ifp, 0));
274}
275
276/*
277 * Initialize an in6_mfilter structure to a known state at t0, t1
278 * with an empty source filter list.
279 */
280static __inline__ void
281im6f_init(struct in6_mfilter *imf, const int st0, const int st1)
282{
283	memset(imf, 0, sizeof(struct in6_mfilter));
284	RB_INIT(&imf->im6f_sources);
285	imf->im6f_st[0] = st0;
286	imf->im6f_st[1] = st1;
287}
288
289/*
290 * Resize the ip6_moptions vector to the next power-of-two minus 1.
291 */
292static int
293im6o_grow(struct ip6_moptions *imo, size_t newmax)
294{
295	struct in6_multi	**nmships;
296	struct in6_multi	**omships;
297	struct in6_mfilter	 *nmfilters;
298	struct in6_mfilter	 *omfilters;
299	size_t			  idx;
300	size_t			  oldmax;
301
302	IM6O_LOCK_ASSERT_HELD(imo);
303
304	nmships = NULL;
305	nmfilters = NULL;
306	omships = imo->im6o_membership;
307	omfilters = imo->im6o_mfilters;
308	oldmax = imo->im6o_max_memberships;
309	if (newmax == 0)
310		newmax = ((oldmax + 1) * 2) - 1;
311
312	if (newmax > IPV6_MAX_MEMBERSHIPS)
313		return (ETOOMANYREFS);
314
315	if ((nmships = (struct in6_multi **)_REALLOC(omships,
316	    sizeof (struct in6_multi *) * newmax, M_IP6MOPTS,
317	    M_WAITOK | M_ZERO)) == NULL)
318		return (ENOMEM);
319
320	imo->im6o_membership = nmships;
321
322	if ((nmfilters = (struct in6_mfilter *)_REALLOC(omfilters,
323	    sizeof (struct in6_mfilter) * newmax, M_IN6MFILTER,
324	    M_WAITOK | M_ZERO)) == NULL)
325		return (ENOMEM);
326
327	imo->im6o_mfilters = nmfilters;
328
329	/* Initialize newly allocated source filter heads. */
330	for (idx = oldmax; idx < newmax; idx++)
331		im6f_init(&nmfilters[idx], MCAST_UNDEFINED, MCAST_EXCLUDE);
332
333	imo->im6o_max_memberships = newmax;
334
335	return (0);
336}
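
/*
 * For example, with the doubling rule above an old maximum of 31
 * memberships grows to ((31 + 1) * 2) - 1 = 63 slots, then to 127, and
 * so on, up to the IPV6_MAX_MEMBERSHIPS bound checked above.
 */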
337
338/*
339 * Find an IPv6 multicast group entry for this ip6_moptions instance
340 * which matches the specified group, and optionally an interface.
341 * Return its index into the array, or -1 if not found.
342 */
343static size_t
344im6o_match_group(const struct ip6_moptions *imo, const struct ifnet *ifp,
345    const struct sockaddr *group)
346{
347	const struct sockaddr_in6 *gsin6;
348	struct in6_multi *pinm;
349	int		  idx;
350	int		  nmships;
351
352	IM6O_LOCK_ASSERT_HELD(IM6O_CAST_TO_NONCONST(imo));
353
354	gsin6 = (struct sockaddr_in6 *)(uintptr_t)(size_t)group;
355
356	/* The im6o_membership array may be lazily allocated. */
357	if (imo->im6o_membership == NULL || imo->im6o_num_memberships == 0)
358		return (-1);
359
360	nmships = imo->im6o_num_memberships;
361	for (idx = 0; idx < nmships; idx++) {
362		pinm = imo->im6o_membership[idx];
363		if (pinm == NULL)
364			continue;
365		IN6M_LOCK(pinm);
366		if ((ifp == NULL || (pinm->in6m_ifp == ifp)) &&
367		    IN6_ARE_ADDR_EQUAL(&pinm->in6m_addr,
368		    &gsin6->sin6_addr)) {
369			IN6M_UNLOCK(pinm);
370			break;
371		}
372		IN6M_UNLOCK(pinm);
373	}
374	if (idx >= nmships)
375		idx = -1;
376
377	return (idx);
378}
379
380/*
381 * Find an IPv6 multicast source entry for this imo which matches
382 * the given group index for this socket, and source address.
383 *
384 * XXX TODO: The scope ID, if present in src, is stripped before
385 * any comparison. We SHOULD enforce scope/zone checks where the source
386 * filter entry has a link scope.
387 *
388 * NOTE: This does not check if the entry is in-mode, merely if
389 * it exists, which may not be the desired behaviour.
390 */
391static struct in6_msource *
392im6o_match_source(const struct ip6_moptions *imo, const size_t gidx,
393    const struct sockaddr *src)
394{
395	struct ip6_msource	 find;
396	struct in6_mfilter	*imf;
397	struct ip6_msource	*ims;
398	const sockunion_t	*psa;
399
400	IM6O_LOCK_ASSERT_HELD(IM6O_CAST_TO_NONCONST(imo));
401
402	VERIFY(src->sa_family == AF_INET6);
403	VERIFY(gidx != (size_t)-1 && gidx < imo->im6o_num_memberships);
404
405	/* The im6o_mfilters array may be lazily allocated. */
406	if (imo->im6o_mfilters == NULL)
407		return (NULL);
408	imf = &imo->im6o_mfilters[gidx];
409
410	psa = (sockunion_t *)(uintptr_t)(size_t)src;
411	find.im6s_addr = psa->sin6.sin6_addr;
412	in6_clearscope(&find.im6s_addr);		/* XXX */
413	ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find);
414
415	return ((struct in6_msource *)ims);
416}
417
418/*
419 * Perform filtering for multicast datagrams on a socket by group and source.
420 *
421 * Returns MCAST_PASS (0) if the datagram should be allowed through, or
422 * MCAST_NOTGMEMBER / MCAST_NOTSMEMBER if the group or source check fails.
423 */
424int
425im6o_mc_filter(const struct ip6_moptions *imo, const struct ifnet *ifp,
426    const struct sockaddr *group, const struct sockaddr *src)
427{
428	size_t gidx;
429	struct in6_msource *ims;
430	int mode;
431
432	IM6O_LOCK_ASSERT_HELD(IM6O_CAST_TO_NONCONST(imo));
433	VERIFY(ifp != NULL);
434
435	gidx = im6o_match_group(imo, ifp, group);
436	if (gidx == (size_t)-1)
437		return (MCAST_NOTGMEMBER);
438
439	/*
440	 * Check if the source was included in an (S,G) join.
441	 * Allow reception on exclusive memberships by default,
442	 * reject reception on inclusive memberships by default.
443	 * Exclude source only if an in-mode exclude filter exists.
444	 * Include source only if an in-mode include filter exists.
445	 * NOTE: We are comparing group state here at MLD t1 (now)
446	 * with socket-layer t0 (since last downcall).
447	 */
448	mode = imo->im6o_mfilters[gidx].im6f_st[1];
449	ims = im6o_match_source(imo, gidx, src);
450
451	if ((ims == NULL && mode == MCAST_INCLUDE) ||
452	    (ims != NULL && ims->im6sl_st[0] != mode))
453		return (MCAST_NOTSMEMBER);
454
455	return (MCAST_PASS);
456}
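
/*
 * Usage sketch (illustrative only; the variables and label below are
 * hypothetical): a datagram input path would build sockaddr_in6 copies
 * of the packet's destination group and source, take the socket's IM6O
 * lock, and consult im6o_mc_filter() before queueing a copy of the
 * datagram to that socket.  Any result other than MCAST_PASS means the
 * socket is skipped:
 *
 *	IM6O_LOCK(imo);
 *	blocked = im6o_mc_filter(imo, ifp,
 *	    (struct sockaddr *)&group, (struct sockaddr *)&src);
 *	IM6O_UNLOCK(imo);
 *	if (blocked != MCAST_PASS)
 *		goto skip_this_socket;
 */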
457
458/*
459 * Find and return a reference to an in6_multi record for (ifp, group),
460 * and bump its reference count.
461 * If one does not exist, try to allocate it, and update link-layer multicast
462 * filters on ifp to listen for group.
463 * Assumes the IN6_MULTI lock is held across the call.
464 * Return 0 if successful, otherwise return an appropriate error code.
465 */
466static int
467in6_mc_get(struct ifnet *ifp, const struct in6_addr *group,
468    struct in6_multi **pinm)
469{
470	struct sockaddr_in6	 gsin6;
471	struct ifmultiaddr	*ifma;
472	struct in6_multi	*inm;
473	int			 error;
474
475	*pinm = NULL;
476
477	in6_multihead_lock_shared();
478	IN6_LOOKUP_MULTI(group, ifp, inm);
479	if (inm != NULL) {
480		IN6M_LOCK(inm);
481		VERIFY(inm->in6m_reqcnt >= 1);
482		inm->in6m_reqcnt++;
483		VERIFY(inm->in6m_reqcnt != 0);
484		*pinm = inm;
485		IN6M_UNLOCK(inm);
486		in6_multihead_lock_done();
487		/*
488		 * We already joined this group; return the in6m
489		 * with a refcount held (via lookup) for caller.
490		 */
491		return (0);
492	}
493	in6_multihead_lock_done();
494
495	memset(&gsin6, 0, sizeof(gsin6));
496	gsin6.sin6_family = AF_INET6;
497	gsin6.sin6_len = sizeof(struct sockaddr_in6);
498	gsin6.sin6_addr = *group;
499
500	/*
501	 * Check if a link-layer group is already associated
502	 * with this network-layer group on the given ifnet.
503	 */
504	error = if_addmulti(ifp, (struct sockaddr *)&gsin6, &ifma);
505	if (error != 0)
506		return (error);
507
508	/*
509	 * See comments in in6m_remref() for access to ifma_protospec.
510	 */
511	in6_multihead_lock_exclusive();
512	IFMA_LOCK(ifma);
513	if ((inm = ifma->ifma_protospec) != NULL) {
514		VERIFY(ifma->ifma_addr != NULL);
515		VERIFY(ifma->ifma_addr->sa_family == AF_INET6);
516		IN6M_ADDREF(inm);	/* for caller */
517		IFMA_UNLOCK(ifma);
518		IN6M_LOCK(inm);
519		VERIFY(inm->in6m_ifma == ifma);
520		VERIFY(inm->in6m_ifp == ifp);
521		VERIFY(IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, group));
522		if (inm->in6m_debug & IFD_ATTACHED) {
523			VERIFY(inm->in6m_reqcnt >= 1);
524			inm->in6m_reqcnt++;
525			VERIFY(inm->in6m_reqcnt != 0);
526			*pinm = inm;
527			IN6M_UNLOCK(inm);
528			in6_multihead_lock_done();
529			IFMA_REMREF(ifma);
530			/*
531			 * We lost the race with another thread doing
532			 * in6_mc_get(); this group has already been
533			 * joined, so return the inm with a refcount
534			 * held for the caller.
535			 */
536			return (0);
537		}
538		/*
539		 * We lost the race with another thread doing in6_delmulti();
540		 * the inm referring to the ifma has been detached, thus we
541		 * reattach it back to the in6_multihead list, and return the
542		 * inm with a refcount held for the caller.
543		 */
544		in6_multi_attach(inm);
545		VERIFY((inm->in6m_debug &
546		    (IFD_ATTACHED | IFD_TRASHED)) == IFD_ATTACHED);
547		*pinm = inm;
548		IN6M_UNLOCK(inm);
549		in6_multihead_lock_done();
550		IFMA_REMREF(ifma);
551		return (0);
552	}
553	IFMA_UNLOCK(ifma);
554
555	/*
556	 * A new in6_multi record is needed; allocate and initialize it.
557	 * We DO NOT perform an MLD join as the in6_ layer may need to
558	 * push an initial source list down to MLD to support SSM.
559	 *
560	 * The initial source filter state is INCLUDE, {} as per the RFC.
561	 * Pending state-changes per group are subject to a bounds check.
562	 */
563	inm = in6_multi_alloc(M_WAITOK);
564	if (inm == NULL) {
565		in6_multihead_lock_done();
566		IFMA_REMREF(ifma);
567		return (ENOMEM);
568	}
569	IN6M_LOCK(inm);
570	inm->in6m_addr = *group;
571	inm->in6m_ifp = ifp;
572	inm->in6m_mli = MLD_IFINFO(ifp);
573	VERIFY(inm->in6m_mli != NULL);
574	MLI_ADDREF(inm->in6m_mli);
575	inm->in6m_ifma = ifma;		/* keep refcount from if_addmulti() */
576	inm->in6m_state = MLD_NOT_MEMBER;
577	/*
578	 * Pending state-changes per group are subject to a bounds check.
579	 */
580	inm->in6m_scq.ifq_maxlen = MLD_MAX_STATE_CHANGES;
581	inm->in6m_st[0].iss_fmode = MCAST_UNDEFINED;
582	inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
583	RB_INIT(&inm->in6m_srcs);
584	*pinm = inm;
585	in6_multi_attach(inm);
586	VERIFY((inm->in6m_debug &
587	    (IFD_ATTACHED | IFD_TRASHED)) == IFD_ATTACHED);
588	IN6M_ADDREF_LOCKED(inm);	/* for caller */
589	IN6M_UNLOCK(inm);
590
591	IFMA_LOCK(ifma);
592	VERIFY(ifma->ifma_protospec == NULL);
593	ifma->ifma_protospec = inm;
594	IFMA_UNLOCK(ifma);
595	in6_multihead_lock_done();
596
597	return (0);
598}
599
600/*
601 * Clear recorded source entries for a group.
602 * Used by the MLD code. Caller must hold the IN6_MULTI lock.
603 * FIXME: Should reap.
604 */
605void
606in6m_clear_recorded(struct in6_multi *inm)
607{
608	struct ip6_msource	*ims;
609
610	IN6M_LOCK_ASSERT_HELD(inm);
611
612	RB_FOREACH(ims, ip6_msource_tree, &inm->in6m_srcs) {
613		if (ims->im6s_stp) {
614			ims->im6s_stp = 0;
615			--inm->in6m_st[1].iss_rec;
616		}
617	}
618	VERIFY(inm->in6m_st[1].iss_rec == 0);
619}
620
621/*
622 * Record a source as pending for a Source-Group MLDv2 query.
623 * This lives here as it modifies the shared tree.
624 *
625 * inm is the group descriptor.
626 * naddr is the address of the source to record in network-byte order.
627 *
628 * If the net.inet6.mld.sgalloc sysctl is non-zero, we will
629 * lazy-allocate a source node in response to an SG query.
630 * Otherwise, no allocation is performed. This saves some memory
631 * with the trade-off that the source will not be reported to the
632 * router if joined in the window between the query response and
633 * the group actually being joined on the local host.
634 *
635 * VIMAGE: XXX: Currently the mld_sgalloc feature has been removed.
636 * This turns off the allocation of a recorded source entry if
637 * the group has not been joined.
638 *
639 * Return 0 if the source didn't exist or was already marked as recorded.
640 * Return 1 if the source was marked as recorded by this function.
641 * Return <0 if any error occurred (negated errno code).
642 */
643int
644in6m_record_source(struct in6_multi *inm, const struct in6_addr *addr)
645{
646	struct ip6_msource	 find;
647	struct ip6_msource	*ims, *nims;
648
649	IN6M_LOCK_ASSERT_HELD(inm);
650
651	find.im6s_addr = *addr;
652	ims = RB_FIND(ip6_msource_tree, &inm->in6m_srcs, &find);
653	if (ims && ims->im6s_stp)
654		return (0);
655	if (ims == NULL) {
656		if (inm->in6m_nsrc == in6_mcast_maxgrpsrc)
657			return (-ENOSPC);
658		nims = ip6ms_alloc(M_WAITOK);
659		if (nims == NULL)
660			return (-ENOMEM);
661		nims->im6s_addr = find.im6s_addr;
662		RB_INSERT(ip6_msource_tree, &inm->in6m_srcs, nims);
663		++inm->in6m_nsrc;
664		ims = nims;
665	}
666
667	/*
668	 * Mark the source as recorded and update the recorded
669	 * source count.
670	 */
671	++ims->im6s_stp;
672	++inm->in6m_st[1].iss_rec;
673
674	return (1);
675}
676
677/*
678 * Return a pointer to an in6_msource owned by an in6_mfilter,
679 * given its source address.
680 * Lazy-allocate if needed. If this is a new entry its filter state is
681 * undefined at t0.
682 *
683 * imf is the filter set being modified.
684 * addr is the source address.
685 *
686 * Caller is expected to be holding im6o_lock.
687 */
688static int
689im6f_get_source(struct in6_mfilter *imf, const struct sockaddr_in6 *psin,
690    struct in6_msource **plims)
691{
692	struct ip6_msource	 find;
693	struct ip6_msource	*ims;
694	struct in6_msource	*lims;
695	int			 error;
696
697	error = 0;
698	ims = NULL;
699	lims = NULL;
700
701	find.im6s_addr = psin->sin6_addr;
702	ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find);
703	lims = (struct in6_msource *)ims;
704	if (lims == NULL) {
705		if (imf->im6f_nsrc == in6_mcast_maxsocksrc)
706			return (ENOSPC);
707		lims = in6ms_alloc(M_WAITOK);
708		if (lims == NULL)
709			return (ENOMEM);
710		lims->im6s_addr = find.im6s_addr;
711		lims->im6sl_st[0] = MCAST_UNDEFINED;
712		RB_INSERT(ip6_msource_tree, &imf->im6f_sources,
713		    (struct ip6_msource *)lims);
714		++imf->im6f_nsrc;
715	}
716
717	*plims = lims;
718
719	return (error);
720}
721
722/*
723 * Graft a source entry into an existing socket-layer filter set,
724 * maintaining any required invariants and checking allocations.
725 *
726 * The source is marked as being in the new filter mode at t1.
727 *
728 * Return the pointer to the new node, otherwise return NULL.
729 *
730 * Caller is expected to be holding im6o_lock.
731 */
732static struct in6_msource *
733im6f_graft(struct in6_mfilter *imf, const uint8_t st1,
734    const struct sockaddr_in6 *psin)
735{
736	struct in6_msource	*lims;
737
738	lims = in6ms_alloc(M_WAITOK);
739	if (lims == NULL)
740		return (NULL);
741	lims->im6s_addr = psin->sin6_addr;
742	lims->im6sl_st[0] = MCAST_UNDEFINED;
743	lims->im6sl_st[1] = st1;
744	RB_INSERT(ip6_msource_tree, &imf->im6f_sources,
745	    (struct ip6_msource *)lims);
746	++imf->im6f_nsrc;
747
748	return (lims);
749}
750
751/*
752 * Prune a source entry from an existing socket-layer filter set,
753 * maintaining any required invariants and checking allocations.
754 *
755 * The source is marked as being left at t1, it is not freed.
756 *
757 * Return 0 if no error occurred, otherwise return an errno value.
758 *
759 * Caller is expected to be holding im6o_lock.
760 */
761static int
762im6f_prune(struct in6_mfilter *imf, const struct sockaddr_in6 *psin)
763{
764	struct ip6_msource	 find;
765	struct ip6_msource	*ims;
766	struct in6_msource	*lims;
767
768	find.im6s_addr = psin->sin6_addr;
769	ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find);
770	if (ims == NULL)
771		return (ENOENT);
772	lims = (struct in6_msource *)ims;
773	lims->im6sl_st[1] = MCAST_UNDEFINED;
774	return (0);
775}
776
777/*
778 * Revert socket-layer filter set deltas at t1 to t0 state.
779 *
780 * Caller is expected to be holding im6o_lock.
781 */
782static void
783im6f_rollback(struct in6_mfilter *imf)
784{
785	struct ip6_msource	*ims, *tims;
786	struct in6_msource	*lims;
787
788	RB_FOREACH_SAFE(ims, ip6_msource_tree, &imf->im6f_sources, tims) {
789		lims = (struct in6_msource *)ims;
790		if (lims->im6sl_st[0] == lims->im6sl_st[1]) {
791			/* no change at t1 */
792			continue;
793		} else if (lims->im6sl_st[0] != MCAST_UNDEFINED) {
794			/* revert change to existing source at t1 */
795			lims->im6sl_st[1] = lims->im6sl_st[0];
796		} else {
797			/* revert source added at t1 */
798			MLD_PRINTF(("%s: free in6ms %p\n", __func__, lims));
799			RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, ims);
800			in6ms_free(lims);
801			imf->im6f_nsrc--;
802		}
803	}
804	imf->im6f_st[1] = imf->im6f_st[0];
805}
806
807/*
808 * Mark socket-layer filter set as INCLUDE {} at t1.
809 *
810 * Caller is expected to be holding im6o_lock.
811 */
812void
813im6f_leave(struct in6_mfilter *imf)
814{
815	struct ip6_msource	*ims;
816	struct in6_msource	*lims;
817
818	RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) {
819		lims = (struct in6_msource *)ims;
820		lims->im6sl_st[1] = MCAST_UNDEFINED;
821	}
822	imf->im6f_st[1] = MCAST_INCLUDE;
823}
824
825/*
826 * Mark socket-layer filter set deltas as committed.
827 *
828 * Caller is expected to be holding im6o_lock.
829 */
830static void
831im6f_commit(struct in6_mfilter *imf)
832{
833	struct ip6_msource	*ims;
834	struct in6_msource	*lims;
835
836	RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) {
837		lims = (struct in6_msource *)ims;
838		lims->im6sl_st[0] = lims->im6sl_st[1];
839	}
840	imf->im6f_st[0] = imf->im6f_st[1];
841}
842
843/*
844 * Reap unreferenced sources from socket-layer filter set.
845 *
846 * Caller is expected to be holding im6o_lock.
847 */
848static void
849im6f_reap(struct in6_mfilter *imf)
850{
851	struct ip6_msource	*ims, *tims;
852	struct in6_msource	*lims;
853
854	RB_FOREACH_SAFE(ims, ip6_msource_tree, &imf->im6f_sources, tims) {
855		lims = (struct in6_msource *)ims;
856		if ((lims->im6sl_st[0] == MCAST_UNDEFINED) &&
857		    (lims->im6sl_st[1] == MCAST_UNDEFINED)) {
858			MLD_PRINTF(("%s: free in6ms %p\n", __func__, lims));
859			RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, ims);
860			in6ms_free(lims);
861			imf->im6f_nsrc--;
862		}
863	}
864}
865
866/*
867 * Purge socket-layer filter set.
868 *
869 * Caller is expected to be holding im6o_lock.
870 */
871void
872im6f_purge(struct in6_mfilter *imf)
873{
874	struct ip6_msource	*ims, *tims;
875	struct in6_msource	*lims;
876
877	RB_FOREACH_SAFE(ims, ip6_msource_tree, &imf->im6f_sources, tims) {
878		lims = (struct in6_msource *)ims;
879		MLD_PRINTF(("%s: free in6ms %p\n", __func__, lims));
880		RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, ims);
881		in6ms_free(lims);
882		imf->im6f_nsrc--;
883	}
884	imf->im6f_st[0] = imf->im6f_st[1] = MCAST_UNDEFINED;
885	VERIFY(RB_EMPTY(&imf->im6f_sources));
886}
887
888/*
889 * Look up a source filter entry for a multicast group.
890 *
891 * inm is the group descriptor to work with.
892 * addr is the IPv6 address to look up.
893 * noalloc may be non-zero to suppress allocation of sources.
894 * *pims will be set to the address of the retrieved or allocated source.
895 *
896 * Return 0 if successful, otherwise return a non-zero error code.
897 */
898static int
899in6m_get_source(struct in6_multi *inm, const struct in6_addr *addr,
900    const int noalloc, struct ip6_msource **pims)
901{
902	struct ip6_msource	 find;
903	struct ip6_msource	*ims, *nims;
904
905	IN6M_LOCK_ASSERT_HELD(inm);
906
907	find.im6s_addr = *addr;
908	ims = RB_FIND(ip6_msource_tree, &inm->in6m_srcs, &find);
909	if (ims == NULL && !noalloc) {
910		if (inm->in6m_nsrc == in6_mcast_maxgrpsrc)
911			return (ENOSPC);
912		nims = ip6ms_alloc(M_WAITOK);
913		if (nims == NULL)
914			return (ENOMEM);
915		nims->im6s_addr = *addr;
916		RB_INSERT(ip6_msource_tree, &inm->in6m_srcs, nims);
917		++inm->in6m_nsrc;
918		ims = nims;
919		MLD_PRINTF(("%s: allocated %s as %p\n", __func__,
920		    ip6_sprintf(addr), ims));
921	}
922
923	*pims = ims;
924	return (0);
925}
926
927/*
928 * Helper function to derive the filter mode on a source entry
929 * from its internal counters. Predicates are:
930 *  A source is only excluded if all listeners exclude it.
931 *  A source is only included if no listeners exclude it,
932 *  and at least one listener includes it.
933 * May be used by ifmcstat(8).
934 */
935uint8_t
936im6s_get_mode(const struct in6_multi *inm, const struct ip6_msource *ims,
937    uint8_t t)
938{
939	IN6M_LOCK_ASSERT_HELD(IN6M_CAST_TO_NONCONST(inm));
940
941	t = !!t;
942	if (inm->in6m_st[t].iss_ex > 0 &&
943	    inm->in6m_st[t].iss_ex == ims->im6s_st[t].ex)
944		return (MCAST_EXCLUDE);
945	else if (ims->im6s_st[t].in > 0 && ims->im6s_st[t].ex == 0)
946		return (MCAST_INCLUDE);
947	return (MCAST_UNDEFINED);
948}
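
/*
 * Worked example (for illustration): if three memberships of the group
 * are in exclude mode at time t (inm->in6m_st[t].iss_ex == 3) and all
 * three exclude source S (ims->im6s_st[t].ex == 3), im6s_get_mode()
 * reports S as MCAST_EXCLUDE.  If only two of them exclude S, the
 * counts differ and S is instead MCAST_INCLUDE when it has at least
 * one include reference and no exclude references, or MCAST_UNDEFINED
 * otherwise.
 */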
949
950/*
951 * Merge socket-layer source into MLD-layer source.
952 * If rollback is non-zero, perform the inverse of the merge.
953 */
954static void
955im6s_merge(struct ip6_msource *ims, const struct in6_msource *lims,
956    const int rollback)
957{
958	int n = rollback ? -1 : 1;
959
960	if (lims->im6sl_st[0] == MCAST_EXCLUDE) {
961		MLD_PRINTF(("%s: t1 ex -= %d on %s\n", __func__, n,
962		    ip6_sprintf(&lims->im6s_addr)));
963		ims->im6s_st[1].ex -= n;
964	} else if (lims->im6sl_st[0] == MCAST_INCLUDE) {
965		MLD_PRINTF(("%s: t1 in -= %d on %s\n", __func__, n,
966		    ip6_sprintf(&lims->im6s_addr)));
967		ims->im6s_st[1].in -= n;
968	}
969
970	if (lims->im6sl_st[1] == MCAST_EXCLUDE) {
971		MLD_PRINTF(("%s: t1 ex += %d on %s\n", __func__, n,
972		    ip6_sprintf(&lims->im6s_addr)));
973		ims->im6s_st[1].ex += n;
974	} else if (lims->im6sl_st[1] == MCAST_INCLUDE) {
975		MLD_PRINTF(("%s: t1 in += %d on %s\n", __func__, n,
976		    ip6_sprintf(&lims->im6s_addr)));
977		ims->im6s_st[1].in += n;
978	}
979}
980
981/*
982 * Atomically update the global in6_multi state, when a membership's
983 * filter list is being updated in any way.
984 *
985 * imf is the per-inpcb-membership group filter pointer.
986 * A fake imf may be passed for in-kernel consumers.
987 *
988 * XXX This is a candidate for a set-symmetric-difference style loop
989 * which would eliminate the repeated lookup from root of ims nodes,
990 * as they share the same key space.
991 *
992 * If any error occurred this function will back out of refcounts
993 * and return a non-zero value.
994 */
995static int
996in6m_merge(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf)
997{
998	struct ip6_msource	*ims, *nims;
999	struct in6_msource	*lims;
1000	int			 schanged, error;
1001	int			 nsrc0, nsrc1;
1002
1003	IN6M_LOCK_ASSERT_HELD(inm);
1004
1005	schanged = 0;
1006	error = 0;
1007	nsrc1 = nsrc0 = 0;
1008
1009	/*
1010	 * Update the source filters first, as this may fail.
1011	 * Maintain count of in-mode filters at t0, t1. These are
1012	 * used to work out if we transition into ASM mode or not.
1013	 * Maintain a count of source filters whose state was
1014	 * actually modified by this operation.
1015	 */
1016	RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) {
1017		lims = (struct in6_msource *)ims;
1018		if (lims->im6sl_st[0] == imf->im6f_st[0]) nsrc0++;
1019		if (lims->im6sl_st[1] == imf->im6f_st[1]) nsrc1++;
1020		if (lims->im6sl_st[0] == lims->im6sl_st[1]) continue;
1021		error = in6m_get_source(inm, &lims->im6s_addr, 0, &nims);
1022		++schanged;
1023		if (error)
1024			break;
1025		im6s_merge(nims, lims, 0);
1026	}
1027	if (error) {
1028		struct ip6_msource *bims;
1029
1030		RB_FOREACH_REVERSE_FROM(ims, ip6_msource_tree, nims) {
1031			lims = (struct in6_msource *)ims;
1032			if (lims->im6sl_st[0] == lims->im6sl_st[1])
1033				continue;
1034			(void) in6m_get_source(inm, &lims->im6s_addr, 1, &bims);
1035			if (bims == NULL)
1036				continue;
1037			im6s_merge(bims, lims, 1);
1038		}
1039		goto out_reap;
1040	}
1041
1042	MLD_PRINTF(("%s: imf filters in-mode: %d at t0, %d at t1\n",
1043	    __func__, nsrc0, nsrc1));
1044
1045	/* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */
1046	if (imf->im6f_st[0] == imf->im6f_st[1] &&
1047	    imf->im6f_st[1] == MCAST_INCLUDE) {
1048		if (nsrc1 == 0) {
1049			MLD_PRINTF(("%s: --in on inm at t1\n", __func__));
1050			--inm->in6m_st[1].iss_in;
1051		}
1052	}
1053
1054	/* Handle filter mode transition on socket. */
1055	if (imf->im6f_st[0] != imf->im6f_st[1]) {
1056		MLD_PRINTF(("%s: imf transition %d to %d\n",
1057		    __func__, imf->im6f_st[0], imf->im6f_st[1]));
1058
1059		if (imf->im6f_st[0] == MCAST_EXCLUDE) {
1060			MLD_PRINTF(("%s: --ex on inm at t1\n", __func__));
1061			--inm->in6m_st[1].iss_ex;
1062		} else if (imf->im6f_st[0] == MCAST_INCLUDE) {
1063			MLD_PRINTF(("%s: --in on inm at t1\n", __func__));
1064			--inm->in6m_st[1].iss_in;
1065		}
1066
1067		if (imf->im6f_st[1] == MCAST_EXCLUDE) {
1068			MLD_PRINTF(("%s: ex++ on inm at t1\n", __func__));
1069			inm->in6m_st[1].iss_ex++;
1070		} else if (imf->im6f_st[1] == MCAST_INCLUDE && nsrc1 > 0) {
1071			MLD_PRINTF(("%s: in++ on inm at t1\n", __func__));
1072			inm->in6m_st[1].iss_in++;
1073		}
1074	}
1075
1076	/*
1077	 * Track inm filter state in terms of listener counts.
1078	 * If there are any exclusive listeners, stack-wide
1079	 * membership is exclusive.
1080	 * Otherwise, if only inclusive listeners, stack-wide is inclusive.
1081	 * If no listeners remain, state is undefined at t1,
1082	 * and the MLD lifecycle for this group should finish.
1083	 */
1084	if (inm->in6m_st[1].iss_ex > 0) {
1085		MLD_PRINTF(("%s: transition to EX\n", __func__));
1086		inm->in6m_st[1].iss_fmode = MCAST_EXCLUDE;
1087	} else if (inm->in6m_st[1].iss_in > 0) {
1088		MLD_PRINTF(("%s: transition to IN\n", __func__));
1089		inm->in6m_st[1].iss_fmode = MCAST_INCLUDE;
1090	} else {
1091		MLD_PRINTF(("%s: transition to UNDEF\n", __func__));
1092		inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
1093	}
1094
1095	/* Decrement ASM listener count on transition out of ASM mode. */
1096	if (imf->im6f_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
1097		if ((imf->im6f_st[1] != MCAST_EXCLUDE) ||
1098		    (imf->im6f_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) {
1099			MLD_PRINTF(("%s: --asm on inm at t1\n", __func__));
1100			--inm->in6m_st[1].iss_asm;
1101		}
1102	}
1103
1104	/* Increment ASM listener count on transition to ASM mode. */
1105	if (imf->im6f_st[1] == MCAST_EXCLUDE && nsrc1 == 0) {
1106		MLD_PRINTF(("%s: asm++ on inm at t1\n", __func__));
1107		inm->in6m_st[1].iss_asm++;
1108	}
1109
1110	MLD_PRINTF(("%s: merged imf %p to inm %p\n", __func__, imf, inm));
1111	in6m_print(inm);
1112
1113out_reap:
1114	if (schanged > 0) {
1115		MLD_PRINTF(("%s: sources changed; reaping\n", __func__));
1116		in6m_reap(inm);
1117	}
1118	return (error);
1119}
1120
1121/*
1122 * Mark an in6_multi's filter set deltas as committed.
1123 * Called by MLD after a state change has been enqueued.
1124 */
1125void
1126in6m_commit(struct in6_multi *inm)
1127{
1128	struct ip6_msource	*ims;
1129
1130	IN6M_LOCK_ASSERT_HELD(inm);
1131
1132	MLD_PRINTF(("%s: commit inm %p\n", __func__, inm));
1133	MLD_PRINTF(("%s: pre commit:\n", __func__));
1134	in6m_print(inm);
1135
1136	RB_FOREACH(ims, ip6_msource_tree, &inm->in6m_srcs) {
1137		ims->im6s_st[0] = ims->im6s_st[1];
1138	}
1139	inm->in6m_st[0] = inm->in6m_st[1];
1140}
1141
1142/*
1143 * Reap unreferenced nodes from an in6_multi's filter set.
1144 */
1145static void
1146in6m_reap(struct in6_multi *inm)
1147{
1148	struct ip6_msource	*ims, *tims;
1149
1150	IN6M_LOCK_ASSERT_HELD(inm);
1151
1152	RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs, tims) {
1153		if (ims->im6s_st[0].ex > 0 || ims->im6s_st[0].in > 0 ||
1154		    ims->im6s_st[1].ex > 0 || ims->im6s_st[1].in > 0 ||
1155		    ims->im6s_stp != 0)
1156			continue;
1157		MLD_PRINTF(("%s: free ims %p\n", __func__, ims));
1158		RB_REMOVE(ip6_msource_tree, &inm->in6m_srcs, ims);
1159		ip6ms_free(ims);
1160		inm->in6m_nsrc--;
1161	}
1162}
1163
1164/*
1165 * Purge all source nodes from an in6_multi's filter set.
1166 */
1167void
1168in6m_purge(struct in6_multi *inm)
1169{
1170	struct ip6_msource	*ims, *tims;
1171
1172	IN6M_LOCK_ASSERT_HELD(inm);
1173
1174	RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs, tims) {
1175		MLD_PRINTF(("%s: free ims %p\n", __func__, ims));
1176		RB_REMOVE(ip6_msource_tree, &inm->in6m_srcs, ims);
1177		ip6ms_free(ims);
1178		inm->in6m_nsrc--;
1179	}
1180}
1181
1182/*
1183 * Join a multicast address w/o sources.
1184 * KAME compatibility entry point.
1186 */
1187struct in6_multi_mship *
1188in6_joingroup(struct ifnet *ifp, struct in6_addr *mcaddr,
1189    int *errorp, int delay)
1190{
1191	struct in6_multi_mship *imm;
1192	int error;
1193
1194	*errorp = 0;
1195
1196	imm = in6_multi_mship_alloc(M_WAITOK);
1197	if (imm == NULL) {
1198		*errorp = ENOBUFS;
1199		return (NULL);
1200	}
1201
1202	delay = (delay * PR_SLOWHZ) / hz;
1203
1204	error = in6_mc_join(ifp, mcaddr, NULL, &imm->i6mm_maddr, delay);
1205	if (error) {
1206		*errorp = error;
1207		in6_multi_mship_free(imm);
1208		return (NULL);
1209	}
1210
1211	return (imm);
1212}
1213
1214/*
1215 * Leave a multicast address w/o sources.
1216 * KAME compatibility entry point.
1217 */
1218int
1219in6_leavegroup(struct in6_multi_mship *imm)
1220{
1221	if (imm->i6mm_maddr != NULL) {
1222		in6_mc_leave(imm->i6mm_maddr, NULL);
1223		IN6M_REMREF(imm->i6mm_maddr);
1224		imm->i6mm_maddr = NULL;
1225	}
1226	in6_multi_mship_free(imm);
1227	return 0;
1228}
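
/*
 * In-kernel usage sketch (illustrative only; identifiers below are
 * hypothetical): a kernel consumer that needs an ASM membership, e.g.
 * for a solicited-node address, joins and later leaves via this pair:
 *
 *	struct in6_multi_mship *imm;
 *	int error;
 *
 *	imm = in6_joingroup(ifp, &solicited_node_addr, &error, 0);
 *	if (imm == NULL)
 *		return (error);
 *	...
 *	(void) in6_leavegroup(imm);
 */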
1229
1230/*
1231 * Join a multicast group; real entry point.
1232 *
1233 * Only preserves atomicity at inm level.
1234 * NOTE: imf argument cannot be const due to sys/tree.h limitations.
1235 *
1236 * If the MLD downcall fails, the group is not joined, and an error
1237 * code is returned.
1238 */
1239int
1240in6_mc_join(struct ifnet *ifp, const struct in6_addr *mcaddr,
1241    /*const*/ struct in6_mfilter *imf, struct in6_multi **pinm,
1242    const int delay)
1243{
1244	struct in6_mfilter	 timf;
1245	struct in6_multi	*inm = NULL;
1246	int			 error = 0;
1247
1248	/*
1249	 * Sanity: Check scope zone ID was set for ifp, if and
1250	 * only if group is scoped to an interface.
1251	 */
1252	VERIFY(IN6_IS_ADDR_MULTICAST(mcaddr));
1253	if (IN6_IS_ADDR_MC_LINKLOCAL(mcaddr) ||
1254	    IN6_IS_ADDR_MC_INTFACELOCAL(mcaddr)) {
1255		VERIFY(mcaddr->s6_addr16[1] != 0);
1256	}
1257
1258	MLD_PRINTF(("%s: join %s on %p(%s%d)\n", __func__,
1259	    ip6_sprintf(mcaddr), ifp, ifp->if_name, ifp->if_unit));
1260
1261	*pinm = NULL;
1262
1263	/*
1264	 * If no imf was specified (i.e. kernel consumer),
1265	 * fake one up and assume it is an ASM join.
1266	 */
1267	if (imf == NULL) {
1268		im6f_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE);
1269		imf = &timf;
1270	}
1271
1272	error = in6_mc_get(ifp, mcaddr, &inm);
1273	if (error) {
1274		MLD_PRINTF(("%s: in6_mc_get() failure\n", __func__));
1275		return (error);
1276	}
1277
1278	MLD_PRINTF(("%s: merge inm state\n", __func__));
1279
1280	IN6M_LOCK(inm);
1281	error = in6m_merge(inm, imf);
1282	if (error) {
1283		MLD_PRINTF(("%s: failed to merge inm state\n", __func__));
1284		goto out_in6m_release;
1285	}
1286
1287	MLD_PRINTF(("%s: doing mld downcall\n", __func__));
1288	error = mld_change_state(inm, delay);
1289	if (error) {
1290		MLD_PRINTF(("%s: failed to update source\n", __func__));
1291		goto out_in6m_release;
1292	}
1293
1294out_in6m_release:
1295	if (error) {
1296		MLD_PRINTF(("%s: dropping ref on %p\n", __func__, inm));
1297		IN6M_UNLOCK(inm);
1298		IN6M_REMREF(inm);
1299	} else {
1300		IN6M_UNLOCK(inm);
1301		*pinm = inm;	/* keep refcount from in6_mc_get() */
1302	}
1303
1304	return (error);
1305}
1306
1307/*
1308 * Leave a multicast group; real entry point.
1309 * All source filters will be expunged.
1310 *
1311 * Only preserves atomicity at inm level.
1312 *
1313 * Holding the write lock for the INP which contains imf
1314 * is highly advisable. We can't assert for it as imf does not
1315 * contain a back-pointer to the owning inp.
1316 *
1317 * Note: This is not the same as in6m_release(*) as this function also
1318 * makes a state change downcall into MLD.
1319 */
1320int
1321in6_mc_leave(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf)
1322{
1323	struct in6_mfilter	 timf;
1324	int			 error, lastref;
1325
1326	error = 0;
1327
1328	IN6M_LOCK_ASSERT_NOTHELD(inm);
1329
1330	in6_multihead_lock_exclusive();
1331	IN6M_LOCK(inm);
1332
1333	MLD_PRINTF(("%s: leave inm %p, %s/%s%d, imf %p\n", __func__,
1334	    inm, ip6_sprintf(&inm->in6m_addr),
1335	    (in6m_is_ifp_detached(inm) ? "null" : inm->in6m_ifp->if_name),
1336	    inm->in6m_ifp->if_unit, imf));
1337
1338	/*
1339	 * If no imf was specified (i.e. kernel consumer),
1340	 * fake one up and assume it is an ASM join.
1341	 */
1342	if (imf == NULL) {
1343		im6f_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED);
1344		imf = &timf;
1345	}
1346
1347	/*
1348	 * Begin state merge transaction at MLD layer.
1349	 *
1350	 * As this particular invocation should not cause any memory
1351	 * to be allocated, and there is no opportunity to roll back
1352	 * the transaction, it MUST NOT fail.
1353	 */
1354	MLD_PRINTF(("%s: merge inm state\n", __func__));
1355
1356	error = in6m_merge(inm, imf);
1357	KASSERT(error == 0, ("%s: failed to merge inm state\n", __func__));
1358
1359	MLD_PRINTF(("%s: doing mld downcall\n", __func__));
1360	error = mld_change_state(inm, 0);
1361#if MLD_DEBUG
1362	if (error)
1363		MLD_PRINTF(("%s: failed mld downcall\n", __func__));
1364#endif
1365	lastref = in6_multi_detach(inm);
1366	VERIFY(!lastref || (!(inm->in6m_debug & IFD_ATTACHED) &&
1367	    inm->in6m_reqcnt == 0));
1368	IN6M_UNLOCK(inm);
1369	in6_multihead_lock_done();
1370
1371	if (lastref)
1372		IN6M_REMREF(inm);	/* for in6_multihead list */
1373
1374	return (error);
1375}
1376
1377/*
1378 * Block or unblock an ASM multicast source on an inpcb.
1379 * This implements the delta-based API described in RFC 3678.
1380 *
1381 * The delta-based API applies only to exclusive-mode memberships.
1382 * An MLD downcall will be performed.
1383 *
1384 * Return 0 if successful, otherwise return an appropriate error code.
1385 */
1386static int
1387in6p_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
1388{
1389	struct group_source_req		 gsr;
1390	sockunion_t			*gsa, *ssa;
1391	struct ifnet			*ifp;
1392	struct in6_mfilter		*imf;
1393	struct ip6_moptions		*imo;
1394	struct in6_msource		*ims;
1395	struct in6_multi		*inm;
1396	size_t				 idx;
1397	uint16_t			 fmode;
1398	int				 error, doblock;
1399
1400	ifp = NULL;
1401	error = 0;
1402	doblock = 0;
1403
1404	memset(&gsr, 0, sizeof(struct group_source_req));
1405	gsa = (sockunion_t *)&gsr.gsr_group;
1406	ssa = (sockunion_t *)&gsr.gsr_source;
1407
1408	switch (sopt->sopt_name) {
1409	case MCAST_BLOCK_SOURCE:
1410	case MCAST_UNBLOCK_SOURCE:
1411		error = sooptcopyin(sopt, &gsr,
1412		    sizeof(struct group_source_req),
1413		    sizeof(struct group_source_req));
1414		if (error)
1415			return (error);
1416
1417		if (gsa->sin6.sin6_family != AF_INET6 ||
1418		    gsa->sin6.sin6_len != sizeof(struct sockaddr_in6))
1419			return (EINVAL);
1420
1421		if (ssa->sin6.sin6_family != AF_INET6 ||
1422		    ssa->sin6.sin6_len != sizeof(struct sockaddr_in6))
1423			return (EINVAL);
1424
1425		ifnet_head_lock_shared();
1426		if (gsr.gsr_interface == 0 ||
1427		    (u_int)if_index < gsr.gsr_interface) {
1428			ifnet_head_done();
1429			return (EADDRNOTAVAIL);
1430		}
1431
1432		ifp = ifindex2ifnet[gsr.gsr_interface];
1433		ifnet_head_done();
1434
1435		if (ifp == NULL)
1436			return (EADDRNOTAVAIL);
1437
1438		if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
1439			doblock = 1;
1440		break;
1441
1442	default:
1443		MLD_PRINTF(("%s: unknown sopt_name %d\n",
1444		    __func__, sopt->sopt_name));
1445		return (EOPNOTSUPP);
1446		break;
1447	}
1448
1449	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
1450		return (EINVAL);
1451
1452	(void) in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
1453
1454	/*
1455	 * Check if we are actually a member of this group.
1456	 */
1457	imo = in6p_findmoptions(inp);
1458	if (imo == NULL)
1459		return (ENOMEM);
1460
1461	IM6O_LOCK(imo);
1462	idx = im6o_match_group(imo, ifp, &gsa->sa);
1463	if (idx == (size_t)-1 || imo->im6o_mfilters == NULL) {
1464		error = EADDRNOTAVAIL;
1465		goto out_imo_locked;
1466	}
1467
1468	VERIFY(imo->im6o_mfilters != NULL);
1469	imf = &imo->im6o_mfilters[idx];
1470	inm = imo->im6o_membership[idx];
1471
1472	/*
1473	 * Attempting to use the delta-based API on an
1474	 * non exclusive-mode membership is an error.
1475	 */
1476	fmode = imf->im6f_st[0];
1477	if (fmode != MCAST_EXCLUDE) {
1478		error = EINVAL;
1479		goto out_imo_locked;
1480	}
1481
1482	/*
1483	 * Deal with error cases up-front:
1484	 *  Asked to block, but already blocked; or
1485	 *  Asked to unblock, but nothing to unblock.
1486	 * If adding a new block entry, allocate it.
1487	 */
1488	ims = im6o_match_source(imo, idx, &ssa->sa);
1489	if ((ims != NULL && doblock) || (ims == NULL && !doblock)) {
1490		MLD_PRINTF(("%s: source %s %spresent\n", __func__,
1491		    ip6_sprintf(&ssa->sin6.sin6_addr),
1492		    doblock ? "" : "not "));
1493		error = EADDRNOTAVAIL;
1494		goto out_imo_locked;
1495	}
1496
1497	/*
1498	 * Begin state merge transaction at socket layer.
1499	 */
1500	if (doblock) {
1501		MLD_PRINTF(("%s: %s source\n", __func__, "block"));
1502		ims = im6f_graft(imf, fmode, &ssa->sin6);
1503		if (ims == NULL)
1504			error = ENOMEM;
1505	} else {
1506		MLD_PRINTF(("%s: %s source\n", __func__, "allow"));
1507		error = im6f_prune(imf, &ssa->sin6);
1508	}
1509
1510	if (error) {
1511		MLD_PRINTF(("%s: merge imf state failed\n", __func__));
1512		goto out_im6f_rollback;
1513	}
1514
1515	/*
1516	 * Begin state merge transaction at MLD layer.
1517	 */
1518	IN6M_LOCK(inm);
1519	MLD_PRINTF(("%s: merge inm state\n", __func__));
1520	error = in6m_merge(inm, imf);
1521	if (error) {
1522		MLD_PRINTF(("%s: failed to merge inm state\n", __func__));
1523		IN6M_UNLOCK(inm);
1524		goto out_im6f_rollback;
1525	}
1526
1527	MLD_PRINTF(("%s: doing mld downcall\n", __func__));
1528	error = mld_change_state(inm, 0);
1529	IN6M_UNLOCK(inm);
1530#if MLD_DEBUG
1531	if (error)
1532		MLD_PRINTF(("%s: failed mld downcall\n", __func__));
1533#endif
1534
1535out_im6f_rollback:
1536	if (error)
1537		im6f_rollback(imf);
1538	else
1539		im6f_commit(imf);
1540
1541	im6f_reap(imf);
1542
1543out_imo_locked:
1544	IM6O_UNLOCK(imo);
1545	IM6O_REMREF(imo);	/* from in6p_findmoptions() */
1546	return (error);
1547}
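
/*
 * Userland usage sketch (illustrative only; "s" and "en0" are
 * placeholders, error handling and the usual <sys/socket.h>,
 * <netinet/in.h>, <arpa/inet.h>, <net/if.h> includes are omitted):
 * an application drives this delta-based RFC 3678 API with
 * setsockopt() on a socket already joined to the group in exclude
 * mode, e.g. to block traffic from one source:
 *
 *	struct group_source_req gsr;
 *	struct sockaddr_in6 *grp = (struct sockaddr_in6 *)&gsr.gsr_group;
 *	struct sockaddr_in6 *src = (struct sockaddr_in6 *)&gsr.gsr_source;
 *
 *	memset(&gsr, 0, sizeof (gsr));
 *	gsr.gsr_interface = if_nametoindex("en0");
 *	grp->sin6_family = AF_INET6;
 *	grp->sin6_len = sizeof (*grp);
 *	(void) inet_pton(AF_INET6, "ff15::1", &grp->sin6_addr);
 *	src->sin6_family = AF_INET6;
 *	src->sin6_len = sizeof (*src);
 *	(void) inet_pton(AF_INET6, "2001:db8::1", &src->sin6_addr);
 *	(void) setsockopt(s, IPPROTO_IPV6, MCAST_BLOCK_SOURCE,
 *	    &gsr, sizeof (gsr));
 *
 * MCAST_UNBLOCK_SOURCE with the same arguments reverses the block.
 */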
1548
1549/*
1550 * Given an inpcb, return its multicast options structure pointer.  Accepts
1551 * an unlocked inpcb pointer, but will return it locked.  May sleep.
1553 */
1554static struct ip6_moptions *
1555in6p_findmoptions(struct inpcb *inp)
1556{
1557	struct ip6_moptions	 *imo;
1558	struct in6_multi	**immp;
1559	struct in6_mfilter	 *imfp;
1560	size_t			  idx;
1561
1562	if ((imo = inp->in6p_moptions) != NULL) {
1563		IM6O_ADDREF(imo);	/* for caller */
1564		return (imo);
1565	}
1566
1567	imo = ip6_allocmoptions(M_WAITOK);
1568	if (imo == NULL)
1569		return (NULL);
1570
1571	immp = _MALLOC(sizeof (*immp) * IPV6_MIN_MEMBERSHIPS, M_IP6MOPTS,
1572	    M_WAITOK | M_ZERO);
1573	if (immp == NULL) {
1574		IM6O_REMREF(imo);
1575		return (NULL);
1576	}
1577
1578	imfp = _MALLOC(sizeof (struct in6_mfilter) * IPV6_MIN_MEMBERSHIPS,
1579	    M_IN6MFILTER, M_WAITOK | M_ZERO);
1580	if (imfp == NULL) {
1581		_FREE(immp, M_IP6MOPTS);
1582		IM6O_REMREF(imo);
1583		return (NULL);
1584	}
1585
1586	imo->im6o_multicast_ifp = NULL;
1587	imo->im6o_multicast_hlim = ip6_defmcasthlim;
1588	imo->im6o_multicast_loop = in6_mcast_loop;
1589	imo->im6o_num_memberships = 0;
1590	imo->im6o_max_memberships = IPV6_MIN_MEMBERSHIPS;
1591	imo->im6o_membership = immp;
1592
1593	/* Initialize per-group source filters. */
1594	for (idx = 0; idx < IPV6_MIN_MEMBERSHIPS; idx++)
1595		im6f_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE);
1596
1597	imo->im6o_mfilters = imfp;
1598	inp->in6p_moptions = imo; /* keep reference from ip6_allocmoptions() */
1599	IM6O_ADDREF(imo);	/* for caller */
1600
1601	return (imo);
1602}
1603
1604/*
1605 * Atomically get source filters on a socket for an IPv6 multicast group.
1606 * Called with INP lock held; returns with lock released.
1607 */
1608static int
1609in6p_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
1610{
1611	struct __msfilterreq64	msfr, msfr64;
1612	struct __msfilterreq32	msfr32;
1613	sockunion_t		*gsa;
1614	struct ifnet		*ifp;
1615	struct ip6_moptions	*imo;
1616	struct in6_mfilter	*imf;
1617	struct ip6_msource	*ims;
1618	struct in6_msource	*lims;
1619	struct sockaddr_in6	*psin;
1620	struct sockaddr_storage	*ptss;
1621	struct sockaddr_storage	*tss;
1622	int	 		 error;
1623	size_t		 	 idx, nsrcs, ncsrcs;
1624	user_addr_t 		 tmp_ptr;
1625
1626	imo = inp->in6p_moptions;
1627	VERIFY(imo != NULL);
1628
1629	if (IS_64BIT_PROCESS(current_proc())) {
1630		error = sooptcopyin(sopt, &msfr64,
1631		    sizeof(struct __msfilterreq64),
1632		    sizeof(struct __msfilterreq64));
1633		if (error)
1634			return (error);
1635		/* we never use msfr.msfr_srcs; */
1636		memcpy(&msfr, &msfr64, sizeof(msfr));
1637	} else {
1638		error = sooptcopyin(sopt, &msfr32,
1639		    sizeof(struct __msfilterreq32),
1640		    sizeof(struct __msfilterreq32));
1641		if (error)
1642			return (error);
1643		/* we never use msfr.msfr_srcs; */
1644		memcpy(&msfr, &msfr32, sizeof(msfr32));
1645	}
1646
1647	if (msfr.msfr_group.ss_family != AF_INET6 ||
1648	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in6))
1649		return (EINVAL);
1650
1651	gsa = (sockunion_t *)&msfr.msfr_group;
1652	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
1653		return (EINVAL);
1654
1655	ifnet_head_lock_shared();
1656	if (msfr.msfr_ifindex == 0 || (u_int)if_index < msfr.msfr_ifindex) {
1657		ifnet_head_done();
1658		return (EADDRNOTAVAIL);
1659	}
1660	ifp = ifindex2ifnet[msfr.msfr_ifindex];
1661	ifnet_head_done();
1662
1663	if (ifp == NULL)
1664		return (EADDRNOTAVAIL);
1665
1666	if ((size_t) msfr.msfr_nsrcs >
1667	    SIZE_MAX / sizeof(struct sockaddr_storage))
1668		msfr.msfr_nsrcs = SIZE_MAX / sizeof(struct sockaddr_storage);
1669
1670	if (msfr.msfr_nsrcs > in6_mcast_maxsocksrc)
1671		msfr.msfr_nsrcs = in6_mcast_maxsocksrc;
1672
1673	(void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
1674
1675	IM6O_LOCK(imo);
1676	/*
1677	 * Lookup group on the socket.
1678	 */
1679	idx = im6o_match_group(imo, ifp, &gsa->sa);
1680	if (idx == (size_t)-1 || imo->im6o_mfilters == NULL) {
1681		IM6O_UNLOCK(imo);
1682		return (EADDRNOTAVAIL);
1683	}
1684	imf = &imo->im6o_mfilters[idx];
1685
1686	/*
1687	 * Ignore memberships which are in limbo.
1688	 */
1689	if (imf->im6f_st[1] == MCAST_UNDEFINED) {
1690		IM6O_UNLOCK(imo);
1691		return (EAGAIN);
1692	}
1693	msfr.msfr_fmode = imf->im6f_st[1];
1694
1695	/*
1696	 * If the user specified a buffer, copy out the source filter
1697	 * entries to userland gracefully.
1698	 * We only copy out the number of entries which userland
1699	 * has asked for, but we always tell userland how big the
1700	 * buffer really needs to be.
1701	 */
1702	tss = NULL;
1703
1704	if (IS_64BIT_PROCESS(current_proc()))
1705		tmp_ptr = msfr64.msfr_srcs;
1706	else
1707		tmp_ptr = CAST_USER_ADDR_T(msfr32.msfr_srcs);
1708
1709	if (tmp_ptr != USER_ADDR_NULL && msfr.msfr_nsrcs > 0) {
1710		tss = _MALLOC((size_t) msfr.msfr_nsrcs * sizeof(*tss),
1711		    M_TEMP, M_WAITOK | M_ZERO);
1712		if (tss == NULL) {
1713			IM6O_UNLOCK(imo);
1714			return (ENOBUFS);
1715		}
1716		bzero(tss, (size_t) msfr.msfr_nsrcs * sizeof(*tss));
1717	}
1718
1719	/*
1720	 * Count number of sources in-mode at t0.
1721	 * If buffer space exists and remains, copy out source entries.
1722	 */
1723	nsrcs = msfr.msfr_nsrcs;
1724	ncsrcs = 0;
1725	ptss = tss;
1726	RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) {
1727		lims = (struct in6_msource *)ims;
1728		if (lims->im6sl_st[0] == MCAST_UNDEFINED ||
1729		    lims->im6sl_st[0] != imf->im6f_st[0])
1730			continue;
1731		if (tss != NULL && nsrcs > 0) {
1732			psin = (struct sockaddr_in6 *)ptss;
1733			psin->sin6_family = AF_INET6;
1734			psin->sin6_len = sizeof(struct sockaddr_in6);
1735			psin->sin6_addr = lims->im6s_addr;
1736			psin->sin6_port = 0;
1737			--nsrcs;
1738			++ptss;
1739			++ncsrcs;
1740		}
1741	}
1742
1743	IM6O_UNLOCK(imo);
1744
1745	if (tss != NULL) {
1746		error = copyout(tss, tmp_ptr, ncsrcs * sizeof(*tss));
1747		FREE(tss, M_TEMP);
1748		if (error)
1749			return (error);
1750	}
1751
1752	msfr.msfr_nsrcs = ncsrcs;
1753	if (IS_64BIT_PROCESS(current_proc())) {
1754		msfr64.msfr_ifindex = msfr.msfr_ifindex;
1755		msfr64.msfr_fmode   = msfr.msfr_fmode;
1756		msfr64.msfr_nsrcs   = msfr.msfr_nsrcs;
1757		memcpy(&msfr64.msfr_group, &msfr.msfr_group,
1758		    sizeof(struct sockaddr_storage));
1759		error = sooptcopyout(sopt, &msfr64,
1760		    sizeof(struct __msfilterreq64));
1761	} else {
1762		msfr32.msfr_ifindex = msfr.msfr_ifindex;
1763		msfr32.msfr_fmode   = msfr.msfr_fmode;
1764		msfr32.msfr_nsrcs   = msfr.msfr_nsrcs;
1765		memcpy(&msfr32.msfr_group, &msfr.msfr_group,
1766		    sizeof(struct sockaddr_storage));
1767		error = sooptcopyout(sopt, &msfr32,
1768		    sizeof(struct __msfilterreq32));
1769	}
1770
1771	return (error);
1772}
1773
1774/*
1775 * Return the IP multicast options in response to user getsockopt().
1776 */
1777int
1778ip6_getmoptions(struct inpcb *inp, struct sockopt *sopt)
1779{
1780	struct ip6_moptions	*im6o;
1781	int			 error;
1782	u_int			 optval;
1783
1784	im6o = inp->in6p_moptions;
1785	/*
1786	 * If the socket is neither of type SOCK_RAW nor SOCK_DGRAM,
1787	 * or is a divert socket, reject it.
1788	 */
1789	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
1790	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
1791	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
1792		return (EOPNOTSUPP);
1793	}
1794
1795	error = 0;
1796	switch (sopt->sopt_name) {
1797	case IPV6_MULTICAST_IF:
1798		if (im6o != NULL)
1799			IM6O_LOCK(im6o);
1800		if (im6o == NULL || im6o->im6o_multicast_ifp == NULL) {
1801			optval = 0;
1802		} else {
1803			optval = im6o->im6o_multicast_ifp->if_index;
1804		}
1805		if (im6o != NULL)
1806			IM6O_UNLOCK(im6o);
1807		error = sooptcopyout(sopt, &optval, sizeof(u_int));
1808		break;
1809
1810	case IPV6_MULTICAST_HOPS:
1811		if (im6o == NULL) {
1812			optval = ip6_defmcasthlim;
1813		} else {
1814			IM6O_LOCK(im6o);
1815			optval = im6o->im6o_multicast_hlim;
1816			IM6O_UNLOCK(im6o);
1817		}
1818		error = sooptcopyout(sopt, &optval, sizeof(u_int));
1819		break;
1820
1821	case IPV6_MULTICAST_LOOP:
1822		if (im6o == NULL) {
1823			optval = in6_mcast_loop; /* XXX VIMAGE */
1824		} else {
1825			IM6O_LOCK(im6o);
1826			optval = im6o->im6o_multicast_loop;
1827			IM6O_UNLOCK(im6o);
1828		}
1829		error = sooptcopyout(sopt, &optval, sizeof(u_int));
1830		break;
1831
1832	case IPV6_MSFILTER:
1833		if (im6o == NULL) {
1834			error = EADDRNOTAVAIL;
1835		} else {
1836			error = in6p_get_source_filters(inp, sopt);
1837		}
1838		break;
1839
1840	default:
1841		error = ENOPROTOOPT;
1842		break;
1843	}
1844
1845	return (error);
1846}
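
/*
 * Illustrative only: querying the per-socket multicast controls that
 * ip6_getmoptions() serves, on an assumed UDP/IPv6 socket `s'.  When
 * no ip6_moptions structure has been allocated yet, the defaults
 * (0, ip6_defmcasthlim and in6_mcast_loop) are reported.
 *
 *	u_int ifindex, hlim, loop;
 *	socklen_t len = sizeof (u_int);
 *
 *	getsockopt(s, IPPROTO_IPV6, IPV6_MULTICAST_IF, &ifindex, &len);
 *	getsockopt(s, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, &hlim, &len);
 *	getsockopt(s, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, &loop, &len);
 */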
1847
1848/*
1849 * Look up the ifnet to use for a multicast group membership,
1850 * given the address of an IPv6 group.
1851 *
1852 * This routine exists to support legacy IPv6 multicast applications.
1853 *
 * If inp is non-NULL and is bound to an interface, use this socket's
 * inp_boundifp for any required routing table lookup.
1856 *
1857 * If the route lookup fails, return NULL.
1858 *
1859 * FUTURE: Support multiple forwarding tables for IPv6.
1860 *
1861 * Returns NULL if no ifp could be found.
1862 */
1863static struct ifnet *
1864in6p_lookup_mcast_ifp(const struct inpcb *in6p,
1865    const struct sockaddr_in6 *gsin6)
1866{
1867	struct route_in6	 ro6;
1868	struct ifnet		*ifp;
1869	unsigned int		ifscope = IFSCOPE_NONE;
1870
1871	VERIFY(in6p == NULL || (in6p->inp_vflag & INP_IPV6));
1872	VERIFY(gsin6->sin6_family == AF_INET6);
1873	if (IN6_IS_ADDR_MULTICAST(&gsin6->sin6_addr) == 0)
		return (NULL);
1875
1876	if (in6p != NULL && (in6p->inp_flags & INP_BOUND_IF))
1877		ifscope = in6p->inp_boundifp->if_index;
1878
1879	ifp = NULL;
1880	memset(&ro6, 0, sizeof(struct route_in6));
1881	memcpy(&ro6.ro_dst, gsin6, sizeof(struct sockaddr_in6));
1882	rtalloc_scoped_ign((struct route *)&ro6, 0, ifscope);
1883	if (ro6.ro_rt != NULL) {
1884		ifp = ro6.ro_rt->rt_ifp;
1885		VERIFY(ifp != NULL);
1886		rtfree(ro6.ro_rt);
1887	}
1888
1889	return (ifp);
1890}
1891
1892/*
 * Since ipv6_mreq contains an ifindex and ip_mreq contains an AF_INET
 * address, we need to look up the AF_INET address when translating an
 * ipv6_mreq structure into an ip_mreq structure.
 * This is used when userland performs multicast setsockopt() on AF_INET6
 * sockets with AF_INET multicast addresses (IPv4-mapped IPv6 addresses).
1898 */
1899static int
1900in6p_lookup_v4addr(struct ipv6_mreq *mreq, struct ip_mreq *v4mreq)
1901{
1902	struct ifnet *ifp;
1903	struct ifaddr *ifa;
1904	struct sockaddr_in *sin;
1905
1906	ifnet_head_lock_shared();
1907	if (mreq->ipv6mr_interface > (unsigned int)if_index) {
1908		ifnet_head_done();
1909		return (EADDRNOTAVAIL);
1910	} else
1911		ifp = ifindex2ifnet[mreq->ipv6mr_interface];
1912	ifnet_head_done();
1913	if (ifp == NULL)
1914		return (EADDRNOTAVAIL);
1915	ifa = ifa_ifpgetprimary(ifp, AF_INET);
1916	if (ifa == NULL)
1917		return (EADDRNOTAVAIL);
1918	sin = (struct sockaddr_in *)(uintptr_t)(size_t)ifa->ifa_addr;
1919	v4mreq->imr_interface.s_addr = sin->sin_addr.s_addr;
1920	IFA_REMREF(ifa);
1921
1922	return (0);
1923}
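
/*
 * Illustrative only (socket and interface name are assumptions): a
 * v4-mapped join that exercises the translation above.  Joining
 * ::ffff:239.1.1.1 on an AF_INET6 socket is rewritten by
 * in6p_join_group() below into an IP_ADD_MEMBERSHIP using the
 * interface's primary IPv4 address.
 *
 *	struct ipv6_mreq mreq;
 *
 *	memset(&mreq, 0, sizeof (mreq));
 *	inet_pton(AF_INET6, "::ffff:239.1.1.1", &mreq.ipv6mr_multiaddr);
 *	mreq.ipv6mr_interface = if_nametoindex("en0");
 *	setsockopt(s, IPPROTO_IPV6, IPV6_JOIN_GROUP, &mreq, sizeof (mreq));
 */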
1924
1925/*
1926 * Join an IPv6 multicast group, possibly with a source.
1927 *
1928 * FIXME: The KAME use of the unspecified address (::)
1929 * to join *all* multicast groups is currently unsupported.
1930 */
1931static int
1932in6p_join_group(struct inpcb *inp, struct sockopt *sopt)
1933{
1934	struct group_source_req		 gsr;
1935	sockunion_t			*gsa, *ssa;
1936	struct ifnet			*ifp;
1937	struct in6_mfilter		*imf;
1938	struct ip6_moptions		*imo;
1939	struct in6_multi		*inm = NULL;
1940	struct in6_msource		*lims = NULL;
1941	size_t				 idx;
1942	int				 error, is_new;
1943	uint32_t			scopeid = 0;
1944
1945	ifp = NULL;
1946	imf = NULL;
1947	error = 0;
1948	is_new = 0;
1949
1950	memset(&gsr, 0, sizeof(struct group_source_req));
1951	gsa = (sockunion_t *)&gsr.gsr_group;
1952	gsa->ss.ss_family = AF_UNSPEC;
1953	ssa = (sockunion_t *)&gsr.gsr_source;
1954	ssa->ss.ss_family = AF_UNSPEC;
1955
1956	/*
1957	 * Chew everything into struct group_source_req.
1958	 * Overwrite the port field if present, as the sockaddr
1959	 * being copied in may be matched with a binary comparison.
1960	 * Ignore passed-in scope ID.
1961	 */
1962	switch (sopt->sopt_name) {
1963	case IPV6_JOIN_GROUP: {
1964		struct ipv6_mreq mreq;
		struct sockaddr_in6 *gsin6;
1966
1967		error = sooptcopyin(sopt, &mreq, sizeof(struct ipv6_mreq),
1968		    sizeof(struct ipv6_mreq));
1969		if (error)
1970			return (error);
1971		if (IN6_IS_ADDR_V4MAPPED(&mreq.ipv6mr_multiaddr)) {
1972			struct ip_mreq v4mreq;
1973			struct sockopt v4sopt;
1974
1975			v4mreq.imr_multiaddr.s_addr =
1976			    mreq.ipv6mr_multiaddr.s6_addr32[3];
1977			if (mreq.ipv6mr_interface == 0)
1978				v4mreq.imr_interface.s_addr = INADDR_ANY;
1979			else
1980				error = in6p_lookup_v4addr(&mreq, &v4mreq);
1981			if (error)
1982				return (error);
1983			v4sopt.sopt_dir     = SOPT_SET;
1984			v4sopt.sopt_level   = sopt->sopt_level;
1985			v4sopt.sopt_name    = IP_ADD_MEMBERSHIP;
1986			v4sopt.sopt_val     = CAST_USER_ADDR_T(&v4mreq);
1987			v4sopt.sopt_valsize = sizeof(v4mreq);
1988			v4sopt.sopt_p       = kernproc;
1989
1990			return (inp_join_group(inp, &v4sopt));
1991		}
1992		gsa->sin6.sin6_family = AF_INET6;
1993		gsa->sin6.sin6_len = sizeof(struct sockaddr_in6);
1994		gsa->sin6.sin6_addr = mreq.ipv6mr_multiaddr;
1995
1996		gsin6 = &gsa->sin6;
1997
1998		/* Only allow IPv6 multicast addresses */
1999		if (IN6_IS_ADDR_MULTICAST(&gsin6->sin6_addr) == 0) {
2000			return (EINVAL);
2001		}
2002
2003		if (mreq.ipv6mr_interface == 0) {
2004			ifp = in6p_lookup_mcast_ifp(inp, gsin6);
2005		} else {
2006			ifnet_head_lock_shared();
2007			if ((u_int)if_index < mreq.ipv6mr_interface) {
2008				ifnet_head_done();
2009				return (EADDRNOTAVAIL);
			}
2011			ifp = ifindex2ifnet[mreq.ipv6mr_interface];
2012			ifnet_head_done();
2013		}
2014		MLD_PRINTF(("%s: ipv6mr_interface = %d, ifp = %p\n",
2015		    __func__, mreq.ipv6mr_interface, ifp));
2016		break;
2017	}
2018
2019	case MCAST_JOIN_GROUP:
2020	case MCAST_JOIN_SOURCE_GROUP:
2021		if (sopt->sopt_name == MCAST_JOIN_GROUP) {
2022			error = sooptcopyin(sopt, &gsr,
2023			    sizeof(struct group_req),
2024			    sizeof(struct group_req));
2025		} else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
2026			error = sooptcopyin(sopt, &gsr,
2027			    sizeof(struct group_source_req),
2028			    sizeof(struct group_source_req));
2029		}
2030		if (error)
2031			return (error);
2032
2033		if (gsa->sin6.sin6_family != AF_INET6 ||
2034		    gsa->sin6.sin6_len != sizeof(struct sockaddr_in6))
2035			return (EINVAL);
2036
2037		if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
2038			if (ssa->sin6.sin6_family != AF_INET6 ||
2039			    ssa->sin6.sin6_len != sizeof(struct sockaddr_in6))
2040				return (EINVAL);
2041			if (IN6_IS_ADDR_MULTICAST(&ssa->sin6.sin6_addr))
2042				return (EINVAL);
2043			/*
2044			 * TODO: Validate embedded scope ID in source
2045			 * list entry against passed-in ifp, if and only
2046			 * if source list filter entry is iface or node local.
2047			 */
2048			in6_clearscope(&ssa->sin6.sin6_addr);
2049			ssa->sin6.sin6_port = 0;
2050			ssa->sin6.sin6_scope_id = 0;
2051		}
2052
2053		ifnet_head_lock_shared();
2054		if (gsr.gsr_interface == 0 ||
2055		    (u_int)if_index < gsr.gsr_interface) {
2056			ifnet_head_done();
2057			return (EADDRNOTAVAIL);
2058		}
2059		ifp = ifindex2ifnet[gsr.gsr_interface];
2060		ifnet_head_done();
2061		break;
2062
2063	default:
2064		MLD_PRINTF(("%s: unknown sopt_name %d\n",
2065		    __func__, sopt->sopt_name));
2066		return (EOPNOTSUPP);
2067		break;
2068	}
2069
2070	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
2071		return (EINVAL);
2072
2073	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
2074		return (EADDRNOTAVAIL);
2075
2076	gsa->sin6.sin6_port = 0;
2077	gsa->sin6.sin6_scope_id = 0;
2078
2079	/*
2080	 * Always set the scope zone ID on memberships created from userland.
2081	 * Use the passed-in ifp to do this.
2082	 */
2083	(void)in6_setscope(&gsa->sin6.sin6_addr, ifp, &scopeid);
2084	/*
2085	 * Some addresses are not valid without an embedded scopeid.
2086	 * This check must be present because otherwise we will later hit
2087	 * a VERIFY() in in6_mc_join().
2088	 */
2089	if ((IN6_IS_ADDR_MC_LINKLOCAL(&gsa->sin6.sin6_addr) ||
2090	    IN6_IS_ADDR_MC_INTFACELOCAL(&gsa->sin6.sin6_addr)) &&
2091	    (scopeid == 0 || gsa->sin6.sin6_addr.s6_addr16[1] == 0))
2092		return (EINVAL);
2093
2094	imo = in6p_findmoptions(inp);
2095	if (imo == NULL)
2096		return (ENOMEM);
2097
2098	IM6O_LOCK(imo);
2099	idx = im6o_match_group(imo, ifp, &gsa->sa);
2100	if (idx == (size_t)-1) {
2101		is_new = 1;
2102	} else {
2103		inm = imo->im6o_membership[idx];
2104		imf = &imo->im6o_mfilters[idx];
2105		if (ssa->ss.ss_family != AF_UNSPEC) {
2106			/*
2107			 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership
2108			 * is an error. On an existing inclusive membership,
2109			 * it just adds the source to the filter list.
2110			 */
2111			if (imf->im6f_st[1] != MCAST_INCLUDE) {
2112				error = EINVAL;
2113				goto out_imo_locked;
2114			}
2115			/*
2116			 * Throw out duplicates.
2117			 *
2118			 * XXX FIXME: This makes a naive assumption that
2119			 * even if entries exist for *ssa in this imf,
2120			 * they will be rejected as dupes, even if they
2121			 * are not valid in the current mode (in-mode).
2122			 *
2123			 * in6_msource is transactioned just as for anything
2124			 * else in SSM -- but note naive use of in6m_graft()
2125			 * below for allocating new filter entries.
2126			 *
2127			 * This is only an issue if someone mixes the
2128			 * full-state SSM API with the delta-based API,
2129			 * which is discouraged in the relevant RFCs.
2130			 */
2131			lims = im6o_match_source(imo, idx, &ssa->sa);
2132			if (lims != NULL /*&&
2133			    lims->im6sl_st[1] == MCAST_INCLUDE*/) {
2134				error = EADDRNOTAVAIL;
2135				goto out_imo_locked;
2136			}
2137		} else {
2138			/*
2139			 * MCAST_JOIN_GROUP on an existing exclusive
2140			 * membership is an error; return EADDRINUSE
2141			 * to preserve 4.4BSD API idempotence, and
2142			 * avoid tedious detour to code below.
2143			 * NOTE: This is bending RFC 3678 a bit.
2144			 *
2145			 * On an existing inclusive membership, this is also
2146			 * an error; if you want to change filter mode,
2147			 * you must use the userland API setsourcefilter().
2148			 * XXX We don't reject this for imf in UNDEFINED
2149			 * state at t1, because allocation of a filter
2150			 * is atomic with allocation of a membership.
2151			 */
2152			error = EINVAL;
2153			/* See comments above for EADDRINUSE */
2154			if (imf->im6f_st[1] == MCAST_EXCLUDE)
2155				error = EADDRINUSE;
2156			goto out_imo_locked;
2157		}
2158	}
2159
2160	/*
2161	 * Begin state merge transaction at socket layer.
2162	 */
2163
2164	if (is_new) {
2165		if (imo->im6o_num_memberships == imo->im6o_max_memberships) {
2166			error = im6o_grow(imo, 0);
2167			if (error)
2168				goto out_imo_locked;
2169		}
2170		/*
2171		 * Allocate the new slot upfront so we can deal with
2172		 * grafting the new source filter in same code path
2173		 * as for join-source on existing membership.
2174		 */
2175		idx = imo->im6o_num_memberships;
2176		imo->im6o_membership[idx] = NULL;
2177		imo->im6o_num_memberships++;
2178		VERIFY(imo->im6o_mfilters != NULL);
2179		imf = &imo->im6o_mfilters[idx];
2180		VERIFY(RB_EMPTY(&imf->im6f_sources));
2181	}
2182
2183	/*
2184	 * Graft new source into filter list for this inpcb's
2185	 * membership of the group. The in6_multi may not have
2186	 * been allocated yet if this is a new membership, however,
2187	 * the in_mfilter slot will be allocated and must be initialized.
2188	 *
2189	 * Note: Grafting of exclusive mode filters doesn't happen
2190	 * in this path.
2191	 * XXX: Should check for non-NULL lims (node exists but may
2192	 * not be in-mode) for interop with full-state API.
2193	 */
2194	if (ssa->ss.ss_family != AF_UNSPEC) {
2195		/* Membership starts in IN mode */
2196		if (is_new) {
			MLD_PRINTF(("%s: new join w/source\n", __func__));
			im6f_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE);
2199		} else {
2200			MLD_PRINTF(("%s: %s source\n", __func__, "allow"));
2201		}
2202		lims = im6f_graft(imf, MCAST_INCLUDE, &ssa->sin6);
2203		if (lims == NULL) {
2204			MLD_PRINTF(("%s: merge imf state failed\n",
2205			    __func__));
2206			error = ENOMEM;
2207			goto out_im6o_free;
2208		}
2209	} else {
2210		/* No address specified; Membership starts in EX mode */
2211		if (is_new) {
			MLD_PRINTF(("%s: new join w/o source\n", __func__));
2213			im6f_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE);
2214		}
2215	}
2216
2217	/*
2218	 * Begin state merge transaction at MLD layer.
2219	 */
2220
2221	if (is_new) {
2222		VERIFY(inm == NULL);
2223		error = in6_mc_join(ifp, &gsa->sin6.sin6_addr, imf, &inm, 0);
2224		VERIFY(inm != NULL || error != 0);
2225		if (error)
2226			goto out_im6o_free;
2227		imo->im6o_membership[idx] = inm; /* from in6_mc_join() */
2228	} else {
2229		MLD_PRINTF(("%s: merge inm state\n", __func__));
2230		IN6M_LOCK(inm);
2231		error = in6m_merge(inm, imf);
2232		if (error) {
2233			MLD_PRINTF(("%s: failed to merge inm state\n",
2234			    __func__));
2235			IN6M_UNLOCK(inm);
2236			goto out_im6f_rollback;
2237		}
2238		MLD_PRINTF(("%s: doing mld downcall\n", __func__));
2239		error = mld_change_state(inm, 0);
2240		IN6M_UNLOCK(inm);
2241		if (error) {
2242			MLD_PRINTF(("%s: failed mld downcall\n",
2243			    __func__));
2244			goto out_im6f_rollback;
2245		}
2246	}
2247
2248out_im6f_rollback:
2249	if (error) {
2250		im6f_rollback(imf);
2251		if (is_new)
2252			im6f_purge(imf);
2253		else
2254			im6f_reap(imf);
2255	} else {
2256		im6f_commit(imf);
2257	}
2258
2259out_im6o_free:
2260	if (error && is_new) {
2261		VERIFY(inm == NULL);
2262		imo->im6o_membership[idx] = NULL;
2263		--imo->im6o_num_memberships;
2264	}
2265
2266out_imo_locked:
2267	IM6O_UNLOCK(imo);
2268	IM6O_REMREF(imo);	/* from in6p_findmoptions() */
2269	return (error);
2270}
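
/*
 * Illustrative only (group, source and interface are assumptions): a
 * delta-based SSM join as handled above.  The first call creates an
 * INCLUDE-mode membership; repeating it with further sources simply
 * adds entries to the filter list.
 *
 *	struct group_source_req gsr;
 *	struct sockaddr_in6 *grp = (struct sockaddr_in6 *)&gsr.gsr_group;
 *	struct sockaddr_in6 *src = (struct sockaddr_in6 *)&gsr.gsr_source;
 *
 *	memset(&gsr, 0, sizeof (gsr));
 *	gsr.gsr_interface = if_nametoindex("en0");
 *	grp->sin6_family = AF_INET6;
 *	grp->sin6_len = sizeof (*grp);
 *	inet_pton(AF_INET6, "ff3e::8000:1", &grp->sin6_addr);
 *	src->sin6_family = AF_INET6;
 *	src->sin6_len = sizeof (*src);
 *	inet_pton(AF_INET6, "2001:db8::1", &src->sin6_addr);
 *	setsockopt(s, IPPROTO_IPV6, MCAST_JOIN_SOURCE_GROUP,
 *	    &gsr, sizeof (gsr));
 */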
2271
2272/*
2273 * Leave an IPv6 multicast group on an inpcb, possibly with a source.
2274 */
2275static int
2276in6p_leave_group(struct inpcb *inp, struct sockopt *sopt)
2277{
2278	struct ipv6_mreq		 mreq;
2279	struct group_source_req		 gsr;
2280	sockunion_t			*gsa, *ssa;
2281	struct ifnet			*ifp;
2282	struct in6_mfilter		*imf;
2283	struct ip6_moptions		*imo;
2284	struct in6_msource		*ims;
2285	struct in6_multi		*inm = NULL;
2286	uint32_t			 ifindex = 0;
2287	size_t				 idx;
2288	int				 error, is_final;
2289
2290	ifp = NULL;
2291	error = 0;
2292	is_final = 1;
2293
2294	memset(&gsr, 0, sizeof(struct group_source_req));
2295	gsa = (sockunion_t *)&gsr.gsr_group;
2296	gsa->ss.ss_family = AF_UNSPEC;
2297	ssa = (sockunion_t *)&gsr.gsr_source;
2298	ssa->ss.ss_family = AF_UNSPEC;
2299
2300	/*
2301	 * Chew everything passed in up into a struct group_source_req
2302	 * as that is easier to process.
	 * Note: Any embedded scope ID in the multicast group passed
	 * in by userland is ignored; the interface index is the
	 * recommended mechanism for specifying an interface (see below).
2306	 */
2307	switch (sopt->sopt_name) {
2308	case IPV6_LEAVE_GROUP: {
		struct sockaddr_in6 *gsin6;
2310
2311		error = sooptcopyin(sopt, &mreq, sizeof(struct ipv6_mreq),
2312		    sizeof(struct ipv6_mreq));
2313		if (error)
2314			return (error);
2315		if (IN6_IS_ADDR_V4MAPPED(&mreq.ipv6mr_multiaddr)) {
2316			struct ip_mreq v4mreq;
2317			struct sockopt v4sopt;
2318
2319			v4mreq.imr_multiaddr.s_addr =
2320			    mreq.ipv6mr_multiaddr.s6_addr32[3];
2321			if (mreq.ipv6mr_interface == 0)
2322				v4mreq.imr_interface.s_addr = INADDR_ANY;
2323			else
2324				error = in6p_lookup_v4addr(&mreq, &v4mreq);
2325			if (error)
2326				return (error);
2327			v4sopt.sopt_dir     = SOPT_SET;
2328			v4sopt.sopt_level   = sopt->sopt_level;
2329			v4sopt.sopt_name    = IP_DROP_MEMBERSHIP;
2330			v4sopt.sopt_val     = CAST_USER_ADDR_T(&v4mreq);
2331			v4sopt.sopt_valsize = sizeof(v4mreq);
2332			v4sopt.sopt_p       = kernproc;
2333
2334			return (inp_leave_group(inp, &v4sopt));
2335		}
2336		gsa->sin6.sin6_family = AF_INET6;
2337		gsa->sin6.sin6_len = sizeof(struct sockaddr_in6);
2338		gsa->sin6.sin6_addr = mreq.ipv6mr_multiaddr;
2339		gsa->sin6.sin6_port = 0;
2340		gsa->sin6.sin6_scope_id = 0;
2341		ifindex = mreq.ipv6mr_interface;
2342		gsin6 = &gsa->sin6;
2343		/* Only allow IPv6 multicast addresses */
2344		if (IN6_IS_ADDR_MULTICAST(&gsin6->sin6_addr) == 0) {
2345			return (EINVAL);
2346		}
2347		break;
2348	}
2349
2350	case MCAST_LEAVE_GROUP:
2351	case MCAST_LEAVE_SOURCE_GROUP:
2352		if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
2353			error = sooptcopyin(sopt, &gsr,
2354			    sizeof(struct group_req),
2355			    sizeof(struct group_req));
2356		} else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
2357			error = sooptcopyin(sopt, &gsr,
2358			    sizeof(struct group_source_req),
2359			    sizeof(struct group_source_req));
2360		}
2361		if (error)
2362			return (error);
2363
2364		if (gsa->sin6.sin6_family != AF_INET6 ||
2365		    gsa->sin6.sin6_len != sizeof(struct sockaddr_in6))
2366			return (EINVAL);
2367		if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
2368			if (ssa->sin6.sin6_family != AF_INET6 ||
2369			    ssa->sin6.sin6_len != sizeof(struct sockaddr_in6))
2370				return (EINVAL);
2371			if (IN6_IS_ADDR_MULTICAST(&ssa->sin6.sin6_addr))
2372				return (EINVAL);
2373			/*
2374			 * TODO: Validate embedded scope ID in source
2375			 * list entry against passed-in ifp, if and only
2376			 * if source list filter entry is iface or node local.
2377			 */
2378			in6_clearscope(&ssa->sin6.sin6_addr);
2379		}
2380		gsa->sin6.sin6_port = 0;
2381		gsa->sin6.sin6_scope_id = 0;
2382		ifindex = gsr.gsr_interface;
2383		break;
2384
2385	default:
2386		MLD_PRINTF(("%s: unknown sopt_name %d\n",
2387		    __func__, sopt->sopt_name));
2388		return (EOPNOTSUPP);
2389		break;
2390	}
2391
2392	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
2393		return (EINVAL);
2394
2395	/*
2396	 * Validate interface index if provided. If no interface index
2397	 * was provided separately, attempt to look the membership up
2398	 * from the default scope as a last resort to disambiguate
2399	 * the membership we are being asked to leave.
2400	 * XXX SCOPE6 lock potentially taken here.
2401	 */
2402	if (ifindex != 0) {
2403		ifnet_head_lock_shared();
2404		if ((u_int)if_index < ifindex) {
2405			ifnet_head_done();
2406			return (EADDRNOTAVAIL);
2407		}
2408		ifp = ifindex2ifnet[ifindex];
2409		ifnet_head_done();
2410		if (ifp == NULL)
2411			return (EADDRNOTAVAIL);
2412		(void) in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
2413	} else {
2414		error = sa6_embedscope(&gsa->sin6, ip6_use_defzone);
2415		if (error)
2416			return (EADDRNOTAVAIL);
2417		/*
2418		 * Some badly behaved applications don't pass an ifindex
2419		 * or a scope ID, which is an API violation. In this case,
2420		 * perform a lookup as per a v6 join.
2421		 *
2422		 * XXX For now, stomp on zone ID for the corner case.
2423		 * This is not the 'KAME way', but we need to see the ifp
2424		 * directly until such time as this implementation is
2425		 * refactored, assuming the scope IDs are the way to go.
2426		 */
2427		ifindex = ntohs(gsa->sin6.sin6_addr.s6_addr16[1]);
2428		if (ifindex == 0) {
2429			MLD_PRINTF(("%s: warning: no ifindex, looking up "
2430			    "ifp for group %s.\n", __func__,
2431			    ip6_sprintf(&gsa->sin6.sin6_addr)));
2432			ifp = in6p_lookup_mcast_ifp(inp, &gsa->sin6);
2433		} else {
2434			ifnet_head_lock_shared();
2435			ifp = ifindex2ifnet[ifindex];
2436			ifnet_head_done();
2437		}
2438		if (ifp == NULL)
2439			return (EADDRNOTAVAIL);
2440	}
2441
2442	VERIFY(ifp != NULL);
2443	MLD_PRINTF(("%s: ifp = %p\n", __func__, ifp));
2444
2445	/*
2446	 * Find the membership in the membership array.
2447	 */
2448	imo = in6p_findmoptions(inp);
2449	if (imo == NULL)
2450		return (ENOMEM);
2451
2452	IM6O_LOCK(imo);
2453	idx = im6o_match_group(imo, ifp, &gsa->sa);
2454	if (idx == (size_t)-1) {
2455		error = EADDRNOTAVAIL;
2456		goto out_locked;
2457	}
2458	inm = imo->im6o_membership[idx];
2459	imf = &imo->im6o_mfilters[idx];
2460
2461	if (ssa->ss.ss_family != AF_UNSPEC)
2462		is_final = 0;
2463
2464	/*
2465	 * Begin state merge transaction at socket layer.
2466	 */
2467
2468	/*
2469	 * If we were instructed only to leave a given source, do so.
2470	 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships.
2471	 */
2472	if (is_final) {
2473		im6f_leave(imf);
2474	} else {
2475		if (imf->im6f_st[0] == MCAST_EXCLUDE) {
2476			error = EADDRNOTAVAIL;
2477			goto out_locked;
2478		}
2479		ims = im6o_match_source(imo, idx, &ssa->sa);
2480		if (ims == NULL) {
			MLD_PRINTF(("%s: source %s %spresent\n", __func__,
2482			    ip6_sprintf(&ssa->sin6.sin6_addr),
2483			    "not "));
2484			error = EADDRNOTAVAIL;
2485			goto out_locked;
2486		}
2487		MLD_PRINTF(("%s: %s source\n", __func__, "block"));
2488		error = im6f_prune(imf, &ssa->sin6);
2489		if (error) {
2490			MLD_PRINTF(("%s: merge imf state failed\n",
2491			    __func__));
2492			goto out_locked;
2493		}
2494	}
2495
2496	/*
2497	 * Begin state merge transaction at MLD layer.
2498	 */
2499
2500	if (is_final) {
2501		/*
2502		 * Give up the multicast address record to which
2503		 * the membership points.  Reference held in im6o
2504		 * will be released below.
2505		 */
2506		(void) in6_mc_leave(inm, imf);
2507	} else {
2508		MLD_PRINTF(("%s: merge inm state\n", __func__));
2509		IN6M_LOCK(inm);
2510		error = in6m_merge(inm, imf);
2511		if (error) {
2512			MLD_PRINTF(("%s: failed to merge inm state\n",
2513			    __func__));
2514			IN6M_UNLOCK(inm);
2515			goto out_im6f_rollback;
2516		}
2517
2518		MLD_PRINTF(("%s: doing mld downcall\n", __func__));
2519		error = mld_change_state(inm, 0);
2520		if (error) {
2521			MLD_PRINTF(("%s: failed mld downcall\n", __func__));
2522		}
2523		IN6M_UNLOCK(inm);
2524	}
2525
2526out_im6f_rollback:
2527	if (error)
2528		im6f_rollback(imf);
2529	else
2530		im6f_commit(imf);
2531
2532	im6f_reap(imf);
2533
2534	if (is_final) {
2535		/* Remove the gap in the membership array. */
2536		VERIFY(inm == imo->im6o_membership[idx]);
2537		imo->im6o_membership[idx] = NULL;
2538		IN6M_REMREF(inm);
2539		for (++idx; idx < imo->im6o_num_memberships; ++idx) {
2540			imo->im6o_membership[idx-1] = imo->im6o_membership[idx];
2541			imo->im6o_mfilters[idx-1] = imo->im6o_mfilters[idx];
2542		}
2543		imo->im6o_num_memberships--;
2544	}
2545
2546out_locked:
2547	IM6O_UNLOCK(imo);
2548	IM6O_REMREF(imo);	/* from in6p_findmoptions() */
2549	return (error);
2550}
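
/*
 * Illustrative only, reusing the group_source_req sketched after
 * in6p_join_group() above: dropping one source from an inclusive
 * membership versus leaving the group outright.
 *
 *	// delta leave: remove a single source, the membership survives
 *	setsockopt(s, IPPROTO_IPV6, MCAST_LEAVE_SOURCE_GROUP,
 *	    &gsr, sizeof (gsr));
 *
 *	// final leave: give up the membership and its filter state
 *	setsockopt(s, IPPROTO_IPV6, MCAST_LEAVE_GROUP,
 *	    &gsr, sizeof (struct group_req));
 */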
2551
2552/*
2553 * Select the interface for transmitting IPv6 multicast datagrams.
2554 *
 * The option value is a u_int interface index; the handler below
 * rejects anything that does not name a multicast-capable interface.
2558 * When no interface is selected, one is chosen for every send.
2559 */
2560static int
2561in6p_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
2562{
2563	struct ifnet		*ifp;
2564	struct ip6_moptions	*imo;
2565	u_int			 ifindex;
2566	int			 error;
2567
2568	if (sopt->sopt_valsize != sizeof(u_int))
2569		return (EINVAL);
2570
2571	error = sooptcopyin(sopt, &ifindex, sizeof(u_int), sizeof(u_int));
2572	if (error)
2573		return (error);
2574
2575	ifnet_head_lock_shared();
2576	if ((u_int)if_index < ifindex) {
2577		ifnet_head_done();
2578		return (EINVAL);
2579	}
2580
2581	ifp = ifindex2ifnet[ifindex];
2582	ifnet_head_done();
2583	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
2584		return (EADDRNOTAVAIL);
2585
2586	imo = in6p_findmoptions(inp);
2587	if (imo == NULL)
2588		return (ENOMEM);
2589
2590	IM6O_LOCK(imo);
2591	imo->im6o_multicast_ifp = ifp;
2592	IM6O_UNLOCK(imo);
2593	IM6O_REMREF(imo);	/* from in6p_findmoptions() */
2594
2595	return (0);
2596}
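
/*
 * Illustrative only (interface name is an assumption): selecting the
 * transmit interface through the handler above.  The value must name
 * an existing multicast-capable interface, otherwise the call fails
 * with EADDRNOTAVAIL.
 *
 *	u_int idx = if_nametoindex("en0");
 *
 *	setsockopt(s, IPPROTO_IPV6, IPV6_MULTICAST_IF, &idx, sizeof (idx));
 */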
2597
2598/*
2599 * Atomically set source filters on a socket for an IPv6 multicast group.
 */
2602static int
2603in6p_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
2604{
2605	struct __msfilterreq64	 msfr, msfr64;
2606	struct __msfilterreq32	 msfr32;
2607	sockunion_t		*gsa;
2608	struct ifnet		*ifp;
2609	struct in6_mfilter	*imf;
2610	struct ip6_moptions	*imo;
2611	struct in6_multi	*inm;
2612	size_t			 idx;
2613	int			 error;
2614	user_addr_t 		 tmp_ptr;
2615
2616	if (IS_64BIT_PROCESS(current_proc())) {
2617		error = sooptcopyin(sopt, &msfr64,
2618		    sizeof(struct __msfilterreq64),
2619		    sizeof(struct __msfilterreq64));
2620		if (error)
2621			return (error);
2622		/* we never use msfr.msfr_srcs; */
2623		memcpy(&msfr, &msfr64, sizeof(msfr));
2624	} else {
2625		error = sooptcopyin(sopt, &msfr32,
2626		    sizeof(struct __msfilterreq32),
2627		    sizeof(struct __msfilterreq32));
2628		if (error)
2629			return (error);
2630		/* we never use msfr.msfr_srcs; */
2631		memcpy(&msfr, &msfr32, sizeof(msfr));
2632	}
2633
2634	if ((size_t) msfr.msfr_nsrcs >
2635	    SIZE_MAX / sizeof(struct sockaddr_storage))
2636		msfr.msfr_nsrcs = SIZE_MAX / sizeof(struct sockaddr_storage);
2637
2638	if (msfr.msfr_nsrcs > in6_mcast_maxsocksrc)
2639		return (ENOBUFS);
2640
2641	if (msfr.msfr_fmode != MCAST_EXCLUDE &&
2642	     msfr.msfr_fmode != MCAST_INCLUDE)
2643		return (EINVAL);
2644
2645	if (msfr.msfr_group.ss_family != AF_INET6 ||
2646	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in6))
2647		return (EINVAL);
2648
2649	gsa = (sockunion_t *)&msfr.msfr_group;
2650	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
2651		return (EINVAL);
2652
2653	gsa->sin6.sin6_port = 0;	/* ignore port */
2654
2655	ifnet_head_lock_shared();
2656	if (msfr.msfr_ifindex == 0 || (u_int)if_index < msfr.msfr_ifindex) {
2657		ifnet_head_done();
2658		return (EADDRNOTAVAIL);
2659	}
2660	ifp = ifindex2ifnet[msfr.msfr_ifindex];
2661	ifnet_head_done();
2662	if (ifp == NULL)
2663		return (EADDRNOTAVAIL);
2664
2665	(void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
2666
2667	/*
2668	 * Take the INP write lock.
2669	 * Check if this socket is a member of this group.
2670	 */
2671	imo = in6p_findmoptions(inp);
2672	if (imo == NULL)
2673		return (ENOMEM);
2674
2675	IM6O_LOCK(imo);
2676	idx = im6o_match_group(imo, ifp, &gsa->sa);
2677	if (idx == (size_t)-1 || imo->im6o_mfilters == NULL) {
2678		error = EADDRNOTAVAIL;
2679		goto out_imo_locked;
2680	}
2681	inm = imo->im6o_membership[idx];
2682	imf = &imo->im6o_mfilters[idx];
2683
2684	/*
2685	 * Begin state merge transaction at socket layer.
2686	 */
2687
2688	imf->im6f_st[1] = msfr.msfr_fmode;
2689
2690	/*
2691	 * Apply any new source filters, if present.
	 * Make a kernel copy of the user-space source vector with a
	 * single copyin, so that any page faults are dealt with
	 * up-front.
2695	 */
2696	if (msfr.msfr_nsrcs > 0) {
2697		struct in6_msource	*lims;
2698		struct sockaddr_in6	*psin;
2699		struct sockaddr_storage	*kss, *pkss;
2700		unsigned int		 i;
2701
2702		if (IS_64BIT_PROCESS(current_proc()))
2703			tmp_ptr = msfr64.msfr_srcs;
2704		else
2705			tmp_ptr = CAST_USER_ADDR_T(msfr32.msfr_srcs);
2706
2707		MLD_PRINTF(("%s: loading %lu source list entries\n",
2708		    __func__, (unsigned long)msfr.msfr_nsrcs));
2709		kss = _MALLOC((size_t) msfr.msfr_nsrcs * sizeof(*kss),
2710		    M_TEMP, M_WAITOK);
2711		if (kss == NULL) {
2712			error = ENOMEM;
2713			goto out_imo_locked;
2714		}
2715
2716		error = copyin(tmp_ptr, kss,
2717		    (size_t) msfr.msfr_nsrcs * sizeof(*kss));
2718		if (error) {
2719			FREE(kss, M_TEMP);
2720			goto out_imo_locked;
2721		}
2722
2723		/*
2724		 * Mark all source filters as UNDEFINED at t1.
2725		 * Restore new group filter mode, as im6f_leave()
2726		 * will set it to INCLUDE.
2727		 */
2728		im6f_leave(imf);
2729		imf->im6f_st[1] = msfr.msfr_fmode;
2730
2731		/*
2732		 * Update socket layer filters at t1, lazy-allocating
2733		 * new entries. This saves a bunch of memory at the
2734		 * cost of one RB_FIND() per source entry; duplicate
2735		 * entries in the msfr_nsrcs vector are ignored.
2736		 * If we encounter an error, rollback transaction.
2737		 *
2738		 * XXX This too could be replaced with a set-symmetric
2739		 * difference like loop to avoid walking from root
2740		 * every time, as the key space is common.
2741		 */
2742		for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) {
2743			psin = (struct sockaddr_in6 *)pkss;
2744			if (psin->sin6_family != AF_INET6) {
2745				error = EAFNOSUPPORT;
2746				break;
2747			}
2748			if (psin->sin6_len != sizeof(struct sockaddr_in6)) {
2749				error = EINVAL;
2750				break;
2751			}
2752			if (IN6_IS_ADDR_MULTICAST(&psin->sin6_addr)) {
2753				error = EINVAL;
2754				break;
2755			}
2756			/*
2757			 * TODO: Validate embedded scope ID in source
2758			 * list entry against passed-in ifp, if and only
2759			 * if source list filter entry is iface or node local.
2760			 */
2761			in6_clearscope(&psin->sin6_addr);
2762			error = im6f_get_source(imf, psin, &lims);
2763			if (error)
2764				break;
2765			lims->im6sl_st[1] = imf->im6f_st[1];
2766		}
2767		FREE(kss, M_TEMP);
2768	}
2769
2770	if (error)
2771		goto out_im6f_rollback;
2772
2773	/*
2774	 * Begin state merge transaction at MLD layer.
2775	 */
2776	IN6M_LOCK(inm);
2777	MLD_PRINTF(("%s: merge inm state\n", __func__));
2778	error = in6m_merge(inm, imf);
2779	if (error) {
2780		MLD_PRINTF(("%s: failed to merge inm state\n", __func__));
2781		IN6M_UNLOCK(inm);
2782		goto out_im6f_rollback;
2783	}
2784
2785	MLD_PRINTF(("%s: doing mld downcall\n", __func__));
2786	error = mld_change_state(inm, 0);
2787	IN6M_UNLOCK(inm);
2788#if MLD_DEBUG
2789	if (error)
2790		MLD_PRINTF(("%s: failed mld downcall\n", __func__));
2791#endif
2792
2793out_im6f_rollback:
2794	if (error)
2795		im6f_rollback(imf);
2796	else
2797		im6f_commit(imf);
2798
2799	im6f_reap(imf);
2800
2801out_imo_locked:
2802	IM6O_UNLOCK(imo);
2803	IM6O_REMREF(imo);	/* from in6p_findmoptions() */
2804
2805	return (error);
2806}
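
/*
 * Illustrative only: a full-state filter replacement as handled above,
 * assuming the struct __msfilterreq layout from the private headers is
 * visible to the caller (the RFC 3678 setsourcefilter() wrapper, where
 * available, builds the same request).  This installs an EXCLUDE-mode
 * filter blocking a single source; the delta-based alternative is
 * MCAST_BLOCK_SOURCE, dispatched from ip6_setmoptions() below.
 *
 *	struct sockaddr_storage blocked[1];
 *	struct __msfilterreq mr;
 *
 *	// ... fill blocked[0] with a sockaddr_in6 for the source ...
 *	memset(&mr, 0, sizeof (mr));
 *	mr.msfr_ifindex = ifindex;
 *	mr.msfr_fmode = MCAST_EXCLUDE;
 *	mr.msfr_nsrcs = 1;
 *	mr.msfr_srcs = blocked;
 *	memcpy(&mr.msfr_group, &grp_sa6, sizeof (struct sockaddr_in6));
 *	setsockopt(s, IPPROTO_IPV6, IPV6_MSFILTER, &mr, sizeof (mr));
 */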
2807
2808/*
2809 * Set the IP multicast options in response to user setsockopt().
2810 *
2811 * Many of the socket options handled in this function duplicate the
2812 * functionality of socket options in the regular unicast API. However,
2813 * it is not possible to merge the duplicate code, because the idempotence
2814 * of the IPv6 multicast part of the BSD Sockets API must be preserved;
2815 * the effects of these options must be treated as separate and distinct.
 */
2818int
2819ip6_setmoptions(struct inpcb *inp, struct sockopt *sopt)
2820{
2821	struct ip6_moptions	*im6o;
2822	int			 error;
2823
2824	error = 0;
2825
2826	/*
	 * If the socket is neither of type SOCK_RAW nor SOCK_DGRAM,
	 * or is a divert socket, reject it.
2829	 */
2830	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
2831	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
2832	     inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
2833		return (EOPNOTSUPP);
2834
2835	switch (sopt->sopt_name) {
2836	case IPV6_MULTICAST_IF:
2837		error = in6p_set_multicast_if(inp, sopt);
2838		break;
2839
2840	case IPV6_MULTICAST_HOPS: {
2841		int hlim;
2842
2843		if (sopt->sopt_valsize != sizeof(int)) {
2844			error = EINVAL;
2845			break;
2846		}
2847		error = sooptcopyin(sopt, &hlim, sizeof(hlim), sizeof(int));
2848		if (error)
2849			break;
2850		if (hlim < -1 || hlim > 255) {
2851			error = EINVAL;
2852			break;
2853		} else if (hlim == -1) {
2854			hlim = ip6_defmcasthlim;
2855		}
2856		im6o = in6p_findmoptions(inp);
2857		if (im6o == NULL) {
2858			error = ENOMEM;
2859			break;
2860		}
2861		IM6O_LOCK(im6o);
2862		im6o->im6o_multicast_hlim = hlim;
2863		IM6O_UNLOCK(im6o);
2864		IM6O_REMREF(im6o);	/* from in6p_findmoptions() */
2865		break;
2866	}
2867
2868	case IPV6_MULTICAST_LOOP: {
2869		u_int loop;
2870
2871		/*
2872		 * Set the loopback flag for outgoing multicast packets.
2873		 * Must be zero or one.
2874		 */
2875		if (sopt->sopt_valsize != sizeof(u_int)) {
2876			error = EINVAL;
2877			break;
2878		}
2879		error = sooptcopyin(sopt, &loop, sizeof(u_int), sizeof(u_int));
2880		if (error)
2881			break;
2882		if (loop > 1) {
2883			error = EINVAL;
2884			break;
2885		}
2886		im6o = in6p_findmoptions(inp);
2887		if (im6o == NULL) {
2888			error = ENOMEM;
2889			break;
2890		}
2891		IM6O_LOCK(im6o);
2892		im6o->im6o_multicast_loop = loop;
2893		IM6O_UNLOCK(im6o);
2894		IM6O_REMREF(im6o);	/* from in6p_findmoptions() */
2895		break;
2896	}
2897
2898	case IPV6_JOIN_GROUP:
2899	case MCAST_JOIN_GROUP:
2900	case MCAST_JOIN_SOURCE_GROUP:
2901		error = in6p_join_group(inp, sopt);
2902		break;
2903
2904	case IPV6_LEAVE_GROUP:
2905	case MCAST_LEAVE_GROUP:
2906	case MCAST_LEAVE_SOURCE_GROUP:
2907		error = in6p_leave_group(inp, sopt);
2908		break;
2909
2910	case MCAST_BLOCK_SOURCE:
2911	case MCAST_UNBLOCK_SOURCE:
2912		error = in6p_block_unblock_source(inp, sopt);
2913		break;
2914
2915	case IPV6_MSFILTER:
2916		error = in6p_set_source_filters(inp, sopt);
2917		break;
2918
2919	default:
2920		error = EOPNOTSUPP;
2921		break;
2922	}
2923
2924	return (error);
2925}
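
/*
 * Illustrative only: the scalar options dispatched above.
 * IPV6_MULTICAST_HOPS accepts -1 to restore the system default hop
 * limit (ip6_defmcasthlim), and IPV6_MULTICAST_LOOP only accepts 0 or 1.
 *
 *	int hlim = -1;			// back to the default hop limit
 *	u_int loop = 0;			// suppress local loopback
 *
 *	setsockopt(s, IPPROTO_IPV6, IPV6_MULTICAST_HOPS,
 *	    &hlim, sizeof (hlim));
 *	setsockopt(s, IPPROTO_IPV6, IPV6_MULTICAST_LOOP,
 *	    &loop, sizeof (loop));
 */
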
2926/*
2927 * Expose MLD's multicast filter mode and source list(s) to userland,
2928 * keyed by (ifindex, group).
2929 * The filter mode is written out as a uint32_t, followed by
2930 * 0..n of struct in6_addr.
2931 * For use by ifmcstat(8).
2932 */
2933static int
2934sysctl_ip6_mcast_filters SYSCTL_HANDLER_ARGS
2935{
2936#pragma unused(oidp)
2937
2938	struct in6_addr			 mcaddr;
2939	struct in6_addr			 src;
2940	struct ifnet			*ifp;
2941	struct in6_multi		*inm;
2942	struct in6_multistep		step;
2943	struct ip6_msource		*ims;
2944	int				*name;
2945	int				 retval = 0;
2946	u_int				 namelen;
2947	uint32_t			 fmode, ifindex;
2948
2949	name = (int *)arg1;
2950	namelen = arg2;
2951
2952	if (req->newptr != USER_ADDR_NULL)
2953		return (EPERM);
2954
2955	/* int: ifindex + 4 * 32 bits of IPv6 address */
2956	if (namelen != 5)
2957		return (EINVAL);
2958
2959	ifindex = name[0];
2960	ifnet_head_lock_shared();
2961	if (ifindex <= 0 || ifindex > (u_int)if_index) {
2962		MLD_PRINTF(("%s: ifindex %u out of range\n",
2963		    __func__, ifindex));
2964		ifnet_head_done();
2965		return (ENOENT);
2966	}
2967
2968	memcpy(&mcaddr, &name[1], sizeof(struct in6_addr));
2969	if (!IN6_IS_ADDR_MULTICAST(&mcaddr)) {
2970		MLD_PRINTF(("%s: group %s is not multicast\n",
2971		    __func__, ip6_sprintf(&mcaddr)));
2972		ifnet_head_done();
2973		return (EINVAL);
2974	}
2975
2976	ifp = ifindex2ifnet[ifindex];
2977	ifnet_head_done();
2978	if (ifp == NULL) {
2979		MLD_PRINTF(("%s: no ifp for ifindex %u\n", __func__, ifindex));
2980		return (ENOENT);
2981	}
2982	/*
2983	 * Internal MLD lookups require that scope/zone ID is set.
2984	 */
2985	(void)in6_setscope(&mcaddr, ifp, NULL);
2986
2987	in6_multihead_lock_shared();
2988	IN6_FIRST_MULTI(step, inm);
2989	while (inm != NULL) {
2990		IN6M_LOCK(inm);
2991		if (inm->in6m_ifp != ifp)
2992			goto next;
2993
2994		if (!IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, &mcaddr))
2995			goto next;
2996
2997		fmode = inm->in6m_st[1].iss_fmode;
2998		retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t));
2999		if (retval != 0) {
3000			IN6M_UNLOCK(inm);
3001			break;		/* abort */
3002		}
3003		RB_FOREACH(ims, ip6_msource_tree, &inm->in6m_srcs) {
3004			MLD_PRINTF(("%s: visit node %p\n", __func__, ims));
3005			/*
3006			 * Only copy-out sources which are in-mode.
3007			 */
3008			if (fmode != im6s_get_mode(inm, ims, 1)) {
3009				MLD_PRINTF(("%s: skip non-in-mode\n",
3010				    __func__));
3011				continue; /* process next source */
3012			}
3013			src = ims->im6s_addr;
3014			retval = SYSCTL_OUT(req, &src, sizeof(struct in6_addr));
3015			if (retval != 0)
3016				break;	/* process next inm */
3017		}
3018next:
3019		IN6M_UNLOCK(inm);
3020		IN6_NEXT_MULTI(step, inm);
3021	}
3022	in6_multihead_lock_done();
3023
3024	return (retval);
3025}
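
/*
 * Illustrative only: how an ifmcstat(8)-style reader might consume the
 * handler above.  The OID prefix under which it is registered is
 * defined elsewhere and assumed here; the five trailing name components
 * are the interface index followed by the four 32-bit words of the
 * group address.  The reply is one uint32_t filter mode followed by
 * zero or more struct in6_addr entries, one per in-mode source.
 *
 *	int name[CTL_MAXNAME];
 *	u_int prefixlen = ...;	// length of the assumed OID prefix
 *	size_t len = sizeof (buf);
 *
 *	name[prefixlen] = ifindex;
 *	memcpy(&name[prefixlen + 1], &group, sizeof (struct in6_addr));
 *	sysctl(name, prefixlen + 5, buf, &len, NULL, 0);
 *
 *	uint32_t fmode;
 *	memcpy(&fmode, buf, sizeof (fmode));
 *	nsrcs = (len - sizeof (fmode)) / sizeof (struct in6_addr);
 */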
3026
3027void
3028in6_multi_init(void)
3029{
3030	PE_parse_boot_argn("ifa_debug", &in6m_debug, sizeof (in6m_debug));
3031
3032	/* Setup lock group and attribute for in6_multihead */
3033	in6_multihead_lock_grp_attr = lck_grp_attr_alloc_init();
3034	in6_multihead_lock_grp = lck_grp_alloc_init("in6_multihead",
3035	    in6_multihead_lock_grp_attr);
3036	in6_multihead_lock_attr = lck_attr_alloc_init();
3037	lck_rw_init(&in6_multihead_lock, in6_multihead_lock_grp,
3038	    in6_multihead_lock_attr);
3039
3040	lck_mtx_init(&in6m_trash_lock, in6_multihead_lock_grp,
3041	    in6_multihead_lock_attr);
3042	TAILQ_INIT(&in6m_trash_head);
3043
3044	in6m_size = (in6m_debug == 0) ? sizeof (struct in6_multi) :
3045	    sizeof (struct in6_multi_dbg);
3046	in6m_zone = zinit(in6m_size, IN6M_ZONE_MAX * in6m_size,
3047	    0, IN6M_ZONE_NAME);
3048	if (in6m_zone == NULL) {
3049		panic("%s: failed allocating %s", __func__, IN6M_ZONE_NAME);
3050		/* NOTREACHED */
3051	}
3052	zone_change(in6m_zone, Z_EXPAND, TRUE);
3053
3054	imm_size = sizeof (struct in6_multi_mship);
3055	imm_zone = zinit(imm_size, IMM_ZONE_MAX * imm_size, 0, IMM_ZONE_NAME);
3056	if (imm_zone == NULL) {
3057		panic("%s: failed allocating %s", __func__, IMM_ZONE_NAME);
3058		/* NOTREACHED */
3059	}
3060	zone_change(imm_zone, Z_EXPAND, TRUE);
3061
3062	ip6ms_size = sizeof (struct ip6_msource);
3063	ip6ms_zone = zinit(ip6ms_size, IP6MS_ZONE_MAX * ip6ms_size,
3064	    0, IP6MS_ZONE_NAME);
3065	if (ip6ms_zone == NULL) {
3066		panic("%s: failed allocating %s", __func__, IP6MS_ZONE_NAME);
3067		/* NOTREACHED */
3068	}
3069	zone_change(ip6ms_zone, Z_EXPAND, TRUE);
3070
3071	in6ms_size = sizeof (struct in6_msource);
3072	in6ms_zone = zinit(in6ms_size, IN6MS_ZONE_MAX * in6ms_size,
3073	    0, IN6MS_ZONE_NAME);
3074	if (in6ms_zone == NULL) {
3075		panic("%s: failed allocating %s", __func__, IN6MS_ZONE_NAME);
3076		/* NOTREACHED */
3077	}
3078	zone_change(in6ms_zone, Z_EXPAND, TRUE);
3079}
3080
3081static struct in6_multi *
3082in6_multi_alloc(int how)
3083{
3084	struct in6_multi *in6m;
3085
3086	in6m = (how == M_WAITOK) ? zalloc(in6m_zone) :
3087	    zalloc_noblock(in6m_zone);
3088	if (in6m != NULL) {
3089		bzero(in6m, in6m_size);
3090		lck_mtx_init(&in6m->in6m_lock, in6_multihead_lock_grp,
3091		    in6_multihead_lock_attr);
3092		in6m->in6m_debug |= IFD_ALLOC;
3093		if (in6m_debug != 0) {
3094			in6m->in6m_debug |= IFD_DEBUG;
3095			in6m->in6m_trace = in6m_trace;
3096		}
3097	}
3098	return (in6m);
3099}
3100
3101static void
3102in6_multi_free(struct in6_multi *in6m)
3103{
3104	IN6M_LOCK(in6m);
3105	if (in6m->in6m_debug & IFD_ATTACHED) {
3106		panic("%s: attached in6m=%p is being freed", __func__, in6m);
3107		/* NOTREACHED */
3108	} else if (in6m->in6m_ifma != NULL) {
3109		panic("%s: ifma not NULL for in6m=%p", __func__, in6m);
3110		/* NOTREACHED */
3111	} else if (!(in6m->in6m_debug & IFD_ALLOC)) {
3112		panic("%s: in6m %p cannot be freed", __func__, in6m);
3113		/* NOTREACHED */
3114	} else if (in6m->in6m_refcount != 0) {
3115		panic("%s: non-zero refcount in6m=%p", __func__, in6m);
3116		/* NOTREACHED */
3117	} else if (in6m->in6m_reqcnt != 0) {
3118		panic("%s: non-zero reqcnt in6m=%p", __func__, in6m);
3119		/* NOTREACHED */
3120	}
3121
3122	/* Free any pending MLDv2 state-change records */
3123	IF_DRAIN(&in6m->in6m_scq);
3124
3125	in6m->in6m_debug &= ~IFD_ALLOC;
3126	if ((in6m->in6m_debug & (IFD_DEBUG | IFD_TRASHED)) ==
3127	    (IFD_DEBUG | IFD_TRASHED)) {
3128		lck_mtx_lock(&in6m_trash_lock);
3129		TAILQ_REMOVE(&in6m_trash_head, (struct in6_multi_dbg *)in6m,
3130		    in6m_trash_link);
3131		lck_mtx_unlock(&in6m_trash_lock);
3132		in6m->in6m_debug &= ~IFD_TRASHED;
3133	}
3134	IN6M_UNLOCK(in6m);
3135
3136	lck_mtx_destroy(&in6m->in6m_lock, in6_multihead_lock_grp);
3137	zfree(in6m_zone, in6m);
3138}
3139
3140static void
3141in6_multi_attach(struct in6_multi *in6m)
3142{
3143	in6_multihead_lock_assert(LCK_RW_ASSERT_EXCLUSIVE);
3144	IN6M_LOCK_ASSERT_HELD(in6m);
3145
3146	if (in6m->in6m_debug & IFD_ATTACHED) {
3147		panic("%s: Attempt to attach an already attached in6m=%p",
3148		    __func__, in6m);
3149		/* NOTREACHED */
3150	} else if (in6m->in6m_debug & IFD_TRASHED) {
3151		panic("%s: Attempt to reattach a detached in6m=%p",
3152		    __func__, in6m);
3153		/* NOTREACHED */
3154	}
3155
3156	in6m->in6m_reqcnt++;
3157	VERIFY(in6m->in6m_reqcnt == 1);
3158	IN6M_ADDREF_LOCKED(in6m);
3159	in6m->in6m_debug |= IFD_ATTACHED;
3160	/*
3161	 * Reattach case:  If debugging is enabled, take it
3162	 * out of the trash list and clear IFD_TRASHED.
3163	 */
3164	if ((in6m->in6m_debug & (IFD_DEBUG | IFD_TRASHED)) ==
3165	    (IFD_DEBUG | IFD_TRASHED)) {
3166		/* Become a regular mutex, just in case */
3167		IN6M_CONVERT_LOCK(in6m);
3168		lck_mtx_lock(&in6m_trash_lock);
3169		TAILQ_REMOVE(&in6m_trash_head, (struct in6_multi_dbg *)in6m,
3170		    in6m_trash_link);
3171		lck_mtx_unlock(&in6m_trash_lock);
3172		in6m->in6m_debug &= ~IFD_TRASHED;
3173	}
3174
3175	LIST_INSERT_HEAD(&in6_multihead, in6m, in6m_entry);
3176}
3177
3178int
3179in6_multi_detach(struct in6_multi *in6m)
3180{
3181	in6_multihead_lock_assert(LCK_RW_ASSERT_EXCLUSIVE);
3182	IN6M_LOCK_ASSERT_HELD(in6m);
3183
3184	if (in6m->in6m_reqcnt == 0) {
3185		panic("%s: in6m=%p negative reqcnt", __func__, in6m);
3186		/* NOTREACHED */
3187	}
3188
3189	--in6m->in6m_reqcnt;
3190	if (in6m->in6m_reqcnt > 0)
3191		return (0);
3192
3193	if (!(in6m->in6m_debug & IFD_ATTACHED)) {
3194		panic("%s: Attempt to detach an unattached record in6m=%p",
3195		    __func__, in6m);
3196		/* NOTREACHED */
3197	} else if (in6m->in6m_debug & IFD_TRASHED) {
3198		panic("%s: in6m %p is already in trash list", __func__, in6m);
3199		/* NOTREACHED */
3200	}
3201
3202	/*
3203	 * NOTE: Caller calls IFMA_REMREF
3204	 */
3205	in6m->in6m_debug &= ~IFD_ATTACHED;
3206	LIST_REMOVE(in6m, in6m_entry);
3207
3208	if (in6m->in6m_debug & IFD_DEBUG) {
3209		/* Become a regular mutex, just in case */
3210		IN6M_CONVERT_LOCK(in6m);
3211		lck_mtx_lock(&in6m_trash_lock);
3212		TAILQ_INSERT_TAIL(&in6m_trash_head,
3213		    (struct in6_multi_dbg *)in6m, in6m_trash_link);
3214		lck_mtx_unlock(&in6m_trash_lock);
3215		in6m->in6m_debug |= IFD_TRASHED;
3216	}
3217
3218	return (1);
3219}
3220
3221void
3222in6m_addref(struct in6_multi *in6m, int locked)
3223{
3224	if (!locked)
3225		IN6M_LOCK_SPIN(in6m);
3226	else
3227		IN6M_LOCK_ASSERT_HELD(in6m);
3228
3229	if (++in6m->in6m_refcount == 0) {
3230		panic("%s: in6m=%p wraparound refcnt", __func__, in6m);
3231		/* NOTREACHED */
3232	} else if (in6m->in6m_trace != NULL) {
3233		(*in6m->in6m_trace)(in6m, TRUE);
3234	}
3235	if (!locked)
3236		IN6M_UNLOCK(in6m);
3237}
3238
3239void
3240in6m_remref(struct in6_multi *in6m, int locked)
3241{
3242	struct ifmultiaddr *ifma;
3243	struct mld_ifinfo *mli;
3244
3245	if (!locked)
3246		IN6M_LOCK_SPIN(in6m);
3247	else
3248		IN6M_LOCK_ASSERT_HELD(in6m);
3249
3250	if (in6m->in6m_refcount == 0 || (in6m->in6m_refcount == 1 && locked)) {
3251		panic("%s: in6m=%p negative refcnt", __func__, in6m);
3252		/* NOTREACHED */
3253	} else if (in6m->in6m_trace != NULL) {
3254		(*in6m->in6m_trace)(in6m, FALSE);
3255	}
3256
3257	--in6m->in6m_refcount;
3258	if (in6m->in6m_refcount > 0) {
3259		if (!locked)
3260			IN6M_UNLOCK(in6m);
3261		return;
3262	}
3263
3264	/*
3265	 * Synchronization with in6_mc_get().  In the event the in6m has been
3266	 * detached, the underlying ifma would still be in the if_multiaddrs
3267	 * list, and thus can be looked up via if_addmulti().  At that point,
3268	 * the only way to find this in6m is via ifma_protospec.  To avoid
3269	 * race conditions between the last in6m_remref() of that in6m and its
3270	 * use via ifma_protospec, in6_multihead lock is used for serialization.
3271	 * In order to avoid violating the lock order, we must drop in6m_lock
3272	 * before acquiring in6_multihead lock.  To prevent the in6m from being
3273	 * freed prematurely, we hold an extra reference.
3274	 */
3275	++in6m->in6m_refcount;
3276	IN6M_UNLOCK(in6m);
3277	in6_multihead_lock_shared();
3278	IN6M_LOCK_SPIN(in6m);
3279	--in6m->in6m_refcount;
3280	if (in6m->in6m_refcount > 0) {
3281		/* We've lost the race, so abort since in6m is still in use */
3282		IN6M_UNLOCK(in6m);
3283		in6_multihead_lock_done();
3284		/* If it was locked, return it as such */
3285		if (locked)
3286			IN6M_LOCK(in6m);
3287		return;
3288	}
3289	in6m_purge(in6m);
3290	ifma = in6m->in6m_ifma;
3291	in6m->in6m_ifma = NULL;
3292	in6m->in6m_ifp = NULL;
3293	mli = in6m->in6m_mli;
3294	in6m->in6m_mli = NULL;
3295	IN6M_UNLOCK(in6m);
3296	IFMA_LOCK_SPIN(ifma);
3297	ifma->ifma_protospec = NULL;
3298	IFMA_UNLOCK(ifma);
3299	in6_multihead_lock_done();
3300
3301	in6_multi_free(in6m);
3302	if_delmulti_ifma(ifma);
3303	/* Release reference held to the underlying ifmultiaddr */
3304	IFMA_REMREF(ifma);
3305
3306	if (mli != NULL)
3307		MLI_REMREF(mli);
3308}
3309
3310static void
3311in6m_trace(struct in6_multi *in6m, int refhold)
3312{
3313	struct in6_multi_dbg *in6m_dbg = (struct in6_multi_dbg *)in6m;
3314	ctrace_t *tr;
3315	u_int32_t idx;
3316	u_int16_t *cnt;
3317
3318	if (!(in6m->in6m_debug & IFD_DEBUG)) {
3319		panic("%s: in6m %p has no debug structure", __func__, in6m);
3320		/* NOTREACHED */
3321	}
3322	if (refhold) {
3323		cnt = &in6m_dbg->in6m_refhold_cnt;
3324		tr = in6m_dbg->in6m_refhold;
3325	} else {
3326		cnt = &in6m_dbg->in6m_refrele_cnt;
3327		tr = in6m_dbg->in6m_refrele;
3328	}
3329
3330	idx = atomic_add_16_ov(cnt, 1) % IN6M_TRACE_HIST_SIZE;
3331	ctrace_record(&tr[idx]);
3332}
3333
3334static struct in6_multi_mship *
3335in6_multi_mship_alloc(int how)
3336{
3337	struct in6_multi_mship *imm;
3338
3339	imm = (how == M_WAITOK) ? zalloc(imm_zone) : zalloc_noblock(imm_zone);
3340	if (imm != NULL)
3341		bzero(imm, imm_size);
3342
3343	return (imm);
3344}
3345
3346static void
3347in6_multi_mship_free(struct in6_multi_mship *imm)
3348{
3349	if (imm->i6mm_maddr != NULL) {
3350		panic("%s: i6mm_maddr not NULL for imm=%p", __func__, imm);
3351		/* NOTREACHED */
3352	}
3353	zfree(imm_zone, imm);
3354}
3355
3356void
3357in6_multihead_lock_exclusive(void)
3358{
3359	lck_rw_lock_exclusive(&in6_multihead_lock);
3360}
3361
3362void
3363in6_multihead_lock_shared(void)
3364{
3365	lck_rw_lock_shared(&in6_multihead_lock);
3366}
3367
3368void
3369in6_multihead_lock_assert(int what)
3370{
3371	lck_rw_assert(&in6_multihead_lock, what);
3372}
3373
3374void
3375in6_multihead_lock_done(void)
3376{
3377	lck_rw_done(&in6_multihead_lock);
3378}
3379
3380static struct ip6_msource *
3381ip6ms_alloc(int how)
3382{
3383	struct ip6_msource *i6ms;
3384
3385	i6ms = (how == M_WAITOK) ? zalloc(ip6ms_zone) :
3386	    zalloc_noblock(ip6ms_zone);
3387	if (i6ms != NULL)
3388		bzero(i6ms, ip6ms_size);
3389
3390	return (i6ms);
3391}
3392
3393static void
3394ip6ms_free(struct ip6_msource *i6ms)
3395{
3396	zfree(ip6ms_zone, i6ms);
3397}
3398
3399static struct in6_msource *
3400in6ms_alloc(int how)
3401{
3402	struct in6_msource *in6ms;
3403
3404	in6ms = (how == M_WAITOK) ? zalloc(in6ms_zone) :
3405	    zalloc_noblock(in6ms_zone);
3406	if (in6ms != NULL)
3407		bzero(in6ms, in6ms_size);
3408
3409	return (in6ms);
3410}
3411
3412static void
3413in6ms_free(struct in6_msource *in6ms)
3414{
3415	zfree(in6ms_zone, in6ms);
3416}
3417
3418#ifdef MLD_DEBUG
3419
static const char *in6m_modestrs[] = { "un", "in", "ex" };
3421
3422static const char *
3423in6m_mode_str(const int mode)
3424{
3425	if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE)
3426		return (in6m_modestrs[mode]);
3427	return ("??");
3428}
3429
3430static const char *in6m_statestrs[] = {
	"not-member",
	"silent",
	"idle",
	"lazy",
	"sleeping",
	"awakening",
	"query-pending",
	"sg-query-pending",
3439	"leaving"
3440};
3441
3442static const char *
3443in6m_state_str(const int state)
3444{
3445	if (state >= MLD_NOT_MEMBER && state <= MLD_LEAVING_MEMBER)
3446		return (in6m_statestrs[state]);
3447	return ("??");
3448}
3449
3450/*
3451 * Dump an in6_multi structure to the console.
3452 */
3453void
3454in6m_print(const struct in6_multi *inm)
3455{
3456	int t;
3457
3458	IN6M_LOCK_ASSERT_HELD(IN6M_CAST_TO_NONCONST(inm));
3459
3460	if (mld_debug == 0)
3461		return;
3462
3463	printf("%s: --- begin in6m %p ---\n", __func__, inm);
3464	printf("addr %s ifp %p(%s%d) ifma %p\n",
3465	    ip6_sprintf(&inm->in6m_addr),
3466	    inm->in6m_ifp,
3467	    inm->in6m_ifp->if_name,
3468	    inm->in6m_ifp->if_unit,
3469	    inm->in6m_ifma);
3470	printf("timer %u state %s refcount %u scq.len %u\n",
3471	    inm->in6m_timer,
3472	    in6m_state_str(inm->in6m_state),
3473	    inm->in6m_refcount,
3474	    inm->in6m_scq.ifq_len);
3475	printf("mli %p nsrc %lu sctimer %u scrv %u\n",
3476	    inm->in6m_mli,
3477	    inm->in6m_nsrc,
3478	    inm->in6m_sctimer,
3479	    inm->in6m_scrv);
3480	for (t = 0; t < 2; t++) {
3481		printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t,
3482		    in6m_mode_str(inm->in6m_st[t].iss_fmode),
3483		    inm->in6m_st[t].iss_asm,
3484		    inm->in6m_st[t].iss_ex,
3485		    inm->in6m_st[t].iss_in,
3486		    inm->in6m_st[t].iss_rec);
3487	}
3488	printf("%s: --- end in6m %p ---\n", __func__, inm);
3489}
3490
3491#else
3492
3493void
3494in6m_print(__unused const struct in6_multi *inm)
3495{
3496
3497}
3498
3499#endif
3500