ip_multi.c revision 6827:f0d3566dbbf4
156698Sjasone/*
263364Sjasone * CDDL HEADER START
356698Sjasone *
456698Sjasone * The contents of this file are subject to the terms of the
556698Sjasone * Common Development and Distribution License (the "License").
656698Sjasone * You may not use this file except in compliance with the License.
756698Sjasone *
856698Sjasone * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
956698Sjasone * or http://www.opensolaris.org/os/licensing.
1056698Sjasone * See the License for the specific language governing permissions
1156698Sjasone * and limitations under the License.
1256698Sjasone *
1356698Sjasone * When distributing Covered Code, include this CDDL HEADER in each
1456698Sjasone * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1556698Sjasone * If applicable, add the following below this CDDL HEADER, with the
1656698Sjasone * fields enclosed by brackets "[]" replaced with your own identifying
1756698Sjasone * information: Portions Copyright [yyyy] [name of copyright owner]
1856698Sjasone *
1956698Sjasone * CDDL HEADER END
2056698Sjasone */
2156698Sjasone/*
2256698Sjasone * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
2356698Sjasone * Use is subject to license terms.
2456698Sjasone */
2556698Sjasone/* Copyright (c) 1990 Mentat Inc. */
2656698Sjasone
2756698Sjasone#pragma ident	"%Z%%M%	%I%	%E% SMI"
2856698Sjasone
2956698Sjasone#include <sys/types.h>
3056698Sjasone#include <sys/stream.h>
3156698Sjasone#include <sys/dlpi.h>
3256698Sjasone#include <sys/stropts.h>
3356698Sjasone#include <sys/strsun.h>
34103388Smini#include <sys/ddi.h>
3556698Sjasone#include <sys/cmn_err.h>
3675369Sdeischen#include <sys/sdt.h>
3771581Sdeischen#include <sys/zone.h>
3856698Sjasone
3956698Sjasone#include <sys/param.h>
4056698Sjasone#include <sys/socket.h>
4156698Sjasone#include <sys/sockio.h>
4256698Sjasone#include <net/if.h>
4356698Sjasone#include <sys/systm.h>
4456698Sjasone#include <sys/strsubr.h>
4571581Sdeischen#include <net/route.h>
4656698Sjasone#include <netinet/in.h>
4771581Sdeischen#include <net/if_dl.h>
4856698Sjasone#include <netinet/ip6.h>
4956698Sjasone#include <netinet/icmp6.h>
5056698Sjasone
51#include <inet/common.h>
52#include <inet/mi.h>
53#include <inet/nd.h>
54#include <inet/arp.h>
55#include <inet/ip.h>
56#include <inet/ip6.h>
57#include <inet/ip_if.h>
58#include <inet/ip_ndp.h>
59#include <inet/ip_multi.h>
60#include <inet/ipclassifier.h>
61#include <inet/ipsec_impl.h>
62#include <inet/sctp_ip.h>
63#include <inet/ip_listutils.h>
64#include <inet/udp_impl.h>
65
/*
 * Forward declarations of file-local (static) helpers.
 */

/* igmpv3/mldv2 source filter manipulation */
static void	ilm_bld_flists(conn_t *conn, void *arg);
static void	ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode,
    slist_t *flist);

/* ilm creation/removal and driver-level multicast enable/disable */
static ilm_t	*ilm_add_v6(ipif_t *ipif, const in6_addr_t *group,
    ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
    int orig_ifindex, zoneid_t zoneid);
static void	ilm_delete(ilm_t *ilm);
static int	ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group);
static int	ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group);
/* per-conn ilg lookup, creation and removal */
static ilg_t	*ilg_lookup_ill_index_v6(conn_t *connp,
    const in6_addr_t *v6group, int index);
static ilg_t	*ilg_lookup_ipif(conn_t *connp, ipaddr_t group,
    ipif_t *ipif);
static int	ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif,
    mcast_record_t fmode, ipaddr_t src);
static int	ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill,
    mcast_record_t fmode, const in6_addr_t *v6src);
static void	ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src);
/* construction of the DLPI request and the AR_ENTRY_SQUERY that carries it */
static mblk_t	*ill_create_dl(ill_t *ill, uint32_t dl_primitive,
    uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp);
static mblk_t	*ill_create_squery(ill_t *ill, ipaddr_t ipaddr,
    uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail);
/* compaction of a conn's ilg array once no walker is active */
static void	conn_ilg_reap(conn_t *connp);
/* NOTE(review): presumably socket-option leave paths; bodies not shown here */
static int	ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group,
    ipif_t *ipif, mcast_record_t fmode, ipaddr_t src);
static int	ip_opt_delete_group_excl_v6(conn_t *connp,
    const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode,
    const in6_addr_t *v6src);
96
97/*
98 * MT notes:
99 *
 * Multicast joins operate on both the ilg and ilm structures. Multiple
 * threads operating on a conn (socket) trying to do multicast joins
 * need to synchronize when operating on the ilg. Multiple threads
 * potentially operating on different conns (socket endpoints) trying to
 * do multicast joins could eventually end up trying to manipulate the
 * ilm simultaneously and need to synchronize on the access to the ilm.
 * Both are amenable to standard Solaris MT techniques, but it would be
 * complex to handle a failover or failback which needs to manipulate
 * ilg/ilms if applications can also simultaneously join/leave
 * multicast groups. Hence multicast join/leave also go through the ipsq_t
 * serialization.
111 *
112 * Multicast joins and leaves are single-threaded per phyint/IPMP group
113 * using the ipsq serialization mechanism.
114 *
115 * An ilm is an IP data structure used to track multicast join/leave.
116 * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
117 * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
118 * referencing the ilm. ilms are created / destroyed only as writer. ilms
119 * are not passed around, instead they are looked up and used under the
120 * ill_lock or as writer. So we don't need a dynamic refcount of the number
121 * of threads holding reference to an ilm.
122 *
123 * Multicast Join operation:
124 *
125 * The first step is to determine the ipif (v4) or ill (v6) on which
126 * the join operation is to be done. The join is done after becoming
127 * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg
128 * and ill->ill_ilm are thus accessed and modified exclusively per ill.
129 * Multiple threads can attempt to join simultaneously on different ipif/ill
130 * on the same conn. In this case the ipsq serialization does not help in
131 * protecting the ilg. It is the conn_lock that is used to protect the ilg.
132 * The conn_lock also protects all the ilg_t members.
133 *
134 * Leave operation.
135 *
136 * Similar to the join operation, the first step is to determine the ipif
137 * or ill (v6) on which the leave operation is to be done. The leave operation
138 * is done after becoming exclusive on the ipsq associated with the ipif or ill.
139 * As with join ilg modification is done under the protection of the conn lock.
140 */
141
/*
 * Try to become exclusive on the ipsq associated with `ipif'.  The result
 * of ipsq_try_enter() is stored in `ipsq'.  If that result is NULL the
 * macro drops the ipif reference with ipif_refrele() and makes the
 * *enclosing function* return EINPROGRESS.
 *
 * NOTE: this expands to several statements and contains a `return', so it
 * must be used as a stand-alone statement (never as the unbraced body of
 * an if/else/loop) and only in a function that can return EINPROGRESS.
 */
#define	IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type)	\
	ASSERT(connp != NULL);					\
	(ipsq) = ipsq_try_enter((ipif), NULL, CONNP_TO_WQ(connp),	\
	    (first_mp), (func), (type), B_TRUE);		\
	if ((ipsq) == NULL) {					\
		ipif_refrele(ipif);				\
		return (EINPROGRESS);				\
	}

/*
 * Same as IPSQ_ENTER_IPIF, but keyed on an ill: on failure the ill
 * reference is dropped with ill_refrele() before the enclosing function
 * returns EINPROGRESS.  The same usage restrictions apply.
 */
#define	IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type)	\
	ASSERT(connp != NULL);					\
	(ipsq) = ipsq_try_enter(NULL, ill, CONNP_TO_WQ(connp),	\
	    (first_mp),	(func), (type), B_TRUE);		\
	if ((ipsq) == NULL) {					\
		ill_refrele(ill);				\
		return (EINPROGRESS);				\
	}

/*
 * Leave the ipsq if one was entered (ipsq may be NULL).
 *
 * NOTE: expands to an unbraced `if' that already ends in a semicolon;
 * do not place it directly before an `else'.
 */
#define	IPSQ_EXIT(ipsq)	\
	if (ipsq != NULL)	\
		ipsq_exit(ipsq, B_TRUE, B_TRUE);

/* Note that a walker is traversing connp's ilg array. */
#define	ILG_WALKER_HOLD(connp)	(connp)->conn_ilg_walker_cnt++

/*
 * Drop a walker hold; when the last walker leaves, compact the ilg array
 * with conn_ilg_reap().  conn_ilg_reap() asserts that conn_lock is held,
 * so the caller must hold connp->conn_lock here.
 */
#define	ILG_WALKER_RELE(connp)				\
	{						\
		(connp)->conn_ilg_walker_cnt--;		\
		if ((connp)->conn_ilg_walker_cnt == 0)	\
			conn_ilg_reap(connp);		\
	}
172
173static void
174conn_ilg_reap(conn_t *connp)
175{
176	int	to;
177	int	from;
178	ilg_t	*ilg;
179
180	ASSERT(MUTEX_HELD(&connp->conn_lock));
181
182	to = 0;
183	from = 0;
184	while (from < connp->conn_ilg_inuse) {
185		if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) {
186			ilg = &connp->conn_ilg[from];
187			FREE_SLIST(ilg->ilg_filter);
188			ilg->ilg_flags &= ~ILG_DELETED;
189			from++;
190			continue;
191		}
192		if (to != from)
193			connp->conn_ilg[to] = connp->conn_ilg[from];
194		to++;
195		from++;
196	}
197
198	connp->conn_ilg_inuse = to;
199
200	if (connp->conn_ilg_inuse == 0) {
201		mi_free((char *)connp->conn_ilg);
202		connp->conn_ilg = NULL;
203		cv_broadcast(&connp->conn_refcv);
204	}
205}
206
207#define	GETSTRUCT(structure, number)	\
208	((structure *)mi_zalloc(sizeof (structure) * (number)))
209
210#define	ILG_ALLOC_CHUNK	16
211
212/*
213 * Returns a pointer to the next available ilg in conn_ilg.  Allocs more
214 * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's
215 * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the
216 * returned ilg).  Returns NULL on failure (ENOMEM).
217 *
218 * Assumes connp->conn_lock is held.
219 */
220static ilg_t *
221conn_ilg_alloc(conn_t *connp)
222{
223	ilg_t *new, *ret;
224	int curcnt;
225
226	ASSERT(MUTEX_HELD(&connp->conn_lock));
227	ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated);
228
229	if (connp->conn_ilg == NULL) {
230		connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK);
231		if (connp->conn_ilg == NULL)
232			return (NULL);
233		connp->conn_ilg_allocated = ILG_ALLOC_CHUNK;
234		connp->conn_ilg_inuse = 0;
235	}
236	if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) {
237		if (connp->conn_ilg_walker_cnt != 0) {
238			/*
239			 * XXX We cannot grow the array at this point
240			 * because a list walker could be in progress, and
241			 * we cannot wipe out the existing array until the
242			 * walker is done. Just return NULL for now.
243			 * ilg_delete_all() will have to be changed when
244			 * this logic is changed.
245			 */
246			return (NULL);
247		}
248		curcnt = connp->conn_ilg_allocated;
249		new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK);
250		if (new == NULL)
251			return (NULL);
252		bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt);
253		mi_free((char *)connp->conn_ilg);
254		connp->conn_ilg = new;
255		connp->conn_ilg_allocated += ILG_ALLOC_CHUNK;
256	}
257
258	ret = &connp->conn_ilg[connp->conn_ilg_inuse++];
259	ASSERT((ret->ilg_flags & ILG_DELETED) == 0);
260	bzero(ret, sizeof (*ret));
261	return (ret);
262}
263
/*
 * Scratch state handed to ilm_bld_flists() through ipcl_walk() by
 * ilm_gen_filter(): accumulates, across all conns, the filter state of
 * every ilg that matches one particular ilm.
 */
typedef struct ilm_fbld_s {
	ilm_t		*fbld_ilm;	/* ilm whose filter is being built */
	int		fbld_in_cnt;	/* # of matching INCLUDE-mode ilgs */
	int		fbld_ex_cnt;	/* # of matching non-INCLUDE ilgs */
	slist_t		fbld_in;	/* union of the INCLUDE lists */
	slist_t		fbld_ex;	/* intersection of the EXCLUDE lists */
	boolean_t	fbld_in_overflow; /* set via l_union_in_a() */
} ilm_fbld_t;
272
273static void
274ilm_bld_flists(conn_t *conn, void *arg)
275{
276	int i;
277	ilm_fbld_t *fbld = (ilm_fbld_t *)(arg);
278	ilm_t *ilm = fbld->fbld_ilm;
279	in6_addr_t *v6group = &ilm->ilm_v6addr;
280
281	if (conn->conn_ilg_inuse == 0)
282		return;
283
284	/*
285	 * Since we can't break out of the ipcl_walk once started, we still
286	 * have to look at every conn.  But if we've already found one
287	 * (EXCLUDE, NULL) list, there's no need to keep checking individual
288	 * ilgs--that will be our state.
289	 */
290	if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0)
291		return;
292
293	/*
294	 * Check this conn's ilgs to see if any are interested in our
295	 * ilm (group, interface match).  If so, update the master
296	 * include and exclude lists we're building in the fbld struct
297	 * with this ilg's filter info.
298	 */
299	mutex_enter(&conn->conn_lock);
300	for (i = 0; i < conn->conn_ilg_inuse; i++) {
301		ilg_t *ilg = &conn->conn_ilg[i];
302		if ((ilg->ilg_ill == ilm->ilm_ill) &&
303		    (ilg->ilg_ipif == ilm->ilm_ipif) &&
304		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
305			if (ilg->ilg_fmode == MODE_IS_INCLUDE) {
306				fbld->fbld_in_cnt++;
307				if (!fbld->fbld_in_overflow)
308					l_union_in_a(&fbld->fbld_in,
309					    ilg->ilg_filter,
310					    &fbld->fbld_in_overflow);
311			} else {
312				fbld->fbld_ex_cnt++;
313				/*
314				 * On the first exclude list, don't try to do
315				 * an intersection, as the master exclude list
316				 * is intentionally empty.  If the master list
317				 * is still empty on later iterations, that
318				 * means we have at least one ilg with an empty
319				 * exclude list, so that should be reflected
320				 * when we take the intersection.
321				 */
322				if (fbld->fbld_ex_cnt == 1) {
323					if (ilg->ilg_filter != NULL)
324						l_copy(ilg->ilg_filter,
325						    &fbld->fbld_ex);
326				} else {
327					l_intersection_in_a(&fbld->fbld_ex,
328					    ilg->ilg_filter);
329				}
330			}
331			/* there will only be one match, so break now. */
332			break;
333		}
334	}
335	mutex_exit(&conn->conn_lock);
336}
337
338static void
339ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist)
340{
341	ilm_fbld_t fbld;
342	ip_stack_t *ipst = ilm->ilm_ipst;
343
344	fbld.fbld_ilm = ilm;
345	fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0;
346	fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0;
347	fbld.fbld_in_overflow = B_FALSE;
348
349	/* first, construct our master include and exclude lists */
350	ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst);
351
352	/* now use those master lists to generate the interface filter */
353
354	/* if include list overflowed, filter is (EXCLUDE, NULL) */
355	if (fbld.fbld_in_overflow) {
356		*fmode = MODE_IS_EXCLUDE;
357		flist->sl_numsrc = 0;
358		return;
359	}
360
361	/* if nobody interested, interface filter is (INCLUDE, NULL) */
362	if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) {
363		*fmode = MODE_IS_INCLUDE;
364		flist->sl_numsrc = 0;
365		return;
366	}
367
368	/*
369	 * If there are no exclude lists, then the interface filter
370	 * is INCLUDE, with its filter list equal to fbld_in.  A single
371	 * exclude list makes the interface filter EXCLUDE, with its
372	 * filter list equal to (fbld_ex - fbld_in).
373	 */
374	if (fbld.fbld_ex_cnt == 0) {
375		*fmode = MODE_IS_INCLUDE;
376		l_copy(&fbld.fbld_in, flist);
377	} else {
378		*fmode = MODE_IS_EXCLUDE;
379		l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist);
380	}
381}
382
383/*
384 * If the given interface has failed, choose a new one to join on so
385 * that we continue to receive packets.  ilg_orig_ifindex remembers
386 * what the application used to join on so that we know the ilg to
387 * delete even though we change the ill here.  Callers will store the
388 * ilg returned from this function in ilg_ill.  Thus when we receive
389 * a packet on ilg_ill, conn_wantpacket_v6 will deliver the packets.
390 *
391 * This function must be called as writer so we can walk the group
392 * list and examine flags without holding a lock.
393 */
394ill_t *
395ip_choose_multi_ill(ill_t *ill, const in6_addr_t *grp)
396{
397	ill_t	*till;
398	ill_group_t *illgrp = ill->ill_group;
399
400	ASSERT(IAM_WRITER_ILL(ill));
401
402	if (IN6_IS_ADDR_UNSPECIFIED(grp) || illgrp == NULL)
403		return (ill);
404
405	if ((ill->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE)) == 0)
406		return (ill);
407
408	till = illgrp->illgrp_ill;
409	while (till != NULL &&
410	    (till->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE))) {
411		till = till->ill_group_next;
412	}
413	if (till != NULL)
414		return (till);
415
416	return (ill);
417}
418
419static int
420ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist,
421    boolean_t isv6)
422{
423	mcast_record_t fmode;
424	slist_t *flist;
425	boolean_t fdefault;
426	char buf[INET6_ADDRSTRLEN];
427	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;
428
429	/*
430	 * There are several cases where the ilm's filter state
431	 * defaults to (EXCLUDE, NULL):
432	 *	- we've had previous joins without associated ilgs
433	 *	- this join has no associated ilg
434	 *	- the ilg's filter state is (EXCLUDE, NULL)
435	 */
436	fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
437	    (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);
438
439	/* attempt mallocs (if needed) before doing anything else */
440	if ((flist = l_alloc()) == NULL)
441		return (ENOMEM);
442	if (!fdefault && ilm->ilm_filter == NULL) {
443		ilm->ilm_filter = l_alloc();
444		if (ilm->ilm_filter == NULL) {
445			l_free(flist);
446			return (ENOMEM);
447		}
448	}
449
450	if (ilgstat != ILGSTAT_CHANGE)
451		ilm->ilm_refcnt++;
452
453	if (ilgstat == ILGSTAT_NONE)
454		ilm->ilm_no_ilg_cnt++;
455
456	/*
457	 * Determine new filter state.  If it's not the default
458	 * (EXCLUDE, NULL), we must walk the conn list to find
459	 * any ilgs interested in this group, and re-build the
460	 * ilm filter.
461	 */
462	if (fdefault) {
463		fmode = MODE_IS_EXCLUDE;
464		flist->sl_numsrc = 0;
465	} else {
466		ilm_gen_filter(ilm, &fmode, flist);
467	}
468
469	/* make sure state actually changed; nothing to do if not. */
470	if ((ilm->ilm_fmode == fmode) &&
471	    !lists_are_different(ilm->ilm_filter, flist)) {
472		l_free(flist);
473		return (0);
474	}
475
476	/* send the state change report */
477	if (!IS_LOOPBACK(ill)) {
478		if (isv6)
479			mld_statechange(ilm, fmode, flist);
480		else
481			igmp_statechange(ilm, fmode, flist);
482	}
483
484	/* update the ilm state */
485	ilm->ilm_fmode = fmode;
486	if (flist->sl_numsrc > 0)
487		l_copy(flist, ilm->ilm_filter);
488	else
489		CLEAR_SLIST(ilm->ilm_filter);
490
491	ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
492	    inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));
493
494	l_free(flist);
495	return (0);
496}
497
498static int
499ilm_update_del(ilm_t *ilm, boolean_t isv6)
500{
501	mcast_record_t fmode;
502	slist_t *flist;
503	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;
504
505	ip1dbg(("ilm_update_del: still %d left; updating state\n",
506	    ilm->ilm_refcnt));
507
508	if ((flist = l_alloc()) == NULL)
509		return (ENOMEM);
510
511	/*
512	 * If present, the ilg in question has already either been
513	 * updated or removed from our list; so all we need to do
514	 * now is walk the list to update the ilm filter state.
515	 *
516	 * Skip the list walk if we have any no-ilg joins, which
517	 * cause the filter state to revert to (EXCLUDE, NULL).
518	 */
519	if (ilm->ilm_no_ilg_cnt != 0) {
520		fmode = MODE_IS_EXCLUDE;
521		flist->sl_numsrc = 0;
522	} else {
523		ilm_gen_filter(ilm, &fmode, flist);
524	}
525
526	/* check to see if state needs to be updated */
527	if ((ilm->ilm_fmode == fmode) &&
528	    (!lists_are_different(ilm->ilm_filter, flist))) {
529		l_free(flist);
530		return (0);
531	}
532
533	if (!IS_LOOPBACK(ill)) {
534		if (isv6)
535			mld_statechange(ilm, fmode, flist);
536		else
537			igmp_statechange(ilm, fmode, flist);
538	}
539
540	ilm->ilm_fmode = fmode;
541	if (flist->sl_numsrc > 0) {
542		if (ilm->ilm_filter == NULL) {
543			ilm->ilm_filter = l_alloc();
544			if (ilm->ilm_filter == NULL) {
545				char buf[INET6_ADDRSTRLEN];
546				ip1dbg(("ilm_update_del: failed to alloc ilm "
547				    "filter; no source filtering for %s on %s",
548				    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
549				    buf, sizeof (buf)), ill->ill_name));
550				ilm->ilm_fmode = MODE_IS_EXCLUDE;
551				l_free(flist);
552				return (0);
553			}
554		}
555		l_copy(flist, ilm->ilm_filter);
556	} else {
557		CLEAR_SLIST(ilm->ilm_filter);
558	}
559
560	l_free(flist);
561	return (0);
562}
563
564/*
565 * INADDR_ANY means all multicast addresses. This is only used
566 * by the multicast router.
567 * INADDR_ANY is stored as IPv6 unspecified addr.
568 */
569int
570ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat,
571    mcast_record_t ilg_fmode, slist_t *ilg_flist)
572{
573	ill_t	*ill = ipif->ipif_ill;
574	ilm_t 	*ilm;
575	in6_addr_t v6group;
576	int	ret;
577
578	ASSERT(IAM_WRITER_IPIF(ipif));
579
580	if (!CLASSD(group) && group != INADDR_ANY)
581		return (EINVAL);
582
583	/*
584	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
585	 */
586	if (group == INADDR_ANY)
587		v6group = ipv6_all_zeros;
588	else
589		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
590
591	mutex_enter(&ill->ill_lock);
592	ilm = ilm_lookup_ipif(ipif, group);
593	mutex_exit(&ill->ill_lock);
594	/*
595	 * Since we are writer, we know the ilm_flags itself cannot
596	 * change at this point, and ilm_lookup_ipif would not have
597	 * returned a DELETED ilm. However, the data path can free
598	 * ilm->next via ilm_walker_cleanup() so we can safely
599	 * access anything in ilm except ilm_next (for safe access to
600	 * ilm_next we'd have  to take the ill_lock).
601	 */
602	if (ilm != NULL)
603		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE));
604
605	/*
606	 * ilms are associated with ipifs in IPv4. It moves with the
607	 * ipif if the ipif moves to a new ill when the interface
608	 * fails. Thus we really don't check whether the ipif_ill
609	 * has failed like in IPv6. If it has FAILED the ipif
610	 * will move (daemon will move it) and hence the ilm, if the
611	 * ipif is not IPIF_NOFAILOVER. For the IPIF_NOFAILOVER ipifs,
612	 * we continue to receive in the same place even if the
613	 * interface fails.
614	 */
615	ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist,
616	    ill->ill_phyint->phyint_ifindex, ipif->ipif_zoneid);
617	if (ilm == NULL)
618		return (ENOMEM);
619
620	if (group == INADDR_ANY) {
621		/*
622		 * Check how many ipif's have members in this group -
623		 * if more then one we should not tell the driver to join
624		 * this time
625		 */
626		if (ilm_numentries_v6(ill, &v6group) > 1)
627			return (0);
628		if (ill->ill_group == NULL)
629			ret = ip_join_allmulti(ipif);
630		else
631			ret = ill_nominate_mcast_rcv(ill->ill_group);
632		if (ret != 0)
633			ilm_delete(ilm);
634		return (ret);
635	}
636
637	if (!IS_LOOPBACK(ill))
638		igmp_joingroup(ilm);
639
640	if (ilm_numentries_v6(ill, &v6group) > 1)
641		return (0);
642
643	ret = ip_ll_addmulti_v6(ipif, &v6group);
644	if (ret != 0)
645		ilm_delete(ilm);
646	return (ret);
647}
648
649/*
650 * The unspecified address means all multicast addresses.
651 * This is only used by the multicast router.
652 *
653 * ill identifies the interface to join on; it may not match the
654 * interface requested by the application of a failover has taken
655 * place.  orig_ifindex always identifies the interface requested
656 * by the app.
657 *
658 * ilgstat tells us if there's an ilg associated with this join,
659 * and if so, if it's a new ilg or a change to an existing one.
660 * ilg_fmode and ilg_flist give us the current filter state of
661 * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
662 */
663int
664ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
665    zoneid_t zoneid, ilg_stat_t ilgstat, mcast_record_t ilg_fmode,
666    slist_t *ilg_flist)
667{
668	ilm_t	*ilm;
669	int	ret;
670
671	ASSERT(IAM_WRITER_ILL(ill));
672
673	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
674	    !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
675		return (EINVAL);
676	}
677
678	/*
679	 * An ilm is uniquely identified by the tuple of (group, ill,
680	 * orig_ill).  group is the multicast group address, ill is
681	 * the interface on which it is currently joined, and orig_ill
682	 * is the interface on which the application requested the
683	 * join.  orig_ill and ill are the same unless orig_ill has
684	 * failed over.
685	 *
686	 * Both orig_ill and ill are required, which means we may have
687	 * 2 ilms on an ill for the same group, but with different
688	 * orig_ills.  These must be kept separate, so that when failback
689	 * occurs, the appropriate ilms are moved back to their orig_ill
690	 * without disrupting memberships on the ill to which they had
691	 * been moved.
692	 *
693	 * In order to track orig_ill, we store orig_ifindex in the
694	 * ilm and ilg.
695	 */
696	mutex_enter(&ill->ill_lock);
697	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
698	mutex_exit(&ill->ill_lock);
699	if (ilm != NULL)
700		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE));
701
702	/*
703	 * We need to remember where the application really wanted
704	 * to join. This will be used later if we want to failback
705	 * to the original interface.
706	 */
707	ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode,
708	    ilg_flist, orig_ifindex, zoneid);
709	if (ilm == NULL)
710		return (ENOMEM);
711
712	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
713		/*
714		 * Check how many ipif's that have members in this group -
715		 * if more then one we should not tell the driver to join
716		 * this time
717		 */
718		if (ilm_numentries_v6(ill, v6group) > 1)
719			return (0);
720		if (ill->ill_group == NULL)
721			ret = ip_join_allmulti(ill->ill_ipif);
722		else
723			ret = ill_nominate_mcast_rcv(ill->ill_group);
724
725		if (ret != 0)
726			ilm_delete(ilm);
727		return (ret);
728	}
729
730	if (!IS_LOOPBACK(ill))
731		mld_joingroup(ilm);
732
733	/*
734	 * If we have more then one we should not tell the driver
735	 * to join this time.
736	 */
737	if (ilm_numentries_v6(ill, v6group) > 1)
738		return (0);
739
740	ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group);
741	if (ret != 0)
742		ilm_delete(ilm);
743	return (ret);
744}
745
746/*
747 * Send a multicast request to the driver for enabling multicast reception
748 * for v6groupp address. The caller has already checked whether it is
749 * appropriate to send one or not.
750 */
751int
752ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
753{
754	mblk_t	*mp;
755	uint32_t addrlen, addroff;
756	char	group_buf[INET6_ADDRSTRLEN];
757
758	ASSERT(IAM_WRITER_ILL(ill));
759
760	/*
761	 * Create a AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked
762	 * on.
763	 */
764	mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t),
765	    &addrlen, &addroff);
766	if (!mp)
767		return (ENOMEM);
768	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
769		ipaddr_t v4group;
770
771		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
772		/*
773		 * NOTE!!!
774		 * The "addroff" passed in here was calculated by
775		 * ill_create_dl(), and will be used by ill_create_squery()
776		 * to perform some twisted coding magic. It is the offset
777		 * into the dl_xxx_req of the hw addr. Here, it will be
778		 * added to b_wptr - b_rptr to create a magic number that
779		 * is not an offset into this squery mblk.
780		 * The actual hardware address will be accessed only in the
781		 * dl_xxx_req, not in the squery. More importantly,
782		 * that hardware address can *only* be accessed in this
783		 * mblk chain by calling mi_offset_param_c(), which uses
784		 * the magic number in the squery hw offset field to go
785		 * to the *next* mblk (the dl_xxx_req), subtract the
786		 * (b_wptr - b_rptr), and find the actual offset into
787		 * the dl_xxx_req.
788		 * Any method that depends on using the
789		 * offset field in the dl_disabmulti_req or squery
790		 * to find either hardware address will similarly fail.
791		 *
792		 * Look in ar_entry_squery() in arp.c to see how this offset
793		 * is used.
794		 */
795		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
796		if (!mp)
797			return (ENOMEM);
798		ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n",
799		    inet_ntop(AF_INET6, v6groupp, group_buf,
800		    sizeof (group_buf)),
801		    ill->ill_name));
802		putnext(ill->ill_rq, mp);
803	} else {
804		ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_mcastreq %s on"
805		    " %s\n",
806		    inet_ntop(AF_INET6, v6groupp, group_buf,
807		    sizeof (group_buf)),
808		    ill->ill_name));
809		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
810	}
811	return (0);
812}
813
814/*
815 * Send a multicast request to the driver for enabling multicast
816 * membership for v6group if appropriate.
817 */
818static int
819ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp)
820{
821	ill_t	*ill = ipif->ipif_ill;
822
823	ASSERT(IAM_WRITER_IPIF(ipif));
824
825	if (ill->ill_net_type != IRE_IF_RESOLVER ||
826	    ipif->ipif_flags & IPIF_POINTOPOINT) {
827		ip1dbg(("ip_ll_addmulti_v6: not resolver\n"));
828		return (0);	/* Must be IRE_IF_NORESOLVER */
829	}
830
831	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
832		ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n"));
833		return (0);
834	}
835	if (!ill->ill_dl_up) {
836		/*
837		 * Nobody there. All multicast addresses will be re-joined
838		 * when we get the DL_BIND_ACK bringing the interface up.
839		 */
840		ip1dbg(("ip_ll_addmulti_v6: nobody up\n"));
841		return (0);
842	}
843	return (ip_ll_send_enabmulti_req(ill, v6groupp));
844}
845
846/*
847 * INADDR_ANY means all multicast addresses. This is only used
848 * by the multicast router.
849 * INADDR_ANY is stored as the IPv6 unspecifed addr.
850 */
851int
852ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving)
853{
854	ill_t	*ill = ipif->ipif_ill;
855	ilm_t *ilm;
856	in6_addr_t v6group;
857	int	ret;
858
859	ASSERT(IAM_WRITER_IPIF(ipif));
860
861	if (!CLASSD(group) && group != INADDR_ANY)
862		return (EINVAL);
863
864	/*
865	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
866	 */
867	if (group == INADDR_ANY)
868		v6group = ipv6_all_zeros;
869	else
870		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
871
872	/*
873	 * Look for a match on the ipif.
874	 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address).
875	 */
876	mutex_enter(&ill->ill_lock);
877	ilm = ilm_lookup_ipif(ipif, group);
878	mutex_exit(&ill->ill_lock);
879	if (ilm == NULL)
880		return (ENOENT);
881
882	/* Update counters */
883	if (no_ilg)
884		ilm->ilm_no_ilg_cnt--;
885
886	if (leaving)
887		ilm->ilm_refcnt--;
888
889	if (ilm->ilm_refcnt > 0)
890		return (ilm_update_del(ilm, B_FALSE));
891
892	if (group == INADDR_ANY) {
893		ilm_delete(ilm);
894		/*
895		 * Check how many ipif's that have members in this group -
896		 * if there are still some left then don't tell the driver
897		 * to drop it.
898		 */
899		if (ilm_numentries_v6(ill, &v6group) != 0)
900			return (0);
901
902		/*
903		 * If we never joined, then don't leave.  This can happen
904		 * if we're in an IPMP group, since only one ill per IPMP
905		 * group receives all multicast packets.
906		 */
907		if (!ill->ill_join_allmulti) {
908			ASSERT(ill->ill_group != NULL);
909			return (0);
910		}
911
912		ret = ip_leave_allmulti(ipif);
913		if (ill->ill_group != NULL)
914			(void) ill_nominate_mcast_rcv(ill->ill_group);
915		return (ret);
916	}
917
918	if (!IS_LOOPBACK(ill))
919		igmp_leavegroup(ilm);
920
921	ilm_delete(ilm);
922	/*
923	 * Check how many ipif's that have members in this group -
924	 * if there are still some left then don't tell the driver
925	 * to drop it.
926	 */
927	if (ilm_numentries_v6(ill, &v6group) != 0)
928		return (0);
929	return (ip_ll_delmulti_v6(ipif, &v6group));
930}
931
932/*
933 * The unspecified address means all multicast addresses.
934 * This is only used by the multicast router.
935 */
936int
937ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
938    zoneid_t zoneid, boolean_t no_ilg, boolean_t leaving)
939{
940	ipif_t	*ipif;
941	ilm_t *ilm;
942	int	ret;
943
944	ASSERT(IAM_WRITER_ILL(ill));
945
946	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
947	    !IN6_IS_ADDR_UNSPECIFIED(v6group))
948		return (EINVAL);
949
950	/*
951	 * Look for a match on the ill.
952	 * (IPV6_LEAVE_GROUP specifies an ill using an ifindex).
953	 *
954	 * Similar to ip_addmulti_v6, we should always look using
955	 * the orig_ifindex.
956	 *
957	 * 1) If orig_ifindex is different from ill's ifindex
958	 *    we should have an ilm with orig_ifindex created in
959	 *    ip_addmulti_v6. We should delete that here.
960	 *
961	 * 2) If orig_ifindex is same as ill's ifindex, we should
962	 *    not delete the ilm that is temporarily here because of
963	 *    a FAILOVER. Those ilms will have a ilm_orig_ifindex
964	 *    different from ill's ifindex.
965	 *
966	 * Thus, always lookup using orig_ifindex.
967	 */
968	mutex_enter(&ill->ill_lock);
969	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
970	mutex_exit(&ill->ill_lock);
971	if (ilm == NULL)
972		return (ENOENT);
973
974	ASSERT(ilm->ilm_ill == ill);
975
976	ipif = ill->ill_ipif;
977
978	/* Update counters */
979	if (no_ilg)
980		ilm->ilm_no_ilg_cnt--;
981
982	if (leaving)
983		ilm->ilm_refcnt--;
984
985	if (ilm->ilm_refcnt > 0)
986		return (ilm_update_del(ilm, B_TRUE));
987
988	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
989		ilm_delete(ilm);
990		/*
991		 * Check how many ipif's that have members in this group -
992		 * if there are still some left then don't tell the driver
993		 * to drop it.
994		 */
995		if (ilm_numentries_v6(ill, v6group) != 0)
996			return (0);
997
998		/*
999		 * If we never joined, then don't leave.  This can happen
1000		 * if we're in an IPMP group, since only one ill per IPMP
1001		 * group receives all multicast packets.
1002		 */
1003		if (!ill->ill_join_allmulti) {
1004			ASSERT(ill->ill_group != NULL);
1005			return (0);
1006		}
1007
1008		ret = ip_leave_allmulti(ipif);
1009		if (ill->ill_group != NULL)
1010			(void) ill_nominate_mcast_rcv(ill->ill_group);
1011		return (ret);
1012	}
1013
1014	if (!IS_LOOPBACK(ill))
1015		mld_leavegroup(ilm);
1016
1017	ilm_delete(ilm);
1018	/*
1019	 * Check how many ipif's that have members in this group -
1020	 * if there are still some left then don't tell the driver
1021	 * to drop it.
1022	 */
1023	if (ilm_numentries_v6(ill, v6group) != 0)
1024		return (0);
1025	return (ip_ll_delmulti_v6(ipif, v6group));
1026}
1027
1028/*
1029 * Send a multicast request to the driver for disabling multicast reception
1030 * for v6groupp address. The caller has already checked whether it is
1031 * appropriate to send one or not.
1032 */
1033int
1034ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
1035{
1036	mblk_t	*mp;
1037	char	group_buf[INET6_ADDRSTRLEN];
1038	uint32_t	addrlen, addroff;
1039
1040	ASSERT(IAM_WRITER_ILL(ill));
1041	/*
1042	 * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked
1043	 * on.
1044	 */
1045	mp = ill_create_dl(ill, DL_DISABMULTI_REQ,
1046	    sizeof (dl_disabmulti_req_t), &addrlen, &addroff);
1047
1048	if (!mp)
1049		return (ENOMEM);
1050
1051	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
1052		ipaddr_t v4group;
1053
1054		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
1055		/*
1056		 * NOTE!!!
1057		 * The "addroff" passed in here was calculated by
1058		 * ill_create_dl(), and will be used by ill_create_squery()
1059		 * to perform some twisted coding magic. It is the offset
1060		 * into the dl_xxx_req of the hw addr. Here, it will be
1061		 * added to b_wptr - b_rptr to create a magic number that
1062		 * is not an offset into this mblk.
1063		 *
1064		 * Please see the comment in ip_ll_send)enabmulti_req()
1065		 * for a complete explanation.
1066		 *
1067		 * Look in ar_entry_squery() in arp.c to see how this offset
1068		 * is used.
1069		 */
1070		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
1071		if (!mp)
1072			return (ENOMEM);
1073		ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n",
1074		    inet_ntop(AF_INET6, v6groupp, group_buf,
1075		    sizeof (group_buf)),
1076		    ill->ill_name));
1077		putnext(ill->ill_rq, mp);
1078	} else {
1079		ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_mcastreq %s on"
1080		    " %s\n",
1081		    inet_ntop(AF_INET6, v6groupp, group_buf,
1082		    sizeof (group_buf)),
1083		    ill->ill_name));
1084		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
1085	}
1086	return (0);
1087}
1088
1089/*
1090 * Send a multicast request to the driver for disabling multicast
1091 * membership for v6group if appropriate.
1092 */
1093static int
1094ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group)
1095{
1096	ill_t	*ill = ipif->ipif_ill;
1097
1098	ASSERT(IAM_WRITER_IPIF(ipif));
1099
1100	if (ill->ill_net_type != IRE_IF_RESOLVER ||
1101	    ipif->ipif_flags & IPIF_POINTOPOINT) {
1102		return (0);	/* Must be IRE_IF_NORESOLVER */
1103	}
1104	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
1105		ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n"));
1106		return (0);
1107	}
1108	if (!ill->ill_dl_up) {
1109		/*
1110		 * Nobody there. All multicast addresses will be re-joined
1111		 * when we get the DL_BIND_ACK bringing the interface up.
1112		 */
1113		ip1dbg(("ip_ll_delmulti_v6: nobody up\n"));
1114		return (0);
1115	}
1116	return (ip_ll_send_disabmulti_req(ill, v6group));
1117}
1118
1119/*
1120 * Make the driver pass up all multicast packets
1121 *
1122 * With ill groups, the caller makes sure that there is only
1123 * one ill joining the allmulti group.
1124 */
1125int
1126ip_join_allmulti(ipif_t *ipif)
1127{
1128	ill_t	*ill = ipif->ipif_ill;
1129	mblk_t	*mp;
1130	uint32_t	addrlen, addroff;
1131
1132	ASSERT(IAM_WRITER_IPIF(ipif));
1133
1134	if (!ill->ill_dl_up) {
1135		/*
1136		 * Nobody there. All multicast addresses will be re-joined
1137		 * when we get the DL_BIND_ACK bringing the interface up.
1138		 */
1139		return (0);
1140	}
1141
1142	ASSERT(!ill->ill_join_allmulti);
1143
1144	/*
1145	 * Create a DL_PROMISCON_REQ message and send it directly to
1146	 * the DLPI provider.  We don't need to do this for certain
1147	 * media types for which we never need to turn promiscuous
1148	 * mode on.
1149	 */
1150	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
1151	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
1152		mp = ill_create_dl(ill, DL_PROMISCON_REQ,
1153		    sizeof (dl_promiscon_req_t), &addrlen, &addroff);
1154		if (mp == NULL)
1155			return (ENOMEM);
1156		ill_dlpi_send(ill, mp);
1157	}
1158
1159	ill->ill_join_allmulti = B_TRUE;
1160	return (0);
1161}
1162
1163/*
1164 * Make the driver stop passing up all multicast packets
1165 *
1166 * With ill groups, we need to nominate some other ill as
1167 * this ipif->ipif_ill is leaving the group.
1168 */
1169int
1170ip_leave_allmulti(ipif_t *ipif)
1171{
1172	ill_t	*ill = ipif->ipif_ill;
1173	mblk_t	*mp;
1174	uint32_t	addrlen, addroff;
1175
1176	ASSERT(IAM_WRITER_IPIF(ipif));
1177
1178	if (!ill->ill_dl_up) {
1179		/*
1180		 * Nobody there. All multicast addresses will be re-joined
1181		 * when we get the DL_BIND_ACK bringing the interface up.
1182		 */
1183		return (0);
1184	}
1185
1186	ASSERT(ill->ill_join_allmulti);
1187
1188	/*
1189	 * Create a DL_PROMISCOFF_REQ message and send it directly to
1190	 * the DLPI provider.  We don't need to do this for certain
1191	 * media types for which we never need to turn promiscuous
1192	 * mode on.
1193	 */
1194	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
1195	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
1196		mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
1197		    sizeof (dl_promiscoff_req_t), &addrlen, &addroff);
1198		if (mp == NULL)
1199			return (ENOMEM);
1200		ill_dlpi_send(ill, mp);
1201	}
1202
1203	ill->ill_join_allmulti = B_FALSE;
1204	return (0);
1205}
1206
1207/*
1208 * Copy mp_orig and pass it in as a local message.
1209 */
1210void
1211ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags,
1212    zoneid_t zoneid)
1213{
1214	mblk_t	*mp;
1215	mblk_t	*ipsec_mp;
1216	ipha_t	*iph;
1217	ip_stack_t *ipst = ill->ill_ipst;
1218
1219	if (DB_TYPE(mp_orig) == M_DATA &&
1220	    ((ipha_t *)mp_orig->b_rptr)->ipha_protocol == IPPROTO_UDP) {
1221		uint_t hdrsz;
1222
1223		hdrsz = IPH_HDR_LENGTH((ipha_t *)mp_orig->b_rptr) +
1224		    sizeof (udpha_t);
1225		ASSERT(MBLKL(mp_orig) >= hdrsz);
1226
1227		if (((mp = allocb(hdrsz, BPRI_MED)) != NULL) &&
1228		    (mp_orig = dupmsg(mp_orig)) != NULL) {
1229			bcopy(mp_orig->b_rptr, mp->b_rptr, hdrsz);
1230			mp->b_wptr += hdrsz;
1231			mp->b_cont = mp_orig;
1232			mp_orig->b_rptr += hdrsz;
1233			if (is_system_labeled() && DB_CRED(mp_orig) != NULL)
1234				mblk_setcred(mp, DB_CRED(mp_orig));
1235			if (MBLKL(mp_orig) == 0) {
1236				mp->b_cont = mp_orig->b_cont;
1237				mp_orig->b_cont = NULL;
1238				freeb(mp_orig);
1239			}
1240		} else if (mp != NULL) {
1241			freeb(mp);
1242			mp = NULL;
1243		}
1244	} else {
1245		mp = ip_copymsg(mp_orig); /* No refcnt on ipsec_out netstack */
1246	}
1247
1248	if (mp == NULL)
1249		return;
1250	if (DB_TYPE(mp) == M_CTL) {
1251		ipsec_mp = mp;
1252		mp = mp->b_cont;
1253	} else {
1254		ipsec_mp = mp;
1255	}
1256
1257	iph = (ipha_t *)mp->b_rptr;
1258
1259	DTRACE_PROBE4(ip4__loopback__out__start,
1260	    ill_t *, NULL, ill_t *, ill,
1261	    ipha_t *, iph, mblk_t *, ipsec_mp);
1262
1263	FW_HOOKS(ipst->ips_ip4_loopback_out_event,
1264	    ipst->ips_ipv4firewall_loopback_out,
1265	    NULL, ill, iph, ipsec_mp, mp, HPE_MULTICAST, ipst);
1266
1267	DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, ipsec_mp);
1268
1269	if (ipsec_mp != NULL)
1270		ip_wput_local(q, ill, iph, ipsec_mp, NULL,
1271		    fanout_flags, zoneid);
1272}
1273
1274static area_t	ip_aresq_template = {
1275	AR_ENTRY_SQUERY,		/* cmd */
1276	sizeof (area_t)+IP_ADDR_LEN,	/* name offset */
1277	sizeof (area_t),	/* name len (filled by ill_arp_alloc) */
1278	IP_ARP_PROTO_TYPE,		/* protocol, from arps perspective */
1279	sizeof (area_t),			/* proto addr offset */
1280	IP_ADDR_LEN,			/* proto addr_length */
1281	0,				/* proto mask offset */
1282	/* Rest is initialized when used */
1283	0,				/* flags */
1284	0,				/* hw addr offset */
1285	0,				/* hw addr length */
1286};
1287
1288static mblk_t *
1289ill_create_squery(ill_t *ill, ipaddr_t ipaddr, uint32_t addrlen,
1290    uint32_t addroff, mblk_t *mp_tail)
1291{
1292	mblk_t	*mp;
1293	area_t	*area;
1294
1295	mp = ill_arp_alloc(ill, (uchar_t *)&ip_aresq_template,
1296	    (caddr_t)&ipaddr);
1297	if (!mp) {
1298		freemsg(mp_tail);
1299		return (NULL);
1300	}
1301	area = (area_t *)mp->b_rptr;
1302	area->area_hw_addr_length = addrlen;
1303	area->area_hw_addr_offset = mp->b_wptr - mp->b_rptr + addroff;
1304	/*
1305	 * NOTE!
1306	 *
1307	 * The area_hw_addr_offset, as can be seen, does not hold the
1308	 * actual hardware address offset. Rather, it holds the offset
1309	 * to the hw addr in the dl_xxx_req in mp_tail, modified by
1310	 * adding (mp->b_wptr - mp->b_rptr). This allows the function
1311	 * mi_offset_paramc() to find the hardware address in the
1312	 * *second* mblk (dl_xxx_req), not this mblk.
1313	 *
1314	 * Using mi_offset_paramc() is thus the *only* way to access
1315	 * the dl_xxx_hw address.
1316	 *
1317	 * The squery hw address should *not* be accessed.
1318	 *
1319	 * See ar_entry_squery() in arp.c for an example of how all this works.
1320	 */
1321
1322	mp->b_cont = mp_tail;
1323	return (mp);
1324}
1325
1326/*
1327 * Create a dlpi message with room for phys+sap. When we come back in
1328 * ip_wput_ctl() we will strip the sap for those primitives which
1329 * only need a physical address.
1330 */
1331static mblk_t *
1332ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length,
1333    uint32_t *addr_lenp, uint32_t *addr_offp)
1334{
1335	mblk_t	*mp;
1336	uint32_t	hw_addr_length;
1337	char		*cp;
1338	uint32_t	offset;
1339	uint32_t 	size;
1340
1341	*addr_lenp = *addr_offp = 0;
1342
1343	hw_addr_length = ill->ill_phys_addr_length;
1344	if (!hw_addr_length) {
1345		ip0dbg(("ip_create_dl: hw addr length = 0\n"));
1346		return (NULL);
1347	}
1348
1349	size = length;
1350	switch (dl_primitive) {
1351	case DL_ENABMULTI_REQ:
1352	case DL_DISABMULTI_REQ:
1353		size += hw_addr_length;
1354		break;
1355	case DL_PROMISCON_REQ:
1356	case DL_PROMISCOFF_REQ:
1357		break;
1358	default:
1359		return (NULL);
1360	}
1361	mp = allocb(size, BPRI_HI);
1362	if (!mp)
1363		return (NULL);
1364	mp->b_wptr += size;
1365	mp->b_datap->db_type = M_PROTO;
1366
1367	cp = (char *)mp->b_rptr;
1368	offset = length;
1369
1370	switch (dl_primitive) {
1371	case DL_ENABMULTI_REQ: {
1372		dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp;
1373
1374		dl->dl_primitive = dl_primitive;
1375		dl->dl_addr_offset = offset;
1376		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1377		*addr_offp = offset;
1378		break;
1379	}
1380	case DL_DISABMULTI_REQ: {
1381		dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp;
1382
1383		dl->dl_primitive = dl_primitive;
1384		dl->dl_addr_offset = offset;
1385		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1386		*addr_offp = offset;
1387		break;
1388	}
1389	case DL_PROMISCON_REQ:
1390	case DL_PROMISCOFF_REQ: {
1391		dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp;
1392
1393		dl->dl_primitive = dl_primitive;
1394		dl->dl_level = DL_PROMISC_MULTI;
1395		break;
1396	}
1397	}
1398	ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n",
1399	    *addr_lenp, *addr_offp));
1400	return (mp);
1401}
1402
1403void
1404ip_wput_ctl(queue_t *q, mblk_t *mp_orig)
1405{
1406	ill_t	*ill = (ill_t *)q->q_ptr;
1407	mblk_t	*mp = mp_orig;
1408	area_t	*area = (area_t *)mp->b_rptr;
1409
1410	/* Check that we have a AR_ENTRY_SQUERY with a tacked on mblk */
1411	if (MBLKL(mp) < sizeof (area_t) || mp->b_cont == NULL ||
1412	    area->area_cmd != AR_ENTRY_SQUERY) {
1413		putnext(q, mp);
1414		return;
1415	}
1416	mp = mp->b_cont;
1417
1418	/*
1419	 * Update dl_addr_length and dl_addr_offset for primitives that
1420	 * have physical addresses as opposed to full saps
1421	 */
1422	switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) {
1423	case DL_ENABMULTI_REQ:
1424		/* Track the state if this is the first enabmulti */
1425		if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
1426			ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
1427		ip1dbg(("ip_wput_ctl: ENABMULTI\n"));
1428		break;
1429	case DL_DISABMULTI_REQ:
1430		ip1dbg(("ip_wput_ctl: DISABMULTI\n"));
1431		break;
1432	default:
1433		ip1dbg(("ip_wput_ctl: default\n"));
1434		break;
1435	}
1436	freeb(mp_orig);
1437	ill_dlpi_send(ill, mp);
1438}
1439
1440/*
1441 * Rejoin any groups which have been explicitly joined by the application (we
1442 * left all explicitly joined groups as part of ill_leave_multicast() prior to
1443 * bringing the interface down).  Note that because groups can be joined and
1444 * left while an interface is down, this may not be the same set of groups
1445 * that we left in ill_leave_multicast().
1446 */
1447void
1448ill_recover_multicast(ill_t *ill)
1449{
1450	ilm_t	*ilm;
1451	char    addrbuf[INET6_ADDRSTRLEN];
1452
1453	ASSERT(IAM_WRITER_ILL(ill));
1454	ILM_WALKER_HOLD(ill);
1455	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1456		/*
1457		 * Check how many ipif's that have members in this group -
1458		 * if more then one we make sure that this entry is first
1459		 * in the list.
1460		 */
1461		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
1462		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm)
1463			continue;
1464		ip1dbg(("ill_recover_multicast: %s\n",
1465		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
1466		    sizeof (addrbuf))));
1467		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1468			if (ill->ill_group == NULL) {
1469				(void) ip_join_allmulti(ill->ill_ipif);
1470			} else {
1471				/*
1472				 * We don't want to join on this ill,
1473				 * if somebody else in the group has
1474				 * already been nominated.
1475				 */
1476				(void) ill_nominate_mcast_rcv(ill->ill_group);
1477			}
1478		} else {
1479			(void) ip_ll_addmulti_v6(ill->ill_ipif,
1480			    &ilm->ilm_v6addr);
1481		}
1482	}
1483	ILM_WALKER_RELE(ill);
1484}
1485
1486/*
1487 * The opposite of ill_recover_multicast() -- leaves all multicast groups
1488 * that were explicitly joined.  Note that both these functions could be
1489 * disposed of if we enhanced ARP to allow us to handle DL_DISABMULTI_REQ
1490 * and DL_ENABMULTI_REQ messages when an interface is down.
1491 */
1492void
1493ill_leave_multicast(ill_t *ill)
1494{
1495	ilm_t	*ilm;
1496	char    addrbuf[INET6_ADDRSTRLEN];
1497
1498	ASSERT(IAM_WRITER_ILL(ill));
1499	ILM_WALKER_HOLD(ill);
1500	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1501		/*
1502		 * Check how many ipif's that have members in this group -
1503		 * if more then one we make sure that this entry is first
1504		 * in the list.
1505		 */
1506		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
1507		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm)
1508			continue;
1509		ip1dbg(("ill_leave_multicast: %s\n",
1510		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
1511		    sizeof (addrbuf))));
1512		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1513			(void) ip_leave_allmulti(ill->ill_ipif);
1514			/*
1515			 * If we were part of an IPMP group, then
1516			 * ill_handoff_responsibility() has already
1517			 * nominated a new member (so we don't).
1518			 */
1519			ASSERT(ill->ill_group == NULL);
1520		} else {
1521			(void) ip_ll_delmulti_v6(ill->ill_ipif,
1522			    &ilm->ilm_v6addr);
1523		}
1524	}
1525	ILM_WALKER_RELE(ill);
1526}
1527
1528/* Find an ilm for matching the ill */
1529ilm_t *
1530ilm_lookup_ill(ill_t *ill, ipaddr_t group, zoneid_t zoneid)
1531{
1532	in6_addr_t	v6group;
1533
1534	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock));
1535	/*
1536	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
1537	 */
1538	if (group == INADDR_ANY)
1539		v6group = ipv6_all_zeros;
1540	else
1541		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1542
1543	return (ilm_lookup_ill_v6(ill, &v6group, zoneid));
1544}
1545
1546/*
1547 * Find an ilm for matching the ill. All the ilm lookup functions
1548 * ignore ILM_DELETED ilms. These have been logically deleted, and
1549 * igmp and linklayer disable multicast have been done. Only mi_free
1550 * yet to be done. Still there in the list due to ilm_walkers. The
1551 * last walker will release it.
1552 */
1553ilm_t *
1554ilm_lookup_ill_v6(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid)
1555{
1556	ilm_t	*ilm;
1557
1558	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock));
1559
1560	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1561		if (ilm->ilm_flags & ILM_DELETED)
1562			continue;
1563		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1564		    (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid))
1565			return (ilm);
1566	}
1567	return (NULL);
1568}
1569
1570ilm_t *
1571ilm_lookup_ill_index_v6(ill_t *ill, const in6_addr_t *v6group, int index,
1572    zoneid_t zoneid)
1573{
1574	ilm_t *ilm;
1575
1576	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock));
1577
1578	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
1579		if (ilm->ilm_flags & ILM_DELETED)
1580			continue;
1581		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1582		    (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid) &&
1583		    ilm->ilm_orig_ifindex == index) {
1584			return (ilm);
1585		}
1586	}
1587	return (NULL);
1588}
1589
1590
1591/*
1592 * Found an ilm for the ipif. Only needed for IPv4 which does
1593 * ipif specific socket options.
1594 */
1595ilm_t *
1596ilm_lookup_ipif(ipif_t *ipif, ipaddr_t group)
1597{
1598	ill_t	*ill = ipif->ipif_ill;
1599	ilm_t	*ilm;
1600	in6_addr_t	v6group;
1601
1602	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock));
1603	/*
1604	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
1605	 */
1606	if (group == INADDR_ANY)
1607		v6group = ipv6_all_zeros;
1608	else
1609		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1610
1611	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1612		if (ilm->ilm_flags & ILM_DELETED)
1613			continue;
1614		if (ilm->ilm_ipif == ipif &&
1615		    IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group))
1616			return (ilm);
1617	}
1618	return (NULL);
1619}
1620
1621/*
1622 * How many members on this ill?
1623 */
1624int
1625ilm_numentries_v6(ill_t *ill, const in6_addr_t *v6group)
1626{
1627	ilm_t	*ilm;
1628	int i = 0;
1629
1630	mutex_enter(&ill->ill_lock);
1631	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1632		if (ilm->ilm_flags & ILM_DELETED)
1633			continue;
1634		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
1635			i++;
1636		}
1637	}
1638	mutex_exit(&ill->ill_lock);
1639	return (i);
1640}
1641
1642/* Caller guarantees that the group is not already on the list */
1643static ilm_t *
1644ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat,
1645    mcast_record_t ilg_fmode, slist_t *ilg_flist, int orig_ifindex,
1646    zoneid_t zoneid)
1647{
1648	ill_t	*ill = ipif->ipif_ill;
1649	ilm_t	*ilm;
1650	ilm_t	*ilm_cur;
1651	ilm_t	**ilm_ptpn;
1652
1653	ASSERT(IAM_WRITER_IPIF(ipif));
1654
1655	ilm = GETSTRUCT(ilm_t, 1);
1656	if (ilm == NULL)
1657		return (NULL);
1658	if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) {
1659		ilm->ilm_filter = l_alloc();
1660		if (ilm->ilm_filter == NULL) {
1661			mi_free(ilm);
1662			return (NULL);
1663		}
1664	}
1665	ilm->ilm_v6addr = *v6group;
1666	ilm->ilm_refcnt = 1;
1667	ilm->ilm_zoneid = zoneid;
1668	ilm->ilm_timer = INFINITY;
1669	ilm->ilm_rtx.rtx_timer = INFINITY;
1670
1671	/*
1672	 * IPv4 Multicast groups are joined using ipif.
1673	 * IPv6 Multicast groups are joined using ill.
1674	 */
1675	if (ill->ill_isv6) {
1676		ilm->ilm_ill = ill;
1677		ilm->ilm_ipif = NULL;
1678		DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
1679		    (char *), "ilm", (void *), ilm);
1680		ill->ill_ilm_cnt++;
1681	} else {
1682		ASSERT(ilm->ilm_zoneid == ipif->ipif_zoneid);
1683		ilm->ilm_ipif = ipif;
1684		ilm->ilm_ill = NULL;
1685		DTRACE_PROBE3(ipif__incr__cnt, (ipif_t *), ipif,
1686		    (char *), "ilm", (void *), ilm);
1687		ipif->ipif_ilm_cnt++;
1688	}
1689	ASSERT(ill->ill_ipst);
1690	ilm->ilm_ipst = ill->ill_ipst;	/* No netstack_hold */
1691
1692	/*
1693	 * After this if ilm moves to a new ill, we don't change
1694	 * the ilm_orig_ifindex. Thus, if ill_index != ilm_orig_ifindex,
1695	 * it has been moved. Indexes don't match even when the application
1696	 * wants to join on a FAILED/INACTIVE interface because we choose
1697	 * a new interface to join in. This is considered as an implicit
1698	 * move.
1699	 */
1700	ilm->ilm_orig_ifindex = orig_ifindex;
1701
1702	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
1703	ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
1704
1705	/*
1706	 * Grab lock to give consistent view to readers
1707	 */
1708	mutex_enter(&ill->ill_lock);
1709	/*
1710	 * All ilms in the same zone are contiguous in the ill_ilm list.
1711	 * The loops in ip_proto_input() and ip_wput_local() use this to avoid
1712	 * sending duplicates up when two applications in the same zone join the
1713	 * same group on different logical interfaces.
1714	 */
1715	ilm_cur = ill->ill_ilm;
1716	ilm_ptpn = &ill->ill_ilm;
1717	while (ilm_cur != NULL && ilm_cur->ilm_zoneid != ilm->ilm_zoneid) {
1718		ilm_ptpn = &ilm_cur->ilm_next;
1719		ilm_cur = ilm_cur->ilm_next;
1720	}
1721	ilm->ilm_next = ilm_cur;
1722	*ilm_ptpn = ilm;
1723
1724	/*
1725	 * If we have an associated ilg, use its filter state; if not,
1726	 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this.
1727	 */
1728	if (ilgstat != ILGSTAT_NONE) {
1729		if (!SLIST_IS_EMPTY(ilg_flist))
1730			l_copy(ilg_flist, ilm->ilm_filter);
1731		ilm->ilm_fmode = ilg_fmode;
1732	} else {
1733		ilm->ilm_no_ilg_cnt = 1;
1734		ilm->ilm_fmode = MODE_IS_EXCLUDE;
1735	}
1736
1737	mutex_exit(&ill->ill_lock);
1738	return (ilm);
1739}
1740
1741void
1742ilm_inactive(ilm_t *ilm)
1743{
1744	FREE_SLIST(ilm->ilm_filter);
1745	FREE_SLIST(ilm->ilm_pendsrcs);
1746	FREE_SLIST(ilm->ilm_rtx.rtx_allow);
1747	FREE_SLIST(ilm->ilm_rtx.rtx_block);
1748	ilm->ilm_ipst = NULL;
1749	mi_free((char *)ilm);
1750}
1751
1752void
1753ilm_walker_cleanup(ill_t *ill)
1754{
1755	ilm_t	**ilmp;
1756	ilm_t	*ilm;
1757	boolean_t need_wakeup = B_FALSE;
1758
1759	ASSERT(MUTEX_HELD(&ill->ill_lock));
1760	ASSERT(ill->ill_ilm_walker_cnt == 0);
1761
1762	ilmp = &ill->ill_ilm;
1763	while (*ilmp != NULL) {
1764		if ((*ilmp)->ilm_flags & ILM_DELETED) {
1765			ilm = *ilmp;
1766			*ilmp = ilm->ilm_next;
1767			/*
1768			 * check if there are any pending FREE or unplumb
1769			 * operations that need to be restarted.
1770			 */
1771			if (ilm->ilm_ipif != NULL) {
1772				/*
1773				 * IPv4 ilms hold a ref on the ipif.
1774				 */
1775				DTRACE_PROBE3(ipif__decr__cnt,
1776				    (ipif_t *), ilm->ilm_ipif,
1777				    (char *), "ilm", (void *), ilm);
1778				ilm->ilm_ipif->ipif_ilm_cnt--;
1779				if (IPIF_FREE_OK(ilm->ilm_ipif))
1780					need_wakeup = B_TRUE;
1781			} else {
1782				/*
1783				 * IPv6 ilms hold a ref on the ill.
1784				 */
1785				ASSERT(ilm->ilm_ill == ill);
1786				DTRACE_PROBE3(ill__decr__cnt,
1787				    (ill_t *), ill,
1788				    (char *), "ilm", (void *), ilm);
1789				ASSERT(ill->ill_ilm_cnt > 0);
1790				ill->ill_ilm_cnt--;
1791				if (ILL_FREE_OK(ill))
1792					need_wakeup = B_TRUE;
1793			}
1794			ilm_inactive(ilm); /* frees ilm */
1795		} else {
1796			ilmp = &(*ilmp)->ilm_next;
1797		}
1798	}
1799	ill->ill_ilm_cleanup_reqd = 0;
1800	if (need_wakeup)
1801		ipif_ill_refrele_tail(ill);
1802	else
1803		mutex_exit(&ill->ill_lock);
1804}
1805
1806/*
1807 * Unlink ilm and free it.
1808 */
1809static void
1810ilm_delete(ilm_t *ilm)
1811{
1812	ill_t		*ill;
1813	ilm_t		**ilmp;
1814	boolean_t	need_wakeup;
1815
1816
1817	if (ilm->ilm_ipif != NULL) {
1818		ASSERT(IAM_WRITER_IPIF(ilm->ilm_ipif));
1819		ASSERT(ilm->ilm_ill == NULL);
1820		ill = ilm->ilm_ipif->ipif_ill;
1821		ASSERT(!ill->ill_isv6);
1822	} else {
1823		ASSERT(IAM_WRITER_ILL(ilm->ilm_ill));
1824		ASSERT(ilm->ilm_ipif == NULL);
1825		ill = ilm->ilm_ill;
1826		ASSERT(ill->ill_isv6);
1827	}
1828	/*
1829	 * Delete under lock protection so that readers don't stumble
1830	 * on bad ilm_next
1831	 */
1832	mutex_enter(&ill->ill_lock);
1833	if (ill->ill_ilm_walker_cnt != 0) {
1834		ilm->ilm_flags |= ILM_DELETED;
1835		ill->ill_ilm_cleanup_reqd = 1;
1836		mutex_exit(&ill->ill_lock);
1837		return;
1838	}
1839
1840	for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next)
1841				;
1842	*ilmp = ilm->ilm_next;
1843
1844	/*
1845	 * if we are the last reference to the ipif (for IPv4 ilms)
1846	 * or the ill (for IPv6 ilms), we may need to wakeup any
1847	 * pending FREE or unplumb operations.
1848	 */
1849	need_wakeup = B_FALSE;
1850	if (ilm->ilm_ipif != NULL) {
1851		DTRACE_PROBE3(ipif__decr__cnt, (ipif_t *), ilm->ilm_ipif,
1852		    (char *), "ilm", (void *), ilm);
1853		ilm->ilm_ipif->ipif_ilm_cnt--;
1854		if (IPIF_FREE_OK(ilm->ilm_ipif))
1855			need_wakeup = B_TRUE;
1856	} else {
1857		DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
1858		    (char *), "ilm", (void *), ilm);
1859		ASSERT(ill->ill_ilm_cnt > 0);
1860		ill->ill_ilm_cnt--;
1861		if (ILL_FREE_OK(ill))
1862			need_wakeup = B_TRUE;
1863	}
1864
1865	ilm_inactive(ilm); /* frees this ilm */
1866
1867	if (need_wakeup) {
1868		/* drops ill lock */
1869		ipif_ill_refrele_tail(ill);
1870	} else {
1871		mutex_exit(&ill->ill_lock);
1872	}
1873}
1874
1875
1876/*
1877 * Looks up the appropriate ipif given a v4 multicast group and interface
1878 * address.  On success, returns 0, with *ipifpp pointing to the found
1879 * struct.  On failure, returns an errno and *ipifpp is NULL.
1880 */
1881int
1882ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr,
1883    uint_t *ifindexp, mblk_t *first_mp, ipsq_func_t func, ipif_t **ipifpp)
1884{
1885	ipif_t *ipif;
1886	int err = 0;
1887	zoneid_t zoneid;
1888	ip_stack_t	*ipst =  connp->conn_netstack->netstack_ip;
1889
1890	if (!CLASSD(group) || CLASSD(src)) {
1891		return (EINVAL);
1892	}
1893	*ipifpp = NULL;
1894
1895	zoneid = IPCL_ZONEID(connp);
1896
1897	ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0));
1898	if (ifaddr != INADDR_ANY) {
1899		ipif = ipif_lookup_addr(ifaddr, NULL, zoneid,
1900		    CONNP_TO_WQ(connp), first_mp, func, &err, ipst);
1901		if (err != 0 && err != EINPROGRESS)
1902			err = EADDRNOTAVAIL;
1903	} else if (ifindexp != NULL && *ifindexp != 0) {
1904		ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid,
1905		    CONNP_TO_WQ(connp), first_mp, func, &err, ipst);
1906	} else {
1907		ipif = ipif_lookup_group(group, zoneid, ipst);
1908		if (ipif == NULL)
1909			return (EADDRNOTAVAIL);
1910	}
1911	if (ipif == NULL)
1912		return (err);
1913
1914	*ipifpp = ipif;
1915	return (0);
1916}
1917
1918/*
1919 * Looks up the appropriate ill (or ipif if v4mapped) given an interface
1920 * index and IPv6 multicast group.  On success, returns 0, with *illpp (or
1921 * *ipifpp if v4mapped) pointing to the found struct.  On failure, returns
1922 * an errno and *illpp and *ipifpp are undefined.
1923 */
1924int
1925ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group,
1926    const in6_addr_t *v6src, ipaddr_t *v4src, boolean_t *isv6, int ifindex,
1927    mblk_t *first_mp, ipsq_func_t func, ill_t **illpp, ipif_t **ipifpp)
1928{
1929	boolean_t src_unspec;
1930	ill_t *ill = NULL;
1931	ipif_t *ipif = NULL;
1932	int err;
1933	zoneid_t zoneid = connp->conn_zoneid;
1934	queue_t *wq = CONNP_TO_WQ(connp);
1935	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1936
1937	src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src);
1938
1939	if (IN6_IS_ADDR_V4MAPPED(v6group)) {
1940		if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1941			return (EINVAL);
1942		IN6_V4MAPPED_TO_IPADDR(v6group, *v4group);
1943		if (src_unspec) {
1944			*v4src = INADDR_ANY;
1945		} else {
1946			IN6_V4MAPPED_TO_IPADDR(v6src, *v4src);
1947		}
1948		if (!CLASSD(*v4group) || CLASSD(*v4src))
1949			return (EINVAL);
1950		*ipifpp = NULL;
1951		*isv6 = B_FALSE;
1952	} else {
1953		if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1954			return (EINVAL);
1955		if (!IN6_IS_ADDR_MULTICAST(v6group) ||
1956		    IN6_IS_ADDR_MULTICAST(v6src)) {
1957			return (EINVAL);
1958		}
1959		*illpp = NULL;
1960		*isv6 = B_TRUE;
1961	}
1962
1963	if (ifindex == 0) {
1964		if (*isv6)
1965			ill = ill_lookup_group_v6(v6group, zoneid, ipst);
1966		else
1967			ipif = ipif_lookup_group(*v4group, zoneid, ipst);
1968		if (ill == NULL && ipif == NULL)
1969			return (EADDRNOTAVAIL);
1970	} else {
1971		if (*isv6) {
1972			ill = ill_lookup_on_ifindex(ifindex, B_TRUE,
1973			    wq, first_mp, func, &err, ipst);
1974			if (ill != NULL &&
1975			    !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) {
1976				ill_refrele(ill);
1977				ill = NULL;
1978				err = EADDRNOTAVAIL;
1979			}
1980		} else {
1981			ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE,
1982			    zoneid, wq, first_mp, func, &err, ipst);
1983		}
1984		if (ill == NULL && ipif == NULL)
1985			return (err);
1986	}
1987
1988	*ipifpp = ipif;
1989	*illpp = ill;
1990	return (0);
1991}
1992
1993static int
1994ip_get_srcfilter(conn_t *connp, struct group_filter *gf,
1995    struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
1996{
1997	ilg_t *ilg;
1998	int i, numsrc, fmode, outsrcs;
1999	struct sockaddr_in *sin;
2000	struct sockaddr_in6 *sin6;
2001	struct in_addr *addrp;
2002	slist_t *fp;
2003	boolean_t is_v4only_api;
2004
2005	mutex_enter(&connp->conn_lock);
2006
2007	ilg = ilg_lookup_ipif(connp, grp, ipif);
2008	if (ilg == NULL) {
2009		mutex_exit(&connp->conn_lock);
2010		return (EADDRNOTAVAIL);
2011	}
2012
2013	if (gf == NULL) {
2014		ASSERT(imsf != NULL);
2015		ASSERT(!isv4mapped);
2016		is_v4only_api = B_TRUE;
2017		outsrcs = imsf->imsf_numsrc;
2018	} else {
2019		ASSERT(imsf == NULL);
2020		is_v4only_api = B_FALSE;
2021		outsrcs = gf->gf_numsrc;
2022	}
2023
2024	/*
2025	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2026	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2027	 * So we need to translate here.
2028	 */
2029	fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
2030	    MCAST_INCLUDE : MCAST_EXCLUDE;
2031	if ((fp = ilg->ilg_filter) == NULL) {
2032		numsrc = 0;
2033	} else {
2034		for (i = 0; i < outsrcs; i++) {
2035			if (i == fp->sl_numsrc)
2036				break;
2037			if (isv4mapped) {
2038				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
2039				sin6->sin6_family = AF_INET6;
2040				sin6->sin6_addr = fp->sl_addr[i];
2041			} else {
2042				if (is_v4only_api) {
2043					addrp = &imsf->imsf_slist[i];
2044				} else {
2045					sin = (struct sockaddr_in *)
2046					    &gf->gf_slist[i];
2047					sin->sin_family = AF_INET;
2048					addrp = &sin->sin_addr;
2049				}
2050				IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp);
2051			}
2052		}
2053		numsrc = fp->sl_numsrc;
2054	}
2055
2056	if (is_v4only_api) {
2057		imsf->imsf_numsrc = numsrc;
2058		imsf->imsf_fmode = fmode;
2059	} else {
2060		gf->gf_numsrc = numsrc;
2061		gf->gf_fmode = fmode;
2062	}
2063
2064	mutex_exit(&connp->conn_lock);
2065
2066	return (0);
2067}
2068
2069static int
2070ip_get_srcfilter_v6(conn_t *connp, struct group_filter *gf,
2071    const struct in6_addr *grp, ill_t *ill)
2072{
2073	ilg_t *ilg;
2074	int i;
2075	struct sockaddr_storage *sl;
2076	struct sockaddr_in6 *sin6;
2077	slist_t *fp;
2078
2079	mutex_enter(&connp->conn_lock);
2080
2081	ilg = ilg_lookup_ill_v6(connp, grp, ill);
2082	if (ilg == NULL) {
2083		mutex_exit(&connp->conn_lock);
2084		return (EADDRNOTAVAIL);
2085	}
2086
2087	/*
2088	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2089	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2090	 * So we need to translate here.
2091	 */
2092	gf->gf_fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
2093	    MCAST_INCLUDE : MCAST_EXCLUDE;
2094	if ((fp = ilg->ilg_filter) == NULL) {
2095		gf->gf_numsrc = 0;
2096	} else {
2097		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
2098			if (i == fp->sl_numsrc)
2099				break;
2100			sin6 = (struct sockaddr_in6 *)sl;
2101			sin6->sin6_family = AF_INET6;
2102			sin6->sin6_addr = fp->sl_addr[i];
2103		}
2104		gf->gf_numsrc = fp->sl_numsrc;
2105	}
2106
2107	mutex_exit(&connp->conn_lock);
2108
2109	return (0);
2110}
2111
2112static int
2113ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
2114    struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
2115{
2116	ilg_t *ilg;
2117	int i, err, infmode, new_fmode;
2118	uint_t insrcs;
2119	struct sockaddr_in *sin;
2120	struct sockaddr_in6 *sin6;
2121	struct in_addr *addrp;
2122	slist_t *orig_filter = NULL;
2123	slist_t *new_filter = NULL;
2124	mcast_record_t orig_fmode;
2125	boolean_t leave_grp, is_v4only_api;
2126	ilg_stat_t ilgstat;
2127
2128	if (gf == NULL) {
2129		ASSERT(imsf != NULL);
2130		ASSERT(!isv4mapped);
2131		is_v4only_api = B_TRUE;
2132		insrcs = imsf->imsf_numsrc;
2133		infmode = imsf->imsf_fmode;
2134	} else {
2135		ASSERT(imsf == NULL);
2136		is_v4only_api = B_FALSE;
2137		insrcs = gf->gf_numsrc;
2138		infmode = gf->gf_fmode;
2139	}
2140
2141	/* Make sure we can handle the source list */
2142	if (insrcs > MAX_FILTER_SIZE)
2143		return (ENOBUFS);
2144
2145	/*
2146	 * setting the filter to (INCLUDE, NULL) is treated
2147	 * as a request to leave the group.
2148	 */
2149	leave_grp = (infmode == MCAST_INCLUDE && insrcs == 0);
2150
2151	ASSERT(IAM_WRITER_IPIF(ipif));
2152
2153	mutex_enter(&connp->conn_lock);
2154
2155	ilg = ilg_lookup_ipif(connp, grp, ipif);
2156	if (ilg == NULL) {
2157		/*
2158		 * if the request was actually to leave, and we
2159		 * didn't find an ilg, there's nothing to do.
2160		 */
2161		if (!leave_grp)
2162			ilg = conn_ilg_alloc(connp);
2163		if (leave_grp || ilg == NULL) {
2164			mutex_exit(&connp->conn_lock);
2165			return (leave_grp ? 0 : ENOMEM);
2166		}
2167		ilgstat = ILGSTAT_NEW;
2168		IN6_IPADDR_TO_V4MAPPED(grp, &ilg->ilg_v6group);
2169		ilg->ilg_ipif = ipif;
2170		ilg->ilg_ill = NULL;
2171		ilg->ilg_orig_ifindex = 0;
2172	} else if (leave_grp) {
2173		ilg_delete(connp, ilg, NULL);
2174		mutex_exit(&connp->conn_lock);
2175		(void) ip_delmulti(grp, ipif, B_FALSE, B_TRUE);
2176		return (0);
2177	} else {
2178		ilgstat = ILGSTAT_CHANGE;
2179		/* Preserve existing state in case ip_addmulti() fails */
2180		orig_fmode = ilg->ilg_fmode;
2181		if (ilg->ilg_filter == NULL) {
2182			orig_filter = NULL;
2183		} else {
2184			orig_filter = l_alloc_copy(ilg->ilg_filter);
2185			if (orig_filter == NULL) {
2186				mutex_exit(&connp->conn_lock);
2187				return (ENOMEM);
2188			}
2189		}
2190	}
2191
2192	/*
2193	 * Alloc buffer to copy new state into (see below) before
2194	 * we make any changes, so we can bail if it fails.
2195	 */
2196	if ((new_filter = l_alloc()) == NULL) {
2197		mutex_exit(&connp->conn_lock);
2198		err = ENOMEM;
2199		goto free_and_exit;
2200	}
2201
2202	if (insrcs == 0) {
2203		CLEAR_SLIST(ilg->ilg_filter);
2204	} else {
2205		slist_t *fp;
2206		if (ilg->ilg_filter == NULL) {
2207			fp = l_alloc();
2208			if (fp == NULL) {
2209				if (ilgstat == ILGSTAT_NEW)
2210					ilg_delete(connp, ilg, NULL);
2211				mutex_exit(&connp->conn_lock);
2212				err = ENOMEM;
2213				goto free_and_exit;
2214			}
2215		} else {
2216			fp = ilg->ilg_filter;
2217		}
2218		for (i = 0; i < insrcs; i++) {
2219			if (isv4mapped) {
2220				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
2221				fp->sl_addr[i] = sin6->sin6_addr;
2222			} else {
2223				if (is_v4only_api) {
2224					addrp = &imsf->imsf_slist[i];
2225				} else {
2226					sin = (struct sockaddr_in *)
2227					    &gf->gf_slist[i];
2228					addrp = &sin->sin_addr;
2229				}
2230				IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]);
2231			}
2232		}
2233		fp->sl_numsrc = insrcs;
2234		ilg->ilg_filter = fp;
2235	}
2236	/*
2237	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2238	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2239	 * So we need to translate here.
2240	 */
2241	ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ?
2242	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2243
2244	/*
2245	 * Save copy of ilg's filter state to pass to other functions,
2246	 * so we can release conn_lock now.
2247	 */
2248	new_fmode = ilg->ilg_fmode;
2249	l_copy(ilg->ilg_filter, new_filter);
2250
2251	mutex_exit(&connp->conn_lock);
2252
2253	err = ip_addmulti(grp, ipif, ilgstat, new_fmode, new_filter);
2254	if (err != 0) {
2255		/*
2256		 * Restore the original filter state, or delete the
2257		 * newly-created ilg.  We need to look up the ilg
2258		 * again, though, since we've not been holding the
2259		 * conn_lock.
2260		 */
2261		mutex_enter(&connp->conn_lock);
2262		ilg = ilg_lookup_ipif(connp, grp, ipif);
2263		ASSERT(ilg != NULL);
2264		if (ilgstat == ILGSTAT_NEW) {
2265			ilg_delete(connp, ilg, NULL);
2266		} else {
2267			ilg->ilg_fmode = orig_fmode;
2268			if (SLIST_IS_EMPTY(orig_filter)) {
2269				CLEAR_SLIST(ilg->ilg_filter);
2270			} else {
2271				/*
2272				 * We didn't free the filter, even if we
2273				 * were trying to make the source list empty;
2274				 * so if orig_filter isn't empty, the ilg
2275				 * must still have a filter alloc'd.
2276				 */
2277				l_copy(orig_filter, ilg->ilg_filter);
2278			}
2279		}
2280		mutex_exit(&connp->conn_lock);
2281	}
2282
2283free_and_exit:
2284	l_free(orig_filter);
2285	l_free(new_filter);
2286
2287	return (err);
2288}
2289
2290static int
2291ip_set_srcfilter_v6(conn_t *connp, struct group_filter *gf,
2292    const struct in6_addr *grp, ill_t *ill)
2293{
2294	ilg_t *ilg;
2295	int i, orig_ifindex, orig_fmode, new_fmode, err;
2296	slist_t *orig_filter = NULL;
2297	slist_t *new_filter = NULL;
2298	struct sockaddr_storage *sl;
2299	struct sockaddr_in6 *sin6;
2300	boolean_t leave_grp;
2301	ilg_stat_t ilgstat;
2302
2303	/* Make sure we can handle the source list */
2304	if (gf->gf_numsrc > MAX_FILTER_SIZE)
2305		return (ENOBUFS);
2306
2307	/*
2308	 * setting the filter to (INCLUDE, NULL) is treated
2309	 * as a request to leave the group.
2310	 */
2311	leave_grp = (gf->gf_fmode == MCAST_INCLUDE && gf->gf_numsrc == 0);
2312
2313	ASSERT(IAM_WRITER_ILL(ill));
2314
2315	/*
2316	 * Use the ifindex to do the lookup.  We can't use the ill
2317	 * directly because ilg_ill could point to a different ill
2318	 * if things have moved.
2319	 */
2320	orig_ifindex = ill->ill_phyint->phyint_ifindex;
2321
2322	mutex_enter(&connp->conn_lock);
2323	ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex);
2324	if (ilg == NULL) {
2325		/*
2326		 * if the request was actually to leave, and we
2327		 * didn't find an ilg, there's nothing to do.
2328		 */
2329		if (!leave_grp)
2330			ilg = conn_ilg_alloc(connp);
2331		if (leave_grp || ilg == NULL) {
2332			mutex_exit(&connp->conn_lock);
2333			return (leave_grp ? 0 : ENOMEM);
2334		}
2335		ilgstat = ILGSTAT_NEW;
2336		ilg->ilg_v6group = *grp;
2337		ilg->ilg_ipif = NULL;
2338		/*
2339		 * Choose our target ill to join on. This might be
2340		 * different from the ill we've been given if it's
2341		 * currently down and part of a group.
2342		 *
2343		 * new ill is not refheld; we are writer.
2344		 */
2345		ill = ip_choose_multi_ill(ill, grp);
2346		ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
2347		ilg->ilg_ill = ill;
2348		/*
2349		 * Remember the index that we joined on, so that we can
2350		 * successfully delete them later on and also search for
2351		 * duplicates if the application wants to join again.
2352		 */
2353		ilg->ilg_orig_ifindex = orig_ifindex;
2354	} else if (leave_grp) {
2355		/*
2356		 * Use the ilg's current ill for the deletion,
2357		 * we might have failed over.
2358		 */
2359		ill = ilg->ilg_ill;
2360		ilg_delete(connp, ilg, NULL);
2361		mutex_exit(&connp->conn_lock);
2362		(void) ip_delmulti_v6(grp, ill, orig_ifindex,
2363		    connp->conn_zoneid, B_FALSE, B_TRUE);
2364		return (0);
2365	} else {
2366		ilgstat = ILGSTAT_CHANGE;
2367		/*
2368		 * The current ill might be different from the one we were
2369		 * asked to join on (if failover has occurred); we should
2370		 * join on the ill stored in the ilg.  The original ill
2371		 * is noted in ilg_orig_ifindex, which matched our request.
2372		 */
2373		ill = ilg->ilg_ill;
2374		/* preserve existing state in case ip_addmulti() fails */
2375		orig_fmode = ilg->ilg_fmode;
2376		if (ilg->ilg_filter == NULL) {
2377			orig_filter = NULL;
2378		} else {
2379			orig_filter = l_alloc_copy(ilg->ilg_filter);
2380			if (orig_filter == NULL) {
2381				mutex_exit(&connp->conn_lock);
2382				return (ENOMEM);
2383			}
2384		}
2385	}
2386
2387	/*
2388	 * Alloc buffer to copy new state into (see below) before
2389	 * we make any changes, so we can bail if it fails.
2390	 */
2391	if ((new_filter = l_alloc()) == NULL) {
2392		mutex_exit(&connp->conn_lock);
2393		err = ENOMEM;
2394		goto free_and_exit;
2395	}
2396
2397	if (gf->gf_numsrc == 0) {
2398		CLEAR_SLIST(ilg->ilg_filter);
2399	} else {
2400		slist_t *fp;
2401		if (ilg->ilg_filter == NULL) {
2402			fp = l_alloc();
2403			if (fp == NULL) {
2404				if (ilgstat == ILGSTAT_NEW)
2405					ilg_delete(connp, ilg, NULL);
2406				mutex_exit(&connp->conn_lock);
2407				err = ENOMEM;
2408				goto free_and_exit;
2409			}
2410		} else {
2411			fp = ilg->ilg_filter;
2412		}
2413		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
2414			sin6 = (struct sockaddr_in6 *)sl;
2415			fp->sl_addr[i] = sin6->sin6_addr;
2416		}
2417		fp->sl_numsrc = gf->gf_numsrc;
2418		ilg->ilg_filter = fp;
2419	}
2420	/*
2421	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2422	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2423	 * So we need to translate here.
2424	 */
2425	ilg->ilg_fmode = (gf->gf_fmode == MCAST_INCLUDE) ?
2426	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2427
2428	/*
2429	 * Save copy of ilg's filter state to pass to other functions,
2430	 * so we can release conn_lock now.
2431	 */
2432	new_fmode = ilg->ilg_fmode;
2433	l_copy(ilg->ilg_filter, new_filter);
2434
2435	mutex_exit(&connp->conn_lock);
2436
2437	err = ip_addmulti_v6(grp, ill, orig_ifindex, connp->conn_zoneid,
2438	    ilgstat, new_fmode, new_filter);
2439	if (err != 0) {
2440		/*
2441		 * Restore the original filter state, or delete the
2442		 * newly-created ilg.  We need to look up the ilg
2443		 * again, though, since we've not been holding the
2444		 * conn_lock.
2445		 */
2446		mutex_enter(&connp->conn_lock);
2447		ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex);
2448		ASSERT(ilg != NULL);
2449		if (ilgstat == ILGSTAT_NEW) {
2450			ilg_delete(connp, ilg, NULL);
2451		} else {
2452			ilg->ilg_fmode = orig_fmode;
2453			if (SLIST_IS_EMPTY(orig_filter)) {
2454				CLEAR_SLIST(ilg->ilg_filter);
2455			} else {
2456				/*
2457				 * We didn't free the filter, even if we
2458				 * were trying to make the source list empty;
2459				 * so if orig_filter isn't empty, the ilg
2460				 * must still have a filter alloc'd.
2461				 */
2462				l_copy(orig_filter, ilg->ilg_filter);
2463			}
2464		}
2465		mutex_exit(&connp->conn_lock);
2466	}
2467
2468free_and_exit:
2469	l_free(orig_filter);
2470	l_free(new_filter);
2471
2472	return (err);
2473}
2474
2475/*
2476 * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls.
2477 */
2478/* ARGSUSED */
2479int
2480ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
2481    ip_ioctl_cmd_t *ipip, void *ifreq)
2482{
2483	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2484	/* existence verified in ip_wput_nondata() */
2485	mblk_t *data_mp = mp->b_cont->b_cont;
2486	int datalen, err, cmd, minsize;
2487	uint_t expsize = 0;
2488	conn_t *connp;
2489	boolean_t isv6, is_v4only_api, getcmd;
2490	struct sockaddr_in *gsin;
2491	struct sockaddr_in6 *gsin6;
2492	ipaddr_t v4grp;
2493	in6_addr_t v6grp;
2494	struct group_filter *gf = NULL;
2495	struct ip_msfilter *imsf = NULL;
2496	mblk_t *ndp;
2497
2498	if (data_mp->b_cont != NULL) {
2499		if ((ndp = msgpullup(data_mp, -1)) == NULL)
2500			return (ENOMEM);
2501		freemsg(data_mp);
2502		data_mp = ndp;
2503		mp->b_cont->b_cont = data_mp;
2504	}
2505
2506	cmd = iocp->ioc_cmd;
2507	getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER);
2508	is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER);
2509	minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0);
2510	datalen = MBLKL(data_mp);
2511
2512	if (datalen < minsize)
2513		return (EINVAL);
2514
2515	/*
2516	 * now we know we have at least have the initial structure,
2517	 * but need to check for the source list array.
2518	 */
2519	if (is_v4only_api) {
2520		imsf = (struct ip_msfilter *)data_mp->b_rptr;
2521		isv6 = B_FALSE;
2522		expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc);
2523	} else {
2524		gf = (struct group_filter *)data_mp->b_rptr;
2525		if (gf->gf_group.ss_family == AF_INET6) {
2526			gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2527			isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr));
2528		} else {
2529			isv6 = B_FALSE;
2530		}
2531		expsize = GROUP_FILTER_SIZE(gf->gf_numsrc);
2532	}
2533	if (datalen < expsize)
2534		return (EINVAL);
2535
2536	connp = Q_TO_CONN(q);
2537
2538	/* operation not supported on the virtual network interface */
2539	if (IS_VNI(ipif->ipif_ill))
2540		return (EINVAL);
2541
2542	if (isv6) {
2543		ill_t *ill = ipif->ipif_ill;
2544		ill_refhold(ill);
2545
2546		gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2547		v6grp = gsin6->sin6_addr;
2548		if (getcmd)
2549			err = ip_get_srcfilter_v6(connp, gf, &v6grp, ill);
2550		else
2551			err = ip_set_srcfilter_v6(connp, gf, &v6grp, ill);
2552
2553		ill_refrele(ill);
2554	} else {
2555		boolean_t isv4mapped = B_FALSE;
2556		if (is_v4only_api) {
2557			v4grp = (ipaddr_t)imsf->imsf_multiaddr.s_addr;
2558		} else {
2559			if (gf->gf_group.ss_family == AF_INET) {
2560				gsin = (struct sockaddr_in *)&gf->gf_group;
2561				v4grp = (ipaddr_t)gsin->sin_addr.s_addr;
2562			} else {
2563				gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2564				IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr,
2565				    v4grp);
2566				isv4mapped = B_TRUE;
2567			}
2568		}
2569		if (getcmd)
2570			err = ip_get_srcfilter(connp, gf, imsf, v4grp, ipif,
2571			    isv4mapped);
2572		else
2573			err = ip_set_srcfilter(connp, gf, imsf, v4grp, ipif,
2574			    isv4mapped);
2575	}
2576
2577	return (err);
2578}
2579
2580/*
2581 * Finds the ipif based on information in the ioctl headers.  Needed to make
2582 * ip_process_ioctl() happy (it needs to know the ipif for IPI_WR-flagged
2583 * ioctls prior to calling the ioctl's handler function).
2584 */
2585int
2586ip_extract_msfilter(queue_t *q, mblk_t *mp, const ip_ioctl_cmd_t *ipip,
2587    cmd_info_t *ci, ipsq_func_t func)
2588{
2589	int cmd = ipip->ipi_cmd;
2590	int err = 0;
2591	conn_t *connp;
2592	ipif_t *ipif;
2593	/* caller has verified this mblk exists */
2594	char *dbuf = (char *)mp->b_cont->b_cont->b_rptr;
2595	struct ip_msfilter *imsf;
2596	struct group_filter *gf;
2597	ipaddr_t v4addr, v4grp;
2598	in6_addr_t v6grp;
2599	uint32_t index;
2600	zoneid_t zoneid;
2601	ip_stack_t *ipst;
2602
2603	connp = Q_TO_CONN(q);
2604	zoneid = connp->conn_zoneid;
2605	ipst = connp->conn_netstack->netstack_ip;
2606
2607	/* don't allow multicast operations on a tcp conn */
2608	if (IPCL_IS_TCP(connp))
2609		return (ENOPROTOOPT);
2610
2611	if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) {
2612		/* don't allow v4-specific ioctls on v6 socket */
2613		if (connp->conn_af_isv6)
2614			return (EAFNOSUPPORT);
2615
2616		imsf = (struct ip_msfilter *)dbuf;
2617		v4addr = imsf->imsf_interface.s_addr;
2618		v4grp = imsf->imsf_multiaddr.s_addr;
2619		if (v4addr == INADDR_ANY) {
2620			ipif = ipif_lookup_group(v4grp, zoneid, ipst);
2621			if (ipif == NULL)
2622				err = EADDRNOTAVAIL;
2623		} else {
2624			ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp,
2625			    func, &err, ipst);
2626		}
2627	} else {
2628		boolean_t isv6 = B_FALSE;
2629		gf = (struct group_filter *)dbuf;
2630		index = gf->gf_interface;
2631		if (gf->gf_group.ss_family == AF_INET6) {
2632			struct sockaddr_in6 *sin6;
2633			sin6 = (struct sockaddr_in6 *)&gf->gf_group;
2634			v6grp = sin6->sin6_addr;
2635			if (IN6_IS_ADDR_V4MAPPED(&v6grp))
2636				IN6_V4MAPPED_TO_IPADDR(&v6grp, v4grp);
2637			else
2638				isv6 = B_TRUE;
2639		} else if (gf->gf_group.ss_family == AF_INET) {
2640			struct sockaddr_in *sin;
2641			sin = (struct sockaddr_in *)&gf->gf_group;
2642			v4grp = sin->sin_addr.s_addr;
2643		} else {
2644			return (EAFNOSUPPORT);
2645		}
2646		if (index == 0) {
2647			if (isv6) {
2648				ipif = ipif_lookup_group_v6(&v6grp, zoneid,
2649				    ipst);
2650			} else {
2651				ipif = ipif_lookup_group(v4grp, zoneid, ipst);
2652			}
2653			if (ipif == NULL)
2654				err = EADDRNOTAVAIL;
2655		} else {
2656			ipif = ipif_lookup_on_ifindex(index, isv6, zoneid,
2657			    q, mp, func, &err, ipst);
2658		}
2659	}
2660
2661	ci->ci_ipif = ipif;
2662	return (err);
2663}
2664
2665/*
2666 * The structures used for the SIOC*MSFILTER ioctls usually must be copied
2667 * in in two stages, as the first copyin tells us the size of the attached
2668 * source buffer.  This function is called by ip_wput_nondata() after the
2669 * first copyin has completed; it figures out how big the second stage
2670 * needs to be, and kicks it off.
2671 *
2672 * In some cases (numsrc < 2), the second copyin is not needed as the
2673 * first one gets a complete structure containing 1 source addr.
2674 *
2675 * The function returns 0 if a second copyin has been started (i.e. there's
2676 * no more work to be done right now), or 1 if the second copyin is not
2677 * needed and ip_wput_nondata() can continue its processing.
2678 */
2679int
2680ip_copyin_msfilter(queue_t *q, mblk_t *mp)
2681{
2682	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2683	int cmd = iocp->ioc_cmd;
2684	/* validity of this checked in ip_wput_nondata() */
2685	mblk_t *mp1 = mp->b_cont->b_cont;
2686	int copysize = 0;
2687	int offset;
2688
2689	if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) {
2690		struct group_filter *gf = (struct group_filter *)mp1->b_rptr;
2691		if (gf->gf_numsrc >= 2) {
2692			offset = sizeof (struct group_filter);
2693			copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset;
2694		}
2695	} else {
2696		struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr;
2697		if (imsf->imsf_numsrc >= 2) {
2698			offset = sizeof (struct ip_msfilter);
2699			copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset;
2700		}
2701	}
2702	if (copysize > 0) {
2703		mi_copyin_n(q, mp, offset, copysize);
2704		return (0);
2705	}
2706	return (1);
2707}
2708
2709/*
2710 * Handle the following optmgmt:
2711 *	IP_ADD_MEMBERSHIP		must not have joined already
2712 *	MCAST_JOIN_GROUP		must not have joined already
2713 *	IP_BLOCK_SOURCE			must have joined already
2714 *	MCAST_BLOCK_SOURCE		must have joined already
2715 *	IP_JOIN_SOURCE_GROUP		may have joined already
2716 *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2717 *
2718 * fmode and src parameters may be used to determine which option is
2719 * being set, as follows (the IP_* and MCAST_* versions of each option
2720 * are functionally equivalent):
2721 *	opt			fmode			src
2722 *	IP_ADD_MEMBERSHIP	MODE_IS_EXCLUDE		INADDR_ANY
2723 *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		INADDR_ANY
2724 *	IP_BLOCK_SOURCE		MODE_IS_EXCLUDE		v4 addr
2725 *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v4 addr
2726 *	IP_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
2727 *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
2728 *
2729 * Changing the filter mode is not allowed; if a matching ilg already
2730 * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2731 *
2732 * Verifies that there is a source address of appropriate scope for
2733 * the group; if not, EADDRNOTAVAIL is returned.
2734 *
2735 * The interface to be used may be identified by an address or by an
2736 * index.  A pointer to the index is passed; if it is NULL, use the
2737 * address, otherwise, use the index.
2738 */
2739int
2740ip_opt_add_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
2741    ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
2742    mblk_t *first_mp)
2743{
2744	ipif_t	*ipif;
2745	ipsq_t	*ipsq;
2746	int err = 0;
2747	ill_t	*ill;
2748
2749	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
2750	    ip_restart_optmgmt, &ipif);
2751	if (err != 0) {
2752		if (err != EINPROGRESS) {
2753			ip1dbg(("ip_opt_add_group: no ipif for group 0x%x, "
2754			    "ifaddr 0x%x, ifindex %d\n", ntohl(group),
2755			    ntohl(ifaddr), (ifindexp == NULL) ? 0 : *ifindexp));
2756		}
2757		return (err);
2758	}
2759	ASSERT(ipif != NULL);
2760
2761	ill = ipif->ipif_ill;
2762	/* Operation not supported on a virtual network interface */
2763	if (IS_VNI(ill)) {
2764		ipif_refrele(ipif);
2765		return (EINVAL);
2766	}
2767
2768	if (checkonly) {
2769		/*
2770		 * do not do operation, just pretend to - new T_CHECK
2771		 * semantics. The error return case above if encountered
2772		 * considered a good enough "check" here.
2773		 */
2774		ipif_refrele(ipif);
2775		return (0);
2776	}
2777
2778	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
2779	    NEW_OP);
2780
2781	/* unspecified source addr => no source filtering */
2782	err = ilg_add(connp, group, ipif, fmode, src);
2783
2784	IPSQ_EXIT(ipsq);
2785
2786	ipif_refrele(ipif);
2787	return (err);
2788}
2789
2790/*
2791 * Handle the following optmgmt:
2792 *	IPV6_JOIN_GROUP			must not have joined already
2793 *	MCAST_JOIN_GROUP		must not have joined already
2794 *	MCAST_BLOCK_SOURCE		must have joined already
2795 *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2796 *
2797 * fmode and src parameters may be used to determine which option is
2798 * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options
2799 * are functionally equivalent):
2800 *	opt			fmode			v6src
2801 *	IPV6_JOIN_GROUP		MODE_IS_EXCLUDE		unspecified
2802 *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		unspecified
2803 *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v6 addr
2804 *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v6 addr
2805 *
2806 * Changing the filter mode is not allowed; if a matching ilg already
2807 * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2808 *
2809 * Verifies that there is a source address of appropriate scope for
2810 * the group; if not, EADDRNOTAVAIL is returned.
2811 *
2812 * Handles IPv4-mapped IPv6 multicast addresses by associating them
2813 * with the link-local ipif.  Assumes that if v6group is v4-mapped,
2814 * v6src is also v4-mapped.
2815 */
2816int
2817ip_opt_add_group_v6(conn_t *connp, boolean_t checkonly,
2818    const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
2819    const in6_addr_t *v6src, mblk_t *first_mp)
2820{
2821	ill_t *ill;
2822	ipif_t	*ipif;
2823	char buf[INET6_ADDRSTRLEN];
2824	ipaddr_t v4group, v4src;
2825	boolean_t isv6;
2826	ipsq_t	*ipsq;
2827	int	err;
2828
2829	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
2830	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
2831	if (err != 0) {
2832		if (err != EINPROGRESS) {
2833			ip1dbg(("ip_opt_add_group_v6: no ill for group %s/"
2834			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
2835			    sizeof (buf)), ifindex));
2836		}
2837		return (err);
2838	}
2839	ASSERT((!isv6 && ipif != NULL) || (isv6 && ill != NULL));
2840
2841	/* operation is not supported on the virtual network interface */
2842	if (isv6) {
2843		if (IS_VNI(ill)) {
2844			ill_refrele(ill);
2845			return (EINVAL);
2846		}
2847	} else {
2848		if (IS_VNI(ipif->ipif_ill)) {
2849			ipif_refrele(ipif);
2850			return (EINVAL);
2851		}
2852	}
2853
2854	if (checkonly) {
2855		/*
2856		 * do not do operation, just pretend to - new T_CHECK
2857		 * semantics. The error return case above if encountered
2858		 * considered a good enough "check" here.
2859		 */
2860		if (isv6)
2861			ill_refrele(ill);
2862		else
2863			ipif_refrele(ipif);
2864		return (0);
2865	}
2866
2867	if (!isv6) {
2868		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
2869		    ipsq, NEW_OP);
2870		err = ilg_add(connp, v4group, ipif, fmode, v4src);
2871		IPSQ_EXIT(ipsq);
2872		ipif_refrele(ipif);
2873	} else {
2874		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
2875		    ipsq, NEW_OP);
2876		err = ilg_add_v6(connp, v6group, ill, fmode, v6src);
2877		IPSQ_EXIT(ipsq);
2878		ill_refrele(ill);
2879	}
2880
2881	return (err);
2882}
2883
2884static int
2885ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif,
2886    mcast_record_t fmode, ipaddr_t src)
2887{
2888	ilg_t	*ilg;
2889	in6_addr_t v6src;
2890	boolean_t leaving = B_FALSE;
2891
2892	ASSERT(IAM_WRITER_IPIF(ipif));
2893
2894	/*
2895	 * The ilg is valid only while we hold the conn lock. Once we drop
2896	 * the lock, another thread can locate another ilg on this connp,
2897	 * but on a different ipif, and delete it, and cause the ilg array
2898	 * to be reallocated and copied. Hence do the ilg_delete before
2899	 * dropping the lock.
2900	 */
2901	mutex_enter(&connp->conn_lock);
2902	ilg = ilg_lookup_ipif(connp, group, ipif);
2903	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
2904		mutex_exit(&connp->conn_lock);
2905		return (EADDRNOTAVAIL);
2906	}
2907
2908	/*
2909	 * Decide if we're actually deleting the ilg or just removing a
2910	 * source filter address; if just removing an addr, make sure we
2911	 * aren't trying to change the filter mode, and that the addr is
2912	 * actually in our filter list already.  If we're removing the
2913	 * last src in an include list, just delete the ilg.
2914	 */
2915	if (src == INADDR_ANY) {
2916		v6src = ipv6_all_zeros;
2917		leaving = B_TRUE;
2918	} else {
2919		int err = 0;
2920		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
2921		if (fmode != ilg->ilg_fmode)
2922			err = EINVAL;
2923		else if (ilg->ilg_filter == NULL ||
2924		    !list_has_addr(ilg->ilg_filter, &v6src))
2925			err = EADDRNOTAVAIL;
2926		if (err != 0) {
2927			mutex_exit(&connp->conn_lock);
2928			return (err);
2929		}
2930		if (fmode == MODE_IS_INCLUDE &&
2931		    ilg->ilg_filter->sl_numsrc == 1) {
2932			v6src = ipv6_all_zeros;
2933			leaving = B_TRUE;
2934		}
2935	}
2936
2937	ilg_delete(connp, ilg, &v6src);
2938	mutex_exit(&connp->conn_lock);
2939
2940	(void) ip_delmulti(group, ipif, B_FALSE, leaving);
2941	return (0);
2942}
2943
2944static int
2945ip_opt_delete_group_excl_v6(conn_t *connp, const in6_addr_t *v6group,
2946    ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src)
2947{
2948	ilg_t	*ilg;
2949	ill_t	*ilg_ill;
2950	uint_t	ilg_orig_ifindex;
2951	boolean_t leaving = B_TRUE;
2952
2953	ASSERT(IAM_WRITER_ILL(ill));
2954
2955	/*
2956	 * Use the index that we originally used to join. We can't
2957	 * use the ill directly because ilg_ill could point to
2958	 * a new ill if things have moved.
2959	 */
2960	mutex_enter(&connp->conn_lock);
2961	ilg = ilg_lookup_ill_index_v6(connp, v6group,
2962	    ill->ill_phyint->phyint_ifindex);
2963	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
2964		mutex_exit(&connp->conn_lock);
2965		return (EADDRNOTAVAIL);
2966	}
2967
2968	/*
2969	 * Decide if we're actually deleting the ilg or just removing a
2970	 * source filter address; if just removing an addr, make sure we
2971	 * aren't trying to change the filter mode, and that the addr is
2972	 * actually in our filter list already.  If we're removing the
2973	 * last src in an include list, just delete the ilg.
2974	 */
2975	if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2976		int err = 0;
2977		if (fmode != ilg->ilg_fmode)
2978			err = EINVAL;
2979		else if (ilg->ilg_filter == NULL ||
2980		    !list_has_addr(ilg->ilg_filter, v6src))
2981			err = EADDRNOTAVAIL;
2982		if (err != 0) {
2983			mutex_exit(&connp->conn_lock);
2984			return (err);
2985		}
2986		if (fmode == MODE_IS_INCLUDE &&
2987		    ilg->ilg_filter->sl_numsrc == 1)
2988			v6src = NULL;
2989		else
2990			leaving = B_FALSE;
2991	}
2992
2993	ilg_ill = ilg->ilg_ill;
2994	ilg_orig_ifindex = ilg->ilg_orig_ifindex;
2995	ilg_delete(connp, ilg, v6src);
2996	mutex_exit(&connp->conn_lock);
2997	(void) ip_delmulti_v6(v6group, ilg_ill, ilg_orig_ifindex,
2998	    connp->conn_zoneid, B_FALSE, leaving);
2999
3000	return (0);
3001}
3002
3003/*
3004 * Handle the following optmgmt:
3005 *	IP_DROP_MEMBERSHIP		will leave
3006 *	MCAST_LEAVE_GROUP		will leave
3007 *	IP_UNBLOCK_SOURCE		will not leave
3008 *	MCAST_UNBLOCK_SOURCE		will not leave
3009 *	IP_LEAVE_SOURCE_GROUP		may leave (if leaving last source)
3010 *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
3011 *
3012 * fmode and src parameters may be used to determine which option is
3013 * being set, as follows (the IP_* and MCAST_* versions of each option
3014 * are functionally equivalent):
3015 *	opt			 fmode			src
3016 *	IP_DROP_MEMBERSHIP	 MODE_IS_INCLUDE	INADDR_ANY
3017 *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	INADDR_ANY
3018 *	IP_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v4 addr
3019 *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v4 addr
3020 *	IP_LEAVE_SOURCE_GROUP	 MODE_IS_INCLUDE	v4 addr
3021 *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v4 addr
3022 *
3023 * Changing the filter mode is not allowed; if a matching ilg already
3024 * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
3025 *
3026 * The interface to be used may be identified by an address or by an
3027 * index.  A pointer to the index is passed; if it is NULL, use the
3028 * address, otherwise, use the index.
3029 */
3030int
3031ip_opt_delete_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
3032    ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
3033    mblk_t *first_mp)
3034{
3035	ipif_t	*ipif;
3036	ipsq_t	*ipsq;
3037	int	err;
3038	ill_t	*ill;
3039
3040	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
3041	    ip_restart_optmgmt, &ipif);
3042	if (err != 0) {
3043		if (err != EINPROGRESS) {
3044			ip1dbg(("ip_opt_delete_group: no ipif for group "
3045			    "0x%x, ifaddr 0x%x\n",
3046			    (int)ntohl(group), (int)ntohl(ifaddr)));
3047		}
3048		return (err);
3049	}
3050	ASSERT(ipif != NULL);
3051
3052	ill = ipif->ipif_ill;
3053	/* Operation not supported on a virtual network interface */
3054	if (IS_VNI(ill)) {
3055		ipif_refrele(ipif);
3056		return (EINVAL);
3057	}
3058
3059	if (checkonly) {
3060		/*
3061		 * do not do operation, just pretend to - new T_CHECK
3062		 * semantics. The error return case above if encountered
3063		 * considered a good enough "check" here.
3064		 */
3065		ipif_refrele(ipif);
3066		return (0);
3067	}
3068
3069	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
3070	    NEW_OP);
3071	err = ip_opt_delete_group_excl(connp, group, ipif, fmode, src);
3072	IPSQ_EXIT(ipsq);
3073
3074	ipif_refrele(ipif);
3075	return (err);
3076}
3077
3078/*
3079 * Handle the following optmgmt:
3080 *	IPV6_LEAVE_GROUP		will leave
3081 *	MCAST_LEAVE_GROUP		will leave
3082 *	MCAST_UNBLOCK_SOURCE		will not leave
3083 *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
3084 *
3085 * fmode and src parameters may be used to determine which option is
3086 * being set, as follows (IPV6_LEAVE_GROUP and MCAST_LEAVE_GROUP options
3087 * are functionally equivalent):
3088 *	opt			 fmode			v6src
3089 *	IPV6_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
3090 *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
3091 *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v6 addr
3092 *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v6 addr
3093 *
3094 * Changing the filter mode is not allowed; if a matching ilg already
3095 * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
3096 *
3097 * Handles IPv4-mapped IPv6 multicast addresses by associating them
3098 * with the link-local ipif.  Assumes that if v6group is v4-mapped,
3099 * v6src is also v4-mapped.
3100 */
3101int
3102ip_opt_delete_group_v6(conn_t *connp, boolean_t checkonly,
3103    const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
3104    const in6_addr_t *v6src, mblk_t *first_mp)
3105{
3106	ill_t *ill;
3107	ipif_t	*ipif;
3108	char	buf[INET6_ADDRSTRLEN];
3109	ipaddr_t v4group, v4src;
3110	boolean_t isv6;
3111	ipsq_t	*ipsq;
3112	int	err;
3113
3114	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
3115	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
3116	if (err != 0) {
3117		if (err != EINPROGRESS) {
3118			ip1dbg(("ip_opt_delete_group_v6: no ill for group %s/"
3119			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
3120			    sizeof (buf)), ifindex));
3121		}
3122		return (err);
3123	}
3124	ASSERT((isv6 && ill != NULL) || (!isv6 && ipif != NULL));
3125
3126	/* operation is not supported on the virtual network interface */
3127	if (isv6) {
3128		if (IS_VNI(ill)) {
3129			ill_refrele(ill);
3130			return (EINVAL);
3131		}
3132	} else {
3133		if (IS_VNI(ipif->ipif_ill)) {
3134			ipif_refrele(ipif);
3135			return (EINVAL);
3136		}
3137	}
3138
3139	if (checkonly) {
3140		/*
3141		 * do not do operation, just pretend to - new T_CHECK
3142		 * semantics. The error return case above if encountered
3143		 * considered a good enough "check" here.
3144		 */
3145		if (isv6)
3146			ill_refrele(ill);
3147		else
3148			ipif_refrele(ipif);
3149		return (0);
3150	}
3151
3152	if (!isv6) {
3153		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
3154		    ipsq, NEW_OP);
3155		err = ip_opt_delete_group_excl(connp, v4group, ipif, fmode,
3156		    v4src);
3157		IPSQ_EXIT(ipsq);
3158		ipif_refrele(ipif);
3159	} else {
3160		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
3161		    ipsq, NEW_OP);
3162		err = ip_opt_delete_group_excl_v6(connp, v6group, ill, fmode,
3163		    v6src);
3164		IPSQ_EXIT(ipsq);
3165		ill_refrele(ill);
3166	}
3167
3168	return (err);
3169}
3170
3171/*
3172 * Group mgmt for upper conn that passes things down
3173 * to the interface multicast list (and DLPI)
3174 * These routines can handle new style options that specify an interface name
3175 * as opposed to an interface address (needed for general handling of
3176 * unnumbered interfaces.)
3177 */
3178
3179/*
3180 * Add a group to an upper conn group data structure and pass things down
3181 * to the interface multicast list (and DLPI)
3182 */
3183static int
3184ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode,
3185    ipaddr_t src)
3186{
3187	int	error = 0;
3188	ill_t	*ill;
3189	ilg_t	*ilg;
3190	ilg_stat_t ilgstat;
3191	slist_t	*new_filter = NULL;
3192	int	new_fmode;
3193
3194	ASSERT(IAM_WRITER_IPIF(ipif));
3195
3196	ill = ipif->ipif_ill;
3197
3198	if (!(ill->ill_flags & ILLF_MULTICAST))
3199		return (EADDRNOTAVAIL);
3200
3201	/*
3202	 * conn_ilg[] is protected by conn_lock. Need to hold the conn_lock
3203	 * to walk the conn_ilg[] list in ilg_lookup_ipif(); also needed to
3204	 * serialize 2 threads doing join (sock, group1, hme0:0) and
3205	 * (sock, group2, hme1:0) where hme0 and hme1 map to different ipsqs,
3206	 * but both operations happen on the same conn.
3207	 */
3208	mutex_enter(&connp->conn_lock);
3209	ilg = ilg_lookup_ipif(connp, group, ipif);
3210
3211	/*
3212	 * Depending on the option we're handling, may or may not be okay
3213	 * if group has already been added.  Figure out our rules based
3214	 * on fmode and src params.  Also make sure there's enough room
3215	 * in the filter if we're adding a source to an existing filter.
3216	 */
3217	if (src == INADDR_ANY) {
3218		/* we're joining for all sources, must not have joined */
3219		if (ilg != NULL)
3220			error = EADDRINUSE;
3221	} else {
3222		if (fmode == MODE_IS_EXCLUDE) {
3223			/* (excl {addr}) => block source, must have joined */
3224			if (ilg == NULL)
3225				error = EADDRNOTAVAIL;
3226		}
3227		/* (incl {addr}) => join source, may have joined */
3228
3229		if (ilg != NULL &&
3230		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
3231			error = ENOBUFS;
3232	}
3233	if (error != 0) {
3234		mutex_exit(&connp->conn_lock);
3235		return (error);
3236	}
3237
3238	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
3239
3240	/*
3241	 * Alloc buffer to copy new state into (see below) before
3242	 * we make any changes, so we can bail if it fails.
3243	 */
3244	if ((new_filter = l_alloc()) == NULL) {
3245		mutex_exit(&connp->conn_lock);
3246		return (ENOMEM);
3247	}
3248
3249	if (ilg == NULL) {
3250		ilgstat = ILGSTAT_NEW;
3251		if ((ilg = conn_ilg_alloc(connp)) == NULL) {
3252			mutex_exit(&connp->conn_lock);
3253			l_free(new_filter);
3254			return (ENOMEM);
3255		}
3256		if (src != INADDR_ANY) {
3257			ilg->ilg_filter = l_alloc();
3258			if (ilg->ilg_filter == NULL) {
3259				ilg_delete(connp, ilg, NULL);
3260				mutex_exit(&connp->conn_lock);
3261				l_free(new_filter);
3262				return (ENOMEM);
3263			}
3264			ilg->ilg_filter->sl_numsrc = 1;
3265			IN6_IPADDR_TO_V4MAPPED(src,
3266			    &ilg->ilg_filter->sl_addr[0]);
3267		}
3268		if (group == INADDR_ANY) {
3269			ilg->ilg_v6group = ipv6_all_zeros;
3270		} else {
3271			IN6_IPADDR_TO_V4MAPPED(group, &ilg->ilg_v6group);
3272		}
3273		ilg->ilg_ipif = ipif;
3274		ilg->ilg_ill = NULL;
3275		ilg->ilg_orig_ifindex = 0;
3276		ilg->ilg_fmode = fmode;
3277	} else {
3278		int index;
3279		in6_addr_t v6src;
3280		ilgstat = ILGSTAT_CHANGE;
3281		if (ilg->ilg_fmode != fmode || src == INADDR_ANY) {
3282			mutex_exit(&connp->conn_lock);
3283			l_free(new_filter);
3284			return (EINVAL);
3285		}
3286		if (ilg->ilg_filter == NULL) {
3287			ilg->ilg_filter = l_alloc();
3288			if (ilg->ilg_filter == NULL) {
3289				mutex_exit(&connp->conn_lock);
3290				l_free(new_filter);
3291				return (ENOMEM);
3292			}
3293		}
3294		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3295		if (list_has_addr(ilg->ilg_filter, &v6src)) {
3296			mutex_exit(&connp->conn_lock);
3297			l_free(new_filter);
3298			return (EADDRNOTAVAIL);
3299		}
3300		index = ilg->ilg_filter->sl_numsrc++;
3301		ilg->ilg_filter->sl_addr[index] = v6src;
3302	}
3303
3304	/*
3305	 * Save copy of ilg's filter state to pass to other functions,
3306	 * so we can release conn_lock now.
3307	 */
3308	new_fmode = ilg->ilg_fmode;
3309	l_copy(ilg->ilg_filter, new_filter);
3310
3311	mutex_exit(&connp->conn_lock);
3312
3313	error = ip_addmulti(group, ipif, ilgstat, new_fmode, new_filter);
3314	if (error != 0) {
3315		/*
3316		 * Need to undo what we did before calling ip_addmulti()!
3317		 * Must look up the ilg again since we've not been holding
3318		 * conn_lock.
3319		 */
3320		in6_addr_t v6src;
3321		if (ilgstat == ILGSTAT_NEW)
3322			v6src = ipv6_all_zeros;
3323		else
3324			IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3325		mutex_enter(&connp->conn_lock);
3326		ilg = ilg_lookup_ipif(connp, group, ipif);
3327		ASSERT(ilg != NULL);
3328		ilg_delete(connp, ilg, &v6src);
3329		mutex_exit(&connp->conn_lock);
3330		l_free(new_filter);
3331		return (error);
3332	}
3333
3334	l_free(new_filter);
3335	return (0);
3336}
3337
3338static int
3339ilg_add_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill,
3340    mcast_record_t fmode, const in6_addr_t *v6src)
3341{
3342	int	error = 0;
3343	int	orig_ifindex;
3344	ilg_t	*ilg;
3345	ilg_stat_t ilgstat;
3346	slist_t	*new_filter = NULL;
3347	int	new_fmode;
3348
3349	ASSERT(IAM_WRITER_ILL(ill));
3350
3351	if (!(ill->ill_flags & ILLF_MULTICAST))
3352		return (EADDRNOTAVAIL);
3353
3354	/*
3355	 * conn_lock protects the ilg list.  Serializes 2 threads doing
3356	 * join (sock, group1, hme0) and (sock, group2, hme1) where hme0
3357	 * and hme1 map to different ipsq's, but both operations happen
3358	 * on the same conn.
3359	 */
3360	mutex_enter(&connp->conn_lock);
3361
3362	/*
3363	 * Use the ifindex to do the lookup. We can't use the ill
3364	 * directly because ilg_ill could point to a different ill if
3365	 * things have moved.
3366	 */
3367	orig_ifindex = ill->ill_phyint->phyint_ifindex;
3368	ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex);
3369
3370	/*
3371	 * Depending on the option we're handling, may or may not be okay
3372	 * if group has already been added.  Figure out our rules based
3373	 * on fmode and src params.  Also make sure there's enough room
3374	 * in the filter if we're adding a source to an existing filter.
3375	 */
3376	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3377		/* we're joining for all sources, must not have joined */
3378		if (ilg != NULL)
3379			error = EADDRINUSE;
3380	} else {
3381		if (fmode == MODE_IS_EXCLUDE) {
3382			/* (excl {addr}) => block source, must have joined */
3383			if (ilg == NULL)
3384				error = EADDRNOTAVAIL;
3385		}
3386		/* (incl {addr}) => join source, may have joined */
3387
3388		if (ilg != NULL &&
3389		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
3390			error = ENOBUFS;
3391	}
3392	if (error != 0) {
3393		mutex_exit(&connp->conn_lock);
3394		return (error);
3395	}
3396
3397	/*
3398	 * Alloc buffer to copy new state into (see below) before
3399	 * we make any changes, so we can bail if it fails.
3400	 */
3401	if ((new_filter = l_alloc()) == NULL) {
3402		mutex_exit(&connp->conn_lock);
3403		return (ENOMEM);
3404	}
3405
3406	if (ilg == NULL) {
3407		if ((ilg = conn_ilg_alloc(connp)) == NULL) {
3408			mutex_exit(&connp->conn_lock);
3409			l_free(new_filter);
3410			return (ENOMEM);
3411		}
3412		if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3413			ilg->ilg_filter = l_alloc();
3414			if (ilg->ilg_filter == NULL) {
3415				ilg_delete(connp, ilg, NULL);
3416				mutex_exit(&connp->conn_lock);
3417				l_free(new_filter);
3418				return (ENOMEM);
3419			}
3420			ilg->ilg_filter->sl_numsrc = 1;
3421			ilg->ilg_filter->sl_addr[0] = *v6src;
3422		}
3423		ilgstat = ILGSTAT_NEW;
3424		ilg->ilg_v6group = *v6group;
3425		ilg->ilg_fmode = fmode;
3426		ilg->ilg_ipif = NULL;
3427		/*
3428		 * Choose our target ill to join on. This might be different
3429		 * from the ill we've been given if it's currently down and
3430		 * part of a group.
3431		 *
3432		 * new ill is not refheld; we are writer.
3433		 */
3434		ill = ip_choose_multi_ill(ill, v6group);
3435		ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
3436		ilg->ilg_ill = ill;
3437		/*
3438		 * Remember the orig_ifindex that we joined on, so that we
3439		 * can successfully delete them later on and also search
3440		 * for duplicates if the application wants to join again.
3441		 */
3442		ilg->ilg_orig_ifindex = orig_ifindex;
3443	} else {
3444		int index;
3445		if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3446			mutex_exit(&connp->conn_lock);
3447			l_free(new_filter);
3448			return (EINVAL);
3449		}
3450		if (ilg->ilg_filter == NULL) {
3451			ilg->ilg_filter = l_alloc();
3452			if (ilg->ilg_filter == NULL) {
3453				mutex_exit(&connp->conn_lock);
3454				l_free(new_filter);
3455				return (ENOMEM);
3456			}
3457		}
3458		if (list_has_addr(ilg->ilg_filter, v6src)) {
3459			mutex_exit(&connp->conn_lock);
3460			l_free(new_filter);
3461			return (EADDRNOTAVAIL);
3462		}
3463		ilgstat = ILGSTAT_CHANGE;
3464		index = ilg->ilg_filter->sl_numsrc++;
3465		ilg->ilg_filter->sl_addr[index] = *v6src;
3466		/*
3467		 * The current ill might be different from the one we were
3468		 * asked to join on (if failover has occurred); we should
3469		 * join on the ill stored in the ilg.  The original ill
3470		 * is noted in ilg_orig_ifindex, which matched our request.
3471		 */
3472		ill = ilg->ilg_ill;
3473	}
3474
3475	/*
3476	 * Save copy of ilg's filter state to pass to other functions,
3477	 * so we can release conn_lock now.
3478	 */
3479	new_fmode = ilg->ilg_fmode;
3480	l_copy(ilg->ilg_filter, new_filter);
3481
3482	mutex_exit(&connp->conn_lock);
3483
3484	/*
3485	 * Now update the ill. We wait to do this until after the ilg
3486	 * has been updated because we need to update the src filter
3487	 * info for the ill, which involves looking at the status of
3488	 * all the ilgs associated with this group/interface pair.
3489	 */
3490	error = ip_addmulti_v6(v6group, ill, orig_ifindex, connp->conn_zoneid,
3491	    ilgstat, new_fmode, new_filter);
3492	if (error != 0) {
3493		/*
3494		 * But because we waited, we have to undo the ilg update
3495		 * if ip_addmulti_v6() fails.  We also must lookup ilg
3496		 * again, since we've not been holding conn_lock.
3497		 */
3498		in6_addr_t delsrc =
3499		    (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src;
3500		mutex_enter(&connp->conn_lock);
3501		ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex);
3502		ASSERT(ilg != NULL);
3503		ilg_delete(connp, ilg, &delsrc);
3504		mutex_exit(&connp->conn_lock);
3505		l_free(new_filter);
3506		return (error);
3507	}
3508
3509	l_free(new_filter);
3510
3511	return (0);
3512}
3513
3514/*
3515 * Find an IPv4 ilg matching group, ill and source
3516 */
3517ilg_t *
3518ilg_lookup_ill_withsrc(conn_t *connp, ipaddr_t group, ipaddr_t src, ill_t *ill)
3519{
3520	in6_addr_t v6group, v6src;
3521	int i;
3522	boolean_t isinlist;
3523	ilg_t *ilg;
3524	ipif_t *ipif;
3525	ill_t *ilg_ill;
3526
3527	ASSERT(MUTEX_HELD(&connp->conn_lock));
3528
3529	/*
3530	 * INADDR_ANY is represented as the IPv6 unspecified addr.
3531	 */
3532	if (group == INADDR_ANY)
3533		v6group = ipv6_all_zeros;
3534	else
3535		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
3536
3537	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3538		ilg = &connp->conn_ilg[i];
3539		if ((ipif = ilg->ilg_ipif) == NULL ||
3540		    (ilg->ilg_flags & ILG_DELETED) != 0)
3541			continue;
3542		ASSERT(ilg->ilg_ill == NULL);
3543		ilg_ill = ipif->ipif_ill;
3544		ASSERT(!ilg_ill->ill_isv6);
3545		if (ilg_ill == ill &&
3546		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) {
3547			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
3548				/* no source filter, so this is a match */
3549				return (ilg);
3550			}
3551			break;
3552		}
3553	}
3554	if (i == connp->conn_ilg_inuse)
3555		return (NULL);
3556
3557	/*
3558	 * we have an ilg with matching ill and group; but
3559	 * the ilg has a source list that we must check.
3560	 */
3561	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3562	isinlist = B_FALSE;
3563	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
3564		if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) {
3565			isinlist = B_TRUE;
3566			break;
3567		}
3568	}
3569
3570	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
3571	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
3572		return (ilg);
3573
3574	return (NULL);
3575}
3576
3577/*
3578 * Find an IPv6 ilg matching group, ill, and source
3579 */
3580ilg_t *
3581ilg_lookup_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group,
3582    const in6_addr_t *v6src, ill_t *ill)
3583{
3584	int i;
3585	boolean_t isinlist;
3586	ilg_t *ilg;
3587	ill_t *ilg_ill;
3588
3589	ASSERT(MUTEX_HELD(&connp->conn_lock));
3590
3591	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3592		ilg = &connp->conn_ilg[i];
3593		if ((ilg_ill = ilg->ilg_ill) == NULL ||
3594		    (ilg->ilg_flags & ILG_DELETED) != 0)
3595			continue;
3596		ASSERT(ilg->ilg_ipif == NULL);
3597		ASSERT(ilg_ill->ill_isv6);
3598		if (ilg_ill == ill &&
3599		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
3600			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
3601				/* no source filter, so this is a match */
3602				return (ilg);
3603			}
3604			break;
3605		}
3606	}
3607	if (i == connp->conn_ilg_inuse)
3608		return (NULL);
3609
3610	/*
3611	 * we have an ilg with matching ill and group; but
3612	 * the ilg has a source list that we must check.
3613	 */
3614	isinlist = B_FALSE;
3615	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
3616		if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) {
3617			isinlist = B_TRUE;
3618			break;
3619		}
3620	}
3621
3622	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
3623	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
3624		return (ilg);
3625
3626	return (NULL);
3627}
3628
3629/*
3630 * Get the ilg whose ilg_orig_ifindex is associated with ifindex.
3631 * This is useful when the interface fails and we have moved
3632 * to a new ill, but still would like to locate using the index
3633 * that we originally used to join. Used only for IPv6 currently.
3634 */
3635static ilg_t *
3636ilg_lookup_ill_index_v6(conn_t *connp, const in6_addr_t *v6group, int ifindex)
3637{
3638	ilg_t	*ilg;
3639	int	i;
3640
3641	ASSERT(MUTEX_HELD(&connp->conn_lock));
3642	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3643		ilg = &connp->conn_ilg[i];
3644		if (ilg->ilg_ill == NULL ||
3645		    (ilg->ilg_flags & ILG_DELETED) != 0)
3646			continue;
3647		/* ilg_ipif is NULL for V6 */
3648		ASSERT(ilg->ilg_ipif == NULL);
3649		ASSERT(ilg->ilg_orig_ifindex != 0);
3650		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group) &&
3651		    ilg->ilg_orig_ifindex == ifindex) {
3652			return (ilg);
3653		}
3654	}
3655	return (NULL);
3656}
3657
3658/*
3659 * Find an IPv6 ilg matching group and ill
3660 */
3661ilg_t *
3662ilg_lookup_ill_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill)
3663{
3664	ilg_t	*ilg;
3665	int	i;
3666	ill_t 	*mem_ill;
3667
3668	ASSERT(MUTEX_HELD(&connp->conn_lock));
3669
3670	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3671		ilg = &connp->conn_ilg[i];
3672		if ((mem_ill = ilg->ilg_ill) == NULL ||
3673		    (ilg->ilg_flags & ILG_DELETED) != 0)
3674			continue;
3675		ASSERT(ilg->ilg_ipif == NULL);
3676		ASSERT(mem_ill->ill_isv6);
3677		if (mem_ill == ill &&
3678		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group))
3679			return (ilg);
3680	}
3681	return (NULL);
3682}
3683
3684/*
3685 * Find an IPv4 ilg matching group and ipif
3686 */
3687static ilg_t *
3688ilg_lookup_ipif(conn_t *connp, ipaddr_t group, ipif_t *ipif)
3689{
3690	in6_addr_t v6group;
3691	int	i;
3692	ilg_t	*ilg;
3693
3694	ASSERT(MUTEX_HELD(&connp->conn_lock));
3695	ASSERT(!ipif->ipif_ill->ill_isv6);
3696
3697	if (group == INADDR_ANY)
3698		v6group = ipv6_all_zeros;
3699	else
3700		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
3701
3702	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3703		ilg = &connp->conn_ilg[i];
3704		if ((ilg->ilg_flags & ILG_DELETED) == 0 &&
3705		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group) &&
3706		    ilg->ilg_ipif == ipif)
3707			return (ilg);
3708	}
3709	return (NULL);
3710}
3711
3712/*
3713 * If a source address is passed in (src != NULL and src is not
3714 * unspecified), remove the specified src addr from the given ilg's
3715 * filter list, else delete the ilg.
3716 */
3717static void
3718ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src)
3719{
3720	int	i;
3721
3722	ASSERT((ilg->ilg_ipif != NULL) ^ (ilg->ilg_ill != NULL));
3723	ASSERT(ilg->ilg_ipif == NULL || IAM_WRITER_IPIF(ilg->ilg_ipif));
3724	ASSERT(ilg->ilg_ill == NULL || IAM_WRITER_ILL(ilg->ilg_ill));
3725	ASSERT(MUTEX_HELD(&connp->conn_lock));
3726	ASSERT(!(ilg->ilg_flags & ILG_DELETED));
3727
3728	if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) {
3729		if (connp->conn_ilg_walker_cnt != 0) {
3730			ilg->ilg_flags |= ILG_DELETED;
3731			return;
3732		}
3733
3734		FREE_SLIST(ilg->ilg_filter);
3735
3736		i = ilg - &connp->conn_ilg[0];
3737		ASSERT(i >= 0 && i < connp->conn_ilg_inuse);
3738
3739		/* Move other entries up one step */
3740		connp->conn_ilg_inuse--;
3741		for (; i < connp->conn_ilg_inuse; i++)
3742			connp->conn_ilg[i] = connp->conn_ilg[i+1];
3743
3744		if (connp->conn_ilg_inuse == 0) {
3745			mi_free((char *)connp->conn_ilg);
3746			connp->conn_ilg = NULL;
3747			cv_broadcast(&connp->conn_refcv);
3748		}
3749	} else {
3750		l_remove(ilg->ilg_filter, src);
3751	}
3752}
3753
3754/*
3755 * Called from conn close. No new ilg can be added or removed.
3756 * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete
3757 * will return error if conn has started closing.
3758 */
3759void
3760ilg_delete_all(conn_t *connp)
3761{
3762	int	i;
3763	ipif_t	*ipif = NULL;
3764	ill_t	*ill = NULL;
3765	ilg_t	*ilg;
3766	in6_addr_t v6group;
3767	boolean_t success;
3768	ipsq_t	*ipsq;
3769	int	orig_ifindex;
3770
3771	mutex_enter(&connp->conn_lock);
3772retry:
3773	ILG_WALKER_HOLD(connp);
3774	for (i = connp->conn_ilg_inuse - 1; i >= 0; ) {
3775		ilg = &connp->conn_ilg[i];
3776		/*
3777		 * Since this walk is not atomic (we drop the
3778		 * conn_lock and wait in ipsq_enter) we need
3779		 * to check for the ILG_DELETED flag.
3780		 */
3781		if (ilg->ilg_flags & ILG_DELETED) {
3782			/* Go to the next ilg */
3783			i--;
3784			continue;
3785		}
3786		v6group = ilg->ilg_v6group;
3787
3788		if (IN6_IS_ADDR_V4MAPPED(&v6group)) {
3789			ipif = ilg->ilg_ipif;
3790			ill = ipif->ipif_ill;
3791		} else {
3792			ipif = NULL;
3793			ill = ilg->ilg_ill;
3794		}
3795		/*
3796		 * We may not be able to refhold the ill if the ill/ipif
3797		 * is changing. But we need to make sure that the ill will
3798		 * not vanish. So we just bump up the ill_waiter count.
3799		 * If we are unable to do even that, then the ill is closing,
3800		 * in which case the unplumb thread will handle the cleanup,
3801		 * and we move on to the next ilg.
3802		 */
3803		if (!ill_waiter_inc(ill)) {
3804			/* Go to the next ilg */
3805			i--;
3806			continue;
3807		}
3808		mutex_exit(&connp->conn_lock);
3809		/*
3810		 * To prevent deadlock between ill close which waits inside
3811		 * the perimeter, and conn close, ipsq_enter returns error,
3812		 * the moment ILL_CONDEMNED is set, in which case ill close
3813		 * takes responsibility to cleanup the ilgs. Note that we
3814		 * have not yet set condemned flag, otherwise the conn can't
3815		 * be refheld for cleanup by those routines and it would be
3816		 * a mutual deadlock.
3817		 */
3818		success = ipsq_enter(ill, B_FALSE);
3819		ipsq = ill->ill_phyint->phyint_ipsq;
3820		ill_waiter_dcr(ill);
3821		mutex_enter(&connp->conn_lock);
3822		if (!success) {
3823			/* Go to the next ilg */
3824			i--;
3825			continue;
3826		}
3827
3828		/*
3829		 * Make sure that nothing has changed under. For eg.
3830		 * a failover/failback can change ilg_ill while we were
3831		 * waiting to become exclusive above
3832		 */
3833		if (IN6_IS_ADDR_V4MAPPED(&v6group)) {
3834			ipif = ilg->ilg_ipif;
3835			ill = ipif->ipif_ill;
3836		} else {
3837			ipif = NULL;
3838			ill = ilg->ilg_ill;
3839		}
3840		if (!IAM_WRITER_ILL(ill) || (ilg->ilg_flags & ILG_DELETED)) {
3841			/*
3842			 * The ilg has changed under us probably due
3843			 * to a failover or unplumb. Retry on the same ilg.
3844			 */
3845			mutex_exit(&connp->conn_lock);
3846			ipsq_exit(ipsq, B_TRUE, B_TRUE);
3847			mutex_enter(&connp->conn_lock);
3848			continue;
3849		}
3850		v6group = ilg->ilg_v6group;
3851		orig_ifindex = ilg->ilg_orig_ifindex;
3852		ilg_delete(connp, ilg, NULL);
3853		mutex_exit(&connp->conn_lock);
3854
3855		if (ipif != NULL)
3856			(void) ip_delmulti(V4_PART_OF_V6(v6group), ipif,
3857			    B_FALSE, B_TRUE);
3858
3859		else
3860			(void) ip_delmulti_v6(&v6group, ill, orig_ifindex,
3861			    connp->conn_zoneid, B_FALSE, B_TRUE);
3862
3863		ipsq_exit(ipsq, B_TRUE, B_TRUE);
3864		mutex_enter(&connp->conn_lock);
3865		/* Go to the next ilg */
3866		i--;
3867	}
3868	ILG_WALKER_RELE(connp);
3869
3870	/* If any ill was skipped above wait and retry */
3871	if (connp->conn_ilg_inuse != 0) {
3872		cv_wait(&connp->conn_refcv, &connp->conn_lock);
3873		goto retry;
3874	}
3875	mutex_exit(&connp->conn_lock);
3876}
3877
3878/*
3879 * Called from ill close by ipcl_walk for clearing conn_ilg and
3880 * conn_multicast_ipif for a given ipif. conn is held by caller.
3881 * Note that ipcl_walk only walks conns that are not yet condemned.
3882 * condemned conns can't be refheld. For this reason, conn must become clean
3883 * first, i.e. it must not refer to any ill/ire/ipif and then only set
3884 * condemned flag.
3885 */
3886static void
3887conn_delete_ipif(conn_t *connp, caddr_t arg)
3888{
3889	ipif_t	*ipif = (ipif_t *)arg;
3890	int	i;
3891	char	group_buf1[INET6_ADDRSTRLEN];
3892	char	group_buf2[INET6_ADDRSTRLEN];
3893	ipaddr_t group;
3894	ilg_t	*ilg;
3895
3896	/*
3897	 * Even though conn_ilg_inuse can change while we are in this loop,
3898	 * i.e.ilgs can be created or deleted on this connp, no new ilgs can
3899	 * be created or deleted for this connp, on this ill, since this ill
3900	 * is the perimeter. So we won't miss any ilg in this cleanup.
3901	 */
3902	mutex_enter(&connp->conn_lock);
3903
3904	/*
3905	 * Increment the walker count, so that ilg repacking does not
3906	 * occur while we are in the loop.
3907	 */
3908	ILG_WALKER_HOLD(connp);
3909	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
3910		ilg = &connp->conn_ilg[i];
3911		if (ilg->ilg_ipif != ipif || (ilg->ilg_flags & ILG_DELETED))
3912			continue;
3913		/*
3914		 * ip_close cannot be cleaning this ilg at the same time.
3915		 * since it also has to execute in this ill's perimeter which
3916		 * we are now holding. Only a clean conn can be condemned.
3917		 */
3918		ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));
3919
3920		/* Blow away the membership */
3921		ip1dbg(("conn_delete_ilg_ipif: %s on %s (%s)\n",
3922		    inet_ntop(AF_INET6, &connp->conn_ilg[i].ilg_v6group,
3923		    group_buf1, sizeof (group_buf1)),
3924		    inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr,
3925		    group_buf2, sizeof (group_buf2)),
3926		    ipif->ipif_ill->ill_name));
3927
3928		/* ilg_ipif is NULL for V6, so we won't be here */
3929		ASSERT(IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group));
3930
3931		group = V4_PART_OF_V6(ilg->ilg_v6group);
3932		ilg_delete(connp, &connp->conn_ilg[i], NULL);
3933		mutex_exit(&connp->conn_lock);
3934
3935		(void) ip_delmulti(group, ipif, B_FALSE, B_TRUE);
3936		mutex_enter(&connp->conn_lock);
3937	}
3938
3939	/*
3940	 * If we are the last walker, need to physically delete the
3941	 * ilgs and repack.
3942	 */
3943	ILG_WALKER_RELE(connp);
3944
3945	if (connp->conn_multicast_ipif == ipif) {
3946		/* Revert to late binding */
3947		connp->conn_multicast_ipif = NULL;
3948	}
3949	mutex_exit(&connp->conn_lock);
3950
3951	conn_delete_ire(connp, (caddr_t)ipif);
3952}
3953
3954/*
3955 * Called from ill close by ipcl_walk for clearing conn_ilg and
3956 * conn_multicast_ill for a given ill. conn is held by caller.
3957 * Note that ipcl_walk only walks conns that are not yet condemned.
3958 * condemned conns can't be refheld. For this reason, conn must become clean
3959 * first, i.e. it must not refer to any ill/ire/ipif and then only set
3960 * condemned flag.
3961 */
3962static void
3963conn_delete_ill(conn_t *connp, caddr_t arg)
3964{
3965	ill_t	*ill = (ill_t *)arg;
3966	int	i;
3967	char	group_buf[INET6_ADDRSTRLEN];
3968	in6_addr_t v6group;
3969	int	orig_ifindex;
3970	ilg_t	*ilg;
3971
3972	/*
3973	 * Even though conn_ilg_inuse can change while we are in this loop,
3974	 * no new ilgs can be created/deleted for this connp, on this
3975	 * ill, since this ill is the perimeter. So we won't miss any ilg
3976	 * in this cleanup.
3977	 */
3978	mutex_enter(&connp->conn_lock);
3979
3980	/*
3981	 * Increment the walker count, so that ilg repacking does not
3982	 * occur while we are in the loop.
3983	 */
3984	ILG_WALKER_HOLD(connp);
3985	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
3986		ilg = &connp->conn_ilg[i];
3987		if ((ilg->ilg_ill == ill) && !(ilg->ilg_flags & ILG_DELETED)) {
3988			/*
3989			 * ip_close cannot be cleaning this ilg at the same
3990			 * time, since it also has to execute in this ill's
3991			 * perimeter which we are now holding. Only a clean
3992			 * conn can be condemned.
3993			 */
3994			ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));
3995
3996			/* Blow away the membership */
3997			ip1dbg(("conn_delete_ilg_ill: %s on %s\n",
3998			    inet_ntop(AF_INET6, &ilg->ilg_v6group,
3999			    group_buf, sizeof (group_buf)),
4000			    ill->ill_name));
4001
4002			v6group = ilg->ilg_v6group;
4003			orig_ifindex = ilg->ilg_orig_ifindex;
4004			ilg_delete(connp, ilg, NULL);
4005			mutex_exit(&connp->conn_lock);
4006
4007			(void) ip_delmulti_v6(&v6group, ill, orig_ifindex,
4008			    connp->conn_zoneid, B_FALSE, B_TRUE);
4009			mutex_enter(&connp->conn_lock);
4010		}
4011	}
4012	/*
4013	 * If we are the last walker, need to physically delete the
4014	 * ilgs and repack.
4015	 */
4016	ILG_WALKER_RELE(connp);
4017
4018	if (connp->conn_multicast_ill == ill) {
4019		/* Revert to late binding */
4020		connp->conn_multicast_ill = NULL;
4021		connp->conn_orig_multicast_ifindex = 0;
4022	}
4023	mutex_exit(&connp->conn_lock);
4024}
4025
4026/*
4027 * Called when an ipif is unplumbed to make sure that there are no
4028 * dangling conn references to that ipif.
4029 * Handles ilg_ipif and conn_multicast_ipif
4030 */
4031void
4032reset_conn_ipif(ipif)
4033	ipif_t	*ipif;
4034{
4035	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
4036
4037	ipcl_walk(conn_delete_ipif, (caddr_t)ipif, ipst);
4038}
4039
4040/*
4041 * Called when an ill is unplumbed to make sure that there are no
4042 * dangling conn references to that ill.
4043 * Handles ilg_ill, conn_multicast_ill.
4044 */
4045void
4046reset_conn_ill(ill_t *ill)
4047{
4048	ip_stack_t	*ipst = ill->ill_ipst;
4049
4050	ipcl_walk(conn_delete_ill, (caddr_t)ill, ipst);
4051}
4052
4053#ifdef DEBUG
4054/*
4055 * Walk functions walk all the interfaces in the system to make
4056 * sure that there is no refernece to the ipif or ill that is
4057 * going away.
4058 */
4059int
4060ilm_walk_ill(ill_t *ill)
4061{
4062	int cnt = 0;
4063	ill_t *till;
4064	ilm_t *ilm;
4065	ill_walk_context_t ctx;
4066	ip_stack_t	*ipst = ill->ill_ipst;
4067
4068	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
4069	till = ILL_START_WALK_ALL(&ctx, ipst);
4070	for (; till != NULL; till = ill_next(&ctx, till)) {
4071		mutex_enter(&till->ill_lock);
4072		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
4073			if (ilm->ilm_ill == ill) {
4074				cnt++;
4075			}
4076		}
4077		mutex_exit(&till->ill_lock);
4078	}
4079	rw_exit(&ipst->ips_ill_g_lock);
4080
4081	return (cnt);
4082}
4083
4084/*
4085 * This function is called before the ipif is freed.
4086 */
4087int
4088ilm_walk_ipif(ipif_t *ipif)
4089{
4090	int cnt = 0;
4091	ill_t *till;
4092	ilm_t *ilm;
4093	ill_walk_context_t ctx;
4094	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
4095
4096	till = ILL_START_WALK_ALL(&ctx, ipst);
4097	for (; till != NULL; till = ill_next(&ctx, till)) {
4098		mutex_enter(&till->ill_lock);
4099		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
4100			if (ilm->ilm_ipif == ipif) {
4101					cnt++;
4102			}
4103		}
4104		mutex_exit(&till->ill_lock);
4105	}
4106	return (cnt);
4107}
4108#endif
4109