1/*
2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*-
29 * Copyright (c) 2007-2009 Bruce Simpson.
30 * Copyright (c) 1988 Stephen Deering.
31 * Copyright (c) 1992, 1993
32 *	The Regents of the University of California.  All rights reserved.
33 *
34 * This code is derived from software contributed to Berkeley by
35 * Stephen Deering of Stanford University.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 *    notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 *    notice, this list of conditions and the following disclaimer in the
44 *    documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 *    must display the following acknowledgement:
47 *	This product includes software developed by the University of
48 *	California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 *    may be used to endorse or promote products derived from this software
51 *    without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
66 */
67/*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections.  This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74/*
75 * Internet Group Management Protocol (IGMP) routines.
76 * [RFC1112, RFC2236, RFC3376]
77 *
78 * Written by Steve Deering, Stanford, May 1988.
79 * Modified by Rosen Sharma, Stanford, Aug 1994.
80 * Modified by Bill Fenner, Xerox PARC, Feb 1995.
81 * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995.
82 * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson.
83 *
84 * MULTICAST Revision: 3.5.1.4
85 */
86
87#include <sys/cdefs.h>
88
89#include <sys/param.h>
90#include <sys/systm.h>
91#include <sys/malloc.h>
92#include <sys/mbuf.h>
93#include <sys/socket.h>
94#include <sys/protosw.h>
95#include <sys/kernel.h>
96#include <sys/sysctl.h>
97#include <sys/mcache.h>
98
99#include <libkern/libkern.h>
100#include <kern/zalloc.h>
101
102#include <net/if.h>
103#include <net/route.h>
104
105#include <netinet/in.h>
106#include <netinet/in_var.h>
107#include <netinet/in_systm.h>
108#include <netinet/ip.h>
109#include <netinet/ip_var.h>
110#include <netinet/igmp.h>
111#include <netinet/igmp_var.h>
112#include <netinet/kpi_ipfilter_var.h>
113
114#ifdef IGMP_DEBUG
115__inline__ char *
116inet_ntoa(struct in_addr ina)
117{
118	static char buf[4*sizeof "123"];
119	unsigned char *ucp = (unsigned char *)&ina;
120
121	snprintf(buf, sizeof(buf), "%d.%d.%d.%d",
122		ucp[0] & 0xff,
123		ucp[1] & 0xff,
124		ucp[2] & 0xff,
125		ucp[3] & 0xff);
126	return buf;
127}
128#endif
129
130SLIST_HEAD(igmp_inm_relhead, in_multi);
131
132static void	igi_initvar(struct igmp_ifinfo *, struct ifnet *, int);
133static struct igmp_ifinfo *igi_alloc(int);
134static void	igi_free(struct igmp_ifinfo *);
135static void	igi_delete(const struct ifnet *, struct igmp_inm_relhead *);
136static void	igmp_dispatch_queue(struct igmp_ifinfo *, struct ifqueue *,
137    int, const int, struct ifnet *);
138static void	igmp_final_leave(struct in_multi *, struct igmp_ifinfo *);
139static int	igmp_handle_state_change(struct in_multi *,
140		    struct igmp_ifinfo *);
141static int	igmp_initial_join(struct in_multi *, struct igmp_ifinfo *);
142static int	igmp_input_v1_query(struct ifnet *, const struct ip *,
143		    const struct igmp *);
144static int	igmp_input_v2_query(struct ifnet *, const struct ip *,
145		    const struct igmp *);
146static int	igmp_input_v3_query(struct ifnet *, const struct ip *,
147		    /*const*/ struct igmpv3 *);
148static int	igmp_input_v3_group_query(struct in_multi *,
149		     int, /*const*/ struct igmpv3 *);
150static int	igmp_input_v1_report(struct ifnet *, /*const*/ struct ip *,
151		    /*const*/ struct igmp *);
152static int	igmp_input_v2_report(struct ifnet *, /*const*/ struct ip *,
153		    /*const*/ struct igmp *);
154void		igmp_sendpkt(struct mbuf *, struct ifnet *);
155static __inline__ int	igmp_isgroupreported(const struct in_addr);
156static struct mbuf *
157		igmp_ra_alloc(void);
158#ifdef IGMP_DEBUG
159static const char *	igmp_rec_type_to_str(const int);
160#endif
161static void	igmp_set_version(struct igmp_ifinfo *, const int);
162static void	igmp_flush_relq(struct igmp_ifinfo *,
163    struct igmp_inm_relhead *);
164static int	igmp_v1v2_queue_report(struct in_multi *, const int);
165static void	igmp_v1v2_process_group_timer(struct in_multi *, const int);
166static void	igmp_v1v2_process_querier_timers(struct igmp_ifinfo *);
167static void	igmp_v2_update_group(struct in_multi *, const int);
168static void	igmp_v3_cancel_link_timers(struct igmp_ifinfo *);
169static void	igmp_v3_dispatch_general_query(struct igmp_ifinfo *);
170static struct mbuf *
171		igmp_v3_encap_report(struct ifnet *, struct mbuf *);
172static int	igmp_v3_enqueue_group_record(struct ifqueue *,
173		    struct in_multi *, const int, const int, const int);
174static int	igmp_v3_enqueue_filter_change(struct ifqueue *,
175		    struct in_multi *);
176static void	igmp_v3_process_group_timers(struct igmp_ifinfo *,
177		    struct ifqueue *, struct ifqueue *, struct in_multi *,
178		    const int);
179static int	igmp_v3_merge_state_changes(struct in_multi *,
180		    struct ifqueue *);
181static void	igmp_v3_suppress_group_record(struct in_multi *);
182static int	sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS;
183static int	sysctl_igmp_gsr SYSCTL_HANDLER_ARGS;
184static int	sysctl_igmp_default_version SYSCTL_HANDLER_ARGS;
185
186struct mbuf		*m_raopt;		 /* Router Alert option */
187
188static int interface_timers_running;		/* IGMPv3 general
189				  		 * query response */
190static int state_change_timers_running;		/* IGMPv3 state-change
191					 	 * retransmit */
192static int current_state_timers_running;	/* IGMPv1/v2 host
193						 * report; IGMPv3 g/sg
194						 * query response */
195
196static LIST_HEAD(, igmp_ifinfo) igi_head;
197static struct igmpstat_v3 igmpstat_v3 = {
198	.igps_version = IGPS_VERSION_3,
199	.igps_len = sizeof(struct igmpstat_v3),
200};
201static struct igmpstat igmpstat; /* old IGMPv2 stats structure */
202static struct timeval igmp_gsrdelay = {10, 0};
203
204static int igmp_recvifkludge = 1;
205static int igmp_sendra = 1;
206static int igmp_sendlocal = 1;
207static int igmp_v1enable = 1;
208static int igmp_v2enable = 1;
209static int igmp_legacysupp = 0;
210static int igmp_default_version = IGMP_VERSION_3;
211
212SYSCTL_STRUCT(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
213    &igmpstat, igmpstat, "");
214SYSCTL_STRUCT(_net_inet_igmp, OID_AUTO, v3stats,
215    CTLFLAG_RD | CTLFLAG_LOCKED, &igmpstat_v3, igmpstat_v3, "");
216SYSCTL_INT(_net_inet_igmp, OID_AUTO, recvifkludge, CTLFLAG_RW | CTLFLAG_LOCKED,
217    &igmp_recvifkludge, 0,
218    "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address");
219SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendra, CTLFLAG_RW | CTLFLAG_LOCKED,
220    &igmp_sendra, 0,
221    "Send IP Router Alert option in IGMPv2/v3 messages");
222SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendlocal, CTLFLAG_RW | CTLFLAG_LOCKED,
223    &igmp_sendlocal, 0,
224    "Send IGMP membership reports for 224.0.0.0/24 groups");
225SYSCTL_INT(_net_inet_igmp, OID_AUTO, v1enable, CTLFLAG_RW | CTLFLAG_LOCKED,
226    &igmp_v1enable, 0,
227    "Enable backwards compatibility with IGMPv1");
228SYSCTL_INT(_net_inet_igmp, OID_AUTO, v2enable, CTLFLAG_RW | CTLFLAG_LOCKED,
229    &igmp_v2enable, 0,
230    "Enable backwards compatibility with IGMPv2");
231SYSCTL_INT(_net_inet_igmp, OID_AUTO, legacysupp, CTLFLAG_RW | CTLFLAG_LOCKED,
232    &igmp_legacysupp, 0,
233    "Allow v1/v2 reports to suppress v3 group responses");
234SYSCTL_PROC(_net_inet_igmp, OID_AUTO, default_version,
235  CTLTYPE_INT | CTLFLAG_RW,
236  &igmp_default_version, 0, sysctl_igmp_default_version, "I",
237    "Default version of IGMP to run on each interface");
238SYSCTL_PROC(_net_inet_igmp, OID_AUTO, gsrdelay,
239    CTLTYPE_INT | CTLFLAG_RW,
240    &igmp_gsrdelay.tv_sec, 0, sysctl_igmp_gsr, "I",
241    "Rate limit for IGMPv3 Group-and-Source queries in seconds");
242#ifdef IGMP_DEBUG
243int igmp_debug = 0;
244SYSCTL_INT(_net_inet_igmp, OID_AUTO,
245	debug, CTLFLAG_RW | CTLFLAG_LOCKED, &igmp_debug, 0, "");
246#endif
247
248SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_LOCKED,
249    sysctl_igmp_ifinfo, "Per-interface IGMPv3 state");
250
251/* Lock group and attribute for igmp_mtx */
252static lck_attr_t	*igmp_mtx_attr;
253static lck_grp_t	*igmp_mtx_grp;
254static lck_grp_attr_t	*igmp_mtx_grp_attr;
255
256/*
257 * Locking and reference counting:
258 *
259 * igmp_mtx mainly protects igi_head.  In cases where both igmp_mtx and
260 * in_multihead_lock must be held, the former must be acquired first in order
261 * to maintain lock ordering.  It is not a requirement that igmp_mtx be
262 * acquired first before in_multihead_lock, but in case both must be acquired
263 * in succession, the correct lock ordering must be followed.
264 *
265 * Instead of walking the if_multiaddrs list at the interface and returning
266 * the ifma_protospec value of a matching entry, we search the global list
267 * of in_multi records and find it that way; this is done with in_multihead
268 * lock held.  Doing so avoids the race condition issues that many other BSDs
269 * suffer from (therefore in our implementation, ifma_protospec will never be
270 * NULL for as long as the in_multi is valid.)
271 *
272 * The above creates a requirement for the in_multi to stay in in_multihead
273 * list even after the final IGMP leave (in IGMPv3 mode) until no longer needs
274 * be retransmitted (this is not required for IGMPv1/v2.)  In order to handle
275 * this, the request and reference counts of the in_multi are bumped up when
276 * the state changes to IGMP_LEAVING_MEMBER, and later dropped in the timeout
277 * handler.  Each in_multi holds a reference to the underlying igmp_ifinfo.
278 *
279 * Thus, the permitted lock oder is:
280 *
281 *	igmp_mtx, in_multihead_lock, inm_lock, igi_lock
282 *
283 * Any may be taken independently, but if any are held at the same time,
284 * the above lock order must be followed.
285 */
286static decl_lck_mtx_data(, igmp_mtx);
287static int igmp_timers_are_running;
288
289#define	IGMP_ADD_DETACHED_INM(_head, _inm) {				\
290	SLIST_INSERT_HEAD(_head, _inm, inm_dtle);			\
291}
292
293#define	IGMP_REMOVE_DETACHED_INM(_head) {				\
294	struct in_multi *_inm, *_inm_tmp;				\
295	SLIST_FOREACH_SAFE(_inm, _head, inm_dtle, _inm_tmp) {		\
296		SLIST_REMOVE(_head, _inm, in_multi, inm_dtle);		\
297		INM_REMREF(_inm);					\
298	}								\
299	VERIFY(SLIST_EMPTY(_head));					\
300}
301
302#define	IGI_ZONE_MAX		64		/* maximum elements in zone */
303#define	IGI_ZONE_NAME		"igmp_ifinfo"	/* zone name */
304
305static unsigned int igi_size;			/* size of zone element */
306static struct zone *igi_zone;			/* zone for igmp_ifinfo */
307
308#ifdef IGMP_DEBUG
309static __inline char *
310inet_ntoa_haddr(in_addr_t haddr)
311{
312	struct in_addr ia;
313
314	ia.s_addr = htonl(haddr);
315	return (inet_ntoa(ia));
316}
317#endif
318/*
319 * Retrieve or set default IGMP version.
320 */
321static int
322sysctl_igmp_default_version SYSCTL_HANDLER_ARGS
323{
324#pragma unused(oidp, arg2)
325	int	 error;
326	int	 new;
327
328	lck_mtx_lock(&igmp_mtx);
329
330	error = SYSCTL_OUT(req, arg1, sizeof(int));
331	if (error || !req->newptr)
332		goto out_locked;
333
334	new = igmp_default_version;
335
336	error = SYSCTL_IN(req, &new, sizeof(int));
337	if (error)
338		goto out_locked;
339
340	if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) {
341		error = EINVAL;
342		goto out_locked;
343	}
344
345	IGMP_PRINTF(("change igmp_default_version from %d to %d\n",
346	     igmp_default_version, new));
347
348	igmp_default_version = new;
349
350out_locked:
351	lck_mtx_unlock(&igmp_mtx);
352	return (error);
353}
354
355/*
356 * Retrieve or set threshold between group-source queries in seconds.
357 *
358 */
359static int
360sysctl_igmp_gsr SYSCTL_HANDLER_ARGS
361{
362#pragma unused(arg1, arg2)
363	int error;
364	int i;
365
366	lck_mtx_lock(&igmp_mtx);
367
368	i = igmp_gsrdelay.tv_sec;
369
370	error = sysctl_handle_int(oidp, &i, 0, req);
371	if (error || !req->newptr)
372		goto out_locked;
373
374	if (i < -1 || i >= 60) {
375		error = EINVAL;
376		goto out_locked;
377	}
378
379	igmp_gsrdelay.tv_sec = i;
380
381out_locked:
382	lck_mtx_unlock(&igmp_mtx);
383	return (error);
384}
385
386/*
387 * Expose struct igmp_ifinfo to userland, keyed by ifindex.
388 * For use by ifmcstat(8).
389 *
390 */
391static int
392sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS
393{
394#pragma unused(oidp)
395	int			*name;
396	int			 error;
397	u_int			 namelen;
398	struct ifnet		*ifp;
399	struct igmp_ifinfo	*igi;
400	struct igmp_ifinfo_u	igi_u;
401
402	name = (int *)arg1;
403	namelen = arg2;
404
405	if (req->newptr != USER_ADDR_NULL)
406		return (EPERM);
407
408	if (namelen != 1)
409		return (EINVAL);
410
411	lck_mtx_lock(&igmp_mtx);
412
413	if (name[0] <= 0 || name[0] > (u_int)if_index) {
414		error = ENOENT;
415		goto out_locked;
416	}
417
418	error = ENOENT;
419
420	ifnet_head_lock_shared();
421	ifp = ifindex2ifnet[name[0]];
422	ifnet_head_done();
423	if (ifp == NULL)
424		goto out_locked;
425
426	bzero(&igi_u, sizeof (igi_u));
427
428	LIST_FOREACH(igi, &igi_head, igi_link) {
429		IGI_LOCK(igi);
430		if (ifp != igi->igi_ifp) {
431			IGI_UNLOCK(igi);
432			continue;
433		}
434		igi_u.igi_ifindex = igi->igi_ifp->if_index;
435		igi_u.igi_version = igi->igi_version;
436		igi_u.igi_v1_timer = igi->igi_v1_timer;
437		igi_u.igi_v2_timer = igi->igi_v2_timer;
438		igi_u.igi_v3_timer = igi->igi_v3_timer;
439		igi_u.igi_flags = igi->igi_flags;
440		igi_u.igi_rv = igi->igi_rv;
441		igi_u.igi_qi = igi->igi_qi;
442		igi_u.igi_qri = igi->igi_qri;
443		igi_u.igi_uri = igi->igi_uri;
444		IGI_UNLOCK(igi);
445
446		error = SYSCTL_OUT(req, &igi_u, sizeof (igi_u));
447		break;
448	}
449
450out_locked:
451	lck_mtx_unlock(&igmp_mtx);
452	return (error);
453}
454
455/*
456 * Dispatch an entire queue of pending packet chains
457 *
458 * Must not be called with inm_lock held.
459 */
460static void
461igmp_dispatch_queue(struct igmp_ifinfo *igi, struct ifqueue *ifq, int limit,
462    const int loop, struct ifnet *ifp)
463{
464	struct mbuf *m;
465	struct ip *ip;
466
467	if (igi != NULL)
468		IGI_LOCK_ASSERT_HELD(igi);
469
470	for (;;) {
471		IF_DEQUEUE(ifq, m);
472		if (m == NULL)
473			break;
474		IGMP_PRINTF(("%s: dispatch %p from %p\n", __func__, ifq, m));
475		ip = mtod(m, struct ip *);
476		if (loop)
477			m->m_flags |= M_IGMP_LOOP;
478		if (igi != NULL)
479			IGI_UNLOCK(igi);
480		igmp_sendpkt(m, ifp);
481		if (igi != NULL)
482			IGI_LOCK(igi);
483		if (--limit == 0)
484			break;
485	}
486
487	if (igi != NULL)
488		IGI_LOCK_ASSERT_HELD(igi);
489}
490
491/*
492 * Filter outgoing IGMP report state by group.
493 *
494 * Reports are ALWAYS suppressed for ALL-HOSTS (224.0.0.1).
495 * If the net.inet.igmp.sendlocal sysctl is 0, then IGMP reports are
496 * disabled for all groups in the 224.0.0.0/24 link-local scope. However,
497 * this may break certain IGMP snooping switches which rely on the old
498 * report behaviour.
499 *
500 * Return zero if the given group is one for which IGMP reports
501 * should be suppressed, or non-zero if reports should be issued.
502 */
503
504static __inline__
505int igmp_isgroupreported(const struct in_addr addr)
506{
507
508	if (in_allhosts(addr) ||
509	    ((!igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr)))))
510		return (0);
511
512	return (1);
513}
514
515/*
516 * Construct a Router Alert option to use in outgoing packets.
517 */
518static struct mbuf *
519igmp_ra_alloc(void)
520{
521	struct mbuf	*m;
522	struct ipoption	*p;
523
524	MGET(m, M_WAITOK, MT_DATA);
525	p = mtod(m, struct ipoption *);
526	p->ipopt_dst.s_addr = INADDR_ANY;
527	p->ipopt_list[0] = IPOPT_RA;	/* Router Alert Option */
528	p->ipopt_list[1] = 0x04;	/* 4 bytes long */
529	p->ipopt_list[2] = IPOPT_EOL;	/* End of IP option list */
530	p->ipopt_list[3] = 0x00;	/* pad byte */
531	m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1];
532
533	return (m);
534}
535
536/*
537 * Attach IGMP when PF_INET is attached to an interface.
538 */
539struct igmp_ifinfo *
540igmp_domifattach(struct ifnet *ifp, int how)
541{
542	struct igmp_ifinfo *igi;
543
544	IGMP_PRINTF(("%s: called for ifp %p(%s)\n",
545	    __func__, ifp, ifp->if_name));
546
547	igi = igi_alloc(how);
548	if (igi == NULL)
549		return (NULL);
550
551	lck_mtx_lock(&igmp_mtx);
552
553	IGI_LOCK(igi);
554	igi_initvar(igi, ifp, 0);
555	igi->igi_debug |= IFD_ATTACHED;
556	IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */
557	IGI_ADDREF_LOCKED(igi); /* hold a reference for caller */
558	IGI_UNLOCK(igi);
559	ifnet_lock_shared(ifp);
560	igmp_initsilent(ifp, igi);
561	ifnet_lock_done(ifp);
562
563	LIST_INSERT_HEAD(&igi_head, igi, igi_link);
564
565	lck_mtx_unlock(&igmp_mtx);
566
567	IGMP_PRINTF(("allocate igmp_ifinfo for ifp %p(%s)\n",
568	     ifp, ifp->if_name));
569
570	return (igi);
571}
572
573/*
574 * Attach IGMP when PF_INET is reattached to an interface.  Caller is
575 * expected to have an outstanding reference to the igi.
576 */
577void
578igmp_domifreattach(struct igmp_ifinfo *igi)
579{
580	struct ifnet *ifp;
581
582	lck_mtx_lock(&igmp_mtx);
583
584	IGI_LOCK(igi);
585	VERIFY(!(igi->igi_debug & IFD_ATTACHED));
586	ifp = igi->igi_ifp;
587	VERIFY(ifp != NULL);
588	igi_initvar(igi, ifp, 1);
589	igi->igi_debug |= IFD_ATTACHED;
590	IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */
591	IGI_UNLOCK(igi);
592	ifnet_lock_shared(ifp);
593	igmp_initsilent(ifp, igi);
594	ifnet_lock_done(ifp);
595
596	LIST_INSERT_HEAD(&igi_head, igi, igi_link);
597
598	lck_mtx_unlock(&igmp_mtx);
599
600	IGMP_PRINTF(("reattached igmp_ifinfo for ifp %p(%s)\n",
601	     ifp, ifp->if_name));
602}
603
604/*
605 * Hook for domifdetach.
606 */
607void
608igmp_domifdetach(struct ifnet *ifp)
609{
610	SLIST_HEAD(, in_multi) inm_dthead;
611
612	SLIST_INIT(&inm_dthead);
613
614	IGMP_PRINTF(("%s: called for ifp %p(%s%d)\n",
615	    __func__, ifp, ifp->if_name, ifp->if_unit));
616
617	lck_mtx_lock(&igmp_mtx);
618	igi_delete(ifp, (struct igmp_inm_relhead *)&inm_dthead);
619	lck_mtx_unlock(&igmp_mtx);
620
621	/* Now that we're dropped all locks, release detached records */
622	IGMP_REMOVE_DETACHED_INM(&inm_dthead);
623}
624
625/*
626 * Called at interface detach time.  Note that we only flush all deferred
627 * responses and record releases; all remaining inm records and their source
628 * entries related to this interface are left intact, in order to handle
629 * the reattach case.
630 */
631static void
632igi_delete(const struct ifnet *ifp, struct igmp_inm_relhead *inm_dthead)
633{
634	struct igmp_ifinfo *igi, *tigi;
635
636	lck_mtx_assert(&igmp_mtx, LCK_MTX_ASSERT_OWNED);
637
638	LIST_FOREACH_SAFE(igi, &igi_head, igi_link, tigi) {
639		IGI_LOCK(igi);
640		if (igi->igi_ifp == ifp) {
641			/*
642			 * Free deferred General Query responses.
643			 */
644			IF_DRAIN(&igi->igi_gq);
645			IF_DRAIN(&igi->igi_v2q);
646			igmp_flush_relq(igi, inm_dthead);
647			VERIFY(SLIST_EMPTY(&igi->igi_relinmhead));
648			igi->igi_debug &= ~IFD_ATTACHED;
649			IGI_UNLOCK(igi);
650
651			LIST_REMOVE(igi, igi_link);
652			IGI_REMREF(igi); /* release igi_head reference */
653			return;
654		}
655		IGI_UNLOCK(igi);
656	}
657	panic("%s: igmp_ifinfo not found for ifp %p\n", __func__,  ifp);
658}
659
660__private_extern__ void
661igmp_initsilent(struct ifnet *ifp, struct igmp_ifinfo *igi)
662{
663	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
664
665	IGI_LOCK_ASSERT_NOTHELD(igi);
666	IGI_LOCK(igi);
667	if (!(ifp->if_flags & IFF_MULTICAST))
668		igi->igi_flags |= IGIF_SILENT;
669	else
670		igi->igi_flags &= ~IGIF_SILENT;
671	IGI_UNLOCK(igi);
672}
673
674static void
675igi_initvar(struct igmp_ifinfo *igi, struct ifnet *ifp, int reattach)
676{
677	IGI_LOCK_ASSERT_HELD(igi);
678
679	igi->igi_ifp = ifp;
680	igi->igi_version = igmp_default_version;
681	igi->igi_flags = 0;
682	igi->igi_rv = IGMP_RV_INIT;
683	igi->igi_qi = IGMP_QI_INIT;
684	igi->igi_qri = IGMP_QRI_INIT;
685	igi->igi_uri = IGMP_URI_INIT;
686
687	if (!reattach)
688		SLIST_INIT(&igi->igi_relinmhead);
689
690	/*
691	 * Responses to general queries are subject to bounds.
692	 */
693	igi->igi_gq.ifq_maxlen =  IGMP_MAX_RESPONSE_PACKETS;
694	igi->igi_v2q.ifq_maxlen = IGMP_MAX_RESPONSE_PACKETS;
695}
696
697static struct igmp_ifinfo *
698igi_alloc(int how)
699{
700	struct igmp_ifinfo *igi;
701
702	igi = (how == M_WAITOK) ? zalloc(igi_zone) : zalloc_noblock(igi_zone);
703	if (igi != NULL) {
704		bzero(igi, igi_size);
705		lck_mtx_init(&igi->igi_lock, igmp_mtx_grp, igmp_mtx_attr);
706		igi->igi_debug |= IFD_ALLOC;
707	}
708	return (igi);
709}
710
711static void
712igi_free(struct igmp_ifinfo *igi)
713{
714	IGI_LOCK(igi);
715	if (igi->igi_debug & IFD_ATTACHED) {
716		panic("%s: attached igi=%p is being freed", __func__, igi);
717		/* NOTREACHED */
718	} else if (igi->igi_ifp != NULL) {
719		panic("%s: ifp not NULL for igi=%p", __func__, igi);
720		/* NOTREACHED */
721	} else if (!(igi->igi_debug & IFD_ALLOC)) {
722		panic("%s: igi %p cannot be freed", __func__, igi);
723		/* NOTREACHED */
724	} else if (igi->igi_refcnt != 0) {
725		panic("%s: non-zero refcnt igi=%p", __func__, igi);
726		/* NOTREACHED */
727	}
728	igi->igi_debug &= ~IFD_ALLOC;
729	IGI_UNLOCK(igi);
730
731	lck_mtx_destroy(&igi->igi_lock, igmp_mtx_grp);
732	zfree(igi_zone, igi);
733}
734
735void
736igi_addref(struct igmp_ifinfo *igi, int locked)
737{
738	if (!locked)
739		IGI_LOCK_SPIN(igi);
740	else
741		IGI_LOCK_ASSERT_HELD(igi);
742
743	if (++igi->igi_refcnt == 0) {
744		panic("%s: igi=%p wraparound refcnt", __func__, igi);
745		/* NOTREACHED */
746	}
747	if (!locked)
748		IGI_UNLOCK(igi);
749}
750
751void
752igi_remref(struct igmp_ifinfo *igi)
753{
754	SLIST_HEAD(, in_multi) inm_dthead;
755	struct ifnet *ifp;
756
757	IGI_LOCK_SPIN(igi);
758
759	if (igi->igi_refcnt == 0) {
760		panic("%s: igi=%p negative refcnt", __func__, igi);
761		/* NOTREACHED */
762	}
763
764	--igi->igi_refcnt;
765	if (igi->igi_refcnt > 0) {
766		IGI_UNLOCK(igi);
767		return;
768	}
769
770	ifp = igi->igi_ifp;
771	igi->igi_ifp = NULL;
772	IF_DRAIN(&igi->igi_gq);
773	IF_DRAIN(&igi->igi_v2q);
774	SLIST_INIT(&inm_dthead);
775	igmp_flush_relq(igi, (struct igmp_inm_relhead *)&inm_dthead);
776	VERIFY(SLIST_EMPTY(&igi->igi_relinmhead));
777	IGI_UNLOCK(igi);
778
779	/* Now that we're dropped all locks, release detached records */
780	IGMP_REMOVE_DETACHED_INM(&inm_dthead);
781
782	IGMP_PRINTF(("%s: freeing igmp_ifinfo for ifp %p(%s%d)\n",
783	    __func__, ifp, ifp->if_name, ifp->if_unit));
784
785	igi_free(igi);
786}
787
788/*
789 * Process a received IGMPv1 query.
790 * Return non-zero if the message should be dropped.
791 */
792static int
793igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip,
794    const struct igmp *igmp)
795{
796	struct igmp_ifinfo	*igi;
797	struct in_multi		*inm;
798	struct in_multistep	step;
799
800	/*
801	 * IGMPv1 Host Membership Queries SHOULD always be addressed to
802	 * 224.0.0.1. They are always treated as General Queries.
803	 * igmp_group is always ignored. Do not drop it as a userland
804	 * daemon may wish to see it.
805	 */
806	if (!in_allhosts(ip->ip_dst) || !in_nullhost(igmp->igmp_group)) {
807		IGMPSTAT_INC(igps_rcv_badqueries);
808		OIGMPSTAT_INC(igps_rcv_badqueries);
809		return (0);
810	}
811	IGMPSTAT_INC(igps_rcv_gen_queries);
812
813	igi = IGMP_IFINFO(ifp);
814	VERIFY(igi != NULL);
815
816	IGI_LOCK(igi);
817	if (igi->igi_flags & IGIF_LOOPBACK) {
818		IGMP_PRINTF(("ignore v1 query on IGIF_LOOPBACK ifp %p(%s%d)\n",
819		    ifp, ifp->if_name, ifp->if_unit));
820		IGI_UNLOCK(igi);
821		return (0);
822	}
823	/*
824	 * Switch to IGMPv1 host compatibility mode.
825	 */
826	igmp_set_version(igi, IGMP_VERSION_1);
827	IGI_UNLOCK(igi);
828
829	IGMP_PRINTF(("process v1 query on ifp %p(%s%d)\n", ifp, ifp->if_name,
830	    ifp->if_unit));
831
832	/*
833	 * Start the timers in all of our group records
834	 * for the interface on which the query arrived,
835	 * except those which are already running.
836	 */
837	in_multihead_lock_shared();
838	IN_FIRST_MULTI(step, inm);
839	while (inm != NULL) {
840		INM_LOCK(inm);
841		if (inm->inm_ifp != ifp)
842			goto next;
843		if (inm->inm_timer != 0)
844			goto next;
845
846		switch (inm->inm_state) {
847		case IGMP_NOT_MEMBER:
848		case IGMP_SILENT_MEMBER:
849			break;
850		case IGMP_G_QUERY_PENDING_MEMBER:
851		case IGMP_SG_QUERY_PENDING_MEMBER:
852		case IGMP_REPORTING_MEMBER:
853		case IGMP_IDLE_MEMBER:
854		case IGMP_LAZY_MEMBER:
855		case IGMP_SLEEPING_MEMBER:
856		case IGMP_AWAKENING_MEMBER:
857			inm->inm_state = IGMP_REPORTING_MEMBER;
858			inm->inm_timer = IGMP_RANDOM_DELAY(
859			    IGMP_V1V2_MAX_RI * PR_SLOWHZ);
860			current_state_timers_running = 1;
861			break;
862		case IGMP_LEAVING_MEMBER:
863			break;
864		}
865next:
866		INM_UNLOCK(inm);
867		IN_NEXT_MULTI(step, inm);
868	}
869	in_multihead_lock_done();
870
871	return (0);
872}
873
874/*
875 * Process a received IGMPv2 general or group-specific query.
876 */
877static int
878igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip,
879    const struct igmp *igmp)
880{
881	struct igmp_ifinfo	*igi;
882	struct in_multi		*inm;
883	int			 is_general_query;
884	uint16_t		 timer;
885
886	is_general_query = 0;
887
888	/*
889	 * Validate address fields upfront.
890	 */
891	if (in_nullhost(igmp->igmp_group)) {
892		/*
893		 * IGMPv2 General Query.
894		 * If this was not sent to the all-hosts group, ignore it.
895		 */
896		if (!in_allhosts(ip->ip_dst))
897			return (0);
898		IGMPSTAT_INC(igps_rcv_gen_queries);
899		is_general_query = 1;
900	} else {
901		/* IGMPv2 Group-Specific Query. */
902		IGMPSTAT_INC(igps_rcv_group_queries);
903	}
904
905	igi = IGMP_IFINFO(ifp);
906	VERIFY(igi != NULL);
907
908	IGI_LOCK(igi);
909	if (igi->igi_flags & IGIF_LOOPBACK) {
910		IGMP_PRINTF(("ignore v2 query on IGIF_LOOPBACK ifp %p(%s%d)\n",
911		    ifp, ifp->if_name, ifp->if_unit));
912		IGI_UNLOCK(igi);
913		return(0);
914	}
915	/*
916	 * Ignore v2 query if in v1 Compatibility Mode.
917	 */
918	if (igi->igi_version == IGMP_VERSION_1) {
919		IGI_UNLOCK(igi);
920		return (0);
921	}
922	igmp_set_version(igi, IGMP_VERSION_2);
923	IGI_UNLOCK(igi);
924
925	timer = igmp->igmp_code * PR_SLOWHZ / IGMP_TIMER_SCALE;
926	if (timer == 0)
927		timer = 1;
928
929	if (is_general_query) {
930		struct in_multistep step;
931
932		IGMP_PRINTF(("process v2 general query on ifp %p(%s%d)\n",
933		    ifp, ifp->if_name, ifp->if_unit));
934		/*
935		 * For each reporting group joined on this
936		 * interface, kick the report timer.
937		 */
938		in_multihead_lock_shared();
939		IN_FIRST_MULTI(step, inm);
940		while (inm != NULL) {
941			INM_LOCK(inm);
942			if (inm->inm_ifp == ifp)
943				igmp_v2_update_group(inm, timer);
944			INM_UNLOCK(inm);
945			IN_NEXT_MULTI(step, inm);
946		}
947		in_multihead_lock_done();
948	} else {
949		/*
950		 * Group-specific IGMPv2 query, we need only
951		 * look up the single group to process it.
952		 */
953		in_multihead_lock_shared();
954		IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
955		in_multihead_lock_done();
956		if (inm != NULL) {
957			INM_LOCK(inm);
958			IGMP_PRINTF(("process v2 query %s on ifp %p(%s%d)\n",
959			    inet_ntoa(igmp->igmp_group), ifp, ifp->if_name,
960			    ifp->if_unit));
961			igmp_v2_update_group(inm, timer);
962			INM_UNLOCK(inm);
963			INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
964		}
965	}
966
967	return (0);
968}
969
970/*
971 * Update the report timer on a group in response to an IGMPv2 query.
972 *
973 * If we are becoming the reporting member for this group, start the timer.
974 * If we already are the reporting member for this group, and timer is
975 * below the threshold, reset it.
976 *
977 * We may be updating the group for the first time since we switched
978 * to IGMPv3. If we are, then we must clear any recorded source lists,
979 * and transition to REPORTING state; the group timer is overloaded
980 * for group and group-source query responses.
981 *
982 * Unlike IGMPv3, the delay per group should be jittered
983 * to avoid bursts of IGMPv2 reports.
984 */
985static void
986igmp_v2_update_group(struct in_multi *inm, const int timer)
987{
988
989	IGMP_PRINTF(("%s: %s/%s%d timer=%d\n", __func__,
990	    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_name,
991	    inm->inm_ifp->if_unit, timer));
992
993	INM_LOCK_ASSERT_HELD(inm);
994
995	switch (inm->inm_state) {
996	case IGMP_NOT_MEMBER:
997	case IGMP_SILENT_MEMBER:
998		break;
999	case IGMP_REPORTING_MEMBER:
1000		if (inm->inm_timer != 0 &&
1001		    inm->inm_timer <= timer) {
1002			IGMP_PRINTF(("%s: REPORTING and timer running, "
1003			    "skipping.\n", __func__));
1004			break;
1005		}
1006		/* FALLTHROUGH */
1007	case IGMP_SG_QUERY_PENDING_MEMBER:
1008	case IGMP_G_QUERY_PENDING_MEMBER:
1009	case IGMP_IDLE_MEMBER:
1010	case IGMP_LAZY_MEMBER:
1011	case IGMP_AWAKENING_MEMBER:
1012		IGMP_PRINTF(("%s: ->REPORTING\n", __func__));
1013		inm->inm_state = IGMP_REPORTING_MEMBER;
1014		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
1015		current_state_timers_running = 1;
1016		break;
1017	case IGMP_SLEEPING_MEMBER:
1018		IGMP_PRINTF(("%s: ->AWAKENING\n", __func__));
1019		inm->inm_state = IGMP_AWAKENING_MEMBER;
1020		break;
1021	case IGMP_LEAVING_MEMBER:
1022		break;
1023	}
1024}
1025
1026/*
1027 * Process a received IGMPv3 general, group-specific or
1028 * group-and-source-specific query.
1029 * Assumes m has already been pulled up to the full IGMP message length.
1030 * Return 0 if successful, otherwise an appropriate error code is returned.
1031 */
1032static int
1033igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip,
1034    /*const*/ struct igmpv3 *igmpv3)
1035{
1036	struct igmp_ifinfo	*igi;
1037	struct in_multi		*inm;
1038	int			 is_general_query;
1039	uint32_t		 maxresp, nsrc, qqi;
1040	uint16_t		 timer;
1041	uint8_t			 qrv;
1042
1043	is_general_query = 0;
1044
1045	IGMP_PRINTF(("process v3 query on ifp %p(%s%d)\n", ifp, ifp->if_name,
1046	    ifp->if_unit));
1047
1048	maxresp = igmpv3->igmp_code;	/* in 1/10ths of a second */
1049	if (maxresp >= 128) {
1050		maxresp = IGMP_MANT(igmpv3->igmp_code) <<
1051			  (IGMP_EXP(igmpv3->igmp_code) + 3);
1052	}
1053
1054	/*
1055	 * Robustness must never be less than 2 for on-wire IGMPv3.
1056	 * FUTURE: Check if ifp has IGIF_LOOPBACK set, as we will make
1057	 * an exception for interfaces whose IGMPv3 state changes
1058	 * are redirected to loopback (e.g. MANET).
1059	 */
1060	qrv = IGMP_QRV(igmpv3->igmp_misc);
1061	if (qrv < 2) {
1062		IGMP_PRINTF(("%s: clamping qrv %d to %d\n", __func__,
1063		    qrv, IGMP_RV_INIT));
1064		qrv = IGMP_RV_INIT;
1065	}
1066
1067	qqi = igmpv3->igmp_qqi;
1068	if (qqi >= 128) {
1069		qqi = IGMP_MANT(igmpv3->igmp_qqi) <<
1070		     (IGMP_EXP(igmpv3->igmp_qqi) + 3);
1071	}
1072
1073	timer = maxresp * PR_SLOWHZ / IGMP_TIMER_SCALE;
1074	if (timer == 0)
1075		timer = 1;
1076
1077	nsrc = ntohs(igmpv3->igmp_numsrc);
1078
1079	/*
1080	 * Validate address fields and versions upfront before
1081	 * accepting v3 query.
1082	 */
1083	if (in_nullhost(igmpv3->igmp_group)) {
1084		/*
1085		 * IGMPv3 General Query.
1086		 *
1087		 * General Queries SHOULD be directed to 224.0.0.1.
1088		 * A general query with a source list has undefined
1089		 * behaviour; discard it.
1090		 */
1091		IGMPSTAT_INC(igps_rcv_gen_queries);
1092		if (!in_allhosts(ip->ip_dst) || nsrc > 0) {
1093			IGMPSTAT_INC(igps_rcv_badqueries);
1094			OIGMPSTAT_INC(igps_rcv_badqueries);
1095			return (0);
1096		}
1097		is_general_query = 1;
1098	} else {
1099		/* Group or group-source specific query. */
1100		if (nsrc == 0)
1101			IGMPSTAT_INC(igps_rcv_group_queries);
1102		else
1103			IGMPSTAT_INC(igps_rcv_gsr_queries);
1104	}
1105
1106	igi = IGMP_IFINFO(ifp);
1107	VERIFY(igi != NULL);
1108
1109	IGI_LOCK(igi);
1110	if (igi->igi_flags & IGIF_LOOPBACK) {
1111		IGMP_PRINTF(("ignore v3 query on IGIF_LOOPBACK ifp %p(%s%d)\n",
1112		    ifp, ifp->if_name, ifp->if_unit));
1113		IGI_UNLOCK(igi);
1114		return (0);
1115	}
1116
1117	/*
1118	 * Discard the v3 query if we're in Compatibility Mode.
1119	 * The RFC is not obviously worded that hosts need to stay in
1120	 * compatibility mode until the Old Version Querier Present
1121	 * timer expires.
1122	 */
1123	if (igi->igi_version != IGMP_VERSION_3) {
1124		IGMP_PRINTF(("ignore v3 query in v%d mode on ifp %p(%s%d)\n",
1125		    igi->igi_version, ifp, ifp->if_name, ifp->if_unit));
1126		IGI_UNLOCK(igi);
1127		return (0);
1128	}
1129
1130	igmp_set_version(igi, IGMP_VERSION_3);
1131	igi->igi_rv = qrv;
1132	igi->igi_qi = qqi;
1133	igi->igi_qri = maxresp;
1134
1135
1136	IGMP_PRINTF(("%s: qrv %d qi %d qri %d\n", __func__, qrv, qqi,
1137	    maxresp));
1138
1139	if (is_general_query) {
1140		/*
1141		 * Schedule a current-state report on this ifp for
1142		 * all groups, possibly containing source lists.
1143		 * If there is a pending General Query response
1144		 * scheduled earlier than the selected delay, do
1145		 * not schedule any other reports.
1146		 * Otherwise, reset the interface timer.
1147		 */
1148		IGMP_PRINTF(("process v3 general query on ifp %p(%s%d)\n",
1149		    ifp, ifp->if_name, ifp->if_unit));
1150		if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) {
1151			igi->igi_v3_timer = IGMP_RANDOM_DELAY(timer);
1152			interface_timers_running = 1;
1153		}
1154		IGI_UNLOCK(igi);
1155	} else {
1156		IGI_UNLOCK(igi);
1157		/*
1158		 * Group-source-specific queries are throttled on
1159		 * a per-group basis to defeat denial-of-service attempts.
1160		 * Queries for groups we are not a member of on this
1161		 * link are simply ignored.
1162		 */
1163		in_multihead_lock_shared();
1164		IN_LOOKUP_MULTI(&igmpv3->igmp_group, ifp, inm);
1165		in_multihead_lock_done();
1166		if (inm == NULL)
1167			return (0);
1168
1169		INM_LOCK(inm);
1170#ifndef __APPLE__
1171		/* TODO: need ratecheck equivalent */
1172		if (nsrc > 0) {
1173			if (!ratecheck(&inm->inm_lastgsrtv,
1174			    &igmp_gsrdelay)) {
1175				IGMP_PRINTF(("%s: GS query throttled.\n",
1176				    __func__));
1177				IGMPSTAT_INC(igps_drop_gsr_queries);
1178				INM_UNLOCK(inm);
1179				INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1180				return (0);
1181			}
1182		}
1183#endif
1184		IGMP_PRINTF(("process v3 %s query on ifp %p(%s%d)\n",
1185		     inet_ntoa(igmpv3->igmp_group), ifp, ifp->if_name,
1186		     ifp->if_unit));
1187		/*
1188		 * If there is a pending General Query response
1189		 * scheduled sooner than the selected delay, no
1190		 * further report need be scheduled.
1191		 * Otherwise, prepare to respond to the
1192		 * group-specific or group-and-source query.
1193		 */
1194		IGI_LOCK(igi);
1195		if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) {
1196			IGI_UNLOCK(igi);
1197			igmp_input_v3_group_query(inm, timer, igmpv3);
1198		} else {
1199			IGI_UNLOCK(igi);
1200		}
1201		INM_UNLOCK(inm);
1202		INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1203	}
1204
1205	return (0);
1206}
1207
1208/*
1209 * Process a recieved IGMPv3 group-specific or group-and-source-specific
1210 * query.
1211 * Return <0 if any error occured. Currently this is ignored.
1212 */
1213static int
1214igmp_input_v3_group_query(struct in_multi *inm,
1215    int timer, /*const*/ struct igmpv3 *igmpv3)
1216{
1217	int			 retval;
1218	uint16_t		 nsrc;
1219
1220	INM_LOCK_ASSERT_HELD(inm);
1221
1222	retval = 0;
1223
1224	switch (inm->inm_state) {
1225	case IGMP_NOT_MEMBER:
1226	case IGMP_SILENT_MEMBER:
1227	case IGMP_SLEEPING_MEMBER:
1228	case IGMP_LAZY_MEMBER:
1229	case IGMP_AWAKENING_MEMBER:
1230	case IGMP_IDLE_MEMBER:
1231	case IGMP_LEAVING_MEMBER:
1232		return (retval);
1233	case IGMP_REPORTING_MEMBER:
1234	case IGMP_G_QUERY_PENDING_MEMBER:
1235	case IGMP_SG_QUERY_PENDING_MEMBER:
1236		break;
1237	}
1238
1239	nsrc = ntohs(igmpv3->igmp_numsrc);
1240
1241	/*
1242	 * Deal with group-specific queries upfront.
1243	 * If any group query is already pending, purge any recorded
1244	 * source-list state if it exists, and schedule a query response
1245	 * for this group-specific query.
1246	 */
1247	if (nsrc == 0) {
1248		if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
1249		    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
1250			inm_clear_recorded(inm);
1251			timer = min(inm->inm_timer, timer);
1252		}
1253		inm->inm_state = IGMP_G_QUERY_PENDING_MEMBER;
1254		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
1255		current_state_timers_running = 1;
1256		return (retval);
1257	}
1258
1259	/*
1260	 * Deal with the case where a group-and-source-specific query has
1261	 * been received but a group-specific query is already pending.
1262	 */
1263	if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER) {
1264		timer = min(inm->inm_timer, timer);
1265		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
1266		current_state_timers_running = 1;
1267		return (retval);
1268	}
1269
1270	/*
1271	 * Finally, deal with the case where a group-and-source-specific
1272	 * query has been received, where a response to a previous g-s-r
1273	 * query exists, or none exists.
1274	 * In this case, we need to parse the source-list which the Querier
1275	 * has provided us with and check if we have any source list filter
1276	 * entries at T1 for these sources. If we do not, there is no need
1277	 * schedule a report and the query may be dropped.
1278	 * If we do, we must record them and schedule a current-state
1279	 * report for those sources.
1280	 * FIXME: Handling source lists larger than 1 mbuf requires that
1281	 * we pass the mbuf chain pointer down to this function, and use
1282	 * m_getptr() to walk the chain.
1283	 */
1284	if (inm->inm_nsrc > 0) {
1285		const struct in_addr	*ap;
1286		int			 i, nrecorded;
1287
1288		ap = (const struct in_addr *)(igmpv3 + 1);
1289		nrecorded = 0;
1290		for (i = 0; i < nsrc; i++, ap++) {
1291			retval = inm_record_source(inm, ap->s_addr);
1292			if (retval < 0)
1293				break;
1294			nrecorded += retval;
1295		}
1296		if (nrecorded > 0) {
1297			IGMP_PRINTF(("%s: schedule response to SG query\n",
1298			    __func__));
1299			inm->inm_state = IGMP_SG_QUERY_PENDING_MEMBER;
1300			inm->inm_timer = IGMP_RANDOM_DELAY(timer);
1301			current_state_timers_running = 1;
1302		}
1303	}
1304
1305	return (retval);
1306}
1307
1308/*
1309 * Process a received IGMPv1 host membership report.
1310 *
1311 * NOTE: 0.0.0.0 workaround breaks const correctness.
1312 */
1313static int
1314igmp_input_v1_report(struct ifnet *ifp, /*const*/ struct ip *ip,
1315    /*const*/ struct igmp *igmp)
1316{
1317	struct in_ifaddr *ia;
1318	struct in_multi *inm;
1319
1320	IGMPSTAT_INC(igps_rcv_reports);
1321	OIGMPSTAT_INC(igps_rcv_reports);
1322
1323	if (ifp->if_flags & IFF_LOOPBACK)
1324		return (0);
1325
1326	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr) ||
1327	    !in_hosteq(igmp->igmp_group, ip->ip_dst))) {
1328		IGMPSTAT_INC(igps_rcv_badreports);
1329		OIGMPSTAT_INC(igps_rcv_badreports);
1330		return (EINVAL);
1331	}
1332
1333	/*
1334	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
1335	 * Booting clients may use the source address 0.0.0.0. Some
1336	 * IGMP daemons may not know how to use IP_RECVIF to determine
1337	 * the interface upon which this message was received.
1338	 * Replace 0.0.0.0 with the subnet address if told to do so.
1339	 */
1340	if (igmp_recvifkludge && in_nullhost(ip->ip_src)) {
1341		IFP_TO_IA(ifp, ia);
1342		if (ia != NULL) {
1343			IFA_LOCK(&ia->ia_ifa);
1344			ip->ip_src.s_addr = htonl(ia->ia_subnet);
1345			IFA_UNLOCK(&ia->ia_ifa);
1346			IFA_REMREF(&ia->ia_ifa);
1347		}
1348	}
1349
1350	IGMP_PRINTF(("process v1 report %s on ifp %p(%s%d)\n",
1351	     inet_ntoa(igmp->igmp_group), ifp, ifp->if_name, ifp->if_unit));
1352
1353	/*
1354	 * IGMPv1 report suppression.
1355	 * If we are a member of this group, and our membership should be
1356	 * reported, stop our group timer and transition to the 'lazy' state.
1357	 */
1358	in_multihead_lock_shared();
1359	IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
1360	in_multihead_lock_done();
1361	if (inm != NULL) {
1362		struct igmp_ifinfo *igi;
1363
1364		INM_LOCK(inm);
1365
1366		igi = inm->inm_igi;
1367		VERIFY(igi != NULL);
1368
1369		IGMPSTAT_INC(igps_rcv_ourreports);
1370		OIGMPSTAT_INC(igps_rcv_ourreports);
1371
1372		/*
1373		 * If we are in IGMPv3 host mode, do not allow the
1374		 * other host's IGMPv1 report to suppress our reports
1375		 * unless explicitly configured to do so.
1376		 */
1377		IGI_LOCK(igi);
1378		if (igi->igi_version == IGMP_VERSION_3) {
1379			if (igmp_legacysupp)
1380				igmp_v3_suppress_group_record(inm);
1381			IGI_UNLOCK(igi);
1382			INM_UNLOCK(inm);
1383			INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1384			return (0);
1385		}
1386
1387		INM_LOCK_ASSERT_HELD(inm);
1388		inm->inm_timer = 0;
1389
1390		switch (inm->inm_state) {
1391		case IGMP_NOT_MEMBER:
1392		case IGMP_SILENT_MEMBER:
1393			break;
1394		case IGMP_IDLE_MEMBER:
1395		case IGMP_LAZY_MEMBER:
1396		case IGMP_AWAKENING_MEMBER:
1397			IGMP_PRINTF(("report suppressed for %s on ifp %p(%s%d)\n",
1398			    inet_ntoa(igmp->igmp_group), ifp, ifp->if_name,
1399			    ifp->if_unit));
1400		case IGMP_SLEEPING_MEMBER:
1401			inm->inm_state = IGMP_SLEEPING_MEMBER;
1402			break;
1403		case IGMP_REPORTING_MEMBER:
1404			IGMP_PRINTF(("report suppressed for %s on ifp %p(%s%d)\n",
1405			    inet_ntoa(igmp->igmp_group), ifp, ifp->if_name,
1406			    ifp->if_unit));
1407			if (igi->igi_version == IGMP_VERSION_1)
1408				inm->inm_state = IGMP_LAZY_MEMBER;
1409			else if (igi->igi_version == IGMP_VERSION_2)
1410				inm->inm_state = IGMP_SLEEPING_MEMBER;
1411			break;
1412		case IGMP_G_QUERY_PENDING_MEMBER:
1413		case IGMP_SG_QUERY_PENDING_MEMBER:
1414		case IGMP_LEAVING_MEMBER:
1415			break;
1416		}
1417		IGI_UNLOCK(igi);
1418		INM_UNLOCK(inm);
1419		INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1420	}
1421
1422	return (0);
1423}
1424
1425/*
1426 * Process a received IGMPv2 host membership report.
1427 *
1428 * NOTE: 0.0.0.0 workaround breaks const correctness.
1429 */
1430static int
1431igmp_input_v2_report(struct ifnet *ifp, /*const*/ struct ip *ip,
1432    /*const*/ struct igmp *igmp)
1433{
1434	struct in_ifaddr *ia;
1435	struct in_multi *inm;
1436
1437	/*
1438	 * Make sure we don't hear our own membership report.  Fast
1439	 * leave requires knowing that we are the only member of a
1440	 * group.
1441	 */
1442	IFP_TO_IA(ifp, ia);
1443	if (ia != NULL) {
1444		IFA_LOCK(&ia->ia_ifa);
1445		if (in_hosteq(ip->ip_src, IA_SIN(ia)->sin_addr)) {
1446			IFA_UNLOCK(&ia->ia_ifa);
1447			IFA_REMREF(&ia->ia_ifa);
1448			return (0);
1449		}
1450		IFA_UNLOCK(&ia->ia_ifa);
1451	}
1452
1453	IGMPSTAT_INC(igps_rcv_reports);
1454	OIGMPSTAT_INC(igps_rcv_reports);
1455
1456	if (ifp->if_flags & IFF_LOOPBACK) {
1457		if (ia != NULL)
1458			IFA_REMREF(&ia->ia_ifa);
1459		return (0);
1460	}
1461
1462	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
1463	    !in_hosteq(igmp->igmp_group, ip->ip_dst)) {
1464		if (ia != NULL)
1465			IFA_REMREF(&ia->ia_ifa);
1466		IGMPSTAT_INC(igps_rcv_badreports);
1467		OIGMPSTAT_INC(igps_rcv_badreports);
1468		return (EINVAL);
1469	}
1470
1471	/*
1472	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
1473	 * Booting clients may use the source address 0.0.0.0. Some
1474	 * IGMP daemons may not know how to use IP_RECVIF to determine
1475	 * the interface upon which this message was received.
1476	 * Replace 0.0.0.0 with the subnet address if told to do so.
1477	 */
1478	if (igmp_recvifkludge && in_nullhost(ip->ip_src)) {
1479		if (ia != NULL) {
1480			IFA_LOCK(&ia->ia_ifa);
1481			ip->ip_src.s_addr = htonl(ia->ia_subnet);
1482			IFA_UNLOCK(&ia->ia_ifa);
1483		}
1484	}
1485	if (ia != NULL)
1486		IFA_REMREF(&ia->ia_ifa);
1487
1488	IGMP_PRINTF(("process v2 report %s on ifp %p(%s%d)\n",
1489	     inet_ntoa(igmp->igmp_group), ifp, ifp->if_name, ifp->if_unit));
1490
1491	/*
1492	 * IGMPv2 report suppression.
1493	 * If we are a member of this group, and our membership should be
1494	 * reported, and our group timer is pending or about to be reset,
1495	 * stop our group timer by transitioning to the 'lazy' state.
1496	 */
1497	in_multihead_lock_shared();
1498	IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
1499	in_multihead_lock_done();
1500	if (inm != NULL) {
1501		struct igmp_ifinfo *igi;
1502
1503		INM_LOCK(inm);
1504		igi = inm->inm_igi;
1505		VERIFY(igi != NULL);
1506
1507		IGMPSTAT_INC(igps_rcv_ourreports);
1508		OIGMPSTAT_INC(igps_rcv_ourreports);
1509
1510		/*
1511		 * If we are in IGMPv3 host mode, do not allow the
1512		 * other host's IGMPv1 report to suppress our reports
1513		 * unless explicitly configured to do so.
1514		 */
1515		IGI_LOCK(igi);
1516		if (igi->igi_version == IGMP_VERSION_3) {
1517			if (igmp_legacysupp)
1518				igmp_v3_suppress_group_record(inm);
1519			IGI_UNLOCK(igi);
1520			INM_UNLOCK(inm);
1521			INM_REMREF(inm);
1522			return (0);
1523		}
1524
1525		inm->inm_timer = 0;
1526
1527		switch (inm->inm_state) {
1528		case IGMP_NOT_MEMBER:
1529		case IGMP_SILENT_MEMBER:
1530		case IGMP_SLEEPING_MEMBER:
1531			break;
1532		case IGMP_REPORTING_MEMBER:
1533		case IGMP_IDLE_MEMBER:
1534		case IGMP_AWAKENING_MEMBER:
1535			IGMP_PRINTF(("report suppressed for %s on ifp %p(%s%d)\n",
1536			    inet_ntoa(igmp->igmp_group), ifp, ifp->if_name,
1537			    ifp->if_unit));
1538		case IGMP_LAZY_MEMBER:
1539			inm->inm_state = IGMP_LAZY_MEMBER;
1540			break;
1541		case IGMP_G_QUERY_PENDING_MEMBER:
1542		case IGMP_SG_QUERY_PENDING_MEMBER:
1543		case IGMP_LEAVING_MEMBER:
1544			break;
1545		}
1546		IGI_UNLOCK(igi);
1547		INM_UNLOCK(inm);
1548		INM_REMREF(inm);
1549	}
1550
1551	return (0);
1552}
1553
1554void
1555igmp_input(struct mbuf *m, int off)
1556{
1557	int iphlen;
1558	struct ifnet *ifp;
1559	struct igmp *igmp;
1560	struct ip *ip;
1561	int igmplen;
1562	int minlen;
1563	int queryver;
1564
1565	IGMP_PRINTF(("%s: called w/mbuf (%p,%d)\n", __func__, m, off));
1566
1567	ifp = m->m_pkthdr.rcvif;
1568
1569	IGMPSTAT_INC(igps_rcv_total);
1570	OIGMPSTAT_INC(igps_rcv_total);
1571
1572	/* Expect 32-bit aligned data pointer on strict-align platforms */
1573	MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
1574
1575	ip = mtod(m, struct ip *);
1576	iphlen = off;
1577
1578	/* By now, ip_len no longer contains the length of IP header */
1579	igmplen = ip->ip_len;
1580
1581	/*
1582	 * Validate lengths.
1583	 */
1584	if (igmplen < IGMP_MINLEN) {
1585		IGMPSTAT_INC(igps_rcv_tooshort);
1586		OIGMPSTAT_INC(igps_rcv_tooshort);
1587		m_freem(m);
1588		return;
1589	}
1590
1591	/*
1592	 * Always pullup to the minimum size for v1/v2 or v3
1593	 * to amortize calls to m_pulldown().
1594	 */
1595	if (igmplen >= IGMP_V3_QUERY_MINLEN)
1596		minlen = IGMP_V3_QUERY_MINLEN;
1597	else
1598		minlen = IGMP_MINLEN;
1599
1600	/* A bit more expensive than M_STRUCT_GET, but ensures alignment */
1601	M_STRUCT_GET0(igmp, struct igmp *, m, off, minlen);
1602	if (igmp == NULL) {
1603		IGMPSTAT_INC(igps_rcv_tooshort);
1604		OIGMPSTAT_INC(igps_rcv_tooshort);
1605		return;
1606	}
1607	VERIFY(IS_P2ALIGNED(igmp, sizeof (u_int32_t)));
1608
1609	/*
1610	 * Validate checksum.
1611	 */
1612	m->m_data += iphlen;
1613	m->m_len -= iphlen;
1614	if (in_cksum(m, igmplen)) {
1615		IGMPSTAT_INC(igps_rcv_badsum);
1616		OIGMPSTAT_INC(igps_rcv_badsum);
1617		m_freem(m);
1618		return;
1619	}
1620	m->m_data -= iphlen;
1621	m->m_len += iphlen;
1622
1623	/*
1624	 * IGMP control traffic is link-scope, and must have a TTL of 1.
1625	 * DVMRP traffic (e.g. mrinfo, mtrace) is an exception;
1626	 * probe packets may come from beyond the LAN.
1627	 */
1628	if (igmp->igmp_type != IGMP_DVMRP && ip->ip_ttl != 1) {
1629		IGMPSTAT_INC(igps_rcv_badttl);
1630		m_freem(m);
1631		return;
1632	}
1633
1634	switch (igmp->igmp_type) {
1635	case IGMP_HOST_MEMBERSHIP_QUERY:
1636		if (igmplen == IGMP_MINLEN) {
1637			if (igmp->igmp_code == 0)
1638				queryver = IGMP_VERSION_1;
1639			else
1640				queryver = IGMP_VERSION_2;
1641		} else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
1642			queryver = IGMP_VERSION_3;
1643		} else {
1644			IGMPSTAT_INC(igps_rcv_tooshort);
1645			OIGMPSTAT_INC(igps_rcv_tooshort);
1646			m_freem(m);
1647			return;
1648		}
1649
1650		OIGMPSTAT_INC(igps_rcv_queries);
1651
1652		switch (queryver) {
1653		case IGMP_VERSION_1:
1654			IGMPSTAT_INC(igps_rcv_v1v2_queries);
1655			if (!igmp_v1enable)
1656				break;
1657			if (igmp_input_v1_query(ifp, ip, igmp) != 0) {
1658				m_freem(m);
1659				return;
1660			}
1661			break;
1662
1663		case IGMP_VERSION_2:
1664			IGMPSTAT_INC(igps_rcv_v1v2_queries);
1665			if (!igmp_v2enable)
1666				break;
1667			if (igmp_input_v2_query(ifp, ip, igmp) != 0) {
1668				m_freem(m);
1669				return;
1670			}
1671			break;
1672
1673		case IGMP_VERSION_3: {
1674				struct igmpv3 *igmpv3;
1675				uint16_t igmpv3len;
1676				uint16_t srclen;
1677				int nsrc;
1678
1679				IGMPSTAT_INC(igps_rcv_v3_queries);
1680				igmpv3 = (struct igmpv3 *)igmp;
1681				/*
1682				 * Validate length based on source count.
1683				 */
1684				nsrc = ntohs(igmpv3->igmp_numsrc);
1685				srclen = sizeof(struct in_addr) * nsrc;
1686				if (igmplen < (IGMP_V3_QUERY_MINLEN + srclen)) {
1687					IGMPSTAT_INC(igps_rcv_tooshort);
1688					OIGMPSTAT_INC(igps_rcv_tooshort);
1689					m_freem(m);
1690					return;
1691				}
1692				igmpv3len = IGMP_V3_QUERY_MINLEN + srclen;
1693				/*
1694				 * A bit more expensive than M_STRUCT_GET,
1695				 * but ensures alignment.
1696				 */
1697				M_STRUCT_GET0(igmpv3, struct igmpv3 *, m,
1698				    off, igmpv3len);
1699				if (igmpv3 == NULL) {
1700					IGMPSTAT_INC(igps_rcv_tooshort);
1701					OIGMPSTAT_INC(igps_rcv_tooshort);
1702					return;
1703				}
1704				VERIFY(IS_P2ALIGNED(igmpv3,
1705				    sizeof (u_int32_t)));
1706				if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) {
1707					m_freem(m);
1708					return;
1709				}
1710			}
1711			break;
1712		}
1713		break;
1714
1715	case IGMP_v1_HOST_MEMBERSHIP_REPORT:
1716		if (!igmp_v1enable)
1717			break;
1718		if (igmp_input_v1_report(ifp, ip, igmp) != 0) {
1719			m_freem(m);
1720			return;
1721		}
1722		break;
1723
1724	case IGMP_v2_HOST_MEMBERSHIP_REPORT:
1725		if (!igmp_v2enable)
1726			break;
1727#ifndef __APPLE__
1728		if (!ip_checkrouteralert(m))
1729			IGMPSTAT_INC(igps_rcv_nora);
1730#endif
1731		if (igmp_input_v2_report(ifp, ip, igmp) != 0) {
1732			m_freem(m);
1733			return;
1734		}
1735		break;
1736
1737	case IGMP_v3_HOST_MEMBERSHIP_REPORT:
1738		/*
1739		 * Hosts do not need to process IGMPv3 membership reports,
1740		 * as report suppression is no longer required.
1741		 */
1742#ifndef __APPLE__
1743		if (!ip_checkrouteralert(m))
1744			IGMPSTAT_INC(igps_rcv_nora);
1745#endif
1746		break;
1747
1748	default:
1749		break;
1750	}
1751
1752	lck_mtx_assert(&igmp_mtx, LCK_MTX_ASSERT_NOTOWNED);
1753	/*
1754	 * Pass all valid IGMP packets up to any process(es) listening on a
1755	 * raw IGMP socket.
1756	 */
1757	rip_input(m, off);
1758}
1759
1760
1761/*
1762 * IGMP slowtimo handler.
1763 * Combiles both the slow and fast timer into one. We loose some responsivness but
1764 * allows the system to avoid having a pr_fasttimo, thus allowing for power savings.
1765 *
1766 */
1767void
1768igmp_slowtimo(void)
1769{
1770	struct ifqueue		 scq;	/* State-change packets */
1771	struct ifqueue		 qrq;	/* Query response packets */
1772	struct ifnet		*ifp;
1773	struct igmp_ifinfo	*igi;
1774	struct in_multi		*inm;
1775	int			 loop = 0, uri_fasthz = 0;
1776	SLIST_HEAD(, in_multi)	inm_dthead;
1777
1778	SLIST_INIT(&inm_dthead);
1779
1780	lck_mtx_lock(&igmp_mtx);
1781
1782	LIST_FOREACH(igi, &igi_head, igi_link) {
1783		IGI_LOCK(igi);
1784		igmp_v1v2_process_querier_timers(igi);
1785		IGI_UNLOCK(igi);
1786	}
1787
1788	/*
1789	 * NOTE: previously handled by fasttimo
1790	 *
1791	 * Quick check to see if any work needs to be done, in order to
1792	 * minimize the overhead of fasttimo processing.
1793	 */
1794	if (!current_state_timers_running &&
1795	    !interface_timers_running &&
1796	    !state_change_timers_running) {
1797		lck_mtx_unlock(&igmp_mtx);
1798		return;
1799	}
1800
1801	/*
1802	 * IGMPv3 General Query response timer processing.
1803	 */
1804	if (interface_timers_running) {
1805		interface_timers_running = 0;
1806		LIST_FOREACH(igi, &igi_head, igi_link) {
1807			IGI_LOCK(igi);
1808			if (igi->igi_v3_timer == 0) {
1809				/* Do nothing. */
1810			} else if (--igi->igi_v3_timer == 0) {
1811				igmp_v3_dispatch_general_query(igi);
1812			} else {
1813				interface_timers_running = 1;
1814			}
1815			IGI_UNLOCK(igi);
1816		}
1817	}
1818
1819	if (!current_state_timers_running &&
1820	    !state_change_timers_running)
1821		goto out_locked;
1822
1823	current_state_timers_running = 0;
1824	state_change_timers_running = 0;
1825
1826	memset(&qrq, 0, sizeof(struct ifqueue));
1827	qrq.ifq_maxlen = IGMP_MAX_G_GS_PACKETS;
1828
1829	memset(&scq, 0, sizeof(struct ifqueue));
1830	scq.ifq_maxlen =  IGMP_MAX_STATE_CHANGE_PACKETS;
1831
1832	/*
1833	 * IGMPv1/v2/v3 host report and state-change timer processing.
1834	 * Note: Processing a v3 group timer may remove a node.
1835	 */
1836	LIST_FOREACH(igi, &igi_head, igi_link) {
1837		struct in_multistep step;
1838
1839		IGI_LOCK(igi);
1840		ifp = igi->igi_ifp;
1841		loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
1842		uri_fasthz = IGMP_RANDOM_DELAY(igi->igi_uri * PR_SLOWHZ);
1843		IGI_UNLOCK(igi);
1844
1845		in_multihead_lock_shared();
1846		IN_FIRST_MULTI(step, inm);
1847		while (inm != NULL) {
1848			INM_LOCK(inm);
1849			if (inm->inm_ifp != ifp)
1850				goto next;
1851
1852			IGI_LOCK(igi);
1853			switch (igi->igi_version) {
1854				case IGMP_VERSION_1:
1855				case IGMP_VERSION_2:
1856					igmp_v1v2_process_group_timer(inm,
1857					    igi->igi_version);
1858					break;
1859				case IGMP_VERSION_3:
1860					igmp_v3_process_group_timers(igi, &qrq,
1861					    &scq, inm, uri_fasthz);
1862					break;
1863			}
1864			IGI_UNLOCK(igi);
1865next:
1866			INM_UNLOCK(inm);
1867			IN_NEXT_MULTI(step, inm);
1868		}
1869		in_multihead_lock_done();
1870
1871		IGI_LOCK(igi);
1872		if (igi->igi_version == IGMP_VERSION_1 ||
1873		    igi->igi_version == IGMP_VERSION_2) {
1874			igmp_dispatch_queue(igi, &igi->igi_v2q, 0, loop, ifp);
1875		} else if (igi->igi_version == IGMP_VERSION_3) {
1876			IGI_UNLOCK(igi);
1877			igmp_dispatch_queue(NULL, &qrq, 0, loop, ifp);
1878			igmp_dispatch_queue(NULL, &scq, 0, loop, ifp);
1879			VERIFY(qrq.ifq_len == 0);
1880			VERIFY(scq.ifq_len == 0);
1881			IGI_LOCK(igi);
1882		}
1883		/*
1884		 * In case there are still any pending membership reports
1885		 * which didn't get drained at version change time.
1886		 */
1887		IF_DRAIN(&igi->igi_v2q);
1888		/*
1889		 * Release all deferred inm records, and drain any locally
1890		 * enqueued packets; do it even if the current IGMP version
1891		 * for the link is no longer IGMPv3, in order to handle the
1892		 * version change case.
1893		 */
1894		igmp_flush_relq(igi, (struct igmp_inm_relhead *)&inm_dthead);
1895		VERIFY(SLIST_EMPTY(&igi->igi_relinmhead));
1896		IGI_UNLOCK(igi);
1897
1898		IF_DRAIN(&qrq);
1899		IF_DRAIN(&scq);
1900	}
1901
1902out_locked:
1903	lck_mtx_unlock(&igmp_mtx);
1904
1905	/* Now that we're dropped all locks, release detached records */
1906	IGMP_REMOVE_DETACHED_INM(&inm_dthead);
1907}
1908
1909/*
1910 * Free the in_multi reference(s) for this IGMP lifecycle.
1911 *
1912 * Caller must be holding igi_lock.
1913 */
1914static void
1915igmp_flush_relq(struct igmp_ifinfo *igi, struct igmp_inm_relhead *inm_dthead)
1916{
1917	struct in_multi *inm;
1918
1919again:
1920	IGI_LOCK_ASSERT_HELD(igi);
1921	inm = SLIST_FIRST(&igi->igi_relinmhead);
1922	if (inm != NULL) {
1923		int lastref;
1924
1925		SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele);
1926		IGI_UNLOCK(igi);
1927
1928		in_multihead_lock_exclusive();
1929		INM_LOCK(inm);
1930		VERIFY(inm->inm_nrelecnt != 0);
1931		inm->inm_nrelecnt--;
1932		lastref = in_multi_detach(inm);
1933		VERIFY(!lastref || (!(inm->inm_debug & IFD_ATTACHED) &&
1934		    inm->inm_reqcnt == 0));
1935		INM_UNLOCK(inm);
1936		in_multihead_lock_done();
1937		/* from igi_relinmhead */
1938		INM_REMREF(inm);
1939		/* from in_multihead list */
1940		if (lastref) {
1941			/*
1942			 * Defer releasing our final reference, as we
1943			 * are holding the IGMP lock at this point, and
1944			 * we could end up with locking issues later on
1945			 * (while issuing SIOCDELMULTI) when this is the
1946			 * final reference count.  Let the caller do it
1947			 * when it is safe.
1948			 */
1949			IGMP_ADD_DETACHED_INM(inm_dthead, inm);
1950		}
1951		IGI_LOCK(igi);
1952		goto again;
1953	}
1954}
1955
1956/*
1957 * Update host report group timer for IGMPv1/v2.
1958 * Will update the global pending timer flags.
1959 */
1960static void
1961igmp_v1v2_process_group_timer(struct in_multi *inm, const int igmp_version)
1962{
1963	int report_timer_expired;
1964
1965	INM_LOCK_ASSERT_HELD(inm);
1966	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
1967
1968	if (inm->inm_timer == 0) {
1969		report_timer_expired = 0;
1970	} else if (--inm->inm_timer == 0) {
1971		report_timer_expired = 1;
1972	} else {
1973		current_state_timers_running = 1;
1974		return;
1975	}
1976
1977	switch (inm->inm_state) {
1978	case IGMP_NOT_MEMBER:
1979	case IGMP_SILENT_MEMBER:
1980	case IGMP_IDLE_MEMBER:
1981	case IGMP_LAZY_MEMBER:
1982	case IGMP_SLEEPING_MEMBER:
1983	case IGMP_AWAKENING_MEMBER:
1984		break;
1985	case IGMP_REPORTING_MEMBER:
1986		if (report_timer_expired) {
1987			inm->inm_state = IGMP_IDLE_MEMBER;
1988			(void) igmp_v1v2_queue_report(inm,
1989			    (igmp_version == IGMP_VERSION_2) ?
1990			     IGMP_v2_HOST_MEMBERSHIP_REPORT :
1991			     IGMP_v1_HOST_MEMBERSHIP_REPORT);
1992			INM_LOCK_ASSERT_HELD(inm);
1993			IGI_LOCK_ASSERT_HELD(inm->inm_igi);
1994		}
1995		break;
1996	case IGMP_G_QUERY_PENDING_MEMBER:
1997	case IGMP_SG_QUERY_PENDING_MEMBER:
1998	case IGMP_LEAVING_MEMBER:
1999		break;
2000	}
2001}
2002
2003/*
2004 * Update a group's timers for IGMPv3.
2005 * Will update the global pending timer flags.
2006 * Note: Unlocked read from igi.
2007 */
2008static void
2009igmp_v3_process_group_timers(struct igmp_ifinfo *igi,
2010    struct ifqueue *qrq, struct ifqueue *scq,
2011    struct in_multi *inm, const int uri_fasthz)
2012{
2013	int query_response_timer_expired;
2014	int state_change_retransmit_timer_expired;
2015
2016	INM_LOCK_ASSERT_HELD(inm);
2017	IGI_LOCK_ASSERT_HELD(igi);
2018	VERIFY(igi == inm->inm_igi);
2019
2020	query_response_timer_expired = 0;
2021	state_change_retransmit_timer_expired = 0;
2022
2023	/*
2024	 * During a transition from v1/v2 compatibility mode back to v3,
2025	 * a group record in REPORTING state may still have its group
2026	 * timer active. This is a no-op in this function; it is easier
2027	 * to deal with it here than to complicate the slow-timeout path.
2028	 */
2029	if (inm->inm_timer == 0) {
2030		query_response_timer_expired = 0;
2031	} else if (--inm->inm_timer == 0) {
2032		query_response_timer_expired = 1;
2033	} else {
2034		current_state_timers_running = 1;
2035	}
2036
2037	if (inm->inm_sctimer == 0) {
2038		state_change_retransmit_timer_expired = 0;
2039	} else if (--inm->inm_sctimer == 0) {
2040		state_change_retransmit_timer_expired = 1;
2041	} else {
2042		state_change_timers_running = 1;
2043	}
2044
2045	/* We are in fasttimo, so be quick about it. */
2046	if (!state_change_retransmit_timer_expired &&
2047	    !query_response_timer_expired)
2048		return;
2049
2050	switch (inm->inm_state) {
2051	case IGMP_NOT_MEMBER:
2052	case IGMP_SILENT_MEMBER:
2053	case IGMP_SLEEPING_MEMBER:
2054	case IGMP_LAZY_MEMBER:
2055	case IGMP_AWAKENING_MEMBER:
2056	case IGMP_IDLE_MEMBER:
2057		break;
2058	case IGMP_G_QUERY_PENDING_MEMBER:
2059	case IGMP_SG_QUERY_PENDING_MEMBER:
2060		/*
2061		 * Respond to a previously pending Group-Specific
2062		 * or Group-and-Source-Specific query by enqueueing
2063		 * the appropriate Current-State report for
2064		 * immediate transmission.
2065		 */
2066		if (query_response_timer_expired) {
2067			int retval;
2068
2069			retval = igmp_v3_enqueue_group_record(qrq, inm, 0, 1,
2070			    (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER));
2071			IGMP_PRINTF(("%s: enqueue record = %d\n",
2072			    __func__, retval));
2073			inm->inm_state = IGMP_REPORTING_MEMBER;
2074			/* XXX Clear recorded sources for next time. */
2075			inm_clear_recorded(inm);
2076		}
2077		/* FALLTHROUGH */
2078	case IGMP_REPORTING_MEMBER:
2079	case IGMP_LEAVING_MEMBER:
2080		if (state_change_retransmit_timer_expired) {
2081			/*
2082			 * State-change retransmission timer fired.
2083			 * If there are any further pending retransmissions,
2084			 * set the global pending state-change flag, and
2085			 * reset the timer.
2086			 */
2087			if (--inm->inm_scrv > 0) {
2088				inm->inm_sctimer = uri_fasthz;
2089				state_change_timers_running = 1;
2090			}
2091			/*
2092			 * Retransmit the previously computed state-change
2093			 * report. If there are no further pending
2094			 * retransmissions, the mbuf queue will be consumed.
2095			 * Update T0 state to T1 as we have now sent
2096			 * a state-change.
2097			 */
2098			(void) igmp_v3_merge_state_changes(inm, scq);
2099
2100			inm_commit(inm);
2101			IGMP_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__,
2102			    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_name,
2103			    inm->inm_ifp->if_unit));
2104
2105			/*
2106			 * If we are leaving the group for good, make sure
2107			 * we release IGMP's reference to it.
2108			 * This release must be deferred using a SLIST,
2109			 * as we are called from a loop which traverses
2110			 * the in_multihead list.
2111			 */
2112			if (inm->inm_state == IGMP_LEAVING_MEMBER &&
2113			    inm->inm_scrv == 0) {
2114				inm->inm_state = IGMP_NOT_MEMBER;
2115				/*
2116				 * A reference has already been held in
2117				 * igmp_final_leave() for this inm, so
2118				 * no need to hold another one.  We also
2119				 * bumped up its request count then, so
2120				 * that it stays in in_multihead.  Both
2121				 * of them will be released when it is
2122				 * dequeued later on.
2123				 */
2124				VERIFY(inm->inm_nrelecnt != 0);
2125				SLIST_INSERT_HEAD(&igi->igi_relinmhead,
2126				    inm, inm_nrele);
2127			}
2128		}
2129		break;
2130	}
2131}
2132
2133/*
2134 * Suppress a group's pending response to a group or source/group query.
2135 *
2136 * Do NOT suppress state changes. This leads to IGMPv3 inconsistency.
2137 * Do NOT update ST1/ST0 as this operation merely suppresses
2138 * the currently pending group record.
2139 * Do NOT suppress the response to a general query. It is possible but
2140 * it would require adding another state or flag.
2141 */
2142static void
2143igmp_v3_suppress_group_record(struct in_multi *inm)
2144{
2145
2146	INM_LOCK_ASSERT_HELD(inm);
2147	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2148
2149	VERIFY(inm->inm_igi->igi_version == IGMP_VERSION_3);
2150
2151	if (inm->inm_state != IGMP_G_QUERY_PENDING_MEMBER ||
2152	    inm->inm_state != IGMP_SG_QUERY_PENDING_MEMBER)
2153		return;
2154
2155	if (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER)
2156		inm_clear_recorded(inm);
2157
2158	inm->inm_timer = 0;
2159	inm->inm_state = IGMP_REPORTING_MEMBER;
2160}
2161
2162/*
2163 * Switch to a different IGMP version on the given interface,
2164 * as per Section 7.2.1.
2165 */
2166static void
2167igmp_set_version(struct igmp_ifinfo *igi, const int igmp_version)
2168{
2169	int old_version_timer;
2170
2171	IGI_LOCK_ASSERT_HELD(igi);
2172
2173	IGMP_PRINTF(("%s: switching to v%d on ifp %p(%s%d)\n", __func__,
2174	    igmp_version, igi->igi_ifp, igi->igi_ifp->if_name,
2175	    igi->igi_ifp->if_unit));
2176
2177	if (igmp_version == IGMP_VERSION_1 || igmp_version == IGMP_VERSION_2) {
2178		/*
2179		 * Compute the "Older Version Querier Present" timer as per
2180		 * Section 8.12.
2181		 */
2182		old_version_timer = igi->igi_rv * igi->igi_qi + igi->igi_qri;
2183		old_version_timer *= PR_SLOWHZ;
2184
2185		if (igmp_version == IGMP_VERSION_1) {
2186			igi->igi_v1_timer = old_version_timer;
2187			igi->igi_v2_timer = 0;
2188		} else if (igmp_version == IGMP_VERSION_2) {
2189			igi->igi_v1_timer = 0;
2190			igi->igi_v2_timer = old_version_timer;
2191		}
2192	}
2193
2194	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
2195		if (igi->igi_version != IGMP_VERSION_2) {
2196			igi->igi_version = IGMP_VERSION_2;
2197			igmp_v3_cancel_link_timers(igi);
2198		}
2199	} else if (igi->igi_v1_timer > 0) {
2200		if (igi->igi_version != IGMP_VERSION_1) {
2201			igi->igi_version = IGMP_VERSION_1;
2202			igmp_v3_cancel_link_timers(igi);
2203		}
2204	}
2205
2206	IGI_LOCK_ASSERT_HELD(igi);
2207}
2208
2209/*
2210 * Cancel pending IGMPv3 timers for the given link and all groups
2211 * joined on it; state-change, general-query, and group-query timers.
2212 *
2213 * Only ever called on a transition from v3 to Compatibility mode. Kill
2214 * the timers stone dead (this may be expensive for large N groups), they
2215 * will be restarted if Compatibility Mode deems that they must be due to
2216 * query processing.
2217 */
2218static void
2219igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi)
2220{
2221	struct ifnet		*ifp;
2222	struct in_multi		*inm;
2223	struct in_multistep	step;
2224
2225	IGI_LOCK_ASSERT_HELD(igi);
2226
2227	IGMP_PRINTF(("%s: cancel v3 timers on ifp %p(%s%d)\n", __func__,
2228	    igi->igi_ifp, igi->igi_ifp->if_name, igi->igi_ifp->if_unit));
2229
2230	/*
2231	 * Stop the v3 General Query Response on this link stone dead.
2232	 * If fasttimo is woken up due to interface_timers_running,
2233	 * the flag will be cleared if there are no pending link timers.
2234	 */
2235	igi->igi_v3_timer = 0;
2236
2237	/*
2238	 * Now clear the current-state and state-change report timers
2239	 * for all memberships scoped to this link.
2240	 */
2241	ifp = igi->igi_ifp;
2242	IGI_UNLOCK(igi);
2243
2244	in_multihead_lock_shared();
2245	IN_FIRST_MULTI(step, inm);
2246	while (inm != NULL) {
2247		INM_LOCK(inm);
2248		if (inm->inm_ifp != ifp)
2249			goto next;
2250
2251		switch (inm->inm_state) {
2252		case IGMP_NOT_MEMBER:
2253		case IGMP_SILENT_MEMBER:
2254		case IGMP_IDLE_MEMBER:
2255		case IGMP_LAZY_MEMBER:
2256		case IGMP_SLEEPING_MEMBER:
2257		case IGMP_AWAKENING_MEMBER:
2258			/*
2259			 * These states are either not relevant in v3 mode,
2260			 * or are unreported. Do nothing.
2261			 */
2262			break;
2263		case IGMP_LEAVING_MEMBER:
2264			/*
2265			 * If we are leaving the group and switching to
2266			 * compatibility mode, we need to release the final
2267			 * reference held for issuing the INCLUDE {}, and
2268			 * transition to REPORTING to ensure the host leave
2269			 * message is sent upstream to the old querier --
2270			 * transition to NOT would lose the leave and race.
2271			 * During igmp_final_leave(), we bumped up both the
2272			 * request and reference counts.  Since we cannot
2273			 * call in_multi_detach() here, defer this task to
2274			 * the timer routine.
2275			 */
2276			VERIFY(inm->inm_nrelecnt != 0);
2277			IGI_LOCK(igi);
2278			SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele);
2279			IGI_UNLOCK(igi);
2280			/* FALLTHROUGH */
2281		case IGMP_G_QUERY_PENDING_MEMBER:
2282		case IGMP_SG_QUERY_PENDING_MEMBER:
2283			inm_clear_recorded(inm);
2284			/* FALLTHROUGH */
2285		case IGMP_REPORTING_MEMBER:
2286			inm->inm_state = IGMP_REPORTING_MEMBER;
2287			break;
2288		}
2289		/*
2290		 * Always clear state-change and group report timers.
2291		 * Free any pending IGMPv3 state-change records.
2292		 */
2293		inm->inm_sctimer = 0;
2294		inm->inm_timer = 0;
2295		IF_DRAIN(&inm->inm_scq);
2296next:
2297		INM_UNLOCK(inm);
2298		IN_NEXT_MULTI(step, inm);
2299	}
2300	in_multihead_lock_done();
2301
2302	IGI_LOCK(igi);
2303}
2304
2305/*
2306 * Update the Older Version Querier Present timers for a link.
2307 * See Section 7.2.1 of RFC 3376.
2308 */
2309static void
2310igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi)
2311{
2312	IGI_LOCK_ASSERT_HELD(igi);
2313
2314	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer == 0) {
2315		/*
2316		 * IGMPv1 and IGMPv2 Querier Present timers expired.
2317		 *
2318		 * Revert to IGMPv3.
2319		 */
2320		if (igi->igi_version != IGMP_VERSION_3) {
2321			IGMP_PRINTF(("%s: transition from v%d -> v%d on %p(%s%d)\n",
2322			    __func__, igi->igi_version, IGMP_VERSION_3,
2323			    igi->igi_ifp, igi->igi_ifp->if_name,
2324			    igi->igi_ifp->if_unit));
2325			igi->igi_version = IGMP_VERSION_3;
2326			IF_DRAIN(&igi->igi_v2q);
2327		}
2328	} else if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
2329		/*
2330		 * IGMPv1 Querier Present timer expired,
2331		 * IGMPv2 Querier Present timer running.
2332		 * If IGMPv2 was disabled since last timeout,
2333		 * revert to IGMPv3.
2334		 * If IGMPv2 is enabled, revert to IGMPv2.
2335		 */
2336		if (!igmp_v2enable) {
2337			IGMP_PRINTF(("%s: transition from v%d -> v%d on %p(%s%d)\n",
2338			    __func__, igi->igi_version, IGMP_VERSION_3,
2339			    igi->igi_ifp, igi->igi_ifp->if_name,
2340			    igi->igi_ifp->if_unit));
2341			igi->igi_v2_timer = 0;
2342			igi->igi_version = IGMP_VERSION_3;
2343			IF_DRAIN(&igi->igi_v2q);
2344		} else {
2345			--igi->igi_v2_timer;
2346			if (igi->igi_version != IGMP_VERSION_2) {
2347				IGMP_PRINTF(("%s: transition from v%d -> v%d on %p(%s%d)\n",
2348				    __func__, igi->igi_version, IGMP_VERSION_2,
2349				    igi->igi_ifp, igi->igi_ifp->if_name,
2350				    igi->igi_ifp->if_unit));
2351				igi->igi_version = IGMP_VERSION_2;
2352				IF_DRAIN(&igi->igi_gq);
2353			}
2354		}
2355	} else if (igi->igi_v1_timer > 0) {
2356		/*
2357		 * IGMPv1 Querier Present timer running.
2358		 * Stop IGMPv2 timer if running.
2359		 *
2360		 * If IGMPv1 was disabled since last timeout,
2361		 * revert to IGMPv3.
2362		 * If IGMPv1 is enabled, reset IGMPv2 timer if running.
2363		 */
2364		if (!igmp_v1enable) {
2365			IGMP_PRINTF(("%s: transition from v%d -> v%d on %p(%s%d)\n",
2366			    __func__, igi->igi_version, IGMP_VERSION_3,
2367			    igi->igi_ifp, igi->igi_ifp->if_name,
2368			    igi->igi_ifp->if_unit));
2369			igi->igi_v1_timer = 0;
2370			igi->igi_version = IGMP_VERSION_3;
2371			IF_DRAIN(&igi->igi_v2q);
2372		} else {
2373			--igi->igi_v1_timer;
2374		}
2375		if (igi->igi_v2_timer > 0) {
2376			IGMP_PRINTF(("%s: cancel v2 timer on %p(%s%d)\n",
2377			    __func__, igi->igi_ifp, igi->igi_ifp->if_name,
2378			    igi->igi_ifp->if_unit));
2379			igi->igi_v2_timer = 0;
2380		}
2381	}
2382}
2383
2384/*
2385 * Dispatch an IGMPv1/v2 host report or leave message.
2386 * These are always small enough to fit inside a single mbuf.
2387 */
2388static int
2389igmp_v1v2_queue_report(struct in_multi *inm, const int type)
2390{
2391	struct ifnet		*ifp;
2392	struct igmp		*igmp;
2393	struct ip		*ip;
2394	struct mbuf		*m;
2395	int			error = 0;
2396
2397	INM_LOCK_ASSERT_HELD(inm);
2398	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2399
2400	ifp = inm->inm_ifp;
2401
2402	MGETHDR(m, M_DONTWAIT, MT_DATA);
2403	if (m == NULL)
2404		return (ENOMEM);
2405	MH_ALIGN(m, sizeof(struct ip) + sizeof(struct igmp));
2406
2407	m->m_pkthdr.len = sizeof(struct ip) + sizeof(struct igmp);
2408
2409	m->m_data += sizeof(struct ip);
2410	m->m_len = sizeof(struct igmp);
2411
2412	igmp = mtod(m, struct igmp *);
2413	igmp->igmp_type = type;
2414	igmp->igmp_code = 0;
2415	igmp->igmp_group = inm->inm_addr;
2416	igmp->igmp_cksum = 0;
2417	igmp->igmp_cksum = in_cksum(m, sizeof(struct igmp));
2418
2419	m->m_data -= sizeof(struct ip);
2420	m->m_len += sizeof(struct ip);
2421
2422	ip = mtod(m, struct ip *);
2423	ip->ip_tos = 0;
2424	ip->ip_len = sizeof(struct ip) + sizeof(struct igmp);
2425	ip->ip_off = 0;
2426	ip->ip_p = IPPROTO_IGMP;
2427	ip->ip_src.s_addr = INADDR_ANY;
2428
2429	if (type == IGMP_HOST_LEAVE_MESSAGE)
2430		ip->ip_dst.s_addr = htonl(INADDR_ALLRTRS_GROUP);
2431	else
2432		ip->ip_dst = inm->inm_addr;
2433
2434	m->m_flags |= M_IGMPV2;
2435	if (inm->inm_igi->igi_flags & IGIF_LOOPBACK)
2436		m->m_flags |= M_IGMP_LOOP;
2437
2438	/*
2439	 * Due to the fact that at this point we are possibly holding
2440	 * in_multihead_lock in shared or exclusive mode, we can't call
2441	 * igmp_sendpkt() here since that will eventually call ip_output(),
2442	 * which will try to lock in_multihead_lock and cause a deadlock.
2443	 * Instead we defer the work to the igmp_slowtimo() thread, thus
2444	 * avoiding unlocking in_multihead_lock here.
2445	 */
2446	if (IF_QFULL(&inm->inm_igi->igi_v2q)) {
2447		IGMP_PRINTF(("%s: v1/v2 outbound queue full\n", __func__));
2448		error = ENOMEM;
2449		m_freem(m);
2450	} else
2451		IF_ENQUEUE(&inm->inm_igi->igi_v2q, m);
2452
2453	return (error);
2454}
2455
2456/*
2457 * Process a state change from the upper layer for the given IPv4 group.
2458 *
2459 * Each socket holds a reference on the in_multi in its own ip_moptions.
2460 * The socket layer will have made the necessary updates to the group
2461 * state, it is now up to IGMP to issue a state change report if there
2462 * has been any change between T0 (when the last state-change was issued)
2463 * and T1 (now).
2464 *
2465 * We use the IGMPv3 state machine at group level. The IGMP module
2466 * however makes the decision as to which IGMP protocol version to speak.
2467 * A state change *from* INCLUDE {} always means an initial join.
2468 * A state change *to* INCLUDE {} always means a final leave.
2469 *
2470 * FUTURE: If IGIF_V3LITE is enabled for this interface, then we can
2471 * save ourselves a bunch of work; any exclusive mode groups need not
2472 * compute source filter lists.
2473 */
2474int
2475igmp_change_state(struct in_multi *inm)
2476{
2477	struct igmp_ifinfo *igi;
2478	struct ifnet *ifp;
2479	int error = 0;
2480
2481	INM_LOCK_ASSERT_HELD(inm);
2482	VERIFY(inm->inm_igi != NULL);
2483	IGI_LOCK_ASSERT_NOTHELD(inm->inm_igi);
2484
2485	/*
2486	 * Try to detect if the upper layer just asked us to change state
2487	 * for an interface which has now gone away.
2488	 */
2489	VERIFY(inm->inm_ifma != NULL);
2490	ifp = inm->inm_ifma->ifma_ifp;
2491	/*
2492	 * Sanity check that netinet's notion of ifp is the same as net's.
2493	 */
2494	VERIFY(inm->inm_ifp == ifp);
2495
2496	igi = IGMP_IFINFO(ifp);
2497	VERIFY(igi != NULL);
2498
2499	/*
2500	 * If we detect a state transition to or from MCAST_UNDEFINED
2501	 * for this group, then we are starting or finishing an IGMP
2502	 * life cycle for this group.
2503	 */
2504	if (inm->inm_st[1].iss_fmode != inm->inm_st[0].iss_fmode) {
2505		IGMP_PRINTF(("%s: inm transition %d -> %d\n", __func__,
2506		    inm->inm_st[0].iss_fmode, inm->inm_st[1].iss_fmode));
2507		if (inm->inm_st[0].iss_fmode == MCAST_UNDEFINED) {
2508			IGMP_PRINTF(("%s: initial join\n", __func__));
2509			error = igmp_initial_join(inm, igi);
2510			goto out;
2511		} else if (inm->inm_st[1].iss_fmode == MCAST_UNDEFINED) {
2512			IGMP_PRINTF(("%s: final leave\n", __func__));
2513			igmp_final_leave(inm, igi);
2514			goto out;
2515		}
2516	} else {
2517		IGMP_PRINTF(("%s: filter set change\n", __func__));
2518	}
2519
2520	error = igmp_handle_state_change(inm, igi);
2521out:
2522	return (error);
2523}
2524
2525/*
2526 * Perform the initial join for an IGMP group.
2527 *
2528 * When joining a group:
2529 *  If the group should have its IGMP traffic suppressed, do nothing.
2530 *  IGMPv1 starts sending IGMPv1 host membership reports.
2531 *  IGMPv2 starts sending IGMPv2 host membership reports.
2532 *  IGMPv3 will schedule an IGMPv3 state-change report containing the
2533 *  initial state of the membership.
2534 */
2535static int
2536igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi)
2537{
2538	struct ifnet		*ifp;
2539	struct ifqueue		*ifq;
2540	int			 error, retval, syncstates;
2541
2542	INM_LOCK_ASSERT_HELD(inm);
2543	IGI_LOCK_ASSERT_NOTHELD(igi);
2544
2545	IGMP_PRINTF(("%s: initial join %s on ifp %p(%s%d)\n",
2546	    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp,
2547	    inm->inm_ifp->if_name, inm->inm_ifp->if_unit));
2548
2549	error = 0;
2550	syncstates = 1;
2551
2552	ifp = inm->inm_ifp;
2553
2554	IGI_LOCK(igi);
2555	VERIFY(igi->igi_ifp == ifp);
2556
2557	/*
2558	 * Groups joined on loopback or marked as 'not reported',
2559	 * e.g. 224.0.0.1, enter the IGMP_SILENT_MEMBER state and
2560	 * are never reported in any IGMP protocol exchanges.
2561	 * All other groups enter the appropriate IGMP state machine
2562	 * for the version in use on this link.
2563	 * A link marked as IGIF_SILENT causes IGMP to be completely
2564	 * disabled for the link.
2565	 */
2566	if ((ifp->if_flags & IFF_LOOPBACK) ||
2567	    (igi->igi_flags & IGIF_SILENT) ||
2568	    !igmp_isgroupreported(inm->inm_addr)) {
2569		IGMP_PRINTF(("%s: not kicking state machine for silent group\n",
2570		    __func__));
2571		inm->inm_state = IGMP_SILENT_MEMBER;
2572		inm->inm_timer = 0;
2573	} else {
2574		/*
2575		 * Deal with overlapping in_multi lifecycle.
2576		 * If this group was LEAVING, then make sure
2577		 * we drop the reference we picked up to keep the
2578		 * group around for the final INCLUDE {} enqueue.
2579		 * Since we cannot call in_multi_detach() here,
2580		 * defer this task to the timer routine.
2581		 */
2582		if (igi->igi_version == IGMP_VERSION_3 &&
2583		    inm->inm_state == IGMP_LEAVING_MEMBER) {
2584			VERIFY(inm->inm_nrelecnt != 0);
2585			SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele);
2586		}
2587
2588		inm->inm_state = IGMP_REPORTING_MEMBER;
2589
2590		switch (igi->igi_version) {
2591		case IGMP_VERSION_1:
2592		case IGMP_VERSION_2:
2593			inm->inm_state = IGMP_IDLE_MEMBER;
2594			error = igmp_v1v2_queue_report(inm,
2595			    (igi->igi_version == IGMP_VERSION_2) ?
2596			     IGMP_v2_HOST_MEMBERSHIP_REPORT :
2597			     IGMP_v1_HOST_MEMBERSHIP_REPORT);
2598
2599			INM_LOCK_ASSERT_HELD(inm);
2600			IGI_LOCK_ASSERT_HELD(igi);
2601
2602			if (error == 0) {
2603				inm->inm_timer = IGMP_RANDOM_DELAY(
2604				    IGMP_V1V2_MAX_RI * PR_SLOWHZ);
2605				current_state_timers_running = 1;
2606			}
2607			break;
2608
2609		case IGMP_VERSION_3:
2610			/*
2611			 * Defer update of T0 to T1, until the first copy
2612			 * of the state change has been transmitted.
2613			 */
2614			syncstates = 0;
2615
2616			/*
2617			 * Immediately enqueue a State-Change Report for
2618			 * this interface, freeing any previous reports.
2619			 * Don't kick the timers if there is nothing to do,
2620			 * or if an error occurred.
2621			 */
2622			ifq = &inm->inm_scq;
2623			IF_DRAIN(ifq);
2624			retval = igmp_v3_enqueue_group_record(ifq, inm, 1,
2625			    0, 0);
2626			IGMP_PRINTF(("%s: enqueue record = %d\n",
2627			    __func__, retval));
2628			if (retval <= 0) {
2629				error = retval * -1;
2630				break;
2631			}
2632
2633			/*
2634			 * Schedule transmission of pending state-change
2635			 * report up to RV times for this link. The timer
2636			 * will fire at the next igmp_fasttimo (~200ms),
2637			 * giving us an opportunity to merge the reports.
2638			 */
2639			if (igi->igi_flags & IGIF_LOOPBACK) {
2640				inm->inm_scrv = 1;
2641			} else {
2642				VERIFY(igi->igi_rv > 1);
2643				inm->inm_scrv = igi->igi_rv;
2644			}
2645			inm->inm_sctimer = 1;
2646			state_change_timers_running = 1;
2647
2648			error = 0;
2649			break;
2650		}
2651	}
2652	IGI_UNLOCK(igi);
2653
2654	/*
2655	 * Only update the T0 state if state change is atomic,
2656	 * i.e. we don't need to wait for a timer to fire before we
2657	 * can consider the state change to have been communicated.
2658	 */
2659	if (syncstates) {
2660		inm_commit(inm);
2661		IGMP_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__,
2662		    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_name,
2663		    inm->inm_ifp->if_unit));
2664	}
2665
2666	return (error);
2667}
2668
2669/*
2670 * Issue an intermediate state change during the IGMP life-cycle.
2671 */
2672static int
2673igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi)
2674{
2675	struct ifnet		*ifp;
2676	int			 retval;
2677
2678	INM_LOCK_ASSERT_HELD(inm);
2679	IGI_LOCK_ASSERT_NOTHELD(igi);
2680
2681	IGMP_PRINTF(("%s: state change for %s on ifp %p(%s%d)\n",
2682	    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp,
2683	    inm->inm_ifp->if_name, inm->inm_ifp->if_unit));
2684
2685	ifp = inm->inm_ifp;
2686
2687	IGI_LOCK(igi);
2688	VERIFY(igi->igi_ifp == ifp);
2689
2690	if ((ifp->if_flags & IFF_LOOPBACK) ||
2691	    (igi->igi_flags & IGIF_SILENT) ||
2692	    !igmp_isgroupreported(inm->inm_addr) ||
2693	    (igi->igi_version != IGMP_VERSION_3)) {
2694		IGI_UNLOCK(igi);
2695		if (!igmp_isgroupreported(inm->inm_addr)) {
2696			IGMP_PRINTF(("%s: not kicking state "
2697			    "machine for silent group\n", __func__));
2698		}
2699		IGMP_PRINTF(("%s: nothing to do\n", __func__));
2700		inm_commit(inm);
2701		IGMP_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__,
2702		    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_name));
2703		return (0);
2704	}
2705
2706	IF_DRAIN(&inm->inm_scq);
2707
2708	retval = igmp_v3_enqueue_group_record(&inm->inm_scq, inm, 1, 0, 0);
2709	IGMP_PRINTF(("%s: enqueue record = %d\n", __func__, retval));
2710	if (retval <= 0) {
2711		IGI_UNLOCK(igi);
2712		return (-retval);
2713	}
2714	/*
2715	 * If record(s) were enqueued, start the state-change
2716	 * report timer for this group.
2717	 */
2718	inm->inm_scrv = ((igi->igi_flags & IGIF_LOOPBACK) ? 1 : igi->igi_rv);
2719	inm->inm_sctimer = 1;
2720	state_change_timers_running = 1;
2721	IGI_UNLOCK(igi);
2722
2723	return (0);
2724}
2725
2726/*
2727 * Perform the final leave for an IGMP group.
2728 *
2729 * When leaving a group:
2730 *  IGMPv1 does nothing.
2731 *  IGMPv2 sends a host leave message, if and only if we are the reporter.
2732 *  IGMPv3 enqueues a state-change report containing a transition
2733 *  to INCLUDE {} for immediate transmission.
2734 */
2735static void
2736igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi)
2737{
2738	int syncstates = 1;
2739
2740	INM_LOCK_ASSERT_HELD(inm);
2741	IGI_LOCK_ASSERT_NOTHELD(igi);
2742
2743	IGMP_PRINTF(("%s: final leave %s on ifp %p(%s%d)\n",
2744	    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp,
2745	    inm->inm_ifp->if_name, inm->inm_ifp->if_unit));
2746
2747	switch (inm->inm_state) {
2748	case IGMP_NOT_MEMBER:
2749	case IGMP_SILENT_MEMBER:
2750	case IGMP_LEAVING_MEMBER:
2751		/* Already leaving or left; do nothing. */
2752		IGMP_PRINTF(("%s: not kicking state machine for silent group\n",
2753		    __func__));
2754		break;
2755	case IGMP_REPORTING_MEMBER:
2756	case IGMP_IDLE_MEMBER:
2757	case IGMP_G_QUERY_PENDING_MEMBER:
2758	case IGMP_SG_QUERY_PENDING_MEMBER:
2759		IGI_LOCK(igi);
2760		if (igi->igi_version == IGMP_VERSION_2) {
2761			if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
2762			    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
2763				panic("%s: IGMPv3 state reached, not IGMPv3 "
2764				    "mode\n", __func__);
2765				/* NOTREACHED */
2766			}
2767			igmp_v1v2_queue_report(inm, IGMP_HOST_LEAVE_MESSAGE);
2768
2769			INM_LOCK_ASSERT_HELD(inm);
2770			IGI_LOCK_ASSERT_HELD(igi);
2771
2772			inm->inm_state = IGMP_NOT_MEMBER;
2773		} else if (igi->igi_version == IGMP_VERSION_3) {
2774			/*
2775			 * Stop group timer and all pending reports.
2776			 * Immediately enqueue a state-change report
2777			 * TO_IN {} to be sent on the next fast timeout,
2778			 * giving us an opportunity to merge reports.
2779			 */
2780			IF_DRAIN(&inm->inm_scq);
2781			inm->inm_timer = 0;
2782			if (igi->igi_flags & IGIF_LOOPBACK) {
2783				inm->inm_scrv = 1;
2784			} else {
2785				inm->inm_scrv = igi->igi_rv;
2786			}
2787			IGMP_PRINTF(("%s: Leaving %s/%s%d with %d "
2788			    "pending retransmissions.\n", __func__,
2789			    inet_ntoa(inm->inm_addr),
2790			    inm->inm_ifp->if_name, inm->inm_ifp->if_unit,
2791			    inm->inm_scrv));
2792			if (inm->inm_scrv == 0) {
2793				inm->inm_state = IGMP_NOT_MEMBER;
2794				inm->inm_sctimer = 0;
2795			} else {
2796				int retval;
2797				/*
2798				 * Stick around in the in_multihead list;
2799				 * the final detach will be issued by
2800				 * igmp_v3_process_group_timers() when
2801				 * the retransmit timer expires.
2802				 */
2803				INM_ADDREF_LOCKED(inm);
2804				VERIFY(inm->inm_debug & IFD_ATTACHED);
2805				inm->inm_reqcnt++;
2806				VERIFY(inm->inm_reqcnt >= 1);
2807				inm->inm_nrelecnt++;
2808				VERIFY(inm->inm_nrelecnt != 0);
2809
2810				retval = igmp_v3_enqueue_group_record(
2811				    &inm->inm_scq, inm, 1, 0, 0);
2812				KASSERT(retval != 0,
2813				    ("%s: enqueue record = %d\n", __func__,
2814				     retval));
2815
2816				inm->inm_state = IGMP_LEAVING_MEMBER;
2817				inm->inm_sctimer = 1;
2818				state_change_timers_running = 1;
2819				syncstates = 0;
2820			}
2821		}
2822		IGI_UNLOCK(igi);
2823		break;
2824	case IGMP_LAZY_MEMBER:
2825	case IGMP_SLEEPING_MEMBER:
2826	case IGMP_AWAKENING_MEMBER:
2827		/* Our reports are suppressed; do nothing. */
2828		break;
2829	}
2830
2831	if (syncstates) {
2832		inm_commit(inm);
2833		IGMP_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__,
2834		    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_name,
2835		    inm->inm_ifp->if_unit));
2836		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
2837		IGMP_PRINTF(("%s: T1 now MCAST_UNDEFINED for %s/%s%d\n",
2838		    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp->if_name,
2839		    inm->inm_ifp->if_unit));
2840	}
2841}
2842
2843/*
2844 * Enqueue an IGMPv3 group record to the given output queue.
2845 *
2846 * XXX This function could do with having the allocation code
2847 * split out, and the multiple-tree-walks coalesced into a single
2848 * routine as has been done in igmp_v3_enqueue_filter_change().
2849 *
2850 * If is_state_change is zero, a current-state record is appended.
2851 * If is_state_change is non-zero, a state-change report is appended.
2852 *
2853 * If is_group_query is non-zero, an mbuf packet chain is allocated.
2854 * If is_group_query is zero, and if there is a packet with free space
2855 * at the tail of the queue, it will be appended to providing there
2856 * is enough free space.
2857 * Otherwise a new mbuf packet chain is allocated.
2858 *
2859 * If is_source_query is non-zero, each source is checked to see if
2860 * it was recorded for a Group-Source query, and will be omitted if
2861 * it is not both in-mode and recorded.
2862 *
2863 * The function will attempt to allocate leading space in the packet
2864 * for the IP/IGMP header to be prepended without fragmenting the chain.
2865 *
2866 * If successful the size of all data appended to the queue is returned,
2867 * otherwise an error code less than zero is returned, or zero if
2868 * no record(s) were appended.
2869 */
2870static int
2871igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
2872    const int is_state_change, const int is_group_query,
2873    const int is_source_query)
2874{
2875	struct igmp_grouprec	 ig;
2876	struct igmp_grouprec	*pig;
2877	struct ifnet		*ifp;
2878	struct ip_msource	*ims, *nims;
2879	struct mbuf		*m0, *m, *md;
2880	int			 error, is_filter_list_change;
2881	int			 minrec0len, m0srcs, msrcs, nbytes, off;
2882	int			 record_has_sources;
2883	int			 now;
2884	int			 type;
2885	in_addr_t		 naddr;
2886	uint8_t			 mode;
2887	u_int16_t		 ig_numsrc;
2888
2889	INM_LOCK_ASSERT_HELD(inm);
2890	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2891
2892	error = 0;
2893	ifp = inm->inm_ifp;
2894	is_filter_list_change = 0;
2895	m = NULL;
2896	m0 = NULL;
2897	m0srcs = 0;
2898	msrcs = 0;
2899	nbytes = 0;
2900	nims = NULL;
2901	record_has_sources = 1;
2902	pig = NULL;
2903	type = IGMP_DO_NOTHING;
2904	mode = inm->inm_st[1].iss_fmode;
2905
2906	/*
2907	 * If we did not transition out of ASM mode during t0->t1,
2908	 * and there are no source nodes to process, we can skip
2909	 * the generation of source records.
2910	 */
2911	if (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0 &&
2912	    inm->inm_nsrc == 0)
2913		record_has_sources = 0;
2914
2915	if (is_state_change) {
2916		/*
2917		 * Queue a state change record.
2918		 * If the mode did not change, and there are non-ASM
2919		 * listeners or source filters present,
2920		 * we potentially need to issue two records for the group.
2921		 * If we are transitioning to MCAST_UNDEFINED, we need
2922		 * not send any sources.
2923		 * If there are ASM listeners, and there was no filter
2924		 * mode transition of any kind, do nothing.
2925		 */
2926		if (mode != inm->inm_st[0].iss_fmode) {
2927			if (mode == MCAST_EXCLUDE) {
2928				IGMP_PRINTF(("%s: change to EXCLUDE\n",
2929				    __func__));
2930				type = IGMP_CHANGE_TO_EXCLUDE_MODE;
2931			} else {
2932				IGMP_PRINTF(("%s: change to INCLUDE\n",
2933				    __func__));
2934				type = IGMP_CHANGE_TO_INCLUDE_MODE;
2935				if (mode == MCAST_UNDEFINED)
2936					record_has_sources = 0;
2937			}
2938		} else {
2939			if (record_has_sources) {
2940				is_filter_list_change = 1;
2941			} else {
2942				type = IGMP_DO_NOTHING;
2943			}
2944		}
2945	} else {
2946		/*
2947		 * Queue a current state record.
2948		 */
2949		if (mode == MCAST_EXCLUDE) {
2950			type = IGMP_MODE_IS_EXCLUDE;
2951		} else if (mode == MCAST_INCLUDE) {
2952			type = IGMP_MODE_IS_INCLUDE;
2953			VERIFY(inm->inm_st[1].iss_asm == 0);
2954		}
2955	}
2956
2957	/*
2958	 * Generate the filter list changes using a separate function.
2959	 */
2960	if (is_filter_list_change)
2961		return (igmp_v3_enqueue_filter_change(ifq, inm));
2962
2963	if (type == IGMP_DO_NOTHING) {
2964		IGMP_PRINTF(("%s: nothing to do for %s/%s%d\n",
2965		    __func__, inet_ntoa(inm->inm_addr),
2966		    inm->inm_ifp->if_name, inm->inm_ifp->if_unit));
2967		return (0);
2968	}
2969
2970	/*
2971	 * If any sources are present, we must be able to fit at least
2972	 * one in the trailing space of the tail packet's mbuf,
2973	 * ideally more.
2974	 */
2975	minrec0len = sizeof(struct igmp_grouprec);
2976	if (record_has_sources)
2977		minrec0len += sizeof(in_addr_t);
2978
2979	IGMP_PRINTF(("%s: queueing %s for %s/%s%d\n", __func__,
2980	    igmp_rec_type_to_str(type), inet_ntoa(inm->inm_addr),
2981	    inm->inm_ifp->if_name, inm->inm_ifp->if_unit));
2982
2983	/*
2984	 * Check if we have a packet in the tail of the queue for this
2985	 * group into which the first group record for this group will fit.
2986	 * Otherwise allocate a new packet.
2987	 * Always allocate leading space for IP+RA_OPT+IGMP+REPORT.
2988	 * Note: Group records for G/GSR query responses MUST be sent
2989	 * in their own packet.
2990	 */
2991	m0 = ifq->ifq_tail;
2992	if (!is_group_query &&
2993	    m0 != NULL &&
2994	    (m0->m_pkthdr.vt_nrecs + 1 <= IGMP_V3_REPORT_MAXRECS) &&
2995	    (m0->m_pkthdr.len + minrec0len) <
2996	     (ifp->if_mtu - IGMP_LEADINGSPACE)) {
2997		m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
2998			    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
2999		m = m0;
3000		IGMP_PRINTF(("%s: use existing packet\n", __func__));
3001	} else {
3002		if (IF_QFULL(ifq)) {
3003			IGMP_PRINTF(("%s: outbound queue full\n", __func__));
3004			return (-ENOMEM);
3005		}
3006		m = NULL;
3007		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
3008		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
3009		if (!is_state_change && !is_group_query) {
3010			m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3011			if (m)
3012				m->m_data += IGMP_LEADINGSPACE;
3013		}
3014		if (m == NULL) {
3015			m = m_gethdr(M_DONTWAIT, MT_DATA);
3016			if (m)
3017				MH_ALIGN(m, IGMP_LEADINGSPACE);
3018		}
3019		if (m == NULL)
3020			return (-ENOMEM);
3021
3022		IGMP_PRINTF(("%s: allocated first packet\n", __func__));
3023	}
3024
3025	/*
3026	 * Append group record.
3027	 * If we have sources, we don't know how many yet.
3028	 */
3029	ig.ig_type = type;
3030	ig.ig_datalen = 0;
3031	ig.ig_numsrc = 0;
3032	ig.ig_group = inm->inm_addr;
3033	if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
3034		if (m != m0)
3035			m_freem(m);
3036		IGMP_PRINTF(("%s: m_append() failed.\n", __func__));
3037		return (-ENOMEM);
3038	}
3039	nbytes += sizeof(struct igmp_grouprec);
3040
3041	/*
3042	 * Append as many sources as will fit in the first packet.
3043	 * If we are appending to a new packet, the chain allocation
3044	 * may potentially use clusters; use m_getptr() in this case.
3045	 * If we are appending to an existing packet, we need to obtain
3046	 * a pointer to the group record after m_append(), in case a new
3047	 * mbuf was allocated.
3048	 * Only append sources which are in-mode at t1. If we are
3049	 * transitioning to MCAST_UNDEFINED state on the group, do not
3050	 * include source entries.
3051	 * Only report recorded sources in our filter set when responding
3052	 * to a group-source query.
3053	 */
3054	if (record_has_sources) {
3055		if (m == m0) {
3056			md = m_last(m);
3057			pig = (struct igmp_grouprec *)(void *)
3058			    (mtod(md, uint8_t *) + md->m_len - nbytes);
3059		} else {
3060			md = m_getptr(m, 0, &off);
3061			pig = (struct igmp_grouprec *)(void *)
3062			    (mtod(md, uint8_t *) + off);
3063		}
3064		msrcs = 0;
3065		RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) {
3066			IGMP_PRINTF(("%s: visit node %s\n", __func__,
3067			    inet_ntoa_haddr(ims->ims_haddr)));
3068			now = ims_get_mode(inm, ims, 1);
3069			IGMP_PRINTF(("%s: node is %d\n", __func__, now));
3070			if ((now != mode) ||
3071			    (now == mode && mode == MCAST_UNDEFINED)) {
3072				IGMP_PRINTF(("%s: skip node\n", __func__));
3073				continue;
3074			}
3075			if (is_source_query && ims->ims_stp == 0) {
3076				IGMP_PRINTF(("%s: skip unrecorded node\n",
3077				    __func__));
3078				continue;
3079			}
3080			IGMP_PRINTF(("%s: append node\n", __func__));
3081			naddr = htonl(ims->ims_haddr);
3082			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
3083				if (m != m0)
3084					m_freem(m);
3085				IGMP_PRINTF(("%s: m_append() failed.\n",
3086				    __func__));
3087				return (-ENOMEM);
3088			}
3089			nbytes += sizeof(in_addr_t);
3090			++msrcs;
3091			if (msrcs == m0srcs)
3092				break;
3093		}
3094		IGMP_PRINTF(("%s: msrcs is %d this packet\n", __func__,
3095		    msrcs));
3096		ig_numsrc = htons(msrcs);
3097		bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof (ig_numsrc));
3098		nbytes += (msrcs * sizeof(in_addr_t));
3099	}
3100
3101	if (is_source_query && msrcs == 0) {
3102		IGMP_PRINTF(("%s: no recorded sources to report\n", __func__));
3103		if (m != m0)
3104			m_freem(m);
3105		return (0);
3106	}
3107
3108	/*
3109	 * We are good to go with first packet.
3110	 */
3111	if (m != m0) {
3112		IGMP_PRINTF(("%s: enqueueing first packet\n", __func__));
3113		m->m_pkthdr.vt_nrecs = 1;
3114		m->m_pkthdr.rcvif = ifp;
3115		IF_ENQUEUE(ifq, m);
3116	} else {
3117		m->m_pkthdr.vt_nrecs++;
3118	}
3119	/*
3120	 * No further work needed if no source list in packet(s).
3121	 */
3122	if (!record_has_sources)
3123		return (nbytes);
3124
3125	/*
3126	 * Whilst sources remain to be announced, we need to allocate
3127	 * a new packet and fill out as many sources as will fit.
3128	 * Always try for a cluster first.
3129	 */
3130	while (nims != NULL) {
3131		if (IF_QFULL(ifq)) {
3132			IGMP_PRINTF(("%s: outbound queue full\n", __func__));
3133			return (-ENOMEM);
3134		}
3135		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3136		if (m)
3137			m->m_data += IGMP_LEADINGSPACE;
3138		if (m == NULL) {
3139			m = m_gethdr(M_DONTWAIT, MT_DATA);
3140			if (m)
3141				MH_ALIGN(m, IGMP_LEADINGSPACE);
3142		}
3143		if (m == NULL)
3144			return (-ENOMEM);
3145		md = m_getptr(m, 0, &off);
3146		pig = (struct igmp_grouprec *)(void *)
3147		    (mtod(md, uint8_t *) + off);
3148		IGMP_PRINTF(("%s: allocated next packet\n", __func__));
3149
3150		if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
3151			if (m != m0)
3152				m_freem(m);
3153			IGMP_PRINTF(("%s: m_append() failed.\n", __func__));
3154			return (-ENOMEM);
3155		}
3156		m->m_pkthdr.vt_nrecs = 1;
3157		nbytes += sizeof(struct igmp_grouprec);
3158
3159		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
3160		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
3161
3162		msrcs = 0;
3163		RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
3164			IGMP_PRINTF(("%s: visit node %s\n", __func__,
3165			    inet_ntoa_haddr(ims->ims_haddr)));
3166			now = ims_get_mode(inm, ims, 1);
3167			if ((now != mode) ||
3168			    (now == mode && mode == MCAST_UNDEFINED)) {
3169				IGMP_PRINTF(("%s: skip node\n", __func__));
3170				continue;
3171			}
3172			if (is_source_query && ims->ims_stp == 0) {
3173				IGMP_PRINTF(("%s: skip unrecorded node\n",
3174				    __func__));
3175				continue;
3176			}
3177			IGMP_PRINTF(("%s: append node\n", __func__));
3178			naddr = htonl(ims->ims_haddr);
3179			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
3180				if (m != m0)
3181					m_freem(m);
3182				IGMP_PRINTF(("%s: m_append() failed.\n",
3183				    __func__));
3184				return (-ENOMEM);
3185			}
3186			++msrcs;
3187			if (msrcs == m0srcs)
3188				break;
3189		}
3190		ig_numsrc = htons(msrcs);
3191		bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof (ig_numsrc));
3192		nbytes += (msrcs * sizeof(in_addr_t));
3193
3194		IGMP_PRINTF(("%s: enqueueing next packet\n", __func__));
3195		m->m_pkthdr.rcvif = ifp;
3196		IF_ENQUEUE(ifq, m);
3197	}
3198
3199	return (nbytes);
3200}
3201
3202/*
3203 * Type used to mark record pass completion.
3204 * We exploit the fact we can cast to this easily from the
3205 * current filter modes on each ip_msource node.
3206 */
3207typedef enum {
3208	REC_NONE = 0x00,	/* MCAST_UNDEFINED */
3209	REC_ALLOW = 0x01,	/* MCAST_INCLUDE */
3210	REC_BLOCK = 0x02,	/* MCAST_EXCLUDE */
3211	REC_FULL = REC_ALLOW | REC_BLOCK
3212} rectype_t;
3213
3214/*
3215 * Enqueue an IGMPv3 filter list change to the given output queue.
3216 *
3217 * Source list filter state is held in an RB-tree. When the filter list
3218 * for a group is changed without changing its mode, we need to compute
3219 * the deltas between T0 and T1 for each source in the filter set,
3220 * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
3221 *
3222 * As we may potentially queue two record types, and the entire R-B tree
3223 * needs to be walked at once, we break this out into its own function
3224 * so we can generate a tightly packed queue of packets.
3225 *
3226 * XXX This could be written to only use one tree walk, although that makes
3227 * serializing into the mbuf chains a bit harder. For now we do two walks
3228 * which makes things easier on us, and it may or may not be harder on
3229 * the L2 cache.
3230 *
3231 * If successful the size of all data appended to the queue is returned,
3232 * otherwise an error code less than zero is returned, or zero if
3233 * no record(s) were appended.
3234 */
3235static int
3236igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
3237{
3238	static const int MINRECLEN =
3239	    sizeof(struct igmp_grouprec) + sizeof(in_addr_t);
3240	struct ifnet		*ifp;
3241	struct igmp_grouprec	 ig;
3242	struct igmp_grouprec	*pig;
3243	struct ip_msource	*ims, *nims;
3244	struct mbuf		*m, *m0, *md;
3245	in_addr_t		 naddr;
3246	int			 m0srcs, nbytes, npbytes, off, rsrcs, schanged;
3247	int			 nallow, nblock;
3248	uint8_t			 mode, now, then;
3249	rectype_t		 crt, drt, nrt;
3250	u_int16_t		 ig_numsrc;
3251
3252	INM_LOCK_ASSERT_HELD(inm);
3253
3254	if (inm->inm_nsrc == 0 ||
3255	    (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0))
3256		return (0);
3257
3258	ifp = inm->inm_ifp;			/* interface */
3259	mode = inm->inm_st[1].iss_fmode;	/* filter mode at t1 */
3260	crt = REC_NONE;	/* current group record type */
3261	drt = REC_NONE;	/* mask of completed group record types */
3262	nrt = REC_NONE;	/* record type for current node */
3263	m0srcs = 0;	/* # source which will fit in current mbuf chain */
3264	nbytes = 0;	/* # of bytes appended to group's state-change queue */
3265	npbytes = 0;	/* # of bytes appended this packet */
3266	rsrcs = 0;	/* # sources encoded in current record */
3267	schanged = 0;	/* # nodes encoded in overall filter change */
3268	nallow = 0;	/* # of source entries in ALLOW_NEW */
3269	nblock = 0;	/* # of source entries in BLOCK_OLD */
3270	nims = NULL;	/* next tree node pointer */
3271
3272	/*
3273	 * For each possible filter record mode.
3274	 * The first kind of source we encounter tells us which
3275	 * is the first kind of record we start appending.
3276	 * If a node transitioned to UNDEFINED at t1, its mode is treated
3277	 * as the inverse of the group's filter mode.
3278	 */
3279	while (drt != REC_FULL) {
3280		do {
3281			m0 = ifq->ifq_tail;
3282			if (m0 != NULL &&
3283			    (m0->m_pkthdr.vt_nrecs + 1 <=
3284			     IGMP_V3_REPORT_MAXRECS) &&
3285			    (m0->m_pkthdr.len + MINRECLEN) <
3286			     (ifp->if_mtu - IGMP_LEADINGSPACE)) {
3287				m = m0;
3288				m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
3289					    sizeof(struct igmp_grouprec)) /
3290				    sizeof(in_addr_t);
3291				IGMP_PRINTF(("%s: use previous packet\n",
3292				    __func__));
3293			} else {
3294				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3295				if (m)
3296					m->m_data += IGMP_LEADINGSPACE;
3297				if (m == NULL) {
3298					m = m_gethdr(M_DONTWAIT, MT_DATA);
3299					if (m)
3300						MH_ALIGN(m, IGMP_LEADINGSPACE);
3301				}
3302				if (m == NULL) {
3303					IGMP_PRINTF(("%s: m_get*() failed\n",
3304					    __func__));
3305					return (-ENOMEM);
3306				}
3307				m->m_pkthdr.vt_nrecs = 0;
3308				m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
3309				    sizeof(struct igmp_grouprec)) /
3310				    sizeof(in_addr_t);
3311				npbytes = 0;
3312				IGMP_PRINTF(("%s: allocated new packet\n",
3313				    __func__));
3314			}
3315			/*
3316			 * Append the IGMP group record header to the
3317			 * current packet's data area.
3318			 * Recalculate pointer to free space for next
3319			 * group record, in case m_append() allocated
3320			 * a new mbuf or cluster.
3321			 */
3322			memset(&ig, 0, sizeof(ig));
3323			ig.ig_group = inm->inm_addr;
3324			if (!m_append(m, sizeof(ig), (void *)&ig)) {
3325				if (m != m0)
3326					m_freem(m);
3327				IGMP_PRINTF(("%s: m_append() failed\n",
3328				    __func__));
3329				return (-ENOMEM);
3330			}
3331			npbytes += sizeof(struct igmp_grouprec);
3332			if (m != m0) {
3333				/* new packet; offset in c hain */
3334				md = m_getptr(m, npbytes -
3335				    sizeof(struct igmp_grouprec), &off);
3336				pig = (struct igmp_grouprec *)(void *)(mtod(md,
3337				    uint8_t *) + off);
3338			} else {
3339				/* current packet; offset from last append */
3340				md = m_last(m);
3341				pig = (struct igmp_grouprec *)(void *)(mtod(md,
3342				    uint8_t *) + md->m_len -
3343				    sizeof(struct igmp_grouprec));
3344			}
3345			/*
3346			 * Begin walking the tree for this record type
3347			 * pass, or continue from where we left off
3348			 * previously if we had to allocate a new packet.
3349			 * Only report deltas in-mode at t1.
3350			 * We need not report included sources as allowed
3351			 * if we are in inclusive mode on the group,
3352			 * however the converse is not true.
3353			 */
3354			rsrcs = 0;
3355			if (nims == NULL)
3356				nims = RB_MIN(ip_msource_tree, &inm->inm_srcs);
3357			RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
3358				IGMP_PRINTF(("%s: visit node %s\n",
3359				    __func__, inet_ntoa_haddr(ims->ims_haddr)));
3360				now = ims_get_mode(inm, ims, 1);
3361				then = ims_get_mode(inm, ims, 0);
3362				IGMP_PRINTF(("%s: mode: t0 %d, t1 %d\n",
3363				    __func__, then, now));
3364				if (now == then) {
3365					IGMP_PRINTF(("%s: skip unchanged\n",
3366					    __func__));
3367					continue;
3368				}
3369				if (mode == MCAST_EXCLUDE &&
3370				    now == MCAST_INCLUDE) {
3371					IGMP_PRINTF(("%s: skip IN src on EX "
3372					    "group\n", __func__));
3373					continue;
3374				}
3375				nrt = (rectype_t)now;
3376				if (nrt == REC_NONE)
3377					nrt = (rectype_t)(~mode & REC_FULL);
3378				if (schanged++ == 0) {
3379					crt = nrt;
3380				} else if (crt != nrt)
3381					continue;
3382				naddr = htonl(ims->ims_haddr);
3383				if (!m_append(m, sizeof(in_addr_t),
3384				    (void *)&naddr)) {
3385					if (m != m0)
3386						m_freem(m);
3387					IGMP_PRINTF(("%s: m_append() failed\n",
3388					    __func__));
3389					return (-ENOMEM);
3390				}
3391				nallow += !!(crt == REC_ALLOW);
3392				nblock += !!(crt == REC_BLOCK);
3393				if (++rsrcs == m0srcs)
3394					break;
3395			}
3396			/*
3397			 * If we did not append any tree nodes on this
3398			 * pass, back out of allocations.
3399			 */
3400			if (rsrcs == 0) {
3401				npbytes -= sizeof(struct igmp_grouprec);
3402				if (m != m0) {
3403					IGMP_PRINTF(("%s: m_free(m)\n",
3404					    __func__));
3405					m_freem(m);
3406				} else {
3407					IGMP_PRINTF(("%s: m_adj(m, -ig)\n",
3408					    __func__));
3409					m_adj(m, -((int)sizeof(
3410					    struct igmp_grouprec)));
3411				}
3412				continue;
3413			}
3414			npbytes += (rsrcs * sizeof(in_addr_t));
3415			if (crt == REC_ALLOW)
3416				pig->ig_type = IGMP_ALLOW_NEW_SOURCES;
3417			else if (crt == REC_BLOCK)
3418				pig->ig_type = IGMP_BLOCK_OLD_SOURCES;
3419			ig_numsrc = htons(rsrcs);
3420			bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof (ig_numsrc));
3421			/*
3422			 * Count the new group record, and enqueue this
3423			 * packet if it wasn't already queued.
3424			 */
3425			m->m_pkthdr.vt_nrecs++;
3426			m->m_pkthdr.rcvif = ifp;
3427			if (m != m0)
3428				IF_ENQUEUE(ifq, m);
3429			nbytes += npbytes;
3430		} while (nims != NULL);
3431		drt |= crt;
3432		crt = (~crt & REC_FULL);
3433	}
3434
3435	IGMP_PRINTF(("%s: queued %d ALLOW_NEW, %d BLOCK_OLD\n", __func__,
3436	    nallow, nblock));
3437
3438	return (nbytes);
3439}
3440
3441static int
3442igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq)
3443{
3444	struct ifqueue	*gq;
3445	struct mbuf	*m;		/* pending state-change */
3446	struct mbuf	*m0;		/* copy of pending state-change */
3447	struct mbuf	*mt;		/* last state-change in packet */
3448	struct mbuf	*n;
3449	int		 docopy, domerge;
3450	u_int		 recslen;
3451
3452	INM_LOCK_ASSERT_HELD(inm);
3453
3454	docopy = 0;
3455	domerge = 0;
3456	recslen = 0;
3457
3458	/*
3459	 * If there are further pending retransmissions, make a writable
3460	 * copy of each queued state-change message before merging.
3461	 */
3462	if (inm->inm_scrv > 0)
3463		docopy = 1;
3464
3465	gq = &inm->inm_scq;
3466#ifdef IGMP_DEBUG
3467	if (gq->ifq_head == NULL) {
3468		IGMP_PRINTF(("%s: WARNING: queue for inm %p is empty\n",
3469		    __func__, inm));
3470	}
3471#endif
3472
3473	/*
3474	 * Use IF_REMQUEUE() instead of IF_DEQUEUE() below, since the
3475	 * packet might not always be at the head of the ifqueue.
3476	 */
3477	m = gq->ifq_head;
3478	while (m != NULL) {
3479		/*
3480		 * Only merge the report into the current packet if
3481		 * there is sufficient space to do so; an IGMPv3 report
3482		 * packet may only contain 65,535 group records.
3483		 * Always use a simple mbuf chain concatentation to do this,
3484		 * as large state changes for single groups may have
3485		 * allocated clusters.
3486		 */
3487		domerge = 0;
3488		mt = ifscq->ifq_tail;
3489		if (mt != NULL) {
3490			recslen = m_length(m);
3491
3492			if ((mt->m_pkthdr.vt_nrecs +
3493			    m->m_pkthdr.vt_nrecs <=
3494			    IGMP_V3_REPORT_MAXRECS) &&
3495			    (mt->m_pkthdr.len + recslen <=
3496			    (inm->inm_ifp->if_mtu - IGMP_LEADINGSPACE)))
3497				domerge = 1;
3498		}
3499
3500		if (!domerge && IF_QFULL(gq)) {
3501			IGMP_PRINTF(("%s: outbound queue full, skipping whole "
3502			    "packet %p\n", __func__, m));
3503			n = m->m_nextpkt;
3504			if (!docopy) {
3505				IF_REMQUEUE(gq, m);
3506				m_freem(m);
3507			}
3508			m = n;
3509			continue;
3510		}
3511
3512		if (!docopy) {
3513			IGMP_PRINTF(("%s: dequeueing %p\n", __func__, m));
3514			n = m->m_nextpkt;
3515			IF_REMQUEUE(gq, m);
3516			m0 = m;
3517			m = n;
3518		} else {
3519			IGMP_PRINTF(("%s: copying %p\n", __func__, m));
3520			m0 = m_dup(m, M_NOWAIT);
3521			if (m0 == NULL)
3522				return (ENOMEM);
3523			m0->m_nextpkt = NULL;
3524			m = m->m_nextpkt;
3525		}
3526
3527		if (!domerge) {
3528			IGMP_PRINTF(("%s: queueing %p to ifscq %p)\n",
3529			    __func__, m0, ifscq));
3530			m0->m_pkthdr.rcvif = inm->inm_ifp;
3531			IF_ENQUEUE(ifscq, m0);
3532		} else {
3533			struct mbuf *mtl;	/* last mbuf of packet mt */
3534
3535			IGMP_PRINTF(("%s: merging %p with ifscq tail %p)\n",
3536			    __func__, m0, mt));
3537
3538			mtl = m_last(mt);
3539			m0->m_flags &= ~M_PKTHDR;
3540			mt->m_pkthdr.len += recslen;
3541			mt->m_pkthdr.vt_nrecs +=
3542			    m0->m_pkthdr.vt_nrecs;
3543
3544			mtl->m_next = m0;
3545		}
3546	}
3547
3548	return (0);
3549}
3550
3551/*
3552 * Respond to a pending IGMPv3 General Query.
3553 */
3554static void
3555igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi)
3556{
3557	struct ifnet		*ifp;
3558	struct in_multi		*inm;
3559	struct in_multistep	step;
3560	int			 retval, loop;
3561
3562	IGI_LOCK_ASSERT_HELD(igi);
3563
3564	VERIFY(igi->igi_version == IGMP_VERSION_3);
3565
3566	ifp = igi->igi_ifp;
3567	IGI_UNLOCK(igi);
3568
3569	in_multihead_lock_shared();
3570	IN_FIRST_MULTI(step, inm);
3571	while (inm != NULL) {
3572		INM_LOCK(inm);
3573		if (inm->inm_ifp != ifp)
3574			goto next;
3575
3576		switch (inm->inm_state) {
3577		case IGMP_NOT_MEMBER:
3578		case IGMP_SILENT_MEMBER:
3579			break;
3580		case IGMP_REPORTING_MEMBER:
3581		case IGMP_IDLE_MEMBER:
3582		case IGMP_LAZY_MEMBER:
3583		case IGMP_SLEEPING_MEMBER:
3584		case IGMP_AWAKENING_MEMBER:
3585			inm->inm_state = IGMP_REPORTING_MEMBER;
3586			IGI_LOCK(igi);
3587			retval = igmp_v3_enqueue_group_record(&igi->igi_gq,
3588			    inm, 0, 0, 0);
3589			IGI_UNLOCK(igi);
3590			IGMP_PRINTF(("%s: enqueue record = %d\n",
3591			    __func__, retval));
3592			break;
3593		case IGMP_G_QUERY_PENDING_MEMBER:
3594		case IGMP_SG_QUERY_PENDING_MEMBER:
3595		case IGMP_LEAVING_MEMBER:
3596			break;
3597		}
3598next:
3599		INM_UNLOCK(inm);
3600		IN_NEXT_MULTI(step, inm);
3601	}
3602	in_multihead_lock_done();
3603
3604	IGI_LOCK(igi);
3605	loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
3606	igmp_dispatch_queue(igi, &igi->igi_gq, IGMP_MAX_RESPONSE_BURST,
3607	    loop, ifp);
3608	IGI_LOCK_ASSERT_HELD(igi);
3609	/*
3610	 * Slew transmission of bursts over 500ms intervals.
3611	 */
3612	if (igi->igi_gq.ifq_head != NULL) {
3613		igi->igi_v3_timer = 1 + IGMP_RANDOM_DELAY(
3614		    IGMP_RESPONSE_BURST_INTERVAL);
3615		interface_timers_running = 1;
3616	}
3617}
3618
3619/*
3620 * Transmit the next pending IGMP message in the output queue.
3621 *
3622 * Must not be called with inm_lock or igi_lock held.
3623 */
3624void
3625igmp_sendpkt(struct mbuf *m, struct ifnet *ifp)
3626{
3627	struct ip_moptions	*imo;
3628	struct mbuf		*ipopts, *m0;
3629	int			 error;
3630	struct route		ro;
3631
3632	IGMP_PRINTF(("%s: transmit %p\n", __func__, m));
3633
3634	/*
3635	 * Check if the ifnet is still attached.
3636	 */
3637	if (ifp == NULL || !ifnet_is_attached(ifp, 0)) {
3638		IGMP_PRINTF(("%s: dropped %p as ifp u went away.\n",
3639		    __func__, m));
3640		m_freem(m);
3641		OSAddAtomic(1, &ipstat.ips_noroute);
3642		return;
3643	}
3644
3645	ipopts = igmp_sendra ? m_raopt : NULL;
3646
3647	imo = ip_allocmoptions(M_WAITOK);
3648	if (imo == NULL) {
3649		m_freem(m);
3650		return;
3651	}
3652
3653	imo->imo_multicast_ttl  = 1;
3654	imo->imo_multicast_vif  = -1;
3655#if MROUTING
3656	imo->imo_multicast_loop = (ip_mrouter != NULL);
3657#else
3658	imo->imo_multicast_loop = 0;
3659#endif
3660
3661	/*
3662	 * If the user requested that IGMP traffic be explicitly
3663	 * redirected to the loopback interface (e.g. they are running a
3664	 * MANET interface and the routing protocol needs to see the
3665	 * updates), handle this now.
3666	 */
3667	if (m->m_flags & M_IGMP_LOOP)
3668		imo->imo_multicast_ifp = lo_ifp;
3669	else
3670		imo->imo_multicast_ifp = ifp;
3671
3672	if (m->m_flags & M_IGMPV2) {
3673		m0 = m;
3674	} else {
3675		m0 = igmp_v3_encap_report(ifp, m);
3676		if (m0 == NULL) {
3677			/*
3678			 * If igmp_v3_encap_report() failed, then M_PREPEND()
3679			 * already freed the original mbuf chain.
3680			 * This means that we don't have to m_freem(m) here.
3681			 */
3682			IGMP_PRINTF(("%s: dropped %p\n", __func__, m));
3683			IMO_REMREF(imo);
3684			atomic_add_32(&ipstat.ips_odropped, 1);
3685			return;
3686		}
3687	}
3688
3689	m->m_flags &= ~(M_PROTOFLAGS | M_IGMP_LOOP);
3690	m0->m_pkthdr.rcvif = lo_ifp;
3691#ifdef MAC
3692	mac_netinet_igmp_send(ifp, m0);
3693#endif
3694
3695	if (ifp->if_eflags & IFEF_TXSTART) {
3696		/* Use control service class if the interface supports
3697		 * transmit-start model.
3698		 */
3699		(void) m_set_service_class(m0, MBUF_SC_CTL);
3700	}
3701	bzero(&ro, sizeof (ro));
3702	error = ip_output(m0, ipopts, &ro, 0, imo, NULL);
3703	if (ro.ro_rt != NULL) {
3704		rtfree(ro.ro_rt);
3705		ro.ro_rt = NULL;
3706	}
3707
3708	IMO_REMREF(imo);
3709
3710	if (error) {
3711		IGMP_PRINTF(("%s: ip_output(%p) = %d\n", __func__, m0, error));
3712		return;
3713	}
3714
3715	IGMPSTAT_INC(igps_snd_reports);
3716	OIGMPSTAT_INC(igps_snd_reports);
3717}
3718/*
3719 * Encapsulate an IGMPv3 report.
3720 *
3721 * The internal mbuf flag M_IGMPV3_HDR is used to indicate that the mbuf
3722 * chain has already had its IP/IGMPv3 header prepended. In this case
3723 * the function will not attempt to prepend; the lengths and checksums
3724 * will however be re-computed.
3725 *
3726 * Returns a pointer to the new mbuf chain head, or NULL if the
3727 * allocation failed.
3728 */
3729static struct mbuf *
3730igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
3731{
3732	struct igmp_report	*igmp;
3733	struct ip		*ip;
3734	int			 hdrlen, igmpreclen;
3735
3736	VERIFY((m->m_flags & M_PKTHDR));
3737
3738	igmpreclen = m_length(m);
3739	hdrlen = sizeof(struct ip) + sizeof(struct igmp_report);
3740
3741	if (m->m_flags & M_IGMPV3_HDR) {
3742		igmpreclen -= hdrlen;
3743	} else {
3744		M_PREPEND(m, hdrlen, M_DONTWAIT);
3745		if (m == NULL)
3746			return (NULL);
3747		m->m_flags |= M_IGMPV3_HDR;
3748	}
3749
3750	IGMP_PRINTF(("%s: igmpreclen is %d\n", __func__, igmpreclen));
3751
3752	m->m_data += sizeof(struct ip);
3753	m->m_len -= sizeof(struct ip);
3754
3755	igmp = mtod(m, struct igmp_report *);
3756	igmp->ir_type = IGMP_v3_HOST_MEMBERSHIP_REPORT;
3757	igmp->ir_rsv1 = 0;
3758	igmp->ir_rsv2 = 0;
3759	igmp->ir_numgrps = htons(m->m_pkthdr.vt_nrecs);
3760	igmp->ir_cksum = 0;
3761	igmp->ir_cksum = in_cksum(m, sizeof(struct igmp_report) + igmpreclen);
3762	m->m_pkthdr.vt_nrecs = 0;
3763
3764	m->m_data -= sizeof(struct ip);
3765	m->m_len += sizeof(struct ip);
3766
3767	ip = mtod(m, struct ip *);
3768	ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
3769	ip->ip_len = hdrlen + igmpreclen;
3770	ip->ip_off = IP_DF;
3771	ip->ip_p = IPPROTO_IGMP;
3772	ip->ip_sum = 0;
3773
3774	ip->ip_src.s_addr = INADDR_ANY;
3775
3776	if (m->m_flags & M_IGMP_LOOP) {
3777		struct in_ifaddr *ia;
3778
3779		IFP_TO_IA(ifp, ia);
3780		if (ia != NULL) {
3781			IFA_LOCK(&ia->ia_ifa);
3782			ip->ip_src = ia->ia_addr.sin_addr;
3783			IFA_UNLOCK(&ia->ia_ifa);
3784			IFA_REMREF(&ia->ia_ifa);
3785		}
3786	}
3787
3788	ip->ip_dst.s_addr = htonl(INADDR_ALLRPTS_GROUP);
3789
3790	return (m);
3791}
3792
3793#ifdef IGMP_DEBUG
3794static const char *
3795igmp_rec_type_to_str(const int type)
3796{
3797	switch (type) {
3798		case IGMP_CHANGE_TO_EXCLUDE_MODE:
3799			return "TO_EX";
3800			break;
3801		case IGMP_CHANGE_TO_INCLUDE_MODE:
3802			return "TO_IN";
3803			break;
3804		case IGMP_MODE_IS_EXCLUDE:
3805			return "MODE_EX";
3806			break;
3807		case IGMP_MODE_IS_INCLUDE:
3808			return "MODE_IN";
3809			break;
3810		case IGMP_ALLOW_NEW_SOURCES:
3811			return "ALLOW_NEW";
3812			break;
3813		case IGMP_BLOCK_OLD_SOURCES:
3814			return "BLOCK_OLD";
3815			break;
3816		default:
3817			break;
3818	}
3819	return "unknown";
3820}
3821#endif
3822
3823void
3824igmp_init(void)
3825{
3826
3827	IGMP_PRINTF(("%s: initializing\n", __func__));
3828
3829	igmp_timers_are_running = 0;
3830
3831	/* Setup lock group and attribute for igmp_mtx */
3832	igmp_mtx_grp_attr = lck_grp_attr_alloc_init();
3833	igmp_mtx_grp = lck_grp_alloc_init("igmp_mtx", igmp_mtx_grp_attr);
3834	igmp_mtx_attr = lck_attr_alloc_init();
3835	lck_mtx_init(&igmp_mtx, igmp_mtx_grp, igmp_mtx_attr);
3836
3837	LIST_INIT(&igi_head);
3838	m_raopt = igmp_ra_alloc();
3839
3840	igi_size = sizeof (struct igmp_ifinfo);
3841	igi_zone = zinit(igi_size, IGI_ZONE_MAX * igi_size,
3842	    0, IGI_ZONE_NAME);
3843	if (igi_zone == NULL) {
3844		panic("%s: failed allocating %s", __func__, IGI_ZONE_NAME);
3845		/* NOTREACHED */
3846	}
3847	zone_change(igi_zone, Z_EXPAND, TRUE);
3848	zone_change(igi_zone, Z_CALLERACCT, FALSE);
3849}
3850