igmp.c revision 191548
1/*-
2 * Copyright (c) 2007-2009 Bruce Simpson.
3 * Copyright (c) 1988 Stephen Deering.
4 * Copyright (c) 1992, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Stephen Deering of Stanford University.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
35 */
36
37/*
38 * Internet Group Management Protocol (IGMP) routines.
39 * [RFC1112, RFC2236, RFC3376]
40 *
41 * Written by Steve Deering, Stanford, May 1988.
42 * Modified by Rosen Sharma, Stanford, Aug 1994.
43 * Modified by Bill Fenner, Xerox PARC, Feb 1995.
44 * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995.
45 * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson.
46 *
47 * MULTICAST Revision: 3.5.1.4
48 */
49
50#include <sys/cdefs.h>
51__FBSDID("$FreeBSD: head/sys/netinet/igmp.c 191548 2009-04-26 22:06:42Z zec $");
52
53#include "opt_mac.h"
54#include "opt_route.h"
55
56#include <sys/param.h>
57#include <sys/systm.h>
58#include <sys/module.h>
59#include <sys/malloc.h>
60#include <sys/mbuf.h>
61#include <sys/socket.h>
62#include <sys/protosw.h>
63#include <sys/kernel.h>
64#include <sys/sysctl.h>
65#include <sys/vimage.h>
66#include <sys/ktr.h>
67#include <sys/condvar.h>
68
69#include <net/if.h>
70#include <net/netisr.h>
71#include <net/route.h>
72#include <net/vnet.h>
73
74#include <netinet/in.h>
75#include <netinet/in_var.h>
76#include <netinet/in_systm.h>
77#include <netinet/ip.h>
78#include <netinet/ip_var.h>
79#include <netinet/ip_options.h>
80#include <netinet/igmp.h>
81#include <netinet/igmp_var.h>
82#include <netinet/vinet.h>
83
84#include <machine/in_cksum.h>
85
86#include <security/mac/mac_framework.h>
87
88#ifndef KTR_IGMPV3
89#define KTR_IGMPV3 KTR_SUBSYS
90#endif
91
92static struct igmp_ifinfo *
93		igi_alloc_locked(struct ifnet *);
94static void	igi_delete_locked(const struct ifnet *);
95static void	igmp_dispatch_queue(struct ifqueue *, int, const int);
96static void	igmp_fasttimo_vnet(void);
97static void	igmp_final_leave(struct in_multi *, struct igmp_ifinfo *);
98static int	igmp_handle_state_change(struct in_multi *,
99		    struct igmp_ifinfo *);
100static int	igmp_initial_join(struct in_multi *, struct igmp_ifinfo *);
101static int	igmp_input_v1_query(struct ifnet *, const struct ip *);
102static int	igmp_input_v2_query(struct ifnet *, const struct ip *,
103		    const struct igmp *);
104static int	igmp_input_v3_query(struct ifnet *, const struct ip *,
105		    /*const*/ struct igmpv3 *);
106static int	igmp_input_v3_group_query(struct in_multi *,
107		    struct igmp_ifinfo *, int, /*const*/ struct igmpv3 *);
108static int	igmp_input_v1_report(struct ifnet *, /*const*/ struct ip *,
109		    /*const*/ struct igmp *);
110static int	igmp_input_v2_report(struct ifnet *, /*const*/ struct ip *,
111		    /*const*/ struct igmp *);
112static void	igmp_intr(struct mbuf *);
113static int	igmp_isgroupreported(const struct in_addr);
114static struct mbuf *
115		igmp_ra_alloc(void);
116#ifdef KTR
117static char *	igmp_rec_type_to_str(const int);
118#endif
119static void	igmp_set_version(struct igmp_ifinfo *, const int);
120static void	igmp_slowtimo_vnet(void);
121static void	igmp_sysinit(void);
122static int	igmp_v1v2_queue_report(struct in_multi *, const int);
123static void	igmp_v1v2_process_group_timer(struct in_multi *, const int);
124static void	igmp_v1v2_process_querier_timers(struct igmp_ifinfo *);
125static void	igmp_v2_update_group(struct in_multi *, const int);
126static void	igmp_v3_cancel_link_timers(struct igmp_ifinfo *);
127static void	igmp_v3_dispatch_general_query(struct igmp_ifinfo *);
128static struct mbuf *
129		igmp_v3_encap_report(struct ifnet *, struct mbuf *);
130static int	igmp_v3_enqueue_group_record(struct ifqueue *,
131		    struct in_multi *, const int, const int, const int);
132static int	igmp_v3_enqueue_filter_change(struct ifqueue *,
133		    struct in_multi *);
134static void	igmp_v3_process_group_timers(struct igmp_ifinfo *,
135		    struct ifqueue *, struct ifqueue *, struct in_multi *,
136		    const int);
137static int	igmp_v3_merge_state_changes(struct in_multi *,
138		    struct ifqueue *);
139static void	igmp_v3_suppress_group_record(struct in_multi *);
140static int	sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS);
141static int	sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS);
142static int	sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS);
143
144static vnet_attach_fn	vnet_igmp_iattach;
145static vnet_detach_fn	vnet_igmp_idetach;
146
147/*
148 * System-wide globals.
149 *
150 * Unlocked access to these is OK, except for the global IGMP output
151 * queue. The IGMP subsystem lock ends up being system-wide for the moment,
152 * because all VIMAGEs have to share a global output queue, as netisrs
153 * themselves are not virtualized.
154 *
155 * Locking:
156 *  * The permitted lock order is: IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
157 *    Any may be taken independently; if any are held at the same
158 *    time, the above lock order must be followed.
159 *  * All output is delegated to the netisr.
160 *    Now that Giant has been eliminated, the netisr may be inlined.
161 *  * IN_MULTI_LOCK covers in_multi.
162 *  * IGMP_LOCK covers igmp_ifinfo and any global variables in this file,
163 *    including the output queue.
164 *  * IF_ADDR_LOCK covers if_multiaddrs, which is used for a variety of
165 *    per-link state iterators.
166 *  * igmp_ifinfo is valid as long as PF_INET is attached to the interface,
167 *    therefore it is not refcounted.
168 *    We allow unlocked reads of igmp_ifinfo when accessed via in_multi.
169 *
170 * Reference counting
171 *  * IGMP acquires its own reference every time an in_multi is passed to
172 *    it and the group is being joined for the first time.
173 *  * IGMP releases its reference(s) on in_multi in a deferred way,
174 *    because the operations which process the release run as part of
175 *    a loop whose control variables are directly affected by the release
176 *    (that, and not recursing on the IF_ADDR_LOCK).
177 *
178 * VIMAGE: Each in_multi corresponds to an ifp, and each ifp corresponds
179 * to a vnet in ifp->if_vnet.
180 *
181 * SMPng: XXX We may potentially race operations on ifma_protospec.
182 * The problem is that we currently lack a clean way of taking the
183 * IF_ADDR_LOCK() between the ifnet and in layers w/o recursing,
184 * as anything which modifies ifma needs to be covered by that lock.
185 * So check for ifma_protospec being NULL before proceeding.
186 */
/* System-wide IGMP subsystem lock; see the locking notes above. */
struct mtx		 igmp_mtx;

struct mbuf		*m_raopt;		 /* Router Alert option */
MALLOC_DEFINE(M_IGMP, "igmp", "igmp state");

/*
 * Global netisr output queue.
 * Shared by all VIMAGEs; covered by IGMP_LOCK (see notes above).
 */
struct ifqueue		 igmpoq;

/*
 * VIMAGE-wide globals.
 *
 * The IGMPv3 timers themselves need to run per-image, however,
 * protosw timers run globally (see tcp).
 * An ifnet can only be in one vimage at a time, and the loopback
 * ifnet, loif, is itself virtualized.
 * It would otherwise be possible to seriously hose IGMP state,
 * and create inconsistencies in upstream multicast routing, if you have
 * multiple VIMAGEs running on the same link joining different multicast
 * groups, UNLESS the "primary IP address" is different. This is because
 * IGMP for IPv4 does not force link-local addresses to be used for each
 * node, unlike MLD for IPv6.
 * Obviously the IGMPv3 per-interface state has per-vimage granularity
 * also as a result.
 *
 * FUTURE: Stop using IFP_TO_IA/INADDR_ANY, and use source address selection
 * policy to control the address used by IGMP on the link.
 */
#ifdef VIMAGE_GLOBALS
/* Per-vnet timer state; non-zero while the fast timeout has work to do. */
int	 interface_timers_running;	 /* IGMPv3 general query response */
int	 state_change_timers_running;	 /* IGMPv3 state-change retransmit */
int	 current_state_timers_running;	 /* IGMPv1/v2 host report;
					  * IGMPv3 g/sg query response */

LIST_HEAD(, igmp_ifinfo)	 igi_head;	/* per-interface state list */
struct igmpstat			 igmpstat;	/* protocol statistics */
struct timeval			 igmp_gsrdelay;	/* G/GS query rate limit */

/* Tunables; exposed below as virtualized sysctls. */
int	 igmp_recvifkludge;
int	 igmp_sendra;
int	 igmp_sendlocal;
int	 igmp_v1enable;
int	 igmp_v2enable;
int	 igmp_legacysupp;
int	 igmp_default_version;
#endif /* VIMAGE_GLOBALS */

/*
 * Virtualized sysctls.
 */
SYSCTL_V_STRUCT(V_NET, vnet_inet, _net_inet_igmp, IGMPCTL_STATS, stats,
    CTLFLAG_RW, igmpstat, igmpstat, "");
SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, recvifkludge,
    CTLFLAG_RW, igmp_recvifkludge, 0,
    "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address");
SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, sendra,
    CTLFLAG_RW, igmp_sendra, 0,
    "Send IP Router Alert option in IGMPv2/v3 messages");
SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, sendlocal,
    CTLFLAG_RW, igmp_sendlocal, 0,
    "Send IGMP membership reports for 224.0.0.0/24 groups");
SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, v1enable,
    CTLFLAG_RW, igmp_v1enable, 0,
    "Enable backwards compatibility with IGMPv1");
SYSCTL_V_INT(V_NET, vnet_inet,  _net_inet_igmp, OID_AUTO, v2enable,
    CTLFLAG_RW, igmp_v2enable, 0,
    "Enable backwards compatibility with IGMPv2");
SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, legacysupp,
    CTLFLAG_RW, igmp_legacysupp, 0,
    "Allow v1/v2 reports to suppress v3 group responses");
SYSCTL_V_PROC(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, default_version,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, igmp_default_version, 0,
    sysctl_igmp_default_version, "I",
    "Default version of IGMP to run on each interface");
SYSCTL_V_PROC(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, gsrdelay,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, igmp_gsrdelay.tv_sec, 0,
    sysctl_igmp_gsr, "I",
    "Rate limit for IGMPv3 Group-and-Source queries in seconds");
272
273static __inline void
274igmp_save_context(struct mbuf *m, struct ifnet *ifp)
275{
276
277#ifdef VIMAGE
278	m->m_pkthdr.header = ifp->if_vnet;
279#endif /* VIMAGE */
280	m->m_pkthdr.flowid = ifp->if_index;
281}
282
283static __inline void
284igmp_scrub_context(struct mbuf *m)
285{
286
287	m->m_pkthdr.header = NULL;
288	m->m_pkthdr.flowid = 0;
289}
290
#ifdef KTR
/*
 * Format a host-byte-order IPv4 address for KTR tracing.
 * NOTE: returns inet_ntoa()'s shared static buffer.
 */
static __inline char *
inet_ntoa_haddr(in_addr_t haddr)
{
	struct in_addr ia = { .s_addr = htonl(haddr) };

	return (inet_ntoa(ia));
}
#endif
301
/*
 * Restore context from a queued IGMP output chain.
 * Return saved ifindex.
 *
 * The ifindex was stashed in m_pkthdr.flowid by igmp_save_context().
 *
 * VIMAGE: The assertion is there to make sure that we
 * actually called CURVNET_SET() with what's in the mbuf chain.
 */
static __inline uint32_t
igmp_restore_context(struct mbuf *m)
{

#ifdef notyet
#if defined(VIMAGE) && defined(INVARIANTS)
	/* Disabled pending VIMAGE integration work. */
	KASSERT(curvnet == (m->m_pkthdr.header),
	    ("%s: called when curvnet was not restored", __func__));
#endif
#endif
	return (m->m_pkthdr.flowid);
}
321
/*
 * Retrieve or set default IGMP version.
 *
 * Writes outside [IGMP_VERSION_1, IGMP_VERSION_3] are rejected with
 * EINVAL; the new default only affects interfaces attached afterwards.
 *
 * VIMAGE: Assume curvnet set by caller.
 * SMPng: NOTE: Serialized by IGMP lock.
 */
static int
sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS)
{
	INIT_VNET_INET(curvnet);
	int	 error;
	int	 new;

	/* Wire the old buffer so copyout cannot fault under the lock. */
	error = sysctl_wire_old_buffer(req, sizeof(int));
	if (error)
		return (error);

	IGMP_LOCK();

	new = V_igmp_default_version;

	error = sysctl_handle_int(oidp, &new, 0, req);
	/* Read-only access, or handler error: nothing to commit. */
	if (error || !req->newptr)
		goto out_locked;

	if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) {
		error = EINVAL;
		goto out_locked;
	}

	CTR2(KTR_IGMPV3, "change igmp_default_version from %d to %d",
	     V_igmp_default_version, new);

	V_igmp_default_version = new;

out_locked:
	IGMP_UNLOCK();
	return (error);
}
361
/*
 * Retrieve or set threshold between group-source queries in seconds.
 *
 * Only the tv_sec field of the rate-limit timeval is exposed; values
 * outside [-1, 59] are rejected with EINVAL.
 *
 * VIMAGE: Assume curvnet set by caller.
 * SMPng: NOTE: Serialized by IGMP lock.
 */
static int
sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS)
{
	INIT_VNET_INET(curvnet);
	int error;
	int i;

	/* Wire the old buffer so copyout cannot fault under the lock. */
	error = sysctl_wire_old_buffer(req, sizeof(int));
	if (error)
		return (error);

	IGMP_LOCK();

	i = V_igmp_gsrdelay.tv_sec;

	error = sysctl_handle_int(oidp, &i, 0, req);
	/* Read-only access, or handler error: nothing to commit. */
	if (error || !req->newptr)
		goto out_locked;

	if (i < -1 || i >= 60) {
		error = EINVAL;
		goto out_locked;
	}

	CTR2(KTR_IGMPV3, "change igmp_gsrdelay from %d to %d",
	     V_igmp_gsrdelay.tv_sec, i);
	V_igmp_gsrdelay.tv_sec = i;

out_locked:
	IGMP_UNLOCK();
	return (error);
}
400
/*
 * Expose struct igmp_ifinfo to userland, keyed by ifindex.
 * For use by ifmcstat(8).
 *
 * The node is read-only (EPERM on any write attempt) and takes a
 * single ifindex as its name; ENOENT if the index is out of range
 * or has no IGMP state attached.
 *
 * SMPng: NOTE: Does an unlocked ifindex space read.
 * VIMAGE: Assume curvnet set by caller. The node handler itself
 * is not directly virtualized.
 */
static int
sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS)
{
	INIT_VNET_NET(curvnet);
	INIT_VNET_INET(curvnet);
	int			*name;
	int			 error;
	u_int			 namelen;
	struct ifnet		*ifp;
	struct igmp_ifinfo	*igi;

	name = (int *)arg1;
	namelen = arg2;

	if (req->newptr != NULL)
		return (EPERM);

	if (namelen != 1)
		return (EINVAL);

	/* Wire the old buffer so copyout cannot fault under the locks. */
	error = sysctl_wire_old_buffer(req, sizeof(struct igmp_ifinfo));
	if (error)
		return (error);

	/* Acquire in the documented lock order: IN_MULTI, then IGMP. */
	IN_MULTI_LOCK();
	IGMP_LOCK();

	if (name[0] <= 0 || name[0] > V_if_index) {
		error = ENOENT;
		goto out_locked;
	}

	error = ENOENT;

	ifp = ifnet_byindex(name[0]);
	if (ifp == NULL)
		goto out_locked;

	/* Linear search; the per-vnet interface list is short. */
	LIST_FOREACH(igi, &V_igi_head, igi_link) {
		if (ifp == igi->igi_ifp) {
			error = SYSCTL_OUT(req, igi,
			    sizeof(struct igmp_ifinfo));
			break;
		}
	}

out_locked:
	IGMP_UNLOCK();
	IN_MULTI_UNLOCK();
	return (error);
}
460
461/*
462 * Dispatch an entire queue of pending packet chains
463 * using the netisr.
464 * VIMAGE: Assumes the vnet pointer has been set.
465 */
466static void
467igmp_dispatch_queue(struct ifqueue *ifq, int limit, const int loop)
468{
469	struct mbuf *m;
470
471	for (;;) {
472		_IF_DEQUEUE(ifq, m);
473		if (m == NULL)
474			break;
475		CTR3(KTR_IGMPV3, "%s: dispatch %p from %p", __func__, ifq, m);
476		if (loop)
477			m->m_flags |= M_IGMP_LOOP;
478		netisr_dispatch(NETISR_IGMP, m);
479		if (--limit == 0)
480			break;
481	}
482}
483
484/*
485 * Filter outgoing IGMP report state by group.
486 *
487 * Reports are ALWAYS suppressed for ALL-HOSTS (224.0.0.1).
488 * If the net.inet.igmp.sendlocal sysctl is 0, then IGMP reports are
489 * disabled for all groups in the 224.0.0.0/24 link-local scope. However,
490 * this may break certain IGMP snooping switches which rely on the old
491 * report behaviour.
492 *
493 * Return zero if the given group is one for which IGMP reports
494 * should be suppressed, or non-zero if reports should be issued.
495 */
496static __inline int
497igmp_isgroupreported(const struct in_addr addr)
498{
499	INIT_VNET_INET(curvnet);
500
501	if (in_allhosts(addr) ||
502	    ((!V_igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr)))))
503		return (0);
504
505	return (1);
506}
507
508/*
509 * Construct a Router Alert option to use in outgoing packets.
510 */
511static struct mbuf *
512igmp_ra_alloc(void)
513{
514	struct mbuf	*m;
515	struct ipoption	*p;
516
517	MGET(m, M_DONTWAIT, MT_DATA);
518	p = mtod(m, struct ipoption *);
519	p->ipopt_dst.s_addr = INADDR_ANY;
520	p->ipopt_list[0] = IPOPT_RA;	/* Router Alert Option */
521	p->ipopt_list[1] = 0x04;	/* 4 bytes long */
522	p->ipopt_list[2] = IPOPT_EOL;	/* End of IP option list */
523	p->ipopt_list[3] = 0x00;	/* pad byte */
524	m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1];
525
526	return (m);
527}
528
529/*
530 * Attach IGMP when PF_INET is attached to an interface.
531 *
532 * VIMAGE: Currently we set the vnet pointer, although it is
533 * likely that it was already set by our caller.
534 */
535struct igmp_ifinfo *
536igmp_domifattach(struct ifnet *ifp)
537{
538	struct igmp_ifinfo *igi;
539
540	CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)",
541	    __func__, ifp, ifp->if_xname);
542
543	CURVNET_SET(ifp->if_vnet);
544	IGMP_LOCK();
545
546	igi = igi_alloc_locked(ifp);
547	if (!(ifp->if_flags & IFF_MULTICAST))
548		igi->igi_flags |= IGIF_SILENT;
549
550	IGMP_UNLOCK();
551	CURVNET_RESTORE();
552
553	return (igi);
554}
555
/*
 * Allocate, initialize, and link per-interface IGMP state for ifp
 * onto the per-vnet list V_igi_head.
 *
 * Returns NULL if the M_NOWAIT allocation fails; callers must check.
 *
 * VIMAGE: assume curvnet set by caller.
 */
static struct igmp_ifinfo *
igi_alloc_locked(/*const*/ struct ifnet *ifp)
{
	INIT_VNET_INET(ifp->if_vnet);
	struct igmp_ifinfo *igi;

	IGMP_LOCK_ASSERT();

	igi = malloc(sizeof(struct igmp_ifinfo), M_IGMP, M_NOWAIT|M_ZERO);
	if (igi == NULL)
		goto out;

	igi->igi_ifp = ifp;
	igi->igi_version = V_igmp_default_version;
	igi->igi_flags = 0;
	/* Protocol defaults: robustness variable, query interval,
	 * query response interval, unsolicited report interval. */
	igi->igi_rv = IGMP_RV_INIT;
	igi->igi_qi = IGMP_QI_INIT;
	igi->igi_qri = IGMP_QRI_INIT;
	igi->igi_uri = IGMP_URI_INIT;

	/* List of in_multi whose release has been deferred. */
	SLIST_INIT(&igi->igi_relinmhead);

	/*
	 * Responses to general queries are subject to bounds.
	 */
	IFQ_SET_MAXLEN(&igi->igi_gq, IGMP_MAX_RESPONSE_PACKETS);

	LIST_INSERT_HEAD(&V_igi_head, igi, igi_link);

	CTR2(KTR_IGMPV3, "allocate igmp_ifinfo for ifp %p(%s)",
	     ifp, ifp->if_xname);

out:
	return (igi);
}
594
/*
 * Hook for ifdetach.
 *
 * NOTE: Some finalization tasks need to run before the protocol domain
 * is detached, but also before the link layer does its cleanup.
 *
 * For interfaces in IGMPv3 mode: any group in the LEAVING state has
 * its deferred in_multi reference released here, and recorded source
 * lists are cleared for every IPv4 group on the interface.
 *
 * SMPNG: igmp_ifdetach() needs to take IF_ADDR_LOCK().
 * XXX This is also bitten by unlocked ifma_protospec access.
 *
 * VIMAGE: curvnet should have been set by caller, but let's not assume
 * that for now.
 */
void
igmp_ifdetach(struct ifnet *ifp)
{
	struct igmp_ifinfo	*igi;
	struct ifmultiaddr	*ifma;
	struct in_multi		*inm, *tinm;

	CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)", __func__, ifp,
	    ifp->if_xname);

	CURVNET_SET(ifp->if_vnet);

	IGMP_LOCK();

	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
	if (igi->igi_version == IGMP_VERSION_3) {
		IF_ADDR_LOCK(ifp);
		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			/* Skip non-IPv4 and uninitialized memberships. */
			if (ifma->ifma_addr->sa_family != AF_INET ||
			    ifma->ifma_protospec == NULL)
				continue;
#if 0
			KASSERT(ifma->ifma_protospec != NULL,
			    ("%s: ifma_protospec is NULL", __func__));
#endif
			inm = (struct in_multi *)ifma->ifma_protospec;
			if (inm->inm_state == IGMP_LEAVING_MEMBER) {
				/*
				 * Defer the release: we cannot drop the
				 * reference while iterating under
				 * IF_ADDR_LOCK (see file header notes).
				 */
				SLIST_INSERT_HEAD(&igi->igi_relinmhead,
				    inm, inm_nrele);
			}
			inm_clear_recorded(inm);
		}
		IF_ADDR_UNLOCK(ifp);
		/*
		 * Free the in_multi reference(s) for this IGMP lifecycle.
		 */
		SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead, inm_nrele,
		    tinm) {
			SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele);
			inm_release_locked(inm);
		}
	}

	IGMP_UNLOCK();

	CURVNET_RESTORE();
}
654
655/*
656 * Hook for domifdetach.
657 *
658 * VIMAGE: curvnet should have been set by caller, but let's not assume
659 * that for now.
660 */
661void
662igmp_domifdetach(struct ifnet *ifp)
663{
664	struct igmp_ifinfo *igi;
665
666	CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)",
667	    __func__, ifp, ifp->if_xname);
668
669	CURVNET_SET(ifp->if_vnet);
670	IGMP_LOCK();
671
672	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
673	igi_delete_locked(ifp);
674
675	IGMP_UNLOCK();
676	CURVNET_RESTORE();
677}
678
/*
 * Unlink and free the igmp_ifinfo for ifp.
 *
 * Any queued deferred General Query responses are drained first.
 * The deferred-release list must be empty by this point; a non-empty
 * list would indicate leaked in_multi references.
 */
static void
igi_delete_locked(const struct ifnet *ifp)
{
	INIT_VNET_INET(ifp->if_vnet);
	struct igmp_ifinfo *igi, *tigi;

	CTR3(KTR_IGMPV3, "%s: freeing igmp_ifinfo for ifp %p(%s)",
	    __func__, ifp, ifp->if_xname);

	IGMP_LOCK_ASSERT();

	LIST_FOREACH_SAFE(igi, &V_igi_head, igi_link, tigi) {
		if (igi->igi_ifp == ifp) {
			/*
			 * Free deferred General Query responses.
			 */
			_IF_DRAIN(&igi->igi_gq);

			LIST_REMOVE(igi, igi_link);

			KASSERT(SLIST_EMPTY(&igi->igi_relinmhead),
			    ("%s: there are dangling in_multi references",
			    __func__));

			free(igi, M_IGMP);
			return;
		}
	}

	/* Not finding state for an attached ifp is a logic error. */
#ifdef INVARIANTS
	panic("%s: igmp_ifinfo not found for ifp %p\n", __func__,  ifp);
#endif
}
712
/*
 * Process a received IGMPv1 query.
 * Return non-zero if the message should be dropped.
 *
 * Switches the interface into IGMPv1 host compatibility mode and
 * starts randomized report timers for all reportable groups.
 *
 * VIMAGE: The curvnet pointer is derived from the input ifp.
 */
static int
igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip)
{
	INIT_VNET_INET(ifp->if_vnet);
	struct ifmultiaddr	*ifma;
	struct igmp_ifinfo	*igi;
	struct in_multi		*inm;

	/*
	 * IGMPv1 General Queries SHOULD always be addressed to 224.0.0.1.
	 * igmp_group is always ignored. Do not drop it as a userland
	 * daemon may wish to see it.
	 */
	if (!in_allhosts(ip->ip_dst)) {
		IGMPSTAT_INC(igps_rcv_badqueries);
		return (0);
	}

	IGMPSTAT_INC(igps_rcv_gen_queries);

	/*
	 * Switch to IGMPv1 host compatibility mode.
	 */
	IN_MULTI_LOCK();
	IGMP_LOCK();

	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
	KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));

	if (igi->igi_flags & IGIF_LOOPBACK) {
		CTR2(KTR_IGMPV3, "ignore v1 query on IGIF_LOOPBACK ifp %p(%s)",
		    ifp, ifp->if_xname);
		goto out_locked;
	}

	igmp_set_version(igi, IGMP_VERSION_1);

	CTR2(KTR_IGMPV3, "process v1 query on ifp %p(%s)", ifp, ifp->if_xname);

	/*
	 * Start the timers in all of our group records
	 * for the interface on which the query arrived,
	 * except those which are already running.
	 */
	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		/* Skip non-IPv4 and uninitialized memberships. */
		if (ifma->ifma_addr->sa_family != AF_INET ||
		    ifma->ifma_protospec == NULL)
			continue;
		inm = (struct in_multi *)ifma->ifma_protospec;
		if (inm->inm_timer != 0)
			continue;
		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
			/* Never report for these states. */
			break;
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
		case IGMP_REPORTING_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_LAZY_MEMBER:
		case IGMP_SLEEPING_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			/* Jitter the report within the v1/v2 window. */
			inm->inm_state = IGMP_REPORTING_MEMBER;
			inm->inm_timer = IGMP_RANDOM_DELAY(
			    IGMP_V1V2_MAX_RI * PR_FASTHZ);
			V_current_state_timers_running = 1;
			break;
		case IGMP_LEAVING_MEMBER:
			break;
		}
	}
	IF_ADDR_UNLOCK(ifp);

out_locked:
	IGMP_UNLOCK();
	IN_MULTI_UNLOCK();

	return (0);
}
799
/*
 * Process a received IGMPv2 general or group-specific query.
 * Always returns 0 (the message is never dropped here).
 *
 * Switches the interface into IGMPv2 host compatibility mode and
 * updates the report timer(s) for the queried group(s).
 */
static int
igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip,
    const struct igmp *igmp)
{
	INIT_VNET_INET(ifp->if_vnet);
	struct ifmultiaddr	*ifma;
	struct igmp_ifinfo	*igi;
	struct in_multi		*inm;
	uint16_t		 timer;

	/*
	 * Perform lazy allocation of IGMP link info if required,
	 * and switch to IGMPv2 host compatibility mode.
	 */
	IN_MULTI_LOCK();
	IGMP_LOCK();

	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
	KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));

	if (igi->igi_flags & IGIF_LOOPBACK) {
		CTR2(KTR_IGMPV3, "ignore v2 query on IGIF_LOOPBACK ifp %p(%s)",
		    ifp, ifp->if_xname);
		goto out_locked;
	}

	igmp_set_version(igi, IGMP_VERSION_2);

	/* Convert Max Resp Time (1/10 s units) to fast-timeout ticks. */
	timer = igmp->igmp_code * PR_FASTHZ / IGMP_TIMER_SCALE;
	if (timer == 0)
		timer = 1;

	if (!in_nullhost(igmp->igmp_group)) {
		/*
		 * IGMPv2 Group-Specific Query.
		 * If this is a group-specific IGMPv2 query, we need only
		 * look up the single group to process it.
		 */
		inm = inm_lookup(ifp, igmp->igmp_group);
		if (inm != NULL) {
			CTR3(KTR_IGMPV3, "process v2 query %s on ifp %p(%s)",
			    inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
			igmp_v2_update_group(inm, timer);
		}
		IGMPSTAT_INC(igps_rcv_group_queries);
	} else {
		/*
		 * IGMPv2 General Query.
		 * If this was not sent to the all-hosts group, ignore it.
		 */
		if (in_allhosts(ip->ip_dst)) {
			/*
			 * For each reporting group joined on this
			 * interface, kick the report timer.
			 */
			CTR2(KTR_IGMPV3,
			    "process v2 general query on ifp %p(%s)",
			    ifp, ifp->if_xname);

			IF_ADDR_LOCK(ifp);
			TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
				/* Skip non-IPv4, uninitialized entries. */
				if (ifma->ifma_addr->sa_family != AF_INET ||
				    ifma->ifma_protospec == NULL)
					continue;
				inm = (struct in_multi *)ifma->ifma_protospec;
				igmp_v2_update_group(inm, timer);
			}
			IF_ADDR_UNLOCK(ifp);
		}
		IGMPSTAT_INC(igps_rcv_gen_queries);
	}

out_locked:
	IGMP_UNLOCK();
	IN_MULTI_UNLOCK();

	return (0);
}
881
/*
 * Update the report timer on a group in response to an IGMPv2 query.
 *
 * If we are becoming the reporting member for this group, start the timer.
 * If we already are the reporting member for this group, and timer is
 * below the threshold, reset it.
 *
 * We may be updating the group for the first time since we switched
 * to IGMPv3. If we are, then we must clear any recorded source lists,
 * and transition to REPORTING state; the group timer is overloaded
 * for group and group-source query responses.
 *
 * Unlike IGMPv3, the delay per group should be jittered
 * to avoid bursts of IGMPv2 reports.
 *
 * 'timer' is in units of PR_FASTHZ ticks (see caller).
 */
static void
igmp_v2_update_group(struct in_multi *inm, const int timer)
{
	INIT_VNET_INET(curvnet);

	CTR4(KTR_IGMPV3, "%s: %s/%s timer=%d", __func__,
	    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname, timer);

	IN_MULTI_LOCK_ASSERT();

	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
		/* Never report for these states. */
		break;
	case IGMP_REPORTING_MEMBER:
		/* An earlier pending report suppresses this one. */
		if (inm->inm_timer != 0 &&
		    inm->inm_timer <= timer) {
			CTR1(KTR_IGMPV3, "%s: REPORTING and timer running, "
			    "skipping.", __func__);
			break;
		}
		/* FALLTHROUGH */
	case IGMP_SG_QUERY_PENDING_MEMBER:
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_IDLE_MEMBER:
	case IGMP_LAZY_MEMBER:
	case IGMP_AWAKENING_MEMBER:
		CTR1(KTR_IGMPV3, "%s: ->REPORTING", __func__);
		inm->inm_state = IGMP_REPORTING_MEMBER;
		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		V_current_state_timers_running = 1;
		break;
	case IGMP_SLEEPING_MEMBER:
		CTR1(KTR_IGMPV3, "%s: ->AWAKENING", __func__);
		inm->inm_state = IGMP_AWAKENING_MEMBER;
		break;
	case IGMP_LEAVING_MEMBER:
		break;
	}
}
937
/*
 * Process a received IGMPv3 general, group-specific or
 * group-and-source-specific query.
 * Assumes m has already been pulled up to the full IGMP message length.
 * Return 0 if successful, otherwise an appropriate error code is returned.
 */
static int
igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip,
    /*const*/ struct igmpv3 *igmpv3)
{
	INIT_VNET_INET(ifp->if_vnet);
	struct igmp_ifinfo	*igi;
	struct in_multi		*inm;
	uint32_t		 maxresp, nsrc, qqi;
	uint16_t		 timer;
	uint8_t			 qrv;

	CTR2(KTR_IGMPV3, "process v3 query on ifp %p(%s)", ifp, ifp->if_xname);

	/*
	 * Decode Max Resp Code: values >= 128 are floating point
	 * (mantissa/exponent) per RFC 3376 section 4.1.1.
	 */
	maxresp = igmpv3->igmp_code;	/* in 1/10ths of a second */
	if (maxresp >= 128) {
		maxresp = IGMP_MANT(igmpv3->igmp_code) <<
			  (IGMP_EXP(igmpv3->igmp_code) + 3);
	}

	/*
	 * Robustness must never be less than 2 for on-wire IGMPv3.
	 * FIXME: Check if ifp has IGIF_LOOPBACK set, as we make
	 * an exception for interfaces whose IGMPv3 state changes
	 * are redirected to loopback (e.g. MANET).
	 */
	qrv = IGMP_QRV(igmpv3->igmp_misc);
	if (qrv < 2) {
		CTR3(KTR_IGMPV3, "%s: clamping qrv %d to %d", __func__,
		    qrv, IGMP_RV_INIT);
		qrv = IGMP_RV_INIT;
	}

	/* QQIC uses the same floating-point encoding as Max Resp Code. */
	qqi = igmpv3->igmp_qqi;
	if (qqi >= 128) {
		qqi = IGMP_MANT(igmpv3->igmp_qqi) <<
		     (IGMP_EXP(igmpv3->igmp_qqi) + 3);
	}

	/* Convert to fast-timeout ticks; ensure at least one tick. */
	timer = maxresp * PR_FASTHZ / IGMP_TIMER_SCALE;
	if (timer == 0)
		timer = 1;

	nsrc = ntohs(igmpv3->igmp_numsrc);

	IN_MULTI_LOCK();
	IGMP_LOCK();

	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
	KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));

	if (igi->igi_flags & IGIF_LOOPBACK) {
		CTR2(KTR_IGMPV3, "ignore v3 query on IGIF_LOOPBACK ifp %p(%s)",
		    ifp, ifp->if_xname);
		goto out_locked;
	}

	igmp_set_version(igi, IGMP_VERSION_3);

	/* Adopt the querier's advertised protocol parameters. */
	igi->igi_rv = qrv;
	igi->igi_qi = qqi;
	igi->igi_qri = maxresp;

	CTR4(KTR_IGMPV3, "%s: qrv %d qi %d qri %d", __func__, qrv, qqi,
	    maxresp);

	if (in_nullhost(igmpv3->igmp_group)) {
		/*
		 * IGMPv3 General Query.
		 * Schedule a current-state report on this ifp for
		 * all groups, possibly containing source lists.
		 */
		IGMPSTAT_INC(igps_rcv_gen_queries);

		if (!in_allhosts(ip->ip_dst) || nsrc > 0) {
			/*
			 * General Queries SHOULD be directed to 224.0.0.1.
			 * A general query with a source list has undefined
			 * behaviour; discard it.
			 */
			IGMPSTAT_INC(igps_rcv_badqueries);
			goto out_locked;
		}

		CTR2(KTR_IGMPV3, "process v3 general query on ifp %p(%s)",
		    ifp, ifp->if_xname);

		/*
		 * If there is a pending General Query response
		 * scheduled earlier than the selected delay, do
		 * not schedule any other reports.
		 * Otherwise, reset the interface timer.
		 */
		if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) {
			igi->igi_v3_timer = IGMP_RANDOM_DELAY(timer);
			V_interface_timers_running = 1;
		}
	} else {
		/*
		 * IGMPv3 Group-specific or Group-and-source-specific Query.
		 *
		 * Group-source-specific queries are throttled on
		 * a per-group basis to defeat denial-of-service attempts.
		 * Queries for groups we are not a member of on this
		 * link are simply ignored.
		 */
		inm = inm_lookup(ifp, igmpv3->igmp_group);
		if (inm == NULL)
			goto out_locked;
		if (nsrc > 0) {
			IGMPSTAT_INC(igps_rcv_gsr_queries);
			if (!ratecheck(&inm->inm_lastgsrtv,
			    &V_igmp_gsrdelay)) {
				CTR1(KTR_IGMPV3, "%s: GS query throttled.",
				    __func__);
				IGMPSTAT_INC(igps_drop_gsr_queries);
				goto out_locked;
			}
		} else {
			IGMPSTAT_INC(igps_rcv_group_queries);
		}
		CTR3(KTR_IGMPV3, "process v3 %s query on ifp %p(%s)",
		     inet_ntoa(igmpv3->igmp_group), ifp, ifp->if_xname);
		/*
		 * If there is a pending General Query response
		 * scheduled sooner than the selected delay, no
		 * further report need be scheduled.
		 * Otherwise, prepare to respond to the
		 * group-specific or group-and-source query.
		 */
		if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer)
			igmp_input_v3_group_query(inm, igi, timer, igmpv3);
	}

out_locked:
	IGMP_UNLOCK();
	IN_MULTI_UNLOCK();

	return (0);
}
1083
1084/*
1085 * Process a recieved IGMPv3 group-specific or group-and-source-specific
1086 * query.
1087 * Return <0 if any error occured. Currently this is ignored.
1088 */
static int
igmp_input_v3_group_query(struct in_multi *inm, struct igmp_ifinfo *igi,
    int timer, /*const*/ struct igmpv3 *igmpv3)
{
	INIT_VNET_INET(curvnet);
	int			 retval;
	uint16_t		 nsrc;

	IN_MULTI_LOCK_ASSERT();
	IGMP_LOCK_ASSERT();

	retval = 0;

	/*
	 * Groups in these states owe no response to a group or
	 * group-and-source query; only reporting and query-pending
	 * members fall through to the scheduling logic below.
	 */
	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
	case IGMP_SLEEPING_MEMBER:
	case IGMP_LAZY_MEMBER:
	case IGMP_AWAKENING_MEMBER:
	case IGMP_IDLE_MEMBER:
	case IGMP_LEAVING_MEMBER:
		return (retval);
		break;	/* NOTREACHED */
	case IGMP_REPORTING_MEMBER:
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_SG_QUERY_PENDING_MEMBER:
		break;
	}

	nsrc = ntohs(igmpv3->igmp_numsrc);

	/*
	 * Deal with group-specific queries upfront.
	 * If any group query is already pending, purge any recorded
	 * source-list state if it exists, and schedule a query response
	 * for this group-specific query.
	 */
	if (nsrc == 0) {
		if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
		    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
			inm_clear_recorded(inm);
			/* Never delay the response past the pending one. */
			timer = min(inm->inm_timer, timer);
		}
		inm->inm_state = IGMP_G_QUERY_PENDING_MEMBER;
		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		V_current_state_timers_running = 1;
		return (retval);
	}

	/*
	 * Deal with the case where a group-and-source-specific query has
	 * been received but a group-specific query is already pending.
	 * The pending (broader) group response covers this query too;
	 * just bring its timer forward if needed.
	 */
	if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER) {
		timer = min(inm->inm_timer, timer);
		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		V_current_state_timers_running = 1;
		return (retval);
	}

	/*
	 * Finally, deal with the case where a group-and-source-specific
	 * query has been received, where a response to a previous g-s-r
	 * query exists, or none exists.
	 * In this case, we need to parse the source-list which the Querier
	 * has provided us with and check if we have any source list filter
	 * entries at T1 for these sources. If we do not, there is no need
	 * schedule a report and the query may be dropped.
	 * If we do, we must record them and schedule a current-state
	 * report for those sources.
	 * FIXME: Handling source lists larger than 1 mbuf requires that
	 * we pass the mbuf chain pointer down to this function, and use
	 * m_getptr() to walk the chain.
	 */
	if (inm->inm_nsrc > 0) {
		const struct in_addr	*ap;
		int			 i, nrecorded;

		/* Source addresses immediately follow the fixed header. */
		ap = (const struct in_addr *)(igmpv3 + 1);
		nrecorded = 0;
		for (i = 0; i < nsrc; i++, ap++) {
			/*
			 * inm_record_source() presumably returns the
			 * count recorded (0 or 1) and <0 on failure —
			 * see its definition to confirm.
			 */
			retval = inm_record_source(inm, ap->s_addr);
			if (retval < 0)
				break;
			nrecorded += retval;
		}
		if (nrecorded > 0) {
			CTR1(KTR_IGMPV3,
			    "%s: schedule response to SG query", __func__);
			inm->inm_state = IGMP_SG_QUERY_PENDING_MEMBER;
			inm->inm_timer = IGMP_RANDOM_DELAY(timer);
			V_current_state_timers_running = 1;
		}
	}

	return (retval);
}
1186
1187/*
1188 * Process a received IGMPv1 host membership report.
1189 *
1190 * NOTE: 0.0.0.0 workaround breaks const correctness.
1191 */
1192static int
1193igmp_input_v1_report(struct ifnet *ifp, /*const*/ struct ip *ip,
1194    /*const*/ struct igmp *igmp)
1195{
1196	INIT_VNET_INET(ifp->if_vnet);
1197	struct in_ifaddr *ia;
1198	struct in_multi *inm;
1199
1200	IGMPSTAT_INC(igps_rcv_reports);
1201
1202	if (ifp->if_flags & IFF_LOOPBACK)
1203		return (0);
1204
1205	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr) ||
1206	    !in_hosteq(igmp->igmp_group, ip->ip_dst))) {
1207		IGMPSTAT_INC(igps_rcv_badreports);
1208		return (EINVAL);
1209	}
1210
1211	/*
1212	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
1213	 * Booting clients may use the source address 0.0.0.0. Some
1214	 * IGMP daemons may not know how to use IP_RECVIF to determine
1215	 * the interface upon which this message was received.
1216	 * Replace 0.0.0.0 with the subnet address if told to do so.
1217	 */
1218	if (V_igmp_recvifkludge && in_nullhost(ip->ip_src)) {
1219		IFP_TO_IA(ifp, ia);
1220		if (ia != NULL)
1221			ip->ip_src.s_addr = htonl(ia->ia_subnet);
1222	}
1223
1224	CTR3(KTR_IGMPV3, "process v1 report %s on ifp %p(%s)",
1225	     inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
1226
1227	/*
1228	 * IGMPv1 report suppression.
1229	 * If we are a member of this group, and our membership should be
1230	 * reported, stop our group timer and transition to the 'lazy' state.
1231	 */
1232	IN_MULTI_LOCK();
1233	inm = inm_lookup(ifp, igmp->igmp_group);
1234	if (inm != NULL) {
1235		struct igmp_ifinfo *igi;
1236
1237		igi = inm->inm_igi;
1238		if (igi == NULL) {
1239			KASSERT(igi != NULL,
1240			    ("%s: no igi for ifp %p", __func__, ifp));
1241			goto out_locked;
1242		}
1243
1244		IGMPSTAT_INC(igps_rcv_ourreports);
1245
1246		/*
1247		 * If we are in IGMPv3 host mode, do not allow the
1248		 * other host's IGMPv1 report to suppress our reports
1249		 * unless explicitly configured to do so.
1250		 */
1251		if (igi->igi_version == IGMP_VERSION_3) {
1252			if (V_igmp_legacysupp)
1253				igmp_v3_suppress_group_record(inm);
1254			goto out_locked;
1255		}
1256
1257		inm->inm_timer = 0;
1258
1259		switch (inm->inm_state) {
1260		case IGMP_NOT_MEMBER:
1261		case IGMP_SILENT_MEMBER:
1262			break;
1263		case IGMP_IDLE_MEMBER:
1264		case IGMP_LAZY_MEMBER:
1265		case IGMP_AWAKENING_MEMBER:
1266			CTR3(KTR_IGMPV3,
1267			    "report suppressed for %s on ifp %p(%s)",
1268			    inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
1269		case IGMP_SLEEPING_MEMBER:
1270			inm->inm_state = IGMP_SLEEPING_MEMBER;
1271			break;
1272		case IGMP_REPORTING_MEMBER:
1273			CTR3(KTR_IGMPV3,
1274			    "report suppressed for %s on ifp %p(%s)",
1275			    inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
1276			if (igi->igi_version == IGMP_VERSION_1)
1277				inm->inm_state = IGMP_LAZY_MEMBER;
1278			else if (igi->igi_version == IGMP_VERSION_2)
1279				inm->inm_state = IGMP_SLEEPING_MEMBER;
1280			break;
1281		case IGMP_G_QUERY_PENDING_MEMBER:
1282		case IGMP_SG_QUERY_PENDING_MEMBER:
1283		case IGMP_LEAVING_MEMBER:
1284			break;
1285		}
1286	}
1287
1288out_locked:
1289	IN_MULTI_UNLOCK();
1290
1291	return (0);
1292}
1293
1294/*
1295 * Process a received IGMPv2 host membership report.
1296 *
1297 * NOTE: 0.0.0.0 workaround breaks const correctness.
1298 */
static int
igmp_input_v2_report(struct ifnet *ifp, /*const*/ struct ip *ip,
    /*const*/ struct igmp *igmp)
{
	INIT_VNET_INET(ifp->if_vnet);
	struct in_ifaddr *ia;
	struct in_multi *inm;

	/*
	 * Make sure we don't hear our own membership report.  Fast
	 * leave requires knowing that we are the only member of a
	 * group.
	 */
	IFP_TO_IA(ifp, ia);
	if (ia != NULL && in_hosteq(ip->ip_src, IA_SIN(ia)->sin_addr))
		return (0);

	IGMPSTAT_INC(igps_rcv_reports);

	if (ifp->if_flags & IFF_LOOPBACK)
		return (0);

	/*
	 * A valid report must name a multicast group which is also the
	 * IP destination of the datagram.
	 */
	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
	    !in_hosteq(igmp->igmp_group, ip->ip_dst)) {
		IGMPSTAT_INC(igps_rcv_badreports);
		return (EINVAL);
	}

	/*
	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
	 * Booting clients may use the source address 0.0.0.0. Some
	 * IGMP daemons may not know how to use IP_RECVIF to determine
	 * the interface upon which this message was received.
	 * Replace 0.0.0.0 with the subnet address if told to do so.
	 */
	if (V_igmp_recvifkludge && in_nullhost(ip->ip_src)) {
		if (ia != NULL)
			ip->ip_src.s_addr = htonl(ia->ia_subnet);
	}

	CTR3(KTR_IGMPV3, "process v2 report %s on ifp %p(%s)",
	     inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);

	/*
	 * IGMPv2 report suppression.
	 * If we are a member of this group, and our membership should be
	 * reported, and our group timer is pending or about to be reset,
	 * stop our group timer by transitioning to the 'lazy' state.
	 */
	IN_MULTI_LOCK();
	inm = inm_lookup(ifp, igmp->igmp_group);
	if (inm != NULL) {
		struct igmp_ifinfo *igi;

		igi = inm->inm_igi;
		KASSERT(igi != NULL, ("%s: no igi for ifp %p", __func__, ifp));

		IGMPSTAT_INC(igps_rcv_ourreports);

		/*
		 * If we are in IGMPv3 host mode, do not allow the
		 * other host's IGMPv2 report to suppress our reports
		 * unless explicitly configured to do so.
		 */
		if (igi->igi_version == IGMP_VERSION_3) {
			if (V_igmp_legacysupp)
				igmp_v3_suppress_group_record(inm);
			goto out_locked;
		}

		inm->inm_timer = 0;

		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
		case IGMP_SLEEPING_MEMBER:
			break;
		case IGMP_REPORTING_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			CTR3(KTR_IGMPV3,
			    "report suppressed for %s on ifp %p(%s)",
			    inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname);
			/* FALLTHROUGH */
		case IGMP_LAZY_MEMBER:
			inm->inm_state = IGMP_LAZY_MEMBER;
			break;
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
		case IGMP_LEAVING_MEMBER:
			break;
		}
	}

out_locked:
	IN_MULTI_UNLOCK();

	return (0);
}
1397
1398void
1399igmp_input(struct mbuf *m, int off)
1400{
1401	int iphlen;
1402	struct ifnet *ifp;
1403	struct igmp *igmp;
1404	struct ip *ip;
1405	int igmplen;
1406	int minlen;
1407	int queryver;
1408
1409	CTR3(KTR_IGMPV3, "%s: called w/mbuf (%p,%d)", __func__, m, off);
1410
1411	ifp = m->m_pkthdr.rcvif;
1412	INIT_VNET_INET(ifp->if_vnet);
1413
1414	IGMPSTAT_INC(igps_rcv_total);
1415
1416	ip = mtod(m, struct ip *);
1417	iphlen = off;
1418	igmplen = ip->ip_len;
1419
1420	/*
1421	 * Validate lengths.
1422	 */
1423	if (igmplen < IGMP_MINLEN) {
1424		IGMPSTAT_INC(igps_rcv_tooshort);
1425		m_freem(m);
1426		return;
1427	}
1428
1429	/*
1430	 * Always pullup to the minimum size for v1/v2 or v3
1431	 * to amortize calls to m_pullup().
1432	 */
1433	minlen = iphlen;
1434	if (igmplen >= IGMP_V3_QUERY_MINLEN)
1435		minlen += IGMP_V3_QUERY_MINLEN;
1436	else
1437		minlen += IGMP_MINLEN;
1438	if ((m->m_flags & M_EXT || m->m_len < minlen) &&
1439	    (m = m_pullup(m, minlen)) == 0) {
1440		IGMPSTAT_INC(igps_rcv_tooshort);
1441		return;
1442	}
1443	ip = mtod(m, struct ip *);
1444
1445	if (ip->ip_ttl != 1) {
1446		IGMPSTAT_INC(igps_rcv_badttl);
1447		m_freem(m);
1448		return;
1449	}
1450
1451	/*
1452	 * Validate checksum.
1453	 */
1454	m->m_data += iphlen;
1455	m->m_len -= iphlen;
1456	igmp = mtod(m, struct igmp *);
1457	if (in_cksum(m, igmplen)) {
1458		IGMPSTAT_INC(igps_rcv_badsum);
1459		m_freem(m);
1460		return;
1461	}
1462	m->m_data -= iphlen;
1463	m->m_len += iphlen;
1464
1465	switch (igmp->igmp_type) {
1466	case IGMP_HOST_MEMBERSHIP_QUERY:
1467		if (igmplen == IGMP_MINLEN) {
1468			if (igmp->igmp_code == 0)
1469				queryver = IGMP_VERSION_1;
1470			else
1471				queryver = IGMP_VERSION_2;
1472		} else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
1473			queryver = IGMP_VERSION_3;
1474		} else {
1475			IGMPSTAT_INC(igps_rcv_tooshort);
1476			m_freem(m);
1477			return;
1478		}
1479
1480		switch (queryver) {
1481		case IGMP_VERSION_1:
1482			IGMPSTAT_INC(igps_rcv_v1v2_queries);
1483			if (!V_igmp_v1enable)
1484				break;
1485			if (igmp_input_v1_query(ifp, ip) != 0) {
1486				m_freem(m);
1487				return;
1488			}
1489			break;
1490
1491		case IGMP_VERSION_2:
1492			IGMPSTAT_INC(igps_rcv_v1v2_queries);
1493			if (!V_igmp_v2enable)
1494				break;
1495			if (igmp_input_v2_query(ifp, ip, igmp) != 0) {
1496				m_freem(m);
1497				return;
1498			}
1499			break;
1500
1501		case IGMP_VERSION_3: {
1502				struct igmpv3 *igmpv3;
1503				uint16_t igmpv3len;
1504				uint16_t srclen;
1505				int nsrc;
1506
1507				IGMPSTAT_INC(igps_rcv_v3_queries);
1508				igmpv3 = (struct igmpv3 *)igmp;
1509				/*
1510				 * Validate length based on source count.
1511				 */
1512				nsrc = ntohs(igmpv3->igmp_numsrc);
1513				srclen = sizeof(struct in_addr) * nsrc;
1514				if (nsrc * sizeof(in_addr_t) > srclen) {
1515					IGMPSTAT_INC(igps_rcv_tooshort);
1516					return;
1517				}
1518				/*
1519				 * m_pullup() may modify m, so pullup in
1520				 * this scope.
1521				 */
1522				igmpv3len = iphlen + IGMP_V3_QUERY_MINLEN +
1523				    srclen;
1524				if ((m->m_flags & M_EXT ||
1525				     m->m_len < igmpv3len) &&
1526				    (m = m_pullup(m, igmpv3len)) == NULL) {
1527					IGMPSTAT_INC(igps_rcv_tooshort);
1528					return;
1529				}
1530				igmpv3 = (struct igmpv3 *)(mtod(m, uint8_t *)
1531				    + iphlen);
1532				if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) {
1533					m_freem(m);
1534					return;
1535				}
1536			}
1537			break;
1538		}
1539		break;
1540
1541	case IGMP_v1_HOST_MEMBERSHIP_REPORT:
1542		if (!V_igmp_v1enable)
1543			break;
1544		if (igmp_input_v1_report(ifp, ip, igmp) != 0) {
1545			m_freem(m);
1546			return;
1547		}
1548		break;
1549
1550	case IGMP_v2_HOST_MEMBERSHIP_REPORT:
1551		if (!V_igmp_v2enable)
1552			break;
1553		if (!ip_checkrouteralert(m))
1554			IGMPSTAT_INC(igps_rcv_nora);
1555		if (igmp_input_v2_report(ifp, ip, igmp) != 0) {
1556			m_freem(m);
1557			return;
1558		}
1559		break;
1560
1561	case IGMP_v3_HOST_MEMBERSHIP_REPORT:
1562		/*
1563		 * Hosts do not need to process IGMPv3 membership reports,
1564		 * as report suppression is no longer required.
1565		 */
1566		if (!ip_checkrouteralert(m))
1567			IGMPSTAT_INC(igps_rcv_nora);
1568		break;
1569
1570	default:
1571		break;
1572	}
1573
1574	/*
1575	 * Pass all valid IGMP packets up to any process(es) listening on a
1576	 * raw IGMP socket.
1577	 */
1578	rip_input(m, off);
1579}
1580
1581
1582/*
1583 * Fast timeout handler (global).
1584 * VIMAGE: Timeout handlers are expected to service all vimages.
1585 */
void
igmp_fasttimo(void)
{
	VNET_ITERATOR_DECL(vnet_iter);

	/* Enter each vimage's context and run its fast-timeout handler. */
	VNET_LIST_RLOCK();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		igmp_fasttimo_vnet();
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK();
}
1599
1600/*
1601 * Fast timeout handler (per-vnet).
1602 * Sends are shuffled off to a netisr to deal with Giant.
1603 *
1604 * VIMAGE: Assume caller has set up our curvnet.
1605 */
static void
igmp_fasttimo_vnet(void)
{
	INIT_VNET_INET(curvnet);
	struct ifqueue		 scq;	/* State-change packets */
	struct ifqueue		 qrq;	/* Query response packets */
	struct ifnet		*ifp;
	struct igmp_ifinfo	*igi;
	struct ifmultiaddr	*ifma, *tifma;
	struct in_multi		*inm;
	int			 loop, uri_fasthz;

	loop = 0;
	uri_fasthz = 0;

	/*
	 * Quick check to see if any work needs to be done, in order to
	 * minimize the overhead of fasttimo processing.
	 * SMPng: XXX Unlocked reads.
	 */
	if (!V_current_state_timers_running &&
	    !V_interface_timers_running &&
	    !V_state_change_timers_running)
		return;

	IN_MULTI_LOCK();
	IGMP_LOCK();

	/*
	 * IGMPv3 General Query response timer processing.
	 */
	if (V_interface_timers_running) {
		CTR1(KTR_IGMPV3, "%s: interface timers running", __func__);

		V_interface_timers_running = 0;
		LIST_FOREACH(igi, &V_igi_head, igi_link) {
			if (igi->igi_v3_timer == 0) {
				/* Do nothing. */
			} else if (--igi->igi_v3_timer == 0) {
				igmp_v3_dispatch_general_query(igi);
			} else {
				/* Still ticking; re-arm the global flag. */
				V_interface_timers_running = 1;
			}
		}
	}

	if (!V_current_state_timers_running &&
	    !V_state_change_timers_running)
		goto out_locked;

	V_current_state_timers_running = 0;
	V_state_change_timers_running = 0;

	CTR1(KTR_IGMPV3, "%s: state change timers running", __func__);

	/*
	 * IGMPv1/v2/v3 host report and state-change timer processing.
	 * Note: Processing a v3 group timer may remove a node.
	 */
	LIST_FOREACH(igi, &V_igi_head, igi_link) {
		ifp = igi->igi_ifp;

		if (igi->igi_version == IGMP_VERSION_3) {
			/*
			 * qrq/scq are only initialized here, and only
			 * consumed below, for links running IGMPv3.
			 */
			loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
			uri_fasthz = IGMP_RANDOM_DELAY(igi->igi_uri *
			    PR_FASTHZ);

			memset(&qrq, 0, sizeof(struct ifqueue));
			IFQ_SET_MAXLEN(&qrq, IGMP_MAX_G_GS_PACKETS);

			memset(&scq, 0, sizeof(struct ifqueue));
			IFQ_SET_MAXLEN(&scq, IGMP_MAX_STATE_CHANGE_PACKETS);
		}

		IF_ADDR_LOCK(ifp);
		TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link,
		    tifma) {
			if (ifma->ifma_addr->sa_family != AF_INET ||
			    ifma->ifma_protospec == NULL)
				continue;
			inm = (struct in_multi *)ifma->ifma_protospec;
			switch (igi->igi_version) {
			case IGMP_VERSION_1:
			case IGMP_VERSION_2:
				igmp_v1v2_process_group_timer(inm,
				    igi->igi_version);
				break;
			case IGMP_VERSION_3:
				igmp_v3_process_group_timers(igi, &qrq,
				    &scq, inm, uri_fasthz);
				break;
			}
		}
		IF_ADDR_UNLOCK(ifp);

		if (igi->igi_version == IGMP_VERSION_3) {
			struct in_multi		*tinm;

			igmp_dispatch_queue(&qrq, 0, loop);
			igmp_dispatch_queue(&scq, 0, loop);

			/*
			 * Free the in_multi reference(s) for this
			 * IGMP lifecycle.
			 */
			SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead,
			    inm_nrele, tinm) {
				SLIST_REMOVE_HEAD(&igi->igi_relinmhead,
				    inm_nrele);
				inm_release_locked(inm);
			}
		}
	}

out_locked:
	IGMP_UNLOCK();
	IN_MULTI_UNLOCK();
}
1724
1725/*
1726 * Update host report group timer for IGMPv1/v2.
1727 * Will update the global pending timer flags.
1728 */
1729static void
1730igmp_v1v2_process_group_timer(struct in_multi *inm, const int version)
1731{
1732	INIT_VNET_INET(curvnet);
1733	int report_timer_expired;
1734
1735	IN_MULTI_LOCK_ASSERT();
1736	IGMP_LOCK_ASSERT();
1737
1738	if (inm->inm_timer == 0) {
1739		report_timer_expired = 0;
1740	} else if (--inm->inm_timer == 0) {
1741		report_timer_expired = 1;
1742	} else {
1743		V_current_state_timers_running = 1;
1744		return;
1745	}
1746
1747	switch (inm->inm_state) {
1748	case IGMP_NOT_MEMBER:
1749	case IGMP_SILENT_MEMBER:
1750	case IGMP_IDLE_MEMBER:
1751	case IGMP_LAZY_MEMBER:
1752	case IGMP_SLEEPING_MEMBER:
1753	case IGMP_AWAKENING_MEMBER:
1754		break;
1755	case IGMP_REPORTING_MEMBER:
1756		if (report_timer_expired) {
1757			inm->inm_state = IGMP_IDLE_MEMBER;
1758			(void)igmp_v1v2_queue_report(inm,
1759			    (version == IGMP_VERSION_2) ?
1760			     IGMP_v2_HOST_MEMBERSHIP_REPORT :
1761			     IGMP_v1_HOST_MEMBERSHIP_REPORT);
1762		}
1763		break;
1764	case IGMP_G_QUERY_PENDING_MEMBER:
1765	case IGMP_SG_QUERY_PENDING_MEMBER:
1766	case IGMP_LEAVING_MEMBER:
1767		break;
1768	}
1769}
1770
1771/*
1772 * Update a group's timers for IGMPv3.
1773 * Will update the global pending timer flags.
1774 * Note: Unlocked read from igi.
1775 */
static void
igmp_v3_process_group_timers(struct igmp_ifinfo *igi,
    struct ifqueue *qrq, struct ifqueue *scq,
    struct in_multi *inm, const int uri_fasthz)
{
	INIT_VNET_INET(curvnet);
	int query_response_timer_expired;
	int state_change_retransmit_timer_expired;

	IN_MULTI_LOCK_ASSERT();
	IGMP_LOCK_ASSERT();

	query_response_timer_expired = 0;
	state_change_retransmit_timer_expired = 0;

	/*
	 * During a transition from v1/v2 compatibility mode back to v3,
	 * a group record in REPORTING state may still have its group
	 * timer active. This is a no-op in this function; it is easier
	 * to deal with it here than to complicate the slow-timeout path.
	 */
	if (inm->inm_timer == 0) {
		query_response_timer_expired = 0;
	} else if (--inm->inm_timer == 0) {
		query_response_timer_expired = 1;
	} else {
		V_current_state_timers_running = 1;
	}

	/* Tick the state-change retransmission timer likewise. */
	if (inm->inm_sctimer == 0) {
		state_change_retransmit_timer_expired = 0;
	} else if (--inm->inm_sctimer == 0) {
		state_change_retransmit_timer_expired = 1;
	} else {
		V_state_change_timers_running = 1;
	}

	/* We are in fasttimo, so be quick about it. */
	if (!state_change_retransmit_timer_expired &&
	    !query_response_timer_expired)
		return;

	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
	case IGMP_SLEEPING_MEMBER:
	case IGMP_LAZY_MEMBER:
	case IGMP_AWAKENING_MEMBER:
	case IGMP_IDLE_MEMBER:
		break;
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_SG_QUERY_PENDING_MEMBER:
		/*
		 * Respond to a previously pending Group-Specific
		 * or Group-and-Source-Specific query by enqueueing
		 * the appropriate Current-State report for
		 * immediate transmission.
		 */
		if (query_response_timer_expired) {
			int retval;

			retval = igmp_v3_enqueue_group_record(qrq, inm, 0, 1,
			    (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER));
			CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
			    __func__, retval);
			inm->inm_state = IGMP_REPORTING_MEMBER;
			/* XXX Clear recorded sources for next time. */
			inm_clear_recorded(inm);
		}
		/* FALLTHROUGH */
	case IGMP_REPORTING_MEMBER:
	case IGMP_LEAVING_MEMBER:
		if (state_change_retransmit_timer_expired) {
			/*
			 * State-change retransmission timer fired.
			 * If there are any further pending retransmissions,
			 * set the global pending state-change flag, and
			 * reset the timer.
			 * uri_fasthz is the precomputed retransmission
			 * interval in fast-timeout ticks (see caller).
			 */
			if (--inm->inm_scrv > 0) {
				inm->inm_sctimer = uri_fasthz;
				V_state_change_timers_running = 1;
			}
			/*
			 * Retransmit the previously computed state-change
			 * report. If there are no further pending
			 * retransmissions, the mbuf queue will be consumed.
			 * Update T0 state to T1 as we have now sent
			 * a state-change.
			 */
			(void)igmp_v3_merge_state_changes(inm, scq);

			inm_commit(inm);
			CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__,
			    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);

			/*
			 * If we are leaving the group for good, make sure
			 * we release IGMP's reference to it.
			 * This release must be deferred using a SLIST,
			 * as we are called from a loop which traverses
			 * the in_ifmultiaddr TAILQ.
			 */
			if (inm->inm_state == IGMP_LEAVING_MEMBER &&
			    inm->inm_scrv == 0) {
				inm->inm_state = IGMP_NOT_MEMBER;
				SLIST_INSERT_HEAD(&igi->igi_relinmhead,
				    inm, inm_nrele);
			}
		}
		break;
	}
}
1889
1890
1891/*
1892 * Suppress a group's pending response to a group or source/group query.
1893 *
1894 * Do NOT suppress state changes. This leads to IGMPv3 inconsistency.
1895 * Do NOT update ST1/ST0 as this operation merely suppresses
1896 * the currently pending group record.
1897 * Do NOT suppress the response to a general query. It is possible but
1898 * it would require adding another state or flag.
1899 */
1900static void
1901igmp_v3_suppress_group_record(struct in_multi *inm)
1902{
1903
1904	IN_MULTI_LOCK_ASSERT();
1905
1906	KASSERT(inm->inm_igi->igi_version == IGMP_VERSION_3,
1907		("%s: not IGMPv3 mode on link", __func__));
1908
1909	if (inm->inm_state != IGMP_G_QUERY_PENDING_MEMBER ||
1910	    inm->inm_state != IGMP_SG_QUERY_PENDING_MEMBER)
1911		return;
1912
1913	if (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER)
1914		inm_clear_recorded(inm);
1915
1916	inm->inm_timer = 0;
1917	inm->inm_state = IGMP_REPORTING_MEMBER;
1918}
1919
1920/*
1921 * Switch to a different IGMP version on the given interface,
1922 * as per Section 7.2.1.
1923 */
1924static void
1925igmp_set_version(struct igmp_ifinfo *igi, const int version)
1926{
1927
1928	IGMP_LOCK_ASSERT();
1929
1930	CTR4(KTR_IGMPV3, "%s: switching to v%d on ifp %p(%s)", __func__,
1931	    version, igi->igi_ifp, igi->igi_ifp->if_xname);
1932
1933	if (version == IGMP_VERSION_1 || version == IGMP_VERSION_2) {
1934		int old_version_timer;
1935		/*
1936		 * Compute the "Older Version Querier Present" timer as per
1937		 * Section 8.12.
1938		 */
1939		old_version_timer = igi->igi_rv * igi->igi_qi + igi->igi_qri;
1940		old_version_timer *= PR_SLOWHZ;
1941
1942		if (version == IGMP_VERSION_1) {
1943			igi->igi_v1_timer = old_version_timer;
1944			igi->igi_v2_timer = 0;
1945		} else if (version == IGMP_VERSION_2) {
1946			igi->igi_v1_timer = 0;
1947			igi->igi_v2_timer = old_version_timer;
1948		}
1949	}
1950
1951	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
1952		if (igi->igi_version != IGMP_VERSION_2) {
1953			igi->igi_version = IGMP_VERSION_2;
1954			igmp_v3_cancel_link_timers(igi);
1955		}
1956	} else if (igi->igi_v1_timer > 0) {
1957		if (igi->igi_version != IGMP_VERSION_1) {
1958			igi->igi_version = IGMP_VERSION_1;
1959			igmp_v3_cancel_link_timers(igi);
1960		}
1961	}
1962}
1963
1964/*
1965 * Cancel pending IGMPv3 timers for the given link and all groups
1966 * joined on it; state-change, general-query, and group-query timers.
1967 */
static void
igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi)
{
	INIT_VNET_INET(curvnet);
	struct ifmultiaddr	*ifma;
	struct ifnet		*ifp;
	struct in_multi		*inm;

	CTR3(KTR_IGMPV3, "%s: cancel v3 timers on ifp %p(%s)", __func__,
	    igi->igi_ifp, igi->igi_ifp->if_xname);

	IN_MULTI_LOCK_ASSERT();
	IGMP_LOCK_ASSERT();

	/*
	 * Fast-track this potentially expensive operation
	 * by checking all the global 'timer pending' flags.
	 */
	if (!V_interface_timers_running &&
	    !V_state_change_timers_running &&
	    !V_current_state_timers_running)
		return;

	igi->igi_v3_timer = 0;

	ifp = igi->igi_ifp;

	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_INET)
			continue;
		/*
		 * NOTE(review): unlike igmp_fasttimo_vnet(), this loop
		 * does not skip entries whose ifma_protospec is NULL —
		 * confirm every AF_INET ifma has a protospec here.
		 */
		inm = (struct in_multi *)ifma->ifma_protospec;
		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_LAZY_MEMBER:
		case IGMP_SLEEPING_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			break;
		case IGMP_LEAVING_MEMBER:
			/*
			 * If we are leaving the group and switching
			 * IGMP version, we need to release the final
			 * reference held for issuing the INCLUDE {}.
			 *
			 * SMPNG: Must drop and re-acquire IF_ADDR_LOCK
			 * around inm_release_locked(), as it is not
			 * a recursive mutex.
			 * NOTE(review): continuing the TAILQ_FOREACH
			 * after dropping the lock assumes the list is
			 * not modified meanwhile — confirm.
			 */
			IF_ADDR_UNLOCK(ifp);
			inm_release_locked(inm);
			IF_ADDR_LOCK(ifp);
			/* FALLTHROUGH */
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
			inm_clear_recorded(inm);
			/* FALLTHROUGH */
		case IGMP_REPORTING_MEMBER:
			inm->inm_sctimer = 0;
			inm->inm_timer = 0;
			inm->inm_state = IGMP_REPORTING_MEMBER;
			/*
			 * Free any pending IGMPv3 state-change records.
			 */
			_IF_DRAIN(&inm->inm_scq);
			break;
		}
	}
	IF_ADDR_UNLOCK(ifp);
}
2039
2040/*
2041 * Update the Older Version Querier Present timers for a link.
2042 * See Section 7.2.1 of RFC 3376.
2043 */
static void
igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi)
{
	INIT_VNET_INET(curvnet);

	IGMP_LOCK_ASSERT();

	/* These timers tick at PR_SLOWHZ; see igmp_slowtimo_vnet(). */
	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer == 0) {
		/*
		 * IGMPv1 and IGMPv2 Querier Present timers expired.
		 *
		 * Revert to IGMPv3.
		 */
		if (igi->igi_version != IGMP_VERSION_3) {
			CTR5(KTR_IGMPV3,
			    "%s: transition from v%d -> v%d on %p(%s)",
			    __func__, igi->igi_version, IGMP_VERSION_3,
			    igi->igi_ifp, igi->igi_ifp->if_xname);
			igi->igi_version = IGMP_VERSION_3;
		}
	} else if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
		/*
		 * IGMPv1 Querier Present timer expired,
		 * IGMPv2 Querier Present timer running.
		 * If IGMPv2 was disabled since last timeout,
		 * revert to IGMPv3.
		 * If IGMPv2 is enabled, revert to IGMPv2.
		 */
		if (!V_igmp_v2enable) {
			CTR5(KTR_IGMPV3,
			    "%s: transition from v%d -> v%d on %p(%s)",
			    __func__, igi->igi_version, IGMP_VERSION_3,
			    igi->igi_ifp, igi->igi_ifp->if_xname);
			igi->igi_v2_timer = 0;
			igi->igi_version = IGMP_VERSION_3;
		} else {
			--igi->igi_v2_timer;
			if (igi->igi_version != IGMP_VERSION_2) {
				CTR5(KTR_IGMPV3,
				    "%s: transition from v%d -> v%d on %p(%s)",
				    __func__, igi->igi_version, IGMP_VERSION_2,
				    igi->igi_ifp, igi->igi_ifp->if_xname);
				igi->igi_version = IGMP_VERSION_2;
			}
		}
	} else if (igi->igi_v1_timer > 0) {
		/*
		 * IGMPv1 Querier Present timer running.
		 * Stop IGMPv2 timer if running.
		 *
		 * If IGMPv1 was disabled since last timeout,
		 * revert to IGMPv3.
		 * If IGMPv1 is enabled, reset IGMPv2 timer if running.
		 */
		if (!V_igmp_v1enable) {
			CTR5(KTR_IGMPV3,
			    "%s: transition from v%d -> v%d on %p(%s)",
			    __func__, igi->igi_version, IGMP_VERSION_3,
			    igi->igi_ifp, igi->igi_ifp->if_xname);
			igi->igi_v1_timer = 0;
			igi->igi_version = IGMP_VERSION_3;
		} else {
			--igi->igi_v1_timer;
		}
		if (igi->igi_v2_timer > 0) {
			CTR3(KTR_IGMPV3,
			    "%s: cancel v2 timer on %p(%s)",
			    __func__, igi->igi_ifp, igi->igi_ifp->if_xname);
			igi->igi_v2_timer = 0;
		}
	}
}
2116
2117/*
2118 * Global slowtimo handler.
2119 * VIMAGE: Timeout handlers are expected to service all vimages.
2120 */
void
igmp_slowtimo(void)
{
	VNET_ITERATOR_DECL(vnet_iter);

	/* Enter each vimage's context and run its slow-timeout handler. */
	VNET_LIST_RLOCK();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		igmp_slowtimo_vnet();
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK();
}
2134
2135/*
2136 * Per-vnet slowtimo handler.
2137 */
static void
igmp_slowtimo_vnet(void)
{
	INIT_VNET_INET(curvnet);
	struct igmp_ifinfo *igi;

	IGMP_LOCK();

	/*
	 * Age the IGMPv1/v2 querier-present timers on every IGMP-capable
	 * interface in this vnet; igmp_v1v2_process_querier_timers()
	 * performs any resulting protocol version transitions.
	 */
	LIST_FOREACH(igi, &V_igi_head, igi_link) {
		igmp_v1v2_process_querier_timers(igi);
	}

	IGMP_UNLOCK();
}
2152
2153/*
2154 * Dispatch an IGMPv1/v2 host report or leave message.
2155 * These are always small enough to fit inside a single mbuf.
2156 */
static int
igmp_v1v2_queue_report(struct in_multi *inm, const int type)
{
	struct ifnet		*ifp;
	struct igmp		*igmp;
	struct ip		*ip;
	struct mbuf		*m;

	IN_MULTI_LOCK_ASSERT();
	IGMP_LOCK_ASSERT();

	ifp = inm->inm_ifp;

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (ENOMEM);
	MH_ALIGN(m, sizeof(struct ip) + sizeof(struct igmp));

	m->m_pkthdr.len = sizeof(struct ip) + sizeof(struct igmp);

	/*
	 * Step over the space reserved for the IP header so the
	 * checksum below covers only the IGMP message; the header
	 * space is reclaimed afterwards by rewinding m_data.
	 */
	m->m_data += sizeof(struct ip);
	m->m_len = sizeof(struct igmp);

	igmp = mtod(m, struct igmp *);
	igmp->igmp_type = type;
	igmp->igmp_code = 0;
	igmp->igmp_group = inm->inm_addr;
	igmp->igmp_cksum = 0;
	igmp->igmp_cksum = in_cksum(m, sizeof(struct igmp));

	/* Rewind to expose the IP header we reserved space for. */
	m->m_data -= sizeof(struct ip);
	m->m_len += sizeof(struct ip);

	ip = mtod(m, struct ip *);
	ip->ip_tos = 0;
	ip->ip_len = sizeof(struct ip) + sizeof(struct igmp);
	ip->ip_off = 0;
	ip->ip_p = IPPROTO_IGMP;
	/*
	 * Source left unspecified here; presumably filled in on the
	 * output path — NOTE(review): confirm against the NETISR_IGMP
	 * handler.
	 */
	ip->ip_src.s_addr = INADDR_ANY;

	/* Leave messages go to all-routers; reports go to the group. */
	if (type == IGMP_HOST_LEAVE_MESSAGE)
		ip->ip_dst.s_addr = htonl(INADDR_ALLRTRS_GROUP);
	else
		ip->ip_dst = inm->inm_addr;

	igmp_save_context(m, ifp);

	m->m_flags |= M_IGMPV2;
	if (inm->inm_igi->igi_flags & IGIF_LOOPBACK)
		m->m_flags |= M_IGMP_LOOP;

	CTR2(KTR_IGMPV3, "%s: netisr_dispatch(NETISR_IGMP, %p)", __func__, m);
	netisr_dispatch(NETISR_IGMP, m);

	return (0);
}
2213
2214/*
2215 * Process a state change from the upper layer for the given IPv4 group.
2216 *
2217 * Each socket holds a reference on the in_multi in its own ip_moptions.
 * The socket layer will have made the necessary updates to the group
2219 * state, it is now up to IGMP to issue a state change report if there
2220 * has been any change between T0 (when the last state-change was issued)
2221 * and T1 (now).
2222 *
2223 * We use the IGMPv3 state machine at group level. The IGMP module
2224 * however makes the decision as to which IGMP protocol version to speak.
2225 * A state change *from* INCLUDE {} always means an initial join.
2226 * A state change *to* INCLUDE {} always means a final leave.
2227 *
2228 * FUTURE: If IGIF_V3LITE is enabled for this interface, then we can
2229 * save ourselves a bunch of work; any exclusive mode groups need not
2230 * compute source filter lists.
2231 *
2232 * VIMAGE: curvnet should have been set by caller, as this routine
2233 * is called from the socket option handlers.
2234 */
2235int
2236igmp_change_state(struct in_multi *inm)
2237{
2238	struct igmp_ifinfo *igi;
2239	struct ifnet *ifp;
2240	int error;
2241
2242	IN_MULTI_LOCK_ASSERT();
2243
2244	error = 0;
2245
2246	/*
2247	 * Try to detect if the upper layer just asked us to change state
2248	 * for an interface which has now gone away.
2249	 */
2250	KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
2251	ifp = inm->inm_ifma->ifma_ifp;
2252	if (ifp != NULL) {
2253		/*
2254		 * Sanity check that netinet's notion of ifp is the
2255		 * same as net's.
2256		 */
2257		KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
2258	}
2259
2260	IGMP_LOCK();
2261
2262	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
2263	KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp));
2264
2265	/*
2266	 * If we detect a state transition to or from MCAST_UNDEFINED
2267	 * for this group, then we are starting or finishing an IGMP
2268	 * life cycle for this group.
2269	 */
2270	if (inm->inm_st[1].iss_fmode != inm->inm_st[0].iss_fmode) {
2271		CTR3(KTR_IGMPV3, "%s: inm transition %d -> %d", __func__,
2272		    inm->inm_st[0].iss_fmode, inm->inm_st[1].iss_fmode);
2273		if (inm->inm_st[0].iss_fmode == MCAST_UNDEFINED) {
2274			CTR1(KTR_IGMPV3, "%s: initial join", __func__);
2275			error = igmp_initial_join(inm, igi);
2276			goto out_locked;
2277		} else if (inm->inm_st[1].iss_fmode == MCAST_UNDEFINED) {
2278			CTR1(KTR_IGMPV3, "%s: final leave", __func__);
2279			igmp_final_leave(inm, igi);
2280			goto out_locked;
2281		}
2282	} else {
2283		CTR1(KTR_IGMPV3, "%s: filter set change", __func__);
2284	}
2285
2286	error = igmp_handle_state_change(inm, igi);
2287
2288out_locked:
2289	IGMP_UNLOCK();
2290	return (error);
2291}
2292
2293/*
2294 * Perform the initial join for an IGMP group.
2295 *
2296 * When joining a group:
2297 *  If the group should have its IGMP traffic suppressed, do nothing.
2298 *  IGMPv1 starts sending IGMPv1 host membership reports.
2299 *  IGMPv2 starts sending IGMPv2 host membership reports.
2300 *  IGMPv3 will schedule an IGMPv3 state-change report containing the
2301 *  initial state of the membership.
2302 */
static int
igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi)
{
	INIT_VNET_INET(curvnet);
	struct ifnet		*ifp;
	struct ifqueue		*ifq;
	int			 error, retval, syncstates;

	CTR4(KTR_IGMPV3, "%s: initial join %s on ifp %p(%s)",
	    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp,
	    inm->inm_ifp->if_xname);

	error = 0;
	/* By default, commit T1 to T0 before returning (atomic change). */
	syncstates = 1;

	ifp = inm->inm_ifp;

	IN_MULTI_LOCK_ASSERT();
	IGMP_LOCK_ASSERT();

	KASSERT(igi && igi->igi_ifp == ifp, ("%s: inconsistent ifp", __func__));

	/*
	 * Groups joined on loopback or marked as 'not reported',
	 * e.g. 224.0.0.1, enter the IGMP_SILENT_MEMBER state and
	 * are never reported in any IGMP protocol exchanges.
	 * All other groups enter the appropriate IGMP state machine
	 * for the version in use on this link.
	 * A link marked as IGIF_SILENT causes IGMP to be completely
	 * disabled for the link.
	 */
	if ((ifp->if_flags & IFF_LOOPBACK) ||
	    (igi->igi_flags & IGIF_SILENT) ||
	    !igmp_isgroupreported(inm->inm_addr)) {
		CTR1(KTR_IGMPV3,
"%s: not kicking state machine for silent group", __func__);
		inm->inm_state = IGMP_SILENT_MEMBER;
		inm->inm_timer = 0;
	} else {
		/*
		 * Deal with overlapping in_multi lifecycle.
		 * If this group was LEAVING, then make sure
		 * we drop the reference we picked up to keep the
		 * group around for the final INCLUDE {} enqueue.
		 */
		if (igi->igi_version == IGMP_VERSION_3 &&
		    inm->inm_state == IGMP_LEAVING_MEMBER)
			inm_release_locked(inm);

		inm->inm_state = IGMP_REPORTING_MEMBER;

		switch (igi->igi_version) {
		case IGMP_VERSION_1:
		case IGMP_VERSION_2:
			/* v1/v2: send a membership report immediately. */
			inm->inm_state = IGMP_IDLE_MEMBER;
			error = igmp_v1v2_queue_report(inm,
			    (igi->igi_version == IGMP_VERSION_2) ?
			     IGMP_v2_HOST_MEMBERSHIP_REPORT :
			     IGMP_v1_HOST_MEMBERSHIP_REPORT);
			if (error == 0) {
				/* Arm a randomized re-report timer. */
				inm->inm_timer = IGMP_RANDOM_DELAY(
				    IGMP_V1V2_MAX_RI * PR_FASTHZ);
				V_current_state_timers_running = 1;
			}
			break;

		case IGMP_VERSION_3:
			/*
			 * Defer update of T0 to T1, until the first copy
			 * of the state change has been transmitted.
			 */
			syncstates = 0;

			/*
			 * Immediately enqueue a State-Change Report for
			 * this interface, freeing any previous reports.
			 * Don't kick the timers if there is nothing to do,
			 * or if an error occurred.
			 */
			ifq = &inm->inm_scq;
			_IF_DRAIN(ifq);
			retval = igmp_v3_enqueue_group_record(ifq, inm, 1,
			    0, 0);
			CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
			    __func__, retval);
			if (retval <= 0) {
				/* Negative return conveys an errno value. */
				error = retval * -1;
				break;
			}

			/*
			 * Schedule transmission of pending state-change
			 * report up to RV times for this link. The timer
			 * will fire at the next igmp_fasttimo (~200ms),
			 * giving us an opportunity to merge the reports.
			 */
			if (igi->igi_flags & IGIF_LOOPBACK) {
				inm->inm_scrv = 1;
			} else {
				KASSERT(igi->igi_rv > 1,
				   ("%s: invalid robustness %d", __func__,
				    igi->igi_rv));
				inm->inm_scrv = igi->igi_rv;
			}
			inm->inm_sctimer = 1;
			V_state_change_timers_running = 1;

			error = 0;
			break;
		}
	}

	/*
	 * Only update the T0 state if state change is atomic,
	 * i.e. we don't need to wait for a timer to fire before we
	 * can consider the state change to have been communicated.
	 */
	if (syncstates) {
		inm_commit(inm);
		CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__,
		    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
	}

	return (error);
}
2428
2429/*
2430 * Issue an intermediate state change during the IGMP life-cycle.
2431 */
2432static int
2433igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi)
2434{
2435	INIT_VNET_INET(curvnet);
2436	struct ifnet		*ifp;
2437	int			 retval;
2438
2439	CTR4(KTR_IGMPV3, "%s: state change for %s on ifp %p(%s)",
2440	    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp,
2441	    inm->inm_ifp->if_xname);
2442
2443	ifp = inm->inm_ifp;
2444
2445	IN_MULTI_LOCK_ASSERT();
2446	IGMP_LOCK_ASSERT();
2447
2448	KASSERT(igi && igi->igi_ifp == ifp, ("%s: inconsistent ifp", __func__));
2449
2450	if ((ifp->if_flags & IFF_LOOPBACK) ||
2451	    (igi->igi_flags & IGIF_SILENT) ||
2452	    !igmp_isgroupreported(inm->inm_addr) ||
2453	    (igi->igi_version != IGMP_VERSION_3)) {
2454		if (!igmp_isgroupreported(inm->inm_addr)) {
2455			CTR1(KTR_IGMPV3,
2456"%s: not kicking state machine for silent group", __func__);
2457		}
2458		CTR1(KTR_IGMPV3, "%s: nothing to do", __func__);
2459		inm_commit(inm);
2460		CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__,
2461		    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
2462		return (0);
2463	}
2464
2465	_IF_DRAIN(&inm->inm_scq);
2466
2467	retval = igmp_v3_enqueue_group_record(&inm->inm_scq, inm, 1, 0, 0);
2468	CTR2(KTR_IGMPV3, "%s: enqueue record = %d", __func__, retval);
2469	if (retval <= 0)
2470		return (-retval);
2471
2472	/*
2473	 * If record(s) were enqueued, start the state-change
2474	 * report timer for this group.
2475	 */
2476	inm->inm_scrv = ((igi->igi_flags & IGIF_LOOPBACK) ? 1 : igi->igi_rv);
2477	inm->inm_sctimer = 1;
2478	V_state_change_timers_running = 1;
2479
2480	return (0);
2481}
2482
2483/*
2484 * Perform the final leave for an IGMP group.
2485 *
2486 * When leaving a group:
2487 *  IGMPv1 does nothing.
2488 *  IGMPv2 sends a host leave message, if and only if we are the reporter.
2489 *  IGMPv3 enqueues a state-change report containing a transition
2490 *  to INCLUDE {} for immediate transmission.
2491 */
static void
igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi)
{
	INIT_VNET_INET(curvnet);
	int syncstates;

	/* By default, commit T1 to T0 before returning. */
	syncstates = 1;

	CTR4(KTR_IGMPV3, "%s: final leave %s on ifp %p(%s)",
	    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp,
	    inm->inm_ifp->if_xname);

	IN_MULTI_LOCK_ASSERT();
	IGMP_LOCK_ASSERT();

	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
	case IGMP_LEAVING_MEMBER:
		/* Already leaving or left; do nothing. */
		CTR1(KTR_IGMPV3,
"%s: not kicking state machine for silent group", __func__);
		break;
	case IGMP_REPORTING_MEMBER:
	case IGMP_IDLE_MEMBER:
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_SG_QUERY_PENDING_MEMBER:
		if (igi->igi_version == IGMP_VERSION_2) {
#ifdef INVARIANTS
			if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
			    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER)
			panic("%s: IGMPv3 state reached, not IGMPv3 mode",
			     __func__);
#endif
			igmp_v1v2_queue_report(inm, IGMP_HOST_LEAVE_MESSAGE);
			inm->inm_state = IGMP_NOT_MEMBER;
		} else if (igi->igi_version == IGMP_VERSION_3) {
			/*
			 * Stop group timer and all pending reports.
			 * Immediately enqueue a state-change report
			 * TO_IN {} to be sent on the next fast timeout,
			 * giving us an opportunity to merge reports.
			 */
			_IF_DRAIN(&inm->inm_scq);
			inm->inm_timer = 0;
			if (igi->igi_flags & IGIF_LOOPBACK) {
				inm->inm_scrv = 1;
			} else {
				inm->inm_scrv = igi->igi_rv;
			}
			CTR4(KTR_IGMPV3, "%s: Leaving %s/%s with %d "
			    "pending retransmissions.", __func__,
			    inet_ntoa(inm->inm_addr),
			    inm->inm_ifp->if_xname, inm->inm_scrv);
			if (inm->inm_scrv == 0) {
				inm->inm_state = IGMP_NOT_MEMBER;
				inm->inm_sctimer = 0;
			} else {
				int retval;

				/*
				 * Hold an extra reference so the group
				 * survives until the last state-change
				 * report has been transmitted; it is
				 * dropped when LEAVING completes (see
				 * the overlapping-lifecycle handling in
				 * igmp_initial_join()).
				 */
				inm_acquire_locked(inm);

				retval = igmp_v3_enqueue_group_record(
				    &inm->inm_scq, inm, 1, 0, 0);
				KASSERT(retval != 0,
				    ("%s: enqueue record = %d", __func__,
				     retval));

				inm->inm_state = IGMP_LEAVING_MEMBER;
				inm->inm_sctimer = 1;
				V_state_change_timers_running = 1;
				/* Defer T0 commit until transmission. */
				syncstates = 0;
			}
			break;
		}
		/* NB: IGMPv1 has no leave message; nothing to send. */
		break;
	case IGMP_LAZY_MEMBER:
	case IGMP_SLEEPING_MEMBER:
	case IGMP_AWAKENING_MEMBER:
		/* Our reports are suppressed; do nothing. */
		break;
	}

	if (syncstates) {
		inm_commit(inm);
		CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__,
		    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
		/* Mark the group as fully undefined at T1. */
		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
		CTR3(KTR_IGMPV3, "%s: T1 now MCAST_UNDEFINED for %s/%s",
		    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname);
	}
}
2584
2585/*
2586 * Enqueue an IGMPv3 group record to the given output queue.
2587 *
2588 * XXX This function could do with having the allocation code
2589 * split out, and the multiple-tree-walks coalesced into a single
2590 * routine as has been done in igmp_v3_enqueue_filter_change().
2591 *
2592 * If is_state_change is zero, a current-state record is appended.
2593 * If is_state_change is non-zero, a state-change report is appended.
2594 *
2595 * If is_group_query is non-zero, an mbuf packet chain is allocated.
2596 * If is_group_query is zero, and if there is a packet with free space
2597 * at the tail of the queue, it will be appended to providing there
2598 * is enough free space.
2599 * Otherwise a new mbuf packet chain is allocated.
2600 *
2601 * If is_source_query is non-zero, each source is checked to see if
2602 * it was recorded for a Group-Source query, and will be omitted if
2603 * it is not both in-mode and recorded.
2604 *
2605 * The function will attempt to allocate leading space in the packet
2606 * for the IP/IGMP header to be prepended without fragmenting the chain.
2607 *
2608 * If successful the size of all data appended to the queue is returned,
2609 * otherwise an error code less than zero is returned, or zero if
2610 * no record(s) were appended.
2611 */
2612static int
2613igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
2614    const int is_state_change, const int is_group_query,
2615    const int is_source_query)
2616{
2617	struct igmp_grouprec	 ig;
2618	struct igmp_grouprec	*pig;
2619	struct ifnet		*ifp;
2620	struct ip_msource	*ims, *nims;
2621	struct mbuf		*m0, *m, *md;
2622	int			 error, is_filter_list_change;
2623	int			 minrec0len, m0srcs, msrcs, nbytes, off;
2624	int			 record_has_sources;
2625	int			 now;
2626	int			 type;
2627	in_addr_t		 naddr;
2628	uint8_t			 mode;
2629
2630	IN_MULTI_LOCK_ASSERT();
2631
2632	error = 0;
2633	ifp = inm->inm_ifp;
2634	is_filter_list_change = 0;
2635	m = NULL;
2636	m0 = NULL;
2637	m0srcs = 0;
2638	msrcs = 0;
2639	nbytes = 0;
2640	nims = NULL;
2641	record_has_sources = 1;
2642	pig = NULL;
2643	type = IGMP_DO_NOTHING;
2644	mode = inm->inm_st[1].iss_fmode;
2645
2646	/*
2647	 * If we did not transition out of ASM mode during t0->t1,
2648	 * and there are no source nodes to process, we can skip
2649	 * the generation of source records.
2650	 */
2651	if (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0 &&
2652	    inm->inm_nsrc == 0)
2653		record_has_sources = 0;
2654
2655	if (is_state_change) {
2656		/*
2657		 * Queue a state change record.
2658		 * If the mode did not change, and there are non-ASM
2659		 * listeners or source filters present,
2660		 * we potentially need to issue two records for the group.
2661		 * If we are transitioning to MCAST_UNDEFINED, we need
2662		 * not send any sources.
2663		 * If there are ASM listeners, and there was no filter
2664		 * mode transition of any kind, do nothing.
2665		 */
2666		if (mode != inm->inm_st[0].iss_fmode) {
2667			if (mode == MCAST_EXCLUDE) {
2668				CTR1(KTR_IGMPV3, "%s: change to EXCLUDE",
2669				    __func__);
2670				type = IGMP_CHANGE_TO_EXCLUDE_MODE;
2671			} else {
2672				CTR1(KTR_IGMPV3, "%s: change to INCLUDE",
2673				    __func__);
2674				type = IGMP_CHANGE_TO_INCLUDE_MODE;
2675				if (mode == MCAST_UNDEFINED)
2676					record_has_sources = 0;
2677			}
2678		} else {
2679			if (record_has_sources) {
2680				is_filter_list_change = 1;
2681			} else {
2682				type = IGMP_DO_NOTHING;
2683			}
2684		}
2685	} else {
2686		/*
2687		 * Queue a current state record.
2688		 */
2689		if (mode == MCAST_EXCLUDE) {
2690			type = IGMP_MODE_IS_EXCLUDE;
2691		} else if (mode == MCAST_INCLUDE) {
2692			type = IGMP_MODE_IS_INCLUDE;
2693			KASSERT(inm->inm_st[1].iss_asm == 0,
2694			    ("%s: inm %p is INCLUDE but ASM count is %d",
2695			     __func__, inm, inm->inm_st[1].iss_asm));
2696		}
2697	}
2698
2699	/*
2700	 * Generate the filter list changes using a separate function.
2701	 */
2702	if (is_filter_list_change)
2703		return (igmp_v3_enqueue_filter_change(ifq, inm));
2704
2705	if (type == IGMP_DO_NOTHING) {
2706		CTR3(KTR_IGMPV3, "%s: nothing to do for %s/%s",
2707		    __func__, inet_ntoa(inm->inm_addr),
2708		    inm->inm_ifp->if_xname);
2709		return (0);
2710	}
2711
2712	/*
2713	 * If any sources are present, we must be able to fit at least
2714	 * one in the trailing space of the tail packet's mbuf,
2715	 * ideally more.
2716	 */
2717	minrec0len = sizeof(struct igmp_grouprec);
2718	if (record_has_sources)
2719		minrec0len += sizeof(in_addr_t);
2720
2721	CTR4(KTR_IGMPV3, "%s: queueing %s for %s/%s", __func__,
2722	    igmp_rec_type_to_str(type), inet_ntoa(inm->inm_addr),
2723	    inm->inm_ifp->if_xname);
2724
2725	/*
2726	 * Check if we have a packet in the tail of the queue for this
2727	 * group into which the first group record for this group will fit.
2728	 * Otherwise allocate a new packet.
2729	 * Always allocate leading space for IP+RA_OPT+IGMP+REPORT.
2730	 * Note: Group records for G/GSR query responses MUST be sent
2731	 * in their own packet.
2732	 */
2733	m0 = ifq->ifq_tail;
2734	if (!is_group_query &&
2735	    m0 != NULL &&
2736	    (m0->m_pkthdr.PH_vt.vt_nrecs + 1 <= IGMP_V3_REPORT_MAXRECS) &&
2737	    (m0->m_pkthdr.len + minrec0len) <
2738	     (ifp->if_mtu - IGMP_LEADINGSPACE)) {
2739		m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
2740			    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
2741		m = m0;
2742		CTR1(KTR_IGMPV3, "%s: use existing packet", __func__);
2743	} else {
2744		if (_IF_QFULL(ifq)) {
2745			CTR1(KTR_IGMPV3, "%s: outbound queue full", __func__);
2746			return (-ENOMEM);
2747		}
2748		m = NULL;
2749		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
2750		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
2751		if (!is_state_change && !is_group_query) {
2752			m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
2753			if (m)
2754				m->m_data += IGMP_LEADINGSPACE;
2755		}
2756		if (m == NULL) {
2757			m = m_gethdr(M_DONTWAIT, MT_DATA);
2758			if (m)
2759				MH_ALIGN(m, IGMP_LEADINGSPACE);
2760		}
2761		if (m == NULL)
2762			return (-ENOMEM);
2763
2764		igmp_save_context(m, ifp);
2765
2766		CTR1(KTR_IGMPV3, "%s: allocated first packet", __func__);
2767	}
2768
2769	/*
2770	 * Append group record.
2771	 * If we have sources, we don't know how many yet.
2772	 */
2773	ig.ig_type = type;
2774	ig.ig_datalen = 0;
2775	ig.ig_numsrc = 0;
2776	ig.ig_group = inm->inm_addr;
2777	if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
2778		if (m != m0)
2779			m_freem(m);
2780		CTR1(KTR_IGMPV3, "%s: m_append() failed.", __func__);
2781		return (-ENOMEM);
2782	}
2783	nbytes += sizeof(struct igmp_grouprec);
2784
2785	/*
2786	 * Append as many sources as will fit in the first packet.
2787	 * If we are appending to a new packet, the chain allocation
2788	 * may potentially use clusters; use m_getptr() in this case.
2789	 * If we are appending to an existing packet, we need to obtain
2790	 * a pointer to the group record after m_append(), in case a new
2791	 * mbuf was allocated.
2792	 * Only append sources which are in-mode at t1. If we are
2793	 * transitioning to MCAST_UNDEFINED state on the group, do not
2794	 * include source entries.
2795	 * Only report recorded sources in our filter set when responding
2796	 * to a group-source query.
2797	 */
2798	if (record_has_sources) {
2799		if (m == m0) {
2800			md = m_last(m);
2801			pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) +
2802			    md->m_len - nbytes);
2803		} else {
2804			md = m_getptr(m, 0, &off);
2805			pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) +
2806			    off);
2807		}
2808		msrcs = 0;
2809		RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) {
2810			CTR2(KTR_IGMPV3, "%s: visit node %s", __func__,
2811			    inet_ntoa_haddr(ims->ims_haddr));
2812			now = ims_get_mode(inm, ims, 1);
2813			CTR2(KTR_IGMPV3, "%s: node is %d", __func__, now);
2814			if ((now != mode) ||
2815			    (now == mode && mode == MCAST_UNDEFINED)) {
2816				CTR1(KTR_IGMPV3, "%s: skip node", __func__);
2817				continue;
2818			}
2819			if (is_source_query && ims->ims_stp == 0) {
2820				CTR1(KTR_IGMPV3, "%s: skip unrecorded node",
2821				    __func__);
2822				continue;
2823			}
2824			CTR1(KTR_IGMPV3, "%s: append node", __func__);
2825			naddr = htonl(ims->ims_haddr);
2826			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
2827				if (m != m0)
2828					m_freem(m);
2829				CTR1(KTR_IGMPV3, "%s: m_append() failed.",
2830				    __func__);
2831				return (-ENOMEM);
2832			}
2833			nbytes += sizeof(in_addr_t);
2834			++msrcs;
2835			if (msrcs == m0srcs)
2836				break;
2837		}
2838		CTR2(KTR_IGMPV3, "%s: msrcs is %d this packet", __func__,
2839		    msrcs);
2840		pig->ig_numsrc = htons(msrcs);
2841		nbytes += (msrcs * sizeof(in_addr_t));
2842	}
2843
2844	if (is_source_query && msrcs == 0) {
2845		CTR1(KTR_IGMPV3, "%s: no recorded sources to report", __func__);
2846		if (m != m0)
2847			m_freem(m);
2848		return (0);
2849	}
2850
2851	/*
2852	 * We are good to go with first packet.
2853	 */
2854	if (m != m0) {
2855		CTR1(KTR_IGMPV3, "%s: enqueueing first packet", __func__);
2856		m->m_pkthdr.PH_vt.vt_nrecs = 1;
2857		_IF_ENQUEUE(ifq, m);
2858	} else
2859		m->m_pkthdr.PH_vt.vt_nrecs++;
2860
2861	/*
2862	 * No further work needed if no source list in packet(s).
2863	 */
2864	if (!record_has_sources)
2865		return (nbytes);
2866
2867	/*
2868	 * Whilst sources remain to be announced, we need to allocate
2869	 * a new packet and fill out as many sources as will fit.
2870	 * Always try for a cluster first.
2871	 */
2872	while (nims != NULL) {
2873		if (_IF_QFULL(ifq)) {
2874			CTR1(KTR_IGMPV3, "%s: outbound queue full", __func__);
2875			return (-ENOMEM);
2876		}
2877		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
2878		if (m)
2879			m->m_data += IGMP_LEADINGSPACE;
2880		if (m == NULL) {
2881			m = m_gethdr(M_DONTWAIT, MT_DATA);
2882			if (m)
2883				MH_ALIGN(m, IGMP_LEADINGSPACE);
2884		}
2885		if (m == NULL)
2886			return (-ENOMEM);
2887		igmp_save_context(m, ifp);
2888		md = m_getptr(m, 0, &off);
2889		pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) + off);
2890		CTR1(KTR_IGMPV3, "%s: allocated next packet", __func__);
2891
2892		if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
2893			if (m != m0)
2894				m_freem(m);
2895			CTR1(KTR_IGMPV3, "%s: m_append() failed.", __func__);
2896			return (-ENOMEM);
2897		}
2898		m->m_pkthdr.PH_vt.vt_nrecs = 1;
2899		nbytes += sizeof(struct igmp_grouprec);
2900
2901		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
2902		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
2903
2904		msrcs = 0;
2905		RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
2906			CTR2(KTR_IGMPV3, "%s: visit node %s", __func__,
2907			    inet_ntoa_haddr(ims->ims_haddr));
2908			now = ims_get_mode(inm, ims, 1);
2909			if ((now != mode) ||
2910			    (now == mode && mode == MCAST_UNDEFINED)) {
2911				CTR1(KTR_IGMPV3, "%s: skip node", __func__);
2912				continue;
2913			}
2914			if (is_source_query && ims->ims_stp == 0) {
2915				CTR1(KTR_IGMPV3, "%s: skip unrecorded node",
2916				    __func__);
2917				continue;
2918			}
2919			CTR1(KTR_IGMPV3, "%s: append node", __func__);
2920			naddr = htonl(ims->ims_haddr);
2921			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
2922				if (m != m0)
2923					m_freem(m);
2924				CTR1(KTR_IGMPV3, "%s: m_append() failed.",
2925				    __func__);
2926				return (-ENOMEM);
2927			}
2928			++msrcs;
2929			if (msrcs == m0srcs)
2930				break;
2931		}
2932		pig->ig_numsrc = htons(msrcs);
2933		nbytes += (msrcs * sizeof(in_addr_t));
2934
2935		CTR1(KTR_IGMPV3, "%s: enqueueing next packet", __func__);
2936		_IF_ENQUEUE(ifq, m);
2937	}
2938
2939	return (nbytes);
2940}
2941
2942/*
2943 * Type used to mark record pass completion.
2944 * We exploit the fact we can cast to this easily from the
2945 * current filter modes on each ip_msource node.
2946 */
typedef enum {
	REC_NONE = 0x00,	/* MCAST_UNDEFINED */
	REC_ALLOW = 0x01,	/* MCAST_INCLUDE */
	REC_BLOCK = 0x02,	/* MCAST_EXCLUDE */
	REC_FULL = REC_ALLOW | REC_BLOCK	/* both passes completed */
} rectype_t;
2953
2954/*
2955 * Enqueue an IGMPv3 filter list change to the given output queue.
2956 *
2957 * Source list filter state is held in an RB-tree. When the filter list
2958 * for a group is changed without changing its mode, we need to compute
2959 * the deltas between T0 and T1 for each source in the filter set,
2960 * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
2961 *
2962 * As we may potentially queue two record types, and the entire R-B tree
2963 * needs to be walked at once, we break this out into its own function
2964 * so we can generate a tightly packed queue of packets.
2965 *
2966 * XXX This could be written to only use one tree walk, although that makes
2967 * serializing into the mbuf chains a bit harder. For now we do two walks
2968 * which makes things easier on us, and it may or may not be harder on
2969 * the L2 cache.
2970 *
2971 * If successful the size of all data appended to the queue is returned,
2972 * otherwise an error code less than zero is returned, or zero if
2973 * no record(s) were appended.
2974 */
static int
igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
{
	static const int MINRECLEN =
	    sizeof(struct igmp_grouprec) + sizeof(in_addr_t);
	struct ifnet		*ifp;
	struct igmp_grouprec	 ig;
	struct igmp_grouprec	*pig;
	struct ip_msource	*ims, *nims;
	struct mbuf		*m, *m0, *md;
	in_addr_t		 naddr;
	int			 m0srcs, nbytes, off, rsrcs, schanged;
	int			 nallow, nblock;
	uint8_t			 mode, now, then;
	rectype_t		 crt, drt, nrt;

	IN_MULTI_LOCK_ASSERT();

	/* No sources, or no mode change at all: nothing to report. */
	if (inm->inm_nsrc == 0 ||
	    (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0))
		return (0);

	ifp = inm->inm_ifp;			/* interface */
	mode = inm->inm_st[1].iss_fmode;	/* filter mode at t1 */
	crt = REC_NONE;	/* current group record type */
	drt = REC_NONE;	/* mask of completed group record types */
	nrt = REC_NONE;	/* record type for current node */
	m0srcs = 0;	/* # source which will fit in current mbuf chain */
	nbytes = 0;	/* # of bytes appended to group's state-change queue */
	rsrcs = 0;	/* # sources encoded in current record */
	schanged = 0;	/* # nodes encoded in overall filter change */
	nallow = 0;	/* # of source entries in ALLOW_NEW */
	nblock = 0;	/* # of source entries in BLOCK_OLD */
	nims = NULL;	/* next tree node pointer */

	/*
	 * For each possible filter record mode.
	 * The first kind of source we encounter tells us which
	 * is the first kind of record we start appending.
	 * If a node transitioned to UNDEFINED at t1, its mode is treated
	 * as the inverse of the group's filter mode.
	 */
	while (drt != REC_FULL) {
		do {
			/*
			 * Reuse the tail packet if the record fits,
			 * otherwise allocate a fresh one (cluster
			 * first, plain header mbuf as fallback).
			 */
			m0 = ifq->ifq_tail;
			if (m0 != NULL &&
			    (m0->m_pkthdr.PH_vt.vt_nrecs + 1 <=
			     IGMP_V3_REPORT_MAXRECS) &&
			    (m0->m_pkthdr.len + MINRECLEN) <
			     (ifp->if_mtu - IGMP_LEADINGSPACE)) {
				m = m0;
				m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
					    sizeof(struct igmp_grouprec)) /
				    sizeof(in_addr_t);
				CTR1(KTR_IGMPV3,
				    "%s: use previous packet", __func__);
			} else {
				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
				if (m)
					m->m_data += IGMP_LEADINGSPACE;
				if (m == NULL) {
					m = m_gethdr(M_DONTWAIT, MT_DATA);
					if (m)
						MH_ALIGN(m, IGMP_LEADINGSPACE);
				}
				if (m == NULL) {
					CTR1(KTR_IGMPV3,
					    "%s: m_get*() failed", __func__);
					return (-ENOMEM);
				}
				m->m_pkthdr.PH_vt.vt_nrecs = 0;
				igmp_save_context(m, ifp);
				m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
				    sizeof(struct igmp_grouprec)) /
				    sizeof(in_addr_t);
				CTR1(KTR_IGMPV3,
				    "%s: allocated new packet", __func__);
			}
			/*
			 * Append the IGMP group record header to the
			 * current packet's data area.
			 * Recalculate pointer to free space for next
			 * group record, in case m_append() allocated
			 * a new mbuf or cluster.
			 */
			memset(&ig, 0, sizeof(ig));
			ig.ig_group = inm->inm_addr;
			if (!m_append(m, sizeof(ig), (void *)&ig)) {
				if (m != m0)
					m_freem(m);
				CTR1(KTR_IGMPV3,
				    "%s: m_append() failed", __func__);
				return (-ENOMEM);
			}
			nbytes += sizeof(struct igmp_grouprec);
			if (m == m0) {
				md = m_last(m);
				pig = (struct igmp_grouprec *)(mtod(md,
				    uint8_t *) + md->m_len - nbytes);
			} else {
				md = m_getptr(m, 0, &off);
				pig = (struct igmp_grouprec *)(mtod(md,
				    uint8_t *) + off);
			}
			/*
			 * Begin walking the tree for this record type
			 * pass, or continue from where we left off
			 * previously if we had to allocate a new packet.
			 * Only report deltas in-mode at t1.
			 * We need not report included sources as allowed
			 * if we are in inclusive mode on the group,
			 * however the converse is not true.
			 */
			rsrcs = 0;
			if (nims == NULL)
				nims = RB_MIN(ip_msource_tree, &inm->inm_srcs);
			RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
				CTR2(KTR_IGMPV3, "%s: visit node %s",
				    __func__, inet_ntoa_haddr(ims->ims_haddr));
				now = ims_get_mode(inm, ims, 1);
				then = ims_get_mode(inm, ims, 0);
				CTR3(KTR_IGMPV3, "%s: mode: t0 %d, t1 %d",
				    __func__, then, now);
				if (now == then) {
					CTR1(KTR_IGMPV3,
					    "%s: skip unchanged", __func__);
					continue;
				}
				if (mode == MCAST_EXCLUDE &&
				    now == MCAST_INCLUDE) {
					CTR1(KTR_IGMPV3,
					    "%s: skip IN src on EX group",
					    __func__);
					continue;
				}
				nrt = (rectype_t)now;
				if (nrt == REC_NONE)
					nrt = (rectype_t)(~mode & REC_FULL);
				/*
				 * The first changed source seen selects
				 * which record type this pass emits;
				 * sources of the other type are picked
				 * up on the second pass.
				 */
				if (schanged++ == 0) {
					crt = nrt;
				} else if (crt != nrt)
					continue;
				naddr = htonl(ims->ims_haddr);
				if (!m_append(m, sizeof(in_addr_t),
				    (void *)&naddr)) {
					if (m != m0)
						m_freem(m);
					CTR1(KTR_IGMPV3,
					    "%s: m_append() failed", __func__);
					return (-ENOMEM);
				}
				nallow += !!(crt == REC_ALLOW);
				nblock += !!(crt == REC_BLOCK);
				if (++rsrcs == m0srcs)
					break;
			}
			/*
			 * If we did not append any tree nodes on this
			 * pass, back out of allocations.
			 */
			if (rsrcs == 0) {
				nbytes -= sizeof(struct igmp_grouprec);
				if (m != m0) {
					CTR1(KTR_IGMPV3,
					    "%s: m_free(m)", __func__);
					m_freem(m);
				} else {
					CTR1(KTR_IGMPV3,
					    "%s: m_adj(m, -ig)", __func__);
					m_adj(m, -((int)sizeof(
					    struct igmp_grouprec)));
				}
				continue;
			}
			nbytes += (rsrcs * sizeof(in_addr_t));
			if (crt == REC_ALLOW)
				pig->ig_type = IGMP_ALLOW_NEW_SOURCES;
			else if (crt == REC_BLOCK)
				pig->ig_type = IGMP_BLOCK_OLD_SOURCES;
			pig->ig_numsrc = htons(rsrcs);
			/*
			 * Count the new group record, and enqueue this
			 * packet if it wasn't already queued.
			 */
			m->m_pkthdr.PH_vt.vt_nrecs++;
			if (m != m0)
				_IF_ENQUEUE(ifq, m);
		} while (nims != NULL);
		/* Record type pass complete; flip to the other type. */
		drt |= crt;
		crt = (~crt & REC_FULL);
	}

	CTR3(KTR_IGMPV3, "%s: queued %d ALLOW_NEW, %d BLOCK_OLD", __func__,
	    nallow, nblock);

	return (nbytes);
}
3172
/*
 * Merge the per-group pending state-change packets queued on
 * inm->inm_scq into the per-interface state-change queue 'ifscq',
 * coalescing group records into the tail packet where the record
 * count and MTU limits allow.
 *
 * If further retransmissions are pending for this group
 * (inm->inm_scrv > 0), each queued packet is duplicated rather than
 * dequeued, so the original remains available for the next pass.
 *
 * Returns 0 on success, or ENOMEM if a duplicate could not be made.
 */
static int
igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq)
{
	struct ifqueue	*gq;
	struct mbuf	*m;		/* pending state-change */
	struct mbuf	*m0;		/* copy of pending state-change */
	struct mbuf	*mt;		/* last state-change in packet */
	int		 docopy, domerge;
	u_int		 recslen;

	docopy = 0;
	domerge = 0;
	recslen = 0;

	IN_MULTI_LOCK_ASSERT();
	IGMP_LOCK_ASSERT();

	/*
	 * If there are further pending retransmissions, make a writable
	 * copy of each queued state-change message before merging.
	 */
	if (inm->inm_scrv > 0)
		docopy = 1;

	gq = &inm->inm_scq;
#ifdef KTR
	if (gq->ifq_head == NULL) {
		CTR2(KTR_IGMPV3, "%s: WARNING: queue for inm %p is empty",
		    __func__, inm);
	}
#endif

	m = gq->ifq_head;
	while (m != NULL) {
		/*
		 * Only merge the report into the current packet if
		 * there is sufficient space to do so; an IGMPv3 report
		 * packet may only contain 65,535 group records.
		 * Always use a simple mbuf chain concatentation to do this,
		 * as large state changes for single groups may have
		 * allocated clusters.
		 */
		domerge = 0;
		mt = ifscq->ifq_tail;
		if (mt != NULL) {
			recslen = m_length(m, NULL);

			if ((mt->m_pkthdr.PH_vt.vt_nrecs +
			    m->m_pkthdr.PH_vt.vt_nrecs <=
			    IGMP_V3_REPORT_MAXRECS) &&
			    (mt->m_pkthdr.len + recslen <=
			    (inm->inm_ifp->if_mtu - IGMP_LEADINGSPACE)))
				domerge = 1;
		}

		/*
		 * Cannot merge and the queue is full: skip this packet.
		 * NOTE(review): this tests gq (the per-group source queue)
		 * although the log message says "outbound queue" -- verify
		 * which queue was intended.  Also, when !docopy, m is freed
		 * here while still linked on gq; presumably the caller
		 * discards gq without walking the freed chain -- confirm.
		 */
		if (!domerge && _IF_QFULL(gq)) {
			CTR2(KTR_IGMPV3,
			    "%s: outbound queue full, skipping whole packet %p",
			    __func__, m);
			mt = m->m_nextpkt;
			if (!docopy)
				m_freem(m);
			m = mt;
			continue;
		}

		if (!docopy) {
			/*
			 * No retransmissions pending: take the packet off
			 * the group queue; it is consumed by this merge.
			 */
			CTR2(KTR_IGMPV3, "%s: dequeueing %p", __func__, m);
			_IF_DEQUEUE(gq, m0);
			m = m0->m_nextpkt;
		} else {
			/*
			 * Retransmissions pending: work on a writable
			 * duplicate and leave the original queued.
			 */
			CTR2(KTR_IGMPV3, "%s: copying %p", __func__, m);
			m0 = m_dup(m, M_NOWAIT);
			if (m0 == NULL)
				return (ENOMEM);
			m0->m_nextpkt = NULL;
			m = m->m_nextpkt;
		}

		if (!domerge) {
			/* Enqueue as a new packet on the interface queue. */
			CTR3(KTR_IGMPV3, "%s: queueing %p to ifscq %p)",
			    __func__, m0, ifscq);
			_IF_ENQUEUE(ifscq, m0);
		} else {
			struct mbuf *mtl;	/* last mbuf of packet mt */

			/*
			 * Concatenate m0 onto the tail packet mt, folding
			 * its record count and length into mt's pkthdr.
			 */
			CTR3(KTR_IGMPV3, "%s: merging %p with ifscq tail %p)",
			    __func__, m0, mt);

			mtl = m_last(mt);
			m0->m_flags &= ~M_PKTHDR;
			mt->m_pkthdr.len += recslen;
			mt->m_pkthdr.PH_vt.vt_nrecs +=
			    m0->m_pkthdr.PH_vt.vt_nrecs;

			mtl->m_next = m0;
		}
	}

	return (0);
}
3274
3275/*
3276 * Respond to a pending IGMPv3 General Query.
3277 */
static void
igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi)
{
	INIT_VNET_INET(curvnet);
	struct ifmultiaddr	*ifma, *tifma;
	struct ifnet		*ifp;
	struct in_multi		*inm;
	int			 retval, loop;

	IN_MULTI_LOCK_ASSERT();
	IGMP_LOCK_ASSERT();

	/* Only meaningful for an interface currently running IGMPv3. */
	KASSERT(igi->igi_version == IGMP_VERSION_3,
	    ("%s: called when version %d", __func__, igi->igi_version));

	ifp = igi->igi_ifp;

	IF_ADDR_LOCK(ifp);
	/*
	 * Walk every multicast membership on the interface and enqueue
	 * a group record for each group in a reporting-eligible state.
	 */
	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, tifma) {
		/* Skip non-IPv4 entries and those without in_multi state. */
		if (ifma->ifma_addr->sa_family != AF_INET ||
		    ifma->ifma_protospec == NULL)
			continue;

		inm = (struct in_multi *)ifma->ifma_protospec;
		KASSERT(ifp == inm->inm_ifp,
		    ("%s: inconsistent ifp", __func__));

		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
			/* Nothing to report for this group. */
			break;
		case IGMP_REPORTING_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_LAZY_MEMBER:
		case IGMP_SLEEPING_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			/*
			 * Active membership: mark it reporting and queue a
			 * group record on the general query response queue.
			 */
			inm->inm_state = IGMP_REPORTING_MEMBER;
			retval = igmp_v3_enqueue_group_record(&igi->igi_gq,
			    inm, 0, 0, 0);
			CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
			    __func__, retval);
			break;
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
		case IGMP_LEAVING_MEMBER:
			/* These states are not reported from here. */
			break;
		}
	}
	IF_ADDR_UNLOCK(ifp);

	/* Loop back the reports when the interface is loopback-flagged. */
	loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
	igmp_dispatch_queue(&igi->igi_gq, IGMP_MAX_RESPONSE_BURST, loop);

	/*
	 * Slew transmission of bursts over 500ms intervals.
	 * If anything remains queued, re-arm the interface timer with a
	 * random delay so the next burst goes out later.
	 */
	if (igi->igi_gq.ifq_head != NULL) {
		igi->igi_v3_timer = 1 + IGMP_RANDOM_DELAY(
		    IGMP_RESPONSE_BURST_INTERVAL);
		V_interface_timers_running = 1;
	}
}
3340
3341/*
3342 * Transmit the next pending IGMP message in the output queue.
3343 *
3344 * We get called from netisr_processqueue(). A mutex private to igmpoq
3345 * will be acquired and released around this routine.
3346 *
3347 * VIMAGE: Needs to store/restore vnet pointer on a per-mbuf-chain basis.
3348 * MRT: Nothing needs to be done, as IGMP traffic is always local to
3349 * a link and uses a link-scope multicast address.
3350 */
static void
igmp_intr(struct mbuf *m)
{
	struct ip_moptions	 imo;
	struct ifnet		*ifp;
	struct mbuf		*ipopts, *m0;
	int			 error;
	uint32_t		 ifindex;

	CTR2(KTR_IGMPV3, "%s: transmit %p", __func__, m);

	/*
	 * Set VNET image pointer from enqueued mbuf chain
	 * before doing anything else. Whilst we use interface
	 * indexes to guard against interface detach, they are
	 * unique to each VIMAGE and must be retrieved.
	 */
	CURVNET_SET(m->m_pkthdr.header);
	INIT_VNET_NET(curvnet);
	INIT_VNET_INET(curvnet);
	ifindex = igmp_restore_context(m);

	/*
	 * Check if the ifnet still exists. This limits the scope of
	 * any race in the absence of a global ifp lock for low cost
	 * (an array lookup).
	 */
	ifp = ifnet_byindex(ifindex);
	if (ifp == NULL) {
		CTR3(KTR_IGMPV3, "%s: dropped %p as ifindex %u went away.",
		    __func__, m, ifindex);
		m_freem(m);
		IPSTAT_INC(ips_noroute);
		goto out;
	}

	/* Attach the pre-built Router Alert option if the sysctl asks. */
	ipopts = V_igmp_sendra ? m_raopt : NULL;

	/* IGMP traffic is link-scope: TTL 1, no multicast vif. */
	imo.imo_multicast_ttl  = 1;
	imo.imo_multicast_vif  = -1;
	imo.imo_multicast_loop = (V_ip_mrouter != NULL);

	/*
	 * If the user requested that IGMP traffic be explicitly
	 * redirected to the loopback interface (e.g. they are running a
	 * MANET interface and the routing protocol needs to see the
	 * updates), handle this now.
	 */
	if (m->m_flags & M_IGMP_LOOP)
		imo.imo_multicast_ifp = V_loif;
	else
		imo.imo_multicast_ifp = ifp;

	if (m->m_flags & M_IGMPV2) {
		/* IGMPv1/v2 messages are already fully formed. */
		m0 = m;
	} else {
		/* IGMPv3 reports still need the IP/IGMPv3 header. */
		m0 = igmp_v3_encap_report(ifp, m);
		if (m0 == NULL) {
			CTR2(KTR_IGMPV3, "%s: dropped %p", __func__, m);
			m_freem(m);
			IPSTAT_INC(ips_odropped);
			goto out;
		}
	}

	igmp_scrub_context(m0);
	/*
	 * NOTE(review): protocol flags are cleared on 'm', but the chain
	 * head after encapsulation is 'm0'; these can differ when a header
	 * mbuf was prepended above -- confirm this is intentional.
	 */
	m->m_flags &= ~(M_PROTOFLAGS);
	m0->m_pkthdr.rcvif = V_loif;
#ifdef MAC
	mac_netinet_igmp_send(ifp, m0);
#endif
	/* ip_output() consumes m0 on both success and failure. */
	error = ip_output(m0, ipopts, NULL, 0, &imo, NULL);
	if (error) {
		CTR3(KTR_IGMPV3, "%s: ip_output(%p) = %d", __func__, m0, error);
		goto out;
	}

	IGMPSTAT_INC(igps_snd_reports);

out:
	/*
	 * We must restore the existing vnet pointer before
	 * continuing as we are run from netisr context.
	 */
	CURVNET_RESTORE();
}
3437
3438/*
3439 * Encapsulate an IGMPv3 report.
3440 *
3441 * The internal mbuf flag M_IGMPV3_HDR is used to indicate that the mbuf
3442 * chain has already had its IP/IGMPv3 header prepended. In this case
3443 * the function will not attempt to prepend; the lengths and checksums
3444 * will however be re-computed.
3445 *
3446 * Returns a pointer to the new mbuf chain head, or NULL if the
3447 * allocation failed.
3448 */
static struct mbuf *
igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
{
	INIT_VNET_INET(curvnet);
	struct igmp_report	*igmp;
	struct ip		*ip;
	int			 hdrlen, igmpreclen;

	KASSERT((m->m_flags & M_PKTHDR),
	    ("%s: mbuf chain %p is !M_PKTHDR", __func__, m));

	/* Total chain length; reduced to record-only length below. */
	igmpreclen = m_length(m, NULL);
	hdrlen = sizeof(struct ip) + sizeof(struct igmp_report);

	if (m->m_flags & M_IGMPV3_HDR) {
		/* Header already prepended: records follow it. */
		igmpreclen -= hdrlen;
	} else {
		/* Prepend room for the IP + IGMPv3 report headers. */
		M_PREPEND(m, hdrlen, M_DONTWAIT);
		if (m == NULL)
			return (NULL);
		m->m_flags |= M_IGMPV3_HDR;
	}

	CTR2(KTR_IGMPV3, "%s: igmpreclen is %d", __func__, igmpreclen);

	/* Temporarily step past the IP header to reach the IGMP header. */
	m->m_data += sizeof(struct ip);
	m->m_len -= sizeof(struct ip);

	igmp = mtod(m, struct igmp_report *);
	igmp->ir_type = IGMP_v3_HOST_MEMBERSHIP_REPORT;
	igmp->ir_rsv1 = 0;
	igmp->ir_rsv2 = 0;
	igmp->ir_numgrps = htons(m->m_pkthdr.PH_vt.vt_nrecs);
	igmp->ir_cksum = 0;
	/* Checksum covers the IGMP report header plus all group records. */
	igmp->ir_cksum = in_cksum(m, sizeof(struct igmp_report) + igmpreclen);
	/* Record count has been consumed into the header. */
	m->m_pkthdr.PH_vt.vt_nrecs = 0;

	/* Step back to the start of the IP header and fill it in. */
	m->m_data -= sizeof(struct ip);
	m->m_len += sizeof(struct ip);

	ip = mtod(m, struct ip *);
	ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
	ip->ip_len = hdrlen + igmpreclen;
	ip->ip_off = IP_DF;
	ip->ip_p = IPPROTO_IGMP;
	ip->ip_sum = 0;

	ip->ip_src.s_addr = INADDR_ANY;

	if (m->m_flags & M_IGMP_LOOP) {
		/*
		 * Looped-back report: use the interface's primary address
		 * as the source instead of INADDR_ANY, when one exists.
		 */
		struct in_ifaddr *ia;

		IFP_TO_IA(ifp, ia);
		if (ia != NULL)
			ip->ip_src = ia->ia_addr.sin_addr;
	}

	/* IGMPv3 reports are addressed to the all-v3-routers group. */
	ip->ip_dst.s_addr = htonl(INADDR_ALLRPTS_GROUP);

	return (m);
}
3510
3511#ifdef KTR
3512static char *
3513igmp_rec_type_to_str(const int type)
3514{
3515
3516	switch (type) {
3517		case IGMP_CHANGE_TO_EXCLUDE_MODE:
3518			return "TO_EX";
3519			break;
3520		case IGMP_CHANGE_TO_INCLUDE_MODE:
3521			return "TO_IN";
3522			break;
3523		case IGMP_MODE_IS_EXCLUDE:
3524			return "MODE_EX";
3525			break;
3526		case IGMP_MODE_IS_INCLUDE:
3527			return "MODE_IN";
3528			break;
3529		case IGMP_ALLOW_NEW_SOURCES:
3530			return "ALLOW_NEW";
3531			break;
3532		case IGMP_BLOCK_OLD_SOURCES:
3533			return "BLOCK_OLD";
3534			break;
3535		default:
3536			break;
3537	}
3538	return "unknown";
3539}
3540#endif
3541
/*
 * Module-wide (non-vnet) initialization: set up the IGMP lock, the
 * netisr output queue and its mutex, the pre-built Router Alert
 * option mbuf, and register the IGMP netisr handler.
 */
static void
igmp_sysinit(void)
{

	CTR1(KTR_IGMPV3, "%s: initializing", __func__);

	IGMP_LOCK_INIT();

	/* Output queue drained by igmp_intr(), with its private mutex. */
	mtx_init(&igmpoq.ifq_mtx, "igmpoq_mtx", NULL, MTX_DEF);
	IFQ_SET_MAXLEN(&igmpoq, IFQ_MAXLEN);

	/* Pre-allocate the Router Alert IP option used for reports. */
	m_raopt = igmp_ra_alloc();

	netisr_register(NETISR_IGMP, igmp_intr, &igmpoq, 0);
}
3557
/*
 * Module-wide teardown: unregister the netisr handler, destroy the
 * output queue mutex, free the pre-built Router Alert option mbuf,
 * and destroy the IGMP lock.  Mirrors igmp_sysinit() in reverse.
 */
static void
igmp_sysuninit(void)
{

	CTR1(KTR_IGMPV3, "%s: tearing down", __func__);

	netisr_unregister(NETISR_IGMP);
	mtx_destroy(&igmpoq.ifq_mtx);

	/* Release the pre-allocated Router Alert option mbuf. */
	m_free(m_raopt);
	m_raopt = NULL;

	IGMP_LOCK_DESTROY();
}
3572
3573/*
3574 * Initialize an IGMPv3 instance.
3575 * VIMAGE: Assumes curvnet set by caller and called per vimage.
3576 */
static int
vnet_igmp_iattach(const void *unused __unused)
{
	INIT_VNET_INET(curvnet);

	CTR1(KTR_IGMPV3, "%s: initializing", __func__);

	/* No per-interface IGMP state exists yet in this vnet. */
	LIST_INIT(&V_igi_head);

	/* All IGMP timers start out stopped. */
	V_current_state_timers_running = 0;
	V_state_change_timers_running = 0;
	V_interface_timers_running = 0;

	/*
	 * Initialize sysctls to default values.
	 */
	V_igmp_recvifkludge = 1;
	V_igmp_sendra = 1;
	V_igmp_sendlocal = 1;
	V_igmp_v1enable = 1;
	V_igmp_v2enable = 1;
	V_igmp_legacysupp = 0;
	V_igmp_default_version = IGMP_VERSION_3;
	V_igmp_gsrdelay.tv_sec = 10;	/* 10s default gsr delay */
	V_igmp_gsrdelay.tv_usec = 0;

	/* Zero and version-stamp the per-vnet statistics block. */
	memset(&V_igmpstat, 0, sizeof(struct igmpstat));
	V_igmpstat.igps_version = IGPS_VERSION_3;
	V_igmpstat.igps_len = sizeof(struct igmpstat);

	return (0);
}
3609
/*
 * Tear down the per-vnet IGMP instance.
 * VIMAGE: Assumes curvnet set by caller.
 */
static int
vnet_igmp_idetach(const void *unused __unused)
{
	INIT_VNET_INET(curvnet);

	CTR1(KTR_IGMPV3, "%s: tearing down", __func__);

	/* All per-interface state must be gone before the vnet detaches. */
	KASSERT(LIST_EMPTY(&V_igi_head),
	    ("%s: igi list not empty; ifnets not detached?", __func__));

	return (0);
}
3622
3623#ifndef VIMAGE_GLOBALS
/* Per-vnet registration: attach/detach hooks; depends on INET. */
static vnet_modinfo_t vnet_igmp_modinfo = {
	.vmi_id		= VNET_MOD_IGMP,
	.vmi_name	= "igmp",
	.vmi_dependson	= VNET_MOD_INET,
	.vmi_iattach	= vnet_igmp_iattach,
	.vmi_idetach	= vnet_igmp_idetach
};
3631#endif
3632
3633static int
3634igmp_modevent(module_t mod, int type, void *unused __unused)
3635{
3636
3637    switch (type) {
3638    case MOD_LOAD:
3639	igmp_sysinit();
3640#ifndef VIMAGE_GLOBALS
3641	vnet_mod_register(&vnet_igmp_modinfo);
3642#else
3643	vnet_igmp_iattach(NULL);
3644#endif
3645	break;
3646    case MOD_UNLOAD:
3647#ifndef VIMAGE_GLOBALS
3648#ifdef NOTYET
3649	vnet_mod_deregister(&vnet_igmp_modinfo);
3650#endif
3651#else
3652	vnet_igmp_idetach(NULL);
3653#endif
3654	igmp_sysuninit();
3655	break;
3656    default:
3657	return (EOPNOTSUPP);
3658    }
3659    return (0);
3660}
3661
3662static moduledata_t igmp_mod = {
3663    "igmp",
3664    igmp_modevent,
3665    0
3666};
3667DECLARE_MODULE(igmp, igmp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
3668