1/* 2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/*- 29 * Copyright (c) 2007-2009 Bruce Simpson. 30 * Copyright (c) 1988 Stephen Deering. 31 * Copyright (c) 1992, 1993 32 * The Regents of the University of California. All rights reserved. 33 * 34 * This code is derived from software contributed to Berkeley by 35 * Stephen Deering of Stanford University. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. All advertising materials mentioning features or use of this software 46 * must display the following acknowledgement: 47 * This product includes software developed by the University of 48 * California, Berkeley and its contributors. 49 * 4. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)igmp.c 8.1 (Berkeley) 7/19/93 66 */ 67/* 68 * NOTICE: This file was modified by SPARTA, Inc. 
in 2005 to introduce 69 * support for mandatory and extensible security protections. This notice 70 * is included in support of clause 2.2 (b) of the Apple Public License, 71 * Version 2.0. 72 */ 73 74/* 75 * Internet Group Management Protocol (IGMP) routines. 76 * [RFC1112, RFC2236, RFC3376] 77 * 78 * Written by Steve Deering, Stanford, May 1988. 79 * Modified by Rosen Sharma, Stanford, Aug 1994. 80 * Modified by Bill Fenner, Xerox PARC, Feb 1995. 81 * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995. 82 * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson. 83 * 84 * MULTICAST Revision: 3.5.1.4 85 */ 86 87#include <sys/cdefs.h> 88 89#include <sys/param.h> 90#include <sys/systm.h> 91#include <sys/malloc.h> 92#include <sys/mbuf.h> 93#include <sys/socket.h> 94#include <sys/protosw.h> 95#include <sys/kernel.h> 96#include <sys/sysctl.h> 97#include <sys/mcache.h> 98 99#include <libkern/libkern.h> 100#include <kern/zalloc.h> 101 102#include <net/if.h> 103#include <net/route.h> 104 105#include <netinet/in.h> 106#include <netinet/in_var.h> 107#include <netinet/in_systm.h> 108#include <netinet/ip.h> 109#include <netinet/ip_var.h> 110#include <netinet/igmp.h> 111#include <netinet/igmp_var.h> 112#include <netinet/kpi_ipfilter_var.h> 113 114SLIST_HEAD(igmp_inm_relhead, in_multi); 115 116static void igi_initvar(struct igmp_ifinfo *, struct ifnet *, int); 117static struct igmp_ifinfo *igi_alloc(int); 118static void igi_free(struct igmp_ifinfo *); 119static void igi_delete(const struct ifnet *, struct igmp_inm_relhead *); 120static void igmp_dispatch_queue(struct igmp_ifinfo *, struct ifqueue *, 121 int, const int); 122static void igmp_final_leave(struct in_multi *, struct igmp_ifinfo *, 123 struct igmp_tparams *); 124static int igmp_handle_state_change(struct in_multi *, 125 struct igmp_ifinfo *, struct igmp_tparams *); 126static int igmp_initial_join(struct in_multi *, struct igmp_ifinfo *, 127 struct igmp_tparams *); 128static int igmp_input_v1_query(struct ifnet *, const struct ip *, 129 const struct igmp *); 130static int igmp_input_v2_query(struct ifnet *, const struct ip *, 131 const struct igmp *); 132static int igmp_input_v3_query(struct ifnet *, const struct ip *, 133 /*const*/ struct igmpv3 *); 134static int igmp_input_v3_group_query(struct in_multi *, 135 int, /*const*/ struct igmpv3 *); 136static int igmp_input_v1_report(struct ifnet *, struct mbuf *, 137 /*const*/ struct ip *, /*const*/ struct igmp *); 138static int igmp_input_v2_report(struct ifnet *, struct mbuf *, 139 /*const*/ struct ip *, /*const*/ struct igmp *); 140static void igmp_sendpkt(struct mbuf *); 141static __inline__ int igmp_isgroupreported(const struct in_addr); 142static struct mbuf *igmp_ra_alloc(void); 143#ifdef IGMP_DEBUG 144static const char *igmp_rec_type_to_str(const int); 145#endif 146static uint32_t igmp_set_version(struct igmp_ifinfo *, const int); 147static void igmp_flush_relq(struct igmp_ifinfo *, 148 struct igmp_inm_relhead *); 149static int igmp_v1v2_queue_report(struct in_multi *, const int); 150static void igmp_v1v2_process_group_timer(struct in_multi *, const int); 151static void igmp_v1v2_process_querier_timers(struct igmp_ifinfo *); 152static uint32_t igmp_v2_update_group(struct in_multi *, const int); 153static void igmp_v3_cancel_link_timers(struct igmp_ifinfo *); 154static uint32_t igmp_v3_dispatch_general_query(struct igmp_ifinfo *); 155static struct mbuf * 156 igmp_v3_encap_report(struct ifnet *, struct mbuf *); 157static int igmp_v3_enqueue_group_record(struct ifqueue 
*, 158 struct in_multi *, const int, const int, const int); 159static int igmp_v3_enqueue_filter_change(struct ifqueue *, 160 struct in_multi *); 161static void igmp_v3_process_group_timers(struct igmp_ifinfo *, 162 struct ifqueue *, struct ifqueue *, struct in_multi *, 163 const int); 164static int igmp_v3_merge_state_changes(struct in_multi *, 165 struct ifqueue *); 166static void igmp_v3_suppress_group_record(struct in_multi *); 167static int sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS; 168static int sysctl_igmp_gsr SYSCTL_HANDLER_ARGS; 169static int sysctl_igmp_default_version SYSCTL_HANDLER_ARGS; 170 171static int igmp_timeout_run; /* IGMP timer is scheduled to run */ 172static void igmp_timeout(void *); 173static void igmp_sched_timeout(void); 174 175static struct mbuf *m_raopt; /* Router Alert option */ 176 177static int querier_present_timers_running; /* IGMPv1/v2 older version 178 * querier present */ 179static int interface_timers_running; /* IGMPv3 general 180 * query response */ 181static int state_change_timers_running; /* IGMPv3 state-change 182 * retransmit */ 183static int current_state_timers_running; /* IGMPv1/v2 host 184 * report; IGMPv3 g/sg 185 * query response */ 186 187/* 188 * Subsystem lock macros. 189 */ 190#define IGMP_LOCK() \ 191 lck_mtx_lock(&igmp_mtx) 192#define IGMP_LOCK_ASSERT_HELD() \ 193 lck_mtx_assert(&igmp_mtx, LCK_MTX_ASSERT_OWNED) 194#define IGMP_LOCK_ASSERT_NOTHELD() \ 195 lck_mtx_assert(&igmp_mtx, LCK_MTX_ASSERT_NOTOWNED) 196#define IGMP_UNLOCK() \ 197 lck_mtx_unlock(&igmp_mtx) 198 199static LIST_HEAD(, igmp_ifinfo) igi_head; 200static struct igmpstat_v3 igmpstat_v3 = { 201 .igps_version = IGPS_VERSION_3, 202 .igps_len = sizeof(struct igmpstat_v3), 203}; 204static struct igmpstat igmpstat; /* old IGMPv2 stats structure */ 205static struct timeval igmp_gsrdelay = {10, 0}; 206 207static int igmp_recvifkludge = 1; 208static int igmp_sendra = 1; 209static int igmp_sendlocal = 1; 210static int igmp_v1enable = 1; 211static int igmp_v2enable = 1; 212static int igmp_legacysupp = 0; 213static int igmp_default_version = IGMP_VERSION_3; 214 215SYSCTL_STRUCT(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED, 216 &igmpstat, igmpstat, ""); 217SYSCTL_STRUCT(_net_inet_igmp, OID_AUTO, v3stats, 218 CTLFLAG_RD | CTLFLAG_LOCKED, &igmpstat_v3, igmpstat_v3, ""); 219SYSCTL_INT(_net_inet_igmp, OID_AUTO, recvifkludge, CTLFLAG_RW | CTLFLAG_LOCKED, 220 &igmp_recvifkludge, 0, 221 "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address"); 222SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendra, CTLFLAG_RW | CTLFLAG_LOCKED, 223 &igmp_sendra, 0, 224 "Send IP Router Alert option in IGMPv2/v3 messages"); 225SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendlocal, CTLFLAG_RW | CTLFLAG_LOCKED, 226 &igmp_sendlocal, 0, 227 "Send IGMP membership reports for 224.0.0.0/24 groups"); 228SYSCTL_INT(_net_inet_igmp, OID_AUTO, v1enable, CTLFLAG_RW | CTLFLAG_LOCKED, 229 &igmp_v1enable, 0, 230 "Enable backwards compatibility with IGMPv1"); 231SYSCTL_INT(_net_inet_igmp, OID_AUTO, v2enable, CTLFLAG_RW | CTLFLAG_LOCKED, 232 &igmp_v2enable, 0, 233 "Enable backwards compatibility with IGMPv2"); 234SYSCTL_INT(_net_inet_igmp, OID_AUTO, legacysupp, CTLFLAG_RW | CTLFLAG_LOCKED, 235 &igmp_legacysupp, 0, 236 "Allow v1/v2 reports to suppress v3 group responses"); 237SYSCTL_PROC(_net_inet_igmp, OID_AUTO, default_version, 238 CTLTYPE_INT | CTLFLAG_RW, 239 &igmp_default_version, 0, sysctl_igmp_default_version, "I", 240 "Default version of IGMP to run on each interface"); 241SYSCTL_PROC(_net_inet_igmp, 
OID_AUTO, gsrdelay, 242 CTLTYPE_INT | CTLFLAG_RW, 243 &igmp_gsrdelay.tv_sec, 0, sysctl_igmp_gsr, "I", 244 "Rate limit for IGMPv3 Group-and-Source queries in seconds"); 245#ifdef IGMP_DEBUG 246int igmp_debug = 0; 247SYSCTL_INT(_net_inet_igmp, OID_AUTO, 248 debug, CTLFLAG_RW | CTLFLAG_LOCKED, &igmp_debug, 0, ""); 249#endif 250 251SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_LOCKED, 252 sysctl_igmp_ifinfo, "Per-interface IGMPv3 state"); 253 254/* Lock group and attribute for igmp_mtx */ 255static lck_attr_t *igmp_mtx_attr; 256static lck_grp_t *igmp_mtx_grp; 257static lck_grp_attr_t *igmp_mtx_grp_attr; 258 259/* 260 * Locking and reference counting: 261 * 262 * igmp_mtx mainly protects igi_head. In cases where both igmp_mtx and 263 * in_multihead_lock must be held, the former must be acquired first in order 264 * to maintain lock ordering. It is not a requirement that igmp_mtx be 265 * acquired first before in_multihead_lock, but in case both must be acquired 266 * in succession, the correct lock ordering must be followed. 267 * 268 * Instead of walking the if_multiaddrs list at the interface and returning 269 * the ifma_protospec value of a matching entry, we search the global list 270 * of in_multi records and find it that way; this is done with in_multihead 271 * lock held. Doing so avoids the race condition issues that many other BSDs 272 * suffer from (therefore in our implementation, ifma_protospec will never be 273 * NULL for as long as the in_multi is valid.) 274 * 275 * The above creates a requirement for the in_multi to stay in in_multihead 276 * list even after the final IGMP leave (in IGMPv3 mode) until no longer needs 277 * be retransmitted (this is not required for IGMPv1/v2.) In order to handle 278 * this, the request and reference counts of the in_multi are bumped up when 279 * the state changes to IGMP_LEAVING_MEMBER, and later dropped in the timeout 280 * handler. Each in_multi holds a reference to the underlying igmp_ifinfo. 281 * 282 * Thus, the permitted lock oder is: 283 * 284 * igmp_mtx, in_multihead_lock, inm_lock, igi_lock 285 * 286 * Any may be taken independently, but if any are held at the same time, 287 * the above lock order must be followed. 
288 */ 289static decl_lck_mtx_data(, igmp_mtx); 290static int igmp_timers_are_running; 291 292#define IGMP_ADD_DETACHED_INM(_head, _inm) { \ 293 SLIST_INSERT_HEAD(_head, _inm, inm_dtle); \ 294} 295 296#define IGMP_REMOVE_DETACHED_INM(_head) { \ 297 struct in_multi *_inm, *_inm_tmp; \ 298 SLIST_FOREACH_SAFE(_inm, _head, inm_dtle, _inm_tmp) { \ 299 SLIST_REMOVE(_head, _inm, in_multi, inm_dtle); \ 300 INM_REMREF(_inm); \ 301 } \ 302 VERIFY(SLIST_EMPTY(_head)); \ 303} 304 305#define IGI_ZONE_MAX 64 /* maximum elements in zone */ 306#define IGI_ZONE_NAME "igmp_ifinfo" /* zone name */ 307 308static unsigned int igi_size; /* size of zone element */ 309static struct zone *igi_zone; /* zone for igmp_ifinfo */ 310 311/* Store IGMPv3 record count in the module private scratch space */ 312#define vt_nrecs pkt_mpriv.__mpriv_u.__mpriv32[0].__mpriv32_u.__val16[0] 313 314static __inline void 315igmp_save_context(struct mbuf *m, struct ifnet *ifp) 316{ 317 m->m_pkthdr.rcvif = ifp; 318} 319 320static __inline void 321igmp_scrub_context(struct mbuf *m) 322{ 323 m->m_pkthdr.rcvif = NULL; 324} 325 326#ifdef IGMP_DEBUG 327static __inline const char * 328inet_ntop_haddr(in_addr_t haddr, char *buf, socklen_t size) 329{ 330 struct in_addr ia; 331 332 ia.s_addr = htonl(haddr); 333 return (inet_ntop(AF_INET, &ia, buf, size)); 334} 335#endif 336 337/* 338 * Restore context from a queued IGMP output chain. 339 * Return saved ifp. 340 */ 341static __inline struct ifnet * 342igmp_restore_context(struct mbuf *m) 343{ 344 return (m->m_pkthdr.rcvif); 345} 346 347/* 348 * Retrieve or set default IGMP version. 349 */ 350static int 351sysctl_igmp_default_version SYSCTL_HANDLER_ARGS 352{ 353#pragma unused(oidp, arg2) 354 int error; 355 int new; 356 357 IGMP_LOCK(); 358 359 error = SYSCTL_OUT(req, arg1, sizeof(int)); 360 if (error || !req->newptr) 361 goto out_locked; 362 363 new = igmp_default_version; 364 365 error = SYSCTL_IN(req, &new, sizeof(int)); 366 if (error) 367 goto out_locked; 368 369 if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) { 370 error = EINVAL; 371 goto out_locked; 372 } 373 374 IGMP_PRINTF(("%s: change igmp_default_version from %d to %d\n", 375 __func__, igmp_default_version, new)); 376 377 igmp_default_version = new; 378 379out_locked: 380 IGMP_UNLOCK(); 381 return (error); 382} 383 384/* 385 * Retrieve or set threshold between group-source queries in seconds. 386 * 387 */ 388static int 389sysctl_igmp_gsr SYSCTL_HANDLER_ARGS 390{ 391#pragma unused(arg1, arg2) 392 int error; 393 int i; 394 395 IGMP_LOCK(); 396 397 i = igmp_gsrdelay.tv_sec; 398 399 error = sysctl_handle_int(oidp, &i, 0, req); 400 if (error || !req->newptr) 401 goto out_locked; 402 403 if (i < -1 || i >= 60) { 404 error = EINVAL; 405 goto out_locked; 406 } 407 408 igmp_gsrdelay.tv_sec = i; 409 410out_locked: 411 IGMP_UNLOCK(); 412 return (error); 413} 414 415/* 416 * Expose struct igmp_ifinfo to userland, keyed by ifindex. 417 * For use by ifmcstat(8). 
418 * 419 */ 420static int 421sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS 422{ 423#pragma unused(oidp) 424 int *name; 425 int error; 426 u_int namelen; 427 struct ifnet *ifp; 428 struct igmp_ifinfo *igi; 429 struct igmp_ifinfo_u igi_u; 430 431 name = (int *)arg1; 432 namelen = arg2; 433 434 if (req->newptr != USER_ADDR_NULL) 435 return (EPERM); 436 437 if (namelen != 1) 438 return (EINVAL); 439 440 IGMP_LOCK(); 441 442 if (name[0] <= 0 || name[0] > (u_int)if_index) { 443 error = ENOENT; 444 goto out_locked; 445 } 446 447 error = ENOENT; 448 449 ifnet_head_lock_shared(); 450 ifp = ifindex2ifnet[name[0]]; 451 ifnet_head_done(); 452 if (ifp == NULL) 453 goto out_locked; 454 455 bzero(&igi_u, sizeof (igi_u)); 456 457 LIST_FOREACH(igi, &igi_head, igi_link) { 458 IGI_LOCK(igi); 459 if (ifp != igi->igi_ifp) { 460 IGI_UNLOCK(igi); 461 continue; 462 } 463 igi_u.igi_ifindex = igi->igi_ifp->if_index; 464 igi_u.igi_version = igi->igi_version; 465 igi_u.igi_v1_timer = igi->igi_v1_timer; 466 igi_u.igi_v2_timer = igi->igi_v2_timer; 467 igi_u.igi_v3_timer = igi->igi_v3_timer; 468 igi_u.igi_flags = igi->igi_flags; 469 igi_u.igi_rv = igi->igi_rv; 470 igi_u.igi_qi = igi->igi_qi; 471 igi_u.igi_qri = igi->igi_qri; 472 igi_u.igi_uri = igi->igi_uri; 473 IGI_UNLOCK(igi); 474 475 error = SYSCTL_OUT(req, &igi_u, sizeof (igi_u)); 476 break; 477 } 478 479out_locked: 480 IGMP_UNLOCK(); 481 return (error); 482} 483 484/* 485 * Dispatch an entire queue of pending packet chains 486 * 487 * Must not be called with inm_lock held. 488 */ 489static void 490igmp_dispatch_queue(struct igmp_ifinfo *igi, struct ifqueue *ifq, int limit, 491 const int loop) 492{ 493 struct mbuf *m; 494 struct ip *ip; 495 496 if (igi != NULL) 497 IGI_LOCK_ASSERT_HELD(igi); 498 499 for (;;) { 500 IF_DEQUEUE(ifq, m); 501 if (m == NULL) 502 break; 503 IGMP_PRINTF(("%s: dispatch 0x%llx from 0x%llx\n", __func__, 504 (uint64_t)VM_KERNEL_ADDRPERM(ifq), 505 (uint64_t)VM_KERNEL_ADDRPERM(m))); 506 ip = mtod(m, struct ip *); 507 if (loop) 508 m->m_flags |= M_IGMP_LOOP; 509 if (igi != NULL) 510 IGI_UNLOCK(igi); 511 igmp_sendpkt(m); 512 if (igi != NULL) 513 IGI_LOCK(igi); 514 if (--limit == 0) 515 break; 516 } 517 518 if (igi != NULL) 519 IGI_LOCK_ASSERT_HELD(igi); 520} 521 522/* 523 * Filter outgoing IGMP report state by group. 524 * 525 * Reports are ALWAYS suppressed for ALL-HOSTS (224.0.0.1). 526 * If the net.inet.igmp.sendlocal sysctl is 0, then IGMP reports are 527 * disabled for all groups in the 224.0.0.0/24 link-local scope. However, 528 * this may break certain IGMP snooping switches which rely on the old 529 * report behaviour. 530 * 531 * Return zero if the given group is one for which IGMP reports 532 * should be suppressed, or non-zero if reports should be issued. 533 */ 534 535static __inline__ 536int igmp_isgroupreported(const struct in_addr addr) 537{ 538 539 if (in_allhosts(addr) || 540 ((!igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr))))) 541 return (0); 542 543 return (1); 544} 545 546/* 547 * Construct a Router Alert option to use in outgoing packets. 
548 */ 549static struct mbuf * 550igmp_ra_alloc(void) 551{ 552 struct mbuf *m; 553 struct ipoption *p; 554 555 MGET(m, M_WAITOK, MT_DATA); 556 p = mtod(m, struct ipoption *); 557 p->ipopt_dst.s_addr = INADDR_ANY; 558 p->ipopt_list[0] = IPOPT_RA; /* Router Alert Option */ 559 p->ipopt_list[1] = 0x04; /* 4 bytes long */ 560 p->ipopt_list[2] = IPOPT_EOL; /* End of IP option list */ 561 p->ipopt_list[3] = 0x00; /* pad byte */ 562 m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1]; 563 564 return (m); 565} 566 567/* 568 * Attach IGMP when PF_INET is attached to an interface. 569 */ 570struct igmp_ifinfo * 571igmp_domifattach(struct ifnet *ifp, int how) 572{ 573 struct igmp_ifinfo *igi; 574 575 IGMP_PRINTF(("%s: called for ifp 0x%llx(%s)\n", 576 __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name)); 577 578 igi = igi_alloc(how); 579 if (igi == NULL) 580 return (NULL); 581 582 IGMP_LOCK(); 583 584 IGI_LOCK(igi); 585 igi_initvar(igi, ifp, 0); 586 igi->igi_debug |= IFD_ATTACHED; 587 IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */ 588 IGI_ADDREF_LOCKED(igi); /* hold a reference for caller */ 589 IGI_UNLOCK(igi); 590 ifnet_lock_shared(ifp); 591 igmp_initsilent(ifp, igi); 592 ifnet_lock_done(ifp); 593 594 LIST_INSERT_HEAD(&igi_head, igi, igi_link); 595 596 IGMP_UNLOCK(); 597 598 IGMP_PRINTF(("%s: allocate igmp_ifinfo for ifp 0x%llx(%s)\n", __func__, 599 (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name)); 600 601 return (igi); 602} 603 604/* 605 * Attach IGMP when PF_INET is reattached to an interface. Caller is 606 * expected to have an outstanding reference to the igi. 607 */ 608void 609igmp_domifreattach(struct igmp_ifinfo *igi) 610{ 611 struct ifnet *ifp; 612 613 IGMP_LOCK(); 614 615 IGI_LOCK(igi); 616 VERIFY(!(igi->igi_debug & IFD_ATTACHED)); 617 ifp = igi->igi_ifp; 618 VERIFY(ifp != NULL); 619 igi_initvar(igi, ifp, 1); 620 igi->igi_debug |= IFD_ATTACHED; 621 IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */ 622 IGI_UNLOCK(igi); 623 ifnet_lock_shared(ifp); 624 igmp_initsilent(ifp, igi); 625 ifnet_lock_done(ifp); 626 627 LIST_INSERT_HEAD(&igi_head, igi, igi_link); 628 629 IGMP_UNLOCK(); 630 631 IGMP_PRINTF(("%s: reattached igmp_ifinfo for ifp 0x%llx(%s)\n", 632 __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name)); 633} 634 635/* 636 * Hook for domifdetach. 637 */ 638void 639igmp_domifdetach(struct ifnet *ifp) 640{ 641 SLIST_HEAD(, in_multi) inm_dthead; 642 643 SLIST_INIT(&inm_dthead); 644 645 IGMP_PRINTF(("%s: called for ifp 0x%llx(%s%d)\n", __func__, 646 (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name, ifp->if_unit)); 647 648 IGMP_LOCK(); 649 igi_delete(ifp, (struct igmp_inm_relhead *)&inm_dthead); 650 IGMP_UNLOCK(); 651 652 /* Now that we're dropped all locks, release detached records */ 653 IGMP_REMOVE_DETACHED_INM(&inm_dthead); 654} 655 656/* 657 * Called at interface detach time. Note that we only flush all deferred 658 * responses and record releases; all remaining inm records and their source 659 * entries related to this interface are left intact, in order to handle 660 * the reattach case. 661 */ 662static void 663igi_delete(const struct ifnet *ifp, struct igmp_inm_relhead *inm_dthead) 664{ 665 struct igmp_ifinfo *igi, *tigi; 666 667 IGMP_LOCK_ASSERT_HELD(); 668 669 LIST_FOREACH_SAFE(igi, &igi_head, igi_link, tigi) { 670 IGI_LOCK(igi); 671 if (igi->igi_ifp == ifp) { 672 /* 673 * Free deferred General Query responses. 
674 */ 675 IF_DRAIN(&igi->igi_gq); 676 IF_DRAIN(&igi->igi_v2q); 677 igmp_flush_relq(igi, inm_dthead); 678 VERIFY(SLIST_EMPTY(&igi->igi_relinmhead)); 679 igi->igi_debug &= ~IFD_ATTACHED; 680 IGI_UNLOCK(igi); 681 682 LIST_REMOVE(igi, igi_link); 683 IGI_REMREF(igi); /* release igi_head reference */ 684 return; 685 } 686 IGI_UNLOCK(igi); 687 } 688 panic("%s: igmp_ifinfo not found for ifp %p(%s)\n", __func__, 689 ifp, ifp->if_xname); 690} 691 692__private_extern__ void 693igmp_initsilent(struct ifnet *ifp, struct igmp_ifinfo *igi) 694{ 695 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED); 696 697 IGI_LOCK_ASSERT_NOTHELD(igi); 698 IGI_LOCK(igi); 699 if (!(ifp->if_flags & IFF_MULTICAST)) 700 igi->igi_flags |= IGIF_SILENT; 701 else 702 igi->igi_flags &= ~IGIF_SILENT; 703 IGI_UNLOCK(igi); 704} 705 706static void 707igi_initvar(struct igmp_ifinfo *igi, struct ifnet *ifp, int reattach) 708{ 709 IGI_LOCK_ASSERT_HELD(igi); 710 711 igi->igi_ifp = ifp; 712 igi->igi_version = igmp_default_version; 713 igi->igi_flags = 0; 714 igi->igi_rv = IGMP_RV_INIT; 715 igi->igi_qi = IGMP_QI_INIT; 716 igi->igi_qri = IGMP_QRI_INIT; 717 igi->igi_uri = IGMP_URI_INIT; 718 719 if (!reattach) 720 SLIST_INIT(&igi->igi_relinmhead); 721 722 /* 723 * Responses to general queries are subject to bounds. 724 */ 725 igi->igi_gq.ifq_maxlen = IGMP_MAX_RESPONSE_PACKETS; 726 igi->igi_v2q.ifq_maxlen = IGMP_MAX_RESPONSE_PACKETS; 727} 728 729static struct igmp_ifinfo * 730igi_alloc(int how) 731{ 732 struct igmp_ifinfo *igi; 733 734 igi = (how == M_WAITOK) ? zalloc(igi_zone) : zalloc_noblock(igi_zone); 735 if (igi != NULL) { 736 bzero(igi, igi_size); 737 lck_mtx_init(&igi->igi_lock, igmp_mtx_grp, igmp_mtx_attr); 738 igi->igi_debug |= IFD_ALLOC; 739 } 740 return (igi); 741} 742 743static void 744igi_free(struct igmp_ifinfo *igi) 745{ 746 IGI_LOCK(igi); 747 if (igi->igi_debug & IFD_ATTACHED) { 748 panic("%s: attached igi=%p is being freed", __func__, igi); 749 /* NOTREACHED */ 750 } else if (igi->igi_ifp != NULL) { 751 panic("%s: ifp not NULL for igi=%p", __func__, igi); 752 /* NOTREACHED */ 753 } else if (!(igi->igi_debug & IFD_ALLOC)) { 754 panic("%s: igi %p cannot be freed", __func__, igi); 755 /* NOTREACHED */ 756 } else if (igi->igi_refcnt != 0) { 757 panic("%s: non-zero refcnt igi=%p", __func__, igi); 758 /* NOTREACHED */ 759 } 760 igi->igi_debug &= ~IFD_ALLOC; 761 IGI_UNLOCK(igi); 762 763 lck_mtx_destroy(&igi->igi_lock, igmp_mtx_grp); 764 zfree(igi_zone, igi); 765} 766 767void 768igi_addref(struct igmp_ifinfo *igi, int locked) 769{ 770 if (!locked) 771 IGI_LOCK_SPIN(igi); 772 else 773 IGI_LOCK_ASSERT_HELD(igi); 774 775 if (++igi->igi_refcnt == 0) { 776 panic("%s: igi=%p wraparound refcnt", __func__, igi); 777 /* NOTREACHED */ 778 } 779 if (!locked) 780 IGI_UNLOCK(igi); 781} 782 783void 784igi_remref(struct igmp_ifinfo *igi) 785{ 786 SLIST_HEAD(, in_multi) inm_dthead; 787 struct ifnet *ifp; 788 789 IGI_LOCK_SPIN(igi); 790 791 if (igi->igi_refcnt == 0) { 792 panic("%s: igi=%p negative refcnt", __func__, igi); 793 /* NOTREACHED */ 794 } 795 796 --igi->igi_refcnt; 797 if (igi->igi_refcnt > 0) { 798 IGI_UNLOCK(igi); 799 return; 800 } 801 802 ifp = igi->igi_ifp; 803 igi->igi_ifp = NULL; 804 IF_DRAIN(&igi->igi_gq); 805 IF_DRAIN(&igi->igi_v2q); 806 SLIST_INIT(&inm_dthead); 807 igmp_flush_relq(igi, (struct igmp_inm_relhead *)&inm_dthead); 808 VERIFY(SLIST_EMPTY(&igi->igi_relinmhead)); 809 IGI_UNLOCK(igi); 810 811 /* Now that we're dropped all locks, release detached records */ 812 IGMP_REMOVE_DETACHED_INM(&inm_dthead); 813 814 IGMP_PRINTF(("%s: 
freeing igmp_ifinfo for ifp 0x%llx(%s)\n", 815 __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); 816 817 igi_free(igi); 818} 819 820/* 821 * Process a received IGMPv1 query. 822 * Return non-zero if the message should be dropped. 823 */ 824static int 825igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip, 826 const struct igmp *igmp) 827{ 828 struct igmp_ifinfo *igi; 829 struct in_multi *inm; 830 struct in_multistep step; 831 struct igmp_tparams itp = { 0, 0, 0, 0 }; 832 833 IGMP_LOCK_ASSERT_NOTHELD(); 834 835 /* 836 * IGMPv1 Host Membership Queries SHOULD always be addressed to 837 * 224.0.0.1. They are always treated as General Queries. 838 * igmp_group is always ignored. Do not drop it as a userland 839 * daemon may wish to see it. 840 */ 841 if (!in_allhosts(ip->ip_dst) || !in_nullhost(igmp->igmp_group)) { 842 IGMPSTAT_INC(igps_rcv_badqueries); 843 OIGMPSTAT_INC(igps_rcv_badqueries); 844 goto done; 845 } 846 IGMPSTAT_INC(igps_rcv_gen_queries); 847 848 igi = IGMP_IFINFO(ifp); 849 VERIFY(igi != NULL); 850 851 IGI_LOCK(igi); 852 if (igi->igi_flags & IGIF_LOOPBACK) { 853 IGMP_PRINTF(("%s: ignore v1 query on IGIF_LOOPBACK " 854 "ifp 0x%llx(%s)\n", __func__, 855 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); 856 IGI_UNLOCK(igi); 857 goto done; 858 } 859 /* 860 * Switch to IGMPv1 host compatibility mode. 861 */ 862 itp.qpt = igmp_set_version(igi, IGMP_VERSION_1); 863 IGI_UNLOCK(igi); 864 865 IGMP_PRINTF(("%s: process v1 query on ifp 0x%llx(%s)\n", __func__, 866 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); 867 868 /* 869 * Start the timers in all of our group records 870 * for the interface on which the query arrived, 871 * except those which are already running. 872 */ 873 in_multihead_lock_shared(); 874 IN_FIRST_MULTI(step, inm); 875 while (inm != NULL) { 876 INM_LOCK(inm); 877 if (inm->inm_ifp != ifp || inm->inm_timer != 0) 878 goto next; 879 880 switch (inm->inm_state) { 881 case IGMP_NOT_MEMBER: 882 case IGMP_SILENT_MEMBER: 883 break; 884 case IGMP_G_QUERY_PENDING_MEMBER: 885 case IGMP_SG_QUERY_PENDING_MEMBER: 886 case IGMP_REPORTING_MEMBER: 887 case IGMP_IDLE_MEMBER: 888 case IGMP_LAZY_MEMBER: 889 case IGMP_SLEEPING_MEMBER: 890 case IGMP_AWAKENING_MEMBER: 891 inm->inm_state = IGMP_REPORTING_MEMBER; 892 inm->inm_timer = IGMP_RANDOM_DELAY(IGMP_V1V2_MAX_RI); 893 itp.cst = 1; 894 break; 895 case IGMP_LEAVING_MEMBER: 896 break; 897 } 898next: 899 INM_UNLOCK(inm); 900 IN_NEXT_MULTI(step, inm); 901 } 902 in_multihead_lock_done(); 903done: 904 igmp_set_timeout(&itp); 905 906 return (0); 907} 908 909/* 910 * Process a received IGMPv2 general or group-specific query. 911 */ 912static int 913igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip, 914 const struct igmp *igmp) 915{ 916 struct igmp_ifinfo *igi; 917 struct in_multi *inm; 918 int is_general_query; 919 uint16_t timer; 920 struct igmp_tparams itp = { 0, 0, 0, 0 }; 921 922 IGMP_LOCK_ASSERT_NOTHELD(); 923 924 is_general_query = 0; 925 926 /* 927 * Validate address fields upfront. 928 */ 929 if (in_nullhost(igmp->igmp_group)) { 930 /* 931 * IGMPv2 General Query. 932 * If this was not sent to the all-hosts group, ignore it. 933 */ 934 if (!in_allhosts(ip->ip_dst)) 935 goto done; 936 IGMPSTAT_INC(igps_rcv_gen_queries); 937 is_general_query = 1; 938 } else { 939 /* IGMPv2 Group-Specific Query. 
*/ 940 IGMPSTAT_INC(igps_rcv_group_queries); 941 } 942 943 igi = IGMP_IFINFO(ifp); 944 VERIFY(igi != NULL); 945 946 IGI_LOCK(igi); 947 if (igi->igi_flags & IGIF_LOOPBACK) { 948 IGMP_PRINTF(("%s: ignore v2 query on IGIF_LOOPBACK " 949 "ifp 0x%llx(%s)\n", __func__, 950 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); 951 IGI_UNLOCK(igi); 952 goto done; 953 } 954 /* 955 * Ignore v2 query if in v1 Compatibility Mode. 956 */ 957 if (igi->igi_version == IGMP_VERSION_1) { 958 IGI_UNLOCK(igi); 959 goto done; 960 } 961 itp.qpt = igmp_set_version(igi, IGMP_VERSION_2); 962 IGI_UNLOCK(igi); 963 964 timer = igmp->igmp_code / IGMP_TIMER_SCALE; 965 if (timer == 0) 966 timer = 1; 967 968 if (is_general_query) { 969 struct in_multistep step; 970 971 IGMP_PRINTF(("%s: process v2 general query on ifp 0x%llx(%s)\n", 972 __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); 973 /* 974 * For each reporting group joined on this 975 * interface, kick the report timer. 976 */ 977 in_multihead_lock_shared(); 978 IN_FIRST_MULTI(step, inm); 979 while (inm != NULL) { 980 INM_LOCK(inm); 981 if (inm->inm_ifp == ifp) 982 itp.cst += igmp_v2_update_group(inm, timer); 983 INM_UNLOCK(inm); 984 IN_NEXT_MULTI(step, inm); 985 } 986 in_multihead_lock_done(); 987 } else { 988 /* 989 * Group-specific IGMPv2 query, we need only 990 * look up the single group to process it. 991 */ 992 in_multihead_lock_shared(); 993 IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm); 994 in_multihead_lock_done(); 995 if (inm != NULL) { 996 INM_LOCK(inm); 997 IGMP_INET_PRINTF(igmp->igmp_group, 998 ("process v2 query %s on ifp 0x%llx(%s)\n", 999 _igmp_inet_buf, 1000 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); 1001 itp.cst = igmp_v2_update_group(inm, timer); 1002 INM_UNLOCK(inm); 1003 INM_REMREF(inm); /* from IN_LOOKUP_MULTI */ 1004 } 1005 } 1006done: 1007 igmp_set_timeout(&itp); 1008 1009 return (0); 1010} 1011 1012/* 1013 * Update the report timer on a group in response to an IGMPv2 query. 1014 * 1015 * If we are becoming the reporting member for this group, start the timer. 1016 * If we already are the reporting member for this group, and timer is 1017 * below the threshold, reset it. 1018 * 1019 * We may be updating the group for the first time since we switched 1020 * to IGMPv3. If we are, then we must clear any recorded source lists, 1021 * and transition to REPORTING state; the group timer is overloaded 1022 * for group and group-source query responses. 1023 * 1024 * Unlike IGMPv3, the delay per group should be jittered 1025 * to avoid bursts of IGMPv2 reports. 
1026 */ 1027static uint32_t 1028igmp_v2_update_group(struct in_multi *inm, const int timer) 1029{ 1030 1031 IGMP_INET_PRINTF(inm->inm_addr, ("%s: %s/%s timer=%d\n", 1032 __func__, _igmp_inet_buf, if_name(inm->inm_ifp), 1033 timer)); 1034 1035 INM_LOCK_ASSERT_HELD(inm); 1036 1037 switch (inm->inm_state) { 1038 case IGMP_NOT_MEMBER: 1039 case IGMP_SILENT_MEMBER: 1040 break; 1041 case IGMP_REPORTING_MEMBER: 1042 if (inm->inm_timer != 0 && 1043 inm->inm_timer <= timer) { 1044 IGMP_PRINTF(("%s: REPORTING and timer running, " 1045 "skipping.\n", __func__)); 1046 break; 1047 } 1048 /* FALLTHROUGH */ 1049 case IGMP_SG_QUERY_PENDING_MEMBER: 1050 case IGMP_G_QUERY_PENDING_MEMBER: 1051 case IGMP_IDLE_MEMBER: 1052 case IGMP_LAZY_MEMBER: 1053 case IGMP_AWAKENING_MEMBER: 1054 IGMP_PRINTF(("%s: ->REPORTING\n", __func__)); 1055 inm->inm_state = IGMP_REPORTING_MEMBER; 1056 inm->inm_timer = IGMP_RANDOM_DELAY(timer); 1057 break; 1058 case IGMP_SLEEPING_MEMBER: 1059 IGMP_PRINTF(("%s: ->AWAKENING\n", __func__)); 1060 inm->inm_state = IGMP_AWAKENING_MEMBER; 1061 break; 1062 case IGMP_LEAVING_MEMBER: 1063 break; 1064 } 1065 1066 return (inm->inm_timer); 1067} 1068 1069/* 1070 * Process a received IGMPv3 general, group-specific or 1071 * group-and-source-specific query. 1072 * Assumes m has already been pulled up to the full IGMP message length. 1073 * Return 0 if successful, otherwise an appropriate error code is returned. 1074 */ 1075static int 1076igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip, 1077 /*const*/ struct igmpv3 *igmpv3) 1078{ 1079 struct igmp_ifinfo *igi; 1080 struct in_multi *inm; 1081 int is_general_query; 1082 uint32_t maxresp, nsrc, qqi; 1083 uint16_t timer; 1084 uint8_t qrv; 1085 struct igmp_tparams itp = { 0, 0, 0, 0 }; 1086 1087 IGMP_LOCK_ASSERT_NOTHELD(); 1088 1089 is_general_query = 0; 1090 1091 IGMP_PRINTF(("%s: process v3 query on ifp 0x%llx(%s)\n", __func__, 1092 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); 1093 1094 maxresp = igmpv3->igmp_code; /* in 1/10ths of a second */ 1095 if (maxresp >= 128) { 1096 maxresp = IGMP_MANT(igmpv3->igmp_code) << 1097 (IGMP_EXP(igmpv3->igmp_code) + 3); 1098 } 1099 1100 /* 1101 * Robustness must never be less than 2 for on-wire IGMPv3. 1102 * FUTURE: Check if ifp has IGIF_LOOPBACK set, as we will make 1103 * an exception for interfaces whose IGMPv3 state changes 1104 * are redirected to loopback (e.g. MANET). 1105 */ 1106 qrv = IGMP_QRV(igmpv3->igmp_misc); 1107 if (qrv < 2) { 1108 IGMP_PRINTF(("%s: clamping qrv %d to %d\n", __func__, 1109 qrv, IGMP_RV_INIT)); 1110 qrv = IGMP_RV_INIT; 1111 } 1112 1113 qqi = igmpv3->igmp_qqi; 1114 if (qqi >= 128) { 1115 qqi = IGMP_MANT(igmpv3->igmp_qqi) << 1116 (IGMP_EXP(igmpv3->igmp_qqi) + 3); 1117 } 1118 1119 timer = maxresp / IGMP_TIMER_SCALE; 1120 if (timer == 0) 1121 timer = 1; 1122 1123 nsrc = ntohs(igmpv3->igmp_numsrc); 1124 1125 /* 1126 * Validate address fields and versions upfront before 1127 * accepting v3 query. 1128 */ 1129 if (in_nullhost(igmpv3->igmp_group)) { 1130 /* 1131 * IGMPv3 General Query. 1132 * 1133 * General Queries SHOULD be directed to 224.0.0.1. 1134 * A general query with a source list has undefined 1135 * behaviour; discard it. 1136 */ 1137 IGMPSTAT_INC(igps_rcv_gen_queries); 1138 if (!in_allhosts(ip->ip_dst) || nsrc > 0) { 1139 IGMPSTAT_INC(igps_rcv_badqueries); 1140 OIGMPSTAT_INC(igps_rcv_badqueries); 1141 goto done; 1142 } 1143 is_general_query = 1; 1144 } else { 1145 /* Group or group-source specific query. 
*/ 1146 if (nsrc == 0) 1147 IGMPSTAT_INC(igps_rcv_group_queries); 1148 else 1149 IGMPSTAT_INC(igps_rcv_gsr_queries); 1150 } 1151 1152 igi = IGMP_IFINFO(ifp); 1153 VERIFY(igi != NULL); 1154 1155 IGI_LOCK(igi); 1156 if (igi->igi_flags & IGIF_LOOPBACK) { 1157 IGMP_PRINTF(("%s: ignore v3 query on IGIF_LOOPBACK " 1158 "ifp 0x%llx(%s)\n", __func__, 1159 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); 1160 IGI_UNLOCK(igi); 1161 goto done; 1162 } 1163 1164 /* 1165 * Discard the v3 query if we're in Compatibility Mode. 1166 * The RFC is not obviously worded that hosts need to stay in 1167 * compatibility mode until the Old Version Querier Present 1168 * timer expires. 1169 */ 1170 if (igi->igi_version != IGMP_VERSION_3) { 1171 IGMP_PRINTF(("%s: ignore v3 query in v%d mode on " 1172 "ifp 0x%llx(%s)\n", __func__, igi->igi_version, 1173 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); 1174 IGI_UNLOCK(igi); 1175 goto done; 1176 } 1177 1178 itp.qpt = igmp_set_version(igi, IGMP_VERSION_3); 1179 igi->igi_rv = qrv; 1180 igi->igi_qi = qqi; 1181 igi->igi_qri = MAX(timer, IGMP_QRI_MIN); 1182 1183 IGMP_PRINTF(("%s: qrv %d qi %d qri %d\n", __func__, igi->igi_rv, 1184 igi->igi_qi, igi->igi_qri)); 1185 1186 if (is_general_query) { 1187 /* 1188 * Schedule a current-state report on this ifp for 1189 * all groups, possibly containing source lists. 1190 * If there is a pending General Query response 1191 * scheduled earlier than the selected delay, do 1192 * not schedule any other reports. 1193 * Otherwise, reset the interface timer. 1194 */ 1195 IGMP_PRINTF(("%s: process v3 general query on ifp 0x%llx(%s)\n", 1196 __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); 1197 if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) { 1198 itp.it = igi->igi_v3_timer = IGMP_RANDOM_DELAY(timer); 1199 } 1200 IGI_UNLOCK(igi); 1201 } else { 1202 IGI_UNLOCK(igi); 1203 /* 1204 * Group-source-specific queries are throttled on 1205 * a per-group basis to defeat denial-of-service attempts. 1206 * Queries for groups we are not a member of on this 1207 * link are simply ignored. 1208 */ 1209 in_multihead_lock_shared(); 1210 IN_LOOKUP_MULTI(&igmpv3->igmp_group, ifp, inm); 1211 in_multihead_lock_done(); 1212 if (inm == NULL) 1213 goto done; 1214 1215 INM_LOCK(inm); 1216 if (nsrc > 0) { 1217 if (!ratecheck(&inm->inm_lastgsrtv, 1218 &igmp_gsrdelay)) { 1219 IGMP_PRINTF(("%s: GS query throttled.\n", 1220 __func__)); 1221 IGMPSTAT_INC(igps_drop_gsr_queries); 1222 INM_UNLOCK(inm); 1223 INM_REMREF(inm); /* from IN_LOOKUP_MULTI */ 1224 goto done; 1225 } 1226 } 1227 IGMP_INET_PRINTF(igmpv3->igmp_group, 1228 ("process v3 %s query on ifp 0x%llx(%s)\n", _igmp_inet_buf, 1229 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); 1230 /* 1231 * If there is a pending General Query response 1232 * scheduled sooner than the selected delay, no 1233 * further report need be scheduled. 1234 * Otherwise, prepare to respond to the 1235 * group-specific or group-and-source query. 
1236 */ 1237 IGI_LOCK(igi); 1238 itp.it = igi->igi_v3_timer; 1239 IGI_UNLOCK(igi); 1240 if (itp.it == 0 || itp.it >= timer) { 1241 (void) igmp_input_v3_group_query(inm, timer, igmpv3); 1242 itp.cst = inm->inm_timer; 1243 } 1244 INM_UNLOCK(inm); 1245 INM_REMREF(inm); /* from IN_LOOKUP_MULTI */ 1246 } 1247done: 1248 if (itp.it > 0) { 1249 IGMP_PRINTF(("%s: v3 general query response scheduled in " 1250 "T+%d seconds on ifp 0x%llx(%s)\n", __func__, itp.it, 1251 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); 1252 } 1253 igmp_set_timeout(&itp); 1254 1255 return (0); 1256} 1257 1258/* 1259 * Process a recieved IGMPv3 group-specific or group-and-source-specific 1260 * query. 1261 * Return <0 if any error occured. Currently this is ignored. 1262 */ 1263static int 1264igmp_input_v3_group_query(struct in_multi *inm, 1265 int timer, /*const*/ struct igmpv3 *igmpv3) 1266{ 1267 int retval; 1268 uint16_t nsrc; 1269 1270 INM_LOCK_ASSERT_HELD(inm); 1271 1272 retval = 0; 1273 1274 switch (inm->inm_state) { 1275 case IGMP_NOT_MEMBER: 1276 case IGMP_SILENT_MEMBER: 1277 case IGMP_SLEEPING_MEMBER: 1278 case IGMP_LAZY_MEMBER: 1279 case IGMP_AWAKENING_MEMBER: 1280 case IGMP_IDLE_MEMBER: 1281 case IGMP_LEAVING_MEMBER: 1282 return (retval); 1283 case IGMP_REPORTING_MEMBER: 1284 case IGMP_G_QUERY_PENDING_MEMBER: 1285 case IGMP_SG_QUERY_PENDING_MEMBER: 1286 break; 1287 } 1288 1289 nsrc = ntohs(igmpv3->igmp_numsrc); 1290 1291 /* 1292 * Deal with group-specific queries upfront. 1293 * If any group query is already pending, purge any recorded 1294 * source-list state if it exists, and schedule a query response 1295 * for this group-specific query. 1296 */ 1297 if (nsrc == 0) { 1298 if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER || 1299 inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) { 1300 inm_clear_recorded(inm); 1301 timer = min(inm->inm_timer, timer); 1302 } 1303 inm->inm_state = IGMP_G_QUERY_PENDING_MEMBER; 1304 inm->inm_timer = IGMP_RANDOM_DELAY(timer); 1305 return (retval); 1306 } 1307 1308 /* 1309 * Deal with the case where a group-and-source-specific query has 1310 * been received but a group-specific query is already pending. 1311 */ 1312 if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER) { 1313 timer = min(inm->inm_timer, timer); 1314 inm->inm_timer = IGMP_RANDOM_DELAY(timer); 1315 return (retval); 1316 } 1317 1318 /* 1319 * Finally, deal with the case where a group-and-source-specific 1320 * query has been received, where a response to a previous g-s-r 1321 * query exists, or none exists. 1322 * In this case, we need to parse the source-list which the Querier 1323 * has provided us with and check if we have any source list filter 1324 * entries at T1 for these sources. If we do not, there is no need 1325 * schedule a report and the query may be dropped. 1326 * If we do, we must record them and schedule a current-state 1327 * report for those sources. 1328 * FIXME: Handling source lists larger than 1 mbuf requires that 1329 * we pass the mbuf chain pointer down to this function, and use 1330 * m_getptr() to walk the chain. 
1331 */ 1332 if (inm->inm_nsrc > 0) { 1333 const struct in_addr *ap; 1334 int i, nrecorded; 1335 1336 ap = (const struct in_addr *)(igmpv3 + 1); 1337 nrecorded = 0; 1338 for (i = 0; i < nsrc; i++, ap++) { 1339 retval = inm_record_source(inm, ap->s_addr); 1340 if (retval < 0) 1341 break; 1342 nrecorded += retval; 1343 } 1344 if (nrecorded > 0) { 1345 IGMP_PRINTF(("%s: schedule response to SG query\n", 1346 __func__)); 1347 inm->inm_state = IGMP_SG_QUERY_PENDING_MEMBER; 1348 inm->inm_timer = IGMP_RANDOM_DELAY(timer); 1349 } 1350 } 1351 1352 return (retval); 1353} 1354 1355/* 1356 * Process a received IGMPv1 host membership report. 1357 * 1358 * NOTE: 0.0.0.0 workaround breaks const correctness. 1359 */ 1360static int 1361igmp_input_v1_report(struct ifnet *ifp, struct mbuf *m, /*const*/ struct ip *ip, 1362 /*const*/ struct igmp *igmp) 1363{ 1364 struct in_ifaddr *ia; 1365 struct in_multi *inm; 1366 1367 IGMPSTAT_INC(igps_rcv_reports); 1368 OIGMPSTAT_INC(igps_rcv_reports); 1369 1370 if ((ifp->if_flags & IFF_LOOPBACK) || 1371 (m->m_pkthdr.pkt_flags & PKTF_LOOP)) 1372 return (0); 1373 1374 if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr) || 1375 !in_hosteq(igmp->igmp_group, ip->ip_dst))) { 1376 IGMPSTAT_INC(igps_rcv_badreports); 1377 OIGMPSTAT_INC(igps_rcv_badreports); 1378 return (EINVAL); 1379 } 1380 1381 /* 1382 * RFC 3376, Section 4.2.13, 9.2, 9.3: 1383 * Booting clients may use the source address 0.0.0.0. Some 1384 * IGMP daemons may not know how to use IP_RECVIF to determine 1385 * the interface upon which this message was received. 1386 * Replace 0.0.0.0 with the subnet address if told to do so. 1387 */ 1388 if (igmp_recvifkludge && in_nullhost(ip->ip_src)) { 1389 IFP_TO_IA(ifp, ia); 1390 if (ia != NULL) { 1391 IFA_LOCK(&ia->ia_ifa); 1392 ip->ip_src.s_addr = htonl(ia->ia_subnet); 1393 IFA_UNLOCK(&ia->ia_ifa); 1394 IFA_REMREF(&ia->ia_ifa); 1395 } 1396 } 1397 1398 IGMP_INET_PRINTF(igmp->igmp_group, 1399 ("process v1 report %s on ifp 0x%llx(%s)\n", _igmp_inet_buf, 1400 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); 1401 1402 /* 1403 * IGMPv1 report suppression. 1404 * If we are a member of this group, and our membership should be 1405 * reported, stop our group timer and transition to the 'lazy' state. 1406 */ 1407 in_multihead_lock_shared(); 1408 IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm); 1409 in_multihead_lock_done(); 1410 if (inm != NULL) { 1411 struct igmp_ifinfo *igi; 1412 1413 INM_LOCK(inm); 1414 1415 igi = inm->inm_igi; 1416 VERIFY(igi != NULL); 1417 1418 IGMPSTAT_INC(igps_rcv_ourreports); 1419 OIGMPSTAT_INC(igps_rcv_ourreports); 1420 1421 /* 1422 * If we are in IGMPv3 host mode, do not allow the 1423 * other host's IGMPv1 report to suppress our reports 1424 * unless explicitly configured to do so. 
1425 */ 1426 IGI_LOCK(igi); 1427 if (igi->igi_version == IGMP_VERSION_3) { 1428 if (igmp_legacysupp) 1429 igmp_v3_suppress_group_record(inm); 1430 IGI_UNLOCK(igi); 1431 INM_UNLOCK(inm); 1432 INM_REMREF(inm); /* from IN_LOOKUP_MULTI */ 1433 return (0); 1434 } 1435 1436 INM_LOCK_ASSERT_HELD(inm); 1437 inm->inm_timer = 0; 1438 1439 switch (inm->inm_state) { 1440 case IGMP_NOT_MEMBER: 1441 case IGMP_SILENT_MEMBER: 1442 break; 1443 case IGMP_IDLE_MEMBER: 1444 case IGMP_LAZY_MEMBER: 1445 case IGMP_AWAKENING_MEMBER: 1446 IGMP_INET_PRINTF(igmp->igmp_group, 1447 ("report suppressed for %s on ifp 0x%llx(%s)\n", 1448 _igmp_inet_buf, 1449 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); 1450 case IGMP_SLEEPING_MEMBER: 1451 inm->inm_state = IGMP_SLEEPING_MEMBER; 1452 break; 1453 case IGMP_REPORTING_MEMBER: 1454 IGMP_INET_PRINTF(igmp->igmp_group, 1455 ("report suppressed for %s on ifp 0x%llx(%s)\n", 1456 _igmp_inet_buf, 1457 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); 1458 if (igi->igi_version == IGMP_VERSION_1) 1459 inm->inm_state = IGMP_LAZY_MEMBER; 1460 else if (igi->igi_version == IGMP_VERSION_2) 1461 inm->inm_state = IGMP_SLEEPING_MEMBER; 1462 break; 1463 case IGMP_G_QUERY_PENDING_MEMBER: 1464 case IGMP_SG_QUERY_PENDING_MEMBER: 1465 case IGMP_LEAVING_MEMBER: 1466 break; 1467 } 1468 IGI_UNLOCK(igi); 1469 INM_UNLOCK(inm); 1470 INM_REMREF(inm); /* from IN_LOOKUP_MULTI */ 1471 } 1472 1473 return (0); 1474} 1475 1476/* 1477 * Process a received IGMPv2 host membership report. 1478 * 1479 * NOTE: 0.0.0.0 workaround breaks const correctness. 1480 */ 1481static int 1482igmp_input_v2_report(struct ifnet *ifp, struct mbuf *m, /*const*/ struct ip *ip, 1483 /*const*/ struct igmp *igmp) 1484{ 1485 struct in_ifaddr *ia; 1486 struct in_multi *inm; 1487 1488 /* 1489 * Make sure we don't hear our own membership report. Fast 1490 * leave requires knowing that we are the only member of a 1491 * group. 1492 */ 1493 IFP_TO_IA(ifp, ia); 1494 if (ia != NULL) { 1495 IFA_LOCK(&ia->ia_ifa); 1496 if (in_hosteq(ip->ip_src, IA_SIN(ia)->sin_addr)) { 1497 IFA_UNLOCK(&ia->ia_ifa); 1498 IFA_REMREF(&ia->ia_ifa); 1499 return (0); 1500 } 1501 IFA_UNLOCK(&ia->ia_ifa); 1502 } 1503 1504 IGMPSTAT_INC(igps_rcv_reports); 1505 OIGMPSTAT_INC(igps_rcv_reports); 1506 1507 if ((ifp->if_flags & IFF_LOOPBACK) || 1508 (m->m_pkthdr.pkt_flags & PKTF_LOOP)) { 1509 if (ia != NULL) 1510 IFA_REMREF(&ia->ia_ifa); 1511 return (0); 1512 } 1513 1514 if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) || 1515 !in_hosteq(igmp->igmp_group, ip->ip_dst)) { 1516 if (ia != NULL) 1517 IFA_REMREF(&ia->ia_ifa); 1518 IGMPSTAT_INC(igps_rcv_badreports); 1519 OIGMPSTAT_INC(igps_rcv_badreports); 1520 return (EINVAL); 1521 } 1522 1523 /* 1524 * RFC 3376, Section 4.2.13, 9.2, 9.3: 1525 * Booting clients may use the source address 0.0.0.0. Some 1526 * IGMP daemons may not know how to use IP_RECVIF to determine 1527 * the interface upon which this message was received. 1528 * Replace 0.0.0.0 with the subnet address if told to do so. 1529 */ 1530 if (igmp_recvifkludge && in_nullhost(ip->ip_src)) { 1531 if (ia != NULL) { 1532 IFA_LOCK(&ia->ia_ifa); 1533 ip->ip_src.s_addr = htonl(ia->ia_subnet); 1534 IFA_UNLOCK(&ia->ia_ifa); 1535 } 1536 } 1537 if (ia != NULL) 1538 IFA_REMREF(&ia->ia_ifa); 1539 1540 IGMP_INET_PRINTF(igmp->igmp_group, 1541 ("process v2 report %s on ifp 0x%llx(%s)\n", _igmp_inet_buf, 1542 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); 1543 1544 /* 1545 * IGMPv2 report suppression. 
1546 * If we are a member of this group, and our membership should be 1547 * reported, and our group timer is pending or about to be reset, 1548 * stop our group timer by transitioning to the 'lazy' state. 1549 */ 1550 in_multihead_lock_shared(); 1551 IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm); 1552 in_multihead_lock_done(); 1553 if (inm != NULL) { 1554 struct igmp_ifinfo *igi; 1555 1556 INM_LOCK(inm); 1557 igi = inm->inm_igi; 1558 VERIFY(igi != NULL); 1559 1560 IGMPSTAT_INC(igps_rcv_ourreports); 1561 OIGMPSTAT_INC(igps_rcv_ourreports); 1562 1563 /* 1564 * If we are in IGMPv3 host mode, do not allow the 1565 * other host's IGMPv1 report to suppress our reports 1566 * unless explicitly configured to do so. 1567 */ 1568 IGI_LOCK(igi); 1569 if (igi->igi_version == IGMP_VERSION_3) { 1570 if (igmp_legacysupp) 1571 igmp_v3_suppress_group_record(inm); 1572 IGI_UNLOCK(igi); 1573 INM_UNLOCK(inm); 1574 INM_REMREF(inm); 1575 return (0); 1576 } 1577 1578 inm->inm_timer = 0; 1579 1580 switch (inm->inm_state) { 1581 case IGMP_NOT_MEMBER: 1582 case IGMP_SILENT_MEMBER: 1583 case IGMP_SLEEPING_MEMBER: 1584 break; 1585 case IGMP_REPORTING_MEMBER: 1586 case IGMP_IDLE_MEMBER: 1587 case IGMP_AWAKENING_MEMBER: 1588 IGMP_INET_PRINTF(igmp->igmp_group, 1589 ("report suppressed for %s on ifp 0x%llx(%s)\n", 1590 _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(ifp), 1591 if_name(ifp))); 1592 case IGMP_LAZY_MEMBER: 1593 inm->inm_state = IGMP_LAZY_MEMBER; 1594 break; 1595 case IGMP_G_QUERY_PENDING_MEMBER: 1596 case IGMP_SG_QUERY_PENDING_MEMBER: 1597 case IGMP_LEAVING_MEMBER: 1598 break; 1599 } 1600 IGI_UNLOCK(igi); 1601 INM_UNLOCK(inm); 1602 INM_REMREF(inm); 1603 } 1604 1605 return (0); 1606} 1607 1608void 1609igmp_input(struct mbuf *m, int off) 1610{ 1611 int iphlen; 1612 struct ifnet *ifp; 1613 struct igmp *igmp; 1614 struct ip *ip; 1615 int igmplen; 1616 int minlen; 1617 int queryver; 1618 1619 IGMP_PRINTF(("%s: called w/mbuf (0x%llx,%d)\n", __func__, 1620 (uint64_t)VM_KERNEL_ADDRPERM(m), off)); 1621 1622 ifp = m->m_pkthdr.rcvif; 1623 1624 IGMPSTAT_INC(igps_rcv_total); 1625 OIGMPSTAT_INC(igps_rcv_total); 1626 1627 /* Expect 32-bit aligned data pointer on strict-align platforms */ 1628 MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); 1629 1630 ip = mtod(m, struct ip *); 1631 iphlen = off; 1632 1633 /* By now, ip_len no longer contains the length of IP header */ 1634 igmplen = ip->ip_len; 1635 1636 /* 1637 * Validate lengths. 1638 */ 1639 if (igmplen < IGMP_MINLEN) { 1640 IGMPSTAT_INC(igps_rcv_tooshort); 1641 OIGMPSTAT_INC(igps_rcv_tooshort); 1642 m_freem(m); 1643 return; 1644 } 1645 1646 /* 1647 * Always pullup to the minimum size for v1/v2 or v3 1648 * to amortize calls to m_pulldown(). 1649 */ 1650 if (igmplen >= IGMP_V3_QUERY_MINLEN) 1651 minlen = IGMP_V3_QUERY_MINLEN; 1652 else 1653 minlen = IGMP_MINLEN; 1654 1655 /* A bit more expensive than M_STRUCT_GET, but ensures alignment */ 1656 M_STRUCT_GET0(igmp, struct igmp *, m, off, minlen); 1657 if (igmp == NULL) { 1658 IGMPSTAT_INC(igps_rcv_tooshort); 1659 OIGMPSTAT_INC(igps_rcv_tooshort); 1660 return; 1661 } 1662 /* N.B.: we assume the packet was correctly aligned in ip_input. */ 1663 1664 /* 1665 * Validate checksum. 1666 */ 1667 m->m_data += iphlen; 1668 m->m_len -= iphlen; 1669 if (in_cksum(m, igmplen)) { 1670 IGMPSTAT_INC(igps_rcv_badsum); 1671 OIGMPSTAT_INC(igps_rcv_badsum); 1672 m_freem(m); 1673 return; 1674 } 1675 m->m_data -= iphlen; 1676 m->m_len += iphlen; 1677 1678 /* 1679 * IGMP control traffic is link-scope, and must have a TTL of 1. 1680 * DVMRP traffic (e.g. 
mrinfo, mtrace) is an exception; 1681 * probe packets may come from beyond the LAN. 1682 */ 1683 if (igmp->igmp_type != IGMP_DVMRP && ip->ip_ttl != 1) { 1684 IGMPSTAT_INC(igps_rcv_badttl); 1685 m_freem(m); 1686 return; 1687 } 1688 1689 switch (igmp->igmp_type) { 1690 case IGMP_HOST_MEMBERSHIP_QUERY: 1691 if (igmplen == IGMP_MINLEN) { 1692 if (igmp->igmp_code == 0) 1693 queryver = IGMP_VERSION_1; 1694 else 1695 queryver = IGMP_VERSION_2; 1696 } else if (igmplen >= IGMP_V3_QUERY_MINLEN) { 1697 queryver = IGMP_VERSION_3; 1698 } else { 1699 IGMPSTAT_INC(igps_rcv_tooshort); 1700 OIGMPSTAT_INC(igps_rcv_tooshort); 1701 m_freem(m); 1702 return; 1703 } 1704 1705 OIGMPSTAT_INC(igps_rcv_queries); 1706 1707 switch (queryver) { 1708 case IGMP_VERSION_1: 1709 IGMPSTAT_INC(igps_rcv_v1v2_queries); 1710 if (!igmp_v1enable) 1711 break; 1712 if (igmp_input_v1_query(ifp, ip, igmp) != 0) { 1713 m_freem(m); 1714 return; 1715 } 1716 break; 1717 1718 case IGMP_VERSION_2: 1719 IGMPSTAT_INC(igps_rcv_v1v2_queries); 1720 if (!igmp_v2enable) 1721 break; 1722 if (igmp_input_v2_query(ifp, ip, igmp) != 0) { 1723 m_freem(m); 1724 return; 1725 } 1726 break; 1727 1728 case IGMP_VERSION_3: { 1729 struct igmpv3 *igmpv3; 1730 uint16_t igmpv3len; 1731 uint16_t srclen; 1732 int nsrc; 1733 1734 IGMPSTAT_INC(igps_rcv_v3_queries); 1735 igmpv3 = (struct igmpv3 *)igmp; 1736 /* 1737 * Validate length based on source count. 1738 */ 1739 nsrc = ntohs(igmpv3->igmp_numsrc); 1740 srclen = sizeof(struct in_addr) * nsrc; 1741 if (igmplen < (IGMP_V3_QUERY_MINLEN + srclen)) { 1742 IGMPSTAT_INC(igps_rcv_tooshort); 1743 OIGMPSTAT_INC(igps_rcv_tooshort); 1744 m_freem(m); 1745 return; 1746 } 1747 igmpv3len = IGMP_V3_QUERY_MINLEN + srclen; 1748 /* 1749 * A bit more expensive than M_STRUCT_GET, 1750 * but ensures alignment. 1751 */ 1752 M_STRUCT_GET0(igmpv3, struct igmpv3 *, m, 1753 off, igmpv3len); 1754 if (igmpv3 == NULL) { 1755 IGMPSTAT_INC(igps_rcv_tooshort); 1756 OIGMPSTAT_INC(igps_rcv_tooshort); 1757 return; 1758 } 1759 /* 1760 * N.B.: we assume the packet was correctly 1761 * aligned in ip_input. 1762 */ 1763 if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) { 1764 m_freem(m); 1765 return; 1766 } 1767 } 1768 break; 1769 } 1770 break; 1771 1772 case IGMP_v1_HOST_MEMBERSHIP_REPORT: 1773 if (!igmp_v1enable) 1774 break; 1775 if (igmp_input_v1_report(ifp, m, ip, igmp) != 0) { 1776 m_freem(m); 1777 return; 1778 } 1779 break; 1780 1781 case IGMP_v2_HOST_MEMBERSHIP_REPORT: 1782 if (!igmp_v2enable) 1783 break; 1784 if (!ip_checkrouteralert(m)) 1785 IGMPSTAT_INC(igps_rcv_nora); 1786 if (igmp_input_v2_report(ifp, m, ip, igmp) != 0) { 1787 m_freem(m); 1788 return; 1789 } 1790 break; 1791 1792 case IGMP_v3_HOST_MEMBERSHIP_REPORT: 1793 /* 1794 * Hosts do not need to process IGMPv3 membership reports, 1795 * as report suppression is no longer required. 1796 */ 1797 if (!ip_checkrouteralert(m)) 1798 IGMPSTAT_INC(igps_rcv_nora); 1799 break; 1800 1801 default: 1802 break; 1803 } 1804 1805 IGMP_LOCK_ASSERT_NOTHELD(); 1806 /* 1807 * Pass all valid IGMP packets up to any process(es) listening on a 1808 * raw IGMP socket. 1809 */ 1810 rip_input(m, off); 1811} 1812 1813/* 1814 * Schedule IGMP timer based on various parameters; caller must ensure that 1815 * lock ordering is maintained as this routine acquires IGMP global lock. 
1816 */ 1817void 1818igmp_set_timeout(struct igmp_tparams *itp) 1819{ 1820 IGMP_LOCK_ASSERT_NOTHELD(); 1821 VERIFY(itp != NULL); 1822 1823 if (itp->qpt != 0 || itp->it != 0 || itp->cst != 0 || itp->sct != 0) { 1824 IGMP_LOCK(); 1825 if (itp->qpt != 0) 1826 querier_present_timers_running = 1; 1827 if (itp->it != 0) 1828 interface_timers_running = 1; 1829 if (itp->cst != 0) 1830 current_state_timers_running = 1; 1831 if (itp->sct != 0) 1832 state_change_timers_running = 1; 1833 igmp_sched_timeout(); 1834 IGMP_UNLOCK(); 1835 } 1836} 1837 1838/* 1839 * IGMP timer handler (per 1 second). 1840 */ 1841static void 1842igmp_timeout(void *arg) 1843{ 1844#pragma unused(arg) 1845 struct ifqueue scq; /* State-change packets */ 1846 struct ifqueue qrq; /* Query response packets */ 1847 struct ifnet *ifp; 1848 struct igmp_ifinfo *igi; 1849 struct in_multi *inm; 1850 int loop = 0, uri_sec = 0; 1851 SLIST_HEAD(, in_multi) inm_dthead; 1852 1853 SLIST_INIT(&inm_dthead); 1854 1855 /* 1856 * Update coarse-grained networking timestamp (in sec.); the idea 1857 * is to piggy-back on the timeout callout to update the counter 1858 * returnable via net_uptime(). 1859 */ 1860 net_update_uptime(); 1861 1862 IGMP_LOCK(); 1863 1864 IGMP_PRINTF(("%s: qpt %d, it %d, cst %d, sct %d\n", __func__, 1865 querier_present_timers_running, interface_timers_running, 1866 current_state_timers_running, state_change_timers_running)); 1867 1868 /* 1869 * IGMPv1/v2 querier present timer processing. 1870 */ 1871 if (querier_present_timers_running) { 1872 querier_present_timers_running = 0; 1873 LIST_FOREACH(igi, &igi_head, igi_link) { 1874 IGI_LOCK(igi); 1875 igmp_v1v2_process_querier_timers(igi); 1876 if (igi->igi_v1_timer > 0 || igi->igi_v2_timer > 0) 1877 querier_present_timers_running = 1; 1878 IGI_UNLOCK(igi); 1879 } 1880 } 1881 1882 /* 1883 * IGMPv3 General Query response timer processing. 1884 */ 1885 if (interface_timers_running) { 1886 IGMP_PRINTF(("%s: interface timers running\n", __func__)); 1887 interface_timers_running = 0; 1888 LIST_FOREACH(igi, &igi_head, igi_link) { 1889 IGI_LOCK(igi); 1890 if (igi->igi_version != IGMP_VERSION_3) { 1891 IGI_UNLOCK(igi); 1892 continue; 1893 } 1894 if (igi->igi_v3_timer == 0) { 1895 /* Do nothing. */ 1896 } else if (--igi->igi_v3_timer == 0) { 1897 if (igmp_v3_dispatch_general_query(igi) > 0) 1898 interface_timers_running = 1; 1899 } else { 1900 interface_timers_running = 1; 1901 } 1902 IGI_UNLOCK(igi); 1903 } 1904 } 1905 1906 if (!current_state_timers_running && 1907 !state_change_timers_running) 1908 goto out_locked; 1909 1910 current_state_timers_running = 0; 1911 state_change_timers_running = 0; 1912 1913 memset(&qrq, 0, sizeof(struct ifqueue)); 1914 qrq.ifq_maxlen = IGMP_MAX_G_GS_PACKETS; 1915 1916 memset(&scq, 0, sizeof(struct ifqueue)); 1917 scq.ifq_maxlen = IGMP_MAX_STATE_CHANGE_PACKETS; 1918 1919 IGMP_PRINTF(("%s: state change timers running\n", __func__)); 1920 1921 /* 1922 * IGMPv1/v2/v3 host report and state-change timer processing. 1923 * Note: Processing a v3 group timer may remove a node. 1924 */ 1925 LIST_FOREACH(igi, &igi_head, igi_link) { 1926 struct in_multistep step; 1927 1928 IGI_LOCK(igi); 1929 ifp = igi->igi_ifp; 1930 loop = (igi->igi_flags & IGIF_LOOPBACK) ? 
1 : 0; 1931 uri_sec = IGMP_RANDOM_DELAY(igi->igi_uri); 1932 IGI_UNLOCK(igi); 1933 1934 in_multihead_lock_shared(); 1935 IN_FIRST_MULTI(step, inm); 1936 while (inm != NULL) { 1937 INM_LOCK(inm); 1938 if (inm->inm_ifp != ifp) 1939 goto next; 1940 1941 IGI_LOCK(igi); 1942 switch (igi->igi_version) { 1943 case IGMP_VERSION_1: 1944 case IGMP_VERSION_2: 1945 igmp_v1v2_process_group_timer(inm, 1946 igi->igi_version); 1947 break; 1948 case IGMP_VERSION_3: 1949 igmp_v3_process_group_timers(igi, &qrq, 1950 &scq, inm, uri_sec); 1951 break; 1952 } 1953 IGI_UNLOCK(igi); 1954next: 1955 INM_UNLOCK(inm); 1956 IN_NEXT_MULTI(step, inm); 1957 } 1958 in_multihead_lock_done(); 1959 1960 IGI_LOCK(igi); 1961 if (igi->igi_version == IGMP_VERSION_1 || 1962 igi->igi_version == IGMP_VERSION_2) { 1963 igmp_dispatch_queue(igi, &igi->igi_v2q, 0, loop); 1964 } else if (igi->igi_version == IGMP_VERSION_3) { 1965 IGI_UNLOCK(igi); 1966 igmp_dispatch_queue(NULL, &qrq, 0, loop); 1967 igmp_dispatch_queue(NULL, &scq, 0, loop); 1968 VERIFY(qrq.ifq_len == 0); 1969 VERIFY(scq.ifq_len == 0); 1970 IGI_LOCK(igi); 1971 } 1972 /* 1973 * In case there are still any pending membership reports 1974 * which didn't get drained at version change time. 1975 */ 1976 IF_DRAIN(&igi->igi_v2q); 1977 /* 1978 * Release all deferred inm records, and drain any locally 1979 * enqueued packets; do it even if the current IGMP version 1980 * for the link is no longer IGMPv3, in order to handle the 1981 * version change case. 1982 */ 1983 igmp_flush_relq(igi, (struct igmp_inm_relhead *)&inm_dthead); 1984 VERIFY(SLIST_EMPTY(&igi->igi_relinmhead)); 1985 IGI_UNLOCK(igi); 1986 1987 IF_DRAIN(&qrq); 1988 IF_DRAIN(&scq); 1989 } 1990 1991out_locked: 1992 /* re-arm the timer if there's work to do */ 1993 igmp_timeout_run = 0; 1994 igmp_sched_timeout(); 1995 IGMP_UNLOCK(); 1996 1997 /* Now that we're dropped all locks, release detached records */ 1998 IGMP_REMOVE_DETACHED_INM(&inm_dthead); 1999} 2000 2001static void 2002igmp_sched_timeout(void) 2003{ 2004 IGMP_LOCK_ASSERT_HELD(); 2005 2006 if (!igmp_timeout_run && 2007 (querier_present_timers_running || current_state_timers_running || 2008 interface_timers_running || state_change_timers_running)) { 2009 igmp_timeout_run = 1; 2010 timeout(igmp_timeout, NULL, hz); 2011 } 2012} 2013 2014/* 2015 * Free the in_multi reference(s) for this IGMP lifecycle. 2016 * 2017 * Caller must be holding igi_lock. 2018 */ 2019static void 2020igmp_flush_relq(struct igmp_ifinfo *igi, struct igmp_inm_relhead *inm_dthead) 2021{ 2022 struct in_multi *inm; 2023 2024again: 2025 IGI_LOCK_ASSERT_HELD(igi); 2026 inm = SLIST_FIRST(&igi->igi_relinmhead); 2027 if (inm != NULL) { 2028 int lastref; 2029 2030 SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele); 2031 IGI_UNLOCK(igi); 2032 2033 in_multihead_lock_exclusive(); 2034 INM_LOCK(inm); 2035 VERIFY(inm->inm_nrelecnt != 0); 2036 inm->inm_nrelecnt--; 2037 lastref = in_multi_detach(inm); 2038 VERIFY(!lastref || (!(inm->inm_debug & IFD_ATTACHED) && 2039 inm->inm_reqcnt == 0)); 2040 INM_UNLOCK(inm); 2041 in_multihead_lock_done(); 2042 /* from igi_relinmhead */ 2043 INM_REMREF(inm); 2044 /* from in_multihead list */ 2045 if (lastref) { 2046 /* 2047 * Defer releasing our final reference, as we 2048 * are holding the IGMP lock at this point, and 2049 * we could end up with locking issues later on 2050 * (while issuing SIOCDELMULTI) when this is the 2051 * final reference count. Let the caller do it 2052 * when it is safe. 
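 *
 * One such caller is igmp_timeout(), which collects these records on
 * its local inm_dthead list and only releases them via
 * IGMP_REMOVE_DETACHED_INM() after the IGMP lock has been dropped.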
2053 */ 2054 IGMP_ADD_DETACHED_INM(inm_dthead, inm); 2055 } 2056 IGI_LOCK(igi); 2057 goto again; 2058 } 2059} 2060 2061/* 2062 * Update host report group timer for IGMPv1/v2. 2063 * Will update the global pending timer flags. 2064 */ 2065static void 2066igmp_v1v2_process_group_timer(struct in_multi *inm, const int igmp_version) 2067{ 2068 int report_timer_expired; 2069 2070 IGMP_LOCK_ASSERT_HELD(); 2071 INM_LOCK_ASSERT_HELD(inm); 2072 IGI_LOCK_ASSERT_HELD(inm->inm_igi); 2073 2074 if (inm->inm_timer == 0) { 2075 report_timer_expired = 0; 2076 } else if (--inm->inm_timer == 0) { 2077 report_timer_expired = 1; 2078 } else { 2079 current_state_timers_running = 1; 2080 /* caller will schedule timer */ 2081 return; 2082 } 2083 2084 switch (inm->inm_state) { 2085 case IGMP_NOT_MEMBER: 2086 case IGMP_SILENT_MEMBER: 2087 case IGMP_IDLE_MEMBER: 2088 case IGMP_LAZY_MEMBER: 2089 case IGMP_SLEEPING_MEMBER: 2090 case IGMP_AWAKENING_MEMBER: 2091 break; 2092 case IGMP_REPORTING_MEMBER: 2093 if (report_timer_expired) { 2094 inm->inm_state = IGMP_IDLE_MEMBER; 2095 (void) igmp_v1v2_queue_report(inm, 2096 (igmp_version == IGMP_VERSION_2) ? 2097 IGMP_v2_HOST_MEMBERSHIP_REPORT : 2098 IGMP_v1_HOST_MEMBERSHIP_REPORT); 2099 INM_LOCK_ASSERT_HELD(inm); 2100 IGI_LOCK_ASSERT_HELD(inm->inm_igi); 2101 } 2102 break; 2103 case IGMP_G_QUERY_PENDING_MEMBER: 2104 case IGMP_SG_QUERY_PENDING_MEMBER: 2105 case IGMP_LEAVING_MEMBER: 2106 break; 2107 } 2108} 2109 2110/* 2111 * Update a group's timers for IGMPv3. 2112 * Will update the global pending timer flags. 2113 * Note: Unlocked read from igi. 2114 */ 2115static void 2116igmp_v3_process_group_timers(struct igmp_ifinfo *igi, 2117 struct ifqueue *qrq, struct ifqueue *scq, 2118 struct in_multi *inm, const int uri_sec) 2119{ 2120 int query_response_timer_expired; 2121 int state_change_retransmit_timer_expired; 2122 2123 IGMP_LOCK_ASSERT_HELD(); 2124 INM_LOCK_ASSERT_HELD(inm); 2125 IGI_LOCK_ASSERT_HELD(igi); 2126 VERIFY(igi == inm->inm_igi); 2127 2128 query_response_timer_expired = 0; 2129 state_change_retransmit_timer_expired = 0; 2130 2131 /* 2132 * During a transition from v1/v2 compatibility mode back to v3, 2133 * a group record in REPORTING state may still have its group 2134 * timer active. This is a no-op in this function; it is easier 2135 * to deal with it here than to complicate the timeout path. 2136 */ 2137 if (inm->inm_timer == 0) { 2138 query_response_timer_expired = 0; 2139 } else if (--inm->inm_timer == 0) { 2140 query_response_timer_expired = 1; 2141 } else { 2142 current_state_timers_running = 1; 2143 /* caller will schedule timer */ 2144 } 2145 2146 if (inm->inm_sctimer == 0) { 2147 state_change_retransmit_timer_expired = 0; 2148 } else if (--inm->inm_sctimer == 0) { 2149 state_change_retransmit_timer_expired = 1; 2150 } else { 2151 state_change_timers_running = 1; 2152 /* caller will schedule timer */ 2153 } 2154 2155 /* We are in timer callback, so be quick about it. 
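 *
 * Both inm_timer (pending query response) and inm_sctimer (pending
 * state-change retransmission) are per-second countdowns that were
 * just decremented above; all that remains is to react to whichever
 * of them reached zero on this tick.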
*/ 2156 if (!state_change_retransmit_timer_expired && 2157 !query_response_timer_expired) 2158 return; 2159 2160 switch (inm->inm_state) { 2161 case IGMP_NOT_MEMBER: 2162 case IGMP_SILENT_MEMBER: 2163 case IGMP_SLEEPING_MEMBER: 2164 case IGMP_LAZY_MEMBER: 2165 case IGMP_AWAKENING_MEMBER: 2166 case IGMP_IDLE_MEMBER: 2167 break; 2168 case IGMP_G_QUERY_PENDING_MEMBER: 2169 case IGMP_SG_QUERY_PENDING_MEMBER: 2170 /* 2171 * Respond to a previously pending Group-Specific 2172 * or Group-and-Source-Specific query by enqueueing 2173 * the appropriate Current-State report for 2174 * immediate transmission. 2175 */ 2176 if (query_response_timer_expired) { 2177 int retval; 2178 2179 retval = igmp_v3_enqueue_group_record(qrq, inm, 0, 1, 2180 (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER)); 2181 IGMP_PRINTF(("%s: enqueue record = %d\n", 2182 __func__, retval)); 2183 inm->inm_state = IGMP_REPORTING_MEMBER; 2184 /* XXX Clear recorded sources for next time. */ 2185 inm_clear_recorded(inm); 2186 } 2187 /* FALLTHROUGH */ 2188 case IGMP_REPORTING_MEMBER: 2189 case IGMP_LEAVING_MEMBER: 2190 if (state_change_retransmit_timer_expired) { 2191 /* 2192 * State-change retransmission timer fired. 2193 * If there are any further pending retransmissions, 2194 * set the global pending state-change flag, and 2195 * reset the timer. 2196 */ 2197 if (--inm->inm_scrv > 0) { 2198 inm->inm_sctimer = uri_sec; 2199 state_change_timers_running = 1; 2200 /* caller will schedule timer */ 2201 } 2202 /* 2203 * Retransmit the previously computed state-change 2204 * report. If there are no further pending 2205 * retransmissions, the mbuf queue will be consumed. 2206 * Update T0 state to T1 as we have now sent 2207 * a state-change. 2208 */ 2209 (void) igmp_v3_merge_state_changes(inm, scq); 2210 2211 inm_commit(inm); 2212 IGMP_INET_PRINTF(inm->inm_addr, 2213 ("%s: T1 -> T0 for %s/%s\n", __func__, 2214 _igmp_inet_buf, if_name(inm->inm_ifp))); 2215 2216 /* 2217 * If we are leaving the group for good, make sure 2218 * we release IGMP's reference to it. 2219 * This release must be deferred using a SLIST, 2220 * as we are called from a loop which traverses 2221 * the in_multihead list. 2222 */ 2223 if (inm->inm_state == IGMP_LEAVING_MEMBER && 2224 inm->inm_scrv == 0) { 2225 inm->inm_state = IGMP_NOT_MEMBER; 2226 /* 2227 * A reference has already been held in 2228 * igmp_final_leave() for this inm, so 2229 * no need to hold another one. We also 2230 * bumped up its request count then, so 2231 * that it stays in in_multihead. Both 2232 * of them will be released when it is 2233 * dequeued later on. 2234 */ 2235 VERIFY(inm->inm_nrelecnt != 0); 2236 SLIST_INSERT_HEAD(&igi->igi_relinmhead, 2237 inm, inm_nrele); 2238 } 2239 } 2240 break; 2241 } 2242} 2243 2244/* 2245 * Suppress a group's pending response to a group or source/group query. 2246 * 2247 * Do NOT suppress state changes. This leads to IGMPv3 inconsistency. 2248 * Do NOT update ST1/ST0 as this operation merely suppresses 2249 * the currently pending group record. 2250 * Do NOT suppress the response to a general query. It is possible but 2251 * it would require adding another state or flag. 
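 *
 * In other words: if neither a Group-Specific nor a Group-and-Source-
 * Specific response is currently pending for this group, there is
 * nothing to suppress and we return immediately.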
 */
static void
igmp_v3_suppress_group_record(struct in_multi *inm)
{

	INM_LOCK_ASSERT_HELD(inm);
	IGI_LOCK_ASSERT_HELD(inm->inm_igi);

	VERIFY(inm->inm_igi->igi_version == IGMP_VERSION_3);

	if (inm->inm_state != IGMP_G_QUERY_PENDING_MEMBER &&
	    inm->inm_state != IGMP_SG_QUERY_PENDING_MEMBER)
		return;

	if (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER)
		inm_clear_recorded(inm);

	inm->inm_timer = 0;
	inm->inm_state = IGMP_REPORTING_MEMBER;
}

/*
 * Switch to a different IGMP version on the given interface,
 * as per Section 7.2.1 of RFC 3376.
 */
static uint32_t
igmp_set_version(struct igmp_ifinfo *igi, const int igmp_version)
{
	int old_version_timer;

	IGI_LOCK_ASSERT_HELD(igi);

	IGMP_PRINTF(("%s: switching to v%d on ifp 0x%llx(%s)\n", __func__,
	    igmp_version, (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
	    if_name(igi->igi_ifp)));

	if (igmp_version == IGMP_VERSION_1 || igmp_version == IGMP_VERSION_2) {
		/*
		 * Compute the "Older Version Querier Present" timer as per
		 * Section 8.12 of RFC 3376, in seconds.
		 */
		old_version_timer = igi->igi_rv * igi->igi_qi + igi->igi_qri;

		if (igmp_version == IGMP_VERSION_1) {
			igi->igi_v1_timer = old_version_timer;
			igi->igi_v2_timer = 0;
		} else if (igmp_version == IGMP_VERSION_2) {
			igi->igi_v1_timer = 0;
			igi->igi_v2_timer = old_version_timer;
		}
	}

	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
		if (igi->igi_version != IGMP_VERSION_2) {
			igi->igi_version = IGMP_VERSION_2;
			igmp_v3_cancel_link_timers(igi);
		}
	} else if (igi->igi_v1_timer > 0) {
		if (igi->igi_version != IGMP_VERSION_1) {
			igi->igi_version = IGMP_VERSION_1;
			igmp_v3_cancel_link_timers(igi);
		}
	}

	IGI_LOCK_ASSERT_HELD(igi);

	return (MAX(igi->igi_v1_timer, igi->igi_v2_timer));
}

/*
 * Cancel pending IGMPv3 timers for the given link and all groups
 * joined on it; state-change, general-query, and group-query timers.
 *
 * Only ever called on a transition from v3 to Compatibility mode. Kill
 * the timers stone dead (this may be expensive for large N groups); they
 * will be restarted if Compatibility Mode deems it necessary due to
 * query processing.
 */
static void
igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi)
{
	struct ifnet *ifp;
	struct in_multi *inm;
	struct in_multistep step;

	IGI_LOCK_ASSERT_HELD(igi);

	IGMP_PRINTF(("%s: cancel v3 timers on ifp 0x%llx(%s)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp), if_name(igi->igi_ifp)));

	/*
	 * Stop the v3 General Query Response on this link stone dead.
	 * If the timer is woken up due to interface_timers_running,
	 * the flag will be cleared if there are no pending link timers.
	 */
	igi->igi_v3_timer = 0;

	/*
	 * Now clear the current-state and state-change report timers
	 * for all memberships scoped to this link.
2352 */ 2353 ifp = igi->igi_ifp; 2354 IGI_UNLOCK(igi); 2355 2356 in_multihead_lock_shared(); 2357 IN_FIRST_MULTI(step, inm); 2358 while (inm != NULL) { 2359 INM_LOCK(inm); 2360 if (inm->inm_ifp != ifp) 2361 goto next; 2362 2363 switch (inm->inm_state) { 2364 case IGMP_NOT_MEMBER: 2365 case IGMP_SILENT_MEMBER: 2366 case IGMP_IDLE_MEMBER: 2367 case IGMP_LAZY_MEMBER: 2368 case IGMP_SLEEPING_MEMBER: 2369 case IGMP_AWAKENING_MEMBER: 2370 /* 2371 * These states are either not relevant in v3 mode, 2372 * or are unreported. Do nothing. 2373 */ 2374 break; 2375 case IGMP_LEAVING_MEMBER: 2376 /* 2377 * If we are leaving the group and switching to 2378 * compatibility mode, we need to release the final 2379 * reference held for issuing the INCLUDE {}, and 2380 * transition to REPORTING to ensure the host leave 2381 * message is sent upstream to the old querier -- 2382 * transition to NOT would lose the leave and race. 2383 * During igmp_final_leave(), we bumped up both the 2384 * request and reference counts. Since we cannot 2385 * call in_multi_detach() here, defer this task to 2386 * the timer routine. 2387 */ 2388 VERIFY(inm->inm_nrelecnt != 0); 2389 IGI_LOCK(igi); 2390 SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele); 2391 IGI_UNLOCK(igi); 2392 /* FALLTHROUGH */ 2393 case IGMP_G_QUERY_PENDING_MEMBER: 2394 case IGMP_SG_QUERY_PENDING_MEMBER: 2395 inm_clear_recorded(inm); 2396 /* FALLTHROUGH */ 2397 case IGMP_REPORTING_MEMBER: 2398 inm->inm_state = IGMP_REPORTING_MEMBER; 2399 break; 2400 } 2401 /* 2402 * Always clear state-change and group report timers. 2403 * Free any pending IGMPv3 state-change records. 2404 */ 2405 inm->inm_sctimer = 0; 2406 inm->inm_timer = 0; 2407 IF_DRAIN(&inm->inm_scq); 2408next: 2409 INM_UNLOCK(inm); 2410 IN_NEXT_MULTI(step, inm); 2411 } 2412 in_multihead_lock_done(); 2413 2414 IGI_LOCK(igi); 2415} 2416 2417/* 2418 * Update the Older Version Querier Present timers for a link. 2419 * See Section 7.2.1 of RFC 3376. 2420 */ 2421static void 2422igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi) 2423{ 2424 IGI_LOCK_ASSERT_HELD(igi); 2425 2426 if (igi->igi_v1_timer == 0 && igi->igi_v2_timer == 0) { 2427 /* 2428 * IGMPv1 and IGMPv2 Querier Present timers expired. 2429 * 2430 * Revert to IGMPv3. 2431 */ 2432 if (igi->igi_version != IGMP_VERSION_3) { 2433 IGMP_PRINTF(("%s: transition from v%d -> v%d " 2434 "on 0x%llx(%s)\n", __func__, 2435 igi->igi_version, IGMP_VERSION_3, 2436 (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp), 2437 if_name(igi->igi_ifp))); 2438 igi->igi_version = IGMP_VERSION_3; 2439 IF_DRAIN(&igi->igi_v2q); 2440 } 2441 } else if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) { 2442 /* 2443 * IGMPv1 Querier Present timer expired, 2444 * IGMPv2 Querier Present timer running. 2445 * If IGMPv2 was disabled since last timeout, 2446 * revert to IGMPv3. 2447 * If IGMPv2 is enabled, revert to IGMPv2. 
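 *
 * The timers counted down here are the "Older Version Querier Present"
 * timeouts armed in igmp_set_version(): igi_rv * igi_qi + igi_qri
 * seconds, e.g. 2 * 125 + 10 = 260 seconds with the RFC 3376 default
 * Robustness Variable, Query Interval and Query Response Interval.
 * They are decremented once per igmp_timeout() tick.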
2448 */ 2449 if (!igmp_v2enable) { 2450 IGMP_PRINTF(("%s: transition from v%d -> v%d " 2451 "on 0x%llx(%s%d)\n", __func__, 2452 igi->igi_version, IGMP_VERSION_3, 2453 (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp), 2454 igi->igi_ifp->if_name, igi->igi_ifp->if_unit)); 2455 igi->igi_v2_timer = 0; 2456 igi->igi_version = IGMP_VERSION_3; 2457 IF_DRAIN(&igi->igi_v2q); 2458 } else { 2459 --igi->igi_v2_timer; 2460 if (igi->igi_version != IGMP_VERSION_2) { 2461 IGMP_PRINTF(("%s: transition from v%d -> v%d " 2462 "on 0x%llx(%s)\n", __func__, 2463 igi->igi_version, IGMP_VERSION_2, 2464 (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp), 2465 if_name(igi->igi_ifp))); 2466 igi->igi_version = IGMP_VERSION_2; 2467 IF_DRAIN(&igi->igi_gq); 2468 igmp_v3_cancel_link_timers(igi); 2469 } 2470 } 2471 } else if (igi->igi_v1_timer > 0) { 2472 /* 2473 * IGMPv1 Querier Present timer running. 2474 * Stop IGMPv2 timer if running. 2475 * 2476 * If IGMPv1 was disabled since last timeout, 2477 * revert to IGMPv3. 2478 * If IGMPv1 is enabled, reset IGMPv2 timer if running. 2479 */ 2480 if (!igmp_v1enable) { 2481 IGMP_PRINTF(("%s: transition from v%d -> v%d " 2482 "on 0x%llx(%s%d)\n", __func__, 2483 igi->igi_version, IGMP_VERSION_3, 2484 (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp), 2485 igi->igi_ifp->if_name, igi->igi_ifp->if_unit)); 2486 igi->igi_v1_timer = 0; 2487 igi->igi_version = IGMP_VERSION_3; 2488 IF_DRAIN(&igi->igi_v2q); 2489 } else { 2490 --igi->igi_v1_timer; 2491 } 2492 if (igi->igi_v2_timer > 0) { 2493 IGMP_PRINTF(("%s: cancel v2 timer on 0x%llx(%s%d)\n", 2494 __func__, 2495 (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp), 2496 igi->igi_ifp->if_name, igi->igi_ifp->if_unit)); 2497 igi->igi_v2_timer = 0; 2498 } 2499 } 2500} 2501 2502/* 2503 * Dispatch an IGMPv1/v2 host report or leave message. 2504 * These are always small enough to fit inside a single mbuf. 2505 */ 2506static int 2507igmp_v1v2_queue_report(struct in_multi *inm, const int type) 2508{ 2509 struct ifnet *ifp; 2510 struct igmp *igmp; 2511 struct ip *ip; 2512 struct mbuf *m; 2513 int error = 0; 2514 2515 INM_LOCK_ASSERT_HELD(inm); 2516 IGI_LOCK_ASSERT_HELD(inm->inm_igi); 2517 2518 ifp = inm->inm_ifp; 2519 2520 MGETHDR(m, M_DONTWAIT, MT_DATA); 2521 if (m == NULL) 2522 return (ENOMEM); 2523 MH_ALIGN(m, sizeof(struct ip) + sizeof(struct igmp)); 2524 2525 m->m_pkthdr.len = sizeof(struct ip) + sizeof(struct igmp); 2526 2527 m->m_data += sizeof(struct ip); 2528 m->m_len = sizeof(struct igmp); 2529 2530 igmp = mtod(m, struct igmp *); 2531 igmp->igmp_type = type; 2532 igmp->igmp_code = 0; 2533 igmp->igmp_group = inm->inm_addr; 2534 igmp->igmp_cksum = 0; 2535 igmp->igmp_cksum = in_cksum(m, sizeof(struct igmp)); 2536 2537 m->m_data -= sizeof(struct ip); 2538 m->m_len += sizeof(struct ip); 2539 2540 ip = mtod(m, struct ip *); 2541 ip->ip_tos = 0; 2542 ip->ip_len = sizeof(struct ip) + sizeof(struct igmp); 2543 ip->ip_off = 0; 2544 ip->ip_p = IPPROTO_IGMP; 2545 ip->ip_src.s_addr = INADDR_ANY; 2546 2547 if (type == IGMP_HOST_LEAVE_MESSAGE) 2548 ip->ip_dst.s_addr = htonl(INADDR_ALLRTRS_GROUP); 2549 else 2550 ip->ip_dst = inm->inm_addr; 2551 2552 igmp_save_context(m, ifp); 2553 2554 m->m_flags |= M_IGMPV2; 2555 if (inm->inm_igi->igi_flags & IGIF_LOOPBACK) 2556 m->m_flags |= M_IGMP_LOOP; 2557 2558 /* 2559 * Due to the fact that at this point we are possibly holding 2560 * in_multihead_lock in shared or exclusive mode, we can't call 2561 * igmp_sendpkt() here since that will eventually call ip_output(), 2562 * which will try to lock in_multihead_lock and cause a deadlock. 
2563 * Instead we defer the work to the igmp_timeout() thread, thus 2564 * avoiding unlocking in_multihead_lock here. 2565 */ 2566 if (IF_QFULL(&inm->inm_igi->igi_v2q)) { 2567 IGMP_PRINTF(("%s: v1/v2 outbound queue full\n", __func__)); 2568 error = ENOMEM; 2569 m_freem(m); 2570 } else { 2571 IF_ENQUEUE(&inm->inm_igi->igi_v2q, m); 2572 VERIFY(error == 0); 2573 } 2574 return (error); 2575} 2576 2577/* 2578 * Process a state change from the upper layer for the given IPv4 group. 2579 * 2580 * Each socket holds a reference on the in_multi in its own ip_moptions. 2581 * The socket layer will have made the necessary updates to the group 2582 * state, it is now up to IGMP to issue a state change report if there 2583 * has been any change between T0 (when the last state-change was issued) 2584 * and T1 (now). 2585 * 2586 * We use the IGMPv3 state machine at group level. The IGMP module 2587 * however makes the decision as to which IGMP protocol version to speak. 2588 * A state change *from* INCLUDE {} always means an initial join. 2589 * A state change *to* INCLUDE {} always means a final leave. 2590 * 2591 * FUTURE: If IGIF_V3LITE is enabled for this interface, then we can 2592 * save ourselves a bunch of work; any exclusive mode groups need not 2593 * compute source filter lists. 2594 */ 2595int 2596igmp_change_state(struct in_multi *inm, struct igmp_tparams *itp) 2597{ 2598 struct igmp_ifinfo *igi; 2599 struct ifnet *ifp; 2600 int error = 0; 2601 2602 VERIFY(itp != NULL); 2603 bzero(itp, sizeof (*itp)); 2604 2605 INM_LOCK_ASSERT_HELD(inm); 2606 VERIFY(inm->inm_igi != NULL); 2607 IGI_LOCK_ASSERT_NOTHELD(inm->inm_igi); 2608 2609 /* 2610 * Try to detect if the upper layer just asked us to change state 2611 * for an interface which has now gone away. 2612 */ 2613 VERIFY(inm->inm_ifma != NULL); 2614 ifp = inm->inm_ifma->ifma_ifp; 2615 /* 2616 * Sanity check that netinet's notion of ifp is the same as net's. 2617 */ 2618 VERIFY(inm->inm_ifp == ifp); 2619 2620 igi = IGMP_IFINFO(ifp); 2621 VERIFY(igi != NULL); 2622 2623 /* 2624 * If we detect a state transition to or from MCAST_UNDEFINED 2625 * for this group, then we are starting or finishing an IGMP 2626 * life cycle for this group. 2627 */ 2628 if (inm->inm_st[1].iss_fmode != inm->inm_st[0].iss_fmode) { 2629 IGMP_PRINTF(("%s: inm transition %d -> %d\n", __func__, 2630 inm->inm_st[0].iss_fmode, inm->inm_st[1].iss_fmode)); 2631 if (inm->inm_st[0].iss_fmode == MCAST_UNDEFINED) { 2632 IGMP_PRINTF(("%s: initial join\n", __func__)); 2633 error = igmp_initial_join(inm, igi, itp); 2634 goto out; 2635 } else if (inm->inm_st[1].iss_fmode == MCAST_UNDEFINED) { 2636 IGMP_PRINTF(("%s: final leave\n", __func__)); 2637 igmp_final_leave(inm, igi, itp); 2638 goto out; 2639 } 2640 } else { 2641 IGMP_PRINTF(("%s: filter set change\n", __func__)); 2642 } 2643 2644 error = igmp_handle_state_change(inm, igi, itp); 2645out: 2646 return (error); 2647} 2648 2649/* 2650 * Perform the initial join for an IGMP group. 2651 * 2652 * When joining a group: 2653 * If the group should have its IGMP traffic suppressed, do nothing. 2654 * IGMPv1 starts sending IGMPv1 host membership reports. 2655 * IGMPv2 starts sending IGMPv2 host membership reports. 2656 * IGMPv3 will schedule an IGMPv3 state-change report containing the 2657 * initial state of the membership. 
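 *
 * For illustration, the user-space action that ultimately drives this
 * path is an IP_ADD_MEMBERSHIP (or MCAST_JOIN_GROUP) setsockopt, e.g.:
 *
 *	struct ip_mreq mr;
 *	int s = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	mr.imr_multiaddr.s_addr = inet_addr("239.1.2.3");
 *	mr.imr_interface.s_addr = htonl(INADDR_ANY);
 *	(void) setsockopt(s, IPPROTO_IP, IP_ADD_MEMBERSHIP,
 *	    &mr, sizeof (mr));
 *
 * The socket layer updates the group's filter state, and
 * igmp_change_state() then lands here for the initial join.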
2658 */ 2659static int 2660igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi, 2661 struct igmp_tparams *itp) 2662{ 2663 struct ifnet *ifp; 2664 struct ifqueue *ifq; 2665 int error, retval, syncstates; 2666 2667 INM_LOCK_ASSERT_HELD(inm); 2668 IGI_LOCK_ASSERT_NOTHELD(igi); 2669 VERIFY(itp != NULL); 2670 2671 IGMP_INET_PRINTF(inm->inm_addr, 2672 ("%s: initial join %s on ifp 0x%llx(%s)\n", __func__, 2673 _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp), 2674 if_name(inm->inm_ifp))); 2675 2676 error = 0; 2677 syncstates = 1; 2678 2679 ifp = inm->inm_ifp; 2680 2681 IGI_LOCK(igi); 2682 VERIFY(igi->igi_ifp == ifp); 2683 2684 /* 2685 * Groups joined on loopback or marked as 'not reported', 2686 * e.g. 224.0.0.1, enter the IGMP_SILENT_MEMBER state and 2687 * are never reported in any IGMP protocol exchanges. 2688 * All other groups enter the appropriate IGMP state machine 2689 * for the version in use on this link. 2690 * A link marked as IGIF_SILENT causes IGMP to be completely 2691 * disabled for the link. 2692 */ 2693 if ((ifp->if_flags & IFF_LOOPBACK) || 2694 (igi->igi_flags & IGIF_SILENT) || 2695 !igmp_isgroupreported(inm->inm_addr)) { 2696 IGMP_PRINTF(("%s: not kicking state machine for silent group\n", 2697 __func__)); 2698 inm->inm_state = IGMP_SILENT_MEMBER; 2699 inm->inm_timer = 0; 2700 } else { 2701 /* 2702 * Deal with overlapping in_multi lifecycle. 2703 * If this group was LEAVING, then make sure 2704 * we drop the reference we picked up to keep the 2705 * group around for the final INCLUDE {} enqueue. 2706 * Since we cannot call in_multi_detach() here, 2707 * defer this task to the timer routine. 2708 */ 2709 if (igi->igi_version == IGMP_VERSION_3 && 2710 inm->inm_state == IGMP_LEAVING_MEMBER) { 2711 VERIFY(inm->inm_nrelecnt != 0); 2712 SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele); 2713 } 2714 2715 inm->inm_state = IGMP_REPORTING_MEMBER; 2716 2717 switch (igi->igi_version) { 2718 case IGMP_VERSION_1: 2719 case IGMP_VERSION_2: 2720 inm->inm_state = IGMP_IDLE_MEMBER; 2721 error = igmp_v1v2_queue_report(inm, 2722 (igi->igi_version == IGMP_VERSION_2) ? 2723 IGMP_v2_HOST_MEMBERSHIP_REPORT : 2724 IGMP_v1_HOST_MEMBERSHIP_REPORT); 2725 2726 INM_LOCK_ASSERT_HELD(inm); 2727 IGI_LOCK_ASSERT_HELD(igi); 2728 2729 if (error == 0) { 2730 inm->inm_timer = 2731 IGMP_RANDOM_DELAY(IGMP_V1V2_MAX_RI); 2732 itp->cst = 1; 2733 } 2734 break; 2735 2736 case IGMP_VERSION_3: 2737 /* 2738 * Defer update of T0 to T1, until the first copy 2739 * of the state change has been transmitted. 2740 */ 2741 syncstates = 0; 2742 2743 /* 2744 * Immediately enqueue a State-Change Report for 2745 * this interface, freeing any previous reports. 2746 * Don't kick the timers if there is nothing to do, 2747 * or if an error occurred. 2748 */ 2749 ifq = &inm->inm_scq; 2750 IF_DRAIN(ifq); 2751 retval = igmp_v3_enqueue_group_record(ifq, inm, 1, 2752 0, 0); 2753 itp->cst = (ifq->ifq_len > 0); 2754 IGMP_PRINTF(("%s: enqueue record = %d\n", 2755 __func__, retval)); 2756 if (retval <= 0) { 2757 error = retval * -1; 2758 break; 2759 } 2760 2761 /* 2762 * Schedule transmission of pending state-change 2763 * report up to RV times for this link. The timer 2764 * will fire at the next igmp_timeout (1 second), 2765 * giving us an opportunity to merge the reports. 
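 *
 * With the default Robustness Variable of 2 this amounts to the
 * initial report plus one retransmission; retransmissions are spaced
 * by a random delay of up to the Unsolicited Report Interval (see
 * igmp_v3_process_group_timers()).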
2766 */ 2767 if (igi->igi_flags & IGIF_LOOPBACK) { 2768 inm->inm_scrv = 1; 2769 } else { 2770 VERIFY(igi->igi_rv > 1); 2771 inm->inm_scrv = igi->igi_rv; 2772 } 2773 inm->inm_sctimer = 1; 2774 itp->sct = 1; 2775 2776 error = 0; 2777 break; 2778 } 2779 } 2780 IGI_UNLOCK(igi); 2781 2782 /* 2783 * Only update the T0 state if state change is atomic, 2784 * i.e. we don't need to wait for a timer to fire before we 2785 * can consider the state change to have been communicated. 2786 */ 2787 if (syncstates) { 2788 inm_commit(inm); 2789 IGMP_INET_PRINTF(inm->inm_addr, 2790 ("%s: T1 -> T0 for %s/%s\n", __func__, 2791 _igmp_inet_buf, if_name(inm->inm_ifp))); 2792 } 2793 2794 return (error); 2795} 2796 2797/* 2798 * Issue an intermediate state change during the IGMP life-cycle. 2799 */ 2800static int 2801igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi, 2802 struct igmp_tparams *itp) 2803{ 2804 struct ifnet *ifp; 2805 int retval = 0; 2806 2807 INM_LOCK_ASSERT_HELD(inm); 2808 IGI_LOCK_ASSERT_NOTHELD(igi); 2809 VERIFY(itp != NULL); 2810 2811 IGMP_INET_PRINTF(inm->inm_addr, 2812 ("%s: state change for %s on ifp 0x%llx(%s)\n", __func__, 2813 _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp), 2814 if_name(inm->inm_ifp))); 2815 2816 ifp = inm->inm_ifp; 2817 2818 IGI_LOCK(igi); 2819 VERIFY(igi->igi_ifp == ifp); 2820 2821 if ((ifp->if_flags & IFF_LOOPBACK) || 2822 (igi->igi_flags & IGIF_SILENT) || 2823 !igmp_isgroupreported(inm->inm_addr) || 2824 (igi->igi_version != IGMP_VERSION_3)) { 2825 IGI_UNLOCK(igi); 2826 if (!igmp_isgroupreported(inm->inm_addr)) { 2827 IGMP_PRINTF(("%s: not kicking state " 2828 "machine for silent group\n", __func__)); 2829 } 2830 IGMP_PRINTF(("%s: nothing to do\n", __func__)); 2831 inm_commit(inm); 2832 IGMP_INET_PRINTF(inm->inm_addr, 2833 ("%s: T1 -> T0 for %s/%s\n", __func__, 2834 _igmp_inet_buf, inm->inm_ifp->if_name)); 2835 goto done; 2836 } 2837 2838 IF_DRAIN(&inm->inm_scq); 2839 2840 retval = igmp_v3_enqueue_group_record(&inm->inm_scq, inm, 1, 0, 0); 2841 itp->cst = (inm->inm_scq.ifq_len > 0); 2842 IGMP_PRINTF(("%s: enqueue record = %d\n", __func__, retval)); 2843 if (retval <= 0) { 2844 IGI_UNLOCK(igi); 2845 retval *= -1; 2846 goto done; 2847 } 2848 /* 2849 * If record(s) were enqueued, start the state-change 2850 * report timer for this group. 2851 */ 2852 inm->inm_scrv = ((igi->igi_flags & IGIF_LOOPBACK) ? 1 : igi->igi_rv); 2853 inm->inm_sctimer = 1; 2854 itp->sct = 1; 2855 IGI_UNLOCK(igi); 2856done: 2857 return (retval); 2858} 2859 2860/* 2861 * Perform the final leave for an IGMP group. 2862 * 2863 * When leaving a group: 2864 * IGMPv1 does nothing. 2865 * IGMPv2 sends a host leave message, if and only if we are the reporter. 2866 * IGMPv3 enqueues a state-change report containing a transition 2867 * to INCLUDE {} for immediate transmission. 2868 */ 2869static void 2870igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi, 2871 struct igmp_tparams *itp) 2872{ 2873 int syncstates = 1; 2874 2875 INM_LOCK_ASSERT_HELD(inm); 2876 IGI_LOCK_ASSERT_NOTHELD(igi); 2877 VERIFY(itp != NULL); 2878 2879 IGMP_INET_PRINTF(inm->inm_addr, 2880 ("%s: final leave %s on ifp 0x%llx(%s)\n", __func__, 2881 _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp), 2882 if_name(inm->inm_ifp))); 2883 2884 switch (inm->inm_state) { 2885 case IGMP_NOT_MEMBER: 2886 case IGMP_SILENT_MEMBER: 2887 case IGMP_LEAVING_MEMBER: 2888 /* Already leaving or left; do nothing. 
*/ 2889 IGMP_PRINTF(("%s: not kicking state machine for silent group\n", 2890 __func__)); 2891 break; 2892 case IGMP_REPORTING_MEMBER: 2893 case IGMP_IDLE_MEMBER: 2894 case IGMP_G_QUERY_PENDING_MEMBER: 2895 case IGMP_SG_QUERY_PENDING_MEMBER: 2896 IGI_LOCK(igi); 2897 if (igi->igi_version == IGMP_VERSION_2) { 2898 if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER || 2899 inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) { 2900 panic("%s: IGMPv3 state reached, not IGMPv3 " 2901 "mode\n", __func__); 2902 /* NOTREACHED */ 2903 } 2904 /* scheduler timer if enqueue is successful */ 2905 itp->cst = (igmp_v1v2_queue_report(inm, 2906 IGMP_HOST_LEAVE_MESSAGE) == 0); 2907 2908 INM_LOCK_ASSERT_HELD(inm); 2909 IGI_LOCK_ASSERT_HELD(igi); 2910 2911 inm->inm_state = IGMP_NOT_MEMBER; 2912 } else if (igi->igi_version == IGMP_VERSION_3) { 2913 /* 2914 * Stop group timer and all pending reports. 2915 * Immediately enqueue a state-change report 2916 * TO_IN {} to be sent on the next timeout, 2917 * giving us an opportunity to merge reports. 2918 */ 2919 IF_DRAIN(&inm->inm_scq); 2920 inm->inm_timer = 0; 2921 if (igi->igi_flags & IGIF_LOOPBACK) { 2922 inm->inm_scrv = 1; 2923 } else { 2924 inm->inm_scrv = igi->igi_rv; 2925 } 2926 IGMP_INET_PRINTF(inm->inm_addr, 2927 ("%s: Leaving %s/%s with %d " 2928 "pending retransmissions.\n", __func__, 2929 _igmp_inet_buf, if_name(inm->inm_ifp), 2930 inm->inm_scrv)); 2931 if (inm->inm_scrv == 0) { 2932 inm->inm_state = IGMP_NOT_MEMBER; 2933 inm->inm_sctimer = 0; 2934 } else { 2935 int retval; 2936 /* 2937 * Stick around in the in_multihead list; 2938 * the final detach will be issued by 2939 * igmp_v3_process_group_timers() when 2940 * the retransmit timer expires. 2941 */ 2942 INM_ADDREF_LOCKED(inm); 2943 VERIFY(inm->inm_debug & IFD_ATTACHED); 2944 inm->inm_reqcnt++; 2945 VERIFY(inm->inm_reqcnt >= 1); 2946 inm->inm_nrelecnt++; 2947 VERIFY(inm->inm_nrelecnt != 0); 2948 2949 retval = igmp_v3_enqueue_group_record( 2950 &inm->inm_scq, inm, 1, 0, 0); 2951 itp->cst = (inm->inm_scq.ifq_len > 0); 2952 KASSERT(retval != 0, 2953 ("%s: enqueue record = %d\n", __func__, 2954 retval)); 2955 2956 inm->inm_state = IGMP_LEAVING_MEMBER; 2957 inm->inm_sctimer = 1; 2958 itp->sct = 1; 2959 syncstates = 0; 2960 } 2961 } 2962 IGI_UNLOCK(igi); 2963 break; 2964 case IGMP_LAZY_MEMBER: 2965 case IGMP_SLEEPING_MEMBER: 2966 case IGMP_AWAKENING_MEMBER: 2967 /* Our reports are suppressed; do nothing. */ 2968 break; 2969 } 2970 2971 if (syncstates) { 2972 inm_commit(inm); 2973 IGMP_INET_PRINTF(inm->inm_addr, 2974 ("%s: T1 -> T0 for %s/%s\n", __func__, 2975 _igmp_inet_buf, if_name(inm->inm_ifp))); 2976 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; 2977 IGMP_INET_PRINTF(inm->inm_addr, 2978 ("%s: T1 now MCAST_UNDEFINED for %s/%s\n", 2979 __func__, _igmp_inet_buf, if_name(inm->inm_ifp))); 2980 } 2981} 2982 2983/* 2984 * Enqueue an IGMPv3 group record to the given output queue. 2985 * 2986 * XXX This function could do with having the allocation code 2987 * split out, and the multiple-tree-walks coalesced into a single 2988 * routine as has been done in igmp_v3_enqueue_filter_change(). 2989 * 2990 * If is_state_change is zero, a current-state record is appended. 2991 * If is_state_change is non-zero, a state-change report is appended. 2992 * 2993 * If is_group_query is non-zero, an mbuf packet chain is allocated. 2994 * If is_group_query is zero, and if there is a packet with free space 2995 * at the tail of the queue, it will be appended to providing there 2996 * is enough free space. 
2997 * Otherwise a new mbuf packet chain is allocated. 2998 * 2999 * If is_source_query is non-zero, each source is checked to see if 3000 * it was recorded for a Group-Source query, and will be omitted if 3001 * it is not both in-mode and recorded. 3002 * 3003 * The function will attempt to allocate leading space in the packet 3004 * for the IP/IGMP header to be prepended without fragmenting the chain. 3005 * 3006 * If successful the size of all data appended to the queue is returned, 3007 * otherwise an error code less than zero is returned, or zero if 3008 * no record(s) were appended. 3009 */ 3010static int 3011igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm, 3012 const int is_state_change, const int is_group_query, 3013 const int is_source_query) 3014{ 3015 struct igmp_grouprec ig; 3016 struct igmp_grouprec *pig; 3017 struct ifnet *ifp; 3018 struct ip_msource *ims, *nims; 3019 struct mbuf *m0, *m, *md; 3020 int error, is_filter_list_change; 3021 int minrec0len, m0srcs, msrcs, nbytes, off; 3022 int record_has_sources; 3023 int now; 3024 int type; 3025 in_addr_t naddr; 3026 uint8_t mode; 3027 u_int16_t ig_numsrc; 3028 3029 INM_LOCK_ASSERT_HELD(inm); 3030 IGI_LOCK_ASSERT_HELD(inm->inm_igi); 3031 3032 error = 0; 3033 ifp = inm->inm_ifp; 3034 is_filter_list_change = 0; 3035 m = NULL; 3036 m0 = NULL; 3037 m0srcs = 0; 3038 msrcs = 0; 3039 nbytes = 0; 3040 nims = NULL; 3041 record_has_sources = 1; 3042 pig = NULL; 3043 type = IGMP_DO_NOTHING; 3044 mode = inm->inm_st[1].iss_fmode; 3045 3046 /* 3047 * If we did not transition out of ASM mode during t0->t1, 3048 * and there are no source nodes to process, we can skip 3049 * the generation of source records. 3050 */ 3051 if (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0 && 3052 inm->inm_nsrc == 0) 3053 record_has_sources = 0; 3054 3055 if (is_state_change) { 3056 /* 3057 * Queue a state change record. 3058 * If the mode did not change, and there are non-ASM 3059 * listeners or source filters present, 3060 * we potentially need to issue two records for the group. 3061 * If we are transitioning to MCAST_UNDEFINED, we need 3062 * not send any sources. 3063 * If there are ASM listeners, and there was no filter 3064 * mode transition of any kind, do nothing. 3065 */ 3066 if (mode != inm->inm_st[0].iss_fmode) { 3067 if (mode == MCAST_EXCLUDE) { 3068 IGMP_PRINTF(("%s: change to EXCLUDE\n", 3069 __func__)); 3070 type = IGMP_CHANGE_TO_EXCLUDE_MODE; 3071 } else { 3072 IGMP_PRINTF(("%s: change to INCLUDE\n", 3073 __func__)); 3074 type = IGMP_CHANGE_TO_INCLUDE_MODE; 3075 if (mode == MCAST_UNDEFINED) 3076 record_has_sources = 0; 3077 } 3078 } else { 3079 if (record_has_sources) { 3080 is_filter_list_change = 1; 3081 } else { 3082 type = IGMP_DO_NOTHING; 3083 } 3084 } 3085 } else { 3086 /* 3087 * Queue a current state record. 3088 */ 3089 if (mode == MCAST_EXCLUDE) { 3090 type = IGMP_MODE_IS_EXCLUDE; 3091 } else if (mode == MCAST_INCLUDE) { 3092 type = IGMP_MODE_IS_INCLUDE; 3093 VERIFY(inm->inm_st[1].iss_asm == 0); 3094 } 3095 } 3096 3097 /* 3098 * Generate the filter list changes using a separate function. 
3099 */ 3100 if (is_filter_list_change) 3101 return (igmp_v3_enqueue_filter_change(ifq, inm)); 3102 3103 if (type == IGMP_DO_NOTHING) { 3104 IGMP_INET_PRINTF(inm->inm_addr, 3105 ("%s: nothing to do for %s/%s\n", 3106 __func__, _igmp_inet_buf, 3107 if_name(inm->inm_ifp))); 3108 return (0); 3109 } 3110 3111 /* 3112 * If any sources are present, we must be able to fit at least 3113 * one in the trailing space of the tail packet's mbuf, 3114 * ideally more. 3115 */ 3116 minrec0len = sizeof(struct igmp_grouprec); 3117 if (record_has_sources) 3118 minrec0len += sizeof(in_addr_t); 3119 3120 IGMP_INET_PRINTF(inm->inm_addr, 3121 ("%s: queueing %s for %s/%s\n", __func__, 3122 igmp_rec_type_to_str(type), _igmp_inet_buf, 3123 if_name(inm->inm_ifp))); 3124 3125 /* 3126 * Check if we have a packet in the tail of the queue for this 3127 * group into which the first group record for this group will fit. 3128 * Otherwise allocate a new packet. 3129 * Always allocate leading space for IP+RA_OPT+IGMP+REPORT. 3130 * Note: Group records for G/GSR query responses MUST be sent 3131 * in their own packet. 3132 */ 3133 m0 = ifq->ifq_tail; 3134 if (!is_group_query && 3135 m0 != NULL && 3136 (m0->m_pkthdr.vt_nrecs + 1 <= IGMP_V3_REPORT_MAXRECS) && 3137 (m0->m_pkthdr.len + minrec0len) < 3138 (ifp->if_mtu - IGMP_LEADINGSPACE)) { 3139 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len - 3140 sizeof(struct igmp_grouprec)) / sizeof(in_addr_t); 3141 m = m0; 3142 IGMP_PRINTF(("%s: use existing packet\n", __func__)); 3143 } else { 3144 if (IF_QFULL(ifq)) { 3145 IGMP_PRINTF(("%s: outbound queue full\n", __func__)); 3146 return (-ENOMEM); 3147 } 3148 m = NULL; 3149 m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE - 3150 sizeof(struct igmp_grouprec)) / sizeof(in_addr_t); 3151 if (!is_state_change && !is_group_query) { 3152 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); 3153 if (m) 3154 m->m_data += IGMP_LEADINGSPACE; 3155 } 3156 if (m == NULL) { 3157 m = m_gethdr(M_DONTWAIT, MT_DATA); 3158 if (m) 3159 MH_ALIGN(m, IGMP_LEADINGSPACE); 3160 } 3161 if (m == NULL) 3162 return (-ENOMEM); 3163 3164 igmp_save_context(m, ifp); 3165 3166 IGMP_PRINTF(("%s: allocated first packet\n", __func__)); 3167 } 3168 3169 /* 3170 * Append group record. 3171 * If we have sources, we don't know how many yet. 3172 */ 3173 ig.ig_type = type; 3174 ig.ig_datalen = 0; 3175 ig.ig_numsrc = 0; 3176 ig.ig_group = inm->inm_addr; 3177 if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) { 3178 if (m != m0) 3179 m_freem(m); 3180 IGMP_PRINTF(("%s: m_append() failed.\n", __func__)); 3181 return (-ENOMEM); 3182 } 3183 nbytes += sizeof(struct igmp_grouprec); 3184 3185 /* 3186 * Append as many sources as will fit in the first packet. 3187 * If we are appending to a new packet, the chain allocation 3188 * may potentially use clusters; use m_getptr() in this case. 3189 * If we are appending to an existing packet, we need to obtain 3190 * a pointer to the group record after m_append(), in case a new 3191 * mbuf was allocated. 3192 * Only append sources which are in-mode at t1. If we are 3193 * transitioning to MCAST_UNDEFINED state on the group, do not 3194 * include source entries. 3195 * Only report recorded sources in our filter set when responding 3196 * to a group-source query. 
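 *
 * For reference, each group record serialized here follows the
 * RFC 3376 section 4.2.4 layout, which struct igmp_grouprec plus the
 * appended source addresses encodes directly:
 *
 *	+-------------+--------------+---------------------------+
 *	| Record Type | Aux Data Len |   Number of Sources (N)   |
 *	+-------------+--------------+---------------------------+
 *	|                   Multicast Address                    |
 *	+---------------------------------------------------------+
 *	|         Source Address [1] ... Source Address [N]      |
 *	+---------------------------------------------------------+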
3197 */ 3198 if (record_has_sources) { 3199 if (m == m0) { 3200 md = m_last(m); 3201 pig = (struct igmp_grouprec *)(void *) 3202 (mtod(md, uint8_t *) + md->m_len - nbytes); 3203 } else { 3204 md = m_getptr(m, 0, &off); 3205 pig = (struct igmp_grouprec *)(void *) 3206 (mtod(md, uint8_t *) + off); 3207 } 3208 msrcs = 0; 3209 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) { 3210#ifdef IGMP_DEBUG 3211 char buf[MAX_IPv4_STR_LEN]; 3212 3213 inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf)); 3214 IGMP_PRINTF(("%s: visit node %s\n", __func__, buf)); 3215#endif 3216 now = ims_get_mode(inm, ims, 1); 3217 IGMP_PRINTF(("%s: node is %d\n", __func__, now)); 3218 if ((now != mode) || 3219 (now == mode && mode == MCAST_UNDEFINED)) { 3220 IGMP_PRINTF(("%s: skip node\n", __func__)); 3221 continue; 3222 } 3223 if (is_source_query && ims->ims_stp == 0) { 3224 IGMP_PRINTF(("%s: skip unrecorded node\n", 3225 __func__)); 3226 continue; 3227 } 3228 IGMP_PRINTF(("%s: append node\n", __func__)); 3229 naddr = htonl(ims->ims_haddr); 3230 if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) { 3231 if (m != m0) 3232 m_freem(m); 3233 IGMP_PRINTF(("%s: m_append() failed.\n", 3234 __func__)); 3235 return (-ENOMEM); 3236 } 3237 nbytes += sizeof(in_addr_t); 3238 ++msrcs; 3239 if (msrcs == m0srcs) 3240 break; 3241 } 3242 IGMP_PRINTF(("%s: msrcs is %d this packet\n", __func__, 3243 msrcs)); 3244 ig_numsrc = htons(msrcs); 3245 bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof (ig_numsrc)); 3246 nbytes += (msrcs * sizeof(in_addr_t)); 3247 } 3248 3249 if (is_source_query && msrcs == 0) { 3250 IGMP_PRINTF(("%s: no recorded sources to report\n", __func__)); 3251 if (m != m0) 3252 m_freem(m); 3253 return (0); 3254 } 3255 3256 /* 3257 * We are good to go with first packet. 3258 */ 3259 if (m != m0) { 3260 IGMP_PRINTF(("%s: enqueueing first packet\n", __func__)); 3261 m->m_pkthdr.vt_nrecs = 1; 3262 IF_ENQUEUE(ifq, m); 3263 } else { 3264 m->m_pkthdr.vt_nrecs++; 3265 } 3266 /* 3267 * No further work needed if no source list in packet(s). 3268 */ 3269 if (!record_has_sources) 3270 return (nbytes); 3271 3272 /* 3273 * Whilst sources remain to be announced, we need to allocate 3274 * a new packet and fill out as many sources as will fit. 3275 * Always try for a cluster first. 
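 *
 * The per-packet source budget is recomputed below as
 * (if_mtu - IGMP_LEADINGSPACE - sizeof(struct igmp_grouprec)) /
 * sizeof(in_addr_t); with a standard 1500-byte Ethernet MTU that
 * works out to a few hundred sources per follow-up packet (the exact
 * figure depends on the leading space reserved for the IP, Router
 * Alert and IGMP report headers).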
3276 */ 3277 while (nims != NULL) { 3278 if (IF_QFULL(ifq)) { 3279 IGMP_PRINTF(("%s: outbound queue full\n", __func__)); 3280 return (-ENOMEM); 3281 } 3282 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); 3283 if (m) 3284 m->m_data += IGMP_LEADINGSPACE; 3285 if (m == NULL) { 3286 m = m_gethdr(M_DONTWAIT, MT_DATA); 3287 if (m) 3288 MH_ALIGN(m, IGMP_LEADINGSPACE); 3289 } 3290 if (m == NULL) 3291 return (-ENOMEM); 3292 igmp_save_context(m, ifp); 3293 md = m_getptr(m, 0, &off); 3294 pig = (struct igmp_grouprec *)(void *) 3295 (mtod(md, uint8_t *) + off); 3296 IGMP_PRINTF(("%s: allocated next packet\n", __func__)); 3297 3298 if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) { 3299 if (m != m0) 3300 m_freem(m); 3301 IGMP_PRINTF(("%s: m_append() failed.\n", __func__)); 3302 return (-ENOMEM); 3303 } 3304 m->m_pkthdr.vt_nrecs = 1; 3305 nbytes += sizeof(struct igmp_grouprec); 3306 3307 m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE - 3308 sizeof(struct igmp_grouprec)) / sizeof(in_addr_t); 3309 3310 msrcs = 0; 3311 RB_FOREACH_FROM(ims, ip_msource_tree, nims) { 3312#ifdef IGMP_DEBUG 3313 char buf[MAX_IPv4_STR_LEN]; 3314 3315 inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf)); 3316 IGMP_PRINTF(("%s: visit node %s\n", __func__, buf)); 3317#endif 3318 now = ims_get_mode(inm, ims, 1); 3319 if ((now != mode) || 3320 (now == mode && mode == MCAST_UNDEFINED)) { 3321 IGMP_PRINTF(("%s: skip node\n", __func__)); 3322 continue; 3323 } 3324 if (is_source_query && ims->ims_stp == 0) { 3325 IGMP_PRINTF(("%s: skip unrecorded node\n", 3326 __func__)); 3327 continue; 3328 } 3329 IGMP_PRINTF(("%s: append node\n", __func__)); 3330 naddr = htonl(ims->ims_haddr); 3331 if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) { 3332 if (m != m0) 3333 m_freem(m); 3334 IGMP_PRINTF(("%s: m_append() failed.\n", 3335 __func__)); 3336 return (-ENOMEM); 3337 } 3338 ++msrcs; 3339 if (msrcs == m0srcs) 3340 break; 3341 } 3342 ig_numsrc = htons(msrcs); 3343 bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof (ig_numsrc)); 3344 nbytes += (msrcs * sizeof(in_addr_t)); 3345 3346 IGMP_PRINTF(("%s: enqueueing next packet\n", __func__)); 3347 IF_ENQUEUE(ifq, m); 3348 } 3349 3350 return (nbytes); 3351} 3352 3353/* 3354 * Type used to mark record pass completion. 3355 * We exploit the fact we can cast to this easily from the 3356 * current filter modes on each ip_msource node. 3357 */ 3358typedef enum { 3359 REC_NONE = 0x00, /* MCAST_UNDEFINED */ 3360 REC_ALLOW = 0x01, /* MCAST_INCLUDE */ 3361 REC_BLOCK = 0x02, /* MCAST_EXCLUDE */ 3362 REC_FULL = REC_ALLOW | REC_BLOCK 3363} rectype_t; 3364 3365/* 3366 * Enqueue an IGMPv3 filter list change to the given output queue. 3367 * 3368 * Source list filter state is held in an RB-tree. When the filter list 3369 * for a group is changed without changing its mode, we need to compute 3370 * the deltas between T0 and T1 for each source in the filter set, 3371 * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records. 3372 * 3373 * As we may potentially queue two record types, and the entire R-B tree 3374 * needs to be walked at once, we break this out into its own function 3375 * so we can generate a tightly packed queue of packets. 3376 * 3377 * XXX This could be written to only use one tree walk, although that makes 3378 * serializing into the mbuf chains a bit harder. For now we do two walks 3379 * which makes things easier on us, and it may or may not be harder on 3380 * the L2 cache. 
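 *
 * As a concrete example: if a group stays in INCLUDE mode but its
 * source filter changes from {A, B} at t0 to {A, C} at t1, this
 * routine emits an ALLOW_NEW_SOURCES record listing C and a
 * BLOCK_OLD_SOURCES record listing B.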
3381 * 3382 * If successful the size of all data appended to the queue is returned, 3383 * otherwise an error code less than zero is returned, or zero if 3384 * no record(s) were appended. 3385 */ 3386static int 3387igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm) 3388{ 3389 static const int MINRECLEN = 3390 sizeof(struct igmp_grouprec) + sizeof(in_addr_t); 3391 struct ifnet *ifp; 3392 struct igmp_grouprec ig; 3393 struct igmp_grouprec *pig; 3394 struct ip_msource *ims, *nims; 3395 struct mbuf *m, *m0, *md; 3396 in_addr_t naddr; 3397 int m0srcs, nbytes, npbytes, off, rsrcs, schanged; 3398 int nallow, nblock; 3399 uint8_t mode, now, then; 3400 rectype_t crt, drt, nrt; 3401 u_int16_t ig_numsrc; 3402 3403 INM_LOCK_ASSERT_HELD(inm); 3404 3405 if (inm->inm_nsrc == 0 || 3406 (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0)) 3407 return (0); 3408 3409 ifp = inm->inm_ifp; /* interface */ 3410 mode = inm->inm_st[1].iss_fmode; /* filter mode at t1 */ 3411 crt = REC_NONE; /* current group record type */ 3412 drt = REC_NONE; /* mask of completed group record types */ 3413 nrt = REC_NONE; /* record type for current node */ 3414 m0srcs = 0; /* # source which will fit in current mbuf chain */ 3415 nbytes = 0; /* # of bytes appended to group's state-change queue */ 3416 npbytes = 0; /* # of bytes appended this packet */ 3417 rsrcs = 0; /* # sources encoded in current record */ 3418 schanged = 0; /* # nodes encoded in overall filter change */ 3419 nallow = 0; /* # of source entries in ALLOW_NEW */ 3420 nblock = 0; /* # of source entries in BLOCK_OLD */ 3421 nims = NULL; /* next tree node pointer */ 3422 3423 /* 3424 * For each possible filter record mode. 3425 * The first kind of source we encounter tells us which 3426 * is the first kind of record we start appending. 3427 * If a node transitioned to UNDEFINED at t1, its mode is treated 3428 * as the inverse of the group's filter mode. 3429 */ 3430 while (drt != REC_FULL) { 3431 do { 3432 m0 = ifq->ifq_tail; 3433 if (m0 != NULL && 3434 (m0->m_pkthdr.vt_nrecs + 1 <= 3435 IGMP_V3_REPORT_MAXRECS) && 3436 (m0->m_pkthdr.len + MINRECLEN) < 3437 (ifp->if_mtu - IGMP_LEADINGSPACE)) { 3438 m = m0; 3439 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len - 3440 sizeof(struct igmp_grouprec)) / 3441 sizeof(in_addr_t); 3442 IGMP_PRINTF(("%s: use previous packet\n", 3443 __func__)); 3444 } else { 3445 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); 3446 if (m) 3447 m->m_data += IGMP_LEADINGSPACE; 3448 if (m == NULL) { 3449 m = m_gethdr(M_DONTWAIT, MT_DATA); 3450 if (m) 3451 MH_ALIGN(m, IGMP_LEADINGSPACE); 3452 } 3453 if (m == NULL) { 3454 IGMP_PRINTF(("%s: m_get*() failed\n", 3455 __func__)); 3456 return (-ENOMEM); 3457 } 3458 m->m_pkthdr.vt_nrecs = 0; 3459 igmp_save_context(m, ifp); 3460 m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE - 3461 sizeof(struct igmp_grouprec)) / 3462 sizeof(in_addr_t); 3463 npbytes = 0; 3464 IGMP_PRINTF(("%s: allocated new packet\n", 3465 __func__)); 3466 } 3467 /* 3468 * Append the IGMP group record header to the 3469 * current packet's data area. 3470 * Recalculate pointer to free space for next 3471 * group record, in case m_append() allocated 3472 * a new mbuf or cluster. 
3473 */ 3474 memset(&ig, 0, sizeof(ig)); 3475 ig.ig_group = inm->inm_addr; 3476 if (!m_append(m, sizeof(ig), (void *)&ig)) { 3477 if (m != m0) 3478 m_freem(m); 3479 IGMP_PRINTF(("%s: m_append() failed\n", 3480 __func__)); 3481 return (-ENOMEM); 3482 } 3483 npbytes += sizeof(struct igmp_grouprec); 3484 if (m != m0) { 3485 /* new packet; offset in c hain */ 3486 md = m_getptr(m, npbytes - 3487 sizeof(struct igmp_grouprec), &off); 3488 pig = (struct igmp_grouprec *)(void *)(mtod(md, 3489 uint8_t *) + off); 3490 } else { 3491 /* current packet; offset from last append */ 3492 md = m_last(m); 3493 pig = (struct igmp_grouprec *)(void *)(mtod(md, 3494 uint8_t *) + md->m_len - 3495 sizeof(struct igmp_grouprec)); 3496 } 3497 /* 3498 * Begin walking the tree for this record type 3499 * pass, or continue from where we left off 3500 * previously if we had to allocate a new packet. 3501 * Only report deltas in-mode at t1. 3502 * We need not report included sources as allowed 3503 * if we are in inclusive mode on the group, 3504 * however the converse is not true. 3505 */ 3506 rsrcs = 0; 3507 if (nims == NULL) 3508 nims = RB_MIN(ip_msource_tree, &inm->inm_srcs); 3509 RB_FOREACH_FROM(ims, ip_msource_tree, nims) { 3510#ifdef IGMP_DEBUG 3511 char buf[MAX_IPv4_STR_LEN]; 3512 3513 inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf)); 3514 IGMP_PRINTF(("%s: visit node %s\n", __func__, buf)); 3515#endif 3516 now = ims_get_mode(inm, ims, 1); 3517 then = ims_get_mode(inm, ims, 0); 3518 IGMP_PRINTF(("%s: mode: t0 %d, t1 %d\n", 3519 __func__, then, now)); 3520 if (now == then) { 3521 IGMP_PRINTF(("%s: skip unchanged\n", 3522 __func__)); 3523 continue; 3524 } 3525 if (mode == MCAST_EXCLUDE && 3526 now == MCAST_INCLUDE) { 3527 IGMP_PRINTF(("%s: skip IN src on EX " 3528 "group\n", __func__)); 3529 continue; 3530 } 3531 nrt = (rectype_t)now; 3532 if (nrt == REC_NONE) 3533 nrt = (rectype_t)(~mode & REC_FULL); 3534 if (schanged++ == 0) { 3535 crt = nrt; 3536 } else if (crt != nrt) 3537 continue; 3538 naddr = htonl(ims->ims_haddr); 3539 if (!m_append(m, sizeof(in_addr_t), 3540 (void *)&naddr)) { 3541 if (m != m0) 3542 m_freem(m); 3543 IGMP_PRINTF(("%s: m_append() failed\n", 3544 __func__)); 3545 return (-ENOMEM); 3546 } 3547 nallow += !!(crt == REC_ALLOW); 3548 nblock += !!(crt == REC_BLOCK); 3549 if (++rsrcs == m0srcs) 3550 break; 3551 } 3552 /* 3553 * If we did not append any tree nodes on this 3554 * pass, back out of allocations. 3555 */ 3556 if (rsrcs == 0) { 3557 npbytes -= sizeof(struct igmp_grouprec); 3558 if (m != m0) { 3559 IGMP_PRINTF(("%s: m_free(m)\n", 3560 __func__)); 3561 m_freem(m); 3562 } else { 3563 IGMP_PRINTF(("%s: m_adj(m, -ig)\n", 3564 __func__)); 3565 m_adj(m, -((int)sizeof( 3566 struct igmp_grouprec))); 3567 } 3568 continue; 3569 } 3570 npbytes += (rsrcs * sizeof(in_addr_t)); 3571 if (crt == REC_ALLOW) 3572 pig->ig_type = IGMP_ALLOW_NEW_SOURCES; 3573 else if (crt == REC_BLOCK) 3574 pig->ig_type = IGMP_BLOCK_OLD_SOURCES; 3575 ig_numsrc = htons(rsrcs); 3576 bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof (ig_numsrc)); 3577 /* 3578 * Count the new group record, and enqueue this 3579 * packet if it wasn't already queued. 
3580 */ 3581 m->m_pkthdr.vt_nrecs++; 3582 if (m != m0) 3583 IF_ENQUEUE(ifq, m); 3584 nbytes += npbytes; 3585 } while (nims != NULL); 3586 drt |= crt; 3587 crt = (~crt & REC_FULL); 3588 } 3589 3590 IGMP_PRINTF(("%s: queued %d ALLOW_NEW, %d BLOCK_OLD\n", __func__, 3591 nallow, nblock)); 3592 3593 return (nbytes); 3594} 3595 3596static int 3597igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq) 3598{ 3599 struct ifqueue *gq; 3600 struct mbuf *m; /* pending state-change */ 3601 struct mbuf *m0; /* copy of pending state-change */ 3602 struct mbuf *mt; /* last state-change in packet */ 3603 struct mbuf *n; 3604 int docopy, domerge; 3605 u_int recslen; 3606 3607 INM_LOCK_ASSERT_HELD(inm); 3608 3609 docopy = 0; 3610 domerge = 0; 3611 recslen = 0; 3612 3613 /* 3614 * If there are further pending retransmissions, make a writable 3615 * copy of each queued state-change message before merging. 3616 */ 3617 if (inm->inm_scrv > 0) 3618 docopy = 1; 3619 3620 gq = &inm->inm_scq; 3621#ifdef IGMP_DEBUG 3622 if (gq->ifq_head == NULL) { 3623 IGMP_PRINTF(("%s: WARNING: queue for inm 0x%llx is empty\n", 3624 __func__, (uint64_t)VM_KERNEL_ADDRPERM(inm))); 3625 } 3626#endif 3627 3628 /* 3629 * Use IF_REMQUEUE() instead of IF_DEQUEUE() below, since the 3630 * packet might not always be at the head of the ifqueue. 3631 */ 3632 m = gq->ifq_head; 3633 while (m != NULL) { 3634 /* 3635 * Only merge the report into the current packet if 3636 * there is sufficient space to do so; an IGMPv3 report 3637 * packet may only contain 65,535 group records. 3638 * Always use a simple mbuf chain concatentation to do this, 3639 * as large state changes for single groups may have 3640 * allocated clusters. 3641 */ 3642 domerge = 0; 3643 mt = ifscq->ifq_tail; 3644 if (mt != NULL) { 3645 recslen = m_length(m); 3646 3647 if ((mt->m_pkthdr.vt_nrecs + 3648 m->m_pkthdr.vt_nrecs <= 3649 IGMP_V3_REPORT_MAXRECS) && 3650 (mt->m_pkthdr.len + recslen <= 3651 (inm->inm_ifp->if_mtu - IGMP_LEADINGSPACE))) 3652 domerge = 1; 3653 } 3654 3655 if (!domerge && IF_QFULL(gq)) { 3656 IGMP_PRINTF(("%s: outbound queue full, skipping whole " 3657 "packet 0x%llx\n", __func__, 3658 (uint64_t)VM_KERNEL_ADDRPERM(m))); 3659 n = m->m_nextpkt; 3660 if (!docopy) { 3661 IF_REMQUEUE(gq, m); 3662 m_freem(m); 3663 } 3664 m = n; 3665 continue; 3666 } 3667 3668 if (!docopy) { 3669 IGMP_PRINTF(("%s: dequeueing 0x%llx\n", __func__, 3670 (uint64_t)VM_KERNEL_ADDRPERM(m))); 3671 n = m->m_nextpkt; 3672 IF_REMQUEUE(gq, m); 3673 m0 = m; 3674 m = n; 3675 } else { 3676 IGMP_PRINTF(("%s: copying 0x%llx\n", __func__, 3677 (uint64_t)VM_KERNEL_ADDRPERM(m))); 3678 m0 = m_dup(m, M_NOWAIT); 3679 if (m0 == NULL) 3680 return (ENOMEM); 3681 m0->m_nextpkt = NULL; 3682 m = m->m_nextpkt; 3683 } 3684 3685 if (!domerge) { 3686 IGMP_PRINTF(("%s: queueing 0x%llx to ifscq 0x%llx)\n", 3687 __func__, (uint64_t)VM_KERNEL_ADDRPERM(m0), 3688 (uint64_t)VM_KERNEL_ADDRPERM(ifscq))); 3689 IF_ENQUEUE(ifscq, m0); 3690 } else { 3691 struct mbuf *mtl; /* last mbuf of packet mt */ 3692 3693 IGMP_PRINTF(("%s: merging 0x%llx with ifscq tail " 3694 "0x%llx)\n", __func__, 3695 (uint64_t)VM_KERNEL_ADDRPERM(m0), 3696 (uint64_t)VM_KERNEL_ADDRPERM(mt))); 3697 3698 mtl = m_last(mt); 3699 m0->m_flags &= ~M_PKTHDR; 3700 mt->m_pkthdr.len += recslen; 3701 mt->m_pkthdr.vt_nrecs += 3702 m0->m_pkthdr.vt_nrecs; 3703 3704 mtl->m_next = m0; 3705 } 3706 } 3707 3708 return (0); 3709} 3710 3711/* 3712 * Respond to a pending IGMPv3 General Query. 
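 *
 * All reportable memberships on the interface are walked and a
 * current-state (MODE_IS_*) record is enqueued for each; the queue is
 * then drained in bursts of at most IGMP_MAX_RESPONSE_BURST packets,
 * with any remainder rescheduled via igi_v3_timer roughly a second
 * (plus a random slew) later.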
3713 */ 3714static uint32_t 3715igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi) 3716{ 3717 struct ifnet *ifp; 3718 struct in_multi *inm; 3719 struct in_multistep step; 3720 int retval, loop; 3721 3722 IGI_LOCK_ASSERT_HELD(igi); 3723 3724 VERIFY(igi->igi_version == IGMP_VERSION_3); 3725 3726 ifp = igi->igi_ifp; 3727 IGI_UNLOCK(igi); 3728 3729 in_multihead_lock_shared(); 3730 IN_FIRST_MULTI(step, inm); 3731 while (inm != NULL) { 3732 INM_LOCK(inm); 3733 if (inm->inm_ifp != ifp) 3734 goto next; 3735 3736 switch (inm->inm_state) { 3737 case IGMP_NOT_MEMBER: 3738 case IGMP_SILENT_MEMBER: 3739 break; 3740 case IGMP_REPORTING_MEMBER: 3741 case IGMP_IDLE_MEMBER: 3742 case IGMP_LAZY_MEMBER: 3743 case IGMP_SLEEPING_MEMBER: 3744 case IGMP_AWAKENING_MEMBER: 3745 inm->inm_state = IGMP_REPORTING_MEMBER; 3746 IGI_LOCK(igi); 3747 retval = igmp_v3_enqueue_group_record(&igi->igi_gq, 3748 inm, 0, 0, 0); 3749 IGI_UNLOCK(igi); 3750 IGMP_PRINTF(("%s: enqueue record = %d\n", 3751 __func__, retval)); 3752 break; 3753 case IGMP_G_QUERY_PENDING_MEMBER: 3754 case IGMP_SG_QUERY_PENDING_MEMBER: 3755 case IGMP_LEAVING_MEMBER: 3756 break; 3757 } 3758next: 3759 INM_UNLOCK(inm); 3760 IN_NEXT_MULTI(step, inm); 3761 } 3762 in_multihead_lock_done(); 3763 3764 IGI_LOCK(igi); 3765 loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0; 3766 igmp_dispatch_queue(igi, &igi->igi_gq, IGMP_MAX_RESPONSE_BURST, 3767 loop); 3768 IGI_LOCK_ASSERT_HELD(igi); 3769 /* 3770 * Slew transmission of bursts over 1 second intervals. 3771 */ 3772 if (igi->igi_gq.ifq_head != NULL) { 3773 igi->igi_v3_timer = 1 + IGMP_RANDOM_DELAY( 3774 IGMP_RESPONSE_BURST_INTERVAL); 3775 } 3776 3777 return (igi->igi_v3_timer); 3778} 3779 3780/* 3781 * Transmit the next pending IGMP message in the output queue. 3782 * 3783 * Must not be called with inm_lock or igi_lock held. 3784 */ 3785static void 3786igmp_sendpkt(struct mbuf *m) 3787{ 3788 struct ip_moptions *imo; 3789 struct mbuf *ipopts, *m0; 3790 int error; 3791 struct route ro; 3792 struct ifnet *ifp; 3793 3794 IGMP_PRINTF(("%s: transmit 0x%llx\n", __func__, 3795 (uint64_t)VM_KERNEL_ADDRPERM(m))); 3796 3797 ifp = igmp_restore_context(m); 3798 /* 3799 * Check if the ifnet is still attached. 3800 */ 3801 if (ifp == NULL || !ifnet_is_attached(ifp, 0)) { 3802 IGMP_PRINTF(("%s: dropped 0x%llx as ifp went away.\n", 3803 __func__, (uint64_t)VM_KERNEL_ADDRPERM(m))); 3804 m_freem(m); 3805 OSAddAtomic(1, &ipstat.ips_noroute); 3806 return; 3807 } 3808 3809 ipopts = igmp_sendra ? m_raopt : NULL; 3810 3811 imo = ip_allocmoptions(M_WAITOK); 3812 if (imo == NULL) { 3813 m_freem(m); 3814 return; 3815 } 3816 3817 imo->imo_multicast_ttl = 1; 3818 imo->imo_multicast_vif = -1; 3819 imo->imo_multicast_loop = 0; 3820 3821 /* 3822 * If the user requested that IGMP traffic be explicitly 3823 * redirected to the loopback interface (e.g. they are running a 3824 * MANET interface and the routing protocol needs to see the 3825 * updates), handle this now. 3826 */ 3827 if (m->m_flags & M_IGMP_LOOP) 3828 imo->imo_multicast_ifp = lo_ifp; 3829 else 3830 imo->imo_multicast_ifp = ifp; 3831 3832 if (m->m_flags & M_IGMPV2) { 3833 m0 = m; 3834 } else { 3835 m0 = igmp_v3_encap_report(ifp, m); 3836 if (m0 == NULL) { 3837 /* 3838 * If igmp_v3_encap_report() failed, then M_PREPEND() 3839 * already freed the original mbuf chain. 3840 * This means that we don't have to m_freem(m) here. 
/*
 * Transmit the next pending IGMP message in the output queue.
 *
 * Must not be called with inm_lock or igi_lock held.
 */
static void
igmp_sendpkt(struct mbuf *m)
{
	struct ip_moptions	*imo;
	struct mbuf		*ipopts, *m0;
	int			error;
	struct route		ro;
	struct ifnet		*ifp;

	IGMP_PRINTF(("%s: transmit 0x%llx\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(m)));

	ifp = igmp_restore_context(m);
	/*
	 * Check if the ifnet is still attached.
	 */
	if (ifp == NULL || !ifnet_is_attached(ifp, 0)) {
		IGMP_PRINTF(("%s: dropped 0x%llx as ifp went away.\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(m)));
		m_freem(m);
		OSAddAtomic(1, &ipstat.ips_noroute);
		return;
	}

	ipopts = igmp_sendra ? m_raopt : NULL;

	imo = ip_allocmoptions(M_WAITOK);
	if (imo == NULL) {
		m_freem(m);
		return;
	}

	imo->imo_multicast_ttl = 1;
	imo->imo_multicast_vif = -1;
	imo->imo_multicast_loop = 0;

	/*
	 * If the user requested that IGMP traffic be explicitly
	 * redirected to the loopback interface (e.g. they are running a
	 * MANET interface and the routing protocol needs to see the
	 * updates), handle this now.
	 */
	if (m->m_flags & M_IGMP_LOOP)
		imo->imo_multicast_ifp = lo_ifp;
	else
		imo->imo_multicast_ifp = ifp;

	if (m->m_flags & M_IGMPV2) {
		m0 = m;
	} else {
		m0 = igmp_v3_encap_report(ifp, m);
		if (m0 == NULL) {
			/*
			 * If igmp_v3_encap_report() failed, then M_PREPEND()
			 * already freed the original mbuf chain.
			 * This means that we don't have to m_freem(m) here.
			 */
			IGMP_PRINTF(("%s: dropped 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			IMO_REMREF(imo);
			atomic_add_32(&ipstat.ips_odropped, 1);
			return;
		}
	}

	igmp_scrub_context(m0);
	m->m_flags &= ~(M_PROTOFLAGS | M_IGMP_LOOP);
	m0->m_pkthdr.rcvif = lo_ifp;
#ifdef MAC
	mac_netinet_igmp_send(ifp, m0);
#endif

	if (ifp->if_eflags & IFEF_TXSTART) {
		/*
		 * Use control service class if the interface supports
		 * transmit-start model.
		 */
		(void) m_set_service_class(m0, MBUF_SC_CTL);
	}
	bzero(&ro, sizeof (ro));
	error = ip_output(m0, ipopts, &ro, 0, imo, NULL);
	ROUTE_RELEASE(&ro);

	IMO_REMREF(imo);

	if (error) {
		IGMP_PRINTF(("%s: ip_output(0x%llx) = %d\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(m0), error));
		return;
	}

	IGMPSTAT_INC(igps_snd_reports);
	OIGMPSTAT_INC(igps_snd_reports);
}

/*
 * Encapsulate an IGMPv3 report.
 *
 * The internal mbuf flag M_IGMPV3_HDR is used to indicate that the mbuf
 * chain has already had its IP/IGMPv3 header prepended. In this case
 * the function will not attempt to prepend; the lengths and checksums
 * will however be re-computed.
 *
 * Returns a pointer to the new mbuf chain head, or NULL if the
 * allocation failed.
 */
static struct mbuf *
igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
{
	struct igmp_report	*igmp;
	struct ip		*ip;
	int			 hdrlen, igmpreclen;

	VERIFY((m->m_flags & M_PKTHDR));

	igmpreclen = m_length(m);
	hdrlen = sizeof(struct ip) + sizeof(struct igmp_report);

	if (m->m_flags & M_IGMPV3_HDR) {
		igmpreclen -= hdrlen;
	} else {
		M_PREPEND(m, hdrlen, M_DONTWAIT);
		if (m == NULL)
			return (NULL);
		m->m_flags |= M_IGMPV3_HDR;
	}

	IGMP_PRINTF(("%s: igmpreclen is %d\n", __func__, igmpreclen));

	m->m_data += sizeof(struct ip);
	m->m_len -= sizeof(struct ip);

	igmp = mtod(m, struct igmp_report *);
	igmp->ir_type = IGMP_v3_HOST_MEMBERSHIP_REPORT;
	igmp->ir_rsv1 = 0;
	igmp->ir_rsv2 = 0;
	igmp->ir_numgrps = htons(m->m_pkthdr.vt_nrecs);
	igmp->ir_cksum = 0;
	igmp->ir_cksum = in_cksum(m, sizeof(struct igmp_report) + igmpreclen);
	m->m_pkthdr.vt_nrecs = 0;

	m->m_data -= sizeof(struct ip);
	m->m_len += sizeof(struct ip);

	ip = mtod(m, struct ip *);
	ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
	ip->ip_len = hdrlen + igmpreclen;
	ip->ip_off = IP_DF;
	ip->ip_p = IPPROTO_IGMP;
	ip->ip_sum = 0;

	ip->ip_src.s_addr = INADDR_ANY;

	if (m->m_flags & M_IGMP_LOOP) {
		struct in_ifaddr *ia;

		IFP_TO_IA(ifp, ia);
		if (ia != NULL) {
			IFA_LOCK(&ia->ia_ifa);
			ip->ip_src = ia->ia_addr.sin_addr;
			IFA_UNLOCK(&ia->ia_ifa);
			IFA_REMREF(&ia->ia_ifa);
		}
	}

	ip->ip_dst.s_addr = htonl(INADDR_ALLRPTS_GROUP);

	return (m);
}

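/*
 * Illustrative layout (added commentary, per RFC 3376, Section 4.2) of the
 * message built by igmp_v3_encap_report() above, addressed to 224.0.0.22
 * (INADDR_ALLRPTS_GROUP):
 *
 *	 0                   1                   2                   3
 *	 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
 *	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *	|  Type = 0x22  |    Reserved   |            Checksum           |
 *	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *	|            Reserved           |  Number of Group Records (M)  |
 *	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *	.                   Group Record [1] ... Group Record [M]       .
 *	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *
 * The checksum covers the whole IGMP message (report header plus group
 * records), and the enclosing IP header is sent with TOS
 * IPTOS_PREC_INTERNETCONTROL, DF set and protocol IPPROTO_IGMP, exactly as
 * filled in above.
 */
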
return "BLOCK_OLD"; 3976 break; 3977 default: 3978 break; 3979 } 3980 return "unknown"; 3981} 3982#endif 3983 3984void 3985igmp_init(struct protosw *pp, struct domain *dp) 3986{ 3987#pragma unused(dp) 3988 static int igmp_initialized = 0; 3989 3990 VERIFY((pp->pr_flags & (PR_INITIALIZED|PR_ATTACHED)) == PR_ATTACHED); 3991 3992 if (igmp_initialized) 3993 return; 3994 igmp_initialized = 1; 3995 3996 IGMP_PRINTF(("%s: initializing\n", __func__)); 3997 3998 igmp_timers_are_running = 0; 3999 4000 /* Setup lock group and attribute for igmp_mtx */ 4001 igmp_mtx_grp_attr = lck_grp_attr_alloc_init(); 4002 igmp_mtx_grp = lck_grp_alloc_init("igmp_mtx", igmp_mtx_grp_attr); 4003 igmp_mtx_attr = lck_attr_alloc_init(); 4004 lck_mtx_init(&igmp_mtx, igmp_mtx_grp, igmp_mtx_attr); 4005 4006 LIST_INIT(&igi_head); 4007 m_raopt = igmp_ra_alloc(); 4008 4009 igi_size = sizeof (struct igmp_ifinfo); 4010 igi_zone = zinit(igi_size, IGI_ZONE_MAX * igi_size, 4011 0, IGI_ZONE_NAME); 4012 if (igi_zone == NULL) { 4013 panic("%s: failed allocating %s", __func__, IGI_ZONE_NAME); 4014 /* NOTREACHED */ 4015 } 4016 zone_change(igi_zone, Z_EXPAND, TRUE); 4017 zone_change(igi_zone, Z_CALLERACCT, FALSE); 4018} 4019