1/*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2007-2009 Bruce Simpson. 5 * Copyright (c) 2005 Robert N. M. Watson. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. The name of the author may not be used to endorse or promote 17 * products derived from this software without specific prior written 18 * permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33/* 34 * IPv4 multicast socket, group, and socket option processing module. 35 */ 36 37#include <sys/cdefs.h> 38__FBSDID("$FreeBSD$"); 39 40#include <sys/param.h> 41#include <sys/systm.h> 42#include <sys/kernel.h> 43#include <sys/lock.h> 44#include <sys/malloc.h> 45#include <sys/mbuf.h> 46#include <sys/protosw.h> 47#include <sys/rmlock.h> 48#include <sys/socket.h> 49#include <sys/socketvar.h> 50#include <sys/protosw.h> 51#include <sys/sysctl.h> 52#include <sys/ktr.h> 53#include <sys/taskqueue.h> 54#include <sys/tree.h> 55 56#include <net/if.h> 57#include <net/if_var.h> 58#include <net/if_dl.h> 59#include <net/route.h> 60#include <net/vnet.h> 61 62#include <net/ethernet.h> 63 64#include <netinet/in.h> 65#include <netinet/in_systm.h> 66#include <netinet/in_fib.h> 67#include <netinet/in_pcb.h> 68#include <netinet/in_var.h> 69#include <netinet/ip_var.h> 70#include <netinet/igmp_var.h> 71 72#ifndef KTR_IGMPV3 73#define KTR_IGMPV3 KTR_INET 74#endif 75 76#ifndef __SOCKUNION_DECLARED 77union sockunion { 78 struct sockaddr_storage ss; 79 struct sockaddr sa; 80 struct sockaddr_dl sdl; 81 struct sockaddr_in sin; 82}; 83typedef union sockunion sockunion_t; 84#define __SOCKUNION_DECLARED 85#endif /* __SOCKUNION_DECLARED */ 86 87static MALLOC_DEFINE(M_INMFILTER, "in_mfilter", 88 "IPv4 multicast PCB-layer source filter"); 89static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group"); 90static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options"); 91static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource", 92 "IPv4 multicast IGMP-layer source filter"); 93 94/* 95 * Locking: 96 * 97 * - Lock order is: Giant, IN_MULTI_LOCK, INP_WLOCK, 98 * IN_MULTI_LIST_LOCK, IGMP_LOCK, IF_ADDR_LOCK. 99 * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however 100 * it can be taken by code in net/if.c also. 101 * - ip_moptions and in_mfilter are covered by the INP_WLOCK. 102 * 103 * struct in_multi is covered by IN_MULTI_LIST_LOCK. There isn't strictly 104 * any need for in_multi itself to be virtualized -- it is bound to an ifp 105 * anyway no matter what happens. 106 */ 107struct mtx in_multi_list_mtx; 108MTX_SYSINIT(in_multi_mtx, &in_multi_list_mtx, "in_multi_list_mtx", MTX_DEF); 109 110struct mtx in_multi_free_mtx; 111MTX_SYSINIT(in_multi_free_mtx, &in_multi_free_mtx, "in_multi_free_mtx", MTX_DEF); 112 113struct sx in_multi_sx; 114SX_SYSINIT(in_multi_sx, &in_multi_sx, "in_multi_sx"); 115 116int ifma_restart; 117 118/* 119 * Functions with non-static linkage defined in this file should be 120 * declared in in_var.h: 121 * imo_multi_filter() 122 * in_addmulti() 123 * in_delmulti() 124 * in_joingroup() 125 * in_joingroup_locked() 126 * in_leavegroup() 127 * in_leavegroup_locked() 128 * and ip_var.h: 129 * inp_freemoptions() 130 * inp_getmoptions() 131 * inp_setmoptions() 132 * 133 * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti() 134 * and in_delmulti(). 135 */ 136static void imf_commit(struct in_mfilter *); 137static int imf_get_source(struct in_mfilter *imf, 138 const struct sockaddr_in *psin, 139 struct in_msource **); 140static struct in_msource * 141 imf_graft(struct in_mfilter *, const uint8_t, 142 const struct sockaddr_in *); 143static void imf_leave(struct in_mfilter *); 144static int imf_prune(struct in_mfilter *, const struct sockaddr_in *); 145static void imf_purge(struct in_mfilter *); 146static void imf_rollback(struct in_mfilter *); 147static void imf_reap(struct in_mfilter *); 148static struct in_mfilter * 149 imo_match_group(const struct ip_moptions *, 150 const struct ifnet *, const struct sockaddr *); 151static struct in_msource * 152 imo_match_source(struct in_mfilter *, const struct sockaddr *); 153static void ims_merge(struct ip_msource *ims, 154 const struct in_msource *lims, const int rollback); 155static int in_getmulti(struct ifnet *, const struct in_addr *, 156 struct in_multi **); 157static int inm_get_source(struct in_multi *inm, const in_addr_t haddr, 158 const int noalloc, struct ip_msource **pims); 159#ifdef KTR 160static int inm_is_ifp_detached(const struct in_multi *); 161#endif 162static int inm_merge(struct in_multi *, /*const*/ struct in_mfilter *); 163static void inm_purge(struct in_multi *); 164static void inm_reap(struct in_multi *); 165static void inm_release(struct in_multi *); 166static struct ip_moptions * 167 inp_findmoptions(struct inpcb *); 168static int inp_get_source_filters(struct inpcb *, struct sockopt *); 169static int inp_join_group(struct inpcb *, struct sockopt *); 170static int inp_leave_group(struct inpcb *, struct sockopt *); 171static struct ifnet * 172 inp_lookup_mcast_ifp(const struct inpcb *, 173 const struct sockaddr_in *, const struct in_addr); 174static int inp_block_unblock_source(struct inpcb *, struct sockopt *); 175static int inp_set_multicast_if(struct inpcb *, struct sockopt *); 176static int inp_set_source_filters(struct inpcb *, struct sockopt *); 177static int sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS); 178 179static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0, 180 "IPv4 multicast"); 181 182static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER; 183SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc, 184 CTLFLAG_RWTUN, &in_mcast_maxgrpsrc, 0, 185 "Max source filters per group"); 186 187static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER; 188SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc, 189 CTLFLAG_RWTUN, &in_mcast_maxsocksrc, 0, 190 "Max source filters per socket"); 191 192int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP; 193SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RWTUN, 194 &in_mcast_loop, 0, "Loopback multicast datagrams by default"); 195 196static SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters, 197 CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters, 198 "Per-interface stack-wide source filters"); 199 200#ifdef KTR 201/* 202 * Inline function which wraps assertions for a valid ifp. 203 * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp 204 * is detached. 205 */ 206static int __inline 207inm_is_ifp_detached(const struct in_multi *inm) 208{ 209 struct ifnet *ifp; 210 211 KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__)); 212 ifp = inm->inm_ifma->ifma_ifp; 213 if (ifp != NULL) { 214 /* 215 * Sanity check that netinet's notion of ifp is the 216 * same as net's. 217 */ 218 KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__)); 219 } 220 221 return (ifp == NULL); 222} 223#endif 224 225/* 226 * Interface detach can happen in a taskqueue thread context, so we must use a 227 * dedicated thread to avoid deadlocks when draining inm_release tasks. 228 */ 229TASKQUEUE_DEFINE_THREAD(inm_free); 230static struct in_multi_head inm_free_list = SLIST_HEAD_INITIALIZER(); 231static void inm_release_task(void *arg __unused, int pending __unused); 232static struct task inm_free_task = TASK_INITIALIZER(0, inm_release_task, NULL); 233 234void 235inm_release_wait(void *arg __unused) 236{ 237 238 /* 239 * Make sure all pending multicast addresses are freed before 240 * the VNET or network device is destroyed: 241 */ 242 taskqueue_drain(taskqueue_inm_free, &inm_free_task); 243} 244#ifdef VIMAGE 245VNET_SYSUNINIT(inm_release_wait, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, inm_release_wait, NULL); 246#endif 247 248void 249inm_release_list_deferred(struct in_multi_head *inmh) 250{ 251 252 if (SLIST_EMPTY(inmh)) 253 return; 254 mtx_lock(&in_multi_free_mtx); 255 SLIST_CONCAT(&inm_free_list, inmh, in_multi, inm_nrele); 256 mtx_unlock(&in_multi_free_mtx); 257 taskqueue_enqueue(taskqueue_inm_free, &inm_free_task); 258} 259 260void 261inm_disconnect(struct in_multi *inm) 262{ 263 struct ifnet *ifp; 264 struct ifmultiaddr *ifma, *ll_ifma; 265 266 ifp = inm->inm_ifp; 267 IF_ADDR_WLOCK_ASSERT(ifp); 268 ifma = inm->inm_ifma; 269 270 if_ref(ifp); 271 if (ifma->ifma_flags & IFMA_F_ENQUEUED) { 272 CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link); 273 ifma->ifma_flags &= ~IFMA_F_ENQUEUED; 274 } 275 MCDPRINTF("removed ifma: %p from %s\n", ifma, ifp->if_xname); 276 if ((ll_ifma = ifma->ifma_llifma) != NULL) { 277 MPASS(ifma != ll_ifma); 278 ifma->ifma_llifma = NULL; 279 MPASS(ll_ifma->ifma_llifma == NULL); 280 MPASS(ll_ifma->ifma_ifp == ifp); 281 if (--ll_ifma->ifma_refcount == 0) { 282 if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) { 283 CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link); 284 ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED; 285 } 286 MCDPRINTF("removed ll_ifma: %p from %s\n", ll_ifma, ifp->if_xname); 287 if_freemulti(ll_ifma); 288 ifma_restart = true; 289 } 290 } 291} 292 293void 294inm_release_deferred(struct in_multi *inm) 295{ 296 struct in_multi_head tmp; 297 298 IN_MULTI_LIST_LOCK_ASSERT(); 299 MPASS(inm->inm_refcount > 0); 300 if (--inm->inm_refcount == 0) { 301 SLIST_INIT(&tmp); 302 inm_disconnect(inm); 303 inm->inm_ifma->ifma_protospec = NULL; 304 SLIST_INSERT_HEAD(&tmp, inm, inm_nrele); 305 inm_release_list_deferred(&tmp); 306 } 307} 308 309static void 310inm_release_task(void *arg __unused, int pending __unused) 311{ 312 struct in_multi_head inm_free_tmp; 313 struct in_multi *inm, *tinm; 314 315 SLIST_INIT(&inm_free_tmp); 316 mtx_lock(&in_multi_free_mtx); 317 SLIST_CONCAT(&inm_free_tmp, &inm_free_list, in_multi, inm_nrele); 318 mtx_unlock(&in_multi_free_mtx); 319 IN_MULTI_LOCK(); 320 SLIST_FOREACH_SAFE(inm, &inm_free_tmp, inm_nrele, tinm) { 321 SLIST_REMOVE_HEAD(&inm_free_tmp, inm_nrele); 322 MPASS(inm); 323 inm_release(inm); 324 } 325 IN_MULTI_UNLOCK(); 326} 327 328/* 329 * Initialize an in_mfilter structure to a known state at t0, t1 330 * with an empty source filter list. 331 */ 332static __inline void 333imf_init(struct in_mfilter *imf, const int st0, const int st1) 334{ 335 memset(imf, 0, sizeof(struct in_mfilter)); 336 RB_INIT(&imf->imf_sources); 337 imf->imf_st[0] = st0; 338 imf->imf_st[1] = st1; 339} 340 341struct in_mfilter * 342ip_mfilter_alloc(const int mflags, const int st0, const int st1) 343{ 344 struct in_mfilter *imf; 345 346 imf = malloc(sizeof(*imf), M_INMFILTER, mflags); 347 if (imf != NULL) 348 imf_init(imf, st0, st1); 349 350 return (imf); 351} 352 353void 354ip_mfilter_free(struct in_mfilter *imf) 355{ 356 357 imf_purge(imf); 358 free(imf, M_INMFILTER); 359} 360 361/* 362 * Function for looking up an in_multi record for an IPv4 multicast address 363 * on a given interface. ifp must be valid. If no record found, return NULL. 364 * The IN_MULTI_LIST_LOCK and IF_ADDR_LOCK on ifp must be held. 365 */ 366struct in_multi * 367inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina) 368{ 369 struct ifmultiaddr *ifma; 370 struct in_multi *inm; 371 372 IN_MULTI_LIST_LOCK_ASSERT(); 373 IF_ADDR_LOCK_ASSERT(ifp); 374 375 inm = NULL; 376 CK_STAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) { 377 if (ifma->ifma_addr->sa_family != AF_INET || 378 ifma->ifma_protospec == NULL) 379 continue; 380 inm = (struct in_multi *)ifma->ifma_protospec; 381 if (inm->inm_addr.s_addr == ina.s_addr) 382 break; 383 inm = NULL; 384 } 385 return (inm); 386} 387 388/* 389 * Wrapper for inm_lookup_locked(). 390 * The IF_ADDR_LOCK will be taken on ifp and released on return. 391 */ 392struct in_multi * 393inm_lookup(struct ifnet *ifp, const struct in_addr ina) 394{ 395 struct in_multi *inm; 396 397 IN_MULTI_LIST_LOCK_ASSERT(); 398 IF_ADDR_RLOCK(ifp); 399 inm = inm_lookup_locked(ifp, ina); 400 IF_ADDR_RUNLOCK(ifp); 401 402 return (inm); 403} 404 405/* 406 * Find an IPv4 multicast group entry for this ip_moptions instance 407 * which matches the specified group, and optionally an interface. 408 * Return its index into the array, or -1 if not found. 409 */ 410static struct in_mfilter * 411imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp, 412 const struct sockaddr *group) 413{ 414 const struct sockaddr_in *gsin; 415 struct in_mfilter *imf; 416 struct in_multi *inm; 417 418 gsin = (const struct sockaddr_in *)group; 419 420 IP_MFILTER_FOREACH(imf, &imo->imo_head) { 421 inm = imf->imf_inm; 422 if (inm == NULL) 423 continue; 424 if ((ifp == NULL || (inm->inm_ifp == ifp)) && 425 in_hosteq(inm->inm_addr, gsin->sin_addr)) { 426 break; 427 } 428 } 429 return (imf); 430} 431 432/* 433 * Find an IPv4 multicast source entry for this imo which matches 434 * the given group index for this socket, and source address. 435 * 436 * NOTE: This does not check if the entry is in-mode, merely if 437 * it exists, which may not be the desired behaviour. 438 */ 439static struct in_msource * 440imo_match_source(struct in_mfilter *imf, const struct sockaddr *src) 441{ 442 struct ip_msource find; 443 struct ip_msource *ims; 444 const sockunion_t *psa; 445 446 KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__)); 447 448 /* Source trees are keyed in host byte order. */ 449 psa = (const sockunion_t *)src; 450 find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr); 451 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 452 453 return ((struct in_msource *)ims); 454} 455 456/* 457 * Perform filtering for multicast datagrams on a socket by group and source. 458 * 459 * Returns 0 if a datagram should be allowed through, or various error codes 460 * if the socket was not a member of the group, or the source was muted, etc. 461 */ 462int 463imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp, 464 const struct sockaddr *group, const struct sockaddr *src) 465{ 466 struct in_mfilter *imf; 467 struct in_msource *ims; 468 int mode; 469 470 KASSERT(ifp != NULL, ("%s: null ifp", __func__)); 471 472 imf = imo_match_group(imo, ifp, group); 473 if (imf == NULL) 474 return (MCAST_NOTGMEMBER); 475 476 /* 477 * Check if the source was included in an (S,G) join. 478 * Allow reception on exclusive memberships by default, 479 * reject reception on inclusive memberships by default. 480 * Exclude source only if an in-mode exclude filter exists. 481 * Include source only if an in-mode include filter exists. 482 * NOTE: We are comparing group state here at IGMP t1 (now) 483 * with socket-layer t0 (since last downcall). 484 */ 485 mode = imf->imf_st[1]; 486 ims = imo_match_source(imf, src); 487 488 if ((ims == NULL && mode == MCAST_INCLUDE) || 489 (ims != NULL && ims->imsl_st[0] != mode)) 490 return (MCAST_NOTSMEMBER); 491 492 return (MCAST_PASS); 493} 494 495/* 496 * Find and return a reference to an in_multi record for (ifp, group), 497 * and bump its reference count. 498 * If one does not exist, try to allocate it, and update link-layer multicast 499 * filters on ifp to listen for group. 500 * Assumes the IN_MULTI lock is held across the call. 501 * Return 0 if successful, otherwise return an appropriate error code. 502 */ 503static int 504in_getmulti(struct ifnet *ifp, const struct in_addr *group, 505 struct in_multi **pinm) 506{ 507 struct sockaddr_in gsin; 508 struct ifmultiaddr *ifma; 509 struct in_ifinfo *ii; 510 struct in_multi *inm; 511 int error; 512 513 IN_MULTI_LOCK_ASSERT(); 514 515 ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET]; 516 IN_MULTI_LIST_LOCK(); 517 inm = inm_lookup(ifp, *group); 518 if (inm != NULL) { 519 /* 520 * If we already joined this group, just bump the 521 * refcount and return it. 522 */ 523 KASSERT(inm->inm_refcount >= 1, 524 ("%s: bad refcount %d", __func__, inm->inm_refcount)); 525 inm_acquire_locked(inm); 526 *pinm = inm; 527 } 528 IN_MULTI_LIST_UNLOCK(); 529 if (inm != NULL) 530 return (0); 531 532 memset(&gsin, 0, sizeof(gsin)); 533 gsin.sin_family = AF_INET; 534 gsin.sin_len = sizeof(struct sockaddr_in); 535 gsin.sin_addr = *group; 536 537 /* 538 * Check if a link-layer group is already associated 539 * with this network-layer group on the given ifnet. 540 */ 541 error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma); 542 if (error != 0) 543 return (error); 544 545 /* XXX ifma_protospec must be covered by IF_ADDR_LOCK */ 546 IN_MULTI_LIST_LOCK(); 547 IF_ADDR_WLOCK(ifp); 548 549 /* 550 * If something other than netinet is occupying the link-layer 551 * group, print a meaningful error message and back out of 552 * the allocation. 553 * Otherwise, bump the refcount on the existing network-layer 554 * group association and return it. 555 */ 556 if (ifma->ifma_protospec != NULL) { 557 inm = (struct in_multi *)ifma->ifma_protospec; 558#ifdef INVARIANTS 559 KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr", 560 __func__)); 561 KASSERT(ifma->ifma_addr->sa_family == AF_INET, 562 ("%s: ifma not AF_INET", __func__)); 563 KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__)); 564 if (inm->inm_ifma != ifma || inm->inm_ifp != ifp || 565 !in_hosteq(inm->inm_addr, *group)) { 566 char addrbuf[INET_ADDRSTRLEN]; 567 568 panic("%s: ifma %p is inconsistent with %p (%s)", 569 __func__, ifma, inm, inet_ntoa_r(*group, addrbuf)); 570 } 571#endif 572 inm_acquire_locked(inm); 573 *pinm = inm; 574 goto out_locked; 575 } 576 577 IF_ADDR_WLOCK_ASSERT(ifp); 578 579 /* 580 * A new in_multi record is needed; allocate and initialize it. 581 * We DO NOT perform an IGMP join as the in_ layer may need to 582 * push an initial source list down to IGMP to support SSM. 583 * 584 * The initial source filter state is INCLUDE, {} as per the RFC. 585 */ 586 inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO); 587 if (inm == NULL) { 588 IF_ADDR_WUNLOCK(ifp); 589 IN_MULTI_LIST_UNLOCK(); 590 if_delmulti_ifma(ifma); 591 return (ENOMEM); 592 } 593 inm->inm_addr = *group; 594 inm->inm_ifp = ifp; 595 inm->inm_igi = ii->ii_igmp; 596 inm->inm_ifma = ifma; 597 inm->inm_refcount = 1; 598 inm->inm_state = IGMP_NOT_MEMBER; 599 mbufq_init(&inm->inm_scq, IGMP_MAX_STATE_CHANGES); 600 inm->inm_st[0].iss_fmode = MCAST_UNDEFINED; 601 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; 602 RB_INIT(&inm->inm_srcs); 603 604 ifma->ifma_protospec = inm; 605 606 *pinm = inm; 607 out_locked: 608 IF_ADDR_WUNLOCK(ifp); 609 IN_MULTI_LIST_UNLOCK(); 610 return (0); 611} 612 613/* 614 * Drop a reference to an in_multi record. 615 * 616 * If the refcount drops to 0, free the in_multi record and 617 * delete the underlying link-layer membership. 618 */ 619static void 620inm_release(struct in_multi *inm) 621{ 622 struct ifmultiaddr *ifma; 623 struct ifnet *ifp; 624 625 CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount); 626 MPASS(inm->inm_refcount == 0); 627 CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm); 628 629 ifma = inm->inm_ifma; 630 ifp = inm->inm_ifp; 631 632 /* XXX this access is not covered by IF_ADDR_LOCK */ 633 CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma); 634 if (ifp != NULL) { 635 CURVNET_SET(ifp->if_vnet); 636 inm_purge(inm); 637 free(inm, M_IPMADDR); 638 if_delmulti_ifma_flags(ifma, 1); 639 CURVNET_RESTORE(); 640 if_rele(ifp); 641 } else { 642 inm_purge(inm); 643 free(inm, M_IPMADDR); 644 if_delmulti_ifma_flags(ifma, 1); 645 } 646} 647 648/* 649 * Clear recorded source entries for a group. 650 * Used by the IGMP code. Caller must hold the IN_MULTI lock. 651 * FIXME: Should reap. 652 */ 653void 654inm_clear_recorded(struct in_multi *inm) 655{ 656 struct ip_msource *ims; 657 658 IN_MULTI_LIST_LOCK_ASSERT(); 659 660 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 661 if (ims->ims_stp) { 662 ims->ims_stp = 0; 663 --inm->inm_st[1].iss_rec; 664 } 665 } 666 KASSERT(inm->inm_st[1].iss_rec == 0, 667 ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec)); 668} 669 670/* 671 * Record a source as pending for a Source-Group IGMPv3 query. 672 * This lives here as it modifies the shared tree. 673 * 674 * inm is the group descriptor. 675 * naddr is the address of the source to record in network-byte order. 676 * 677 * If the net.inet.igmp.sgalloc sysctl is non-zero, we will 678 * lazy-allocate a source node in response to an SG query. 679 * Otherwise, no allocation is performed. This saves some memory 680 * with the trade-off that the source will not be reported to the 681 * router if joined in the window between the query response and 682 * the group actually being joined on the local host. 683 * 684 * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed. 685 * This turns off the allocation of a recorded source entry if 686 * the group has not been joined. 687 * 688 * Return 0 if the source didn't exist or was already marked as recorded. 689 * Return 1 if the source was marked as recorded by this function. 690 * Return <0 if any error occurred (negated errno code). 691 */ 692int 693inm_record_source(struct in_multi *inm, const in_addr_t naddr) 694{ 695 struct ip_msource find; 696 struct ip_msource *ims, *nims; 697 698 IN_MULTI_LIST_LOCK_ASSERT(); 699 700 find.ims_haddr = ntohl(naddr); 701 ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); 702 if (ims && ims->ims_stp) 703 return (0); 704 if (ims == NULL) { 705 if (inm->inm_nsrc == in_mcast_maxgrpsrc) 706 return (-ENOSPC); 707 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE, 708 M_NOWAIT | M_ZERO); 709 if (nims == NULL) 710 return (-ENOMEM); 711 nims->ims_haddr = find.ims_haddr; 712 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims); 713 ++inm->inm_nsrc; 714 ims = nims; 715 } 716 717 /* 718 * Mark the source as recorded and update the recorded 719 * source count. 720 */ 721 ++ims->ims_stp; 722 ++inm->inm_st[1].iss_rec; 723 724 return (1); 725} 726 727/* 728 * Return a pointer to an in_msource owned by an in_mfilter, 729 * given its source address. 730 * Lazy-allocate if needed. If this is a new entry its filter state is 731 * undefined at t0. 732 * 733 * imf is the filter set being modified. 734 * haddr is the source address in *host* byte-order. 735 * 736 * SMPng: May be called with locks held; malloc must not block. 737 */ 738static int 739imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin, 740 struct in_msource **plims) 741{ 742 struct ip_msource find; 743 struct ip_msource *ims, *nims; 744 struct in_msource *lims; 745 int error; 746 747 error = 0; 748 ims = NULL; 749 lims = NULL; 750 751 /* key is host byte order */ 752 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 753 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 754 lims = (struct in_msource *)ims; 755 if (lims == NULL) { 756 if (imf->imf_nsrc == in_mcast_maxsocksrc) 757 return (ENOSPC); 758 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 759 M_NOWAIT | M_ZERO); 760 if (nims == NULL) 761 return (ENOMEM); 762 lims = (struct in_msource *)nims; 763 lims->ims_haddr = find.ims_haddr; 764 lims->imsl_st[0] = MCAST_UNDEFINED; 765 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 766 ++imf->imf_nsrc; 767 } 768 769 *plims = lims; 770 771 return (error); 772} 773 774/* 775 * Graft a source entry into an existing socket-layer filter set, 776 * maintaining any required invariants and checking allocations. 777 * 778 * The source is marked as being in the new filter mode at t1. 779 * 780 * Return the pointer to the new node, otherwise return NULL. 781 */ 782static struct in_msource * 783imf_graft(struct in_mfilter *imf, const uint8_t st1, 784 const struct sockaddr_in *psin) 785{ 786 struct ip_msource *nims; 787 struct in_msource *lims; 788 789 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 790 M_NOWAIT | M_ZERO); 791 if (nims == NULL) 792 return (NULL); 793 lims = (struct in_msource *)nims; 794 lims->ims_haddr = ntohl(psin->sin_addr.s_addr); 795 lims->imsl_st[0] = MCAST_UNDEFINED; 796 lims->imsl_st[1] = st1; 797 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 798 ++imf->imf_nsrc; 799 800 return (lims); 801} 802 803/* 804 * Prune a source entry from an existing socket-layer filter set, 805 * maintaining any required invariants and checking allocations. 806 * 807 * The source is marked as being left at t1, it is not freed. 808 * 809 * Return 0 if no error occurred, otherwise return an errno value. 810 */ 811static int 812imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin) 813{ 814 struct ip_msource find; 815 struct ip_msource *ims; 816 struct in_msource *lims; 817 818 /* key is host byte order */ 819 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 820 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 821 if (ims == NULL) 822 return (ENOENT); 823 lims = (struct in_msource *)ims; 824 lims->imsl_st[1] = MCAST_UNDEFINED; 825 return (0); 826} 827 828/* 829 * Revert socket-layer filter set deltas at t1 to t0 state. 830 */ 831static void 832imf_rollback(struct in_mfilter *imf) 833{ 834 struct ip_msource *ims, *tims; 835 struct in_msource *lims; 836 837 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 838 lims = (struct in_msource *)ims; 839 if (lims->imsl_st[0] == lims->imsl_st[1]) { 840 /* no change at t1 */ 841 continue; 842 } else if (lims->imsl_st[0] != MCAST_UNDEFINED) { 843 /* revert change to existing source at t1 */ 844 lims->imsl_st[1] = lims->imsl_st[0]; 845 } else { 846 /* revert source added t1 */ 847 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 848 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 849 free(ims, M_INMFILTER); 850 imf->imf_nsrc--; 851 } 852 } 853 imf->imf_st[1] = imf->imf_st[0]; 854} 855 856/* 857 * Mark socket-layer filter set as INCLUDE {} at t1. 858 */ 859static void 860imf_leave(struct in_mfilter *imf) 861{ 862 struct ip_msource *ims; 863 struct in_msource *lims; 864 865 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 866 lims = (struct in_msource *)ims; 867 lims->imsl_st[1] = MCAST_UNDEFINED; 868 } 869 imf->imf_st[1] = MCAST_INCLUDE; 870} 871 872/* 873 * Mark socket-layer filter set deltas as committed. 874 */ 875static void 876imf_commit(struct in_mfilter *imf) 877{ 878 struct ip_msource *ims; 879 struct in_msource *lims; 880 881 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 882 lims = (struct in_msource *)ims; 883 lims->imsl_st[0] = lims->imsl_st[1]; 884 } 885 imf->imf_st[0] = imf->imf_st[1]; 886} 887 888/* 889 * Reap unreferenced sources from socket-layer filter set. 890 */ 891static void 892imf_reap(struct in_mfilter *imf) 893{ 894 struct ip_msource *ims, *tims; 895 struct in_msource *lims; 896 897 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 898 lims = (struct in_msource *)ims; 899 if ((lims->imsl_st[0] == MCAST_UNDEFINED) && 900 (lims->imsl_st[1] == MCAST_UNDEFINED)) { 901 CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims); 902 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 903 free(ims, M_INMFILTER); 904 imf->imf_nsrc--; 905 } 906 } 907} 908 909/* 910 * Purge socket-layer filter set. 911 */ 912static void 913imf_purge(struct in_mfilter *imf) 914{ 915 struct ip_msource *ims, *tims; 916 917 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 918 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 919 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 920 free(ims, M_INMFILTER); 921 imf->imf_nsrc--; 922 } 923 imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED; 924 KASSERT(RB_EMPTY(&imf->imf_sources), 925 ("%s: imf_sources not empty", __func__)); 926} 927 928/* 929 * Look up a source filter entry for a multicast group. 930 * 931 * inm is the group descriptor to work with. 932 * haddr is the host-byte-order IPv4 address to look up. 933 * noalloc may be non-zero to suppress allocation of sources. 934 * *pims will be set to the address of the retrieved or allocated source. 935 * 936 * SMPng: NOTE: may be called with locks held. 937 * Return 0 if successful, otherwise return a non-zero error code. 938 */ 939static int 940inm_get_source(struct in_multi *inm, const in_addr_t haddr, 941 const int noalloc, struct ip_msource **pims) 942{ 943 struct ip_msource find; 944 struct ip_msource *ims, *nims; 945 946 find.ims_haddr = haddr; 947 ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); 948 if (ims == NULL && !noalloc) { 949 if (inm->inm_nsrc == in_mcast_maxgrpsrc) 950 return (ENOSPC); 951 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE, 952 M_NOWAIT | M_ZERO); 953 if (nims == NULL) 954 return (ENOMEM); 955 nims->ims_haddr = haddr; 956 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims); 957 ++inm->inm_nsrc; 958 ims = nims; 959#ifdef KTR 960 CTR3(KTR_IGMPV3, "%s: allocated 0x%08x as %p", __func__, 961 haddr, ims); 962#endif 963 } 964 965 *pims = ims; 966 return (0); 967} 968 969/* 970 * Merge socket-layer source into IGMP-layer source. 971 * If rollback is non-zero, perform the inverse of the merge. 972 */ 973static void 974ims_merge(struct ip_msource *ims, const struct in_msource *lims, 975 const int rollback) 976{ 977 int n = rollback ? -1 : 1; 978 979 if (lims->imsl_st[0] == MCAST_EXCLUDE) { 980 CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on 0x%08x", 981 __func__, n, ims->ims_haddr); 982 ims->ims_st[1].ex -= n; 983 } else if (lims->imsl_st[0] == MCAST_INCLUDE) { 984 CTR3(KTR_IGMPV3, "%s: t1 in -= %d on 0x%08x", 985 __func__, n, ims->ims_haddr); 986 ims->ims_st[1].in -= n; 987 } 988 989 if (lims->imsl_st[1] == MCAST_EXCLUDE) { 990 CTR3(KTR_IGMPV3, "%s: t1 ex += %d on 0x%08x", 991 __func__, n, ims->ims_haddr); 992 ims->ims_st[1].ex += n; 993 } else if (lims->imsl_st[1] == MCAST_INCLUDE) { 994 CTR3(KTR_IGMPV3, "%s: t1 in += %d on 0x%08x", 995 __func__, n, ims->ims_haddr); 996 ims->ims_st[1].in += n; 997 } 998} 999 1000/* 1001 * Atomically update the global in_multi state, when a membership's 1002 * filter list is being updated in any way. 1003 * 1004 * imf is the per-inpcb-membership group filter pointer. 1005 * A fake imf may be passed for in-kernel consumers. 1006 * 1007 * XXX This is a candidate for a set-symmetric-difference style loop 1008 * which would eliminate the repeated lookup from root of ims nodes, 1009 * as they share the same key space. 1010 * 1011 * If any error occurred this function will back out of refcounts 1012 * and return a non-zero value. 1013 */ 1014static int 1015inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1016{ 1017 struct ip_msource *ims, *nims; 1018 struct in_msource *lims; 1019 int schanged, error; 1020 int nsrc0, nsrc1; 1021 1022 schanged = 0; 1023 error = 0; 1024 nsrc1 = nsrc0 = 0; 1025 IN_MULTI_LIST_LOCK_ASSERT(); 1026 1027 /* 1028 * Update the source filters first, as this may fail. 1029 * Maintain count of in-mode filters at t0, t1. These are 1030 * used to work out if we transition into ASM mode or not. 1031 * Maintain a count of source filters whose state was 1032 * actually modified by this operation. 1033 */ 1034 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 1035 lims = (struct in_msource *)ims; 1036 if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++; 1037 if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++; 1038 if (lims->imsl_st[0] == lims->imsl_st[1]) continue; 1039 error = inm_get_source(inm, lims->ims_haddr, 0, &nims); 1040 ++schanged; 1041 if (error) 1042 break; 1043 ims_merge(nims, lims, 0); 1044 } 1045 if (error) { 1046 struct ip_msource *bims; 1047 1048 RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) { 1049 lims = (struct in_msource *)ims; 1050 if (lims->imsl_st[0] == lims->imsl_st[1]) 1051 continue; 1052 (void)inm_get_source(inm, lims->ims_haddr, 1, &bims); 1053 if (bims == NULL) 1054 continue; 1055 ims_merge(bims, lims, 1); 1056 } 1057 goto out_reap; 1058 } 1059 1060 CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1", 1061 __func__, nsrc0, nsrc1); 1062 1063 /* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */ 1064 if (imf->imf_st[0] == imf->imf_st[1] && 1065 imf->imf_st[1] == MCAST_INCLUDE) { 1066 if (nsrc1 == 0) { 1067 CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__); 1068 --inm->inm_st[1].iss_in; 1069 } 1070 } 1071 1072 /* Handle filter mode transition on socket. */ 1073 if (imf->imf_st[0] != imf->imf_st[1]) { 1074 CTR3(KTR_IGMPV3, "%s: imf transition %d to %d", 1075 __func__, imf->imf_st[0], imf->imf_st[1]); 1076 1077 if (imf->imf_st[0] == MCAST_EXCLUDE) { 1078 CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__); 1079 --inm->inm_st[1].iss_ex; 1080 } else if (imf->imf_st[0] == MCAST_INCLUDE) { 1081 CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__); 1082 --inm->inm_st[1].iss_in; 1083 } 1084 1085 if (imf->imf_st[1] == MCAST_EXCLUDE) { 1086 CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__); 1087 inm->inm_st[1].iss_ex++; 1088 } else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) { 1089 CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__); 1090 inm->inm_st[1].iss_in++; 1091 } 1092 } 1093 1094 /* 1095 * Track inm filter state in terms of listener counts. 1096 * If there are any exclusive listeners, stack-wide 1097 * membership is exclusive. 1098 * Otherwise, if only inclusive listeners, stack-wide is inclusive. 1099 * If no listeners remain, state is undefined at t1, 1100 * and the IGMP lifecycle for this group should finish. 1101 */ 1102 if (inm->inm_st[1].iss_ex > 0) { 1103 CTR1(KTR_IGMPV3, "%s: transition to EX", __func__); 1104 inm->inm_st[1].iss_fmode = MCAST_EXCLUDE; 1105 } else if (inm->inm_st[1].iss_in > 0) { 1106 CTR1(KTR_IGMPV3, "%s: transition to IN", __func__); 1107 inm->inm_st[1].iss_fmode = MCAST_INCLUDE; 1108 } else { 1109 CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__); 1110 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; 1111 } 1112 1113 /* Decrement ASM listener count on transition out of ASM mode. */ 1114 if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) { 1115 if ((imf->imf_st[1] != MCAST_EXCLUDE) || 1116 (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) { 1117 CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__); 1118 --inm->inm_st[1].iss_asm; 1119 } 1120 } 1121 1122 /* Increment ASM listener count on transition to ASM mode. */ 1123 if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) { 1124 CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__); 1125 inm->inm_st[1].iss_asm++; 1126 } 1127 1128 CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm); 1129 inm_print(inm); 1130 1131out_reap: 1132 if (schanged > 0) { 1133 CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__); 1134 inm_reap(inm); 1135 } 1136 return (error); 1137} 1138 1139/* 1140 * Mark an in_multi's filter set deltas as committed. 1141 * Called by IGMP after a state change has been enqueued. 1142 */ 1143void 1144inm_commit(struct in_multi *inm) 1145{ 1146 struct ip_msource *ims; 1147 1148 CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm); 1149 CTR1(KTR_IGMPV3, "%s: pre commit:", __func__); 1150 inm_print(inm); 1151 1152 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 1153 ims->ims_st[0] = ims->ims_st[1]; 1154 } 1155 inm->inm_st[0] = inm->inm_st[1]; 1156} 1157 1158/* 1159 * Reap unreferenced nodes from an in_multi's filter set. 1160 */ 1161static void 1162inm_reap(struct in_multi *inm) 1163{ 1164 struct ip_msource *ims, *tims; 1165 1166 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) { 1167 if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 || 1168 ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 || 1169 ims->ims_stp != 0) 1170 continue; 1171 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 1172 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); 1173 free(ims, M_IPMSOURCE); 1174 inm->inm_nsrc--; 1175 } 1176} 1177 1178/* 1179 * Purge all source nodes from an in_multi's filter set. 1180 */ 1181static void 1182inm_purge(struct in_multi *inm) 1183{ 1184 struct ip_msource *ims, *tims; 1185 1186 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) { 1187 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 1188 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); 1189 free(ims, M_IPMSOURCE); 1190 inm->inm_nsrc--; 1191 } 1192} 1193 1194/* 1195 * Join a multicast group; unlocked entry point. 1196 * 1197 * SMPng: XXX: in_joingroup() is called from in_control() when Giant 1198 * is not held. Fortunately, ifp is unlikely to have been detached 1199 * at this point, so we assume it's OK to recurse. 1200 */ 1201int 1202in_joingroup(struct ifnet *ifp, const struct in_addr *gina, 1203 /*const*/ struct in_mfilter *imf, struct in_multi **pinm) 1204{ 1205 int error; 1206 1207 IN_MULTI_LOCK(); 1208 error = in_joingroup_locked(ifp, gina, imf, pinm); 1209 IN_MULTI_UNLOCK(); 1210 1211 return (error); 1212} 1213 1214/* 1215 * Join a multicast group; real entry point. 1216 * 1217 * Only preserves atomicity at inm level. 1218 * NOTE: imf argument cannot be const due to sys/tree.h limitations. 1219 * 1220 * If the IGMP downcall fails, the group is not joined, and an error 1221 * code is returned. 1222 */ 1223int 1224in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina, 1225 /*const*/ struct in_mfilter *imf, struct in_multi **pinm) 1226{ 1227 struct in_mfilter timf; 1228 struct in_multi *inm; 1229 int error; 1230 1231 IN_MULTI_LOCK_ASSERT(); 1232 IN_MULTI_LIST_UNLOCK_ASSERT(); 1233 1234 CTR4(KTR_IGMPV3, "%s: join 0x%08x on %p(%s))", __func__, 1235 ntohl(gina->s_addr), ifp, ifp->if_xname); 1236 1237 error = 0; 1238 inm = NULL; 1239 1240 /* 1241 * If no imf was specified (i.e. kernel consumer), 1242 * fake one up and assume it is an ASM join. 1243 */ 1244 if (imf == NULL) { 1245 imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE); 1246 imf = &timf; 1247 } 1248 1249 error = in_getmulti(ifp, gina, &inm); 1250 if (error) { 1251 CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__); 1252 return (error); 1253 } 1254 IN_MULTI_LIST_LOCK(); 1255 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1256 error = inm_merge(inm, imf); 1257 if (error) { 1258 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1259 goto out_inm_release; 1260 } 1261 1262 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1263 error = igmp_change_state(inm); 1264 if (error) { 1265 CTR1(KTR_IGMPV3, "%s: failed to update source", __func__); 1266 goto out_inm_release; 1267 } 1268 1269 out_inm_release: 1270 if (error) { 1271 1272 CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm); 1273 inm_release_deferred(inm); 1274 } else { 1275 *pinm = inm; 1276 } 1277 IN_MULTI_LIST_UNLOCK(); 1278 1279 return (error); 1280} 1281 1282/* 1283 * Leave a multicast group; unlocked entry point. 1284 */ 1285int 1286in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1287{ 1288 int error; 1289 1290 IN_MULTI_LOCK(); 1291 error = in_leavegroup_locked(inm, imf); 1292 IN_MULTI_UNLOCK(); 1293 1294 return (error); 1295} 1296 1297/* 1298 * Leave a multicast group; real entry point. 1299 * All source filters will be expunged. 1300 * 1301 * Only preserves atomicity at inm level. 1302 * 1303 * Holding the write lock for the INP which contains imf 1304 * is highly advisable. We can't assert for it as imf does not 1305 * contain a back-pointer to the owning inp. 1306 * 1307 * Note: This is not the same as inm_release(*) as this function also 1308 * makes a state change downcall into IGMP. 1309 */ 1310int 1311in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1312{ 1313 struct in_mfilter timf; 1314 int error; 1315 1316 error = 0; 1317 1318 IN_MULTI_LOCK_ASSERT(); 1319 IN_MULTI_LIST_UNLOCK_ASSERT(); 1320 1321 CTR5(KTR_IGMPV3, "%s: leave inm %p, 0x%08x/%s, imf %p", __func__, 1322 inm, ntohl(inm->inm_addr.s_addr), 1323 (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname), 1324 imf); 1325 1326 /* 1327 * If no imf was specified (i.e. kernel consumer), 1328 * fake one up and assume it is an ASM join. 1329 */ 1330 if (imf == NULL) { 1331 imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED); 1332 imf = &timf; 1333 } 1334 1335 /* 1336 * Begin state merge transaction at IGMP layer. 1337 * 1338 * As this particular invocation should not cause any memory 1339 * to be allocated, and there is no opportunity to roll back 1340 * the transaction, it MUST NOT fail. 1341 */ 1342 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1343 IN_MULTI_LIST_LOCK(); 1344 error = inm_merge(inm, imf); 1345 KASSERT(error == 0, ("%s: failed to merge inm state", __func__)); 1346 1347 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1348 CURVNET_SET(inm->inm_ifp->if_vnet); 1349 error = igmp_change_state(inm); 1350 IF_ADDR_WLOCK(inm->inm_ifp); 1351 inm_release_deferred(inm); 1352 IF_ADDR_WUNLOCK(inm->inm_ifp); 1353 IN_MULTI_LIST_UNLOCK(); 1354 CURVNET_RESTORE(); 1355 if (error) 1356 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 1357 1358 CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm); 1359 1360 return (error); 1361} 1362 1363/*#ifndef BURN_BRIDGES*/ 1364/* 1365 * Join an IPv4 multicast group in (*,G) exclusive mode. 1366 * The group must be a 224.0.0.0/24 link-scope group. 1367 * This KPI is for legacy kernel consumers only. 1368 */ 1369struct in_multi * 1370in_addmulti(struct in_addr *ap, struct ifnet *ifp) 1371{ 1372 struct in_multi *pinm; 1373 int error; 1374#ifdef INVARIANTS 1375 char addrbuf[INET_ADDRSTRLEN]; 1376#endif 1377 1378 KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)), 1379 ("%s: %s not in 224.0.0.0/24", __func__, 1380 inet_ntoa_r(*ap, addrbuf))); 1381 1382 error = in_joingroup(ifp, ap, NULL, &pinm); 1383 if (error != 0) 1384 pinm = NULL; 1385 1386 return (pinm); 1387} 1388 1389/* 1390 * Block or unblock an ASM multicast source on an inpcb. 1391 * This implements the delta-based API described in RFC 3678. 1392 * 1393 * The delta-based API applies only to exclusive-mode memberships. 1394 * An IGMP downcall will be performed. 1395 * 1396 * SMPng: NOTE: Must take Giant as a join may create a new ifma. 1397 * 1398 * Return 0 if successful, otherwise return an appropriate error code. 1399 */ 1400static int 1401inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) 1402{ 1403 struct group_source_req gsr; 1404 struct rm_priotracker in_ifa_tracker; 1405 sockunion_t *gsa, *ssa; 1406 struct ifnet *ifp; 1407 struct in_mfilter *imf; 1408 struct ip_moptions *imo; 1409 struct in_msource *ims; 1410 struct in_multi *inm; 1411 uint16_t fmode; 1412 int error, doblock; 1413 1414 ifp = NULL; 1415 error = 0; 1416 doblock = 0; 1417 1418 memset(&gsr, 0, sizeof(struct group_source_req)); 1419 gsa = (sockunion_t *)&gsr.gsr_group; 1420 ssa = (sockunion_t *)&gsr.gsr_source; 1421 1422 switch (sopt->sopt_name) { 1423 case IP_BLOCK_SOURCE: 1424 case IP_UNBLOCK_SOURCE: { 1425 struct ip_mreq_source mreqs; 1426 1427 error = sooptcopyin(sopt, &mreqs, 1428 sizeof(struct ip_mreq_source), 1429 sizeof(struct ip_mreq_source)); 1430 if (error) 1431 return (error); 1432 1433 gsa->sin.sin_family = AF_INET; 1434 gsa->sin.sin_len = sizeof(struct sockaddr_in); 1435 gsa->sin.sin_addr = mreqs.imr_multiaddr; 1436 1437 ssa->sin.sin_family = AF_INET; 1438 ssa->sin.sin_len = sizeof(struct sockaddr_in); 1439 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 1440 1441 if (!in_nullhost(mreqs.imr_interface)) { 1442 IN_IFADDR_RLOCK(&in_ifa_tracker); 1443 INADDR_TO_IFP(mreqs.imr_interface, ifp); 1444 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 1445 } 1446 if (sopt->sopt_name == IP_BLOCK_SOURCE) 1447 doblock = 1; 1448 1449 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 1450 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 1451 break; 1452 } 1453 1454 case MCAST_BLOCK_SOURCE: 1455 case MCAST_UNBLOCK_SOURCE: 1456 error = sooptcopyin(sopt, &gsr, 1457 sizeof(struct group_source_req), 1458 sizeof(struct group_source_req)); 1459 if (error) 1460 return (error); 1461 1462 if (gsa->sin.sin_family != AF_INET || 1463 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 1464 return (EINVAL); 1465 1466 if (ssa->sin.sin_family != AF_INET || 1467 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 1468 return (EINVAL); 1469 1470 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 1471 return (EADDRNOTAVAIL); 1472 1473 ifp = ifnet_byindex(gsr.gsr_interface); 1474 1475 if (sopt->sopt_name == MCAST_BLOCK_SOURCE) 1476 doblock = 1; 1477 break; 1478 1479 default: 1480 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 1481 __func__, sopt->sopt_name); 1482 return (EOPNOTSUPP); 1483 break; 1484 } 1485 1486 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 1487 return (EINVAL); 1488 1489 IN_MULTI_LOCK(); 1490 1491 /* 1492 * Check if we are actually a member of this group. 1493 */ 1494 imo = inp_findmoptions(inp); 1495 imf = imo_match_group(imo, ifp, &gsa->sa); 1496 if (imf == NULL) { 1497 error = EADDRNOTAVAIL; 1498 goto out_inp_locked; 1499 } 1500 inm = imf->imf_inm; 1501 1502 /* 1503 * Attempting to use the delta-based API on an 1504 * non exclusive-mode membership is an error. 1505 */ 1506 fmode = imf->imf_st[0]; 1507 if (fmode != MCAST_EXCLUDE) { 1508 error = EINVAL; 1509 goto out_inp_locked; 1510 } 1511 1512 /* 1513 * Deal with error cases up-front: 1514 * Asked to block, but already blocked; or 1515 * Asked to unblock, but nothing to unblock. 1516 * If adding a new block entry, allocate it. 1517 */ 1518 ims = imo_match_source(imf, &ssa->sa); 1519 if ((ims != NULL && doblock) || (ims == NULL && !doblock)) { 1520 CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", __func__, 1521 ntohl(ssa->sin.sin_addr.s_addr), doblock ? "" : "not "); 1522 error = EADDRNOTAVAIL; 1523 goto out_inp_locked; 1524 } 1525 1526 INP_WLOCK_ASSERT(inp); 1527 1528 /* 1529 * Begin state merge transaction at socket layer. 1530 */ 1531 if (doblock) { 1532 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 1533 ims = imf_graft(imf, fmode, &ssa->sin); 1534 if (ims == NULL) 1535 error = ENOMEM; 1536 } else { 1537 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 1538 error = imf_prune(imf, &ssa->sin); 1539 } 1540 1541 if (error) { 1542 CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__); 1543 goto out_imf_rollback; 1544 } 1545 1546 /* 1547 * Begin state merge transaction at IGMP layer. 1548 */ 1549 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1550 IN_MULTI_LIST_LOCK(); 1551 error = inm_merge(inm, imf); 1552 if (error) { 1553 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1554 IN_MULTI_LIST_UNLOCK(); 1555 goto out_imf_rollback; 1556 } 1557 1558 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1559 error = igmp_change_state(inm); 1560 IN_MULTI_LIST_UNLOCK(); 1561 if (error) 1562 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 1563 1564out_imf_rollback: 1565 if (error) 1566 imf_rollback(imf); 1567 else 1568 imf_commit(imf); 1569 1570 imf_reap(imf); 1571 1572out_inp_locked: 1573 INP_WUNLOCK(inp); 1574 IN_MULTI_UNLOCK(); 1575 return (error); 1576} 1577 1578/* 1579 * Given an inpcb, return its multicast options structure pointer. Accepts 1580 * an unlocked inpcb pointer, but will return it locked. May sleep. 1581 * 1582 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 1583 * SMPng: NOTE: Returns with the INP write lock held. 1584 */ 1585static struct ip_moptions * 1586inp_findmoptions(struct inpcb *inp) 1587{ 1588 struct ip_moptions *imo; 1589 1590 INP_WLOCK(inp); 1591 if (inp->inp_moptions != NULL) 1592 return (inp->inp_moptions); 1593 1594 INP_WUNLOCK(inp); 1595 1596 imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK); 1597 1598 imo->imo_multicast_ifp = NULL; 1599 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1600 imo->imo_multicast_vif = -1; 1601 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1602 imo->imo_multicast_loop = in_mcast_loop; 1603 STAILQ_INIT(&imo->imo_head); 1604 1605 INP_WLOCK(inp); 1606 if (inp->inp_moptions != NULL) { 1607 free(imo, M_IPMOPTS); 1608 return (inp->inp_moptions); 1609 } 1610 inp->inp_moptions = imo; 1611 return (imo); 1612} 1613 1614static void 1615inp_gcmoptions(struct ip_moptions *imo) 1616{ 1617 struct in_mfilter *imf; 1618 struct in_multi *inm; 1619 struct ifnet *ifp; 1620 1621 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { 1622 ip_mfilter_remove(&imo->imo_head, imf); 1623 1624 imf_leave(imf); 1625 if ((inm = imf->imf_inm) != NULL) { 1626 if ((ifp = inm->inm_ifp) != NULL) { 1627 CURVNET_SET(ifp->if_vnet); 1628 (void)in_leavegroup(inm, imf); 1629 CURVNET_RESTORE(); 1630 } else { 1631 (void)in_leavegroup(inm, imf); 1632 } 1633 } 1634 ip_mfilter_free(imf); 1635 } 1636 free(imo, M_IPMOPTS); 1637} 1638 1639/* 1640 * Discard the IP multicast options (and source filters). To minimize 1641 * the amount of work done while holding locks such as the INP's 1642 * pcbinfo lock (which is used in the receive path), the free 1643 * operation is deferred to the epoch callback task. 1644 */ 1645void 1646inp_freemoptions(struct ip_moptions *imo) 1647{ 1648 if (imo == NULL) 1649 return; 1650 inp_gcmoptions(imo); 1651} 1652 1653/* 1654 * Atomically get source filters on a socket for an IPv4 multicast group. 1655 * Called with INP lock held; returns with lock released. 1656 */ 1657static int 1658inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt) 1659{ 1660 struct __msfilterreq msfr; 1661 sockunion_t *gsa; 1662 struct ifnet *ifp; 1663 struct ip_moptions *imo; 1664 struct in_mfilter *imf; 1665 struct ip_msource *ims; 1666 struct in_msource *lims; 1667 struct sockaddr_in *psin; 1668 struct sockaddr_storage *ptss; 1669 struct sockaddr_storage *tss; 1670 int error; 1671 size_t nsrcs, ncsrcs; 1672 1673 INP_WLOCK_ASSERT(inp); 1674 1675 imo = inp->inp_moptions; 1676 KASSERT(imo != NULL, ("%s: null ip_moptions", __func__)); 1677 1678 INP_WUNLOCK(inp); 1679 1680 error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), 1681 sizeof(struct __msfilterreq)); 1682 if (error) 1683 return (error); 1684 1685 if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) 1686 return (EINVAL); 1687 1688 ifp = ifnet_byindex(msfr.msfr_ifindex); 1689 if (ifp == NULL) 1690 return (EINVAL); 1691 1692 INP_WLOCK(inp); 1693 1694 /* 1695 * Lookup group on the socket. 1696 */ 1697 gsa = (sockunion_t *)&msfr.msfr_group; 1698 imf = imo_match_group(imo, ifp, &gsa->sa); 1699 if (imf == NULL) { 1700 INP_WUNLOCK(inp); 1701 return (EADDRNOTAVAIL); 1702 } 1703 1704 /* 1705 * Ignore memberships which are in limbo. 1706 */ 1707 if (imf->imf_st[1] == MCAST_UNDEFINED) { 1708 INP_WUNLOCK(inp); 1709 return (EAGAIN); 1710 } 1711 msfr.msfr_fmode = imf->imf_st[1]; 1712 1713 /* 1714 * If the user specified a buffer, copy out the source filter 1715 * entries to userland gracefully. 1716 * We only copy out the number of entries which userland 1717 * has asked for, but we always tell userland how big the 1718 * buffer really needs to be. 1719 */ 1720 if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) 1721 msfr.msfr_nsrcs = in_mcast_maxsocksrc; 1722 tss = NULL; 1723 if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) { 1724 tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, 1725 M_TEMP, M_NOWAIT | M_ZERO); 1726 if (tss == NULL) { 1727 INP_WUNLOCK(inp); 1728 return (ENOBUFS); 1729 } 1730 } 1731 1732 /* 1733 * Count number of sources in-mode at t0. 1734 * If buffer space exists and remains, copy out source entries. 1735 */ 1736 nsrcs = msfr.msfr_nsrcs; 1737 ncsrcs = 0; 1738 ptss = tss; 1739 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 1740 lims = (struct in_msource *)ims; 1741 if (lims->imsl_st[0] == MCAST_UNDEFINED || 1742 lims->imsl_st[0] != imf->imf_st[0]) 1743 continue; 1744 ++ncsrcs; 1745 if (tss != NULL && nsrcs > 0) { 1746 psin = (struct sockaddr_in *)ptss; 1747 psin->sin_family = AF_INET; 1748 psin->sin_len = sizeof(struct sockaddr_in); 1749 psin->sin_addr.s_addr = htonl(lims->ims_haddr); 1750 psin->sin_port = 0; 1751 ++ptss; 1752 --nsrcs; 1753 } 1754 } 1755 1756 INP_WUNLOCK(inp); 1757 1758 if (tss != NULL) { 1759 error = copyout(tss, msfr.msfr_srcs, 1760 sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); 1761 free(tss, M_TEMP); 1762 if (error) 1763 return (error); 1764 } 1765 1766 msfr.msfr_nsrcs = ncsrcs; 1767 error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq)); 1768 1769 return (error); 1770} 1771 1772/* 1773 * Return the IP multicast options in response to user getsockopt(). 1774 */ 1775int 1776inp_getmoptions(struct inpcb *inp, struct sockopt *sopt) 1777{ 1778 struct rm_priotracker in_ifa_tracker; 1779 struct ip_mreqn mreqn; 1780 struct ip_moptions *imo; 1781 struct ifnet *ifp; 1782 struct in_ifaddr *ia; 1783 int error, optval; 1784 u_char coptval; 1785 1786 INP_WLOCK(inp); 1787 imo = inp->inp_moptions; 1788 /* 1789 * If socket is neither of type SOCK_RAW or SOCK_DGRAM, 1790 * or is a divert socket, reject it. 1791 */ 1792 if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || 1793 (inp->inp_socket->so_proto->pr_type != SOCK_RAW && 1794 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) { 1795 INP_WUNLOCK(inp); 1796 return (EOPNOTSUPP); 1797 } 1798 1799 error = 0; 1800 switch (sopt->sopt_name) { 1801 case IP_MULTICAST_VIF: 1802 if (imo != NULL) 1803 optval = imo->imo_multicast_vif; 1804 else 1805 optval = -1; 1806 INP_WUNLOCK(inp); 1807 error = sooptcopyout(sopt, &optval, sizeof(int)); 1808 break; 1809 1810 case IP_MULTICAST_IF: 1811 memset(&mreqn, 0, sizeof(struct ip_mreqn)); 1812 if (imo != NULL) { 1813 ifp = imo->imo_multicast_ifp; 1814 if (!in_nullhost(imo->imo_multicast_addr)) { 1815 mreqn.imr_address = imo->imo_multicast_addr; 1816 } else if (ifp != NULL) { 1817 mreqn.imr_ifindex = ifp->if_index; 1818 NET_EPOCH_ENTER(); 1819 IFP_TO_IA(ifp, ia, &in_ifa_tracker); 1820 if (ia != NULL) 1821 mreqn.imr_address = 1822 IA_SIN(ia)->sin_addr; 1823 NET_EPOCH_EXIT(); 1824 } 1825 } 1826 INP_WUNLOCK(inp); 1827 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { 1828 error = sooptcopyout(sopt, &mreqn, 1829 sizeof(struct ip_mreqn)); 1830 } else { 1831 error = sooptcopyout(sopt, &mreqn.imr_address, 1832 sizeof(struct in_addr)); 1833 } 1834 break; 1835 1836 case IP_MULTICAST_TTL: 1837 if (imo == NULL) 1838 optval = coptval = IP_DEFAULT_MULTICAST_TTL; 1839 else 1840 optval = coptval = imo->imo_multicast_ttl; 1841 INP_WUNLOCK(inp); 1842 if (sopt->sopt_valsize == sizeof(u_char)) 1843 error = sooptcopyout(sopt, &coptval, sizeof(u_char)); 1844 else 1845 error = sooptcopyout(sopt, &optval, sizeof(int)); 1846 break; 1847 1848 case IP_MULTICAST_LOOP: 1849 if (imo == NULL) 1850 optval = coptval = IP_DEFAULT_MULTICAST_LOOP; 1851 else 1852 optval = coptval = imo->imo_multicast_loop; 1853 INP_WUNLOCK(inp); 1854 if (sopt->sopt_valsize == sizeof(u_char)) 1855 error = sooptcopyout(sopt, &coptval, sizeof(u_char)); 1856 else 1857 error = sooptcopyout(sopt, &optval, sizeof(int)); 1858 break; 1859 1860 case IP_MSFILTER: 1861 if (imo == NULL) { 1862 error = EADDRNOTAVAIL; 1863 INP_WUNLOCK(inp); 1864 } else { 1865 error = inp_get_source_filters(inp, sopt); 1866 } 1867 break; 1868 1869 default: 1870 INP_WUNLOCK(inp); 1871 error = ENOPROTOOPT; 1872 break; 1873 } 1874 1875 INP_UNLOCK_ASSERT(inp); 1876 1877 return (error); 1878} 1879 1880/* 1881 * Look up the ifnet to use for a multicast group membership, 1882 * given the IPv4 address of an interface, and the IPv4 group address. 1883 * 1884 * This routine exists to support legacy multicast applications 1885 * which do not understand that multicast memberships are scoped to 1886 * specific physical links in the networking stack, or which need 1887 * to join link-scope groups before IPv4 addresses are configured. 1888 * 1889 * If inp is non-NULL, use this socket's current FIB number for any 1890 * required FIB lookup. 1891 * If ina is INADDR_ANY, look up the group address in the unicast FIB, 1892 * and use its ifp; usually, this points to the default next-hop. 1893 * 1894 * If the FIB lookup fails, attempt to use the first non-loopback 1895 * interface with multicast capability in the system as a 1896 * last resort. The legacy IPv4 ASM API requires that we do 1897 * this in order to allow groups to be joined when the routing 1898 * table has not yet been populated during boot. 1899 * 1900 * Returns NULL if no ifp could be found. 1901 * 1902 * FUTURE: Implement IPv4 source-address selection. 1903 */ 1904static struct ifnet * 1905inp_lookup_mcast_ifp(const struct inpcb *inp, 1906 const struct sockaddr_in *gsin, const struct in_addr ina) 1907{ 1908 struct rm_priotracker in_ifa_tracker; 1909 struct ifnet *ifp; 1910 struct nhop4_basic nh4; 1911 uint32_t fibnum; 1912 1913 KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__)); 1914 KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)), 1915 ("%s: not multicast", __func__)); 1916 1917 ifp = NULL; 1918 if (!in_nullhost(ina)) { 1919 IN_IFADDR_RLOCK(&in_ifa_tracker); 1920 INADDR_TO_IFP(ina, ifp); 1921 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 1922 } else { 1923 fibnum = inp ? inp->inp_inc.inc_fibnum : 0; 1924 if (fib4_lookup_nh_basic(fibnum, gsin->sin_addr, 0, 0, &nh4)==0) 1925 ifp = nh4.nh_ifp; 1926 else { 1927 struct in_ifaddr *ia; 1928 struct ifnet *mifp; 1929 1930 mifp = NULL; 1931 IN_IFADDR_RLOCK(&in_ifa_tracker); 1932 CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { 1933 mifp = ia->ia_ifp; 1934 if (!(mifp->if_flags & IFF_LOOPBACK) && 1935 (mifp->if_flags & IFF_MULTICAST)) { 1936 ifp = mifp; 1937 break; 1938 } 1939 } 1940 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 1941 } 1942 } 1943 1944 return (ifp); 1945} 1946 1947/* 1948 * Join an IPv4 multicast group, possibly with a source. 1949 */ 1950static int 1951inp_join_group(struct inpcb *inp, struct sockopt *sopt) 1952{ 1953 struct group_source_req gsr; 1954 sockunion_t *gsa, *ssa; 1955 struct ifnet *ifp; 1956 struct in_mfilter *imf; 1957 struct ip_moptions *imo; 1958 struct in_multi *inm; 1959 struct in_msource *lims; 1960 int error, is_new; 1961 1962 ifp = NULL; 1963 lims = NULL; 1964 error = 0; 1965 1966 memset(&gsr, 0, sizeof(struct group_source_req)); 1967 gsa = (sockunion_t *)&gsr.gsr_group; 1968 gsa->ss.ss_family = AF_UNSPEC; 1969 ssa = (sockunion_t *)&gsr.gsr_source; 1970 ssa->ss.ss_family = AF_UNSPEC; 1971 1972 switch (sopt->sopt_name) { 1973 case IP_ADD_MEMBERSHIP: { 1974 struct ip_mreqn mreqn; 1975 1976 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) 1977 error = sooptcopyin(sopt, &mreqn, 1978 sizeof(struct ip_mreqn), sizeof(struct ip_mreqn)); 1979 else 1980 error = sooptcopyin(sopt, &mreqn, 1981 sizeof(struct ip_mreq), sizeof(struct ip_mreq)); 1982 if (error) 1983 return (error); 1984 1985 gsa->sin.sin_family = AF_INET; 1986 gsa->sin.sin_len = sizeof(struct sockaddr_in); 1987 gsa->sin.sin_addr = mreqn.imr_multiaddr; 1988 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 1989 return (EINVAL); 1990 1991 if (sopt->sopt_valsize == sizeof(struct ip_mreqn) && 1992 mreqn.imr_ifindex != 0) 1993 ifp = ifnet_byindex(mreqn.imr_ifindex); 1994 else 1995 ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, 1996 mreqn.imr_address); 1997 break; 1998 } 1999 case IP_ADD_SOURCE_MEMBERSHIP: { 2000 struct ip_mreq_source mreqs; 2001 2002 error = sooptcopyin(sopt, &mreqs, sizeof(struct ip_mreq_source), 2003 sizeof(struct ip_mreq_source)); 2004 if (error) 2005 return (error); 2006 2007 gsa->sin.sin_family = ssa->sin.sin_family = AF_INET; 2008 gsa->sin.sin_len = ssa->sin.sin_len = 2009 sizeof(struct sockaddr_in); 2010 2011 gsa->sin.sin_addr = mreqs.imr_multiaddr; 2012 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2013 return (EINVAL); 2014 2015 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 2016 2017 ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, 2018 mreqs.imr_interface); 2019 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 2020 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 2021 break; 2022 } 2023 2024 case MCAST_JOIN_GROUP: 2025 case MCAST_JOIN_SOURCE_GROUP: 2026 if (sopt->sopt_name == MCAST_JOIN_GROUP) { 2027 error = sooptcopyin(sopt, &gsr, 2028 sizeof(struct group_req), 2029 sizeof(struct group_req)); 2030 } else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 2031 error = sooptcopyin(sopt, &gsr, 2032 sizeof(struct group_source_req), 2033 sizeof(struct group_source_req)); 2034 } 2035 if (error) 2036 return (error); 2037 2038 if (gsa->sin.sin_family != AF_INET || 2039 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2040 return (EINVAL); 2041 2042 /* 2043 * Overwrite the port field if present, as the sockaddr 2044 * being copied in may be matched with a binary comparison. 2045 */ 2046 gsa->sin.sin_port = 0; 2047 if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 2048 if (ssa->sin.sin_family != AF_INET || 2049 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2050 return (EINVAL); 2051 ssa->sin.sin_port = 0; 2052 } 2053 2054 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2055 return (EINVAL); 2056 2057 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 2058 return (EADDRNOTAVAIL); 2059 ifp = ifnet_byindex(gsr.gsr_interface); 2060 break; 2061 2062 default: 2063 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2064 __func__, sopt->sopt_name); 2065 return (EOPNOTSUPP); 2066 break; 2067 } 2068 2069 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) 2070 return (EADDRNOTAVAIL); 2071 2072 IN_MULTI_LOCK(); 2073 2074 /* 2075 * Find the membership in the membership list. 2076 */ 2077 imo = inp_findmoptions(inp); 2078 imf = imo_match_group(imo, ifp, &gsa->sa); 2079 if (imf == NULL) { 2080 is_new = 1; 2081 inm = NULL; 2082 2083 if (ip_mfilter_count(&imo->imo_head) >= IP_MAX_MEMBERSHIPS) { 2084 error = ENOMEM; 2085 goto out_inp_locked; 2086 } 2087 } else { 2088 is_new = 0; 2089 inm = imf->imf_inm; 2090 2091 if (ssa->ss.ss_family != AF_UNSPEC) { 2092 /* 2093 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership 2094 * is an error. On an existing inclusive membership, 2095 * it just adds the source to the filter list. 2096 */ 2097 if (imf->imf_st[1] != MCAST_INCLUDE) { 2098 error = EINVAL; 2099 goto out_inp_locked; 2100 } 2101 /* 2102 * Throw out duplicates. 2103 * 2104 * XXX FIXME: This makes a naive assumption that 2105 * even if entries exist for *ssa in this imf, 2106 * they will be rejected as dupes, even if they 2107 * are not valid in the current mode (in-mode). 2108 * 2109 * in_msource is transactioned just as for anything 2110 * else in SSM -- but note naive use of inm_graft() 2111 * below for allocating new filter entries. 2112 * 2113 * This is only an issue if someone mixes the 2114 * full-state SSM API with the delta-based API, 2115 * which is discouraged in the relevant RFCs. 2116 */ 2117 lims = imo_match_source(imf, &ssa->sa); 2118 if (lims != NULL /*&& 2119 lims->imsl_st[1] == MCAST_INCLUDE*/) { 2120 error = EADDRNOTAVAIL; 2121 goto out_inp_locked; 2122 } 2123 } else { 2124 /* 2125 * MCAST_JOIN_GROUP on an existing exclusive 2126 * membership is an error; return EADDRINUSE 2127 * to preserve 4.4BSD API idempotence, and 2128 * avoid tedious detour to code below. 2129 * NOTE: This is bending RFC 3678 a bit. 2130 * 2131 * On an existing inclusive membership, this is also 2132 * an error; if you want to change filter mode, 2133 * you must use the userland API setsourcefilter(). 2134 * XXX We don't reject this for imf in UNDEFINED 2135 * state at t1, because allocation of a filter 2136 * is atomic with allocation of a membership. 2137 */ 2138 error = EINVAL; 2139 if (imf->imf_st[1] == MCAST_EXCLUDE) 2140 error = EADDRINUSE; 2141 goto out_inp_locked; 2142 } 2143 } 2144 2145 /* 2146 * Begin state merge transaction at socket layer. 2147 */ 2148 INP_WLOCK_ASSERT(inp); 2149 2150 /* 2151 * Graft new source into filter list for this inpcb's 2152 * membership of the group. The in_multi may not have 2153 * been allocated yet if this is a new membership, however, 2154 * the in_mfilter slot will be allocated and must be initialized. 2155 * 2156 * Note: Grafting of exclusive mode filters doesn't happen 2157 * in this path. 2158 * XXX: Should check for non-NULL lims (node exists but may 2159 * not be in-mode) for interop with full-state API. 2160 */ 2161 if (ssa->ss.ss_family != AF_UNSPEC) { 2162 /* Membership starts in IN mode */ 2163 if (is_new) { 2164 CTR1(KTR_IGMPV3, "%s: new join w/source", __func__); 2165 imf = ip_mfilter_alloc(M_NOWAIT, MCAST_UNDEFINED, MCAST_INCLUDE); 2166 if (imf == NULL) { 2167 error = ENOMEM; 2168 goto out_inp_locked; 2169 } 2170 } else { 2171 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 2172 } 2173 lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin); 2174 if (lims == NULL) { 2175 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2176 __func__); 2177 error = ENOMEM; 2178 goto out_inp_locked; 2179 } 2180 } else { 2181 /* No address specified; Membership starts in EX mode */ 2182 if (is_new) { 2183 CTR1(KTR_IGMPV3, "%s: new join w/o source", __func__); 2184 imf = ip_mfilter_alloc(M_NOWAIT, MCAST_UNDEFINED, MCAST_EXCLUDE); 2185 if (imf == NULL) { 2186 error = ENOMEM; 2187 goto out_inp_locked; 2188 } 2189 } 2190 } 2191 2192 /* 2193 * Begin state merge transaction at IGMP layer. 2194 */ 2195 if (is_new) { 2196 in_pcbref(inp); 2197 INP_WUNLOCK(inp); 2198 2199 error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf, 2200 &imf->imf_inm); 2201 2202 INP_WLOCK(inp); 2203 if (in_pcbrele_wlocked(inp)) { 2204 error = ENXIO; 2205 goto out_inp_unlocked; 2206 } 2207 if (error) { 2208 CTR1(KTR_IGMPV3, "%s: in_joingroup_locked failed", 2209 __func__); 2210 goto out_inp_locked; 2211 } 2212 /* 2213 * NOTE: Refcount from in_joingroup_locked() 2214 * is protecting membership. 2215 */ 2216 ip_mfilter_insert(&imo->imo_head, imf); 2217 } else { 2218 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2219 IN_MULTI_LIST_LOCK(); 2220 error = inm_merge(inm, imf); 2221 if (error) { 2222 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2223 __func__); 2224 IN_MULTI_LIST_UNLOCK(); 2225 imf_rollback(imf); 2226 imf_reap(imf); 2227 goto out_inp_locked; 2228 } 2229 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2230 error = igmp_change_state(inm); 2231 IN_MULTI_LIST_UNLOCK(); 2232 if (error) { 2233 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2234 __func__); 2235 imf_rollback(imf); 2236 imf_reap(imf); 2237 goto out_inp_locked; 2238 } 2239 } 2240 2241 imf_commit(imf); 2242 imf = NULL; 2243 2244out_inp_locked: 2245 INP_WUNLOCK(inp); 2246out_inp_unlocked: 2247 IN_MULTI_UNLOCK(); 2248 2249 if (is_new && imf) { 2250 if (imf->imf_inm != NULL) { 2251 IN_MULTI_LIST_LOCK(); 2252 inm_release_deferred(imf->imf_inm); 2253 IN_MULTI_LIST_UNLOCK(); 2254 } 2255 ip_mfilter_free(imf); 2256 } 2257 return (error); 2258} 2259 2260/* 2261 * Leave an IPv4 multicast group on an inpcb, possibly with a source. 2262 */ 2263static int 2264inp_leave_group(struct inpcb *inp, struct sockopt *sopt) 2265{ 2266 struct group_source_req gsr; 2267 struct ip_mreq_source mreqs; 2268 struct rm_priotracker in_ifa_tracker; 2269 sockunion_t *gsa, *ssa; 2270 struct ifnet *ifp; 2271 struct in_mfilter *imf; 2272 struct ip_moptions *imo; 2273 struct in_msource *ims; 2274 struct in_multi *inm; 2275 int error; 2276 bool is_final; 2277 2278 ifp = NULL; 2279 error = 0; 2280 is_final = true; 2281 2282 memset(&gsr, 0, sizeof(struct group_source_req)); 2283 gsa = (sockunion_t *)&gsr.gsr_group; 2284 gsa->ss.ss_family = AF_UNSPEC; 2285 ssa = (sockunion_t *)&gsr.gsr_source; 2286 ssa->ss.ss_family = AF_UNSPEC; 2287 2288 switch (sopt->sopt_name) { 2289 case IP_DROP_MEMBERSHIP: 2290 case IP_DROP_SOURCE_MEMBERSHIP: 2291 if (sopt->sopt_name == IP_DROP_MEMBERSHIP) { 2292 error = sooptcopyin(sopt, &mreqs, 2293 sizeof(struct ip_mreq), 2294 sizeof(struct ip_mreq)); 2295 /* 2296 * Swap interface and sourceaddr arguments, 2297 * as ip_mreq and ip_mreq_source are laid 2298 * out differently. 2299 */ 2300 mreqs.imr_interface = mreqs.imr_sourceaddr; 2301 mreqs.imr_sourceaddr.s_addr = INADDR_ANY; 2302 } else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2303 error = sooptcopyin(sopt, &mreqs, 2304 sizeof(struct ip_mreq_source), 2305 sizeof(struct ip_mreq_source)); 2306 } 2307 if (error) 2308 return (error); 2309 2310 gsa->sin.sin_family = AF_INET; 2311 gsa->sin.sin_len = sizeof(struct sockaddr_in); 2312 gsa->sin.sin_addr = mreqs.imr_multiaddr; 2313 2314 if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2315 ssa->sin.sin_family = AF_INET; 2316 ssa->sin.sin_len = sizeof(struct sockaddr_in); 2317 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 2318 } 2319 2320 /* 2321 * Attempt to look up hinted ifp from interface address. 2322 * Fallthrough with null ifp iff lookup fails, to 2323 * preserve 4.4BSD mcast API idempotence. 2324 * XXX NOTE WELL: The RFC 3678 API is preferred because 2325 * using an IPv4 address as a key is racy. 2326 */ 2327 if (!in_nullhost(mreqs.imr_interface)) { 2328 IN_IFADDR_RLOCK(&in_ifa_tracker); 2329 INADDR_TO_IFP(mreqs.imr_interface, ifp); 2330 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 2331 } 2332 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 2333 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 2334 2335 break; 2336 2337 case MCAST_LEAVE_GROUP: 2338 case MCAST_LEAVE_SOURCE_GROUP: 2339 if (sopt->sopt_name == MCAST_LEAVE_GROUP) { 2340 error = sooptcopyin(sopt, &gsr, 2341 sizeof(struct group_req), 2342 sizeof(struct group_req)); 2343 } else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2344 error = sooptcopyin(sopt, &gsr, 2345 sizeof(struct group_source_req), 2346 sizeof(struct group_source_req)); 2347 } 2348 if (error) 2349 return (error); 2350 2351 if (gsa->sin.sin_family != AF_INET || 2352 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2353 return (EINVAL); 2354 2355 if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2356 if (ssa->sin.sin_family != AF_INET || 2357 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2358 return (EINVAL); 2359 } 2360 2361 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 2362 return (EADDRNOTAVAIL); 2363 2364 ifp = ifnet_byindex(gsr.gsr_interface); 2365 2366 if (ifp == NULL) 2367 return (EADDRNOTAVAIL); 2368 break; 2369 2370 default: 2371 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2372 __func__, sopt->sopt_name); 2373 return (EOPNOTSUPP); 2374 break; 2375 } 2376 2377 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2378 return (EINVAL); 2379 2380 IN_MULTI_LOCK(); 2381 2382 /* 2383 * Find the membership in the membership list. 2384 */ 2385 imo = inp_findmoptions(inp); 2386 imf = imo_match_group(imo, ifp, &gsa->sa); 2387 if (imf == NULL) { 2388 error = EADDRNOTAVAIL; 2389 goto out_inp_locked; 2390 } 2391 inm = imf->imf_inm; 2392 2393 if (ssa->ss.ss_family != AF_UNSPEC) 2394 is_final = false; 2395 2396 /* 2397 * Begin state merge transaction at socket layer. 2398 */ 2399 INP_WLOCK_ASSERT(inp); 2400 2401 /* 2402 * If we were instructed only to leave a given source, do so. 2403 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships. 2404 */ 2405 if (is_final) { 2406 ip_mfilter_remove(&imo->imo_head, imf); 2407 imf_leave(imf); 2408 2409 /* 2410 * Give up the multicast address record to which 2411 * the membership points. 2412 */ 2413 (void) in_leavegroup_locked(imf->imf_inm, imf); 2414 } else { 2415 if (imf->imf_st[0] == MCAST_EXCLUDE) { 2416 error = EADDRNOTAVAIL; 2417 goto out_inp_locked; 2418 } 2419 ims = imo_match_source(imf, &ssa->sa); 2420 if (ims == NULL) { 2421 CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", 2422 __func__, ntohl(ssa->sin.sin_addr.s_addr), "not "); 2423 error = EADDRNOTAVAIL; 2424 goto out_inp_locked; 2425 } 2426 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 2427 error = imf_prune(imf, &ssa->sin); 2428 if (error) { 2429 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2430 __func__); 2431 goto out_inp_locked; 2432 } 2433 } 2434 2435 /* 2436 * Begin state merge transaction at IGMP layer. 2437 */ 2438 if (!is_final) { 2439 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2440 IN_MULTI_LIST_LOCK(); 2441 error = inm_merge(inm, imf); 2442 if (error) { 2443 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2444 __func__); 2445 IN_MULTI_LIST_UNLOCK(); 2446 imf_rollback(imf); 2447 imf_reap(imf); 2448 goto out_inp_locked; 2449 } 2450 2451 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2452 error = igmp_change_state(inm); 2453 IN_MULTI_LIST_UNLOCK(); 2454 if (error) { 2455 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2456 __func__); 2457 imf_rollback(imf); 2458 imf_reap(imf); 2459 goto out_inp_locked; 2460 } 2461 } 2462 imf_commit(imf); 2463 imf_reap(imf); 2464 2465out_inp_locked: 2466 INP_WUNLOCK(inp); 2467 2468 if (is_final && imf) 2469 ip_mfilter_free(imf); 2470 2471 IN_MULTI_UNLOCK(); 2472 return (error); 2473} 2474 2475/* 2476 * Select the interface for transmitting IPv4 multicast datagrams. 2477 * 2478 * Either an instance of struct in_addr or an instance of struct ip_mreqn 2479 * may be passed to this socket option. An address of INADDR_ANY or an 2480 * interface index of 0 is used to remove a previous selection. 2481 * When no interface is selected, one is chosen for every send. 2482 */ 2483static int 2484inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt) 2485{ 2486 struct rm_priotracker in_ifa_tracker; 2487 struct in_addr addr; 2488 struct ip_mreqn mreqn; 2489 struct ifnet *ifp; 2490 struct ip_moptions *imo; 2491 int error; 2492 2493 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { 2494 /* 2495 * An interface index was specified using the 2496 * Linux-derived ip_mreqn structure. 2497 */ 2498 error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn), 2499 sizeof(struct ip_mreqn)); 2500 if (error) 2501 return (error); 2502 2503 if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex) 2504 return (EINVAL); 2505 2506 if (mreqn.imr_ifindex == 0) { 2507 ifp = NULL; 2508 } else { 2509 ifp = ifnet_byindex(mreqn.imr_ifindex); 2510 if (ifp == NULL) 2511 return (EADDRNOTAVAIL); 2512 } 2513 } else { 2514 /* 2515 * An interface was specified by IPv4 address. 2516 * This is the traditional BSD usage. 2517 */ 2518 error = sooptcopyin(sopt, &addr, sizeof(struct in_addr), 2519 sizeof(struct in_addr)); 2520 if (error) 2521 return (error); 2522 if (in_nullhost(addr)) { 2523 ifp = NULL; 2524 } else { 2525 IN_IFADDR_RLOCK(&in_ifa_tracker); 2526 INADDR_TO_IFP(addr, ifp); 2527 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 2528 if (ifp == NULL) 2529 return (EADDRNOTAVAIL); 2530 } 2531 CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = 0x%08x", __func__, ifp, 2532 ntohl(addr.s_addr)); 2533 } 2534 2535 /* Reject interfaces which do not support multicast. */ 2536 if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0) 2537 return (EOPNOTSUPP); 2538 2539 imo = inp_findmoptions(inp); 2540 imo->imo_multicast_ifp = ifp; 2541 imo->imo_multicast_addr.s_addr = INADDR_ANY; 2542 INP_WUNLOCK(inp); 2543 2544 return (0); 2545} 2546 2547/* 2548 * Atomically set source filters on a socket for an IPv4 multicast group. 2549 * 2550 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 2551 */ 2552static int 2553inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt) 2554{ 2555 struct __msfilterreq msfr; 2556 sockunion_t *gsa; 2557 struct ifnet *ifp; 2558 struct in_mfilter *imf; 2559 struct ip_moptions *imo; 2560 struct in_multi *inm; 2561 int error; 2562 2563 error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), 2564 sizeof(struct __msfilterreq)); 2565 if (error) 2566 return (error); 2567 2568 if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) 2569 return (ENOBUFS); 2570 2571 if ((msfr.msfr_fmode != MCAST_EXCLUDE && 2572 msfr.msfr_fmode != MCAST_INCLUDE)) 2573 return (EINVAL); 2574 2575 if (msfr.msfr_group.ss_family != AF_INET || 2576 msfr.msfr_group.ss_len != sizeof(struct sockaddr_in)) 2577 return (EINVAL); 2578 2579 gsa = (sockunion_t *)&msfr.msfr_group; 2580 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2581 return (EINVAL); 2582 2583 gsa->sin.sin_port = 0; /* ignore port */ 2584 2585 if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) 2586 return (EADDRNOTAVAIL); 2587 2588 ifp = ifnet_byindex(msfr.msfr_ifindex); 2589 if (ifp == NULL) 2590 return (EADDRNOTAVAIL); 2591 2592 IN_MULTI_LOCK(); 2593 2594 /* 2595 * Take the INP write lock. 2596 * Check if this socket is a member of this group. 2597 */ 2598 imo = inp_findmoptions(inp); 2599 imf = imo_match_group(imo, ifp, &gsa->sa); 2600 if (imf == NULL) { 2601 error = EADDRNOTAVAIL; 2602 goto out_inp_locked; 2603 } 2604 inm = imf->imf_inm; 2605 2606 /* 2607 * Begin state merge transaction at socket layer. 2608 */ 2609 INP_WLOCK_ASSERT(inp); 2610 2611 imf->imf_st[1] = msfr.msfr_fmode; 2612 2613 /* 2614 * Apply any new source filters, if present. 2615 * Make a copy of the user-space source vector so 2616 * that we may copy them with a single copyin. This 2617 * allows us to deal with page faults up-front. 2618 */ 2619 if (msfr.msfr_nsrcs > 0) { 2620 struct in_msource *lims; 2621 struct sockaddr_in *psin; 2622 struct sockaddr_storage *kss, *pkss; 2623 int i; 2624 2625 INP_WUNLOCK(inp); 2626 2627 CTR2(KTR_IGMPV3, "%s: loading %lu source list entries", 2628 __func__, (unsigned long)msfr.msfr_nsrcs); 2629 kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, 2630 M_TEMP, M_WAITOK); 2631 error = copyin(msfr.msfr_srcs, kss, 2632 sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); 2633 if (error) { 2634 free(kss, M_TEMP); 2635 return (error); 2636 } 2637 2638 INP_WLOCK(inp); 2639 2640 /* 2641 * Mark all source filters as UNDEFINED at t1. 2642 * Restore new group filter mode, as imf_leave() 2643 * will set it to INCLUDE. 2644 */ 2645 imf_leave(imf); 2646 imf->imf_st[1] = msfr.msfr_fmode; 2647 2648 /* 2649 * Update socket layer filters at t1, lazy-allocating 2650 * new entries. This saves a bunch of memory at the 2651 * cost of one RB_FIND() per source entry; duplicate 2652 * entries in the msfr_nsrcs vector are ignored. 2653 * If we encounter an error, rollback transaction. 2654 * 2655 * XXX This too could be replaced with a set-symmetric 2656 * difference like loop to avoid walking from root 2657 * every time, as the key space is common. 2658 */ 2659 for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) { 2660 psin = (struct sockaddr_in *)pkss; 2661 if (psin->sin_family != AF_INET) { 2662 error = EAFNOSUPPORT; 2663 break; 2664 } 2665 if (psin->sin_len != sizeof(struct sockaddr_in)) { 2666 error = EINVAL; 2667 break; 2668 } 2669 error = imf_get_source(imf, psin, &lims); 2670 if (error) 2671 break; 2672 lims->imsl_st[1] = imf->imf_st[1]; 2673 } 2674 free(kss, M_TEMP); 2675 } 2676 2677 if (error) 2678 goto out_imf_rollback; 2679 2680 INP_WLOCK_ASSERT(inp); 2681 2682 /* 2683 * Begin state merge transaction at IGMP layer. 2684 */ 2685 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2686 IN_MULTI_LIST_LOCK(); 2687 error = inm_merge(inm, imf); 2688 if (error) { 2689 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 2690 IN_MULTI_LIST_UNLOCK(); 2691 goto out_imf_rollback; 2692 } 2693 2694 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2695 error = igmp_change_state(inm); 2696 IN_MULTI_LIST_UNLOCK(); 2697 if (error) 2698 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 2699 2700out_imf_rollback: 2701 if (error) 2702 imf_rollback(imf); 2703 else 2704 imf_commit(imf); 2705 2706 imf_reap(imf); 2707 2708out_inp_locked: 2709 INP_WUNLOCK(inp); 2710 IN_MULTI_UNLOCK(); 2711 return (error); 2712} 2713 2714/* 2715 * Set the IP multicast options in response to user setsockopt(). 2716 * 2717 * Many of the socket options handled in this function duplicate the 2718 * functionality of socket options in the regular unicast API. However, 2719 * it is not possible to merge the duplicate code, because the idempotence 2720 * of the IPv4 multicast part of the BSD Sockets API must be preserved; 2721 * the effects of these options must be treated as separate and distinct. 2722 * 2723 * SMPng: XXX: Unlocked read of inp_socket believed OK. 2724 * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING 2725 * is refactored to no longer use vifs. 2726 */ 2727int 2728inp_setmoptions(struct inpcb *inp, struct sockopt *sopt) 2729{ 2730 struct ip_moptions *imo; 2731 int error; 2732 2733 error = 0; 2734 2735 /* 2736 * If socket is neither of type SOCK_RAW or SOCK_DGRAM, 2737 * or is a divert socket, reject it. 2738 */ 2739 if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || 2740 (inp->inp_socket->so_proto->pr_type != SOCK_RAW && 2741 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) 2742 return (EOPNOTSUPP); 2743 2744 switch (sopt->sopt_name) { 2745 case IP_MULTICAST_VIF: { 2746 int vifi; 2747 /* 2748 * Select a multicast VIF for transmission. 2749 * Only useful if multicast forwarding is active. 2750 */ 2751 if (legal_vif_num == NULL) { 2752 error = EOPNOTSUPP; 2753 break; 2754 } 2755 error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int)); 2756 if (error) 2757 break; 2758 if (!legal_vif_num(vifi) && (vifi != -1)) { 2759 error = EINVAL; 2760 break; 2761 } 2762 imo = inp_findmoptions(inp); 2763 imo->imo_multicast_vif = vifi; 2764 INP_WUNLOCK(inp); 2765 break; 2766 } 2767 2768 case IP_MULTICAST_IF: 2769 error = inp_set_multicast_if(inp, sopt); 2770 break; 2771 2772 case IP_MULTICAST_TTL: { 2773 u_char ttl; 2774 2775 /* 2776 * Set the IP time-to-live for outgoing multicast packets. 2777 * The original multicast API required a char argument, 2778 * which is inconsistent with the rest of the socket API. 2779 * We allow either a char or an int. 2780 */ 2781 if (sopt->sopt_valsize == sizeof(u_char)) { 2782 error = sooptcopyin(sopt, &ttl, sizeof(u_char), 2783 sizeof(u_char)); 2784 if (error) 2785 break; 2786 } else { 2787 u_int ittl; 2788 2789 error = sooptcopyin(sopt, &ittl, sizeof(u_int), 2790 sizeof(u_int)); 2791 if (error) 2792 break; 2793 if (ittl > 255) { 2794 error = EINVAL; 2795 break; 2796 } 2797 ttl = (u_char)ittl; 2798 } 2799 imo = inp_findmoptions(inp); 2800 imo->imo_multicast_ttl = ttl; 2801 INP_WUNLOCK(inp); 2802 break; 2803 } 2804 2805 case IP_MULTICAST_LOOP: { 2806 u_char loop; 2807 2808 /* 2809 * Set the loopback flag for outgoing multicast packets. 2810 * Must be zero or one. The original multicast API required a 2811 * char argument, which is inconsistent with the rest 2812 * of the socket API. We allow either a char or an int. 2813 */ 2814 if (sopt->sopt_valsize == sizeof(u_char)) { 2815 error = sooptcopyin(sopt, &loop, sizeof(u_char), 2816 sizeof(u_char)); 2817 if (error) 2818 break; 2819 } else { 2820 u_int iloop; 2821 2822 error = sooptcopyin(sopt, &iloop, sizeof(u_int), 2823 sizeof(u_int)); 2824 if (error) 2825 break; 2826 loop = (u_char)iloop; 2827 } 2828 imo = inp_findmoptions(inp); 2829 imo->imo_multicast_loop = !!loop; 2830 INP_WUNLOCK(inp); 2831 break; 2832 } 2833 2834 case IP_ADD_MEMBERSHIP: 2835 case IP_ADD_SOURCE_MEMBERSHIP: 2836 case MCAST_JOIN_GROUP: 2837 case MCAST_JOIN_SOURCE_GROUP: 2838 error = inp_join_group(inp, sopt); 2839 break; 2840 2841 case IP_DROP_MEMBERSHIP: 2842 case IP_DROP_SOURCE_MEMBERSHIP: 2843 case MCAST_LEAVE_GROUP: 2844 case MCAST_LEAVE_SOURCE_GROUP: 2845 error = inp_leave_group(inp, sopt); 2846 break; 2847 2848 case IP_BLOCK_SOURCE: 2849 case IP_UNBLOCK_SOURCE: 2850 case MCAST_BLOCK_SOURCE: 2851 case MCAST_UNBLOCK_SOURCE: 2852 error = inp_block_unblock_source(inp, sopt); 2853 break; 2854 2855 case IP_MSFILTER: 2856 error = inp_set_source_filters(inp, sopt); 2857 break; 2858 2859 default: 2860 error = EOPNOTSUPP; 2861 break; 2862 } 2863 2864 INP_UNLOCK_ASSERT(inp); 2865 2866 return (error); 2867} 2868 2869/* 2870 * Expose IGMP's multicast filter mode and source list(s) to userland, 2871 * keyed by (ifindex, group). 2872 * The filter mode is written out as a uint32_t, followed by 2873 * 0..n of struct in_addr. 2874 * For use by ifmcstat(8). 2875 * SMPng: NOTE: unlocked read of ifindex space. 2876 */ 2877static int 2878sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS) 2879{ 2880 struct in_addr src, group; 2881 struct ifnet *ifp; 2882 struct ifmultiaddr *ifma; 2883 struct in_multi *inm; 2884 struct ip_msource *ims; 2885 int *name; 2886 int retval; 2887 u_int namelen; 2888 uint32_t fmode, ifindex; 2889 2890 name = (int *)arg1; 2891 namelen = arg2; 2892 2893 if (req->newptr != NULL) 2894 return (EPERM); 2895 2896 if (namelen != 2) 2897 return (EINVAL); 2898 2899 ifindex = name[0]; 2900 if (ifindex <= 0 || ifindex > V_if_index) { 2901 CTR2(KTR_IGMPV3, "%s: ifindex %u out of range", 2902 __func__, ifindex); 2903 return (ENOENT); 2904 } 2905 2906 group.s_addr = name[1]; 2907 if (!IN_MULTICAST(ntohl(group.s_addr))) { 2908 CTR2(KTR_IGMPV3, "%s: group 0x%08x is not multicast", 2909 __func__, ntohl(group.s_addr)); 2910 return (EINVAL); 2911 } 2912 2913 ifp = ifnet_byindex(ifindex); 2914 if (ifp == NULL) { 2915 CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u", 2916 __func__, ifindex); 2917 return (ENOENT); 2918 } 2919 2920 retval = sysctl_wire_old_buffer(req, 2921 sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr))); 2922 if (retval) 2923 return (retval); 2924 2925 IN_MULTI_LIST_LOCK(); 2926 2927 IF_ADDR_RLOCK(ifp); 2928 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2929 if (ifma->ifma_addr->sa_family != AF_INET || 2930 ifma->ifma_protospec == NULL) 2931 continue; 2932 inm = (struct in_multi *)ifma->ifma_protospec; 2933 if (!in_hosteq(inm->inm_addr, group)) 2934 continue; 2935 fmode = inm->inm_st[1].iss_fmode; 2936 retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t)); 2937 if (retval != 0) 2938 break; 2939 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 2940 CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", __func__, 2941 ims->ims_haddr); 2942 /* 2943 * Only copy-out sources which are in-mode. 2944 */ 2945 if (fmode != ims_get_mode(inm, ims, 1)) { 2946 CTR1(KTR_IGMPV3, "%s: skip non-in-mode", 2947 __func__); 2948 continue; 2949 } 2950 src.s_addr = htonl(ims->ims_haddr); 2951 retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr)); 2952 if (retval != 0) 2953 break; 2954 } 2955 } 2956 IF_ADDR_RUNLOCK(ifp); 2957 2958 IN_MULTI_LIST_UNLOCK(); 2959 2960 return (retval); 2961} 2962 2963#if defined(KTR) && (KTR_COMPILE & KTR_IGMPV3) 2964 2965static const char *inm_modestrs[] = { 2966 [MCAST_UNDEFINED] = "un", 2967 [MCAST_INCLUDE] = "in", 2968 [MCAST_EXCLUDE] = "ex", 2969}; 2970_Static_assert(MCAST_UNDEFINED == 0 && 2971 MCAST_EXCLUDE + 1 == nitems(inm_modestrs), 2972 "inm_modestrs: no longer matches #defines"); 2973 2974static const char * 2975inm_mode_str(const int mode) 2976{ 2977 2978 if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE) 2979 return (inm_modestrs[mode]); 2980 return ("??"); 2981} 2982 2983static const char *inm_statestrs[] = { 2984 [IGMP_NOT_MEMBER] = "not-member", 2985 [IGMP_SILENT_MEMBER] = "silent", 2986 [IGMP_REPORTING_MEMBER] = "reporting", 2987 [IGMP_IDLE_MEMBER] = "idle", 2988 [IGMP_LAZY_MEMBER] = "lazy", 2989 [IGMP_SLEEPING_MEMBER] = "sleeping", 2990 [IGMP_AWAKENING_MEMBER] = "awakening", 2991 [IGMP_G_QUERY_PENDING_MEMBER] = "query-pending", 2992 [IGMP_SG_QUERY_PENDING_MEMBER] = "sg-query-pending", 2993 [IGMP_LEAVING_MEMBER] = "leaving", 2994}; 2995_Static_assert(IGMP_NOT_MEMBER == 0 && 2996 IGMP_LEAVING_MEMBER + 1 == nitems(inm_statestrs), 2997 "inm_statetrs: no longer matches #defines"); 2998 2999static const char * 3000inm_state_str(const int state) 3001{ 3002 3003 if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER) 3004 return (inm_statestrs[state]); 3005 return ("??"); 3006} 3007 3008/* 3009 * Dump an in_multi structure to the console. 3010 */ 3011void 3012inm_print(const struct in_multi *inm) 3013{ 3014 int t; 3015 char addrbuf[INET_ADDRSTRLEN]; 3016 3017 if ((ktr_mask & KTR_IGMPV3) == 0) 3018 return; 3019 3020 printf("%s: --- begin inm %p ---\n", __func__, inm); 3021 printf("addr %s ifp %p(%s) ifma %p\n", 3022 inet_ntoa_r(inm->inm_addr, addrbuf), 3023 inm->inm_ifp, 3024 inm->inm_ifp->if_xname, 3025 inm->inm_ifma); 3026 printf("timer %u state %s refcount %u scq.len %u\n", 3027 inm->inm_timer, 3028 inm_state_str(inm->inm_state), 3029 inm->inm_refcount, 3030 inm->inm_scq.mq_len); 3031 printf("igi %p nsrc %lu sctimer %u scrv %u\n", 3032 inm->inm_igi, 3033 inm->inm_nsrc, 3034 inm->inm_sctimer, 3035 inm->inm_scrv); 3036 for (t = 0; t < 2; t++) { 3037 printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t, 3038 inm_mode_str(inm->inm_st[t].iss_fmode), 3039 inm->inm_st[t].iss_asm, 3040 inm->inm_st[t].iss_ex, 3041 inm->inm_st[t].iss_in, 3042 inm->inm_st[t].iss_rec); 3043 } 3044 printf("%s: --- end inm %p ---\n", __func__, inm); 3045} 3046 3047#else /* !KTR || !(KTR_COMPILE & KTR_IGMPV3) */ 3048 3049void 3050inm_print(const struct in_multi *inm) 3051{ 3052 3053} 3054 3055#endif /* KTR && (KTR_COMPILE & KTR_IGMPV3) */ 3056 3057RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp); 3058