1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 2007-2009 Bruce Simpson.
5 * Copyright (c) 2005 Robert N. M. Watson.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote
17 *    products derived from this software without specific prior written
18 *    permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33/*
34 * IPv4 multicast socket, group, and socket option processing module.
35 */
36
37#include <sys/cdefs.h>
38__FBSDID("$FreeBSD$");
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/kernel.h>
43#include <sys/lock.h>
44#include <sys/malloc.h>
45#include <sys/mbuf.h>
46#include <sys/protosw.h>
47#include <sys/rmlock.h>
48#include <sys/socket.h>
49#include <sys/socketvar.h>
50#include <sys/protosw.h>
51#include <sys/sysctl.h>
52#include <sys/ktr.h>
53#include <sys/taskqueue.h>
54#include <sys/tree.h>
55
56#include <net/if.h>
57#include <net/if_var.h>
58#include <net/if_dl.h>
59#include <net/route.h>
60#include <net/route/nhop.h>
61#include <net/vnet.h>
62
63#include <net/ethernet.h>
64
65#include <netinet/in.h>
66#include <netinet/in_systm.h>
67#include <netinet/in_fib.h>
68#include <netinet/in_pcb.h>
69#include <netinet/in_var.h>
70#include <netinet/ip_var.h>
71#include <netinet/igmp_var.h>
72
73#ifndef KTR_IGMPV3
74#define KTR_IGMPV3 KTR_INET
75#endif
76
77#ifndef __SOCKUNION_DECLARED
78union sockunion {
79	struct sockaddr_storage	ss;
80	struct sockaddr		sa;
81	struct sockaddr_dl	sdl;
82	struct sockaddr_in	sin;
83};
84typedef union sockunion sockunion_t;
85#define __SOCKUNION_DECLARED
86#endif /* __SOCKUNION_DECLARED */
87
/*
 * malloc types for the allocations made by this module:
 *  M_INMFILTER - in_mfilter objects and their per-socket source nodes
 *  M_IPMADDR   - in_multi group records
 *  M_IPMOPTS   - ip_moptions (users are outside this part of the file)
 *  M_IPMSOURCE - ip_msource nodes in a group's source tree
 */
static MALLOC_DEFINE(M_INMFILTER, "in_mfilter",
    "IPv4 multicast PCB-layer source filter");
static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group");
static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options");
static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource",
    "IPv4 multicast IGMP-layer source filter");
94
95/*
96 * Locking:
97 *
98 * - Lock order is: Giant, IN_MULTI_LOCK, INP_WLOCK,
99 *   IN_MULTI_LIST_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
100 * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however
101 *   it can be taken by code in net/if.c also.
102 * - ip_moptions and in_mfilter are covered by the INP_WLOCK.
103 *
104 * struct in_multi is covered by IN_MULTI_LIST_LOCK. There isn't strictly
105 * any need for in_multi itself to be virtualized -- it is bound to an ifp
106 * anyway no matter what happens.
107 */
/* Default (MTX_DEF) mutex; presumably backs IN_MULTI_LIST_LOCK -- confirm in in_var.h. */
struct mtx in_multi_list_mtx;
MTX_SYSINIT(in_multi_mtx, &in_multi_list_mtx, "in_multi_list_mtx", MTX_DEF);

/* Guards inm_free_list, the queue of in_multi records awaiting release. */
struct mtx in_multi_free_mtx;
MTX_SYSINIT(in_multi_free_mtx, &in_multi_free_mtx, "in_multi_free_mtx", MTX_DEF);

/* sx lock; presumably backs IN_MULTI_LOCK -- confirm in in_var.h. */
struct sx in_multi_sx;
SX_SYSINIT(in_multi_sx, &in_multi_sx, "in_multi_sx");

/*
 * Set by inm_disconnect() whenever it frees a link-layer ifma.
 * Readers of this flag are not in this part of the file.
 */
int ifma_restart;
118
119/*
120 * Functions with non-static linkage defined in this file should be
121 * declared in in_var.h:
122 *  imo_multi_filter()
123 *  in_addmulti()
124 *  in_delmulti()
125 *  in_joingroup()
126 *  in_joingroup_locked()
127 *  in_leavegroup()
128 *  in_leavegroup_locked()
129 * and ip_var.h:
130 *  inp_freemoptions()
131 *  inp_getmoptions()
132 *  inp_setmoptions()
133 *
134 * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti()
135 * and in_delmulti().
136 */
/*
 * Forward declarations of the file-local helpers.
 * Prefixes: imf_ = socket-layer filter set (struct in_mfilter),
 * imo_ = struct ip_moptions, ims_ = source node, inm_ = group record
 * (struct in_multi), inp_ = struct inpcb socket option handling.
 */
static void	imf_commit(struct in_mfilter *);
static int	imf_get_source(struct in_mfilter *imf,
		    const struct sockaddr_in *psin,
		    struct in_msource **);
static struct in_msource *
		imf_graft(struct in_mfilter *, const uint8_t,
		    const struct sockaddr_in *);
static void	imf_leave(struct in_mfilter *);
static int	imf_prune(struct in_mfilter *, const struct sockaddr_in *);
static void	imf_purge(struct in_mfilter *);
static void	imf_rollback(struct in_mfilter *);
static void	imf_reap(struct in_mfilter *);
static struct in_mfilter *
		imo_match_group(const struct ip_moptions *,
		    const struct ifnet *, const struct sockaddr *);
static struct in_msource *
		imo_match_source(struct in_mfilter *, const struct sockaddr *);
static void	ims_merge(struct ip_msource *ims,
		    const struct in_msource *lims, const int rollback);
static int	in_getmulti(struct ifnet *, const struct in_addr *,
		    struct in_multi **);
static int	inm_get_source(struct in_multi *inm, const in_addr_t haddr,
		    const int noalloc, struct ip_msource **pims);
#ifdef KTR
/* Only built when kernel tracing (KTR) is compiled in. */
static int	inm_is_ifp_detached(const struct in_multi *);
#endif
static int	inm_merge(struct in_multi *, /*const*/ struct in_mfilter *);
static void	inm_purge(struct in_multi *);
static void	inm_reap(struct in_multi *);
static void inm_release(struct in_multi *);
static struct ip_moptions *
		inp_findmoptions(struct inpcb *);
static int	inp_get_source_filters(struct inpcb *, struct sockopt *);
static int	inp_join_group(struct inpcb *, struct sockopt *);
static int	inp_leave_group(struct inpcb *, struct sockopt *);
static struct ifnet *
		inp_lookup_mcast_ifp(const struct inpcb *,
		    const struct sockaddr_in *, const struct in_addr);
static int	inp_block_unblock_source(struct inpcb *, struct sockopt *);
static int	inp_set_multicast_if(struct inpcb *, struct sockopt *);
static int	inp_set_source_filters(struct inpcb *, struct sockopt *);
static int	sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS);
179
/* Parent node for the multicast sysctls defined below. */
static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast,
    CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "IPv4 multicast");

/*
 * Limit on the number of nodes in a group's source tree; checked
 * against inm_nsrc before a new ip_msource is allocated.
 */
static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER;
SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc,
    CTLFLAG_RWTUN, &in_mcast_maxgrpsrc, 0,
    "Max source filters per group");

/*
 * Limit on the number of nodes in a socket-layer filter set; checked
 * against imf_nsrc before a new in_msource is allocated.
 */
static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER;
SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc,
    CTLFLAG_RWTUN, &in_mcast_maxsocksrc, 0,
    "Max source filters per socket");

/* Default multicast loopback setting; non-static, so visible to other files. */
int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP;
SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RWTUN,
    &in_mcast_loop, 0, "Loopback multicast datagrams by default");

/* Read-only node whose contents are produced by sysctl_ip_mcast_filters(). */
static SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters,
    CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters,
    "Per-interface stack-wide source filters");
201
202#ifdef KTR
203/*
204 * Inline function which wraps assertions for a valid ifp.
205 * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp
206 * is detached.
207 */
208static int __inline
209inm_is_ifp_detached(const struct in_multi *inm)
210{
211	struct ifnet *ifp;
212
213	KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
214	ifp = inm->inm_ifma->ifma_ifp;
215	if (ifp != NULL) {
216		/*
217		 * Sanity check that netinet's notion of ifp is the
218		 * same as net's.
219		 */
220		KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
221	}
222
223	return (ifp == NULL);
224}
225#endif
226
/*
 * Interface detach can happen in a taskqueue thread context, so we must use a
 * dedicated thread to avoid deadlocks when draining inm_release tasks.
 */
TASKQUEUE_DEFINE_THREAD(inm_free);
/* Records queued for release; access is guarded by in_multi_free_mtx. */
static struct in_multi_head inm_free_list = SLIST_HEAD_INITIALIZER();
static void inm_release_task(void *arg __unused, int pending __unused);
/* Task that empties inm_free_list; enqueued on taskqueue_inm_free. */
static struct task inm_free_task = TASK_INITIALIZER(0, inm_release_task, NULL);
235
236void
237inm_release_wait(void *arg __unused)
238{
239
240	/*
241	 * Make sure all pending multicast addresses are freed before
242	 * the VNET or network device is destroyed:
243	 */
244	taskqueue_drain(taskqueue_inm_free, &inm_free_task);
245}
#ifdef VIMAGE
/*
 * VIMAGE kernels only: register inm_release_wait() through
 * VNET_SYSUNINIT so the release task is drained on VNET teardown.
 */
/* XXX-BZ FIXME, see D24914. */
VNET_SYSUNINIT(inm_release_wait, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, inm_release_wait, NULL);
#endif
250
251void
252inm_release_list_deferred(struct in_multi_head *inmh)
253{
254
255	if (SLIST_EMPTY(inmh))
256		return;
257	mtx_lock(&in_multi_free_mtx);
258	SLIST_CONCAT(&inm_free_list, inmh, in_multi, inm_nrele);
259	mtx_unlock(&in_multi_free_mtx);
260	taskqueue_enqueue(taskqueue_inm_free, &inm_free_task);
261}
262
263void
264inm_disconnect(struct in_multi *inm)
265{
266	struct ifnet *ifp;
267	struct ifmultiaddr *ifma, *ll_ifma;
268
269	ifp = inm->inm_ifp;
270	IF_ADDR_WLOCK_ASSERT(ifp);
271	ifma = inm->inm_ifma;
272
273	if_ref(ifp);
274	if (ifma->ifma_flags & IFMA_F_ENQUEUED) {
275		CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
276		ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
277	}
278	MCDPRINTF("removed ifma: %p from %s\n", ifma, ifp->if_xname);
279	if ((ll_ifma = ifma->ifma_llifma) != NULL) {
280		MPASS(ifma != ll_ifma);
281		ifma->ifma_llifma = NULL;
282		MPASS(ll_ifma->ifma_llifma == NULL);
283		MPASS(ll_ifma->ifma_ifp == ifp);
284		if (--ll_ifma->ifma_refcount == 0) {
285			if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) {
286				CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link);
287				ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
288			}
289			MCDPRINTF("removed ll_ifma: %p from %s\n", ll_ifma, ifp->if_xname);
290			if_freemulti(ll_ifma);
291			ifma_restart = true;
292		}
293	}
294}
295
296void
297inm_release_deferred(struct in_multi *inm)
298{
299	struct in_multi_head tmp;
300
301	IN_MULTI_LIST_LOCK_ASSERT();
302	MPASS(inm->inm_refcount > 0);
303	if (--inm->inm_refcount == 0) {
304		SLIST_INIT(&tmp);
305		inm_disconnect(inm);
306		inm->inm_ifma->ifma_protospec = NULL;
307		SLIST_INSERT_HEAD(&tmp, inm, inm_nrele);
308		inm_release_list_deferred(&tmp);
309	}
310}
311
312static void
313inm_release_task(void *arg __unused, int pending __unused)
314{
315	struct in_multi_head inm_free_tmp;
316	struct in_multi *inm, *tinm;
317
318	SLIST_INIT(&inm_free_tmp);
319	mtx_lock(&in_multi_free_mtx);
320	SLIST_CONCAT(&inm_free_tmp, &inm_free_list, in_multi, inm_nrele);
321	mtx_unlock(&in_multi_free_mtx);
322	IN_MULTI_LOCK();
323	SLIST_FOREACH_SAFE(inm, &inm_free_tmp, inm_nrele, tinm) {
324		SLIST_REMOVE_HEAD(&inm_free_tmp, inm_nrele);
325		MPASS(inm);
326		inm_release(inm);
327	}
328	IN_MULTI_UNLOCK();
329}
330
331/*
332 * Initialize an in_mfilter structure to a known state at t0, t1
333 * with an empty source filter list.
334 */
335static __inline void
336imf_init(struct in_mfilter *imf, const int st0, const int st1)
337{
338	memset(imf, 0, sizeof(struct in_mfilter));
339	RB_INIT(&imf->imf_sources);
340	imf->imf_st[0] = st0;
341	imf->imf_st[1] = st1;
342}
343
344struct in_mfilter *
345ip_mfilter_alloc(const int mflags, const int st0, const int st1)
346{
347	struct in_mfilter *imf;
348
349	imf = malloc(sizeof(*imf), M_INMFILTER, mflags);
350	if (imf != NULL)
351		imf_init(imf, st0, st1);
352
353	return (imf);
354}
355
356void
357ip_mfilter_free(struct in_mfilter *imf)
358{
359
360	imf_purge(imf);
361	free(imf, M_INMFILTER);
362}
363
364/*
365 * Function for looking up an in_multi record for an IPv4 multicast address
366 * on a given interface. ifp must be valid. If no record found, return NULL.
367 * The IN_MULTI_LIST_LOCK and IF_ADDR_LOCK on ifp must be held.
368 */
369struct in_multi *
370inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina)
371{
372	struct ifmultiaddr *ifma;
373	struct in_multi *inm;
374
375	IN_MULTI_LIST_LOCK_ASSERT();
376	IF_ADDR_LOCK_ASSERT(ifp);
377
378	inm = NULL;
379	CK_STAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) {
380		if (ifma->ifma_addr->sa_family != AF_INET ||
381			ifma->ifma_protospec == NULL)
382			continue;
383		inm = (struct in_multi *)ifma->ifma_protospec;
384		if (inm->inm_addr.s_addr == ina.s_addr)
385			break;
386		inm = NULL;
387	}
388	return (inm);
389}
390
391/*
392 * Wrapper for inm_lookup_locked().
393 * The IF_ADDR_LOCK will be taken on ifp and released on return.
394 */
395struct in_multi *
396inm_lookup(struct ifnet *ifp, const struct in_addr ina)
397{
398	struct epoch_tracker et;
399	struct in_multi *inm;
400
401	IN_MULTI_LIST_LOCK_ASSERT();
402	NET_EPOCH_ENTER(et);
403
404	inm = inm_lookup_locked(ifp, ina);
405	NET_EPOCH_EXIT(et);
406
407	return (inm);
408}
409
410/*
411 * Find an IPv4 multicast group entry for this ip_moptions instance
412 * which matches the specified group, and optionally an interface.
413 * Return its index into the array, or -1 if not found.
414 */
415static struct in_mfilter *
416imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp,
417    const struct sockaddr *group)
418{
419	const struct sockaddr_in *gsin;
420	struct in_mfilter *imf;
421	struct in_multi	*inm;
422
423	gsin = (const struct sockaddr_in *)group;
424
425	IP_MFILTER_FOREACH(imf, &imo->imo_head) {
426		inm = imf->imf_inm;
427		if (inm == NULL)
428			continue;
429		if ((ifp == NULL || (inm->inm_ifp == ifp)) &&
430		    in_hosteq(inm->inm_addr, gsin->sin_addr)) {
431			break;
432		}
433	}
434	return (imf);
435}
436
437/*
438 * Find an IPv4 multicast source entry for this imo which matches
439 * the given group index for this socket, and source address.
440 *
441 * NOTE: This does not check if the entry is in-mode, merely if
442 * it exists, which may not be the desired behaviour.
443 */
444static struct in_msource *
445imo_match_source(struct in_mfilter *imf, const struct sockaddr *src)
446{
447	struct ip_msource	 find;
448	struct ip_msource	*ims;
449	const sockunion_t	*psa;
450
451	KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
452
453	/* Source trees are keyed in host byte order. */
454	psa = (const sockunion_t *)src;
455	find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr);
456	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
457
458	return ((struct in_msource *)ims);
459}
460
461/*
462 * Perform filtering for multicast datagrams on a socket by group and source.
463 *
464 * Returns 0 if a datagram should be allowed through, or various error codes
465 * if the socket was not a member of the group, or the source was muted, etc.
466 */
467int
468imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp,
469    const struct sockaddr *group, const struct sockaddr *src)
470{
471	struct in_mfilter *imf;
472	struct in_msource *ims;
473	int mode;
474
475	KASSERT(ifp != NULL, ("%s: null ifp", __func__));
476
477	imf = imo_match_group(imo, ifp, group);
478	if (imf == NULL)
479		return (MCAST_NOTGMEMBER);
480
481	/*
482	 * Check if the source was included in an (S,G) join.
483	 * Allow reception on exclusive memberships by default,
484	 * reject reception on inclusive memberships by default.
485	 * Exclude source only if an in-mode exclude filter exists.
486	 * Include source only if an in-mode include filter exists.
487	 * NOTE: We are comparing group state here at IGMP t1 (now)
488	 * with socket-layer t0 (since last downcall).
489	 */
490	mode = imf->imf_st[1];
491	ims = imo_match_source(imf, src);
492
493	if ((ims == NULL && mode == MCAST_INCLUDE) ||
494	    (ims != NULL && ims->imsl_st[0] != mode))
495		return (MCAST_NOTSMEMBER);
496
497	return (MCAST_PASS);
498}
499
500/*
501 * Find and return a reference to an in_multi record for (ifp, group),
502 * and bump its reference count.
503 * If one does not exist, try to allocate it, and update link-layer multicast
504 * filters on ifp to listen for group.
505 * Assumes the IN_MULTI lock is held across the call.
506 * Return 0 if successful, otherwise return an appropriate error code.
507 */
508static int
509in_getmulti(struct ifnet *ifp, const struct in_addr *group,
510    struct in_multi **pinm)
511{
512	struct sockaddr_in	 gsin;
513	struct ifmultiaddr	*ifma;
514	struct in_ifinfo	*ii;
515	struct in_multi		*inm;
516	int error;
517
518	IN_MULTI_LOCK_ASSERT();
519
520	ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET];
521	IN_MULTI_LIST_LOCK();
522	inm = inm_lookup(ifp, *group);
523	if (inm != NULL) {
524		/*
525		 * If we already joined this group, just bump the
526		 * refcount and return it.
527		 */
528		KASSERT(inm->inm_refcount >= 1,
529		    ("%s: bad refcount %d", __func__, inm->inm_refcount));
530		inm_acquire_locked(inm);
531		*pinm = inm;
532	}
533	IN_MULTI_LIST_UNLOCK();
534	if (inm != NULL)
535		return (0);
536
537	memset(&gsin, 0, sizeof(gsin));
538	gsin.sin_family = AF_INET;
539	gsin.sin_len = sizeof(struct sockaddr_in);
540	gsin.sin_addr = *group;
541
542	/*
543	 * Check if a link-layer group is already associated
544	 * with this network-layer group on the given ifnet.
545	 */
546	error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma);
547	if (error != 0)
548		return (error);
549
550	/* XXX ifma_protospec must be covered by IF_ADDR_LOCK */
551	IN_MULTI_LIST_LOCK();
552	IF_ADDR_WLOCK(ifp);
553
554	/*
555	 * If something other than netinet is occupying the link-layer
556	 * group, print a meaningful error message and back out of
557	 * the allocation.
558	 * Otherwise, bump the refcount on the existing network-layer
559	 * group association and return it.
560	 */
561	if (ifma->ifma_protospec != NULL) {
562		inm = (struct in_multi *)ifma->ifma_protospec;
563#ifdef INVARIANTS
564		KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr",
565		    __func__));
566		KASSERT(ifma->ifma_addr->sa_family == AF_INET,
567		    ("%s: ifma not AF_INET", __func__));
568		KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__));
569		if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
570		    !in_hosteq(inm->inm_addr, *group)) {
571			char addrbuf[INET_ADDRSTRLEN];
572
573			panic("%s: ifma %p is inconsistent with %p (%s)",
574			    __func__, ifma, inm, inet_ntoa_r(*group, addrbuf));
575		}
576#endif
577		inm_acquire_locked(inm);
578		*pinm = inm;
579		goto out_locked;
580	}
581
582	IF_ADDR_WLOCK_ASSERT(ifp);
583
584	/*
585	 * A new in_multi record is needed; allocate and initialize it.
586	 * We DO NOT perform an IGMP join as the in_ layer may need to
587	 * push an initial source list down to IGMP to support SSM.
588	 *
589	 * The initial source filter state is INCLUDE, {} as per the RFC.
590	 */
591	inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO);
592	if (inm == NULL) {
593		IF_ADDR_WUNLOCK(ifp);
594		IN_MULTI_LIST_UNLOCK();
595		if_delmulti_ifma(ifma);
596		return (ENOMEM);
597	}
598	inm->inm_addr = *group;
599	inm->inm_ifp = ifp;
600	inm->inm_igi = ii->ii_igmp;
601	inm->inm_ifma = ifma;
602	inm->inm_refcount = 1;
603	inm->inm_state = IGMP_NOT_MEMBER;
604	mbufq_init(&inm->inm_scq, IGMP_MAX_STATE_CHANGES);
605	inm->inm_st[0].iss_fmode = MCAST_UNDEFINED;
606	inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
607	RB_INIT(&inm->inm_srcs);
608
609	ifma->ifma_protospec = inm;
610
611	*pinm = inm;
612 out_locked:
613	IF_ADDR_WUNLOCK(ifp);
614	IN_MULTI_LIST_UNLOCK();
615	return (0);
616}
617
618/*
619 * Drop a reference to an in_multi record.
620 *
621 * If the refcount drops to 0, free the in_multi record and
622 * delete the underlying link-layer membership.
623 */
624static void
625inm_release(struct in_multi *inm)
626{
627	struct ifmultiaddr *ifma;
628	struct ifnet *ifp;
629
630	CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount);
631	MPASS(inm->inm_refcount == 0);
632	CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm);
633
634	ifma = inm->inm_ifma;
635	ifp = inm->inm_ifp;
636
637	/* XXX this access is not covered by IF_ADDR_LOCK */
638	CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma);
639	if (ifp != NULL) {
640		CURVNET_SET(ifp->if_vnet);
641		inm_purge(inm);
642		free(inm, M_IPMADDR);
643		if_delmulti_ifma_flags(ifma, 1);
644		CURVNET_RESTORE();
645		if_rele(ifp);
646	} else {
647		inm_purge(inm);
648		free(inm, M_IPMADDR);
649		if_delmulti_ifma_flags(ifma, 1);
650	}
651}
652
653/*
654 * Clear recorded source entries for a group.
655 * Used by the IGMP code. Caller must hold the IN_MULTI lock.
656 * FIXME: Should reap.
657 */
658void
659inm_clear_recorded(struct in_multi *inm)
660{
661	struct ip_msource	*ims;
662
663	IN_MULTI_LIST_LOCK_ASSERT();
664
665	RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
666		if (ims->ims_stp) {
667			ims->ims_stp = 0;
668			--inm->inm_st[1].iss_rec;
669		}
670	}
671	KASSERT(inm->inm_st[1].iss_rec == 0,
672	    ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec));
673}
674
675/*
676 * Record a source as pending for a Source-Group IGMPv3 query.
677 * This lives here as it modifies the shared tree.
678 *
679 * inm is the group descriptor.
680 * naddr is the address of the source to record in network-byte order.
681 *
682 * If the net.inet.igmp.sgalloc sysctl is non-zero, we will
683 * lazy-allocate a source node in response to an SG query.
684 * Otherwise, no allocation is performed. This saves some memory
685 * with the trade-off that the source will not be reported to the
686 * router if joined in the window between the query response and
687 * the group actually being joined on the local host.
688 *
689 * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed.
690 * This turns off the allocation of a recorded source entry if
691 * the group has not been joined.
692 *
693 * Return 0 if the source didn't exist or was already marked as recorded.
694 * Return 1 if the source was marked as recorded by this function.
695 * Return <0 if any error occurred (negated errno code).
696 */
697int
698inm_record_source(struct in_multi *inm, const in_addr_t naddr)
699{
700	struct ip_msource	 find;
701	struct ip_msource	*ims, *nims;
702
703	IN_MULTI_LIST_LOCK_ASSERT();
704
705	find.ims_haddr = ntohl(naddr);
706	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
707	if (ims && ims->ims_stp)
708		return (0);
709	if (ims == NULL) {
710		if (inm->inm_nsrc == in_mcast_maxgrpsrc)
711			return (-ENOSPC);
712		nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
713		    M_NOWAIT | M_ZERO);
714		if (nims == NULL)
715			return (-ENOMEM);
716		nims->ims_haddr = find.ims_haddr;
717		RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
718		++inm->inm_nsrc;
719		ims = nims;
720	}
721
722	/*
723	 * Mark the source as recorded and update the recorded
724	 * source count.
725	 */
726	++ims->ims_stp;
727	++inm->inm_st[1].iss_rec;
728
729	return (1);
730}
731
732/*
733 * Return a pointer to an in_msource owned by an in_mfilter,
734 * given its source address.
735 * Lazy-allocate if needed. If this is a new entry its filter state is
736 * undefined at t0.
737 *
738 * imf is the filter set being modified.
739 * haddr is the source address in *host* byte-order.
740 *
741 * SMPng: May be called with locks held; malloc must not block.
742 */
743static int
744imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin,
745    struct in_msource **plims)
746{
747	struct ip_msource	 find;
748	struct ip_msource	*ims, *nims;
749	struct in_msource	*lims;
750	int			 error;
751
752	error = 0;
753	ims = NULL;
754	lims = NULL;
755
756	/* key is host byte order */
757	find.ims_haddr = ntohl(psin->sin_addr.s_addr);
758	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
759	lims = (struct in_msource *)ims;
760	if (lims == NULL) {
761		if (imf->imf_nsrc == in_mcast_maxsocksrc)
762			return (ENOSPC);
763		nims = malloc(sizeof(struct in_msource), M_INMFILTER,
764		    M_NOWAIT | M_ZERO);
765		if (nims == NULL)
766			return (ENOMEM);
767		lims = (struct in_msource *)nims;
768		lims->ims_haddr = find.ims_haddr;
769		lims->imsl_st[0] = MCAST_UNDEFINED;
770		RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
771		++imf->imf_nsrc;
772	}
773
774	*plims = lims;
775
776	return (error);
777}
778
779/*
780 * Graft a source entry into an existing socket-layer filter set,
781 * maintaining any required invariants and checking allocations.
782 *
783 * The source is marked as being in the new filter mode at t1.
784 *
785 * Return the pointer to the new node, otherwise return NULL.
786 */
787static struct in_msource *
788imf_graft(struct in_mfilter *imf, const uint8_t st1,
789    const struct sockaddr_in *psin)
790{
791	struct ip_msource	*nims;
792	struct in_msource	*lims;
793
794	nims = malloc(sizeof(struct in_msource), M_INMFILTER,
795	    M_NOWAIT | M_ZERO);
796	if (nims == NULL)
797		return (NULL);
798	lims = (struct in_msource *)nims;
799	lims->ims_haddr = ntohl(psin->sin_addr.s_addr);
800	lims->imsl_st[0] = MCAST_UNDEFINED;
801	lims->imsl_st[1] = st1;
802	RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
803	++imf->imf_nsrc;
804
805	return (lims);
806}
807
808/*
809 * Prune a source entry from an existing socket-layer filter set,
810 * maintaining any required invariants and checking allocations.
811 *
812 * The source is marked as being left at t1, it is not freed.
813 *
814 * Return 0 if no error occurred, otherwise return an errno value.
815 */
816static int
817imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin)
818{
819	struct ip_msource	 find;
820	struct ip_msource	*ims;
821	struct in_msource	*lims;
822
823	/* key is host byte order */
824	find.ims_haddr = ntohl(psin->sin_addr.s_addr);
825	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
826	if (ims == NULL)
827		return (ENOENT);
828	lims = (struct in_msource *)ims;
829	lims->imsl_st[1] = MCAST_UNDEFINED;
830	return (0);
831}
832
833/*
834 * Revert socket-layer filter set deltas at t1 to t0 state.
835 */
836static void
837imf_rollback(struct in_mfilter *imf)
838{
839	struct ip_msource	*ims, *tims;
840	struct in_msource	*lims;
841
842	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
843		lims = (struct in_msource *)ims;
844		if (lims->imsl_st[0] == lims->imsl_st[1]) {
845			/* no change at t1 */
846			continue;
847		} else if (lims->imsl_st[0] != MCAST_UNDEFINED) {
848			/* revert change to existing source at t1 */
849			lims->imsl_st[1] = lims->imsl_st[0];
850		} else {
851			/* revert source added t1 */
852			CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
853			RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
854			free(ims, M_INMFILTER);
855			imf->imf_nsrc--;
856		}
857	}
858	imf->imf_st[1] = imf->imf_st[0];
859}
860
861/*
862 * Mark socket-layer filter set as INCLUDE {} at t1.
863 */
864static void
865imf_leave(struct in_mfilter *imf)
866{
867	struct ip_msource	*ims;
868	struct in_msource	*lims;
869
870	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
871		lims = (struct in_msource *)ims;
872		lims->imsl_st[1] = MCAST_UNDEFINED;
873	}
874	imf->imf_st[1] = MCAST_INCLUDE;
875}
876
877/*
878 * Mark socket-layer filter set deltas as committed.
879 */
880static void
881imf_commit(struct in_mfilter *imf)
882{
883	struct ip_msource	*ims;
884	struct in_msource	*lims;
885
886	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
887		lims = (struct in_msource *)ims;
888		lims->imsl_st[0] = lims->imsl_st[1];
889	}
890	imf->imf_st[0] = imf->imf_st[1];
891}
892
893/*
894 * Reap unreferenced sources from socket-layer filter set.
895 */
896static void
897imf_reap(struct in_mfilter *imf)
898{
899	struct ip_msource	*ims, *tims;
900	struct in_msource	*lims;
901
902	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
903		lims = (struct in_msource *)ims;
904		if ((lims->imsl_st[0] == MCAST_UNDEFINED) &&
905		    (lims->imsl_st[1] == MCAST_UNDEFINED)) {
906			CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims);
907			RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
908			free(ims, M_INMFILTER);
909			imf->imf_nsrc--;
910		}
911	}
912}
913
914/*
915 * Purge socket-layer filter set.
916 */
917static void
918imf_purge(struct in_mfilter *imf)
919{
920	struct ip_msource	*ims, *tims;
921
922	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
923		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
924		RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
925		free(ims, M_INMFILTER);
926		imf->imf_nsrc--;
927	}
928	imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED;
929	KASSERT(RB_EMPTY(&imf->imf_sources),
930	    ("%s: imf_sources not empty", __func__));
931}
932
933/*
934 * Look up a source filter entry for a multicast group.
935 *
936 * inm is the group descriptor to work with.
937 * haddr is the host-byte-order IPv4 address to look up.
938 * noalloc may be non-zero to suppress allocation of sources.
939 * *pims will be set to the address of the retrieved or allocated source.
940 *
941 * SMPng: NOTE: may be called with locks held.
942 * Return 0 if successful, otherwise return a non-zero error code.
943 */
944static int
945inm_get_source(struct in_multi *inm, const in_addr_t haddr,
946    const int noalloc, struct ip_msource **pims)
947{
948	struct ip_msource	 find;
949	struct ip_msource	*ims, *nims;
950
951	find.ims_haddr = haddr;
952	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
953	if (ims == NULL && !noalloc) {
954		if (inm->inm_nsrc == in_mcast_maxgrpsrc)
955			return (ENOSPC);
956		nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
957		    M_NOWAIT | M_ZERO);
958		if (nims == NULL)
959			return (ENOMEM);
960		nims->ims_haddr = haddr;
961		RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
962		++inm->inm_nsrc;
963		ims = nims;
964#ifdef KTR
965		CTR3(KTR_IGMPV3, "%s: allocated 0x%08x as %p", __func__,
966		    haddr, ims);
967#endif
968	}
969
970	*pims = ims;
971	return (0);
972}
973
974/*
975 * Merge socket-layer source into IGMP-layer source.
976 * If rollback is non-zero, perform the inverse of the merge.
977 */
978static void
979ims_merge(struct ip_msource *ims, const struct in_msource *lims,
980    const int rollback)
981{
982	int n = rollback ? -1 : 1;
983
984	if (lims->imsl_st[0] == MCAST_EXCLUDE) {
985		CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on 0x%08x",
986		    __func__, n, ims->ims_haddr);
987		ims->ims_st[1].ex -= n;
988	} else if (lims->imsl_st[0] == MCAST_INCLUDE) {
989		CTR3(KTR_IGMPV3, "%s: t1 in -= %d on 0x%08x",
990		    __func__, n, ims->ims_haddr);
991		ims->ims_st[1].in -= n;
992	}
993
994	if (lims->imsl_st[1] == MCAST_EXCLUDE) {
995		CTR3(KTR_IGMPV3, "%s: t1 ex += %d on 0x%08x",
996		    __func__, n, ims->ims_haddr);
997		ims->ims_st[1].ex += n;
998	} else if (lims->imsl_st[1] == MCAST_INCLUDE) {
999		CTR3(KTR_IGMPV3, "%s: t1 in += %d on 0x%08x",
1000		    __func__, n, ims->ims_haddr);
1001		ims->ims_st[1].in += n;
1002	}
1003}
1004
1005/*
1006 * Atomically update the global in_multi state, when a membership's
1007 * filter list is being updated in any way.
1008 *
1009 * imf is the per-inpcb-membership group filter pointer.
1010 * A fake imf may be passed for in-kernel consumers.
1011 *
1012 * XXX This is a candidate for a set-symmetric-difference style loop
1013 * which would eliminate the repeated lookup from root of ims nodes,
1014 * as they share the same key space.
1015 *
1016 * If any error occurred this function will back out of refcounts
1017 * and return a non-zero value.
1018 */
1019static int
1020inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
1021{
1022	struct ip_msource	*ims, *nims;
1023	struct in_msource	*lims;
1024	int			 schanged, error;
1025	int			 nsrc0, nsrc1;
1026
1027	schanged = 0;
1028	error = 0;
1029	nsrc1 = nsrc0 = 0;
1030	IN_MULTI_LIST_LOCK_ASSERT();
1031
1032	/*
1033	 * Update the source filters first, as this may fail.
1034	 * Maintain count of in-mode filters at t0, t1. These are
1035	 * used to work out if we transition into ASM mode or not.
1036	 * Maintain a count of source filters whose state was
1037	 * actually modified by this operation.
1038	 */
1039	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
1040		lims = (struct in_msource *)ims;
1041		if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++;
1042		if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++;
1043		if (lims->imsl_st[0] == lims->imsl_st[1]) continue;
1044		error = inm_get_source(inm, lims->ims_haddr, 0, &nims);
1045		++schanged;
1046		if (error)
1047			break;
1048		ims_merge(nims, lims, 0);
1049	}
1050	if (error) {
1051		struct ip_msource *bims;
1052
1053		RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) {
1054			lims = (struct in_msource *)ims;
1055			if (lims->imsl_st[0] == lims->imsl_st[1])
1056				continue;
1057			(void)inm_get_source(inm, lims->ims_haddr, 1, &bims);
1058			if (bims == NULL)
1059				continue;
1060			ims_merge(bims, lims, 1);
1061		}
1062		goto out_reap;
1063	}
1064
1065	CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1",
1066	    __func__, nsrc0, nsrc1);
1067
1068	/* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */
1069	if (imf->imf_st[0] == imf->imf_st[1] &&
1070	    imf->imf_st[1] == MCAST_INCLUDE) {
1071		if (nsrc1 == 0) {
1072			CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
1073			--inm->inm_st[1].iss_in;
1074		}
1075	}
1076
1077	/* Handle filter mode transition on socket. */
1078	if (imf->imf_st[0] != imf->imf_st[1]) {
1079		CTR3(KTR_IGMPV3, "%s: imf transition %d to %d",
1080		    __func__, imf->imf_st[0], imf->imf_st[1]);
1081
1082		if (imf->imf_st[0] == MCAST_EXCLUDE) {
1083			CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__);
1084			--inm->inm_st[1].iss_ex;
1085		} else if (imf->imf_st[0] == MCAST_INCLUDE) {
1086			CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
1087			--inm->inm_st[1].iss_in;
1088		}
1089
1090		if (imf->imf_st[1] == MCAST_EXCLUDE) {
1091			CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__);
1092			inm->inm_st[1].iss_ex++;
1093		} else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) {
1094			CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__);
1095			inm->inm_st[1].iss_in++;
1096		}
1097	}
1098
1099	/*
1100	 * Track inm filter state in terms of listener counts.
1101	 * If there are any exclusive listeners, stack-wide
1102	 * membership is exclusive.
1103	 * Otherwise, if only inclusive listeners, stack-wide is inclusive.
1104	 * If no listeners remain, state is undefined at t1,
1105	 * and the IGMP lifecycle for this group should finish.
1106	 */
1107	if (inm->inm_st[1].iss_ex > 0) {
1108		CTR1(KTR_IGMPV3, "%s: transition to EX", __func__);
1109		inm->inm_st[1].iss_fmode = MCAST_EXCLUDE;
1110	} else if (inm->inm_st[1].iss_in > 0) {
1111		CTR1(KTR_IGMPV3, "%s: transition to IN", __func__);
1112		inm->inm_st[1].iss_fmode = MCAST_INCLUDE;
1113	} else {
1114		CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__);
1115		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
1116	}
1117
1118	/* Decrement ASM listener count on transition out of ASM mode. */
1119	if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
1120		if ((imf->imf_st[1] != MCAST_EXCLUDE) ||
1121		    (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) {
1122			CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__);
1123			--inm->inm_st[1].iss_asm;
1124		}
1125	}
1126
1127	/* Increment ASM listener count on transition to ASM mode. */
1128	if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) {
1129		CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__);
1130		inm->inm_st[1].iss_asm++;
1131	}
1132
1133	CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm);
1134	inm_print(inm);
1135
1136out_reap:
1137	if (schanged > 0) {
1138		CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__);
1139		inm_reap(inm);
1140	}
1141	return (error);
1142}
1143
1144/*
1145 * Mark an in_multi's filter set deltas as committed.
1146 * Called by IGMP after a state change has been enqueued.
1147 */
1148void
1149inm_commit(struct in_multi *inm)
1150{
1151	struct ip_msource	*ims;
1152
1153	CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm);
1154	CTR1(KTR_IGMPV3, "%s: pre commit:", __func__);
1155	inm_print(inm);
1156
1157	RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
1158		ims->ims_st[0] = ims->ims_st[1];
1159	}
1160	inm->inm_st[0] = inm->inm_st[1];
1161}
1162
1163/*
1164 * Reap unreferenced nodes from an in_multi's filter set.
1165 */
1166static void
1167inm_reap(struct in_multi *inm)
1168{
1169	struct ip_msource	*ims, *tims;
1170
1171	RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
1172		if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 ||
1173		    ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 ||
1174		    ims->ims_stp != 0)
1175			continue;
1176		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
1177		RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
1178		free(ims, M_IPMSOURCE);
1179		inm->inm_nsrc--;
1180	}
1181}
1182
1183/*
1184 * Purge all source nodes from an in_multi's filter set.
1185 */
1186static void
1187inm_purge(struct in_multi *inm)
1188{
1189	struct ip_msource	*ims, *tims;
1190
1191	RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
1192		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
1193		RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
1194		free(ims, M_IPMSOURCE);
1195		inm->inm_nsrc--;
1196	}
1197}
1198
/*
 * Join a multicast group; unlocked entry point.
 *
 * Takes the IN_MULTI lock around in_joingroup_locked() and passes its
 * return value through unchanged.
 *
 * SMPng: XXX: in_joingroup() is called from in_control() when Giant
 * is not held. Fortunately, ifp is unlikely to have been detached
 * at this point, so we assume it's OK to recurse.
 */
int
in_joingroup(struct ifnet *ifp, const struct in_addr *gina,
    /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
{
	int rv;

	IN_MULTI_LOCK();
	rv = in_joingroup_locked(ifp, gina, imf, pinm);
	IN_MULTI_UNLOCK();

	return (rv);
}
1218
1219/*
1220 * Join a multicast group; real entry point.
1221 *
1222 * Only preserves atomicity at inm level.
1223 * NOTE: imf argument cannot be const due to sys/tree.h limitations.
1224 *
1225 * If the IGMP downcall fails, the group is not joined, and an error
1226 * code is returned.
1227 */
1228int
1229in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina,
1230    /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
1231{
1232	struct in_mfilter	 timf;
1233	struct in_multi		*inm;
1234	int			 error;
1235
1236	IN_MULTI_LOCK_ASSERT();
1237	IN_MULTI_LIST_UNLOCK_ASSERT();
1238
1239	CTR4(KTR_IGMPV3, "%s: join 0x%08x on %p(%s))", __func__,
1240	    ntohl(gina->s_addr), ifp, ifp->if_xname);
1241
1242	error = 0;
1243	inm = NULL;
1244
1245	/*
1246	 * If no imf was specified (i.e. kernel consumer),
1247	 * fake one up and assume it is an ASM join.
1248	 */
1249	if (imf == NULL) {
1250		imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE);
1251		imf = &timf;
1252	}
1253
1254	error = in_getmulti(ifp, gina, &inm);
1255	if (error) {
1256		CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__);
1257		return (error);
1258	}
1259	IN_MULTI_LIST_LOCK();
1260	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
1261	error = inm_merge(inm, imf);
1262	if (error) {
1263		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
1264		goto out_inm_release;
1265	}
1266
1267	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
1268	error = igmp_change_state(inm);
1269	if (error) {
1270		CTR1(KTR_IGMPV3, "%s: failed to update source", __func__);
1271		goto out_inm_release;
1272	}
1273
1274 out_inm_release:
1275	if (error) {
1276		CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
1277		IF_ADDR_WLOCK(ifp);
1278		inm_release_deferred(inm);
1279		IF_ADDR_WUNLOCK(ifp);
1280	} else {
1281		*pinm = inm;
1282	}
1283	IN_MULTI_LIST_UNLOCK();
1284
1285	return (error);
1286}
1287
/*
 * Leave a multicast group; unlocked entry point.
 *
 * Takes the IN_MULTI lock around in_leavegroup_locked() and passes its
 * return value through unchanged.
 */
int
in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
{
	int rv;

	IN_MULTI_LOCK();
	rv = in_leavegroup_locked(inm, imf);
	IN_MULTI_UNLOCK();

	return (rv);
}
1302
1303/*
1304 * Leave a multicast group; real entry point.
1305 * All source filters will be expunged.
1306 *
1307 * Only preserves atomicity at inm level.
1308 *
1309 * Holding the write lock for the INP which contains imf
1310 * is highly advisable. We can't assert for it as imf does not
1311 * contain a back-pointer to the owning inp.
1312 *
1313 * Note: This is not the same as inm_release(*) as this function also
1314 * makes a state change downcall into IGMP.
1315 */
1316int
1317in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
1318{
1319	struct in_mfilter	 timf;
1320	int			 error;
1321
1322	IN_MULTI_LOCK_ASSERT();
1323	IN_MULTI_LIST_UNLOCK_ASSERT();
1324
1325	error = 0;
1326
1327	CTR5(KTR_IGMPV3, "%s: leave inm %p, 0x%08x/%s, imf %p", __func__,
1328	    inm, ntohl(inm->inm_addr.s_addr),
1329	    (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname),
1330	    imf);
1331
1332	/*
1333	 * If no imf was specified (i.e. kernel consumer),
1334	 * fake one up and assume it is an ASM join.
1335	 */
1336	if (imf == NULL) {
1337		imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED);
1338		imf = &timf;
1339	}
1340
1341	/*
1342	 * Begin state merge transaction at IGMP layer.
1343	 *
1344	 * As this particular invocation should not cause any memory
1345	 * to be allocated, and there is no opportunity to roll back
1346	 * the transaction, it MUST NOT fail.
1347	 */
1348	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
1349	IN_MULTI_LIST_LOCK();
1350	error = inm_merge(inm, imf);
1351	KASSERT(error == 0, ("%s: failed to merge inm state", __func__));
1352
1353	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
1354	CURVNET_SET(inm->inm_ifp->if_vnet);
1355	error = igmp_change_state(inm);
1356	IF_ADDR_WLOCK(inm->inm_ifp);
1357	inm_release_deferred(inm);
1358	IF_ADDR_WUNLOCK(inm->inm_ifp);
1359	IN_MULTI_LIST_UNLOCK();
1360	CURVNET_RESTORE();
1361	if (error)
1362		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
1363
1364	CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
1365
1366	return (error);
1367}
1368
1369/*#ifndef BURN_BRIDGES*/
1370/*
1371 * Join an IPv4 multicast group in (*,G) exclusive mode.
1372 * The group must be a 224.0.0.0/24 link-scope group.
1373 * This KPI is for legacy kernel consumers only.
1374 */
1375struct in_multi *
1376in_addmulti(struct in_addr *ap, struct ifnet *ifp)
1377{
1378	struct in_multi *pinm;
1379	int error;
1380#ifdef INVARIANTS
1381	char addrbuf[INET_ADDRSTRLEN];
1382#endif
1383
1384	KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)),
1385	    ("%s: %s not in 224.0.0.0/24", __func__,
1386	    inet_ntoa_r(*ap, addrbuf)));
1387
1388	error = in_joingroup(ifp, ap, NULL, &pinm);
1389	if (error != 0)
1390		pinm = NULL;
1391
1392	return (pinm);
1393}
1394
1395/*
1396 * Block or unblock an ASM multicast source on an inpcb.
1397 * This implements the delta-based API described in RFC 3678.
1398 *
1399 * The delta-based API applies only to exclusive-mode memberships.
1400 * An IGMP downcall will be performed.
1401 *
1402 * SMPng: NOTE: Must take Giant as a join may create a new ifma.
1403 *
1404 * Return 0 if successful, otherwise return an appropriate error code.
1405 */
1406static int
1407inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
1408{
1409	struct group_source_req		 gsr;
1410	struct rm_priotracker		 in_ifa_tracker;
1411	sockunion_t			*gsa, *ssa;
1412	struct ifnet			*ifp;
1413	struct in_mfilter		*imf;
1414	struct ip_moptions		*imo;
1415	struct in_msource		*ims;
1416	struct in_multi			*inm;
1417	uint16_t			 fmode;
1418	int				 error, doblock;
1419
1420	ifp = NULL;
1421	error = 0;
1422	doblock = 0;
1423
1424	memset(&gsr, 0, sizeof(struct group_source_req));
1425	gsa = (sockunion_t *)&gsr.gsr_group;
1426	ssa = (sockunion_t *)&gsr.gsr_source;
1427
1428	switch (sopt->sopt_name) {
1429	case IP_BLOCK_SOURCE:
1430	case IP_UNBLOCK_SOURCE: {
1431		struct ip_mreq_source	 mreqs;
1432
1433		error = sooptcopyin(sopt, &mreqs,
1434		    sizeof(struct ip_mreq_source),
1435		    sizeof(struct ip_mreq_source));
1436		if (error)
1437			return (error);
1438
1439		gsa->sin.sin_family = AF_INET;
1440		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1441		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1442
1443		ssa->sin.sin_family = AF_INET;
1444		ssa->sin.sin_len = sizeof(struct sockaddr_in);
1445		ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1446
1447		if (!in_nullhost(mreqs.imr_interface)) {
1448			IN_IFADDR_RLOCK(&in_ifa_tracker);
1449			INADDR_TO_IFP(mreqs.imr_interface, ifp);
1450			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
1451		}
1452		if (sopt->sopt_name == IP_BLOCK_SOURCE)
1453			doblock = 1;
1454
1455		CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p",
1456		    __func__, ntohl(mreqs.imr_interface.s_addr), ifp);
1457		break;
1458	    }
1459
1460	case MCAST_BLOCK_SOURCE:
1461	case MCAST_UNBLOCK_SOURCE:
1462		error = sooptcopyin(sopt, &gsr,
1463		    sizeof(struct group_source_req),
1464		    sizeof(struct group_source_req));
1465		if (error)
1466			return (error);
1467
1468		if (gsa->sin.sin_family != AF_INET ||
1469		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
1470			return (EINVAL);
1471
1472		if (ssa->sin.sin_family != AF_INET ||
1473		    ssa->sin.sin_len != sizeof(struct sockaddr_in))
1474			return (EINVAL);
1475
1476		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
1477			return (EADDRNOTAVAIL);
1478
1479		ifp = ifnet_byindex(gsr.gsr_interface);
1480
1481		if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
1482			doblock = 1;
1483		break;
1484
1485	default:
1486		CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
1487		    __func__, sopt->sopt_name);
1488		return (EOPNOTSUPP);
1489		break;
1490	}
1491
1492	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1493		return (EINVAL);
1494
1495	IN_MULTI_LOCK();
1496
1497	/*
1498	 * Check if we are actually a member of this group.
1499	 */
1500	imo = inp_findmoptions(inp);
1501	imf = imo_match_group(imo, ifp, &gsa->sa);
1502	if (imf == NULL) {
1503		error = EADDRNOTAVAIL;
1504		goto out_inp_locked;
1505	}
1506	inm = imf->imf_inm;
1507
1508	/*
1509	 * Attempting to use the delta-based API on an
1510	 * non exclusive-mode membership is an error.
1511	 */
1512	fmode = imf->imf_st[0];
1513	if (fmode != MCAST_EXCLUDE) {
1514		error = EINVAL;
1515		goto out_inp_locked;
1516	}
1517
1518	/*
1519	 * Deal with error cases up-front:
1520	 *  Asked to block, but already blocked; or
1521	 *  Asked to unblock, but nothing to unblock.
1522	 * If adding a new block entry, allocate it.
1523	 */
1524	ims = imo_match_source(imf, &ssa->sa);
1525	if ((ims != NULL && doblock) || (ims == NULL && !doblock)) {
1526		CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", __func__,
1527		    ntohl(ssa->sin.sin_addr.s_addr), doblock ? "" : "not ");
1528		error = EADDRNOTAVAIL;
1529		goto out_inp_locked;
1530	}
1531
1532	INP_WLOCK_ASSERT(inp);
1533
1534	/*
1535	 * Begin state merge transaction at socket layer.
1536	 */
1537	if (doblock) {
1538		CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block");
1539		ims = imf_graft(imf, fmode, &ssa->sin);
1540		if (ims == NULL)
1541			error = ENOMEM;
1542	} else {
1543		CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow");
1544		error = imf_prune(imf, &ssa->sin);
1545	}
1546
1547	if (error) {
1548		CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__);
1549		goto out_imf_rollback;
1550	}
1551
1552	/*
1553	 * Begin state merge transaction at IGMP layer.
1554	 */
1555	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
1556	IN_MULTI_LIST_LOCK();
1557	error = inm_merge(inm, imf);
1558	if (error) {
1559		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
1560		IN_MULTI_LIST_UNLOCK();
1561		goto out_imf_rollback;
1562	}
1563
1564	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
1565	error = igmp_change_state(inm);
1566	IN_MULTI_LIST_UNLOCK();
1567	if (error)
1568		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
1569
1570out_imf_rollback:
1571	if (error)
1572		imf_rollback(imf);
1573	else
1574		imf_commit(imf);
1575
1576	imf_reap(imf);
1577
1578out_inp_locked:
1579	INP_WUNLOCK(inp);
1580	IN_MULTI_UNLOCK();
1581	return (error);
1582}
1583
1584/*
1585 * Given an inpcb, return its multicast options structure pointer.  Accepts
1586 * an unlocked inpcb pointer, but will return it locked.  May sleep.
1587 *
1588 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
1589 * SMPng: NOTE: Returns with the INP write lock held.
1590 */
1591static struct ip_moptions *
1592inp_findmoptions(struct inpcb *inp)
1593{
1594	struct ip_moptions	 *imo;
1595
1596	INP_WLOCK(inp);
1597	if (inp->inp_moptions != NULL)
1598		return (inp->inp_moptions);
1599
1600	INP_WUNLOCK(inp);
1601
1602	imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK);
1603
1604	imo->imo_multicast_ifp = NULL;
1605	imo->imo_multicast_addr.s_addr = INADDR_ANY;
1606	imo->imo_multicast_vif = -1;
1607	imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1608	imo->imo_multicast_loop = in_mcast_loop;
1609	STAILQ_INIT(&imo->imo_head);
1610
1611	INP_WLOCK(inp);
1612	if (inp->inp_moptions != NULL) {
1613		free(imo, M_IPMOPTS);
1614		return (inp->inp_moptions);
1615	}
1616	inp->inp_moptions = imo;
1617	return (imo);
1618}
1619
1620static void
1621inp_gcmoptions(struct ip_moptions *imo)
1622{
1623	struct in_mfilter *imf;
1624	struct in_multi *inm;
1625	struct ifnet *ifp;
1626
1627	while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) {
1628		ip_mfilter_remove(&imo->imo_head, imf);
1629
1630		imf_leave(imf);
1631		if ((inm = imf->imf_inm) != NULL) {
1632			if ((ifp = inm->inm_ifp) != NULL) {
1633				CURVNET_SET(ifp->if_vnet);
1634				(void)in_leavegroup(inm, imf);
1635				CURVNET_RESTORE();
1636			} else {
1637				(void)in_leavegroup(inm, imf);
1638			}
1639		}
1640		ip_mfilter_free(imf);
1641	}
1642	free(imo, M_IPMOPTS);
1643}
1644
1645/*
1646 * Discard the IP multicast options (and source filters).  To minimize
1647 * the amount of work done while holding locks such as the INP's
1648 * pcbinfo lock (which is used in the receive path), the free
1649 * operation is deferred to the epoch callback task.
1650 */
1651void
1652inp_freemoptions(struct ip_moptions *imo)
1653{
1654	if (imo == NULL)
1655		return;
1656	inp_gcmoptions(imo);
1657}
1658
1659/*
1660 * Atomically get source filters on a socket for an IPv4 multicast group.
1661 * Called with INP lock held; returns with lock released.
1662 */
1663static int
1664inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
1665{
1666	struct __msfilterreq	 msfr;
1667	sockunion_t		*gsa;
1668	struct ifnet		*ifp;
1669	struct ip_moptions	*imo;
1670	struct in_mfilter	*imf;
1671	struct ip_msource	*ims;
1672	struct in_msource	*lims;
1673	struct sockaddr_in	*psin;
1674	struct sockaddr_storage	*ptss;
1675	struct sockaddr_storage	*tss;
1676	int			 error;
1677	size_t			 nsrcs, ncsrcs;
1678
1679	INP_WLOCK_ASSERT(inp);
1680
1681	imo = inp->inp_moptions;
1682	KASSERT(imo != NULL, ("%s: null ip_moptions", __func__));
1683
1684	INP_WUNLOCK(inp);
1685
1686	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
1687	    sizeof(struct __msfilterreq));
1688	if (error)
1689		return (error);
1690
1691	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
1692		return (EINVAL);
1693
1694	ifp = ifnet_byindex(msfr.msfr_ifindex);
1695	if (ifp == NULL)
1696		return (EINVAL);
1697
1698	INP_WLOCK(inp);
1699
1700	/*
1701	 * Lookup group on the socket.
1702	 */
1703	gsa = (sockunion_t *)&msfr.msfr_group;
1704	imf = imo_match_group(imo, ifp, &gsa->sa);
1705	if (imf == NULL) {
1706		INP_WUNLOCK(inp);
1707		return (EADDRNOTAVAIL);
1708	}
1709
1710	/*
1711	 * Ignore memberships which are in limbo.
1712	 */
1713	if (imf->imf_st[1] == MCAST_UNDEFINED) {
1714		INP_WUNLOCK(inp);
1715		return (EAGAIN);
1716	}
1717	msfr.msfr_fmode = imf->imf_st[1];
1718
1719	/*
1720	 * If the user specified a buffer, copy out the source filter
1721	 * entries to userland gracefully.
1722	 * We only copy out the number of entries which userland
1723	 * has asked for, but we always tell userland how big the
1724	 * buffer really needs to be.
1725	 */
1726	if (msfr.msfr_nsrcs > in_mcast_maxsocksrc)
1727		msfr.msfr_nsrcs = in_mcast_maxsocksrc;
1728	tss = NULL;
1729	if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
1730		tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
1731		    M_TEMP, M_NOWAIT | M_ZERO);
1732		if (tss == NULL) {
1733			INP_WUNLOCK(inp);
1734			return (ENOBUFS);
1735		}
1736	}
1737
1738	/*
1739	 * Count number of sources in-mode at t0.
1740	 * If buffer space exists and remains, copy out source entries.
1741	 */
1742	nsrcs = msfr.msfr_nsrcs;
1743	ncsrcs = 0;
1744	ptss = tss;
1745	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
1746		lims = (struct in_msource *)ims;
1747		if (lims->imsl_st[0] == MCAST_UNDEFINED ||
1748		    lims->imsl_st[0] != imf->imf_st[0])
1749			continue;
1750		++ncsrcs;
1751		if (tss != NULL && nsrcs > 0) {
1752			psin = (struct sockaddr_in *)ptss;
1753			psin->sin_family = AF_INET;
1754			psin->sin_len = sizeof(struct sockaddr_in);
1755			psin->sin_addr.s_addr = htonl(lims->ims_haddr);
1756			psin->sin_port = 0;
1757			++ptss;
1758			--nsrcs;
1759		}
1760	}
1761
1762	INP_WUNLOCK(inp);
1763
1764	if (tss != NULL) {
1765		error = copyout(tss, msfr.msfr_srcs,
1766		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
1767		free(tss, M_TEMP);
1768		if (error)
1769			return (error);
1770	}
1771
1772	msfr.msfr_nsrcs = ncsrcs;
1773	error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));
1774
1775	return (error);
1776}
1777
1778/*
1779 * Return the IP multicast options in response to user getsockopt().
1780 */
1781int
1782inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
1783{
1784	struct rm_priotracker	 in_ifa_tracker;
1785	struct ip_mreqn		 mreqn;
1786	struct ip_moptions	*imo;
1787	struct ifnet		*ifp;
1788	struct in_ifaddr	*ia;
1789	int			 error, optval;
1790	u_char			 coptval;
1791
1792	INP_WLOCK(inp);
1793	imo = inp->inp_moptions;
1794	/*
1795	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
1796	 * or is a divert socket, reject it.
1797	 */
1798	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
1799	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
1800	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
1801		INP_WUNLOCK(inp);
1802		return (EOPNOTSUPP);
1803	}
1804
1805	error = 0;
1806	switch (sopt->sopt_name) {
1807	case IP_MULTICAST_VIF:
1808		if (imo != NULL)
1809			optval = imo->imo_multicast_vif;
1810		else
1811			optval = -1;
1812		INP_WUNLOCK(inp);
1813		error = sooptcopyout(sopt, &optval, sizeof(int));
1814		break;
1815
1816	case IP_MULTICAST_IF:
1817		memset(&mreqn, 0, sizeof(struct ip_mreqn));
1818		if (imo != NULL) {
1819			ifp = imo->imo_multicast_ifp;
1820			if (!in_nullhost(imo->imo_multicast_addr)) {
1821				mreqn.imr_address = imo->imo_multicast_addr;
1822			} else if (ifp != NULL) {
1823				struct epoch_tracker et;
1824
1825				mreqn.imr_ifindex = ifp->if_index;
1826				NET_EPOCH_ENTER(et);
1827				IFP_TO_IA(ifp, ia, &in_ifa_tracker);
1828				if (ia != NULL)
1829					mreqn.imr_address =
1830					    IA_SIN(ia)->sin_addr;
1831				NET_EPOCH_EXIT(et);
1832			}
1833		}
1834		INP_WUNLOCK(inp);
1835		if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
1836			error = sooptcopyout(sopt, &mreqn,
1837			    sizeof(struct ip_mreqn));
1838		} else {
1839			error = sooptcopyout(sopt, &mreqn.imr_address,
1840			    sizeof(struct in_addr));
1841		}
1842		break;
1843
1844	case IP_MULTICAST_TTL:
1845		if (imo == NULL)
1846			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
1847		else
1848			optval = coptval = imo->imo_multicast_ttl;
1849		INP_WUNLOCK(inp);
1850		if (sopt->sopt_valsize == sizeof(u_char))
1851			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
1852		else
1853			error = sooptcopyout(sopt, &optval, sizeof(int));
1854		break;
1855
1856	case IP_MULTICAST_LOOP:
1857		if (imo == NULL)
1858			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
1859		else
1860			optval = coptval = imo->imo_multicast_loop;
1861		INP_WUNLOCK(inp);
1862		if (sopt->sopt_valsize == sizeof(u_char))
1863			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
1864		else
1865			error = sooptcopyout(sopt, &optval, sizeof(int));
1866		break;
1867
1868	case IP_MSFILTER:
1869		if (imo == NULL) {
1870			error = EADDRNOTAVAIL;
1871			INP_WUNLOCK(inp);
1872		} else {
1873			error = inp_get_source_filters(inp, sopt);
1874		}
1875		break;
1876
1877	default:
1878		INP_WUNLOCK(inp);
1879		error = ENOPROTOOPT;
1880		break;
1881	}
1882
1883	INP_UNLOCK_ASSERT(inp);
1884
1885	return (error);
1886}
1887
1888/*
1889 * Look up the ifnet to use for a multicast group membership,
1890 * given the IPv4 address of an interface, and the IPv4 group address.
1891 *
1892 * This routine exists to support legacy multicast applications
1893 * which do not understand that multicast memberships are scoped to
1894 * specific physical links in the networking stack, or which need
1895 * to join link-scope groups before IPv4 addresses are configured.
1896 *
1897 * Use this socket's current FIB number for any required FIB lookup.
1898 * If ina is INADDR_ANY, look up the group address in the unicast FIB,
1899 * and use its ifp; usually, this points to the default next-hop.
1900 *
1901 * If the FIB lookup fails, attempt to use the first non-loopback
1902 * interface with multicast capability in the system as a
1903 * last resort. The legacy IPv4 ASM API requires that we do
1904 * this in order to allow groups to be joined when the routing
1905 * table has not yet been populated during boot.
1906 *
1907 * Returns NULL if no ifp could be found, otherwise return referenced ifp.
1908 *
1909 * FUTURE: Implement IPv4 source-address selection.
1910 */
1911static struct ifnet *
1912inp_lookup_mcast_ifp(const struct inpcb *inp,
1913    const struct sockaddr_in *gsin, const struct in_addr ina)
1914{
1915	struct rm_priotracker in_ifa_tracker;
1916	struct ifnet *ifp;
1917	struct nhop_object *nh;
1918
1919	KASSERT(inp != NULL, ("%s: inp must not be NULL", __func__));
1920	KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__));
1921	KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)),
1922	    ("%s: not multicast", __func__));
1923
1924	ifp = NULL;
1925	if (!in_nullhost(ina)) {
1926		IN_IFADDR_RLOCK(&in_ifa_tracker);
1927		INADDR_TO_IFP(ina, ifp);
1928		if (ifp != NULL)
1929			if_ref(ifp);
1930		IN_IFADDR_RUNLOCK(&in_ifa_tracker);
1931	} else {
1932		nh = fib4_lookup(inp->inp_inc.inc_fibnum, gsin->sin_addr, 0, NHR_NONE, 0);
1933		if (nh != NULL) {
1934			ifp = nh->nh_ifp;
1935			if_ref(ifp);
1936		} else {
1937			struct in_ifaddr *ia;
1938			struct ifnet *mifp;
1939
1940			mifp = NULL;
1941			IN_IFADDR_RLOCK(&in_ifa_tracker);
1942			CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
1943				mifp = ia->ia_ifp;
1944				if (!(mifp->if_flags & IFF_LOOPBACK) &&
1945				     (mifp->if_flags & IFF_MULTICAST)) {
1946					ifp = mifp;
1947					if_ref(ifp);
1948					break;
1949				}
1950			}
1951			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
1952		}
1953	}
1954
1955	return (ifp);
1956}
1957
1958/*
1959 * Join an IPv4 multicast group, possibly with a source.
1960 */
1961static int
1962inp_join_group(struct inpcb *inp, struct sockopt *sopt)
1963{
1964	struct group_source_req		 gsr;
1965	sockunion_t			*gsa, *ssa;
1966	struct ifnet			*ifp;
1967	struct in_mfilter		*imf;
1968	struct ip_moptions		*imo;
1969	struct in_multi			*inm;
1970	struct in_msource		*lims;
1971	struct epoch_tracker		 et;
1972	int				 error, is_new;
1973
1974	ifp = NULL;
1975	lims = NULL;
1976	error = 0;
1977
1978	memset(&gsr, 0, sizeof(struct group_source_req));
1979	gsa = (sockunion_t *)&gsr.gsr_group;
1980	gsa->ss.ss_family = AF_UNSPEC;
1981	ssa = (sockunion_t *)&gsr.gsr_source;
1982	ssa->ss.ss_family = AF_UNSPEC;
1983
1984	switch (sopt->sopt_name) {
1985	case IP_ADD_MEMBERSHIP: {
1986		struct ip_mreqn mreqn;
1987
1988		if (sopt->sopt_valsize == sizeof(struct ip_mreqn))
1989			error = sooptcopyin(sopt, &mreqn,
1990			    sizeof(struct ip_mreqn), sizeof(struct ip_mreqn));
1991		else
1992			error = sooptcopyin(sopt, &mreqn,
1993			    sizeof(struct ip_mreq), sizeof(struct ip_mreq));
1994		if (error)
1995			return (error);
1996
1997		gsa->sin.sin_family = AF_INET;
1998		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1999		gsa->sin.sin_addr = mreqn.imr_multiaddr;
2000		if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
2001			return (EINVAL);
2002
2003		NET_EPOCH_ENTER(et);
2004		if (sopt->sopt_valsize == sizeof(struct ip_mreqn) &&
2005		    mreqn.imr_ifindex != 0)
2006			ifp = ifnet_byindex_ref(mreqn.imr_ifindex);
2007		else
2008			ifp = inp_lookup_mcast_ifp(inp, &gsa->sin,
2009			    mreqn.imr_address);
2010		NET_EPOCH_EXIT(et);
2011		break;
2012	}
2013	case IP_ADD_SOURCE_MEMBERSHIP: {
2014		struct ip_mreq_source	 mreqs;
2015
2016		error = sooptcopyin(sopt, &mreqs, sizeof(struct ip_mreq_source),
2017			    sizeof(struct ip_mreq_source));
2018		if (error)
2019			return (error);
2020
2021		gsa->sin.sin_family = ssa->sin.sin_family = AF_INET;
2022		gsa->sin.sin_len = ssa->sin.sin_len =
2023		    sizeof(struct sockaddr_in);
2024
2025		gsa->sin.sin_addr = mreqs.imr_multiaddr;
2026		if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
2027			return (EINVAL);
2028
2029		ssa->sin.sin_addr = mreqs.imr_sourceaddr;
2030
2031		NET_EPOCH_ENTER(et);
2032		ifp = inp_lookup_mcast_ifp(inp, &gsa->sin,
2033		    mreqs.imr_interface);
2034		NET_EPOCH_EXIT(et);
2035		CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p",
2036		    __func__, ntohl(mreqs.imr_interface.s_addr), ifp);
2037		break;
2038	}
2039
2040	case MCAST_JOIN_GROUP:
2041	case MCAST_JOIN_SOURCE_GROUP:
2042		if (sopt->sopt_name == MCAST_JOIN_GROUP) {
2043			error = sooptcopyin(sopt, &gsr,
2044			    sizeof(struct group_req),
2045			    sizeof(struct group_req));
2046		} else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
2047			error = sooptcopyin(sopt, &gsr,
2048			    sizeof(struct group_source_req),
2049			    sizeof(struct group_source_req));
2050		}
2051		if (error)
2052			return (error);
2053
2054		if (gsa->sin.sin_family != AF_INET ||
2055		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
2056			return (EINVAL);
2057
2058		/*
2059		 * Overwrite the port field if present, as the sockaddr
2060		 * being copied in may be matched with a binary comparison.
2061		 */
2062		gsa->sin.sin_port = 0;
2063		if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
2064			if (ssa->sin.sin_family != AF_INET ||
2065			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
2066				return (EINVAL);
2067			ssa->sin.sin_port = 0;
2068		}
2069
2070		if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
2071			return (EINVAL);
2072
2073		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
2074			return (EADDRNOTAVAIL);
2075		NET_EPOCH_ENTER(et);
2076		ifp = ifnet_byindex_ref(gsr.gsr_interface);
2077		NET_EPOCH_EXIT(et);
2078		break;
2079
2080	default:
2081		CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
2082		    __func__, sopt->sopt_name);
2083		return (EOPNOTSUPP);
2084		break;
2085	}
2086
2087	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2088		if (ifp != NULL)
2089			if_rele(ifp);
2090		return (EADDRNOTAVAIL);
2091	}
2092
2093	IN_MULTI_LOCK();
2094
2095	/*
2096	 * Find the membership in the membership list.
2097	 */
2098	imo = inp_findmoptions(inp);
2099	imf = imo_match_group(imo, ifp, &gsa->sa);
2100	if (imf == NULL) {
2101		is_new = 1;
2102		inm = NULL;
2103
2104		if (ip_mfilter_count(&imo->imo_head) >= IP_MAX_MEMBERSHIPS) {
2105			error = ENOMEM;
2106			goto out_inp_locked;
2107		}
2108	} else {
2109		is_new = 0;
2110		inm = imf->imf_inm;
2111
2112		if (ssa->ss.ss_family != AF_UNSPEC) {
2113			/*
2114			 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership
2115			 * is an error. On an existing inclusive membership,
2116			 * it just adds the source to the filter list.
2117			 */
2118			if (imf->imf_st[1] != MCAST_INCLUDE) {
2119				error = EINVAL;
2120				goto out_inp_locked;
2121			}
2122			/*
2123			 * Throw out duplicates.
2124			 *
2125			 * XXX FIXME: This makes a naive assumption that
2126			 * even if entries exist for *ssa in this imf,
2127			 * they will be rejected as dupes, even if they
2128			 * are not valid in the current mode (in-mode).
2129			 *
2130			 * in_msource is transactioned just as for anything
2131			 * else in SSM -- but note naive use of inm_graft()
2132			 * below for allocating new filter entries.
2133			 *
2134			 * This is only an issue if someone mixes the
2135			 * full-state SSM API with the delta-based API,
2136			 * which is discouraged in the relevant RFCs.
2137			 */
2138			lims = imo_match_source(imf, &ssa->sa);
2139			if (lims != NULL /*&&
2140			    lims->imsl_st[1] == MCAST_INCLUDE*/) {
2141				error = EADDRNOTAVAIL;
2142				goto out_inp_locked;
2143			}
2144		} else {
2145			/*
2146			 * MCAST_JOIN_GROUP on an existing exclusive
2147			 * membership is an error; return EADDRINUSE
2148			 * to preserve 4.4BSD API idempotence, and
2149			 * avoid tedious detour to code below.
2150			 * NOTE: This is bending RFC 3678 a bit.
2151			 *
2152			 * On an existing inclusive membership, this is also
2153			 * an error; if you want to change filter mode,
2154			 * you must use the userland API setsourcefilter().
2155			 * XXX We don't reject this for imf in UNDEFINED
2156			 * state at t1, because allocation of a filter
2157			 * is atomic with allocation of a membership.
2158			 */
2159			error = EINVAL;
2160			if (imf->imf_st[1] == MCAST_EXCLUDE)
2161				error = EADDRINUSE;
2162			goto out_inp_locked;
2163		}
2164	}
2165
2166	/*
2167	 * Begin state merge transaction at socket layer.
2168	 */
2169	INP_WLOCK_ASSERT(inp);
2170
2171	/*
2172	 * Graft new source into filter list for this inpcb's
2173	 * membership of the group. The in_multi may not have
2174	 * been allocated yet if this is a new membership, however,
2175	 * the in_mfilter slot will be allocated and must be initialized.
2176	 *
2177	 * Note: Grafting of exclusive mode filters doesn't happen
2178	 * in this path.
2179	 * XXX: Should check for non-NULL lims (node exists but may
2180	 * not be in-mode) for interop with full-state API.
2181	 */
2182	if (ssa->ss.ss_family != AF_UNSPEC) {
2183		/* Membership starts in IN mode */
2184		if (is_new) {
2185			CTR1(KTR_IGMPV3, "%s: new join w/source", __func__);
2186			imf = ip_mfilter_alloc(M_NOWAIT, MCAST_UNDEFINED, MCAST_INCLUDE);
2187			if (imf == NULL) {
2188				error = ENOMEM;
2189				goto out_inp_locked;
2190			}
2191		} else {
2192			CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow");
2193		}
2194		lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin);
2195		if (lims == NULL) {
2196			CTR1(KTR_IGMPV3, "%s: merge imf state failed",
2197			    __func__);
2198			error = ENOMEM;
2199			goto out_inp_locked;
2200		}
2201	} else {
2202		/* No address specified; Membership starts in EX mode */
2203		if (is_new) {
2204			CTR1(KTR_IGMPV3, "%s: new join w/o source", __func__);
2205			imf = ip_mfilter_alloc(M_NOWAIT, MCAST_UNDEFINED, MCAST_EXCLUDE);
2206			if (imf == NULL) {
2207				error = ENOMEM;
2208				goto out_inp_locked;
2209			}
2210		}
2211	}
2212
2213	/*
2214	 * Begin state merge transaction at IGMP layer.
2215	 */
2216	if (is_new) {
2217		in_pcbref(inp);
2218		INP_WUNLOCK(inp);
2219
2220		error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf,
2221		    &imf->imf_inm);
2222
2223		INP_WLOCK(inp);
2224		if (in_pcbrele_wlocked(inp)) {
2225			error = ENXIO;
2226			goto out_inp_unlocked;
2227		}
2228		if (error) {
2229                        CTR1(KTR_IGMPV3, "%s: in_joingroup_locked failed",
2230                            __func__);
2231			goto out_inp_locked;
2232		}
2233		/*
2234		 * NOTE: Refcount from in_joingroup_locked()
2235		 * is protecting membership.
2236		 */
2237		ip_mfilter_insert(&imo->imo_head, imf);
2238	} else {
2239		CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
2240		IN_MULTI_LIST_LOCK();
2241		error = inm_merge(inm, imf);
2242		if (error) {
2243			CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
2244				 __func__);
2245			IN_MULTI_LIST_UNLOCK();
2246			imf_rollback(imf);
2247			imf_reap(imf);
2248			goto out_inp_locked;
2249		}
2250		CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
2251		error = igmp_change_state(inm);
2252		IN_MULTI_LIST_UNLOCK();
2253		if (error) {
2254			CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
2255			    __func__);
2256			imf_rollback(imf);
2257			imf_reap(imf);
2258			goto out_inp_locked;
2259		}
2260	}
2261
2262	imf_commit(imf);
2263	imf = NULL;
2264
2265out_inp_locked:
2266	INP_WUNLOCK(inp);
2267out_inp_unlocked:
2268	IN_MULTI_UNLOCK();
2269
2270	if (is_new && imf) {
2271		if (imf->imf_inm != NULL) {
2272			IN_MULTI_LIST_LOCK();
2273			IF_ADDR_WLOCK(ifp);
2274			inm_release_deferred(imf->imf_inm);
2275			IF_ADDR_WUNLOCK(ifp);
2276			IN_MULTI_LIST_UNLOCK();
2277		}
2278		ip_mfilter_free(imf);
2279	}
2280	if_rele(ifp);
2281	return (error);
2282}
2283
2284/*
2285 * Leave an IPv4 multicast group on an inpcb, possibly with a source.
2286 */
2287static int
2288inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
2289{
2290	struct group_source_req		 gsr;
2291	struct ip_mreq_source		 mreqs;
2292	struct rm_priotracker		 in_ifa_tracker;
2293	sockunion_t			*gsa, *ssa;
2294	struct ifnet			*ifp;
2295	struct in_mfilter		*imf;
2296	struct ip_moptions		*imo;
2297	struct in_msource		*ims;
2298	struct in_multi			*inm;
2299	int				 error;
2300	bool				 is_final;
2301
2302	ifp = NULL;
2303	error = 0;
2304	is_final = true;
2305
2306	memset(&gsr, 0, sizeof(struct group_source_req));
2307	gsa = (sockunion_t *)&gsr.gsr_group;
2308	gsa->ss.ss_family = AF_UNSPEC;
2309	ssa = (sockunion_t *)&gsr.gsr_source;
2310	ssa->ss.ss_family = AF_UNSPEC;
2311
2312	switch (sopt->sopt_name) {
2313	case IP_DROP_MEMBERSHIP:
2314	case IP_DROP_SOURCE_MEMBERSHIP:
2315		if (sopt->sopt_name == IP_DROP_MEMBERSHIP) {
2316			error = sooptcopyin(sopt, &mreqs,
2317			    sizeof(struct ip_mreq),
2318			    sizeof(struct ip_mreq));
2319			/*
2320			 * Swap interface and sourceaddr arguments,
2321			 * as ip_mreq and ip_mreq_source are laid
2322			 * out differently.
2323			 */
2324			mreqs.imr_interface = mreqs.imr_sourceaddr;
2325			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
2326		} else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
2327			error = sooptcopyin(sopt, &mreqs,
2328			    sizeof(struct ip_mreq_source),
2329			    sizeof(struct ip_mreq_source));
2330		}
2331		if (error)
2332			return (error);
2333
2334		gsa->sin.sin_family = AF_INET;
2335		gsa->sin.sin_len = sizeof(struct sockaddr_in);
2336		gsa->sin.sin_addr = mreqs.imr_multiaddr;
2337
2338		if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
2339			ssa->sin.sin_family = AF_INET;
2340			ssa->sin.sin_len = sizeof(struct sockaddr_in);
2341			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
2342		}
2343
2344		/*
2345		 * Attempt to look up hinted ifp from interface address.
2346		 * Fallthrough with null ifp iff lookup fails, to
2347		 * preserve 4.4BSD mcast API idempotence.
2348		 * XXX NOTE WELL: The RFC 3678 API is preferred because
2349		 * using an IPv4 address as a key is racy.
2350		 */
2351		if (!in_nullhost(mreqs.imr_interface)) {
2352			IN_IFADDR_RLOCK(&in_ifa_tracker);
2353			INADDR_TO_IFP(mreqs.imr_interface, ifp);
2354			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
2355		}
2356		CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p",
2357		    __func__, ntohl(mreqs.imr_interface.s_addr), ifp);
2358
2359		break;
2360
2361	case MCAST_LEAVE_GROUP:
2362	case MCAST_LEAVE_SOURCE_GROUP:
2363		if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
2364			error = sooptcopyin(sopt, &gsr,
2365			    sizeof(struct group_req),
2366			    sizeof(struct group_req));
2367		} else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
2368			error = sooptcopyin(sopt, &gsr,
2369			    sizeof(struct group_source_req),
2370			    sizeof(struct group_source_req));
2371		}
2372		if (error)
2373			return (error);
2374
2375		if (gsa->sin.sin_family != AF_INET ||
2376		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
2377			return (EINVAL);
2378
2379		if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
2380			if (ssa->sin.sin_family != AF_INET ||
2381			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
2382				return (EINVAL);
2383		}
2384
2385		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
2386			return (EADDRNOTAVAIL);
2387
2388		ifp = ifnet_byindex(gsr.gsr_interface);
2389
2390		if (ifp == NULL)
2391			return (EADDRNOTAVAIL);
2392		break;
2393
2394	default:
2395		CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
2396		    __func__, sopt->sopt_name);
2397		return (EOPNOTSUPP);
2398		break;
2399	}
2400
2401	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
2402		return (EINVAL);
2403
2404	IN_MULTI_LOCK();
2405
2406	/*
2407	 * Find the membership in the membership list.
2408	 */
2409	imo = inp_findmoptions(inp);
2410	imf = imo_match_group(imo, ifp, &gsa->sa);
2411	if (imf == NULL) {
2412		error = EADDRNOTAVAIL;
2413		goto out_inp_locked;
2414	}
2415	inm = imf->imf_inm;
2416
2417	if (ssa->ss.ss_family != AF_UNSPEC)
2418		is_final = false;
2419
2420	/*
2421	 * Begin state merge transaction at socket layer.
2422	 */
2423	INP_WLOCK_ASSERT(inp);
2424
2425	/*
2426	 * If we were instructed only to leave a given source, do so.
2427	 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships.
2428	 */
2429	if (is_final) {
2430		ip_mfilter_remove(&imo->imo_head, imf);
2431		imf_leave(imf);
2432
2433		/*
2434		 * Give up the multicast address record to which
2435		 * the membership points.
2436		 */
2437		(void) in_leavegroup_locked(imf->imf_inm, imf);
2438	} else {
2439		if (imf->imf_st[0] == MCAST_EXCLUDE) {
2440			error = EADDRNOTAVAIL;
2441			goto out_inp_locked;
2442		}
2443		ims = imo_match_source(imf, &ssa->sa);
2444		if (ims == NULL) {
2445			CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent",
2446			    __func__, ntohl(ssa->sin.sin_addr.s_addr), "not ");
2447			error = EADDRNOTAVAIL;
2448			goto out_inp_locked;
2449		}
2450		CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block");
2451		error = imf_prune(imf, &ssa->sin);
2452		if (error) {
2453			CTR1(KTR_IGMPV3, "%s: merge imf state failed",
2454			    __func__);
2455			goto out_inp_locked;
2456		}
2457	}
2458
2459	/*
2460	 * Begin state merge transaction at IGMP layer.
2461	 */
2462	if (!is_final) {
2463		CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
2464		IN_MULTI_LIST_LOCK();
2465		error = inm_merge(inm, imf);
2466		if (error) {
2467			CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
2468			    __func__);
2469			IN_MULTI_LIST_UNLOCK();
2470			imf_rollback(imf);
2471			imf_reap(imf);
2472			goto out_inp_locked;
2473		}
2474
2475		CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
2476		error = igmp_change_state(inm);
2477		IN_MULTI_LIST_UNLOCK();
2478		if (error) {
2479			CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
2480			    __func__);
2481			imf_rollback(imf);
2482			imf_reap(imf);
2483			goto out_inp_locked;
2484		}
2485	}
2486	imf_commit(imf);
2487	imf_reap(imf);
2488
2489out_inp_locked:
2490	INP_WUNLOCK(inp);
2491
2492	if (is_final && imf)
2493		ip_mfilter_free(imf);
2494
2495	IN_MULTI_UNLOCK();
2496	return (error);
2497}
2498
2499/*
2500 * Select the interface for transmitting IPv4 multicast datagrams.
2501 *
2502 * Either an instance of struct in_addr or an instance of struct ip_mreqn
2503 * may be passed to this socket option. An address of INADDR_ANY or an
2504 * interface index of 0 is used to remove a previous selection.
2505 * When no interface is selected, one is chosen for every send.
2506 */
2507static int
2508inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
2509{
2510	struct rm_priotracker	 in_ifa_tracker;
2511	struct in_addr		 addr;
2512	struct ip_mreqn		 mreqn;
2513	struct ifnet		*ifp;
2514	struct ip_moptions	*imo;
2515	int			 error;
2516
2517	if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
2518		/*
2519		 * An interface index was specified using the
2520		 * Linux-derived ip_mreqn structure.
2521		 */
2522		error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn),
2523		    sizeof(struct ip_mreqn));
2524		if (error)
2525			return (error);
2526
2527		if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex)
2528			return (EINVAL);
2529
2530		if (mreqn.imr_ifindex == 0) {
2531			ifp = NULL;
2532		} else {
2533			ifp = ifnet_byindex(mreqn.imr_ifindex);
2534			if (ifp == NULL)
2535				return (EADDRNOTAVAIL);
2536		}
2537	} else {
2538		/*
2539		 * An interface was specified by IPv4 address.
2540		 * This is the traditional BSD usage.
2541		 */
2542		error = sooptcopyin(sopt, &addr, sizeof(struct in_addr),
2543		    sizeof(struct in_addr));
2544		if (error)
2545			return (error);
2546		if (in_nullhost(addr)) {
2547			ifp = NULL;
2548		} else {
2549			IN_IFADDR_RLOCK(&in_ifa_tracker);
2550			INADDR_TO_IFP(addr, ifp);
2551			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
2552			if (ifp == NULL)
2553				return (EADDRNOTAVAIL);
2554		}
2555		CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = 0x%08x", __func__, ifp,
2556		    ntohl(addr.s_addr));
2557	}
2558
2559	/* Reject interfaces which do not support multicast. */
2560	if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0)
2561		return (EOPNOTSUPP);
2562
2563	imo = inp_findmoptions(inp);
2564	imo->imo_multicast_ifp = ifp;
2565	imo->imo_multicast_addr.s_addr = INADDR_ANY;
2566	INP_WUNLOCK(inp);
2567
2568	return (0);
2569}
2570
2571/*
2572 * Atomically set source filters on a socket for an IPv4 multicast group.
2573 *
2574 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
2575 */
2576static int
2577inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
2578{
2579	struct __msfilterreq	 msfr;
2580	sockunion_t		*gsa;
2581	struct ifnet		*ifp;
2582	struct in_mfilter	*imf;
2583	struct ip_moptions	*imo;
2584	struct in_multi		*inm;
2585	int			 error;
2586
2587	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
2588	    sizeof(struct __msfilterreq));
2589	if (error)
2590		return (error);
2591
2592	if (msfr.msfr_nsrcs > in_mcast_maxsocksrc)
2593		return (ENOBUFS);
2594
2595	if ((msfr.msfr_fmode != MCAST_EXCLUDE &&
2596	     msfr.msfr_fmode != MCAST_INCLUDE))
2597		return (EINVAL);
2598
2599	if (msfr.msfr_group.ss_family != AF_INET ||
2600	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in))
2601		return (EINVAL);
2602
2603	gsa = (sockunion_t *)&msfr.msfr_group;
2604	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
2605		return (EINVAL);
2606
2607	gsa->sin.sin_port = 0;	/* ignore port */
2608
2609	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
2610		return (EADDRNOTAVAIL);
2611
2612	ifp = ifnet_byindex(msfr.msfr_ifindex);
2613	if (ifp == NULL)
2614		return (EADDRNOTAVAIL);
2615
2616	IN_MULTI_LOCK();
2617
2618	/*
2619	 * Take the INP write lock.
2620	 * Check if this socket is a member of this group.
2621	 */
2622	imo = inp_findmoptions(inp);
2623	imf = imo_match_group(imo, ifp, &gsa->sa);
2624	if (imf == NULL) {
2625		error = EADDRNOTAVAIL;
2626		goto out_inp_locked;
2627	}
2628	inm = imf->imf_inm;
2629
2630	/*
2631	 * Begin state merge transaction at socket layer.
2632	 */
2633	INP_WLOCK_ASSERT(inp);
2634
2635	imf->imf_st[1] = msfr.msfr_fmode;
2636
2637	/*
2638	 * Apply any new source filters, if present.
2639	 * Make a copy of the user-space source vector so
2640	 * that we may copy them with a single copyin. This
2641	 * allows us to deal with page faults up-front.
2642	 */
2643	if (msfr.msfr_nsrcs > 0) {
2644		struct in_msource	*lims;
2645		struct sockaddr_in	*psin;
2646		struct sockaddr_storage	*kss, *pkss;
2647		int			 i;
2648
2649		INP_WUNLOCK(inp);
2650
2651		CTR2(KTR_IGMPV3, "%s: loading %lu source list entries",
2652		    __func__, (unsigned long)msfr.msfr_nsrcs);
2653		kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
2654		    M_TEMP, M_WAITOK);
2655		error = copyin(msfr.msfr_srcs, kss,
2656		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
2657		if (error) {
2658			free(kss, M_TEMP);
2659			return (error);
2660		}
2661
2662		INP_WLOCK(inp);
2663
2664		/*
2665		 * Mark all source filters as UNDEFINED at t1.
2666		 * Restore new group filter mode, as imf_leave()
2667		 * will set it to INCLUDE.
2668		 */
2669		imf_leave(imf);
2670		imf->imf_st[1] = msfr.msfr_fmode;
2671
2672		/*
2673		 * Update socket layer filters at t1, lazy-allocating
2674		 * new entries. This saves a bunch of memory at the
2675		 * cost of one RB_FIND() per source entry; duplicate
2676		 * entries in the msfr_nsrcs vector are ignored.
2677		 * If we encounter an error, rollback transaction.
2678		 *
2679		 * XXX This too could be replaced with a set-symmetric
2680		 * difference like loop to avoid walking from root
2681		 * every time, as the key space is common.
2682		 */
2683		for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) {
2684			psin = (struct sockaddr_in *)pkss;
2685			if (psin->sin_family != AF_INET) {
2686				error = EAFNOSUPPORT;
2687				break;
2688			}
2689			if (psin->sin_len != sizeof(struct sockaddr_in)) {
2690				error = EINVAL;
2691				break;
2692			}
2693			error = imf_get_source(imf, psin, &lims);
2694			if (error)
2695				break;
2696			lims->imsl_st[1] = imf->imf_st[1];
2697		}
2698		free(kss, M_TEMP);
2699	}
2700
2701	if (error)
2702		goto out_imf_rollback;
2703
2704	INP_WLOCK_ASSERT(inp);
2705
2706	/*
2707	 * Begin state merge transaction at IGMP layer.
2708	 */
2709	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
2710	IN_MULTI_LIST_LOCK();
2711	error = inm_merge(inm, imf);
2712	if (error) {
2713		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
2714		IN_MULTI_LIST_UNLOCK();
2715		goto out_imf_rollback;
2716	}
2717
2718	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
2719	error = igmp_change_state(inm);
2720	IN_MULTI_LIST_UNLOCK();
2721	if (error)
2722		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
2723
2724out_imf_rollback:
2725	if (error)
2726		imf_rollback(imf);
2727	else
2728		imf_commit(imf);
2729
2730	imf_reap(imf);
2731
2732out_inp_locked:
2733	INP_WUNLOCK(inp);
2734	IN_MULTI_UNLOCK();
2735	return (error);
2736}
2737
2738/*
2739 * Set the IP multicast options in response to user setsockopt().
2740 *
2741 * Many of the socket options handled in this function duplicate the
2742 * functionality of socket options in the regular unicast API. However,
2743 * it is not possible to merge the duplicate code, because the idempotence
2744 * of the IPv4 multicast part of the BSD Sockets API must be preserved;
2745 * the effects of these options must be treated as separate and distinct.
2746 *
2747 * SMPng: XXX: Unlocked read of inp_socket believed OK.
2748 * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING
2749 * is refactored to no longer use vifs.
2750 */
2751int
2752inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
2753{
2754	struct ip_moptions	*imo;
2755	int			 error;
2756
2757	error = 0;
2758
2759	/*
2760	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
2761	 * or is a divert socket, reject it.
2762	 */
2763	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
2764	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
2765	     inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
2766		return (EOPNOTSUPP);
2767
2768	switch (sopt->sopt_name) {
2769	case IP_MULTICAST_VIF: {
2770		int vifi;
2771		/*
2772		 * Select a multicast VIF for transmission.
2773		 * Only useful if multicast forwarding is active.
2774		 */
2775		if (legal_vif_num == NULL) {
2776			error = EOPNOTSUPP;
2777			break;
2778		}
2779		error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
2780		if (error)
2781			break;
2782		if (!legal_vif_num(vifi) && (vifi != -1)) {
2783			error = EINVAL;
2784			break;
2785		}
2786		imo = inp_findmoptions(inp);
2787		imo->imo_multicast_vif = vifi;
2788		INP_WUNLOCK(inp);
2789		break;
2790	}
2791
2792	case IP_MULTICAST_IF:
2793		error = inp_set_multicast_if(inp, sopt);
2794		break;
2795
2796	case IP_MULTICAST_TTL: {
2797		u_char ttl;
2798
2799		/*
2800		 * Set the IP time-to-live for outgoing multicast packets.
2801		 * The original multicast API required a char argument,
2802		 * which is inconsistent with the rest of the socket API.
2803		 * We allow either a char or an int.
2804		 */
2805		if (sopt->sopt_valsize == sizeof(u_char)) {
2806			error = sooptcopyin(sopt, &ttl, sizeof(u_char),
2807			    sizeof(u_char));
2808			if (error)
2809				break;
2810		} else {
2811			u_int ittl;
2812
2813			error = sooptcopyin(sopt, &ittl, sizeof(u_int),
2814			    sizeof(u_int));
2815			if (error)
2816				break;
2817			if (ittl > 255) {
2818				error = EINVAL;
2819				break;
2820			}
2821			ttl = (u_char)ittl;
2822		}
2823		imo = inp_findmoptions(inp);
2824		imo->imo_multicast_ttl = ttl;
2825		INP_WUNLOCK(inp);
2826		break;
2827	}
2828
2829	case IP_MULTICAST_LOOP: {
2830		u_char loop;
2831
2832		/*
2833		 * Set the loopback flag for outgoing multicast packets.
2834		 * Must be zero or one.  The original multicast API required a
2835		 * char argument, which is inconsistent with the rest
2836		 * of the socket API.  We allow either a char or an int.
2837		 */
2838		if (sopt->sopt_valsize == sizeof(u_char)) {
2839			error = sooptcopyin(sopt, &loop, sizeof(u_char),
2840			    sizeof(u_char));
2841			if (error)
2842				break;
2843		} else {
2844			u_int iloop;
2845
2846			error = sooptcopyin(sopt, &iloop, sizeof(u_int),
2847					    sizeof(u_int));
2848			if (error)
2849				break;
2850			loop = (u_char)iloop;
2851		}
2852		imo = inp_findmoptions(inp);
2853		imo->imo_multicast_loop = !!loop;
2854		INP_WUNLOCK(inp);
2855		break;
2856	}
2857
2858	case IP_ADD_MEMBERSHIP:
2859	case IP_ADD_SOURCE_MEMBERSHIP:
2860	case MCAST_JOIN_GROUP:
2861	case MCAST_JOIN_SOURCE_GROUP:
2862		error = inp_join_group(inp, sopt);
2863		break;
2864
2865	case IP_DROP_MEMBERSHIP:
2866	case IP_DROP_SOURCE_MEMBERSHIP:
2867	case MCAST_LEAVE_GROUP:
2868	case MCAST_LEAVE_SOURCE_GROUP:
2869		error = inp_leave_group(inp, sopt);
2870		break;
2871
2872	case IP_BLOCK_SOURCE:
2873	case IP_UNBLOCK_SOURCE:
2874	case MCAST_BLOCK_SOURCE:
2875	case MCAST_UNBLOCK_SOURCE:
2876		error = inp_block_unblock_source(inp, sopt);
2877		break;
2878
2879	case IP_MSFILTER:
2880		error = inp_set_source_filters(inp, sopt);
2881		break;
2882
2883	default:
2884		error = EOPNOTSUPP;
2885		break;
2886	}
2887
2888	INP_UNLOCK_ASSERT(inp);
2889
2890	return (error);
2891}
2892
2893/*
2894 * Expose IGMP's multicast filter mode and source list(s) to userland,
2895 * keyed by (ifindex, group).
2896 * The filter mode is written out as a uint32_t, followed by
2897 * 0..n of struct in_addr.
2898 * For use by ifmcstat(8).
2899 * SMPng: NOTE: unlocked read of ifindex space.
2900 */
2901static int
2902sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS)
2903{
2904	struct in_addr			 src, group;
2905	struct epoch_tracker		 et;
2906	struct ifnet			*ifp;
2907	struct ifmultiaddr		*ifma;
2908	struct in_multi			*inm;
2909	struct ip_msource		*ims;
2910	int				*name;
2911	int				 retval;
2912	u_int				 namelen;
2913	uint32_t			 fmode, ifindex;
2914
2915	name = (int *)arg1;
2916	namelen = arg2;
2917
2918	if (req->newptr != NULL)
2919		return (EPERM);
2920
2921	if (namelen != 2)
2922		return (EINVAL);
2923
2924	ifindex = name[0];
2925	if (ifindex <= 0 || ifindex > V_if_index) {
2926		CTR2(KTR_IGMPV3, "%s: ifindex %u out of range",
2927		    __func__, ifindex);
2928		return (ENOENT);
2929	}
2930
2931	group.s_addr = name[1];
2932	if (!IN_MULTICAST(ntohl(group.s_addr))) {
2933		CTR2(KTR_IGMPV3, "%s: group 0x%08x is not multicast",
2934		    __func__, ntohl(group.s_addr));
2935		return (EINVAL);
2936	}
2937
2938	NET_EPOCH_ENTER(et);
2939	ifp = ifnet_byindex(ifindex);
2940	if (ifp == NULL) {
2941		NET_EPOCH_EXIT(et);
2942		CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u",
2943		    __func__, ifindex);
2944		return (ENOENT);
2945	}
2946
2947	retval = sysctl_wire_old_buffer(req,
2948	    sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr)));
2949	if (retval) {
2950		NET_EPOCH_EXIT(et);
2951		return (retval);
2952	}
2953
2954	IN_MULTI_LIST_LOCK();
2955
2956	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2957		if (ifma->ifma_addr->sa_family != AF_INET ||
2958		    ifma->ifma_protospec == NULL)
2959			continue;
2960		inm = (struct in_multi *)ifma->ifma_protospec;
2961		if (!in_hosteq(inm->inm_addr, group))
2962			continue;
2963		fmode = inm->inm_st[1].iss_fmode;
2964		retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t));
2965		if (retval != 0)
2966			break;
2967		RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
2968			CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", __func__,
2969			    ims->ims_haddr);
2970			/*
2971			 * Only copy-out sources which are in-mode.
2972			 */
2973			if (fmode != ims_get_mode(inm, ims, 1)) {
2974				CTR1(KTR_IGMPV3, "%s: skip non-in-mode",
2975				    __func__);
2976				continue;
2977			}
2978			src.s_addr = htonl(ims->ims_haddr);
2979			retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr));
2980			if (retval != 0)
2981				break;
2982		}
2983	}
2984
2985	IN_MULTI_LIST_UNLOCK();
2986	NET_EPOCH_EXIT(et);
2987
2988	return (retval);
2989}
2990
2991#if defined(KTR) && (KTR_COMPILE & KTR_IGMPV3)
2992
2993static const char *inm_modestrs[] = {
2994	[MCAST_UNDEFINED] = "un",
2995	[MCAST_INCLUDE] = "in",
2996	[MCAST_EXCLUDE] = "ex",
2997};
2998_Static_assert(MCAST_UNDEFINED == 0 &&
2999	       MCAST_EXCLUDE + 1 == nitems(inm_modestrs),
3000	       "inm_modestrs: no longer matches #defines");
3001
3002static const char *
3003inm_mode_str(const int mode)
3004{
3005
3006	if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE)
3007		return (inm_modestrs[mode]);
3008	return ("??");
3009}
3010
3011static const char *inm_statestrs[] = {
3012	[IGMP_NOT_MEMBER] = "not-member",
3013	[IGMP_SILENT_MEMBER] = "silent",
3014	[IGMP_REPORTING_MEMBER] = "reporting",
3015	[IGMP_IDLE_MEMBER] = "idle",
3016	[IGMP_LAZY_MEMBER] = "lazy",
3017	[IGMP_SLEEPING_MEMBER] = "sleeping",
3018	[IGMP_AWAKENING_MEMBER] = "awakening",
3019	[IGMP_G_QUERY_PENDING_MEMBER] = "query-pending",
3020	[IGMP_SG_QUERY_PENDING_MEMBER] = "sg-query-pending",
3021	[IGMP_LEAVING_MEMBER] = "leaving",
3022};
3023_Static_assert(IGMP_NOT_MEMBER == 0 &&
3024	       IGMP_LEAVING_MEMBER + 1 == nitems(inm_statestrs),
3025	       "inm_statetrs: no longer matches #defines");
3026
3027static const char *
3028inm_state_str(const int state)
3029{
3030
3031	if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER)
3032		return (inm_statestrs[state]);
3033	return ("??");
3034}
3035
3036/*
3037 * Dump an in_multi structure to the console.
3038 */
3039void
3040inm_print(const struct in_multi *inm)
3041{
3042	int t;
3043	char addrbuf[INET_ADDRSTRLEN];
3044
3045	if ((ktr_mask & KTR_IGMPV3) == 0)
3046		return;
3047
3048	printf("%s: --- begin inm %p ---\n", __func__, inm);
3049	printf("addr %s ifp %p(%s) ifma %p\n",
3050	    inet_ntoa_r(inm->inm_addr, addrbuf),
3051	    inm->inm_ifp,
3052	    inm->inm_ifp->if_xname,
3053	    inm->inm_ifma);
3054	printf("timer %u state %s refcount %u scq.len %u\n",
3055	    inm->inm_timer,
3056	    inm_state_str(inm->inm_state),
3057	    inm->inm_refcount,
3058	    inm->inm_scq.mq_len);
3059	printf("igi %p nsrc %lu sctimer %u scrv %u\n",
3060	    inm->inm_igi,
3061	    inm->inm_nsrc,
3062	    inm->inm_sctimer,
3063	    inm->inm_scrv);
3064	for (t = 0; t < 2; t++) {
3065		printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t,
3066		    inm_mode_str(inm->inm_st[t].iss_fmode),
3067		    inm->inm_st[t].iss_asm,
3068		    inm->inm_st[t].iss_ex,
3069		    inm->inm_st[t].iss_in,
3070		    inm->inm_st[t].iss_rec);
3071	}
3072	printf("%s: --- end inm %p ---\n", __func__, inm);
3073}
3074
3075#else /* !KTR || !(KTR_COMPILE & KTR_IGMPV3) */
3076
/*
 * Stub used when IGMPv3 KTR tracing is not compiled in: prints nothing.
 */
void
inm_print(const struct in_multi *inm)
{

	(void)inm;
}
3082
3083#endif /* KTR && (KTR_COMPILE & KTR_IGMPV3) */
3084
/*
 * Instantiate the red-black tree routines for ip_msource_tree: nodes are
 * struct ip_msource, linked through their ims_link field and ordered by
 * ip_msource_cmp().
 */
RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp);
3086