in_mcast.c revision 260796
1/*-
2 * Copyright (c) 2007-2009 Bruce Simpson.
3 * Copyright (c) 2005 Robert N. M. Watson.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote
15 *    products derived from this software without specific prior written
16 *    permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31/*
32 * IPv4 multicast socket, group, and socket option processing module.
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/netinet/in_mcast.c 260796 2014-01-16 22:14:54Z gnn $");
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/kernel.h>
41#include <sys/malloc.h>
42#include <sys/mbuf.h>
43#include <sys/protosw.h>
44#include <sys/socket.h>
45#include <sys/socketvar.h>
46#include <sys/protosw.h>
47#include <sys/sysctl.h>
48#include <sys/ktr.h>
49#include <sys/taskqueue.h>
50#include <sys/tree.h>
51
52#include <net/if.h>
53#include <net/if_var.h>
54#include <net/if_dl.h>
55#include <net/route.h>
56#include <net/vnet.h>
57
58#include <netinet/in.h>
59#include <netinet/in_systm.h>
60#include <netinet/in_pcb.h>
61#include <netinet/in_var.h>
62#include <netinet/ip_var.h>
63#include <netinet/igmp_var.h>
64
65#ifndef KTR_IGMPV3
66#define KTR_IGMPV3 KTR_INET
67#endif
68
69#ifndef __SOCKUNION_DECLARED
70union sockunion {
71	struct sockaddr_storage	ss;
72	struct sockaddr		sa;
73	struct sockaddr_dl	sdl;
74	struct sockaddr_in	sin;
75};
76typedef union sockunion sockunion_t;
77#define __SOCKUNION_DECLARED
78#endif /* __SOCKUNION_DECLARED */
79
80static MALLOC_DEFINE(M_INMFILTER, "in_mfilter",
81    "IPv4 multicast PCB-layer source filter");
82static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group");
83static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options");
84static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource",
85    "IPv4 multicast IGMP-layer source filter");
86
87/*
88 * Locking:
89 * - Lock order is: Giant, INP_WLOCK, IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
90 * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however
91 *   it can be taken by code in net/if.c also.
92 * - ip_moptions and in_mfilter are covered by the INP_WLOCK.
93 *
94 * struct in_multi is covered by IN_MULTI_LOCK. There isn't strictly
95 * any need for in_multi itself to be virtualized -- it is bound to an ifp
96 * anyway no matter what happens.
97 */
98struct mtx in_multi_mtx;
99MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF);
100
101/*
102 * Functions with non-static linkage defined in this file should be
103 * declared in in_var.h:
104 *  imo_multi_filter()
105 *  in_addmulti()
106 *  in_delmulti()
107 *  in_joingroup()
108 *  in_joingroup_locked()
109 *  in_leavegroup()
110 *  in_leavegroup_locked()
111 * and ip_var.h:
112 *  inp_freemoptions()
113 *  inp_getmoptions()
114 *  inp_setmoptions()
115 *
116 * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti()
117 * and in_delmulti().
118 */
119static void	imf_commit(struct in_mfilter *);
120static int	imf_get_source(struct in_mfilter *imf,
121		    const struct sockaddr_in *psin,
122		    struct in_msource **);
123static struct in_msource *
124		imf_graft(struct in_mfilter *, const uint8_t,
125		    const struct sockaddr_in *);
126static void	imf_leave(struct in_mfilter *);
127static int	imf_prune(struct in_mfilter *, const struct sockaddr_in *);
128static void	imf_purge(struct in_mfilter *);
129static void	imf_rollback(struct in_mfilter *);
130static void	imf_reap(struct in_mfilter *);
131static int	imo_grow(struct ip_moptions *);
132static size_t	imo_match_group(const struct ip_moptions *,
133		    const struct ifnet *, const struct sockaddr *);
134static struct in_msource *
135		imo_match_source(const struct ip_moptions *, const size_t,
136		    const struct sockaddr *);
137static void	ims_merge(struct ip_msource *ims,
138		    const struct in_msource *lims, const int rollback);
139static int	in_getmulti(struct ifnet *, const struct in_addr *,
140		    struct in_multi **);
141static int	inm_get_source(struct in_multi *inm, const in_addr_t haddr,
142		    const int noalloc, struct ip_msource **pims);
143#ifdef KTR
144static int	inm_is_ifp_detached(const struct in_multi *);
145#endif
146static int	inm_merge(struct in_multi *, /*const*/ struct in_mfilter *);
147static void	inm_purge(struct in_multi *);
148static void	inm_reap(struct in_multi *);
149static struct ip_moptions *
150		inp_findmoptions(struct inpcb *);
151static void	inp_freemoptions_internal(struct ip_moptions *);
152static void	inp_gcmoptions(void *, int);
153static int	inp_get_source_filters(struct inpcb *, struct sockopt *);
154static int	inp_join_group(struct inpcb *, struct sockopt *);
155static int	inp_leave_group(struct inpcb *, struct sockopt *);
156static struct ifnet *
157		inp_lookup_mcast_ifp(const struct inpcb *,
158		    const struct sockaddr_in *, const struct in_addr);
159static int	inp_block_unblock_source(struct inpcb *, struct sockopt *);
160static int	inp_set_multicast_if(struct inpcb *, struct sockopt *);
161static int	inp_set_source_filters(struct inpcb *, struct sockopt *);
162static int	sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS);
163
164static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0,
165    "IPv4 multicast");
166
167static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER;
168SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc,
169    CTLFLAG_RW | CTLFLAG_TUN, &in_mcast_maxgrpsrc, 0,
170    "Max source filters per group");
171TUNABLE_ULONG("net.inet.ip.mcast.maxgrpsrc", &in_mcast_maxgrpsrc);
172
173static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER;
174SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc,
175    CTLFLAG_RW | CTLFLAG_TUN, &in_mcast_maxsocksrc, 0,
176    "Max source filters per socket");
177TUNABLE_ULONG("net.inet.ip.mcast.maxsocksrc", &in_mcast_maxsocksrc);
178
179int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP;
180SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RW | CTLFLAG_TUN,
181    &in_mcast_loop, 0, "Loopback multicast datagrams by default");
182TUNABLE_INT("net.inet.ip.mcast.loop", &in_mcast_loop);
183
184static SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters,
185    CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters,
186    "Per-interface stack-wide source filters");
187
188static STAILQ_HEAD(, ip_moptions) imo_gc_list =
189    STAILQ_HEAD_INITIALIZER(imo_gc_list);
190static struct task imo_gc_task = TASK_INITIALIZER(0, inp_gcmoptions, NULL);
191
#ifdef KTR
/*
 * Tell whether the interface behind an in_multi record has gone away.
 *
 * The ifnet layer sets the ifma's ifp pointer to NULL when the ifp is
 * detached, so that pointer is what gets inspected.  While it is still
 * set, assert that netinet's cached ifp agrees with it.
 *
 * Returns 1 if the interface is detached, 0 otherwise.
 */
static int __inline
inm_is_ifp_detached(const struct in_multi *inm)
{
	struct ifnet *net_ifp;

	KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));

	net_ifp = inm->inm_ifma->ifma_ifp;
	if (net_ifp == NULL)
		return (1);

	/* netinet's notion of the ifp must match net's. */
	KASSERT(inm->inm_ifp == net_ifp, ("%s: bad ifp", __func__));
	return (0);
}
#endif
216
217/*
218 * Initialize an in_mfilter structure to a known state at t0, t1
219 * with an empty source filter list.
220 */
221static __inline void
222imf_init(struct in_mfilter *imf, const int st0, const int st1)
223{
224	memset(imf, 0, sizeof(struct in_mfilter));
225	RB_INIT(&imf->imf_sources);
226	imf->imf_st[0] = st0;
227	imf->imf_st[1] = st1;
228}
229
230/*
231 * Function for looking up an in_multi record for an IPv4 multicast address
232 * on a given interface. ifp must be valid. If no record found, return NULL.
233 * The IN_MULTI_LOCK and IF_ADDR_LOCK on ifp must be held.
234 */
235struct in_multi *
236inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina)
237{
238	struct ifmultiaddr *ifma;
239	struct in_multi *inm;
240
241	IN_MULTI_LOCK_ASSERT();
242	IF_ADDR_LOCK_ASSERT(ifp);
243
244	inm = NULL;
245	TAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) {
246		if (ifma->ifma_addr->sa_family == AF_INET) {
247			inm = (struct in_multi *)ifma->ifma_protospec;
248			if (inm->inm_addr.s_addr == ina.s_addr)
249				break;
250			inm = NULL;
251		}
252	}
253	return (inm);
254}
255
256/*
257 * Wrapper for inm_lookup_locked().
258 * The IF_ADDR_LOCK will be taken on ifp and released on return.
259 */
260struct in_multi *
261inm_lookup(struct ifnet *ifp, const struct in_addr ina)
262{
263	struct in_multi *inm;
264
265	IN_MULTI_LOCK_ASSERT();
266	IF_ADDR_RLOCK(ifp);
267	inm = inm_lookup_locked(ifp, ina);
268	IF_ADDR_RUNLOCK(ifp);
269
270	return (inm);
271}
272
273/*
274 * Resize the ip_moptions vector to the next power-of-two minus 1.
275 * May be called with locks held; do not sleep.
276 */
277static int
278imo_grow(struct ip_moptions *imo)
279{
280	struct in_multi		**nmships;
281	struct in_multi		**omships;
282	struct in_mfilter	 *nmfilters;
283	struct in_mfilter	 *omfilters;
284	size_t			  idx;
285	size_t			  newmax;
286	size_t			  oldmax;
287
288	nmships = NULL;
289	nmfilters = NULL;
290	omships = imo->imo_membership;
291	omfilters = imo->imo_mfilters;
292	oldmax = imo->imo_max_memberships;
293	newmax = ((oldmax + 1) * 2) - 1;
294
295	if (newmax <= IP_MAX_MEMBERSHIPS) {
296		nmships = (struct in_multi **)realloc(omships,
297		    sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT);
298		nmfilters = (struct in_mfilter *)realloc(omfilters,
299		    sizeof(struct in_mfilter) * newmax, M_INMFILTER, M_NOWAIT);
300		if (nmships != NULL && nmfilters != NULL) {
301			/* Initialize newly allocated source filter heads. */
302			for (idx = oldmax; idx < newmax; idx++) {
303				imf_init(&nmfilters[idx], MCAST_UNDEFINED,
304				    MCAST_EXCLUDE);
305			}
306			imo->imo_max_memberships = newmax;
307			imo->imo_membership = nmships;
308			imo->imo_mfilters = nmfilters;
309		}
310	}
311
312	if (nmships == NULL || nmfilters == NULL) {
313		if (nmships != NULL)
314			free(nmships, M_IPMOPTS);
315		if (nmfilters != NULL)
316			free(nmfilters, M_INMFILTER);
317		return (ETOOMANYREFS);
318	}
319
320	return (0);
321}
322
323/*
324 * Find an IPv4 multicast group entry for this ip_moptions instance
325 * which matches the specified group, and optionally an interface.
326 * Return its index into the array, or -1 if not found.
327 */
328static size_t
329imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp,
330    const struct sockaddr *group)
331{
332	const struct sockaddr_in *gsin;
333	struct in_multi	**pinm;
334	int		  idx;
335	int		  nmships;
336
337	gsin = (const struct sockaddr_in *)group;
338
339	/* The imo_membership array may be lazy allocated. */
340	if (imo->imo_membership == NULL || imo->imo_num_memberships == 0)
341		return (-1);
342
343	nmships = imo->imo_num_memberships;
344	pinm = &imo->imo_membership[0];
345	for (idx = 0; idx < nmships; idx++, pinm++) {
346		if (*pinm == NULL)
347			continue;
348		if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) &&
349		    in_hosteq((*pinm)->inm_addr, gsin->sin_addr)) {
350			break;
351		}
352	}
353	if (idx >= nmships)
354		idx = -1;
355
356	return (idx);
357}
358
359/*
360 * Find an IPv4 multicast source entry for this imo which matches
361 * the given group index for this socket, and source address.
362 *
363 * NOTE: This does not check if the entry is in-mode, merely if
364 * it exists, which may not be the desired behaviour.
365 */
366static struct in_msource *
367imo_match_source(const struct ip_moptions *imo, const size_t gidx,
368    const struct sockaddr *src)
369{
370	struct ip_msource	 find;
371	struct in_mfilter	*imf;
372	struct ip_msource	*ims;
373	const sockunion_t	*psa;
374
375	KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
376	KASSERT(gidx != -1 && gidx < imo->imo_num_memberships,
377	    ("%s: invalid index %d\n", __func__, (int)gidx));
378
379	/* The imo_mfilters array may be lazy allocated. */
380	if (imo->imo_mfilters == NULL)
381		return (NULL);
382	imf = &imo->imo_mfilters[gidx];
383
384	/* Source trees are keyed in host byte order. */
385	psa = (const sockunion_t *)src;
386	find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr);
387	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
388
389	return ((struct in_msource *)ims);
390}
391
392/*
393 * Perform filtering for multicast datagrams on a socket by group and source.
394 *
395 * Returns 0 if a datagram should be allowed through, or various error codes
396 * if the socket was not a member of the group, or the source was muted, etc.
397 */
398int
399imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp,
400    const struct sockaddr *group, const struct sockaddr *src)
401{
402	size_t gidx;
403	struct in_msource *ims;
404	int mode;
405
406	KASSERT(ifp != NULL, ("%s: null ifp", __func__));
407
408	gidx = imo_match_group(imo, ifp, group);
409	if (gidx == -1)
410		return (MCAST_NOTGMEMBER);
411
412	/*
413	 * Check if the source was included in an (S,G) join.
414	 * Allow reception on exclusive memberships by default,
415	 * reject reception on inclusive memberships by default.
416	 * Exclude source only if an in-mode exclude filter exists.
417	 * Include source only if an in-mode include filter exists.
418	 * NOTE: We are comparing group state here at IGMP t1 (now)
419	 * with socket-layer t0 (since last downcall).
420	 */
421	mode = imo->imo_mfilters[gidx].imf_st[1];
422	ims = imo_match_source(imo, gidx, src);
423
424	if ((ims == NULL && mode == MCAST_INCLUDE) ||
425	    (ims != NULL && ims->imsl_st[0] != mode))
426		return (MCAST_NOTSMEMBER);
427
428	return (MCAST_PASS);
429}
430
431/*
432 * Find and return a reference to an in_multi record for (ifp, group),
433 * and bump its reference count.
434 * If one does not exist, try to allocate it, and update link-layer multicast
435 * filters on ifp to listen for group.
436 * Assumes the IN_MULTI lock is held across the call.
437 * Return 0 if successful, otherwise return an appropriate error code.
438 */
439static int
440in_getmulti(struct ifnet *ifp, const struct in_addr *group,
441    struct in_multi **pinm)
442{
443	struct sockaddr_in	 gsin;
444	struct ifmultiaddr	*ifma;
445	struct in_ifinfo	*ii;
446	struct in_multi		*inm;
447	int error;
448
449	IN_MULTI_LOCK_ASSERT();
450
451	ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET];
452
453	inm = inm_lookup(ifp, *group);
454	if (inm != NULL) {
455		/*
456		 * If we already joined this group, just bump the
457		 * refcount and return it.
458		 */
459		KASSERT(inm->inm_refcount >= 1,
460		    ("%s: bad refcount %d", __func__, inm->inm_refcount));
461		++inm->inm_refcount;
462		*pinm = inm;
463		return (0);
464	}
465
466	memset(&gsin, 0, sizeof(gsin));
467	gsin.sin_family = AF_INET;
468	gsin.sin_len = sizeof(struct sockaddr_in);
469	gsin.sin_addr = *group;
470
471	/*
472	 * Check if a link-layer group is already associated
473	 * with this network-layer group on the given ifnet.
474	 */
475	error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma);
476	if (error != 0)
477		return (error);
478
479	/* XXX ifma_protospec must be covered by IF_ADDR_LOCK */
480	IF_ADDR_WLOCK(ifp);
481
482	/*
483	 * If something other than netinet is occupying the link-layer
484	 * group, print a meaningful error message and back out of
485	 * the allocation.
486	 * Otherwise, bump the refcount on the existing network-layer
487	 * group association and return it.
488	 */
489	if (ifma->ifma_protospec != NULL) {
490		inm = (struct in_multi *)ifma->ifma_protospec;
491#ifdef INVARIANTS
492		KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr",
493		    __func__));
494		KASSERT(ifma->ifma_addr->sa_family == AF_INET,
495		    ("%s: ifma not AF_INET", __func__));
496		KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__));
497		if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
498		    !in_hosteq(inm->inm_addr, *group))
499			panic("%s: ifma %p is inconsistent with %p (%s)",
500			    __func__, ifma, inm, inet_ntoa(*group));
501#endif
502		++inm->inm_refcount;
503		*pinm = inm;
504		IF_ADDR_WUNLOCK(ifp);
505		return (0);
506	}
507
508	IF_ADDR_WLOCK_ASSERT(ifp);
509
510	/*
511	 * A new in_multi record is needed; allocate and initialize it.
512	 * We DO NOT perform an IGMP join as the in_ layer may need to
513	 * push an initial source list down to IGMP to support SSM.
514	 *
515	 * The initial source filter state is INCLUDE, {} as per the RFC.
516	 */
517	inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO);
518	if (inm == NULL) {
519		if_delmulti_ifma(ifma);
520		IF_ADDR_WUNLOCK(ifp);
521		return (ENOMEM);
522	}
523	inm->inm_addr = *group;
524	inm->inm_ifp = ifp;
525	inm->inm_igi = ii->ii_igmp;
526	inm->inm_ifma = ifma;
527	inm->inm_refcount = 1;
528	inm->inm_state = IGMP_NOT_MEMBER;
529
530	/*
531	 * Pending state-changes per group are subject to a bounds check.
532	 */
533	IFQ_SET_MAXLEN(&inm->inm_scq, IGMP_MAX_STATE_CHANGES);
534
535	inm->inm_st[0].iss_fmode = MCAST_UNDEFINED;
536	inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
537	RB_INIT(&inm->inm_srcs);
538
539	ifma->ifma_protospec = inm;
540
541	*pinm = inm;
542
543	IF_ADDR_WUNLOCK(ifp);
544	return (0);
545}
546
547/*
548 * Drop a reference to an in_multi record.
549 *
550 * If the refcount drops to 0, free the in_multi record and
551 * delete the underlying link-layer membership.
552 */
553void
554inm_release_locked(struct in_multi *inm)
555{
556	struct ifmultiaddr *ifma;
557
558	IN_MULTI_LOCK_ASSERT();
559
560	CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount);
561
562	if (--inm->inm_refcount > 0) {
563		CTR2(KTR_IGMPV3, "%s: refcount is now %d", __func__,
564		    inm->inm_refcount);
565		return;
566	}
567
568	CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm);
569
570	ifma = inm->inm_ifma;
571
572	/* XXX this access is not covered by IF_ADDR_LOCK */
573	CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma);
574	KASSERT(ifma->ifma_protospec == inm,
575	    ("%s: ifma_protospec != inm", __func__));
576	ifma->ifma_protospec = NULL;
577
578	inm_purge(inm);
579
580	free(inm, M_IPMADDR);
581
582	if_delmulti_ifma(ifma);
583}
584
585/*
586 * Clear recorded source entries for a group.
587 * Used by the IGMP code. Caller must hold the IN_MULTI lock.
588 * FIXME: Should reap.
589 */
590void
591inm_clear_recorded(struct in_multi *inm)
592{
593	struct ip_msource	*ims;
594
595	IN_MULTI_LOCK_ASSERT();
596
597	RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
598		if (ims->ims_stp) {
599			ims->ims_stp = 0;
600			--inm->inm_st[1].iss_rec;
601		}
602	}
603	KASSERT(inm->inm_st[1].iss_rec == 0,
604	    ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec));
605}
606
607/*
608 * Record a source as pending for a Source-Group IGMPv3 query.
609 * This lives here as it modifies the shared tree.
610 *
611 * inm is the group descriptor.
612 * naddr is the address of the source to record in network-byte order.
613 *
614 * If the net.inet.igmp.sgalloc sysctl is non-zero, we will
615 * lazy-allocate a source node in response to an SG query.
616 * Otherwise, no allocation is performed. This saves some memory
617 * with the trade-off that the source will not be reported to the
618 * router if joined in the window between the query response and
619 * the group actually being joined on the local host.
620 *
621 * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed.
622 * This turns off the allocation of a recorded source entry if
623 * the group has not been joined.
624 *
625 * Return 0 if the source didn't exist or was already marked as recorded.
626 * Return 1 if the source was marked as recorded by this function.
627 * Return <0 if any error occured (negated errno code).
628 */
629int
630inm_record_source(struct in_multi *inm, const in_addr_t naddr)
631{
632	struct ip_msource	 find;
633	struct ip_msource	*ims, *nims;
634
635	IN_MULTI_LOCK_ASSERT();
636
637	find.ims_haddr = ntohl(naddr);
638	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
639	if (ims && ims->ims_stp)
640		return (0);
641	if (ims == NULL) {
642		if (inm->inm_nsrc == in_mcast_maxgrpsrc)
643			return (-ENOSPC);
644		nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
645		    M_NOWAIT | M_ZERO);
646		if (nims == NULL)
647			return (-ENOMEM);
648		nims->ims_haddr = find.ims_haddr;
649		RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
650		++inm->inm_nsrc;
651		ims = nims;
652	}
653
654	/*
655	 * Mark the source as recorded and update the recorded
656	 * source count.
657	 */
658	++ims->ims_stp;
659	++inm->inm_st[1].iss_rec;
660
661	return (1);
662}
663
664/*
665 * Return a pointer to an in_msource owned by an in_mfilter,
666 * given its source address.
667 * Lazy-allocate if needed. If this is a new entry its filter state is
668 * undefined at t0.
669 *
670 * imf is the filter set being modified.
671 * haddr is the source address in *host* byte-order.
672 *
673 * SMPng: May be called with locks held; malloc must not block.
674 */
675static int
676imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin,
677    struct in_msource **plims)
678{
679	struct ip_msource	 find;
680	struct ip_msource	*ims, *nims;
681	struct in_msource	*lims;
682	int			 error;
683
684	error = 0;
685	ims = NULL;
686	lims = NULL;
687
688	/* key is host byte order */
689	find.ims_haddr = ntohl(psin->sin_addr.s_addr);
690	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
691	lims = (struct in_msource *)ims;
692	if (lims == NULL) {
693		if (imf->imf_nsrc == in_mcast_maxsocksrc)
694			return (ENOSPC);
695		nims = malloc(sizeof(struct in_msource), M_INMFILTER,
696		    M_NOWAIT | M_ZERO);
697		if (nims == NULL)
698			return (ENOMEM);
699		lims = (struct in_msource *)nims;
700		lims->ims_haddr = find.ims_haddr;
701		lims->imsl_st[0] = MCAST_UNDEFINED;
702		RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
703		++imf->imf_nsrc;
704	}
705
706	*plims = lims;
707
708	return (error);
709}
710
711/*
712 * Graft a source entry into an existing socket-layer filter set,
713 * maintaining any required invariants and checking allocations.
714 *
715 * The source is marked as being in the new filter mode at t1.
716 *
717 * Return the pointer to the new node, otherwise return NULL.
718 */
719static struct in_msource *
720imf_graft(struct in_mfilter *imf, const uint8_t st1,
721    const struct sockaddr_in *psin)
722{
723	struct ip_msource	*nims;
724	struct in_msource	*lims;
725
726	nims = malloc(sizeof(struct in_msource), M_INMFILTER,
727	    M_NOWAIT | M_ZERO);
728	if (nims == NULL)
729		return (NULL);
730	lims = (struct in_msource *)nims;
731	lims->ims_haddr = ntohl(psin->sin_addr.s_addr);
732	lims->imsl_st[0] = MCAST_UNDEFINED;
733	lims->imsl_st[1] = st1;
734	RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
735	++imf->imf_nsrc;
736
737	return (lims);
738}
739
740/*
741 * Prune a source entry from an existing socket-layer filter set,
742 * maintaining any required invariants and checking allocations.
743 *
744 * The source is marked as being left at t1, it is not freed.
745 *
746 * Return 0 if no error occurred, otherwise return an errno value.
747 */
748static int
749imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin)
750{
751	struct ip_msource	 find;
752	struct ip_msource	*ims;
753	struct in_msource	*lims;
754
755	/* key is host byte order */
756	find.ims_haddr = ntohl(psin->sin_addr.s_addr);
757	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
758	if (ims == NULL)
759		return (ENOENT);
760	lims = (struct in_msource *)ims;
761	lims->imsl_st[1] = MCAST_UNDEFINED;
762	return (0);
763}
764
765/*
766 * Revert socket-layer filter set deltas at t1 to t0 state.
767 */
768static void
769imf_rollback(struct in_mfilter *imf)
770{
771	struct ip_msource	*ims, *tims;
772	struct in_msource	*lims;
773
774	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
775		lims = (struct in_msource *)ims;
776		if (lims->imsl_st[0] == lims->imsl_st[1]) {
777			/* no change at t1 */
778			continue;
779		} else if (lims->imsl_st[0] != MCAST_UNDEFINED) {
780			/* revert change to existing source at t1 */
781			lims->imsl_st[1] = lims->imsl_st[0];
782		} else {
783			/* revert source added t1 */
784			CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
785			RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
786			free(ims, M_INMFILTER);
787			imf->imf_nsrc--;
788		}
789	}
790	imf->imf_st[1] = imf->imf_st[0];
791}
792
793/*
794 * Mark socket-layer filter set as INCLUDE {} at t1.
795 */
796static void
797imf_leave(struct in_mfilter *imf)
798{
799	struct ip_msource	*ims;
800	struct in_msource	*lims;
801
802	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
803		lims = (struct in_msource *)ims;
804		lims->imsl_st[1] = MCAST_UNDEFINED;
805	}
806	imf->imf_st[1] = MCAST_INCLUDE;
807}
808
809/*
810 * Mark socket-layer filter set deltas as committed.
811 */
812static void
813imf_commit(struct in_mfilter *imf)
814{
815	struct ip_msource	*ims;
816	struct in_msource	*lims;
817
818	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
819		lims = (struct in_msource *)ims;
820		lims->imsl_st[0] = lims->imsl_st[1];
821	}
822	imf->imf_st[0] = imf->imf_st[1];
823}
824
825/*
826 * Reap unreferenced sources from socket-layer filter set.
827 */
828static void
829imf_reap(struct in_mfilter *imf)
830{
831	struct ip_msource	*ims, *tims;
832	struct in_msource	*lims;
833
834	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
835		lims = (struct in_msource *)ims;
836		if ((lims->imsl_st[0] == MCAST_UNDEFINED) &&
837		    (lims->imsl_st[1] == MCAST_UNDEFINED)) {
838			CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims);
839			RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
840			free(ims, M_INMFILTER);
841			imf->imf_nsrc--;
842		}
843	}
844}
845
846/*
847 * Purge socket-layer filter set.
848 */
849static void
850imf_purge(struct in_mfilter *imf)
851{
852	struct ip_msource	*ims, *tims;
853
854	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
855		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
856		RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
857		free(ims, M_INMFILTER);
858		imf->imf_nsrc--;
859	}
860	imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED;
861	KASSERT(RB_EMPTY(&imf->imf_sources),
862	    ("%s: imf_sources not empty", __func__));
863}
864
865/*
866 * Look up a source filter entry for a multicast group.
867 *
868 * inm is the group descriptor to work with.
869 * haddr is the host-byte-order IPv4 address to look up.
870 * noalloc may be non-zero to suppress allocation of sources.
871 * *pims will be set to the address of the retrieved or allocated source.
872 *
873 * SMPng: NOTE: may be called with locks held.
874 * Return 0 if successful, otherwise return a non-zero error code.
875 */
876static int
877inm_get_source(struct in_multi *inm, const in_addr_t haddr,
878    const int noalloc, struct ip_msource **pims)
879{
880	struct ip_msource	 find;
881	struct ip_msource	*ims, *nims;
882#ifdef KTR
883	struct in_addr ia;
884#endif
885
886	find.ims_haddr = haddr;
887	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
888	if (ims == NULL && !noalloc) {
889		if (inm->inm_nsrc == in_mcast_maxgrpsrc)
890			return (ENOSPC);
891		nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
892		    M_NOWAIT | M_ZERO);
893		if (nims == NULL)
894			return (ENOMEM);
895		nims->ims_haddr = haddr;
896		RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
897		++inm->inm_nsrc;
898		ims = nims;
899#ifdef KTR
900		ia.s_addr = htonl(haddr);
901		CTR3(KTR_IGMPV3, "%s: allocated %s as %p", __func__,
902		    inet_ntoa(ia), ims);
903#endif
904	}
905
906	*pims = ims;
907	return (0);
908}
909
910/*
911 * Merge socket-layer source into IGMP-layer source.
912 * If rollback is non-zero, perform the inverse of the merge.
913 */
914static void
915ims_merge(struct ip_msource *ims, const struct in_msource *lims,
916    const int rollback)
917{
918	int n = rollback ? -1 : 1;
919#ifdef KTR
920	struct in_addr ia;
921
922	ia.s_addr = htonl(ims->ims_haddr);
923#endif
924
925	if (lims->imsl_st[0] == MCAST_EXCLUDE) {
926		CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on %s",
927		    __func__, n, inet_ntoa(ia));
928		ims->ims_st[1].ex -= n;
929	} else if (lims->imsl_st[0] == MCAST_INCLUDE) {
930		CTR3(KTR_IGMPV3, "%s: t1 in -= %d on %s",
931		    __func__, n, inet_ntoa(ia));
932		ims->ims_st[1].in -= n;
933	}
934
935	if (lims->imsl_st[1] == MCAST_EXCLUDE) {
936		CTR3(KTR_IGMPV3, "%s: t1 ex += %d on %s",
937		    __func__, n, inet_ntoa(ia));
938		ims->ims_st[1].ex += n;
939	} else if (lims->imsl_st[1] == MCAST_INCLUDE) {
940		CTR3(KTR_IGMPV3, "%s: t1 in += %d on %s",
941		    __func__, n, inet_ntoa(ia));
942		ims->ims_st[1].in += n;
943	}
944}
945
946/*
947 * Atomically update the global in_multi state, when a membership's
948 * filter list is being updated in any way.
949 *
950 * imf is the per-inpcb-membership group filter pointer.
951 * A fake imf may be passed for in-kernel consumers.
952 *
953 * XXX This is a candidate for a set-symmetric-difference style loop
954 * which would eliminate the repeated lookup from root of ims nodes,
955 * as they share the same key space.
956 *
957 * If any error occurred this function will back out of refcounts
958 * and return a non-zero value.
959 */
960static int
961inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
962{
963	struct ip_msource	*ims, *nims;
964	struct in_msource	*lims;
965	int			 schanged, error;
966	int			 nsrc0, nsrc1;
967
968	schanged = 0;
969	error = 0;
970	nsrc1 = nsrc0 = 0;
971
972	/*
973	 * Update the source filters first, as this may fail.
974	 * Maintain count of in-mode filters at t0, t1. These are
975	 * used to work out if we transition into ASM mode or not.
976	 * Maintain a count of source filters whose state was
977	 * actually modified by this operation.
978	 */
979	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
980		lims = (struct in_msource *)ims;
981		if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++;
982		if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++;
983		if (lims->imsl_st[0] == lims->imsl_st[1]) continue;
984		error = inm_get_source(inm, lims->ims_haddr, 0, &nims);
985		++schanged;
986		if (error)
987			break;
988		ims_merge(nims, lims, 0);
989	}
990	if (error) {
991		struct ip_msource *bims;
992
993		RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) {
994			lims = (struct in_msource *)ims;
995			if (lims->imsl_st[0] == lims->imsl_st[1])
996				continue;
997			(void)inm_get_source(inm, lims->ims_haddr, 1, &bims);
998			if (bims == NULL)
999				continue;
1000			ims_merge(bims, lims, 1);
1001		}
1002		goto out_reap;
1003	}
1004
1005	CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1",
1006	    __func__, nsrc0, nsrc1);
1007
1008	/* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */
1009	if (imf->imf_st[0] == imf->imf_st[1] &&
1010	    imf->imf_st[1] == MCAST_INCLUDE) {
1011		if (nsrc1 == 0) {
1012			CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
1013			--inm->inm_st[1].iss_in;
1014		}
1015	}
1016
1017	/* Handle filter mode transition on socket. */
1018	if (imf->imf_st[0] != imf->imf_st[1]) {
1019		CTR3(KTR_IGMPV3, "%s: imf transition %d to %d",
1020		    __func__, imf->imf_st[0], imf->imf_st[1]);
1021
1022		if (imf->imf_st[0] == MCAST_EXCLUDE) {
1023			CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__);
1024			--inm->inm_st[1].iss_ex;
1025		} else if (imf->imf_st[0] == MCAST_INCLUDE) {
1026			CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
1027			--inm->inm_st[1].iss_in;
1028		}
1029
1030		if (imf->imf_st[1] == MCAST_EXCLUDE) {
1031			CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__);
1032			inm->inm_st[1].iss_ex++;
1033		} else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) {
1034			CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__);
1035			inm->inm_st[1].iss_in++;
1036		}
1037	}
1038
1039	/*
1040	 * Track inm filter state in terms of listener counts.
1041	 * If there are any exclusive listeners, stack-wide
1042	 * membership is exclusive.
1043	 * Otherwise, if only inclusive listeners, stack-wide is inclusive.
1044	 * If no listeners remain, state is undefined at t1,
1045	 * and the IGMP lifecycle for this group should finish.
1046	 */
1047	if (inm->inm_st[1].iss_ex > 0) {
1048		CTR1(KTR_IGMPV3, "%s: transition to EX", __func__);
1049		inm->inm_st[1].iss_fmode = MCAST_EXCLUDE;
1050	} else if (inm->inm_st[1].iss_in > 0) {
1051		CTR1(KTR_IGMPV3, "%s: transition to IN", __func__);
1052		inm->inm_st[1].iss_fmode = MCAST_INCLUDE;
1053	} else {
1054		CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__);
1055		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
1056	}
1057
1058	/* Decrement ASM listener count on transition out of ASM mode. */
1059	if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
1060		if ((imf->imf_st[1] != MCAST_EXCLUDE) ||
1061		    (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0))
1062			CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__);
1063			--inm->inm_st[1].iss_asm;
1064	}
1065
1066	/* Increment ASM listener count on transition to ASM mode. */
1067	if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) {
1068		CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__);
1069		inm->inm_st[1].iss_asm++;
1070	}
1071
1072	CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm);
1073	inm_print(inm);
1074
1075out_reap:
1076	if (schanged > 0) {
1077		CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__);
1078		inm_reap(inm);
1079	}
1080	return (error);
1081}
1082
1083/*
1084 * Mark an in_multi's filter set deltas as committed.
1085 * Called by IGMP after a state change has been enqueued.
1086 */
1087void
1088inm_commit(struct in_multi *inm)
1089{
1090	struct ip_msource	*ims;
1091
1092	CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm);
1093	CTR1(KTR_IGMPV3, "%s: pre commit:", __func__);
1094	inm_print(inm);
1095
1096	RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
1097		ims->ims_st[0] = ims->ims_st[1];
1098	}
1099	inm->inm_st[0] = inm->inm_st[1];
1100}
1101
1102/*
1103 * Reap unreferenced nodes from an in_multi's filter set.
1104 */
1105static void
1106inm_reap(struct in_multi *inm)
1107{
1108	struct ip_msource	*ims, *tims;
1109
1110	RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
1111		if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 ||
1112		    ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 ||
1113		    ims->ims_stp != 0)
1114			continue;
1115		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
1116		RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
1117		free(ims, M_IPMSOURCE);
1118		inm->inm_nsrc--;
1119	}
1120}
1121
1122/*
1123 * Purge all source nodes from an in_multi's filter set.
1124 */
1125static void
1126inm_purge(struct in_multi *inm)
1127{
1128	struct ip_msource	*ims, *tims;
1129
1130	RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
1131		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
1132		RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
1133		free(ims, M_IPMSOURCE);
1134		inm->inm_nsrc--;
1135	}
1136}
1137
/*
 * Join a multicast group; unlocked entry point.
 * Takes the IN_MULTI lock around in_joingroup_locked() and returns
 * its result.
 *
 * SMPng: XXX: in_joingroup() is called from in_control() when Giant
 * is not held. Fortunately, ifp is unlikely to have been detached
 * at this point, so we assume it's OK to recurse.
 */
int
in_joingroup(struct ifnet *ifp, const struct in_addr *gina,
    /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
{
	int rc;

	IN_MULTI_LOCK();
	rc = in_joingroup_locked(ifp, gina, imf, pinm);
	IN_MULTI_UNLOCK();

	return (rc);
}
1157
1158/*
1159 * Join a multicast group; real entry point.
1160 *
1161 * Only preserves atomicity at inm level.
1162 * NOTE: imf argument cannot be const due to sys/tree.h limitations.
1163 *
1164 * If the IGMP downcall fails, the group is not joined, and an error
1165 * code is returned.
1166 */
1167int
1168in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina,
1169    /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
1170{
1171	struct in_mfilter	 timf;
1172	struct in_multi		*inm;
1173	int			 error;
1174
1175	IN_MULTI_LOCK_ASSERT();
1176
1177	CTR4(KTR_IGMPV3, "%s: join %s on %p(%s))", __func__,
1178	    inet_ntoa(*gina), ifp, ifp->if_xname);
1179
1180	error = 0;
1181	inm = NULL;
1182
1183	/*
1184	 * If no imf was specified (i.e. kernel consumer),
1185	 * fake one up and assume it is an ASM join.
1186	 */
1187	if (imf == NULL) {
1188		imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE);
1189		imf = &timf;
1190	}
1191
1192	error = in_getmulti(ifp, gina, &inm);
1193	if (error) {
1194		CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__);
1195		return (error);
1196	}
1197
1198	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
1199	error = inm_merge(inm, imf);
1200	if (error) {
1201		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
1202		goto out_inm_release;
1203	}
1204
1205	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
1206	error = igmp_change_state(inm);
1207	if (error) {
1208		CTR1(KTR_IGMPV3, "%s: failed to update source", __func__);
1209		goto out_inm_release;
1210	}
1211
1212out_inm_release:
1213	if (error) {
1214		CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
1215		inm_release_locked(inm);
1216	} else {
1217		*pinm = inm;
1218	}
1219
1220	return (error);
1221}
1222
/*
 * Leave a multicast group; unlocked entry point.
 * Takes the IN_MULTI lock around in_leavegroup_locked() and returns
 * its result.
 */
int
in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
{
	int rc;

	IN_MULTI_LOCK();
	rc = in_leavegroup_locked(inm, imf);
	IN_MULTI_UNLOCK();

	return (rc);
}
1237
1238/*
1239 * Leave a multicast group; real entry point.
1240 * All source filters will be expunged.
1241 *
1242 * Only preserves atomicity at inm level.
1243 *
1244 * Holding the write lock for the INP which contains imf
1245 * is highly advisable. We can't assert for it as imf does not
1246 * contain a back-pointer to the owning inp.
1247 *
1248 * Note: This is not the same as inm_release(*) as this function also
1249 * makes a state change downcall into IGMP.
1250 */
1251int
1252in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
1253{
1254	struct in_mfilter	 timf;
1255	int			 error;
1256
1257	error = 0;
1258
1259	IN_MULTI_LOCK_ASSERT();
1260
1261	CTR5(KTR_IGMPV3, "%s: leave inm %p, %s/%s, imf %p", __func__,
1262	    inm, inet_ntoa(inm->inm_addr),
1263	    (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname),
1264	    imf);
1265
1266	/*
1267	 * If no imf was specified (i.e. kernel consumer),
1268	 * fake one up and assume it is an ASM join.
1269	 */
1270	if (imf == NULL) {
1271		imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED);
1272		imf = &timf;
1273	}
1274
1275	/*
1276	 * Begin state merge transaction at IGMP layer.
1277	 *
1278	 * As this particular invocation should not cause any memory
1279	 * to be allocated, and there is no opportunity to roll back
1280	 * the transaction, it MUST NOT fail.
1281	 */
1282	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
1283	error = inm_merge(inm, imf);
1284	KASSERT(error == 0, ("%s: failed to merge inm state", __func__));
1285
1286	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
1287	CURVNET_SET(inm->inm_ifp->if_vnet);
1288	error = igmp_change_state(inm);
1289	CURVNET_RESTORE();
1290	if (error)
1291		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
1292
1293	CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
1294	inm_release_locked(inm);
1295
1296	return (error);
1297}
1298
1299/*#ifndef BURN_BRIDGES*/
1300/*
1301 * Join an IPv4 multicast group in (*,G) exclusive mode.
1302 * The group must be a 224.0.0.0/24 link-scope group.
1303 * This KPI is for legacy kernel consumers only.
1304 */
1305struct in_multi *
1306in_addmulti(struct in_addr *ap, struct ifnet *ifp)
1307{
1308	struct in_multi *pinm;
1309	int error;
1310
1311	KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)),
1312	    ("%s: %s not in 224.0.0.0/24", __func__, inet_ntoa(*ap)));
1313
1314	error = in_joingroup(ifp, ap, NULL, &pinm);
1315	if (error != 0)
1316		pinm = NULL;
1317
1318	return (pinm);
1319}
1320
1321/*
1322 * Leave an IPv4 multicast group, assumed to be in exclusive (*,G) mode.
1323 * This KPI is for legacy kernel consumers only.
1324 */
1325void
1326in_delmulti(struct in_multi *inm)
1327{
1328
1329	(void)in_leavegroup(inm, NULL);
1330}
1331/*#endif*/
1332
1333/*
1334 * Block or unblock an ASM multicast source on an inpcb.
1335 * This implements the delta-based API described in RFC 3678.
1336 *
1337 * The delta-based API applies only to exclusive-mode memberships.
1338 * An IGMP downcall will be performed.
1339 *
1340 * SMPng: NOTE: Must take Giant as a join may create a new ifma.
1341 *
1342 * Return 0 if successful, otherwise return an appropriate error code.
1343 */
1344static int
1345inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
1346{
1347	struct group_source_req		 gsr;
1348	sockunion_t			*gsa, *ssa;
1349	struct ifnet			*ifp;
1350	struct in_mfilter		*imf;
1351	struct ip_moptions		*imo;
1352	struct in_msource		*ims;
1353	struct in_multi			*inm;
1354	size_t				 idx;
1355	uint16_t			 fmode;
1356	int				 error, doblock;
1357
1358	ifp = NULL;
1359	error = 0;
1360	doblock = 0;
1361
1362	memset(&gsr, 0, sizeof(struct group_source_req));
1363	gsa = (sockunion_t *)&gsr.gsr_group;
1364	ssa = (sockunion_t *)&gsr.gsr_source;
1365
1366	switch (sopt->sopt_name) {
1367	case IP_BLOCK_SOURCE:
1368	case IP_UNBLOCK_SOURCE: {
1369		struct ip_mreq_source	 mreqs;
1370
1371		error = sooptcopyin(sopt, &mreqs,
1372		    sizeof(struct ip_mreq_source),
1373		    sizeof(struct ip_mreq_source));
1374		if (error)
1375			return (error);
1376
1377		gsa->sin.sin_family = AF_INET;
1378		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1379		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1380
1381		ssa->sin.sin_family = AF_INET;
1382		ssa->sin.sin_len = sizeof(struct sockaddr_in);
1383		ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1384
1385		if (!in_nullhost(mreqs.imr_interface))
1386			INADDR_TO_IFP(mreqs.imr_interface, ifp);
1387
1388		if (sopt->sopt_name == IP_BLOCK_SOURCE)
1389			doblock = 1;
1390
1391		CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p",
1392		    __func__, inet_ntoa(mreqs.imr_interface), ifp);
1393		break;
1394	    }
1395
1396	case MCAST_BLOCK_SOURCE:
1397	case MCAST_UNBLOCK_SOURCE:
1398		error = sooptcopyin(sopt, &gsr,
1399		    sizeof(struct group_source_req),
1400		    sizeof(struct group_source_req));
1401		if (error)
1402			return (error);
1403
1404		if (gsa->sin.sin_family != AF_INET ||
1405		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
1406			return (EINVAL);
1407
1408		if (ssa->sin.sin_family != AF_INET ||
1409		    ssa->sin.sin_len != sizeof(struct sockaddr_in))
1410			return (EINVAL);
1411
1412		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
1413			return (EADDRNOTAVAIL);
1414
1415		ifp = ifnet_byindex(gsr.gsr_interface);
1416
1417		if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
1418			doblock = 1;
1419		break;
1420
1421	default:
1422		CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
1423		    __func__, sopt->sopt_name);
1424		return (EOPNOTSUPP);
1425		break;
1426	}
1427
1428	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1429		return (EINVAL);
1430
1431	/*
1432	 * Check if we are actually a member of this group.
1433	 */
1434	imo = inp_findmoptions(inp);
1435	idx = imo_match_group(imo, ifp, &gsa->sa);
1436	if (idx == -1 || imo->imo_mfilters == NULL) {
1437		error = EADDRNOTAVAIL;
1438		goto out_inp_locked;
1439	}
1440
1441	KASSERT(imo->imo_mfilters != NULL,
1442	    ("%s: imo_mfilters not allocated", __func__));
1443	imf = &imo->imo_mfilters[idx];
1444	inm = imo->imo_membership[idx];
1445
1446	/*
1447	 * Attempting to use the delta-based API on an
1448	 * non exclusive-mode membership is an error.
1449	 */
1450	fmode = imf->imf_st[0];
1451	if (fmode != MCAST_EXCLUDE) {
1452		error = EINVAL;
1453		goto out_inp_locked;
1454	}
1455
1456	/*
1457	 * Deal with error cases up-front:
1458	 *  Asked to block, but already blocked; or
1459	 *  Asked to unblock, but nothing to unblock.
1460	 * If adding a new block entry, allocate it.
1461	 */
1462	ims = imo_match_source(imo, idx, &ssa->sa);
1463	if ((ims != NULL && doblock) || (ims == NULL && !doblock)) {
1464		CTR3(KTR_IGMPV3, "%s: source %s %spresent", __func__,
1465		    inet_ntoa(ssa->sin.sin_addr), doblock ? "" : "not ");
1466		error = EADDRNOTAVAIL;
1467		goto out_inp_locked;
1468	}
1469
1470	INP_WLOCK_ASSERT(inp);
1471
1472	/*
1473	 * Begin state merge transaction at socket layer.
1474	 */
1475	if (doblock) {
1476		CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block");
1477		ims = imf_graft(imf, fmode, &ssa->sin);
1478		if (ims == NULL)
1479			error = ENOMEM;
1480	} else {
1481		CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow");
1482		error = imf_prune(imf, &ssa->sin);
1483	}
1484
1485	if (error) {
1486		CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__);
1487		goto out_imf_rollback;
1488	}
1489
1490	/*
1491	 * Begin state merge transaction at IGMP layer.
1492	 */
1493	IN_MULTI_LOCK();
1494
1495	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
1496	error = inm_merge(inm, imf);
1497	if (error) {
1498		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
1499		goto out_in_multi_locked;
1500	}
1501
1502	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
1503	error = igmp_change_state(inm);
1504	if (error)
1505		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
1506
1507out_in_multi_locked:
1508
1509	IN_MULTI_UNLOCK();
1510
1511out_imf_rollback:
1512	if (error)
1513		imf_rollback(imf);
1514	else
1515		imf_commit(imf);
1516
1517	imf_reap(imf);
1518
1519out_inp_locked:
1520	INP_WUNLOCK(inp);
1521	return (error);
1522}
1523
1524/*
1525 * Given an inpcb, return its multicast options structure pointer.  Accepts
1526 * an unlocked inpcb pointer, but will return it locked.  May sleep.
1527 *
1528 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
1529 * SMPng: NOTE: Returns with the INP write lock held.
1530 */
1531static struct ip_moptions *
1532inp_findmoptions(struct inpcb *inp)
1533{
1534	struct ip_moptions	 *imo;
1535	struct in_multi		**immp;
1536	struct in_mfilter	 *imfp;
1537	size_t			  idx;
1538
1539	INP_WLOCK(inp);
1540	if (inp->inp_moptions != NULL)
1541		return (inp->inp_moptions);
1542
1543	INP_WUNLOCK(inp);
1544
1545	imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK);
1546	immp = malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS, M_IPMOPTS,
1547	    M_WAITOK | M_ZERO);
1548	imfp = malloc(sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS,
1549	    M_INMFILTER, M_WAITOK);
1550
1551	imo->imo_multicast_ifp = NULL;
1552	imo->imo_multicast_addr.s_addr = INADDR_ANY;
1553	imo->imo_multicast_vif = -1;
1554	imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1555	imo->imo_multicast_loop = in_mcast_loop;
1556	imo->imo_num_memberships = 0;
1557	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
1558	imo->imo_membership = immp;
1559
1560	/* Initialize per-group source filters. */
1561	for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++)
1562		imf_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE);
1563	imo->imo_mfilters = imfp;
1564
1565	INP_WLOCK(inp);
1566	if (inp->inp_moptions != NULL) {
1567		free(imfp, M_INMFILTER);
1568		free(immp, M_IPMOPTS);
1569		free(imo, M_IPMOPTS);
1570		return (inp->inp_moptions);
1571	}
1572	inp->inp_moptions = imo;
1573	return (imo);
1574}
1575
1576/*
1577 * Discard the IP multicast options (and source filters).  To minimize
1578 * the amount of work done while holding locks such as the INP's
1579 * pcbinfo lock (which is used in the receive path), the free
1580 * operation is performed asynchronously in a separate task.
1581 *
1582 * SMPng: NOTE: assumes INP write lock is held.
1583 */
1584void
1585inp_freemoptions(struct ip_moptions *imo)
1586{
1587
1588	KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
1589	IN_MULTI_LOCK();
1590	STAILQ_INSERT_TAIL(&imo_gc_list, imo, imo_link);
1591	IN_MULTI_UNLOCK();
1592	taskqueue_enqueue(taskqueue_thread, &imo_gc_task);
1593}
1594
1595static void
1596inp_freemoptions_internal(struct ip_moptions *imo)
1597{
1598	struct in_mfilter	*imf;
1599	size_t			 idx, nmships;
1600
1601	nmships = imo->imo_num_memberships;
1602	for (idx = 0; idx < nmships; ++idx) {
1603		imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL;
1604		if (imf)
1605			imf_leave(imf);
1606		(void)in_leavegroup(imo->imo_membership[idx], imf);
1607		if (imf)
1608			imf_purge(imf);
1609	}
1610
1611	if (imo->imo_mfilters)
1612		free(imo->imo_mfilters, M_INMFILTER);
1613	free(imo->imo_membership, M_IPMOPTS);
1614	free(imo, M_IPMOPTS);
1615}
1616
1617static void
1618inp_gcmoptions(void *context, int pending)
1619{
1620	struct ip_moptions *imo;
1621
1622	IN_MULTI_LOCK();
1623	while (!STAILQ_EMPTY(&imo_gc_list)) {
1624		imo = STAILQ_FIRST(&imo_gc_list);
1625		STAILQ_REMOVE_HEAD(&imo_gc_list, imo_link);
1626		IN_MULTI_UNLOCK();
1627		inp_freemoptions_internal(imo);
1628		IN_MULTI_LOCK();
1629	}
1630	IN_MULTI_UNLOCK();
1631}
1632
1633/*
1634 * Atomically get source filters on a socket for an IPv4 multicast group.
1635 * Called with INP lock held; returns with lock released.
1636 */
1637static int
1638inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
1639{
1640	struct __msfilterreq	 msfr;
1641	sockunion_t		*gsa;
1642	struct ifnet		*ifp;
1643	struct ip_moptions	*imo;
1644	struct in_mfilter	*imf;
1645	struct ip_msource	*ims;
1646	struct in_msource	*lims;
1647	struct sockaddr_in	*psin;
1648	struct sockaddr_storage	*ptss;
1649	struct sockaddr_storage	*tss;
1650	int			 error;
1651	size_t			 idx, nsrcs, ncsrcs;
1652
1653	INP_WLOCK_ASSERT(inp);
1654
1655	imo = inp->inp_moptions;
1656	KASSERT(imo != NULL, ("%s: null ip_moptions", __func__));
1657
1658	INP_WUNLOCK(inp);
1659
1660	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
1661	    sizeof(struct __msfilterreq));
1662	if (error)
1663		return (error);
1664
1665	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
1666		return (EINVAL);
1667
1668	ifp = ifnet_byindex(msfr.msfr_ifindex);
1669	if (ifp == NULL)
1670		return (EINVAL);
1671
1672	INP_WLOCK(inp);
1673
1674	/*
1675	 * Lookup group on the socket.
1676	 */
1677	gsa = (sockunion_t *)&msfr.msfr_group;
1678	idx = imo_match_group(imo, ifp, &gsa->sa);
1679	if (idx == -1 || imo->imo_mfilters == NULL) {
1680		INP_WUNLOCK(inp);
1681		return (EADDRNOTAVAIL);
1682	}
1683	imf = &imo->imo_mfilters[idx];
1684
1685	/*
1686	 * Ignore memberships which are in limbo.
1687	 */
1688	if (imf->imf_st[1] == MCAST_UNDEFINED) {
1689		INP_WUNLOCK(inp);
1690		return (EAGAIN);
1691	}
1692	msfr.msfr_fmode = imf->imf_st[1];
1693
1694	/*
1695	 * If the user specified a buffer, copy out the source filter
1696	 * entries to userland gracefully.
1697	 * We only copy out the number of entries which userland
1698	 * has asked for, but we always tell userland how big the
1699	 * buffer really needs to be.
1700	 */
1701	if (msfr.msfr_nsrcs > in_mcast_maxsocksrc)
1702		msfr.msfr_nsrcs = in_mcast_maxsocksrc;
1703	tss = NULL;
1704	if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
1705		tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
1706		    M_TEMP, M_NOWAIT | M_ZERO);
1707		if (tss == NULL) {
1708			INP_WUNLOCK(inp);
1709			return (ENOBUFS);
1710		}
1711	}
1712
1713	/*
1714	 * Count number of sources in-mode at t0.
1715	 * If buffer space exists and remains, copy out source entries.
1716	 */
1717	nsrcs = msfr.msfr_nsrcs;
1718	ncsrcs = 0;
1719	ptss = tss;
1720	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
1721		lims = (struct in_msource *)ims;
1722		if (lims->imsl_st[0] == MCAST_UNDEFINED ||
1723		    lims->imsl_st[0] != imf->imf_st[0])
1724			continue;
1725		++ncsrcs;
1726		if (tss != NULL && nsrcs > 0) {
1727			psin = (struct sockaddr_in *)ptss;
1728			psin->sin_family = AF_INET;
1729			psin->sin_len = sizeof(struct sockaddr_in);
1730			psin->sin_addr.s_addr = htonl(lims->ims_haddr);
1731			psin->sin_port = 0;
1732			++ptss;
1733			--nsrcs;
1734		}
1735	}
1736
1737	INP_WUNLOCK(inp);
1738
1739	if (tss != NULL) {
1740		error = copyout(tss, msfr.msfr_srcs,
1741		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
1742		free(tss, M_TEMP);
1743		if (error)
1744			return (error);
1745	}
1746
1747	msfr.msfr_nsrcs = ncsrcs;
1748	error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));
1749
1750	return (error);
1751}
1752
1753/*
1754 * Return the IP multicast options in response to user getsockopt().
1755 */
1756int
1757inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
1758{
1759	struct ip_mreqn		 mreqn;
1760	struct ip_moptions	*imo;
1761	struct ifnet		*ifp;
1762	struct in_ifaddr	*ia;
1763	int			 error, optval;
1764	u_char			 coptval;
1765
1766	INP_WLOCK(inp);
1767	imo = inp->inp_moptions;
1768	/*
1769	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
1770	 * or is a divert socket, reject it.
1771	 */
1772	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
1773	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
1774	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
1775		INP_WUNLOCK(inp);
1776		return (EOPNOTSUPP);
1777	}
1778
1779	error = 0;
1780	switch (sopt->sopt_name) {
1781	case IP_MULTICAST_VIF:
1782		if (imo != NULL)
1783			optval = imo->imo_multicast_vif;
1784		else
1785			optval = -1;
1786		INP_WUNLOCK(inp);
1787		error = sooptcopyout(sopt, &optval, sizeof(int));
1788		break;
1789
1790	case IP_MULTICAST_IF:
1791		memset(&mreqn, 0, sizeof(struct ip_mreqn));
1792		if (imo != NULL) {
1793			ifp = imo->imo_multicast_ifp;
1794			if (!in_nullhost(imo->imo_multicast_addr)) {
1795				mreqn.imr_address = imo->imo_multicast_addr;
1796			} else if (ifp != NULL) {
1797				mreqn.imr_ifindex = ifp->if_index;
1798				IFP_TO_IA(ifp, ia);
1799				if (ia != NULL) {
1800					mreqn.imr_address =
1801					    IA_SIN(ia)->sin_addr;
1802					ifa_free(&ia->ia_ifa);
1803				}
1804			}
1805		}
1806		INP_WUNLOCK(inp);
1807		if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
1808			error = sooptcopyout(sopt, &mreqn,
1809			    sizeof(struct ip_mreqn));
1810		} else {
1811			error = sooptcopyout(sopt, &mreqn.imr_address,
1812			    sizeof(struct in_addr));
1813		}
1814		break;
1815
1816	case IP_MULTICAST_TTL:
1817		if (imo == 0)
1818			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
1819		else
1820			optval = coptval = imo->imo_multicast_ttl;
1821		INP_WUNLOCK(inp);
1822		if (sopt->sopt_valsize == sizeof(u_char))
1823			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
1824		else
1825			error = sooptcopyout(sopt, &optval, sizeof(int));
1826		break;
1827
1828	case IP_MULTICAST_LOOP:
1829		if (imo == 0)
1830			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
1831		else
1832			optval = coptval = imo->imo_multicast_loop;
1833		INP_WUNLOCK(inp);
1834		if (sopt->sopt_valsize == sizeof(u_char))
1835			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
1836		else
1837			error = sooptcopyout(sopt, &optval, sizeof(int));
1838		break;
1839
1840	case IP_MSFILTER:
1841		if (imo == NULL) {
1842			error = EADDRNOTAVAIL;
1843			INP_WUNLOCK(inp);
1844		} else {
1845			error = inp_get_source_filters(inp, sopt);
1846		}
1847		break;
1848
1849	default:
1850		INP_WUNLOCK(inp);
1851		error = ENOPROTOOPT;
1852		break;
1853	}
1854
1855	INP_UNLOCK_ASSERT(inp);
1856
1857	return (error);
1858}
1859
1860/*
1861 * Look up the ifnet to use for a multicast group membership,
1862 * given the IPv4 address of an interface, and the IPv4 group address.
1863 *
1864 * This routine exists to support legacy multicast applications
1865 * which do not understand that multicast memberships are scoped to
1866 * specific physical links in the networking stack, or which need
1867 * to join link-scope groups before IPv4 addresses are configured.
1868 *
1869 * If inp is non-NULL, use this socket's current FIB number for any
1870 * required FIB lookup.
1871 * If ina is INADDR_ANY, look up the group address in the unicast FIB,
1872 * and use its ifp; usually, this points to the default next-hop.
1873 *
1874 * If the FIB lookup fails, attempt to use the first non-loopback
1875 * interface with multicast capability in the system as a
1876 * last resort. The legacy IPv4 ASM API requires that we do
1877 * this in order to allow groups to be joined when the routing
1878 * table has not yet been populated during boot.
1879 *
1880 * Returns NULL if no ifp could be found.
1881 *
1882 * SMPng: TODO: Acquire the appropriate locks for INADDR_TO_IFP.
1883 * FUTURE: Implement IPv4 source-address selection.
1884 */
1885static struct ifnet *
1886inp_lookup_mcast_ifp(const struct inpcb *inp,
1887    const struct sockaddr_in *gsin, const struct in_addr ina)
1888{
1889	struct ifnet *ifp;
1890
1891	KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__));
1892	KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)),
1893	    ("%s: not multicast", __func__));
1894
1895	ifp = NULL;
1896	if (!in_nullhost(ina)) {
1897		INADDR_TO_IFP(ina, ifp);
1898	} else {
1899		struct route ro;
1900
1901		ro.ro_rt = NULL;
1902		memcpy(&ro.ro_dst, gsin, sizeof(struct sockaddr_in));
1903		in_rtalloc_ign(&ro, 0, inp ? inp->inp_inc.inc_fibnum : 0);
1904		if (ro.ro_rt != NULL) {
1905			ifp = ro.ro_rt->rt_ifp;
1906			KASSERT(ifp != NULL, ("%s: null ifp", __func__));
1907			RTFREE(ro.ro_rt);
1908		} else {
1909			struct in_ifaddr *ia;
1910			struct ifnet *mifp;
1911
1912			mifp = NULL;
1913			IN_IFADDR_RLOCK();
1914			TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
1915				mifp = ia->ia_ifp;
1916				if (!(mifp->if_flags & IFF_LOOPBACK) &&
1917				     (mifp->if_flags & IFF_MULTICAST)) {
1918					ifp = mifp;
1919					break;
1920				}
1921			}
1922			IN_IFADDR_RUNLOCK();
1923		}
1924	}
1925
1926	return (ifp);
1927}
1928
1929/*
1930 * Join an IPv4 multicast group, possibly with a source.
1931 */
1932static int
1933inp_join_group(struct inpcb *inp, struct sockopt *sopt)
1934{
1935	struct group_source_req		 gsr;
1936	sockunion_t			*gsa, *ssa;
1937	struct ifnet			*ifp;
1938	struct in_mfilter		*imf;
1939	struct ip_moptions		*imo;
1940	struct in_multi			*inm;
1941	struct in_msource		*lims;
1942	size_t				 idx;
1943	int				 error, is_new;
1944
1945	ifp = NULL;
1946	imf = NULL;
1947	lims = NULL;
1948	error = 0;
1949	is_new = 0;
1950
1951	memset(&gsr, 0, sizeof(struct group_source_req));
1952	gsa = (sockunion_t *)&gsr.gsr_group;
1953	gsa->ss.ss_family = AF_UNSPEC;
1954	ssa = (sockunion_t *)&gsr.gsr_source;
1955	ssa->ss.ss_family = AF_UNSPEC;
1956
1957	switch (sopt->sopt_name) {
1958	case IP_ADD_MEMBERSHIP:
1959	case IP_ADD_SOURCE_MEMBERSHIP: {
1960		struct ip_mreq_source	 mreqs;
1961
1962		if (sopt->sopt_name == IP_ADD_MEMBERSHIP) {
1963			error = sooptcopyin(sopt, &mreqs,
1964			    sizeof(struct ip_mreq),
1965			    sizeof(struct ip_mreq));
1966			/*
1967			 * Do argument switcharoo from ip_mreq into
1968			 * ip_mreq_source to avoid using two instances.
1969			 */
1970			mreqs.imr_interface = mreqs.imr_sourceaddr;
1971			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
1972		} else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
1973			error = sooptcopyin(sopt, &mreqs,
1974			    sizeof(struct ip_mreq_source),
1975			    sizeof(struct ip_mreq_source));
1976		}
1977		if (error)
1978			return (error);
1979
1980		gsa->sin.sin_family = AF_INET;
1981		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1982		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1983
1984		if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
1985			ssa->sin.sin_family = AF_INET;
1986			ssa->sin.sin_len = sizeof(struct sockaddr_in);
1987			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1988		}
1989
1990		if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1991			return (EINVAL);
1992
1993		ifp = inp_lookup_mcast_ifp(inp, &gsa->sin,
1994		    mreqs.imr_interface);
1995		CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p",
1996		    __func__, inet_ntoa(mreqs.imr_interface), ifp);
1997		break;
1998	}
1999
2000	case MCAST_JOIN_GROUP:
2001	case MCAST_JOIN_SOURCE_GROUP:
2002		if (sopt->sopt_name == MCAST_JOIN_GROUP) {
2003			error = sooptcopyin(sopt, &gsr,
2004			    sizeof(struct group_req),
2005			    sizeof(struct group_req));
2006		} else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
2007			error = sooptcopyin(sopt, &gsr,
2008			    sizeof(struct group_source_req),
2009			    sizeof(struct group_source_req));
2010		}
2011		if (error)
2012			return (error);
2013
2014		if (gsa->sin.sin_family != AF_INET ||
2015		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
2016			return (EINVAL);
2017
2018		/*
2019		 * Overwrite the port field if present, as the sockaddr
2020		 * being copied in may be matched with a binary comparison.
2021		 */
2022		gsa->sin.sin_port = 0;
2023		if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
2024			if (ssa->sin.sin_family != AF_INET ||
2025			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
2026				return (EINVAL);
2027			ssa->sin.sin_port = 0;
2028		}
2029
2030		if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
2031			return (EINVAL);
2032
2033		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
2034			return (EADDRNOTAVAIL);
2035		ifp = ifnet_byindex(gsr.gsr_interface);
2036		break;
2037
2038	default:
2039		CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
2040		    __func__, sopt->sopt_name);
2041		return (EOPNOTSUPP);
2042		break;
2043	}
2044
2045	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
2046		return (EADDRNOTAVAIL);
2047
2048	imo = inp_findmoptions(inp);
2049	idx = imo_match_group(imo, ifp, &gsa->sa);
2050	if (idx == -1) {
2051		is_new = 1;
2052	} else {
2053		inm = imo->imo_membership[idx];
2054		imf = &imo->imo_mfilters[idx];
2055		if (ssa->ss.ss_family != AF_UNSPEC) {
2056			/*
2057			 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership
2058			 * is an error. On an existing inclusive membership,
2059			 * it just adds the source to the filter list.
2060			 */
2061			if (imf->imf_st[1] != MCAST_INCLUDE) {
2062				error = EINVAL;
2063				goto out_inp_locked;
2064			}
2065			/*
2066			 * Throw out duplicates.
2067			 *
2068			 * XXX FIXME: This makes a naive assumption that
2069			 * even if entries exist for *ssa in this imf,
2070			 * they will be rejected as dupes, even if they
2071			 * are not valid in the current mode (in-mode).
2072			 *
2073			 * in_msource is transactioned just as for anything
2074			 * else in SSM -- but note naive use of inm_graft()
2075			 * below for allocating new filter entries.
2076			 *
2077			 * This is only an issue if someone mixes the
2078			 * full-state SSM API with the delta-based API,
2079			 * which is discouraged in the relevant RFCs.
2080			 */
2081			lims = imo_match_source(imo, idx, &ssa->sa);
2082			if (lims != NULL /*&&
2083			    lims->imsl_st[1] == MCAST_INCLUDE*/) {
2084				error = EADDRNOTAVAIL;
2085				goto out_inp_locked;
2086			}
2087		} else {
2088			/*
2089			 * MCAST_JOIN_GROUP on an existing exclusive
2090			 * membership is an error; return EADDRINUSE
2091			 * to preserve 4.4BSD API idempotence, and
2092			 * avoid tedious detour to code below.
2093			 * NOTE: This is bending RFC 3678 a bit.
2094			 *
2095			 * On an existing inclusive membership, this is also
2096			 * an error; if you want to change filter mode,
2097			 * you must use the userland API setsourcefilter().
2098			 * XXX We don't reject this for imf in UNDEFINED
2099			 * state at t1, because allocation of a filter
2100			 * is atomic with allocation of a membership.
2101			 */
2102			error = EINVAL;
2103			if (imf->imf_st[1] == MCAST_EXCLUDE)
2104				error = EADDRINUSE;
2105			goto out_inp_locked;
2106		}
2107	}
2108
2109	/*
2110	 * Begin state merge transaction at socket layer.
2111	 */
2112	INP_WLOCK_ASSERT(inp);
2113
2114	if (is_new) {
2115		if (imo->imo_num_memberships == imo->imo_max_memberships) {
2116			error = imo_grow(imo);
2117			if (error)
2118				goto out_inp_locked;
2119		}
2120		/*
2121		 * Allocate the new slot upfront so we can deal with
2122		 * grafting the new source filter in same code path
2123		 * as for join-source on existing membership.
2124		 */
2125		idx = imo->imo_num_memberships;
2126		imo->imo_membership[idx] = NULL;
2127		imo->imo_num_memberships++;
2128		KASSERT(imo->imo_mfilters != NULL,
2129		    ("%s: imf_mfilters vector was not allocated", __func__));
2130		imf = &imo->imo_mfilters[idx];
2131		KASSERT(RB_EMPTY(&imf->imf_sources),
2132		    ("%s: imf_sources not empty", __func__));
2133	}
2134
2135	/*
2136	 * Graft new source into filter list for this inpcb's
2137	 * membership of the group. The in_multi may not have
2138	 * been allocated yet if this is a new membership, however,
2139	 * the in_mfilter slot will be allocated and must be initialized.
2140	 *
2141	 * Note: Grafting of exclusive mode filters doesn't happen
2142	 * in this path.
2143	 * XXX: Should check for non-NULL lims (node exists but may
2144	 * not be in-mode) for interop with full-state API.
2145	 */
2146	if (ssa->ss.ss_family != AF_UNSPEC) {
2147		/* Membership starts in IN mode */
2148		if (is_new) {
2149			CTR1(KTR_IGMPV3, "%s: new join w/source", __func__);
2150			imf_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE);
2151		} else {
2152			CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow");
2153		}
2154		lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin);
2155		if (lims == NULL) {
2156			CTR1(KTR_IGMPV3, "%s: merge imf state failed",
2157			    __func__);
2158			error = ENOMEM;
2159			goto out_imo_free;
2160		}
2161	} else {
2162		/* No address specified; Membership starts in EX mode */
2163		if (is_new) {
2164			CTR1(KTR_IGMPV3, "%s: new join w/o source", __func__);
2165			imf_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE);
2166		}
2167	}
2168
2169	/*
2170	 * Begin state merge transaction at IGMP layer.
2171	 */
2172	IN_MULTI_LOCK();
2173
2174	if (is_new) {
2175		error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf,
2176		    &inm);
2177		if (error) {
2178                        CTR1(KTR_IGMPV3, "%s: in_joingroup_locked failed",
2179                            __func__);
2180                        IN_MULTI_UNLOCK();
2181			goto out_imo_free;
2182                }
2183		imo->imo_membership[idx] = inm;
2184	} else {
2185		CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
2186		error = inm_merge(inm, imf);
2187		if (error) {
2188			CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
2189			    __func__);
2190			goto out_in_multi_locked;
2191		}
2192		CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
2193		error = igmp_change_state(inm);
2194		if (error) {
2195			CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
2196			    __func__);
2197			goto out_in_multi_locked;
2198		}
2199	}
2200
2201out_in_multi_locked:
2202
2203	IN_MULTI_UNLOCK();
2204
2205	INP_WLOCK_ASSERT(inp);
2206	if (error) {
2207		imf_rollback(imf);
2208		if (is_new)
2209			imf_purge(imf);
2210		else
2211			imf_reap(imf);
2212	} else {
2213		imf_commit(imf);
2214	}
2215
2216out_imo_free:
2217	if (error && is_new) {
2218		imo->imo_membership[idx] = NULL;
2219		--imo->imo_num_memberships;
2220	}
2221
2222out_inp_locked:
2223	INP_WUNLOCK(inp);
2224	return (error);
2225}
2226
2227/*
2228 * Leave an IPv4 multicast group on an inpcb, possibly with a source.
2229 */
2230static int
2231inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
2232{
2233	struct group_source_req		 gsr;
2234	struct ip_mreq_source		 mreqs;
2235	sockunion_t			*gsa, *ssa;
2236	struct ifnet			*ifp;
2237	struct in_mfilter		*imf;
2238	struct ip_moptions		*imo;
2239	struct in_msource		*ims;
2240	struct in_multi			*inm;
2241	size_t				 idx;
2242	int				 error, is_final;
2243
2244	ifp = NULL;
2245	error = 0;
2246	is_final = 1;
2247
2248	memset(&gsr, 0, sizeof(struct group_source_req));
2249	gsa = (sockunion_t *)&gsr.gsr_group;
2250	gsa->ss.ss_family = AF_UNSPEC;
2251	ssa = (sockunion_t *)&gsr.gsr_source;
2252	ssa->ss.ss_family = AF_UNSPEC;
2253
2254	switch (sopt->sopt_name) {
2255	case IP_DROP_MEMBERSHIP:
2256	case IP_DROP_SOURCE_MEMBERSHIP:
2257		if (sopt->sopt_name == IP_DROP_MEMBERSHIP) {
2258			error = sooptcopyin(sopt, &mreqs,
2259			    sizeof(struct ip_mreq),
2260			    sizeof(struct ip_mreq));
2261			/*
2262			 * Swap interface and sourceaddr arguments,
2263			 * as ip_mreq and ip_mreq_source are laid
2264			 * out differently.
2265			 */
2266			mreqs.imr_interface = mreqs.imr_sourceaddr;
2267			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
2268		} else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
2269			error = sooptcopyin(sopt, &mreqs,
2270			    sizeof(struct ip_mreq_source),
2271			    sizeof(struct ip_mreq_source));
2272		}
2273		if (error)
2274			return (error);
2275
2276		gsa->sin.sin_family = AF_INET;
2277		gsa->sin.sin_len = sizeof(struct sockaddr_in);
2278		gsa->sin.sin_addr = mreqs.imr_multiaddr;
2279
2280		if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
2281			ssa->sin.sin_family = AF_INET;
2282			ssa->sin.sin_len = sizeof(struct sockaddr_in);
2283			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
2284		}
2285
2286		/*
2287		 * Attempt to look up hinted ifp from interface address.
2288		 * Fallthrough with null ifp iff lookup fails, to
2289		 * preserve 4.4BSD mcast API idempotence.
2290		 * XXX NOTE WELL: The RFC 3678 API is preferred because
2291		 * using an IPv4 address as a key is racy.
2292		 */
2293		if (!in_nullhost(mreqs.imr_interface))
2294			INADDR_TO_IFP(mreqs.imr_interface, ifp);
2295
2296		CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p",
2297		    __func__, inet_ntoa(mreqs.imr_interface), ifp);
2298
2299		break;
2300
2301	case MCAST_LEAVE_GROUP:
2302	case MCAST_LEAVE_SOURCE_GROUP:
2303		if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
2304			error = sooptcopyin(sopt, &gsr,
2305			    sizeof(struct group_req),
2306			    sizeof(struct group_req));
2307		} else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
2308			error = sooptcopyin(sopt, &gsr,
2309			    sizeof(struct group_source_req),
2310			    sizeof(struct group_source_req));
2311		}
2312		if (error)
2313			return (error);
2314
2315		if (gsa->sin.sin_family != AF_INET ||
2316		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
2317			return (EINVAL);
2318
2319		if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
2320			if (ssa->sin.sin_family != AF_INET ||
2321			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
2322				return (EINVAL);
2323		}
2324
2325		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
2326			return (EADDRNOTAVAIL);
2327
2328		ifp = ifnet_byindex(gsr.gsr_interface);
2329
2330		if (ifp == NULL)
2331			return (EADDRNOTAVAIL);
2332		break;
2333
2334	default:
2335		CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
2336		    __func__, sopt->sopt_name);
2337		return (EOPNOTSUPP);
2338		break;
2339	}
2340
2341	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
2342		return (EINVAL);
2343
2344	/*
2345	 * Find the membership in the membership array.
2346	 */
2347	imo = inp_findmoptions(inp);
2348	idx = imo_match_group(imo, ifp, &gsa->sa);
2349	if (idx == -1) {
2350		error = EADDRNOTAVAIL;
2351		goto out_inp_locked;
2352	}
2353	inm = imo->imo_membership[idx];
2354	imf = &imo->imo_mfilters[idx];
2355
2356	if (ssa->ss.ss_family != AF_UNSPEC)
2357		is_final = 0;
2358
2359	/*
2360	 * Begin state merge transaction at socket layer.
2361	 */
2362	INP_WLOCK_ASSERT(inp);
2363
2364	/*
2365	 * If we were instructed only to leave a given source, do so.
2366	 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships.
2367	 */
2368	if (is_final) {
2369		imf_leave(imf);
2370	} else {
2371		if (imf->imf_st[0] == MCAST_EXCLUDE) {
2372			error = EADDRNOTAVAIL;
2373			goto out_inp_locked;
2374		}
2375		ims = imo_match_source(imo, idx, &ssa->sa);
2376		if (ims == NULL) {
2377			CTR3(KTR_IGMPV3, "%s: source %s %spresent", __func__,
2378			    inet_ntoa(ssa->sin.sin_addr), "not ");
2379			error = EADDRNOTAVAIL;
2380			goto out_inp_locked;
2381		}
2382		CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block");
2383		error = imf_prune(imf, &ssa->sin);
2384		if (error) {
2385			CTR1(KTR_IGMPV3, "%s: merge imf state failed",
2386			    __func__);
2387			goto out_inp_locked;
2388		}
2389	}
2390
2391	/*
2392	 * Begin state merge transaction at IGMP layer.
2393	 */
2394	IN_MULTI_LOCK();
2395
2396	if (is_final) {
2397		/*
2398		 * Give up the multicast address record to which
2399		 * the membership points.
2400		 */
2401		(void)in_leavegroup_locked(inm, imf);
2402	} else {
2403		CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
2404		error = inm_merge(inm, imf);
2405		if (error) {
2406			CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
2407			    __func__);
2408			goto out_in_multi_locked;
2409		}
2410
2411		CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
2412		error = igmp_change_state(inm);
2413		if (error) {
2414			CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
2415			    __func__);
2416		}
2417	}
2418
2419out_in_multi_locked:
2420
2421	IN_MULTI_UNLOCK();
2422
2423	if (error)
2424		imf_rollback(imf);
2425	else
2426		imf_commit(imf);
2427
2428	imf_reap(imf);
2429
2430	if (is_final) {
2431		/* Remove the gap in the membership and filter array. */
2432		for (++idx; idx < imo->imo_num_memberships; ++idx) {
2433			imo->imo_membership[idx-1] = imo->imo_membership[idx];
2434			imo->imo_mfilters[idx-1] = imo->imo_mfilters[idx];
2435		}
2436		imo->imo_num_memberships--;
2437	}
2438
2439out_inp_locked:
2440	INP_WUNLOCK(inp);
2441	return (error);
2442}
2443
2444/*
2445 * Select the interface for transmitting IPv4 multicast datagrams.
2446 *
2447 * Either an instance of struct in_addr or an instance of struct ip_mreqn
2448 * may be passed to this socket option. An address of INADDR_ANY or an
2449 * interface index of 0 is used to remove a previous selection.
2450 * When no interface is selected, one is chosen for every send.
2451 */
2452static int
2453inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
2454{
2455	struct in_addr		 addr;
2456	struct ip_mreqn		 mreqn;
2457	struct ifnet		*ifp;
2458	struct ip_moptions	*imo;
2459	int			 error;
2460
2461	if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
2462		/*
2463		 * An interface index was specified using the
2464		 * Linux-derived ip_mreqn structure.
2465		 */
2466		error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn),
2467		    sizeof(struct ip_mreqn));
2468		if (error)
2469			return (error);
2470
2471		if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex)
2472			return (EINVAL);
2473
2474		if (mreqn.imr_ifindex == 0) {
2475			ifp = NULL;
2476		} else {
2477			ifp = ifnet_byindex(mreqn.imr_ifindex);
2478			if (ifp == NULL)
2479				return (EADDRNOTAVAIL);
2480		}
2481	} else {
2482		/*
2483		 * An interface was specified by IPv4 address.
2484		 * This is the traditional BSD usage.
2485		 */
2486		error = sooptcopyin(sopt, &addr, sizeof(struct in_addr),
2487		    sizeof(struct in_addr));
2488		if (error)
2489			return (error);
2490		if (in_nullhost(addr)) {
2491			ifp = NULL;
2492		} else {
2493			INADDR_TO_IFP(addr, ifp);
2494			if (ifp == NULL)
2495				return (EADDRNOTAVAIL);
2496		}
2497		CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = %s", __func__, ifp,
2498		    inet_ntoa(addr));
2499	}
2500
2501	/* Reject interfaces which do not support multicast. */
2502	if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0)
2503		return (EOPNOTSUPP);
2504
2505	imo = inp_findmoptions(inp);
2506	imo->imo_multicast_ifp = ifp;
2507	imo->imo_multicast_addr.s_addr = INADDR_ANY;
2508	INP_WUNLOCK(inp);
2509
2510	return (0);
2511}
2512
2513/*
2514 * Atomically set source filters on a socket for an IPv4 multicast group.
2515 *
2516 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
2517 */
2518static int
2519inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
2520{
2521	struct __msfilterreq	 msfr;
2522	sockunion_t		*gsa;
2523	struct ifnet		*ifp;
2524	struct in_mfilter	*imf;
2525	struct ip_moptions	*imo;
2526	struct in_multi		*inm;
2527	size_t			 idx;
2528	int			 error;
2529
2530	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
2531	    sizeof(struct __msfilterreq));
2532	if (error)
2533		return (error);
2534
2535	if (msfr.msfr_nsrcs > in_mcast_maxsocksrc)
2536		return (ENOBUFS);
2537
2538	if ((msfr.msfr_fmode != MCAST_EXCLUDE &&
2539	     msfr.msfr_fmode != MCAST_INCLUDE))
2540		return (EINVAL);
2541
2542	if (msfr.msfr_group.ss_family != AF_INET ||
2543	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in))
2544		return (EINVAL);
2545
2546	gsa = (sockunion_t *)&msfr.msfr_group;
2547	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
2548		return (EINVAL);
2549
2550	gsa->sin.sin_port = 0;	/* ignore port */
2551
2552	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
2553		return (EADDRNOTAVAIL);
2554
2555	ifp = ifnet_byindex(msfr.msfr_ifindex);
2556	if (ifp == NULL)
2557		return (EADDRNOTAVAIL);
2558
2559	/*
2560	 * Take the INP write lock.
2561	 * Check if this socket is a member of this group.
2562	 */
2563	imo = inp_findmoptions(inp);
2564	idx = imo_match_group(imo, ifp, &gsa->sa);
2565	if (idx == -1 || imo->imo_mfilters == NULL) {
2566		error = EADDRNOTAVAIL;
2567		goto out_inp_locked;
2568	}
2569	inm = imo->imo_membership[idx];
2570	imf = &imo->imo_mfilters[idx];
2571
2572	/*
2573	 * Begin state merge transaction at socket layer.
2574	 */
2575	INP_WLOCK_ASSERT(inp);
2576
2577	imf->imf_st[1] = msfr.msfr_fmode;
2578
2579	/*
2580	 * Apply any new source filters, if present.
2581	 * Make a copy of the user-space source vector so
2582	 * that we may copy them with a single copyin. This
2583	 * allows us to deal with page faults up-front.
2584	 */
2585	if (msfr.msfr_nsrcs > 0) {
2586		struct in_msource	*lims;
2587		struct sockaddr_in	*psin;
2588		struct sockaddr_storage	*kss, *pkss;
2589		int			 i;
2590
2591		INP_WUNLOCK(inp);
2592
2593		CTR2(KTR_IGMPV3, "%s: loading %lu source list entries",
2594		    __func__, (unsigned long)msfr.msfr_nsrcs);
2595		kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
2596		    M_TEMP, M_WAITOK);
2597		error = copyin(msfr.msfr_srcs, kss,
2598		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
2599		if (error) {
2600			free(kss, M_TEMP);
2601			return (error);
2602		}
2603
2604		INP_WLOCK(inp);
2605
2606		/*
2607		 * Mark all source filters as UNDEFINED at t1.
2608		 * Restore new group filter mode, as imf_leave()
2609		 * will set it to INCLUDE.
2610		 */
2611		imf_leave(imf);
2612		imf->imf_st[1] = msfr.msfr_fmode;
2613
2614		/*
2615		 * Update socket layer filters at t1, lazy-allocating
2616		 * new entries. This saves a bunch of memory at the
2617		 * cost of one RB_FIND() per source entry; duplicate
2618		 * entries in the msfr_nsrcs vector are ignored.
2619		 * If we encounter an error, rollback transaction.
2620		 *
2621		 * XXX This too could be replaced with a set-symmetric
2622		 * difference like loop to avoid walking from root
2623		 * every time, as the key space is common.
2624		 */
2625		for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) {
2626			psin = (struct sockaddr_in *)pkss;
2627			if (psin->sin_family != AF_INET) {
2628				error = EAFNOSUPPORT;
2629				break;
2630			}
2631			if (psin->sin_len != sizeof(struct sockaddr_in)) {
2632				error = EINVAL;
2633				break;
2634			}
2635			error = imf_get_source(imf, psin, &lims);
2636			if (error)
2637				break;
2638			lims->imsl_st[1] = imf->imf_st[1];
2639		}
2640		free(kss, M_TEMP);
2641	}
2642
2643	if (error)
2644		goto out_imf_rollback;
2645
2646	INP_WLOCK_ASSERT(inp);
2647	IN_MULTI_LOCK();
2648
2649	/*
2650	 * Begin state merge transaction at IGMP layer.
2651	 */
2652	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
2653	error = inm_merge(inm, imf);
2654	if (error) {
2655		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
2656		goto out_in_multi_locked;
2657	}
2658
2659	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
2660	error = igmp_change_state(inm);
2661	if (error)
2662		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
2663
2664out_in_multi_locked:
2665
2666	IN_MULTI_UNLOCK();
2667
2668out_imf_rollback:
2669	if (error)
2670		imf_rollback(imf);
2671	else
2672		imf_commit(imf);
2673
2674	imf_reap(imf);
2675
2676out_inp_locked:
2677	INP_WUNLOCK(inp);
2678	return (error);
2679}
2680
2681/*
2682 * Set the IP multicast options in response to user setsockopt().
2683 *
2684 * Many of the socket options handled in this function duplicate the
2685 * functionality of socket options in the regular unicast API. However,
2686 * it is not possible to merge the duplicate code, because the idempotence
2687 * of the IPv4 multicast part of the BSD Sockets API must be preserved;
2688 * the effects of these options must be treated as separate and distinct.
2689 *
2690 * SMPng: XXX: Unlocked read of inp_socket believed OK.
2691 * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING
2692 * is refactored to no longer use vifs.
2693 */
2694int
2695inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
2696{
2697	struct ip_moptions	*imo;
2698	int			 error;
2699
2700	error = 0;
2701
2702	/*
2703	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
2704	 * or is a divert socket, reject it.
2705	 */
2706	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
2707	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
2708	     inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
2709		return (EOPNOTSUPP);
2710
2711	switch (sopt->sopt_name) {
2712	case IP_MULTICAST_VIF: {
2713		int vifi;
2714		/*
2715		 * Select a multicast VIF for transmission.
2716		 * Only useful if multicast forwarding is active.
2717		 */
2718		if (legal_vif_num == NULL) {
2719			error = EOPNOTSUPP;
2720			break;
2721		}
2722		error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
2723		if (error)
2724			break;
2725		if (!legal_vif_num(vifi) && (vifi != -1)) {
2726			error = EINVAL;
2727			break;
2728		}
2729		imo = inp_findmoptions(inp);
2730		imo->imo_multicast_vif = vifi;
2731		INP_WUNLOCK(inp);
2732		break;
2733	}
2734
2735	case IP_MULTICAST_IF:
2736		error = inp_set_multicast_if(inp, sopt);
2737		break;
2738
2739	case IP_MULTICAST_TTL: {
2740		u_char ttl;
2741
2742		/*
2743		 * Set the IP time-to-live for outgoing multicast packets.
2744		 * The original multicast API required a char argument,
2745		 * which is inconsistent with the rest of the socket API.
2746		 * We allow either a char or an int.
2747		 */
2748		if (sopt->sopt_valsize == sizeof(u_char)) {
2749			error = sooptcopyin(sopt, &ttl, sizeof(u_char),
2750			    sizeof(u_char));
2751			if (error)
2752				break;
2753		} else {
2754			u_int ittl;
2755
2756			error = sooptcopyin(sopt, &ittl, sizeof(u_int),
2757			    sizeof(u_int));
2758			if (error)
2759				break;
2760			if (ittl > 255) {
2761				error = EINVAL;
2762				break;
2763			}
2764			ttl = (u_char)ittl;
2765		}
2766		imo = inp_findmoptions(inp);
2767		imo->imo_multicast_ttl = ttl;
2768		INP_WUNLOCK(inp);
2769		break;
2770	}
2771
2772	case IP_MULTICAST_LOOP: {
2773		u_char loop;
2774
2775		/*
2776		 * Set the loopback flag for outgoing multicast packets.
2777		 * Must be zero or one.  The original multicast API required a
2778		 * char argument, which is inconsistent with the rest
2779		 * of the socket API.  We allow either a char or an int.
2780		 */
2781		if (sopt->sopt_valsize == sizeof(u_char)) {
2782			error = sooptcopyin(sopt, &loop, sizeof(u_char),
2783			    sizeof(u_char));
2784			if (error)
2785				break;
2786		} else {
2787			u_int iloop;
2788
2789			error = sooptcopyin(sopt, &iloop, sizeof(u_int),
2790					    sizeof(u_int));
2791			if (error)
2792				break;
2793			loop = (u_char)iloop;
2794		}
2795		imo = inp_findmoptions(inp);
2796		imo->imo_multicast_loop = !!loop;
2797		INP_WUNLOCK(inp);
2798		break;
2799	}
2800
2801	case IP_ADD_MEMBERSHIP:
2802	case IP_ADD_SOURCE_MEMBERSHIP:
2803	case MCAST_JOIN_GROUP:
2804	case MCAST_JOIN_SOURCE_GROUP:
2805		error = inp_join_group(inp, sopt);
2806		break;
2807
2808	case IP_DROP_MEMBERSHIP:
2809	case IP_DROP_SOURCE_MEMBERSHIP:
2810	case MCAST_LEAVE_GROUP:
2811	case MCAST_LEAVE_SOURCE_GROUP:
2812		error = inp_leave_group(inp, sopt);
2813		break;
2814
2815	case IP_BLOCK_SOURCE:
2816	case IP_UNBLOCK_SOURCE:
2817	case MCAST_BLOCK_SOURCE:
2818	case MCAST_UNBLOCK_SOURCE:
2819		error = inp_block_unblock_source(inp, sopt);
2820		break;
2821
2822	case IP_MSFILTER:
2823		error = inp_set_source_filters(inp, sopt);
2824		break;
2825
2826	default:
2827		error = EOPNOTSUPP;
2828		break;
2829	}
2830
2831	INP_UNLOCK_ASSERT(inp);
2832
2833	return (error);
2834}
2835
2836/*
2837 * Expose IGMP's multicast filter mode and source list(s) to userland,
2838 * keyed by (ifindex, group).
2839 * The filter mode is written out as a uint32_t, followed by
2840 * 0..n of struct in_addr.
2841 * For use by ifmcstat(8).
2842 * SMPng: NOTE: unlocked read of ifindex space.
2843 */
2844static int
2845sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS)
2846{
2847	struct in_addr			 src, group;
2848	struct ifnet			*ifp;
2849	struct ifmultiaddr		*ifma;
2850	struct in_multi			*inm;
2851	struct ip_msource		*ims;
2852	int				*name;
2853	int				 retval;
2854	u_int				 namelen;
2855	uint32_t			 fmode, ifindex;
2856
2857	name = (int *)arg1;
2858	namelen = arg2;
2859
2860	if (req->newptr != NULL)
2861		return (EPERM);
2862
2863	if (namelen != 2)
2864		return (EINVAL);
2865
2866	ifindex = name[0];
2867	if (ifindex <= 0 || ifindex > V_if_index) {
2868		CTR2(KTR_IGMPV3, "%s: ifindex %u out of range",
2869		    __func__, ifindex);
2870		return (ENOENT);
2871	}
2872
2873	group.s_addr = name[1];
2874	if (!IN_MULTICAST(ntohl(group.s_addr))) {
2875		CTR2(KTR_IGMPV3, "%s: group %s is not multicast",
2876		    __func__, inet_ntoa(group));
2877		return (EINVAL);
2878	}
2879
2880	ifp = ifnet_byindex(ifindex);
2881	if (ifp == NULL) {
2882		CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u",
2883		    __func__, ifindex);
2884		return (ENOENT);
2885	}
2886
2887	retval = sysctl_wire_old_buffer(req,
2888	    sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr)));
2889	if (retval)
2890		return (retval);
2891
2892	IN_MULTI_LOCK();
2893
2894	IF_ADDR_RLOCK(ifp);
2895	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2896		if (ifma->ifma_addr->sa_family != AF_INET ||
2897		    ifma->ifma_protospec == NULL)
2898			continue;
2899		inm = (struct in_multi *)ifma->ifma_protospec;
2900		if (!in_hosteq(inm->inm_addr, group))
2901			continue;
2902		fmode = inm->inm_st[1].iss_fmode;
2903		retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t));
2904		if (retval != 0)
2905			break;
2906		RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
2907#ifdef KTR
2908			struct in_addr ina;
2909			ina.s_addr = htonl(ims->ims_haddr);
2910			CTR2(KTR_IGMPV3, "%s: visit node %s", __func__,
2911			    inet_ntoa(ina));
2912#endif
2913			/*
2914			 * Only copy-out sources which are in-mode.
2915			 */
2916			if (fmode != ims_get_mode(inm, ims, 1)) {
2917				CTR1(KTR_IGMPV3, "%s: skip non-in-mode",
2918				    __func__);
2919				continue;
2920			}
2921			src.s_addr = htonl(ims->ims_haddr);
2922			retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr));
2923			if (retval != 0)
2924				break;
2925		}
2926	}
2927	IF_ADDR_RUNLOCK(ifp);
2928
2929	IN_MULTI_UNLOCK();
2930
2931	return (retval);
2932}
2933
2934#ifdef KTR
2935
2936static const char *inm_modestrs[] = { "un", "in", "ex" };
2937
2938static const char *
2939inm_mode_str(const int mode)
2940{
2941
2942	if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE)
2943		return (inm_modestrs[mode]);
2944	return ("??");
2945}
2946
2947static const char *inm_statestrs[] = {
2948	"not-member",
2949	"silent",
2950	"idle",
2951	"lazy",
2952	"sleeping",
2953	"awakening",
2954	"query-pending",
2955	"sg-query-pending",
2956	"leaving"
2957};
2958
2959static const char *
2960inm_state_str(const int state)
2961{
2962
2963	if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER)
2964		return (inm_statestrs[state]);
2965	return ("??");
2966}
2967
2968/*
2969 * Dump an in_multi structure to the console.
2970 */
2971void
2972inm_print(const struct in_multi *inm)
2973{
2974	int t;
2975
2976	if ((ktr_mask & KTR_IGMPV3) == 0)
2977		return;
2978
2979	printf("%s: --- begin inm %p ---\n", __func__, inm);
2980	printf("addr %s ifp %p(%s) ifma %p\n",
2981	    inet_ntoa(inm->inm_addr),
2982	    inm->inm_ifp,
2983	    inm->inm_ifp->if_xname,
2984	    inm->inm_ifma);
2985	printf("timer %u state %s refcount %u scq.len %u\n",
2986	    inm->inm_timer,
2987	    inm_state_str(inm->inm_state),
2988	    inm->inm_refcount,
2989	    inm->inm_scq.ifq_len);
2990	printf("igi %p nsrc %lu sctimer %u scrv %u\n",
2991	    inm->inm_igi,
2992	    inm->inm_nsrc,
2993	    inm->inm_sctimer,
2994	    inm->inm_scrv);
2995	for (t = 0; t < 2; t++) {
2996		printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t,
2997		    inm_mode_str(inm->inm_st[t].iss_fmode),
2998		    inm->inm_st[t].iss_asm,
2999		    inm->inm_st[t].iss_ex,
3000		    inm->inm_st[t].iss_in,
3001		    inm->inm_st[t].iss_rec);
3002	}
3003	printf("%s: --- end inm %p ---\n", __func__, inm);
3004}
3005
3006#else /* !KTR */
3007
/*
 * Kernels built without KTR: dumping an in_multi is a no-op.
 */
void
inm_print(const struct in_multi *inm)
{

	(void)inm;
}
3013
3014#endif /* KTR */
3015
/*
 * Emit the red-black tree routines for ip_msource_tree: nodes are
 * struct ip_msource, linked through ims_link and ordered by
 * ip_msource_cmp.  RB_FOREACH() over this tree is used above.
 */
3016RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp);
3017