in_mcast.c revision 257176
1/*-
2 * Copyright (c) 2007-2009 Bruce Simpson.
3 * Copyright (c) 2005 Robert N. M. Watson.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote
15 *    products derived from this software without specific prior written
16 *    permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31/*
32 * IPv4 multicast socket, group, and socket option processing module.
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/netinet/in_mcast.c 257176 2013-10-26 17:58:36Z glebius $");
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/kernel.h>
41#include <sys/malloc.h>
42#include <sys/mbuf.h>
43#include <sys/protosw.h>
44#include <sys/socket.h>
45#include <sys/socketvar.h>
46#include <sys/protosw.h>
47#include <sys/sysctl.h>
48#include <sys/ktr.h>
49#include <sys/taskqueue.h>
50#include <sys/tree.h>
51
52#include <net/if.h>
53#include <net/if_var.h>
54#include <net/if_dl.h>
55#include <net/route.h>
56#include <net/vnet.h>
57
58#include <netinet/in.h>
59#include <netinet/in_systm.h>
60#include <netinet/in_pcb.h>
61#include <netinet/in_var.h>
62#include <netinet/ip_var.h>
63#include <netinet/igmp_var.h>
64
65#ifndef KTR_IGMPV3
66#define KTR_IGMPV3 KTR_INET
67#endif
68
69#ifndef __SOCKUNION_DECLARED
70union sockunion {
71	struct sockaddr_storage	ss;
72	struct sockaddr		sa;
73	struct sockaddr_dl	sdl;
74	struct sockaddr_in	sin;
75};
76typedef union sockunion sockunion_t;
77#define __SOCKUNION_DECLARED
78#endif /* __SOCKUNION_DECLARED */
79
80static MALLOC_DEFINE(M_INMFILTER, "in_mfilter",
81    "IPv4 multicast PCB-layer source filter");
82static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group");
83static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options");
84static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource",
85    "IPv4 multicast IGMP-layer source filter");
86
87/*
88 * Locking:
89 * - Lock order is: Giant, INP_WLOCK, IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
90 * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however
91 *   it can be taken by code in net/if.c also.
92 * - ip_moptions and in_mfilter are covered by the INP_WLOCK.
93 *
94 * struct in_multi is covered by IN_MULTI_LOCK. There isn't strictly
95 * any need for in_multi itself to be virtualized -- it is bound to an ifp
96 * anyway no matter what happens.
97 */
98struct mtx in_multi_mtx;
99MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF);
100
101/*
102 * Functions with non-static linkage defined in this file should be
103 * declared in in_var.h:
104 *  imo_multi_filter()
105 *  in_addmulti()
106 *  in_delmulti()
107 *  in_joingroup()
108 *  in_joingroup_locked()
109 *  in_leavegroup()
110 *  in_leavegroup_locked()
111 * and ip_var.h:
112 *  inp_freemoptions()
113 *  inp_getmoptions()
114 *  inp_setmoptions()
115 *
116 * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti()
117 * and in_delmulti().
118 */
119static void	imf_commit(struct in_mfilter *);
120static int	imf_get_source(struct in_mfilter *imf,
121		    const struct sockaddr_in *psin,
122		    struct in_msource **);
123static struct in_msource *
124		imf_graft(struct in_mfilter *, const uint8_t,
125		    const struct sockaddr_in *);
126static void	imf_leave(struct in_mfilter *);
127static int	imf_prune(struct in_mfilter *, const struct sockaddr_in *);
128static void	imf_purge(struct in_mfilter *);
129static void	imf_rollback(struct in_mfilter *);
130static void	imf_reap(struct in_mfilter *);
131static int	imo_grow(struct ip_moptions *);
132static size_t	imo_match_group(const struct ip_moptions *,
133		    const struct ifnet *, const struct sockaddr *);
134static struct in_msource *
135		imo_match_source(const struct ip_moptions *, const size_t,
136		    const struct sockaddr *);
137static void	ims_merge(struct ip_msource *ims,
138		    const struct in_msource *lims, const int rollback);
139static int	in_getmulti(struct ifnet *, const struct in_addr *,
140		    struct in_multi **);
141static int	inm_get_source(struct in_multi *inm, const in_addr_t haddr,
142		    const int noalloc, struct ip_msource **pims);
143static int	inm_is_ifp_detached(const struct in_multi *);
144static int	inm_merge(struct in_multi *, /*const*/ struct in_mfilter *);
145static void	inm_purge(struct in_multi *);
146static void	inm_reap(struct in_multi *);
147static struct ip_moptions *
148		inp_findmoptions(struct inpcb *);
149static void	inp_freemoptions_internal(struct ip_moptions *);
150static void	inp_gcmoptions(void *, int);
151static int	inp_get_source_filters(struct inpcb *, struct sockopt *);
152static int	inp_join_group(struct inpcb *, struct sockopt *);
153static int	inp_leave_group(struct inpcb *, struct sockopt *);
154static struct ifnet *
155		inp_lookup_mcast_ifp(const struct inpcb *,
156		    const struct sockaddr_in *, const struct in_addr);
157static int	inp_block_unblock_source(struct inpcb *, struct sockopt *);
158static int	inp_set_multicast_if(struct inpcb *, struct sockopt *);
159static int	inp_set_source_filters(struct inpcb *, struct sockopt *);
160static int	sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS);
161
162static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0,
163    "IPv4 multicast");
164
165static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER;
166SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc,
167    CTLFLAG_RW | CTLFLAG_TUN, &in_mcast_maxgrpsrc, 0,
168    "Max source filters per group");
169TUNABLE_ULONG("net.inet.ip.mcast.maxgrpsrc", &in_mcast_maxgrpsrc);
170
171static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER;
172SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc,
173    CTLFLAG_RW | CTLFLAG_TUN, &in_mcast_maxsocksrc, 0,
174    "Max source filters per socket");
175TUNABLE_ULONG("net.inet.ip.mcast.maxsocksrc", &in_mcast_maxsocksrc);
176
177int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP;
178SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RW | CTLFLAG_TUN,
179    &in_mcast_loop, 0, "Loopback multicast datagrams by default");
180TUNABLE_INT("net.inet.ip.mcast.loop", &in_mcast_loop);
181
182static SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters,
183    CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters,
184    "Per-interface stack-wide source filters");
185
186static STAILQ_HEAD(, ip_moptions) imo_gc_list =
187    STAILQ_HEAD_INITIALIZER(imo_gc_list);
188static struct task imo_gc_task = TASK_INITIALIZER(0, inp_gcmoptions, NULL);
189
190/*
191 * Inline function which wraps assertions for a valid ifp.
192 * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp
193 * is detached.
194 */
195static int __inline
196inm_is_ifp_detached(const struct in_multi *inm)
197{
198	struct ifnet *ifp;
199
200	KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
201	ifp = inm->inm_ifma->ifma_ifp;
202	if (ifp != NULL) {
203		/*
204		 * Sanity check that netinet's notion of ifp is the
205		 * same as net's.
206		 */
207		KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
208	}
209
210	return (ifp == NULL);
211}
212
213/*
214 * Initialize an in_mfilter structure to a known state at t0, t1
215 * with an empty source filter list.
216 */
217static __inline void
218imf_init(struct in_mfilter *imf, const int st0, const int st1)
219{
220	memset(imf, 0, sizeof(struct in_mfilter));
221	RB_INIT(&imf->imf_sources);
222	imf->imf_st[0] = st0;
223	imf->imf_st[1] = st1;
224}
225
226/*
227 * Resize the ip_moptions vector to the next power-of-two minus 1.
228 * May be called with locks held; do not sleep.
229 */
230static int
231imo_grow(struct ip_moptions *imo)
232{
233	struct in_multi		**nmships;
234	struct in_multi		**omships;
235	struct in_mfilter	 *nmfilters;
236	struct in_mfilter	 *omfilters;
237	size_t			  idx;
238	size_t			  newmax;
239	size_t			  oldmax;
240
241	nmships = NULL;
242	nmfilters = NULL;
243	omships = imo->imo_membership;
244	omfilters = imo->imo_mfilters;
245	oldmax = imo->imo_max_memberships;
246	newmax = ((oldmax + 1) * 2) - 1;
247
248	if (newmax <= IP_MAX_MEMBERSHIPS) {
249		nmships = (struct in_multi **)realloc(omships,
250		    sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT);
251		nmfilters = (struct in_mfilter *)realloc(omfilters,
252		    sizeof(struct in_mfilter) * newmax, M_INMFILTER, M_NOWAIT);
253		if (nmships != NULL && nmfilters != NULL) {
254			/* Initialize newly allocated source filter heads. */
255			for (idx = oldmax; idx < newmax; idx++) {
256				imf_init(&nmfilters[idx], MCAST_UNDEFINED,
257				    MCAST_EXCLUDE);
258			}
259			imo->imo_max_memberships = newmax;
260			imo->imo_membership = nmships;
261			imo->imo_mfilters = nmfilters;
262		}
263	}
264
265	if (nmships == NULL || nmfilters == NULL) {
266		if (nmships != NULL)
267			free(nmships, M_IPMOPTS);
268		if (nmfilters != NULL)
269			free(nmfilters, M_INMFILTER);
270		return (ETOOMANYREFS);
271	}
272
273	return (0);
274}
275
276/*
277 * Find an IPv4 multicast group entry for this ip_moptions instance
278 * which matches the specified group, and optionally an interface.
279 * Return its index into the array, or -1 if not found.
280 */
281static size_t
282imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp,
283    const struct sockaddr *group)
284{
285	const struct sockaddr_in *gsin;
286	struct in_multi	**pinm;
287	int		  idx;
288	int		  nmships;
289
290	gsin = (const struct sockaddr_in *)group;
291
292	/* The imo_membership array may be lazy allocated. */
293	if (imo->imo_membership == NULL || imo->imo_num_memberships == 0)
294		return (-1);
295
296	nmships = imo->imo_num_memberships;
297	pinm = &imo->imo_membership[0];
298	for (idx = 0; idx < nmships; idx++, pinm++) {
299		if (*pinm == NULL)
300			continue;
301		if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) &&
302		    in_hosteq((*pinm)->inm_addr, gsin->sin_addr)) {
303			break;
304		}
305	}
306	if (idx >= nmships)
307		idx = -1;
308
309	return (idx);
310}
311
312/*
313 * Find an IPv4 multicast source entry for this imo which matches
314 * the given group index for this socket, and source address.
315 *
316 * NOTE: This does not check if the entry is in-mode, merely if
317 * it exists, which may not be the desired behaviour.
318 */
319static struct in_msource *
320imo_match_source(const struct ip_moptions *imo, const size_t gidx,
321    const struct sockaddr *src)
322{
323	struct ip_msource	 find;
324	struct in_mfilter	*imf;
325	struct ip_msource	*ims;
326	const sockunion_t	*psa;
327
328	KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
329	KASSERT(gidx != -1 && gidx < imo->imo_num_memberships,
330	    ("%s: invalid index %d\n", __func__, (int)gidx));
331
332	/* The imo_mfilters array may be lazy allocated. */
333	if (imo->imo_mfilters == NULL)
334		return (NULL);
335	imf = &imo->imo_mfilters[gidx];
336
337	/* Source trees are keyed in host byte order. */
338	psa = (const sockunion_t *)src;
339	find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr);
340	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
341
342	return ((struct in_msource *)ims);
343}
344
345/*
346 * Perform filtering for multicast datagrams on a socket by group and source.
347 *
348 * Returns 0 if a datagram should be allowed through, or various error codes
349 * if the socket was not a member of the group, or the source was muted, etc.
350 */
351int
352imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp,
353    const struct sockaddr *group, const struct sockaddr *src)
354{
355	size_t gidx;
356	struct in_msource *ims;
357	int mode;
358
359	KASSERT(ifp != NULL, ("%s: null ifp", __func__));
360
361	gidx = imo_match_group(imo, ifp, group);
362	if (gidx == -1)
363		return (MCAST_NOTGMEMBER);
364
365	/*
366	 * Check if the source was included in an (S,G) join.
367	 * Allow reception on exclusive memberships by default,
368	 * reject reception on inclusive memberships by default.
369	 * Exclude source only if an in-mode exclude filter exists.
370	 * Include source only if an in-mode include filter exists.
371	 * NOTE: We are comparing group state here at IGMP t1 (now)
372	 * with socket-layer t0 (since last downcall).
373	 */
374	mode = imo->imo_mfilters[gidx].imf_st[1];
375	ims = imo_match_source(imo, gidx, src);
376
377	if ((ims == NULL && mode == MCAST_INCLUDE) ||
378	    (ims != NULL && ims->imsl_st[0] != mode))
379		return (MCAST_NOTSMEMBER);
380
381	return (MCAST_PASS);
382}
383
384/*
385 * Find and return a reference to an in_multi record for (ifp, group),
386 * and bump its reference count.
387 * If one does not exist, try to allocate it, and update link-layer multicast
388 * filters on ifp to listen for group.
389 * Assumes the IN_MULTI lock is held across the call.
390 * Return 0 if successful, otherwise return an appropriate error code.
391 */
392static int
393in_getmulti(struct ifnet *ifp, const struct in_addr *group,
394    struct in_multi **pinm)
395{
396	struct sockaddr_in	 gsin;
397	struct ifmultiaddr	*ifma;
398	struct in_ifinfo	*ii;
399	struct in_multi		*inm;
400	int error;
401
402	IN_MULTI_LOCK_ASSERT();
403
404	ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET];
405
406	inm = inm_lookup(ifp, *group);
407	if (inm != NULL) {
408		/*
409		 * If we already joined this group, just bump the
410		 * refcount and return it.
411		 */
412		KASSERT(inm->inm_refcount >= 1,
413		    ("%s: bad refcount %d", __func__, inm->inm_refcount));
414		++inm->inm_refcount;
415		*pinm = inm;
416		return (0);
417	}
418
419	memset(&gsin, 0, sizeof(gsin));
420	gsin.sin_family = AF_INET;
421	gsin.sin_len = sizeof(struct sockaddr_in);
422	gsin.sin_addr = *group;
423
424	/*
425	 * Check if a link-layer group is already associated
426	 * with this network-layer group on the given ifnet.
427	 */
428	error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma);
429	if (error != 0)
430		return (error);
431
432	/* XXX ifma_protospec must be covered by IF_ADDR_LOCK */
433	IF_ADDR_WLOCK(ifp);
434
435	/*
436	 * If something other than netinet is occupying the link-layer
437	 * group, print a meaningful error message and back out of
438	 * the allocation.
439	 * Otherwise, bump the refcount on the existing network-layer
440	 * group association and return it.
441	 */
442	if (ifma->ifma_protospec != NULL) {
443		inm = (struct in_multi *)ifma->ifma_protospec;
444#ifdef INVARIANTS
445		KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr",
446		    __func__));
447		KASSERT(ifma->ifma_addr->sa_family == AF_INET,
448		    ("%s: ifma not AF_INET", __func__));
449		KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__));
450		if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
451		    !in_hosteq(inm->inm_addr, *group))
452			panic("%s: ifma %p is inconsistent with %p (%s)",
453			    __func__, ifma, inm, inet_ntoa(*group));
454#endif
455		++inm->inm_refcount;
456		*pinm = inm;
457		IF_ADDR_WUNLOCK(ifp);
458		return (0);
459	}
460
461	IF_ADDR_WLOCK_ASSERT(ifp);
462
463	/*
464	 * A new in_multi record is needed; allocate and initialize it.
465	 * We DO NOT perform an IGMP join as the in_ layer may need to
466	 * push an initial source list down to IGMP to support SSM.
467	 *
468	 * The initial source filter state is INCLUDE, {} as per the RFC.
469	 */
470	inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO);
471	if (inm == NULL) {
472		if_delmulti_ifma(ifma);
473		IF_ADDR_WUNLOCK(ifp);
474		return (ENOMEM);
475	}
476	inm->inm_addr = *group;
477	inm->inm_ifp = ifp;
478	inm->inm_igi = ii->ii_igmp;
479	inm->inm_ifma = ifma;
480	inm->inm_refcount = 1;
481	inm->inm_state = IGMP_NOT_MEMBER;
482
483	/*
484	 * Pending state-changes per group are subject to a bounds check.
485	 */
486	IFQ_SET_MAXLEN(&inm->inm_scq, IGMP_MAX_STATE_CHANGES);
487
488	inm->inm_st[0].iss_fmode = MCAST_UNDEFINED;
489	inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
490	RB_INIT(&inm->inm_srcs);
491
492	ifma->ifma_protospec = inm;
493
494	*pinm = inm;
495
496	IF_ADDR_WUNLOCK(ifp);
497	return (0);
498}
499
500/*
501 * Drop a reference to an in_multi record.
502 *
503 * If the refcount drops to 0, free the in_multi record and
504 * delete the underlying link-layer membership.
505 */
506void
507inm_release_locked(struct in_multi *inm)
508{
509	struct ifmultiaddr *ifma;
510
511	IN_MULTI_LOCK_ASSERT();
512
513	CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount);
514
515	if (--inm->inm_refcount > 0) {
516		CTR2(KTR_IGMPV3, "%s: refcount is now %d", __func__,
517		    inm->inm_refcount);
518		return;
519	}
520
521	CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm);
522
523	ifma = inm->inm_ifma;
524
525	/* XXX this access is not covered by IF_ADDR_LOCK */
526	CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma);
527	KASSERT(ifma->ifma_protospec == inm,
528	    ("%s: ifma_protospec != inm", __func__));
529	ifma->ifma_protospec = NULL;
530
531	inm_purge(inm);
532
533	free(inm, M_IPMADDR);
534
535	if_delmulti_ifma(ifma);
536}
537
538/*
539 * Clear recorded source entries for a group.
540 * Used by the IGMP code. Caller must hold the IN_MULTI lock.
541 * FIXME: Should reap.
542 */
543void
544inm_clear_recorded(struct in_multi *inm)
545{
546	struct ip_msource	*ims;
547
548	IN_MULTI_LOCK_ASSERT();
549
550	RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
551		if (ims->ims_stp) {
552			ims->ims_stp = 0;
553			--inm->inm_st[1].iss_rec;
554		}
555	}
556	KASSERT(inm->inm_st[1].iss_rec == 0,
557	    ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec));
558}
559
560/*
561 * Record a source as pending for a Source-Group IGMPv3 query.
562 * This lives here as it modifies the shared tree.
563 *
564 * inm is the group descriptor.
565 * naddr is the address of the source to record in network-byte order.
566 *
567 * If the net.inet.igmp.sgalloc sysctl is non-zero, we will
568 * lazy-allocate a source node in response to an SG query.
569 * Otherwise, no allocation is performed. This saves some memory
570 * with the trade-off that the source will not be reported to the
571 * router if joined in the window between the query response and
572 * the group actually being joined on the local host.
573 *
574 * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed.
575 * This turns off the allocation of a recorded source entry if
576 * the group has not been joined.
577 *
578 * Return 0 if the source didn't exist or was already marked as recorded.
579 * Return 1 if the source was marked as recorded by this function.
 * Return <0 if any error occurred (negated errno code).
581 */
582int
583inm_record_source(struct in_multi *inm, const in_addr_t naddr)
584{
585	struct ip_msource	 find;
586	struct ip_msource	*ims, *nims;
587
588	IN_MULTI_LOCK_ASSERT();
589
590	find.ims_haddr = ntohl(naddr);
591	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
592	if (ims && ims->ims_stp)
593		return (0);
594	if (ims == NULL) {
595		if (inm->inm_nsrc == in_mcast_maxgrpsrc)
596			return (-ENOSPC);
597		nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
598		    M_NOWAIT | M_ZERO);
599		if (nims == NULL)
600			return (-ENOMEM);
601		nims->ims_haddr = find.ims_haddr;
602		RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
603		++inm->inm_nsrc;
604		ims = nims;
605	}
606
607	/*
608	 * Mark the source as recorded and update the recorded
609	 * source count.
610	 */
611	++ims->ims_stp;
612	++inm->inm_st[1].iss_rec;
613
614	return (1);
615}
616
617/*
618 * Return a pointer to an in_msource owned by an in_mfilter,
619 * given its source address.
620 * Lazy-allocate if needed. If this is a new entry its filter state is
621 * undefined at t0.
622 *
623 * imf is the filter set being modified.
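 * psin holds the source address; it is converted with ntohl() to the
 * *host* byte-order value used as the tree key.
 * *plims is set to the entry found or allocated; it is left untouched
 * when an error is returned.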
625 *
626 * SMPng: May be called with locks held; malloc must not block.
627 */
628static int
629imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin,
630    struct in_msource **plims)
631{
632	struct ip_msource	 find;
633	struct ip_msource	*ims, *nims;
634	struct in_msource	*lims;
635	int			 error;
636
637	error = 0;
638	ims = NULL;
639	lims = NULL;
640
641	/* key is host byte order */
642	find.ims_haddr = ntohl(psin->sin_addr.s_addr);
643	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
644	lims = (struct in_msource *)ims;
645	if (lims == NULL) {
646		if (imf->imf_nsrc == in_mcast_maxsocksrc)
647			return (ENOSPC);
648		nims = malloc(sizeof(struct in_msource), M_INMFILTER,
649		    M_NOWAIT | M_ZERO);
650		if (nims == NULL)
651			return (ENOMEM);
652		lims = (struct in_msource *)nims;
653		lims->ims_haddr = find.ims_haddr;
654		lims->imsl_st[0] = MCAST_UNDEFINED;
655		RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
656		++imf->imf_nsrc;
657	}
658
659	*plims = lims;
660
661	return (error);
662}
663
664/*
665 * Graft a source entry into an existing socket-layer filter set,
666 * maintaining any required invariants and checking allocations.
667 *
668 * The source is marked as being in the new filter mode at t1.
669 *
670 * Return the pointer to the new node, otherwise return NULL.
671 */
672static struct in_msource *
673imf_graft(struct in_mfilter *imf, const uint8_t st1,
674    const struct sockaddr_in *psin)
675{
676	struct ip_msource	*nims;
677	struct in_msource	*lims;
678
679	nims = malloc(sizeof(struct in_msource), M_INMFILTER,
680	    M_NOWAIT | M_ZERO);
681	if (nims == NULL)
682		return (NULL);
683	lims = (struct in_msource *)nims;
684	lims->ims_haddr = ntohl(psin->sin_addr.s_addr);
685	lims->imsl_st[0] = MCAST_UNDEFINED;
686	lims->imsl_st[1] = st1;
687	RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
688	++imf->imf_nsrc;
689
690	return (lims);
691}
692
693/*
694 * Prune a source entry from an existing socket-layer filter set,
695 * maintaining any required invariants and checking allocations.
696 *
697 * The source is marked as being left at t1, it is not freed.
698 *
699 * Return 0 if no error occurred, otherwise return an errno value.
700 */
701static int
702imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin)
703{
704	struct ip_msource	 find;
705	struct ip_msource	*ims;
706	struct in_msource	*lims;
707
708	/* key is host byte order */
709	find.ims_haddr = ntohl(psin->sin_addr.s_addr);
710	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
711	if (ims == NULL)
712		return (ENOENT);
713	lims = (struct in_msource *)ims;
714	lims->imsl_st[1] = MCAST_UNDEFINED;
715	return (0);
716}
717
718/*
719 * Revert socket-layer filter set deltas at t1 to t0 state.
720 */
721static void
722imf_rollback(struct in_mfilter *imf)
723{
724	struct ip_msource	*ims, *tims;
725	struct in_msource	*lims;
726
727	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
728		lims = (struct in_msource *)ims;
729		if (lims->imsl_st[0] == lims->imsl_st[1]) {
730			/* no change at t1 */
731			continue;
732		} else if (lims->imsl_st[0] != MCAST_UNDEFINED) {
733			/* revert change to existing source at t1 */
734			lims->imsl_st[1] = lims->imsl_st[0];
735		} else {
736			/* revert source added t1 */
737			CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
738			RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
739			free(ims, M_INMFILTER);
740			imf->imf_nsrc--;
741		}
742	}
743	imf->imf_st[1] = imf->imf_st[0];
744}
745
746/*
747 * Mark socket-layer filter set as INCLUDE {} at t1.
748 */
749static void
750imf_leave(struct in_mfilter *imf)
751{
752	struct ip_msource	*ims;
753	struct in_msource	*lims;
754
755	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
756		lims = (struct in_msource *)ims;
757		lims->imsl_st[1] = MCAST_UNDEFINED;
758	}
759	imf->imf_st[1] = MCAST_INCLUDE;
760}
761
762/*
763 * Mark socket-layer filter set deltas as committed.
764 */
765static void
766imf_commit(struct in_mfilter *imf)
767{
768	struct ip_msource	*ims;
769	struct in_msource	*lims;
770
771	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
772		lims = (struct in_msource *)ims;
773		lims->imsl_st[0] = lims->imsl_st[1];
774	}
775	imf->imf_st[0] = imf->imf_st[1];
776}
777
778/*
779 * Reap unreferenced sources from socket-layer filter set.
780 */
781static void
782imf_reap(struct in_mfilter *imf)
783{
784	struct ip_msource	*ims, *tims;
785	struct in_msource	*lims;
786
787	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
788		lims = (struct in_msource *)ims;
789		if ((lims->imsl_st[0] == MCAST_UNDEFINED) &&
790		    (lims->imsl_st[1] == MCAST_UNDEFINED)) {
791			CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims);
792			RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
793			free(ims, M_INMFILTER);
794			imf->imf_nsrc--;
795		}
796	}
797}
798
799/*
800 * Purge socket-layer filter set.
801 */
802static void
803imf_purge(struct in_mfilter *imf)
804{
805	struct ip_msource	*ims, *tims;
806
807	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
808		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
809		RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
810		free(ims, M_INMFILTER);
811		imf->imf_nsrc--;
812	}
813	imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED;
814	KASSERT(RB_EMPTY(&imf->imf_sources),
815	    ("%s: imf_sources not empty", __func__));
816}
817
818/*
819 * Look up a source filter entry for a multicast group.
820 *
821 * inm is the group descriptor to work with.
822 * haddr is the host-byte-order IPv4 address to look up.
823 * noalloc may be non-zero to suppress allocation of sources.
824 * *pims will be set to the address of the retrieved or allocated source.
825 *
826 * SMPng: NOTE: may be called with locks held.
827 * Return 0 if successful, otherwise return a non-zero error code.
828 */
829static int
830inm_get_source(struct in_multi *inm, const in_addr_t haddr,
831    const int noalloc, struct ip_msource **pims)
832{
833	struct ip_msource	 find;
834	struct ip_msource	*ims, *nims;
835#ifdef KTR
836	struct in_addr ia;
837#endif
838
839	find.ims_haddr = haddr;
840	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
841	if (ims == NULL && !noalloc) {
842		if (inm->inm_nsrc == in_mcast_maxgrpsrc)
843			return (ENOSPC);
844		nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
845		    M_NOWAIT | M_ZERO);
846		if (nims == NULL)
847			return (ENOMEM);
848		nims->ims_haddr = haddr;
849		RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
850		++inm->inm_nsrc;
851		ims = nims;
852#ifdef KTR
853		ia.s_addr = htonl(haddr);
854		CTR3(KTR_IGMPV3, "%s: allocated %s as %p", __func__,
855		    inet_ntoa(ia), ims);
856#endif
857	}
858
859	*pims = ims;
860	return (0);
861}
862
863/*
864 * Merge socket-layer source into IGMP-layer source.
865 * If rollback is non-zero, perform the inverse of the merge.
866 */
867static void
868ims_merge(struct ip_msource *ims, const struct in_msource *lims,
869    const int rollback)
870{
871	int n = rollback ? -1 : 1;
872#ifdef KTR
873	struct in_addr ia;
874
875	ia.s_addr = htonl(ims->ims_haddr);
876#endif
877
878	if (lims->imsl_st[0] == MCAST_EXCLUDE) {
879		CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on %s",
880		    __func__, n, inet_ntoa(ia));
881		ims->ims_st[1].ex -= n;
882	} else if (lims->imsl_st[0] == MCAST_INCLUDE) {
883		CTR3(KTR_IGMPV3, "%s: t1 in -= %d on %s",
884		    __func__, n, inet_ntoa(ia));
885		ims->ims_st[1].in -= n;
886	}
887
888	if (lims->imsl_st[1] == MCAST_EXCLUDE) {
889		CTR3(KTR_IGMPV3, "%s: t1 ex += %d on %s",
890		    __func__, n, inet_ntoa(ia));
891		ims->ims_st[1].ex += n;
892	} else if (lims->imsl_st[1] == MCAST_INCLUDE) {
893		CTR3(KTR_IGMPV3, "%s: t1 in += %d on %s",
894		    __func__, n, inet_ntoa(ia));
895		ims->ims_st[1].in += n;
896	}
897}
898
899/*
900 * Atomically update the global in_multi state, when a membership's
901 * filter list is being updated in any way.
902 *
903 * imf is the per-inpcb-membership group filter pointer.
904 * A fake imf may be passed for in-kernel consumers.
905 *
906 * XXX This is a candidate for a set-symmetric-difference style loop
907 * which would eliminate the repeated lookup from root of ims nodes,
908 * as they share the same key space.
909 *
910 * If any error occurred this function will back out of refcounts
911 * and return a non-zero value.
912 */
913static int
914inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
915{
916	struct ip_msource	*ims, *nims;
917	struct in_msource	*lims;
918	int			 schanged, error;
919	int			 nsrc0, nsrc1;
920
921	schanged = 0;
922	error = 0;
923	nsrc1 = nsrc0 = 0;
924
925	/*
926	 * Update the source filters first, as this may fail.
927	 * Maintain count of in-mode filters at t0, t1. These are
928	 * used to work out if we transition into ASM mode or not.
929	 * Maintain a count of source filters whose state was
930	 * actually modified by this operation.
931	 */
932	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
933		lims = (struct in_msource *)ims;
934		if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++;
935		if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++;
936		if (lims->imsl_st[0] == lims->imsl_st[1]) continue;
937		error = inm_get_source(inm, lims->ims_haddr, 0, &nims);
938		++schanged;
939		if (error)
940			break;
941		ims_merge(nims, lims, 0);
942	}
943	if (error) {
944		struct ip_msource *bims;
945
946		RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) {
947			lims = (struct in_msource *)ims;
948			if (lims->imsl_st[0] == lims->imsl_st[1])
949				continue;
950			(void)inm_get_source(inm, lims->ims_haddr, 1, &bims);
951			if (bims == NULL)
952				continue;
953			ims_merge(bims, lims, 1);
954		}
955		goto out_reap;
956	}
957
958	CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1",
959	    __func__, nsrc0, nsrc1);
960
961	/* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */
962	if (imf->imf_st[0] == imf->imf_st[1] &&
963	    imf->imf_st[1] == MCAST_INCLUDE) {
964		if (nsrc1 == 0) {
965			CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
966			--inm->inm_st[1].iss_in;
967		}
968	}
969
970	/* Handle filter mode transition on socket. */
971	if (imf->imf_st[0] != imf->imf_st[1]) {
972		CTR3(KTR_IGMPV3, "%s: imf transition %d to %d",
973		    __func__, imf->imf_st[0], imf->imf_st[1]);
974
975		if (imf->imf_st[0] == MCAST_EXCLUDE) {
976			CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__);
977			--inm->inm_st[1].iss_ex;
978		} else if (imf->imf_st[0] == MCAST_INCLUDE) {
979			CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
980			--inm->inm_st[1].iss_in;
981		}
982
983		if (imf->imf_st[1] == MCAST_EXCLUDE) {
984			CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__);
985			inm->inm_st[1].iss_ex++;
986		} else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) {
987			CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__);
988			inm->inm_st[1].iss_in++;
989		}
990	}
991
992	/*
993	 * Track inm filter state in terms of listener counts.
994	 * If there are any exclusive listeners, stack-wide
995	 * membership is exclusive.
996	 * Otherwise, if only inclusive listeners, stack-wide is inclusive.
997	 * If no listeners remain, state is undefined at t1,
998	 * and the IGMP lifecycle for this group should finish.
999	 */
1000	if (inm->inm_st[1].iss_ex > 0) {
1001		CTR1(KTR_IGMPV3, "%s: transition to EX", __func__);
1002		inm->inm_st[1].iss_fmode = MCAST_EXCLUDE;
1003	} else if (inm->inm_st[1].iss_in > 0) {
1004		CTR1(KTR_IGMPV3, "%s: transition to IN", __func__);
1005		inm->inm_st[1].iss_fmode = MCAST_INCLUDE;
1006	} else {
1007		CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__);
1008		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
1009	}
1010
1011	/* Decrement ASM listener count on transition out of ASM mode. */
1012	if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
1013		if ((imf->imf_st[1] != MCAST_EXCLUDE) ||
1014		    (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0))
1015			CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__);
1016			--inm->inm_st[1].iss_asm;
1017	}
1018
1019	/* Increment ASM listener count on transition to ASM mode. */
1020	if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) {
1021		CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__);
1022		inm->inm_st[1].iss_asm++;
1023	}
1024
1025	CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm);
1026	inm_print(inm);
1027
1028out_reap:
1029	if (schanged > 0) {
1030		CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__);
1031		inm_reap(inm);
1032	}
1033	return (error);
1034}
1035
1036/*
1037 * Mark an in_multi's filter set deltas as committed.
1038 * Called by IGMP after a state change has been enqueued.
1039 */
1040void
1041inm_commit(struct in_multi *inm)
1042{
1043	struct ip_msource	*ims;
1044
1045	CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm);
1046	CTR1(KTR_IGMPV3, "%s: pre commit:", __func__);
1047	inm_print(inm);
1048
1049	RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
1050		ims->ims_st[0] = ims->ims_st[1];
1051	}
1052	inm->inm_st[0] = inm->inm_st[1];
1053}
1054
1055/*
1056 * Reap unreferenced nodes from an in_multi's filter set.
1057 */
1058static void
1059inm_reap(struct in_multi *inm)
1060{
1061	struct ip_msource	*ims, *tims;
1062
1063	RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
1064		if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 ||
1065		    ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 ||
1066		    ims->ims_stp != 0)
1067			continue;
1068		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
1069		RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
1070		free(ims, M_IPMSOURCE);
1071		inm->inm_nsrc--;
1072	}
1073}
1074
1075/*
1076 * Purge all source nodes from an in_multi's filter set.
1077 */
1078static void
1079inm_purge(struct in_multi *inm)
1080{
1081	struct ip_msource	*ims, *tims;
1082
1083	RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
1084		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
1085		RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
1086		free(ims, M_IPMSOURCE);
1087		inm->inm_nsrc--;
1088	}
1089}
1090
/*
 * Join a multicast group; unlocked entry point.
 *
 * Wraps in_joingroup_locked() with acquisition and release of the
 * IN_MULTI lock and passes its result straight through.
 *
 * SMPng: XXX: in_joingroup() is called from in_control() when Giant
 * is not held. Fortunately, ifp is unlikely to have been detached
 * at this point, so we assume it's OK to recurse.
 */
int
in_joingroup(struct ifnet *ifp, const struct in_addr *gina,
    /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
{
	int rc;

	IN_MULTI_LOCK();
	rc = in_joingroup_locked(ifp, gina, imf, pinm);
	IN_MULTI_UNLOCK();

	return (rc);
}
1110
1111/*
1112 * Join a multicast group; real entry point.
1113 *
1114 * Only preserves atomicity at inm level.
1115 * NOTE: imf argument cannot be const due to sys/tree.h limitations.
1116 *
1117 * If the IGMP downcall fails, the group is not joined, and an error
1118 * code is returned.
1119 */
1120int
1121in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina,
1122    /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
1123{
1124	struct in_mfilter	 timf;
1125	struct in_multi		*inm;
1126	int			 error;
1127
1128	IN_MULTI_LOCK_ASSERT();
1129
1130	CTR4(KTR_IGMPV3, "%s: join %s on %p(%s))", __func__,
1131	    inet_ntoa(*gina), ifp, ifp->if_xname);
1132
1133	error = 0;
1134	inm = NULL;
1135
1136	/*
1137	 * If no imf was specified (i.e. kernel consumer),
1138	 * fake one up and assume it is an ASM join.
1139	 */
1140	if (imf == NULL) {
1141		imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE);
1142		imf = &timf;
1143	}
1144
1145	error = in_getmulti(ifp, gina, &inm);
1146	if (error) {
1147		CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__);
1148		return (error);
1149	}
1150
1151	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
1152	error = inm_merge(inm, imf);
1153	if (error) {
1154		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
1155		goto out_inm_release;
1156	}
1157
1158	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
1159	error = igmp_change_state(inm);
1160	if (error) {
1161		CTR1(KTR_IGMPV3, "%s: failed to update source", __func__);
1162		goto out_inm_release;
1163	}
1164
1165out_inm_release:
1166	if (error) {
1167		CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
1168		inm_release_locked(inm);
1169	} else {
1170		*pinm = inm;
1171	}
1172
1173	return (error);
1174}
1175
/*
 * Leave a multicast group; unlocked entry point.
 *
 * Wraps in_leavegroup_locked() with acquisition and release of the
 * IN_MULTI lock and passes its result straight through.
 */
int
in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
{
	int rc;

	IN_MULTI_LOCK();
	rc = in_leavegroup_locked(inm, imf);
	IN_MULTI_UNLOCK();

	return (rc);
}
1190
1191/*
1192 * Leave a multicast group; real entry point.
1193 * All source filters will be expunged.
1194 *
1195 * Only preserves atomicity at inm level.
1196 *
1197 * Holding the write lock for the INP which contains imf
1198 * is highly advisable. We can't assert for it as imf does not
1199 * contain a back-pointer to the owning inp.
1200 *
1201 * Note: This is not the same as inm_release(*) as this function also
1202 * makes a state change downcall into IGMP.
1203 */
1204int
1205in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
1206{
1207	struct in_mfilter	 timf;
1208	int			 error;
1209
1210	error = 0;
1211
1212	IN_MULTI_LOCK_ASSERT();
1213
1214	CTR5(KTR_IGMPV3, "%s: leave inm %p, %s/%s, imf %p", __func__,
1215	    inm, inet_ntoa(inm->inm_addr),
1216	    (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname),
1217	    imf);
1218
1219	/*
1220	 * If no imf was specified (i.e. kernel consumer),
1221	 * fake one up and assume it is an ASM join.
1222	 */
1223	if (imf == NULL) {
1224		imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED);
1225		imf = &timf;
1226	}
1227
1228	/*
1229	 * Begin state merge transaction at IGMP layer.
1230	 *
1231	 * As this particular invocation should not cause any memory
1232	 * to be allocated, and there is no opportunity to roll back
1233	 * the transaction, it MUST NOT fail.
1234	 */
1235	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
1236	error = inm_merge(inm, imf);
1237	KASSERT(error == 0, ("%s: failed to merge inm state", __func__));
1238
1239	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
1240	CURVNET_SET(inm->inm_ifp->if_vnet);
1241	error = igmp_change_state(inm);
1242	CURVNET_RESTORE();
1243	if (error)
1244		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
1245
1246	CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
1247	inm_release_locked(inm);
1248
1249	return (error);
1250}
1251
1252/*#ifndef BURN_BRIDGES*/
1253/*
1254 * Join an IPv4 multicast group in (*,G) exclusive mode.
1255 * The group must be a 224.0.0.0/24 link-scope group.
1256 * This KPI is for legacy kernel consumers only.
1257 */
1258struct in_multi *
1259in_addmulti(struct in_addr *ap, struct ifnet *ifp)
1260{
1261	struct in_multi *pinm;
1262	int error;
1263
1264	KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)),
1265	    ("%s: %s not in 224.0.0.0/24", __func__, inet_ntoa(*ap)));
1266
1267	error = in_joingroup(ifp, ap, NULL, &pinm);
1268	if (error != 0)
1269		pinm = NULL;
1270
1271	return (pinm);
1272}
1273
1274/*
1275 * Leave an IPv4 multicast group, assumed to be in exclusive (*,G) mode.
1276 * This KPI is for legacy kernel consumers only.
1277 */
1278void
1279in_delmulti(struct in_multi *inm)
1280{
1281
1282	(void)in_leavegroup(inm, NULL);
1283}
1284/*#endif*/
1285
1286/*
1287 * Block or unblock an ASM multicast source on an inpcb.
1288 * This implements the delta-based API described in RFC 3678.
1289 *
1290 * The delta-based API applies only to exclusive-mode memberships.
1291 * An IGMP downcall will be performed.
1292 *
1293 * SMPng: NOTE: Must take Giant as a join may create a new ifma.
1294 *
1295 * Return 0 if successful, otherwise return an appropriate error code.
1296 */
1297static int
1298inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
1299{
1300	struct group_source_req		 gsr;
1301	sockunion_t			*gsa, *ssa;
1302	struct ifnet			*ifp;
1303	struct in_mfilter		*imf;
1304	struct ip_moptions		*imo;
1305	struct in_msource		*ims;
1306	struct in_multi			*inm;
1307	size_t				 idx;
1308	uint16_t			 fmode;
1309	int				 error, doblock;
1310
1311	ifp = NULL;
1312	error = 0;
1313	doblock = 0;
1314
1315	memset(&gsr, 0, sizeof(struct group_source_req));
1316	gsa = (sockunion_t *)&gsr.gsr_group;
1317	ssa = (sockunion_t *)&gsr.gsr_source;
1318
1319	switch (sopt->sopt_name) {
1320	case IP_BLOCK_SOURCE:
1321	case IP_UNBLOCK_SOURCE: {
1322		struct ip_mreq_source	 mreqs;
1323
1324		error = sooptcopyin(sopt, &mreqs,
1325		    sizeof(struct ip_mreq_source),
1326		    sizeof(struct ip_mreq_source));
1327		if (error)
1328			return (error);
1329
1330		gsa->sin.sin_family = AF_INET;
1331		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1332		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1333
1334		ssa->sin.sin_family = AF_INET;
1335		ssa->sin.sin_len = sizeof(struct sockaddr_in);
1336		ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1337
1338		if (!in_nullhost(mreqs.imr_interface))
1339			INADDR_TO_IFP(mreqs.imr_interface, ifp);
1340
1341		if (sopt->sopt_name == IP_BLOCK_SOURCE)
1342			doblock = 1;
1343
1344		CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p",
1345		    __func__, inet_ntoa(mreqs.imr_interface), ifp);
1346		break;
1347	    }
1348
1349	case MCAST_BLOCK_SOURCE:
1350	case MCAST_UNBLOCK_SOURCE:
1351		error = sooptcopyin(sopt, &gsr,
1352		    sizeof(struct group_source_req),
1353		    sizeof(struct group_source_req));
1354		if (error)
1355			return (error);
1356
1357		if (gsa->sin.sin_family != AF_INET ||
1358		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
1359			return (EINVAL);
1360
1361		if (ssa->sin.sin_family != AF_INET ||
1362		    ssa->sin.sin_len != sizeof(struct sockaddr_in))
1363			return (EINVAL);
1364
1365		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
1366			return (EADDRNOTAVAIL);
1367
1368		ifp = ifnet_byindex(gsr.gsr_interface);
1369
1370		if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
1371			doblock = 1;
1372		break;
1373
1374	default:
1375		CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
1376		    __func__, sopt->sopt_name);
1377		return (EOPNOTSUPP);
1378		break;
1379	}
1380
1381	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1382		return (EINVAL);
1383
1384	/*
1385	 * Check if we are actually a member of this group.
1386	 */
1387	imo = inp_findmoptions(inp);
1388	idx = imo_match_group(imo, ifp, &gsa->sa);
1389	if (idx == -1 || imo->imo_mfilters == NULL) {
1390		error = EADDRNOTAVAIL;
1391		goto out_inp_locked;
1392	}
1393
1394	KASSERT(imo->imo_mfilters != NULL,
1395	    ("%s: imo_mfilters not allocated", __func__));
1396	imf = &imo->imo_mfilters[idx];
1397	inm = imo->imo_membership[idx];
1398
1399	/*
1400	 * Attempting to use the delta-based API on an
1401	 * non exclusive-mode membership is an error.
1402	 */
1403	fmode = imf->imf_st[0];
1404	if (fmode != MCAST_EXCLUDE) {
1405		error = EINVAL;
1406		goto out_inp_locked;
1407	}
1408
1409	/*
1410	 * Deal with error cases up-front:
1411	 *  Asked to block, but already blocked; or
1412	 *  Asked to unblock, but nothing to unblock.
1413	 * If adding a new block entry, allocate it.
1414	 */
1415	ims = imo_match_source(imo, idx, &ssa->sa);
1416	if ((ims != NULL && doblock) || (ims == NULL && !doblock)) {
1417		CTR3(KTR_IGMPV3, "%s: source %s %spresent", __func__,
1418		    inet_ntoa(ssa->sin.sin_addr), doblock ? "" : "not ");
1419		error = EADDRNOTAVAIL;
1420		goto out_inp_locked;
1421	}
1422
1423	INP_WLOCK_ASSERT(inp);
1424
1425	/*
1426	 * Begin state merge transaction at socket layer.
1427	 */
1428	if (doblock) {
1429		CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block");
1430		ims = imf_graft(imf, fmode, &ssa->sin);
1431		if (ims == NULL)
1432			error = ENOMEM;
1433	} else {
1434		CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow");
1435		error = imf_prune(imf, &ssa->sin);
1436	}
1437
1438	if (error) {
1439		CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__);
1440		goto out_imf_rollback;
1441	}
1442
1443	/*
1444	 * Begin state merge transaction at IGMP layer.
1445	 */
1446	IN_MULTI_LOCK();
1447
1448	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
1449	error = inm_merge(inm, imf);
1450	if (error) {
1451		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
1452		goto out_imf_rollback;
1453	}
1454
1455	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
1456	error = igmp_change_state(inm);
1457	if (error)
1458		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
1459
1460	IN_MULTI_UNLOCK();
1461
1462out_imf_rollback:
1463	if (error)
1464		imf_rollback(imf);
1465	else
1466		imf_commit(imf);
1467
1468	imf_reap(imf);
1469
1470out_inp_locked:
1471	INP_WUNLOCK(inp);
1472	return (error);
1473}
1474
1475/*
1476 * Given an inpcb, return its multicast options structure pointer.  Accepts
1477 * an unlocked inpcb pointer, but will return it locked.  May sleep.
1478 *
1479 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
1480 * SMPng: NOTE: Returns with the INP write lock held.
1481 */
1482static struct ip_moptions *
1483inp_findmoptions(struct inpcb *inp)
1484{
1485	struct ip_moptions	 *imo;
1486	struct in_multi		**immp;
1487	struct in_mfilter	 *imfp;
1488	size_t			  idx;
1489
1490	INP_WLOCK(inp);
1491	if (inp->inp_moptions != NULL)
1492		return (inp->inp_moptions);
1493
1494	INP_WUNLOCK(inp);
1495
1496	imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK);
1497	immp = malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS, M_IPMOPTS,
1498	    M_WAITOK | M_ZERO);
1499	imfp = malloc(sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS,
1500	    M_INMFILTER, M_WAITOK);
1501
1502	imo->imo_multicast_ifp = NULL;
1503	imo->imo_multicast_addr.s_addr = INADDR_ANY;
1504	imo->imo_multicast_vif = -1;
1505	imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1506	imo->imo_multicast_loop = in_mcast_loop;
1507	imo->imo_num_memberships = 0;
1508	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
1509	imo->imo_membership = immp;
1510
1511	/* Initialize per-group source filters. */
1512	for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++)
1513		imf_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE);
1514	imo->imo_mfilters = imfp;
1515
1516	INP_WLOCK(inp);
1517	if (inp->inp_moptions != NULL) {
1518		free(imfp, M_INMFILTER);
1519		free(immp, M_IPMOPTS);
1520		free(imo, M_IPMOPTS);
1521		return (inp->inp_moptions);
1522	}
1523	inp->inp_moptions = imo;
1524	return (imo);
1525}
1526
1527/*
1528 * Discard the IP multicast options (and source filters).  To minimize
1529 * the amount of work done while holding locks such as the INP's
1530 * pcbinfo lock (which is used in the receive path), the free
1531 * operation is performed asynchronously in a separate task.
1532 *
1533 * SMPng: NOTE: assumes INP write lock is held.
1534 */
1535void
1536inp_freemoptions(struct ip_moptions *imo)
1537{
1538
1539	KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
1540	IN_MULTI_LOCK();
1541	STAILQ_INSERT_TAIL(&imo_gc_list, imo, imo_link);
1542	IN_MULTI_UNLOCK();
1543	taskqueue_enqueue(taskqueue_thread, &imo_gc_task);
1544}
1545
1546static void
1547inp_freemoptions_internal(struct ip_moptions *imo)
1548{
1549	struct in_mfilter	*imf;
1550	size_t			 idx, nmships;
1551
1552	nmships = imo->imo_num_memberships;
1553	for (idx = 0; idx < nmships; ++idx) {
1554		imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL;
1555		if (imf)
1556			imf_leave(imf);
1557		(void)in_leavegroup(imo->imo_membership[idx], imf);
1558		if (imf)
1559			imf_purge(imf);
1560	}
1561
1562	if (imo->imo_mfilters)
1563		free(imo->imo_mfilters, M_INMFILTER);
1564	free(imo->imo_membership, M_IPMOPTS);
1565	free(imo, M_IPMOPTS);
1566}
1567
1568static void
1569inp_gcmoptions(void *context, int pending)
1570{
1571	struct ip_moptions *imo;
1572
1573	IN_MULTI_LOCK();
1574	while (!STAILQ_EMPTY(&imo_gc_list)) {
1575		imo = STAILQ_FIRST(&imo_gc_list);
1576		STAILQ_REMOVE_HEAD(&imo_gc_list, imo_link);
1577		IN_MULTI_UNLOCK();
1578		inp_freemoptions_internal(imo);
1579		IN_MULTI_LOCK();
1580	}
1581	IN_MULTI_UNLOCK();
1582}
1583
1584/*
1585 * Atomically get source filters on a socket for an IPv4 multicast group.
1586 * Called with INP lock held; returns with lock released.
1587 */
1588static int
1589inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
1590{
1591	struct __msfilterreq	 msfr;
1592	sockunion_t		*gsa;
1593	struct ifnet		*ifp;
1594	struct ip_moptions	*imo;
1595	struct in_mfilter	*imf;
1596	struct ip_msource	*ims;
1597	struct in_msource	*lims;
1598	struct sockaddr_in	*psin;
1599	struct sockaddr_storage	*ptss;
1600	struct sockaddr_storage	*tss;
1601	int			 error;
1602	size_t			 idx, nsrcs, ncsrcs;
1603
1604	INP_WLOCK_ASSERT(inp);
1605
1606	imo = inp->inp_moptions;
1607	KASSERT(imo != NULL, ("%s: null ip_moptions", __func__));
1608
1609	INP_WUNLOCK(inp);
1610
1611	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
1612	    sizeof(struct __msfilterreq));
1613	if (error)
1614		return (error);
1615
1616	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
1617		return (EINVAL);
1618
1619	ifp = ifnet_byindex(msfr.msfr_ifindex);
1620	if (ifp == NULL)
1621		return (EINVAL);
1622
1623	INP_WLOCK(inp);
1624
1625	/*
1626	 * Lookup group on the socket.
1627	 */
1628	gsa = (sockunion_t *)&msfr.msfr_group;
1629	idx = imo_match_group(imo, ifp, &gsa->sa);
1630	if (idx == -1 || imo->imo_mfilters == NULL) {
1631		INP_WUNLOCK(inp);
1632		return (EADDRNOTAVAIL);
1633	}
1634	imf = &imo->imo_mfilters[idx];
1635
1636	/*
1637	 * Ignore memberships which are in limbo.
1638	 */
1639	if (imf->imf_st[1] == MCAST_UNDEFINED) {
1640		INP_WUNLOCK(inp);
1641		return (EAGAIN);
1642	}
1643	msfr.msfr_fmode = imf->imf_st[1];
1644
1645	/*
1646	 * If the user specified a buffer, copy out the source filter
1647	 * entries to userland gracefully.
1648	 * We only copy out the number of entries which userland
1649	 * has asked for, but we always tell userland how big the
1650	 * buffer really needs to be.
1651	 */
1652	if (msfr.msfr_nsrcs > in_mcast_maxsocksrc)
1653		msfr.msfr_nsrcs = in_mcast_maxsocksrc;
1654	tss = NULL;
1655	if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
1656		tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
1657		    M_TEMP, M_NOWAIT | M_ZERO);
1658		if (tss == NULL) {
1659			INP_WUNLOCK(inp);
1660			return (ENOBUFS);
1661		}
1662	}
1663
1664	/*
1665	 * Count number of sources in-mode at t0.
1666	 * If buffer space exists and remains, copy out source entries.
1667	 */
1668	nsrcs = msfr.msfr_nsrcs;
1669	ncsrcs = 0;
1670	ptss = tss;
1671	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
1672		lims = (struct in_msource *)ims;
1673		if (lims->imsl_st[0] == MCAST_UNDEFINED ||
1674		    lims->imsl_st[0] != imf->imf_st[0])
1675			continue;
1676		++ncsrcs;
1677		if (tss != NULL && nsrcs > 0) {
1678			psin = (struct sockaddr_in *)ptss;
1679			psin->sin_family = AF_INET;
1680			psin->sin_len = sizeof(struct sockaddr_in);
1681			psin->sin_addr.s_addr = htonl(lims->ims_haddr);
1682			psin->sin_port = 0;
1683			++ptss;
1684			--nsrcs;
1685		}
1686	}
1687
1688	INP_WUNLOCK(inp);
1689
1690	if (tss != NULL) {
1691		error = copyout(tss, msfr.msfr_srcs,
1692		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
1693		free(tss, M_TEMP);
1694		if (error)
1695			return (error);
1696	}
1697
1698	msfr.msfr_nsrcs = ncsrcs;
1699	error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));
1700
1701	return (error);
1702}
1703
1704/*
1705 * Return the IP multicast options in response to user getsockopt().
1706 */
1707int
1708inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
1709{
1710	struct ip_mreqn		 mreqn;
1711	struct ip_moptions	*imo;
1712	struct ifnet		*ifp;
1713	struct in_ifaddr	*ia;
1714	int			 error, optval;
1715	u_char			 coptval;
1716
1717	INP_WLOCK(inp);
1718	imo = inp->inp_moptions;
1719	/*
1720	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
1721	 * or is a divert socket, reject it.
1722	 */
1723	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
1724	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
1725	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
1726		INP_WUNLOCK(inp);
1727		return (EOPNOTSUPP);
1728	}
1729
1730	error = 0;
1731	switch (sopt->sopt_name) {
1732	case IP_MULTICAST_VIF:
1733		if (imo != NULL)
1734			optval = imo->imo_multicast_vif;
1735		else
1736			optval = -1;
1737		INP_WUNLOCK(inp);
1738		error = sooptcopyout(sopt, &optval, sizeof(int));
1739		break;
1740
1741	case IP_MULTICAST_IF:
1742		memset(&mreqn, 0, sizeof(struct ip_mreqn));
1743		if (imo != NULL) {
1744			ifp = imo->imo_multicast_ifp;
1745			if (!in_nullhost(imo->imo_multicast_addr)) {
1746				mreqn.imr_address = imo->imo_multicast_addr;
1747			} else if (ifp != NULL) {
1748				mreqn.imr_ifindex = ifp->if_index;
1749				IFP_TO_IA(ifp, ia);
1750				if (ia != NULL) {
1751					mreqn.imr_address =
1752					    IA_SIN(ia)->sin_addr;
1753					ifa_free(&ia->ia_ifa);
1754				}
1755			}
1756		}
1757		INP_WUNLOCK(inp);
1758		if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
1759			error = sooptcopyout(sopt, &mreqn,
1760			    sizeof(struct ip_mreqn));
1761		} else {
1762			error = sooptcopyout(sopt, &mreqn.imr_address,
1763			    sizeof(struct in_addr));
1764		}
1765		break;
1766
1767	case IP_MULTICAST_TTL:
1768		if (imo == 0)
1769			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
1770		else
1771			optval = coptval = imo->imo_multicast_ttl;
1772		INP_WUNLOCK(inp);
1773		if (sopt->sopt_valsize == sizeof(u_char))
1774			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
1775		else
1776			error = sooptcopyout(sopt, &optval, sizeof(int));
1777		break;
1778
1779	case IP_MULTICAST_LOOP:
1780		if (imo == 0)
1781			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
1782		else
1783			optval = coptval = imo->imo_multicast_loop;
1784		INP_WUNLOCK(inp);
1785		if (sopt->sopt_valsize == sizeof(u_char))
1786			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
1787		else
1788			error = sooptcopyout(sopt, &optval, sizeof(int));
1789		break;
1790
1791	case IP_MSFILTER:
1792		if (imo == NULL) {
1793			error = EADDRNOTAVAIL;
1794			INP_WUNLOCK(inp);
1795		} else {
1796			error = inp_get_source_filters(inp, sopt);
1797		}
1798		break;
1799
1800	default:
1801		INP_WUNLOCK(inp);
1802		error = ENOPROTOOPT;
1803		break;
1804	}
1805
1806	INP_UNLOCK_ASSERT(inp);
1807
1808	return (error);
1809}
1810
1811/*
1812 * Look up the ifnet to use for a multicast group membership,
1813 * given the IPv4 address of an interface, and the IPv4 group address.
1814 *
1815 * This routine exists to support legacy multicast applications
1816 * which do not understand that multicast memberships are scoped to
1817 * specific physical links in the networking stack, or which need
1818 * to join link-scope groups before IPv4 addresses are configured.
1819 *
1820 * If inp is non-NULL, use this socket's current FIB number for any
1821 * required FIB lookup.
1822 * If ina is INADDR_ANY, look up the group address in the unicast FIB,
1823 * and use its ifp; usually, this points to the default next-hop.
1824 *
1825 * If the FIB lookup fails, attempt to use the first non-loopback
1826 * interface with multicast capability in the system as a
1827 * last resort. The legacy IPv4 ASM API requires that we do
1828 * this in order to allow groups to be joined when the routing
1829 * table has not yet been populated during boot.
1830 *
1831 * Returns NULL if no ifp could be found.
1832 *
1833 * SMPng: TODO: Acquire the appropriate locks for INADDR_TO_IFP.
1834 * FUTURE: Implement IPv4 source-address selection.
1835 */
1836static struct ifnet *
1837inp_lookup_mcast_ifp(const struct inpcb *inp,
1838    const struct sockaddr_in *gsin, const struct in_addr ina)
1839{
1840	struct ifnet *ifp;
1841
1842	KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__));
1843	KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)),
1844	    ("%s: not multicast", __func__));
1845
1846	ifp = NULL;
1847	if (!in_nullhost(ina)) {
1848		INADDR_TO_IFP(ina, ifp);
1849	} else {
1850		struct route ro;
1851
1852		ro.ro_rt = NULL;
1853		memcpy(&ro.ro_dst, gsin, sizeof(struct sockaddr_in));
1854		in_rtalloc_ign(&ro, 0, inp ? inp->inp_inc.inc_fibnum : 0);
1855		if (ro.ro_rt != NULL) {
1856			ifp = ro.ro_rt->rt_ifp;
1857			KASSERT(ifp != NULL, ("%s: null ifp", __func__));
1858			RTFREE(ro.ro_rt);
1859		} else {
1860			struct in_ifaddr *ia;
1861			struct ifnet *mifp;
1862
1863			mifp = NULL;
1864			IN_IFADDR_RLOCK();
1865			TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
1866				mifp = ia->ia_ifp;
1867				if (!(mifp->if_flags & IFF_LOOPBACK) &&
1868				     (mifp->if_flags & IFF_MULTICAST)) {
1869					ifp = mifp;
1870					break;
1871				}
1872			}
1873			IN_IFADDR_RUNLOCK();
1874		}
1875	}
1876
1877	return (ifp);
1878}
1879
1880/*
1881 * Join an IPv4 multicast group, possibly with a source.
1882 */
1883static int
1884inp_join_group(struct inpcb *inp, struct sockopt *sopt)
1885{
1886	struct group_source_req		 gsr;
1887	sockunion_t			*gsa, *ssa;
1888	struct ifnet			*ifp;
1889	struct in_mfilter		*imf;
1890	struct ip_moptions		*imo;
1891	struct in_multi			*inm;
1892	struct in_msource		*lims;
1893	size_t				 idx;
1894	int				 error, is_new;
1895
1896	ifp = NULL;
1897	imf = NULL;
1898	lims = NULL;
1899	error = 0;
1900	is_new = 0;
1901
1902	memset(&gsr, 0, sizeof(struct group_source_req));
1903	gsa = (sockunion_t *)&gsr.gsr_group;
1904	gsa->ss.ss_family = AF_UNSPEC;
1905	ssa = (sockunion_t *)&gsr.gsr_source;
1906	ssa->ss.ss_family = AF_UNSPEC;
1907
1908	switch (sopt->sopt_name) {
1909	case IP_ADD_MEMBERSHIP:
1910	case IP_ADD_SOURCE_MEMBERSHIP: {
1911		struct ip_mreq_source	 mreqs;
1912
1913		if (sopt->sopt_name == IP_ADD_MEMBERSHIP) {
1914			error = sooptcopyin(sopt, &mreqs,
1915			    sizeof(struct ip_mreq),
1916			    sizeof(struct ip_mreq));
1917			/*
1918			 * Do argument switcharoo from ip_mreq into
1919			 * ip_mreq_source to avoid using two instances.
1920			 */
1921			mreqs.imr_interface = mreqs.imr_sourceaddr;
1922			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
1923		} else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
1924			error = sooptcopyin(sopt, &mreqs,
1925			    sizeof(struct ip_mreq_source),
1926			    sizeof(struct ip_mreq_source));
1927		}
1928		if (error)
1929			return (error);
1930
1931		gsa->sin.sin_family = AF_INET;
1932		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1933		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1934
1935		if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
1936			ssa->sin.sin_family = AF_INET;
1937			ssa->sin.sin_len = sizeof(struct sockaddr_in);
1938			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1939		}
1940
1941		if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1942			return (EINVAL);
1943
1944		ifp = inp_lookup_mcast_ifp(inp, &gsa->sin,
1945		    mreqs.imr_interface);
1946		CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p",
1947		    __func__, inet_ntoa(mreqs.imr_interface), ifp);
1948		break;
1949	}
1950
1951	case MCAST_JOIN_GROUP:
1952	case MCAST_JOIN_SOURCE_GROUP:
1953		if (sopt->sopt_name == MCAST_JOIN_GROUP) {
1954			error = sooptcopyin(sopt, &gsr,
1955			    sizeof(struct group_req),
1956			    sizeof(struct group_req));
1957		} else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
1958			error = sooptcopyin(sopt, &gsr,
1959			    sizeof(struct group_source_req),
1960			    sizeof(struct group_source_req));
1961		}
1962		if (error)
1963			return (error);
1964
1965		if (gsa->sin.sin_family != AF_INET ||
1966		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
1967			return (EINVAL);
1968
1969		/*
1970		 * Overwrite the port field if present, as the sockaddr
1971		 * being copied in may be matched with a binary comparison.
1972		 */
1973		gsa->sin.sin_port = 0;
1974		if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
1975			if (ssa->sin.sin_family != AF_INET ||
1976			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
1977				return (EINVAL);
1978			ssa->sin.sin_port = 0;
1979		}
1980
1981		if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1982			return (EINVAL);
1983
1984		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
1985			return (EADDRNOTAVAIL);
1986		ifp = ifnet_byindex(gsr.gsr_interface);
1987		break;
1988
1989	default:
1990		CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
1991		    __func__, sopt->sopt_name);
1992		return (EOPNOTSUPP);
1993		break;
1994	}
1995
1996	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
1997		return (EADDRNOTAVAIL);
1998
1999	imo = inp_findmoptions(inp);
2000	idx = imo_match_group(imo, ifp, &gsa->sa);
2001	if (idx == -1) {
2002		is_new = 1;
2003	} else {
2004		inm = imo->imo_membership[idx];
2005		imf = &imo->imo_mfilters[idx];
2006		if (ssa->ss.ss_family != AF_UNSPEC) {
2007			/*
2008			 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership
2009			 * is an error. On an existing inclusive membership,
2010			 * it just adds the source to the filter list.
2011			 */
2012			if (imf->imf_st[1] != MCAST_INCLUDE) {
2013				error = EINVAL;
2014				goto out_inp_locked;
2015			}
2016			/*
2017			 * Throw out duplicates.
2018			 *
2019			 * XXX FIXME: This makes a naive assumption that
2020			 * even if entries exist for *ssa in this imf,
2021			 * they will be rejected as dupes, even if they
2022			 * are not valid in the current mode (in-mode).
2023			 *
2024			 * in_msource is transactioned just as for anything
2025			 * else in SSM -- but note naive use of inm_graft()
2026			 * below for allocating new filter entries.
2027			 *
2028			 * This is only an issue if someone mixes the
2029			 * full-state SSM API with the delta-based API,
2030			 * which is discouraged in the relevant RFCs.
2031			 */
2032			lims = imo_match_source(imo, idx, &ssa->sa);
2033			if (lims != NULL /*&&
2034			    lims->imsl_st[1] == MCAST_INCLUDE*/) {
2035				error = EADDRNOTAVAIL;
2036				goto out_inp_locked;
2037			}
2038		} else {
2039			/*
2040			 * MCAST_JOIN_GROUP on an existing exclusive
2041			 * membership is an error; return EADDRINUSE
2042			 * to preserve 4.4BSD API idempotence, and
2043			 * avoid tedious detour to code below.
2044			 * NOTE: This is bending RFC 3678 a bit.
2045			 *
2046			 * On an existing inclusive membership, this is also
2047			 * an error; if you want to change filter mode,
2048			 * you must use the userland API setsourcefilter().
2049			 * XXX We don't reject this for imf in UNDEFINED
2050			 * state at t1, because allocation of a filter
2051			 * is atomic with allocation of a membership.
2052			 */
2053			error = EINVAL;
2054			if (imf->imf_st[1] == MCAST_EXCLUDE)
2055				error = EADDRINUSE;
2056			goto out_inp_locked;
2057		}
2058	}
2059
2060	/*
2061	 * Begin state merge transaction at socket layer.
2062	 */
2063	INP_WLOCK_ASSERT(inp);
2064
2065	if (is_new) {
2066		if (imo->imo_num_memberships == imo->imo_max_memberships) {
2067			error = imo_grow(imo);
2068			if (error)
2069				goto out_inp_locked;
2070		}
2071		/*
2072		 * Allocate the new slot upfront so we can deal with
2073		 * grafting the new source filter in same code path
2074		 * as for join-source on existing membership.
2075		 */
2076		idx = imo->imo_num_memberships;
2077		imo->imo_membership[idx] = NULL;
2078		imo->imo_num_memberships++;
2079		KASSERT(imo->imo_mfilters != NULL,
2080		    ("%s: imf_mfilters vector was not allocated", __func__));
2081		imf = &imo->imo_mfilters[idx];
2082		KASSERT(RB_EMPTY(&imf->imf_sources),
2083		    ("%s: imf_sources not empty", __func__));
2084	}
2085
2086	/*
2087	 * Graft new source into filter list for this inpcb's
2088	 * membership of the group. The in_multi may not have
2089	 * been allocated yet if this is a new membership, however,
2090	 * the in_mfilter slot will be allocated and must be initialized.
2091	 *
2092	 * Note: Grafting of exclusive mode filters doesn't happen
2093	 * in this path.
2094	 * XXX: Should check for non-NULL lims (node exists but may
2095	 * not be in-mode) for interop with full-state API.
2096	 */
2097	if (ssa->ss.ss_family != AF_UNSPEC) {
2098		/* Membership starts in IN mode */
2099		if (is_new) {
2100			CTR1(KTR_IGMPV3, "%s: new join w/source", __func__);
2101			imf_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE);
2102		} else {
2103			CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow");
2104		}
2105		lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin);
2106		if (lims == NULL) {
2107			CTR1(KTR_IGMPV3, "%s: merge imf state failed",
2108			    __func__);
2109			error = ENOMEM;
2110			goto out_imo_free;
2111		}
2112	} else {
2113		/* No address specified; Membership starts in EX mode */
2114		if (is_new) {
2115			CTR1(KTR_IGMPV3, "%s: new join w/o source", __func__);
2116			imf_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE);
2117		}
2118	}
2119
2120	/*
2121	 * Begin state merge transaction at IGMP layer.
2122	 */
2123	IN_MULTI_LOCK();
2124
2125	if (is_new) {
2126		error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf,
2127		    &inm);
2128		if (error)
2129			goto out_imo_free;
2130		imo->imo_membership[idx] = inm;
2131	} else {
2132		CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
2133		error = inm_merge(inm, imf);
2134		if (error) {
2135			CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
2136			    __func__);
2137			goto out_imf_rollback;
2138		}
2139		CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
2140		error = igmp_change_state(inm);
2141		if (error) {
2142			CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
2143			    __func__);
2144			goto out_imf_rollback;
2145		}
2146	}
2147
2148	IN_MULTI_UNLOCK();
2149
2150out_imf_rollback:
2151	INP_WLOCK_ASSERT(inp);
2152	if (error) {
2153		imf_rollback(imf);
2154		if (is_new)
2155			imf_purge(imf);
2156		else
2157			imf_reap(imf);
2158	} else {
2159		imf_commit(imf);
2160	}
2161
2162out_imo_free:
2163	if (error && is_new) {
2164		imo->imo_membership[idx] = NULL;
2165		--imo->imo_num_memberships;
2166	}
2167
2168out_inp_locked:
2169	INP_WUNLOCK(inp);
2170	return (error);
2171}
2172
2173/*
2174 * Leave an IPv4 multicast group on an inpcb, possibly with a source.
2175 */
2176static int
2177inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
2178{
2179	struct group_source_req		 gsr;
2180	struct ip_mreq_source		 mreqs;
2181	sockunion_t			*gsa, *ssa;
2182	struct ifnet			*ifp;
2183	struct in_mfilter		*imf;
2184	struct ip_moptions		*imo;
2185	struct in_msource		*ims;
2186	struct in_multi			*inm;
2187	size_t				 idx;
2188	int				 error, is_final;
2189
2190	ifp = NULL;
2191	error = 0;
2192	is_final = 1;
2193
2194	memset(&gsr, 0, sizeof(struct group_source_req));
2195	gsa = (sockunion_t *)&gsr.gsr_group;
2196	gsa->ss.ss_family = AF_UNSPEC;
2197	ssa = (sockunion_t *)&gsr.gsr_source;
2198	ssa->ss.ss_family = AF_UNSPEC;
2199
2200	switch (sopt->sopt_name) {
2201	case IP_DROP_MEMBERSHIP:
2202	case IP_DROP_SOURCE_MEMBERSHIP:
2203		if (sopt->sopt_name == IP_DROP_MEMBERSHIP) {
2204			error = sooptcopyin(sopt, &mreqs,
2205			    sizeof(struct ip_mreq),
2206			    sizeof(struct ip_mreq));
2207			/*
2208			 * Swap interface and sourceaddr arguments,
2209			 * as ip_mreq and ip_mreq_source are laid
2210			 * out differently.
2211			 */
2212			mreqs.imr_interface = mreqs.imr_sourceaddr;
2213			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
2214		} else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
2215			error = sooptcopyin(sopt, &mreqs,
2216			    sizeof(struct ip_mreq_source),
2217			    sizeof(struct ip_mreq_source));
2218		}
2219		if (error)
2220			return (error);
2221
2222		gsa->sin.sin_family = AF_INET;
2223		gsa->sin.sin_len = sizeof(struct sockaddr_in);
2224		gsa->sin.sin_addr = mreqs.imr_multiaddr;
2225
2226		if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
2227			ssa->sin.sin_family = AF_INET;
2228			ssa->sin.sin_len = sizeof(struct sockaddr_in);
2229			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
2230		}
2231
2232		/*
2233		 * Attempt to look up hinted ifp from interface address.
2234		 * Fallthrough with null ifp iff lookup fails, to
2235		 * preserve 4.4BSD mcast API idempotence.
2236		 * XXX NOTE WELL: The RFC 3678 API is preferred because
2237		 * using an IPv4 address as a key is racy.
2238		 */
2239		if (!in_nullhost(mreqs.imr_interface))
2240			INADDR_TO_IFP(mreqs.imr_interface, ifp);
2241
2242		CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p",
2243		    __func__, inet_ntoa(mreqs.imr_interface), ifp);
2244
2245		break;
2246
2247	case MCAST_LEAVE_GROUP:
2248	case MCAST_LEAVE_SOURCE_GROUP:
2249		if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
2250			error = sooptcopyin(sopt, &gsr,
2251			    sizeof(struct group_req),
2252			    sizeof(struct group_req));
2253		} else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
2254			error = sooptcopyin(sopt, &gsr,
2255			    sizeof(struct group_source_req),
2256			    sizeof(struct group_source_req));
2257		}
2258		if (error)
2259			return (error);
2260
2261		if (gsa->sin.sin_family != AF_INET ||
2262		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
2263			return (EINVAL);
2264
2265		if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
2266			if (ssa->sin.sin_family != AF_INET ||
2267			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
2268				return (EINVAL);
2269		}
2270
2271		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
2272			return (EADDRNOTAVAIL);
2273
2274		ifp = ifnet_byindex(gsr.gsr_interface);
2275
2276		if (ifp == NULL)
2277			return (EADDRNOTAVAIL);
2278		break;
2279
2280	default:
2281		CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
2282		    __func__, sopt->sopt_name);
2283		return (EOPNOTSUPP);
2284		break;
2285	}
2286
2287	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
2288		return (EINVAL);
2289
2290	/*
2291	 * Find the membership in the membership array.
2292	 */
2293	imo = inp_findmoptions(inp);
2294	idx = imo_match_group(imo, ifp, &gsa->sa);
2295	if (idx == -1) {
2296		error = EADDRNOTAVAIL;
2297		goto out_inp_locked;
2298	}
2299	inm = imo->imo_membership[idx];
2300	imf = &imo->imo_mfilters[idx];
2301
2302	if (ssa->ss.ss_family != AF_UNSPEC)
2303		is_final = 0;
2304
2305	/*
2306	 * Begin state merge transaction at socket layer.
2307	 */
2308	INP_WLOCK_ASSERT(inp);
2309
2310	/*
2311	 * If we were instructed only to leave a given source, do so.
2312	 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships.
2313	 */
2314	if (is_final) {
2315		imf_leave(imf);
2316	} else {
2317		if (imf->imf_st[0] == MCAST_EXCLUDE) {
2318			error = EADDRNOTAVAIL;
2319			goto out_inp_locked;
2320		}
2321		ims = imo_match_source(imo, idx, &ssa->sa);
2322		if (ims == NULL) {
2323			CTR3(KTR_IGMPV3, "%s: source %s %spresent", __func__,
2324			    inet_ntoa(ssa->sin.sin_addr), "not ");
2325			error = EADDRNOTAVAIL;
2326			goto out_inp_locked;
2327		}
2328		CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block");
2329		error = imf_prune(imf, &ssa->sin);
2330		if (error) {
2331			CTR1(KTR_IGMPV3, "%s: merge imf state failed",
2332			    __func__);
2333			goto out_inp_locked;
2334		}
2335	}
2336
2337	/*
2338	 * Begin state merge transaction at IGMP layer.
2339	 */
2340	IN_MULTI_LOCK();
2341
2342	if (is_final) {
2343		/*
2344		 * Give up the multicast address record to which
2345		 * the membership points.
2346		 */
2347		(void)in_leavegroup_locked(inm, imf);
2348	} else {
2349		CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
2350		error = inm_merge(inm, imf);
2351		if (error) {
2352			CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
2353			    __func__);
2354			goto out_imf_rollback;
2355		}
2356
2357		CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
2358		error = igmp_change_state(inm);
2359		if (error) {
2360			CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
2361			    __func__);
2362		}
2363	}
2364
2365	IN_MULTI_UNLOCK();
2366
2367out_imf_rollback:
2368	if (error)
2369		imf_rollback(imf);
2370	else
2371		imf_commit(imf);
2372
2373	imf_reap(imf);
2374
2375	if (is_final) {
2376		/* Remove the gap in the membership and filter array. */
2377		for (++idx; idx < imo->imo_num_memberships; ++idx) {
2378			imo->imo_membership[idx-1] = imo->imo_membership[idx];
2379			imo->imo_mfilters[idx-1] = imo->imo_mfilters[idx];
2380		}
2381		imo->imo_num_memberships--;
2382	}
2383
2384out_inp_locked:
2385	INP_WUNLOCK(inp);
2386	return (error);
2387}
2388
2389/*
2390 * Select the interface for transmitting IPv4 multicast datagrams.
2391 *
2392 * Either an instance of struct in_addr or an instance of struct ip_mreqn
2393 * may be passed to this socket option. An address of INADDR_ANY or an
2394 * interface index of 0 is used to remove a previous selection.
2395 * When no interface is selected, one is chosen for every send.
2396 */
2397static int
2398inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
2399{
2400	struct in_addr		 addr;
2401	struct ip_mreqn		 mreqn;
2402	struct ifnet		*ifp;
2403	struct ip_moptions	*imo;
2404	int			 error;
2405
2406	if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
2407		/*
2408		 * An interface index was specified using the
2409		 * Linux-derived ip_mreqn structure.
2410		 */
2411		error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn),
2412		    sizeof(struct ip_mreqn));
2413		if (error)
2414			return (error);
2415
2416		if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex)
2417			return (EINVAL);
2418
2419		if (mreqn.imr_ifindex == 0) {
2420			ifp = NULL;
2421		} else {
2422			ifp = ifnet_byindex(mreqn.imr_ifindex);
2423			if (ifp == NULL)
2424				return (EADDRNOTAVAIL);
2425		}
2426	} else {
2427		/*
2428		 * An interface was specified by IPv4 address.
2429		 * This is the traditional BSD usage.
2430		 */
2431		error = sooptcopyin(sopt, &addr, sizeof(struct in_addr),
2432		    sizeof(struct in_addr));
2433		if (error)
2434			return (error);
2435		if (in_nullhost(addr)) {
2436			ifp = NULL;
2437		} else {
2438			INADDR_TO_IFP(addr, ifp);
2439			if (ifp == NULL)
2440				return (EADDRNOTAVAIL);
2441		}
2442		CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = %s", __func__, ifp,
2443		    inet_ntoa(addr));
2444	}
2445
2446	/* Reject interfaces which do not support multicast. */
2447	if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0)
2448		return (EOPNOTSUPP);
2449
2450	imo = inp_findmoptions(inp);
2451	imo->imo_multicast_ifp = ifp;
2452	imo->imo_multicast_addr.s_addr = INADDR_ANY;
2453	INP_WUNLOCK(inp);
2454
2455	return (0);
2456}
2457
2458/*
2459 * Atomically set source filters on a socket for an IPv4 multicast group.
2460 *
2461 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
2462 */
2463static int
2464inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
2465{
2466	struct __msfilterreq	 msfr;
2467	sockunion_t		*gsa;
2468	struct ifnet		*ifp;
2469	struct in_mfilter	*imf;
2470	struct ip_moptions	*imo;
2471	struct in_multi		*inm;
2472	size_t			 idx;
2473	int			 error;
2474
2475	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
2476	    sizeof(struct __msfilterreq));
2477	if (error)
2478		return (error);
2479
2480	if (msfr.msfr_nsrcs > in_mcast_maxsocksrc)
2481		return (ENOBUFS);
2482
2483	if ((msfr.msfr_fmode != MCAST_EXCLUDE &&
2484	     msfr.msfr_fmode != MCAST_INCLUDE))
2485		return (EINVAL);
2486
2487	if (msfr.msfr_group.ss_family != AF_INET ||
2488	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in))
2489		return (EINVAL);
2490
2491	gsa = (sockunion_t *)&msfr.msfr_group;
2492	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
2493		return (EINVAL);
2494
2495	gsa->sin.sin_port = 0;	/* ignore port */
2496
2497	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
2498		return (EADDRNOTAVAIL);
2499
2500	ifp = ifnet_byindex(msfr.msfr_ifindex);
2501	if (ifp == NULL)
2502		return (EADDRNOTAVAIL);
2503
2504	/*
2505	 * Take the INP write lock.
2506	 * Check if this socket is a member of this group.
2507	 */
2508	imo = inp_findmoptions(inp);
2509	idx = imo_match_group(imo, ifp, &gsa->sa);
2510	if (idx == -1 || imo->imo_mfilters == NULL) {
2511		error = EADDRNOTAVAIL;
2512		goto out_inp_locked;
2513	}
2514	inm = imo->imo_membership[idx];
2515	imf = &imo->imo_mfilters[idx];
2516
2517	/*
2518	 * Begin state merge transaction at socket layer.
2519	 */
2520	INP_WLOCK_ASSERT(inp);
2521
2522	imf->imf_st[1] = msfr.msfr_fmode;
2523
2524	/*
2525	 * Apply any new source filters, if present.
2526	 * Make a copy of the user-space source vector so
2527	 * that we may copy them with a single copyin. This
2528	 * allows us to deal with page faults up-front.
2529	 */
2530	if (msfr.msfr_nsrcs > 0) {
2531		struct in_msource	*lims;
2532		struct sockaddr_in	*psin;
2533		struct sockaddr_storage	*kss, *pkss;
2534		int			 i;
2535
2536		INP_WUNLOCK(inp);
2537
2538		CTR2(KTR_IGMPV3, "%s: loading %lu source list entries",
2539		    __func__, (unsigned long)msfr.msfr_nsrcs);
2540		kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
2541		    M_TEMP, M_WAITOK);
2542		error = copyin(msfr.msfr_srcs, kss,
2543		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
2544		if (error) {
2545			free(kss, M_TEMP);
2546			return (error);
2547		}
2548
2549		INP_WLOCK(inp);
2550
2551		/*
2552		 * Mark all source filters as UNDEFINED at t1.
2553		 * Restore new group filter mode, as imf_leave()
2554		 * will set it to INCLUDE.
2555		 */
2556		imf_leave(imf);
2557		imf->imf_st[1] = msfr.msfr_fmode;
2558
2559		/*
2560		 * Update socket layer filters at t1, lazy-allocating
2561		 * new entries. This saves a bunch of memory at the
2562		 * cost of one RB_FIND() per source entry; duplicate
2563		 * entries in the msfr_nsrcs vector are ignored.
2564		 * If we encounter an error, rollback transaction.
2565		 *
2566		 * XXX This too could be replaced with a set-symmetric
2567		 * difference like loop to avoid walking from root
2568		 * every time, as the key space is common.
2569		 */
2570		for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) {
2571			psin = (struct sockaddr_in *)pkss;
2572			if (psin->sin_family != AF_INET) {
2573				error = EAFNOSUPPORT;
2574				break;
2575			}
2576			if (psin->sin_len != sizeof(struct sockaddr_in)) {
2577				error = EINVAL;
2578				break;
2579			}
2580			error = imf_get_source(imf, psin, &lims);
2581			if (error)
2582				break;
2583			lims->imsl_st[1] = imf->imf_st[1];
2584		}
2585		free(kss, M_TEMP);
2586	}
2587
2588	if (error)
2589		goto out_imf_rollback;
2590
2591	INP_WLOCK_ASSERT(inp);
2592	IN_MULTI_LOCK();
2593
2594	/*
2595	 * Begin state merge transaction at IGMP layer.
2596	 */
2597	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
2598	error = inm_merge(inm, imf);
2599	if (error) {
2600		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
2601		goto out_imf_rollback;
2602	}
2603
2604	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
2605	error = igmp_change_state(inm);
2606	if (error)
2607		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
2608
2609	IN_MULTI_UNLOCK();
2610
2611out_imf_rollback:
2612	if (error)
2613		imf_rollback(imf);
2614	else
2615		imf_commit(imf);
2616
2617	imf_reap(imf);
2618
2619out_inp_locked:
2620	INP_WUNLOCK(inp);
2621	return (error);
2622}
2623
2624/*
2625 * Set the IP multicast options in response to user setsockopt().
2626 *
2627 * Many of the socket options handled in this function duplicate the
2628 * functionality of socket options in the regular unicast API. However,
2629 * it is not possible to merge the duplicate code, because the idempotence
2630 * of the IPv4 multicast part of the BSD Sockets API must be preserved;
2631 * the effects of these options must be treated as separate and distinct.
2632 *
2633 * SMPng: XXX: Unlocked read of inp_socket believed OK.
2634 * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING
2635 * is refactored to no longer use vifs.
2636 */
2637int
2638inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
2639{
2640	struct ip_moptions	*imo;
2641	int			 error;
2642
2643	error = 0;
2644
2645	/*
2646	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
2647	 * or is a divert socket, reject it.
2648	 */
2649	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
2650	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
2651	     inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
2652		return (EOPNOTSUPP);
2653
2654	switch (sopt->sopt_name) {
2655	case IP_MULTICAST_VIF: {
2656		int vifi;
2657		/*
2658		 * Select a multicast VIF for transmission.
2659		 * Only useful if multicast forwarding is active.
2660		 */
2661		if (legal_vif_num == NULL) {
2662			error = EOPNOTSUPP;
2663			break;
2664		}
2665		error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
2666		if (error)
2667			break;
2668		if (!legal_vif_num(vifi) && (vifi != -1)) {
2669			error = EINVAL;
2670			break;
2671		}
2672		imo = inp_findmoptions(inp);
2673		imo->imo_multicast_vif = vifi;
2674		INP_WUNLOCK(inp);
2675		break;
2676	}
2677
2678	case IP_MULTICAST_IF:
2679		error = inp_set_multicast_if(inp, sopt);
2680		break;
2681
2682	case IP_MULTICAST_TTL: {
2683		u_char ttl;
2684
2685		/*
2686		 * Set the IP time-to-live for outgoing multicast packets.
2687		 * The original multicast API required a char argument,
2688		 * which is inconsistent with the rest of the socket API.
2689		 * We allow either a char or an int.
2690		 */
2691		if (sopt->sopt_valsize == sizeof(u_char)) {
2692			error = sooptcopyin(sopt, &ttl, sizeof(u_char),
2693			    sizeof(u_char));
2694			if (error)
2695				break;
2696		} else {
2697			u_int ittl;
2698
2699			error = sooptcopyin(sopt, &ittl, sizeof(u_int),
2700			    sizeof(u_int));
2701			if (error)
2702				break;
2703			if (ittl > 255) {
2704				error = EINVAL;
2705				break;
2706			}
2707			ttl = (u_char)ittl;
2708		}
2709		imo = inp_findmoptions(inp);
2710		imo->imo_multicast_ttl = ttl;
2711		INP_WUNLOCK(inp);
2712		break;
2713	}
2714
2715	case IP_MULTICAST_LOOP: {
2716		u_char loop;
2717
2718		/*
2719		 * Set the loopback flag for outgoing multicast packets.
2720		 * Must be zero or one.  The original multicast API required a
2721		 * char argument, which is inconsistent with the rest
2722		 * of the socket API.  We allow either a char or an int.
2723		 */
2724		if (sopt->sopt_valsize == sizeof(u_char)) {
2725			error = sooptcopyin(sopt, &loop, sizeof(u_char),
2726			    sizeof(u_char));
2727			if (error)
2728				break;
2729		} else {
2730			u_int iloop;
2731
2732			error = sooptcopyin(sopt, &iloop, sizeof(u_int),
2733					    sizeof(u_int));
2734			if (error)
2735				break;
2736			loop = (u_char)iloop;
2737		}
2738		imo = inp_findmoptions(inp);
2739		imo->imo_multicast_loop = !!loop;
2740		INP_WUNLOCK(inp);
2741		break;
2742	}
2743
2744	case IP_ADD_MEMBERSHIP:
2745	case IP_ADD_SOURCE_MEMBERSHIP:
2746	case MCAST_JOIN_GROUP:
2747	case MCAST_JOIN_SOURCE_GROUP:
2748		error = inp_join_group(inp, sopt);
2749		break;
2750
2751	case IP_DROP_MEMBERSHIP:
2752	case IP_DROP_SOURCE_MEMBERSHIP:
2753	case MCAST_LEAVE_GROUP:
2754	case MCAST_LEAVE_SOURCE_GROUP:
2755		error = inp_leave_group(inp, sopt);
2756		break;
2757
2758	case IP_BLOCK_SOURCE:
2759	case IP_UNBLOCK_SOURCE:
2760	case MCAST_BLOCK_SOURCE:
2761	case MCAST_UNBLOCK_SOURCE:
2762		error = inp_block_unblock_source(inp, sopt);
2763		break;
2764
2765	case IP_MSFILTER:
2766		error = inp_set_source_filters(inp, sopt);
2767		break;
2768
2769	default:
2770		error = EOPNOTSUPP;
2771		break;
2772	}
2773
2774	INP_UNLOCK_ASSERT(inp);
2775
2776	return (error);
2777}
2778
2779/*
2780 * Expose IGMP's multicast filter mode and source list(s) to userland,
2781 * keyed by (ifindex, group).
2782 * The filter mode is written out as a uint32_t, followed by
2783 * 0..n of struct in_addr.
2784 * For use by ifmcstat(8).
2785 * SMPng: NOTE: unlocked read of ifindex space.
2786 */
2787static int
2788sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS)
2789{
2790	struct in_addr			 src, group;
2791	struct ifnet			*ifp;
2792	struct ifmultiaddr		*ifma;
2793	struct in_multi			*inm;
2794	struct ip_msource		*ims;
2795	int				*name;
2796	int				 retval;
2797	u_int				 namelen;
2798	uint32_t			 fmode, ifindex;
2799
2800	name = (int *)arg1;
2801	namelen = arg2;
2802
2803	if (req->newptr != NULL)
2804		return (EPERM);
2805
2806	if (namelen != 2)
2807		return (EINVAL);
2808
2809	ifindex = name[0];
2810	if (ifindex <= 0 || ifindex > V_if_index) {
2811		CTR2(KTR_IGMPV3, "%s: ifindex %u out of range",
2812		    __func__, ifindex);
2813		return (ENOENT);
2814	}
2815
2816	group.s_addr = name[1];
2817	if (!IN_MULTICAST(ntohl(group.s_addr))) {
2818		CTR2(KTR_IGMPV3, "%s: group %s is not multicast",
2819		    __func__, inet_ntoa(group));
2820		return (EINVAL);
2821	}
2822
2823	ifp = ifnet_byindex(ifindex);
2824	if (ifp == NULL) {
2825		CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u",
2826		    __func__, ifindex);
2827		return (ENOENT);
2828	}
2829
2830	retval = sysctl_wire_old_buffer(req,
2831	    sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr)));
2832	if (retval)
2833		return (retval);
2834
2835	IN_MULTI_LOCK();
2836
2837	IF_ADDR_RLOCK(ifp);
2838	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2839		if (ifma->ifma_addr->sa_family != AF_INET ||
2840		    ifma->ifma_protospec == NULL)
2841			continue;
2842		inm = (struct in_multi *)ifma->ifma_protospec;
2843		if (!in_hosteq(inm->inm_addr, group))
2844			continue;
2845		fmode = inm->inm_st[1].iss_fmode;
2846		retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t));
2847		if (retval != 0)
2848			break;
2849		RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
2850#ifdef KTR
2851			struct in_addr ina;
2852			ina.s_addr = htonl(ims->ims_haddr);
2853			CTR2(KTR_IGMPV3, "%s: visit node %s", __func__,
2854			    inet_ntoa(ina));
2855#endif
2856			/*
2857			 * Only copy-out sources which are in-mode.
2858			 */
2859			if (fmode != ims_get_mode(inm, ims, 1)) {
2860				CTR1(KTR_IGMPV3, "%s: skip non-in-mode",
2861				    __func__);
2862				continue;
2863			}
2864			src.s_addr = htonl(ims->ims_haddr);
2865			retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr));
2866			if (retval != 0)
2867				break;
2868		}
2869	}
2870	IF_ADDR_RUNLOCK(ifp);
2871
2872	IN_MULTI_UNLOCK();
2873
2874	return (retval);
2875}
2876
2877#ifdef KTR
2878
2879static const char *inm_modestrs[] = { "un", "in", "ex" };
2880
2881static const char *
2882inm_mode_str(const int mode)
2883{
2884
2885	if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE)
2886		return (inm_modestrs[mode]);
2887	return ("??");
2888}
2889
2890static const char *inm_statestrs[] = {
2891	"not-member",
2892	"silent",
2893	"idle",
2894	"lazy",
2895	"sleeping",
2896	"awakening",
2897	"query-pending",
2898	"sg-query-pending",
2899	"leaving"
2900};
2901
2902static const char *
2903inm_state_str(const int state)
2904{
2905
2906	if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER)
2907		return (inm_statestrs[state]);
2908	return ("??");
2909}
2910
2911/*
2912 * Dump an in_multi structure to the console.
2913 */
2914void
2915inm_print(const struct in_multi *inm)
2916{
2917	int t;
2918
2919	if ((ktr_mask & KTR_IGMPV3) == 0)
2920		return;
2921
2922	printf("%s: --- begin inm %p ---\n", __func__, inm);
2923	printf("addr %s ifp %p(%s) ifma %p\n",
2924	    inet_ntoa(inm->inm_addr),
2925	    inm->inm_ifp,
2926	    inm->inm_ifp->if_xname,
2927	    inm->inm_ifma);
2928	printf("timer %u state %s refcount %u scq.len %u\n",
2929	    inm->inm_timer,
2930	    inm_state_str(inm->inm_state),
2931	    inm->inm_refcount,
2932	    inm->inm_scq.ifq_len);
2933	printf("igi %p nsrc %lu sctimer %u scrv %u\n",
2934	    inm->inm_igi,
2935	    inm->inm_nsrc,
2936	    inm->inm_sctimer,
2937	    inm->inm_scrv);
2938	for (t = 0; t < 2; t++) {
2939		printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t,
2940		    inm_mode_str(inm->inm_st[t].iss_fmode),
2941		    inm->inm_st[t].iss_asm,
2942		    inm->inm_st[t].iss_ex,
2943		    inm->inm_st[t].iss_in,
2944		    inm->inm_st[t].iss_rec);
2945	}
2946	printf("%s: --- end inm %p ---\n", __func__, inm);
2947}
2948
2949#else /* !KTR */
2950
/*
 * Variant built when KTR is not defined: the function still exists
 * but does nothing.
 */
void
inm_print(const struct in_multi *inm)
{

}
2956
2957#endif /* KTR */
2958
/*
 * Instantiate the red-black tree routines for ip_msource_tree, whose
 * nodes are struct ip_msource linked through ims_link and ordered by
 * ip_msource_cmp().
 */
RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp);
2960