in_mcast.c revision 184214
1/*-
2 * Copyright (c) 2007 Bruce M. Simpson.
3 * Copyright (c) 2005 Robert N. M. Watson.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote
15 *    products derived from this software without specific prior written
16 *    permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31/*
32 * IPv4 multicast socket, group, and socket option processing module.
33 * Until further notice, this file requires INET to compile.
34 * TODO: Make this infrastructure independent of address family.
35 * TODO: Teach netinet6 to use this code.
36 * TODO: Hook up SSM logic to IGMPv3/MLDv2.
37 */
38
39#include <sys/cdefs.h>
40__FBSDID("$FreeBSD: head/sys/netinet/in_mcast.c 184214 2008-10-23 20:26:15Z des $");
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/malloc.h>
46#include <sys/mbuf.h>
47#include <sys/protosw.h>
48#include <sys/socket.h>
49#include <sys/socketvar.h>
50#include <sys/sysctl.h>
51#include <sys/vimage.h>
52
53#include <net/if.h>
54#include <net/if_dl.h>
55#include <net/route.h>
56
57#include <netinet/in.h>
58#include <netinet/in_systm.h>
59#include <netinet/in_pcb.h>
60#include <netinet/in_var.h>
61#include <netinet/ip_var.h>
62#include <netinet/igmp_var.h>
63
64#ifndef __SOCKUNION_DECLARED
65union sockunion {
66	struct sockaddr_storage	ss;
67	struct sockaddr		sa;
68	struct sockaddr_dl	sdl;
69	struct sockaddr_in	sin;
70#ifdef INET6
71	struct sockaddr_in6	sin6;
72#endif
73};
74typedef union sockunion sockunion_t;
75#define __SOCKUNION_DECLARED
76#endif /* __SOCKUNION_DECLARED */
77
78static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group");
79static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options");
80static MALLOC_DEFINE(M_IPMSOURCE, "in_msource", "IPv4 multicast source filter");
81
82/*
83 * The IPv4 multicast list (in_multihead and associated structures) are
84 * protected by the global in_multi_mtx.  See in_var.h for more details.  For
85 * now, in_multi_mtx is marked as recursible due to IGMP's calling back into
86 * ip_output() to send IGMP packets while holding the lock; this probably is
87 * not quite desirable.
88 */
89struct in_multihead in_multihead;	/* XXX BSS initialization */
90struct mtx in_multi_mtx;
91MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF | MTX_RECURSE);
92
93/*
94 * Functions with non-static linkage defined in this file should be
95 * declared in in_var.h:
96 *  imo_match_group()
97 *  imo_match_source()
98 *  in_addmulti()
99 *  in_delmulti()
100 *  in_delmulti_locked()
101 * and ip_var.h:
102 *  inp_freemoptions()
103 *  inp_getmoptions()
104 *  inp_setmoptions()
105 */
106static int	imo_grow(struct ip_moptions *);
107static int	imo_join_source(struct ip_moptions *, size_t, sockunion_t *);
108static int	imo_leave_source(struct ip_moptions *, size_t, sockunion_t *);
109static int	inp_change_source_filter(struct inpcb *, struct sockopt *);
110static struct ip_moptions *
111		inp_findmoptions(struct inpcb *);
112static int	inp_get_source_filters(struct inpcb *, struct sockopt *);
113static int	inp_join_group(struct inpcb *, struct sockopt *);
114static int	inp_leave_group(struct inpcb *, struct sockopt *);
115static int	inp_set_multicast_if(struct inpcb *, struct sockopt *);
116static int	inp_set_source_filters(struct inpcb *, struct sockopt *);
117
118/*
119 * Resize the ip_moptions vector to the next power-of-two minus 1.
120 * May be called with locks held; do not sleep.
121 */
122static int
123imo_grow(struct ip_moptions *imo)
124{
125	struct in_multi		**nmships;
126	struct in_multi		**omships;
127	struct in_mfilter	 *nmfilters;
128	struct in_mfilter	 *omfilters;
129	size_t			  idx;
130	size_t			  newmax;
131	size_t			  oldmax;
132
133	nmships = NULL;
134	nmfilters = NULL;
135	omships = imo->imo_membership;
136	omfilters = imo->imo_mfilters;
137	oldmax = imo->imo_max_memberships;
138	newmax = ((oldmax + 1) * 2) - 1;
139
140	if (newmax <= IP_MAX_MEMBERSHIPS) {
141		nmships = (struct in_multi **)realloc(omships,
142		    sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT);
143		nmfilters = (struct in_mfilter *)realloc(omfilters,
144		    sizeof(struct in_mfilter) * newmax, M_IPMSOURCE, M_NOWAIT);
145		if (nmships != NULL && nmfilters != NULL) {
146			/* Initialize newly allocated source filter heads. */
147			for (idx = oldmax; idx < newmax; idx++) {
148				nmfilters[idx].imf_fmode = MCAST_EXCLUDE;
149				nmfilters[idx].imf_nsources = 0;
150				TAILQ_INIT(&nmfilters[idx].imf_sources);
151			}
152			imo->imo_max_memberships = newmax;
153			imo->imo_membership = nmships;
154			imo->imo_mfilters = nmfilters;
155		}
156	}
157
158	if (nmships == NULL || nmfilters == NULL) {
159		if (nmships != NULL)
160			free(nmships, M_IPMOPTS);
161		if (nmfilters != NULL)
162			free(nmfilters, M_IPMSOURCE);
163		return (ETOOMANYREFS);
164	}
165
166	return (0);
167}
168
169/*
170 * Add a source to a multicast filter list.
171 * Assumes the associated inpcb is locked.
172 */
173static int
174imo_join_source(struct ip_moptions *imo, size_t gidx, sockunion_t *src)
175{
176	struct in_msource	*ims, *nims;
177	struct in_mfilter	*imf;
178
179	KASSERT(src->ss.ss_family == AF_INET, ("%s: !AF_INET", __func__));
180	KASSERT(imo->imo_mfilters != NULL,
181	    ("%s: imo_mfilters vector not allocated", __func__));
182
183	imf = &imo->imo_mfilters[gidx];
184	if (imf->imf_nsources == IP_MAX_SOURCE_FILTER)
185		return (ENOBUFS);
186
187	ims = imo_match_source(imo, gidx, &src->sa);
188	if (ims != NULL)
189		return (EADDRNOTAVAIL);
190
191	/* Do not sleep with inp lock held. */
192	nims = malloc(sizeof(struct in_msource),
193	    M_IPMSOURCE, M_NOWAIT | M_ZERO);
194	if (nims == NULL)
195		return (ENOBUFS);
196
197	nims->ims_addr = src->ss;
198	TAILQ_INSERT_TAIL(&imf->imf_sources, nims, ims_next);
199	imf->imf_nsources++;
200
201	return (0);
202}
203
204static int
205imo_leave_source(struct ip_moptions *imo, size_t gidx, sockunion_t *src)
206{
207	struct in_msource	*ims;
208	struct in_mfilter	*imf;
209
210	KASSERT(src->ss.ss_family == AF_INET, ("%s: !AF_INET", __func__));
211	KASSERT(imo->imo_mfilters != NULL,
212	    ("%s: imo_mfilters vector not allocated", __func__));
213
214	imf = &imo->imo_mfilters[gidx];
215	if (imf->imf_nsources == IP_MAX_SOURCE_FILTER)
216		return (ENOBUFS);
217
218	ims = imo_match_source(imo, gidx, &src->sa);
219	if (ims == NULL)
220		return (EADDRNOTAVAIL);
221
222	TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
223	free(ims, M_IPMSOURCE);
224	imf->imf_nsources--;
225
226	return (0);
227}
228
229/*
230 * Find an IPv4 multicast group entry for this ip_moptions instance
231 * which matches the specified group, and optionally an interface.
232 * Return its index into the array, or -1 if not found.
233 */
234size_t
235imo_match_group(struct ip_moptions *imo, struct ifnet *ifp,
236    struct sockaddr *group)
237{
238	sockunion_t	 *gsa;
239	struct in_multi	**pinm;
240	int		  idx;
241	int		  nmships;
242
243	gsa = (sockunion_t *)group;
244
245	/* The imo_membership array may be lazy allocated. */
246	if (imo->imo_membership == NULL || imo->imo_num_memberships == 0)
247		return (-1);
248
249	nmships = imo->imo_num_memberships;
250	pinm = &imo->imo_membership[0];
251	for (idx = 0; idx < nmships; idx++, pinm++) {
252		if (*pinm == NULL)
253			continue;
254#if 0
255		printf("%s: trying ifp = %p, inaddr = %s ", __func__,
256		    ifp, inet_ntoa(gsa->sin.sin_addr));
257		printf("against %p, %s\n",
258		    (*pinm)->inm_ifp, inet_ntoa((*pinm)->inm_addr));
259#endif
260		if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) &&
261		    (*pinm)->inm_addr.s_addr == gsa->sin.sin_addr.s_addr) {
262			break;
263		}
264	}
265	if (idx >= nmships)
266		idx = -1;
267
268	return (idx);
269}
270
271/*
272 * Find a multicast source entry for this imo which matches
273 * the given group index for this socket, and source address.
274 */
275struct in_msource *
276imo_match_source(struct ip_moptions *imo, size_t gidx, struct sockaddr *src)
277{
278	struct in_mfilter	*imf;
279	struct in_msource	*ims, *pims;
280
281	KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
282	KASSERT(gidx != -1 && gidx < imo->imo_num_memberships,
283	    ("%s: invalid index %d\n", __func__, (int)gidx));
284
285	/* The imo_mfilters array may be lazy allocated. */
286	if (imo->imo_mfilters == NULL)
287		return (NULL);
288
289	pims = NULL;
290	imf = &imo->imo_mfilters[gidx];
291	TAILQ_FOREACH(ims, &imf->imf_sources, ims_next) {
292		/*
293		 * Perform bitwise comparison of two IPv4 addresses.
294		 * TODO: Do the same for IPv6.
295		 * Do not use sa_equal() for this as it is not aware of
296		 * deeper structure in sockaddr_in or sockaddr_in6.
297		 */
298		if (((struct sockaddr_in *)&ims->ims_addr)->sin_addr.s_addr ==
299		    ((struct sockaddr_in *)src)->sin_addr.s_addr) {
300			pims = ims;
301			break;
302		}
303	}
304
305	return (pims);
306}
307
308/*
309 * Join an IPv4 multicast group.
310 */
311struct in_multi *
312in_addmulti(struct in_addr *ap, struct ifnet *ifp)
313{
314	INIT_VNET_INET(ifp->if_vnet);
315	struct in_multi *inm;
316
317	inm = NULL;
318
319	IFF_LOCKGIANT(ifp);
320	IN_MULTI_LOCK();
321
322	IN_LOOKUP_MULTI(*ap, ifp, inm);
323	if (inm != NULL) {
324		/*
325		 * If we already joined this group, just bump the
326		 * refcount and return it.
327		 */
328		KASSERT(inm->inm_refcount >= 1,
329		    ("%s: bad refcount %d", __func__, inm->inm_refcount));
330		++inm->inm_refcount;
331	} else do {
332		sockunion_t		 gsa;
333		struct ifmultiaddr	*ifma;
334		struct in_multi		*ninm;
335		int			 error;
336
337		memset(&gsa, 0, sizeof(gsa));
338		gsa.sin.sin_family = AF_INET;
339		gsa.sin.sin_len = sizeof(struct sockaddr_in);
340		gsa.sin.sin_addr = *ap;
341
342		/*
343		 * Check if a link-layer group is already associated
344		 * with this network-layer group on the given ifnet.
345		 * If so, bump the refcount on the existing network-layer
346		 * group association and return it.
347		 */
348		error = if_addmulti(ifp, &gsa.sa, &ifma);
349		if (error)
350			break;
351		if (ifma->ifma_protospec != NULL) {
352			inm = (struct in_multi *)ifma->ifma_protospec;
353#ifdef INVARIANTS
354			if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
355			    inm->inm_addr.s_addr != ap->s_addr)
356				panic("%s: ifma is inconsistent", __func__);
357#endif
358			++inm->inm_refcount;
359			break;
360		}
361
362		/*
363		 * A new membership is needed; construct it and
364		 * perform the IGMP join.
365		 */
366		ninm = malloc(sizeof(*ninm), M_IPMADDR, M_NOWAIT | M_ZERO);
367		if (ninm == NULL) {
368			if_delmulti_ifma(ifma);
369			break;
370		}
371		ninm->inm_addr = *ap;
372		ninm->inm_ifp = ifp;
373		ninm->inm_ifma = ifma;
374		ninm->inm_refcount = 1;
375		ifma->ifma_protospec = ninm;
376		LIST_INSERT_HEAD(&V_in_multihead, ninm, inm_link);
377
378		igmp_joingroup(ninm);
379
380		inm = ninm;
381	} while (0);
382
383	IN_MULTI_UNLOCK();
384	IFF_UNLOCKGIANT(ifp);
385
386	return (inm);
387}
388
389/*
390 * Leave an IPv4 multicast group.
391 * It is OK to call this routine if the underlying ifnet went away.
392 *
393 * XXX: To deal with the ifp going away, we cheat; the link-layer code in net
394 * will set ifma_ifp to NULL when the associated ifnet instance is detached
395 * from the system.
396 *
397 * The only reason we need to violate layers and check ifma_ifp here at all
398 * is because certain hardware drivers still require Giant to be held,
399 * and it must always be taken before other locks.
400 */
401void
402in_delmulti(struct in_multi *inm)
403{
404	struct ifnet *ifp;
405
406	KASSERT(inm != NULL, ("%s: inm is NULL", __func__));
407	KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
408	ifp = inm->inm_ifma->ifma_ifp;
409
410	if (ifp != NULL) {
411		/*
412		 * Sanity check that netinet's notion of ifp is the
413		 * same as net's.
414		 */
415		KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
416		IFF_LOCKGIANT(ifp);
417	}
418
419	IN_MULTI_LOCK();
420	in_delmulti_locked(inm);
421	IN_MULTI_UNLOCK();
422
423	if (ifp != NULL)
424		IFF_UNLOCKGIANT(ifp);
425}
426
427/*
428 * Delete a multicast address record, with locks held.
429 *
430 * It is OK to call this routine if the ifp went away.
431 * Assumes that caller holds the IN_MULTI lock, and that
432 * Giant was taken before other locks if required by the hardware.
433 */
434void
435in_delmulti_locked(struct in_multi *inm)
436{
437	struct ifmultiaddr *ifma;
438
439	IN_MULTI_LOCK_ASSERT();
440	KASSERT(inm->inm_refcount >= 1, ("%s: freeing freed inm", __func__));
441
442	if (--inm->inm_refcount == 0) {
443		igmp_leavegroup(inm);
444
445		ifma = inm->inm_ifma;
446#ifdef DIAGNOSTIC
447		if (bootverbose)
448			printf("%s: purging ifma %p\n", __func__, ifma);
449#endif
450		KASSERT(ifma->ifma_protospec == inm,
451		    ("%s: ifma_protospec != inm", __func__));
452		ifma->ifma_protospec = NULL;
453
454		LIST_REMOVE(inm, inm_link);
455		free(inm, M_IPMADDR);
456
457		if_delmulti_ifma(ifma);
458	}
459}
460
461/*
462 * Block or unblock an ASM/SSM multicast source on an inpcb.
463 */
464static int
465inp_change_source_filter(struct inpcb *inp, struct sockopt *sopt)
466{
467	INIT_VNET_NET(curvnet);
468	INIT_VNET_INET(curvnet);
469	struct group_source_req		 gsr;
470	sockunion_t			*gsa, *ssa;
471	struct ifnet			*ifp;
472	struct in_mfilter		*imf;
473	struct ip_moptions		*imo;
474	struct in_msource		*ims;
475	size_t				 idx;
476	int				 error;
477	int				 block;
478
479	ifp = NULL;
480	error = 0;
481	block = 0;
482
483	memset(&gsr, 0, sizeof(struct group_source_req));
484	gsa = (sockunion_t *)&gsr.gsr_group;
485	ssa = (sockunion_t *)&gsr.gsr_source;
486
487	switch (sopt->sopt_name) {
488	case IP_BLOCK_SOURCE:
489	case IP_UNBLOCK_SOURCE: {
490		struct ip_mreq_source	 mreqs;
491
492		error = sooptcopyin(sopt, &mreqs,
493		    sizeof(struct ip_mreq_source),
494		    sizeof(struct ip_mreq_source));
495		if (error)
496			return (error);
497
498		gsa->sin.sin_family = AF_INET;
499		gsa->sin.sin_len = sizeof(struct sockaddr_in);
500		gsa->sin.sin_addr = mreqs.imr_multiaddr;
501
502		ssa->sin.sin_family = AF_INET;
503		ssa->sin.sin_len = sizeof(struct sockaddr_in);
504		ssa->sin.sin_addr = mreqs.imr_sourceaddr;
505
506		if (mreqs.imr_interface.s_addr != INADDR_ANY)
507			INADDR_TO_IFP(mreqs.imr_interface, ifp);
508
509		if (sopt->sopt_name == IP_BLOCK_SOURCE)
510			block = 1;
511
512#ifdef DIAGNOSTIC
513		if (bootverbose) {
514			printf("%s: imr_interface = %s, ifp = %p\n",
515			    __func__, inet_ntoa(mreqs.imr_interface), ifp);
516		}
517#endif
518		break;
519	    }
520
521	case MCAST_BLOCK_SOURCE:
522	case MCAST_UNBLOCK_SOURCE:
523		error = sooptcopyin(sopt, &gsr,
524		    sizeof(struct group_source_req),
525		    sizeof(struct group_source_req));
526		if (error)
527			return (error);
528
529		if (gsa->sin.sin_family != AF_INET ||
530		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
531			return (EINVAL);
532
533		if (ssa->sin.sin_family != AF_INET ||
534		    ssa->sin.sin_len != sizeof(struct sockaddr_in))
535			return (EINVAL);
536
537		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
538			return (EADDRNOTAVAIL);
539
540		ifp = ifnet_byindex(gsr.gsr_interface);
541
542		if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
543			block = 1;
544		break;
545
546	default:
547#ifdef DIAGNOSTIC
548		if (bootverbose) {
549			printf("%s: unknown sopt_name %d\n", __func__,
550			    sopt->sopt_name);
551		}
552#endif
553		return (EOPNOTSUPP);
554		break;
555	}
556
557	/* XXX INET6 */
558	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
559		return (EINVAL);
560
561	/*
562	 * Check if we are actually a member of this group.
563	 */
564	imo = inp_findmoptions(inp);
565	idx = imo_match_group(imo, ifp, &gsa->sa);
566	if (idx == -1 || imo->imo_mfilters == NULL) {
567		error = EADDRNOTAVAIL;
568		goto out_locked;
569	}
570
571	KASSERT(imo->imo_mfilters != NULL,
572	    ("%s: imo_mfilters not allocated", __func__));
573	imf = &imo->imo_mfilters[idx];
574
575	/*
576	 * SSM multicast truth table for block/unblock operations.
577	 *
578	 * Operation   Filter Mode  Entry exists?   Action
579	 *
580	 * block       exclude      no              add source to filter
581	 * unblock     include      no              add source to filter
582	 * block       include      no              EINVAL
583	 * unblock     exclude      no              EINVAL
584	 * block       exclude      yes             EADDRNOTAVAIL
585	 * unblock     include      yes             EADDRNOTAVAIL
586	 * block       include      yes             remove source from filter
587	 * unblock     exclude      yes             remove source from filter
588	 *
589	 * FreeBSD does not explicitly distinguish between ASM and SSM
590	 * mode sockets; all sockets are assumed to have a filter list.
591	 */
592#ifdef DIAGNOSTIC
593	if (bootverbose) {
594		printf("%s: imf_fmode is %s\n", __func__,
595		    imf->imf_fmode == MCAST_INCLUDE ? "include" : "exclude");
596	}
597#endif
598	ims = imo_match_source(imo, idx, &ssa->sa);
599	if (ims == NULL) {
600		if ((block == 1 && imf->imf_fmode == MCAST_EXCLUDE) ||
601		    (block == 0 && imf->imf_fmode == MCAST_INCLUDE)) {
602#ifdef DIAGNOSTIC
603			if (bootverbose) {
604				printf("%s: adding %s to filter list\n",
605				    __func__, inet_ntoa(ssa->sin.sin_addr));
606			}
607#endif
608			error = imo_join_source(imo, idx, ssa);
609		}
610		if ((block == 1 && imf->imf_fmode == MCAST_INCLUDE) ||
611		    (block == 0 && imf->imf_fmode == MCAST_EXCLUDE)) {
612			/*
613			 * If the socket is in inclusive mode:
614			 *  the source is already blocked as it has no entry.
615			 * If the socket is in exclusive mode:
616			 *  the source is already unblocked as it has no entry.
617			 */
618#ifdef DIAGNOSTIC
619			if (bootverbose) {
620				printf("%s: ims %p; %s already [un]blocked\n",
621				    __func__, ims,
622				    inet_ntoa(ssa->sin.sin_addr));
623			}
624#endif
625			error = EINVAL;
626		}
627	} else {
628		if ((block == 1 && imf->imf_fmode == MCAST_EXCLUDE) ||
629		    (block == 0 && imf->imf_fmode == MCAST_INCLUDE)) {
630			/*
631			 * If the socket is in exclusive mode:
632			 *  the source is already blocked as it has an entry.
633			 * If the socket is in inclusive mode:
634			 *  the source is already unblocked as it has an entry.
635			 */
636#ifdef DIAGNOSTIC
637			if (bootverbose) {
638				printf("%s: ims %p; %s already [un]blocked\n",
639				    __func__, ims,
640				    inet_ntoa(ssa->sin.sin_addr));
641			}
642#endif
643			error = EADDRNOTAVAIL;
644		}
645		if ((block == 1 && imf->imf_fmode == MCAST_INCLUDE) ||
646		    (block == 0 && imf->imf_fmode == MCAST_EXCLUDE)) {
647#ifdef DIAGNOSTIC
648			if (bootverbose) {
649				printf("%s: removing %s from filter list\n",
650				    __func__, inet_ntoa(ssa->sin.sin_addr));
651			}
652#endif
653			error = imo_leave_source(imo, idx, ssa);
654		}
655	}
656
657out_locked:
658	INP_WUNLOCK(inp);
659	return (error);
660}
661
662/*
663 * Given an inpcb, return its multicast options structure pointer.  Accepts
664 * an unlocked inpcb pointer, but will return it locked.  May sleep.
665 */
666static struct ip_moptions *
667inp_findmoptions(struct inpcb *inp)
668{
669	struct ip_moptions	 *imo;
670	struct in_multi		**immp;
671	struct in_mfilter	 *imfp;
672	size_t			  idx;
673
674	INP_WLOCK(inp);
675	if (inp->inp_moptions != NULL)
676		return (inp->inp_moptions);
677
678	INP_WUNLOCK(inp);
679
680	imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS,
681	    M_WAITOK);
682	immp = (struct in_multi **)malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS,
683	    M_IPMOPTS, M_WAITOK | M_ZERO);
684	imfp = (struct in_mfilter *)malloc(
685	    sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS,
686	    M_IPMSOURCE, M_WAITOK);
687
688	imo->imo_multicast_ifp = NULL;
689	imo->imo_multicast_addr.s_addr = INADDR_ANY;
690	imo->imo_multicast_vif = -1;
691	imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
692	imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
693	imo->imo_num_memberships = 0;
694	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
695	imo->imo_membership = immp;
696
697	/* Initialize per-group source filters. */
698	for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++) {
699		imfp[idx].imf_fmode = MCAST_EXCLUDE;
700		imfp[idx].imf_nsources = 0;
701		TAILQ_INIT(&imfp[idx].imf_sources);
702	}
703	imo->imo_mfilters = imfp;
704
705	INP_WLOCK(inp);
706	if (inp->inp_moptions != NULL) {
707		free(imfp, M_IPMSOURCE);
708		free(immp, M_IPMOPTS);
709		free(imo, M_IPMOPTS);
710		return (inp->inp_moptions);
711	}
712	inp->inp_moptions = imo;
713	return (imo);
714}
715
716/*
717 * Discard the IP multicast options (and source filters).
718 */
719void
720inp_freemoptions(struct ip_moptions *imo)
721{
722	struct in_mfilter	*imf;
723	struct in_msource	*ims, *tims;
724	size_t			 idx, nmships;
725
726	KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
727
728	nmships = imo->imo_num_memberships;
729	for (idx = 0; idx < nmships; ++idx) {
730		in_delmulti(imo->imo_membership[idx]);
731
732		if (imo->imo_mfilters != NULL) {
733			imf = &imo->imo_mfilters[idx];
734			TAILQ_FOREACH_SAFE(ims, &imf->imf_sources,
735			    ims_next, tims) {
736				TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
737				free(ims, M_IPMSOURCE);
738				imf->imf_nsources--;
739			}
740			KASSERT(imf->imf_nsources == 0,
741			    ("%s: did not free all imf_nsources", __func__));
742		}
743	}
744
745	if (imo->imo_mfilters != NULL)
746		free(imo->imo_mfilters, M_IPMSOURCE);
747	free(imo->imo_membership, M_IPMOPTS);
748	free(imo, M_IPMOPTS);
749}
750
751/*
752 * Atomically get source filters on a socket for an IPv4 multicast group.
753 * Called with INP lock held; returns with lock released.
754 */
755static int
756inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
757{
758	INIT_VNET_NET(curvnet);
759	struct __msfilterreq	 msfr;
760	sockunion_t		*gsa;
761	struct ifnet		*ifp;
762	struct ip_moptions	*imo;
763	struct in_mfilter	*imf;
764	struct in_msource	*ims;
765	struct sockaddr_storage	*ptss;
766	struct sockaddr_storage	*tss;
767	int			 error;
768	size_t			 idx;
769
770	INP_WLOCK_ASSERT(inp);
771
772	imo = inp->inp_moptions;
773	KASSERT(imo != NULL, ("%s: null ip_moptions", __func__));
774
775	INP_WUNLOCK(inp);
776
777	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
778	    sizeof(struct __msfilterreq));
779	if (error)
780		return (error);
781
782	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
783		return (EINVAL);
784
785	ifp = ifnet_byindex(msfr.msfr_ifindex);
786	if (ifp == NULL)
787		return (EINVAL);
788
789	INP_WLOCK(inp);
790
791	/*
792	 * Lookup group on the socket.
793	 */
794	gsa = (sockunion_t *)&msfr.msfr_group;
795	idx = imo_match_group(imo, ifp, &gsa->sa);
796	if (idx == -1 || imo->imo_mfilters == NULL) {
797		INP_WUNLOCK(inp);
798		return (EADDRNOTAVAIL);
799	}
800
801	imf = &imo->imo_mfilters[idx];
802	msfr.msfr_fmode = imf->imf_fmode;
803	msfr.msfr_nsrcs = imf->imf_nsources;
804
805	/*
806	 * If the user specified a buffer, copy out the source filter
807	 * entries to userland gracefully.
808	 * msfr.msfr_nsrcs is always set to the total number of filter
809	 * entries which the kernel currently has for this group.
810	 */
811	tss = NULL;
812	if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
813		/*
814		 * Make a copy of the source vector so that we do not
815		 * thrash the inpcb lock whilst copying it out.
816		 * We only copy out the number of entries which userland
817		 * has asked for, but we always tell userland how big the
818		 * buffer really needs to be.
819		 */
820		tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
821		    M_TEMP, M_NOWAIT);
822		if (tss == NULL) {
823			error = ENOBUFS;
824		} else {
825			ptss = tss;
826			TAILQ_FOREACH(ims, &imf->imf_sources, ims_next) {
827				memcpy(ptss++, &ims->ims_addr,
828				    sizeof(struct sockaddr_storage));
829			}
830		}
831	}
832
833	INP_WUNLOCK(inp);
834
835	if (tss != NULL) {
836		error = copyout(tss, msfr.msfr_srcs,
837		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
838		free(tss, M_TEMP);
839	}
840
841	if (error)
842		return (error);
843
844	error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));
845
846	return (error);
847}
848
849/*
850 * Return the IP multicast options in response to user getsockopt().
851 */
852int
853inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
854{
855	INIT_VNET_INET(curvnet);
856	struct ip_mreqn		 mreqn;
857	struct ip_moptions	*imo;
858	struct ifnet		*ifp;
859	struct in_ifaddr	*ia;
860	int			 error, optval;
861	u_char			 coptval;
862
863	INP_WLOCK(inp);
864	imo = inp->inp_moptions;
865	/*
866	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
867	 * or is a divert socket, reject it.
868	 */
869	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
870	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
871	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
872		INP_WUNLOCK(inp);
873		return (EOPNOTSUPP);
874	}
875
876	error = 0;
877	switch (sopt->sopt_name) {
878	case IP_MULTICAST_VIF:
879		if (imo != NULL)
880			optval = imo->imo_multicast_vif;
881		else
882			optval = -1;
883		INP_WUNLOCK(inp);
884		error = sooptcopyout(sopt, &optval, sizeof(int));
885		break;
886
887	case IP_MULTICAST_IF:
888		memset(&mreqn, 0, sizeof(struct ip_mreqn));
889		if (imo != NULL) {
890			ifp = imo->imo_multicast_ifp;
891			if (imo->imo_multicast_addr.s_addr != INADDR_ANY) {
892				mreqn.imr_address = imo->imo_multicast_addr;
893			} else if (ifp != NULL) {
894				mreqn.imr_ifindex = ifp->if_index;
895				IFP_TO_IA(ifp, ia);
896				if (ia != NULL) {
897					mreqn.imr_address =
898					    IA_SIN(ia)->sin_addr;
899				}
900			}
901		}
902		INP_WUNLOCK(inp);
903		if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
904			error = sooptcopyout(sopt, &mreqn,
905			    sizeof(struct ip_mreqn));
906		} else {
907			error = sooptcopyout(sopt, &mreqn.imr_address,
908			    sizeof(struct in_addr));
909		}
910		break;
911
912	case IP_MULTICAST_TTL:
913		if (imo == 0)
914			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
915		else
916			optval = coptval = imo->imo_multicast_ttl;
917		INP_WUNLOCK(inp);
918		if (sopt->sopt_valsize == sizeof(u_char))
919			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
920		else
921			error = sooptcopyout(sopt, &optval, sizeof(int));
922		break;
923
924	case IP_MULTICAST_LOOP:
925		if (imo == 0)
926			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
927		else
928			optval = coptval = imo->imo_multicast_loop;
929		INP_WUNLOCK(inp);
930		if (sopt->sopt_valsize == sizeof(u_char))
931			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
932		else
933			error = sooptcopyout(sopt, &optval, sizeof(int));
934		break;
935
936	case IP_MSFILTER:
937		if (imo == NULL) {
938			error = EADDRNOTAVAIL;
939			INP_WUNLOCK(inp);
940		} else {
941			error = inp_get_source_filters(inp, sopt);
942		}
943		break;
944
945	default:
946		INP_WUNLOCK(inp);
947		error = ENOPROTOOPT;
948		break;
949	}
950
951	INP_UNLOCK_ASSERT(inp);
952
953	return (error);
954}
955
956/*
957 * Join an IPv4 multicast group, possibly with a source.
958 */
959static int
960inp_join_group(struct inpcb *inp, struct sockopt *sopt)
961{
962	INIT_VNET_NET(curvnet);
963	INIT_VNET_INET(curvnet);
964	struct group_source_req		 gsr;
965	sockunion_t			*gsa, *ssa;
966	struct ifnet			*ifp;
967	struct in_mfilter		*imf;
968	struct ip_moptions		*imo;
969	struct in_multi			*inm;
970	size_t				 idx;
971	int				 error;
972
973	ifp = NULL;
974	error = 0;
975
976	memset(&gsr, 0, sizeof(struct group_source_req));
977	gsa = (sockunion_t *)&gsr.gsr_group;
978	gsa->ss.ss_family = AF_UNSPEC;
979	ssa = (sockunion_t *)&gsr.gsr_source;
980	ssa->ss.ss_family = AF_UNSPEC;
981
982	switch (sopt->sopt_name) {
983	case IP_ADD_MEMBERSHIP:
984	case IP_ADD_SOURCE_MEMBERSHIP: {
985		struct ip_mreq_source	 mreqs;
986
987		if (sopt->sopt_name == IP_ADD_MEMBERSHIP) {
988			error = sooptcopyin(sopt, &mreqs,
989			    sizeof(struct ip_mreq),
990			    sizeof(struct ip_mreq));
991			/*
992			 * Do argument switcharoo from ip_mreq into
993			 * ip_mreq_source to avoid using two instances.
994			 */
995			mreqs.imr_interface = mreqs.imr_sourceaddr;
996			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
997		} else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
998			error = sooptcopyin(sopt, &mreqs,
999			    sizeof(struct ip_mreq_source),
1000			    sizeof(struct ip_mreq_source));
1001		}
1002		if (error)
1003			return (error);
1004
1005		gsa->sin.sin_family = AF_INET;
1006		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1007		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1008
1009		if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
1010			ssa->sin.sin_family = AF_INET;
1011			ssa->sin.sin_len = sizeof(struct sockaddr_in);
1012			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1013		}
1014
1015		/*
1016		 * Obtain ifp. If no interface address was provided,
1017		 * use the interface of the route in the unicast FIB for
1018		 * the given multicast destination; usually, this is the
1019		 * default route.
1020		 * If this lookup fails, attempt to use the first non-loopback
1021		 * interface with multicast capability in the system as a
1022		 * last resort. The legacy IPv4 ASM API requires that we do
1023		 * this in order to allow groups to be joined when the routing
1024		 * table has not yet been populated during boot.
1025		 * If all of these conditions fail, return EADDRNOTAVAIL, and
1026		 * reject the IPv4 multicast join.
1027		 */
1028		if (mreqs.imr_interface.s_addr != INADDR_ANY) {
1029			INADDR_TO_IFP(mreqs.imr_interface, ifp);
1030		} else {
1031			struct route ro;
1032
1033			ro.ro_rt = NULL;
1034			*(struct sockaddr_in *)&ro.ro_dst = gsa->sin;
1035			in_rtalloc_ign(&ro, RTF_CLONING,
1036			   inp->inp_inc.inc_fibnum);
1037			if (ro.ro_rt != NULL) {
1038				ifp = ro.ro_rt->rt_ifp;
1039				KASSERT(ifp != NULL, ("%s: null ifp",
1040				    __func__));
1041				RTFREE(ro.ro_rt);
1042			} else {
1043				struct in_ifaddr *ia;
1044				struct ifnet *mfp = NULL;
1045				TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
1046					mfp = ia->ia_ifp;
1047					if (!(mfp->if_flags & IFF_LOOPBACK) &&
1048					     (mfp->if_flags & IFF_MULTICAST)) {
1049						ifp = mfp;
1050						break;
1051					}
1052				}
1053			}
1054		}
1055#ifdef DIAGNOSTIC
1056		if (bootverbose) {
1057			printf("%s: imr_interface = %s, ifp = %p\n",
1058			    __func__, inet_ntoa(mreqs.imr_interface), ifp);
1059		}
1060#endif
1061		break;
1062	}
1063
1064	case MCAST_JOIN_GROUP:
1065	case MCAST_JOIN_SOURCE_GROUP:
1066		if (sopt->sopt_name == MCAST_JOIN_GROUP) {
1067			error = sooptcopyin(sopt, &gsr,
1068			    sizeof(struct group_req),
1069			    sizeof(struct group_req));
1070		} else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
1071			error = sooptcopyin(sopt, &gsr,
1072			    sizeof(struct group_source_req),
1073			    sizeof(struct group_source_req));
1074		}
1075		if (error)
1076			return (error);
1077
1078		if (gsa->sin.sin_family != AF_INET ||
1079		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
1080			return (EINVAL);
1081
1082		/*
1083		 * Overwrite the port field if present, as the sockaddr
1084		 * being copied in may be matched with a binary comparison.
1085		 * XXX INET6
1086		 */
1087		gsa->sin.sin_port = 0;
1088		if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
1089			if (ssa->sin.sin_family != AF_INET ||
1090			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
1091				return (EINVAL);
1092			ssa->sin.sin_port = 0;
1093		}
1094
1095		/*
1096		 * Obtain the ifp.
1097		 */
1098		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
1099			return (EADDRNOTAVAIL);
1100		ifp = ifnet_byindex(gsr.gsr_interface);
1101
1102		break;
1103
1104	default:
1105#ifdef DIAGNOSTIC
1106		if (bootverbose) {
1107			printf("%s: unknown sopt_name %d\n", __func__,
1108			    sopt->sopt_name);
1109		}
1110#endif
1111		return (EOPNOTSUPP);
1112		break;
1113	}
1114
1115	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1116		return (EINVAL);
1117
1118	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
1119		return (EADDRNOTAVAIL);
1120
1121	/*
1122	 * Check if we already hold membership of this group for this inpcb.
1123	 * If so, we do not need to perform the initial join.
1124	 */
1125	imo = inp_findmoptions(inp);
1126	idx = imo_match_group(imo, ifp, &gsa->sa);
1127	if (idx != -1) {
1128		if (ssa->ss.ss_family != AF_UNSPEC) {
1129			/*
1130			 * Attempting to join an ASM group (when already
1131			 * an ASM or SSM member) is an error.
1132			 */
1133			error = EADDRNOTAVAIL;
1134		} else {
1135			imf = &imo->imo_mfilters[idx];
1136			if (imf->imf_nsources == 0) {
1137				/*
1138				 * Attempting to join an SSM group (when
1139				 * already an ASM member) is an error.
1140				 */
1141				error = EINVAL;
1142			} else {
1143				/*
1144				 * Attempting to join an SSM group (when
1145				 * already an SSM member) means "add this
1146				 * source to the inclusive filter list".
1147				 */
1148				error = imo_join_source(imo, idx, ssa);
1149			}
1150		}
1151		goto out_locked;
1152	}
1153
1154	/*
1155	 * Call imo_grow() to reallocate the membership and source filter
1156	 * vectors if they are full. If the size would exceed the hard limit,
1157	 * then we know we've really run out of entries. We keep the INP
1158	 * lock held to avoid introducing a race condition.
1159	 */
1160	if (imo->imo_num_memberships == imo->imo_max_memberships) {
1161		error = imo_grow(imo);
1162		if (error)
1163			goto out_locked;
1164	}
1165
1166	/*
1167	 * So far, so good: perform the layer 3 join, layer 2 join,
1168	 * and make an IGMP announcement if needed.
1169	 */
1170	inm = in_addmulti(&gsa->sin.sin_addr, ifp);
1171	if (inm == NULL) {
1172		error = ENOBUFS;
1173		goto out_locked;
1174	}
1175	idx = imo->imo_num_memberships;
1176	imo->imo_membership[idx] = inm;
1177	imo->imo_num_memberships++;
1178
1179	KASSERT(imo->imo_mfilters != NULL,
1180	    ("%s: imf_mfilters vector was not allocated", __func__));
1181	imf = &imo->imo_mfilters[idx];
1182	KASSERT(TAILQ_EMPTY(&imf->imf_sources),
1183	    ("%s: imf_sources not empty", __func__));
1184
1185	/*
1186	 * If this is a new SSM group join (i.e. a source was specified
1187	 * with this group), add this source to the filter list.
1188	 */
1189	if (ssa->ss.ss_family != AF_UNSPEC) {
1190		/*
1191		 * An initial SSM join implies that this socket's membership
1192		 * of the multicast group is now in inclusive mode.
1193		 */
1194		imf->imf_fmode = MCAST_INCLUDE;
1195
1196		error = imo_join_source(imo, idx, ssa);
1197		if (error) {
1198			/*
1199			 * Drop inp lock before calling in_delmulti(),
1200			 * to prevent a lock order reversal.
1201			 */
1202			--imo->imo_num_memberships;
1203			INP_WUNLOCK(inp);
1204			in_delmulti(inm);
1205			return (error);
1206		}
1207	}
1208
1209out_locked:
1210	INP_WUNLOCK(inp);
1211	return (error);
1212}
1213
1214/*
1215 * Leave an IPv4 multicast group on an inpcb, possibly with a source.
1216 */
1217static int
1218inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
1219{
1220	INIT_VNET_NET(curvnet);
1221	INIT_VNET_INET(curvnet);
1222	struct group_source_req		 gsr;
1223	struct ip_mreq_source		 mreqs;
1224	sockunion_t			*gsa, *ssa;
1225	struct ifnet			*ifp;
1226	struct in_mfilter		*imf;
1227	struct ip_moptions		*imo;
1228	struct in_msource		*ims, *tims;
1229	struct in_multi			*inm;
1230	size_t				 idx;
1231	int				 error;
1232
1233	ifp = NULL;
1234	error = 0;
1235
1236	memset(&gsr, 0, sizeof(struct group_source_req));
1237	gsa = (sockunion_t *)&gsr.gsr_group;
1238	gsa->ss.ss_family = AF_UNSPEC;
1239	ssa = (sockunion_t *)&gsr.gsr_source;
1240	ssa->ss.ss_family = AF_UNSPEC;
1241
1242	switch (sopt->sopt_name) {
1243	case IP_DROP_MEMBERSHIP:
1244	case IP_DROP_SOURCE_MEMBERSHIP:
1245		if (sopt->sopt_name == IP_DROP_MEMBERSHIP) {
1246			error = sooptcopyin(sopt, &mreqs,
1247			    sizeof(struct ip_mreq),
1248			    sizeof(struct ip_mreq));
1249			/*
1250			 * Swap interface and sourceaddr arguments,
1251			 * as ip_mreq and ip_mreq_source are laid
1252			 * out differently.
1253			 */
1254			mreqs.imr_interface = mreqs.imr_sourceaddr;
1255			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
1256		} else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
1257			error = sooptcopyin(sopt, &mreqs,
1258			    sizeof(struct ip_mreq_source),
1259			    sizeof(struct ip_mreq_source));
1260		}
1261		if (error)
1262			return (error);
1263
1264		gsa->sin.sin_family = AF_INET;
1265		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1266		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1267
1268		if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
1269			ssa->sin.sin_family = AF_INET;
1270			ssa->sin.sin_len = sizeof(struct sockaddr_in);
1271			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1272		}
1273
1274		if (gsa->sin.sin_addr.s_addr != INADDR_ANY)
1275			INADDR_TO_IFP(mreqs.imr_interface, ifp);
1276
1277#ifdef DIAGNOSTIC
1278		if (bootverbose) {
1279			printf("%s: imr_interface = %s, ifp = %p\n",
1280			    __func__, inet_ntoa(mreqs.imr_interface), ifp);
1281		}
1282#endif
1283		break;
1284
1285	case MCAST_LEAVE_GROUP:
1286	case MCAST_LEAVE_SOURCE_GROUP:
1287		if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
1288			error = sooptcopyin(sopt, &gsr,
1289			    sizeof(struct group_req),
1290			    sizeof(struct group_req));
1291		} else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
1292			error = sooptcopyin(sopt, &gsr,
1293			    sizeof(struct group_source_req),
1294			    sizeof(struct group_source_req));
1295		}
1296		if (error)
1297			return (error);
1298
1299		if (gsa->sin.sin_family != AF_INET ||
1300		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
1301			return (EINVAL);
1302
1303		if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
1304			if (ssa->sin.sin_family != AF_INET ||
1305			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
1306				return (EINVAL);
1307		}
1308
1309		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
1310			return (EADDRNOTAVAIL);
1311
1312		ifp = ifnet_byindex(gsr.gsr_interface);
1313		break;
1314
1315	default:
1316#ifdef DIAGNOSTIC
1317		if (bootverbose) {
1318			printf("%s: unknown sopt_name %d\n", __func__,
1319			    sopt->sopt_name);
1320		}
1321#endif
1322		return (EOPNOTSUPP);
1323		break;
1324	}
1325
1326	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1327		return (EINVAL);
1328
1329	/*
1330	 * Find the membership in the membership array.
1331	 */
1332	imo = inp_findmoptions(inp);
1333	idx = imo_match_group(imo, ifp, &gsa->sa);
1334	if (idx == -1) {
1335		error = EADDRNOTAVAIL;
1336		goto out_locked;
1337	}
1338	imf = &imo->imo_mfilters[idx];
1339
1340	/*
1341	 * If we were instructed only to leave a given source, do so.
1342	 */
1343	if (ssa->ss.ss_family != AF_UNSPEC) {
1344		if (imf->imf_nsources == 0 ||
1345		    imf->imf_fmode == MCAST_EXCLUDE) {
1346			/*
1347			 * Attempting to SSM leave an ASM group
1348			 * is an error; should use *_BLOCK_SOURCE instead.
1349			 * Attempting to SSM leave a source in a group when
1350			 * the socket is in 'exclude mode' is also an error.
1351			 */
1352			error = EINVAL;
1353		} else {
1354			error = imo_leave_source(imo, idx, ssa);
1355		}
1356		/*
1357		 * If an error occurred, or this source is not the last
1358		 * source in the group, do not leave the whole group.
1359		 */
1360		if (error || imf->imf_nsources > 0)
1361			goto out_locked;
1362	}
1363
1364	/*
1365	 * Give up the multicast address record to which the membership points.
1366	 */
1367	inm = imo->imo_membership[idx];
1368	in_delmulti(inm);
1369
1370	/*
1371	 * Free any source filters for this group if they exist.
1372	 * Revert inpcb to the default MCAST_EXCLUDE state.
1373	 */
1374	if (imo->imo_mfilters != NULL) {
1375		TAILQ_FOREACH_SAFE(ims, &imf->imf_sources, ims_next, tims) {
1376			TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
1377			free(ims, M_IPMSOURCE);
1378			imf->imf_nsources--;
1379		}
1380		KASSERT(imf->imf_nsources == 0,
1381		    ("%s: imf_nsources not 0", __func__));
1382		KASSERT(TAILQ_EMPTY(&imf->imf_sources),
1383		    ("%s: imf_sources not empty", __func__));
1384		imf->imf_fmode = MCAST_EXCLUDE;
1385	}
1386
1387	/*
1388	 * Remove the gap in the membership array.
1389	 */
1390	for (++idx; idx < imo->imo_num_memberships; ++idx)
1391		imo->imo_membership[idx-1] = imo->imo_membership[idx];
1392	imo->imo_num_memberships--;
1393
1394out_locked:
1395	INP_WUNLOCK(inp);
1396	return (error);
1397}
1398
1399/*
1400 * Select the interface for transmitting IPv4 multicast datagrams.
1401 *
1402 * Either an instance of struct in_addr or an instance of struct ip_mreqn
1403 * may be passed to this socket option. An address of INADDR_ANY or an
1404 * interface index of 0 is used to remove a previous selection.
1405 * When no interface is selected, one is chosen for every send.
1406 */
1407static int
1408inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
1409{
1410	INIT_VNET_NET(curvnet);
1411	struct in_addr		 addr;
1412	struct ip_mreqn		 mreqn;
1413	struct ifnet		*ifp;
1414	struct ip_moptions	*imo;
1415	int			 error;
1416
1417	if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
1418		/*
1419		 * An interface index was specified using the
1420		 * Linux-derived ip_mreqn structure.
1421		 */
1422		error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn),
1423		    sizeof(struct ip_mreqn));
1424		if (error)
1425			return (error);
1426
1427		if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex)
1428			return (EINVAL);
1429
1430		if (mreqn.imr_ifindex == 0) {
1431			ifp = NULL;
1432		} else {
1433			ifp = ifnet_byindex(mreqn.imr_ifindex);
1434			if (ifp == NULL)
1435				return (EADDRNOTAVAIL);
1436		}
1437	} else {
1438		/*
1439		 * An interface was specified by IPv4 address.
1440		 * This is the traditional BSD usage.
1441		 */
1442		error = sooptcopyin(sopt, &addr, sizeof(struct in_addr),
1443		    sizeof(struct in_addr));
1444		if (error)
1445			return (error);
1446		if (addr.s_addr == INADDR_ANY) {
1447			ifp = NULL;
1448		} else {
1449			INADDR_TO_IFP(addr, ifp);
1450			if (ifp == NULL)
1451				return (EADDRNOTAVAIL);
1452		}
1453#ifdef DIAGNOSTIC
1454		if (bootverbose) {
1455			printf("%s: ifp = %p, addr = %s\n",
1456			    __func__, ifp, inet_ntoa(addr)); /* XXX INET6 */
1457		}
1458#endif
1459	}
1460
1461	/* Reject interfaces which do not support multicast. */
1462	if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0)
1463		return (EOPNOTSUPP);
1464
1465	imo = inp_findmoptions(inp);
1466	imo->imo_multicast_ifp = ifp;
1467	imo->imo_multicast_addr.s_addr = INADDR_ANY;
1468	INP_WUNLOCK(inp);
1469
1470	return (0);
1471}
1472
1473/*
1474 * Atomically set source filters on a socket for an IPv4 multicast group.
1475 */
1476static int
1477inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
1478{
1479	INIT_VNET_NET(curvnet);
1480	struct __msfilterreq	 msfr;
1481	sockunion_t		*gsa;
1482	struct ifnet		*ifp;
1483	struct in_mfilter	*imf;
1484	struct ip_moptions	*imo;
1485	struct in_msource	*ims, *tims;
1486	size_t			 idx;
1487	int			 error;
1488
1489	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
1490	    sizeof(struct __msfilterreq));
1491	if (error)
1492		return (error);
1493
1494	if (msfr.msfr_nsrcs > IP_MAX_SOURCE_FILTER ||
1495	    (msfr.msfr_fmode != MCAST_EXCLUDE &&
1496	     msfr.msfr_fmode != MCAST_INCLUDE))
1497		return (EINVAL);
1498
1499	if (msfr.msfr_group.ss_family != AF_INET ||
1500	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in))
1501		return (EINVAL);
1502
1503	gsa = (sockunion_t *)&msfr.msfr_group;
1504	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1505		return (EINVAL);
1506
1507	gsa->sin.sin_port = 0;	/* ignore port */
1508
1509	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
1510		return (EADDRNOTAVAIL);
1511
1512	ifp = ifnet_byindex(msfr.msfr_ifindex);
1513	if (ifp == NULL)
1514		return (EADDRNOTAVAIL);
1515
1516	/*
1517	 * Take the INP lock.
1518	 * Check if this socket is a member of this group.
1519	 */
1520	imo = inp_findmoptions(inp);
1521	idx = imo_match_group(imo, ifp, &gsa->sa);
1522	if (idx == -1 || imo->imo_mfilters == NULL) {
1523		error = EADDRNOTAVAIL;
1524		goto out_locked;
1525	}
1526	imf = &imo->imo_mfilters[idx];
1527
1528#ifdef DIAGNOSTIC
1529	if (bootverbose)
1530		printf("%s: clearing source list\n", __func__);
1531#endif
1532
1533	/*
1534	 * Remove any existing source filters.
1535	 */
1536	TAILQ_FOREACH_SAFE(ims, &imf->imf_sources, ims_next, tims) {
1537		TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
1538		free(ims, M_IPMSOURCE);
1539		imf->imf_nsources--;
1540	}
1541	KASSERT(imf->imf_nsources == 0,
1542	    ("%s: source list not cleared", __func__));
1543
1544	/*
1545	 * Apply any new source filters, if present.
1546	 */
1547	if (msfr.msfr_nsrcs > 0) {
1548		struct in_msource	**pnims;
1549		struct in_msource	*nims;
1550		struct sockaddr_storage	*kss;
1551		struct sockaddr_storage	*pkss;
1552		sockunion_t		*psu;
1553		int			 i, j;
1554
1555		/*
1556		 * Drop the inp lock so we may sleep if we need to
1557		 * in order to satisfy a malloc request.
1558		 * We will re-take it before changing socket state.
1559		 */
1560		INP_WUNLOCK(inp);
1561#ifdef DIAGNOSTIC
1562		if (bootverbose) {
1563			printf("%s: loading %lu source list entries\n",
1564			    __func__, (unsigned long)msfr.msfr_nsrcs);
1565		}
1566#endif
1567		/*
1568		 * Make a copy of the user-space source vector so
1569		 * that we may copy them with a single copyin. This
1570		 * allows us to deal with page faults up-front.
1571		 */
1572		kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
1573		    M_TEMP, M_WAITOK);
1574		error = copyin(msfr.msfr_srcs, kss,
1575		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
1576		if (error) {
1577			free(kss, M_TEMP);
1578			return (error);
1579		}
1580
1581		/*
1582		 * Perform argument checking on every sockaddr_storage
1583		 * structure in the vector provided to us. Overwrite
1584		 * fields which should not apply to source entries.
1585		 * TODO: Check for duplicate sources on this pass.
1586		 */
1587		psu = (sockunion_t *)kss;
1588		for (i = 0; i < msfr.msfr_nsrcs; i++, psu++) {
1589			switch (psu->ss.ss_family) {
1590			case AF_INET:
1591				if (psu->sin.sin_len !=
1592				    sizeof(struct sockaddr_in)) {
1593					error = EINVAL;
1594				} else {
1595					psu->sin.sin_port = 0;
1596				}
1597				break;
1598#ifdef notyet
1599			case AF_INET6;
1600				if (psu->sin6.sin6_len !=
1601				    sizeof(struct sockaddr_in6)) {
1602					error = EINVAL;
1603				} else {
1604					psu->sin6.sin6_port = 0;
1605					psu->sin6.sin6_flowinfo = 0;
1606				}
1607				break;
1608#endif
1609			default:
1610				error = EAFNOSUPPORT;
1611				break;
1612			}
1613			if (error)
1614				break;
1615		}
1616		if (error) {
1617			free(kss, M_TEMP);
1618			return (error);
1619		}
1620
1621		/*
1622		 * Allocate a block to track all the in_msource
1623		 * entries we are about to allocate, in case we
1624		 * abruptly need to free them.
1625		 */
1626		pnims = malloc(sizeof(struct in_msource *) * msfr.msfr_nsrcs,
1627		    M_TEMP, M_WAITOK | M_ZERO);
1628
1629		/*
1630		 * Allocate up to nsrcs individual chunks.
1631		 * If we encounter an error, backtrack out of
1632		 * all allocations cleanly; updates must be atomic.
1633		 */
1634		pkss = kss;
1635		nims = NULL;
1636		for (i = 0; i < msfr.msfr_nsrcs; i++, pkss++) {
1637			nims = malloc(sizeof(struct in_msource) *
1638			    msfr.msfr_nsrcs, M_IPMSOURCE, M_WAITOK | M_ZERO);
1639			pnims[i] = nims;
1640		}
1641		if (i < msfr.msfr_nsrcs) {
1642			for (j = 0; j < i; j++) {
1643				if (pnims[j] != NULL)
1644					free(pnims[j], M_IPMSOURCE);
1645			}
1646			free(pnims, M_TEMP);
1647			free(kss, M_TEMP);
1648			return (ENOBUFS);
1649		}
1650
1651		INP_UNLOCK_ASSERT(inp);
1652
1653		/*
1654		 * Finally, apply the filters to the socket.
1655		 * Re-take the inp lock; we are changing socket state.
1656		 */
1657		pkss = kss;
1658		INP_WLOCK(inp);
1659		for (i = 0; i < msfr.msfr_nsrcs; i++, pkss++) {
1660			memcpy(&(pnims[i]->ims_addr), pkss,
1661			    sizeof(struct sockaddr_storage));
1662			TAILQ_INSERT_TAIL(&imf->imf_sources, pnims[i],
1663			    ims_next);
1664			imf->imf_nsources++;
1665		}
1666		free(pnims, M_TEMP);
1667		free(kss, M_TEMP);
1668	}
1669
1670	/*
1671	 * Update the filter mode on the socket before releasing the inpcb.
1672	 */
1673	INP_WLOCK_ASSERT(inp);
1674	imf->imf_fmode = msfr.msfr_fmode;
1675
1676out_locked:
1677	INP_WUNLOCK(inp);
1678	return (error);
1679}
1680
1681/*
1682 * Set the IP multicast options in response to user setsockopt().
1683 *
1684 * Many of the socket options handled in this function duplicate the
1685 * functionality of socket options in the regular unicast API. However,
1686 * it is not possible to merge the duplicate code, because the idempotence
1687 * of the IPv4 multicast part of the BSD Sockets API must be preserved;
1688 * the effects of these options must be treated as separate and distinct.
1689 */
1690int
1691inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
1692{
1693	struct ip_moptions	*imo;
1694	int			 error;
1695
1696	error = 0;
1697
1698	/*
1699	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
1700	 * or is a divert socket, reject it.
1701	 * XXX Unlocked read of inp_socket believed OK.
1702	 */
1703	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
1704	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
1705	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
1706		return (EOPNOTSUPP);
1707
1708	switch (sopt->sopt_name) {
1709	case IP_MULTICAST_VIF: {
1710		int vifi;
1711		/*
1712		 * Select a multicast VIF for transmission.
1713		 * Only useful if multicast forwarding is active.
1714		 */
1715		if (legal_vif_num == NULL) {
1716			error = EOPNOTSUPP;
1717			break;
1718		}
1719		error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
1720		if (error)
1721			break;
1722		if (!legal_vif_num(vifi) && (vifi != -1)) {
1723			error = EINVAL;
1724			break;
1725		}
1726		imo = inp_findmoptions(inp);
1727		imo->imo_multicast_vif = vifi;
1728		INP_WUNLOCK(inp);
1729		break;
1730	}
1731
1732	case IP_MULTICAST_IF:
1733		error = inp_set_multicast_if(inp, sopt);
1734		break;
1735
1736	case IP_MULTICAST_TTL: {
1737		u_char ttl;
1738
1739		/*
1740		 * Set the IP time-to-live for outgoing multicast packets.
1741		 * The original multicast API required a char argument,
1742		 * which is inconsistent with the rest of the socket API.
1743		 * We allow either a char or an int.
1744		 */
1745		if (sopt->sopt_valsize == sizeof(u_char)) {
1746			error = sooptcopyin(sopt, &ttl, sizeof(u_char),
1747			    sizeof(u_char));
1748			if (error)
1749				break;
1750		} else {
1751			u_int ittl;
1752
1753			error = sooptcopyin(sopt, &ittl, sizeof(u_int),
1754			    sizeof(u_int));
1755			if (error)
1756				break;
1757			if (ittl > 255) {
1758				error = EINVAL;
1759				break;
1760			}
1761			ttl = (u_char)ittl;
1762		}
1763		imo = inp_findmoptions(inp);
1764		imo->imo_multicast_ttl = ttl;
1765		INP_WUNLOCK(inp);
1766		break;
1767	}
1768
1769	case IP_MULTICAST_LOOP: {
1770		u_char loop;
1771
1772		/*
1773		 * Set the loopback flag for outgoing multicast packets.
1774		 * Must be zero or one.  The original multicast API required a
1775		 * char argument, which is inconsistent with the rest
1776		 * of the socket API.  We allow either a char or an int.
1777		 */
1778		if (sopt->sopt_valsize == sizeof(u_char)) {
1779			error = sooptcopyin(sopt, &loop, sizeof(u_char),
1780			    sizeof(u_char));
1781			if (error)
1782				break;
1783		} else {
1784			u_int iloop;
1785
1786			error = sooptcopyin(sopt, &iloop, sizeof(u_int),
1787					    sizeof(u_int));
1788			if (error)
1789				break;
1790			loop = (u_char)iloop;
1791		}
1792		imo = inp_findmoptions(inp);
1793		imo->imo_multicast_loop = !!loop;
1794		INP_WUNLOCK(inp);
1795		break;
1796	}
1797
1798	case IP_ADD_MEMBERSHIP:
1799	case IP_ADD_SOURCE_MEMBERSHIP:
1800	case MCAST_JOIN_GROUP:
1801	case MCAST_JOIN_SOURCE_GROUP:
1802		error = inp_join_group(inp, sopt);
1803		break;
1804
1805	case IP_DROP_MEMBERSHIP:
1806	case IP_DROP_SOURCE_MEMBERSHIP:
1807	case MCAST_LEAVE_GROUP:
1808	case MCAST_LEAVE_SOURCE_GROUP:
1809		error = inp_leave_group(inp, sopt);
1810		break;
1811
1812	case IP_BLOCK_SOURCE:
1813	case IP_UNBLOCK_SOURCE:
1814	case MCAST_BLOCK_SOURCE:
1815	case MCAST_UNBLOCK_SOURCE:
1816		error = inp_change_source_filter(inp, sopt);
1817		break;
1818
1819	case IP_MSFILTER:
1820		error = inp_set_source_filters(inp, sopt);
1821		break;
1822
1823	default:
1824		error = EOPNOTSUPP;
1825		break;
1826	}
1827
1828	INP_UNLOCK_ASSERT(inp);
1829
1830	return (error);
1831}
1832