1/*	$NetBSD: mld6.c,v 1.101 2019/09/25 09:53:38 ozaki-r Exp $	*/
2/*	$KAME: mld6.c,v 1.25 2001/01/16 14:14:18 itojun Exp $	*/
3
4/*
5 * Copyright (C) 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33/*
34 * Copyright (c) 1992, 1993
35 *	The Regents of the University of California.  All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * Stephen Deering of Stanford University.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 *    notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 *    notice, this list of conditions and the following disclaimer in the
47 *    documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 *    may be used to endorse or promote products derived from this software
50 *    without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
65 */
66
67/*
68 * Copyright (c) 1988 Stephen Deering.
69 *
70 * This code is derived from software contributed to Berkeley by
71 * Stephen Deering of Stanford University.
72 *
73 * Redistribution and use in source and binary forms, with or without
74 * modification, are permitted provided that the following conditions
75 * are met:
76 * 1. Redistributions of source code must retain the above copyright
77 *    notice, this list of conditions and the following disclaimer.
78 * 2. Redistributions in binary form must reproduce the above copyright
79 *    notice, this list of conditions and the following disclaimer in the
80 *    documentation and/or other materials provided with the distribution.
81 * 3. All advertising materials mentioning features or use of this software
82 *    must display the following acknowledgement:
83 *	This product includes software developed by the University of
84 *	California, Berkeley and its contributors.
85 * 4. Neither the name of the University nor the names of its contributors
86 *    may be used to endorse or promote products derived from this software
87 *    without specific prior written permission.
88 *
89 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
90 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
91 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
92 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
93 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
94 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
95 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
96 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
97 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
98 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
99 * SUCH DAMAGE.
100 *
101 *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
102 */
103
104#include <sys/cdefs.h>
105__KERNEL_RCSID(0, "$NetBSD: mld6.c,v 1.101 2019/09/25 09:53:38 ozaki-r Exp $");
106
107#ifdef _KERNEL_OPT
108#include "opt_inet.h"
109#include "opt_net_mpsafe.h"
110#endif
111
112#include <sys/param.h>
113#include <sys/systm.h>
114#include <sys/mbuf.h>
115#include <sys/socket.h>
116#include <sys/socketvar.h>
117#include <sys/syslog.h>
118#include <sys/sysctl.h>
119#include <sys/kernel.h>
120#include <sys/callout.h>
121#include <sys/cprng.h>
122#include <sys/rwlock.h>
123
124#include <net/if.h>
125
126#include <netinet/in.h>
127#include <netinet/in_var.h>
128#include <netinet6/in6_var.h>
129#include <netinet/ip6.h>
130#include <netinet6/ip6_var.h>
131#include <netinet6/scope6_var.h>
132#include <netinet/icmp6.h>
133#include <netinet6/icmp6_private.h>
134#include <netinet6/mld6_var.h>
135
/*
 * Reader/writer lock taken by the functions in this file while they walk
 * an interface's if_multiaddrs list or touch the timer, state and
 * reference count of an in6_multi record.  Initialised in mld_init().
 */
static krwlock_t	in6_multilock __cacheline_aligned;

/*
 * Protocol constants
 */

/*
 * time between repetitions of a node's initial report of interest in a
 * multicast address(in seconds)
 */
#define MLD_UNSOLICITED_REPORT_INTERVAL	10

/*
 * Packet options handed to ip6_output() by mld_sendpkt().  Filled in once
 * by mld_init() with a hop-by-hop header carrying the router alert option.
 */
static struct ip6_pktopts ip6_opts;

/* Group state transitions (send Report / Done as required). */
static void mld_start_listening(struct in6_multi *);
static void mld_stop_listening(struct in6_multi *);

/* Packet construction and transmission, and per-record timer handling. */
static struct mld_hdr *mld_allocbuf(struct mbuf **, struct in6_multi *, int);
static void mld_sendpkt(struct in6_multi *, int, const struct in6_addr *);
static void mld_starttimer(struct in6_multi *);
static void mld_stoptimer(struct in6_multi *);
static u_long mld_timerresid(struct in6_multi *);

/* Reference counting of in6_multi records. */
static void in6m_ref(struct in6_multi *);
static void in6m_unref(struct in6_multi *);
static void in6m_destroy(struct in6_multi *);
162
163void
164mld_init(void)
165{
166	static u_int8_t hbh_buf[8];
167	struct ip6_hbh *hbh = (struct ip6_hbh *)hbh_buf;
168	u_int16_t rtalert_code = htons((u_int16_t)IP6OPT_RTALERT_MLD);
169
170	/* ip6h_nxt will be fill in later */
171	hbh->ip6h_len = 0;	/* (8 >> 3) - 1 */
172
173	/* XXX: grotty hard coding... */
174	hbh_buf[2] = IP6OPT_PADN;	/* 2 byte padding */
175	hbh_buf[3] = 0;
176	hbh_buf[4] = IP6OPT_RTALERT;
177	hbh_buf[5] = IP6OPT_RTALERT_LEN - 2;
178	memcpy(&hbh_buf[6], (void *)&rtalert_code, sizeof(u_int16_t));
179
180	ip6_opts.ip6po_hbh = hbh;
181	/* We will specify the hoplimit by a multicast option. */
182	ip6_opts.ip6po_hlim = -1;
183	ip6_opts.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
184
185	rw_init(&in6_multilock);
186}
187
188static void
189mld_starttimer(struct in6_multi *in6m)
190{
191	struct timeval now;
192
193	KASSERT(rw_write_held(&in6_multilock));
194	KASSERTMSG(in6m->in6m_timer != IN6M_TIMER_UNDEF,
195	    "in6m_timer=%d", in6m->in6m_timer);
196
197	microtime(&now);
198	in6m->in6m_timer_expire.tv_sec = now.tv_sec + in6m->in6m_timer / hz;
199	in6m->in6m_timer_expire.tv_usec = now.tv_usec +
200	    (in6m->in6m_timer % hz) * (1000000 / hz);
201	if (in6m->in6m_timer_expire.tv_usec > 1000000) {
202		in6m->in6m_timer_expire.tv_sec++;
203		in6m->in6m_timer_expire.tv_usec -= 1000000;
204	}
205
206	/* start or restart the timer */
207	callout_schedule(&in6m->in6m_timer_ch, in6m->in6m_timer);
208}
209
210/*
211 * mld_stoptimer releases in6_multilock when calling callout_halt.
212 * The caller must ensure in6m won't be freed while releasing the lock.
213 */
214static void
215mld_stoptimer(struct in6_multi *in6m)
216{
217
218	KASSERT(rw_write_held(&in6_multilock));
219
220	if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
221		return;
222
223	rw_exit(&in6_multilock);
224
225	callout_halt(&in6m->in6m_timer_ch, NULL);
226
227	rw_enter(&in6_multilock, RW_WRITER);
228
229	in6m->in6m_timer = IN6M_TIMER_UNDEF;
230}
231
232static void
233mld_timeo(void *arg)
234{
235	struct in6_multi *in6m = arg;
236
237	KASSERTMSG(in6m->in6m_refcount > 0, "in6m_refcount=%d",
238	    in6m->in6m_refcount);
239
240	KERNEL_LOCK_UNLESS_NET_MPSAFE();
241	rw_enter(&in6_multilock, RW_WRITER);
242	if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
243		goto out;
244
245	in6m->in6m_timer = IN6M_TIMER_UNDEF;
246
247	switch (in6m->in6m_state) {
248	case MLD_REPORTPENDING:
249		mld_start_listening(in6m);
250		break;
251	default:
252		mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
253		break;
254	}
255
256out:
257	rw_exit(&in6_multilock);
258	KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
259}
260
261static u_long
262mld_timerresid(struct in6_multi *in6m)
263{
264	struct timeval now, diff;
265
266	microtime(&now);
267
268	if (now.tv_sec > in6m->in6m_timer_expire.tv_sec ||
269	    (now.tv_sec == in6m->in6m_timer_expire.tv_sec &&
270	    now.tv_usec > in6m->in6m_timer_expire.tv_usec)) {
271		return (0);
272	}
273	diff = in6m->in6m_timer_expire;
274	diff.tv_sec -= now.tv_sec;
275	diff.tv_usec -= now.tv_usec;
276	if (diff.tv_usec < 0) {
277		diff.tv_sec--;
278		diff.tv_usec += 1000000;
279	}
280
281	/* return the remaining time in milliseconds */
282	return diff.tv_sec * 1000 + diff.tv_usec / 1000;
283}
284
285static void
286mld_start_listening(struct in6_multi *in6m)
287{
288	struct in6_addr all_in6;
289
290	KASSERT(rw_write_held(&in6_multilock));
291
292	/*
293	 * RFC2710 page 10:
294	 * The node never sends a Report or Done for the link-scope all-nodes
295	 * address.
296	 * MLD messages are never sent for multicast addresses whose scope is 0
297	 * (reserved) or 1 (node-local).
298	 */
299	all_in6 = in6addr_linklocal_allnodes;
300	if (in6_setscope(&all_in6, in6m->in6m_ifp, NULL)) {
301		/* XXX: this should not happen! */
302		in6m->in6m_timer = 0;
303		in6m->in6m_state = MLD_OTHERLISTENER;
304	}
305	if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
306	    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) {
307		in6m->in6m_timer = IN6M_TIMER_UNDEF;
308		in6m->in6m_state = MLD_OTHERLISTENER;
309	} else {
310		mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
311		in6m->in6m_timer = cprng_fast32() %
312		    (MLD_UNSOLICITED_REPORT_INTERVAL * hz);
313		in6m->in6m_state = MLD_IREPORTEDLAST;
314
315		mld_starttimer(in6m);
316	}
317}
318
319static void
320mld_stop_listening(struct in6_multi *in6m)
321{
322	struct in6_addr allnode, allrouter;
323
324	KASSERT(rw_lock_held(&in6_multilock));
325
326	allnode = in6addr_linklocal_allnodes;
327	if (in6_setscope(&allnode, in6m->in6m_ifp, NULL)) {
328		/* XXX: this should not happen! */
329		return;
330	}
331	allrouter = in6addr_linklocal_allrouters;
332	if (in6_setscope(&allrouter, in6m->in6m_ifp, NULL)) {
333		/* XXX impossible */
334		return;
335	}
336
337	if (in6m->in6m_state == MLD_IREPORTEDLAST &&
338	    (!IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &allnode)) &&
339	    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) >
340	    IPV6_ADDR_SCOPE_INTFACELOCAL) {
341		mld_sendpkt(in6m, MLD_LISTENER_DONE, &allrouter);
342	}
343}
344
/*
 * Process a received MLD message.
 *
 * m is the packet and off the offset of the MLD header within it.
 * Packets whose IPv6 source is not link-local are dropped.  Only
 * MLD_LISTENER_QUERY and MLD_LISTENER_REPORT are acted upon:
 *  - Query: for every matching membership on the receiving interface,
 *    either send a Report at once (max delay of 0) or (re)arm the
 *    record's timer with a random delay bounded by the query's value.
 *  - Report: if we are a member of the reported group, stop our timer
 *    and note that another listener reported last.
 *
 * The mbuf is freed at "out"; the "out_nodrop" path skips m_freem().
 */
void
mld_input(struct mbuf *m, int off)
{
	struct ip6_hdr *ip6;
	struct mld_hdr *mldh;
	struct ifnet *ifp;
	struct in6_multi *in6m = NULL;
	struct in6_addr mld_addr, all_in6;
	u_long timer = 0;	/* timer value in the MLD query header */
	struct psref psref;

	/* The psref on the receiving interface is held until the end. */
	ifp = m_get_rcvif_psref(m, &psref);
	if (__predict_false(ifp == NULL))
		goto out;
	IP6_EXTHDR_GET(mldh, struct mld_hdr *, m, off, sizeof(*mldh));
	if (mldh == NULL) {
		/*
		 * NOTE(review): m is not freed on this path, presumably
		 * because IP6_EXTHDR_GET disposed of it -- confirm.
		 */
		ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
		goto out_nodrop;
	}

	ip6 = mtod(m, struct ip6_hdr *);

	/* source address validation */
	if (!IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) {
		/*
		 * RFC3590 allows the IPv6 unspecified address as the source
		 * address of MLD report and done messages.  However, as this
		 * same document says, this special rule is for snooping
		 * switches and the RFC requires routers to discard MLD packets
		 * with the unspecified source address.  The RFC only talks
		 * about hosts receiving an MLD query or report in Security
		 * Considerations, but this is probably the correct intention.
		 * RFC3590 does not talk about other cases than link-local and
		 * the unspecified source addresses, but we believe the same
		 * rule should be applied.
		 * As a result, we only allow link-local addresses as the
		 * source address; otherwise, simply discard the packet.
		 */
#if 0
		/*
		 * XXX: do not log in an input path to avoid log flooding,
		 * though RFC3590 says "SHOULD log" if the source of a query
		 * is the unspecified address.
		 */
		char ip6bufs[INET6_ADDRSTRLEN];
		char ip6bufm[INET6_ADDRSTRLEN];
		log(LOG_INFO,
		    "mld_input: src %s is not link-local (grp=%s)\n",
		    IN6_PRINT(ip6bufs,&ip6->ip6_src),
		    IN6_PRINT(ip6bufm, &mldh->mld_addr));
#endif
		goto out;
	}

	/*
	 * make a copy for local work (in6_setscope() may modify the 1st arg)
	 */
	mld_addr = mldh->mld_addr;
	if (in6_setscope(&mld_addr, ifp, NULL)) {
		/* XXX: this should not happen! */
		goto out;
	}

	/*
	 * In the MLD specification, there are 3 states and a flag.
	 *
	 * In Non-Listener state, we simply don't have a membership record.
	 * In Delaying Listener state, our timer is running (in6m->in6m_timer)
	 * In Idle Listener state, our timer is not running
	 * (in6m->in6m_timer==IN6M_TIMER_UNDEF)
	 *
	 * The flag is in6m->in6m_state, it is set to MLD_OTHERLISTENER if
	 * we have heard a report from another member, or MLD_IREPORTEDLAST
	 * if we sent the last report.
	 */
	switch (mldh->mld_type) {
	case MLD_LISTENER_QUERY: {
		struct in6_multi *next;

		if (ifp->if_flags & IFF_LOOPBACK)
			break;

		/* The queried address must be unspecified or multicast. */
		if (!IN6_IS_ADDR_UNSPECIFIED(&mld_addr) &&
		    !IN6_IS_ADDR_MULTICAST(&mld_addr))
			break;	/* print error or log stat? */

		all_in6 = in6addr_linklocal_allnodes;
		if (in6_setscope(&all_in6, ifp, NULL)) {
			/* XXX: this should not happen! */
			break;
		}

		/*
		 * - Start the timers in all of our membership records
		 *   that the query applies to for the interface on
		 *   which the query arrived excl. those that belong
		 *   to the "all-nodes" group (ff02::1).
		 * - Restart any timer that is already running but has
		 *   a value longer than the requested timeout.
		 * - Use the value specified in the query message as
		 *   the maximum timeout.
		 */
		timer = ntohs(mldh->mld_maxdelay);

		rw_enter(&in6_multilock, RW_WRITER);
		/*
		 * mld_stoptimer and mld_sendpkt release in6_multilock
		 * temporarily, so we have to prevent in6m from being freed
		 * while releasing the lock by having an extra reference to it.
		 *
		 * Also in6_purge_multi might remove items from the list of the
		 * ifp while releasing the lock. Fortunately in6_purge_multi is
		 * never executed as long as we have a psref of the ifp.
		 */
		LIST_FOREACH_SAFE(in6m, &ifp->if_multiaddrs, in6m_entry, next) {
			if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
			    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) <
			    IPV6_ADDR_SCOPE_LINKLOCAL)
				continue;

			if (in6m->in6m_state == MLD_REPORTPENDING)
				continue; /* we are not yet ready */

			/* A group-specific query only touches that group. */
			if (!IN6_IS_ADDR_UNSPECIFIED(&mld_addr) &&
			    !IN6_ARE_ADDR_EQUAL(&mld_addr, &in6m->in6m_addr))
				continue;

			if (timer == 0) {
				in6m_ref(in6m);

				/* send a report immediately */
				mld_stoptimer(in6m);
				mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
				in6m->in6m_state = MLD_IREPORTEDLAST;

				in6m_unref(in6m); /* May free in6m */
			} else if (in6m->in6m_timer == IN6M_TIMER_UNDEF ||
			    mld_timerresid(in6m) > timer) {
				/*
				 * mld_timerresid() and "timer" are compared
				 * as milliseconds; the new delay is scaled
				 * by hz / 1000 before being stored.
				 */
				in6m->in6m_timer =
				   1 + (cprng_fast32() % timer) * hz / 1000;
				mld_starttimer(in6m);
			}
		}
		rw_exit(&in6_multilock);
		break;
	    }

	case MLD_LISTENER_REPORT:
		/*
		 * For fast leave to work, we have to know that we are the
		 * last person to send a report for this group.  Reports
		 * can potentially get looped back if we are a multicast
		 * router, so discard reports sourced by me.
		 * Note that it is impossible to check IFF_LOOPBACK flag of
		 * ifp for this purpose, since ip6_mloopback pass the physical
		 * interface to looutput.
		 */
		if (m->m_flags & M_LOOP) /* XXX: grotty flag, but efficient */
			break;

		if (!IN6_IS_ADDR_MULTICAST(&mldh->mld_addr))
			break;

		/*
		 * If we belong to the group being reported, stop
		 * our timer for that group.
		 */
		rw_enter(&in6_multilock, RW_WRITER);
		in6m = in6_lookup_multi(&mld_addr, ifp);
		if (in6m) {
			/* Hold a reference: mld_stoptimer drops the lock. */
			in6m_ref(in6m);
			mld_stoptimer(in6m); /* transit to idle state */
			in6m->in6m_state = MLD_OTHERLISTENER; /* clear flag */
			in6m_unref(in6m);
			in6m = NULL; /* in6m might be freed */
		}
		rw_exit(&in6_multilock);
		break;
	default:		/* this is impossible */
#if 0
		/*
		 * this case should be impossible because of filtering in
		 * icmp6_input().  But we explicitly disabled this part
		 * just in case.
		 */
		log(LOG_ERR, "mld_input: illegal type(%d)", mldh->mld_type);
#endif
		break;
	}

out:
	m_freem(m);
out_nodrop:
	/* Reached with ifp == NULL when the receive interface was gone. */
	m_put_rcvif_psref(ifp, &psref);
}
540
541/*
542 * XXX mld_sendpkt must be called with in6_multilock held and
543 * will release in6_multilock before calling ip6_output and
544 * returning to avoid locking against myself in ip6_output.
545 */
546static void
547mld_sendpkt(struct in6_multi *in6m, int type, const struct in6_addr *dst)
548{
549	struct mbuf *mh;
550	struct mld_hdr *mldh;
551	struct ip6_hdr *ip6 = NULL;
552	struct ip6_moptions im6o;
553	struct in6_ifaddr *ia = NULL;
554	struct ifnet *ifp = in6m->in6m_ifp;
555	int ignflags;
556	struct psref psref;
557	int bound;
558
559	KASSERT(rw_write_held(&in6_multilock));
560
561	/*
562	 * At first, find a link local address on the outgoing interface
563	 * to use as the source address of the MLD packet.
564	 * We do not reject tentative addresses for MLD report to deal with
565	 * the case where we first join a link-local address.
566	 */
567	ignflags = (IN6_IFF_NOTREADY|IN6_IFF_ANYCAST) & ~IN6_IFF_TENTATIVE;
568	bound = curlwp_bind();
569	ia = in6ifa_ifpforlinklocal_psref(ifp, ignflags, &psref);
570	if (ia == NULL) {
571		curlwp_bindx(bound);
572		return;
573	}
574	if ((ia->ia6_flags & IN6_IFF_TENTATIVE)) {
575		ia6_release(ia, &psref);
576		ia = NULL;
577	}
578
579	/* Allocate two mbufs to store IPv6 header and MLD header */
580	mldh = mld_allocbuf(&mh, in6m, type);
581	if (mldh == NULL) {
582		ia6_release(ia, &psref);
583		curlwp_bindx(bound);
584		return;
585	}
586
587	/* fill src/dst here */
588	ip6 = mtod(mh, struct ip6_hdr *);
589	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
590	ip6->ip6_dst = dst ? *dst : in6m->in6m_addr;
591	ia6_release(ia, &psref);
592	curlwp_bindx(bound);
593
594	mldh->mld_addr = in6m->in6m_addr;
595	in6_clearscope(&mldh->mld_addr); /* XXX */
596	mldh->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6, sizeof(struct ip6_hdr),
597	    sizeof(struct mld_hdr));
598
599	/* construct multicast option */
600	memset(&im6o, 0, sizeof(im6o));
601	im6o.im6o_multicast_if_index = if_get_index(ifp);
602	im6o.im6o_multicast_hlim = 1;
603
604	/*
605	 * Request loopback of the report if we are acting as a multicast
606	 * router, so that the process-level routing daemon can hear it.
607	 */
608	im6o.im6o_multicast_loop = (ip6_mrouter != NULL);
609
610	/* increment output statistics */
611	ICMP6_STATINC(ICMP6_STAT_OUTHIST + type);
612	icmp6_ifstat_inc(ifp, ifs6_out_msg);
613	switch (type) {
614	case MLD_LISTENER_QUERY:
615		icmp6_ifstat_inc(ifp, ifs6_out_mldquery);
616		break;
617	case MLD_LISTENER_REPORT:
618		icmp6_ifstat_inc(ifp, ifs6_out_mldreport);
619		break;
620	case MLD_LISTENER_DONE:
621		icmp6_ifstat_inc(ifp, ifs6_out_mlddone);
622		break;
623	}
624
625	/* XXX we cannot call ip6_output with holding in6_multilock */
626	rw_exit(&in6_multilock);
627
628	ip6_output(mh, &ip6_opts, NULL, ia ? 0 : IPV6_UNSPECSRC,
629	    &im6o, NULL, NULL);
630
631	rw_enter(&in6_multilock, RW_WRITER);
632}
633
634static struct mld_hdr *
635mld_allocbuf(struct mbuf **mh, struct in6_multi *in6m, int type)
636{
637	struct mbuf *md;
638	struct mld_hdr *mldh;
639	struct ip6_hdr *ip6;
640
641	/*
642	 * Allocate mbufs to store ip6 header and MLD header.
643	 * We allocate 2 mbufs and make chain in advance because
644	 * it is more convenient when inserting the hop-by-hop option later.
645	 */
646	MGETHDR(*mh, M_DONTWAIT, MT_HEADER);
647	if (*mh == NULL)
648		return NULL;
649	MGET(md, M_DONTWAIT, MT_DATA);
650	if (md == NULL) {
651		m_free(*mh);
652		*mh = NULL;
653		return NULL;
654	}
655	(*mh)->m_next = md;
656	md->m_next = NULL;
657
658	m_reset_rcvif((*mh));
659	(*mh)->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
660	(*mh)->m_len = sizeof(struct ip6_hdr);
661	m_align(*mh, sizeof(struct ip6_hdr));
662
663	/* fill in the ip6 header */
664	ip6 = mtod(*mh, struct ip6_hdr *);
665	memset(ip6, 0, sizeof(*ip6));
666	ip6->ip6_flow = 0;
667	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
668	ip6->ip6_vfc |= IPV6_VERSION;
669	/* ip6_plen will be set later */
670	ip6->ip6_nxt = IPPROTO_ICMPV6;
671	/* ip6_hlim will be set by im6o.im6o_multicast_hlim */
672	/* ip6_src/dst will be set by mld_sendpkt() or mld_sendbuf() */
673
674	/* fill in the MLD header as much as possible */
675	md->m_len = sizeof(struct mld_hdr);
676	mldh = mtod(md, struct mld_hdr *);
677	memset(mldh, 0, sizeof(struct mld_hdr));
678	mldh->mld_type = type;
679	return mldh;
680}
681
682static void
683in6m_ref(struct in6_multi *in6m)
684{
685
686	KASSERT(rw_write_held(&in6_multilock));
687	in6m->in6m_refcount++;
688}
689
690static void
691in6m_unref(struct in6_multi *in6m)
692{
693
694	KASSERT(rw_write_held(&in6_multilock));
695	if (--in6m->in6m_refcount == 0)
696		in6m_destroy(in6m);
697}
698
699/*
700 * Add an address to the list of IP6 multicast addresses for a given interface.
701 */
702struct	in6_multi *
703in6_addmulti(struct in6_addr *maddr6, struct ifnet *ifp, int *errorp,
704    int timer)
705{
706	struct	sockaddr_in6 sin6;
707	struct	in6_multi *in6m;
708
709	*errorp = 0;
710
711	rw_enter(&in6_multilock, RW_WRITER);
712	/*
713	 * See if address already in list.
714	 */
715	in6m = in6_lookup_multi(maddr6, ifp);
716	if (in6m != NULL) {
717		/*
718		 * Found it; just increment the reference count.
719		 */
720		in6m->in6m_refcount++;
721	} else {
722		/*
723		 * New address; allocate a new multicast record
724		 * and link it into the interface's multicast list.
725		 */
726		in6m = malloc(sizeof(*in6m), M_IPMADDR, M_NOWAIT|M_ZERO);
727		if (in6m == NULL) {
728			*errorp = ENOBUFS;
729			goto out;
730		}
731
732		in6m->in6m_addr = *maddr6;
733		in6m->in6m_ifp = ifp;
734		in6m->in6m_refcount = 1;
735		in6m->in6m_timer = IN6M_TIMER_UNDEF;
736		callout_init(&in6m->in6m_timer_ch, CALLOUT_MPSAFE);
737		callout_setfunc(&in6m->in6m_timer_ch, mld_timeo, in6m);
738
739		LIST_INSERT_HEAD(&ifp->if_multiaddrs, in6m, in6m_entry);
740
741		/*
742		 * Ask the network driver to update its multicast reception
743		 * filter appropriately for the new address.
744		 */
745		sockaddr_in6_init(&sin6, maddr6, 0, 0, 0);
746		*errorp = if_mcast_op(ifp, SIOCADDMULTI, sin6tosa(&sin6));
747		if (*errorp) {
748			callout_destroy(&in6m->in6m_timer_ch);
749			LIST_REMOVE(in6m, in6m_entry);
750			free(in6m, M_IPMADDR);
751			in6m = NULL;
752			goto out;
753		}
754
755		in6m->in6m_timer = timer;
756		if (in6m->in6m_timer > 0) {
757			in6m->in6m_state = MLD_REPORTPENDING;
758			mld_starttimer(in6m);
759			goto out;
760		}
761
762		/*
763		 * Let MLD6 know that we have joined a new IP6 multicast
764		 * group.
765		 */
766		mld_start_listening(in6m);
767	}
768out:
769	rw_exit(&in6_multilock);
770	return in6m;
771}
772
773static void
774in6m_destroy(struct in6_multi *in6m)
775{
776	struct sockaddr_in6 sin6;
777
778	KASSERT(rw_write_held(&in6_multilock));
779	KASSERTMSG(in6m->in6m_refcount == 0, "in6m_refcount=%d",
780	    in6m->in6m_refcount);
781
782	/*
783	 * Unlink from list if it's listed.  This must be done before
784	 * mld_stop_listening because it releases in6_multilock and that allows
785	 * someone to look up the removing in6m from the list and add a
786	 * reference to the entry unexpectedly.
787	 */
788	if (in6_lookup_multi(&in6m->in6m_addr, in6m->in6m_ifp) != NULL)
789		LIST_REMOVE(in6m, in6m_entry);
790
791	/*
792	 * No remaining claims to this record; let MLD6 know
793	 * that we are leaving the multicast group.
794	 */
795	mld_stop_listening(in6m);
796
797	/*
798	 * Delete all references of this multicasting group from
799	 * the membership arrays
800	 */
801	in6_purge_mcast_references(in6m);
802
803	/*
804	 * Notify the network driver to update its multicast
805	 * reception filter.
806	 */
807	sockaddr_in6_init(&sin6, &in6m->in6m_addr, 0, 0, 0);
808	if_mcast_op(in6m->in6m_ifp, SIOCDELMULTI, sin6tosa(&sin6));
809
810	/* Tell mld_timeo we're halting the timer */
811	in6m->in6m_timer = IN6M_TIMER_UNDEF;
812
813	rw_exit(&in6_multilock);
814	callout_halt(&in6m->in6m_timer_ch, NULL);
815	callout_destroy(&in6m->in6m_timer_ch);
816
817	free(in6m, M_IPMADDR);
818	rw_enter(&in6_multilock, RW_WRITER);
819}
820
821/*
822 * Delete a multicast address record.
823 */
824void
825in6_delmulti_locked(struct in6_multi *in6m)
826{
827
828	KASSERT(rw_write_held(&in6_multilock));
829	KASSERTMSG(in6m->in6m_refcount > 0, "in6m_refcount=%d",
830	    in6m->in6m_refcount);
831
832	/*
833	 * The caller should have a reference to in6m. So we don't need to care
834	 * of releasing the lock in mld_stoptimer.
835	 */
836	mld_stoptimer(in6m);
837	if (--in6m->in6m_refcount == 0)
838		in6m_destroy(in6m);
839}
840
841void
842in6_delmulti(struct in6_multi *in6m)
843{
844
845	rw_enter(&in6_multilock, RW_WRITER);
846	in6_delmulti_locked(in6m);
847	rw_exit(&in6_multilock);
848}
849
850/*
851 * Look up the in6_multi record for a given IP6 multicast address
852 * on a given interface. If no matching record is found, "in6m"
853 * returns NULL.
854 */
855struct in6_multi *
856in6_lookup_multi(const struct in6_addr *addr, const struct ifnet *ifp)
857{
858	struct in6_multi *in6m;
859
860	KASSERT(rw_lock_held(&in6_multilock));
861
862	LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
863		if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, addr))
864			break;
865	}
866	return in6m;
867}
868
869void
870in6_lookup_and_delete_multi(const struct in6_addr *addr,
871    const struct ifnet *ifp)
872{
873	struct in6_multi *in6m;
874
875	rw_enter(&in6_multilock, RW_WRITER);
876	in6m = in6_lookup_multi(addr, ifp);
877	if (in6m != NULL)
878		in6_delmulti_locked(in6m);
879	rw_exit(&in6_multilock);
880}
881
882bool
883in6_multi_group(const struct in6_addr *addr, const struct ifnet *ifp)
884{
885	bool ingroup;
886
887	rw_enter(&in6_multilock, RW_READER);
888	ingroup = in6_lookup_multi(addr, ifp) != NULL;
889	rw_exit(&in6_multilock);
890
891	return ingroup;
892}
893
894/*
895 * Purge in6_multi records associated to the interface.
896 */
897void
898in6_purge_multi(struct ifnet *ifp)
899{
900	struct in6_multi *in6m, *next;
901
902	rw_enter(&in6_multilock, RW_WRITER);
903	LIST_FOREACH_SAFE(in6m, &ifp->if_multiaddrs, in6m_entry, next) {
904		LIST_REMOVE(in6m, in6m_entry);
905		/*
906		 * Normally multicast addresses are already purged at this
907		 * point. Remaining references aren't accessible via ifp,
908		 * so what we can do here is to prevent ifp from being
909		 * accessed via in6m by removing it from the list of ifp.
910		 */
911		mld_stoptimer(in6m);
912	}
913	rw_exit(&in6_multilock);
914}
915
916void
917in6_multi_lock(int op)
918{
919
920	rw_enter(&in6_multilock, op);
921}
922
923void
924in6_multi_unlock(void)
925{
926
927	rw_exit(&in6_multilock);
928}
929
930bool
931in6_multi_locked(int op)
932{
933
934	switch (op) {
935	case RW_READER:
936		return rw_read_held(&in6_multilock);
937	case RW_WRITER:
938		return rw_write_held(&in6_multilock);
939	default:
940		return rw_lock_held(&in6_multilock);
941	}
942}
943
944struct in6_multi_mship *
945in6_joingroup(struct ifnet *ifp, struct in6_addr *addr, int *errorp, int timer)
946{
947	struct in6_multi_mship *imm;
948
949	imm = malloc(sizeof(*imm), M_IPMADDR, M_NOWAIT|M_ZERO);
950	if (imm == NULL) {
951		*errorp = ENOBUFS;
952		return NULL;
953	}
954
955	imm->i6mm_maddr = in6_addmulti(addr, ifp, errorp, timer);
956	if (!imm->i6mm_maddr) {
957		/* *errorp is already set */
958		free(imm, M_IPMADDR);
959		return NULL;
960	}
961	return imm;
962}
963
964int
965in6_leavegroup(struct in6_multi_mship *imm)
966{
967	struct in6_multi *in6m;
968
969	rw_enter(&in6_multilock, RW_WRITER);
970	in6m = imm->i6mm_maddr;
971	imm->i6mm_maddr = NULL;
972	if (in6m != NULL) {
973		in6_delmulti_locked(in6m);
974	}
975	rw_exit(&in6_multilock);
976	free(imm, M_IPMADDR);
977	return 0;
978}
979
980/*
981 * DEPRECATED: keep it just to avoid breaking old sysctl users.
982 */
983static int
984in6_mkludge_sysctl(SYSCTLFN_ARGS)
985{
986
987	if (namelen != 1)
988		return EINVAL;
989	*oldlenp = 0;
990	return 0;
991}
992
/*
 * sysctl handler that exports the multicast memberships of one interface.
 *
 * name[0] is the interface index (namelen must be 1).  With oldp == NULL
 * only a size estimate is stored in *oldlenp.  Otherwise one entry per
 * membership is copied out, each consisting of:
 *   struct in6_addr  - an IPv6 address of the interface (the first one found)
 *   struct in6_addr  - the multicast group address
 *   uint32_t         - the membership's reference count
 * and *oldlenp is set to the number of bytes written.
 *
 * Returns EINVAL for a bad name, ENODEV for an unknown interface, a
 * sysctl_copyout() error, or 0.  Output that does not fit in *oldlenp is
 * silently truncated at an entry boundary.
 */
static int
in6_multicast_sysctl(SYSCTLFN_ARGS)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct in6_ifaddr *ia6;
	struct in6_multi *in6m;
	uint32_t tmp;
	int error;
	size_t written;
	struct psref psref, psref_ia;
	int bound, s;

	if (namelen != 1)
		return EINVAL;

	rw_enter(&in6_multilock, RW_READER);

	bound = curlwp_bind();
	ifp = if_get_byindex(name[0], &psref);
	if (ifp == NULL) {
		curlwp_bindx(bound);
		rw_exit(&in6_multilock);
		return ENODEV;
	}

	if (oldp == NULL) {
		/*
		 * Size probe.
		 * NOTE(review): the multicast list is counted once per
		 * interface address of any family, while the copy-out below
		 * emits it once only, so this looks like an over-estimate --
		 * confirm it is intended.
		 */
		*oldlenp = 0;
		s = pserialize_read_enter();
		IFADDR_READER_FOREACH(ifa, ifp) {
			LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
				*oldlenp += 2 * sizeof(struct in6_addr) +
				    sizeof(uint32_t);
			}
		}
		pserialize_read_exit(s);
		if_put(ifp, &psref);
		curlwp_bindx(bound);
		rw_exit(&in6_multilock);
		return 0;
	}

	error = 0;
	written = 0;
	s = pserialize_read_enter();
	IFADDR_READER_FOREACH(ifa, ifp) {
		if (ifa->ifa_addr->sa_family != AF_INET6)
			continue;

		/*
		 * Hold the address with a psref and leave the pserialize
		 * read section before calling sysctl_copyout().
		 */
		ifa_acquire(ifa, &psref_ia);
		pserialize_read_exit(s);

		ia6 = ifatoia6(ifa);
		LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
			/* Stop when the next whole entry would not fit. */
			if (written + 2 * sizeof(struct in6_addr) +
			    sizeof(uint32_t) > *oldlenp)
				goto done;
			/*
			 * XXX return the first IPv6 address to keep backward
			 * compatibility, however now multicast addresses
			 * don't belong to any IPv6 addresses so it should be
			 * unnecessary.
			 */
			error = sysctl_copyout(l, &ia6->ia_addr.sin6_addr,
			    oldp, sizeof(struct in6_addr));
			if (error)
				goto done;
			oldp = (char *)oldp + sizeof(struct in6_addr);
			written += sizeof(struct in6_addr);
			error = sysctl_copyout(l, &in6m->in6m_addr,
			    oldp, sizeof(struct in6_addr));
			if (error)
				goto done;
			oldp = (char *)oldp + sizeof(struct in6_addr);
			written += sizeof(struct in6_addr);
			tmp = in6m->in6m_refcount;
			error = sysctl_copyout(l, &tmp, oldp, sizeof(tmp));
			if (error)
				goto done;
			oldp = (char *)oldp + sizeof(tmp);
			written += sizeof(tmp);
		}

		/*
		 * Re-enter the read section so that the exit after the loop
		 * is balanced.  Only the first AF_INET6 address is used.
		 */
		s = pserialize_read_enter();

		break;
	}
	pserialize_read_exit(s);
done:
	/*
	 * NOTE(review): when the interface has no AF_INET6 address the loop
	 * above presumably ends with ifa == NULL and nothing acquired; this
	 * then relies on ifa_release() tolerating NULL -- confirm.
	 */
	ifa_release(ifa, &psref_ia);
	if_put(ifp, &psref);
	curlwp_bindx(bound);
	rw_exit(&in6_multilock);
	*oldlenp = written;
	return error;
}
1089
1090void
1091in6_sysctl_multicast_setup(struct sysctllog **clog)
1092{
1093
1094	sysctl_createv(clog, 0, NULL, NULL,
1095		       CTLFLAG_PERMANENT,
1096		       CTLTYPE_NODE, "inet6", NULL,
1097		       NULL, 0, NULL, 0,
1098		       CTL_NET, PF_INET6, CTL_EOL);
1099
1100	sysctl_createv(clog, 0, NULL, NULL,
1101		       CTLFLAG_PERMANENT,
1102		       CTLTYPE_NODE, "multicast",
1103		       SYSCTL_DESCR("Multicast information"),
1104		       in6_multicast_sysctl, 0, NULL, 0,
1105		       CTL_NET, PF_INET6, CTL_CREATE, CTL_EOL);
1106
1107	sysctl_createv(clog, 0, NULL, NULL,
1108		       CTLFLAG_PERMANENT,
1109		       CTLTYPE_NODE, "multicast_kludge",
1110		       SYSCTL_DESCR("multicast kludge information"),
1111		       in6_mkludge_sysctl, 0, NULL, 0,
1112		       CTL_NET, PF_INET6, CTL_CREATE, CTL_EOL);
1113}
1114