1/*	$OpenBSD: ip_mroute.c,v 1.142 2024/04/06 14:23:27 bluhm Exp $	*/
2/*	$NetBSD: ip_mroute.c,v 1.85 2004/04/26 01:31:57 matt Exp $	*/
3
4/*
5 * Copyright (c) 1989 Stephen Deering
6 * Copyright (c) 1992, 1993
7 *      The Regents of the University of California.  All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * Stephen Deering of Stanford University.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 *    notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 *    notice, this list of conditions and the following disclaimer in the
19 *    documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 *      @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93
37 */
38
39/*
40 * IP multicast forwarding procedures
41 *
42 * Written by David Waitzman, BBN Labs, August 1988.
43 * Modified by Steve Deering, Stanford, February 1989.
44 * Modified by Mark J. Steiglitz, Stanford, May, 1991
45 * Modified by Van Jacobson, LBL, January 1993
46 * Modified by Ajit Thyagarajan, PARC, August 1993
47 * Modified by Bill Fenner, PARC, April 1994
48 * Modified by Charles M. Hannum, NetBSD, May 1995.
49 * Modified by Ahmed Helmy, SGI, June 1996
50 * Modified by George Edmond Eddy (Rusty), ISI, February 1998
51 * Modified by Pavlin Radoslavov, USC/ISI, May 1998, August 1999, October 2000
52 * Modified by Hitoshi Asaeda, WIDE, August 2000
53 * Modified by Pavlin Radoslavov, ICSI, October 2002
54 *
55 * MROUTING Revision: 1.2
56 * advanced API support, bandwidth metering and signaling
57 */
58
59#include <sys/param.h>
60#include <sys/systm.h>
61#include <sys/mbuf.h>
62#include <sys/socket.h>
63#include <sys/socketvar.h>
64#include <sys/protosw.h>
65#include <sys/ioctl.h>
66#include <sys/syslog.h>
67
68#include <net/if.h>
69#include <net/if_var.h>
70#include <net/route.h>
71
72#include <netinet/in.h>
73#include <netinet/ip.h>
74#include <netinet/ip_var.h>
75#include <netinet/in_pcb.h>
76#include <netinet/igmp.h>
77#include <netinet/ip_mroute.h>
78
79/* #define MCAST_DEBUG */
80
/*
 * Debug tracing helper.  With MCAST_DEBUG defined, DPRINTF() prints the
 * calling function and line number followed by the formatted message and a
 * newline, but only while `mcast_debug' is non-zero.  Without MCAST_DEBUG it
 * expands to an empty statement, so its arguments are never evaluated.
 */
#ifdef MCAST_DEBUG
int mcast_debug = 1;
#define DPRINTF(fmt, args...)						\
	do {								\
		if (mcast_debug)					\
			printf("%s:%d " fmt "\n",			\
			    __func__, __LINE__, ## args);		\
	} while (0)
#else
#define DPRINTF(fmt, args...)			\
	do { } while (0)
#endif
93
94/*
95 * Globals.  All but ip_mrouter and ip_mrtproto could be static,
96 * except for netstat or debugging purposes.
97 */
/* Per routing table: the socket registered through MRT_INIT, or NULL. */
struct socket	*ip_mrouter[RT_TABLEID_MAX + 1];
/* Timer queue that multicast forwarding routes are placed on. */
struct rttimer_queue ip_mrouterq;
/*
 * Per routing table counter; set_api_config() treats a non-zero value as
 * "the MFC table is not empty" and ip_mrouter_done() resets it to zero.
 */
uint64_t	 mrt_count[RT_TABLEID_MAX + 1];
int		ip_mrtproto = IGMP_DVMRP;    /* for netstat only */

/* Multicast routing statistics (lookups, misses, upcalls, ...). */
struct mrtstat	mrtstat;
104
105struct rtentry	*mfc_find(struct ifnet *, struct in_addr *,
106    struct in_addr *, unsigned int);
107int get_sg_cnt(unsigned int, struct sioc_sg_req *);
108int get_vif_cnt(unsigned int, struct sioc_vif_req *);
109int mrt_rtwalk_mfcsysctl(struct rtentry *, void *, unsigned int);
110int ip_mrouter_init(struct socket *, struct mbuf *);
111int mrouter_rtwalk_delete(struct rtentry *, void *, unsigned int);
112int get_version(struct mbuf *);
113int add_vif(struct socket *, struct mbuf *);
114int del_vif(struct socket *, struct mbuf *);
115void update_mfc_params(struct mfcctl2 *, int, unsigned int);
116int mfc_add(struct mfcctl2 *, struct in_addr *, struct in_addr *,
117    int, unsigned int, int);
118int add_mfc(struct socket *, struct mbuf *);
119int del_mfc(struct socket *, struct mbuf *);
120int set_api_config(struct socket *, struct mbuf *); /* chose API capabilities */
121int get_api_support(struct mbuf *);
122int get_api_config(struct mbuf *);
123int socket_send(struct socket *, struct mbuf *,
124			    struct sockaddr_in *);
125int ip_mdq(struct mbuf *, struct ifnet *, struct rtentry *);
126struct ifnet *if_lookupbyvif(vifi_t, unsigned int);
127struct rtentry *rt_mcast_add(struct ifnet *, struct sockaddr *,
128    struct sockaddr *);
129void mrt_mcast_del(struct rtentry *, unsigned int);
130
131/*
132 * Kernel multicast routing API capabilities and setup.
133 * If more API capabilities are added to the kernel, they should be
134 * recorded in `mrt_api_support'.
135 */
/* Capabilities this kernel implements; reported by get_api_support(). */
static const u_int32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF |
					  MRT_MFC_RP);
/*
 * Capabilities currently enabled: written by set_api_config() (masked with
 * mrt_api_support) and cleared again by ip_mrouter_done().
 */
static u_int32_t mrt_api_config = 0;
139
140/*
141 * Find a route for a given origin IP address and Multicast group address
142 * Type of service parameter to be added in the future!!!
143 * Statistics are updated by the caller if needed
144 * (mrtstat.mrts_mfc_lookups and mrtstat.mrts_mfc_misses)
145 */
146struct rtentry *
147mfc_find(struct ifnet *ifp, struct in_addr *origin, struct in_addr *group,
148    unsigned int rtableid)
149{
150	struct rtentry		*rt;
151	struct sockaddr_in	 msin;
152
153	memset(&msin, 0, sizeof(msin));
154	msin.sin_len = sizeof(msin);
155	msin.sin_family = AF_INET;
156	msin.sin_addr = *group;
157
158	rt = rtalloc(sintosa(&msin), 0, rtableid);
159	do {
160		if (!rtisvalid(rt)) {
161			rtfree(rt);
162			return NULL;
163		}
164		/* Don't consider non multicast routes. */
165		if (ISSET(rt->rt_flags, RTF_HOST | RTF_MULTICAST) !=
166		    (RTF_HOST | RTF_MULTICAST))
167			continue;
168		/* Return first occurrence if interface is not specified. */
169		if (ifp == NULL)
170			return (rt);
171		if (rt->rt_ifidx == ifp->if_index)
172			return (rt);
173	} while ((rt = rtable_iterate(rt)) != NULL);
174
175	return (NULL);
176}
177
178/*
179 * Handle MRT setsockopt commands to modify the multicast routing tables.
180 */
181int
182ip_mrouter_set(struct socket *so, int optname, struct mbuf *m)
183{
184	struct inpcb *inp = sotoinpcb(so);
185	int error;
186
187	if (optname != MRT_INIT &&
188	    so != ip_mrouter[inp->inp_rtableid])
189		error = ENOPROTOOPT;
190	else
191		switch (optname) {
192		case MRT_INIT:
193			error = ip_mrouter_init(so, m);
194			break;
195		case MRT_DONE:
196			error = ip_mrouter_done(so);
197			break;
198		case MRT_ADD_VIF:
199			error = add_vif(so, m);
200			break;
201		case MRT_DEL_VIF:
202			error = del_vif(so, m);
203			break;
204		case MRT_ADD_MFC:
205			error = add_mfc(so, m);
206			break;
207		case MRT_DEL_MFC:
208			error = del_mfc(so, m);
209			break;
210		case MRT_API_CONFIG:
211			error = set_api_config(so, m);
212			break;
213		default:
214			error = ENOPROTOOPT;
215			break;
216		}
217
218	return (error);
219}
220
221/*
222 * Handle MRT getsockopt commands
223 */
224int
225ip_mrouter_get(struct socket *so, int optname, struct mbuf *m)
226{
227	struct inpcb *inp = sotoinpcb(so);
228	int error;
229
230	if (so != ip_mrouter[inp->inp_rtableid])
231		error = ENOPROTOOPT;
232	else {
233		switch (optname) {
234		case MRT_VERSION:
235			error = get_version(m);
236			break;
237		case MRT_API_SUPPORT:
238			error = get_api_support(m);
239			break;
240		case MRT_API_CONFIG:
241			error = get_api_config(m);
242			break;
243		default:
244			error = ENOPROTOOPT;
245			break;
246		}
247	}
248
249	return (error);
250}
251
252/*
253 * Handle ioctl commands to obtain information from the cache
254 */
255int
256mrt_ioctl(struct socket *so, u_long cmd, caddr_t data)
257{
258	struct inpcb *inp = sotoinpcb(so);
259	int error;
260
261	if (inp == NULL)
262		return (ENOTCONN);
263
264	KERNEL_LOCK();
265
266	if (so != ip_mrouter[inp->inp_rtableid])
267		error = EINVAL;
268	else
269		switch (cmd) {
270		case SIOCGETVIFCNT:
271			NET_LOCK_SHARED();
272			error = get_vif_cnt(inp->inp_rtableid,
273			    (struct sioc_vif_req *)data);
274			NET_UNLOCK_SHARED();
275			break;
276		case SIOCGETSGCNT:
277			NET_LOCK_SHARED();
278			error = get_sg_cnt(inp->inp_rtableid,
279			    (struct sioc_sg_req *)data);
280			NET_UNLOCK_SHARED();
281			break;
282		default:
283			error = ENOTTY;
284			break;
285		}
286
287	KERNEL_UNLOCK();
288	return (error);
289}
290
291/*
292 * returns the packet, byte, rpf-failure count for the source group provided
293 */
294int
295get_sg_cnt(unsigned int rtableid, struct sioc_sg_req *req)
296{
297	struct rtentry *rt;
298	struct mfc *mfc;
299
300	rt = mfc_find(NULL, &req->src, &req->grp, rtableid);
301	if (rt == NULL) {
302		req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
303		return (EADDRNOTAVAIL);
304	}
305
306	req->pktcnt = req->bytecnt = req->wrong_if = 0;
307	do {
308		/* Don't consider non multicast routes. */
309		if (ISSET(rt->rt_flags, RTF_HOST | RTF_MULTICAST) !=
310		    (RTF_HOST | RTF_MULTICAST))
311			continue;
312
313		mfc = (struct mfc *)rt->rt_llinfo;
314		if (mfc == NULL)
315			continue;
316
317		req->pktcnt += mfc->mfc_pkt_cnt;
318		req->bytecnt += mfc->mfc_byte_cnt;
319		req->wrong_if += mfc->mfc_wrong_if;
320	} while ((rt = rtable_iterate(rt)) != NULL);
321
322	return (0);
323}
324
325/*
326 * returns the input and output packet and byte counts on the vif provided
327 */
328int
329get_vif_cnt(unsigned int rtableid, struct sioc_vif_req *req)
330{
331	struct ifnet	*ifp;
332	struct vif	*v;
333	vifi_t		 vifi = req->vifi;
334
335	if ((ifp = if_lookupbyvif(vifi, rtableid)) == NULL)
336		return (EINVAL);
337
338	v = (struct vif *)ifp->if_mcast;
339	req->icount = v->v_pkt_in;
340	req->ocount = v->v_pkt_out;
341	req->ibytes = v->v_bytes_in;
342	req->obytes = v->v_bytes_out;
343
344	return (0);
345}
346
347int
348mrt_sysctl_vif(void *oldp, size_t *oldlenp)
349{
350	caddr_t where = oldp;
351	size_t needed, given;
352	struct ifnet *ifp;
353	struct vif *vifp;
354	struct vifinfo vinfo;
355
356	given = *oldlenp;
357	needed = 0;
358	memset(&vinfo, 0, sizeof vinfo);
359	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
360		if ((vifp = (struct vif *)ifp->if_mcast) == NULL)
361			continue;
362
363		vinfo.v_vifi = vifp->v_id;
364		vinfo.v_flags = vifp->v_flags;
365		vinfo.v_threshold = vifp->v_threshold;
366		vinfo.v_lcl_addr = vifp->v_lcl_addr;
367		vinfo.v_rmt_addr = vifp->v_rmt_addr;
368		vinfo.v_pkt_in = vifp->v_pkt_in;
369		vinfo.v_pkt_out = vifp->v_pkt_out;
370		vinfo.v_bytes_in = vifp->v_bytes_in;
371		vinfo.v_bytes_out = vifp->v_bytes_out;
372
373		needed += sizeof(vinfo);
374		if (where && needed <= given) {
375			int error;
376
377			error = copyout(&vinfo, where, sizeof(vinfo));
378			if (error)
379				return (error);
380			where += sizeof(vinfo);
381		}
382	}
383	if (where) {
384		*oldlenp = needed;
385		if (given < needed)
386			return (ENOMEM);
387	} else
388		*oldlenp = (11 * needed) / 10;
389
390	return (0);
391}
392
/*
 * Walker state shared between mrt_sysctl_mfc() and its rtable_walk()
 * callback mrt_rtwalk_mfcsysctl().
 */
struct mfcsysctlarg {
	struct mfcinfo	*msa_minfos;	/* output array, NULL when only sizing */
	size_t		 msa_len;	/* size of msa_minfos in bytes */
	size_t		 msa_needed;	/* bytes accounted for by the walk */
};
398
399int
400mrt_rtwalk_mfcsysctl(struct rtentry *rt, void *arg, unsigned int rtableid)
401{
402	struct mfc		*mfc = (struct mfc *)rt->rt_llinfo;
403	struct mfcsysctlarg	*msa = (struct mfcsysctlarg *)arg;
404	struct ifnet		*ifp;
405	struct vif		*v;
406	struct mfcinfo		*minfo;
407	int			 new = 0;
408
409	/* Skip entries being removed. */
410	if (mfc == NULL)
411		return (0);
412
413	/* Skip non-multicast routes. */
414	if (ISSET(rt->rt_flags, RTF_HOST | RTF_MULTICAST) !=
415	    (RTF_HOST | RTF_MULTICAST))
416		return (0);
417
418	/* User just asked for the output size. */
419	if (msa->msa_minfos == NULL) {
420		msa->msa_needed += sizeof(*minfo);
421		return (0);
422	}
423
424	/* Skip route with invalid interfaces. */
425	if ((ifp = if_get(rt->rt_ifidx)) == NULL)
426		return (0);
427	if ((v = (struct vif *)ifp->if_mcast) == NULL) {
428		if_put(ifp);
429		return (0);
430	}
431
432	for (minfo = msa->msa_minfos;
433	    (uint8_t *)(minfo + 1) <=
434	    (uint8_t *)msa->msa_minfos + msa->msa_len;
435	    minfo++) {
436		/* Find a new entry or update old entry. */
437		if (minfo->mfc_origin.s_addr !=
438		    satosin(rt->rt_gateway)->sin_addr.s_addr ||
439		    minfo->mfc_mcastgrp.s_addr !=
440		    satosin(rt_key(rt))->sin_addr.s_addr) {
441			if (minfo->mfc_origin.s_addr != 0 ||
442			    minfo->mfc_mcastgrp.s_addr != 0)
443				continue;
444
445			new = 1;
446		}
447
448		minfo->mfc_origin = satosin(rt->rt_gateway)->sin_addr;
449		minfo->mfc_mcastgrp = satosin(rt_key(rt))->sin_addr;
450		minfo->mfc_parent = mfc->mfc_parent;
451		minfo->mfc_pkt_cnt += mfc->mfc_pkt_cnt;
452		minfo->mfc_byte_cnt += mfc->mfc_byte_cnt;
453		minfo->mfc_ttls[v->v_id] = mfc->mfc_ttl;
454		break;
455	}
456
457	if (new != 0)
458		msa->msa_needed += sizeof(*minfo);
459
460	if_put(ifp);
461
462	return (0);
463}
464
465int
466mrt_sysctl_mfc(void *oldp, size_t *oldlenp)
467{
468	unsigned int		 rtableid;
469	int			 error;
470	struct mfcsysctlarg	 msa;
471
472	if (oldp != NULL && *oldlenp > MAXPHYS)
473		return (EINVAL);
474
475	memset(&msa, 0, sizeof(msa));
476	if (oldp != NULL && *oldlenp > 0) {
477		msa.msa_minfos = malloc(*oldlenp, M_TEMP, M_WAITOK | M_ZERO);
478		msa.msa_len = *oldlenp;
479	}
480
481	for (rtableid = 0; rtableid <= RT_TABLEID_MAX; rtableid++) {
482		rtable_walk(rtableid, AF_INET, NULL, mrt_rtwalk_mfcsysctl,
483		    &msa);
484	}
485
486	if (msa.msa_minfos != NULL && msa.msa_needed > 0 &&
487	    (error = copyout(msa.msa_minfos, oldp, msa.msa_needed)) != 0) {
488		free(msa.msa_minfos, M_TEMP, msa.msa_len);
489		return (error);
490	}
491
492	free(msa.msa_minfos, M_TEMP, msa.msa_len);
493	*oldlenp = msa.msa_needed;
494
495	return (0);
496}
497
498/*
499 * Enable multicast routing
500 */
501int
502ip_mrouter_init(struct socket *so, struct mbuf *m)
503{
504	struct inpcb *inp = sotoinpcb(so);
505	unsigned int rtableid = inp->inp_rtableid;
506	int *v;
507
508	if (so->so_type != SOCK_RAW ||
509	    so->so_proto->pr_protocol != IPPROTO_IGMP)
510		return (EOPNOTSUPP);
511
512	if (m == NULL || m->m_len < sizeof(int))
513		return (EINVAL);
514
515	v = mtod(m, int *);
516	if (*v != 1)
517		return (EINVAL);
518
519	if (ip_mrouter[rtableid] != NULL)
520		return (EADDRINUSE);
521
522	ip_mrouter[rtableid] = so;
523
524	return (0);
525}
526
527int
528mrouter_rtwalk_delete(struct rtentry *rt, void *arg, unsigned int rtableid)
529{
530	/* Skip non-multicast routes. */
531	if (ISSET(rt->rt_flags, RTF_HOST | RTF_MULTICAST) !=
532	    (RTF_HOST | RTF_MULTICAST))
533		return (0);
534
535	return EEXIST;
536}
537
538/*
539 * Disable multicast routing
540 */
541int
542ip_mrouter_done(struct socket *so)
543{
544	struct inpcb *inp = sotoinpcb(so);
545	struct ifnet *ifp;
546	unsigned int rtableid = inp->inp_rtableid;
547	int error;
548
549	NET_ASSERT_LOCKED();
550
551	/* Delete all remaining installed multicast routes. */
552	do {
553		struct rtentry *rt = NULL;
554
555		error = rtable_walk(rtableid, AF_INET, &rt,
556		    mrouter_rtwalk_delete, NULL);
557		if (rt != NULL && error == EEXIST) {
558			mrt_mcast_del(rt, rtableid);
559			error = EAGAIN;
560		}
561		rtfree(rt);
562	} while (error == EAGAIN);
563
564	/* Unregister all interfaces in the domain. */
565	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
566		if (ifp->if_rdomain != rtableid)
567			continue;
568
569		vif_delete(ifp);
570	}
571
572	mrt_api_config = 0;
573
574	ip_mrouter[rtableid] = NULL;
575	mrt_count[rtableid] = 0;
576
577	return (0);
578}
579
580int
581get_version(struct mbuf *m)
582{
583	int *v = mtod(m, int *);
584
585	*v = 0x0305;	/* XXX !!!! */
586	m->m_len = sizeof(int);
587	return (0);
588}
589
590/*
591 * Configure API capabilities
592 */
593int
594set_api_config(struct socket *so, struct mbuf *m)
595{
596	struct inpcb *inp = sotoinpcb(so);
597	struct ifnet *ifp;
598	u_int32_t *apival;
599	unsigned int rtableid = inp->inp_rtableid;
600
601	if (m == NULL || m->m_len < sizeof(u_int32_t))
602		return (EINVAL);
603
604	apival = mtod(m, u_int32_t *);
605
606	/*
607	 * We can set the API capabilities only if it is the first operation
608	 * after MRT_INIT. I.e.:
609	 *  - there are no vifs installed
610	 *  - the MFC table is empty
611	 */
612	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
613		if (ifp->if_rdomain != rtableid)
614			continue;
615		if (ifp->if_mcast == NULL)
616			continue;
617
618		*apival = 0;
619		return (EPERM);
620	}
621	if (mrt_count[rtableid] > 0) {
622		*apival = 0;
623		return (EPERM);
624	}
625
626	mrt_api_config = *apival & mrt_api_support;
627	*apival = mrt_api_config;
628
629	return (0);
630}
631
632/*
633 * Get API capabilities
634 */
635int
636get_api_support(struct mbuf *m)
637{
638	u_int32_t *apival;
639
640	if (m == NULL || m->m_len < sizeof(u_int32_t))
641		return (EINVAL);
642
643	apival = mtod(m, u_int32_t *);
644
645	*apival = mrt_api_support;
646
647	return (0);
648}
649
650/*
651 * Get API configured capabilities
652 */
653int
654get_api_config(struct mbuf *m)
655{
656	u_int32_t *apival;
657
658	if (m == NULL || m->m_len < sizeof(u_int32_t))
659		return (EINVAL);
660
661	apival = mtod(m, u_int32_t *);
662
663	*apival = mrt_api_config;
664
665	return (0);
666}
667
/*
 * Shared AF_INET sockaddr template: add_vif() and ip_mforward() store an
 * address in sin_addr right before passing it on.
 */
static struct sockaddr_in sin = { sizeof(sin), AF_INET };
669
670int
671add_vif(struct socket *so, struct mbuf *m)
672{
673	struct inpcb *inp = sotoinpcb(so);
674	struct vifctl *vifcp;
675	struct vif *vifp;
676	struct ifaddr *ifa;
677	struct ifnet *ifp;
678	struct ifreq ifr;
679	int error;
680	unsigned int rtableid = inp->inp_rtableid;
681
682	NET_ASSERT_LOCKED();
683
684	if (m == NULL || m->m_len < sizeof(struct vifctl))
685		return (EINVAL);
686
687	vifcp = mtod(m, struct vifctl *);
688	if (vifcp->vifc_vifi >= MAXVIFS)
689		return (EINVAL);
690	if (in_nullhost(vifcp->vifc_lcl_addr))
691		return (EADDRNOTAVAIL);
692	if (if_lookupbyvif(vifcp->vifc_vifi, rtableid) != NULL)
693		return (EADDRINUSE);
694
695	/* Tunnels are no longer supported use gif(4) instead. */
696	if (vifcp->vifc_flags & VIFF_TUNNEL)
697		return (EOPNOTSUPP);
698	{
699		sin.sin_addr = vifcp->vifc_lcl_addr;
700		ifa = ifa_ifwithaddr(sintosa(&sin), rtableid);
701		if (ifa == NULL)
702			return (EADDRNOTAVAIL);
703	}
704
705	/* Use the physical interface associated with the address. */
706	ifp = ifa->ifa_ifp;
707	if (ifp->if_mcast != NULL)
708		return (EADDRINUSE);
709
710	{
711		/* Make sure the interface supports multicast. */
712		if ((ifp->if_flags & IFF_MULTICAST) == 0)
713			return (EOPNOTSUPP);
714
715		/* Enable promiscuous reception of all IP multicasts. */
716		memset(&ifr, 0, sizeof(ifr));
717		satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in);
718		satosin(&ifr.ifr_addr)->sin_family = AF_INET;
719		satosin(&ifr.ifr_addr)->sin_addr = zeroin_addr;
720		KERNEL_LOCK();
721		error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr);
722		KERNEL_UNLOCK();
723		if (error)
724			return (error);
725	}
726
727	vifp = malloc(sizeof(*vifp), M_MRTABLE, M_WAITOK | M_ZERO);
728	ifp->if_mcast = (caddr_t)vifp;
729
730	vifp->v_id = vifcp->vifc_vifi;
731	vifp->v_flags = vifcp->vifc_flags;
732	vifp->v_threshold = vifcp->vifc_threshold;
733	vifp->v_lcl_addr = vifcp->vifc_lcl_addr;
734	vifp->v_rmt_addr = vifcp->vifc_rmt_addr;
735
736	return (0);
737}
738
739int
740del_vif(struct socket *so, struct mbuf *m)
741{
742	struct inpcb *inp = sotoinpcb(so);
743	struct ifnet *ifp;
744	vifi_t *vifip;
745	unsigned int rtableid = inp->inp_rtableid;
746
747	NET_ASSERT_LOCKED();
748
749	if (m == NULL || m->m_len < sizeof(vifi_t))
750		return (EINVAL);
751
752	vifip = mtod(m, vifi_t *);
753	if ((ifp = if_lookupbyvif(*vifip, rtableid)) == NULL)
754		return (EADDRNOTAVAIL);
755
756	vif_delete(ifp);
757	return (0);
758}
759
760void
761vif_delete(struct ifnet *ifp)
762{
763	struct vif	*v;
764	struct ifreq	 ifr;
765
766	if ((v = (struct vif *)ifp->if_mcast) == NULL)
767		return;
768
769	ifp->if_mcast = NULL;
770
771	memset(&ifr, 0, sizeof(ifr));
772	satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in);
773	satosin(&ifr.ifr_addr)->sin_family = AF_INET;
774	satosin(&ifr.ifr_addr)->sin_addr = zeroin_addr;
775	KERNEL_LOCK();
776	(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
777	KERNEL_UNLOCK();
778
779	free(v, M_MRTABLE, sizeof(*v));
780}
781
782void
783mfc_expire_route(struct rtentry *rt, u_int rtableid)
784{
785	struct mfc	*mfc = (struct mfc *)rt->rt_llinfo;
786
787	/* Skip entry being deleted. */
788	if (mfc == NULL)
789		return;
790
791	DPRINTF("Route domain %d origin %#08X group %#08x interface %d "
792	    "expire %s", rtableid, satosin(rt->rt_gateway)->sin_addr.s_addr,
793	    satosin(rt_key(rt))->sin_addr.s_addr,
794	    rt->rt_ifidx, mfc->mfc_expire ? "yes" : "no");
795
796	/* Not expired, add it back to the queue. */
797	if (mfc->mfc_expire == 0) {
798		mfc->mfc_expire = 1;
799		rt_timer_add(rt, &ip_mrouterq, rtableid);
800		return;
801	}
802
803	mrt_mcast_del(rt, rtableid);
804}
805
806int
807mfc_add_route(struct ifnet *ifp, struct sockaddr *origin,
808    struct sockaddr *group, struct mfcctl2 *mfccp, int wait)
809{
810	struct vif		*v = (struct vif *)ifp->if_mcast;
811	struct rtentry		*rt;
812	struct mfc		*mfc;
813	unsigned int		 rtableid = ifp->if_rdomain;
814
815	rt = rt_mcast_add(ifp, origin, group);
816	if (rt == NULL)
817		return (EHOSTUNREACH);
818
819	mfc = malloc(sizeof(*mfc), M_MRTABLE, wait | M_ZERO);
820	if (mfc == NULL) {
821		DPRINTF("origin %#08X group %#08X parent %d (%s) "
822		    "malloc failed",
823		    satosin(origin)->sin_addr.s_addr,
824		    satosin(group)->sin_addr.s_addr,
825		    mfccp->mfcc_parent, ifp->if_xname);
826		mrt_mcast_del(rt, rtableid);
827		rtfree(rt);
828		return (ENOMEM);
829	}
830
831	rt->rt_llinfo = (caddr_t)mfc;
832
833	rt_timer_add(rt, &ip_mrouterq, rtableid);
834
835	mfc->mfc_parent = mfccp->mfcc_parent;
836	mfc->mfc_pkt_cnt = 0;
837	mfc->mfc_byte_cnt = 0;
838	mfc->mfc_wrong_if = 0;
839	mfc->mfc_ttl = mfccp->mfcc_ttls[v->v_id];
840	mfc->mfc_flags = mfccp->mfcc_flags[v->v_id] & mrt_api_config &
841	    MRT_MFC_FLAGS_ALL;
842	mfc->mfc_expire = 0;
843
844	/* set the RP address */
845	if (mrt_api_config & MRT_MFC_RP)
846		mfc->mfc_rp = mfccp->mfcc_rp;
847	else
848		mfc->mfc_rp = zeroin_addr;
849
850	rtfree(rt);
851
852	return (0);
853}
854
855void
856update_mfc_params(struct mfcctl2 *mfccp, int wait, unsigned int rtableid)
857{
858	struct rtentry		*rt;
859	struct mfc		*mfc;
860	struct ifnet		*ifp;
861	int			 i;
862	struct sockaddr_in	 osin, msin;
863
864	memset(&osin, 0, sizeof(osin));
865	osin.sin_len = sizeof(osin);
866	osin.sin_family = AF_INET;
867	osin.sin_addr = mfccp->mfcc_origin;
868
869	memset(&msin, 0, sizeof(msin));
870	msin.sin_len = sizeof(msin);
871	msin.sin_family = AF_INET;
872	msin.sin_addr = mfccp->mfcc_mcastgrp;
873
874	for (i = 0; i < MAXVIFS; i++) {
875		/* Don't add/del upstream routes here. */
876		if (i == mfccp->mfcc_parent)
877			continue;
878
879		/* Test for vif existence and then update the entry. */
880		if ((ifp = if_lookupbyvif(i, rtableid)) == NULL)
881			continue;
882
883		rt = mfc_find(ifp, &mfccp->mfcc_origin,
884		    &mfccp->mfcc_mcastgrp, rtableid);
885
886		/* vif not configured or removed. */
887		if (mfccp->mfcc_ttls[i] == 0) {
888			/* Route doesn't exist, nothing to do. */
889			if (rt == NULL)
890				continue;
891
892			DPRINTF("del route (group %#08X) for vif %d (%s)",
893			    mfccp->mfcc_mcastgrp.s_addr, i, ifp->if_xname);
894			mrt_mcast_del(rt, rtableid);
895			rtfree(rt);
896			continue;
897		}
898
899		/* Route exists, look for changes. */
900		if (rt != NULL) {
901			mfc = (struct mfc *)rt->rt_llinfo;
902			/* Skip route being deleted. */
903			if (mfc == NULL) {
904				rtfree(rt);
905				continue;
906			}
907
908			/* No new changes to apply. */
909			if (mfccp->mfcc_ttls[i] == mfc->mfc_ttl &&
910			    mfccp->mfcc_parent == mfc->mfc_parent) {
911				rtfree(rt);
912				continue;
913			}
914
915			DPRINTF("update route (group %#08X) for vif %d (%s)",
916			    mfccp->mfcc_mcastgrp.s_addr, i, ifp->if_xname);
917			mfc->mfc_ttl = mfccp->mfcc_ttls[i];
918			mfc->mfc_parent = mfccp->mfcc_parent;
919			rtfree(rt);
920			continue;
921		}
922
923		DPRINTF("add route (group %#08X) for vif %d (%s)",
924		    mfccp->mfcc_mcastgrp.s_addr, i, ifp->if_xname);
925
926		mfc_add_route(ifp, sintosa(&osin), sintosa(&msin),
927		    mfccp, wait);
928	}
929
930	/* Create route for the parent interface. */
931	if ((ifp = if_lookupbyvif(mfccp->mfcc_parent, rtableid)) == NULL) {
932		DPRINTF("failed to find upstream interface %d",
933		    mfccp->mfcc_parent);
934		return;
935	}
936
937	/* We already have a route, nothing to do here. */
938	if ((rt = mfc_find(ifp, &mfccp->mfcc_origin,
939	    &mfccp->mfcc_mcastgrp, rtableid)) != NULL) {
940		rtfree(rt);
941		return;
942	}
943
944	DPRINTF("add upstream route (group %#08X) for if %s",
945	    mfccp->mfcc_mcastgrp.s_addr, ifp->if_xname);
946	mfc_add_route(ifp, sintosa(&osin), sintosa(&msin), mfccp, wait);
947}
948
949int
950mfc_add(struct mfcctl2 *mfcctl2, struct in_addr *origin,
951    struct in_addr *group, int vidx, unsigned int rtableid, int wait)
952{
953	struct ifnet		*ifp;
954	struct vif		*v;
955	struct mfcctl2		 mfcctl;
956
957	ifp = if_lookupbyvif(vidx, rtableid);
958	if (ifp == NULL ||
959	    (v = (struct vif *)ifp->if_mcast) == NULL)
960		return (EHOSTUNREACH);
961
962	memset(&mfcctl, 0, sizeof(mfcctl));
963	if (mfcctl2 == NULL) {
964		mfcctl.mfcc_origin = *origin;
965		mfcctl.mfcc_mcastgrp = *group;
966		mfcctl.mfcc_parent = vidx;
967	} else
968		memcpy(&mfcctl, mfcctl2, sizeof(mfcctl));
969
970	update_mfc_params(&mfcctl, wait, rtableid);
971
972	return (0);
973}
974
975int
976add_mfc(struct socket *so, struct mbuf *m)
977{
978	struct inpcb *inp = sotoinpcb(so);
979	struct mfcctl2 mfcctl2;
980	int mfcctl_size = sizeof(struct mfcctl);
981	unsigned int rtableid = inp->inp_rtableid;
982
983	NET_ASSERT_LOCKED();
984
985	if (mrt_api_config & MRT_API_FLAGS_ALL)
986		mfcctl_size = sizeof(struct mfcctl2);
987
988	if (m == NULL || m->m_len < mfcctl_size)
989		return (EINVAL);
990
991	/*
992	 * select data size depending on API version.
993	 */
994	if (mrt_api_config & MRT_API_FLAGS_ALL) {
995		struct mfcctl2 *mp2 = mtod(m, struct mfcctl2 *);
996		memcpy((caddr_t)&mfcctl2, mp2, sizeof(*mp2));
997	} else {
998		struct mfcctl *mp = mtod(m, struct mfcctl *);
999		memcpy((caddr_t)&mfcctl2, mp, sizeof(*mp));
1000		memset((caddr_t)&mfcctl2 + sizeof(struct mfcctl), 0,
1001		    sizeof(mfcctl2) - sizeof(struct mfcctl));
1002	}
1003
1004	if (mfc_add(&mfcctl2, &mfcctl2.mfcc_origin, &mfcctl2.mfcc_mcastgrp,
1005	    mfcctl2.mfcc_parent, rtableid, M_WAITOK) == -1)
1006		return (EINVAL);
1007
1008	return (0);
1009}
1010
1011int
1012del_mfc(struct socket *so, struct mbuf *m)
1013{
1014	struct inpcb *inp = sotoinpcb(so);
1015	struct rtentry *rt;
1016	struct mfcctl2 mfcctl2;
1017	int mfcctl_size = sizeof(struct mfcctl);
1018	struct mfcctl *mp;
1019	unsigned int rtableid = inp->inp_rtableid;
1020
1021	NET_ASSERT_LOCKED();
1022
1023	/*
1024	 * XXX: for deleting MFC entries the information in entries
1025	 * of size "struct mfcctl" is sufficient.
1026	 */
1027
1028	if (m == NULL || m->m_len < mfcctl_size)
1029		return (EINVAL);
1030
1031	mp = mtod(m, struct mfcctl *);
1032
1033	memcpy((caddr_t)&mfcctl2, mp, sizeof(*mp));
1034	memset((caddr_t)&mfcctl2 + sizeof(struct mfcctl), 0,
1035	    sizeof(mfcctl2) - sizeof(struct mfcctl));
1036
1037	DPRINTF("origin %#08X group %#08X rtableid %d",
1038	    mfcctl2.mfcc_origin.s_addr, mfcctl2.mfcc_mcastgrp.s_addr, rtableid);
1039
1040	while ((rt = mfc_find(NULL, &mfcctl2.mfcc_origin,
1041	    &mfcctl2.mfcc_mcastgrp, rtableid)) != NULL) {
1042		mrt_mcast_del(rt, rtableid);
1043		rtfree(rt);
1044	}
1045
1046	return (0);
1047}
1048
1049int
1050socket_send(struct socket *so, struct mbuf *mm, struct sockaddr_in *src)
1051{
1052	if (so != NULL) {
1053		int ret;
1054
1055		mtx_enter(&so->so_rcv.sb_mtx);
1056		ret = sbappendaddr(so, &so->so_rcv, sintosa(src), mm, NULL);
1057		mtx_leave(&so->so_rcv.sb_mtx);
1058
1059		if (ret != 0) {
1060			sorwakeup(so);
1061			return (0);
1062		}
1063	}
1064	m_freem(mm);
1065	return (-1);
1066}
1067
1068/*
1069 * IP multicast forwarding function. This function assumes that the packet
1070 * pointed to by "ip" has arrived on (or is about to be sent to) the interface
1071 * pointed to by "ifp", and the packet is to be relayed to other networks
1072 * that have members of the packet's destination IP multicast group.
1073 *
1074 * The packet is returned unscathed to the caller, unless it is
1075 * erroneous, in which case a non-zero return value tells the caller to
1076 * discard it.
1077 */
1078
1079#define IP_HDR_LEN  20	/* # bytes of fixed IP header (excluding options) */
1080#define TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */
1081
1082int
1083ip_mforward(struct mbuf *m, struct ifnet *ifp)
1084{
1085	struct ip *ip = mtod(m, struct ip *);
1086	struct vif *v;
1087	struct rtentry *rt;
1088	static int srctun = 0;
1089	struct mbuf *mm;
1090	unsigned int rtableid = ifp->if_rdomain;
1091
1092	if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
1093	    ((u_char *)(ip + 1))[1] != IPOPT_LSRR) {
1094		/*
1095		 * Packet arrived via a physical interface or
1096		 * an encapsulated tunnel or a register_vif.
1097		 */
1098	} else {
1099		/*
1100		 * Packet arrived through a source-route tunnel.
1101		 * Source-route tunnels are no longer supported.
1102		 */
1103		if ((srctun++ % 1000) == 0)
1104			log(LOG_ERR, "ip_mforward: received source-routed "
1105			    "packet from %x\n", ntohl(ip->ip_src.s_addr));
1106		return (EOPNOTSUPP);
1107	}
1108
1109	/*
1110	 * Don't forward a packet with time-to-live of zero or one,
1111	 * or a packet destined to a local-only group.
1112	 */
1113	if (ip->ip_ttl <= 1 || IN_LOCAL_GROUP(ip->ip_dst.s_addr))
1114		return (0);
1115
1116	/*
1117	 * Determine forwarding vifs from the forwarding cache table
1118	 */
1119	++mrtstat.mrts_mfc_lookups;
1120	rt = mfc_find(NULL, &ip->ip_src, &ip->ip_dst, rtableid);
1121
1122	/* Entry exists, so forward if necessary */
1123	if (rt != NULL) {
1124		return (ip_mdq(m, ifp, rt));
1125	} else {
1126		/*
1127		 * If we don't have a route for packet's origin,
1128		 * Make a copy of the packet & send message to routing daemon
1129		 */
1130		int hlen = ip->ip_hl << 2;
1131
1132		++mrtstat.mrts_mfc_misses;
1133		mrtstat.mrts_no_route++;
1134
1135		{
1136			struct igmpmsg *im;
1137
1138			/*
1139			 * Locate the vifi for the incoming interface for
1140			 * this packet.
1141			 * If none found, drop packet.
1142			 */
1143			if ((v = (struct vif *)ifp->if_mcast) == NULL)
1144				return (EHOSTUNREACH);
1145			/*
1146			 * Make a copy of the header to send to the user level
1147			 * process
1148			 */
1149			mm = m_copym(m, 0, hlen, M_NOWAIT);
1150			if (mm == NULL ||
1151			    (mm = m_pullup(mm, hlen)) == NULL)
1152				return (ENOBUFS);
1153
1154			/*
1155			 * Send message to routing daemon to install
1156			 * a route into the kernel table
1157			 */
1158
1159			im = mtod(mm, struct igmpmsg *);
1160			im->im_msgtype = IGMPMSG_NOCACHE;
1161			im->im_mbz = 0;
1162			im->im_vif = v->v_id;
1163
1164			mrtstat.mrts_upcalls++;
1165
1166			sin.sin_addr = ip->ip_src;
1167			if (socket_send(ip_mrouter[rtableid], mm, &sin) < 0) {
1168				log(LOG_WARNING, "ip_mforward: ip_mrouter "
1169				    "socket queue full\n");
1170				++mrtstat.mrts_upq_sockfull;
1171				return (ENOBUFS);
1172			}
1173
1174			mfc_add(NULL, &ip->ip_src, &ip->ip_dst, v->v_id,
1175			    rtableid, M_NOWAIT);
1176		}
1177
1178		return (0);
1179	}
1180}
1181
1182/*
1183 * Packet forwarding routine once entry in the cache is made
1184 */
1185int
1186ip_mdq(struct mbuf *m, struct ifnet *ifp0, struct rtentry *rt)
1187{
1188	struct ip  *ip = mtod(m, struct ip *);
1189	struct mfc *mfc = (struct mfc *)rt->rt_llinfo;
1190	struct vif *v = (struct vif *)ifp0->if_mcast;
1191	struct ifnet *ifp;
1192	struct mbuf *mc;
1193	struct ip_moptions imo;
1194
1195	/* Sanity check: we have all promised pointers. */
1196	if (v == NULL || mfc == NULL) {
1197		rtfree(rt);
1198		return (EHOSTUNREACH);
1199	}
1200
1201	/*
1202	 * Don't forward if it didn't arrive from the parent vif for its origin.
1203	 */
1204	if (mfc->mfc_parent != v->v_id) {
1205		/* came in the wrong interface */
1206		++mrtstat.mrts_wrong_if;
1207		mfc->mfc_wrong_if++;
1208		rtfree(rt);
1209		return (0);
1210	}
1211
1212	/* If I sourced this packet, it counts as output, else it was input. */
1213	if (in_hosteq(ip->ip_src, v->v_lcl_addr)) {
1214		v->v_pkt_out++;
1215		v->v_bytes_out += m->m_pkthdr.len;
1216	} else {
1217		v->v_pkt_in++;
1218		v->v_bytes_in += m->m_pkthdr.len;
1219	}
1220
1221	/*
1222	 * For each vif, decide if a copy of the packet should be forwarded.
1223	 * Forward if:
1224	 *		- the ttl exceeds the vif's threshold
1225	 *		- there are group members downstream on interface
1226	 */
1227	do {
1228		/* Don't consider non multicast routes. */
1229		if (ISSET(rt->rt_flags, RTF_HOST | RTF_MULTICAST) !=
1230		    (RTF_HOST | RTF_MULTICAST))
1231			continue;
1232
1233		mfc = (struct mfc *)rt->rt_llinfo;
1234		if (mfc == NULL)
1235			continue;
1236
1237		mfc->mfc_pkt_cnt++;
1238		mfc->mfc_byte_cnt += m->m_pkthdr.len;
1239
1240		/* Don't let this route expire. */
1241		mfc->mfc_expire = 0;
1242
1243		if (ip->ip_ttl <= mfc->mfc_ttl)
1244			continue;
1245		if ((ifp = if_get(rt->rt_ifidx)) == NULL)
1246			continue;
1247
1248		/* Sanity check: did we configure this? */
1249		if ((v = (struct vif *)ifp->if_mcast) == NULL) {
1250			if_put(ifp);
1251			continue;
1252		}
1253
1254		/* Don't send in the upstream interface. */
1255		if (mfc->mfc_parent == v->v_id) {
1256			if_put(ifp);
1257			continue;
1258		}
1259
1260		v->v_pkt_out++;
1261		v->v_bytes_out += m->m_pkthdr.len;
1262
1263		/*
1264		 * Make a new reference to the packet; make sure
1265		 * that the IP header is actually copied, not
1266		 * just referenced, so that ip_output() only
1267		 * scribbles on the copy.
1268		 */
1269		mc = m_dup_pkt(m, max_linkhdr, M_NOWAIT);
1270		if (mc == NULL) {
1271			if_put(ifp);
1272			rtfree(rt);
1273			return (ENOBUFS);
1274		}
1275
1276		/*
1277		 * if physical interface option, extract the options
1278		 * and then send
1279		 */
1280		imo.imo_ifidx = rt->rt_ifidx;
1281		imo.imo_ttl = ip->ip_ttl - IPTTLDEC;
1282		imo.imo_loop = 1;
1283
1284		ip_output(mc, NULL, NULL, IP_FORWARDING, &imo, NULL, 0);
1285		if_put(ifp);
1286	} while ((rt = rtable_iterate(rt)) != NULL);
1287
1288	return (0);
1289}
1290
1291struct ifnet *
1292if_lookupbyvif(vifi_t vifi, unsigned int rtableid)
1293{
1294	struct vif	*v;
1295	struct ifnet	*ifp;
1296
1297	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
1298		if (ifp->if_rdomain != rtableid)
1299			continue;
1300		if ((v = (struct vif *)ifp->if_mcast) == NULL)
1301			continue;
1302		if (v->v_id != vifi)
1303			continue;
1304
1305		return (ifp);
1306	}
1307
1308	return (NULL);
1309}
1310
1311struct rtentry *
1312rt_mcast_add(struct ifnet *ifp, struct sockaddr *origin, struct sockaddr *group)
1313{
1314	struct ifaddr		*ifa;
1315	int			 rv;
1316	unsigned int		 rtableid = ifp->if_rdomain;
1317
1318	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1319		if (ifa->ifa_addr->sa_family == AF_INET)
1320			break;
1321	}
1322	if (ifa == NULL) {
1323		DPRINTF("ifa == NULL");
1324		return (NULL);
1325	}
1326
1327	rv = rt_ifa_add(ifa, RTF_HOST | RTF_MULTICAST | RTF_MPATH,
1328	    group, ifp->if_rdomain);
1329	if (rv != 0) {
1330		DPRINTF("rt_ifa_add failed (%d)", rv);
1331		return (NULL);
1332	}
1333
1334	mrt_count[rtableid]++;
1335
1336	return (mfc_find(ifp, NULL, &satosin(group)->sin_addr, rtableid));
1337}
1338
1339void
1340mrt_mcast_del(struct rtentry *rt, unsigned int rtableid)
1341{
1342	struct ifnet		*ifp;
1343	int			 error;
1344
1345	/* Remove all timers related to this route. */
1346	rt_timer_remove_all(rt);
1347
1348	free(rt->rt_llinfo, M_MRTABLE, sizeof(struct mfc));
1349	rt->rt_llinfo = NULL;
1350
1351	ifp = if_get(rt->rt_ifidx);
1352	if (ifp == NULL)
1353		return;
1354	error = rtdeletemsg(rt, ifp, rtableid);
1355	if_put(ifp);
1356
1357	if (error)
1358		DPRINTF("delete route error %d\n", error);
1359
1360	mrt_count[rtableid]--;
1361}
1362