ip_mroute.c revision 3311
1/*
2 * IP multicast forwarding procedures
3 *
4 * Written by David Waitzman, BBN Labs, August 1988.
5 * Modified by Steve Deering, Stanford, February 1989.
6 * Modified by Mark J. Steiglitz, Stanford, May, 1991
7 * Modified by Van Jacobson, LBL, January 1993
8 * Modified by Ajit Thyagarajan, PARC, August 1993
9 *
10 * MROUTING 1.8
11 */
12
13
14#include <sys/param.h>
15#include <sys/systm.h>
16#include <sys/mbuf.h>
17#include <sys/socket.h>
18#include <sys/socketvar.h>
19#include <sys/protosw.h>
20#include <sys/errno.h>
21#include <sys/time.h>
22#include <sys/ioctl.h>
23#include <sys/syslog.h>
24#include <net/if.h>
25#include <net/route.h>
26#include <net/raw_cb.h>
27#include <netinet/in.h>
28#include <netinet/in_systm.h>
29#include <netinet/ip.h>
30#include <netinet/ip_var.h>
31#include <netinet/in_pcb.h>
32#include <netinet/in_var.h>
33#include <netinet/igmp.h>
34#include <netinet/igmp_var.h>
35#include <netinet/ip_mroute.h>
36
37#ifndef NTOHL
38#if BYTE_ORDER != BIG_ENDIAN
39#define NTOHL(d) ((d) = ntohl((d)))
40#define NTOHS(d) ((d) = ntohs((u_short)(d)))
41#define HTONL(d) ((d) = htonl((d)))
42#define HTONS(d) ((d) = htons((u_short)(d)))
43#else
44#define NTOHL(d)
45#define NTOHS(d)
46#define HTONL(d)
47#define HTONS(d)
48#endif
49#endif
50
51#ifndef MROUTING
52/*
53 * Dummy routines and globals used when multicast routing is not compiled in.
54 */
55
56u_int		ip_mrtproto = 0;
57struct socket  *ip_mrouter  = NULL;
58struct mrtstat	mrtstat;
59
60
61int
62_ip_mrouter_cmd(cmd, so, m)
63	int cmd;
64	struct socket *so;
65	struct mbuf *m;
66{
67	return(EOPNOTSUPP);
68}
69
70int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = _ip_mrouter_cmd;
71
72int
73_ip_mrouter_done()
74{
75	return(0);
76}
77
78int (*ip_mrouter_done)(void) = _ip_mrouter_done;
79
80int
81_ip_mforward(ip, ifp, m, imo)
82	struct ip *ip;
83	struct ifnet *ifp;
84	struct mbuf *m;
85	struct ip_moptions *imo;
86{
87	return(0);
88}
89
90int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
91		   struct ip_moptions *) = _ip_mforward;
92
93int
94_mrt_ioctl(int req, caddr_t data, struct proc *p)
95{
96	return EOPNOTSUPP;
97}
98
99int (*mrt_ioctl)(int, caddr_t, struct proc *) = _mrt_ioctl;
100
101void multiencap_decap(struct mbuf *m) { /* XXX must fixup manually */
102	rip_input(m);
103}
104
105int (*legal_vif_num)(int) = 0;
106
107#else
108
109#define INSIZ		sizeof(struct in_addr)
110#define	same(a1, a2) \
111	(bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0)
112
113#define MT_MRTABLE MT_RTABLE	/* since nothing else uses it */
114
115/*
116 * Globals.  All but ip_mrouter and ip_mrtproto could be static,
117 * except for netstat or debugging purposes.
118 */
119#ifndef MROUTE_LKM
120struct socket  *ip_mrouter  = NULL;
121struct mrtstat	mrtstat;
122
123int		ip_mrtproto = IGMP_DVMRP;    /* for netstat only */
124#else
125extern struct mrtstat mrtstat;
126extern int ip_mrtproto;
127#endif
128
129#define NO_RTE_FOUND 	0x1
130#define RTE_FOUND	0x2
131
132struct mbuf    *mfctable[MFCTBLSIZ];
133struct vif	viftable[MAXVIFS];
134u_int		mrtdebug = 0;	  /* debug level 	*/
135u_int       	tbfdebug = 0;     /* tbf debug level 	*/
136
137u_long timeout_val = 0;			/* count of outstanding upcalls */
138
139/*
140 * Define the token bucket filter structures
141 * tbftable -> each vif has one of these for storing info
142 * qtable   -> each interface has an associated queue of pkts
143 */
144
145struct tbf tbftable[MAXVIFS];
146struct pkt_queue qtable[MAXVIFS][MAXQSIZE];
147
148/*
149 * 'Interfaces' associated with decapsulator (so we can tell
150 * packets that went through it from ones that get reflected
151 * by a broken gateway).  These interfaces are never linked into
152 * the system ifnet list & no routes point to them.  I.e., packets
153 * can't be sent this way.  They only exist as a placeholder for
154 * multicast source verification.
155 */
156struct ifnet multicast_decap_if[MAXVIFS];
157
158#define ENCAP_TTL 64
159#define ENCAP_PROTO 4
160
161/* prototype IP hdr for encapsulated packets */
162struct ip multicast_encap_iphdr = {
163#if BYTE_ORDER == LITTLE_ENDIAN
164	sizeof(struct ip) >> 2, IPVERSION,
165#else
166	IPVERSION, sizeof(struct ip) >> 2,
167#endif
168	0,				/* tos */
169	sizeof(struct ip),		/* total length */
170	0,				/* id */
171	0,				/* frag offset */
172	ENCAP_TTL, ENCAP_PROTO,
173	0,				/* checksum */
174};
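/*
 * Note (descriptive comment, not in the original): encap_send() below
 * copies this prototype header in front of a forwarded datagram, fills
 * in ip_id, ip_len, ip_src (the vif's local address) and ip_dst (the
 * remote tunnel end-point), and multiencap_decap() strips it again on
 * the receiving side.  ENCAP_PROTO is 4, i.e. IP-in-IP.
 */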
175
176/*
177 * Private variables.
178 */
179static vifi_t	   numvifs = 0;
180
181/*
182 * one-back cache used by multiencap_decap to locate a tunnel's vif
183 * given a datagram's src ip address.
184 */
185static u_long last_encap_src;
186static struct vif *last_encap_vif;
187
188static u_long nethash_fc(u_long, u_long);
189static struct mfc *mfcfind(u_long, u_long);
190int get_sg_cnt(struct sioc_sg_req *);
191int get_vif_cnt(struct sioc_vif_req *);
192int get_vifs(caddr_t);
193static int add_vif(struct vifctl *);
194static int del_vif(vifi_t *);
195static int add_mfc(struct mfcctl *);
196static int del_mfc(struct delmfcctl *);
197static void cleanup_cache(void *);
198static int ip_mdq(struct mbuf *, struct ifnet *, u_long, struct mfc *,
199		  struct ip_moptions *);
200extern int (*legal_vif_num)(int);
201static void phyint_send(struct ip *, struct vif *, struct mbuf *);
202static void srcrt_send(struct ip *, struct vif *, struct mbuf *);
203static void encap_send(struct ip *, struct vif *, struct mbuf *);
204void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long,
205		 struct ip_moptions *);
206void tbf_queue(struct vif *, struct mbuf *, struct ip *, struct ip_moptions *);
207void tbf_process_q(struct vif *);
208void tbf_dequeue(struct vif *, int);
209void tbf_reprocess_q(void *);
210int tbf_dq_sel(struct vif *, struct ip *);
211void tbf_send_packet(struct vif *, struct mbuf *, struct ip_moptions *);
212void tbf_update_tokens(struct vif *);
213static int priority(struct vif *, struct ip *);
214static int ip_mrouter_init(struct socket *);
215
216/*
217 * A simple hash function: XORs MFCHASHMOD of the low-order nonzero octet
218 * of the origin's network or subnet number with that of the multicast group.
219 */
220static u_long
221nethash_fc(m,n)
222    register u_long m;
223    register u_long n;
224{
225    struct in_addr in1;
226    struct in_addr in2;
227
228    in1.s_addr = m;
229    m = in_netof(in1);
230    while ((m & 0xff) == 0) m >>= 8;
231
232    in2.s_addr = n;
233    n = in_netof(in2);
234    while ((n & 0xff) == 0) n >>= 8;
235
236    return (MFCHASHMOD(m) ^ MFCHASHMOD(n));
237}
238
239/*
240 * this is a direct-mapped cache used to speed the mapping from a
241 * datagram source address to the associated multicast route.  Note
242 * that unlike mrttable, the hash is on IP address, not IP net number.
243 */
244#define MFCHASHSIZ 1024
245#define MFCHASH(a, g) ((((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
246			((g) >> 20) ^ ((g) >> 10) ^ (g)) & (MFCHASHSIZ-1))
247struct mfc *mfchash[MFCHASHSIZ];
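/*
 * Note (descriptive comment, not in the original): MFCHASH folds each
 * 32-bit address by XORing it with copies of itself shifted right by
 * 10 and 20 bits, XORs the two results together and keeps the low 10
 * bits (MFCHASHSIZ - 1).  Unlike nethash_fc(), which hashes network
 * numbers, this covers the full source and group addresses, so the
 * result can index the one-entry-per-bucket cache used by MFCFIND.
 */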
248
249/*
250 * Find a route for a given origin IP address and Multicast group address
251 * Type of service parameter to be added in the future!!!
252 */
253#define MFCFIND(o, g, rt) { \
254	register u_int _mrhasho = o; \
255	register u_int _mrhashg = g; \
256	_mrhasho = MFCHASH(_mrhasho, _mrhashg); \
257	++mrtstat.mrts_mfc_lookups; \
258	rt = mfchash[_mrhasho]; \
259	if ((rt == NULL) || \
260	    ((o & rt->mfc_originmask.s_addr) != rt->mfc_origin.s_addr) || \
261	     (g != rt->mfc_mcastgrp.s_addr)) \
262	     if ((rt = mfcfind(o, g)) != NULL) \
263		mfchash[_mrhasho] = rt; \
264}
265
266/*
267 * Find route by examining hash table entries
268 */
269static struct mfc *
270mfcfind(origin, mcastgrp)
271    u_long origin;
272    u_long mcastgrp;
273{
274    register struct mbuf *mb_rt;
275    register struct mfc *rt;
276    register u_long hash;
277
278    hash = nethash_fc(origin, mcastgrp);
279    for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) {
280	rt = mtod(mb_rt, struct mfc *);
281	if (((origin & rt->mfc_originmask.s_addr) == rt->mfc_origin.s_addr) &&
282	    (mcastgrp == rt->mfc_mcastgrp.s_addr) &&
283	    (mb_rt->m_act == NULL))
284	    return (rt);
285    }
286    mrtstat.mrts_mfc_misses++;
287    return NULL;
288}
289
290/*
291 * Macros to compute elapsed time efficiently
292 * Borrowed from Van Jacobson's scheduling code
293 */
294#define TV_DELTA(a, b, delta) { \
295	    register int xxs; \
296		\
297	    delta = (a).tv_usec - (b).tv_usec; \
298	    if ((xxs = (a).tv_sec - (b).tv_sec)) { \
299	       switch (xxs) { \
300		      case 2: \
301			  delta += 1000000; \
302			      /* fall through */ \
303		      case 1: \
304			  delta += 1000000; \
305			  break; \
306		      default: \
307			  delta += (1000000 * xxs); \
308	       } \
309	    } \
310}
311
312#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \
313	      (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec)
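/*
 * Worked example (added comment, not in the original): for
 * a = { 10 s, 200 us } and b = { 8 s, 900000 us }, TV_DELTA sets
 * delta = 200 - 900000, finds xxs = 2 and adds 1000000 twice
 * (case 2 falls through to case 1), giving 1100200 us -- the
 * 1.100200 second difference expressed in microseconds.
 */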
314
315/*
316 * Handle DVMRP setsockopt commands to modify the multicast routing tables.
317 */
318int
319X_ip_mrouter_cmd(cmd, so, m)
320    int cmd;
321    struct socket *so;
322    struct mbuf *m;
323{
324    if (cmd != DVMRP_INIT && so != ip_mrouter) return EACCES;
325
326    switch (cmd) {
327	case DVMRP_INIT:     return ip_mrouter_init(so);
328	case DVMRP_DONE:     return ip_mrouter_done();
329	case DVMRP_ADD_VIF:  return add_vif (mtod(m, struct vifctl *));
330	case DVMRP_DEL_VIF:  return del_vif (mtod(m, vifi_t *));
331	case DVMRP_ADD_MFC:  return add_mfc (mtod(m, struct mfcctl *));
332	case DVMRP_DEL_MFC:  return del_mfc (mtod(m, struct delmfcctl *));
333	default:             return EOPNOTSUPP;
334    }
335}
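/*
 * Illustrative userland sketch (not part of the original file): the
 * DVMRP_* commands above are assumed to arrive as IPPROTO_IP-level
 * socket options on the routing daemon's raw IGMP socket, the way
 * mrouted issues them.  The code below is compiled out, the names are
 * hypothetical, and it assumes the usual userland socket headers.
 */
#if 0	/* example only */
static int
example_start_mrouting(void)
{
	int igmp_sock, dummy = 1;

	igmp_sock = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
	if (igmp_sock < 0)
		return (-1);
	/* ends up in X_ip_mrouter_cmd(DVMRP_INIT, so, m) in the kernel */
	if (setsockopt(igmp_sock, IPPROTO_IP, DVMRP_INIT,
		       (char *)&dummy, sizeof(dummy)) < 0)
		return (-1);
	return (igmp_sock);
}
#endif	/* example only */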
336
337#ifndef MROUTE_LKM
338int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = X_ip_mrouter_cmd;
339#endif
340
341/*
342 * Handle ioctl commands to obtain information from the cache
343 */
344int
345X_mrt_ioctl(cmd, data)
346    int cmd;
347    caddr_t data;
348{
349    int error = 0;
350
351    switch (cmd) {
352      case (SIOCGETVIFINF):		/* Read Virtual Interface (m/cast) */
353	  return (get_vifs(data));
354	  break;
355      case (SIOCGETVIFCNT):
356	  return (get_vif_cnt((struct sioc_vif_req *)data));
357	  break;
358      case (SIOCGETSGCNT):
359	  return (get_sg_cnt((struct sioc_sg_req *)data));
360	  break;
361      default:
362	  return (EINVAL);
363	  break;
364    }
365    return error;
366}
367
368#ifndef MROUTE_LKM
369int (*mrt_ioctl)(int, caddr_t, struct proc *) = X_mrt_ioctl;
370#else
371extern int (*mrt_ioctl)(int, caddr_t, struct proc *);
372#endif
373
374/*
375 * returns the packet count for the source group provided
376 */
377int
378get_sg_cnt(req)
379    register struct sioc_sg_req *req;
380{
381    register struct mfc *rt;
382    int s;
383
384    s = splnet();
385    MFCFIND(req->src.s_addr, req->grp.s_addr, rt);
386    splx(s);
387    if (rt != NULL)
388	req->count = rt->mfc_pkt_cnt;
389    else
390	req->count = 0xffffffff;
391
392    return 0;
393}
394
395/*
396 * returns the input and output packet counts on the interface provided
397 */
398int
399get_vif_cnt(req)
400    register struct sioc_vif_req *req;
401{
402    register vifi_t vifi = req->vifi;
403
404    req->icount = viftable[vifi].v_pkt_in;
405    req->ocount = viftable[vifi].v_pkt_out;
406
407    return 0;
408}
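/*
 * Illustrative sketch (not part of the original file): the counters
 * returned above are read with the SIOCGETVIFCNT / SIOCGETSGCNT ioctls,
 * which the routing daemon is assumed to issue on its IGMP socket.
 * Only the fields referenced by the kernel code are used; the function
 * name is hypothetical and the code is compiled out.
 */
#if 0	/* example only */
static void
example_dump_vif_counts(int igmp_sock, vifi_t nvifs)
{
	struct sioc_vif_req vreq;
	vifi_t i;

	for (i = 0; i < nvifs; i++) {
		vreq.vifi = i;
		if (ioctl(igmp_sock, SIOCGETVIFCNT, (char *)&vreq) == 0)
			printf("vif %u: %lu pkts in, %lu pkts out\n",
			       (unsigned)i, (u_long)vreq.icount,
			       (u_long)vreq.ocount);
	}
}
#endif	/* example only */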
409
410int
411get_vifs(data)
412    char *data;
413{
414    struct vif_conf *vifc = (struct vif_conf *)data;
415    struct vif_req *vifrp, vifr;
416    int space, error=0;
417
418    vifi_t vifi;
419    int s;
420
421    space = vifc->vifc_len;
422    vifrp  = vifc->vifc_req;
423
424    s = splnet();
425    vifc->vifc_num=numvifs;
426
427    for (vifi = 0; vifi <  numvifs; vifi++, vifrp++) {
428	if (viftable[vifi].v_lcl_addr.s_addr != 0) {
429	    vifr.v_flags=viftable[vifi].v_flags;
430	    vifr.v_threshold=viftable[vifi].v_threshold;
431	    vifr.v_lcl_addr=viftable[vifi].v_lcl_addr;
432	    vifr.v_rmt_addr=viftable[vifi].v_rmt_addr;
433	    strncpy(vifr.v_if_name,viftable[vifi].v_ifp->if_name,IFNAMSIZ);
434	    if ((space -= sizeof(vifr)) < 0) {
435		splx(s);
436		return(ENOSPC);
437	    }
438	    error = copyout((caddr_t)&vifr,(caddr_t)vifrp,(u_int)(sizeof vifr));
439	    if (error) {
440		splx(s);
441		return(error);
442	    }
443	}
444    }
445    splx(s);
446    return 0;
447}
448/*
449 * Enable multicast routing
450 */
451static int
452ip_mrouter_init(so)
453	struct socket *so;
454{
455    if (so->so_type != SOCK_RAW ||
456	so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP;
457
458    if (ip_mrouter != NULL) return EADDRINUSE;
459
460    ip_mrouter = so;
461
462    if (mrtdebug)
463	log(LOG_DEBUG, "ip_mrouter_init");
464
465    return 0;
466}
467
468/*
469 * Disable multicast routing
470 */
471int
472X_ip_mrouter_done()
473{
474    vifi_t vifi;
475    int i;
476    struct ifnet *ifp;
477    struct ifreq ifr;
478    struct mbuf *mb_rt;
479    struct mbuf *m;
480    struct rtdetq *rte;
481    int s;
482
483    s = splnet();
484
485    /*
486     * For each phyint in use, disable promiscuous reception of all IP
487     * multicasts.
488     */
489    for (vifi = 0; vifi < numvifs; vifi++) {
490	if (viftable[vifi].v_lcl_addr.s_addr != 0 &&
491	    !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
492	    ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
493	    ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr
494								= INADDR_ANY;
495	    ifp = viftable[vifi].v_ifp;
496	    (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
497	}
498    }
499    bzero((caddr_t)qtable, sizeof(qtable));
500    bzero((caddr_t)tbftable, sizeof(tbftable));
501    bzero((caddr_t)viftable, sizeof(viftable));
502    numvifs = 0;
503
504    /*
505     * Check if any outstanding timeouts remain
506     */
507    if (timeout_val != 0)
508	for (i = 0; i < MFCTBLSIZ; i++) {
509	    mb_rt = mfctable[i];
510	    while (mb_rt) {
511		if ( mb_rt->m_act != NULL) {
512		    untimeout(cleanup_cache, (caddr_t)mb_rt);
513		    while (mb_rt->m_act) {
514		        m = mb_rt->m_act;
515			mb_rt->m_act = m->m_act;
516			rte = mtod(m, struct rtdetq *);
517			m_freem(rte->m);
518			m_free(m);
519		    }
520		    timeout_val--;
521		}
522	    mb_rt = mb_rt->m_next;
523	    }
524	    if (timeout_val == 0)
525		break;
526	}
527
528    /*
529     * Free all multicast forwarding cache entries.
530     */
531    for (i = 0; i < MFCTBLSIZ; i++)
532	m_freem(mfctable[i]);
533
534    bzero((caddr_t)mfctable, sizeof(mfctable));
535    bzero((caddr_t)mfchash, sizeof(mfchash));
536
537    /*
538     * Reset de-encapsulation cache
539     */
540    last_encap_src = 0;
541    last_encap_vif = NULL;
542
543    ip_mrouter = NULL;
544
545    splx(s);
546
547    if (mrtdebug)
548	log(LOG_DEBUG, "ip_mrouter_done");
549
550    return 0;
551}
552
553#ifndef MROUTE_LKM
554int (*ip_mrouter_done)(void) = X_ip_mrouter_done;
555#endif
556
557/*
558 * Add a vif to the vif table
559 */
560static int
561add_vif(vifcp)
562    register struct vifctl *vifcp;
563{
564    register struct vif *vifp = viftable + vifcp->vifc_vifi;
565    static struct sockaddr_in sin = {AF_INET};
566    struct ifaddr *ifa;
567    struct ifnet *ifp;
568    struct ifreq ifr;
569    int error, s;
570    struct tbf *v_tbf = tbftable + vifcp->vifc_vifi;
571
572    if (vifcp->vifc_vifi >= MAXVIFS)  return EINVAL;
573    if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE;
574
575    /* Find the interface with an address in AF_INET family */
576    sin.sin_addr = vifcp->vifc_lcl_addr;
577    ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
578    if (ifa == 0) return EADDRNOTAVAIL;
579    ifp = ifa->ifa_ifp;
580
581    if (vifcp->vifc_flags & VIFF_TUNNEL) {
582	if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) {
583	    static int inited = 0;
584	    if(!inited) {
585		for (s = 0; s < MAXVIFS; ++s) {
586		    multicast_decap_if[s].if_name = "mdecap";
587		    multicast_decap_if[s].if_unit = s;
588		}
589		inited = 1;
590	    }
591	    ifp = &multicast_decap_if[vifcp->vifc_vifi];
592	} else {
593	    ifp = 0;
594	}
595    } else {
596	/* Make sure the interface supports multicast */
597	if ((ifp->if_flags & IFF_MULTICAST) == 0)
598	    return EOPNOTSUPP;
599
600	/* Enable promiscuous reception of all IP multicasts from the if */
601	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
602	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY;
603	s = splnet();
604	error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr);
605	splx(s);
606	if (error)
607	    return error;
608    }
609
610    s = splnet();
611    /* define parameters for the tbf structure */
612    vifp->v_tbf = v_tbf;
613    vifp->v_tbf->q_len = 0;
614    vifp->v_tbf->n_tok = 0;
615    vifp->v_tbf->last_pkt_t = 0;
616
617    vifp->v_flags     = vifcp->vifc_flags;
618    vifp->v_threshold = vifcp->vifc_threshold;
619    vifp->v_lcl_addr  = vifcp->vifc_lcl_addr;
620    vifp->v_rmt_addr  = vifcp->vifc_rmt_addr;
621    vifp->v_ifp       = ifp;
622    vifp->v_rate_limit= vifcp->vifc_rate_limit;
623    /* initialize per vif pkt counters */
624    vifp->v_pkt_in    = 0;
625    vifp->v_pkt_out   = 0;
626    splx(s);
627
628    /* Adjust numvifs up if the vifi is higher than numvifs */
629    if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1;
630
631    if (mrtdebug)
632	log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d",
633	    vifcp->vifc_vifi,
634	    ntohl(vifcp->vifc_lcl_addr.s_addr),
635	    (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
636	    ntohl(vifcp->vifc_rmt_addr.s_addr),
637	    vifcp->vifc_threshold,
638	    vifcp->vifc_rate_limit);
639
640    return 0;
641}
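/*
 * Illustrative sketch (not part of the original file): a physical
 * interface is registered by passing a struct vifctl through the
 * DVMRP_ADD_VIF socket option; only the fields read by add_vif() above
 * are shown, the address is an example value, and the function name is
 * hypothetical.  Compiled out.
 */
#if 0	/* example only */
static int
example_add_phyint_vif(int igmp_sock)
{
	struct vifctl vc;

	vc.vifc_vifi       = 0;			/* slot in viftable[] */
	vc.vifc_flags      = 0;			/* phyint, not VIFF_TUNNEL */
	vc.vifc_threshold  = 1;			/* ttl threshold */
	vc.vifc_rate_limit = 0;			/* 0 = no rate limiting */
	vc.vifc_lcl_addr.s_addr = inet_addr("192.0.2.1");  /* example */
	vc.vifc_rmt_addr.s_addr = INADDR_ANY;	/* tunnels only */
	return (setsockopt(igmp_sock, IPPROTO_IP, DVMRP_ADD_VIF,
			   (char *)&vc, sizeof(vc)));
}
#endif	/* example only */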
642
643/*
644 * Delete a vif from the vif table
645 */
646static int
647del_vif(vifip)
648    vifi_t *vifip;
649{
650    register struct vif *vifp = viftable + *vifip;
651    register vifi_t vifi;
652    struct ifnet *ifp;
653    struct ifreq ifr;
654    int s;
655
656    if (*vifip >= numvifs) return EINVAL;
657    if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL;
658
659    s = splnet();
660
661    if (!(vifp->v_flags & VIFF_TUNNEL)) {
662	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
663	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY;
664	ifp = vifp->v_ifp;
665	(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
666    }
667
668    if (vifp == last_encap_vif) {
669	last_encap_vif = 0;
670	last_encap_src = 0;
671    }
672
673    bzero((caddr_t)qtable[*vifip],
674	  sizeof(qtable[*vifip]));
675    bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf)));
676    bzero((caddr_t)vifp, sizeof (*vifp));
677
678    /* Adjust numvifs down */
679    for (vifi = numvifs; vifi > 0; vifi--)
680	if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break;
681    numvifs = vifi;
682
683    splx(s);
684
685    if (mrtdebug)
686      log(LOG_DEBUG, "del_vif %d, numvifs %d", *vifip, numvifs);
687
688    return 0;
689}
690
691/*
692 * Add an mfc entry
693 */
694static int
695add_mfc(mfccp)
696    struct mfcctl *mfccp;
697{
698    struct mfc *rt;
699    struct mfc *rt1 = 0;
700    register struct mbuf *mb_rt;
701    struct mbuf *prev_mb_rt;
702    u_long hash;
703    struct mbuf *mb_ntry;
704    struct rtdetq *rte;
705    register u_short nstl;
706    int s;
707    int i;
708
709    rt = mfcfind(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
710
711    /* If an entry already exists, just update the fields */
712    if (rt) {
713	if (mrtdebug)
714	    log(LOG_DEBUG,"add_mfc update o %x g %x m %x p %x",
715		ntohl(mfccp->mfcc_origin.s_addr),
716		ntohl(mfccp->mfcc_mcastgrp.s_addr),
717		ntohl(mfccp->mfcc_originmask.s_addr),
718		mfccp->mfcc_parent);
719
720	s = splnet();
721	rt->mfc_parent = mfccp->mfcc_parent;
722	for (i = 0; i < numvifs; i++)
723	    VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
724	splx(s);
725	return 0;
726    }
727
728    /*
729     * Find the entry for which the upcall was made and update
730     */
731    s = splnet();
732    hash = nethash_fc(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
733    for (prev_mb_rt = mb_rt = mfctable[hash], nstl = 0;
734	 mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) {
735
736	rt = mtod(mb_rt, struct mfc *);
737	if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr)
738	     == mfccp->mfcc_origin.s_addr) &&
739	    (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
740	    (mb_rt->m_act != NULL)) {
741
742	    if (!nstl++) {
743		if (mrtdebug)
744		    log(LOG_DEBUG,"add_mfc o %x g %x m %x p %x dbg %x",
745			ntohl(mfccp->mfcc_origin.s_addr),
746			ntohl(mfccp->mfcc_mcastgrp.s_addr),
747			ntohl(mfccp->mfcc_originmask.s_addr),
748			mfccp->mfcc_parent, mb_rt->m_act);
749
750		rt->mfc_origin     = mfccp->mfcc_origin;
751		rt->mfc_originmask = mfccp->mfcc_originmask;
752		rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
753		rt->mfc_parent     = mfccp->mfcc_parent;
754		for (i = 0; i < numvifs; i++)
755		    VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
756		/* initialize pkt counters per src-grp */
757		rt->mfc_pkt_cnt    = 0;
758		rt1 = rt;
759	    }
760
761	    /* prevent cleanup of cache entry */
762	    untimeout(cleanup_cache, (caddr_t)mb_rt);
763	    timeout_val--;
764
765	    /* free packets queued at the end of this entry */
766	    while (mb_rt->m_act) {
767		mb_ntry = mb_rt->m_act;
768		rte = mtod(mb_ntry, struct rtdetq *);
769		ip_mdq(rte->m, rte->ifp, rte->tunnel_src,
770		       rt1, rte->imo);
771		mb_rt->m_act = mb_ntry->m_act;
772		m_freem(rte->m);
773		m_free(mb_ntry);
774	    }
775
776	    /*
777	     * If more than one entry was created for a single upcall
778	     * delete that entry
779	     */
780	    if (nstl > 1) {
781		MFREE(mb_rt, prev_mb_rt->m_next);
782		mb_rt = prev_mb_rt;
783	    }
784	}
785    }
786
787    /*
788     * It is possible that an entry is being inserted without an upcall
789     */
790    if (nstl == 0) {
791	if (mrtdebug)
792	    log(LOG_DEBUG,"add_mfc no upcall h %d o %x g %x m %x p %x",
793		hash, ntohl(mfccp->mfcc_origin.s_addr),
794		ntohl(mfccp->mfcc_mcastgrp.s_addr),
795		ntohl(mfccp->mfcc_originmask.s_addr),
796		mfccp->mfcc_parent);
797
798	for (prev_mb_rt = mb_rt = mfctable[hash];
799	     mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) {
800
801	    rt = mtod(mb_rt, struct mfc *);
802	    if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr)
803		 == mfccp->mfcc_origin.s_addr) &&
804		(rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) {
805
806		rt->mfc_origin     = mfccp->mfcc_origin;
807		rt->mfc_originmask = mfccp->mfcc_originmask;
808		rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
809		rt->mfc_parent     = mfccp->mfcc_parent;
810		for (i = 0; i < numvifs; i++)
811		    VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
812		/* initialize pkt counters per src-grp */
813		rt->mfc_pkt_cnt    = 0;
814	    }
815	}
816	if (mb_rt == NULL) {
817	    /* no upcall, so make a new entry */
818	    MGET(mb_rt, M_DONTWAIT, MT_MRTABLE);
819	    if (mb_rt == NULL) {
820		splx(s);
821		return ENOBUFS;
822	    }
823
824	    rt = mtod(mb_rt, struct mfc *);
825
826	    /* insert new entry at head of hash chain */
827	    rt->mfc_origin     = mfccp->mfcc_origin;
828	    rt->mfc_originmask = mfccp->mfcc_originmask;
829	    rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
830	    rt->mfc_parent     = mfccp->mfcc_parent;
831	    for (i = 0; i < numvifs; i++)
832		VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
833	    /* initialize pkt counters per src-grp */
834	    rt->mfc_pkt_cnt    = 0;
835
836	    /* link into table */
837	    mb_rt->m_next  = mfctable[hash];
838	    mfctable[hash] = mb_rt;
839	    mb_rt->m_act = NULL;
840	}
841    }
842    splx(s);
843    return 0;
844}
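/*
 * Illustrative sketch (not part of the original file): the routing
 * daemon normally answers an upcall by installing a forwarding entry
 * with DVMRP_ADD_MFC.  Only the fields read by add_mfc() above are
 * shown; the addresses and the function name are example values, and
 * the loop bound assumes mfcc_ttls has MAXVIFS entries.  Compiled out.
 */
#if 0	/* example only */
static int
example_add_mfc(int igmp_sock)
{
	struct mfcctl mc;
	int i;

	mc.mfcc_origin.s_addr     = inet_addr("192.0.2.10");
	mc.mfcc_originmask.s_addr = inet_addr("255.255.255.0");
	mc.mfcc_mcastgrp.s_addr   = inet_addr("239.1.1.1");
	mc.mfcc_parent            = 0;		/* expected incoming vif */
	for (i = 0; i < MAXVIFS; i++)
		mc.mfcc_ttls[i] = 0;		/* 0 = don't forward */
	mc.mfcc_ttls[1] = 1;			/* forward on vif 1 */
	return (setsockopt(igmp_sock, IPPROTO_IP, DVMRP_ADD_MFC,
			   (char *)&mc, sizeof(mc)));
}
#endif	/* example only */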
845
846/*
847 * Delete an mfc entry
848 */
849static int
850del_mfc(mfccp)
851    struct delmfcctl *mfccp;
852{
853    struct in_addr 	origin;
854    struct in_addr 	mcastgrp;
855    struct mfc 		*rt;
856    struct mbuf 	*mb_rt;
857    struct mbuf 	*prev_mb_rt;
858    u_long 		hash;
859    struct mfc 		**cmfc;
860    struct mfc 		**cmfcend;
861    int s;
862
863    origin = mfccp->mfcc_origin;
864    mcastgrp = mfccp->mfcc_mcastgrp;
865    hash = nethash_fc(origin.s_addr, mcastgrp.s_addr);
866
867    if (mrtdebug)
868	log(LOG_DEBUG,"del_mfc orig %x mcastgrp %x",
869	    ntohl(origin.s_addr), ntohl(mcastgrp.s_addr));
870
871    for (prev_mb_rt = mb_rt = mfctable[hash]
872	 ; mb_rt
873	 ; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) {
874        rt = mtod(mb_rt, struct mfc *);
875	if (origin.s_addr == rt->mfc_origin.s_addr &&
876	    mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr &&
877	    mb_rt->m_act == NULL)
878	    break;
879    }
880    if (mb_rt == NULL) {
881	return ESRCH;
882    }
883
884    s = splnet();
885
886    cmfc = mfchash;
887    cmfcend = cmfc + MFCHASHSIZ;
888    for ( ; cmfc < cmfcend; ++cmfc)
889	if (*cmfc == rt)
890	    *cmfc = 0;
891
892    if (prev_mb_rt != mb_rt) {	/* if moved past head of list */
893	MFREE(mb_rt, prev_mb_rt->m_next);
894    } else			/* delete head of list, it is in the table */
895        mfctable[hash] = m_free(mb_rt);
896
897    splx(s);
898
899    return 0;
900}
901
902/*
903 * IP multicast forwarding function. This function assumes that the packet
904 * pointed to by "ip" has arrived on (or is about to be sent to) the interface
905 * pointed to by "ifp", and the packet is to be relayed to other networks
906 * that have members of the packet's destination IP multicast group.
907 *
908 * The packet is returned unscathed to the caller, unless it is tunneled
909 * or erroneous, in which case a non-zero return value tells the caller to
910 * discard it.
911 */
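/*
 * Added overview (descriptive comment, not in the original): when no
 * forwarding cache entry exists for a (source, group) pair, the code
 * below queues the packet in a struct rtdetq hung off the cache
 * entry's m_act list, hands a copy of the header to the routing
 * daemon via raw_input() on the IGMP protocol (with ip_p cleared as a
 * marker), and starts a 100-tick cleanup_cache() timeout.  The daemon
 * is expected to respond with DVMRP_ADD_MFC, at which point add_mfc()
 * cancels the timeout and pushes the queued packets through ip_mdq().
 */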
912
913#define IP_HDR_LEN  20	/* # bytes of fixed IP header (excluding options) */
914#define TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */
915
916int
917X_ip_mforward(ip, ifp, m, imo)
918    register struct ip *ip;
919    struct ifnet *ifp;
920    struct mbuf *m;
921    struct ip_moptions *imo;
922{
923    register struct mfc *rt;
924    register u_char *ipoptions;
925    u_long tunnel_src;
926    static struct sockproto	k_igmpproto 	= { AF_INET, IPPROTO_IGMP };
927    static struct sockaddr_in 	k_igmpsrc	= { AF_INET };
928    static struct sockaddr_in 	k_igmpdst 	= { AF_INET };
929    register struct mbuf *mm;
930    register struct ip *k_data;
931    int s;
932
933    if (mrtdebug > 1)
934	log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %x",
935	    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp);
936
937    if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
938	(ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) {
939	/*
940	 * Packet arrived via a physical interface.
941	 */
942	tunnel_src = 0;
943    } else {
944	/*
945	 * Packet arrived through a source-route tunnel.
946	 *
947	 * A source-route tunneled packet has a single NOP option and a
948	 * two-element loose-source-and-record-route (LSRR) option
949	 * immediately following the fixed-size part of the IP header.
950	 * At this point in processing, the IP header should contain the
951	 * following IP addresses:
952	 *
953	 *	original source          - in the source address field
954	 *	destination group        - in the destination address field
955	 *	remote tunnel end-point  - in the first  element of LSRR
956	 *	one of this host's addrs - in the second element of LSRR
957	 *
958	 * NOTE: RFC-1075 would have the original source and remote tunnel
959	 *	 end-point addresses swapped.  However, that could cause
960	 *	 delivery of ICMP error messages to innocent applications
961	 *	 on intermediate routing hosts!  Therefore, we hereby
962	 *	 change the spec.
963	 */
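	/*
	 * Added illustration (descriptive comment, not in the original):
	 * the twelve option bytes examined below are expected to be
	 *
	 *	opt[0]     IPOPT_NOP
	 *	opt[1]     IPOPT_LSRR
	 *	opt[2]     11  (LSRR length: type, length, pointer + 2 addrs)
	 *	opt[3]     12  (LSRR pointer, already advanced past both addrs)
	 *	opt[4..7]  remote tunnel end-point (saved as tunnel_src)
	 *	opt[8..11] one of this host's addresses, recorded en route
	 */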
964
965	/*
966	 * Verify that the tunnel options are well-formed.
967	 */
968	if (ipoptions[0] != IPOPT_NOP ||
969	    ipoptions[2] != 11 ||	/* LSRR option length   */
970	    ipoptions[3] != 12 ||	/* LSRR address pointer */
971	    (tunnel_src = *(u_long *)(&ipoptions[4])) == 0) {
972	    mrtstat.mrts_bad_tunnel++;
973	    if (mrtdebug)
974		log(LOG_DEBUG,
975		    "ip_mforward: bad tunnel from %u (%x %x %x %x %x %x)",
976		    ntohl(ip->ip_src.s_addr),
977		    ipoptions[0], ipoptions[1], ipoptions[2], ipoptions[3],
978		    *(u_long *)(&ipoptions[4]), *(u_long *)(&ipoptions[8]));
979	    return 1;
980	}
981
982	/*
983	 * Delete the tunnel options from the packet.
984	 */
985	ovbcopy((caddr_t)(ipoptions + TUNNEL_LEN), (caddr_t)ipoptions,
986		(unsigned)(m->m_len - (IP_HDR_LEN + TUNNEL_LEN)));
987	m->m_len   -= TUNNEL_LEN;
988	ip->ip_len -= TUNNEL_LEN;
989	ip->ip_hl  -= TUNNEL_LEN >> 2;
990
991	ifp = 0;
992    }
993
994    /*
995     * Don't forward a packet with time-to-live of zero or one,
996     * or a packet destined to a local-only group.
997     */
998    if (ip->ip_ttl <= 1 ||
999	ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP)
1000	return (int)tunnel_src;
1001
1002    /*
1003     * Determine forwarding vifs from the forwarding cache table
1004     */
1005    s = splnet();
1006    MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt);
1007
1008    /* Entry exists, so forward if necessary */
1009    if (rt != NULL) {
1010	splx(s);
1011	return (ip_mdq(m, ifp, tunnel_src, rt, imo));
1012    }
1013
1014    else {
1015	/*
1016	 * If we don't have a route for the packet's origin,
1017	 * make a copy of the packet and
1018	 * send a message to the routing daemon.
1019	 */
1020
1021	register struct mbuf *mb_rt;
1022	register struct mbuf *mb_ntry;
1023	register struct mbuf *mb0;
1024	register struct rtdetq *rte;
1025	register struct mbuf *rte_m;
1026	register u_long hash;
1027
1028	mrtstat.mrts_no_route++;
1029	if (mrtdebug)
1030	    log(LOG_DEBUG, "ip_mforward: no rte s %x g %x",
1031		ntohl(ip->ip_src.s_addr),
1032		ntohl(ip->ip_dst.s_addr));
1033
1034	/* is there an upcall waiting for this packet? */
1035	hash = nethash_fc(ip->ip_src.s_addr, ip->ip_dst.s_addr);
1036	for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) {
1037	    rt = mtod(mb_rt, struct mfc *);
1038	    if (((ip->ip_src.s_addr & rt->mfc_originmask.s_addr) ==
1039		 rt->mfc_origin.s_addr) &&
1040		(ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) &&
1041		(mb_rt->m_act != NULL))
1042		break;
1043	}
1044
1045	if (mb_rt == NULL) {
1046	    /* no upcall, so make a new entry */
1047	    MGET(mb_rt, M_DONTWAIT, MT_MRTABLE);
1048	    if (mb_rt == NULL) {
1049		splx(s);
1050		return ENOBUFS;
1051	    }
1052
1053	    rt = mtod(mb_rt, struct mfc *);
1054
1055	    /* insert new entry at head of hash chain */
1056	    rt->mfc_origin.s_addr     = ip->ip_src.s_addr;
1057	    rt->mfc_originmask.s_addr = (u_long)0xffffffff;
1058	    rt->mfc_mcastgrp.s_addr   = ip->ip_dst.s_addr;
1059
1060	    /* link into table */
1061	    hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr);
1062	    mb_rt->m_next  = mfctable[hash];
1063	    mfctable[hash] = mb_rt;
1064	    mb_rt->m_act = NULL;
1065
1066	}
1067
1068	/* determine if q has overflowed */
1069	for (rte_m = mb_rt, hash = 0; rte_m->m_act; rte_m = rte_m->m_act)
1070	    hash++;
1071
1072	if (hash > MAX_UPQ) {
1073	    mrtstat.mrts_upq_ovflw++;
1074	    splx(s);
1075	    return 0;
1076	}
1077
1078	/* add this packet and timing, ifp info to m_act */
1079	MGET(mb_ntry, M_DONTWAIT, MT_DATA);
1080	if (mb_ntry == NULL) {
1081	    splx(s);
1082	    return ENOBUFS;
1083	}
1084
1085	mb_ntry->m_act = NULL;
1086	rte = mtod(mb_ntry, struct rtdetq *);
1087
1088	mb0 = m_copy(m, 0, M_COPYALL);
1089	if (mb0 == NULL) {
1090	    splx(s);
1091	    return ENOBUFS;
1092	}
1093
1094	rte->m 			= mb0;
1095	rte->ifp 		= ifp;
1096	rte->tunnel_src 	= tunnel_src;
1097	rte->imo		= imo;
1098
1099	rte_m->m_act = mb_ntry;
1100
1101	splx(s);
1102
1103	if (hash == 0) {
1104	    /*
1105	     * Send message to routing daemon to install
1106	     * a route into the kernel table
1107	     */
1108	    k_igmpsrc.sin_addr = ip->ip_src;
1109	    k_igmpdst.sin_addr = ip->ip_dst;
1110
1111	    mm = m_copy(m, 0, M_COPYALL);
1112	    if (mm == NULL) {
1113		splx(s);
1114		return ENOBUFS;
1115	    }
1116
1117	    k_data = mtod(mm, struct ip *);
1118	    k_data->ip_p = 0;
1119
1120	    mrtstat.mrts_upcalls++;
1121
1122	    raw_input(mm, &k_igmpproto,
1123		      (struct sockaddr *)&k_igmpsrc,
1124		      (struct sockaddr *)&k_igmpdst);
1125
1126	    /* set timer to cleanup entry if upcall is lost */
1127	    timeout(cleanup_cache, (caddr_t)mb_rt, 100);
1128	    timeout_val++;
1129	}
1130
1131	return 0;
1132    }
1133}
1134
1135#ifndef MROUTE_LKM
1136int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
1137		   struct ip_moptions *) = X_ip_mforward;
1138#endif
1139
1140/*
1141 * Clean up the cache entry if upcall is not serviced
1142 */
1143static void
1144cleanup_cache(xmb_rt)
1145	void *xmb_rt;
1146{
1147    struct mbuf *mb_rt = xmb_rt;
1148    struct mfc *rt;
1149    u_long hash;
1150    struct mbuf *prev_m0;
1151    struct mbuf *m0;
1152    struct mbuf *m;
1153    struct rtdetq *rte;
1154    int s;
1155
1156    rt = mtod(mb_rt, struct mfc *);
1157    hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr);
1158
1159    if (mrtdebug)
1160	log(LOG_DEBUG, "ip_mforward: cleanup ipm %d h %d s %x g %x",
1161	    ip_mrouter, hash, ntohl(rt->mfc_origin.s_addr),
1162	    ntohl(rt->mfc_mcastgrp.s_addr));
1163
1164    mrtstat.mrts_cache_cleanups++;
1165
1166    /*
1167     * determine entry to be cleaned up in cache table
1168     */
1169    s = splnet();
1170    for (prev_m0 = m0 = mfctable[hash]; m0; prev_m0 = m0, m0 = m0->m_next)
1171	if (m0 == mb_rt)
1172	    break;
1173
1174    /*
1175     * drop all the packets
1176     * free the mbuf with the pkt, if, timing info
1177     */
1178    while (mb_rt->m_act) {
1179	m = mb_rt->m_act;
1180	mb_rt->m_act = m->m_act;
1181
1182	rte = mtod(m, struct rtdetq *);
1183	m_freem(rte->m);
1184	m_free(m);
1185    }
1186
1187    /*
1188     * Delete the entry from the cache
1189     */
1190    if (prev_m0 != m0) {	/* if moved past head of list */
1191	MFREE(m0, prev_m0->m_next);
1192    } else			/* delete head of list, it is in the table */
1193	mfctable[hash] = m_free(m0);
1194
1195    timeout_val--;
1196    splx(s);
1197}
1198
1199/*
1200 * Packet forwarding routine once entry in the cache is made
1201 */
1202static int
1203ip_mdq(m, ifp, tunnel_src, rt, imo)
1204    register struct mbuf *m;
1205    register struct ifnet *ifp;
1206    register u_long tunnel_src;
1207    register struct mfc *rt;
1208    register struct ip_moptions *imo;
1209{
1210    register struct ip  *ip = mtod(m, struct ip *);
1211    register vifi_t vifi;
1212    register struct vif *vifp;
1213
1214    /*
1215     * Don't forward if it didn't arrive from the parent vif for its origin.
1216     * Notes: v_ifp is zero for src route tunnels, multicast_decap_if
1217     * for encapsulated tunnels and a real ifnet for non-tunnels so
1218     * the first part of the if catches wrong physical interface or
1219     * tunnel type; v_rmt_addr is zero for non-tunneled packets so
1220     * the 2nd part catches both packets that arrive via a tunnel
1221     * that shouldn't and packets that arrive via the wrong tunnel.
1222     */
1223    vifi = rt->mfc_parent;
1224    if (viftable[vifi].v_ifp != ifp ||
1225	(ifp == 0 && viftable[vifi].v_rmt_addr.s_addr != tunnel_src)) {
1226	/* came in the wrong interface */
1227	if (mrtdebug)
1228	    log(LOG_DEBUG, "wrong if: ifp %x vifi %d",
1229		ifp, vifi);
1230	++mrtstat.mrts_wrong_if;
1231	return (int)tunnel_src;
1232    }
1233
1234    /* increment the interface and s-g counters */
1235    viftable[vifi].v_pkt_in++;
1236    rt->mfc_pkt_cnt++;
1237
1238    /*
1239     * For each vif, decide if a copy of the packet should be forwarded.
1240     * Forward if:
1241     *		- the ttl exceeds the vif's threshold
1242     *		- there are group members downstream on interface
1243     */
1244#define MC_SEND(ip,vifp,m) {                             \
1245		(vifp)->v_pkt_out++;                     \
1246                if ((vifp)->v_flags & VIFF_SRCRT)        \
1247                    srcrt_send((ip), (vifp), (m));       \
1248                else if ((vifp)->v_flags & VIFF_TUNNEL)  \
1249                    encap_send((ip), (vifp), (m));       \
1250                else                                     \
1251                    phyint_send((ip), (vifp), (m));      \
1252                }
1253
1254/* If there are no options, or the imo_multicast_vif option is 0,
1255 * don't do this part. */
1256    if ((imo != NULL) &&
1257       (( vifi = imo->imo_multicast_vif - 1) < numvifs) /*&& (vifi>=0)*/)
1258    {
1259        MC_SEND(ip,viftable+vifi,m);
1260        return (1);        /* make sure we are done: No more physical sends */
1261    }
1262
1263    for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++)
1264	if ((rt->mfc_ttls[vifi] > 0) &&
1265	    (ip->ip_ttl > rt->mfc_ttls[vifi]))
1266	    MC_SEND(ip, vifp, m);
1267
1268    return 0;
1269}
1270
1271/* Check whether a vif number is valid.  This is used by ip_output, so that
1272 * knowledge of numvifs is available there.
1273 */
1274int
1275X_legal_vif_num(vif)
1276    int vif;
1277{   if (vif>=0 && vif<numvifs)
1278       return(1);
1279    else
1280       return(0);
1281}
1282
1283#ifndef MROUTE_LKM
1284int (*legal_vif_num)(int) = X_legal_vif_num;
1285#endif
1286
1287static void
1288phyint_send(ip, vifp, m)
1289    struct ip *ip;
1290    struct vif *vifp;
1291    struct mbuf *m;
1292{
1293    register struct mbuf *mb_copy;
1294    register struct ip_moptions *imo;
1295
1296    if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL)
1297	return;
1298
1299    MALLOC(imo, struct ip_moptions *, sizeof *imo, M_IPMOPTS, M_NOWAIT);
1300    if (imo == NULL) {
1301	m_freem(mb_copy);
1302	return;
1303    }
1304
1305    imo->imo_multicast_ifp  = vifp->v_ifp;
1306    imo->imo_multicast_ttl  = ip->ip_ttl - 1;
1307    imo->imo_multicast_loop = 1;
1308
1309    if (vifp->v_rate_limit <= 0)
1310	tbf_send_packet(vifp, mb_copy, imo);
1311    else
1312	tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len,
1313		    imo);
1314}
1315
1316static void
1317srcrt_send(ip, vifp, m)
1318    struct ip *ip;
1319    struct vif *vifp;
1320    struct mbuf *m;
1321{
1322    struct mbuf *mb_copy, *mb_opts;
1323    register struct ip *ip_copy;
1324    u_char *cp;
1325
1326    /*
1327     * Make sure that adding the tunnel options won't exceed the
1328     * maximum allowed number of option bytes.
1329     */
1330    if (ip->ip_hl > (60 - TUNNEL_LEN) >> 2) {
1331	mrtstat.mrts_cant_tunnel++;
1332	if (mrtdebug)
1333	    log(LOG_DEBUG, "srcrt_send: no room for tunnel options, from %u",
1334		ntohl(ip->ip_src.s_addr));
1335	return;
1336    }
1337
1338    if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL)
1339	return;
1340
1341    ip_copy = mtod(mb_copy, struct ip *);
1342    ip_copy->ip_ttl--;
1343    ip_copy->ip_dst = vifp->v_rmt_addr;	  /* remote tunnel end-point */
1344    /*
1345     * Adjust the ip header length to account for the tunnel options.
1346     */
1347    ip_copy->ip_hl  += TUNNEL_LEN >> 2;
1348    ip_copy->ip_len += TUNNEL_LEN;
1349    MGET(mb_opts, M_DONTWAIT, MT_HEADER);
1350    if (mb_opts == NULL) {
1351	m_freem(mb_copy);
1352	return;
1353    }
1354    /*
1355     * 'Delete' the base ip header from the mb_copy chain
1356     */
1357    mb_copy->m_len -= IP_HDR_LEN;
1358    mb_copy->m_data += IP_HDR_LEN;
1359    /*
1360     * Make mb_opts be the new head of the packet chain.
1361     * Any options of the packet were left in the old packet chain head
1362     */
1363    mb_opts->m_next = mb_copy;
1364    mb_opts->m_data += 16;
1365    mb_opts->m_len = IP_HDR_LEN + TUNNEL_LEN;
1366    /*
1367     * Copy the base ip header from the mb_copy chain to the new head mbuf
1368     */
1369    bcopy((caddr_t)ip_copy, mtod(mb_opts, caddr_t), IP_HDR_LEN);
1370    /*
1371     * Add the NOP and LSRR after the base ip header
1372     */
1373    cp = mtod(mb_opts, u_char *) + IP_HDR_LEN;
1374    *cp++ = IPOPT_NOP;
1375    *cp++ = IPOPT_LSRR;
1376    *cp++ = 11; /* LSRR option length */
1377	*cp++ = 8;  /* LSRR pointer to second element */
1378    *(u_long*)cp = vifp->v_lcl_addr.s_addr;	/* local tunnel end-point */
1379    cp += 4;
1380    *(u_long*)cp = ip->ip_dst.s_addr;		/* destination group */
1381
1382    if (vifp->v_rate_limit <= 0)
1383	tbf_send_packet(vifp, mb_opts, 0);
1384    else
1385	tbf_control(vifp, mb_opts,
1386		    mtod(mb_opts, struct ip *), ip_copy->ip_len, 0);
1387}
1388
1389static void
1390encap_send(ip, vifp, m)
1391    register struct ip *ip;
1392    register struct vif *vifp;
1393    register struct mbuf *m;
1394{
1395    register struct mbuf *mb_copy;
1396    register struct ip *ip_copy;
1397    register int i, len = ip->ip_len;
1398
1399    /*
1400     * copy the old packet & pull up its IP header into the
1401     * new mbuf so we can modify it.  Try to fill the new
1402     * mbuf since if we don't the ethernet driver will.
1403     */
1404    MGET(mb_copy, M_DONTWAIT, MT_DATA);
1405    if (mb_copy == NULL)
1406	return;
1407    mb_copy->m_data += 16;
1408    mb_copy->m_len = sizeof(multicast_encap_iphdr);
1409
1410    if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) {
1411	m_freem(mb_copy);
1412	return;
1413    }
1414    i = MHLEN - M_LEADINGSPACE(mb_copy);
1415    if (i > len)
1416	i = len;
1417    mb_copy = m_pullup(mb_copy, i);
1418    if (mb_copy == NULL)
1419	return;
1420
1421    /*
1422     * fill in the encapsulating IP header.
1423     */
1424    ip_copy = mtod(mb_copy, struct ip *);
1425    *ip_copy = multicast_encap_iphdr;
1426    ip_copy->ip_id = htons(ip_id++);
1427    ip_copy->ip_len += len;
1428    ip_copy->ip_src = vifp->v_lcl_addr;
1429    ip_copy->ip_dst = vifp->v_rmt_addr;
1430
1431    /*
1432     * turn the encapsulated IP header back into a valid one.
1433     */
1434    ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr));
1435    --ip->ip_ttl;
1436    HTONS(ip->ip_len);
1437    HTONS(ip->ip_off);
1438    ip->ip_sum = 0;
1439#if defined(LBL) && !defined(ultrix)
1440    ip->ip_sum = ~oc_cksum((caddr_t)ip, ip->ip_hl << 2, 0);
1441#else
1442    mb_copy->m_data += sizeof(multicast_encap_iphdr);
1443    ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
1444    mb_copy->m_data -= sizeof(multicast_encap_iphdr);
1445#endif
1446
1447    if (vifp->v_rate_limit <= 0)
1448	tbf_send_packet(vifp, mb_copy, 0);
1449    else
1450	tbf_control(vifp, mb_copy, ip, ip_copy->ip_len, 0);
1451}
1452
1453/*
1454 * De-encapsulate a packet and feed it back through ip input (this
1455 * routine is called whenever IP gets a packet with proto type
1456 * ENCAP_PROTO and a local destination address).
1457 */
1458void
1459#ifdef MROUTE_LKM
1460X_multiencap_decap(m)
1461#else
1462multiencap_decap(m)
1463#endif
1464    register struct mbuf *m;
1465{
1466    struct ifnet *ifp = m->m_pkthdr.rcvif;
1467    register struct ip *ip = mtod(m, struct ip *);
1468    register int hlen = ip->ip_hl << 2;
1469    register int s;
1470    register struct ifqueue *ifq;
1471    register struct vif *vifp;
1472
1473    if (ip->ip_p != ENCAP_PROTO) {
1474    	rip_input(m);
1475	return;
1476    }
1477    /*
1478     * dump the packet if it's not to a multicast destination or if
1479     * we don't have an encapsulating tunnel with the source.
1480     * Note:  This code assumes that the remote site IP address
1481     * uniquely identifies the tunnel (i.e., that this site has
1482     * at most one tunnel with the remote site).
1483     */
1484    if (! IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) {
1485	++mrtstat.mrts_bad_tunnel;
1486	m_freem(m);
1487	return;
1488    }
1489    if (ip->ip_src.s_addr != last_encap_src) {
1490	register struct vif *vife;
1491
1492	vifp = viftable;
1493	vife = vifp + numvifs;
1494	last_encap_src = ip->ip_src.s_addr;
1495	last_encap_vif = 0;
1496	for ( ; vifp < vife; ++vifp)
1497	    if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) {
1498		if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT))
1499		    == VIFF_TUNNEL)
1500		    last_encap_vif = vifp;
1501		break;
1502	    }
1503    }
1504    if ((vifp = last_encap_vif) == 0) {
1505	last_encap_src = 0;
1506	mrtstat.mrts_cant_tunnel++; /*XXX*/
1507	m_freem(m);
1508	if (mrtdebug)
1509	    log(LOG_DEBUG, "ip_mforward: no tunnel with %u",
1510		ntohl(ip->ip_src.s_addr));
1511	return;
1512    }
1513    ifp = vifp->v_ifp;
1514    hlen -= sizeof(struct ifnet *);
1515    m->m_data += hlen;
1516    m->m_len -= hlen;
1517    *(mtod(m, struct ifnet **)) = ifp;
1518    ifq = &ipintrq;
1519    s = splimp();
1520    if (IF_QFULL(ifq)) {
1521	IF_DROP(ifq);
1522	m_freem(m);
1523    } else {
1524	IF_ENQUEUE(ifq, m);
1525	/*
1526	 * normally we would need a "schednetisr(NETISR_IP)"
1527	 * here but we were called by ip_input and it is going
1528	 * to loop back & try to dequeue the packet we just
1529	 * queued as soon as we return so we avoid the
1530	 * unnecessary software interrupt.
1531	 */
1532    }
1533    splx(s);
1534}
1535
1536/*
1537 * Token bucket filter module
1538 */
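/*
 * Added overview (descriptive comment, not in the original): each vif
 * owns one struct tbf.  tbf_update_tokens() grows n_tok in proportion
 * to the time since the last packet and the vif's rate limit, capped
 * at MAX_BKT_SIZE.  tbf_control() sends immediately while the queue is
 * empty and enough tokens are available; otherwise it queues the
 * packet in qtable[vif] (evicting a lower-priority packet via
 * tbf_dq_sel() when the queue is full) and drains the queue from
 * tbf_process_q() and the tbf_reprocess_q() timeout.
 */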
1539void
1540tbf_control(vifp, m, ip, p_len, imo)
1541	register struct vif *vifp;
1542	register struct mbuf *m;
1543	register struct ip *ip;
1544	register u_long p_len;
1545	struct ip_moptions *imo;
1546{
1547    tbf_update_tokens(vifp);
1548
1549    /* if there are enough tokens,
1550     * and the queue is empty,
1551     * send this packet out
1552     */
1553
1554    if (vifp->v_tbf->q_len == 0) {
1555	if (p_len <= vifp->v_tbf->n_tok) {
1556	    vifp->v_tbf->n_tok -= p_len;
1557	    tbf_send_packet(vifp, m, imo);
1558	} else if (p_len > MAX_BKT_SIZE) {
1559	    /* drop if packet is too large */
1560	    mrtstat.mrts_pkt2large++;
1561	    m_freem(m);
1562	    return;
1563	} else {
1564	    /* queue packet and timeout till later */
1565	    tbf_queue(vifp, m, ip, imo);
1566	    timeout(tbf_reprocess_q, (caddr_t)vifp, 1);
1567	}
1568    } else if (vifp->v_tbf->q_len < MAXQSIZE) {
1569	/* finite queue length, so queue pkts and process queue */
1570	tbf_queue(vifp, m, ip, imo);
1571	tbf_process_q(vifp);
1572    } else {
1573	/* queue full: try to dequeue a lower-priority packet, then queue and process */
1574	if (!tbf_dq_sel(vifp, ip)) {
1575	    mrtstat.mrts_q_overflow++;
1576	    m_freem(m);
1577	    return;
1578	} else {
1579	    tbf_queue(vifp, m, ip, imo);
1580	    tbf_process_q(vifp);
1581	}
1582    }
1583    return;
1584}
1585
1586/*
1587 * adds a packet to the queue at the interface
1588 */
1589void
1590tbf_queue(vifp, m, ip, imo)
1591	register struct vif *vifp;
1592	register struct mbuf *m;
1593	register struct ip *ip;
1594	struct ip_moptions *imo;
1595{
1596    register u_long ql;
1597    register int index = (vifp - viftable);
1598    register int s = splnet();
1599
1600    ql = vifp->v_tbf->q_len;
1601
1602    qtable[index][ql].pkt_m = m;
1603    qtable[index][ql].pkt_len = (mtod(m, struct ip *))->ip_len;
1604    qtable[index][ql].pkt_ip = ip;
1605    qtable[index][ql].pkt_imo = imo;
1606
1607    vifp->v_tbf->q_len++;
1608    splx(s);
1609}
1610
1611
1612/*
1613 * processes the queue at the interface
1614 */
1615void
1616tbf_process_q(vifp)
1617    register struct vif *vifp;
1618{
1619    register struct pkt_queue pkt_1;
1620    register int index = (vifp - viftable);
1621    register int s = splnet();
1622
1623    /* loop through the queue at the interface and send as many packets
1624     * as possible
1625     */
1626    while (vifp->v_tbf->q_len > 0) {
1627	/* locate the first packet */
1628	pkt_1.pkt_len = ((qtable[index][0]).pkt_len);
1629	pkt_1.pkt_m   = (qtable[index][0]).pkt_m;
1630	pkt_1.pkt_ip   = (qtable[index][0]).pkt_ip;
1631	pkt_1.pkt_imo = (qtable[index][0]).pkt_imo;
1632
1633	/* determine if the packet can be sent */
1634	if (pkt_1.pkt_len <= vifp->v_tbf->n_tok) {
1635	    /* if so,
1636	     * reduce no of tokens, dequeue the queue,
1637	     * send the packet.
1638	     */
1639	    vifp->v_tbf->n_tok -= pkt_1.pkt_len;
1640
1641	    tbf_dequeue(vifp, 0);
1642
1643	    tbf_send_packet(vifp, pkt_1.pkt_m, pkt_1.pkt_imo);
1644
1645	} else break;
1646    }
1647    splx(s);
1648}
1649
1650/*
1651 * removes the jth packet from the queue at the interface
1652 */
1653void
1654tbf_dequeue(vifp,j)
1655    register struct vif *vifp;
1656    register int j;
1657{
1658    register u_long index = vifp - viftable;
1659    register int i;
1660
1661    for (i=j+1; i <= vifp->v_tbf->q_len - 1; i++) {
1662	qtable[index][i-1].pkt_m   = qtable[index][i].pkt_m;
1663	qtable[index][i-1].pkt_len = qtable[index][i].pkt_len;
1664	qtable[index][i-1].pkt_ip = qtable[index][i].pkt_ip;
1665	qtable[index][i-1].pkt_imo = qtable[index][i].pkt_imo;
1666    }
1667    qtable[index][i-1].pkt_m = NULL;
1668    qtable[index][i-1].pkt_len = 0;
1669    qtable[index][i-1].pkt_ip = NULL;
1670    qtable[index][i-1].pkt_imo = NULL;
1671
1672    vifp->v_tbf->q_len--;
1673
1674    if (tbfdebug > 1)
1675	log(LOG_DEBUG, "tbf_dequeue: vif# %d qlen %d",vifp-viftable, i-1);
1676}
1677
1678void
1679tbf_reprocess_q(xvifp)
1680	void *xvifp;
1681{
1682    register struct vif *vifp = xvifp;
1683    if (ip_mrouter == NULL)
1684	return;
1685
1686    tbf_update_tokens(vifp);
1687
1688    tbf_process_q(vifp);
1689
1690    if (vifp->v_tbf->q_len)
1691	timeout(tbf_reprocess_q, (caddr_t)vifp, 1);
1692}
1693
1694/* function that will selectively discard a member of the queue
1695 * based on the precedence value and the priority obtained through
1696 * a lookup table - not yet implemented accurately!
1697 */
1698int
1699tbf_dq_sel(vifp, ip)
1700    register struct vif *vifp;
1701    register struct ip *ip;
1702{
1703    register int i;
1704    register int s = splnet();
1705    register u_int p;
1706
1707    p = priority(vifp, ip);
1708
1709    for(i=vifp->v_tbf->q_len-1;i >= 0;i--) {
1710	if (p > priority(vifp, qtable[vifp-viftable][i].pkt_ip)) {
1711	    m_freem(qtable[vifp-viftable][i].pkt_m);
1712	    tbf_dequeue(vifp,i);
1713	    splx(s);
1714	    mrtstat.mrts_drop_sel++;
1715	    return(1);
1716	}
1717    }
1718    splx(s);
1719    return(0);
1720}
1721
1722void
1723tbf_send_packet(vifp, m, imo)
1724    register struct vif *vifp;
1725    register struct mbuf *m;
1726    struct ip_moptions *imo;
1727{
1728    int error;
1729    int s = splnet();
1730
1731    /* if source route tunnels */
1732    if (vifp->v_flags & VIFF_SRCRT) {
1733	error = ip_output(m, (struct mbuf *)0, (struct route *)0,
1734			  IP_FORWARDING, imo);
1735	if (mrtdebug > 1)
1736	    log(LOG_DEBUG, "srcrt_send on vif %d err %d", vifp-viftable, error);
1737    } else if (vifp->v_flags & VIFF_TUNNEL) {
1738	/* If tunnel options */
1739	ip_output(m, (struct mbuf *)0, (struct route *)0,
1740		  IP_FORWARDING, imo);
1741    } else {
1742	/* if physical interface option, extract the options and then send */
1743	error = ip_output(m, (struct mbuf *)0, (struct route *)0,
1744			  IP_FORWARDING, imo);
1745	FREE(imo, M_IPMOPTS);
1746
1747	if (mrtdebug > 1)
1748	    log(LOG_DEBUG, "phyint_send on vif %d err %d", vifp-viftable, error);
1749    }
1750    splx(s);
1751}
1752
1753/* Determine the current time, compute the time elapsed since the last
1754 * packet (in milliseconds), and update the number of tokens in the
1755 * bucket accordingly.
1756 */
1757void
1758tbf_update_tokens(vifp)
1759    register struct vif *vifp;
1760{
1761    struct timeval tp;
1762    register u_long t;
1763    register u_long elapsed;
1764    register int s = splnet();
1765
1766    GET_TIME(tp);
1767
1768    t = tp.tv_sec*1000 + tp.tv_usec/1000;
1769
1770    elapsed = (t - vifp->v_tbf->last_pkt_t) * vifp->v_rate_limit /8;
1771    vifp->v_tbf->n_tok += elapsed;
1772    vifp->v_tbf->last_pkt_t = t;
1773
1774    if (vifp->v_tbf->n_tok > MAX_BKT_SIZE)
1775	vifp->v_tbf->n_tok = MAX_BKT_SIZE;
1776
1777    splx(s);
1778}
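/*
 * Worked example (added comment; assumes v_rate_limit is expressed in
 * kbits/sec, so ms * kbits/sec / 8 yields bytes): with a 500 kbit/s
 * limit and 40 ms since the last packet, elapsed = 40 * 500 / 8 = 2500,
 * i.e. 2500 bytes worth of tokens are added to n_tok, and anything
 * beyond MAX_BKT_SIZE is discarded.
 */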
1779
1780static int
1781priority(vifp, ip)
1782    register struct vif *vifp;
1783    register struct ip *ip;
1784{
1785    register u_long graddr;
1786    register int prio;
1787
1788    /* temporary hack; will add general packet classifier some day */
1789
1790    prio = 50;  /* default priority */
1791
1792    /* check for source route options and add option length to get dst */
1793    if (vifp->v_flags & VIFF_SRCRT)
1794	graddr = ntohl(((struct ip *)((char *)ip + TUNNEL_LEN))->ip_dst.s_addr);
1795    else
1796	graddr = ntohl(ip->ip_dst.s_addr);
1797
1798    switch (graddr & 0xf) {
1799	case 0x0: break;
1800	case 0x1: if (graddr == 0xe0020001) prio = 65; /* MBone Audio */
1801		  break;
1802	case 0x2: break;
1803	case 0x3: break;
1804	case 0x4: break;
1805	case 0x5: break;
1806	case 0x6: break;
1807	case 0x7: break;
1808	case 0x8: break;
1809	case 0x9: break;
1810	case 0xa: if (graddr == 0xe000010a) prio = 85; /* IETF Low Audio 1 */
1811		  break;
1812	case 0xb: if (graddr == 0xe000010b) prio = 75; /* IETF Audio 1 */
1813		  break;
1814	case 0xc: if (graddr == 0xe000010c) prio = 60; /* IETF Video 1 */
1815		  break;
1816	case 0xd: if (graddr == 0xe000010d) prio = 80; /* IETF Low Audio 2 */
1817		  break;
1818	case 0xe: if (graddr == 0xe000010e) prio = 70; /* IETF Audio 2 */
1819		  break;
1820	case 0xf: if (graddr == 0xe000010f) prio = 55; /* IETF Video 2 */
1821		  break;
1822    }
1823
1824    if (tbfdebug > 1) log(LOG_DEBUG, "graddr %x prio %d", graddr, prio);
1825
1826    return prio;
1827}
1828
1829/*
1830 * End of token bucket filter modifications
1831 */
1832
1833#ifdef MROUTE_LKM
1834#include <sys/conf.h>
1835#include <sys/exec.h>
1836#include <sys/sysent.h>
1837#include <sys/lkm.h>
1838
1839MOD_MISC("ip_mroute_mod")
1840
1841static int
1842ip_mroute_mod_handle(struct lkm_table *lkmtp, int cmd)
1843{
1844	int i;
1845	struct lkm_misc	*args = lkmtp->private.lkm_misc;
1846	int err = 0;
1847
1848	switch(cmd) {
1849		static int (*old_ip_mrouter_cmd)();
1850		static int (*old_ip_mrouter_done)();
1851		static int (*old_ip_mforward)();
1852		static int (*old_mrt_ioctl)();
1853		static int (*old_proto4_input)();
1854		static int (*old_legal_vif_num)();
1855		extern u_char ip_protox[];
1856		extern struct protosw inetsw[];
1857
1858	case LKM_E_LOAD:
1859		if(lkmexists(lkmtp) || ip_mrtproto)
1860		  return(EEXIST);
1861		old_ip_mrouter_cmd = ip_mrouter_cmd;
1862		ip_mrouter_cmd = X_ip_mrouter_cmd;
1863		old_ip_mrouter_done = ip_mrouter_done;
1864		ip_mrouter_done = X_ip_mrouter_done;
1865		old_ip_mforward = ip_mforward;
1866		ip_mforward = X_ip_mforward;
1867		old_mrt_ioctl = mrt_ioctl;
1868		mrt_ioctl = X_mrt_ioctl;
1869		old_proto4_input = inetsw[ip_protox[IPPROTO_ENCAP]].pr_input;
1870		inetsw[ip_protox[IPPROTO_ENCAP]].pr_input = X_multiencap_decap;
1871		old_legal_vif_num = legal_vif_num;
1872		legal_vif_num = X_legal_vif_num;
1873		ip_mrtproto = IGMP_DVMRP;
1874
1875		printf("\nIP multicast routing loaded\n");
1876		break;
1877
1878	case LKM_E_UNLOAD:
1879		if (ip_mrouter)
1880		  return EINVAL;
1881
1882		ip_mrouter_cmd = old_ip_mrouter_cmd;
1883		ip_mrouter_done = old_ip_mrouter_done;
1884		ip_mforward = old_ip_mforward;
1885		mrt_ioctl = old_mrt_ioctl;
1886		inetsw[ip_protox[IPPROTO_ENCAP]].pr_input = old_proto4_input;
1887		legal_vif_num = old_legal_vif_num;
1888		ip_mrtproto = 0;
1889		break;
1890
1891	default:
1892		err = EINVAL;
1893		break;
1894	}
1895
1896	return(err);
1897}
1898
1899int
1900ip_mroute_mod(struct lkm_table *lkmtp, int cmd, int ver) {
1901	DISPATCH(lkmtp, cmd, ver, ip_mroute_mod_handle, ip_mroute_mod_handle,
1902		 nosys);
1903}
1904
1905#endif /* MROUTE_LKM */
1906#endif /* MROUTING */
1907
1908
1909