ip_mroute.c revision 6616
1/*
2 * IP multicast forwarding procedures
3 *
4 * Written by David Waitzman, BBN Labs, August 1988.
5 * Modified by Steve Deering, Stanford, February 1989.
6 * Modified by Mark J. Steiglitz, Stanford, May, 1991
7 * Modified by Van Jacobson, LBL, January 1993
8 * Modified by Ajit Thyagarajan, PARC, August 1993
9 *
10 * MROUTING 1.8
11 */
12
13
14#include <sys/param.h>
15#include <sys/systm.h>
16#include <sys/mbuf.h>
17#include <sys/socket.h>
18#include <sys/socketvar.h>
19#include <sys/protosw.h>
20#include <sys/errno.h>
21#include <sys/time.h>
22#include <sys/ioctl.h>
23#include <sys/syslog.h>
24#include <net/if.h>
25#include <net/route.h>
26#include <net/raw_cb.h>
27#include <netinet/in.h>
28#include <netinet/in_systm.h>
29#include <netinet/ip.h>
30#include <netinet/ip_var.h>
31#include <netinet/in_pcb.h>
32#include <netinet/in_var.h>
33#include <netinet/igmp.h>
34#include <netinet/igmp_var.h>
35#include <netinet/ip_mroute.h>
36
37#ifndef NTOHL
38#if BYTE_ORDER != BIG_ENDIAN
39#define NTOHL(d) ((d) = ntohl((d)))
40#define NTOHS(d) ((d) = ntohs((u_short)(d)))
41#define HTONL(d) ((d) = htonl((d)))
42#define HTONS(d) ((d) = htons((u_short)(d)))
43#else
44#define NTOHL(d)
45#define NTOHS(d)
46#define HTONL(d)
47#define HTONS(d)
48#endif
49#endif
50
51#ifndef MROUTING
52/*
53 * Dummy routines and globals used when multicast routing is not compiled in.
54 */
55
56int		ip_mrtproto = 0;
57struct socket  *ip_mrouter  = NULL;
58struct mrtstat	mrtstat;
59
60
61int
62_ip_mrouter_cmd(cmd, so, m)
63	int cmd;
64	struct socket *so;
65	struct mbuf *m;
66{
67	return(EOPNOTSUPP);
68}
69
70int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = _ip_mrouter_cmd;
71
72int
73_ip_mrouter_done()
74{
75	return(0);
76}
77
78int (*ip_mrouter_done)(void) = _ip_mrouter_done;
79
80int
81_ip_mforward(ip, ifp, m, imo)
82	struct ip *ip;
83	struct ifnet *ifp;
84	struct mbuf *m;
85	struct ip_moptions *imo;
86{
87	return(0);
88}
89
90int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
91		   struct ip_moptions *) = _ip_mforward;
92
93int
94_mrt_ioctl(int req, caddr_t data, struct proc *p)
95{
96	return EOPNOTSUPP;
97}
98
99int (*mrt_ioctl)(int, caddr_t, struct proc *) = _mrt_ioctl;
100
101void multiencap_decap(struct mbuf *m) { /* XXX must fixup manually */
102	rip_input(m);
103}
104
105int (*legal_vif_num)(int) = 0;
106
107#else
108
109#define INSIZ		sizeof(struct in_addr)
110#define	same(a1, a2) \
111	(bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0)
112
113#define MT_MRTABLE MT_RTABLE	/* since nothing else uses it */
114
115/*
116 * Globals.  All but ip_mrouter and ip_mrtproto could be static,
117 * except for netstat or debugging purposes.
118 */
119#ifndef MROUTE_LKM
120struct socket  *ip_mrouter  = NULL;
121struct mrtstat	mrtstat;
122
123int		ip_mrtproto = IGMP_DVMRP;    /* for netstat only */
124#else
125extern struct mrtstat mrtstat;
126extern int ip_mrtproto;
127#endif
128
129#define NO_RTE_FOUND 	0x1
130#define RTE_FOUND	0x2
131
132struct mbuf    *mfctable[MFCTBLSIZ];
133struct vif	viftable[MAXVIFS];
134u_int		mrtdebug = 0;	  /* debug level 	*/
135u_int       	tbfdebug = 0;     /* tbf debug level 	*/
136
137u_long timeout_val = 0;			/* count of outstanding upcalls */
138
139/*
140 * Define the token bucket filter structures
141 * tbftable -> each vif has one of these for storing info
142 * qtable   -> each interface has an associated queue of pkts
143 */
144
145struct tbf tbftable[MAXVIFS];
146struct pkt_queue qtable[MAXVIFS][MAXQSIZE];
147
148/*
149 * 'Interfaces' associated with decapsulator (so we can tell
150 * packets that went through it from ones that get reflected
151 * by a broken gateway).  These interfaces are never linked into
152 * the system ifnet list & no routes point to them.  I.e., packets
153 * can't be sent this way.  They only exist as a placeholder for
154 * multicast source verification.
155 */
156struct ifnet multicast_decap_if[MAXVIFS];
157
158#define ENCAP_TTL 64
159#define ENCAP_PROTO 4
160
161/* prototype IP hdr for encapsulated packets */
162struct ip multicast_encap_iphdr = {
163#if BYTE_ORDER == LITTLE_ENDIAN
164	sizeof(struct ip) >> 2, IPVERSION,
165#else
166	IPVERSION, sizeof(struct ip) >> 2,
167#endif
168	0,				/* tos */
169	sizeof(struct ip),		/* total length */
170	0,				/* id */
171	0,				/* frag offset */
172	ENCAP_TTL, ENCAP_PROTO,
173	0,				/* checksum */
174};
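/*
 * Note: encap_send() below copies this prototype header verbatim and then
 * fills in the per-packet fields: ip_id, ip_len (this header plus the
 * length of the inner datagram), and ip_src/ip_dst (the local and remote
 * tunnel end-points taken from the vif).
 */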
175
176/*
177 * Private variables.
178 */
179static vifi_t	   numvifs = 0;
180
181/*
182 * one-back cache used by multiencap_decap to locate a tunnel's vif
183 * given a datagram's src ip address.
184 */
185static u_long last_encap_src;
186static struct vif *last_encap_vif;
187
188static u_long nethash_fc(u_long, u_long);
189static struct mfc *mfcfind(u_long, u_long);
190int get_sg_cnt(struct sioc_sg_req *);
191int get_vif_cnt(struct sioc_vif_req *);
192int get_vifs(caddr_t);
193static int add_vif(struct vifctl *);
194static int del_vif(vifi_t *);
195static int add_mfc(struct mfcctl *);
196static int del_mfc(struct delmfcctl *);
197static void cleanup_cache(void *);
198static int ip_mdq(struct mbuf *, struct ifnet *, u_long, struct mfc *,
199		  struct ip_moptions *);
200extern int (*legal_vif_num)(int);
201static void phyint_send(struct ip *, struct vif *, struct mbuf *);
202static void srcrt_send(struct ip *, struct vif *, struct mbuf *);
203static void encap_send(struct ip *, struct vif *, struct mbuf *);
204void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long,
205		 struct ip_moptions *);
206void tbf_queue(struct vif *, struct mbuf *, struct ip *, struct ip_moptions *);
207void tbf_process_q(struct vif *);
208void tbf_dequeue(struct vif *, int);
209void tbf_reprocess_q(void *);
210int tbf_dq_sel(struct vif *, struct ip *);
211void tbf_send_packet(struct vif *, struct mbuf *, struct ip_moptions *);
212void tbf_update_tokens(struct vif *);
213static int priority(struct vif *, struct ip *);
214static int ip_mrouter_init(struct socket *);
215
216/*
217 * A simple hash function: returns MFCHASHMOD of the low-order octet of the
218 * origin's net or subnet number, XORed with that of the associated multicast group.
219 */
220static u_long
221nethash_fc(m,n)
222    register u_long m;
223    register u_long n;
224{
225    struct in_addr in1;
226    struct in_addr in2;
227
228    in1.s_addr = m;
229    m = in_netof(in1);
230    while ((m & 0xff) == 0) m >>= 8;
231
232    in2.s_addr = n;
233    n = in_netof(in2);
234    while ((n & 0xff) == 0) n >>= 8;
235
236    return (MFCHASHMOD(m) ^ MFCHASHMOD(n));
237}
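/*
 * The value returned by nethash_fc() indexes mfctable[], the table of
 * installed forwarding entries (MFCHASHMOD, from ip_mroute.h, presumably
 * reduces its argument to the range [0, MFCTBLSIZ)).  The MFCHASH/mfchash
 * cache defined below is a separate per-(source, group) fast path placed
 * in front of that table.
 */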
238
239/*
240 * This is a direct-mapped cache used to speed the mapping from a
241 * datagram source address to the associated multicast route.  Note
242 * that unlike mfctable, the hash is on the IP address, not the IP net number.
243 */
244#define MFCHASHSIZ 1024
245#define MFCHASH(a, g) ((((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
246			((g) >> 20) ^ ((g) >> 10) ^ (g)) & (MFCHASHSIZ-1))
247struct mfc *mfchash[MFCHASHSIZ];
248
249/*
250 * Find a route for a given origin IP address and Multicast group address
251 * Type of service parameter to be added in the future!!!
252 */
253#define MFCFIND(o, g, rt) { \
254	register u_int _mrhasho = o; \
255	register u_int _mrhashg = g; \
256	_mrhasho = MFCHASH(_mrhasho, _mrhashg); \
257	++mrtstat.mrts_mfc_lookups; \
258	rt = mfchash[_mrhasho]; \
259	if ((rt == NULL) || \
260	    ((o & rt->mfc_originmask.s_addr) != rt->mfc_origin.s_addr) || \
261	     (g != rt->mfc_mcastgrp.s_addr)) \
262	     if ((rt = mfcfind(o, g)) != NULL) \
263		mfchash[_mrhasho] = rt; \
264}
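/*
 * Typical use of MFCFIND, as in get_sg_cnt() and X_ip_mforward() below
 * (the caller is expected to be at splnet()):
 *
 *	struct mfc *rt;
 *
 *	s = splnet();
 *	MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt);
 *	splx(s);
 *	if (rt != NULL)
 *		... forward using rt ...
 *
 * On a cache miss the macro falls back to mfcfind() and, if that locates
 * a complete entry (one with no pending upcall), installs it in mfchash[]
 * for subsequent lookups.
 */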
265
266/*
267 * Find route by examining hash table entries
268 */
269static struct mfc *
270mfcfind(origin, mcastgrp)
271    u_long origin;
272    u_long mcastgrp;
273{
274    register struct mbuf *mb_rt;
275    register struct mfc *rt;
276    register u_long hash;
277
278    hash = nethash_fc(origin, mcastgrp);
279    for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) {
280	rt = mtod(mb_rt, struct mfc *);
281	if (((origin & rt->mfc_originmask.s_addr) == rt->mfc_origin.s_addr) &&
282	    (mcastgrp == rt->mfc_mcastgrp.s_addr) &&
283	    (mb_rt->m_act == NULL))
284	    return (rt);
285    }
286    mrtstat.mrts_mfc_misses++;
287    return NULL;
288}
289
290/*
291 * Macros to compute elapsed time efficiently
292 * Borrowed from Van Jacobson's scheduling code
293 */
294#define TV_DELTA(a, b, delta) { \
295	    register int xxs; \
296		\
297	    delta = (a).tv_usec - (b).tv_usec; \
298	    if ((xxs = (a).tv_sec - (b).tv_sec)) { \
299	       switch (xxs) { \
300		      case 2: \
301			  delta += 1000000; \
302			      /* fall through */ \
303		      case 1: \
304			  delta += 1000000; \
305			  break; \
306		      default: \
307			  delta += (1000000 * xxs); \
308	       } \
309	    } \
310}
311
312#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \
313	      (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec)
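/*
 * TV_DELTA yields (a - b) in microseconds, assuming a is no more than a
 * few seconds later than b.  For example, with a = {5, 200000} and
 * b = {3, 900000}: delta = 200000 - 900000 = -700000, xxs = 2, and the
 * two fall-through additions of 1000000 give delta = 1300000 usec, i.e.
 * 1.3 seconds.  TV_LT(a, b) is true iff timeval a is strictly earlier
 * than timeval b.
 */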
314
315/*
316 * Handle DVMRP setsockopt commands to modify the multicast routing tables.
317 */
318int
319X_ip_mrouter_cmd(cmd, so, m)
320    int cmd;
321    struct socket *so;
322    struct mbuf *m;
323{
324    if (cmd != DVMRP_INIT && so != ip_mrouter) return EACCES;
325
326    switch (cmd) {
327	case DVMRP_INIT:     return ip_mrouter_init(so);
328	case DVMRP_DONE:     return ip_mrouter_done();
329	case DVMRP_ADD_VIF:  return add_vif (mtod(m, struct vifctl *));
330	case DVMRP_DEL_VIF:  return del_vif (mtod(m, vifi_t *));
331	case DVMRP_ADD_MFC:  return add_mfc (mtod(m, struct mfcctl *));
332	case DVMRP_DEL_MFC:  return del_mfc (mtod(m, struct delmfcctl *));
333	default:             return EOPNOTSUPP;
334    }
335}
336
337#ifndef MROUTE_LKM
338int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = X_ip_mrouter_cmd;
339#endif
340
341/*
342 * Handle ioctl commands to obtain information from the cache
343 */
344int
345X_mrt_ioctl(cmd, data)
346    int cmd;
347    caddr_t data;
348{
349    int error = 0;
350
351    switch (cmd) {
352      case (SIOCGETVIFINF):		/* Read Virtual Interface (m/cast) */
353	  return (get_vifs(data));
354	  break;
355      case (SIOCGETVIFCNT):
356	  return (get_vif_cnt((struct sioc_vif_req *)data));
357	  break;
358      case (SIOCGETSGCNT):
359	  return (get_sg_cnt((struct sioc_sg_req *)data));
360	  break;
361      default:
362	  return (EINVAL);
363	  break;
364    }
365    return error;
366}
367
368#ifndef MROUTE_LKM
369int (*mrt_ioctl)(int, caddr_t, struct proc *) = X_mrt_ioctl;
370#else
371extern int (*mrt_ioctl)(int, caddr_t, struct proc *);
372#endif
373
374/*
375 * returns the packet count for the source group provided
376 */
377int
378get_sg_cnt(req)
379    register struct sioc_sg_req *req;
380{
381    register struct mfc *rt;
382    int s;
383
384    s = splnet();
385    MFCFIND(req->src.s_addr, req->grp.s_addr, rt);
386    splx(s);
387    if (rt != NULL)
388	req->count = rt->mfc_pkt_cnt;
389    else
390	req->count = 0xffffffff;
391
392    return 0;
393}
394
395/*
396 * returns the input and output packet counts on the interface provided
397 */
398int
399get_vif_cnt(req)
400    register struct sioc_vif_req *req;
401{
402    register vifi_t vifi = req->vifi;
403
404    req->icount = viftable[vifi].v_pkt_in;
405    req->ocount = viftable[vifi].v_pkt_out;
406
407    return 0;
408}
409
410int
411get_vifs(data)
412    char *data;
413{
414    struct vif_conf *vifc = (struct vif_conf *)data;
415    struct vif_req *vifrp, vifr;
416    int space, error=0;
417
418    vifi_t vifi;
419    int s;
420
421    space = vifc->vifc_len;
422    vifrp  = vifc->vifc_req;
423
424    s = splnet();
425    vifc->vifc_num=numvifs;
426
427    for (vifi = 0; vifi <  numvifs; vifi++, vifrp++) {
428	if (viftable[vifi].v_lcl_addr.s_addr != 0) {
429	    vifr.v_flags=viftable[vifi].v_flags;
430	    vifr.v_threshold=viftable[vifi].v_threshold;
431	    vifr.v_lcl_addr=viftable[vifi].v_lcl_addr;
432	    vifr.v_rmt_addr=viftable[vifi].v_rmt_addr;
433	    strncpy(vifr.v_if_name,viftable[vifi].v_ifp->if_name,IFNAMSIZ);
434	    if ((space -= sizeof(vifr)) < 0) {
435		splx(s);
436		return(ENOSPC);
437	    }
438	    error = copyout((caddr_t)&vifr,(caddr_t)vifrp,(u_int)(sizeof vifr));
439	    if (error) {
440		splx(s);
441		return(error);
442	    }
443	}
444    }
445    splx(s);
446    return 0;
447}
448/*
449 * Enable multicast routing
450 */
451static int
452ip_mrouter_init(so)
453	struct socket *so;
454{
455    if (so->so_type != SOCK_RAW ||
456	so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP;
457
458    if (ip_mrouter != NULL) return EADDRINUSE;
459
460    ip_mrouter = so;
461
462    if (mrtdebug)
463	log(LOG_DEBUG, "ip_mrouter_init\n");
464
465    return 0;
466}
467
468/*
469 * Disable multicast routing
470 */
471int
472X_ip_mrouter_done()
473{
474    vifi_t vifi;
475    int i;
476    struct ifnet *ifp;
477    struct ifreq ifr;
478    struct mbuf *mb_rt;
479    struct mbuf *m;
480    struct rtdetq *rte;
481    int s;
482
483    s = splnet();
484
485    /*
486     * For each phyint in use, disable promiscuous reception of all IP
487     * multicasts.
488     */
489    for (vifi = 0; vifi < numvifs; vifi++) {
490	if (viftable[vifi].v_lcl_addr.s_addr != 0 &&
491	    !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
492	    ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
493	    ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr
494								= INADDR_ANY;
495	    ifp = viftable[vifi].v_ifp;
496	    (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
497	}
498    }
499    bzero((caddr_t)qtable, sizeof(qtable));
500    bzero((caddr_t)tbftable, sizeof(tbftable));
501    bzero((caddr_t)viftable, sizeof(viftable));
502    numvifs = 0;
503
504    /*
505     * Check if any outstanding timeouts remain
506     */
507    if (timeout_val != 0)
508	for (i = 0; i < MFCTBLSIZ; i++) {
509	    mb_rt = mfctable[i];
510	    while (mb_rt) {
511		if ( mb_rt->m_act != NULL) {
512		    untimeout(cleanup_cache, (caddr_t)mb_rt);
513		    while (mb_rt->m_act) {
514		        m = mb_rt->m_act;
515			mb_rt->m_act = m->m_act;
516			rte = mtod(m, struct rtdetq *);
517			m_freem(rte->m);
518			m_free(m);
519		    }
520		    timeout_val--;
521		}
522	    mb_rt = mb_rt->m_next;
523	    }
524	    if (timeout_val == 0)
525		break;
526	}
527
528    /*
529     * Free all multicast forwarding cache entries.
530     */
531    for (i = 0; i < MFCTBLSIZ; i++)
532	m_freem(mfctable[i]);
533
534    bzero((caddr_t)mfctable, sizeof(mfctable));
535    bzero((caddr_t)mfchash, sizeof(mfchash));
536
537    /*
538     * Reset de-encapsulation cache
539     */
540    last_encap_src = NULL;
541    last_encap_vif = NULL;
542
543    ip_mrouter = NULL;
544
545    splx(s);
546
547    if (mrtdebug)
548	log(LOG_DEBUG, "ip_mrouter_done\n");
549
550    return 0;
551}
552
553#ifndef MROUTE_LKM
554int (*ip_mrouter_done)(void) = X_ip_mrouter_done;
555#endif
556
557/*
558 * Add a vif to the vif table
559 */
560static int
561add_vif(vifcp)
562    register struct vifctl *vifcp;
563{
564    register struct vif *vifp = viftable + vifcp->vifc_vifi;
565    static struct sockaddr_in sin = {sizeof sin, AF_INET};
566    struct ifaddr *ifa;
567    struct ifnet *ifp;
568    struct ifreq ifr;
569    int error, s;
570    struct tbf *v_tbf = tbftable + vifcp->vifc_vifi;
571
572    if (vifcp->vifc_vifi >= MAXVIFS)  return EINVAL;
573    if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE;
574
575    /* Find the interface with an address in AF_INET family */
576    sin.sin_addr = vifcp->vifc_lcl_addr;
577    ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
578    if (ifa == 0) return EADDRNOTAVAIL;
579    ifp = ifa->ifa_ifp;
580
581    if (vifcp->vifc_flags & VIFF_TUNNEL) {
582	if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) {
583	    static int inited = 0;
584	    if(!inited) {
585		for (s = 0; s < MAXVIFS; ++s) {
586		    multicast_decap_if[s].if_name = "mdecap";
587		    multicast_decap_if[s].if_unit = s;
588		}
589		inited = 1;
590	    }
591	    ifp = &multicast_decap_if[vifcp->vifc_vifi];
592	} else {
593	    ifp = 0;
594	}
595    } else {
596	/* Make sure the interface supports multicast */
597	if ((ifp->if_flags & IFF_MULTICAST) == 0)
598	    return EOPNOTSUPP;
599
600	/* Enable promiscuous reception of all IP multicasts from the if */
601	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
602	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY;
603	s = splnet();
604	error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr);
605	splx(s);
606	if (error)
607	    return error;
608    }
609
610    s = splnet();
611    /* define parameters for the tbf structure */
612    vifp->v_tbf = v_tbf;
613    vifp->v_tbf->q_len = 0;
614    vifp->v_tbf->n_tok = 0;
615    vifp->v_tbf->last_pkt_t = 0;
616
617    vifp->v_flags     = vifcp->vifc_flags;
618    vifp->v_threshold = vifcp->vifc_threshold;
619    vifp->v_lcl_addr  = vifcp->vifc_lcl_addr;
620    vifp->v_rmt_addr  = vifcp->vifc_rmt_addr;
621    vifp->v_ifp       = ifp;
622    vifp->v_rate_limit= vifcp->vifc_rate_limit;
623    /* initialize per vif pkt counters */
624    vifp->v_pkt_in    = 0;
625    vifp->v_pkt_out   = 0;
626    splx(s);
627
628    /* Adjust numvifs up if the vifi is higher than numvifs */
629    if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1;
630
631    if (mrtdebug)
632	log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d\n",
633	    vifcp->vifc_vifi,
634	    ntohl(vifcp->vifc_lcl_addr.s_addr),
635	    (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
636	    ntohl(vifcp->vifc_rmt_addr.s_addr),
637	    vifcp->vifc_threshold,
638	    vifcp->vifc_rate_limit);
639
640    return 0;
641}
642
643/*
644 * Delete a vif from the vif table
645 */
646static int
647del_vif(vifip)
648    vifi_t *vifip;
649{
650    register struct vif *vifp = viftable + *vifip;
651    register vifi_t vifi;
652    struct ifnet *ifp;
653    struct ifreq ifr;
654    int s;
655
656    if (*vifip >= numvifs) return EINVAL;
657    if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL;
658
659    s = splnet();
660
661    if (!(vifp->v_flags & VIFF_TUNNEL)) {
662	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
663	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY;
664	ifp = vifp->v_ifp;
665	(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
666    }
667
668    if (vifp == last_encap_vif) {
669	last_encap_vif = 0;
670	last_encap_src = 0;
671    }
672
673    bzero((caddr_t)qtable[*vifip],
674	  sizeof(qtable[*vifip]));
675    bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf)));
676    bzero((caddr_t)vifp, sizeof (*vifp));
677
678    /* Adjust numvifs down */
679    for (vifi = numvifs; vifi > 0; vifi--)
680	if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break;
681    numvifs = vifi;
682
683    splx(s);
684
685    if (mrtdebug)
686      log(LOG_DEBUG, "del_vif %d, numvifs %d\n", *vifip, numvifs);
687
688    return 0;
689}
690
691/*
692 * Add an mfc entry
693 */
694static int
695add_mfc(mfccp)
696    struct mfcctl *mfccp;
697{
698    struct mfc *rt;
699    struct mfc *rt1 = 0;
700    register struct mbuf *mb_rt;
701    struct mbuf *prev_mb_rt;
702    u_long hash;
703    struct mbuf *mb_ntry;
704    struct rtdetq *rte;
705    register u_short nstl;
706    int s;
707    int i;
708
709    rt = mfcfind(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
710
711    /* If an entry already exists, just update the fields */
712    if (rt) {
713	if (mrtdebug)
714	    log(LOG_DEBUG,"add_mfc update o %x g %x m %x p %x\n",
715		ntohl(mfccp->mfcc_origin.s_addr),
716		ntohl(mfccp->mfcc_mcastgrp.s_addr),
717		ntohl(mfccp->mfcc_originmask.s_addr),
718		mfccp->mfcc_parent);
719
720	s = splnet();
721	rt->mfc_parent = mfccp->mfcc_parent;
722	for (i = 0; i < numvifs; i++)
723	    VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
724	splx(s);
725	return 0;
726    }
727
728    /*
729     * Find the entry for which the upcall was made and update
730     */
731    s = splnet();
732    hash = nethash_fc(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
733    for (prev_mb_rt = mb_rt = mfctable[hash], nstl = 0;
734	 mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) {
735
736	rt = mtod(mb_rt, struct mfc *);
737	if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr)
738	     == mfccp->mfcc_origin.s_addr) &&
739	    (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
740	    (mb_rt->m_act != NULL)) {
741
742	    if (!nstl++) {
743		if (mrtdebug)
744		    log(LOG_DEBUG,"add_mfc o %x g %x m %x p %x dbg %x\n",
745			ntohl(mfccp->mfcc_origin.s_addr),
746			ntohl(mfccp->mfcc_mcastgrp.s_addr),
747			ntohl(mfccp->mfcc_originmask.s_addr),
748			mfccp->mfcc_parent, mb_rt->m_act);
749
750		rt->mfc_origin     = mfccp->mfcc_origin;
751		rt->mfc_originmask = mfccp->mfcc_originmask;
752		rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
753		rt->mfc_parent     = mfccp->mfcc_parent;
754		for (i = 0; i < numvifs; i++)
755		    VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
756		/* initialize pkt counters per src-grp */
757		rt->mfc_pkt_cnt    = 0;
758		rt1 = rt;
759	    }
760
761	    /* prevent cleanup of cache entry */
762	    untimeout(cleanup_cache, (caddr_t)mb_rt);
763	    timeout_val--;
764
765	    /* free packets Qed at the end of this entry */
766	    while (mb_rt->m_act) {
767		mb_ntry = mb_rt->m_act;
768		rte = mtod(mb_ntry, struct rtdetq *);
769		ip_mdq(rte->m, rte->ifp, rte->tunnel_src,
770		       rt1, rte->imo);
771		mb_rt->m_act = mb_ntry->m_act;
772		m_freem(rte->m);
773		m_free(mb_ntry);
774	    }
775
776	    /*
777	     * If more than one entry was created for a single upcall
778	     * delete that entry
779	     */
780	    if (nstl > 1) {
781		MFREE(mb_rt, prev_mb_rt->m_next);
782		mb_rt = prev_mb_rt;
783	    }
784	}
785    }
786
787    /*
788     * It is possible that an entry is being inserted without an upcall
789     */
790    if (nstl == 0) {
791	if (mrtdebug)
792	    log(LOG_DEBUG,"add_mfc no upcall h %d o %x g %x m %x p %x\n",
793		hash, ntohl(mfccp->mfcc_origin.s_addr),
794		ntohl(mfccp->mfcc_mcastgrp.s_addr),
795		ntohl(mfccp->mfcc_originmask.s_addr),
796		mfccp->mfcc_parent);
797
798	for (prev_mb_rt = mb_rt = mfctable[hash];
799	     mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) {
800
801	    rt = mtod(mb_rt, struct mfc *);
802	    if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr)
803		 == mfccp->mfcc_origin.s_addr) &&
804		(rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) {
805
806		rt->mfc_origin     = mfccp->mfcc_origin;
807		rt->mfc_originmask = mfccp->mfcc_originmask;
808		rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
809		rt->mfc_parent     = mfccp->mfcc_parent;
810		for (i = 0; i < numvifs; i++)
811		    VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
812		/* initialize pkt counters per src-grp */
813		rt->mfc_pkt_cnt    = 0;
814	    }
815	}
816	if (mb_rt == NULL) {
817	    /* no upcall, so make a new entry */
818	    MGET(mb_rt, M_DONTWAIT, MT_MRTABLE);
819	    if (mb_rt == NULL) {
820		splx(s);
821		return ENOBUFS;
822	    }
823
824	    rt = mtod(mb_rt, struct mfc *);
825
826	    /* insert new entry at head of hash chain */
827	    rt->mfc_origin     = mfccp->mfcc_origin;
828	    rt->mfc_originmask = mfccp->mfcc_originmask;
829	    rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
830	    rt->mfc_parent     = mfccp->mfcc_parent;
831	    for (i = 0; i < numvifs; i++)
832		VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
833	    /* initialize pkt counters per src-grp */
834	    rt->mfc_pkt_cnt    = 0;
835
836	    /* link into table */
837	    mb_rt->m_next  = mfctable[hash];
838	    mfctable[hash] = mb_rt;
839	    mb_rt->m_act = NULL;
840	}
841    }
842    splx(s);
843    return 0;
844}
845
846/*
847 * Delete an mfc entry
848 */
849static int
850del_mfc(mfccp)
851    struct delmfcctl *mfccp;
852{
853    struct in_addr 	origin;
854    struct in_addr 	mcastgrp;
855    struct mfc 		*rt;
856    struct mbuf 	*mb_rt;
857    struct mbuf 	*prev_mb_rt;
858    u_long 		hash;
859    struct mfc 		**cmfc;
860    struct mfc 		**cmfcend;
861    int s;
862
863    origin = mfccp->mfcc_origin;
864    mcastgrp = mfccp->mfcc_mcastgrp;
865    hash = nethash_fc(origin.s_addr, mcastgrp.s_addr);
866
867    if (mrtdebug)
868	log(LOG_DEBUG,"del_mfc orig %x mcastgrp %x\n",
869	    ntohl(origin.s_addr), ntohl(mcastgrp.s_addr));
870
871    for (prev_mb_rt = mb_rt = mfctable[hash]
872	 ; mb_rt
873	 ; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) {
874        rt = mtod(mb_rt, struct mfc *);
875	if (origin.s_addr == rt->mfc_origin.s_addr &&
876	    mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr &&
877	    mb_rt->m_act == NULL)
878	    break;
879    }
880    if (mb_rt == NULL) {
881	return ESRCH;
882    }
883
884    s = splnet();
885
886    cmfc = mfchash;
887    cmfcend = cmfc + MFCHASHSIZ;
888    for ( ; cmfc < cmfcend; ++cmfc)
889	if (*cmfc == rt)
890	    *cmfc = 0;
891
892    if (prev_mb_rt != mb_rt) {	/* if moved past head of list */
893	MFREE(mb_rt, prev_mb_rt->m_next);
894    } else			/* delete head of list, it is in the table */
895        mfctable[hash] = m_free(mb_rt);
896
897    splx(s);
898
899    return 0;
900}
901
902/*
903 * IP multicast forwarding function. This function assumes that the packet
904 * pointed to by "ip" has arrived on (or is about to be sent to) the interface
905 * pointed to by "ifp", and the packet is to be relayed to other networks
906 * that have members of the packet's destination IP multicast group.
907 *
908 * The packet is returned unscathed to the caller, unless it is tunneled
909 * or erroneous, in which case a non-zero return value tells the caller to
910 * discard it.
911 */
912
913#define IP_HDR_LEN  20	/* # bytes of fixed IP header (excluding options) */
914#define TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */
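/*
 * Sketch of the TUNNEL_LEN option bytes as built by srcrt_send() below
 * (offsets are relative to the end of the fixed IP header):
 *
 *	+0	IPOPT_NOP
 *	+1	IPOPT_LSRR
 *	+2	11		LSRR option length
 *	+3	pointer into the LSRR address list
 *	+4..7	first LSRR element  (the remote tunnel end-point on input)
 *	+8..11	second LSRR element (the destination group on output)
 *
 * ip_mforward() validates these bytes on receipt, records the remote
 * end-point as tunnel_src, and strips the options from the packet.
 */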
915
916int
917X_ip_mforward(ip, ifp, m, imo)
918    register struct ip *ip;
919    struct ifnet *ifp;
920    struct mbuf *m;
921    struct ip_moptions *imo;
922{
923    register struct mfc *rt;
924    register u_char *ipoptions;
925    u_long tunnel_src;
926    static struct sockproto	k_igmpproto 	= { AF_INET, IPPROTO_IGMP };
927    static struct sockaddr_in 	k_igmpsrc	= { sizeof k_igmpsrc, AF_INET };
928    static struct sockaddr_in 	k_igmpdst 	= { sizeof k_igmpdst, AF_INET };
929    register struct mbuf *mm;
930    register struct ip *k_data;
931    int s;
932
933    if (mrtdebug > 1)
934	log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %x\n",
935	    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp);
936
937    if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
938	(ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) {
939	/*
940	 * Packet arrived via a physical interface.
941	 */
942	tunnel_src = 0;
943    } else {
944	/*
945	 * Packet arrived through a source-route tunnel.
946	 *
947	 * A source-route tunneled packet has a single NOP option and a
948	 * two-element loose-source-and-record-route (LSRR) option
949	 * immediately following the fixed-size part of the IP header.
950	 * At this point in processing, the IP header should contain the
951	 * following IP addresses:
952	 *
953	 *	original source          - in the source address field
954	 *	destination group        - in the destination address field
955	 *	remote tunnel end-point  - in the first  element of LSRR
956	 *	one of this host's addrs - in the second element of LSRR
957	 *
958	 * NOTE: RFC-1075 would have the original source and remote tunnel
959	 *	 end-point addresses swapped.  However, that could cause
960	 *	 delivery of ICMP error messages to innocent applications
961	 *	 on intermediate routing hosts!  Therefore, we hereby
962	 *	 change the spec.
963	 */
964
965	/*
966	 * Verify that the tunnel options are well-formed.
967	 */
968	if (ipoptions[0] != IPOPT_NOP ||
969	    ipoptions[2] != 11 ||	/* LSRR option length   */
970	    ipoptions[3] != 12 ||	/* LSRR address pointer */
971	    (tunnel_src = *(u_long *)(&ipoptions[4])) == 0) {
972	    mrtstat.mrts_bad_tunnel++;
973	    if (mrtdebug)
974		log(LOG_DEBUG,
975		    "ip_mforward: bad tunnel from %u (%x %x %x %x %x %x)\n",
976		    ntohl(ip->ip_src.s_addr),
977		    ipoptions[0], ipoptions[1], ipoptions[2], ipoptions[3],
978		    *(u_long *)(&ipoptions[4]), *(u_long *)(&ipoptions[8]));
979	    return 1;
980	}
981
982	/*
983	 * Delete the tunnel options from the packet.
984	 */
985	ovbcopy((caddr_t)(ipoptions + TUNNEL_LEN), (caddr_t)ipoptions,
986		(unsigned)(m->m_len - (IP_HDR_LEN + TUNNEL_LEN)));
987	m->m_len   -= TUNNEL_LEN;
988	ip->ip_len -= TUNNEL_LEN;
989	ip->ip_hl  -= TUNNEL_LEN >> 2;
990
991	ifp = 0;
992    }
993
994    /*
995     * Don't forward a packet with time-to-live of zero or one,
996     * or a packet destined to a local-only group.
997     */
998    if (ip->ip_ttl <= 1 ||
999	ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP)
1000	return (int)tunnel_src;
1001
1002    /*
1003     * Determine forwarding vifs from the forwarding cache table
1004     */
1005    s = splnet();
1006    MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt);
1007
1008    /* Entry exists, so forward if necessary */
1009    if (rt != NULL) {
1010	splx(s);
1011	return (ip_mdq(m, ifp, tunnel_src, rt, imo));
1012    }
1013
1014    else {
1015	/*
1016	 * If we don't have a route for packet's origin,
1017	 * Make a copy of the packet &
1018	 * send message to routing daemon
1019	 */
1020
1021	register struct mbuf *mb_rt;
1022	register struct mbuf *mb_ntry;
1023	register struct mbuf *mb0;
1024	register struct rtdetq *rte;
1025	register struct mbuf *rte_m;
1026	register u_long hash;
1027
1028	mrtstat.mrts_no_route++;
1029	if (mrtdebug)
1030	    log(LOG_DEBUG, "ip_mforward: no rte s %x g %x\n",
1031		ntohl(ip->ip_src.s_addr),
1032		ntohl(ip->ip_dst.s_addr));
1033
1034	/* is there an upcall waiting for this packet? */
1035	hash = nethash_fc(ip->ip_src.s_addr, ip->ip_dst.s_addr);
1036	for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) {
1037	    rt = mtod(mb_rt, struct mfc *);
1038	    if (((ip->ip_src.s_addr & rt->mfc_originmask.s_addr) ==
1039		 rt->mfc_origin.s_addr) &&
1040		(ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) &&
1041		(mb_rt->m_act != NULL))
1042		break;
1043	}
1044
1045	if (mb_rt == NULL) {
1046	    /* no upcall, so make a new entry */
1047	    MGET(mb_rt, M_DONTWAIT, MT_MRTABLE);
1048	    if (mb_rt == NULL) {
1049		splx(s);
1050		return ENOBUFS;
1051	    }
1052
1053	    rt = mtod(mb_rt, struct mfc *);
1054
1055	    /* insert new entry at head of hash chain */
1056	    rt->mfc_origin.s_addr     = ip->ip_src.s_addr;
1057	    rt->mfc_originmask.s_addr = (u_long)0xffffffff;
1058	    rt->mfc_mcastgrp.s_addr   = ip->ip_dst.s_addr;
1059
1060	    /* link into table */
1061	    hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr);
1062	    mb_rt->m_next  = mfctable[hash];
1063	    mfctable[hash] = mb_rt;
1064	    mb_rt->m_act = NULL;
1065
1066	}
1067
1068	/* determine if q has overflowed */
1069	for (rte_m = mb_rt, hash = 0; rte_m->m_act; rte_m = rte_m->m_act)
1070	    hash++;
1071
1072	if (hash > MAX_UPQ) {
1073	    mrtstat.mrts_upq_ovflw++;
1074	    splx(s);
1075	    return 0;
1076	}
1077
1078	/* add this packet and timing, ifp info to m_act */
1079	MGET(mb_ntry, M_DONTWAIT, MT_DATA);
1080	if (mb_ntry == NULL) {
1081	    splx(s);
1082	    return ENOBUFS;
1083	}
1084
1085	mb_ntry->m_act = NULL;
1086	rte = mtod(mb_ntry, struct rtdetq *);
1087
1088	mb0 = m_copy(m, 0, M_COPYALL);
1089	if (mb0 == NULL) {
1090	    splx(s);
1091	    return ENOBUFS;
1092	}
1093
1094	rte->m 			= mb0;
1095	rte->ifp 		= ifp;
1096	rte->tunnel_src 	= tunnel_src;
1097	rte->imo		= imo;
1098
1099	rte_m->m_act = mb_ntry;
1100
1101	splx(s);
1102
1103	if (hash == 0) {
1104	    /*
1105	     * Send message to routing daemon to install
1106	     * a route into the kernel table
1107	     */
1108	    k_igmpsrc.sin_addr = ip->ip_src;
1109	    k_igmpdst.sin_addr = ip->ip_dst;
1110
1111	    mm = m_copy(m, 0, M_COPYALL);
1112	    if (mm == NULL) {
1113		splx(s);
1114		return ENOBUFS;
1115	    }
1116
1117	    k_data = mtod(mm, struct ip *);
1118	    k_data->ip_p = 0;
1119
1120	    mrtstat.mrts_upcalls++;
1121
1122	    raw_input(mm, &k_igmpproto,
1123		      (struct sockaddr *)&k_igmpsrc,
1124		      (struct sockaddr *)&k_igmpdst);
1125
1126	    /* set timer to cleanup entry if upcall is lost */
1127	    timeout(cleanup_cache, (caddr_t)mb_rt, 100);
1128	    timeout_val++;
1129	}
1130
1131	return 0;
1132    }
1133}
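/*
 * Upcall summary: when no forwarding entry exists, one copy of the
 * datagram (with ip_p cleared) is handed to the user-level routing daemon
 * over its raw IGMP socket via raw_input(), and another copy is queued on
 * the placeholder cache entry.  add_mfc() replays the queued packets
 * through ip_mdq() once the daemon installs the route; if the upcall is
 * lost, cleanup_cache() (scheduled 100 ticks out) discards the entry.
 */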
1134
1135#ifndef MROUTE_LKM
1136int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
1137		   struct ip_moptions *) = X_ip_mforward;
1138#endif
1139
1140/*
1141 * Clean up the cache entry if upcall is not serviced
1142 */
1143static void
1144cleanup_cache(xmb_rt)
1145	void *xmb_rt;
1146{
1147    struct mbuf *mb_rt = xmb_rt;
1148    struct mfc *rt;
1149    u_long hash;
1150    struct mbuf *prev_m0;
1151    struct mbuf *m0;
1152    struct mbuf *m;
1153    struct rtdetq *rte;
1154    int s;
1155
1156    rt = mtod(mb_rt, struct mfc *);
1157    hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr);
1158
1159    if (mrtdebug)
1160	log(LOG_DEBUG, "ip_mforward: cleanup ipm %d h %d s %x g %x\n",
1161	    ip_mrouter, hash, ntohl(rt->mfc_origin.s_addr),
1162	    ntohl(rt->mfc_mcastgrp.s_addr));
1163
1164    mrtstat.mrts_cache_cleanups++;
1165
1166    /*
1167     * determine entry to be cleaned up in cache table
1168     */
1169    s = splnet();
1170    for (prev_m0 = m0 = mfctable[hash]; m0; prev_m0 = m0, m0 = m0->m_next)
1171	if (m0 == mb_rt)
1172	    break;
1173
1174    /*
1175     * drop all the packets
1176     * free the mbuf with the pkt, if, timing info
1177     */
1178    while (mb_rt->m_act) {
1179	m = mb_rt->m_act;
1180	mb_rt->m_act = m->m_act;
1181
1182	rte = mtod(m, struct rtdetq *);
1183	m_freem(rte->m);
1184	m_free(m);
1185    }
1186
1187    /*
1188     * Delete the entry from the cache
1189     */
1190    if (prev_m0 != m0) {	/* if moved past head of list */
1191	MFREE(m0, prev_m0->m_next);
1192    } else			/* delete head of list, it is in the table */
1193	mfctable[hash] = m_free(m0);
1194
1195    timeout_val--;
1196    splx(s);
1197}
1198
1199/*
1200 * Packet forwarding routine once entry in the cache is made
1201 */
1202static int
1203ip_mdq(m, ifp, tunnel_src, rt, imo)
1204    register struct mbuf *m;
1205    register struct ifnet *ifp;
1206    register u_long tunnel_src;
1207    register struct mfc *rt;
1208    register struct ip_moptions *imo;
1209{
1210    register struct ip  *ip = mtod(m, struct ip *);
1211    register vifi_t vifi;
1212    register struct vif *vifp;
1213
1214    /*
1215     * Don't forward if it didn't arrive from the parent vif for its origin.
1216     * Notes: v_ifp is zero for src route tunnels, multicast_decap_if
1217     * for encapsulated tunnels and a real ifnet for non-tunnels so
1218     * the first part of the if catches wrong physical interface or
1219     * tunnel type; v_rmt_addr is zero for non-tunneled packets so
1220     * the 2nd part catches both packets that arrive via a tunnel
1221     * that shouldn't and packets that arrive via the wrong tunnel.
1222     */
1223    vifi = rt->mfc_parent;
1224    if (viftable[vifi].v_ifp != ifp ||
1225	(ifp == 0 && viftable[vifi].v_rmt_addr.s_addr != tunnel_src)) {
1226	/* came in the wrong interface */
1227	if (mrtdebug)
1228	    log(LOG_DEBUG, "wrong if: ifp %x vifi %d\n",
1229		ifp, vifi);
1230	++mrtstat.mrts_wrong_if;
1231	return (int)tunnel_src;
1232    }
1233
1234    /* increment the interface and s-g counters */
1235    viftable[vifi].v_pkt_in++;
1236    rt->mfc_pkt_cnt++;
1237
1238    /*
1239     * For each vif, decide if a copy of the packet should be forwarded.
1240     * Forward if:
1241     *		- the ttl exceeds the vif's threshold
1242     *		- there are group members downstream on interface
1243     */
1244#define MC_SEND(ip,vifp,m) {                             \
1245		(vifp)->v_pkt_out++;                     \
1246                if ((vifp)->v_flags & VIFF_SRCRT)        \
1247                    srcrt_send((ip), (vifp), (m));       \
1248                else if ((vifp)->v_flags & VIFF_TUNNEL)  \
1249                    encap_send((ip), (vifp), (m));       \
1250                else                                     \
1251                    phyint_send((ip), (vifp), (m));      \
1252                }
1253
1254    /* If ip_moptions are present and imo_multicast_vif selects a valid
1255     * vif, send on that vif only and skip the normal forwarding below. */
1256    if ((imo != NULL) &&
1257       (( vifi = imo->imo_multicast_vif - 1) < numvifs) /*&& (vifi>=0)*/)
1258    {
1259        MC_SEND(ip,viftable+vifi,m);
1260        return (1);        /* make sure we are done: No more physical sends */
1261    }
1262
1263    for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++)
1264	if ((rt->mfc_ttls[vifi] > 0) &&
1265	    (ip->ip_ttl > rt->mfc_ttls[vifi]))
1266	    MC_SEND(ip, vifp, m);
1267
1268    return 0;
1269}
1270
1271/* Check whether a vif number is legal/ok.  This is used by ip_output,
1272 * which otherwise has no way to check a vif against numvifs.
1273 */
1274int
1275X_legal_vif_num(vif)
1276    int vif;
1277{   if (vif >= 0 && vif < numvifs)
1278       return(1);
1279    else
1280       return(0);
1281}
1282
1283#ifndef MROUTE_LKM
1284int (*legal_vif_num)(int) = X_legal_vif_num;
1285#endif
1286
1287static void
1288phyint_send(ip, vifp, m)
1289    struct ip *ip;
1290    struct vif *vifp;
1291    struct mbuf *m;
1292{
1293    register struct mbuf *mb_copy;
1294    int hlen = ip->ip_hl << 2;
1295    register struct ip_moptions *imo;
1296
1297    if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL)
1298	return;
1299
1300    /*
1301     * Make sure the header isn't in a cluster, because the sharing
1302     * in clusters defeats the whole purpose of making the copy above.
1303     */
1304    mb_copy = m_pullup(mb_copy, hlen);
1305    if (mb_copy == NULL)
1306	    return;
1307
1308    MALLOC(imo, struct ip_moptions *, sizeof *imo, M_IPMOPTS, M_NOWAIT);
1309    if (imo == NULL) {
1310	m_freem(mb_copy);
1311	return;
1312    }
1313
1314    imo->imo_multicast_ifp  = vifp->v_ifp;
1315    imo->imo_multicast_ttl  = ip->ip_ttl - 1;
1316    imo->imo_multicast_loop = 1;
1317
1318    if (vifp->v_rate_limit <= 0)
1319	tbf_send_packet(vifp, mb_copy, imo);
1320    else
1321	tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len,
1322		    imo);
1323}
1324
1325static void
1326srcrt_send(ip, vifp, m)
1327    struct ip *ip;
1328    struct vif *vifp;
1329    struct mbuf *m;
1330{
1331    struct mbuf *mb_copy, *mb_opts;
1332    int hlen = ip->ip_hl << 2;
1333    register struct ip *ip_copy;
1334    u_char *cp;
1335
1336    /*
1337     * Make sure that adding the tunnel options won't exceed the
1338     * maximum allowed number of option bytes.
1339     */
1340    if (ip->ip_hl > (60 - TUNNEL_LEN) >> 2) {
1341	mrtstat.mrts_cant_tunnel++;
1342	if (mrtdebug)
1343	    log(LOG_DEBUG, "srcrt_send: no room for tunnel options, from %u\n",
1344		ntohl(ip->ip_src.s_addr));
1345	return;
1346    }
1347
1348    if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL)
1349	return;
1350
1351    MGETHDR(mb_opts, M_DONTWAIT, MT_HEADER);
1352    if (mb_opts == NULL) {
1353	m_freem(mb_copy);
1354	return;
1355    }
1356    /*
1357     * 'Delete' the base ip header from the mb_copy chain
1358     */
1359    mb_copy->m_len -= hlen;
1360    mb_copy->m_data += hlen;
1361    /*
1362     * Make mb_opts be the new head of the packet chain.
1363     * Any options of the packet were left in the old packet chain head
1364     */
1365    mb_opts->m_next = mb_copy;
1366    mb_opts->m_len = hlen + TUNNEL_LEN;
1367    mb_opts->m_data += MSIZE - mb_opts->m_len;
1368    mb_opts->m_pkthdr.len = mb_copy->m_pkthdr.len + TUNNEL_LEN;
1369    /*
1370     * Copy the base ip header from the mb_copy chain to the new head mbuf
1371     */
1372    ip_copy = mtod(mb_opts, struct ip *);
1373    bcopy((caddr_t)ip, mtod(mb_opts, caddr_t), hlen);
1374    ip_copy->ip_ttl--;
1375    ip_copy->ip_dst = vifp->v_rmt_addr;	  /* remote tunnel end-point */
1376    /*
1377     * Adjust the ip header length to account for the tunnel options.
1378     */
1379    ip_copy->ip_hl  += TUNNEL_LEN >> 2;
1380    ip_copy->ip_len += TUNNEL_LEN;
1381    /*
1382     * Add the NOP and LSRR after the base ip header
1383     */
1384    cp = mtod(mb_opts, u_char *) + IP_HDR_LEN;
1385    *cp++ = IPOPT_NOP;
1386    *cp++ = IPOPT_LSRR;
1387    *cp++ = 11; /* LSRR option length */
1388    *cp++ = 8;  /* LSRR pointer to second element */
1389    *(u_long*)cp = vifp->v_lcl_addr.s_addr;	/* local tunnel end-point */
1390    cp += 4;
1391    *(u_long*)cp = ip->ip_dst.s_addr;		/* destination group */
1392
1393    if (vifp->v_rate_limit <= 0)
1394	tbf_send_packet(vifp, mb_opts, 0);
1395    else
1396	tbf_control(vifp, mb_opts,
1397		    mtod(mb_opts, struct ip *), ip_copy->ip_len, 0);
1398}
1399
1400static void
1401encap_send(ip, vifp, m)
1402    register struct ip *ip;
1403    register struct vif *vifp;
1404    register struct mbuf *m;
1405{
1406    register struct mbuf *mb_copy;
1407    register struct ip *ip_copy;
1408    int hlen = ip->ip_hl << 2;
1409    register int i, len = ip->ip_len;
1410
1411    /*
1412     * copy the old packet & pull up its IP header into the
1413     * new mbuf so we can modify it.  Try to fill the new
1414     * mbuf since if we don't the ethernet driver will.
1415     */
1416    MGET(mb_copy, M_DONTWAIT, MT_DATA);
1417    if (mb_copy == NULL)
1418	return;
1419    mb_copy->m_data += 16;
1420    mb_copy->m_len = sizeof(multicast_encap_iphdr);
1421
1422    if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) {
1423	m_freem(mb_copy);
1424	return;
1425    }
1426    i = MHLEN - M_LEADINGSPACE(mb_copy);
1427    if (i > len)
1428	i = len;
1429    mb_copy = m_pullup(mb_copy, i);
1430    if (mb_copy == NULL)
1431	return;
1432    mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr);
1433
1434    /*
1435     * fill in the encapsulating IP header.
1436     */
1437    ip_copy = mtod(mb_copy, struct ip *);
1438    *ip_copy = multicast_encap_iphdr;
1439    ip_copy->ip_id = htons(ip_id++);
1440    ip_copy->ip_len += len;
1441    ip_copy->ip_src = vifp->v_lcl_addr;
1442    ip_copy->ip_dst = vifp->v_rmt_addr;
1443
1444    /*
1445     * turn the encapsulated IP header back into a valid one.
1446     */
1447    ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr));
1448    --ip->ip_ttl;
1449    HTONS(ip->ip_len);
1450    HTONS(ip->ip_off);
1451    ip->ip_sum = 0;
1452#if defined(LBL) && !defined(ultrix)
1453    ip->ip_sum = ~oc_cksum((caddr_t)ip, ip->ip_hl << 2, 0);
1454#else
1455    mb_copy->m_data += sizeof(multicast_encap_iphdr);
1456    ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
1457    mb_copy->m_data -= sizeof(multicast_encap_iphdr);
1458#endif
1459
1460    if (vifp->v_rate_limit <= 0)
1461	tbf_send_packet(vifp, mb_copy, 0);
1462    else
1463	tbf_control(vifp, mb_copy, ip, ip_copy->ip_len, 0);
1464}
1465
1466/*
1467 * De-encapsulate a packet and feed it back through ip input (this
1468 * routine is called whenever IP gets a packet with proto type
1469 * ENCAP_PROTO and a local destination address).
1470 */
1471void
1472#ifdef MROUTE_LKM
1473X_multiencap_decap(m)
1474#else
1475multiencap_decap(m)
1476#endif
1477    register struct mbuf *m;
1478{
1479    struct ifnet *ifp = m->m_pkthdr.rcvif;
1480    register struct ip *ip = mtod(m, struct ip *);
1481    register int hlen = ip->ip_hl << 2;
1482    register int s;
1483    register struct ifqueue *ifq;
1484    register struct vif *vifp;
1485
1486    if (ip->ip_p != ENCAP_PROTO) {
1487    	rip_input(m);
1488	return;
1489    }
1490    /*
1491     * dump the packet if it's not to a multicast destination or if
1492     * we don't have an encapsulating tunnel with the source.
1493     * Note:  This code assumes that the remote site IP address
1494     * uniquely identifies the tunnel (i.e., that this site has
1495     * at most one tunnel with the remote site).
1496     */
1497    if (! IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) {
1498	++mrtstat.mrts_bad_tunnel;
1499	m_freem(m);
1500	return;
1501    }
1502    if (ip->ip_src.s_addr != last_encap_src) {
1503	register struct vif *vife;
1504
1505	vifp = viftable;
1506	vife = vifp + numvifs;
1507	last_encap_src = ip->ip_src.s_addr;
1508	last_encap_vif = 0;
1509	for ( ; vifp < vife; ++vifp)
1510	    if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) {
1511		if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT))
1512		    == VIFF_TUNNEL)
1513		    last_encap_vif = vifp;
1514		break;
1515	    }
1516    }
1517    if ((vifp = last_encap_vif) == 0) {
1518	last_encap_src = 0;
1519	mrtstat.mrts_cant_tunnel++; /*XXX*/
1520	m_freem(m);
1521	if (mrtdebug)
1522	    log(LOG_DEBUG, "ip_mforward: no tunnel with %u\n",
1523		ntohl(ip->ip_src.s_addr));
1524	return;
1525    }
1526    ifp = vifp->v_ifp;
1527    hlen -= sizeof(struct ifnet *);
1528    m->m_data += hlen;
1529    m->m_len -= hlen;
1530    *(mtod(m, struct ifnet **)) = ifp;
1531    ifq = &ipintrq;
1532    s = splimp();
1533    if (IF_QFULL(ifq)) {
1534	IF_DROP(ifq);
1535	m_freem(m);
1536    } else {
1537	IF_ENQUEUE(ifq, m);
1538	/*
1539	 * normally we would need a "schednetisr(NETISR_IP)"
1540	 * here but we were called by ip_input and it is going
1541	 * to loop back & try to dequeue the packet we just
1542	 * queued as soon as we return so we avoid the
1543	 * unnecessary software interrupt.
1544	 */
1545    }
1546    splx(s);
1547}
1548
1549/*
1550 * Token bucket filter module
1551 */
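/*
 * Rough sketch of the scheme, assuming v_rate_limit is in kbits/sec as
 * configured by the routing daemon: tokens are counted in bytes.
 * tbf_update_tokens() credits n_tok with elapsed_msec * v_rate_limit / 8
 * bytes, capped at MAX_BKT_SIZE, and a packet of p_len bytes may be sent
 * once p_len <= n_tok.  E.g. at 500 kbits/sec, 100 msec of idle time
 * earns 100 * 500 / 8 = 6250 bytes worth of tokens.
 */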
1552void
1553tbf_control(vifp, m, ip, p_len, imo)
1554	register struct vif *vifp;
1555	register struct mbuf *m;
1556	register struct ip *ip;
1557	register u_long p_len;
1558	struct ip_moptions *imo;
1559{
1560    tbf_update_tokens(vifp);
1561
1562    /* If the queue is empty and there are enough tokens, send this
1563     * packet out immediately; otherwise queue it, or drop it if it
1564     * could never fit in the bucket.
1565     */
1566
1567    if (vifp->v_tbf->q_len == 0) {
1568	if (p_len <= vifp->v_tbf->n_tok) {
1569	    vifp->v_tbf->n_tok -= p_len;
1570	    tbf_send_packet(vifp, m, imo);
1571	} else if (p_len > MAX_BKT_SIZE) {
1572	    /* drop if packet is too large */
1573	    mrtstat.mrts_pkt2large++;
1574	    m_freem(m);
1575	    return;
1576	} else {
1577	    /* queue packet and timeout till later */
1578	    tbf_queue(vifp, m, ip, imo);
1579	    timeout(tbf_reprocess_q, (caddr_t)vifp, 1);
1580	}
1581    } else if (vifp->v_tbf->q_len < MAXQSIZE) {
1582	/* finite queue length, so queue pkts and process queue */
1583	tbf_queue(vifp, m, ip, imo);
1584	tbf_process_q(vifp);
1585    } else {
1586	/* queue is full: try to drop a lower-priority packet, then queue and process */
1587	if (!tbf_dq_sel(vifp, ip)) {
1588	    mrtstat.mrts_q_overflow++;
1589	    m_freem(m);
1590	    return;
1591	} else {
1592	    tbf_queue(vifp, m, ip, imo);
1593	    tbf_process_q(vifp);
1594	}
1595    }
1596    return;
1597}
1598
1599/*
1600 * adds a packet to the queue at the interface
1601 */
1602void
1603tbf_queue(vifp, m, ip, imo)
1604	register struct vif *vifp;
1605	register struct mbuf *m;
1606	register struct ip *ip;
1607	struct ip_moptions *imo;
1608{
1609    register u_long ql;
1610    register int index = (vifp - viftable);
1611    register int s = splnet();
1612
1613    ql = vifp->v_tbf->q_len;
1614
1615    qtable[index][ql].pkt_m = m;
1616    qtable[index][ql].pkt_len = (mtod(m, struct ip *))->ip_len;
1617    qtable[index][ql].pkt_ip = ip;
1618    qtable[index][ql].pkt_imo = imo;
1619
1620    vifp->v_tbf->q_len++;
1621    splx(s);
1622}
1623
1624
1625/*
1626 * processes the queue at the interface
1627 */
1628void
1629tbf_process_q(vifp)
1630    register struct vif *vifp;
1631{
1632    register struct pkt_queue pkt_1;
1633    register int index = (vifp - viftable);
1634    register int s = splnet();
1635
1636    /* loop through the queue at the interface and send as many packets
1637     * as possible
1638     */
1639    while (vifp->v_tbf->q_len > 0) {
1640	/* locate the first packet */
1641	pkt_1.pkt_len = ((qtable[index][0]).pkt_len);
1642	pkt_1.pkt_m   = (qtable[index][0]).pkt_m;
1643	pkt_1.pkt_ip   = (qtable[index][0]).pkt_ip;
1644	pkt_1.pkt_imo = (qtable[index][0]).pkt_imo;
1645
1646	/* determine if the packet can be sent */
1647	if (pkt_1.pkt_len <= vifp->v_tbf->n_tok) {
1648	    /* if so,
1649	     * reduce the number of tokens, dequeue the packet,
1650	     * send the packet.
1651	     */
1652	    vifp->v_tbf->n_tok -= pkt_1.pkt_len;
1653
1654	    tbf_dequeue(vifp, 0);
1655
1656	    tbf_send_packet(vifp, pkt_1.pkt_m, pkt_1.pkt_imo);
1657
1658	} else break;
1659    }
1660    splx(s);
1661}
1662
1663/*
1664 * removes the jth packet from the queue at the interface
1665 */
1666void
1667tbf_dequeue(vifp,j)
1668    register struct vif *vifp;
1669    register int j;
1670{
1671    register u_long index = vifp - viftable;
1672    register int i;
1673
1674    for (i=j+1; i <= vifp->v_tbf->q_len - 1; i++) {
1675	qtable[index][i-1].pkt_m   = qtable[index][i].pkt_m;
1676	qtable[index][i-1].pkt_len = qtable[index][i].pkt_len;
1677	qtable[index][i-1].pkt_ip = qtable[index][i].pkt_ip;
1678	qtable[index][i-1].pkt_imo = qtable[index][i].pkt_imo;
1679    }
1680    qtable[index][i-1].pkt_m = NULL;
1681    qtable[index][i-1].pkt_len = 0;
1682    qtable[index][i-1].pkt_ip = NULL;
1683    qtable[index][i-1].pkt_imo = NULL;
1684
1685    vifp->v_tbf->q_len--;
1686
1687    if (tbfdebug > 1)
1688	log(LOG_DEBUG, "tbf_dequeue: vif# %d qlen %d\n",vifp-viftable, i-1);
1689}
1690
1691void
1692tbf_reprocess_q(xvifp)
1693	void *xvifp;
1694{
1695    register struct vif *vifp = xvifp;
1696    if (ip_mrouter == NULL)
1697	return;
1698
1699    tbf_update_tokens(vifp);
1700
1701    tbf_process_q(vifp);
1702
1703    if (vifp->v_tbf->q_len)
1704	timeout(tbf_reprocess_q, (caddr_t)vifp, 1);
1705}
1706
1707/* Selectively discard a queued packet whose priority (as computed by
1708 * priority() below) is lower than that of the arriving packet.
1709 * The priority lookup is not yet implemented accurately!
1710 */
1711int
1712tbf_dq_sel(vifp, ip)
1713    register struct vif *vifp;
1714    register struct ip *ip;
1715{
1716    register int i;
1717    register int s = splnet();
1718    register u_int p;
1719
1720    p = priority(vifp, ip);
1721
1722    for(i=vifp->v_tbf->q_len-1;i >= 0;i--) {
1723	if (p > priority(vifp, qtable[vifp-viftable][i].pkt_ip)) {
1724	    m_freem(qtable[vifp-viftable][i].pkt_m);
1725	    tbf_dequeue(vifp,i);
1726	    splx(s);
1727	    mrtstat.mrts_drop_sel++;
1728	    return(1);
1729	}
1730    }
1731    splx(s);
1732    return(0);
1733}
1734
1735void
1736tbf_send_packet(vifp, m, imo)
1737    register struct vif *vifp;
1738    register struct mbuf *m;
1739    struct ip_moptions *imo;
1740{
1741    int error;
1742    int s = splnet();
1743
1744    /* if source route tunnels */
1745    if (vifp->v_flags & VIFF_SRCRT) {
1746	error = ip_output(m, (struct mbuf *)0, (struct route *)0,
1747			  IP_FORWARDING, imo);
1748	if (mrtdebug > 1)
1749	    log(LOG_DEBUG, "srcrt_send on vif %d err %d\n", vifp-viftable, error);
1750    } else if (vifp->v_flags & VIFF_TUNNEL) {
1751	/* If tunnel options */
1752	ip_output(m, (struct mbuf *)0, (struct route *)0,
1753		  IP_FORWARDING, imo);
1754    } else {
1755	/* if physical interface option, extract the options and then send */
1756	error = ip_output(m, (struct mbuf *)0, (struct route *)0,
1757			  IP_FORWARDING, imo);
1758	FREE(imo, M_IPMOPTS);
1759
1760	if (mrtdebug > 1)
1761	    log(LOG_DEBUG, "phyint_send on vif %d err %d\n", vifp-viftable, error);
1762    }
1763    splx(s);
1764}
1765
1766/* Determine the current time, compute the time elapsed since the last
1767 * packet was seen (in milliseconds), and credit the bucket with the
1768 * corresponding number of tokens.
1769 */
1770void
1771tbf_update_tokens(vifp)
1772    register struct vif *vifp;
1773{
1774    struct timeval tp;
1775    register u_long t;
1776    register u_long elapsed;
1777    register int s = splnet();
1778
1779    GET_TIME(tp);
1780
1781    t = tp.tv_sec*1000 + tp.tv_usec/1000;
1782
1783    elapsed = (t - vifp->v_tbf->last_pkt_t) * vifp->v_rate_limit /8;
1784    vifp->v_tbf->n_tok += elapsed;
1785    vifp->v_tbf->last_pkt_t = t;
1786
1787    if (vifp->v_tbf->n_tok > MAX_BKT_SIZE)
1788	vifp->v_tbf->n_tok = MAX_BKT_SIZE;
1789
1790    splx(s);
1791}
1792
1793static int
1794priority(vifp, ip)
1795    register struct vif *vifp;
1796    register struct ip *ip;
1797{
1798    register u_long graddr;
1799    register int prio;
1800
1801    /* temporary hack; will add general packet classifier some day */
1802
1803    prio = 50;  /* default priority */
1804
1805    /* check for source route options and add option length to get dst */
1806    if (vifp->v_flags & VIFF_SRCRT)
1807	graddr = ntohl((ip+8)->ip_dst.s_addr);
1808    else
1809	graddr = ntohl(ip->ip_dst.s_addr);
1810
1811    switch (graddr & 0xf) {
1812	case 0x0: break;
1813	case 0x1: if (graddr == 0xe0020001) prio = 65; /* MBone Audio */
1814		  break;
1815	case 0x2: break;
1816	case 0x3: break;
1817	case 0x4: break;
1818	case 0x5: break;
1819	case 0x6: break;
1820	case 0x7: break;
1821	case 0x8: break;
1822	case 0x9: break;
1823	case 0xa: if (graddr == 0xe000010a) prio = 85; /* IETF Low Audio 1 */
1824		  break;
1825	case 0xb: if (graddr == 0xe000010b) prio = 75; /* IETF Audio 1 */
1826		  break;
1827	case 0xc: if (graddr == 0xe000010c) prio = 60; /* IETF Video 1 */
1828		  break;
1829	case 0xd: if (graddr == 0xe000010d) prio = 80; /* IETF Low Audio 2 */
1830		  break;
1831	case 0xe: if (graddr == 0xe000010e) prio = 70; /* IETF Audio 2 */
1832		  break;
1833	case 0xf: if (graddr == 0xe000010f) prio = 55; /* IETF Video 2 */
1834		  break;
1835    }
1836
1837    if (tbfdebug > 1) log(LOG_DEBUG, "graddr%x prio%d\n", graddr, prio);
1838
1839    return prio;
1840}
1841
1842/*
1843 * End of token bucket filter modifications
1844 */
1845
1846#ifdef MROUTE_LKM
1847#include <sys/conf.h>
1848#include <sys/exec.h>
1849#include <sys/sysent.h>
1850#include <sys/lkm.h>
1851
1852MOD_MISC("ip_mroute_mod")
1853
1854static int
1855ip_mroute_mod_handle(struct lkm_table *lkmtp, int cmd)
1856{
1857	int i;
1858	struct lkm_misc	*args = lkmtp->private.lkm_misc;
1859	int err = 0;
1860
1861	switch(cmd) {
1862		static int (*old_ip_mrouter_cmd)();
1863		static int (*old_ip_mrouter_done)();
1864		static int (*old_ip_mforward)();
1865		static int (*old_mrt_ioctl)();
1866		static void (*old_proto4_input)();
1867		static int (*old_legal_vif_num)();
1868		extern u_char ip_protox[];
1869		extern struct protosw inetsw[];
1870
1871	case LKM_E_LOAD:
1872		if(lkmexists(lkmtp) || ip_mrtproto)
1873		  return(EEXIST);
1874		old_ip_mrouter_cmd = ip_mrouter_cmd;
1875		ip_mrouter_cmd = X_ip_mrouter_cmd;
1876		old_ip_mrouter_done = ip_mrouter_done;
1877		ip_mrouter_done = X_ip_mrouter_done;
1878		old_ip_mforward = ip_mforward;
1879		ip_mforward = X_ip_mforward;
1880		old_mrt_ioctl = mrt_ioctl;
1881		mrt_ioctl = X_mrt_ioctl;
1882		old_proto4_input = inetsw[ip_protox[IPPROTO_ENCAP]].pr_input;
1883		inetsw[ip_protox[IPPROTO_ENCAP]].pr_input = X_multiencap_decap;
1884		old_legal_vif_num = legal_vif_num;
1885		legal_vif_num = X_legal_vif_num;
1886		ip_mrtproto = IGMP_DVMRP;
1887
1888		printf("\nIP multicast routing loaded\n");
1889		break;
1890
1891	case LKM_E_UNLOAD:
1892		if (ip_mrouter)
1893		  return EINVAL;
1894
1895		ip_mrouter_cmd = old_ip_mrouter_cmd;
1896		ip_mrouter_done = old_ip_mrouter_done;
1897		ip_mforward = old_ip_mforward;
1898		mrt_ioctl = old_mrt_ioctl;
1899		inetsw[ip_protox[IPPROTO_ENCAP]].pr_input = old_proto4_input;
1900		legal_vif_num = old_legal_vif_num;
1901		ip_mrtproto = 0;
1902		break;
1903
1904	default:
1905		err = EINVAL;
1906		break;
1907	}
1908
1909	return(err);
1910}
1911
1912int
1913ip_mroute_mod(struct lkm_table *lkmtp, int cmd, int ver) {
1914	DISPATCH(lkmtp, cmd, ver, ip_mroute_mod_handle, ip_mroute_mod_handle,
1915		 nosys);
1916}
1917
1918#endif /* MROUTE_LKM */
1919#endif /* MROUTING */
1920
1921
1922