/*
 * ip_mroute.c	(revision 2754)
 *
 * IP multicast forwarding procedures
 *
 * Written by David Waitzman, BBN Labs, August 1988.
 * Modified by Steve Deering, Stanford, February 1989.
 * Modified by Mark J. Steiglitz, Stanford, May, 1991
 * Modified by Van Jacobson, LBL, January 1993
 * Modified by Ajit Thyagarajan, PARC, August 1993
 *
 * MROUTING 1.8
 */
12
13
14#include <sys/param.h>
15#include <sys/systm.h>
16#include <sys/mbuf.h>
17#include <sys/socket.h>
18#include <sys/socketvar.h>
19#include <sys/protosw.h>
20#include <sys/errno.h>
21#include <sys/time.h>
22#include <sys/ioctl.h>
23#include <sys/syslog.h>
24#include <net/if.h>
25#include <net/route.h>
26#include <net/raw_cb.h>
27#include <netinet/in.h>
28#include <netinet/in_systm.h>
29#include <netinet/ip.h>
30#include <netinet/ip_var.h>
31#include <netinet/in_pcb.h>
32#include <netinet/in_var.h>
33#include <netinet/igmp.h>
34#include <netinet/igmp_var.h>
35#include <netinet/ip_mroute.h>
36
37#ifndef NTOHL
38#if BYTE_ORDER != BIG_ENDIAN
39#define NTOHL(d) ((d) = ntohl((d)))
40#define NTOHS(d) ((d) = ntohs((u_short)(d)))
41#define HTONL(d) ((d) = htonl((d)))
42#define HTONS(d) ((d) = htons((u_short)(d)))
43#else
44#define NTOHL(d)
45#define NTOHS(d)
46#define HTONL(d)
47#define HTONS(d)
48#endif
49#endif
50
51struct mrtstat	mrtstat;
52
53#ifndef MROUTING
54/*
55 * Dummy routines and globals used when multicast routing is not compiled in.
56 */
57
58struct socket  *ip_mrouter  = NULL;
59u_int		ip_mrtproto = 0;
60
/*
 * Stub DVMRP command handler used when the kernel is built without
 * MROUTING: every multicast-routing setsockopt request is rejected.
 */
int
_ip_mrouter_cmd(int cmd, struct socket *so, struct mbuf *m)
{
	return (EOPNOTSUPP);
}
69
70int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = _ip_mrouter_cmd;
71
/*
 * Stub teardown routine: with MROUTING not configured there is no
 * multicast-routing state to release, so simply report success.
 */
int
_ip_mrouter_done(void)
{
	return (0);
}
77
78int (*ip_mrouter_done)(void) = _ip_mrouter_done;
79
/*
 * Stub forwarder for kernels without MROUTING: a zero return tells
 * the caller to keep (not discard) the packet, and no forwarding is
 * ever attempted.
 */
int
_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m,
	     struct ip_moptions *imo)
{
	return (0);
}
89
90int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
91		   struct ip_moptions *) = _ip_mforward;
92
93int
94_mrt_ioctl(int req, caddr_t data, struct proc *p)
95{
96	return EOPNOTSUPP;
97}
98
99int (*mrt_ioctl)(int, caddr_t, struct proc *) = _mrt_ioctl;
100
/*
 * Stub decapsulator for kernels without MROUTING: no tunnel handling
 * is available, so the datagram is handed straight to raw IP input.
 */
void multiencap_decap(struct mbuf *m) { /* XXX must fixup manually */
	rip_input(m);
}
104
105int (*legal_vif_num)(int) = 0;
106
107#else
108
109#define INSIZ		sizeof(struct in_addr)
110#define	same(a1, a2) \
111	(bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0)
112
113#define MT_MRTABLE MT_RTABLE	/* since nothing else uses it */
114
115/*
116 * Globals.  All but ip_mrouter and ip_mrtproto could be static,
117 * except for netstat or debugging purposes.
118 */
119struct socket  *ip_mrouter  = NULL;
120int		ip_mrtproto = IGMP_DVMRP;    /* for netstat only */
121
122#define NO_RTE_FOUND 	0x1
123#define RTE_FOUND	0x2
124
125struct mbuf    *mfctable[MFCTBLSIZ];
126struct vif	viftable[MAXVIFS];
127u_int		mrtdebug = 0;	  /* debug level 	*/
128u_int       	tbfdebug = 0;     /* tbf debug level 	*/
129
130u_long timeout_val = 0;			/* count of outstanding upcalls */
131
132/*
133 * Define the token bucket filter structures
134 * tbftable -> each vif has one of these for storing info
135 * qtable   -> each interface has an associated queue of pkts
136 */
137
138struct tbf tbftable[MAXVIFS];
139struct pkt_queue qtable[MAXVIFS][MAXQSIZE];
140
141/*
142 * 'Interfaces' associated with decapsulator (so we can tell
143 * packets that went through it from ones that get reflected
144 * by a broken gateway).  These interfaces are never linked into
145 * the system ifnet list & no routes point to them.  I.e., packets
146 * can't be sent this way.  They only exist as a placeholder for
147 * multicast source verification.
148 */
149struct ifnet multicast_decap_if[MAXVIFS];
150
151#define ENCAP_TTL 64
152#define ENCAP_PROTO 4
153
154/* prototype IP hdr for encapsulated packets */
155struct ip multicast_encap_iphdr = {
156#if BYTE_ORDER == LITTLE_ENDIAN
157	sizeof(struct ip) >> 2, IPVERSION,
158#else
159	IPVERSION, sizeof(struct ip) >> 2,
160#endif
161	0,				/* tos */
162	sizeof(struct ip),		/* total length */
163	0,				/* id */
164	0,				/* frag offset */
165	ENCAP_TTL, ENCAP_PROTO,
166	0,				/* checksum */
167};
168
169/*
170 * Private variables.
171 */
172static vifi_t	   numvifs = 0;
173
174/*
175 * one-back cache used by multiencap_decap to locate a tunnel's vif
176 * given a datagram's src ip address.
177 */
178static u_long last_encap_src;
179static struct vif *last_encap_vif;
180
181static u_long nethash_fc(u_long, u_long);
182static struct mfc *mfcfind(u_long, u_long);
183int get_sg_cnt(struct sioc_sg_req *);
184int get_vif_cnt(struct sioc_vif_req *);
185int get_vifs(caddr_t);
186static int add_vif(struct vifctl *);
187static int del_vif(vifi_t *);
188static int add_mfc(struct mfcctl *);
189static int del_mfc(struct delmfcctl *);
190static void cleanup_cache(void *);
191static int ip_mdq(struct mbuf *, struct ifnet *, u_long, struct mfc *,
192		  struct ip_moptions *);
193extern int (*legal_vif_num)(int);
194static void phyint_send(struct ip *, struct vif *, struct mbuf *);
195static void srcrt_send(struct ip *, struct vif *, struct mbuf *);
196static void encap_send(struct ip *, struct vif *, struct mbuf *);
197void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long,
198		 struct ip_moptions *);
199void tbf_queue(struct vif *, struct mbuf *, struct ip *, struct ip_moptions *);
200void tbf_process_q(struct vif *);
201void tbf_dequeue(struct vif *, int);
202void tbf_reprocess_q(void *);
203int tbf_dq_sel(struct vif *, struct ip *);
204void tbf_send_packet(struct vif *, struct mbuf *, struct ip_moptions *);
205void tbf_update_tokens(struct vif *);
206static int priority(struct vif *, struct ip *);
207static int ip_mrouter_init(struct socket *);
208
209/*
210 * A simple hash function: returns MFCHASHMOD of the low-order octet of
211 * the argument's network or subnet number and the multicast group assoc.
212 */
213static u_long
214nethash_fc(m,n)
215    register u_long m;
216    register u_long n;
217{
218    struct in_addr in1;
219    struct in_addr in2;
220
221    in1.s_addr = m;
222    m = in_netof(in1);
223    while ((m & 0xff) == 0) m >>= 8;
224
225    in2.s_addr = n;
226    n = in_netof(in2);
227    while ((n & 0xff) == 0) n >>= 8;
228
229    return (MFCHASHMOD(m) ^ MFCHASHMOD(n));
230}
231
232/*
233 * this is a direct-mapped cache used to speed the mapping from a
234 * datagram source address to the associated multicast route.  Note
235 * that unlike mrttable, the hash is on IP address, not IP net number.
236 */
237#define MFCHASHSIZ 1024
238#define MFCHASH(a, g) ((((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
239			((g) >> 20) ^ ((g) >> 10) ^ (g)) & (MFCHASHSIZ-1))
240struct mfc *mfchash[MFCHASHSIZ];
241
242/*
243 * Find a route for a given origin IP address and Multicast group address
244 * Type of service parameter to be added in the future!!!
245 */
246#define MFCFIND(o, g, rt) { \
247	register u_int _mrhasho = o; \
248	register u_int _mrhashg = g; \
249	_mrhasho = MFCHASH(_mrhasho, _mrhashg); \
250	++mrtstat.mrts_mfc_lookups; \
251	rt = mfchash[_mrhasho]; \
252	if ((rt == NULL) || \
253	    ((o & rt->mfc_originmask.s_addr) != rt->mfc_origin.s_addr) || \
254	     (g != rt->mfc_mcastgrp.s_addr)) \
255	     if ((rt = mfcfind(o, g)) != NULL) \
256		mfchash[_mrhasho] = rt; \
257}
258
259/*
260 * Find route by examining hash table entries
261 */
262static struct mfc *
263mfcfind(origin, mcastgrp)
264    u_long origin;
265    u_long mcastgrp;
266{
267    register struct mbuf *mb_rt;
268    register struct mfc *rt;
269    register u_long hash;
270
271    hash = nethash_fc(origin, mcastgrp);
272    for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) {
273	rt = mtod(mb_rt, struct mfc *);
274	if (((origin & rt->mfc_originmask.s_addr) == rt->mfc_origin.s_addr) &&
275	    (mcastgrp == rt->mfc_mcastgrp.s_addr) &&
276	    (mb_rt->m_act == NULL))
277	    return (rt);
278    }
279    mrtstat.mrts_mfc_misses++;
280    return NULL;
281}
282
283/*
284 * Macros to compute elapsed time efficiently
285 * Borrowed from Van Jacobson's scheduling code
286 */
287#define TV_DELTA(a, b, delta) { \
288	    register int xxs; \
289		\
290	    delta = (a).tv_usec - (b).tv_usec; \
291	    if ((xxs = (a).tv_sec - (b).tv_sec)) { \
292	       switch (xxs) { \
293		      case 2: \
294			  delta += 1000000; \
295			      /* fall through */ \
296		      case 1: \
297			  delta += 1000000; \
298			  break; \
299		      default: \
300			  delta += (1000000 * xxs); \
301	       } \
302	    } \
303}
304
305#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \
306	      (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec)
307
308/*
309 * Handle DVMRP setsockopt commands to modify the multicast routing tables.
310 */
311int
312_ip_mrouter_cmd(cmd, so, m)
313    int cmd;
314    struct socket *so;
315    struct mbuf *m;
316{
317   if (cmd != DVMRP_INIT && so != ip_mrouter) return EACCES;
318
319    switch (cmd) {
320	case DVMRP_INIT:     return ip_mrouter_init(so);
321	case DVMRP_DONE:     return ip_mrouter_done();
322	case DVMRP_ADD_VIF:  return add_vif (mtod(m, struct vifctl *));
323	case DVMRP_DEL_VIF:  return del_vif (mtod(m, vifi_t *));
324	case DVMRP_ADD_MFC:  return add_mfc (mtod(m, struct mfcctl *));
325	case DVMRP_DEL_MFC:  return del_mfc (mtod(m, struct delmfcctl *));
326	default:             return EOPNOTSUPP;
327    }
328}
329
330int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = _ip_mrouter_cmd;
331
332/*
333 * Handle ioctl commands to obtain information from the cache
334 */
335int
336_mrt_ioctl(cmd, data)
337    int cmd;
338    caddr_t data;
339{
340    int error = 0;
341
342    switch (cmd) {
343      case (SIOCGETVIFINF):		/* Read Virtual Interface (m/cast) */
344	  return (get_vifs(data));
345	  break;
346      case (SIOCGETVIFCNT):
347	  return (get_vif_cnt((struct sioc_vif_req *)data));
348	  break;
349      case (SIOCGETSGCNT):
350	  return (get_sg_cnt((struct sioc_sg_req *)data));
351	  break;
352	default:
353	  return (EINVAL);
354	  break;
355    }
356    return error;
357}
358
359int (*mrt_ioctl)(int, caddr_t, struct proc *) = _mrt_ioctl;
360
361/*
362 * returns the packet count for the source group provided
363 */
/*
 * Returns the packet count for the source/group pair provided.
 * The count is written into req->count; 0xffffffff is the sentinel
 * for "no matching route".  Always returns 0.
 */
int
get_sg_cnt(req)
    register struct sioc_sg_req *req;
{
    register struct mfc *rt;
    int s;

    s = splnet();	/* block network interrupts around the lookup */
    MFCFIND(req->src.s_addr, req->grp.s_addr, rt);	/* cached hash lookup */
    splx(s);
    if (rt != NULL)
	req->count = rt->mfc_pkt_cnt;
    else
	req->count = 0xffffffff;	/* sentinel: route not found */

    return 0;
}
381
382/*
383 * returns the input and output packet counts on the interface provided
384 */
385int
386get_vif_cnt(req)
387    register struct sioc_vif_req *req;
388{
389    register vifi_t vifi = req->vifi;
390
391    req->icount = viftable[vifi].v_pkt_in;
392    req->ocount = viftable[vifi].v_pkt_out;
393
394    return 0;
395}
396
/*
 * Copy the in-use entries of viftable out to the userland buffer
 * described by the vif_conf structure at "data".
 *
 * Returns 0 on success, ENOSPC when the user buffer cannot hold the
 * next record, or the error from copyout().
 */
int
get_vifs(data)
    char *data;
{
    struct vif_conf *vifc = (struct vif_conf *)data;
    struct vif_req *vifrp, vifr;
    int space, error=0;

    vifi_t vifi;
    int s;

    space = vifc->vifc_len;	/* user-supplied buffer size, in bytes */
    vifrp  = vifc->vifc_req;	/* user-space destination array */

    s = splnet();
    vifc->vifc_num=numvifs;

    for (vifi = 0; vifi <  numvifs; vifi++, vifrp++) {
	/* a zero local address marks an unused viftable slot */
	if (viftable[vifi].v_lcl_addr.s_addr != 0) {
	    vifr.v_flags=viftable[vifi].v_flags;
	    vifr.v_threshold=viftable[vifi].v_threshold;
	    vifr.v_lcl_addr=viftable[vifi].v_lcl_addr;
	    vifr.v_rmt_addr=viftable[vifi].v_rmt_addr;
	    /* NOTE(review): strncpy leaves v_if_name unterminated when
	     * if_name is exactly IFNAMSIZ long -- confirm consumers
	     * bound their reads by IFNAMSIZ */
	    strncpy(vifr.v_if_name,viftable[vifi].v_ifp->if_name,IFNAMSIZ);
	    /* make sure this record still fits before copying it out */
	    if ((space -= sizeof(vifr)) < 0) {
		splx(s);
		return(ENOSPC);
	    }
	    error = copyout((caddr_t)&vifr,(caddr_t)vifrp,(u_int)(sizeof vifr));
	    if (error) {
		splx(s);
		return(error);
	    }
	}
    }
    splx(s);
    return 0;
}
435/*
436 * Enable multicast routing
437 */
438static int
439ip_mrouter_init(so)
440	struct socket *so;
441{
442    if (so->so_type != SOCK_RAW ||
443	so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP;
444
445    if (ip_mrouter != NULL) return EADDRINUSE;
446
447    ip_mrouter = so;
448
449    if (mrtdebug)
450	log(LOG_DEBUG, "ip_mrouter_init");
451
452    return 0;
453}
454
455/*
456 * Disable multicast routing
457 */
458int
459_ip_mrouter_done()
460{
461    vifi_t vifi;
462    int i;
463    struct ifnet *ifp;
464    struct ifreq ifr;
465    struct mbuf *mb_rt;
466    struct mbuf *m;
467    struct rtdetq *rte;
468    int s;
469
470    s = splnet();
471
472    /*
473     * For each phyint in use, disable promiscuous reception of all IP
474     * multicasts.
475     */
476    for (vifi = 0; vifi < numvifs; vifi++) {
477	if (viftable[vifi].v_lcl_addr.s_addr != 0 &&
478	    !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
479	    ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
480	    ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr
481								= INADDR_ANY;
482	    ifp = viftable[vifi].v_ifp;
483	    (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
484	}
485    }
486    bzero((caddr_t)qtable, sizeof(qtable));
487    bzero((caddr_t)tbftable, sizeof(tbftable));
488    bzero((caddr_t)viftable, sizeof(viftable));
489    numvifs = 0;
490
491    /*
492     * Check if any outstanding timeouts remain
493     */
494    if (timeout_val != 0)
495	for (i = 0; i < MFCTBLSIZ; i++) {
496	    mb_rt = mfctable[i];
497	    while (mb_rt) {
498		if ( mb_rt->m_act != NULL) {
499		    untimeout(cleanup_cache, (caddr_t)mb_rt);
500		    while (m = mb_rt->m_act) {
501			mb_rt->m_act = m->m_act;
502			rte = mtod(m, struct rtdetq *);
503			m_freem(rte->m);
504			m_free(m);
505		    }
506		    timeout_val--;
507		}
508	    mb_rt = mb_rt->m_next;
509	    }
510	    if (timeout_val == 0)
511		break;
512	}
513
514    /*
515     * Free all multicast forwarding cache entries.
516     */
517    for (i = 0; i < MFCTBLSIZ; i++)
518	m_freem(mfctable[i]);
519
520    bzero((caddr_t)mfctable, sizeof(mfctable));
521    bzero((caddr_t)mfchash, sizeof(mfchash));
522
523    /*
524     * Reset de-encapsulation cache
525     */
526    last_encap_src = NULL;
527    last_encap_vif = NULL;
528
529    ip_mrouter = NULL;
530
531    splx(s);
532
533    if (mrtdebug)
534	log(LOG_DEBUG, "ip_mrouter_done");
535
536    return 0;
537}
538
539int (*ip_mrouter_done)(void) = _ip_mrouter_done;
540
541/*
542 * Add a vif to the vif table
543 */
/*
 * Add a vif to the vif table.
 *
 * The slot is chosen by the caller-supplied index vifc_vifi; fails
 * with EINVAL for an out-of-range index, EADDRINUSE if the slot is
 * occupied, and EADDRNOTAVAIL if vifc_lcl_addr is not one of this
 * host's addresses.  For physical vifs, promiscuous multicast
 * reception is enabled on the interface.
 */
static int
add_vif(vifcp)
    register struct vifctl *vifcp;
{
    register struct vif *vifp = viftable + vifcp->vifc_vifi;
    /* static: reused scratch sockaddr for the ifa lookup below */
    static struct sockaddr_in sin = {AF_INET};
    struct ifaddr *ifa;
    struct ifnet *ifp;
    struct ifreq ifr;
    int error, s;
    struct tbf *v_tbf = tbftable + vifcp->vifc_vifi;

    if (vifcp->vifc_vifi >= MAXVIFS)  return EINVAL;
    if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE;

    /* Find the interface with an address in AF_INET family */
    sin.sin_addr = vifcp->vifc_lcl_addr;
    ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
    if (ifa == 0) return EADDRNOTAVAIL;
    ifp = ifa->ifa_ifp;

    if (vifcp->vifc_flags & VIFF_TUNNEL) {
	if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) {
	    /* IP-in-IP tunnel: attach one of the placeholder
	     * decapsulator interfaces (named once, on first use) */
	    static int inited = 0;
	    if(!inited) {
		/* NB: "s" doubles as a loop index here; it is
		 * reassigned from splnet() further down */
		for (s = 0; s < MAXVIFS; ++s) {
		    multicast_decap_if[s].if_name = "mdecap";
		    multicast_decap_if[s].if_unit = s;
		}
		inited = 1;
	    }
	    ifp = &multicast_decap_if[vifcp->vifc_vifi];
	} else {
	    /* NOTE(review): source-route tunnels leave v_ifp NULL --
	     * confirm all users of v_ifp tolerate that */
	    ifp = 0;
	}
    } else {
	/* Make sure the interface supports multicast */
	if ((ifp->if_flags & IFF_MULTICAST) == 0)
	    return EOPNOTSUPP;

	/* Enable promiscuous reception of all IP multicasts from the if */
	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY;
	s = splnet();
	error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr);
	splx(s);
	if (error)
	    return error;
    }

    s = splnet();
    /* define parameters for the tbf structure */
    vifp->v_tbf = v_tbf;
    vifp->v_tbf->q_len = 0;
    vifp->v_tbf->n_tok = 0;
    vifp->v_tbf->last_pkt_t = 0;

    vifp->v_flags     = vifcp->vifc_flags;
    vifp->v_threshold = vifcp->vifc_threshold;
    vifp->v_lcl_addr  = vifcp->vifc_lcl_addr;
    vifp->v_rmt_addr  = vifcp->vifc_rmt_addr;
    vifp->v_ifp       = ifp;
    vifp->v_rate_limit= vifcp->vifc_rate_limit;
    /* initialize per vif pkt counters */
    vifp->v_pkt_in    = 0;
    vifp->v_pkt_out   = 0;
    splx(s);

    /* Adjust numvifs up if the vifi is higher than numvifs */
    if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1;

    if (mrtdebug)
	log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d",
	    vifcp->vifc_vifi,
	    ntohl(vifcp->vifc_lcl_addr.s_addr),
	    (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
	    ntohl(vifcp->vifc_rmt_addr.s_addr),
	    vifcp->vifc_threshold,
	    vifcp->vifc_rate_limit);

    return 0;
}
626
627/*
628 * Delete a vif from the vif table
629 */
/*
 * Delete a vif from the vif table.
 *
 * Fails with EINVAL for an index beyond numvifs and EADDRNOTAVAIL for
 * an unused slot.  Physical vifs also have promiscuous multicast
 * reception turned off; the de-encapsulation cache is invalidated if
 * it points at this vif.
 */
static int
del_vif(vifip)
    vifi_t *vifip;
{
    register struct vif *vifp = viftable + *vifip;
    register vifi_t vifi;
    struct ifnet *ifp;
    struct ifreq ifr;
    int s;

    if (*vifip >= numvifs) return EINVAL;
    if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL;

    s = splnet();

    if (!(vifp->v_flags & VIFF_TUNNEL)) {
	/* physical vif: stop promiscuous multicast reception */
	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY;
	ifp = vifp->v_ifp;
	(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
    }

    /* invalidate the one-back tunnel cache if it references this vif */
    if (vifp == last_encap_vif) {
	last_encap_vif = 0;
	last_encap_src = 0;
    }

    /* clear the vif's packet queue, token bucket, and table slot */
    bzero((caddr_t)qtable[*vifip],
	  sizeof(qtable[*vifip]));
    bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf)));
    bzero((caddr_t)vifp, sizeof (*vifp));

    /* Adjust numvifs down to the highest remaining in-use slot + 1 */
    for (vifi = numvifs; vifi > 0; vifi--)
	if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break;
    numvifs = vifi;

    splx(s);

    if (mrtdebug)
      log(LOG_DEBUG, "del_vif %d, numvifs %d", *vifip, numvifs);

    return 0;
}
674
675/*
676 * Add an mfc entry
677 */
/*
 * Add an mfc entry.
 *
 * Three cases:
 *   1. a complete entry already exists -> just refresh parent/ttls;
 *   2. one or more incomplete entries (m_act != NULL) are waiting on
 *      an upcall for this route -> complete them, cancel the cleanup
 *      timeout, and forward the packets queued behind the upcall;
 *   3. no entry at all -> install a fresh one at the head of its
 *      hash chain.
 * Returns 0 on success or ENOBUFS if no mbuf is available.
 */
static int
add_mfc(mfccp)
    struct mfcctl *mfccp;
{
    struct mfc *rt;
    struct mfc *rt1;
    register struct mbuf *mb_rt;
    struct mbuf *prev_mb_rt;
    u_long hash;
    struct mbuf *mb_ntry;
    struct rtdetq *rte;
    register u_short nstl;	/* number of upcall entries completed */
    int s;
    int i;

    rt = mfcfind(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);

    /* If an entry already exists, just update the fields */
    if (rt) {
	if (mrtdebug)
	    log(LOG_DEBUG,"add_mfc update o %x g %x m %x p %x",
		ntohl(mfccp->mfcc_origin.s_addr),
		ntohl(mfccp->mfcc_mcastgrp.s_addr),
		ntohl(mfccp->mfcc_originmask.s_addr),
		mfccp->mfcc_parent);

	s = splnet();
	rt->mfc_parent = mfccp->mfcc_parent;
	for (i = 0; i < numvifs; i++)
	    VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
	splx(s);
	return 0;
    }

    /*
     * Find the entry for which the upcall was made and update
     */
    s = splnet();
    hash = nethash_fc(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
    for (prev_mb_rt = mb_rt = mfctable[hash], nstl = 0;
	 mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) {

	rt = mtod(mb_rt, struct mfc *);
	/* match incomplete (upcall-pending) entries covered by the
	 * new route's mask */
	if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr)
	     == mfccp->mfcc_origin.s_addr) &&
	    (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
	    (mb_rt->m_act != NULL)) {

	    /* fill in the route on the first match only; rt1 remembers
	     * it so queued packets from later duplicates use the same
	     * completed route */
	    if (!nstl++) {
		if (mrtdebug)
		    log(LOG_DEBUG,"add_mfc o %x g %x m %x p %x dbg %x",
			ntohl(mfccp->mfcc_origin.s_addr),
			ntohl(mfccp->mfcc_mcastgrp.s_addr),
			ntohl(mfccp->mfcc_originmask.s_addr),
			mfccp->mfcc_parent, mb_rt->m_act);

		rt->mfc_origin     = mfccp->mfcc_origin;
		rt->mfc_originmask = mfccp->mfcc_originmask;
		rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
		rt->mfc_parent     = mfccp->mfcc_parent;
		for (i = 0; i < numvifs; i++)
		    VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
		/* initialize pkt counters per src-grp */
		rt->mfc_pkt_cnt    = 0;
		rt1 = rt;
	    }

	    /* prevent cleanup of cache entry */
	    untimeout(cleanup_cache, (caddr_t)mb_rt);
	    timeout_val--;

	    /* free packets Qed at the end of this entry */
	    while (mb_rt->m_act) {
		mb_ntry = mb_rt->m_act;
		rte = mtod(mb_ntry, struct rtdetq *);
		/* forward each queued packet using the completed route */
		ip_mdq(rte->m, rte->ifp, rte->tunnel_src,
		       rt1, rte->imo);
		mb_rt->m_act = mb_ntry->m_act;
		m_freem(rte->m);
		m_free(mb_ntry);
	    }

	    /*
	     * If more than one entry was created for a single upcall
	     * delete that entry
	     */
	    if (nstl > 1) {
		MFREE(mb_rt, prev_mb_rt->m_next);
		mb_rt = prev_mb_rt;
	    }
	}
    }

    /*
     * It is possible that an entry is being inserted without an upcall
     */
    if (nstl == 0) {
	if (mrtdebug)
	    log(LOG_DEBUG,"add_mfc no upcall h %d o %x g %x m %x p %x",
		hash, ntohl(mfccp->mfcc_origin.s_addr),
		ntohl(mfccp->mfcc_mcastgrp.s_addr),
		ntohl(mfccp->mfcc_originmask.s_addr),
		mfccp->mfcc_parent);

	/* refresh any existing entries whose origin falls inside the
	 * new route's mask */
	for (prev_mb_rt = mb_rt = mfctable[hash];
	     mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) {

	    rt = mtod(mb_rt, struct mfc *);
	    if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr)
		 == mfccp->mfcc_origin.s_addr) &&
		(rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) {

		rt->mfc_origin     = mfccp->mfcc_origin;
		rt->mfc_originmask = mfccp->mfcc_originmask;
		rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
		rt->mfc_parent     = mfccp->mfcc_parent;
		for (i = 0; i < numvifs; i++)
		    VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
		/* initialize pkt counters per src-grp */
		rt->mfc_pkt_cnt    = 0;
	    }
	}
	/* NOTE(review): the loop above always runs to completion, so
	 * mb_rt is NULL here and a new entry is always allocated --
	 * confirm this duplication of updated entries is intended */
	if (mb_rt == NULL) {
	    /* no upcall, so make a new entry */
	    MGET(mb_rt, M_DONTWAIT, MT_MRTABLE);
	    if (mb_rt == NULL) {
		splx(s);
		return ENOBUFS;
	    }

	    rt = mtod(mb_rt, struct mfc *);

	    /* insert new entry at head of hash chain */
	    rt->mfc_origin     = mfccp->mfcc_origin;
	    rt->mfc_originmask = mfccp->mfcc_originmask;
	    rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
	    rt->mfc_parent     = mfccp->mfcc_parent;
	    for (i = 0; i < numvifs; i++)
		VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
	    /* initialize pkt counters per src-grp */
	    rt->mfc_pkt_cnt    = 0;

	    /* link into table */
	    mb_rt->m_next  = mfctable[hash];
	    mfctable[hash] = mb_rt;
	    mb_rt->m_act = NULL;
	}
    }
    splx(s);
    return 0;
}
829
830/*
831 * Delete an mfc entry
832 */
/*
 * Delete an mfc entry.
 *
 * Only complete entries (m_act == NULL) are candidates.  Any mfchash
 * direct-map cache slots pointing at the entry are invalidated before
 * the entry is unlinked and freed.  Returns ESRCH if no matching
 * entry exists, otherwise 0.
 */
static int
del_mfc(mfccp)
    struct delmfcctl *mfccp;
{
    struct in_addr 	origin;
    struct in_addr 	mcastgrp;
    struct mfc 		*rt;
    struct mbuf 	*mb_rt;
    struct mbuf 	*prev_mb_rt;
    u_long 		hash;
    struct mfc 		**cmfc;
    struct mfc 		**cmfcend;
    int s, i;

    origin = mfccp->mfcc_origin;
    mcastgrp = mfccp->mfcc_mcastgrp;
    hash = nethash_fc(origin.s_addr, mcastgrp.s_addr);

    if (mrtdebug)
	log(LOG_DEBUG,"del_mfc orig %x mcastgrp %x",
	    ntohl(origin.s_addr), ntohl(mcastgrp.s_addr));

    /* locate the complete entry on its hash chain, remembering the
     * predecessor so it can be unlinked */
    for (prev_mb_rt = mb_rt = mfctable[hash]
	 ; mb_rt
	 ; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) {
        rt = mtod(mb_rt, struct mfc *);
	if (origin.s_addr == rt->mfc_origin.s_addr &&
	    mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr &&
	    mb_rt->m_act == NULL)
	    break;
    }
    if (mb_rt == NULL) {
	return ESRCH;
    }

    s = splnet();

    /* purge every direct-map cache slot that references this route */
    cmfc = mfchash;
    cmfcend = cmfc + MFCHASHSIZ;
    for ( ; cmfc < cmfcend; ++cmfc)
	if (*cmfc == rt)
	    *cmfc = 0;

    if (prev_mb_rt != mb_rt) {	/* if moved past head of list */
	MFREE(mb_rt, prev_mb_rt->m_next);
    } else			/* delete head of list, it is in the table */
        mfctable[hash] = m_free(mb_rt);

    splx(s);

    return 0;
}
885
886/*
887 * IP multicast forwarding function. This function assumes that the packet
888 * pointed to by "ip" has arrived on (or is about to be sent to) the interface
889 * pointed to by "ifp", and the packet is to be relayed to other networks
890 * that have members of the packet's destination IP multicast group.
891 *
892 * The packet is returned unscathed to the caller, unless it is tunneled
893 * or erroneous, in which case a non-zero return value tells the caller to
894 * discard it.
895 */
896
897#define IP_HDR_LEN  20	/* # bytes of fixed IP header (excluding options) */
898#define TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */
899
900int
901_ip_mforward(ip, ifp, m, imo)
902    register struct ip *ip;
903    struct ifnet *ifp;
904    struct mbuf *m;
905    struct ip_moptions *imo;
906{
907    register struct mfc *rt;
908    register struct vif *vifp;
909    register u_char *ipoptions;
910    u_long tunnel_src;
911    static struct sockproto	k_igmpproto 	= { AF_INET, IPPROTO_IGMP };
912    static struct sockaddr_in 	k_igmpsrc	= { AF_INET };
913    static struct sockaddr_in 	k_igmpdst 	= { AF_INET };
914    register struct mbuf *mm;
915    register struct mbuf *mn;
916    register struct ip *k_data;
917    int s;
918
919    if (mrtdebug > 1)
920	log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %x",
921	    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp);
922
923    if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
924	(ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) {
925	/*
926	 * Packet arrived via a physical interface.
927	 */
928	tunnel_src = 0;
929    } else {
930	/*
931	 * Packet arrived through a source-route tunnel.
932	 *
933	 * A source-route tunneled packet has a single NOP option and a
934	 * two-element
935	 * loose-source-and-record-route (LSRR) option immediately following
936	 * the fixed-size part of the IP header.  At this point in processing,
937	 * the IP header should contain the following IP addresses:
938	 *
939	 *	original source          - in the source address field
940	 *	destination group        - in the destination address field
941	 *	remote tunnel end-point  - in the first  element of LSRR
942	 *	one of this host's addrs - in the second element of LSRR
943	 *
944	 * NOTE: RFC-1075 would have the original source and remote tunnel
945	 *	 end-point addresses swapped.  However, that could cause
946	 *	 delivery of ICMP error messages to innocent applications
947	 *	 on intermediate routing hosts!  Therefore, we hereby
948	 *	 change the spec.
949	 */
950
951	/*
952	 * Verify that the tunnel options are well-formed.
953	 */
954	if (ipoptions[0] != IPOPT_NOP ||
955	    ipoptions[2] != 11 ||	/* LSRR option length   */
956	    ipoptions[3] != 12 ||	/* LSRR address pointer */
957	    (tunnel_src = *(u_long *)(&ipoptions[4])) == 0) {
958	    mrtstat.mrts_bad_tunnel++;
959	    if (mrtdebug)
960		log(LOG_DEBUG,
961		    "ip_mforward: bad tunnel from %u (%x %x %x %x %x %x)",
962		    ntohl(ip->ip_src.s_addr),
963		    ipoptions[0], ipoptions[1], ipoptions[2], ipoptions[3],
964		    *(u_long *)(&ipoptions[4]), *(u_long *)(&ipoptions[8]));
965	    return 1;
966	}
967
968	/*
969	 * Delete the tunnel options from the packet.
970	 */
971	ovbcopy((caddr_t)(ipoptions + TUNNEL_LEN), (caddr_t)ipoptions,
972		(unsigned)(m->m_len - (IP_HDR_LEN + TUNNEL_LEN)));
973	m->m_len   -= TUNNEL_LEN;
974	ip->ip_len -= TUNNEL_LEN;
975	ip->ip_hl  -= TUNNEL_LEN >> 2;
976
977	ifp = 0;
978    }
979
980    /*
981     * Don't forward a packet with time-to-live of zero or one,
982     * or a packet destined to a local-only group.
983     */
984    if (ip->ip_ttl <= 1 ||
985	ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP)
986	return (int)tunnel_src;
987
988    /*
989     * Determine forwarding vifs from the forwarding cache table
990     */
991    s = splnet();
992    MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt);
993
994    /* Entry exists, so forward if necessary */
995    if (rt != NULL) {
996	splx(s);
997	return (ip_mdq(m, ifp, tunnel_src, rt, imo));
998    }
999
1000    else {
1001	/*
1002	 * If we don't have a route for packet's origin,
1003	 * Make a copy of the packet &
1004	 * send message to routing daemon
1005	 */
1006
1007	register struct mbuf *mb_rt;
1008	register struct mbuf *mb_ntry;
1009	register struct mbuf *mb0;
1010	register struct rtdetq *rte;
1011	register struct mbuf *rte_m;
1012	register u_long hash;
1013	register struct timeval tp;
1014
1015	mrtstat.mrts_no_route++;
1016	if (mrtdebug)
1017	    log(LOG_DEBUG, "ip_mforward: no rte s %x g %x",
1018		ntohl(ip->ip_src.s_addr),
1019		ntohl(ip->ip_dst.s_addr));
1020
1021	/* is there an upcall waiting for this packet? */
1022	hash = nethash_fc(ip->ip_src.s_addr, ip->ip_dst.s_addr);
1023	for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) {
1024	    rt = mtod(mb_rt, struct mfc *);
1025	    if (((ip->ip_src.s_addr & rt->mfc_originmask.s_addr) ==
1026		 rt->mfc_origin.s_addr) &&
1027		(ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) &&
1028		(mb_rt->m_act != NULL))
1029		break;
1030	}
1031
1032	if (mb_rt == NULL) {
1033	    /* no upcall, so make a new entry */
1034	    MGET(mb_rt, M_DONTWAIT, MT_MRTABLE);
1035	    if (mb_rt == NULL) {
1036		splx(s);
1037		return ENOBUFS;
1038	    }
1039
1040	    rt = mtod(mb_rt, struct mfc *);
1041
1042	    /* insert new entry at head of hash chain */
1043	    rt->mfc_origin.s_addr     = ip->ip_src.s_addr;
1044	    rt->mfc_originmask.s_addr = (u_long)0xffffffff;
1045	    rt->mfc_mcastgrp.s_addr   = ip->ip_dst.s_addr;
1046
1047	    /* link into table */
1048	    hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr);
1049	    mb_rt->m_next  = mfctable[hash];
1050	    mfctable[hash] = mb_rt;
1051	    mb_rt->m_act = NULL;
1052
1053	}
1054
1055	/* determine if q has overflowed */
1056	for (rte_m = mb_rt, hash = 0; rte_m->m_act; rte_m = rte_m->m_act)
1057	    hash++;
1058
1059	if (hash > MAX_UPQ) {
1060	    mrtstat.mrts_upq_ovflw++;
1061	    splx(s);
1062	    return 0;
1063	}
1064
1065	/* add this packet and timing, ifp info to m_act */
1066	MGET(mb_ntry, M_DONTWAIT, MT_DATA);
1067	if (mb_ntry == NULL) {
1068	    splx(s);
1069	    return ENOBUFS;
1070	}
1071
1072	mb_ntry->m_act = NULL;
1073	rte = mtod(mb_ntry, struct rtdetq *);
1074
1075	mb0 = m_copy(m, 0, M_COPYALL);
1076	if (mb0 == NULL) {
1077	    splx(s);
1078	    return ENOBUFS;
1079	}
1080
1081	rte->m 			= mb0;
1082	rte->ifp 		= ifp;
1083	rte->tunnel_src 	= tunnel_src;
1084	rte->imo		= imo;
1085
1086	rte_m->m_act = mb_ntry;
1087
1088	splx(s);
1089
1090	if (hash == 0) {
1091	    /*
1092	     * Send message to routing daemon to install
1093	     * a route into the kernel table
1094	     */
1095	    k_igmpsrc.sin_addr = ip->ip_src;
1096	    k_igmpdst.sin_addr = ip->ip_dst;
1097
1098	    mm = m_copy(m, 0, M_COPYALL);
1099	    if (mm == NULL) {
1100		splx(s);
1101		return ENOBUFS;
1102	    }
1103
1104	    k_data = mtod(mm, struct ip *);
1105	    k_data->ip_p = 0;
1106
1107	    mrtstat.mrts_upcalls++;
1108
1109	    raw_input(mm, &k_igmpproto,
1110		      (struct sockaddr *)&k_igmpsrc,
1111		      (struct sockaddr *)&k_igmpdst);
1112
1113	    /* set timer to cleanup entry if upcall is lost */
1114	    timeout(cleanup_cache, (caddr_t)mb_rt, 100);
1115	    timeout_val++;
1116	}
1117
1118	return 0;
1119    }
1120}
1121
/* Indirect-call hook bound to _ip_mforward, so callers elsewhere in the
 * kernel need no hard link-time dependency on the MROUTING code. */
int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
		   struct ip_moptions *) = _ip_mforward;
1124
1125/*
1126 * Clean up the cache entry if upcall is not serviced
1127 */
static void
cleanup_cache(xmb_rt)
	void *xmb_rt;
{
    struct mbuf *mb_rt = xmb_rt;	/* mbuf holding the mfc entry to reap */
    struct mfc *rt;
    u_long hash;
    struct mbuf *prev_m0;
    struct mbuf *m0;
    struct mbuf *m;
    struct rtdetq *rte;
    int s;

    rt = mtod(mb_rt, struct mfc *);
    hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr);

    if (mrtdebug)
	log(LOG_DEBUG, "ip_mforward: cleanup ipm %d h %d s %x g %x",
	    ip_mrouter, hash, ntohl(rt->mfc_origin.s_addr),
	    ntohl(rt->mfc_mcastgrp.s_addr));

    mrtstat.mrts_cache_cleanups++;

    /*
     * determine entry to be cleaned up in cache table
     * NOTE(review): if mb_rt is no longer on this chain the loop ends
     * with m0 == NULL and prev_m0 at the tail, and the MFREE below
     * would operate on a null mbuf -- confirm this timeout can never
     * outlive removal of the entry by another path.
     */
    s = splnet();
    for (prev_m0 = m0 = mfctable[hash]; m0; prev_m0 = m0, m0 = m0->m_next)
	if (m0 == mb_rt)
	    break;

    /*
     * drop all the packets
     * free the mbuf with the pkt, if, timing info
     */
    while (mb_rt->m_act) {
	m = mb_rt->m_act;
	mb_rt->m_act = m->m_act;

	/* each m_act entry is an rtdetq wrapper holding one queued packet */
	rte = mtod(m, struct rtdetq *);
	m_freem(rte->m);	/* the copied packet */
	m_free(m);		/* the rtdetq wrapper */
    }

    /*
     * Delete the entry from the cache
     */
    if (prev_m0 != m0) {	/* if moved past head of list */
	MFREE(m0, prev_m0->m_next);
    } else			/* delete head of list, it is in the table */
	mfctable[hash] = m_free(m0);

    timeout_val--;	/* one fewer cleanup timeout outstanding */
    splx(s);
}
1183
1184/*
1185 * Packet forwarding routine once entry in the cache is made
1186 */
static int
ip_mdq(m, ifp, tunnel_src, rt, imo)
    register struct mbuf *m;
    register struct ifnet *ifp;
    register u_long tunnel_src;
    register struct mfc *rt;
    register struct ip_moptions *imo;
{
    register struct ip  *ip = mtod(m, struct ip *);
    register vifi_t vifi;
    register struct vif *vifp;

    /*
     * Don't forward if it didn't arrive from the parent vif for its origin.
     * Notes: v_ifp is zero for src route tunnels, multicast_decap_if
     * for encapsulated tunnels and a real ifnet for non-tunnels so
     * the first part of the if catches wrong physical interface or
     * tunnel type; v_rmt_addr is zero for non-tunneled packets so
     * the 2nd part catches both packets that arrive via a tunnel
     * that shouldn't and packets that arrive via the wrong tunnel.
     */
    vifi = rt->mfc_parent;
    if (viftable[vifi].v_ifp != ifp ||
	(ifp == 0 && viftable[vifi].v_rmt_addr.s_addr != tunnel_src)) {
	/* came in the wrong interface */
	if (mrtdebug)
	    log(LOG_DEBUG, "wrong if: ifp %x vifi %d",
		ifp, vifi);
	++mrtstat.mrts_wrong_if;
	/* NOTE(review): tunnel_src (nonzero only for tunnel arrivals) is
	 * handed back so the caller can tell the drop cases apart --
	 * confirm against _ip_mforward's use of the return value. */
	return (int)tunnel_src;
    }

    /* increment the interface and s-g counters */
    viftable[vifi].v_pkt_in++;
    rt->mfc_pkt_cnt++;

    /*
     * For each vif, decide if a copy of the packet should be forwarded.
     * Forward if:
     *		- the ttl exceeds the vif's threshold
     *		- there are group members downstream on interface
     */
#define MC_SEND(ip,vifp,m) {                             \
		(vifp)->v_pkt_out++;                     \
                if ((vifp)->v_flags & VIFF_SRCRT)        \
                    srcrt_send((ip), (vifp), (m));       \
                else if ((vifp)->v_flags & VIFF_TUNNEL)  \
                    encap_send((ip), (vifp), (m));       \
                else                                     \
                    phyint_send((ip), (vifp), (m));      \
                }

/* If no options or the imo_multicast_vif option is 0, don't do this part.
 * (When imo_multicast_vif is 0 the subtraction wraps -- presumably vifi_t
 * is unsigned -- so the "< numvifs" test fails, which is why the
 * commented-out (vifi>=0) check is redundant.  TODO confirm vifi_t.)
 */
    if ((imo != NULL) &&
       (( vifi = imo->imo_multicast_vif - 1) < numvifs) /*&& (vifi>=0)*/)
    {
        MC_SEND(ip,viftable+vifi,m);
        return (1);        /* make sure we are done: No more physical sends */
    }

    /* forward a copy on every vif whose ttl threshold the packet passes */
    for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++)
	if ((rt->mfc_ttls[vifi] > 0) &&
	    (ip->ip_ttl > rt->mfc_ttls[vifi]))
	    MC_SEND(ip, vifp, m);

    return 0;
}
1255
1256/* check if a vif number is legal/ok. This is used by ip_output, to export
1257 * numvifs there,
1258 */
1259int
1260_legal_vif_num(vif)
1261    int vif;
1262{   if (vif>=0 && vif<=numvifs)
1263       return(1);
1264    else
1265       return(0);
1266}
1267
/* hook through which ip_output() reaches the range check above */
int (*legal_vif_num)(int) = _legal_vif_num;
1269
1270static void
1271phyint_send(ip, vifp, m)
1272    struct ip *ip;
1273    struct vif *vifp;
1274    struct mbuf *m;
1275{
1276    register struct mbuf *mb_copy;
1277    register struct mbuf *mopts;
1278    register struct ip_moptions *imo;
1279
1280    if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL)
1281	return;
1282
1283    MALLOC(imo, struct ip_moptions *, sizeof *imo, M_IPMOPTS, M_NOWAIT);
1284    if (imo == NULL) {
1285	m_freem(mb_copy);
1286	return;
1287    }
1288
1289    imo->imo_multicast_ifp  = vifp->v_ifp;
1290    imo->imo_multicast_ttl  = ip->ip_ttl - 1;
1291    imo->imo_multicast_loop = 1;
1292
1293    if (vifp->v_rate_limit <= 0)
1294	tbf_send_packet(vifp, mb_copy, imo);
1295    else
1296	tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len,
1297		    imo);
1298}
1299
/*
 * Forward the packet through a source-route tunnel: re-address a copy
 * of it to the remote tunnel end-point and insert an LSRR option so
 * the multicast group survives as the final source-route hop.
 */
static void
srcrt_send(ip, vifp, m)
    struct ip *ip;
    struct vif *vifp;
    struct mbuf *m;
{
    struct mbuf *mb_copy, *mb_opts;
    register struct ip *ip_copy;
    u_char *cp;

    /*
     * Make sure that adding the tunnel options won't exceed the
     * maximum allowed number of option bytes (60-byte header limit).
     */
    if (ip->ip_hl > (60 - TUNNEL_LEN) >> 2) {
	mrtstat.mrts_cant_tunnel++;
	if (mrtdebug)
	    log(LOG_DEBUG, "srcrt_send: no room for tunnel options, from %u",
		ntohl(ip->ip_src.s_addr));
	return;
    }

    if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL)
	return;

    ip_copy = mtod(mb_copy, struct ip *);
    ip_copy->ip_ttl--;
    ip_copy->ip_dst = vifp->v_rmt_addr;	  /* remote tunnel end-point */
    /*
     * Adjust the ip header length to account for the tunnel options.
     */
    ip_copy->ip_hl  += TUNNEL_LEN >> 2;
    ip_copy->ip_len += TUNNEL_LEN;
    MGET(mb_opts, M_DONTWAIT, MT_HEADER);
    if (mb_opts == NULL) {
	m_freem(mb_copy);
	return;
    }
    /*
     * 'Delete' the base ip header from the mb_copy chain
     */
    mb_copy->m_len -= IP_HDR_LEN;
    mb_copy->m_data += IP_HDR_LEN;
    /*
     * Make mb_opts be the new head of the packet chain.
     * Any options of the packet were left in the old packet chain head
     */
    mb_opts->m_next = mb_copy;
    /* leave 16 bytes of leading space -- presumably room for a
     * link-level header; NOTE(review): confirm this matches the
     * headroom convention used by the drivers on this system */
    mb_opts->m_data += 16;
    mb_opts->m_len = IP_HDR_LEN + TUNNEL_LEN;
    /*
     * Copy the base ip header from the mb_copy chain to the new head mbuf
     */
    bcopy((caddr_t)ip_copy, mtod(mb_opts, caddr_t), IP_HDR_LEN);
    /*
     * Add the NOP and LSRR after the base ip header:
     *   NOP, LSRR, length, pointer, then the local end-point
     *   (bytes 24-27) and the destination group (bytes 28-31).
     */
    cp = mtod(mb_opts, u_char *) + IP_HDR_LEN;
    *cp++ = IPOPT_NOP;
    *cp++ = IPOPT_LSRR;
    *cp++ = 11; /* LSRR option length */
    *cp++ = 8;  /* LSRR pointer to second element */
    /* NOTE(review): the u_long stores below assume the option area is
     * 4-byte aligned -- confirm for strict-alignment machines */
    *(u_long*)cp = vifp->v_lcl_addr.s_addr;	/* local tunnel end-point */
    cp += 4;
    *(u_long*)cp = ip->ip_dst.s_addr;		/* destination group */

    if (vifp->v_rate_limit <= 0)
	tbf_send_packet(vifp, mb_opts, 0);
    else
	tbf_control(vifp, mb_opts,
		    mtod(mb_opts, struct ip *), ip_copy->ip_len, 0);
}
1372
/*
 * Forward the packet through an encapsulating tunnel: prepend an outer
 * IP header (copied from multicast_encap_iphdr) addressed from the
 * local to the remote tunnel end-point, and restore the inner header
 * to wire format (network byte order, decremented ttl, new checksum).
 */
static void
encap_send(ip, vifp, m)
    register struct ip *ip;
    register struct vif *vifp;
    register struct mbuf *m;
{
    register struct mbuf *mb_copy;
    register struct ip *ip_copy;
    register int i, len = ip->ip_len;

    /*
     * copy the old packet & pullup it's IP header into the
     * new mbuf so we can modify it.  Try to fill the new
     * mbuf since if we don't the ethernet driver will.
     */
    MGET(mb_copy, M_DONTWAIT, MT_DATA);
    if (mb_copy == NULL)
	return;
    /* leave 16 bytes of leading space -- presumably link-level header
     * room; NOTE(review): confirm against driver headroom expectations */
    mb_copy->m_data += 16;
    mb_copy->m_len = sizeof(multicast_encap_iphdr);

    if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) {
	m_freem(mb_copy);
	return;
    }
    /* pull as much of the inner packet as fits into the first mbuf */
    i = MHLEN - M_LEADINGSPACE(mb_copy);
    if (i > len)
	i = len;
    mb_copy = m_pullup(mb_copy, i);	/* frees the chain on failure */
    if (mb_copy == NULL)
	return;

    /*
     * fill in the encapsulating IP header.
     */
    ip_copy = mtod(mb_copy, struct ip *);
    *ip_copy = multicast_encap_iphdr;
    ip_copy->ip_id = htons(ip_id++);
    ip_copy->ip_len += len;		/* outer length = template + inner */
    ip_copy->ip_src = vifp->v_lcl_addr;
    ip_copy->ip_dst = vifp->v_rmt_addr;

    /*
     * turn the encapsulated IP header back into a valid one.
     */
    ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr));
    --ip->ip_ttl;
    HTONS(ip->ip_len);
    HTONS(ip->ip_off);
    ip->ip_sum = 0;
#if defined(LBL) && !defined(ultrix)
    ip->ip_sum = ~oc_cksum((caddr_t)ip, ip->ip_hl << 2, 0);
#else
    /* temporarily advance m_data so in_cksum sees only the inner header */
    mb_copy->m_data += sizeof(multicast_encap_iphdr);
    ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
    mb_copy->m_data -= sizeof(multicast_encap_iphdr);
#endif

    if (vifp->v_rate_limit <= 0)
	tbf_send_packet(vifp, mb_copy, 0);
    else
	tbf_control(vifp, mb_copy, ip, ip_copy->ip_len, 0);
}
1436
1437/*
1438 * De-encapsulate a packet and feed it back through ip input (this
1439 * routine is called whenever IP gets a packet with proto type
1440 * ENCAP_PROTO and a local destination address).
1441 */
void
multiencap_decap(m)
    register struct mbuf *m;
{
    struct ifnet *ifp = m->m_pkthdr.rcvif;
    register struct ip *ip = mtod(m, struct ip *);	/* outer header */
    register int hlen = ip->ip_hl << 2;			/* outer header length */
    register int s;
    register struct ifqueue *ifq;
    register struct vif *vifp;

    /* packets of any other protocol fall through to raw ip input */
    if (ip->ip_p != ENCAP_PROTO) {
    	rip_input(m);
	return;
    }
    /*
     * dump the packet if it's not to a multicast destination or if
     * we don't have an encapsulating tunnel with the source.
     * Note:  This code assumes that the remote site IP address
     * uniquely identifies the tunnel (i.e., that this site has
     * at most one tunnel with the remote site).
     */
    if (! IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) {
	++mrtstat.mrts_bad_tunnel;
	m_freem(m);
	return;
    }
    /* one-entry cache of the last tunnel-source -> vif lookup */
    if (ip->ip_src.s_addr != last_encap_src) {
	register struct vif *vife;

	vifp = viftable;
	vife = vifp + numvifs;
	last_encap_src = ip->ip_src.s_addr;
	last_encap_vif = 0;
	for ( ; vifp < vife; ++vifp)
	    if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) {
		/* only encapsulating (not source-route) tunnels qualify */
		if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT))
		    == VIFF_TUNNEL)
		    last_encap_vif = vifp;
		break;
	    }
    }
    if ((vifp = last_encap_vif) == 0) {
	last_encap_src = 0;
	mrtstat.mrts_cant_tunnel++; /*XXX*/
	m_freem(m);
	if (mrtdebug)
	    log(LOG_DEBUG, "ip_mforward: no tunnel with %u",
		ntohl(ip->ip_src.s_addr));
	return;
    }
    ifp = vifp->v_ifp;
    /*
     * Strip the outer header, but keep sizeof(struct ifnet *) bytes of
     * it so the tunnel vif's ifp can be stashed directly in front of
     * the inner packet before it is queued for IP input.
     * NOTE(review): this matches the convention of an ifnet pointer
     * preceding packets on ipintrq -- confirm for this kernel version.
     */
    hlen -= sizeof(struct ifnet *);
    m->m_data += hlen;
    m->m_len -= hlen;
    *(mtod(m, struct ifnet **)) = ifp;
    ifq = &ipintrq;
    s = splimp();
    if (IF_QFULL(ifq)) {
	IF_DROP(ifq);
	m_freem(m);
    } else {
	IF_ENQUEUE(ifq, m);
	/*
	 * normally we would need a "schednetisr(NETISR_IP)"
	 * here but we were called by ip_input and it is going
	 * to loop back & try to dequeue the packet we just
	 * queued as soon as we return so we avoid the
	 * unnecessary software interrrupt.
	 */
    }
    splx(s);
}
1515
1516/*
1517 * Token bucket filter module
1518 */
1519void
1520tbf_control(vifp, m, ip, p_len, imo)
1521	register struct vif *vifp;
1522	register struct mbuf *m;
1523	register struct ip *ip;
1524	register u_long p_len;
1525	struct ip_moptions *imo;
1526{
1527    tbf_update_tokens(vifp);
1528
1529    /* if there are enough tokens,
1530     * and the queue is empty,
1531     * send this packet out
1532     */
1533
1534    if (vifp->v_tbf->q_len == 0) {
1535	if (p_len <= vifp->v_tbf->n_tok) {
1536	    vifp->v_tbf->n_tok -= p_len;
1537	    tbf_send_packet(vifp, m, imo);
1538	} else if (p_len > MAX_BKT_SIZE) {
1539	    /* drop if packet is too large */
1540	    mrtstat.mrts_pkt2large++;
1541	    m_freem(m);
1542	    return;
1543	} else {
1544	    /* queue packet and timeout till later */
1545	    tbf_queue(vifp, m, ip, imo);
1546	    timeout(tbf_reprocess_q, (caddr_t)vifp, 1);
1547	}
1548    } else if (vifp->v_tbf->q_len < MAXQSIZE) {
1549	/* finite queue length, so queue pkts and process queue */
1550	tbf_queue(vifp, m, ip, imo);
1551	tbf_process_q(vifp);
1552    } else {
1553	/* queue length too much, try to dq and queue and process */
1554	if (!tbf_dq_sel(vifp, ip)) {
1555	    mrtstat.mrts_q_overflow++;
1556	    m_freem(m);
1557	    return;
1558	} else {
1559	    tbf_queue(vifp, m, ip, imo);
1560	    tbf_process_q(vifp);
1561	}
1562    }
1563    return;
1564}
1565
1566/*
1567 * adds a packet to the queue at the interface
1568 */
1569void
1570tbf_queue(vifp, m, ip, imo)
1571	register struct vif *vifp;
1572	register struct mbuf *m;
1573	register struct ip *ip;
1574	struct ip_moptions *imo;
1575{
1576    register u_long ql;
1577    register int index = (vifp - viftable);
1578    register int s = splnet();
1579
1580    ql = vifp->v_tbf->q_len;
1581
1582    qtable[index][ql].pkt_m = m;
1583    qtable[index][ql].pkt_len = (mtod(m, struct ip *))->ip_len;
1584    qtable[index][ql].pkt_ip = ip;
1585    qtable[index][ql].pkt_imo = imo;
1586
1587    vifp->v_tbf->q_len++;
1588    splx(s);
1589}
1590
1591
1592/*
1593 * processes the queue at the interface
1594 */
1595void
1596tbf_process_q(vifp)
1597    register struct vif *vifp;
1598{
1599    register struct mbuf *m;
1600    register struct pkt_queue pkt_1;
1601    register int index = (vifp - viftable);
1602    register int s = splnet();
1603
1604    /* loop through the queue at the interface and send as many packets
1605     * as possible
1606     */
1607    while (vifp->v_tbf->q_len > 0) {
1608	/* locate the first packet */
1609	pkt_1.pkt_len = ((qtable[index][0]).pkt_len);
1610	pkt_1.pkt_m   = (qtable[index][0]).pkt_m;
1611	pkt_1.pkt_ip   = (qtable[index][0]).pkt_ip;
1612	pkt_1.pkt_imo = (qtable[index][0]).pkt_imo;
1613
1614	/* determine if the packet can be sent */
1615	if (pkt_1.pkt_len <= vifp->v_tbf->n_tok) {
1616	    /* if so,
1617	     * reduce no of tokens, dequeue the queue,
1618	     * send the packet.
1619	     */
1620	    vifp->v_tbf->n_tok -= pkt_1.pkt_len;
1621
1622	    tbf_dequeue(vifp, 0);
1623
1624	    tbf_send_packet(vifp, pkt_1.pkt_m, pkt_1.pkt_imo);
1625
1626	} else break;
1627    }
1628    splx(s);
1629}
1630
1631/*
1632 * removes the jth packet from the queue at the interface
1633 */
1634void
1635tbf_dequeue(vifp,j)
1636    register struct vif *vifp;
1637    register int j;
1638{
1639    register u_long index = vifp - viftable;
1640    register int i;
1641
1642    for (i=j+1; i <= vifp->v_tbf->q_len - 1; i++) {
1643	qtable[index][i-1].pkt_m   = qtable[index][i].pkt_m;
1644	qtable[index][i-1].pkt_len = qtable[index][i].pkt_len;
1645	qtable[index][i-1].pkt_ip = qtable[index][i].pkt_ip;
1646	qtable[index][i-1].pkt_imo = qtable[index][i].pkt_imo;
1647    }
1648    qtable[index][i-1].pkt_m = NULL;
1649    qtable[index][i-1].pkt_len = NULL;
1650    qtable[index][i-1].pkt_ip = NULL;
1651    qtable[index][i-1].pkt_imo = NULL;
1652
1653    vifp->v_tbf->q_len--;
1654
1655    if (tbfdebug > 1)
1656	log(LOG_DEBUG, "tbf_dequeue: vif# %d qlen %d",vifp-viftable, i-1);
1657}
1658
1659void
1660tbf_reprocess_q(xvifp)
1661	void *xvifp;
1662{
1663    register struct vif *vifp = xvifp;
1664    if (ip_mrouter == NULL)
1665	return;
1666
1667    tbf_update_tokens(vifp);
1668
1669    tbf_process_q(vifp);
1670
1671    if (vifp->v_tbf->q_len)
1672	timeout(tbf_reprocess_q, (caddr_t)vifp, 1);
1673}
1674
1675/* function that will selectively discard a member of the queue
1676 * based on the precedence value and the priority obtained through
1677 * a lookup table - not yet implemented accurately!
1678 */
1679int
1680tbf_dq_sel(vifp, ip)
1681    register struct vif *vifp;
1682    register struct ip *ip;
1683{
1684    register int i;
1685    register int s = splnet();
1686    register u_int p;
1687
1688    p = priority(vifp, ip);
1689
1690    for(i=vifp->v_tbf->q_len-1;i >= 0;i--) {
1691	if (p > priority(vifp, qtable[vifp-viftable][i].pkt_ip)) {
1692	    m_freem(qtable[vifp-viftable][i].pkt_m);
1693	    tbf_dequeue(vifp,i);
1694	    splx(s);
1695	    mrtstat.mrts_drop_sel++;
1696	    return(1);
1697	}
1698    }
1699    splx(s);
1700    return(0);
1701}
1702
1703void
1704tbf_send_packet(vifp, m, imo)
1705    register struct vif *vifp;
1706    register struct mbuf *m;
1707    struct ip_moptions *imo;
1708{
1709    register struct mbuf *mcp;
1710    int error;
1711    int s = splnet();
1712
1713    /* if source route tunnels */
1714    if (vifp->v_flags & VIFF_SRCRT) {
1715	error = ip_output(m, (struct mbuf *)0, (struct route *)0,
1716			  IP_FORWARDING, imo);
1717	if (mrtdebug > 1)
1718	    log(LOG_DEBUG, "srcrt_send on vif %d err %d", vifp-viftable, error);
1719    } else if (vifp->v_flags & VIFF_TUNNEL) {
1720	/* If tunnel options */
1721	ip_output(m, (struct mbuf *)0, (struct route *)0,
1722		  IP_FORWARDING, imo);
1723    } else {
1724	/* if physical interface option, extract the options and then send */
1725	error = ip_output(m, (struct mbuf *)0, (struct route *)0,
1726			  IP_FORWARDING, imo);
1727	FREE(imo, M_IPMOPTS);
1728
1729	if (mrtdebug > 1)
1730	    log(LOG_DEBUG, "phyint_send on vif %d err %d", vifp-viftable, error);
1731    }
1732    splx(s);
1733}
1734
1735/* determine the current time and then
1736 * the elapsed time (between the last time and time now)
1737 * in milliseconds & update the no. of tokens in the bucket
1738 */
1739void
1740tbf_update_tokens(vifp)
1741    register struct vif *vifp;
1742{
1743    struct timeval tp;
1744    register u_long t;
1745    register u_long elapsed;
1746    register int s = splnet();
1747
1748    GET_TIME(tp);
1749
1750    t = tp.tv_sec*1000 + tp.tv_usec/1000;
1751
1752    elapsed = (t - vifp->v_tbf->last_pkt_t) * vifp->v_rate_limit /8;
1753    vifp->v_tbf->n_tok += elapsed;
1754    vifp->v_tbf->last_pkt_t = t;
1755
1756    if (vifp->v_tbf->n_tok > MAX_BKT_SIZE)
1757	vifp->v_tbf->n_tok = MAX_BKT_SIZE;
1758
1759    splx(s);
1760}
1761
1762static int
1763priority(vifp, ip)
1764    register struct vif *vifp;
1765    register struct ip *ip;
1766{
1767    register u_long graddr;
1768    register int prio;
1769
1770    /* temporary hack; will add general packet classifier some day */
1771
1772    prio = 50;  /* default priority */
1773
1774    /* check for source route options and add option length to get dst */
1775    if (vifp->v_flags & VIFF_SRCRT)
1776	graddr = ntohl((ip+8)->ip_dst.s_addr);
1777    else
1778	graddr = ntohl(ip->ip_dst.s_addr);
1779
1780    switch (graddr & 0xf) {
1781	case 0x0: break;
1782	case 0x1: if (graddr == 0xe0020001) prio = 65; /* MBone Audio */
1783		  break;
1784	case 0x2: break;
1785	case 0x3: break;
1786	case 0x4: break;
1787	case 0x5: break;
1788	case 0x6: break;
1789	case 0x7: break;
1790	case 0x8: break;
1791	case 0x9: break;
1792	case 0xa: if (graddr == 0xe000010a) prio = 85; /* IETF Low Audio 1 */
1793		  break;
1794	case 0xb: if (graddr == 0xe000010b) prio = 75; /* IETF Audio 1 */
1795		  break;
1796	case 0xc: if (graddr == 0xe000010c) prio = 60; /* IETF Video 1 */
1797		  break;
1798	case 0xd: if (graddr == 0xe000010d) prio = 80; /* IETF Low Audio 2 */
1799		  break;
1800	case 0xe: if (graddr == 0xe000010e) prio = 70; /* IETF Audio 2 */
1801		  break;
1802	case 0xf: if (graddr == 0xe000010f) prio = 55; /* IETF Video 2 */
1803		  break;
1804    }
1805
1806    if (tbfdebug > 1) log(LOG_DEBUG, "graddr%x prio%d", graddr, prio);
1807
1808    return prio;
1809}
1810
1811/*
1812 * End of token bucket filter modifications
1813 */
1814#endif
1815
1816
1817