/* ip_mroute.c, revision 2763 */
1/*
2 * IP multicast forwarding procedures
3 *
4 * Written by David Waitzman, BBN Labs, August 1988.
5 * Modified by Steve Deering, Stanford, February 1989.
6 * Modified by Mark J. Steiglitz, Stanford, May, 1991
7 * Modified by Van Jacobson, LBL, January 1993
8 * Modified by Ajit Thyagarajan, PARC, August 1993
9 *
10 * MROUTING 1.8
11 */
12
13
14#include <sys/param.h>
15#include <sys/systm.h>
16#include <sys/mbuf.h>
17#include <sys/socket.h>
18#include <sys/socketvar.h>
19#include <sys/protosw.h>
20#include <sys/errno.h>
21#include <sys/time.h>
22#include <sys/ioctl.h>
23#include <sys/syslog.h>
24#include <net/if.h>
25#include <net/route.h>
26#include <net/raw_cb.h>
27#include <netinet/in.h>
28#include <netinet/in_systm.h>
29#include <netinet/ip.h>
30#include <netinet/ip_var.h>
31#include <netinet/in_pcb.h>
32#include <netinet/in_var.h>
33#include <netinet/igmp.h>
34#include <netinet/igmp_var.h>
35#include <netinet/ip_mroute.h>
36
37#ifndef NTOHL
38#if BYTE_ORDER != BIG_ENDIAN
39#define NTOHL(d) ((d) = ntohl((d)))
40#define NTOHS(d) ((d) = ntohs((u_short)(d)))
41#define HTONL(d) ((d) = htonl((d)))
42#define HTONS(d) ((d) = htons((u_short)(d)))
43#else
44#define NTOHL(d)
45#define NTOHS(d)
46#define HTONL(d)
47#define HTONS(d)
48#endif
49#endif
50
51#ifndef MROUTING
52/*
53 * Dummy routines and globals used when multicast routing is not compiled in.
54 */
55
56u_int		ip_mrtproto = 0;
57struct socket  *ip_mrouter  = NULL;
58struct mrtstat	mrtstat;
59
60
61int
62_ip_mrouter_cmd(cmd, so, m)
63	int cmd;
64	struct socket *so;
65	struct mbuf *m;
66{
67	return(EOPNOTSUPP);
68}
69
70int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = _ip_mrouter_cmd;
71
72int
73_ip_mrouter_done()
74{
75	return(0);
76}
77
78int (*ip_mrouter_done)(void) = _ip_mrouter_done;
79
80int
81_ip_mforward(ip, ifp, m, imo)
82	struct ip *ip;
83	struct ifnet *ifp;
84	struct mbuf *m;
85	struct ip_moptions *imo;
86{
87	return(0);
88}
89
90int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
91		   struct ip_moptions *) = _ip_mforward;
92
93int
94_mrt_ioctl(int req, caddr_t data, struct proc *p)
95{
96	return EOPNOTSUPP;
97}
98
99int (*mrt_ioctl)(int, caddr_t, struct proc *) = _mrt_ioctl;
100
101void multiencap_decap(struct mbuf *m) { /* XXX must fixup manually */
102	rip_input(m);
103}
104
105int (*legal_vif_num)(int) = 0;
106
107#else
108
109#define INSIZ		sizeof(struct in_addr)
110#define	same(a1, a2) \
111	(bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0)
112
113#define MT_MRTABLE MT_RTABLE	/* since nothing else uses it */
114
115/*
116 * Globals.  All but ip_mrouter and ip_mrtproto could be static,
117 * except for netstat or debugging purposes.
118 */
119#ifndef MROUTE_LKM
120struct socket  *ip_mrouter  = NULL;
121struct mrtstat	mrtstat;
122
123int		ip_mrtproto = IGMP_DVMRP;    /* for netstat only */
124#else
125extern struct mrtstat mrtstat;
126extern int ip_mrtproto;
127#endif
128
129#define NO_RTE_FOUND 	0x1
130#define RTE_FOUND	0x2
131
132struct mbuf    *mfctable[MFCTBLSIZ];
133struct vif	viftable[MAXVIFS];
134u_int		mrtdebug = 0;	  /* debug level 	*/
135u_int       	tbfdebug = 0;     /* tbf debug level 	*/
136
137u_long timeout_val = 0;			/* count of outstanding upcalls */
138
139/*
140 * Define the token bucket filter structures
141 * tbftable -> each vif has one of these for storing info
142 * qtable   -> each interface has an associated queue of pkts
143 */
144
145struct tbf tbftable[MAXVIFS];
146struct pkt_queue qtable[MAXVIFS][MAXQSIZE];
147
148/*
149 * 'Interfaces' associated with decapsulator (so we can tell
150 * packets that went through it from ones that get reflected
151 * by a broken gateway).  These interfaces are never linked into
152 * the system ifnet list & no routes point to them.  I.e., packets
153 * can't be sent this way.  They only exist as a placeholder for
154 * multicast source verification.
155 */
156struct ifnet multicast_decap_if[MAXVIFS];
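/*
 * How these are used (a summary of the code below): add_vif() points an
 * encapsulating tunnel vif's v_ifp at its slot in multicast_decap_if[],
 * multiencap_decap() stamps that pointer onto de-encapsulated packets in
 * place of the real receive interface, and ip_mdq()'s parent-vif check
 * (viftable[vifi].v_ifp != ifp) then accepts only packets that really came
 * in through the matching tunnel.
 */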
157
158#define ENCAP_TTL 64
159#define ENCAP_PROTO 4
160
161/* prototype IP hdr for encapsulated packets */
162struct ip multicast_encap_iphdr = {
163#if BYTE_ORDER == LITTLE_ENDIAN
164	sizeof(struct ip) >> 2, IPVERSION,
165#else
166	IPVERSION, sizeof(struct ip) >> 2,
167#endif
168	0,				/* tos */
169	sizeof(struct ip),		/* total length */
170	0,				/* id */
171	0,				/* frag offset */
172	ENCAP_TTL, ENCAP_PROTO,
173	0,				/* checksum */
174};
175
176/*
177 * Private variables.
178 */
179static vifi_t	   numvifs = 0;
180
181/*
182 * one-back cache used by multiencap_decap to locate a tunnel's vif
183 * given a datagram's src ip address.
184 */
185static u_long last_encap_src;
186static struct vif *last_encap_vif;
187
188static u_long nethash_fc(u_long, u_long);
189static struct mfc *mfcfind(u_long, u_long);
190int get_sg_cnt(struct sioc_sg_req *);
191int get_vif_cnt(struct sioc_vif_req *);
192int get_vifs(caddr_t);
193static int add_vif(struct vifctl *);
194static int del_vif(vifi_t *);
195static int add_mfc(struct mfcctl *);
196static int del_mfc(struct delmfcctl *);
197static void cleanup_cache(void *);
198static int ip_mdq(struct mbuf *, struct ifnet *, u_long, struct mfc *,
199		  struct ip_moptions *);
200extern int (*legal_vif_num)(int);
201static void phyint_send(struct ip *, struct vif *, struct mbuf *);
202static void srcrt_send(struct ip *, struct vif *, struct mbuf *);
203static void encap_send(struct ip *, struct vif *, struct mbuf *);
204void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long,
205		 struct ip_moptions *);
206void tbf_queue(struct vif *, struct mbuf *, struct ip *, struct ip_moptions *);
207void tbf_process_q(struct vif *);
208void tbf_dequeue(struct vif *, int);
209void tbf_reprocess_q(void *);
210int tbf_dq_sel(struct vif *, struct ip *);
211void tbf_send_packet(struct vif *, struct mbuf *, struct ip_moptions *);
212void tbf_update_tokens(struct vif *);
213static int priority(struct vif *, struct ip *);
214static int ip_mrouter_init(struct socket *);
215
216/*
217 * A simple hash function: XORs MFCHASHMOD of the low-order nonzero octet
218 * of the origin's net or subnet number with that of the multicast group.
219 */
220static u_long
221nethash_fc(m,n)
222    register u_long m;
223    register u_long n;
224{
225    struct in_addr in1;
226    struct in_addr in2;
227
228    in1.s_addr = m;
229    m = in_netof(in1);
230    while ((m & 0xff) == 0) m >>= 8;
231
232    in2.s_addr = n;
233    n = in_netof(in2);
234    while ((n & 0xff) == 0) n >>= 8;
235
236    return (MFCHASHMOD(m) ^ MFCHASHMOD(n));
237}
238
239/*
240 * this is a direct-mapped cache used to speed the mapping from a
241 * datagram source address to the associated multicast route.  Note
242 * that unlike mrttable, the hash is on IP address, not IP net number.
243 */
244#define MFCHASHSIZ 1024
245#define MFCHASH(a, g) ((((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
246			((g) >> 20) ^ ((g) >> 10) ^ (g)) & (MFCHASHSIZ-1))
247struct mfc *mfchash[MFCHASHSIZ];
248
249/*
250 * Find a route for a given origin IP address and Multicast group address
251 * Type of service parameter to be added in the future!!!
252 */
253#define MFCFIND(o, g, rt) { \
254	register u_int _mrhasho = o; \
255	register u_int _mrhashg = g; \
256	_mrhasho = MFCHASH(_mrhasho, _mrhashg); \
257	++mrtstat.mrts_mfc_lookups; \
258	rt = mfchash[_mrhasho]; \
259	if ((rt == NULL) || \
260	    ((o & rt->mfc_originmask.s_addr) != rt->mfc_origin.s_addr) || \
261	     (g != rt->mfc_mcastgrp.s_addr)) \
262	     if ((rt = mfcfind(o, g)) != NULL) \
263		mfchash[_mrhasho] = rt; \
264}
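/*
 * Illustrative use of MFCFIND (this is how ip_mforward() and get_sg_cnt()
 * below invoke it):
 *
 *	struct mfc *rt;
 *	MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt);
 *
 * On return rt is either the direct-mapped cache hit, the entry located by
 * mfcfind() (which is then installed in mfchash[]), or NULL if no matching
 * route exists.
 */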
265
266/*
267 * Find route by examining hash table entries
268 */
269static struct mfc *
270mfcfind(origin, mcastgrp)
271    u_long origin;
272    u_long mcastgrp;
273{
274    register struct mbuf *mb_rt;
275    register struct mfc *rt;
276    register u_long hash;
277
278    hash = nethash_fc(origin, mcastgrp);
279    for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) {
280	rt = mtod(mb_rt, struct mfc *);
281	if (((origin & rt->mfc_originmask.s_addr) == rt->mfc_origin.s_addr) &&
282	    (mcastgrp == rt->mfc_mcastgrp.s_addr) &&
283	    (mb_rt->m_act == NULL))
284	    return (rt);
285    }
286    mrtstat.mrts_mfc_misses++;
287    return NULL;
288}
289
290/*
291 * Macros to compute elapsed time efficiently
292 * Borrowed from Van Jacobson's scheduling code
293 */
294#define TV_DELTA(a, b, delta) { \
295	    register int xxs; \
296		\
297	    delta = (a).tv_usec - (b).tv_usec; \
298	    if ((xxs = (a).tv_sec - (b).tv_sec)) { \
299	       switch (xxs) { \
300		      case 2: \
301			  delta += 1000000; \
302			      /* fall through */ \
303		      case 1: \
304			  delta += 1000000; \
305			  break; \
306		      default: \
307			  delta += (1000000 * xxs); \
308	       } \
309	    } \
310}
311
312#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \
313	      (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec)
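/*
 * Worked example (illustrative only): with a = {5 s, 200 us} and
 * b = {3 s, 900000 us}, TV_DELTA(a, b, delta) leaves
 * delta = 200 - 900000 + 2*1000000 = 1100200 microseconds,
 * and TV_LT(b, a) is true because b.tv_sec < a.tv_sec.
 */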
314
315/*
316 * Handle DVMRP setsockopt commands to modify the multicast routing tables.
317 */
318int
319X_ip_mrouter_cmd(cmd, so, m)
320    int cmd;
321    struct socket *so;
322    struct mbuf *m;
323{
324    if (cmd != DVMRP_INIT && so != ip_mrouter) return EACCES;
325
326    switch (cmd) {
327	case DVMRP_INIT:     return ip_mrouter_init(so);
328	case DVMRP_DONE:     return ip_mrouter_done();
329	case DVMRP_ADD_VIF:  return add_vif (mtod(m, struct vifctl *));
330	case DVMRP_DEL_VIF:  return del_vif (mtod(m, vifi_t *));
331	case DVMRP_ADD_MFC:  return add_mfc (mtod(m, struct mfcctl *));
332	case DVMRP_DEL_MFC:  return del_mfc (mtod(m, struct delmfcctl *));
333	default:             return EOPNOTSUPP;
334    }
335}
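/*
 * For orientation (a sketch, not part of this file): these DVMRP_* commands
 * are issued by a user-level routing daemon such as mrouted through
 * setsockopt() on its raw IGMP socket, roughly
 *
 *	s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	setsockopt(s, IPPROTO_IP, DVMRP_INIT, ...);   then ADD_VIF, ADD_MFC, ...
 *
 * (the exact setsockopt plumbing lives in the IP option-processing code, not
 * here).  ip_mrouter_init() below enforces that the socket is of that kind.
 */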
336
337#ifndef MROUTE_LKM
338int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = X_ip_mrouter_cmd;
339#endif
340
341/*
342 * Handle ioctl commands to obtain information from the cache
343 */
344int
345X_mrt_ioctl(cmd, data)
346    int cmd;
347    caddr_t data;
348{
349    int error = 0;
350
351    switch (cmd) {
352      case (SIOCGETVIFINF):		/* Read Virtual Interface (m/cast) */
353	  return (get_vifs(data));
354	  break;
355      case (SIOCGETVIFCNT):
356	  return (get_vif_cnt((struct sioc_vif_req *)data));
357	  break;
358      case (SIOCGETSGCNT):
359	  return (get_sg_cnt((struct sioc_sg_req *)data));
360	  break;
361      default:
362	  return (EINVAL);
363	  break;
364    }
365    return error;
366}
367
368#ifndef MROUTE_LKM
369int (*mrt_ioctl)(int, caddr_t, struct proc *) = X_mrt_ioctl;
370#else
371extern int (*mrt_ioctl)(int, caddr_t, struct proc *);
372#endif
373
374/*
375 * returns the packet count for the source group provided
376 */
377int
378get_sg_cnt(req)
379    register struct sioc_sg_req *req;
380{
381    register struct mfc *rt;
382    int s;
383
384    s = splnet();
385    MFCFIND(req->src.s_addr, req->grp.s_addr, rt);
386    splx(s);
387    if (rt != NULL)
388	req->count = rt->mfc_pkt_cnt;
389    else
390	req->count = 0xffffffff;
391
392    return 0;
393}
394
395/*
396 * returns the input and output packet counts on the interface provided
397 */
398int
399get_vif_cnt(req)
400    register struct sioc_vif_req *req;
401{
402    register vifi_t vifi = req->vifi;
403
404    req->icount = viftable[vifi].v_pkt_in;
405    req->ocount = viftable[vifi].v_pkt_out;
406
407    return 0;
408}
409
410int
411get_vifs(data)
412    char *data;
413{
414    struct vif_conf *vifc = (struct vif_conf *)data;
415    struct vif_req *vifrp, vifr;
416    int space, error=0;
417
418    vifi_t vifi;
419    int s;
420
421    space = vifc->vifc_len;
422    vifrp  = vifc->vifc_req;
423
424    s = splnet();
425    vifc->vifc_num=numvifs;
426
427    for (vifi = 0; vifi <  numvifs; vifi++, vifrp++) {
428	if (viftable[vifi].v_lcl_addr.s_addr != 0) {
429	    vifr.v_flags=viftable[vifi].v_flags;
430	    vifr.v_threshold=viftable[vifi].v_threshold;
431	    vifr.v_lcl_addr=viftable[vifi].v_lcl_addr;
432	    vifr.v_rmt_addr=viftable[vifi].v_rmt_addr;
433	    strncpy(vifr.v_if_name,viftable[vifi].v_ifp->if_name,IFNAMSIZ);
434	    if ((space -= sizeof(vifr)) < 0) {
435		splx(s);
436		return(ENOSPC);
437	    }
438	    error = copyout((caddr_t)&vifr,(caddr_t)vifrp,(u_int)(sizeof vifr));
439	    if (error) {
440		splx(s);
441		return(error);
442	    }
443	}
444    }
445    splx(s);
446    return 0;
447}
448/*
449 * Enable multicast routing
450 */
451static int
452ip_mrouter_init(so)
453	struct socket *so;
454{
455    if (so->so_type != SOCK_RAW ||
456	so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP;
457
458    if (ip_mrouter != NULL) return EADDRINUSE;
459
460    ip_mrouter = so;
461
462    if (mrtdebug)
463	log(LOG_DEBUG, "ip_mrouter_init");
464
465    return 0;
466}
467
468/*
469 * Disable multicast routing
470 */
471int
472X_ip_mrouter_done()
473{
474    vifi_t vifi;
475    int i;
476    struct ifnet *ifp;
477    struct ifreq ifr;
478    struct mbuf *mb_rt;
479    struct mbuf *m;
480    struct rtdetq *rte;
481    int s;
482
483    s = splnet();
484
485    /*
486     * For each phyint in use, disable promiscuous reception of all IP
487     * multicasts.
488     */
489    for (vifi = 0; vifi < numvifs; vifi++) {
490	if (viftable[vifi].v_lcl_addr.s_addr != 0 &&
491	    !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
492	    ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
493	    ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr
494								= INADDR_ANY;
495	    ifp = viftable[vifi].v_ifp;
496	    (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
497	}
498    }
499    bzero((caddr_t)qtable, sizeof(qtable));
500    bzero((caddr_t)tbftable, sizeof(tbftable));
501    bzero((caddr_t)viftable, sizeof(viftable));
502    numvifs = 0;
503
504    /*
505     * Check if any outstanding timeouts remain
506     */
507    if (timeout_val != 0)
508	for (i = 0; i < MFCTBLSIZ; i++) {
509	    mb_rt = mfctable[i];
510	    while (mb_rt) {
511		if ( mb_rt->m_act != NULL) {
512		    untimeout(cleanup_cache, (caddr_t)mb_rt);
513		    while (m = mb_rt->m_act) {
514			mb_rt->m_act = m->m_act;
515			rte = mtod(m, struct rtdetq *);
516			m_freem(rte->m);
517			m_free(m);
518		    }
519		    timeout_val--;
520		}
521	    mb_rt = mb_rt->m_next;
522	    }
523	    if (timeout_val == 0)
524		break;
525	}
526
527    /*
528     * Free all multicast forwarding cache entries.
529     */
530    for (i = 0; i < MFCTBLSIZ; i++)
531	m_freem(mfctable[i]);
532
533    bzero((caddr_t)mfctable, sizeof(mfctable));
534    bzero((caddr_t)mfchash, sizeof(mfchash));
535
536    /*
537     * Reset de-encapsulation cache
538     */
539    last_encap_src = 0;
540    last_encap_vif = NULL;
541
542    ip_mrouter = NULL;
543
544    splx(s);
545
546    if (mrtdebug)
547	log(LOG_DEBUG, "ip_mrouter_done");
548
549    return 0;
550}
551
552#ifndef MROUTE_LKM
553int (*ip_mrouter_done)(void) = X_ip_mrouter_done;
554#endif
555
556/*
557 * Add a vif to the vif table
558 */
559static int
560add_vif(vifcp)
561    register struct vifctl *vifcp;
562{
563    register struct vif *vifp = viftable + vifcp->vifc_vifi;
564    static struct sockaddr_in sin = {AF_INET};
565    struct ifaddr *ifa;
566    struct ifnet *ifp;
567    struct ifreq ifr;
568    int error, s;
569    struct tbf *v_tbf = tbftable + vifcp->vifc_vifi;
570
571    if (vifcp->vifc_vifi >= MAXVIFS)  return EINVAL;
572    if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE;
573
574    /* Find the interface with an address in AF_INET family */
575    sin.sin_addr = vifcp->vifc_lcl_addr;
576    ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
577    if (ifa == 0) return EADDRNOTAVAIL;
578    ifp = ifa->ifa_ifp;
579
580    if (vifcp->vifc_flags & VIFF_TUNNEL) {
581	if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) {
582	    static int inited = 0;
583	    if(!inited) {
584		for (s = 0; s < MAXVIFS; ++s) {
585		    multicast_decap_if[s].if_name = "mdecap";
586		    multicast_decap_if[s].if_unit = s;
587		}
588		inited = 1;
589	    }
590	    ifp = &multicast_decap_if[vifcp->vifc_vifi];
591	} else {
592	    ifp = 0;
593	}
594    } else {
595	/* Make sure the interface supports multicast */
596	if ((ifp->if_flags & IFF_MULTICAST) == 0)
597	    return EOPNOTSUPP;
598
599	/* Enable promiscuous reception of all IP multicasts from the if */
600	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
601	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY;
602	s = splnet();
603	error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr);
604	splx(s);
605	if (error)
606	    return error;
607    }
608
609    s = splnet();
610    /* define parameters for the tbf structure */
611    vifp->v_tbf = v_tbf;
612    vifp->v_tbf->q_len = 0;
613    vifp->v_tbf->n_tok = 0;
614    vifp->v_tbf->last_pkt_t = 0;
615
616    vifp->v_flags     = vifcp->vifc_flags;
617    vifp->v_threshold = vifcp->vifc_threshold;
618    vifp->v_lcl_addr  = vifcp->vifc_lcl_addr;
619    vifp->v_rmt_addr  = vifcp->vifc_rmt_addr;
620    vifp->v_ifp       = ifp;
621    vifp->v_rate_limit= vifcp->vifc_rate_limit;
622    /* initialize per vif pkt counters */
623    vifp->v_pkt_in    = 0;
624    vifp->v_pkt_out   = 0;
625    splx(s);
626
627    /* Adjust numvifs up if the vifi is higher than numvifs */
628    if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1;
629
630    if (mrtdebug)
631	log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d",
632	    vifcp->vifc_vifi,
633	    ntohl(vifcp->vifc_lcl_addr.s_addr),
634	    (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
635	    ntohl(vifcp->vifc_rmt_addr.s_addr),
636	    vifcp->vifc_threshold,
637	    vifcp->vifc_rate_limit);
638
639    return 0;
640}
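/*
 * Example vifctl contents (illustrative values only):
 *
 *	physical interface:   vifc_vifi = 0, vifc_flags = 0,
 *	                      vifc_threshold = 1, vifc_rate_limit = 0,
 *	                      vifc_lcl_addr = an address owned by the ifnet;
 *	encapsulating tunnel: as above plus VIFF_TUNNEL in vifc_flags and the
 *	                      far end's address in vifc_rmt_addr (VIFF_SRCRT
 *	                      selects source-route rather than IP-in-IP encap).
 */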
641
642/*
643 * Delete a vif from the vif table
644 */
645static int
646del_vif(vifip)
647    vifi_t *vifip;
648{
649    register struct vif *vifp = viftable + *vifip;
650    register vifi_t vifi;
651    struct ifnet *ifp;
652    struct ifreq ifr;
653    int s;
654
655    if (*vifip >= numvifs) return EINVAL;
656    if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL;
657
658    s = splnet();
659
660    if (!(vifp->v_flags & VIFF_TUNNEL)) {
661	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
662	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY;
663	ifp = vifp->v_ifp;
664	(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
665    }
666
667    if (vifp == last_encap_vif) {
668	last_encap_vif = 0;
669	last_encap_src = 0;
670    }
671
672    bzero((caddr_t)qtable[*vifip],
673	  sizeof(qtable[*vifip]));
674    bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf)));
675    bzero((caddr_t)vifp, sizeof (*vifp));
676
677    /* Adjust numvifs down */
678    for (vifi = numvifs; vifi > 0; vifi--)
679	if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break;
680    numvifs = vifi;
681
682    splx(s);
683
684    if (mrtdebug)
685      log(LOG_DEBUG, "del_vif %d, numvifs %d", *vifip, numvifs);
686
687    return 0;
688}
689
690/*
691 * Add an mfc entry
692 */
693static int
694add_mfc(mfccp)
695    struct mfcctl *mfccp;
696{
697    struct mfc *rt;
698    struct mfc *rt1;
699    register struct mbuf *mb_rt;
700    struct mbuf *prev_mb_rt;
701    u_long hash;
702    struct mbuf *mb_ntry;
703    struct rtdetq *rte;
704    register u_short nstl;
705    int s;
706    int i;
707
708    rt = mfcfind(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
709
710    /* If an entry already exists, just update the fields */
711    if (rt) {
712	if (mrtdebug)
713	    log(LOG_DEBUG,"add_mfc update o %x g %x m %x p %x",
714		ntohl(mfccp->mfcc_origin.s_addr),
715		ntohl(mfccp->mfcc_mcastgrp.s_addr),
716		ntohl(mfccp->mfcc_originmask.s_addr),
717		mfccp->mfcc_parent);
718
719	s = splnet();
720	rt->mfc_parent = mfccp->mfcc_parent;
721	for (i = 0; i < numvifs; i++)
722	    VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
723	splx(s);
724	return 0;
725    }
726
727    /*
728     * Find the entry for which the upcall was made and update
729     */
730    s = splnet();
731    hash = nethash_fc(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
732    for (prev_mb_rt = mb_rt = mfctable[hash], nstl = 0;
733	 mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) {
734
735	rt = mtod(mb_rt, struct mfc *);
736	if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr)
737	     == mfccp->mfcc_origin.s_addr) &&
738	    (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
739	    (mb_rt->m_act != NULL)) {
740
741	    if (!nstl++) {
742		if (mrtdebug)
743		    log(LOG_DEBUG,"add_mfc o %x g %x m %x p %x dbg %x",
744			ntohl(mfccp->mfcc_origin.s_addr),
745			ntohl(mfccp->mfcc_mcastgrp.s_addr),
746			ntohl(mfccp->mfcc_originmask.s_addr),
747			mfccp->mfcc_parent, mb_rt->m_act);
748
749		rt->mfc_origin     = mfccp->mfcc_origin;
750		rt->mfc_originmask = mfccp->mfcc_originmask;
751		rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
752		rt->mfc_parent     = mfccp->mfcc_parent;
753		for (i = 0; i < numvifs; i++)
754		    VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
755		/* initialize pkt counters per src-grp */
756		rt->mfc_pkt_cnt    = 0;
757		rt1 = rt;
758	    }
759
760	    /* prevent cleanup of cache entry */
761	    untimeout(cleanup_cache, (caddr_t)mb_rt);
762	    timeout_val--;
763
764	    /* free packets Qed at the end of this entry */
765	    while (mb_rt->m_act) {
766		mb_ntry = mb_rt->m_act;
767		rte = mtod(mb_ntry, struct rtdetq *);
768		ip_mdq(rte->m, rte->ifp, rte->tunnel_src,
769		       rt1, rte->imo);
770		mb_rt->m_act = mb_ntry->m_act;
771		m_freem(rte->m);
772		m_free(mb_ntry);
773	    }
774
775	    /*
776	     * If more than one entry was created for a single upcall
777	     * delete that entry
778	     */
779	    if (nstl > 1) {
780		MFREE(mb_rt, prev_mb_rt->m_next);
781		mb_rt = prev_mb_rt;
782	    }
783	}
784    }
785
786    /*
787     * It is possible that an entry is being inserted without an upcall
788     */
789    if (nstl == 0) {
790	if (mrtdebug)
791	    log(LOG_DEBUG,"add_mfc no upcall h %d o %x g %x m %x p %x",
792		hash, ntohl(mfccp->mfcc_origin.s_addr),
793		ntohl(mfccp->mfcc_mcastgrp.s_addr),
794		ntohl(mfccp->mfcc_originmask.s_addr),
795		mfccp->mfcc_parent);
796
797	for (prev_mb_rt = mb_rt = mfctable[hash];
798	     mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) {
799
800	    rt = mtod(mb_rt, struct mfc *);
801	    if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr)
802		 == mfccp->mfcc_origin.s_addr) &&
803		(rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) {
804
805		rt->mfc_origin     = mfccp->mfcc_origin;
806		rt->mfc_originmask = mfccp->mfcc_originmask;
807		rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
808		rt->mfc_parent     = mfccp->mfcc_parent;
809		for (i = 0; i < numvifs; i++)
810		    VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
811		/* initialize pkt counters per src-grp */
812		rt->mfc_pkt_cnt    = 0;
813	    }
814	}
815	if (mb_rt == NULL) {
816	    /* no upcall, so make a new entry */
817	    MGET(mb_rt, M_DONTWAIT, MT_MRTABLE);
818	    if (mb_rt == NULL) {
819		splx(s);
820		return ENOBUFS;
821	    }
822
823	    rt = mtod(mb_rt, struct mfc *);
824
825	    /* insert new entry at head of hash chain */
826	    rt->mfc_origin     = mfccp->mfcc_origin;
827	    rt->mfc_originmask = mfccp->mfcc_originmask;
828	    rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
829	    rt->mfc_parent     = mfccp->mfcc_parent;
830	    for (i = 0; i < numvifs; i++)
831		VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
832	    /* initialize pkt counters per src-grp */
833	    rt->mfc_pkt_cnt    = 0;
834
835	    /* link into table */
836	    mb_rt->m_next  = mfctable[hash];
837	    mfctable[hash] = mb_rt;
838	    mb_rt->m_act = NULL;
839	}
840    }
841    splx(s);
842    return 0;
843}
844
845/*
846 * Delete an mfc entry
847 */
848static int
849del_mfc(mfccp)
850    struct delmfcctl *mfccp;
851{
852    struct in_addr 	origin;
853    struct in_addr 	mcastgrp;
854    struct mfc 		*rt;
855    struct mbuf 	*mb_rt;
856    struct mbuf 	*prev_mb_rt;
857    u_long 		hash;
858    struct mfc 		**cmfc;
859    struct mfc 		**cmfcend;
860    int s, i;
861
862    origin = mfccp->mfcc_origin;
863    mcastgrp = mfccp->mfcc_mcastgrp;
864    hash = nethash_fc(origin.s_addr, mcastgrp.s_addr);
865
866    if (mrtdebug)
867	log(LOG_DEBUG,"del_mfc orig %x mcastgrp %x",
868	    ntohl(origin.s_addr), ntohl(mcastgrp.s_addr));
869
870    for (prev_mb_rt = mb_rt = mfctable[hash]
871	 ; mb_rt
872	 ; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) {
873        rt = mtod(mb_rt, struct mfc *);
874	if (origin.s_addr == rt->mfc_origin.s_addr &&
875	    mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr &&
876	    mb_rt->m_act == NULL)
877	    break;
878    }
879    if (mb_rt == NULL) {
880	return ESRCH;
881    }
882
883    s = splnet();
884
885    cmfc = mfchash;
886    cmfcend = cmfc + MFCHASHSIZ;
887    for ( ; cmfc < cmfcend; ++cmfc)
888	if (*cmfc == rt)
889	    *cmfc = 0;
890
891    if (prev_mb_rt != mb_rt) {	/* if moved past head of list */
892	MFREE(mb_rt, prev_mb_rt->m_next);
893    } else			/* delete head of list, it is in the table */
894        mfctable[hash] = m_free(mb_rt);
895
896    splx(s);
897
898    return 0;
899}
900
901/*
902 * IP multicast forwarding function. This function assumes that the packet
903 * pointed to by "ip" has arrived on (or is about to be sent to) the interface
904 * pointed to by "ifp", and the packet is to be relayed to other networks
905 * that have members of the packet's destination IP multicast group.
906 *
907 * The packet is returned unscathed to the caller, unless it is tunneled
908 * or erroneous, in which case a non-zero return value tells the caller to
909 * discard it.
910 */
911
912#define IP_HDR_LEN  20	/* # bytes of fixed IP header (excluding options) */
913#define TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */
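/*
 * Byte layout of the TUNNEL_LEN option area as validated below (offsets are
 * relative to the end of the fixed IP header):
 *
 *	[0]      IPOPT_NOP
 *	[1]      IPOPT_LSRR
 *	[2]      11   LSRR length (type, length, pointer, two 4-byte addrs)
 *	[3]      12   LSRR pointer, i.e. the source route is fully consumed
 *	[4..7]   remote tunnel end-point (saved below as tunnel_src)
 *	[8..11]  one of this host's addresses
 */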
914
915int
916X_ip_mforward(ip, ifp, m, imo)
917    register struct ip *ip;
918    struct ifnet *ifp;
919    struct mbuf *m;
920    struct ip_moptions *imo;
921{
922    register struct mfc *rt;
923    register struct vif *vifp;
924    register u_char *ipoptions;
925    u_long tunnel_src;
926    static struct sockproto	k_igmpproto 	= { AF_INET, IPPROTO_IGMP };
927    static struct sockaddr_in 	k_igmpsrc	= { AF_INET };
928    static struct sockaddr_in 	k_igmpdst 	= { AF_INET };
929    register struct mbuf *mm;
930    register struct mbuf *mn;
931    register struct ip *k_data;
932    int s;
933
934    if (mrtdebug > 1)
935	log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %x",
936	    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp);
937
938    if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
939	(ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) {
940	/*
941	 * Packet arrived via a physical interface.
942	 */
943	tunnel_src = 0;
944    } else {
945	/*
946	 * Packet arrived through a source-route tunnel.
947	 *
948	 * A source-route tunneled packet has a single NOP option and a
949	 * two-element
950	 * loose-source-and-record-route (LSRR) option immediately following
951	 * the fixed-size part of the IP header.  At this point in processing,
952	 * the IP header should contain the following IP addresses:
953	 *
954	 *	original source          - in the source address field
955	 *	destination group        - in the destination address field
956	 *	remote tunnel end-point  - in the first  element of LSRR
957	 *	one of this host's addrs - in the second element of LSRR
958	 *
959	 * NOTE: RFC-1075 would have the original source and remote tunnel
960	 *	 end-point addresses swapped.  However, that could cause
961	 *	 delivery of ICMP error messages to innocent applications
962	 *	 on intermediate routing hosts!  Therefore, we hereby
963	 *	 change the spec.
964	 */
965
966	/*
967	 * Verify that the tunnel options are well-formed.
968	 */
969	if (ipoptions[0] != IPOPT_NOP ||
970	    ipoptions[2] != 11 ||	/* LSRR option length   */
971	    ipoptions[3] != 12 ||	/* LSRR address pointer */
972	    (tunnel_src = *(u_long *)(&ipoptions[4])) == 0) {
973	    mrtstat.mrts_bad_tunnel++;
974	    if (mrtdebug)
975		log(LOG_DEBUG,
976		    "ip_mforward: bad tunnel from %u (%x %x %x %x %x %x)",
977		    ntohl(ip->ip_src.s_addr),
978		    ipoptions[0], ipoptions[1], ipoptions[2], ipoptions[3],
979		    *(u_long *)(&ipoptions[4]), *(u_long *)(&ipoptions[8]));
980	    return 1;
981	}
982
983	/*
984	 * Delete the tunnel options from the packet.
985	 */
986	ovbcopy((caddr_t)(ipoptions + TUNNEL_LEN), (caddr_t)ipoptions,
987		(unsigned)(m->m_len - (IP_HDR_LEN + TUNNEL_LEN)));
988	m->m_len   -= TUNNEL_LEN;
989	ip->ip_len -= TUNNEL_LEN;
990	ip->ip_hl  -= TUNNEL_LEN >> 2;
991
992	ifp = 0;
993    }
994
995    /*
996     * Don't forward a packet with time-to-live of zero or one,
997     * or a packet destined to a local-only group.
998     */
999    if (ip->ip_ttl <= 1 ||
1000	ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP)
1001	return (int)tunnel_src;
1002
1003    /*
1004     * Determine forwarding vifs from the forwarding cache table
1005     */
1006    s = splnet();
1007    MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt);
1008
1009    /* Entry exists, so forward if necessary */
1010    if (rt != NULL) {
1011	splx(s);
1012	return (ip_mdq(m, ifp, tunnel_src, rt, imo));
1013    }
1014
1015    else {
1016	/*
1017	 * If we don't have a route for packet's origin,
1018	 * Make a copy of the packet &
1019	 * send message to routing daemon
1020	 */
1021
1022	register struct mbuf *mb_rt;
1023	register struct mbuf *mb_ntry;
1024	register struct mbuf *mb0;
1025	register struct rtdetq *rte;
1026	register struct mbuf *rte_m;
1027	register u_long hash;
1028	register struct timeval tp;
1029
1030	mrtstat.mrts_no_route++;
1031	if (mrtdebug)
1032	    log(LOG_DEBUG, "ip_mforward: no rte s %x g %x",
1033		ntohl(ip->ip_src.s_addr),
1034		ntohl(ip->ip_dst.s_addr));
1035
1036	/* is there an upcall waiting for this packet? */
1037	hash = nethash_fc(ip->ip_src.s_addr, ip->ip_dst.s_addr);
1038	for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) {
1039	    rt = mtod(mb_rt, struct mfc *);
1040	    if (((ip->ip_src.s_addr & rt->mfc_originmask.s_addr) ==
1041		 rt->mfc_origin.s_addr) &&
1042		(ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) &&
1043		(mb_rt->m_act != NULL))
1044		break;
1045	}
1046
1047	if (mb_rt == NULL) {
1048	    /* no upcall, so make a new entry */
1049	    MGET(mb_rt, M_DONTWAIT, MT_MRTABLE);
1050	    if (mb_rt == NULL) {
1051		splx(s);
1052		return ENOBUFS;
1053	    }
1054
1055	    rt = mtod(mb_rt, struct mfc *);
1056
1057	    /* insert new entry at head of hash chain */
1058	    rt->mfc_origin.s_addr     = ip->ip_src.s_addr;
1059	    rt->mfc_originmask.s_addr = (u_long)0xffffffff;
1060	    rt->mfc_mcastgrp.s_addr   = ip->ip_dst.s_addr;
1061
1062	    /* link into table */
1063	    hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr);
1064	    mb_rt->m_next  = mfctable[hash];
1065	    mfctable[hash] = mb_rt;
1066	    mb_rt->m_act = NULL;
1067
1068	}
1069
1070	/* determine if q has overflowed */
1071	for (rte_m = mb_rt, hash = 0; rte_m->m_act; rte_m = rte_m->m_act)
1072	    hash++;
1073
1074	if (hash > MAX_UPQ) {
1075	    mrtstat.mrts_upq_ovflw++;
1076	    splx(s);
1077	    return 0;
1078	}
1079
1080	/* add this packet and timing, ifp info to m_act */
1081	MGET(mb_ntry, M_DONTWAIT, MT_DATA);
1082	if (mb_ntry == NULL) {
1083	    splx(s);
1084	    return ENOBUFS;
1085	}
1086
1087	mb_ntry->m_act = NULL;
1088	rte = mtod(mb_ntry, struct rtdetq *);
1089
1090	mb0 = m_copy(m, 0, M_COPYALL);
1091	if (mb0 == NULL) {
1092	    splx(s);
1093	    return ENOBUFS;
1094	}
1095
1096	rte->m 			= mb0;
1097	rte->ifp 		= ifp;
1098	rte->tunnel_src 	= tunnel_src;
1099	rte->imo		= imo;
1100
1101	rte_m->m_act = mb_ntry;
1102
1103	splx(s);
1104
1105	if (hash == 0) {
1106	    /*
1107	     * Send message to routing daemon to install
1108	     * a route into the kernel table
1109	     */
1110	    k_igmpsrc.sin_addr = ip->ip_src;
1111	    k_igmpdst.sin_addr = ip->ip_dst;
1112
1113	    mm = m_copy(m, 0, M_COPYALL);
1114	    if (mm == NULL) {
1115		splx(s);
1116		return ENOBUFS;
1117	    }
1118
1119	    k_data = mtod(mm, struct ip *);
1120	    k_data->ip_p = 0;
1121
1122	    mrtstat.mrts_upcalls++;
1123
1124	    raw_input(mm, &k_igmpproto,
1125		      (struct sockaddr *)&k_igmpsrc,
1126		      (struct sockaddr *)&k_igmpdst);
1127
1128	    /* set timer to cleanup entry if upcall is lost */
1129	    timeout(cleanup_cache, (caddr_t)mb_rt, 100);
1130	    timeout_val++;
1131	}
1132
1133	return 0;
1134    }
1135}
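/*
 * Summary of the no-route path above: the packet is copied onto the pending
 * entry's m_act chain (as a struct rtdetq), and for the first such packet a
 * copy with ip_p forced to 0 is handed to raw_input() so the routing daemon
 * sees an upcall on its IGMP socket.  If the daemon answers with
 * DVMRP_ADD_MFC, add_mfc() drains the queued packets through ip_mdq();
 * otherwise cleanup_cache(), scheduled 100 ticks out, discards them.
 */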
1136
1137#ifndef MROUTE_LKM
1138int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
1139		   struct ip_moptions *) = X_ip_mforward;
1140#endif
1141
1142/*
1143 * Clean up the cache entry if upcall is not serviced
1144 */
1145static void
1146cleanup_cache(xmb_rt)
1147	void *xmb_rt;
1148{
1149    struct mbuf *mb_rt = xmb_rt;
1150    struct mfc *rt;
1151    u_long hash;
1152    struct mbuf *prev_m0;
1153    struct mbuf *m0;
1154    struct mbuf *m;
1155    struct rtdetq *rte;
1156    int s;
1157
1158    rt = mtod(mb_rt, struct mfc *);
1159    hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr);
1160
1161    if (mrtdebug)
1162	log(LOG_DEBUG, "ip_mforward: cleanup ipm %d h %d s %x g %x",
1163	    ip_mrouter, hash, ntohl(rt->mfc_origin.s_addr),
1164	    ntohl(rt->mfc_mcastgrp.s_addr));
1165
1166    mrtstat.mrts_cache_cleanups++;
1167
1168    /*
1169     * determine entry to be cleaned up in cache table
1170     */
1171    s = splnet();
1172    for (prev_m0 = m0 = mfctable[hash]; m0; prev_m0 = m0, m0 = m0->m_next)
1173	if (m0 == mb_rt)
1174	    break;
1175
1176    /*
1177     * drop all the packets
1178     * free the mbuf with the pkt, if, timing info
1179     */
1180    while (mb_rt->m_act) {
1181	m = mb_rt->m_act;
1182	mb_rt->m_act = m->m_act;
1183
1184	rte = mtod(m, struct rtdetq *);
1185	m_freem(rte->m);
1186	m_free(m);
1187    }
1188
1189    /*
1190     * Delete the entry from the cache
1191     */
1192    if (prev_m0 != m0) {	/* if moved past head of list */
1193	MFREE(m0, prev_m0->m_next);
1194    } else			/* delete head of list, it is in the table */
1195	mfctable[hash] = m_free(m0);
1196
1197    timeout_val--;
1198    splx(s);
1199}
1200
1201/*
1202 * Packet forwarding routine once entry in the cache is made
1203 */
1204static int
1205ip_mdq(m, ifp, tunnel_src, rt, imo)
1206    register struct mbuf *m;
1207    register struct ifnet *ifp;
1208    register u_long tunnel_src;
1209    register struct mfc *rt;
1210    register struct ip_moptions *imo;
1211{
1212    register struct ip  *ip = mtod(m, struct ip *);
1213    register vifi_t vifi;
1214    register struct vif *vifp;
1215
1216    /*
1217     * Don't forward if it didn't arrive from the parent vif for its origin.
1218     * Notes: v_ifp is zero for src route tunnels, multicast_decap_if
1219     * for encapsulated tunnels and a real ifnet for non-tunnels so
1220     * the first part of the if catches wrong physical interface or
1221     * tunnel type; v_rmt_addr is zero for non-tunneled packets so
1222     * the 2nd part catches both packets that arrive via a tunnel
1223     * that shouldn't and packets that arrive via the wrong tunnel.
1224     */
1225    vifi = rt->mfc_parent;
1226    if (viftable[vifi].v_ifp != ifp ||
1227	(ifp == 0 && viftable[vifi].v_rmt_addr.s_addr != tunnel_src)) {
1228	/* came in the wrong interface */
1229	if (mrtdebug)
1230	    log(LOG_DEBUG, "wrong if: ifp %x vifi %d",
1231		ifp, vifi);
1232	++mrtstat.mrts_wrong_if;
1233	return (int)tunnel_src;
1234    }
1235
1236    /* increment the interface and s-g counters */
1237    viftable[vifi].v_pkt_in++;
1238    rt->mfc_pkt_cnt++;
1239
1240    /*
1241     * For each vif, decide if a copy of the packet should be forwarded.
1242     * Forward if:
1243     *		- the ttl exceeds the vif's threshold
1244     *		- there are group members downstream on interface
1245     */
1246#define MC_SEND(ip,vifp,m) {                             \
1247		(vifp)->v_pkt_out++;                     \
1248                if ((vifp)->v_flags & VIFF_SRCRT)        \
1249                    srcrt_send((ip), (vifp), (m));       \
1250                else if ((vifp)->v_flags & VIFF_TUNNEL)  \
1251                    encap_send((ip), (vifp), (m));       \
1252                else                                     \
1253                    phyint_send((ip), (vifp), (m));      \
1254                }
1255
1256/* If ip_moptions name a specific vif, send only on that vif.  A zero
1257 * imo_multicast_vif wraps (vifi is unsigned) and fails the test below. */
1258    if ((imo != NULL) &&
1259       (( vifi = imo->imo_multicast_vif - 1) < numvifs) /*&& (vifi>=0)*/)
1260    {
1261        MC_SEND(ip,viftable+vifi,m);
1262        return (1);        /* make sure we are done: No more physical sends */
1263    }
1264
1265    for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++)
1266	if ((rt->mfc_ttls[vifi] > 0) &&
1267	    (ip->ip_ttl > rt->mfc_ttls[vifi]))
1268	    MC_SEND(ip, vifp, m);
1269
1270    return 0;
1271}
1272
1273/* Check whether a vif number is legal.  This is used by ip_output as the
1274 * means of exporting numvifs to it.
1275 */
1276int
1277X_legal_vif_num(vif)
1278    int vif;
1279{   if (vif>=0 && vif<=numvifs)
1280       return(1);
1281    else
1282       return(0);
1283}
1284
1285#ifndef MROUTE_LKM
1286int (*legal_vif_num)(int) = X_legal_vif_num;
1287#endif
1288
1289static void
1290phyint_send(ip, vifp, m)
1291    struct ip *ip;
1292    struct vif *vifp;
1293    struct mbuf *m;
1294{
1295    register struct mbuf *mb_copy;
1296    register struct mbuf *mopts;
1297    register struct ip_moptions *imo;
1298
1299    if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL)
1300	return;
1301
1302    MALLOC(imo, struct ip_moptions *, sizeof *imo, M_IPMOPTS, M_NOWAIT);
1303    if (imo == NULL) {
1304	m_freem(mb_copy);
1305	return;
1306    }
1307
1308    imo->imo_multicast_ifp  = vifp->v_ifp;
1309    imo->imo_multicast_ttl  = ip->ip_ttl - 1;
1310    imo->imo_multicast_loop = 1;
1311
1312    if (vifp->v_rate_limit <= 0)
1313	tbf_send_packet(vifp, mb_copy, imo);
1314    else
1315	tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len,
1316		    imo);
1317}
1318
1319static void
1320srcrt_send(ip, vifp, m)
1321    struct ip *ip;
1322    struct vif *vifp;
1323    struct mbuf *m;
1324{
1325    struct mbuf *mb_copy, *mb_opts;
1326    register struct ip *ip_copy;
1327    u_char *cp;
1328
1329    /*
1330     * Make sure that adding the tunnel options won't exceed the
1331     * maximum allowed number of option bytes.
1332     */
1333    if (ip->ip_hl > (60 - TUNNEL_LEN) >> 2) {
1334	mrtstat.mrts_cant_tunnel++;
1335	if (mrtdebug)
1336	    log(LOG_DEBUG, "srcrt_send: no room for tunnel options, from %u",
1337		ntohl(ip->ip_src.s_addr));
1338	return;
1339    }
1340
1341    if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL)
1342	return;
1343
1344    ip_copy = mtod(mb_copy, struct ip *);
1345    ip_copy->ip_ttl--;
1346    ip_copy->ip_dst = vifp->v_rmt_addr;	  /* remote tunnel end-point */
1347    /*
1348     * Adjust the ip header length to account for the tunnel options.
1349     */
1350    ip_copy->ip_hl  += TUNNEL_LEN >> 2;
1351    ip_copy->ip_len += TUNNEL_LEN;
1352    MGET(mb_opts, M_DONTWAIT, MT_HEADER);
1353    if (mb_opts == NULL) {
1354	m_freem(mb_copy);
1355	return;
1356    }
1357    /*
1358     * 'Delete' the base ip header from the mb_copy chain
1359     */
1360    mb_copy->m_len -= IP_HDR_LEN;
1361    mb_copy->m_data += IP_HDR_LEN;
1362    /*
1363     * Make mb_opts be the new head of the packet chain.
1364     * Any options of the packet were left in the old packet chain head
1365     */
1366    mb_opts->m_next = mb_copy;
1367    mb_opts->m_data += 16;
1368    mb_opts->m_len = IP_HDR_LEN + TUNNEL_LEN;
1369    /*
1370     * Copy the base ip header from the mb_copy chain to the new head mbuf
1371     */
1372    bcopy((caddr_t)ip_copy, mtod(mb_opts, caddr_t), IP_HDR_LEN);
1373    /*
1374     * Add the NOP and LSRR after the base ip header
1375     */
1376    cp = mtod(mb_opts, u_char *) + IP_HDR_LEN;
1377    *cp++ = IPOPT_NOP;
1378    *cp++ = IPOPT_LSRR;
1379    *cp++ = 11; /* LSRR option length */
1380    *cp++ = 8;  /* LSSR pointer to second element */
1381    *(u_long*)cp = vifp->v_lcl_addr.s_addr;	/* local tunnel end-point */
1382    cp += 4;
1383    *(u_long*)cp = ip->ip_dst.s_addr;		/* destination group */
1384
1385    if (vifp->v_rate_limit <= 0)
1386	tbf_send_packet(vifp, mb_opts, 0);
1387    else
1388	tbf_control(vifp, mb_opts,
1389		    mtod(mb_opts, struct ip *), ip_copy->ip_len, 0);
1390}
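/*
 * Resulting mbuf chain (sketch): mb_opts carries a 32-byte head, i.e. the
 * copied base IP header (ttl decremented, dst rewritten to the remote tunnel
 * end-point, ip_hl/ip_len grown by TUNNEL_LEN) followed by NOP + LSRR{len 11,
 * ptr 8, local end-point, original destination group}; mb_copy, now holding
 * only whatever followed the original base header, is chained behind it.
 */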
1391
1392static void
1393encap_send(ip, vifp, m)
1394    register struct ip *ip;
1395    register struct vif *vifp;
1396    register struct mbuf *m;
1397{
1398    register struct mbuf *mb_copy;
1399    register struct ip *ip_copy;
1400    register int i, len = ip->ip_len;
1401
1402    /*
1403     * copy the old packet & pullup it's IP header into the
1404     * new mbuf so we can modify it.  Try to fill the new
1405     * mbuf since if we don't the ethernet driver will.
1406     */
1407    MGET(mb_copy, M_DONTWAIT, MT_DATA);
1408    if (mb_copy == NULL)
1409	return;
1410    mb_copy->m_data += 16;
1411    mb_copy->m_len = sizeof(multicast_encap_iphdr);
1412
1413    if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) {
1414	m_freem(mb_copy);
1415	return;
1416    }
1417    i = MHLEN - M_LEADINGSPACE(mb_copy);
1418    if (i > len)
1419	i = len;
1420    mb_copy = m_pullup(mb_copy, i);
1421    if (mb_copy == NULL)
1422	return;
1423
1424    /*
1425     * fill in the encapsulating IP header.
1426     */
1427    ip_copy = mtod(mb_copy, struct ip *);
1428    *ip_copy = multicast_encap_iphdr;
1429    ip_copy->ip_id = htons(ip_id++);
1430    ip_copy->ip_len += len;
1431    ip_copy->ip_src = vifp->v_lcl_addr;
1432    ip_copy->ip_dst = vifp->v_rmt_addr;
1433
1434    /*
1435     * turn the encapsulated IP header back into a valid one.
1436     */
1437    ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr));
1438    --ip->ip_ttl;
1439    HTONS(ip->ip_len);
1440    HTONS(ip->ip_off);
1441    ip->ip_sum = 0;
1442#if defined(LBL) && !defined(ultrix)
1443    ip->ip_sum = ~oc_cksum((caddr_t)ip, ip->ip_hl << 2, 0);
1444#else
1445    mb_copy->m_data += sizeof(multicast_encap_iphdr);
1446    ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
1447    mb_copy->m_data -= sizeof(multicast_encap_iphdr);
1448#endif
1449
1450    if (vifp->v_rate_limit <= 0)
1451	tbf_send_packet(vifp, mb_copy, 0);
1452    else
1453	tbf_control(vifp, mb_copy, ip, ip_copy->ip_len, 0);
1454}
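/*
 * Resulting packet (sketch): a fresh 20-byte outer header cloned from
 * multicast_encap_iphdr (proto ENCAP_PROTO, ttl ENCAP_TTL, src/dst set to the
 * vif's local and remote tunnel end-points, ip_len covering the whole
 * datagram) immediately followed by the original packet, whose inner ttl has
 * been decremented and whose checksum was recomputed above.
 */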
1455
1456/*
1457 * De-encapsulate a packet and feed it back through ip input (this
1458 * routine is called whenever IP gets a packet with proto type
1459 * ENCAP_PROTO and a local destination address).
1460 */
1461void
1462#ifdef MROUTE_LKM
1463X_multiencap_decap(m)
1464#else
1465multiencap_decap(m)
1466#endif
1467    register struct mbuf *m;
1468{
1469    struct ifnet *ifp = m->m_pkthdr.rcvif;
1470    register struct ip *ip = mtod(m, struct ip *);
1471    register int hlen = ip->ip_hl << 2;
1472    register int s;
1473    register struct ifqueue *ifq;
1474    register struct vif *vifp;
1475
1476    if (ip->ip_p != ENCAP_PROTO) {
1477    	rip_input(m);
1478	return;
1479    }
1480    /*
1481     * dump the packet if it's not to a multicast destination or if
1482     * we don't have an encapsulating tunnel with the source.
1483     * Note:  This code assumes that the remote site IP address
1484     * uniquely identifies the tunnel (i.e., that this site has
1485     * at most one tunnel with the remote site).
1486     */
1487    if (! IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) {
1488	++mrtstat.mrts_bad_tunnel;
1489	m_freem(m);
1490	return;
1491    }
1492    if (ip->ip_src.s_addr != last_encap_src) {
1493	register struct vif *vife;
1494
1495	vifp = viftable;
1496	vife = vifp + numvifs;
1497	last_encap_src = ip->ip_src.s_addr;
1498	last_encap_vif = 0;
1499	for ( ; vifp < vife; ++vifp)
1500	    if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) {
1501		if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT))
1502		    == VIFF_TUNNEL)
1503		    last_encap_vif = vifp;
1504		break;
1505	    }
1506    }
1507    if ((vifp = last_encap_vif) == 0) {
1508	last_encap_src = 0;
1509	mrtstat.mrts_cant_tunnel++; /*XXX*/
1510	m_freem(m);
1511	if (mrtdebug)
1512	    log(LOG_DEBUG, "ip_mforward: no tunnel with %u",
1513		ntohl(ip->ip_src.s_addr));
1514	return;
1515    }
1516    ifp = vifp->v_ifp;
1517    hlen -= sizeof(struct ifnet *);
1518    m->m_data += hlen;
1519    m->m_len -= hlen;
1520    *(mtod(m, struct ifnet **)) = ifp;
1521    ifq = &ipintrq;
1522    s = splimp();
1523    if (IF_QFULL(ifq)) {
1524	IF_DROP(ifq);
1525	m_freem(m);
1526    } else {
1527	IF_ENQUEUE(ifq, m);
1528	/*
1529	 * normally we would need a "schednetisr(NETISR_IP)"
1530	 * here but we were called by ip_input and it is going
1531	 * to loop back & try to dequeue the packet we just
1532	 * queued as soon as we return so we avoid the
1533	 * unnecessary software interrupt.
1534	 */
1535    }
1536    splx(s);
1537}
1538
1539/*
1540 * Token bucket filter module
1541 */
1542void
1543tbf_control(vifp, m, ip, p_len, imo)
1544	register struct vif *vifp;
1545	register struct mbuf *m;
1546	register struct ip *ip;
1547	register u_long p_len;
1548	struct ip_moptions *imo;
1549{
1550    tbf_update_tokens(vifp);
1551
1552    /* if there are enough tokens,
1553     * and the queue is empty,
1554     * send this packet out
1555     */
1556
1557    if (vifp->v_tbf->q_len == 0) {
1558	if (p_len <= vifp->v_tbf->n_tok) {
1559	    vifp->v_tbf->n_tok -= p_len;
1560	    tbf_send_packet(vifp, m, imo);
1561	} else if (p_len > MAX_BKT_SIZE) {
1562	    /* drop if packet is too large */
1563	    mrtstat.mrts_pkt2large++;
1564	    m_freem(m);
1565	    return;
1566	} else {
1567	    /* queue packet and timeout till later */
1568	    tbf_queue(vifp, m, ip, imo);
1569	    timeout(tbf_reprocess_q, (caddr_t)vifp, 1);
1570	}
1571    } else if (vifp->v_tbf->q_len < MAXQSIZE) {
1572	/* finite queue length, so queue pkts and process queue */
1573	tbf_queue(vifp, m, ip, imo);
1574	tbf_process_q(vifp);
1575    } else {
1576	/* queue length too much, try to dq and queue and process */
1577	if (!tbf_dq_sel(vifp, ip)) {
1578	    mrtstat.mrts_q_overflow++;
1579	    m_freem(m);
1580	    return;
1581	} else {
1582	    tbf_queue(vifp, m, ip, imo);
1583	    tbf_process_q(vifp);
1584	}
1585    }
1586    return;
1587}
1588
1589/*
1590 * adds a packet to the queue at the interface
1591 */
1592void
1593tbf_queue(vifp, m, ip, imo)
1594	register struct vif *vifp;
1595	register struct mbuf *m;
1596	register struct ip *ip;
1597	struct ip_moptions *imo;
1598{
1599    register u_long ql;
1600    register int index = (vifp - viftable);
1601    register int s = splnet();
1602
1603    ql = vifp->v_tbf->q_len;
1604
1605    qtable[index][ql].pkt_m = m;
1606    qtable[index][ql].pkt_len = (mtod(m, struct ip *))->ip_len;
1607    qtable[index][ql].pkt_ip = ip;
1608    qtable[index][ql].pkt_imo = imo;
1609
1610    vifp->v_tbf->q_len++;
1611    splx(s);
1612}
1613
1614
1615/*
1616 * processes the queue at the interface
1617 */
1618void
1619tbf_process_q(vifp)
1620    register struct vif *vifp;
1621{
1622    register struct mbuf *m;
1623    register struct pkt_queue pkt_1;
1624    register int index = (vifp - viftable);
1625    register int s = splnet();
1626
1627    /* loop through the queue at the interface and send as many packets
1628     * as possible
1629     */
1630    while (vifp->v_tbf->q_len > 0) {
1631	/* locate the first packet */
1632	pkt_1.pkt_len = ((qtable[index][0]).pkt_len);
1633	pkt_1.pkt_m   = (qtable[index][0]).pkt_m;
1634	pkt_1.pkt_ip   = (qtable[index][0]).pkt_ip;
1635	pkt_1.pkt_imo = (qtable[index][0]).pkt_imo;
1636
1637	/* determine if the packet can be sent */
1638	if (pkt_1.pkt_len <= vifp->v_tbf->n_tok) {
1639	    /* if so,
1640	     * reduce no of tokens, dequeue the queue,
1641	     * send the packet.
1642	     */
1643	    vifp->v_tbf->n_tok -= pkt_1.pkt_len;
1644
1645	    tbf_dequeue(vifp, 0);
1646
1647	    tbf_send_packet(vifp, pkt_1.pkt_m, pkt_1.pkt_imo);
1648
1649	} else break;
1650    }
1651    splx(s);
1652}
1653
1654/*
1655 * removes the jth packet from the queue at the interface
1656 */
1657void
1658tbf_dequeue(vifp,j)
1659    register struct vif *vifp;
1660    register int j;
1661{
1662    register u_long index = vifp - viftable;
1663    register int i;
1664
1665    for (i=j+1; i <= vifp->v_tbf->q_len - 1; i++) {
1666	qtable[index][i-1].pkt_m   = qtable[index][i].pkt_m;
1667	qtable[index][i-1].pkt_len = qtable[index][i].pkt_len;
1668	qtable[index][i-1].pkt_ip = qtable[index][i].pkt_ip;
1669	qtable[index][i-1].pkt_imo = qtable[index][i].pkt_imo;
1670    }
1671    qtable[index][i-1].pkt_m = NULL;
1672    qtable[index][i-1].pkt_len = 0;
1673    qtable[index][i-1].pkt_ip = NULL;
1674    qtable[index][i-1].pkt_imo = NULL;
1675
1676    vifp->v_tbf->q_len--;
1677
1678    if (tbfdebug > 1)
1679	log(LOG_DEBUG, "tbf_dequeue: vif# %d qlen %d",vifp-viftable, i-1);
1680}
1681
1682void
1683tbf_reprocess_q(xvifp)
1684	void *xvifp;
1685{
1686    register struct vif *vifp = xvifp;
1687    if (ip_mrouter == NULL)
1688	return;
1689
1690    tbf_update_tokens(vifp);
1691
1692    tbf_process_q(vifp);
1693
1694    if (vifp->v_tbf->q_len)
1695	timeout(tbf_reprocess_q, (caddr_t)vifp, 1);
1696}
1697
1698/* function that will selectively discard a member of the queue
1699 * based on the precedence value and the priority obtained through
1700 * a lookup table - not yet implemented accurately!
1701 */
1702int
1703tbf_dq_sel(vifp, ip)
1704    register struct vif *vifp;
1705    register struct ip *ip;
1706{
1707    register int i;
1708    register int s = splnet();
1709    register u_int p;
1710
1711    p = priority(vifp, ip);
1712
1713    for(i=vifp->v_tbf->q_len-1;i >= 0;i--) {
1714	if (p > priority(vifp, qtable[vifp-viftable][i].pkt_ip)) {
1715	    m_freem(qtable[vifp-viftable][i].pkt_m);
1716	    tbf_dequeue(vifp,i);
1717	    splx(s);
1718	    mrtstat.mrts_drop_sel++;
1719	    return(1);
1720	}
1721    }
1722    splx(s);
1723    return(0);
1724}
1725
1726void
1727tbf_send_packet(vifp, m, imo)
1728    register struct vif *vifp;
1729    register struct mbuf *m;
1730    struct ip_moptions *imo;
1731{
1732    register struct mbuf *mcp;
1733    int error;
1734    int s = splnet();
1735
1736    /* if source route tunnels */
1737    if (vifp->v_flags & VIFF_SRCRT) {
1738	error = ip_output(m, (struct mbuf *)0, (struct route *)0,
1739			  IP_FORWARDING, imo);
1740	if (mrtdebug > 1)
1741	    log(LOG_DEBUG, "srcrt_send on vif %d err %d", vifp-viftable, error);
1742    } else if (vifp->v_flags & VIFF_TUNNEL) {
1743	/* If tunnel options */
1744	ip_output(m, (struct mbuf *)0, (struct route *)0,
1745		  IP_FORWARDING, imo);
1746    } else {
1747	/* if physical interface option, extract the options and then send */
1748	error = ip_output(m, (struct mbuf *)0, (struct route *)0,
1749			  IP_FORWARDING, imo);
1750	FREE(imo, M_IPMOPTS);
1751
1752	if (mrtdebug > 1)
1753	    log(LOG_DEBUG, "phyint_send on vif %d err %d", vifp-viftable, error);
1754    }
1755    splx(s);
1756}
1757
1758/* determine the current time and then
1759 * the elapsed time (between the last time and time now)
1760 * in milliseconds & update the no. of tokens in the bucket
1761 */
1762void
1763tbf_update_tokens(vifp)
1764    register struct vif *vifp;
1765{
1766    struct timeval tp;
1767    register u_long t;
1768    register u_long elapsed;
1769    register int s = splnet();
1770
1771    GET_TIME(tp);
1772
1773    t = tp.tv_sec*1000 + tp.tv_usec/1000;
1774
1775    elapsed = (t - vifp->v_tbf->last_pkt_t) * vifp->v_rate_limit /8;
1776    vifp->v_tbf->n_tok += elapsed;
1777    vifp->v_tbf->last_pkt_t = t;
1778
1779    if (vifp->v_tbf->n_tok > MAX_BKT_SIZE)
1780	vifp->v_tbf->n_tok = MAX_BKT_SIZE;
1781
1782    splx(s);
1783}
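/*
 * Worked example (illustrative): the arithmetic above treats t in
 * milliseconds and, given the division by 8, effectively takes v_rate_limit
 * in kbits/sec, yielding tokens in bytes (n_tok is compared against packet
 * byte lengths in tbf_control()).  E.g. with v_rate_limit = 500 and 100 ms
 * since the last packet, 100 * 500 / 8 = 6250 bytes of tokens are added,
 * capped at MAX_BKT_SIZE.
 */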
1784
1785static int
1786priority(vifp, ip)
1787    register struct vif *vifp;
1788    register struct ip *ip;
1789{
1790    register u_long graddr;
1791    register int prio;
1792
1793    /* temporary hack; will add general packet classifier some day */
1794
1795    prio = 50;  /* default priority */
1796
1797    /* check for source route options and add option length to get dst */
1798    if (vifp->v_flags & VIFF_SRCRT)
1799	graddr = ntohl((ip+8)->ip_dst.s_addr);
1800    else
1801	graddr = ntohl(ip->ip_dst.s_addr);
1802
1803    switch (graddr & 0xf) {
1804	case 0x0: break;
1805	case 0x1: if (graddr == 0xe0020001) prio = 65; /* MBone Audio */
1806		  break;
1807	case 0x2: break;
1808	case 0x3: break;
1809	case 0x4: break;
1810	case 0x5: break;
1811	case 0x6: break;
1812	case 0x7: break;
1813	case 0x8: break;
1814	case 0x9: break;
1815	case 0xa: if (graddr == 0xe000010a) prio = 85; /* IETF Low Audio 1 */
1816		  break;
1817	case 0xb: if (graddr == 0xe000010b) prio = 75; /* IETF Audio 1 */
1818		  break;
1819	case 0xc: if (graddr == 0xe000010c) prio = 60; /* IETF Video 1 */
1820		  break;
1821	case 0xd: if (graddr == 0xe000010d) prio = 80; /* IETF Low Audio 2 */
1822		  break;
1823	case 0xe: if (graddr == 0xe000010e) prio = 70; /* IETF Audio 2 */
1824		  break;
1825	case 0xf: if (graddr == 0xe000010f) prio = 55; /* IETF Video 2 */
1826		  break;
1827    }
1828
1829    if (tbfdebug > 1) log(LOG_DEBUG, "graddr %x prio %d", graddr, prio);
1830
1831    return prio;
1832}
1833
1834/*
1835 * End of token bucket filter modifications
1836 */
1837
1838#ifdef MROUTE_LKM
1839#include <sys/conf.h>
1840#include <sys/exec.h>
1841#include <sys/sysent.h>
1842#include <sys/lkm.h>
1843
1844MOD_MISC("ip_mroute_mod")
1845
1846static int
1847ip_mroute_mod_handle(struct lkm_table *lkmtp, int cmd)
1848{
1849	int i;
1850	struct lkm_misc	*args = lkmtp->private.lkm_misc;
1851	int err = 0;
1852
1853	switch(cmd) {
1854		static int (*old_ip_mrouter_cmd)();
1855		static int (*old_ip_mrouter_done)();
1856		static int (*old_ip_mforward)();
1857		static int (*old_mrt_ioctl)();
1858		static int (*old_proto4_input)();
1859		static int (*old_legal_vif_num)();
1860		extern u_char ip_protox[];
1861		extern struct protosw inetsw[];
1862
1863	case LKM_E_LOAD:
1864		if(lkmexists(lkmtp) || ip_mrtproto)
1865		  return(EEXIST);
1866		old_ip_mrouter_cmd = ip_mrouter_cmd;
1867		ip_mrouter_cmd = X_ip_mrouter_cmd;
1868		old_ip_mrouter_done = ip_mrouter_done;
1869		ip_mrouter_done = X_ip_mrouter_done;
1870		old_ip_mforward = ip_mforward;
1871		ip_mforward = X_ip_mforward;
1872		old_mrt_ioctl = mrt_ioctl;
1873		mrt_ioctl = X_mrt_ioctl;
1874		old_proto4_input = inetsw[ip_protox[IPPROTO_ENCAP]].pr_input;
1875		inetsw[ip_protox[IPPROTO_ENCAP]].pr_input = X_multiencap_decap;
1876		old_legal_vif_num = legal_vif_num;
1877		legal_vif_num = X_legal_vif_num;
1878		ip_mrtproto = IGMP_DVMRP;
1879
1880		printf("\nIP multicast routing loaded\n");
1881		break;
1882
1883	case LKM_E_UNLOAD:
1884		if (ip_mrouter)
1885		  return EINVAL;
1886
1887		ip_mrouter_cmd = old_ip_mrouter_cmd;
1888		ip_mrouter_done = old_ip_mrouter_done;
1889		ip_mforward = old_ip_mforward;
1890		mrt_ioctl = old_mrt_ioctl;
1891		inetsw[ip_protox[IPPROTO_ENCAP]].pr_input = old_proto4_input;
1892		legal_vif_num = old_legal_vif_num;
1893		ip_mrtproto = 0;
1894		break;
1895
1896	default:
1897		err = EINVAL;
1898		break;
1899	}
1900
1901	return(err);
1902}
1903
1904int
1905ip_mroute_mod(struct lkm_table *lkmtp, int cmd, int ver) {
1906	DISPATCH(lkmtp, cmd, ver, ip_mroute_mod_handle, ip_mroute_mod_handle,
1907		 nosys);
1908}
1909
1910#endif /* MROUTE_LKM */
1911#endif /* MROUTING */
1912
1913
1914