ip_mroute.c revision 9682
1/*
2 * IP multicast forwarding procedures
3 *
4 * Written by David Waitzman, BBN Labs, August 1988.
5 * Modified by Steve Deering, Stanford, February 1989.
6 * Modified by Mark J. Steiglitz, Stanford, May, 1991
7 * Modified by Van Jacobson, LBL, January 1993
8 * Modified by Ajit Thyagarajan, PARC, August 1993
9 * Modified by Bill Fenner, PARC, April 1995
10 *
11 * MROUTING Revision: 3.5
12 * $Id: ip_mroute.c,v 1.19 1995/06/26 16:15:49 wollman Exp $
13 */
14
15
16#include <sys/param.h>
17#include <sys/systm.h>
18#include <sys/mbuf.h>
19#include <sys/socket.h>
20#include <sys/socketvar.h>
21#include <sys/protosw.h>
22#include <sys/errno.h>
23#include <sys/time.h>
24#include <sys/kernel.h>
25#include <sys/ioctl.h>
26#include <sys/syslog.h>
27#include <sys/queue.h>
28#include <net/if.h>
29#include <net/route.h>
30#include <netinet/in.h>
31#include <netinet/in_systm.h>
32#include <netinet/ip.h>
33#include <netinet/ip_var.h>
34#include <netinet/in_pcb.h>
35#include <netinet/in_var.h>
36#include <netinet/igmp.h>
37#include <netinet/igmp_var.h>
38#include <netinet/ip_mroute.h>
39#include <netinet/udp.h>
40
41#ifndef NTOHL
42#if BYTE_ORDER != BIG_ENDIAN
43#define NTOHL(d) ((d) = ntohl((d)))
44#define NTOHS(d) ((d) = ntohs((u_short)(d)))
45#define HTONL(d) ((d) = htonl((d)))
46#define HTONS(d) ((d) = htons((u_short)(d)))
47#else
48#define NTOHL(d)
49#define NTOHS(d)
50#define HTONL(d)
51#define HTONS(d)
52#endif
53#endif
54
55extern int rsvp_on;
56
57#ifndef MROUTING
58/*
59 * Dummy routines and globals used when multicast routing is not compiled in.
60 */
61
62struct socket  *ip_mrouter  = NULL;
63u_int		ip_mrtproto = 0;
64struct mrtstat	mrtstat;
65u_int		rsvpdebug = 0;
66
/*
 * Stand-in for the MRT setsockopt handler when multicast routing is
 * not compiled in: every request is refused with EOPNOTSUPP.
 */
int
_ip_mrouter_set(int cmd, struct socket *so, struct mbuf *m)
{
	return EOPNOTSUPP;
}
75
76int (*ip_mrouter_set)(int, struct socket *, struct mbuf *) = _ip_mrouter_set;
77
78
/*
 * Stand-in for the MRT getsockopt handler when multicast routing is
 * not compiled in: every request is refused with EOPNOTSUPP and *m is
 * left untouched.
 */
int
_ip_mrouter_get(int cmd, struct socket *so, struct mbuf **m)
{
	return EOPNOTSUPP;
}
87
88int (*ip_mrouter_get)(int, struct socket *, struct mbuf **) = _ip_mrouter_get;
89
/*
 * Stand-in for the "disable multicast routing" hook: there is nothing
 * to tear down, so it simply reports success.
 */
int
_ip_mrouter_done(void)
{
	return 0;
}
95
96int (*ip_mrouter_done)(void) = _ip_mrouter_done;
97
/*
 * Stand-in for the multicast forwarding hook: nothing is forwarded and
 * 0 is returned for every packet.
 */
int
_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m,
	     struct ip_moptions *imo)
{
	return 0;
}
107
108int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
109		   struct ip_moptions *) = _ip_mforward;
110
111int
112_mrt_ioctl(int req, caddr_t data, struct proc *p)
113{
114	return EOPNOTSUPP;
115}
116
117int (*mrt_ioctl)(int, caddr_t, struct proc *) = _mrt_ioctl;
118
119void
120rsvp_input(m, iphlen)		/* XXX must fixup manually */
121	struct mbuf *m;
122	int iphlen;
123{
124    /* Can still get packets with rsvp_on = 0 if there is a local member
125     * of the group to which the RSVP packet is addressed.  But in this
126     * case we want to throw the packet away.
127     */
128    if (!rsvp_on) {
129	m_freem(m);
130	return;
131    }
132
133    if (ip_rsvpd != NULL) {
134	if (rsvpdebug)
135	    printf("rsvp_input: Sending packet up old-style socket\n");
136	rip_input(m);
137	return;
138    }
139    /* Drop the packet */
140    m_freem(m);
141}
142
/*
 * IP-in-IP input when multicast routing is not compiled in: the packet
 * is passed straight on to rip_input().
 */
void
ipip_input(struct mbuf *m)	/* XXX must fixup manually */
{
	rip_input(m);
}
146
147int (*legal_vif_num)(int) = 0;
148
149/*
150 * This should never be called, since IP_MULTICAST_VIF should fail, but
151 * just in case it does get called, the code a little lower in ip_output
152 * will assign the packet a local address.
153 */
154u_long
155_ip_mcast_src(int vifi) { return INADDR_ANY; }
156u_long (*ip_mcast_src)(int) = _ip_mcast_src;
157
/*
 * Stand-in for RSVP vif setup when multicast routing is not compiled
 * in: always fails with EINVAL.
 */
int
ip_rsvp_vif_init(struct socket *so, struct mbuf *m)
{
    return EINVAL;
}
165
/*
 * Stand-in for RSVP vif teardown when multicast routing is not
 * compiled in: always fails with EINVAL.
 */
int
ip_rsvp_vif_done(struct socket *so, struct mbuf *m)
{
    return EINVAL;
}
173
/*
 * Stand-in for forced RSVP teardown on a socket: there is no state to
 * release, so this does nothing.
 */
void
ip_rsvp_force_done(struct socket *so)
{
    /* nothing to do */
}
180
181#else /* MROUTING */
182
183#define M_HASCL(m)	((m)->m_flags & M_EXT)
184
185#define INSIZ		sizeof(struct in_addr)
186#define	same(a1, a2) \
187	(bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0)
188
189#define MT_MRTABLE MT_RTABLE	/* since nothing else uses it */
190
191/*
192 * Globals.  All but ip_mrouter and ip_mrtproto could be static,
193 * except for netstat or debugging purposes.
194 */
195#ifndef MROUTE_LKM
196struct socket  *ip_mrouter  = NULL;
197struct mrtstat	mrtstat;
198
199int		ip_mrtproto = IGMP_DVMRP;    /* for netstat only */
200#else /* MROUTE_LKM */
201extern struct mrtstat mrtstat;
202extern int ip_mrtproto;
203#endif
204
205#define NO_RTE_FOUND 	0x1
206#define RTE_FOUND	0x2
207
208struct mbuf    *mfctable[MFCTBLSIZ];
209u_char		nexpire[MFCTBLSIZ];
210struct vif	viftable[MAXVIFS];
211u_int		mrtdebug = 0;	  /* debug level 	*/
212#define		DEBUG_MFC	0x02
213#define		DEBUG_FORWARD	0x04
214#define		DEBUG_EXPIRE	0x08
215#define		DEBUG_XMIT	0x10
216u_int       	tbfdebug = 0;     /* tbf debug level 	*/
217u_int		rsvpdebug = 0;	  /* rsvp debug level   */
218
219#define		EXPIRE_TIMEOUT	(hz / 4)	/* 4x / second		*/
220#define		UPCALL_EXPIRE	6		/* number of timeouts	*/
221
222/*
223 * Define the token bucket filter structures
224 * tbftable -> each vif has one of these for storing info
225 * qtable   -> each interface has an associated queue of pkts
226 */
227
228struct tbf tbftable[MAXVIFS];
229struct pkt_queue qtable[MAXVIFS][MAXQSIZE];
230
231/*
232 * 'Interfaces' associated with decapsulator (so we can tell
233 * packets that went through it from ones that get reflected
234 * by a broken gateway).  These interfaces are never linked into
235 * the system ifnet list & no routes point to them.  I.e., packets
236 * can't be sent this way.  They only exist as a placeholder for
237 * multicast source verification.
238 */
239struct ifnet multicast_decap_if[MAXVIFS];
240
241#define ENCAP_TTL 64
242#define ENCAP_PROTO IPPROTO_IPIP	/* 4 */
243
244/* prototype IP hdr for encapsulated packets */
245struct ip multicast_encap_iphdr = {
246#if BYTE_ORDER == LITTLE_ENDIAN
247	sizeof(struct ip) >> 2, IPVERSION,
248#else
249	IPVERSION, sizeof(struct ip) >> 2,
250#endif
251	0,				/* tos */
252	sizeof(struct ip),		/* total length */
253	0,				/* id */
254	0,				/* frag offset */
255	ENCAP_TTL, ENCAP_PROTO,
256	0,				/* checksum */
257};
258
259/*
260 * Private variables.
261 */
262static vifi_t	   numvifs = 0;
263static void (*encap_oldrawip)() = 0;
264static int have_encap_tunnel = 0;
265
266/*
267 * one-back cache used by ipip_input to locate a tunnel's vif
268 * given a datagram's src ip address.
269 */
270static u_long last_encap_src;
271static struct vif *last_encap_vif;
272
273static int get_sg_cnt(struct sioc_sg_req *);
274static int get_vif_cnt(struct sioc_vif_req *);
275int ip_mrouter_init(struct socket *, struct mbuf *);
276static int add_vif(struct vifctl *);
277static int del_vif(vifi_t *);
278static int add_mfc(struct mfcctl *);
279static int del_mfc(struct mfcctl *);
280static int get_version(struct mbuf *);
281static int get_assert(struct mbuf *);
282static int set_assert(int *);
283static void expire_upcalls(void *);
284static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *,
285		  vifi_t);
286static void phyint_send(struct ip *, struct vif *, struct mbuf *);
287static void encap_send(struct ip *, struct vif *, struct mbuf *);
288static void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long,
289		 struct ip_moptions *);
290static void tbf_queue(struct vif *, struct mbuf *, struct ip *, struct ip_moptions *);
291static void tbf_process_q(struct vif *);
292static void tbf_dequeue(struct vif *, int);
293static void tbf_reprocess_q(void *);
294static int tbf_dq_sel(struct vif *, struct ip *);
295static void tbf_send_packet(struct vif *, struct mbuf *, struct ip_moptions *);
296static void tbf_update_tokens(struct vif *);
297static int priority(struct vif *, struct ip *);
298void multiencap_decap(struct mbuf *);
299
300/*
301 * whether or not special PIM assert processing is enabled.
302 */
303static int pim_assert;
304/*
305 * Rate limit for assert notification messages, in usec
306 */
307#define ASSERT_MSG_TIME		3000000
308
309/*
310 * Hash function for a source, group entry
311 */
312#define MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
313			((g) >> 20) ^ ((g) >> 10) ^ (g))
314
/*
 * Find a route for a given origin IP address and Multicast group address
 * Type of service parameter to be added in the future!!!
 *
 * MFCFIND(o, g, rt) walks the hash chain mfctable[MFCHASH(o, g)] and
 * stores in `rt' the first entry whose origin equals `o', whose group
 * equals `g' and whose mbuf has no packets queued on m_act; `rt' is
 * set to NULL when there is no such entry.  mrts_mfc_lookups is bumped
 * on every use and mrts_mfc_misses on every failed lookup.
 *
 * Every macro parameter is parenthesized in the expansion so that an
 * expression argument cannot be re-associated by the surrounding
 * operators.  `o' and `g' are evaluated more than once; do not pass
 * expressions with side effects.
 */

#define MFCFIND(o, g, rt) { \
	register struct mbuf *_mb_rt = mfctable[MFCHASH((o), (g))]; \
	register struct mfc *_rt = NULL; \
	(rt) = NULL; \
	++mrtstat.mrts_mfc_lookups; \
	while (_mb_rt) { \
		_rt = mtod(_mb_rt, struct mfc *); \
		if ((_rt->mfc_origin.s_addr == (o)) && \
		    (_rt->mfc_mcastgrp.s_addr == (g)) && \
		    (_mb_rt->m_act == NULL)) { \
			(rt) = _rt; \
			break; \
		} \
		_mb_rt = _mb_rt->m_next; \
	} \
	if ((rt) == NULL) { \
		++mrtstat.mrts_mfc_misses; \
	} \
}
339
340
/*
 * Macros to compute elapsed time
 * Borrowed from Van Jacobson's scheduling code
 *
 * TV_DELTA(a, b, delta) stores (a - b) in microseconds into `delta':
 * the tv_usec difference plus 1000000 for every second of tv_sec
 * difference (the seconds difference is held in an int).
 *
 * TV_LT(a, b) is true when timeval `a' is strictly earlier than `b'.
 */
#define TV_DELTA(a, b, delta) { \
	    register int xxs = (a).tv_sec - (b).tv_sec; \
		\
	    delta = (a).tv_usec - (b).tv_usec; \
	    if (xxs != 0) \
		delta += (1000000 * xxs); \
}

#define TV_LT(a, b) ((a).tv_sec < (b).tv_sec || \
	      ((a).tv_sec == (b).tv_sec && (a).tv_usec < (b).tv_usec))
365
366#ifdef UPCALL_TIMING
367u_long upcall_data[51];
368static void collate(struct timeval *);
369#endif /* UPCALL_TIMING */
370
371
372/*
373 * Handle MRT setsockopt commands to modify the multicast routing tables.
374 */
375int
376X_ip_mrouter_set(cmd, so, m)
377    int cmd;
378    struct socket *so;
379    struct mbuf *m;
380{
381   if (cmd != MRT_INIT && so != ip_mrouter) return EACCES;
382
383    switch (cmd) {
384	case MRT_INIT:     return ip_mrouter_init(so, m);
385	case MRT_DONE:     return ip_mrouter_done();
386	case MRT_ADD_VIF:  return add_vif (mtod(m, struct vifctl *));
387	case MRT_DEL_VIF:  return del_vif (mtod(m, vifi_t *));
388	case MRT_ADD_MFC:  return add_mfc (mtod(m, struct mfcctl *));
389	case MRT_DEL_MFC:  return del_mfc (mtod(m, struct mfcctl *));
390	case MRT_ASSERT:   return set_assert(mtod(m, int *));
391	default:             return EOPNOTSUPP;
392    }
393}
394
395#ifndef MROUTE_LKM
396int (*ip_mrouter_set)(int, struct socket *, struct mbuf *) = X_ip_mrouter_set;
397#endif
398
399/*
400 * Handle MRT getsockopt commands
401 */
402int
403X_ip_mrouter_get(cmd, so, m)
404    int cmd;
405    struct socket *so;
406    struct mbuf **m;
407{
408    struct mbuf *mb;
409
410    if (so != ip_mrouter) return EACCES;
411
412    *m = mb = m_get(M_WAIT, MT_SOOPTS);
413
414    switch (cmd) {
415	case MRT_VERSION:   return get_version(mb);
416	case MRT_ASSERT:    return get_assert(mb);
417	default:            return EOPNOTSUPP;
418    }
419}
420
421#ifndef MROUTE_LKM
422int (*ip_mrouter_get)(int, struct socket *, struct mbuf **) = X_ip_mrouter_get;
423#endif
424
425/*
426 * Handle ioctl commands to obtain information from the cache
427 */
428int
429X_mrt_ioctl(cmd, data)
430    int cmd;
431    caddr_t data;
432{
433    int error = 0;
434
435    switch (cmd) {
436	case (SIOCGETVIFCNT):
437	    return (get_vif_cnt((struct sioc_vif_req *)data));
438	    break;
439	case (SIOCGETSGCNT):
440	    return (get_sg_cnt((struct sioc_sg_req *)data));
441	    break;
442	default:
443	    return (EINVAL);
444	    break;
445    }
446    return error;
447}
448
449#ifndef MROUTE_LKM
450int (*mrt_ioctl)(int, caddr_t, struct proc *) = X_mrt_ioctl;
451#endif
452
453/*
454 * returns the packet, byte, rpf-failure count for the source group provided
455 */
456static int
457get_sg_cnt(req)
458    register struct sioc_sg_req *req;
459{
460    register struct mfc *rt;
461    int s;
462
463    s = splnet();
464    MFCFIND(req->src.s_addr, req->grp.s_addr, rt);
465    splx(s);
466    if (rt != NULL) {
467	req->pktcnt = rt->mfc_pkt_cnt;
468	req->bytecnt = rt->mfc_byte_cnt;
469	req->wrong_if = rt->mfc_wrong_if;
470    } else
471	req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
472
473    return 0;
474}
475
476/*
477 * returns the input and output packet and byte counts on the vif provided
478 */
479static int
480get_vif_cnt(req)
481    register struct sioc_vif_req *req;
482{
483    register vifi_t vifi = req->vifi;
484
485    if (vifi >= numvifs) return EINVAL;
486
487    req->icount = viftable[vifi].v_pkt_in;
488    req->ocount = viftable[vifi].v_pkt_out;
489    req->ibytes = viftable[vifi].v_bytes_in;
490    req->obytes = viftable[vifi].v_bytes_out;
491
492    return 0;
493}
494
495/*
496 * Enable multicast routing
497 */
498int
499ip_mrouter_init(so, m)
500	struct socket *so;
501	struct mbuf *m;
502{
503    int *v;
504    int i;
505
506    if (mrtdebug)
507	log(LOG_DEBUG,"ip_mrouter_init: so_type = %d, pr_protocol = %d",
508		so->so_type, so->so_proto->pr_protocol);
509
510    if (so->so_type != SOCK_RAW ||
511	so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP;
512
513    if (!m || (m->m_len != sizeof(int *)))
514	return ENOPROTOOPT;
515
516    v = mtod(m, int *);
517    if (*v != 1)
518	return ENOPROTOOPT;
519
520    if (ip_mrouter != NULL) return EADDRINUSE;
521
522    ip_mrouter = so;
523
524    bzero((caddr_t)mfctable, sizeof(mfctable));
525    bzero((caddr_t)nexpire, sizeof(nexpire));
526
527    pim_assert = 0;
528
529    timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT);
530
531    if (mrtdebug)
532	log(LOG_DEBUG, "ip_mrouter_init");
533
534    return 0;
535}
536
537/*
538 * Disable multicast routing
539 */
540int
541X_ip_mrouter_done()
542{
543    vifi_t vifi;
544    int i;
545    struct ifnet *ifp;
546    struct ifreq ifr;
547    struct mbuf *mb_rt;
548    struct mfc *rt;
549    struct mbuf *m;
550    struct rtdetq *rte;
551    int s;
552
553    s = splnet();
554
555    /*
556     * For each phyint in use, disable promiscuous reception of all IP
557     * multicasts.
558     */
559    for (vifi = 0; vifi < numvifs; vifi++) {
560	if (viftable[vifi].v_lcl_addr.s_addr != 0 &&
561	    !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
562	    ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
563	    ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr
564								= INADDR_ANY;
565	    ifp = viftable[vifi].v_ifp;
566	    (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
567	}
568    }
569    bzero((caddr_t)qtable, sizeof(qtable));
570    bzero((caddr_t)tbftable, sizeof(tbftable));
571    bzero((caddr_t)viftable, sizeof(viftable));
572    numvifs = 0;
573    pim_assert = 0;
574
575    untimeout(expire_upcalls, (caddr_t)NULL);
576
577    /*
578     * Free all multicast forwarding cache entries.
579     */
580    for (i = 0; i < MFCTBLSIZ; i++) {
581	mb_rt = mfctable[i];
582	while (mb_rt) {
583	    if (mb_rt->m_act != NULL) {
584		while (mb_rt->m_act) {
585		    m = mb_rt->m_act;
586		    mb_rt->m_act = m->m_act;
587		    rte = mtod(m, struct rtdetq *);
588		    m_freem(rte->m);
589		    m_free(m);
590		}
591	    }
592	    mb_rt = m_free(mb_rt);
593	}
594    }
595
596    bzero((caddr_t)mfctable, sizeof(mfctable));
597
598    /*
599     * Reset de-encapsulation cache
600     */
601    last_encap_src = NULL;
602    last_encap_vif = NULL;
603    have_encap_tunnel = 0;
604
605    ip_mrouter = NULL;
606
607    splx(s);
608
609    if (mrtdebug)
610	log(LOG_DEBUG, "ip_mrouter_done");
611
612    return 0;
613}
614
615#ifndef MROUTE_LKM
616int (*ip_mrouter_done)(void) = X_ip_mrouter_done;
617#endif
618
619static int
620get_version(mb)
621    struct mbuf *mb;
622{
623    int *v;
624
625    v = mtod(mb, int *);
626
627    *v = 0x0305;	/* XXX !!!! */
628    mb->m_len = sizeof(int);
629
630    return 0;
631}
632
633/*
634 * Set PIM assert processing global
635 */
636static int
637set_assert(i)
638    int *i;
639{
640    if ((*i != 1) && (*i != 0))
641	return EINVAL;
642
643    pim_assert = *i;
644
645    return 0;
646}
647
648/*
649 * Get PIM assert processing global
650 */
651static int
652get_assert(m)
653    struct mbuf *m;
654{
655    int *i;
656
657    i = mtod(m, int *);
658
659    *i = pim_assert;
660
661    return 0;
662}
663
664/*
665 * Add a vif to the vif table
666 */
667static int
668add_vif(vifcp)
669    register struct vifctl *vifcp;
670{
671    register struct vif *vifp = viftable + vifcp->vifc_vifi;
672    static struct sockaddr_in sin = {sizeof sin, AF_INET};
673    struct ifaddr *ifa;
674    struct ifnet *ifp;
675    struct ifreq ifr;
676    int error, s;
677    struct tbf *v_tbf = tbftable + vifcp->vifc_vifi;
678
679    if (vifcp->vifc_vifi >= MAXVIFS)  return EINVAL;
680    if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE;
681
682    /* Find the interface with an address in AF_INET family */
683    sin.sin_addr = vifcp->vifc_lcl_addr;
684    ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
685    if (ifa == 0) return EADDRNOTAVAIL;
686    ifp = ifa->ifa_ifp;
687
688    if (vifcp->vifc_flags & VIFF_TUNNEL) {
689	if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) {
690		/*
691		 * An encapsulating tunnel is wanted.  Tell ipip_input() to
692		 * start paying attention to encapsulated packets.
693		 */
694		if (have_encap_tunnel == 0) {
695			have_encap_tunnel = 1;
696			for (s = 0; s < MAXVIFS; ++s) {
697				multicast_decap_if[s].if_name = "mdecap";
698				multicast_decap_if[s].if_unit = s;
699			}
700		}
701		/*
702		 * Set interface to fake encapsulator interface
703		 */
704		ifp = &multicast_decap_if[vifcp->vifc_vifi];
705		/*
706		 * Prepare cached route entry
707		 */
708		bzero(&vifp->v_route, sizeof(vifp->v_route));
709	} else {
710	    log(LOG_ERR, "Source routed tunnels not supported.");
711	    return EOPNOTSUPP;
712	}
713    } else {
714	/* Make sure the interface supports multicast */
715	if ((ifp->if_flags & IFF_MULTICAST) == 0)
716	    return EOPNOTSUPP;
717
718	/* Enable promiscuous reception of all IP multicasts from the if */
719	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
720	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY;
721	s = splnet();
722	error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr);
723	splx(s);
724	if (error)
725	    return error;
726    }
727
728    s = splnet();
729    /* define parameters for the tbf structure */
730    vifp->v_tbf = v_tbf;
731    vifp->v_tbf->q_len = 0;
732    vifp->v_tbf->n_tok = 0;
733    vifp->v_tbf->last_pkt_t = 0;
734
735    vifp->v_flags     = vifcp->vifc_flags;
736    vifp->v_threshold = vifcp->vifc_threshold;
737    vifp->v_lcl_addr  = vifcp->vifc_lcl_addr;
738    vifp->v_rmt_addr  = vifcp->vifc_rmt_addr;
739    vifp->v_ifp       = ifp;
740    vifp->v_rate_limit= vifcp->vifc_rate_limit;
741    vifp->v_rsvp_on   = 0;
742    vifp->v_rsvpd     = NULL;
743    /* initialize per vif pkt counters */
744    vifp->v_pkt_in    = 0;
745    vifp->v_pkt_out   = 0;
746    vifp->v_bytes_in  = 0;
747    vifp->v_bytes_out = 0;
748    splx(s);
749
750    /* Adjust numvifs up if the vifi is higher than numvifs */
751    if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1;
752
753    if (mrtdebug)
754	log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d",
755	    vifcp->vifc_vifi,
756	    ntohl(vifcp->vifc_lcl_addr.s_addr),
757	    (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
758	    ntohl(vifcp->vifc_rmt_addr.s_addr),
759	    vifcp->vifc_threshold,
760	    vifcp->vifc_rate_limit);
761
762    return 0;
763}
764
765/*
766 * Delete a vif from the vif table
767 */
768static int
769del_vif(vifip)
770    vifi_t *vifip;
771{
772    register struct vif *vifp = viftable + *vifip;
773    register vifi_t vifi;
774    struct ifnet *ifp;
775    struct ifreq ifr;
776    int s;
777
778    if (*vifip >= numvifs) return EINVAL;
779    if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL;
780
781    s = splnet();
782
783    if (!(vifp->v_flags & VIFF_TUNNEL)) {
784	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
785	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY;
786	ifp = vifp->v_ifp;
787	(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
788    }
789
790    if (vifp == last_encap_vif) {
791	last_encap_vif = 0;
792	last_encap_src = 0;
793    }
794
795    bzero((caddr_t)qtable[*vifip],
796	  sizeof(qtable[*vifip]));
797    bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf)));
798    bzero((caddr_t)vifp, sizeof (*vifp));
799
800    /* Adjust numvifs down */
801    for (vifi = numvifs; vifi > 0; vifi--)
802	if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break;
803    numvifs = vifi;
804
805    splx(s);
806
807    if (mrtdebug)
808      log(LOG_DEBUG, "del_vif %d, numvifs %d", *vifip, numvifs);
809
810    return 0;
811}
812
813/*
814 * Add an mfc entry
815 */
816static int
817add_mfc(mfccp)
818    struct mfcctl *mfccp;
819{
820    struct mfc *rt;
821    register struct mbuf *mb_rt;
822    u_long hash;
823    struct mbuf *mb_ntry;
824    struct rtdetq *rte;
825    register u_short nstl;
826    int s;
827    int i;
828
829    MFCFIND(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr, rt);
830
831    /* If an entry already exists, just update the fields */
832    if (rt) {
833	if (mrtdebug & DEBUG_MFC)
834	    log(LOG_DEBUG,"add_mfc update o %x g %x p %x",
835		ntohl(mfccp->mfcc_origin.s_addr),
836		ntohl(mfccp->mfcc_mcastgrp.s_addr),
837		mfccp->mfcc_parent);
838
839	s = splnet();
840	rt->mfc_parent = mfccp->mfcc_parent;
841	for (i = 0; i < numvifs; i++)
842	    rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
843	splx(s);
844	return 0;
845    }
846
847    /*
848     * Find the entry for which the upcall was made and update
849     */
850    s = splnet();
851    hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
852    for (mb_rt = mfctable[hash], nstl = 0; mb_rt; mb_rt = mb_rt->m_next) {
853
854	rt = mtod(mb_rt, struct mfc *);
855	if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
856	    (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
857	    (mb_rt->m_act != NULL)) {
858
859	    if (nstl++)
860		log(LOG_ERR, "add_mfc %s o %x g %x p %x dbx %x",
861		    "multiple kernel entries",
862		    ntohl(mfccp->mfcc_origin.s_addr),
863		    ntohl(mfccp->mfcc_mcastgrp.s_addr),
864		    mfccp->mfcc_parent, mb_rt->m_act);
865
866	    if (mrtdebug & DEBUG_MFC)
867		log(LOG_DEBUG,"add_mfc o %x g %x p %x dbg %x",
868		    ntohl(mfccp->mfcc_origin.s_addr),
869		    ntohl(mfccp->mfcc_mcastgrp.s_addr),
870		    mfccp->mfcc_parent, mb_rt->m_act);
871
872	    rt->mfc_origin     = mfccp->mfcc_origin;
873	    rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
874	    rt->mfc_parent     = mfccp->mfcc_parent;
875	    for (i = 0; i < numvifs; i++)
876		rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
877	    /* initialize pkt counters per src-grp */
878	    rt->mfc_pkt_cnt    = 0;
879	    rt->mfc_byte_cnt   = 0;
880	    rt->mfc_wrong_if   = 0;
881	    rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
882
883	    rt->mfc_expire = 0;	/* Don't clean this guy up */
884	    nexpire[hash]--;
885
886	    /* free packets Qed at the end of this entry */
887	    while (mb_rt->m_act) {
888		mb_ntry = mb_rt->m_act;
889		rte = mtod(mb_ntry, struct rtdetq *);
890/* #ifdef RSVP_ISI */
891		ip_mdq(rte->m, rte->ifp, rt, -1);
892/* #endif */
893		mb_rt->m_act = mb_ntry->m_act;
894		m_freem(rte->m);
895#ifdef UPCALL_TIMING
896		collate(&(rte->t));
897#endif /* UPCALL_TIMING */
898		m_free(mb_ntry);
899	    }
900	}
901    }
902
903    /*
904     * It is possible that an entry is being inserted without an upcall
905     */
906    if (nstl == 0) {
907	if (mrtdebug & DEBUG_MFC)
908	    log(LOG_DEBUG,"add_mfc no upcall h %d o %x g %x p %x",
909		hash, ntohl(mfccp->mfcc_origin.s_addr),
910		ntohl(mfccp->mfcc_mcastgrp.s_addr),
911		mfccp->mfcc_parent);
912
913	for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) {
914
915	    rt = mtod(mb_rt, struct mfc *);
916	    if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
917		(rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) {
918
919		rt->mfc_origin     = mfccp->mfcc_origin;
920		rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
921		rt->mfc_parent     = mfccp->mfcc_parent;
922		for (i = 0; i < numvifs; i++)
923		    rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
924		/* initialize pkt counters per src-grp */
925		rt->mfc_pkt_cnt    = 0;
926		rt->mfc_byte_cnt   = 0;
927		rt->mfc_wrong_if   = 0;
928		rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
929		if (rt->mfc_expire)
930		    nexpire[hash]--;
931		rt->mfc_expire	   = 0;
932	    }
933	}
934	if (mb_rt == NULL) {
935	    /* no upcall, so make a new entry */
936	    MGET(mb_rt, M_DONTWAIT, MT_MRTABLE);
937	    if (mb_rt == NULL) {
938		splx(s);
939		return ENOBUFS;
940	    }
941
942	    rt = mtod(mb_rt, struct mfc *);
943
944	    /* insert new entry at head of hash chain */
945	    rt->mfc_origin     = mfccp->mfcc_origin;
946	    rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
947	    rt->mfc_parent     = mfccp->mfcc_parent;
948	    for (i = 0; i < numvifs; i++)
949		    rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
950	    /* initialize pkt counters per src-grp */
951	    rt->mfc_pkt_cnt    = 0;
952	    rt->mfc_byte_cnt   = 0;
953	    rt->mfc_wrong_if   = 0;
954	    rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
955	    rt->mfc_expire     = 0;
956
957	    /* link into table */
958	    mb_rt->m_next  = mfctable[hash];
959	    mfctable[hash] = mb_rt;
960	    mb_rt->m_act = NULL;
961	}
962    }
963    splx(s);
964    return 0;
965}
966
967#ifdef UPCALL_TIMING
968/*
969 * collect delay statistics on the upcalls
970 */
971static void collate(t)
972register struct timeval *t;
973{
974    register u_long d;
975    register struct timeval tp;
976    register u_long delta;
977
978    GET_TIME(tp);
979
980    if (TV_LT(*t, tp))
981    {
982	TV_DELTA(tp, *t, delta);
983
984	d = delta >> 10;
985	if (d > 50)
986	    d = 50;
987
988	++upcall_data[d];
989    }
990}
991#endif /* UPCALL_TIMING */
992
993/*
994 * Delete an mfc entry
995 */
996static int
997del_mfc(mfccp)
998    struct mfcctl *mfccp;
999{
1000    struct in_addr 	origin;
1001    struct in_addr 	mcastgrp;
1002    struct mfc 		*rt;
1003    struct mbuf 	*mb_rt;
1004    struct mbuf 	**nptr;
1005    u_long 		hash;
1006    int s, i;
1007
1008    origin = mfccp->mfcc_origin;
1009    mcastgrp = mfccp->mfcc_mcastgrp;
1010    hash = MFCHASH(origin.s_addr, mcastgrp.s_addr);
1011
1012    if (mrtdebug & DEBUG_MFC)
1013	log(LOG_DEBUG,"del_mfc orig %x mcastgrp %x",
1014	    ntohl(origin.s_addr), ntohl(mcastgrp.s_addr));
1015
1016    s = splnet();
1017
1018    nptr = &mfctable[hash];
1019    while ((mb_rt = *nptr) != NULL) {
1020        rt = mtod(mb_rt, struct mfc *);
1021	if (origin.s_addr == rt->mfc_origin.s_addr &&
1022	    mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr &&
1023	    mb_rt->m_act == NULL)
1024	    break;
1025
1026	nptr = &mb_rt->m_next;
1027    }
1028    if (mb_rt == NULL) {
1029	splx(s);
1030	return EADDRNOTAVAIL;
1031    }
1032
1033    MFREE(mb_rt, *nptr);
1034
1035    splx(s);
1036
1037    return 0;
1038}
1039
1040/*
1041 * Send a message to mrouted on the multicast routing socket
1042 */
1043static int
1044socket_send(s, mm, src)
1045	struct socket *s;
1046	struct mbuf *mm;
1047	struct sockaddr_in *src;
1048{
1049	if (s) {
1050		if (sbappendaddr(&s->so_rcv,
1051				 (struct sockaddr *)src,
1052				 mm, (struct mbuf *)0) != 0) {
1053			sorwakeup(s);
1054			return 0;
1055		}
1056	}
1057	m_freem(mm);
1058	return -1;
1059}
1060
1061/*
1062 * IP multicast forwarding function. This function assumes that the packet
1063 * pointed to by "ip" has arrived on (or is about to be sent to) the interface
1064 * pointed to by "ifp", and the packet is to be relayed to other networks
1065 * that have members of the packet's destination IP multicast group.
1066 *
1067 * The packet is returned unscathed to the caller, unless it is
1068 * erroneous, in which case a non-zero return value tells the caller to
1069 * discard it.
1070 */
1071
1072#define IP_HDR_LEN  20	/* # bytes of fixed IP header (excluding options) */
1073#define TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */
1074
1075int
1076X_ip_mforward(ip, ifp, m, imo)
1077    register struct ip *ip;
1078    struct ifnet *ifp;
1079    struct mbuf *m;
1080    struct ip_moptions *imo;
1081{
1082    register struct mfc *rt = 0; /* XXX uninit warning */
1083    register u_char *ipoptions;
1084    static struct sockproto	k_igmpproto 	= { AF_INET, IPPROTO_IGMP };
1085    static struct sockaddr_in 	k_igmpsrc	= { sizeof k_igmpsrc, AF_INET };
1086    static int srctun = 0;
1087    register struct mbuf *mm;
1088    int s;
1089    vifi_t vifi;
1090    struct vif *vifp;
1091
1092    if (mrtdebug & DEBUG_FORWARD)
1093	log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %x",
1094	    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp);
1095
1096    if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
1097	(ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) {
1098	/*
1099	 * Packet arrived via a physical interface or
1100	 * an encapsulated tunnel.
1101	 */
1102    } else {
1103	/*
1104	 * Packet arrived through a source-route tunnel.
1105	 * Source-route tunnels are no longer supported.
1106	 */
1107	if ((srctun++ % 1000) == 0)
1108	    log(LOG_ERR, "ip_mforward: received source-routed packet from %x",
1109		ntohl(ip->ip_src.s_addr));
1110
1111	return 1;
1112    }
1113
1114    if ((imo) && ((vifi = imo->imo_multicast_vif) < numvifs)) {
1115	if (ip->ip_ttl < 255)
1116		ip->ip_ttl++;	/* compensate for -1 in *_send routines */
1117	if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
1118	    vifp = viftable + vifi;
1119	    printf("Sending IPPROTO_RSVP from %x to %x on vif %d (%s%s%d)\n",
1120		ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), vifi,
1121		(vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "",
1122		vifp->v_ifp->if_name, vifp->v_ifp->if_unit);
1123	}
1124	return (ip_mdq(m, ifp, rt, vifi));
1125    }
1126    if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
1127	printf("Warning: IPPROTO_RSVP from %x to %x without vif option\n",
1128	    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr));
1129    }
1130
1131    /*
1132     * Don't forward a packet with time-to-live of zero or one,
1133     * or a packet destined to a local-only group.
1134     */
1135    if (ip->ip_ttl <= 1 ||
1136	ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP)
1137	return 0;
1138
1139    /*
1140     * Determine forwarding vifs from the forwarding cache table
1141     */
1142    s = splnet();
1143    MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt);
1144
1145    /* Entry exists, so forward if necessary */
1146    if (rt != NULL) {
1147	splx(s);
1148	return (ip_mdq(m, ifp, rt, -1));
1149    } else {
1150	/*
1151	 * If we don't have a route for packet's origin,
1152	 * Make a copy of the packet &
1153	 * send message to routing daemon
1154	 */
1155
1156	register struct mbuf *mb_rt;
1157	register struct mbuf *mb_ntry;
1158	register struct mbuf *mb0;
1159	register struct rtdetq *rte;
1160	register struct mbuf *rte_m;
1161	register u_long hash;
1162	register int npkts;
1163#ifdef UPCALL_TIMING
1164	struct timeval tp;
1165
1166	GET_TIME(tp);
1167#endif
1168
1169	mrtstat.mrts_no_route++;
1170	if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC))
1171	    log(LOG_DEBUG, "ip_mforward: no rte s %x g %x",
1172		ntohl(ip->ip_src.s_addr),
1173		ntohl(ip->ip_dst.s_addr));
1174
1175	/*
1176	 * Allocate mbufs early so that we don't do extra work if we are
1177	 * just going to fail anyway.
1178	 */
1179	MGET(mb_ntry, M_DONTWAIT, MT_DATA);
1180	if (mb_ntry == NULL) {
1181	    splx(s);
1182	    return ENOBUFS;
1183	}
1184	mb0 = m_copy(m, 0, M_COPYALL);
1185	if (mb0 == NULL) {
1186	    m_free(mb_ntry);
1187	    splx(s);
1188	    return ENOBUFS;
1189	}
1190
1191	/* is there an upcall waiting for this packet? */
1192	hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr);
1193	for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) {
1194	    rt = mtod(mb_rt, struct mfc *);
1195	    if ((ip->ip_src.s_addr == rt->mfc_origin.s_addr) &&
1196		(ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) &&
1197		(mb_rt->m_act != NULL))
1198		break;
1199	}
1200
1201	if (mb_rt == NULL) {
1202	    int hlen = ip->ip_hl << 2;
1203	    int i;
1204	    struct igmpmsg *im;
1205
1206	    /* no upcall, so make a new entry */
1207	    MGET(mb_rt, M_DONTWAIT, MT_MRTABLE);
1208	    if (mb_rt == NULL) {
1209		m_free(mb_ntry);
1210		m_freem(mb0);
1211		splx(s);
1212		return ENOBUFS;
1213	    }
1214	    /* Make a copy of the header to send to the user level process */
1215	    mm = m_copy(m, 0, hlen);
1216	    if (mm && (M_HASCL(mm) || mm->m_len < hlen))
1217		mm = m_pullup(mm, hlen);
1218	    if (mm == NULL) {
1219		m_free(mb_ntry);
1220		m_freem(mb0);
1221		m_free(mb_rt);
1222		splx(s);
1223		return ENOBUFS;
1224	    }
1225
1226	    /*
1227	     * Send message to routing daemon to install
1228	     * a route into the kernel table
1229	     */
1230	    k_igmpsrc.sin_addr = ip->ip_src;
1231
1232	    im = mtod(mm, struct igmpmsg *);
1233	    im->im_msgtype	= IGMPMSG_NOCACHE;
1234	    im->im_mbz		= 0;
1235
1236	    mrtstat.mrts_upcalls++;
1237
1238	    if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) {
1239		log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full");
1240		++mrtstat.mrts_upq_sockfull;
1241		m_free(mb_ntry);
1242		m_freem(mb0);
1243		m_free(mb_rt);
1244		splx(s);
1245		return ENOBUFS;
1246	    }
1247
1248	    rt = mtod(mb_rt, struct mfc *);
1249
1250	    /* insert new entry at head of hash chain */
1251	    rt->mfc_origin.s_addr     = ip->ip_src.s_addr;
1252	    rt->mfc_mcastgrp.s_addr   = ip->ip_dst.s_addr;
1253	    rt->mfc_expire	      = UPCALL_EXPIRE;
1254	    nexpire[hash]++;
1255	    for (i = 0; i < numvifs; i++)
1256		rt->mfc_ttls[i] = 0;
1257	    rt->mfc_parent = -1;
1258
1259	    /* link into table */
1260	    mb_rt->m_next  = mfctable[hash];
1261	    mfctable[hash] = mb_rt;
1262	    mb_rt->m_act = NULL;
1263
1264	    rte_m = mb_rt;
1265	} else {
1266	    /* determine if q has overflowed */
1267	    for (rte_m = mb_rt, npkts = 0; rte_m->m_act; rte_m = rte_m->m_act)
1268		npkts++;
1269
1270	    if (npkts > MAX_UPQ) {
1271		mrtstat.mrts_upq_ovflw++;
1272		m_free(mb_ntry);
1273		m_freem(mb0);
1274		splx(s);
1275		return 0;
1276	    }
1277	}
1278
1279	mb_ntry->m_act = NULL;
1280	rte = mtod(mb_ntry, struct rtdetq *);
1281
1282	rte->m 			= mb0;
1283	rte->ifp 		= ifp;
1284#ifdef UPCALL_TIMING
1285	rte->t			= tp;
1286#endif
1287
1288	/* Add this entry to the end of the queue */
1289	rte_m->m_act		= mb_ntry;
1290
1291	splx(s);
1292
1293	return 0;
1294    }
1295}
1296
/*
 * When MROUTE_LKM is not defined, the ip_mforward hook is initialized
 * here to point at X_ip_mforward.
 */
#ifndef MROUTE_LKM
int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
		   struct ip_moptions *) = X_ip_mforward;
#endif
1300#endif
1301
1302/*
1303 * Clean up the cache entry if upcall is not serviced
1304 */
1305static void
1306expire_upcalls(void *unused)
1307{
1308    struct mbuf *mb_rt, *m, **nptr;
1309    struct rtdetq *rte;
1310    struct mfc *mfc;
1311    int i;
1312    int s;
1313
1314    s = splnet();
1315    for (i = 0; i < MFCTBLSIZ; i++) {
1316	if (nexpire[i] == 0)
1317	    continue;
1318	nptr = &mfctable[i];
1319	for (mb_rt = *nptr; mb_rt != NULL; mb_rt = *nptr) {
1320	    mfc = mtod(mb_rt, struct mfc *);
1321
1322	    /*
1323	     * Skip real cache entries
1324	     * Make sure it wasn't marked to not expire (shouldn't happen)
1325	     * If it expires now
1326	     */
1327	    if (mb_rt->m_act != NULL &&
1328	        mfc->mfc_expire != 0 &&
1329		--mfc->mfc_expire == 0) {
1330		if (mrtdebug & DEBUG_EXPIRE)
1331		    log(LOG_DEBUG, "expire_upcalls: expiring (%x %x)",
1332			ntohl(mfc->mfc_origin.s_addr),
1333			ntohl(mfc->mfc_mcastgrp.s_addr));
1334		/*
1335		 * drop all the packets
1336		 * free the mbuf with the pkt, if, timing info
1337		 */
1338		while (mb_rt->m_act) {
1339		    m = mb_rt->m_act;
1340		    mb_rt->m_act = m->m_act;
1341
1342		    rte = mtod(m, struct rtdetq *);
1343		    m_freem(rte->m);
1344		    m_free(m);
1345		}
1346		++mrtstat.mrts_cache_cleanups;
1347		nexpire[i]--;
1348
1349		MFREE(mb_rt, *nptr);
1350	    } else {
1351		nptr = &mb_rt->m_next;
1352	    }
1353	}
1354    }
1355    splx(s);
1356    timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT);
1357}
1358
1359/*
1360 * Packet forwarding routine once entry in the cache is made
1361 */
1362static int
1363ip_mdq(m, ifp, rt, xmt_vif)
1364    register struct mbuf *m;
1365    register struct ifnet *ifp;
1366    register struct mfc *rt;
1367    register vifi_t xmt_vif;
1368{
1369    register struct ip  *ip = mtod(m, struct ip *);
1370    register vifi_t vifi;
1371    register struct vif *vifp;
1372    register struct mbuf *tmp;
1373    register int plen = ntohs(ip->ip_len);
1374
1375/*
1376 * Macro to send packet on vif.  Since RSVP packets don't get counted on
1377 * input, they shouldn't get counted on output, so statistics keeping is
1378 * seperate.
1379 */
1380#define MC_SEND(ip,vifp,m) {                             \
1381                if ((vifp)->v_flags & VIFF_TUNNEL)  	 \
1382                    encap_send((ip), (vifp), (m));       \
1383                else                                     \
1384                    phyint_send((ip), (vifp), (m));      \
1385}
1386
1387    /*
1388     * If xmt_vif is not -1, send on only the requested vif.
1389     *
1390     * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.)
1391     */
1392    if (xmt_vif < numvifs) {
1393	MC_SEND(ip, viftable + xmt_vif, m);
1394	return 1;
1395    }
1396
1397    /*
1398     * Don't forward if it didn't arrive from the parent vif for its origin.
1399     */
1400    vifi = rt->mfc_parent;
1401    if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) {
1402	/* came in the wrong interface */
1403	if (mrtdebug & DEBUG_FORWARD)
1404	    log(LOG_DEBUG, "wrong if: ifp %x vifi %d vififp %x",
1405		ifp, vifi, viftable[vifi].v_ifp);
1406	++mrtstat.mrts_wrong_if;
1407	++rt->mfc_wrong_if;
1408	/*
1409	 * If we are doing PIM assert processing, and we are forwarding
1410	 * packets on this interface, and it is a broadcast medium
1411	 * interface (and not a tunnel), send a message to the routing daemon.
1412	 */
1413	if (pim_assert && rt->mfc_ttls[vifi] &&
1414		(ifp->if_flags & IFF_BROADCAST) &&
1415		!(viftable[vifi].v_flags & VIFF_TUNNEL)) {
1416	    struct sockaddr_in k_igmpsrc;
1417	    struct mbuf *mm;
1418	    struct igmpmsg *im;
1419	    int hlen = ip->ip_hl << 2;
1420	    struct timeval now;
1421	    register u_long delta;
1422
1423	    GET_TIME(now);
1424
1425	    TV_DELTA(rt->mfc_last_assert, now, delta);
1426
1427	    if (delta > ASSERT_MSG_TIME) {
1428		mm = m_copy(m, 0, hlen);
1429		if (mm && (M_HASCL(mm) || mm->m_len < hlen))
1430		    mm = m_pullup(mm, hlen);
1431		if (mm == NULL) {
1432		    return ENOBUFS;
1433		}
1434
1435		rt->mfc_last_assert = now;
1436
1437		im = mtod(mm, struct igmpmsg *);
1438		im->im_msgtype	= IGMPMSG_WRONGVIF;
1439		im->im_mbz		= 0;
1440		im->im_vif		= vifi;
1441
1442		k_igmpsrc.sin_addr = im->im_src;
1443
1444		socket_send(ip_mrouter, mm, &k_igmpsrc);
1445	    }
1446	}
1447	return 0;
1448    }
1449
1450    /* If I sourced this packet, it counts as output, else it was input. */
1451    if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) {
1452	viftable[vifi].v_pkt_out++;
1453	viftable[vifi].v_bytes_out += plen;
1454    } else {
1455	viftable[vifi].v_pkt_in++;
1456	viftable[vifi].v_bytes_in += plen;
1457    }
1458    rt->mfc_pkt_cnt++;
1459    rt->mfc_byte_cnt += plen;
1460
1461    /*
1462     * For each vif, decide if a copy of the packet should be forwarded.
1463     * Forward if:
1464     *		- the ttl exceeds the vif's threshold
1465     *		- there are group members downstream on interface
1466     */
1467    for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++)
1468	if ((rt->mfc_ttls[vifi] > 0) &&
1469	    (ip->ip_ttl > rt->mfc_ttls[vifi])) {
1470	    vifp->v_pkt_out++;
1471	    vifp->v_bytes_out += plen;
1472	    MC_SEND(ip, vifp, m);
1473	}
1474
1475    return 0;
1476}
1477
1478/*
1479 * check if a vif number is legal/ok. This is used by ip_output, to export
1480 * numvifs there,
1481 */
1482int
1483X_legal_vif_num(vif)
1484    int vif;
1485{
1486    if (vif >= 0 && vif < numvifs)
1487       return(1);
1488    else
1489       return(0);
1490}
1491
1492#ifndef MROUTE_LKM
1493int (*legal_vif_num)(int) = X_legal_vif_num;
1494#endif
1495
1496/*
1497 * Return the local address used by this vif
1498 */
1499u_long
1500X_ip_mcast_src(vifi)
1501    int vifi;
1502{
1503    if (vifi >= 0 && vifi < numvifs)
1504	return viftable[vifi].v_lcl_addr.s_addr;
1505    else
1506	return INADDR_ANY;
1507}
1508
1509#ifndef MROUTE_LKM
1510u_long (*ip_mcast_src)(int) = X_ip_mcast_src;
1511#endif
1512
1513static void
1514phyint_send(ip, vifp, m)
1515    struct ip *ip;
1516    struct vif *vifp;
1517    struct mbuf *m;
1518{
1519    register struct mbuf *mb_copy;
1520    register int hlen = ip->ip_hl << 2;
1521    register struct ip_moptions *imo;
1522
1523    /*
1524     * Make a new reference to the packet; make sure that
1525     * the IP header is actually copied, not just referenced,
1526     * so that ip_output() only scribbles on the copy.
1527     */
1528    mb_copy = m_copy(m, 0, M_COPYALL);
1529    if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen))
1530	mb_copy = m_pullup(mb_copy, hlen);
1531    if (mb_copy == NULL)
1532	return;
1533
1534    MALLOC(imo, struct ip_moptions *, sizeof *imo, M_IPMOPTS, M_NOWAIT);
1535    if (imo == NULL) {
1536	m_freem(mb_copy);
1537	return;
1538    }
1539
1540    imo->imo_multicast_ifp  = vifp->v_ifp;
1541    imo->imo_multicast_ttl  = ip->ip_ttl - 1;
1542    imo->imo_multicast_loop = 1;
1543    imo->imo_multicast_vif  = -1;
1544
1545    if (vifp->v_rate_limit <= 0)
1546	tbf_send_packet(vifp, mb_copy, imo);
1547    else
1548	tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len,
1549		    imo);
1550}
1551
1552static void
1553encap_send(ip, vifp, m)
1554    register struct ip *ip;
1555    register struct vif *vifp;
1556    register struct mbuf *m;
1557{
1558    register struct mbuf *mb_copy;
1559    register struct ip *ip_copy;
1560    int hlen = ip->ip_hl << 2;
1561    register int i, len = ip->ip_len;
1562
1563    /*
1564     * copy the old packet & pullup it's IP header into the
1565     * new mbuf so we can modify it.  Try to fill the new
1566     * mbuf since if we don't the ethernet driver will.
1567     */
1568    MGET(mb_copy, M_DONTWAIT, MT_DATA);
1569    if (mb_copy == NULL)
1570	return;
1571    mb_copy->m_data += 16;
1572    mb_copy->m_len = sizeof(multicast_encap_iphdr);
1573
1574    if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) {
1575	m_freem(mb_copy);
1576	return;
1577    }
1578    i = MHLEN - M_LEADINGSPACE(mb_copy);
1579    if (i > len)
1580	i = len;
1581    mb_copy = m_pullup(mb_copy, i);
1582    if (mb_copy == NULL)
1583	return;
1584    mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr);
1585
1586    /*
1587     * fill in the encapsulating IP header.
1588     */
1589    ip_copy = mtod(mb_copy, struct ip *);
1590    *ip_copy = multicast_encap_iphdr;
1591    ip_copy->ip_id = htons(ip_id++);
1592    ip_copy->ip_len += len;
1593    ip_copy->ip_src = vifp->v_lcl_addr;
1594    ip_copy->ip_dst = vifp->v_rmt_addr;
1595
1596    /*
1597     * turn the encapsulated IP header back into a valid one.
1598     */
1599    ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr));
1600    --ip->ip_ttl;
1601    HTONS(ip->ip_len);
1602    HTONS(ip->ip_off);
1603    ip->ip_sum = 0;
1604#if defined(LBL) && !defined(ultrix)
1605    ip->ip_sum = ~oc_cksum((caddr_t)ip, ip->ip_hl << 2, 0);
1606#else
1607    mb_copy->m_data += sizeof(multicast_encap_iphdr);
1608    ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
1609    mb_copy->m_data -= sizeof(multicast_encap_iphdr);
1610#endif
1611
1612    if (vifp->v_rate_limit <= 0)
1613	tbf_send_packet(vifp, mb_copy, 0);
1614    else
1615	tbf_control(vifp, mb_copy, ip, ip_copy->ip_len, 0);
1616}
1617
1618/*
1619 * De-encapsulate a packet and feed it back through ip input (this
1620 * routine is called whenever IP gets a packet with proto type
1621 * ENCAP_PROTO and a local destination address).
1622 */
1623void
1624#ifdef MROUTE_LKM
1625X_ipip_input(m)
1626#else
1627ipip_input(m, iphlen)
1628#endif
1629	register struct mbuf *m;
1630	int iphlen;
1631{
1632    struct ifnet *ifp = m->m_pkthdr.rcvif;
1633    register struct ip *ip = mtod(m, struct ip *);
1634    register int hlen = ip->ip_hl << 2;
1635    register int s;
1636    register struct ifqueue *ifq;
1637    register struct vif *vifp;
1638
1639    if (!have_encap_tunnel) {
1640	    rip_input(m);
1641	    return;
1642    }
1643    /*
1644     * dump the packet if it's not to a multicast destination or if
1645     * we don't have an encapsulating tunnel with the source.
1646     * Note:  This code assumes that the remote site IP address
1647     * uniquely identifies the tunnel (i.e., that this site has
1648     * at most one tunnel with the remote site).
1649     */
1650    if (! IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) {
1651	++mrtstat.mrts_bad_tunnel;
1652	m_freem(m);
1653	return;
1654    }
1655    if (ip->ip_src.s_addr != last_encap_src) {
1656	register struct vif *vife;
1657
1658	vifp = viftable;
1659	vife = vifp + numvifs;
1660	last_encap_src = ip->ip_src.s_addr;
1661	last_encap_vif = 0;
1662	for ( ; vifp < vife; ++vifp)
1663	    if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) {
1664		if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT))
1665		    == VIFF_TUNNEL)
1666		    last_encap_vif = vifp;
1667		break;
1668	    }
1669    }
1670    if ((vifp = last_encap_vif) == 0) {
1671	last_encap_src = 0;
1672	mrtstat.mrts_cant_tunnel++; /*XXX*/
1673	m_freem(m);
1674	if (mrtdebug)
1675          log(LOG_DEBUG, "ip_mforward: no tunnel with %x",
1676		ntohl(ip->ip_src.s_addr));
1677	return;
1678    }
1679    ifp = vifp->v_ifp;
1680
1681    if (hlen > IP_HDR_LEN)
1682      ip_stripoptions(m, (struct mbuf *) 0);
1683    m->m_data += IP_HDR_LEN;
1684    m->m_len -= IP_HDR_LEN;
1685    m->m_pkthdr.len -= IP_HDR_LEN;
1686    m->m_pkthdr.rcvif = ifp;
1687
1688    ifq = &ipintrq;
1689    s = splimp();
1690    if (IF_QFULL(ifq)) {
1691	IF_DROP(ifq);
1692	m_freem(m);
1693    } else {
1694	IF_ENQUEUE(ifq, m);
1695	/*
1696	 * normally we would need a "schednetisr(NETISR_IP)"
1697	 * here but we were called by ip_input and it is going
1698	 * to loop back & try to dequeue the packet we just
1699	 * queued as soon as we return so we avoid the
1700	 * unnecessary software interrrupt.
1701	 */
1702    }
1703    splx(s);
1704}
1705
1706/*
1707 * Token bucket filter module
1708 */
1709static void
1710tbf_control(vifp, m, ip, p_len, imo)
1711	register struct vif *vifp;
1712	register struct mbuf *m;
1713	register struct ip *ip;
1714	register u_long p_len;
1715	struct ip_moptions *imo;
1716{
1717    tbf_update_tokens(vifp);
1718
1719    /* if there are enough tokens,
1720     * and the queue is empty,
1721     * send this packet out
1722     */
1723
1724    if (vifp->v_tbf->q_len == 0) {
1725	if (p_len <= vifp->v_tbf->n_tok) {
1726	    vifp->v_tbf->n_tok -= p_len;
1727	    tbf_send_packet(vifp, m, imo);
1728	} else if (p_len > MAX_BKT_SIZE) {
1729	    /* drop if packet is too large */
1730	    mrtstat.mrts_pkt2large++;
1731	    m_freem(m);
1732	    return;
1733	} else {
1734	    /* queue packet and timeout till later */
1735	    tbf_queue(vifp, m, ip, imo);
1736	    timeout(tbf_reprocess_q, (caddr_t)vifp, 1);
1737	}
1738    } else if (vifp->v_tbf->q_len < MAXQSIZE) {
1739	/* finite queue length, so queue pkts and process queue */
1740	tbf_queue(vifp, m, ip, imo);
1741	tbf_process_q(vifp);
1742    } else {
1743	/* queue length too much, try to dq and queue and process */
1744	if (!tbf_dq_sel(vifp, ip)) {
1745	    mrtstat.mrts_q_overflow++;
1746	    m_freem(m);
1747	    return;
1748	} else {
1749	    tbf_queue(vifp, m, ip, imo);
1750	    tbf_process_q(vifp);
1751	}
1752    }
1753    return;
1754}
1755
1756/*
1757 * adds a packet to the queue at the interface
1758 */
1759static void
1760tbf_queue(vifp, m, ip, imo)
1761	register struct vif *vifp;
1762	register struct mbuf *m;
1763	register struct ip *ip;
1764	struct ip_moptions *imo;
1765{
1766    register u_long ql;
1767    register int index = (vifp - viftable);
1768    register int s = splnet();
1769
1770    ql = vifp->v_tbf->q_len;
1771
1772    qtable[index][ql].pkt_m = m;
1773    qtable[index][ql].pkt_len = (mtod(m, struct ip *))->ip_len;
1774    qtable[index][ql].pkt_ip = ip;
1775    qtable[index][ql].pkt_imo = imo;
1776
1777    vifp->v_tbf->q_len++;
1778    splx(s);
1779}
1780
1781
1782/*
1783 * processes the queue at the interface
1784 */
1785static void
1786tbf_process_q(vifp)
1787    register struct vif *vifp;
1788{
1789    register struct pkt_queue pkt_1;
1790    register int index = (vifp - viftable);
1791    register int s = splnet();
1792
1793    /* loop through the queue at the interface and send as many packets
1794     * as possible
1795     */
1796    while (vifp->v_tbf->q_len > 0) {
1797	/* locate the first packet */
1798	pkt_1.pkt_len = (qtable[index][0]).pkt_len;
1799	pkt_1.pkt_m   = (qtable[index][0]).pkt_m;
1800	pkt_1.pkt_ip   = (qtable[index][0]).pkt_ip;
1801	pkt_1.pkt_imo = (qtable[index][0]).pkt_imo;
1802
1803	/* determine if the packet can be sent */
1804	if (pkt_1.pkt_len <= vifp->v_tbf->n_tok) {
1805	    /* if so,
1806	     * reduce no of tokens, dequeue the queue,
1807	     * send the packet.
1808	     */
1809	    vifp->v_tbf->n_tok -= pkt_1.pkt_len;
1810
1811	    tbf_dequeue(vifp, 0);
1812
1813	    tbf_send_packet(vifp, pkt_1.pkt_m, pkt_1.pkt_imo);
1814
1815	} else break;
1816    }
1817    splx(s);
1818}
1819
1820/*
1821 * removes the jth packet from the queue at the interface
1822 */
1823static void
1824tbf_dequeue(vifp,j)
1825    register struct vif *vifp;
1826    register int j;
1827{
1828    register u_long index = vifp - viftable;
1829    register int i;
1830
1831    for (i=j+1; i <= vifp->v_tbf->q_len - 1; i++) {
1832	qtable[index][i-1].pkt_m   = qtable[index][i].pkt_m;
1833	qtable[index][i-1].pkt_len = qtable[index][i].pkt_len;
1834	qtable[index][i-1].pkt_ip = qtable[index][i].pkt_ip;
1835	qtable[index][i-1].pkt_imo = qtable[index][i].pkt_imo;
1836    }
1837    qtable[index][i-1].pkt_m = NULL;
1838    qtable[index][i-1].pkt_len = NULL;
1839    qtable[index][i-1].pkt_ip = NULL;
1840    qtable[index][i-1].pkt_imo = NULL;
1841
1842    vifp->v_tbf->q_len--;
1843
1844    if (tbfdebug > 1)
1845	log(LOG_DEBUG, "tbf_dequeue: vif# %d qlen %d",vifp-viftable, i-1);
1846}
1847
1848static void
1849tbf_reprocess_q(xvifp)
1850	void *xvifp;
1851{
1852    register struct vif *vifp = xvifp;
1853    if (ip_mrouter == NULL)
1854	return;
1855
1856    tbf_update_tokens(vifp);
1857
1858    tbf_process_q(vifp);
1859
1860    if (vifp->v_tbf->q_len)
1861	timeout(tbf_reprocess_q, (caddr_t)vifp, 1);
1862}
1863
1864/* function that will selectively discard a member of the queue
1865 * based on the precedence value and the priority obtained through
1866 * a lookup table - not yet implemented accurately!
1867 */
1868static int
1869tbf_dq_sel(vifp, ip)
1870    register struct vif *vifp;
1871    register struct ip *ip;
1872{
1873    register int i;
1874    register int s = splnet();
1875    register u_int p;
1876
1877    p = priority(vifp, ip);
1878
1879    for(i=vifp->v_tbf->q_len-1;i >= 0;i--) {
1880	if (p > priority(vifp, qtable[vifp-viftable][i].pkt_ip)) {
1881	    m_freem(qtable[vifp-viftable][i].pkt_m);
1882	    tbf_dequeue(vifp,i);
1883	    splx(s);
1884	    mrtstat.mrts_drop_sel++;
1885	    return(1);
1886	}
1887    }
1888    splx(s);
1889    return(0);
1890}
1891
1892static void
1893tbf_send_packet(vifp, m, imo)
1894    register struct vif *vifp;
1895    register struct mbuf *m;
1896    struct ip_moptions *imo;
1897{
1898    int error;
1899    int s = splnet();
1900
1901    if (vifp->v_flags & VIFF_TUNNEL) {
1902	/* If tunnel options */
1903	ip_output(m, (struct mbuf *)0, (struct route *)0,
1904		  IP_FORWARDING, imo);
1905    } else {
1906	/* if physical interface option, extract the options and then send */
1907	error = ip_output(m, (struct mbuf *)0, (struct route *)0,
1908			  IP_FORWARDING, imo);
1909	FREE(imo, M_IPMOPTS);
1910
1911	if (mrtdebug & DEBUG_XMIT)
1912	    log(LOG_DEBUG, "phyint_send on vif %d err %d", vifp-viftable, error);
1913    }
1914    splx(s);
1915}
1916
1917/* determine the current time and then
1918 * the elapsed time (between the last time and time now)
1919 * in milliseconds & update the no. of tokens in the bucket
1920 */
1921static void
1922tbf_update_tokens(vifp)
1923    register struct vif *vifp;
1924{
1925    struct timeval tp;
1926    register u_long t;
1927    register u_long elapsed;
1928    register int s = splnet();
1929
1930    GET_TIME(tp);
1931
1932    t = tp.tv_sec*1000 + tp.tv_usec/1000;
1933
1934    elapsed = (t - vifp->v_tbf->last_pkt_t) * vifp->v_rate_limit /8;
1935    vifp->v_tbf->n_tok += elapsed;
1936    vifp->v_tbf->last_pkt_t = t;
1937
1938    if (vifp->v_tbf->n_tok > MAX_BKT_SIZE)
1939	vifp->v_tbf->n_tok = MAX_BKT_SIZE;
1940
1941    splx(s);
1942}
1943
1944static int
1945priority(vifp, ip)
1946    register struct vif *vifp;
1947    register struct ip *ip;
1948{
1949    register int prio;
1950
1951    /* temporary hack; may add general packet classifier some day */
1952
1953    /*
1954     * The UDP port space is divided up into four priority ranges:
1955     * [0, 16384)     : unclassified - lowest priority
1956     * [16384, 32768) : audio - highest priority
1957     * [32768, 49152) : whiteboard - medium priority
1958     * [49152, 65536) : video - low priority
1959     */
1960    if (ip->ip_p == IPPROTO_UDP) {
1961	struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2));
1962	switch (ntohs(udp->uh_dport) & 0xc000) {
1963	    case 0x4000:
1964		prio = 70;
1965		break;
1966	    case 0x8000:
1967		prio = 60;
1968		break;
1969	    case 0xc000:
1970		prio = 55;
1971		break;
1972	    default:
1973		prio = 50;
1974		break;
1975	}
1976	if (tbfdebug > 1)
1977		log(LOG_DEBUG, "port %x prio%d", ntohs(udp->uh_dport), prio);
1978    } else {
1979	    prio = 50;
1980    }
1981    return prio;
1982}
1983
1984/*
1985 * End of token bucket filter modifications
1986 */
1987
1988int
1989ip_rsvp_vif_init(so, m)
1990    struct socket *so;
1991    struct mbuf *m;
1992{
1993    int i;
1994    register int s;
1995
1996    if (rsvpdebug)
1997	printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n",
1998	       so->so_type, so->so_proto->pr_protocol);
1999
2000    if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
2001	return EOPNOTSUPP;
2002
2003    /* Check mbuf. */
2004    if (m == NULL || m->m_len != sizeof(int)) {
2005	return EINVAL;
2006    }
2007    i = *(mtod(m, int *));
2008
2009    if (rsvpdebug)
2010	printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n",i,rsvp_on);
2011
2012    s = splnet();
2013
2014    /* Check vif. */
2015    if (!legal_vif_num(i)) {
2016	splx(s);
2017	return EADDRNOTAVAIL;
2018    }
2019
2020    /* Check if socket is available. */
2021    if (viftable[i].v_rsvpd != NULL) {
2022	splx(s);
2023	return EADDRINUSE;
2024    }
2025
2026    viftable[i].v_rsvpd = so;
2027    /* This may seem silly, but we need to be sure we don't over-increment
2028     * the RSVP counter, in case something slips up.
2029     */
2030    if (!viftable[i].v_rsvp_on) {
2031	viftable[i].v_rsvp_on = 1;
2032	rsvp_on++;
2033    }
2034
2035    splx(s);
2036    return 0;
2037}
2038
2039int
2040ip_rsvp_vif_done(so, m)
2041    struct socket *so;
2042    struct mbuf *m;
2043{
2044	int i;
2045	register int s;
2046
2047    if (rsvpdebug)
2048	printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n",
2049	       so->so_type, so->so_proto->pr_protocol);
2050
2051    if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
2052	return EOPNOTSUPP;
2053
2054    /* Check mbuf. */
2055    if (m == NULL || m->m_len != sizeof(int)) {
2056	    return EINVAL;
2057    }
2058    i = *(mtod(m, int *));
2059
2060    s = splnet();
2061
2062    /* Check vif. */
2063    if (!legal_vif_num(i)) {
2064	splx(s);
2065        return EADDRNOTAVAIL;
2066    }
2067
2068    if (rsvpdebug)
2069	printf("ip_rsvp_vif_done: v_rsvpd = %x so = %x\n",
2070	       viftable[i].v_rsvpd, so);
2071
2072    viftable[i].v_rsvpd = NULL;
2073    /* This may seem silly, but we need to be sure we don't over-decrement
2074     * the RSVP counter, in case something slips up.
2075     */
2076    if (viftable[i].v_rsvp_on) {
2077	viftable[i].v_rsvp_on = 0;
2078	rsvp_on--;
2079    }
2080
2081    splx(s);
2082    return 0;
2083}
2084
2085void
2086ip_rsvp_force_done(so)
2087    struct socket *so;
2088{
2089    int vifi;
2090    register int s;
2091
2092    /* Don't bother if it is not the right type of socket. */
2093    if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
2094	return;
2095
2096    s = splnet();
2097
2098    /* The socket may be attached to more than one vif...this
2099     * is perfectly legal.
2100     */
2101    for (vifi = 0; vifi < numvifs; vifi++) {
2102	if (viftable[vifi].v_rsvpd == so) {
2103	    viftable[vifi].v_rsvpd = NULL;
2104	    /* This may seem silly, but we need to be sure we don't
2105	     * over-decrement the RSVP counter, in case something slips up.
2106	     */
2107	    if (viftable[vifi].v_rsvp_on) {
2108		viftable[vifi].v_rsvp_on = 0;
2109		rsvp_on--;
2110	    }
2111	}
2112    }
2113
2114    splx(s);
2115    return;
2116}
2117
2118void
2119rsvp_input(m, iphlen)
2120	struct mbuf *m;
2121	int iphlen;
2122{
2123    int vifi;
2124    register struct ip *ip = mtod(m, struct ip *);
2125    static struct sockaddr_in rsvp_src = { sizeof rsvp_src, AF_INET };
2126    register int s;
2127    struct ifnet *ifp;
2128
2129    if (rsvpdebug)
2130	printf("rsvp_input: rsvp_on %d\n",rsvp_on);
2131
2132    /* Can still get packets with rsvp_on = 0 if there is a local member
2133     * of the group to which the RSVP packet is addressed.  But in this
2134     * case we want to throw the packet away.
2135     */
2136    if (!rsvp_on) {
2137	m_freem(m);
2138	return;
2139    }
2140
2141    /* If the old-style non-vif-associated socket is set, then use
2142     * it and ignore the new ones.
2143     */
2144    if (ip_rsvpd != NULL) {
2145	if (rsvpdebug)
2146	    printf("rsvp_input: Sending packet up old-style socket\n");
2147	rip_input(m);
2148	return;
2149    }
2150
2151    s = splnet();
2152
2153    if (rsvpdebug)
2154	printf("rsvp_input: check vifs\n");
2155
2156#ifdef DIAGNOSTIC
2157    if (!(m->m_flags & M_PKTHDR))
2158	    panic("rsvp_input no hdr");
2159#endif
2160
2161    ifp = m->m_pkthdr.rcvif;
2162    /* Find which vif the packet arrived on. */
2163    for (vifi = 0; vifi < numvifs; vifi++) {
2164	if (viftable[vifi].v_ifp == ifp)
2165 		break;
2166 	}
2167
2168    if (vifi == numvifs) {
2169	/* Can't find vif packet arrived on. Drop packet. */
2170	if (rsvpdebug)
2171	    printf("rsvp_input: Can't find vif for packet...dropping it.\n");
2172	m_freem(m);
2173	splx(s);
2174	return;
2175    }
2176
2177    if (rsvpdebug)
2178	printf("rsvp_input: check socket\n");
2179
2180    if (viftable[vifi].v_rsvpd == NULL) {
2181	/* drop packet, since there is no specific socket for this
2182	 * interface */
2183	    if (rsvpdebug)
2184		    printf("rsvp_input: No socket defined for vif %d\n",vifi);
2185	    m_freem(m);
2186	    splx(s);
2187	    return;
2188    }
2189    rsvp_src.sin_addr = ip->ip_src;
2190
2191    if (rsvpdebug && m)
2192	printf("rsvp_input: m->m_len = %d, sbspace() = %d\n",
2193	       m->m_len,sbspace(&(viftable[vifi].v_rsvpd->so_rcv)));
2194
2195    if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0)
2196	if (rsvpdebug)
2197	    printf("rsvp_input: Failed to append to socket\n");
2198    else
2199	if (rsvpdebug)
2200	    printf("rsvp_input: send packet up\n");
2201
2202    splx(s);
2203}
2204
2205#ifdef MROUTE_LKM
2206#include <sys/conf.h>
2207#include <sys/exec.h>
2208#include <sys/sysent.h>
2209#include <sys/lkm.h>
2210
2211MOD_MISC("ip_mroute_mod")
2212
2213static int
2214ip_mroute_mod_handle(struct lkm_table *lkmtp, int cmd)
2215{
2216	int i;
2217	struct lkm_misc	*args = lkmtp->private.lkm_misc;
2218	int err = 0;
2219
2220	switch(cmd) {
2221		static int (*old_ip_mrouter_cmd)();
2222		static int (*old_ip_mrouter_done)();
2223		static int (*old_ip_mforward)();
2224		static int (*old_mrt_ioctl)();
2225		static void (*old_proto4_input)();
2226		static int (*old_legal_vif_num)();
2227		extern struct protosw inetsw[];
2228
2229	case LKM_E_LOAD:
2230		if(lkmexists(lkmtp) || ip_mrtproto)
2231		  return(EEXIST);
2232		old_ip_mrouter_cmd = ip_mrouter_cmd;
2233		ip_mrouter_cmd = X_ip_mrouter_cmd;
2234		old_ip_mrouter_done = ip_mrouter_done;
2235		ip_mrouter_done = X_ip_mrouter_done;
2236		old_ip_mforward = ip_mforward;
2237		ip_mforward = X_ip_mforward;
2238		old_mrt_ioctl = mrt_ioctl;
2239		mrt_ioctl = X_mrt_ioctl;
2240              old_proto4_input = inetsw[ip_protox[ENCAP_PROTO]].pr_input;
2241              inetsw[ip_protox[ENCAP_PROTO]].pr_input = X_ipip_input;
2242		old_legal_vif_num = legal_vif_num;
2243		legal_vif_num = X_legal_vif_num;
2244		ip_mrtproto = IGMP_DVMRP;
2245
2246		printf("\nIP multicast routing loaded\n");
2247		break;
2248
2249	case LKM_E_UNLOAD:
2250		if (ip_mrouter)
2251		  return EINVAL;
2252
2253		ip_mrouter_cmd = old_ip_mrouter_cmd;
2254		ip_mrouter_done = old_ip_mrouter_done;
2255		ip_mforward = old_ip_mforward;
2256		mrt_ioctl = old_mrt_ioctl;
2257              inetsw[ip_protox[ENCAP_PROTO]].pr_input = old_proto4_input;
2258		legal_vif_num = old_legal_vif_num;
2259		ip_mrtproto = 0;
2260		break;
2261
2262	default:
2263		err = EINVAL;
2264		break;
2265	}
2266
2267	return(err);
2268}
2269
/*
 * Module entry point.  The whole body is the DISPATCH macro, which is
 * given ip_mroute_mod_handle for two of its handler slots and nosys for
 * the third (presumably load, unload and stat, in that order -- confirm
 * against <sys/lkm.h>).  There is no explicit return statement here, so
 * the return value presumably comes from inside DISPATCH -- TODO confirm.
 */
int
ip_mroute_mod(struct lkm_table *lkmtp, int cmd, int ver) {
	DISPATCH(lkmtp, cmd, ver, ip_mroute_mod_handle, ip_mroute_mod_handle,
		 nosys);
}
2275
2276#endif /* MROUTE_LKM */
2277#endif /* MROUTING */
2278