ip_mroute.c revision 9334
/*
 * IP multicast forwarding procedures
 *
 * Written by David Waitzman, BBN Labs, August 1988.
 * Modified by Steve Deering, Stanford, February 1989.
 * Modified by Mark J. Steiglitz, Stanford, May, 1991
 * Modified by Van Jacobson, LBL, January 1993
 * Modified by Ajit Thyagarajan, PARC, August 1993
 * Modified by Bill Fenner, PARC, April 1995
 *
 * MROUTING Revision: 3.5
 * $Id$
 */
14281494Sandrew
15281494Sandrew
16281494Sandrew#include <sys/param.h>
17281494Sandrew#include <sys/systm.h>
18281494Sandrew#include <sys/mbuf.h>
19281494Sandrew#include <sys/socket.h>
20281494Sandrew#include <sys/socketvar.h>
21281494Sandrew#include <sys/protosw.h>
22281494Sandrew#include <sys/errno.h>
23281494Sandrew#include <sys/time.h>
24281494Sandrew#include <sys/kernel.h>
25281494Sandrew#include <sys/ioctl.h>
26281494Sandrew#include <sys/syslog.h>
27281494Sandrew#include <sys/queue.h>
28281494Sandrew#include <net/if.h>
29287487Sandrew#include <net/route.h>
30281494Sandrew#include <netinet/in.h>
31281494Sandrew#include <netinet/in_systm.h>
32281494Sandrew#include <netinet/ip.h>
33281494Sandrew#include <netinet/ip_var.h>
34281494Sandrew#include <netinet/in_pcb.h>
35281494Sandrew#include <netinet/in_var.h>
36281494Sandrew#include <netinet/igmp.h>
37281494Sandrew#include <netinet/igmp_var.h>
38281494Sandrew#include <netinet/ip_mroute.h>
39281494Sandrew#include <netinet/udp.h>
40281494Sandrew
/*
 * In-place byte-order conversion helpers, supplied only when the system
 * headers have not already defined NTOHL.
 */
#ifndef NTOHL
#if BYTE_ORDER == BIG_ENDIAN
/* Big-endian build: the helpers expand to nothing. */
#define NTOHL(d)
#define NTOHS(d)
#define HTONL(d)
#define HTONS(d)
#else
/* Otherwise each helper rewrites the lvalue d with its converted value. */
#define NTOHL(d) ((d) = ntohl((d)))
#define NTOHS(d) ((d) = ntohs((u_short)(d)))
#define HTONL(d) ((d) = htonl((d)))
#define HTONS(d) ((d) = htons((u_short)(d)))
#endif
#endif
54281494Sandrew
55281494Sandrewextern int rsvp_on;
56281494Sandrew
57281494Sandrew#ifndef MROUTING
58281494Sandrew/*
59291937Skib * Dummy routines and globals used when multicast routing is not compiled in.
60281494Sandrew */
61281494Sandrew
62281494Sandrewstruct socket  *ip_mrouter  = NULL;
63281494Sandrewu_int		ip_mrtproto = 0;
64281494Sandrewstruct mrtstat	mrtstat;
65281494Sandrewu_int		rsvpdebug = 0;
66281494Sandrew
/*
 * Stub MRT setsockopt handler: ignores its arguments and always
 * reports EOPNOTSUPP.
 */
int
_ip_mrouter_set(cmd, so, m)
	int cmd;
	struct socket *so;
	struct mbuf *m;
{
	return EOPNOTSUPP;
}

/* Hook variable, initialised to the stub above. */
int (*ip_mrouter_set)(int, struct socket *, struct mbuf *) = _ip_mrouter_set;
77281494Sandrew
78281494Sandrew
/*
 * Stub MRT getsockopt handler: ignores its arguments and always
 * reports EOPNOTSUPP.
 */
int
_ip_mrouter_get(cmd, so, m)
	int cmd;
	struct socket *so;
	struct mbuf **m;
{
	return EOPNOTSUPP;
}

/* Hook variable, initialised to the stub above. */
int (*ip_mrouter_get)(int, struct socket *, struct mbuf **) = _ip_mrouter_get;
89281494Sandrew
/*
 * Stub shutdown routine: there is nothing to tear down, so it simply
 * returns 0.
 */
int
_ip_mrouter_done()
{
	return 0;
}

/* Hook variable, initialised to the stub above. */
int (*ip_mrouter_done)(void) = _ip_mrouter_done;
97281494Sandrew
/*
 * Stub forwarding routine: performs no work and returns 0 for every
 * packet.
 */
int
_ip_mforward(ip, ifp, m, imo)
	struct ip *ip;
	struct ifnet *ifp;
	struct mbuf *m;
	struct ip_moptions *imo;
{
	return 0;
}

/* Hook variable, initialised to the stub above. */
int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
		   struct ip_moptions *) = _ip_mforward;
110281494Sandrew
111281494Sandrewint
112281494Sandrew_mrt_ioctl(int req, caddr_t data, struct proc *p)
113281494Sandrew{
114281494Sandrew	return EOPNOTSUPP;
115281494Sandrew}
116281494Sandrew
117281494Sandrewint (*mrt_ioctl)(int, caddr_t, struct proc *) = _mrt_ioctl;
118281494Sandrew
119281494Sandrewvoid
120281494Sandrewrsvp_input(m, iphlen)		/* XXX must fixup manually */
121281494Sandrew	struct mbuf *m;
122281494Sandrew	int iphlen;
123281494Sandrew{
124281494Sandrew    /* Can still get packets with rsvp_on = 0 if there is a local member
125285316Sandrew     * of the group to which the RSVP packet is addressed.  But in this
126285316Sandrew     * case we want to throw the packet away.
127285316Sandrew     */
128285316Sandrew    if (!rsvp_on) {
129285316Sandrew	m_freem(m);
130285316Sandrew	return;
131285316Sandrew    }
132281494Sandrew
133281494Sandrew    if (ip_rsvpd != NULL) {
134281494Sandrew	if (rsvpdebug)
135281494Sandrew	    printf("rsvp_input: Sending packet up old-style socket\n");
136281494Sandrew	rip_input(m);
137281494Sandrew	return;
138281494Sandrew    }
139281494Sandrew    /* Drop the packet */
140281494Sandrew    m_freem(m);
141281494Sandrew}
142281494Sandrew
/*
 * IP-in-IP input for kernels without MROUTING: the packet is handed
 * straight to rip_input().
 */
void
ipip_input(struct mbuf *m)	/* XXX must fixup manually */
{
	rip_input(m);
}
146281494Sandrew
/* No vif-number validator is installed in this configuration. */
int (*legal_vif_num)(int) = NULL;
148281494Sandrew
149281494Sandrew/*
150281494Sandrew * This should never be called, since IP_MULTICAST_VIF should fail, but
151281494Sandrew * just in case it does get called, the code a little lower in ip_output
152281494Sandrew * will assign the packet a local address.
153281494Sandrew */
154281494Sandrewu_long
155281494Sandrew_ip_mcast_src(int vifi) { return INADDR_ANY; }
156281494Sandrewu_long (*ip_mcast_src)(int) = _ip_mcast_src;
157281494Sandrew
/*
 * Stub: per-vif RSVP setup is unavailable in this configuration, so the
 * request is rejected with EINVAL.
 */
int
ip_rsvp_vif_init(so, m)
    struct socket *so;
    struct mbuf *m;
{
    return EINVAL;
}
165281494Sandrew
/*
 * Stub: per-vif RSVP teardown is unavailable in this configuration, so
 * the request is rejected with EINVAL.
 */
int
ip_rsvp_vif_done(so, m)
    struct socket *so;
    struct mbuf *m;
{
    return EINVAL;
}
173281494Sandrew
/*
 * Stub: nothing to release for the given socket in this configuration.
 */
void
ip_rsvp_force_done(so)
    struct socket *so;
{
}
180281494Sandrew
181281494Sandrew#else /* MROUTING */
182281494Sandrew
/* Non-zero when the mbuf has the M_EXT flag set. */
#define M_HASCL(m)	((m)->m_flags & M_EXT)

/* Size of an IPv4 address, and a byte-wise equality test on two of them. */
#define INSIZ		sizeof(struct in_addr)
#define	same(a1, a2) \
	(!bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ))

#define MT_MRTABLE MT_RTABLE	/* since nothing else uses it */
190281494Sandrew
191281494Sandrew/*
192281494Sandrew * Globals.  All but ip_mrouter and ip_mrtproto could be static,
193281494Sandrew * except for netstat or debugging purposes.
194281494Sandrew */
195281494Sandrew#ifndef MROUTE_LKM
196281494Sandrewstruct socket  *ip_mrouter  = NULL;
197281494Sandrewstruct mrtstat	mrtstat;
198281494Sandrew
199281494Sandrewint		ip_mrtproto = IGMP_DVMRP;    /* for netstat only */
200281494Sandrew#else /* MROUTE_LKM */
201281494Sandrewextern struct mrtstat mrtstat;
202281494Sandrewextern int ip_mrtproto;
203281494Sandrew#endif
204281494Sandrew
205281494Sandrew#define NO_RTE_FOUND 	0x1
206281494Sandrew#define RTE_FOUND	0x2
207281494Sandrew
208281494Sandrewstruct mbuf    *mfctable[MFCTBLSIZ];
209281494Sandrewu_char		nexpire[MFCTBLSIZ];
210281494Sandrewstruct vif	viftable[MAXVIFS];
211281494Sandrewu_int		mrtdebug = 0;	  /* debug level 	*/
212281494Sandrew#define		DEBUG_MFC	0x02
213281494Sandrew#define		DEBUG_FORWARD	0x04
214281494Sandrew#define		DEBUG_EXPIRE	0x08
215286073Semaste#define		DEBUG_XMIT	0x10
216281494Sandrewu_int       	tbfdebug = 0;     /* tbf debug level 	*/
217281494Sandrewu_int		rsvpdebug = 0;	  /* rsvp debug level   */
218281494Sandrew
219281494Sandrew#define		EXPIRE_TIMEOUT	(hz / 4)	/* 4x / second		*/
220281494Sandrew#define		UPCALL_EXPIRE	6		/* number of timeouts	*/
221281494Sandrew
222286073Semaste/*
223281494Sandrew * Define the token bucket filter structures
224281494Sandrew * tbftable -> each vif has one of these for storing info
225281494Sandrew * qtable   -> each interface has an associated queue of pkts
226281494Sandrew */
227281494Sandrew
228281494Sandrewstruct tbf tbftable[MAXVIFS];
229286073Semastestruct pkt_queue qtable[MAXVIFS][MAXQSIZE];
230281494Sandrew
231281494Sandrew/*
232281494Sandrew * 'Interfaces' associated with decapsulator (so we can tell
233281494Sandrew * packets that went through it from ones that get reflected
234281494Sandrew * by a broken gateway).  These interfaces are never linked into
235281494Sandrew * the system ifnet list & no routes point to them.  I.e., packets
236281494Sandrew * can't be sent this way.  They only exist as a placeholder for
237295142Sandrew * multicast source verification.
238295142Sandrew */
239281494Sandrewstruct ifnet multicast_decap_if[MAXVIFS];
240281494Sandrew
241281494Sandrew#define ENCAP_TTL 64
242281494Sandrew#define ENCAP_PROTO IPPROTO_IPIP	/* 4 */
243281494Sandrew
244281494Sandrew/* prototype IP hdr for encapsulated packets */
245281494Sandrewstruct ip multicast_encap_iphdr = {
246295142Sandrew#if BYTE_ORDER == LITTLE_ENDIAN
247295142Sandrew	sizeof(struct ip) >> 2, IPVERSION,
248281494Sandrew#else
249281494Sandrew	IPVERSION, sizeof(struct ip) >> 2,
250281494Sandrew#endif
251281494Sandrew	0,				/* tos */
252281494Sandrew	sizeof(struct ip),		/* total length */
253281494Sandrew	0,				/* id */
254281494Sandrew	0,				/* frag offset */
255281494Sandrew	ENCAP_TTL, ENCAP_PROTO,
256281494Sandrew	0,				/* checksum */
257281494Sandrew};
258289502Sandrew
259289502Sandrew/*
260289502Sandrew * Private variables.
261289502Sandrew */
262289502Sandrewstatic vifi_t	   numvifs = 0;
263289502Sandrewstatic void (*encap_oldrawip)() = 0;
264289502Sandrewstatic int have_encap_tunnel = 0;
265281494Sandrew
266281494Sandrew/*
267281494Sandrew * one-back cache used by ipip_input to locate a tunnel's vif
268281494Sandrew * given a datagram's src ip address.
269281494Sandrew */
270281494Sandrewstatic u_long last_encap_src;
271281494Sandrewstatic struct vif *last_encap_vif;
272281494Sandrew
273281494Sandrewstatic int get_sg_cnt(struct sioc_sg_req *);
274281494Sandrewstatic int get_vif_cnt(struct sioc_vif_req *);
275281494Sandrewint ip_mrouter_init(struct socket *, struct mbuf *);
276281494Sandrewstatic int add_vif(struct vifctl *);
277281494Sandrewstatic int del_vif(vifi_t *);
278281494Sandrewstatic int add_mfc(struct mfcctl *);
279281494Sandrewstatic int del_mfc(struct mfcctl *);
280285334Sandrewstatic int get_version(struct mbuf *);
281281494Sandrewstatic int get_assert(struct mbuf *);
282285334Sandrewstatic int set_assert(int *);
283285334Sandrewstatic void expire_upcalls(void *);
284281494Sandrewstatic int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *,
285285334Sandrew		  vifi_t);
286285334Sandrewstatic void phyint_send(struct ip *, struct vif *, struct mbuf *);
287281494Sandrewstatic void encap_send(struct ip *, struct vif *, struct mbuf *);
288281494Sandrewstatic void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long,
289281494Sandrew		 struct ip_moptions *);
290281494Sandrewstatic void tbf_queue(struct vif *, struct mbuf *, struct ip *, struct ip_moptions *);
291281494Sandrewstatic void tbf_process_q(struct vif *);
292281494Sandrewstatic void tbf_dequeue(struct vif *, int);
293281494Sandrewstatic void tbf_reprocess_q(void *);
294281494Sandrewstatic int tbf_dq_sel(struct vif *, struct ip *);
295281494Sandrewstatic void tbf_send_packet(struct vif *, struct mbuf *, struct ip_moptions *);
296281494Sandrewstatic void tbf_update_tokens(struct vif *);
297281494Sandrewstatic int priority(struct vif *, struct ip *);
298281494Sandrewvoid multiencap_decap(struct mbuf *);
299281494Sandrew
/*
 * Non-zero when special PIM assert processing is enabled.
 */
static int pim_assert;

/*
 * Minimum spacing between assert notification messages, in microseconds.
 */
#define ASSERT_MSG_TIME		3000000
308281494Sandrew
/*
 * Hash a (source, group) pair: both addresses are folded with
 * themselves shifted right by 10 and by 20 bits, and the XOR of all six
 * terms is reduced with MFCHASHMOD.
 */
#define MFCHASH(a, g) MFCHASHMOD((a) ^ ((a) >> 10) ^ ((a) >> 20) ^ \
			(g) ^ ((g) >> 10) ^ ((g) >> 20))
314281494Sandrew
/*
 * Find a route for a given origin IP address and Multicast group address
 * Type of service parameter to be added in the future!!!
 *
 * o  - origin (source) address to match against mfc_origin.s_addr
 * g  - group address to match against mfc_mcastgrp.s_addr
 * rt - lvalue that receives the matching struct mfc *, or NULL
 *
 * Walks the hash chain selected by MFCHASH(o, g) and accepts only an
 * entry whose mbuf has m_act == NULL.  mrts_mfc_lookups is bumped on
 * every use and mrts_mfc_misses whenever nothing matched.
 *
 * Every argument is parenthesised in the expansion so that an argument
 * containing a low-precedence operator is compared as a whole.
 */

#define MFCFIND(o, g, rt) { \
	register struct mbuf *_mb_rt = mfctable[MFCHASH(o,g)]; \
	register struct mfc *_rt = NULL; \
	(rt) = NULL; \
	++mrtstat.mrts_mfc_lookups; \
	while (_mb_rt) { \
		_rt = mtod(_mb_rt, struct mfc *); \
		if ((_rt->mfc_origin.s_addr == (o)) && \
		    (_rt->mfc_mcastgrp.s_addr == (g)) && \
		    (_mb_rt->m_act == NULL)) { \
			(rt) = _rt; \
			break; \
		} \
		_mb_rt = _mb_rt->m_next; \
	} \
	if ((rt) == NULL) { \
		++mrtstat.mrts_mfc_misses; \
	} \
}
339281494Sandrew
340281494Sandrew
/*
 * Macros to compute elapsed time efficiently
 * Borrowed from Van Jacobson's scheduling code
 *
 * TV_DELTA stores (a - b) in microseconds into delta.  Differences of
 * one or two whole seconds are handled by plain additions; any other
 * non-zero difference in seconds is scaled by a multiplication.
 */
#define TV_DELTA(a, b, delta) { \
	    register int xxs; \
		\
	    delta = (a).tv_usec - (b).tv_usec; \
	    xxs = (a).tv_sec - (b).tv_sec; \
	    if (xxs == 1) { \
		delta += 1000000; \
	    } else if (xxs == 2) { \
		delta += 1000000; \
		delta += 1000000; \
	    } else if (xxs != 0) { \
		delta += (1000000 * xxs); \
	    } \
}
362281494Sandrew
/* True when timeval a is strictly earlier than timeval b. */
#define TV_LT(a, b) ((a).tv_sec < (b).tv_sec || \
	      ((a).tv_sec == (b).tv_sec && (a).tv_usec < (b).tv_usec))
365281494Sandrew
#ifdef UPCALL_TIMING
/* Upcall timing instrumentation, built only when UPCALL_TIMING is set. */
u_long upcall_data[51];
static void collate(struct timeval *);
#endif /* UPCALL_TIMING */
370281494Sandrew
371281494Sandrew
372281494Sandrew/*
373281494Sandrew * Handle MRT setsockopt commands to modify the multicast routing tables.
374281494Sandrew */
375281494Sandrewint
376281494SandrewX_ip_mrouter_set(cmd, so, m)
377281494Sandrew    int cmd;
378281494Sandrew    struct socket *so;
379281494Sandrew    struct mbuf *m;
380281494Sandrew{
381281494Sandrew   if (cmd != MRT_INIT && so != ip_mrouter) return EACCES;
382281494Sandrew
383281494Sandrew    switch (cmd) {
384281494Sandrew	case MRT_INIT:     return ip_mrouter_init(so, m);
385281494Sandrew	case MRT_DONE:     return ip_mrouter_done();
386281494Sandrew	case MRT_ADD_VIF:  return add_vif (mtod(m, struct vifctl *));
387281494Sandrew	case MRT_DEL_VIF:  return del_vif (mtod(m, vifi_t *));
388281494Sandrew	case MRT_ADD_MFC:  return add_mfc (mtod(m, struct mfcctl *));
389281494Sandrew	case MRT_DEL_MFC:  return del_mfc (mtod(m, struct mfcctl *));
390286134Sandrew	case MRT_ASSERT:   return set_assert(mtod(m, int *));
391286134Sandrew	default:             return EOPNOTSUPP;
392286134Sandrew    }
393286134Sandrew}
394286134Sandrew
395281494Sandrew#ifndef MROUTE_LKM
396281494Sandrewint (*ip_mrouter_set)(int, struct socket *, struct mbuf *) = X_ip_mrouter_set;
397281494Sandrew#endif
398281494Sandrew
399281494Sandrew/*
400281494Sandrew * Handle MRT getsockopt commands
401281494Sandrew */
402281494Sandrewint
403281494SandrewX_ip_mrouter_get(cmd, so, m)
404281494Sandrew    int cmd;
405286073Semaste    struct socket *so;
406281494Sandrew    struct mbuf **m;
407281494Sandrew{
408281494Sandrew    struct mbuf *mb;
409281494Sandrew
410281494Sandrew    if (so != ip_mrouter) return EACCES;
411281494Sandrew
412281494Sandrew    *m = mb = m_get(M_WAIT, MT_SOOPTS);
413286073Semaste
414281494Sandrew    switch (cmd) {
415281494Sandrew	case MRT_VERSION:   return get_version(mb);
416281494Sandrew	case MRT_ASSERT:    return get_assert(mb);
417281494Sandrew	default:            return EOPNOTSUPP;
418281494Sandrew    }
419284273Sandrew}
420284273Sandrew
421281494Sandrew#ifndef MROUTE_LKM
422281494Sandrewint (*ip_mrouter_get)(int, struct socket *, struct mbuf **) = X_ip_mrouter_get;
423281494Sandrew#endif
424281494Sandrew
425281494Sandrew/*
426281494Sandrew * Handle ioctl commands to obtain information from the cache
427281494Sandrew */
428281494Sandrewint
429281494SandrewX_mrt_ioctl(cmd, data)
430281494Sandrew    int cmd;
431281494Sandrew    caddr_t data;
432281494Sandrew{
433281494Sandrew    int error = 0;
434281494Sandrew
435281494Sandrew    switch (cmd) {
436281494Sandrew	case (SIOCGETVIFCNT):
437281494Sandrew	    return (get_vif_cnt((struct sioc_vif_req *)data));
438281494Sandrew	    break;
439281494Sandrew	case (SIOCGETSGCNT):
440281494Sandrew	    return (get_sg_cnt((struct sioc_sg_req *)data));
441281494Sandrew	    break;
442281494Sandrew	default:
443281494Sandrew	    return (EINVAL);
444281494Sandrew	    break;
445281494Sandrew    }
446281494Sandrew    return error;
447281494Sandrew}
448281494Sandrew
449281494Sandrew#ifndef MROUTE_LKM
450281494Sandrewint (*mrt_ioctl)(int, caddr_t, struct proc *) = X_mrt_ioctl;
451281494Sandrew#endif
452281494Sandrew
453281494Sandrew/*
454281494Sandrew * returns the packet, byte, rpf-failure count for the source group provided
455281494Sandrew */
456281494Sandrewstatic int
457281494Sandrewget_sg_cnt(req)
458281494Sandrew    register struct sioc_sg_req *req;
459281494Sandrew{
460281494Sandrew    register struct mfc *rt;
461281494Sandrew    int s;
462281494Sandrew
463281494Sandrew    s = splnet();
464281494Sandrew    MFCFIND(req->src.s_addr, req->grp.s_addr, rt);
465281494Sandrew    splx(s);
466281494Sandrew    if (rt != NULL) {
467281494Sandrew	req->pktcnt = rt->mfc_pkt_cnt;
468281494Sandrew	req->bytecnt = rt->mfc_byte_cnt;
469281494Sandrew	req->wrong_if = rt->mfc_wrong_if;
470281494Sandrew    } else
471281494Sandrew	req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
472281494Sandrew
473281494Sandrew    return 0;
474281494Sandrew}
475281494Sandrew
476281494Sandrew/*
477281494Sandrew * returns the input and output packet and byte counts on the vif provided
478281494Sandrew */
479281494Sandrewstatic int
480281494Sandrewget_vif_cnt(req)
481281494Sandrew    register struct sioc_vif_req *req;
482281494Sandrew{
483281494Sandrew    register vifi_t vifi = req->vifi;
484281494Sandrew
485281494Sandrew    if (vifi >= numvifs) return EINVAL;
486281494Sandrew
487281494Sandrew    req->icount = viftable[vifi].v_pkt_in;
488281494Sandrew    req->ocount = viftable[vifi].v_pkt_out;
489281494Sandrew    req->ibytes = viftable[vifi].v_bytes_in;
490281494Sandrew    req->obytes = viftable[vifi].v_bytes_out;
491281494Sandrew
492281494Sandrew    return 0;
493281494Sandrew}
494281494Sandrew
495281494Sandrew/*
496281494Sandrew * Enable multicast routing
497281494Sandrew */
498281494Sandrewint
499281494Sandrewip_mrouter_init(so, m)
500281494Sandrew	struct socket *so;
501281494Sandrew	struct mbuf *m;
502281494Sandrew{
503281494Sandrew    int *v;
504281494Sandrew    int i;
505281494Sandrew
506281494Sandrew    if (mrtdebug)
507281494Sandrew	log(LOG_DEBUG,"ip_mrouter_init: so_type = %d, pr_protocol = %d",
508281494Sandrew		so->so_type, so->so_proto->pr_protocol);
509281494Sandrew
510281494Sandrew    if (so->so_type != SOCK_RAW ||
511281494Sandrew	so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP;
512291937Skib
513281494Sandrew    if (!m || (m->m_len != sizeof(int *)))
514281494Sandrew	return ENOPROTOOPT;
515281494Sandrew
516281494Sandrew    v = mtod(m, int *);
517281494Sandrew    if (*v != 1)
518281494Sandrew	return ENOPROTOOPT;
519281494Sandrew
520281494Sandrew    if (ip_mrouter != NULL) return EADDRINUSE;
521281494Sandrew
522281494Sandrew    ip_mrouter = so;
523281494Sandrew
524281494Sandrew    bzero((caddr_t)mfctable, sizeof(mfctable));
525281494Sandrew    bzero((caddr_t)nexpire, sizeof(nexpire));
526281494Sandrew
527281494Sandrew    pim_assert = 0;
528281494Sandrew
529281494Sandrew    timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT);
530281494Sandrew
531281494Sandrew    if (mrtdebug)
532281494Sandrew	log(LOG_DEBUG, "ip_mrouter_init");
533294930Sjhb
534281494Sandrew    return 0;
535281494Sandrew}
536281494Sandrew
537281494Sandrew/*
538281494Sandrew * Disable multicast routing
539281494Sandrew */
540281494Sandrewint
541281494SandrewX_ip_mrouter_done()
542281494Sandrew{
543281494Sandrew    vifi_t vifi;
544281494Sandrew    int i;
545281494Sandrew    struct ifnet *ifp;
546281494Sandrew    struct ifreq ifr;
547281494Sandrew    struct mbuf *mb_rt;
548281494Sandrew    struct mfc *rt;
549281494Sandrew    struct mbuf *m;
550281494Sandrew    struct rtdetq *rte;
551281494Sandrew    int s;
552281494Sandrew
553281494Sandrew    s = splnet();
554281494Sandrew
555281494Sandrew    /*
556281494Sandrew     * For each phyint in use, disable promiscuous reception of all IP
557281494Sandrew     * multicasts.
558281494Sandrew     */
559281494Sandrew    for (vifi = 0; vifi < numvifs; vifi++) {
560281494Sandrew	if (viftable[vifi].v_lcl_addr.s_addr != 0 &&
561281494Sandrew	    !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
562281494Sandrew	    ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
563281494Sandrew	    ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr
564281494Sandrew								= INADDR_ANY;
565281494Sandrew	    ifp = viftable[vifi].v_ifp;
566281494Sandrew	    (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
567281494Sandrew	}
568281494Sandrew    }
569281494Sandrew    bzero((caddr_t)qtable, sizeof(qtable));
570281494Sandrew    bzero((caddr_t)tbftable, sizeof(tbftable));
571291937Skib    bzero((caddr_t)viftable, sizeof(viftable));
572291937Skib    numvifs = 0;
573291937Skib    pim_assert = 0;
574291937Skib
575291937Skib    untimeout(expire_upcalls, (caddr_t)NULL);
576291937Skib
577281494Sandrew    /*
578281494Sandrew     * Free all multicast forwarding cache entries.
579281494Sandrew     */
580281494Sandrew    for (i = 0; i < MFCTBLSIZ; i++) {
581281494Sandrew	mb_rt = mfctable[i];
582281494Sandrew	while (mb_rt) {
583281494Sandrew	    if (mb_rt->m_act != NULL) {
584281494Sandrew		while (mb_rt->m_act) {
585281494Sandrew		    m = mb_rt->m_act;
586281494Sandrew		    mb_rt->m_act = m->m_act;
587281494Sandrew		    rte = mtod(m, struct rtdetq *);
588281494Sandrew		    m_freem(rte->m);
589281494Sandrew		    m_free(m);
590281494Sandrew		}
591281494Sandrew	    }
592281494Sandrew	    mb_rt = m_free(mb_rt);
593281494Sandrew	}
594281494Sandrew    }
595281494Sandrew
596281494Sandrew    bzero((caddr_t)mfctable, sizeof(mfctable));
597281494Sandrew
598281494Sandrew    /*
599281494Sandrew     * Reset de-encapsulation cache
600281494Sandrew     */
601281494Sandrew    last_encap_src = NULL;
602281494Sandrew    last_encap_vif = NULL;
603281494Sandrew    have_encap_tunnel = 0;
604281494Sandrew
605281494Sandrew    ip_mrouter = NULL;
606281494Sandrew
607281494Sandrew    splx(s);
608281494Sandrew
609281494Sandrew    if (mrtdebug)
610281494Sandrew	log(LOG_DEBUG, "ip_mrouter_done");
611281494Sandrew
612281494Sandrew    return 0;
613281494Sandrew}
614281494Sandrew
615281494Sandrew#ifndef MROUTE_LKM
616281494Sandrewint (*ip_mrouter_done)(void) = X_ip_mrouter_done;
617281494Sandrew#endif
618281494Sandrew
619281494Sandrewstatic int
620281494Sandrewget_version(mb)
621281494Sandrew    struct mbuf *mb;
622281494Sandrew{
623281494Sandrew    int *v;
624281494Sandrew
625281494Sandrew    v = mtod(mb, int *);
626281494Sandrew
627281494Sandrew    *v = 0x0305;	/* XXX !!!! */
628281494Sandrew    mb->m_len = sizeof(int);
629281494Sandrew
630281494Sandrew    return 0;
631281494Sandrew}
632281494Sandrew
633281494Sandrew/*
634281494Sandrew * Set PIM assert processing global
635281494Sandrew */
636281494Sandrewstatic int
637281494Sandrewset_assert(i)
638281494Sandrew    int *i;
639281494Sandrew{
640281494Sandrew    if ((*i != 1) && (*i != 0))
641281494Sandrew	return EINVAL;
642281494Sandrew
643281494Sandrew    pim_assert = *i;
644281494Sandrew
645281494Sandrew    return 0;
646281494Sandrew}
647281494Sandrew
648281494Sandrew/*
649281494Sandrew * Get PIM assert processing global
650281494Sandrew */
651281494Sandrewstatic int
652281494Sandrewget_assert(m)
653281494Sandrew    struct mbuf *m;
654281494Sandrew{
655281494Sandrew    int *i;
656281494Sandrew
657281494Sandrew    i = mtod(m, int *);
658281494Sandrew
659281494Sandrew    *i = pim_assert;
660281494Sandrew
661281494Sandrew    return 0;
662281494Sandrew}
663281494Sandrew
664281494Sandrew/*
665281494Sandrew * Add a vif to the vif table
666281494Sandrew */
667281494Sandrewstatic int
668281494Sandrewadd_vif(vifcp)
669281494Sandrew    register struct vifctl *vifcp;
670281494Sandrew{
671281494Sandrew    register struct vif *vifp = viftable + vifcp->vifc_vifi;
672296266Swma    static struct sockaddr_in sin = {sizeof sin, AF_INET};
673296266Swma    struct ifaddr *ifa;
674296266Swma    struct ifnet *ifp;
675296266Swma    struct ifreq ifr;
676296266Swma    int error, s;
677296266Swma    struct tbf *v_tbf = tbftable + vifcp->vifc_vifi;
678296266Swma
679296266Swma    if (vifcp->vifc_vifi >= MAXVIFS)  return EINVAL;
680296266Swma    if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE;
681296266Swma
682296266Swma    /* Find the interface with an address in AF_INET family */
683296266Swma    sin.sin_addr = vifcp->vifc_lcl_addr;
684296266Swma    ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
685296266Swma    if (ifa == 0) return EADDRNOTAVAIL;
686281494Sandrew    ifp = ifa->ifa_ifp;
687281494Sandrew
688281494Sandrew    if (vifcp->vifc_flags & VIFF_TUNNEL) {
689281494Sandrew	if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) {
690281494Sandrew		/*
691281494Sandrew		 * An encapsulating tunnel is wanted.  Tell ipip_input() to
692281494Sandrew		 * start paying attention to encapsulated packets.
693281494Sandrew		 */
694281494Sandrew		if (have_encap_tunnel == 0) {
695281494Sandrew			have_encap_tunnel = 1;
696281494Sandrew			for (s = 0; s < MAXVIFS; ++s) {
697281494Sandrew				multicast_decap_if[s].if_name = "mdecap";
698281494Sandrew				multicast_decap_if[s].if_unit = s;
699281494Sandrew			}
700281494Sandrew		}
701281494Sandrew		/*
702281494Sandrew		 * Set interface to fake encapsulator interface
703281494Sandrew		 */
704281494Sandrew		ifp = &multicast_decap_if[vifcp->vifc_vifi];
705281494Sandrew		/*
706281494Sandrew		 * Prepare cached route entry
707281494Sandrew		 */
708281494Sandrew		bzero(&vifp->v_route, sizeof(vifp->v_route));
709281494Sandrew	} else {
710281494Sandrew	    log(LOG_ERR, "Source routed tunnels not supported.");
711281494Sandrew	    return EOPNOTSUPP;
712281494Sandrew	}
713281494Sandrew    } else {
714281494Sandrew	/* Make sure the interface supports multicast */
715281494Sandrew	if ((ifp->if_flags & IFF_MULTICAST) == 0)
716281494Sandrew	    return EOPNOTSUPP;
717281494Sandrew
718281494Sandrew	/* Enable promiscuous reception of all IP multicasts from the if */
719281494Sandrew	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
720281494Sandrew	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY;
721281494Sandrew	s = splnet();
722281494Sandrew	error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr);
723281494Sandrew	splx(s);
724281494Sandrew	if (error)
725281494Sandrew	    return error;
726281494Sandrew    }
727281494Sandrew
728281494Sandrew    s = splnet();
729281494Sandrew    /* define parameters for the tbf structure */
730281494Sandrew    vifp->v_tbf = v_tbf;
731281494Sandrew    vifp->v_tbf->q_len = 0;
732281494Sandrew    vifp->v_tbf->n_tok = 0;
733281494Sandrew    vifp->v_tbf->last_pkt_t = 0;
734281494Sandrew
735281494Sandrew    vifp->v_flags     = vifcp->vifc_flags;
736281494Sandrew    vifp->v_threshold = vifcp->vifc_threshold;
737281494Sandrew    vifp->v_lcl_addr  = vifcp->vifc_lcl_addr;
738281494Sandrew    vifp->v_rmt_addr  = vifcp->vifc_rmt_addr;
739281494Sandrew    vifp->v_ifp       = ifp;
740281494Sandrew    vifp->v_rate_limit= vifcp->vifc_rate_limit;
741281494Sandrew    vifp->v_rsvp_on   = 0;
742281494Sandrew    vifp->v_rsvpd     = NULL;
743281494Sandrew    /* initialize per vif pkt counters */
744281494Sandrew    vifp->v_pkt_in    = 0;
745281494Sandrew    vifp->v_pkt_out   = 0;
746281494Sandrew    vifp->v_bytes_in  = 0;
747281494Sandrew    vifp->v_bytes_out = 0;
748281494Sandrew    splx(s);
749281494Sandrew
750281494Sandrew    /* Adjust numvifs up if the vifi is higher than numvifs */
751281494Sandrew    if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1;
752281494Sandrew
753281494Sandrew    if (mrtdebug)
754281494Sandrew	log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d",
755281494Sandrew	    vifcp->vifc_vifi,
756281494Sandrew	    ntohl(vifcp->vifc_lcl_addr.s_addr),
757281494Sandrew	    (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
758281494Sandrew	    ntohl(vifcp->vifc_rmt_addr.s_addr),
759281494Sandrew	    vifcp->vifc_threshold,
760281494Sandrew	    vifcp->vifc_rate_limit);
761281494Sandrew
762281494Sandrew    return 0;
763281494Sandrew}
764281494Sandrew
765281494Sandrew/*
766281494Sandrew * Delete a vif from the vif table
767281494Sandrew */
768281494Sandrewstatic int
769281494Sandrewdel_vif(vifip)
770281494Sandrew    vifi_t *vifip;
771281494Sandrew{
772281494Sandrew    register struct vif *vifp = viftable + *vifip;
773281494Sandrew    register vifi_t vifi;
774281494Sandrew    struct ifnet *ifp;
775281494Sandrew    struct ifreq ifr;
776281494Sandrew    int s;
777281494Sandrew
778281494Sandrew    if (*vifip >= numvifs) return EINVAL;
779281494Sandrew    if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL;
780281494Sandrew
781281494Sandrew    s = splnet();
782281494Sandrew
783281494Sandrew    if (!(vifp->v_flags & VIFF_TUNNEL)) {
784281494Sandrew	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
785281494Sandrew	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY;
786281494Sandrew	ifp = vifp->v_ifp;
787281494Sandrew	(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
788281494Sandrew    }
789281494Sandrew
790281494Sandrew    if (vifp == last_encap_vif) {
791281494Sandrew	last_encap_vif = 0;
792281494Sandrew	last_encap_src = 0;
793281494Sandrew    }
794281494Sandrew
795281494Sandrew    bzero((caddr_t)qtable[*vifip],
796281494Sandrew	  sizeof(qtable[*vifip]));
797281494Sandrew    bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf)));
798281494Sandrew    bzero((caddr_t)vifp, sizeof (*vifp));
799281494Sandrew
800281494Sandrew    /* Adjust numvifs down */
801281494Sandrew    for (vifi = numvifs; vifi > 0; vifi--)
802281494Sandrew	if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break;
803281494Sandrew    numvifs = vifi;
804281494Sandrew
805281494Sandrew    splx(s);
806281494Sandrew
807281494Sandrew    if (mrtdebug)
808281494Sandrew      log(LOG_DEBUG, "del_vif %d, numvifs %d", *vifip, numvifs);
809281494Sandrew
810281494Sandrew    return 0;
811281494Sandrew}
812281494Sandrew
813281494Sandrew/*
814281494Sandrew * Add an mfc entry
815281494Sandrew */
816281494Sandrewstatic int
817281494Sandrewadd_mfc(mfccp)
818281494Sandrew    struct mfcctl *mfccp;
819281494Sandrew{
820281494Sandrew    struct mfc *rt;
821281494Sandrew    register struct mbuf *mb_rt;
822281494Sandrew    u_long hash;
823281494Sandrew    struct mbuf *mb_ntry;
824281494Sandrew    struct rtdetq *rte;
825296266Swma    register u_short nstl;
826296266Swma    int s;
827296266Swma    int i;
828296266Swma
829281494Sandrew    MFCFIND(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr, rt);
830281494Sandrew
831281494Sandrew    /* If an entry already exists, just update the fields */
832281494Sandrew    if (rt) {
833281494Sandrew	if (mrtdebug & DEBUG_MFC)
834281494Sandrew	    log(LOG_DEBUG,"add_mfc update o %x g %x p %x",
835281494Sandrew		ntohl(mfccp->mfcc_origin.s_addr),
836281494Sandrew		ntohl(mfccp->mfcc_mcastgrp.s_addr),
837281494Sandrew		mfccp->mfcc_parent);
838281494Sandrew
839281494Sandrew	s = splnet();
840281494Sandrew	rt->mfc_parent = mfccp->mfcc_parent;
841281494Sandrew	for (i = 0; i < numvifs; i++)
842281494Sandrew	    rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
843293045Sian	splx(s);
844281494Sandrew	return 0;
845281494Sandrew    }
846281494Sandrew
847281494Sandrew    /*
848281494Sandrew     * Find the entry for which the upcall was made and update
849281494Sandrew     */
850281494Sandrew    s = splnet();
851281494Sandrew    hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
852281494Sandrew    for (mb_rt = mfctable[hash], nstl = 0; mb_rt; mb_rt = mb_rt->m_next) {
853281494Sandrew
854281494Sandrew	rt = mtod(mb_rt, struct mfc *);
855281494Sandrew	if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
856296266Swma	    (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
857296266Swma	    (mb_rt->m_act != NULL)) {
858296266Swma
859296266Swma	    if (nstl++)
860296266Swma		log(LOG_ERR, "add_mfc %s o %x g %x p %x dbx %x",
861296266Swma		    "multiple kernel entries",
862296266Swma		    ntohl(mfccp->mfcc_origin.s_addr),
863296266Swma		    ntohl(mfccp->mfcc_mcastgrp.s_addr),
864296266Swma		    mfccp->mfcc_parent, mb_rt->m_act);
865296266Swma
866296266Swma	    if (mrtdebug & DEBUG_MFC)
867296266Swma		log(LOG_DEBUG,"add_mfc o %x g %x p %x dbg %x",
868281494Sandrew		    ntohl(mfccp->mfcc_origin.s_addr),
869281494Sandrew		    ntohl(mfccp->mfcc_mcastgrp.s_addr),
870281494Sandrew		    mfccp->mfcc_parent, mb_rt->m_act);
871286366Sandrew
872286366Sandrew	    rt->mfc_origin     = mfccp->mfcc_origin;
873286366Sandrew	    rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
874281494Sandrew	    rt->mfc_parent     = mfccp->mfcc_parent;
875286366Sandrew	    for (i = 0; i < numvifs; i++)
876286366Sandrew		rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
877286366Sandrew	    /* initialize pkt counters per src-grp */
878281494Sandrew	    rt->mfc_pkt_cnt    = 0;
879281494Sandrew	    rt->mfc_byte_cnt   = 0;
880281494Sandrew	    rt->mfc_wrong_if   = 0;
881281494Sandrew	    rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
882281494Sandrew
883281494Sandrew	    rt->mfc_expire = 0;	/* Don't clean this guy up */
884281494Sandrew	    nexpire[hash]--;
885281494Sandrew
886281494Sandrew	    /* free packets Qed at the end of this entry */
887281494Sandrew	    while (mb_rt->m_act) {
888281494Sandrew		mb_ntry = mb_rt->m_act;
889281494Sandrew		rte = mtod(mb_ntry, struct rtdetq *);
890281494Sandrew/* #ifdef RSVP_ISI */
891281494Sandrew		ip_mdq(rte->m, rte->ifp, rt, -1);
892281494Sandrew/* #endif */
893281494Sandrew		mb_rt->m_act = mb_ntry->m_act;
894281494Sandrew		m_freem(rte->m);
895281494Sandrew#ifdef UPCALL_TIMING
896281494Sandrew		collate(&(rte->t));
897281494Sandrew#endif /* UPCALL_TIMING */
898281494Sandrew		m_free(mb_ntry);
899297446Sandrew	    }
900297446Sandrew	}
901281494Sandrew    }
902281494Sandrew
903281494Sandrew    /*
904281494Sandrew     * It is possible that an entry is being inserted without an upcall
905281494Sandrew     */
906281494Sandrew    if (nstl == 0) {
907281494Sandrew	if (mrtdebug & DEBUG_MFC)
908281494Sandrew	    log(LOG_DEBUG,"add_mfc no upcall h %d o %x g %x p %x",
909281494Sandrew		hash, ntohl(mfccp->mfcc_origin.s_addr),
910281494Sandrew		ntohl(mfccp->mfcc_mcastgrp.s_addr),
911281494Sandrew		mfccp->mfcc_parent);
912281494Sandrew
913281494Sandrew	for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) {
914281494Sandrew
915281494Sandrew	    rt = mtod(mb_rt, struct mfc *);
916281494Sandrew	    if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
917291937Skib		(rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) {
918291937Skib
919291937Skib		rt->mfc_origin     = mfccp->mfcc_origin;
920291937Skib		rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
921291937Skib		rt->mfc_parent     = mfccp->mfcc_parent;
922291937Skib		for (i = 0; i < numvifs; i++)
923291937Skib		    rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
924291937Skib		/* initialize pkt counters per src-grp */
925291937Skib		rt->mfc_pkt_cnt    = 0;
926291937Skib		rt->mfc_byte_cnt   = 0;
927291937Skib		rt->mfc_wrong_if   = 0;
928287487Sandrew		rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
929287487Sandrew		if (rt->mfc_expire)
930287487Sandrew		    nexpire[hash]--;
931287487Sandrew		rt->mfc_expire	   = 0;
932287487Sandrew	    }
933287487Sandrew	}
934287487Sandrew	if (mb_rt == NULL) {
935287487Sandrew	    /* no upcall, so make a new entry */
936287487Sandrew	    MGET(mb_rt, M_DONTWAIT, MT_MRTABLE);
937287487Sandrew	    if (mb_rt == NULL) {
938287487Sandrew		splx(s);
939287487Sandrew		return ENOBUFS;
940287487Sandrew	    }
941287487Sandrew
942287487Sandrew	    rt = mtod(mb_rt, struct mfc *);
943287487Sandrew
944287487Sandrew	    /* insert new entry at head of hash chain */
945287487Sandrew	    rt->mfc_origin     = mfccp->mfcc_origin;
946287487Sandrew	    rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
947287487Sandrew	    rt->mfc_parent     = mfccp->mfcc_parent;
948287487Sandrew	    for (i = 0; i < numvifs; i++)
949287487Sandrew		    rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
950287487Sandrew	    /* initialize pkt counters per src-grp */
951287487Sandrew	    rt->mfc_pkt_cnt    = 0;
952287487Sandrew	    rt->mfc_byte_cnt   = 0;
953287959Sandrew	    rt->mfc_wrong_if   = 0;
954287959Sandrew	    rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
955287487Sandrew	    rt->mfc_expire     = 0;
956287487Sandrew
957287959Sandrew	    /* link into table */
958287487Sandrew	    mb_rt->m_next  = mfctable[hash];
959287487Sandrew	    mfctable[hash] = mb_rt;
960287487Sandrew	    mb_rt->m_act = NULL;
961287487Sandrew	}
962287487Sandrew    }
963287487Sandrew    splx(s);
964287487Sandrew    return 0;
965287487Sandrew}
966287487Sandrew
#ifdef UPCALL_TIMING
/*
 * Record one upcall delay sample.
 *
 * t is the time stamp taken when the packet was queued.  If it lies
 * before the current time, the difference (as produced by TV_DELTA)
 * is scaled down by 2^10 and the matching slot of upcall_data[] is
 * incremented; every scaled value above 50 lands in slot 50.
 */
static void collate(t)
register struct timeval *t;
{
    register struct timeval now;
    register u_long elapsed;
    register u_long slot;

    GET_TIME(now);

    /* nothing to record unless the stamp is strictly in the past */
    if (!TV_LT(*t, now))
	return;

    TV_DELTA(now, *t, elapsed);

    slot = elapsed >> 10;
    if (slot > 50)
	slot = 50;

    ++upcall_data[slot];
}
#endif /* UPCALL_TIMING */
992287487Sandrew
993287487Sandrew/*
994287487Sandrew * Delete an mfc entry
995287487Sandrew */
996287487Sandrewstatic int
997287487Sandrewdel_mfc(mfccp)
998287487Sandrew    struct mfcctl *mfccp;
999287487Sandrew{
1000287487Sandrew    struct in_addr 	origin;
1001287487Sandrew    struct in_addr 	mcastgrp;
1002287487Sandrew    struct mfc 		*rt;
1003287487Sandrew    struct mbuf 	*mb_rt;
1004287487Sandrew    struct mbuf 	**nptr;
1005287487Sandrew    u_long 		hash;
1006287487Sandrew    int s, i;
1007287487Sandrew
1008287487Sandrew    origin = mfccp->mfcc_origin;
1009287487Sandrew    mcastgrp = mfccp->mfcc_mcastgrp;
1010287487Sandrew    hash = MFCHASH(origin.s_addr, mcastgrp.s_addr);
1011287487Sandrew
1012287487Sandrew    if (mrtdebug & DEBUG_MFC)
1013287487Sandrew	log(LOG_DEBUG,"del_mfc orig %x mcastgrp %x",
1014287487Sandrew	    ntohl(origin.s_addr), ntohl(mcastgrp.s_addr));
1015287487Sandrew
1016287487Sandrew    s = splnet();
1017
1018    nptr = &mfctable[hash];
1019    while ((mb_rt = *nptr) != NULL) {
1020        rt = mtod(mb_rt, struct mfc *);
1021	if (origin.s_addr == rt->mfc_origin.s_addr &&
1022	    mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr &&
1023	    mb_rt->m_act == NULL)
1024	    break;
1025
1026	nptr = &mb_rt->m_next;
1027    }
1028    if (mb_rt == NULL) {
1029	splx(s);
1030	return EADDRNOTAVAIL;
1031    }
1032
1033    MFREE(mb_rt, *nptr);
1034
1035    splx(s);
1036
1037    return 0;
1038}
1039
1040/*
1041 * Send a message to mrouted on the multicast routing socket
1042 */
1043static int
1044socket_send(s, mm, src)
1045	struct socket *s;
1046	struct mbuf *mm;
1047	struct sockaddr_in *src;
1048{
1049	if (s) {
1050		if (sbappendaddr(&s->so_rcv,
1051				 (struct sockaddr *)src,
1052				 mm, (struct mbuf *)0) != 0) {
1053			sorwakeup(s);
1054			return 0;
1055		}
1056	}
1057	m_freem(mm);
1058	return -1;
1059}
1060
1061/*
1062 * IP multicast forwarding function. This function assumes that the packet
1063 * pointed to by "ip" has arrived on (or is about to be sent to) the interface
1064 * pointed to by "ifp", and the packet is to be relayed to other networks
1065 * that have members of the packet's destination IP multicast group.
1066 *
1067 * The packet is returned unscathed to the caller, unless it is
1068 * erroneous, in which case a non-zero return value tells the caller to
1069 * discard it.
1070 */
1071
1072#define IP_HDR_LEN  20	/* # bytes of fixed IP header (excluding options) */
1073#define TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */
1074
1075int
1076X_ip_mforward(ip, ifp, m, imo)
1077    register struct ip *ip;
1078    struct ifnet *ifp;
1079    struct mbuf *m;
1080    struct ip_moptions *imo;
1081{
1082    register struct mfc *rt = 0; /* XXX uninit warning */
1083    register u_char *ipoptions;
1084    static struct sockproto	k_igmpproto 	= { AF_INET, IPPROTO_IGMP };
1085    static struct sockaddr_in 	k_igmpsrc	= { sizeof k_igmpsrc, AF_INET };
1086    static int srctun = 0;
1087    register struct mbuf *mm;
1088    int s;
1089    vifi_t vifi;
1090    struct vif *vifp;
1091
1092    if (mrtdebug & DEBUG_FORWARD)
1093	log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %x",
1094	    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp);
1095
1096    if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
1097	(ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) {
1098	/*
1099	 * Packet arrived via a physical interface or
1100	 * an encapsulated tunnel.
1101	 */
1102    } else {
1103	/*
1104	 * Packet arrived through a source-route tunnel.
1105	 * Source-route tunnels are no longer supported.
1106	 */
1107	if ((srctun++ % 1000) == 0)
1108	    log(LOG_ERR, "ip_mforward: received source-routed packet from %x",
1109		ntohl(ip->ip_src.s_addr));
1110
1111	return 1;
1112    }
1113
1114    if ((imo) && ((vifi = imo->imo_multicast_vif) < numvifs)) {
1115	if (ip->ip_ttl < 255)
1116		ip->ip_ttl++;	/* compensate for -1 in *_send routines */
1117	if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
1118	    vifp = viftable + vifi;
1119	    printf("Sending IPPROTO_RSVP from %x to %x on vif %d (%s%s%d)\n",
1120		ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), vifi,
1121		(vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "",
1122		vifp->v_ifp->if_name, vifp->v_ifp->if_unit);
1123	}
1124	return (ip_mdq(m, ifp, rt, vifi));
1125    }
1126    if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
1127	printf("Warning: IPPROTO_RSVP from %x to %x without vif option\n",
1128	    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr));
1129    }
1130
1131    /*
1132     * Don't forward a packet with time-to-live of zero or one,
1133     * or a packet destined to a local-only group.
1134     */
1135    if (ip->ip_ttl <= 1 ||
1136	ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP)
1137	return 0;
1138
1139    /*
1140     * Determine forwarding vifs from the forwarding cache table
1141     */
1142    s = splnet();
1143    MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt);
1144
1145    /* Entry exists, so forward if necessary */
1146    if (rt != NULL) {
1147	splx(s);
1148	return (ip_mdq(m, ifp, rt, -1));
1149    } else {
1150	/*
1151	 * If we don't have a route for packet's origin,
1152	 * Make a copy of the packet &
1153	 * send message to routing daemon
1154	 */
1155
1156	register struct mbuf *mb_rt;
1157	register struct mbuf *mb_ntry;
1158	register struct mbuf *mb0;
1159	register struct rtdetq *rte;
1160	register struct mbuf *rte_m;
1161	register u_long hash;
1162	register int npkts;
1163#ifdef UPCALL_TIMING
1164	struct timeval tp;
1165
1166	GET_TIME(tp);
1167#endif
1168
1169	mrtstat.mrts_no_route++;
1170	if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC))
1171	    log(LOG_DEBUG, "ip_mforward: no rte s %x g %x",
1172		ntohl(ip->ip_src.s_addr),
1173		ntohl(ip->ip_dst.s_addr));
1174
1175	/*
1176	 * Allocate mbufs early so that we don't do extra work if we are
1177	 * just going to fail anyway.
1178	 */
1179	MGET(mb_ntry, M_DONTWAIT, MT_DATA);
1180	if (mb_ntry == NULL) {
1181	    splx(s);
1182	    return ENOBUFS;
1183	}
1184	mb0 = m_copy(m, 0, M_COPYALL);
1185	if (mb0 == NULL) {
1186	    m_free(mb_ntry);
1187	    splx(s);
1188	    return ENOBUFS;
1189	}
1190
1191	/* is there an upcall waiting for this packet? */
1192	hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr);
1193	for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) {
1194	    rt = mtod(mb_rt, struct mfc *);
1195	    if ((ip->ip_src.s_addr == rt->mfc_origin.s_addr) &&
1196		(ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) &&
1197		(mb_rt->m_act != NULL))
1198		break;
1199	}
1200
1201	if (mb_rt == NULL) {
1202	    int hlen = ip->ip_hl << 2;
1203	    int i;
1204	    struct igmpmsg *im;
1205
1206	    /* no upcall, so make a new entry */
1207	    MGET(mb_rt, M_DONTWAIT, MT_MRTABLE);
1208	    if (mb_rt == NULL) {
1209		m_free(mb_ntry);
1210		m_freem(mb0);
1211		splx(s);
1212		return ENOBUFS;
1213	    }
1214	    /* Make a copy of the header to send to the user level process */
1215	    mm = m_copy(m, 0, hlen);
1216	    if (mm && (M_HASCL(mm) || mm->m_len < hlen))
1217		mm = m_pullup(mm, hlen);
1218	    if (mm == NULL) {
1219		m_free(mb_ntry);
1220		m_freem(mb0);
1221		m_free(mb_rt);
1222		splx(s);
1223		return ENOBUFS;
1224	    }
1225
1226	    /*
1227	     * Send message to routing daemon to install
1228	     * a route into the kernel table
1229	     */
1230	    k_igmpsrc.sin_addr = ip->ip_src;
1231
1232	    im = mtod(mm, struct igmpmsg *);
1233	    im->im_msgtype	= IGMPMSG_NOCACHE;
1234	    im->im_mbz		= 0;
1235
1236	    mrtstat.mrts_upcalls++;
1237
1238	    if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) {
1239		log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full");
1240		++mrtstat.mrts_upq_sockfull;
1241		m_free(mb_ntry);
1242		m_freem(mb0);
1243		m_free(mb_rt);
1244		splx(s);
1245		return ENOBUFS;
1246	    }
1247
1248	    rt = mtod(mb_rt, struct mfc *);
1249
1250	    /* insert new entry at head of hash chain */
1251	    rt->mfc_origin.s_addr     = ip->ip_src.s_addr;
1252	    rt->mfc_mcastgrp.s_addr   = ip->ip_dst.s_addr;
1253	    rt->mfc_expire	      = UPCALL_EXPIRE;
1254	    nexpire[hash]++;
1255	    for (i = 0; i < numvifs; i++)
1256		rt->mfc_ttls[i] = 0;
1257	    rt->mfc_parent = -1;
1258
1259	    /* link into table */
1260	    mb_rt->m_next  = mfctable[hash];
1261	    mfctable[hash] = mb_rt;
1262	    mb_rt->m_act = NULL;
1263
1264	    rte_m = mb_rt;
1265	} else {
1266	    /* determine if q has overflowed */
1267	    for (rte_m = mb_rt, npkts = 0; rte_m->m_act; rte_m = rte_m->m_act)
1268		npkts++;
1269
1270	    if (npkts > MAX_UPQ) {
1271		mrtstat.mrts_upq_ovflw++;
1272		m_free(mb_ntry);
1273		m_freem(mb0);
1274		splx(s);
1275		return 0;
1276	    }
1277	}
1278
1279	mb_ntry->m_act = NULL;
1280	rte = mtod(mb_ntry, struct rtdetq *);
1281
1282	rte->m 			= mb0;
1283	rte->ifp 		= ifp;
1284#ifdef UPCALL_TIMING
1285	rte->t			= tp;
1286#endif
1287
1288	/* Add this entry to the end of the queue */
1289	rte_m->m_act		= mb_ntry;
1290
1291	splx(s);
1292
1293	return 0;
1294    }
1295}
1296
1297#ifndef MROUTE_LKM
1298int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
1299		   struct ip_moptions *) = X_ip_mforward;
1300#endif
1301
1302/*
1303 * Clean up the cache entry if upcall is not serviced
1304 */
1305static void
1306expire_upcalls(void *unused)
1307{
1308    struct mbuf *mb_rt, *m, **nptr;
1309    struct rtdetq *rte;
1310    struct mfc *mfc;
1311    int i;
1312    int s;
1313
1314    s = splnet();
1315    for (i = 0; i < MFCTBLSIZ; i++) {
1316	if (nexpire[i] == 0)
1317	    continue;
1318	nptr = &mfctable[i];
1319	for (mb_rt = *nptr; mb_rt != NULL; mb_rt = *nptr) {
1320	    mfc = mtod(mb_rt, struct mfc *);
1321
1322	    /*
1323	     * Skip real cache entries
1324	     * Make sure it wasn't marked to not expire (shouldn't happen)
1325	     * If it expires now
1326	     */
1327	    if (mb_rt->m_act != NULL &&
1328	        mfc->mfc_expire != 0 &&
1329		--mfc->mfc_expire == 0) {
1330		if (mrtdebug & DEBUG_EXPIRE)
1331		    log(LOG_DEBUG, "expire_upcalls: expiring (%x %x)",
1332			ntohl(mfc->mfc_origin.s_addr),
1333			ntohl(mfc->mfc_mcastgrp.s_addr));
1334		/*
1335		 * drop all the packets
1336		 * free the mbuf with the pkt, if, timing info
1337		 */
1338		while (mb_rt->m_act) {
1339		    m = mb_rt->m_act;
1340		    mb_rt->m_act = m->m_act;
1341
1342		    rte = mtod(m, struct rtdetq *);
1343		    m_freem(rte->m);
1344		    m_free(m);
1345		}
1346		++mrtstat.mrts_cache_cleanups;
1347		nexpire[i]--;
1348
1349		MFREE(mb_rt, *nptr);
1350	    } else {
1351		nptr = &mb_rt->m_next;
1352	    }
1353	}
1354    }
1355    splx(s);
1356    timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT);
1357}
1358
1359/*
1360 * Packet forwarding routine once entry in the cache is made
1361 */
1362static int
1363ip_mdq(m, ifp, rt, xmt_vif)
1364    register struct mbuf *m;
1365    register struct ifnet *ifp;
1366    register struct mfc *rt;
1367    register vifi_t xmt_vif;
1368{
1369    register struct ip  *ip = mtod(m, struct ip *);
1370    register vifi_t vifi;
1371    register struct vif *vifp;
1372    register struct mbuf *tmp;
1373    register int plen = ntohs(ip->ip_len);
1374
1375/*
1376 * Macro to send packet on vif.  Since RSVP packets don't get counted on
1377 * input, they shouldn't get counted on output, so statistics keeping is
1378 * seperate.
1379 */
1380#define MC_SEND(ip,vifp,m) {                             \
1381                if ((vifp)->v_flags & VIFF_TUNNEL)  	 \
1382                    encap_send((ip), (vifp), (m));       \
1383                else                                     \
1384                    phyint_send((ip), (vifp), (m));      \
1385}
1386
1387    /*
1388     * If xmt_vif is not -1, send on only the requested vif.
1389     *
1390     * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.)
1391     */
1392    if (xmt_vif < numvifs) {
1393	MC_SEND(ip, viftable + xmt_vif, m);
1394	return 1;
1395    }
1396
1397    /*
1398     * Don't forward if it didn't arrive from the parent vif for its origin.
1399     */
1400    vifi = rt->mfc_parent;
1401    if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) {
1402	/* came in the wrong interface */
1403	if (mrtdebug & DEBUG_FORWARD)
1404	    log(LOG_DEBUG, "wrong if: ifp %x vifi %d vififp %x",
1405		ifp, vifi, viftable[vifi].v_ifp);
1406	++mrtstat.mrts_wrong_if;
1407	++rt->mfc_wrong_if;
1408	/*
1409	 * If we are doing PIM assert processing, and we are forwarding
1410	 * packets on this interface, and it is a broadcast medium
1411	 * interface (and not a tunnel), send a message to the routing daemon.
1412	 */
1413	if (pim_assert && rt->mfc_ttls[vifi] &&
1414		(ifp->if_flags & IFF_BROADCAST) &&
1415		!(viftable[vifi].v_flags & VIFF_TUNNEL)) {
1416	    struct sockaddr_in k_igmpsrc;
1417	    struct mbuf *mm;
1418	    struct igmpmsg *im;
1419	    int hlen = ip->ip_hl << 2;
1420	    struct timeval now;
1421	    register u_long delta;
1422
1423	    GET_TIME(now);
1424
1425	    TV_DELTA(rt->mfc_last_assert, now, delta);
1426
1427	    if (delta > ASSERT_MSG_TIME) {
1428		mm = m_copy(m, 0, hlen);
1429		if (mm && (M_HASCL(mm) || mm->m_len < hlen))
1430		    mm = m_pullup(mm, hlen);
1431		if (mm == NULL) {
1432		    return ENOBUFS;
1433		}
1434
1435		rt->mfc_last_assert = now;
1436
1437		im = mtod(mm, struct igmpmsg *);
1438		im->im_msgtype	= IGMPMSG_WRONGVIF;
1439		im->im_mbz		= 0;
1440		im->im_vif		= vifi;
1441
1442		k_igmpsrc.sin_addr = im->im_src;
1443
1444		socket_send(ip_mrouter, mm, &k_igmpsrc);
1445	    }
1446	}
1447	return 0;
1448    }
1449
1450    /* If I sourced this packet, it counts as output, else it was input. */
1451    if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) {
1452	viftable[vifi].v_pkt_out++;
1453	viftable[vifi].v_bytes_out += plen;
1454    } else {
1455	viftable[vifi].v_pkt_in++;
1456	viftable[vifi].v_bytes_in += plen;
1457    }
1458    rt->mfc_pkt_cnt++;
1459    rt->mfc_byte_cnt += plen;
1460
1461    /*
1462     * For each vif, decide if a copy of the packet should be forwarded.
1463     * Forward if:
1464     *		- the ttl exceeds the vif's threshold
1465     *		- there are group members downstream on interface
1466     */
1467    for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++)
1468	if ((rt->mfc_ttls[vifi] > 0) &&
1469	    (ip->ip_ttl > rt->mfc_ttls[vifi])) {
1470	    vifp->v_pkt_out++;
1471	    vifp->v_bytes_out += plen;
1472	    MC_SEND(ip, vifp, m);
1473	}
1474
1475    return 0;
1476}
1477
1478/*
1479 * check if a vif number is legal/ok. This is used by ip_output, to export
1480 * numvifs there,
1481 */
1482int
1483X_legal_vif_num(vif)
1484    int vif;
1485{
1486    if (vif >= 0 && vif < numvifs)
1487       return(1);
1488    else
1489       return(0);
1490}
1491
1492#ifndef MROUTE_LKM
1493int (*legal_vif_num)(int) = X_legal_vif_num;
1494#endif
1495
1496/*
1497 * Return the local address used by this vif
1498 */
1499u_long
1500X_ip_mcast_src(vifi)
1501    int vifi;
1502{
1503    if (vifi >= 0 && vifi < numvifs)
1504	return viftable[vifi].v_lcl_addr.s_addr;
1505    else
1506	return INADDR_ANY;
1507}
1508
1509#ifndef MROUTE_LKM
1510u_long (*ip_mcast_src)(int) = X_ip_mcast_src;
1511#endif
1512
1513static void
1514phyint_send(ip, vifp, m)
1515    struct ip *ip;
1516    struct vif *vifp;
1517    struct mbuf *m;
1518{
1519    register struct mbuf *mb_copy;
1520    register int hlen = ip->ip_hl << 2;
1521    register struct ip_moptions *imo;
1522
1523    /*
1524     * Make a new reference to the packet; make sure that
1525     * the IP header is actually copied, not just referenced,
1526     * so that ip_output() only scribbles on the copy.
1527     */
1528    mb_copy = m_copy(m, 0, M_COPYALL);
1529    if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen))
1530	mb_copy = m_pullup(mb_copy, hlen);
1531    if (mb_copy == NULL)
1532	return;
1533
1534    MALLOC(imo, struct ip_moptions *, sizeof *imo, M_IPMOPTS, M_NOWAIT);
1535    if (imo == NULL) {
1536	m_freem(mb_copy);
1537	return;
1538    }
1539
1540    imo->imo_multicast_ifp  = vifp->v_ifp;
1541    imo->imo_multicast_ttl  = ip->ip_ttl - 1;
1542    imo->imo_multicast_loop = 1;
1543    imo->imo_multicast_vif  = -1;
1544
1545    if (vifp->v_rate_limit <= 0)
1546	tbf_send_packet(vifp, mb_copy, imo);
1547    else
1548	tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len,
1549		    imo);
1550}
1551
1552static void
1553encap_send(ip, vifp, m)
1554    register struct ip *ip;
1555    register struct vif *vifp;
1556    register struct mbuf *m;
1557{
1558    register struct mbuf *mb_copy;
1559    register struct ip *ip_copy;
1560    int hlen = ip->ip_hl << 2;
1561    register int i, len = ip->ip_len;
1562
1563    /*
1564     * copy the old packet & pullup it's IP header into the
1565     * new mbuf so we can modify it.  Try to fill the new
1566     * mbuf since if we don't the ethernet driver will.
1567     */
1568    MGET(mb_copy, M_DONTWAIT, MT_DATA);
1569    if (mb_copy == NULL)
1570	return;
1571    mb_copy->m_data += 16;
1572    mb_copy->m_len = sizeof(multicast_encap_iphdr);
1573
1574    if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) {
1575	m_freem(mb_copy);
1576	return;
1577    }
1578    i = MHLEN - M_LEADINGSPACE(mb_copy);
1579    if (i > len)
1580	i = len;
1581    mb_copy = m_pullup(mb_copy, i);
1582    if (mb_copy == NULL)
1583	return;
1584    mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr);
1585
1586    /*
1587     * fill in the encapsulating IP header.
1588     */
1589    ip_copy = mtod(mb_copy, struct ip *);
1590    *ip_copy = multicast_encap_iphdr;
1591    ip_copy->ip_id = htons(ip_id++);
1592    ip_copy->ip_len += len;
1593    ip_copy->ip_src = vifp->v_lcl_addr;
1594    ip_copy->ip_dst = vifp->v_rmt_addr;
1595
1596    /*
1597     * turn the encapsulated IP header back into a valid one.
1598     */
1599    ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr));
1600    --ip->ip_ttl;
1601    HTONS(ip->ip_len);
1602    HTONS(ip->ip_off);
1603    ip->ip_sum = 0;
1604#if defined(LBL) && !defined(ultrix)
1605    ip->ip_sum = ~oc_cksum((caddr_t)ip, ip->ip_hl << 2, 0);
1606#else
1607    mb_copy->m_data += sizeof(multicast_encap_iphdr);
1608    ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
1609    mb_copy->m_data -= sizeof(multicast_encap_iphdr);
1610#endif
1611
1612    if (vifp->v_rate_limit <= 0)
1613	tbf_send_packet(vifp, mb_copy, 0);
1614    else
1615	tbf_control(vifp, mb_copy, ip, ip_copy->ip_len, 0);
1616}
1617
1618/*
1619 * De-encapsulate a packet and feed it back through ip input (this
1620 * routine is called whenever IP gets a packet with proto type
1621 * ENCAP_PROTO and a local destination address).
1622 */
1623void
1624#ifdef MROUTE_LKM
1625X_ipip_input(m)
1626#else
1627ipip_input(m, iphlen)
1628#endif
1629	register struct mbuf *m;
1630	int iphlen;
1631{
1632    struct ifnet *ifp = m->m_pkthdr.rcvif;
1633    register struct ip *ip = mtod(m, struct ip *);
1634    register int hlen = ip->ip_hl << 2;
1635    register int s;
1636    register struct ifqueue *ifq;
1637    register struct vif *vifp;
1638
1639    if (!have_encap_tunnel) {
1640	    rip_input(m);
1641	    return;
1642    }
1643    /*
1644     * dump the packet if it's not to a multicast destination or if
1645     * we don't have an encapsulating tunnel with the source.
1646     * Note:  This code assumes that the remote site IP address
1647     * uniquely identifies the tunnel (i.e., that this site has
1648     * at most one tunnel with the remote site).
1649     */
1650    if (! IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) {
1651	++mrtstat.mrts_bad_tunnel;
1652	m_freem(m);
1653	return;
1654    }
1655    if (ip->ip_src.s_addr != last_encap_src) {
1656	register struct vif *vife;
1657
1658	vifp = viftable;
1659	vife = vifp + numvifs;
1660	last_encap_src = ip->ip_src.s_addr;
1661	last_encap_vif = 0;
1662	for ( ; vifp < vife; ++vifp)
1663	    if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) {
1664		if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT))
1665		    == VIFF_TUNNEL)
1666		    last_encap_vif = vifp;
1667		break;
1668	    }
1669    }
1670    if ((vifp = last_encap_vif) == 0) {
1671	last_encap_src = 0;
1672	mrtstat.mrts_cant_tunnel++; /*XXX*/
1673	m_freem(m);
1674	if (mrtdebug)
1675          log(LOG_DEBUG, "ip_mforward: no tunnel with %x",
1676		ntohl(ip->ip_src.s_addr));
1677	return;
1678    }
1679    ifp = vifp->v_ifp;
1680
1681    if (hlen > IP_HDR_LEN)
1682      ip_stripoptions(m, (struct mbuf *) 0);
1683    m->m_data += IP_HDR_LEN;
1684    m->m_len -= IP_HDR_LEN;
1685    m->m_pkthdr.len -= IP_HDR_LEN;
1686    m->m_pkthdr.rcvif = ifp;
1687
1688    ifq = &ipintrq;
1689    s = splimp();
1690    if (IF_QFULL(ifq)) {
1691	IF_DROP(ifq);
1692	m_freem(m);
1693    } else {
1694	IF_ENQUEUE(ifq, m);
1695	/*
1696	 * normally we would need a "schednetisr(NETISR_IP)"
1697	 * here but we were called by ip_input and it is going
1698	 * to loop back & try to dequeue the packet we just
1699	 * queued as soon as we return so we avoid the
1700	 * unnecessary software interrrupt.
1701	 */
1702    }
1703    splx(s);
1704}
1705
1706/*
1707 * Token bucket filter module
1708 */
1709static void
1710tbf_control(vifp, m, ip, p_len, imo)
1711	register struct vif *vifp;
1712	register struct mbuf *m;
1713	register struct ip *ip;
1714	register u_long p_len;
1715	struct ip_moptions *imo;
1716{
1717    tbf_update_tokens(vifp);
1718
1719    /* if there are enough tokens,
1720     * and the queue is empty,
1721     * send this packet out
1722     */
1723
1724    if (vifp->v_tbf->q_len == 0) {
1725	if (p_len <= vifp->v_tbf->n_tok) {
1726	    vifp->v_tbf->n_tok -= p_len;
1727	    tbf_send_packet(vifp, m, imo);
1728	} else if (p_len > MAX_BKT_SIZE) {
1729	    /* drop if packet is too large */
1730	    mrtstat.mrts_pkt2large++;
1731	    m_freem(m);
1732	    return;
1733	} else {
1734	    /* queue packet and timeout till later */
1735	    tbf_queue(vifp, m, ip, imo);
1736	    timeout(tbf_reprocess_q, (caddr_t)vifp, 1);
1737	}
1738    } else if (vifp->v_tbf->q_len < MAXQSIZE) {
1739	/* finite queue length, so queue pkts and process queue */
1740	tbf_queue(vifp, m, ip, imo);
1741	tbf_process_q(vifp);
1742    } else {
1743	/* queue length too much, try to dq and queue and process */
1744	if (!tbf_dq_sel(vifp, ip)) {
1745	    mrtstat.mrts_q_overflow++;
1746	    m_freem(m);
1747	    return;
1748	} else {
1749	    tbf_queue(vifp, m, ip, imo);
1750	    tbf_process_q(vifp);
1751	}
1752    }
1753    return;
1754}
1755
1756/*
1757 * adds a packet to the queue at the interface
1758 */
1759static void
1760tbf_queue(vifp, m, ip, imo)
1761	register struct vif *vifp;
1762	register struct mbuf *m;
1763	register struct ip *ip;
1764	struct ip_moptions *imo;
1765{
1766    register u_long ql;
1767    register int index = (vifp - viftable);
1768    register int s = splnet();
1769
1770    ql = vifp->v_tbf->q_len;
1771
1772    qtable[index][ql].pkt_m = m;
1773    qtable[index][ql].pkt_len = (mtod(m, struct ip *))->ip_len;
1774    qtable[index][ql].pkt_ip = ip;
1775    qtable[index][ql].pkt_imo = imo;
1776
1777    vifp->v_tbf->q_len++;
1778    splx(s);
1779}
1780
1781
1782/*
1783 * processes the queue at the interface
1784 */
1785static void
1786tbf_process_q(vifp)
1787    register struct vif *vifp;
1788{
1789    register struct pkt_queue pkt_1;
1790    register int index = (vifp - viftable);
1791    register int s = splnet();
1792
1793    /* loop through the queue at the interface and send as many packets
1794     * as possible
1795     */
1796    while (vifp->v_tbf->q_len > 0) {
1797	/* locate the first packet */
1798	pkt_1.pkt_len = (qtable[index][0]).pkt_len;
1799	pkt_1.pkt_m   = (qtable[index][0]).pkt_m;
1800	pkt_1.pkt_ip   = (qtable[index][0]).pkt_ip;
1801	pkt_1.pkt_imo = (qtable[index][0]).pkt_imo;
1802
1803	/* determine if the packet can be sent */
1804	if (pkt_1.pkt_len <= vifp->v_tbf->n_tok) {
1805	    /* if so,
1806	     * reduce no of tokens, dequeue the queue,
1807	     * send the packet.
1808	     */
1809	    vifp->v_tbf->n_tok -= pkt_1.pkt_len;
1810
1811	    tbf_dequeue(vifp, 0);
1812
1813	    tbf_send_packet(vifp, pkt_1.pkt_m, pkt_1.pkt_imo);
1814
1815	} else break;
1816    }
1817    splx(s);
1818}
1819
1820/*
1821 * removes the jth packet from the queue at the interface
1822 */
1823static void
1824tbf_dequeue(vifp,j)
1825    register struct vif *vifp;
1826    register int j;
1827{
1828    register u_long index = vifp - viftable;
1829    register int i;
1830
1831    for (i=j+1; i <= vifp->v_tbf->q_len - 1; i++) {
1832	qtable[index][i-1].pkt_m   = qtable[index][i].pkt_m;
1833	qtable[index][i-1].pkt_len = qtable[index][i].pkt_len;
1834	qtable[index][i-1].pkt_ip = qtable[index][i].pkt_ip;
1835	qtable[index][i-1].pkt_imo = qtable[index][i].pkt_imo;
1836    }
1837    qtable[index][i-1].pkt_m = NULL;
1838    qtable[index][i-1].pkt_len = NULL;
1839    qtable[index][i-1].pkt_ip = NULL;
1840    qtable[index][i-1].pkt_imo = NULL;
1841
1842    vifp->v_tbf->q_len--;
1843
1844    if (tbfdebug > 1)
1845	log(LOG_DEBUG, "tbf_dequeue: vif# %d qlen %d",vifp-viftable, i-1);
1846}
1847
1848static void
1849tbf_reprocess_q(xvifp)
1850	void *xvifp;
1851{
1852    register struct vif *vifp = xvifp;
1853    if (ip_mrouter == NULL)
1854	return;
1855
1856    tbf_update_tokens(vifp);
1857
1858    tbf_process_q(vifp);
1859
1860    if (vifp->v_tbf->q_len)
1861	timeout(tbf_reprocess_q, (caddr_t)vifp, 1);
1862}
1863
1864/* function that will selectively discard a member of the queue
1865 * based on the precedence value and the priority obtained through
1866 * a lookup table - not yet implemented accurately!
1867 */
1868static int
1869tbf_dq_sel(vifp, ip)
1870    register struct vif *vifp;
1871    register struct ip *ip;
1872{
1873    register int i;
1874    register int s = splnet();
1875    register u_int p;
1876
1877    p = priority(vifp, ip);
1878
1879    for(i=vifp->v_tbf->q_len-1;i >= 0;i--) {
1880	if (p > priority(vifp, qtable[vifp-viftable][i].pkt_ip)) {
1881	    m_freem(qtable[vifp-viftable][i].pkt_m);
1882	    tbf_dequeue(vifp,i);
1883	    splx(s);
1884	    mrtstat.mrts_drop_sel++;
1885	    return(1);
1886	}
1887    }
1888    splx(s);
1889    return(0);
1890}
1891
1892static void
1893tbf_send_packet(vifp, m, imo)
1894    register struct vif *vifp;
1895    register struct mbuf *m;
1896    struct ip_moptions *imo;
1897{
1898    int error;
1899    int s = splnet();
1900
1901    if (vifp->v_flags & VIFF_TUNNEL) {
1902	/* If tunnel options */
1903	ip_output(m, (struct mbuf *)0, (struct route *)0,
1904		  IP_FORWARDING, imo);
1905    } else {
1906	/* if physical interface option, extract the options and then send */
1907	error = ip_output(m, (struct mbuf *)0, (struct route *)0,
1908			  IP_FORWARDING, imo);
1909	FREE(imo, M_IPMOPTS);
1910
1911	if (mrtdebug & DEBUG_XMIT)
1912	    log(LOG_DEBUG, "phyint_send on vif %d err %d", vifp-viftable, error);
1913    }
1914    splx(s);
1915}
1916
1917/* determine the current time and then
1918 * the elapsed time (between the last time and time now)
1919 * in milliseconds & update the no. of tokens in the bucket
1920 */
1921static void
1922tbf_update_tokens(vifp)
1923    register struct vif *vifp;
1924{
1925    struct timeval tp;
1926    register u_long t;
1927    register u_long elapsed;
1928    register int s = splnet();
1929
1930    GET_TIME(tp);
1931
1932    t = tp.tv_sec*1000 + tp.tv_usec/1000;
1933
1934    elapsed = (t - vifp->v_tbf->last_pkt_t) * vifp->v_rate_limit /8;
1935    vifp->v_tbf->n_tok += elapsed;
1936    vifp->v_tbf->last_pkt_t = t;
1937
1938    if (vifp->v_tbf->n_tok > MAX_BKT_SIZE)
1939	vifp->v_tbf->n_tok = MAX_BKT_SIZE;
1940
1941    splx(s);
1942}
1943
1944static int
1945priority(vifp, ip)
1946    register struct vif *vifp;
1947    register struct ip *ip;
1948{
1949    register int prio;
1950
1951    /* temporary hack; may add general packet classifier some day */
1952
1953    /*
1954     * The UDP port space is divided up into four priority ranges:
1955     * [0, 16384)     : unclassified - lowest priority
1956     * [16384, 32768) : audio - highest priority
1957     * [32768, 49152) : whiteboard - medium priority
1958     * [49152, 65536) : video - low priority
1959     */
1960    if (ip->ip_p == IPPROTO_UDP) {
1961	struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2));
1962	switch (ntohs(udp->uh_dport) & 0xc000) {
1963	    case 0x4000:
1964		prio = 70;
1965		break;
1966	    case 0x8000:
1967		prio = 60;
1968		break;
1969	    case 0xc000:
1970		prio = 55;
1971		break;
1972	    default:
1973		prio = 50;
1974		break;
1975	}
1976	if (tbfdebug > 1)
1977		log(LOG_DEBUG, "port %x prio%d", ntohs(udp->uh_dport), prio);
1978    } else {
1979	    prio = 50;
1980    }
1981    return prio;
1982}
1983
1984/*
1985 * End of token bucket filter modifications
1986 */
1987
1988int
1989ip_rsvp_vif_init(so, m)
1990    struct socket *so;
1991    struct mbuf *m;
1992{
1993    int i;
1994    register int s;
1995
1996    if (rsvpdebug)
1997	printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n",
1998	       so->so_type, so->so_proto->pr_protocol);
1999
2000    if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
2001	return EOPNOTSUPP;
2002
2003    /* Check mbuf. */
2004    if (m == NULL || m->m_len != sizeof(int)) {
2005	return EINVAL;
2006    }
2007    i = *(mtod(m, int *));
2008
2009    if (rsvpdebug)
2010	printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n",i,rsvp_on);
2011
2012    s = splnet();
2013
2014    /* Check vif. */
2015    if (!legal_vif_num(i)) {
2016	splx(s);
2017	return EADDRNOTAVAIL;
2018    }
2019
2020    /* Check if socket is available. */
2021    if (viftable[i].v_rsvpd != NULL) {
2022	splx(s);
2023	return EADDRINUSE;
2024    }
2025
2026    viftable[i].v_rsvpd = so;
2027    /* This may seem silly, but we need to be sure we don't over-increment
2028     * the RSVP counter, in case something slips up.
2029     */
2030    if (!viftable[i].v_rsvp_on) {
2031	viftable[i].v_rsvp_on = 1;
2032	rsvp_on++;
2033    }
2034
2035    splx(s);
2036    return 0;
2037}
2038
2039int
2040ip_rsvp_vif_done(so, m)
2041    struct socket *so;
2042    struct mbuf *m;
2043{
2044	int i;
2045	register int s;
2046
2047    if (rsvpdebug)
2048	printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n",
2049	       so->so_type, so->so_proto->pr_protocol);
2050
2051    if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
2052	return EOPNOTSUPP;
2053
2054    /* Check mbuf. */
2055    if (m == NULL || m->m_len != sizeof(int)) {
2056	    return EINVAL;
2057    }
2058    i = *(mtod(m, int *));
2059
2060    s = splnet();
2061
2062    /* Check vif. */
2063    if (!legal_vif_num(i)) {
2064	splx(s);
2065        return EADDRNOTAVAIL;
2066    }
2067
2068    if (rsvpdebug)
2069	printf("ip_rsvp_vif_done: v_rsvpd = %x so = %x\n",
2070	       viftable[i].v_rsvpd, so);
2071
2072    viftable[i].v_rsvpd = NULL;
2073    /* This may seem silly, but we need to be sure we don't over-decrement
2074     * the RSVP counter, in case something slips up.
2075     */
2076    if (viftable[i].v_rsvp_on) {
2077	viftable[i].v_rsvp_on = 0;
2078	rsvp_on--;
2079    }
2080
2081    splx(s);
2082    return 0;
2083}
2084
2085void
2086ip_rsvp_force_done(so)
2087    struct socket *so;
2088{
2089    int vifi;
2090    register int s;
2091
2092    /* Don't bother if it is not the right type of socket. */
2093    if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
2094	return;
2095
2096    s = splnet();
2097
2098    /* The socket may be attached to more than one vif...this
2099     * is perfectly legal.
2100     */
2101    for (vifi = 0; vifi < numvifs; vifi++) {
2102	if (viftable[vifi].v_rsvpd == so) {
2103	    viftable[vifi].v_rsvpd = NULL;
2104	    /* This may seem silly, but we need to be sure we don't
2105	     * over-decrement the RSVP counter, in case something slips up.
2106	     */
2107	    if (viftable[vifi].v_rsvp_on) {
2108		viftable[vifi].v_rsvp_on = 0;
2109		rsvp_on--;
2110	    }
2111	}
2112    }
2113
2114    splx(s);
2115    return;
2116}
2117
2118void
2119rsvp_input(m, ifp)
2120    struct mbuf *m;
2121    struct ifnet *ifp;
2122{
2123    int vifi;
2124    register struct ip *ip = mtod(m, struct ip *);
2125    static struct sockaddr_in rsvp_src = { AF_INET };
2126    register int s;
2127
2128    if (rsvpdebug)
2129	printf("rsvp_input: rsvp_on %d\n",rsvp_on);
2130
2131    /* Can still get packets with rsvp_on = 0 if there is a local member
2132     * of the group to which the RSVP packet is addressed.  But in this
2133     * case we want to throw the packet away.
2134     */
2135    if (!rsvp_on) {
2136	m_freem(m);
2137	return;
2138    }
2139
2140    /* If the old-style non-vif-associated socket is set, then use
2141     * it and ignore the new ones.
2142     */
2143    if (ip_rsvpd != NULL) {
2144	if (rsvpdebug)
2145	    printf("rsvp_input: Sending packet up old-style socket\n");
2146	rip_input(m);
2147	return;
2148    }
2149
2150    s = splnet();
2151
2152    if (rsvpdebug)
2153	printf("rsvp_input: check vifs\n");
2154
2155    /* Find which vif the packet arrived on. */
2156    for (vifi = 0; vifi < numvifs; vifi++) {
2157	if (viftable[vifi].v_ifp == ifp)
2158 		break;
2159 	}
2160
2161    if (vifi == numvifs) {
2162	/* Can't find vif packet arrived on. Drop packet. */
2163	if (rsvpdebug)
2164	    printf("rsvp_input: Can't find vif for packet...dropping it.\n");
2165	m_freem(m);
2166	splx(s);
2167	return;
2168    }
2169
2170    if (rsvpdebug)
2171	printf("rsvp_input: check socket\n");
2172
2173    if (viftable[vifi].v_rsvpd == NULL) {
2174	/* drop packet, since there is no specific socket for this
2175	 * interface */
2176	    if (rsvpdebug)
2177		    printf("rsvp_input: No socket defined for vif %d\n",vifi);
2178	    m_freem(m);
2179	    splx(s);
2180	    return;
2181    }
2182    rsvp_src.sin_addr = ip->ip_src;
2183
2184    if (rsvpdebug && m)
2185	printf("rsvp_input: m->m_len = %d, sbspace() = %d\n",
2186	       m->m_len,sbspace(&(viftable[vifi].v_rsvpd->so_rcv)));
2187
2188    if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0)
2189	if (rsvpdebug)
2190	    printf("rsvp_input: Failed to append to socket\n");
2191    else
2192	if (rsvpdebug)
2193	    printf("rsvp_input: send packet up\n");
2194
2195    splx(s);
2196}
2197
2198#ifdef MROUTE_LKM
2199#include <sys/conf.h>
2200#include <sys/exec.h>
2201#include <sys/sysent.h>
2202#include <sys/lkm.h>
2203
2204MOD_MISC("ip_mroute_mod")
2205
2206static int
2207ip_mroute_mod_handle(struct lkm_table *lkmtp, int cmd)
2208{
2209	int i;
2210	struct lkm_misc	*args = lkmtp->private.lkm_misc;
2211	int err = 0;
2212
2213	switch(cmd) {
2214		static int (*old_ip_mrouter_cmd)();
2215		static int (*old_ip_mrouter_done)();
2216		static int (*old_ip_mforward)();
2217		static int (*old_mrt_ioctl)();
2218		static void (*old_proto4_input)();
2219		static int (*old_legal_vif_num)();
2220		extern struct protosw inetsw[];
2221
2222	case LKM_E_LOAD:
2223		if(lkmexists(lkmtp) || ip_mrtproto)
2224		  return(EEXIST);
2225		old_ip_mrouter_cmd = ip_mrouter_cmd;
2226		ip_mrouter_cmd = X_ip_mrouter_cmd;
2227		old_ip_mrouter_done = ip_mrouter_done;
2228		ip_mrouter_done = X_ip_mrouter_done;
2229		old_ip_mforward = ip_mforward;
2230		ip_mforward = X_ip_mforward;
2231		old_mrt_ioctl = mrt_ioctl;
2232		mrt_ioctl = X_mrt_ioctl;
2233              old_proto4_input = inetsw[ip_protox[ENCAP_PROTO]].pr_input;
2234              inetsw[ip_protox[ENCAP_PROTO]].pr_input = X_ipip_input;
2235		old_legal_vif_num = legal_vif_num;
2236		legal_vif_num = X_legal_vif_num;
2237		ip_mrtproto = IGMP_DVMRP;
2238
2239		printf("\nIP multicast routing loaded\n");
2240		break;
2241
2242	case LKM_E_UNLOAD:
2243		if (ip_mrouter)
2244		  return EINVAL;
2245
2246		ip_mrouter_cmd = old_ip_mrouter_cmd;
2247		ip_mrouter_done = old_ip_mrouter_done;
2248		ip_mforward = old_ip_mforward;
2249		mrt_ioctl = old_mrt_ioctl;
2250              inetsw[ip_protox[ENCAP_PROTO]].pr_input = old_proto4_input;
2251		legal_vif_num = old_legal_vif_num;
2252		ip_mrtproto = 0;
2253		break;
2254
2255	default:
2256		err = EINVAL;
2257		break;
2258	}
2259
2260	return(err);
2261}
2262
2263int
2264ip_mroute_mod(struct lkm_table *lkmtp, int cmd, int ver) {
2265	DISPATCH(lkmtp, cmd, ver, ip_mroute_mod_handle, ip_mroute_mod_handle,
2266		 nosys);
2267}
2268
2269#endif /* MROUTE_LKM */
2270#endif /* MROUTING */
2271