ip_mroute.c revision 9334
1281494Sandrew/* 2281494Sandrew * IP multicast forwarding procedures 3281494Sandrew * 4281494Sandrew * Written by David Waitzman, BBN Labs, August 1988. 5281494Sandrew * Modified by Steve Deering, Stanford, February 1989. 6281494Sandrew * Modified by Mark J. Steiglitz, Stanford, May, 1991 7281494Sandrew * Modified by Van Jacobson, LBL, January 1993 8281494Sandrew * Modified by Ajit Thyagarajan, PARC, August 1993 9281494Sandrew * Modified by Bill Fenner, PARC, April 1995 10281494Sandrew * 11281494Sandrew * MROUTING Revision: 3.5 12281494Sandrew * $Id$ 13281494Sandrew */ 14281494Sandrew 15281494Sandrew 16281494Sandrew#include <sys/param.h> 17281494Sandrew#include <sys/systm.h> 18281494Sandrew#include <sys/mbuf.h> 19281494Sandrew#include <sys/socket.h> 20281494Sandrew#include <sys/socketvar.h> 21281494Sandrew#include <sys/protosw.h> 22281494Sandrew#include <sys/errno.h> 23281494Sandrew#include <sys/time.h> 24281494Sandrew#include <sys/kernel.h> 25281494Sandrew#include <sys/ioctl.h> 26281494Sandrew#include <sys/syslog.h> 27281494Sandrew#include <sys/queue.h> 28281494Sandrew#include <net/if.h> 29287487Sandrew#include <net/route.h> 30281494Sandrew#include <netinet/in.h> 31281494Sandrew#include <netinet/in_systm.h> 32281494Sandrew#include <netinet/ip.h> 33281494Sandrew#include <netinet/ip_var.h> 34281494Sandrew#include <netinet/in_pcb.h> 35281494Sandrew#include <netinet/in_var.h> 36281494Sandrew#include <netinet/igmp.h> 37281494Sandrew#include <netinet/igmp_var.h> 38281494Sandrew#include <netinet/ip_mroute.h> 39281494Sandrew#include <netinet/udp.h> 40281494Sandrew 41281494Sandrew#ifndef NTOHL 42281494Sandrew#if BYTE_ORDER != BIG_ENDIAN 43281494Sandrew#define NTOHL(d) ((d) = ntohl((d))) 44281494Sandrew#define NTOHS(d) ((d) = ntohs((u_short)(d))) 45281494Sandrew#define HTONL(d) ((d) = htonl((d))) 46281494Sandrew#define HTONS(d) ((d) = htons((u_short)(d))) 47281494Sandrew#else 48281494Sandrew#define NTOHL(d) 49281494Sandrew#define NTOHS(d) 50281494Sandrew#define HTONL(d) 51281494Sandrew#define HTONS(d) 52281494Sandrew#endif 53281494Sandrew#endif 54281494Sandrew 55281494Sandrewextern int rsvp_on; 56281494Sandrew 57281494Sandrew#ifndef MROUTING 58281494Sandrew/* 59291937Skib * Dummy routines and globals used when multicast routing is not compiled in. 60281494Sandrew */ 61281494Sandrew 62281494Sandrewstruct socket *ip_mrouter = NULL; 63281494Sandrewu_int ip_mrtproto = 0; 64281494Sandrewstruct mrtstat mrtstat; 65281494Sandrewu_int rsvpdebug = 0; 66281494Sandrew 67281494Sandrewint 68281494Sandrew_ip_mrouter_set(cmd, so, m) 69281494Sandrew int cmd; 70281494Sandrew struct socket *so; 71281494Sandrew struct mbuf *m; 72281494Sandrew{ 73281494Sandrew return(EOPNOTSUPP); 74281494Sandrew} 75281494Sandrew 76291937Skibint (*ip_mrouter_set)(int, struct socket *, struct mbuf *) = _ip_mrouter_set; 77281494Sandrew 78281494Sandrew 79281494Sandrewint 80281494Sandrew_ip_mrouter_get(cmd, so, m) 81281494Sandrew int cmd; 82281494Sandrew struct socket *so; 83281494Sandrew struct mbuf **m; 84281494Sandrew{ 85281494Sandrew return(EOPNOTSUPP); 86296266Swma} 87281494Sandrew 88281494Sandrewint (*ip_mrouter_get)(int, struct socket *, struct mbuf **) = _ip_mrouter_get; 89281494Sandrew 90281494Sandrewint 91281494Sandrew_ip_mrouter_done() 92281494Sandrew{ 93281494Sandrew return(0); 94281494Sandrew} 95281494Sandrew 96281494Sandrewint (*ip_mrouter_done)(void) = _ip_mrouter_done; 97281494Sandrew 98281494Sandrewint 99281494Sandrew_ip_mforward(ip, ifp, m, imo) 100281494Sandrew struct ip *ip; 101281494Sandrew struct ifnet *ifp; 102281494Sandrew struct mbuf *m; 103281494Sandrew struct ip_moptions *imo; 104281494Sandrew{ 105281494Sandrew return(0); 106281494Sandrew} 107281494Sandrew 108281494Sandrewint (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, 109281494Sandrew struct ip_moptions *) = _ip_mforward; 110281494Sandrew 111281494Sandrewint 112281494Sandrew_mrt_ioctl(int req, caddr_t data, struct proc *p) 113281494Sandrew{ 114281494Sandrew return EOPNOTSUPP; 115281494Sandrew} 116281494Sandrew 117281494Sandrewint (*mrt_ioctl)(int, caddr_t, struct proc *) = _mrt_ioctl; 118281494Sandrew 119281494Sandrewvoid 120281494Sandrewrsvp_input(m, iphlen) /* XXX must fixup manually */ 121281494Sandrew struct mbuf *m; 122281494Sandrew int iphlen; 123281494Sandrew{ 124281494Sandrew /* Can still get packets with rsvp_on = 0 if there is a local member 125285316Sandrew * of the group to which the RSVP packet is addressed. But in this 126285316Sandrew * case we want to throw the packet away. 127285316Sandrew */ 128285316Sandrew if (!rsvp_on) { 129285316Sandrew m_freem(m); 130285316Sandrew return; 131285316Sandrew } 132281494Sandrew 133281494Sandrew if (ip_rsvpd != NULL) { 134281494Sandrew if (rsvpdebug) 135281494Sandrew printf("rsvp_input: Sending packet up old-style socket\n"); 136281494Sandrew rip_input(m); 137281494Sandrew return; 138281494Sandrew } 139281494Sandrew /* Drop the packet */ 140281494Sandrew m_freem(m); 141281494Sandrew} 142281494Sandrew 143281494Sandrewvoid ipip_input(struct mbuf *m) { /* XXX must fixup manually */ 144281494Sandrew rip_input(m); 145281494Sandrew} 146281494Sandrew 147281494Sandrewint (*legal_vif_num)(int) = 0; 148281494Sandrew 149281494Sandrew/* 150281494Sandrew * This should never be called, since IP_MULTICAST_VIF should fail, but 151281494Sandrew * just in case it does get called, the code a little lower in ip_output 152281494Sandrew * will assign the packet a local address. 153281494Sandrew */ 154281494Sandrewu_long 155281494Sandrew_ip_mcast_src(int vifi) { return INADDR_ANY; } 156281494Sandrewu_long (*ip_mcast_src)(int) = _ip_mcast_src; 157281494Sandrew 158281494Sandrewint 159281494Sandrewip_rsvp_vif_init(so, m) 160281494Sandrew struct socket *so; 161281494Sandrew struct mbuf *m; 162281494Sandrew{ 163281494Sandrew return(EINVAL); 164281494Sandrew} 165281494Sandrew 166281494Sandrewint 167281494Sandrewip_rsvp_vif_done(so, m) 168281494Sandrew struct socket *so; 169281494Sandrew struct mbuf *m; 170281494Sandrew{ 171281494Sandrew return(EINVAL); 172281494Sandrew} 173281494Sandrew 174281494Sandrewvoid 175281494Sandrewip_rsvp_force_done(so) 176281494Sandrew struct socket *so; 177281494Sandrew{ 178281494Sandrew return; 179281494Sandrew} 180281494Sandrew 181281494Sandrew#else /* MROUTING */ 182281494Sandrew 183281494Sandrew#define M_HASCL(m) ((m)->m_flags & M_EXT) 184281494Sandrew 185281494Sandrew#define INSIZ sizeof(struct in_addr) 186286225Sandrew#define same(a1, a2) \ 187281494Sandrew (bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0) 188281494Sandrew 189281494Sandrew#define MT_MRTABLE MT_RTABLE /* since nothing else uses it */ 190281494Sandrew 191281494Sandrew/* 192281494Sandrew * Globals. All but ip_mrouter and ip_mrtproto could be static, 193281494Sandrew * except for netstat or debugging purposes. 194281494Sandrew */ 195281494Sandrew#ifndef MROUTE_LKM 196281494Sandrewstruct socket *ip_mrouter = NULL; 197281494Sandrewstruct mrtstat mrtstat; 198281494Sandrew 199281494Sandrewint ip_mrtproto = IGMP_DVMRP; /* for netstat only */ 200281494Sandrew#else /* MROUTE_LKM */ 201281494Sandrewextern struct mrtstat mrtstat; 202281494Sandrewextern int ip_mrtproto; 203281494Sandrew#endif 204281494Sandrew 205281494Sandrew#define NO_RTE_FOUND 0x1 206281494Sandrew#define RTE_FOUND 0x2 207281494Sandrew 208281494Sandrewstruct mbuf *mfctable[MFCTBLSIZ]; 209281494Sandrewu_char nexpire[MFCTBLSIZ]; 210281494Sandrewstruct vif viftable[MAXVIFS]; 211281494Sandrewu_int mrtdebug = 0; /* debug level */ 212281494Sandrew#define DEBUG_MFC 0x02 213281494Sandrew#define DEBUG_FORWARD 0x04 214281494Sandrew#define DEBUG_EXPIRE 0x08 215286073Semaste#define DEBUG_XMIT 0x10 216281494Sandrewu_int tbfdebug = 0; /* tbf debug level */ 217281494Sandrewu_int rsvpdebug = 0; /* rsvp debug level */ 218281494Sandrew 219281494Sandrew#define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */ 220281494Sandrew#define UPCALL_EXPIRE 6 /* number of timeouts */ 221281494Sandrew 222286073Semaste/* 223281494Sandrew * Define the token bucket filter structures 224281494Sandrew * tbftable -> each vif has one of these for storing info 225281494Sandrew * qtable -> each interface has an associated queue of pkts 226281494Sandrew */ 227281494Sandrew 228281494Sandrewstruct tbf tbftable[MAXVIFS]; 229286073Semastestruct pkt_queue qtable[MAXVIFS][MAXQSIZE]; 230281494Sandrew 231281494Sandrew/* 232281494Sandrew * 'Interfaces' associated with decapsulator (so we can tell 233281494Sandrew * packets that went through it from ones that get reflected 234281494Sandrew * by a broken gateway). These interfaces are never linked into 235281494Sandrew * the system ifnet list & no routes point to them. I.e., packets 236281494Sandrew * can't be sent this way. They only exist as a placeholder for 237295142Sandrew * multicast source verification. 238295142Sandrew */ 239281494Sandrewstruct ifnet multicast_decap_if[MAXVIFS]; 240281494Sandrew 241281494Sandrew#define ENCAP_TTL 64 242281494Sandrew#define ENCAP_PROTO IPPROTO_IPIP /* 4 */ 243281494Sandrew 244281494Sandrew/* prototype IP hdr for encapsulated packets */ 245281494Sandrewstruct ip multicast_encap_iphdr = { 246295142Sandrew#if BYTE_ORDER == LITTLE_ENDIAN 247295142Sandrew sizeof(struct ip) >> 2, IPVERSION, 248281494Sandrew#else 249281494Sandrew IPVERSION, sizeof(struct ip) >> 2, 250281494Sandrew#endif 251281494Sandrew 0, /* tos */ 252281494Sandrew sizeof(struct ip), /* total length */ 253281494Sandrew 0, /* id */ 254281494Sandrew 0, /* frag offset */ 255281494Sandrew ENCAP_TTL, ENCAP_PROTO, 256281494Sandrew 0, /* checksum */ 257281494Sandrew}; 258289502Sandrew 259289502Sandrew/* 260289502Sandrew * Private variables. 261289502Sandrew */ 262289502Sandrewstatic vifi_t numvifs = 0; 263289502Sandrewstatic void (*encap_oldrawip)() = 0; 264289502Sandrewstatic int have_encap_tunnel = 0; 265281494Sandrew 266281494Sandrew/* 267281494Sandrew * one-back cache used by ipip_input to locate a tunnel's vif 268281494Sandrew * given a datagram's src ip address. 269281494Sandrew */ 270281494Sandrewstatic u_long last_encap_src; 271281494Sandrewstatic struct vif *last_encap_vif; 272281494Sandrew 273281494Sandrewstatic int get_sg_cnt(struct sioc_sg_req *); 274281494Sandrewstatic int get_vif_cnt(struct sioc_vif_req *); 275281494Sandrewint ip_mrouter_init(struct socket *, struct mbuf *); 276281494Sandrewstatic int add_vif(struct vifctl *); 277281494Sandrewstatic int del_vif(vifi_t *); 278281494Sandrewstatic int add_mfc(struct mfcctl *); 279281494Sandrewstatic int del_mfc(struct mfcctl *); 280285334Sandrewstatic int get_version(struct mbuf *); 281281494Sandrewstatic int get_assert(struct mbuf *); 282285334Sandrewstatic int set_assert(int *); 283285334Sandrewstatic void expire_upcalls(void *); 284281494Sandrewstatic int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, 285285334Sandrew vifi_t); 286285334Sandrewstatic void phyint_send(struct ip *, struct vif *, struct mbuf *); 287281494Sandrewstatic void encap_send(struct ip *, struct vif *, struct mbuf *); 288281494Sandrewstatic void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long, 289281494Sandrew struct ip_moptions *); 290281494Sandrewstatic void tbf_queue(struct vif *, struct mbuf *, struct ip *, struct ip_moptions *); 291281494Sandrewstatic void tbf_process_q(struct vif *); 292281494Sandrewstatic void tbf_dequeue(struct vif *, int); 293281494Sandrewstatic void tbf_reprocess_q(void *); 294281494Sandrewstatic int tbf_dq_sel(struct vif *, struct ip *); 295281494Sandrewstatic void tbf_send_packet(struct vif *, struct mbuf *, struct ip_moptions *); 296281494Sandrewstatic void tbf_update_tokens(struct vif *); 297281494Sandrewstatic int priority(struct vif *, struct ip *); 298281494Sandrewvoid multiencap_decap(struct mbuf *); 299281494Sandrew 300281494Sandrew/* 301281494Sandrew * whether or not special PIM assert processing is enabled. 302281494Sandrew */ 303281494Sandrewstatic int pim_assert; 304281494Sandrew/* 305281494Sandrew * Rate limit for assert notification messages, in usec 306281494Sandrew */ 307281494Sandrew#define ASSERT_MSG_TIME 3000000 308281494Sandrew 309281494Sandrew/* 310281494Sandrew * Hash function for a source, group entry 311281494Sandrew */ 312281494Sandrew#define MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \ 313281494Sandrew ((g) >> 20) ^ ((g) >> 10) ^ (g)) 314281494Sandrew 315281494Sandrew/* 316281494Sandrew * Find a route for a given origin IP address and Multicast group address 317281494Sandrew * Type of service parameter to be added in the future!!! 318281494Sandrew */ 319281494Sandrew 320281494Sandrew#define MFCFIND(o, g, rt) { \ 321281494Sandrew register struct mbuf *_mb_rt = mfctable[MFCHASH(o,g)]; \ 322281494Sandrew register struct mfc *_rt = NULL; \ 323281494Sandrew rt = NULL; \ 324281494Sandrew ++mrtstat.mrts_mfc_lookups; \ 325281494Sandrew while (_mb_rt) { \ 326281494Sandrew _rt = mtod(_mb_rt, struct mfc *); \ 327281494Sandrew if ((_rt->mfc_origin.s_addr == o) && \ 328286225Sandrew (_rt->mfc_mcastgrp.s_addr == g) && \ 329281494Sandrew (_mb_rt->m_act == NULL)) { \ 330281494Sandrew rt = _rt; \ 331281494Sandrew break; \ 332281494Sandrew } \ 333281494Sandrew _mb_rt = _mb_rt->m_next; \ 334281494Sandrew } \ 335281494Sandrew if (rt == NULL) { \ 336281494Sandrew ++mrtstat.mrts_mfc_misses; \ 337281494Sandrew } \ 338281494Sandrew} 339281494Sandrew 340281494Sandrew 341281494Sandrew/* 342281494Sandrew * Macros to compute elapsed time efficiently 343281494Sandrew * Borrowed from Van Jacobson's scheduling code 344281494Sandrew */ 345281494Sandrew#define TV_DELTA(a, b, delta) { \ 346281494Sandrew register int xxs; \ 347281494Sandrew \ 348281494Sandrew delta = (a).tv_usec - (b).tv_usec; \ 349281494Sandrew if ((xxs = (a).tv_sec - (b).tv_sec)) { \ 350281494Sandrew switch (xxs) { \ 351281494Sandrew case 2: \ 352281494Sandrew delta += 1000000; \ 353281494Sandrew /* fall through */ \ 354281494Sandrew case 1: \ 355281494Sandrew delta += 1000000; \ 356281494Sandrew break; \ 357281494Sandrew default: \ 358281494Sandrew delta += (1000000 * xxs); \ 359281494Sandrew } \ 360281494Sandrew } \ 361281494Sandrew} 362281494Sandrew 363281494Sandrew#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \ 364281494Sandrew (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) 365281494Sandrew 366281494Sandrew#ifdef UPCALL_TIMING 367281494Sandrewu_long upcall_data[51]; 368281494Sandrewstatic void collate(struct timeval *); 369281494Sandrew#endif /* UPCALL_TIMING */ 370281494Sandrew 371281494Sandrew 372281494Sandrew/* 373281494Sandrew * Handle MRT setsockopt commands to modify the multicast routing tables. 374281494Sandrew */ 375281494Sandrewint 376281494SandrewX_ip_mrouter_set(cmd, so, m) 377281494Sandrew int cmd; 378281494Sandrew struct socket *so; 379281494Sandrew struct mbuf *m; 380281494Sandrew{ 381281494Sandrew if (cmd != MRT_INIT && so != ip_mrouter) return EACCES; 382281494Sandrew 383281494Sandrew switch (cmd) { 384281494Sandrew case MRT_INIT: return ip_mrouter_init(so, m); 385281494Sandrew case MRT_DONE: return ip_mrouter_done(); 386281494Sandrew case MRT_ADD_VIF: return add_vif (mtod(m, struct vifctl *)); 387281494Sandrew case MRT_DEL_VIF: return del_vif (mtod(m, vifi_t *)); 388281494Sandrew case MRT_ADD_MFC: return add_mfc (mtod(m, struct mfcctl *)); 389281494Sandrew case MRT_DEL_MFC: return del_mfc (mtod(m, struct mfcctl *)); 390286134Sandrew case MRT_ASSERT: return set_assert(mtod(m, int *)); 391286134Sandrew default: return EOPNOTSUPP; 392286134Sandrew } 393286134Sandrew} 394286134Sandrew 395281494Sandrew#ifndef MROUTE_LKM 396281494Sandrewint (*ip_mrouter_set)(int, struct socket *, struct mbuf *) = X_ip_mrouter_set; 397281494Sandrew#endif 398281494Sandrew 399281494Sandrew/* 400281494Sandrew * Handle MRT getsockopt commands 401281494Sandrew */ 402281494Sandrewint 403281494SandrewX_ip_mrouter_get(cmd, so, m) 404281494Sandrew int cmd; 405286073Semaste struct socket *so; 406281494Sandrew struct mbuf **m; 407281494Sandrew{ 408281494Sandrew struct mbuf *mb; 409281494Sandrew 410281494Sandrew if (so != ip_mrouter) return EACCES; 411281494Sandrew 412281494Sandrew *m = mb = m_get(M_WAIT, MT_SOOPTS); 413286073Semaste 414281494Sandrew switch (cmd) { 415281494Sandrew case MRT_VERSION: return get_version(mb); 416281494Sandrew case MRT_ASSERT: return get_assert(mb); 417281494Sandrew default: return EOPNOTSUPP; 418281494Sandrew } 419284273Sandrew} 420284273Sandrew 421281494Sandrew#ifndef MROUTE_LKM 422281494Sandrewint (*ip_mrouter_get)(int, struct socket *, struct mbuf **) = X_ip_mrouter_get; 423281494Sandrew#endif 424281494Sandrew 425281494Sandrew/* 426281494Sandrew * Handle ioctl commands to obtain information from the cache 427281494Sandrew */ 428281494Sandrewint 429281494SandrewX_mrt_ioctl(cmd, data) 430281494Sandrew int cmd; 431281494Sandrew caddr_t data; 432281494Sandrew{ 433281494Sandrew int error = 0; 434281494Sandrew 435281494Sandrew switch (cmd) { 436281494Sandrew case (SIOCGETVIFCNT): 437281494Sandrew return (get_vif_cnt((struct sioc_vif_req *)data)); 438281494Sandrew break; 439281494Sandrew case (SIOCGETSGCNT): 440281494Sandrew return (get_sg_cnt((struct sioc_sg_req *)data)); 441281494Sandrew break; 442281494Sandrew default: 443281494Sandrew return (EINVAL); 444281494Sandrew break; 445281494Sandrew } 446281494Sandrew return error; 447281494Sandrew} 448281494Sandrew 449281494Sandrew#ifndef MROUTE_LKM 450281494Sandrewint (*mrt_ioctl)(int, caddr_t, struct proc *) = X_mrt_ioctl; 451281494Sandrew#endif 452281494Sandrew 453281494Sandrew/* 454281494Sandrew * returns the packet, byte, rpf-failure count for the source group provided 455281494Sandrew */ 456281494Sandrewstatic int 457281494Sandrewget_sg_cnt(req) 458281494Sandrew register struct sioc_sg_req *req; 459281494Sandrew{ 460281494Sandrew register struct mfc *rt; 461281494Sandrew int s; 462281494Sandrew 463281494Sandrew s = splnet(); 464281494Sandrew MFCFIND(req->src.s_addr, req->grp.s_addr, rt); 465281494Sandrew splx(s); 466281494Sandrew if (rt != NULL) { 467281494Sandrew req->pktcnt = rt->mfc_pkt_cnt; 468281494Sandrew req->bytecnt = rt->mfc_byte_cnt; 469281494Sandrew req->wrong_if = rt->mfc_wrong_if; 470281494Sandrew } else 471281494Sandrew req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff; 472281494Sandrew 473281494Sandrew return 0; 474281494Sandrew} 475281494Sandrew 476281494Sandrew/* 477281494Sandrew * returns the input and output packet and byte counts on the vif provided 478281494Sandrew */ 479281494Sandrewstatic int 480281494Sandrewget_vif_cnt(req) 481281494Sandrew register struct sioc_vif_req *req; 482281494Sandrew{ 483281494Sandrew register vifi_t vifi = req->vifi; 484281494Sandrew 485281494Sandrew if (vifi >= numvifs) return EINVAL; 486281494Sandrew 487281494Sandrew req->icount = viftable[vifi].v_pkt_in; 488281494Sandrew req->ocount = viftable[vifi].v_pkt_out; 489281494Sandrew req->ibytes = viftable[vifi].v_bytes_in; 490281494Sandrew req->obytes = viftable[vifi].v_bytes_out; 491281494Sandrew 492281494Sandrew return 0; 493281494Sandrew} 494281494Sandrew 495281494Sandrew/* 496281494Sandrew * Enable multicast routing 497281494Sandrew */ 498281494Sandrewint 499281494Sandrewip_mrouter_init(so, m) 500281494Sandrew struct socket *so; 501281494Sandrew struct mbuf *m; 502281494Sandrew{ 503281494Sandrew int *v; 504281494Sandrew int i; 505281494Sandrew 506281494Sandrew if (mrtdebug) 507281494Sandrew log(LOG_DEBUG,"ip_mrouter_init: so_type = %d, pr_protocol = %d", 508281494Sandrew so->so_type, so->so_proto->pr_protocol); 509281494Sandrew 510281494Sandrew if (so->so_type != SOCK_RAW || 511281494Sandrew so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP; 512291937Skib 513281494Sandrew if (!m || (m->m_len != sizeof(int *))) 514281494Sandrew return ENOPROTOOPT; 515281494Sandrew 516281494Sandrew v = mtod(m, int *); 517281494Sandrew if (*v != 1) 518281494Sandrew return ENOPROTOOPT; 519281494Sandrew 520281494Sandrew if (ip_mrouter != NULL) return EADDRINUSE; 521281494Sandrew 522281494Sandrew ip_mrouter = so; 523281494Sandrew 524281494Sandrew bzero((caddr_t)mfctable, sizeof(mfctable)); 525281494Sandrew bzero((caddr_t)nexpire, sizeof(nexpire)); 526281494Sandrew 527281494Sandrew pim_assert = 0; 528281494Sandrew 529281494Sandrew timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT); 530281494Sandrew 531281494Sandrew if (mrtdebug) 532281494Sandrew log(LOG_DEBUG, "ip_mrouter_init"); 533294930Sjhb 534281494Sandrew return 0; 535281494Sandrew} 536281494Sandrew 537281494Sandrew/* 538281494Sandrew * Disable multicast routing 539281494Sandrew */ 540281494Sandrewint 541281494SandrewX_ip_mrouter_done() 542281494Sandrew{ 543281494Sandrew vifi_t vifi; 544281494Sandrew int i; 545281494Sandrew struct ifnet *ifp; 546281494Sandrew struct ifreq ifr; 547281494Sandrew struct mbuf *mb_rt; 548281494Sandrew struct mfc *rt; 549281494Sandrew struct mbuf *m; 550281494Sandrew struct rtdetq *rte; 551281494Sandrew int s; 552281494Sandrew 553281494Sandrew s = splnet(); 554281494Sandrew 555281494Sandrew /* 556281494Sandrew * For each phyint in use, disable promiscuous reception of all IP 557281494Sandrew * multicasts. 558281494Sandrew */ 559281494Sandrew for (vifi = 0; vifi < numvifs; vifi++) { 560281494Sandrew if (viftable[vifi].v_lcl_addr.s_addr != 0 && 561281494Sandrew !(viftable[vifi].v_flags & VIFF_TUNNEL)) { 562281494Sandrew ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; 563281494Sandrew ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr 564281494Sandrew = INADDR_ANY; 565281494Sandrew ifp = viftable[vifi].v_ifp; 566281494Sandrew (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); 567281494Sandrew } 568281494Sandrew } 569281494Sandrew bzero((caddr_t)qtable, sizeof(qtable)); 570281494Sandrew bzero((caddr_t)tbftable, sizeof(tbftable)); 571291937Skib bzero((caddr_t)viftable, sizeof(viftable)); 572291937Skib numvifs = 0; 573291937Skib pim_assert = 0; 574291937Skib 575291937Skib untimeout(expire_upcalls, (caddr_t)NULL); 576291937Skib 577281494Sandrew /* 578281494Sandrew * Free all multicast forwarding cache entries. 579281494Sandrew */ 580281494Sandrew for (i = 0; i < MFCTBLSIZ; i++) { 581281494Sandrew mb_rt = mfctable[i]; 582281494Sandrew while (mb_rt) { 583281494Sandrew if (mb_rt->m_act != NULL) { 584281494Sandrew while (mb_rt->m_act) { 585281494Sandrew m = mb_rt->m_act; 586281494Sandrew mb_rt->m_act = m->m_act; 587281494Sandrew rte = mtod(m, struct rtdetq *); 588281494Sandrew m_freem(rte->m); 589281494Sandrew m_free(m); 590281494Sandrew } 591281494Sandrew } 592281494Sandrew mb_rt = m_free(mb_rt); 593281494Sandrew } 594281494Sandrew } 595281494Sandrew 596281494Sandrew bzero((caddr_t)mfctable, sizeof(mfctable)); 597281494Sandrew 598281494Sandrew /* 599281494Sandrew * Reset de-encapsulation cache 600281494Sandrew */ 601281494Sandrew last_encap_src = NULL; 602281494Sandrew last_encap_vif = NULL; 603281494Sandrew have_encap_tunnel = 0; 604281494Sandrew 605281494Sandrew ip_mrouter = NULL; 606281494Sandrew 607281494Sandrew splx(s); 608281494Sandrew 609281494Sandrew if (mrtdebug) 610281494Sandrew log(LOG_DEBUG, "ip_mrouter_done"); 611281494Sandrew 612281494Sandrew return 0; 613281494Sandrew} 614281494Sandrew 615281494Sandrew#ifndef MROUTE_LKM 616281494Sandrewint (*ip_mrouter_done)(void) = X_ip_mrouter_done; 617281494Sandrew#endif 618281494Sandrew 619281494Sandrewstatic int 620281494Sandrewget_version(mb) 621281494Sandrew struct mbuf *mb; 622281494Sandrew{ 623281494Sandrew int *v; 624281494Sandrew 625281494Sandrew v = mtod(mb, int *); 626281494Sandrew 627281494Sandrew *v = 0x0305; /* XXX !!!! */ 628281494Sandrew mb->m_len = sizeof(int); 629281494Sandrew 630281494Sandrew return 0; 631281494Sandrew} 632281494Sandrew 633281494Sandrew/* 634281494Sandrew * Set PIM assert processing global 635281494Sandrew */ 636281494Sandrewstatic int 637281494Sandrewset_assert(i) 638281494Sandrew int *i; 639281494Sandrew{ 640281494Sandrew if ((*i != 1) && (*i != 0)) 641281494Sandrew return EINVAL; 642281494Sandrew 643281494Sandrew pim_assert = *i; 644281494Sandrew 645281494Sandrew return 0; 646281494Sandrew} 647281494Sandrew 648281494Sandrew/* 649281494Sandrew * Get PIM assert processing global 650281494Sandrew */ 651281494Sandrewstatic int 652281494Sandrewget_assert(m) 653281494Sandrew struct mbuf *m; 654281494Sandrew{ 655281494Sandrew int *i; 656281494Sandrew 657281494Sandrew i = mtod(m, int *); 658281494Sandrew 659281494Sandrew *i = pim_assert; 660281494Sandrew 661281494Sandrew return 0; 662281494Sandrew} 663281494Sandrew 664281494Sandrew/* 665281494Sandrew * Add a vif to the vif table 666281494Sandrew */ 667281494Sandrewstatic int 668281494Sandrewadd_vif(vifcp) 669281494Sandrew register struct vifctl *vifcp; 670281494Sandrew{ 671281494Sandrew register struct vif *vifp = viftable + vifcp->vifc_vifi; 672296266Swma static struct sockaddr_in sin = {sizeof sin, AF_INET}; 673296266Swma struct ifaddr *ifa; 674296266Swma struct ifnet *ifp; 675296266Swma struct ifreq ifr; 676296266Swma int error, s; 677296266Swma struct tbf *v_tbf = tbftable + vifcp->vifc_vifi; 678296266Swma 679296266Swma if (vifcp->vifc_vifi >= MAXVIFS) return EINVAL; 680296266Swma if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE; 681296266Swma 682296266Swma /* Find the interface with an address in AF_INET family */ 683296266Swma sin.sin_addr = vifcp->vifc_lcl_addr; 684296266Swma ifa = ifa_ifwithaddr((struct sockaddr *)&sin); 685296266Swma if (ifa == 0) return EADDRNOTAVAIL; 686281494Sandrew ifp = ifa->ifa_ifp; 687281494Sandrew 688281494Sandrew if (vifcp->vifc_flags & VIFF_TUNNEL) { 689281494Sandrew if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) { 690281494Sandrew /* 691281494Sandrew * An encapsulating tunnel is wanted. Tell ipip_input() to 692281494Sandrew * start paying attention to encapsulated packets. 693281494Sandrew */ 694281494Sandrew if (have_encap_tunnel == 0) { 695281494Sandrew have_encap_tunnel = 1; 696281494Sandrew for (s = 0; s < MAXVIFS; ++s) { 697281494Sandrew multicast_decap_if[s].if_name = "mdecap"; 698281494Sandrew multicast_decap_if[s].if_unit = s; 699281494Sandrew } 700281494Sandrew } 701281494Sandrew /* 702281494Sandrew * Set interface to fake encapsulator interface 703281494Sandrew */ 704281494Sandrew ifp = &multicast_decap_if[vifcp->vifc_vifi]; 705281494Sandrew /* 706281494Sandrew * Prepare cached route entry 707281494Sandrew */ 708281494Sandrew bzero(&vifp->v_route, sizeof(vifp->v_route)); 709281494Sandrew } else { 710281494Sandrew log(LOG_ERR, "Source routed tunnels not supported."); 711281494Sandrew return EOPNOTSUPP; 712281494Sandrew } 713281494Sandrew } else { 714281494Sandrew /* Make sure the interface supports multicast */ 715281494Sandrew if ((ifp->if_flags & IFF_MULTICAST) == 0) 716281494Sandrew return EOPNOTSUPP; 717281494Sandrew 718281494Sandrew /* Enable promiscuous reception of all IP multicasts from the if */ 719281494Sandrew ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; 720281494Sandrew ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY; 721281494Sandrew s = splnet(); 722281494Sandrew error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr); 723281494Sandrew splx(s); 724281494Sandrew if (error) 725281494Sandrew return error; 726281494Sandrew } 727281494Sandrew 728281494Sandrew s = splnet(); 729281494Sandrew /* define parameters for the tbf structure */ 730281494Sandrew vifp->v_tbf = v_tbf; 731281494Sandrew vifp->v_tbf->q_len = 0; 732281494Sandrew vifp->v_tbf->n_tok = 0; 733281494Sandrew vifp->v_tbf->last_pkt_t = 0; 734281494Sandrew 735281494Sandrew vifp->v_flags = vifcp->vifc_flags; 736281494Sandrew vifp->v_threshold = vifcp->vifc_threshold; 737281494Sandrew vifp->v_lcl_addr = vifcp->vifc_lcl_addr; 738281494Sandrew vifp->v_rmt_addr = vifcp->vifc_rmt_addr; 739281494Sandrew vifp->v_ifp = ifp; 740281494Sandrew vifp->v_rate_limit= vifcp->vifc_rate_limit; 741281494Sandrew vifp->v_rsvp_on = 0; 742281494Sandrew vifp->v_rsvpd = NULL; 743281494Sandrew /* initialize per vif pkt counters */ 744281494Sandrew vifp->v_pkt_in = 0; 745281494Sandrew vifp->v_pkt_out = 0; 746281494Sandrew vifp->v_bytes_in = 0; 747281494Sandrew vifp->v_bytes_out = 0; 748281494Sandrew splx(s); 749281494Sandrew 750281494Sandrew /* Adjust numvifs up if the vifi is higher than numvifs */ 751281494Sandrew if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1; 752281494Sandrew 753281494Sandrew if (mrtdebug) 754281494Sandrew log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d", 755281494Sandrew vifcp->vifc_vifi, 756281494Sandrew ntohl(vifcp->vifc_lcl_addr.s_addr), 757281494Sandrew (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask", 758281494Sandrew ntohl(vifcp->vifc_rmt_addr.s_addr), 759281494Sandrew vifcp->vifc_threshold, 760281494Sandrew vifcp->vifc_rate_limit); 761281494Sandrew 762281494Sandrew return 0; 763281494Sandrew} 764281494Sandrew 765281494Sandrew/* 766281494Sandrew * Delete a vif from the vif table 767281494Sandrew */ 768281494Sandrewstatic int 769281494Sandrewdel_vif(vifip) 770281494Sandrew vifi_t *vifip; 771281494Sandrew{ 772281494Sandrew register struct vif *vifp = viftable + *vifip; 773281494Sandrew register vifi_t vifi; 774281494Sandrew struct ifnet *ifp; 775281494Sandrew struct ifreq ifr; 776281494Sandrew int s; 777281494Sandrew 778281494Sandrew if (*vifip >= numvifs) return EINVAL; 779281494Sandrew if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL; 780281494Sandrew 781281494Sandrew s = splnet(); 782281494Sandrew 783281494Sandrew if (!(vifp->v_flags & VIFF_TUNNEL)) { 784281494Sandrew ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; 785281494Sandrew ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY; 786281494Sandrew ifp = vifp->v_ifp; 787281494Sandrew (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); 788281494Sandrew } 789281494Sandrew 790281494Sandrew if (vifp == last_encap_vif) { 791281494Sandrew last_encap_vif = 0; 792281494Sandrew last_encap_src = 0; 793281494Sandrew } 794281494Sandrew 795281494Sandrew bzero((caddr_t)qtable[*vifip], 796281494Sandrew sizeof(qtable[*vifip])); 797281494Sandrew bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf))); 798281494Sandrew bzero((caddr_t)vifp, sizeof (*vifp)); 799281494Sandrew 800281494Sandrew /* Adjust numvifs down */ 801281494Sandrew for (vifi = numvifs; vifi > 0; vifi--) 802281494Sandrew if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break; 803281494Sandrew numvifs = vifi; 804281494Sandrew 805281494Sandrew splx(s); 806281494Sandrew 807281494Sandrew if (mrtdebug) 808281494Sandrew log(LOG_DEBUG, "del_vif %d, numvifs %d", *vifip, numvifs); 809281494Sandrew 810281494Sandrew return 0; 811281494Sandrew} 812281494Sandrew 813281494Sandrew/* 814281494Sandrew * Add an mfc entry 815281494Sandrew */ 816281494Sandrewstatic int 817281494Sandrewadd_mfc(mfccp) 818281494Sandrew struct mfcctl *mfccp; 819281494Sandrew{ 820281494Sandrew struct mfc *rt; 821281494Sandrew register struct mbuf *mb_rt; 822281494Sandrew u_long hash; 823281494Sandrew struct mbuf *mb_ntry; 824281494Sandrew struct rtdetq *rte; 825296266Swma register u_short nstl; 826296266Swma int s; 827296266Swma int i; 828296266Swma 829281494Sandrew MFCFIND(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr, rt); 830281494Sandrew 831281494Sandrew /* If an entry already exists, just update the fields */ 832281494Sandrew if (rt) { 833281494Sandrew if (mrtdebug & DEBUG_MFC) 834281494Sandrew log(LOG_DEBUG,"add_mfc update o %x g %x p %x", 835281494Sandrew ntohl(mfccp->mfcc_origin.s_addr), 836281494Sandrew ntohl(mfccp->mfcc_mcastgrp.s_addr), 837281494Sandrew mfccp->mfcc_parent); 838281494Sandrew 839281494Sandrew s = splnet(); 840281494Sandrew rt->mfc_parent = mfccp->mfcc_parent; 841281494Sandrew for (i = 0; i < numvifs; i++) 842281494Sandrew rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; 843293045Sian splx(s); 844281494Sandrew return 0; 845281494Sandrew } 846281494Sandrew 847281494Sandrew /* 848281494Sandrew * Find the entry for which the upcall was made and update 849281494Sandrew */ 850281494Sandrew s = splnet(); 851281494Sandrew hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); 852281494Sandrew for (mb_rt = mfctable[hash], nstl = 0; mb_rt; mb_rt = mb_rt->m_next) { 853281494Sandrew 854281494Sandrew rt = mtod(mb_rt, struct mfc *); 855281494Sandrew if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && 856296266Swma (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) && 857296266Swma (mb_rt->m_act != NULL)) { 858296266Swma 859296266Swma if (nstl++) 860296266Swma log(LOG_ERR, "add_mfc %s o %x g %x p %x dbx %x", 861296266Swma "multiple kernel entries", 862296266Swma ntohl(mfccp->mfcc_origin.s_addr), 863296266Swma ntohl(mfccp->mfcc_mcastgrp.s_addr), 864296266Swma mfccp->mfcc_parent, mb_rt->m_act); 865296266Swma 866296266Swma if (mrtdebug & DEBUG_MFC) 867296266Swma log(LOG_DEBUG,"add_mfc o %x g %x p %x dbg %x", 868281494Sandrew ntohl(mfccp->mfcc_origin.s_addr), 869281494Sandrew ntohl(mfccp->mfcc_mcastgrp.s_addr), 870281494Sandrew mfccp->mfcc_parent, mb_rt->m_act); 871286366Sandrew 872286366Sandrew rt->mfc_origin = mfccp->mfcc_origin; 873286366Sandrew rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 874281494Sandrew rt->mfc_parent = mfccp->mfcc_parent; 875286366Sandrew for (i = 0; i < numvifs; i++) 876286366Sandrew rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; 877286366Sandrew /* initialize pkt counters per src-grp */ 878281494Sandrew rt->mfc_pkt_cnt = 0; 879281494Sandrew rt->mfc_byte_cnt = 0; 880281494Sandrew rt->mfc_wrong_if = 0; 881281494Sandrew rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; 882281494Sandrew 883281494Sandrew rt->mfc_expire = 0; /* Don't clean this guy up */ 884281494Sandrew nexpire[hash]--; 885281494Sandrew 886281494Sandrew /* free packets Qed at the end of this entry */ 887281494Sandrew while (mb_rt->m_act) { 888281494Sandrew mb_ntry = mb_rt->m_act; 889281494Sandrew rte = mtod(mb_ntry, struct rtdetq *); 890281494Sandrew/* #ifdef RSVP_ISI */ 891281494Sandrew ip_mdq(rte->m, rte->ifp, rt, -1); 892281494Sandrew/* #endif */ 893281494Sandrew mb_rt->m_act = mb_ntry->m_act; 894281494Sandrew m_freem(rte->m); 895281494Sandrew#ifdef UPCALL_TIMING 896281494Sandrew collate(&(rte->t)); 897281494Sandrew#endif /* UPCALL_TIMING */ 898281494Sandrew m_free(mb_ntry); 899297446Sandrew } 900297446Sandrew } 901281494Sandrew } 902281494Sandrew 903281494Sandrew /* 904281494Sandrew * It is possible that an entry is being inserted without an upcall 905281494Sandrew */ 906281494Sandrew if (nstl == 0) { 907281494Sandrew if (mrtdebug & DEBUG_MFC) 908281494Sandrew log(LOG_DEBUG,"add_mfc no upcall h %d o %x g %x p %x", 909281494Sandrew hash, ntohl(mfccp->mfcc_origin.s_addr), 910281494Sandrew ntohl(mfccp->mfcc_mcastgrp.s_addr), 911281494Sandrew mfccp->mfcc_parent); 912281494Sandrew 913281494Sandrew for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) { 914281494Sandrew 915281494Sandrew rt = mtod(mb_rt, struct mfc *); 916281494Sandrew if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && 917291937Skib (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) { 918291937Skib 919291937Skib rt->mfc_origin = mfccp->mfcc_origin; 920291937Skib rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 921291937Skib rt->mfc_parent = mfccp->mfcc_parent; 922291937Skib for (i = 0; i < numvifs; i++) 923291937Skib rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; 924291937Skib /* initialize pkt counters per src-grp */ 925291937Skib rt->mfc_pkt_cnt = 0; 926291937Skib rt->mfc_byte_cnt = 0; 927291937Skib rt->mfc_wrong_if = 0; 928287487Sandrew rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; 929287487Sandrew if (rt->mfc_expire) 930287487Sandrew nexpire[hash]--; 931287487Sandrew rt->mfc_expire = 0; 932287487Sandrew } 933287487Sandrew } 934287487Sandrew if (mb_rt == NULL) { 935287487Sandrew /* no upcall, so make a new entry */ 936287487Sandrew MGET(mb_rt, M_DONTWAIT, MT_MRTABLE); 937287487Sandrew if (mb_rt == NULL) { 938287487Sandrew splx(s); 939287487Sandrew return ENOBUFS; 940287487Sandrew } 941287487Sandrew 942287487Sandrew rt = mtod(mb_rt, struct mfc *); 943287487Sandrew 944287487Sandrew /* insert new entry at head of hash chain */ 945287487Sandrew rt->mfc_origin = mfccp->mfcc_origin; 946287487Sandrew rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 947287487Sandrew rt->mfc_parent = mfccp->mfcc_parent; 948287487Sandrew for (i = 0; i < numvifs; i++) 949287487Sandrew rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; 950287487Sandrew /* initialize pkt counters per src-grp */ 951287487Sandrew rt->mfc_pkt_cnt = 0; 952287487Sandrew rt->mfc_byte_cnt = 0; 953287959Sandrew rt->mfc_wrong_if = 0; 954287959Sandrew rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; 955287487Sandrew rt->mfc_expire = 0; 956287487Sandrew 957287959Sandrew /* link into table */ 958287487Sandrew mb_rt->m_next = mfctable[hash]; 959287487Sandrew mfctable[hash] = mb_rt; 960287487Sandrew mb_rt->m_act = NULL; 961287487Sandrew } 962287487Sandrew } 963287487Sandrew splx(s); 964287487Sandrew return 0; 965287487Sandrew} 966287487Sandrew 967287487Sandrew#ifdef UPCALL_TIMING 968287487Sandrew/* 969287487Sandrew * collect delay statistics on the upcalls 970287487Sandrew */ 971287487Sandrewstatic void collate(t) 972287487Sandrewregister struct timeval *t; 973287487Sandrew{ 974287487Sandrew register u_long d; 975287487Sandrew register struct timeval tp; 976287487Sandrew register u_long delta; 977287487Sandrew 978287487Sandrew GET_TIME(tp); 979287487Sandrew 980287487Sandrew if (TV_LT(*t, tp)) 981287487Sandrew { 982287487Sandrew TV_DELTA(tp, *t, delta); 983287487Sandrew 984287487Sandrew d = delta >> 10; 985287487Sandrew if (d > 50) 986287487Sandrew d = 50; 987287487Sandrew 988287487Sandrew ++upcall_data[d]; 989287487Sandrew } 990287487Sandrew} 991287487Sandrew#endif /* UPCALL_TIMING */ 992287487Sandrew 993287487Sandrew/* 994287487Sandrew * Delete an mfc entry 995287487Sandrew */ 996287487Sandrewstatic int 997287487Sandrewdel_mfc(mfccp) 998287487Sandrew struct mfcctl *mfccp; 999287487Sandrew{ 1000287487Sandrew struct in_addr origin; 1001287487Sandrew struct in_addr mcastgrp; 1002287487Sandrew struct mfc *rt; 1003287487Sandrew struct mbuf *mb_rt; 1004287487Sandrew struct mbuf **nptr; 1005287487Sandrew u_long hash; 1006287487Sandrew int s, i; 1007287487Sandrew 1008287487Sandrew origin = mfccp->mfcc_origin; 1009287487Sandrew mcastgrp = mfccp->mfcc_mcastgrp; 1010287487Sandrew hash = MFCHASH(origin.s_addr, mcastgrp.s_addr); 1011287487Sandrew 1012287487Sandrew if (mrtdebug & DEBUG_MFC) 1013287487Sandrew log(LOG_DEBUG,"del_mfc orig %x mcastgrp %x", 1014287487Sandrew ntohl(origin.s_addr), ntohl(mcastgrp.s_addr)); 1015287487Sandrew 1016287487Sandrew s = splnet(); 1017 1018 nptr = &mfctable[hash]; 1019 while ((mb_rt = *nptr) != NULL) { 1020 rt = mtod(mb_rt, struct mfc *); 1021 if (origin.s_addr == rt->mfc_origin.s_addr && 1022 mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && 1023 mb_rt->m_act == NULL) 1024 break; 1025 1026 nptr = &mb_rt->m_next; 1027 } 1028 if (mb_rt == NULL) { 1029 splx(s); 1030 return EADDRNOTAVAIL; 1031 } 1032 1033 MFREE(mb_rt, *nptr); 1034 1035 splx(s); 1036 1037 return 0; 1038} 1039 1040/* 1041 * Send a message to mrouted on the multicast routing socket 1042 */ 1043static int 1044socket_send(s, mm, src) 1045 struct socket *s; 1046 struct mbuf *mm; 1047 struct sockaddr_in *src; 1048{ 1049 if (s) { 1050 if (sbappendaddr(&s->so_rcv, 1051 (struct sockaddr *)src, 1052 mm, (struct mbuf *)0) != 0) { 1053 sorwakeup(s); 1054 return 0; 1055 } 1056 } 1057 m_freem(mm); 1058 return -1; 1059} 1060 1061/* 1062 * IP multicast forwarding function. This function assumes that the packet 1063 * pointed to by "ip" has arrived on (or is about to be sent to) the interface 1064 * pointed to by "ifp", and the packet is to be relayed to other networks 1065 * that have members of the packet's destination IP multicast group. 1066 * 1067 * The packet is returned unscathed to the caller, unless it is 1068 * erroneous, in which case a non-zero return value tells the caller to 1069 * discard it. 1070 */ 1071 1072#define IP_HDR_LEN 20 /* # bytes of fixed IP header (excluding options) */ 1073#define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ 1074 1075int 1076X_ip_mforward(ip, ifp, m, imo) 1077 register struct ip *ip; 1078 struct ifnet *ifp; 1079 struct mbuf *m; 1080 struct ip_moptions *imo; 1081{ 1082 register struct mfc *rt = 0; /* XXX uninit warning */ 1083 register u_char *ipoptions; 1084 static struct sockproto k_igmpproto = { AF_INET, IPPROTO_IGMP }; 1085 static struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; 1086 static int srctun = 0; 1087 register struct mbuf *mm; 1088 int s; 1089 vifi_t vifi; 1090 struct vif *vifp; 1091 1092 if (mrtdebug & DEBUG_FORWARD) 1093 log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %x", 1094 ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp); 1095 1096 if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 || 1097 (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) { 1098 /* 1099 * Packet arrived via a physical interface or 1100 * an encapsulated tunnel. 1101 */ 1102 } else { 1103 /* 1104 * Packet arrived through a source-route tunnel. 1105 * Source-route tunnels are no longer supported. 1106 */ 1107 if ((srctun++ % 1000) == 0) 1108 log(LOG_ERR, "ip_mforward: received source-routed packet from %x", 1109 ntohl(ip->ip_src.s_addr)); 1110 1111 return 1; 1112 } 1113 1114 if ((imo) && ((vifi = imo->imo_multicast_vif) < numvifs)) { 1115 if (ip->ip_ttl < 255) 1116 ip->ip_ttl++; /* compensate for -1 in *_send routines */ 1117 if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { 1118 vifp = viftable + vifi; 1119 printf("Sending IPPROTO_RSVP from %x to %x on vif %d (%s%s%d)\n", 1120 ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), vifi, 1121 (vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "", 1122 vifp->v_ifp->if_name, vifp->v_ifp->if_unit); 1123 } 1124 return (ip_mdq(m, ifp, rt, vifi)); 1125 } 1126 if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { 1127 printf("Warning: IPPROTO_RSVP from %x to %x without vif option\n", 1128 ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr)); 1129 } 1130 1131 /* 1132 * Don't forward a packet with time-to-live of zero or one, 1133 * or a packet destined to a local-only group. 1134 */ 1135 if (ip->ip_ttl <= 1 || 1136 ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP) 1137 return 0; 1138 1139 /* 1140 * Determine forwarding vifs from the forwarding cache table 1141 */ 1142 s = splnet(); 1143 MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt); 1144 1145 /* Entry exists, so forward if necessary */ 1146 if (rt != NULL) { 1147 splx(s); 1148 return (ip_mdq(m, ifp, rt, -1)); 1149 } else { 1150 /* 1151 * If we don't have a route for packet's origin, 1152 * Make a copy of the packet & 1153 * send message to routing daemon 1154 */ 1155 1156 register struct mbuf *mb_rt; 1157 register struct mbuf *mb_ntry; 1158 register struct mbuf *mb0; 1159 register struct rtdetq *rte; 1160 register struct mbuf *rte_m; 1161 register u_long hash; 1162 register int npkts; 1163#ifdef UPCALL_TIMING 1164 struct timeval tp; 1165 1166 GET_TIME(tp); 1167#endif 1168 1169 mrtstat.mrts_no_route++; 1170 if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC)) 1171 log(LOG_DEBUG, "ip_mforward: no rte s %x g %x", 1172 ntohl(ip->ip_src.s_addr), 1173 ntohl(ip->ip_dst.s_addr)); 1174 1175 /* 1176 * Allocate mbufs early so that we don't do extra work if we are 1177 * just going to fail anyway. 1178 */ 1179 MGET(mb_ntry, M_DONTWAIT, MT_DATA); 1180 if (mb_ntry == NULL) { 1181 splx(s); 1182 return ENOBUFS; 1183 } 1184 mb0 = m_copy(m, 0, M_COPYALL); 1185 if (mb0 == NULL) { 1186 m_free(mb_ntry); 1187 splx(s); 1188 return ENOBUFS; 1189 } 1190 1191 /* is there an upcall waiting for this packet? */ 1192 hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr); 1193 for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) { 1194 rt = mtod(mb_rt, struct mfc *); 1195 if ((ip->ip_src.s_addr == rt->mfc_origin.s_addr) && 1196 (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) && 1197 (mb_rt->m_act != NULL)) 1198 break; 1199 } 1200 1201 if (mb_rt == NULL) { 1202 int hlen = ip->ip_hl << 2; 1203 int i; 1204 struct igmpmsg *im; 1205 1206 /* no upcall, so make a new entry */ 1207 MGET(mb_rt, M_DONTWAIT, MT_MRTABLE); 1208 if (mb_rt == NULL) { 1209 m_free(mb_ntry); 1210 m_freem(mb0); 1211 splx(s); 1212 return ENOBUFS; 1213 } 1214 /* Make a copy of the header to send to the user level process */ 1215 mm = m_copy(m, 0, hlen); 1216 if (mm && (M_HASCL(mm) || mm->m_len < hlen)) 1217 mm = m_pullup(mm, hlen); 1218 if (mm == NULL) { 1219 m_free(mb_ntry); 1220 m_freem(mb0); 1221 m_free(mb_rt); 1222 splx(s); 1223 return ENOBUFS; 1224 } 1225 1226 /* 1227 * Send message to routing daemon to install 1228 * a route into the kernel table 1229 */ 1230 k_igmpsrc.sin_addr = ip->ip_src; 1231 1232 im = mtod(mm, struct igmpmsg *); 1233 im->im_msgtype = IGMPMSG_NOCACHE; 1234 im->im_mbz = 0; 1235 1236 mrtstat.mrts_upcalls++; 1237 1238 if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) { 1239 log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full"); 1240 ++mrtstat.mrts_upq_sockfull; 1241 m_free(mb_ntry); 1242 m_freem(mb0); 1243 m_free(mb_rt); 1244 splx(s); 1245 return ENOBUFS; 1246 } 1247 1248 rt = mtod(mb_rt, struct mfc *); 1249 1250 /* insert new entry at head of hash chain */ 1251 rt->mfc_origin.s_addr = ip->ip_src.s_addr; 1252 rt->mfc_mcastgrp.s_addr = ip->ip_dst.s_addr; 1253 rt->mfc_expire = UPCALL_EXPIRE; 1254 nexpire[hash]++; 1255 for (i = 0; i < numvifs; i++) 1256 rt->mfc_ttls[i] = 0; 1257 rt->mfc_parent = -1; 1258 1259 /* link into table */ 1260 mb_rt->m_next = mfctable[hash]; 1261 mfctable[hash] = mb_rt; 1262 mb_rt->m_act = NULL; 1263 1264 rte_m = mb_rt; 1265 } else { 1266 /* determine if q has overflowed */ 1267 for (rte_m = mb_rt, npkts = 0; rte_m->m_act; rte_m = rte_m->m_act) 1268 npkts++; 1269 1270 if (npkts > MAX_UPQ) { 1271 mrtstat.mrts_upq_ovflw++; 1272 m_free(mb_ntry); 1273 m_freem(mb0); 1274 splx(s); 1275 return 0; 1276 } 1277 } 1278 1279 mb_ntry->m_act = NULL; 1280 rte = mtod(mb_ntry, struct rtdetq *); 1281 1282 rte->m = mb0; 1283 rte->ifp = ifp; 1284#ifdef UPCALL_TIMING 1285 rte->t = tp; 1286#endif 1287 1288 /* Add this entry to the end of the queue */ 1289 rte_m->m_act = mb_ntry; 1290 1291 splx(s); 1292 1293 return 0; 1294 } 1295} 1296 1297#ifndef MROUTE_LKM 1298int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, 1299 struct ip_moptions *) = X_ip_mforward; 1300#endif 1301 1302/* 1303 * Clean up the cache entry if upcall is not serviced 1304 */ 1305static void 1306expire_upcalls(void *unused) 1307{ 1308 struct mbuf *mb_rt, *m, **nptr; 1309 struct rtdetq *rte; 1310 struct mfc *mfc; 1311 int i; 1312 int s; 1313 1314 s = splnet(); 1315 for (i = 0; i < MFCTBLSIZ; i++) { 1316 if (nexpire[i] == 0) 1317 continue; 1318 nptr = &mfctable[i]; 1319 for (mb_rt = *nptr; mb_rt != NULL; mb_rt = *nptr) { 1320 mfc = mtod(mb_rt, struct mfc *); 1321 1322 /* 1323 * Skip real cache entries 1324 * Make sure it wasn't marked to not expire (shouldn't happen) 1325 * If it expires now 1326 */ 1327 if (mb_rt->m_act != NULL && 1328 mfc->mfc_expire != 0 && 1329 --mfc->mfc_expire == 0) { 1330 if (mrtdebug & DEBUG_EXPIRE) 1331 log(LOG_DEBUG, "expire_upcalls: expiring (%x %x)", 1332 ntohl(mfc->mfc_origin.s_addr), 1333 ntohl(mfc->mfc_mcastgrp.s_addr)); 1334 /* 1335 * drop all the packets 1336 * free the mbuf with the pkt, if, timing info 1337 */ 1338 while (mb_rt->m_act) { 1339 m = mb_rt->m_act; 1340 mb_rt->m_act = m->m_act; 1341 1342 rte = mtod(m, struct rtdetq *); 1343 m_freem(rte->m); 1344 m_free(m); 1345 } 1346 ++mrtstat.mrts_cache_cleanups; 1347 nexpire[i]--; 1348 1349 MFREE(mb_rt, *nptr); 1350 } else { 1351 nptr = &mb_rt->m_next; 1352 } 1353 } 1354 } 1355 splx(s); 1356 timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT); 1357} 1358 1359/* 1360 * Packet forwarding routine once entry in the cache is made 1361 */ 1362static int 1363ip_mdq(m, ifp, rt, xmt_vif) 1364 register struct mbuf *m; 1365 register struct ifnet *ifp; 1366 register struct mfc *rt; 1367 register vifi_t xmt_vif; 1368{ 1369 register struct ip *ip = mtod(m, struct ip *); 1370 register vifi_t vifi; 1371 register struct vif *vifp; 1372 register struct mbuf *tmp; 1373 register int plen = ntohs(ip->ip_len); 1374 1375/* 1376 * Macro to send packet on vif. Since RSVP packets don't get counted on 1377 * input, they shouldn't get counted on output, so statistics keeping is 1378 * seperate. 1379 */ 1380#define MC_SEND(ip,vifp,m) { \ 1381 if ((vifp)->v_flags & VIFF_TUNNEL) \ 1382 encap_send((ip), (vifp), (m)); \ 1383 else \ 1384 phyint_send((ip), (vifp), (m)); \ 1385} 1386 1387 /* 1388 * If xmt_vif is not -1, send on only the requested vif. 1389 * 1390 * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.) 1391 */ 1392 if (xmt_vif < numvifs) { 1393 MC_SEND(ip, viftable + xmt_vif, m); 1394 return 1; 1395 } 1396 1397 /* 1398 * Don't forward if it didn't arrive from the parent vif for its origin. 1399 */ 1400 vifi = rt->mfc_parent; 1401 if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) { 1402 /* came in the wrong interface */ 1403 if (mrtdebug & DEBUG_FORWARD) 1404 log(LOG_DEBUG, "wrong if: ifp %x vifi %d vififp %x", 1405 ifp, vifi, viftable[vifi].v_ifp); 1406 ++mrtstat.mrts_wrong_if; 1407 ++rt->mfc_wrong_if; 1408 /* 1409 * If we are doing PIM assert processing, and we are forwarding 1410 * packets on this interface, and it is a broadcast medium 1411 * interface (and not a tunnel), send a message to the routing daemon. 1412 */ 1413 if (pim_assert && rt->mfc_ttls[vifi] && 1414 (ifp->if_flags & IFF_BROADCAST) && 1415 !(viftable[vifi].v_flags & VIFF_TUNNEL)) { 1416 struct sockaddr_in k_igmpsrc; 1417 struct mbuf *mm; 1418 struct igmpmsg *im; 1419 int hlen = ip->ip_hl << 2; 1420 struct timeval now; 1421 register u_long delta; 1422 1423 GET_TIME(now); 1424 1425 TV_DELTA(rt->mfc_last_assert, now, delta); 1426 1427 if (delta > ASSERT_MSG_TIME) { 1428 mm = m_copy(m, 0, hlen); 1429 if (mm && (M_HASCL(mm) || mm->m_len < hlen)) 1430 mm = m_pullup(mm, hlen); 1431 if (mm == NULL) { 1432 return ENOBUFS; 1433 } 1434 1435 rt->mfc_last_assert = now; 1436 1437 im = mtod(mm, struct igmpmsg *); 1438 im->im_msgtype = IGMPMSG_WRONGVIF; 1439 im->im_mbz = 0; 1440 im->im_vif = vifi; 1441 1442 k_igmpsrc.sin_addr = im->im_src; 1443 1444 socket_send(ip_mrouter, mm, &k_igmpsrc); 1445 } 1446 } 1447 return 0; 1448 } 1449 1450 /* If I sourced this packet, it counts as output, else it was input. */ 1451 if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) { 1452 viftable[vifi].v_pkt_out++; 1453 viftable[vifi].v_bytes_out += plen; 1454 } else { 1455 viftable[vifi].v_pkt_in++; 1456 viftable[vifi].v_bytes_in += plen; 1457 } 1458 rt->mfc_pkt_cnt++; 1459 rt->mfc_byte_cnt += plen; 1460 1461 /* 1462 * For each vif, decide if a copy of the packet should be forwarded. 1463 * Forward if: 1464 * - the ttl exceeds the vif's threshold 1465 * - there are group members downstream on interface 1466 */ 1467 for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++) 1468 if ((rt->mfc_ttls[vifi] > 0) && 1469 (ip->ip_ttl > rt->mfc_ttls[vifi])) { 1470 vifp->v_pkt_out++; 1471 vifp->v_bytes_out += plen; 1472 MC_SEND(ip, vifp, m); 1473 } 1474 1475 return 0; 1476} 1477 1478/* 1479 * check if a vif number is legal/ok. This is used by ip_output, to export 1480 * numvifs there, 1481 */ 1482int 1483X_legal_vif_num(vif) 1484 int vif; 1485{ 1486 if (vif >= 0 && vif < numvifs) 1487 return(1); 1488 else 1489 return(0); 1490} 1491 1492#ifndef MROUTE_LKM 1493int (*legal_vif_num)(int) = X_legal_vif_num; 1494#endif 1495 1496/* 1497 * Return the local address used by this vif 1498 */ 1499u_long 1500X_ip_mcast_src(vifi) 1501 int vifi; 1502{ 1503 if (vifi >= 0 && vifi < numvifs) 1504 return viftable[vifi].v_lcl_addr.s_addr; 1505 else 1506 return INADDR_ANY; 1507} 1508 1509#ifndef MROUTE_LKM 1510u_long (*ip_mcast_src)(int) = X_ip_mcast_src; 1511#endif 1512 1513static void 1514phyint_send(ip, vifp, m) 1515 struct ip *ip; 1516 struct vif *vifp; 1517 struct mbuf *m; 1518{ 1519 register struct mbuf *mb_copy; 1520 register int hlen = ip->ip_hl << 2; 1521 register struct ip_moptions *imo; 1522 1523 /* 1524 * Make a new reference to the packet; make sure that 1525 * the IP header is actually copied, not just referenced, 1526 * so that ip_output() only scribbles on the copy. 1527 */ 1528 mb_copy = m_copy(m, 0, M_COPYALL); 1529 if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen)) 1530 mb_copy = m_pullup(mb_copy, hlen); 1531 if (mb_copy == NULL) 1532 return; 1533 1534 MALLOC(imo, struct ip_moptions *, sizeof *imo, M_IPMOPTS, M_NOWAIT); 1535 if (imo == NULL) { 1536 m_freem(mb_copy); 1537 return; 1538 } 1539 1540 imo->imo_multicast_ifp = vifp->v_ifp; 1541 imo->imo_multicast_ttl = ip->ip_ttl - 1; 1542 imo->imo_multicast_loop = 1; 1543 imo->imo_multicast_vif = -1; 1544 1545 if (vifp->v_rate_limit <= 0) 1546 tbf_send_packet(vifp, mb_copy, imo); 1547 else 1548 tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len, 1549 imo); 1550} 1551 1552static void 1553encap_send(ip, vifp, m) 1554 register struct ip *ip; 1555 register struct vif *vifp; 1556 register struct mbuf *m; 1557{ 1558 register struct mbuf *mb_copy; 1559 register struct ip *ip_copy; 1560 int hlen = ip->ip_hl << 2; 1561 register int i, len = ip->ip_len; 1562 1563 /* 1564 * copy the old packet & pullup it's IP header into the 1565 * new mbuf so we can modify it. Try to fill the new 1566 * mbuf since if we don't the ethernet driver will. 1567 */ 1568 MGET(mb_copy, M_DONTWAIT, MT_DATA); 1569 if (mb_copy == NULL) 1570 return; 1571 mb_copy->m_data += 16; 1572 mb_copy->m_len = sizeof(multicast_encap_iphdr); 1573 1574 if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) { 1575 m_freem(mb_copy); 1576 return; 1577 } 1578 i = MHLEN - M_LEADINGSPACE(mb_copy); 1579 if (i > len) 1580 i = len; 1581 mb_copy = m_pullup(mb_copy, i); 1582 if (mb_copy == NULL) 1583 return; 1584 mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr); 1585 1586 /* 1587 * fill in the encapsulating IP header. 1588 */ 1589 ip_copy = mtod(mb_copy, struct ip *); 1590 *ip_copy = multicast_encap_iphdr; 1591 ip_copy->ip_id = htons(ip_id++); 1592 ip_copy->ip_len += len; 1593 ip_copy->ip_src = vifp->v_lcl_addr; 1594 ip_copy->ip_dst = vifp->v_rmt_addr; 1595 1596 /* 1597 * turn the encapsulated IP header back into a valid one. 1598 */ 1599 ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr)); 1600 --ip->ip_ttl; 1601 HTONS(ip->ip_len); 1602 HTONS(ip->ip_off); 1603 ip->ip_sum = 0; 1604#if defined(LBL) && !defined(ultrix) 1605 ip->ip_sum = ~oc_cksum((caddr_t)ip, ip->ip_hl << 2, 0); 1606#else 1607 mb_copy->m_data += sizeof(multicast_encap_iphdr); 1608 ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); 1609 mb_copy->m_data -= sizeof(multicast_encap_iphdr); 1610#endif 1611 1612 if (vifp->v_rate_limit <= 0) 1613 tbf_send_packet(vifp, mb_copy, 0); 1614 else 1615 tbf_control(vifp, mb_copy, ip, ip_copy->ip_len, 0); 1616} 1617 1618/* 1619 * De-encapsulate a packet and feed it back through ip input (this 1620 * routine is called whenever IP gets a packet with proto type 1621 * ENCAP_PROTO and a local destination address). 1622 */ 1623void 1624#ifdef MROUTE_LKM 1625X_ipip_input(m) 1626#else 1627ipip_input(m, iphlen) 1628#endif 1629 register struct mbuf *m; 1630 int iphlen; 1631{ 1632 struct ifnet *ifp = m->m_pkthdr.rcvif; 1633 register struct ip *ip = mtod(m, struct ip *); 1634 register int hlen = ip->ip_hl << 2; 1635 register int s; 1636 register struct ifqueue *ifq; 1637 register struct vif *vifp; 1638 1639 if (!have_encap_tunnel) { 1640 rip_input(m); 1641 return; 1642 } 1643 /* 1644 * dump the packet if it's not to a multicast destination or if 1645 * we don't have an encapsulating tunnel with the source. 1646 * Note: This code assumes that the remote site IP address 1647 * uniquely identifies the tunnel (i.e., that this site has 1648 * at most one tunnel with the remote site). 1649 */ 1650 if (! IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) { 1651 ++mrtstat.mrts_bad_tunnel; 1652 m_freem(m); 1653 return; 1654 } 1655 if (ip->ip_src.s_addr != last_encap_src) { 1656 register struct vif *vife; 1657 1658 vifp = viftable; 1659 vife = vifp + numvifs; 1660 last_encap_src = ip->ip_src.s_addr; 1661 last_encap_vif = 0; 1662 for ( ; vifp < vife; ++vifp) 1663 if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) { 1664 if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT)) 1665 == VIFF_TUNNEL) 1666 last_encap_vif = vifp; 1667 break; 1668 } 1669 } 1670 if ((vifp = last_encap_vif) == 0) { 1671 last_encap_src = 0; 1672 mrtstat.mrts_cant_tunnel++; /*XXX*/ 1673 m_freem(m); 1674 if (mrtdebug) 1675 log(LOG_DEBUG, "ip_mforward: no tunnel with %x", 1676 ntohl(ip->ip_src.s_addr)); 1677 return; 1678 } 1679 ifp = vifp->v_ifp; 1680 1681 if (hlen > IP_HDR_LEN) 1682 ip_stripoptions(m, (struct mbuf *) 0); 1683 m->m_data += IP_HDR_LEN; 1684 m->m_len -= IP_HDR_LEN; 1685 m->m_pkthdr.len -= IP_HDR_LEN; 1686 m->m_pkthdr.rcvif = ifp; 1687 1688 ifq = &ipintrq; 1689 s = splimp(); 1690 if (IF_QFULL(ifq)) { 1691 IF_DROP(ifq); 1692 m_freem(m); 1693 } else { 1694 IF_ENQUEUE(ifq, m); 1695 /* 1696 * normally we would need a "schednetisr(NETISR_IP)" 1697 * here but we were called by ip_input and it is going 1698 * to loop back & try to dequeue the packet we just 1699 * queued as soon as we return so we avoid the 1700 * unnecessary software interrrupt. 1701 */ 1702 } 1703 splx(s); 1704} 1705 1706/* 1707 * Token bucket filter module 1708 */ 1709static void 1710tbf_control(vifp, m, ip, p_len, imo) 1711 register struct vif *vifp; 1712 register struct mbuf *m; 1713 register struct ip *ip; 1714 register u_long p_len; 1715 struct ip_moptions *imo; 1716{ 1717 tbf_update_tokens(vifp); 1718 1719 /* if there are enough tokens, 1720 * and the queue is empty, 1721 * send this packet out 1722 */ 1723 1724 if (vifp->v_tbf->q_len == 0) { 1725 if (p_len <= vifp->v_tbf->n_tok) { 1726 vifp->v_tbf->n_tok -= p_len; 1727 tbf_send_packet(vifp, m, imo); 1728 } else if (p_len > MAX_BKT_SIZE) { 1729 /* drop if packet is too large */ 1730 mrtstat.mrts_pkt2large++; 1731 m_freem(m); 1732 return; 1733 } else { 1734 /* queue packet and timeout till later */ 1735 tbf_queue(vifp, m, ip, imo); 1736 timeout(tbf_reprocess_q, (caddr_t)vifp, 1); 1737 } 1738 } else if (vifp->v_tbf->q_len < MAXQSIZE) { 1739 /* finite queue length, so queue pkts and process queue */ 1740 tbf_queue(vifp, m, ip, imo); 1741 tbf_process_q(vifp); 1742 } else { 1743 /* queue length too much, try to dq and queue and process */ 1744 if (!tbf_dq_sel(vifp, ip)) { 1745 mrtstat.mrts_q_overflow++; 1746 m_freem(m); 1747 return; 1748 } else { 1749 tbf_queue(vifp, m, ip, imo); 1750 tbf_process_q(vifp); 1751 } 1752 } 1753 return; 1754} 1755 1756/* 1757 * adds a packet to the queue at the interface 1758 */ 1759static void 1760tbf_queue(vifp, m, ip, imo) 1761 register struct vif *vifp; 1762 register struct mbuf *m; 1763 register struct ip *ip; 1764 struct ip_moptions *imo; 1765{ 1766 register u_long ql; 1767 register int index = (vifp - viftable); 1768 register int s = splnet(); 1769 1770 ql = vifp->v_tbf->q_len; 1771 1772 qtable[index][ql].pkt_m = m; 1773 qtable[index][ql].pkt_len = (mtod(m, struct ip *))->ip_len; 1774 qtable[index][ql].pkt_ip = ip; 1775 qtable[index][ql].pkt_imo = imo; 1776 1777 vifp->v_tbf->q_len++; 1778 splx(s); 1779} 1780 1781 1782/* 1783 * processes the queue at the interface 1784 */ 1785static void 1786tbf_process_q(vifp) 1787 register struct vif *vifp; 1788{ 1789 register struct pkt_queue pkt_1; 1790 register int index = (vifp - viftable); 1791 register int s = splnet(); 1792 1793 /* loop through the queue at the interface and send as many packets 1794 * as possible 1795 */ 1796 while (vifp->v_tbf->q_len > 0) { 1797 /* locate the first packet */ 1798 pkt_1.pkt_len = (qtable[index][0]).pkt_len; 1799 pkt_1.pkt_m = (qtable[index][0]).pkt_m; 1800 pkt_1.pkt_ip = (qtable[index][0]).pkt_ip; 1801 pkt_1.pkt_imo = (qtable[index][0]).pkt_imo; 1802 1803 /* determine if the packet can be sent */ 1804 if (pkt_1.pkt_len <= vifp->v_tbf->n_tok) { 1805 /* if so, 1806 * reduce no of tokens, dequeue the queue, 1807 * send the packet. 1808 */ 1809 vifp->v_tbf->n_tok -= pkt_1.pkt_len; 1810 1811 tbf_dequeue(vifp, 0); 1812 1813 tbf_send_packet(vifp, pkt_1.pkt_m, pkt_1.pkt_imo); 1814 1815 } else break; 1816 } 1817 splx(s); 1818} 1819 1820/* 1821 * removes the jth packet from the queue at the interface 1822 */ 1823static void 1824tbf_dequeue(vifp,j) 1825 register struct vif *vifp; 1826 register int j; 1827{ 1828 register u_long index = vifp - viftable; 1829 register int i; 1830 1831 for (i=j+1; i <= vifp->v_tbf->q_len - 1; i++) { 1832 qtable[index][i-1].pkt_m = qtable[index][i].pkt_m; 1833 qtable[index][i-1].pkt_len = qtable[index][i].pkt_len; 1834 qtable[index][i-1].pkt_ip = qtable[index][i].pkt_ip; 1835 qtable[index][i-1].pkt_imo = qtable[index][i].pkt_imo; 1836 } 1837 qtable[index][i-1].pkt_m = NULL; 1838 qtable[index][i-1].pkt_len = NULL; 1839 qtable[index][i-1].pkt_ip = NULL; 1840 qtable[index][i-1].pkt_imo = NULL; 1841 1842 vifp->v_tbf->q_len--; 1843 1844 if (tbfdebug > 1) 1845 log(LOG_DEBUG, "tbf_dequeue: vif# %d qlen %d",vifp-viftable, i-1); 1846} 1847 1848static void 1849tbf_reprocess_q(xvifp) 1850 void *xvifp; 1851{ 1852 register struct vif *vifp = xvifp; 1853 if (ip_mrouter == NULL) 1854 return; 1855 1856 tbf_update_tokens(vifp); 1857 1858 tbf_process_q(vifp); 1859 1860 if (vifp->v_tbf->q_len) 1861 timeout(tbf_reprocess_q, (caddr_t)vifp, 1); 1862} 1863 1864/* function that will selectively discard a member of the queue 1865 * based on the precedence value and the priority obtained through 1866 * a lookup table - not yet implemented accurately! 1867 */ 1868static int 1869tbf_dq_sel(vifp, ip) 1870 register struct vif *vifp; 1871 register struct ip *ip; 1872{ 1873 register int i; 1874 register int s = splnet(); 1875 register u_int p; 1876 1877 p = priority(vifp, ip); 1878 1879 for(i=vifp->v_tbf->q_len-1;i >= 0;i--) { 1880 if (p > priority(vifp, qtable[vifp-viftable][i].pkt_ip)) { 1881 m_freem(qtable[vifp-viftable][i].pkt_m); 1882 tbf_dequeue(vifp,i); 1883 splx(s); 1884 mrtstat.mrts_drop_sel++; 1885 return(1); 1886 } 1887 } 1888 splx(s); 1889 return(0); 1890} 1891 1892static void 1893tbf_send_packet(vifp, m, imo) 1894 register struct vif *vifp; 1895 register struct mbuf *m; 1896 struct ip_moptions *imo; 1897{ 1898 int error; 1899 int s = splnet(); 1900 1901 if (vifp->v_flags & VIFF_TUNNEL) { 1902 /* If tunnel options */ 1903 ip_output(m, (struct mbuf *)0, (struct route *)0, 1904 IP_FORWARDING, imo); 1905 } else { 1906 /* if physical interface option, extract the options and then send */ 1907 error = ip_output(m, (struct mbuf *)0, (struct route *)0, 1908 IP_FORWARDING, imo); 1909 FREE(imo, M_IPMOPTS); 1910 1911 if (mrtdebug & DEBUG_XMIT) 1912 log(LOG_DEBUG, "phyint_send on vif %d err %d", vifp-viftable, error); 1913 } 1914 splx(s); 1915} 1916 1917/* determine the current time and then 1918 * the elapsed time (between the last time and time now) 1919 * in milliseconds & update the no. of tokens in the bucket 1920 */ 1921static void 1922tbf_update_tokens(vifp) 1923 register struct vif *vifp; 1924{ 1925 struct timeval tp; 1926 register u_long t; 1927 register u_long elapsed; 1928 register int s = splnet(); 1929 1930 GET_TIME(tp); 1931 1932 t = tp.tv_sec*1000 + tp.tv_usec/1000; 1933 1934 elapsed = (t - vifp->v_tbf->last_pkt_t) * vifp->v_rate_limit /8; 1935 vifp->v_tbf->n_tok += elapsed; 1936 vifp->v_tbf->last_pkt_t = t; 1937 1938 if (vifp->v_tbf->n_tok > MAX_BKT_SIZE) 1939 vifp->v_tbf->n_tok = MAX_BKT_SIZE; 1940 1941 splx(s); 1942} 1943 1944static int 1945priority(vifp, ip) 1946 register struct vif *vifp; 1947 register struct ip *ip; 1948{ 1949 register int prio; 1950 1951 /* temporary hack; may add general packet classifier some day */ 1952 1953 /* 1954 * The UDP port space is divided up into four priority ranges: 1955 * [0, 16384) : unclassified - lowest priority 1956 * [16384, 32768) : audio - highest priority 1957 * [32768, 49152) : whiteboard - medium priority 1958 * [49152, 65536) : video - low priority 1959 */ 1960 if (ip->ip_p == IPPROTO_UDP) { 1961 struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2)); 1962 switch (ntohs(udp->uh_dport) & 0xc000) { 1963 case 0x4000: 1964 prio = 70; 1965 break; 1966 case 0x8000: 1967 prio = 60; 1968 break; 1969 case 0xc000: 1970 prio = 55; 1971 break; 1972 default: 1973 prio = 50; 1974 break; 1975 } 1976 if (tbfdebug > 1) 1977 log(LOG_DEBUG, "port %x prio%d", ntohs(udp->uh_dport), prio); 1978 } else { 1979 prio = 50; 1980 } 1981 return prio; 1982} 1983 1984/* 1985 * End of token bucket filter modifications 1986 */ 1987 1988int 1989ip_rsvp_vif_init(so, m) 1990 struct socket *so; 1991 struct mbuf *m; 1992{ 1993 int i; 1994 register int s; 1995 1996 if (rsvpdebug) 1997 printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n", 1998 so->so_type, so->so_proto->pr_protocol); 1999 2000 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) 2001 return EOPNOTSUPP; 2002 2003 /* Check mbuf. */ 2004 if (m == NULL || m->m_len != sizeof(int)) { 2005 return EINVAL; 2006 } 2007 i = *(mtod(m, int *)); 2008 2009 if (rsvpdebug) 2010 printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n",i,rsvp_on); 2011 2012 s = splnet(); 2013 2014 /* Check vif. */ 2015 if (!legal_vif_num(i)) { 2016 splx(s); 2017 return EADDRNOTAVAIL; 2018 } 2019 2020 /* Check if socket is available. */ 2021 if (viftable[i].v_rsvpd != NULL) { 2022 splx(s); 2023 return EADDRINUSE; 2024 } 2025 2026 viftable[i].v_rsvpd = so; 2027 /* This may seem silly, but we need to be sure we don't over-increment 2028 * the RSVP counter, in case something slips up. 2029 */ 2030 if (!viftable[i].v_rsvp_on) { 2031 viftable[i].v_rsvp_on = 1; 2032 rsvp_on++; 2033 } 2034 2035 splx(s); 2036 return 0; 2037} 2038 2039int 2040ip_rsvp_vif_done(so, m) 2041 struct socket *so; 2042 struct mbuf *m; 2043{ 2044 int i; 2045 register int s; 2046 2047 if (rsvpdebug) 2048 printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n", 2049 so->so_type, so->so_proto->pr_protocol); 2050 2051 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) 2052 return EOPNOTSUPP; 2053 2054 /* Check mbuf. */ 2055 if (m == NULL || m->m_len != sizeof(int)) { 2056 return EINVAL; 2057 } 2058 i = *(mtod(m, int *)); 2059 2060 s = splnet(); 2061 2062 /* Check vif. */ 2063 if (!legal_vif_num(i)) { 2064 splx(s); 2065 return EADDRNOTAVAIL; 2066 } 2067 2068 if (rsvpdebug) 2069 printf("ip_rsvp_vif_done: v_rsvpd = %x so = %x\n", 2070 viftable[i].v_rsvpd, so); 2071 2072 viftable[i].v_rsvpd = NULL; 2073 /* This may seem silly, but we need to be sure we don't over-decrement 2074 * the RSVP counter, in case something slips up. 2075 */ 2076 if (viftable[i].v_rsvp_on) { 2077 viftable[i].v_rsvp_on = 0; 2078 rsvp_on--; 2079 } 2080 2081 splx(s); 2082 return 0; 2083} 2084 2085void 2086ip_rsvp_force_done(so) 2087 struct socket *so; 2088{ 2089 int vifi; 2090 register int s; 2091 2092 /* Don't bother if it is not the right type of socket. */ 2093 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) 2094 return; 2095 2096 s = splnet(); 2097 2098 /* The socket may be attached to more than one vif...this 2099 * is perfectly legal. 2100 */ 2101 for (vifi = 0; vifi < numvifs; vifi++) { 2102 if (viftable[vifi].v_rsvpd == so) { 2103 viftable[vifi].v_rsvpd = NULL; 2104 /* This may seem silly, but we need to be sure we don't 2105 * over-decrement the RSVP counter, in case something slips up. 2106 */ 2107 if (viftable[vifi].v_rsvp_on) { 2108 viftable[vifi].v_rsvp_on = 0; 2109 rsvp_on--; 2110 } 2111 } 2112 } 2113 2114 splx(s); 2115 return; 2116} 2117 2118void 2119rsvp_input(m, ifp) 2120 struct mbuf *m; 2121 struct ifnet *ifp; 2122{ 2123 int vifi; 2124 register struct ip *ip = mtod(m, struct ip *); 2125 static struct sockaddr_in rsvp_src = { AF_INET }; 2126 register int s; 2127 2128 if (rsvpdebug) 2129 printf("rsvp_input: rsvp_on %d\n",rsvp_on); 2130 2131 /* Can still get packets with rsvp_on = 0 if there is a local member 2132 * of the group to which the RSVP packet is addressed. But in this 2133 * case we want to throw the packet away. 2134 */ 2135 if (!rsvp_on) { 2136 m_freem(m); 2137 return; 2138 } 2139 2140 /* If the old-style non-vif-associated socket is set, then use 2141 * it and ignore the new ones. 2142 */ 2143 if (ip_rsvpd != NULL) { 2144 if (rsvpdebug) 2145 printf("rsvp_input: Sending packet up old-style socket\n"); 2146 rip_input(m); 2147 return; 2148 } 2149 2150 s = splnet(); 2151 2152 if (rsvpdebug) 2153 printf("rsvp_input: check vifs\n"); 2154 2155 /* Find which vif the packet arrived on. */ 2156 for (vifi = 0; vifi < numvifs; vifi++) { 2157 if (viftable[vifi].v_ifp == ifp) 2158 break; 2159 } 2160 2161 if (vifi == numvifs) { 2162 /* Can't find vif packet arrived on. Drop packet. */ 2163 if (rsvpdebug) 2164 printf("rsvp_input: Can't find vif for packet...dropping it.\n"); 2165 m_freem(m); 2166 splx(s); 2167 return; 2168 } 2169 2170 if (rsvpdebug) 2171 printf("rsvp_input: check socket\n"); 2172 2173 if (viftable[vifi].v_rsvpd == NULL) { 2174 /* drop packet, since there is no specific socket for this 2175 * interface */ 2176 if (rsvpdebug) 2177 printf("rsvp_input: No socket defined for vif %d\n",vifi); 2178 m_freem(m); 2179 splx(s); 2180 return; 2181 } 2182 rsvp_src.sin_addr = ip->ip_src; 2183 2184 if (rsvpdebug && m) 2185 printf("rsvp_input: m->m_len = %d, sbspace() = %d\n", 2186 m->m_len,sbspace(&(viftable[vifi].v_rsvpd->so_rcv))); 2187 2188 if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0) 2189 if (rsvpdebug) 2190 printf("rsvp_input: Failed to append to socket\n"); 2191 else 2192 if (rsvpdebug) 2193 printf("rsvp_input: send packet up\n"); 2194 2195 splx(s); 2196} 2197 2198#ifdef MROUTE_LKM 2199#include <sys/conf.h> 2200#include <sys/exec.h> 2201#include <sys/sysent.h> 2202#include <sys/lkm.h> 2203 2204MOD_MISC("ip_mroute_mod") 2205 2206static int 2207ip_mroute_mod_handle(struct lkm_table *lkmtp, int cmd) 2208{ 2209 int i; 2210 struct lkm_misc *args = lkmtp->private.lkm_misc; 2211 int err = 0; 2212 2213 switch(cmd) { 2214 static int (*old_ip_mrouter_cmd)(); 2215 static int (*old_ip_mrouter_done)(); 2216 static int (*old_ip_mforward)(); 2217 static int (*old_mrt_ioctl)(); 2218 static void (*old_proto4_input)(); 2219 static int (*old_legal_vif_num)(); 2220 extern struct protosw inetsw[]; 2221 2222 case LKM_E_LOAD: 2223 if(lkmexists(lkmtp) || ip_mrtproto) 2224 return(EEXIST); 2225 old_ip_mrouter_cmd = ip_mrouter_cmd; 2226 ip_mrouter_cmd = X_ip_mrouter_cmd; 2227 old_ip_mrouter_done = ip_mrouter_done; 2228 ip_mrouter_done = X_ip_mrouter_done; 2229 old_ip_mforward = ip_mforward; 2230 ip_mforward = X_ip_mforward; 2231 old_mrt_ioctl = mrt_ioctl; 2232 mrt_ioctl = X_mrt_ioctl; 2233 old_proto4_input = inetsw[ip_protox[ENCAP_PROTO]].pr_input; 2234 inetsw[ip_protox[ENCAP_PROTO]].pr_input = X_ipip_input; 2235 old_legal_vif_num = legal_vif_num; 2236 legal_vif_num = X_legal_vif_num; 2237 ip_mrtproto = IGMP_DVMRP; 2238 2239 printf("\nIP multicast routing loaded\n"); 2240 break; 2241 2242 case LKM_E_UNLOAD: 2243 if (ip_mrouter) 2244 return EINVAL; 2245 2246 ip_mrouter_cmd = old_ip_mrouter_cmd; 2247 ip_mrouter_done = old_ip_mrouter_done; 2248 ip_mforward = old_ip_mforward; 2249 mrt_ioctl = old_mrt_ioctl; 2250 inetsw[ip_protox[ENCAP_PROTO]].pr_input = old_proto4_input; 2251 legal_vif_num = old_legal_vif_num; 2252 ip_mrtproto = 0; 2253 break; 2254 2255 default: 2256 err = EINVAL; 2257 break; 2258 } 2259 2260 return(err); 2261} 2262 2263int 2264ip_mroute_mod(struct lkm_table *lkmtp, int cmd, int ver) { 2265 DISPATCH(lkmtp, cmd, ver, ip_mroute_mod_handle, ip_mroute_mod_handle, 2266 nosys); 2267} 2268 2269#endif /* MROUTE_LKM */ 2270#endif /* MROUTING */ 2271