ip_mroute.c revision 92960
1178481Sjb/* 2178481Sjb * IP multicast forwarding procedures 3178481Sjb * 4178481Sjb * Written by David Waitzman, BBN Labs, August 1988. 5178481Sjb * Modified by Steve Deering, Stanford, February 1989. 6178481Sjb * Modified by Mark J. Steiglitz, Stanford, May, 1991 7178481Sjb * Modified by Van Jacobson, LBL, January 1993 8178481Sjb * Modified by Ajit Thyagarajan, PARC, August 1993 9178481Sjb * Modified by Bill Fenner, PARC, April 1995 10178481Sjb * 11178481Sjb * MROUTING Revision: 3.5 12178481Sjb * $FreeBSD: head/sys/netinet/ip_mroute.c 92960 2002-03-22 16:45:54Z ru $ 13178481Sjb */ 14178481Sjb 15178481Sjb#include "opt_mrouting.h" 16178481Sjb#include "opt_random_ip_id.h" 17178481Sjb 18178481Sjb#include <sys/param.h> 19178481Sjb#include <sys/systm.h> 20178481Sjb#include <sys/malloc.h> 21178481Sjb#include <sys/mbuf.h> 22178481Sjb#include <sys/socket.h> 23178481Sjb#include <sys/socketvar.h> 24178481Sjb#include <sys/protosw.h> 25178481Sjb#include <sys/time.h> 26178481Sjb#include <sys/kernel.h> 27178481Sjb#include <sys/sysctl.h> 28178481Sjb#include <sys/sockio.h> 29178481Sjb#include <sys/syslog.h> 30178481Sjb#include <net/if.h> 31178481Sjb#include <net/route.h> 32178481Sjb#include <netinet/in.h> 33178481Sjb#include <netinet/in_systm.h> 34178481Sjb#include <netinet/ip.h> 35178481Sjb#include <netinet/ip_var.h> 36178481Sjb#include <netinet/in_var.h> 37178481Sjb#include <netinet/igmp.h> 38178481Sjb#include <netinet/ip_encap.h> 39178481Sjb#include <netinet/ip_mroute.h> 40178481Sjb#include <netinet/udp.h> 41178481Sjb#include <machine/in_cksum.h> 42178481Sjb 43178481Sjb#ifndef MROUTING 44178481Sjbextern u_long _ip_mcast_src(int vifi); 45178481Sjbextern int _ip_mforward(struct ip *ip, struct ifnet *ifp, 46178481Sjb struct mbuf *m, struct ip_moptions *imo); 47178481Sjbextern int _ip_mrouter_done(void); 48178481Sjbextern int _ip_mrouter_get(struct socket *so, struct sockopt *sopt); 49178481Sjbextern int _ip_mrouter_set(struct socket *so, struct sockopt *sopt); 50178481Sjbextern 
int _mrt_ioctl(int req, caddr_t data); 51178481Sjb 52178481Sjb/* 53178481Sjb * Dummy routines and globals used when multicast routing is not compiled in. 54178481Sjb */ 55178481Sjb 56178481Sjbstruct socket *ip_mrouter = NULL; 57178481Sjbu_int rsvpdebug = 0; 58178481Sjb 59178481Sjbint 60178481Sjb_ip_mrouter_set(so, sopt) 61178481Sjb struct socket *so; 62178481Sjb struct sockopt *sopt; 63178481Sjb{ 64178481Sjb return(EOPNOTSUPP); 65178481Sjb} 66178481Sjb 67178481Sjbint (*ip_mrouter_set)(struct socket *, struct sockopt *) = _ip_mrouter_set; 68178481Sjb 69178481Sjb 70178481Sjbint 71178481Sjb_ip_mrouter_get(so, sopt) 72178481Sjb struct socket *so; 73178481Sjb struct sockopt *sopt; 74178481Sjb{ 75178481Sjb return(EOPNOTSUPP); 76178481Sjb} 77178481Sjb 78178481Sjbint (*ip_mrouter_get)(struct socket *, struct sockopt *) = _ip_mrouter_get; 79178481Sjb 80178481Sjbint 81178481Sjb_ip_mrouter_done() 82178481Sjb{ 83178546Sjb return(0); 84178481Sjb} 85178481Sjb 86178481Sjbint (*ip_mrouter_done)(void) = _ip_mrouter_done; 87178481Sjb 88178481Sjbint 89178481Sjb_ip_mforward(ip, ifp, m, imo) 90178481Sjb struct ip *ip; 91178481Sjb struct ifnet *ifp; 92178481Sjb struct mbuf *m; 93178481Sjb struct ip_moptions *imo; 94178481Sjb{ 95178481Sjb return(0); 96178481Sjb} 97178481Sjb 98178481Sjbint (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, 99178481Sjb struct ip_moptions *) = _ip_mforward; 100178481Sjb 101178481Sjbint 102178481Sjb_mrt_ioctl(int req, caddr_t data) 103178481Sjb{ 104178481Sjb return EOPNOTSUPP; 105178481Sjb} 106178481Sjb 107178481Sjbint (*mrt_ioctl)(int, caddr_t) = _mrt_ioctl; 108178481Sjb 109178481Sjbvoid 110178481Sjbrsvp_input(m, off) /* XXX must fixup manually */ 111178481Sjb struct mbuf *m; 112178481Sjb int off; 113178481Sjb{ 114178481Sjb /* Can still get packets with rsvp_on = 0 if there is a local member 115178546Sjb * of the group to which the RSVP packet is addressed. But in this 116178546Sjb * case we want to throw the packet away. 
117178546Sjb */ 118178481Sjb if (!rsvp_on) { 119178481Sjb m_freem(m); 120178481Sjb return; 121178481Sjb } 122178481Sjb 123178481Sjb if (ip_rsvpd != NULL) { 124178481Sjb if (rsvpdebug) 125178481Sjb printf("rsvp_input: Sending packet up old-style socket\n"); 126178481Sjb rip_input(m, off); 127178481Sjb return; 128178546Sjb } 129178481Sjb /* Drop the packet */ 130178481Sjb m_freem(m); 131178481Sjb} 132178481Sjb 133178481Sjbint (*legal_vif_num)(int) = 0; 134178481Sjb 135178481Sjb/* 136178481Sjb * This should never be called, since IP_MULTICAST_VIF should fail, but 137178481Sjb * just in case it does get called, the code a little lower in ip_output 138178481Sjb * will assign the packet a local address. 139178481Sjb */ 140178481Sjbu_long 141178481Sjb_ip_mcast_src(int vifi) { return INADDR_ANY; } 142178481Sjbu_long (*ip_mcast_src)(int) = _ip_mcast_src; 143178481Sjb 144178481Sjbint 145178481Sjbip_rsvp_vif_init(so, sopt) 146178481Sjb struct socket *so; 147178481Sjb struct sockopt *sopt; 148178481Sjb{ 149178481Sjb return(EINVAL); 150178481Sjb} 151178481Sjb 152178481Sjbint 153178481Sjbip_rsvp_vif_done(so, sopt) 154178481Sjb struct socket *so; 155178481Sjb struct sockopt *sopt; 156178481Sjb{ 157178481Sjb return(EINVAL); 158178481Sjb} 159178481Sjb 160178481Sjbvoid 161178546Sjbip_rsvp_force_done(so) 162178481Sjb struct socket *so; 163178481Sjb{ 164178481Sjb return; 165178481Sjb} 166178481Sjb 167178481Sjb#else /* MROUTING */ 168178481Sjb 169178481Sjb#define M_HASCL(m) ((m)->m_flags & M_EXT) 170178481Sjb 171178481Sjbstatic MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast routing tables"); 172178481Sjb 173178481Sjb#ifndef MROUTE_KLD 174178481Sjb/* The socket used to communicate with the multicast routing daemon. 
*/
struct socket  *ip_mrouter  = NULL;
#endif

#if defined(MROUTING) || defined(MROUTE_KLD)
/* Counters exported read/write through the net.inet.ip.mrtstat sysctl. */
static struct mrtstat	mrtstat;
SYSCTL_STRUCT(_net_inet_ip, OID_AUTO, mrtstat, CTLFLAG_RW,
    &mrtstat, mrtstat, "Multicast Routing Statistics (struct mrtstat, netinet/ip_mroute.h)");
#endif

/*
 * Multicast forwarding cache: an array of chains (linked through
 * mfc_next) indexed by MFCHASH(origin, group).
 */
static struct mfc	*mfctable[MFCTBLSIZ];
/*
 * Per-bucket counter, decremented by add_mfc() whenever it clears an
 * entry's mfc_expire.  NOTE(review): presumably the number of entries
 * in the bucket that are waiting to expire -- confirm against
 * expire_upcalls().
 */
static u_char		nexpire[MFCTBLSIZ];
/* Virtual interface table, indexed by vif number (see add_vif()). */
static struct vif	viftable[MAXVIFS];
static u_int	mrtdebug = 0;	  /* debug level 	*/
/* Bits tested against mrtdebug to select a class of debug output. */
#define		DEBUG_MFC	0x02
#define		DEBUG_FORWARD	0x04
#define		DEBUG_EXPIRE	0x08
#define		DEBUG_XMIT	0x10
static u_int  	tbfdebug = 0;     /* tbf debug level 	*/
static u_int	rsvpdebug = 0;	  /* rsvp debug level   */

/*
 * Handle for the expire_upcalls() timeout armed in ip_mrouter_init()
 * and cancelled in X_ip_mrouter_done().
 */
static struct callout_handle expire_upcalls_ch;

#define		EXPIRE_TIMEOUT	(hz / 4)	/* 4x / second		*/
#define		UPCALL_EXPIRE	6		/* number of timeouts	*/

/*
 * Define the token bucket filter structures
 * tbftable -> each vif has one of these for storing info
 */

static struct tbf tbftable[MAXVIFS];
#define		TBF_REPROCESS	(hz / 100)	/* 100x / second */

/*
 * 'Interfaces' associated with decapsulator (so we can tell
 * packets that went through it from ones that get reflected
 * by a broken gateway).  These interfaces are never linked into
 * the system ifnet list & no routes point to them.  I.e., packets
 * can't be sent this way.  They only exist as a placeholder for
 * multicast source verification.
*/
static struct ifnet multicast_decap_if[MAXVIFS];

/* TTL and protocol number placed in the outer header of encapsulated packets. */
#define ENCAP_TTL 64
#define ENCAP_PROTO IPPROTO_IPIP	/* 4 */

/*
 * prototype IP hdr for encapsulated packets
 *
 * The initializer is positional; the first two values (header length in
 * 32-bit words and IP version) are swapped according to BYTE_ORDER.
 * NOTE(review): presumably this follows the order of the ip_hl/ip_v
 * bit-fields in struct ip -- confirm in netinet/ip.h.  Members not
 * listed (source and destination address) are zero-initialized.
 */
static struct ip multicast_encap_iphdr = {
#if BYTE_ORDER == LITTLE_ENDIAN
	sizeof(struct ip) >> 2, IPVERSION,
#else
	IPVERSION, sizeof(struct ip) >> 2,
#endif
	0,				/* tos */
	sizeof(struct ip),		/* total length */
	0,				/* id */
	0,				/* frag offset */
	ENCAP_TTL, ENCAP_PROTO,
	0,				/* checksum */
};

/*
 * Private variables.
 */
/* One past the highest vif index in use; maintained by add_vif()/del_vif(). */
static vifi_t	   numvifs = 0;
/* Cookie from encap_attach_func(); NULL while no decapsulator is attached. */
static const struct encaptab *encap_cookie = NULL;

/*
 * one-back cache used by mroute_encapcheck to locate a tunnel's vif
 * given a datagram's src ip address.
*/
static u_long last_encap_src;
static struct vif *last_encap_vif;

/*
 * Implementations behind the ip_mrouter_* / mrt_ioctl function
 * pointers (the pointers are initialized next to each definition
 * unless MROUTE_KLD is defined).
 */
static u_long	X_ip_mcast_src(int vifi);
static int	X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, struct ip_moptions *imo);
static int	X_ip_mrouter_done(void);
static int	X_ip_mrouter_get(struct socket *so, struct sockopt *m);
static int	X_ip_mrouter_set(struct socket *so, struct sockopt *m);
static int	X_legal_vif_num(int vif);
static int	X_mrt_ioctl(int cmd, caddr_t data);

/* Helpers private to this file. */
static int get_sg_cnt(struct sioc_sg_req *);
static int get_vif_cnt(struct sioc_vif_req *);
static int ip_mrouter_init(struct socket *, int);
static int add_vif(struct vifctl *);
static int del_vif(vifi_t);
static int add_mfc(struct mfcctl *);
static int del_mfc(struct mfcctl *);
static int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *);
static int set_assert(int);
static void expire_upcalls(void *);
static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *,
		  vifi_t);
static void phyint_send(struct ip *, struct vif *, struct mbuf *);
static void encap_send(struct ip *, struct vif *, struct mbuf *);
/* Token bucket filter (per-vif rate limiting) helpers. */
static void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long);
static void tbf_queue(struct vif *, struct mbuf *);
static void tbf_process_q(struct vif *);
static void tbf_reprocess_q(void *);
static int tbf_dq_sel(struct vif *, struct ip *);
static void tbf_send_packet(struct vif *, struct mbuf *);
static void tbf_update_tokens(struct vif *);
static int priority(struct vif *, struct ip *);

/*
 * whether or not special PIM assert processing is
enabled. 282178481Sjb */ 283178481Sjbstatic int pim_assert; 284178481Sjb/* 285178481Sjb * Rate limit for assert notification messages, in usec 286178481Sjb */ 287178481Sjb#define ASSERT_MSG_TIME 3000000 288178481Sjb 289178481Sjb/* 290178481Sjb * Hash function for a source, group entry 291178481Sjb */ 292178481Sjb#define MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \ 293178481Sjb ((g) >> 20) ^ ((g) >> 10) ^ (g)) 294178481Sjb 295178481Sjb/* 296178481Sjb * Find a route for a given origin IP address and Multicast group address 297178481Sjb * Type of service parameter to be added in the future!!! 298178481Sjb */ 299178481Sjb 300178481Sjb#define MFCFIND(o, g, rt) { \ 301178481Sjb register struct mfc *_rt = mfctable[MFCHASH(o,g)]; \ 302178481Sjb rt = NULL; \ 303178481Sjb ++mrtstat.mrts_mfc_lookups; \ 304178481Sjb while (_rt) { \ 305178481Sjb if ((_rt->mfc_origin.s_addr == o) && \ 306178481Sjb (_rt->mfc_mcastgrp.s_addr == g) && \ 307178481Sjb (_rt->mfc_stall == NULL)) { \ 308178481Sjb rt = _rt; \ 309178481Sjb break; \ 310178481Sjb } \ 311178481Sjb _rt = _rt->mfc_next; \ 312178481Sjb } \ 313178481Sjb if (rt == NULL) { \ 314178481Sjb ++mrtstat.mrts_mfc_misses; \ 315178481Sjb } \ 316178481Sjb} 317178481Sjb 318178481Sjb 319178481Sjb/* 320178481Sjb * Macros to compute elapsed time efficiently 321178481Sjb * Borrowed from Van Jacobson's scheduling code 322178481Sjb */ 323178481Sjb#define TV_DELTA(a, b, delta) { \ 324178481Sjb register int xxs; \ 325178481Sjb \ 326178481Sjb delta = (a).tv_usec - (b).tv_usec; \ 327178481Sjb if ((xxs = (a).tv_sec - (b).tv_sec)) { \ 328178481Sjb switch (xxs) { \ 329178481Sjb case 2: \ 330178481Sjb delta += 1000000; \ 331178481Sjb /* fall through */ \ 332178481Sjb case 1: \ 333178481Sjb delta += 1000000; \ 334178481Sjb break; \ 335178481Sjb default: \ 336178481Sjb delta += (1000000 * xxs); \ 337178481Sjb } \ 338178481Sjb } \ 339178481Sjb} 340178481Sjb 341178481Sjb#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \ 342178481Sjb 
(a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) 343178481Sjb 344178481Sjb#ifdef UPCALL_TIMING 345178481Sjbu_long upcall_data[51]; 346178481Sjbstatic void collate(struct timeval *); 347178481Sjb#endif /* UPCALL_TIMING */ 348178481Sjb 349178481Sjb 350178481Sjb/* 351178481Sjb * Handle MRT setsockopt commands to modify the multicast routing tables. 352178481Sjb */ 353178481Sjbstatic int 354178481SjbX_ip_mrouter_set(so, sopt) 355178481Sjb struct socket *so; 356178481Sjb struct sockopt *sopt; 357178481Sjb{ 358178481Sjb int error, optval; 359178481Sjb vifi_t vifi; 360178481Sjb struct vifctl vifc; 361178481Sjb struct mfcctl mfc; 362178481Sjb 363178481Sjb if (so != ip_mrouter && sopt->sopt_name != MRT_INIT) 364178481Sjb return (EPERM); 365178481Sjb 366178481Sjb error = 0; 367178481Sjb switch (sopt->sopt_name) { 368178481Sjb case MRT_INIT: 369178481Sjb error = sooptcopyin(sopt, &optval, sizeof optval, 370178481Sjb sizeof optval); 371178481Sjb if (error) 372178481Sjb break; 373178481Sjb error = ip_mrouter_init(so, optval); 374178481Sjb break; 375178481Sjb 376178481Sjb case MRT_DONE: 377178481Sjb error = ip_mrouter_done(); 378178481Sjb break; 379178481Sjb 380178481Sjb case MRT_ADD_VIF: 381178481Sjb error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc); 382178481Sjb if (error) 383178481Sjb break; 384178481Sjb error = add_vif(&vifc); 385178481Sjb break; 386178481Sjb 387178481Sjb case MRT_DEL_VIF: 388178481Sjb error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi); 389178481Sjb if (error) 390178481Sjb break; 391178481Sjb error = del_vif(vifi); 392178481Sjb break; 393178481Sjb 394178481Sjb case MRT_ADD_MFC: 395178481Sjb case MRT_DEL_MFC: 396178481Sjb error = sooptcopyin(sopt, &mfc, sizeof mfc, sizeof mfc); 397178481Sjb if (error) 398178481Sjb break; 399178481Sjb if (sopt->sopt_name == MRT_ADD_MFC) 400178481Sjb error = add_mfc(&mfc); 401178481Sjb else 402178481Sjb error = del_mfc(&mfc); 403178481Sjb break; 404178481Sjb 405178481Sjb case MRT_ASSERT: 406178481Sjb error 
= sooptcopyin(sopt, &optval, sizeof optval, 407178481Sjb sizeof optval); 408178481Sjb if (error) 409178481Sjb break; 410178481Sjb set_assert(optval); 411178481Sjb break; 412178481Sjb 413178481Sjb default: 414178481Sjb error = EOPNOTSUPP; 415178481Sjb break; 416178481Sjb } 417178481Sjb return (error); 418178481Sjb} 419178481Sjb 420178481Sjb#ifndef MROUTE_KLD 421178481Sjbint (*ip_mrouter_set)(struct socket *, struct sockopt *) = X_ip_mrouter_set; 422178481Sjb#endif 423178481Sjb 424178481Sjb/* 425178481Sjb * Handle MRT getsockopt commands 426178481Sjb */ 427178481Sjbstatic int 428178481SjbX_ip_mrouter_get(so, sopt) 429178481Sjb struct socket *so; 430178481Sjb struct sockopt *sopt; 431178481Sjb{ 432178481Sjb int error; 433178481Sjb static int version = 0x0305; /* !!! why is this here? XXX */ 434178481Sjb 435178481Sjb switch (sopt->sopt_name) { 436178481Sjb case MRT_VERSION: 437178481Sjb error = sooptcopyout(sopt, &version, sizeof version); 438178481Sjb break; 439178481Sjb 440178481Sjb case MRT_ASSERT: 441178481Sjb error = sooptcopyout(sopt, &pim_assert, sizeof pim_assert); 442178481Sjb break; 443178481Sjb default: 444178481Sjb error = EOPNOTSUPP; 445178481Sjb break; 446178481Sjb } 447178481Sjb return (error); 448178481Sjb} 449178481Sjb 450178481Sjb#ifndef MROUTE_KLD 451178481Sjbint (*ip_mrouter_get)(struct socket *, struct sockopt *) = X_ip_mrouter_get; 452178481Sjb#endif 453178481Sjb 454178481Sjb/* 455178481Sjb * Handle ioctl commands to obtain information from the cache 456178481Sjb */ 457178481Sjbstatic int 458178481SjbX_mrt_ioctl(cmd, data) 459178481Sjb int cmd; 460178481Sjb caddr_t data; 461178481Sjb{ 462178481Sjb int error = 0; 463178481Sjb 464178481Sjb switch (cmd) { 465178481Sjb case (SIOCGETVIFCNT): 466178481Sjb return (get_vif_cnt((struct sioc_vif_req *)data)); 467178481Sjb break; 468178481Sjb case (SIOCGETSGCNT): 469178481Sjb return (get_sg_cnt((struct sioc_sg_req *)data)); 470178481Sjb break; 471178481Sjb default: 472178481Sjb return (EINVAL); 473178481Sjb 
break; 474178481Sjb } 475178481Sjb return error; 476178481Sjb} 477178481Sjb 478178481Sjb#ifndef MROUTE_KLD 479178481Sjbint (*mrt_ioctl)(int, caddr_t) = X_mrt_ioctl; 480178481Sjb#endif 481178481Sjb 482178481Sjb/* 483178481Sjb * returns the packet, byte, rpf-failure count for the source group provided 484178481Sjb */ 485178481Sjbstatic int 486178481Sjbget_sg_cnt(req) 487178481Sjb register struct sioc_sg_req *req; 488178481Sjb{ 489178481Sjb register struct mfc *rt; 490178481Sjb int s; 491178481Sjb 492178481Sjb s = splnet(); 493178481Sjb MFCFIND(req->src.s_addr, req->grp.s_addr, rt); 494178481Sjb splx(s); 495178481Sjb if (rt != NULL) { 496178481Sjb req->pktcnt = rt->mfc_pkt_cnt; 497178481Sjb req->bytecnt = rt->mfc_byte_cnt; 498178481Sjb req->wrong_if = rt->mfc_wrong_if; 499178481Sjb } else 500178481Sjb req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff; 501178481Sjb 502178481Sjb return 0; 503178481Sjb} 504178481Sjb 505178481Sjb/* 506178481Sjb * returns the input and output packet and byte counts on the vif provided 507178481Sjb */ 508178481Sjbstatic int 509178481Sjbget_vif_cnt(req) 510178481Sjb register struct sioc_vif_req *req; 511178481Sjb{ 512178481Sjb register vifi_t vifi = req->vifi; 513178481Sjb 514178481Sjb if (vifi >= numvifs) return EINVAL; 515178481Sjb 516178481Sjb req->icount = viftable[vifi].v_pkt_in; 517178481Sjb req->ocount = viftable[vifi].v_pkt_out; 518178481Sjb req->ibytes = viftable[vifi].v_bytes_in; 519178481Sjb req->obytes = viftable[vifi].v_bytes_out; 520178481Sjb 521178481Sjb return 0; 522178481Sjb} 523178481Sjb 524178481Sjb/* 525178481Sjb * Enable multicast routing 526178481Sjb */ 527178481Sjbstatic int 528178481Sjbip_mrouter_init(so, version) 529178481Sjb struct socket *so; 530178481Sjb int version; 531178481Sjb{ 532178481Sjb if (mrtdebug) 533178481Sjb log(LOG_DEBUG,"ip_mrouter_init: so_type = %d, pr_protocol = %d\n", 534178481Sjb so->so_type, so->so_proto->pr_protocol); 535178481Sjb 536178481Sjb if (so->so_type != SOCK_RAW || 537178481Sjb 
so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP; 538178481Sjb 539178481Sjb if (version != 1) 540178481Sjb return ENOPROTOOPT; 541178481Sjb 542178481Sjb if (ip_mrouter != NULL) return EADDRINUSE; 543178481Sjb 544178481Sjb ip_mrouter = so; 545178481Sjb 546178481Sjb bzero((caddr_t)mfctable, sizeof(mfctable)); 547178481Sjb bzero((caddr_t)nexpire, sizeof(nexpire)); 548178481Sjb 549178481Sjb pim_assert = 0; 550178481Sjb 551178481Sjb expire_upcalls_ch = timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT); 552178481Sjb 553178481Sjb if (mrtdebug) 554178481Sjb log(LOG_DEBUG, "ip_mrouter_init\n"); 555178481Sjb 556178481Sjb return 0; 557178481Sjb} 558178481Sjb 559178481Sjb/* 560178481Sjb * Disable multicast routing 561178481Sjb */ 562178481Sjbstatic int 563178481SjbX_ip_mrouter_done() 564178481Sjb{ 565178481Sjb vifi_t vifi; 566178481Sjb int i; 567178481Sjb struct ifnet *ifp; 568178481Sjb struct ifreq ifr; 569178481Sjb struct mfc *rt; 570178481Sjb struct rtdetq *rte; 571178481Sjb int s; 572178481Sjb 573178481Sjb s = splnet(); 574178481Sjb 575178481Sjb /* 576178481Sjb * For each phyint in use, disable promiscuous reception of all IP 577178481Sjb * multicasts. 578178481Sjb */ 579178481Sjb for (vifi = 0; vifi < numvifs; vifi++) { 580178481Sjb if (viftable[vifi].v_lcl_addr.s_addr != 0 && 581178481Sjb !(viftable[vifi].v_flags & VIFF_TUNNEL)) { 582178481Sjb ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; 583178481Sjb ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr 584178481Sjb = INADDR_ANY; 585178481Sjb ifp = viftable[vifi].v_ifp; 586178481Sjb if_allmulti(ifp, 0); 587178481Sjb } 588178481Sjb } 589178481Sjb bzero((caddr_t)tbftable, sizeof(tbftable)); 590178481Sjb bzero((caddr_t)viftable, sizeof(viftable)); 591178481Sjb numvifs = 0; 592178481Sjb pim_assert = 0; 593178481Sjb 594178481Sjb untimeout(expire_upcalls, (caddr_t)NULL, expire_upcalls_ch); 595178481Sjb 596178481Sjb /* 597178481Sjb * Free all multicast forwarding cache entries. 
598178481Sjb */ 599178481Sjb for (i = 0; i < MFCTBLSIZ; i++) { 600178481Sjb for (rt = mfctable[i]; rt != NULL; ) { 601178481Sjb struct mfc *nr = rt->mfc_next; 602178481Sjb 603178481Sjb for (rte = rt->mfc_stall; rte != NULL; ) { 604178481Sjb struct rtdetq *n = rte->next; 605178481Sjb 606178481Sjb m_freem(rte->m); 607178481Sjb free(rte, M_MRTABLE); 608178481Sjb rte = n; 609178481Sjb } 610178481Sjb free(rt, M_MRTABLE); 611178481Sjb rt = nr; 612178481Sjb } 613178481Sjb } 614178481Sjb 615178481Sjb bzero((caddr_t)mfctable, sizeof(mfctable)); 616178481Sjb 617178481Sjb /* 618178481Sjb * Reset de-encapsulation cache 619178481Sjb */ 620178481Sjb last_encap_src = 0; 621178481Sjb last_encap_vif = NULL; 622178481Sjb if (encap_cookie) { 623178481Sjb encap_detach(encap_cookie); 624178481Sjb encap_cookie = NULL; 625178481Sjb } 626178481Sjb 627178481Sjb ip_mrouter = NULL; 628178481Sjb 629178481Sjb splx(s); 630178481Sjb 631178481Sjb if (mrtdebug) 632178481Sjb log(LOG_DEBUG, "ip_mrouter_done\n"); 633178481Sjb 634178481Sjb return 0; 635178481Sjb} 636178481Sjb 637178481Sjb#ifndef MROUTE_KLD 638178481Sjbint (*ip_mrouter_done)(void) = X_ip_mrouter_done; 639178481Sjb#endif 640178481Sjb 641178481Sjb/* 642178481Sjb * Set PIM assert processing global 643178481Sjb */ 644178481Sjbstatic int 645178481Sjbset_assert(i) 646178481Sjb int i; 647178481Sjb{ 648178481Sjb if ((i != 1) && (i != 0)) 649178481Sjb return EINVAL; 650178481Sjb 651178481Sjb pim_assert = i; 652178481Sjb 653178481Sjb return 0; 654178481Sjb} 655178481Sjb 656178481Sjb/* 657178481Sjb * Decide if a packet is from a tunnelled peer. 658178481Sjb * Return 0 if not, 64 if so. 
659178481Sjb */ 660178481Sjbstatic int 661178481Sjbmroute_encapcheck(const struct mbuf *m, int off, int proto, void *arg) 662178481Sjb{ 663178481Sjb struct ip *ip = mtod(m, struct ip *); 664178481Sjb int hlen = ip->ip_hl << 2; 665178481Sjb register struct vif *vifp; 666178481Sjb 667178481Sjb /* 668178481Sjb * don't claim the packet if it's not to a multicast destination or if 669178481Sjb * we don't have an encapsulating tunnel with the source. 670178481Sjb * Note: This code assumes that the remote site IP address 671178481Sjb * uniquely identifies the tunnel (i.e., that this site has 672178481Sjb * at most one tunnel with the remote site). 673178481Sjb */ 674178481Sjb if (! IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) { 675178481Sjb return 0; 676178481Sjb } 677178481Sjb if (ip->ip_src.s_addr != last_encap_src) { 678178481Sjb register struct vif *vife; 679178481Sjb 680178481Sjb vifp = viftable; 681178481Sjb vife = vifp + numvifs; 682178481Sjb last_encap_src = ip->ip_src.s_addr; 683178481Sjb last_encap_vif = 0; 684178481Sjb for ( ; vifp < vife; ++vifp) 685178481Sjb if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) { 686178481Sjb if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT)) 687178481Sjb == VIFF_TUNNEL) 688178481Sjb last_encap_vif = vifp; 689178481Sjb break; 690178481Sjb } 691178481Sjb } 692178481Sjb if ((vifp = last_encap_vif) == 0) { 693178481Sjb last_encap_src = 0; 694178481Sjb return 0; 695178481Sjb } 696178481Sjb return 64; 697178481Sjb} 698178481Sjb 699178481Sjb/* 700178481Sjb * De-encapsulate a packet and feed it back through ip input (this 701178481Sjb * routine is called whenever IP gets a packet that mroute_encap_func() 702178481Sjb * claimed). 
703178481Sjb */ 704178481Sjbstatic void 705178481Sjbmroute_encap_input(struct mbuf *m, int off) 706178481Sjb{ 707178481Sjb struct ip *ip = mtod(m, struct ip *); 708178481Sjb int hlen = ip->ip_hl << 2; 709178481Sjb 710178481Sjb if (hlen > sizeof(struct ip)) 711178481Sjb ip_stripoptions(m, (struct mbuf *) 0); 712178481Sjb m->m_data += sizeof(struct ip); 713178481Sjb m->m_len -= sizeof(struct ip); 714178481Sjb m->m_pkthdr.len -= sizeof(struct ip); 715178481Sjb 716178481Sjb m->m_pkthdr.rcvif = last_encap_vif->v_ifp; 717178481Sjb 718178481Sjb (void) IF_HANDOFF(&ipintrq, m, NULL); 719178481Sjb /* 720178481Sjb * normally we would need a "schednetisr(NETISR_IP)" 721178481Sjb * here but we were called by ip_input and it is going 722178481Sjb * to loop back & try to dequeue the packet we just 723178481Sjb * queued as soon as we return so we avoid the 724178481Sjb * unnecessary software interrrupt. 725178481Sjb */ 726178481Sjb} 727178481Sjb 728178481Sjbextern struct domain inetdomain; 729178481Sjbstatic struct protosw mroute_encap_protosw = 730178481Sjb{ SOCK_RAW, &inetdomain, IPPROTO_IPV4, PR_ATOMIC|PR_ADDR, 731178481Sjb mroute_encap_input, 0, 0, rip_ctloutput, 732178481Sjb 0, 733178481Sjb 0, 0, 0, 0, 734178481Sjb &rip_usrreqs 735178481Sjb}; 736178481Sjb 737178481Sjb/* 738178481Sjb * Add a vif to the vif table 739178481Sjb */ 740178481Sjbstatic int 741178481Sjbadd_vif(vifcp) 742178481Sjb register struct vifctl *vifcp; 743178481Sjb{ 744178481Sjb register struct vif *vifp = viftable + vifcp->vifc_vifi; 745178481Sjb static struct sockaddr_in sin = {sizeof sin, AF_INET}; 746178481Sjb struct ifaddr *ifa; 747178481Sjb struct ifnet *ifp; 748178481Sjb int error, s; 749178481Sjb struct tbf *v_tbf = tbftable + vifcp->vifc_vifi; 750178481Sjb 751178481Sjb if (vifcp->vifc_vifi >= MAXVIFS) return EINVAL; 752178481Sjb if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE; 753178481Sjb 754178481Sjb /* Find the interface with an address in AF_INET family */ 755178481Sjb sin.sin_addr = 
vifcp->vifc_lcl_addr; 756178481Sjb ifa = ifa_ifwithaddr((struct sockaddr *)&sin); 757178481Sjb if (ifa == 0) return EADDRNOTAVAIL; 758178481Sjb ifp = ifa->ifa_ifp; 759178481Sjb 760178481Sjb if (vifcp->vifc_flags & VIFF_TUNNEL) { 761178481Sjb if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) { 762178481Sjb /* 763178481Sjb * An encapsulating tunnel is wanted. Tell 764178481Sjb * mroute_encap_input() to start paying attention 765178481Sjb * to encapsulated packets. 766178481Sjb */ 767178481Sjb if (encap_cookie == NULL) { 768178481Sjb encap_cookie = encap_attach_func(AF_INET, -1, 769178481Sjb mroute_encapcheck, 770178481Sjb (struct protosw *)&mroute_encap_protosw, NULL); 771178481Sjb 772178481Sjb if (encap_cookie == NULL) { 773178481Sjb printf("ip_mroute: unable to attach encap\n"); 774178481Sjb return (EIO); /* XXX */ 775178481Sjb } 776178481Sjb for (s = 0; s < MAXVIFS; ++s) { 777178481Sjb multicast_decap_if[s].if_name = "mdecap"; 778178481Sjb multicast_decap_if[s].if_unit = s; 779178481Sjb } 780178481Sjb } 781178481Sjb /* 782178481Sjb * Set interface to fake encapsulator interface 783178481Sjb */ 784178481Sjb ifp = &multicast_decap_if[vifcp->vifc_vifi]; 785178481Sjb /* 786178481Sjb * Prepare cached route entry 787178481Sjb */ 788178481Sjb bzero(&vifp->v_route, sizeof(vifp->v_route)); 789178481Sjb } else { 790178481Sjb log(LOG_ERR, "source routed tunnels not supported\n"); 791178481Sjb return EOPNOTSUPP; 792178481Sjb } 793178481Sjb } else { 794178481Sjb /* Make sure the interface supports multicast */ 795178481Sjb if ((ifp->if_flags & IFF_MULTICAST) == 0) 796178481Sjb return EOPNOTSUPP; 797178481Sjb 798178481Sjb /* Enable promiscuous reception of all IP multicasts from the if */ 799178481Sjb s = splnet(); 800178481Sjb error = if_allmulti(ifp, 1); 801178481Sjb splx(s); 802178481Sjb if (error) 803178481Sjb return error; 804178481Sjb } 805178481Sjb 806178481Sjb s = splnet(); 807178481Sjb /* define parameters for the tbf structure */ 808178481Sjb vifp->v_tbf = v_tbf; 
809178481Sjb GET_TIME(vifp->v_tbf->tbf_last_pkt_t); 810178481Sjb vifp->v_tbf->tbf_n_tok = 0; 811178481Sjb vifp->v_tbf->tbf_q_len = 0; 812178481Sjb vifp->v_tbf->tbf_max_q_len = MAXQSIZE; 813178481Sjb vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL; 814178481Sjb 815178481Sjb vifp->v_flags = vifcp->vifc_flags; 816178481Sjb vifp->v_threshold = vifcp->vifc_threshold; 817178481Sjb vifp->v_lcl_addr = vifcp->vifc_lcl_addr; 818178481Sjb vifp->v_rmt_addr = vifcp->vifc_rmt_addr; 819178481Sjb vifp->v_ifp = ifp; 820178481Sjb /* scaling up here allows division by 1024 in critical code */ 821178481Sjb vifp->v_rate_limit= vifcp->vifc_rate_limit * 1024 / 1000; 822178481Sjb vifp->v_rsvp_on = 0; 823178481Sjb vifp->v_rsvpd = NULL; 824178481Sjb /* initialize per vif pkt counters */ 825178481Sjb vifp->v_pkt_in = 0; 826178481Sjb vifp->v_pkt_out = 0; 827178481Sjb vifp->v_bytes_in = 0; 828178481Sjb vifp->v_bytes_out = 0; 829178481Sjb splx(s); 830178481Sjb 831178481Sjb /* Adjust numvifs up if the vifi is higher than numvifs */ 832178481Sjb if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1; 833178481Sjb 834178481Sjb if (mrtdebug) 835178481Sjb log(LOG_DEBUG, "add_vif #%d, lcladdr %lx, %s %lx, thresh %x, rate %d\n", 836178481Sjb vifcp->vifc_vifi, 837178481Sjb (u_long)ntohl(vifcp->vifc_lcl_addr.s_addr), 838178481Sjb (vifcp->vifc_flags & VIFF_TUNNEL) ? 
"rmtaddr" : "mask", 839178481Sjb (u_long)ntohl(vifcp->vifc_rmt_addr.s_addr), 840178481Sjb vifcp->vifc_threshold, 841178481Sjb vifcp->vifc_rate_limit); 842178481Sjb 843178481Sjb return 0; 844178481Sjb} 845178481Sjb 846178481Sjb/* 847178481Sjb * Delete a vif from the vif table 848178481Sjb */ 849178481Sjbstatic int 850178481Sjbdel_vif(vifi) 851178481Sjb vifi_t vifi; 852178481Sjb{ 853178481Sjb register struct vif *vifp = &viftable[vifi]; 854178481Sjb register struct mbuf *m; 855178481Sjb struct ifnet *ifp; 856178481Sjb struct ifreq ifr; 857178481Sjb int s; 858178481Sjb 859178481Sjb if (vifi >= numvifs) return EINVAL; 860178481Sjb if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL; 861178481Sjb 862178481Sjb s = splnet(); 863178481Sjb 864178481Sjb if (!(vifp->v_flags & VIFF_TUNNEL)) { 865178481Sjb ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; 866178481Sjb ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY; 867178481Sjb ifp = vifp->v_ifp; 868178481Sjb if_allmulti(ifp, 0); 869178481Sjb } 870178481Sjb 871178481Sjb if (vifp == last_encap_vif) { 872178481Sjb last_encap_vif = 0; 873178481Sjb last_encap_src = 0; 874178481Sjb } 875178481Sjb 876178481Sjb /* 877178481Sjb * Free packets queued at the interface 878178481Sjb */ 879178481Sjb while (vifp->v_tbf->tbf_q) { 880178481Sjb m = vifp->v_tbf->tbf_q; 881178481Sjb vifp->v_tbf->tbf_q = m->m_act; 882178481Sjb m_freem(m); 883178481Sjb } 884178481Sjb 885178481Sjb bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf))); 886178481Sjb bzero((caddr_t)vifp, sizeof (*vifp)); 887178481Sjb 888178481Sjb if (mrtdebug) 889178481Sjb log(LOG_DEBUG, "del_vif %d, numvifs %d\n", vifi, numvifs); 890178481Sjb 891178481Sjb /* Adjust numvifs down */ 892178481Sjb for (vifi = numvifs; vifi > 0; vifi--) 893178481Sjb if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break; 894178481Sjb numvifs = vifi; 895178481Sjb 896178481Sjb splx(s); 897178481Sjb 898178481Sjb return 0; 899178481Sjb} 900178481Sjb 901178481Sjb/* 902178481Sjb 
* Add an mfc entry 903178481Sjb */ 904178481Sjbstatic int 905178481Sjbadd_mfc(mfccp) 906178481Sjb struct mfcctl *mfccp; 907178481Sjb{ 908178481Sjb struct mfc *rt; 909178481Sjb u_long hash; 910178481Sjb struct rtdetq *rte; 911178481Sjb register u_short nstl; 912178481Sjb int s; 913178481Sjb int i; 914178481Sjb 915178481Sjb MFCFIND(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr, rt); 916178481Sjb 917178481Sjb /* If an entry already exists, just update the fields */ 918178481Sjb if (rt) { 919178481Sjb if (mrtdebug & DEBUG_MFC) 920178481Sjb log(LOG_DEBUG,"add_mfc update o %lx g %lx p %x\n", 921178481Sjb (u_long)ntohl(mfccp->mfcc_origin.s_addr), 922178481Sjb (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), 923178481Sjb mfccp->mfcc_parent); 924178481Sjb 925178481Sjb s = splnet(); 926178481Sjb rt->mfc_parent = mfccp->mfcc_parent; 927178481Sjb for (i = 0; i < numvifs; i++) 928178481Sjb rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; 929178481Sjb splx(s); 930178481Sjb return 0; 931178481Sjb } 932178481Sjb 933178481Sjb /* 934178481Sjb * Find the entry for which the upcall was made and update 935178481Sjb */ 936178481Sjb s = splnet(); 937178481Sjb hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); 938178481Sjb for (rt = mfctable[hash], nstl = 0; rt; rt = rt->mfc_next) { 939178481Sjb 940178481Sjb if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && 941178481Sjb (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) && 942178481Sjb (rt->mfc_stall != NULL)) { 943178481Sjb 944178481Sjb if (nstl++) 945178481Sjb log(LOG_ERR, "add_mfc %s o %lx g %lx p %x dbx %p\n", 946178481Sjb "multiple kernel entries", 947178481Sjb (u_long)ntohl(mfccp->mfcc_origin.s_addr), 948178481Sjb (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), 949178481Sjb mfccp->mfcc_parent, (void *)rt->mfc_stall); 950178481Sjb 951178481Sjb if (mrtdebug & DEBUG_MFC) 952178481Sjb log(LOG_DEBUG,"add_mfc o %lx g %lx p %x dbg %p\n", 953178481Sjb (u_long)ntohl(mfccp->mfcc_origin.s_addr), 954178481Sjb 
(u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), 955178481Sjb mfccp->mfcc_parent, (void *)rt->mfc_stall); 956178481Sjb 957178481Sjb rt->mfc_origin = mfccp->mfcc_origin; 958178481Sjb rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 959178481Sjb rt->mfc_parent = mfccp->mfcc_parent; 960178481Sjb for (i = 0; i < numvifs; i++) 961178481Sjb rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; 962178481Sjb /* initialize pkt counters per src-grp */ 963178481Sjb rt->mfc_pkt_cnt = 0; 964178481Sjb rt->mfc_byte_cnt = 0; 965178481Sjb rt->mfc_wrong_if = 0; 966178481Sjb rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; 967178481Sjb 968178481Sjb rt->mfc_expire = 0; /* Don't clean this guy up */ 969178481Sjb nexpire[hash]--; 970178481Sjb 971178481Sjb /* free packets Qed at the end of this entry */ 972178481Sjb for (rte = rt->mfc_stall; rte != NULL; ) { 973178481Sjb struct rtdetq *n = rte->next; 974178481Sjb 975178481Sjb ip_mdq(rte->m, rte->ifp, rt, -1); 976178481Sjb m_freem(rte->m); 977178481Sjb#ifdef UPCALL_TIMING 978178481Sjb collate(&(rte->t)); 979178481Sjb#endif /* UPCALL_TIMING */ 980178481Sjb free(rte, M_MRTABLE); 981178481Sjb rte = n; 982178481Sjb } 983178481Sjb rt->mfc_stall = NULL; 984178481Sjb } 985178481Sjb } 986178481Sjb 987178481Sjb /* 988178481Sjb * It is possible that an entry is being inserted without an upcall 989178481Sjb */ 990178481Sjb if (nstl == 0) { 991178481Sjb if (mrtdebug & DEBUG_MFC) 992178481Sjb log(LOG_DEBUG,"add_mfc no upcall h %lu o %lx g %lx p %x\n", 993178481Sjb hash, (u_long)ntohl(mfccp->mfcc_origin.s_addr), 994178481Sjb (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), 995178481Sjb mfccp->mfcc_parent); 996178481Sjb 997178481Sjb for (rt = mfctable[hash]; rt != NULL; rt = rt->mfc_next) { 998178481Sjb 999178481Sjb if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && 1000178481Sjb (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) { 1001178481Sjb 1002178481Sjb rt->mfc_origin = mfccp->mfcc_origin; 1003178481Sjb rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 
1004178481Sjb rt->mfc_parent = mfccp->mfcc_parent; 1005178481Sjb for (i = 0; i < numvifs; i++) 1006178481Sjb rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; 1007178481Sjb /* initialize pkt counters per src-grp */ 1008178481Sjb rt->mfc_pkt_cnt = 0; 1009178481Sjb rt->mfc_byte_cnt = 0; 1010178481Sjb rt->mfc_wrong_if = 0; 1011178481Sjb rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; 1012178481Sjb if (rt->mfc_expire) 1013178481Sjb nexpire[hash]--; 1014178481Sjb rt->mfc_expire = 0; 1015178481Sjb } 1016178481Sjb } 1017178481Sjb if (rt == NULL) { 1018178481Sjb /* no upcall, so make a new entry */ 1019178481Sjb rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); 1020178481Sjb if (rt == NULL) { 1021178481Sjb splx(s); 1022178481Sjb return ENOBUFS; 1023178481Sjb } 1024178481Sjb 1025178481Sjb /* insert new entry at head of hash chain */ 1026178481Sjb rt->mfc_origin = mfccp->mfcc_origin; 1027178481Sjb rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 1028178481Sjb rt->mfc_parent = mfccp->mfcc_parent; 1029178481Sjb for (i = 0; i < numvifs; i++) 1030178481Sjb rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; 1031178481Sjb /* initialize pkt counters per src-grp */ 1032178481Sjb rt->mfc_pkt_cnt = 0; 1033178481Sjb rt->mfc_byte_cnt = 0; 1034178481Sjb rt->mfc_wrong_if = 0; 1035178481Sjb rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; 1036178481Sjb rt->mfc_expire = 0; 1037178481Sjb rt->mfc_stall = NULL; 1038178481Sjb 1039178481Sjb /* link into table */ 1040178546Sjb rt->mfc_next = mfctable[hash]; 1041178546Sjb mfctable[hash] = rt; 1042178481Sjb } 1043178546Sjb } 1044178481Sjb splx(s); 1045178481Sjb return 0; 1046178481Sjb} 1047178546Sjb 1048178481Sjb#ifdef UPCALL_TIMING 1049178481Sjb/* 1050178481Sjb * collect delay statistics on the upcalls 1051178481Sjb */ 1052178481Sjbstatic void collate(t) 1053178481Sjbregister struct timeval *t; 1054178481Sjb{ 1055178481Sjb register u_long d; 1056178481Sjb register struct timeval tp; 1057178481Sjb register u_long delta; 1058178481Sjb 
1059178546Sjb GET_TIME(tp); 1060178481Sjb 1061178546Sjb if (TV_LT(*t, tp)) 1062178481Sjb { 1063178481Sjb TV_DELTA(tp, *t, delta); 1064178481Sjb 1065178481Sjb d = delta >> 10; 1066178481Sjb if (d > 50) 1067178481Sjb d = 50; 1068178481Sjb 1069178481Sjb ++upcall_data[d]; 1070178481Sjb } 1071178481Sjb} 1072178481Sjb#endif /* UPCALL_TIMING */ 1073178481Sjb 1074178481Sjb/* 1075178481Sjb * Delete an mfc entry 1076178481Sjb */ 1077178481Sjbstatic int 1078178481Sjbdel_mfc(mfccp) 1079178481Sjb struct mfcctl *mfccp; 1080178481Sjb{ 1081178481Sjb struct in_addr origin; 1082178481Sjb struct in_addr mcastgrp; 1083178481Sjb struct mfc *rt; 1084178481Sjb struct mfc **nptr; 1085178481Sjb u_long hash; 1086178481Sjb int s; 1087178481Sjb 1088178481Sjb origin = mfccp->mfcc_origin; 1089178481Sjb mcastgrp = mfccp->mfcc_mcastgrp; 1090178481Sjb hash = MFCHASH(origin.s_addr, mcastgrp.s_addr); 1091178481Sjb 1092178481Sjb if (mrtdebug & DEBUG_MFC) 1093178481Sjb log(LOG_DEBUG,"del_mfc orig %lx mcastgrp %lx\n", 1094178481Sjb (u_long)ntohl(origin.s_addr), (u_long)ntohl(mcastgrp.s_addr)); 1095178481Sjb 1096178481Sjb s = splnet(); 1097178481Sjb 1098178481Sjb nptr = &mfctable[hash]; 1099178481Sjb while ((rt = *nptr) != NULL) { 1100178481Sjb if (origin.s_addr == rt->mfc_origin.s_addr && 1101178481Sjb mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && 1102178481Sjb rt->mfc_stall == NULL) 1103178481Sjb break; 1104178481Sjb 1105178481Sjb nptr = &rt->mfc_next; 1106178481Sjb } 1107178481Sjb if (rt == NULL) { 1108178481Sjb splx(s); 1109178481Sjb return EADDRNOTAVAIL; 1110178481Sjb } 1111178481Sjb 1112178481Sjb *nptr = rt->mfc_next; 1113178481Sjb free(rt, M_MRTABLE); 1114178481Sjb 1115178481Sjb splx(s); 1116178481Sjb 1117178481Sjb return 0; 1118178481Sjb} 1119178481Sjb 1120178481Sjb/* 1121178481Sjb * Send a message to mrouted on the multicast routing socket 1122178481Sjb */ 1123178481Sjbstatic int 1124178481Sjbsocket_send(s, mm, src) 1125178481Sjb struct socket *s; 1126178481Sjb struct mbuf *mm; 1127178481Sjb 
struct sockaddr_in *src; 1128178481Sjb{ 1129178481Sjb if (s) { 1130178481Sjb if (sbappendaddr(&s->so_rcv, 1131178481Sjb (struct sockaddr *)src, 1132178481Sjb mm, (struct mbuf *)0) != 0) { 1133178481Sjb sorwakeup(s); 1134178481Sjb return 0; 1135178481Sjb } 1136178481Sjb } 1137178481Sjb m_freem(mm); 1138178481Sjb return -1; 1139178481Sjb} 1140178481Sjb 1141178481Sjb/* 1142178481Sjb * IP multicast forwarding function. This function assumes that the packet 1143178481Sjb * pointed to by "ip" has arrived on (or is about to be sent to) the interface 1144178481Sjb * pointed to by "ifp", and the packet is to be relayed to other networks 1145178481Sjb * that have members of the packet's destination IP multicast group. 1146178481Sjb * 1147178481Sjb * The packet is returned unscathed to the caller, unless it is 1148178481Sjb * erroneous, in which case a non-zero return value tells the caller to 1149178481Sjb * discard it. 1150178481Sjb */ 1151178481Sjb 1152178481Sjb#define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ 1153178546Sjb 1154178481Sjbstatic int 1155178546SjbX_ip_mforward(ip, ifp, m, imo) 1156178481Sjb register struct ip *ip; 1157178481Sjb struct ifnet *ifp; 1158178481Sjb struct mbuf *m; 1159178481Sjb struct ip_moptions *imo; 1160178481Sjb{ 1161178481Sjb register struct mfc *rt; 1162178481Sjb register u_char *ipoptions; 1163178481Sjb static struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; 1164178481Sjb static int srctun = 0; 1165178481Sjb register struct mbuf *mm; 1166178481Sjb int s; 1167178481Sjb vifi_t vifi; 1168178481Sjb struct vif *vifp; 1169178481Sjb 1170178481Sjb if (mrtdebug & DEBUG_FORWARD) 1171178481Sjb log(LOG_DEBUG, "ip_mforward: src %lx, dst %lx, ifp %p\n", 1172178481Sjb (u_long)ntohl(ip->ip_src.s_addr), (u_long)ntohl(ip->ip_dst.s_addr), 1173178481Sjb (void *)ifp); 1174178481Sjb 1175178481Sjb if (ip->ip_hl < (sizeof(struct ip) + TUNNEL_LEN) >> 2 || 1176178481Sjb (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) { 
1177178481Sjb /* 1178178481Sjb * Packet arrived via a physical interface or 1179178481Sjb * an encapsulated tunnel. 1180178481Sjb */ 1181178481Sjb } else { 1182178481Sjb /* 1183178481Sjb * Packet arrived through a source-route tunnel. 1184178481Sjb * Source-route tunnels are no longer supported. 1185178481Sjb */ 1186178481Sjb if ((srctun++ % 1000) == 0) 1187178481Sjb log(LOG_ERR, 1188178481Sjb "ip_mforward: received source-routed packet from %lx\n", 1189178481Sjb (u_long)ntohl(ip->ip_src.s_addr)); 1190178481Sjb 1191178481Sjb return 1; 1192178481Sjb } 1193178481Sjb 1194178481Sjb if ((imo) && ((vifi = imo->imo_multicast_vif) < numvifs)) { 1195178481Sjb if (ip->ip_ttl < 255) 1196178481Sjb ip->ip_ttl++; /* compensate for -1 in *_send routines */ 1197178546Sjb if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { 1198178481Sjb vifp = viftable + vifi; 1199 printf("Sending IPPROTO_RSVP from %lx to %lx on vif %d (%s%s%d)\n", 1200 (long)ntohl(ip->ip_src.s_addr), (long)ntohl(ip->ip_dst.s_addr), 1201 vifi, 1202 (vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "", 1203 vifp->v_ifp->if_name, vifp->v_ifp->if_unit); 1204 } 1205 return (ip_mdq(m, ifp, NULL, vifi)); 1206 } 1207 if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { 1208 printf("Warning: IPPROTO_RSVP from %lx to %lx without vif option\n", 1209 (long)ntohl(ip->ip_src.s_addr), (long)ntohl(ip->ip_dst.s_addr)); 1210 if(!imo) 1211 printf("In fact, no options were specified at all\n"); 1212 } 1213 1214 /* 1215 * Don't forward a packet with time-to-live of zero or one, 1216 * or a packet destined to a local-only group. 
1217 */ 1218 if (ip->ip_ttl <= 1 || 1219 ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP) 1220 return 0; 1221 1222 /* 1223 * Determine forwarding vifs from the forwarding cache table 1224 */ 1225 s = splnet(); 1226 MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt); 1227 1228 /* Entry exists, so forward if necessary */ 1229 if (rt != NULL) { 1230 splx(s); 1231 return (ip_mdq(m, ifp, rt, -1)); 1232 } else { 1233 /* 1234 * If we don't have a route for packet's origin, 1235 * Make a copy of the packet & 1236 * send message to routing daemon 1237 */ 1238 1239 register struct mbuf *mb0; 1240 register struct rtdetq *rte; 1241 register u_long hash; 1242 int hlen = ip->ip_hl << 2; 1243#ifdef UPCALL_TIMING 1244 struct timeval tp; 1245 1246 GET_TIME(tp); 1247#endif 1248 1249 mrtstat.mrts_no_route++; 1250 if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC)) 1251 log(LOG_DEBUG, "ip_mforward: no rte s %lx g %lx\n", 1252 (u_long)ntohl(ip->ip_src.s_addr), 1253 (u_long)ntohl(ip->ip_dst.s_addr)); 1254 1255 /* 1256 * Allocate mbufs early so that we don't do extra work if we are 1257 * just going to fail anyway. Make sure to pullup the header so 1258 * that other people can't step on it. 1259 */ 1260 rte = (struct rtdetq *)malloc((sizeof *rte), M_MRTABLE, M_NOWAIT); 1261 if (rte == NULL) { 1262 splx(s); 1263 return ENOBUFS; 1264 } 1265 mb0 = m_copy(m, 0, M_COPYALL); 1266 if (mb0 && (M_HASCL(mb0) || mb0->m_len < hlen)) 1267 mb0 = m_pullup(mb0, hlen); 1268 if (mb0 == NULL) { 1269 free(rte, M_MRTABLE); 1270 splx(s); 1271 return ENOBUFS; 1272 } 1273 1274 /* is there an upcall waiting for this packet? 
*/ 1275 hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr); 1276 for (rt = mfctable[hash]; rt; rt = rt->mfc_next) { 1277 if ((ip->ip_src.s_addr == rt->mfc_origin.s_addr) && 1278 (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) && 1279 (rt->mfc_stall != NULL)) 1280 break; 1281 } 1282 1283 if (rt == NULL) { 1284 int i; 1285 struct igmpmsg *im; 1286 1287 /* no upcall, so make a new entry */ 1288 rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); 1289 if (rt == NULL) { 1290 free(rte, M_MRTABLE); 1291 m_freem(mb0); 1292 splx(s); 1293 return ENOBUFS; 1294 } 1295 /* Make a copy of the header to send to the user level process */ 1296 mm = m_copy(mb0, 0, hlen); 1297 if (mm == NULL) { 1298 free(rte, M_MRTABLE); 1299 m_freem(mb0); 1300 free(rt, M_MRTABLE); 1301 splx(s); 1302 return ENOBUFS; 1303 } 1304 1305 /* 1306 * Send message to routing daemon to install 1307 * a route into the kernel table 1308 */ 1309 k_igmpsrc.sin_addr = ip->ip_src; 1310 1311 im = mtod(mm, struct igmpmsg *); 1312 im->im_msgtype = IGMPMSG_NOCACHE; 1313 im->im_mbz = 0; 1314 1315 mrtstat.mrts_upcalls++; 1316 1317 if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) { 1318 log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full\n"); 1319 ++mrtstat.mrts_upq_sockfull; 1320 free(rte, M_MRTABLE); 1321 m_freem(mb0); 1322 free(rt, M_MRTABLE); 1323 splx(s); 1324 return ENOBUFS; 1325 } 1326 1327 /* insert new entry at head of hash chain */ 1328 rt->mfc_origin.s_addr = ip->ip_src.s_addr; 1329 rt->mfc_mcastgrp.s_addr = ip->ip_dst.s_addr; 1330 rt->mfc_expire = UPCALL_EXPIRE; 1331 nexpire[hash]++; 1332 for (i = 0; i < numvifs; i++) 1333 rt->mfc_ttls[i] = 0; 1334 rt->mfc_parent = -1; 1335 1336 /* link into table */ 1337 rt->mfc_next = mfctable[hash]; 1338 mfctable[hash] = rt; 1339 rt->mfc_stall = rte; 1340 1341 } else { 1342 /* determine if q has overflowed */ 1343 int npkts = 0; 1344 struct rtdetq **p; 1345 1346 for (p = &rt->mfc_stall; *p != NULL; p = &(*p)->next) 1347 npkts++; 1348 1349 if (npkts > 
MAX_UPQ) { 1350 mrtstat.mrts_upq_ovflw++; 1351 free(rte, M_MRTABLE); 1352 m_freem(mb0); 1353 splx(s); 1354 return 0; 1355 } 1356 1357 /* Add this entry to the end of the queue */ 1358 *p = rte; 1359 } 1360 1361 rte->m = mb0; 1362 rte->ifp = ifp; 1363#ifdef UPCALL_TIMING 1364 rte->t = tp; 1365#endif 1366 rte->next = NULL; 1367 1368 splx(s); 1369 1370 return 0; 1371 } 1372} 1373 1374#ifndef MROUTE_KLD 1375int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, 1376 struct ip_moptions *) = X_ip_mforward; 1377#endif 1378 1379/* 1380 * Clean up the cache entry if upcall is not serviced 1381 */ 1382static void 1383expire_upcalls(void *unused) 1384{ 1385 struct rtdetq *rte; 1386 struct mfc *mfc, **nptr; 1387 int i; 1388 int s; 1389 1390 s = splnet(); 1391 for (i = 0; i < MFCTBLSIZ; i++) { 1392 if (nexpire[i] == 0) 1393 continue; 1394 nptr = &mfctable[i]; 1395 for (mfc = *nptr; mfc != NULL; mfc = *nptr) { 1396 /* 1397 * Skip real cache entries 1398 * Make sure it wasn't marked to not expire (shouldn't happen) 1399 * If it expires now 1400 */ 1401 if (mfc->mfc_stall != NULL && 1402 mfc->mfc_expire != 0 && 1403 --mfc->mfc_expire == 0) { 1404 if (mrtdebug & DEBUG_EXPIRE) 1405 log(LOG_DEBUG, "expire_upcalls: expiring (%lx %lx)\n", 1406 (u_long)ntohl(mfc->mfc_origin.s_addr), 1407 (u_long)ntohl(mfc->mfc_mcastgrp.s_addr)); 1408 /* 1409 * drop all the packets 1410 * free the mbuf with the pkt, if, timing info 1411 */ 1412 for (rte = mfc->mfc_stall; rte; ) { 1413 struct rtdetq *n = rte->next; 1414 1415 m_freem(rte->m); 1416 free(rte, M_MRTABLE); 1417 rte = n; 1418 } 1419 ++mrtstat.mrts_cache_cleanups; 1420 nexpire[i]--; 1421 1422 *nptr = mfc->mfc_next; 1423 free(mfc, M_MRTABLE); 1424 } else { 1425 nptr = &mfc->mfc_next; 1426 } 1427 } 1428 } 1429 splx(s); 1430 expire_upcalls_ch = timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT); 1431} 1432 1433/* 1434 * Packet forwarding routine once entry in the cache is made 1435 */ 1436static int 1437ip_mdq(m, ifp, rt, xmt_vif) 1438 
register struct mbuf *m; 1439 register struct ifnet *ifp; 1440 register struct mfc *rt; 1441 register vifi_t xmt_vif; 1442{ 1443 register struct ip *ip = mtod(m, struct ip *); 1444 register vifi_t vifi; 1445 register struct vif *vifp; 1446 register int plen = ip->ip_len; 1447 1448/* 1449 * Macro to send packet on vif. Since RSVP packets don't get counted on 1450 * input, they shouldn't get counted on output, so statistics keeping is 1451 * separate. 1452 */ 1453#define MC_SEND(ip,vifp,m) { \ 1454 if ((vifp)->v_flags & VIFF_TUNNEL) \ 1455 encap_send((ip), (vifp), (m)); \ 1456 else \ 1457 phyint_send((ip), (vifp), (m)); \ 1458} 1459 1460 /* 1461 * If xmt_vif is not -1, send on only the requested vif. 1462 * 1463 * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.) 1464 */ 1465 if (xmt_vif < numvifs) { 1466 MC_SEND(ip, viftable + xmt_vif, m); 1467 return 1; 1468 } 1469 1470 /* 1471 * Don't forward if it didn't arrive from the parent vif for its origin. 1472 */ 1473 vifi = rt->mfc_parent; 1474 if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) { 1475 /* came in the wrong interface */ 1476 if (mrtdebug & DEBUG_FORWARD) 1477 log(LOG_DEBUG, "wrong if: ifp %p vifi %d vififp %p\n", 1478 (void *)ifp, vifi, (void *)viftable[vifi].v_ifp); 1479 ++mrtstat.mrts_wrong_if; 1480 ++rt->mfc_wrong_if; 1481 /* 1482 * If we are doing PIM assert processing, and we are forwarding 1483 * packets on this interface, and it is a broadcast medium 1484 * interface (and not a tunnel), send a message to the routing daemon. 
1485 */ 1486 if (pim_assert && rt->mfc_ttls[vifi] && 1487 (ifp->if_flags & IFF_BROADCAST) && 1488 !(viftable[vifi].v_flags & VIFF_TUNNEL)) { 1489 struct sockaddr_in k_igmpsrc; 1490 struct mbuf *mm; 1491 struct igmpmsg *im; 1492 int hlen = ip->ip_hl << 2; 1493 struct timeval now; 1494 register u_long delta; 1495 1496 GET_TIME(now); 1497 1498 TV_DELTA(rt->mfc_last_assert, now, delta); 1499 1500 if (delta > ASSERT_MSG_TIME) { 1501 mm = m_copy(m, 0, hlen); 1502 if (mm && (M_HASCL(mm) || mm->m_len < hlen)) 1503 mm = m_pullup(mm, hlen); 1504 if (mm == NULL) { 1505 return ENOBUFS; 1506 } 1507 1508 rt->mfc_last_assert = now; 1509 1510 im = mtod(mm, struct igmpmsg *); 1511 im->im_msgtype = IGMPMSG_WRONGVIF; 1512 im->im_mbz = 0; 1513 im->im_vif = vifi; 1514 1515 k_igmpsrc.sin_addr = im->im_src; 1516 1517 socket_send(ip_mrouter, mm, &k_igmpsrc); 1518 } 1519 } 1520 return 0; 1521 } 1522 1523 /* If I sourced this packet, it counts as output, else it was input. */ 1524 if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) { 1525 viftable[vifi].v_pkt_out++; 1526 viftable[vifi].v_bytes_out += plen; 1527 } else { 1528 viftable[vifi].v_pkt_in++; 1529 viftable[vifi].v_bytes_in += plen; 1530 } 1531 rt->mfc_pkt_cnt++; 1532 rt->mfc_byte_cnt += plen; 1533 1534 /* 1535 * For each vif, decide if a copy of the packet should be forwarded. 1536 * Forward if: 1537 * - the ttl exceeds the vif's threshold 1538 * - there are group members downstream on interface 1539 */ 1540 for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++) 1541 if ((rt->mfc_ttls[vifi] > 0) && 1542 (ip->ip_ttl > rt->mfc_ttls[vifi])) { 1543 vifp->v_pkt_out++; 1544 vifp->v_bytes_out += plen; 1545 MC_SEND(ip, vifp, m); 1546 } 1547 1548 return 0; 1549} 1550 1551/* 1552 * check if a vif number is legal/ok. 
This is used by ip_output, to export 1553 * numvifs there, 1554 */ 1555static int 1556X_legal_vif_num(vif) 1557 int vif; 1558{ 1559 if (vif >= 0 && vif < numvifs) 1560 return(1); 1561 else 1562 return(0); 1563} 1564 1565#ifndef MROUTE_KLD 1566int (*legal_vif_num)(int) = X_legal_vif_num; 1567#endif 1568 1569/* 1570 * Return the local address used by this vif 1571 */ 1572static u_long 1573X_ip_mcast_src(vifi) 1574 int vifi; 1575{ 1576 if (vifi >= 0 && vifi < numvifs) 1577 return viftable[vifi].v_lcl_addr.s_addr; 1578 else 1579 return INADDR_ANY; 1580} 1581 1582#ifndef MROUTE_KLD 1583u_long (*ip_mcast_src)(int) = X_ip_mcast_src; 1584#endif 1585 1586static void 1587phyint_send(ip, vifp, m) 1588 struct ip *ip; 1589 struct vif *vifp; 1590 struct mbuf *m; 1591{ 1592 register struct mbuf *mb_copy; 1593 register int hlen = ip->ip_hl << 2; 1594 1595 /* 1596 * Make a new reference to the packet; make sure that 1597 * the IP header is actually copied, not just referenced, 1598 * so that ip_output() only scribbles on the copy. 1599 */ 1600 mb_copy = m_copy(m, 0, M_COPYALL); 1601 if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen)) 1602 mb_copy = m_pullup(mb_copy, hlen); 1603 if (mb_copy == NULL) 1604 return; 1605 1606 if (vifp->v_rate_limit == 0) 1607 tbf_send_packet(vifp, mb_copy); 1608 else 1609 tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len); 1610} 1611 1612static void 1613encap_send(ip, vifp, m) 1614 register struct ip *ip; 1615 register struct vif *vifp; 1616 register struct mbuf *m; 1617{ 1618 register struct mbuf *mb_copy; 1619 register struct ip *ip_copy; 1620 register int i, len = ip->ip_len; 1621 1622 /* 1623 * copy the old packet & pullup its IP header into the 1624 * new mbuf so we can modify it. Try to fill the new 1625 * mbuf since if we don't the ethernet driver will. 
1626 */ 1627 MGETHDR(mb_copy, M_DONTWAIT, MT_HEADER); 1628 if (mb_copy == NULL) 1629 return; 1630 mb_copy->m_data += max_linkhdr; 1631 mb_copy->m_len = sizeof(multicast_encap_iphdr); 1632 1633 if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) { 1634 m_freem(mb_copy); 1635 return; 1636 } 1637 i = MHLEN - M_LEADINGSPACE(mb_copy); 1638 if (i > len) 1639 i = len; 1640 mb_copy = m_pullup(mb_copy, i); 1641 if (mb_copy == NULL) 1642 return; 1643 mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr); 1644 1645 /* 1646 * fill in the encapsulating IP header. 1647 */ 1648 ip_copy = mtod(mb_copy, struct ip *); 1649 *ip_copy = multicast_encap_iphdr; 1650#ifdef RANDOM_IP_ID 1651 ip_copy->ip_id = ip_randomid(); 1652#else 1653 ip_copy->ip_id = htons(ip_id++); 1654#endif 1655 ip_copy->ip_len += len; 1656 ip_copy->ip_src = vifp->v_lcl_addr; 1657 ip_copy->ip_dst = vifp->v_rmt_addr; 1658 1659 /* 1660 * turn the encapsulated IP header back into a valid one. 1661 */ 1662 ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr)); 1663 --ip->ip_ttl; 1664 ip->ip_len = htons(ip->ip_len); 1665 ip->ip_off = htons(ip->ip_off); 1666 ip->ip_sum = 0; 1667 mb_copy->m_data += sizeof(multicast_encap_iphdr); 1668 ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); 1669 mb_copy->m_data -= sizeof(multicast_encap_iphdr); 1670 1671 if (vifp->v_rate_limit == 0) 1672 tbf_send_packet(vifp, mb_copy); 1673 else 1674 tbf_control(vifp, mb_copy, ip, ip_copy->ip_len); 1675} 1676 1677/* 1678 * Token bucket filter module 1679 */ 1680 1681static void 1682tbf_control(vifp, m, ip, p_len) 1683 register struct vif *vifp; 1684 register struct mbuf *m; 1685 register struct ip *ip; 1686 register u_long p_len; 1687{ 1688 register struct tbf *t = vifp->v_tbf; 1689 1690 if (p_len > MAX_BKT_SIZE) { 1691 /* drop if packet is too large */ 1692 mrtstat.mrts_pkt2large++; 1693 m_freem(m); 1694 return; 1695 } 1696 1697 tbf_update_tokens(vifp); 1698 1699 /* if there are enough tokens, 1700 * and the queue is 
empty, 1701 * send this packet out 1702 */ 1703 1704 if (t->tbf_q_len == 0) { 1705 /* queue empty, send packet if enough tokens */ 1706 if (p_len <= t->tbf_n_tok) { 1707 t->tbf_n_tok -= p_len; 1708 tbf_send_packet(vifp, m); 1709 } else { 1710 /* queue packet and timeout till later */ 1711 tbf_queue(vifp, m); 1712 timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS); 1713 } 1714 } else if (t->tbf_q_len < t->tbf_max_q_len) { 1715 /* finite queue length, so queue pkts and process queue */ 1716 tbf_queue(vifp, m); 1717 tbf_process_q(vifp); 1718 } else { 1719 /* queue length too much, try to dq and queue and process */ 1720 if (!tbf_dq_sel(vifp, ip)) { 1721 mrtstat.mrts_q_overflow++; 1722 m_freem(m); 1723 return; 1724 } else { 1725 tbf_queue(vifp, m); 1726 tbf_process_q(vifp); 1727 } 1728 } 1729 return; 1730} 1731 1732/* 1733 * adds a packet to the queue at the interface 1734 */ 1735static void 1736tbf_queue(vifp, m) 1737 register struct vif *vifp; 1738 register struct mbuf *m; 1739{ 1740 register int s = splnet(); 1741 register struct tbf *t = vifp->v_tbf; 1742 1743 if (t->tbf_t == NULL) { 1744 /* Queue was empty */ 1745 t->tbf_q = m; 1746 } else { 1747 /* Insert at tail */ 1748 t->tbf_t->m_act = m; 1749 } 1750 1751 /* Set new tail pointer */ 1752 t->tbf_t = m; 1753 1754#ifdef DIAGNOSTIC 1755 /* Make sure we didn't get fed a bogus mbuf */ 1756 if (m->m_act) 1757 panic("tbf_queue: m_act"); 1758#endif 1759 m->m_act = NULL; 1760 1761 t->tbf_q_len++; 1762 1763 splx(s); 1764} 1765 1766 1767/* 1768 * processes the queue at the interface 1769 */ 1770static void 1771tbf_process_q(vifp) 1772 register struct vif *vifp; 1773{ 1774 register struct mbuf *m; 1775 register int len; 1776 register int s = splnet(); 1777 register struct tbf *t = vifp->v_tbf; 1778 1779 /* loop through the queue at the interface and send as many packets 1780 * as possible 1781 */ 1782 while (t->tbf_q_len > 0) { 1783 m = t->tbf_q; 1784 1785 len = mtod(m, struct ip *)->ip_len; 1786 1787 /* determine if 
the packet can be sent */ 1788 if (len <= t->tbf_n_tok) { 1789 /* if so, 1790 * reduce no of tokens, dequeue the packet, 1791 * send the packet. 1792 */ 1793 t->tbf_n_tok -= len; 1794 1795 t->tbf_q = m->m_act; 1796 if (--t->tbf_q_len == 0) 1797 t->tbf_t = NULL; 1798 1799 m->m_act = NULL; 1800 tbf_send_packet(vifp, m); 1801 1802 } else break; 1803 } 1804 splx(s); 1805} 1806 1807static void 1808tbf_reprocess_q(xvifp) 1809 void *xvifp; 1810{ 1811 register struct vif *vifp = xvifp; 1812 if (ip_mrouter == NULL) 1813 return; 1814 1815 tbf_update_tokens(vifp); 1816 1817 tbf_process_q(vifp); 1818 1819 if (vifp->v_tbf->tbf_q_len) 1820 timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS); 1821} 1822 1823/* function that will selectively discard a member of the queue 1824 * based on the precedence value and the priority 1825 */ 1826static int 1827tbf_dq_sel(vifp, ip) 1828 register struct vif *vifp; 1829 register struct ip *ip; 1830{ 1831 register int s = splnet(); 1832 register u_int p; 1833 register struct mbuf *m, *last; 1834 register struct mbuf **np; 1835 register struct tbf *t = vifp->v_tbf; 1836 1837 p = priority(vifp, ip); 1838 1839 np = &t->tbf_q; 1840 last = NULL; 1841 while ((m = *np) != NULL) { 1842 if (p > priority(vifp, mtod(m, struct ip *))) { 1843 *np = m->m_act; 1844 /* If we're removing the last packet, fix the tail pointer */ 1845 if (m == t->tbf_t) 1846 t->tbf_t = last; 1847 m_freem(m); 1848 /* it's impossible for the queue to be empty, but 1849 * we check anyway. 
*/ 1850 if (--t->tbf_q_len == 0) 1851 t->tbf_t = NULL; 1852 splx(s); 1853 mrtstat.mrts_drop_sel++; 1854 return(1); 1855 } 1856 np = &m->m_act; 1857 last = m; 1858 } 1859 splx(s); 1860 return(0); 1861} 1862 1863static void 1864tbf_send_packet(vifp, m) 1865 register struct vif *vifp; 1866 register struct mbuf *m; 1867{ 1868 struct ip_moptions imo; 1869 int error; 1870 static struct route ro; 1871 int s = splnet(); 1872 1873 if (vifp->v_flags & VIFF_TUNNEL) { 1874 /* If tunnel options */ 1875 ip_output(m, (struct mbuf *)0, &vifp->v_route, 1876 IP_FORWARDING, (struct ip_moptions *)0); 1877 } else { 1878 imo.imo_multicast_ifp = vifp->v_ifp; 1879 imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1; 1880 imo.imo_multicast_loop = 1; 1881 imo.imo_multicast_vif = -1; 1882 1883 /* 1884 * Re-entrancy should not be a problem here, because 1885 * the packets that we send out and are looped back at us 1886 * should get rejected because they appear to come from 1887 * the loopback interface, thus preventing looping. 1888 */ 1889 error = ip_output(m, (struct mbuf *)0, &ro, 1890 IP_FORWARDING, &imo); 1891 1892 if (mrtdebug & DEBUG_XMIT) 1893 log(LOG_DEBUG, "phyint_send on vif %d err %d\n", 1894 vifp - viftable, error); 1895 } 1896 splx(s); 1897} 1898 1899/* determine the current time and then 1900 * the elapsed time (between the last time and time now) 1901 * in milliseconds & update the no. of tokens in the bucket 1902 */ 1903static void 1904tbf_update_tokens(vifp) 1905 register struct vif *vifp; 1906{ 1907 struct timeval tp; 1908 register u_long tm; 1909 register int s = splnet(); 1910 register struct tbf *t = vifp->v_tbf; 1911 1912 GET_TIME(tp); 1913 1914 TV_DELTA(tp, t->tbf_last_pkt_t, tm); 1915 1916 /* 1917 * This formula is actually 1918 * "time in seconds" * "bytes/second". 1919 * 1920 * (tm / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8) 1921 * 1922 * The (1000/1024) was introduced in add_vif to optimize 1923 * this divide into a shift. 
1924 */ 1925 t->tbf_n_tok += tm * vifp->v_rate_limit / 1024 / 8; 1926 t->tbf_last_pkt_t = tp; 1927 1928 if (t->tbf_n_tok > MAX_BKT_SIZE) 1929 t->tbf_n_tok = MAX_BKT_SIZE; 1930 1931 splx(s); 1932} 1933 1934static int 1935priority(vifp, ip) 1936 register struct vif *vifp; 1937 register struct ip *ip; 1938{ 1939 register int prio; 1940 1941 /* temporary hack; may add general packet classifier some day */ 1942 1943 /* 1944 * The UDP port space is divided up into four priority ranges: 1945 * [0, 16384) : unclassified - lowest priority 1946 * [16384, 32768) : audio - highest priority 1947 * [32768, 49152) : whiteboard - medium priority 1948 * [49152, 65536) : video - low priority 1949 */ 1950 if (ip->ip_p == IPPROTO_UDP) { 1951 struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2)); 1952 switch (ntohs(udp->uh_dport) & 0xc000) { 1953 case 0x4000: 1954 prio = 70; 1955 break; 1956 case 0x8000: 1957 prio = 60; 1958 break; 1959 case 0xc000: 1960 prio = 55; 1961 break; 1962 default: 1963 prio = 50; 1964 break; 1965 } 1966 if (tbfdebug > 1) 1967 log(LOG_DEBUG, "port %x prio%d\n", ntohs(udp->uh_dport), prio); 1968 } else { 1969 prio = 50; 1970 } 1971 return prio; 1972} 1973 1974/* 1975 * End of token bucket filter modifications 1976 */ 1977 1978int 1979ip_rsvp_vif_init(so, sopt) 1980 struct socket *so; 1981 struct sockopt *sopt; 1982{ 1983 int error, i, s; 1984 1985 if (rsvpdebug) 1986 printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n", 1987 so->so_type, so->so_proto->pr_protocol); 1988 1989 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) 1990 return EOPNOTSUPP; 1991 1992 /* Check mbuf. */ 1993 error = sooptcopyin(sopt, &i, sizeof i, sizeof i); 1994 if (error) 1995 return (error); 1996 1997 if (rsvpdebug) 1998 printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n", i, rsvp_on); 1999 2000 s = splnet(); 2001 2002 /* Check vif. 
*/ 2003 if (!legal_vif_num(i)) { 2004 splx(s); 2005 return EADDRNOTAVAIL; 2006 } 2007 2008 /* Check if socket is available. */ 2009 if (viftable[i].v_rsvpd != NULL) { 2010 splx(s); 2011 return EADDRINUSE; 2012 } 2013 2014 viftable[i].v_rsvpd = so; 2015 /* This may seem silly, but we need to be sure we don't over-increment 2016 * the RSVP counter, in case something slips up. 2017 */ 2018 if (!viftable[i].v_rsvp_on) { 2019 viftable[i].v_rsvp_on = 1; 2020 rsvp_on++; 2021 } 2022 2023 splx(s); 2024 return 0; 2025} 2026 2027int 2028ip_rsvp_vif_done(so, sopt) 2029 struct socket *so; 2030 struct sockopt *sopt; 2031{ 2032 int error, i, s; 2033 2034 if (rsvpdebug) 2035 printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n", 2036 so->so_type, so->so_proto->pr_protocol); 2037 2038 if (so->so_type != SOCK_RAW || 2039 so->so_proto->pr_protocol != IPPROTO_RSVP) 2040 return EOPNOTSUPP; 2041 2042 error = sooptcopyin(sopt, &i, sizeof i, sizeof i); 2043 if (error) 2044 return (error); 2045 2046 s = splnet(); 2047 2048 /* Check vif. */ 2049 if (!legal_vif_num(i)) { 2050 splx(s); 2051 return EADDRNOTAVAIL; 2052 } 2053 2054 if (rsvpdebug) 2055 printf("ip_rsvp_vif_done: v_rsvpd = %p so = %p\n", 2056 viftable[i].v_rsvpd, so); 2057 2058 viftable[i].v_rsvpd = NULL; 2059 /* 2060 * This may seem silly, but we need to be sure we don't over-decrement 2061 * the RSVP counter, in case something slips up. 2062 */ 2063 if (viftable[i].v_rsvp_on) { 2064 viftable[i].v_rsvp_on = 0; 2065 rsvp_on--; 2066 } 2067 2068 splx(s); 2069 return 0; 2070} 2071 2072void 2073ip_rsvp_force_done(so) 2074 struct socket *so; 2075{ 2076 int vifi; 2077 register int s; 2078 2079 /* Don't bother if it is not the right type of socket. */ 2080 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) 2081 return; 2082 2083 s = splnet(); 2084 2085 /* The socket may be attached to more than one vif...this 2086 * is perfectly legal. 
2087 */ 2088 for (vifi = 0; vifi < numvifs; vifi++) { 2089 if (viftable[vifi].v_rsvpd == so) { 2090 viftable[vifi].v_rsvpd = NULL; 2091 /* This may seem silly, but we need to be sure we don't 2092 * over-decrement the RSVP counter, in case something slips up. 2093 */ 2094 if (viftable[vifi].v_rsvp_on) { 2095 viftable[vifi].v_rsvp_on = 0; 2096 rsvp_on--; 2097 } 2098 } 2099 } 2100 2101 splx(s); 2102 return; 2103} 2104 2105void 2106rsvp_input(m, off) 2107 struct mbuf *m; 2108 int off; 2109{ 2110 int vifi; 2111 register struct ip *ip = mtod(m, struct ip *); 2112 static struct sockaddr_in rsvp_src = { sizeof rsvp_src, AF_INET }; 2113 register int s; 2114 struct ifnet *ifp; 2115 2116 if (rsvpdebug) 2117 printf("rsvp_input: rsvp_on %d\n",rsvp_on); 2118 2119 /* Can still get packets with rsvp_on = 0 if there is a local member 2120 * of the group to which the RSVP packet is addressed. But in this 2121 * case we want to throw the packet away. 2122 */ 2123 if (!rsvp_on) { 2124 m_freem(m); 2125 return; 2126 } 2127 2128 s = splnet(); 2129 2130 if (rsvpdebug) 2131 printf("rsvp_input: check vifs\n"); 2132 2133#ifdef DIAGNOSTIC 2134 if (!(m->m_flags & M_PKTHDR)) 2135 panic("rsvp_input no hdr"); 2136#endif 2137 2138 ifp = m->m_pkthdr.rcvif; 2139 /* Find which vif the packet arrived on. */ 2140 for (vifi = 0; vifi < numvifs; vifi++) 2141 if (viftable[vifi].v_ifp == ifp) 2142 break; 2143 2144 if (vifi == numvifs || viftable[vifi].v_rsvpd == NULL) { 2145 /* 2146 * If the old-style non-vif-associated socket is set, 2147 * then use it. Otherwise, drop packet since there 2148 * is no specific socket for this vif. 
2149 */ 2150 if (ip_rsvpd != NULL) { 2151 if (rsvpdebug) 2152 printf("rsvp_input: Sending packet up old-style socket\n"); 2153 rip_input(m, off); /* xxx */ 2154 } else { 2155 if (rsvpdebug && vifi == numvifs) 2156 printf("rsvp_input: Can't find vif for packet.\n"); 2157 else if (rsvpdebug && viftable[vifi].v_rsvpd == NULL) 2158 printf("rsvp_input: No socket defined for vif %d\n",vifi); 2159 m_freem(m); 2160 } 2161 splx(s); 2162 return; 2163 } 2164 rsvp_src.sin_addr = ip->ip_src; 2165 2166 if (rsvpdebug && m) 2167 printf("rsvp_input: m->m_len = %d, sbspace() = %ld\n", 2168 m->m_len,sbspace(&(viftable[vifi].v_rsvpd->so_rcv))); 2169 2170 if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0) { 2171 if (rsvpdebug) 2172 printf("rsvp_input: Failed to append to socket\n"); 2173 } else { 2174 if (rsvpdebug) 2175 printf("rsvp_input: send packet up\n"); 2176 } 2177 2178 splx(s); 2179} 2180 2181#ifdef MROUTE_KLD 2182 2183static int 2184ip_mroute_modevent(module_t mod, int type, void *unused) 2185{ 2186 int s; 2187 2188 switch (type) { 2189 static u_long (*old_ip_mcast_src)(int); 2190 static int (*old_ip_mrouter_set)(struct socket *, 2191 struct sockopt *); 2192 static int (*old_ip_mrouter_get)(struct socket *, 2193 struct sockopt *); 2194 static int (*old_ip_mrouter_done)(void); 2195 static int (*old_ip_mforward)(struct ip *, struct ifnet *, 2196 struct mbuf *, struct ip_moptions *); 2197 static int (*old_mrt_ioctl)(int, caddr_t); 2198 static int (*old_legal_vif_num)(int); 2199 2200 case MOD_LOAD: 2201 s = splnet(); 2202 /* XXX Protect against multiple loading */ 2203 old_ip_mcast_src = ip_mcast_src; 2204 ip_mcast_src = X_ip_mcast_src; 2205 old_ip_mrouter_get = ip_mrouter_get; 2206 ip_mrouter_get = X_ip_mrouter_get; 2207 old_ip_mrouter_set = ip_mrouter_set; 2208 ip_mrouter_set = X_ip_mrouter_set; 2209 old_ip_mrouter_done = ip_mrouter_done; 2210 ip_mrouter_done = X_ip_mrouter_done; 2211 old_ip_mforward = ip_mforward; 2212 ip_mforward = X_ip_mforward; 2213 old_mrt_ioctl = 
mrt_ioctl; 2214 mrt_ioctl = X_mrt_ioctl; 2215 old_legal_vif_num = legal_vif_num; 2216 legal_vif_num = X_legal_vif_num; 2217 2218 splx(s); 2219 return 0; 2220 2221 case MOD_UNLOAD: 2222 if (ip_mrouter) 2223 return EINVAL; 2224 2225 s = splnet(); 2226 ip_mrouter_get = old_ip_mrouter_get; 2227 ip_mrouter_set = old_ip_mrouter_set; 2228 ip_mrouter_done = old_ip_mrouter_done; 2229 ip_mforward = old_ip_mforward; 2230 mrt_ioctl = old_mrt_ioctl; 2231 legal_vif_num = old_legal_vif_num; 2232 splx(s); 2233 return 0; 2234 2235 default: 2236 break; 2237 } 2238 return 0; 2239} 2240 2241static moduledata_t ip_mroutemod = { 2242 "ip_mroute", 2243 ip_mroute_modevent, 2244 0 2245}; 2246DECLARE_MODULE(ip_mroute, ip_mroutemod, SI_SUB_PSEUDO, SI_ORDER_ANY); 2247 2248#endif /* MROUTE_KLD */ 2249#endif /* MROUTING */ 2250