/* ip_mroute.c revision 118501 */
1/* 2 * IP multicast forwarding procedures 3 * 4 * Written by David Waitzman, BBN Labs, August 1988. 5 * Modified by Steve Deering, Stanford, February 1989. 6 * Modified by Mark J. Steiglitz, Stanford, May, 1991 7 * Modified by Van Jacobson, LBL, January 1993 8 * Modified by Ajit Thyagarajan, PARC, August 1993 9 * Modified by Bill Fenner, PARC, April 1995 10 * 11 * MROUTING Revision: 3.5 12 * $FreeBSD: head/sys/netinet/ip_mroute.c 118501 2003-08-05 17:01:33Z hsu $ 13 */ 14 15#include "opt_mac.h" 16#include "opt_mrouting.h" 17#include "opt_random_ip_id.h" 18 19#include <sys/param.h> 20#include <sys/kernel.h> 21#include <sys/lock.h> 22#include <sys/mac.h> 23#include <sys/malloc.h> 24#include <sys/mbuf.h> 25#include <sys/protosw.h> 26#include <sys/signalvar.h> 27#include <sys/socket.h> 28#include <sys/socketvar.h> 29#include <sys/sockio.h> 30#include <sys/sx.h> 31#include <sys/sysctl.h> 32#include <sys/syslog.h> 33#include <sys/systm.h> 34#include <sys/time.h> 35#include <net/if.h> 36#include <net/netisr.h> 37#include <net/route.h> 38#include <netinet/in.h> 39#include <netinet/igmp.h> 40#include <netinet/in_systm.h> 41#include <netinet/in_var.h> 42#include <netinet/ip.h> 43#include <netinet/ip_encap.h> 44#include <netinet/ip_mroute.h> 45#include <netinet/ip_var.h> 46#include <netinet/udp.h> 47#include <machine/in_cksum.h> 48 49/* 50 * Control debugging code for rsvp and multicast routing code. 51 * Can only set them with the debugger. 
52 */ 53static u_int rsvpdebug; /* non-zero enables debugging */ 54 55static u_int mrtdebug; /* any set of the flags below */ 56#define DEBUG_MFC 0x02 57#define DEBUG_FORWARD 0x04 58#define DEBUG_EXPIRE 0x08 59#define DEBUG_XMIT 0x10 60 61#define M_HASCL(m) ((m)->m_flags & M_EXT) 62 63static MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast routing tables"); 64 65static struct mrtstat mrtstat; 66SYSCTL_STRUCT(_net_inet_ip, OID_AUTO, mrtstat, CTLFLAG_RW, 67 &mrtstat, mrtstat, 68 "Multicast Routing Statistics (struct mrtstat, netinet/ip_mroute.h)"); 69 70static struct mfc *mfctable[MFCTBLSIZ]; 71SYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, mfctable, CTLFLAG_RD, 72 &mfctable, sizeof(mfctable), "S,*mfc[MFCTBLSIZ]", 73 "Multicast Forwarding Table (struct *mfc[MFCTBLSIZ], netinet/ip_mroute.h)"); 74 75static struct vif viftable[MAXVIFS]; 76SYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, viftable, CTLFLAG_RD, 77 &viftable, sizeof(viftable), "S,vif[MAXVIFS]", 78 "Multicast Virtual Interfaces (struct vif[MAXVIFS], netinet/ip_mroute.h)"); 79 80static u_char nexpire[MFCTBLSIZ]; 81 82static struct callout_handle expire_upcalls_ch; 83 84#define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */ 85#define UPCALL_EXPIRE 6 /* number of timeouts */ 86 87/* 88 * Define the token bucket filter structures 89 * tbftable -> each vif has one of these for storing info 90 */ 91 92static struct tbf tbftable[MAXVIFS]; 93#define TBF_REPROCESS (hz / 100) /* 100x / second */ 94 95/* 96 * 'Interfaces' associated with decapsulator (so we can tell 97 * packets that went through it from ones that get reflected 98 * by a broken gateway). These interfaces are never linked into 99 * the system ifnet list & no routes point to them. I.e., packets 100 * can't be sent this way. They only exist as a placeholder for 101 * multicast source verification. 
102 */ 103static struct ifnet multicast_decap_if[MAXVIFS]; 104 105#define ENCAP_TTL 64 106#define ENCAP_PROTO IPPROTO_IPIP /* 4 */ 107 108/* prototype IP hdr for encapsulated packets */ 109static struct ip multicast_encap_iphdr = { 110#if BYTE_ORDER == LITTLE_ENDIAN 111 sizeof(struct ip) >> 2, IPVERSION, 112#else 113 IPVERSION, sizeof(struct ip) >> 2, 114#endif 115 0, /* tos */ 116 sizeof(struct ip), /* total length */ 117 0, /* id */ 118 0, /* frag offset */ 119 ENCAP_TTL, ENCAP_PROTO, 120 0, /* checksum */ 121}; 122 123/* 124 * Private variables. 125 */ 126static vifi_t numvifs; 127static const struct encaptab *encap_cookie; 128 129/* 130 * one-back cache used by mroute_encapcheck to locate a tunnel's vif 131 * given a datagram's src ip address. 132 */ 133static u_long last_encap_src; 134static struct vif *last_encap_vif; 135 136static u_long X_ip_mcast_src(int vifi); 137static int X_ip_mforward(struct ip *ip, struct ifnet *ifp, 138 struct mbuf *m, struct ip_moptions *imo); 139static int X_ip_mrouter_done(void); 140static int X_ip_mrouter_get(struct socket *so, struct sockopt *m); 141static int X_ip_mrouter_set(struct socket *so, struct sockopt *m); 142static int X_legal_vif_num(int vif); 143static int X_mrt_ioctl(int cmd, caddr_t data); 144 145static int get_sg_cnt(struct sioc_sg_req *); 146static int get_vif_cnt(struct sioc_vif_req *); 147static int ip_mrouter_init(struct socket *, int); 148static int add_vif(struct vifctl *); 149static int del_vif(vifi_t); 150static int add_mfc(struct mfcctl *); 151static int del_mfc(struct mfcctl *); 152static int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *); 153static int set_assert(int); 154static void expire_upcalls(void *); 155static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, vifi_t); 156static void phyint_send(struct ip *, struct vif *, struct mbuf *); 157static void encap_send(struct ip *, struct vif *, struct mbuf *); 158static void tbf_control(struct vif *, struct mbuf *, struct ip *, 
u_long); 159static void tbf_queue(struct vif *, struct mbuf *); 160static void tbf_process_q(struct vif *); 161static void tbf_reprocess_q(void *); 162static int tbf_dq_sel(struct vif *, struct ip *); 163static void tbf_send_packet(struct vif *, struct mbuf *); 164static void tbf_update_tokens(struct vif *); 165static int priority(struct vif *, struct ip *); 166 167/* 168 * whether or not special PIM assert processing is enabled. 169 */ 170static int pim_assert; 171/* 172 * Rate limit for assert notification messages, in usec 173 */ 174#define ASSERT_MSG_TIME 3000000 175 176/* 177 * Hash function for a source, group entry 178 */ 179#define MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \ 180 ((g) >> 20) ^ ((g) >> 10) ^ (g)) 181 182/* 183 * Find a route for a given origin IP address and Multicast group address 184 * Type of service parameter to be added in the future!!! 185 * Statistics are updated by the caller if needed 186 * (mrtstat.mrts_mfc_lookups and mrtstat.mrts_mfc_misses) 187 */ 188static struct mfc * 189mfc_find(in_addr_t o, in_addr_t g) 190{ 191 struct mfc *rt; 192 193 for (rt = mfctable[MFCHASH(o,g)]; rt; rt = rt->mfc_next) 194 if ((rt->mfc_origin.s_addr == o) && 195 (rt->mfc_mcastgrp.s_addr == g) && (rt->mfc_stall == NULL)) 196 break; 197 return rt; 198} 199 200/* 201 * Macros to compute elapsed time efficiently 202 * Borrowed from Van Jacobson's scheduling code 203 */ 204#define TV_DELTA(a, b, delta) { \ 205 int xxs; \ 206 delta = (a).tv_usec - (b).tv_usec; \ 207 if ((xxs = (a).tv_sec - (b).tv_sec)) { \ 208 switch (xxs) { \ 209 case 2: \ 210 delta += 1000000; \ 211 /* FALLTHROUGH */ \ 212 case 1: \ 213 delta += 1000000; \ 214 break; \ 215 default: \ 216 delta += (1000000 * xxs); \ 217 } \ 218 } \ 219} 220 221#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \ 222 (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) 223 224/* 225 * Handle MRT setsockopt commands to modify the multicast routing tables. 
226 */ 227static int 228X_ip_mrouter_set(struct socket *so, struct sockopt *sopt) 229{ 230 int error, optval; 231 vifi_t vifi; 232 struct vifctl vifc; 233 struct mfcctl mfc; 234 235 if (so != ip_mrouter && sopt->sopt_name != MRT_INIT) 236 return EPERM; 237 238 error = 0; 239 switch (sopt->sopt_name) { 240 case MRT_INIT: 241 error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); 242 if (error) 243 break; 244 error = ip_mrouter_init(so, optval); 245 break; 246 247 case MRT_DONE: 248 error = ip_mrouter_done(); 249 break; 250 251 case MRT_ADD_VIF: 252 error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc); 253 if (error) 254 break; 255 error = add_vif(&vifc); 256 break; 257 258 case MRT_DEL_VIF: 259 error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi); 260 if (error) 261 break; 262 error = del_vif(vifi); 263 break; 264 265 case MRT_ADD_MFC: 266 case MRT_DEL_MFC: 267 error = sooptcopyin(sopt, &mfc, sizeof mfc, sizeof mfc); 268 if (error) 269 break; 270 if (sopt->sopt_name == MRT_ADD_MFC) 271 error = add_mfc(&mfc); 272 else 273 error = del_mfc(&mfc); 274 break; 275 276 case MRT_ASSERT: 277 error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); 278 if (error) 279 break; 280 set_assert(optval); 281 break; 282 283 default: 284 error = EOPNOTSUPP; 285 break; 286 } 287 return error; 288} 289 290/* 291 * Handle MRT getsockopt commands 292 */ 293static int 294X_ip_mrouter_get(struct socket *so, struct sockopt *sopt) 295{ 296 int error; 297 static int version = 0x0305; /* !!! why is this here? 
XXX */ 298 299 switch (sopt->sopt_name) { 300 case MRT_VERSION: 301 error = sooptcopyout(sopt, &version, sizeof version); 302 break; 303 304 case MRT_ASSERT: 305 error = sooptcopyout(sopt, &pim_assert, sizeof pim_assert); 306 break; 307 308 default: 309 error = EOPNOTSUPP; 310 break; 311 } 312 return error; 313} 314 315/* 316 * Handle ioctl commands to obtain information from the cache 317 */ 318static int 319X_mrt_ioctl(int cmd, caddr_t data) 320{ 321 int error = 0; 322 323 switch (cmd) { 324 case (SIOCGETVIFCNT): 325 error = get_vif_cnt((struct sioc_vif_req *)data); 326 break; 327 328 case (SIOCGETSGCNT): 329 error = get_sg_cnt((struct sioc_sg_req *)data); 330 break; 331 332 default: 333 error = EINVAL; 334 break; 335 } 336 return error; 337} 338 339/* 340 * returns the packet, byte, rpf-failure count for the source group provided 341 */ 342static int 343get_sg_cnt(struct sioc_sg_req *req) 344{ 345 int s; 346 struct mfc *rt; 347 348 s = splnet(); 349 rt = mfc_find(req->src.s_addr, req->grp.s_addr); 350 splx(s); 351 if (rt == NULL) { 352 req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff; 353 return EADDRNOTAVAIL; 354 } 355 req->pktcnt = rt->mfc_pkt_cnt; 356 req->bytecnt = rt->mfc_byte_cnt; 357 req->wrong_if = rt->mfc_wrong_if; 358 return 0; 359} 360 361/* 362 * returns the input and output packet and byte counts on the vif provided 363 */ 364static int 365get_vif_cnt(struct sioc_vif_req *req) 366{ 367 vifi_t vifi = req->vifi; 368 369 if (vifi >= numvifs) 370 return EINVAL; 371 372 req->icount = viftable[vifi].v_pkt_in; 373 req->ocount = viftable[vifi].v_pkt_out; 374 req->ibytes = viftable[vifi].v_bytes_in; 375 req->obytes = viftable[vifi].v_bytes_out; 376 377 return 0; 378} 379 380/* 381 * Enable multicast routing 382 */ 383static int 384ip_mrouter_init(struct socket *so, int version) 385{ 386 if (mrtdebug) 387 log(LOG_DEBUG, "ip_mrouter_init: so_type = %d, pr_protocol = %d\n", 388 so->so_type, so->so_proto->pr_protocol); 389 390 if (so->so_type != SOCK_RAW 
|| so->so_proto->pr_protocol != IPPROTO_IGMP) 391 return EOPNOTSUPP; 392 393 if (version != 1) 394 return ENOPROTOOPT; 395 396 if (ip_mrouter != NULL) 397 return EADDRINUSE; 398 399 ip_mrouter = so; 400 401 bzero((caddr_t)mfctable, sizeof(mfctable)); 402 bzero((caddr_t)nexpire, sizeof(nexpire)); 403 404 pim_assert = 0; 405 406 expire_upcalls_ch = timeout(expire_upcalls, NULL, EXPIRE_TIMEOUT); 407 408 if (mrtdebug) 409 log(LOG_DEBUG, "ip_mrouter_init\n"); 410 411 return 0; 412} 413 414/* 415 * Disable multicast routing 416 */ 417static int 418X_ip_mrouter_done(void) 419{ 420 vifi_t vifi; 421 int i; 422 struct ifnet *ifp; 423 struct ifreq ifr; 424 struct mfc *rt; 425 struct rtdetq *rte; 426 int s; 427 428 s = splnet(); 429 430 /* 431 * For each phyint in use, disable promiscuous reception of all IP 432 * multicasts. 433 */ 434 for (vifi = 0; vifi < numvifs; vifi++) { 435 if (viftable[vifi].v_lcl_addr.s_addr != 0 && 436 !(viftable[vifi].v_flags & VIFF_TUNNEL)) { 437 struct sockaddr_in *so = (struct sockaddr_in *)&(ifr.ifr_addr); 438 439 so->sin_len = sizeof(struct sockaddr_in); 440 so->sin_family = AF_INET; 441 so->sin_addr.s_addr = INADDR_ANY; 442 ifp = viftable[vifi].v_ifp; 443 if_allmulti(ifp, 0); 444 } 445 } 446 bzero((caddr_t)tbftable, sizeof(tbftable)); 447 bzero((caddr_t)viftable, sizeof(viftable)); 448 numvifs = 0; 449 pim_assert = 0; 450 451 untimeout(expire_upcalls, NULL, expire_upcalls_ch); 452 453 /* 454 * Free all multicast forwarding cache entries. 
455 */ 456 for (i = 0; i < MFCTBLSIZ; i++) { 457 for (rt = mfctable[i]; rt != NULL; ) { 458 struct mfc *nr = rt->mfc_next; 459 460 for (rte = rt->mfc_stall; rte != NULL; ) { 461 struct rtdetq *n = rte->next; 462 463 m_freem(rte->m); 464 free(rte, M_MRTABLE); 465 rte = n; 466 } 467 free(rt, M_MRTABLE); 468 rt = nr; 469 } 470 } 471 472 bzero((caddr_t)mfctable, sizeof(mfctable)); 473 474 /* 475 * Reset de-encapsulation cache 476 */ 477 last_encap_src = INADDR_ANY; 478 last_encap_vif = NULL; 479 if (encap_cookie) { 480 encap_detach(encap_cookie); 481 encap_cookie = NULL; 482 } 483 484 ip_mrouter = NULL; 485 486 splx(s); 487 488 if (mrtdebug) 489 log(LOG_DEBUG, "ip_mrouter_done\n"); 490 491 return 0; 492} 493 494/* 495 * Set PIM assert processing global 496 */ 497static int 498set_assert(int i) 499{ 500 if ((i != 1) && (i != 0)) 501 return EINVAL; 502 503 pim_assert = i; 504 505 return 0; 506} 507 508/* 509 * Decide if a packet is from a tunnelled peer. 510 * Return 0 if not, 64 if so. XXX yuck.. 64 ??? 511 */ 512static int 513mroute_encapcheck(const struct mbuf *m, int off, int proto, void *arg) 514{ 515 struct ip *ip = mtod(m, struct ip *); 516 int hlen = ip->ip_hl << 2; 517 518 /* 519 * don't claim the packet if it's not to a multicast destination or if 520 * we don't have an encapsulating tunnel with the source. 521 * Note: This code assumes that the remote site IP address 522 * uniquely identifies the tunnel (i.e., that this site has 523 * at most one tunnel with the remote site). 
524 */ 525 if (!IN_MULTICAST(ntohl(((struct ip *)((char *)ip+hlen))->ip_dst.s_addr))) 526 return 0; 527 if (ip->ip_src.s_addr != last_encap_src) { 528 struct vif *vifp = viftable; 529 struct vif *vife = vifp + numvifs; 530 531 last_encap_src = ip->ip_src.s_addr; 532 last_encap_vif = NULL; 533 for ( ; vifp < vife; ++vifp) 534 if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) { 535 if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT)) == VIFF_TUNNEL) 536 last_encap_vif = vifp; 537 break; 538 } 539 } 540 if (last_encap_vif == NULL) { 541 last_encap_src = INADDR_ANY; 542 return 0; 543 } 544 return 64; 545} 546 547/* 548 * De-encapsulate a packet and feed it back through ip input (this 549 * routine is called whenever IP gets a packet that mroute_encap_func() 550 * claimed). 551 */ 552static void 553mroute_encap_input(struct mbuf *m, int off) 554{ 555 struct ip *ip = mtod(m, struct ip *); 556 int hlen = ip->ip_hl << 2; 557 558 if (hlen > sizeof(struct ip)) 559 ip_stripoptions(m, (struct mbuf *) 0); 560 m->m_data += sizeof(struct ip); 561 m->m_len -= sizeof(struct ip); 562 m->m_pkthdr.len -= sizeof(struct ip); 563 564 m->m_pkthdr.rcvif = last_encap_vif->v_ifp; 565 566 netisr_queue(NETISR_IP, m); 567 /* 568 * normally we would need a "schednetisr(NETISR_IP)" 569 * here but we were called by ip_input and it is going 570 * to loop back & try to dequeue the packet we just 571 * queued as soon as we return so we avoid the 572 * unnecessary software interrrupt. 573 * 574 * XXX 575 * This no longer holds - we may have direct-dispatched the packet, 576 * or there may be a queue processing limit. 
577 */ 578} 579 580extern struct domain inetdomain; 581static struct protosw mroute_encap_protosw = 582{ SOCK_RAW, &inetdomain, IPPROTO_IPV4, PR_ATOMIC|PR_ADDR, 583 mroute_encap_input, 0, 0, rip_ctloutput, 584 0, 585 0, 0, 0, 0, 586 &rip_usrreqs 587}; 588 589/* 590 * Add a vif to the vif table 591 */ 592static int 593add_vif(struct vifctl *vifcp) 594{ 595 struct vif *vifp = viftable + vifcp->vifc_vifi; 596 struct sockaddr_in sin = {sizeof sin, AF_INET}; 597 struct ifaddr *ifa; 598 struct ifnet *ifp; 599 int error, s; 600 struct tbf *v_tbf = tbftable + vifcp->vifc_vifi; 601 602 if (vifcp->vifc_vifi >= MAXVIFS) 603 return EINVAL; 604 if (vifp->v_lcl_addr.s_addr != INADDR_ANY) 605 return EADDRINUSE; 606 if (vifcp->vifc_lcl_addr.s_addr == INADDR_ANY) 607 return EADDRNOTAVAIL; 608 609 /* Find the interface with an address in AF_INET family */ 610 sin.sin_addr = vifcp->vifc_lcl_addr; 611 ifa = ifa_ifwithaddr((struct sockaddr *)&sin); 612 if (ifa == NULL) 613 return EADDRNOTAVAIL; 614 ifp = ifa->ifa_ifp; 615 616 if (vifcp->vifc_flags & VIFF_TUNNEL) { 617 if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) { 618 /* 619 * An encapsulating tunnel is wanted. Tell 620 * mroute_encap_input() to start paying attention 621 * to encapsulated packets. 
622 */ 623 if (encap_cookie == NULL) { 624 encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV4, 625 mroute_encapcheck, 626 (struct protosw *)&mroute_encap_protosw, NULL); 627 628 if (encap_cookie == NULL) { 629 printf("ip_mroute: unable to attach encap\n"); 630 return EIO; /* XXX */ 631 } 632 for (s = 0; s < MAXVIFS; ++s) { 633 multicast_decap_if[s].if_name = "mdecap"; 634 multicast_decap_if[s].if_unit = s; 635 } 636 } 637 /* 638 * Set interface to fake encapsulator interface 639 */ 640 ifp = &multicast_decap_if[vifcp->vifc_vifi]; 641 /* 642 * Prepare cached route entry 643 */ 644 bzero(&vifp->v_route, sizeof(vifp->v_route)); 645 } else { 646 log(LOG_ERR, "source routed tunnels not supported\n"); 647 return EOPNOTSUPP; 648 } 649 } else { /* Make sure the interface supports multicast */ 650 if ((ifp->if_flags & IFF_MULTICAST) == 0) 651 return EOPNOTSUPP; 652 653 /* Enable promiscuous reception of all IP multicasts from the if */ 654 s = splnet(); 655 error = if_allmulti(ifp, 1); 656 splx(s); 657 if (error) 658 return error; 659 } 660 661 s = splnet(); 662 /* define parameters for the tbf structure */ 663 vifp->v_tbf = v_tbf; 664 GET_TIME(vifp->v_tbf->tbf_last_pkt_t); 665 vifp->v_tbf->tbf_n_tok = 0; 666 vifp->v_tbf->tbf_q_len = 0; 667 vifp->v_tbf->tbf_max_q_len = MAXQSIZE; 668 vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL; 669 670 vifp->v_flags = vifcp->vifc_flags; 671 vifp->v_threshold = vifcp->vifc_threshold; 672 vifp->v_lcl_addr = vifcp->vifc_lcl_addr; 673 vifp->v_rmt_addr = vifcp->vifc_rmt_addr; 674 vifp->v_ifp = ifp; 675 /* scaling up here allows division by 1024 in critical code */ 676 vifp->v_rate_limit= vifcp->vifc_rate_limit * 1024 / 1000; 677 vifp->v_rsvp_on = 0; 678 vifp->v_rsvpd = NULL; 679 /* initialize per vif pkt counters */ 680 vifp->v_pkt_in = 0; 681 vifp->v_pkt_out = 0; 682 vifp->v_bytes_in = 0; 683 vifp->v_bytes_out = 0; 684 splx(s); 685 686 /* Adjust numvifs up if the vifi is higher than numvifs */ 687 if (numvifs <= vifcp->vifc_vifi) numvifs 
= vifcp->vifc_vifi + 1; 688 689 if (mrtdebug) 690 log(LOG_DEBUG, "add_vif #%d, lcladdr %lx, %s %lx, thresh %x, rate %d\n", 691 vifcp->vifc_vifi, 692 (u_long)ntohl(vifcp->vifc_lcl_addr.s_addr), 693 (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask", 694 (u_long)ntohl(vifcp->vifc_rmt_addr.s_addr), 695 vifcp->vifc_threshold, 696 vifcp->vifc_rate_limit); 697 698 return 0; 699} 700 701/* 702 * Delete a vif from the vif table 703 */ 704static int 705del_vif(vifi_t vifi) 706{ 707 struct vif *vifp; 708 int s; 709 710 if (vifi >= numvifs) 711 return EINVAL; 712 vifp = &viftable[vifi]; 713 if (vifp->v_lcl_addr.s_addr == INADDR_ANY) 714 return EADDRNOTAVAIL; 715 716 s = splnet(); 717 718 if (!(vifp->v_flags & VIFF_TUNNEL)) 719 if_allmulti(vifp->v_ifp, 0); 720 721 if (vifp == last_encap_vif) { 722 last_encap_vif = NULL; 723 last_encap_src = INADDR_ANY; 724 } 725 726 /* 727 * Free packets queued at the interface 728 */ 729 while (vifp->v_tbf->tbf_q) { 730 struct mbuf *m = vifp->v_tbf->tbf_q; 731 732 vifp->v_tbf->tbf_q = m->m_act; 733 m_freem(m); 734 } 735 736 bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf))); 737 bzero((caddr_t)vifp, sizeof (*vifp)); 738 739 if (mrtdebug) 740 log(LOG_DEBUG, "del_vif %d, numvifs %d\n", vifi, numvifs); 741 742 /* Adjust numvifs down */ 743 for (vifi = numvifs; vifi > 0; vifi--) 744 if (viftable[vifi-1].v_lcl_addr.s_addr != INADDR_ANY) 745 break; 746 numvifs = vifi; 747 748 splx(s); 749 750 return 0; 751} 752 753/* 754 * update an mfc entry without resetting counters and S,G addresses. 755 */ 756static void 757update_mfc_params(struct mfc *rt, struct mfcctl *mfccp) 758{ 759 int i; 760 761 rt->mfc_parent = mfccp->mfcc_parent; 762 for (i = 0; i < numvifs; i++) 763 rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; 764} 765 766/* 767 * fully initialize an mfc entry from the parameter. 
768 */ 769static void 770init_mfc_params(struct mfc *rt, struct mfcctl *mfccp) 771{ 772 rt->mfc_origin = mfccp->mfcc_origin; 773 rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 774 775 update_mfc_params(rt, mfccp); 776 777 /* initialize pkt counters per src-grp */ 778 rt->mfc_pkt_cnt = 0; 779 rt->mfc_byte_cnt = 0; 780 rt->mfc_wrong_if = 0; 781 rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; 782} 783 784 785/* 786 * Add an mfc entry 787 */ 788static int 789add_mfc(struct mfcctl *mfccp) 790{ 791 struct mfc *rt; 792 u_long hash; 793 struct rtdetq *rte; 794 u_short nstl; 795 int s; 796 797 rt = mfc_find(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); 798 799 /* If an entry already exists, just update the fields */ 800 if (rt) { 801 if (mrtdebug & DEBUG_MFC) 802 log(LOG_DEBUG,"add_mfc update o %lx g %lx p %x\n", 803 (u_long)ntohl(mfccp->mfcc_origin.s_addr), 804 (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), 805 mfccp->mfcc_parent); 806 807 s = splnet(); 808 update_mfc_params(rt, mfccp); 809 splx(s); 810 return 0; 811 } 812 813 /* 814 * Find the entry for which the upcall was made and update 815 */ 816 s = splnet(); 817 hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); 818 for (rt = mfctable[hash], nstl = 0; rt; rt = rt->mfc_next) { 819 820 if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && 821 (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) && 822 (rt->mfc_stall != NULL)) { 823 824 if (nstl++) 825 log(LOG_ERR, "add_mfc %s o %lx g %lx p %x dbx %p\n", 826 "multiple kernel entries", 827 (u_long)ntohl(mfccp->mfcc_origin.s_addr), 828 (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), 829 mfccp->mfcc_parent, (void *)rt->mfc_stall); 830 831 if (mrtdebug & DEBUG_MFC) 832 log(LOG_DEBUG,"add_mfc o %lx g %lx p %x dbg %p\n", 833 (u_long)ntohl(mfccp->mfcc_origin.s_addr), 834 (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), 835 mfccp->mfcc_parent, (void *)rt->mfc_stall); 836 837 init_mfc_params(rt, mfccp); 838 839 rt->mfc_expire = 0; /* Don't 
clean this guy up */ 840 nexpire[hash]--; 841 842 /* free packets Qed at the end of this entry */ 843 for (rte = rt->mfc_stall; rte != NULL; ) { 844 struct rtdetq *n = rte->next; 845 846 ip_mdq(rte->m, rte->ifp, rt, -1); 847 m_freem(rte->m); 848 free(rte, M_MRTABLE); 849 rte = n; 850 } 851 rt->mfc_stall = NULL; 852 } 853 } 854 855 /* 856 * It is possible that an entry is being inserted without an upcall 857 */ 858 if (nstl == 0) { 859 if (mrtdebug & DEBUG_MFC) 860 log(LOG_DEBUG,"add_mfc no upcall h %lu o %lx g %lx p %x\n", 861 hash, (u_long)ntohl(mfccp->mfcc_origin.s_addr), 862 (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), 863 mfccp->mfcc_parent); 864 865 for (rt = mfctable[hash]; rt != NULL; rt = rt->mfc_next) { 866 if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && 867 (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) { 868 init_mfc_params(rt, mfccp); 869 if (rt->mfc_expire) 870 nexpire[hash]--; 871 rt->mfc_expire = 0; 872 break; /* XXX */ 873 } 874 } 875 if (rt == NULL) { /* no upcall, so make a new entry */ 876 rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); 877 if (rt == NULL) { 878 splx(s); 879 return ENOBUFS; 880 } 881 882 init_mfc_params(rt, mfccp); 883 rt->mfc_expire = 0; 884 rt->mfc_stall = NULL; 885 886 /* insert new entry at head of hash chain */ 887 rt->mfc_next = mfctable[hash]; 888 mfctable[hash] = rt; 889 } 890 } 891 splx(s); 892 return 0; 893} 894 895/* 896 * Delete an mfc entry 897 */ 898static int 899del_mfc(struct mfcctl *mfccp) 900{ 901 struct in_addr origin; 902 struct in_addr mcastgrp; 903 struct mfc *rt; 904 struct mfc **nptr; 905 u_long hash; 906 int s; 907 908 origin = mfccp->mfcc_origin; 909 mcastgrp = mfccp->mfcc_mcastgrp; 910 911 if (mrtdebug & DEBUG_MFC) 912 log(LOG_DEBUG,"del_mfc orig %lx mcastgrp %lx\n", 913 (u_long)ntohl(origin.s_addr), (u_long)ntohl(mcastgrp.s_addr)); 914 915 s = splnet(); 916 917 hash = MFCHASH(origin.s_addr, mcastgrp.s_addr); 918 for (nptr = &mfctable[hash]; (rt = *nptr) != NULL; nptr 
= &rt->mfc_next) 919 if (origin.s_addr == rt->mfc_origin.s_addr && 920 mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && 921 rt->mfc_stall == NULL) 922 break; 923 if (rt == NULL) { 924 splx(s); 925 return EADDRNOTAVAIL; 926 } 927 928 *nptr = rt->mfc_next; 929 free(rt, M_MRTABLE); 930 931 splx(s); 932 933 return 0; 934} 935 936/* 937 * Send a message to mrouted on the multicast routing socket 938 */ 939static int 940socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src) 941{ 942 if (s) { 943 if (sbappendaddr(&s->so_rcv, (struct sockaddr *)src, mm, NULL) != 0) { 944 sorwakeup(s); 945 return 0; 946 } 947 } 948 m_freem(mm); 949 return -1; 950} 951 952/* 953 * IP multicast forwarding function. This function assumes that the packet 954 * pointed to by "ip" has arrived on (or is about to be sent to) the interface 955 * pointed to by "ifp", and the packet is to be relayed to other networks 956 * that have members of the packet's destination IP multicast group. 957 * 958 * The packet is returned unscathed to the caller, unless it is 959 * erroneous, in which case a non-zero return value tells the caller to 960 * discard it. 961 */ 962 963#define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ 964 965static int 966X_ip_mforward(struct ip *ip, struct ifnet *ifp, 967 struct mbuf *m, struct ip_moptions *imo) 968{ 969 struct mfc *rt; 970 int s; 971 vifi_t vifi; 972 973 if (mrtdebug & DEBUG_FORWARD) 974 log(LOG_DEBUG, "ip_mforward: src %lx, dst %lx, ifp %p\n", 975 (u_long)ntohl(ip->ip_src.s_addr), (u_long)ntohl(ip->ip_dst.s_addr), 976 (void *)ifp); 977 978 if (ip->ip_hl < (sizeof(struct ip) + TUNNEL_LEN) >> 2 || 979 ((u_char *)(ip + 1))[1] != IPOPT_LSRR ) { 980 /* 981 * Packet arrived via a physical interface or 982 * an encapsulated tunnel. 983 */ 984 } else { 985 /* 986 * Packet arrived through a source-route tunnel. 987 * Source-route tunnels are no longer supported. 
988 */ 989 static int last_log; 990 if (last_log != time_second) { 991 last_log = time_second; 992 log(LOG_ERR, 993 "ip_mforward: received source-routed packet from %lx\n", 994 (u_long)ntohl(ip->ip_src.s_addr)); 995 } 996 return 1; 997 } 998 999 if ((imo) && ((vifi = imo->imo_multicast_vif) < numvifs)) { 1000 if (ip->ip_ttl < 255) 1001 ip->ip_ttl++; /* compensate for -1 in *_send routines */ 1002 if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { 1003 struct vif *vifp = viftable + vifi; 1004 1005 printf("Sending IPPROTO_RSVP from %lx to %lx on vif %d (%s%s%d)\n", 1006 (long)ntohl(ip->ip_src.s_addr), (long)ntohl(ip->ip_dst.s_addr), 1007 vifi, 1008 (vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "", 1009 vifp->v_ifp->if_name, vifp->v_ifp->if_unit); 1010 } 1011 return ip_mdq(m, ifp, NULL, vifi); 1012 } 1013 if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { 1014 printf("Warning: IPPROTO_RSVP from %lx to %lx without vif option\n", 1015 (long)ntohl(ip->ip_src.s_addr), (long)ntohl(ip->ip_dst.s_addr)); 1016 if (!imo) 1017 printf("In fact, no options were specified at all\n"); 1018 } 1019 1020 /* 1021 * Don't forward a packet with time-to-live of zero or one, 1022 * or a packet destined to a local-only group. 
1023 */ 1024 if (ip->ip_ttl <= 1 || ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP) 1025 return 0; 1026 1027 /* 1028 * Determine forwarding vifs from the forwarding cache table 1029 */ 1030 s = splnet(); 1031 ++mrtstat.mrts_mfc_lookups; 1032 rt = mfc_find(ip->ip_src.s_addr, ip->ip_dst.s_addr); 1033 1034 /* Entry exists, so forward if necessary */ 1035 if (rt != NULL) { 1036 splx(s); 1037 return ip_mdq(m, ifp, rt, -1); 1038 } else { 1039 /* 1040 * If we don't have a route for packet's origin, 1041 * Make a copy of the packet & send message to routing daemon 1042 */ 1043 1044 struct mbuf *mb0; 1045 struct rtdetq *rte; 1046 u_long hash; 1047 int hlen = ip->ip_hl << 2; 1048 1049 ++mrtstat.mrts_mfc_misses; 1050 1051 mrtstat.mrts_no_route++; 1052 if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC)) 1053 log(LOG_DEBUG, "ip_mforward: no rte s %lx g %lx\n", 1054 (u_long)ntohl(ip->ip_src.s_addr), 1055 (u_long)ntohl(ip->ip_dst.s_addr)); 1056 1057 /* 1058 * Allocate mbufs early so that we don't do extra work if we are 1059 * just going to fail anyway. Make sure to pullup the header so 1060 * that other people can't step on it. 1061 */ 1062 rte = (struct rtdetq *)malloc((sizeof *rte), M_MRTABLE, M_NOWAIT); 1063 if (rte == NULL) { 1064 splx(s); 1065 return ENOBUFS; 1066 } 1067 mb0 = m_copy(m, 0, M_COPYALL); 1068 if (mb0 && (M_HASCL(mb0) || mb0->m_len < hlen)) 1069 mb0 = m_pullup(mb0, hlen); 1070 if (mb0 == NULL) { 1071 free(rte, M_MRTABLE); 1072 splx(s); 1073 return ENOBUFS; 1074 } 1075 1076 /* is there an upcall waiting for this flow ? 
*/ 1077 hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr); 1078 for (rt = mfctable[hash]; rt; rt = rt->mfc_next) { 1079 if ((ip->ip_src.s_addr == rt->mfc_origin.s_addr) && 1080 (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) && 1081 (rt->mfc_stall != NULL)) 1082 break; 1083 } 1084 1085 if (rt == NULL) { 1086 int i; 1087 struct igmpmsg *im; 1088 struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; 1089 struct mbuf *mm; 1090 1091 /* 1092 * Locate the vifi for the incoming interface for this packet. 1093 * If none found, drop packet. 1094 */ 1095 for (vifi=0; vifi < numvifs && viftable[vifi].v_ifp != ifp; vifi++) 1096 ; 1097 if (vifi >= numvifs) /* vif not found, drop packet */ 1098 goto non_fatal; 1099 1100 /* no upcall, so make a new entry */ 1101 rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); 1102 if (rt == NULL) 1103 goto fail; 1104 /* Make a copy of the header to send to the user level process */ 1105 mm = m_copy(mb0, 0, hlen); 1106 if (mm == NULL) 1107 goto fail1; 1108 1109 /* 1110 * Send message to routing daemon to install 1111 * a route into the kernel table 1112 */ 1113 1114 im = mtod(mm, struct igmpmsg *); 1115 im->im_msgtype = IGMPMSG_NOCACHE; 1116 im->im_mbz = 0; 1117 im->im_vif = vifi; 1118 1119 mrtstat.mrts_upcalls++; 1120 1121 k_igmpsrc.sin_addr = ip->ip_src; 1122 if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) { 1123 log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full\n"); 1124 ++mrtstat.mrts_upq_sockfull; 1125fail1: 1126 free(rt, M_MRTABLE); 1127fail: 1128 free(rte, M_MRTABLE); 1129 m_freem(mb0); 1130 splx(s); 1131 return ENOBUFS; 1132 } 1133 1134 /* insert new entry at head of hash chain */ 1135 rt->mfc_origin.s_addr = ip->ip_src.s_addr; 1136 rt->mfc_mcastgrp.s_addr = ip->ip_dst.s_addr; 1137 rt->mfc_expire = UPCALL_EXPIRE; 1138 nexpire[hash]++; 1139 for (i = 0; i < numvifs; i++) 1140 rt->mfc_ttls[i] = 0; 1141 rt->mfc_parent = -1; 1142 1143 /* link into table */ 1144 rt->mfc_next = mfctable[hash]; 1145 mfctable[hash] 
= rt; 1146 rt->mfc_stall = rte; 1147 1148 } else { 1149 /* determine if q has overflowed */ 1150 int npkts = 0; 1151 struct rtdetq **p; 1152 1153 /* 1154 * XXX ouch! we need to append to the list, but we 1155 * only have a pointer to the front, so we have to 1156 * scan the entire list every time. 1157 */ 1158 for (p = &rt->mfc_stall; *p != NULL; p = &(*p)->next) 1159 npkts++; 1160 1161 if (npkts > MAX_UPQ) { 1162 mrtstat.mrts_upq_ovflw++; 1163non_fatal: 1164 free(rte, M_MRTABLE); 1165 m_freem(mb0); 1166 splx(s); 1167 return 0; 1168 } 1169 1170 /* Add this entry to the end of the queue */ 1171 *p = rte; 1172 } 1173 1174 rte->m = mb0; 1175 rte->ifp = ifp; 1176 rte->next = NULL; 1177 1178 splx(s); 1179 1180 return 0; 1181 } 1182} 1183 1184/* 1185 * Clean up the cache entry if upcall is not serviced 1186 */ 1187static void 1188expire_upcalls(void *unused) 1189{ 1190 struct rtdetq *rte; 1191 struct mfc *mfc, **nptr; 1192 int i; 1193 int s; 1194 1195 s = splnet(); 1196 for (i = 0; i < MFCTBLSIZ; i++) { 1197 if (nexpire[i] == 0) 1198 continue; 1199 nptr = &mfctable[i]; 1200 for (mfc = *nptr; mfc != NULL; mfc = *nptr) { 1201 /* 1202 * Skip real cache entries 1203 * Make sure it wasn't marked to not expire (shouldn't happen) 1204 * If it expires now 1205 */ 1206 if (mfc->mfc_stall != NULL && mfc->mfc_expire != 0 && 1207 --mfc->mfc_expire == 0) { 1208 if (mrtdebug & DEBUG_EXPIRE) 1209 log(LOG_DEBUG, "expire_upcalls: expiring (%lx %lx)\n", 1210 (u_long)ntohl(mfc->mfc_origin.s_addr), 1211 (u_long)ntohl(mfc->mfc_mcastgrp.s_addr)); 1212 /* 1213 * drop all the packets 1214 * free the mbuf with the pkt, if, timing info 1215 */ 1216 for (rte = mfc->mfc_stall; rte; ) { 1217 struct rtdetq *n = rte->next; 1218 1219 m_freem(rte->m); 1220 free(rte, M_MRTABLE); 1221 rte = n; 1222 } 1223 ++mrtstat.mrts_cache_cleanups; 1224 nexpire[i]--; 1225 1226 *nptr = mfc->mfc_next; 1227 free(mfc, M_MRTABLE); 1228 } else { 1229 nptr = &mfc->mfc_next; 1230 } 1231 } 1232 } 1233 splx(s); 1234 
expire_upcalls_ch = timeout(expire_upcalls, NULL, EXPIRE_TIMEOUT); 1235} 1236 1237/* 1238 * Packet forwarding routine once entry in the cache is made 1239 */ 1240static int 1241ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif) 1242{ 1243 struct ip *ip = mtod(m, struct ip *); 1244 vifi_t vifi; 1245 int plen = ip->ip_len; 1246 1247/* 1248 * Macro to send packet on vif. Since RSVP packets don't get counted on 1249 * input, they shouldn't get counted on output, so statistics keeping is 1250 * separate. 1251 */ 1252#define MC_SEND(ip,vifp,m) { \ 1253 if ((vifp)->v_flags & VIFF_TUNNEL) \ 1254 encap_send((ip), (vifp), (m)); \ 1255 else \ 1256 phyint_send((ip), (vifp), (m)); \ 1257} 1258 1259 /* 1260 * If xmt_vif is not -1, send on only the requested vif. 1261 * 1262 * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.) 1263 */ 1264 if (xmt_vif < numvifs) { 1265 MC_SEND(ip, viftable + xmt_vif, m); 1266 return 1; 1267 } 1268 1269 /* 1270 * Don't forward if it didn't arrive from the parent vif for its origin. 1271 */ 1272 vifi = rt->mfc_parent; 1273 if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) { 1274 /* came in the wrong interface */ 1275 if (mrtdebug & DEBUG_FORWARD) 1276 log(LOG_DEBUG, "wrong if: ifp %p vifi %d vififp %p\n", 1277 (void *)ifp, vifi, (void *)viftable[vifi].v_ifp); 1278 ++mrtstat.mrts_wrong_if; 1279 ++rt->mfc_wrong_if; 1280 /* 1281 * If we are doing PIM assert processing, and we are forwarding 1282 * packets on this interface, and it is a broadcast medium 1283 * interface (and not a tunnel), send a message to the routing daemon. 
1284 */ 1285 if (pim_assert && rt->mfc_ttls[vifi] && 1286 (ifp->if_flags & IFF_BROADCAST) && 1287 !(viftable[vifi].v_flags & VIFF_TUNNEL)) { 1288 struct timeval now; 1289 u_long delta; 1290 1291 /* Get vifi for the incoming packet */ 1292 for (vifi=0; vifi < numvifs && viftable[vifi].v_ifp != ifp; vifi++) 1293 ; 1294 if (vifi >= numvifs) 1295 return 0; /* if not found: ignore the packet */ 1296 1297 GET_TIME(now); 1298 1299 TV_DELTA(rt->mfc_last_assert, now, delta); 1300 1301 if (delta > ASSERT_MSG_TIME) { 1302 struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; 1303 struct igmpmsg *im; 1304 int hlen = ip->ip_hl << 2; 1305 struct mbuf *mm = m_copy(m, 0, hlen); 1306 1307 if (mm && (M_HASCL(mm) || mm->m_len < hlen)) 1308 mm = m_pullup(mm, hlen); 1309 if (mm == NULL) 1310 return ENOBUFS; 1311 1312 rt->mfc_last_assert = now; 1313 1314 im = mtod(mm, struct igmpmsg *); 1315 im->im_msgtype = IGMPMSG_WRONGVIF; 1316 im->im_mbz = 0; 1317 im->im_vif = vifi; 1318 1319 mrtstat.mrts_upcalls++; 1320 1321 k_igmpsrc.sin_addr = im->im_src; 1322 if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) { 1323 log(LOG_WARNING, 1324 "ip_mforward: ip_mrouter socket queue full\n"); 1325 ++mrtstat.mrts_upq_sockfull; 1326 return ENOBUFS; 1327 } 1328 } 1329 } 1330 return 0; 1331 } 1332 1333 /* If I sourced this packet, it counts as output, else it was input. */ 1334 if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) { 1335 viftable[vifi].v_pkt_out++; 1336 viftable[vifi].v_bytes_out += plen; 1337 } else { 1338 viftable[vifi].v_pkt_in++; 1339 viftable[vifi].v_bytes_in += plen; 1340 } 1341 rt->mfc_pkt_cnt++; 1342 rt->mfc_byte_cnt += plen; 1343 1344 /* 1345 * For each vif, decide if a copy of the packet should be forwarded. 
1346 * Forward if: 1347 * - the ttl exceeds the vif's threshold 1348 * - there are group members downstream on interface 1349 */ 1350 for (vifi = 0; vifi < numvifs; vifi++) 1351 if ((rt->mfc_ttls[vifi] > 0) && (ip->ip_ttl > rt->mfc_ttls[vifi])) { 1352 viftable[vifi].v_pkt_out++; 1353 viftable[vifi].v_bytes_out += plen; 1354 MC_SEND(ip, viftable+vifi, m); 1355 } 1356 1357 return 0; 1358} 1359 1360/* 1361 * check if a vif number is legal/ok. This is used by ip_output. 1362 */ 1363static int 1364X_legal_vif_num(int vif) 1365{ 1366 return (vif >= 0 && vif < numvifs); 1367} 1368 1369/* 1370 * Return the local address used by this vif 1371 */ 1372static u_long 1373X_ip_mcast_src(int vifi) 1374{ 1375 if (vifi >= 0 && vifi < numvifs) 1376 return viftable[vifi].v_lcl_addr.s_addr; 1377 else 1378 return INADDR_ANY; 1379} 1380 1381static void 1382phyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m) 1383{ 1384 struct mbuf *mb_copy; 1385 int hlen = ip->ip_hl << 2; 1386 1387 /* 1388 * Make a new reference to the packet; make sure that 1389 * the IP header is actually copied, not just referenced, 1390 * so that ip_output() only scribbles on the copy. 1391 */ 1392 mb_copy = m_copy(m, 0, M_COPYALL); 1393 if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen)) 1394 mb_copy = m_pullup(mb_copy, hlen); 1395 if (mb_copy == NULL) 1396 return; 1397 1398 if (vifp->v_rate_limit == 0) 1399 tbf_send_packet(vifp, mb_copy); 1400 else 1401 tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len); 1402} 1403 1404static void 1405encap_send(struct ip *ip, struct vif *vifp, struct mbuf *m) 1406{ 1407 struct mbuf *mb_copy; 1408 struct ip *ip_copy; 1409 int i, len = ip->ip_len; 1410 1411 /* 1412 * XXX: take care of delayed checksums. 1413 * XXX: if network interfaces are capable of computing checksum for 1414 * encapsulated multicast data packets, we need to reconsider this. 
1415 */ 1416 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 1417 in_delayed_cksum(m); 1418 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1419 } 1420 1421 /* 1422 * copy the old packet & pullup its IP header into the 1423 * new mbuf so we can modify it. Try to fill the new 1424 * mbuf since if we don't the ethernet driver will. 1425 */ 1426 MGETHDR(mb_copy, M_DONTWAIT, MT_HEADER); 1427 if (mb_copy == NULL) 1428 return; 1429#ifdef MAC 1430 mac_create_mbuf_multicast_encap(m, vifp->v_ifp, mb_copy); 1431#endif 1432 mb_copy->m_data += max_linkhdr; 1433 mb_copy->m_len = sizeof(multicast_encap_iphdr); 1434 1435 if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) { 1436 m_freem(mb_copy); 1437 return; 1438 } 1439 i = MHLEN - M_LEADINGSPACE(mb_copy); 1440 if (i > len) 1441 i = len; 1442 mb_copy = m_pullup(mb_copy, i); 1443 if (mb_copy == NULL) 1444 return; 1445 mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr); 1446 1447 /* 1448 * fill in the encapsulating IP header. 1449 */ 1450 ip_copy = mtod(mb_copy, struct ip *); 1451 *ip_copy = multicast_encap_iphdr; 1452#ifdef RANDOM_IP_ID 1453 ip_copy->ip_id = ip_randomid(); 1454#else 1455 ip_copy->ip_id = htons(ip_id++); 1456#endif 1457 ip_copy->ip_len += len; 1458 ip_copy->ip_src = vifp->v_lcl_addr; 1459 ip_copy->ip_dst = vifp->v_rmt_addr; 1460 1461 /* 1462 * turn the encapsulated IP header back into a valid one. 
1463 */ 1464 ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr)); 1465 --ip->ip_ttl; 1466 ip->ip_len = htons(ip->ip_len); 1467 ip->ip_off = htons(ip->ip_off); 1468 ip->ip_sum = 0; 1469 mb_copy->m_data += sizeof(multicast_encap_iphdr); 1470 ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); 1471 mb_copy->m_data -= sizeof(multicast_encap_iphdr); 1472 1473 if (vifp->v_rate_limit == 0) 1474 tbf_send_packet(vifp, mb_copy); 1475 else 1476 tbf_control(vifp, mb_copy, ip, ip_copy->ip_len); 1477} 1478 1479/* 1480 * Token bucket filter module 1481 */ 1482 1483static void 1484tbf_control(struct vif *vifp, struct mbuf *m, struct ip *ip, u_long p_len) 1485{ 1486 struct tbf *t = vifp->v_tbf; 1487 1488 if (p_len > MAX_BKT_SIZE) { /* drop if packet is too large */ 1489 mrtstat.mrts_pkt2large++; 1490 m_freem(m); 1491 return; 1492 } 1493 1494 tbf_update_tokens(vifp); 1495 1496 if (t->tbf_q_len == 0) { /* queue empty... */ 1497 if (p_len <= t->tbf_n_tok) { /* send packet if enough tokens */ 1498 t->tbf_n_tok -= p_len; 1499 tbf_send_packet(vifp, m); 1500 } else { /* no, queue packet and try later */ 1501 tbf_queue(vifp, m); 1502 timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS); 1503 } 1504 } else if (t->tbf_q_len < t->tbf_max_q_len) { 1505 /* finite queue length, so queue pkts and process queue */ 1506 tbf_queue(vifp, m); 1507 tbf_process_q(vifp); 1508 } else { 1509 /* queue full, try to dq and queue and process */ 1510 if (!tbf_dq_sel(vifp, ip)) { 1511 mrtstat.mrts_q_overflow++; 1512 m_freem(m); 1513 } else { 1514 tbf_queue(vifp, m); 1515 tbf_process_q(vifp); 1516 } 1517 } 1518} 1519 1520/* 1521 * adds a packet to the queue at the interface 1522 */ 1523static void 1524tbf_queue(struct vif *vifp, struct mbuf *m) 1525{ 1526 int s = splnet(); 1527 struct tbf *t = vifp->v_tbf; 1528 1529 if (t->tbf_t == NULL) /* Queue was empty */ 1530 t->tbf_q = m; 1531 else /* Insert at tail */ 1532 t->tbf_t->m_act = m; 1533 1534 t->tbf_t = m; /* Set new tail pointer */ 1535 
1536#ifdef DIAGNOSTIC 1537 /* Make sure we didn't get fed a bogus mbuf */ 1538 if (m->m_act) 1539 panic("tbf_queue: m_act"); 1540#endif 1541 m->m_act = NULL; 1542 1543 t->tbf_q_len++; 1544 1545 splx(s); 1546} 1547 1548/* 1549 * processes the queue at the interface 1550 */ 1551static void 1552tbf_process_q(struct vif *vifp) 1553{ 1554 int s = splnet(); 1555 struct tbf *t = vifp->v_tbf; 1556 1557 /* loop through the queue at the interface and send as many packets 1558 * as possible 1559 */ 1560 while (t->tbf_q_len > 0) { 1561 struct mbuf *m = t->tbf_q; 1562 int len = mtod(m, struct ip *)->ip_len; 1563 1564 /* determine if the packet can be sent */ 1565 if (len > t->tbf_n_tok) /* not enough tokens, we are done */ 1566 break; 1567 /* ok, reduce no of tokens, dequeue and send the packet. */ 1568 t->tbf_n_tok -= len; 1569 1570 t->tbf_q = m->m_act; 1571 if (--t->tbf_q_len == 0) 1572 t->tbf_t = NULL; 1573 1574 m->m_act = NULL; 1575 tbf_send_packet(vifp, m); 1576 } 1577 splx(s); 1578} 1579 1580static void 1581tbf_reprocess_q(void *xvifp) 1582{ 1583 struct vif *vifp = xvifp; 1584 1585 if (ip_mrouter == NULL) 1586 return; 1587 tbf_update_tokens(vifp); 1588 tbf_process_q(vifp); 1589 if (vifp->v_tbf->tbf_q_len) 1590 timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS); 1591} 1592 1593/* function that will selectively discard a member of the queue 1594 * based on the precedence value and the priority 1595 */ 1596static int 1597tbf_dq_sel(struct vif *vifp, struct ip *ip) 1598{ 1599 int s = splnet(); 1600 u_int p; 1601 struct mbuf *m, *last; 1602 struct mbuf **np; 1603 struct tbf *t = vifp->v_tbf; 1604 1605 p = priority(vifp, ip); 1606 1607 np = &t->tbf_q; 1608 last = NULL; 1609 while ((m = *np) != NULL) { 1610 if (p > priority(vifp, mtod(m, struct ip *))) { 1611 *np = m->m_act; 1612 /* If we're removing the last packet, fix the tail pointer */ 1613 if (m == t->tbf_t) 1614 t->tbf_t = last; 1615 m_freem(m); 1616 /* It's impossible for the queue to be empty, but check anyways. 
*/ 1617 if (--t->tbf_q_len == 0) 1618 t->tbf_t = NULL; 1619 splx(s); 1620 mrtstat.mrts_drop_sel++; 1621 return 1; 1622 } 1623 np = &m->m_act; 1624 last = m; 1625 } 1626 splx(s); 1627 return 0; 1628} 1629 1630static void 1631tbf_send_packet(struct vif *vifp, struct mbuf *m) 1632{ 1633 int s = splnet(); 1634 1635 if (vifp->v_flags & VIFF_TUNNEL) /* If tunnel options */ 1636 ip_output(m, NULL, &vifp->v_route, IP_FORWARDING, NULL, NULL); 1637 else { 1638 struct ip_moptions imo; 1639 int error; 1640 static struct route ro; /* XXX check this */ 1641 1642 imo.imo_multicast_ifp = vifp->v_ifp; 1643 imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1; 1644 imo.imo_multicast_loop = 1; 1645 imo.imo_multicast_vif = -1; 1646 1647 /* 1648 * Re-entrancy should not be a problem here, because 1649 * the packets that we send out and are looped back at us 1650 * should get rejected because they appear to come from 1651 * the loopback interface, thus preventing looping. 1652 */ 1653 error = ip_output(m, NULL, &ro, IP_FORWARDING, &imo, NULL); 1654 1655 if (mrtdebug & DEBUG_XMIT) 1656 log(LOG_DEBUG, "phyint_send on vif %d err %d\n", 1657 (int)(vifp - viftable), error); 1658 } 1659 splx(s); 1660} 1661 1662/* determine the current time and then 1663 * the elapsed time (between the last time and time now) 1664 * in milliseconds & update the no. of tokens in the bucket 1665 */ 1666static void 1667tbf_update_tokens(struct vif *vifp) 1668{ 1669 struct timeval tp; 1670 u_long tm; 1671 int s = splnet(); 1672 struct tbf *t = vifp->v_tbf; 1673 1674 GET_TIME(tp); 1675 1676 TV_DELTA(tp, t->tbf_last_pkt_t, tm); 1677 1678 /* 1679 * This formula is actually 1680 * "time in seconds" * "bytes/second". 1681 * 1682 * (tm / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8) 1683 * 1684 * The (1000/1024) was introduced in add_vif to optimize 1685 * this divide into a shift. 
1686 */ 1687 t->tbf_n_tok += tm * vifp->v_rate_limit / 1024 / 8; 1688 t->tbf_last_pkt_t = tp; 1689 1690 if (t->tbf_n_tok > MAX_BKT_SIZE) 1691 t->tbf_n_tok = MAX_BKT_SIZE; 1692 1693 splx(s); 1694} 1695 1696static int 1697priority(struct vif *vifp, struct ip *ip) 1698{ 1699 int prio = 50; /* the lowest priority -- default case */ 1700 1701 /* temporary hack; may add general packet classifier some day */ 1702 1703 /* 1704 * The UDP port space is divided up into four priority ranges: 1705 * [0, 16384) : unclassified - lowest priority 1706 * [16384, 32768) : audio - highest priority 1707 * [32768, 49152) : whiteboard - medium priority 1708 * [49152, 65536) : video - low priority 1709 * 1710 * Everything else gets lowest priority. 1711 */ 1712 if (ip->ip_p == IPPROTO_UDP) { 1713 struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2)); 1714 switch (ntohs(udp->uh_dport) & 0xc000) { 1715 case 0x4000: 1716 prio = 70; 1717 break; 1718 case 0x8000: 1719 prio = 60; 1720 break; 1721 case 0xc000: 1722 prio = 55; 1723 break; 1724 } 1725 } 1726 return prio; 1727} 1728 1729/* 1730 * End of token bucket filter modifications 1731 */ 1732 1733static int 1734X_ip_rsvp_vif(struct socket *so, struct sockopt *sopt) 1735{ 1736 int error, vifi, s; 1737 1738 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) 1739 return EOPNOTSUPP; 1740 1741 error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi); 1742 if (error) 1743 return error; 1744 1745 s = splnet(); 1746 1747 if (vifi < 0 || vifi >= numvifs) { /* Error if vif is invalid */ 1748 splx(s); 1749 return EADDRNOTAVAIL; 1750 } 1751 1752 if (sopt->sopt_name == IP_RSVP_VIF_ON) { 1753 /* Check if socket is available. */ 1754 if (viftable[vifi].v_rsvpd != NULL) { 1755 splx(s); 1756 return EADDRINUSE; 1757 } 1758 1759 viftable[vifi].v_rsvpd = so; 1760 /* This may seem silly, but we need to be sure we don't over-increment 1761 * the RSVP counter, in case something slips up. 
1762 */ 1763 if (!viftable[vifi].v_rsvp_on) { 1764 viftable[vifi].v_rsvp_on = 1; 1765 rsvp_on++; 1766 } 1767 } else { /* must be VIF_OFF */ 1768 /* 1769 * XXX as an additional consistency check, one could make sure 1770 * that viftable[vifi].v_rsvpd == so, otherwise passing so as 1771 * first parameter is pretty useless. 1772 */ 1773 viftable[vifi].v_rsvpd = NULL; 1774 /* 1775 * This may seem silly, but we need to be sure we don't over-decrement 1776 * the RSVP counter, in case something slips up. 1777 */ 1778 if (viftable[vifi].v_rsvp_on) { 1779 viftable[vifi].v_rsvp_on = 0; 1780 rsvp_on--; 1781 } 1782 } 1783 splx(s); 1784 return 0; 1785} 1786 1787static void 1788X_ip_rsvp_force_done(struct socket *so) 1789{ 1790 int vifi; 1791 int s; 1792 1793 /* Don't bother if it is not the right type of socket. */ 1794 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) 1795 return; 1796 1797 s = splnet(); 1798 1799 /* The socket may be attached to more than one vif...this 1800 * is perfectly legal. 1801 */ 1802 for (vifi = 0; vifi < numvifs; vifi++) { 1803 if (viftable[vifi].v_rsvpd == so) { 1804 viftable[vifi].v_rsvpd = NULL; 1805 /* This may seem silly, but we need to be sure we don't 1806 * over-decrement the RSVP counter, in case something slips up. 1807 */ 1808 if (viftable[vifi].v_rsvp_on) { 1809 viftable[vifi].v_rsvp_on = 0; 1810 rsvp_on--; 1811 } 1812 } 1813 } 1814 1815 splx(s); 1816} 1817 1818static void 1819X_rsvp_input(struct mbuf *m, int off) 1820{ 1821 int vifi; 1822 struct ip *ip = mtod(m, struct ip *); 1823 struct sockaddr_in rsvp_src = { sizeof rsvp_src, AF_INET }; 1824 int s; 1825 struct ifnet *ifp; 1826 1827 if (rsvpdebug) 1828 printf("rsvp_input: rsvp_on %d\n",rsvp_on); 1829 1830 /* Can still get packets with rsvp_on = 0 if there is a local member 1831 * of the group to which the RSVP packet is addressed. But in this 1832 * case we want to throw the packet away. 
1833 */ 1834 if (!rsvp_on) { 1835 m_freem(m); 1836 return; 1837 } 1838 1839 s = splnet(); 1840 1841 if (rsvpdebug) 1842 printf("rsvp_input: check vifs\n"); 1843 1844#ifdef DIAGNOSTIC 1845 M_ASSERTPKTHDR(m); 1846#endif 1847 1848 ifp = m->m_pkthdr.rcvif; 1849 /* Find which vif the packet arrived on. */ 1850 for (vifi = 0; vifi < numvifs; vifi++) 1851 if (viftable[vifi].v_ifp == ifp) 1852 break; 1853 1854 if (vifi == numvifs || viftable[vifi].v_rsvpd == NULL) { 1855 /* 1856 * If the old-style non-vif-associated socket is set, 1857 * then use it. Otherwise, drop packet since there 1858 * is no specific socket for this vif. 1859 */ 1860 if (ip_rsvpd != NULL) { 1861 if (rsvpdebug) 1862 printf("rsvp_input: Sending packet up old-style socket\n"); 1863 rip_input(m, off); /* xxx */ 1864 } else { 1865 if (rsvpdebug && vifi == numvifs) 1866 printf("rsvp_input: Can't find vif for packet.\n"); 1867 else if (rsvpdebug && viftable[vifi].v_rsvpd == NULL) 1868 printf("rsvp_input: No socket defined for vif %d\n",vifi); 1869 m_freem(m); 1870 } 1871 splx(s); 1872 return; 1873 } 1874 rsvp_src.sin_addr = ip->ip_src; 1875 1876 if (rsvpdebug && m) 1877 printf("rsvp_input: m->m_len = %d, sbspace() = %ld\n", 1878 m->m_len,sbspace(&(viftable[vifi].v_rsvpd->so_rcv))); 1879 1880 if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0) { 1881 if (rsvpdebug) 1882 printf("rsvp_input: Failed to append to socket\n"); 1883 } else { 1884 if (rsvpdebug) 1885 printf("rsvp_input: send packet up\n"); 1886 } 1887 1888 splx(s); 1889} 1890 1891static int 1892ip_mroute_modevent(module_t mod, int type, void *unused) 1893{ 1894 int s; 1895 1896 switch (type) { 1897 case MOD_LOAD: 1898 s = splnet(); 1899 /* XXX Protect against multiple loading */ 1900 ip_mcast_src = X_ip_mcast_src; 1901 ip_mforward = X_ip_mforward; 1902 ip_mrouter_done = X_ip_mrouter_done; 1903 ip_mrouter_get = X_ip_mrouter_get; 1904 ip_mrouter_set = X_ip_mrouter_set; 1905 ip_rsvp_force_done = X_ip_rsvp_force_done; 1906 ip_rsvp_vif = 
X_ip_rsvp_vif; 1907 legal_vif_num = X_legal_vif_num; 1908 mrt_ioctl = X_mrt_ioctl; 1909 rsvp_input_p = X_rsvp_input; 1910 splx(s); 1911 break; 1912 1913 case MOD_UNLOAD: 1914 if (ip_mrouter) 1915 return EINVAL; 1916 1917 s = splnet(); 1918 ip_mcast_src = NULL; 1919 ip_mforward = NULL; 1920 ip_mrouter_done = NULL; 1921 ip_mrouter_get = NULL; 1922 ip_mrouter_set = NULL; 1923 ip_rsvp_force_done = NULL; 1924 ip_rsvp_vif = NULL; 1925 legal_vif_num = NULL; 1926 mrt_ioctl = NULL; 1927 rsvp_input_p = NULL; 1928 splx(s); 1929 break; 1930 } 1931 return 0; 1932} 1933 1934static moduledata_t ip_mroutemod = { 1935 "ip_mroute", 1936 ip_mroute_modevent, 1937 0 1938}; 1939DECLARE_MODULE(ip_mroute, ip_mroutemod, SI_SUB_PSEUDO, SI_ORDER_ANY); 1940