/* ip_mroute.c, revision 3311 */
1/* 2 * IP multicast forwarding procedures 3 * 4 * Written by David Waitzman, BBN Labs, August 1988. 5 * Modified by Steve Deering, Stanford, February 1989. 6 * Modified by Mark J. Steiglitz, Stanford, May, 1991 7 * Modified by Van Jacobson, LBL, January 1993 8 * Modified by Ajit Thyagarajan, PARC, August 1993 9 * 10 * MROUTING 1.8 11 */ 12 13 14#include <sys/param.h> 15#include <sys/systm.h> 16#include <sys/mbuf.h> 17#include <sys/socket.h> 18#include <sys/socketvar.h> 19#include <sys/protosw.h> 20#include <sys/errno.h> 21#include <sys/time.h> 22#include <sys/ioctl.h> 23#include <sys/syslog.h> 24#include <net/if.h> 25#include <net/route.h> 26#include <net/raw_cb.h> 27#include <netinet/in.h> 28#include <netinet/in_systm.h> 29#include <netinet/ip.h> 30#include <netinet/ip_var.h> 31#include <netinet/in_pcb.h> 32#include <netinet/in_var.h> 33#include <netinet/igmp.h> 34#include <netinet/igmp_var.h> 35#include <netinet/ip_mroute.h> 36 37#ifndef NTOHL 38#if BYTE_ORDER != BIG_ENDIAN 39#define NTOHL(d) ((d) = ntohl((d))) 40#define NTOHS(d) ((d) = ntohs((u_short)(d))) 41#define HTONL(d) ((d) = htonl((d))) 42#define HTONS(d) ((d) = htons((u_short)(d))) 43#else 44#define NTOHL(d) 45#define NTOHS(d) 46#define HTONL(d) 47#define HTONS(d) 48#endif 49#endif 50 51#ifndef MROUTING 52/* 53 * Dummy routines and globals used when multicast routing is not compiled in. 
54 */ 55 56u_int ip_mrtproto = 0; 57struct socket *ip_mrouter = NULL; 58struct mrtstat mrtstat; 59 60 61int 62_ip_mrouter_cmd(cmd, so, m) 63 int cmd; 64 struct socket *so; 65 struct mbuf *m; 66{ 67 return(EOPNOTSUPP); 68} 69 70int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = _ip_mrouter_cmd; 71 72int 73_ip_mrouter_done() 74{ 75 return(0); 76} 77 78int (*ip_mrouter_done)(void) = _ip_mrouter_done; 79 80int 81_ip_mforward(ip, ifp, m, imo) 82 struct ip *ip; 83 struct ifnet *ifp; 84 struct mbuf *m; 85 struct ip_moptions *imo; 86{ 87 return(0); 88} 89 90int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, 91 struct ip_moptions *) = _ip_mforward; 92 93int 94_mrt_ioctl(int req, caddr_t data, struct proc *p) 95{ 96 return EOPNOTSUPP; 97} 98 99int (*mrt_ioctl)(int, caddr_t, struct proc *) = _mrt_ioctl; 100 101void multiencap_decap(struct mbuf *m) { /* XXX must fixup manually */ 102 rip_input(m); 103} 104 105int (*legal_vif_num)(int) = 0; 106 107#else 108 109#define INSIZ sizeof(struct in_addr) 110#define same(a1, a2) \ 111 (bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0) 112 113#define MT_MRTABLE MT_RTABLE /* since nothing else uses it */ 114 115/* 116 * Globals. All but ip_mrouter and ip_mrtproto could be static, 117 * except for netstat or debugging purposes. 
118 */ 119#ifndef MROUTE_LKM 120struct socket *ip_mrouter = NULL; 121struct mrtstat mrtstat; 122 123int ip_mrtproto = IGMP_DVMRP; /* for netstat only */ 124#else 125extern struct mrtstat mrtstat; 126extern int ip_mrtproto; 127#endif 128 129#define NO_RTE_FOUND 0x1 130#define RTE_FOUND 0x2 131 132struct mbuf *mfctable[MFCTBLSIZ]; 133struct vif viftable[MAXVIFS]; 134u_int mrtdebug = 0; /* debug level */ 135u_int tbfdebug = 0; /* tbf debug level */ 136 137u_long timeout_val = 0; /* count of outstanding upcalls */ 138 139/* 140 * Define the token bucket filter structures 141 * tbftable -> each vif has one of these for storing info 142 * qtable -> each interface has an associated queue of pkts 143 */ 144 145struct tbf tbftable[MAXVIFS]; 146struct pkt_queue qtable[MAXVIFS][MAXQSIZE]; 147 148/* 149 * 'Interfaces' associated with decapsulator (so we can tell 150 * packets that went through it from ones that get reflected 151 * by a broken gateway). These interfaces are never linked into 152 * the system ifnet list & no routes point to them. I.e., packets 153 * can't be sent this way. They only exist as a placeholder for 154 * multicast source verification. 155 */ 156struct ifnet multicast_decap_if[MAXVIFS]; 157 158#define ENCAP_TTL 64 159#define ENCAP_PROTO 4 160 161/* prototype IP hdr for encapsulated packets */ 162struct ip multicast_encap_iphdr = { 163#if BYTE_ORDER == LITTLE_ENDIAN 164 sizeof(struct ip) >> 2, IPVERSION, 165#else 166 IPVERSION, sizeof(struct ip) >> 2, 167#endif 168 0, /* tos */ 169 sizeof(struct ip), /* total length */ 170 0, /* id */ 171 0, /* frag offset */ 172 ENCAP_TTL, ENCAP_PROTO, 173 0, /* checksum */ 174}; 175 176/* 177 * Private variables. 178 */ 179static vifi_t numvifs = 0; 180 181/* 182 * one-back cache used by multiencap_decap to locate a tunnel's vif 183 * given a datagram's src ip address. 
184 */ 185static u_long last_encap_src; 186static struct vif *last_encap_vif; 187 188static u_long nethash_fc(u_long, u_long); 189static struct mfc *mfcfind(u_long, u_long); 190int get_sg_cnt(struct sioc_sg_req *); 191int get_vif_cnt(struct sioc_vif_req *); 192int get_vifs(caddr_t); 193static int add_vif(struct vifctl *); 194static int del_vif(vifi_t *); 195static int add_mfc(struct mfcctl *); 196static int del_mfc(struct delmfcctl *); 197static void cleanup_cache(void *); 198static int ip_mdq(struct mbuf *, struct ifnet *, u_long, struct mfc *, 199 struct ip_moptions *); 200extern int (*legal_vif_num)(int); 201static void phyint_send(struct ip *, struct vif *, struct mbuf *); 202static void srcrt_send(struct ip *, struct vif *, struct mbuf *); 203static void encap_send(struct ip *, struct vif *, struct mbuf *); 204void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long, 205 struct ip_moptions *); 206void tbf_queue(struct vif *, struct mbuf *, struct ip *, struct ip_moptions *); 207void tbf_process_q(struct vif *); 208void tbf_dequeue(struct vif *, int); 209void tbf_reprocess_q(void *); 210int tbf_dq_sel(struct vif *, struct ip *); 211void tbf_send_packet(struct vif *, struct mbuf *, struct ip_moptions *); 212void tbf_update_tokens(struct vif *); 213static int priority(struct vif *, struct ip *); 214static int ip_mrouter_init(struct socket *); 215 216/* 217 * A simple hash function: returns MFCHASHMOD of the low-order octet of 218 * the argument's network or subnet number and the multicast group assoc. 
219 */ 220static u_long 221nethash_fc(m,n) 222 register u_long m; 223 register u_long n; 224{ 225 struct in_addr in1; 226 struct in_addr in2; 227 228 in1.s_addr = m; 229 m = in_netof(in1); 230 while ((m & 0xff) == 0) m >>= 8; 231 232 in2.s_addr = n; 233 n = in_netof(in2); 234 while ((n & 0xff) == 0) n >>= 8; 235 236 return (MFCHASHMOD(m) ^ MFCHASHMOD(n)); 237} 238 239/* 240 * this is a direct-mapped cache used to speed the mapping from a 241 * datagram source address to the associated multicast route. Note 242 * that unlike mrttable, the hash is on IP address, not IP net number. 243 */ 244#define MFCHASHSIZ 1024 245#define MFCHASH(a, g) ((((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \ 246 ((g) >> 20) ^ ((g) >> 10) ^ (g)) & (MFCHASHSIZ-1)) 247struct mfc *mfchash[MFCHASHSIZ]; 248 249/* 250 * Find a route for a given origin IP address and Multicast group address 251 * Type of service parameter to be added in the future!!! 252 */ 253#define MFCFIND(o, g, rt) { \ 254 register u_int _mrhasho = o; \ 255 register u_int _mrhashg = g; \ 256 _mrhasho = MFCHASH(_mrhasho, _mrhashg); \ 257 ++mrtstat.mrts_mfc_lookups; \ 258 rt = mfchash[_mrhasho]; \ 259 if ((rt == NULL) || \ 260 ((o & rt->mfc_originmask.s_addr) != rt->mfc_origin.s_addr) || \ 261 (g != rt->mfc_mcastgrp.s_addr)) \ 262 if ((rt = mfcfind(o, g)) != NULL) \ 263 mfchash[_mrhasho] = rt; \ 264} 265 266/* 267 * Find route by examining hash table entries 268 */ 269static struct mfc * 270mfcfind(origin, mcastgrp) 271 u_long origin; 272 u_long mcastgrp; 273{ 274 register struct mbuf *mb_rt; 275 register struct mfc *rt; 276 register u_long hash; 277 278 hash = nethash_fc(origin, mcastgrp); 279 for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) { 280 rt = mtod(mb_rt, struct mfc *); 281 if (((origin & rt->mfc_originmask.s_addr) == rt->mfc_origin.s_addr) && 282 (mcastgrp == rt->mfc_mcastgrp.s_addr) && 283 (mb_rt->m_act == NULL)) 284 return (rt); 285 } 286 mrtstat.mrts_mfc_misses++; 287 return NULL; 288} 289 290/* 291 * Macros to 
compute elapsed time efficiently 292 * Borrowed from Van Jacobson's scheduling code 293 */ 294#define TV_DELTA(a, b, delta) { \ 295 register int xxs; \ 296 \ 297 delta = (a).tv_usec - (b).tv_usec; \ 298 if ((xxs = (a).tv_sec - (b).tv_sec)) { \ 299 switch (xxs) { \ 300 case 2: \ 301 delta += 1000000; \ 302 /* fall through */ \ 303 case 1: \ 304 delta += 1000000; \ 305 break; \ 306 default: \ 307 delta += (1000000 * xxs); \ 308 } \ 309 } \ 310} 311 312#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \ 313 (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) 314 315/* 316 * Handle DVMRP setsockopt commands to modify the multicast routing tables. 317 */ 318int 319X_ip_mrouter_cmd(cmd, so, m) 320 int cmd; 321 struct socket *so; 322 struct mbuf *m; 323{ 324 if (cmd != DVMRP_INIT && so != ip_mrouter) return EACCES; 325 326 switch (cmd) { 327 case DVMRP_INIT: return ip_mrouter_init(so); 328 case DVMRP_DONE: return ip_mrouter_done(); 329 case DVMRP_ADD_VIF: return add_vif (mtod(m, struct vifctl *)); 330 case DVMRP_DEL_VIF: return del_vif (mtod(m, vifi_t *)); 331 case DVMRP_ADD_MFC: return add_mfc (mtod(m, struct mfcctl *)); 332 case DVMRP_DEL_MFC: return del_mfc (mtod(m, struct delmfcctl *)); 333 default: return EOPNOTSUPP; 334 } 335} 336 337#ifndef MROUTE_LKM 338int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = X_ip_mrouter_cmd; 339#endif 340 341/* 342 * Handle ioctl commands to obtain information from the cache 343 */ 344int 345X_mrt_ioctl(cmd, data) 346 int cmd; 347 caddr_t data; 348{ 349 int error = 0; 350 351 switch (cmd) { 352 case (SIOCGETVIFINF): /* Read Virtual Interface (m/cast) */ 353 return (get_vifs(data)); 354 break; 355 case (SIOCGETVIFCNT): 356 return (get_vif_cnt((struct sioc_vif_req *)data)); 357 break; 358 case (SIOCGETSGCNT): 359 return (get_sg_cnt((struct sioc_sg_req *)data)); 360 break; 361 default: 362 return (EINVAL); 363 break; 364 } 365 return error; 366} 367 368#ifndef MROUTE_LKM 369int (*mrt_ioctl)(int, caddr_t, struct proc *) = 
X_mrt_ioctl; 370#else 371extern int (*mrt_ioctl)(int, caddr_t, struct proc *); 372#endif 373 374/* 375 * returns the packet count for the source group provided 376 */ 377int 378get_sg_cnt(req) 379 register struct sioc_sg_req *req; 380{ 381 register struct mfc *rt; 382 int s; 383 384 s = splnet(); 385 MFCFIND(req->src.s_addr, req->grp.s_addr, rt); 386 splx(s); 387 if (rt != NULL) 388 req->count = rt->mfc_pkt_cnt; 389 else 390 req->count = 0xffffffff; 391 392 return 0; 393} 394 395/* 396 * returns the input and output packet counts on the interface provided 397 */ 398int 399get_vif_cnt(req) 400 register struct sioc_vif_req *req; 401{ 402 register vifi_t vifi = req->vifi; 403 404 req->icount = viftable[vifi].v_pkt_in; 405 req->ocount = viftable[vifi].v_pkt_out; 406 407 return 0; 408} 409 410int 411get_vifs(data) 412 char *data; 413{ 414 struct vif_conf *vifc = (struct vif_conf *)data; 415 struct vif_req *vifrp, vifr; 416 int space, error=0; 417 418 vifi_t vifi; 419 int s; 420 421 space = vifc->vifc_len; 422 vifrp = vifc->vifc_req; 423 424 s = splnet(); 425 vifc->vifc_num=numvifs; 426 427 for (vifi = 0; vifi < numvifs; vifi++, vifrp++) { 428 if (viftable[vifi].v_lcl_addr.s_addr != 0) { 429 vifr.v_flags=viftable[vifi].v_flags; 430 vifr.v_threshold=viftable[vifi].v_threshold; 431 vifr.v_lcl_addr=viftable[vifi].v_lcl_addr; 432 vifr.v_rmt_addr=viftable[vifi].v_rmt_addr; 433 strncpy(vifr.v_if_name,viftable[vifi].v_ifp->if_name,IFNAMSIZ); 434 if ((space -= sizeof(vifr)) < 0) { 435 splx(s); 436 return(ENOSPC); 437 } 438 error = copyout((caddr_t)&vifr,(caddr_t)vifrp,(u_int)(sizeof vifr)); 439 if (error) { 440 splx(s); 441 return(error); 442 } 443 } 444 } 445 splx(s); 446 return 0; 447} 448/* 449 * Enable multicast routing 450 */ 451static int 452ip_mrouter_init(so) 453 struct socket *so; 454{ 455 if (so->so_type != SOCK_RAW || 456 so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP; 457 458 if (ip_mrouter != NULL) return EADDRINUSE; 459 460 ip_mrouter = so; 461 462 if 
(mrtdebug) 463 log(LOG_DEBUG, "ip_mrouter_init"); 464 465 return 0; 466} 467 468/* 469 * Disable multicast routing 470 */ 471int 472X_ip_mrouter_done() 473{ 474 vifi_t vifi; 475 int i; 476 struct ifnet *ifp; 477 struct ifreq ifr; 478 struct mbuf *mb_rt; 479 struct mbuf *m; 480 struct rtdetq *rte; 481 int s; 482 483 s = splnet(); 484 485 /* 486 * For each phyint in use, disable promiscuous reception of all IP 487 * multicasts. 488 */ 489 for (vifi = 0; vifi < numvifs; vifi++) { 490 if (viftable[vifi].v_lcl_addr.s_addr != 0 && 491 !(viftable[vifi].v_flags & VIFF_TUNNEL)) { 492 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; 493 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr 494 = INADDR_ANY; 495 ifp = viftable[vifi].v_ifp; 496 (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); 497 } 498 } 499 bzero((caddr_t)qtable, sizeof(qtable)); 500 bzero((caddr_t)tbftable, sizeof(tbftable)); 501 bzero((caddr_t)viftable, sizeof(viftable)); 502 numvifs = 0; 503 504 /* 505 * Check if any outstanding timeouts remain 506 */ 507 if (timeout_val != 0) 508 for (i = 0; i < MFCTBLSIZ; i++) { 509 mb_rt = mfctable[i]; 510 while (mb_rt) { 511 if ( mb_rt->m_act != NULL) { 512 untimeout(cleanup_cache, (caddr_t)mb_rt); 513 while (mb_rt->m_act) { 514 m = mb_rt->m_act; 515 mb_rt->m_act = m->m_act; 516 rte = mtod(m, struct rtdetq *); 517 m_freem(rte->m); 518 m_free(m); 519 } 520 timeout_val--; 521 } 522 mb_rt = mb_rt->m_next; 523 } 524 if (timeout_val == 0) 525 break; 526 } 527 528 /* 529 * Free all multicast forwarding cache entries. 
530 */ 531 for (i = 0; i < MFCTBLSIZ; i++) 532 m_freem(mfctable[i]); 533 534 bzero((caddr_t)mfctable, sizeof(mfctable)); 535 bzero((caddr_t)mfchash, sizeof(mfchash)); 536 537 /* 538 * Reset de-encapsulation cache 539 */ 540 last_encap_src = NULL; 541 last_encap_vif = NULL; 542 543 ip_mrouter = NULL; 544 545 splx(s); 546 547 if (mrtdebug) 548 log(LOG_DEBUG, "ip_mrouter_done"); 549 550 return 0; 551} 552 553#ifndef MROUTE_LKM 554int (*ip_mrouter_done)(void) = X_ip_mrouter_done; 555#endif 556 557/* 558 * Add a vif to the vif table 559 */ 560static int 561add_vif(vifcp) 562 register struct vifctl *vifcp; 563{ 564 register struct vif *vifp = viftable + vifcp->vifc_vifi; 565 static struct sockaddr_in sin = {AF_INET}; 566 struct ifaddr *ifa; 567 struct ifnet *ifp; 568 struct ifreq ifr; 569 int error, s; 570 struct tbf *v_tbf = tbftable + vifcp->vifc_vifi; 571 572 if (vifcp->vifc_vifi >= MAXVIFS) return EINVAL; 573 if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE; 574 575 /* Find the interface with an address in AF_INET family */ 576 sin.sin_addr = vifcp->vifc_lcl_addr; 577 ifa = ifa_ifwithaddr((struct sockaddr *)&sin); 578 if (ifa == 0) return EADDRNOTAVAIL; 579 ifp = ifa->ifa_ifp; 580 581 if (vifcp->vifc_flags & VIFF_TUNNEL) { 582 if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) { 583 static int inited = 0; 584 if(!inited) { 585 for (s = 0; s < MAXVIFS; ++s) { 586 multicast_decap_if[s].if_name = "mdecap"; 587 multicast_decap_if[s].if_unit = s; 588 } 589 inited = 1; 590 } 591 ifp = &multicast_decap_if[vifcp->vifc_vifi]; 592 } else { 593 ifp = 0; 594 } 595 } else { 596 /* Make sure the interface supports multicast */ 597 if ((ifp->if_flags & IFF_MULTICAST) == 0) 598 return EOPNOTSUPP; 599 600 /* Enable promiscuous reception of all IP multicasts from the if */ 601 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; 602 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY; 603 s = splnet(); 604 error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 
(caddr_t)&ifr); 605 splx(s); 606 if (error) 607 return error; 608 } 609 610 s = splnet(); 611 /* define parameters for the tbf structure */ 612 vifp->v_tbf = v_tbf; 613 vifp->v_tbf->q_len = 0; 614 vifp->v_tbf->n_tok = 0; 615 vifp->v_tbf->last_pkt_t = 0; 616 617 vifp->v_flags = vifcp->vifc_flags; 618 vifp->v_threshold = vifcp->vifc_threshold; 619 vifp->v_lcl_addr = vifcp->vifc_lcl_addr; 620 vifp->v_rmt_addr = vifcp->vifc_rmt_addr; 621 vifp->v_ifp = ifp; 622 vifp->v_rate_limit= vifcp->vifc_rate_limit; 623 /* initialize per vif pkt counters */ 624 vifp->v_pkt_in = 0; 625 vifp->v_pkt_out = 0; 626 splx(s); 627 628 /* Adjust numvifs up if the vifi is higher than numvifs */ 629 if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1; 630 631 if (mrtdebug) 632 log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d", 633 vifcp->vifc_vifi, 634 ntohl(vifcp->vifc_lcl_addr.s_addr), 635 (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask", 636 ntohl(vifcp->vifc_rmt_addr.s_addr), 637 vifcp->vifc_threshold, 638 vifcp->vifc_rate_limit); 639 640 return 0; 641} 642 643/* 644 * Delete a vif from the vif table 645 */ 646static int 647del_vif(vifip) 648 vifi_t *vifip; 649{ 650 register struct vif *vifp = viftable + *vifip; 651 register vifi_t vifi; 652 struct ifnet *ifp; 653 struct ifreq ifr; 654 int s; 655 656 if (*vifip >= numvifs) return EINVAL; 657 if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL; 658 659 s = splnet(); 660 661 if (!(vifp->v_flags & VIFF_TUNNEL)) { 662 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; 663 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY; 664 ifp = vifp->v_ifp; 665 (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); 666 } 667 668 if (vifp == last_encap_vif) { 669 last_encap_vif = 0; 670 last_encap_src = 0; 671 } 672 673 bzero((caddr_t)qtable[*vifip], 674 sizeof(qtable[*vifip])); 675 bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf))); 676 bzero((caddr_t)vifp, sizeof (*vifp)); 677 678 /* 
Adjust numvifs down */ 679 for (vifi = numvifs; vifi > 0; vifi--) 680 if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break; 681 numvifs = vifi; 682 683 splx(s); 684 685 if (mrtdebug) 686 log(LOG_DEBUG, "del_vif %d, numvifs %d", *vifip, numvifs); 687 688 return 0; 689} 690 691/* 692 * Add an mfc entry 693 */ 694static int 695add_mfc(mfccp) 696 struct mfcctl *mfccp; 697{ 698 struct mfc *rt; 699 struct mfc *rt1 = 0; 700 register struct mbuf *mb_rt; 701 struct mbuf *prev_mb_rt; 702 u_long hash; 703 struct mbuf *mb_ntry; 704 struct rtdetq *rte; 705 register u_short nstl; 706 int s; 707 int i; 708 709 rt = mfcfind(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); 710 711 /* If an entry already exists, just update the fields */ 712 if (rt) { 713 if (mrtdebug) 714 log(LOG_DEBUG,"add_mfc update o %x g %x m %x p %x", 715 ntohl(mfccp->mfcc_origin.s_addr), 716 ntohl(mfccp->mfcc_mcastgrp.s_addr), 717 ntohl(mfccp->mfcc_originmask.s_addr), 718 mfccp->mfcc_parent); 719 720 s = splnet(); 721 rt->mfc_parent = mfccp->mfcc_parent; 722 for (i = 0; i < numvifs; i++) 723 VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); 724 splx(s); 725 return 0; 726 } 727 728 /* 729 * Find the entry for which the upcall was made and update 730 */ 731 s = splnet(); 732 hash = nethash_fc(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); 733 for (prev_mb_rt = mb_rt = mfctable[hash], nstl = 0; 734 mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { 735 736 rt = mtod(mb_rt, struct mfc *); 737 if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr) 738 == mfccp->mfcc_origin.s_addr) && 739 (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) && 740 (mb_rt->m_act != NULL)) { 741 742 if (!nstl++) { 743 if (mrtdebug) 744 log(LOG_DEBUG,"add_mfc o %x g %x m %x p %x dbg %x", 745 ntohl(mfccp->mfcc_origin.s_addr), 746 ntohl(mfccp->mfcc_mcastgrp.s_addr), 747 ntohl(mfccp->mfcc_originmask.s_addr), 748 mfccp->mfcc_parent, mb_rt->m_act); 749 750 rt->mfc_origin = mfccp->mfcc_origin; 751 
rt->mfc_originmask = mfccp->mfcc_originmask; 752 rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 753 rt->mfc_parent = mfccp->mfcc_parent; 754 for (i = 0; i < numvifs; i++) 755 VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); 756 /* initialize pkt counters per src-grp */ 757 rt->mfc_pkt_cnt = 0; 758 rt1 = rt; 759 } 760 761 /* prevent cleanup of cache entry */ 762 untimeout(cleanup_cache, (caddr_t)mb_rt); 763 timeout_val--; 764 765 /* free packets Qed at the end of this entry */ 766 while (mb_rt->m_act) { 767 mb_ntry = mb_rt->m_act; 768 rte = mtod(mb_ntry, struct rtdetq *); 769 ip_mdq(rte->m, rte->ifp, rte->tunnel_src, 770 rt1, rte->imo); 771 mb_rt->m_act = mb_ntry->m_act; 772 m_freem(rte->m); 773 m_free(mb_ntry); 774 } 775 776 /* 777 * If more than one entry was created for a single upcall 778 * delete that entry 779 */ 780 if (nstl > 1) { 781 MFREE(mb_rt, prev_mb_rt->m_next); 782 mb_rt = prev_mb_rt; 783 } 784 } 785 } 786 787 /* 788 * It is possible that an entry is being inserted without an upcall 789 */ 790 if (nstl == 0) { 791 if (mrtdebug) 792 log(LOG_DEBUG,"add_mfc no upcall h %d o %x g %x m %x p %x", 793 hash, ntohl(mfccp->mfcc_origin.s_addr), 794 ntohl(mfccp->mfcc_mcastgrp.s_addr), 795 ntohl(mfccp->mfcc_originmask.s_addr), 796 mfccp->mfcc_parent); 797 798 for (prev_mb_rt = mb_rt = mfctable[hash]; 799 mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { 800 801 rt = mtod(mb_rt, struct mfc *); 802 if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr) 803 == mfccp->mfcc_origin.s_addr) && 804 (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) { 805 806 rt->mfc_origin = mfccp->mfcc_origin; 807 rt->mfc_originmask = mfccp->mfcc_originmask; 808 rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 809 rt->mfc_parent = mfccp->mfcc_parent; 810 for (i = 0; i < numvifs; i++) 811 VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); 812 /* initialize pkt counters per src-grp */ 813 rt->mfc_pkt_cnt = 0; 814 } 815 } 816 if (mb_rt == NULL) { 817 /* no upcall, so make a new entry */ 818 
MGET(mb_rt, M_DONTWAIT, MT_MRTABLE); 819 if (mb_rt == NULL) { 820 splx(s); 821 return ENOBUFS; 822 } 823 824 rt = mtod(mb_rt, struct mfc *); 825 826 /* insert new entry at head of hash chain */ 827 rt->mfc_origin = mfccp->mfcc_origin; 828 rt->mfc_originmask = mfccp->mfcc_originmask; 829 rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 830 rt->mfc_parent = mfccp->mfcc_parent; 831 for (i = 0; i < numvifs; i++) 832 VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); 833 /* initialize pkt counters per src-grp */ 834 rt->mfc_pkt_cnt = 0; 835 836 /* link into table */ 837 mb_rt->m_next = mfctable[hash]; 838 mfctable[hash] = mb_rt; 839 mb_rt->m_act = NULL; 840 } 841 } 842 splx(s); 843 return 0; 844} 845 846/* 847 * Delete an mfc entry 848 */ 849static int 850del_mfc(mfccp) 851 struct delmfcctl *mfccp; 852{ 853 struct in_addr origin; 854 struct in_addr mcastgrp; 855 struct mfc *rt; 856 struct mbuf *mb_rt; 857 struct mbuf *prev_mb_rt; 858 u_long hash; 859 struct mfc **cmfc; 860 struct mfc **cmfcend; 861 int s; 862 863 origin = mfccp->mfcc_origin; 864 mcastgrp = mfccp->mfcc_mcastgrp; 865 hash = nethash_fc(origin.s_addr, mcastgrp.s_addr); 866 867 if (mrtdebug) 868 log(LOG_DEBUG,"del_mfc orig %x mcastgrp %x", 869 ntohl(origin.s_addr), ntohl(mcastgrp.s_addr)); 870 871 for (prev_mb_rt = mb_rt = mfctable[hash] 872 ; mb_rt 873 ; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { 874 rt = mtod(mb_rt, struct mfc *); 875 if (origin.s_addr == rt->mfc_origin.s_addr && 876 mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && 877 mb_rt->m_act == NULL) 878 break; 879 } 880 if (mb_rt == NULL) { 881 return ESRCH; 882 } 883 884 s = splnet(); 885 886 cmfc = mfchash; 887 cmfcend = cmfc + MFCHASHSIZ; 888 for ( ; cmfc < cmfcend; ++cmfc) 889 if (*cmfc == rt) 890 *cmfc = 0; 891 892 if (prev_mb_rt != mb_rt) { /* if moved past head of list */ 893 MFREE(mb_rt, prev_mb_rt->m_next); 894 } else /* delete head of list, it is in the table */ 895 mfctable[hash] = m_free(mb_rt); 896 897 splx(s); 898 899 return 0; 900} 901 902/* 
903 * IP multicast forwarding function. This function assumes that the packet 904 * pointed to by "ip" has arrived on (or is about to be sent to) the interface 905 * pointed to by "ifp", and the packet is to be relayed to other networks 906 * that have members of the packet's destination IP multicast group. 907 * 908 * The packet is returned unscathed to the caller, unless it is tunneled 909 * or erroneous, in which case a non-zero return value tells the caller to 910 * discard it. 911 */ 912 913#define IP_HDR_LEN 20 /* # bytes of fixed IP header (excluding options) */ 914#define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ 915 916int 917X_ip_mforward(ip, ifp, m, imo) 918 register struct ip *ip; 919 struct ifnet *ifp; 920 struct mbuf *m; 921 struct ip_moptions *imo; 922{ 923 register struct mfc *rt; 924 register u_char *ipoptions; 925 u_long tunnel_src; 926 static struct sockproto k_igmpproto = { AF_INET, IPPROTO_IGMP }; 927 static struct sockaddr_in k_igmpsrc = { AF_INET }; 928 static struct sockaddr_in k_igmpdst = { AF_INET }; 929 register struct mbuf *mm; 930 register struct ip *k_data; 931 int s; 932 933 if (mrtdebug > 1) 934 log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %x", 935 ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp); 936 937 if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 || 938 (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) { 939 /* 940 * Packet arrived via a physical interface. 941 */ 942 tunnel_src = 0; 943 } else { 944 /* 945 * Packet arrived through a source-route tunnel. 946 * 947 * A source-route tunneled packet has a single NOP option and a 948 * two-element 949 * loose-source-and-record-route (LSRR) option immediately following 950 * the fixed-size part of the IP header. 
At this point in processing, 951 * the IP header should contain the following IP addresses: 952 * 953 * original source - in the source address field 954 * destination group - in the destination address field 955 * remote tunnel end-point - in the first element of LSRR 956 * one of this host's addrs - in the second element of LSRR 957 * 958 * NOTE: RFC-1075 would have the original source and remote tunnel 959 * end-point addresses swapped. However, that could cause 960 * delivery of ICMP error messages to innocent applications 961 * on intermediate routing hosts! Therefore, we hereby 962 * change the spec. 963 */ 964 965 /* 966 * Verify that the tunnel options are well-formed. 967 */ 968 if (ipoptions[0] != IPOPT_NOP || 969 ipoptions[2] != 11 || /* LSRR option length */ 970 ipoptions[3] != 12 || /* LSRR address pointer */ 971 (tunnel_src = *(u_long *)(&ipoptions[4])) == 0) { 972 mrtstat.mrts_bad_tunnel++; 973 if (mrtdebug) 974 log(LOG_DEBUG, 975 "ip_mforward: bad tunnel from %u (%x %x %x %x %x %x)", 976 ntohl(ip->ip_src.s_addr), 977 ipoptions[0], ipoptions[1], ipoptions[2], ipoptions[3], 978 *(u_long *)(&ipoptions[4]), *(u_long *)(&ipoptions[8])); 979 return 1; 980 } 981 982 /* 983 * Delete the tunnel options from the packet. 984 */ 985 ovbcopy((caddr_t)(ipoptions + TUNNEL_LEN), (caddr_t)ipoptions, 986 (unsigned)(m->m_len - (IP_HDR_LEN + TUNNEL_LEN))); 987 m->m_len -= TUNNEL_LEN; 988 ip->ip_len -= TUNNEL_LEN; 989 ip->ip_hl -= TUNNEL_LEN >> 2; 990 991 ifp = 0; 992 } 993 994 /* 995 * Don't forward a packet with time-to-live of zero or one, 996 * or a packet destined to a local-only group. 
997 */ 998 if (ip->ip_ttl <= 1 || 999 ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP) 1000 return (int)tunnel_src; 1001 1002 /* 1003 * Determine forwarding vifs from the forwarding cache table 1004 */ 1005 s = splnet(); 1006 MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt); 1007 1008 /* Entry exists, so forward if necessary */ 1009 if (rt != NULL) { 1010 splx(s); 1011 return (ip_mdq(m, ifp, tunnel_src, rt, imo)); 1012 } 1013 1014 else { 1015 /* 1016 * If we don't have a route for packet's origin, 1017 * Make a copy of the packet & 1018 * send message to routing daemon 1019 */ 1020 1021 register struct mbuf *mb_rt; 1022 register struct mbuf *mb_ntry; 1023 register struct mbuf *mb0; 1024 register struct rtdetq *rte; 1025 register struct mbuf *rte_m; 1026 register u_long hash; 1027 1028 mrtstat.mrts_no_route++; 1029 if (mrtdebug) 1030 log(LOG_DEBUG, "ip_mforward: no rte s %x g %x", 1031 ntohl(ip->ip_src.s_addr), 1032 ntohl(ip->ip_dst.s_addr)); 1033 1034 /* is there an upcall waiting for this packet? 
*/ 1035 hash = nethash_fc(ip->ip_src.s_addr, ip->ip_dst.s_addr); 1036 for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) { 1037 rt = mtod(mb_rt, struct mfc *); 1038 if (((ip->ip_src.s_addr & rt->mfc_originmask.s_addr) == 1039 rt->mfc_origin.s_addr) && 1040 (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) && 1041 (mb_rt->m_act != NULL)) 1042 break; 1043 } 1044 1045 if (mb_rt == NULL) { 1046 /* no upcall, so make a new entry */ 1047 MGET(mb_rt, M_DONTWAIT, MT_MRTABLE); 1048 if (mb_rt == NULL) { 1049 splx(s); 1050 return ENOBUFS; 1051 } 1052 1053 rt = mtod(mb_rt, struct mfc *); 1054 1055 /* insert new entry at head of hash chain */ 1056 rt->mfc_origin.s_addr = ip->ip_src.s_addr; 1057 rt->mfc_originmask.s_addr = (u_long)0xffffffff; 1058 rt->mfc_mcastgrp.s_addr = ip->ip_dst.s_addr; 1059 1060 /* link into table */ 1061 hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr); 1062 mb_rt->m_next = mfctable[hash]; 1063 mfctable[hash] = mb_rt; 1064 mb_rt->m_act = NULL; 1065 1066 } 1067 1068 /* determine if q has overflowed */ 1069 for (rte_m = mb_rt, hash = 0; rte_m->m_act; rte_m = rte_m->m_act) 1070 hash++; 1071 1072 if (hash > MAX_UPQ) { 1073 mrtstat.mrts_upq_ovflw++; 1074 splx(s); 1075 return 0; 1076 } 1077 1078 /* add this packet and timing, ifp info to m_act */ 1079 MGET(mb_ntry, M_DONTWAIT, MT_DATA); 1080 if (mb_ntry == NULL) { 1081 splx(s); 1082 return ENOBUFS; 1083 } 1084 1085 mb_ntry->m_act = NULL; 1086 rte = mtod(mb_ntry, struct rtdetq *); 1087 1088 mb0 = m_copy(m, 0, M_COPYALL); 1089 if (mb0 == NULL) { 1090 splx(s); 1091 return ENOBUFS; 1092 } 1093 1094 rte->m = mb0; 1095 rte->ifp = ifp; 1096 rte->tunnel_src = tunnel_src; 1097 rte->imo = imo; 1098 1099 rte_m->m_act = mb_ntry; 1100 1101 splx(s); 1102 1103 if (hash == 0) { 1104 /* 1105 * Send message to routing daemon to install 1106 * a route into the kernel table 1107 */ 1108 k_igmpsrc.sin_addr = ip->ip_src; 1109 k_igmpdst.sin_addr = ip->ip_dst; 1110 1111 mm = m_copy(m, 0, M_COPYALL); 1112 if (mm 
            == NULL) {
                splx(s);
                return ENOBUFS;
            }

            /* fake an IGMP-less header so the daemon sees a bare upcall */
            k_data = mtod(mm, struct ip *);
            k_data->ip_p = 0;

            mrtstat.mrts_upcalls++;

            /* hand the upcall to the user-level multicast routing daemon */
            raw_input(mm, &k_igmpproto,
                      (struct sockaddr *)&k_igmpsrc,
                      (struct sockaddr *)&k_igmpdst);

            /* set timer to cleanup entry if upcall is lost */
            timeout(cleanup_cache, (caddr_t)mb_rt, 100);
            timeout_val++;
        }

        return 0;
    }
}

#ifndef MROUTE_LKM
int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
                   struct ip_moptions *) = X_ip_mforward;
#endif

/*
 * Clean up the cache entry if upcall is not serviced.
 *
 * Scheduled via timeout() by ip_mforward when a resolution request is
 * sent to the routing daemon; if the daemon never installs a route for
 * this (source, group), drop all packets queued on the pending entry
 * and unlink it from mfctable.
 *
 * xmb_rt: the mbuf holding the pending struct mfc (cast from caddr_t
 *         because timeout() handlers take a void * argument).
 */
static void
cleanup_cache(xmb_rt)
    void *xmb_rt;
{
    struct mbuf *mb_rt = xmb_rt;
    struct mfc *rt;
    u_long hash;
    struct mbuf *prev_m0;
    struct mbuf *m0;
    struct mbuf *m;
    struct rtdetq *rte;
    int s;

    rt = mtod(mb_rt, struct mfc *);
    hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr);

    if (mrtdebug)
        log(LOG_DEBUG, "ip_mforward: cleanup ipm %d h %d s %x g %x",
            ip_mrouter, hash, ntohl(rt->mfc_origin.s_addr),
            ntohl(rt->mfc_mcastgrp.s_addr));

    mrtstat.mrts_cache_cleanups++;

    /*
     * determine entry to be cleaned up in cache table
     * (walk the hash chain until we find our own mbuf)
     */
    s = splnet();
    for (prev_m0 = m0 = mfctable[hash]; m0; prev_m0 = m0, m0 = m0->m_next)
        if (m0 == mb_rt)
            break;
    /*
     * NOTE(review): if mb_rt is no longer on the chain, m0 ends up NULL
     * here and the unlink below frees the wrong thing — presumably the
     * entry is guaranteed to still be present while this timeout is
     * pending; confirm against the install/expire paths.
     */

    /*
     * drop all the packets
     * free the mbuf with the pkt, if, timing info
     * (m_act chains the struct rtdetq mbufs; each holds one queued pkt)
     */
    while (mb_rt->m_act) {
        m = mb_rt->m_act;
        mb_rt->m_act = m->m_act;

        rte = mtod(m, struct rtdetq *);
        m_freem(rte->m);
        m_free(m);
    }

    /*
     * Delete the entry from the cache
     */
    if (prev_m0 != m0) {        /* if moved past head of list */
        MFREE(m0, prev_m0->m_next);
    } else                      /* delete head of list, it is in the table */
        mfctable[hash] = m_free(m0);

    timeout_val--;

    splx(s);
}

/*
 * Packet forwarding routine once entry in the cache is made.
 *
 * m          - the multicast packet (IP header at mtod(m, ...))
 * ifp        - interface the packet arrived on (0 for src-route tunnels)
 * tunnel_src - remote tunnel endpoint if encapsulated, else 0
 * rt         - the (source, group) forwarding cache entry
 * imo        - multicast options from the sender, may select one vif
 *
 * Returns nonzero if the packet was consumed by a single-vif send or
 * arrived via a tunnel (caller uses this to decide local delivery).
 */
static int
ip_mdq(m, ifp, tunnel_src, rt, imo)
    register struct mbuf *m;
    register struct ifnet *ifp;
    register u_long tunnel_src;
    register struct mfc *rt;
    register struct ip_moptions *imo;
{
    register struct ip *ip = mtod(m, struct ip *);
    register vifi_t vifi;
    register struct vif *vifp;

    /*
     * Don't forward if it didn't arrive from the parent vif for its origin.
     * Notes: v_ifp is zero for src route tunnels, multicast_decap_if
     * for encapsulated tunnels and a real ifnet for non-tunnels so
     * the first part of the if catches wrong physical interface or
     * tunnel type; v_rmt_addr is zero for non-tunneled packets so
     * the 2nd part catches both packets that arrive via a tunnel
     * that shouldn't and packets that arrive via the wrong tunnel.
     */
    vifi = rt->mfc_parent;
    if (viftable[vifi].v_ifp != ifp ||
        (ifp == 0 && viftable[vifi].v_rmt_addr.s_addr != tunnel_src)) {
        /* came in the wrong interface */
        if (mrtdebug)
            log(LOG_DEBUG, "wrong if: ifp %x vifi %d",
                ifp, vifi);
        ++mrtstat.mrts_wrong_if;
        /* nonzero iff the packet came in over a tunnel */
        return (int)tunnel_src;
    }

    /* increment the interface and s-g counters */
    viftable[vifi].v_pkt_in++;
    rt->mfc_pkt_cnt++;

    /*
     * For each vif, decide if a copy of the packet should be forwarded.
     * Forward if:
     *      - the ttl exceeds the vif's threshold
     *      - there are group members downstream on interface
     *
     * MC_SEND dispatches on the vif type: source-route tunnel,
     * IP-in-IP encapsulating tunnel, or physical interface.
     */
#define MC_SEND(ip,vifp,m) {                    \
        (vifp)->v_pkt_out++;                    \
        if ((vifp)->v_flags & VIFF_SRCRT)       \
            srcrt_send((ip), (vifp), (m));      \
        else if ((vifp)->v_flags & VIFF_TUNNEL) \
            encap_send((ip), (vifp), (m));      \
        else                                    \
            phyint_send((ip), (vifp), (m));     \
    }

/* If no options or the imo_multicast_vif option is 0, don't do this part
 * (imo_multicast_vif is 1-based; 0 means "not set", hence the -1 below,
 * which also makes an unset value wrap above numvifs as unsigned)
 */
    if ((imo != NULL) &&
        (( vifi = imo->imo_multicast_vif - 1) < numvifs) /*&& (vifi>=0)*/)
    {
        MC_SEND(ip,viftable+vifi,m);
        return (1);     /* make sure we are done: No more physical sends */
    }

    /* normal case: send out every vif whose threshold the ttl clears */
    for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++)
        if ((rt->mfc_ttls[vifi] > 0) &&
            (ip->ip_ttl > rt->mfc_ttls[vifi]))
            MC_SEND(ip, vifp, m);

    return 0;
}

/* check if a vif number is legal/ok. This is used by ip_output, to export
 * numvifs there,
 */
int
X_legal_vif_num(vif)
    int vif;
{
    /*
     * NOTE(review): "vif <= numvifs" accepts vif == numvifs, one past
     * the last valid index of viftable[] (valid vifs are 0..numvifs-1).
     * Looks like an off-by-one; confirm against callers before changing.
     */
    if (vif>=0 && vif<=numvifs)
        return(1);
    else
        return(0);
}

#ifndef MROUTE_LKM
int (*legal_vif_num)(int) = X_legal_vif_num;
#endif

/*
 * Send a copy of the packet out a physical interface, via ip_output
 * with freshly allocated multicast options (ttl decremented, loopback
 * enabled), subject to the vif's token-bucket rate limit.
 */
static void
phyint_send(ip, vifp, m)
    struct ip *ip;
    struct vif *vifp;
    struct mbuf *m;
{
    register struct mbuf *mb_copy;
    register struct ip_moptions *imo;

    if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL)
        return;

    /* imo is freed by tbf_send_packet after the ip_output call */
    MALLOC(imo, struct ip_moptions *, sizeof *imo, M_IPMOPTS, M_NOWAIT);
    if (imo == NULL) {
        m_freem(mb_copy);
        return;
    }

    imo->imo_multicast_ifp = vifp->v_ifp;
    imo->imo_multicast_ttl = ip->ip_ttl - 1;
    imo->imo_multicast_loop = 1;

    if (vifp->v_rate_limit <= 0)
        tbf_send_packet(vifp, mb_copy, imo);
    else
        tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len,
                    imo);
}

/*
 * Send a copy of the packet through a source-route tunnel: grow the IP
 * header by TUNNEL_LEN and insert a NOP + LSRR option routing the packet
 * via the local and remote tunnel endpoints, with the original group
 * address as final destination.
 */
static void
srcrt_send(ip, vifp, m)
    struct ip *ip;
    struct vif *vifp;
    struct mbuf *m;
{
    struct mbuf *mb_copy, *mb_opts;
    register struct ip *ip_copy;
    u_char *cp;

    /*
     * Make sure that adding the tunnel options won't exceed the
     * maximum allowed number of option bytes (60 = max IP header).
     */
    if (ip->ip_hl > (60 - TUNNEL_LEN) >> 2) {
        mrtstat.mrts_cant_tunnel++;
        if (mrtdebug)
            log(LOG_DEBUG, "srcrt_send: no room for tunnel options, from %u",
                ntohl(ip->ip_src.s_addr));
        return;
    }

    if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL)
        return;

    ip_copy = mtod(mb_copy, struct ip *);
    ip_copy->ip_ttl--;
    ip_copy->ip_dst = vifp->v_rmt_addr;         /* remote tunnel end-point */
    /*
     * Adjust the ip header length to account for the tunnel options.
     */
    ip_copy->ip_hl += TUNNEL_LEN >> 2;
    ip_copy->ip_len += TUNNEL_LEN;
    MGET(mb_opts, M_DONTWAIT, MT_HEADER);
    if (mb_opts == NULL) {
        m_freem(mb_copy);
        return;
    }
    /*
     * 'Delete' the base ip header from the mb_copy chain
     */
    mb_copy->m_len -= IP_HDR_LEN;
    mb_copy->m_data += IP_HDR_LEN;
    /*
     * Make mb_opts be the new head of the packet chain.
     * Any options of the packet were left in the old packet chain head.
     * The 16-byte offset presumably leaves room for a link-level header
     * to be prepended later — TODO confirm.
     */
    mb_opts->m_next = mb_copy;
    mb_opts->m_data += 16;
    mb_opts->m_len = IP_HDR_LEN + TUNNEL_LEN;
    /*
     * Copy the base ip header from the mb_copy chain to the new head mbuf
     */
    bcopy((caddr_t)ip_copy, mtod(mb_opts, caddr_t), IP_HDR_LEN);
    /*
     * Add the NOP and LSRR after the base ip header
     */
    cp = mtod(mb_opts, u_char *) + IP_HDR_LEN;
    *cp++ = IPOPT_NOP;
    *cp++ = IPOPT_LSRR;
    *cp++ = 11;         /* LSRR option length */
    *cp++ = 8;          /* LSSR pointer to second element */
    /*
     * NOTE(review): these two stores write a u_long through a u_char
     * pointer at an odd offset; fine on machines tolerating unaligned
     * access, would fault on strict-alignment CPUs — verify targets.
     */
    *(u_long*)cp = vifp->v_lcl_addr.s_addr;     /* local tunnel end-point */
    cp += 4;
    *(u_long*)cp = ip->ip_dst.s_addr;           /* destination group */

    if (vifp->v_rate_limit <= 0)
        tbf_send_packet(vifp, mb_opts, 0);
    else
        tbf_control(vifp, mb_opts,
                    mtod(mb_opts, struct ip *), ip_copy->ip_len, 0);
}

/*
 * Send a copy of the packet through an IP-in-IP encapsulating tunnel:
 * prepend a fresh outer IP header (template multicast_encap_iphdr)
 * addressed from the local to the remote tunnel endpoint, then fix up
 * the (now inner) original header's ttl and checksum.
 */
static void
encap_send(ip, vifp, m)
    register struct ip *ip;
    register struct vif *vifp;
    register struct mbuf *m;
{
    register struct mbuf *mb_copy;
    register struct ip *ip_copy;
    register int i, len = ip->ip_len;

    /*
     * copy the old packet & pullup it's IP header into the
     * new mbuf so we can modify it.  Try to fill the new
     * mbuf since if we don't the ethernet driver will.
     */
    MGET(mb_copy, M_DONTWAIT, MT_DATA);
    if (mb_copy == NULL)
        return;
    /* leave leading space (presumably for a link-level header) */
    mb_copy->m_data += 16;
    mb_copy->m_len = sizeof(multicast_encap_iphdr);

    if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) {
        m_freem(mb_copy);
        return;
    }
    i = MHLEN - M_LEADINGSPACE(mb_copy);
    if (i > len)
        i = len;
    mb_copy = m_pullup(mb_copy, i);
    if (mb_copy == NULL)
        return;

    /*
     * fill in the encapsulating IP header.
     */
    ip_copy = mtod(mb_copy, struct ip *);
    *ip_copy = multicast_encap_iphdr;
    ip_copy->ip_id = htons(ip_id++);
    ip_copy->ip_len += len;
    ip_copy->ip_src = vifp->v_lcl_addr;
    ip_copy->ip_dst = vifp->v_rmt_addr;

    /*
     * turn the encapsulated IP header back into a valid one.
     */
    ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr));
    --ip->ip_ttl;
    HTONS(ip->ip_len);
    HTONS(ip->ip_off);
    ip->ip_sum = 0;
#if defined(LBL) && !defined(ultrix)
    ip->ip_sum = ~oc_cksum((caddr_t)ip, ip->ip_hl << 2, 0);
#else
    /* temporarily advance m_data so in_cksum sees the inner header */
    mb_copy->m_data += sizeof(multicast_encap_iphdr);
    ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
    mb_copy->m_data -= sizeof(multicast_encap_iphdr);
#endif

    if (vifp->v_rate_limit <= 0)
        tbf_send_packet(vifp, mb_copy, 0);
    else
        tbf_control(vifp, mb_copy, ip, ip_copy->ip_len, 0);
}

/*
 * De-encapsulate a packet and feed it back through ip input (this
 * routine is called whenever IP gets a packet with proto type
 * ENCAP_PROTO and a local destination address).
 */
void
#ifdef MROUTE_LKM
X_multiencap_decap(m)
#else
multiencap_decap(m)
#endif
    register struct mbuf *m;
{
    struct ifnet *ifp = m->m_pkthdr.rcvif;
    register struct ip *ip = mtod(m, struct ip *);
    register int hlen = ip->ip_hl << 2;
    register int s;
    register struct ifqueue *ifq;
    register struct vif *vifp;

    /* not an encapsulated multicast packet; treat as raw IP */
    if (ip->ip_p != ENCAP_PROTO) {
        rip_input(m);
        return;
    }
    /*
     * dump the packet if it's not to a multicast destination or if
     * we don't have an encapsulating tunnel with the source.
     * Note:  This code assumes that the remote site IP address
     * uniquely identifies the tunnel (i.e., that this site has
     * at most one tunnel with the remote site).
     */
    if (!IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) {
        ++mrtstat.mrts_bad_tunnel;
        m_freem(m);
        return;
    }
    /* one-entry tunnel lookup cache keyed by outer source address */
    if (ip->ip_src.s_addr != last_encap_src) {
        register struct vif *vife;

        vifp = viftable;
        vife = vifp + numvifs;
        last_encap_src = ip->ip_src.s_addr;
        last_encap_vif = 0;
        for ( ; vifp < vife; ++vifp)
            if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) {
                if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT))
                    == VIFF_TUNNEL)
                    last_encap_vif = vifp;
                break;
            }
    }
    if ((vifp = last_encap_vif) == 0) {
        last_encap_src = 0;
        mrtstat.mrts_cant_tunnel++; /*XXX*/
        m_freem(m);
        if (mrtdebug)
            log(LOG_DEBUG, "ip_mforward: no tunnel with %u",
                ntohl(ip->ip_src.s_addr));
        return;
    }
    ifp = vifp->v_ifp;
    /*
     * strip the outer IP header, but keep room in front of the inner
     * packet to stash the pseudo receive-interface pointer that the
     * ipintrq consumer expects to find there
     */
    hlen -= sizeof(struct ifnet *);
    m->m_data += hlen;
    m->m_len -= hlen;
    *(mtod(m, struct ifnet **)) = ifp;
    ifq = &ipintrq;
    s = splimp();
    if (IF_QFULL(ifq)) {
        IF_DROP(ifq);
        m_freem(m);
    } else {
        IF_ENQUEUE(ifq, m);
        /*
         * normally we would need a "schednetisr(NETISR_IP)"
         * here but we were called by ip_input and it is going
         * to loop back & try to dequeue the packet we just
         * queued as soon as we return so we avoid the
         * unnecessary software interrupt.
         */
    }
    splx(s);
}

/*
 * Token bucket filter module.
 *
 * Rate-limits output on a vif: if enough tokens are available and the
 * queue is empty the packet goes straight out; otherwise it is queued
 * (or, if the queue is full, a lower-priority packet is discarded to
 * make room), and a 1-tick timeout drains the queue as tokens refill.
 */
void
tbf_control(vifp, m, ip, p_len, imo)
    register struct vif *vifp;
    register struct mbuf *m;
    register struct ip *ip;
    register u_long p_len;
    struct ip_moptions *imo;
{
    tbf_update_tokens(vifp);

    /* if there are enough tokens,
     * and the queue is empty,
     * send this packet out
     */

    if (vifp->v_tbf->q_len == 0) {
        if (p_len <= vifp->v_tbf->n_tok) {
            vifp->v_tbf->n_tok -= p_len;
            tbf_send_packet(vifp, m, imo);
        } else if (p_len > MAX_BKT_SIZE) {
            /* drop if packet is too large */
            mrtstat.mrts_pkt2large++;
            m_freem(m);
            return;
        } else {
            /* queue packet and timeout till later */
            tbf_queue(vifp, m, ip, imo);
            timeout(tbf_reprocess_q, (caddr_t)vifp, 1);
        }
    } else if (vifp->v_tbf->q_len < MAXQSIZE) {
        /* finite queue length, so queue pkts and process queue */
        tbf_queue(vifp, m, ip, imo);
        tbf_process_q(vifp);
    } else {
        /* queue length too much, try to dq and queue and process */
        if (!tbf_dq_sel(vifp, ip)) {
            mrtstat.mrts_q_overflow++;
            m_freem(m);
            return;
        } else {
            tbf_queue(vifp, m, ip, imo);
            tbf_process_q(vifp);
        }
    }
    return;
}

/*
 * adds a packet to the queue at the interface
 * (caller guarantees q_len < MAXQSIZE, so the index below is in range)
 */
void
tbf_queue(vifp, m, ip, imo)
    register struct vif *vifp;
    register struct mbuf *m;
    register struct ip *ip;
    struct ip_moptions *imo;
{
    register u_long ql;
    register int index = (vifp - viftable);
    register int s = splnet();

    ql = vifp->v_tbf->q_len;

    qtable[index][ql].pkt_m = m;
    qtable[index][ql].pkt_len = (mtod(m, struct ip *))->ip_len;
    qtable[index][ql].pkt_ip = ip;
    qtable[index][ql].pkt_imo = imo;

    vifp->v_tbf->q_len++;
    splx(s);
}


/*
 * processes the queue at the interface
 */
void
tbf_process_q(vifp)
    register struct vif *vifp;
{
    register struct pkt_queue pkt_1;
    register int index = (vifp - viftable);
    register int s = splnet();

    /* loop through the queue at the interface and send as many packets
     * as possible
     */
    while (vifp->v_tbf->q_len > 0) {
        /* locate the first packet */
        pkt_1.pkt_len = ((qtable[index][0]).pkt_len);
        pkt_1.pkt_m   = (qtable[index][0]).pkt_m;
        pkt_1.pkt_ip  = (qtable[index][0]).pkt_ip;
        pkt_1.pkt_imo = (qtable[index][0]).pkt_imo;

        /* determine if the packet can be sent */
        if (pkt_1.pkt_len <= vifp->v_tbf->n_tok) {
            /* if so,
             * reduce no of tokens, dequeue the queue,
             * send the packet.
             */
            vifp->v_tbf->n_tok -= pkt_1.pkt_len;

            tbf_dequeue(vifp, 0);

            tbf_send_packet(vifp, pkt_1.pkt_m, pkt_1.pkt_imo);

        } else break;   /* head of queue doesn't fit; stop (FIFO order) */
    }
    splx(s);
}

/*
 * removes the jth packet from the queue at the interface
 * by shifting every later entry down one slot and clearing the
 * vacated tail slot.  Caller must ensure j < q_len (queue non-empty);
 * caller also holds splnet or calls from timeout context.
 */
void
tbf_dequeue(vifp,j)
    register struct vif *vifp;
    register int j;
{
    register u_long index = vifp - viftable;
    register int i;

    for (i=j+1; i <= vifp->v_tbf->q_len - 1; i++) {
        qtable[index][i-1].pkt_m   = qtable[index][i].pkt_m;
        qtable[index][i-1].pkt_len = qtable[index][i].pkt_len;
        qtable[index][i-1].pkt_ip  = qtable[index][i].pkt_ip;
        qtable[index][i-1].pkt_imo = qtable[index][i].pkt_imo;
    }
    qtable[index][i-1].pkt_m = NULL;
    /*
     * NOTE(review): pkt_len is a length, not a pointer — assigning
     * NULL here works only because NULL is 0; should read "= 0".
     */
    qtable[index][i-1].pkt_len = NULL;
    qtable[index][i-1].pkt_ip = NULL;
    qtable[index][i-1].pkt_imo = NULL;

    vifp->v_tbf->q_len--;

    if (tbfdebug > 1)
        log(LOG_DEBUG, "tbf_dequeue: vif# %d qlen %d",vifp-viftable, i-1);
}

/*
 * Timeout handler: refresh the token count and retry draining the
 * queue; re-arms itself every tick while packets remain queued.
 * Bails out if the multicast router has been shut down meanwhile.
 */
void
tbf_reprocess_q(xvifp)
    void *xvifp;
{
    register struct vif *vifp = xvifp;
    if (ip_mrouter == NULL)
        return;

    tbf_update_tokens(vifp);

    tbf_process_q(vifp);

    if (vifp->v_tbf->q_len)
        timeout(tbf_reprocess_q, (caddr_t)vifp, 1);
}

/* function that will selectively discard a member of the queue
 * based on the precedence value and the priority obtained through
 * a lookup table - not yet implemented accurately!
 *
 * Returns 1 if a lower-priority packet was found (scanning from the
 * tail), freed, and dequeued to make room; 0 if nothing was dropped.
 */
int
tbf_dq_sel(vifp, ip)
    register struct vif *vifp;
    register struct ip *ip;
{
    register int i;
    register int s = splnet();
    register u_int p;

    p = priority(vifp, ip);

    for(i=vifp->v_tbf->q_len-1;i >= 0;i--) {
        if (p > priority(vifp, qtable[vifp-viftable][i].pkt_ip)) {
            m_freem(qtable[vifp-viftable][i].pkt_m);
            tbf_dequeue(vifp,i);
            splx(s);
            mrtstat.mrts_drop_sel++;
            return(1);
        }
    }
    splx(s);
    return(0);
}

/*
 * Actually transmit one packet on a vif via ip_output, dispatching on
 * the vif type.  For physical interfaces the ip_moptions allocated by
 * phyint_send is freed here after the send; tunnel paths pass imo == 0.
 */
void
tbf_send_packet(vifp, m, imo)
    register struct vif *vifp;
    register struct mbuf *m;
    struct ip_moptions *imo;
{
    int error;
    int s = splnet();

    /* if source route tunnels */
    if (vifp->v_flags & VIFF_SRCRT) {
        error = ip_output(m, (struct mbuf *)0, (struct route *)0,
                          IP_FORWARDING, imo);
        if (mrtdebug > 1)
            log(LOG_DEBUG, "srcrt_send on vif %d err %d", vifp-viftable, error);
    } else if (vifp->v_flags & VIFF_TUNNEL) {
        /* If tunnel options (return value deliberately ignored here) */
        ip_output(m, (struct mbuf *)0, (struct route *)0,
                  IP_FORWARDING, imo);
    } else {
        /* if physical interface option, extract the options and then send */
        error = ip_output(m, (struct mbuf *)0, (struct route *)0,
                          IP_FORWARDING, imo);
        FREE(imo, M_IPMOPTS);

        if (mrtdebug > 1)
            log(LOG_DEBUG, "phyint_send on vif %d err %d", vifp-viftable, error);
    }
    splx(s);
}

/* determine the current time and then
 * the elapsed time (between the last time and time now)
 * in milliseconds & update the no.
of tokens in the bucket
 */
void
tbf_update_tokens(vifp)
    register struct vif *vifp;
{
    struct timeval tp;
    register u_long t;
    register u_long elapsed;
    register int s = splnet();

    GET_TIME(tp);

    /*
     * current time in milliseconds; NOTE(review): a u_long ms counter
     * wraps (~49 days on 32-bit) — presumably benign since only the
     * difference is used, but confirm the subtraction below is intended
     * to rely on unsigned wraparound.
     */
    t = tp.tv_sec*1000 + tp.tv_usec/1000;

    /* credit tokens for the elapsed time at v_rate_limit kbits/sec,
     * i.e. (ms * kbit/s) / 8 = bytes */
    elapsed = (t - vifp->v_tbf->last_pkt_t) * vifp->v_rate_limit /8;
    vifp->v_tbf->n_tok += elapsed;
    vifp->v_tbf->last_pkt_t = t;

    /* cap the bucket so long idle periods don't build unbounded credit */
    if (vifp->v_tbf->n_tok > MAX_BKT_SIZE)
        vifp->v_tbf->n_tok = MAX_BKT_SIZE;

    splx(s);
}

/*
 * Assign a drop priority to a packet based on its destination group
 * address, used by tbf_dq_sel to pick a victim when the queue is full.
 * Higher value = more important (kept in preference to lower values).
 */
static int
priority(vifp, ip)
    register struct vif *vifp;
    register struct ip *ip;
{
    register u_long graddr;
    register int prio;

    /* temporary hack; will add general packet classifier some day */

    prio = 50;  /* default priority */

    /* check for source route options and add option length to get dst */
    /*
     * NOTE(review): "(ip+8)" is struct-pointer arithmetic, i.e. it
     * advances by 8 * sizeof(struct ip) bytes, not by the 8 option
     * bytes the comment implies.  Looks wrong; verify against the
     * layout srcrt_send produces before touching it.
     */
    if (vifp->v_flags & VIFF_SRCRT)
        graddr = ntohl((ip+8)->ip_dst.s_addr);
    else
        graddr = ntohl(ip->ip_dst.s_addr);

    /* switch on the low nibble, then match well-known MBone groups */
    switch (graddr & 0xf) {
        case 0x0: break;
        case 0x1: if (graddr == 0xe0020001) prio = 65; /* MBone Audio */
                  break;
        case 0x2: break;
        case 0x3: break;
        case 0x4: break;
        case 0x5: break;
        case 0x6: break;
        case 0x7: break;
        case 0x8: break;
        case 0x9: break;
        case 0xa: if (graddr == 0xe000010a) prio = 85; /* IETF Low Audio 1 */
                  break;
        case 0xb: if (graddr == 0xe000010b) prio = 75; /* IETF Audio 1 */
                  break;
        case 0xc: if (graddr == 0xe000010c) prio = 60; /* IETF Video 1 */
                  break;
        case 0xd: if (graddr == 0xe000010d) prio = 80; /* IETF Low Audio 2 */
                  break;
        case 0xe: if (graddr == 0xe000010e) prio = 70; /* IETF Audio 2 */
                  break;
        case 0xf: if (graddr == 0xe000010f) prio = 55; /* IETF Video 2 */
                  break;
    }

    if (tbfdebug > 1) log(LOG_DEBUG, "graddr%x prio%d", graddr, prio);

    return prio;
}

/*
 * End
of token bucket filter modifications
 */

#ifdef MROUTE_LKM
#include <sys/conf.h>
#include <sys/exec.h>
#include <sys/sysent.h>
#include <sys/lkm.h>

MOD_MISC("ip_mroute_mod")

/*
 * Loadable kernel module glue: on load, save the stub function
 * pointers (installed when MROUTING is not compiled in) and swap in
 * the real X_* multicast routing entry points plus the ENCAP_PROTO
 * input handler; on unload, restore the saved stubs.  The saved
 * pointers live in switch-scope statics so they persist across calls.
 */
static int
ip_mroute_mod_handle(struct lkm_table *lkmtp, int cmd)
{
    int i;
    struct lkm_misc *args = lkmtp->private.lkm_misc;
    int err = 0;

    switch(cmd) {
        static int (*old_ip_mrouter_cmd)();
        static int (*old_ip_mrouter_done)();
        static int (*old_ip_mforward)();
        static int (*old_mrt_ioctl)();
        static int (*old_proto4_input)();
        static int (*old_legal_vif_num)();
        extern u_char ip_protox[];
        extern struct protosw inetsw[];

    case LKM_E_LOAD:
        /* refuse double-load; ip_mrtproto != 0 means already active */
        if(lkmexists(lkmtp) || ip_mrtproto)
            return(EEXIST);
        old_ip_mrouter_cmd = ip_mrouter_cmd;
        ip_mrouter_cmd = X_ip_mrouter_cmd;
        old_ip_mrouter_done = ip_mrouter_done;
        ip_mrouter_done = X_ip_mrouter_done;
        old_ip_mforward = ip_mforward;
        ip_mforward = X_ip_mforward;
        old_mrt_ioctl = mrt_ioctl;
        mrt_ioctl = X_mrt_ioctl;
        old_proto4_input = inetsw[ip_protox[IPPROTO_ENCAP]].pr_input;
        inetsw[ip_protox[IPPROTO_ENCAP]].pr_input = X_multiencap_decap;
        old_legal_vif_num = legal_vif_num;
        legal_vif_num = X_legal_vif_num;
        ip_mrtproto = IGMP_DVMRP;

        printf("\nIP multicast routing loaded\n");
        break;

    case LKM_E_UNLOAD:
        /* cannot unload while a routing daemon holds the mrouter socket */
        if (ip_mrouter)
            return EINVAL;

        ip_mrouter_cmd = old_ip_mrouter_cmd;
        ip_mrouter_done = old_ip_mrouter_done;
        ip_mforward = old_ip_mforward;
        mrt_ioctl = old_mrt_ioctl;
        inetsw[ip_protox[IPPROTO_ENCAP]].pr_input = old_proto4_input;
        legal_vif_num = old_legal_vif_num;
        ip_mrtproto = 0;
        break;

    default:
        err = EINVAL;
        break;
    }

    return(err);
}

/*
 * Module entry point: dispatch load/unload/stat commands to the
 * handler above via the standard LKM DISPATCH macro.
 */
int
ip_mroute_mod(struct lkm_table *lkmtp, int cmd, int ver) {
    DISPATCH(lkmtp, cmd, ver, ip_mroute_mod_handle, ip_mroute_mod_handle,
             nosys);
}

#endif /* MROUTE_LKM */
#endif /* MROUTING */
