/* ip_mroute.c revision 2763 */
1/* 2 * IP multicast forwarding procedures 3 * 4 * Written by David Waitzman, BBN Labs, August 1988. 5 * Modified by Steve Deering, Stanford, February 1989. 6 * Modified by Mark J. Steiglitz, Stanford, May, 1991 7 * Modified by Van Jacobson, LBL, January 1993 8 * Modified by Ajit Thyagarajan, PARC, August 1993 9 * 10 * MROUTING 1.8 11 */ 12 13 14#include <sys/param.h> 15#include <sys/systm.h> 16#include <sys/mbuf.h> 17#include <sys/socket.h> 18#include <sys/socketvar.h> 19#include <sys/protosw.h> 20#include <sys/errno.h> 21#include <sys/time.h> 22#include <sys/ioctl.h> 23#include <sys/syslog.h> 24#include <net/if.h> 25#include <net/route.h> 26#include <net/raw_cb.h> 27#include <netinet/in.h> 28#include <netinet/in_systm.h> 29#include <netinet/ip.h> 30#include <netinet/ip_var.h> 31#include <netinet/in_pcb.h> 32#include <netinet/in_var.h> 33#include <netinet/igmp.h> 34#include <netinet/igmp_var.h> 35#include <netinet/ip_mroute.h> 36 37#ifndef NTOHL 38#if BYTE_ORDER != BIG_ENDIAN 39#define NTOHL(d) ((d) = ntohl((d))) 40#define NTOHS(d) ((d) = ntohs((u_short)(d))) 41#define HTONL(d) ((d) = htonl((d))) 42#define HTONS(d) ((d) = htons((u_short)(d))) 43#else 44#define NTOHL(d) 45#define NTOHS(d) 46#define HTONL(d) 47#define HTONS(d) 48#endif 49#endif 50 51#ifndef MROUTING 52/* 53 * Dummy routines and globals used when multicast routing is not compiled in. 
54 */ 55 56u_int ip_mrtproto = 0; 57struct socket *ip_mrouter = NULL; 58struct mrtstat mrtstat; 59 60 61int 62_ip_mrouter_cmd(cmd, so, m) 63 int cmd; 64 struct socket *so; 65 struct mbuf *m; 66{ 67 return(EOPNOTSUPP); 68} 69 70int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = _ip_mrouter_cmd; 71 72int 73_ip_mrouter_done() 74{ 75 return(0); 76} 77 78int (*ip_mrouter_done)(void) = _ip_mrouter_done; 79 80int 81_ip_mforward(ip, ifp, m, imo) 82 struct ip *ip; 83 struct ifnet *ifp; 84 struct mbuf *m; 85 struct ip_moptions *imo; 86{ 87 return(0); 88} 89 90int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, 91 struct ip_moptions *) = _ip_mforward; 92 93int 94_mrt_ioctl(int req, caddr_t data, struct proc *p) 95{ 96 return EOPNOTSUPP; 97} 98 99int (*mrt_ioctl)(int, caddr_t, struct proc *) = _mrt_ioctl; 100 101void multiencap_decap(struct mbuf *m) { /* XXX must fixup manually */ 102 rip_input(m); 103} 104 105int (*legal_vif_num)(int) = 0; 106 107#else 108 109#define INSIZ sizeof(struct in_addr) 110#define same(a1, a2) \ 111 (bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0) 112 113#define MT_MRTABLE MT_RTABLE /* since nothing else uses it */ 114 115/* 116 * Globals. All but ip_mrouter and ip_mrtproto could be static, 117 * except for netstat or debugging purposes. 
118 */ 119#ifndef MROUTE_LKM 120struct socket *ip_mrouter = NULL; 121struct mrtstat mrtstat; 122 123int ip_mrtproto = IGMP_DVMRP; /* for netstat only */ 124#else 125extern struct mrtstat mrtstat; 126extern int ip_mrtproto; 127#endif 128 129#define NO_RTE_FOUND 0x1 130#define RTE_FOUND 0x2 131 132struct mbuf *mfctable[MFCTBLSIZ]; 133struct vif viftable[MAXVIFS]; 134u_int mrtdebug = 0; /* debug level */ 135u_int tbfdebug = 0; /* tbf debug level */ 136 137u_long timeout_val = 0; /* count of outstanding upcalls */ 138 139/* 140 * Define the token bucket filter structures 141 * tbftable -> each vif has one of these for storing info 142 * qtable -> each interface has an associated queue of pkts 143 */ 144 145struct tbf tbftable[MAXVIFS]; 146struct pkt_queue qtable[MAXVIFS][MAXQSIZE]; 147 148/* 149 * 'Interfaces' associated with decapsulator (so we can tell 150 * packets that went through it from ones that get reflected 151 * by a broken gateway). These interfaces are never linked into 152 * the system ifnet list & no routes point to them. I.e., packets 153 * can't be sent this way. They only exist as a placeholder for 154 * multicast source verification. 155 */ 156struct ifnet multicast_decap_if[MAXVIFS]; 157 158#define ENCAP_TTL 64 159#define ENCAP_PROTO 4 160 161/* prototype IP hdr for encapsulated packets */ 162struct ip multicast_encap_iphdr = { 163#if BYTE_ORDER == LITTLE_ENDIAN 164 sizeof(struct ip) >> 2, IPVERSION, 165#else 166 IPVERSION, sizeof(struct ip) >> 2, 167#endif 168 0, /* tos */ 169 sizeof(struct ip), /* total length */ 170 0, /* id */ 171 0, /* frag offset */ 172 ENCAP_TTL, ENCAP_PROTO, 173 0, /* checksum */ 174}; 175 176/* 177 * Private variables. 178 */ 179static vifi_t numvifs = 0; 180 181/* 182 * one-back cache used by multiencap_decap to locate a tunnel's vif 183 * given a datagram's src ip address. 
184 */ 185static u_long last_encap_src; 186static struct vif *last_encap_vif; 187 188static u_long nethash_fc(u_long, u_long); 189static struct mfc *mfcfind(u_long, u_long); 190int get_sg_cnt(struct sioc_sg_req *); 191int get_vif_cnt(struct sioc_vif_req *); 192int get_vifs(caddr_t); 193static int add_vif(struct vifctl *); 194static int del_vif(vifi_t *); 195static int add_mfc(struct mfcctl *); 196static int del_mfc(struct delmfcctl *); 197static void cleanup_cache(void *); 198static int ip_mdq(struct mbuf *, struct ifnet *, u_long, struct mfc *, 199 struct ip_moptions *); 200extern int (*legal_vif_num)(int); 201static void phyint_send(struct ip *, struct vif *, struct mbuf *); 202static void srcrt_send(struct ip *, struct vif *, struct mbuf *); 203static void encap_send(struct ip *, struct vif *, struct mbuf *); 204void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long, 205 struct ip_moptions *); 206void tbf_queue(struct vif *, struct mbuf *, struct ip *, struct ip_moptions *); 207void tbf_process_q(struct vif *); 208void tbf_dequeue(struct vif *, int); 209void tbf_reprocess_q(void *); 210int tbf_dq_sel(struct vif *, struct ip *); 211void tbf_send_packet(struct vif *, struct mbuf *, struct ip_moptions *); 212void tbf_update_tokens(struct vif *); 213static int priority(struct vif *, struct ip *); 214static int ip_mrouter_init(struct socket *); 215 216/* 217 * A simple hash function: returns MFCHASHMOD of the low-order octet of 218 * the argument's network or subnet number and the multicast group assoc. 
219 */ 220static u_long 221nethash_fc(m,n) 222 register u_long m; 223 register u_long n; 224{ 225 struct in_addr in1; 226 struct in_addr in2; 227 228 in1.s_addr = m; 229 m = in_netof(in1); 230 while ((m & 0xff) == 0) m >>= 8; 231 232 in2.s_addr = n; 233 n = in_netof(in2); 234 while ((n & 0xff) == 0) n >>= 8; 235 236 return (MFCHASHMOD(m) ^ MFCHASHMOD(n)); 237} 238 239/* 240 * this is a direct-mapped cache used to speed the mapping from a 241 * datagram source address to the associated multicast route. Note 242 * that unlike mrttable, the hash is on IP address, not IP net number. 243 */ 244#define MFCHASHSIZ 1024 245#define MFCHASH(a, g) ((((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \ 246 ((g) >> 20) ^ ((g) >> 10) ^ (g)) & (MFCHASHSIZ-1)) 247struct mfc *mfchash[MFCHASHSIZ]; 248 249/* 250 * Find a route for a given origin IP address and Multicast group address 251 * Type of service parameter to be added in the future!!! 252 */ 253#define MFCFIND(o, g, rt) { \ 254 register u_int _mrhasho = o; \ 255 register u_int _mrhashg = g; \ 256 _mrhasho = MFCHASH(_mrhasho, _mrhashg); \ 257 ++mrtstat.mrts_mfc_lookups; \ 258 rt = mfchash[_mrhasho]; \ 259 if ((rt == NULL) || \ 260 ((o & rt->mfc_originmask.s_addr) != rt->mfc_origin.s_addr) || \ 261 (g != rt->mfc_mcastgrp.s_addr)) \ 262 if ((rt = mfcfind(o, g)) != NULL) \ 263 mfchash[_mrhasho] = rt; \ 264} 265 266/* 267 * Find route by examining hash table entries 268 */ 269static struct mfc * 270mfcfind(origin, mcastgrp) 271 u_long origin; 272 u_long mcastgrp; 273{ 274 register struct mbuf *mb_rt; 275 register struct mfc *rt; 276 register u_long hash; 277 278 hash = nethash_fc(origin, mcastgrp); 279 for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) { 280 rt = mtod(mb_rt, struct mfc *); 281 if (((origin & rt->mfc_originmask.s_addr) == rt->mfc_origin.s_addr) && 282 (mcastgrp == rt->mfc_mcastgrp.s_addr) && 283 (mb_rt->m_act == NULL)) 284 return (rt); 285 } 286 mrtstat.mrts_mfc_misses++; 287 return NULL; 288} 289 290/* 291 * Macros to 
compute elapsed time efficiently 292 * Borrowed from Van Jacobson's scheduling code 293 */ 294#define TV_DELTA(a, b, delta) { \ 295 register int xxs; \ 296 \ 297 delta = (a).tv_usec - (b).tv_usec; \ 298 if ((xxs = (a).tv_sec - (b).tv_sec)) { \ 299 switch (xxs) { \ 300 case 2: \ 301 delta += 1000000; \ 302 /* fall through */ \ 303 case 1: \ 304 delta += 1000000; \ 305 break; \ 306 default: \ 307 delta += (1000000 * xxs); \ 308 } \ 309 } \ 310} 311 312#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \ 313 (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) 314 315/* 316 * Handle DVMRP setsockopt commands to modify the multicast routing tables. 317 */ 318int 319X_ip_mrouter_cmd(cmd, so, m) 320 int cmd; 321 struct socket *so; 322 struct mbuf *m; 323{ 324 if (cmd != DVMRP_INIT && so != ip_mrouter) return EACCES; 325 326 switch (cmd) { 327 case DVMRP_INIT: return ip_mrouter_init(so); 328 case DVMRP_DONE: return ip_mrouter_done(); 329 case DVMRP_ADD_VIF: return add_vif (mtod(m, struct vifctl *)); 330 case DVMRP_DEL_VIF: return del_vif (mtod(m, vifi_t *)); 331 case DVMRP_ADD_MFC: return add_mfc (mtod(m, struct mfcctl *)); 332 case DVMRP_DEL_MFC: return del_mfc (mtod(m, struct delmfcctl *)); 333 default: return EOPNOTSUPP; 334 } 335} 336 337#ifndef MROUTE_LKM 338int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = X_ip_mrouter_cmd; 339#endif 340 341/* 342 * Handle ioctl commands to obtain information from the cache 343 */ 344int 345X_mrt_ioctl(cmd, data) 346 int cmd; 347 caddr_t data; 348{ 349 int error = 0; 350 351 switch (cmd) { 352 case (SIOCGETVIFINF): /* Read Virtual Interface (m/cast) */ 353 return (get_vifs(data)); 354 break; 355 case (SIOCGETVIFCNT): 356 return (get_vif_cnt((struct sioc_vif_req *)data)); 357 break; 358 case (SIOCGETSGCNT): 359 return (get_sg_cnt((struct sioc_sg_req *)data)); 360 break; 361 default: 362 return (EINVAL); 363 break; 364 } 365 return error; 366} 367 368#ifndef MROUTE_LKM 369int (*mrt_ioctl)(int, caddr_t, struct proc *) = 
X_mrt_ioctl; 370#else 371extern int (*mrt_ioctl)(int, caddr_t, struct proc *); 372#endif 373 374/* 375 * returns the packet count for the source group provided 376 */ 377int 378get_sg_cnt(req) 379 register struct sioc_sg_req *req; 380{ 381 register struct mfc *rt; 382 int s; 383 384 s = splnet(); 385 MFCFIND(req->src.s_addr, req->grp.s_addr, rt); 386 splx(s); 387 if (rt != NULL) 388 req->count = rt->mfc_pkt_cnt; 389 else 390 req->count = 0xffffffff; 391 392 return 0; 393} 394 395/* 396 * returns the input and output packet counts on the interface provided 397 */ 398int 399get_vif_cnt(req) 400 register struct sioc_vif_req *req; 401{ 402 register vifi_t vifi = req->vifi; 403 404 req->icount = viftable[vifi].v_pkt_in; 405 req->ocount = viftable[vifi].v_pkt_out; 406 407 return 0; 408} 409 410int 411get_vifs(data) 412 char *data; 413{ 414 struct vif_conf *vifc = (struct vif_conf *)data; 415 struct vif_req *vifrp, vifr; 416 int space, error=0; 417 418 vifi_t vifi; 419 int s; 420 421 space = vifc->vifc_len; 422 vifrp = vifc->vifc_req; 423 424 s = splnet(); 425 vifc->vifc_num=numvifs; 426 427 for (vifi = 0; vifi < numvifs; vifi++, vifrp++) { 428 if (viftable[vifi].v_lcl_addr.s_addr != 0) { 429 vifr.v_flags=viftable[vifi].v_flags; 430 vifr.v_threshold=viftable[vifi].v_threshold; 431 vifr.v_lcl_addr=viftable[vifi].v_lcl_addr; 432 vifr.v_rmt_addr=viftable[vifi].v_rmt_addr; 433 strncpy(vifr.v_if_name,viftable[vifi].v_ifp->if_name,IFNAMSIZ); 434 if ((space -= sizeof(vifr)) < 0) { 435 splx(s); 436 return(ENOSPC); 437 } 438 error = copyout((caddr_t)&vifr,(caddr_t)vifrp,(u_int)(sizeof vifr)); 439 if (error) { 440 splx(s); 441 return(error); 442 } 443 } 444 } 445 splx(s); 446 return 0; 447} 448/* 449 * Enable multicast routing 450 */ 451static int 452ip_mrouter_init(so) 453 struct socket *so; 454{ 455 if (so->so_type != SOCK_RAW || 456 so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP; 457 458 if (ip_mrouter != NULL) return EADDRINUSE; 459 460 ip_mrouter = so; 461 462 if 
(mrtdebug) 463 log(LOG_DEBUG, "ip_mrouter_init"); 464 465 return 0; 466} 467 468/* 469 * Disable multicast routing 470 */ 471int 472X_ip_mrouter_done() 473{ 474 vifi_t vifi; 475 int i; 476 struct ifnet *ifp; 477 struct ifreq ifr; 478 struct mbuf *mb_rt; 479 struct mbuf *m; 480 struct rtdetq *rte; 481 int s; 482 483 s = splnet(); 484 485 /* 486 * For each phyint in use, disable promiscuous reception of all IP 487 * multicasts. 488 */ 489 for (vifi = 0; vifi < numvifs; vifi++) { 490 if (viftable[vifi].v_lcl_addr.s_addr != 0 && 491 !(viftable[vifi].v_flags & VIFF_TUNNEL)) { 492 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; 493 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr 494 = INADDR_ANY; 495 ifp = viftable[vifi].v_ifp; 496 (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); 497 } 498 } 499 bzero((caddr_t)qtable, sizeof(qtable)); 500 bzero((caddr_t)tbftable, sizeof(tbftable)); 501 bzero((caddr_t)viftable, sizeof(viftable)); 502 numvifs = 0; 503 504 /* 505 * Check if any outstanding timeouts remain 506 */ 507 if (timeout_val != 0) 508 for (i = 0; i < MFCTBLSIZ; i++) { 509 mb_rt = mfctable[i]; 510 while (mb_rt) { 511 if ( mb_rt->m_act != NULL) { 512 untimeout(cleanup_cache, (caddr_t)mb_rt); 513 while (m = mb_rt->m_act) { 514 mb_rt->m_act = m->m_act; 515 rte = mtod(m, struct rtdetq *); 516 m_freem(rte->m); 517 m_free(m); 518 } 519 timeout_val--; 520 } 521 mb_rt = mb_rt->m_next; 522 } 523 if (timeout_val == 0) 524 break; 525 } 526 527 /* 528 * Free all multicast forwarding cache entries. 
529 */ 530 for (i = 0; i < MFCTBLSIZ; i++) 531 m_freem(mfctable[i]); 532 533 bzero((caddr_t)mfctable, sizeof(mfctable)); 534 bzero((caddr_t)mfchash, sizeof(mfchash)); 535 536 /* 537 * Reset de-encapsulation cache 538 */ 539 last_encap_src = NULL; 540 last_encap_vif = NULL; 541 542 ip_mrouter = NULL; 543 544 splx(s); 545 546 if (mrtdebug) 547 log(LOG_DEBUG, "ip_mrouter_done"); 548 549 return 0; 550} 551 552#ifndef MROUTE_LKM 553int (*ip_mrouter_done)(void) = X_ip_mrouter_done; 554#endif 555 556/* 557 * Add a vif to the vif table 558 */ 559static int 560add_vif(vifcp) 561 register struct vifctl *vifcp; 562{ 563 register struct vif *vifp = viftable + vifcp->vifc_vifi; 564 static struct sockaddr_in sin = {AF_INET}; 565 struct ifaddr *ifa; 566 struct ifnet *ifp; 567 struct ifreq ifr; 568 int error, s; 569 struct tbf *v_tbf = tbftable + vifcp->vifc_vifi; 570 571 if (vifcp->vifc_vifi >= MAXVIFS) return EINVAL; 572 if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE; 573 574 /* Find the interface with an address in AF_INET family */ 575 sin.sin_addr = vifcp->vifc_lcl_addr; 576 ifa = ifa_ifwithaddr((struct sockaddr *)&sin); 577 if (ifa == 0) return EADDRNOTAVAIL; 578 ifp = ifa->ifa_ifp; 579 580 if (vifcp->vifc_flags & VIFF_TUNNEL) { 581 if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) { 582 static int inited = 0; 583 if(!inited) { 584 for (s = 0; s < MAXVIFS; ++s) { 585 multicast_decap_if[s].if_name = "mdecap"; 586 multicast_decap_if[s].if_unit = s; 587 } 588 inited = 1; 589 } 590 ifp = &multicast_decap_if[vifcp->vifc_vifi]; 591 } else { 592 ifp = 0; 593 } 594 } else { 595 /* Make sure the interface supports multicast */ 596 if ((ifp->if_flags & IFF_MULTICAST) == 0) 597 return EOPNOTSUPP; 598 599 /* Enable promiscuous reception of all IP multicasts from the if */ 600 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; 601 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY; 602 s = splnet(); 603 error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 
(caddr_t)&ifr); 604 splx(s); 605 if (error) 606 return error; 607 } 608 609 s = splnet(); 610 /* define parameters for the tbf structure */ 611 vifp->v_tbf = v_tbf; 612 vifp->v_tbf->q_len = 0; 613 vifp->v_tbf->n_tok = 0; 614 vifp->v_tbf->last_pkt_t = 0; 615 616 vifp->v_flags = vifcp->vifc_flags; 617 vifp->v_threshold = vifcp->vifc_threshold; 618 vifp->v_lcl_addr = vifcp->vifc_lcl_addr; 619 vifp->v_rmt_addr = vifcp->vifc_rmt_addr; 620 vifp->v_ifp = ifp; 621 vifp->v_rate_limit= vifcp->vifc_rate_limit; 622 /* initialize per vif pkt counters */ 623 vifp->v_pkt_in = 0; 624 vifp->v_pkt_out = 0; 625 splx(s); 626 627 /* Adjust numvifs up if the vifi is higher than numvifs */ 628 if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1; 629 630 if (mrtdebug) 631 log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d", 632 vifcp->vifc_vifi, 633 ntohl(vifcp->vifc_lcl_addr.s_addr), 634 (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask", 635 ntohl(vifcp->vifc_rmt_addr.s_addr), 636 vifcp->vifc_threshold, 637 vifcp->vifc_rate_limit); 638 639 return 0; 640} 641 642/* 643 * Delete a vif from the vif table 644 */ 645static int 646del_vif(vifip) 647 vifi_t *vifip; 648{ 649 register struct vif *vifp = viftable + *vifip; 650 register vifi_t vifi; 651 struct ifnet *ifp; 652 struct ifreq ifr; 653 int s; 654 655 if (*vifip >= numvifs) return EINVAL; 656 if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL; 657 658 s = splnet(); 659 660 if (!(vifp->v_flags & VIFF_TUNNEL)) { 661 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; 662 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY; 663 ifp = vifp->v_ifp; 664 (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); 665 } 666 667 if (vifp == last_encap_vif) { 668 last_encap_vif = 0; 669 last_encap_src = 0; 670 } 671 672 bzero((caddr_t)qtable[*vifip], 673 sizeof(qtable[*vifip])); 674 bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf))); 675 bzero((caddr_t)vifp, sizeof (*vifp)); 676 677 /* 
Adjust numvifs down */ 678 for (vifi = numvifs; vifi > 0; vifi--) 679 if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break; 680 numvifs = vifi; 681 682 splx(s); 683 684 if (mrtdebug) 685 log(LOG_DEBUG, "del_vif %d, numvifs %d", *vifip, numvifs); 686 687 return 0; 688} 689 690/* 691 * Add an mfc entry 692 */ 693static int 694add_mfc(mfccp) 695 struct mfcctl *mfccp; 696{ 697 struct mfc *rt; 698 struct mfc *rt1; 699 register struct mbuf *mb_rt; 700 struct mbuf *prev_mb_rt; 701 u_long hash; 702 struct mbuf *mb_ntry; 703 struct rtdetq *rte; 704 register u_short nstl; 705 int s; 706 int i; 707 708 rt = mfcfind(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); 709 710 /* If an entry already exists, just update the fields */ 711 if (rt) { 712 if (mrtdebug) 713 log(LOG_DEBUG,"add_mfc update o %x g %x m %x p %x", 714 ntohl(mfccp->mfcc_origin.s_addr), 715 ntohl(mfccp->mfcc_mcastgrp.s_addr), 716 ntohl(mfccp->mfcc_originmask.s_addr), 717 mfccp->mfcc_parent); 718 719 s = splnet(); 720 rt->mfc_parent = mfccp->mfcc_parent; 721 for (i = 0; i < numvifs; i++) 722 VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); 723 splx(s); 724 return 0; 725 } 726 727 /* 728 * Find the entry for which the upcall was made and update 729 */ 730 s = splnet(); 731 hash = nethash_fc(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); 732 for (prev_mb_rt = mb_rt = mfctable[hash], nstl = 0; 733 mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { 734 735 rt = mtod(mb_rt, struct mfc *); 736 if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr) 737 == mfccp->mfcc_origin.s_addr) && 738 (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) && 739 (mb_rt->m_act != NULL)) { 740 741 if (!nstl++) { 742 if (mrtdebug) 743 log(LOG_DEBUG,"add_mfc o %x g %x m %x p %x dbg %x", 744 ntohl(mfccp->mfcc_origin.s_addr), 745 ntohl(mfccp->mfcc_mcastgrp.s_addr), 746 ntohl(mfccp->mfcc_originmask.s_addr), 747 mfccp->mfcc_parent, mb_rt->m_act); 748 749 rt->mfc_origin = mfccp->mfcc_origin; 750 rt->mfc_originmask = 
mfccp->mfcc_originmask; 751 rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 752 rt->mfc_parent = mfccp->mfcc_parent; 753 for (i = 0; i < numvifs; i++) 754 VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); 755 /* initialize pkt counters per src-grp */ 756 rt->mfc_pkt_cnt = 0; 757 rt1 = rt; 758 } 759 760 /* prevent cleanup of cache entry */ 761 untimeout(cleanup_cache, (caddr_t)mb_rt); 762 timeout_val--; 763 764 /* free packets Qed at the end of this entry */ 765 while (mb_rt->m_act) { 766 mb_ntry = mb_rt->m_act; 767 rte = mtod(mb_ntry, struct rtdetq *); 768 ip_mdq(rte->m, rte->ifp, rte->tunnel_src, 769 rt1, rte->imo); 770 mb_rt->m_act = mb_ntry->m_act; 771 m_freem(rte->m); 772 m_free(mb_ntry); 773 } 774 775 /* 776 * If more than one entry was created for a single upcall 777 * delete that entry 778 */ 779 if (nstl > 1) { 780 MFREE(mb_rt, prev_mb_rt->m_next); 781 mb_rt = prev_mb_rt; 782 } 783 } 784 } 785 786 /* 787 * It is possible that an entry is being inserted without an upcall 788 */ 789 if (nstl == 0) { 790 if (mrtdebug) 791 log(LOG_DEBUG,"add_mfc no upcall h %d o %x g %x m %x p %x", 792 hash, ntohl(mfccp->mfcc_origin.s_addr), 793 ntohl(mfccp->mfcc_mcastgrp.s_addr), 794 ntohl(mfccp->mfcc_originmask.s_addr), 795 mfccp->mfcc_parent); 796 797 for (prev_mb_rt = mb_rt = mfctable[hash]; 798 mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { 799 800 rt = mtod(mb_rt, struct mfc *); 801 if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr) 802 == mfccp->mfcc_origin.s_addr) && 803 (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) { 804 805 rt->mfc_origin = mfccp->mfcc_origin; 806 rt->mfc_originmask = mfccp->mfcc_originmask; 807 rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 808 rt->mfc_parent = mfccp->mfcc_parent; 809 for (i = 0; i < numvifs; i++) 810 VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); 811 /* initialize pkt counters per src-grp */ 812 rt->mfc_pkt_cnt = 0; 813 } 814 } 815 if (mb_rt == NULL) { 816 /* no upcall, so make a new entry */ 817 MGET(mb_rt, 
M_DONTWAIT, MT_MRTABLE); 818 if (mb_rt == NULL) { 819 splx(s); 820 return ENOBUFS; 821 } 822 823 rt = mtod(mb_rt, struct mfc *); 824 825 /* insert new entry at head of hash chain */ 826 rt->mfc_origin = mfccp->mfcc_origin; 827 rt->mfc_originmask = mfccp->mfcc_originmask; 828 rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 829 rt->mfc_parent = mfccp->mfcc_parent; 830 for (i = 0; i < numvifs; i++) 831 VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); 832 /* initialize pkt counters per src-grp */ 833 rt->mfc_pkt_cnt = 0; 834 835 /* link into table */ 836 mb_rt->m_next = mfctable[hash]; 837 mfctable[hash] = mb_rt; 838 mb_rt->m_act = NULL; 839 } 840 } 841 splx(s); 842 return 0; 843} 844 845/* 846 * Delete an mfc entry 847 */ 848static int 849del_mfc(mfccp) 850 struct delmfcctl *mfccp; 851{ 852 struct in_addr origin; 853 struct in_addr mcastgrp; 854 struct mfc *rt; 855 struct mbuf *mb_rt; 856 struct mbuf *prev_mb_rt; 857 u_long hash; 858 struct mfc **cmfc; 859 struct mfc **cmfcend; 860 int s, i; 861 862 origin = mfccp->mfcc_origin; 863 mcastgrp = mfccp->mfcc_mcastgrp; 864 hash = nethash_fc(origin.s_addr, mcastgrp.s_addr); 865 866 if (mrtdebug) 867 log(LOG_DEBUG,"del_mfc orig %x mcastgrp %x", 868 ntohl(origin.s_addr), ntohl(mcastgrp.s_addr)); 869 870 for (prev_mb_rt = mb_rt = mfctable[hash] 871 ; mb_rt 872 ; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { 873 rt = mtod(mb_rt, struct mfc *); 874 if (origin.s_addr == rt->mfc_origin.s_addr && 875 mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && 876 mb_rt->m_act == NULL) 877 break; 878 } 879 if (mb_rt == NULL) { 880 return ESRCH; 881 } 882 883 s = splnet(); 884 885 cmfc = mfchash; 886 cmfcend = cmfc + MFCHASHSIZ; 887 for ( ; cmfc < cmfcend; ++cmfc) 888 if (*cmfc == rt) 889 *cmfc = 0; 890 891 if (prev_mb_rt != mb_rt) { /* if moved past head of list */ 892 MFREE(mb_rt, prev_mb_rt->m_next); 893 } else /* delete head of list, it is in the table */ 894 mfctable[hash] = m_free(mb_rt); 895 896 splx(s); 897 898 return 0; 899} 900 901/* 902 * IP 
multicast forwarding function. This function assumes that the packet 903 * pointed to by "ip" has arrived on (or is about to be sent to) the interface 904 * pointed to by "ifp", and the packet is to be relayed to other networks 905 * that have members of the packet's destination IP multicast group. 906 * 907 * The packet is returned unscathed to the caller, unless it is tunneled 908 * or erroneous, in which case a non-zero return value tells the caller to 909 * discard it. 910 */ 911 912#define IP_HDR_LEN 20 /* # bytes of fixed IP header (excluding options) */ 913#define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ 914 915int 916X_ip_mforward(ip, ifp, m, imo) 917 register struct ip *ip; 918 struct ifnet *ifp; 919 struct mbuf *m; 920 struct ip_moptions *imo; 921{ 922 register struct mfc *rt; 923 register struct vif *vifp; 924 register u_char *ipoptions; 925 u_long tunnel_src; 926 static struct sockproto k_igmpproto = { AF_INET, IPPROTO_IGMP }; 927 static struct sockaddr_in k_igmpsrc = { AF_INET }; 928 static struct sockaddr_in k_igmpdst = { AF_INET }; 929 register struct mbuf *mm; 930 register struct mbuf *mn; 931 register struct ip *k_data; 932 int s; 933 934 if (mrtdebug > 1) 935 log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %x", 936 ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp); 937 938 if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 || 939 (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) { 940 /* 941 * Packet arrived via a physical interface. 942 */ 943 tunnel_src = 0; 944 } else { 945 /* 946 * Packet arrived through a source-route tunnel. 947 * 948 * A source-route tunneled packet has a single NOP option and a 949 * two-element 950 * loose-source-and-record-route (LSRR) option immediately following 951 * the fixed-size part of the IP header. 
At this point in processing, 952 * the IP header should contain the following IP addresses: 953 * 954 * original source - in the source address field 955 * destination group - in the destination address field 956 * remote tunnel end-point - in the first element of LSRR 957 * one of this host's addrs - in the second element of LSRR 958 * 959 * NOTE: RFC-1075 would have the original source and remote tunnel 960 * end-point addresses swapped. However, that could cause 961 * delivery of ICMP error messages to innocent applications 962 * on intermediate routing hosts! Therefore, we hereby 963 * change the spec. 964 */ 965 966 /* 967 * Verify that the tunnel options are well-formed. 968 */ 969 if (ipoptions[0] != IPOPT_NOP || 970 ipoptions[2] != 11 || /* LSRR option length */ 971 ipoptions[3] != 12 || /* LSRR address pointer */ 972 (tunnel_src = *(u_long *)(&ipoptions[4])) == 0) { 973 mrtstat.mrts_bad_tunnel++; 974 if (mrtdebug) 975 log(LOG_DEBUG, 976 "ip_mforward: bad tunnel from %u (%x %x %x %x %x %x)", 977 ntohl(ip->ip_src.s_addr), 978 ipoptions[0], ipoptions[1], ipoptions[2], ipoptions[3], 979 *(u_long *)(&ipoptions[4]), *(u_long *)(&ipoptions[8])); 980 return 1; 981 } 982 983 /* 984 * Delete the tunnel options from the packet. 985 */ 986 ovbcopy((caddr_t)(ipoptions + TUNNEL_LEN), (caddr_t)ipoptions, 987 (unsigned)(m->m_len - (IP_HDR_LEN + TUNNEL_LEN))); 988 m->m_len -= TUNNEL_LEN; 989 ip->ip_len -= TUNNEL_LEN; 990 ip->ip_hl -= TUNNEL_LEN >> 2; 991 992 ifp = 0; 993 } 994 995 /* 996 * Don't forward a packet with time-to-live of zero or one, 997 * or a packet destined to a local-only group. 
998 */ 999 if (ip->ip_ttl <= 1 || 1000 ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP) 1001 return (int)tunnel_src; 1002 1003 /* 1004 * Determine forwarding vifs from the forwarding cache table 1005 */ 1006 s = splnet(); 1007 MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt); 1008 1009 /* Entry exists, so forward if necessary */ 1010 if (rt != NULL) { 1011 splx(s); 1012 return (ip_mdq(m, ifp, tunnel_src, rt, imo)); 1013 } 1014 1015 else { 1016 /* 1017 * If we don't have a route for packet's origin, 1018 * Make a copy of the packet & 1019 * send message to routing daemon 1020 */ 1021 1022 register struct mbuf *mb_rt; 1023 register struct mbuf *mb_ntry; 1024 register struct mbuf *mb0; 1025 register struct rtdetq *rte; 1026 register struct mbuf *rte_m; 1027 register u_long hash; 1028 register struct timeval tp; 1029 1030 mrtstat.mrts_no_route++; 1031 if (mrtdebug) 1032 log(LOG_DEBUG, "ip_mforward: no rte s %x g %x", 1033 ntohl(ip->ip_src.s_addr), 1034 ntohl(ip->ip_dst.s_addr)); 1035 1036 /* is there an upcall waiting for this packet? 
*/ 1037 hash = nethash_fc(ip->ip_src.s_addr, ip->ip_dst.s_addr); 1038 for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) { 1039 rt = mtod(mb_rt, struct mfc *); 1040 if (((ip->ip_src.s_addr & rt->mfc_originmask.s_addr) == 1041 rt->mfc_origin.s_addr) && 1042 (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) && 1043 (mb_rt->m_act != NULL)) 1044 break; 1045 } 1046 1047 if (mb_rt == NULL) { 1048 /* no upcall, so make a new entry */ 1049 MGET(mb_rt, M_DONTWAIT, MT_MRTABLE); 1050 if (mb_rt == NULL) { 1051 splx(s); 1052 return ENOBUFS; 1053 } 1054 1055 rt = mtod(mb_rt, struct mfc *); 1056 1057 /* insert new entry at head of hash chain */ 1058 rt->mfc_origin.s_addr = ip->ip_src.s_addr; 1059 rt->mfc_originmask.s_addr = (u_long)0xffffffff; 1060 rt->mfc_mcastgrp.s_addr = ip->ip_dst.s_addr; 1061 1062 /* link into table */ 1063 hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr); 1064 mb_rt->m_next = mfctable[hash]; 1065 mfctable[hash] = mb_rt; 1066 mb_rt->m_act = NULL; 1067 1068 } 1069 1070 /* determine if q has overflowed */ 1071 for (rte_m = mb_rt, hash = 0; rte_m->m_act; rte_m = rte_m->m_act) 1072 hash++; 1073 1074 if (hash > MAX_UPQ) { 1075 mrtstat.mrts_upq_ovflw++; 1076 splx(s); 1077 return 0; 1078 } 1079 1080 /* add this packet and timing, ifp info to m_act */ 1081 MGET(mb_ntry, M_DONTWAIT, MT_DATA); 1082 if (mb_ntry == NULL) { 1083 splx(s); 1084 return ENOBUFS; 1085 } 1086 1087 mb_ntry->m_act = NULL; 1088 rte = mtod(mb_ntry, struct rtdetq *); 1089 1090 mb0 = m_copy(m, 0, M_COPYALL); 1091 if (mb0 == NULL) { 1092 splx(s); 1093 return ENOBUFS; 1094 } 1095 1096 rte->m = mb0; 1097 rte->ifp = ifp; 1098 rte->tunnel_src = tunnel_src; 1099 rte->imo = imo; 1100 1101 rte_m->m_act = mb_ntry; 1102 1103 splx(s); 1104 1105 if (hash == 0) { 1106 /* 1107 * Send message to routing daemon to install 1108 * a route into the kernel table 1109 */ 1110 k_igmpsrc.sin_addr = ip->ip_src; 1111 k_igmpdst.sin_addr = ip->ip_dst; 1112 1113 mm = m_copy(m, 0, M_COPYALL); 1114 if (mm 
== NULL) { 1115 splx(s); 1116 return ENOBUFS; 1117 } 1118 1119 k_data = mtod(mm, struct ip *); 1120 k_data->ip_p = 0; 1121 1122 mrtstat.mrts_upcalls++; 1123 1124 raw_input(mm, &k_igmpproto, 1125 (struct sockaddr *)&k_igmpsrc, 1126 (struct sockaddr *)&k_igmpdst); 1127 1128 /* set timer to cleanup entry if upcall is lost */ 1129 timeout(cleanup_cache, (caddr_t)mb_rt, 100); 1130 timeout_val++; 1131 } 1132 1133 return 0; 1134 } 1135} 1136 1137#ifndef MROUTE_LKM 1138int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, 1139 struct ip_moptions *) = X_ip_mforward; 1140#endif 1141 1142/* 1143 * Clean up the cache entry if upcall is not serviced 1144 */ 1145static void 1146cleanup_cache(xmb_rt) 1147 void *xmb_rt; 1148{ 1149 struct mbuf *mb_rt = xmb_rt; 1150 struct mfc *rt; 1151 u_long hash; 1152 struct mbuf *prev_m0; 1153 struct mbuf *m0; 1154 struct mbuf *m; 1155 struct rtdetq *rte; 1156 int s; 1157 1158 rt = mtod(mb_rt, struct mfc *); 1159 hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr); 1160 1161 if (mrtdebug) 1162 log(LOG_DEBUG, "ip_mforward: cleanup ipm %d h %d s %x g %x", 1163 ip_mrouter, hash, ntohl(rt->mfc_origin.s_addr), 1164 ntohl(rt->mfc_mcastgrp.s_addr)); 1165 1166 mrtstat.mrts_cache_cleanups++; 1167 1168 /* 1169 * determine entry to be cleaned up in cache table 1170 */ 1171 s = splnet(); 1172 for (prev_m0 = m0 = mfctable[hash]; m0; prev_m0 = m0, m0 = m0->m_next) 1173 if (m0 == mb_rt) 1174 break; 1175 1176 /* 1177 * drop all the packets 1178 * free the mbuf with the pkt, if, timing info 1179 */ 1180 while (mb_rt->m_act) { 1181 m = mb_rt->m_act; 1182 mb_rt->m_act = m->m_act; 1183 1184 rte = mtod(m, struct rtdetq *); 1185 m_freem(rte->m); 1186 m_free(m); 1187 } 1188 1189 /* 1190 * Delete the entry from the cache 1191 */ 1192 if (prev_m0 != m0) { /* if moved past head of list */ 1193 MFREE(m0, prev_m0->m_next); 1194 } else /* delete head of list, it is in the table */ 1195 mfctable[hash] = m_free(m0); 1196 1197 timeout_val--; 1198 
splx(s); 1199} 1200 1201/* 1202 * Packet forwarding routine once entry in the cache is made 1203 */ 1204static int 1205ip_mdq(m, ifp, tunnel_src, rt, imo) 1206 register struct mbuf *m; 1207 register struct ifnet *ifp; 1208 register u_long tunnel_src; 1209 register struct mfc *rt; 1210 register struct ip_moptions *imo; 1211{ 1212 register struct ip *ip = mtod(m, struct ip *); 1213 register vifi_t vifi; 1214 register struct vif *vifp; 1215 1216 /* 1217 * Don't forward if it didn't arrive from the parent vif for its origin. 1218 * Notes: v_ifp is zero for src route tunnels, multicast_decap_if 1219 * for encapsulated tunnels and a real ifnet for non-tunnels so 1220 * the first part of the if catches wrong physical interface or 1221 * tunnel type; v_rmt_addr is zero for non-tunneled packets so 1222 * the 2nd part catches both packets that arrive via a tunnel 1223 * that shouldn't and packets that arrive via the wrong tunnel. 1224 */ 1225 vifi = rt->mfc_parent; 1226 if (viftable[vifi].v_ifp != ifp || 1227 (ifp == 0 && viftable[vifi].v_rmt_addr.s_addr != tunnel_src)) { 1228 /* came in the wrong interface */ 1229 if (mrtdebug) 1230 log(LOG_DEBUG, "wrong if: ifp %x vifi %d", 1231 ifp, vifi); 1232 ++mrtstat.mrts_wrong_if; 1233 return (int)tunnel_src; 1234 } 1235 1236 /* increment the interface and s-g counters */ 1237 viftable[vifi].v_pkt_in++; 1238 rt->mfc_pkt_cnt++; 1239 1240 /* 1241 * For each vif, decide if a copy of the packet should be forwarded. 
1242 * Forward if: 1243 * - the ttl exceeds the vif's threshold 1244 * - there are group members downstream on interface 1245 */ 1246#define MC_SEND(ip,vifp,m) { \ 1247 (vifp)->v_pkt_out++; \ 1248 if ((vifp)->v_flags & VIFF_SRCRT) \ 1249 srcrt_send((ip), (vifp), (m)); \ 1250 else if ((vifp)->v_flags & VIFF_TUNNEL) \ 1251 encap_send((ip), (vifp), (m)); \ 1252 else \ 1253 phyint_send((ip), (vifp), (m)); \ 1254 } 1255 1256/* If no options or the imo_multicast_vif option is 0, don't do this part 1257 */ 1258 if ((imo != NULL) && 1259 (( vifi = imo->imo_multicast_vif - 1) < numvifs) /*&& (vifi>=0)*/) 1260 { 1261 MC_SEND(ip,viftable+vifi,m); 1262 return (1); /* make sure we are done: No more physical sends */ 1263 } 1264 1265 for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++) 1266 if ((rt->mfc_ttls[vifi] > 0) && 1267 (ip->ip_ttl > rt->mfc_ttls[vifi])) 1268 MC_SEND(ip, vifp, m); 1269 1270 return 0; 1271} 1272 1273/* check if a vif number is legal/ok. This is used by ip_output, to export 1274 * numvifs there, 1275 */ 1276int 1277X_legal_vif_num(vif) 1278 int vif; 1279{ if (vif>=0 && vif<=numvifs) 1280 return(1); 1281 else 1282 return(0); 1283} 1284 1285#ifndef MROUTE_LKM 1286int (*legal_vif_num)(int) = X_legal_vif_num; 1287#endif 1288 1289static void 1290phyint_send(ip, vifp, m) 1291 struct ip *ip; 1292 struct vif *vifp; 1293 struct mbuf *m; 1294{ 1295 register struct mbuf *mb_copy; 1296 register struct mbuf *mopts; 1297 register struct ip_moptions *imo; 1298 1299 if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL) 1300 return; 1301 1302 MALLOC(imo, struct ip_moptions *, sizeof *imo, M_IPMOPTS, M_NOWAIT); 1303 if (imo == NULL) { 1304 m_freem(mb_copy); 1305 return; 1306 } 1307 1308 imo->imo_multicast_ifp = vifp->v_ifp; 1309 imo->imo_multicast_ttl = ip->ip_ttl - 1; 1310 imo->imo_multicast_loop = 1; 1311 1312 if (vifp->v_rate_limit <= 0) 1313 tbf_send_packet(vifp, mb_copy, imo); 1314 else 1315 tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len, 1316 
imo); 1317} 1318 1319static void 1320srcrt_send(ip, vifp, m) 1321 struct ip *ip; 1322 struct vif *vifp; 1323 struct mbuf *m; 1324{ 1325 struct mbuf *mb_copy, *mb_opts; 1326 register struct ip *ip_copy; 1327 u_char *cp; 1328 1329 /* 1330 * Make sure that adding the tunnel options won't exceed the 1331 * maximum allowed number of option bytes. 1332 */ 1333 if (ip->ip_hl > (60 - TUNNEL_LEN) >> 2) { 1334 mrtstat.mrts_cant_tunnel++; 1335 if (mrtdebug) 1336 log(LOG_DEBUG, "srcrt_send: no room for tunnel options, from %u", 1337 ntohl(ip->ip_src.s_addr)); 1338 return; 1339 } 1340 1341 if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL) 1342 return; 1343 1344 ip_copy = mtod(mb_copy, struct ip *); 1345 ip_copy->ip_ttl--; 1346 ip_copy->ip_dst = vifp->v_rmt_addr; /* remote tunnel end-point */ 1347 /* 1348 * Adjust the ip header length to account for the tunnel options. 1349 */ 1350 ip_copy->ip_hl += TUNNEL_LEN >> 2; 1351 ip_copy->ip_len += TUNNEL_LEN; 1352 MGET(mb_opts, M_DONTWAIT, MT_HEADER); 1353 if (mb_opts == NULL) { 1354 m_freem(mb_copy); 1355 return; 1356 } 1357 /* 1358 * 'Delete' the base ip header from the mb_copy chain 1359 */ 1360 mb_copy->m_len -= IP_HDR_LEN; 1361 mb_copy->m_data += IP_HDR_LEN; 1362 /* 1363 * Make mb_opts be the new head of the packet chain. 
1364 * Any options of the packet were left in the old packet chain head 1365 */ 1366 mb_opts->m_next = mb_copy; 1367 mb_opts->m_data += 16; 1368 mb_opts->m_len = IP_HDR_LEN + TUNNEL_LEN; 1369 /* 1370 * Copy the base ip header from the mb_copy chain to the new head mbuf 1371 */ 1372 bcopy((caddr_t)ip_copy, mtod(mb_opts, caddr_t), IP_HDR_LEN); 1373 /* 1374 * Add the NOP and LSRR after the base ip header 1375 */ 1376 cp = mtod(mb_opts, u_char *) + IP_HDR_LEN; 1377 *cp++ = IPOPT_NOP; 1378 *cp++ = IPOPT_LSRR; 1379 *cp++ = 11; /* LSRR option length */ 1380 *cp++ = 8; /* LSSR pointer to second element */ 1381 *(u_long*)cp = vifp->v_lcl_addr.s_addr; /* local tunnel end-point */ 1382 cp += 4; 1383 *(u_long*)cp = ip->ip_dst.s_addr; /* destination group */ 1384 1385 if (vifp->v_rate_limit <= 0) 1386 tbf_send_packet(vifp, mb_opts, 0); 1387 else 1388 tbf_control(vifp, mb_opts, 1389 mtod(mb_opts, struct ip *), ip_copy->ip_len, 0); 1390} 1391 1392static void 1393encap_send(ip, vifp, m) 1394 register struct ip *ip; 1395 register struct vif *vifp; 1396 register struct mbuf *m; 1397{ 1398 register struct mbuf *mb_copy; 1399 register struct ip *ip_copy; 1400 register int i, len = ip->ip_len; 1401 1402 /* 1403 * copy the old packet & pullup it's IP header into the 1404 * new mbuf so we can modify it. Try to fill the new 1405 * mbuf since if we don't the ethernet driver will. 1406 */ 1407 MGET(mb_copy, M_DONTWAIT, MT_DATA); 1408 if (mb_copy == NULL) 1409 return; 1410 mb_copy->m_data += 16; 1411 mb_copy->m_len = sizeof(multicast_encap_iphdr); 1412 1413 if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) { 1414 m_freem(mb_copy); 1415 return; 1416 } 1417 i = MHLEN - M_LEADINGSPACE(mb_copy); 1418 if (i > len) 1419 i = len; 1420 mb_copy = m_pullup(mb_copy, i); 1421 if (mb_copy == NULL) 1422 return; 1423 1424 /* 1425 * fill in the encapsulating IP header. 
1426 */ 1427 ip_copy = mtod(mb_copy, struct ip *); 1428 *ip_copy = multicast_encap_iphdr; 1429 ip_copy->ip_id = htons(ip_id++); 1430 ip_copy->ip_len += len; 1431 ip_copy->ip_src = vifp->v_lcl_addr; 1432 ip_copy->ip_dst = vifp->v_rmt_addr; 1433 1434 /* 1435 * turn the encapsulated IP header back into a valid one. 1436 */ 1437 ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr)); 1438 --ip->ip_ttl; 1439 HTONS(ip->ip_len); 1440 HTONS(ip->ip_off); 1441 ip->ip_sum = 0; 1442#if defined(LBL) && !defined(ultrix) 1443 ip->ip_sum = ~oc_cksum((caddr_t)ip, ip->ip_hl << 2, 0); 1444#else 1445 mb_copy->m_data += sizeof(multicast_encap_iphdr); 1446 ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); 1447 mb_copy->m_data -= sizeof(multicast_encap_iphdr); 1448#endif 1449 1450 if (vifp->v_rate_limit <= 0) 1451 tbf_send_packet(vifp, mb_copy, 0); 1452 else 1453 tbf_control(vifp, mb_copy, ip, ip_copy->ip_len, 0); 1454} 1455 1456/* 1457 * De-encapsulate a packet and feed it back through ip input (this 1458 * routine is called whenever IP gets a packet with proto type 1459 * ENCAP_PROTO and a local destination address). 1460 */ 1461void 1462#ifdef MROUTE_LKM 1463X_multiencap_decap(m) 1464#else 1465multiencap_decap(m) 1466#endif 1467 register struct mbuf *m; 1468{ 1469 struct ifnet *ifp = m->m_pkthdr.rcvif; 1470 register struct ip *ip = mtod(m, struct ip *); 1471 register int hlen = ip->ip_hl << 2; 1472 register int s; 1473 register struct ifqueue *ifq; 1474 register struct vif *vifp; 1475 1476 if (ip->ip_p != ENCAP_PROTO) { 1477 rip_input(m); 1478 return; 1479 } 1480 /* 1481 * dump the packet if it's not to a multicast destination or if 1482 * we don't have an encapsulating tunnel with the source. 1483 * Note: This code assumes that the remote site IP address 1484 * uniquely identifies the tunnel (i.e., that this site has 1485 * at most one tunnel with the remote site). 1486 */ 1487 if (! 
IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) { 1488 ++mrtstat.mrts_bad_tunnel; 1489 m_freem(m); 1490 return; 1491 } 1492 if (ip->ip_src.s_addr != last_encap_src) { 1493 register struct vif *vife; 1494 1495 vifp = viftable; 1496 vife = vifp + numvifs; 1497 last_encap_src = ip->ip_src.s_addr; 1498 last_encap_vif = 0; 1499 for ( ; vifp < vife; ++vifp) 1500 if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) { 1501 if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT)) 1502 == VIFF_TUNNEL) 1503 last_encap_vif = vifp; 1504 break; 1505 } 1506 } 1507 if ((vifp = last_encap_vif) == 0) { 1508 last_encap_src = 0; 1509 mrtstat.mrts_cant_tunnel++; /*XXX*/ 1510 m_freem(m); 1511 if (mrtdebug) 1512 log(LOG_DEBUG, "ip_mforward: no tunnel with %u", 1513 ntohl(ip->ip_src.s_addr)); 1514 return; 1515 } 1516 ifp = vifp->v_ifp; 1517 hlen -= sizeof(struct ifnet *); 1518 m->m_data += hlen; 1519 m->m_len -= hlen; 1520 *(mtod(m, struct ifnet **)) = ifp; 1521 ifq = &ipintrq; 1522 s = splimp(); 1523 if (IF_QFULL(ifq)) { 1524 IF_DROP(ifq); 1525 m_freem(m); 1526 } else { 1527 IF_ENQUEUE(ifq, m); 1528 /* 1529 * normally we would need a "schednetisr(NETISR_IP)" 1530 * here but we were called by ip_input and it is going 1531 * to loop back & try to dequeue the packet we just 1532 * queued as soon as we return so we avoid the 1533 * unnecessary software interrrupt. 
1534 */ 1535 } 1536 splx(s); 1537} 1538 1539/* 1540 * Token bucket filter module 1541 */ 1542void 1543tbf_control(vifp, m, ip, p_len, imo) 1544 register struct vif *vifp; 1545 register struct mbuf *m; 1546 register struct ip *ip; 1547 register u_long p_len; 1548 struct ip_moptions *imo; 1549{ 1550 tbf_update_tokens(vifp); 1551 1552 /* if there are enough tokens, 1553 * and the queue is empty, 1554 * send this packet out 1555 */ 1556 1557 if (vifp->v_tbf->q_len == 0) { 1558 if (p_len <= vifp->v_tbf->n_tok) { 1559 vifp->v_tbf->n_tok -= p_len; 1560 tbf_send_packet(vifp, m, imo); 1561 } else if (p_len > MAX_BKT_SIZE) { 1562 /* drop if packet is too large */ 1563 mrtstat.mrts_pkt2large++; 1564 m_freem(m); 1565 return; 1566 } else { 1567 /* queue packet and timeout till later */ 1568 tbf_queue(vifp, m, ip, imo); 1569 timeout(tbf_reprocess_q, (caddr_t)vifp, 1); 1570 } 1571 } else if (vifp->v_tbf->q_len < MAXQSIZE) { 1572 /* finite queue length, so queue pkts and process queue */ 1573 tbf_queue(vifp, m, ip, imo); 1574 tbf_process_q(vifp); 1575 } else { 1576 /* queue length too much, try to dq and queue and process */ 1577 if (!tbf_dq_sel(vifp, ip)) { 1578 mrtstat.mrts_q_overflow++; 1579 m_freem(m); 1580 return; 1581 } else { 1582 tbf_queue(vifp, m, ip, imo); 1583 tbf_process_q(vifp); 1584 } 1585 } 1586 return; 1587} 1588 1589/* 1590 * adds a packet to the queue at the interface 1591 */ 1592void 1593tbf_queue(vifp, m, ip, imo) 1594 register struct vif *vifp; 1595 register struct mbuf *m; 1596 register struct ip *ip; 1597 struct ip_moptions *imo; 1598{ 1599 register u_long ql; 1600 register int index = (vifp - viftable); 1601 register int s = splnet(); 1602 1603 ql = vifp->v_tbf->q_len; 1604 1605 qtable[index][ql].pkt_m = m; 1606 qtable[index][ql].pkt_len = (mtod(m, struct ip *))->ip_len; 1607 qtable[index][ql].pkt_ip = ip; 1608 qtable[index][ql].pkt_imo = imo; 1609 1610 vifp->v_tbf->q_len++; 1611 splx(s); 1612} 1613 1614 1615/* 1616 * processes the queue at the interface 
1617 */ 1618void 1619tbf_process_q(vifp) 1620 register struct vif *vifp; 1621{ 1622 register struct mbuf *m; 1623 register struct pkt_queue pkt_1; 1624 register int index = (vifp - viftable); 1625 register int s = splnet(); 1626 1627 /* loop through the queue at the interface and send as many packets 1628 * as possible 1629 */ 1630 while (vifp->v_tbf->q_len > 0) { 1631 /* locate the first packet */ 1632 pkt_1.pkt_len = ((qtable[index][0]).pkt_len); 1633 pkt_1.pkt_m = (qtable[index][0]).pkt_m; 1634 pkt_1.pkt_ip = (qtable[index][0]).pkt_ip; 1635 pkt_1.pkt_imo = (qtable[index][0]).pkt_imo; 1636 1637 /* determine if the packet can be sent */ 1638 if (pkt_1.pkt_len <= vifp->v_tbf->n_tok) { 1639 /* if so, 1640 * reduce no of tokens, dequeue the queue, 1641 * send the packet. 1642 */ 1643 vifp->v_tbf->n_tok -= pkt_1.pkt_len; 1644 1645 tbf_dequeue(vifp, 0); 1646 1647 tbf_send_packet(vifp, pkt_1.pkt_m, pkt_1.pkt_imo); 1648 1649 } else break; 1650 } 1651 splx(s); 1652} 1653 1654/* 1655 * removes the jth packet from the queue at the interface 1656 */ 1657void 1658tbf_dequeue(vifp,j) 1659 register struct vif *vifp; 1660 register int j; 1661{ 1662 register u_long index = vifp - viftable; 1663 register int i; 1664 1665 for (i=j+1; i <= vifp->v_tbf->q_len - 1; i++) { 1666 qtable[index][i-1].pkt_m = qtable[index][i].pkt_m; 1667 qtable[index][i-1].pkt_len = qtable[index][i].pkt_len; 1668 qtable[index][i-1].pkt_ip = qtable[index][i].pkt_ip; 1669 qtable[index][i-1].pkt_imo = qtable[index][i].pkt_imo; 1670 } 1671 qtable[index][i-1].pkt_m = NULL; 1672 qtable[index][i-1].pkt_len = NULL; 1673 qtable[index][i-1].pkt_ip = NULL; 1674 qtable[index][i-1].pkt_imo = NULL; 1675 1676 vifp->v_tbf->q_len--; 1677 1678 if (tbfdebug > 1) 1679 log(LOG_DEBUG, "tbf_dequeue: vif# %d qlen %d",vifp-viftable, i-1); 1680} 1681 1682void 1683tbf_reprocess_q(xvifp) 1684 void *xvifp; 1685{ 1686 register struct vif *vifp = xvifp; 1687 if (ip_mrouter == NULL) 1688 return; 1689 1690 tbf_update_tokens(vifp); 1691 
1692 tbf_process_q(vifp); 1693 1694 if (vifp->v_tbf->q_len) 1695 timeout(tbf_reprocess_q, (caddr_t)vifp, 1); 1696} 1697 1698/* function that will selectively discard a member of the queue 1699 * based on the precedence value and the priority obtained through 1700 * a lookup table - not yet implemented accurately! 1701 */ 1702int 1703tbf_dq_sel(vifp, ip) 1704 register struct vif *vifp; 1705 register struct ip *ip; 1706{ 1707 register int i; 1708 register int s = splnet(); 1709 register u_int p; 1710 1711 p = priority(vifp, ip); 1712 1713 for(i=vifp->v_tbf->q_len-1;i >= 0;i--) { 1714 if (p > priority(vifp, qtable[vifp-viftable][i].pkt_ip)) { 1715 m_freem(qtable[vifp-viftable][i].pkt_m); 1716 tbf_dequeue(vifp,i); 1717 splx(s); 1718 mrtstat.mrts_drop_sel++; 1719 return(1); 1720 } 1721 } 1722 splx(s); 1723 return(0); 1724} 1725 1726void 1727tbf_send_packet(vifp, m, imo) 1728 register struct vif *vifp; 1729 register struct mbuf *m; 1730 struct ip_moptions *imo; 1731{ 1732 register struct mbuf *mcp; 1733 int error; 1734 int s = splnet(); 1735 1736 /* if source route tunnels */ 1737 if (vifp->v_flags & VIFF_SRCRT) { 1738 error = ip_output(m, (struct mbuf *)0, (struct route *)0, 1739 IP_FORWARDING, imo); 1740 if (mrtdebug > 1) 1741 log(LOG_DEBUG, "srcrt_send on vif %d err %d", vifp-viftable, error); 1742 } else if (vifp->v_flags & VIFF_TUNNEL) { 1743 /* If tunnel options */ 1744 ip_output(m, (struct mbuf *)0, (struct route *)0, 1745 IP_FORWARDING, imo); 1746 } else { 1747 /* if physical interface option, extract the options and then send */ 1748 error = ip_output(m, (struct mbuf *)0, (struct route *)0, 1749 IP_FORWARDING, imo); 1750 FREE(imo, M_IPMOPTS); 1751 1752 if (mrtdebug > 1) 1753 log(LOG_DEBUG, "phyint_send on vif %d err %d", vifp-viftable, error); 1754 } 1755 splx(s); 1756} 1757 1758/* determine the current time and then 1759 * the elapsed time (between the last time and time now) 1760 * in milliseconds & update the no. 
of tokens in the bucket 1761 */ 1762void 1763tbf_update_tokens(vifp) 1764 register struct vif *vifp; 1765{ 1766 struct timeval tp; 1767 register u_long t; 1768 register u_long elapsed; 1769 register int s = splnet(); 1770 1771 GET_TIME(tp); 1772 1773 t = tp.tv_sec*1000 + tp.tv_usec/1000; 1774 1775 elapsed = (t - vifp->v_tbf->last_pkt_t) * vifp->v_rate_limit /8; 1776 vifp->v_tbf->n_tok += elapsed; 1777 vifp->v_tbf->last_pkt_t = t; 1778 1779 if (vifp->v_tbf->n_tok > MAX_BKT_SIZE) 1780 vifp->v_tbf->n_tok = MAX_BKT_SIZE; 1781 1782 splx(s); 1783} 1784 1785static int 1786priority(vifp, ip) 1787 register struct vif *vifp; 1788 register struct ip *ip; 1789{ 1790 register u_long graddr; 1791 register int prio; 1792 1793 /* temporary hack; will add general packet classifier some day */ 1794 1795 prio = 50; /* default priority */ 1796 1797 /* check for source route options and add option length to get dst */ 1798 if (vifp->v_flags & VIFF_SRCRT) 1799 graddr = ntohl((ip+8)->ip_dst.s_addr); 1800 else 1801 graddr = ntohl(ip->ip_dst.s_addr); 1802 1803 switch (graddr & 0xf) { 1804 case 0x0: break; 1805 case 0x1: if (graddr == 0xe0020001) prio = 65; /* MBone Audio */ 1806 break; 1807 case 0x2: break; 1808 case 0x3: break; 1809 case 0x4: break; 1810 case 0x5: break; 1811 case 0x6: break; 1812 case 0x7: break; 1813 case 0x8: break; 1814 case 0x9: break; 1815 case 0xa: if (graddr == 0xe000010a) prio = 85; /* IETF Low Audio 1 */ 1816 break; 1817 case 0xb: if (graddr == 0xe000010b) prio = 75; /* IETF Audio 1 */ 1818 break; 1819 case 0xc: if (graddr == 0xe000010c) prio = 60; /* IETF Video 1 */ 1820 break; 1821 case 0xd: if (graddr == 0xe000010d) prio = 80; /* IETF Low Audio 2 */ 1822 break; 1823 case 0xe: if (graddr == 0xe000010e) prio = 70; /* IETF Audio 2 */ 1824 break; 1825 case 0xf: if (graddr == 0xe000010f) prio = 55; /* IETF Video 2 */ 1826 break; 1827 } 1828 1829 if (tbfdebug > 1) log(LOG_DEBUG, "graddr%x prio%d", graddr, prio); 1830 1831 return prio; 1832} 1833 1834/* 1835 * End 
of token bucket filter modifications 1836 */ 1837 1838#ifdef MROUTE_LKM 1839#include <sys/conf.h> 1840#include <sys/exec.h> 1841#include <sys/sysent.h> 1842#include <sys/lkm.h> 1843 1844MOD_MISC("ip_mroute_mod") 1845 1846static int 1847ip_mroute_mod_handle(struct lkm_table *lkmtp, int cmd) 1848{ 1849 int i; 1850 struct lkm_misc *args = lkmtp->private.lkm_misc; 1851 int err = 0; 1852 1853 switch(cmd) { 1854 static int (*old_ip_mrouter_cmd)(); 1855 static int (*old_ip_mrouter_done)(); 1856 static int (*old_ip_mforward)(); 1857 static int (*old_mrt_ioctl)(); 1858 static int (*old_proto4_input)(); 1859 static int (*old_legal_vif_num)(); 1860 extern u_char ip_protox[]; 1861 extern struct protosw inetsw[]; 1862 1863 case LKM_E_LOAD: 1864 if(lkmexists(lkmtp) || ip_mrtproto) 1865 return(EEXIST); 1866 old_ip_mrouter_cmd = ip_mrouter_cmd; 1867 ip_mrouter_cmd = X_ip_mrouter_cmd; 1868 old_ip_mrouter_done = ip_mrouter_done; 1869 ip_mrouter_done = X_ip_mrouter_done; 1870 old_ip_mforward = ip_mforward; 1871 ip_mforward = X_ip_mforward; 1872 old_mrt_ioctl = mrt_ioctl; 1873 mrt_ioctl = X_mrt_ioctl; 1874 old_proto4_input = inetsw[ip_protox[IPPROTO_ENCAP]].pr_input; 1875 inetsw[ip_protox[IPPROTO_ENCAP]].pr_input = X_multiencap_decap; 1876 old_legal_vif_num = legal_vif_num; 1877 legal_vif_num = X_legal_vif_num; 1878 ip_mrtproto = IGMP_DVMRP; 1879 1880 printf("\nIP multicast routing loaded\n"); 1881 break; 1882 1883 case LKM_E_UNLOAD: 1884 if (ip_mrouter) 1885 return EINVAL; 1886 1887 ip_mrouter_cmd = old_ip_mrouter_cmd; 1888 ip_mrouter_done = old_ip_mrouter_done; 1889 ip_mforward = old_ip_mforward; 1890 mrt_ioctl = old_mrt_ioctl; 1891 inetsw[ip_protox[IPPROTO_ENCAP]].pr_input = old_proto4_input; 1892 legal_vif_num = old_legal_vif_num; 1893 ip_mrtproto = 0; 1894 break; 1895 1896 default: 1897 err = EINVAL; 1898 break; 1899 } 1900 1901 return(err); 1902} 1903 1904int 1905ip_mroute_mod(struct lkm_table *lkmtp, int cmd, int ver) { 1906 DISPATCH(lkmtp, cmd, ver, ip_mroute_mod_handle, 
ip_mroute_mod_handle, 1907 nosys); 1908} 1909 1910#endif /* MROUTE_LKM */ 1911#endif /* MROUTING */ 1912 1913 1914