/* ip_mroute.c revision 7593 */
/*
 * IP multicast forwarding procedures
 *
 * Written by David Waitzman, BBN Labs, August 1988.
 * Modified by Steve Deering, Stanford, February 1989.
 * Modified by Mark J. Steiglitz, Stanford, May, 1991
 * Modified by Van Jacobson, LBL, January 1993
 * Modified by Ajit Thyagarajan, PARC, August 1993
 *
 * MROUTING 1.8
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/ioctl.h>
#include <sys/syslog.h>
#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/igmp.h>
#include <netinet/igmp_var.h>
#include <netinet/ip_mroute.h>

/* Byte-swap-in-place helpers; defined only if the system headers did not. */
#ifndef NTOHL
#if BYTE_ORDER != BIG_ENDIAN
#define NTOHL(d) ((d) = ntohl((d)))
#define NTOHS(d) ((d) = ntohs((u_short)(d)))
#define HTONL(d) ((d) = htonl((d)))
#define HTONS(d) ((d) = htons((u_short)(d)))
#else
#define NTOHL(d)
#define NTOHS(d)
#define HTONL(d)
#define HTONS(d)
#endif
#endif

#ifndef MROUTING
/*
 * Dummy routines and globals used when multicast routing is not compiled in.
 * These keep the rest of the kernel linking: the function pointers below
 * (ip_mrouter_cmd, ip_mforward, ...) are the hooks called from elsewhere.
 */

u_int	ip_mrtproto = 0;
struct socket  *ip_mrouter  = NULL;
struct mrtstat	mrtstat;

/* Stub: reject all DVMRP setsockopt commands. */
int
_ip_mrouter_cmd(cmd, so, m)
	int cmd;
	struct socket *so;
	struct mbuf *m;
{
	return(EOPNOTSUPP);
}

int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = _ip_mrouter_cmd;

/* Stub: shutting down a router that never started always succeeds. */
int
_ip_mrouter_done()
{
	return(0);
}

int (*ip_mrouter_done)(void) = _ip_mrouter_done;

/* Stub: never forward; 0 tells the caller to keep the packet. */
int
_ip_mforward(ip, ifp, m, imo)
	struct ip *ip;
	struct ifnet *ifp;
	struct mbuf *m;
	struct ip_moptions *imo;
{
	return(0);
}

int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
		   struct ip_moptions *) = _ip_mforward;

/* Stub: no multicast routing state to report. */
int
_mrt_ioctl(int req, caddr_t data, struct proc *p)
{
	return EOPNOTSUPP;
}

int (*mrt_ioctl)(int, caddr_t, struct proc *) = _mrt_ioctl;

/* Without MROUTING, encapsulated datagrams just go to raw IP input. */
void multiencap_decap(struct mbuf *m) { /* XXX must fixup manually */
	rip_input(m);
}

/* NOTE(review): left as a null pointer here, unlike the other hooks which
 * point at stubs -- callers are presumably expected to test it first. */
int (*legal_vif_num)(int) = 0;

#else /* MROUTING */

#define INSIZ		sizeof(struct in_addr)
#define	same(a1, a2) \
	(bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0)

#define MT_MRTABLE MT_RTABLE	/* since nothing else uses it */

/*
 * Globals. All but ip_mrouter and ip_mrtproto could be static,
 * except for netstat or debugging purposes.
 */
#ifndef MROUTE_LKM
struct socket  *ip_mrouter  = NULL;	/* socket owned by the mrouted daemon */
struct mrtstat	mrtstat;
int		ip_mrtproto = IGMP_DVMRP;    /* for netstat only */
#else /* MROUTE_LKM */
extern struct mrtstat mrtstat;
extern int ip_mrtproto;
#endif

#define NO_RTE_FOUND 	0x1
#define RTE_FOUND	0x2

struct mbuf    *mfctable[MFCTBLSIZ];	/* forwarding-cache hash chains */
struct vif	viftable[MAXVIFS];	/* virtual interface table */
u_int		mrtdebug = 0;	  /* debug level 	*/
u_int       	tbfdebug = 0;     /* tbf debug level 	*/

u_long timeout_val = 0;			/* count of outstanding upcalls */

/*
 * Define the token bucket filter structures
 * tbftable -> each vif has one of these for storing info
 * qtable   -> each interface has an associated queue of pkts
 */

struct tbf tbftable[MAXVIFS];
struct pkt_queue qtable[MAXVIFS][MAXQSIZE];

/*
 * 'Interfaces' associated with decapsulator (so we can tell
 * packets that went through it from ones that get reflected
 * by a broken gateway).  These interfaces are never linked into
 * the system ifnet list & no routes point to them.  I.e., packets
 * can't be sent this way.  They only exist as a placeholder for
 * multicast source verification.
 */
struct ifnet multicast_decap_if[MAXVIFS];

#define ENCAP_TTL 64
#define ENCAP_PROTO 4

/* prototype IP hdr for encapsulated packets */
struct ip multicast_encap_iphdr = {
#if BYTE_ORDER == LITTLE_ENDIAN
	sizeof(struct ip) >> 2, IPVERSION,
#else
	IPVERSION, sizeof(struct ip) >> 2,
#endif
	0,				/* tos */
	sizeof(struct ip),		/* total length */
	0,				/* id */
	0,				/* frag offset */
	ENCAP_TTL, ENCAP_PROTO,
	0,				/* checksum */
};

/*
 * Private variables.
 */
static vifi_t	   numvifs = 0;		/* one past highest vif in use */
static void (*encap_oldrawip)() = 0;	/* saved pr_input we displaced */

/*
 * one-back cache used by multiencap_decap to locate a tunnel's vif
 * given a datagram's src ip address.
 */
static u_long last_encap_src;
static struct vif *last_encap_vif;

static u_long nethash_fc(u_long, u_long);
static struct mfc *mfcfind(u_long, u_long);
int get_sg_cnt(struct sioc_sg_req *);
int get_vif_cnt(struct sioc_vif_req *);
int get_vifs(caddr_t);
static int add_vif(struct vifctl *);
static int del_vif(vifi_t *);
static int add_mfc(struct mfcctl *);
static int del_mfc(struct delmfcctl *);
static void cleanup_cache(void *);
static int ip_mdq(struct mbuf *, struct ifnet *, u_long, struct mfc *,
		  struct ip_moptions *);
static void phyint_send(struct ip *, struct vif *, struct mbuf *);
static void srcrt_send(struct ip *, struct vif *, struct mbuf *);
static void encap_send(struct ip *, struct vif *, struct mbuf *);
void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long,
		 struct ip_moptions *);
void tbf_queue(struct vif *, struct mbuf *, struct ip *, struct ip_moptions *);
void tbf_process_q(struct vif *);
void tbf_dequeue(struct vif *, int);
void tbf_reprocess_q(void *);
int tbf_dq_sel(struct vif *, struct ip *);
void tbf_send_packet(struct vif *, struct mbuf *, struct ip_moptions *);
void tbf_update_tokens(struct vif *);
static int priority(struct vif *, struct ip *);
static int ip_mrouter_init(struct socket *);
void multiencap_decap(struct mbuf *m);

/*
 * A simple hash function: returns MFCHASHMOD of the low-order octet of
 * the argument's network or subnet number and the multicast group assoc.
 */
static u_long
nethash_fc(m,n)
	register u_long m;
	register u_long n;
{
	struct in_addr in1;
	struct in_addr in2;

	/* NOTE(review): if in_netof() yields 0 (e.g. address 0.0.0.0) the
	 * shift loop below never terminates -- confirm callers can never
	 * present such an origin. */
	in1.s_addr = m;
	m = in_netof(in1);
	while ((m & 0xff) == 0) m >>= 8;

	in2.s_addr = n;
	n = in_netof(in2);
	while ((n & 0xff) == 0) n >>= 8;

	return (MFCHASHMOD(m) ^ MFCHASHMOD(n));
}

/*
 * this is a direct-mapped cache used to speed the mapping from a
 * datagram source address to the associated multicast route.  Note
 * that unlike mrttable, the hash is on IP address, not IP net number.
 */
#define MFCHASHSIZ 1024
#define MFCHASH(a, g) ((((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
			((g) >> 20) ^ ((g) >> 10) ^ (g)) & (MFCHASHSIZ-1))
struct mfc *mfchash[MFCHASHSIZ];

/*
 * Find a route for a given origin IP address and Multicast group address
 * Type of service parameter to be added in the future!!!
 * On a cache miss this falls back to mfcfind() and, if that succeeds,
 * refills the direct-mapped slot.
 */
#define MFCFIND(o, g, rt) { \
	register u_int _mrhasho = o; \
	register u_int _mrhashg = g; \
	_mrhasho = MFCHASH(_mrhasho, _mrhashg); \
	++mrtstat.mrts_mfc_lookups; \
	rt = mfchash[_mrhasho]; \
	if ((rt == NULL) || \
	    ((o & rt->mfc_originmask.s_addr) != rt->mfc_origin.s_addr) || \
	    (g != rt->mfc_mcastgrp.s_addr)) \
		if ((rt = mfcfind(o, g)) != NULL) \
			mfchash[_mrhasho] = rt; \
}

/*
 * Find route by examining hash table entries.
 * Skips entries with a pending upcall queue (m_act != NULL); returns
 * NULL (and counts a miss) when no completed entry matches.
 */
static struct mfc *
mfcfind(origin, mcastgrp)
	u_long origin;
	u_long mcastgrp;
{
	register struct mbuf *mb_rt;
	register struct mfc *rt;
	register u_long hash;

	hash = nethash_fc(origin, mcastgrp);
	for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) {
		rt = mtod(mb_rt, struct mfc *);
		if (((origin & rt->mfc_originmask.s_addr) == rt->mfc_origin.s_addr) &&
		    (mcastgrp == rt->mfc_mcastgrp.s_addr) &&
		    (mb_rt->m_act == NULL))
			return (rt);
	}
	mrtstat.mrts_mfc_misses++;
	return NULL;
}

/*
 * Macros to
compute elapsed time efficiently 292 * Borrowed from Van Jacobson's scheduling code 293 */ 294#define TV_DELTA(a, b, delta) { \ 295 register int xxs; \ 296 \ 297 delta = (a).tv_usec - (b).tv_usec; \ 298 if ((xxs = (a).tv_sec - (b).tv_sec)) { \ 299 switch (xxs) { \ 300 case 2: \ 301 delta += 1000000; \ 302 /* fall through */ \ 303 case 1: \ 304 delta += 1000000; \ 305 break; \ 306 default: \ 307 delta += (1000000 * xxs); \ 308 } \ 309 } \ 310} 311 312#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \ 313 (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) 314 315/* 316 * Handle DVMRP setsockopt commands to modify the multicast routing tables. 317 */ 318int 319X_ip_mrouter_cmd(cmd, so, m) 320 int cmd; 321 struct socket *so; 322 struct mbuf *m; 323{ 324 if (cmd != DVMRP_INIT && so != ip_mrouter) return EACCES; 325 326 switch (cmd) { 327 case DVMRP_INIT: return ip_mrouter_init(so); 328 case DVMRP_DONE: return ip_mrouter_done(); 329 case DVMRP_ADD_VIF: return add_vif (mtod(m, struct vifctl *)); 330 case DVMRP_DEL_VIF: return del_vif (mtod(m, vifi_t *)); 331 case DVMRP_ADD_MFC: return add_mfc (mtod(m, struct mfcctl *)); 332 case DVMRP_DEL_MFC: return del_mfc (mtod(m, struct delmfcctl *)); 333 default: return EOPNOTSUPP; 334 } 335} 336 337#ifndef MROUTE_LKM 338int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = X_ip_mrouter_cmd; 339#endif 340 341/* 342 * Handle ioctl commands to obtain information from the cache 343 */ 344int 345X_mrt_ioctl(cmd, data) 346 int cmd; 347 caddr_t data; 348{ 349 int error = 0; 350 351 switch (cmd) { 352 case (SIOCGETVIFINF): /* Read Virtual Interface (m/cast) */ 353 return (get_vifs(data)); 354 break; 355 case (SIOCGETVIFCNT): 356 return (get_vif_cnt((struct sioc_vif_req *)data)); 357 break; 358 case (SIOCGETSGCNT): 359 return (get_sg_cnt((struct sioc_sg_req *)data)); 360 break; 361 default: 362 return (EINVAL); 363 break; 364 } 365 return error; 366} 367 368#ifndef MROUTE_LKM 369int (*mrt_ioctl)(int, caddr_t, struct proc *) = 
X_mrt_ioctl; 370#endif 371 372/* 373 * returns the packet count for the source group provided 374 */ 375int 376get_sg_cnt(req) 377 register struct sioc_sg_req *req; 378{ 379 register struct mfc *rt; 380 int s; 381 382 s = splnet(); 383 MFCFIND(req->src.s_addr, req->grp.s_addr, rt); 384 splx(s); 385 if (rt != NULL) 386 req->count = rt->mfc_pkt_cnt; 387 else 388 req->count = 0xffffffff; 389 390 return 0; 391} 392 393/* 394 * returns the input and output packet counts on the interface provided 395 */ 396int 397get_vif_cnt(req) 398 register struct sioc_vif_req *req; 399{ 400 register vifi_t vifi = req->vifi; 401 402 req->icount = viftable[vifi].v_pkt_in; 403 req->ocount = viftable[vifi].v_pkt_out; 404 405 return 0; 406} 407 408int 409get_vifs(data) 410 char *data; 411{ 412 struct vif_conf *vifc = (struct vif_conf *)data; 413 struct vif_req *vifrp, vifr; 414 int space, error=0; 415 416 vifi_t vifi; 417 int s; 418 419 space = vifc->vifc_len; 420 vifrp = vifc->vifc_req; 421 422 s = splnet(); 423 vifc->vifc_num=numvifs; 424 425 for (vifi = 0; vifi < numvifs; vifi++, vifrp++) { 426 if (viftable[vifi].v_lcl_addr.s_addr != 0) { 427 vifr.v_flags=viftable[vifi].v_flags; 428 vifr.v_threshold=viftable[vifi].v_threshold; 429 vifr.v_lcl_addr=viftable[vifi].v_lcl_addr; 430 vifr.v_rmt_addr=viftable[vifi].v_rmt_addr; 431 strncpy(vifr.v_if_name,viftable[vifi].v_ifp->if_name,IFNAMSIZ); 432 if ((space -= sizeof(vifr)) < 0) { 433 splx(s); 434 return(ENOSPC); 435 } 436 error = copyout((caddr_t)&vifr,(caddr_t)vifrp,(u_int)(sizeof vifr)); 437 if (error) { 438 splx(s); 439 return(error); 440 } 441 } 442 } 443 splx(s); 444 return 0; 445} 446/* 447 * Enable multicast routing 448 */ 449static int 450ip_mrouter_init(so) 451 struct socket *so; 452{ 453 if (so->so_type != SOCK_RAW || 454 so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP; 455 456 if (ip_mrouter != NULL) return EADDRINUSE; 457 458 ip_mrouter = so; 459 460 if (mrtdebug) 461 log(LOG_DEBUG, "ip_mrouter_init\n"); 462 463 
return 0; 464} 465 466/* 467 * Disable multicast routing 468 */ 469int 470X_ip_mrouter_done() 471{ 472 vifi_t vifi; 473 int i; 474 struct ifnet *ifp; 475 struct ifreq ifr; 476 struct mbuf *mb_rt; 477 struct mbuf *m; 478 struct rtdetq *rte; 479 int s; 480 481 s = splnet(); 482 483 /* 484 * For each phyint in use, disable promiscuous reception of all IP 485 * multicasts. 486 */ 487 for (vifi = 0; vifi < numvifs; vifi++) { 488 if (viftable[vifi].v_lcl_addr.s_addr != 0 && 489 !(viftable[vifi].v_flags & VIFF_TUNNEL)) { 490 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; 491 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr 492 = INADDR_ANY; 493 ifp = viftable[vifi].v_ifp; 494 (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); 495 } 496 } 497 bzero((caddr_t)qtable, sizeof(qtable)); 498 bzero((caddr_t)tbftable, sizeof(tbftable)); 499 bzero((caddr_t)viftable, sizeof(viftable)); 500 numvifs = 0; 501 502 /* 503 * Check if any outstanding timeouts remain 504 */ 505 if (timeout_val != 0) 506 for (i = 0; i < MFCTBLSIZ; i++) { 507 mb_rt = mfctable[i]; 508 while (mb_rt) { 509 if ( mb_rt->m_act != NULL) { 510 untimeout(cleanup_cache, (caddr_t)mb_rt); 511 while (mb_rt->m_act) { 512 m = mb_rt->m_act; 513 mb_rt->m_act = m->m_act; 514 rte = mtod(m, struct rtdetq *); 515 m_freem(rte->m); 516 m_free(m); 517 } 518 timeout_val--; 519 } 520 mb_rt = mb_rt->m_next; 521 } 522 if (timeout_val == 0) 523 break; 524 } 525 526 /* 527 * Free all multicast forwarding cache entries. 
528 */ 529 for (i = 0; i < MFCTBLSIZ; i++) 530 m_freem(mfctable[i]); 531 532 bzero((caddr_t)mfctable, sizeof(mfctable)); 533 bzero((caddr_t)mfchash, sizeof(mfchash)); 534 535 /* 536 * Reset de-encapsulation cache 537 */ 538 last_encap_src = NULL; 539 last_encap_vif = NULL; 540 541 ip_mrouter = NULL; 542 543 splx(s); 544 545 if (mrtdebug) 546 log(LOG_DEBUG, "ip_mrouter_done\n"); 547 548 return 0; 549} 550 551#ifndef MROUTE_LKM 552int (*ip_mrouter_done)(void) = X_ip_mrouter_done; 553#endif 554 555/* 556 * Add a vif to the vif table 557 */ 558static int 559add_vif(vifcp) 560 register struct vifctl *vifcp; 561{ 562 register struct vif *vifp = viftable + vifcp->vifc_vifi; 563 static struct sockaddr_in sin = {sizeof sin, AF_INET}; 564 struct ifaddr *ifa; 565 struct ifnet *ifp; 566 struct ifreq ifr; 567 int error, s; 568 struct tbf *v_tbf = tbftable + vifcp->vifc_vifi; 569 570 if (vifcp->vifc_vifi >= MAXVIFS) return EINVAL; 571 if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE; 572 573 /* Find the interface with an address in AF_INET family */ 574 sin.sin_addr = vifcp->vifc_lcl_addr; 575 ifa = ifa_ifwithaddr((struct sockaddr *)&sin); 576 if (ifa == 0) return EADDRNOTAVAIL; 577 ifp = ifa->ifa_ifp; 578 579 if (vifcp->vifc_flags & VIFF_TUNNEL) { 580 if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) { 581 if (encap_oldrawip == 0) { 582 extern struct protosw inetsw[]; 583 register u_char pr = ip_protox[ENCAP_PROTO]; 584 585 encap_oldrawip = inetsw[pr].pr_input; 586 inetsw[pr].pr_input = multiencap_decap; 587 for (s = 0; s < MAXVIFS; ++s) { 588 multicast_decap_if[s].if_name = "mdecap"; 589 multicast_decap_if[s].if_unit = s; 590 } 591 } 592 ifp = &multicast_decap_if[vifcp->vifc_vifi]; 593 } else { 594 ifp = 0; 595 } 596 } else { 597 /* Make sure the interface supports multicast */ 598 if ((ifp->if_flags & IFF_MULTICAST) == 0) 599 return EOPNOTSUPP; 600 601 /* Enable promiscuous reception of all IP multicasts from the if */ 602 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = 
AF_INET; 603 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY; 604 s = splnet(); 605 error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr); 606 splx(s); 607 if (error) 608 return error; 609 } 610 611 s = splnet(); 612 /* define parameters for the tbf structure */ 613 vifp->v_tbf = v_tbf; 614 vifp->v_tbf->q_len = 0; 615 vifp->v_tbf->n_tok = 0; 616 vifp->v_tbf->last_pkt_t = 0; 617 618 vifp->v_flags = vifcp->vifc_flags; 619 vifp->v_threshold = vifcp->vifc_threshold; 620 vifp->v_lcl_addr = vifcp->vifc_lcl_addr; 621 vifp->v_rmt_addr = vifcp->vifc_rmt_addr; 622 vifp->v_ifp = ifp; 623 vifp->v_rate_limit= vifcp->vifc_rate_limit; 624 /* initialize per vif pkt counters */ 625 vifp->v_pkt_in = 0; 626 vifp->v_pkt_out = 0; 627 splx(s); 628 629 /* Adjust numvifs up if the vifi is higher than numvifs */ 630 if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1; 631 632 if (mrtdebug) 633 log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d\n", 634 vifcp->vifc_vifi, 635 ntohl(vifcp->vifc_lcl_addr.s_addr), 636 (vifcp->vifc_flags & VIFF_TUNNEL) ? 
"rmtaddr" : "mask", 637 ntohl(vifcp->vifc_rmt_addr.s_addr), 638 vifcp->vifc_threshold, 639 vifcp->vifc_rate_limit); 640 641 return 0; 642} 643 644/* 645 * Delete a vif from the vif table 646 */ 647static int 648del_vif(vifip) 649 vifi_t *vifip; 650{ 651 register struct vif *vifp = viftable + *vifip; 652 register vifi_t vifi; 653 struct ifnet *ifp; 654 struct ifreq ifr; 655 int s; 656 657 if (*vifip >= numvifs) return EINVAL; 658 if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL; 659 660 s = splnet(); 661 662 if (!(vifp->v_flags & VIFF_TUNNEL)) { 663 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; 664 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY; 665 ifp = vifp->v_ifp; 666 (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); 667 } 668 669 if (vifp == last_encap_vif) { 670 last_encap_vif = 0; 671 last_encap_src = 0; 672 } 673 674 bzero((caddr_t)qtable[*vifip], 675 sizeof(qtable[*vifip])); 676 bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf))); 677 bzero((caddr_t)vifp, sizeof (*vifp)); 678 679 /* Adjust numvifs down */ 680 for (vifi = numvifs; vifi > 0; vifi--) 681 if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break; 682 numvifs = vifi; 683 684 splx(s); 685 686 if (mrtdebug) 687 log(LOG_DEBUG, "del_vif %d, numvifs %d\n", *vifip, numvifs); 688 689 return 0; 690} 691 692/* 693 * Add an mfc entry 694 */ 695static int 696add_mfc(mfccp) 697 struct mfcctl *mfccp; 698{ 699 struct mfc *rt; 700 struct mfc *rt1 = 0; 701 register struct mbuf *mb_rt; 702 struct mbuf *prev_mb_rt; 703 u_long hash; 704 struct mbuf *mb_ntry; 705 struct rtdetq *rte; 706 register u_short nstl; 707 int s; 708 int i; 709 710 rt = mfcfind(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); 711 712 /* If an entry already exists, just update the fields */ 713 if (rt) { 714 if (mrtdebug) 715 log(LOG_DEBUG,"add_mfc update o %x g %x m %x p %x\n", 716 ntohl(mfccp->mfcc_origin.s_addr), 717 ntohl(mfccp->mfcc_mcastgrp.s_addr), 718 
ntohl(mfccp->mfcc_originmask.s_addr), 719 mfccp->mfcc_parent); 720 721 s = splnet(); 722 rt->mfc_parent = mfccp->mfcc_parent; 723 for (i = 0; i < numvifs; i++) 724 VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); 725 splx(s); 726 return 0; 727 } 728 729 /* 730 * Find the entry for which the upcall was made and update 731 */ 732 s = splnet(); 733 hash = nethash_fc(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); 734 for (prev_mb_rt = mb_rt = mfctable[hash], nstl = 0; 735 mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { 736 737 rt = mtod(mb_rt, struct mfc *); 738 if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr) 739 == mfccp->mfcc_origin.s_addr) && 740 (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) && 741 (mb_rt->m_act != NULL)) { 742 743 if (!nstl++) { 744 if (mrtdebug) 745 log(LOG_DEBUG,"add_mfc o %x g %x m %x p %x dbg %x\n", 746 ntohl(mfccp->mfcc_origin.s_addr), 747 ntohl(mfccp->mfcc_mcastgrp.s_addr), 748 ntohl(mfccp->mfcc_originmask.s_addr), 749 mfccp->mfcc_parent, mb_rt->m_act); 750 751 rt->mfc_origin = mfccp->mfcc_origin; 752 rt->mfc_originmask = mfccp->mfcc_originmask; 753 rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 754 rt->mfc_parent = mfccp->mfcc_parent; 755 for (i = 0; i < numvifs; i++) 756 VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); 757 /* initialize pkt counters per src-grp */ 758 rt->mfc_pkt_cnt = 0; 759 rt1 = rt; 760 } 761 762 /* prevent cleanup of cache entry */ 763 untimeout(cleanup_cache, (caddr_t)mb_rt); 764 timeout_val--; 765 766 /* free packets Qed at the end of this entry */ 767 while (mb_rt->m_act) { 768 mb_ntry = mb_rt->m_act; 769 rte = mtod(mb_ntry, struct rtdetq *); 770 ip_mdq(rte->m, rte->ifp, rte->tunnel_src, 771 rt1, rte->imo); 772 mb_rt->m_act = mb_ntry->m_act; 773 m_freem(rte->m); 774 m_free(mb_ntry); 775 } 776 777 /* 778 * If more than one entry was created for a single upcall 779 * delete that entry 780 */ 781 if (nstl > 1) { 782 MFREE(mb_rt, prev_mb_rt->m_next); 783 mb_rt = prev_mb_rt; 784 } 785 } 786 
} 787 788 /* 789 * It is possible that an entry is being inserted without an upcall 790 */ 791 if (nstl == 0) { 792 if (mrtdebug) 793 log(LOG_DEBUG,"add_mfc no upcall h %d o %x g %x m %x p %x\n", 794 hash, ntohl(mfccp->mfcc_origin.s_addr), 795 ntohl(mfccp->mfcc_mcastgrp.s_addr), 796 ntohl(mfccp->mfcc_originmask.s_addr), 797 mfccp->mfcc_parent); 798 799 for (prev_mb_rt = mb_rt = mfctable[hash]; 800 mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { 801 802 rt = mtod(mb_rt, struct mfc *); 803 if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr) 804 == mfccp->mfcc_origin.s_addr) && 805 (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) { 806 807 rt->mfc_origin = mfccp->mfcc_origin; 808 rt->mfc_originmask = mfccp->mfcc_originmask; 809 rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 810 rt->mfc_parent = mfccp->mfcc_parent; 811 for (i = 0; i < numvifs; i++) 812 VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); 813 /* initialize pkt counters per src-grp */ 814 rt->mfc_pkt_cnt = 0; 815 } 816 } 817 if (mb_rt == NULL) { 818 /* no upcall, so make a new entry */ 819 MGET(mb_rt, M_DONTWAIT, MT_MRTABLE); 820 if (mb_rt == NULL) { 821 splx(s); 822 return ENOBUFS; 823 } 824 825 rt = mtod(mb_rt, struct mfc *); 826 827 /* insert new entry at head of hash chain */ 828 rt->mfc_origin = mfccp->mfcc_origin; 829 rt->mfc_originmask = mfccp->mfcc_originmask; 830 rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 831 rt->mfc_parent = mfccp->mfcc_parent; 832 for (i = 0; i < numvifs; i++) 833 VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); 834 /* initialize pkt counters per src-grp */ 835 rt->mfc_pkt_cnt = 0; 836 837 /* link into table */ 838 mb_rt->m_next = mfctable[hash]; 839 mfctable[hash] = mb_rt; 840 mb_rt->m_act = NULL; 841 } 842 } 843 splx(s); 844 return 0; 845} 846 847/* 848 * Delete an mfc entry 849 */ 850static int 851del_mfc(mfccp) 852 struct delmfcctl *mfccp; 853{ 854 struct in_addr origin; 855 struct in_addr mcastgrp; 856 struct mfc *rt; 857 struct mbuf *mb_rt; 858 struct mbuf 
*prev_mb_rt; 859 u_long hash; 860 struct mfc **cmfc; 861 struct mfc **cmfcend; 862 int s; 863 864 origin = mfccp->mfcc_origin; 865 mcastgrp = mfccp->mfcc_mcastgrp; 866 hash = nethash_fc(origin.s_addr, mcastgrp.s_addr); 867 868 if (mrtdebug) 869 log(LOG_DEBUG,"del_mfc orig %x mcastgrp %x\n", 870 ntohl(origin.s_addr), ntohl(mcastgrp.s_addr)); 871 872 for (prev_mb_rt = mb_rt = mfctable[hash] 873 ; mb_rt 874 ; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { 875 rt = mtod(mb_rt, struct mfc *); 876 if (origin.s_addr == rt->mfc_origin.s_addr && 877 mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && 878 mb_rt->m_act == NULL) 879 break; 880 } 881 if (mb_rt == NULL) { 882 return ESRCH; 883 } 884 885 s = splnet(); 886 887 cmfc = mfchash; 888 cmfcend = cmfc + MFCHASHSIZ; 889 for ( ; cmfc < cmfcend; ++cmfc) 890 if (*cmfc == rt) 891 *cmfc = 0; 892 893 if (prev_mb_rt != mb_rt) { /* if moved past head of list */ 894 MFREE(mb_rt, prev_mb_rt->m_next); 895 } else /* delete head of list, it is in the table */ 896 mfctable[hash] = m_free(mb_rt); 897 898 splx(s); 899 900 return 0; 901} 902 903/* 904 * IP multicast forwarding function. This function assumes that the packet 905 * pointed to by "ip" has arrived on (or is about to be sent to) the interface 906 * pointed to by "ifp", and the packet is to be relayed to other networks 907 * that have members of the packet's destination IP multicast group. 908 * 909 * The packet is returned unscathed to the caller, unless it is tunneled 910 * or erroneous, in which case a non-zero return value tells the caller to 911 * discard it. 
 */

#define IP_HDR_LEN  20	/* # bytes of fixed IP header (excluding options) */
#define TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */

int
X_ip_mforward(ip, ifp, m, imo)
	register struct ip *ip;
	struct ifnet *ifp;
	struct mbuf *m;
	struct ip_moptions *imo;
{
	register struct mfc *rt;
	register u_char *ipoptions;
	u_long tunnel_src;	/* 0 => arrived on a physical interface */
	static struct sockproto	k_igmpproto 	= { AF_INET, IPPROTO_IGMP };
	static struct sockaddr_in k_igmpsrc	= { sizeof k_igmpsrc, AF_INET };
	static struct sockaddr_in k_igmpdst 	= { sizeof k_igmpdst, AF_INET };
	register struct mbuf *mm;
	register struct ip *k_data;
	int s;

	if (mrtdebug > 1)
		log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %x (%s%d)\n",
		    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp,
		    ifp->if_name, ifp->if_unit);

	if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
	    (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) {
		/*
		 * Packet arrived via a physical interface.
		 */
		tunnel_src = 0;
	} else {
		/*
		 * Packet arrived through a source-route tunnel.
		 *
		 * A source-route tunneled packet has a single NOP option and a
		 * two-element
		 * loose-source-and-record-route (LSRR) option immediately following
		 * the fixed-size part of the IP header.  At this point in processing,
		 * the IP header should contain the following IP addresses:
		 *
		 * original source          - in the source address field
		 * destination group        - in the destination address field
		 * remote tunnel end-point  - in the first element of LSRR
		 * one of this host's addrs - in the second element of LSRR
		 *
		 * NOTE: RFC-1075 would have the original source and remote tunnel
		 *       end-point addresses swapped.  However, that could cause
		 *       delivery of ICMP error messages to innocent applications
		 *       on intermediate routing hosts! Therefore, we hereby
		 *       change the spec.
		 */

		/*
		 * Verify that the tunnel options are well-formed.
		 */
		if (ipoptions[0] != IPOPT_NOP ||
		    ipoptions[2] != 11 ||	/* LSRR option length   */
		    ipoptions[3] != 12 ||	/* LSRR address pointer  */
		    (tunnel_src = *(u_long *)(&ipoptions[4])) == 0) {
			mrtstat.mrts_bad_tunnel++;
			if (mrtdebug)
				log(LOG_DEBUG,
				    "ip_mforward: bad tunnel from %u (%x %x %x %x %x %x)\n",
				    ntohl(ip->ip_src.s_addr),
				    ipoptions[0], ipoptions[1], ipoptions[2], ipoptions[3],
				    *(u_long *)(&ipoptions[4]), *(u_long *)(&ipoptions[8]));
			return 1;	/* non-zero: caller discards */
		}

		/*
		 * Delete the tunnel options from the packet.
		 */
		ovbcopy((caddr_t)(ipoptions + TUNNEL_LEN), (caddr_t)ipoptions,
			(unsigned)(m->m_len - (IP_HDR_LEN + TUNNEL_LEN)));
		m->m_len -= TUNNEL_LEN;
		ip->ip_len -= TUNNEL_LEN;
		ip->ip_hl -= TUNNEL_LEN >> 2;

		/* ifp == 0 marks "arrived via src-route tunnel" below */
		ifp = 0;
	}

	/*
	 * Don't forward a packet with time-to-live of zero or one,
	 * or a packet destined to a local-only group.
	 */
	/* NOTE(review): (int)tunnel_src truncates on an LP64 kernel --
	 * this code presumably predates 64-bit ports; verify if reused. */
	if (ip->ip_ttl <= 1 ||
	    ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP)
		return (int)tunnel_src;

	/*
	 * Determine forwarding vifs from the forwarding cache table
	 */
	s = splnet();
	MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt);

	/* Entry exists, so forward if necessary */
	if (rt != NULL) {
		splx(s);
		return (ip_mdq(m, ifp, tunnel_src, rt, imo));
	}

	else {
		/*
		 * If we don't have a route for packet's origin,
		 * Make a copy of the packet &
		 * send message to routing daemon
		 */

		register struct mbuf *mb_rt;
		register struct mbuf *mb_ntry;
		register struct mbuf *mb0;
		register struct rtdetq *rte;
		register struct mbuf *rte_m;
		register u_long hash;

		mrtstat.mrts_no_route++;
		if (mrtdebug)
			log(LOG_DEBUG, "ip_mforward: no rte s %x g %x\n",
			    ntohl(ip->ip_src.s_addr),
			    ntohl(ip->ip_dst.s_addr));

		/* is there an upcall waiting for this packet?
 */
		hash = nethash_fc(ip->ip_src.s_addr, ip->ip_dst.s_addr);
		for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) {
			rt = mtod(mb_rt, struct mfc *);
			/* pending entries are marked by a non-empty m_act queue */
			if (((ip->ip_src.s_addr & rt->mfc_originmask.s_addr) ==
			     rt->mfc_origin.s_addr) &&
			    (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) &&
			    (mb_rt->m_act != NULL))
				break;
		}

		if (mb_rt == NULL) {
			/* no upcall, so make a new entry */
			MGET(mb_rt, M_DONTWAIT, MT_MRTABLE);
			if (mb_rt == NULL) {
				splx(s);
				return ENOBUFS;
			}

			rt = mtod(mb_rt, struct mfc *);

			/* insert new entry at head of hash chain */
			rt->mfc_origin.s_addr     = ip->ip_src.s_addr;
			rt->mfc_originmask.s_addr = (u_long)0xffffffff;
			rt->mfc_mcastgrp.s_addr   = ip->ip_dst.s_addr;

			/* link into table */
			hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr);
			mb_rt->m_next  = mfctable[hash];
			mfctable[hash] = mb_rt;
			mb_rt->m_act = NULL;

		}

		/* determine if q has overflowed */
		for (rte_m = mb_rt, hash = 0; rte_m->m_act; rte_m = rte_m->m_act)
			hash++;	/* hash reused here as a queue-length counter */

		if (hash > MAX_UPQ) {
			mrtstat.mrts_upq_ovflw++;
			splx(s);
			return 0;
		}

		/* add this packet and timing, ifp info to m_act */
		MGET(mb_ntry, M_DONTWAIT, MT_DATA);
		if (mb_ntry == NULL) {
			splx(s);
			return ENOBUFS;
		}

		mb_ntry->m_act = NULL;
		rte = mtod(mb_ntry, struct rtdetq *);

		mb0 = m_copy(m, 0, M_COPYALL);
		if (mb0 == NULL) {
			/* NOTE(review): mb_ntry appears to leak on this
			 * path -- confirm against a later revision. */
			splx(s);
			return ENOBUFS;
		}

		rte->m 		= mb0;
		rte->ifp 	= ifp;
		rte->tunnel_src = tunnel_src;
		rte->imo	= imo;

		rte_m->m_act = mb_ntry;

		splx(s);

		if (hash == 0) {
			/*
			 * Send message to routing daemon to install
			 * a route into the kernel table
			 */
			k_igmpsrc.sin_addr = ip->ip_src;
			k_igmpdst.sin_addr = ip->ip_dst;

			mm = m_copy(m, 0, M_COPYALL);
			if (mm
 == NULL) {
				splx(s);
				return ENOBUFS;
			}

			k_data = mtod(mm, struct ip *);
			k_data->ip_p = 0;	/* protocol 0 marks an upcall */

			mrtstat.mrts_upcalls++;

			rip_ip_input(mm, ip_mrouter, (struct sockaddr *)&k_igmpsrc);

			/* set timer to cleanup entry if upcall is lost */
			timeout(cleanup_cache, (caddr_t)mb_rt, 100);
			timeout_val++;
		}

		return 0;
	}
}

#ifndef MROUTE_LKM
int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
		   struct ip_moptions *) = X_ip_mforward;
#endif

/*
 * Clean up the cache entry if upcall is not serviced:
 * drop every packet queued on the pending entry, then unlink and free
 * the entry itself.  Runs from the timeout() scheduled above.
 */
static void
cleanup_cache(xmb_rt)
	void *xmb_rt;
{
	struct mbuf *mb_rt = xmb_rt;
	struct mfc *rt;
	u_long hash;
	struct mbuf *prev_m0;
	struct mbuf *m0;
	struct mbuf *m;
	struct rtdetq *rte;
	int s;

	rt = mtod(mb_rt, struct mfc *);
	hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr);

	if (mrtdebug)
		log(LOG_DEBUG, "ip_mforward: cleanup ipm %d h %d s %x g %x\n",
		    ip_mrouter, hash, ntohl(rt->mfc_origin.s_addr),
		    ntohl(rt->mfc_mcastgrp.s_addr));

	mrtstat.mrts_cache_cleanups++;

	/*
	 * determine entry to be cleaned up in cache table
	 */
	s = splnet();
	for (prev_m0 = m0 = mfctable[hash]; m0; prev_m0 = m0, m0 = m0->m_next)
		if (m0 == mb_rt)
			break;

	/*
	 * drop all the packets
	 * free the mbuf with the pkt, if, timing info
	 */
	while (mb_rt->m_act) {
		m = mb_rt->m_act;
		mb_rt->m_act = m->m_act;

		rte = mtod(m, struct rtdetq *);
		m_freem(rte->m);
		m_free(m);
	}

	/*
	 * Delete the entry from the cache
	 */
	if (prev_m0 != m0) {	/* if moved past head of list */
		MFREE(m0, prev_m0->m_next);
	} else			/* delete head of list, it is in the table */
		mfctable[hash] = m_free(m0);

	timeout_val--;
	splx(s);
}

/*
 * Packet
forwarding routine once entry in the cache is made 1201 */ 1202static int 1203ip_mdq(m, ifp, tunnel_src, rt, imo) 1204 register struct mbuf *m; 1205 register struct ifnet *ifp; 1206 register u_long tunnel_src; 1207 register struct mfc *rt; 1208 register struct ip_moptions *imo; 1209{ 1210 register struct ip *ip = mtod(m, struct ip *); 1211 register vifi_t vifi; 1212 register struct vif *vifp; 1213 1214 /* 1215 * Don't forward if it didn't arrive from the parent vif for its origin. 1216 * Notes: v_ifp is zero for src route tunnels, multicast_decap_if 1217 * for encapsulated tunnels and a real ifnet for non-tunnels so 1218 * the first part of the if catches wrong physical interface or 1219 * tunnel type; v_rmt_addr is zero for non-tunneled packets so 1220 * the 2nd part catches both packets that arrive via a tunnel 1221 * that shouldn't and packets that arrive via the wrong tunnel. 1222 */ 1223 vifi = rt->mfc_parent; 1224 if (viftable[vifi].v_ifp != ifp || 1225 (ifp == 0 && viftable[vifi].v_rmt_addr.s_addr != tunnel_src)) { 1226 /* came in the wrong interface */ 1227 if (mrtdebug) 1228 log(LOG_DEBUG, "wrong if: ifp %x vifi %d\n", 1229 ifp, vifi); 1230 ++mrtstat.mrts_wrong_if; 1231 return (int)tunnel_src; 1232 } 1233 1234 /* increment the interface and s-g counters */ 1235 viftable[vifi].v_pkt_in++; 1236 rt->mfc_pkt_cnt++; 1237 1238 /* 1239 * For each vif, decide if a copy of the packet should be forwarded. 
1240 * Forward if: 1241 * - the ttl exceeds the vif's threshold 1242 * - there are group members downstream on interface 1243 */ 1244#define MC_SEND(ip,vifp,m) { \ 1245 (vifp)->v_pkt_out++; \ 1246 if ((vifp)->v_flags & VIFF_SRCRT) \ 1247 srcrt_send((ip), (vifp), (m)); \ 1248 else if ((vifp)->v_flags & VIFF_TUNNEL) \ 1249 encap_send((ip), (vifp), (m)); \ 1250 else \ 1251 phyint_send((ip), (vifp), (m)); \ 1252 } 1253 1254/* If no options or the imo_multicast_vif option is 0, don't do this part 1255 */ 1256 if ((imo != NULL) && 1257 (( vifi = imo->imo_multicast_vif - 1) < numvifs) /*&& (vifi>=0)*/) 1258 { 1259 MC_SEND(ip,viftable+vifi,m); 1260 return (1); /* make sure we are done: No more physical sends */ 1261 } 1262 1263 for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++) 1264 if ((rt->mfc_ttls[vifi] > 0) && 1265 (ip->ip_ttl > rt->mfc_ttls[vifi])) 1266 MC_SEND(ip, vifp, m); 1267 1268 return 0; 1269} 1270 1271/* check if a vif number is legal/ok. This is used by ip_output, to export 1272 * numvifs there, 1273 */ 1274int 1275X_legal_vif_num(vif) 1276 int vif; 1277{ if (vif>=0 && vif<=numvifs) 1278 return(1); 1279 else 1280 return(0); 1281} 1282 1283#ifndef MROUTE_LKM 1284int (*legal_vif_num)(int) = X_legal_vif_num; 1285#endif 1286 1287static void 1288phyint_send(ip, vifp, m) 1289 struct ip *ip; 1290 struct vif *vifp; 1291 struct mbuf *m; 1292{ 1293 register struct mbuf *mb_copy; 1294 int hlen = ip->ip_hl << 2; 1295 register struct ip_moptions *imo; 1296 1297 if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL) 1298 return; 1299 1300 /* 1301 * Make sure the header isn't in an cluster, because the sharing 1302 * in clusters defeats the whole purpose of making the copy above. 
1303 */ 1304 mb_copy = m_pullup(mb_copy, hlen); 1305 if (mb_copy == NULL) 1306 return; 1307 1308 MALLOC(imo, struct ip_moptions *, sizeof *imo, M_IPMOPTS, M_NOWAIT); 1309 if (imo == NULL) { 1310 m_freem(mb_copy); 1311 return; 1312 } 1313 1314 imo->imo_multicast_ifp = vifp->v_ifp; 1315 imo->imo_multicast_ttl = ip->ip_ttl - 1; 1316 imo->imo_multicast_loop = 1; 1317 1318 if (vifp->v_rate_limit <= 0) 1319 tbf_send_packet(vifp, mb_copy, imo); 1320 else 1321 tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len, 1322 imo); 1323} 1324 1325static void 1326srcrt_send(ip, vifp, m) 1327 struct ip *ip; 1328 struct vif *vifp; 1329 struct mbuf *m; 1330{ 1331 struct mbuf *mb_copy, *mb_opts; 1332 int hlen = ip->ip_hl << 2; 1333 register struct ip *ip_copy; 1334 u_char *cp; 1335 1336 /* 1337 * Make sure that adding the tunnel options won't exceed the 1338 * maximum allowed number of option bytes. 1339 */ 1340 if (ip->ip_hl > (60 - TUNNEL_LEN) >> 2) { 1341 mrtstat.mrts_cant_tunnel++; 1342 if (mrtdebug) 1343 log(LOG_DEBUG, "srcrt_send: no room for tunnel options, from %u\n", 1344 ntohl(ip->ip_src.s_addr)); 1345 return; 1346 } 1347 1348 if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL) 1349 return; 1350 1351 MGETHDR(mb_opts, M_DONTWAIT, MT_HEADER); 1352 if (mb_opts == NULL) { 1353 m_freem(mb_copy); 1354 return; 1355 } 1356 /* 1357 * 'Delete' the base ip header from the mb_copy chain 1358 */ 1359 mb_copy->m_len -= hlen; 1360 mb_copy->m_data += hlen; 1361 /* 1362 * Make mb_opts be the new head of the packet chain. 
1363 * Any options of the packet were left in the old packet chain head 1364 */ 1365 mb_opts->m_next = mb_copy; 1366 mb_opts->m_len = hlen + TUNNEL_LEN; 1367 mb_opts->m_data += MSIZE - mb_opts->m_len; 1368 mb_opts->m_pkthdr.len = mb_copy->m_pkthdr.len + TUNNEL_LEN; 1369 /* 1370 * Copy the base ip header from the mb_copy chain to the new head mbuf 1371 */ 1372 ip_copy = mtod(mb_opts, struct ip *); 1373 bcopy((caddr_t)ip_copy, mtod(mb_opts, caddr_t), hlen); 1374 ip_copy->ip_ttl--; 1375 ip_copy->ip_dst = vifp->v_rmt_addr; /* remote tunnel end-point */ 1376 /* 1377 * Adjust the ip header length to account for the tunnel options. 1378 */ 1379 ip_copy->ip_hl += TUNNEL_LEN >> 2; 1380 ip_copy->ip_len += TUNNEL_LEN; 1381 /* 1382 * Add the NOP and LSRR after the base ip header 1383 */ 1384 cp = mtod(mb_opts, u_char *) + IP_HDR_LEN; 1385 *cp++ = IPOPT_NOP; 1386 *cp++ = IPOPT_LSRR; 1387 *cp++ = 11; /* LSRR option length */ 1388 *cp++ = 8; /* LSSR pointer to second element */ 1389 *(u_long*)cp = vifp->v_lcl_addr.s_addr; /* local tunnel end-point */ 1390 cp += 4; 1391 *(u_long*)cp = ip->ip_dst.s_addr; /* destination group */ 1392 1393 if (vifp->v_rate_limit <= 0) 1394 tbf_send_packet(vifp, mb_opts, 0); 1395 else 1396 tbf_control(vifp, mb_opts, 1397 mtod(mb_opts, struct ip *), ip_copy->ip_len, 0); 1398} 1399 1400static void 1401encap_send(ip, vifp, m) 1402 register struct ip *ip; 1403 register struct vif *vifp; 1404 register struct mbuf *m; 1405{ 1406 register struct mbuf *mb_copy; 1407 register struct ip *ip_copy; 1408 int hlen = ip->ip_hl << 2; 1409 register int i, len = ip->ip_len; 1410 1411 /* 1412 * copy the old packet & pullup it's IP header into the 1413 * new mbuf so we can modify it. Try to fill the new 1414 * mbuf since if we don't the ethernet driver will. 
 */
    MGET(mb_copy, M_DONTWAIT, MT_DATA);
    if (mb_copy == NULL)
	return;
    /* NOTE(review): the 16-byte offset presumably leaves room for a
     * link-level header to be prepended later — confirm. */
    mb_copy->m_data += 16;
    mb_copy->m_len = sizeof(multicast_encap_iphdr);

    if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) {
	m_freem(mb_copy);
	return;
    }
    /* pull as much of the packet as fits up into the header mbuf */
    i = MHLEN - M_LEADINGSPACE(mb_copy);
    if (i > len)
	i = len;
    mb_copy = m_pullup(mb_copy, i);
    if (mb_copy == NULL)
	return;
    mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr);

    /*
     * fill in the encapsulating IP header.
     */
    ip_copy = mtod(mb_copy, struct ip *);
    *ip_copy = multicast_encap_iphdr;
    ip_copy->ip_id = htons(ip_id++);
    ip_copy->ip_len += len;
    ip_copy->ip_src = vifp->v_lcl_addr;
    ip_copy->ip_dst = vifp->v_rmt_addr;

    /*
     * turn the encapsulated IP header back into a valid one:
     * decrement TTL, restore network byte order, recompute checksum.
     */
    ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr));
    --ip->ip_ttl;
    HTONS(ip->ip_len);
    HTONS(ip->ip_off);
    ip->ip_sum = 0;
#if defined(LBL) && !defined(ultrix)
    ip->ip_sum = ~oc_cksum((caddr_t)ip, ip->ip_hl << 2, 0);
#else
    /* temporarily advance m_data so in_cksum sees only the inner header */
    mb_copy->m_data += sizeof(multicast_encap_iphdr);
    ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
    mb_copy->m_data -= sizeof(multicast_encap_iphdr);
#endif

    if (vifp->v_rate_limit <= 0)
	tbf_send_packet(vifp, mb_copy, 0);
    else
	tbf_control(vifp, mb_copy, ip, ip_copy->ip_len, 0);
}

/*
 * De-encapsulate a packet and feed it back through ip input (this
 * routine is called whenever IP gets a packet with proto type
 * ENCAP_PROTO and a local destination address).
 */
void
#ifdef MROUTE_LKM
X_multiencap_decap(m)
#else
multiencap_decap(m)
#endif
    register struct mbuf *m;
{
    struct ifnet *ifp = m->m_pkthdr.rcvif;
    register struct ip *ip = mtod(m, struct ip *);
    register int hlen = ip->ip_hl << 2;
    register int s;
    register struct ifqueue *ifq;
    register struct vif *vifp;

    /* not an encapsulated packet: hand it to raw IP input */
    if (ip->ip_p != ENCAP_PROTO) {
	rip_input(m);
	return;
    }
    /*
     * dump the packet if it's not to a multicast destination or if
     * we don't have an encapsulating tunnel with the source.
     * Note: This code assumes that the remote site IP address
     * uniquely identifies the tunnel (i.e., that this site has
     * at most one tunnel with the remote site).
     */
    if (! IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) {
	++mrtstat.mrts_bad_tunnel;
	m_freem(m);
	return;
    }
    /* one-entry cache: only re-scan viftable when the source changes */
    if (ip->ip_src.s_addr != last_encap_src) {
	register struct vif *vife;

	vifp = viftable;
	vife = vifp + numvifs;
	last_encap_src = ip->ip_src.s_addr;
	last_encap_vif = 0;
	for ( ; vifp < vife; ++vifp)
	    if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) {
		if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT))
		    == VIFF_TUNNEL)
		    last_encap_vif = vifp;
		break;
	    }
    }
    if ((vifp = last_encap_vif) == 0) {
	last_encap_src = 0;
	mrtstat.mrts_cant_tunnel++; /*XXX*/
	m_freem(m);
	if (mrtdebug)
	    log(LOG_DEBUG, "ip_mforward: no tunnel with %x\n",
		ntohl(ip->ip_src.s_addr));
	return;
    }
    ifp = vifp->v_ifp;

    /* strip the outer header (and its options) and requeue for ip input */
    if (hlen > IP_HDR_LEN)
	ip_stripoptions(m, (struct mbuf *) 0);
    m->m_data += IP_HDR_LEN;
    m->m_len -= IP_HDR_LEN;
    m->m_pkthdr.len -= IP_HDR_LEN;
    m->m_pkthdr.rcvif = ifp;

    ifq = &ipintrq;
    s = splimp();
    if (IF_QFULL(ifq)) {
	IF_DROP(ifq);
	m_freem(m);
    } else {
	IF_ENQUEUE(ifq, m);
	/*
	 * normally we
would need a "schednetisr(NETISR_IP)" 1544 * here but we were called by ip_input and it is going 1545 * to loop back & try to dequeue the packet we just 1546 * queued as soon as we return so we avoid the 1547 * unnecessary software interrrupt. 1548 */ 1549 } 1550 splx(s); 1551} 1552 1553/* 1554 * Token bucket filter module 1555 */ 1556void 1557tbf_control(vifp, m, ip, p_len, imo) 1558 register struct vif *vifp; 1559 register struct mbuf *m; 1560 register struct ip *ip; 1561 register u_long p_len; 1562 struct ip_moptions *imo; 1563{ 1564 tbf_update_tokens(vifp); 1565 1566 /* if there are enough tokens, 1567 * and the queue is empty, 1568 * send this packet out 1569 */ 1570 1571 if (vifp->v_tbf->q_len == 0) { 1572 if (p_len <= vifp->v_tbf->n_tok) { 1573 vifp->v_tbf->n_tok -= p_len; 1574 tbf_send_packet(vifp, m, imo); 1575 } else if (p_len > MAX_BKT_SIZE) { 1576 /* drop if packet is too large */ 1577 mrtstat.mrts_pkt2large++; 1578 m_freem(m); 1579 return; 1580 } else { 1581 /* queue packet and timeout till later */ 1582 tbf_queue(vifp, m, ip, imo); 1583 timeout(tbf_reprocess_q, (caddr_t)vifp, 1); 1584 } 1585 } else if (vifp->v_tbf->q_len < MAXQSIZE) { 1586 /* finite queue length, so queue pkts and process queue */ 1587 tbf_queue(vifp, m, ip, imo); 1588 tbf_process_q(vifp); 1589 } else { 1590 /* queue length too much, try to dq and queue and process */ 1591 if (!tbf_dq_sel(vifp, ip)) { 1592 mrtstat.mrts_q_overflow++; 1593 m_freem(m); 1594 return; 1595 } else { 1596 tbf_queue(vifp, m, ip, imo); 1597 tbf_process_q(vifp); 1598 } 1599 } 1600 return; 1601} 1602 1603/* 1604 * adds a packet to the queue at the interface 1605 */ 1606void 1607tbf_queue(vifp, m, ip, imo) 1608 register struct vif *vifp; 1609 register struct mbuf *m; 1610 register struct ip *ip; 1611 struct ip_moptions *imo; 1612{ 1613 register u_long ql; 1614 register int index = (vifp - viftable); 1615 register int s = splnet(); 1616 1617 ql = vifp->v_tbf->q_len; 1618 1619 qtable[index][ql].pkt_m = m; 1620 
qtable[index][ql].pkt_len = (mtod(m, struct ip *))->ip_len; 1621 qtable[index][ql].pkt_ip = ip; 1622 qtable[index][ql].pkt_imo = imo; 1623 1624 vifp->v_tbf->q_len++; 1625 splx(s); 1626} 1627 1628 1629/* 1630 * processes the queue at the interface 1631 */ 1632void 1633tbf_process_q(vifp) 1634 register struct vif *vifp; 1635{ 1636 register struct pkt_queue pkt_1; 1637 register int index = (vifp - viftable); 1638 register int s = splnet(); 1639 1640 /* loop through the queue at the interface and send as many packets 1641 * as possible 1642 */ 1643 while (vifp->v_tbf->q_len > 0) { 1644 /* locate the first packet */ 1645 pkt_1.pkt_len = ((qtable[index][0]).pkt_len); 1646 pkt_1.pkt_m = (qtable[index][0]).pkt_m; 1647 pkt_1.pkt_ip = (qtable[index][0]).pkt_ip; 1648 pkt_1.pkt_imo = (qtable[index][0]).pkt_imo; 1649 1650 /* determine if the packet can be sent */ 1651 if (pkt_1.pkt_len <= vifp->v_tbf->n_tok) { 1652 /* if so, 1653 * reduce no of tokens, dequeue the queue, 1654 * send the packet. 1655 */ 1656 vifp->v_tbf->n_tok -= pkt_1.pkt_len; 1657 1658 tbf_dequeue(vifp, 0); 1659 1660 tbf_send_packet(vifp, pkt_1.pkt_m, pkt_1.pkt_imo); 1661 1662 } else break; 1663 } 1664 splx(s); 1665} 1666 1667/* 1668 * removes the jth packet from the queue at the interface 1669 */ 1670void 1671tbf_dequeue(vifp,j) 1672 register struct vif *vifp; 1673 register int j; 1674{ 1675 register u_long index = vifp - viftable; 1676 register int i; 1677 1678 for (i=j+1; i <= vifp->v_tbf->q_len - 1; i++) { 1679 qtable[index][i-1].pkt_m = qtable[index][i].pkt_m; 1680 qtable[index][i-1].pkt_len = qtable[index][i].pkt_len; 1681 qtable[index][i-1].pkt_ip = qtable[index][i].pkt_ip; 1682 qtable[index][i-1].pkt_imo = qtable[index][i].pkt_imo; 1683 } 1684 qtable[index][i-1].pkt_m = NULL; 1685 qtable[index][i-1].pkt_len = NULL; 1686 qtable[index][i-1].pkt_ip = NULL; 1687 qtable[index][i-1].pkt_imo = NULL; 1688 1689 vifp->v_tbf->q_len--; 1690 1691 if (tbfdebug > 1) 1692 log(LOG_DEBUG, "tbf_dequeue: vif# %d qlen 
%d\n",vifp-viftable, i-1); 1693} 1694 1695void 1696tbf_reprocess_q(xvifp) 1697 void *xvifp; 1698{ 1699 register struct vif *vifp = xvifp; 1700 if (ip_mrouter == NULL) 1701 return; 1702 1703 tbf_update_tokens(vifp); 1704 1705 tbf_process_q(vifp); 1706 1707 if (vifp->v_tbf->q_len) 1708 timeout(tbf_reprocess_q, (caddr_t)vifp, 1); 1709} 1710 1711/* function that will selectively discard a member of the queue 1712 * based on the precedence value and the priority obtained through 1713 * a lookup table - not yet implemented accurately! 1714 */ 1715int 1716tbf_dq_sel(vifp, ip) 1717 register struct vif *vifp; 1718 register struct ip *ip; 1719{ 1720 register int i; 1721 register int s = splnet(); 1722 register u_int p; 1723 1724 p = priority(vifp, ip); 1725 1726 for(i=vifp->v_tbf->q_len-1;i >= 0;i--) { 1727 if (p > priority(vifp, qtable[vifp-viftable][i].pkt_ip)) { 1728 m_freem(qtable[vifp-viftable][i].pkt_m); 1729 tbf_dequeue(vifp,i); 1730 splx(s); 1731 mrtstat.mrts_drop_sel++; 1732 return(1); 1733 } 1734 } 1735 splx(s); 1736 return(0); 1737} 1738 1739void 1740tbf_send_packet(vifp, m, imo) 1741 register struct vif *vifp; 1742 register struct mbuf *m; 1743 struct ip_moptions *imo; 1744{ 1745 int error; 1746 int s = splnet(); 1747 1748 /* if source route tunnels */ 1749 if (vifp->v_flags & VIFF_SRCRT) { 1750 error = ip_output(m, (struct mbuf *)0, (struct route *)0, 1751 IP_FORWARDING, imo); 1752 if (mrtdebug > 1) 1753 log(LOG_DEBUG, "srcrt_send on vif %d err %d\n", vifp-viftable, error); 1754 } else if (vifp->v_flags & VIFF_TUNNEL) { 1755 /* If tunnel options */ 1756 ip_output(m, (struct mbuf *)0, (struct route *)0, 1757 IP_FORWARDING, imo); 1758 } else { 1759 /* if physical interface option, extract the options and then send */ 1760 error = ip_output(m, (struct mbuf *)0, (struct route *)0, 1761 IP_FORWARDING, imo); 1762 FREE(imo, M_IPMOPTS); 1763 1764 if (mrtdebug > 1) 1765 log(LOG_DEBUG, "phyint_send on vif %d err %d\n", vifp-viftable, error); 1766 } 1767 splx(s); 1768} 

/* determine the current time and then
 * the elapsed time (between the last time and time now)
 * in milliseconds & update the no. of tokens in the bucket
 */
void
tbf_update_tokens(vifp)
    register struct vif *vifp;
{
    struct timeval tp;
    register u_long t;
    register u_long elapsed;
    register int s = splnet();

    GET_TIME(tp);

    /* current time in milliseconds */
    t = tp.tv_sec*1000 + tp.tv_usec/1000;

    /* tokens accrue at v_rate_limit (kbits/s) => bytes = ms * rate / 8 */
    elapsed = (t - vifp->v_tbf->last_pkt_t) * vifp->v_rate_limit /8;
    vifp->v_tbf->n_tok += elapsed;
    vifp->v_tbf->last_pkt_t = t;

    /* cap the bucket so an idle vif cannot burst without bound */
    if (vifp->v_tbf->n_tok > MAX_BKT_SIZE)
	vifp->v_tbf->n_tok = MAX_BKT_SIZE;

    splx(s);
}

/*
 * Classify a packet for tbf_dq_sel(): larger return value = higher
 * priority.  Well-known MBone/IETF session groups get boosted values.
 */
static int
priority(vifp, ip)
    register struct vif *vifp;
    register struct ip *ip;
{
    register u_long graddr;
    register int prio;

    /* temporary hack; will add general packet classifier some day */

    prio = 50; /* default priority */

    /* check for source route options and add option length to get dst.
     * NOTE(review): "(ip+8)" is struct-pointer arithmetic — it advances
     * by 8 * sizeof(struct ip) bytes, not 8 bytes.  This looks like it
     * was meant to skip the 8-byte tunnel option area; confirm against
     * the srcrt_send() header layout before relying on this path.
     */
    if (vifp->v_flags & VIFF_SRCRT)
	graddr = ntohl((ip+8)->ip_dst.s_addr);
    else
	graddr = ntohl(ip->ip_dst.s_addr);

    /* dispatch on the low nibble of the group address */
    switch (graddr & 0xf) {
	case 0x0: break;
	case 0x1: if (graddr == 0xe0020001) prio = 65; /* MBone Audio */
		  break;
	case 0x2: break;
	case 0x3: break;
	case 0x4: break;
	case 0x5: break;
	case 0x6: break;
	case 0x7: break;
	case 0x8: break;
	case 0x9: break;
	case 0xa: if (graddr == 0xe000010a) prio = 85; /* IETF Low Audio 1 */
		  break;
	case 0xb: if (graddr == 0xe000010b) prio = 75; /* IETF Audio 1 */
		  break;
	case 0xc: if (graddr == 0xe000010c) prio = 60; /* IETF Video 1 */
		  break;
	case 0xd: if (graddr == 0xe000010d) prio = 80; /* IETF Low Audio 2 */
		  break;
	case 0xe: if (graddr == 0xe000010e) prio = 70; /* IETF Audio 2 */
		  break;
	case 0xf: if (graddr == 0xe000010f) prio = 55; /* IETF Video 2 */
		  break;
    }

    if (tbfdebug > 1) log(LOG_DEBUG, "graddr%x prio%d\n", graddr, prio);

    return prio;
}

/*
 * End of token bucket filter modifications
 */

#ifdef MROUTE_LKM
#include <sys/conf.h>
#include <sys/exec.h>
#include <sys/sysent.h>
#include <sys/lkm.h>

MOD_MISC("ip_mroute_mod")

/*
 * LKM entry point: on load, hook the multicast-routing function
 * pointers and the ENCAP_PROTO input routine, saving the previous
 * values; on unload (refused while a daemon holds ip_mrouter),
 * restore them.
 */
static int
ip_mroute_mod_handle(struct lkm_table *lkmtp, int cmd)
{
	int i;
	struct lkm_misc *args = lkmtp->private.lkm_misc;
	int err = 0;

	switch(cmd) {
		static int (*old_ip_mrouter_cmd)();
		static int (*old_ip_mrouter_done)();
		static int (*old_ip_mforward)();
		static int (*old_mrt_ioctl)();
		static void (*old_proto4_input)();
		static int (*old_legal_vif_num)();
		extern struct protosw inetsw[];

	case LKM_E_LOAD:
		if(lkmexists(lkmtp) || ip_mrtproto)
		  return(EEXIST);
		old_ip_mrouter_cmd = ip_mrouter_cmd;
		ip_mrouter_cmd = X_ip_mrouter_cmd;
		old_ip_mrouter_done = ip_mrouter_done;
		ip_mrouter_done = X_ip_mrouter_done;
		old_ip_mforward = ip_mforward;
		ip_mforward = X_ip_mforward;
		old_mrt_ioctl = mrt_ioctl;
		mrt_ioctl = X_mrt_ioctl;
		old_proto4_input = inetsw[ip_protox[ENCAP_PROTO]].pr_input;
		inetsw[ip_protox[ENCAP_PROTO]].pr_input = X_multiencap_decap;
		old_legal_vif_num = legal_vif_num;
		legal_vif_num = X_legal_vif_num;
		ip_mrtproto = IGMP_DVMRP;

		printf("\nIP multicast routing loaded\n");
		break;

	case LKM_E_UNLOAD:
		/* refuse to unload while a routing daemon is attached */
		if (ip_mrouter)
			return EINVAL;

		ip_mrouter_cmd = old_ip_mrouter_cmd;
		ip_mrouter_done = old_ip_mrouter_done;
		ip_mforward = old_ip_mforward;
		mrt_ioctl = old_mrt_ioctl;
		inetsw[ip_protox[ENCAP_PROTO]].pr_input = old_proto4_input;
		legal_vif_num = old_legal_vif_num;
		ip_mrtproto = 0;
		break;

	default:
		err = EINVAL;
		break;
	}

	return(err);
}

/* module dispatch stub; DISPATCH supplies the return */
int
ip_mroute_mod(struct lkm_table *lkmtp, int cmd, int ver) {
	DISPATCH(lkmtp, cmd, ver, ip_mroute_mod_handle, ip_mroute_mod_handle,
		 nosys);
}

#endif /* MROUTE_LKM */
#endif /* MROUTING */

