/* ip_mroute.c revision 7083 */
/*
 * IP multicast forwarding procedures
 *
 * Written by David Waitzman, BBN Labs, August 1988.
 * Modified by Steve Deering, Stanford, February 1989.
 * Modified by Mark J. Steiglitz, Stanford, May, 1991
 * Modified by Van Jacobson, LBL, January 1993
 * Modified by Ajit Thyagarajan, PARC, August 1993
 *
 * MROUTING 1.8
 */


#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/ioctl.h>
#include <sys/syslog.h>
#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/igmp.h>
#include <netinet/igmp_var.h>
#include <netinet/ip_mroute.h>

/* In-place byte-swap helpers; compile to nothing on big-endian machines. */
#ifndef NTOHL
#if BYTE_ORDER != BIG_ENDIAN
#define NTOHL(d) ((d) = ntohl((d)))
#define NTOHS(d) ((d) = ntohs((u_short)(d)))
#define HTONL(d) ((d) = htonl((d)))
#define HTONS(d) ((d) = htons((u_short)(d)))
#else
#define NTOHL(d)
#define NTOHS(d)
#define HTONL(d)
#define HTONS(d)
#endif
#endif

#ifndef MROUTING
/*
 * Dummy routines and globals used when multicast routing is not compiled in.
 */

u_int ip_mrtproto = 0;			/* no routing protocol in use */
struct socket *ip_mrouter = NULL;	/* no routing daemon registered */
struct mrtstat mrtstat;

/* Stub: reject every DVMRP setsockopt command. */
int
_ip_mrouter_cmd(cmd, so, m)
    int cmd;
    struct socket *so;
    struct mbuf *m;
{
    return(EOPNOTSUPP);
}

int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = _ip_mrouter_cmd;

/* Stub: shutting down the (absent) multicast router trivially succeeds. */
int
_ip_mrouter_done()
{
    return(0);
}

int (*ip_mrouter_done)(void) = _ip_mrouter_done;

/* Stub: never forwards; zero return tells the caller to keep the packet. */
int
_ip_mforward(ip, ifp, m, imo)
    struct ip *ip;
    struct ifnet *ifp;
    struct mbuf *m;
    struct ip_moptions *imo;
{
    return(0);
}

int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
		   struct ip_moptions *) = _ip_mforward;

/* Stub: no multicast cache exists to report on. */
int
_mrt_ioctl(int req, caddr_t data, struct proc *p)
{
    return EOPNOTSUPP;
}

int (*mrt_ioctl)(int, caddr_t, struct proc *) = _mrt_ioctl;

/* With no tunnels configured, hand encapsulated packets to raw IP input. */
void multiencap_decap(struct mbuf *m) { /* XXX must fixup manually */
    rip_input(m);
}

int (*legal_vif_num)(int) = 0;

#else /* MROUTING */

#define INSIZ		sizeof(struct in_addr)
#define	same(a1, a2) \
	(bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0)

#define MT_MRTABLE MT_RTABLE	/* since nothing else uses it */

/*
 * Globals. All but ip_mrouter and ip_mrtproto could be static,
 * except for netstat or debugging purposes.
 */
#ifndef MROUTE_LKM
struct socket  *ip_mrouter  = NULL;	/* socket of the routing daemon */
struct mrtstat	mrtstat;

int		ip_mrtproto = IGMP_DVMRP; /* for netstat only */
#else /* MROUTE_LKM */
extern struct mrtstat mrtstat;
extern int ip_mrtproto;
#endif

#define NO_RTE_FOUND 	0x1
#define RTE_FOUND	0x2

struct mbuf    *mfctable[MFCTBLSIZ];	/* forwarding-cache hash chains */
struct vif	viftable[MAXVIFS];	/* virtual interface table */
u_int		mrtdebug = 0;	  /* debug level 	*/
u_int		tbfdebug = 0;     /* tbf debug level 	*/

u_long timeout_val = 0;			/* count of outstanding upcalls */

/*
 * Define the token bucket filter structures
 * tbftable -> each vif has one of these for storing info
 * qtable   -> each interface has an associated queue of pkts
 */

struct tbf tbftable[MAXVIFS];
struct pkt_queue qtable[MAXVIFS][MAXQSIZE];

/*
 * 'Interfaces' associated with decapsulator (so we can tell
 * packets that went through it from ones that get reflected
 * by a broken gateway).  These interfaces are never linked into
 * the system ifnet list & no routes point to them.  I.e., packets
 * can't be sent this way.  They only exist as a placeholder for
 * multicast source verification.
 */
struct ifnet multicast_decap_if[MAXVIFS];

#define ENCAP_TTL 64
#define ENCAP_PROTO 4

/* prototype IP hdr for encapsulated packets */
struct ip multicast_encap_iphdr = {
#if BYTE_ORDER == LITTLE_ENDIAN
	sizeof(struct ip) >> 2, IPVERSION,
#else
	IPVERSION, sizeof(struct ip) >> 2,
#endif
	0,				/* tos */
	sizeof(struct ip),		/* total length */
	0,				/* id */
	0,				/* frag offset */
	ENCAP_TTL, ENCAP_PROTO,
	0,				/* checksum */
};

/*
 * Private variables.
 */
static vifi_t	   numvifs = 0;		/* one past highest vif in use */
static void (*encap_oldrawip)() = 0;	/* saved raw-IP input routine */

/*
 * one-back cache used by multiencap_decap to locate a tunnel's vif
 * given a datagram's src ip address.
 */
static u_long last_encap_src;
static struct vif *last_encap_vif;

static u_long nethash_fc(u_long, u_long);
static struct mfc *mfcfind(u_long, u_long);
int get_sg_cnt(struct sioc_sg_req *);
int get_vif_cnt(struct sioc_vif_req *);
int get_vifs(caddr_t);
static int add_vif(struct vifctl *);
static int del_vif(vifi_t *);
static int add_mfc(struct mfcctl *);
static int del_mfc(struct delmfcctl *);
static void cleanup_cache(void *);
static int ip_mdq(struct mbuf *, struct ifnet *, u_long, struct mfc *,
		  struct ip_moptions *);
extern int (*legal_vif_num)(int);
static void phyint_send(struct ip *, struct vif *, struct mbuf *);
static void srcrt_send(struct ip *, struct vif *, struct mbuf *);
static void encap_send(struct ip *, struct vif *, struct mbuf *);
void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long,
		 struct ip_moptions *);
void tbf_queue(struct vif *, struct mbuf *, struct ip *, struct ip_moptions *);
void tbf_process_q(struct vif *);
void tbf_dequeue(struct vif *, int);
void tbf_reprocess_q(void *);
int tbf_dq_sel(struct vif *, struct ip *);
void tbf_send_packet(struct vif *, struct mbuf *, struct ip_moptions *);
void tbf_update_tokens(struct vif *);
static int priority(struct vif *, struct ip *);
static int ip_mrouter_init(struct socket *);
void multiencap_decap(struct mbuf *m);

/*
 * A simple hash function: returns MFCHASHMOD of the low-order octet of
 * the argument's network or subnet number and the multicast group assoc.
220 */ 221static u_long 222nethash_fc(m,n) 223 register u_long m; 224 register u_long n; 225{ 226 struct in_addr in1; 227 struct in_addr in2; 228 229 in1.s_addr = m; 230 m = in_netof(in1); 231 while ((m & 0xff) == 0) m >>= 8; 232 233 in2.s_addr = n; 234 n = in_netof(in2); 235 while ((n & 0xff) == 0) n >>= 8; 236 237 return (MFCHASHMOD(m) ^ MFCHASHMOD(n)); 238} 239 240/* 241 * this is a direct-mapped cache used to speed the mapping from a 242 * datagram source address to the associated multicast route. Note 243 * that unlike mrttable, the hash is on IP address, not IP net number. 244 */ 245#define MFCHASHSIZ 1024 246#define MFCHASH(a, g) ((((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \ 247 ((g) >> 20) ^ ((g) >> 10) ^ (g)) & (MFCHASHSIZ-1)) 248struct mfc *mfchash[MFCHASHSIZ]; 249 250/* 251 * Find a route for a given origin IP address and Multicast group address 252 * Type of service parameter to be added in the future!!! 253 */ 254#define MFCFIND(o, g, rt) { \ 255 register u_int _mrhasho = o; \ 256 register u_int _mrhashg = g; \ 257 _mrhasho = MFCHASH(_mrhasho, _mrhashg); \ 258 ++mrtstat.mrts_mfc_lookups; \ 259 rt = mfchash[_mrhasho]; \ 260 if ((rt == NULL) || \ 261 ((o & rt->mfc_originmask.s_addr) != rt->mfc_origin.s_addr) || \ 262 (g != rt->mfc_mcastgrp.s_addr)) \ 263 if ((rt = mfcfind(o, g)) != NULL) \ 264 mfchash[_mrhasho] = rt; \ 265} 266 267/* 268 * Find route by examining hash table entries 269 */ 270static struct mfc * 271mfcfind(origin, mcastgrp) 272 u_long origin; 273 u_long mcastgrp; 274{ 275 register struct mbuf *mb_rt; 276 register struct mfc *rt; 277 register u_long hash; 278 279 hash = nethash_fc(origin, mcastgrp); 280 for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) { 281 rt = mtod(mb_rt, struct mfc *); 282 if (((origin & rt->mfc_originmask.s_addr) == rt->mfc_origin.s_addr) && 283 (mcastgrp == rt->mfc_mcastgrp.s_addr) && 284 (mb_rt->m_act == NULL)) 285 return (rt); 286 } 287 mrtstat.mrts_mfc_misses++; 288 return NULL; 289} 290 291/* 292 * Macros to 
compute elapsed time efficiently 293 * Borrowed from Van Jacobson's scheduling code 294 */ 295#define TV_DELTA(a, b, delta) { \ 296 register int xxs; \ 297 \ 298 delta = (a).tv_usec - (b).tv_usec; \ 299 if ((xxs = (a).tv_sec - (b).tv_sec)) { \ 300 switch (xxs) { \ 301 case 2: \ 302 delta += 1000000; \ 303 /* fall through */ \ 304 case 1: \ 305 delta += 1000000; \ 306 break; \ 307 default: \ 308 delta += (1000000 * xxs); \ 309 } \ 310 } \ 311} 312 313#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \ 314 (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) 315 316/* 317 * Handle DVMRP setsockopt commands to modify the multicast routing tables. 318 */ 319int 320X_ip_mrouter_cmd(cmd, so, m) 321 int cmd; 322 struct socket *so; 323 struct mbuf *m; 324{ 325 if (cmd != DVMRP_INIT && so != ip_mrouter) return EACCES; 326 327 switch (cmd) { 328 case DVMRP_INIT: return ip_mrouter_init(so); 329 case DVMRP_DONE: return ip_mrouter_done(); 330 case DVMRP_ADD_VIF: return add_vif (mtod(m, struct vifctl *)); 331 case DVMRP_DEL_VIF: return del_vif (mtod(m, vifi_t *)); 332 case DVMRP_ADD_MFC: return add_mfc (mtod(m, struct mfcctl *)); 333 case DVMRP_DEL_MFC: return del_mfc (mtod(m, struct delmfcctl *)); 334 default: return EOPNOTSUPP; 335 } 336} 337 338#ifndef MROUTE_LKM 339int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = X_ip_mrouter_cmd; 340#endif 341 342/* 343 * Handle ioctl commands to obtain information from the cache 344 */ 345int 346X_mrt_ioctl(cmd, data) 347 int cmd; 348 caddr_t data; 349{ 350 int error = 0; 351 352 switch (cmd) { 353 case (SIOCGETVIFINF): /* Read Virtual Interface (m/cast) */ 354 return (get_vifs(data)); 355 break; 356 case (SIOCGETVIFCNT): 357 return (get_vif_cnt((struct sioc_vif_req *)data)); 358 break; 359 case (SIOCGETSGCNT): 360 return (get_sg_cnt((struct sioc_sg_req *)data)); 361 break; 362 default: 363 return (EINVAL); 364 break; 365 } 366 return error; 367} 368 369#ifndef MROUTE_LKM 370int (*mrt_ioctl)(int, caddr_t, struct proc *) = 
X_mrt_ioctl; 371#else 372extern int (*mrt_ioctl)(int, caddr_t, struct proc *); 373#endif 374 375/* 376 * returns the packet count for the source group provided 377 */ 378int 379get_sg_cnt(req) 380 register struct sioc_sg_req *req; 381{ 382 register struct mfc *rt; 383 int s; 384 385 s = splnet(); 386 MFCFIND(req->src.s_addr, req->grp.s_addr, rt); 387 splx(s); 388 if (rt != NULL) 389 req->count = rt->mfc_pkt_cnt; 390 else 391 req->count = 0xffffffff; 392 393 return 0; 394} 395 396/* 397 * returns the input and output packet counts on the interface provided 398 */ 399int 400get_vif_cnt(req) 401 register struct sioc_vif_req *req; 402{ 403 register vifi_t vifi = req->vifi; 404 405 req->icount = viftable[vifi].v_pkt_in; 406 req->ocount = viftable[vifi].v_pkt_out; 407 408 return 0; 409} 410 411int 412get_vifs(data) 413 char *data; 414{ 415 struct vif_conf *vifc = (struct vif_conf *)data; 416 struct vif_req *vifrp, vifr; 417 int space, error=0; 418 419 vifi_t vifi; 420 int s; 421 422 space = vifc->vifc_len; 423 vifrp = vifc->vifc_req; 424 425 s = splnet(); 426 vifc->vifc_num=numvifs; 427 428 for (vifi = 0; vifi < numvifs; vifi++, vifrp++) { 429 if (viftable[vifi].v_lcl_addr.s_addr != 0) { 430 vifr.v_flags=viftable[vifi].v_flags; 431 vifr.v_threshold=viftable[vifi].v_threshold; 432 vifr.v_lcl_addr=viftable[vifi].v_lcl_addr; 433 vifr.v_rmt_addr=viftable[vifi].v_rmt_addr; 434 strncpy(vifr.v_if_name,viftable[vifi].v_ifp->if_name,IFNAMSIZ); 435 if ((space -= sizeof(vifr)) < 0) { 436 splx(s); 437 return(ENOSPC); 438 } 439 error = copyout((caddr_t)&vifr,(caddr_t)vifrp,(u_int)(sizeof vifr)); 440 if (error) { 441 splx(s); 442 return(error); 443 } 444 } 445 } 446 splx(s); 447 return 0; 448} 449/* 450 * Enable multicast routing 451 */ 452static int 453ip_mrouter_init(so) 454 struct socket *so; 455{ 456 if (so->so_type != SOCK_RAW || 457 so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP; 458 459 if (ip_mrouter != NULL) return EADDRINUSE; 460 461 ip_mrouter = so; 462 463 if 
(mrtdebug) 464 log(LOG_DEBUG, "ip_mrouter_init\n"); 465 466 return 0; 467} 468 469/* 470 * Disable multicast routing 471 */ 472int 473X_ip_mrouter_done() 474{ 475 vifi_t vifi; 476 int i; 477 struct ifnet *ifp; 478 struct ifreq ifr; 479 struct mbuf *mb_rt; 480 struct mbuf *m; 481 struct rtdetq *rte; 482 int s; 483 484 s = splnet(); 485 486 /* 487 * For each phyint in use, disable promiscuous reception of all IP 488 * multicasts. 489 */ 490 for (vifi = 0; vifi < numvifs; vifi++) { 491 if (viftable[vifi].v_lcl_addr.s_addr != 0 && 492 !(viftable[vifi].v_flags & VIFF_TUNNEL)) { 493 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; 494 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr 495 = INADDR_ANY; 496 ifp = viftable[vifi].v_ifp; 497 (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); 498 } 499 } 500 bzero((caddr_t)qtable, sizeof(qtable)); 501 bzero((caddr_t)tbftable, sizeof(tbftable)); 502 bzero((caddr_t)viftable, sizeof(viftable)); 503 numvifs = 0; 504 505 /* 506 * Check if any outstanding timeouts remain 507 */ 508 if (timeout_val != 0) 509 for (i = 0; i < MFCTBLSIZ; i++) { 510 mb_rt = mfctable[i]; 511 while (mb_rt) { 512 if ( mb_rt->m_act != NULL) { 513 untimeout(cleanup_cache, (caddr_t)mb_rt); 514 while (mb_rt->m_act) { 515 m = mb_rt->m_act; 516 mb_rt->m_act = m->m_act; 517 rte = mtod(m, struct rtdetq *); 518 m_freem(rte->m); 519 m_free(m); 520 } 521 timeout_val--; 522 } 523 mb_rt = mb_rt->m_next; 524 } 525 if (timeout_val == 0) 526 break; 527 } 528 529 /* 530 * Free all multicast forwarding cache entries. 
 */
    for (i = 0; i < MFCTBLSIZ; i++)
	m_freem(mfctable[i]);

    bzero((caddr_t)mfctable, sizeof(mfctable));
    bzero((caddr_t)mfchash, sizeof(mfchash));

    /*
     * Reset de-encapsulation cache
     */
    last_encap_src = NULL; /* NOTE(review): u_long target; 0 would be cleaner */
    last_encap_vif = NULL;

    ip_mrouter = NULL;

    splx(s);

    if (mrtdebug)
	log(LOG_DEBUG, "ip_mrouter_done\n");

    return 0;
}

#ifndef MROUTE_LKM
int (*ip_mrouter_done)(void) = X_ip_mrouter_done;
#endif

/*
 * Add a vif to the vif table
 */
static int
add_vif(vifcp)
    register struct vifctl *vifcp;
{
    register struct vif *vifp = viftable + vifcp->vifc_vifi;
    static struct sockaddr_in sin = {sizeof sin, AF_INET};
    struct ifaddr *ifa;
    struct ifnet *ifp;
    struct ifreq ifr;
    int error, s;
    struct tbf *v_tbf = tbftable + vifcp->vifc_vifi;

    if (vifcp->vifc_vifi >= MAXVIFS)  return EINVAL;
    if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE;

    /* Find the interface with an address in AF_INET family */
    sin.sin_addr = vifcp->vifc_lcl_addr;
    ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
    if (ifa == 0) return EADDRNOTAVAIL;
    ifp = ifa->ifa_ifp;

    if (vifcp->vifc_flags & VIFF_TUNNEL) {
	if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) {
	    /*
	     * First IP-in-IP tunnel: steal the raw input routine for
	     * protocol 4 so multiencap_decap sees encapsulated packets,
	     * and set up the placeholder decap interfaces.
	     */
	    if (encap_oldrawip == 0) {
		extern struct protosw inetsw[];
		extern u_char ip_protox[];
		register u_char pr = ip_protox[ENCAP_PROTO];

		encap_oldrawip = inetsw[pr].pr_input;
		inetsw[pr].pr_input = multiencap_decap;
		for (s = 0; s < MAXVIFS; ++s) {
		    multicast_decap_if[s].if_name = "mdecap";
		    multicast_decap_if[s].if_unit = s;
		}
	    }
	    ifp = &multicast_decap_if[vifcp->vifc_vifi];
	} else {
	    ifp = 0;	/* source-route tunnels have no ifnet */
	}
    } else {
	/* Make sure the interface supports multicast */
	if ((ifp->if_flags & IFF_MULTICAST) == 0)
	    return EOPNOTSUPP;

	/* Enable promiscuous reception of all IP multicasts from the if */
	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY;
	s = splnet();
	error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr);
	splx(s);
	if (error)
	    return error;
    }

    s = splnet();
    /* define parameters for the tbf structure */
    vifp->v_tbf = v_tbf;
    vifp->v_tbf->q_len = 0;
    vifp->v_tbf->n_tok = 0;
    vifp->v_tbf->last_pkt_t = 0;

    vifp->v_flags     = vifcp->vifc_flags;
    vifp->v_threshold = vifcp->vifc_threshold;
    vifp->v_lcl_addr  = vifcp->vifc_lcl_addr;
    vifp->v_rmt_addr  = vifcp->vifc_rmt_addr;
    vifp->v_ifp       = ifp;
    vifp->v_rate_limit= vifcp->vifc_rate_limit;
    /* initialize per vif pkt counters */
    vifp->v_pkt_in    = 0;
    vifp->v_pkt_out   = 0;
    splx(s);

    /* Adjust numvifs up if the vifi is higher than numvifs */
    if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1;

    if (mrtdebug)
	log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d\n",
	    vifcp->vifc_vifi,
	    ntohl(vifcp->vifc_lcl_addr.s_addr),
	    (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
	    ntohl(vifcp->vifc_rmt_addr.s_addr),
	    vifcp->vifc_threshold,
	    vifcp->vifc_rate_limit);

    return 0;
}

/*
 * Delete a vif from the vif table
 */
static int
del_vif(vifip)
    vifi_t *vifip;
{
    register struct vif *vifp = viftable + *vifip;
    register vifi_t vifi;
    struct ifnet *ifp;
    struct ifreq ifr;
    int s;

    if (*vifip >= numvifs) return EINVAL;
    if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL;

    s = splnet();

    if (!(vifp->v_flags & VIFF_TUNNEL)) {
	/* stop promiscuous reception of multicasts on the phyint */
	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
	((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY;
	ifp = vifp->v_ifp;
	(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
    }

    /* invalidate the one-back tunnel cache if it points at this vif */
    if (vifp == last_encap_vif) {
	last_encap_vif = 0;
	last_encap_src = 0;
    }

    bzero((caddr_t)qtable[*vifip],
	  sizeof(qtable[*vifip]));
    bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf)));
    bzero((caddr_t)vifp, sizeof (*vifp));

    /* Adjust numvifs down */
    for (vifi = numvifs; vifi > 0; vifi--)
	if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break;
    numvifs = vifi;

    splx(s);

    if (mrtdebug)
	log(LOG_DEBUG, "del_vif %d, numvifs %d\n", *vifip, numvifs);

    return 0;
}

/*
 * Add an mfc entry
 */
static int
add_mfc(mfccp)
    struct mfcctl *mfccp;
{
    struct mfc *rt;
    struct mfc *rt1 = 0;
    register struct mbuf *mb_rt;
    struct mbuf *prev_mb_rt;
    u_long hash;
    struct mbuf *mb_ntry;
    struct rtdetq *rte;
    register u_short nstl;
    int s;
    int i;

    rt = mfcfind(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);

    /* If an entry already exists, just update the fields */
    if (rt) {
	if (mrtdebug)
	    log(LOG_DEBUG,"add_mfc update o %x g %x m %x p %x\n",
		ntohl(mfccp->mfcc_origin.s_addr),
		ntohl(mfccp->mfcc_mcastgrp.s_addr),
		ntohl(mfccp->mfcc_originmask.s_addr),
		mfccp->mfcc_parent);

	s = splnet();
	rt->mfc_parent = mfccp->mfcc_parent;
	for (i = 0; i < numvifs; i++)
	    VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
	splx(s);
	return 0;
    }

    /*
     * Find the entry for which the upcall was made and update
     */
    s = splnet();
    hash = nethash_fc(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
    for (prev_mb_rt = mb_rt = mfctable[hash], nstl = 0;
	 mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) {

	rt = mtod(mb_rt, struct mfc *);
	/* only entries with a pending upcall (m_act != NULL) match here */
	if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr)
	     == mfccp->mfcc_origin.s_addr) &&
	    (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
	    (mb_rt->m_act != NULL)) {

	    if (!nstl++) {
		if (mrtdebug)
		    log(LOG_DEBUG,"add_mfc o %x g %x m %x p %x dbg %x\n",
			ntohl(mfccp->mfcc_origin.s_addr),
			ntohl(mfccp->mfcc_mcastgrp.s_addr),
			ntohl(mfccp->mfcc_originmask.s_addr),
			mfccp->mfcc_parent, mb_rt->m_act);

		rt->mfc_origin     = mfccp->mfcc_origin;
		rt->mfc_originmask = mfccp->mfcc_originmask;
		rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
		rt->mfc_parent     = mfccp->mfcc_parent;
		for (i = 0; i < numvifs; i++)
		    VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
		/* initialize pkt counters per src-grp */
		rt->mfc_pkt_cnt    = 0;
		rt1 = rt;
	    }

	    /* prevent cleanup of cache entry */
	    untimeout(cleanup_cache, (caddr_t)mb_rt);
	    timeout_val--;

	    /* free packets Qed at the end of this entry;
	     * forward each one now that a route exists */
	    while (mb_rt->m_act) {
		mb_ntry = mb_rt->m_act;
		rte = mtod(mb_ntry, struct rtdetq *);
		ip_mdq(rte->m, rte->ifp, rte->tunnel_src,
		       rt1, rte->imo);
		mb_rt->m_act = mb_ntry->m_act;
		m_freem(rte->m);
		m_free(mb_ntry);
	    }

	    /*
	     * If more than one entry was created for a single upcall
	     * delete that entry
	     */
	    if (nstl > 1) {
		MFREE(mb_rt, prev_mb_rt->m_next);
		mb_rt = prev_mb_rt;
	    }
	}
    }

    /*
     * It is possible that an entry is being inserted without an upcall
     */
    if (nstl == 0) {
	if (mrtdebug)
	    log(LOG_DEBUG,"add_mfc no upcall h %d o %x g %x m %x p %x\n",
		hash, ntohl(mfccp->mfcc_origin.s_addr),
		ntohl(mfccp->mfcc_mcastgrp.s_addr),
		ntohl(mfccp->mfcc_originmask.s_addr),
		mfccp->mfcc_parent);

	for (prev_mb_rt = mb_rt = mfctable[hash];
	     mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) {

	    rt = mtod(mb_rt, struct mfc *);
	    if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr)
		 == mfccp->mfcc_origin.s_addr) &&
		(rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) {

		rt->mfc_origin     = mfccp->mfcc_origin;
		rt->mfc_originmask = mfccp->mfcc_originmask;
		rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
		rt->mfc_parent     = mfccp->mfcc_parent;
		for (i = 0; i < numvifs; i++)
		    VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
		/* initialize pkt counters per src-grp */
		rt->mfc_pkt_cnt    = 0;
	    }
	}
	if (mb_rt == NULL) {
	    /* no upcall, so make a new entry */
	    MGET(mb_rt, M_DONTWAIT, MT_MRTABLE);
	    if (mb_rt == NULL) {
		splx(s);
		return ENOBUFS;
	    }

	    rt = mtod(mb_rt, struct mfc *);

	    /* insert new entry at head of hash chain */
	    rt->mfc_origin     = mfccp->mfcc_origin;
	    rt->mfc_originmask = mfccp->mfcc_originmask;
	    rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
	    rt->mfc_parent     = mfccp->mfcc_parent;
	    for (i = 0; i < numvifs; i++)
		VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]);
	    /* initialize pkt counters per src-grp */
	    rt->mfc_pkt_cnt    = 0;

	    /* link into table */
	    mb_rt->m_next  = mfctable[hash];
	    mfctable[hash] = mb_rt;
	    mb_rt->m_act = NULL;
	}
    }
    splx(s);
    return 0;
}

/*
 * Delete an mfc entry
 */
static int
del_mfc(mfccp)
    struct delmfcctl *mfccp;
{
    struct in_addr 	origin;
    struct in_addr 	mcastgrp;
    struct mfc 		*rt;
    struct mbuf 	*mb_rt;
    struct mbuf
*prev_mb_rt;
    u_long 		hash;
    struct mfc 		**cmfc;
    struct mfc 		**cmfcend;
    int s;

    origin = mfccp->mfcc_origin;
    mcastgrp = mfccp->mfcc_mcastgrp;
    hash = nethash_fc(origin.s_addr, mcastgrp.s_addr);

    if (mrtdebug)
	log(LOG_DEBUG,"del_mfc orig %x mcastgrp %x\n",
	    ntohl(origin.s_addr), ntohl(mcastgrp.s_addr));

    /* find the (upcall-free) entry for this origin/group pair */
    for (prev_mb_rt = mb_rt = mfctable[hash]
	 ; mb_rt
	 ; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) {
	rt = mtod(mb_rt, struct mfc *);
	if (origin.s_addr == rt->mfc_origin.s_addr &&
	    mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr &&
	    mb_rt->m_act == NULL)
	    break;
    }
    if (mb_rt == NULL) {
	return ESRCH;
    }

    s = splnet();

    /* purge any direct-mapped cache slots pointing at this route */
    cmfc = mfchash;
    cmfcend = cmfc + MFCHASHSIZ;
    for ( ; cmfc < cmfcend; ++cmfc)
	if (*cmfc == rt)
	    *cmfc = 0;

    if (prev_mb_rt != mb_rt) {	/* if moved past head of list */
	MFREE(mb_rt, prev_mb_rt->m_next);
    } else			/* delete head of list, it is in the table */
	mfctable[hash] = m_free(mb_rt);

    splx(s);

    return 0;
}

/*
 * IP multicast forwarding function. This function assumes that the packet
 * pointed to by "ip" has arrived on (or is about to be sent to) the interface
 * pointed to by "ifp", and the packet is to be relayed to other networks
 * that have members of the packet's destination IP multicast group.
 *
 * The packet is returned unscathed to the caller, unless it is tunneled
 * or erroneous, in which case a non-zero return value tells the caller to
 * discard it.
 */

#define IP_HDR_LEN  20	/* # bytes of fixed IP header (excluding options) */
#define TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */

int
X_ip_mforward(ip, ifp, m, imo)
    register struct ip *ip;
    struct ifnet *ifp;
    struct mbuf *m;
    struct ip_moptions *imo;
{
    register struct mfc *rt;
    register u_char *ipoptions;
    u_long tunnel_src;
    static struct sockproto	k_igmpproto 	= { AF_INET, IPPROTO_IGMP };
    static struct sockaddr_in 	k_igmpsrc	= { sizeof k_igmpsrc, AF_INET };
    static struct sockaddr_in 	k_igmpdst 	= { sizeof k_igmpdst, AF_INET };
    register struct mbuf *mm;
    register struct ip *k_data;
    int s;

    if (mrtdebug > 1)
	log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %x (%s%d)\n",
	    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp,
	    ifp->if_name, ifp->if_unit);

    if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
	(ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) {
	/*
	 * Packet arrived via a physical interface.
	 */
	tunnel_src = 0;
    } else {
	/*
	 * Packet arrived through a source-route tunnel.
	 *
	 * A source-route tunneled packet has a single NOP option and a
	 * two-element
	 * loose-source-and-record-route (LSRR) option immediately following
	 * the fixed-size part of the IP header.  At this point in processing,
	 * the IP header should contain the following IP addresses:
	 *
	 * original source          - in the source address field
	 * destination group        - in the destination address field
	 * remote tunnel end-point  - in the first  element of LSRR
	 * one of this host's addrs - in the second element of LSRR
	 *
	 * NOTE: RFC-1075 would have the original source and remote tunnel
	 *       end-point addresses swapped.  However, that could cause
	 *       delivery of ICMP error messages to innocent applications
	 *       on intermediate routing hosts!  Therefore, we hereby
	 *       change the spec.
	 */

	/*
	 * Verify that the tunnel options are well-formed.
	 */
	if (ipoptions[0] != IPOPT_NOP ||
	    ipoptions[2] != 11 ||	/* LSRR option length   */
	    ipoptions[3] != 12 ||	/* LSRR address pointer */
	    (tunnel_src = *(u_long *)(&ipoptions[4])) == 0) {
	    mrtstat.mrts_bad_tunnel++;
	    if (mrtdebug)
		log(LOG_DEBUG,
		    "ip_mforward: bad tunnel from %u (%x %x %x %x %x %x)\n",
		    ntohl(ip->ip_src.s_addr),
		    ipoptions[0], ipoptions[1], ipoptions[2], ipoptions[3],
		    *(u_long *)(&ipoptions[4]), *(u_long *)(&ipoptions[8]));
	    return 1;
	}

	/*
	 * Delete the tunnel options from the packet.
	 */
	ovbcopy((caddr_t)(ipoptions + TUNNEL_LEN), (caddr_t)ipoptions,
		(unsigned)(m->m_len - (IP_HDR_LEN + TUNNEL_LEN)));
	m->m_len   -= TUNNEL_LEN;
	ip->ip_len -= TUNNEL_LEN;
	ip->ip_hl  -= TUNNEL_LEN >> 2;

	ifp = 0;	/* tunnel arrivals carry no physical ifp */
    }

    /*
     * Don't forward a packet with time-to-live of zero or one,
     * or a packet destined to a local-only group.
     */
    if (ip->ip_ttl <= 1 ||
	ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP)
	return (int)tunnel_src;

    /*
     * Determine forwarding vifs from the forwarding cache table
     */
    s = splnet();
    MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt);

    /* Entry exists, so forward if necessary */
    if (rt != NULL) {
	splx(s);
	return (ip_mdq(m, ifp, tunnel_src, rt, imo));
    }

    else {
	/*
	 * If we don't have a route for packet's origin,
	 * Make a copy of the packet &
	 * send message to routing daemon
	 */

	register struct mbuf *mb_rt;
	register struct mbuf *mb_ntry;
	register struct mbuf *mb0;
	register struct rtdetq *rte;
	register struct mbuf *rte_m;
	register u_long hash;

	mrtstat.mrts_no_route++;
	if (mrtdebug)
	    log(LOG_DEBUG, "ip_mforward: no rte s %x g %x\n",
		ntohl(ip->ip_src.s_addr),
		ntohl(ip->ip_dst.s_addr));

	/* is there an upcall waiting for this packet? */
	hash = nethash_fc(ip->ip_src.s_addr, ip->ip_dst.s_addr);
	for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) {
	    rt = mtod(mb_rt, struct mfc *);
	    if (((ip->ip_src.s_addr & rt->mfc_originmask.s_addr) ==
		 rt->mfc_origin.s_addr) &&
		(ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) &&
		(mb_rt->m_act != NULL))
		break;
	}

	if (mb_rt == NULL) {
	    /* no upcall, so make a new entry */
	    MGET(mb_rt, M_DONTWAIT, MT_MRTABLE);
	    if (mb_rt == NULL) {
		splx(s);
		return ENOBUFS;
	    }

	    rt = mtod(mb_rt, struct mfc *);

	    /* insert new entry at head of hash chain */
	    rt->mfc_origin.s_addr     = ip->ip_src.s_addr;
	    rt->mfc_originmask.s_addr = (u_long)0xffffffff;
	    rt->mfc_mcastgrp.s_addr   = ip->ip_dst.s_addr;

	    /* link into table */
	    hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr);
	    mb_rt->m_next  = mfctable[hash];
	    mfctable[hash] = mb_rt;
	    mb_rt->m_act = NULL;

	}

	/* determine if q has overflowed */
	for (rte_m = mb_rt, hash = 0; rte_m->m_act; rte_m = rte_m->m_act)
	    hash++;

	if (hash > MAX_UPQ) {
	    mrtstat.mrts_upq_ovflw++;
	    splx(s);
	    return 0;
	}

	/* add this packet and timing, ifp info to m_act */
	MGET(mb_ntry, M_DONTWAIT, MT_DATA);
	if (mb_ntry == NULL) {
	    splx(s);
	    return ENOBUFS;
	}

	mb_ntry->m_act = NULL;
	rte = mtod(mb_ntry, struct rtdetq *);

	mb0 = m_copy(m, 0, M_COPYALL);
	if (mb0 == NULL) {
	    splx(s);
	    return ENOBUFS;
	}

	rte->m 		= mb0;
	rte->ifp 	= ifp;
	rte->tunnel_src = tunnel_src;
	rte->imo	= imo;

	rte_m->m_act = mb_ntry;

	splx(s);

	if (hash == 0) {
	    /*
	     * Send message to routing daemon to install
	     * a route into the kernel table
	     */
	    k_igmpsrc.sin_addr = ip->ip_src;
	    k_igmpdst.sin_addr = ip->ip_dst;

	    mm = m_copy(m, 0, M_COPYALL);
	    if (mm == NULL) {
		splx(s);
		return ENOBUFS;
	    }

	    k_data = mtod(mm, struct ip *);
	    k_data->ip_p = 0;	/* protocol 0 marks a kernel upcall */

	    mrtstat.mrts_upcalls++;

	    rip_ip_input(mm, ip_mrouter, (struct sockaddr *)&k_igmpsrc);

	    /* set timer to cleanup entry if upcall is lost */
	    timeout(cleanup_cache, (caddr_t)mb_rt, 100);
	    timeout_val++;
	}

	return 0;
    }
}

#ifndef MROUTE_LKM
int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
		   struct ip_moptions *) = X_ip_mforward;
#endif

/*
 * Clean up the cache entry if upcall is not serviced
 */
static void
cleanup_cache(xmb_rt)
	void *xmb_rt;
{
    struct mbuf *mb_rt = xmb_rt;
    struct mfc *rt;
    u_long hash;
    struct mbuf *prev_m0;
    struct mbuf *m0;
    struct mbuf *m;
    struct rtdetq *rte;
    int s;

    rt = mtod(mb_rt, struct mfc *);
    hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr);

    if (mrtdebug)
	log(LOG_DEBUG, "ip_mforward: cleanup ipm %d h %d s %x g %x\n",
	    ip_mrouter, hash, ntohl(rt->mfc_origin.s_addr),
	    ntohl(rt->mfc_mcastgrp.s_addr));

    mrtstat.mrts_cache_cleanups++;

    /*
     * determine entry to be cleaned up in cache table
     */
    s = splnet();
    for (prev_m0 = m0 = mfctable[hash]; m0; prev_m0 = m0, m0 = m0->m_next)
	if (m0 == mb_rt)
	    break;

    /*
     * drop all the packets
     * free the mbuf with the pkt, if, timing info
     */
    while (mb_rt->m_act) {
	m = mb_rt->m_act;
	mb_rt->m_act = m->m_act;

	rte = mtod(m, struct rtdetq *);
	m_freem(rte->m);
	m_free(m);
    }

    /*
     * Delete the entry from the cache
     */
    if (prev_m0 != m0) {	/* if moved past head of list */
	MFREE(m0, prev_m0->m_next);
    } else			/* delete head of list, it is in the table */
	mfctable[hash] = m_free(m0);

    timeout_val--;
    splx(s);
}

/*
 * Packet
forwarding routine once entry in the cache is made 1205 */ 1206static int 1207ip_mdq(m, ifp, tunnel_src, rt, imo) 1208 register struct mbuf *m; 1209 register struct ifnet *ifp; 1210 register u_long tunnel_src; 1211 register struct mfc *rt; 1212 register struct ip_moptions *imo; 1213{ 1214 register struct ip *ip = mtod(m, struct ip *); 1215 register vifi_t vifi; 1216 register struct vif *vifp; 1217 1218 /* 1219 * Don't forward if it didn't arrive from the parent vif for its origin. 1220 * Notes: v_ifp is zero for src route tunnels, multicast_decap_if 1221 * for encapsulated tunnels and a real ifnet for non-tunnels so 1222 * the first part of the if catches wrong physical interface or 1223 * tunnel type; v_rmt_addr is zero for non-tunneled packets so 1224 * the 2nd part catches both packets that arrive via a tunnel 1225 * that shouldn't and packets that arrive via the wrong tunnel. 1226 */ 1227 vifi = rt->mfc_parent; 1228 if (viftable[vifi].v_ifp != ifp || 1229 (ifp == 0 && viftable[vifi].v_rmt_addr.s_addr != tunnel_src)) { 1230 /* came in the wrong interface */ 1231 if (mrtdebug) 1232 log(LOG_DEBUG, "wrong if: ifp %x vifi %d\n", 1233 ifp, vifi); 1234 ++mrtstat.mrts_wrong_if; 1235 return (int)tunnel_src; 1236 } 1237 1238 /* increment the interface and s-g counters */ 1239 viftable[vifi].v_pkt_in++; 1240 rt->mfc_pkt_cnt++; 1241 1242 /* 1243 * For each vif, decide if a copy of the packet should be forwarded. 
1244 * Forward if: 1245 * - the ttl exceeds the vif's threshold 1246 * - there are group members downstream on interface 1247 */ 1248#define MC_SEND(ip,vifp,m) { \ 1249 (vifp)->v_pkt_out++; \ 1250 if ((vifp)->v_flags & VIFF_SRCRT) \ 1251 srcrt_send((ip), (vifp), (m)); \ 1252 else if ((vifp)->v_flags & VIFF_TUNNEL) \ 1253 encap_send((ip), (vifp), (m)); \ 1254 else \ 1255 phyint_send((ip), (vifp), (m)); \ 1256 } 1257 1258/* If no options or the imo_multicast_vif option is 0, don't do this part 1259 */ 1260 if ((imo != NULL) && 1261 (( vifi = imo->imo_multicast_vif - 1) < numvifs) /*&& (vifi>=0)*/) 1262 { 1263 MC_SEND(ip,viftable+vifi,m); 1264 return (1); /* make sure we are done: No more physical sends */ 1265 } 1266 1267 for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++) 1268 if ((rt->mfc_ttls[vifi] > 0) && 1269 (ip->ip_ttl > rt->mfc_ttls[vifi])) 1270 MC_SEND(ip, vifp, m); 1271 1272 return 0; 1273} 1274 1275/* check if a vif number is legal/ok. This is used by ip_output, to export 1276 * numvifs there, 1277 */ 1278int 1279X_legal_vif_num(vif) 1280 int vif; 1281{ if (vif>=0 && vif<=numvifs) 1282 return(1); 1283 else 1284 return(0); 1285} 1286 1287#ifndef MROUTE_LKM 1288int (*legal_vif_num)(int) = X_legal_vif_num; 1289#endif 1290 1291static void 1292phyint_send(ip, vifp, m) 1293 struct ip *ip; 1294 struct vif *vifp; 1295 struct mbuf *m; 1296{ 1297 register struct mbuf *mb_copy; 1298 int hlen = ip->ip_hl << 2; 1299 register struct ip_moptions *imo; 1300 1301 if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL) 1302 return; 1303 1304 /* 1305 * Make sure the header isn't in an cluster, because the sharing 1306 * in clusters defeats the whole purpose of making the copy above. 
1307 */ 1308 mb_copy = m_pullup(mb_copy, hlen); 1309 if (mb_copy == NULL) 1310 return; 1311 1312 MALLOC(imo, struct ip_moptions *, sizeof *imo, M_IPMOPTS, M_NOWAIT); 1313 if (imo == NULL) { 1314 m_freem(mb_copy); 1315 return; 1316 } 1317 1318 imo->imo_multicast_ifp = vifp->v_ifp; 1319 imo->imo_multicast_ttl = ip->ip_ttl - 1; 1320 imo->imo_multicast_loop = 1; 1321 1322 if (vifp->v_rate_limit <= 0) 1323 tbf_send_packet(vifp, mb_copy, imo); 1324 else 1325 tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len, 1326 imo); 1327} 1328 1329static void 1330srcrt_send(ip, vifp, m) 1331 struct ip *ip; 1332 struct vif *vifp; 1333 struct mbuf *m; 1334{ 1335 struct mbuf *mb_copy, *mb_opts; 1336 int hlen = ip->ip_hl << 2; 1337 register struct ip *ip_copy; 1338 u_char *cp; 1339 1340 /* 1341 * Make sure that adding the tunnel options won't exceed the 1342 * maximum allowed number of option bytes. 1343 */ 1344 if (ip->ip_hl > (60 - TUNNEL_LEN) >> 2) { 1345 mrtstat.mrts_cant_tunnel++; 1346 if (mrtdebug) 1347 log(LOG_DEBUG, "srcrt_send: no room for tunnel options, from %u\n", 1348 ntohl(ip->ip_src.s_addr)); 1349 return; 1350 } 1351 1352 if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL) 1353 return; 1354 1355 MGETHDR(mb_opts, M_DONTWAIT, MT_HEADER); 1356 if (mb_opts == NULL) { 1357 m_freem(mb_copy); 1358 return; 1359 } 1360 /* 1361 * 'Delete' the base ip header from the mb_copy chain 1362 */ 1363 mb_copy->m_len -= hlen; 1364 mb_copy->m_data += hlen; 1365 /* 1366 * Make mb_opts be the new head of the packet chain. 
1367 * Any options of the packet were left in the old packet chain head 1368 */ 1369 mb_opts->m_next = mb_copy; 1370 mb_opts->m_len = hlen + TUNNEL_LEN; 1371 mb_opts->m_data += MSIZE - mb_opts->m_len; 1372 mb_opts->m_pkthdr.len = mb_copy->m_pkthdr.len + TUNNEL_LEN; 1373 /* 1374 * Copy the base ip header from the mb_copy chain to the new head mbuf 1375 */ 1376 ip_copy = mtod(mb_opts, struct ip *); 1377 bcopy((caddr_t)ip_copy, mtod(mb_opts, caddr_t), hlen); 1378 ip_copy->ip_ttl--; 1379 ip_copy->ip_dst = vifp->v_rmt_addr; /* remote tunnel end-point */ 1380 /* 1381 * Adjust the ip header length to account for the tunnel options. 1382 */ 1383 ip_copy->ip_hl += TUNNEL_LEN >> 2; 1384 ip_copy->ip_len += TUNNEL_LEN; 1385 /* 1386 * Add the NOP and LSRR after the base ip header 1387 */ 1388 cp = mtod(mb_opts, u_char *) + IP_HDR_LEN; 1389 *cp++ = IPOPT_NOP; 1390 *cp++ = IPOPT_LSRR; 1391 *cp++ = 11; /* LSRR option length */ 1392 *cp++ = 8; /* LSSR pointer to second element */ 1393 *(u_long*)cp = vifp->v_lcl_addr.s_addr; /* local tunnel end-point */ 1394 cp += 4; 1395 *(u_long*)cp = ip->ip_dst.s_addr; /* destination group */ 1396 1397 if (vifp->v_rate_limit <= 0) 1398 tbf_send_packet(vifp, mb_opts, 0); 1399 else 1400 tbf_control(vifp, mb_opts, 1401 mtod(mb_opts, struct ip *), ip_copy->ip_len, 0); 1402} 1403 1404static void 1405encap_send(ip, vifp, m) 1406 register struct ip *ip; 1407 register struct vif *vifp; 1408 register struct mbuf *m; 1409{ 1410 register struct mbuf *mb_copy; 1411 register struct ip *ip_copy; 1412 int hlen = ip->ip_hl << 2; 1413 register int i, len = ip->ip_len; 1414 1415 /* 1416 * copy the old packet & pullup it's IP header into the 1417 * new mbuf so we can modify it. Try to fill the new 1418 * mbuf since if we don't the ethernet driver will. 
1419 */ 1420 MGET(mb_copy, M_DONTWAIT, MT_DATA); 1421 if (mb_copy == NULL) 1422 return; 1423 mb_copy->m_data += 16; 1424 mb_copy->m_len = sizeof(multicast_encap_iphdr); 1425 1426 if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) { 1427 m_freem(mb_copy); 1428 return; 1429 } 1430 i = MHLEN - M_LEADINGSPACE(mb_copy); 1431 if (i > len) 1432 i = len; 1433 mb_copy = m_pullup(mb_copy, i); 1434 if (mb_copy == NULL) 1435 return; 1436 mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr); 1437 1438 /* 1439 * fill in the encapsulating IP header. 1440 */ 1441 ip_copy = mtod(mb_copy, struct ip *); 1442 *ip_copy = multicast_encap_iphdr; 1443 ip_copy->ip_id = htons(ip_id++); 1444 ip_copy->ip_len += len; 1445 ip_copy->ip_src = vifp->v_lcl_addr; 1446 ip_copy->ip_dst = vifp->v_rmt_addr; 1447 1448 /* 1449 * turn the encapsulated IP header back into a valid one. 1450 */ 1451 ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr)); 1452 --ip->ip_ttl; 1453 HTONS(ip->ip_len); 1454 HTONS(ip->ip_off); 1455 ip->ip_sum = 0; 1456#if defined(LBL) && !defined(ultrix) 1457 ip->ip_sum = ~oc_cksum((caddr_t)ip, ip->ip_hl << 2, 0); 1458#else 1459 mb_copy->m_data += sizeof(multicast_encap_iphdr); 1460 ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); 1461 mb_copy->m_data -= sizeof(multicast_encap_iphdr); 1462#endif 1463 1464 if (vifp->v_rate_limit <= 0) 1465 tbf_send_packet(vifp, mb_copy, 0); 1466 else 1467 tbf_control(vifp, mb_copy, ip, ip_copy->ip_len, 0); 1468} 1469 1470/* 1471 * De-encapsulate a packet and feed it back through ip input (this 1472 * routine is called whenever IP gets a packet with proto type 1473 * ENCAP_PROTO and a local destination address). 
1474 */ 1475void 1476#ifdef MROUTE_LKM 1477X_multiencap_decap(m) 1478#else 1479multiencap_decap(m) 1480#endif 1481 register struct mbuf *m; 1482{ 1483 struct ifnet *ifp = m->m_pkthdr.rcvif; 1484 register struct ip *ip = mtod(m, struct ip *); 1485 register int hlen = ip->ip_hl << 2; 1486 register int s; 1487 register struct ifqueue *ifq; 1488 register struct vif *vifp; 1489 1490 if (ip->ip_p != ENCAP_PROTO) { 1491 rip_input(m); 1492 return; 1493 } 1494 /* 1495 * dump the packet if it's not to a multicast destination or if 1496 * we don't have an encapsulating tunnel with the source. 1497 * Note: This code assumes that the remote site IP address 1498 * uniquely identifies the tunnel (i.e., that this site has 1499 * at most one tunnel with the remote site). 1500 */ 1501 if (! IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) { 1502 ++mrtstat.mrts_bad_tunnel; 1503 m_freem(m); 1504 return; 1505 } 1506 if (ip->ip_src.s_addr != last_encap_src) { 1507 register struct vif *vife; 1508 1509 vifp = viftable; 1510 vife = vifp + numvifs; 1511 last_encap_src = ip->ip_src.s_addr; 1512 last_encap_vif = 0; 1513 for ( ; vifp < vife; ++vifp) 1514 if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) { 1515 if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT)) 1516 == VIFF_TUNNEL) 1517 last_encap_vif = vifp; 1518 break; 1519 } 1520 } 1521 if ((vifp = last_encap_vif) == 0) { 1522 last_encap_src = 0; 1523 mrtstat.mrts_cant_tunnel++; /*XXX*/ 1524 m_freem(m); 1525 if (mrtdebug) 1526 log(LOG_DEBUG, "ip_mforward: no tunnel with %x\n", 1527 ntohl(ip->ip_src.s_addr)); 1528 return; 1529 } 1530 ifp = vifp->v_ifp; 1531 1532 if (hlen > IP_HDR_LEN) 1533 ip_stripoptions(m, (struct mbuf *) 0); 1534 m->m_data += IP_HDR_LEN; 1535 m->m_len -= IP_HDR_LEN; 1536 m->m_pkthdr.len -= IP_HDR_LEN; 1537 m->m_pkthdr.rcvif = ifp; 1538 1539 ifq = &ipintrq; 1540 s = splimp(); 1541 if (IF_QFULL(ifq)) { 1542 IF_DROP(ifq); 1543 m_freem(m); 1544 } else { 1545 IF_ENQUEUE(ifq, m); 1546 /* 1547 * normally we 
would need a "schednetisr(NETISR_IP)" 1548 * here but we were called by ip_input and it is going 1549 * to loop back & try to dequeue the packet we just 1550 * queued as soon as we return so we avoid the 1551 * unnecessary software interrrupt. 1552 */ 1553 } 1554 splx(s); 1555} 1556 1557/* 1558 * Token bucket filter module 1559 */ 1560void 1561tbf_control(vifp, m, ip, p_len, imo) 1562 register struct vif *vifp; 1563 register struct mbuf *m; 1564 register struct ip *ip; 1565 register u_long p_len; 1566 struct ip_moptions *imo; 1567{ 1568 tbf_update_tokens(vifp); 1569 1570 /* if there are enough tokens, 1571 * and the queue is empty, 1572 * send this packet out 1573 */ 1574 1575 if (vifp->v_tbf->q_len == 0) { 1576 if (p_len <= vifp->v_tbf->n_tok) { 1577 vifp->v_tbf->n_tok -= p_len; 1578 tbf_send_packet(vifp, m, imo); 1579 } else if (p_len > MAX_BKT_SIZE) { 1580 /* drop if packet is too large */ 1581 mrtstat.mrts_pkt2large++; 1582 m_freem(m); 1583 return; 1584 } else { 1585 /* queue packet and timeout till later */ 1586 tbf_queue(vifp, m, ip, imo); 1587 timeout(tbf_reprocess_q, (caddr_t)vifp, 1); 1588 } 1589 } else if (vifp->v_tbf->q_len < MAXQSIZE) { 1590 /* finite queue length, so queue pkts and process queue */ 1591 tbf_queue(vifp, m, ip, imo); 1592 tbf_process_q(vifp); 1593 } else { 1594 /* queue length too much, try to dq and queue and process */ 1595 if (!tbf_dq_sel(vifp, ip)) { 1596 mrtstat.mrts_q_overflow++; 1597 m_freem(m); 1598 return; 1599 } else { 1600 tbf_queue(vifp, m, ip, imo); 1601 tbf_process_q(vifp); 1602 } 1603 } 1604 return; 1605} 1606 1607/* 1608 * adds a packet to the queue at the interface 1609 */ 1610void 1611tbf_queue(vifp, m, ip, imo) 1612 register struct vif *vifp; 1613 register struct mbuf *m; 1614 register struct ip *ip; 1615 struct ip_moptions *imo; 1616{ 1617 register u_long ql; 1618 register int index = (vifp - viftable); 1619 register int s = splnet(); 1620 1621 ql = vifp->v_tbf->q_len; 1622 1623 qtable[index][ql].pkt_m = m; 1624 
qtable[index][ql].pkt_len = (mtod(m, struct ip *))->ip_len; 1625 qtable[index][ql].pkt_ip = ip; 1626 qtable[index][ql].pkt_imo = imo; 1627 1628 vifp->v_tbf->q_len++; 1629 splx(s); 1630} 1631 1632 1633/* 1634 * processes the queue at the interface 1635 */ 1636void 1637tbf_process_q(vifp) 1638 register struct vif *vifp; 1639{ 1640 register struct pkt_queue pkt_1; 1641 register int index = (vifp - viftable); 1642 register int s = splnet(); 1643 1644 /* loop through the queue at the interface and send as many packets 1645 * as possible 1646 */ 1647 while (vifp->v_tbf->q_len > 0) { 1648 /* locate the first packet */ 1649 pkt_1.pkt_len = ((qtable[index][0]).pkt_len); 1650 pkt_1.pkt_m = (qtable[index][0]).pkt_m; 1651 pkt_1.pkt_ip = (qtable[index][0]).pkt_ip; 1652 pkt_1.pkt_imo = (qtable[index][0]).pkt_imo; 1653 1654 /* determine if the packet can be sent */ 1655 if (pkt_1.pkt_len <= vifp->v_tbf->n_tok) { 1656 /* if so, 1657 * reduce no of tokens, dequeue the queue, 1658 * send the packet. 1659 */ 1660 vifp->v_tbf->n_tok -= pkt_1.pkt_len; 1661 1662 tbf_dequeue(vifp, 0); 1663 1664 tbf_send_packet(vifp, pkt_1.pkt_m, pkt_1.pkt_imo); 1665 1666 } else break; 1667 } 1668 splx(s); 1669} 1670 1671/* 1672 * removes the jth packet from the queue at the interface 1673 */ 1674void 1675tbf_dequeue(vifp,j) 1676 register struct vif *vifp; 1677 register int j; 1678{ 1679 register u_long index = vifp - viftable; 1680 register int i; 1681 1682 for (i=j+1; i <= vifp->v_tbf->q_len - 1; i++) { 1683 qtable[index][i-1].pkt_m = qtable[index][i].pkt_m; 1684 qtable[index][i-1].pkt_len = qtable[index][i].pkt_len; 1685 qtable[index][i-1].pkt_ip = qtable[index][i].pkt_ip; 1686 qtable[index][i-1].pkt_imo = qtable[index][i].pkt_imo; 1687 } 1688 qtable[index][i-1].pkt_m = NULL; 1689 qtable[index][i-1].pkt_len = NULL; 1690 qtable[index][i-1].pkt_ip = NULL; 1691 qtable[index][i-1].pkt_imo = NULL; 1692 1693 vifp->v_tbf->q_len--; 1694 1695 if (tbfdebug > 1) 1696 log(LOG_DEBUG, "tbf_dequeue: vif# %d qlen 
%d\n",vifp-viftable, i-1); 1697} 1698 1699void 1700tbf_reprocess_q(xvifp) 1701 void *xvifp; 1702{ 1703 register struct vif *vifp = xvifp; 1704 if (ip_mrouter == NULL) 1705 return; 1706 1707 tbf_update_tokens(vifp); 1708 1709 tbf_process_q(vifp); 1710 1711 if (vifp->v_tbf->q_len) 1712 timeout(tbf_reprocess_q, (caddr_t)vifp, 1); 1713} 1714 1715/* function that will selectively discard a member of the queue 1716 * based on the precedence value and the priority obtained through 1717 * a lookup table - not yet implemented accurately! 1718 */ 1719int 1720tbf_dq_sel(vifp, ip) 1721 register struct vif *vifp; 1722 register struct ip *ip; 1723{ 1724 register int i; 1725 register int s = splnet(); 1726 register u_int p; 1727 1728 p = priority(vifp, ip); 1729 1730 for(i=vifp->v_tbf->q_len-1;i >= 0;i--) { 1731 if (p > priority(vifp, qtable[vifp-viftable][i].pkt_ip)) { 1732 m_freem(qtable[vifp-viftable][i].pkt_m); 1733 tbf_dequeue(vifp,i); 1734 splx(s); 1735 mrtstat.mrts_drop_sel++; 1736 return(1); 1737 } 1738 } 1739 splx(s); 1740 return(0); 1741} 1742 1743void 1744tbf_send_packet(vifp, m, imo) 1745 register struct vif *vifp; 1746 register struct mbuf *m; 1747 struct ip_moptions *imo; 1748{ 1749 int error; 1750 int s = splnet(); 1751 1752 /* if source route tunnels */ 1753 if (vifp->v_flags & VIFF_SRCRT) { 1754 error = ip_output(m, (struct mbuf *)0, (struct route *)0, 1755 IP_FORWARDING, imo); 1756 if (mrtdebug > 1) 1757 log(LOG_DEBUG, "srcrt_send on vif %d err %d\n", vifp-viftable, error); 1758 } else if (vifp->v_flags & VIFF_TUNNEL) { 1759 /* If tunnel options */ 1760 ip_output(m, (struct mbuf *)0, (struct route *)0, 1761 IP_FORWARDING, imo); 1762 } else { 1763 /* if physical interface option, extract the options and then send */ 1764 error = ip_output(m, (struct mbuf *)0, (struct route *)0, 1765 IP_FORWARDING, imo); 1766 FREE(imo, M_IPMOPTS); 1767 1768 if (mrtdebug > 1) 1769 log(LOG_DEBUG, "phyint_send on vif %d err %d\n", vifp-viftable, error); 1770 } 1771 splx(s); 1772} 
1773 1774/* determine the current time and then 1775 * the elapsed time (between the last time and time now) 1776 * in milliseconds & update the no. of tokens in the bucket 1777 */ 1778void 1779tbf_update_tokens(vifp) 1780 register struct vif *vifp; 1781{ 1782 struct timeval tp; 1783 register u_long t; 1784 register u_long elapsed; 1785 register int s = splnet(); 1786 1787 GET_TIME(tp); 1788 1789 t = tp.tv_sec*1000 + tp.tv_usec/1000; 1790 1791 elapsed = (t - vifp->v_tbf->last_pkt_t) * vifp->v_rate_limit /8; 1792 vifp->v_tbf->n_tok += elapsed; 1793 vifp->v_tbf->last_pkt_t = t; 1794 1795 if (vifp->v_tbf->n_tok > MAX_BKT_SIZE) 1796 vifp->v_tbf->n_tok = MAX_BKT_SIZE; 1797 1798 splx(s); 1799} 1800 1801static int 1802priority(vifp, ip) 1803 register struct vif *vifp; 1804 register struct ip *ip; 1805{ 1806 register u_long graddr; 1807 register int prio; 1808 1809 /* temporary hack; will add general packet classifier some day */ 1810 1811 prio = 50; /* default priority */ 1812 1813 /* check for source route options and add option length to get dst */ 1814 if (vifp->v_flags & VIFF_SRCRT) 1815 graddr = ntohl((ip+8)->ip_dst.s_addr); 1816 else 1817 graddr = ntohl(ip->ip_dst.s_addr); 1818 1819 switch (graddr & 0xf) { 1820 case 0x0: break; 1821 case 0x1: if (graddr == 0xe0020001) prio = 65; /* MBone Audio */ 1822 break; 1823 case 0x2: break; 1824 case 0x3: break; 1825 case 0x4: break; 1826 case 0x5: break; 1827 case 0x6: break; 1828 case 0x7: break; 1829 case 0x8: break; 1830 case 0x9: break; 1831 case 0xa: if (graddr == 0xe000010a) prio = 85; /* IETF Low Audio 1 */ 1832 break; 1833 case 0xb: if (graddr == 0xe000010b) prio = 75; /* IETF Audio 1 */ 1834 break; 1835 case 0xc: if (graddr == 0xe000010c) prio = 60; /* IETF Video 1 */ 1836 break; 1837 case 0xd: if (graddr == 0xe000010d) prio = 80; /* IETF Low Audio 2 */ 1838 break; 1839 case 0xe: if (graddr == 0xe000010e) prio = 70; /* IETF Audio 2 */ 1840 break; 1841 case 0xf: if (graddr == 0xe000010f) prio = 55; /* IETF Video 2 */ 
1842 break; 1843 } 1844 1845 if (tbfdebug > 1) log(LOG_DEBUG, "graddr%x prio%d\n", graddr, prio); 1846 1847 return prio; 1848} 1849 1850/* 1851 * End of token bucket filter modifications 1852 */ 1853 1854#ifdef MROUTE_LKM 1855#include <sys/conf.h> 1856#include <sys/exec.h> 1857#include <sys/sysent.h> 1858#include <sys/lkm.h> 1859 1860MOD_MISC("ip_mroute_mod") 1861 1862static int 1863ip_mroute_mod_handle(struct lkm_table *lkmtp, int cmd) 1864{ 1865 int i; 1866 struct lkm_misc *args = lkmtp->private.lkm_misc; 1867 int err = 0; 1868 1869 switch(cmd) { 1870 static int (*old_ip_mrouter_cmd)(); 1871 static int (*old_ip_mrouter_done)(); 1872 static int (*old_ip_mforward)(); 1873 static int (*old_mrt_ioctl)(); 1874 static void (*old_proto4_input)(); 1875 static int (*old_legal_vif_num)(); 1876 extern u_char ip_protox[]; 1877 extern struct protosw inetsw[]; 1878 1879 case LKM_E_LOAD: 1880 if(lkmexists(lkmtp) || ip_mrtproto) 1881 return(EEXIST); 1882 old_ip_mrouter_cmd = ip_mrouter_cmd; 1883 ip_mrouter_cmd = X_ip_mrouter_cmd; 1884 old_ip_mrouter_done = ip_mrouter_done; 1885 ip_mrouter_done = X_ip_mrouter_done; 1886 old_ip_mforward = ip_mforward; 1887 ip_mforward = X_ip_mforward; 1888 old_mrt_ioctl = mrt_ioctl; 1889 mrt_ioctl = X_mrt_ioctl; 1890 old_proto4_input = inetsw[ip_protox[ENCAP_PROTO]].pr_input; 1891 inetsw[ip_protox[ENCAP_PROTO]].pr_input = X_multiencap_decap; 1892 old_legal_vif_num = legal_vif_num; 1893 legal_vif_num = X_legal_vif_num; 1894 ip_mrtproto = IGMP_DVMRP; 1895 1896 printf("\nIP multicast routing loaded\n"); 1897 break; 1898 1899 case LKM_E_UNLOAD: 1900 if (ip_mrouter) 1901 return EINVAL; 1902 1903 ip_mrouter_cmd = old_ip_mrouter_cmd; 1904 ip_mrouter_done = old_ip_mrouter_done; 1905 ip_mforward = old_ip_mforward; 1906 mrt_ioctl = old_mrt_ioctl; 1907 inetsw[ip_protox[ENCAP_PROTO]].pr_input = old_proto4_input; 1908 legal_vif_num = old_legal_vif_num; 1909 ip_mrtproto = 0; 1910 break; 1911 1912 default: 1913 err = EINVAL; 1914 break; 1915 } 1916 1917 
return(err); 1918} 1919 1920int 1921ip_mroute_mod(struct lkm_table *lkmtp, int cmd, int ver) { 1922 DISPATCH(lkmtp, cmd, ver, ip_mroute_mod_handle, ip_mroute_mod_handle, 1923 nosys); 1924} 1925 1926#endif /* MROUTE_LKM */ 1927#endif /* MROUTING */ 1928 1929 1930