/* ip_mroute.c revision 2754 */
1/* 2 * IP multicast forwarding procedures 3 * 4 * Written by David Waitzman, BBN Labs, August 1988. 5 * Modified by Steve Deering, Stanford, February 1989. 6 * Modified by Mark J. Steiglitz, Stanford, May, 1991 7 * Modified by Van Jacobson, LBL, January 1993 8 * Modified by Ajit Thyagarajan, PARC, August 1993 9 * 10 * MROUTING 1.8 11 */ 12 13 14#include <sys/param.h> 15#include <sys/systm.h> 16#include <sys/mbuf.h> 17#include <sys/socket.h> 18#include <sys/socketvar.h> 19#include <sys/protosw.h> 20#include <sys/errno.h> 21#include <sys/time.h> 22#include <sys/ioctl.h> 23#include <sys/syslog.h> 24#include <net/if.h> 25#include <net/route.h> 26#include <net/raw_cb.h> 27#include <netinet/in.h> 28#include <netinet/in_systm.h> 29#include <netinet/ip.h> 30#include <netinet/ip_var.h> 31#include <netinet/in_pcb.h> 32#include <netinet/in_var.h> 33#include <netinet/igmp.h> 34#include <netinet/igmp_var.h> 35#include <netinet/ip_mroute.h> 36 37#ifndef NTOHL 38#if BYTE_ORDER != BIG_ENDIAN 39#define NTOHL(d) ((d) = ntohl((d))) 40#define NTOHS(d) ((d) = ntohs((u_short)(d))) 41#define HTONL(d) ((d) = htonl((d))) 42#define HTONS(d) ((d) = htons((u_short)(d))) 43#else 44#define NTOHL(d) 45#define NTOHS(d) 46#define HTONL(d) 47#define HTONS(d) 48#endif 49#endif 50 51struct mrtstat mrtstat; 52 53#ifndef MROUTING 54/* 55 * Dummy routines and globals used when multicast routing is not compiled in. 
56 */ 57 58struct socket *ip_mrouter = NULL; 59u_int ip_mrtproto = 0; 60 61int 62_ip_mrouter_cmd(cmd, so, m) 63 int cmd; 64 struct socket *so; 65 struct mbuf *m; 66{ 67 return(EOPNOTSUPP); 68} 69 70int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = _ip_mrouter_cmd; 71 72int 73_ip_mrouter_done() 74{ 75 return(0); 76} 77 78int (*ip_mrouter_done)(void) = _ip_mrouter_done; 79 80int 81_ip_mforward(ip, ifp, m, imo) 82 struct ip *ip; 83 struct ifnet *ifp; 84 struct mbuf *m; 85 struct ip_moptions *imo; 86{ 87 return(0); 88} 89 90int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, 91 struct ip_moptions *) = _ip_mforward; 92 93int 94_mrt_ioctl(int req, caddr_t data, struct proc *p) 95{ 96 return EOPNOTSUPP; 97} 98 99int (*mrt_ioctl)(int, caddr_t, struct proc *) = _mrt_ioctl; 100 101void multiencap_decap(struct mbuf *m) { /* XXX must fixup manually */ 102 rip_input(m); 103} 104 105int (*legal_vif_num)(int) = 0; 106 107#else 108 109#define INSIZ sizeof(struct in_addr) 110#define same(a1, a2) \ 111 (bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0) 112 113#define MT_MRTABLE MT_RTABLE /* since nothing else uses it */ 114 115/* 116 * Globals. All but ip_mrouter and ip_mrtproto could be static, 117 * except for netstat or debugging purposes. 
118 */ 119struct socket *ip_mrouter = NULL; 120int ip_mrtproto = IGMP_DVMRP; /* for netstat only */ 121 122#define NO_RTE_FOUND 0x1 123#define RTE_FOUND 0x2 124 125struct mbuf *mfctable[MFCTBLSIZ]; 126struct vif viftable[MAXVIFS]; 127u_int mrtdebug = 0; /* debug level */ 128u_int tbfdebug = 0; /* tbf debug level */ 129 130u_long timeout_val = 0; /* count of outstanding upcalls */ 131 132/* 133 * Define the token bucket filter structures 134 * tbftable -> each vif has one of these for storing info 135 * qtable -> each interface has an associated queue of pkts 136 */ 137 138struct tbf tbftable[MAXVIFS]; 139struct pkt_queue qtable[MAXVIFS][MAXQSIZE]; 140 141/* 142 * 'Interfaces' associated with decapsulator (so we can tell 143 * packets that went through it from ones that get reflected 144 * by a broken gateway). These interfaces are never linked into 145 * the system ifnet list & no routes point to them. I.e., packets 146 * can't be sent this way. They only exist as a placeholder for 147 * multicast source verification. 148 */ 149struct ifnet multicast_decap_if[MAXVIFS]; 150 151#define ENCAP_TTL 64 152#define ENCAP_PROTO 4 153 154/* prototype IP hdr for encapsulated packets */ 155struct ip multicast_encap_iphdr = { 156#if BYTE_ORDER == LITTLE_ENDIAN 157 sizeof(struct ip) >> 2, IPVERSION, 158#else 159 IPVERSION, sizeof(struct ip) >> 2, 160#endif 161 0, /* tos */ 162 sizeof(struct ip), /* total length */ 163 0, /* id */ 164 0, /* frag offset */ 165 ENCAP_TTL, ENCAP_PROTO, 166 0, /* checksum */ 167}; 168 169/* 170 * Private variables. 171 */ 172static vifi_t numvifs = 0; 173 174/* 175 * one-back cache used by multiencap_decap to locate a tunnel's vif 176 * given a datagram's src ip address. 
177 */ 178static u_long last_encap_src; 179static struct vif *last_encap_vif; 180 181static u_long nethash_fc(u_long, u_long); 182static struct mfc *mfcfind(u_long, u_long); 183int get_sg_cnt(struct sioc_sg_req *); 184int get_vif_cnt(struct sioc_vif_req *); 185int get_vifs(caddr_t); 186static int add_vif(struct vifctl *); 187static int del_vif(vifi_t *); 188static int add_mfc(struct mfcctl *); 189static int del_mfc(struct delmfcctl *); 190static void cleanup_cache(void *); 191static int ip_mdq(struct mbuf *, struct ifnet *, u_long, struct mfc *, 192 struct ip_moptions *); 193extern int (*legal_vif_num)(int); 194static void phyint_send(struct ip *, struct vif *, struct mbuf *); 195static void srcrt_send(struct ip *, struct vif *, struct mbuf *); 196static void encap_send(struct ip *, struct vif *, struct mbuf *); 197void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long, 198 struct ip_moptions *); 199void tbf_queue(struct vif *, struct mbuf *, struct ip *, struct ip_moptions *); 200void tbf_process_q(struct vif *); 201void tbf_dequeue(struct vif *, int); 202void tbf_reprocess_q(void *); 203int tbf_dq_sel(struct vif *, struct ip *); 204void tbf_send_packet(struct vif *, struct mbuf *, struct ip_moptions *); 205void tbf_update_tokens(struct vif *); 206static int priority(struct vif *, struct ip *); 207static int ip_mrouter_init(struct socket *); 208 209/* 210 * A simple hash function: returns MFCHASHMOD of the low-order octet of 211 * the argument's network or subnet number and the multicast group assoc. 
 */
static u_long
nethash_fc(m,n)
	register u_long m;
	register u_long n;
{
	struct in_addr in1;
	struct in_addr in2;

	/* reduce the origin address to its (classful) network number */
	in1.s_addr = m;
	m = in_netof(in1);
	/* shift the net number down until its low octet is non-zero,
	 * so short (class A/B) numbers still feed MFCHASHMOD usefully */
	while ((m & 0xff) == 0) m >>= 8;

	/* same reduction for the multicast group address */
	in2.s_addr = n;
	n = in_netof(in2);
	while ((n & 0xff) == 0) n >>= 8;

	/* fold both into a single mfctable index */
	return (MFCHASHMOD(m) ^ MFCHASHMOD(n));
}

/*
 * this is a direct-mapped cache used to speed the mapping from a
 * datagram source address to the associated multicast route. Note
 * that unlike mrttable, the hash is on IP address, not IP net number.
 */
#define MFCHASHSIZ 1024
#define MFCHASH(a, g) ((((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
			((g) >> 20) ^ ((g) >> 10) ^ (g)) & (MFCHASHSIZ-1))
struct mfc *mfchash[MFCHASHSIZ];

/*
 * Find a route for a given origin IP address and Multicast group address
 * Type of service parameter to be added in the future!!!
 *
 * One-entry-per-slot direct-mapped cache in front of mfcfind(): on a
 * cache miss (empty slot or origin/group mismatch) fall back to the
 * full hash-chain search and, if found, refill the slot.
 */
#define MFCFIND(o, g, rt) { \
	register u_int _mrhasho = o; \
	register u_int _mrhashg = g; \
	_mrhasho = MFCHASH(_mrhasho, _mrhashg); \
	++mrtstat.mrts_mfc_lookups; \
	rt = mfchash[_mrhasho]; \
	if ((rt == NULL) || \
	    ((o & rt->mfc_originmask.s_addr) != rt->mfc_origin.s_addr) || \
	    (g != rt->mfc_mcastgrp.s_addr)) \
		if ((rt = mfcfind(o, g)) != NULL) \
			mfchash[_mrhasho] = rt; \
}

/*
 * Find route by examining hash table entries
 */
static struct mfc *
mfcfind(origin, mcastgrp)
	u_long origin;
	u_long mcastgrp;
{
	register struct mbuf *mb_rt;
	register struct mfc *rt;
	register u_long hash;

	hash = nethash_fc(origin, mcastgrp);
	for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) {
		rt = mtod(mb_rt, struct mfc *);
		/* skip entries with m_act != NULL: those still have packets
		 * queued awaiting a routing-daemon upcall (see add_mfc) and
		 * are not yet installed routes */
		if (((origin & rt->mfc_originmask.s_addr) == rt->mfc_origin.s_addr) &&
		    (mcastgrp == rt->mfc_mcastgrp.s_addr) &&
		    (mb_rt->m_act == NULL))
			return (rt);
	}
	mrtstat.mrts_mfc_misses++;
	return NULL;
}

/*
 * Macros to
compute elapsed time efficiently 285 * Borrowed from Van Jacobson's scheduling code 286 */ 287#define TV_DELTA(a, b, delta) { \ 288 register int xxs; \ 289 \ 290 delta = (a).tv_usec - (b).tv_usec; \ 291 if ((xxs = (a).tv_sec - (b).tv_sec)) { \ 292 switch (xxs) { \ 293 case 2: \ 294 delta += 1000000; \ 295 /* fall through */ \ 296 case 1: \ 297 delta += 1000000; \ 298 break; \ 299 default: \ 300 delta += (1000000 * xxs); \ 301 } \ 302 } \ 303} 304 305#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \ 306 (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) 307 308/* 309 * Handle DVMRP setsockopt commands to modify the multicast routing tables. 310 */ 311int 312_ip_mrouter_cmd(cmd, so, m) 313 int cmd; 314 struct socket *so; 315 struct mbuf *m; 316{ 317 if (cmd != DVMRP_INIT && so != ip_mrouter) return EACCES; 318 319 switch (cmd) { 320 case DVMRP_INIT: return ip_mrouter_init(so); 321 case DVMRP_DONE: return ip_mrouter_done(); 322 case DVMRP_ADD_VIF: return add_vif (mtod(m, struct vifctl *)); 323 case DVMRP_DEL_VIF: return del_vif (mtod(m, vifi_t *)); 324 case DVMRP_ADD_MFC: return add_mfc (mtod(m, struct mfcctl *)); 325 case DVMRP_DEL_MFC: return del_mfc (mtod(m, struct delmfcctl *)); 326 default: return EOPNOTSUPP; 327 } 328} 329 330int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = _ip_mrouter_cmd; 331 332/* 333 * Handle ioctl commands to obtain information from the cache 334 */ 335int 336_mrt_ioctl(cmd, data) 337 int cmd; 338 caddr_t data; 339{ 340 int error = 0; 341 342 switch (cmd) { 343 case (SIOCGETVIFINF): /* Read Virtual Interface (m/cast) */ 344 return (get_vifs(data)); 345 break; 346 case (SIOCGETVIFCNT): 347 return (get_vif_cnt((struct sioc_vif_req *)data)); 348 break; 349 case (SIOCGETSGCNT): 350 return (get_sg_cnt((struct sioc_sg_req *)data)); 351 break; 352 default: 353 return (EINVAL); 354 break; 355 } 356 return error; 357} 358 359int (*mrt_ioctl)(int, caddr_t, struct proc *) = _mrt_ioctl; 360 361/* 362 * returns the packet count for the 
source group provided 363 */ 364int 365get_sg_cnt(req) 366 register struct sioc_sg_req *req; 367{ 368 register struct mfc *rt; 369 int s; 370 371 s = splnet(); 372 MFCFIND(req->src.s_addr, req->grp.s_addr, rt); 373 splx(s); 374 if (rt != NULL) 375 req->count = rt->mfc_pkt_cnt; 376 else 377 req->count = 0xffffffff; 378 379 return 0; 380} 381 382/* 383 * returns the input and output packet counts on the interface provided 384 */ 385int 386get_vif_cnt(req) 387 register struct sioc_vif_req *req; 388{ 389 register vifi_t vifi = req->vifi; 390 391 req->icount = viftable[vifi].v_pkt_in; 392 req->ocount = viftable[vifi].v_pkt_out; 393 394 return 0; 395} 396 397int 398get_vifs(data) 399 char *data; 400{ 401 struct vif_conf *vifc = (struct vif_conf *)data; 402 struct vif_req *vifrp, vifr; 403 int space, error=0; 404 405 vifi_t vifi; 406 int s; 407 408 space = vifc->vifc_len; 409 vifrp = vifc->vifc_req; 410 411 s = splnet(); 412 vifc->vifc_num=numvifs; 413 414 for (vifi = 0; vifi < numvifs; vifi++, vifrp++) { 415 if (viftable[vifi].v_lcl_addr.s_addr != 0) { 416 vifr.v_flags=viftable[vifi].v_flags; 417 vifr.v_threshold=viftable[vifi].v_threshold; 418 vifr.v_lcl_addr=viftable[vifi].v_lcl_addr; 419 vifr.v_rmt_addr=viftable[vifi].v_rmt_addr; 420 strncpy(vifr.v_if_name,viftable[vifi].v_ifp->if_name,IFNAMSIZ); 421 if ((space -= sizeof(vifr)) < 0) { 422 splx(s); 423 return(ENOSPC); 424 } 425 error = copyout((caddr_t)&vifr,(caddr_t)vifrp,(u_int)(sizeof vifr)); 426 if (error) { 427 splx(s); 428 return(error); 429 } 430 } 431 } 432 splx(s); 433 return 0; 434} 435/* 436 * Enable multicast routing 437 */ 438static int 439ip_mrouter_init(so) 440 struct socket *so; 441{ 442 if (so->so_type != SOCK_RAW || 443 so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP; 444 445 if (ip_mrouter != NULL) return EADDRINUSE; 446 447 ip_mrouter = so; 448 449 if (mrtdebug) 450 log(LOG_DEBUG, "ip_mrouter_init"); 451 452 return 0; 453} 454 455/* 456 * Disable multicast routing 457 */ 458int 
459_ip_mrouter_done() 460{ 461 vifi_t vifi; 462 int i; 463 struct ifnet *ifp; 464 struct ifreq ifr; 465 struct mbuf *mb_rt; 466 struct mbuf *m; 467 struct rtdetq *rte; 468 int s; 469 470 s = splnet(); 471 472 /* 473 * For each phyint in use, disable promiscuous reception of all IP 474 * multicasts. 475 */ 476 for (vifi = 0; vifi < numvifs; vifi++) { 477 if (viftable[vifi].v_lcl_addr.s_addr != 0 && 478 !(viftable[vifi].v_flags & VIFF_TUNNEL)) { 479 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; 480 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr 481 = INADDR_ANY; 482 ifp = viftable[vifi].v_ifp; 483 (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); 484 } 485 } 486 bzero((caddr_t)qtable, sizeof(qtable)); 487 bzero((caddr_t)tbftable, sizeof(tbftable)); 488 bzero((caddr_t)viftable, sizeof(viftable)); 489 numvifs = 0; 490 491 /* 492 * Check if any outstanding timeouts remain 493 */ 494 if (timeout_val != 0) 495 for (i = 0; i < MFCTBLSIZ; i++) { 496 mb_rt = mfctable[i]; 497 while (mb_rt) { 498 if ( mb_rt->m_act != NULL) { 499 untimeout(cleanup_cache, (caddr_t)mb_rt); 500 while (m = mb_rt->m_act) { 501 mb_rt->m_act = m->m_act; 502 rte = mtod(m, struct rtdetq *); 503 m_freem(rte->m); 504 m_free(m); 505 } 506 timeout_val--; 507 } 508 mb_rt = mb_rt->m_next; 509 } 510 if (timeout_val == 0) 511 break; 512 } 513 514 /* 515 * Free all multicast forwarding cache entries. 
516 */ 517 for (i = 0; i < MFCTBLSIZ; i++) 518 m_freem(mfctable[i]); 519 520 bzero((caddr_t)mfctable, sizeof(mfctable)); 521 bzero((caddr_t)mfchash, sizeof(mfchash)); 522 523 /* 524 * Reset de-encapsulation cache 525 */ 526 last_encap_src = NULL; 527 last_encap_vif = NULL; 528 529 ip_mrouter = NULL; 530 531 splx(s); 532 533 if (mrtdebug) 534 log(LOG_DEBUG, "ip_mrouter_done"); 535 536 return 0; 537} 538 539int (*ip_mrouter_done)(void) = _ip_mrouter_done; 540 541/* 542 * Add a vif to the vif table 543 */ 544static int 545add_vif(vifcp) 546 register struct vifctl *vifcp; 547{ 548 register struct vif *vifp = viftable + vifcp->vifc_vifi; 549 static struct sockaddr_in sin = {AF_INET}; 550 struct ifaddr *ifa; 551 struct ifnet *ifp; 552 struct ifreq ifr; 553 int error, s; 554 struct tbf *v_tbf = tbftable + vifcp->vifc_vifi; 555 556 if (vifcp->vifc_vifi >= MAXVIFS) return EINVAL; 557 if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE; 558 559 /* Find the interface with an address in AF_INET family */ 560 sin.sin_addr = vifcp->vifc_lcl_addr; 561 ifa = ifa_ifwithaddr((struct sockaddr *)&sin); 562 if (ifa == 0) return EADDRNOTAVAIL; 563 ifp = ifa->ifa_ifp; 564 565 if (vifcp->vifc_flags & VIFF_TUNNEL) { 566 if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) { 567 static int inited = 0; 568 if(!inited) { 569 for (s = 0; s < MAXVIFS; ++s) { 570 multicast_decap_if[s].if_name = "mdecap"; 571 multicast_decap_if[s].if_unit = s; 572 } 573 inited = 1; 574 } 575 ifp = &multicast_decap_if[vifcp->vifc_vifi]; 576 } else { 577 ifp = 0; 578 } 579 } else { 580 /* Make sure the interface supports multicast */ 581 if ((ifp->if_flags & IFF_MULTICAST) == 0) 582 return EOPNOTSUPP; 583 584 /* Enable promiscuous reception of all IP multicasts from the if */ 585 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; 586 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY; 587 s = splnet(); 588 error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr); 589 splx(s); 590 if (error) 
591 return error; 592 } 593 594 s = splnet(); 595 /* define parameters for the tbf structure */ 596 vifp->v_tbf = v_tbf; 597 vifp->v_tbf->q_len = 0; 598 vifp->v_tbf->n_tok = 0; 599 vifp->v_tbf->last_pkt_t = 0; 600 601 vifp->v_flags = vifcp->vifc_flags; 602 vifp->v_threshold = vifcp->vifc_threshold; 603 vifp->v_lcl_addr = vifcp->vifc_lcl_addr; 604 vifp->v_rmt_addr = vifcp->vifc_rmt_addr; 605 vifp->v_ifp = ifp; 606 vifp->v_rate_limit= vifcp->vifc_rate_limit; 607 /* initialize per vif pkt counters */ 608 vifp->v_pkt_in = 0; 609 vifp->v_pkt_out = 0; 610 splx(s); 611 612 /* Adjust numvifs up if the vifi is higher than numvifs */ 613 if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1; 614 615 if (mrtdebug) 616 log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d", 617 vifcp->vifc_vifi, 618 ntohl(vifcp->vifc_lcl_addr.s_addr), 619 (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask", 620 ntohl(vifcp->vifc_rmt_addr.s_addr), 621 vifcp->vifc_threshold, 622 vifcp->vifc_rate_limit); 623 624 return 0; 625} 626 627/* 628 * Delete a vif from the vif table 629 */ 630static int 631del_vif(vifip) 632 vifi_t *vifip; 633{ 634 register struct vif *vifp = viftable + *vifip; 635 register vifi_t vifi; 636 struct ifnet *ifp; 637 struct ifreq ifr; 638 int s; 639 640 if (*vifip >= numvifs) return EINVAL; 641 if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL; 642 643 s = splnet(); 644 645 if (!(vifp->v_flags & VIFF_TUNNEL)) { 646 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; 647 ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY; 648 ifp = vifp->v_ifp; 649 (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); 650 } 651 652 if (vifp == last_encap_vif) { 653 last_encap_vif = 0; 654 last_encap_src = 0; 655 } 656 657 bzero((caddr_t)qtable[*vifip], 658 sizeof(qtable[*vifip])); 659 bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf))); 660 bzero((caddr_t)vifp, sizeof (*vifp)); 661 662 /* Adjust numvifs down */ 663 for (vifi = numvifs; 
vifi > 0; vifi--) 664 if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break; 665 numvifs = vifi; 666 667 splx(s); 668 669 if (mrtdebug) 670 log(LOG_DEBUG, "del_vif %d, numvifs %d", *vifip, numvifs); 671 672 return 0; 673} 674 675/* 676 * Add an mfc entry 677 */ 678static int 679add_mfc(mfccp) 680 struct mfcctl *mfccp; 681{ 682 struct mfc *rt; 683 struct mfc *rt1; 684 register struct mbuf *mb_rt; 685 struct mbuf *prev_mb_rt; 686 u_long hash; 687 struct mbuf *mb_ntry; 688 struct rtdetq *rte; 689 register u_short nstl; 690 int s; 691 int i; 692 693 rt = mfcfind(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); 694 695 /* If an entry already exists, just update the fields */ 696 if (rt) { 697 if (mrtdebug) 698 log(LOG_DEBUG,"add_mfc update o %x g %x m %x p %x", 699 ntohl(mfccp->mfcc_origin.s_addr), 700 ntohl(mfccp->mfcc_mcastgrp.s_addr), 701 ntohl(mfccp->mfcc_originmask.s_addr), 702 mfccp->mfcc_parent); 703 704 s = splnet(); 705 rt->mfc_parent = mfccp->mfcc_parent; 706 for (i = 0; i < numvifs; i++) 707 VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); 708 splx(s); 709 return 0; 710 } 711 712 /* 713 * Find the entry for which the upcall was made and update 714 */ 715 s = splnet(); 716 hash = nethash_fc(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); 717 for (prev_mb_rt = mb_rt = mfctable[hash], nstl = 0; 718 mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { 719 720 rt = mtod(mb_rt, struct mfc *); 721 if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr) 722 == mfccp->mfcc_origin.s_addr) && 723 (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) && 724 (mb_rt->m_act != NULL)) { 725 726 if (!nstl++) { 727 if (mrtdebug) 728 log(LOG_DEBUG,"add_mfc o %x g %x m %x p %x dbg %x", 729 ntohl(mfccp->mfcc_origin.s_addr), 730 ntohl(mfccp->mfcc_mcastgrp.s_addr), 731 ntohl(mfccp->mfcc_originmask.s_addr), 732 mfccp->mfcc_parent, mb_rt->m_act); 733 734 rt->mfc_origin = mfccp->mfcc_origin; 735 rt->mfc_originmask = mfccp->mfcc_originmask; 736 rt->mfc_mcastgrp = 
mfccp->mfcc_mcastgrp; 737 rt->mfc_parent = mfccp->mfcc_parent; 738 for (i = 0; i < numvifs; i++) 739 VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); 740 /* initialize pkt counters per src-grp */ 741 rt->mfc_pkt_cnt = 0; 742 rt1 = rt; 743 } 744 745 /* prevent cleanup of cache entry */ 746 untimeout(cleanup_cache, (caddr_t)mb_rt); 747 timeout_val--; 748 749 /* free packets Qed at the end of this entry */ 750 while (mb_rt->m_act) { 751 mb_ntry = mb_rt->m_act; 752 rte = mtod(mb_ntry, struct rtdetq *); 753 ip_mdq(rte->m, rte->ifp, rte->tunnel_src, 754 rt1, rte->imo); 755 mb_rt->m_act = mb_ntry->m_act; 756 m_freem(rte->m); 757 m_free(mb_ntry); 758 } 759 760 /* 761 * If more than one entry was created for a single upcall 762 * delete that entry 763 */ 764 if (nstl > 1) { 765 MFREE(mb_rt, prev_mb_rt->m_next); 766 mb_rt = prev_mb_rt; 767 } 768 } 769 } 770 771 /* 772 * It is possible that an entry is being inserted without an upcall 773 */ 774 if (nstl == 0) { 775 if (mrtdebug) 776 log(LOG_DEBUG,"add_mfc no upcall h %d o %x g %x m %x p %x", 777 hash, ntohl(mfccp->mfcc_origin.s_addr), 778 ntohl(mfccp->mfcc_mcastgrp.s_addr), 779 ntohl(mfccp->mfcc_originmask.s_addr), 780 mfccp->mfcc_parent); 781 782 for (prev_mb_rt = mb_rt = mfctable[hash]; 783 mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { 784 785 rt = mtod(mb_rt, struct mfc *); 786 if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr) 787 == mfccp->mfcc_origin.s_addr) && 788 (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) { 789 790 rt->mfc_origin = mfccp->mfcc_origin; 791 rt->mfc_originmask = mfccp->mfcc_originmask; 792 rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 793 rt->mfc_parent = mfccp->mfcc_parent; 794 for (i = 0; i < numvifs; i++) 795 VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); 796 /* initialize pkt counters per src-grp */ 797 rt->mfc_pkt_cnt = 0; 798 } 799 } 800 if (mb_rt == NULL) { 801 /* no upcall, so make a new entry */ 802 MGET(mb_rt, M_DONTWAIT, MT_MRTABLE); 803 if (mb_rt == NULL) { 804 
splx(s); 805 return ENOBUFS; 806 } 807 808 rt = mtod(mb_rt, struct mfc *); 809 810 /* insert new entry at head of hash chain */ 811 rt->mfc_origin = mfccp->mfcc_origin; 812 rt->mfc_originmask = mfccp->mfcc_originmask; 813 rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 814 rt->mfc_parent = mfccp->mfcc_parent; 815 for (i = 0; i < numvifs; i++) 816 VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); 817 /* initialize pkt counters per src-grp */ 818 rt->mfc_pkt_cnt = 0; 819 820 /* link into table */ 821 mb_rt->m_next = mfctable[hash]; 822 mfctable[hash] = mb_rt; 823 mb_rt->m_act = NULL; 824 } 825 } 826 splx(s); 827 return 0; 828} 829 830/* 831 * Delete an mfc entry 832 */ 833static int 834del_mfc(mfccp) 835 struct delmfcctl *mfccp; 836{ 837 struct in_addr origin; 838 struct in_addr mcastgrp; 839 struct mfc *rt; 840 struct mbuf *mb_rt; 841 struct mbuf *prev_mb_rt; 842 u_long hash; 843 struct mfc **cmfc; 844 struct mfc **cmfcend; 845 int s, i; 846 847 origin = mfccp->mfcc_origin; 848 mcastgrp = mfccp->mfcc_mcastgrp; 849 hash = nethash_fc(origin.s_addr, mcastgrp.s_addr); 850 851 if (mrtdebug) 852 log(LOG_DEBUG,"del_mfc orig %x mcastgrp %x", 853 ntohl(origin.s_addr), ntohl(mcastgrp.s_addr)); 854 855 for (prev_mb_rt = mb_rt = mfctable[hash] 856 ; mb_rt 857 ; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { 858 rt = mtod(mb_rt, struct mfc *); 859 if (origin.s_addr == rt->mfc_origin.s_addr && 860 mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && 861 mb_rt->m_act == NULL) 862 break; 863 } 864 if (mb_rt == NULL) { 865 return ESRCH; 866 } 867 868 s = splnet(); 869 870 cmfc = mfchash; 871 cmfcend = cmfc + MFCHASHSIZ; 872 for ( ; cmfc < cmfcend; ++cmfc) 873 if (*cmfc == rt) 874 *cmfc = 0; 875 876 if (prev_mb_rt != mb_rt) { /* if moved past head of list */ 877 MFREE(mb_rt, prev_mb_rt->m_next); 878 } else /* delete head of list, it is in the table */ 879 mfctable[hash] = m_free(mb_rt); 880 881 splx(s); 882 883 return 0; 884} 885 886/* 887 * IP multicast forwarding function. 
This function assumes that the packet 888 * pointed to by "ip" has arrived on (or is about to be sent to) the interface 889 * pointed to by "ifp", and the packet is to be relayed to other networks 890 * that have members of the packet's destination IP multicast group. 891 * 892 * The packet is returned unscathed to the caller, unless it is tunneled 893 * or erroneous, in which case a non-zero return value tells the caller to 894 * discard it. 895 */ 896 897#define IP_HDR_LEN 20 /* # bytes of fixed IP header (excluding options) */ 898#define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ 899 900int 901_ip_mforward(ip, ifp, m, imo) 902 register struct ip *ip; 903 struct ifnet *ifp; 904 struct mbuf *m; 905 struct ip_moptions *imo; 906{ 907 register struct mfc *rt; 908 register struct vif *vifp; 909 register u_char *ipoptions; 910 u_long tunnel_src; 911 static struct sockproto k_igmpproto = { AF_INET, IPPROTO_IGMP }; 912 static struct sockaddr_in k_igmpsrc = { AF_INET }; 913 static struct sockaddr_in k_igmpdst = { AF_INET }; 914 register struct mbuf *mm; 915 register struct mbuf *mn; 916 register struct ip *k_data; 917 int s; 918 919 if (mrtdebug > 1) 920 log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %x", 921 ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp); 922 923 if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 || 924 (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) { 925 /* 926 * Packet arrived via a physical interface. 927 */ 928 tunnel_src = 0; 929 } else { 930 /* 931 * Packet arrived through a source-route tunnel. 932 * 933 * A source-route tunneled packet has a single NOP option and a 934 * two-element 935 * loose-source-and-record-route (LSRR) option immediately following 936 * the fixed-size part of the IP header. 
At this point in processing, 937 * the IP header should contain the following IP addresses: 938 * 939 * original source - in the source address field 940 * destination group - in the destination address field 941 * remote tunnel end-point - in the first element of LSRR 942 * one of this host's addrs - in the second element of LSRR 943 * 944 * NOTE: RFC-1075 would have the original source and remote tunnel 945 * end-point addresses swapped. However, that could cause 946 * delivery of ICMP error messages to innocent applications 947 * on intermediate routing hosts! Therefore, we hereby 948 * change the spec. 949 */ 950 951 /* 952 * Verify that the tunnel options are well-formed. 953 */ 954 if (ipoptions[0] != IPOPT_NOP || 955 ipoptions[2] != 11 || /* LSRR option length */ 956 ipoptions[3] != 12 || /* LSRR address pointer */ 957 (tunnel_src = *(u_long *)(&ipoptions[4])) == 0) { 958 mrtstat.mrts_bad_tunnel++; 959 if (mrtdebug) 960 log(LOG_DEBUG, 961 "ip_mforward: bad tunnel from %u (%x %x %x %x %x %x)", 962 ntohl(ip->ip_src.s_addr), 963 ipoptions[0], ipoptions[1], ipoptions[2], ipoptions[3], 964 *(u_long *)(&ipoptions[4]), *(u_long *)(&ipoptions[8])); 965 return 1; 966 } 967 968 /* 969 * Delete the tunnel options from the packet. 970 */ 971 ovbcopy((caddr_t)(ipoptions + TUNNEL_LEN), (caddr_t)ipoptions, 972 (unsigned)(m->m_len - (IP_HDR_LEN + TUNNEL_LEN))); 973 m->m_len -= TUNNEL_LEN; 974 ip->ip_len -= TUNNEL_LEN; 975 ip->ip_hl -= TUNNEL_LEN >> 2; 976 977 ifp = 0; 978 } 979 980 /* 981 * Don't forward a packet with time-to-live of zero or one, 982 * or a packet destined to a local-only group. 
983 */ 984 if (ip->ip_ttl <= 1 || 985 ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP) 986 return (int)tunnel_src; 987 988 /* 989 * Determine forwarding vifs from the forwarding cache table 990 */ 991 s = splnet(); 992 MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt); 993 994 /* Entry exists, so forward if necessary */ 995 if (rt != NULL) { 996 splx(s); 997 return (ip_mdq(m, ifp, tunnel_src, rt, imo)); 998 } 999 1000 else { 1001 /* 1002 * If we don't have a route for packet's origin, 1003 * Make a copy of the packet & 1004 * send message to routing daemon 1005 */ 1006 1007 register struct mbuf *mb_rt; 1008 register struct mbuf *mb_ntry; 1009 register struct mbuf *mb0; 1010 register struct rtdetq *rte; 1011 register struct mbuf *rte_m; 1012 register u_long hash; 1013 register struct timeval tp; 1014 1015 mrtstat.mrts_no_route++; 1016 if (mrtdebug) 1017 log(LOG_DEBUG, "ip_mforward: no rte s %x g %x", 1018 ntohl(ip->ip_src.s_addr), 1019 ntohl(ip->ip_dst.s_addr)); 1020 1021 /* is there an upcall waiting for this packet? 
*/ 1022 hash = nethash_fc(ip->ip_src.s_addr, ip->ip_dst.s_addr); 1023 for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) { 1024 rt = mtod(mb_rt, struct mfc *); 1025 if (((ip->ip_src.s_addr & rt->mfc_originmask.s_addr) == 1026 rt->mfc_origin.s_addr) && 1027 (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) && 1028 (mb_rt->m_act != NULL)) 1029 break; 1030 } 1031 1032 if (mb_rt == NULL) { 1033 /* no upcall, so make a new entry */ 1034 MGET(mb_rt, M_DONTWAIT, MT_MRTABLE); 1035 if (mb_rt == NULL) { 1036 splx(s); 1037 return ENOBUFS; 1038 } 1039 1040 rt = mtod(mb_rt, struct mfc *); 1041 1042 /* insert new entry at head of hash chain */ 1043 rt->mfc_origin.s_addr = ip->ip_src.s_addr; 1044 rt->mfc_originmask.s_addr = (u_long)0xffffffff; 1045 rt->mfc_mcastgrp.s_addr = ip->ip_dst.s_addr; 1046 1047 /* link into table */ 1048 hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr); 1049 mb_rt->m_next = mfctable[hash]; 1050 mfctable[hash] = mb_rt; 1051 mb_rt->m_act = NULL; 1052 1053 } 1054 1055 /* determine if q has overflowed */ 1056 for (rte_m = mb_rt, hash = 0; rte_m->m_act; rte_m = rte_m->m_act) 1057 hash++; 1058 1059 if (hash > MAX_UPQ) { 1060 mrtstat.mrts_upq_ovflw++; 1061 splx(s); 1062 return 0; 1063 } 1064 1065 /* add this packet and timing, ifp info to m_act */ 1066 MGET(mb_ntry, M_DONTWAIT, MT_DATA); 1067 if (mb_ntry == NULL) { 1068 splx(s); 1069 return ENOBUFS; 1070 } 1071 1072 mb_ntry->m_act = NULL; 1073 rte = mtod(mb_ntry, struct rtdetq *); 1074 1075 mb0 = m_copy(m, 0, M_COPYALL); 1076 if (mb0 == NULL) { 1077 splx(s); 1078 return ENOBUFS; 1079 } 1080 1081 rte->m = mb0; 1082 rte->ifp = ifp; 1083 rte->tunnel_src = tunnel_src; 1084 rte->imo = imo; 1085 1086 rte_m->m_act = mb_ntry; 1087 1088 splx(s); 1089 1090 if (hash == 0) { 1091 /* 1092 * Send message to routing daemon to install 1093 * a route into the kernel table 1094 */ 1095 k_igmpsrc.sin_addr = ip->ip_src; 1096 k_igmpdst.sin_addr = ip->ip_dst; 1097 1098 mm = m_copy(m, 0, M_COPYALL); 1099 if (mm 
== NULL) { 1100 splx(s); 1101 return ENOBUFS; 1102 } 1103 1104 k_data = mtod(mm, struct ip *); 1105 k_data->ip_p = 0; 1106 1107 mrtstat.mrts_upcalls++; 1108 1109 raw_input(mm, &k_igmpproto, 1110 (struct sockaddr *)&k_igmpsrc, 1111 (struct sockaddr *)&k_igmpdst); 1112 1113 /* set timer to cleanup entry if upcall is lost */ 1114 timeout(cleanup_cache, (caddr_t)mb_rt, 100); 1115 timeout_val++; 1116 } 1117 1118 return 0; 1119 } 1120} 1121 1122int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, 1123 struct ip_moptions *) = _ip_mforward; 1124 1125/* 1126 * Clean up the cache entry if upcall is not serviced 1127 */ 1128static void 1129cleanup_cache(xmb_rt) 1130 void *xmb_rt; 1131{ 1132 struct mbuf *mb_rt = xmb_rt; 1133 struct mfc *rt; 1134 u_long hash; 1135 struct mbuf *prev_m0; 1136 struct mbuf *m0; 1137 struct mbuf *m; 1138 struct rtdetq *rte; 1139 int s; 1140 1141 rt = mtod(mb_rt, struct mfc *); 1142 hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr); 1143 1144 if (mrtdebug) 1145 log(LOG_DEBUG, "ip_mforward: cleanup ipm %d h %d s %x g %x", 1146 ip_mrouter, hash, ntohl(rt->mfc_origin.s_addr), 1147 ntohl(rt->mfc_mcastgrp.s_addr)); 1148 1149 mrtstat.mrts_cache_cleanups++; 1150 1151 /* 1152 * determine entry to be cleaned up in cache table 1153 */ 1154 s = splnet(); 1155 for (prev_m0 = m0 = mfctable[hash]; m0; prev_m0 = m0, m0 = m0->m_next) 1156 if (m0 == mb_rt) 1157 break; 1158 1159 /* 1160 * drop all the packets 1161 * free the mbuf with the pkt, if, timing info 1162 */ 1163 while (mb_rt->m_act) { 1164 m = mb_rt->m_act; 1165 mb_rt->m_act = m->m_act; 1166 1167 rte = mtod(m, struct rtdetq *); 1168 m_freem(rte->m); 1169 m_free(m); 1170 } 1171 1172 /* 1173 * Delete the entry from the cache 1174 */ 1175 if (prev_m0 != m0) { /* if moved past head of list */ 1176 MFREE(m0, prev_m0->m_next); 1177 } else /* delete head of list, it is in the table */ 1178 mfctable[hash] = m_free(m0); 1179 1180 timeout_val--; 1181 splx(s); 1182} 1183 1184/* 1185 * Packet 
forwarding routine once entry in the cache is made 1186 */ 1187static int 1188ip_mdq(m, ifp, tunnel_src, rt, imo) 1189 register struct mbuf *m; 1190 register struct ifnet *ifp; 1191 register u_long tunnel_src; 1192 register struct mfc *rt; 1193 register struct ip_moptions *imo; 1194{ 1195 register struct ip *ip = mtod(m, struct ip *); 1196 register vifi_t vifi; 1197 register struct vif *vifp; 1198 1199 /* 1200 * Don't forward if it didn't arrive from the parent vif for its origin. 1201 * Notes: v_ifp is zero for src route tunnels, multicast_decap_if 1202 * for encapsulated tunnels and a real ifnet for non-tunnels so 1203 * the first part of the if catches wrong physical interface or 1204 * tunnel type; v_rmt_addr is zero for non-tunneled packets so 1205 * the 2nd part catches both packets that arrive via a tunnel 1206 * that shouldn't and packets that arrive via the wrong tunnel. 1207 */ 1208 vifi = rt->mfc_parent; 1209 if (viftable[vifi].v_ifp != ifp || 1210 (ifp == 0 && viftable[vifi].v_rmt_addr.s_addr != tunnel_src)) { 1211 /* came in the wrong interface */ 1212 if (mrtdebug) 1213 log(LOG_DEBUG, "wrong if: ifp %x vifi %d", 1214 ifp, vifi); 1215 ++mrtstat.mrts_wrong_if; 1216 return (int)tunnel_src; 1217 } 1218 1219 /* increment the interface and s-g counters */ 1220 viftable[vifi].v_pkt_in++; 1221 rt->mfc_pkt_cnt++; 1222 1223 /* 1224 * For each vif, decide if a copy of the packet should be forwarded. 
1225 * Forward if: 1226 * - the ttl exceeds the vif's threshold 1227 * - there are group members downstream on interface 1228 */ 1229#define MC_SEND(ip,vifp,m) { \ 1230 (vifp)->v_pkt_out++; \ 1231 if ((vifp)->v_flags & VIFF_SRCRT) \ 1232 srcrt_send((ip), (vifp), (m)); \ 1233 else if ((vifp)->v_flags & VIFF_TUNNEL) \ 1234 encap_send((ip), (vifp), (m)); \ 1235 else \ 1236 phyint_send((ip), (vifp), (m)); \ 1237 } 1238 1239/* If no options or the imo_multicast_vif option is 0, don't do this part 1240 */ 1241 if ((imo != NULL) && 1242 (( vifi = imo->imo_multicast_vif - 1) < numvifs) /*&& (vifi>=0)*/) 1243 { 1244 MC_SEND(ip,viftable+vifi,m); 1245 return (1); /* make sure we are done: No more physical sends */ 1246 } 1247 1248 for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++) 1249 if ((rt->mfc_ttls[vifi] > 0) && 1250 (ip->ip_ttl > rt->mfc_ttls[vifi])) 1251 MC_SEND(ip, vifp, m); 1252 1253 return 0; 1254} 1255 1256/* check if a vif number is legal/ok. This is used by ip_output, to export 1257 * numvifs there, 1258 */ 1259int 1260_legal_vif_num(vif) 1261 int vif; 1262{ if (vif>=0 && vif<=numvifs) 1263 return(1); 1264 else 1265 return(0); 1266} 1267 1268int (*legal_vif_num)(int) = _legal_vif_num; 1269 1270static void 1271phyint_send(ip, vifp, m) 1272 struct ip *ip; 1273 struct vif *vifp; 1274 struct mbuf *m; 1275{ 1276 register struct mbuf *mb_copy; 1277 register struct mbuf *mopts; 1278 register struct ip_moptions *imo; 1279 1280 if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL) 1281 return; 1282 1283 MALLOC(imo, struct ip_moptions *, sizeof *imo, M_IPMOPTS, M_NOWAIT); 1284 if (imo == NULL) { 1285 m_freem(mb_copy); 1286 return; 1287 } 1288 1289 imo->imo_multicast_ifp = vifp->v_ifp; 1290 imo->imo_multicast_ttl = ip->ip_ttl - 1; 1291 imo->imo_multicast_loop = 1; 1292 1293 if (vifp->v_rate_limit <= 0) 1294 tbf_send_packet(vifp, mb_copy, imo); 1295 else 1296 tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len, 1297 imo); 1298} 1299 1300static void 
1301srcrt_send(ip, vifp, m) 1302 struct ip *ip; 1303 struct vif *vifp; 1304 struct mbuf *m; 1305{ 1306 struct mbuf *mb_copy, *mb_opts; 1307 register struct ip *ip_copy; 1308 u_char *cp; 1309 1310 /* 1311 * Make sure that adding the tunnel options won't exceed the 1312 * maximum allowed number of option bytes. 1313 */ 1314 if (ip->ip_hl > (60 - TUNNEL_LEN) >> 2) { 1315 mrtstat.mrts_cant_tunnel++; 1316 if (mrtdebug) 1317 log(LOG_DEBUG, "srcrt_send: no room for tunnel options, from %u", 1318 ntohl(ip->ip_src.s_addr)); 1319 return; 1320 } 1321 1322 if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL) 1323 return; 1324 1325 ip_copy = mtod(mb_copy, struct ip *); 1326 ip_copy->ip_ttl--; 1327 ip_copy->ip_dst = vifp->v_rmt_addr; /* remote tunnel end-point */ 1328 /* 1329 * Adjust the ip header length to account for the tunnel options. 1330 */ 1331 ip_copy->ip_hl += TUNNEL_LEN >> 2; 1332 ip_copy->ip_len += TUNNEL_LEN; 1333 MGET(mb_opts, M_DONTWAIT, MT_HEADER); 1334 if (mb_opts == NULL) { 1335 m_freem(mb_copy); 1336 return; 1337 } 1338 /* 1339 * 'Delete' the base ip header from the mb_copy chain 1340 */ 1341 mb_copy->m_len -= IP_HDR_LEN; 1342 mb_copy->m_data += IP_HDR_LEN; 1343 /* 1344 * Make mb_opts be the new head of the packet chain. 
1345 * Any options of the packet were left in the old packet chain head 1346 */ 1347 mb_opts->m_next = mb_copy; 1348 mb_opts->m_data += 16; 1349 mb_opts->m_len = IP_HDR_LEN + TUNNEL_LEN; 1350 /* 1351 * Copy the base ip header from the mb_copy chain to the new head mbuf 1352 */ 1353 bcopy((caddr_t)ip_copy, mtod(mb_opts, caddr_t), IP_HDR_LEN); 1354 /* 1355 * Add the NOP and LSRR after the base ip header 1356 */ 1357 cp = mtod(mb_opts, u_char *) + IP_HDR_LEN; 1358 *cp++ = IPOPT_NOP; 1359 *cp++ = IPOPT_LSRR; 1360 *cp++ = 11; /* LSRR option length */ 1361 *cp++ = 8; /* LSSR pointer to second element */ 1362 *(u_long*)cp = vifp->v_lcl_addr.s_addr; /* local tunnel end-point */ 1363 cp += 4; 1364 *(u_long*)cp = ip->ip_dst.s_addr; /* destination group */ 1365 1366 if (vifp->v_rate_limit <= 0) 1367 tbf_send_packet(vifp, mb_opts, 0); 1368 else 1369 tbf_control(vifp, mb_opts, 1370 mtod(mb_opts, struct ip *), ip_copy->ip_len, 0); 1371} 1372 1373static void 1374encap_send(ip, vifp, m) 1375 register struct ip *ip; 1376 register struct vif *vifp; 1377 register struct mbuf *m; 1378{ 1379 register struct mbuf *mb_copy; 1380 register struct ip *ip_copy; 1381 register int i, len = ip->ip_len; 1382 1383 /* 1384 * copy the old packet & pullup it's IP header into the 1385 * new mbuf so we can modify it. Try to fill the new 1386 * mbuf since if we don't the ethernet driver will. 1387 */ 1388 MGET(mb_copy, M_DONTWAIT, MT_DATA); 1389 if (mb_copy == NULL) 1390 return; 1391 mb_copy->m_data += 16; 1392 mb_copy->m_len = sizeof(multicast_encap_iphdr); 1393 1394 if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) { 1395 m_freem(mb_copy); 1396 return; 1397 } 1398 i = MHLEN - M_LEADINGSPACE(mb_copy); 1399 if (i > len) 1400 i = len; 1401 mb_copy = m_pullup(mb_copy, i); 1402 if (mb_copy == NULL) 1403 return; 1404 1405 /* 1406 * fill in the encapsulating IP header. 
1407 */ 1408 ip_copy = mtod(mb_copy, struct ip *); 1409 *ip_copy = multicast_encap_iphdr; 1410 ip_copy->ip_id = htons(ip_id++); 1411 ip_copy->ip_len += len; 1412 ip_copy->ip_src = vifp->v_lcl_addr; 1413 ip_copy->ip_dst = vifp->v_rmt_addr; 1414 1415 /* 1416 * turn the encapsulated IP header back into a valid one. 1417 */ 1418 ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr)); 1419 --ip->ip_ttl; 1420 HTONS(ip->ip_len); 1421 HTONS(ip->ip_off); 1422 ip->ip_sum = 0; 1423#if defined(LBL) && !defined(ultrix) 1424 ip->ip_sum = ~oc_cksum((caddr_t)ip, ip->ip_hl << 2, 0); 1425#else 1426 mb_copy->m_data += sizeof(multicast_encap_iphdr); 1427 ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); 1428 mb_copy->m_data -= sizeof(multicast_encap_iphdr); 1429#endif 1430 1431 if (vifp->v_rate_limit <= 0) 1432 tbf_send_packet(vifp, mb_copy, 0); 1433 else 1434 tbf_control(vifp, mb_copy, ip, ip_copy->ip_len, 0); 1435} 1436 1437/* 1438 * De-encapsulate a packet and feed it back through ip input (this 1439 * routine is called whenever IP gets a packet with proto type 1440 * ENCAP_PROTO and a local destination address). 1441 */ 1442void 1443multiencap_decap(m) 1444 register struct mbuf *m; 1445{ 1446 struct ifnet *ifp = m->m_pkthdr.rcvif; 1447 register struct ip *ip = mtod(m, struct ip *); 1448 register int hlen = ip->ip_hl << 2; 1449 register int s; 1450 register struct ifqueue *ifq; 1451 register struct vif *vifp; 1452 1453 if (ip->ip_p != ENCAP_PROTO) { 1454 rip_input(m); 1455 return; 1456 } 1457 /* 1458 * dump the packet if it's not to a multicast destination or if 1459 * we don't have an encapsulating tunnel with the source. 1460 * Note: This code assumes that the remote site IP address 1461 * uniquely identifies the tunnel (i.e., that this site has 1462 * at most one tunnel with the remote site). 1463 */ 1464 if (! 
IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) { 1465 ++mrtstat.mrts_bad_tunnel; 1466 m_freem(m); 1467 return; 1468 } 1469 if (ip->ip_src.s_addr != last_encap_src) { 1470 register struct vif *vife; 1471 1472 vifp = viftable; 1473 vife = vifp + numvifs; 1474 last_encap_src = ip->ip_src.s_addr; 1475 last_encap_vif = 0; 1476 for ( ; vifp < vife; ++vifp) 1477 if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) { 1478 if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT)) 1479 == VIFF_TUNNEL) 1480 last_encap_vif = vifp; 1481 break; 1482 } 1483 } 1484 if ((vifp = last_encap_vif) == 0) { 1485 last_encap_src = 0; 1486 mrtstat.mrts_cant_tunnel++; /*XXX*/ 1487 m_freem(m); 1488 if (mrtdebug) 1489 log(LOG_DEBUG, "ip_mforward: no tunnel with %u", 1490 ntohl(ip->ip_src.s_addr)); 1491 return; 1492 } 1493 ifp = vifp->v_ifp; 1494 hlen -= sizeof(struct ifnet *); 1495 m->m_data += hlen; 1496 m->m_len -= hlen; 1497 *(mtod(m, struct ifnet **)) = ifp; 1498 ifq = &ipintrq; 1499 s = splimp(); 1500 if (IF_QFULL(ifq)) { 1501 IF_DROP(ifq); 1502 m_freem(m); 1503 } else { 1504 IF_ENQUEUE(ifq, m); 1505 /* 1506 * normally we would need a "schednetisr(NETISR_IP)" 1507 * here but we were called by ip_input and it is going 1508 * to loop back & try to dequeue the packet we just 1509 * queued as soon as we return so we avoid the 1510 * unnecessary software interrrupt. 
1511 */ 1512 } 1513 splx(s); 1514} 1515 1516/* 1517 * Token bucket filter module 1518 */ 1519void 1520tbf_control(vifp, m, ip, p_len, imo) 1521 register struct vif *vifp; 1522 register struct mbuf *m; 1523 register struct ip *ip; 1524 register u_long p_len; 1525 struct ip_moptions *imo; 1526{ 1527 tbf_update_tokens(vifp); 1528 1529 /* if there are enough tokens, 1530 * and the queue is empty, 1531 * send this packet out 1532 */ 1533 1534 if (vifp->v_tbf->q_len == 0) { 1535 if (p_len <= vifp->v_tbf->n_tok) { 1536 vifp->v_tbf->n_tok -= p_len; 1537 tbf_send_packet(vifp, m, imo); 1538 } else if (p_len > MAX_BKT_SIZE) { 1539 /* drop if packet is too large */ 1540 mrtstat.mrts_pkt2large++; 1541 m_freem(m); 1542 return; 1543 } else { 1544 /* queue packet and timeout till later */ 1545 tbf_queue(vifp, m, ip, imo); 1546 timeout(tbf_reprocess_q, (caddr_t)vifp, 1); 1547 } 1548 } else if (vifp->v_tbf->q_len < MAXQSIZE) { 1549 /* finite queue length, so queue pkts and process queue */ 1550 tbf_queue(vifp, m, ip, imo); 1551 tbf_process_q(vifp); 1552 } else { 1553 /* queue length too much, try to dq and queue and process */ 1554 if (!tbf_dq_sel(vifp, ip)) { 1555 mrtstat.mrts_q_overflow++; 1556 m_freem(m); 1557 return; 1558 } else { 1559 tbf_queue(vifp, m, ip, imo); 1560 tbf_process_q(vifp); 1561 } 1562 } 1563 return; 1564} 1565 1566/* 1567 * adds a packet to the queue at the interface 1568 */ 1569void 1570tbf_queue(vifp, m, ip, imo) 1571 register struct vif *vifp; 1572 register struct mbuf *m; 1573 register struct ip *ip; 1574 struct ip_moptions *imo; 1575{ 1576 register u_long ql; 1577 register int index = (vifp - viftable); 1578 register int s = splnet(); 1579 1580 ql = vifp->v_tbf->q_len; 1581 1582 qtable[index][ql].pkt_m = m; 1583 qtable[index][ql].pkt_len = (mtod(m, struct ip *))->ip_len; 1584 qtable[index][ql].pkt_ip = ip; 1585 qtable[index][ql].pkt_imo = imo; 1586 1587 vifp->v_tbf->q_len++; 1588 splx(s); 1589} 1590 1591 1592/* 1593 * processes the queue at the interface 
1594 */ 1595void 1596tbf_process_q(vifp) 1597 register struct vif *vifp; 1598{ 1599 register struct mbuf *m; 1600 register struct pkt_queue pkt_1; 1601 register int index = (vifp - viftable); 1602 register int s = splnet(); 1603 1604 /* loop through the queue at the interface and send as many packets 1605 * as possible 1606 */ 1607 while (vifp->v_tbf->q_len > 0) { 1608 /* locate the first packet */ 1609 pkt_1.pkt_len = ((qtable[index][0]).pkt_len); 1610 pkt_1.pkt_m = (qtable[index][0]).pkt_m; 1611 pkt_1.pkt_ip = (qtable[index][0]).pkt_ip; 1612 pkt_1.pkt_imo = (qtable[index][0]).pkt_imo; 1613 1614 /* determine if the packet can be sent */ 1615 if (pkt_1.pkt_len <= vifp->v_tbf->n_tok) { 1616 /* if so, 1617 * reduce no of tokens, dequeue the queue, 1618 * send the packet. 1619 */ 1620 vifp->v_tbf->n_tok -= pkt_1.pkt_len; 1621 1622 tbf_dequeue(vifp, 0); 1623 1624 tbf_send_packet(vifp, pkt_1.pkt_m, pkt_1.pkt_imo); 1625 1626 } else break; 1627 } 1628 splx(s); 1629} 1630 1631/* 1632 * removes the jth packet from the queue at the interface 1633 */ 1634void 1635tbf_dequeue(vifp,j) 1636 register struct vif *vifp; 1637 register int j; 1638{ 1639 register u_long index = vifp - viftable; 1640 register int i; 1641 1642 for (i=j+1; i <= vifp->v_tbf->q_len - 1; i++) { 1643 qtable[index][i-1].pkt_m = qtable[index][i].pkt_m; 1644 qtable[index][i-1].pkt_len = qtable[index][i].pkt_len; 1645 qtable[index][i-1].pkt_ip = qtable[index][i].pkt_ip; 1646 qtable[index][i-1].pkt_imo = qtable[index][i].pkt_imo; 1647 } 1648 qtable[index][i-1].pkt_m = NULL; 1649 qtable[index][i-1].pkt_len = NULL; 1650 qtable[index][i-1].pkt_ip = NULL; 1651 qtable[index][i-1].pkt_imo = NULL; 1652 1653 vifp->v_tbf->q_len--; 1654 1655 if (tbfdebug > 1) 1656 log(LOG_DEBUG, "tbf_dequeue: vif# %d qlen %d",vifp-viftable, i-1); 1657} 1658 1659void 1660tbf_reprocess_q(xvifp) 1661 void *xvifp; 1662{ 1663 register struct vif *vifp = xvifp; 1664 if (ip_mrouter == NULL) 1665 return; 1666 1667 tbf_update_tokens(vifp); 1668 
1669 tbf_process_q(vifp); 1670 1671 if (vifp->v_tbf->q_len) 1672 timeout(tbf_reprocess_q, (caddr_t)vifp, 1); 1673} 1674 1675/* function that will selectively discard a member of the queue 1676 * based on the precedence value and the priority obtained through 1677 * a lookup table - not yet implemented accurately! 1678 */ 1679int 1680tbf_dq_sel(vifp, ip) 1681 register struct vif *vifp; 1682 register struct ip *ip; 1683{ 1684 register int i; 1685 register int s = splnet(); 1686 register u_int p; 1687 1688 p = priority(vifp, ip); 1689 1690 for(i=vifp->v_tbf->q_len-1;i >= 0;i--) { 1691 if (p > priority(vifp, qtable[vifp-viftable][i].pkt_ip)) { 1692 m_freem(qtable[vifp-viftable][i].pkt_m); 1693 tbf_dequeue(vifp,i); 1694 splx(s); 1695 mrtstat.mrts_drop_sel++; 1696 return(1); 1697 } 1698 } 1699 splx(s); 1700 return(0); 1701} 1702 1703void 1704tbf_send_packet(vifp, m, imo) 1705 register struct vif *vifp; 1706 register struct mbuf *m; 1707 struct ip_moptions *imo; 1708{ 1709 register struct mbuf *mcp; 1710 int error; 1711 int s = splnet(); 1712 1713 /* if source route tunnels */ 1714 if (vifp->v_flags & VIFF_SRCRT) { 1715 error = ip_output(m, (struct mbuf *)0, (struct route *)0, 1716 IP_FORWARDING, imo); 1717 if (mrtdebug > 1) 1718 log(LOG_DEBUG, "srcrt_send on vif %d err %d", vifp-viftable, error); 1719 } else if (vifp->v_flags & VIFF_TUNNEL) { 1720 /* If tunnel options */ 1721 ip_output(m, (struct mbuf *)0, (struct route *)0, 1722 IP_FORWARDING, imo); 1723 } else { 1724 /* if physical interface option, extract the options and then send */ 1725 error = ip_output(m, (struct mbuf *)0, (struct route *)0, 1726 IP_FORWARDING, imo); 1727 FREE(imo, M_IPMOPTS); 1728 1729 if (mrtdebug > 1) 1730 log(LOG_DEBUG, "phyint_send on vif %d err %d", vifp-viftable, error); 1731 } 1732 splx(s); 1733} 1734 1735/* determine the current time and then 1736 * the elapsed time (between the last time and time now) 1737 * in milliseconds & update the no. 
of tokens in the bucket
 */
void
tbf_update_tokens(vifp)
    register struct vif *vifp;
{
    struct timeval tp;
    register u_long t;
    register u_long elapsed;
    register int s = splnet();

    GET_TIME(tp);

    /* current time in milliseconds */
    t = tp.tv_sec*1000 + tp.tv_usec/1000;

    /*
     * credit tokens for the interval since the last packet;
     * presumably v_rate_limit is in kbits/s so ms * kbits/s / 8
     * yields bytes -- TODO confirm against the vif declaration
     */
    elapsed = (t - vifp->v_tbf->last_pkt_t) * vifp->v_rate_limit /8;
    vifp->v_tbf->n_tok += elapsed;
    vifp->v_tbf->last_pkt_t = t;

    /* cap the bucket so long idle periods can't build unlimited credit */
    if (vifp->v_tbf->n_tok > MAX_BKT_SIZE)
	vifp->v_tbf->n_tok = MAX_BKT_SIZE;

    splx(s);
}

/*
 * Crude priority classifier for tbf_dq_sel(): maps the destination
 * group address to a drop priority (higher value = more important).
 */
static int
priority(vifp, ip)
    register struct vif *vifp;
    register struct ip *ip;
{
    register u_long graddr;
    register int prio;

    /* temporary hack; will add general packet classifier some day */

    prio = 50; /* default priority */

    /* check for source route options and add option length to get dst */
    /*
     * NOTE(review): (ip+8) is struct-pointer arithmetic, advancing
     * 8*sizeof(struct ip) bytes rather than the LSRR option length
     * the comment above suggests -- verify the intended offset.
     */
    if (vifp->v_flags & VIFF_SRCRT)
	graddr = ntohl((ip+8)->ip_dst.s_addr);
    else
	graddr = ntohl(ip->ip_dst.s_addr);

    /* switch on the low nibble; well-known MBone session addresses
     * get elevated priorities */
    switch (graddr & 0xf) {
	case 0x0: break;
	case 0x1: if (graddr == 0xe0020001) prio = 65; /* MBone Audio */
		  break;
	case 0x2: break;
	case 0x3: break;
	case 0x4: break;
	case 0x5: break;
	case 0x6: break;
	case 0x7: break;
	case 0x8: break;
	case 0x9: break;
	case 0xa: if (graddr == 0xe000010a) prio = 85; /* IETF Low Audio 1 */
		  break;
	case 0xb: if (graddr == 0xe000010b) prio = 75; /* IETF Audio 1 */
		  break;
	case 0xc: if (graddr == 0xe000010c) prio = 60; /* IETF Video 1 */
		  break;
	case 0xd: if (graddr == 0xe000010d) prio = 80; /* IETF Low Audio 2 */
		  break;
	case 0xe: if (graddr == 0xe000010e) prio = 70; /* IETF Audio 2 */
		  break;
	case 0xf: if (graddr == 0xe000010f) prio = 55; /* IETF Video 2 */
		  break;
    }

    if (tbfdebug > 1) log(LOG_DEBUG, "graddr%x prio%d", graddr, prio);

    return prio;
}

/*
 * End of token bucket filter modifications
 */
#endif