ip_mroute.c revision 133720
1163953Srrs/* 2185694Srrs * IP multicast forwarding procedures 3163953Srrs * 4163953Srrs * Written by David Waitzman, BBN Labs, August 1988. 5163953Srrs * Modified by Steve Deering, Stanford, February 1989. 6163953Srrs * Modified by Mark J. Steiglitz, Stanford, May, 1991 7163953Srrs * Modified by Van Jacobson, LBL, January 1993 8163953Srrs * Modified by Ajit Thyagarajan, PARC, August 1993 9163953Srrs * Modified by Bill Fenner, PARC, April 1995 10163953Srrs * Modified by Ahmed Helmy, SGI, June 1996 11163953Srrs * Modified by George Edmond Eddy (Rusty), ISI, February 1998 12163953Srrs * Modified by Pavlin Radoslavov, USC/ISI, May 1998, August 1999, October 2000 13163953Srrs * Modified by Hitoshi Asaeda, WIDE, August 2000 14163953Srrs * Modified by Pavlin Radoslavov, ICSI, October 2002 15163953Srrs * 16163953Srrs * MROUTING Revision: 3.5 17163953Srrs * and PIM-SMv2 and PIM-DM support, advanced API support, 18163953Srrs * bandwidth metering and signaling 19163953Srrs * 20163953Srrs * $FreeBSD: head/sys/netinet/ip_mroute.c 133720 2004-08-14 15:32:40Z dwmalone $ 21163953Srrs */ 22163953Srrs 23163953Srrs#include "opt_mac.h" 24163953Srrs#include "opt_mrouting.h" 25163953Srrs 26163953Srrs#ifdef PIM 27163953Srrs#define _PIM_VT 1 28163953Srrs#endif 29163953Srrs 30163953Srrs#include <sys/param.h> 31163953Srrs#include <sys/kernel.h> 32163953Srrs#include <sys/lock.h> 33163953Srrs#include <sys/mac.h> 34163953Srrs#include <sys/malloc.h> 35163953Srrs#include <sys/mbuf.h> 36163953Srrs#include <sys/module.h> 37163953Srrs#include <sys/protosw.h> 38167598Srrs#include <sys/signalvar.h> 39163953Srrs#include <sys/socket.h> 40163953Srrs#include <sys/socketvar.h> 41163953Srrs#include <sys/sockio.h> 42163953Srrs#include <sys/sx.h> 43163953Srrs#include <sys/sysctl.h> 44163953Srrs#include <sys/syslog.h> 45163953Srrs#include <sys/systm.h> 46163953Srrs#include <sys/time.h> 47170091Srrs#include <net/if.h> 48172091Srrs#include <net/netisr.h> 49179157Srrs#include <net/route.h> 50163953Srrs#include <netinet/in.h> 51163953Srrs#include <netinet/igmp.h> 52163953Srrs#include <netinet/in_systm.h> 53163953Srrs#include <netinet/in_var.h> 54163953Srrs#include <netinet/ip.h> 55163953Srrs#include <netinet/ip_encap.h> 56163953Srrs#include <netinet/ip_mroute.h> 57163953Srrs#include <netinet/ip_var.h> 58165220Srrs#ifdef PIM 59165220Srrs#include <netinet/pim.h> 60165220Srrs#include <netinet/pim_var.h> 61165220Srrs#endif 62165220Srrs#include <netinet/udp.h> 63163953Srrs#include <machine/in_cksum.h> 64163953Srrs 65165220Srrs/* 66163953Srrs * Control debugging code for rsvp and multicast routing code. 67163953Srrs * Can only set them with the debugger. 68163953Srrs */ 69165220Srrsstatic u_int rsvpdebug; /* non-zero enables debugging */ 70165220Srrs 71165220Srrsstatic u_int mrtdebug; /* any set of the flags below */ 72165220Srrs#define DEBUG_MFC 0x02 73165220Srrs#define DEBUG_FORWARD 0x04 74165220Srrs#define DEBUG_EXPIRE 0x08 75163953Srrs#define DEBUG_XMIT 0x10 76163953Srrs#define DEBUG_PIM 0x20 77163953Srrs 78163953Srrs#define VIFI_INVALID ((vifi_t) -1) 79163953Srrs 80163953Srrs#define M_HASCL(m) ((m)->m_flags & M_EXT) 81163953Srrs 82163953Srrsstatic MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast routing tables"); 83179157Srrs 84163953Srrs/* 85163953Srrs * Locking. We use two locks: one for the virtual interface table and 86163953Srrs * one for the forwarding table. These locks may be nested in which case 87163953Srrs * the VIF lock must always be taken first. Note that each lock is used 88163953Srrs * to cover not only the specific data structure but also related data 89169420Srrs * structures. It may be better to add more fine-grained locking later; 90169420Srrs * it's not clear how performance-critical this code is. 91172396Srrs */ 92172396Srrs 93172396Srrsstatic struct mrtstat mrtstat; 94172396SrrsSYSCTL_STRUCT(_net_inet_ip, OID_AUTO, mrtstat, CTLFLAG_RW, 95172396Srrs &mrtstat, mrtstat, 96172396Srrs "Multicast Routing Statistics (struct mrtstat, netinet/ip_mroute.h)"); 97163953Srrs 98163953Srrsstatic struct mfc *mfctable[MFCTBLSIZ]; 99163953SrrsSYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, mfctable, CTLFLAG_RD, 100163953Srrs &mfctable, sizeof(mfctable), "S,*mfc[MFCTBLSIZ]", 101169420Srrs "Multicast Forwarding Table (struct *mfc[MFCTBLSIZ], netinet/ip_mroute.h)"); 102169420Srrs 103169420Srrsstatic struct mtx mfc_mtx; 104163953Srrs#define MFC_LOCK() mtx_lock(&mfc_mtx) 105163953Srrs#define MFC_UNLOCK() mtx_unlock(&mfc_mtx) 106179141Srrs#define MFC_LOCK_ASSERT() do { \ 107179141Srrs mtx_assert(&mfc_mtx, MA_OWNED); \ 108179141Srrs NET_ASSERT_GIANT(); \ 109179141Srrs} while (0) 110179141Srrs#define MFC_LOCK_INIT() mtx_init(&mfc_mtx, "mroute mfc table", NULL, MTX_DEF) 111179141Srrs#define MFC_LOCK_DESTROY() mtx_destroy(&mfc_mtx) 112179141Srrs 113179141Srrsstatic struct vif viftable[MAXVIFS]; 114179141SrrsSYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, viftable, CTLFLAG_RD, 115163953Srrs &viftable, sizeof(viftable), "S,vif[MAXVIFS]", 116169352Srrs "Multicast Virtual Interfaces (struct vif[MAXVIFS], netinet/ip_mroute.h)"); 117179157Srrs 118168299Srrsstatic struct mtx vif_mtx; 119168299Srrs#define VIF_LOCK() mtx_lock(&vif_mtx) 120172396Srrs#define VIF_UNLOCK() mtx_unlock(&vif_mtx) 121163953Srrs#define VIF_LOCK_ASSERT() mtx_assert(&vif_mtx, MA_OWNED) 122163953Srrs#define VIF_LOCK_INIT() mtx_init(&vif_mtx, "mroute vif table", NULL, MTX_DEF) 123163953Srrs#define VIF_LOCK_DESTROY() mtx_destroy(&vif_mtx) 124163953Srrs 125169352Srrsstatic u_char nexpire[MFCTBLSIZ]; 126179157Srrs 127168299Srrsstatic struct callout expire_upcalls_ch; 128168299Srrs 129172396Srrs#define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */ 130163953Srrs#define UPCALL_EXPIRE 6 /* number of timeouts */ 131163953Srrs 132163953Srrs/* 133163953Srrs * Define the token bucket filter structures 134163953Srrs * tbftable -> each vif has one of these for storing info 135169352Srrs */ 136179157Srrs 137168299Srrsstatic struct tbf tbftable[MAXVIFS]; 138168299Srrs#define TBF_REPROCESS (hz / 100) /* 100x / second */ 139172396Srrs 140163953Srrs/* 141163953Srrs * 'Interfaces' associated with decapsulator (so we can tell 142163953Srrs * packets that went through it from ones that get reflected 143163953Srrs * by a broken gateway). These interfaces are never linked into 144169352Srrs * the system ifnet list & no routes point to them. I.e., packets 145179157Srrs * can't be sent this way. They only exist as a placeholder for 146171440Srrs * multicast source verification. 147171440Srrs */ 148172396Srrsstatic struct ifnet multicast_decap_if[MAXVIFS]; 149163953Srrs 150163953Srrs#define ENCAP_TTL 64 151163953Srrs#define ENCAP_PROTO IPPROTO_IPIP /* 4 */ 152163953Srrs 153169352Srrs/* prototype IP hdr for encapsulated packets */ 154179157Srrsstatic struct ip multicast_encap_iphdr = { 155168299Srrs#if BYTE_ORDER == LITTLE_ENDIAN 156168299Srrs sizeof(struct ip) >> 2, IPVERSION, 157172396Srrs#else 158163953Srrs IPVERSION, sizeof(struct ip) >> 2, 159163953Srrs#endif 160163953Srrs 0, /* tos */ 161163953Srrs sizeof(struct ip), /* total length */ 162169352Srrs 0, /* id */ 163179157Srrs 0, /* frag offset */ 164168299Srrs ENCAP_TTL, ENCAP_PROTO, 165168299Srrs 0, /* checksum */ 166172396Srrs}; 167163953Srrs 168163953Srrs/* 169163953Srrs * Bandwidth meter variables and constants 170163953Srrs */ 171163953Srrsstatic MALLOC_DEFINE(M_BWMETER, "bwmeter", "multicast upcall bw meters"); 172179157Srrs/* 173168299Srrs * Pending timeouts are stored in a hash table, the key being the 174168299Srrs * expiration time. Periodically, the entries are analysed and processed. 175172396Srrs */ 176163953Srrs#define BW_METER_BUCKETS 1024 177163953Srrsstatic struct bw_meter *bw_meter_timers[BW_METER_BUCKETS]; 178169420Srrsstatic struct callout bw_meter_ch; 179179157Srrs#define BW_METER_PERIOD (hz) /* periodical handling of bw meters */ 180172703Srrs 181172396Srrs/* 182172396Srrs * Pending upcalls are stored in a vector which is flushed when 183172396Srrs * full, or periodically 184172396Srrs */ 185163953Srrsstatic struct bw_upcall bw_upcalls[BW_UPCALLS_MAX]; 186163953Srrsstatic u_int bw_upcalls_n; /* # of pending upcalls */ 187163953Srrsstatic struct callout bw_upcalls_ch; 188163953Srrs#define BW_UPCALLS_PERIOD (hz) /* periodical flush of bw upcalls */ 189163953Srrs 190171158Srrs#ifdef PIM 191171158Srrsstatic struct pimstat pimstat; 192171158SrrsSYSCTL_STRUCT(_net_inet_pim, PIMCTL_STATS, stats, CTLFLAG_RD, 193171158Srrs &pimstat, pimstat, 194171158Srrs "PIM Statistics (struct pimstat, netinet/pim_var.h)"); 195171158Srrs 196171158Srrs/* 197171158Srrs * Note: the PIM Register encapsulation adds the following in front of a 198171158Srrs * data packet: 199171158Srrs * 200171158Srrs * struct pim_encap_hdr { 201171158Srrs * struct ip ip; 202171158Srrs * struct pim_encap_pimhdr pim; 203171158Srrs * } 204171158Srrs * 205171158Srrs */ 206171158Srrs 207171158Srrsstruct pim_encap_pimhdr { 208171158Srrs struct pim pim; 209171158Srrs uint32_t flags; 210171531Srrs}; 211171158Srrs 212171158Srrsstatic struct ip pim_encap_iphdr = { 213171158Srrs#if BYTE_ORDER == LITTLE_ENDIAN 214171158Srrs sizeof(struct ip) >> 2, 215171158Srrs IPVERSION, 216171158Srrs#else 217171158Srrs IPVERSION, 218171158Srrs sizeof(struct ip) >> 2, 219171158Srrs#endif 220171158Srrs 0, /* tos */ 221171158Srrs sizeof(struct ip), /* total length */ 222171158Srrs 0, /* id */ 223171158Srrs 0, /* frag offset */ 224171158Srrs ENCAP_TTL, 225171158Srrs IPPROTO_PIM, 226171158Srrs 0, /* checksum */ 227171158Srrs}; 228171158Srrs 229171158Srrsstatic struct pim_encap_pimhdr pim_encap_pimhdr = { 230171158Srrs { 231171158Srrs PIM_MAKE_VT(PIM_VERSION, PIM_REGISTER), /* PIM vers and message type */ 232171158Srrs 0, /* reserved */ 233171158Srrs 0, /* checksum */ 234171158Srrs }, 235171158Srrs 0 /* flags */ 236171158Srrs}; 237171158Srrs 238171158Srrsstatic struct ifnet multicast_register_if; 239171158Srrsstatic vifi_t reg_vif_num = VIFI_INVALID; 240171158Srrs#endif /* PIM */ 241171158Srrs 242171158Srrs/* 243171158Srrs * Private variables. 244171158Srrs */ 245171158Srrsstatic vifi_t numvifs; 246171158Srrsstatic const struct encaptab *encap_cookie; 247171158Srrs 248171158Srrs/* 249171158Srrs * one-back cache used by mroute_encapcheck to locate a tunnel's vif 250171158Srrs * given a datagram's src ip address. 251163953Srrs */ 252163953Srrsstatic u_long last_encap_src; 253163953Srrsstatic struct vif *last_encap_vif; 254163953Srrs 255163953Srrs/* 256163953Srrs * Callout for queue processing. 257163953Srrs */ 258163953Srrsstatic struct callout tbf_reprocess_ch; 259163953Srrs 260163953Srrsstatic u_long X_ip_mcast_src(int vifi); 261163953Srrsstatic int X_ip_mforward(struct ip *ip, struct ifnet *ifp, 262163953Srrs struct mbuf *m, struct ip_moptions *imo); 263163953Srrsstatic int X_ip_mrouter_done(void); 264163953Srrsstatic int X_ip_mrouter_get(struct socket *so, struct sockopt *m); 265163953Srrsstatic int X_ip_mrouter_set(struct socket *so, struct sockopt *m); 266163953Srrsstatic int X_legal_vif_num(int vif); 267163953Srrsstatic int X_mrt_ioctl(int cmd, caddr_t data); 268163953Srrs 269163953Srrsstatic int get_sg_cnt(struct sioc_sg_req *); 270179783Srrsstatic int get_vif_cnt(struct sioc_vif_req *); 271170744Srrsstatic int ip_mrouter_init(struct socket *, int); 272170744Srrsstatic int add_vif(struct vifctl *); 273163953Srrsstatic int del_vif(vifi_t); 274163953Srrsstatic int add_mfc(struct mfcctl2 *); 275164181Srrsstatic int del_mfc(struct mfcctl2 *); 276163953Srrsstatic int set_api_config(uint32_t *); /* chose API capabilities */ 277163953Srrsstatic int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *); 278163953Srrsstatic int set_assert(int); 279163953Srrsstatic void expire_upcalls(void *); 280163953Srrsstatic int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, vifi_t); 281163953Srrsstatic void phyint_send(struct ip *, struct vif *, struct mbuf *); 282163953Srrsstatic void encap_send(struct ip *, struct vif *, struct mbuf *); 283163953Srrsstatic void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long); 284163953Srrsstatic void tbf_queue(struct vif *, struct mbuf *); 285163953Srrsstatic void tbf_process_q(struct vif *); 286163953Srrsstatic void tbf_reprocess_q(void *); 287163953Srrsstatic int tbf_dq_sel(struct vif *, struct ip *); 288163953Srrsstatic void tbf_send_packet(struct vif *, struct mbuf *); 289163953Srrsstatic void tbf_update_tokens(struct vif *); 290163953Srrsstatic int priority(struct vif *, struct ip *); 291163953Srrs 292163953Srrs/* 293163953Srrs * Bandwidth monitoring 294163953Srrs */ 295172090Srrsstatic void free_bw_list(struct bw_meter *list); 296163953Srrsstatic int add_bw_upcall(struct bw_upcall *); 297163953Srrsstatic int del_bw_upcall(struct bw_upcall *); 298163953Srrsstatic void bw_meter_receive_packet(struct bw_meter *x, int plen, 299163953Srrs struct timeval *nowp); 300163953Srrsstatic void bw_meter_prepare_upcall(struct bw_meter *x, struct timeval *nowp); 301163953Srrsstatic void bw_upcalls_send(void); 302163953Srrsstatic void schedule_bw_meter(struct bw_meter *x, struct timeval *nowp); 303169420Srrsstatic void unschedule_bw_meter(struct bw_meter *x); 304163979Srustatic void bw_meter_process(void); 305163953Srrsstatic void expire_bw_upcalls_send(void *); 306163953Srrsstatic void expire_bw_meter_process(void *); 307169655Srrs 308163953Srrs#ifdef PIM 309163953Srrsstatic int pim_register_send(struct ip *, struct vif *, 310163953Srrs struct mbuf *, struct mfc *); 311163953Srrsstatic int pim_register_send_rp(struct ip *, struct vif *, 312163953Srrs struct mbuf *, struct mfc *); 313163953Srrsstatic int pim_register_send_upcall(struct ip *, struct vif *, 314163953Srrs struct mbuf *, struct mfc *); 315164181Srrsstatic struct mbuf *pim_register_prepare(struct ip *, struct mbuf *); 316163953Srrs#endif 317163953Srrs 318163953Srrs/* 319185694Srrs * whether or not special PIM assert processing is enabled. 320185694Srrs */ 321179783Srrsstatic int pim_assert; 322170744Srrs/* 323170744Srrs * Rate limit for assert notification messages, in usec 324163953Srrs */ 325163953Srrs#define ASSERT_MSG_TIME 3000000 326163953Srrs 327163953Srrs/* 328185694Srrs * Kernel multicast routing API capabilities and setup. 329185694Srrs * If more API capabilities are added to the kernel, they should be 330185694Srrs * recorded in `mrt_api_support'. 331185694Srrs */ 332185694Srrsstatic const uint32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF | 333180955Srrs MRT_MFC_FLAGS_BORDER_VIF | 334163953Srrs MRT_MFC_RP | 335163953Srrs MRT_MFC_BW_UPCALL); 336163953Srrsstatic uint32_t mrt_api_config = 0; 337170091Srrs 338163953Srrs/* 339163953Srrs * Hash function for a source, group entry 340164181Srrs */ 341164181Srrs#define MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \ 342164181Srrs ((g) >> 20) ^ ((g) >> 10) ^ (g)) 343164181Srrs 344164181Srrs/* 345164181Srrs * Find a route for a given origin IP address and Multicast group address 346164181Srrs * Type of service parameter to be added in the future!!! 347171158Srrs * Statistics are updated by the caller if needed 348164181Srrs * (mrtstat.mrts_mfc_lookups and mrtstat.mrts_mfc_misses) 349164181Srrs */ 350164181Srrsstatic struct mfc * 351164181Srrsmfc_find(in_addr_t o, in_addr_t g) 352164181Srrs{ 353164181Srrs struct mfc *rt; 354170091Srrs 355163953Srrs MFC_LOCK_ASSERT(); 356164181Srrs 357164181Srrs for (rt = mfctable[MFCHASH(o,g)]; rt; rt = rt->mfc_next) 358164181Srrs if ((rt->mfc_origin.s_addr == o) && 359164181Srrs (rt->mfc_mcastgrp.s_addr == g) && (rt->mfc_stall == NULL)) 360163953Srrs break; 361170091Srrs return rt; 362163953Srrs} 363163953Srrs 364169420Srrs/* 365163953Srrs * Macros to compute elapsed time efficiently 366163953Srrs * Borrowed from Van Jacobson's scheduling code 367163953Srrs */ 368163953Srrs#define TV_DELTA(a, b, delta) { \ 369163953Srrs int xxs; \ 370163953Srrs delta = (a).tv_usec - (b).tv_usec; \ 371163953Srrs if ((xxs = (a).tv_sec - (b).tv_sec)) { \ 372163953Srrs switch (xxs) { \ 373163953Srrs case 2: \ 374163953Srrs delta += 1000000; \ 375163953Srrs /* FALLTHROUGH */ \ 376168943Srrs case 1: \ 377163953Srrs delta += 1000000; \ 378163953Srrs break; \ 379163953Srrs default: \ 380163953Srrs delta += (1000000 * xxs); \ 381163953Srrs } \ 382163953Srrs } \ 383163953Srrs} 384163953Srrs 385163953Srrs#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \ 386163953Srrs (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) 387169655Srrs 388163953Srrs/* 389163953Srrs * Handle MRT setsockopt commands to modify the multicast routing tables. 390163953Srrs */ 391163953Srrsstatic int 392163953SrrsX_ip_mrouter_set(struct socket *so, struct sockopt *sopt) 393163953Srrs{ 394163953Srrs int error, optval; 395163953Srrs vifi_t vifi; 396163953Srrs struct vifctl vifc; 397170181Srrs struct mfcctl2 mfc; 398163953Srrs struct bw_upcall bw_upcall; 399163953Srrs uint32_t i; 400163953Srrs 401163953Srrs if (so != ip_mrouter && sopt->sopt_name != MRT_INIT) 402163953Srrs return EPERM; 403185694Srrs 404163953Srrs error = 0; 405163953Srrs switch (sopt->sopt_name) { 406163953Srrs case MRT_INIT: 407163953Srrs error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); 408163953Srrs if (error) 409163953Srrs break; 410163953Srrs error = ip_mrouter_init(so, optval); 411185694Srrs break; 412163953Srrs 413163953Srrs case MRT_DONE: 414172090Srrs error = ip_mrouter_done(); 415170056Srrs break; 416163953Srrs 417163953Srrs case MRT_ADD_VIF: 418163953Srrs error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc); 419185694Srrs if (error) 420163953Srrs break; 421163953Srrs error = add_vif(&vifc); 422163953Srrs break; 423163953Srrs 424163953Srrs case MRT_DEL_VIF: 425163953Srrs error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi); 426163953Srrs if (error) 427163953Srrs break; 428163953Srrs error = del_vif(vifi); 429163953Srrs break; 430163953Srrs 431169420Srrs case MRT_ADD_MFC: 432169420Srrs case MRT_DEL_MFC: 433169420Srrs /* 434163953Srrs * select data size depending on API version. 435179157Srrs */ 436168299Srrs if (sopt->sopt_name == MRT_ADD_MFC && 437163953Srrs mrt_api_config & MRT_API_FLAGS_ALL) { 438163953Srrs error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl2), 439171477Srrs sizeof(struct mfcctl2)); 440171477Srrs } else { 441171477Srrs error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl), 442171477Srrs sizeof(struct mfcctl)); 443171477Srrs bzero((caddr_t)&mfc + sizeof(struct mfcctl), 444171477Srrs sizeof(mfc) - sizeof(struct mfcctl)); 445171477Srrs } 446171477Srrs if (error) 447171477Srrs break; 448171477Srrs if (sopt->sopt_name == MRT_ADD_MFC) 449171477Srrs error = add_mfc(&mfc); 450163953Srrs else 451163953Srrs error = del_mfc(&mfc); 452163953Srrs break; 453163953Srrs 454163953Srrs case MRT_ASSERT: 455163953Srrs error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); 456163953Srrs if (error) 457163953Srrs break; 458179783Srrs set_assert(optval); 459171943Srrs break; 460171943Srrs 461171943Srrs case MRT_API_CONFIG: 462171943Srrs error = sooptcopyin(sopt, &i, sizeof i, sizeof i); 463171943Srrs if (!error) 464171943Srrs error = set_api_config(&i); 465163953Srrs if (!error) 466163953Srrs error = sooptcopyout(sopt, &i, sizeof i); 467163953Srrs break; 468163953Srrs 469163953Srrs case MRT_ADD_BW_UPCALL: 470163953Srrs case MRT_DEL_BW_UPCALL: 471163953Srrs error = sooptcopyin(sopt, &bw_upcall, sizeof bw_upcall, 472163953Srrs sizeof bw_upcall); 473163953Srrs if (error) 474163953Srrs break; 475165220Srrs if (sopt->sopt_name == MRT_ADD_BW_UPCALL) 476163953Srrs error = add_bw_upcall(&bw_upcall); 477165220Srrs else 478171477Srrs error = del_bw_upcall(&bw_upcall); 479165220Srrs break; 480163953Srrs 481163953Srrs default: 482163953Srrs error = EOPNOTSUPP; 483163953Srrs break; 484163953Srrs } 485163953Srrs return error; 486163953Srrs} 487163953Srrs 488163953Srrs/* 489163953Srrs * Handle MRT getsockopt commands 490163953Srrs */ 491163953Srrsstatic int 492163953SrrsX_ip_mrouter_get(struct socket *so, struct sockopt *sopt) 493163953Srrs{ 494163953Srrs int error; 495163953Srrs static int version = 0x0305; /* !!! why is this here? XXX */ 496163953Srrs 497163953Srrs switch (sopt->sopt_name) { 498165647Srrs case MRT_VERSION: 499163953Srrs error = sooptcopyout(sopt, &version, sizeof version); 500163953Srrs break; 501163953Srrs 502163953Srrs case MRT_ASSERT: 503163953Srrs error = sooptcopyout(sopt, &pim_assert, sizeof pim_assert); 504163953Srrs break; 505163953Srrs 506163953Srrs case MRT_API_SUPPORT: 507163953Srrs error = sooptcopyout(sopt, &mrt_api_support, sizeof mrt_api_support); 508163953Srrs break; 509163953Srrs 510179157Srrs case MRT_API_CONFIG: 511168299Srrs error = sooptcopyout(sopt, &mrt_api_config, sizeof mrt_api_config); 512163953Srrs break; 513163953Srrs 514163953Srrs default: 515163953Srrs error = EOPNOTSUPP; 516163953Srrs break; 517163953Srrs } 518163953Srrs return error; 519163953Srrs} 520163953Srrs 521163953Srrs/* 522163953Srrs * Handle ioctl commands to obtain information from the cache 523163953Srrs */ 524163953Srrsstatic int 525163953SrrsX_mrt_ioctl(int cmd, caddr_t data) 526163953Srrs{ 527172396Srrs int error = 0; 528163953Srrs 529163953Srrs switch (cmd) { 530163953Srrs case (SIOCGETVIFCNT): 531163953Srrs error = get_vif_cnt((struct sioc_vif_req *)data); 532163953Srrs break; 533163953Srrs 534163953Srrs case (SIOCGETSGCNT): 535163953Srrs error = get_sg_cnt((struct sioc_sg_req *)data); 536163953Srrs break; 537163953Srrs 538163953Srrs default: 539163953Srrs error = EINVAL; 540163953Srrs break; 541163953Srrs } 542163953Srrs return error; 543163953Srrs} 544163953Srrs 545163953Srrs/* 546163953Srrs * returns the packet, byte, rpf-failure count for the source group provided 547163953Srrs */ 548163953Srrsstatic int 549163953Srrsget_sg_cnt(struct sioc_sg_req *req) 550163953Srrs{ 551163953Srrs struct mfc *rt; 552163953Srrs 553163953Srrs MFC_LOCK(); 554163953Srrs rt = mfc_find(req->src.s_addr, req->grp.s_addr); 555163953Srrs if (rt == NULL) { 556169420Srrs MFC_UNLOCK(); 557163953Srrs req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff; 558163953Srrs return EADDRNOTAVAIL; 559163953Srrs } 560163953Srrs req->pktcnt = rt->mfc_pkt_cnt; 561163953Srrs req->bytecnt = rt->mfc_byte_cnt; 562163953Srrs req->wrong_if = rt->mfc_wrong_if; 563163953Srrs MFC_UNLOCK(); 564163953Srrs return 0; 565163953Srrs} 566163953Srrs 567167598Srrs/* 568167598Srrs * returns the input and output packet and byte counts on the vif provided 569167598Srrs */ 570167598Srrsstatic int 571167598Srrsget_vif_cnt(struct sioc_vif_req *req) 572167598Srrs{ 573167598Srrs vifi_t vifi = req->vifi; 574167598Srrs 575167598Srrs VIF_LOCK(); 576167598Srrs if (vifi >= numvifs) { 577167598Srrs VIF_UNLOCK(); 578167598Srrs return EINVAL; 579167598Srrs } 580167598Srrs 581167598Srrs req->icount = viftable[vifi].v_pkt_in; 582172396Srrs req->ocount = viftable[vifi].v_pkt_out; 583167598Srrs req->ibytes = viftable[vifi].v_bytes_in; 584163953Srrs req->obytes = viftable[vifi].v_bytes_out; 585172090Srrs VIF_UNLOCK(); 586163953Srrs 587163953Srrs return 0; 588163953Srrs} 589163953Srrs 590163953Srrsstatic void 591163953Srrsip_mrouter_reset(void) 592163953Srrs{ 593163953Srrs bzero((caddr_t)mfctable, sizeof(mfctable)); 594163953Srrs bzero((caddr_t)nexpire, sizeof(nexpire)); 595172090Srrs 596163953Srrs pim_assert = 0; 597163953Srrs mrt_api_config = 0; 598169420Srrs 599163953Srrs callout_init(&expire_upcalls_ch, CALLOUT_MPSAFE); 600163953Srrs 601171440Srrs bw_upcalls_n = 0; 602171440Srrs bzero((caddr_t)bw_meter_timers, sizeof(bw_meter_timers)); 603171440Srrs callout_init(&bw_upcalls_ch, CALLOUT_MPSAFE); 604171440Srrs callout_init(&bw_meter_ch, CALLOUT_MPSAFE); 605171440Srrs 606171440Srrs callout_init(&tbf_reprocess_ch, CALLOUT_MPSAFE); 607171440Srrs} 608179783Srrs 609179783Srrsstatic struct mtx mrouter_mtx; /* used to synch init/done work */ 610179783Srrs 611171440Srrs/* 612171440Srrs * Enable multicast routing 613171440Srrs */ 614171440Srrsstatic int 615171440Srrsip_mrouter_init(struct socket *so, int version) 616171440Srrs{ 617179783Srrs if (mrtdebug) 618171440Srrs log(LOG_DEBUG, "ip_mrouter_init: so_type = %d, pr_protocol = %d\n", 619171440Srrs so->so_type, so->so_proto->pr_protocol); 620171440Srrs 621163953Srrs if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_IGMP) 622171477Srrs return EOPNOTSUPP; 623172396Srrs 624172396Srrs if (version != 1) 625172396Srrs return ENOPROTOOPT; 626172396Srrs 627172396Srrs mtx_lock(&mrouter_mtx); 628172396Srrs 629172396Srrs if (ip_mrouter != NULL) { 630172396Srrs mtx_unlock(&mrouter_mtx); 631172396Srrs return EADDRINUSE; 632172396Srrs } 633172396Srrs 634172396Srrs callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, NULL); 635172396Srrs 636172396Srrs callout_reset(&bw_upcalls_ch, BW_UPCALLS_PERIOD, 637172396Srrs expire_bw_upcalls_send, NULL); 638172396Srrs callout_reset(&bw_meter_ch, BW_METER_PERIOD, expire_bw_meter_process, NULL); 639172396Srrs 640172396Srrs ip_mrouter = so; 641172396Srrs 642172396Srrs mtx_unlock(&mrouter_mtx); 643172396Srrs 644172396Srrs if (mrtdebug) 645172396Srrs log(LOG_DEBUG, "ip_mrouter_init\n"); 646172396Srrs 647163953Srrs return 0; 648163953Srrs} 649185694Srrs 650185694Srrs/* 651185694Srrs * Disable multicast routing 652185694Srrs */ 653185694Srrsstatic int 654185694SrrsX_ip_mrouter_done(void) 655185694Srrs{ 656185694Srrs vifi_t vifi; 657185694Srrs int i; 658185694Srrs struct ifnet *ifp; 659185694Srrs struct ifreq ifr; 660185694Srrs struct mfc *rt; 661185694Srrs struct rtdetq *rte; 662185694Srrs 663185694Srrs mtx_lock(&mrouter_mtx); 664185694Srrs 665185694Srrs if (ip_mrouter == NULL) { 666185694Srrs mtx_unlock(&mrouter_mtx); 667185694Srrs return EINVAL; 668185694Srrs } 669185694Srrs 670185694Srrs /* 671185694Srrs * Detach/disable hooks to the reset of the system. 672185694Srrs */ 673185694Srrs ip_mrouter = NULL; 674185694Srrs mrt_api_config = 0; 675185694Srrs 676185694Srrs VIF_LOCK(); 677185694Srrs if (encap_cookie) { 678185694Srrs const struct encaptab *c = encap_cookie; 679185694Srrs encap_cookie = NULL; 680185694Srrs encap_detach(c); 681185694Srrs } 682185694Srrs VIF_UNLOCK(); 683185694Srrs 684185694Srrs callout_stop(&tbf_reprocess_ch); 685185694Srrs 686185694Srrs VIF_LOCK(); 687185694Srrs /* 688185694Srrs * For each phyint in use, disable promiscuous reception of all IP 689185694Srrs * multicasts. 690185694Srrs */ 691185694Srrs for (vifi = 0; vifi < numvifs; vifi++) { 692185694Srrs if (viftable[vifi].v_lcl_addr.s_addr != 0 && 693185694Srrs !(viftable[vifi].v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) { 694185694Srrs struct sockaddr_in *so = (struct sockaddr_in *)&(ifr.ifr_addr); 695185694Srrs 696185694Srrs so->sin_len = sizeof(struct sockaddr_in); 697185694Srrs so->sin_family = AF_INET; 698185694Srrs so->sin_addr.s_addr = INADDR_ANY; 699185694Srrs ifp = viftable[vifi].v_ifp; 700185694Srrs if_allmulti(ifp, 0); 701185694Srrs } 702185694Srrs } 703185694Srrs bzero((caddr_t)tbftable, sizeof(tbftable)); 704185694Srrs bzero((caddr_t)viftable, sizeof(viftable)); 705185694Srrs numvifs = 0; 706185694Srrs pim_assert = 0; 707185694Srrs VIF_UNLOCK(); 708185694Srrs 709185694Srrs /* 710185694Srrs * Free all multicast forwarding cache entries. 711185694Srrs */ 712163953Srrs callout_stop(&expire_upcalls_ch); 713163953Srrs callout_stop(&bw_upcalls_ch); 714163953Srrs callout_stop(&bw_meter_ch); 715163953Srrs 716172090Srrs MFC_LOCK(); 717172090Srrs for (i = 0; i < MFCTBLSIZ; i++) { 718172090Srrs for (rt = mfctable[i]; rt != NULL; ) { 719172090Srrs struct mfc *nr = rt->mfc_next; 720185694Srrs 721172090Srrs for (rte = rt->mfc_stall; rte != NULL; ) { 722169420Srrs struct rtdetq *n = rte->next; 723163953Srrs 724163953Srrs m_freem(rte->m); 725163953Srrs free(rte, M_MRTABLE); 726185694Srrs rte = n; 727185694Srrs } 728185694Srrs free_bw_list(rt->mfc_bw_meter); 729185694Srrs free(rt, M_MRTABLE); 730185694Srrs rt = nr; 731185694Srrs } 732185694Srrs } 733185694Srrs bzero((caddr_t)mfctable, sizeof(mfctable)); 734185694Srrs bzero((caddr_t)nexpire, sizeof(nexpire)); 735185694Srrs bw_upcalls_n = 0; 736185694Srrs bzero(bw_meter_timers, sizeof(bw_meter_timers)); 737185694Srrs MFC_UNLOCK(); 738185694Srrs 739185694Srrs /* 740185694Srrs * Reset de-encapsulation cache 741185694Srrs */ 742185694Srrs last_encap_src = INADDR_ANY; 743185694Srrs last_encap_vif = NULL; 744185694Srrs#ifdef PIM 745185694Srrs reg_vif_num = VIFI_INVALID; 746185694Srrs#endif 747185694Srrs 748185694Srrs mtx_unlock(&mrouter_mtx); 749185694Srrs 750185694Srrs if (mrtdebug) 751185694Srrs log(LOG_DEBUG, "ip_mrouter_done\n"); 752185694Srrs 753185694Srrs return 0; 754163953Srrs} 755171440Srrs 756163953Srrs/* 757172090Srrs * Set PIM assert processing global 758163953Srrs */ 759172396Srrsstatic int 760172396Srrsset_assert(int i) 761172396Srrs{ 762172396Srrs if ((i != 1) && (i != 0)) 763172396Srrs return EINVAL; 764172396Srrs 765172396Srrs pim_assert = i; 766172396Srrs 767172396Srrs return 0; 768163953Srrs} 769163953Srrs 770163953Srrs/* 771163953Srrs * Configure API capabilities 772163953Srrs */ 773168859Srrsint 774168859Srrsset_api_config(uint32_t *apival) 775168859Srrs{ 776172090Srrs int i; 777172090Srrs 778172090Srrs /* 779172090Srrs * We can set the API capabilities only if it is the first operation 780172090Srrs * after MRT_INIT. I.e.: 781172090Srrs * - there are no vifs installed 782172090Srrs * - pim_assert is not enabled 783172090Srrs * - the MFC table is empty 784171990Srrs */ 785171943Srrs if (numvifs > 0) { 786171943Srrs *apival = 0; 787172090Srrs return EPERM; 788172090Srrs } 789172090Srrs if (pim_assert) { 790169420Srrs *apival = 0; 791163953Srrs return EPERM; 792163953Srrs } 793163953Srrs for (i = 0; i < MFCTBLSIZ; i++) { 794163953Srrs if (mfctable[i] != NULL) { 795163953Srrs *apival = 0; 796163953Srrs return EPERM; 797163953Srrs } 798163953Srrs } 799163953Srrs 800172090Srrs mrt_api_config = *apival & mrt_api_support; 801172090Srrs *apival = mrt_api_config; 802172090Srrs 803172090Srrs return 0; 804172090Srrs} 805169420Srrs 806169420Srrs/* 807163953Srrs * Decide if a packet is from a tunnelled peer. 808163953Srrs * Return 0 if not, 64 if so. XXX yuck.. 64 ??? 809165220Srrs */ 810165220Srrsstatic int 811165220Srrsmroute_encapcheck(const struct mbuf *m, int off, int proto, void *arg) 812163953Srrs{ 813163953Srrs struct ip *ip = mtod(m, struct ip *); 814163953Srrs int hlen = ip->ip_hl << 2; 815163953Srrs 816163953Srrs /* 817163953Srrs * don't claim the packet if it's not to a multicast destination or if 818163953Srrs * we don't have an encapsulating tunnel with the source. 819163953Srrs * Note: This code assumes that the remote site IP address 820165220Srrs * uniquely identifies the tunnel (i.e., that this site has 821163953Srrs * at most one tunnel with the remote site). 822163953Srrs */ 823163953Srrs if (!IN_MULTICAST(ntohl(((struct ip *)((char *)ip+hlen))->ip_dst.s_addr))) 824163953Srrs return 0; 825165220Srrs if (ip->ip_src.s_addr != last_encap_src) { 826165220Srrs struct vif *vifp = viftable; 827165220Srrs struct vif *vife = vifp + numvifs; 828163953Srrs 829172090Srrs last_encap_src = ip->ip_src.s_addr; 830172090Srrs last_encap_vif = NULL; 831172090Srrs for ( ; vifp < vife; ++vifp) 832172090Srrs if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) { 833172090Srrs if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT)) == VIFF_TUNNEL) 834172090Srrs last_encap_vif = vifp; 835172090Srrs break; 836172090Srrs } 837172090Srrs } 838172090Srrs if (last_encap_vif == NULL) { 839172090Srrs last_encap_src = INADDR_ANY; 840172090Srrs return 0; 841172090Srrs } 842163996Srrs return 64; 843172090Srrs} 844172090Srrs 845172090Srrs/* 846163953Srrs * De-encapsulate a packet and feed it back through ip input (this 847163953Srrs * routine is called whenever IP gets a packet that mroute_encap_func() 848163953Srrs * claimed). 849163953Srrs */ 850170056Srrsstatic void 851163953Srrsmroute_encap_input(struct mbuf *m, int off) 852171943Srrs{ 853172703Srrs struct ip *ip = mtod(m, struct ip *); 854163953Srrs int hlen = ip->ip_hl << 2; 855163953Srrs 856163953Srrs if (hlen > sizeof(struct ip)) 857163953Srrs ip_stripoptions(m, (struct mbuf *) 0); 858172090Srrs m->m_data += sizeof(struct ip); 859163953Srrs m->m_len -= sizeof(struct ip); 860163953Srrs m->m_pkthdr.len -= sizeof(struct ip); 861169378Srrs 862163953Srrs m->m_pkthdr.rcvif = last_encap_vif->v_ifp; 863163953Srrs 864163953Srrs netisr_queue(NETISR_IP, m); 865163953Srrs /* 866163953Srrs * normally we would need a "schednetisr(NETISR_IP)" 867163953Srrs * here but we were called by ip_input and it is going 868163953Srrs * to loop back & try to dequeue the packet we just 869171440Srrs * queued as soon as we return so we avoid the 870163953Srrs * unnecessary software interrrupt. 871171158Srrs * 872171158Srrs * XXX 873163953Srrs * This no longer holds - we may have direct-dispatched the packet, 874163953Srrs * or there may be a queue processing limit. 875163953Srrs */ 876163953Srrs} 877163953Srrs 878163953Srrsextern struct domain inetdomain; 879163953Srrsstatic struct protosw mroute_encap_protosw = 880163953Srrs{ SOCK_RAW, &inetdomain, IPPROTO_IPV4, PR_ATOMIC|PR_ADDR, 881163953Srrs mroute_encap_input, 0, 0, rip_ctloutput, 882163953Srrs 0, 883163953Srrs 0, 0, 0, 0, 884166675Srrs &rip_usrreqs 885166675Srrs}; 886163953Srrs 887163953Srrs/* 888171943Srrs * Add a vif to the vif table 889172703Srrs */ 890171990Srrsstatic int 891171990Srrsadd_vif(struct vifctl *vifcp) 892163953Srrs{ 893163953Srrs struct vif *vifp = viftable + vifcp->vifc_vifi; 894163953Srrs struct sockaddr_in sin = {sizeof sin, AF_INET}; 895163953Srrs struct ifaddr *ifa; 896163953Srrs struct ifnet *ifp; 897163953Srrs int error; 898163953Srrs struct tbf *v_tbf = tbftable + vifcp->vifc_vifi; 899163953Srrs 900163953Srrs VIF_LOCK(); 901163953Srrs if (vifcp->vifc_vifi >= MAXVIFS) { 902163953Srrs VIF_UNLOCK(); 903163953Srrs return EINVAL; 904172090Srrs } 905172090Srrs if (vifp->v_lcl_addr.s_addr != INADDR_ANY) { 906172090Srrs VIF_UNLOCK(); 907172090Srrs return EADDRINUSE; 908172090Srrs } 909169420Srrs if (vifcp->vifc_lcl_addr.s_addr == INADDR_ANY) { 910169420Srrs VIF_UNLOCK(); 911163953Srrs return EADDRNOTAVAIL; 912163953Srrs } 913163953Srrs 914163953Srrs /* Find the interface with an address in AF_INET family */ 915163953Srrs#ifdef PIM 916163953Srrs if (vifcp->vifc_flags & VIFF_REGISTER) { 917163953Srrs /* 918163953Srrs * XXX: Because VIFF_REGISTER does not really need a valid 919163953Srrs * local interface (e.g. it could be 127.0.0.2), we don't 920163953Srrs * check its address. 921163953Srrs */ 922165220Srrs ifp = NULL; 923163953Srrs } else 924163953Srrs#endif 925163953Srrs { 926163953Srrs sin.sin_addr = vifcp->vifc_lcl_addr; 927165220Srrs ifa = ifa_ifwithaddr((struct sockaddr *)&sin); 928165220Srrs if (ifa == NULL) { 929165220Srrs VIF_UNLOCK(); 930163953Srrs return EADDRNOTAVAIL; 931172090Srrs } 932172090Srrs ifp = ifa->ifa_ifp; 933172090Srrs } 934172090Srrs 935172090Srrs if (vifcp->vifc_flags & VIFF_TUNNEL) { 936172090Srrs if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) { 937172090Srrs /* 938172090Srrs * An encapsulating tunnel is wanted. Tell 939172090Srrs * mroute_encap_input() to start paying attention 940172090Srrs * to encapsulated packets. 941172090Srrs */ 942172090Srrs if (encap_cookie == NULL) { 943163996Srrs int i; 944172090Srrs 945172090Srrs encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV4, 946172090Srrs mroute_encapcheck, 947163953Srrs (struct protosw *)&mroute_encap_protosw, NULL); 948163953Srrs 949163953Srrs if (encap_cookie == NULL) { 950163953Srrs printf("ip_mroute: unable to attach encap\n"); 951163953Srrs VIF_UNLOCK(); 952172090Srrs return EIO; /* XXX */ 953163953Srrs } 954163953Srrs for (i = 0; i < MAXVIFS; ++i) { 955171440Srrs if_initname(&multicast_decap_if[i], "mdecap", i); 956163953Srrs } 957163953Srrs } 958163953Srrs /* 959163953Srrs * Set interface to fake encapsulator interface 960172090Srrs */ 961163953Srrs ifp = &multicast_decap_if[vifcp->vifc_vifi]; 962163953Srrs /* 963163953Srrs * Prepare cached route entry 964163953Srrs */ 965163953Srrs bzero(&vifp->v_route, sizeof(vifp->v_route)); 966163953Srrs } else { 967163953Srrs log(LOG_ERR, "source routed tunnels not supported\n"); 968163953Srrs VIF_UNLOCK(); 969172090Srrs return EOPNOTSUPP; 970172090Srrs } 971172090Srrs#ifdef PIM 972172090Srrs } else if (vifcp->vifc_flags & VIFF_REGISTER) { 973172090Srrs ifp = &multicast_register_if; 974172090Srrs if (mrtdebug) 975172090Srrs log(LOG_DEBUG, "Adding a register vif, ifp: %p\n", 976171943Srrs (void *)&multicast_register_if); 977171440Srrs if (reg_vif_num == VIFI_INVALID) { 978172090Srrs if_initname(&multicast_register_if, "register_vif", 0); 979172090Srrs multicast_register_if.if_flags = IFF_LOOPBACK; 980172090Srrs bzero(&vifp->v_route, sizeof(vifp->v_route)); 981163953Srrs reg_vif_num = vifcp->vifc_vifi; 982163953Srrs } 983163953Srrs#endif 984163953Srrs } else { /* Make sure the interface supports multicast */ 985163953Srrs if ((ifp->if_flags & IFF_MULTICAST) == 0) { 986163953Srrs VIF_UNLOCK(); 987163953Srrs return EOPNOTSUPP; 988163953Srrs } 989163953Srrs 990163953Srrs /* Enable promiscuous reception of all IP multicasts from the if */ 991163953Srrs error = if_allmulti(ifp, 1); 992163953Srrs if (error) { 993163953Srrs VIF_UNLOCK(); 994163953Srrs return error; 995163953Srrs } 996163953Srrs } 997163953Srrs 998163953Srrs /* define parameters for the tbf structure */ 999163953Srrs vifp->v_tbf = v_tbf; 1000163953Srrs GET_TIME(vifp->v_tbf->tbf_last_pkt_t); 1001163953Srrs vifp->v_tbf->tbf_n_tok = 0; 1002163953Srrs vifp->v_tbf->tbf_q_len = 0; 1003163953Srrs vifp->v_tbf->tbf_max_q_len = MAXQSIZE; 1004169420Srrs vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL; 1005169420Srrs 1006169420Srrs vifp->v_flags = vifcp->vifc_flags; 1007163953Srrs vifp->v_threshold = vifcp->vifc_threshold; 1008163953Srrs vifp->v_lcl_addr = vifcp->vifc_lcl_addr; 1009163953Srrs vifp->v_rmt_addr = vifcp->vifc_rmt_addr; 1010163953Srrs vifp->v_ifp = ifp; 1011163953Srrs /* scaling up here allows division by 1024 in critical code */ 1012163953Srrs vifp->v_rate_limit= vifcp->vifc_rate_limit * 1024 / 1000; 1013163953Srrs vifp->v_rsvp_on = 0; 1014163953Srrs vifp->v_rsvpd = NULL; 1015163953Srrs /* initialize per vif pkt counters */ 1016163953Srrs vifp->v_pkt_in = 0; 1017163953Srrs vifp->v_pkt_out = 0; 1018163953Srrs vifp->v_bytes_in = 0; 1019163953Srrs vifp->v_bytes_out = 0; 1020163953Srrs 1021163953Srrs /* Adjust numvifs up if the vifi is higher than numvifs */ 1022163953Srrs if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1; 1023163953Srrs 1024163953Srrs VIF_UNLOCK(); 1025163953Srrs 1026163953Srrs if (mrtdebug) 1027163953Srrs log(LOG_DEBUG, "add_vif #%d, lcladdr %lx, %s %lx, thresh %x, rate %d\n", 1028163953Srrs vifcp->vifc_vifi, 1029163953Srrs (u_long)ntohl(vifcp->vifc_lcl_addr.s_addr), 1030185694Srrs (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask", 1031185694Srrs (u_long)ntohl(vifcp->vifc_rmt_addr.s_addr), 1032185694Srrs vifcp->vifc_threshold, 1033163953Srrs vifcp->vifc_rate_limit); 1034163953Srrs 1035163953Srrs return 0; 1036163953Srrs} 1037163953Srrs 1038163953Srrs/* 1039163953Srrs * Delete a vif from the vif table 1040163953Srrs */ 1041163953Srrsstatic int 1042163953Srrsdel_vif(vifi_t vifi) 1043163953Srrs{ 1044163953Srrs struct vif *vifp; 1045169420Srrs 1046169420Srrs VIF_LOCK(); 1047169420Srrs 1048163953Srrs if (vifi >= numvifs) { 1049163953Srrs VIF_UNLOCK(); 1050163953Srrs return EINVAL; 1051169420Srrs } 1052169420Srrs vifp = &viftable[vifi]; 1053169420Srrs if (vifp->v_lcl_addr.s_addr == INADDR_ANY) { 1054163953Srrs VIF_UNLOCK(); 1055163953Srrs return EADDRNOTAVAIL; 1056163953Srrs } 1057163953Srrs 1058163953Srrs if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) 1059163953Srrs if_allmulti(vifp->v_ifp, 0); 1060163953Srrs 1061163953Srrs if (vifp == last_encap_vif) { 1062163953Srrs last_encap_vif = NULL; 1063163953Srrs last_encap_src = INADDR_ANY; 1064163953Srrs } 1065163953Srrs 1066163953Srrs /* 1067163953Srrs * Free packets queued at the interface 1068163953Srrs */ 1069172090Srrs while (vifp->v_tbf->tbf_q) { 1070172090Srrs struct mbuf *m = vifp->v_tbf->tbf_q; 1071172090Srrs 1072172090Srrs vifp->v_tbf->tbf_q = m->m_act; 1073172090Srrs m_freem(m); 1074163953Srrs } 1075163953Srrs 1076163953Srrs#ifdef PIM 1077163953Srrs if (vifp->v_flags & VIFF_REGISTER) 1078163953Srrs reg_vif_num = VIFI_INVALID; 1079163953Srrs#endif 1080163953Srrs 1081163953Srrs bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf))); 1082163953Srrs bzero((caddr_t)vifp, sizeof (*vifp)); 1083163953Srrs 1084163953Srrs if (mrtdebug) 1085169420Srrs log(LOG_DEBUG, "del_vif %d, numvifs %d\n", vifi, numvifs); 1086169420Srrs 1087163953Srrs /* Adjust numvifs down */ 1088163953Srrs for (vifi = numvifs; vifi > 0; vifi--) 1089163953Srrs if (viftable[vifi-1].v_lcl_addr.s_addr != INADDR_ANY) 1090163953Srrs break; 1091163953Srrs numvifs = vifi; 1092163953Srrs 1093163953Srrs VIF_UNLOCK(); 1094169420Srrs 1095169420Srrs return 0; 1096163953Srrs} 1097185694Srrs 1098185694Srrs/* 1099185694Srrs * update an mfc entry without resetting counters and S,G addresses. 1100185694Srrs */ 1101185694Srrsstatic void 1102185694Srrsupdate_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp) 1103185694Srrs{ 1104185694Srrs int i; 1105185694Srrs 1106185694Srrs rt->mfc_parent = mfccp->mfcc_parent; 1107185694Srrs for (i = 0; i < numvifs; i++) { 1108185694Srrs rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; 1109185694Srrs rt->mfc_flags[i] = mfccp->mfcc_flags[i] & mrt_api_config & 1110185694Srrs MRT_MFC_FLAGS_ALL; 1111163953Srrs } 1112163953Srrs /* set the RP address */ 1113163953Srrs if (mrt_api_config & MRT_MFC_RP) 1114163953Srrs rt->mfc_rp = mfccp->mfcc_rp; 1115163953Srrs else 1116163953Srrs rt->mfc_rp.s_addr = INADDR_ANY; 1117163953Srrs} 1118163953Srrs 1119163953Srrs/* 1120163953Srrs * fully initialize an mfc entry from the parameter. 1121163953Srrs */ 1122163953Srrsstatic void 1123163953Srrsinit_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp) 1124163953Srrs{ 1125172090Srrs rt->mfc_origin = mfccp->mfcc_origin; 1126163953Srrs rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 1127172090Srrs 1128172090Srrs update_mfc_params(rt, mfccp); 1129172090Srrs 1130172090Srrs /* initialize pkt counters per src-grp */ 1131172090Srrs rt->mfc_pkt_cnt = 0; 1132172090Srrs rt->mfc_byte_cnt = 0; 1133172090Srrs rt->mfc_wrong_if = 0; 1134172090Srrs rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; 1135171943Srrs} 1136171943Srrs 1137172090Srrs 1138172090Srrs/* 1139172090Srrs * Add an mfc entry 1140163953Srrs */ 1141163953Srrsstatic int 1142163953Srrsadd_mfc(struct mfcctl2 *mfccp) 1143185694Srrs{ 1144163953Srrs struct mfc *rt; 1145163953Srrs u_long hash; 1146163953Srrs struct rtdetq *rte; 1147172090Srrs u_short nstl; 1148163953Srrs 1149163953Srrs VIF_LOCK(); 1150163953Srrs MFC_LOCK(); 1151163953Srrs 1152163953Srrs rt = mfc_find(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); 1153163953Srrs 1154163953Srrs /* If an entry already exists, just update the fields */ 1155163953Srrs if (rt) { 1156163953Srrs if (mrtdebug & DEBUG_MFC) 1157163953Srrs log(LOG_DEBUG,"add_mfc update o %lx g %lx p %x\n", 1158163953Srrs (u_long)ntohl(mfccp->mfcc_origin.s_addr), 1159163953Srrs (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), 1160163953Srrs mfccp->mfcc_parent); 1161163953Srrs 1162163953Srrs update_mfc_params(rt, mfccp); 1163163953Srrs MFC_UNLOCK(); 1164163953Srrs VIF_UNLOCK(); 1165163953Srrs return 0; 1166163953Srrs } 1167163953Srrs 1168163953Srrs /* 1169163953Srrs * Find the entry for which the upcall was made and update 1170163953Srrs */ 1171163953Srrs hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); 1172163953Srrs for (rt = mfctable[hash], nstl = 0; rt; rt = rt->mfc_next) { 1173163953Srrs 1174163953Srrs if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && 1175163953Srrs (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) && 1176163953Srrs (rt->mfc_stall != NULL)) { 1177163953Srrs 1178163953Srrs if (nstl++) 1179163953Srrs log(LOG_ERR, "add_mfc %s o %lx g %lx p %x dbx %p\n", 1180163953Srrs "multiple kernel entries", 1181163953Srrs (u_long)ntohl(mfccp->mfcc_origin.s_addr), 1182166086Srrs (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), 1183163953Srrs mfccp->mfcc_parent, (void *)rt->mfc_stall); 1184169420Srrs 1185169420Srrs if (mrtdebug & DEBUG_MFC) 1186163953Srrs log(LOG_DEBUG,"add_mfc o %lx g %lx p %x dbg %p\n", 1187163953Srrs (u_long)ntohl(mfccp->mfcc_origin.s_addr), 1188163953Srrs (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), 1189163953Srrs mfccp->mfcc_parent, (void *)rt->mfc_stall); 1190163953Srrs 1191166086Srrs init_mfc_params(rt, mfccp); 1192163953Srrs 1193163953Srrs rt->mfc_expire = 0; /* Don't clean this guy up */ 1194163953Srrs nexpire[hash]--; 1195163953Srrs 1196163953Srrs /* free packets Qed at the end of this entry */ 1197169420Srrs for (rte = rt->mfc_stall; rte != NULL; ) { 1198169420Srrs struct rtdetq *n = rte->next; 1199163953Srrs 1200163953Srrs ip_mdq(rte->m, rte->ifp, rt, -1); 1201163953Srrs m_freem(rte->m); 1202163953Srrs free(rte, M_MRTABLE); 1203163953Srrs rte = n; 1204163953Srrs } 1205163953Srrs rt->mfc_stall = NULL; 1206163953Srrs } 1207163953Srrs } 1208163953Srrs 1209169352Srrs /* 1210169352Srrs * It is possible that an entry is being inserted without an upcall 1211170181Srrs */ 1212163953Srrs if (nstl == 0) { 1213163953Srrs if (mrtdebug & DEBUG_MFC) 1214163953Srrs log(LOG_DEBUG,"add_mfc no upcall h %lu o %lx g %lx p %x\n", 1215163953Srrs hash, (u_long)ntohl(mfccp->mfcc_origin.s_addr), 1216169420Srrs (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), 1217169420Srrs mfccp->mfcc_parent); 1218169420Srrs 1219163953Srrs for (rt = mfctable[hash]; rt != NULL; rt = rt->mfc_next) { 1220169420Srrs if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && 1221169420Srrs (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) { 1222163953Srrs init_mfc_params(rt, mfccp); 1223163953Srrs if (rt->mfc_expire) 1224163953Srrs nexpire[hash]--; 1225163953Srrs rt->mfc_expire = 0; 1226163953Srrs break; /* XXX */ 1227163953Srrs } 1228179157Srrs } 1229168299Srrs if (rt == NULL) { /* no upcall, so make a new entry */ 1230163953Srrs rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); 1231163953Srrs if (rt == NULL) { 1232163953Srrs MFC_UNLOCK(); 1233163953Srrs VIF_UNLOCK(); 1234163953Srrs return ENOBUFS; 1235163953Srrs } 1236163953Srrs 1237163953Srrs init_mfc_params(rt, mfccp); 1238179157Srrs rt->mfc_expire = 0; 1239168299Srrs rt->mfc_stall = NULL; 1240163953Srrs 1241163953Srrs rt->mfc_bw_meter = NULL; 1242163953Srrs /* insert new entry at head of hash chain */ 1243163953Srrs rt->mfc_next = mfctable[hash]; 1244163953Srrs mfctable[hash] = rt; 1245163953Srrs } 1246179157Srrs } 1247168299Srrs MFC_UNLOCK(); 1248163953Srrs VIF_UNLOCK(); 1249163953Srrs return 0; 1250163953Srrs} 1251163953Srrs 1252163953Srrs/* 1253163953Srrs * Delete an mfc entry 1254179157Srrs */ 1255168299Srrsstatic int 1256163953Srrsdel_mfc(struct mfcctl2 *mfccp) 1257163953Srrs{ 1258163953Srrs struct in_addr origin; 1259163953Srrs struct in_addr mcastgrp; 1260163953Srrs struct mfc *rt; 1261163953Srrs struct mfc **nptr; 1262179157Srrs u_long hash; 1263168299Srrs struct bw_meter *list; 1264163953Srrs 1265163953Srrs origin = mfccp->mfcc_origin; 1266163953Srrs mcastgrp = mfccp->mfcc_mcastgrp; 1267171943Srrs 1268163953Srrs if (mrtdebug & DEBUG_MFC) 1269163953Srrs log(LOG_DEBUG,"del_mfc orig %lx mcastgrp %lx\n", 1270163953Srrs (u_long)ntohl(origin.s_addr), (u_long)ntohl(mcastgrp.s_addr)); 1271163953Srrs 1272163953Srrs MFC_LOCK(); 1273163953Srrs 1274163953Srrs hash = MFCHASH(origin.s_addr, mcastgrp.s_addr); 1275163953Srrs for (nptr = &mfctable[hash]; (rt = *nptr) != NULL; nptr = &rt->mfc_next) 1276163953Srrs if (origin.s_addr == rt->mfc_origin.s_addr && 1277163953Srrs mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && 1278163953Srrs rt->mfc_stall == NULL) 1279163953Srrs break; 1280163953Srrs if (rt == NULL) { 1281163953Srrs MFC_UNLOCK(); 1282172090Srrs return EADDRNOTAVAIL; 1283163953Srrs } 1284169352Srrs 1285170181Srrs *nptr = rt->mfc_next; 1286163953Srrs 1287163953Srrs /* 1288163953Srrs * free the bw_meter entries 1289163953Srrs */ 1290169420Srrs list = rt->mfc_bw_meter; 1291171943Srrs rt->mfc_bw_meter = NULL; 1292163953Srrs 1293163953Srrs free(rt, M_MRTABLE); 1294179783Srrs 1295171943Srrs free_bw_list(list); 1296171943Srrs 1297171943Srrs MFC_UNLOCK(); 1298171943Srrs 1299171943Srrs return 0; 1300171943Srrs} 1301163953Srrs 1302169378Srrs/* 1303163953Srrs * Send a message to mrouted on the multicast routing socket 1304163953Srrs */ 1305165220Srrsstatic int 1306163953Srrssocket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src) 1307163953Srrs{ 1308163953Srrs if (s) { 1309163953Srrs SOCKBUF_LOCK(&s->so_rcv); 1310163953Srrs if (sbappendaddr_locked(&s->so_rcv, (struct sockaddr *)src, mm, 1311163953Srrs NULL) != 0) { 1312163953Srrs sorwakeup_locked(s); 1313163953Srrs return 0; 1314163953Srrs } 1315163953Srrs SOCKBUF_UNLOCK(&s->so_rcv); 1316163953Srrs } 1317163953Srrs m_freem(mm); 1318163953Srrs return -1; 1319163953Srrs} 1320163953Srrs 1321163953Srrs/* 1322163953Srrs * IP multicast forwarding function. This function assumes that the packet 1323163953Srrs * pointed to by "ip" has arrived on (or is about to be sent to) the interface 1324163953Srrs * pointed to by "ifp", and the packet is to be relayed to other networks 1325163953Srrs * that have members of the packet's destination IP multicast group. 1326163953Srrs * 1327163953Srrs * The packet is returned unscathed to the caller, unless it is 1328163953Srrs * erroneous, in which case a non-zero return value tells the caller to 1329163953Srrs * discard it. 1330169420Srrs */ 1331163953Srrs 1332163953Srrs#define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ 1333163953Srrs 1334185694Srrsstatic int 1335185694SrrsX_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, 1336185694Srrs struct ip_moptions *imo) 1337185694Srrs{ 1338185694Srrs struct mfc *rt; 1339185694Srrs int error; 1340185694Srrs vifi_t vifi; 1341163953Srrs 1342185694Srrs if (mrtdebug & DEBUG_FORWARD) 1343163953Srrs log(LOG_DEBUG, "ip_mforward: src %lx, dst %lx, ifp %p\n", 1344163953Srrs (u_long)ntohl(ip->ip_src.s_addr), (u_long)ntohl(ip->ip_dst.s_addr), 1345163953Srrs (void *)ifp); 1346163953Srrs 1347163953Srrs if (ip->ip_hl < (sizeof(struct ip) + TUNNEL_LEN) >> 2 || 1348163953Srrs ((u_char *)(ip + 1))[1] != IPOPT_LSRR ) { 1349163953Srrs /* 1350163953Srrs * Packet arrived via a physical interface or 1351163953Srrs * an encapsulated tunnel or a register_vif. 1352185694Srrs */ 1353169352Srrs } else { 1354185694Srrs /* 1355163953Srrs * Packet arrived through a source-route tunnel. 1356163953Srrs * Source-route tunnels are no longer supported. 1357163953Srrs */ 1358163953Srrs static int last_log; 1359185694Srrs if (last_log != time_second) { 1360185694Srrs last_log = time_second; 1361185694Srrs log(LOG_ERR, 1362163953Srrs "ip_mforward: received source-routed packet from %lx\n", 1363165220Srrs (u_long)ntohl(ip->ip_src.s_addr)); 1364163953Srrs } 1365164205Srrs return 1; 1366170140Srrs } 1367163953Srrs 1368185694Srrs VIF_LOCK(); 1369163953Srrs MFC_LOCK(); 1370163953Srrs if (imo && ((vifi = imo->imo_multicast_vif) < numvifs)) { 1371166675Srrs if (ip->ip_ttl < 255) 1372166023Srrs ip->ip_ttl++; /* compensate for -1 in *_send routines */ 1373166023Srrs if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { 1374166023Srrs struct vif *vifp = viftable + vifi; 1375166023Srrs 1376166023Srrs printf("Sending IPPROTO_RSVP from %lx to %lx on vif %d (%s%s)\n", 1377166023Srrs (long)ntohl(ip->ip_src.s_addr), (long)ntohl(ip->ip_dst.s_addr), 1378163953Srrs vifi, 1379163953Srrs (vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "", 1380163953Srrs vifp->v_ifp->if_xname); 1381163953Srrs } 1382165647Srrs error = ip_mdq(m, ifp, NULL, vifi); 1383163953Srrs MFC_UNLOCK(); 1384163953Srrs VIF_UNLOCK(); 1385163953Srrs return error; 1386163953Srrs } 1387163953Srrs if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { 1388178251Srrs printf("Warning: IPPROTO_RSVP from %lx to %lx without vif option\n", 1389165647Srrs (long)ntohl(ip->ip_src.s_addr), (long)ntohl(ip->ip_dst.s_addr)); 1390178251Srrs if (!imo) 1391178251Srrs printf("In fact, no options were specified at all\n"); 1392178251Srrs } 1393165647Srrs 1394165647Srrs /* 1395163953Srrs * Don't forward a packet with time-to-live of zero or one, 1396165647Srrs * or a packet destined to a local-only group. 1397163953Srrs */ 1398163953Srrs if (ip->ip_ttl <= 1 || ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP) { 1399163953Srrs MFC_UNLOCK(); 1400169352Srrs VIF_UNLOCK(); 1401179157Srrs return 0; 1402166023Srrs } 1403166023Srrs 1404163953Srrs /* 1405163953Srrs * Determine forwarding vifs from the forwarding cache table 1406163953Srrs */ 1407163953Srrs ++mrtstat.mrts_mfc_lookups; 1408163953Srrs rt = mfc_find(ip->ip_src.s_addr, ip->ip_dst.s_addr); 1409163953Srrs 1410163953Srrs /* Entry exists, so forward if necessary */ 1411163953Srrs if (rt != NULL) { 1412163953Srrs error = ip_mdq(m, ifp, rt, -1); 1413163953Srrs MFC_UNLOCK(); 1414163953Srrs VIF_UNLOCK(); 1415163953Srrs return error; 1416163953Srrs } else { 1417163953Srrs /* 1418163953Srrs * If we don't have a route for packet's origin, 1419163953Srrs * Make a copy of the packet & send message to routing daemon 1420163953Srrs */ 1421163953Srrs 1422163953Srrs struct mbuf *mb0; 1423163953Srrs struct rtdetq *rte; 1424163953Srrs u_long hash; 1425163953Srrs int hlen = ip->ip_hl << 2; 1426163953Srrs 1427163953Srrs ++mrtstat.mrts_mfc_misses; 1428163953Srrs 1429163953Srrs mrtstat.mrts_no_route++; 1430163953Srrs if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC)) 1431163953Srrs log(LOG_DEBUG, "ip_mforward: no rte s %lx g %lx\n", 1432163953Srrs (u_long)ntohl(ip->ip_src.s_addr), 1433163953Srrs (u_long)ntohl(ip->ip_dst.s_addr)); 1434163953Srrs 1435163953Srrs /* 1436163953Srrs * Allocate mbufs early so that we don't do extra work if we are 1437163953Srrs * just going to fail anyway. Make sure to pullup the header so 1438163953Srrs * that other people can't step on it. 1439163953Srrs */ 1440163953Srrs rte = (struct rtdetq *)malloc((sizeof *rte), M_MRTABLE, M_NOWAIT); 1441163953Srrs if (rte == NULL) { 1442163953Srrs MFC_UNLOCK(); 1443163953Srrs VIF_UNLOCK(); 1444163953Srrs return ENOBUFS; 1445163953Srrs } 1446166023Srrs mb0 = m_copypacket(m, M_DONTWAIT); 1447172091Srrs if (mb0 && (M_HASCL(mb0) || mb0->m_len < hlen)) 1448172091Srrs mb0 = m_pullup(mb0, hlen); 1449172091Srrs if (mb0 == NULL) { 1450172091Srrs free(rte, M_MRTABLE); 1451172091Srrs MFC_UNLOCK(); 1452172091Srrs VIF_UNLOCK(); 1453172091Srrs return ENOBUFS; 1454172091Srrs } 1455172091Srrs 1456172091Srrs /* is there an upcall waiting for this flow ? */ 1457172091Srrs hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr); 1458172091Srrs for (rt = mfctable[hash]; rt; rt = rt->mfc_next) { 1459172091Srrs if ((ip->ip_src.s_addr == rt->mfc_origin.s_addr) && 1460172091Srrs (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) && 1461172091Srrs (rt->mfc_stall != NULL)) 1462172091Srrs break; 1463172091Srrs } 1464172091Srrs 1465166023Srrs if (rt == NULL) { 1466163953Srrs int i; 1467163953Srrs struct igmpmsg *im; 1468163953Srrs struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; 1469166023Srrs struct mbuf *mm; 1470163953Srrs 1471169352Srrs /* 1472166023Srrs * Locate the vifi for the incoming interface for this packet. 1473166023Srrs * If none found, drop packet. 1474166023Srrs */ 1475163953Srrs for (vifi=0; vifi < numvifs && viftable[vifi].v_ifp != ifp; vifi++) 1476163953Srrs ; 1477163953Srrs if (vifi >= numvifs) /* vif not found, drop packet */ 1478163953Srrs goto non_fatal; 1479166023Srrs 1480166023Srrs /* no upcall, so make a new entry */ 1481163953Srrs rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); 1482163953Srrs if (rt == NULL) 1483163953Srrs goto fail; 1484163953Srrs /* Make a copy of the header to send to the user level process */ 1485171440Srrs mm = m_copy(mb0, 0, hlen); 1486171440Srrs if (mm == NULL) 1487163953Srrs goto fail1; 1488166675Srrs 1489166675Srrs /* 1490166675Srrs * Send message to routing daemon to install 1491166675Srrs * a route into the kernel table 1492171943Srrs */ 1493171943Srrs 1494163953Srrs im = mtod(mm, struct igmpmsg *); 1495163953Srrs im->im_msgtype = IGMPMSG_NOCACHE; 1496163953Srrs im->im_mbz = 0; 1497163953Srrs im->im_vif = vifi; 1498166675Srrs 1499165220Srrs mrtstat.mrts_upcalls++; 1500163953Srrs 1501163953Srrs k_igmpsrc.sin_addr = ip->ip_src; 1502163953Srrs if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) { 1503163953Srrs log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full\n"); 1504172090Srrs ++mrtstat.mrts_upq_sockfull; 1505172090Srrsfail1: 1506172090Srrs free(rt, M_MRTABLE); 1507172090Srrsfail: 1508163953Srrs free(rte, M_MRTABLE); 1509163953Srrs m_freem(mb0); 1510163953Srrs MFC_UNLOCK(); 1511163953Srrs VIF_UNLOCK(); 1512163953Srrs return ENOBUFS; 1513163953Srrs } 1514163953Srrs 1515163953Srrs /* insert new entry at head of hash chain */ 1516172090Srrs rt->mfc_origin.s_addr = ip->ip_src.s_addr; 1517172090Srrs rt->mfc_mcastgrp.s_addr = ip->ip_dst.s_addr; 1518172090Srrs rt->mfc_expire = UPCALL_EXPIRE; 1519172090Srrs nexpire[hash]++; 1520172090Srrs for (i = 0; i < numvifs; i++) { 1521172090Srrs rt->mfc_ttls[i] = 0; 1522172090Srrs rt->mfc_flags[i] = 0; 1523172090Srrs } 1524172090Srrs rt->mfc_parent = -1; 1525172090Srrs 1526172090Srrs rt->mfc_rp.s_addr = INADDR_ANY; /* clear the RP address */ 1527172090Srrs 1528172090Srrs rt->mfc_bw_meter = NULL; 1529172090Srrs 1530172090Srrs /* link into table */ 1531172090Srrs rt->mfc_next = mfctable[hash]; 1532163953Srrs mfctable[hash] = rt; 1533163953Srrs rt->mfc_stall = rte; 1534163953Srrs 1535163953Srrs } else { 1536163953Srrs /* determine if q has overflowed */ 1537163953Srrs int npkts = 0; 1538163953Srrs struct rtdetq **p; 1539163953Srrs 1540170642Srrs /* 1541171477Srrs * XXX ouch! we need to append to the list, but we 1542163953Srrs * only have a pointer to the front, so we have to 1543163953Srrs * scan the entire list every time. 1544163953Srrs */ 1545163953Srrs for (p = &rt->mfc_stall; *p != NULL; p = &(*p)->next) 1546163953Srrs npkts++; 1547163953Srrs 1548163953Srrs if (npkts > MAX_UPQ) { 1549163953Srrs mrtstat.mrts_upq_ovflw++; 1550163953Srrsnon_fatal: 1551163953Srrs free(rte, M_MRTABLE); 1552163953Srrs m_freem(mb0); 1553163953Srrs MFC_UNLOCK(); 1554163953Srrs VIF_UNLOCK(); 1555163953Srrs return 0; 1556165220Srrs } 1557163953Srrs 1558163953Srrs /* Add this entry to the end of the queue */ 1559163953Srrs *p = rte; 1560163953Srrs } 1561163953Srrs 1562163953Srrs rte->m = mb0; 1563163953Srrs rte->ifp = ifp; 1564163953Srrs rte->next = NULL; 1565166023Srrs 1566166023Srrs MFC_UNLOCK(); 1567163953Srrs VIF_UNLOCK(); 1568163953Srrs 1569163953Srrs return 0; 1570165220Srrs } 1571163953Srrs} 1572166023Srrs 1573166023Srrs/* 1574163953Srrs * Clean up the cache entry if upcall is not serviced 1575169352Srrs */ 1576163953Srrsstatic void 1577163953Srrsexpire_upcalls(void *unused) 1578163953Srrs{ 1579163953Srrs struct rtdetq *rte; 1580163953Srrs struct mfc *mfc, **nptr; 1581163953Srrs int i; 1582163953Srrs 1583166023Srrs MFC_LOCK(); 1584166023Srrs for (i = 0; i < MFCTBLSIZ; i++) { 1585163953Srrs if (nexpire[i] == 0) 1586163953Srrs continue; 1587185694Srrs nptr = &mfctable[i]; 1588185694Srrs for (mfc = *nptr; mfc != NULL; mfc = *nptr) { 1589185694Srrs /* 1590185694Srrs * Skip real cache entries 1591185694Srrs * Make sure it wasn't marked to not expire (shouldn't happen) 1592185694Srrs * If it expires now 1593185694Srrs */ 1594185694Srrs if (mfc->mfc_stall != NULL && mfc->mfc_expire != 0 && 1595185694Srrs --mfc->mfc_expire == 0) { 1596163953Srrs if (mrtdebug & DEBUG_EXPIRE) 1597185694Srrs log(LOG_DEBUG, "expire_upcalls: expiring (%lx %lx)\n", 1598185694Srrs (u_long)ntohl(mfc->mfc_origin.s_addr), 1599185694Srrs (u_long)ntohl(mfc->mfc_mcastgrp.s_addr)); 1600185694Srrs /* 1601185694Srrs * drop all the packets 1602185694Srrs * free the mbuf with the pkt, if, timing info 1603185694Srrs */ 1604185694Srrs for (rte = mfc->mfc_stall; rte; ) { 1605185694Srrs struct rtdetq *n = rte->next; 1606185694Srrs 1607185694Srrs m_freem(rte->m); 1608185694Srrs free(rte, M_MRTABLE); 1609185694Srrs rte = n; 1610185694Srrs } 1611185694Srrs ++mrtstat.mrts_cache_cleanups; 1612185694Srrs nexpire[i]--; 1613185694Srrs 1614185694Srrs /* 1615185694Srrs * free the bw_meter entries 1616185694Srrs */ 1617185694Srrs while (mfc->mfc_bw_meter != NULL) { 1618185694Srrs struct bw_meter *x = mfc->mfc_bw_meter; 1619185694Srrs 1620185694Srrs mfc->mfc_bw_meter = x->bm_mfc_next; 1621185694Srrs free(x, M_BWMETER); 1622185694Srrs } 1623185694Srrs 1624185694Srrs *nptr = mfc->mfc_next; 1625185694Srrs free(mfc, M_MRTABLE); 1626185694Srrs } else { 1627185694Srrs nptr = &mfc->mfc_next; 1628185694Srrs } 1629185694Srrs } 1630185694Srrs } 1631185694Srrs MFC_UNLOCK(); 1632163953Srrs 1633163953Srrs callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, NULL); 1634163953Srrs} 1635166023Srrs 1636166023Srrs/* 1637166023Srrs * Packet forwarding routine once entry in the cache is made 1638166023Srrs */ 1639166023Srrsstatic int 1640166023Srrsip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif) 1641166023Srrs{ 1642166023Srrs struct ip *ip = mtod(m, struct ip *); 1643166023Srrs vifi_t vifi; 1644166023Srrs int plen = ip->ip_len; 1645166023Srrs 1646166023Srrs VIF_LOCK_ASSERT(); 1647166023Srrs/* 1648166023Srrs * Macro to send packet on vif. Since RSVP packets don't get counted on 1649166023Srrs * input, they shouldn't get counted on output, so statistics keeping is 1650166023Srrs * separate. 1651166023Srrs */ 1652166023Srrs#define MC_SEND(ip,vifp,m) { \ 1653166023Srrs if ((vifp)->v_flags & VIFF_TUNNEL) \ 1654166023Srrs encap_send((ip), (vifp), (m)); \ 1655166023Srrs else \ 1656166023Srrs phyint_send((ip), (vifp), (m)); \ 1657166023Srrs} 1658166023Srrs 1659166023Srrs /* 1660171440Srrs * If xmt_vif is not -1, send on only the requested vif. 1661163953Srrs * 1662163953Srrs * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.) 1663163953Srrs */ 1664163953Srrs if (xmt_vif < numvifs) { 1665163953Srrs#ifdef PIM 1666163953Srrs if (viftable[xmt_vif].v_flags & VIFF_REGISTER) 1667163953Srrs pim_register_send(ip, viftable + xmt_vif, m, rt); 1668163953Srrs else 1669163953Srrs#endif 1670163953Srrs MC_SEND(ip, viftable + xmt_vif, m); 1671163953Srrs return 1; 1672163953Srrs } 1673164205Srrs 1674164205Srrs /* 1675164205Srrs * Don't forward if it didn't arrive from the parent vif for its origin. 1676163953Srrs */ 1677164205Srrs vifi = rt->mfc_parent; 1678164205Srrs if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) { 1679164205Srrs /* came in the wrong interface */ 1680164205Srrs if (mrtdebug & DEBUG_FORWARD) 1681164205Srrs log(LOG_DEBUG, "wrong if: ifp %p vifi %d vififp %p\n", 1682164205Srrs (void *)ifp, vifi, (void *)viftable[vifi].v_ifp); 1683164205Srrs ++mrtstat.mrts_wrong_if; 1684164205Srrs ++rt->mfc_wrong_if; 1685164205Srrs /* 1686164205Srrs * If we are doing PIM assert processing, send a message 1687163953Srrs * to the routing daemon. 1688164205Srrs * 1689164205Srrs * XXX: A PIM-SM router needs the WRONGVIF detection so it 1690164205Srrs * can complete the SPT switch, regardless of the type 1691171440Srrs * of the iif (broadcast media, GRE tunnel, etc). 1692171440Srrs */ 1693168124Srrs if (pim_assert && (vifi < numvifs) && viftable[vifi].v_ifp) { 1694164205Srrs struct timeval now; 1695164205Srrs u_long delta; 1696164205Srrs 1697164205Srrs#ifdef PIM 1698164205Srrs if (ifp == &multicast_register_if) 1699163953Srrs pimstat.pims_rcv_registers_wrongiif++; 1700163953Srrs#endif 1701163953Srrs 1702166023Srrs /* Get vifi for the incoming packet */ 1703166023Srrs for (vifi=0; vifi < numvifs && viftable[vifi].v_ifp != ifp; vifi++) 1704163953Srrs ; 1705163953Srrs if (vifi >= numvifs) 1706163953Srrs return 0; /* The iif is not found: ignore the packet. */ 1707163953Srrs 1708163953Srrs if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_DISABLE_WRONGVIF) 1709166023Srrs return 0; /* WRONGVIF disabled: ignore the packet */ 1710166023Srrs 1711163953Srrs GET_TIME(now); 1712163953Srrs 1713163953Srrs TV_DELTA(rt->mfc_last_assert, now, delta); 1714163953Srrs 1715163953Srrs if (delta > ASSERT_MSG_TIME) { 1716163953Srrs struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; 1717163953Srrs struct igmpmsg *im; 1718163953Srrs int hlen = ip->ip_hl << 2; 1719163953Srrs struct mbuf *mm = m_copy(m, 0, hlen); 1720172090Srrs 1721172090Srrs if (mm && (M_HASCL(mm) || mm->m_len < hlen)) 1722172090Srrs mm = m_pullup(mm, hlen); 1723172090Srrs if (mm == NULL) 1724163953Srrs return ENOBUFS; 1725163953Srrs 1726172090Srrs rt->mfc_last_assert = now; 1727172090Srrs 1728172090Srrs im = mtod(mm, struct igmpmsg *); 1729172090Srrs im->im_msgtype = IGMPMSG_WRONGVIF; 1730172090Srrs im->im_mbz = 0; 1731172090Srrs im->im_vif = vifi; 1732172090Srrs 1733172090Srrs mrtstat.mrts_upcalls++; 1734172090Srrs 1735172090Srrs k_igmpsrc.sin_addr = im->im_src; 1736172090Srrs if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) { 1737172090Srrs log(LOG_WARNING, 1738172090Srrs "ip_mforward: ip_mrouter socket queue full\n"); 1739172090Srrs ++mrtstat.mrts_upq_sockfull; 1740172090Srrs return ENOBUFS; 1741172090Srrs } 1742163953Srrs } 1743166675Srrs } 1744166675Srrs return 0; 1745166675Srrs } 1746166675Srrs 1747166675Srrs /* If I sourced this packet, it counts as output, else it was input. */ 1748166675Srrs if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) { 1749166675Srrs viftable[vifi].v_pkt_out++; 1750166675Srrs viftable[vifi].v_bytes_out += plen; 1751166675Srrs } else { 1752163953Srrs viftable[vifi].v_pkt_in++; 1753171943Srrs viftable[vifi].v_bytes_in += plen; 1754163953Srrs } 1755163953Srrs rt->mfc_pkt_cnt++; 1756163953Srrs rt->mfc_byte_cnt += plen; 1757163953Srrs 1758163953Srrs /* 1759165220Srrs * For each vif, decide if a copy of the packet should be forwarded. 1760163953Srrs * Forward if: 1761164205Srrs * - the ttl exceeds the vif's threshold 1762164205Srrs * - there are group members downstream on interface 1763164205Srrs */ 1764164205Srrs for (vifi = 0; vifi < numvifs; vifi++) 1765164205Srrs if ((rt->mfc_ttls[vifi] > 0) && (ip->ip_ttl > rt->mfc_ttls[vifi])) { 1766164205Srrs viftable[vifi].v_pkt_out++; 1767164205Srrs viftable[vifi].v_bytes_out += plen; 1768164205Srrs#ifdef PIM 1769172090Srrs if (viftable[vifi].v_flags & VIFF_REGISTER) 1770164205Srrs pim_register_send(ip, viftable + vifi, m, rt); 1771166023Srrs else 1772166023Srrs#endif 1773166023Srrs MC_SEND(ip, viftable+vifi, m); 1774163953Srrs } 1775163953Srrs 1776163953Srrs /* 1777163953Srrs * Perform upcall-related bw measuring. 1778163953Srrs */ 1779163953Srrs if (rt->mfc_bw_meter != NULL) { 1780163953Srrs struct bw_meter *x; 1781163953Srrs struct timeval now; 1782163953Srrs 1783185694Srrs GET_TIME(now); 1784185694Srrs MFC_LOCK_ASSERT(); 1785185694Srrs for (x = rt->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) 1786185694Srrs bw_meter_receive_packet(x, plen, &now); 1787185694Srrs } 1788185694Srrs 1789185694Srrs return 0; 1790185694Srrs} 1791185694Srrs 1792185694Srrs/* 1793185694Srrs * check if a vif number is legal/ok. This is used by ip_output. 1794163953Srrs */ 1795163953Srrsstatic int 1796163953SrrsX_legal_vif_num(int vif) 1797165220Srrs{ 1798166023Srrs /* XXX unlocked, matter? */ 1799166023Srrs return (vif >= 0 && vif < numvifs); 1800171440Srrs} 1801171440Srrs 1802165647Srrs/* 1803163953Srrs * Return the local address used by this vif 1804163953Srrs */ 1805163953Srrsstatic u_long 1806165220SrrsX_ip_mcast_src(int vifi) 1807166675Srrs{ 1808166675Srrs /* XXX unlocked, matter? */ 1809166675Srrs if (vifi >= 0 && vifi < numvifs) 1810166675Srrs return viftable[vifi].v_lcl_addr.s_addr; 1811166675Srrs else 1812166675Srrs return INADDR_ANY; 1813166675Srrs} 1814166675Srrs 1815166675Srrsstatic void 1816166675Srrsphyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m) 1817165647Srrs{ 1818171943Srrs struct mbuf *mb_copy; 1819165647Srrs int hlen = ip->ip_hl << 2; 1820165647Srrs 1821165647Srrs VIF_LOCK_ASSERT(); 1822165647Srrs 1823165647Srrs /* 1824171943Srrs * Make a new reference to the packet; make sure that 1825165647Srrs * the IP header is actually copied, not just referenced, 1826165647Srrs * so that ip_output() only scribbles on the copy. 1827165647Srrs */ 1828165647Srrs mb_copy = m_copypacket(m, M_DONTWAIT); 1829165647Srrs if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen)) 1830179157Srrs mb_copy = m_pullup(mb_copy, hlen); 1831165647Srrs if (mb_copy == NULL) 1832165647Srrs return; 1833165647Srrs 1834165647Srrs if (vifp->v_rate_limit == 0) 1835165647Srrs tbf_send_packet(vifp, mb_copy); 1836165647Srrs else 1837165647Srrs tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len); 1838170138Srrs} 1839165647Srrs 1840165647Srrsstatic void 1841170138Srrsencap_send(struct ip *ip, struct vif *vifp, struct mbuf *m) 1842185694Srrs{ 1843185694Srrs struct mbuf *mb_copy; 1844185694Srrs struct ip *ip_copy; 1845185694Srrs int i, len = ip->ip_len; 1846185694Srrs 1847185694Srrs VIF_LOCK_ASSERT(); 1848165220Srrs 1849165220Srrs /* Take care of delayed checksums */ 1850165220Srrs if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 1851165220Srrs in_delayed_cksum(m); 1852165220Srrs m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1853163953Srrs } 1854164205Srrs 1855165220Srrs /* 1856172090Srrs * copy the old packet & pullup its IP header into the 1857165220Srrs * new mbuf so we can modify it. Try to fill the new 1858165220Srrs * mbuf since if we don't the ethernet driver will. 1859165220Srrs */ 1860165220Srrs MGETHDR(mb_copy, M_DONTWAIT, MT_HEADER); 1861165220Srrs if (mb_copy == NULL) 1862163953Srrs return; 1863163953Srrs#ifdef MAC 1864163953Srrs mac_create_mbuf_multicast_encap(m, vifp->v_ifp, mb_copy); 1865163953Srrs#endif 1866163953Srrs mb_copy->m_data += max_linkhdr; 1867163953Srrs mb_copy->m_len = sizeof(multicast_encap_iphdr); 1868163953Srrs 1869179783Srrs if ((mb_copy->m_next = m_copypacket(m, M_DONTWAIT)) == NULL) { 1870179783Srrs m_freem(mb_copy); 1871163953Srrs return; 1872163953Srrs } 1873163953Srrs i = MHLEN - M_LEADINGSPACE(mb_copy); 1874163953Srrs if (i > len) 1875163953Srrs i = len; 1876163953Srrs mb_copy = m_pullup(mb_copy, i); 1877163953Srrs if (mb_copy == NULL) 1878164205Srrs return; 1879165220Srrs mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr); 1880165220Srrs 1881165220Srrs /* 1882163953Srrs * fill in the encapsulating IP header. 1883163953Srrs */ 1884166023Srrs ip_copy = mtod(mb_copy, struct ip *); 1885166023Srrs *ip_copy = multicast_encap_iphdr; 1886166023Srrs ip_copy->ip_id = ip_newid(); 1887163953Srrs ip_copy->ip_len += len; 1888163953Srrs ip_copy->ip_src = vifp->v_lcl_addr; 1889163953Srrs ip_copy->ip_dst = vifp->v_rmt_addr; 1890163953Srrs 1891163953Srrs /* 1892163953Srrs * turn the encapsulated IP header back into a valid one. 1893163953Srrs */ 1894163953Srrs ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr)); 1895163953Srrs --ip->ip_ttl; 1896163953Srrs ip->ip_len = htons(ip->ip_len); 1897163953Srrs ip->ip_off = htons(ip->ip_off); 1898166023Srrs ip->ip_sum = 0; 1899166023Srrs mb_copy->m_data += sizeof(multicast_encap_iphdr); 1900166023Srrs ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); 1901163953Srrs mb_copy->m_data -= sizeof(multicast_encap_iphdr); 1902163953Srrs 1903163953Srrs if (vifp->v_rate_limit == 0) 1904163953Srrs tbf_send_packet(vifp, mb_copy); 1905165220Srrs else 1906163953Srrs tbf_control(vifp, mb_copy, ip, ip_copy->ip_len); 1907166023Srrs} 1908166023Srrs 1909163953Srrs/* 1910163953Srrs * Token bucket filter module 1911163953Srrs */ 1912166023Srrs 1913166023Srrsstatic void 1914163953Srrstbf_control(struct vif *vifp, struct mbuf *m, struct ip *ip, u_long p_len) 1915163953Srrs{ 1916163953Srrs struct tbf *t = vifp->v_tbf; 1917163953Srrs 1918166086Srrs VIF_LOCK_ASSERT(); 1919163953Srrs 1920163953Srrs if (p_len > MAX_BKT_SIZE) { /* drop if packet is too large */ 1921163953Srrs mrtstat.mrts_pkt2large++; 1922163953Srrs m_freem(m); 1923163953Srrs return; 1924163953Srrs } 1925163953Srrs 1926185694Srrs tbf_update_tokens(vifp); 1927163953Srrs 1928163953Srrs if (t->tbf_q_len == 0) { /* queue empty... */ 1929163953Srrs if (p_len <= t->tbf_n_tok) { /* send packet if enough tokens */ 1930163953Srrs t->tbf_n_tok -= p_len; 1931169352Srrs tbf_send_packet(vifp, m); 1932179157Srrs } else { /* no, queue packet and try later */ 1933163953Srrs tbf_queue(vifp, m); 1934163953Srrs callout_reset(&tbf_reprocess_ch, TBF_REPROCESS, 1935163953Srrs tbf_reprocess_q, vifp); 1936163953Srrs } 1937163953Srrs } else if (t->tbf_q_len < t->tbf_max_q_len) { 1938163953Srrs /* finite queue length, so queue pkts and process queue */ 1939163953Srrs tbf_queue(vifp, m); 1940163953Srrs tbf_process_q(vifp); 1941163953Srrs } else { 1942163953Srrs /* queue full, try to dq and queue and process */ 1943163953Srrs if (!tbf_dq_sel(vifp, ip)) { 1944163953Srrs mrtstat.mrts_q_overflow++; 1945163953Srrs m_freem(m); 1946163953Srrs } else { 1947166675Srrs tbf_queue(vifp, m); 1948163953Srrs tbf_process_q(vifp); 1949172090Srrs } 1950172090Srrs } 1951172090Srrs} 1952172090Srrs 1953172090Srrs/* 1954172090Srrs * adds a packet to the queue at the interface 1955163953Srrs */ 1956163953Srrsstatic void 1957163953Srrstbf_queue(struct vif *vifp, struct mbuf *m) 1958163953Srrs{ 1959163953Srrs struct tbf *t = vifp->v_tbf; 1960163953Srrs 1961163953Srrs VIF_LOCK_ASSERT(); 1962163953Srrs 1963163953Srrs if (t->tbf_t == NULL) /* Queue was empty */ 1964163953Srrs t->tbf_q = m; 1965163953Srrs else /* Insert at tail */ 1966169420Srrs t->tbf_t->m_act = m; 1967169420Srrs 1968163953Srrs t->tbf_t = m; /* Set new tail pointer */ 1969163953Srrs 1970163953Srrs#ifdef DIAGNOSTIC 1971163953Srrs /* Make sure we didn't get fed a bogus mbuf */ 1972169420Srrs if (m->m_act) 1973163953Srrs panic("tbf_queue: m_act"); 1974163953Srrs#endif 1975163953Srrs m->m_act = NULL; 1976163953Srrs 1977163953Srrs t->tbf_q_len++; 1978163953Srrs} 1979163953Srrs 1980163953Srrs/* 1981163953Srrs * processes the queue at the interface 1982163953Srrs */ 1983163953Srrsstatic void 1984163953Srrstbf_process_q(struct vif *vifp) 1985169420Srrs{ 1986163953Srrs struct tbf *t = vifp->v_tbf; 1987163953Srrs 1988163953Srrs VIF_LOCK_ASSERT(); 1989163953Srrs 1990163953Srrs /* loop through the queue at the interface and send as many packets 1991163953Srrs * as possible 1992163953Srrs */ 1993163953Srrs while (t->tbf_q_len > 0) { 1994163953Srrs struct mbuf *m = t->tbf_q; 1995163953Srrs int len = mtod(m, struct ip *)->ip_len; 1996163953Srrs 1997163953Srrs /* determine if the packet can be sent */ 1998163953Srrs if (len > t->tbf_n_tok) /* not enough tokens, we are done */ 1999163953Srrs break; 2000163953Srrs /* ok, reduce no of tokens, dequeue and send the packet. */ 2001163953Srrs t->tbf_n_tok -= len; 2002163953Srrs 2003163953Srrs t->tbf_q = m->m_act; 2004171531Srrs if (--t->tbf_q_len == 0) 2005171531Srrs t->tbf_t = NULL; 2006171531Srrs 2007171531Srrs m->m_act = NULL; 2008171531Srrs tbf_send_packet(vifp, m); 2009171531Srrs } 2010171531Srrs} 2011163953Srrs 2012171531Srrsstatic void 2013171531Srrstbf_reprocess_q(void *xvifp) 2014171531Srrs{ 2015163953Srrs struct vif *vifp = xvifp; 2016163953Srrs 2017163953Srrs if (ip_mrouter == NULL) 2018163953Srrs return; 2019169420Srrs VIF_LOCK(); 2020169420Srrs tbf_update_tokens(vifp); 2021163953Srrs tbf_process_q(vifp); 2022170140Srrs if (vifp->v_tbf->tbf_q_len) 2023163953Srrs callout_reset(&tbf_reprocess_ch, TBF_REPROCESS, tbf_reprocess_q, vifp); 2024179157Srrs VIF_UNLOCK(); 2025163953Srrs} 2026163953Srrs 2027163953Srrs/* function that will selectively discard a member of the queue 2028169420Srrs * based on the precedence value and the priority 2029169420Srrs */ 2030169420Srrsstatic int 2031163953Srrstbf_dq_sel(struct vif *vifp, struct ip *ip) 2032163953Srrs{ 2033163953Srrs u_int p; 2034163953Srrs struct mbuf *m, *last; 2035163953Srrs struct mbuf **np; 2036163953Srrs struct tbf *t = vifp->v_tbf; 2037163953Srrs 2038163953Srrs VIF_LOCK_ASSERT(); 2039163953Srrs 2040163953Srrs p = priority(vifp, ip); 2041163953Srrs 2042163953Srrs np = &t->tbf_q; 2043163953Srrs last = NULL; 2044163953Srrs while ((m = *np) != NULL) { 2045163953Srrs if (p > priority(vifp, mtod(m, struct ip *))) { 2046163953Srrs *np = m->m_act; 2047170140Srrs /* If we're removing the last packet, fix the tail pointer */ 2048163953Srrs if (m == t->tbf_t) 2049163953Srrs t->tbf_t = last; 2050179157Srrs m_freem(m); 2051172090Srrs /* It's impossible for the queue to be empty, but check anyways. */ 2052172090Srrs if (--t->tbf_q_len == 0) 2053172090Srrs t->tbf_t = NULL; 2054172090Srrs mrtstat.mrts_drop_sel++; 2055172090Srrs return 1; 2056171943Srrs } 2057171440Srrs np = &m->m_act; 2058172090Srrs last = m; 2059172090Srrs } 2060172090Srrs return 0; 2061172090Srrs} 2062163953Srrs 2063163953Srrsstatic void 2064163953Srrstbf_send_packet(struct vif *vifp, struct mbuf *m) 2065163953Srrs{ 2066185694Srrs VIF_LOCK_ASSERT(); 2067163953Srrs 2068163953Srrs if (vifp->v_flags & VIFF_TUNNEL) /* If tunnel options */ 2069163953Srrs ip_output(m, NULL, &vifp->v_route, IP_FORWARDING, NULL, NULL); 2070163953Srrs else { 2071179157Srrs struct ip_moptions imo; 2072163953Srrs int error; 2073163953Srrs static struct route ro; /* XXX check this */ 2074163953Srrs 2075163953Srrs imo.imo_multicast_ifp = vifp->v_ifp; 2076163953Srrs imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1; 2077163953Srrs imo.imo_multicast_loop = 1; 2078163953Srrs imo.imo_multicast_vif = -1; 2079169420Srrs 2080169420Srrs /* 2081169420Srrs * Re-entrancy should not be a problem here, because 2082169420Srrs * the packets that we send out and are looped back at us 2083163953Srrs * should get rejected because they appear to come from 2084170140Srrs * the loopback interface, thus preventing looping. 2085172090Srrs */ 2086172090Srrs error = ip_output(m, NULL, &ro, IP_FORWARDING, &imo, NULL); 2087172090Srrs 2088172090Srrs if (mrtdebug & DEBUG_XMIT) 2089172090Srrs log(LOG_DEBUG, "phyint_send on vif %d err %d\n", 2090171943Srrs (int)(vifp - viftable), error); 2091172090Srrs } 2092172090Srrs} 2093172090Srrs 2094172090Srrs/* determine the current time and then 2095163953Srrs * the elapsed time (between the last time and time now) 2096163953Srrs * in milliseconds & update the no. of tokens in the bucket 2097163953Srrs */ 2098163953Srrsstatic void 2099163953Srrstbf_update_tokens(struct vif *vifp) 2100163953Srrs{ 2101170140Srrs struct timeval tp; 2102172090Srrs u_long tm; 2103172090Srrs struct tbf *t = vifp->v_tbf; 2104172090Srrs 2105172090Srrs VIF_LOCK_ASSERT(); 2106172090Srrs 2107171943Srrs GET_TIME(tp); 2108172090Srrs 2109172090Srrs TV_DELTA(tp, t->tbf_last_pkt_t, tm); 2110172090Srrs 2111172090Srrs /* 2112163953Srrs * This formula is actually 2113163953Srrs * "time in seconds" * "bytes/second". 2114163953Srrs * 2115163953Srrs * (tm / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8) 2116163953Srrs * 2117163953Srrs * The (1000/1024) was introduced in add_vif to optimize 2118163953Srrs * this divide into a shift. 2119163953Srrs */ 2120163953Srrs t->tbf_n_tok += tm * vifp->v_rate_limit / 1024 / 8; 2121163953Srrs t->tbf_last_pkt_t = tp; 2122163953Srrs 2123163953Srrs if (t->tbf_n_tok > MAX_BKT_SIZE) 2124163953Srrs t->tbf_n_tok = MAX_BKT_SIZE; 2125166675Srrs} 2126169420Srrs 2127163953Srrsstatic int 2128169420Srrspriority(struct vif *vifp, struct ip *ip) 2129169420Srrs{ 2130171440Srrs int prio = 50; /* the lowest priority -- default case */ 2131172090Srrs 2132172090Srrs /* temporary hack; may add general packet classifier some day */ 2133172090Srrs 2134172090Srrs /* 2135172090Srrs * The UDP port space is divided up into four priority ranges: 2136171943Srrs * [0, 16384) : unclassified - lowest priority 2137172090Srrs * [16384, 32768) : audio - highest priority 2138172090Srrs * [32768, 49152) : whiteboard - medium priority 2139172090Srrs * [49152, 65536) : video - low priority 2140172090Srrs * 2141163953Srrs * Everything else gets lowest priority. 2142163953Srrs */ 2143163953Srrs if (ip->ip_p == IPPROTO_UDP) { 2144163953Srrs struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2)); 2145163953Srrs switch (ntohs(udp->uh_dport) & 0xc000) { 2146163953Srrs case 0x4000: 2147163953Srrs prio = 70; 2148169420Srrs break; 2149171943Srrs case 0x8000: 2150163953Srrs prio = 60; 2151163953Srrs break; 2152163953Srrs case 0xc000: 2153163953Srrs prio = 55; 2154165220Srrs break; 2155163953Srrs } 2156163953Srrs } 2157163953Srrs return prio; 2158163953Srrs} 2159163953Srrs 2160163953Srrs/* 2161163953Srrs * End of token bucket filter modifications 2162163953Srrs */ 2163163953Srrs 2164163953Srrsstatic int 2165163953SrrsX_ip_rsvp_vif(struct socket *so, struct sockopt *sopt) 2166163953Srrs{ 2167163953Srrs int error, vifi; 2168163953Srrs 2169163953Srrs if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) 2170163953Srrs return EOPNOTSUPP; 2171163953Srrs 2172163953Srrs error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi); 2173163953Srrs if (error) 2174163953Srrs return error; 2175163953Srrs 2176163953Srrs VIF_LOCK(); 2177163953Srrs 2178163953Srrs if (vifi < 0 || vifi >= numvifs) { /* Error if vif is invalid */ 2179163953Srrs VIF_UNLOCK(); 2180163953Srrs return EADDRNOTAVAIL; 2181163953Srrs } 2182163953Srrs 2183163953Srrs if (sopt->sopt_name == IP_RSVP_VIF_ON) { 2184163953Srrs /* Check if socket is available. */ 2185163953Srrs if (viftable[vifi].v_rsvpd != NULL) { 2186170140Srrs VIF_UNLOCK(); 2187172090Srrs return EADDRINUSE; 2188172090Srrs } 2189172090Srrs 2190172090Srrs viftable[vifi].v_rsvpd = so; 2191172090Srrs /* This may seem silly, but we need to be sure we don't over-increment 2192171943Srrs * the RSVP counter, in case something slips up. 2193172090Srrs */ 2194172090Srrs if (!viftable[vifi].v_rsvp_on) { 2195172090Srrs viftable[vifi].v_rsvp_on = 1; 2196172090Srrs rsvp_on++; 2197163953Srrs } 2198163953Srrs } else { /* must be VIF_OFF */ 2199163953Srrs /* 2200163953Srrs * XXX as an additional consistency check, one could make sure 2201163953Srrs * that viftable[vifi].v_rsvpd == so, otherwise passing so as 2202163953Srrs * first parameter is pretty useless. 2203163953Srrs */ 2204163953Srrs viftable[vifi].v_rsvpd = NULL; 2205163953Srrs /* 2206163953Srrs * This may seem silly, but we need to be sure we don't over-decrement 2207163953Srrs * the RSVP counter, in case something slips up. 2208163953Srrs */ 2209163953Srrs if (viftable[vifi].v_rsvp_on) { 2210163953Srrs viftable[vifi].v_rsvp_on = 0; 2211163953Srrs rsvp_on--; 2212163953Srrs } 2213163953Srrs } 2214172090Srrs VIF_UNLOCK(); 2215172090Srrs return 0; 2216172090Srrs} 2217172090Srrs 2218172090Srrsstatic void 2219172090SrrsX_ip_rsvp_force_done(struct socket *so) 2220172090Srrs{ 2221172090Srrs int vifi; 2222172090Srrs 2223172090Srrs /* Don't bother if it is not the right type of socket. */ 2224172090Srrs if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) 2225172090Srrs return; 2226172090Srrs 2227172090Srrs VIF_LOCK(); 2228172090Srrs 2229163953Srrs /* The socket may be attached to more than one vif...this 2230163953Srrs * is perfectly legal. 2231163953Srrs */ 2232163953Srrs for (vifi = 0; vifi < numvifs; vifi++) { 2233163953Srrs if (viftable[vifi].v_rsvpd == so) { 2234163953Srrs viftable[vifi].v_rsvpd = NULL; 2235163953Srrs /* This may seem silly, but we need to be sure we don't 2236163953Srrs * over-decrement the RSVP counter, in case something slips up. 2237163953Srrs */ 2238163953Srrs if (viftable[vifi].v_rsvp_on) { 2239169420Srrs viftable[vifi].v_rsvp_on = 0; 2240169420Srrs rsvp_on--; 2241163953Srrs } 2242163953Srrs } 2243163953Srrs } 2244163953Srrs 2245163953Srrs VIF_UNLOCK(); 2246165220Srrs} 2247171440Srrs 2248170642Srrsstatic void 2249169420SrrsX_rsvp_input(struct mbuf *m, int off) 2250171477Srrs{ 2251170642Srrs int vifi; 2252172190Srrs struct ip *ip = mtod(m, struct ip *); 2253163953Srrs struct sockaddr_in rsvp_src = { sizeof rsvp_src, AF_INET }; 2254172190Srrs struct ifnet *ifp; 2255172190Srrs 2256172190Srrs if (rsvpdebug) 2257172190Srrs printf("rsvp_input: rsvp_on %d\n",rsvp_on); 2258172190Srrs 2259172190Srrs /* Can still get packets with rsvp_on = 0 if there is a local member 2260172190Srrs * of the group to which the RSVP packet is addressed. But in this 2261172190Srrs * case we want to throw the packet away. 2262172190Srrs */ 2263172190Srrs if (!rsvp_on) { 2264172190Srrs m_freem(m); 2265172190Srrs return; 2266163953Srrs } 2267163953Srrs 2268163953Srrs if (rsvpdebug) 2269185694Srrs printf("rsvp_input: check vifs\n"); 2270185694Srrs 2271185694Srrs#ifdef DIAGNOSTIC 2272185694Srrs M_ASSERTPKTHDR(m); 2273185694Srrs#endif 2274185694Srrs 2275185694Srrs ifp = m->m_pkthdr.rcvif; 2276185694Srrs 2277185694Srrs VIF_LOCK(); 2278185694Srrs /* Find which vif the packet arrived on. */ 2279185694Srrs for (vifi = 0; vifi < numvifs; vifi++) 2280185694Srrs if (viftable[vifi].v_ifp == ifp) 2281163953Srrs break; 2282163953Srrs 2283163953Srrs if (vifi == numvifs || viftable[vifi].v_rsvpd == NULL) { 2284163953Srrs /* 2285163953Srrs * Drop the lock here to avoid holding it across rip_input. 2286163953Srrs * This could make rsvpdebug printfs wrong. If you care, 2287163953Srrs * record the state of stuff before dropping the lock. 2288163953Srrs */ 2289163953Srrs VIF_UNLOCK(); 2290169352Srrs /* 2291179157Srrs * If the old-style non-vif-associated socket is set, 2292163953Srrs * then use it. Otherwise, drop packet since there 2293163953Srrs * is no specific socket for this vif. 2294163953Srrs */ 2295163953Srrs if (ip_rsvpd != NULL) { 2296163953Srrs if (rsvpdebug) 2297163953Srrs printf("rsvp_input: Sending packet up old-style socket\n"); 2298163953Srrs rip_input(m, off); /* xxx */ 2299163953Srrs } else { 2300163953Srrs if (rsvpdebug && vifi == numvifs) 2301163953Srrs printf("rsvp_input: Can't find vif for packet.\n"); 2302163953Srrs else if (rsvpdebug && viftable[vifi].v_rsvpd == NULL) 2303163953Srrs printf("rsvp_input: No socket defined for vif %d\n",vifi); 2304163953Srrs m_freem(m); 2305163953Srrs } 2306163953Srrs return; 2307163953Srrs } 2308163953Srrs rsvp_src.sin_addr = ip->ip_src; 2309163953Srrs 2310163953Srrs if (rsvpdebug && m) 2311163953Srrs printf("rsvp_input: m->m_len = %d, sbspace() = %ld\n", 2312163953Srrs m->m_len,sbspace(&(viftable[vifi].v_rsvpd->so_rcv))); 2313163953Srrs 2314163953Srrs if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0) { 2315163953Srrs if (rsvpdebug) 2316169420Srrs printf("rsvp_input: Failed to append to socket\n"); 2317169420Srrs } else { 2318163953Srrs if (rsvpdebug) 2319163953Srrs printf("rsvp_input: send packet up\n"); 2320163953Srrs } 2321163953Srrs VIF_UNLOCK(); 2322163953Srrs} 2323163953Srrs 2324178251Srrs/* 2325178251Srrs * Code for bandwidth monitors 2326178251Srrs */ 2327178251Srrs 2328178251Srrs/* 2329163953Srrs * Define common interface for timeval-related methods 2330178251Srrs */ 2331178251Srrs#define BW_TIMEVALCMP(tvp, uvp, cmp) timevalcmp((tvp), (uvp), cmp) 2332178251Srrs#define BW_TIMEVALDECR(vvp, uvp) timevalsub((vvp), (uvp)) 2333178251Srrs#define BW_TIMEVALADD(vvp, uvp) timevaladd((vvp), (uvp)) 2334178251Srrs 2335178251Srrsstatic uint32_t 2336178251Srrscompute_bw_meter_flags(struct bw_upcall *req) 2337178251Srrs{ 2338178251Srrs uint32_t flags = 0; 2339178251Srrs 2340178251Srrs if (req->bu_flags & BW_UPCALL_UNIT_PACKETS) 2341178251Srrs flags |= BW_METER_UNIT_PACKETS; 2342178251Srrs if (req->bu_flags & BW_UPCALL_UNIT_BYTES) 2343178251Srrs flags |= BW_METER_UNIT_BYTES; 2344178251Srrs if (req->bu_flags & BW_UPCALL_GEQ) 2345163953Srrs flags |= BW_METER_GEQ; 2346178251Srrs if (req->bu_flags & BW_UPCALL_LEQ) 2347178251Srrs flags |= BW_METER_LEQ; 2348178251Srrs 2349178251Srrs return flags; 2350178251Srrs} 2351178251Srrs 2352178251Srrs/* 2353178251Srrs * Add a bw_meter entry 2354178251Srrs */ 2355178251Srrsstatic int 2356178251Srrsadd_bw_upcall(struct bw_upcall *req) 2357178251Srrs{ 2358163953Srrs struct mfc *mfc; 2359163953Srrs struct timeval delta = { BW_UPCALL_THRESHOLD_INTERVAL_MIN_SEC, 2360163953Srrs BW_UPCALL_THRESHOLD_INTERVAL_MIN_USEC }; 2361163953Srrs struct timeval now; 2362163953Srrs struct bw_meter *x; 2363163953Srrs uint32_t flags; 2364163953Srrs 2365163953Srrs if (!(mrt_api_config & MRT_MFC_BW_UPCALL)) 2366163953Srrs return EOPNOTSUPP; 2367163953Srrs 2368163953Srrs /* Test if the flags are valid */ 2369163953Srrs if (!(req->bu_flags & (BW_UPCALL_UNIT_PACKETS | BW_UPCALL_UNIT_BYTES))) 2370163953Srrs return EINVAL; 2371163953Srrs if (!(req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ))) 2372169352Srrs return EINVAL; 2373163953Srrs if ((req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ)) 2374163953Srrs == (BW_UPCALL_GEQ | BW_UPCALL_LEQ)) 2375163953Srrs return EINVAL; 2376163953Srrs 2377163953Srrs /* Test if the threshold time interval is valid */ 2378163953Srrs if (BW_TIMEVALCMP(&req->bu_threshold.b_time, &delta, <)) 2379163953Srrs return EINVAL; 2380163953Srrs 2381163953Srrs flags = compute_bw_meter_flags(req); 2382163953Srrs 2383163953Srrs /* 2384163953Srrs * Find if we have already same bw_meter entry 2385163953Srrs */ 2386163953Srrs MFC_LOCK(); 2387163953Srrs mfc = mfc_find(req->bu_src.s_addr, req->bu_dst.s_addr); 2388163953Srrs if (mfc == NULL) { 2389163953Srrs MFC_UNLOCK(); 2390163953Srrs return EADDRNOTAVAIL; 2391163953Srrs } 2392163953Srrs for (x = mfc->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) { 2393163953Srrs if ((BW_TIMEVALCMP(&x->bm_threshold.b_time, 2394163953Srrs &req->bu_threshold.b_time, ==)) && 2395163953Srrs (x->bm_threshold.b_packets == req->bu_threshold.b_packets) && 2396163953Srrs (x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) && 2397163953Srrs (x->bm_flags & BW_METER_USER_FLAGS) == flags) { 2398163953Srrs MFC_UNLOCK(); 2399178202Srrs return 0; /* XXX Already installed */ 2400179783Srrs } 2401178202Srrs } 2402178202Srrs 2403178202Srrs /* Allocate the new bw_meter entry */ 2404178202Srrs x = (struct bw_meter *)malloc(sizeof(*x), M_BWMETER, M_NOWAIT); 2405178202Srrs if (x == NULL) { 2406178202Srrs MFC_UNLOCK(); 2407178202Srrs return ENOBUFS; 2408178202Srrs } 2409178202Srrs 2410178202Srrs /* Set the new bw_meter entry */ 2411178202Srrs x->bm_threshold.b_time = req->bu_threshold.b_time; 2412178202Srrs GET_TIME(now); 2413163953Srrs x->bm_start_time = now; 2414163953Srrs x->bm_threshold.b_packets = req->bu_threshold.b_packets; 2415163953Srrs x->bm_threshold.b_bytes = req->bu_threshold.b_bytes; 2416163953Srrs x->bm_measured.b_packets = 0; 2417163953Srrs x->bm_measured.b_bytes = 0; 2418163953Srrs x->bm_flags = flags; 2419163953Srrs x->bm_time_next = NULL; 2420163953Srrs x->bm_time_hash = BW_METER_BUCKETS; 2421163953Srrs 2422163953Srrs /* Add the new bw_meter entry to the front of entries for this MFC */ 2423163953Srrs x->bm_mfc = mfc; 2424163953Srrs x->bm_mfc_next = mfc->mfc_bw_meter; 2425163953Srrs mfc->mfc_bw_meter = x; 2426163953Srrs schedule_bw_meter(x, &now); 2427163953Srrs MFC_UNLOCK(); 2428163953Srrs 2429169378Srrs return 0; 2430163953Srrs} 2431170462Srrs 2432163953Srrsstatic void 2433163953Srrsfree_bw_list(struct bw_meter *list) 2434169378Srrs{ 2435163953Srrs while (list != NULL) { 2436170462Srrs struct bw_meter *x = list; 2437163953Srrs 2438163953Srrs list = list->bm_mfc_next; 2439163953Srrs unschedule_bw_meter(x); 2440163953Srrs free(x, M_BWMETER); 2441163953Srrs } 2442163953Srrs} 2443164085Srrs 2444163953Srrs/* 2445163953Srrs * Delete one or multiple bw_meter entries 2446163953Srrs */ 2447163953Srrsstatic int 2448163953Srrsdel_bw_upcall(struct bw_upcall *req) 2449163953Srrs{ 2450163953Srrs struct mfc *mfc; 2451163953Srrs struct bw_meter *x; 2452169378Srrs 2453163953Srrs if (!(mrt_api_config & MRT_MFC_BW_UPCALL)) 2454170462Srrs return EOPNOTSUPP; 2455163953Srrs 2456163953Srrs MFC_LOCK(); 2457163953Srrs /* Find the corresponding MFC entry */ 2458163953Srrs mfc = mfc_find(req->bu_src.s_addr, req->bu_dst.s_addr); 2459163953Srrs if (mfc == NULL) { 2460163953Srrs MFC_UNLOCK(); 2461163953Srrs return EADDRNOTAVAIL; 2462163953Srrs } else if (req->bu_flags & BW_UPCALL_DELETE_ALL) { 2463163953Srrs /* 2464163953Srrs * Delete all bw_meter entries for this mfc 2465163953Srrs */ 2466163953Srrs struct bw_meter *list; 2467163953Srrs 2468163953Srrs list = mfc->mfc_bw_meter; 2469163953Srrs mfc->mfc_bw_meter = NULL; 2470163953Srrs free_bw_list(list); 2471165647Srrs MFC_UNLOCK(); 2472165647Srrs return 0; 2473163953Srrs } else { /* Delete a single bw_meter entry */ 2474165647Srrs struct bw_meter *prev; 2475163953Srrs uint32_t flags = 0; 2476163953Srrs 2477163953Srrs flags = compute_bw_meter_flags(req); 2478169420Srrs 2479169420Srrs /* Find the bw_meter entry to delete */ 2480169420Srrs for (prev = NULL, x = mfc->mfc_bw_meter; x != NULL; 2481169420Srrs prev = x, x = x->bm_mfc_next) { 2482163953Srrs if ((BW_TIMEVALCMP(&x->bm_threshold.b_time, 2483163953Srrs &req->bu_threshold.b_time, ==)) && 2484163953Srrs (x->bm_threshold.b_packets == req->bu_threshold.b_packets) && 2485163953Srrs (x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) && 2486163953Srrs (x->bm_flags & BW_METER_USER_FLAGS) == flags) 2487169378Srrs break; 2488163953Srrs } 2489169655Srrs if (x != NULL) { /* Delete entry from the list for this MFC */ 2490163953Srrs if (prev != NULL) 2491180387Srrs prev->bm_mfc_next = x->bm_mfc_next; /* remove from middle*/ 2492180387Srrs else 2493180387Srrs x->bm_mfc->mfc_bw_meter = x->bm_mfc_next;/* new head of list */ 2494180387Srrs 2495163953Srrs unschedule_bw_meter(x); 2496163953Srrs MFC_UNLOCK(); 2497163953Srrs /* Free the bw_meter entry */ 2498163953Srrs free(x, M_BWMETER); 2499163953Srrs return 0; 2500163953Srrs } else { 2501163953Srrs MFC_UNLOCK(); 2502165647Srrs return EINVAL; 2503163953Srrs } 2504163953Srrs } 2505163953Srrs /* NOTREACHED */ 2506163953Srrs} 2507163953Srrs 2508178251Srrs/* 2509165647Srrs * Perform bandwidth measurement processing that may result in an upcall 2510178251Srrs */ 2511178251Srrsstatic void 2512178251Srrsbw_meter_receive_packet(struct bw_meter *x, int plen, struct timeval *nowp) 2513165647Srrs{ 2514165647Srrs struct timeval delta; 2515163953Srrs 2516163953Srrs MFC_LOCK_ASSERT(); 2517165647Srrs 2518163953Srrs delta = *nowp; 2519163953Srrs BW_TIMEVALDECR(&delta, &x->bm_start_time); 2520163953Srrs 2521163953Srrs if (x->bm_flags & BW_METER_GEQ) { 2522163953Srrs /* 2523163953Srrs * Processing for ">=" type of bw_meter entry 2524163953Srrs */ 2525163953Srrs if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) { 2526163953Srrs /* Reset the bw_meter entry */ 2527163953Srrs x->bm_start_time = *nowp; 2528169352Srrs x->bm_measured.b_packets = 0; 2529179157Srrs x->bm_measured.b_bytes = 0; 2530163953Srrs x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; 2531163953Srrs } 2532163953Srrs 2533163953Srrs /* Record that a packet is received */ 2534163953Srrs x->bm_measured.b_packets++; 2535163953Srrs x->bm_measured.b_bytes += plen; 2536163953Srrs 2537163953Srrs /* 2538163953Srrs * Test if we should deliver an upcall 2539163953Srrs */ 2540163953Srrs if (!(x->bm_flags & BW_METER_UPCALL_DELIVERED)) { 2541163953Srrs if (((x->bm_flags & BW_METER_UNIT_PACKETS) && 2542163953Srrs (x->bm_measured.b_packets >= x->bm_threshold.b_packets)) || 2543163953Srrs ((x->bm_flags & BW_METER_UNIT_BYTES) && 2544163953Srrs (x->bm_measured.b_bytes >= x->bm_threshold.b_bytes))) { 2545163953Srrs /* Prepare an upcall for delivery */ 2546163953Srrs bw_meter_prepare_upcall(x, nowp); 2547163953Srrs x->bm_flags |= BW_METER_UPCALL_DELIVERED; 2548163953Srrs } 2549163953Srrs } 2550163953Srrs } else if (x->bm_flags & BW_METER_LEQ) { 2551163953Srrs /* 2552163953Srrs * Processing for "<=" type of bw_meter entry 2553163953Srrs */ 2554163953Srrs if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) { 2555163953Srrs /* 2556163953Srrs * We are behind time with the multicast forwarding table 2557163953Srrs * scanning for "<=" type of bw_meter entries, so test now 2558169420Srrs * if we should deliver an upcall. 2559169420Srrs */ 2560169420Srrs if (((x->bm_flags & BW_METER_UNIT_PACKETS) && 2561163953Srrs (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) || 2562163953Srrs ((x->bm_flags & BW_METER_UNIT_BYTES) && 2563163953Srrs (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) { 2564163953Srrs /* Prepare an upcall for delivery */ 2565163953Srrs bw_meter_prepare_upcall(x, nowp); 2566163953Srrs } 2567163953Srrs /* Reschedule the bw_meter entry */ 2568163953Srrs unschedule_bw_meter(x); 2569163953Srrs schedule_bw_meter(x, nowp); 2570163953Srrs } 2571163953Srrs 2572169420Srrs /* Record that a packet is received */ 2573163953Srrs x->bm_measured.b_packets++; 2574165220Srrs x->bm_measured.b_bytes += plen; 2575165220Srrs 2576165220Srrs /* 2577165220Srrs * Test if we should restart the measuring interval 2578165220Srrs */ 2579165220Srrs if ((x->bm_flags & BW_METER_UNIT_PACKETS && 2580165220Srrs x->bm_measured.b_packets <= x->bm_threshold.b_packets) || 2581165220Srrs (x->bm_flags & BW_METER_UNIT_BYTES && 2582165220Srrs x->bm_measured.b_bytes <= x->bm_threshold.b_bytes)) { 2583165220Srrs /* Don't restart the measuring interval */ 2584165220Srrs } else { 2585165220Srrs /* Do restart the measuring interval */ 2586165220Srrs /* 2587165220Srrs * XXX: note that we don't unschedule and schedule, because this 2588165220Srrs * might be too much overhead per packet. Instead, when we process 2589165220Srrs * all entries for a given timer hash bin, we check whether it is 2590165220Srrs * really a timeout. If not, we reschedule at that time. 2591165220Srrs */ 2592165220Srrs x->bm_start_time = *nowp; 2593165220Srrs x->bm_measured.b_packets = 0; 2594165220Srrs x->bm_measured.b_bytes = 0; 2595169420Srrs x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; 2596165220Srrs } 2597165220Srrs } 2598165220Srrs} 2599163953Srrs 2600163953Srrs/* 2601169420Srrs * Prepare a bandwidth-related upcall 2602169420Srrs */ 2603169420Srrsstatic void 2604163953Srrsbw_meter_prepare_upcall(struct bw_meter *x, struct timeval *nowp) 2605163953Srrs{ 2606163953Srrs struct timeval delta; 2607163953Srrs struct bw_upcall *u; 2608163953Srrs 2609179157Srrs MFC_LOCK_ASSERT(); 2610163953Srrs 2611163953Srrs /* 2612163953Srrs * Compute the measured time interval 2613163953Srrs */ 2614185694Srrs delta = *nowp; 2615185694Srrs BW_TIMEVALDECR(&delta, &x->bm_start_time); 2616163953Srrs 2617163953Srrs /* 2618163953Srrs * If there are too many pending upcalls, deliver them now 2619163953Srrs */ 2620163953Srrs if (bw_upcalls_n >= BW_UPCALLS_MAX) 2621163953Srrs bw_upcalls_send(); 2622163953Srrs 2623163953Srrs /* 2624163953Srrs * Set the bw_upcall entry 2625163953Srrs */ 2626163953Srrs u = &bw_upcalls[bw_upcalls_n++]; 2627163953Srrs u->bu_src = x->bm_mfc->mfc_origin; 2628163953Srrs u->bu_dst = x->bm_mfc->mfc_mcastgrp; 2629163953Srrs u->bu_threshold.b_time = x->bm_threshold.b_time; 2630163953Srrs u->bu_threshold.b_packets = x->bm_threshold.b_packets; 2631163953Srrs u->bu_threshold.b_bytes = x->bm_threshold.b_bytes; 2632163953Srrs u->bu_measured.b_time = delta; 2633163953Srrs u->bu_measured.b_packets = x->bm_measured.b_packets; 2634165220Srrs u->bu_measured.b_bytes = x->bm_measured.b_bytes; 2635165220Srrs u->bu_flags = 0; 2636163953Srrs if (x->bm_flags & BW_METER_UNIT_PACKETS) 2637163953Srrs u->bu_flags |= BW_UPCALL_UNIT_PACKETS; 2638163953Srrs if (x->bm_flags & BW_METER_UNIT_BYTES) 2639163953Srrs u->bu_flags |= BW_UPCALL_UNIT_BYTES; 2640163953Srrs if (x->bm_flags & BW_METER_GEQ) 2641169420Srrs u->bu_flags |= BW_UPCALL_GEQ; 2642163953Srrs if (x->bm_flags & BW_METER_LEQ) 2643163953Srrs u->bu_flags |= BW_UPCALL_LEQ; 2644172090Srrs} 2645163953Srrs 2646163953Srrs/* 2647163953Srrs * Send the pending bandwidth-related upcalls 2648163953Srrs */ 2649163953Srrsstatic void 2650163953Srrsbw_upcalls_send(void) 2651163953Srrs{ 2652163953Srrs struct mbuf *m; 2653163953Srrs int len = bw_upcalls_n * sizeof(bw_upcalls[0]); 2654163953Srrs struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; 2655163953Srrs static struct igmpmsg igmpmsg = { 0, /* unused1 */ 2656163953Srrs 0, /* unused2 */ 2657163953Srrs IGMPMSG_BW_UPCALL,/* im_msgtype */ 2658163953Srrs 0, /* im_mbz */ 2659163953Srrs 0, /* im_vif */ 2660163953Srrs 0, /* unused3 */ 2661163953Srrs { 0 }, /* im_src */ 2662163953Srrs { 0 } }; /* im_dst */ 2663163953Srrs 2664163953Srrs MFC_LOCK_ASSERT(); 2665163953Srrs 2666163953Srrs if (bw_upcalls_n == 0) 2667172090Srrs return; /* No pending upcalls */ 2668163953Srrs 2669163953Srrs bw_upcalls_n = 0; 2670163953Srrs 2671171858Srrs /* 2672163953Srrs * Allocate a new mbuf, initialize it with the header and 2673166086Srrs * the payload for the pending calls. 2674163953Srrs */ 2675163953Srrs MGETHDR(m, M_DONTWAIT, MT_HEADER); 2676171858Srrs if (m == NULL) { 2677171858Srrs log(LOG_WARNING, "bw_upcalls_send: cannot allocate mbuf\n"); 2678163953Srrs return; 2679163953Srrs } 2680163953Srrs 2681172090Srrs m->m_len = m->m_pkthdr.len = 0; 2682172090Srrs m_copyback(m, 0, sizeof(struct igmpmsg), (caddr_t)&igmpmsg); 2683172090Srrs m_copyback(m, sizeof(struct igmpmsg), len, (caddr_t)&bw_upcalls[0]); 2684172090Srrs 2685163953Srrs /* 2686169420Srrs * Send the upcalls 2687163953Srrs * XXX do we need to set the address in k_igmpsrc ? 2688163953Srrs */ 2689179157Srrs mrtstat.mrts_upcalls++; 2690172090Srrs if (socket_send(ip_mrouter, m, &k_igmpsrc) < 0) { 2691172090Srrs log(LOG_WARNING, "bw_upcalls_send: ip_mrouter socket queue full\n"); 2692172090Srrs ++mrtstat.mrts_upq_sockfull; 2693172090Srrs } 2694172090Srrs} 2695172090Srrs 2696172090Srrs/* 2697172090Srrs * Compute the timeout hash value for the bw_meter entries 2698171943Srrs */ 2699172090Srrs#define BW_METER_TIMEHASH(bw_meter, hash) \ 2700172090Srrs do { \ 2701172090Srrs struct timeval next_timeval = (bw_meter)->bm_start_time; \ 2702163953Srrs \ 2703163953Srrs BW_TIMEVALADD(&next_timeval, &(bw_meter)->bm_threshold.b_time); \ 2704163953Srrs (hash) = next_timeval.tv_sec; \ 2705166086Srrs if (next_timeval.tv_usec) \ 2706166086Srrs (hash)++; /* XXX: make sure we don't timeout early */ \ 2707166086Srrs (hash) %= BW_METER_BUCKETS; \ 2708166086Srrs } while (0) 2709166086Srrs 2710166086Srrs/* 2711163953Srrs * Schedule a timer to process periodically bw_meter entry of type "<=" 2712163953Srrs * by linking the entry in the proper hash bucket. 2713163953Srrs */ 2714166086Srrsstatic void 2715163953Srrsschedule_bw_meter(struct bw_meter *x, struct timeval *nowp) 2716163953Srrs{ 2717163953Srrs int time_hash; 2718172091Srrs 2719163953Srrs MFC_LOCK_ASSERT(); 2720163953Srrs 2721163953Srrs if (!(x->bm_flags & BW_METER_LEQ)) 2722163953Srrs return; /* XXX: we schedule timers only for "<=" entries */ 2723163953Srrs 2724163953Srrs /* 2725163953Srrs * Reset the bw_meter entry 2726163953Srrs */ 2727163953Srrs x->bm_start_time = *nowp; 2728163953Srrs x->bm_measured.b_packets = 0; 2729163953Srrs x->bm_measured.b_bytes = 0; 2730163953Srrs x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; 2731163953Srrs 2732163953Srrs /* 2733163953Srrs * Compute the timeout hash value and insert the entry 2734163953Srrs */ 2735163953Srrs BW_METER_TIMEHASH(x, time_hash); 2736163953Srrs x->bm_time_next = bw_meter_timers[time_hash]; 2737163953Srrs bw_meter_timers[time_hash] = x; 2738163953Srrs x->bm_time_hash = time_hash; 2739163953Srrs} 2740163953Srrs 2741163953Srrs/* 2742170056Srrs * Unschedule the periodic timer that processes bw_meter entry of type "<=" 2743170056Srrs * by removing the entry from the proper hash bucket. 2744170056Srrs */ 2745170056Srrsstatic void 2746175845Srwatsonunschedule_bw_meter(struct bw_meter *x) 2747175845Srwatson{ 2748170056Srrs int time_hash; 2749170056Srrs struct bw_meter *prev, *tmp; 2750163953Srrs 2751170056Srrs MFC_LOCK_ASSERT(); 2752166086Srrs 2753166086Srrs if (!(x->bm_flags & BW_METER_LEQ)) 2754166086Srrs return; /* XXX: we schedule timers only for "<=" entries */ 2755166086Srrs 2756166086Srrs /* 2757166086Srrs * Compute the timeout hash value and delete the entry 2758166086Srrs */ 2759166086Srrs time_hash = x->bm_time_hash; 2760166086Srrs if (time_hash >= BW_METER_BUCKETS) 2761166086Srrs return; /* Entry was not scheduled */ 2762166086Srrs 2763166086Srrs for (prev = NULL, tmp = bw_meter_timers[time_hash]; 2764166086Srrs tmp != NULL; prev = tmp, tmp = tmp->bm_time_next) 2765166086Srrs if (tmp == x) 2766166086Srrs break; 2767163953Srrs 2768163953Srrs if (tmp == NULL) 2769172090Srrs panic("unschedule_bw_meter: bw_meter entry not found"); 2770163953Srrs 2771166086Srrs if (prev != NULL) 2772166086Srrs prev->bm_time_next = x->bm_time_next; 2773166086Srrs else 2774166086Srrs bw_meter_timers[time_hash] = x->bm_time_next; 2775172090Srrs 2776172090Srrs x->bm_time_next = NULL; 2777172090Srrs x->bm_time_hash = BW_METER_BUCKETS; 2778172090Srrs} 2779172090Srrs 2780166086Srrs 2781172090Srrs/* 2782172118Srrs * Process all "<=" type of bw_meter that should be processed now, 2783172118Srrs * and for each entry prepare an upcall if necessary. Each processed 2784172090Srrs * entry is rescheduled again for the (periodic) processing. 2785172090Srrs * 2786163953Srrs * This is run periodically (once per second normally). On each round, 2787163953Srrs * all the potentially matching entries are in the hash slot that we are 2788163953Srrs * looking at. 2789163953Srrs */ 2790172090Srrsstatic void 2791163953Srrsbw_meter_process() 2792163953Srrs{ 2793163953Srrs static uint32_t last_tv_sec; /* last time we processed this */ 2794163953Srrs 2795163953Srrs uint32_t loops; 2796163953Srrs int i; 2797163953Srrs struct timeval now, process_endtime; 2798163953Srrs 2799163953Srrs GET_TIME(now); 2800163953Srrs if (last_tv_sec == now.tv_sec) 2801163953Srrs return; /* nothing to do */ 2802169420Srrs 2803169420Srrs loops = now.tv_sec - last_tv_sec; 2804163953Srrs last_tv_sec = now.tv_sec; 2805163953Srrs if (loops > BW_METER_BUCKETS) 2806163953Srrs loops = BW_METER_BUCKETS; 2807163953Srrs 2808163953Srrs MFC_LOCK(); 2809163953Srrs /* 2810163953Srrs * Process all bins of bw_meter entries from the one after the last 2811163953Srrs * processed to the current one. On entry, i points to the last bucket 2812163953Srrs * visited, so we need to increment i at the beginning of the loop. 2813169420Srrs */ 2814171943Srrs for (i = (now.tv_sec - loops) % BW_METER_BUCKETS; loops > 0; loops--) { 2815163953Srrs struct bw_meter *x, *tmp_list; 2816163953Srrs 2817163953Srrs if (++i >= BW_METER_BUCKETS) 2818163953Srrs i = 0; 2819163953Srrs 2820163953Srrs /* Disconnect the list of bw_meter entries from the bin */ 2821163953Srrs tmp_list = bw_meter_timers[i]; 2822163953Srrs bw_meter_timers[i] = NULL; 2823163953Srrs 2824163953Srrs /* Process the list of bw_meter entries */ 2825171477Srrs while (tmp_list != NULL) { 2826163953Srrs x = tmp_list; 2827169378Srrs tmp_list = tmp_list->bm_time_next; 2828172090Srrs 2829163953Srrs /* Test if the time interval is over */ 2830163953Srrs process_endtime = x->bm_start_time; 2831172090Srrs BW_TIMEVALADD(&process_endtime, &x->bm_threshold.b_time); 2832172090Srrs if (BW_TIMEVALCMP(&process_endtime, &now, >)) { 2833172090Srrs /* Not yet: reschedule, but don't reset */ 2834172090Srrs int time_hash; 2835163953Srrs 2836172090Srrs BW_METER_TIMEHASH(x, time_hash); 2837172090Srrs if (time_hash == i && process_endtime.tv_sec == now.tv_sec) { 2838172090Srrs /* 2839172090Srrs * XXX: somehow the bin processing is a bit ahead of time. 2840172090Srrs * Put the entry in the next bin. 2841172090Srrs */ 2842172090Srrs if (++time_hash >= BW_METER_BUCKETS) 2843172090Srrs time_hash = 0; 2844172090Srrs } 2845172090Srrs x->bm_time_next = bw_meter_timers[time_hash]; 2846172090Srrs bw_meter_timers[time_hash] = x; 2847172090Srrs x->bm_time_hash = time_hash; 2848172090Srrs 2849172090Srrs continue; 2850172090Srrs } 2851172090Srrs 2852163953Srrs /* 2853163953Srrs * Test if we should deliver an upcall 2854163953Srrs */ 2855163953Srrs if (((x->bm_flags & BW_METER_UNIT_PACKETS) && 2856163953Srrs (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) || 2857163953Srrs ((x->bm_flags & BW_METER_UNIT_BYTES) && 2858163953Srrs (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) { 2859163953Srrs /* Prepare an upcall for delivery */ 2860163953Srrs bw_meter_prepare_upcall(x, &now); 2861163953Srrs } 2862163953Srrs 2863163953Srrs /* 2864163953Srrs * Reschedule for next processing 2865163953Srrs */ 2866163953Srrs schedule_bw_meter(x, &now); 2867171572Srrs } 2868171572Srrs } 2869171572Srrs 2870163953Srrs /* Send all upcalls that are pending delivery */ 2871163953Srrs bw_upcalls_send(); 2872163953Srrs 2873163953Srrs MFC_UNLOCK(); 2874171572Srrs} 2875163953Srrs 2876163953Srrs/* 2877163953Srrs * A periodic function for sending all upcalls that are pending delivery 2878171572Srrs */ 2879172190Srrsstatic void 2880172190Srrsexpire_bw_upcalls_send(void *unused) 2881171572Srrs{ 2882163953Srrs MFC_LOCK(); 2883163953Srrs bw_upcalls_send(); 2884163953Srrs MFC_UNLOCK(); 2885163953Srrs 2886163953Srrs callout_reset(&bw_upcalls_ch, BW_UPCALLS_PERIOD, 2887163953Srrs expire_bw_upcalls_send, NULL); 2888163953Srrs} 2889163953Srrs 2890163953Srrs/* 2891163953Srrs * A periodic function for periodic scanning of the multicast forwarding 2892163953Srrs * table for processing all "<=" bw_meter entries. 2893163953Srrs */ 2894163953Srrsstatic void 2895163953Srrsexpire_bw_meter_process(void *unused) 2896163953Srrs{ 2897163953Srrs if (mrt_api_config & MRT_MFC_BW_UPCALL) 2898163953Srrs bw_meter_process(); 2899163953Srrs 2900163953Srrs callout_reset(&bw_meter_ch, BW_METER_PERIOD, expire_bw_meter_process, NULL); 2901163953Srrs} 2902163953Srrs 2903163953Srrs/* 2904163953Srrs * End of bandwidth monitoring code 2905163953Srrs */ 2906163953Srrs 2907163953Srrs#ifdef PIM 2908163953Srrs/* 2909163953Srrs * Send the packet up to the user daemon, or eventually do kernel encapsulation 2910163953Srrs * 2911163953Srrs */ 2912163953Srrsstatic int 2913163953Srrspim_register_send(struct ip *ip, struct vif *vifp, 2914163953Srrs struct mbuf *m, struct mfc *rt) 2915163953Srrs{ 2916163953Srrs struct mbuf *mb_copy, *mm; 2917163953Srrs 2918163953Srrs if (mrtdebug & DEBUG_PIM) 2919163953Srrs log(LOG_DEBUG, "pim_register_send: "); 2920163953Srrs 2921163953Srrs mb_copy = pim_register_prepare(ip, m); 2922163953Srrs if (mb_copy == NULL) 2923163953Srrs return ENOBUFS; 2924163953Srrs 2925163953Srrs /* 2926163953Srrs * Send all the fragments. Note that the mbuf for each fragment 2927163953Srrs * is freed by the sending machinery. 2928163953Srrs */ 2929163953Srrs for (mm = mb_copy; mm; mm = mb_copy) { 2930163953Srrs mb_copy = mm->m_nextpkt; 2931163953Srrs mm->m_nextpkt = 0; 2932163953Srrs mm = m_pullup(mm, sizeof(struct ip)); 2933163953Srrs if (mm != NULL) { 2934163953Srrs ip = mtod(mm, struct ip *); 2935163953Srrs if ((mrt_api_config & MRT_MFC_RP) && 2936163953Srrs (rt->mfc_rp.s_addr != INADDR_ANY)) { 2937163953Srrs pim_register_send_rp(ip, vifp, mm, rt); 2938163953Srrs } else { 2939163953Srrs pim_register_send_upcall(ip, vifp, mm, rt); 2940171440Srrs } 2941171440Srrs } 2942171440Srrs } 2943171440Srrs 2944171440Srrs return 0; 2945163953Srrs} 2946163953Srrs 2947163953Srrs/* 2948163953Srrs * Return a copy of the data packet that is ready for PIM Register 2949163953Srrs * encapsulation. 2950163953Srrs * XXX: Note that in the returned copy the IP header is a valid one. 2951163953Srrs */ 2952163953Srrsstatic struct mbuf * 2953163953Srrspim_register_prepare(struct ip *ip, struct mbuf *m) 2954163953Srrs{ 2955163953Srrs struct mbuf *mb_copy = NULL; 2956163953Srrs int mtu; 2957163953Srrs 2958163953Srrs /* Take care of delayed checksums */ 2959163953Srrs if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 2960163953Srrs in_delayed_cksum(m); 2961163953Srrs m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 2962163953Srrs } 2963163953Srrs 2964163953Srrs /* 2965163953Srrs * Copy the old packet & pullup its IP header into the 2966163953Srrs * new mbuf so we can modify it. 2967163953Srrs */ 2968163953Srrs mb_copy = m_copypacket(m, M_DONTWAIT); 2969163953Srrs if (mb_copy == NULL) 2970163953Srrs return NULL; 2971163953Srrs mb_copy = m_pullup(mb_copy, ip->ip_hl << 2); 2972163953Srrs if (mb_copy == NULL) 2973163953Srrs return NULL; 2974163953Srrs 2975163953Srrs /* take care of the TTL */ 2976163953Srrs ip = mtod(mb_copy, struct ip *); 2977163953Srrs --ip->ip_ttl; 2978163953Srrs 2979163953Srrs /* Compute the MTU after the PIM Register encapsulation */ 2980163953Srrs mtu = 0xffff - sizeof(pim_encap_iphdr) - sizeof(pim_encap_pimhdr); 2981163953Srrs 2982163953Srrs if (ip->ip_len <= mtu) { 2983163953Srrs /* Turn the IP header into a valid one */ 2984163953Srrs ip->ip_len = htons(ip->ip_len); 2985163953Srrs ip->ip_off = htons(ip->ip_off); 2986163953Srrs ip->ip_sum = 0; 2987163953Srrs ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); 2988163953Srrs } else { 2989163953Srrs /* Fragment the packet */ 2990163953Srrs if (ip_fragment(ip, &mb_copy, mtu, 0, CSUM_DELAY_IP) != 0) { 2991163953Srrs m_freem(mb_copy); 2992163953Srrs return NULL; 2993163953Srrs } 2994163953Srrs } 2995163953Srrs return mb_copy; 2996163953Srrs} 2997163953Srrs 2998163953Srrs/* 2999163953Srrs * Send an upcall with the data packet to the user-level process. 3000163953Srrs */ 3001163953Srrsstatic int 3002172090Srrspim_register_send_upcall(struct ip *ip, struct vif *vifp, 3003172090Srrs struct mbuf *mb_copy, struct mfc *rt) 3004172090Srrs{ 3005172090Srrs struct mbuf *mb_first; 3006172090Srrs int len = ntohs(ip->ip_len); 3007169420Srrs struct igmpmsg *im; 3008169420Srrs struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; 3009163953Srrs 3010163953Srrs VIF_LOCK_ASSERT(); 3011163953Srrs 3012163953Srrs /* 3013163953Srrs * Add a new mbuf with an upcall header 3014163953Srrs */ 3015163953Srrs MGETHDR(mb_first, M_DONTWAIT, MT_HEADER); 3016171990Srrs if (mb_first == NULL) { 3017171990Srrs m_freem(mb_copy); 3018163953Srrs return ENOBUFS; 3019163953Srrs } 3020163953Srrs mb_first->m_data += max_linkhdr; 3021163953Srrs mb_first->m_pkthdr.len = len + sizeof(struct igmpmsg); 3022163953Srrs mb_first->m_len = sizeof(struct igmpmsg); 3023172090Srrs mb_first->m_next = mb_copy; 3024163953Srrs 3025163953Srrs /* Send message to routing daemon */ 3026163953Srrs im = mtod(mb_first, struct igmpmsg *); 3027163953Srrs im->im_msgtype = IGMPMSG_WHOLEPKT; 3028172090Srrs im->im_mbz = 0; 3029163953Srrs im->im_vif = vifp - viftable; 3030163953Srrs im->im_src = ip->ip_src; 3031163953Srrs im->im_dst = ip->ip_dst; 3032178202Srrs 3033163953Srrs k_igmpsrc.sin_addr = ip->ip_src; 3034163953Srrs 3035171990Srrs mrtstat.mrts_upcalls++; 3036171990Srrs 3037172090Srrs if (socket_send(ip_mrouter, mb_first, &k_igmpsrc) < 0) { 3038172090Srrs if (mrtdebug & DEBUG_PIM) 3039172090Srrs log(LOG_WARNING, 3040172090Srrs "mcast: pim_register_send_upcall: ip_mrouter socket queue full"); 3041172090Srrs ++mrtstat.mrts_upq_sockfull; 3042172090Srrs return ENOBUFS; 3043172090Srrs } 3044172090Srrs 3045171943Srrs /* Keep statistics */ 3046172090Srrs pimstat.pims_snd_registers_msgs++; 3047172090Srrs pimstat.pims_snd_registers_bytes += len; 3048172090Srrs 3049163953Srrs return 0; 3050163953Srrs} 3051163953Srrs 3052163953Srrs/* 3053163953Srrs * Encapsulate the data packet in PIM Register message and send it to the RP. 3054163953Srrs */ 3055163953Srrsstatic int 3056163953Srrspim_register_send_rp(struct ip *ip, struct vif *vifp, 3057163953Srrs struct mbuf *mb_copy, struct mfc *rt) 3058163953Srrs{ 3059163953Srrs struct mbuf *mb_first; 3060163953Srrs struct ip *ip_outer; 3061163953Srrs struct pim_encap_pimhdr *pimhdr; 3062163953Srrs int len = ntohs(ip->ip_len); 3063163953Srrs vifi_t vifi = rt->mfc_parent; 3064163953Srrs 3065163953Srrs VIF_LOCK_ASSERT(); 3066163953Srrs 3067163953Srrs if ((vifi >= numvifs) || (viftable[vifi].v_lcl_addr.s_addr == 0)) { 3068163953Srrs m_freem(mb_copy); 3069163953Srrs return EADDRNOTAVAIL; /* The iif vif is invalid */ 3070163953Srrs } 3071163953Srrs 3072163953Srrs /* 3073163953Srrs * Add a new mbuf with the encapsulating header 3074163953Srrs */ 3075163953Srrs MGETHDR(mb_first, M_DONTWAIT, MT_HEADER); 3076163953Srrs if (mb_first == NULL) { 3077163953Srrs m_freem(mb_copy); 3078163953Srrs return ENOBUFS; 3079163953Srrs } 3080163953Srrs mb_first->m_data += max_linkhdr; 3081163953Srrs mb_first->m_len = sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr); 3082163953Srrs mb_first->m_next = mb_copy; 3083163953Srrs 3084163953Srrs mb_first->m_pkthdr.len = len + mb_first->m_len; 3085163953Srrs 3086163953Srrs /* 3087163953Srrs * Fill in the encapsulating IP and PIM header 3088163953Srrs */ 3089163953Srrs ip_outer = mtod(mb_first, struct ip *); 3090163953Srrs *ip_outer = pim_encap_iphdr; 3091163953Srrs ip_outer->ip_id = ip_newid(); 3092163953Srrs ip_outer->ip_len = len + sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr); 3093163953Srrs ip_outer->ip_src = viftable[vifi].v_lcl_addr; 3094163953Srrs ip_outer->ip_dst = rt->mfc_rp; 3095163953Srrs /* 3096163953Srrs * Copy the inner header TOS to the outer header, and take care of the 3097163953Srrs * IP_DF bit. 3098163953Srrs */ 3099163953Srrs ip_outer->ip_tos = ip->ip_tos; 3100163953Srrs if (ntohs(ip->ip_off) & IP_DF) 3101163953Srrs ip_outer->ip_off |= IP_DF; 3102163953Srrs pimhdr = (struct pim_encap_pimhdr *)((caddr_t)ip_outer 3103163953Srrs + sizeof(pim_encap_iphdr)); 3104163953Srrs *pimhdr = pim_encap_pimhdr; 3105163953Srrs /* If the iif crosses a border, set the Border-bit */ 3106163953Srrs if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_BORDER_VIF & mrt_api_config) 3107163953Srrs pimhdr->flags |= htonl(PIM_BORDER_REGISTER); 3108163953Srrs 3109163953Srrs mb_first->m_data += sizeof(pim_encap_iphdr); 3110163953Srrs pimhdr->pim.pim_cksum = in_cksum(mb_first, sizeof(pim_encap_pimhdr)); 3111163953Srrs mb_first->m_data -= sizeof(pim_encap_iphdr); 3112163953Srrs 3113163953Srrs if (vifp->v_rate_limit == 0) 3114163953Srrs tbf_send_packet(vifp, mb_first); 3115163953Srrs else 3116163953Srrs tbf_control(vifp, mb_first, ip, ip_outer->ip_len); 3117163953Srrs 3118163953Srrs /* Keep statistics */ 3119163953Srrs pimstat.pims_snd_registers_msgs++; 3120163953Srrs pimstat.pims_snd_registers_bytes += len; 3121163953Srrs 3122163953Srrs return 0; 3123163953Srrs} 3124163953Srrs 3125163953Srrs/* 3126163953Srrs * PIM-SMv2 and PIM-DM messages processing. 3127163953Srrs * Receives and verifies the PIM control messages, and passes them 3128163953Srrs * up to the listening socket, using rip_input(). 3129163953Srrs * The only message with special processing is the PIM_REGISTER message 3130163953Srrs * (used by PIM-SM): the PIM header is stripped off, and the inner packet 3131163953Srrs * is passed to if_simloop(). 3132163953Srrs */ 3133163953Srrsvoid 3134163953Srrspim_input(struct mbuf *m, int off) 3135163953Srrs{ 3136163953Srrs struct ip *ip = mtod(m, struct ip *); 3137163953Srrs struct pim *pim; 3138163953Srrs int minlen; 3139163953Srrs int datalen = ip->ip_len; 3140163953Srrs int ip_tos; 3141163953Srrs int iphlen = off; 3142163953Srrs 3143163953Srrs /* Keep statistics */ 3144163953Srrs pimstat.pims_rcv_total_msgs++; 3145163953Srrs pimstat.pims_rcv_total_bytes += datalen; 3146163953Srrs 3147163953Srrs /* 3148163953Srrs * Validate lengths 3149163953Srrs */ 3150163953Srrs if (datalen < PIM_MINLEN) { 3151163953Srrs pimstat.pims_rcv_tooshort++; 3152163953Srrs log(LOG_ERR, "pim_input: packet size too small %d from %lx\n", 3153163953Srrs datalen, (u_long)ip->ip_src.s_addr); 3154163953Srrs m_freem(m); 3155163953Srrs return; 3156163953Srrs } 3157171440Srrs 3158163953Srrs /* 3159163953Srrs * If the packet is at least as big as a REGISTER, go agead 3160163953Srrs * and grab the PIM REGISTER header size, to avoid another 3161163953Srrs * possible m_pullup() later. 3162179783Srrs * 3163170744Srrs * PIM_MINLEN == pimhdr + u_int32_t == 4 + 4 = 8 3164170744Srrs * PIM_REG_MINLEN == pimhdr + reghdr + encap_iphdr == 4 + 4 + 20 = 28 3165170744Srrs */ 3166170744Srrs minlen = iphlen + (datalen >= PIM_REG_MINLEN ? PIM_REG_MINLEN : PIM_MINLEN); 3167170744Srrs /* 3168170744Srrs * Get the IP and PIM headers in contiguous memory, and 3169168709Srrs * possibly the PIM REGISTER header. 3170168709Srrs */ 3171163953Srrs if ((m->m_flags & M_EXT || m->m_len < minlen) && 3172163953Srrs (m = m_pullup(m, minlen)) == 0) { 3173163953Srrs log(LOG_ERR, "pim_input: m_pullup failure\n"); 3174163953Srrs return; 3175163953Srrs } 3176163953Srrs /* m_pullup() may have given us a new mbuf so reset ip. */ 3177163953Srrs ip = mtod(m, struct ip *); 3178163953Srrs ip_tos = ip->ip_tos; 3179163953Srrs 3180163953Srrs /* adjust mbuf to point to the PIM header */ 3181163953Srrs m->m_data += iphlen; 3182163953Srrs m->m_len -= iphlen; 3183163953Srrs pim = mtod(m, struct pim *); 3184163953Srrs 3185163953Srrs /* 3186169420Srrs * Validate checksum. If PIM REGISTER, exclude the data packet. 3187163953Srrs * 3188163953Srrs * XXX: some older PIMv2 implementations don't make this distinction, 3189163953Srrs * so for compatibility reason perform the checksum over part of the 3190163953Srrs * message, and if error, then over the whole message. 3191163953Srrs */ 3192163953Srrs if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER && in_cksum(m, PIM_MINLEN) == 0) { 3193163953Srrs /* do nothing, checksum okay */ 3194163953Srrs } else if (in_cksum(m, datalen)) { 3195163953Srrs pimstat.pims_rcv_badsum++; 3196163953Srrs if (mrtdebug & DEBUG_PIM) 3197163953Srrs log(LOG_DEBUG, "pim_input: invalid checksum"); 3198163953Srrs m_freem(m); 3199163953Srrs return; 3200163953Srrs } 3201163953Srrs 3202163953Srrs /* PIM version check */ 3203163953Srrs if (PIM_VT_V(pim->pim_vt) < PIM_VERSION) { 3204163953Srrs pimstat.pims_rcv_badversion++; 3205163953Srrs log(LOG_ERR, "pim_input: incorrect version %d, expecting %d\n", 3206163953Srrs PIM_VT_V(pim->pim_vt), PIM_VERSION); 3207163953Srrs m_freem(m); 3208163953Srrs return; 3209163953Srrs } 3210163953Srrs 3211163953Srrs /* restore mbuf back to the outer IP */ 3212163953Srrs m->m_data -= iphlen; 3213163953Srrs m->m_len += iphlen; 3214163953Srrs 3215163953Srrs if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER) { 3216163953Srrs /* 3217163953Srrs * Since this is a REGISTER, we'll make a copy of the register 3218163953Srrs * headers ip + pim + u_int32 + encap_ip, to be passed up to the 3219163953Srrs * routing daemon. 3220163953Srrs */ 3221163953Srrs struct sockaddr_in dst = { sizeof(dst), AF_INET }; 3222171440Srrs struct mbuf *mcp; 3223172090Srrs struct ip *encap_ip; 3224163953Srrs u_int32_t *reghdr; 3225163953Srrs struct ifnet *vifp; 3226163953Srrs 3227163953Srrs VIF_LOCK(); 3228163953Srrs if ((reg_vif_num >= numvifs) || (reg_vif_num == VIFI_INVALID)) { 3229163953Srrs VIF_UNLOCK(); 3230185694Srrs if (mrtdebug & DEBUG_PIM) 3231185694Srrs log(LOG_DEBUG, 3232185694Srrs "pim_input: register vif not set: %d\n", reg_vif_num); 3233185694Srrs m_freem(m); 3234163953Srrs return; 3235163953Srrs } 3236171440Srrs /* XXX need refcnt? */ 3237171440Srrs vifp = viftable[reg_vif_num].v_ifp; 3238171440Srrs VIF_UNLOCK(); 3239171440Srrs 3240171440Srrs /* 3241171440Srrs * Validate length 3242171440Srrs */ 3243163953Srrs if (datalen < PIM_REG_MINLEN) { 3244163953Srrs pimstat.pims_rcv_tooshort++; 3245163953Srrs pimstat.pims_rcv_badregisters++; 3246163953Srrs log(LOG_ERR, 3247163953Srrs "pim_input: register packet size too small %d from %lx\n", 3248163953Srrs datalen, (u_long)ip->ip_src.s_addr); 3249163953Srrs m_freem(m); 3250163953Srrs return; 3251163953Srrs } 3252163953Srrs 3253163953Srrs reghdr = (u_int32_t *)(pim + 1); 3254163953Srrs encap_ip = (struct ip *)(reghdr + 1); 3255163953Srrs 3256163953Srrs if (mrtdebug & DEBUG_PIM) { 3257163953Srrs log(LOG_DEBUG, 3258163953Srrs "pim_input[register], encap_ip: %lx -> %lx, encap_ip len %d\n", 3259163953Srrs (u_long)ntohl(encap_ip->ip_src.s_addr), 3260163953Srrs (u_long)ntohl(encap_ip->ip_dst.s_addr), 3261163953Srrs ntohs(encap_ip->ip_len)); 3262163953Srrs } 3263163953Srrs 3264163953Srrs /* verify the version number of the inner packet */ 3265163953Srrs if (encap_ip->ip_v != IPVERSION) { 3266163953Srrs pimstat.pims_rcv_badregisters++; 3267163953Srrs if (mrtdebug & DEBUG_PIM) { 3268163953Srrs log(LOG_DEBUG, "pim_input: invalid IP version (%d) " 3269163953Srrs "of the inner packet\n", encap_ip->ip_v); 3270163953Srrs } 3271163953Srrs m_freem(m); 3272163953Srrs return; 3273163953Srrs } 3274171990Srrs 3275163953Srrs /* verify the inner packet is destined to a mcast group */ 3276163953Srrs if (!IN_MULTICAST(ntohl(encap_ip->ip_dst.s_addr))) { 3277163953Srrs pimstat.pims_rcv_badregisters++; 3278163953Srrs if (mrtdebug & DEBUG_PIM) 3279163953Srrs log(LOG_DEBUG, 3280163953Srrs "pim_input: inner packet of register is not " 3281163953Srrs "multicast %lx\n", 3282163953Srrs (u_long)ntohl(encap_ip->ip_dst.s_addr)); 3283163953Srrs m_freem(m); 3284163953Srrs return; 3285163953Srrs } 3286163953Srrs 3287163953Srrs /* If a NULL_REGISTER, pass it to the daemon */ 3288163953Srrs if ((ntohl(*reghdr) & PIM_NULL_REGISTER)) 3289163953Srrs goto pim_input_to_daemon; 3290163953Srrs 3291163953Srrs /* 3292163953Srrs * Copy the TOS from the outer IP header to the inner IP header. 3293163953Srrs */ 3294163953Srrs if (encap_ip->ip_tos != ip_tos) { 3295163953Srrs /* Outer TOS -> inner TOS */ 3296163953Srrs encap_ip->ip_tos = ip_tos; 3297163953Srrs /* Recompute the inner header checksum. Sigh... */ 3298163953Srrs 3299163953Srrs /* adjust mbuf to point to the inner IP header */ 3300163953Srrs m->m_data += (iphlen + PIM_MINLEN); 3301163953Srrs m->m_len -= (iphlen + PIM_MINLEN); 3302163953Srrs 3303163953Srrs encap_ip->ip_sum = 0; 3304163953Srrs encap_ip->ip_sum = in_cksum(m, encap_ip->ip_hl << 2); 3305163953Srrs 3306163953Srrs /* restore mbuf to point back to the outer IP header */ 3307163953Srrs m->m_data -= (iphlen + PIM_MINLEN); 3308163953Srrs m->m_len += (iphlen + PIM_MINLEN); 3309163953Srrs } 3310163953Srrs 3311163953Srrs /* 3312163953Srrs * Decapsulate the inner IP packet and loopback to forward it 3313163953Srrs * as a normal multicast packet. Also, make a copy of the 3314163953Srrs * outer_iphdr + pimhdr + reghdr + encap_iphdr 3315163953Srrs * to pass to the daemon later, so it can take the appropriate 3316163953Srrs * actions (e.g., send back PIM_REGISTER_STOP). 3317163953Srrs * XXX: here m->m_data points to the outer IP header. 3318163953Srrs */ 3319172090Srrs mcp = m_copy(m, 0, iphlen + PIM_REG_MINLEN); 3320163953Srrs if (mcp == NULL) { 3321163953Srrs log(LOG_ERR, 3322163953Srrs "pim_input: pim register: could not copy register head\n"); 3323163953Srrs m_freem(m); 3324163953Srrs return; 3325163953Srrs } 3326163953Srrs 3327163953Srrs /* Keep statistics */ 3328163953Srrs /* XXX: registers_bytes include only the encap. mcast pkt */ 3329163953Srrs pimstat.pims_rcv_registers_msgs++; 3330163953Srrs pimstat.pims_rcv_registers_bytes += ntohs(encap_ip->ip_len); 3331163953Srrs 3332163953Srrs /* 3333163953Srrs * forward the inner ip packet; point m_data at the inner ip. 3334163953Srrs */ 3335163953Srrs m_adj(m, iphlen + PIM_MINLEN); 3336163953Srrs 3337163953Srrs if (mrtdebug & DEBUG_PIM) { 3338163953Srrs log(LOG_DEBUG, 3339163953Srrs "pim_input: forwarding decapsulated register: " 3340163953Srrs "src %lx, dst %lx, vif %d\n", 3341163953Srrs (u_long)ntohl(encap_ip->ip_src.s_addr), 3342163953Srrs (u_long)ntohl(encap_ip->ip_dst.s_addr), 3343172090Srrs reg_vif_num); 3344163953Srrs } 3345163953Srrs /* NB: vifp was collected above; can it change on us? */ 3346163953Srrs if_simloop(vifp, m, dst.sin_family, 0); 3347163953Srrs 3348163953Srrs /* prepare the register head to send to the mrouting daemon */ 3349163953Srrs m = mcp; 3350163953Srrs } 3351163953Srrs 3352163953Srrspim_input_to_daemon: 3353163953Srrs /* 3354163953Srrs * Pass the PIM message up to the daemon; if it is a Register message, 3355163953Srrs * pass the 'head' only up to the daemon. This includes the 3356163953Srrs * outer IP header, PIM header, PIM-Register header and the 3357163953Srrs * inner IP header. 3358169208Srrs * XXX: the outer IP header pkt size of a Register is not adjust to 3359163953Srrs * reflect the fact that the inner multicast data is truncated. 3360163953Srrs */ 3361163953Srrs rip_input(m, iphlen); 3362169208Srrs 3363163953Srrs return; 3364163953Srrs} 3365163953Srrs#endif /* PIM */ 3366163953Srrs 3367163953Srrsstatic int 3368163953Srrsip_mroute_modevent(module_t mod, int type, void *unused) 3369163953Srrs{ 3370163953Srrs switch (type) { 3371163953Srrs case MOD_LOAD: 3372163953Srrs mtx_init(&mrouter_mtx, "mrouter initialization", NULL, MTX_DEF); 3373163953Srrs MFC_LOCK_INIT(); 3374163953Srrs VIF_LOCK_INIT(); 3375163953Srrs ip_mrouter_reset(); 3376163953Srrs ip_mcast_src = X_ip_mcast_src; 3377163953Srrs ip_mforward = X_ip_mforward; 3378163953Srrs ip_mrouter_done = X_ip_mrouter_done; 3379163953Srrs ip_mrouter_get = X_ip_mrouter_get; 3380163953Srrs ip_mrouter_set = X_ip_mrouter_set; 3381163953Srrs ip_rsvp_force_done = X_ip_rsvp_force_done; 3382163953Srrs ip_rsvp_vif = X_ip_rsvp_vif; 3383163953Srrs legal_vif_num = X_legal_vif_num; 3384163953Srrs mrt_ioctl = X_mrt_ioctl; 3385163953Srrs rsvp_input_p = X_rsvp_input; 3386163953Srrs break; 3387163953Srrs 3388163953Srrs case MOD_UNLOAD: 3389163953Srrs /* 3390163953Srrs * Typically module unload happens after the user-level 3391163953Srrs * process has shutdown the kernel services (the check 3392163953Srrs * below insures someone can't just yank the module out 3393163953Srrs * from under a running process). But if the module is 3394163953Srrs * just loaded and then unloaded w/o starting up a user 3395163953Srrs * process we still need to cleanup. 3396163953Srrs */ 3397163953Srrs if (ip_mrouter) 3398163953Srrs return EINVAL; 3399163953Srrs 3400163953Srrs X_ip_mrouter_done(); 3401166675Srrs ip_mcast_src = NULL; 3402166675Srrs ip_mforward = NULL; 3403171440Srrs ip_mrouter_done = NULL; 3404163953Srrs ip_mrouter_get = NULL; 3405163953Srrs ip_mrouter_set = NULL; 3406163953Srrs ip_rsvp_force_done = NULL; 3407163953Srrs ip_rsvp_vif = NULL; 3408163953Srrs legal_vif_num = NULL; 3409163953Srrs mrt_ioctl = NULL; 3410163953Srrs rsvp_input_p = NULL; 3411163953Srrs VIF_LOCK_DESTROY(); 3412163953Srrs MFC_LOCK_DESTROY(); 3413172156Srrs mtx_destroy(&mrouter_mtx); 3414163953Srrs break; 3415163953Srrs default: 3416163953Srrs return EOPNOTSUPP; 3417163953Srrs } 3418163953Srrs return 0; 3419163953Srrs} 3420163953Srrs 3421164139Srrsstatic moduledata_t ip_mroutemod = { 3422163953Srrs "ip_mroute", 3423163953Srrs ip_mroute_modevent, 3424163953Srrs 0 3425163953Srrs}; 3426163953SrrsDECLARE_MODULE(ip_mroute, ip_mroutemod, SI_SUB_PSEUDO, SI_ORDER_ANY); 3427163953Srrs