netisr.c revision 134443
1111888Sjlemon/*- 2111888Sjlemon * Copyright (c) 2001,2002,2003 Jonathan Lemon <jlemon@FreeBSD.org> 3103781Sjake * Copyright (c) 1997, Stefan Esser <se@freebsd.org> 4103781Sjake * All rights reserved. 5103781Sjake * 6103781Sjake * Redistribution and use in source and binary forms, with or without 7103781Sjake * modification, are permitted provided that the following conditions 8103781Sjake * are met: 9103781Sjake * 1. Redistributions of source code must retain the above copyright 10111888Sjlemon * notice, this list of conditions and the following disclaimer. 11103781Sjake * 2. Redistributions in binary form must reproduce the above copyright 12103781Sjake * notice, this list of conditions and the following disclaimer in the 13103781Sjake * documentation and/or other materials provided with the distribution. 14103781Sjake * 15111888Sjlemon * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16111888Sjlemon * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17111888Sjlemon * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18111888Sjlemon * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19111888Sjlemon * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20111888Sjlemon * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21111888Sjlemon * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22111888Sjlemon * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23111888Sjlemon * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24111888Sjlemon * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25111888Sjlemon * SUCH DAMAGE. 26103781Sjake * 27103781Sjake * $FreeBSD: head/sys/net/netisr.c 134443 2004-08-28 15:11:13Z rwatson $ 28103781Sjake */ 29103781Sjake 30134443Srwatson#include "opt_net.h" 31134443Srwatson 32103781Sjake#include <sys/param.h> 33111888Sjlemon#include <sys/bus.h> 34111888Sjlemon#include <sys/rtprio.h> 35103781Sjake#include <sys/systm.h> 36103781Sjake#include <sys/interrupt.h> 37103781Sjake#include <sys/kernel.h> 38111888Sjlemon#include <sys/kthread.h> 39111888Sjlemon#include <sys/lock.h> 40111888Sjlemon#include <sys/malloc.h> 41111888Sjlemon#include <sys/proc.h> 42111888Sjlemon#include <sys/random.h> 43111888Sjlemon#include <sys/resourcevar.h> 44111888Sjlemon#include <sys/sysctl.h> 45111888Sjlemon#include <sys/unistd.h> 46111888Sjlemon#include <machine/atomic.h> 47111888Sjlemon#include <machine/cpu.h> 48111888Sjlemon#include <machine/stdarg.h> 49103781Sjake 50111888Sjlemon#include <sys/mbuf.h> 51111888Sjlemon#include <sys/socket.h> 52111888Sjlemon 53111888Sjlemon#include <net/if.h> 54111888Sjlemon#include <net/if_types.h> 55111888Sjlemon#include <net/if_var.h> 56103781Sjake#include <net/netisr.h> 57103781Sjake 58132368Srwatson/* 59132368Srwatson * debug_mpsafenet controls network subsystem-wide use of the Giant lock, 60134443Srwatson * from system calls down to interrupt handlers. It can be changed only via 61134443Srwatson * a tunable at boot, not at run-time, due to the complexity of unwinding. 62134443Srwatson * The compiled default is set via a kernel option; right now, the default 63134443Srwatson * unless otherwise specified is to run the network stack without Giant. 64122152Ssam */ 65134443Srwatson#ifdef NET_WITH_GIANT 66122152Ssamint debug_mpsafenet = 0; 67134443Srwatson#else 68134443Srwatsonint debug_mpsafenet = 1; 69134443Srwatson#endif 70134443Srwatsonint debug_mpsafenet_toolatetotwiddle = 0; 71134443Srwatson 72122152SsamTUNABLE_INT("debug.mpsafenet", &debug_mpsafenet); 73122152SsamSYSCTL_INT(_debug, OID_AUTO, mpsafenet, CTLFLAG_RD, &debug_mpsafenet, 0, 74122152Ssam "Enable/disable MPSAFE network support"); 75122152Ssam 76111888Sjlemonvolatile unsigned int netisr; /* scheduling bits for network */ 77103781Sjake 78111888Sjlemonstruct netisr { 79111888Sjlemon netisr_t *ni_handler; 80111888Sjlemon struct ifqueue *ni_queue; 81122320Ssam int ni_flags; 82111888Sjlemon} netisrs[32]; 83103781Sjake 84111888Sjlemonstatic void *net_ih; 85111888Sjlemon 86134443Srwatson/* 87134443Srwatson * Note all network code is currently capable of running MPSAFE; however, 88134443Srwatson * most of it is. Since those sections that are not are generally optional 89134443Srwatson * components not shipped with default kernels, we provide a basic way to 90134443Srwatson * determine whether MPSAFE operation is permitted: based on a default of 91134443Srwatson * yes, we permit non-MPSAFE components to use a registration call to 92134443Srwatson * identify that they require Giant. If the system is early in the boot 93134443Srwatson * process still, then we change the debug_mpsafenet setting to choose a 94134443Srwatson * non-MPSAFE execution mode (degraded). If it's too late for that (since 95134443Srwatson * the setting cannot be changed at run time), we generate a console warning 96134443Srwatson * that the configuration may be unsafe. 97134443Srwatson */ 98134443Srwatsonstatic int mpsafe_warn_count; 99134443Srwatson 100134443Srwatson/* 101134443Srwatson * Function call implementing registration of a non-MPSAFE network component. 102134443Srwatson */ 103103781Sjakevoid 104134443Srwatsonnet_warn_not_mpsafe(const char *component) 105134443Srwatson{ 106134443Srwatson 107134443Srwatson /* 108134443Srwatson * If we're running with Giant over the network stack, there is no 109134443Srwatson * problem. 110134443Srwatson */ 111134443Srwatson if (!debug_mpsafenet) 112134443Srwatson return; 113134443Srwatson 114134443Srwatson /* 115134443Srwatson * If it's not too late to change the MPSAFE setting for the network 116134443Srwatson * stack, do so now. This effectively suppresses warnings by 117134443Srwatson * components registering later. 118134443Srwatson */ 119134443Srwatson if (!debug_mpsafenet_toolatetotwiddle) { 120134443Srwatson debug_mpsafenet = 0; 121134443Srwatson printf("WARNING: debug.mpsafenet forced to = as %s requires " 122134443Srwatson "Giant\n", component); 123134443Srwatson return; 124134443Srwatson } 125134443Srwatson 126134443Srwatson /* 127134443Srwatson * We must run without Giant, so generate a console warning with some 128134443Srwatson * information with what to do about it. The system may be operating 129134443Srwatson * unsafely, however. 130134443Srwatson */ 131134443Srwatson printf("WARNING: Network stack Giant-free, but %s requires Giant.\n", 132134443Srwatson component); 133134443Srwatson if (mpsafe_warn_count == 0) 134134443Srwatson printf(" Consider adding 'options NET_WITH_GIANT' or " 135134443Srwatson "setting debug.mpsafenet=0\n"); 136134443Srwatson mpsafe_warn_count++; 137134443Srwatson} 138134443Srwatson 139134443Srwatson/* 140134443Srwatson * This sysinit is run after any pre-loaded or compiled-in components have 141134443Srwatson * announced that they require Giant, but before any modules loaded at 142134443Srwatson * run-time. 143134443Srwatson */ 144134443Srwatsonstatic void 145134443Srwatsonnet_mpsafe_toolate(void *arg) 146134443Srwatson{ 147134443Srwatson 148134443Srwatson debug_mpsafenet_toolatetotwiddle = 1; 149134443Srwatson 150134443Srwatson if (!debug_mpsafenet) 151134443Srwatson printf("WARNING: MPSAFE network stack disabled, expect " 152134443Srwatson "reduced performance.\n"); 153134443Srwatson} 154134443Srwatson 155134443SrwatsonSYSINIT(net_mpsafe_toolate, SI_SUB_SETTINGS, SI_ORDER_ANY, net_mpsafe_toolate, 156134443Srwatson NULL); 157134443Srwatson 158134443Srwatsonvoid 159103781Sjakelegacy_setsoftnet(void) 160103781Sjake{ 161103781Sjake swi_sched(net_ih, 0); 162103781Sjake} 163103781Sjake 164111888Sjlemonvoid 165122320Ssamnetisr_register(int num, netisr_t *handler, struct ifqueue *inq, int flags) 166103781Sjake{ 167103781Sjake 168111888Sjlemon KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))), 169111888Sjlemon ("bad isr %d", num)); 170111888Sjlemon netisrs[num].ni_handler = handler; 171111888Sjlemon netisrs[num].ni_queue = inq; 172122320Ssam if ((flags & NETISR_MPSAFE) && !debug_mpsafenet) 173122320Ssam flags &= ~NETISR_MPSAFE; 174122320Ssam netisrs[num].ni_flags = flags; 175111888Sjlemon} 176111888Sjlemon 177111888Sjlemonvoid 178111888Sjlemonnetisr_unregister(int num) 179111888Sjlemon{ 180111888Sjlemon struct netisr *ni; 181111888Sjlemon 182111888Sjlemon KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))), 183111888Sjlemon ("bad isr %d", num)); 184111888Sjlemon ni = &netisrs[num]; 185111888Sjlemon ni->ni_handler = NULL; 186122320Ssam if (ni->ni_queue != NULL) 187111888Sjlemon IF_DRAIN(ni->ni_queue); 188103781Sjake} 189103781Sjake 190111888Sjlemonstruct isrstat { 191111888Sjlemon int isrs_count; /* dispatch count */ 192122320Ssam int isrs_directed; /* ...directly dispatched */ 193111888Sjlemon int isrs_deferred; /* ...queued instead */ 194111888Sjlemon int isrs_queued; /* intentionally queueued */ 195122320Ssam int isrs_drop; /* dropped 'cuz no handler */ 196111888Sjlemon int isrs_swi_count; /* swi_net handlers called */ 197111888Sjlemon}; 198111888Sjlemonstatic struct isrstat isrstat; 199111888Sjlemon 200111888SjlemonSYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr counters"); 201111888Sjlemon 202120653Srwatsonstatic int netisr_enable = 0; 203111888SjlemonSYSCTL_INT(_net_isr, OID_AUTO, enable, CTLFLAG_RW, 204111888Sjlemon &netisr_enable, 0, "enable direct dispatch"); 205120656SrwatsonTUNABLE_INT("net.isr.enable", &netisr_enable); 206111888Sjlemon 207111888SjlemonSYSCTL_INT(_net_isr, OID_AUTO, count, CTLFLAG_RD, 208111888Sjlemon &isrstat.isrs_count, 0, ""); 209111888SjlemonSYSCTL_INT(_net_isr, OID_AUTO, directed, CTLFLAG_RD, 210111888Sjlemon &isrstat.isrs_directed, 0, ""); 211111888SjlemonSYSCTL_INT(_net_isr, OID_AUTO, deferred, CTLFLAG_RD, 212111888Sjlemon &isrstat.isrs_deferred, 0, ""); 213111888SjlemonSYSCTL_INT(_net_isr, OID_AUTO, queued, CTLFLAG_RD, 214111888Sjlemon &isrstat.isrs_queued, 0, ""); 215122320SsamSYSCTL_INT(_net_isr, OID_AUTO, drop, CTLFLAG_RD, 216122320Ssam &isrstat.isrs_drop, 0, ""); 217111888SjlemonSYSCTL_INT(_net_isr, OID_AUTO, swi_count, CTLFLAG_RD, 218111888Sjlemon &isrstat.isrs_swi_count, 0, ""); 219111888Sjlemon 220111888Sjlemon/* 221120704Srwatson * Process all packets currently present in a netisr queue. Used to 222120704Srwatson * drain an existing set of packets waiting for processing when we 223120704Srwatson * begin direct dispatch, to avoid processing packets out of order. 224120704Srwatson */ 225120704Srwatsonstatic void 226120704Srwatsonnetisr_processqueue(struct netisr *ni) 227120704Srwatson{ 228120704Srwatson struct mbuf *m; 229120704Srwatson 230120704Srwatson for (;;) { 231120704Srwatson IF_DEQUEUE(ni->ni_queue, m); 232120704Srwatson if (m == NULL) 233120704Srwatson break; 234120704Srwatson ni->ni_handler(m); 235120704Srwatson } 236120704Srwatson} 237120704Srwatson 238120704Srwatson/* 239111888Sjlemon * Call the netisr directly instead of queueing the packet, if possible. 240111888Sjlemon */ 241111888Sjlemonvoid 242111888Sjlemonnetisr_dispatch(int num, struct mbuf *m) 243103781Sjake{ 244111888Sjlemon struct netisr *ni; 245103781Sjake 246122320Ssam isrstat.isrs_count++; /* XXX redundant */ 247111888Sjlemon KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))), 248111888Sjlemon ("bad isr %d", num)); 249111888Sjlemon ni = &netisrs[num]; 250112011Sjlemon if (ni->ni_queue == NULL) { 251122320Ssam isrstat.isrs_drop++; 252112011Sjlemon m_freem(m); 253112011Sjlemon return; 254112011Sjlemon } 255122320Ssam /* 256122320Ssam * Do direct dispatch only for MPSAFE netisrs (and 257122320Ssam * only when enabled). Note that when a netisr is 258122320Ssam * marked MPSAFE we permit multiple concurrent instances 259122320Ssam * to run. We guarantee only the order in which 260122320Ssam * packets are processed for each "dispatch point" in 261122320Ssam * the system (i.e. call to netisr_dispatch or 262122320Ssam * netisr_queue). This insures ordering of packets 263122320Ssam * from an interface but does not guarantee ordering 264122320Ssam * between multiple places in the system (e.g. IP 265122320Ssam * dispatched from interfaces vs. IP queued from IPSec). 266122320Ssam */ 267122320Ssam if (netisr_enable && (ni->ni_flags & NETISR_MPSAFE)) { 268111888Sjlemon isrstat.isrs_directed++; 269111888Sjlemon /* 270122320Ssam * NB: We used to drain the queue before handling 271122320Ssam * the packet but now do not. Doing so here will 272122320Ssam * not preserve ordering so instead we fallback to 273122320Ssam * guaranteeing order only from dispatch points 274122320Ssam * in the system (see above). 275111888Sjlemon */ 276111888Sjlemon ni->ni_handler(m); 277111888Sjlemon } else { 278111888Sjlemon isrstat.isrs_deferred++; 279111888Sjlemon if (IF_HANDOFF(ni->ni_queue, m, NULL)) 280111888Sjlemon schednetisr(num); 281103781Sjake } 282103781Sjake} 283103781Sjake 284111888Sjlemon/* 285111888Sjlemon * Same as above, but always queue. 286111888Sjlemon * This is either used in places where we are not confident that 287111888Sjlemon * direct dispatch is possible, or where queueing is required. 288134391Sandre * It returns (0) on success and ERRNO on failure. On failure the 289134391Sandre * mbuf has been free'd. 290111888Sjlemon */ 291111888Sjlemonint 292111888Sjlemonnetisr_queue(int num, struct mbuf *m) 293111888Sjlemon{ 294111888Sjlemon struct netisr *ni; 295111888Sjlemon 296111888Sjlemon KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))), 297111888Sjlemon ("bad isr %d", num)); 298111888Sjlemon ni = &netisrs[num]; 299112011Sjlemon if (ni->ni_queue == NULL) { 300122320Ssam isrstat.isrs_drop++; 301112011Sjlemon m_freem(m); 302134391Sandre return (ENXIO); 303112011Sjlemon } 304111888Sjlemon isrstat.isrs_queued++; 305111888Sjlemon if (!IF_HANDOFF(ni->ni_queue, m, NULL)) 306134391Sandre return (ENOBUFS); /* IF_HANDOFF has free'd the mbuf */ 307111888Sjlemon schednetisr(num); 308134391Sandre return (0); 309111888Sjlemon} 310103781Sjake 311103781Sjakestatic void 312103781Sjakeswi_net(void *dummy) 313103781Sjake{ 314111888Sjlemon struct netisr *ni; 315103781Sjake u_int bits; 316103781Sjake int i; 317103781Sjake#ifdef DEVICE_POLLING 318111888Sjlemon const int polling = 1; 319111888Sjlemon#else 320111888Sjlemon const int polling = 0; 321103781Sjake#endif 322111888Sjlemon 323111888Sjlemon do { 324111888Sjlemon bits = atomic_readandclear_int(&netisr); 325111888Sjlemon if (bits == 0) 326111888Sjlemon break; 327111888Sjlemon while ((i = ffs(bits)) != 0) { 328111888Sjlemon isrstat.isrs_swi_count++; 329111888Sjlemon i--; 330111888Sjlemon bits &= ~(1 << i); 331111888Sjlemon ni = &netisrs[i]; 332111888Sjlemon if (ni->ni_handler == NULL) { 333111888Sjlemon printf("swi_net: unregistered isr %d.\n", i); 334111888Sjlemon continue; 335111888Sjlemon } 336122320Ssam if ((ni->ni_flags & NETISR_MPSAFE) == 0) { 337122320Ssam mtx_lock(&Giant); 338122320Ssam if (ni->ni_queue == NULL) 339122320Ssam ni->ni_handler(NULL); 340122320Ssam else 341122320Ssam netisr_processqueue(ni); 342122320Ssam mtx_unlock(&Giant); 343122320Ssam } else { 344122320Ssam if (ni->ni_queue == NULL) 345122320Ssam ni->ni_handler(NULL); 346122320Ssam else 347122320Ssam netisr_processqueue(ni); 348122320Ssam } 349111888Sjlemon } 350111888Sjlemon } while (polling); 351103781Sjake} 352103781Sjake 353103781Sjakestatic void 354103781Sjakestart_netisr(void *dummy) 355103781Sjake{ 356103781Sjake 357122320Ssam if (swi_add(NULL, "net", swi_net, NULL, SWI_NET, INTR_MPSAFE, &net_ih)) 358103781Sjake panic("start_netisr"); 359103781Sjake} 360103781SjakeSYSINIT(start_netisr, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_netisr, NULL) 361