netisr.c revision 150968
1139804Simp/*- 2185435Sbz * Copyright (c) 2001,2002,2003 Jonathan Lemon <jlemon@FreeBSD.org> 3185435Sbz * Copyright (c) 1997, Stefan Esser <se@freebsd.org> 4191673Sjamie * All rights reserved. 5185435Sbz * 6190466Sjamie * Redistribution and use in source and binary forms, with or without 7185404Sbz * modification, are permitted provided that the following conditions 8185404Sbz * are met: 9185404Sbz * 1. Redistributions of source code must retain the above copyright 10185404Sbz * notice, this list of conditions and the following disclaimer. 11185404Sbz * 2. Redistributions in binary form must reproduce the above copyright 12185404Sbz * notice, this list of conditions and the following disclaimer in the 13185404Sbz * documentation and/or other materials provided with the distribution. 14185404Sbz * 15185404Sbz * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16185404Sbz * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17185404Sbz * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18185404Sbz * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19185404Sbz * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20185404Sbz * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21185404Sbz * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22185404Sbz * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23185404Sbz * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24185404Sbz * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25185404Sbz * SUCH DAMAGE. 26185404Sbz * 2746197Sphk * $FreeBSD: head/sys/net/netisr.c 150968 2005-10-05 10:09:17Z glebius $ 2846155Sphk */ 29116182Sobrien 30116182Sobrien#include "opt_device_polling.h" 31116182Sobrien#include "opt_net.h" 32193066Sjamie 33185435Sbz#include <sys/param.h> 34185435Sbz#include <sys/bus.h> 35185435Sbz#include <sys/rtprio.h> 36131177Spjd#include <sys/systm.h> 3746155Sphk#include <sys/interrupt.h> 3846155Sphk#include <sys/kernel.h> 3946155Sphk#include <sys/kthread.h> 4046155Sphk#include <sys/lock.h> 4146155Sphk#include <sys/malloc.h> 4246155Sphk#include <sys/proc.h> 4346155Sphk#include <sys/random.h> 44192895Sjamie#include <sys/resourcevar.h> 45164032Srwatson#include <sys/sysctl.h> 4646155Sphk#include <sys/unistd.h> 47124882Srwatson#include <machine/atomic.h> 48177785Skib#include <machine/cpu.h> 4946155Sphk#include <machine/stdarg.h> 5087275Srwatson 5187275Srwatson#include <sys/mbuf.h> 52168401Spjd#include <sys/socket.h> 53193066Sjamie 54113275Smike#include <net/if.h> 55147185Spjd#include <net/if_types.h> 56113275Smike#include <net/if_var.h> 5746155Sphk#include <net/netisr.h> 58113275Smike 5957163Srwatson/* 60113275Smike * debug_mpsafenet controls network subsystem-wide use of the Giant lock, 61196019Srwatson * from system calls down to interrupt handlers. It can be changed only via 6246155Sphk * a tunable at boot, not at run-time, due to the complexity of unwinding. 63196019Srwatson * The compiled default is set via a kernel option; right now, the default 64196019Srwatson * unless otherwise specified is to run the network stack without Giant. 6546155Sphk */ 66196019Srwatson#ifdef NET_WITH_GIANT 67185435Sbzint debug_mpsafenet = 0; 68185435Sbz#else 69185435Sbzint debug_mpsafenet = 1; 70185435Sbz#endif 71185435Sbzint debug_mpsafenet_toolatetotwiddle = 0; 72185435Sbz 7346155SphkTUNABLE_INT("debug.mpsafenet", &debug_mpsafenet); 74163606SrwatsonSYSCTL_INT(_debug, OID_AUTO, mpsafenet, CTLFLAG_RD, &debug_mpsafenet, 0, 75163606Srwatson "Enable/disable MPSAFE network support"); 76195944Sjamie 77195944Sjamievolatile unsigned int netisr; /* scheduling bits for network */ 7846155Sphk 7946155Sphkstruct netisr { 80202468Sbz netisr_t *ni_handler; 81202468Sbz struct ifqueue *ni_queue; 82202468Sbz int ni_flags; 83202468Sbz} netisrs[32]; 84202468Sbz 85202468Sbzstatic void *net_ih; 86202468Sbz 87202468Sbz/* 88202468Sbz * Not all network code is currently capable of running MPSAFE; however, 89202468Sbz * most of it is. Since those sections that are not are generally optional 90202468Sbz * components not shipped with default kernels, we provide a basic way to 91202468Sbz * determine whether MPSAFE operation is permitted: based on a default of 92202468Sbz * yes, we permit non-MPSAFE components to use a registration call to 93202468Sbz * identify that they require Giant. If the system is early in the boot 94202468Sbz * process still, then we change the debug_mpsafenet setting to choose a 95192895Sjamie * non-MPSAFE execution mode (degraded). If it's too late for that (since 96192895Sjamie * the setting cannot be changed at run time), we generate a console warning 97192895Sjamie * that the configuration may be unsafe. 98192895Sjamie */ 99192895Sjamiestatic int mpsafe_warn_count; 100192895Sjamie 101192895Sjamie/* 102192895Sjamie * Function call implementing registration of a non-MPSAFE network component. 103194762Sjamie */ 104195944Sjamievoid 105201145Santoinenet_warn_not_mpsafe(const char *component) 106196176Sbz{ 107202468Sbz 108196176Sbz /* 109202468Sbz * If we're running with Giant over the network stack, there is no 110196176Sbz * problem. 111192895Sjamie */ 112192895Sjamie if (!debug_mpsafenet) 113192895Sjamie return; 11457163Srwatson 115192895Sjamie /* 116168401Spjd * If it's not too late to change the MPSAFE setting for the network 117191673Sjamie * stack, do so now. This effectively suppresses warnings by 118191673Sjamie * components registering later. 119179881Sdelphij */ 120113275Smike if (!debug_mpsafenet_toolatetotwiddle) { 121191673Sjamie debug_mpsafenet = 0; 122190466Sjamie printf("WARNING: debug.mpsafenet forced to 0 as %s requires " 123191673Sjamie "Giant\n", component); 124192895Sjamie return; 125192895Sjamie } 126185435Sbz 127190466Sjamie /* 128192895Sjamie * We must run without Giant, so generate a console warning with some 129185435Sbz * information with what to do about it. The system may be operating 130185435Sbz * unsafely, however. 131190466Sjamie */ 132192895Sjamie printf("WARNING: Network stack Giant-free, but %s requires Giant.\n", 133185435Sbz component); 134113275Smike if (mpsafe_warn_count == 0) 135191673Sjamie printf(" Consider adding 'options NET_WITH_GIANT' or " 136191673Sjamie "setting debug.mpsafenet=0\n"); 137191673Sjamie mpsafe_warn_count++; 138191673Sjamie} 139191673Sjamie 140191673Sjamie/* 141113275Smike * This sysinit is run after any pre-loaded or compiled-in components have 142192895Sjamie * announced that they require Giant, but before any modules loaded at 143192895Sjamie * run-time. 144192895Sjamie */ 145192895Sjamiestatic void 146192895Sjamienet_mpsafe_toolate(void *arg) 147202468Sbz{ 148202468Sbz 149202468Sbz debug_mpsafenet_toolatetotwiddle = 1; 150202468Sbz 151202468Sbz if (!debug_mpsafenet) 152202468Sbz printf("WARNING: MPSAFE network stack disabled, expect " 153192895Sjamie "reduced performance.\n"); 154192895Sjamie} 155192895Sjamie 156192895SjamieSYSINIT(net_mpsafe_toolate, SI_SUB_SETTINGS, SI_ORDER_ANY, net_mpsafe_toolate, 157202468Sbz NULL); 158202468Sbz 159202468Sbzvoid 160202468Sbzlegacy_setsoftnet(void) 161202468Sbz{ 162202468Sbz swi_sched(net_ih, 0); 163195870Sjamie} 164195870Sjamie 165195870Sjamievoid 166195870Sjamienetisr_register(int num, netisr_t *handler, struct ifqueue *inq, int flags) 167195870Sjamie{ 168195870Sjamie 169195870Sjamie KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))), 170195870Sjamie ("bad isr %d", num)); 171195870Sjamie netisrs[num].ni_handler = handler; 172195870Sjamie netisrs[num].ni_queue = inq; 173195870Sjamie if ((flags & NETISR_MPSAFE) && !debug_mpsafenet) 174192895Sjamie flags &= ~NETISR_MPSAFE; 175195870Sjamie netisrs[num].ni_flags = flags; 176192895Sjamie} 177192895Sjamie 178195870Sjamievoid 179192895Sjamienetisr_unregister(int num) 180192895Sjamie{ 181192895Sjamie struct netisr *ni; 182192895Sjamie 183192895Sjamie KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))), 184192895Sjamie ("bad isr %d", num)); 185192895Sjamie ni = &netisrs[num]; 186192895Sjamie ni->ni_handler = NULL; 187192895Sjamie if (ni->ni_queue != NULL) 188192895Sjamie IF_DRAIN(ni->ni_queue); 189192895Sjamie ni->ni_queue = NULL; 190192895Sjamie} 191192895Sjamie 192192895Sjamiestruct isrstat { 193192895Sjamie int isrs_count; /* dispatch count */ 194192895Sjamie int isrs_directed; /* ...directly dispatched */ 195192895Sjamie int isrs_deferred; /* ...queued instead */ 196192895Sjamie int isrs_queued; /* intentionally queueued */ 197192895Sjamie int isrs_drop; /* dropped 'cuz no handler */ 198192895Sjamie int isrs_swi_count; /* swi_net handlers called */ 199192895Sjamie}; 200192895Sjamiestatic struct isrstat isrstat; 201192895Sjamie 202196002SjamieSYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr counters"); 203196002Sjamie 204192895Sjamiestatic int netisr_direct = 0; 205196002SjamieSYSCTL_INT(_net_isr, OID_AUTO, direct, CTLFLAG_RW, 206192895Sjamie &netisr_direct, 0, "enable direct dispatch"); 207193865SjamieTUNABLE_INT("net.isr.direct", &netisr_direct); 208192895Sjamie 209192895SjamieSYSCTL_INT(_net_isr, OID_AUTO, count, CTLFLAG_RD, 210192895Sjamie &isrstat.isrs_count, 0, ""); 211185435SbzSYSCTL_INT(_net_isr, OID_AUTO, directed, CTLFLAG_RD, 212185435Sbz &isrstat.isrs_directed, 0, ""); 213185435SbzSYSCTL_INT(_net_isr, OID_AUTO, deferred, CTLFLAG_RD, 214185435Sbz &isrstat.isrs_deferred, 0, ""); 215185435SbzSYSCTL_INT(_net_isr, OID_AUTO, queued, CTLFLAG_RD, 216185435Sbz &isrstat.isrs_queued, 0, ""); 217185435SbzSYSCTL_INT(_net_isr, OID_AUTO, drop, CTLFLAG_RD, 218185435Sbz &isrstat.isrs_drop, 0, ""); 219185435SbzSYSCTL_INT(_net_isr, OID_AUTO, swi_count, CTLFLAG_RD, 220185435Sbz &isrstat.isrs_swi_count, 0, ""); 221185435Sbz 222185435Sbz/* 223185435Sbz * Process all packets currently present in a netisr queue. Used to 224185435Sbz * drain an existing set of packets waiting for processing when we 225185435Sbz * begin direct dispatch, to avoid processing packets out of order. 226185435Sbz */ 227185435Sbzstatic void 228185435Sbznetisr_processqueue(struct netisr *ni) 229185435Sbz{ 230185435Sbz struct mbuf *m; 231185435Sbz 232185435Sbz for (;;) { 233185435Sbz IF_DEQUEUE(ni->ni_queue, m); 234185435Sbz if (m == NULL) 235185435Sbz break; 236185435Sbz ni->ni_handler(m); 237185435Sbz } 238185435Sbz} 239185435Sbz 240185435Sbz/* 241185435Sbz * Call the netisr directly instead of queueing the packet, if possible. 242185435Sbz */ 243185435Sbzvoid 244185435Sbznetisr_dispatch(int num, struct mbuf *m) 245185435Sbz{ 246185435Sbz struct netisr *ni; 247185435Sbz 248190466Sjamie isrstat.isrs_count++; /* XXX redundant */ 249185435Sbz KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))), 250185435Sbz ("bad isr %d", num)); 251185435Sbz ni = &netisrs[num]; 252185435Sbz if (ni->ni_queue == NULL) { 253185435Sbz isrstat.isrs_drop++; 254185435Sbz m_freem(m); 255185435Sbz return; 256185435Sbz } 257185435Sbz /* 258191673Sjamie * Do direct dispatch only for MPSAFE netisrs (and 259191673Sjamie * only when enabled). Note that when a netisr is 260191673Sjamie * marked MPSAFE we permit multiple concurrent instances 261191673Sjamie * to run. We guarantee only the order in which 262191673Sjamie * packets are processed for each "dispatch point" in 263191673Sjamie * the system (i.e. call to netisr_dispatch or 264191673Sjamie * netisr_queue). This insures ordering of packets 265185435Sbz * from an interface but does not guarantee ordering 266191673Sjamie * between multiple places in the system (e.g. IP 267191673Sjamie * dispatched from interfaces vs. IP queued from IPSec). 268192895Sjamie */ 269185435Sbz if (netisr_direct && (ni->ni_flags & NETISR_MPSAFE)) { 270191673Sjamie isrstat.isrs_directed++; 271191673Sjamie /* 272191673Sjamie * NB: We used to drain the queue before handling 273185435Sbz * the packet but now do not. Doing so here will 274191673Sjamie * not preserve ordering so instead we fallback to 275191673Sjamie * guaranteeing order only from dispatch points 276191673Sjamie * in the system (see above). 277191673Sjamie */ 278185435Sbz ni->ni_handler(m); 279192895Sjamie } else { 280192895Sjamie isrstat.isrs_deferred++; 281191673Sjamie if (IF_HANDOFF(ni->ni_queue, m, NULL)) 282191673Sjamie schednetisr(num); 283191673Sjamie } 284192895Sjamie} 285192895Sjamie 286192895Sjamie/* 287192895Sjamie * Same as above, but always queue. 288191673Sjamie * This is either used in places where we are not confident that 289191673Sjamie * direct dispatch is possible, or where queueing is required. 290191673Sjamie * It returns (0) on success and ERRNO on failure. On failure the 291191673Sjamie * mbuf has been free'd. 292185435Sbz */ 293191673Sjamieint 294191673Sjamienetisr_queue(int num, struct mbuf *m) 295185435Sbz{ 296191673Sjamie struct netisr *ni; 297185435Sbz 298191673Sjamie KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))), 299191673Sjamie ("bad isr %d", num)); 300191673Sjamie ni = &netisrs[num]; 301191673Sjamie if (ni->ni_queue == NULL) { 302191673Sjamie isrstat.isrs_drop++; 303192895Sjamie m_freem(m); 304192895Sjamie return (ENXIO); 305192895Sjamie } 306192895Sjamie isrstat.isrs_queued++; 307192895Sjamie if (!IF_HANDOFF(ni->ni_queue, m, NULL)) 308192895Sjamie return (ENOBUFS); /* IF_HANDOFF has free'd the mbuf */ 309192895Sjamie schednetisr(num); 310192895Sjamie return (0); 311192895Sjamie} 312192895Sjamie 313192895Sjamiestatic void 314192895Sjamieswi_net(void *dummy) 315193865Sjamie{ 316193865Sjamie struct netisr *ni; 317193865Sjamie u_int bits; 318193865Sjamie int i; 319193865Sjamie#ifdef DEVICE_POLLING 320193865Sjamie const int polling = 1; 321193865Sjamie#else 322193865Sjamie const int polling = 0; 323193865Sjamie#endif 324192895Sjamie 325192895Sjamie do { 326185435Sbz bits = atomic_readandclear_int(&netisr); 327193865Sjamie if (bits == 0) 328192895Sjamie break; 329192895Sjamie while ((i = ffs(bits)) != 0) { 330192895Sjamie isrstat.isrs_swi_count++; 331192895Sjamie i--; 332192895Sjamie bits &= ~(1 << i); 333192895Sjamie ni = &netisrs[i]; 334192895Sjamie if (ni->ni_handler == NULL) { 335192895Sjamie printf("swi_net: unregistered isr %d.\n", i); 336192895Sjamie continue; 337192895Sjamie } 338192895Sjamie if ((ni->ni_flags & NETISR_MPSAFE) == 0) { 339192895Sjamie mtx_lock(&Giant); 340192895Sjamie if (ni->ni_queue == NULL) 341192895Sjamie ni->ni_handler(NULL); 342192895Sjamie else 343192895Sjamie netisr_processqueue(ni); 344192895Sjamie mtx_unlock(&Giant); 345192895Sjamie } else { 346192895Sjamie if (ni->ni_queue == NULL) 347192895Sjamie ni->ni_handler(NULL); 348192895Sjamie else 349192895Sjamie netisr_processqueue(ni); 350192895Sjamie } 351192895Sjamie } 352192895Sjamie } while (polling); 353192895Sjamie} 354192895Sjamie 355192895Sjamiestatic void 356192895Sjamiestart_netisr(void *dummy) 357192895Sjamie{ 358192895Sjamie 359192895Sjamie if (swi_add(NULL, "net", swi_net, NULL, SWI_NET, INTR_MPSAFE, &net_ih)) 360192895Sjamie panic("start_netisr"); 361192895Sjamie} 362192895SjamieSYSINIT(start_netisr, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_netisr, NULL) 363192895Sjamie