/*-
 * Copyright (c) 2007-2009 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
25193219Srwatson */ 26193219Srwatson 27193219Srwatson#include <sys/cdefs.h> 28193219Srwatson__FBSDID("$FreeBSD: head/sys/net/netisr.c 195182 2009-06-30 05:21:00Z bz $"); 29193219Srwatson 30193219Srwatson/* 31193219Srwatson * netisr is a packet dispatch service, allowing synchronous (directly 32193219Srwatson * dispatched) and asynchronous (deferred dispatch) processing of packets by 33193219Srwatson * registered protocol handlers. Callers pass a protocol identifier and 34193219Srwatson * packet to netisr, along with a direct dispatch hint, and work will either 35193219Srwatson * be immediately processed with the registered handler, or passed to a 36193219Srwatson * kernel software interrupt (SWI) thread for deferred dispatch. Callers 37193219Srwatson * will generally select one or the other based on: 38103781Sjake * 39193219Srwatson * - Might directly dispatching a netisr handler lead to code reentrance or 40193219Srwatson * lock recursion, such as entering the socket code from the socket code. 41193219Srwatson * - Might directly dispatching a netisr handler lead to recursive 42193219Srwatson * processing, such as when decapsulating several wrapped layers of tunnel 43193219Srwatson * information (IPSEC within IPSEC within ...). 44193219Srwatson * 45193219Srwatson * Maintaining ordering for protocol streams is a critical design concern. 46193219Srwatson * Enforcing ordering limits the opportunity for concurrency, but maintains 47193219Srwatson * the strong ordering requirements found in some protocols, such as TCP. Of 48193219Srwatson * related concern is CPU affinity--it is desirable to process all data 49193219Srwatson * associated with a particular stream on the same CPU over time in order to 50193219Srwatson * avoid acquiring locks associated with the connection on different CPUs, 51193219Srwatson * keep connection data in one cache, and to generally encourage associated 52193219Srwatson * user threads to live on the same CPU as the stream. 
It's also desirable 53193219Srwatson * to avoid lock migration and contention where locks are associated with 54193219Srwatson * more than one flow. 55193219Srwatson * 56193219Srwatson * netisr supports several policy variations, represented by the 57193219Srwatson * NETISR_POLICY_* constants, allowing protocols to play a varying role in 58193219Srwatson * identifying flows, assigning work to CPUs, etc. These are described in 59193219Srwatson * detail in netisr.h. 60103781Sjake */ 61103781Sjake 62193219Srwatson#include "opt_ddb.h" 63150968Sglebius#include "opt_device_polling.h" 64134443Srwatson 65103781Sjake#include <sys/param.h> 66111888Sjlemon#include <sys/bus.h> 67103781Sjake#include <sys/kernel.h> 68111888Sjlemon#include <sys/kthread.h> 69193219Srwatson#include <sys/interrupt.h> 70111888Sjlemon#include <sys/lock.h> 71193219Srwatson#include <sys/mbuf.h> 72193219Srwatson#include <sys/mutex.h> 73195019Srwatson#include <sys/pcpu.h> 74111888Sjlemon#include <sys/proc.h> 75193219Srwatson#include <sys/rmlock.h> 76193219Srwatson#include <sys/sched.h> 77193219Srwatson#include <sys/smp.h> 78193219Srwatson#include <sys/socket.h> 79111888Sjlemon#include <sys/sysctl.h> 80193219Srwatson#include <sys/systm.h> 81191816Szec#include <sys/vimage.h> 82103781Sjake 83193219Srwatson#ifdef DDB 84193219Srwatson#include <ddb/ddb.h> 85193219Srwatson#endif 86111888Sjlemon 87111888Sjlemon#include <net/if.h> 88111888Sjlemon#include <net/if_var.h> 89103781Sjake#include <net/netisr.h> 90103781Sjake 91193219Srwatson/*- 92193219Srwatson * Synchronize use and modification of the registered netisr data structures; 93193219Srwatson * acquire a read lock while modifying the set of registered protocols to 94193219Srwatson * prevent partially registered or unregistered protocols from being run. 95193219Srwatson * 96193219Srwatson * The following data structures and fields are protected by this lock: 97193219Srwatson * 98193219Srwatson * - The np array, including all fields of struct netisr_proto. 
99193219Srwatson * - The nws array, including all fields of struct netisr_worker. 100193219Srwatson * - The nws_array array. 101193219Srwatson * 102193219Srwatson * Note: the NETISR_LOCKING define controls whether read locks are acquired 103193219Srwatson * in packet processing paths requiring netisr registration stability. This 104193219Srwatson * is disabled by default as it can lead to a measurable performance 105193219Srwatson * degradation even with rmlocks (3%-6% for loopback ping-pong traffic), and 106193219Srwatson * because netisr registration and unregistration is extremely rare at 107193219Srwatson * runtime. If it becomes more common, this decision should be revisited. 108193219Srwatson * 109193219Srwatson * XXXRW: rmlocks don't support assertions. 110193219Srwatson */ 111193219Srwatsonstatic struct rmlock netisr_rmlock; 112193219Srwatson#define NETISR_LOCK_INIT() rm_init_flags(&netisr_rmlock, "netisr", \ 113193219Srwatson RM_NOWITNESS) 114193219Srwatson#define NETISR_LOCK_ASSERT() 115193219Srwatson#define NETISR_RLOCK(tracker) rm_rlock(&netisr_rmlock, (tracker)) 116193219Srwatson#define NETISR_RUNLOCK(tracker) rm_runlock(&netisr_rmlock, (tracker)) 117193219Srwatson#define NETISR_WLOCK() rm_wlock(&netisr_rmlock) 118193219Srwatson#define NETISR_WUNLOCK() rm_wunlock(&netisr_rmlock) 119193219Srwatson/* #define NETISR_LOCKING */ 120103781Sjake 121193219SrwatsonSYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr"); 122103781Sjake 123193219Srwatson/*- 124193219Srwatson * Three direct dispatch policies are supported: 125193219Srwatson * 126193219Srwatson * - Always defer: all work is scheduled for a netisr, regardless of context. 127193219Srwatson * (!direct) 128193219Srwatson * 129193219Srwatson * - Hybrid: if the executing context allows direct dispatch, and we're 130193219Srwatson * running on the CPU the work would be done on, then direct dispatch if it 131193219Srwatson * wouldn't violate ordering constraints on the workstream. 
132193219Srwatson * (direct && !direct_force) 133193219Srwatson * 134193219Srwatson * - Always direct: if the executing context allows direct dispatch, always 135193219Srwatson * direct dispatch. (direct && direct_force) 136193219Srwatson * 137193219Srwatson * Notice that changing the global policy could lead to short periods of 138193219Srwatson * misordered processing, but this is considered acceptable as compared to 139193219Srwatson * the complexity of enforcing ordering during policy changes. 140193219Srwatson */ 141193219Srwatsonstatic int netisr_direct_force = 1; /* Always direct dispatch. */ 142193219SrwatsonTUNABLE_INT("net.isr.direct_force", &netisr_direct_force); 143193219SrwatsonSYSCTL_INT(_net_isr, OID_AUTO, direct_force, CTLFLAG_RW, 144193219Srwatson &netisr_direct_force, 0, "Force direct dispatch"); 145111888Sjlemon 146193219Srwatsonstatic int netisr_direct = 1; /* Enable direct dispatch. */ 147193219SrwatsonTUNABLE_INT("net.isr.direct", &netisr_direct); 148193219SrwatsonSYSCTL_INT(_net_isr, OID_AUTO, direct, CTLFLAG_RW, 149193219Srwatson &netisr_direct, 0, "Enable direct dispatch"); 150193219Srwatson 151193219Srwatson/* 152193219Srwatson * Allow the administrator to limit the number of threads (CPUs) to use for 153193219Srwatson * netisr. We don't check netisr_maxthreads before creating the thread for 154193219Srwatson * CPU 0, so in practice we ignore values <= 1. This must be set at boot. 155193219Srwatson * We will create at most one thread per CPU. 156193219Srwatson */ 157195078Srwatsonstatic int netisr_maxthreads = -1; /* Max number of threads. */ 158193219SrwatsonTUNABLE_INT("net.isr.maxthreads", &netisr_maxthreads); 159193219SrwatsonSYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RD, 160193219Srwatson &netisr_maxthreads, 0, 161193219Srwatson "Use at most this many CPUs for netisr processing"); 162193219Srwatson 163193219Srwatsonstatic int netisr_bindthreads = 0; /* Bind threads to CPUs. 
*/ 164193219SrwatsonTUNABLE_INT("net.isr.bindthreads", &netisr_bindthreads); 165193219SrwatsonSYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RD, 166193219Srwatson &netisr_bindthreads, 0, "Bind netisr threads to CPUs."); 167193219Srwatson 168193219Srwatson/* 169193219Srwatson * Limit per-workstream queues to at most net.isr.maxqlimit, both for initial 170193219Srwatson * configuration and later modification using netisr_setqlimit(). 171193219Srwatson */ 172193219Srwatson#define NETISR_DEFAULT_MAXQLIMIT 10240 173193219Srwatsonstatic u_int netisr_maxqlimit = NETISR_DEFAULT_MAXQLIMIT; 174193219SrwatsonTUNABLE_INT("net.isr.maxqlimit", &netisr_maxqlimit); 175193219SrwatsonSYSCTL_INT(_net_isr, OID_AUTO, maxqlimit, CTLFLAG_RD, 176193219Srwatson &netisr_maxqlimit, 0, 177193219Srwatson "Maximum netisr per-protocol, per-CPU queue depth."); 178193219Srwatson 179193219Srwatson/* 180193219Srwatson * The default per-workstream queue limit for protocols that don't initialize 181193219Srwatson * the nh_qlimit field of their struct netisr_handler. If this is set above 182193219Srwatson * netisr_maxqlimit, we truncate it to the maximum during boot. 183193219Srwatson */ 184193219Srwatson#define NETISR_DEFAULT_DEFAULTQLIMIT 256 185193219Srwatsonstatic u_int netisr_defaultqlimit = NETISR_DEFAULT_DEFAULTQLIMIT; 186193219SrwatsonTUNABLE_INT("net.isr.defaultqlimit", &netisr_defaultqlimit); 187193219SrwatsonSYSCTL_INT(_net_isr, OID_AUTO, defaultqlimit, CTLFLAG_RD, 188193219Srwatson &netisr_defaultqlimit, 0, 189193219Srwatson "Default netisr per-protocol, per-CPU queue limit if not set by protocol"); 190193219Srwatson 191193219Srwatson/* 192193219Srwatson * Each protocol is described by a struct netisr_proto, which holds all 193193219Srwatson * global per-protocol information. This data structure is set up by 194193219Srwatson * netisr_register(), and derived from the public struct netisr_handler. 
195193219Srwatson */ 196193219Srwatsonstruct netisr_proto { 197193219Srwatson const char *np_name; /* Character string protocol name. */ 198193219Srwatson netisr_handler_t *np_handler; /* Protocol handler. */ 199193219Srwatson netisr_m2flow_t *np_m2flow; /* Query flow for untagged packet. */ 200193219Srwatson netisr_m2cpuid_t *np_m2cpuid; /* Query CPU to process packet on. */ 201194201Sbz netisr_drainedcpu_t *np_drainedcpu; /* Callback when drained a queue. */ 202193219Srwatson u_int np_qlimit; /* Maximum per-CPU queue depth. */ 203193219Srwatson u_int np_policy; /* Work placement policy. */ 204193219Srwatson}; 205193219Srwatson 206193230Srwatson#define NETISR_MAXPROT 16 /* Compile-time limit. */ 207193219Srwatson 208193219Srwatson/* 209193219Srwatson * The np array describes all registered protocols, indexed by protocol 210193219Srwatson * number. 211193219Srwatson */ 212193219Srwatsonstatic struct netisr_proto np[NETISR_MAXPROT]; 213193219Srwatson 214193219Srwatson/* 215193219Srwatson * Protocol-specific work for each workstream is described by struct 216193219Srwatson * netisr_work. Each work descriptor consists of an mbuf queue and 217193219Srwatson * statistics. 218193219Srwatson */ 219193219Srwatsonstruct netisr_work { 220193219Srwatson /* 221193219Srwatson * Packet queue, linked by m_nextpkt. 222193219Srwatson */ 223193219Srwatson struct mbuf *nw_head; 224193219Srwatson struct mbuf *nw_tail; 225193219Srwatson u_int nw_len; 226193219Srwatson u_int nw_qlimit; 227193219Srwatson u_int nw_watermark; 228193219Srwatson 229193219Srwatson /* 230193219Srwatson * Statistics -- written unlocked, but mostly from curcpu. 231193219Srwatson */ 232193219Srwatson u_int64_t nw_dispatched; /* Number of direct dispatches. */ 233193219Srwatson u_int64_t nw_hybrid_dispatched; /* "" hybrid dispatches. */ 234193219Srwatson u_int64_t nw_qdrops; /* "" drops. */ 235193219Srwatson u_int64_t nw_queued; /* "" enqueues. */ 236193219Srwatson u_int64_t nw_handled; /* "" handled in worker. 
*/ 237193219Srwatson}; 238193219Srwatson 239193219Srwatson/* 240193219Srwatson * Workstreams hold a set of ordered work across each protocol, and are 241193219Srwatson * described by netisr_workstream. Each workstream is associated with a 242193219Srwatson * worker thread, which in turn is pinned to a CPU. Work associated with a 243193219Srwatson * workstream can be processd in other threads during direct dispatch; 244193219Srwatson * concurrent processing is prevented by the NWS_RUNNING flag, which 245193219Srwatson * indicates that a thread is already processing the work queue. 246193219Srwatson */ 247193219Srwatsonstruct netisr_workstream { 248193219Srwatson struct intr_event *nws_intr_event; /* Handler for stream. */ 249193219Srwatson void *nws_swi_cookie; /* swi(9) cookie for stream. */ 250193219Srwatson struct mtx nws_mtx; /* Synchronize work. */ 251193219Srwatson u_int nws_cpu; /* CPU pinning. */ 252193219Srwatson u_int nws_flags; /* Wakeup flags. */ 253193219Srwatson u_int nws_pendingbits; /* Scheduled protocols. */ 254193219Srwatson 255193219Srwatson /* 256193219Srwatson * Each protocol has per-workstream data. 257193219Srwatson */ 258193219Srwatson struct netisr_work nws_work[NETISR_MAXPROT]; 259193219Srwatson} __aligned(CACHE_LINE_SIZE); 260193219Srwatson 261193219Srwatson/* 262195019Srwatson * Per-CPU workstream data. 263193219Srwatson */ 264195019SrwatsonDPCPU_DEFINE(struct netisr_workstream, nws); 265193219Srwatson 266193219Srwatson/* 267193219Srwatson * Map contiguous values between 0 and nws_count into CPU IDs appropriate for 268195019Srwatson * accessing workstreams. This allows constructions of the form 269195019Srwatson * DPCPU_ID_GET(nws_array[arbitraryvalue % nws_count], nws). 270193219Srwatson */ 271193219Srwatsonstatic u_int nws_array[MAXCPU]; 272193219Srwatson 273193219Srwatson/* 274193219Srwatson * Number of registered workstreams. Will be at most the number of running 275193219Srwatson * CPUs once fully started. 
276193219Srwatson */ 277193219Srwatsonstatic u_int nws_count; 278193219SrwatsonSYSCTL_INT(_net_isr, OID_AUTO, numthreads, CTLFLAG_RD, 279193219Srwatson &nws_count, 0, "Number of extant netisr threads."); 280193219Srwatson 281193219Srwatson/* 282193219Srwatson * Per-workstream flags. 283193219Srwatson */ 284193219Srwatson#define NWS_RUNNING 0x00000001 /* Currently running in a thread. */ 285193219Srwatson#define NWS_DISPATCHING 0x00000002 /* Currently being direct-dispatched. */ 286193219Srwatson#define NWS_SCHEDULED 0x00000004 /* Signal issued. */ 287193219Srwatson 288193219Srwatson/* 289193219Srwatson * Synchronization for each workstream: a mutex protects all mutable fields 290193219Srwatson * in each stream, including per-protocol state (mbuf queues). The SWI is 291193219Srwatson * woken up if asynchronous dispatch is required. 292193219Srwatson */ 293193219Srwatson#define NWS_LOCK(s) mtx_lock(&(s)->nws_mtx) 294193219Srwatson#define NWS_LOCK_ASSERT(s) mtx_assert(&(s)->nws_mtx, MA_OWNED) 295193219Srwatson#define NWS_UNLOCK(s) mtx_unlock(&(s)->nws_mtx) 296193219Srwatson#define NWS_SIGNAL(s) swi_sched((s)->nws_swi_cookie, 0) 297193219Srwatson 298193219Srwatson/* 299193219Srwatson * Utility routines for protocols that implement their own mapping of flows 300193219Srwatson * to CPUs. 301193219Srwatson */ 302193219Srwatsonu_int 303193219Srwatsonnetisr_get_cpucount(void) 304193219Srwatson{ 305193219Srwatson 306193219Srwatson return (nws_count); 307193219Srwatson} 308193219Srwatson 309193219Srwatsonu_int 310193219Srwatsonnetisr_get_cpuid(u_int cpunumber) 311193219Srwatson{ 312193219Srwatson 313193219Srwatson KASSERT(cpunumber < nws_count, ("%s: %u > %u", __func__, cpunumber, 314193219Srwatson nws_count)); 315193219Srwatson 316193219Srwatson return (nws_array[cpunumber]); 317193219Srwatson} 318193219Srwatson 319193219Srwatson/* 320193219Srwatson * The default implementation of -> CPU ID mapping. 
321193219Srwatson * 322193219Srwatson * Non-static so that protocols can use it to map their own work to specific 323193219Srwatson * CPUs in a manner consistent to netisr for affinity purposes. 324193219Srwatson */ 325193219Srwatsonu_int 326193219Srwatsonnetisr_default_flow2cpu(u_int flowid) 327193219Srwatson{ 328193219Srwatson 329193219Srwatson return (nws_array[flowid % nws_count]); 330193219Srwatson} 331193219Srwatson 332193219Srwatson/* 333193219Srwatson * Register a new netisr handler, which requires initializing per-protocol 334193219Srwatson * fields for each workstream. All netisr work is briefly suspended while 335193219Srwatson * the protocol is installed. 336193219Srwatson */ 337103781Sjakevoid 338193219Srwatsonnetisr_register(const struct netisr_handler *nhp) 339103781Sjake{ 340193219Srwatson struct netisr_work *npwp; 341193219Srwatson const char *name; 342193219Srwatson u_int i, proto; 343193219Srwatson 344193219Srwatson proto = nhp->nh_proto; 345193219Srwatson name = nhp->nh_name; 346193219Srwatson 347193219Srwatson /* 348193219Srwatson * Test that the requested registration is valid. 
349193219Srwatson */ 350193219Srwatson KASSERT(nhp->nh_name != NULL, 351193219Srwatson ("%s: nh_name NULL for %u", __func__, proto)); 352193219Srwatson KASSERT(nhp->nh_handler != NULL, 353193219Srwatson ("%s: nh_handler NULL for %s", __func__, name)); 354193219Srwatson KASSERT(nhp->nh_policy == NETISR_POLICY_SOURCE || 355193219Srwatson nhp->nh_policy == NETISR_POLICY_FLOW || 356193219Srwatson nhp->nh_policy == NETISR_POLICY_CPU, 357193219Srwatson ("%s: unsupported nh_policy %u for %s", __func__, 358193219Srwatson nhp->nh_policy, name)); 359193219Srwatson KASSERT(nhp->nh_policy == NETISR_POLICY_FLOW || 360193219Srwatson nhp->nh_m2flow == NULL, 361193219Srwatson ("%s: nh_policy != FLOW but m2flow defined for %s", __func__, 362193219Srwatson name)); 363193219Srwatson KASSERT(nhp->nh_policy == NETISR_POLICY_CPU || nhp->nh_m2cpuid == NULL, 364193219Srwatson ("%s: nh_policy != CPU but m2cpuid defined for %s", __func__, 365193219Srwatson name)); 366193219Srwatson KASSERT(nhp->nh_policy != NETISR_POLICY_CPU || nhp->nh_m2cpuid != NULL, 367193219Srwatson ("%s: nh_policy == CPU but m2cpuid not defined for %s", __func__, 368193219Srwatson name)); 369193219Srwatson KASSERT(proto < NETISR_MAXPROT, 370193219Srwatson ("%s(%u, %s): protocol too big", __func__, proto, name)); 371193219Srwatson 372193219Srwatson /* 373193219Srwatson * Test that no existing registration exists for this protocol. 
374193219Srwatson */ 375193219Srwatson NETISR_WLOCK(); 376193219Srwatson KASSERT(np[proto].np_name == NULL, 377193219Srwatson ("%s(%u, %s): name present", __func__, proto, name)); 378193219Srwatson KASSERT(np[proto].np_handler == NULL, 379193219Srwatson ("%s(%u, %s): handler present", __func__, proto, name)); 380193219Srwatson 381193219Srwatson np[proto].np_name = name; 382193219Srwatson np[proto].np_handler = nhp->nh_handler; 383193219Srwatson np[proto].np_m2flow = nhp->nh_m2flow; 384193219Srwatson np[proto].np_m2cpuid = nhp->nh_m2cpuid; 385194201Sbz np[proto].np_drainedcpu = nhp->nh_drainedcpu; 386193219Srwatson if (nhp->nh_qlimit == 0) 387193219Srwatson np[proto].np_qlimit = netisr_defaultqlimit; 388193219Srwatson else if (nhp->nh_qlimit > netisr_maxqlimit) { 389193219Srwatson printf("%s: %s requested queue limit %u capped to " 390193219Srwatson "net.isr.maxqlimit %u\n", __func__, name, nhp->nh_qlimit, 391193219Srwatson netisr_maxqlimit); 392193219Srwatson np[proto].np_qlimit = netisr_maxqlimit; 393193219Srwatson } else 394193219Srwatson np[proto].np_qlimit = nhp->nh_qlimit; 395193219Srwatson np[proto].np_policy = nhp->nh_policy; 396195078Srwatson for (i = 0; i <= mp_maxid; i++) { 397195019Srwatson if (CPU_ABSENT(i)) 398195019Srwatson continue; 399195019Srwatson npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto]; 400193219Srwatson bzero(npwp, sizeof(*npwp)); 401193219Srwatson npwp->nw_qlimit = np[proto].np_qlimit; 402193219Srwatson } 403193219Srwatson NETISR_WUNLOCK(); 404103781Sjake} 405103781Sjake 406193219Srwatson/* 407193219Srwatson * Clear drop counters across all workstreams for a protocol. 
408193219Srwatson */ 409111888Sjlemonvoid 410193219Srwatsonnetisr_clearqdrops(const struct netisr_handler *nhp) 411103781Sjake{ 412193219Srwatson struct netisr_work *npwp; 413193219Srwatson#ifdef INVARIANTS 414193219Srwatson const char *name; 415193219Srwatson#endif 416193219Srwatson u_int i, proto; 417193219Srwatson 418193219Srwatson proto = nhp->nh_proto; 419193219Srwatson#ifdef INVARIANTS 420193219Srwatson name = nhp->nh_name; 421193219Srwatson#endif 422193219Srwatson KASSERT(proto < NETISR_MAXPROT, 423193219Srwatson ("%s(%u): protocol too big for %s", __func__, proto, name)); 424193219Srwatson 425193219Srwatson NETISR_WLOCK(); 426193219Srwatson KASSERT(np[proto].np_handler != NULL, 427193219Srwatson ("%s(%u): protocol not registered for %s", __func__, proto, 428193219Srwatson name)); 429193219Srwatson 430195078Srwatson for (i = 0; i <= mp_maxid; i++) { 431195019Srwatson if (CPU_ABSENT(i)) 432195019Srwatson continue; 433195019Srwatson npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto]; 434193219Srwatson npwp->nw_qdrops = 0; 435193219Srwatson } 436193219Srwatson NETISR_WUNLOCK(); 437111888Sjlemon} 438111888Sjlemon 439193219Srwatson/* 440193219Srwatson * Query the current drop counters across all workstreams for a protocol. 
441193219Srwatson */ 442111888Sjlemonvoid 443193219Srwatsonnetisr_getqdrops(const struct netisr_handler *nhp, u_int64_t *qdropp) 444111888Sjlemon{ 445193219Srwatson struct netisr_work *npwp; 446193219Srwatson struct rm_priotracker tracker; 447193219Srwatson#ifdef INVARIANTS 448193219Srwatson const char *name; 449193219Srwatson#endif 450193219Srwatson u_int i, proto; 451193219Srwatson 452193219Srwatson *qdropp = 0; 453193219Srwatson proto = nhp->nh_proto; 454193219Srwatson#ifdef INVARIANTS 455193219Srwatson name = nhp->nh_name; 456193219Srwatson#endif 457193219Srwatson KASSERT(proto < NETISR_MAXPROT, 458193219Srwatson ("%s(%u): protocol too big for %s", __func__, proto, name)); 459193219Srwatson 460193219Srwatson NETISR_RLOCK(&tracker); 461193219Srwatson KASSERT(np[proto].np_handler != NULL, 462193219Srwatson ("%s(%u): protocol not registered for %s", __func__, proto, 463193219Srwatson name)); 464193219Srwatson 465195078Srwatson for (i = 0; i <= mp_maxid; i++) { 466195019Srwatson if (CPU_ABSENT(i)) 467195019Srwatson continue; 468195019Srwatson npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto]; 469193219Srwatson *qdropp += npwp->nw_qdrops; 470193219Srwatson } 471193219Srwatson NETISR_RUNLOCK(&tracker); 472103781Sjake} 473103781Sjake 474193219Srwatson/* 475193219Srwatson * Query the current queue limit for per-workstream queues for a protocol. 
476193219Srwatson */ 477193219Srwatsonvoid 478193219Srwatsonnetisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp) 479193219Srwatson{ 480193219Srwatson struct rm_priotracker tracker; 481193219Srwatson#ifdef INVARIANTS 482193219Srwatson const char *name; 483193219Srwatson#endif 484193219Srwatson u_int proto; 485111888Sjlemon 486193219Srwatson proto = nhp->nh_proto; 487193219Srwatson#ifdef INVARIANTS 488193219Srwatson name = nhp->nh_name; 489193219Srwatson#endif 490193219Srwatson KASSERT(proto < NETISR_MAXPROT, 491193219Srwatson ("%s(%u): protocol too big for %s", __func__, proto, name)); 492111888Sjlemon 493193219Srwatson NETISR_RLOCK(&tracker); 494193219Srwatson KASSERT(np[proto].np_handler != NULL, 495193219Srwatson ("%s(%u): protocol not registered for %s", __func__, proto, 496193219Srwatson name)); 497193219Srwatson *qlimitp = np[proto].np_qlimit; 498193219Srwatson NETISR_RUNLOCK(&tracker); 499193219Srwatson} 500111888Sjlemon 501193219Srwatson/* 502193219Srwatson * Update the queue limit across per-workstream queues for a protocol. We 503193219Srwatson * simply change the limits, and don't drain overflowed packets as they will 504193219Srwatson * (hopefully) take care of themselves shortly. 
505193219Srwatson */ 506193219Srwatsonint 507193219Srwatsonnetisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit) 508193219Srwatson{ 509193219Srwatson struct netisr_work *npwp; 510193219Srwatson#ifdef INVARIANTS 511193219Srwatson const char *name; 512193219Srwatson#endif 513193219Srwatson u_int i, proto; 514111888Sjlemon 515193219Srwatson if (qlimit > netisr_maxqlimit) 516193219Srwatson return (EINVAL); 517193219Srwatson 518193219Srwatson proto = nhp->nh_proto; 519193219Srwatson#ifdef INVARIANTS 520193219Srwatson name = nhp->nh_name; 521193219Srwatson#endif 522193219Srwatson KASSERT(proto < NETISR_MAXPROT, 523193219Srwatson ("%s(%u): protocol too big for %s", __func__, proto, name)); 524193219Srwatson 525193219Srwatson NETISR_WLOCK(); 526193219Srwatson KASSERT(np[proto].np_handler != NULL, 527193219Srwatson ("%s(%u): protocol not registered for %s", __func__, proto, 528193219Srwatson name)); 529193219Srwatson 530193219Srwatson np[proto].np_qlimit = qlimit; 531195078Srwatson for (i = 0; i <= mp_maxid; i++) { 532195019Srwatson if (CPU_ABSENT(i)) 533195019Srwatson continue; 534195019Srwatson npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto]; 535193219Srwatson npwp->nw_qlimit = qlimit; 536193219Srwatson } 537193219Srwatson NETISR_WUNLOCK(); 538193219Srwatson return (0); 539193219Srwatson} 540193219Srwatson 541111888Sjlemon/* 542193219Srwatson * Drain all packets currently held in a particular protocol work queue. 543120704Srwatson */ 544120704Srwatsonstatic void 545193219Srwatsonnetisr_drain_proto(struct netisr_work *npwp) 546120704Srwatson{ 547120704Srwatson struct mbuf *m; 548120704Srwatson 549193219Srwatson /* 550193219Srwatson * We would assert the lock on the workstream but it's not passed in. 
551193219Srwatson */ 552193219Srwatson while ((m = npwp->nw_head) != NULL) { 553193219Srwatson npwp->nw_head = m->m_nextpkt; 554193219Srwatson m->m_nextpkt = NULL; 555193219Srwatson if (npwp->nw_head == NULL) 556193219Srwatson npwp->nw_tail = NULL; 557193219Srwatson npwp->nw_len--; 558193219Srwatson m_freem(m); 559193219Srwatson } 560193219Srwatson KASSERT(npwp->nw_tail == NULL, ("%s: tail", __func__)); 561193219Srwatson KASSERT(npwp->nw_len == 0, ("%s: len", __func__)); 562193219Srwatson} 563193219Srwatson 564193219Srwatson/* 565193219Srwatson * Remove the registration of a network protocol, which requires clearing 566193219Srwatson * per-protocol fields across all workstreams, including freeing all mbufs in 567193219Srwatson * the queues at time of unregister. All work in netisr is briefly suspended 568193219Srwatson * while this takes place. 569193219Srwatson */ 570193219Srwatsonvoid 571193219Srwatsonnetisr_unregister(const struct netisr_handler *nhp) 572193219Srwatson{ 573193219Srwatson struct netisr_work *npwp; 574193219Srwatson#ifdef INVARIANTS 575193219Srwatson const char *name; 576193219Srwatson#endif 577193219Srwatson u_int i, proto; 578193219Srwatson 579193219Srwatson proto = nhp->nh_proto; 580193219Srwatson#ifdef INVARIANTS 581193219Srwatson name = nhp->nh_name; 582193219Srwatson#endif 583193219Srwatson KASSERT(proto < NETISR_MAXPROT, 584193219Srwatson ("%s(%u): protocol too big for %s", __func__, proto, name)); 585193219Srwatson 586193219Srwatson NETISR_WLOCK(); 587193219Srwatson KASSERT(np[proto].np_handler != NULL, 588193219Srwatson ("%s(%u): protocol not registered for %s", __func__, proto, 589193219Srwatson name)); 590193219Srwatson 591193219Srwatson np[proto].np_name = NULL; 592193219Srwatson np[proto].np_handler = NULL; 593193219Srwatson np[proto].np_m2flow = NULL; 594193219Srwatson np[proto].np_m2cpuid = NULL; 595193219Srwatson np[proto].np_qlimit = 0; 596193219Srwatson np[proto].np_policy = 0; 597195078Srwatson for (i = 0; i <= mp_maxid; i++) { 
598195019Srwatson if (CPU_ABSENT(i)) 599195019Srwatson continue; 600195019Srwatson npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto]; 601193219Srwatson netisr_drain_proto(npwp); 602193219Srwatson bzero(npwp, sizeof(*npwp)); 603193219Srwatson } 604193219Srwatson NETISR_WUNLOCK(); 605193219Srwatson} 606193219Srwatson 607193219Srwatson/* 608193219Srwatson * Look up the workstream given a packet and source identifier. Do this by 609193219Srwatson * checking the protocol's policy, and optionally call out to the protocol 610193219Srwatson * for assistance if required. 611193219Srwatson */ 612193219Srwatsonstatic struct mbuf * 613193219Srwatsonnetisr_select_cpuid(struct netisr_proto *npp, uintptr_t source, 614193219Srwatson struct mbuf *m, u_int *cpuidp) 615193219Srwatson{ 616193219Srwatson struct ifnet *ifp; 617193219Srwatson 618193219Srwatson NETISR_LOCK_ASSERT(); 619193219Srwatson 620193219Srwatson /* 621193219Srwatson * In the event we have only one worker, shortcut and deliver to it 622193219Srwatson * without further ado. 623193219Srwatson */ 624193219Srwatson if (nws_count == 1) { 625193219Srwatson *cpuidp = nws_array[0]; 626193219Srwatson return (m); 627193219Srwatson } 628193219Srwatson 629193219Srwatson /* 630193219Srwatson * What happens next depends on the policy selected by the protocol. 631193219Srwatson * If we want to support per-interface policies, we should do that 632193219Srwatson * here first. 
633193219Srwatson */ 634193219Srwatson switch (npp->np_policy) { 635193219Srwatson case NETISR_POLICY_CPU: 636193219Srwatson return (npp->np_m2cpuid(m, source, cpuidp)); 637193219Srwatson 638193219Srwatson case NETISR_POLICY_FLOW: 639193219Srwatson if (!(m->m_flags & M_FLOWID) && npp->np_m2flow != NULL) { 640193219Srwatson m = npp->np_m2flow(m, source); 641193219Srwatson if (m == NULL) 642193219Srwatson return (NULL); 643193219Srwatson } 644193219Srwatson if (m->m_flags & M_FLOWID) { 645193219Srwatson *cpuidp = 646193219Srwatson netisr_default_flow2cpu(m->m_pkthdr.flowid); 647193219Srwatson return (m); 648193219Srwatson } 649193219Srwatson /* FALLTHROUGH */ 650193219Srwatson 651193219Srwatson case NETISR_POLICY_SOURCE: 652193219Srwatson ifp = m->m_pkthdr.rcvif; 653193219Srwatson if (ifp != NULL) 654193219Srwatson *cpuidp = nws_array[(ifp->if_index + source) % 655193219Srwatson nws_count]; 656193219Srwatson else 657193219Srwatson *cpuidp = nws_array[source % nws_count]; 658193219Srwatson return (m); 659193219Srwatson 660193219Srwatson default: 661193219Srwatson panic("%s: invalid policy %u for %s", __func__, 662193219Srwatson npp->np_policy, npp->np_name); 663193219Srwatson } 664193219Srwatson} 665193219Srwatson 666193219Srwatson/* 667193219Srwatson * Process packets associated with a workstream and protocol. For reasons of 668193219Srwatson * fairness, we process up to one complete netisr queue at a time, moving the 669193219Srwatson * queue to a stack-local queue for processing, but do not loop refreshing 670193219Srwatson * from the global queue. The caller is responsible for deciding whether to 671193219Srwatson * loop, and for setting the NWS_RUNNING flag. The passed workstream will be 672193219Srwatson * locked on entry and relocked before return, but will be released while 673193219Srwatson * processing. The number of packets processed is returned. 
674193219Srwatson */ 675193219Srwatsonstatic u_int 676193219Srwatsonnetisr_process_workstream_proto(struct netisr_workstream *nwsp, u_int proto) 677193219Srwatson{ 678193219Srwatson struct netisr_work local_npw, *npwp; 679193219Srwatson u_int handled; 680193219Srwatson struct mbuf *m; 681193219Srwatson 682193219Srwatson NETISR_LOCK_ASSERT(); 683193219Srwatson NWS_LOCK_ASSERT(nwsp); 684193219Srwatson 685193219Srwatson KASSERT(nwsp->nws_flags & NWS_RUNNING, 686193219Srwatson ("%s(%u): not running", __func__, proto)); 687193219Srwatson KASSERT(proto >= 0 && proto < NETISR_MAXPROT, 688193219Srwatson ("%s(%u): invalid proto\n", __func__, proto)); 689193219Srwatson 690193219Srwatson npwp = &nwsp->nws_work[proto]; 691193219Srwatson if (npwp->nw_len == 0) 692193219Srwatson return (0); 693193219Srwatson 694193219Srwatson /* 695193219Srwatson * Move the global work queue to a thread-local work queue. 696193219Srwatson * 697193219Srwatson * Notice that this means the effective maximum length of the queue 698193219Srwatson * is actually twice that of the maximum queue length specified in 699193219Srwatson * the protocol registration call. 
700193219Srwatson */ 701193219Srwatson handled = npwp->nw_len; 702193219Srwatson local_npw = *npwp; 703193219Srwatson npwp->nw_head = NULL; 704193219Srwatson npwp->nw_tail = NULL; 705193219Srwatson npwp->nw_len = 0; 706193219Srwatson nwsp->nws_pendingbits &= ~(1 << proto); 707193219Srwatson NWS_UNLOCK(nwsp); 708193219Srwatson while ((m = local_npw.nw_head) != NULL) { 709193219Srwatson local_npw.nw_head = m->m_nextpkt; 710193219Srwatson m->m_nextpkt = NULL; 711193219Srwatson if (local_npw.nw_head == NULL) 712193219Srwatson local_npw.nw_tail = NULL; 713193219Srwatson local_npw.nw_len--; 714191816Szec VNET_ASSERT(m->m_pkthdr.rcvif != NULL); 715191816Szec CURVNET_SET(m->m_pkthdr.rcvif->if_vnet); 716193219Srwatson np[proto].np_handler(m); 717191816Szec CURVNET_RESTORE(); 718120704Srwatson } 719193219Srwatson KASSERT(local_npw.nw_len == 0, 720193219Srwatson ("%s(%u): len %u", __func__, proto, local_npw.nw_len)); 721194201Sbz if (np[proto].np_drainedcpu) 722194201Sbz np[proto].np_drainedcpu(nwsp->nws_cpu); 723193219Srwatson NWS_LOCK(nwsp); 724193219Srwatson npwp->nw_handled += handled; 725193219Srwatson return (handled); 726120704Srwatson} 727120704Srwatson 728120704Srwatson/* 729193219Srwatson * SWI handler for netisr -- processes prackets in a set of workstreams that 730193219Srwatson * it owns, woken up by calls to NWS_SIGNAL(). If this workstream is already 731193219Srwatson * being direct dispatched, go back to sleep and wait for the dispatching 732193219Srwatson * thread to wake us up again. 
733111888Sjlemon */ 734193219Srwatsonstatic void 735193219Srwatsonswi_net(void *arg) 736103781Sjake{ 737193219Srwatson#ifdef NETISR_LOCKING 738193219Srwatson struct rm_priotracker tracker; 739193219Srwatson#endif 740193219Srwatson struct netisr_workstream *nwsp; 741193219Srwatson u_int bits, prot; 742193219Srwatson 743193219Srwatson nwsp = arg; 744193219Srwatson 745193219Srwatson#ifdef DEVICE_POLLING 746193219Srwatson KASSERT(nws_count == 1, 747193219Srwatson ("%s: device_polling but nws_count != 1", __func__)); 748193219Srwatson netisr_poll(); 749193219Srwatson#endif 750193219Srwatson#ifdef NETISR_LOCKING 751193219Srwatson NETISR_RLOCK(&tracker); 752193219Srwatson#endif 753193219Srwatson NWS_LOCK(nwsp); 754193219Srwatson KASSERT(!(nwsp->nws_flags & NWS_RUNNING), ("swi_net: running")); 755193219Srwatson if (nwsp->nws_flags & NWS_DISPATCHING) 756193219Srwatson goto out; 757193219Srwatson nwsp->nws_flags |= NWS_RUNNING; 758193219Srwatson nwsp->nws_flags &= ~NWS_SCHEDULED; 759193219Srwatson while ((bits = nwsp->nws_pendingbits) != 0) { 760193219Srwatson while ((prot = ffs(bits)) != 0) { 761193219Srwatson prot--; 762193219Srwatson bits &= ~(1 << prot); 763193219Srwatson (void)netisr_process_workstream_proto(nwsp, prot); 764193219Srwatson } 765112011Sjlemon } 766193219Srwatson nwsp->nws_flags &= ~NWS_RUNNING; 767193219Srwatsonout: 768193219Srwatson NWS_UNLOCK(nwsp); 769193219Srwatson#ifdef NETISR_LOCKING 770193219Srwatson NETISR_RUNLOCK(&tracker); 771193219Srwatson#endif 772193219Srwatson#ifdef DEVICE_POLLING 773193219Srwatson netisr_pollmore(); 774193219Srwatson#endif 775193219Srwatson} 776180239Srwatson 777193219Srwatsonstatic int 778193219Srwatsonnetisr_queue_workstream(struct netisr_workstream *nwsp, u_int proto, 779193219Srwatson struct netisr_work *npwp, struct mbuf *m, int *dosignalp) 780193219Srwatson{ 781193219Srwatson 782193219Srwatson NWS_LOCK_ASSERT(nwsp); 783193219Srwatson 784193219Srwatson *dosignalp = 0; 785193219Srwatson if (npwp->nw_len < 
npwp->nw_qlimit) { 786193219Srwatson m->m_nextpkt = NULL; 787193219Srwatson if (npwp->nw_head == NULL) { 788193219Srwatson npwp->nw_head = m; 789193219Srwatson npwp->nw_tail = m; 790193219Srwatson } else { 791193219Srwatson npwp->nw_tail->m_nextpkt = m; 792193219Srwatson npwp->nw_tail = m; 793193219Srwatson } 794193219Srwatson npwp->nw_len++; 795193219Srwatson if (npwp->nw_len > npwp->nw_watermark) 796193219Srwatson npwp->nw_watermark = npwp->nw_len; 797193219Srwatson nwsp->nws_pendingbits |= (1 << proto); 798193219Srwatson if (!(nwsp->nws_flags & 799193219Srwatson (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED))) { 800193219Srwatson nwsp->nws_flags |= NWS_SCHEDULED; 801193219Srwatson *dosignalp = 1; /* Defer until unlocked. */ 802193219Srwatson } 803193219Srwatson npwp->nw_queued++; 804193219Srwatson return (0); 805111888Sjlemon } else { 806195182Sbz m_freem(m); 807193219Srwatson npwp->nw_qdrops++; 808193219Srwatson return (ENOBUFS); 809103781Sjake } 810103781Sjake} 811103781Sjake 812193219Srwatsonstatic int 813193219Srwatsonnetisr_queue_internal(u_int proto, struct mbuf *m, u_int cpuid) 814193219Srwatson{ 815193219Srwatson struct netisr_workstream *nwsp; 816193219Srwatson struct netisr_work *npwp; 817193219Srwatson int dosignal, error; 818193219Srwatson 819193219Srwatson#ifdef NETISR_LOCKING 820193219Srwatson NETISR_LOCK_ASSERT(); 821193219Srwatson#endif 822195078Srwatson KASSERT(cpuid <= mp_maxid, ("%s: cpuid too big (%u, %u)", __func__, 823195078Srwatson cpuid, mp_maxid)); 824195019Srwatson KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid)); 825193219Srwatson 826193219Srwatson dosignal = 0; 827193219Srwatson error = 0; 828195019Srwatson nwsp = DPCPU_ID_PTR(cpuid, nws); 829193219Srwatson npwp = &nwsp->nws_work[proto]; 830193219Srwatson NWS_LOCK(nwsp); 831193219Srwatson error = netisr_queue_workstream(nwsp, proto, npwp, m, &dosignal); 832193219Srwatson NWS_UNLOCK(nwsp); 833193219Srwatson if (dosignal) 834193219Srwatson NWS_SIGNAL(nwsp); 
835193219Srwatson return (error); 836193219Srwatson} 837193219Srwatson 838193219Srwatsonint 839193219Srwatsonnetisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m) 840193219Srwatson{ 841193219Srwatson#ifdef NETISR_LOCKING 842193219Srwatson struct rm_priotracker tracker; 843193219Srwatson#endif 844193219Srwatson u_int cpuid; 845193219Srwatson int error; 846193219Srwatson 847193219Srwatson KASSERT(proto < NETISR_MAXPROT, 848193219Srwatson ("%s: invalid proto %u", __func__, proto)); 849193219Srwatson 850193219Srwatson#ifdef NETISR_LOCKING 851193219Srwatson NETISR_RLOCK(&tracker); 852193219Srwatson#endif 853193219Srwatson KASSERT(np[proto].np_handler != NULL, 854193219Srwatson ("%s: invalid proto %u", __func__, proto)); 855193219Srwatson 856193219Srwatson m = netisr_select_cpuid(&np[proto], source, m, &cpuid); 857195019Srwatson if (m != NULL) { 858195019Srwatson KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, 859195019Srwatson cpuid)); 860193219Srwatson error = netisr_queue_internal(proto, m, cpuid); 861195019Srwatson } else 862193219Srwatson error = ENOBUFS; 863193219Srwatson#ifdef NETISR_LOCKING 864193219Srwatson NETISR_RUNLOCK(&tracker); 865193219Srwatson#endif 866193219Srwatson return (error); 867193219Srwatson} 868193219Srwatson 869193219Srwatsonint 870193219Srwatsonnetisr_queue(u_int proto, struct mbuf *m) 871193219Srwatson{ 872193219Srwatson 873193219Srwatson return (netisr_queue_src(proto, 0, m)); 874193219Srwatson} 875193219Srwatson 876111888Sjlemon/* 877193219Srwatson * Dispatch a packet for netisr processing, direct dispatch permitted by 878193219Srwatson * calling context. 
 */
int
netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
{
#ifdef NETISR_LOCKING
	struct rm_priotracker tracker;
#endif
	struct netisr_workstream *nwsp;
	struct netisr_work *npwp;
	int dosignal, error;
	u_int cpuid;

	/*
	 * If direct dispatch is entirely disabled, fall back on queueing.
	 */
	if (!netisr_direct)
		return (netisr_queue_src(proto, source, m));

	KASSERT(proto < NETISR_MAXPROT,
	    ("%s: invalid proto %u", __func__, proto));
#ifdef NETISR_LOCKING
	NETISR_RLOCK(&tracker);
#endif
	KASSERT(np[proto].np_handler != NULL,
	    ("%s: invalid proto %u", __func__, proto));

	/*
	 * If direct dispatch is forced, then unconditionally dispatch
	 * without a formal CPU selection.  Borrow the current CPU's stats,
	 * even if there's no worker on it.  In this case we don't update
	 * nws_flags because all netisr processing will be source ordered due
	 * to always being forced to directly dispatch.
	 */
	if (netisr_direct_force) {
		nwsp = DPCPU_PTR(nws);
		npwp = &nwsp->nws_work[proto];
		npwp->nw_dispatched++;
		npwp->nw_handled++;
		np[proto].np_handler(m);
		error = 0;
		goto out_unlock;
	}

	/*
	 * Otherwise, we execute in a hybrid mode where we will try to direct
	 * dispatch if we're on the right CPU and the netisr worker isn't
	 * already running.
	 */
	m = netisr_select_cpuid(&np[proto], source, m, &cpuid);
	if (m == NULL) {
		error = ENOBUFS;
		goto out_unlock;
	}
	KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));
	/*
	 * Pin to the current CPU so the curcpu comparison and the per-CPU
	 * workstream pointer below remain valid while we work.
	 */
	sched_pin();
	if (cpuid != curcpu)
		goto queue_fallback;
	nwsp = DPCPU_PTR(nws);
	npwp = &nwsp->nws_work[proto];

	/*-
	 * We are willing to direct dispatch only if three conditions hold:
	 *
	 * (1) The netisr worker isn't already running,
	 * (2) Another thread isn't already directly dispatching, and
	 * (3) The netisr hasn't already been woken up.
	 */
	NWS_LOCK(nwsp);
	if (nwsp->nws_flags & (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED)) {
		/* Busy: fall back on queueing to this CPU's workstream. */
		error = netisr_queue_workstream(nwsp, proto, npwp, m,
		    &dosignal);
		NWS_UNLOCK(nwsp);
		if (dosignal)
			NWS_SIGNAL(nwsp);
		goto out_unpin;
	}

	/*
	 * The current thread is now effectively the netisr worker, so set
	 * the dispatching flag to prevent concurrent processing of the
	 * stream from another thread (even the netisr worker), which could
	 * otherwise lead to effective misordering of the stream.
	 */
	nwsp->nws_flags |= NWS_DISPATCHING;
	NWS_UNLOCK(nwsp);
	/* The handler runs with the workstream unlocked. */
	np[proto].np_handler(m);
	NWS_LOCK(nwsp);
	nwsp->nws_flags &= ~NWS_DISPATCHING;
	npwp->nw_handled++;
	npwp->nw_hybrid_dispatched++;

	/*
	 * If other work was enqueued by another thread while we were direct
	 * dispatching, we need to signal the netisr worker to do that work.
	 * In the future, we might want to do some of that work in the
	 * current thread, rather than trigger further context switches.  If
	 * so, we'll want to establish a reasonable bound on the work done in
	 * the "borrowed" context.
	 */
	if (nwsp->nws_pendingbits != 0) {
		nwsp->nws_flags |= NWS_SCHEDULED;
		dosignal = 1;
	} else
		dosignal = 0;
	NWS_UNLOCK(nwsp);
	if (dosignal)
		NWS_SIGNAL(nwsp);
	error = 0;
	goto out_unpin;

queue_fallback:
	/* Not on the selected CPU: hand off to that CPU's workstream. */
	error = netisr_queue_internal(proto, m, cpuid);
out_unpin:
	sched_unpin();
out_unlock:
#ifdef NETISR_LOCKING
	NETISR_RUNLOCK(&tracker);
#endif
	return (error);
}

/* Convenience wrapper: dispatch with a zero source identifier. */
int
netisr_dispatch(u_int proto, struct mbuf *m)
{

	return (netisr_dispatch_src(proto, 0, m));
}

#ifdef DEVICE_POLLING
/*
 * Kernel polling borrows a netisr thread to run interface polling in; this
 * function allows kernel polling to request that the netisr thread be
 * scheduled even if no packets are pending for protocols.
1012193219Srwatson */ 1013193219Srwatsonvoid 1014193219Srwatsonnetisr_sched_poll(void) 1015193219Srwatson{ 1016193219Srwatson struct netisr_workstream *nwsp; 1017193219Srwatson 1018195019Srwatson nwsp = DPCPU_ID_PTR(nws_array[0], nws); 1019193219Srwatson NWS_SIGNAL(nwsp); 1020193219Srwatson} 1021193219Srwatson#endif 1022193219Srwatson 1023103781Sjakestatic void 1024193219Srwatsonnetisr_start_swi(u_int cpuid, struct pcpu *pc) 1025103781Sjake{ 1026193219Srwatson char swiname[12]; 1027193219Srwatson struct netisr_workstream *nwsp; 1028193219Srwatson int error; 1029193219Srwatson 1030195019Srwatson KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid)); 1031195019Srwatson 1032195019Srwatson nwsp = DPCPU_ID_PTR(cpuid, nws); 1033193219Srwatson mtx_init(&nwsp->nws_mtx, "netisr_mtx", NULL, MTX_DEF); 1034193219Srwatson nwsp->nws_cpu = cpuid; 1035193219Srwatson snprintf(swiname, sizeof(swiname), "netisr %u", cpuid); 1036193219Srwatson error = swi_add(&nwsp->nws_intr_event, swiname, swi_net, nwsp, 1037193219Srwatson SWI_NET, INTR_MPSAFE, &nwsp->nws_swi_cookie); 1038193219Srwatson if (error) 1039193219Srwatson panic("%s: swi_add %d", __func__, error); 1040193219Srwatson pc->pc_netisr = nwsp->nws_intr_event; 1041193219Srwatson if (netisr_bindthreads) { 1042193219Srwatson error = intr_event_bind(nwsp->nws_intr_event, cpuid); 1043193219Srwatson if (error != 0) 1044193219Srwatson printf("%s: cpu %u: intr_event_bind: %d", __func__, 1045193219Srwatson cpuid, error); 1046193219Srwatson } 1047193219Srwatson NETISR_WLOCK(); 1048193219Srwatson nws_array[nws_count] = nwsp->nws_cpu; 1049193219Srwatson nws_count++; 1050193219Srwatson NETISR_WUNLOCK(); 1051193219Srwatson} 1052193219Srwatson 1053193219Srwatson/* 1054193219Srwatson * Initialize the netisr subsystem. We rely on BSS and static initialization 1055193219Srwatson * of most fields in global data structures. 
1056193219Srwatson * 1057193219Srwatson * Start a worker thread for the boot CPU so that we can support network 1058193219Srwatson * traffic immediately in case the network stack is used before additional 1059193219Srwatson * CPUs are started (for example, diskless boot). 1060193219Srwatson */ 1061193219Srwatsonstatic void 1062193219Srwatsonnetisr_init(void *arg) 1063193219Srwatson{ 1064193219Srwatson 1065193219Srwatson KASSERT(curcpu == 0, ("%s: not on CPU 0", __func__)); 1066193219Srwatson 1067193219Srwatson NETISR_LOCK_INIT(); 1068195078Srwatson if (netisr_maxthreads < 1) 1069193219Srwatson netisr_maxthreads = 1; 1070195078Srwatson if (netisr_maxthreads > mp_ncpus) { 1071195078Srwatson printf("netisr2: forcing maxthreads from %d to %d\n", 1072195078Srwatson netisr_maxthreads, mp_ncpus); 1073195078Srwatson netisr_maxthreads = mp_ncpus; 1074193230Srwatson } 1075193230Srwatson if (netisr_defaultqlimit > netisr_maxqlimit) { 1076195078Srwatson printf("netisr2: forcing defaultqlimit from %d to %d\n", 1077195078Srwatson netisr_defaultqlimit, netisr_maxqlimit); 1078193219Srwatson netisr_defaultqlimit = netisr_maxqlimit; 1079193230Srwatson } 1080103781Sjake#ifdef DEVICE_POLLING 1081193219Srwatson /* 1082193219Srwatson * The device polling code is not yet aware of how to deal with 1083193219Srwatson * multiple netisr threads, so for the time being compiling in device 1084193219Srwatson * polling disables parallel netisr workers. 
1085193219Srwatson */ 1086193230Srwatson if (netisr_maxthreads != 1 || netisr_bindthreads != 0) { 1087193230Srwatson printf("netisr2: forcing maxthreads to 1 and bindthreads to " 1088193230Srwatson "0 for device polling\n"); 1089193230Srwatson netisr_maxthreads = 1; 1090193230Srwatson netisr_bindthreads = 0; 1091193230Srwatson } 1092103781Sjake#endif 1093111888Sjlemon 1094193219Srwatson netisr_start_swi(curcpu, pcpu_find(curcpu)); 1095103781Sjake} 1096193219SrwatsonSYSINIT(netisr_init, SI_SUB_SOFTINTR, SI_ORDER_FIRST, netisr_init, NULL); 1097103781Sjake 1098193219Srwatson/* 1099193219Srwatson * Start worker threads for additional CPUs. No attempt to gracefully handle 1100193219Srwatson * work reassignment, we don't yet support dynamic reconfiguration. 1101193219Srwatson */ 1102103781Sjakestatic void 1103193219Srwatsonnetisr_start(void *arg) 1104103781Sjake{ 1105193219Srwatson struct pcpu *pc; 1106103781Sjake 1107193219Srwatson SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { 1108193219Srwatson if (nws_count >= netisr_maxthreads) 1109193219Srwatson break; 1110193219Srwatson /* XXXRW: Is skipping absent CPUs still required here? */ 1111193219Srwatson if (CPU_ABSENT(pc->pc_cpuid)) 1112193219Srwatson continue; 1113193219Srwatson /* Worker will already be present for boot CPU. 
*/ 1114193219Srwatson if (pc->pc_netisr != NULL) 1115193219Srwatson continue; 1116193219Srwatson netisr_start_swi(pc->pc_cpuid, pc); 1117193219Srwatson } 1118103781Sjake} 1119193219SrwatsonSYSINIT(netisr_start, SI_SUB_SMP, SI_ORDER_MIDDLE, netisr_start, NULL); 1120193219Srwatson 1121193219Srwatson#ifdef DDB 1122193219SrwatsonDB_SHOW_COMMAND(netisr, db_show_netisr) 1123193219Srwatson{ 1124193219Srwatson struct netisr_workstream *nwsp; 1125193219Srwatson struct netisr_work *nwp; 1126193219Srwatson int first, proto; 1127195019Srwatson u_int cpuid; 1128193219Srwatson 1129193219Srwatson db_printf("%3s %6s %5s %5s %5s %8s %8s %8s %8s\n", "CPU", "Proto", 1130193219Srwatson "Len", "WMark", "Max", "Disp", "HDisp", "Drop", "Queue"); 1131195078Srwatson for (cpuid = 0; cpuid <= mp_maxid; cpuid++) { 1132195019Srwatson if (CPU_ABSENT(cpuid)) 1133195019Srwatson continue; 1134195019Srwatson nwsp = DPCPU_ID_PTR(cpuid, nws); 1135193219Srwatson if (nwsp->nws_intr_event == NULL) 1136193219Srwatson continue; 1137193219Srwatson first = 1; 1138193219Srwatson for (proto = 0; proto < NETISR_MAXPROT; proto++) { 1139193219Srwatson if (np[proto].np_handler == NULL) 1140193219Srwatson continue; 1141193219Srwatson nwp = &nwsp->nws_work[proto]; 1142193219Srwatson if (first) { 1143195019Srwatson db_printf("%3d ", cpuid); 1144193219Srwatson first = 0; 1145193219Srwatson } else 1146193219Srwatson db_printf("%3s ", ""); 1147193219Srwatson db_printf( 1148193219Srwatson "%6s %5d %5d %5d %8ju %8ju %8ju %8ju\n", 1149193219Srwatson np[proto].np_name, nwp->nw_len, 1150193219Srwatson nwp->nw_watermark, nwp->nw_qlimit, 1151193219Srwatson nwp->nw_dispatched, nwp->nw_hybrid_dispatched, 1152193219Srwatson nwp->nw_qdrops, nwp->nw_queued); 1153193219Srwatson } 1154193219Srwatson } 1155193219Srwatson} 1156193219Srwatson#endif 1157