/* netisr.c — FreeBSD head, revision 275358 */
1/*- 2 * Copyright (c) 2007-2009 Robert N. M. Watson 3 * Copyright (c) 2010-2011 Juniper Networks, Inc. 4 * All rights reserved. 5 * 6 * This software was developed by Robert N. M. Watson under contract 7 * to Juniper Networks, Inc. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31#include <sys/cdefs.h> 32__FBSDID("$FreeBSD: head/sys/net/netisr.c 275358 2014-12-01 11:45:24Z hselasky $"); 33 34/* 35 * netisr is a packet dispatch service, allowing synchronous (directly 36 * dispatched) and asynchronous (deferred dispatch) processing of packets by 37 * registered protocol handlers. 
Callers pass a protocol identifier and 38 * packet to netisr, along with a direct dispatch hint, and work will either 39 * be immediately processed by the registered handler, or passed to a 40 * software interrupt (SWI) thread for deferred dispatch. Callers will 41 * generally select one or the other based on: 42 * 43 * - Whether directly dispatching a netisr handler lead to code reentrance or 44 * lock recursion, such as entering the socket code from the socket code. 45 * - Whether directly dispatching a netisr handler lead to recursive 46 * processing, such as when decapsulating several wrapped layers of tunnel 47 * information (IPSEC within IPSEC within ...). 48 * 49 * Maintaining ordering for protocol streams is a critical design concern. 50 * Enforcing ordering limits the opportunity for concurrency, but maintains 51 * the strong ordering requirements found in some protocols, such as TCP. Of 52 * related concern is CPU affinity--it is desirable to process all data 53 * associated with a particular stream on the same CPU over time in order to 54 * avoid acquiring locks associated with the connection on different CPUs, 55 * keep connection data in one cache, and to generally encourage associated 56 * user threads to live on the same CPU as the stream. It's also desirable 57 * to avoid lock migration and contention where locks are associated with 58 * more than one flow. 59 * 60 * netisr supports several policy variations, represented by the 61 * NETISR_POLICY_* constants, allowing protocols to play various roles in 62 * identifying flows, assigning work to CPUs, etc. These are described in 63 * netisr.h. 
64 */ 65 66#include "opt_ddb.h" 67#include "opt_device_polling.h" 68 69#include <sys/param.h> 70#include <sys/bus.h> 71#include <sys/kernel.h> 72#include <sys/kthread.h> 73#include <sys/interrupt.h> 74#include <sys/lock.h> 75#include <sys/mbuf.h> 76#include <sys/mutex.h> 77#include <sys/pcpu.h> 78#include <sys/proc.h> 79#include <sys/rmlock.h> 80#include <sys/sched.h> 81#include <sys/smp.h> 82#include <sys/socket.h> 83#include <sys/sysctl.h> 84#include <sys/systm.h> 85 86#ifdef DDB 87#include <ddb/ddb.h> 88#endif 89 90#define _WANT_NETISR_INTERNAL /* Enable definitions from netisr_internal.h */ 91#include <net/if.h> 92#include <net/if_var.h> 93#include <net/netisr.h> 94#include <net/netisr_internal.h> 95#include <net/vnet.h> 96 97/*- 98 * Synchronize use and modification of the registered netisr data structures; 99 * acquire a read lock while modifying the set of registered protocols to 100 * prevent partially registered or unregistered protocols from being run. 101 * 102 * The following data structures and fields are protected by this lock: 103 * 104 * - The netisr_proto array, including all fields of struct netisr_proto. 105 * - The nws array, including all fields of struct netisr_worker. 106 * - The nws_array array. 107 * 108 * Note: the NETISR_LOCKING define controls whether read locks are acquired 109 * in packet processing paths requiring netisr registration stability. This 110 * is disabled by default as it can lead to measurable performance 111 * degradation even with rmlocks (3%-6% for loopback ping-pong traffic), and 112 * because netisr registration and unregistration is extremely rare at 113 * runtime. If it becomes more common, this decision should be revisited. 114 * 115 * XXXRW: rmlocks don't support assertions. 
 */
static struct rmlock	netisr_rmlock;
#define	NETISR_LOCK_INIT()	rm_init_flags(&netisr_rmlock, "netisr", \
				    RM_NOWITNESS)
#define	NETISR_LOCK_ASSERT()
#define	NETISR_RLOCK(tracker)	rm_rlock(&netisr_rmlock, (tracker))
#define	NETISR_RUNLOCK(tracker)	rm_runlock(&netisr_rmlock, (tracker))
#define	NETISR_WLOCK()		rm_wlock(&netisr_rmlock)
#define	NETISR_WUNLOCK()	rm_wunlock(&netisr_rmlock)
/* #define	NETISR_LOCKING */

static SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr");

/*-
 * Three global direct dispatch policies are supported:
 *
 * NETISR_DISPATCH_DEFERRED: All work is deferred for a netisr, regardless of
 * context (may be overriden by protocols).
 *
 * NETISR_DISPATCH_HYBRID: If the executing context allows direct dispatch,
 * and we're running on the CPU the work would be performed on, then direct
 * dispatch it if it wouldn't violate ordering constraints on the workstream.
 *
 * NETISR_DISPATCH_DIRECT: If the executing context allows direct dispatch,
 * always direct dispatch.  (The default.)
 *
 * Notice that changing the global policy could lead to short periods of
 * misordered processing, but this is considered acceptable as compared to
 * the complexity of enforcing ordering during policy changes.  Protocols can
 * override the global policy (when they're not doing that, they select
 * NETISR_DISPATCH_DEFAULT).
 */
#define	NETISR_DISPATCH_POLICY_DEFAULT	NETISR_DISPATCH_DIRECT
#define	NETISR_DISPATCH_POLICY_MAXSTR	20 /* Used for temporary buffers. */
static u_int	netisr_dispatch_policy = NETISR_DISPATCH_POLICY_DEFAULT;
static int	sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_isr, OID_AUTO, dispatch, CTLTYPE_STRING | CTLFLAG_RWTUN,
    0, 0, sysctl_netisr_dispatch_policy, "A",
    "netisr dispatch policy");

/*
 * Allow the administrator to limit the number of threads (CPUs) to use for
 * netisr.  We don't check netisr_maxthreads before creating the thread for
 * CPU 0, so in practice we ignore values <= 1.  This must be set at boot.
 * We will create at most one thread per CPU.
 */
static int	netisr_maxthreads = -1;		/* Max number of threads. */
SYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RDTUN,
    &netisr_maxthreads, 0,
    "Use at most this many CPUs for netisr processing");

static int	netisr_bindthreads = 0;		/* Bind threads to CPUs. */
SYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RDTUN,
    &netisr_bindthreads, 0, "Bind netisr threads to CPUs.");

/*
 * Limit per-workstream mbuf queue limits to at most net.isr.maxqlimit,
 * both for initial configuration and later modification using
 * netisr_setqlimit().
 */
#define	NETISR_DEFAULT_MAXQLIMIT	10240
static u_int	netisr_maxqlimit = NETISR_DEFAULT_MAXQLIMIT;
SYSCTL_UINT(_net_isr, OID_AUTO, maxqlimit, CTLFLAG_RDTUN,
    &netisr_maxqlimit, 0,
    "Maximum netisr per-protocol, per-CPU queue depth.");

/*
 * The default per-workstream mbuf queue limit for protocols that don't
 * initialize the nh_qlimit field of their struct netisr_handler.  If this
 * is set above netisr_maxqlimit, we truncate it to the maximum during boot.
 */
#define	NETISR_DEFAULT_DEFAULTQLIMIT	256
static u_int	netisr_defaultqlimit = NETISR_DEFAULT_DEFAULTQLIMIT;
SYSCTL_UINT(_net_isr, OID_AUTO, defaultqlimit, CTLFLAG_RDTUN,
    &netisr_defaultqlimit, 0,
    "Default netisr per-protocol, per-CPU queue limit if not set by protocol");

/*
 * Store and export the compile-time constant NETISR_MAXPROT limit on the
 * number of protocols that can register with netisr at a time.  This is
 * required for crashdump analysis, as it sizes netisr_proto[].
197 */ 198static u_int netisr_maxprot = NETISR_MAXPROT; 199SYSCTL_UINT(_net_isr, OID_AUTO, maxprot, CTLFLAG_RD, 200 &netisr_maxprot, 0, 201 "Compile-time limit on the number of protocols supported by netisr."); 202 203/* 204 * The netisr_proto array describes all registered protocols, indexed by 205 * protocol number. See netisr_internal.h for more details. 206 */ 207static struct netisr_proto netisr_proto[NETISR_MAXPROT]; 208 209/* 210 * Per-CPU workstream data. See netisr_internal.h for more details. 211 */ 212DPCPU_DEFINE(struct netisr_workstream, nws); 213 214/* 215 * Map contiguous values between 0 and nws_count into CPU IDs appropriate for 216 * accessing workstreams. This allows constructions of the form 217 * DPCPU_ID_GET(nws_array[arbitraryvalue % nws_count], nws). 218 */ 219static u_int nws_array[MAXCPU]; 220 221/* 222 * Number of registered workstreams. Will be at most the number of running 223 * CPUs once fully started. 224 */ 225static u_int nws_count; 226SYSCTL_UINT(_net_isr, OID_AUTO, numthreads, CTLFLAG_RD, 227 &nws_count, 0, "Number of extant netisr threads."); 228 229/* 230 * Synchronization for each workstream: a mutex protects all mutable fields 231 * in each stream, including per-protocol state (mbuf queues). The SWI is 232 * woken up if asynchronous dispatch is required. 233 */ 234#define NWS_LOCK(s) mtx_lock(&(s)->nws_mtx) 235#define NWS_LOCK_ASSERT(s) mtx_assert(&(s)->nws_mtx, MA_OWNED) 236#define NWS_UNLOCK(s) mtx_unlock(&(s)->nws_mtx) 237#define NWS_SIGNAL(s) swi_sched((s)->nws_swi_cookie, 0) 238 239/* 240 * Utility routines for protocols that implement their own mapping of flows 241 * to CPUs. 242 */ 243u_int 244netisr_get_cpucount(void) 245{ 246 247 return (nws_count); 248} 249 250u_int 251netisr_get_cpuid(u_int cpunumber) 252{ 253 254 KASSERT(cpunumber < nws_count, ("%s: %u > %u", __func__, cpunumber, 255 nws_count)); 256 257 return (nws_array[cpunumber]); 258} 259 260/* 261 * The default implementation of flow -> CPU ID mapping. 
262 * 263 * Non-static so that protocols can use it to map their own work to specific 264 * CPUs in a manner consistent to netisr for affinity purposes. 265 */ 266u_int 267netisr_default_flow2cpu(u_int flowid) 268{ 269 270 return (nws_array[flowid % nws_count]); 271} 272 273/* 274 * Dispatch tunable and sysctl configuration. 275 */ 276struct netisr_dispatch_table_entry { 277 u_int ndte_policy; 278 const char *ndte_policy_str; 279}; 280static const struct netisr_dispatch_table_entry netisr_dispatch_table[] = { 281 { NETISR_DISPATCH_DEFAULT, "default" }, 282 { NETISR_DISPATCH_DEFERRED, "deferred" }, 283 { NETISR_DISPATCH_HYBRID, "hybrid" }, 284 { NETISR_DISPATCH_DIRECT, "direct" }, 285}; 286static const u_int netisr_dispatch_table_len = 287 (sizeof(netisr_dispatch_table) / sizeof(netisr_dispatch_table[0])); 288 289static void 290netisr_dispatch_policy_to_str(u_int dispatch_policy, char *buffer, 291 u_int buflen) 292{ 293 const struct netisr_dispatch_table_entry *ndtep; 294 const char *str; 295 u_int i; 296 297 str = "unknown"; 298 for (i = 0; i < netisr_dispatch_table_len; i++) { 299 ndtep = &netisr_dispatch_table[i]; 300 if (ndtep->ndte_policy == dispatch_policy) { 301 str = ndtep->ndte_policy_str; 302 break; 303 } 304 } 305 snprintf(buffer, buflen, "%s", str); 306} 307 308static int 309netisr_dispatch_policy_from_str(const char *str, u_int *dispatch_policyp) 310{ 311 const struct netisr_dispatch_table_entry *ndtep; 312 u_int i; 313 314 for (i = 0; i < netisr_dispatch_table_len; i++) { 315 ndtep = &netisr_dispatch_table[i]; 316 if (strcmp(ndtep->ndte_policy_str, str) == 0) { 317 *dispatch_policyp = ndtep->ndte_policy; 318 return (0); 319 } 320 } 321 return (EINVAL); 322} 323 324static int 325sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS) 326{ 327 char tmp[NETISR_DISPATCH_POLICY_MAXSTR]; 328 u_int dispatch_policy; 329 int error; 330 331 netisr_dispatch_policy_to_str(netisr_dispatch_policy, tmp, 332 sizeof(tmp)); 333 error = sysctl_handle_string(oidp, tmp, 
sizeof(tmp), req); 334 if (error == 0 && req->newptr != NULL) { 335 error = netisr_dispatch_policy_from_str(tmp, 336 &dispatch_policy); 337 if (error == 0 && dispatch_policy == NETISR_DISPATCH_DEFAULT) 338 error = EINVAL; 339 if (error == 0) 340 netisr_dispatch_policy = dispatch_policy; 341 } 342 return (error); 343} 344 345/* 346 * Register a new netisr handler, which requires initializing per-protocol 347 * fields for each workstream. All netisr work is briefly suspended while 348 * the protocol is installed. 349 */ 350void 351netisr_register(const struct netisr_handler *nhp) 352{ 353 struct netisr_work *npwp; 354 const char *name; 355 u_int i, proto; 356 357 proto = nhp->nh_proto; 358 name = nhp->nh_name; 359 360 /* 361 * Test that the requested registration is valid. 362 */ 363 KASSERT(nhp->nh_name != NULL, 364 ("%s: nh_name NULL for %u", __func__, proto)); 365 KASSERT(nhp->nh_handler != NULL, 366 ("%s: nh_handler NULL for %s", __func__, name)); 367 KASSERT(nhp->nh_policy == NETISR_POLICY_SOURCE || 368 nhp->nh_policy == NETISR_POLICY_FLOW || 369 nhp->nh_policy == NETISR_POLICY_CPU, 370 ("%s: unsupported nh_policy %u for %s", __func__, 371 nhp->nh_policy, name)); 372 KASSERT(nhp->nh_policy == NETISR_POLICY_FLOW || 373 nhp->nh_m2flow == NULL, 374 ("%s: nh_policy != FLOW but m2flow defined for %s", __func__, 375 name)); 376 KASSERT(nhp->nh_policy == NETISR_POLICY_CPU || nhp->nh_m2cpuid == NULL, 377 ("%s: nh_policy != CPU but m2cpuid defined for %s", __func__, 378 name)); 379 KASSERT(nhp->nh_policy != NETISR_POLICY_CPU || nhp->nh_m2cpuid != NULL, 380 ("%s: nh_policy == CPU but m2cpuid not defined for %s", __func__, 381 name)); 382 KASSERT(nhp->nh_dispatch == NETISR_DISPATCH_DEFAULT || 383 nhp->nh_dispatch == NETISR_DISPATCH_DEFERRED || 384 nhp->nh_dispatch == NETISR_DISPATCH_HYBRID || 385 nhp->nh_dispatch == NETISR_DISPATCH_DIRECT, 386 ("%s: invalid nh_dispatch (%u)", __func__, nhp->nh_dispatch)); 387 388 KASSERT(proto < NETISR_MAXPROT, 389 ("%s(%u, %s): protocol 
too big", __func__, proto, name)); 390 391 /* 392 * Test that no existing registration exists for this protocol. 393 */ 394 NETISR_WLOCK(); 395 KASSERT(netisr_proto[proto].np_name == NULL, 396 ("%s(%u, %s): name present", __func__, proto, name)); 397 KASSERT(netisr_proto[proto].np_handler == NULL, 398 ("%s(%u, %s): handler present", __func__, proto, name)); 399 400 netisr_proto[proto].np_name = name; 401 netisr_proto[proto].np_handler = nhp->nh_handler; 402 netisr_proto[proto].np_m2flow = nhp->nh_m2flow; 403 netisr_proto[proto].np_m2cpuid = nhp->nh_m2cpuid; 404 netisr_proto[proto].np_drainedcpu = nhp->nh_drainedcpu; 405 if (nhp->nh_qlimit == 0) 406 netisr_proto[proto].np_qlimit = netisr_defaultqlimit; 407 else if (nhp->nh_qlimit > netisr_maxqlimit) { 408 printf("%s: %s requested queue limit %u capped to " 409 "net.isr.maxqlimit %u\n", __func__, name, nhp->nh_qlimit, 410 netisr_maxqlimit); 411 netisr_proto[proto].np_qlimit = netisr_maxqlimit; 412 } else 413 netisr_proto[proto].np_qlimit = nhp->nh_qlimit; 414 netisr_proto[proto].np_policy = nhp->nh_policy; 415 netisr_proto[proto].np_dispatch = nhp->nh_dispatch; 416 CPU_FOREACH(i) { 417 npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto]; 418 bzero(npwp, sizeof(*npwp)); 419 npwp->nw_qlimit = netisr_proto[proto].np_qlimit; 420 } 421 NETISR_WUNLOCK(); 422} 423 424/* 425 * Clear drop counters across all workstreams for a protocol. 
 */
void
netisr_clearqdrops(const struct netisr_handler *nhp)
{
	struct netisr_work *npwp;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	/* name is only consumed by KASSERTs, so only kept under INVARIANTS. */
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	/* Zero the drop counter in this protocol's queue on every CPU. */
	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		npwp->nw_qdrops = 0;
	}
	NETISR_WUNLOCK();
}

/*
 * Query current drop counters across all workstreams for a protocol.
 */
void
netisr_getqdrops(const struct netisr_handler *nhp, u_int64_t *qdropp)
{
	struct netisr_work *npwp;
	struct rm_priotracker tracker;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	*qdropp = 0;
	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_RLOCK(&tracker);
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	/* Sum per-CPU drop counters into the caller's accumulator. */
	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		*qdropp += npwp->nw_qdrops;
	}
	NETISR_RUNLOCK(&tracker);
}

/*
 * Query current per-workstream queue limit for a protocol.
 */
void
netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp)
{
	struct rm_priotracker tracker;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int proto;

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_RLOCK(&tracker);
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));
	*qlimitp = netisr_proto[proto].np_qlimit;
	NETISR_RUNLOCK(&tracker);
}

/*
 * Update the queue limit across per-workstream queues for a protocol.  We
 * simply change the limits, and don't drain overflowed packets as they will
 * (hopefully) take care of themselves shortly.
 */
int
netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit)
{
	struct netisr_work *npwp;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	if (qlimit > netisr_maxqlimit)
		return (EINVAL);

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	netisr_proto[proto].np_qlimit = qlimit;
	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		npwp->nw_qlimit = qlimit;
	}
	NETISR_WUNLOCK();
	return (0);
}

/*
 * Drain all packets currently held in a particular protocol work queue.
 */
static void
netisr_drain_proto(struct netisr_work *npwp)
{
	struct mbuf *m;

	/*
	 * We would assert the lock on the workstream but it's not passed in.
	 */
	while ((m = npwp->nw_head) != NULL) {
		npwp->nw_head = m->m_nextpkt;
		m->m_nextpkt = NULL;
		if (npwp->nw_head == NULL)
			npwp->nw_tail = NULL;
		npwp->nw_len--;
		m_freem(m);
	}
	/* After draining, tail/len must agree with the now-empty list. */
	KASSERT(npwp->nw_tail == NULL, ("%s: tail", __func__));
	KASSERT(npwp->nw_len == 0, ("%s: len", __func__));
}

/*
 * Remove the registration of a network protocol, which requires clearing
 * per-protocol fields across all workstreams, including freeing all mbufs
 * in the queues at time of unregister.  All work in netisr is briefly
 * suspended while this takes place.
 */
void
netisr_unregister(const struct netisr_handler *nhp)
{
	struct netisr_work *npwp;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	netisr_proto[proto].np_name = NULL;
	netisr_proto[proto].np_handler = NULL;
	netisr_proto[proto].np_m2flow = NULL;
	netisr_proto[proto].np_m2cpuid = NULL;
	netisr_proto[proto].np_qlimit = 0;
	netisr_proto[proto].np_policy = 0;
	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		netisr_drain_proto(npwp);
		bzero(npwp, sizeof(*npwp));
	}
	NETISR_WUNLOCK();
}

/*
 * Compose the global and per-protocol policies on dispatch, and return the
 * dispatch policy to use.
 */
static u_int
netisr_get_dispatch(struct netisr_proto *npp)
{

	/*
	 * Protocol-specific configuration overrides the global default.
	 */
	if (npp->np_dispatch != NETISR_DISPATCH_DEFAULT)
		return (npp->np_dispatch);
	return (netisr_dispatch_policy);
}

/*
 * Look up the workstream given a packet and source identifier.  Do this by
 * checking the protocol's policy, and optionally call out to the protocol
 * for assistance if required.
 */
static struct mbuf *
netisr_select_cpuid(struct netisr_proto *npp, u_int dispatch_policy,
    uintptr_t source, struct mbuf *m, u_int *cpuidp)
{
	struct ifnet *ifp;
	u_int policy;

	NETISR_LOCK_ASSERT();

	/*
	 * In the event we have only one worker, shortcut and deliver to it
	 * without further ado.
	 */
	if (nws_count == 1) {
		*cpuidp = nws_array[0];
		return (m);
	}

	/*
	 * What happens next depends on the policy selected by the protocol.
	 * If we want to support per-interface policies, we should do that
	 * here first.
	 */
	policy = npp->np_policy;
	if (policy == NETISR_POLICY_CPU) {
		m = npp->np_m2cpuid(m, source, cpuidp);
		if (m == NULL)
			return (NULL);

		/*
		 * It's possible for a protocol not to have a good idea about
		 * where to process a packet, in which case we fall back on
		 * the netisr code to decide.  In the hybrid case, return the
		 * current CPU ID, which will force an immediate direct
		 * dispatch.  In the queued case, fall back on the SOURCE
		 * policy.
		 */
		if (*cpuidp != NETISR_CPUID_NONE)
			return (m);
		if (dispatch_policy == NETISR_DISPATCH_HYBRID) {
			*cpuidp = curcpu;
			return (m);
		}
		policy = NETISR_POLICY_SOURCE;
	}

	if (policy == NETISR_POLICY_FLOW) {
		/* Ask the protocol to compute a flow ID if the mbuf lacks one. */
		if (M_HASHTYPE_GET(m) == M_HASHTYPE_NONE &&
		    npp->np_m2flow != NULL) {
			m = npp->np_m2flow(m, source);
			if (m == NULL)
				return (NULL);
		}
		if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
			*cpuidp =
			    netisr_default_flow2cpu(m->m_pkthdr.flowid);
			return (m);
		}
		/* No flow ID available; fall back on the SOURCE policy. */
		policy = NETISR_POLICY_SOURCE;
	}

	KASSERT(policy == NETISR_POLICY_SOURCE,
	    ("%s: invalid policy %u for %s", __func__, npp->np_policy,
	    npp->np_name));

	/* SOURCE policy: hash on receive interface (if any) plus source. */
	ifp = m->m_pkthdr.rcvif;
	if (ifp != NULL)
		*cpuidp = nws_array[(ifp->if_index + source) % nws_count];
	else
		*cpuidp = nws_array[source % nws_count];
	return (m);
}

/*
 * Process packets associated with a workstream and protocol.  For reasons of
 * fairness, we process up to one complete netisr queue at a time, moving the
 * queue to a stack-local queue for processing, but do not loop refreshing
 * from the global queue.  The caller is responsible for deciding whether to
 * loop, and for setting the NWS_RUNNING flag.  The passed workstream will be
 * locked on entry and relocked before return, but will be released while
 * processing.  The number of packets processed is returned.
719 */ 720static u_int 721netisr_process_workstream_proto(struct netisr_workstream *nwsp, u_int proto) 722{ 723 struct netisr_work local_npw, *npwp; 724 u_int handled; 725 struct mbuf *m; 726 727 NETISR_LOCK_ASSERT(); 728 NWS_LOCK_ASSERT(nwsp); 729 730 KASSERT(nwsp->nws_flags & NWS_RUNNING, 731 ("%s(%u): not running", __func__, proto)); 732 KASSERT(proto >= 0 && proto < NETISR_MAXPROT, 733 ("%s(%u): invalid proto\n", __func__, proto)); 734 735 npwp = &nwsp->nws_work[proto]; 736 if (npwp->nw_len == 0) 737 return (0); 738 739 /* 740 * Move the global work queue to a thread-local work queue. 741 * 742 * Notice that this means the effective maximum length of the queue 743 * is actually twice that of the maximum queue length specified in 744 * the protocol registration call. 745 */ 746 handled = npwp->nw_len; 747 local_npw = *npwp; 748 npwp->nw_head = NULL; 749 npwp->nw_tail = NULL; 750 npwp->nw_len = 0; 751 nwsp->nws_pendingbits &= ~(1 << proto); 752 NWS_UNLOCK(nwsp); 753 while ((m = local_npw.nw_head) != NULL) { 754 local_npw.nw_head = m->m_nextpkt; 755 m->m_nextpkt = NULL; 756 if (local_npw.nw_head == NULL) 757 local_npw.nw_tail = NULL; 758 local_npw.nw_len--; 759 VNET_ASSERT(m->m_pkthdr.rcvif != NULL, 760 ("%s:%d rcvif == NULL: m=%p", __func__, __LINE__, m)); 761 CURVNET_SET(m->m_pkthdr.rcvif->if_vnet); 762 netisr_proto[proto].np_handler(m); 763 CURVNET_RESTORE(); 764 } 765 KASSERT(local_npw.nw_len == 0, 766 ("%s(%u): len %u", __func__, proto, local_npw.nw_len)); 767 if (netisr_proto[proto].np_drainedcpu) 768 netisr_proto[proto].np_drainedcpu(nwsp->nws_cpu); 769 NWS_LOCK(nwsp); 770 npwp->nw_handled += handled; 771 return (handled); 772} 773 774/* 775 * SWI handler for netisr -- processes packets in a set of workstreams that 776 * it owns, woken up by calls to NWS_SIGNAL(). If this workstream is already 777 * being direct dispatched, go back to sleep and wait for the dispatching 778 * thread to wake us up again. 
 */
static void
swi_net(void *arg)
{
#ifdef NETISR_LOCKING
	struct rm_priotracker tracker;
#endif
	struct netisr_workstream *nwsp;
	u_int bits, prot;

	nwsp = arg;

#ifdef DEVICE_POLLING
	KASSERT(nws_count == 1,
	    ("%s: device_polling but nws_count != 1", __func__));
	netisr_poll();
#endif
#ifdef NETISR_LOCKING
	NETISR_RLOCK(&tracker);
#endif
	NWS_LOCK(nwsp);
	KASSERT(!(nwsp->nws_flags & NWS_RUNNING), ("swi_net: running"));
	if (nwsp->nws_flags & NWS_DISPATCHING)
		goto out;
	nwsp->nws_flags |= NWS_RUNNING;
	nwsp->nws_flags &= ~NWS_SCHEDULED;
	/* Drain every protocol with pending work, lowest proto first. */
	while ((bits = nwsp->nws_pendingbits) != 0) {
		while ((prot = ffs(bits)) != 0) {
			prot--;
			bits &= ~(1 << prot);
			(void)netisr_process_workstream_proto(nwsp, prot);
		}
	}
	nwsp->nws_flags &= ~NWS_RUNNING;
out:
	NWS_UNLOCK(nwsp);
#ifdef NETISR_LOCKING
	NETISR_RUNLOCK(&tracker);
#endif
#ifdef DEVICE_POLLING
	netisr_pollmore();
#endif
}

/*
 * Enqueue an mbuf on a protocol's queue within a workstream.  The caller
 * holds the workstream lock; on success, *dosignalp is set if the SWI
 * should be signalled once the lock has been dropped.
 */
static int
netisr_queue_workstream(struct netisr_workstream *nwsp, u_int proto,
    struct netisr_work *npwp, struct mbuf *m, int *dosignalp)
{

	NWS_LOCK_ASSERT(nwsp);

	*dosignalp = 0;
	if (npwp->nw_len < npwp->nw_qlimit) {
		m->m_nextpkt = NULL;
		if (npwp->nw_head == NULL) {
			npwp->nw_head = m;
			npwp->nw_tail = m;
		} else {
			npwp->nw_tail->m_nextpkt = m;
			npwp->nw_tail = m;
		}
		npwp->nw_len++;
		if (npwp->nw_len > npwp->nw_watermark)
			npwp->nw_watermark = npwp->nw_len;

		/*
		 * We must set the bit regardless of NWS_RUNNING, so that
		 * swi_net() keeps calling netisr_process_workstream_proto().
		 */
		nwsp->nws_pendingbits |= (1 << proto);
		if (!(nwsp->nws_flags &
		    (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED))) {
			nwsp->nws_flags |= NWS_SCHEDULED;
			*dosignalp = 1;	/* Defer until unlocked. */
		}
		npwp->nw_queued++;
		return (0);
	} else {
		/* Queue full: the packet is dropped, not queued. */
		m_freem(m);
		npwp->nw_qdrops++;
		return (ENOBUFS);
	}
}

/*
 * Queue a packet to the workstream of a specific CPU, signalling the SWI
 * if necessary.
 */
static int
netisr_queue_internal(u_int proto, struct mbuf *m, u_int cpuid)
{
	struct netisr_workstream *nwsp;
	struct netisr_work *npwp;
	int dosignal, error;

#ifdef NETISR_LOCKING
	NETISR_LOCK_ASSERT();
#endif
	KASSERT(cpuid <= mp_maxid, ("%s: cpuid too big (%u, %u)", __func__,
	    cpuid, mp_maxid));
	KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));

	dosignal = 0;
	error = 0;
	nwsp = DPCPU_ID_PTR(cpuid, nws);
	npwp = &nwsp->nws_work[proto];
	NWS_LOCK(nwsp);
	error = netisr_queue_workstream(nwsp, proto, npwp, m, &dosignal);
	NWS_UNLOCK(nwsp);
	/* Signal outside the workstream lock to avoid holding it in swi_sched. */
	if (dosignal)
		NWS_SIGNAL(nwsp);
	return (error);
}

/*
 * Deferred dispatch of a packet for a protocol, with an explicit source
 * identifier used by NETISR_POLICY_SOURCE CPU selection.
 */
int
netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m)
{
#ifdef NETISR_LOCKING
	struct rm_priotracker tracker;
#endif
	u_int cpuid;
	int error;

	KASSERT(proto < NETISR_MAXPROT,
	    ("%s: invalid proto %u", __func__, proto));

#ifdef NETISR_LOCKING
	NETISR_RLOCK(&tracker);
#endif
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s: invalid proto %u", __func__, proto));

	m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_DEFERRED,
	    source, m, &cpuid);
	if (m != NULL) {
		KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__,
		    cpuid));
		error = netisr_queue_internal(proto, m, cpuid);
	} else
		/* netisr_select_cpuid consumed the mbuf. */
		error = ENOBUFS;
#ifdef NETISR_LOCKING
	NETISR_RUNLOCK(&tracker);
#endif
	return (error);
}

int
netisr_queue(u_int proto, struct mbuf *m)
{

	return (netisr_queue_src(proto, 0, m));
}

/*
 * Dispatch a packet for netisr processing; direct dispatch is permitted by
 * calling context.
931 */ 932int 933netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m) 934{ 935#ifdef NETISR_LOCKING 936 struct rm_priotracker tracker; 937#endif 938 struct netisr_workstream *nwsp; 939 struct netisr_proto *npp; 940 struct netisr_work *npwp; 941 int dosignal, error; 942 u_int cpuid, dispatch_policy; 943 944 KASSERT(proto < NETISR_MAXPROT, 945 ("%s: invalid proto %u", __func__, proto)); 946#ifdef NETISR_LOCKING 947 NETISR_RLOCK(&tracker); 948#endif 949 npp = &netisr_proto[proto]; 950 KASSERT(npp->np_handler != NULL, ("%s: invalid proto %u", __func__, 951 proto)); 952 953 dispatch_policy = netisr_get_dispatch(npp); 954 if (dispatch_policy == NETISR_DISPATCH_DEFERRED) 955 return (netisr_queue_src(proto, source, m)); 956 957 /* 958 * If direct dispatch is forced, then unconditionally dispatch 959 * without a formal CPU selection. Borrow the current CPU's stats, 960 * even if there's no worker on it. In this case we don't update 961 * nws_flags because all netisr processing will be source ordered due 962 * to always being forced to directly dispatch. 963 */ 964 if (dispatch_policy == NETISR_DISPATCH_DIRECT) { 965 nwsp = DPCPU_PTR(nws); 966 npwp = &nwsp->nws_work[proto]; 967 npwp->nw_dispatched++; 968 npwp->nw_handled++; 969 netisr_proto[proto].np_handler(m); 970 error = 0; 971 goto out_unlock; 972 } 973 974 KASSERT(dispatch_policy == NETISR_DISPATCH_HYBRID, 975 ("%s: unknown dispatch policy (%u)", __func__, dispatch_policy)); 976 977 /* 978 * Otherwise, we execute in a hybrid mode where we will try to direct 979 * dispatch if we're on the right CPU and the netisr worker isn't 980 * already running. 
981 */ 982 sched_pin(); 983 m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_HYBRID, 984 source, m, &cpuid); 985 if (m == NULL) { 986 error = ENOBUFS; 987 goto out_unpin; 988 } 989 KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid)); 990 if (cpuid != curcpu) 991 goto queue_fallback; 992 nwsp = DPCPU_PTR(nws); 993 npwp = &nwsp->nws_work[proto]; 994 995 /*- 996 * We are willing to direct dispatch only if three conditions hold: 997 * 998 * (1) The netisr worker isn't already running, 999 * (2) Another thread isn't already directly dispatching, and 1000 * (3) The netisr hasn't already been woken up. 1001 */ 1002 NWS_LOCK(nwsp); 1003 if (nwsp->nws_flags & (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED)) { 1004 error = netisr_queue_workstream(nwsp, proto, npwp, m, 1005 &dosignal); 1006 NWS_UNLOCK(nwsp); 1007 if (dosignal) 1008 NWS_SIGNAL(nwsp); 1009 goto out_unpin; 1010 } 1011 1012 /* 1013 * The current thread is now effectively the netisr worker, so set 1014 * the dispatching flag to prevent concurrent processing of the 1015 * stream from another thread (even the netisr worker), which could 1016 * otherwise lead to effective misordering of the stream. 1017 */ 1018 nwsp->nws_flags |= NWS_DISPATCHING; 1019 NWS_UNLOCK(nwsp); 1020 netisr_proto[proto].np_handler(m); 1021 NWS_LOCK(nwsp); 1022 nwsp->nws_flags &= ~NWS_DISPATCHING; 1023 npwp->nw_handled++; 1024 npwp->nw_hybrid_dispatched++; 1025 1026 /* 1027 * If other work was enqueued by another thread while we were direct 1028 * dispatching, we need to signal the netisr worker to do that work. 1029 * In the future, we might want to do some of that work in the 1030 * current thread, rather than trigger further context switches. If 1031 * so, we'll want to establish a reasonable bound on the work done in 1032 * the "borrowed" context. 
1033 */ 1034 if (nwsp->nws_pendingbits != 0) { 1035 nwsp->nws_flags |= NWS_SCHEDULED; 1036 dosignal = 1; 1037 } else 1038 dosignal = 0; 1039 NWS_UNLOCK(nwsp); 1040 if (dosignal) 1041 NWS_SIGNAL(nwsp); 1042 error = 0; 1043 goto out_unpin; 1044 1045queue_fallback: 1046 error = netisr_queue_internal(proto, m, cpuid); 1047out_unpin: 1048 sched_unpin(); 1049out_unlock: 1050#ifdef NETISR_LOCKING 1051 NETISR_RUNLOCK(&tracker); 1052#endif 1053 return (error); 1054} 1055 1056int 1057netisr_dispatch(u_int proto, struct mbuf *m) 1058{ 1059 1060 return (netisr_dispatch_src(proto, 0, m)); 1061} 1062 1063#ifdef DEVICE_POLLING 1064/* 1065 * Kernel polling borrows a netisr thread to run interface polling in; this 1066 * function allows kernel polling to request that the netisr thread be 1067 * scheduled even if no packets are pending for protocols. 1068 */ 1069void 1070netisr_sched_poll(void) 1071{ 1072 struct netisr_workstream *nwsp; 1073 1074 nwsp = DPCPU_ID_PTR(nws_array[0], nws); 1075 NWS_SIGNAL(nwsp); 1076} 1077#endif 1078 1079static void 1080netisr_start_swi(u_int cpuid, struct pcpu *pc) 1081{ 1082 char swiname[12]; 1083 struct netisr_workstream *nwsp; 1084 int error; 1085 1086 KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid)); 1087 1088 nwsp = DPCPU_ID_PTR(cpuid, nws); 1089 mtx_init(&nwsp->nws_mtx, "netisr_mtx", NULL, MTX_DEF); 1090 nwsp->nws_cpu = cpuid; 1091 snprintf(swiname, sizeof(swiname), "netisr %u", cpuid); 1092 error = swi_add(&nwsp->nws_intr_event, swiname, swi_net, nwsp, 1093 SWI_NET, INTR_MPSAFE, &nwsp->nws_swi_cookie); 1094 if (error) 1095 panic("%s: swi_add %d", __func__, error); 1096 pc->pc_netisr = nwsp->nws_intr_event; 1097 if (netisr_bindthreads) { 1098 error = intr_event_bind(nwsp->nws_intr_event, cpuid); 1099 if (error != 0) 1100 printf("%s: cpu %u: intr_event_bind: %d", __func__, 1101 cpuid, error); 1102 } 1103 NETISR_WLOCK(); 1104 nws_array[nws_count] = nwsp->nws_cpu; 1105 nws_count++; 1106 NETISR_WUNLOCK(); 1107} 1108 1109/* 1110 * 
Initialize the netisr subsystem. We rely on BSS and static initialization
 * of most fields in global data structures.
 *
 * Start a worker thread for the boot CPU so that we can support network
 * traffic immediately in case the network stack is used before additional
 * CPUs are started (for example, diskless boot).
 */
static void
netisr_init(void *arg)
{
	KASSERT(curcpu == 0, ("%s: not on CPU 0", __func__));

	NETISR_LOCK_INIT();
	/* Clamp the worker-thread count tunable to [1, mp_ncpus]. */
	if (netisr_maxthreads < 1)
		netisr_maxthreads = 1;
	if (netisr_maxthreads > mp_ncpus) {
		printf("netisr_init: forcing maxthreads from %d to %d\n",
		    netisr_maxthreads, mp_ncpus);
		netisr_maxthreads = mp_ncpus;
	}
	/* The default per-protocol queue limit may not exceed the maximum. */
	if (netisr_defaultqlimit > netisr_maxqlimit) {
		printf("netisr_init: forcing defaultqlimit from %d to %d\n",
		    netisr_defaultqlimit, netisr_maxqlimit);
		netisr_defaultqlimit = netisr_maxqlimit;
	}
#ifdef DEVICE_POLLING
	/*
	 * The device polling code is not yet aware of how to deal with
	 * multiple netisr threads, so for the time being compiling in device
	 * polling disables parallel netisr workers.
	 */
	if (netisr_maxthreads != 1 || netisr_bindthreads != 0) {
		printf("netisr_init: forcing maxthreads to 1 and "
		    "bindthreads to 0 for device polling\n");
		netisr_maxthreads = 1;
		netisr_bindthreads = 0;
	}
#endif
	/* Start the boot CPU's worker now; others come up in netisr_start(). */
	netisr_start_swi(curcpu, pcpu_find(curcpu));
}
SYSINIT(netisr_init, SI_SUB_SOFTINTR, SI_ORDER_FIRST, netisr_init, NULL);

/*
 * Start worker threads for additional CPUs. No attempt to gracefully handle
 * work reassignment, we don't yet support dynamic reconfiguration.
 */
static void
netisr_start(void *arg)
{
	struct pcpu *pc;

	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
		/* Stop once the configured worker limit has been reached. */
		if (nws_count >= netisr_maxthreads)
			break;
		/* XXXRW: Is skipping absent CPUs still required here? 
*/ 1165 if (CPU_ABSENT(pc->pc_cpuid)) 1166 continue; 1167 /* Worker will already be present for boot CPU. */ 1168 if (pc->pc_netisr != NULL) 1169 continue; 1170 netisr_start_swi(pc->pc_cpuid, pc); 1171 } 1172} 1173SYSINIT(netisr_start, SI_SUB_SMP, SI_ORDER_MIDDLE, netisr_start, NULL); 1174 1175/* 1176 * Sysctl monitoring for netisr: query a list of registered protocols. 1177 */ 1178static int 1179sysctl_netisr_proto(SYSCTL_HANDLER_ARGS) 1180{ 1181 struct rm_priotracker tracker; 1182 struct sysctl_netisr_proto *snpp, *snp_array; 1183 struct netisr_proto *npp; 1184 u_int counter, proto; 1185 int error; 1186 1187 if (req->newptr != NULL) 1188 return (EINVAL); 1189 snp_array = malloc(sizeof(*snp_array) * NETISR_MAXPROT, M_TEMP, 1190 M_ZERO | M_WAITOK); 1191 counter = 0; 1192 NETISR_RLOCK(&tracker); 1193 for (proto = 0; proto < NETISR_MAXPROT; proto++) { 1194 npp = &netisr_proto[proto]; 1195 if (npp->np_name == NULL) 1196 continue; 1197 snpp = &snp_array[counter]; 1198 snpp->snp_version = sizeof(*snpp); 1199 strlcpy(snpp->snp_name, npp->np_name, NETISR_NAMEMAXLEN); 1200 snpp->snp_proto = proto; 1201 snpp->snp_qlimit = npp->np_qlimit; 1202 snpp->snp_policy = npp->np_policy; 1203 snpp->snp_dispatch = npp->np_dispatch; 1204 if (npp->np_m2flow != NULL) 1205 snpp->snp_flags |= NETISR_SNP_FLAGS_M2FLOW; 1206 if (npp->np_m2cpuid != NULL) 1207 snpp->snp_flags |= NETISR_SNP_FLAGS_M2CPUID; 1208 if (npp->np_drainedcpu != NULL) 1209 snpp->snp_flags |= NETISR_SNP_FLAGS_DRAINEDCPU; 1210 counter++; 1211 } 1212 NETISR_RUNLOCK(&tracker); 1213 KASSERT(counter <= NETISR_MAXPROT, 1214 ("sysctl_netisr_proto: counter too big (%d)", counter)); 1215 error = SYSCTL_OUT(req, snp_array, sizeof(*snp_array) * counter); 1216 free(snp_array, M_TEMP); 1217 return (error); 1218} 1219 1220SYSCTL_PROC(_net_isr, OID_AUTO, proto, 1221 CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_proto, 1222 "S,sysctl_netisr_proto", 1223 "Return list of protocols registered with netisr"); 1224 1225/* 1226 * 
Sysctl monitoring for netisr: query a list of workstreams. 1227 */ 1228static int 1229sysctl_netisr_workstream(SYSCTL_HANDLER_ARGS) 1230{ 1231 struct rm_priotracker tracker; 1232 struct sysctl_netisr_workstream *snwsp, *snws_array; 1233 struct netisr_workstream *nwsp; 1234 u_int counter, cpuid; 1235 int error; 1236 1237 if (req->newptr != NULL) 1238 return (EINVAL); 1239 snws_array = malloc(sizeof(*snws_array) * MAXCPU, M_TEMP, 1240 M_ZERO | M_WAITOK); 1241 counter = 0; 1242 NETISR_RLOCK(&tracker); 1243 CPU_FOREACH(cpuid) { 1244 nwsp = DPCPU_ID_PTR(cpuid, nws); 1245 if (nwsp->nws_intr_event == NULL) 1246 continue; 1247 NWS_LOCK(nwsp); 1248 snwsp = &snws_array[counter]; 1249 snwsp->snws_version = sizeof(*snwsp); 1250 1251 /* 1252 * For now, we equate workstream IDs and CPU IDs in the 1253 * kernel, but expose them independently to userspace in case 1254 * that assumption changes in the future. 1255 */ 1256 snwsp->snws_wsid = cpuid; 1257 snwsp->snws_cpu = cpuid; 1258 if (nwsp->nws_intr_event != NULL) 1259 snwsp->snws_flags |= NETISR_SNWS_FLAGS_INTR; 1260 NWS_UNLOCK(nwsp); 1261 counter++; 1262 } 1263 NETISR_RUNLOCK(&tracker); 1264 KASSERT(counter <= MAXCPU, 1265 ("sysctl_netisr_workstream: counter too big (%d)", counter)); 1266 error = SYSCTL_OUT(req, snws_array, sizeof(*snws_array) * counter); 1267 free(snws_array, M_TEMP); 1268 return (error); 1269} 1270 1271SYSCTL_PROC(_net_isr, OID_AUTO, workstream, 1272 CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_workstream, 1273 "S,sysctl_netisr_workstream", 1274 "Return list of workstreams implemented by netisr"); 1275 1276/* 1277 * Sysctl monitoring for netisr: query per-protocol data across all 1278 * workstreams. 
1279 */ 1280static int 1281sysctl_netisr_work(SYSCTL_HANDLER_ARGS) 1282{ 1283 struct rm_priotracker tracker; 1284 struct sysctl_netisr_work *snwp, *snw_array; 1285 struct netisr_workstream *nwsp; 1286 struct netisr_proto *npp; 1287 struct netisr_work *nwp; 1288 u_int counter, cpuid, proto; 1289 int error; 1290 1291 if (req->newptr != NULL) 1292 return (EINVAL); 1293 snw_array = malloc(sizeof(*snw_array) * MAXCPU * NETISR_MAXPROT, 1294 M_TEMP, M_ZERO | M_WAITOK); 1295 counter = 0; 1296 NETISR_RLOCK(&tracker); 1297 CPU_FOREACH(cpuid) { 1298 nwsp = DPCPU_ID_PTR(cpuid, nws); 1299 if (nwsp->nws_intr_event == NULL) 1300 continue; 1301 NWS_LOCK(nwsp); 1302 for (proto = 0; proto < NETISR_MAXPROT; proto++) { 1303 npp = &netisr_proto[proto]; 1304 if (npp->np_name == NULL) 1305 continue; 1306 nwp = &nwsp->nws_work[proto]; 1307 snwp = &snw_array[counter]; 1308 snwp->snw_version = sizeof(*snwp); 1309 snwp->snw_wsid = cpuid; /* See comment above. */ 1310 snwp->snw_proto = proto; 1311 snwp->snw_len = nwp->nw_len; 1312 snwp->snw_watermark = nwp->nw_watermark; 1313 snwp->snw_dispatched = nwp->nw_dispatched; 1314 snwp->snw_hybrid_dispatched = 1315 nwp->nw_hybrid_dispatched; 1316 snwp->snw_qdrops = nwp->nw_qdrops; 1317 snwp->snw_queued = nwp->nw_queued; 1318 snwp->snw_handled = nwp->nw_handled; 1319 counter++; 1320 } 1321 NWS_UNLOCK(nwsp); 1322 } 1323 KASSERT(counter <= MAXCPU * NETISR_MAXPROT, 1324 ("sysctl_netisr_work: counter too big (%d)", counter)); 1325 NETISR_RUNLOCK(&tracker); 1326 error = SYSCTL_OUT(req, snw_array, sizeof(*snw_array) * counter); 1327 free(snw_array, M_TEMP); 1328 return (error); 1329} 1330 1331SYSCTL_PROC(_net_isr, OID_AUTO, work, 1332 CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_work, 1333 "S,sysctl_netisr_work", 1334 "Return list of per-workstream, per-protocol work in netisr"); 1335 1336#ifdef DDB 1337DB_SHOW_COMMAND(netisr, db_show_netisr) 1338{ 1339 struct netisr_workstream *nwsp; 1340 struct netisr_work *nwp; 1341 int first, proto; 
1342 u_int cpuid; 1343 1344 db_printf("%3s %6s %5s %5s %5s %8s %8s %8s %8s\n", "CPU", "Proto", 1345 "Len", "WMark", "Max", "Disp", "HDisp", "Drop", "Queue"); 1346 CPU_FOREACH(cpuid) { 1347 nwsp = DPCPU_ID_PTR(cpuid, nws); 1348 if (nwsp->nws_intr_event == NULL) 1349 continue; 1350 first = 1; 1351 for (proto = 0; proto < NETISR_MAXPROT; proto++) { 1352 if (netisr_proto[proto].np_handler == NULL) 1353 continue; 1354 nwp = &nwsp->nws_work[proto]; 1355 if (first) { 1356 db_printf("%3d ", cpuid); 1357 first = 0; 1358 } else 1359 db_printf("%3s ", ""); 1360 db_printf( 1361 "%6s %5d %5d %5d %8ju %8ju %8ju %8ju\n", 1362 netisr_proto[proto].np_name, nwp->nw_len, 1363 nwp->nw_watermark, nwp->nw_qlimit, 1364 nwp->nw_dispatched, nwp->nw_hybrid_dispatched, 1365 nwp->nw_qdrops, nwp->nw_queued); 1366 } 1367 } 1368} 1369#endif 1370