/*-
 * Copyright (c) 2007-2009 Robert N. M. Watson
 * Copyright (c) 2010-2011 Juniper Networks, Inc.
 * All rights reserved.
 *
 * This software was developed by Robert N. M. Watson under contract
 * to Juniper Networks, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * netisr is a packet dispatch service, allowing synchronous (directly
 * dispatched) and asynchronous (deferred dispatch) processing of packets by
 * registered protocol handlers.  Callers pass a protocol identifier and
 * packet to netisr, along with a direct dispatch hint, and work will either
 * be immediately processed by the registered handler, or passed to a
 * software interrupt (SWI) thread for deferred dispatch.  Callers will
 * generally select one or the other based on:
 *
 * - Whether directly dispatching a netisr handler leads to code reentrance
 *   or lock recursion, such as entering the socket code from the socket code.
 * - Whether directly dispatching a netisr handler leads to recursive
 *   processing, such as when decapsulating several wrapped layers of tunnel
 *   information (IPSEC within IPSEC within ...).
 *
 * Maintaining ordering for protocol streams is a critical design concern.
 * Enforcing ordering limits the opportunity for concurrency, but maintains
 * the strong ordering requirements found in some protocols, such as TCP.  Of
 * related concern is CPU affinity--it is desirable to process all data
 * associated with a particular stream on the same CPU over time in order to
 * avoid acquiring locks associated with the connection on different CPUs,
 * keep connection data in one cache, and to generally encourage associated
 * user threads to live on the same CPU as the stream.  It's also desirable
 * to avoid lock migration and contention where locks are associated with
 * more than one flow.
 *
 * netisr supports several policy variations, represented by the
 * NETISR_POLICY_* constants, allowing protocols to play various roles in
 * identifying flows, assigning work to CPUs, etc.  These are described in
 * netisr.h.
 */
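
/*
 * A minimal sketch of caller usage (illustrative only; the real calls live
 * in the protocols and drivers, not in this file), assuming NETISR_IP as the
 * example protocol: an inbound path hands each packet to netisr by protocol
 * number.  netisr_dispatch() honours the configured dispatch policy, while
 * netisr_queue() always defers to a SWI thread; on a non-zero return the
 * mbuf has already been consumed and must not be freed again.
 *
 *	#include <net/netisr.h>
 *
 *	static void
 *	example_deliver(struct mbuf *m)
 *	{
 *
 *		if (netisr_dispatch(NETISR_IP, m) != 0)
 *			printf("example_deliver: packet dropped\n");
 *	}
 */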

#include "opt_ddb.h"
#include "opt_device_polling.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/interrupt.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/rmlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#define	_WANT_NETISR_INTERNAL	/* Enable definitions from netisr_internal.h */
#include <net/if.h>
#include <net/if_var.h>
#include <net/netisr.h>
#include <net/netisr_internal.h>
#include <net/vnet.h>

/*-
 * Synchronize use and modification of the registered netisr data structures;
 * a write lock is held while modifying the set of registered protocols, so
 * that partially registered or unregistered protocols are never run.
 *
 * The following data structures and fields are protected by this lock:
 *
 * - The netisr_proto array, including all fields of struct netisr_proto.
 * - The nws array, including all fields of struct netisr_workstream.
 * - The nws_array array.
 *
 * Note: the NETISR_LOCKING define controls whether read locks are acquired
 * in packet processing paths requiring netisr registration stability.  This
 * is disabled by default as it can lead to measurable performance
 * degradation even with rmlocks (3%-6% for loopback ping-pong traffic), and
 * because netisr registration and unregistration is extremely rare at
 * runtime.  If it becomes more common, this decision should be revisited.
 *
 * XXXRW: rmlocks don't support assertions.
 */
static struct rmlock	netisr_rmlock;
#define	NETISR_LOCK_INIT()	rm_init_flags(&netisr_rmlock, "netisr", \
				    RM_NOWITNESS)
#define	NETISR_LOCK_ASSERT()
#define	NETISR_RLOCK(tracker)	rm_rlock(&netisr_rmlock, (tracker))
#define	NETISR_RUNLOCK(tracker)	rm_runlock(&netisr_rmlock, (tracker))
#define	NETISR_WLOCK()		rm_wlock(&netisr_rmlock)
#define	NETISR_WUNLOCK()	rm_wunlock(&netisr_rmlock)
/* #define	NETISR_LOCKING */

static SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr");

/*-
 * Three global direct dispatch policies are supported:
 *
 * NETISR_DISPATCH_DEFERRED: All work is deferred for a netisr, regardless of
 * context (may be overridden by protocols).
 *
 * NETISR_DISPATCH_HYBRID: If the executing context allows direct dispatch,
 * and we're running on the CPU the work would be performed on, then direct
 * dispatch it if it wouldn't violate ordering constraints on the workstream.
 *
 * NETISR_DISPATCH_DIRECT: If the executing context allows direct dispatch,
 * always direct dispatch.  (The default.)
 *
 * Notice that changing the global policy could lead to short periods of
 * misordered processing, but this is considered acceptable as compared to
 * the complexity of enforcing ordering during policy changes.  Protocols can
 * override the global policy (when they're not doing that, they select
 * NETISR_DISPATCH_DEFAULT).
 */
#define	NETISR_DISPATCH_POLICY_DEFAULT	NETISR_DISPATCH_DIRECT
#define	NETISR_DISPATCH_POLICY_MAXSTR	20 /* Used for temporary buffers. */
static u_int	netisr_dispatch_policy = NETISR_DISPATCH_POLICY_DEFAULT;
static int	sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_isr, OID_AUTO, dispatch, CTLTYPE_STRING | CTLFLAG_RW |
    CTLFLAG_TUN, 0, 0, sysctl_netisr_dispatch_policy, "A",
    "netisr dispatch policy");
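
/*
 * A brief configuration sketch, assuming the stock loader and sysctl(8): the
 * policy above is selected by name, either at boot via loader.conf(5),
 *
 *	net.isr.dispatch="deferred"
 *
 * or at runtime,
 *
 *	sysctl net.isr.dispatch=hybrid
 *
 * Valid names are those in netisr_dispatch_table below; "default" is
 * reserved for protocols and is rejected by the sysctl handler.
 */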

/*
 * These sysctls were used in previous versions to control and export
 * dispatch policy state.  Now, we provide read-only export via them so that
 * older netstat binaries work.  At some point they can be garbage collected.
 */
static int	netisr_direct_force;
SYSCTL_INT(_net_isr, OID_AUTO, direct_force, CTLFLAG_RD,
    &netisr_direct_force, 0, "compat: force direct dispatch");

static int	netisr_direct;
SYSCTL_INT(_net_isr, OID_AUTO, direct, CTLFLAG_RD, &netisr_direct, 0,
    "compat: enable direct dispatch");

/*
 * Allow the administrator to limit the number of threads (CPUs) to use for
 * netisr.  We don't check netisr_maxthreads before creating the thread for
 * CPU 0, so in practice we ignore values <= 1.  This must be set at boot.
 * We will create at most one thread per CPU.
 */
static int	netisr_maxthreads = -1;		/* Max number of threads. */
TUNABLE_INT("net.isr.maxthreads", &netisr_maxthreads);
SYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RDTUN,
    &netisr_maxthreads, 0,
    "Use at most this many CPUs for netisr processing");

static int	netisr_bindthreads = 0;		/* Bind threads to CPUs. */
TUNABLE_INT("net.isr.bindthreads", &netisr_bindthreads);
SYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RDTUN,
    &netisr_bindthreads, 0, "Bind netisr threads to CPUs.");
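
/*
 * For example (a sketch, assuming the stock loader), both knobs above are
 * boot-time tunables (CTLFLAG_RDTUN) and would be set from loader.conf(5):
 *
 *	net.isr.maxthreads=4
 *	net.isr.bindthreads=1
 */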

/*
 * Limit per-workstream mbuf queue limits to at most net.isr.maxqlimit, both
 * for initial configuration and later modification using
 * netisr_setqlimit().
 */
#define	NETISR_DEFAULT_MAXQLIMIT	10240
static u_int	netisr_maxqlimit = NETISR_DEFAULT_MAXQLIMIT;
TUNABLE_INT("net.isr.maxqlimit", &netisr_maxqlimit);
SYSCTL_UINT(_net_isr, OID_AUTO, maxqlimit, CTLFLAG_RDTUN,
    &netisr_maxqlimit, 0,
    "Maximum netisr per-protocol, per-CPU queue depth.");

/*
 * The default per-workstream mbuf queue limit for protocols that don't
 * initialize the nh_qlimit field of their struct netisr_handler.  If this is
 * set above netisr_maxqlimit, we truncate it to the maximum during boot.
 */
#define	NETISR_DEFAULT_DEFAULTQLIMIT	256
static u_int	netisr_defaultqlimit = NETISR_DEFAULT_DEFAULTQLIMIT;
TUNABLE_INT("net.isr.defaultqlimit", &netisr_defaultqlimit);
SYSCTL_UINT(_net_isr, OID_AUTO, defaultqlimit, CTLFLAG_RDTUN,
    &netisr_defaultqlimit, 0,
    "Default netisr per-protocol, per-CPU queue limit if not set by protocol");

/*
 * Store and export the compile-time constant NETISR_MAXPROT limit on the
 * number of protocols that can register with netisr at a time.  This is
 * required for crashdump analysis, as it sizes netisr_proto[].
 */
static u_int	netisr_maxprot = NETISR_MAXPROT;
SYSCTL_UINT(_net_isr, OID_AUTO, maxprot, CTLFLAG_RD,
    &netisr_maxprot, 0,
    "Compile-time limit on the number of protocols supported by netisr.");

/*
 * The netisr_proto array describes all registered protocols, indexed by
 * protocol number.  See netisr_internal.h for more details.
 */
static struct netisr_proto	netisr_proto[NETISR_MAXPROT];

/*
 * Per-CPU workstream data.  See netisr_internal.h for more details.
 */
DPCPU_DEFINE(struct netisr_workstream, nws);

/*
 * Map contiguous values between 0 and nws_count into CPU IDs appropriate for
 * accessing workstreams.  This allows constructions of the form
 * DPCPU_ID_GET(nws_array[arbitraryvalue % nws_count], nws).
 */
static u_int				 nws_array[MAXCPU];

/*
 * Number of registered workstreams.  Will be at most the number of running
 * CPUs once fully started.
 */
static u_int				 nws_count;
SYSCTL_UINT(_net_isr, OID_AUTO, numthreads, CTLFLAG_RD,
    &nws_count, 0, "Number of extant netisr threads.");

/*
 * Synchronization for each workstream: a mutex protects all mutable fields
 * in each stream, including per-protocol state (mbuf queues).  The SWI is
 * woken up if asynchronous dispatch is required.
 */
#define	NWS_LOCK(s)		mtx_lock(&(s)->nws_mtx)
#define	NWS_LOCK_ASSERT(s)	mtx_assert(&(s)->nws_mtx, MA_OWNED)
#define	NWS_UNLOCK(s)		mtx_unlock(&(s)->nws_mtx)
#define	NWS_SIGNAL(s)		swi_sched((s)->nws_swi_cookie, 0)

/*
 * Utility routines for protocols that implement their own mapping of flows
 * to CPUs.
 */
u_int
netisr_get_cpucount(void)
{

	return (nws_count);
}

u_int
netisr_get_cpuid(u_int cpunumber)
{

	KASSERT(cpunumber < nws_count, ("%s: %u > %u", __func__, cpunumber,
	    nws_count));

	return (nws_array[cpunumber]);
}

/*
 * The default implementation of flow -> CPU ID mapping.
 *
 * Non-static so that protocols can use it to map their own work to specific
 * CPUs in a manner consistent with netisr for affinity purposes.
 */
u_int
netisr_default_flow2cpu(u_int flowid)
{

	return (nws_array[flowid % nws_count]);
}
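
/*
 * A minimal sketch of how a protocol using NETISR_POLICY_CPU might build its
 * nh_m2cpuid callback on the utilities above; the flow test here is purely
 * hypothetical and stands in for protocol-specific logic:
 *
 *	static struct mbuf *
 *	example_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuidp)
 *	{
 *
 *		if (m->m_flags & M_FLOWID)
 *			*cpuidp = netisr_default_flow2cpu(m->m_pkthdr.flowid);
 *		else
 *			*cpuidp = NETISR_CPUID_NONE;
 *		return (m);
 *	}
 */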

/*
 * Dispatch tunable and sysctl configuration.
 */
struct netisr_dispatch_table_entry {
	u_int		 ndte_policy;
	const char	*ndte_policy_str;
};
static const struct netisr_dispatch_table_entry netisr_dispatch_table[] = {
	{ NETISR_DISPATCH_DEFAULT, "default" },
	{ NETISR_DISPATCH_DEFERRED, "deferred" },
	{ NETISR_DISPATCH_HYBRID, "hybrid" },
	{ NETISR_DISPATCH_DIRECT, "direct" },
};
static const u_int netisr_dispatch_table_len =
    (sizeof(netisr_dispatch_table) / sizeof(netisr_dispatch_table[0]));

static void
netisr_dispatch_policy_to_str(u_int dispatch_policy, char *buffer,
    u_int buflen)
{
	const struct netisr_dispatch_table_entry *ndtep;
	const char *str;
	u_int i;

	str = "unknown";
	for (i = 0; i < netisr_dispatch_table_len; i++) {
		ndtep = &netisr_dispatch_table[i];
		if (ndtep->ndte_policy == dispatch_policy) {
			str = ndtep->ndte_policy_str;
			break;
		}
	}
	snprintf(buffer, buflen, "%s", str);
}

static int
netisr_dispatch_policy_from_str(const char *str, u_int *dispatch_policyp)
{
	const struct netisr_dispatch_table_entry *ndtep;
	u_int i;

	for (i = 0; i < netisr_dispatch_table_len; i++) {
		ndtep = &netisr_dispatch_table[i];
		if (strcmp(ndtep->ndte_policy_str, str) == 0) {
			*dispatch_policyp = ndtep->ndte_policy;
			return (0);
		}
	}
	return (EINVAL);
}

static void
netisr_dispatch_policy_compat(void)
{

	switch (netisr_dispatch_policy) {
	case NETISR_DISPATCH_DEFERRED:
		netisr_direct_force = 0;
		netisr_direct = 0;
		break;

	case NETISR_DISPATCH_HYBRID:
		netisr_direct_force = 0;
		netisr_direct = 1;
		break;

	case NETISR_DISPATCH_DIRECT:
		netisr_direct_force = 1;
		netisr_direct = 1;
		break;

	default:
		panic("%s: unknown policy %u", __func__,
		    netisr_dispatch_policy);
	}
}

static int
sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS)
{
	char tmp[NETISR_DISPATCH_POLICY_MAXSTR];
	u_int dispatch_policy;
	int error;

	netisr_dispatch_policy_to_str(netisr_dispatch_policy, tmp,
	    sizeof(tmp));
	error = sysctl_handle_string(oidp, tmp, sizeof(tmp), req);
	if (error == 0 && req->newptr != NULL) {
		error = netisr_dispatch_policy_from_str(tmp,
		    &dispatch_policy);
		if (error == 0 && dispatch_policy == NETISR_DISPATCH_DEFAULT)
			error = EINVAL;
		if (error == 0) {
			netisr_dispatch_policy = dispatch_policy;
			netisr_dispatch_policy_compat();
		}
	}
	return (error);
}

/*
 * Register a new netisr handler, which requires initializing per-protocol
 * fields for each workstream.  All netisr work is briefly suspended while
 * the protocol is installed.
 */
void
netisr_register(const struct netisr_handler *nhp)
{
	struct netisr_work *npwp;
	const char *name;
	u_int i, proto;

	proto = nhp->nh_proto;
	name = nhp->nh_name;

	/*
	 * Test that the requested registration is valid.
	 */
	KASSERT(nhp->nh_name != NULL,
	    ("%s: nh_name NULL for %u", __func__, proto));
	KASSERT(nhp->nh_handler != NULL,
	    ("%s: nh_handler NULL for %s", __func__, name));
	KASSERT(nhp->nh_policy == NETISR_POLICY_SOURCE ||
	    nhp->nh_policy == NETISR_POLICY_FLOW ||
	    nhp->nh_policy == NETISR_POLICY_CPU,
	    ("%s: unsupported nh_policy %u for %s", __func__,
	    nhp->nh_policy, name));
	KASSERT(nhp->nh_policy == NETISR_POLICY_FLOW ||
	    nhp->nh_m2flow == NULL,
	    ("%s: nh_policy != FLOW but m2flow defined for %s", __func__,
	    name));
	KASSERT(nhp->nh_policy == NETISR_POLICY_CPU || nhp->nh_m2cpuid == NULL,
	    ("%s: nh_policy != CPU but m2cpuid defined for %s", __func__,
	    name));
	KASSERT(nhp->nh_policy != NETISR_POLICY_CPU || nhp->nh_m2cpuid != NULL,
	    ("%s: nh_policy == CPU but m2cpuid not defined for %s", __func__,
	    name));
	KASSERT(nhp->nh_dispatch == NETISR_DISPATCH_DEFAULT ||
	    nhp->nh_dispatch == NETISR_DISPATCH_DEFERRED ||
	    nhp->nh_dispatch == NETISR_DISPATCH_HYBRID ||
	    nhp->nh_dispatch == NETISR_DISPATCH_DIRECT,
	    ("%s: invalid nh_dispatch (%u)", __func__, nhp->nh_dispatch));

	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u, %s): protocol too big", __func__, proto, name));

	/*
	 * Test that no existing registration exists for this protocol.
	 */
	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_name == NULL,
	    ("%s(%u, %s): name present", __func__, proto, name));
	KASSERT(netisr_proto[proto].np_handler == NULL,
	    ("%s(%u, %s): handler present", __func__, proto, name));

	netisr_proto[proto].np_name = name;
	netisr_proto[proto].np_handler = nhp->nh_handler;
	netisr_proto[proto].np_m2flow = nhp->nh_m2flow;
	netisr_proto[proto].np_m2cpuid = nhp->nh_m2cpuid;
	netisr_proto[proto].np_drainedcpu = nhp->nh_drainedcpu;
	if (nhp->nh_qlimit == 0)
		netisr_proto[proto].np_qlimit = netisr_defaultqlimit;
	else if (nhp->nh_qlimit > netisr_maxqlimit) {
		printf("%s: %s requested queue limit %u capped to "
		    "net.isr.maxqlimit %u\n", __func__, name, nhp->nh_qlimit,
		    netisr_maxqlimit);
		netisr_proto[proto].np_qlimit = netisr_maxqlimit;
	} else
		netisr_proto[proto].np_qlimit = nhp->nh_qlimit;
	netisr_proto[proto].np_policy = nhp->nh_policy;
	netisr_proto[proto].np_dispatch = nhp->nh_dispatch;
	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		bzero(npwp, sizeof(*npwp));
		npwp->nw_qlimit = netisr_proto[proto].np_qlimit;
	}
	NETISR_WUNLOCK();
}
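
/*
 * A brief registration sketch, assuming IP as the example protocol (the real
 * registration lives in the protocol's own initialization code, not here):
 *
 *	static struct netisr_handler ip_nh = {
 *		.nh_name = "ip",
 *		.nh_handler = ip_input,
 *		.nh_proto = NETISR_IP,
 *		.nh_policy = NETISR_POLICY_FLOW,
 *		.nh_dispatch = NETISR_DISPATCH_DEFAULT,
 *	};
 *
 *	netisr_register(&ip_nh);
 *
 * Leaving nh_qlimit at zero selects net.isr.defaultqlimit, and
 * NETISR_DISPATCH_DEFAULT defers to the global dispatch policy.
 */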

/*
 * Clear drop counters across all workstreams for a protocol.
 */
void
netisr_clearqdrops(const struct netisr_handler *nhp)
{
	struct netisr_work *npwp;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		npwp->nw_qdrops = 0;
	}
	NETISR_WUNLOCK();
}

/*
 * Query current drop counters across all workstreams for a protocol.
 */
void
netisr_getqdrops(const struct netisr_handler *nhp, u_int64_t *qdropp)
{
	struct netisr_work *npwp;
	struct rm_priotracker tracker;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	*qdropp = 0;
	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_RLOCK(&tracker);
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		*qdropp += npwp->nw_qdrops;
	}
	NETISR_RUNLOCK(&tracker);
}

/*
 * Query current per-workstream queue limit for a protocol.
 */
void
netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp)
{
	struct rm_priotracker tracker;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int proto;

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_RLOCK(&tracker);
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));
	*qlimitp = netisr_proto[proto].np_qlimit;
	NETISR_RUNLOCK(&tracker);
}

/*
 * Update the queue limit across per-workstream queues for a protocol.  We
 * simply change the limits, and don't drain overflowed packets as they will
 * (hopefully) take care of themselves shortly.
 */
int
netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit)
{
	struct netisr_work *npwp;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	if (qlimit > netisr_maxqlimit)
		return (EINVAL);

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	netisr_proto[proto].np_qlimit = qlimit;
	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		npwp->nw_qlimit = qlimit;
	}
	NETISR_WUNLOCK();
	return (0);
}
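
/*
 * A short usage sketch, reusing the hypothetical ip_nh handler from the
 * registration example above: a protocol that observes sustained queue
 * drops might raise its per-workstream limit at runtime, bounded by
 * net.isr.maxqlimit:
 *
 *	u_int64_t qdrops;
 *
 *	netisr_getqdrops(&ip_nh, &qdrops);
 *	if (qdrops != 0 && netisr_setqlimit(&ip_nh, 1024) != 0)
 *		printf("ip: could not raise netisr queue limit\n");
 */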

/*
 * Drain all packets currently held in a particular protocol work queue.
 */
static void
netisr_drain_proto(struct netisr_work *npwp)
{
	struct mbuf *m;

	/*
	 * We would assert the lock on the workstream but it's not passed in.
	 */
	while ((m = npwp->nw_head) != NULL) {
		npwp->nw_head = m->m_nextpkt;
		m->m_nextpkt = NULL;
		if (npwp->nw_head == NULL)
			npwp->nw_tail = NULL;
		npwp->nw_len--;
		m_freem(m);
	}
	KASSERT(npwp->nw_tail == NULL, ("%s: tail", __func__));
	KASSERT(npwp->nw_len == 0, ("%s: len", __func__));
}

/*
 * Remove the registration of a network protocol, which requires clearing
 * per-protocol fields across all workstreams, including freeing all mbufs in
 * the queues at time of unregister.  All work in netisr is briefly suspended
 * while this takes place.
 */
void
netisr_unregister(const struct netisr_handler *nhp)
{
	struct netisr_work *npwp;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	netisr_proto[proto].np_name = NULL;
	netisr_proto[proto].np_handler = NULL;
	netisr_proto[proto].np_m2flow = NULL;
	netisr_proto[proto].np_m2cpuid = NULL;
	netisr_proto[proto].np_qlimit = 0;
	netisr_proto[proto].np_policy = 0;
	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		netisr_drain_proto(npwp);
		bzero(npwp, sizeof(*npwp));
	}
	NETISR_WUNLOCK();
}

/*
 * Compose the global and per-protocol policies on dispatch, and return the
 * dispatch policy to use.
 */
static u_int
netisr_get_dispatch(struct netisr_proto *npp)
{

	/*
	 * Protocol-specific configuration overrides the global default.
	 */
	if (npp->np_dispatch != NETISR_DISPATCH_DEFAULT)
		return (npp->np_dispatch);
	return (netisr_dispatch_policy);
}

/*
 * Look up the workstream given a packet and source identifier.  Do this by
 * checking the protocol's policy, and optionally call out to the protocol
 * for assistance if required.
 */
static struct mbuf *
netisr_select_cpuid(struct netisr_proto *npp, u_int dispatch_policy,
    uintptr_t source, struct mbuf *m, u_int *cpuidp)
{
	struct ifnet *ifp;
	u_int policy;

	NETISR_LOCK_ASSERT();

	/*
	 * In the event we have only one worker, shortcut and deliver to it
	 * without further ado.
	 */
	if (nws_count == 1) {
		*cpuidp = nws_array[0];
		return (m);
	}

	/*
	 * What happens next depends on the policy selected by the protocol.
	 * If we want to support per-interface policies, we should do that
	 * here first.
	 */
	policy = npp->np_policy;
	if (policy == NETISR_POLICY_CPU) {
		m = npp->np_m2cpuid(m, source, cpuidp);
		if (m == NULL)
			return (NULL);

		/*
		 * It's possible for a protocol not to have a good idea about
		 * where to process a packet, in which case we fall back on
		 * the netisr code to decide.  In the hybrid case, return the
		 * current CPU ID, which will force an immediate direct
		 * dispatch.  In the queued case, fall back on the SOURCE
		 * policy.
		 */
		if (*cpuidp != NETISR_CPUID_NONE)
			return (m);
		if (dispatch_policy == NETISR_DISPATCH_HYBRID) {
			*cpuidp = curcpu;
			return (m);
		}
		policy = NETISR_POLICY_SOURCE;
	}

	if (policy == NETISR_POLICY_FLOW) {
		if (!(m->m_flags & M_FLOWID) && npp->np_m2flow != NULL) {
			m = npp->np_m2flow(m, source);
			if (m == NULL)
				return (NULL);
		}
		if (m->m_flags & M_FLOWID) {
			*cpuidp =
			    netisr_default_flow2cpu(m->m_pkthdr.flowid);
			return (m);
		}
		policy = NETISR_POLICY_SOURCE;
	}

	KASSERT(policy == NETISR_POLICY_SOURCE,
	    ("%s: invalid policy %u for %s", __func__, npp->np_policy,
	    npp->np_name));

	ifp = m->m_pkthdr.rcvif;
	if (ifp != NULL)
		*cpuidp = nws_array[(ifp->if_index + source) % nws_count];
	else
		*cpuidp = nws_array[source % nws_count];
	return (m);
}

/*
 * Process packets associated with a workstream and protocol.  For reasons of
 * fairness, we process up to one complete netisr queue at a time, moving the
 * queue to a stack-local queue for processing, but do not loop refreshing
 * from the global queue.  The caller is responsible for deciding whether to
 * loop, and for setting the NWS_RUNNING flag.  The passed workstream will be
 * locked on entry and relocked before return, but will be released while
 * processing.  The number of packets processed is returned.
 */
static u_int
netisr_process_workstream_proto(struct netisr_workstream *nwsp, u_int proto)
{
	struct netisr_work local_npw, *npwp;
	u_int handled;
	struct mbuf *m;

	NETISR_LOCK_ASSERT();
	NWS_LOCK_ASSERT(nwsp);

	KASSERT(nwsp->nws_flags & NWS_RUNNING,
	    ("%s(%u): not running", __func__, proto));
	KASSERT(proto >= 0 && proto < NETISR_MAXPROT,
	    ("%s(%u): invalid proto\n", __func__, proto));

	npwp = &nwsp->nws_work[proto];
	if (npwp->nw_len == 0)
		return (0);

	/*
	 * Move the global work queue to a thread-local work queue.
	 *
	 * Notice that this means the effective maximum length of the queue
	 * is actually twice that of the maximum queue length specified in
	 * the protocol registration call.
	 */
	handled = npwp->nw_len;
	local_npw = *npwp;
	npwp->nw_head = NULL;
	npwp->nw_tail = NULL;
	npwp->nw_len = 0;
	nwsp->nws_pendingbits &= ~(1 << proto);
	NWS_UNLOCK(nwsp);
	while ((m = local_npw.nw_head) != NULL) {
		local_npw.nw_head = m->m_nextpkt;
		m->m_nextpkt = NULL;
		if (local_npw.nw_head == NULL)
			local_npw.nw_tail = NULL;
		local_npw.nw_len--;
		VNET_ASSERT(m->m_pkthdr.rcvif != NULL,
		    ("%s:%d rcvif == NULL: m=%p", __func__, __LINE__, m));
		CURVNET_SET(m->m_pkthdr.rcvif->if_vnet);
		netisr_proto[proto].np_handler(m);
		CURVNET_RESTORE();
	}
	KASSERT(local_npw.nw_len == 0,
	    ("%s(%u): len %u", __func__, proto, local_npw.nw_len));
	if (netisr_proto[proto].np_drainedcpu)
		netisr_proto[proto].np_drainedcpu(nwsp->nws_cpu);
	NWS_LOCK(nwsp);
	npwp->nw_handled += handled;
	return (handled);
}

/*
 * SWI handler for netisr -- processes packets in a set of workstreams that
 * it owns, woken up by calls to NWS_SIGNAL().  If this workstream is already
 * being direct dispatched, go back to sleep and wait for the dispatching
 * thread to wake us up again.
 */
static void
swi_net(void *arg)
{
#ifdef NETISR_LOCKING
	struct rm_priotracker tracker;
#endif
	struct netisr_workstream *nwsp;
	u_int bits, prot;

	nwsp = arg;

#ifdef DEVICE_POLLING
	KASSERT(nws_count == 1,
	    ("%s: device_polling but nws_count != 1", __func__));
	netisr_poll();
#endif
#ifdef NETISR_LOCKING
	NETISR_RLOCK(&tracker);
#endif
	NWS_LOCK(nwsp);
	KASSERT(!(nwsp->nws_flags & NWS_RUNNING), ("swi_net: running"));
	if (nwsp->nws_flags & NWS_DISPATCHING)
		goto out;
	nwsp->nws_flags |= NWS_RUNNING;
	nwsp->nws_flags &= ~NWS_SCHEDULED;
	while ((bits = nwsp->nws_pendingbits) != 0) {
		while ((prot = ffs(bits)) != 0) {
			prot--;
			bits &= ~(1 << prot);
			(void)netisr_process_workstream_proto(nwsp, prot);
		}
	}
	nwsp->nws_flags &= ~NWS_RUNNING;
out:
	NWS_UNLOCK(nwsp);
#ifdef NETISR_LOCKING
	NETISR_RUNLOCK(&tracker);
#endif
#ifdef DEVICE_POLLING
	netisr_pollmore();
#endif
}

static int
netisr_queue_workstream(struct netisr_workstream *nwsp, u_int proto,
    struct netisr_work *npwp, struct mbuf *m, int *dosignalp)
{

	NWS_LOCK_ASSERT(nwsp);

	*dosignalp = 0;
	if (npwp->nw_len < npwp->nw_qlimit) {
		m->m_nextpkt = NULL;
		if (npwp->nw_head == NULL) {
			npwp->nw_head = m;
			npwp->nw_tail = m;
		} else {
			npwp->nw_tail->m_nextpkt = m;
			npwp->nw_tail = m;
		}
		npwp->nw_len++;
		if (npwp->nw_len > npwp->nw_watermark)
			npwp->nw_watermark = npwp->nw_len;

		/*
		 * We must set the bit regardless of NWS_RUNNING, so that
		 * swi_net() keeps calling netisr_process_workstream_proto().
		 */
		nwsp->nws_pendingbits |= (1 << proto);
		if (!(nwsp->nws_flags &
		    (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED))) {
			nwsp->nws_flags |= NWS_SCHEDULED;
			*dosignalp = 1;	/* Defer until unlocked. */
		}
		npwp->nw_queued++;
		return (0);
	} else {
		m_freem(m);
		npwp->nw_qdrops++;
		return (ENOBUFS);
	}
}

static int
netisr_queue_internal(u_int proto, struct mbuf *m, u_int cpuid)
{
	struct netisr_workstream *nwsp;
	struct netisr_work *npwp;
	int dosignal, error;

#ifdef NETISR_LOCKING
	NETISR_LOCK_ASSERT();
#endif
	KASSERT(cpuid <= mp_maxid, ("%s: cpuid too big (%u, %u)", __func__,
	    cpuid, mp_maxid));
	KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));

	dosignal = 0;
	error = 0;
	nwsp = DPCPU_ID_PTR(cpuid, nws);
	npwp = &nwsp->nws_work[proto];
	NWS_LOCK(nwsp);
	error = netisr_queue_workstream(nwsp, proto, npwp, m, &dosignal);
	NWS_UNLOCK(nwsp);
	if (dosignal)
		NWS_SIGNAL(nwsp);
	return (error);
}

int
netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m)
{
#ifdef NETISR_LOCKING
	struct rm_priotracker tracker;
#endif
	u_int cpuid;
	int error;

	KASSERT(proto < NETISR_MAXPROT,
	    ("%s: invalid proto %u", __func__, proto));

#ifdef NETISR_LOCKING
	NETISR_RLOCK(&tracker);
#endif
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s: invalid proto %u", __func__, proto));

	m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_DEFERRED,
	    source, m, &cpuid);
	if (m != NULL) {
		KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__,
		    cpuid));
		error = netisr_queue_internal(proto, m, cpuid);
	} else
		error = ENOBUFS;
#ifdef NETISR_LOCKING
	NETISR_RUNLOCK(&tracker);
#endif
	return (error);
}

int
netisr_queue(u_int proto, struct mbuf *m)
{

	return (netisr_queue_src(proto, 0, m));
}

/*
 * Dispatch a packet for netisr processing; direct dispatch is permitted by
 * calling context.
 */
int
netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
{
#ifdef NETISR_LOCKING
	struct rm_priotracker tracker;
#endif
	struct netisr_workstream *nwsp;
	struct netisr_proto *npp;
	struct netisr_work *npwp;
	int dosignal, error;
	u_int cpuid, dispatch_policy;

	KASSERT(proto < NETISR_MAXPROT,
	    ("%s: invalid proto %u", __func__, proto));
#ifdef NETISR_LOCKING
	NETISR_RLOCK(&tracker);
#endif
	npp = &netisr_proto[proto];
	KASSERT(npp->np_handler != NULL, ("%s: invalid proto %u", __func__,
	    proto));

	dispatch_policy = netisr_get_dispatch(npp);
	if (dispatch_policy == NETISR_DISPATCH_DEFERRED)
		return (netisr_queue_src(proto, source, m));

	/*
	 * If direct dispatch is forced, then unconditionally dispatch
	 * without a formal CPU selection.  Borrow the current CPU's stats,
	 * even if there's no worker on it.  In this case we don't update
	 * nws_flags because all netisr processing will be source ordered due
	 * to always being forced to directly dispatch.
	 */
	if (dispatch_policy == NETISR_DISPATCH_DIRECT) {
		nwsp = DPCPU_PTR(nws);
		npwp = &nwsp->nws_work[proto];
		npwp->nw_dispatched++;
		npwp->nw_handled++;
		netisr_proto[proto].np_handler(m);
		error = 0;
		goto out_unlock;
	}

	KASSERT(dispatch_policy == NETISR_DISPATCH_HYBRID,
	    ("%s: unknown dispatch policy (%u)", __func__, dispatch_policy));

	/*
	 * Otherwise, we execute in a hybrid mode where we will try to direct
	 * dispatch if we're on the right CPU and the netisr worker isn't
	 * already running.
	 */
	sched_pin();
	m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_HYBRID,
	    source, m, &cpuid);
	if (m == NULL) {
		error = ENOBUFS;
		goto out_unpin;
	}
	KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));
	if (cpuid != curcpu)
		goto queue_fallback;
	nwsp = DPCPU_PTR(nws);
	npwp = &nwsp->nws_work[proto];

	/*-
	 * We are willing to direct dispatch only if three conditions hold:
	 *
	 * (1) The netisr worker isn't already running,
	 * (2) Another thread isn't already directly dispatching, and
	 * (3) The netisr hasn't already been woken up.
	 */
	NWS_LOCK(nwsp);
	if (nwsp->nws_flags & (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED)) {
		error = netisr_queue_workstream(nwsp, proto, npwp, m,
		    &dosignal);
		NWS_UNLOCK(nwsp);
		if (dosignal)
			NWS_SIGNAL(nwsp);
		goto out_unpin;
	}

	/*
	 * The current thread is now effectively the netisr worker, so set
	 * the dispatching flag to prevent concurrent processing of the
	 * stream from another thread (even the netisr worker), which could
	 * otherwise lead to effective misordering of the stream.
	 */
	nwsp->nws_flags |= NWS_DISPATCHING;
	NWS_UNLOCK(nwsp);
	netisr_proto[proto].np_handler(m);
	NWS_LOCK(nwsp);
	nwsp->nws_flags &= ~NWS_DISPATCHING;
	npwp->nw_handled++;
	npwp->nw_hybrid_dispatched++;

	/*
	 * If other work was enqueued by another thread while we were direct
	 * dispatching, we need to signal the netisr worker to do that work.
	 * In the future, we might want to do some of that work in the
	 * current thread, rather than trigger further context switches.  If
	 * so, we'll want to establish a reasonable bound on the work done in
	 * the "borrowed" context.
	 */
	if (nwsp->nws_pendingbits != 0) {
		nwsp->nws_flags |= NWS_SCHEDULED;
		dosignal = 1;
	} else
		dosignal = 0;
	NWS_UNLOCK(nwsp);
	if (dosignal)
		NWS_SIGNAL(nwsp);
	error = 0;
	goto out_unpin;

queue_fallback:
	error = netisr_queue_internal(proto, m, cpuid);
out_unpin:
	sched_unpin();
out_unlock:
#ifdef NETISR_LOCKING
	NETISR_RUNLOCK(&tracker);
#endif
	return (error);
}

int
netisr_dispatch(u_int proto, struct mbuf *m)
{

	return (netisr_dispatch_src(proto, 0, m));
}
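
/*
 * A minimal sketch of the _src variants, with tunnel_softc standing in as a
 * hypothetical per-tunnel pointer: a decapsulating protocol can pass an
 * opaque source identifier so that, under NETISR_POLICY_SOURCE, inner
 * packets from distinct tunnels map to different workstreams instead of all
 * following the receive interface:
 *
 *	(void)netisr_queue_src(NETISR_IP, (uintptr_t)tunnel_softc, m);
 *
 * netisr_dispatch() and netisr_queue() simply pass 0 as the source.
 */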

#ifdef DEVICE_POLLING
/*
 * Kernel polling borrows a netisr thread to run interface polling in; this
 * function allows kernel polling to request that the netisr thread be
 * scheduled even if no packets are pending for protocols.
 */
void
netisr_sched_poll(void)
{
	struct netisr_workstream *nwsp;

	nwsp = DPCPU_ID_PTR(nws_array[0], nws);
	NWS_SIGNAL(nwsp);
}
#endif

static void
netisr_start_swi(u_int cpuid, struct pcpu *pc)
{
	char swiname[12];
	struct netisr_workstream *nwsp;
	int error;

	KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));

	nwsp = DPCPU_ID_PTR(cpuid, nws);
	mtx_init(&nwsp->nws_mtx, "netisr_mtx", NULL, MTX_DEF);
	nwsp->nws_cpu = cpuid;
	snprintf(swiname, sizeof(swiname), "netisr %u", cpuid);
	error = swi_add(&nwsp->nws_intr_event, swiname, swi_net, nwsp,
	    SWI_NET, INTR_MPSAFE, &nwsp->nws_swi_cookie);
	if (error)
		panic("%s: swi_add %d", __func__, error);
	pc->pc_netisr = nwsp->nws_intr_event;
	if (netisr_bindthreads) {
		error = intr_event_bind(nwsp->nws_intr_event, cpuid);
		if (error != 0)
			printf("%s: cpu %u: intr_event_bind: %d", __func__,
			    cpuid, error);
	}
	NETISR_WLOCK();
	nws_array[nws_count] = nwsp->nws_cpu;
	nws_count++;
	NETISR_WUNLOCK();
}

/*
 * Initialize the netisr subsystem.  We rely on BSS and static initialization
 * of most fields in global data structures.
 *
 * Start a worker thread for the boot CPU so that we can support network
 * traffic immediately in case the network stack is used before additional
 * CPUs are started (for example, diskless boot).
 */
static void
netisr_init(void *arg)
{
	char tmp[NETISR_DISPATCH_POLICY_MAXSTR];
	u_int dispatch_policy;
	int error;

	KASSERT(curcpu == 0, ("%s: not on CPU 0", __func__));

	NETISR_LOCK_INIT();
	if (netisr_maxthreads < 1)
		netisr_maxthreads = 1;
	if (netisr_maxthreads > mp_ncpus) {
		printf("netisr_init: forcing maxthreads from %d to %d\n",
		    netisr_maxthreads, mp_ncpus);
		netisr_maxthreads = mp_ncpus;
	}
	if (netisr_defaultqlimit > netisr_maxqlimit) {
		printf("netisr_init: forcing defaultqlimit from %d to %d\n",
		    netisr_defaultqlimit, netisr_maxqlimit);
		netisr_defaultqlimit = netisr_maxqlimit;
	}
#ifdef DEVICE_POLLING
	/*
	 * The device polling code is not yet aware of how to deal with
	 * multiple netisr threads, so for the time being compiling in device
	 * polling disables parallel netisr workers.
	 */
	if (netisr_maxthreads != 1 || netisr_bindthreads != 0) {
		printf("netisr_init: forcing maxthreads to 1 and "
		    "bindthreads to 0 for device polling\n");
		netisr_maxthreads = 1;
		netisr_bindthreads = 0;
	}
#endif

	if (TUNABLE_STR_FETCH("net.isr.dispatch", tmp, sizeof(tmp))) {
		error = netisr_dispatch_policy_from_str(tmp,
		    &dispatch_policy);
		if (error == 0 && dispatch_policy == NETISR_DISPATCH_DEFAULT)
			error = EINVAL;
		if (error == 0) {
			netisr_dispatch_policy = dispatch_policy;
			netisr_dispatch_policy_compat();
		} else
			printf(
			    "%s: invalid dispatch policy %s, using default\n",
			    __func__, tmp);
	}

	netisr_start_swi(curcpu, pcpu_find(curcpu));
}
SYSINIT(netisr_init, SI_SUB_SOFTINTR, SI_ORDER_FIRST, netisr_init, NULL);

1215193219Srwatson/*
1216193219Srwatson * Start worker threads for additional CPUs.  No attempt to gracefully handle
1217193219Srwatson * work reassignment, we don't yet support dynamic reconfiguration.
1218193219Srwatson */
1219103781Sjakestatic void
1220193219Srwatsonnetisr_start(void *arg)
1221103781Sjake{
1222193219Srwatson	struct pcpu *pc;
1223103781Sjake
1224222531Snwhitehorn	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
1225193219Srwatson		if (nws_count >= netisr_maxthreads)
1226193219Srwatson			break;
1227193219Srwatson		/* XXXRW: Is skipping absent CPUs still required here? */
1228193219Srwatson		if (CPU_ABSENT(pc->pc_cpuid))
1229193219Srwatson			continue;
1230193219Srwatson		/* Worker will already be present for boot CPU. */
1231193219Srwatson		if (pc->pc_netisr != NULL)
1232193219Srwatson			continue;
1233193219Srwatson		netisr_start_swi(pc->pc_cpuid, pc);
1234193219Srwatson	}
1235103781Sjake}
1236193219SrwatsonSYSINIT(netisr_start, SI_SUB_SMP, SI_ORDER_MIDDLE, netisr_start, NULL);
1237193219Srwatson
1238204199Srwatson/*
1239204199Srwatson * Sysctl monitoring for netisr: query a list of registered protocols.
1240204199Srwatson */
1241204199Srwatsonstatic int
1242204199Srwatsonsysctl_netisr_proto(SYSCTL_HANDLER_ARGS)
1243204199Srwatson{
1244204199Srwatson	struct rm_priotracker tracker;
1245204199Srwatson	struct sysctl_netisr_proto *snpp, *snp_array;
1246204199Srwatson	struct netisr_proto *npp;
1247204199Srwatson	u_int counter, proto;
1248204199Srwatson	int error;
1249204199Srwatson
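	/* Read-only OID: refuse attempts to supply a new value. */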
1250204199Srwatson	if (req->newptr != NULL)
1251204199Srwatson		return (EINVAL);
1252204199Srwatson	snp_array = malloc(sizeof(*snp_array) * NETISR_MAXPROT, M_TEMP,
1253204199Srwatson	    M_ZERO | M_WAITOK);
1254204199Srwatson	counter = 0;
1255204199Srwatson	NETISR_RLOCK(&tracker);
1256204199Srwatson	for (proto = 0; proto < NETISR_MAXPROT; proto++) {
1257204497Srwatson		npp = &netisr_proto[proto];
1258204199Srwatson		if (npp->np_name == NULL)
1259204199Srwatson			continue;
1260204199Srwatson		snpp = &snp_array[counter];
1261204199Srwatson		snpp->snp_version = sizeof(*snpp);
1262204199Srwatson		strlcpy(snpp->snp_name, npp->np_name, NETISR_NAMEMAXLEN);
1263204199Srwatson		snpp->snp_proto = proto;
1264204199Srwatson		snpp->snp_qlimit = npp->np_qlimit;
1265204199Srwatson		snpp->snp_policy = npp->np_policy;
1266222249Srwatson		snpp->snp_dispatch = npp->np_dispatch;
1267204199Srwatson		if (npp->np_m2flow != NULL)
1268204199Srwatson			snpp->snp_flags |= NETISR_SNP_FLAGS_M2FLOW;
1269204199Srwatson		if (npp->np_m2cpuid != NULL)
1270204199Srwatson			snpp->snp_flags |= NETISR_SNP_FLAGS_M2CPUID;
1271204199Srwatson		if (npp->np_drainedcpu != NULL)
1272204199Srwatson			snpp->snp_flags |= NETISR_SNP_FLAGS_DRAINEDCPU;
1273204199Srwatson		counter++;
1274204199Srwatson	}
1275204199Srwatson	NETISR_RUNLOCK(&tracker);
1276204303Srwatson	KASSERT(counter <= NETISR_MAXPROT,
1277204199Srwatson	    ("sysctl_netisr_proto: counter too big (%u)", counter));
1278204199Srwatson	error = SYSCTL_OUT(req, snp_array, sizeof(*snp_array) * counter);
1279204199Srwatson	free(snp_array, M_TEMP);
1280204199Srwatson	return (error);
1281204199Srwatson}
1282204199Srwatson
1283204199SrwatsonSYSCTL_PROC(_net_isr, OID_AUTO, proto,
1284204199Srwatson    CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_proto,
1285204199Srwatson    "S,sysctl_netisr_proto",
1286204199Srwatson    "Return list of protocols registered with netisr");
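
/*
 * Editor's sketch (not part of the kernel build): one way a userland
 * monitor could consume the net.isr.proto node registered above.  It
 * assumes the monitoring structure (struct sysctl_netisr_proto) is visible
 * via <net/netisr.h>, matching the fields marshalled in the handler; the
 * window between the size probe and the fetch is ignored for brevity.
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <net/netisr.h>
 *	#include <err.h>
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *
 *	int
 *	main(void)
 *	{
 *		struct sysctl_netisr_proto *snpp;
 *		size_t i, len;
 *
 *		if (sysctlbyname("net.isr.proto", NULL, &len, NULL, 0) != 0)
 *			err(1, "sysctlbyname");
 *		if ((snpp = malloc(len)) == NULL)
 *			err(1, "malloc");
 *		if (sysctlbyname("net.isr.proto", snpp, &len, NULL, 0) != 0)
 *			err(1, "sysctlbyname");
 *		for (i = 0; i < len / sizeof(*snpp); i++)
 *			printf("%s proto %u qlimit %u\n", snpp[i].snp_name,
 *			    snpp[i].snp_proto, snpp[i].snp_qlimit);
 *		free(snpp);
 *		return (0);
 *	}
 */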
1287204199Srwatson
1288204199Srwatson/*
1289204199Srwatson * Sysctl monitoring for netisr: query a list of workstreams.
1290204199Srwatson */
1291204199Srwatsonstatic int
1292204199Srwatsonsysctl_netisr_workstream(SYSCTL_HANDLER_ARGS)
1293204199Srwatson{
1294204199Srwatson	struct rm_priotracker tracker;
1295204199Srwatson	struct sysctl_netisr_workstream *snwsp, *snws_array;
1296204199Srwatson	struct netisr_workstream *nwsp;
1297204199Srwatson	u_int counter, cpuid;
1298204199Srwatson	int error;
1299204199Srwatson
1300204199Srwatson	if (req->newptr != NULL)
1301204199Srwatson		return (EINVAL);
1302204199Srwatson	snws_array = malloc(sizeof(*snws_array) * MAXCPU, M_TEMP,
1303204199Srwatson	    M_ZERO | M_WAITOK);
1304204199Srwatson	counter = 0;
1305204199Srwatson	NETISR_RLOCK(&tracker);
1306209059Sjhb	CPU_FOREACH(cpuid) {
1307204199Srwatson		nwsp = DPCPU_ID_PTR(cpuid, nws);
1308204199Srwatson		if (nwsp->nws_intr_event == NULL)
1309204199Srwatson			continue;
1310204199Srwatson		NWS_LOCK(nwsp);
1311204199Srwatson		snwsp = &snws_array[counter];
1312204199Srwatson		snwsp->snws_version = sizeof(*snwsp);
1313204199Srwatson
1314204199Srwatson		/*
1315204199Srwatson		 * For now, we equate workstream IDs and CPU IDs in the
1316204199Srwatson		 * kernel, but expose them independently to userspace in case
1317204199Srwatson		 * that assumption changes in the future.
1318204199Srwatson		 */
1319204199Srwatson		snwsp->snws_wsid = cpuid;
1320204199Srwatson		snwsp->snws_cpu = cpuid;
1321204199Srwatson		if (nwsp->nws_intr_event != NULL)
1322204199Srwatson			snwsp->snws_flags |= NETISR_SNWS_FLAGS_INTR;
1323204199Srwatson		NWS_UNLOCK(nwsp);
1324204199Srwatson		counter++;
1325204199Srwatson	}
1326204199Srwatson	NETISR_RUNLOCK(&tracker);
1327204303Srwatson	KASSERT(counter <= MAXCPU,
1328204199Srwatson	    ("sysctl_netisr_workstream: counter too big (%u)", counter));
1329204199Srwatson	error = SYSCTL_OUT(req, snws_array, sizeof(*snws_array) * counter);
1330204199Srwatson	free(snws_array, M_TEMP);
1331204199Srwatson	return (error);
1332204199Srwatson}
1333204199Srwatson
1334204199SrwatsonSYSCTL_PROC(_net_isr, OID_AUTO, workstream,
1335204199Srwatson    CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_workstream,
1336204199Srwatson    "S,sysctl_netisr_workstream",
1337204199Srwatson    "Return list of workstreams implemented by netisr");
1338204199Srwatson
1339204199Srwatson/*
1340204199Srwatson * Sysctl monitoring for netisr: query per-protocol data across all
1341204199Srwatson * workstreams.
1342204199Srwatson */
1343204199Srwatsonstatic int
1344204199Srwatsonsysctl_netisr_work(SYSCTL_HANDLER_ARGS)
1345204199Srwatson{
1346204199Srwatson	struct rm_priotracker tracker;
1347204199Srwatson	struct sysctl_netisr_work *snwp, *snw_array;
1348204199Srwatson	struct netisr_workstream *nwsp;
1349204199Srwatson	struct netisr_proto *npp;
1350204199Srwatson	struct netisr_work *nwp;
1351204199Srwatson	u_int counter, cpuid, proto;
1352204199Srwatson	int error;
1353204199Srwatson
1354204199Srwatson	if (req->newptr != NULL)
1355204199Srwatson		return (EINVAL);
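	/*
	 * Stage into a worst-case buffer (every protocol slot on every CPU);
	 * only the 'counter' entries actually filled in are copied out.
	 */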
1356204199Srwatson	snw_array = malloc(sizeof(*snw_array) * MAXCPU * NETISR_MAXPROT,
1357204199Srwatson	    M_TEMP, M_ZERO | M_WAITOK);
1358204199Srwatson	counter = 0;
1359204199Srwatson	NETISR_RLOCK(&tracker);
1360209059Sjhb	CPU_FOREACH(cpuid) {
1361204199Srwatson		nwsp = DPCPU_ID_PTR(cpuid, nws);
1362204199Srwatson		if (nwsp->nws_intr_event == NULL)
1363204199Srwatson			continue;
1364204199Srwatson		NWS_LOCK(nwsp);
1365204199Srwatson		for (proto = 0; proto < NETISR_MAXPROT; proto++) {
1366204497Srwatson			npp = &netisr_proto[proto];
1367204199Srwatson			if (npp->np_name == NULL)
1368204199Srwatson				continue;
1369204199Srwatson			nwp = &nwsp->nws_work[proto];
1370204199Srwatson			snwp = &snw_array[counter];
1371204199Srwatson			snwp->snw_version = sizeof(*snwp);
1372204199Srwatson			snwp->snw_wsid = cpuid;		/* See comment above. */
1373204199Srwatson			snwp->snw_proto = proto;
1374204199Srwatson			snwp->snw_len = nwp->nw_len;
1375204199Srwatson			snwp->snw_watermark = nwp->nw_watermark;
1376204199Srwatson			snwp->snw_dispatched = nwp->nw_dispatched;
1377204199Srwatson			snwp->snw_hybrid_dispatched =
1378204199Srwatson			    nwp->nw_hybrid_dispatched;
1379204199Srwatson			snwp->snw_qdrops = nwp->nw_qdrops;
1380204199Srwatson			snwp->snw_queued = nwp->nw_queued;
1381204199Srwatson			snwp->snw_handled = nwp->nw_handled;
1382204199Srwatson			counter++;
1383204199Srwatson		}
1384204199Srwatson		NWS_UNLOCK(nwsp);
1385204199Srwatson	}
1386204303Srwatson	KASSERT(counter <= MAXCPU * NETISR_MAXPROT,
1387204199Srwatson	    ("sysctl_netisr_work: counter too big (%u)", counter));
1388204199Srwatson	NETISR_RUNLOCK(&tracker);
1389204199Srwatson	error = SYSCTL_OUT(req, snw_array, sizeof(*snw_array) * counter);
1390204199Srwatson	free(snw_array, M_TEMP);
1391204199Srwatson	return (error);
1392204199Srwatson}
1393204199Srwatson
1394204199SrwatsonSYSCTL_PROC(_net_isr, OID_AUTO, work,
1395204199Srwatson    CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_work,
1396204199Srwatson    "S,sysctl_netisr_work",
1397204199Srwatson    "Return list of per-workstream, per-protocol work in netisr");
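
/*
 * Taken together, the net.isr.proto, net.isr.workstream, and net.isr.work
 * nodes give userland a snapshot of netisr configuration and per-CPU
 * statistics; netstat(1) -Q is the usual consumer of this interface.
 */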
1398204199Srwatson
1399193219Srwatson#ifdef DDB
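/*
 * DDB "show netisr" command: dump each active workstream's per-protocol
 * queue length, watermark, queue limit, and dispatch/drop/queue counters,
 * one row per CPU/protocol pair.  Invoked from the debugger prompt as
 * "db> show netisr".
 */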
1400193219SrwatsonDB_SHOW_COMMAND(netisr, db_show_netisr)
1401193219Srwatson{
1402193219Srwatson	struct netisr_workstream *nwsp;
1403193219Srwatson	struct netisr_work *nwp;
1404193219Srwatson	int first, proto;
1405195019Srwatson	u_int cpuid;
1406193219Srwatson
1407193219Srwatson	db_printf("%3s %6s %5s %5s %5s %8s %8s %8s %8s\n", "CPU", "Proto",
1408193219Srwatson	    "Len", "WMark", "Max", "Disp", "HDisp", "Drop", "Queue");
1409209059Sjhb	CPU_FOREACH(cpuid) {
1410195019Srwatson		nwsp = DPCPU_ID_PTR(cpuid, nws);
1411193219Srwatson		if (nwsp->nws_intr_event == NULL)
1412193219Srwatson			continue;
1413193219Srwatson		first = 1;
1414193219Srwatson		for (proto = 0; proto < NETISR_MAXPROT; proto++) {
1415204497Srwatson			if (netisr_proto[proto].np_handler == NULL)
1416193219Srwatson				continue;
1417193219Srwatson			nwp = &nwsp->nws_work[proto];
1418193219Srwatson			if (first) {
1419195019Srwatson				db_printf("%3d ", cpuid);
1420193219Srwatson				first = 0;
1421193219Srwatson			} else
1422193219Srwatson				db_printf("%3s ", "");
1423193219Srwatson			db_printf(
1424193219Srwatson			    "%6s %5d %5d %5d %8ju %8ju %8ju %8ju\n",
1425204497Srwatson			    netisr_proto[proto].np_name, nwp->nw_len,
1426193219Srwatson			    nwp->nw_watermark, nwp->nw_qlimit,
1427193219Srwatson			    nwp->nw_dispatched, nwp->nw_hybrid_dispatched,
1428193219Srwatson			    nwp->nw_qdrops, nwp->nw_queued);
1429193219Srwatson		}
1430193219Srwatson	}
1431193219Srwatson}
1432193219Srwatson#endif
1433