/*-
 * Copyright (c) 2007-2009 Robert N. M. Watson
 * Copyright (c) 2010-2011 Juniper Networks, Inc.
 * All rights reserved.
 *
 * This software was developed by Robert N. M. Watson under contract
 * to Juniper Networks, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/10.3/sys/net/netisr.c 282832 2015-05-13 08:04:50Z hiren $");

/*
 * netisr is a packet dispatch service, allowing synchronous (directly
 * dispatched) and asynchronous (deferred dispatch) processing of packets by
 * registered protocol handlers.  Callers pass a protocol identifier and
 * packet to netisr, along with a direct dispatch hint, and work will either
 * be immediately processed by the registered handler, or passed to a
 * software interrupt (SWI) thread for deferred dispatch.  Callers will
 * generally select one or the other based on:
 *
 * - Whether directly dispatching a netisr handler could lead to code
 *   reentrance or lock recursion, such as entering the socket code from the
 *   socket code.
 * - Whether directly dispatching a netisr handler could lead to recursive
 *   processing, such as when decapsulating several wrapped layers of tunnel
 *   information (IPSEC within IPSEC within ...).
 *
 * Maintaining ordering for protocol streams is a critical design concern.
 * Enforcing ordering limits the opportunity for concurrency, but maintains
 * the strong ordering requirements found in some protocols, such as TCP.  Of
 * related concern is CPU affinity--it is desirable to process all data
 * associated with a particular stream on the same CPU over time in order to
 * avoid acquiring locks associated with the connection on different CPUs,
 * keep connection data in one cache, and to generally encourage associated
 * user threads to live on the same CPU as the stream.  It's also desirable
 * to avoid lock migration and contention where locks are associated with
 * more than one flow.
 *
 * netisr supports several policy variations, represented by the
 * NETISR_POLICY_* constants, allowing protocols to play various roles in
 * identifying flows, assigning work to CPUs, etc.  These are described in
 * netisr.h.
 */
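
/*
 * For illustration, a sketch of how a caller chooses between the two entry
 * points (the protocol number and error handling are hypothetical; whether
 * netisr_dispatch() actually dispatches directly also depends on the global
 * and per-protocol dispatch policies described below):
 *
 *	// Apply the configured policy; may direct dispatch in this context.
 *	error = netisr_dispatch(NETISR_IP, m);
 *
 *	// Always defer to the protocol's SWI thread.
 *	error = netisr_queue(NETISR_IP, m);
 */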

#include "opt_ddb.h"
#include "opt_device_polling.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/interrupt.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/rmlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#define	_WANT_NETISR_INTERNAL	/* Enable definitions from netisr_internal.h */
#include <net/if.h>
#include <net/if_var.h>
#include <net/netisr.h>
#include <net/netisr_internal.h>
#include <net/vnet.h>

/*-
 * Synchronize use and modification of the registered netisr data structures;
 * acquire a write lock while modifying the set of registered protocols to
 * prevent partially registered or unregistered protocols from being run.
 *
 * The following data structures and fields are protected by this lock:
 *
 * - The netisr_proto array, including all fields of struct netisr_proto.
 * - The nws array, including all fields of struct netisr_worker.
 * - The nws_array array.
 *
 * Note: the NETISR_LOCKING define controls whether read locks are acquired
 * in packet processing paths requiring netisr registration stability.  This
 * is disabled by default as it can lead to measurable performance
 * degradation even with rmlocks (3%-6% for loopback ping-pong traffic), and
 * because netisr registration and unregistration is extremely rare at
 * runtime.  If it becomes more common, this decision should be revisited.
 *
 * XXXRW: rmlocks don't support assertions.
 */
static struct rmlock	netisr_rmlock;
#define	NETISR_LOCK_INIT()	rm_init_flags(&netisr_rmlock, "netisr", \
				    RM_NOWITNESS)
#define	NETISR_LOCK_ASSERT()
#define	NETISR_RLOCK(tracker)	rm_rlock(&netisr_rmlock, (tracker))
#define	NETISR_RUNLOCK(tracker)	rm_runlock(&netisr_rmlock, (tracker))
#define	NETISR_WLOCK()		rm_wlock(&netisr_rmlock)
#define	NETISR_WUNLOCK()	rm_wunlock(&netisr_rmlock)
/* #define	NETISR_LOCKING */
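
/*
 * When NETISR_LOCKING is defined, packet paths follow the standard rmlock
 * read pattern sketched below, with a caller-supplied tracker recording the
 * read acquisition (see netisr_queue_src() and swi_net() later in this
 * file):
 *
 *	struct rm_priotracker tracker;
 *
 *	NETISR_RLOCK(&tracker);
 *	// ... consult netisr_proto[] and run or queue the handler ...
 *	NETISR_RUNLOCK(&tracker);
 */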

static SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr");

/*-
 * Three global direct dispatch policies are supported:
 *
 * NETISR_DISPATCH_DEFERRED: All work is deferred for a netisr, regardless of
 * context (may be overridden by protocols).
 *
 * NETISR_DISPATCH_HYBRID: If the executing context allows direct dispatch,
 * and we're running on the CPU the work would be performed on, then direct
 * dispatch it if it wouldn't violate ordering constraints on the workstream.
 *
 * NETISR_DISPATCH_DIRECT: If the executing context allows direct dispatch,
 * always direct dispatch.  (The default.)
 *
 * Notice that changing the global policy could lead to short periods of
 * misordered processing, but this is considered acceptable as compared to
 * the complexity of enforcing ordering during policy changes.  Protocols can
 * override the global policy (when they're not doing that, they select
 * NETISR_DISPATCH_DEFAULT).
 */
#define	NETISR_DISPATCH_POLICY_DEFAULT	NETISR_DISPATCH_DIRECT
#define	NETISR_DISPATCH_POLICY_MAXSTR	20 /* Used for temporary buffers. */
static u_int	netisr_dispatch_policy = NETISR_DISPATCH_POLICY_DEFAULT;
static int	sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_isr, OID_AUTO, dispatch, CTLTYPE_STRING | CTLFLAG_RW |
    CTLFLAG_TUN, 0, 0, sysctl_netisr_dispatch_policy, "A",
    "netisr dispatch policy");
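
/*
 * The policy is configured by name.  For example (administrative usage, not
 * part of this file), "net.isr.dispatch=deferred" in loader.conf(5) selects
 * the policy at boot, and "sysctl net.isr.dispatch=hybrid" changes it at
 * runtime; protocols registered with NETISR_DISPATCH_DEFAULT follow
 * whichever global policy is in effect.
 */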

/*
 * Allow the administrator to limit the number of threads (CPUs) to use for
 * netisr.  We don't check netisr_maxthreads before creating the thread for
 * CPU 0.  This must be set at boot.  We will create at most one thread per
 * CPU, with one workstream per thread (CPU).  By default this is set to 1,
 * which assigns just one CPU (CPU 0) and therefore a single workstream.  If
 * set to -1, netisr uses all CPUs (mp_ncpus) and thus has that many
 * workstreams.
 */
static int	netisr_maxthreads = 1;		/* Max number of threads. */
TUNABLE_INT("net.isr.maxthreads", &netisr_maxthreads);
SYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RDTUN,
    &netisr_maxthreads, 0,
    "Use at most this many CPUs for netisr processing");

static int	netisr_bindthreads = 0;		/* Bind threads to CPUs. */
TUNABLE_INT("net.isr.bindthreads", &netisr_bindthreads);
SYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RDTUN,
    &netisr_bindthreads, 0, "Bind netisr threads to CPUs.");
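
/*
 * For example, to run one netisr thread per CPU, each bound to its CPU, an
 * administrator could set both boot-time tunables in loader.conf(5):
 *
 *	net.isr.maxthreads="-1"
 *	net.isr.bindthreads="1"
 */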

/*
 * Limit per-workstream mbuf queue limits to at most net.isr.maxqlimit,
 * both for initial configuration and later modification using
 * netisr_setqlimit().
 */
#define	NETISR_DEFAULT_MAXQLIMIT	10240
static u_int	netisr_maxqlimit = NETISR_DEFAULT_MAXQLIMIT;
TUNABLE_INT("net.isr.maxqlimit", &netisr_maxqlimit);
SYSCTL_UINT(_net_isr, OID_AUTO, maxqlimit, CTLFLAG_RDTUN,
    &netisr_maxqlimit, 0,
    "Maximum netisr per-protocol, per-CPU queue depth.");

/*
 * The default per-workstream mbuf queue limit for protocols that don't
 * initialize the nh_qlimit field of their struct netisr_handler.  If this is
 * set above netisr_maxqlimit, we truncate it to the maximum during boot.
 */
#define	NETISR_DEFAULT_DEFAULTQLIMIT	256
static u_int	netisr_defaultqlimit = NETISR_DEFAULT_DEFAULTQLIMIT;
TUNABLE_INT("net.isr.defaultqlimit", &netisr_defaultqlimit);
SYSCTL_UINT(_net_isr, OID_AUTO, defaultqlimit, CTLFLAG_RDTUN,
    &netisr_defaultqlimit, 0,
    "Default netisr per-protocol, per-CPU queue limit if not set by protocol");

/*
 * Store and export the compile-time constant NETISR_MAXPROT limit on the
 * number of protocols that can register with netisr at a time.  This is
 * required for crashdump analysis, as it sizes netisr_proto[].
 */
static u_int	netisr_maxprot = NETISR_MAXPROT;
SYSCTL_UINT(_net_isr, OID_AUTO, maxprot, CTLFLAG_RD,
    &netisr_maxprot, 0,
    "Compile-time limit on the number of protocols supported by netisr.");

/*
 * The netisr_proto array describes all registered protocols, indexed by
 * protocol number.  See netisr_internal.h for more details.
 */
static struct netisr_proto	netisr_proto[NETISR_MAXPROT];

/*
 * Per-CPU workstream data.  See netisr_internal.h for more details.
 */
DPCPU_DEFINE(struct netisr_workstream, nws);

/*
 * Map contiguous values between 0 and nws_count into CPU IDs appropriate for
 * accessing workstreams.  This allows constructions of the form
 * DPCPU_ID_GET(nws_array[arbitraryvalue % nws_count], nws).
 */
static u_int				 nws_array[MAXCPU];

/*
 * Number of registered workstreams.  Will be at most the number of running
 * CPUs once fully started.
 */
static u_int				 nws_count;
SYSCTL_UINT(_net_isr, OID_AUTO, numthreads, CTLFLAG_RD,
    &nws_count, 0, "Number of extant netisr threads.");

/*
 * Synchronization for each workstream: a mutex protects all mutable fields
 * in each stream, including per-protocol state (mbuf queues).  The SWI is
 * woken up if asynchronous dispatch is required.
 */
#define	NWS_LOCK(s)		mtx_lock(&(s)->nws_mtx)
#define	NWS_LOCK_ASSERT(s)	mtx_assert(&(s)->nws_mtx, MA_OWNED)
#define	NWS_UNLOCK(s)		mtx_unlock(&(s)->nws_mtx)
#define	NWS_SIGNAL(s)		swi_sched((s)->nws_swi_cookie, 0)

/*
 * Utility routines for protocols that implement their own mapping of flows
 * to CPUs.
 */
u_int
netisr_get_cpucount(void)
{

	return (nws_count);
}

u_int
netisr_get_cpuid(u_int cpunumber)
{

	KASSERT(cpunumber < nws_count, ("%s: %u >= %u", __func__, cpunumber,
	    nws_count));

	return (nws_array[cpunumber]);
}

/*
 * The default implementation of flow -> CPU ID mapping.
 *
 * Non-static so that protocols can use it to map their own work to specific
 * CPUs in a manner consistent with netisr for affinity purposes.
 */
u_int
netisr_default_flow2cpu(u_int flowid)
{

	return (nws_array[flowid % nws_count]);
}
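
/*
 * As a sketch of how a protocol might use these routines, a hypothetical
 * nh_m2cpuid callback (the name example_m2cpuid and the use of the low bits
 * of "source" are illustrative only) could map work onto a valid workstream
 * CPU as follows:
 *
 *	static struct mbuf *
 *	example_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuidp)
 *	{
 *
 *		// Spread work across however many workstreams exist,
 *		// translating the index into a real CPU ID.
 *		*cpuidp = netisr_get_cpuid(source % netisr_get_cpucount());
 *		return (m);
 *	}
 */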

/*
 * Dispatch tunable and sysctl configuration.
 */
struct netisr_dispatch_table_entry {
	u_int		 ndte_policy;
	const char	*ndte_policy_str;
};
static const struct netisr_dispatch_table_entry netisr_dispatch_table[] = {
	{ NETISR_DISPATCH_DEFAULT, "default" },
	{ NETISR_DISPATCH_DEFERRED, "deferred" },
	{ NETISR_DISPATCH_HYBRID, "hybrid" },
	{ NETISR_DISPATCH_DIRECT, "direct" },
};
static const u_int netisr_dispatch_table_len =
    (sizeof(netisr_dispatch_table) / sizeof(netisr_dispatch_table[0]));

static void
netisr_dispatch_policy_to_str(u_int dispatch_policy, char *buffer,
    u_int buflen)
{
	const struct netisr_dispatch_table_entry *ndtep;
	const char *str;
	u_int i;

	str = "unknown";
	for (i = 0; i < netisr_dispatch_table_len; i++) {
		ndtep = &netisr_dispatch_table[i];
		if (ndtep->ndte_policy == dispatch_policy) {
			str = ndtep->ndte_policy_str;
			break;
		}
	}
	snprintf(buffer, buflen, "%s", str);
}

static int
netisr_dispatch_policy_from_str(const char *str, u_int *dispatch_policyp)
{
	const struct netisr_dispatch_table_entry *ndtep;
	u_int i;

	for (i = 0; i < netisr_dispatch_table_len; i++) {
		ndtep = &netisr_dispatch_table[i];
		if (strcmp(ndtep->ndte_policy_str, str) == 0) {
			*dispatch_policyp = ndtep->ndte_policy;
			return (0);
		}
	}
	return (EINVAL);
}

static int
sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS)
{
	char tmp[NETISR_DISPATCH_POLICY_MAXSTR];
	u_int dispatch_policy;
	int error;

	netisr_dispatch_policy_to_str(netisr_dispatch_policy, tmp,
	    sizeof(tmp));
	error = sysctl_handle_string(oidp, tmp, sizeof(tmp), req);
	if (error == 0 && req->newptr != NULL) {
		error = netisr_dispatch_policy_from_str(tmp,
		    &dispatch_policy);
		if (error == 0 && dispatch_policy == NETISR_DISPATCH_DEFAULT)
			error = EINVAL;
		if (error == 0)
			netisr_dispatch_policy = dispatch_policy;
	}
	return (error);
}

/*
 * Register a new netisr handler, which requires initializing per-protocol
 * fields for each workstream.  All netisr work is briefly suspended while
 * the protocol is installed.
 */
void
netisr_register(const struct netisr_handler *nhp)
{
	struct netisr_work *npwp;
	const char *name;
	u_int i, proto;

	proto = nhp->nh_proto;
	name = nhp->nh_name;

	/*
	 * Test that the requested registration is valid.
	 */
	KASSERT(nhp->nh_name != NULL,
	    ("%s: nh_name NULL for %u", __func__, proto));
	KASSERT(nhp->nh_handler != NULL,
	    ("%s: nh_handler NULL for %s", __func__, name));
	KASSERT(nhp->nh_policy == NETISR_POLICY_SOURCE ||
	    nhp->nh_policy == NETISR_POLICY_FLOW ||
	    nhp->nh_policy == NETISR_POLICY_CPU,
	    ("%s: unsupported nh_policy %u for %s", __func__,
	    nhp->nh_policy, name));
	KASSERT(nhp->nh_policy == NETISR_POLICY_FLOW ||
	    nhp->nh_m2flow == NULL,
	    ("%s: nh_policy != FLOW but m2flow defined for %s", __func__,
	    name));
	KASSERT(nhp->nh_policy == NETISR_POLICY_CPU || nhp->nh_m2cpuid == NULL,
	    ("%s: nh_policy != CPU but m2cpuid defined for %s", __func__,
	    name));
	KASSERT(nhp->nh_policy != NETISR_POLICY_CPU || nhp->nh_m2cpuid != NULL,
	    ("%s: nh_policy == CPU but m2cpuid not defined for %s", __func__,
	    name));
	KASSERT(nhp->nh_dispatch == NETISR_DISPATCH_DEFAULT ||
	    nhp->nh_dispatch == NETISR_DISPATCH_DEFERRED ||
	    nhp->nh_dispatch == NETISR_DISPATCH_HYBRID ||
	    nhp->nh_dispatch == NETISR_DISPATCH_DIRECT,
	    ("%s: invalid nh_dispatch (%u)", __func__, nhp->nh_dispatch));

	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u, %s): protocol too big", __func__, proto, name));

	/*
	 * Test that no existing registration exists for this protocol.
	 */
	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_name == NULL,
	    ("%s(%u, %s): name present", __func__, proto, name));
	KASSERT(netisr_proto[proto].np_handler == NULL,
	    ("%s(%u, %s): handler present", __func__, proto, name));

	netisr_proto[proto].np_name = name;
	netisr_proto[proto].np_handler = nhp->nh_handler;
	netisr_proto[proto].np_m2flow = nhp->nh_m2flow;
	netisr_proto[proto].np_m2cpuid = nhp->nh_m2cpuid;
	netisr_proto[proto].np_drainedcpu = nhp->nh_drainedcpu;
	if (nhp->nh_qlimit == 0)
		netisr_proto[proto].np_qlimit = netisr_defaultqlimit;
	else if (nhp->nh_qlimit > netisr_maxqlimit) {
		printf("%s: %s requested queue limit %u capped to "
		    "net.isr.maxqlimit %u\n", __func__, name, nhp->nh_qlimit,
		    netisr_maxqlimit);
		netisr_proto[proto].np_qlimit = netisr_maxqlimit;
	} else
		netisr_proto[proto].np_qlimit = nhp->nh_qlimit;
	netisr_proto[proto].np_policy = nhp->nh_policy;
	netisr_proto[proto].np_dispatch = nhp->nh_dispatch;
	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		bzero(npwp, sizeof(*npwp));
		npwp->nw_qlimit = netisr_proto[proto].np_qlimit;
	}
	NETISR_WUNLOCK();
}
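
/*
 * A typical registration, sketched after the pattern used by protocol input
 * paths (the handler name and field values here are illustrative, not a
 * copy of any protocol's actual configuration):
 *
 *	static struct netisr_handler example_nh = {
 *		.nh_name = "example",
 *		.nh_handler = example_input,	// void example_input(struct mbuf *)
 *		.nh_proto = NETISR_IP,
 *		.nh_policy = NETISR_POLICY_FLOW,
 *		.nh_dispatch = NETISR_DISPATCH_DEFAULT,
 *		.nh_qlimit = 0,		// 0 selects netisr_defaultqlimit
 *	};
 *
 *	netisr_register(&example_nh);
 */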

/*
 * Clear drop counters across all workstreams for a protocol.
 */
void
netisr_clearqdrops(const struct netisr_handler *nhp)
{
	struct netisr_work *npwp;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		npwp->nw_qdrops = 0;
	}
	NETISR_WUNLOCK();
}

/*
 * Query current drop counters across all workstreams for a protocol.
 */
void
netisr_getqdrops(const struct netisr_handler *nhp, u_int64_t *qdropp)
{
	struct netisr_work *npwp;
	struct rm_priotracker tracker;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	*qdropp = 0;
	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_RLOCK(&tracker);
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		*qdropp += npwp->nw_qdrops;
	}
	NETISR_RUNLOCK(&tracker);
}

/*
 * Query current per-workstream queue limit for a protocol.
 */
void
netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp)
{
	struct rm_priotracker tracker;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int proto;

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_RLOCK(&tracker);
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));
	*qlimitp = netisr_proto[proto].np_qlimit;
	NETISR_RUNLOCK(&tracker);
}

/*
 * Update the queue limit across per-workstream queues for a protocol.  We
 * simply change the limits, and don't drain overflowed packets as they will
 * (hopefully) take care of themselves shortly.
 */
int
netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit)
{
	struct netisr_work *npwp;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	if (qlimit > netisr_maxqlimit)
		return (EINVAL);

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	netisr_proto[proto].np_qlimit = qlimit;
	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		npwp->nw_qlimit = qlimit;
	}
	NETISR_WUNLOCK();
	return (0);
}
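
/*
 * Protocols typically expose their queue limit through a private sysctl
 * handler built on the two routines above.  A minimal sketch, assuming a
 * hypothetical example_nh handler registered earlier:
 *
 *	static int
 *	sysctl_example_qlimit(SYSCTL_HANDLER_ARGS)
 *	{
 *		u_int qlimit;
 *		int error;
 *
 *		netisr_getqlimit(&example_nh, &qlimit);
 *		error = sysctl_handle_int(oidp, &qlimit, 0, req);
 *		if (error == 0 && req->newptr != NULL)
 *			error = netisr_setqlimit(&example_nh, qlimit);
 *		return (error);
 *	}
 */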

/*
 * Drain all packets currently held in a particular protocol work queue.
 */
static void
netisr_drain_proto(struct netisr_work *npwp)
{
	struct mbuf *m;

	/*
	 * We would assert the lock on the workstream but it's not passed in.
	 */
	while ((m = npwp->nw_head) != NULL) {
		npwp->nw_head = m->m_nextpkt;
		m->m_nextpkt = NULL;
		if (npwp->nw_head == NULL)
			npwp->nw_tail = NULL;
		npwp->nw_len--;
		m_freem(m);
	}
	KASSERT(npwp->nw_tail == NULL, ("%s: tail", __func__));
	KASSERT(npwp->nw_len == 0, ("%s: len", __func__));
}

/*
 * Remove the registration of a network protocol, which requires clearing
 * per-protocol fields across all workstreams, including freeing all mbufs in
 * the queues at time of unregister.  All work in netisr is briefly suspended
 * while this takes place.
 */
void
netisr_unregister(const struct netisr_handler *nhp)
{
	struct netisr_work *npwp;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	netisr_proto[proto].np_name = NULL;
	netisr_proto[proto].np_handler = NULL;
	netisr_proto[proto].np_m2flow = NULL;
	netisr_proto[proto].np_m2cpuid = NULL;
	netisr_proto[proto].np_qlimit = 0;
	netisr_proto[proto].np_policy = 0;
	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		netisr_drain_proto(npwp);
		bzero(npwp, sizeof(*npwp));
	}
	NETISR_WUNLOCK();
}

/*
 * Compose the global and per-protocol policies on dispatch, and return the
 * dispatch policy to use.
 */
static u_int
netisr_get_dispatch(struct netisr_proto *npp)
{

	/*
	 * Protocol-specific configuration overrides the global default.
	 */
	if (npp->np_dispatch != NETISR_DISPATCH_DEFAULT)
		return (npp->np_dispatch);
	return (netisr_dispatch_policy);
}

/*
 * Look up the workstream given a packet and source identifier.  Do this by
 * checking the protocol's policy, and optionally call out to the protocol
 * for assistance if required.
 */
static struct mbuf *
netisr_select_cpuid(struct netisr_proto *npp, u_int dispatch_policy,
    uintptr_t source, struct mbuf *m, u_int *cpuidp)
{
	struct ifnet *ifp;
	u_int policy;

	NETISR_LOCK_ASSERT();

	/*
	 * In the event we have only one worker, shortcut and deliver to it
	 * without further ado.
	 */
	if (nws_count == 1) {
		*cpuidp = nws_array[0];
		return (m);
	}

	/*
	 * What happens next depends on the policy selected by the protocol.
	 * If we want to support per-interface policies, we should do that
	 * here first.
	 */
	policy = npp->np_policy;
	if (policy == NETISR_POLICY_CPU) {
		m = npp->np_m2cpuid(m, source, cpuidp);
		if (m == NULL)
			return (NULL);

		/*
		 * It's possible for a protocol not to have a good idea about
		 * where to process a packet, in which case we fall back on
		 * the netisr code to decide.  In the hybrid case, return the
		 * current CPU ID, which will force an immediate direct
		 * dispatch.  In the queued case, fall back on the SOURCE
		 * policy.
		 */
		if (*cpuidp != NETISR_CPUID_NONE)
			return (m);
		if (dispatch_policy == NETISR_DISPATCH_HYBRID) {
			*cpuidp = curcpu;
			return (m);
		}
		policy = NETISR_POLICY_SOURCE;
	}

	if (policy == NETISR_POLICY_FLOW) {
		if (M_HASHTYPE_GET(m) == M_HASHTYPE_NONE &&
		    npp->np_m2flow != NULL) {
			m = npp->np_m2flow(m, source);
			if (m == NULL)
				return (NULL);
		}
		if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
			*cpuidp =
			    netisr_default_flow2cpu(m->m_pkthdr.flowid);
			return (m);
		}
		policy = NETISR_POLICY_SOURCE;
	}

	KASSERT(policy == NETISR_POLICY_SOURCE,
	    ("%s: invalid policy %u for %s", __func__, npp->np_policy,
	    npp->np_name));

	ifp = m->m_pkthdr.rcvif;
	if (ifp != NULL)
		*cpuidp = nws_array[(ifp->if_index + source) % nws_count];
	else
		*cpuidp = nws_array[source % nws_count];
	return (m);
}

/*
 * Process packets associated with a workstream and protocol.  For reasons of
 * fairness, we process up to one complete netisr queue at a time, moving the
 * queue to a stack-local queue for processing, but do not loop refreshing
 * from the global queue.  The caller is responsible for deciding whether to
 * loop, and for setting the NWS_RUNNING flag.  The passed workstream will be
 * locked on entry and relocked before return, but will be released while
 * processing.  The number of packets processed is returned.
 */
static u_int
netisr_process_workstream_proto(struct netisr_workstream *nwsp, u_int proto)
{
	struct netisr_work local_npw, *npwp;
	u_int handled;
	struct mbuf *m;

	NETISR_LOCK_ASSERT();
	NWS_LOCK_ASSERT(nwsp);

	KASSERT(nwsp->nws_flags & NWS_RUNNING,
	    ("%s(%u): not running", __func__, proto));
	KASSERT(proto >= 0 && proto < NETISR_MAXPROT,
	    ("%s(%u): invalid proto\n", __func__, proto));

	npwp = &nwsp->nws_work[proto];
	if (npwp->nw_len == 0)
		return (0);

	/*
	 * Move the global work queue to a thread-local work queue.
	 *
	 * Notice that this means the effective maximum length of the queue
	 * is actually twice that of the maximum queue length specified in
	 * the protocol registration call.
	 */
	handled = npwp->nw_len;
	local_npw = *npwp;
	npwp->nw_head = NULL;
	npwp->nw_tail = NULL;
	npwp->nw_len = 0;
	nwsp->nws_pendingbits &= ~(1 << proto);
	NWS_UNLOCK(nwsp);
	while ((m = local_npw.nw_head) != NULL) {
		local_npw.nw_head = m->m_nextpkt;
		m->m_nextpkt = NULL;
		if (local_npw.nw_head == NULL)
			local_npw.nw_tail = NULL;
		local_npw.nw_len--;
		VNET_ASSERT(m->m_pkthdr.rcvif != NULL,
		    ("%s:%d rcvif == NULL: m=%p", __func__, __LINE__, m));
		CURVNET_SET(m->m_pkthdr.rcvif->if_vnet);
		netisr_proto[proto].np_handler(m);
		CURVNET_RESTORE();
	}
	KASSERT(local_npw.nw_len == 0,
	    ("%s(%u): len %u", __func__, proto, local_npw.nw_len));
	if (netisr_proto[proto].np_drainedcpu)
		netisr_proto[proto].np_drainedcpu(nwsp->nws_cpu);
	NWS_LOCK(nwsp);
	npwp->nw_handled += handled;
	return (handled);
}

/*
 * SWI handler for netisr -- processes packets in a set of workstreams that
 * it owns, woken up by calls to NWS_SIGNAL().  If this workstream is already
 * being direct dispatched, go back to sleep and wait for the dispatching
 * thread to wake us up again.
 */
static void
swi_net(void *arg)
{
#ifdef NETISR_LOCKING
	struct rm_priotracker tracker;
#endif
	struct netisr_workstream *nwsp;
	u_int bits, prot;

	nwsp = arg;

#ifdef DEVICE_POLLING
	KASSERT(nws_count == 1,
	    ("%s: device_polling but nws_count != 1", __func__));
	netisr_poll();
#endif
#ifdef NETISR_LOCKING
	NETISR_RLOCK(&tracker);
#endif
	NWS_LOCK(nwsp);
	KASSERT(!(nwsp->nws_flags & NWS_RUNNING), ("swi_net: running"));
	if (nwsp->nws_flags & NWS_DISPATCHING)
		goto out;
	nwsp->nws_flags |= NWS_RUNNING;
	nwsp->nws_flags &= ~NWS_SCHEDULED;
	while ((bits = nwsp->nws_pendingbits) != 0) {
		while ((prot = ffs(bits)) != 0) {
			prot--;
			bits &= ~(1 << prot);
			(void)netisr_process_workstream_proto(nwsp, prot);
		}
	}
	nwsp->nws_flags &= ~NWS_RUNNING;
out:
	NWS_UNLOCK(nwsp);
#ifdef NETISR_LOCKING
	NETISR_RUNLOCK(&tracker);
#endif
#ifdef DEVICE_POLLING
	netisr_pollmore();
#endif
}

static int
netisr_queue_workstream(struct netisr_workstream *nwsp, u_int proto,
    struct netisr_work *npwp, struct mbuf *m, int *dosignalp)
{

	NWS_LOCK_ASSERT(nwsp);

	*dosignalp = 0;
	if (npwp->nw_len < npwp->nw_qlimit) {
		m->m_nextpkt = NULL;
		if (npwp->nw_head == NULL) {
			npwp->nw_head = m;
			npwp->nw_tail = m;
		} else {
			npwp->nw_tail->m_nextpkt = m;
			npwp->nw_tail = m;
		}
		npwp->nw_len++;
		if (npwp->nw_len > npwp->nw_watermark)
			npwp->nw_watermark = npwp->nw_len;

		/*
		 * We must set the bit regardless of NWS_RUNNING, so that
		 * swi_net() keeps calling netisr_process_workstream_proto().
		 */
		nwsp->nws_pendingbits |= (1 << proto);
		if (!(nwsp->nws_flags &
		    (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED))) {
			nwsp->nws_flags |= NWS_SCHEDULED;
			*dosignalp = 1;	/* Defer until unlocked. */
		}
		npwp->nw_queued++;
		return (0);
	} else {
		m_freem(m);
		npwp->nw_qdrops++;
		return (ENOBUFS);
	}
}

static int
netisr_queue_internal(u_int proto, struct mbuf *m, u_int cpuid)
{
	struct netisr_workstream *nwsp;
	struct netisr_work *npwp;
	int dosignal, error;

#ifdef NETISR_LOCKING
	NETISR_LOCK_ASSERT();
#endif
	KASSERT(cpuid <= mp_maxid, ("%s: cpuid too big (%u, %u)", __func__,
	    cpuid, mp_maxid));
	KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));

	dosignal = 0;
	error = 0;
	nwsp = DPCPU_ID_PTR(cpuid, nws);
	npwp = &nwsp->nws_work[proto];
	NWS_LOCK(nwsp);
	error = netisr_queue_workstream(nwsp, proto, npwp, m, &dosignal);
	NWS_UNLOCK(nwsp);
	if (dosignal)
		NWS_SIGNAL(nwsp);
	return (error);
}

int
netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m)
{
#ifdef NETISR_LOCKING
	struct rm_priotracker tracker;
#endif
	u_int cpuid;
	int error;

	KASSERT(proto < NETISR_MAXPROT,
	    ("%s: invalid proto %u", __func__, proto));

#ifdef NETISR_LOCKING
	NETISR_RLOCK(&tracker);
#endif
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s: invalid proto %u", __func__, proto));

	m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_DEFERRED,
	    source, m, &cpuid);
	if (m != NULL) {
		KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__,
		    cpuid));
		error = netisr_queue_internal(proto, m, cpuid);
	} else
		error = ENOBUFS;
#ifdef NETISR_LOCKING
	NETISR_RUNLOCK(&tracker);
#endif
	return (error);
}

int
netisr_queue(u_int proto, struct mbuf *m)
{

	return (netisr_queue_src(proto, 0, m));
}

/*
 * Dispatch a packet for netisr processing; direct dispatch is permitted by
 * calling context.
 */
int
netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
{
#ifdef NETISR_LOCKING
	struct rm_priotracker tracker;
#endif
	struct netisr_workstream *nwsp;
	struct netisr_proto *npp;
	struct netisr_work *npwp;
	int dosignal, error;
	u_int cpuid, dispatch_policy;

	KASSERT(proto < NETISR_MAXPROT,
	    ("%s: invalid proto %u", __func__, proto));
#ifdef NETISR_LOCKING
	NETISR_RLOCK(&tracker);
#endif
	npp = &netisr_proto[proto];
	KASSERT(npp->np_handler != NULL, ("%s: invalid proto %u", __func__,
	    proto));

	dispatch_policy = netisr_get_dispatch(npp);
	if (dispatch_policy == NETISR_DISPATCH_DEFERRED)
		return (netisr_queue_src(proto, source, m));

	/*
	 * If direct dispatch is forced, then unconditionally dispatch
	 * without a formal CPU selection.  Borrow the current CPU's stats,
	 * even if there's no worker on it.  In this case we don't update
	 * nws_flags because all netisr processing will be source ordered due
	 * to always being forced to directly dispatch.
	 */
	if (dispatch_policy == NETISR_DISPATCH_DIRECT) {
		nwsp = DPCPU_PTR(nws);
		npwp = &nwsp->nws_work[proto];
		npwp->nw_dispatched++;
		npwp->nw_handled++;
		netisr_proto[proto].np_handler(m);
		error = 0;
		goto out_unlock;
	}

	KASSERT(dispatch_policy == NETISR_DISPATCH_HYBRID,
	    ("%s: unknown dispatch policy (%u)", __func__, dispatch_policy));

	/*
	 * Otherwise, we execute in a hybrid mode where we will try to direct
	 * dispatch if we're on the right CPU and the netisr worker isn't
	 * already running.
	 */
	sched_pin();
	m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_HYBRID,
	    source, m, &cpuid);
	if (m == NULL) {
		error = ENOBUFS;
		goto out_unpin;
	}
	KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));
	if (cpuid != curcpu)
		goto queue_fallback;
	nwsp = DPCPU_PTR(nws);
	npwp = &nwsp->nws_work[proto];

	/*-
	 * We are willing to direct dispatch only if three conditions hold:
	 *
	 * (1) The netisr worker isn't already running,
	 * (2) Another thread isn't already directly dispatching, and
	 * (3) The netisr hasn't already been woken up.
	 */
	NWS_LOCK(nwsp);
	if (nwsp->nws_flags & (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED)) {
		error = netisr_queue_workstream(nwsp, proto, npwp, m,
		    &dosignal);
		NWS_UNLOCK(nwsp);
		if (dosignal)
			NWS_SIGNAL(nwsp);
		goto out_unpin;
	}

	/*
	 * The current thread is now effectively the netisr worker, so set
	 * the dispatching flag to prevent concurrent processing of the
	 * stream from another thread (even the netisr worker), which could
	 * otherwise lead to effective misordering of the stream.
	 */
	nwsp->nws_flags |= NWS_DISPATCHING;
	NWS_UNLOCK(nwsp);
	netisr_proto[proto].np_handler(m);
	NWS_LOCK(nwsp);
	nwsp->nws_flags &= ~NWS_DISPATCHING;
	npwp->nw_handled++;
	npwp->nw_hybrid_dispatched++;

	/*
	 * If other work was enqueued by another thread while we were direct
	 * dispatching, we need to signal the netisr worker to do that work.
	 * In the future, we might want to do some of that work in the
	 * current thread, rather than trigger further context switches.  If
	 * so, we'll want to establish a reasonable bound on the work done in
	 * the "borrowed" context.
	 */
	if (nwsp->nws_pendingbits != 0) {
		nwsp->nws_flags |= NWS_SCHEDULED;
		dosignal = 1;
	} else
		dosignal = 0;
	NWS_UNLOCK(nwsp);
	if (dosignal)
		NWS_SIGNAL(nwsp);
	error = 0;
	goto out_unpin;

queue_fallback:
	error = netisr_queue_internal(proto, m, cpuid);
out_unpin:
	sched_unpin();
out_unlock:
#ifdef NETISR_LOCKING
	NETISR_RUNLOCK(&tracker);
#endif
	return (error);
}

int
netisr_dispatch(u_int proto, struct mbuf *m)
{

	return (netisr_dispatch_src(proto, 0, m));
}

#ifdef DEVICE_POLLING
/*
 * Kernel polling borrows a netisr thread to run interface polling in; this
 * function allows kernel polling to request that the netisr thread be
 * scheduled even if no packets are pending for protocols.
 */
void
netisr_sched_poll(void)
{
	struct netisr_workstream *nwsp;

	nwsp = DPCPU_ID_PTR(nws_array[0], nws);
	NWS_SIGNAL(nwsp);
}
#endif

static void
netisr_start_swi(u_int cpuid, struct pcpu *pc)
{
	char swiname[12];
	struct netisr_workstream *nwsp;
	int error;

	KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));

	nwsp = DPCPU_ID_PTR(cpuid, nws);
	mtx_init(&nwsp->nws_mtx, "netisr_mtx", NULL, MTX_DEF);
	nwsp->nws_cpu = cpuid;
	snprintf(swiname, sizeof(swiname), "netisr %u", cpuid);
	error = swi_add(&nwsp->nws_intr_event, swiname, swi_net, nwsp,
	    SWI_NET, INTR_MPSAFE, &nwsp->nws_swi_cookie);
	if (error)
		panic("%s: swi_add %d", __func__, error);
	pc->pc_netisr = nwsp->nws_intr_event;
	if (netisr_bindthreads) {
		error = intr_event_bind(nwsp->nws_intr_event, cpuid);
		if (error != 0)
			printf("%s: cpu %u: intr_event_bind: %d\n", __func__,
			    cpuid, error);
	}
	NETISR_WLOCK();
	nws_array[nws_count] = nwsp->nws_cpu;
	nws_count++;
	NETISR_WUNLOCK();
}

/*
 * Initialize the netisr subsystem.  We rely on BSS and static initialization
 * of most fields in global data structures.
 *
 * Start a worker thread for the boot CPU so that we can support network
 * traffic immediately in case the network stack is used before additional
 * CPUs are started (for example, diskless boot).
 */
static void
netisr_init(void *arg)
{
	char tmp[NETISR_DISPATCH_POLICY_MAXSTR];
	u_int dispatch_policy;
	int error;

	KASSERT(curcpu == 0, ("%s: not on CPU 0", __func__));

	NETISR_LOCK_INIT();
	if (netisr_maxthreads == 0 || netisr_maxthreads < -1)
		netisr_maxthreads = 1;		/* Default behavior. */
	else if (netisr_maxthreads == -1)
		netisr_maxthreads = mp_ncpus;	/* Use all CPUs. */
	if (netisr_maxthreads > mp_ncpus) {
		printf("netisr_init: forcing maxthreads from %d to %d\n",
		    netisr_maxthreads, mp_ncpus);
		netisr_maxthreads = mp_ncpus;
	}
	if (netisr_defaultqlimit > netisr_maxqlimit) {
		printf("netisr_init: forcing defaultqlimit from %d to %d\n",
		    netisr_defaultqlimit, netisr_maxqlimit);
		netisr_defaultqlimit = netisr_maxqlimit;
	}
#ifdef DEVICE_POLLING
	/*
	 * The device polling code is not yet aware of how to deal with
	 * multiple netisr threads, so for the time being compiling in device
	 * polling disables parallel netisr workers.
	 */
	if (netisr_maxthreads != 1 || netisr_bindthreads != 0) {
		printf("netisr_init: forcing maxthreads to 1 and "
		    "bindthreads to 0 for device polling\n");
		netisr_maxthreads = 1;
		netisr_bindthreads = 0;
	}
#endif
1161111888Sjlemon
1162222249Srwatson	if (TUNABLE_STR_FETCH("net.isr.dispatch", tmp, sizeof(tmp))) {
1163222249Srwatson		error = netisr_dispatch_policy_from_str(tmp,
1164222249Srwatson		    &dispatch_policy);
1165222249Srwatson		if (error == 0 && dispatch_policy == NETISR_DISPATCH_DEFAULT)
1166222249Srwatson			error = EINVAL;
1167255329Sdavide		if (error == 0)
1168222249Srwatson			netisr_dispatch_policy = dispatch_policy;
1169255329Sdavide		else
1170222249Srwatson			printf(
1171222249Srwatson			    "%s: invalid dispatch policy %s, using default\n",
1172222249Srwatson			    __func__, tmp);
1173222249Srwatson	}
1174222249Srwatson
1175193219Srwatson	netisr_start_swi(curcpu, pcpu_find(curcpu));
1176103781Sjake}
1177193219SrwatsonSYSINIT(netisr_init, SI_SUB_SOFTINTR, SI_ORDER_FIRST, netisr_init, NULL);
1178103781Sjake
1179193219Srwatson/*
1180193219Srwatson * Start worker threads for additional CPUs.  No attempt to gracefully handle
1181193219Srwatson * work reassignment, we don't yet support dynamic reconfiguration.
1182193219Srwatson */
1183103781Sjakestatic void
1184193219Srwatsonnetisr_start(void *arg)
1185103781Sjake{
1186193219Srwatson	struct pcpu *pc;
1187103781Sjake
1188222531Snwhitehorn	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
1189193219Srwatson		if (nws_count >= netisr_maxthreads)
1190193219Srwatson			break;
1191193219Srwatson		/* XXXRW: Is skipping absent CPUs still required here? */
1192193219Srwatson		if (CPU_ABSENT(pc->pc_cpuid))
1193193219Srwatson			continue;
1194193219Srwatson		/* Worker will already be present for boot CPU. */
1195193219Srwatson		if (pc->pc_netisr != NULL)
1196193219Srwatson			continue;
1197193219Srwatson		netisr_start_swi(pc->pc_cpuid, pc);
1198193219Srwatson	}
1199103781Sjake}
1200193219SrwatsonSYSINIT(netisr_start, SI_SUB_SMP, SI_ORDER_MIDDLE, netisr_start, NULL);

/*
 * Sysctl monitoring for netisr: query a list of registered protocols.
 */
static int
sysctl_netisr_proto(SYSCTL_HANDLER_ARGS)
{
	struct rm_priotracker tracker;
	struct sysctl_netisr_proto *snpp, *snp_array;
	struct netisr_proto *npp;
	u_int counter, proto;
	int error;

	if (req->newptr != NULL)
		return (EINVAL);
	snp_array = malloc(sizeof(*snp_array) * NETISR_MAXPROT, M_TEMP,
	    M_ZERO | M_WAITOK);
	counter = 0;
	NETISR_RLOCK(&tracker);
	for (proto = 0; proto < NETISR_MAXPROT; proto++) {
		npp = &netisr_proto[proto];
		if (npp->np_name == NULL)
			continue;
		snpp = &snp_array[counter];
		snpp->snp_version = sizeof(*snpp);
		strlcpy(snpp->snp_name, npp->np_name, NETISR_NAMEMAXLEN);
		snpp->snp_proto = proto;
		snpp->snp_qlimit = npp->np_qlimit;
		snpp->snp_policy = npp->np_policy;
		snpp->snp_dispatch = npp->np_dispatch;
		if (npp->np_m2flow != NULL)
			snpp->snp_flags |= NETISR_SNP_FLAGS_M2FLOW;
		if (npp->np_m2cpuid != NULL)
			snpp->snp_flags |= NETISR_SNP_FLAGS_M2CPUID;
		if (npp->np_drainedcpu != NULL)
			snpp->snp_flags |= NETISR_SNP_FLAGS_DRAINEDCPU;
		counter++;
	}
	NETISR_RUNLOCK(&tracker);
	KASSERT(counter <= NETISR_MAXPROT,
	    ("sysctl_netisr_proto: counter too big (%d)", counter));
	error = SYSCTL_OUT(req, snp_array, sizeof(*snp_array) * counter);
	free(snp_array, M_TEMP);
	return (error);
}

SYSCTL_PROC(_net_isr, OID_AUTO, proto,
    CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_proto,
    "S,sysctl_netisr_proto",
    "Return list of protocols registered with netisr");
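
/*
 * Illustrative sketch: a userland monitor can read the node above with
 * sysctlbyname(3) (with <sys/sysctl.h> and <net/netisr.h> for the record
 * layout) and walk the returned array of struct sysctl_netisr_proto
 * records; error handling is omitted for brevity:
 *
 *	size_t len = 0;
 *	struct sysctl_netisr_proto *snpp;
 *	u_int i;
 *
 *	sysctlbyname("net.isr.proto", NULL, &len, NULL, 0);
 *	snpp = malloc(len);
 *	sysctlbyname("net.isr.proto", snpp, &len, NULL, 0);
 *	for (i = 0; i < len / sizeof(*snpp); i++)
 *		printf("%s: qlimit %u\n", snpp[i].snp_name,
 *		    snpp[i].snp_qlimit);
 *	free(snpp);
 */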

/*
 * Sysctl monitoring for netisr: query a list of workstreams.
 */
static int
sysctl_netisr_workstream(SYSCTL_HANDLER_ARGS)
{
	struct rm_priotracker tracker;
	struct sysctl_netisr_workstream *snwsp, *snws_array;
	struct netisr_workstream *nwsp;
	u_int counter, cpuid;
	int error;

	if (req->newptr != NULL)
		return (EINVAL);
	snws_array = malloc(sizeof(*snws_array) * MAXCPU, M_TEMP,
	    M_ZERO | M_WAITOK);
	counter = 0;
	NETISR_RLOCK(&tracker);
	CPU_FOREACH(cpuid) {
		nwsp = DPCPU_ID_PTR(cpuid, nws);
		if (nwsp->nws_intr_event == NULL)
			continue;
		NWS_LOCK(nwsp);
		snwsp = &snws_array[counter];
		snwsp->snws_version = sizeof(*snwsp);

		/*
		 * For now, we equate workstream IDs and CPU IDs in the
		 * kernel, but expose them independently to userspace in case
		 * that assumption changes in the future.
		 */
		snwsp->snws_wsid = cpuid;
		snwsp->snws_cpu = cpuid;
		if (nwsp->nws_intr_event != NULL)
			snwsp->snws_flags |= NETISR_SNWS_FLAGS_INTR;
		NWS_UNLOCK(nwsp);
		counter++;
	}
	NETISR_RUNLOCK(&tracker);
	KASSERT(counter <= MAXCPU,
	    ("sysctl_netisr_workstream: counter too big (%d)", counter));
	error = SYSCTL_OUT(req, snws_array, sizeof(*snws_array) * counter);
	free(snws_array, M_TEMP);
	return (error);
}

SYSCTL_PROC(_net_isr, OID_AUTO, workstream,
    CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_workstream,
    "S,sysctl_netisr_workstream",
    "Return list of workstreams implemented by netisr");

/*
 * Sysctl monitoring for netisr: query per-protocol data across all
 * workstreams.
 */
static int
sysctl_netisr_work(SYSCTL_HANDLER_ARGS)
{
	struct rm_priotracker tracker;
	struct sysctl_netisr_work *snwp, *snw_array;
	struct netisr_workstream *nwsp;
	struct netisr_proto *npp;
	struct netisr_work *nwp;
	u_int counter, cpuid, proto;
	int error;

	if (req->newptr != NULL)
		return (EINVAL);
	snw_array = malloc(sizeof(*snw_array) * MAXCPU * NETISR_MAXPROT,
	    M_TEMP, M_ZERO | M_WAITOK);
	counter = 0;
	NETISR_RLOCK(&tracker);
	CPU_FOREACH(cpuid) {
		nwsp = DPCPU_ID_PTR(cpuid, nws);
		if (nwsp->nws_intr_event == NULL)
			continue;
		NWS_LOCK(nwsp);
		for (proto = 0; proto < NETISR_MAXPROT; proto++) {
			npp = &netisr_proto[proto];
			if (npp->np_name == NULL)
				continue;
			nwp = &nwsp->nws_work[proto];
			snwp = &snw_array[counter];
			snwp->snw_version = sizeof(*snwp);
			snwp->snw_wsid = cpuid;		/* See comment above. */
			snwp->snw_proto = proto;
			snwp->snw_len = nwp->nw_len;
			snwp->snw_watermark = nwp->nw_watermark;
			snwp->snw_dispatched = nwp->nw_dispatched;
			snwp->snw_hybrid_dispatched =
			    nwp->nw_hybrid_dispatched;
			snwp->snw_qdrops = nwp->nw_qdrops;
			snwp->snw_queued = nwp->nw_queued;
			snwp->snw_handled = nwp->nw_handled;
			counter++;
		}
		NWS_UNLOCK(nwsp);
	}
	KASSERT(counter <= MAXCPU * NETISR_MAXPROT,
	    ("sysctl_netisr_work: counter too big (%d)", counter));
	NETISR_RUNLOCK(&tracker);
	error = SYSCTL_OUT(req, snw_array, sizeof(*snw_array) * counter);
	free(snw_array, M_TEMP);
	return (error);
}

SYSCTL_PROC(_net_isr, OID_AUTO, work,
    CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_work,
    "S,sysctl_netisr_work",
    "Return list of per-workstream, per-protocol work in netisr");
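
/*
 * Taken together, net.isr.proto, net.isr.workstream, and net.isr.work
 * give a per-CPU, per-protocol view of queue depths, watermarks, and
 * drops; this is the sort of data presented by "netstat -Q", e.g.:
 *
 *	# netstat -Q
 */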

#ifdef DDB
DB_SHOW_COMMAND(netisr, db_show_netisr)
{
	struct netisr_workstream *nwsp;
	struct netisr_work *nwp;
	int first, proto;
	u_int cpuid;

	db_printf("%3s %6s %5s %5s %5s %8s %8s %8s %8s\n", "CPU", "Proto",
	    "Len", "WMark", "Max", "Disp", "HDisp", "Drop", "Queue");
	CPU_FOREACH(cpuid) {
		nwsp = DPCPU_ID_PTR(cpuid, nws);
		if (nwsp->nws_intr_event == NULL)
			continue;
		first = 1;
		for (proto = 0; proto < NETISR_MAXPROT; proto++) {
			if (netisr_proto[proto].np_handler == NULL)
				continue;
			nwp = &nwsp->nws_work[proto];
			if (first) {
				db_printf("%3d ", cpuid);
				first = 0;
			} else
				db_printf("%3s ", "");
			db_printf(
			    "%6s %5d %5d %5d %8ju %8ju %8ju %8ju\n",
			    netisr_proto[proto].np_name, nwp->nw_len,
			    nwp->nw_watermark, nwp->nw_qlimit,
			    nwp->nw_dispatched, nwp->nw_hybrid_dispatched,
			    nwp->nw_qdrops, nwp->nw_queued);
		}
	}
}
#endif
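
/*
 * Usage note: DB_SHOW_COMMAND(netisr, ...) above registers a "show netisr"
 * command, so on a DDB-enabled kernel the table can be dumped from the
 * debugger prompt:
 *
 *	db> show netisr
 */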