netisr.c revision 134443
1111888Sjlemon/*-
2111888Sjlemon * Copyright (c) 2001,2002,2003 Jonathan Lemon <jlemon@FreeBSD.org>
3103781Sjake * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
4103781Sjake * All rights reserved.
5103781Sjake *
6103781Sjake * Redistribution and use in source and binary forms, with or without
7103781Sjake * modification, are permitted provided that the following conditions
8103781Sjake * are met:
9103781Sjake * 1. Redistributions of source code must retain the above copyright
10111888Sjlemon *    notice, this list of conditions and the following disclaimer.
11103781Sjake * 2. Redistributions in binary form must reproduce the above copyright
12103781Sjake *    notice, this list of conditions and the following disclaimer in the
13103781Sjake *    documentation and/or other materials provided with the distribution.
14103781Sjake *
15111888Sjlemon * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16111888Sjlemon * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17111888Sjlemon * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18111888Sjlemon * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19111888Sjlemon * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20111888Sjlemon * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21111888Sjlemon * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22111888Sjlemon * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23111888Sjlemon * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24111888Sjlemon * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25111888Sjlemon * SUCH DAMAGE.
26103781Sjake *
27103781Sjake * $FreeBSD: head/sys/net/netisr.c 134443 2004-08-28 15:11:13Z rwatson $
28103781Sjake */
29103781Sjake
30134443Srwatson#include "opt_net.h"
31134443Srwatson
32103781Sjake#include <sys/param.h>
33111888Sjlemon#include <sys/bus.h>
34111888Sjlemon#include <sys/rtprio.h>
35103781Sjake#include <sys/systm.h>
36103781Sjake#include <sys/interrupt.h>
37103781Sjake#include <sys/kernel.h>
38111888Sjlemon#include <sys/kthread.h>
39111888Sjlemon#include <sys/lock.h>
40111888Sjlemon#include <sys/malloc.h>
41111888Sjlemon#include <sys/proc.h>
42111888Sjlemon#include <sys/random.h>
43111888Sjlemon#include <sys/resourcevar.h>
44111888Sjlemon#include <sys/sysctl.h>
45111888Sjlemon#include <sys/unistd.h>
46111888Sjlemon#include <machine/atomic.h>
47111888Sjlemon#include <machine/cpu.h>
48111888Sjlemon#include <machine/stdarg.h>
49103781Sjake
50111888Sjlemon#include <sys/mbuf.h>
51111888Sjlemon#include <sys/socket.h>
52111888Sjlemon
53111888Sjlemon#include <net/if.h>
54111888Sjlemon#include <net/if_types.h>
55111888Sjlemon#include <net/if_var.h>
56103781Sjake#include <net/netisr.h>
57103781Sjake
58132368Srwatson/*
59132368Srwatson * debug_mpsafenet controls network subsystem-wide use of the Giant lock,
60134443Srwatson * from system calls down to interrupt handlers.  It can be changed only via
61134443Srwatson * a tunable at boot, not at run-time, due to the complexity of unwinding.
62134443Srwatson * The compiled default is set via a kernel option; right now, the default
63134443Srwatson * unless otherwise specified is to run the network stack without Giant.
64122152Ssam */
65134443Srwatson#ifdef NET_WITH_GIANT
66122152Ssamint	debug_mpsafenet = 0;
67134443Srwatson#else
68134443Srwatsonint	debug_mpsafenet = 1;
69134443Srwatson#endif
70134443Srwatsonint	debug_mpsafenet_toolatetotwiddle = 0;
71134443Srwatson
72122152SsamTUNABLE_INT("debug.mpsafenet", &debug_mpsafenet);
73122152SsamSYSCTL_INT(_debug, OID_AUTO, mpsafenet, CTLFLAG_RD, &debug_mpsafenet, 0,
74122152Ssam    "Enable/disable MPSAFE network support");
75122152Ssam
76111888Sjlemonvolatile unsigned int	netisr;	/* scheduling bits for network */
77103781Sjake
78111888Sjlemonstruct netisr {
79111888Sjlemon	netisr_t	*ni_handler;
80111888Sjlemon	struct ifqueue	*ni_queue;
81122320Ssam	int		ni_flags;
82111888Sjlemon} netisrs[32];
83103781Sjake
84111888Sjlemonstatic void *net_ih;
85111888Sjlemon
/*
 * Not all network code is currently capable of running MPSAFE; however,
 * most of it is.  The sections that are not are generally optional
 * components that are not shipped with default kernels, so a simple scheme
 * decides whether MPSAFE operation is permitted: the default is yes, and
 * non-MPSAFE components use a registration call to announce that they
 * require Giant.  If the system is still early in the boot process, the
 * debug_mpsafenet setting is switched to the non-MPSAFE (degraded)
 * execution mode.  If it is too late for that (the setting cannot be
 * changed at run time), a console warning is generated saying that the
 * configuration may be unsafe.
 *
 * mpsafe_warn_count counts those late warnings so that the remediation
 * hint is printed only with the first one.
 */
static int mpsafe_warn_count = 0;
99134443Srwatson
100134443Srwatson/*
101134443Srwatson * Function call implementing registration of a non-MPSAFE network component.
102134443Srwatson */
103103781Sjakevoid
104134443Srwatsonnet_warn_not_mpsafe(const char *component)
105134443Srwatson{
106134443Srwatson
107134443Srwatson	/*
108134443Srwatson	 * If we're running with Giant over the network stack, there is no
109134443Srwatson	 * problem.
110134443Srwatson	 */
111134443Srwatson	if (!debug_mpsafenet)
112134443Srwatson		return;
113134443Srwatson
114134443Srwatson	/*
115134443Srwatson	 * If it's not too late to change the MPSAFE setting for the network
116134443Srwatson	 * stack, do so now.  This effectively suppresses warnings by
117134443Srwatson	 * components registering later.
118134443Srwatson	 */
119134443Srwatson	if (!debug_mpsafenet_toolatetotwiddle) {
120134443Srwatson		debug_mpsafenet = 0;
121134443Srwatson		printf("WARNING: debug.mpsafenet forced to = as %s requires "
122134443Srwatson		    "Giant\n", component);
123134443Srwatson		return;
124134443Srwatson	}
125134443Srwatson
126134443Srwatson	/*
127134443Srwatson	 * We must run without Giant, so generate a console warning with some
128134443Srwatson	 * information with what to do about it.  The system may be operating
129134443Srwatson	 * unsafely, however.
130134443Srwatson	 */
131134443Srwatson	printf("WARNING: Network stack Giant-free, but %s requires Giant.\n",
132134443Srwatson	    component);
133134443Srwatson	if (mpsafe_warn_count == 0)
134134443Srwatson		printf("    Consider adding 'options NET_WITH_GIANT' or "
135134443Srwatson		    "setting debug.mpsafenet=0\n");
136134443Srwatson	mpsafe_warn_count++;
137134443Srwatson}
138134443Srwatson
139134443Srwatson/*
140134443Srwatson * This sysinit is run after any pre-loaded or compiled-in components have
141134443Srwatson * announced that they require Giant, but before any modules loaded at
142134443Srwatson * run-time.
143134443Srwatson */
144134443Srwatsonstatic void
145134443Srwatsonnet_mpsafe_toolate(void *arg)
146134443Srwatson{
147134443Srwatson
148134443Srwatson	debug_mpsafenet_toolatetotwiddle = 1;
149134443Srwatson
150134443Srwatson	if (!debug_mpsafenet)
151134443Srwatson		printf("WARNING: MPSAFE network stack disabled, expect "
152134443Srwatson		    "reduced performance.\n");
153134443Srwatson}
154134443Srwatson
155134443SrwatsonSYSINIT(net_mpsafe_toolate, SI_SUB_SETTINGS, SI_ORDER_ANY, net_mpsafe_toolate,
156134443Srwatson    NULL);
157134443Srwatson
158134443Srwatsonvoid
159103781Sjakelegacy_setsoftnet(void)
160103781Sjake{
161103781Sjake	swi_sched(net_ih, 0);
162103781Sjake}
163103781Sjake
164111888Sjlemonvoid
165122320Ssamnetisr_register(int num, netisr_t *handler, struct ifqueue *inq, int flags)
166103781Sjake{
167103781Sjake
168111888Sjlemon	KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))),
169111888Sjlemon	    ("bad isr %d", num));
170111888Sjlemon	netisrs[num].ni_handler = handler;
171111888Sjlemon	netisrs[num].ni_queue = inq;
172122320Ssam	if ((flags & NETISR_MPSAFE) && !debug_mpsafenet)
173122320Ssam		flags &= ~NETISR_MPSAFE;
174122320Ssam	netisrs[num].ni_flags = flags;
175111888Sjlemon}
176111888Sjlemon
177111888Sjlemonvoid
178111888Sjlemonnetisr_unregister(int num)
179111888Sjlemon{
180111888Sjlemon	struct netisr *ni;
181111888Sjlemon
182111888Sjlemon	KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))),
183111888Sjlemon	    ("bad isr %d", num));
184111888Sjlemon	ni = &netisrs[num];
185111888Sjlemon	ni->ni_handler = NULL;
186122320Ssam	if (ni->ni_queue != NULL)
187111888Sjlemon		IF_DRAIN(ni->ni_queue);
188103781Sjake}
189103781Sjake
/*
 * netisr event counters, exported read-only under the net.isr sysctl node.
 */
static struct isrstat {
	int	isrs_count;		/* calls to netisr_dispatch() */
	int	isrs_directed;		/* ...handled by direct dispatch */
	int	isrs_deferred;		/* ...placed on the queue instead */
	int	isrs_queued;		/* submitted through netisr_queue() */
	int	isrs_drop;		/* dropped, netisr had no queue */
	int	isrs_swi_count;		/* pending bits seen by swi_net() */
} isrstat;
199111888Sjlemon
200111888SjlemonSYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr counters");
201111888Sjlemon
202120653Srwatsonstatic int	netisr_enable = 0;
203111888SjlemonSYSCTL_INT(_net_isr, OID_AUTO, enable, CTLFLAG_RW,
204111888Sjlemon    &netisr_enable, 0, "enable direct dispatch");
205120656SrwatsonTUNABLE_INT("net.isr.enable", &netisr_enable);
206111888Sjlemon
207111888SjlemonSYSCTL_INT(_net_isr, OID_AUTO, count, CTLFLAG_RD,
208111888Sjlemon    &isrstat.isrs_count, 0, "");
209111888SjlemonSYSCTL_INT(_net_isr, OID_AUTO, directed, CTLFLAG_RD,
210111888Sjlemon    &isrstat.isrs_directed, 0, "");
211111888SjlemonSYSCTL_INT(_net_isr, OID_AUTO, deferred, CTLFLAG_RD,
212111888Sjlemon    &isrstat.isrs_deferred, 0, "");
213111888SjlemonSYSCTL_INT(_net_isr, OID_AUTO, queued, CTLFLAG_RD,
214111888Sjlemon    &isrstat.isrs_queued, 0, "");
215122320SsamSYSCTL_INT(_net_isr, OID_AUTO, drop, CTLFLAG_RD,
216122320Ssam    &isrstat.isrs_drop, 0, "");
217111888SjlemonSYSCTL_INT(_net_isr, OID_AUTO, swi_count, CTLFLAG_RD,
218111888Sjlemon    &isrstat.isrs_swi_count, 0, "");
219111888Sjlemon
220111888Sjlemon/*
221120704Srwatson * Process all packets currently present in a netisr queue.  Used to
222120704Srwatson * drain an existing set of packets waiting for processing when we
223120704Srwatson * begin direct dispatch, to avoid processing packets out of order.
224120704Srwatson */
225120704Srwatsonstatic void
226120704Srwatsonnetisr_processqueue(struct netisr *ni)
227120704Srwatson{
228120704Srwatson	struct mbuf *m;
229120704Srwatson
230120704Srwatson	for (;;) {
231120704Srwatson		IF_DEQUEUE(ni->ni_queue, m);
232120704Srwatson		if (m == NULL)
233120704Srwatson			break;
234120704Srwatson		ni->ni_handler(m);
235120704Srwatson	}
236120704Srwatson}
237120704Srwatson
238120704Srwatson/*
239111888Sjlemon * Call the netisr directly instead of queueing the packet, if possible.
240111888Sjlemon */
241111888Sjlemonvoid
242111888Sjlemonnetisr_dispatch(int num, struct mbuf *m)
243103781Sjake{
244111888Sjlemon	struct netisr *ni;
245103781Sjake
246122320Ssam	isrstat.isrs_count++;		/* XXX redundant */
247111888Sjlemon	KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))),
248111888Sjlemon	    ("bad isr %d", num));
249111888Sjlemon	ni = &netisrs[num];
250112011Sjlemon	if (ni->ni_queue == NULL) {
251122320Ssam		isrstat.isrs_drop++;
252112011Sjlemon		m_freem(m);
253112011Sjlemon		return;
254112011Sjlemon	}
255122320Ssam	/*
256122320Ssam	 * Do direct dispatch only for MPSAFE netisrs (and
257122320Ssam	 * only when enabled).  Note that when a netisr is
258122320Ssam	 * marked MPSAFE we permit multiple concurrent instances
259122320Ssam	 * to run.  We guarantee only the order in which
260122320Ssam	 * packets are processed for each "dispatch point" in
261122320Ssam	 * the system (i.e. call to netisr_dispatch or
262122320Ssam	 * netisr_queue).  This insures ordering of packets
263122320Ssam	 * from an interface but does not guarantee ordering
264122320Ssam	 * between multiple places in the system (e.g. IP
265122320Ssam	 * dispatched from interfaces vs. IP queued from IPSec).
266122320Ssam	 */
267122320Ssam	if (netisr_enable && (ni->ni_flags & NETISR_MPSAFE)) {
268111888Sjlemon		isrstat.isrs_directed++;
269111888Sjlemon		/*
270122320Ssam		 * NB: We used to drain the queue before handling
271122320Ssam		 * the packet but now do not.  Doing so here will
272122320Ssam		 * not preserve ordering so instead we fallback to
273122320Ssam		 * guaranteeing order only from dispatch points
274122320Ssam		 * in the system (see above).
275111888Sjlemon		 */
276111888Sjlemon		ni->ni_handler(m);
277111888Sjlemon	} else {
278111888Sjlemon		isrstat.isrs_deferred++;
279111888Sjlemon		if (IF_HANDOFF(ni->ni_queue, m, NULL))
280111888Sjlemon			schednetisr(num);
281103781Sjake	}
282103781Sjake}
283103781Sjake
284111888Sjlemon/*
285111888Sjlemon * Same as above, but always queue.
286111888Sjlemon * This is either used in places where we are not confident that
287111888Sjlemon * direct dispatch is possible, or where queueing is required.
288134391Sandre * It returns (0) on success and ERRNO on failure.  On failure the
289134391Sandre * mbuf has been free'd.
290111888Sjlemon */
291111888Sjlemonint
292111888Sjlemonnetisr_queue(int num, struct mbuf *m)
293111888Sjlemon{
294111888Sjlemon	struct netisr *ni;
295111888Sjlemon
296111888Sjlemon	KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))),
297111888Sjlemon	    ("bad isr %d", num));
298111888Sjlemon	ni = &netisrs[num];
299112011Sjlemon	if (ni->ni_queue == NULL) {
300122320Ssam		isrstat.isrs_drop++;
301112011Sjlemon		m_freem(m);
302134391Sandre		return (ENXIO);
303112011Sjlemon	}
304111888Sjlemon	isrstat.isrs_queued++;
305111888Sjlemon	if (!IF_HANDOFF(ni->ni_queue, m, NULL))
306134391Sandre		return (ENOBUFS);	/* IF_HANDOFF has free'd the mbuf */
307111888Sjlemon	schednetisr(num);
308134391Sandre	return (0);
309111888Sjlemon}
310103781Sjake
311103781Sjakestatic void
312103781Sjakeswi_net(void *dummy)
313103781Sjake{
314111888Sjlemon	struct netisr *ni;
315103781Sjake	u_int bits;
316103781Sjake	int i;
317103781Sjake#ifdef DEVICE_POLLING
318111888Sjlemon	const int polling = 1;
319111888Sjlemon#else
320111888Sjlemon	const int polling = 0;
321103781Sjake#endif
322111888Sjlemon
323111888Sjlemon	do {
324111888Sjlemon		bits = atomic_readandclear_int(&netisr);
325111888Sjlemon		if (bits == 0)
326111888Sjlemon			break;
327111888Sjlemon		while ((i = ffs(bits)) != 0) {
328111888Sjlemon			isrstat.isrs_swi_count++;
329111888Sjlemon			i--;
330111888Sjlemon			bits &= ~(1 << i);
331111888Sjlemon			ni = &netisrs[i];
332111888Sjlemon			if (ni->ni_handler == NULL) {
333111888Sjlemon				printf("swi_net: unregistered isr %d.\n", i);
334111888Sjlemon				continue;
335111888Sjlemon			}
336122320Ssam			if ((ni->ni_flags & NETISR_MPSAFE) == 0) {
337122320Ssam				mtx_lock(&Giant);
338122320Ssam				if (ni->ni_queue == NULL)
339122320Ssam					ni->ni_handler(NULL);
340122320Ssam				else
341122320Ssam					netisr_processqueue(ni);
342122320Ssam				mtx_unlock(&Giant);
343122320Ssam			} else {
344122320Ssam				if (ni->ni_queue == NULL)
345122320Ssam					ni->ni_handler(NULL);
346122320Ssam				else
347122320Ssam					netisr_processqueue(ni);
348122320Ssam			}
349111888Sjlemon		}
350111888Sjlemon	} while (polling);
351103781Sjake}
352103781Sjake
353103781Sjakestatic void
354103781Sjakestart_netisr(void *dummy)
355103781Sjake{
356103781Sjake
357122320Ssam	if (swi_add(NULL, "net", swi_net, NULL, SWI_NET, INTR_MPSAFE, &net_ih))
358103781Sjake		panic("start_netisr");
359103781Sjake}
360103781SjakeSYSINIT(start_netisr, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_netisr, NULL)
361