/*-
 * Copyright (c) 2001-2002 Luigi Rizzo
 *
 * Supported by: the Xorp Project (www.xorp.org)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_device_polling.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/proc.h>
#include <sys/eventhandler.h>
#include <sys/resourcevar.h>
#include <sys/socket.h>			/* needed by net/if.h		*/
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/netisr.h>			/* for NETISR_POLL		*/
#include <net/vnet.h>

void hardclock_device_poll(void);	/* hook from hardclock		*/

static struct mtx	poll_mtx;

/*
 * Polling support for [network] device drivers.
 *
 * Drivers which support this feature can register with the
 * polling code.
 *
 * If registration is successful, the driver must disable interrupts,
 * and further I/O is performed through the handler, which is invoked
 * (at least once per clock tick) with 3 arguments: the "arg" passed at
 * register time (a struct ifnet pointer), a command, and a "count" limit.
 *
 * The command can be one of the following:
 *  POLL_ONLY: quick move of "count" packets from input/output queues.
 *  POLL_AND_CHECK_STATUS: as above, plus check status registers or do
 *	other more expensive operations. This command is issued periodically
 *	but less frequently than POLL_ONLY.
 *
 * The count limit specifies how much work the handler can do during the
 * call -- typically this is the number of packets to be received, or
 * transmitted, etc. (drivers are free to interpret this number, as long
 * as the max time spent in the function grows roughly linearly with the
 * count).
 *
 * Polling is enabled and disabled by setting the IFCAP_POLLING flag on
 * the interface. The driver ioctl handler should register the interface
 * with polling and disable interrupts if registration was successful.
 *
 * A sysctl variable controls the sharing of CPU between polling/kernel
 * network processing and other activities (typically userlevel tasks):
 * kern.polling.user_frac (between 0 and 99, default 50) sets the share
 * of CPU allocated to user tasks. CPU is allocated proportionally to the
 * shares, by dynamically adjusting the "count" (poll_burst).
 *
 * Other parameters should be left at their default values.
 * The following constraints hold:
 *
 *	1 <= poll_each_burst <= poll_burst <= poll_burst_max
 *	MIN_POLL_BURST_MAX <= poll_burst_max <= MAX_POLL_BURST_MAX
 */
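
/*
 * As an illustration, a driver's poll handler typically looks like the
 * sketch below. This is a minimal, hypothetical example: foo_softc and
 * the foo_* helpers are made-up names; only the poll_handler_t shape
 * (see <net/if_var.h>) and the POLL_* commands come from this interface.
 *
 *	static int
 *	foo_poll(if_t ifp, enum poll_cmd cmd, int count)
 *	{
 *		struct foo_softc *sc = if_getsoftc(ifp);
 *		int rx_npkts;
 *
 *		rx_npkts = foo_rxeof(sc, count);   // at most "count" packets
 *		foo_txeof(sc);                     // reclaim completed tx
 *		if (cmd == POLL_AND_CHECK_STATUS)  // the rare, expensive pass
 *			foo_check_status(sc);
 *		return (rx_npkts);
 *	}
 */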

#define MIN_POLL_BURST_MAX	10
#define MAX_POLL_BURST_MAX	20000

static uint32_t poll_burst = 5;
static uint32_t poll_burst_max = 150;	/* good for 100Mbit net and HZ=1000 */
static uint32_t poll_each_burst = 5;

static SYSCTL_NODE(_kern, OID_AUTO, polling, CTLFLAG_RW, 0,
	"Device polling parameters");

SYSCTL_UINT(_kern_polling, OID_AUTO, burst, CTLFLAG_RD,
	&poll_burst, 0, "Current polling burst size");

static int	netisr_poll_scheduled;
static int	netisr_pollmore_scheduled;
static int	poll_shutting_down;

static int
poll_burst_max_sysctl(SYSCTL_HANDLER_ARGS)
{
	uint32_t val = poll_burst_max;
	int error;

	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr)
		return (error);
	if (val < MIN_POLL_BURST_MAX || val > MAX_POLL_BURST_MAX)
		return (EINVAL);

	mtx_lock(&poll_mtx);
	poll_burst_max = val;
	if (poll_burst > poll_burst_max)
		poll_burst = poll_burst_max;
	if (poll_each_burst > poll_burst_max)
		poll_each_burst = MIN_POLL_BURST_MAX;	/* fall back to a safe value */
	mtx_unlock(&poll_mtx);

	return (0);
}
SYSCTL_PROC(_kern_polling, OID_AUTO, burst_max, CTLTYPE_UINT | CTLFLAG_RW,
	0, sizeof(uint32_t), poll_burst_max_sysctl, "I", "Max Polling burst size");

static int
poll_each_burst_sysctl(SYSCTL_HANDLER_ARGS)
{
	uint32_t val = poll_each_burst;
	int error;

	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr)
		return (error);
	if (val < 1)
		return (EINVAL);

	mtx_lock(&poll_mtx);
	if (val > poll_burst_max) {
		mtx_unlock(&poll_mtx);
		return (EINVAL);
	}
	poll_each_burst = val;
	mtx_unlock(&poll_mtx);

	return (0);
}
SYSCTL_PROC(_kern_polling, OID_AUTO, each_burst, CTLTYPE_UINT | CTLFLAG_RW,
	0, sizeof(uint32_t), poll_each_burst_sysctl, "I",
	"Max size of each burst");

static uint32_t poll_in_idle_loop = 0;	/* do we poll in idle loop? */
SYSCTL_UINT(_kern_polling, OID_AUTO, idle_poll, CTLFLAG_RW,
	&poll_in_idle_loop, 0, "Enable device polling in idle loop");

static uint32_t user_frac = 50;
static int
user_frac_sysctl(SYSCTL_HANDLER_ARGS)
{
	uint32_t val = user_frac;
	int error;

	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr)
		return (error);
	if (val > 99)
		return (EINVAL);

	mtx_lock(&poll_mtx);
	user_frac = val;
	mtx_unlock(&poll_mtx);

	return (0);
}
SYSCTL_PROC(_kern_polling, OID_AUTO, user_frac, CTLTYPE_UINT | CTLFLAG_RW,
	0, sizeof(uint32_t), user_frac_sysctl, "I",
	"Desired user fraction of cpu time");
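
/*
 * A usage sketch (values are illustrative): to devote roughly 80% of
 * each tick to userland and cap bursts at 1000 packets, one could set
 *
 *	sysctl kern.polling.user_frac=80
 *	sysctl kern.polling.burst_max=1000
 */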

static uint32_t reg_frac_count = 0;
static uint32_t reg_frac = 20;
static int
reg_frac_sysctl(SYSCTL_HANDLER_ARGS)
{
	uint32_t val = reg_frac;
	int error;

	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr)
		return (error);
	if (val < 1 || val > hz)
		return (EINVAL);

	mtx_lock(&poll_mtx);
	reg_frac = val;
	if (reg_frac_count >= reg_frac)
		reg_frac_count = 0;
	mtx_unlock(&poll_mtx);

	return (0);
}
SYSCTL_PROC(_kern_polling, OID_AUTO, reg_frac, CTLTYPE_UINT | CTLFLAG_RW,
	0, sizeof(uint32_t), reg_frac_sysctl, "I",
	"Check status registers once every reg_frac polling cycles");

static uint32_t short_ticks;
SYSCTL_UINT(_kern_polling, OID_AUTO, short_ticks, CTLFLAG_RD,
	&short_ticks, 0, "Hardclock ticks shorter than they should be");

static uint32_t lost_polls;
SYSCTL_UINT(_kern_polling, OID_AUTO, lost_polls, CTLFLAG_RD,
	&lost_polls, 0, "How many times we would have lost a poll tick");

static uint32_t pending_polls;
SYSCTL_UINT(_kern_polling, OID_AUTO, pending_polls, CTLFLAG_RD,
	&pending_polls, 0, "Do we need to poll again");

static int residual_burst = 0;
SYSCTL_INT(_kern_polling, OID_AUTO, residual_burst, CTLFLAG_RD,
	&residual_burst, 0, "# of residual cycles in burst");

static uint32_t poll_handlers; /* next free entry in pr[]. */
SYSCTL_UINT(_kern_polling, OID_AUTO, handlers, CTLFLAG_RD,
	&poll_handlers, 0, "Number of registered poll handlers");

static uint32_t phase;
SYSCTL_UINT(_kern_polling, OID_AUTO, phase, CTLFLAG_RD,
	&phase, 0, "Polling phase");

static uint32_t suspect;
SYSCTL_UINT(_kern_polling, OID_AUTO, suspect, CTLFLAG_RD,
	&suspect, 0, "suspect event");

static uint32_t stalled;
SYSCTL_UINT(_kern_polling, OID_AUTO, stalled, CTLFLAG_RD,
	&stalled, 0, "potential stalls");

static uint32_t idlepoll_sleeping; /* idlepoll is sleeping */
SYSCTL_UINT(_kern_polling, OID_AUTO, idlepoll_sleeping, CTLFLAG_RD,
	&idlepoll_sleeping, 0, "idlepoll is sleeping");

#define POLL_LIST_LEN  128
struct pollrec {
	poll_handler_t	*handler;
	struct ifnet	*ifp;
};

static struct pollrec pr[POLL_LIST_LEN];

static void
poll_shutdown(void *arg, int howto)
{

	poll_shutting_down = 1;
}

static void
init_device_poll(void)
{

	mtx_init(&poll_mtx, "polling", NULL, MTX_DEF);
	EVENTHANDLER_REGISTER(shutdown_post_sync, poll_shutdown, NULL,
	    SHUTDOWN_PRI_LAST);
}
SYSINIT(device_poll, SI_SUB_SOFTINTR, SI_ORDER_MIDDLE, init_device_poll, NULL);

/*
 * Hook from hardclock. Tries to schedule a netisr, but keeps track
 * of lost ticks due to the previous handler taking too long.
 * Normally, this should not happen, because the polling handler should
 * run for a short time. However, in some cases (e.g. when there are
 * changes in link status etc.) the drivers take a very long time
 * (even in the order of milliseconds) to reset and reconfigure the
 * device, causing apparent lost polls.
 *
 * The first part of the code is just for debugging purposes, and tries
 * to count how often hardclock ticks are shorter than they should be,
 * meaning either stray interrupts or delayed events.
 */
void
hardclock_device_poll(void)
{
	static struct timeval prev_t, t;
	int delta;

	if (poll_handlers == 0 || poll_shutting_down)
		return;

	microuptime(&t);
	delta = (t.tv_usec - prev_t.tv_usec) +
		(t.tv_sec - prev_t.tv_sec)*1000000;
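	/*
	 * A nominal tick lasts 1000000/hz us, so "delta * hz < 500000"
	 * flags ticks that were shorter than half their nominal length.
	 */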
	if (delta * hz < 500000)
		short_ticks++;
	else
		prev_t = t;

	if (pending_polls > 100) {
		/*
		 * Too much, assume it has stalled (not always true,
		 * see comment above).
		 */
		stalled++;
		pending_polls = 0;
		phase = 0;
	}

	if (phase <= 2) {
		if (phase != 0)
			suspect++;
		phase = 1;
		netisr_poll_scheduled = 1;
		netisr_pollmore_scheduled = 1;
		netisr_sched_poll();
		phase = 2;
	}
	if (pending_polls++ > 0)
		lost_polls++;
}

/*
 * ether_poll is called from the idle loop.
 */
static void
ether_poll(int count)
{
	int i;

	mtx_lock(&poll_mtx);

	if (count > poll_each_burst)
		count = poll_each_burst;

	for (i = 0; i < poll_handlers; i++)
		pr[i].handler(pr[i].ifp, POLL_ONLY, count);

	mtx_unlock(&poll_mtx);
}

/*
 * netisr_pollmore is called after other netisr's, possibly scheduling
 * another NETISR_POLL call, or adapting the burst size for the next cycle.
 *
 * It is very bad to fetch large bursts of packets from a single card at once,
 * because the burst could take a long time to be completely processed, or
 * could saturate the intermediate queue (ipintrq or similar) leading to
 * losses or unfairness. To reduce the problem, and also to account better for
 * time spent in network-related processing, we split the burst in smaller
 * chunks of fixed size, giving control to the other netisr's between chunks.
 * (E.g. a burst of 150 packets with poll_each_burst = 5 is served as 30
 * chunks of 5 packets each.)
 * This helps in improving the fairness, reducing livelock (because we
 * emulate more closely the "process to completion" that we have with
 * fastforwarding) and accounting for the work performed in low level
 * handling and forwarding.
 */

static struct timeval poll_start_t;

void
netisr_pollmore(void)
{
	struct timeval t;
	int kern_load;

	if (poll_handlers == 0)
		return;

	mtx_lock(&poll_mtx);
	if (!netisr_pollmore_scheduled) {
		mtx_unlock(&poll_mtx);
		return;
	}
	netisr_pollmore_scheduled = 0;
	phase = 5;
	if (residual_burst > 0) {
		netisr_poll_scheduled = 1;
		netisr_pollmore_scheduled = 1;
		netisr_sched_poll();
		mtx_unlock(&poll_mtx);
		/* will run immediately on return, followed by netisrs */
		return;
	}
	/* here we can account time spent in netisr's in this tick */
	microuptime(&t);
	kern_load = (t.tv_usec - poll_start_t.tv_usec) +
		(t.tv_sec - poll_start_t.tv_sec)*1000000;	/* us */
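	/*
	 * A tick lasts 1000000/hz us, so (us * hz) / 10000 rescales the
	 * elapsed time to a 0..100 percentage of a full tick.
	 */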
	kern_load = (kern_load * hz) / 10000;			/* 0..100 */
	if (kern_load > (100 - user_frac)) {	/* try to decrease burst */
		if (poll_burst > 1)
			poll_burst--;
	} else {
		if (poll_burst < poll_burst_max)
			poll_burst++;
	}

	pending_polls--;
	if (pending_polls == 0) /* we are done */
		phase = 0;
	else {
		/*
		 * Last cycle was long and caused us to miss one or more
		 * hardclock ticks. Restart processing again, but slightly
		 * reduce the burst size to prevent this from happening again.
		 */
		poll_burst -= (poll_burst / 8);
		if (poll_burst < 1)
			poll_burst = 1;
		netisr_poll_scheduled = 1;
		netisr_pollmore_scheduled = 1;
		netisr_sched_poll();
		phase = 6;
	}
	mtx_unlock(&poll_mtx);
}

/*
 * netisr_poll is typically scheduled once per tick.
 */
void
netisr_poll(void)
{
	int i, cycles;
	enum poll_cmd arg = POLL_ONLY;

	if (poll_handlers == 0)
		return;

	mtx_lock(&poll_mtx);
	if (!netisr_poll_scheduled) {
		mtx_unlock(&poll_mtx);
		return;
	}
	netisr_poll_scheduled = 0;
	phase = 3;
	if (residual_burst == 0) { /* first call in this tick */
		microuptime(&poll_start_t);
		if (++reg_frac_count == reg_frac) {
			arg = POLL_AND_CHECK_STATUS;
			reg_frac_count = 0;
		}

		residual_burst = poll_burst;
	}
	cycles = (residual_burst < poll_each_burst) ?
		residual_burst : poll_each_burst;
	residual_burst -= cycles;

	for (i = 0; i < poll_handlers; i++)
		pr[i].handler(pr[i].ifp, arg, cycles);

	phase = 4;
	mtx_unlock(&poll_mtx);
}

/*
 * Try to register routine for polling. Returns 0 if successful
 * (and polling should be enabled), error code otherwise.
 * A device is not supposed to register itself multiple times.
 *
 * This is called from within the *_ioctl() functions.
 */
int
ether_poll_register(poll_handler_t *h, if_t ifp)
{
	int i;

	KASSERT(h != NULL, ("%s: handler is NULL", __func__));
	KASSERT(ifp != NULL, ("%s: ifp is NULL", __func__));

	mtx_lock(&poll_mtx);
	if (poll_handlers >= POLL_LIST_LEN) {
		/*
		 * List full, cannot register more entries.
		 * This should never happen; if it does, it is probably a
		 * broken driver trying to register multiple times. Checking
		 * this at runtime is expensive, and won't solve the problem
		 * anyways, so just report a few times and then give up.
		 */
		static int verbose = 10;
		if (verbose > 0) {
			log(LOG_ERR, "poll handlers list full, "
			    "maybe a broken driver?\n");
			verbose--;
		}
		mtx_unlock(&poll_mtx);
		return (ENOMEM); /* no polling for you */
	}

	for (i = 0; i < poll_handlers; i++)
		if (pr[i].ifp == ifp && pr[i].handler != NULL) {
			mtx_unlock(&poll_mtx);
			log(LOG_DEBUG, "ether_poll_register: %s: handler"
			    " already registered\n", ifp->if_xname);
			return (EEXIST);
		}

	pr[poll_handlers].handler = h;
	pr[poll_handlers].ifp = ifp;
	poll_handlers++;
	mtx_unlock(&poll_mtx);
	if (idlepoll_sleeping)
		wakeup(&idlepoll_sleeping);
	return (0);
}
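
/*
 * For illustration, the matching driver side is sketched below: a
 * hypothetical foo(4) handles IFCAP_POLLING in the SIOCSIFCAP branch
 * of its ioctl routine, disabling interrupts only after a successful
 * registration and re-enabling them on deregistration (foo_poll() and
 * the FOO_LOCK()/foo_intr_* helpers are made-up names):
 *
 *	case SIOCSIFCAP:
 *		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
 *		if (mask & IFCAP_POLLING) {
 *			if (ifr->ifr_reqcap & IFCAP_POLLING) {
 *				error = ether_poll_register(foo_poll, ifp);
 *				if (error != 0)
 *					break;
 *				FOO_LOCK(sc);
 *				foo_intr_disable(sc);
 *				if_setcapenablebit(ifp, IFCAP_POLLING, 0);
 *				FOO_UNLOCK(sc);
 *			} else {
 *				error = ether_poll_deregister(ifp);
 *				FOO_LOCK(sc);
 *				foo_intr_enable(sc);
 *				if_setcapenablebit(ifp, 0, IFCAP_POLLING);
 *				FOO_UNLOCK(sc);
 *			}
 *		}
 *		break;
 */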

/*
 * Remove interface from the polling list. Called from *_ioctl(), too.
 */
int
ether_poll_deregister(if_t ifp)
{
	int i;

	KASSERT(ifp != NULL, ("%s: ifp is NULL", __func__));

	mtx_lock(&poll_mtx);

	for (i = 0; i < poll_handlers; i++)
		if (pr[i].ifp == ifp) /* found it */
			break;
	if (i == poll_handlers) {
		log(LOG_DEBUG, "ether_poll_deregister: %s: not found!\n",
		    ifp->if_xname);
		mtx_unlock(&poll_mtx);
		return (ENOENT);
	}
	poll_handlers--;
	if (i < poll_handlers) { /* Last entry replaces this one. */
		pr[i].handler = pr[poll_handlers].handler;
		pr[i].ifp = pr[poll_handlers].ifp;
	}
	mtx_unlock(&poll_mtx);
	return (0);
}

static void
poll_idle(void)
{
	struct thread *td = curthread;
	struct rtprio rtp;

	rtp.prio = RTP_PRIO_MAX;	/* lowest priority */
	rtp.type = RTP_PRIO_IDLE;
	PROC_SLOCK(td->td_proc);
	rtp_to_pri(&rtp, td);
	PROC_SUNLOCK(td->td_proc);

	for (;;) {
		if (poll_in_idle_loop && poll_handlers > 0) {
			idlepoll_sleeping = 0;
			ether_poll(poll_each_burst);
			thread_lock(td);
			mi_switch(SW_VOL, NULL);
			thread_unlock(td);
		} else {
			idlepoll_sleeping = 1;
			tsleep(&idlepoll_sleeping, 0, "pollid", hz * 3);
		}
	}
}

static struct proc *idlepoll;
static struct kproc_desc idlepoll_kp = {
	 "idlepoll",
	 poll_idle,
	 &idlepoll
};
SYSINIT(idlepoll, SI_SUB_KTHREAD_VM, SI_ORDER_ANY, kproc_start,
    &idlepoll_kp);