1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice unmodified, this list of conditions, and the following
10 *    disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: stable/10/sys/kern/kern_intr.c 256381 2013-10-12 15:31:36Z markm $");
29
30#include "opt_ddb.h"
31
32#include <sys/param.h>
33#include <sys/bus.h>
34#include <sys/conf.h>
35#include <sys/cpuset.h>
36#include <sys/rtprio.h>
37#include <sys/systm.h>
38#include <sys/interrupt.h>
39#include <sys/kernel.h>
40#include <sys/kthread.h>
41#include <sys/ktr.h>
42#include <sys/limits.h>
43#include <sys/lock.h>
44#include <sys/malloc.h>
45#include <sys/mutex.h>
46#include <sys/priv.h>
47#include <sys/proc.h>
48#include <sys/random.h>
49#include <sys/resourcevar.h>
50#include <sys/sched.h>
51#include <sys/smp.h>
52#include <sys/sysctl.h>
53#include <sys/syslog.h>
54#include <sys/unistd.h>
55#include <sys/vmmeter.h>
56#include <machine/atomic.h>
57#include <machine/cpu.h>
58#include <machine/md_var.h>
59#include <machine/stdarg.h>
60#ifdef DDB
61#include <ddb/ddb.h>
62#include <ddb/db_sym.h>
63#endif
64
65/*
66 * Describe an interrupt thread.  There is one of these per interrupt event.
67 */
68struct intr_thread {
69	struct intr_event *it_event;
70	struct thread *it_thread;	/* Kernel thread. */
71	int	it_flags;		/* (j) IT_* flags. */
72	int	it_need;		/* Needs service. */
73};
74
75/* Interrupt thread flags kept in it_flags */
76#define	IT_DEAD		0x000001	/* Thread is waiting to exit. */
77#define	IT_WAIT		0x000002	/* Thread is waiting for completion. */
78
79struct	intr_entropy {
80	struct	thread *td;
81	uintptr_t event;
82};
83
84struct	intr_event *clk_intr_event;
85struct	intr_event *tty_intr_event;
86void	*vm_ih;
87struct proc *intrproc;
88
89static MALLOC_DEFINE(M_ITHREAD, "ithread", "Interrupt Threads");
90
91static int intr_storm_threshold = 1000;
92TUNABLE_INT("hw.intr_storm_threshold", &intr_storm_threshold);
93SYSCTL_INT(_hw, OID_AUTO, intr_storm_threshold, CTLFLAG_RW,
94    &intr_storm_threshold, 0,
95    "Number of consecutive interrupts before storm protection is enabled");
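
/*
 * Usage note (illustrative, not part of the original file): since the
 * storm threshold is both a loader tunable and a writable sysctl, it can
 * be adjusted without recompiling the kernel, e.g.
 *
 *	hw.intr_storm_threshold="2000"		in /boot/loader.conf, or
 *	# sysctl hw.intr_storm_threshold=2000	at run time.
 *
 * Setting it to 0 disables storm protection entirely (see the check in
 * ithread_execute_handlers() below).
 */
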
96static TAILQ_HEAD(, intr_event) event_list =
97    TAILQ_HEAD_INITIALIZER(event_list);
98static struct mtx event_lock;
99MTX_SYSINIT(intr_event_list, &event_lock, "intr event list", MTX_DEF);
100
101static void	intr_event_update(struct intr_event *ie);
102#ifdef INTR_FILTER
103static int	intr_event_schedule_thread(struct intr_event *ie,
104		    struct intr_thread *ithd);
105static int	intr_filter_loop(struct intr_event *ie,
106		    struct trapframe *frame, struct intr_thread **ithd);
107static struct intr_thread *ithread_create(const char *name,
108			      struct intr_handler *ih);
109#else
110static int	intr_event_schedule_thread(struct intr_event *ie);
111static struct intr_thread *ithread_create(const char *name);
112#endif
113static void	ithread_destroy(struct intr_thread *ithread);
114static void	ithread_execute_handlers(struct proc *p,
115		    struct intr_event *ie);
116#ifdef INTR_FILTER
117static void	priv_ithread_execute_handler(struct proc *p,
118		    struct intr_handler *ih);
119#endif
120static void	ithread_loop(void *);
121static void	ithread_update(struct intr_thread *ithd);
122static void	start_softintr(void *);
123
124/* Map an interrupt type to an ithread priority. */
125u_char
126intr_priority(enum intr_type flags)
127{
128	u_char pri;
129
130	flags &= (INTR_TYPE_TTY | INTR_TYPE_BIO | INTR_TYPE_NET |
131	    INTR_TYPE_CAM | INTR_TYPE_MISC | INTR_TYPE_CLK | INTR_TYPE_AV);
132	switch (flags) {
133	case INTR_TYPE_TTY:
134		pri = PI_TTY;
135		break;
136	case INTR_TYPE_BIO:
137		pri = PI_DISK;
138		break;
139	case INTR_TYPE_NET:
140		pri = PI_NET;
141		break;
142	case INTR_TYPE_CAM:
143		pri = PI_DISK;
144		break;
145	case INTR_TYPE_AV:
146		pri = PI_AV;
147		break;
148	case INTR_TYPE_CLK:
149		pri = PI_REALTIME;
150		break;
151	case INTR_TYPE_MISC:
152		pri = PI_DULL;          /* don't care */
153		break;
154	default:
155		/* We didn't specify an interrupt level. */
156		panic("intr_priority: no interrupt type in flags");
157	}
158
159	return pri;
160}
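
/*
 * Illustrative example (hypothetical driver code, not part of this file):
 * the INTR_TYPE_* flag that intr_priority() inspects normally originates
 * in a driver's bus_setup_intr() call, e.g. a network driver might do
 *
 *	error = bus_setup_intr(dev, sc->irq_res,
 *	    INTR_TYPE_NET | INTR_MPSAFE, NULL, foo_intr, sc, &sc->intr_hand);
 *
 * where "sc", "foo_intr" and the softc fields are invented names; the
 * resulting ithread would then run at PI_NET per the table above.
 */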
161
162/*
163 * Update an ithread based on the associated intr_event.
164 */
165static void
166ithread_update(struct intr_thread *ithd)
167{
168	struct intr_event *ie;
169	struct thread *td;
170	u_char pri;
171
172	ie = ithd->it_event;
173	td = ithd->it_thread;
174
175	/* Determine the overall priority of this event. */
176	if (TAILQ_EMPTY(&ie->ie_handlers))
177		pri = PRI_MAX_ITHD;
178	else
179		pri = TAILQ_FIRST(&ie->ie_handlers)->ih_pri;
180
181	/* Update name and priority. */
182	strlcpy(td->td_name, ie->ie_fullname, sizeof(td->td_name));
183#ifdef KTR
184	sched_clear_tdname(td);
185#endif
186	thread_lock(td);
187	sched_prio(td, pri);
188	thread_unlock(td);
189}
190
191/*
192 * Regenerate the full name of an interrupt event and update its priority.
193 */
194static void
195intr_event_update(struct intr_event *ie)
196{
197	struct intr_handler *ih;
198	char *last;
199	int missed, space;
200
201	/* Start off with no entropy and just the name of the event. */
202	mtx_assert(&ie->ie_lock, MA_OWNED);
203	strlcpy(ie->ie_fullname, ie->ie_name, sizeof(ie->ie_fullname));
204	ie->ie_flags &= ~IE_ENTROPY;
205	missed = 0;
206	space = 1;
207
208	/* Run through all the handlers updating values. */
209	TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
210		if (strlen(ie->ie_fullname) + strlen(ih->ih_name) + 1 <
211		    sizeof(ie->ie_fullname)) {
212			strcat(ie->ie_fullname, " ");
213			strcat(ie->ie_fullname, ih->ih_name);
214			space = 0;
215		} else
216			missed++;
217		if (ih->ih_flags & IH_ENTROPY)
218			ie->ie_flags |= IE_ENTROPY;
219	}
220
221	/*
222	 * If the handler names were too long, add +'s to indicate missing
223	 * names. If we run out of room and still have +'s to add, change
224	 * the last character from a + to a *.
225	 */
226	last = &ie->ie_fullname[sizeof(ie->ie_fullname) - 2];
227	while (missed-- > 0) {
228		if (strlen(ie->ie_fullname) + 1 == sizeof(ie->ie_fullname)) {
229			if (*last == '+') {
230				*last = '*';
231				break;
232			} else
233				*last = '+';
234		} else if (space) {
235			strcat(ie->ie_fullname, " +");
236			space = 0;
237		} else
238			strcat(ie->ie_fullname, "+");
239	}
240
241	/*
242	 * If this event has an ithread, update its priority and
243	 * name.
244	 */
245	if (ie->ie_thread != NULL)
246		ithread_update(ie->ie_thread);
247	CTR2(KTR_INTR, "%s: updated %s", __func__, ie->ie_fullname);
248}
249
250int
251intr_event_create(struct intr_event **event, void *source, int flags, int irq,
252    void (*pre_ithread)(void *), void (*post_ithread)(void *),
253    void (*post_filter)(void *), int (*assign_cpu)(void *, u_char),
254    const char *fmt, ...)
255{
256	struct intr_event *ie;
257	va_list ap;
258
259	/* The only valid flag during creation is IE_SOFT. */
260	if ((flags & ~IE_SOFT) != 0)
261		return (EINVAL);
262	ie = malloc(sizeof(struct intr_event), M_ITHREAD, M_WAITOK | M_ZERO);
263	ie->ie_source = source;
264	ie->ie_pre_ithread = pre_ithread;
265	ie->ie_post_ithread = post_ithread;
266	ie->ie_post_filter = post_filter;
267	ie->ie_assign_cpu = assign_cpu;
268	ie->ie_flags = flags;
269	ie->ie_irq = irq;
270	ie->ie_cpu = NOCPU;
271	TAILQ_INIT(&ie->ie_handlers);
272	mtx_init(&ie->ie_lock, "intr event", NULL, MTX_DEF);
273
274	va_start(ap, fmt);
275	vsnprintf(ie->ie_name, sizeof(ie->ie_name), fmt, ap);
276	va_end(ap);
277	strlcpy(ie->ie_fullname, ie->ie_name, sizeof(ie->ie_fullname));
278	mtx_lock(&event_lock);
279	TAILQ_INSERT_TAIL(&event_list, ie, ie_list);
280	mtx_unlock(&event_lock);
281	if (event != NULL)
282		*event = ie;
283	CTR2(KTR_INTR, "%s: created %s", __func__, ie->ie_name);
284	return (0);
285}
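
/*
 * Illustrative example (hypothetical machine-dependent caller, not part
 * of this file): MD interrupt code typically creates one event per
 * interrupt source and wires in its PIC callbacks, roughly like
 *
 *	error = intr_event_create(&isrc->is_event, isrc, 0, irq,
 *	    foo_pic_pre_ithread, foo_pic_post_ithread, foo_pic_post_filter,
 *	    NULL, "irq%d:", irq);
 *
 * where "isrc" and the foo_pic_* callbacks are invented names standing in
 * for the platform's interrupt-source structure and its mask/unmask/EOI
 * routines.
 */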
286
287/*
288 * Bind an interrupt event to the specified CPU.  Note that not all
289 * platforms support binding an interrupt to a CPU.  For those
290 * platforms this request will fail.  For supported platforms, any
291 * associated ithreads as well as the primary interrupt context will
292 * be bound to the specified CPU.  Using a cpu id of NOCPU unbinds
293 * the interrupt event.
294 */
295int
296intr_event_bind(struct intr_event *ie, u_char cpu)
297{
298	cpuset_t mask;
299	lwpid_t id;
300	int error;
301
302	/* Need a CPU to bind to. */
303	if (cpu != NOCPU && CPU_ABSENT(cpu))
304		return (EINVAL);
305
306	if (ie->ie_assign_cpu == NULL)
307		return (EOPNOTSUPP);
308
309	error = priv_check(curthread, PRIV_SCHED_CPUSET_INTR);
310	if (error)
311		return (error);
312
313	/*
314	 * If we have any ithreads, try to set their mask first to verify
315	 * permissions, etc.
316	 */
317	mtx_lock(&ie->ie_lock);
318	if (ie->ie_thread != NULL) {
319		CPU_ZERO(&mask);
320		if (cpu == NOCPU)
321			CPU_COPY(cpuset_root, &mask);
322		else
323			CPU_SET(cpu, &mask);
324		id = ie->ie_thread->it_thread->td_tid;
325		mtx_unlock(&ie->ie_lock);
326		error = cpuset_setthread(id, &mask);
327		if (error)
328			return (error);
329	} else
330		mtx_unlock(&ie->ie_lock);
331	error = ie->ie_assign_cpu(ie->ie_source, cpu);
332	if (error) {
333		mtx_lock(&ie->ie_lock);
334		if (ie->ie_thread != NULL) {
335			CPU_ZERO(&mask);
336			if (ie->ie_cpu == NOCPU)
337				CPU_COPY(cpuset_root, &mask);
338			else
339				CPU_SET(ie->ie_cpu, &mask);
340			id = ie->ie_thread->it_thread->td_tid;
341			mtx_unlock(&ie->ie_lock);
342			(void)cpuset_setthread(id, &mask);
343		} else
344			mtx_unlock(&ie->ie_lock);
345		return (error);
346	}
347
348	mtx_lock(&ie->ie_lock);
349	ie->ie_cpu = cpu;
350	mtx_unlock(&ie->ie_lock);
351
352	return (error);
353}
354
355static struct intr_event *
356intr_lookup(int irq)
357{
358	struct intr_event *ie;
359
360	mtx_lock(&event_lock);
361	TAILQ_FOREACH(ie, &event_list, ie_list)
362		if (ie->ie_irq == irq &&
363		    (ie->ie_flags & IE_SOFT) == 0 &&
364		    TAILQ_FIRST(&ie->ie_handlers) != NULL)
365			break;
366	mtx_unlock(&event_lock);
367	return (ie);
368}
369
370int
371intr_setaffinity(int irq, void *m)
372{
373	struct intr_event *ie;
374	cpuset_t *mask;
375	u_char cpu;
376	int n;
377
378	mask = m;
379	cpu = NOCPU;
380	/*
381	 * If we're setting all cpus we can unbind.  Otherwise make sure
382	 * only one cpu is in the set.
383	 */
384	if (CPU_CMP(cpuset_root, mask)) {
385		for (n = 0; n < CPU_SETSIZE; n++) {
386			if (!CPU_ISSET(n, mask))
387				continue;
388			if (cpu != NOCPU)
389				return (EINVAL);
390			cpu = (u_char)n;
391		}
392	}
393	ie = intr_lookup(irq);
394	if (ie == NULL)
395		return (ESRCH);
396	return (intr_event_bind(ie, cpu));
397}
398
399int
400intr_getaffinity(int irq, void *m)
401{
402	struct intr_event *ie;
403	cpuset_t *mask;
404
405	mask = m;
406	ie = intr_lookup(irq);
407	if (ie == NULL)
408		return (ESRCH);
409	CPU_ZERO(mask);
410	mtx_lock(&ie->ie_lock);
411	if (ie->ie_cpu == NOCPU)
412		CPU_COPY(cpuset_root, mask);
413	else
414		CPU_SET(ie->ie_cpu, mask);
415	mtx_unlock(&ie->ie_lock);
416	return (0);
417}
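
/*
 * Usage note (illustrative, not part of the original file): these two
 * routines back the CPU_WHICH_IRQ case of the cpuset(2) interface, so an
 * interrupt can typically be pinned from userland with cpuset(1), e.g.
 *
 *	# cpuset -l 2 -x 16
 *
 * to bind IRQ 16 (assuming such an IRQ exists) to CPU 2.  Handing back the
 * full root set unbinds it again, matching the NOCPU handling above.
 */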
418
419int
420intr_event_destroy(struct intr_event *ie)
421{
422
423	mtx_lock(&event_lock);
424	mtx_lock(&ie->ie_lock);
425	if (!TAILQ_EMPTY(&ie->ie_handlers)) {
426		mtx_unlock(&ie->ie_lock);
427		mtx_unlock(&event_lock);
428		return (EBUSY);
429	}
430	TAILQ_REMOVE(&event_list, ie, ie_list);
431#ifndef notyet
432	if (ie->ie_thread != NULL) {
433		ithread_destroy(ie->ie_thread);
434		ie->ie_thread = NULL;
435	}
436#endif
437	mtx_unlock(&ie->ie_lock);
438	mtx_unlock(&event_lock);
439	mtx_destroy(&ie->ie_lock);
440	free(ie, M_ITHREAD);
441	return (0);
442}
443
444#ifndef INTR_FILTER
445static struct intr_thread *
446ithread_create(const char *name)
447{
448	struct intr_thread *ithd;
449	struct thread *td;
450	int error;
451
452	ithd = malloc(sizeof(struct intr_thread), M_ITHREAD, M_WAITOK | M_ZERO);
453
454	error = kproc_kthread_add(ithread_loop, ithd, &intrproc,
455		    &td, RFSTOPPED | RFHIGHPID,
456	    	    0, "intr", "%s", name);
457	if (error)
458		panic("kproc_create() failed with %d", error);
459	thread_lock(td);
460	sched_class(td, PRI_ITHD);
461	TD_SET_IWAIT(td);
462	thread_unlock(td);
463	td->td_pflags |= TDP_ITHREAD;
464	ithd->it_thread = td;
465	CTR2(KTR_INTR, "%s: created %s", __func__, name);
466	return (ithd);
467}
468#else
469static struct intr_thread *
470ithread_create(const char *name, struct intr_handler *ih)
471{
472	struct intr_thread *ithd;
473	struct thread *td;
474	int error;
475
476	ithd = malloc(sizeof(struct intr_thread), M_ITHREAD, M_WAITOK | M_ZERO);
477
478	error = kproc_kthread_add(ithread_loop, ih, &intrproc,
479		    &td, RFSTOPPED | RFHIGHPID,
480	    	    0, "intr", "%s", name);
481	if (error)
482		panic("kproc_create() failed with %d", error);
483	thread_lock(td);
484	sched_class(td, PRI_ITHD);
485	TD_SET_IWAIT(td);
486	thread_unlock(td);
487	td->td_pflags |= TDP_ITHREAD;
488	ithd->it_thread = td;
489	CTR2(KTR_INTR, "%s: created %s", __func__, name);
490	return (ithd);
491}
492#endif
493
494static void
495ithread_destroy(struct intr_thread *ithread)
496{
497	struct thread *td;
498
499	CTR2(KTR_INTR, "%s: killing %s", __func__, ithread->it_event->ie_name);
500	td = ithread->it_thread;
501	thread_lock(td);
502	ithread->it_flags |= IT_DEAD;
503	if (TD_AWAITING_INTR(td)) {
504		TD_CLR_IWAIT(td);
505		sched_add(td, SRQ_INTR);
506	}
507	thread_unlock(td);
508}
509
510#ifndef INTR_FILTER
511int
512intr_event_add_handler(struct intr_event *ie, const char *name,
513    driver_filter_t filter, driver_intr_t handler, void *arg, u_char pri,
514    enum intr_type flags, void **cookiep)
515{
516	struct intr_handler *ih, *temp_ih;
517	struct intr_thread *it;
518
519	if (ie == NULL || name == NULL || (handler == NULL && filter == NULL))
520		return (EINVAL);
521
522	/* Allocate and populate an interrupt handler structure. */
523	ih = malloc(sizeof(struct intr_handler), M_ITHREAD, M_WAITOK | M_ZERO);
524	ih->ih_filter = filter;
525	ih->ih_handler = handler;
526	ih->ih_argument = arg;
527	strlcpy(ih->ih_name, name, sizeof(ih->ih_name));
528	ih->ih_event = ie;
529	ih->ih_pri = pri;
530	if (flags & INTR_EXCL)
531		ih->ih_flags = IH_EXCLUSIVE;
532	if (flags & INTR_MPSAFE)
533		ih->ih_flags |= IH_MPSAFE;
534	if (flags & INTR_ENTROPY)
535		ih->ih_flags |= IH_ENTROPY;
536
537	/* We can only have one exclusive handler in an event. */
538	mtx_lock(&ie->ie_lock);
539	if (!TAILQ_EMPTY(&ie->ie_handlers)) {
540		if ((flags & INTR_EXCL) ||
541		    (TAILQ_FIRST(&ie->ie_handlers)->ih_flags & IH_EXCLUSIVE)) {
542			mtx_unlock(&ie->ie_lock);
543			free(ih, M_ITHREAD);
544			return (EINVAL);
545		}
546	}
547
548	/* Create a thread if we need one. */
549	while (ie->ie_thread == NULL && handler != NULL) {
550		if (ie->ie_flags & IE_ADDING_THREAD)
551			msleep(ie, &ie->ie_lock, 0, "ithread", 0);
552		else {
553			ie->ie_flags |= IE_ADDING_THREAD;
554			mtx_unlock(&ie->ie_lock);
555			it = ithread_create("intr: newborn");
556			mtx_lock(&ie->ie_lock);
557			ie->ie_flags &= ~IE_ADDING_THREAD;
558			ie->ie_thread = it;
559			it->it_event = ie;
560			ithread_update(it);
561			wakeup(ie);
562		}
563	}
564
565	/* Add the new handler to the event in priority order. */
566	TAILQ_FOREACH(temp_ih, &ie->ie_handlers, ih_next) {
567		if (temp_ih->ih_pri > ih->ih_pri)
568			break;
569	}
570	if (temp_ih == NULL)
571		TAILQ_INSERT_TAIL(&ie->ie_handlers, ih, ih_next);
572	else
573		TAILQ_INSERT_BEFORE(temp_ih, ih, ih_next);
574	intr_event_update(ie);
575
576	CTR3(KTR_INTR, "%s: added %s to %s", __func__, ih->ih_name,
577	    ie->ie_name);
578	mtx_unlock(&ie->ie_lock);
579
580	if (cookiep != NULL)
581		*cookiep = ih;
582	return (0);
583}
584#else
585int
586intr_event_add_handler(struct intr_event *ie, const char *name,
587    driver_filter_t filter, driver_intr_t handler, void *arg, u_char pri,
588    enum intr_type flags, void **cookiep)
589{
590	struct intr_handler *ih, *temp_ih;
591	struct intr_thread *it;
592
593	if (ie == NULL || name == NULL || (handler == NULL && filter == NULL))
594		return (EINVAL);
595
596	/* Allocate and populate an interrupt handler structure. */
597	ih = malloc(sizeof(struct intr_handler), M_ITHREAD, M_WAITOK | M_ZERO);
598	ih->ih_filter = filter;
599	ih->ih_handler = handler;
600	ih->ih_argument = arg;
601	strlcpy(ih->ih_name, name, sizeof(ih->ih_name));
602	ih->ih_event = ie;
603	ih->ih_pri = pri;
604	if (flags & INTR_EXCL)
605		ih->ih_flags = IH_EXCLUSIVE;
606	if (flags & INTR_MPSAFE)
607		ih->ih_flags |= IH_MPSAFE;
608	if (flags & INTR_ENTROPY)
609		ih->ih_flags |= IH_ENTROPY;
610
611	/* We can only have one exclusive handler in an event. */
612	mtx_lock(&ie->ie_lock);
613	if (!TAILQ_EMPTY(&ie->ie_handlers)) {
614		if ((flags & INTR_EXCL) ||
615		    (TAILQ_FIRST(&ie->ie_handlers)->ih_flags & IH_EXCLUSIVE)) {
616			mtx_unlock(&ie->ie_lock);
617			free(ih, M_ITHREAD);
618			return (EINVAL);
619		}
620	}
621
622	/* For filtered handlers, create a private ithread to run on. */
623	if (filter != NULL && handler != NULL) {
624		mtx_unlock(&ie->ie_lock);
625		it = ithread_create("intr: newborn", ih);
626		mtx_lock(&ie->ie_lock);
627		it->it_event = ie;
628		ih->ih_thread = it;
629		ithread_update(it); /* XXX - do we really need this?!?!? */
630	} else { /* Create the global per-event thread if we need one. */
631		while (ie->ie_thread == NULL && handler != NULL) {
632			if (ie->ie_flags & IE_ADDING_THREAD)
633				msleep(ie, &ie->ie_lock, 0, "ithread", 0);
634			else {
635				ie->ie_flags |= IE_ADDING_THREAD;
636				mtx_unlock(&ie->ie_lock);
637				it = ithread_create("intr: newborn", ih);
638				mtx_lock(&ie->ie_lock);
639				ie->ie_flags &= ~IE_ADDING_THREAD;
640				ie->ie_thread = it;
641				it->it_event = ie;
642				ithread_update(it);
643				wakeup(ie);
644			}
645		}
646	}
647
648	/* Add the new handler to the event in priority order. */
649	TAILQ_FOREACH(temp_ih, &ie->ie_handlers, ih_next) {
650		if (temp_ih->ih_pri > ih->ih_pri)
651			break;
652	}
653	if (temp_ih == NULL)
654		TAILQ_INSERT_TAIL(&ie->ie_handlers, ih, ih_next);
655	else
656		TAILQ_INSERT_BEFORE(temp_ih, ih, ih_next);
657	intr_event_update(ie);
658
659	CTR3(KTR_INTR, "%s: added %s to %s", __func__, ih->ih_name,
660	    ie->ie_name);
661	mtx_unlock(&ie->ie_lock);
662
663	if (cookiep != NULL)
664		*cookiep = ih;
665	return (0);
666}
667#endif
668
669/*
670 * Append a description preceded by a ':' to the name of the specified
671 * interrupt handler.
672 */
673int
674intr_event_describe_handler(struct intr_event *ie, void *cookie,
675    const char *descr)
676{
677	struct intr_handler *ih;
678	size_t space;
679	char *start;
680
681	mtx_lock(&ie->ie_lock);
682#ifdef INVARIANTS
683	TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
684		if (ih == cookie)
685			break;
686	}
687	if (ih == NULL) {
688		mtx_unlock(&ie->ie_lock);
689		panic("handler %p not found in interrupt event %p", cookie, ie);
690	}
691#endif
692	ih = cookie;
693
694	/*
695	 * Look for an existing description by checking for an
696	 * existing ":".  This assumes device names do not include
697	 * colons.  If one is found, prepare to insert the new
698	 * description at that point.  If one is not found, find the
699	 * end of the name to use as the insertion point.
700	 */
701	start = strchr(ih->ih_name, ':');
702	if (start == NULL)
703		start = strchr(ih->ih_name, 0);
704
705	/*
706	 * See if there is enough remaining room in the string for the
707	 * description + ":".  The "- 1" leaves room for the trailing
708	 * '\0'.  The "+ 1" accounts for the colon.
709	 */
710	space = sizeof(ih->ih_name) - (start - ih->ih_name) - 1;
711	if (strlen(descr) + 1 > space) {
712		mtx_unlock(&ie->ie_lock);
713		return (ENOSPC);
714	}
715
716	/* Append a colon followed by the description. */
717	*start = ':';
718	strcpy(start + 1, descr);
719	intr_event_update(ie);
720	mtx_unlock(&ie->ie_lock);
721	return (0);
722}
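
/*
 * Illustrative example (hypothetical, not part of this file): drivers
 * normally reach this through bus_describe_intr(), e.g. a multi-queue NIC
 * might tag each per-queue handler with
 *
 *	bus_describe_intr(dev, sc->irq_res[i], sc->intr_hand[i], "rxq%d", i);
 *
 * where "sc" and its fields are invented names; a handler registered as
 * "foo0" would then appear as "foo0:rxq0" in the event and thread names.
 */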
723
724/*
725 * Return the ie_source field from the intr_event an intr_handler is
726 * associated with.
727 */
728void *
729intr_handler_source(void *cookie)
730{
731	struct intr_handler *ih;
732	struct intr_event *ie;
733
734	ih = (struct intr_handler *)cookie;
735	if (ih == NULL)
736		return (NULL);
737	ie = ih->ih_event;
738	KASSERT(ie != NULL,
739	    ("interrupt handler \"%s\" has a NULL interrupt event",
740	    ih->ih_name));
741	return (ie->ie_source);
742}
743
744/*
745 * Sleep until an ithread finishes executing an interrupt handler.
746 *
747 * XXX Doesn't currently handle interrupt filters or fast interrupt
748 * handlers.  This is intended for compatibility with Linux drivers
749 * only.  Do not use in BSD code.
750 */
751void
752_intr_drain(int irq)
753{
754	struct intr_event *ie;
755	struct intr_thread *ithd;
756	struct thread *td;
757
758	ie = intr_lookup(irq);
759	if (ie == NULL)
760		return;
761	if (ie->ie_thread == NULL)
762		return;
763	ithd = ie->ie_thread;
764	td = ithd->it_thread;
765	/*
766	 * We set the flag and wait for it to be cleared to avoid
767	 * long delays with potentially busy interrupt handlers
768	 * were we to only sample TD_AWAITING_INTR() every tick.
769	 */
770	thread_lock(td);
771	if (!TD_AWAITING_INTR(td)) {
772		ithd->it_flags |= IT_WAIT;
773		while (ithd->it_flags & IT_WAIT) {
774			thread_unlock(td);
775			pause("idrain", 1);
776			thread_lock(td);
777		}
778	}
779	thread_unlock(td);
780	return;
781}
782
783
784#ifndef INTR_FILTER
785int
786intr_event_remove_handler(void *cookie)
787{
788	struct intr_handler *handler = (struct intr_handler *)cookie;
789	struct intr_event *ie;
790#ifdef INVARIANTS
791	struct intr_handler *ih;
792#endif
793#ifdef notyet
794	int dead;
795#endif
796
797	if (handler == NULL)
798		return (EINVAL);
799	ie = handler->ih_event;
800	KASSERT(ie != NULL,
801	    ("interrupt handler \"%s\" has a NULL interrupt event",
802	    handler->ih_name));
803	mtx_lock(&ie->ie_lock);
804	CTR3(KTR_INTR, "%s: removing %s from %s", __func__, handler->ih_name,
805	    ie->ie_name);
806#ifdef INVARIANTS
807	TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next)
808		if (ih == handler)
809			goto ok;
810	mtx_unlock(&ie->ie_lock);
811	panic("interrupt handler \"%s\" not found in interrupt event \"%s\"",
812	    ih->ih_name, ie->ie_name);
813ok:
814#endif
815	/*
816	 * If there is no ithread, then just remove the handler and return.
817	 * XXX: Note that an INTR_FAST handler might be running on another
818	 * CPU!
819	 */
820	if (ie->ie_thread == NULL) {
821		TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next);
822		mtx_unlock(&ie->ie_lock);
823		free(handler, M_ITHREAD);
824		return (0);
825	}
826
827	/*
828	 * If the interrupt thread is already running, then just mark this
829	 * handler as being dead and let the ithread do the actual removal.
830	 *
831	 * During a cold boot while cold is set, msleep() does not sleep,
832	 * so we have to remove the handler here rather than letting the
833	 * thread do it.
834	 */
835	thread_lock(ie->ie_thread->it_thread);
836	if (!TD_AWAITING_INTR(ie->ie_thread->it_thread) && !cold) {
837		handler->ih_flags |= IH_DEAD;
838
839		/*
840		 * Ensure that the thread will process the handler list
841		 * again and remove this handler if it has already passed
842		 * it on the list.
843		 */
844		atomic_store_rel_int(&ie->ie_thread->it_need, 1);
845	} else
846		TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next);
847	thread_unlock(ie->ie_thread->it_thread);
848	while (handler->ih_flags & IH_DEAD)
849		msleep(handler, &ie->ie_lock, 0, "iev_rmh", 0);
850	intr_event_update(ie);
851#ifdef notyet
852	/*
853	 * XXX: This could be bad in the case of ppbus(4).  Also, I think
854	 * this could lead to races of stale data when servicing an
855	 * interrupt.
856	 */
857	dead = 1;
858	TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
859		if (!(ih->ih_flags & IH_FAST)) {
860			dead = 0;
861			break;
862		}
863	}
864	if (dead) {
865		ithread_destroy(ie->ie_thread);
866		ie->ie_thread = NULL;
867	}
868#endif
869	mtx_unlock(&ie->ie_lock);
870	free(handler, M_ITHREAD);
871	return (0);
872}
873
874static int
875intr_event_schedule_thread(struct intr_event *ie)
876{
877	struct intr_entropy entropy;
878	struct intr_thread *it;
879	struct thread *td;
880	struct thread *ctd;
881	struct proc *p;
882
883	/*
884	 * If no ithread or no handlers, then we have a stray interrupt.
885	 */
886	if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers) ||
887	    ie->ie_thread == NULL)
888		return (EINVAL);
889
890	ctd = curthread;
891	it = ie->ie_thread;
892	td = it->it_thread;
893	p = td->td_proc;
894
895	/*
896	 * If any of the handlers for this ithread claim to be good
897	 * sources of entropy, then gather some.
898	 */
899	if (harvest.interrupt && ie->ie_flags & IE_ENTROPY) {
900		CTR3(KTR_INTR, "%s: pid %d (%s) gathering entropy", __func__,
901		    p->p_pid, td->td_name);
902		entropy.event = (uintptr_t)ie;
903		entropy.td = ctd;
904		random_harvest(&entropy, sizeof(entropy), 2,
905		    RANDOM_INTERRUPT);
906	}
907
908	KASSERT(p != NULL, ("ithread %s has no process", ie->ie_name));
909
910	/*
911	 * Set it_need to tell the thread to keep running if it is already
912	 * running.  Then, lock the thread and see if we actually need to
913	 * put it on the runqueue.
914	 */
915	atomic_store_rel_int(&it->it_need, 1);
916	thread_lock(td);
917	if (TD_AWAITING_INTR(td)) {
918		CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid,
919		    td->td_name);
920		TD_CLR_IWAIT(td);
921		sched_add(td, SRQ_INTR);
922	} else {
923		CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d",
924		    __func__, p->p_pid, td->td_name, it->it_need, td->td_state);
925	}
926	thread_unlock(td);
927
928	return (0);
929}
930#else
931int
932intr_event_remove_handler(void *cookie)
933{
934	struct intr_handler *handler = (struct intr_handler *)cookie;
935	struct intr_event *ie;
936	struct intr_thread *it;
937#ifdef INVARIANTS
938	struct intr_handler *ih;
939#endif
940#ifdef notyet
941	int dead;
942#endif
943
944	if (handler == NULL)
945		return (EINVAL);
946	ie = handler->ih_event;
947	KASSERT(ie != NULL,
948	    ("interrupt handler \"%s\" has a NULL interrupt event",
949	    handler->ih_name));
950	mtx_lock(&ie->ie_lock);
951	CTR3(KTR_INTR, "%s: removing %s from %s", __func__, handler->ih_name,
952	    ie->ie_name);
953#ifdef INVARIANTS
954	TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next)
955		if (ih == handler)
956			goto ok;
957	mtx_unlock(&ie->ie_lock);
958	panic("interrupt handler \"%s\" not found in interrupt event \"%s\"",
959	    ih->ih_name, ie->ie_name);
960ok:
961#endif
962	/*
963	 * If there are no ithreads (per event and per handler), then
964	 * just remove the handler and return.
965	 * XXX: Note that an INTR_FAST handler might be running on another CPU!
966	 */
967	if (ie->ie_thread == NULL && handler->ih_thread == NULL) {
968		TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next);
969		mtx_unlock(&ie->ie_lock);
970		free(handler, M_ITHREAD);
971		return (0);
972	}
973
974	/* Private or global ithread? */
975	it = (handler->ih_thread) ? handler->ih_thread : ie->ie_thread;
976	/*
977	 * If the interrupt thread is already running, then just mark this
978	 * handler as being dead and let the ithread do the actual removal.
979	 *
980	 * During a cold boot while cold is set, msleep() does not sleep,
981	 * so we have to remove the handler here rather than letting the
982	 * thread do it.
983	 */
984	thread_lock(it->it_thread);
985	if (!TD_AWAITING_INTR(it->it_thread) && !cold) {
986		handler->ih_flags |= IH_DEAD;
987
988		/*
989		 * Ensure that the thread will process the handler list
990		 * again and remove this handler if it has already passed
991		 * it on the list.
992		 */
993		atomic_store_rel_int(&it->it_need, 1);
994	} else
995		TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next);
996	thread_unlock(it->it_thread);
997	while (handler->ih_flags & IH_DEAD)
998		msleep(handler, &ie->ie_lock, 0, "iev_rmh", 0);
999	/*
1000	 * At this point, the handler has been disconnected from the event,
1001	 * so we can kill the private ithread if any.
1002	 */
1003	if (handler->ih_thread) {
1004		ithread_destroy(handler->ih_thread);
1005		handler->ih_thread = NULL;
1006	}
1007	intr_event_update(ie);
1008#ifdef notyet
1009	/*
1010	 * XXX: This could be bad in the case of ppbus(4).  Also, I think
1011	 * this could lead to races of stale data when servicing an
1012	 * interrupt.
1013	 */
1014	dead = 1;
1015	TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
1016		if (handler != NULL) {
1017			dead = 0;
1018			break;
1019		}
1020	}
1021	if (dead) {
1022		ithread_destroy(ie->ie_thread);
1023		ie->ie_thread = NULL;
1024	}
1025#endif
1026	mtx_unlock(&ie->ie_lock);
1027	free(handler, M_ITHREAD);
1028	return (0);
1029}
1030
1031static int
1032intr_event_schedule_thread(struct intr_event *ie, struct intr_thread *it)
1033{
1034	struct intr_entropy entropy;
1035	struct thread *td;
1036	struct thread *ctd;
1037	struct proc *p;
1038
1039	/*
1040	 * If no ithread or no handlers, then we have a stray interrupt.
1041	 */
1042	if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers) || it == NULL)
1043		return (EINVAL);
1044
1045	ctd = curthread;
1046	td = it->it_thread;
1047	p = td->td_proc;
1048
1049	/*
1050	 * If any of the handlers for this ithread claim to be good
1051	 * sources of entropy, then gather some.
1052	 */
1053	if (harvest.interrupt && ie->ie_flags & IE_ENTROPY) {
1054		CTR3(KTR_INTR, "%s: pid %d (%s) gathering entropy", __func__,
1055		    p->p_pid, td->td_name);
1056		entropy.event = (uintptr_t)ie;
1057		entropy.td = ctd;
1058		random_harvest(&entropy, sizeof(entropy), 2,
1059		    RANDOM_INTERRUPT);
1060	}
1061
1062	KASSERT(p != NULL, ("ithread %s has no process", ie->ie_name));
1063
1064	/*
1065	 * Set it_need to tell the thread to keep running if it is already
1066	 * running.  Then, lock the thread and see if we actually need to
1067	 * put it on the runqueue.
1068	 */
1069	atomic_store_rel_int(&it->it_need, 1);
1070	thread_lock(td);
1071	if (TD_AWAITING_INTR(td)) {
1072		CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid,
1073		    td->td_name);
1074		TD_CLR_IWAIT(td);
1075		sched_add(td, SRQ_INTR);
1076	} else {
1077		CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d",
1078		    __func__, p->p_pid, td->td_name, it->it_need, td->td_state);
1079	}
1080	thread_unlock(td);
1081
1082	return (0);
1083}
1084#endif
1085
1086/*
1087 * Allow interrupt event binding for software interrupt handlers -- a no-op,
1088 * since interrupts are generated in software rather than being directed by
1089 * a PIC.
1090 */
1091static int
1092swi_assign_cpu(void *arg, u_char cpu)
1093{
1094
1095	return (0);
1096}
1097
1098/*
1099 * Add a software interrupt handler to a specified event.  If a given event
1100 * is not specified, then a new event is created.
1101 */
1102int
1103swi_add(struct intr_event **eventp, const char *name, driver_intr_t handler,
1104	    void *arg, int pri, enum intr_type flags, void **cookiep)
1105{
1106	struct intr_event *ie;
1107	int error;
1108
1109	if (flags & INTR_ENTROPY)
1110		return (EINVAL);
1111
1112	ie = (eventp != NULL) ? *eventp : NULL;
1113
1114	if (ie != NULL) {
1115		if (!(ie->ie_flags & IE_SOFT))
1116			return (EINVAL);
1117	} else {
1118		error = intr_event_create(&ie, NULL, IE_SOFT, 0,
1119		    NULL, NULL, NULL, swi_assign_cpu, "swi%d:", pri);
1120		if (error)
1121			return (error);
1122		if (eventp != NULL)
1123			*eventp = ie;
1124	}
1125	error = intr_event_add_handler(ie, name, NULL, handler, arg,
1126	    PI_SWI(pri), flags, cookiep);
1127	return (error);
1128}
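
/*
 * Illustrative example (hypothetical consumer, not part of this file): a
 * subsystem that wants a private software interrupt might register a
 * handler once and then schedule it from interrupt or top-half context:
 *
 *	static void *foo_swi_cookie;
 *	static void foo_swi_handler(void *arg);
 *
 *	swi_add(NULL, "foo", foo_swi_handler, NULL, SWI_CLOCK, INTR_MPSAFE,
 *	    &foo_swi_cookie);
 *	...
 *	swi_sched(foo_swi_cookie, 0);
 *
 * The handler then runs in a swi ithread at PI_SWI(SWI_CLOCK) priority.
 * Passing SWI_DELAY to swi_sched() (see below) only marks the handler as
 * needing service without scheduling the thread immediately.
 */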
1129
1130/*
1131 * Schedule a software interrupt thread.
1132 */
1133void
1134swi_sched(void *cookie, int flags)
1135{
1136	struct intr_handler *ih = (struct intr_handler *)cookie;
1137	struct intr_event *ie = ih->ih_event;
1138	struct intr_entropy entropy;
1139	int error;
1140
1141	CTR3(KTR_INTR, "swi_sched: %s %s need=%d", ie->ie_name, ih->ih_name,
1142	    ih->ih_need);
1143
1144	if (harvest.swi) {
1145		CTR2(KTR_INTR, "swi_sched: pid %d (%s) gathering entropy",
1146		    curproc->p_pid, curthread->td_name);
1147		entropy.event = (uintptr_t)ih;
1148		entropy.td = curthread;
1149		random_harvest(&entropy, sizeof(entropy), 1,
1150		    RANDOM_SWI);
1151	}
1152
1153	/*
1154	 * Set ih_need for this handler so that if the ithread is already
1155	 * running it will execute this handler on the next pass.  Otherwise,
1156	 * it will execute it the next time it runs.
1157	 */
1158	atomic_store_rel_int(&ih->ih_need, 1);
1159
1160	if (!(flags & SWI_DELAY)) {
1161		PCPU_INC(cnt.v_soft);
1162#ifdef INTR_FILTER
1163		error = intr_event_schedule_thread(ie, ie->ie_thread);
1164#else
1165		error = intr_event_schedule_thread(ie);
1166#endif
1167		KASSERT(error == 0, ("stray software interrupt"));
1168	}
1169}
1170
1171/*
1172 * Remove a software interrupt handler.  Currently this code does not
1173 * remove the associated interrupt event if it becomes empty.  Calling code
1174 * may do so manually via intr_event_destroy(), but that's not really
1175 * an optimal interface.
1176 */
1177int
1178swi_remove(void *cookie)
1179{
1180
1181	return (intr_event_remove_handler(cookie));
1182}
1183
1184#ifdef INTR_FILTER
1185static void
1186priv_ithread_execute_handler(struct proc *p, struct intr_handler *ih)
1187{
1188	struct intr_event *ie;
1189
1190	ie = ih->ih_event;
1191	/*
1192	 * If this handler is marked for death, remove it from
1193	 * the list of handlers and wake up the sleeper.
1194	 */
1195	if (ih->ih_flags & IH_DEAD) {
1196		mtx_lock(&ie->ie_lock);
1197		TAILQ_REMOVE(&ie->ie_handlers, ih, ih_next);
1198		ih->ih_flags &= ~IH_DEAD;
1199		wakeup(ih);
1200		mtx_unlock(&ie->ie_lock);
1201		return;
1202	}
1203
1204	/* Execute this handler. */
1205	CTR6(KTR_INTR, "%s: pid %d exec %p(%p) for %s flg=%x",
1206	     __func__, p->p_pid, (void *)ih->ih_handler, ih->ih_argument,
1207	     ih->ih_name, ih->ih_flags);
1208
1209	if (!(ih->ih_flags & IH_MPSAFE))
1210		mtx_lock(&Giant);
1211	ih->ih_handler(ih->ih_argument);
1212	if (!(ih->ih_flags & IH_MPSAFE))
1213		mtx_unlock(&Giant);
1214}
1215#endif
1216
1217/*
1218 * This is a public function for use by drivers that mux interrupt
1219 * handlers for child devices from their interrupt handler.
1220 */
1221void
1222intr_event_execute_handlers(struct proc *p, struct intr_event *ie)
1223{
1224	struct intr_handler *ih, *ihn;
1225
1226	TAILQ_FOREACH_SAFE(ih, &ie->ie_handlers, ih_next, ihn) {
1227		/*
1228		 * If this handler is marked for death, remove it from
1229		 * the list of handlers and wake up the sleeper.
1230		 */
1231		if (ih->ih_flags & IH_DEAD) {
1232			mtx_lock(&ie->ie_lock);
1233			TAILQ_REMOVE(&ie->ie_handlers, ih, ih_next);
1234			ih->ih_flags &= ~IH_DEAD;
1235			wakeup(ih);
1236			mtx_unlock(&ie->ie_lock);
1237			continue;
1238		}
1239
1240		/* Skip filter only handlers */
1241		if (ih->ih_handler == NULL)
1242			continue;
1243
1244		/*
1245		 * For software interrupt threads, we only execute
1246		 * handlers that have their need flag set.  Hardware
1247		 * interrupt threads always invoke all of their handlers.
1248		 */
1249		if (ie->ie_flags & IE_SOFT) {
1250			if (atomic_load_acq_int(&ih->ih_need) == 0)
1251				continue;
1252			else
1253				atomic_store_rel_int(&ih->ih_need, 0);
1254		}
1255
1256		/* Execute this handler. */
1257		CTR6(KTR_INTR, "%s: pid %d exec %p(%p) for %s flg=%x",
1258		    __func__, p->p_pid, (void *)ih->ih_handler,
1259		    ih->ih_argument, ih->ih_name, ih->ih_flags);
1260
1261		if (!(ih->ih_flags & IH_MPSAFE))
1262			mtx_lock(&Giant);
1263		ih->ih_handler(ih->ih_argument);
1264		if (!(ih->ih_flags & IH_MPSAFE))
1265			mtx_unlock(&Giant);
1266	}
1267}
1268
1269static void
1270ithread_execute_handlers(struct proc *p, struct intr_event *ie)
1271{
1272
1273	/* Interrupt handlers should not sleep. */
1274	if (!(ie->ie_flags & IE_SOFT))
1275		THREAD_NO_SLEEPING();
1276	intr_event_execute_handlers(p, ie);
1277	if (!(ie->ie_flags & IE_SOFT))
1278		THREAD_SLEEPING_OK();
1279
1280	/*
1281	 * Interrupt storm handling:
1282	 *
1283	 * If this interrupt source is currently storming, then throttle
1284	 * it to only fire the handler once per clock tick.
1285	 *
1286	 * If this interrupt source is not currently storming, but the
1287	 * number of back to back interrupts exceeds the storm threshold,
1288	 * then enter storming mode.
1289	 */
1290	if (intr_storm_threshold != 0 && ie->ie_count >= intr_storm_threshold &&
1291	    !(ie->ie_flags & IE_SOFT)) {
1292		/* Report the message only once every second. */
1293		if (ppsratecheck(&ie->ie_warntm, &ie->ie_warncnt, 1)) {
1294			printf(
1295	"interrupt storm detected on \"%s\"; throttling interrupt source\n",
1296			    ie->ie_name);
1297		}
1298		pause("istorm", 1);
1299	} else
1300		ie->ie_count++;
1301
1302	/*
1303	 * Now that all the handlers have had a chance to run, reenable
1304	 * the interrupt source.
1305	 */
1306	if (ie->ie_post_ithread != NULL)
1307		ie->ie_post_ithread(ie->ie_source);
1308}
1309
1310#ifndef INTR_FILTER
1311/*
1312 * This is the main code for interrupt threads.
1313 */
1314static void
1315ithread_loop(void *arg)
1316{
1317	struct intr_thread *ithd;
1318	struct intr_event *ie;
1319	struct thread *td;
1320	struct proc *p;
1321	int wake;
1322
1323	td = curthread;
1324	p = td->td_proc;
1325	ithd = (struct intr_thread *)arg;
1326	KASSERT(ithd->it_thread == td,
1327	    ("%s: ithread and proc linkage out of sync", __func__));
1328	ie = ithd->it_event;
1329	ie->ie_count = 0;
1330	wake = 0;
1331
1332	/*
1333	 * As long as we have interrupts outstanding, go through the
1334	 * list of handlers, giving each one a go at it.
1335	 */
1336	for (;;) {
1337		/*
1338		 * If we are an orphaned thread, then just die.
1339		 */
1340		if (ithd->it_flags & IT_DEAD) {
1341			CTR3(KTR_INTR, "%s: pid %d (%s) exiting", __func__,
1342			    p->p_pid, td->td_name);
1343			free(ithd, M_ITHREAD);
1344			kthread_exit();
1345		}
1346
1347		/*
1348		 * Service interrupts.  If another interrupt arrives while
1349		 * we are running, it will set it_need to note that we
1350		 * should make another pass.
1351		 */
1352		while (atomic_load_acq_int(&ithd->it_need) != 0) {
1353			/*
1354			 * This might need a full read and write barrier
1355			 * to make sure that this write posts before any
1356			 * of the memory or device accesses in the
1357			 * handlers.
1358			 */
1359			atomic_store_rel_int(&ithd->it_need, 0);
1360			ithread_execute_handlers(p, ie);
1361		}
1362		WITNESS_WARN(WARN_PANIC, NULL, "suspending ithread");
1363		mtx_assert(&Giant, MA_NOTOWNED);
1364
1365		/*
1366		 * Processed all our interrupts.  Now get the sched
1367		 * lock.  This may take a while and it_need may get
1368		 * set again, so we have to check it again.
1369		 */
1370		thread_lock(td);
1371		if ((atomic_load_acq_int(&ithd->it_need) == 0) &&
1372		    !(ithd->it_flags & (IT_DEAD | IT_WAIT))) {
1373			TD_SET_IWAIT(td);
1374			ie->ie_count = 0;
1375			mi_switch(SW_VOL | SWT_IWAIT, NULL);
1376		}
1377		if (ithd->it_flags & IT_WAIT) {
1378			wake = 1;
1379			ithd->it_flags &= ~IT_WAIT;
1380		}
1381		thread_unlock(td);
1382		if (wake) {
1383			wakeup(ithd);
1384			wake = 0;
1385		}
1386	}
1387}
1388
1389/*
1390 * Main interrupt handling body.
1391 *
1392 * Input:
1393 * o ie:                        the event connected to this interrupt.
1394 * o frame:                     some archs (e.g. i386) pass a frame to some
1395 *                              handlers as their main argument.
1396 * Return value:
1397 * o 0:                         everything ok.
1398 * o EINVAL:                    stray interrupt.
1399 */
1400int
1401intr_event_handle(struct intr_event *ie, struct trapframe *frame)
1402{
1403	struct intr_handler *ih;
1404	struct trapframe *oldframe;
1405	struct thread *td;
1406	int error, ret, thread;
1407
1408	td = curthread;
1409
1410	/* An interrupt with no event or handlers is a stray interrupt. */
1411	if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers))
1412		return (EINVAL);
1413
1414	/*
1415	 * Execute fast interrupt handlers directly.
1416	 * To support clock handlers, if a handler registers
1417	 * with a NULL argument, then we pass it a pointer to
1418	 * a trapframe as its argument.
1419	 */
1420	td->td_intr_nesting_level++;
1421	thread = 0;
1422	ret = 0;
1423	critical_enter();
1424	oldframe = td->td_intr_frame;
1425	td->td_intr_frame = frame;
1426	TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
1427		if (ih->ih_filter == NULL) {
1428			thread = 1;
1429			continue;
1430		}
1431		CTR4(KTR_INTR, "%s: exec %p(%p) for %s", __func__,
1432		    ih->ih_filter, ih->ih_argument == NULL ? frame :
1433		    ih->ih_argument, ih->ih_name);
1434		if (ih->ih_argument == NULL)
1435			ret = ih->ih_filter(frame);
1436		else
1437			ret = ih->ih_filter(ih->ih_argument);
1438		KASSERT(ret == FILTER_STRAY ||
1439		    ((ret & (FILTER_SCHEDULE_THREAD | FILTER_HANDLED)) != 0 &&
1440		    (ret & ~(FILTER_SCHEDULE_THREAD | FILTER_HANDLED)) == 0),
1441		    ("%s: incorrect return value %#x from %s", __func__, ret,
1442		    ih->ih_name));
1443
1444		/*
1445		 * Wrapper handler special handling:
1446		 *
1447		 * in some particular cases (like pccard and pccbb),
1448		 * the _real_ device handler is wrapped in a couple of
1449		 * functions - a filter wrapper and an ithread wrapper.
1450		 * In this case (and just in this case), the filter wrapper
1451		 * could ask the system to schedule the ithread and mask
1452		 * the interrupt source if the wrapped handler is composed
1453		 * of just an ithread handler.
1454		 *
1455		 * TODO: write a generic wrapper to avoid people rolling
1456		 * their own
1457		 */
1458		if (!thread) {
1459			if (ret == FILTER_SCHEDULE_THREAD)
1460				thread = 1;
1461		}
1462	}
1463	td->td_intr_frame = oldframe;
1464
1465	if (thread) {
1466		if (ie->ie_pre_ithread != NULL)
1467			ie->ie_pre_ithread(ie->ie_source);
1468	} else {
1469		if (ie->ie_post_filter != NULL)
1470			ie->ie_post_filter(ie->ie_source);
1471	}
1472
1473	/* Schedule the ithread if needed. */
1474	if (thread) {
1475		error = intr_event_schedule_thread(ie);
1476#ifndef XEN
1477		KASSERT(error == 0, ("bad stray interrupt"));
1478#else
1479		if (error != 0)
1480			log(LOG_WARNING, "bad stray interrupt");
1481#endif
1482	}
1483	critical_exit();
1484	td->td_intr_nesting_level--;
1485	return (0);
1486}
1487#else
1488/*
1489 * This is the main code for interrupt threads.
1490 */
1491static void
1492ithread_loop(void *arg)
1493{
1494	struct intr_thread *ithd;
1495	struct intr_handler *ih;
1496	struct intr_event *ie;
1497	struct thread *td;
1498	struct proc *p;
1499	int priv;
1500	int wake;
1501
1502	td = curthread;
1503	p = td->td_proc;
1504	ih = (struct intr_handler *)arg;
1505	priv = (ih->ih_thread != NULL) ? 1 : 0;
1506	ithd = (priv) ? ih->ih_thread : ih->ih_event->ie_thread;
1507	KASSERT(ithd->it_thread == td,
1508	    ("%s: ithread and proc linkage out of sync", __func__));
1509	ie = ithd->it_event;
1510	ie->ie_count = 0;
1511	wake = 0;
1512
1513	/*
1514	 * As long as we have interrupts outstanding, go through the
1515	 * list of handlers, giving each one a go at it.
1516	 */
1517	for (;;) {
1518		/*
1519		 * If we are an orphaned thread, then just die.
1520		 */
1521		if (ithd->it_flags & IT_DEAD) {
1522			CTR3(KTR_INTR, "%s: pid %d (%s) exiting", __func__,
1523			    p->p_pid, td->td_name);
1524			free(ithd, M_ITHREAD);
1525			kthread_exit();
1526		}
1527
1528		/*
1529		 * Service interrupts.  If another interrupt arrives while
1530		 * we are running, it will set it_need to note that we
1531		 * should make another pass.
1532		 */
1533		while (atomic_load_acq_int(&ithd->it_need) != 0) {
1534			/*
1535			 * This might need a full read and write barrier
1536			 * to make sure that this write posts before any
1537			 * of the memory or device accesses in the
1538			 * handlers.
1539			 */
1540			atomic_store_rel_int(&ithd->it_need, 0);
1541			if (priv)
1542				priv_ithread_execute_handler(p, ih);
1543			else
1544				ithread_execute_handlers(p, ie);
1545		}
1546		WITNESS_WARN(WARN_PANIC, NULL, "suspending ithread");
1547		mtx_assert(&Giant, MA_NOTOWNED);
1548
1549		/*
1550		 * Processed all our interrupts.  Now get the sched
1551		 * lock.  This may take a while and it_need may get
1552		 * set again, so we have to check it again.
1553		 */
1554		thread_lock(td);
1555		if ((atomic_load_acq_int(&ithd->it_need) == 0) &&
1556		    !(ithd->it_flags & (IT_DEAD | IT_WAIT))) {
1557			TD_SET_IWAIT(td);
1558			ie->ie_count = 0;
1559			mi_switch(SW_VOL | SWT_IWAIT, NULL);
1560		}
1561		if (ithd->it_flags & IT_WAIT) {
1562			wake = 1;
1563			ithd->it_flags &= ~IT_WAIT;
1564		}
1565		thread_unlock(td);
1566		if (wake) {
1567			wakeup(ithd);
1568			wake = 0;
1569		}
1570	}
1571}
1572
1573/*
1574 * Main loop for interrupt filter.
1575 *
1576 * Some architectures (i386, amd64 and arm) require the optional frame
1577 * parameter, and use it as the main argument for fast handler execution
1578 * when ih_argument == NULL.
1579 *
1580 * Return value:
1581 * o FILTER_STRAY:              No filter recognized the event, and no
1582 *                              filter-less handler is registered on this
1583 *                              line.
1584 * o FILTER_HANDLED:            A filter claimed the event and served it.
1585 * o FILTER_SCHEDULE_THREAD:    No filter claimed the event, but there's at
1586 *                              least one filter-less handler on this line.
1587 * o FILTER_HANDLED |
1588 *   FILTER_SCHEDULE_THREAD:    A filter claimed the event, and asked for
1589 *                              scheduling the per-handler ithread.
1590 *
1591 * In case an ithread has to be scheduled, in *ithd there will be a
1592 * pointer to a struct intr_thread containing the thread to be
1593 * scheduled.
1594 */
1595
1596static int
1597intr_filter_loop(struct intr_event *ie, struct trapframe *frame,
1598		 struct intr_thread **ithd)
1599{
1600	struct intr_handler *ih;
1601	void *arg;
1602	int ret, thread_only;
1603
1604	ret = 0;
1605	thread_only = 0;
1606	TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
1607		/*
1608		 * Execute fast interrupt handlers directly.
1609		 * To support clock handlers, if a handler registers
1610		 * with a NULL argument, then we pass it a pointer to
1611		 * a trapframe as its argument.
1612		 */
1613		arg = ((ih->ih_argument == NULL) ? frame : ih->ih_argument);
1614
1615		CTR5(KTR_INTR, "%s: exec %p/%p(%p) for %s", __func__,
1616		     ih->ih_filter, ih->ih_handler, arg, ih->ih_name);
1617
1618		if (ih->ih_filter != NULL)
1619			ret = ih->ih_filter(arg);
1620		else {
1621			thread_only = 1;
1622			continue;
1623		}
1624		KASSERT(ret == FILTER_STRAY ||
1625		    ((ret & (FILTER_SCHEDULE_THREAD | FILTER_HANDLED)) != 0 &&
1626		    (ret & ~(FILTER_SCHEDULE_THREAD | FILTER_HANDLED)) == 0),
1627		    ("%s: incorrect return value %#x from %s", __func__, ret,
1628		    ih->ih_name));
1629		if (ret & FILTER_STRAY)
1630			continue;
1631		else {
1632			*ithd = ih->ih_thread;
1633			return (ret);
1634		}
1635	}
1636
1637	/*
1638	 * No filters handled the interrupt and we have at least
1639	 * one handler without a filter.  In this case, we schedule
1640	 * all of the filter-less handlers to run in the ithread.
1641	 */
1642	if (thread_only) {
1643		*ithd = ie->ie_thread;
1644		return (FILTER_SCHEDULE_THREAD);
1645	}
1646	return (FILTER_STRAY);
1647}
1648
1649/*
1650 * Main interrupt handling body.
1651 *
1652 * Input:
1653 * o ie:                        the event connected to this interrupt.
1654 * o frame:                     some archs (e.g. i386) pass a frame to some
1655 *                              handlers as their main argument.
1656 * Return value:
1657 * o 0:                         everything ok.
1658 * o EINVAL:                    stray interrupt.
1659 */
1660int
1661intr_event_handle(struct intr_event *ie, struct trapframe *frame)
1662{
1663	struct intr_thread *ithd;
1664	struct trapframe *oldframe;
1665	struct thread *td;
1666	int thread;
1667
1668	ithd = NULL;
1669	td = curthread;
1670
1671	if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers))
1672		return (EINVAL);
1673
1674	td->td_intr_nesting_level++;
1675	thread = 0;
1676	critical_enter();
1677	oldframe = td->td_intr_frame;
1678	td->td_intr_frame = frame;
1679	thread = intr_filter_loop(ie, frame, &ithd);
1680	if (thread & FILTER_HANDLED) {
1681		if (ie->ie_post_filter != NULL)
1682			ie->ie_post_filter(ie->ie_source);
1683	} else {
1684		if (ie->ie_pre_ithread != NULL)
1685			ie->ie_pre_ithread(ie->ie_source);
1686	}
1687	td->td_intr_frame = oldframe;
1688	critical_exit();
1689
1690	/* Interrupt storm logic */
1691	if (thread & FILTER_STRAY) {
1692		ie->ie_count++;
1693		if (ie->ie_count < intr_storm_threshold)
1694			printf("Interrupt stray detection not present\n");
1695	}
1696
1697	/* Schedule an ithread if needed. */
1698	if (thread & FILTER_SCHEDULE_THREAD) {
1699		if (intr_event_schedule_thread(ie, ithd) != 0)
1700			panic("%s: impossible stray interrupt", __func__);
1701	}
1702	td->td_intr_nesting_level--;
1703	return (0);
1704}
1705#endif
1706
1707#ifdef DDB
1708/*
1709 * Dump details about an interrupt handler
1710 */
1711static void
1712db_dump_intrhand(struct intr_handler *ih)
1713{
1714	int comma;
1715
1716	db_printf("\t%-10s ", ih->ih_name);
1717	switch (ih->ih_pri) {
1718	case PI_REALTIME:
1719		db_printf("CLK ");
1720		break;
1721	case PI_AV:
1722		db_printf("AV  ");
1723		break;
1724	case PI_TTY:
1725		db_printf("TTY ");
1726		break;
1727	case PI_NET:
1728		db_printf("NET ");
1729		break;
1730	case PI_DISK:
1731		db_printf("DISK");
1732		break;
1733	case PI_DULL:
1734		db_printf("DULL");
1735		break;
1736	default:
1737		if (ih->ih_pri >= PI_SOFT)
1738			db_printf("SWI ");
1739		else
1740			db_printf("%4u", ih->ih_pri);
1741		break;
1742	}
1743	db_printf(" ");
1744	if (ih->ih_filter != NULL) {
1745		db_printf("[F]");
1746		db_printsym((uintptr_t)ih->ih_filter, DB_STGY_PROC);
1747	}
1748	if (ih->ih_handler != NULL) {
1749		if (ih->ih_filter != NULL)
1750			db_printf(",");
1751		db_printf("[H]");
1752		db_printsym((uintptr_t)ih->ih_handler, DB_STGY_PROC);
1753	}
1754	db_printf("(%p)", ih->ih_argument);
1755	if (ih->ih_need ||
1756	    (ih->ih_flags & (IH_EXCLUSIVE | IH_ENTROPY | IH_DEAD |
1757	    IH_MPSAFE)) != 0) {
1758		db_printf(" {");
1759		comma = 0;
1760		if (ih->ih_flags & IH_EXCLUSIVE) {
1761			if (comma)
1762				db_printf(", ");
1763			db_printf("EXCL");
1764			comma = 1;
1765		}
1766		if (ih->ih_flags & IH_ENTROPY) {
1767			if (comma)
1768				db_printf(", ");
1769			db_printf("ENTROPY");
1770			comma = 1;
1771		}
1772		if (ih->ih_flags & IH_DEAD) {
1773			if (comma)
1774				db_printf(", ");
1775			db_printf("DEAD");
1776			comma = 1;
1777		}
1778		if (ih->ih_flags & IH_MPSAFE) {
1779			if (comma)
1780				db_printf(", ");
1781			db_printf("MPSAFE");
1782			comma = 1;
1783		}
1784		if (ih->ih_need) {
1785			if (comma)
1786				db_printf(", ");
1787			db_printf("NEED");
1788		}
1789		db_printf("}");
1790	}
1791	db_printf("\n");
1792}
1793
1794/*
1795 * Dump details about an event.
1796 */
1797void
1798db_dump_intr_event(struct intr_event *ie, int handlers)
1799{
1800	struct intr_handler *ih;
1801	struct intr_thread *it;
1802	int comma;
1803
1804	db_printf("%s ", ie->ie_fullname);
1805	it = ie->ie_thread;
1806	if (it != NULL)
1807		db_printf("(pid %d)", it->it_thread->td_proc->p_pid);
1808	else
1809		db_printf("(no thread)");
1810	if ((ie->ie_flags & (IE_SOFT | IE_ENTROPY | IE_ADDING_THREAD)) != 0 ||
1811	    (it != NULL && it->it_need)) {
1812		db_printf(" {");
1813		comma = 0;
1814		if (ie->ie_flags & IE_SOFT) {
1815			db_printf("SOFT");
1816			comma = 1;
1817		}
1818		if (ie->ie_flags & IE_ENTROPY) {
1819			if (comma)
1820				db_printf(", ");
1821			db_printf("ENTROPY");
1822			comma = 1;
1823		}
1824		if (ie->ie_flags & IE_ADDING_THREAD) {
1825			if (comma)
1826				db_printf(", ");
1827			db_printf("ADDING_THREAD");
1828			comma = 1;
1829		}
1830		if (it != NULL && it->it_need) {
1831			if (comma)
1832				db_printf(", ");
1833			db_printf("NEED");
1834		}
1835		db_printf("}");
1836	}
1837	db_printf("\n");
1838
1839	if (handlers)
1840		TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next)
1841		    db_dump_intrhand(ih);
1842}
1843
1844/*
1845 * Dump data about interrupt handlers
1846 */
1847DB_SHOW_COMMAND(intr, db_show_intr)
1848{
1849	struct intr_event *ie;
1850	int all, verbose;
1851
1852	verbose = strchr(modif, 'v') != NULL;
1853	all = strchr(modif, 'a') != NULL;
1854	TAILQ_FOREACH(ie, &event_list, ie_list) {
1855		if (!all && TAILQ_EMPTY(&ie->ie_handlers))
1856			continue;
1857		db_dump_intr_event(ie, verbose);
1858		if (db_pager_quit)
1859			break;
1860	}
1861}
1862#endif /* DDB */
1863
1864/*
1865 * Start standard software interrupt threads
1866 */
1867static void
1868start_softintr(void *dummy)
1869{
1870
1871	if (swi_add(NULL, "vm", swi_vm, NULL, SWI_VM, INTR_MPSAFE, &vm_ih))
1872		panic("died while creating vm swi ithread");
1873}
1874SYSINIT(start_softintr, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softintr,
1875    NULL);
1876
1877/*
1878 * Sysctls used by systat and others: hw.intrnames and hw.intrcnt.
1879 * The data for this is machine dependent, and the declarations are in machine
1880 * dependent code.  The layout of intrnames and intrcnt, however, is machine
1881 * independent.
1882 *
1883 * We do not know the length of intrcnt and intrnames at compile time, so
1884 * calculate things at run time.
1885 */
1886static int
1887sysctl_intrnames(SYSCTL_HANDLER_ARGS)
1888{
1889	return (sysctl_handle_opaque(oidp, intrnames, sintrnames, req));
1890}
1891
1892SYSCTL_PROC(_hw, OID_AUTO, intrnames, CTLTYPE_OPAQUE | CTLFLAG_RD,
1893    NULL, 0, sysctl_intrnames, "", "Interrupt Names");
1894
1895static int
1896sysctl_intrcnt(SYSCTL_HANDLER_ARGS)
1897{
1898#ifdef SCTL_MASK32
1899	uint32_t *intrcnt32;
1900	unsigned i;
1901	int error;
1902
1903	if (req->flags & SCTL_MASK32) {
1904		if (!req->oldptr)
1905			return (sysctl_handle_opaque(oidp, NULL, sintrcnt / 2, req));
1906		intrcnt32 = malloc(sintrcnt / 2, M_TEMP, M_NOWAIT);
1907		if (intrcnt32 == NULL)
1908			return (ENOMEM);
1909		for (i = 0; i < sintrcnt / sizeof (u_long); i++)
1910			intrcnt32[i] = intrcnt[i];
1911		error = sysctl_handle_opaque(oidp, intrcnt32, sintrcnt / 2, req);
1912		free(intrcnt32, M_TEMP);
1913		return (error);
1914	}
1915#endif
1916	return (sysctl_handle_opaque(oidp, intrcnt, sintrcnt, req));
1917}
1918
1919SYSCTL_PROC(_hw, OID_AUTO, intrcnt, CTLTYPE_OPAQUE | CTLFLAG_RD,
1920    NULL, 0, sysctl_intrcnt, "", "Interrupt Counts");
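
/*
 * Usage note (illustrative, not part of the original file): these two
 * sysctls are the kernel side of tools such as vmstat(8) and systat(1);
 * "vmstat -i", for example, pairs each string in hw.intrnames with the
 * matching counter in hw.intrcnt.
 */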
1921
1922#ifdef DDB
1923/*
1924 * DDB command to dump the interrupt statistics.
1925 */
1926DB_SHOW_COMMAND(intrcnt, db_show_intrcnt)
1927{
1928	u_long *i;
1929	char *cp;
1930	u_int j;
1931
1932	cp = intrnames;
1933	j = 0;
1934	for (i = intrcnt; j < (sintrcnt / sizeof(u_long)) && !db_pager_quit;
1935	    i++, j++) {
1936		if (*cp == '\0')
1937			break;
1938		if (*i != 0)
1939			db_printf("%s\t%lu\n", cp, *i);
1940		cp += strlen(cp) + 1;
1941	}
1942}
1943#endif
1944