intr_machdep.c revision 367457
/*-
 * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/11/sys/x86/x86/intr_machdep.c 367457 2020-11-07 18:10:59Z dim $
 */

/*
 * Machine dependent interrupt code for x86.  For x86, we have to
 * deal with different PICs.  Thus, we use the passed in vector to lookup
 * an interrupt source associated with that vector.  The interrupt source
 * describes which PIC the source belongs to and includes methods to handle
 * that source.
 */

#include "opt_atpic.h"
#include "opt_ddb.h"
#include "opt_smp.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/ktr.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/vmmeter.h>
#include <machine/clock.h>
#include <machine/intr_machdep.h>
#include <machine/smp.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif

#ifndef DEV_ATPIC
#include <machine/segments.h>
#include <machine/frame.h>
#include <dev/ic/i8259.h>
#include <x86/isa/icu.h>
#ifdef PC98
#include <pc98/cbus/cbus.h>
#else
#include <isa/isareg.h>
#endif
#endif

#define	MAX_STRAY_LOG	5

typedef void (*mask_fn)(void *);

static int intrcnt_index;
static struct intsrc **interrupt_sources;
static struct sx intrsrc_lock;
static struct mtx intrpic_lock;
static struct mtx intrcnt_lock;
static TAILQ_HEAD(pics_head, pic) pics;
u_int num_io_irqs;

#if defined(SMP) && !defined(EARLY_AP_STARTUP)
static int assign_cpu;
#endif

u_long *intrcnt;
char *intrnames;
size_t sintrcnt = sizeof(intrcnt);
size_t sintrnames = sizeof(intrnames);
int nintrcnt;

static MALLOC_DEFINE(M_INTR, "intr", "Interrupt Sources");

static int	intr_assign_cpu(void *arg, int cpu);
static void	intr_disable_src(void *arg);
static void	intr_init(void *__dummy);
static int	intr_pic_registered(struct pic *pic);
static void	intrcnt_setname(const char *name, int index);
static void	intrcnt_updatename(struct intsrc *is);
static void	intrcnt_register(struct intsrc *is);

/*
 * SYSINIT levels for SI_SUB_INTR:
 *
 * SI_ORDER_FIRST: Initialize locks and pics TAILQ, xen_hvm_cpu_init
 * SI_ORDER_SECOND: Xen PICs
 * SI_ORDER_THIRD: Add I/O APIC PICs, alloc MSI and Xen IRQ ranges
 * SI_ORDER_FOURTH: Add 8259A PICs
 * SI_ORDER_FOURTH + 1: Finalize interrupt count and add interrupt sources
 * SI_ORDER_MIDDLE: SMP interrupt counters
 * SI_ORDER_ANY: Enable interrupts on BSP
 */

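/*
 * Return true if the given PIC is already on the list of registered
 * interrupt controllers.
 */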
static int
intr_pic_registered(struct pic *pic)
{
	struct pic *p;

	TAILQ_FOREACH(p, &pics, pics) {
		if (p == pic)
			return (1);
	}
	return (0);
}

/*
 * Register a new interrupt controller (PIC).  This is to support suspend
 * and resume where we suspend/resume controllers rather than individual
 * sources.  This also allows controllers with no active sources (such as
 * 8259As in a system using the APICs) to participate in suspend and resume.
 */
int
intr_register_pic(struct pic *pic)
{
	int error;

	mtx_lock(&intrpic_lock);
	if (intr_pic_registered(pic))
		error = EBUSY;
	else {
		TAILQ_INSERT_TAIL(&pics, pic, pics);
		error = 0;
	}
	mtx_unlock(&intrpic_lock);
	return (error);
}

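/*
 * A minimal sketch of how a PIC driver typically hooks into this layer.
 * The foo_* names are hypothetical; only struct pic methods that this
 * file itself uses are shown:
 *
 *	static struct pic foo_pic = {
 *		.pic_register_sources = foo_register_sources,
 *		.pic_enable_source = foo_enable_source,
 *		.pic_disable_source = foo_disable_source,
 *		.pic_eoi_source = foo_eoi_source,
 *		.pic_enable_intr = foo_enable_intr,
 *		.pic_disable_intr = foo_disable_intr,
 *		.pic_vector = foo_vector,
 *	};
 *
 *	intr_register_pic(&foo_pic);
 *
 * The PIC's individual sources are added later, from its
 * pic_register_sources() method, via intr_register_source().
 */
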
/*
 * Allocate interrupt source arrays and register interrupt sources
 * once the number of interrupts is known.
 */
static void
intr_init_sources(void *arg)
{
	struct pic *pic;

	MPASS(num_io_irqs > 0);

	interrupt_sources = mallocarray(num_io_irqs, sizeof(*interrupt_sources),
	    M_INTR, M_WAITOK | M_ZERO);

	/*
	 * - 1 ??? dummy counter.
	 * - 2 counters for each I/O interrupt.
	 * - 1 counter for each CPU for lapic timer.
	 * - 1 counter for each CPU for the Hyper-V vmbus driver.
	 * - 8 counters for each CPU for IPI counters for SMP.
	 */
	nintrcnt = 1 + num_io_irqs * 2 + mp_ncpus * 2;
#ifdef COUNT_IPIS
	if (mp_ncpus > 1)
		nintrcnt += 8 * mp_ncpus;
#endif
	intrcnt = mallocarray(nintrcnt, sizeof(u_long), M_INTR, M_WAITOK |
	    M_ZERO);
	intrnames = mallocarray(nintrcnt, MAXCOMLEN + 1, M_INTR, M_WAITOK |
	    M_ZERO);
	sintrcnt = nintrcnt * sizeof(u_long);
	sintrnames = nintrcnt * (MAXCOMLEN + 1);

	intrcnt_setname("???", 0);
	intrcnt_index = 1;

	/*
	 * NB: intrpic_lock is not held here to avoid LORs due to
	 * malloc() in intr_register_source().  However, we are still
	 * single-threaded at this point in startup so the list of
	 * PICs shouldn't change.
	 */
	TAILQ_FOREACH(pic, &pics, pics) {
		if (pic->pic_register_sources != NULL)
			pic->pic_register_sources(pic);
	}
}
SYSINIT(intr_init_sources, SI_SUB_INTR, SI_ORDER_FOURTH + 1, intr_init_sources,
    NULL);

/*
 * Register a new interrupt source with the global interrupt system.
 * The global interrupts need to be disabled when this function is
 * called.
 */
int
intr_register_source(struct intsrc *isrc)
{
	int error, vector;

	KASSERT(intr_pic_registered(isrc->is_pic), ("unregistered PIC"));
	vector = isrc->is_pic->pic_vector(isrc);
	KASSERT(vector < num_io_irqs, ("IRQ %d too large (%u irqs)", vector,
	    num_io_irqs));
	if (interrupt_sources[vector] != NULL)
		return (EEXIST);
	error = intr_event_create(&isrc->is_event, isrc, 0, vector,
	    intr_disable_src, (mask_fn)isrc->is_pic->pic_enable_source,
	    (mask_fn)isrc->is_pic->pic_eoi_source, intr_assign_cpu, "irq%d:",
	    vector);
	if (error)
		return (error);
	sx_xlock(&intrsrc_lock);
	if (interrupt_sources[vector] != NULL) {
		sx_xunlock(&intrsrc_lock);
		intr_event_destroy(isrc->is_event);
		return (EEXIST);
	}
	intrcnt_register(isrc);
	interrupt_sources[vector] = isrc;
	isrc->is_handlers = 0;
	sx_xunlock(&intrsrc_lock);
	return (0);
}

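/*
 * Look up the interrupt source for a global IRQ number.  Returns NULL
 * for out-of-range vectors and for vectors with no registered source.
 */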
struct intsrc *
intr_lookup_source(int vector)
{

	if (vector < 0 || vector >= num_io_irqs)
		return (NULL);
	return (interrupt_sources[vector]);
}

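/*
 * Add a filter and/or threaded handler to an interrupt source's event.
 * The source is enabled and unmasked at its PIC when its first handler
 * is attached.
 */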
int
intr_add_handler(const char *name, int vector, driver_filter_t filter,
    driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep)
{
	struct intsrc *isrc;
	int error;

	isrc = intr_lookup_source(vector);
	if (isrc == NULL)
		return (EINVAL);
	error = intr_event_add_handler(isrc->is_event, name, filter, handler,
	    arg, intr_priority(flags), flags, cookiep);
	if (error == 0) {
		sx_xlock(&intrsrc_lock);
		intrcnt_updatename(isrc);
		isrc->is_handlers++;
		if (isrc->is_handlers == 1) {
			isrc->is_pic->pic_enable_intr(isrc);
			isrc->is_pic->pic_enable_source(isrc);
		}
		sx_xunlock(&intrsrc_lock);
	}
	return (error);
}

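/*
 * Remove a previously added handler.  The source is masked and disabled
 * at its PIC when its last handler is removed.
 */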
int
intr_remove_handler(void *cookie)
{
	struct intsrc *isrc;
	int error;

	isrc = intr_handler_source(cookie);
	error = intr_event_remove_handler(cookie);
	if (error == 0) {
		sx_xlock(&intrsrc_lock);
		isrc->is_handlers--;
		if (isrc->is_handlers == 0) {
			isrc->is_pic->pic_disable_source(isrc, PIC_NO_EOI);
			isrc->is_pic->pic_disable_intr(isrc);
		}
		intrcnt_updatename(isrc);
		sx_xunlock(&intrsrc_lock);
	}
	return (error);
}

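/* Pass a trigger mode and polarity request through to the owning PIC. */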
int
intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol)
{
	struct intsrc *isrc;

	isrc = intr_lookup_source(vector);
	if (isrc == NULL)
		return (EINVAL);
	return (isrc->is_pic->pic_config_intr(isrc, trig, pol));
}

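/*
 * Mask and EOI an interrupt source.  The interrupt event code uses this
 * to keep a source masked while its threaded handlers run.
 */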
static void
intr_disable_src(void *arg)
{
	struct intsrc *isrc;

	isrc = arg;
	isrc->is_pic->pic_disable_source(isrc, PIC_EOI);
}

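/*
 * Dispatch an interrupt from a source: count it and hand it to the
 * source's interrupt event, masking the source if it turns out to be
 * stray.
 */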
void
intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame)
{
	struct intr_event *ie;
	int vector;

	/*
	 * We count software interrupts when we process them.  The
	 * code here follows previous practice, but there's an
	 * argument for counting hardware interrupts when they're
	 * processed too.
	 */
	(*isrc->is_count)++;
	PCPU_INC(cnt.v_intr);

	ie = isrc->is_event;

	/*
	 * XXX: We assume that IRQ 0 is only used for the ISA timer
	 * device (clk).
	 */
	vector = isrc->is_pic->pic_vector(isrc);
	if (vector == 0)
		clkintr_pending = 1;

	/*
	 * For stray interrupts, mask and EOI the source, bump the
	 * stray count, and log the condition.
	 */
	if (intr_event_handle(ie, frame) != 0) {
		isrc->is_pic->pic_disable_source(isrc, PIC_EOI);
		(*isrc->is_straycount)++;
		if (*isrc->is_straycount < MAX_STRAY_LOG)
			log(LOG_ERR, "stray irq%d\n", vector);
		else if (*isrc->is_straycount == MAX_STRAY_LOG)
			log(LOG_CRIT,
			    "too many stray irq %d's: not logging anymore\n",
			    vector);
	}
}

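/* Resume all registered interrupt controllers in registration order. */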
void
intr_resume(bool suspend_cancelled)
{
	struct pic *pic;

#ifndef DEV_ATPIC
	atpic_reset();
#endif
	mtx_lock(&intrpic_lock);
	TAILQ_FOREACH(pic, &pics, pics) {
		if (pic->pic_resume != NULL)
			pic->pic_resume(pic, suspend_cancelled);
	}
	mtx_unlock(&intrpic_lock);
}

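/*
 * Suspend all registered interrupt controllers, in the reverse of the
 * order in which they were registered.
 */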
void
intr_suspend(void)
{
	struct pic *pic;

	mtx_lock(&intrpic_lock);
	TAILQ_FOREACH_REVERSE(pic, &pics, pics_head, pics) {
		if (pic->pic_suspend != NULL)
			pic->pic_suspend(pic);
	}
	mtx_unlock(&intrpic_lock);
}

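/*
 * Route an interrupt source to a specific CPU by handing the CPU's
 * local APIC ID to the source's PIC.
 */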
static int
intr_assign_cpu(void *arg, int cpu)
{
#ifdef SMP
	struct intsrc *isrc;
	int error;

#ifdef EARLY_AP_STARTUP
	MPASS(mp_ncpus == 1 || smp_started);

	/* Nothing to do if there is only a single CPU. */
	if (mp_ncpus > 1 && cpu != NOCPU) {
#else
	/*
	 * Don't do anything during early boot.  We will pick up the
	 * assignment once the APs are started.
	 */
	if (assign_cpu && cpu != NOCPU) {
#endif
		isrc = arg;
		sx_xlock(&intrsrc_lock);
		error = isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]);
		sx_xunlock(&intrsrc_lock);
	} else
		error = 0;
	return (error);
#else
	return (EOPNOTSUPP);
#endif
}

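/* Set the name reported for statistics slot 'index' in intrnames. */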
static void
intrcnt_setname(const char *name, int index)
{

	snprintf(intrnames + (MAXCOMLEN + 1) * index, MAXCOMLEN + 1, "%-*s",
	    MAXCOMLEN, name);
}

static void
intrcnt_updatename(struct intsrc *is)
{

	intrcnt_setname(is->is_event->ie_fullname, is->is_index);
}

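/*
 * Reserve two statistics slots for an interrupt source: one counting
 * handled interrupts and one counting stray interrupts.
 */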
static void
intrcnt_register(struct intsrc *is)
{
	char straystr[MAXCOMLEN + 1];

	KASSERT(is->is_event != NULL, ("%s: isrc with no event", __func__));
	mtx_lock_spin(&intrcnt_lock);
	MPASS(intrcnt_index + 2 <= nintrcnt);
	is->is_index = intrcnt_index;
	intrcnt_index += 2;
	snprintf(straystr, MAXCOMLEN + 1, "stray irq%d",
	    is->is_pic->pic_vector(is));
	intrcnt_updatename(is);
	is->is_count = &intrcnt[is->is_index];
	intrcnt_setname(straystr, is->is_index + 1);
	is->is_straycount = &intrcnt[is->is_index + 1];
	mtx_unlock_spin(&intrcnt_lock);
}

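/*
 * Allocate a single named statistics counter, e.g. for the per-CPU
 * local APIC timer.
 */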
void
intrcnt_add(const char *name, u_long **countp)
{

	mtx_lock_spin(&intrcnt_lock);
	MPASS(intrcnt_index < nintrcnt);
	*countp = &intrcnt[intrcnt_index];
	intrcnt_setname(name, intrcnt_index);
	intrcnt_index++;
	mtx_unlock_spin(&intrcnt_lock);
}

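/* Initialize the PIC list and the locks protecting the interrupt tables. */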
static void
intr_init(void *dummy __unused)
{

	TAILQ_INIT(&pics);
	mtx_init(&intrpic_lock, "intrpic", NULL, MTX_DEF);
	sx_init(&intrsrc_lock, "intrsrc");
	mtx_init(&intrcnt_lock, "intrcnt", NULL, MTX_SPIN);
}
SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL);

static void
intr_init_final(void *dummy __unused)
{

	/*
	 * Enable interrupts on the BSP after all of the interrupt
	 * controllers are initialized.  Device interrupts are still
	 * disabled in the interrupt controllers until interrupt
	 * handlers are registered.  Interrupts are enabled on each AP
	 * after their first context switch.
	 */
	enable_intr();
}
SYSINIT(intr_init_final, SI_SUB_INTR, SI_ORDER_ANY, intr_init_final, NULL);

#ifndef DEV_ATPIC
/* Initialize the two 8259A's to a known-good shutdown state. */
void
atpic_reset(void)
{

	outb(IO_ICU1, ICW1_RESET | ICW1_IC4);
	outb(IO_ICU1 + ICU_IMR_OFFSET, IDT_IO_INTS);
	outb(IO_ICU1 + ICU_IMR_OFFSET, IRQ_MASK(ICU_SLAVEID));
	outb(IO_ICU1 + ICU_IMR_OFFSET, MASTER_MODE);
	outb(IO_ICU1 + ICU_IMR_OFFSET, 0xff);
	outb(IO_ICU1, OCW3_SEL | OCW3_RR);

	outb(IO_ICU2, ICW1_RESET | ICW1_IC4);
	outb(IO_ICU2 + ICU_IMR_OFFSET, IDT_IO_INTS + 8);
	outb(IO_ICU2 + ICU_IMR_OFFSET, ICU_SLAVEID);
	outb(IO_ICU2 + ICU_IMR_OFFSET, SLAVE_MODE);
	outb(IO_ICU2 + ICU_IMR_OFFSET, 0xff);
	outb(IO_ICU2, OCW3_SEL | OCW3_RR);
}
#endif

/* Add a description to an active interrupt handler. */
int
intr_describe(u_int vector, void *ih, const char *descr)
{
	struct intsrc *isrc;
	int error;

	isrc = intr_lookup_source(vector);
	if (isrc == NULL)
		return (EINVAL);
	error = intr_event_describe_handler(isrc->is_event, ih, descr);
	if (error)
		return (error);
	intrcnt_updatename(isrc);
	return (0);
}

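/* Ask the PIC of every registered interrupt source to reprogram its pin. */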
void
intr_reprogram(void)
{
	struct intsrc *is;
	u_int v;

	sx_xlock(&intrsrc_lock);
	for (v = 0; v < num_io_irqs; v++) {
		is = interrupt_sources[v];
		if (is == NULL)
			continue;
		if (is->is_pic->pic_reprogram_pin != NULL)
			is->is_pic->pic_reprogram_pin(is);
	}
	sx_xunlock(&intrsrc_lock);
}

#ifdef DDB
/*
 * Dump data about interrupt handlers
 */
DB_SHOW_COMMAND(irqs, db_show_irqs)
{
	struct intsrc **isrc;
	u_int i;
	int verbose;

	if (strcmp(modif, "v") == 0)
		verbose = 1;
	else
		verbose = 0;
	isrc = interrupt_sources;
	for (i = 0; i < num_io_irqs && !db_pager_quit; i++, isrc++)
		if (*isrc != NULL)
			db_dump_intr_event((*isrc)->is_event, verbose);
}
#endif

#ifdef SMP
/*
 * Support for balancing interrupt sources across CPUs.  For now we just
 * allocate CPUs round-robin.
 */

cpuset_t intr_cpus = CPUSET_T_INITIALIZER(0x1);
static int current_cpu;

/*
 * Return the CPU that the next interrupt source should use.  For now
 * this just returns the next local APIC according to round-robin.
 */
u_int
intr_next_cpu(void)
{
	u_int apic_id;

#ifdef EARLY_AP_STARTUP
	MPASS(mp_ncpus == 1 || smp_started);
	if (mp_ncpus == 1)
		return (PCPU_GET(apic_id));
#else
	/* Leave all interrupts on the BSP during boot. */
	if (!assign_cpu)
		return (PCPU_GET(apic_id));
#endif

	mtx_lock_spin(&icu_lock);
	apic_id = cpu_apic_ids[current_cpu];
	do {
		current_cpu++;
		if (current_cpu > mp_maxid)
			current_cpu = 0;
	} while (!CPU_ISSET(current_cpu, &intr_cpus));
	mtx_unlock_spin(&icu_lock);
	return (apic_id);
}

/* Attempt to bind the specified IRQ to the specified CPU. */
int
intr_bind(u_int vector, u_char cpu)
{
	struct intsrc *isrc;

	isrc = intr_lookup_source(vector);
	if (isrc == NULL)
		return (EINVAL);
	return (intr_event_bind(isrc->is_event, cpu));
}

/*
 * Add a CPU to our mask of valid CPUs that can be destinations of
 * interrupts.
 */
void
intr_add_cpu(u_int cpu)
{

	if (cpu >= MAXCPU)
		panic("%s: Invalid CPU ID", __func__);
	if (bootverbose)
		printf("INTR: Adding local APIC %d as a target\n",
		    cpu_apic_ids[cpu]);

	CPU_SET(cpu, &intr_cpus);
}

#ifndef EARLY_AP_STARTUP
/*
 * Distribute all the interrupt sources among the available CPUs once the
 * AP's have been launched.
 */
static void
intr_shuffle_irqs(void *arg __unused)
{
	struct intsrc *isrc;
	u_int i;

	/* Don't bother on UP. */
	if (mp_ncpus == 1)
		return;

	/* Round-robin assign a CPU to each enabled source. */
	sx_xlock(&intrsrc_lock);
	assign_cpu = 1;
	for (i = 0; i < num_io_irqs; i++) {
		isrc = interrupt_sources[i];
		if (isrc != NULL && isrc->is_handlers > 0) {
			/*
			 * If this event is already bound to a CPU,
			 * then assign the source to that CPU instead
			 * of picking one via round-robin.  Note that
			 * this is careful to only advance the
			 * round-robin if the CPU assignment succeeds.
			 */
			if (isrc->is_event->ie_cpu != NOCPU)
				(void)isrc->is_pic->pic_assign_cpu(isrc,
				    cpu_apic_ids[isrc->is_event->ie_cpu]);
			else if (isrc->is_pic->pic_assign_cpu(isrc,
				cpu_apic_ids[current_cpu]) == 0)
				(void)intr_next_cpu();

		}
	}
	sx_xunlock(&intrsrc_lock);
}
SYSINIT(intr_shuffle_irqs, SI_SUB_SMP, SI_ORDER_SECOND, intr_shuffle_irqs,
    NULL);
#endif
#else
/*
 * Always route interrupts to the current processor in the UP case.
 */
u_int
intr_next_cpu(void)
{

	return (PCPU_GET(apic_id));
}
#endif