io_apic.c revision 340016
1/*-
2 * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: stable/11/sys/x86/x86/io_apic.c 340016 2018-11-01 18:34:26Z jhb $");
29
30#include "opt_acpi.h"
31#include "opt_isa.h"
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/bus.h>
36#include <sys/kernel.h>
37#include <sys/lock.h>
38#include <sys/malloc.h>
39#include <sys/module.h>
40#include <sys/mutex.h>
41#include <sys/rman.h>
42#include <sys/sysctl.h>
43
44#include <dev/pci/pcireg.h>
45#include <dev/pci/pcivar.h>
46
47#include <vm/vm.h>
48#include <vm/pmap.h>
49
50#include <x86/apicreg.h>
51#include <machine/frame.h>
52#include <machine/intr_machdep.h>
53#include <x86/apicvar.h>
54#include <machine/resource.h>
55#include <machine/segments.h>
56#include <x86/iommu/iommu_intrmap.h>
57
/* IRQs below this value (other than IRQ 0) default to ISA settings. */
#define	IOAPIC_ISA_INTS		16
/* Size of the register window mapped with pmap_mapdev() per I/O APIC. */
#define	IOAPIC_MEM_REGION	32
/*
 * Register indices of the low and high words of redirection table
 * entry 'i'.  Every entry takes two consecutive registers starting at
 * IOAPIC_REDTBL, the low word first.
 */
#define	IOAPIC_REDTBL_LO(i)	(IOAPIC_REDTBL + 2 * (i))
#define	IOAPIC_REDTBL_HI(i)	(IOAPIC_REDTBL + 2 * (i) + 1)
62
63static MALLOC_DEFINE(M_IOAPIC, "io_apic", "I/O APIC structures");
64
65/*
66 * I/O APIC interrupt source driver.  Each pin is assigned an IRQ cookie
67 * as laid out in the ACPI System Interrupt number model where each I/O
68 * APIC has a contiguous chunk of the System Interrupt address space.
69 * We assume that IRQs 1 - 15 behave like ISA IRQs and that all other
70 * IRQs behave as PCI IRQs by default.  We also assume that the pin for
71 * IRQ 0 is actually an ExtINT pin.  The apic enumerators override the
72 * configuration of individual pins as indicated by their tables.
73 *
74 * Documentation for the I/O APIC: "82093AA I/O Advanced Programmable
75 * Interrupt Controller (IOAPIC)", May 1996, Intel Corp.
76 * ftp://download.intel.com/design/chipsets/datashts/29056601.pdf
77 */
78
79struct ioapic_intsrc {
80	struct intsrc io_intsrc;
81	int io_irq;
82	u_int io_intpin:8;
83	u_int io_vector:8;
84	u_int io_cpu;
85	u_int io_activehi:1;
86	u_int io_edgetrigger:1;
87	u_int io_masked:1;
88	int io_bus:4;
89	uint32_t io_lowreg;
90	u_int io_remap_cookie;
91};
92
93struct ioapic {
94	struct pic io_pic;
95	u_int io_id:8;			/* logical ID */
96	u_int io_apic_id:4;
97	u_int io_intbase:8;		/* System Interrupt base */
98	u_int io_numintr:8;
99	u_int io_haseoi:1;
100	volatile ioapic_t *io_addr;	/* XXX: should use bus_space */
101	vm_paddr_t io_paddr;
102	STAILQ_ENTRY(ioapic) io_next;
103	device_t pci_dev;		/* matched pci device, if found */
104	struct resource *pci_wnd;	/* BAR 0, should be same or alias to
105					   io_paddr */
106	struct ioapic_intsrc io_pins[0];
107};
108
109static u_int	ioapic_read(volatile ioapic_t *apic, int reg);
110static void	ioapic_write(volatile ioapic_t *apic, int reg, u_int val);
111static const char *ioapic_bus_string(int bus_type);
112static void	ioapic_print_irq(struct ioapic_intsrc *intpin);
113static void	ioapic_register_sources(struct pic *pic);
114static void	ioapic_enable_source(struct intsrc *isrc);
115static void	ioapic_disable_source(struct intsrc *isrc, int eoi);
116static void	ioapic_eoi_source(struct intsrc *isrc);
117static void	ioapic_enable_intr(struct intsrc *isrc);
118static void	ioapic_disable_intr(struct intsrc *isrc);
119static int	ioapic_vector(struct intsrc *isrc);
120static int	ioapic_source_pending(struct intsrc *isrc);
121static int	ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
122		    enum intr_polarity pol);
123static void	ioapic_resume(struct pic *pic, bool suspend_cancelled);
124static int	ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id);
125static void	ioapic_program_intpin(struct ioapic_intsrc *intpin);
126static void	ioapic_reprogram_intpin(struct intsrc *isrc);
127
128static STAILQ_HEAD(,ioapic) ioapic_list = STAILQ_HEAD_INITIALIZER(ioapic_list);
129struct pic ioapic_template = {
130	.pic_register_sources = ioapic_register_sources,
131	.pic_enable_source = ioapic_enable_source,
132	.pic_disable_source = ioapic_disable_source,
133	.pic_eoi_source = ioapic_eoi_source,
134	.pic_enable_intr = ioapic_enable_intr,
135	.pic_disable_intr = ioapic_disable_intr,
136	.pic_vector = ioapic_vector,
137	.pic_source_pending = ioapic_source_pending,
138	.pic_suspend = NULL,
139	.pic_resume = ioapic_resume,
140	.pic_config_intr = ioapic_config_intr,
141	.pic_assign_cpu = ioapic_assign_cpu,
142	.pic_reprogram_pin = ioapic_reprogram_intpin,
143};
144
145static u_int next_ioapic_base;
146static u_int next_id;
147
148static int enable_extint;
149SYSCTL_INT(_hw_apic, OID_AUTO, enable_extint, CTLFLAG_RDTUN, &enable_extint, 0,
150    "Enable the ExtINT pin in the first I/O APIC");
151
152static void
153_ioapic_eoi_source(struct intsrc *isrc, int locked)
154{
155	struct ioapic_intsrc *src;
156	struct ioapic *io;
157	volatile uint32_t *apic_eoi;
158	uint32_t low1;
159
160	lapic_eoi();
161	if (!lapic_eoi_suppression)
162		return;
163	src = (struct ioapic_intsrc *)isrc;
164	if (src->io_edgetrigger)
165		return;
166	io = (struct ioapic *)isrc->is_pic;
167
168	/*
169	 * Handle targeted EOI for level-triggered pins, if broadcast
170	 * EOI suppression is supported by LAPICs.
171	 */
172	if (io->io_haseoi) {
173		/*
174		 * If IOAPIC has EOI Register, simply write vector
175		 * number into the reg.
176		 */
177		apic_eoi = (volatile uint32_t *)((volatile char *)
178		    io->io_addr + IOAPIC_EOIR);
179		*apic_eoi = src->io_vector;
180	} else {
181		/*
182		 * Otherwise, if IO-APIC is too old to provide EOIR,
183		 * do what Intel did for the Linux kernel. Temporary
184		 * switch the pin to edge-trigger and back, masking
185		 * the pin during the trick.
186		 */
187		if (!locked)
188			mtx_lock_spin(&icu_lock);
189		low1 = src->io_lowreg;
190		low1 &= ~IOART_TRGRLVL;
191		low1 |= IOART_TRGREDG | IOART_INTMSET;
192		ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(src->io_intpin),
193		    low1);
194		ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(src->io_intpin),
195		    src->io_lowreg);
196		if (!locked)
197			mtx_unlock_spin(&icu_lock);
198	}
199}
200
201static u_int
202ioapic_read(volatile ioapic_t *apic, int reg)
203{
204
205	mtx_assert(&icu_lock, MA_OWNED);
206	apic->ioregsel = reg;
207	return (apic->iowin);
208}
209
210static void
211ioapic_write(volatile ioapic_t *apic, int reg, u_int val)
212{
213
214	mtx_assert(&icu_lock, MA_OWNED);
215	apic->ioregsel = reg;
216	apic->iowin = val;
217}
218
219static const char *
220ioapic_bus_string(int bus_type)
221{
222
223	switch (bus_type) {
224	case APIC_BUS_ISA:
225		return ("ISA");
226	case APIC_BUS_EISA:
227		return ("EISA");
228	case APIC_BUS_PCI:
229		return ("PCI");
230	default:
231		return ("unknown");
232	}
233}
234
235static void
236ioapic_print_irq(struct ioapic_intsrc *intpin)
237{
238
239	switch (intpin->io_irq) {
240	case IRQ_DISABLED:
241		printf("disabled");
242		break;
243	case IRQ_EXTINT:
244		printf("ExtINT");
245		break;
246	case IRQ_NMI:
247		printf("NMI");
248		break;
249	case IRQ_SMI:
250		printf("SMI");
251		break;
252	default:
253		printf("%s IRQ %d", ioapic_bus_string(intpin->io_bus),
254		    intpin->io_irq);
255	}
256}
257
258static void
259ioapic_enable_source(struct intsrc *isrc)
260{
261	struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
262	struct ioapic *io = (struct ioapic *)isrc->is_pic;
263	uint32_t flags;
264
265	mtx_lock_spin(&icu_lock);
266	if (intpin->io_masked) {
267		flags = intpin->io_lowreg & ~IOART_INTMASK;
268		ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin),
269		    flags);
270		intpin->io_masked = 0;
271	}
272	mtx_unlock_spin(&icu_lock);
273}
274
275static void
276ioapic_disable_source(struct intsrc *isrc, int eoi)
277{
278	struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
279	struct ioapic *io = (struct ioapic *)isrc->is_pic;
280	uint32_t flags;
281
282	mtx_lock_spin(&icu_lock);
283	if (!intpin->io_masked && !intpin->io_edgetrigger) {
284		flags = intpin->io_lowreg | IOART_INTMSET;
285		ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin),
286		    flags);
287		intpin->io_masked = 1;
288	}
289
290	if (eoi == PIC_EOI)
291		_ioapic_eoi_source(isrc, 1);
292
293	mtx_unlock_spin(&icu_lock);
294}
295
/*
 * PIC method: EOI a source from a context that does not hold icu_lock;
 * the helper takes the lock itself when it needs it.
 */
static void
ioapic_eoi_source(struct intsrc *isrc)
{

	_ioapic_eoi_source(isrc, 0 /* icu_lock not held */);
}
302
303/*
304 * Completely program an intpin based on the data in its interrupt source
305 * structure.
306 */
307static void
308ioapic_program_intpin(struct ioapic_intsrc *intpin)
309{
310	struct ioapic *io = (struct ioapic *)intpin->io_intsrc.is_pic;
311	uint32_t low, high;
312#ifdef ACPI_DMAR
313	int error;
314#endif
315
316	/*
317	 * If a pin is completely invalid or if it is valid but hasn't
318	 * been enabled yet, just ensure that the pin is masked.
319	 */
320	mtx_assert(&icu_lock, MA_OWNED);
321	if (intpin->io_irq == IRQ_DISABLED || (intpin->io_irq >= 0 &&
322	    intpin->io_vector == 0)) {
323		low = ioapic_read(io->io_addr,
324		    IOAPIC_REDTBL_LO(intpin->io_intpin));
325		if ((low & IOART_INTMASK) == IOART_INTMCLR)
326			ioapic_write(io->io_addr,
327			    IOAPIC_REDTBL_LO(intpin->io_intpin),
328			    low | IOART_INTMSET);
329#ifdef ACPI_DMAR
330		mtx_unlock_spin(&icu_lock);
331		iommu_unmap_ioapic_intr(io->io_apic_id,
332		    &intpin->io_remap_cookie);
333		mtx_lock_spin(&icu_lock);
334#endif
335		return;
336	}
337
338#ifdef ACPI_DMAR
339	mtx_unlock_spin(&icu_lock);
340	error = iommu_map_ioapic_intr(io->io_apic_id,
341	    intpin->io_cpu, intpin->io_vector, intpin->io_edgetrigger,
342	    intpin->io_activehi, intpin->io_irq, &intpin->io_remap_cookie,
343	    &high, &low);
344	mtx_lock_spin(&icu_lock);
345	if (error == 0) {
346		ioapic_write(io->io_addr, IOAPIC_REDTBL_HI(intpin->io_intpin),
347		    high);
348		intpin->io_lowreg = low;
349		ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin),
350		    low);
351		return;
352	} else if (error != EOPNOTSUPP) {
353		return;
354	}
355#endif
356
357	/*
358	 * Set the destination.  Note that with Intel interrupt remapping,
359	 * the previously reserved bits 55:48 now have a purpose so ensure
360	 * these are zero.
361	 */
362	low = IOART_DESTPHY;
363	high = intpin->io_cpu << APIC_ID_SHIFT;
364
365	/* Program the rest of the low word. */
366	if (intpin->io_edgetrigger)
367		low |= IOART_TRGREDG;
368	else
369		low |= IOART_TRGRLVL;
370	if (intpin->io_activehi)
371		low |= IOART_INTAHI;
372	else
373		low |= IOART_INTALO;
374	if (intpin->io_masked)
375		low |= IOART_INTMSET;
376	switch (intpin->io_irq) {
377	case IRQ_EXTINT:
378		KASSERT(intpin->io_edgetrigger,
379		    ("ExtINT not edge triggered"));
380		low |= IOART_DELEXINT;
381		break;
382	case IRQ_NMI:
383		KASSERT(intpin->io_edgetrigger,
384		    ("NMI not edge triggered"));
385		low |= IOART_DELNMI;
386		break;
387	case IRQ_SMI:
388		KASSERT(intpin->io_edgetrigger,
389		    ("SMI not edge triggered"));
390		low |= IOART_DELSMI;
391		break;
392	default:
393		KASSERT(intpin->io_vector != 0, ("No vector for IRQ %u",
394		    intpin->io_irq));
395		low |= IOART_DELFIXED | intpin->io_vector;
396	}
397
398	/* Write the values to the APIC. */
399	ioapic_write(io->io_addr, IOAPIC_REDTBL_HI(intpin->io_intpin), high);
400	intpin->io_lowreg = low;
401	ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin), low);
402}
403
404static void
405ioapic_reprogram_intpin(struct intsrc *isrc)
406{
407
408	mtx_lock_spin(&icu_lock);
409	ioapic_program_intpin((struct ioapic_intsrc *)isrc);
410	mtx_unlock_spin(&icu_lock);
411}
412
413static int
414ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id)
415{
416	struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
417	struct ioapic *io = (struct ioapic *)isrc->is_pic;
418	u_int old_vector, new_vector;
419	u_int old_id;
420
421	/*
422	 * On Hyper-V:
423	 * - Stick to the first cpu for all I/O APIC pins.
424	 * - And don't allow destination cpu changes.
425	 */
426	if (vm_guest == VM_GUEST_HV) {
427		if (intpin->io_vector)
428			return (EINVAL);
429		else
430			apic_id = 0;
431	}
432
433	/*
434	 * keep 1st core as the destination for NMI
435	 */
436	if (intpin->io_irq == IRQ_NMI)
437		apic_id = 0;
438
439	/*
440	 * Set us up to free the old irq.
441	 */
442	old_vector = intpin->io_vector;
443	old_id = intpin->io_cpu;
444	if (old_vector && apic_id == old_id)
445		return (0);
446
447	/*
448	 * Allocate an APIC vector for this interrupt pin.  Once
449	 * we have a vector we program the interrupt pin.
450	 */
451	new_vector = apic_alloc_vector(apic_id, intpin->io_irq);
452	if (new_vector == 0)
453		return (ENOSPC);
454
455	/*
456	 * Mask the old intpin if it is enabled while it is migrated.
457	 *
458	 * At least some level-triggered interrupts seem to need the
459	 * extra DELAY() to avoid being stuck in a non-EOI'd state.
460	 */
461	mtx_lock_spin(&icu_lock);
462	if (!intpin->io_masked && !intpin->io_edgetrigger) {
463		ioapic_write(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin),
464		    intpin->io_lowreg | IOART_INTMSET);
465		mtx_unlock_spin(&icu_lock);
466		DELAY(100);
467		mtx_lock_spin(&icu_lock);
468	}
469
470	intpin->io_cpu = apic_id;
471	intpin->io_vector = new_vector;
472	if (isrc->is_handlers > 0)
473		apic_enable_vector(intpin->io_cpu, intpin->io_vector);
474	if (bootverbose) {
475		printf("ioapic%u: routing intpin %u (", io->io_id,
476		    intpin->io_intpin);
477		ioapic_print_irq(intpin);
478		printf(") to lapic %u vector %u\n", intpin->io_cpu,
479		    intpin->io_vector);
480	}
481	ioapic_program_intpin(intpin);
482	mtx_unlock_spin(&icu_lock);
483
484	/*
485	 * Free the old vector after the new one is established.  This is done
486	 * to prevent races where we could miss an interrupt.
487	 */
488	if (old_vector) {
489		if (isrc->is_handlers > 0)
490			apic_disable_vector(old_id, old_vector);
491		apic_free_vector(old_id, old_vector, intpin->io_irq);
492	}
493	return (0);
494}
495
496static void
497ioapic_enable_intr(struct intsrc *isrc)
498{
499	struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
500
501	if (intpin->io_vector == 0)
502		if (ioapic_assign_cpu(isrc, intr_next_cpu()) != 0)
503			panic("Couldn't find an APIC vector for IRQ %d",
504			    intpin->io_irq);
505	apic_enable_vector(intpin->io_cpu, intpin->io_vector);
506}
507
508
509static void
510ioapic_disable_intr(struct intsrc *isrc)
511{
512	struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
513	u_int vector;
514
515	if (intpin->io_vector != 0) {
516		/* Mask this interrupt pin and free its APIC vector. */
517		vector = intpin->io_vector;
518		apic_disable_vector(intpin->io_cpu, vector);
519		mtx_lock_spin(&icu_lock);
520		intpin->io_masked = 1;
521		intpin->io_vector = 0;
522		ioapic_program_intpin(intpin);
523		mtx_unlock_spin(&icu_lock);
524		apic_free_vector(intpin->io_cpu, vector, intpin->io_irq);
525	}
526}
527
528static int
529ioapic_vector(struct intsrc *isrc)
530{
531	struct ioapic_intsrc *pin;
532
533	pin = (struct ioapic_intsrc *)isrc;
534	return (pin->io_irq);
535}
536
537static int
538ioapic_source_pending(struct intsrc *isrc)
539{
540	struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
541
542	if (intpin->io_vector == 0)
543		return 0;
544	return (lapic_intr_pending(intpin->io_vector));
545}
546
547static int
548ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
549    enum intr_polarity pol)
550{
551	struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
552	struct ioapic *io = (struct ioapic *)isrc->is_pic;
553	int changed;
554
555	KASSERT(!(trig == INTR_TRIGGER_CONFORM || pol == INTR_POLARITY_CONFORM),
556	    ("%s: Conforming trigger or polarity\n", __func__));
557
558	/*
559	 * EISA interrupts always use active high polarity, so don't allow
560	 * them to be set to active low.
561	 *
562	 * XXX: Should we write to the ELCR if the trigger mode changes for
563	 * an EISA IRQ or an ISA IRQ with the ELCR present?
564	 */
565	mtx_lock_spin(&icu_lock);
566	if (intpin->io_bus == APIC_BUS_EISA)
567		pol = INTR_POLARITY_HIGH;
568	changed = 0;
569	if (intpin->io_edgetrigger != (trig == INTR_TRIGGER_EDGE)) {
570		if (bootverbose)
571			printf("ioapic%u: Changing trigger for pin %u to %s\n",
572			    io->io_id, intpin->io_intpin,
573			    trig == INTR_TRIGGER_EDGE ? "edge" : "level");
574		intpin->io_edgetrigger = (trig == INTR_TRIGGER_EDGE);
575		changed++;
576	}
577	if (intpin->io_activehi != (pol == INTR_POLARITY_HIGH)) {
578		if (bootverbose)
579			printf("ioapic%u: Changing polarity for pin %u to %s\n",
580			    io->io_id, intpin->io_intpin,
581			    pol == INTR_POLARITY_HIGH ? "high" : "low");
582		intpin->io_activehi = (pol == INTR_POLARITY_HIGH);
583		changed++;
584	}
585	if (changed)
586		ioapic_program_intpin(intpin);
587	mtx_unlock_spin(&icu_lock);
588	return (0);
589}
590
591static void
592ioapic_resume(struct pic *pic, bool suspend_cancelled)
593{
594	struct ioapic *io = (struct ioapic *)pic;
595	int i;
596
597	mtx_lock_spin(&icu_lock);
598	for (i = 0; i < io->io_numintr; i++)
599		ioapic_program_intpin(&io->io_pins[i]);
600	mtx_unlock_spin(&icu_lock);
601}
602
603/*
604 * Create a plain I/O APIC object.
605 */
606void *
607ioapic_create(vm_paddr_t addr, int32_t apic_id, int intbase)
608{
609	struct ioapic *io;
610	struct ioapic_intsrc *intpin;
611	volatile ioapic_t *apic;
612	u_int numintr, i;
613	uint32_t value;
614
615	/* Map the register window so we can access the device. */
616	apic = pmap_mapdev(addr, IOAPIC_MEM_REGION);
617	mtx_lock_spin(&icu_lock);
618	value = ioapic_read(apic, IOAPIC_VER);
619	mtx_unlock_spin(&icu_lock);
620
621	/* If it's version register doesn't seem to work, punt. */
622	if (value == 0xffffffff) {
623		pmap_unmapdev((vm_offset_t)apic, IOAPIC_MEM_REGION);
624		return (NULL);
625	}
626
627	/* Determine the number of vectors and set the APIC ID. */
628	numintr = ((value & IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) + 1;
629	io = malloc(sizeof(struct ioapic) +
630	    numintr * sizeof(struct ioapic_intsrc), M_IOAPIC, M_WAITOK);
631	io->io_pic = ioapic_template;
632	io->pci_dev = NULL;
633	io->pci_wnd = NULL;
634	mtx_lock_spin(&icu_lock);
635	io->io_id = next_id++;
636	io->io_apic_id = ioapic_read(apic, IOAPIC_ID) >> APIC_ID_SHIFT;
637	if (apic_id != -1 && io->io_apic_id != apic_id) {
638		ioapic_write(apic, IOAPIC_ID, apic_id << APIC_ID_SHIFT);
639		mtx_unlock_spin(&icu_lock);
640		io->io_apic_id = apic_id;
641		printf("ioapic%u: Changing APIC ID to %d\n", io->io_id,
642		    apic_id);
643	} else
644		mtx_unlock_spin(&icu_lock);
645	if (intbase == -1) {
646		intbase = next_ioapic_base;
647		printf("ioapic%u: Assuming intbase of %d\n", io->io_id,
648		    intbase);
649	} else if (intbase != next_ioapic_base && bootverbose)
650		printf("ioapic%u: WARNING: intbase %d != expected base %d\n",
651		    io->io_id, intbase, next_ioapic_base);
652	io->io_intbase = intbase;
653	next_ioapic_base = intbase + numintr;
654	if (next_ioapic_base > num_io_irqs)
655		num_io_irqs = next_ioapic_base;
656	io->io_numintr = numintr;
657	io->io_addr = apic;
658	io->io_paddr = addr;
659
660	if (bootverbose) {
661		printf("ioapic%u: ver 0x%02x maxredir 0x%02x\n", io->io_id,
662		    (value & IOART_VER_VERSION), (value & IOART_VER_MAXREDIR)
663		    >> MAXREDIRSHIFT);
664	}
665	/*
666	 * The  summary information about IO-APIC versions is taken from
667	 * the Linux kernel source:
668	 *     0Xh     82489DX
669	 *     1Xh     I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
670	 *     2Xh     I/O(x)APIC which is PCI 2.2 Compliant
671	 *     30h-FFh Reserved
672	 * IO-APICs with version >= 0x20 have working EOIR register.
673	 */
674	io->io_haseoi = (value & IOART_VER_VERSION) >= 0x20;
675
676	/*
677	 * Initialize pins.  Start off with interrupts disabled.  Default
678	 * to active-hi and edge-triggered for ISA interrupts and active-lo
679	 * and level-triggered for all others.
680	 */
681	bzero(io->io_pins, sizeof(struct ioapic_intsrc) * numintr);
682	mtx_lock_spin(&icu_lock);
683	for (i = 0, intpin = io->io_pins; i < numintr; i++, intpin++) {
684		intpin->io_intsrc.is_pic = (struct pic *)io;
685		intpin->io_intpin = i;
686		intpin->io_irq = intbase + i;
687
688		/*
689		 * Assume that pin 0 on the first I/O APIC is an ExtINT pin.
690		 * Assume that pins 1-15 are ISA interrupts and that all
691		 * other pins are PCI interrupts.
692		 */
693		if (intpin->io_irq == 0)
694			ioapic_set_extint(io, i);
695		else if (intpin->io_irq < IOAPIC_ISA_INTS) {
696			intpin->io_bus = APIC_BUS_ISA;
697			intpin->io_activehi = 1;
698			intpin->io_edgetrigger = 1;
699			intpin->io_masked = 1;
700		} else {
701			intpin->io_bus = APIC_BUS_PCI;
702			intpin->io_activehi = 0;
703			intpin->io_edgetrigger = 0;
704			intpin->io_masked = 1;
705		}
706
707		/*
708		 * Route interrupts to the BSP by default.  Interrupts may
709		 * be routed to other CPUs later after they are enabled.
710		 */
711		intpin->io_cpu = PCPU_GET(apic_id);
712		value = ioapic_read(apic, IOAPIC_REDTBL_LO(i));
713		ioapic_write(apic, IOAPIC_REDTBL_LO(i), value | IOART_INTMSET);
714#ifdef ACPI_DMAR
715		/* dummy, but sets cookie */
716		mtx_unlock_spin(&icu_lock);
717		iommu_map_ioapic_intr(io->io_apic_id,
718		    intpin->io_cpu, intpin->io_vector, intpin->io_edgetrigger,
719		    intpin->io_activehi, intpin->io_irq,
720		    &intpin->io_remap_cookie, NULL, NULL);
721		mtx_lock_spin(&icu_lock);
722#endif
723	}
724	mtx_unlock_spin(&icu_lock);
725
726	return (io);
727}
728
729int
730ioapic_get_vector(void *cookie, u_int pin)
731{
732	struct ioapic *io;
733
734	io = (struct ioapic *)cookie;
735	if (pin >= io->io_numintr)
736		return (-1);
737	return (io->io_pins[pin].io_irq);
738}
739
740int
741ioapic_disable_pin(void *cookie, u_int pin)
742{
743	struct ioapic *io;
744
745	io = (struct ioapic *)cookie;
746	if (pin >= io->io_numintr)
747		return (EINVAL);
748	if (io->io_pins[pin].io_irq == IRQ_DISABLED)
749		return (EINVAL);
750	io->io_pins[pin].io_irq = IRQ_DISABLED;
751	if (bootverbose)
752		printf("ioapic%u: intpin %d disabled\n", io->io_id, pin);
753	return (0);
754}
755
756int
757ioapic_remap_vector(void *cookie, u_int pin, int vector)
758{
759	struct ioapic *io;
760
761	io = (struct ioapic *)cookie;
762	if (pin >= io->io_numintr || vector < 0)
763		return (EINVAL);
764	if (io->io_pins[pin].io_irq < 0)
765		return (EINVAL);
766	io->io_pins[pin].io_irq = vector;
767	if (bootverbose)
768		printf("ioapic%u: Routing IRQ %d -> intpin %d\n", io->io_id,
769		    vector, pin);
770	return (0);
771}
772
773int
774ioapic_set_bus(void *cookie, u_int pin, int bus_type)
775{
776	struct ioapic *io;
777
778	if (bus_type < 0 || bus_type > APIC_BUS_MAX)
779		return (EINVAL);
780	io = (struct ioapic *)cookie;
781	if (pin >= io->io_numintr)
782		return (EINVAL);
783	if (io->io_pins[pin].io_irq < 0)
784		return (EINVAL);
785	if (io->io_pins[pin].io_bus == bus_type)
786		return (0);
787	io->io_pins[pin].io_bus = bus_type;
788	if (bootverbose)
789		printf("ioapic%u: intpin %d bus %s\n", io->io_id, pin,
790		    ioapic_bus_string(bus_type));
791	return (0);
792}
793
794int
795ioapic_set_nmi(void *cookie, u_int pin)
796{
797	struct ioapic *io;
798
799	io = (struct ioapic *)cookie;
800	if (pin >= io->io_numintr)
801		return (EINVAL);
802	if (io->io_pins[pin].io_irq == IRQ_NMI)
803		return (0);
804	if (io->io_pins[pin].io_irq < 0)
805		return (EINVAL);
806	io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN;
807	io->io_pins[pin].io_irq = IRQ_NMI;
808	io->io_pins[pin].io_masked = 0;
809	io->io_pins[pin].io_edgetrigger = 1;
810	io->io_pins[pin].io_activehi = 1;
811	if (bootverbose)
812		printf("ioapic%u: Routing NMI -> intpin %d\n",
813		    io->io_id, pin);
814	return (0);
815}
816
817int
818ioapic_set_smi(void *cookie, u_int pin)
819{
820	struct ioapic *io;
821
822	io = (struct ioapic *)cookie;
823	if (pin >= io->io_numintr)
824		return (EINVAL);
825	if (io->io_pins[pin].io_irq == IRQ_SMI)
826		return (0);
827	if (io->io_pins[pin].io_irq < 0)
828		return (EINVAL);
829	io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN;
830	io->io_pins[pin].io_irq = IRQ_SMI;
831	io->io_pins[pin].io_masked = 0;
832	io->io_pins[pin].io_edgetrigger = 1;
833	io->io_pins[pin].io_activehi = 1;
834	if (bootverbose)
835		printf("ioapic%u: Routing SMI -> intpin %d\n",
836		    io->io_id, pin);
837	return (0);
838}
839
840int
841ioapic_set_extint(void *cookie, u_int pin)
842{
843	struct ioapic *io;
844
845	io = (struct ioapic *)cookie;
846	if (pin >= io->io_numintr)
847		return (EINVAL);
848	if (io->io_pins[pin].io_irq == IRQ_EXTINT)
849		return (0);
850	if (io->io_pins[pin].io_irq < 0)
851		return (EINVAL);
852	io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN;
853	io->io_pins[pin].io_irq = IRQ_EXTINT;
854	if (enable_extint)
855		io->io_pins[pin].io_masked = 0;
856	else
857		io->io_pins[pin].io_masked = 1;
858	io->io_pins[pin].io_edgetrigger = 1;
859	io->io_pins[pin].io_activehi = 1;
860	if (bootverbose)
861		printf("ioapic%u: Routing external 8259A's -> intpin %d\n",
862		    io->io_id, pin);
863	return (0);
864}
865
866int
867ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol)
868{
869	struct ioapic *io;
870	int activehi;
871
872	io = (struct ioapic *)cookie;
873	if (pin >= io->io_numintr || pol == INTR_POLARITY_CONFORM)
874		return (EINVAL);
875	if (io->io_pins[pin].io_irq < 0)
876		return (EINVAL);
877	activehi = (pol == INTR_POLARITY_HIGH);
878	if (io->io_pins[pin].io_activehi == activehi)
879		return (0);
880	io->io_pins[pin].io_activehi = activehi;
881	if (bootverbose)
882		printf("ioapic%u: intpin %d polarity: %s\n", io->io_id, pin,
883		    pol == INTR_POLARITY_HIGH ? "high" : "low");
884	return (0);
885}
886
887int
888ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger)
889{
890	struct ioapic *io;
891	int edgetrigger;
892
893	io = (struct ioapic *)cookie;
894	if (pin >= io->io_numintr || trigger == INTR_TRIGGER_CONFORM)
895		return (EINVAL);
896	if (io->io_pins[pin].io_irq < 0)
897		return (EINVAL);
898	edgetrigger = (trigger == INTR_TRIGGER_EDGE);
899	if (io->io_pins[pin].io_edgetrigger == edgetrigger)
900		return (0);
901	io->io_pins[pin].io_edgetrigger = edgetrigger;
902	if (bootverbose)
903		printf("ioapic%u: intpin %d trigger: %s\n", io->io_id, pin,
904		    trigger == INTR_TRIGGER_EDGE ? "edge" : "level");
905	return (0);
906}
907
908/*
909 * Register a complete I/O APIC object with the interrupt subsystem.
910 */
911void
912ioapic_register(void *cookie)
913{
914	struct ioapic_intsrc *pin;
915	struct ioapic *io;
916	volatile ioapic_t *apic;
917	uint32_t flags;
918	int i;
919
920	io = (struct ioapic *)cookie;
921	apic = io->io_addr;
922	mtx_lock_spin(&icu_lock);
923	flags = ioapic_read(apic, IOAPIC_VER) & IOART_VER_VERSION;
924	STAILQ_INSERT_TAIL(&ioapic_list, io, io_next);
925	mtx_unlock_spin(&icu_lock);
926	printf("ioapic%u <Version %u.%u> irqs %u-%u on motherboard\n",
927	    io->io_id, flags >> 4, flags & 0xf, io->io_intbase,
928	    io->io_intbase + io->io_numintr - 1);
929
930	/*
931	 * Reprogram pins to handle special case pins (such as NMI and
932	 * SMI) and disable normal pins until a handler is registered.
933	 */
934	intr_register_pic(&io->io_pic);
935	for (i = 0, pin = io->io_pins; i < io->io_numintr; i++, pin++)
936		ioapic_reprogram_intpin(&pin->io_intsrc);
937}
938
939/*
940 * Add interrupt sources for I/O APIC interrupt pins.
941 */
942static void
943ioapic_register_sources(struct pic *pic)
944{
945	struct ioapic_intsrc *pin;
946	struct ioapic *io;
947	int i;
948
949	io = (struct ioapic *)pic;
950	for (i = 0, pin = io->io_pins; i < io->io_numintr; i++, pin++) {
951		if (pin->io_irq >= 0)
952			intr_register_source(&pin->io_intsrc);
953	}
954}
955
956/* A simple new-bus driver to consume PCI I/O APIC devices. */
957static int
958ioapic_pci_probe(device_t dev)
959{
960
961	if (pci_get_class(dev) == PCIC_BASEPERIPH &&
962	    pci_get_subclass(dev) == PCIS_BASEPERIPH_PIC) {
963		switch (pci_get_progif(dev)) {
964		case PCIP_BASEPERIPH_PIC_IO_APIC:
965			device_set_desc(dev, "IO APIC");
966			break;
967		case PCIP_BASEPERIPH_PIC_IOX_APIC:
968			device_set_desc(dev, "IO(x) APIC");
969			break;
970		default:
971			return (ENXIO);
972		}
973		device_quiet(dev);
974		return (-10000);
975	}
976	return (ENXIO);
977}
978
979static int
980ioapic_pci_attach(device_t dev)
981{
982	struct resource *res;
983	volatile ioapic_t *apic;
984	struct ioapic *io;
985	int rid;
986	u_int apic_id;
987
988	/*
989	 * Try to match the enumerated ioapic.  Match BAR start
990	 * against io_paddr.  Due to a fear that PCI window is not the
991	 * same as the MADT reported io window, but an alias, read the
992	 * APIC ID from the mapped BAR and match against it.
993	 */
994	rid = PCIR_BAR(0);
995	res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
996	    RF_ACTIVE | RF_SHAREABLE);
997	if (res == NULL) {
998		if (bootverbose)
999			device_printf(dev, "cannot activate BAR0\n");
1000		return (ENXIO);
1001	}
1002	apic = (volatile ioapic_t *)rman_get_virtual(res);
1003	if (rman_get_size(res) < IOAPIC_WND_SIZE) {
1004		if (bootverbose)
1005			device_printf(dev,
1006			    "BAR0 too small (%jd) for IOAPIC window\n",
1007			    (uintmax_t)rman_get_size(res));
1008		goto fail;
1009	}
1010	mtx_lock_spin(&icu_lock);
1011	apic_id = ioapic_read(apic, IOAPIC_ID) >> APIC_ID_SHIFT;
1012	/* First match by io window address */
1013	STAILQ_FOREACH(io, &ioapic_list, io_next) {
1014		if (io->io_paddr == (vm_paddr_t)rman_get_start(res))
1015			goto found;
1016	}
1017	/* Then by apic id */
1018	STAILQ_FOREACH(io, &ioapic_list, io_next) {
1019		if (io->io_apic_id == apic_id)
1020			goto found;
1021	}
1022	mtx_unlock_spin(&icu_lock);
1023	if (bootverbose)
1024		device_printf(dev,
1025		    "cannot match pci bar apic id %d against MADT\n",
1026		    apic_id);
1027fail:
1028	bus_release_resource(dev, SYS_RES_MEMORY, rid, res);
1029	return (ENXIO);
1030found:
1031	KASSERT(io->pci_dev == NULL,
1032	    ("ioapic %d pci_dev not NULL", io->io_id));
1033	KASSERT(io->pci_wnd == NULL,
1034	    ("ioapic %d pci_wnd not NULL", io->io_id));
1035
1036	io->pci_dev = dev;
1037	io->pci_wnd = res;
1038	if (bootverbose && (io->io_paddr != (vm_paddr_t)rman_get_start(res) ||
1039	    io->io_apic_id != apic_id)) {
1040		device_printf(dev, "pci%d:%d:%d:%d pci BAR0@%jx id %d "
1041		    "MADT id %d paddr@%jx\n",
1042		    pci_get_domain(dev), pci_get_bus(dev),
1043		    pci_get_slot(dev), pci_get_function(dev),
1044		    (uintmax_t)rman_get_start(res), apic_id,
1045		    io->io_apic_id, (uintmax_t)io->io_paddr);
1046	}
1047	mtx_unlock_spin(&icu_lock);
1048	return (0);
1049}
1050
1051static device_method_t ioapic_pci_methods[] = {
1052	/* Device interface */
1053	DEVMETHOD(device_probe,		ioapic_pci_probe),
1054	DEVMETHOD(device_attach,	ioapic_pci_attach),
1055
1056	{ 0, 0 }
1057};
1058
1059DEFINE_CLASS_0(ioapic, ioapic_pci_driver, ioapic_pci_methods, 0);
1060
1061static devclass_t ioapic_devclass;
1062DRIVER_MODULE(ioapic, pci, ioapic_pci_driver, ioapic_devclass, 0, 0);
1063
1064int
1065ioapic_get_rid(u_int apic_id, uint16_t *ridp)
1066{
1067	struct ioapic *io;
1068	uintptr_t rid;
1069	int error;
1070
1071	mtx_lock_spin(&icu_lock);
1072	STAILQ_FOREACH(io, &ioapic_list, io_next) {
1073		if (io->io_apic_id == apic_id)
1074			break;
1075	}
1076	mtx_unlock_spin(&icu_lock);
1077	if (io == NULL || io->pci_dev == NULL)
1078		return (EINVAL);
1079	error = pci_get_id(io->pci_dev, PCI_ID_RID, &rid);
1080	if (error != 0)
1081		return (error);
1082	*ridp = rid;
1083	return (0);
1084}
1085
/*
 * A new-bus driver to consume the memory resources associated with
 * the APICs in the system.  On some systems ACPI or PnPBIOS system
 * resource devices may already claim these resources.  To keep from
 * breaking those devices, we attach ourselves to the nexus device after
 * legacy0 and acpi0 and ignore any allocation failures.
 */
1093static void
1094apic_identify(driver_t *driver, device_t parent)
1095{
1096
1097	/*
1098	 * Add at order 12.  acpi0 is probed at order 10 and legacy0
1099	 * is probed at order 11.
1100	 */
1101	if (lapic_paddr != 0)
1102		BUS_ADD_CHILD(parent, 12, "apic", 0);
1103}
1104
1105static int
1106apic_probe(device_t dev)
1107{
1108
1109	device_set_desc(dev, "APIC resources");
1110	device_quiet(dev);
1111	return (0);
1112}
1113
1114static void
1115apic_add_resource(device_t dev, int rid, vm_paddr_t base, size_t length)
1116{
1117	int error;
1118
1119	error = bus_set_resource(dev, SYS_RES_MEMORY, rid, base, length);
1120	if (error)
1121		panic("apic_add_resource: resource %d failed set with %d", rid,
1122		    error);
1123	bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_SHAREABLE);
1124}
1125
1126static int
1127apic_attach(device_t dev)
1128{
1129	struct ioapic *io;
1130	int i;
1131
1132	/* Reserve the local APIC. */
1133	apic_add_resource(dev, 0, lapic_paddr, LAPIC_MEM_REGION);
1134	i = 1;
1135	STAILQ_FOREACH(io, &ioapic_list, io_next) {
1136		apic_add_resource(dev, i, io->io_paddr, IOAPIC_MEM_REGION);
1137		i++;
1138	}
1139	return (0);
1140}
1141
1142static device_method_t apic_methods[] = {
1143	/* Device interface */
1144	DEVMETHOD(device_identify,	apic_identify),
1145	DEVMETHOD(device_probe,		apic_probe),
1146	DEVMETHOD(device_attach,	apic_attach),
1147
1148	{ 0, 0 }
1149};
1150
1151DEFINE_CLASS_0(apic, apic_driver, apic_methods, 0);
1152
1153static devclass_t apic_devclass;
1154DRIVER_MODULE(apic, nexus, apic_driver, apic_devclass, 0, 0);
1155
1156#include "opt_ddb.h"
1157
1158#ifdef DDB
1159#include <ddb/ddb.h>
1160
1161static const char *
1162ioapic_delivery_mode(uint32_t mode)
1163{
1164
1165	switch (mode) {
1166	case IOART_DELFIXED:
1167		return ("fixed");
1168	case IOART_DELLOPRI:
1169		return ("lowestpri");
1170	case IOART_DELSMI:
1171		return ("SMI");
1172	case IOART_DELRSV1:
1173		return ("rsrvd1");
1174	case IOART_DELNMI:
1175		return ("NMI");
1176	case IOART_DELINIT:
1177		return ("INIT");
1178	case IOART_DELRSV2:
1179		return ("rsrvd2");
1180	case IOART_DELEXINT:
1181		return ("ExtINT");
1182	default:
1183		return ("");
1184	}
1185}
1186
1187static u_int
1188db_ioapic_read(volatile ioapic_t *apic, int reg)
1189{
1190
1191	apic->ioregsel = reg;
1192	return (apic->iowin);
1193}
1194
1195static void
1196db_show_ioapic_one(volatile ioapic_t *io_addr)
1197{
1198	uint32_t r, lo, hi;
1199	int mre, i;
1200
1201	r = db_ioapic_read(io_addr, IOAPIC_VER);
1202	mre = (r & IOART_VER_MAXREDIR) >> MAXREDIRSHIFT;
1203	db_printf("Id 0x%08x Ver 0x%02x MRE %d\n",
1204	    db_ioapic_read(io_addr, IOAPIC_ID), r & IOART_VER_VERSION, mre);
1205	for (i = 0; i < mre; i++) {
1206		lo = db_ioapic_read(io_addr, IOAPIC_REDTBL_LO(i));
1207		hi = db_ioapic_read(io_addr, IOAPIC_REDTBL_HI(i));
1208		db_printf("  pin %d Dest %s/%x %smasked Trig %s RemoteIRR %d "
1209		    "Polarity %s Status %s DeliveryMode %s Vec %d\n", i,
1210		    (lo & IOART_DESTMOD) == IOART_DESTLOG ? "log" : "phy",
1211		    (hi & IOART_DEST) >> 24,
1212		    (lo & IOART_INTMASK) == IOART_INTMSET ? "" : "not",
1213		    (lo & IOART_TRGRMOD) == IOART_TRGRLVL ? "lvl" : "edge",
1214		    (lo & IOART_REM_IRR) == IOART_REM_IRR ? 1 : 0,
1215		    (lo & IOART_INTPOL) == IOART_INTALO ? "low" : "high",
1216		    (lo & IOART_DELIVS) == IOART_DELIVS ? "pend" : "idle",
1217		    ioapic_delivery_mode(lo & IOART_DELMOD),
1218		    (lo & IOART_INTVEC));
1219	  }
1220}
1221
1222DB_SHOW_COMMAND(ioapic, db_show_ioapic)
1223{
1224	struct ioapic *ioapic;
1225	int idx, i;
1226
1227	if (!have_addr) {
1228		db_printf("usage: show ioapic index\n");
1229		return;
1230	}
1231
1232	idx = (int)addr;
1233	i = 0;
1234	STAILQ_FOREACH(ioapic, &ioapic_list, io_next) {
1235		if (idx == i) {
1236			db_show_ioapic_one(ioapic->io_addr);
1237			break;
1238		}
1239		i++;
1240	}
1241}
1242
1243DB_SHOW_ALL_COMMAND(ioapics, db_show_all_ioapics)
1244{
1245	struct ioapic *ioapic;
1246
1247	STAILQ_FOREACH(ioapic, &ioapic_list, io_next)
1248		db_show_ioapic_one(ioapic->io_addr);
1249}
1250#endif
1251