1/*	$NetBSD: intr.c,v 1.168 2024/04/22 22:29:28 andvar Exp $	*/
2
3/*
4 * Copyright (c) 2007, 2008, 2009, 2019 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran, and by Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32/*
33 * Copyright 2002 (c) Wasabi Systems, Inc.
34 * All rights reserved.
35 *
36 * Written by Frank van der Linden for Wasabi Systems, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 *    notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 *    notice, this list of conditions and the following disclaimer in the
45 *    documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 *    must display the following acknowledgement:
48 *      This product includes software developed for the NetBSD Project by
49 *      Wasabi Systems, Inc.
50 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
51 *    or promote products derived from this software without specific prior
52 *    written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
56 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
57 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
58 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
59 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
60 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
61 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
62 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
63 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
64 * POSSIBILITY OF SUCH DAMAGE.
65 */
66
67/*-
68 * Copyright (c) 1991 The Regents of the University of California.
69 * All rights reserved.
70 *
71 * This code is derived from software contributed to Berkeley by
72 * William Jolitz.
73 *
74 * Redistribution and use in source and binary forms, with or without
75 * modification, are permitted provided that the following conditions
76 * are met:
77 * 1. Redistributions of source code must retain the above copyright
78 *    notice, this list of conditions and the following disclaimer.
79 * 2. Redistributions in binary form must reproduce the above copyright
80 *    notice, this list of conditions and the following disclaimer in the
81 *    documentation and/or other materials provided with the distribution.
82 * 3. Neither the name of the University nor the names of its contributors
83 *    may be used to endorse or promote products derived from this software
84 *    without specific prior written permission.
85 *
86 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
87 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
88 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
89 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
90 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
91 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
92 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
93 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
94 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
95 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
96 * SUCH DAMAGE.
97 *
98 *	@(#)isa.c	7.2 (Berkeley) 5/13/91
99 */
100
101/*-
102 * Copyright (c) 1993, 1994 Charles Hannum.
103 *
104 * Redistribution and use in source and binary forms, with or without
105 * modification, are permitted provided that the following conditions
106 * are met:
107 * 1. Redistributions of source code must retain the above copyright
108 *    notice, this list of conditions and the following disclaimer.
109 * 2. Redistributions in binary form must reproduce the above copyright
110 *    notice, this list of conditions and the following disclaimer in the
111 *    documentation and/or other materials provided with the distribution.
112 * 3. All advertising materials mentioning features or use of this software
113 *    must display the following acknowledgement:
114 *	This product includes software developed by the University of
115 *	California, Berkeley and its contributors.
116 * 4. Neither the name of the University nor the names of its contributors
117 *    may be used to endorse or promote products derived from this software
118 *    without specific prior written permission.
119 *
120 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
121 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
122 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
123 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
124 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
125 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
126 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
127 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
128 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
129 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
130 * SUCH DAMAGE.
131 *
132 *	@(#)isa.c	7.2 (Berkeley) 5/13/91
133 */
134
135#include <sys/cdefs.h>
136__KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.168 2024/04/22 22:29:28 andvar Exp $");
137
138#include "opt_acpi.h"
139#include "opt_intrdebug.h"
140#include "opt_multiprocessor.h"
141#include "opt_pci.h"
142
143#include <sys/param.h>
144#include <sys/systm.h>
145#include <sys/kernel.h>
146#include <sys/syslog.h>
147#include <sys/device.h>
148#include <sys/kmem.h>
149#include <sys/proc.h>
150#include <sys/errno.h>
151#include <sys/intr.h>
152#include <sys/cpu.h>
153#include <sys/xcall.h>
154#include <sys/interrupt.h>
155#include <sys/reboot.h> /* for AB_VERBOSE */
156#include <sys/sdt.h>
157
158#include <sys/kauth.h>
159#include <sys/conf.h>
160
161#include <uvm/uvm_extern.h>
162
163#include <machine/i8259.h>
164#include <machine/pio.h>
165
166#include <x86/intr_private.h>
167
168#include "ioapic.h"
169#include "lapic.h"
170#include "pci.h"
171#include "acpica.h"
172#ifndef XENPV
173#include "hyperv.h"
174#if NHYPERV > 0
175#include <dev/hyperv/hypervvar.h>
176
177extern void Xresume_hyperv_hypercall(void);
178extern void Xrecurse_hyperv_hypercall(void);
179#endif
180#endif
181
182#if NIOAPIC > 0 || NACPICA > 0
183#include <machine/i82093var.h>
184#include <machine/mpbiosvar.h>
185#include <machine/mpacpi.h>
186#endif
187
188#if NLAPIC > 0
189#include <machine/i82489var.h>
190#endif
191
192#if NPCI > 0
193#include <dev/pci/ppbreg.h>
194#endif
195
196#include <x86/pci/msipic.h>
197#include <x86/pci/pci_msi_machdep.h>
198
199#if NPCI == 0 || !defined(__HAVE_PCI_MSI_MSIX)
200#define msipic_is_msi_pic(PIC)	(false)
201#endif
202
203#include <ddb/db_active.h>
204
205#ifdef DDB
206#include <ddb/db_output.h>
207#endif
208
209#ifdef INTRDEBUG
210#define DPRINTF(msg) printf msg
211#else
212#define DPRINTF(msg)
213#endif
214
215static SIMPLEQ_HEAD(, intrsource) io_interrupt_sources =
216	SIMPLEQ_HEAD_INITIALIZER(io_interrupt_sources);
217
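/* Serializes interrupt (re)distribution; intr_set_affinity() runs with it held. */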
218static kmutex_t intr_distribute_lock;
219
220static int intr_allocate_slot_cpu(struct cpu_info *, struct pic *, int, int *,
221				  struct intrsource *);
222static int __noinline intr_allocate_slot(struct pic *, int, int,
223					 struct cpu_info **, int *, int *,
224					 struct intrsource *);
225
226static void intr_source_free(struct cpu_info *, int, struct pic *, int);
227
228static void intr_establish_xcall(void *, void *);
229static void intr_disestablish_xcall(void *, void *);
230
231static const char *legacy_intr_string(int, char *, size_t, struct pic *);
232
233static const char *xen_intr_string(int, char *, size_t, struct pic *);
234
235#if defined(INTRSTACKSIZE)
236static inline bool redzone_const_or_false(bool);
237static inline int redzone_const_or_zero(int);
238#endif
239
240static void intr_redistribute_xc_t(void *, void *);
241static void intr_redistribute_xc_s1(void *, void *);
242static void intr_redistribute_xc_s2(void *, void *);
243static bool intr_redistribute(struct cpu_info *);
244static struct intrsource *intr_get_io_intrsource(const char *);
245static void intr_free_io_intrsource_direct(struct intrsource *);
246static int intr_num_handlers(struct intrsource *);
247static int intr_find_unused_slot(struct cpu_info *, int *);
248static void intr_activate_xcall(void *, void *);
249static void intr_deactivate_xcall(void *, void *);
250static void intr_get_affinity(struct intrsource *, kcpuset_t *);
251static int intr_set_affinity(struct intrsource *, const kcpuset_t *);
252
253SDT_PROBE_DEFINE3(sdt, kernel, intr, entry,
254    "int (*)(void *)"/*func*/,
255    "void *"/*arg*/,
256    "struct intrhand *"/*ih*/);
257SDT_PROBE_DEFINE4(sdt, kernel, intr, return,
258    "int (*)(void *)"/*func*/,
259    "void *"/*arg*/,
260    "struct intrhand *"/*ih*/,
261    "int"/*handled*/);
262
/*
 * Fill in the default interrupt table (in case of a spurious interrupt
 * during configuration of the kernel) and set up the interrupt control unit.
 */
267void
268intr_default_setup(void)
269{
270	struct idt_vec *iv = &(cpu_info_primary.ci_idtvec);
271	int i;
272
273	/* icu vectors */
274	for (i = 0; i < NUM_LEGACY_IRQS; i++) {
275		idt_vec_reserve(iv, ICU_OFFSET + i);
276		idt_vec_set(iv, ICU_OFFSET + i, legacy_stubs[i].ist_entry);
277	}
278
279	/*
280	 * Eventually might want to check if it's actually there.
281	 */
282	i8259_default_setup();
283
284	mutex_init(&intr_distribute_lock, MUTEX_DEFAULT, IPL_NONE);
285}
286
/*
 * Handle an NMI, possibly a machine check; currently we just log it.
 */
291void
292x86_nmi(void)
293{
294
295	log(LOG_CRIT, "NMI port 61 %x, port 70 %x\n", inb(0x61), inb(0x70));
296}
297
298/*
299 * Create an interrupt id such as "ioapic0 pin 9". This interrupt id is used
300 * by MI code and intrctl(8).
301 */
302const char *
303intr_create_intrid(int legacy_irq, struct pic *pic, int pin, char *buf,
304    size_t len)
305{
306	int ih = 0;
307
308#if NPCI > 0
309#if defined(__HAVE_PCI_MSI_MSIX)
310	if ((pic->pic_type == PIC_MSI) || (pic->pic_type == PIC_MSIX)) {
311		uint64_t pih;
312		int dev, vec;
313
314		dev = msipic_get_devid(pic);
315		vec = pin;
316		pih = __SHIFTIN((uint64_t)dev, MSI_INT_DEV_MASK)
317			| __SHIFTIN((uint64_t)vec, MSI_INT_VEC_MASK)
318			| APIC_INT_VIA_MSI;
319		if (pic->pic_type == PIC_MSI)
320			MSI_INT_MAKE_MSI(pih);
321		else if (pic->pic_type == PIC_MSIX)
322			MSI_INT_MAKE_MSIX(pih);
323
324		return x86_pci_msi_string(NULL, pih, buf, len);
325	}
326#endif /* __HAVE_PCI_MSI_MSIX */
327#endif
328
329	if (pic->pic_type == PIC_XEN) {
330		ih = pin;	/* Port == pin */
331		return xen_intr_string(pin, buf, len, pic);
332	}
333
	/*
	 * If the device is PCI, "legacy_irq" is always -1.  The least
	 * significant 8 bits of "ih" are only used by intr_string() to
	 * show the IRQ number.
	 * If the device is "legacy" (such as a floppy), it should not use
	 * intr_string().
	 */
340	if (pic->pic_type == PIC_I8259) {
341		ih = legacy_irq;
342		return legacy_intr_string(ih, buf, len, pic);
343	}
344
345#if NIOAPIC > 0 || NACPICA > 0
346	ih = ((pic->pic_apicid << APIC_INT_APIC_SHIFT) & APIC_INT_APIC_MASK)
347	    | ((pin << APIC_INT_PIN_SHIFT) & APIC_INT_PIN_MASK);
348	if (pic->pic_type == PIC_IOAPIC) {
349		ih |= APIC_INT_VIA_APIC;
350	}
351	ih |= pin;
352	return intr_string(ih, buf, len);
353#endif
354
355	return NULL; /* No pic found! */
356}
357
358/*
359 * Find intrsource from io_interrupt_sources list.
360 */
361static struct intrsource *
362intr_get_io_intrsource(const char *intrid)
363{
364	struct intrsource *isp;
365
366	KASSERT(mutex_owned(&cpu_lock));
367
368	SIMPLEQ_FOREACH(isp, &io_interrupt_sources, is_list) {
369		KASSERT(isp->is_intrid != NULL);
370		if (strncmp(intrid, isp->is_intrid, INTRIDBUF - 1) == 0)
371			return isp;
372	}
373	return NULL;
374}
375
376/*
377 * Allocate intrsource and add to io_interrupt_sources list.
378 */
379struct intrsource *
380intr_allocate_io_intrsource(const char *intrid)
381{
382	CPU_INFO_ITERATOR cii;
383	struct cpu_info *ci;
384	struct intrsource *isp;
385	struct percpu_evcnt *pep;
386
387	KASSERT(mutex_owned(&cpu_lock));
388
389	if (intrid == NULL)
390		return NULL;
391
392	isp = kmem_zalloc(sizeof(*isp), KM_SLEEP);
393	pep = kmem_zalloc(sizeof(*pep) * ncpu, KM_SLEEP);
394	isp->is_saved_evcnt = pep;
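	/* Record each CPU's id so per-CPU counts can later be saved and restored. */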
395	for (CPU_INFO_FOREACH(cii, ci)) {
396		pep->cpuid = ci->ci_cpuid;
397		pep++;
398	}
399	strlcpy(isp->is_intrid, intrid, sizeof(isp->is_intrid));
400
401	SIMPLEQ_INSERT_TAIL(&io_interrupt_sources, isp, is_list);
402
403	return isp;
404}
405
/*
 * Remove an intrsource from the io_interrupt_sources list and free it,
 * given a pointer to the intrsource.
 */
409static void
410intr_free_io_intrsource_direct(struct intrsource *isp)
411{
412	KASSERT(mutex_owned(&cpu_lock));
413
414	SIMPLEQ_REMOVE(&io_interrupt_sources, isp, intrsource, is_list);
415
416	/* Is this interrupt established? */
417	if (isp->is_evname[0] != '\0') {
418		evcnt_detach(&isp->is_evcnt);
419		isp->is_evname[0] = '\0';
420	}
421
422	kmem_free(isp->is_saved_evcnt,
423	    sizeof(*(isp->is_saved_evcnt)) * ncpu);
424
425	kmem_free(isp, sizeof(*isp));
426}
427
/*
 * Remove an intrsource from the io_interrupt_sources list and free it,
 * given its interrupt id.  This function can be used by MI code.
 */
432void
433intr_free_io_intrsource(const char *intrid)
434{
435	struct intrsource *isp;
436
437	KASSERT(mutex_owned(&cpu_lock));
438
439	if (intrid == NULL)
440		return;
441
442	if ((isp = intr_get_io_intrsource(intrid)) == NULL) {
443		return;
444	}
445
	/* If the IRQ is shared and still has handlers attached, don't free yet. */
447	if (isp->is_handlers != NULL) {
448		return;
449	}
450
451	intr_free_io_intrsource_direct(isp);
452}
453
454static int
455intr_allocate_slot_cpu(struct cpu_info *ci, struct pic *pic, int pin,
456		       int *index, struct intrsource *chained)
457{
458	int slot, i;
459	struct intrsource *isp;
460
461	KASSERT(mutex_owned(&cpu_lock));
462
463	if (pic == &i8259_pic) {
464		KASSERT(CPU_IS_PRIMARY(ci));
465		slot = pin;
466	} else {
467		int start = 0;
468		int max = MAX_INTR_SOURCES;
469		slot = -1;
470
471		/* avoid reserved slots for legacy interrupts. */
472		if (CPU_IS_PRIMARY(ci) && msipic_is_msi_pic(pic))
473			start = NUM_LEGACY_IRQS;
474		/* don't step over Xen's slots */
475		if (vm_guest == VM_GUEST_XENPVH)
476			max = SIR_XENIPL_VM;
477		/*
478		 * intr_allocate_slot has checked for an existing mapping.
479		 * Now look for a free slot.
480		 */
481		for (i = start; i < max ; i++) {
482			if (ci->ci_isources[i] == NULL) {
483				slot = i;
484				break;
485			}
486		}
487		if (slot == -1) {
488			return EBUSY;
489		}
490	}
491
492	isp = ci->ci_isources[slot];
493	if (isp == NULL) {
494		const char *via;
495
496		isp = chained;
497		KASSERT(isp != NULL);
498		if (pic->pic_type == PIC_MSI || pic->pic_type == PIC_MSIX)
499			via = "vec";
500		else
501			via = "pin";
502		snprintf(isp->is_evname, sizeof (isp->is_evname),
503		    "%s %d", via, pin);
504		evcnt_attach_dynamic(&isp->is_evcnt, EVCNT_TYPE_INTR, NULL,
505		    pic->pic_name, isp->is_evname);
506		isp->is_active_cpu = ci->ci_cpuid;
507		ci->ci_isources[slot] = isp;
508	}
509
510	*index = slot;
511	return 0;
512}
513
514/*
515 * A simple round-robin allocator to assign interrupts to CPUs.
516 */
517static int __noinline
518intr_allocate_slot(struct pic *pic, int pin, int level,
519		   struct cpu_info **cip, int *index, int *idt_slot,
520		   struct intrsource *chained)
521{
522	CPU_INFO_ITERATOR cii;
523	struct cpu_info *ci, *lci;
524	struct intrsource *isp;
525	int slot = 0, idtvec, error;
526
527	KASSERT(mutex_owned(&cpu_lock));
528
529	/* First check if this pin is already used by an interrupt vector. */
530	for (CPU_INFO_FOREACH(cii, ci)) {
531		for (slot = 0 ; slot < MAX_INTR_SOURCES ; slot++) {
532			if ((isp = ci->ci_isources[slot]) == NULL) {
533				continue;
534			}
535			if (isp->is_pic == pic &&
536			    pin != -1 && isp->is_pin == pin) {
537				*idt_slot = isp->is_idtvec;
538				*index = slot;
539				*cip = ci;
540				return 0;
541			}
542		}
543	}
544
545	/*
546	 * The pic/pin combination doesn't have an existing mapping.
547	 * Find a slot for a new interrupt source.  For the i8259 case,
548	 * we always use reserved slots of the primary CPU.  Otherwise,
549	 * we make an attempt to balance the interrupt load.
550	 *
551	 * PIC and APIC usage are essentially exclusive, so the reservation
552	 * of the ISA slots is ignored when assigning IOAPIC slots.
553	 */
554	if (pic == &i8259_pic) {
555		/*
556		 * Must be directed to BP.
557		 */
558		ci = &cpu_info_primary;
559		error = intr_allocate_slot_cpu(ci, pic, pin, &slot, chained);
560	} else {
561		/*
562		 * Find least loaded AP/BP and try to allocate there.
563		 */
564		ci = NULL;
565		for (CPU_INFO_FOREACH(cii, lci)) {
566			if ((lci->ci_schedstate.spc_flags & SPCF_NOINTR) != 0) {
567				continue;
568			}
569#if 0
570			if (ci == NULL ||
571			    ci->ci_nintrhand > lci->ci_nintrhand) {
572				ci = lci;
573			}
574#else
575			ci = &cpu_info_primary;
576#endif
577		}
578		KASSERT(ci != NULL);
579		error = intr_allocate_slot_cpu(ci, pic, pin, &slot, chained);
580
581		/*
582		 * If that did not work, allocate anywhere.
583		 */
584		if (error != 0) {
585			for (CPU_INFO_FOREACH(cii, ci)) {
586				if ((ci->ci_schedstate.spc_flags &
587				    SPCF_NOINTR) != 0) {
588					continue;
589				}
590				error = intr_allocate_slot_cpu(ci, pic,
591				    pin, &slot, chained);
592				if (error == 0) {
593					break;
594				}
595			}
596		}
597	}
598	if (error != 0) {
599		return error;
600	}
601	KASSERT(ci != NULL);
602
603	/*
604	 * Now allocate an IDT vector.
605	 * For the 8259 these are reserved up front.
606	 */
607	if (pic == &i8259_pic) {
608		idtvec = ICU_OFFSET + pin;
609	} else {
		/*
		 * TODO: support MSI (not MSI-X) multiple vectors.
		 *
		 * The PCI Local Bus Specification Revision 3.0 says that
		 * devices which use MSI multiple vectors increment the low
		 * order bits of the MSI message data.
		 * On the other hand, the Intel SDM "10.11.2 Message Data
		 * Register Format" says that bits 7:0 of the MSI message
		 * data are the Interrupt Descriptor Table (IDT) vector.
		 * As a consequence of these two documents, the IDT vectors
		 * used by a device using MSI multiple vectors must be
		 * contiguous.
		 */
623		struct idt_vec *iv;
624
625		iv = idt_vec_ref(&ci->ci_idtvec);
626		idtvec = idt_vec_alloc(iv, APIC_LEVEL(level), IDT_INTR_HIGH);
627	}
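	/* If no IDT vector could be allocated, back out the slot we just set up. */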
628	if (idtvec < 0) {
629		evcnt_detach(&ci->ci_isources[slot]->is_evcnt);
630		ci->ci_isources[slot]->is_evname[0] = '\0';
631		ci->ci_isources[slot] = NULL;
632		return EBUSY;
633	}
634	ci->ci_isources[slot]->is_idtvec = idtvec;
635	*idt_slot = idtvec;
636	*index = slot;
637	*cip = ci;
638	return 0;
639}
640
641static void
642intr_source_free(struct cpu_info *ci, int slot, struct pic *pic, int idtvec)
643{
644	struct intrsource *isp;
645	struct idt_vec *iv;
646
647	isp = ci->ci_isources[slot];
648	iv = idt_vec_ref(&ci->ci_idtvec);
649
650	if (isp->is_handlers != NULL)
651		return;
652	ci->ci_isources[slot] = NULL;
653	if (pic != &i8259_pic)
654		idt_vec_free(iv, idtvec);
655
656	isp->is_recurse = NULL;
657	isp->is_resume = NULL;
658}
659
660#ifdef MULTIPROCESSOR
661static int intr_biglock_wrapper(void *);
662
663/*
664 * intr_biglock_wrapper: grab biglock and call a real interrupt handler.
665 */
666
667static int
668intr_biglock_wrapper(void *vp)
669{
670	struct intrhand *ih = vp;
671	int locks;
672	int ret;
673
674	KERNEL_LOCK(1, NULL);
675
676	locks = curcpu()->ci_biglock_count;
677	SDT_PROBE3(sdt, kernel, intr, entry,
678	    ih->ih_realfun, ih->ih_realarg, ih);
679	ret = (*ih->ih_realfun)(ih->ih_realarg);
680	SDT_PROBE4(sdt, kernel, intr, return,
681	    ih->ih_realfun, ih->ih_realarg, ih, ret);
682	KASSERTMSG(locks == curcpu()->ci_biglock_count,
683	    "%s @ %p slipped locks %d -> %d",
684	    ih->ih_xname, ih->ih_realfun, locks, curcpu()->ci_biglock_count);
685
686	KERNEL_UNLOCK_ONE(NULL);
687
688	return ret;
689}
690#endif /* MULTIPROCESSOR */
691
692#ifdef KDTRACE_HOOKS
693static int
694intr_kdtrace_wrapper(void *vp)
695{
696	struct intrhand *ih = vp;
697	int ret;
698
699	SDT_PROBE3(sdt, kernel, intr, entry,
700	    ih->ih_realfun, ih->ih_realarg, ih);
701	ret = (*ih->ih_realfun)(ih->ih_realarg);
702	SDT_PROBE4(sdt, kernel, intr, return,
703	    ih->ih_realfun, ih->ih_realarg, ih, ret);
704
705	return ret;
706}
707#endif
708
/*
 * Append a device name to the intrsource.  If device A and device B share an
 * IRQ number, the device name of the interrupt id is "device A, device B".
 */
713static void
714intr_append_intrsource_xname(struct intrsource *isp, const char *xname)
715{
716
717	if (isp->is_xname[0] != '\0')
718		strlcat(isp->is_xname, ", ", sizeof(isp->is_xname));
719	strlcat(isp->is_xname, xname, sizeof(isp->is_xname));
720}
721
722/*
723 * Called on bound CPU to handle calling pic_hwunmask from contexts
724 * that are not already running on the bound CPU.
725 *
726 * => caller (on initiating CPU) holds cpu_lock on our behalf
727 * => arg1: struct intrhand *ih
728 */
729static void
730intr_hwunmask_xcall(void *arg1, void *arg2)
731{
732	struct intrhand * const ih = arg1;
733	struct cpu_info * const ci = ih->ih_cpu;
734
735	KASSERT(ci == curcpu() || !mp_online);
736
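	/* Disable interrupts locally while touching the PIC. */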
737	const u_long psl = x86_read_psl();
738	x86_disable_intr();
739
740	struct intrsource * const source = ci->ci_isources[ih->ih_slot];
741	struct pic * const pic = source->is_pic;
742
743	if (source->is_mask_count == 0) {
744		(*pic->pic_hwunmask)(pic, ih->ih_pin);
745	}
746
747	x86_write_psl(psl);
748}
749
750/*
751 * Handle per-CPU component of interrupt establish.
752 *
753 * => caller (on initiating CPU) holds cpu_lock on our behalf
754 * => arg1: struct intrhand *ih
755 * => arg2: int idt_vec
756 */
757static void
758intr_establish_xcall(void *arg1, void *arg2)
759{
760	struct idt_vec *iv;
761	struct intrsource *source;
762	struct intrstub *stubp;
763	struct intrhand *ih;
764	struct cpu_info *ci;
765	int idt_vec;
766	u_long psl;
767
768	ih = arg1;
769
770	KASSERT(ih->ih_cpu == curcpu() || !mp_online);
771
772	ci = ih->ih_cpu;
773	source = ci->ci_isources[ih->ih_slot];
774	idt_vec = (int)(intptr_t)arg2;
775	iv = idt_vec_ref(&ci->ci_idtvec);
776
777	/* Disable interrupts locally. */
778	psl = x86_read_psl();
779	x86_disable_intr();
780
781	/* Link in the handler and re-calculate masks. */
782	*(ih->ih_prevp) = ih;
783	x86_intr_calculatemasks(ci);
784
785	/* Hook in new IDT vector and SPL state. */
786	if (source->is_resume == NULL || source->is_idtvec != idt_vec) {
787		if (source->is_idtvec != 0 && source->is_idtvec != idt_vec)
788			idt_vec_free(iv, source->is_idtvec);
789		source->is_idtvec = idt_vec;
790		if (source->is_type == IST_LEVEL) {
791			stubp = &source->is_pic->pic_level_stubs[ih->ih_slot];
792		} else {
793			stubp = &source->is_pic->pic_edge_stubs[ih->ih_slot];
794		}
795		source->is_resume = stubp->ist_resume;
796		source->is_recurse = stubp->ist_recurse;
797		idt_vec_set(iv, idt_vec, stubp->ist_entry);
798	}
799
800	/* Re-enable interrupts locally. */
801	x86_write_psl(psl);
802}
803
804void *
805intr_establish_xname(int legacy_irq, struct pic *pic, int pin, int type,
806		     int level, int (*handler)(void *), void *arg,
807		     bool known_mpsafe, const char *xname)
808{
809	struct intrhand **p, *q, *ih;
810	struct cpu_info *ci;
811	int slot, error, idt_vec;
812	struct intrsource *chained, *source;
813#ifdef MULTIPROCESSOR
814	bool mpsafe = (known_mpsafe || level != IPL_VM);
815#endif /* MULTIPROCESSOR */
816	uint64_t where;
817	const char *intrstr;
818	char intrstr_buf[INTRIDBUF];
819
820	KASSERTMSG((legacy_irq == -1 || (0 <= legacy_irq && legacy_irq < 16)),
821	    "bad legacy IRQ value: %d", legacy_irq);
822	KASSERTMSG((legacy_irq != -1 || pic != &i8259_pic),
823	    "non-legacy IRQ on i8259");
824
825	ih = kmem_alloc(sizeof(*ih), KM_SLEEP);
826	intrstr = intr_create_intrid(legacy_irq, pic, pin, intrstr_buf,
827	    sizeof(intrstr_buf));
828	KASSERT(intrstr != NULL);
829
830	mutex_enter(&cpu_lock);
831
	/* Allocate an intrsource for this interrupt id if one does not exist yet. */
833	chained = intr_get_io_intrsource(intrstr);
834	if (chained == NULL) {
835		if (msipic_is_msi_pic(pic)) {
836			mutex_exit(&cpu_lock);
837			kmem_free(ih, sizeof(*ih));
838			printf("%s: %s has no intrsource\n", __func__, intrstr);
839			return NULL;
840		}
841		chained = intr_allocate_io_intrsource(intrstr);
842		if (chained == NULL) {
843			mutex_exit(&cpu_lock);
844			kmem_free(ih, sizeof(*ih));
			printf("%s: can't allocate io_intrsource\n", __func__);
846			return NULL;
847		}
848	}
849
850	error = intr_allocate_slot(pic, pin, level, &ci, &slot, &idt_vec,
851	    chained);
852	if (error != 0) {
853		intr_free_io_intrsource_direct(chained);
854		mutex_exit(&cpu_lock);
855		kmem_free(ih, sizeof(*ih));
856		printf("failed to allocate interrupt slot for PIC %s pin %d\n",
857		    pic->pic_name, pin);
858		return NULL;
859	}
860
861	source = ci->ci_isources[slot];
862
863	if (source->is_handlers != NULL &&
864	    source->is_pic->pic_type != pic->pic_type) {
865		intr_free_io_intrsource_direct(chained);
866		mutex_exit(&cpu_lock);
867		kmem_free(ih, sizeof(*ih));
868		printf("%s: can't share intr source between "
869		       "different PIC types (legacy_irq %d pin %d slot %d)\n",
870		    __func__, legacy_irq, pin, slot);
871		return NULL;
872	}
873
874	source->is_pin = pin;
875	source->is_pic = pic;
876	intr_append_intrsource_xname(source, xname);
877	switch (source->is_type) {
878	case IST_NONE:
879		source->is_type = type;
880		break;
881	case IST_EDGE:
882	case IST_LEVEL:
883		if (source->is_type == type)
884			break;
885		/* FALLTHROUGH */
886	case IST_PULSE:
887		if (type != IST_NONE) {
888			int otype = source->is_type;
889
890			intr_source_free(ci, slot, pic, idt_vec);
891			intr_free_io_intrsource_direct(chained);
892			mutex_exit(&cpu_lock);
893			kmem_free(ih, sizeof(*ih));
894			printf("%s: pic %s pin %d: can't share "
895			       "type %d with %d\n",
896				__func__, pic->pic_name, pin,
897				otype, type);
898			return NULL;
899		}
900		break;
901	default:
902		panic("%s: bad intr type %d for pic %s pin %d\n",
903		    __func__, source->is_type, pic->pic_name, pin);
904		/* NOTREACHED */
905	}
906
	/*
	 * If the interrupt being established shares an IRQ, it uses the
	 * existing "ci->ci_isources[slot]" rather than the intrsource
	 * allocated by the establishing device's pci_intr_alloc() or by
	 * this function, so free any such unused intrsources here.
	 */
912	if (source->is_handlers != NULL) {
913		struct intrsource *isp, *nisp;
914
915		SIMPLEQ_FOREACH_SAFE(isp, &io_interrupt_sources,
916		    is_list, nisp) {
917			if (strncmp(intrstr, isp->is_intrid, INTRIDBUF - 1) == 0
918			    && isp->is_handlers == NULL)
919				intr_free_io_intrsource_direct(isp);
920		}
921	}
922
923	/*
924	 * We're now committed.  Mask the interrupt in hardware and
925	 * count it for load distribution.
926	 */
927	(*pic->pic_hwmask)(pic, pin);
928	(ci->ci_nintrhand)++;
929
930	/*
931	 * Figure out where to put the handler.
932	 * This is O(N^2), but we want to preserve the order, and N is
933	 * generally small.
934	 */
935	for (p = &ci->ci_isources[slot]->is_handlers;
936	     (q = *p) != NULL && q->ih_level > level;
937	     p = &q->ih_next) {
938		/* nothing */;
939	}
940
941	ih->ih_pic = pic;
942	ih->ih_fun = ih->ih_realfun = handler;
943	ih->ih_arg = ih->ih_realarg = arg;
944	ih->ih_prevp = p;
945	ih->ih_next = *p;
946	ih->ih_level = level;
947	ih->ih_pin = pin;
948	ih->ih_cpu = ci;
949	ih->ih_slot = slot;
950	strlcpy(ih->ih_xname, xname, sizeof(ih->ih_xname));
951#ifdef KDTRACE_HOOKS
952	/*
953	 * XXX i8254_clockintr is special -- takes a magic extra
954	 * argument.  This should be fixed properly in some way that
955	 * doesn't involve sketchy function pointer casts.  See also
956	 * the comments in x86/isa/clock.c.
957	 */
958	if (handler != __FPTRCAST(int (*)(void *), i8254_clockintr)) {
959		ih->ih_fun = intr_kdtrace_wrapper;
960		ih->ih_arg = ih;
961	}
962#endif
963#ifdef MULTIPROCESSOR
964	if (!mpsafe) {
965		KASSERT(handler !=			/* XXX */
966		    __FPTRCAST(int (*)(void *), i8254_clockintr));
967		ih->ih_fun = intr_biglock_wrapper;
968		ih->ih_arg = ih;
969	}
970#endif /* MULTIPROCESSOR */
971
972	/*
973	 * Call out to the remote CPU to update its interrupt state.
974	 * Only make RPCs if the APs are up and running.
975	 */
976	if (ci == curcpu() || !mp_online) {
977		intr_establish_xcall(ih, (void *)(intptr_t)idt_vec);
978	} else {
979		where = xc_unicast(0, intr_establish_xcall, ih,
980		    (void *)(intptr_t)idt_vec, ci);
981		xc_wait(where);
982	}
983
984	/* All set up, so add a route for the interrupt and unmask it. */
985	(*pic->pic_addroute)(pic, ci, pin, idt_vec, type);
986	if (ci == curcpu() || !mp_online) {
987		intr_hwunmask_xcall(ih, NULL);
988	} else {
989		where = xc_unicast(0, intr_hwunmask_xcall, ih, NULL, ci);
990		xc_wait(where);
991	}
992	mutex_exit(&cpu_lock);
993
994	if (bootverbose || cpu_index(ci) != 0)
995		aprint_verbose("allocated pic %s type %s pin %d level %d to "
996		    "%s slot %d idt entry %d\n",
997		    pic->pic_name, type == IST_EDGE ? "edge" : "level", pin,
998		    level, device_xname(ci->ci_dev), slot, idt_vec);
999
1000	return ih;
1001}
1002
1003void *
1004intr_establish(int legacy_irq, struct pic *pic, int pin, int type, int level,
1005	       int (*handler)(void *), void *arg, bool known_mpsafe)
1006{
1007
1008	return intr_establish_xname(legacy_irq, pic, pin, type,
1009	    level, handler, arg, known_mpsafe, "unknown");
1010}
1011
1012/*
1013 * Called on bound CPU to handle intr_mask() / intr_unmask().
1014 *
1015 * => caller (on initiating CPU) holds cpu_lock on our behalf
1016 * => arg1: struct intrhand *ih
1017 * => arg2: true -> mask, false -> unmask.
1018 */
1019static void
1020intr_mask_xcall(void *arg1, void *arg2)
1021{
1022	struct intrhand * const ih = arg1;
1023	const uintptr_t mask = (uintptr_t)arg2;
1024	struct cpu_info * const ci = ih->ih_cpu;
1025	bool force_pending = false;
1026
1027	KASSERT(ci == curcpu() || !mp_online);
1028
1029	/*
1030	 * We need to disable interrupts to hold off the interrupt
1031	 * vectors.
1032	 */
1033	const u_long psl = x86_read_psl();
1034	x86_disable_intr();
1035
1036	struct intrsource * const source = ci->ci_isources[ih->ih_slot];
1037	struct pic * const pic = source->is_pic;
1038
1039	if (mask) {
1040		source->is_mask_count++;
1041		KASSERT(source->is_mask_count != 0);
1042		if (source->is_mask_count == 1) {
1043			(*pic->pic_hwmask)(pic, ih->ih_pin);
1044		}
1045	} else {
1046		KASSERT(source->is_mask_count != 0);
1047		if (--source->is_mask_count == 0) {
1048			/*
1049			 * If this interrupt source is being moved, don't
1050			 * unmask it at the hw.
1051			 */
1052			if (! source->is_distribute_pending) {
1053				(*pic->pic_hwunmask)(pic, ih->ih_pin);
1054			}
1055
1056			/*
1057			 * For level-sensitive interrupts, the hardware
1058			 * will let us know.  For everything else, we
1059			 * need to explicitly handle interrupts that
1060			 * happened when the source was masked.
1061			 */
1062			const uint64_t bit = (1U << ih->ih_slot);
1063			if (ci->ci_imasked & bit) {
1064				ci->ci_imasked &= ~bit;
1065				if (source->is_type != IST_LEVEL) {
1066					ci->ci_ipending |= bit;
1067					force_pending = true;
1068				}
1069			}
1070		}
1071	}
1072
1073	/* Re-enable interrupts. */
1074	x86_write_psl(psl);
1075
1076	if (force_pending) {
1077		/* Force processing of any pending interrupts. */
1078		splx(splhigh());
1079	}
1080}
1081
1082static void
1083intr_mask_internal(struct intrhand * const ih, const bool mask)
1084{
1085
1086	/*
1087	 * Call out to the remote CPU to update its interrupt state.
1088	 * Only make RPCs if the APs are up and running.
1089	 */
1090	mutex_enter(&cpu_lock);
1091	struct cpu_info * const ci = ih->ih_cpu;
1092	void * const mask_arg = (void *)(uintptr_t)mask;
1093	if (ci == curcpu() || !mp_online) {
1094		intr_mask_xcall(ih, mask_arg);
1095	} else {
1096		const uint64_t where =
1097		    xc_unicast(0, intr_mask_xcall, ih, mask_arg, ci);
1098		xc_wait(where);
1099	}
1100	mutex_exit(&cpu_lock);
1101}
1102
1103void
1104intr_mask(struct intrhand *ih)
1105{
1106
1107	if (cpu_intr_p()) {
1108		/*
1109		 * Special case of calling intr_mask() from an interrupt
1110		 * handler: we MUST be called from the bound CPU for this
1111		 * interrupt (presumably from a handler we're about to
1112		 * mask).
1113		 *
1114		 * We can't take the cpu_lock in this case, and we must
1115		 * therefore be extra careful.
1116		 */
1117		KASSERT(ih->ih_cpu == curcpu() || !mp_online);
1118		intr_mask_xcall(ih, (void *)(uintptr_t)true);
1119		return;
1120	}
1121
1122	intr_mask_internal(ih, true);
1123}
1124
1125void
1126intr_unmask(struct intrhand *ih)
1127{
1128
1129	/*
1130	 * This is not safe to call from an interrupt context because
1131	 * we don't want to accidentally unmask an interrupt source
1132	 * that's masked because it's being serviced.
1133	 */
1134	KASSERT(!cpu_intr_p());
1135	intr_mask_internal(ih, false);
1136}
1137
1138/*
1139 * Called on bound CPU to handle intr_disestablish().
1140 *
1141 * => caller (on initiating CPU) holds cpu_lock on our behalf
1142 * => arg1: struct intrhand *ih
1143 * => arg2: unused
1144 */
1145static void
1146intr_disestablish_xcall(void *arg1, void *arg2)
1147{
1148	struct intrhand **p, *q;
1149	struct cpu_info *ci;
1150	struct pic *pic;
1151	struct intrsource *source;
1152	struct intrhand *ih;
1153	u_long psl;
1154	int idtvec;
1155
1156	ih = arg1;
1157	ci = ih->ih_cpu;
1158
1159	KASSERT(ci == curcpu() || !mp_online);
1160
1161	/* Disable interrupts locally. */
1162	psl = x86_read_psl();
1163	x86_disable_intr();
1164
1165	pic = ci->ci_isources[ih->ih_slot]->is_pic;
1166	source = ci->ci_isources[ih->ih_slot];
1167	idtvec = source->is_idtvec;
1168
1169	(*pic->pic_hwmask)(pic, ih->ih_pin);
1170
	/*
	 * ci_ipending is stable on the current CPU while interrupts are
	 * blocked, and we only need to synchronize with interrupt
	 * vectors on the same CPU, so no need for atomics or membars.
	 */
1176	ci->ci_ipending &= ~(1ULL << ih->ih_slot);
1177
1178	/*
1179	 * Remove the handler from the chain.
1180	 */
1181	for (p = &source->is_handlers; (q = *p) != NULL && q != ih;
1182	     p = &q->ih_next)
1183		;
1184	if (q == NULL) {
1185		x86_write_psl(psl);
1186		panic("%s: handler not registered", __func__);
1187		/* NOTREACHED */
1188	}
1189
1190	*p = q->ih_next;
1191
1192	x86_intr_calculatemasks(ci);
	/*
	 * If there are no handlers left, 1) do delroute because the slot no
	 * longer has a source and 2) don't hwunmask, to prevent a spurious
	 * interrupt.
	 *
	 * If there are still handlers, 1) don't delroute because the slot
	 * still has a source and 2) do hwunmask, so interrupts can be
	 * received again.
	 */
1201	if (source->is_handlers == NULL)
1202		(*pic->pic_delroute)(pic, ci, ih->ih_pin, idtvec,
1203		    source->is_type);
1204	else if (source->is_mask_count == 0)
1205		(*pic->pic_hwunmask)(pic, ih->ih_pin);
1206
1207	/* If the source is free we can drop it now. */
1208	intr_source_free(ci, ih->ih_slot, pic, idtvec);
1209
1210	/* Re-enable interrupts. */
1211	x86_write_psl(psl);
1212
1213	DPRINTF(("%s: remove slot %d (pic %s pin %d vec %d)\n",
1214	    device_xname(ci->ci_dev), ih->ih_slot, pic->pic_name,
1215	    ih->ih_pin, idtvec));
1216}
1217
1218static int
1219intr_num_handlers(struct intrsource *isp)
1220{
1221	struct intrhand *ih;
1222	int num;
1223
1224	num = 0;
1225	for (ih = isp->is_handlers; ih != NULL; ih = ih->ih_next)
1226		num++;
1227
1228	return num;
1229}
1230
1231/*
1232 * Deregister an interrupt handler.
1233 */
1234void
1235intr_disestablish(struct intrhand *ih)
1236{
1237	struct cpu_info *ci;
1238	struct intrsource *isp;
1239	uint64_t where;
1240
1241	/*
1242	 * Count the removal for load balancing.
1243	 * Call out to the remote CPU to update its interrupt state.
1244	 * Only make RPCs if the APs are up and running.
1245	 */
1246	mutex_enter(&cpu_lock);
1247	ci = ih->ih_cpu;
1248	(ci->ci_nintrhand)--;
1249	KASSERT(ci->ci_nintrhand >= 0);
1250	isp = ci->ci_isources[ih->ih_slot];
1251	if (ci == curcpu() || !mp_online) {
1252		intr_disestablish_xcall(ih, NULL);
1253	} else {
1254		where = xc_unicast(0, intr_disestablish_xcall, ih, NULL, ci);
1255		xc_wait(where);
1256	}
1257	if (!msipic_is_msi_pic(isp->is_pic) && intr_num_handlers(isp) < 1) {
1258		intr_free_io_intrsource_direct(isp);
1259	}
1260	mutex_exit(&cpu_lock);
1261	kmem_free(ih, sizeof(*ih));
1262}
1263
1264static const char *
1265xen_intr_string(int port, char *buf, size_t len, struct pic *pic)
1266{
1267	KASSERT(pic->pic_type == PIC_XEN);
1268
1269	KASSERT(port >= 0);
1270
1271	snprintf(buf, len, "%s chan %d", pic->pic_name, port);
1272
1273	return buf;
1274}
1275
1276static const char *
1277legacy_intr_string(int ih, char *buf, size_t len, struct pic *pic)
1278{
1279	int legacy_irq;
1280
1281	KASSERT(pic->pic_type == PIC_I8259);
1282#if NLAPIC > 0
1283	KASSERT(APIC_IRQ_ISLEGACY(ih));
1284
1285	legacy_irq = APIC_IRQ_LEGACY_IRQ(ih);
1286#else
1287	legacy_irq = ih;
1288#endif
1289	KASSERT(legacy_irq >= 0 && legacy_irq < 16);
1290
1291	snprintf(buf, len, "%s pin %d", pic->pic_name, legacy_irq);
1292
1293	return buf;
1294}
1295
1296const char *
1297intr_string(intr_handle_t ih, char *buf, size_t len)
1298{
1299#if NIOAPIC > 0
1300	struct ioapic_softc *pic;
1301#endif
1302
1303	if (ih == 0)
1304		panic("%s: bogus handle 0x%" PRIx64, __func__, ih);
1305
1306#if NIOAPIC > 0
1307	if (ih & APIC_INT_VIA_APIC) {
1308		pic = ioapic_find(APIC_IRQ_APIC(ih));
1309		if (pic != NULL) {
1310			snprintf(buf, len, "%s pin %d",
1311			    device_xname(pic->sc_dev), APIC_IRQ_PIN(ih));
1312		} else {
1313			snprintf(buf, len,
1314			    "apic %d int %d (irq %d)",
1315			    APIC_IRQ_APIC(ih),
1316			    APIC_IRQ_PIN(ih),
1317			    APIC_IRQ_LEGACY_IRQ(ih));
1318		}
1319	} else
1320		snprintf(buf, len, "irq %d", APIC_IRQ_LEGACY_IRQ(ih));
1321
1322#elif NLAPIC > 0
1323	snprintf(buf, len, "irq %d", APIC_IRQ_LEGACY_IRQ(ih));
1324#else
1325	snprintf(buf, len, "irq %d", (int) ih);
1326#endif
1327	return buf;
1328
1329}
1330
1331/*
1332 * Fake interrupt handler structures for the benefit of symmetry with
1333 * other interrupt sources, and the benefit of x86_intr_calculatemasks()
1334 */
1335struct intrhand fake_timer_intrhand;
1336struct intrhand fake_ipi_intrhand;
1337#if NHYPERV > 0
1338struct intrhand fake_hyperv_intrhand;
1339#endif
1340
1341#if NLAPIC > 0 && defined(MULTIPROCESSOR)
1342static const char *x86_ipi_names[X86_NIPI] = X86_IPI_NAMES;
1343#endif
1344
1345#if defined(INTRSTACKSIZE)
1346static inline bool
1347redzone_const_or_false(bool x)
1348{
1349#ifdef DIAGNOSTIC
1350	return x;
1351#else
1352	return false;
1353#endif /* !DIAGNOSTIC */
1354}
1355
1356static inline int
1357redzone_const_or_zero(int x)
1358{
1359	return redzone_const_or_false(true) ? x : 0;
1360}
1361#endif
1362
1363/*
1364 * Initialize all handlers that aren't dynamically allocated, and exist
1365 * for each CPU.
1366 */
1367void
1368cpu_intr_init(struct cpu_info *ci)
1369{
1370#if NLAPIC > 0
1371	struct intrsource *isp;
1372	static int first = 1;
1373#if defined(MULTIPROCESSOR)
1374	int i;
1375#endif
1376
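	/* Install a fake handler for the local APIC timer interrupt. */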
1377	isp = kmem_zalloc(sizeof(*isp), KM_SLEEP);
1378	isp->is_recurse = Xrecurse_lapic_ltimer;
1379	isp->is_resume = Xresume_lapic_ltimer;
1380	fake_timer_intrhand.ih_pic = &local_pic;
1381	fake_timer_intrhand.ih_level = IPL_CLOCK;
1382	isp->is_handlers = &fake_timer_intrhand;
1383	isp->is_pic = &local_pic;
1384	ci->ci_isources[LIR_TIMER] = isp;
1385	evcnt_attach_dynamic(&isp->is_evcnt,
1386	    first ? EVCNT_TYPE_INTR : EVCNT_TYPE_MISC, NULL,
1387	    device_xname(ci->ci_dev), "timer");
1388	first = 0;
1389
1390#ifdef MULTIPROCESSOR
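	/* Install a fake handler for local APIC IPIs. */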
1391	isp = kmem_zalloc(sizeof(*isp), KM_SLEEP);
1392	isp->is_recurse = Xrecurse_lapic_ipi;
1393	isp->is_resume = Xresume_lapic_ipi;
1394	fake_ipi_intrhand.ih_pic = &local_pic;
1395	fake_ipi_intrhand.ih_level = IPL_HIGH;
1396	isp->is_handlers = &fake_ipi_intrhand;
1397	isp->is_pic = &local_pic;
1398	ci->ci_isources[LIR_IPI] = isp;
1399
1400	for (i = 0; i < X86_NIPI; i++)
1401		evcnt_attach_dynamic(&ci->ci_ipi_events[i], EVCNT_TYPE_MISC,
1402		    NULL, device_xname(ci->ci_dev), x86_ipi_names[i]);
1403#endif /* MULTIPROCESSOR */
1404
1405#if NHYPERV > 0
1406	if (hyperv_hypercall_enabled()) {
1407		isp = kmem_zalloc(sizeof(*isp), KM_SLEEP);
1408		isp->is_recurse = Xrecurse_hyperv_hypercall;
1409		isp->is_resume = Xresume_hyperv_hypercall;
1410		fake_hyperv_intrhand.ih_level = IPL_NET;
1411		isp->is_handlers = &fake_hyperv_intrhand;
1412		isp->is_pic = &local_pic;
1413		ci->ci_isources[LIR_HV] = isp;
1414		evcnt_attach_dynamic(&isp->is_evcnt, EVCNT_TYPE_INTR, NULL,
1415		    device_xname(ci->ci_dev), "Hyper-V hypercall");
1416	}
1417#endif /* NHYPERV > 0 */
1418#endif /* NLAPIC > 0 */
1419
1420#if defined(__HAVE_PREEMPTION)
1421	x86_init_preempt(ci);
1422
1423#endif
1424	x86_intr_calculatemasks(ci);
1425
1426#if defined(INTRSTACKSIZE)
1427	vaddr_t istack;
1428
1429	/*
1430	 * If the red zone is activated, protect both the top and
1431	 * the bottom of the stack with an unmapped page.
1432	 */
1433	istack = uvm_km_alloc(kernel_map,
1434	    INTRSTACKSIZE + redzone_const_or_zero(2 * PAGE_SIZE), 0,
1435	    UVM_KMF_WIRED | UVM_KMF_ZERO);
1436	if (redzone_const_or_false(true)) {
1437		pmap_kremove(istack, PAGE_SIZE);
1438		pmap_kremove(istack + INTRSTACKSIZE + PAGE_SIZE, PAGE_SIZE);
1439		pmap_update(pmap_kernel());
1440	}
1441
1442	/*
1443	 * 33 used to be 1.  Arbitrarily reserve 32 more register_t's
1444	 * of space for ddb(4) to examine some subroutine arguments
1445	 * and to hunt for the next stack frame.
1446	 */
1447	ci->ci_intrstack = (char *)istack + redzone_const_or_zero(PAGE_SIZE) +
1448	    INTRSTACKSIZE - 33 * sizeof(register_t);
1449#endif
1450
1451	ci->ci_idepth = -1;
1452}
1453
1454#if defined(INTRDEBUG) || defined(DDB)
1455
1456void
1457intr_printconfig(void)
1458{
1459	int i;
1460	struct intrhand *ih;
1461	struct intrsource *isp;
1462	struct cpu_info *ci;
1463	CPU_INFO_ITERATOR cii;
1464	void (*pr)(const char *, ...);
1465
1466	pr = printf;
1467#ifdef DDB
1468	if (db_active) {
1469		pr = db_printf;
1470	}
1471#endif
1472
1473	for (CPU_INFO_FOREACH(cii, ci)) {
1474		(*pr)("%s: interrupt masks:\n", device_xname(ci->ci_dev));
1475		for (i = 0; i < NIPL; i++)
1476			(*pr)("IPL %d mask %016"PRIx64" unmask %016"PRIx64"\n",
1477			    i, ci->ci_imask[i], ci->ci_iunmask[i]);
1478		for (i = 0; i < MAX_INTR_SOURCES; i++) {
1479			isp = ci->ci_isources[i];
1480			if (isp == NULL)
1481				continue;
1482			(*pr)("%s source %d is pin %d from pic %s type %d "
1483			    "maxlevel %d\n", device_xname(ci->ci_dev), i,
1484			    isp->is_pin, isp->is_pic->pic_name, isp->is_type,
1485			    isp->is_maxlevel);
1486			for (ih = isp->is_handlers; ih != NULL;
1487			     ih = ih->ih_next)
1488				(*pr)("\thandler %p level %d\n",
1489				    ih->ih_fun, ih->ih_level);
1490#if NIOAPIC > 0
1491			if (isp->is_pic->pic_type == PIC_IOAPIC) {
1492				struct ioapic_softc *sc;
1493				sc = isp->is_pic->pic_ioapic;
1494				(*pr)("\tioapic redir 0x%x\n",
1495				    sc->sc_pins[isp->is_pin].ip_map->redir);
1496			}
1497#endif
1498
1499		}
1500	}
1501}
1502
1503#endif
1504
/*
 * Save the interrupt count of the CPU the source is currently bound to.
 */
1508static void
1509intr_save_evcnt(struct intrsource *source, cpuid_t cpuid)
1510{
1511	struct percpu_evcnt *pep;
1512	uint64_t curcnt;
1513	int i;
1514
1515	curcnt = source->is_evcnt.ev_count;
1516	pep = source->is_saved_evcnt;
1517
1518	for (i = 0; i < ncpu; i++) {
1519		if (pep[i].cpuid == cpuid) {
1520			pep[i].count = curcnt;
1521			break;
1522		}
1523	}
1524}
1525
/*
 * Restore the interrupt count of the CPU the source is currently bound to.
 */
1529static void
1530intr_restore_evcnt(struct intrsource *source, cpuid_t cpuid)
1531{
1532	struct percpu_evcnt *pep;
1533	int i;
1534
1535	pep = source->is_saved_evcnt;
1536
1537	for (i = 0; i < ncpu; i++) {
1538		if (pep[i].cpuid == cpuid) {
1539			source->is_evcnt.ev_count = pep[i].count;
1540			break;
1541		}
1542	}
1543}
1544
1545static void
1546intr_redistribute_xc_t(void *arg1, void *arg2)
1547{
1548	struct cpu_info *ci;
1549	struct intrsource *isp;
1550	int slot;
1551	u_long psl;
1552
1553	ci = curcpu();
1554	isp = arg1;
1555	slot = (int)(intptr_t)arg2;
1556
1557	/* Disable interrupts locally. */
1558	psl = x86_read_psl();
1559	x86_disable_intr();
1560
1561	/* Hook it in and re-calculate masks. */
1562	ci->ci_isources[slot] = isp;
1563	x86_intr_calculatemasks(curcpu());
1564
1565	/* Re-enable interrupts locally. */
1566	x86_write_psl(psl);
1567}
1568
1569static void
1570intr_redistribute_xc_s1(void *arg1, void *arg2)
1571{
1572	struct pic *pic;
1573	struct intrsource *isp;
1574	struct cpu_info *nci;
1575	u_long psl;
1576
1577	isp = arg1;
1578	nci = arg2;
1579
1580	/*
1581	 * Disable interrupts on-chip and mask the pin.  Back out
1582	 * and let the interrupt be processed if one is pending.
1583	 */
1584	pic = isp->is_pic;
1585	for (;;) {
1586		psl = x86_read_psl();
1587		x86_disable_intr();
1588		if ((*pic->pic_trymask)(pic, isp->is_pin)) {
1589			break;
1590		}
1591		x86_write_psl(psl);
1592		DELAY(1000);
1593	}
1594
1595	/* pic_addroute will unmask the interrupt. */
1596	(*pic->pic_addroute)(pic, nci, isp->is_pin, isp->is_idtvec,
1597	    isp->is_type);
1598	x86_write_psl(psl);
1599}
1600
1601static void
1602intr_redistribute_xc_s2(void *arg1, void *arg2)
1603{
1604	struct cpu_info *ci;
1605	u_long psl;
1606	int slot;
1607
1608	ci = curcpu();
1609	slot = (int)(uintptr_t)arg1;
1610
1611	/* Disable interrupts locally. */
1612	psl = x86_read_psl();
1613	x86_disable_intr();
1614
1615	/* Patch out the source and re-calculate masks. */
1616	ci->ci_isources[slot] = NULL;
1617	x86_intr_calculatemasks(ci);
1618
1619	/* Re-enable interrupts locally. */
1620	x86_write_psl(psl);
1621}
1622
1623static bool
1624intr_redistribute(struct cpu_info *oci)
1625{
1626	struct intrsource *isp;
1627	struct intrhand *ih;
1628	CPU_INFO_ITERATOR cii;
1629	struct cpu_info *nci, *ici;
1630	int oslot, nslot;
1631	uint64_t where;
1632
1633	KASSERT(mutex_owned(&cpu_lock));
1634
1635	/* Look for an interrupt source that we can migrate. */
1636	for (oslot = 0; oslot < MAX_INTR_SOURCES; oslot++) {
1637		if ((isp = oci->ci_isources[oslot]) == NULL) {
1638			continue;
1639		}
1640		if (isp->is_pic->pic_type == PIC_IOAPIC) {
1641			break;
1642		}
1643	}
1644	if (oslot == MAX_INTR_SOURCES) {
1645		return false;
1646	}
1647
1648	/* Find least loaded CPU and try to move there. */
1649	nci = NULL;
1650	for (CPU_INFO_FOREACH(cii, ici)) {
1651		if ((ici->ci_schedstate.spc_flags & SPCF_NOINTR) != 0) {
1652			continue;
1653		}
1654		KASSERT(ici != oci);
1655		if (nci == NULL || nci->ci_nintrhand > ici->ci_nintrhand) {
1656			nci = ici;
1657		}
1658	}
1659	if (nci == NULL) {
1660		return false;
1661	}
1662	for (nslot = 0; nslot < MAX_INTR_SOURCES; nslot++) {
1663		if (nci->ci_isources[nslot] == NULL) {
1664			break;
1665		}
1666	}
1667
1668	/* If that did not work, allocate anywhere. */
1669	if (nslot == MAX_INTR_SOURCES) {
1670		for (CPU_INFO_FOREACH(cii, nci)) {
1671			if ((nci->ci_schedstate.spc_flags & SPCF_NOINTR) != 0) {
1672				continue;
1673			}
1674			KASSERT(nci != oci);
1675			for (nslot = 0; nslot < MAX_INTR_SOURCES; nslot++) {
1676				if (nci->ci_isources[nslot] == NULL) {
1677					break;
1678				}
1679			}
1680			if (nslot != MAX_INTR_SOURCES) {
1681				break;
1682			}
1683		}
1684	}
1685	if (nslot == MAX_INTR_SOURCES) {
1686		return false;
1687	}
1688
1689	/*
1690	 * Now we have new CPU and new slot.  Run a cross-call to set up
1691	 * the new vector on the target CPU.
1692	 */
1693	where = xc_unicast(0, intr_redistribute_xc_t, isp,
1694	    (void *)(intptr_t)nslot, nci);
1695	xc_wait(where);
1696
1697	/*
1698	 * We're ready to go on the target CPU.  Run a cross call to
1699	 * reroute the interrupt away from the source CPU.
1700	 */
1701	where = xc_unicast(0, intr_redistribute_xc_s1, isp, nci, oci);
1702	xc_wait(where);
1703
1704	/* Sleep for (at least) 10ms to allow the change to take hold. */
1705	(void)kpause("intrdist", false, mstohz(10), NULL);
1706
1707	/* Complete removal from the source CPU. */
1708	where = xc_unicast(0, intr_redistribute_xc_s2,
1709	    (void *)(uintptr_t)oslot, NULL, oci);
1710	xc_wait(where);
1711
1712	/* Finally, take care of book-keeping. */
1713	for (ih = isp->is_handlers; ih != NULL; ih = ih->ih_next) {
1714		oci->ci_nintrhand--;
1715		nci->ci_nintrhand++;
1716		ih->ih_cpu = nci;
1717	}
1718	intr_save_evcnt(isp, oci->ci_cpuid);
1719	intr_restore_evcnt(isp, nci->ci_cpuid);
1720	isp->is_active_cpu = nci->ci_cpuid;
1721
1722	return true;
1723}
1724
1725void
1726cpu_intr_redistribute(void)
1727{
1728	CPU_INFO_ITERATOR cii;
1729	struct cpu_info *ci;
1730
1731	KASSERT(mutex_owned(&cpu_lock));
1732	KASSERT(mp_online);
1733
1734	/* Direct interrupts away from shielded CPUs. */
1735	for (CPU_INFO_FOREACH(cii, ci)) {
1736		if ((ci->ci_schedstate.spc_flags & SPCF_NOINTR) == 0) {
1737			continue;
1738		}
1739		while (intr_redistribute(ci)) {
1740			/* nothing */
1741		}
1742	}
1743
1744	/* XXX should now re-balance */
1745}
1746
1747u_int
1748cpu_intr_count(struct cpu_info *ci)
1749{
1750
1751	KASSERT(ci->ci_nintrhand >= 0);
1752
1753	return ci->ci_nintrhand;
1754}
1755
1756static int
1757intr_find_unused_slot(struct cpu_info *ci, int *index)
1758{
1759	int slot, i;
1760
1761	KASSERT(mutex_owned(&cpu_lock));
1762
1763	slot = -1;
1764	for (i = 0; i < MAX_INTR_SOURCES ; i++) {
1765		if (ci->ci_isources[i] == NULL) {
1766			slot = i;
1767			break;
1768		}
1769	}
1770	if (slot == -1) {
1771		DPRINTF(("cannot allocate ci_isources\n"));
1772		return EBUSY;
1773	}
1774
1775	*index = slot;
1776	return 0;
1777}
1778
/*
 * Make the cpu_info ready to accept the interrupt.
 */
1782static void
1783intr_activate_xcall(void *arg1, void *arg2)
1784{
1785	struct cpu_info *ci;
1786	struct intrsource *source;
1787	struct intrstub *stubp;
1788	struct intrhand *ih;
1789	struct idt_vec *iv;
1790	u_long psl;
1791	int idt_vec;
1792	int slot;
1793
1794	ih = arg1;
1795
1796	kpreempt_disable();
1797
1798	KASSERT(ih->ih_cpu == curcpu() || !mp_online);
1799
1800	ci = ih->ih_cpu;
1801	slot = ih->ih_slot;
1802	source = ci->ci_isources[slot];
1803	idt_vec = source->is_idtvec;
1804	iv = idt_vec_ref(&ci->ci_idtvec);
1805
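	/* Disable interrupts locally while the IDT entry and masks are updated. */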
1806	psl = x86_read_psl();
1807	x86_disable_intr();
1808
1809	x86_intr_calculatemasks(ci);
1810
1811	if (source->is_type == IST_LEVEL) {
1812		stubp = &source->is_pic->pic_level_stubs[slot];
1813	} else {
1814		stubp = &source->is_pic->pic_edge_stubs[slot];
1815	}
1816
1817	source->is_resume = stubp->ist_resume;
1818	source->is_recurse = stubp->ist_recurse;
1819	idt_vec_set(iv, idt_vec, stubp->ist_entry);
1820
1821	x86_write_psl(psl);
1822
1823	kpreempt_enable();
1824}
1825
/*
 * Make the cpu_info stop accepting the interrupt.
 */
1829static void
1830intr_deactivate_xcall(void *arg1, void *arg2)
1831{
1832	struct cpu_info *ci;
1833	struct intrhand *ih, *lih;
1834	struct intrsource *isp;
1835	u_long psl;
1836	int idt_vec;
1837	int slot;
1838
1839	ih = arg1;
1840
1841	kpreempt_disable();
1842
1843	KASSERT(ih->ih_cpu == curcpu() || !mp_online);
1844
1845	ci = ih->ih_cpu;
1846	slot = ih->ih_slot;
1847	isp = ci->ci_isources[slot];
1848	idt_vec = isp->is_idtvec;
1849
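	/* Disable interrupts locally while the source is detached. */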
1850	psl = x86_read_psl();
1851	x86_disable_intr();
1852
	/* Move all devices sharing the IRQ number. */
1854	ci->ci_isources[slot] = NULL;
1855	for (lih = ih; lih != NULL; lih = lih->ih_next) {
1856		ci->ci_nintrhand--;
1857	}
1858
1859	x86_intr_calculatemasks(ci);
1860
1861	if (idt_vec_is_pcpu()) {
1862		idt_vec_free(&ci->ci_idtvec, idt_vec);
1863	} else {
1864		/*
1865		 * Skip unsetgate(), because the same idt[] entry is
1866		 * overwritten in intr_activate_xcall().
1867		 */
1868	}
1869
1870	x86_write_psl(psl);
1871
1872	kpreempt_enable();
1873}
1874
1875static void
1876intr_get_affinity(struct intrsource *isp, kcpuset_t *cpuset)
1877{
1878	struct cpu_info *ci;
1879
1880	KASSERT(mutex_owned(&cpu_lock));
1881
1882	if (isp == NULL) {
1883		kcpuset_zero(cpuset);
1884		return;
1885	}
1886
1887	KASSERTMSG(isp->is_handlers != NULL,
	    "Can't get affinity for a device whose interrupt is not established.");
1889
1890	ci = isp->is_handlers->ih_cpu;
1891	if (ci == NULL) {
1892		kcpuset_zero(cpuset);
1893		return;
1894	}
1895
1896	kcpuset_set(cpuset, cpu_index(ci));
1897	return;
1898}
1899
1900static int
1901intr_set_affinity(struct intrsource *isp, const kcpuset_t *cpuset)
1902{
1903	struct cpu_info *oldci, *newci;
1904	struct intrhand *ih, *lih;
1905	struct pic *pic;
1906	u_int cpu_idx;
1907	int old_idtvec, new_idtvec;
1908	int oldslot, newslot;
1909	int err;
1910	int pin;
1911
1912	KASSERT(mutex_owned(&intr_distribute_lock));
1913	KASSERT(mutex_owned(&cpu_lock));
1914
1915	/* XXX
1916	 * logical destination mode is not supported, use lowest index cpu.
1917	 */
1918	cpu_idx = kcpuset_ffs(cpuset) - 1;
1919	newci = cpu_lookup(cpu_idx);
1920	if (newci == NULL) {
1921		DPRINTF(("invalid cpu index: %u\n", cpu_idx));
1922		return EINVAL;
1923	}
1924	if ((newci->ci_schedstate.spc_flags & SPCF_NOINTR) != 0) {
		DPRINTF(("cpu index %u is shielded from interrupts (SPCF_NOINTR)\n", cpu_idx));
1926		return EINVAL;
1927	}
1928
1929	if (isp == NULL) {
1930		DPRINTF(("invalid intrctl handler\n"));
1931		return EINVAL;
1932	}
1933
	/* The i8259_pic supports only the primary CPU; see i8259.c. */
1935	pic = isp->is_pic;
1936	if (pic == &i8259_pic) {
1937		DPRINTF(("i8259 pic does not support set_affinity\n"));
1938		return ENOTSUP;
1939	}
1940
1941	ih = isp->is_handlers;
1942	KASSERTMSG(ih != NULL,
	    "Can't set affinity for a device whose interrupt is not established.");
1944
1945	oldci = ih->ih_cpu;
1946	if (newci == oldci) /* nothing to do */
1947		return 0;
1948
1949	oldslot = ih->ih_slot;
1950
1951	err = intr_find_unused_slot(newci, &newslot);
1952	if (err) {
1953		DPRINTF(("failed to allocate interrupt slot for PIC %s intrid "
1954			"%s\n", isp->is_pic->pic_name, isp->is_intrid));
1955		return err;
1956	}
1957
1958	old_idtvec = isp->is_idtvec;
1959
1960	if (idt_vec_is_pcpu()) {
1961		new_idtvec = idt_vec_alloc(&newci->ci_idtvec,
1962		    APIC_LEVEL(ih->ih_level), IDT_INTR_HIGH);
1963		if (new_idtvec == 0)
1964			return EBUSY;
1965		DPRINTF(("interrupt from cpu%d vec %d to cpu%d vec %d\n",
1966		    cpu_index(oldci), old_idtvec, cpu_index(newci),
1967			new_idtvec));
1968	} else {
1969		new_idtvec = isp->is_idtvec;
1970	}
1971
1972	/* Prevent intr_unmask() from reenabling the source at the hw. */
1973	isp->is_distribute_pending = true;
1974
1975	pin = isp->is_pin;
1976	(*pic->pic_hwmask)(pic, pin); /* for ci_ipending check */
1977	membar_sync();
1978	while (oldci->ci_ipending & (1ULL << oldslot)) {
1979		(void)kpause("intrdist", false, 1, &cpu_lock);
1980		membar_sync();
1981	}
1982
1983	kpreempt_disable();
1984
1985	/* deactivate old interrupt setting */
1986	if (oldci == curcpu() || !mp_online) {
1987		intr_deactivate_xcall(ih, NULL);
1988	} else {
1989		uint64_t where;
1990		where = xc_unicast(0, intr_deactivate_xcall, ih,
1991				   NULL, oldci);
1992		xc_wait(where);
1993	}
1994	intr_save_evcnt(isp, oldci->ci_cpuid);
1995	(*pic->pic_delroute)(pic, oldci, pin, old_idtvec, isp->is_type);
1996
1997	/* activate new interrupt setting */
1998	isp->is_idtvec = new_idtvec;
1999	newci->ci_isources[newslot] = isp;
2000	for (lih = ih; lih != NULL; lih = lih->ih_next) {
2001		newci->ci_nintrhand++;
2002		lih->ih_cpu = newci;
2003		lih->ih_slot = newslot;
2004	}
2005	if (newci == curcpu() || !mp_online) {
2006		intr_activate_xcall(ih, NULL);
2007	} else {
2008		uint64_t where;
2009		where = xc_unicast(0, intr_activate_xcall, ih,
2010				   NULL, newci);
2011		xc_wait(where);
2012	}
2013	intr_restore_evcnt(isp, newci->ci_cpuid);
2014	isp->is_active_cpu = newci->ci_cpuid;
2015	(*pic->pic_addroute)(pic, newci, pin, new_idtvec, isp->is_type);
2016
2017	isp->is_distribute_pending = false;
2018	if (newci == curcpu() || !mp_online) {
2019		intr_hwunmask_xcall(ih, NULL);
2020	} else {
2021		uint64_t where;
2022		where = xc_unicast(0, intr_hwunmask_xcall, ih, NULL, newci);
2023		xc_wait(where);
2024	}
2025
2026	kpreempt_enable();
2027
2028	return err;
2029}
2030
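/*
 * Return true if the handlers of "isp" are currently bound to one of the
 * cpus in "cpuset".  Called with cpu_lock held.
 */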
2031static bool
2032intr_is_affinity_intrsource(struct intrsource *isp, const kcpuset_t *cpuset)
2033{
2034	struct cpu_info *ci;
2035
2036	KASSERT(mutex_owned(&cpu_lock));
2037
2038	/*
2039	 * The interrupt has been allocated (e.g. by pci_intr_alloc()), but
2040	 * no handler has been established for it yet.
2041	 */
2042	if (isp->is_handlers == NULL)
2043		return false;
2044
2045	ci = isp->is_handlers->ih_cpu;
2046	KASSERT(ci != NULL);
2047
2048	return kcpuset_isset(cpuset, cpu_index(ci));
2049}
2050
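/*
 * Look up the handler list for the interrupt id "intrid"; returns NULL if
 * the source is unknown or has no established handlers.  Called with
 * cpu_lock held.
 */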
2051static struct intrhand *
2052intr_get_handler(const char *intrid)
2053{
2054	struct intrsource *isp;
2055
2056	KASSERT(mutex_owned(&cpu_lock));
2057
2058	isp = intr_get_io_intrsource(intrid);
2059	if (isp == NULL)
2060		return NULL;
2061
2062	return isp->is_handlers;
2063}
2064
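/*
 * Return the number of interrupts counted for "intrid" on the cpu with
 * index "cpu_idx".  The cpu on which the source is currently active is
 * read from the live event counter; other cpus use the saved per-cpu
 * counts.
 */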
2065uint64_t
2066x86_intr_get_count(const char *intrid, u_int cpu_idx)
2067{
2068	struct cpu_info *ci;
2069	struct intrsource *isp;
2070	struct intrhand *ih;
2071	struct percpu_evcnt pep;
2072	cpuid_t cpuid;
2073	int i, slot;
2074	uint64_t count = 0;
2075
2076	KASSERT(mutex_owned(&cpu_lock));
2077	ci = cpu_lookup(cpu_idx);
2078	cpuid = ci->ci_cpuid;
2079
2080	ih = intr_get_handler(intrid);
2081	if (ih == NULL) {
2082		count = 0;
2083		goto out;
2084	}
2085	slot = ih->ih_slot;
2086	isp = ih->ih_cpu->ci_isources[slot];
2087
2088	for (i = 0; i < ncpu; i++) {
2089		pep = isp->is_saved_evcnt[i];
2090		if (cpuid == pep.cpuid) {
2091			if (isp->is_active_cpu == pep.cpuid) {
2092				count = isp->is_evcnt.ev_count;
2093				goto out;
2094			} else {
2095				count = pep.count;
2096				goto out;
2097			}
2098		}
2099	}
2100
2101 out:
2102	return count;
2103}
2104
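/*
 * Return in "cpuset" the cpu currently servicing "intrid"; the set is
 * emptied if the interrupt is not established.
 */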
2105void
2106x86_intr_get_assigned(const char *intrid, kcpuset_t *cpuset)
2107{
2108	struct cpu_info *ci;
2109	struct intrhand *ih;
2110
2111	KASSERT(mutex_owned(&cpu_lock));
2112	kcpuset_zero(cpuset);
2113
2114	ih = intr_get_handler(intrid);
2115	if (ih == NULL)
2116		return;
2117
2118	ci = ih->ih_cpu;
2119	kcpuset_set(cpuset, cpu_index(ci));
2120}
2121
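/*
 * Copy the xname(s) of the device(s) attached to "intrid" into "buf";
 * an empty string is returned if the interrupt is not established.
 */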
2122void
2123x86_intr_get_devname(const char *intrid, char *buf, size_t len)
2124{
2125	struct intrsource *isp;
2126	struct intrhand *ih;
2127	int slot;
2128
2129	KASSERT(mutex_owned(&cpu_lock));
2130
2131	ih = intr_get_handler(intrid);
2132	if (ih == NULL) {
2133		buf[0] = '\0';
2134		return;
2135	}
2136	slot = ih->ih_slot;
2137	isp = ih->ih_cpu->ci_isources[slot];
2138	strlcpy(buf, isp->is_xname, len);
2140}
2141
2142/*
2143 * MI interface for subr_interrupt.c
2144 */
2145uint64_t
2146interrupt_get_count(const char *intrid, u_int cpu_idx)
2147{
2148	struct intrsource *isp;
2149	uint64_t count = 0;
2150
2151	mutex_enter(&cpu_lock);
2152	isp = intr_get_io_intrsource(intrid);
2153	if (isp != NULL)
2154		count = isp->is_pic->pic_intr_get_count(intrid, cpu_idx);
2155	mutex_exit(&cpu_lock);
2156	return count;
2157}
2158
2159/*
2160 * MI interface for subr_interrupt.c
2161 */
2162void
2163interrupt_get_assigned(const char *intrid, kcpuset_t *cpuset)
2164{
2165	struct intrsource *isp;
2166
2167	mutex_enter(&cpu_lock);
2168	isp = intr_get_io_intrsource(intrid);
2169	if (isp != NULL)
2170		isp->is_pic->pic_intr_get_assigned(intrid, cpuset);
2171	mutex_exit(&cpu_lock);
2172}
2173
2174/*
2175 * MI interface for subr_interrupt.c
2176 */
2177void
2178interrupt_get_available(kcpuset_t *cpuset)
2179{
2180	CPU_INFO_ITERATOR cii;
2181	struct cpu_info *ci;
2182
2183	kcpuset_zero(cpuset);
2184
2185	mutex_enter(&cpu_lock);
2186	for (CPU_INFO_FOREACH(cii, ci)) {
2187		if ((ci->ci_schedstate.spc_flags & SPCF_NOINTR) == 0) {
2188			kcpuset_set(cpuset, cpu_index(ci));
2189		}
2190	}
2191	mutex_exit(&cpu_lock);
2192}
2193
2194/*
2195 * MI interface for subr_interrupt.c
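 *
 * interrupt_get_available: return in "cpuset" the cpus that may service
 * device interrupts, i.e. those not shielded with SPCF_NOINTR.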
2196 */
2197void
2198interrupt_get_devname(const char *intrid, char *buf, size_t len)
2199{
2200	struct intrsource *isp;
2201
2202	mutex_enter(&cpu_lock);
2203	isp = intr_get_io_intrsource(intrid);
2204	if (isp != NULL) {
2205		if (isp->is_pic->pic_intr_get_devname == NULL) {
2206			printf("NULL get_devname intrid %s pic %s\n",
2207			    intrid, isp->is_pic->pic_name);
2208		} else {
2209			isp->is_pic->pic_intr_get_devname(intrid, buf, len);
2210		}
2211	}
2212	mutex_exit(&cpu_lock);
2213}
2214
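/*
 * Common guts of interrupt_distribute() and interrupt_distribute_handler():
 * report the current affinity in "oldset" (if non-NULL), then move the
 * source owning "ih" to the cpus named in "newset".  Called with both
 * intr_distribute_lock and cpu_lock held.
 */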
2215static int
2216intr_distribute_locked(struct intrhand *ih, const kcpuset_t *newset,
2217    kcpuset_t *oldset)
2218{
2219	struct intrsource *isp;
2220	int slot;
2221
2222	KASSERT(mutex_owned(&intr_distribute_lock));
2223	KASSERT(mutex_owned(&cpu_lock));
2224
2225	if (ih == NULL)
2226		return EINVAL;
2227
2228	slot = ih->ih_slot;
2229	isp = ih->ih_cpu->ci_isources[slot];
2230	KASSERT(isp != NULL);
2231
2232	if (oldset != NULL)
2233		intr_get_affinity(isp, oldset);
2234
2235	return intr_set_affinity(isp, newset);
2236}
2237
2238/*
2239 * MI interface for subr_interrupt.c
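 *
 * interrupt_distribute: bind the interrupt identified by the handler
 * cookie "cookie" to one of the cpus in "newset"; the previous binding is
 * returned in "oldset" when that is non-NULL.
 *
 * Usage sketch (hypothetical caller, not taken from this file): a driver
 * holding the cookie returned by intr_establish_xname() might bind its
 * interrupt to cpu2 along these lines:
 *
 *	kcpuset_t *kcp;
 *
 *	kcpuset_create(&kcp, true);
 *	kcpuset_set(kcp, 2);
 *	error = interrupt_distribute(cookie, kcp, NULL);
 *	kcpuset_destroy(kcp);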
2240 */
2241int
2242interrupt_distribute(void *cookie, const kcpuset_t *newset, kcpuset_t *oldset)
2243{
2244	int error;
2245	struct intrhand *ih = cookie;
2246
2247	mutex_enter(&intr_distribute_lock);
2248	mutex_enter(&cpu_lock);
2249	error = intr_distribute_locked(ih, newset, oldset);
2250	mutex_exit(&cpu_lock);
2251	mutex_exit(&intr_distribute_lock);
2252
2253	return error;
2254}
2255
2256/*
2257 * MI interface for subr_interrupt.c
2258 */
2259int
2260interrupt_distribute_handler(const char *intrid, const kcpuset_t *newset,
2261    kcpuset_t *oldset)
2262{
2263	int error;
2264	struct intrhand *ih;
2265
2266	mutex_enter(&intr_distribute_lock);
2267	mutex_enter(&cpu_lock);
2268
2269	ih = intr_get_handler(intrid);
2270	if (ih == NULL) {
2271		error = ENOENT;
2272		goto out;
2273	}
2274	error = intr_distribute_locked(ih, newset, oldset);
2275
2276 out:
2277	mutex_exit(&cpu_lock);
2278	mutex_exit(&intr_distribute_lock);
2279	return error;
2280}
2281
2282/*
2283 * MI interface for subr_interrupt.c
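 *
 * interrupt_construct_intrids: allocate and return the list of interrupt
 * ids currently bound to any cpu in "cpuset"; the caller releases the
 * list with interrupt_destruct_intrids().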
2284 */
2285struct intrids_handler *
2286interrupt_construct_intrids(const kcpuset_t *cpuset)
2287{
2288	struct intrsource *isp;
2289	struct intrids_handler *ii_handler;
2290	intrid_t *ids;
2291	int i, count;
2292
2293	if (kcpuset_iszero(cpuset))
2294		return NULL;
2295
2296	/*
2297	 * Count the number of interrupt sources whose affinity includes
2298	 * any cpu in "cpuset".
2299	 */
2300	count = 0;
2301	mutex_enter(&cpu_lock);
2302	SIMPLEQ_FOREACH(isp, &io_interrupt_sources, is_list) {
2303		if (intr_is_affinity_intrsource(isp, cpuset))
2304			count++;
2305	}
2306	mutex_exit(&cpu_lock);
2307
2308	ii_handler = kmem_zalloc(sizeof(int) + sizeof(intrid_t) * count,
2309	    KM_SLEEP);
2310	if (ii_handler == NULL)
2311		return NULL;
2312	ii_handler->iih_nids = count;
2313	if (count == 0)
2314		return ii_handler;
2315
2316	ids = ii_handler->iih_intrids;
2317	i = 0;
2318	mutex_enter(&cpu_lock);
2319	SIMPLEQ_FOREACH(isp, &io_interrupt_sources, is_list) {
2320		/* Ignore any devices attached after "count" was taken. */
2321		if (i >= count) {
2322			DPRINTF(("new devices attached after counting\n"));
2323			break;
2324		}
2325
2326		if (!intr_is_affinity_intrsource(isp, cpuset))
2327			continue;
2328
2329		strncpy(ids[i], isp->is_intrid, sizeof(intrid_t));
2330		i++;
2331	}
2332	mutex_exit(&cpu_lock);
2333
2334	return ii_handler;
2335}
2336
2337/*
2338 * MI interface for subr_interrupt.c
2339 */
2340void
2341interrupt_destruct_intrids(struct intrids_handler *ii_handler)
2342{
2343	size_t iih_size;
2344
2345	if (ii_handler == NULL)
2346		return;
2347
2348	iih_size = sizeof(int) + sizeof(intrid_t) * ii_handler->iih_nids;
2349	kmem_free(ii_handler, iih_size);
2350}
2351