1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25/*
26 * Copyright (c) 2010, Intel Corporation.
27 * All rights reserved.
28 */
29
30#include <sys/processor.h>
31#include <sys/time.h>
32#include <sys/psm.h>
33#include <sys/smp_impldefs.h>
34#include <sys/cram.h>
35#include <sys/acpi/acpi.h>
36#include <sys/acpica.h>
37#include <sys/psm_common.h>
38#include <sys/pit.h>
39#include <sys/ddi.h>
40#include <sys/sunddi.h>
41#include <sys/ddi_impldefs.h>
42#include <sys/pci.h>
43#include <sys/promif.h>
44#include <sys/x86_archext.h>
45#include <sys/cpc_impl.h>
46#include <sys/uadmin.h>
47#include <sys/panic.h>
48#include <sys/debug.h>
49#include <sys/archsystm.h>
50#include <sys/trap.h>
51#include <sys/machsystm.h>
52#include <sys/sysmacros.h>
53#include <sys/cpuvar.h>
54#include <sys/rm_platter.h>
55#include <sys/privregs.h>
56#include <sys/note.h>
57#include <sys/pci_intr_lib.h>
58#include <sys/spl.h>
59#include <sys/clock.h>
60#include <sys/dditypes.h>
61#include <sys/sunddi.h>
62#include <sys/x_call.h>
63#include <sys/reboot.h>
64#include <sys/apix.h>
65
66static int apix_get_avail_vector_oncpu(uint32_t, int, int);
67static apix_vector_t *apix_init_vector(processorid_t, uchar_t);
68static void apix_cleanup_vector(apix_vector_t *);
69static void apix_insert_av(apix_vector_t *, void *, avfunc, caddr_t, caddr_t,
70    uint64_t *, int, dev_info_t *);
71static void apix_remove_av(apix_vector_t *, struct autovec *);
72static void apix_clear_dev_map(dev_info_t *, int, int);
73static boolean_t apix_is_cpu_enabled(processorid_t);
74static void apix_wait_till_seen(processorid_t, int);
75
/* Extract the interrupt number (ih_inum) from a DDI interrupt handle, 0 if none */
#define	GET_INTR_INUM(ihdlp)		\
	(((ihdlp) != NULL) ? ((ddi_intr_handle_impl_t *)(ihdlp))->ih_inum : 0)
78
79apix_rebind_info_t apix_rebindinfo = {0, 0, 0, NULL, 0, NULL};
80
81/*
82 * Allocate IPI
83 *
84 * Return vector number or 0 on error
85 */
uchar_t
apix_alloc_ipi(int ipl)
{
	apix_vector_t *vecp;
	uchar_t vector;
	int cpun;
	int nproc;

	/* CPU 0's per-CPU lock serializes IPI vector allocation */
	APIX_ENTER_CPU_LOCK(0);

	/*
	 * Pick a free vector in the IPI range on CPU 0; the same vector
	 * number is then claimed on every CPU below.
	 */
	vector = apix_get_avail_vector_oncpu(0, APIX_IPI_MIN, APIX_IPI_MAX);
	if (vector == 0) {
		APIX_LEAVE_CPU_LOCK(0);
		cmn_err(CE_WARN, "apix: no available IPI\n");
		apic_error |= APIC_ERR_GET_IPIVECT_FAIL;
		return (0);
	}

	/* Reserve the vector on all CPUs, including possible hot-add slots */
	nproc = max(apic_nproc, apic_max_nproc);
	for (cpun = 0; cpun < nproc; cpun++) {
		vecp = xv_vector(cpun, vector);
		if (vecp == NULL) {
			/* First use of this vector slot on this CPU */
			vecp = kmem_zalloc(sizeof (apix_vector_t), KM_NOSLEEP);
			if (vecp == NULL) {
				cmn_err(CE_WARN, "apix: No memory for ipi");
				goto fail;
			}
			xv_vector(cpun, vector) = vecp;
		}
		vecp->v_state = APIX_STATE_ALLOCED;
		vecp->v_type = APIX_TYPE_IPI;
		vecp->v_cpuid = vecp->v_bound_cpuid = cpun;
		vecp->v_vector = vector;
		vecp->v_pri = ipl;
	}
	APIX_LEAVE_CPU_LOCK(0);
	return (vector);

fail:
	/* Undo the reservations made on CPUs already processed */
	while (--cpun >= 0)
		apix_cleanup_vector(xv_vector(cpun, vector));
	APIX_LEAVE_CPU_LOCK(0);
	return (0);
}
130
131/*
132 * Add IPI service routine
133 */
134static int
135apix_add_ipi(int ipl, avfunc xxintr, char *name, int vector,
136    caddr_t arg1, caddr_t arg2)
137{
138	int cpun;
139	apix_vector_t *vecp;
140	int nproc;
141
142	ASSERT(vector >= APIX_IPI_MIN && vector <= APIX_IPI_MAX);
143
144	nproc = max(apic_nproc, apic_max_nproc);
145	for (cpun = 0; cpun < nproc; cpun++) {
146		APIX_ENTER_CPU_LOCK(cpun);
147		vecp = xv_vector(cpun, vector);
148		apix_insert_av(vecp, NULL, xxintr, arg1, arg2, NULL, ipl, NULL);
149		vecp->v_state = APIX_STATE_ENABLED;
150		APIX_LEAVE_CPU_LOCK(cpun);
151	}
152
153	APIC_VERBOSE(IPI, (CE_CONT, "apix: add ipi for %s, vector %x "
154	    "ipl %x\n", name, vector, ipl));
155
156	return (1);
157}
158
159/*
160 * Find and return first free vector in range (start, end)
161 */
162static int
163apix_get_avail_vector_oncpu(uint32_t cpuid, int start, int end)
164{
165	int i;
166	apix_impl_t *apixp = apixs[cpuid];
167
168	for (i = start; i <= end; i++) {
169		if (APIC_CHECK_RESERVE_VECTORS(i))
170			continue;
171		if (IS_VECT_FREE(apixp->x_vectbl[i]))
172			return (i);
173	}
174
175	return (0);
176}
177
178/*
179 * Allocate a vector on specified cpu
180 *
181 * Return NULL on error
182 */
183static apix_vector_t *
184apix_alloc_vector_oncpu(uint32_t cpuid, dev_info_t *dip, int inum, int type)
185{
186	processorid_t tocpu = cpuid & ~IRQ_USER_BOUND;
187	apix_vector_t *vecp;
188	int vector;
189
190	ASSERT(APIX_CPU_LOCK_HELD(tocpu));
191
192	/* find free vector */
193	vector = apix_get_avail_vector_oncpu(tocpu, APIX_AVINTR_MIN,
194	    APIX_AVINTR_MAX);
195	if (vector == 0)
196		return (NULL);
197
198	vecp = apix_init_vector(tocpu, vector);
199	vecp->v_type = (ushort_t)type;
200	vecp->v_inum = inum;
201	vecp->v_flags = (cpuid & IRQ_USER_BOUND) ? APIX_VECT_USER_BOUND : 0;
202
203	if (dip != NULL)
204		apix_set_dev_map(vecp, dip, inum);
205
206	return (vecp);
207}
208
209/*
210 * Allocates "count" contiguous MSI vectors starting at the proper alignment.
211 * Caller needs to make sure that count has to be power of 2 and should not
212 * be < 1.
213 *
214 * Return first vector number
215 */
apix_vector_t *
apix_alloc_nvectors_oncpu(uint32_t cpuid, dev_info_t *dip, int inum,
    int count, int type)
{
	int i, msibits, start = 0, navail = 0;
	apix_vector_t *vecp, *startp = NULL;
	processorid_t tocpu = cpuid & ~IRQ_USER_BOUND;
	uint_t flags;

	ASSERT(APIX_CPU_LOCK_HELD(tocpu));

	/*
	 * msibits is the no. of lower order message data bits for the
	 * allocated MSI vectors and is used to calculate the aligned
	 * starting vector
	 */
	msibits = count - 1;

	/* It has to be contiguous */
	for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
		if (!IS_VECT_FREE(xv_vector(tocpu, i)))
			continue;

		/*
		 * starting vector has to be aligned accordingly for
		 * multiple MSIs
		 */
		if (msibits)
			i = (i + msibits) & ~msibits;

		/* Count free, non-reserved vectors from the aligned start */
		for (navail = 0, start = i; i <= APIX_AVINTR_MAX; i++) {
			if (!IS_VECT_FREE(xv_vector(tocpu, i)))
				break;
			if (APIC_CHECK_RESERVE_VECTORS(i))
				break;
			if (++navail == count)
				goto done;
		}
	}

	/* No aligned, contiguous run of "count" free vectors exists */
	return (NULL);

done:
	flags = (cpuid & IRQ_USER_BOUND) ? APIX_VECT_USER_BOUND : 0;

	/* Claim and initialize every vector in the chosen run */
	for (i = 0; i < count; i++) {
		if ((vecp = apix_init_vector(tocpu, start + i)) == NULL)
			goto fail;

		vecp->v_type = (ushort_t)type;
		vecp->v_inum = inum + i;
		vecp->v_flags = flags;

		if (dip != NULL)
			apix_set_dev_map(vecp, dip, inum + i);

		if (i == 0)
			startp = vecp;	/* remember the first of the group */
	}

	return (startp);

fail:
	while (i-- > 0) {	/* Free allocated vectors */
		vecp = xv_vector(tocpu, start + i);
		apix_clear_dev_map(dip, inum + i, type);
		apix_cleanup_vector(vecp);
	}
	return (NULL);
}
286
/*
 * Write the 16-bit MSI data value.  The register offset depends on
 * whether the device advertises 64-bit addressing in MSI control.
 */
#define	APIX_WRITE_MSI_DATA(_hdl, _cap, _ctrl, _v)\
do {\
	if ((_ctrl) & PCI_MSI_64BIT_MASK)\
		pci_config_put16((_hdl), (_cap) + PCI_MSI_64BIT_DATA, (_v));\
	else\
		pci_config_put16((_hdl), (_cap) + PCI_MSI_32BIT_DATA, (_v));\
_NOTE(CONSTCOND)} while (0)
294
/*
 * Program MSI/MSI-X address and data for "count" vectors starting at
 * "vector", targeting the local APIC id "target_apic_id".  Interrupt
 * remapping entries are allocated/updated through apic_vt_ops first.
 */
static void
apix_pci_msi_enable_vector(apix_vector_t *vecp, dev_info_t *dip, int type,
    int inum, int count, uchar_t vector, int target_apic_id)
{
	uint64_t		msi_addr, msi_data;
	ushort_t		msi_ctrl;
	int			i, cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(dip);
	msi_regs_t		msi_regs;
	void			*intrmap_tbl[PCI_MSI_MAX_INTRS];

	DDI_INTR_IMPLDBG((CE_CONT, "apix_pci_msi_enable_vector: dip=0x%p\n"
	    "\tdriver = %s, inum=0x%x vector=0x%x apicid=0x%x\n", (void *)dip,
	    ddi_driver_name(dip), inum, vector, target_apic_id));

	ASSERT((handle != NULL) && (cap_ptr != 0));

	msi_regs.mr_data = vector;
	msi_regs.mr_addr = target_apic_id;

	/*
	 * Gather the per-vector remapping private data, let the
	 * remapping layer allocate entries, then store them back.
	 */
	for (i = 0; i < count; i++)
		intrmap_tbl[i] = xv_intrmap_private(vecp->v_cpuid, vector + i);
	apic_vt_ops->apic_intrmap_alloc_entry(intrmap_tbl, dip, type,
	    count, 0xff);
	for (i = 0; i < count; i++)
		xv_intrmap_private(vecp->v_cpuid, vector + i) = intrmap_tbl[i];

	/* Map the entry; msi_regs may be rewritten for remapped format */
	apic_vt_ops->apic_intrmap_map_entry(vecp->v_intrmap_private,
	    (void *)&msi_regs, type, count);
	apic_vt_ops->apic_intrmap_record_msi(vecp->v_intrmap_private,
	    &msi_regs);

	/* MSI Address */
	msi_addr = msi_regs.mr_addr;

	/* MSI Data: MSI is edge triggered according to spec */
	msi_data = msi_regs.mr_data;

	DDI_INTR_IMPLDBG((CE_CONT, "apix_pci_msi_enable_vector: addr=0x%lx "
	    "data=0x%lx\n", (long)msi_addr, (long)msi_data));

	if (type == APIX_TYPE_MSI) {
		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);

		/* Set the bits to inform how many MSIs are enabled */
		msi_ctrl |= ((highbit(count) - 1) << PCI_MSI_MME_SHIFT);
		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);

		/*
		 * NOTE(review): presumably this parks a non-maskable MSI
		 * on the reserved vector while the address registers are
		 * being rewritten below — confirm against APIX_RESV_VECTOR
		 * handling elsewhere.
		 */
		if ((vecp->v_flags & APIX_VECT_MASKABLE) == 0)
			APIX_WRITE_MSI_DATA(handle, cap_ptr, msi_ctrl,
			    APIX_RESV_VECTOR);

		pci_config_put32(handle,
		    cap_ptr + PCI_MSI_ADDR_OFFSET, msi_addr);
		if (msi_ctrl &  PCI_MSI_64BIT_MASK)
			pci_config_put32(handle,
			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, msi_addr >> 32);

		APIX_WRITE_MSI_DATA(handle, cap_ptr, msi_ctrl, msi_data);
	} else if (type == APIX_TYPE_MSIX) {
		uintptr_t	off;
		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(dip);

		/* Offset into the "inum"th entry in the MSI-X table */
		off = (uintptr_t)msix_p->msix_tbl_addr +
		    (inum * PCI_MSIX_VECTOR_SIZE);

		ddi_put32(msix_p->msix_tbl_hdl,
		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), msi_data);
		ddi_put64(msix_p->msix_tbl_hdl,
		    (uint64_t *)(off + PCI_MSIX_LOWER_ADDR_OFFSET), msi_addr);
	}
}
368
369static void
370apix_pci_msi_enable_mode(dev_info_t *dip, int type, int inum)
371{
372	ushort_t		msi_ctrl;
373	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
374	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(dip);
375
376	ASSERT((handle != NULL) && (cap_ptr != 0));
377
378	if (type == APIX_TYPE_MSI) {
379		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
380		if ((msi_ctrl & PCI_MSI_ENABLE_BIT))
381			return;
382
383		msi_ctrl |= PCI_MSI_ENABLE_BIT;
384		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
385
386	} else if (type == DDI_INTR_TYPE_MSIX) {
387		uintptr_t	off;
388		uint32_t	mask;
389		ddi_intr_msix_t	*msix_p;
390
391		msix_p = i_ddi_get_msix(dip);
392
393		/* Offset into "inum"th entry in the MSI-X table & clear mask */
394		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
395		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
396
397		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
398
399		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask & ~1));
400
401		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
402
403		if (!(msi_ctrl & PCI_MSIX_ENABLE_BIT)) {
404			msi_ctrl |= PCI_MSIX_ENABLE_BIT;
405			pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
406			    msi_ctrl);
407		}
408	}
409}
410
411/*
412 * Setup interrupt, pogramming IO-APIC or MSI/X address/data.
413 */
void
apix_enable_vector(apix_vector_t *vecp)
{
	int tocpu = vecp->v_cpuid, type = vecp->v_type;
	apic_cpus_info_t *cpu_infop;
	ulong_t iflag;

	ASSERT(tocpu < apic_nproc);

	/* Account the binding against the target CPU */
	cpu_infop = &apic_cpus[tocpu];
	if (vecp->v_flags & APIX_VECT_USER_BOUND)
		cpu_infop->aci_bound++;
	else
		cpu_infop->aci_temp_bound++;

	/* Block interrupts and serialize against other hardware updates */
	iflag = intr_clear();
	lock_set(&apic_ioapic_lock);

	if (!DDI_INTR_IS_MSI_OR_MSIX(type)) {	/* fixed */
		apix_intx_enable(vecp->v_inum);
	} else {
		int inum = vecp->v_inum;
		dev_info_t *dip = APIX_GET_DIP(vecp);
		int count = i_ddi_intr_get_current_nintrs(dip);

		if (type == APIX_TYPE_MSI) {	/* MSI */
			/*
			 * Multi-MSI uses one address/data pair for the
			 * whole group, so program the hardware only once
			 * the highest inum of the group is enabled.
			 */
			if (inum == apix_get_max_dev_inum(dip, type)) {
				/* last one */
				uchar_t start_inum = inum + 1 - count;
				uchar_t start_vect = vecp->v_vector + 1 - count;
				apix_vector_t *start_vecp =
				    xv_vector(vecp->v_cpuid, start_vect);

				APIC_VERBOSE(INTR, (CE_CONT, "apix: call "
				    "apix_pci_msi_enable_vector\n"));
				apix_pci_msi_enable_vector(start_vecp, dip,
				    type, start_inum, count, start_vect,
				    cpu_infop->aci_local_id);

				APIC_VERBOSE(INTR, (CE_CONT, "apix: call "
				    "apix_pci_msi_enable_mode\n"));
				apix_pci_msi_enable_mode(dip, type, inum);
			}
		} else {				/* MSI-X */
			apix_pci_msi_enable_vector(vecp, dip,
			    type, inum, 1, vecp->v_vector,
			    cpu_infop->aci_local_id);
			apix_pci_msi_enable_mode(dip, type, inum);
		}
	}
	vecp->v_state = APIX_STATE_ENABLED;
	/* This CPU gained interrupt load; let redistribution consider it */
	apic_redist_cpu_skip &= ~(1 << tocpu);

	lock_clear(&apic_ioapic_lock);
	intr_restore(iflag);
}
470
471/*
472 * Disable the interrupt
473 */
void
apix_disable_vector(apix_vector_t *vecp)
{
	struct autovec *avp = vecp->v_autovect;
	ulong_t iflag;

	ASSERT(avp != NULL);

	/* Block interrupts and serialize hardware updates */
	iflag = intr_clear();
	lock_set(&apic_ioapic_lock);

	switch (vecp->v_type) {
	case APIX_TYPE_MSI:
		ASSERT(avp->av_vector != NULL && avp->av_dip != NULL);
		/*
		 * Disable the MSI vector
		 * Make sure we only disable on the last
		 * of the multi-MSI support
		 */
		if (i_ddi_intr_get_current_nenables(avp->av_dip) == 1) {
			apic_pci_msi_disable_mode(avp->av_dip,
			    DDI_INTR_TYPE_MSI);
		}
		break;
	case APIX_TYPE_MSIX:
		ASSERT(avp->av_vector != NULL && avp->av_dip != NULL);
		/*
		 * Disable the MSI-X vector
		 * needs to clear its mask and addr/data for each MSI-X
		 */
		apic_pci_msi_unconfigure(avp->av_dip, DDI_INTR_TYPE_MSIX,
		    vecp->v_inum);
		/*
		 * Make sure we only disable on the last MSI-X
		 */
		if (i_ddi_intr_get_current_nenables(avp->av_dip) == 1) {
			apic_pci_msi_disable_mode(avp->av_dip,
			    DDI_INTR_TYPE_MSIX);
		}
		break;
	default:
		/* Fixed interrupt: mask it at the interrupt controller */
		apix_intx_disable(vecp->v_inum);
		break;
	}

	/* Preserve the state across CPU suspend so resume can re-enable */
	if (!(apic_cpus[vecp->v_cpuid].aci_status & APIC_CPU_SUSPEND))
		vecp->v_state = APIX_STATE_DISABLED;
	apic_vt_ops->apic_intrmap_free_entry(&vecp->v_intrmap_private);
	vecp->v_intrmap_private = NULL;

	lock_clear(&apic_ioapic_lock);
	intr_restore(iflag);
}
527
528/*
529 * Mark vector as obsoleted or freed. The vector is marked
530 * obsoleted if there are pending requests on it. Otherwise,
531 * free the vector. The obsoleted vectors get freed after
532 * being serviced.
533 *
534 * Return 1 on being obosoleted and 0 on being freed.
535 */
/* Handler has a pending request or is currently being serviced */
#define	INTR_BUSY(_avp)\
	((((volatile ushort_t)(_avp)->av_flags) &\
	(AV_PENTRY_PEND | AV_PENTRY_ONPROC)) != 0)
/* Caller is running on the given CPU with interrupts disabled */
#define	LOCAL_WITH_INTR_DISABLED(_cpuid)\
	((_cpuid) == psm_get_cpu_id() && !interrupts_enabled())
/* Safe sink for av_ticksp of removed handlers (see apix_remove_av()) */
static uint64_t dummy_tick;
542
int
apix_obsolete_vector(apix_vector_t *vecp)
{
	struct autovec *avp = vecp->v_autovect;
	int repeats, tries, ipl, busy = 0, cpuid = vecp->v_cpuid;
	apix_impl_t *apixp = apixs[cpuid];

	ASSERT(APIX_CPU_LOCK_HELD(cpuid));

	/* Try to retire every handler still attached to this vector */
	for (avp = vecp->v_autovect; avp != NULL; avp = avp->av_link) {
		if (avp->av_vector == NULL)
			continue;	/* entry already removed */

		if (LOCAL_WITH_INTR_DISABLED(cpuid)) {
			int bit, index, irr;

			/*
			 * We are on the target CPU with interrupts off,
			 * so nothing can make progress while we poll;
			 * decide immediately from the flags and the IRR.
			 */
			if (INTR_BUSY(avp)) {
				busy++;
				continue;
			}

			/* check IRR for pending interrupts */
			index = vecp->v_vector / 32;
			bit = vecp->v_vector % 32;
			irr = apic_reg_ops->apic_read(APIC_IRR_REG + index);
			if ((irr & (1 << bit)) != 0)
				busy++;

			if (!busy)
				apix_remove_av(vecp, avp);

			continue;
		}

		/* Poll for the handler to go idle, bounded by the tunable */
		repeats = 0;
		do {
			repeats++;
			for (tries = 0; tries < apic_max_reps_clear_pending;
			    tries++)
				if (!INTR_BUSY(avp))
					break;
		} while (INTR_BUSY(avp) &&
		    (repeats < apic_max_reps_clear_pending));

		if (INTR_BUSY(avp))
			busy++;
		else {
			/*
			 * Interrupt is not in pending list or being serviced.
			 * However it might be cached in Local APIC's IRR
			 * register. It's impossible to check another CPU's
			 * IRR register. Then wait till lower levels finish
			 * running.
			 */
			for (ipl = 1; ipl < MIN(LOCK_LEVEL, vecp->v_pri); ipl++)
				apix_wait_till_seen(cpuid, ipl);
			if (INTR_BUSY(avp))
				busy++;
		}

		if (!busy)
			apix_remove_av(vecp, avp);
	}

	if (busy) {
		/* Still in use: queue the vector on the obsoletes list */
		apix_vector_t *tp = apixp->x_obsoletes;

		if (vecp->v_state == APIX_STATE_OBSOLETED)
			return (1);	/* already queued */

		vecp->v_state = APIX_STATE_OBSOLETED;
		vecp->v_next = NULL;
		if (tp == NULL)
			apixp->x_obsoletes = vecp;
		else {
			/* append at the tail of the obsoletes list */
			while (tp->v_next != NULL)
				tp = tp->v_next;
			tp->v_next = vecp;
		}
		return (1);
	}

	/* interrupt is not busy */
	if (vecp->v_state == APIX_STATE_OBSOLETED) {
		/*
		 * remove from obsoleted list
		 * NOTE(review): this assumes vecp is at the head of the
		 * obsoletes list — confirm callers guarantee that.
		 */
		apixp->x_obsoletes = vecp->v_next;
		vecp->v_next = NULL;
	}
	apix_cleanup_vector(vecp);
	return (0);
}
634
635/*
636 * Duplicate number of continuous vectors to specified target vectors.
637 */
static void
apix_dup_vectors(apix_vector_t *oldp, apix_vector_t *newp, int count)
{
	struct autovec *avp;
	apix_vector_t *fromp, *top;
	processorid_t oldcpu = oldp->v_cpuid, newcpu = newp->v_cpuid;
	uchar_t oldvec = oldp->v_vector, newvec = newp->v_vector;
	int i, inum;

	ASSERT(oldp->v_type != APIX_TYPE_IPI);

	for (i = 0; i < count; i++) {
		fromp = xv_vector(oldcpu, oldvec + i);
		top = xv_vector(newcpu, newvec + i);
		ASSERT(fromp != NULL && top != NULL);

		/* copy over original one */
		top->v_state = fromp->v_state;
		top->v_type = fromp->v_type;
		top->v_bound_cpuid = fromp->v_bound_cpuid;
		top->v_inum = fromp->v_inum;
		top->v_flags = fromp->v_flags;
		top->v_intrmap_private = fromp->v_intrmap_private;

		/* re-install every live handler onto the target vector */
		for (avp = fromp->v_autovect; avp != NULL; avp = avp->av_link) {
			if (avp->av_vector == NULL)
				continue;	/* skip removed entries */

			apix_insert_av(top, avp->av_intr_id, avp->av_vector,
			    avp->av_intarg1, avp->av_intarg2, avp->av_ticksp,
			    avp->av_prilevel, avp->av_dip);

			/* fixed interrupts carry per-handler dev mappings */
			if (fromp->v_type == APIX_TYPE_FIXED &&
			    avp->av_dip != NULL) {
				inum = GET_INTR_INUM(avp->av_intr_id);
				apix_set_dev_map(top, avp->av_dip, inum);
			}
		}

		/* MSI/X keeps its device mapping on the vector itself */
		if (DDI_INTR_IS_MSI_OR_MSIX(fromp->v_type) &&
		    fromp->v_devp != NULL)
			apix_set_dev_map(top, fromp->v_devp->dv_dip,
			    fromp->v_devp->dv_inum);
	}
}
683
684static apix_vector_t *
685apix_init_vector(processorid_t cpuid, uchar_t vector)
686{
687	apix_impl_t *apixp = apixs[cpuid];
688	apix_vector_t *vecp = apixp->x_vectbl[vector];
689
690	ASSERT(IS_VECT_FREE(vecp));
691
692	if (vecp == NULL) {
693		vecp = kmem_zalloc(sizeof (apix_vector_t), KM_NOSLEEP);
694		if (vecp == NULL) {
695			cmn_err(CE_WARN, "apix: no memory to allocate vector");
696			return (NULL);
697		}
698		apixp->x_vectbl[vector] = vecp;
699	}
700	vecp->v_state = APIX_STATE_ALLOCED;
701	vecp->v_cpuid = vecp->v_bound_cpuid = cpuid;
702	vecp->v_vector = vector;
703
704	return (vecp);
705}
706
/*
 * Return a vector-table entry to the free state.  The entry itself is
 * kept allocated for reuse; only the bookkeeping fields are reset.
 */
static void
apix_cleanup_vector(apix_vector_t *vecp)
{
	ASSERT(vecp->v_share == 0);	/* no handlers may remain attached */
	vecp->v_bound_cpuid = IRQ_UNINIT;
	vecp->v_state = APIX_STATE_FREED;
	vecp->v_type = 0;
	vecp->v_flags = 0;
	vecp->v_busy = 0;
	vecp->v_intrmap_private = NULL;
}
718
719static void
720apix_dprint_vector(apix_vector_t *vecp, dev_info_t *dip, int count)
721{
722#ifdef DEBUG
723	major_t major;
724	char *name, *drv_name;
725	int instance, len, t_len;
726	char mesg[1024] = "apix: ";
727
728	t_len = sizeof (mesg);
729	len = strlen(mesg);
730	if (dip != NULL) {
731		name = ddi_get_name(dip);
732		major = ddi_name_to_major(name);
733		drv_name = ddi_major_to_name(major);
734		instance = ddi_get_instance(dip);
735		(void) snprintf(mesg + len, t_len - len, "%s (%s) instance %d ",
736		    name, drv_name, instance);
737	}
738	len = strlen(mesg);
739
740	switch (vecp->v_type) {
741	case APIX_TYPE_FIXED:
742		(void) snprintf(mesg + len, t_len - len, "irqno %d",
743		    vecp->v_inum);
744		break;
745	case APIX_TYPE_MSI:
746		(void) snprintf(mesg + len, t_len - len,
747		    "msi inum %d (count %d)", vecp->v_inum, count);
748		break;
749	case APIX_TYPE_MSIX:
750		(void) snprintf(mesg + len, t_len - len, "msi-x inum %d",
751		    vecp->v_inum);
752		break;
753	default:
754		break;
755
756	}
757
758	APIC_VERBOSE(ALLOC, (CE_CONT, "%s allocated with vector 0x%x on "
759	    "cpu %d\n", mesg, vecp->v_vector, vecp->v_cpuid));
760#endif	/* DEBUG */
761}
762
763/*
764 * Operations on avintr
765 */
766
/* Initialize every field of an autovec entry in place, clearing its flags */
#define	INIT_AUTOVEC(p, intr_id, f, arg1, arg2, ticksp, ipl, dip)	\
do { \
	(p)->av_intr_id = intr_id;	\
	(p)->av_vector = f;		\
	(p)->av_intarg1 = arg1;		\
	(p)->av_intarg2 = arg2;		\
	(p)->av_ticksp = ticksp;	\
	(p)->av_prilevel = ipl;		\
	(p)->av_dip = dip;		\
	(p)->av_flags = 0;		\
_NOTE(CONSTCOND)} while (0)
778
779/*
780 * Insert an interrupt service routine into chain by its priority from
781 * high to low
782 */
static void
apix_insert_av(apix_vector_t *vecp, void *intr_id, avfunc f, caddr_t arg1,
    caddr_t arg2, uint64_t *ticksp, int ipl, dev_info_t *dip)
{
	struct autovec *p, *prep, *mem;

	APIC_VERBOSE(INTR, (CE_CONT, "apix_insert_av: dip %p, vector 0x%x, "
	    "cpu %d\n", (void *)dip, vecp->v_vector, vecp->v_cpuid));

	/* Pre-build the new entry; KM_SLEEP means this cannot fail */
	mem = kmem_zalloc(sizeof (struct autovec), KM_SLEEP);
	INIT_AUTOVEC(mem, intr_id, f, arg1, arg2, ticksp, ipl, dip);
	if (vecp->v_type == APIX_TYPE_FIXED && apic_level_intr[vecp->v_inum])
		mem->av_flags |= AV_PENTRY_LEVEL;

	vecp->v_share++;
	/* The vector's priority is the highest ipl of its handlers */
	vecp->v_pri = (ipl > vecp->v_pri) ? ipl : vecp->v_pri;
	if (vecp->v_autovect == NULL) {	/* Nothing on list - put it at head */
		vecp->v_autovect = mem;
		return;
	}

	if (DDI_INTR_IS_MSI_OR_MSIX(vecp->v_type)) {	/* MSI/X */
		ASSERT(vecp->v_share == 1);	/* No sharing for MSI/X */

		/* Reuse the existing head entry in place; discard "mem" */
		INIT_AUTOVEC(vecp->v_autovect, intr_id, f, arg1, arg2, ticksp,
		    ipl, dip);
		prep = vecp->v_autovect->av_link;
		vecp->v_autovect->av_link = NULL;

		/* Free the following autovect chain */
		while (prep != NULL) {
			ASSERT(prep->av_vector == NULL);

			p = prep;
			prep = prep->av_link;
			kmem_free(p, sizeof (struct autovec));
		}

		kmem_free(mem, sizeof (struct autovec));
		return;
	}

	/* find where it goes in list */
	prep = NULL;
	for (p = vecp->v_autovect; p != NULL; p = p->av_link) {
		/* chain is ordered by descending priority */
		if (p->av_vector && p->av_prilevel <= ipl)
			break;
		prep = p;
	}
	if (prep != NULL) {
		if (prep->av_vector == NULL) {	/* freed struct available */
			INIT_AUTOVEC(prep, intr_id, f, arg1, arg2,
			    ticksp, ipl, dip);
			prep->av_flags = mem->av_flags;
			kmem_free(mem, sizeof (struct autovec));
			return;
		}

		mem->av_link = prep->av_link;
		prep->av_link = mem;
	} else {
		/* insert new intpt at beginning of chain */
		mem->av_link = vecp->v_autovect;
		vecp->v_autovect = mem;
	}
}
849
850/*
851 * After having made a change to an autovector list, wait until we have
852 * seen specified cpu not executing an interrupt at that level--so we
853 * know our change has taken effect completely (no old state in registers,
854 * etc).
855 */
856#define	APIX_CPU_ENABLED(_cp) \
857	(quiesce_active == 0 && \
858	(((_cp)->cpu_flags & (CPU_QUIESCED|CPU_OFFLINE)) == 0))
859
static void
apix_wait_till_seen(processorid_t cpuid, int ipl)
{
	struct cpu *cp = cpu[cpuid];

	/* Nothing to wait for on our own CPU when interrupts are off */
	if (cp == NULL || LOCAL_WITH_INTR_DISABLED(cpuid))
		return;

	/*
	 * Don't wait if the CPU is quiesced or offlined. This can happen
	 * when a CPU is running pause thread but hardware triggered an
	 * interrupt and the interrupt gets queued.
	 */
	for (;;) {
		/* spin until the CPU is neither active nor pending at ipl */
		if (!INTR_ACTIVE((volatile struct cpu *)cpu[cpuid], ipl) &&
		    (!APIX_CPU_ENABLED(cp) ||
		    !INTR_PENDING((volatile apix_impl_t *)apixs[cpuid], ipl)))
			return;
	}
}
880
/*
 * Detach a handler from a vector's autovec chain.  The entry itself
 * stays linked (marked free by av_vector == NULL) for later reuse.
 */
static void
apix_remove_av(apix_vector_t *vecp, struct autovec *target)
{
	int hi_pri = 0;
	struct autovec *p;

	if (target == NULL)
		return;

	APIC_VERBOSE(INTR, (CE_CONT, "apix_remove_av: dip %p, vector 0x%x, "
	    "cpu %d\n", (void *)target->av_dip, vecp->v_vector, vecp->v_cpuid));

	/* Recompute the highest priority among the remaining handlers */
	for (p = vecp->v_autovect; p; p = p->av_link) {
		if (p == target || p->av_vector == NULL)
			continue;
		hi_pri = (p->av_prilevel > hi_pri) ? p->av_prilevel : hi_pri;
	}

	vecp->v_share--;
	vecp->v_pri = hi_pri;

	/*
	 * This drops the handler from the chain, it can no longer be called.
	 * However, there is no guarantee that the handler is not currently
	 * still executing.
	 */
	target->av_vector = NULL;
	/*
	 * There is a race where we could be just about to pick up the ticksp
	 * pointer to increment it after returning from the service routine
	 * in av_dispatch_autovect.  Rather than NULL it out let's just point
	 * it off to something safe so that any final tick update attempt
	 * won't fault.
	 */
	target->av_ticksp = &dummy_tick;
	apix_wait_till_seen(vecp->v_cpuid, target->av_prilevel);
}
918
919static struct autovec *
920apix_find_av(apix_vector_t *vecp, void *intr_id, avfunc f)
921{
922	struct autovec *p;
923
924	for (p = vecp->v_autovect; p; p = p->av_link) {
925		if ((p->av_vector == f) && (p->av_intr_id == intr_id)) {
926			/* found the handler */
927			return (p);
928		}
929	}
930
931	return (NULL);
932}
933
934static apix_vector_t *
935apix_find_vector_by_avintr(void *intr_id, avfunc f)
936{
937	apix_vector_t *vecp;
938	processorid_t n;
939	uchar_t v;
940
941	for (n = 0; n < apic_nproc; n++) {
942		if (!apix_is_cpu_enabled(n))
943			continue;
944
945		for (v = APIX_AVINTR_MIN; v <= APIX_AVINTR_MIN; v++) {
946			vecp = xv_vector(n, v);
947			if (vecp == NULL ||
948			    vecp->v_state <= APIX_STATE_OBSOLETED)
949				continue;
950
951			if (apix_find_av(vecp, intr_id, f) != NULL)
952				return (vecp);
953		}
954	}
955
956	return (NULL);
957}
958
959/*
960 * Add interrupt service routine.
961 *
962 * For legacy interrupts (HPET timer, ACPI SCI), the vector is actually
963 * IRQ no. A vector is then allocated. Otherwise, the vector is already
964 * allocated. The input argument virt_vect is virtual vector of format
965 * APIX_VIRTVEC_VECTOR(cpuid, vector).
966 *
967 * Return 1 on success, 0 on failure.
968 */
969int
970apix_add_avintr(void *intr_id, int ipl, avfunc xxintr, char *name,
971    int virt_vect, caddr_t arg1, caddr_t arg2, uint64_t *ticksp,
972    dev_info_t *dip)
973{
974	int cpuid;
975	uchar_t v = (uchar_t)APIX_VIRTVEC_VECTOR(virt_vect);
976	apix_vector_t *vecp;
977
978	if (xxintr == NULL) {
979		cmn_err(CE_WARN, "Attempt to add null for %s "
980		    "on vector 0x%x,0x%x", name,
981		    APIX_VIRTVEC_CPU(virt_vect),
982		    APIX_VIRTVEC_VECTOR(virt_vect));
983		return (0);
984	}
985
986	if (v >= APIX_IPI_MIN)	/* IPIs */
987		return (apix_add_ipi(ipl, xxintr, name, v, arg1, arg2));
988
989	if (!APIX_IS_VIRTVEC(virt_vect)) {	/* got irq */
990		int irqno = virt_vect;
991		int inum = GET_INTR_INUM(intr_id);
992
993		/*
994		 * Senarios include:
995		 * a. add_avintr() is called before irqp initialized (legacy)
996		 * b. irqp is initialized, vector is not allocated (fixed)
997		 * c. irqp is initialized, vector is allocated (fixed & shared)
998		 */
999		if ((vecp = apix_alloc_intx(dip, inum, irqno)) == NULL)
1000			return (0);
1001
1002		cpuid = vecp->v_cpuid;
1003		v = vecp->v_vector;
1004		virt_vect = APIX_VIRTVECTOR(cpuid, v);
1005	} else {	/* got virtual vector */
1006		cpuid = APIX_VIRTVEC_CPU(virt_vect);
1007		vecp = xv_vector(cpuid, v);
1008		ASSERT(vecp != NULL);
1009	}
1010
1011	lock_set(&apix_lock);
1012	if (vecp->v_state <= APIX_STATE_OBSOLETED) {
1013		vecp = NULL;
1014
1015		/*
1016		 * Basically the allocated but not enabled interrupts
1017		 * will not get re-targeted. But MSIs in allocated state
1018		 * could be re-targeted due to group re-targeting.
1019		 */
1020		if (intr_id != NULL && dip != NULL) {
1021			ddi_intr_handle_impl_t *hdlp = intr_id;
1022			vecp = apix_get_dev_map(dip, hdlp->ih_inum,
1023			    hdlp->ih_type);
1024			ASSERT(vecp->v_state == APIX_STATE_ALLOCED);
1025		}
1026		if (vecp == NULL) {
1027			lock_clear(&apix_lock);
1028			cmn_err(CE_WARN, "Invalid interrupt 0x%x,0x%x "
1029			    " for %p to add", cpuid, v, intr_id);
1030			return (0);
1031		}
1032		cpuid = vecp->v_cpuid;
1033		virt_vect = APIX_VIRTVECTOR(cpuid, vecp->v_vector);
1034	}
1035
1036	APIX_ENTER_CPU_LOCK(cpuid);
1037	apix_insert_av(vecp, intr_id, xxintr, arg1, arg2, ticksp, ipl, dip);
1038	APIX_LEAVE_CPU_LOCK(cpuid);
1039
1040	(void) apix_addspl(virt_vect, ipl, 0, 0);
1041
1042	lock_clear(&apix_lock);
1043
1044	return (1);
1045}
1046
1047/*
1048 * Remove avintr
1049 *
1050 * For fixed, if it's the last one of shared interrupts, free the vector.
1051 * For msi/x, only disable the interrupt but not free the vector, which
1052 * is freed by PSM_XXX_FREE_XXX.
1053 */
void
apix_rem_avintr(void *intr_id, int ipl, avfunc xxintr, int virt_vect)
{
	avfunc f;
	apix_vector_t *vecp;
	struct autovec *avp;
	processorid_t cpuid;

	if ((f = xxintr) == NULL)
		return;

	lock_set(&apix_lock);

	/* Resolve the argument to a vector, whichever form it came in */
	if (!APIX_IS_VIRTVEC(virt_vect)) {	/* got irq */
		vecp = apix_intx_get_vector(virt_vect);
		virt_vect = APIX_VIRTVECTOR(vecp->v_cpuid, vecp->v_vector);
	} else	/* got virtual vector */
		vecp = xv_vector(APIX_VIRTVEC_CPU(virt_vect),
		    APIX_VIRTVEC_VECTOR(virt_vect));

	if (vecp == NULL) {
		lock_clear(&apix_lock);
		cmn_err(CE_CONT, "Invalid interrupt 0x%x,0x%x to remove",
		    APIX_VIRTVEC_CPU(virt_vect),
		    APIX_VIRTVEC_VECTOR(virt_vect));
		return;
	}

	if (vecp->v_state <= APIX_STATE_OBSOLETED ||
	    ((avp = apix_find_av(vecp, intr_id, f)) == NULL)) {
		/*
		 * It's possible that the interrupt is rebound to a
		 * different cpu before rem_avintr() is called. Search
		 * through all vectors once it happens.
		 */
		if ((vecp = apix_find_vector_by_avintr(intr_id, f))
		    == NULL) {
			lock_clear(&apix_lock);
			cmn_err(CE_CONT, "Unknown interrupt 0x%x,0x%x "
			    "for %p to remove", APIX_VIRTVEC_CPU(virt_vect),
			    APIX_VIRTVEC_VECTOR(virt_vect), intr_id);
			return;
		}
		virt_vect = APIX_VIRTVECTOR(vecp->v_cpuid, vecp->v_vector);
		avp = apix_find_av(vecp, intr_id, f);
	}
	cpuid = vecp->v_cpuid;

	/* disable interrupt */
	(void) apix_delspl(virt_vect, ipl, 0, 0);

	/* remove ISR entry */
	APIX_ENTER_CPU_LOCK(cpuid);
	apix_remove_av(vecp, avp);
	APIX_LEAVE_CPU_LOCK(cpuid);

	lock_clear(&apix_lock);
}
1112
1113/*
1114 * Device to vector mapping table
1115 */
1116
1117static void
1118apix_clear_dev_map(dev_info_t *dip, int inum, int type)
1119{
1120	char *name;
1121	major_t major;
1122	apix_dev_vector_t *dvp, *prev = NULL;
1123	int found = 0;
1124
1125	name = ddi_get_name(dip);
1126	major = ddi_name_to_major(name);
1127
1128	mutex_enter(&apix_mutex);
1129
1130	for (dvp = apix_dev_vector[major]; dvp != NULL;
1131	    prev = dvp, dvp = dvp->dv_next) {
1132		if (dvp->dv_dip == dip && dvp->dv_inum == inum &&
1133		    dvp->dv_type == type) {
1134			found++;
1135			break;
1136		}
1137	}
1138
1139	if (!found) {
1140		mutex_exit(&apix_mutex);
1141		return;
1142	}
1143
1144	if (prev != NULL)
1145		prev->dv_next = dvp->dv_next;
1146
1147	if (apix_dev_vector[major] == dvp)
1148		apix_dev_vector[major] = dvp->dv_next;
1149
1150	dvp->dv_vector->v_devp = NULL;
1151
1152	mutex_exit(&apix_mutex);
1153
1154	kmem_free(dvp, sizeof (apix_dev_vector_t));
1155}
1156
1157void
1158apix_set_dev_map(apix_vector_t *vecp, dev_info_t *dip, int inum)
1159{
1160	apix_dev_vector_t *dvp;
1161	char *name;
1162	major_t major;
1163	uint32_t found = 0;
1164
1165	ASSERT(dip != NULL);
1166	name = ddi_get_name(dip);
1167	major = ddi_name_to_major(name);
1168
1169	mutex_enter(&apix_mutex);
1170
1171	for (dvp = apix_dev_vector[major]; dvp != NULL;
1172	    dvp = dvp->dv_next) {
1173		if (dvp->dv_dip == dip && dvp->dv_inum == inum &&
1174		    dvp->dv_type == vecp->v_type) {
1175			found++;
1176			break;
1177		}
1178	}
1179
1180	if (found == 0) {	/* not found */
1181		dvp = kmem_zalloc(sizeof (apix_dev_vector_t), KM_SLEEP);
1182		dvp->dv_dip = dip;
1183		dvp->dv_inum = inum;
1184		dvp->dv_type = vecp->v_type;
1185
1186		dvp->dv_next = apix_dev_vector[major];
1187		apix_dev_vector[major] = dvp;
1188	}
1189	dvp->dv_vector = vecp;
1190	vecp->v_devp = dvp;
1191
1192	mutex_exit(&apix_mutex);
1193
1194	DDI_INTR_IMPLDBG((CE_CONT, "apix_set_dev_map: dip=0x%p "
1195	    "inum=0x%x  vector=0x%x/0x%x\n",
1196	    (void *)dip, inum, vecp->v_cpuid, vecp->v_vector));
1197}
1198
1199apix_vector_t *
1200apix_get_dev_map(dev_info_t *dip, int inum, int type)
1201{
1202	char *name;
1203	major_t major;
1204	apix_dev_vector_t *dvp;
1205	apix_vector_t *vecp;
1206
1207	name = ddi_get_name(dip);
1208	if ((major = ddi_name_to_major(name)) == DDI_MAJOR_T_NONE)
1209		return (NULL);
1210
1211	mutex_enter(&apix_mutex);
1212	for (dvp = apix_dev_vector[major]; dvp != NULL;
1213	    dvp = dvp->dv_next) {
1214		if (dvp->dv_dip == dip && dvp->dv_inum == inum &&
1215		    dvp->dv_type == type) {
1216			vecp = dvp->dv_vector;
1217			mutex_exit(&apix_mutex);
1218			return (vecp);
1219		}
1220	}
1221	mutex_exit(&apix_mutex);
1222
1223	return (NULL);
1224}
1225
1226/*
1227 * Get minimum inum for specified device, used for MSI
1228 */
1229int
1230apix_get_min_dev_inum(dev_info_t *dip, int type)
1231{
1232	char *name;
1233	major_t major;
1234	apix_dev_vector_t *dvp;
1235	int inum = -1;
1236
1237	name = ddi_get_name(dip);
1238	major = ddi_name_to_major(name);
1239
1240	mutex_enter(&apix_mutex);
1241	for (dvp = apix_dev_vector[major]; dvp != NULL;
1242	    dvp = dvp->dv_next) {
1243		if (dvp->dv_dip == dip && dvp->dv_type == type) {
1244			if (inum == -1)
1245				inum = dvp->dv_inum;
1246			else
1247				inum = (dvp->dv_inum < inum) ?
1248				    dvp->dv_inum : inum;
1249		}
1250	}
1251	mutex_exit(&apix_mutex);
1252
1253	return (inum);
1254}
1255
1256int
1257apix_get_max_dev_inum(dev_info_t *dip, int type)
1258{
1259	char *name;
1260	major_t major;
1261	apix_dev_vector_t *dvp;
1262	int inum = -1;
1263
1264	name = ddi_get_name(dip);
1265	major = ddi_name_to_major(name);
1266
1267	mutex_enter(&apix_mutex);
1268	for (dvp = apix_dev_vector[major]; dvp != NULL;
1269	    dvp = dvp->dv_next) {
1270		if (dvp->dv_dip == dip && dvp->dv_type == type) {
1271			if (inum == -1)
1272				inum = dvp->dv_inum;
1273			else
1274				inum = (dvp->dv_inum > inum) ?
1275				    dvp->dv_inum : inum;
1276		}
1277	}
1278	mutex_exit(&apix_mutex);
1279
1280	return (inum);
1281}
1282
1283/*
1284 * Major to cpu binding, for INTR_ROUND_ROBIN_WITH_AFFINITY cpu
1285 * binding policy
1286 */
1287
1288static uint32_t
1289apix_get_dev_binding(dev_info_t *dip)
1290{
1291	major_t major;
1292	char *name;
1293	uint32_t cpu = IRQ_UNINIT;
1294
1295	name = ddi_get_name(dip);
1296	major = ddi_name_to_major(name);
1297	if (major < devcnt) {
1298		mutex_enter(&apix_mutex);
1299		cpu = apix_major_to_cpu[major];
1300		mutex_exit(&apix_mutex);
1301	}
1302
1303	return (cpu);
1304}
1305
1306static void
1307apix_set_dev_binding(dev_info_t *dip, uint32_t cpu)
1308{
1309	major_t major;
1310	char *name;
1311
1312	/* setup major to cpu mapping */
1313	name = ddi_get_name(dip);
1314	major = ddi_name_to_major(name);
1315	if (apix_major_to_cpu[major] == IRQ_UNINIT) {
1316		mutex_enter(&apix_mutex);
1317		apix_major_to_cpu[major] = cpu;
1318		mutex_exit(&apix_mutex);
1319	}
1320}
1321
1322/*
1323 * return the cpu to which this intr should be bound.
1324 * Check properties or any other mechanism to see if user wants it
1325 * bound to a specific CPU. If so, return the cpu id with high bit set.
1326 * If not, use the policy to choose a cpu and return the id.
1327 */
1328uint32_t
1329apix_bind_cpu(dev_info_t *dip)
1330{
1331	int	instance, instno, prop_len, bind_cpu, count;
1332	uint_t	i, rc;
1333	major_t	major;
1334	char	*name, *drv_name, *prop_val, *cptr;
1335	char	prop_name[32];
1336
1337	lock_set(&apix_lock);
1338
1339	if (apic_intr_policy == INTR_LOWEST_PRIORITY) {
1340		cmn_err(CE_WARN, "apix: unsupported interrupt binding policy "
1341		    "LOWEST PRIORITY, use ROUND ROBIN instead");
1342		apic_intr_policy = INTR_ROUND_ROBIN;
1343	}
1344
1345	if (apic_nproc == 1) {
1346		lock_clear(&apix_lock);
1347		return (0);
1348	}
1349
1350	drv_name = NULL;
1351	rc = DDI_PROP_NOT_FOUND;
1352	major = (major_t)-1;
1353	if (dip != NULL) {
1354		name = ddi_get_name(dip);
1355		major = ddi_name_to_major(name);
1356		drv_name = ddi_major_to_name(major);
1357		instance = ddi_get_instance(dip);
1358		if (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) {
1359			bind_cpu = apix_get_dev_binding(dip);
1360			if (bind_cpu != IRQ_UNINIT) {
1361				lock_clear(&apix_lock);
1362				return (bind_cpu);
1363			}
1364		}
1365		/*
1366		 * search for "drvname"_intpt_bind_cpus property first, the
1367		 * syntax of the property should be "a[,b,c,...]" where
1368		 * instance 0 binds to cpu a, instance 1 binds to cpu b,
1369		 * instance 3 binds to cpu c...
1370		 * ddi_getlongprop() will search /option first, then /
1371		 * if "drvname"_intpt_bind_cpus doesn't exist, then find
1372		 * intpt_bind_cpus property.  The syntax is the same, and
1373		 * it applies to all the devices if its "drvname" specific
1374		 * property doesn't exist
1375		 */
1376		(void) strcpy(prop_name, drv_name);
1377		(void) strcat(prop_name, "_intpt_bind_cpus");
1378		rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0, prop_name,
1379		    (caddr_t)&prop_val, &prop_len);
1380		if (rc != DDI_PROP_SUCCESS) {
1381			rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0,
1382			    "intpt_bind_cpus", (caddr_t)&prop_val, &prop_len);
1383		}
1384	}
1385	if (rc == DDI_PROP_SUCCESS) {
1386		for (i = count = 0; i < (prop_len - 1); i++)
1387			if (prop_val[i] == ',')
1388				count++;
1389		if (prop_val[i-1] != ',')
1390			count++;
1391		/*
1392		 * if somehow the binding instances defined in the
1393		 * property are not enough for this instno., then
1394		 * reuse the pattern for the next instance until
1395		 * it reaches the requested instno
1396		 */
1397		instno = instance % count;
1398		i = 0;
1399		cptr = prop_val;
1400		while (i < instno)
1401			if (*cptr++ == ',')
1402				i++;
1403		bind_cpu = stoi(&cptr);
1404		kmem_free(prop_val, prop_len);
1405		/* if specific cpu is bogus, then default to cpu 0 */
1406		if (bind_cpu >= apic_nproc) {
1407			cmn_err(CE_WARN, "apix: %s=%s: CPU %d not present",
1408			    prop_name, prop_val, bind_cpu);
1409			bind_cpu = 0;
1410		} else {
1411			/* indicate that we are bound at user request */
1412			bind_cpu |= IRQ_USER_BOUND;
1413		}
1414		/*
1415		 * no need to check apic_cpus[].aci_status, if specific cpu is
1416		 * not up, then post_cpu_start will handle it.
1417		 */
1418	} else {
1419		bind_cpu = apic_get_next_bind_cpu();
1420	}
1421
1422	lock_clear(&apix_lock);
1423
1424	return ((uint32_t)bind_cpu);
1425}
1426
1427static boolean_t
1428apix_is_cpu_enabled(processorid_t cpuid)
1429{
1430	apic_cpus_info_t *cpu_infop;
1431
1432	cpu_infop = &apic_cpus[cpuid];
1433
1434	if ((cpu_infop->aci_status & APIC_CPU_INTR_ENABLE) == 0)
1435		return (B_FALSE);
1436
1437	return (B_TRUE);
1438}
1439
1440/*
1441 * Must be called with apix_lock held. This function can be
1442 * called from above lock level by apix_intr_redistribute().
1443 *
1444 * Arguments:
1445 *    vecp  : Vector to be rebound
 *    newcpu : Target cpu. IRQ_UNINIT means target is vecp->v_cpuid.
1447 *    count : Number of continuous vectors
1448 *
1449 * Return new vector being bound to
1450 */
apix_vector_t *
apix_rebind(apix_vector_t *vecp, processorid_t newcpu, int count)
{
	apix_vector_t *newp, *oldp;
	processorid_t oldcpu = vecp->v_cpuid;
	uchar_t newvec, oldvec = vecp->v_vector;
	int i;

	ASSERT(LOCK_HELD(&apix_lock) && count > 0);

	/* target cpu must be taking interrupts */
	if (!apix_is_cpu_enabled(newcpu))
		return (NULL);

	if (vecp->v_cpuid == newcpu) 	/* rebind to the same cpu */
		return (vecp);

	/* hold both per-cpu locks while moving state between the cpus */
	APIX_ENTER_CPU_LOCK(oldcpu);
	APIX_ENTER_CPU_LOCK(newcpu);

	/* allocate vector */
	if (count == 1)
		newp = apix_alloc_vector_oncpu(newcpu, NULL, 0, vecp->v_type);
	else {
		/* only MSI uses blocks of contiguous vectors */
		ASSERT(vecp->v_type == APIX_TYPE_MSI);
		newp = apix_alloc_nvectors_oncpu(newcpu, NULL, 0, count,
		    vecp->v_type);
	}
	if (newp == NULL) {
		APIX_LEAVE_CPU_LOCK(newcpu);
		APIX_LEAVE_CPU_LOCK(oldcpu);
		return (NULL);
	}

	newvec = newp->v_vector;
	/* copy autovect chains and device mappings onto the new vector(s) */
	apix_dup_vectors(vecp, newp, count);

	APIX_LEAVE_CPU_LOCK(newcpu);
	APIX_LEAVE_CPU_LOCK(oldcpu);

	if (!DDI_INTR_IS_MSI_OR_MSIX(vecp->v_type)) {
		/* fixed interrupt: reprogram the IOAPIC RDT entry */
		ASSERT(count == 1);
		if (apix_intx_rebind(vecp->v_inum, newcpu, newvec) != 0) {
			struct autovec *avp;
			int inum;

			/* undo duplication */
			APIX_ENTER_CPU_LOCK(oldcpu);
			APIX_ENTER_CPU_LOCK(newcpu);
			/*
			 * Re-point each device mapping back at the old
			 * vector, then strip the copied autovect entries.
			 */
			for (avp = newp->v_autovect; avp != NULL;
			    avp = avp->av_link) {
				if (avp->av_dip != NULL) {
					inum = GET_INTR_INUM(avp->av_intr_id);
					apix_set_dev_map(vecp, avp->av_dip,
					    inum);
				}
				apix_remove_av(newp, avp);
			}
			apix_cleanup_vector(newp);
			APIX_LEAVE_CPU_LOCK(newcpu);
			APIX_LEAVE_CPU_LOCK(oldcpu);
			APIC_VERBOSE(REBIND, (CE_CONT, "apix: rebind fixed "
			    "interrupt 0x%x to cpu %d failed\n",
			    vecp->v_inum, newcpu));
			return (NULL);
		}

		/* success: retire the old vector */
		APIX_ENTER_CPU_LOCK(oldcpu);
		(void) apix_obsolete_vector(vecp);
		APIX_LEAVE_CPU_LOCK(oldcpu);
		APIC_VERBOSE(REBIND, (CE_CONT, "apix: rebind fixed interrupt"
		    " 0x%x/0x%x to 0x%x/0x%x\n",
		    oldcpu, oldvec, newcpu, newvec));
		return (newp);
	}

	/* MSI/X: enable each new vector, then obsolete each old one */
	for (i = 0; i < count; i++) {
		oldp = xv_vector(oldcpu, oldvec + i);
		newp = xv_vector(newcpu, newvec + i);

		if (newp->v_share > 0) {
			/*
			 * publish the old/new pair so in-flight interrupts
			 * can be redirected while the address/data is
			 * being reprogrammed
			 */
			APIX_SET_REBIND_INFO(oldp, newp);

			apix_enable_vector(newp);

			APIX_CLR_REBIND_INFO();
		}

		APIX_ENTER_CPU_LOCK(oldcpu);
		(void) apix_obsolete_vector(oldp);
		APIX_LEAVE_CPU_LOCK(oldcpu);
	}
	APIC_VERBOSE(REBIND, (CE_CONT, "apix: rebind vector 0x%x/0x%x "
	    "to 0x%x/0x%x, count=%d\n",
	    oldcpu, oldvec, newcpu, newvec, count));

	return (xv_vector(newcpu, newvec));
}
1548
1549/*
 * Scenarios include:
1551 * a. add_avintr() is called before irqp initialized (legacy)
1552 * b. irqp is initialized, vector is not allocated (fixed interrupts)
1553 * c. irqp is initialized, vector is allocated (shared interrupts)
1554 */
apix_vector_t *
apix_alloc_intx(dev_info_t *dip, int inum, int irqno)
{
	apic_irq_t *irqp;
	apix_vector_t *vecp;

	/*
	 * Allocate IRQ. Caller is later responsible for the
	 * initialization
	 */
	mutex_enter(&airq_mutex);
	if ((irqp = apic_irq_table[irqno]) == NULL) {
		/* allocate irq */
		irqp = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP);
		irqp->airq_mps_intr_index = FREE_INDEX;
		apic_irq_table[irqno] = irqp;
	}
	if (irqp->airq_mps_intr_index == FREE_INDEX) {
		/* claim a free (or fresh) entry with placeholder values */
		irqp->airq_mps_intr_index = DEFAULT_INDEX;
		irqp->airq_cpu = IRQ_UNINIT;
		irqp->airq_origirq = (uchar_t)irqno;
	}

	mutex_exit(&airq_mutex);

	/*
	 * allocate vector
	 */
	if (irqp->airq_cpu == IRQ_UNINIT) {
		/* no vector bound to this irq yet (cases a and b) */
		uint32_t bindcpu, cpuid;

		/* select cpu by system policy */
		bindcpu = apix_bind_cpu(dip);
		cpuid = bindcpu & ~IRQ_USER_BOUND;

		/* allocate vector */
		APIX_ENTER_CPU_LOCK(cpuid);

		if ((vecp = apix_alloc_vector_oncpu(bindcpu, dip, inum,
		    APIX_TYPE_FIXED)) == NULL) {
			cmn_err(CE_WARN, "No interrupt vector for irq %x",
			    irqno);
			APIX_LEAVE_CPU_LOCK(cpuid);
			return (NULL);
		}
		vecp->v_inum = irqno;
		vecp->v_flags |= APIX_VECT_MASKABLE;

		/* publish the cpu/vector into the irq table entry */
		apix_intx_set_vector(irqno, vecp->v_cpuid, vecp->v_vector);

		APIX_LEAVE_CPU_LOCK(cpuid);
	} else {
		/* irq already has a vector; share it (case c) */
		vecp = xv_vector(irqp->airq_cpu, irqp->airq_vector);
		ASSERT(!IS_VECT_FREE(vecp));

		if (dip != NULL)
			apix_set_dev_map(vecp, dip, inum);
	}

	/* remember the major-to-cpu affinity unless user-bound */
	if ((dip != NULL) &&
	    (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) &&
	    ((vecp->v_flags & APIX_VECT_USER_BOUND) == 0))
		apix_set_dev_binding(dip, vecp->v_cpuid);

	apix_dprint_vector(vecp, dip, 1);

	return (vecp);
}
1623
1624int
1625apix_alloc_msi(dev_info_t *dip, int inum, int count, int behavior)
1626{
1627	int i, cap_ptr, rcount = count;
1628	apix_vector_t *vecp;
1629	processorid_t bindcpu, cpuid;
1630	ushort_t msi_ctrl;
1631	ddi_acc_handle_t handle;
1632
1633	DDI_INTR_IMPLDBG((CE_CONT, "apix_alloc_msi_vectors: dip=0x%p "
1634	    "inum=0x%x  count=0x%x behavior=%d\n",
1635	    (void *)dip, inum, count, behavior));
1636
1637	if (count > 1) {
1638		if (behavior == DDI_INTR_ALLOC_STRICT &&
1639		    apic_multi_msi_enable == 0)
1640			return (0);
1641		if (apic_multi_msi_enable == 0)
1642			count = 1;
1643	}
1644
1645	/* Check whether it supports per-vector masking */
1646	cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
1647	handle = i_ddi_get_pci_config_handle(dip);
1648	msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1649
1650	/* bind to cpu */
1651	bindcpu = apix_bind_cpu(dip);
1652	cpuid = bindcpu & ~IRQ_USER_BOUND;
1653
1654	/* if not ISP2, then round it down */
1655	if (!ISP2(rcount))
1656		rcount = 1 << (highbit(rcount) - 1);
1657
1658	APIX_ENTER_CPU_LOCK(cpuid);
1659	for (vecp = NULL; rcount > 0; rcount >>= 1) {
1660		vecp = apix_alloc_nvectors_oncpu(bindcpu, dip, inum, rcount,
1661		    APIX_TYPE_MSI);
1662		if (vecp != NULL || behavior == DDI_INTR_ALLOC_STRICT)
1663			break;
1664	}
1665	for (i = 0; vecp && i < rcount; i++)
1666		xv_vector(vecp->v_cpuid, vecp->v_vector + i)->v_flags |=
1667		    (msi_ctrl & PCI_MSI_PVM_MASK) ? APIX_VECT_MASKABLE : 0;
1668	APIX_LEAVE_CPU_LOCK(cpuid);
1669	if (vecp == NULL) {
1670		APIC_VERBOSE(INTR, (CE_CONT,
1671		    "apix_alloc_msi: no %d cont vectors found on cpu 0x%x\n",
1672		    count, bindcpu));
1673		return (0);
1674	}
1675
1676	/* major to cpu binding */
1677	if ((apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) &&
1678	    ((vecp->v_flags & APIX_VECT_USER_BOUND) == 0))
1679		apix_set_dev_binding(dip, vecp->v_cpuid);
1680
1681	apix_dprint_vector(vecp, dip, rcount);
1682
1683	return (rcount);
1684}
1685
1686int
1687apix_alloc_msix(dev_info_t *dip, int inum, int count, int behavior)
1688{
1689	apix_vector_t *vecp;
1690	processorid_t bindcpu, cpuid;
1691	int i;
1692
1693	for (i = 0; i < count; i++) {
1694		/* select cpu by system policy */
1695		bindcpu = apix_bind_cpu(dip);
1696		cpuid = bindcpu & ~IRQ_USER_BOUND;
1697
1698		/* allocate vector */
1699		APIX_ENTER_CPU_LOCK(cpuid);
1700		if ((vecp = apix_alloc_vector_oncpu(bindcpu, dip, inum + i,
1701		    APIX_TYPE_MSIX)) == NULL) {
1702			APIX_LEAVE_CPU_LOCK(cpuid);
1703			APIC_VERBOSE(INTR, (CE_CONT, "apix_alloc_msix: "
1704			    "allocate msix for device dip=%p, inum=%d on"
1705			    " cpu %d failed", (void *)dip, inum + i, bindcpu));
1706			break;
1707		}
1708		vecp->v_flags |= APIX_VECT_MASKABLE;
1709		APIX_LEAVE_CPU_LOCK(cpuid);
1710
1711		/* major to cpu mapping */
1712		if ((i == 0) &&
1713		    (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) &&
1714		    ((vecp->v_flags & APIX_VECT_USER_BOUND) == 0))
1715			apix_set_dev_binding(dip, vecp->v_cpuid);
1716
1717		apix_dprint_vector(vecp, dip, 1);
1718	}
1719
1720	if (i < count && behavior == DDI_INTR_ALLOC_STRICT) {
1721		APIC_VERBOSE(INTR, (CE_WARN, "apix_alloc_msix: "
1722		    "strictly allocate %d vectors failed, got %d\n",
1723		    count, i));
1724		apix_free_vectors(dip, inum, i, APIX_TYPE_MSIX);
1725		i = 0;
1726	}
1727
1728	return (i);
1729}
1730
1731/*
1732 * A rollback free for vectors allocated by apix_alloc_xxx().
1733 */
1734void
1735apix_free_vectors(dev_info_t *dip, int inum, int count, int type)
1736{
1737	int i, cpuid;
1738	apix_vector_t *vecp;
1739
1740	DDI_INTR_IMPLDBG((CE_CONT, "apix_free_vectors: dip: %p inum: %x "
1741	    "count: %x type: %x\n",
1742	    (void *)dip, inum, count, type));
1743
1744	lock_set(&apix_lock);
1745
1746	for (i = 0; i < count; i++, inum++) {
1747		if ((vecp = apix_get_dev_map(dip, inum, type)) == NULL) {
1748			lock_clear(&apix_lock);
1749			DDI_INTR_IMPLDBG((CE_CONT, "apix_free_vectors: "
1750			    "dip=0x%p inum=0x%x type=0x%x apix_find_intr() "
1751			    "failed\n", (void *)dip, inum, type));
1752			continue;
1753		}
1754
1755		APIX_ENTER_CPU_LOCK(vecp->v_cpuid);
1756		cpuid = vecp->v_cpuid;
1757
1758		DDI_INTR_IMPLDBG((CE_CONT, "apix_free_vectors: "
1759		    "dip=0x%p inum=0x%x type=0x%x vector 0x%x (share %d)\n",
1760		    (void *)dip, inum, type, vecp->v_vector, vecp->v_share));
1761
1762		/* tear down device interrupt to vector mapping */
1763		apix_clear_dev_map(dip, inum, type);
1764
1765		if (vecp->v_type == APIX_TYPE_FIXED) {
1766			if (vecp->v_share > 0) {	/* share IRQ line */
1767				APIX_LEAVE_CPU_LOCK(cpuid);
1768				continue;
1769			}
1770
1771			/* Free apic_irq_table entry */
1772			apix_intx_free(vecp->v_inum);
1773		}
1774
1775		/* free vector */
1776		apix_cleanup_vector(vecp);
1777
1778		APIX_LEAVE_CPU_LOCK(cpuid);
1779	}
1780
1781	lock_clear(&apix_lock);
1782}
1783
1784/*
1785 * Must be called with apix_lock held
1786 */
1787apix_vector_t *
1788apix_setup_io_intr(apix_vector_t *vecp)
1789{
1790	processorid_t bindcpu;
1791	int ret;
1792
1793	ASSERT(LOCK_HELD(&apix_lock));
1794
1795	/*
1796	 * Interrupts are enabled on the CPU, programme IOAPIC RDT
1797	 * entry or MSI/X address/data to enable the interrupt.
1798	 */
1799	if (apix_is_cpu_enabled(vecp->v_cpuid)) {
1800		apix_enable_vector(vecp);
1801		return (vecp);
1802	}
1803
1804	/*
1805	 * CPU is not up or interrupts are disabled. Fall back to the
1806	 * first avialable CPU.
1807	 */
1808	bindcpu = apic_find_cpu(APIC_CPU_INTR_ENABLE);
1809
1810	if (vecp->v_type == APIX_TYPE_MSI)
1811		return (apix_grp_set_cpu(vecp, bindcpu, &ret));
1812
1813	return (apix_set_cpu(vecp, bindcpu, &ret));
1814}
1815
1816/*
1817 * For interrupts which call add_avintr() before apic is initialized.
1818 * ioapix_setup_intr() will
1819 *   - allocate vector
1820 *   - copy over ISR
1821 */
static void
ioapix_setup_intr(int irqno, iflag_t *flagp)
{
	extern struct av_head autovect[];
	apix_vector_t *vecp;
	apic_irq_t *irqp;
	uchar_t ioapicindex, ipin;
	ulong_t iflag;
	struct autovec *avp;

	irqp = apic_irq_table[irqno];
	ioapicindex = acpi_find_ioapic(irqno);
	ASSERT(ioapicindex != 0xFF);
	/* pin number within this I/O APIC */
	ipin = irqno - apic_io_vectbase[ioapicindex];

	if ((irqp != NULL) && (irqp->airq_mps_intr_index == ACPI_INDEX)) {
		/* irq already set up via ACPI; reuse its vector */
		ASSERT(irqp->airq_intin_no == ipin &&
		    irqp->airq_ioapicindex == ioapicindex);
		vecp = xv_vector(irqp->airq_cpu, irqp->airq_vector);
		ASSERT(!IS_VECT_FREE(vecp));
	} else {
		/* allocate irq entry and vector, then fill in ACPI info */
		vecp = apix_alloc_intx(NULL, 0, irqno);

		irqp = apic_irq_table[irqno];
		irqp->airq_mps_intr_index = ACPI_INDEX;
		irqp->airq_ioapicindex = ioapicindex;
		irqp->airq_intin_no = ipin;
		irqp->airq_iflag = *flagp;
		irqp->airq_share++;
		apic_record_rdt_entry(irqp, irqno);
	}

	/* copy over autovect */
	for (avp = autovect[irqno].avh_link; avp; avp = avp->av_link)
		apix_insert_av(vecp, avp->av_intr_id, avp->av_vector,
		    avp->av_intarg1, avp->av_intarg2, avp->av_ticksp,
		    avp->av_prilevel, avp->av_dip);

	/* Program I/O APIC */
	iflag = intr_clear();
	lock_set(&apix_lock);

	(void) apix_setup_io_intr(vecp);

	lock_clear(&apix_lock);
	intr_restore(iflag);

	APIC_VERBOSE_IOAPIC((CE_CONT, "apix: setup ioapic, irqno %x "
	    "(ioapic %x, ipin %x) is bound to cpu %x, vector %x\n",
	    irqno, ioapicindex, ipin, irqp->airq_cpu, irqp->airq_vector));
}
1873
1874void
1875ioapix_init_intr(int mask_apic)
1876{
1877	int ioapicindex;
1878	int i, j;
1879
1880	/* mask interrupt vectors */
1881	for (j = 0; j < apic_io_max && mask_apic; j++) {
1882		int intin_max;
1883
1884		ioapicindex = j;
1885		/* Bits 23-16 define the maximum redirection entries */
1886		intin_max = (ioapic_read(ioapicindex, APIC_VERS_CMD) >> 16)
1887		    & 0xff;
1888		for (i = 0; i <= intin_max; i++)
1889			ioapic_write(ioapicindex, APIC_RDT_CMD + 2 * i,
1890			    AV_MASK);
1891	}
1892
1893	/*
1894	 * Hack alert: deal with ACPI SCI interrupt chicken/egg here
1895	 */
1896	if (apic_sci_vect > 0)
1897		ioapix_setup_intr(apic_sci_vect, &apic_sci_flags);
1898
1899	/*
1900	 * Hack alert: deal with ACPI HPET interrupt chicken/egg here.
1901	 */
1902	if (apic_hpet_vect > 0)
1903		ioapix_setup_intr(apic_hpet_vect, &apic_hpet_flags);
1904}
1905