local_apic.c revision 164265
1/*-
2 * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
3 * Copyright (c) 1996, by Steve Passe
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. The name of the developer may NOT be used to endorse or promote products
12 *    derived from this software without specific prior written permission.
13 * 3. Neither the name of the author nor the names of any co-contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30/*
31 * Local APIC support on Pentium and later processors.
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: head/sys/i386/i386/local_apic.c 164265 2006-11-13 22:23:34Z jhb $");
36
37#include "opt_hwpmc_hooks.h"
38
39#include "opt_ddb.h"
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/bus.h>
44#include <sys/kernel.h>
45#include <sys/lock.h>
46#include <sys/mutex.h>
47#include <sys/pcpu.h>
48#include <sys/smp.h>
49
50#include <vm/vm.h>
51#include <vm/pmap.h>
52
53#include <machine/apicreg.h>
54#include <machine/cpu.h>
55#include <machine/cputypes.h>
56#include <machine/frame.h>
57#include <machine/intr_machdep.h>
58#include <machine/apicvar.h>
59#include <machine/md_var.h>
60#include <machine/smp.h>
61#include <machine/specialreg.h>
62
63#ifdef DDB
64#include <sys/interrupt.h>
65#include <ddb/ddb.h>
66#endif
67
68/*
69 * We can handle up to 60 APICs via our logical cluster IDs, but currently
70 * the physical IDs on Intel processors up to the Pentium 4 are limited to
71 * 16.
72 */
73#define	MAX_APICID	16
74
75/* Sanity checks on IDT vectors. */
76CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT);
77CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS);
78CTASSERT(APIC_LOCAL_INTS == 240);
79CTASSERT(IPI_STOP < APIC_SPURIOUS_INT);
80
81#define	LAPIC_TIMER_HZ_DIVIDER		2
82#define	LAPIC_TIMER_STATHZ_DIVIDER	15
83#define	LAPIC_TIMER_PROFHZ_DIVIDER	3
84
85/* Magic IRQ values for the timer and syscalls. */
86#define	IRQ_TIMER	(NUM_IO_INTS + 1)
87#define	IRQ_SYSCALL	(NUM_IO_INTS + 2)
88
89/*
90 * Support for local APICs.  Local APICs manage interrupts on each
91 * individual processor as opposed to I/O APICs which receive interrupts
92 * from I/O devices and then forward them on to the local APICs.
93 *
94 * Local APICs can also send interrupts to each other thus providing the
95 * mechanism for IPIs.
96 */
97
98struct lvt {
99	u_int lvt_edgetrigger:1;
100	u_int lvt_activehi:1;
101	u_int lvt_masked:1;
102	u_int lvt_active:1;
103	u_int lvt_mode:16;
104	u_int lvt_vector:8;
105};
106
107struct lapic {
108	struct lvt la_lvts[LVT_MAX + 1];
109	u_int la_id:8;
110	u_int la_cluster:4;
111	u_int la_cluster_id:2;
112	u_int la_present:1;
113	u_long *la_timer_count;
114	u_long la_hard_ticks;
115	u_long la_stat_ticks;
116	u_long la_prof_ticks;
117} static lapics[MAX_APICID];
118
119/* XXX: should thermal be an NMI? */
120
121/* Global defaults for local APIC LVT entries. */
122static struct lvt lvts[LVT_MAX + 1] = {
123	{ 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 },	/* LINT0: masked ExtINT */
124	{ 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 },	/* LINT1: NMI */
125	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_TIMER_INT },	/* Timer */
126	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT },	/* Error */
127	{ 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 },	/* PMC */
128	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT },	/* Thermal */
129};
130
131static inthand_t *ioint_handlers[] = {
132	NULL,			/* 0 - 31 */
133	IDTVEC(apic_isr1),	/* 32 - 63 */
134	IDTVEC(apic_isr2),	/* 64 - 95 */
135	IDTVEC(apic_isr3),	/* 96 - 127 */
136	IDTVEC(apic_isr4),	/* 128 - 159 */
137	IDTVEC(apic_isr5),	/* 160 - 191 */
138	IDTVEC(apic_isr6),	/* 192 - 223 */
139	IDTVEC(apic_isr7),	/* 224 - 255 */
140};
141
142/* Include IDT_SYSCALL to make indexing easier. */
143static u_int ioint_irqs[APIC_NUM_IOINTS + 1];
144
145static u_int32_t lapic_timer_divisors[] = {
146	APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16,
147	APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128
148};
149
150volatile lapic_t *lapic;
151static u_long lapic_timer_divisor, lapic_timer_period, lapic_timer_hz;
152
153static void	lapic_enable(void);
154static void	lapic_resume(struct pic *pic);
155static void	lapic_timer_enable_intr(void);
156static void	lapic_timer_oneshot(u_int count);
157static void	lapic_timer_periodic(u_int count);
158static void	lapic_timer_set_divisor(u_int divisor);
159static uint32_t	lvt_mode(struct lapic *la, u_int pin, uint32_t value);
160
161struct pic lapic_pic = { .pic_resume = lapic_resume };
162
163static uint32_t
164lvt_mode(struct lapic *la, u_int pin, uint32_t value)
165{
166	struct lvt *lvt;
167
168	KASSERT(pin <= LVT_MAX, ("%s: pin %u out of range", __func__, pin));
169	if (la->la_lvts[pin].lvt_active)
170		lvt = &la->la_lvts[pin];
171	else
172		lvt = &lvts[pin];
173
174	value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM |
175	    APIC_LVT_VECTOR);
176	if (lvt->lvt_edgetrigger == 0)
177		value |= APIC_LVT_TM;
178	if (lvt->lvt_activehi == 0)
179		value |= APIC_LVT_IIPP_INTALO;
180	if (lvt->lvt_masked)
181		value |= APIC_LVT_M;
182	value |= lvt->lvt_mode;
183	switch (lvt->lvt_mode) {
184	case APIC_LVT_DM_NMI:
185	case APIC_LVT_DM_SMI:
186	case APIC_LVT_DM_INIT:
187	case APIC_LVT_DM_EXTINT:
188		if (!lvt->lvt_edgetrigger) {
189			printf("lapic%u: Forcing LINT%u to edge trigger\n",
190			    la->la_id, pin);
191			value |= APIC_LVT_TM;
192		}
193		/* Use a vector of 0. */
194		break;
195	case APIC_LVT_DM_FIXED:
196		value |= lvt->lvt_vector;
197		break;
198	default:
199		panic("bad APIC LVT delivery mode: %#x\n", value);
200	}
201	return (value);
202}
203
204/*
205 * Map the local APIC and setup necessary interrupt vectors.
206 */
207void
208lapic_init(uintptr_t addr)
209{
210
211	/* Map the local APIC and setup the spurious interrupt handler. */
212	KASSERT(trunc_page(addr) == addr,
213	    ("local APIC not aligned on a page boundary"));
214	lapic = pmap_mapdev(addr, sizeof(lapic_t));
215	setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL,
216	    GSEL(GCODE_SEL, SEL_KPL));
217
218	/* Perform basic initialization of the BSP's local APIC. */
219	lapic_enable();
220	ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL;
221
222	/* Set BSP's per-CPU local APIC ID. */
223	PCPU_SET(apic_id, lapic_id());
224	intr_add_cpu(PCPU_GET(apic_id));
225
226	/* Local APIC timer interrupt. */
227	setidt(APIC_TIMER_INT, IDTVEC(timerint), SDT_SYS386IGT, SEL_KPL,
228	    GSEL(GCODE_SEL, SEL_KPL));
229	ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] = IRQ_TIMER;
230
231	/* XXX: error/thermal interrupts */
232}
233
234/*
235 * Create a local APIC instance.
236 */
237void
238lapic_create(u_int apic_id, int boot_cpu)
239{
240	int i;
241
242	if (apic_id >= MAX_APICID) {
243		printf("APIC: Ignoring local APIC with ID %d\n", apic_id);
244		if (boot_cpu)
245			panic("Can't ignore BSP");
246		return;
247	}
248	KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u",
249	    apic_id));
250
251	/*
252	 * Assume no local LVT overrides and a cluster of 0 and
253	 * intra-cluster ID of 0.
254	 */
255	lapics[apic_id].la_present = 1;
256	lapics[apic_id].la_id = apic_id;
257	for (i = 0; i < LVT_MAX; i++) {
258		lapics[apic_id].la_lvts[i] = lvts[i];
259		lapics[apic_id].la_lvts[i].lvt_active = 0;
260	}
261
262#ifdef SMP
263	cpu_add(apic_id, boot_cpu);
264#endif
265}
266
267/*
268 * Dump contents of local APIC registers
269 */
270void
271lapic_dump(const char* str)
272{
273
274	printf("cpu%d %s:\n", PCPU_GET(cpuid), str);
275	printf("     ID: 0x%08x   VER: 0x%08x LDR: 0x%08x DFR: 0x%08x\n",
276	    lapic->id, lapic->version, lapic->ldr, lapic->dfr);
277	printf("  lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
278	    lapic->lvt_lint0, lapic->lvt_lint1, lapic->tpr, lapic->svr);
279	printf("  timer: 0x%08x therm: 0x%08x err: 0x%08x pcm: 0x%08x\n",
280	    lapic->lvt_timer, lapic->lvt_thermal, lapic->lvt_error,
281	    lapic->lvt_pcint);
282}
283
284void
285lapic_setup(int boot)
286{
287	struct lapic *la;
288	u_int32_t maxlvt;
289	register_t eflags;
290	char buf[MAXCOMLEN + 1];
291
292	la = &lapics[lapic_id()];
293	KASSERT(la->la_present, ("missing APIC structure"));
294	eflags = intr_disable();
295	maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
296
297	/* Initialize the TPR to allow all interrupts. */
298	lapic_set_tpr(0);
299
300	/* Setup spurious vector and enable the local APIC. */
301	lapic_enable();
302
303	/* Program LINT[01] LVT entries. */
304	lapic->lvt_lint0 = lvt_mode(la, LVT_LINT0, lapic->lvt_lint0);
305	lapic->lvt_lint1 = lvt_mode(la, LVT_LINT1, lapic->lvt_lint1);
306#ifdef	HWPMC_HOOKS
307	/* Program the PMC LVT entry if present. */
308	if (maxlvt >= LVT_PMC)
309		lapic->lvt_pcint = lvt_mode(la, LVT_PMC, lapic->lvt_pcint);
310#endif
311
312	/* Program timer LVT and setup handler. */
313	lapic->lvt_timer = lvt_mode(la, LVT_TIMER, lapic->lvt_timer);
314	if (boot) {
315		snprintf(buf, sizeof(buf), "cpu%d: timer", PCPU_GET(cpuid));
316		intrcnt_add(buf, &la->la_timer_count);
317	}
318
319	/* We don't setup the timer during boot on the BSP until later. */
320	if (!(boot && PCPU_GET(cpuid) == 0)) {
321		KASSERT(lapic_timer_period != 0, ("lapic%u: zero divisor",
322		    lapic_id()));
323		lapic_timer_set_divisor(lapic_timer_divisor);
324		lapic_timer_periodic(lapic_timer_period);
325		lapic_timer_enable_intr();
326	}
327
328	/* XXX: Error and thermal LVTs */
329
330	intr_restore(eflags);
331}
332
333/*
334 * Called by cpu_initclocks() on the BSP to setup the local APIC timer so
335 * that it can drive hardclock, statclock, and profclock.  This function
336 * returns true if it is able to use the local APIC timer to drive the
337 * clocks and false if it is not able.
338 */
339int
340lapic_setup_clock(void)
341{
342	u_long value;
343
344	/* Can't drive the timer without a local APIC. */
345	if (lapic == NULL)
346		return (0);
347
348	/* Start off with a divisor of 2 (power on reset default). */
349	lapic_timer_divisor = 2;
350
351	/* Try to calibrate the local APIC timer. */
352	do {
353		lapic_timer_set_divisor(lapic_timer_divisor);
354		lapic_timer_oneshot(APIC_TIMER_MAX_COUNT);
355		DELAY(2000000);
356		value = APIC_TIMER_MAX_COUNT - lapic->ccr_timer;
357		if (value != APIC_TIMER_MAX_COUNT)
358			break;
359		lapic_timer_divisor <<= 1;
360	} while (lapic_timer_divisor <= 128);
361	if (lapic_timer_divisor > 128)
362		panic("lapic: Divisor too big");
363	value /= 2;
364	if (bootverbose)
365		printf("lapic: Divisor %lu, Frequency %lu hz\n",
366		    lapic_timer_divisor, value);
367
368	/*
369	 * We will drive the timer at a small multiple of hz and drive
370	 * both of the other timers with similarly small but relatively
371	 * prime divisors.
372	 */
373	lapic_timer_hz = hz * LAPIC_TIMER_HZ_DIVIDER;
374	stathz = lapic_timer_hz / LAPIC_TIMER_STATHZ_DIVIDER;
375	profhz = lapic_timer_hz / LAPIC_TIMER_PROFHZ_DIVIDER;
376	lapic_timer_period = value / lapic_timer_hz;
377
378	/*
379	 * Start up the timer on the BSP.  The APs will kick off their
380	 * timer during lapic_setup().
381	 */
382	lapic_timer_periodic(lapic_timer_period);
383	lapic_timer_enable_intr();
384	return (1);
385}
386
387void
388lapic_disable(void)
389{
390	uint32_t value;
391
392	/* Software disable the local APIC. */
393	value = lapic->svr;
394	value &= ~APIC_SVR_SWEN;
395	lapic->svr = value;
396}
397
398static void
399lapic_enable(void)
400{
401	u_int32_t value;
402
403	/* Program the spurious vector to enable the local APIC. */
404	value = lapic->svr;
405	value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS);
406	value |= (APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT);
407	lapic->svr = value;
408}
409
/*
 * PIC resume hook: reprogram the local APIC on the BSP.  The 'pic'
 * argument is not used.
 */
static void
lapic_resume(struct pic *pic)
{

	/* Not a boot-time setup, so pass 0. */
	lapic_setup(0);
}
417
418int
419lapic_id(void)
420{
421
422	KASSERT(lapic != NULL, ("local APIC is not mapped"));
423	return (lapic->id >> APIC_ID_SHIFT);
424}
425
426int
427lapic_intr_pending(u_int vector)
428{
429	volatile u_int32_t *irr;
430
431	/*
432	 * The IRR registers are an array of 128-bit registers each of
433	 * which only describes 32 interrupts in the low 32 bits..  Thus,
434	 * we divide the vector by 32 to get the 128-bit index.  We then
435	 * multiply that index by 4 to get the equivalent index from
436	 * treating the IRR as an array of 32-bit registers.  Finally, we
437	 * modulus the vector by 32 to determine the individual bit to
438	 * test.
439	 */
440	irr = &lapic->irr0;
441	return (irr[(vector / 32) * 4] & 1 << (vector % 32));
442}
443
444void
445lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id)
446{
447	struct lapic *la;
448
449	KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist",
450	    __func__, apic_id));
451	KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big",
452	    __func__, cluster));
453	KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID,
454	    ("%s: intra cluster id %u too big", __func__, cluster_id));
455	la = &lapics[apic_id];
456	la->la_cluster = cluster;
457	la->la_cluster_id = cluster_id;
458}
459
460int
461lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked)
462{
463
464	if (pin > LVT_MAX)
465		return (EINVAL);
466	if (apic_id == APIC_ID_ALL) {
467		lvts[pin].lvt_masked = masked;
468		if (bootverbose)
469			printf("lapic:");
470	} else {
471		KASSERT(lapics[apic_id].la_present,
472		    ("%s: missing APIC %u", __func__, apic_id));
473		lapics[apic_id].la_lvts[pin].lvt_masked = masked;
474		lapics[apic_id].la_lvts[pin].lvt_active = 1;
475		if (bootverbose)
476			printf("lapic%u:", apic_id);
477	}
478	if (bootverbose)
479		printf(" LINT%u %s\n", pin, masked ? "masked" : "unmasked");
480	return (0);
481}
482
483int
484lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode)
485{
486	struct lvt *lvt;
487
488	if (pin > LVT_MAX)
489		return (EINVAL);
490	if (apic_id == APIC_ID_ALL) {
491		lvt = &lvts[pin];
492		if (bootverbose)
493			printf("lapic:");
494	} else {
495		KASSERT(lapics[apic_id].la_present,
496		    ("%s: missing APIC %u", __func__, apic_id));
497		lvt = &lapics[apic_id].la_lvts[pin];
498		lvt->lvt_active = 1;
499		if (bootverbose)
500			printf("lapic%u:", apic_id);
501	}
502	lvt->lvt_mode = mode;
503	switch (mode) {
504	case APIC_LVT_DM_NMI:
505	case APIC_LVT_DM_SMI:
506	case APIC_LVT_DM_INIT:
507	case APIC_LVT_DM_EXTINT:
508		lvt->lvt_edgetrigger = 1;
509		lvt->lvt_activehi = 1;
510		if (mode == APIC_LVT_DM_EXTINT)
511			lvt->lvt_masked = 1;
512		else
513			lvt->lvt_masked = 0;
514		break;
515	default:
516		panic("Unsupported delivery mode: 0x%x\n", mode);
517	}
518	if (bootverbose) {
519		printf(" Routing ");
520		switch (mode) {
521		case APIC_LVT_DM_NMI:
522			printf("NMI");
523			break;
524		case APIC_LVT_DM_SMI:
525			printf("SMI");
526			break;
527		case APIC_LVT_DM_INIT:
528			printf("INIT");
529			break;
530		case APIC_LVT_DM_EXTINT:
531			printf("ExtINT");
532			break;
533		}
534		printf(" -> LINT%u\n", pin);
535	}
536	return (0);
537}
538
539int
540lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol)
541{
542
543	if (pin > LVT_MAX || pol == INTR_POLARITY_CONFORM)
544		return (EINVAL);
545	if (apic_id == APIC_ID_ALL) {
546		lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH);
547		if (bootverbose)
548			printf("lapic:");
549	} else {
550		KASSERT(lapics[apic_id].la_present,
551		    ("%s: missing APIC %u", __func__, apic_id));
552		lapics[apic_id].la_lvts[pin].lvt_active = 1;
553		lapics[apic_id].la_lvts[pin].lvt_activehi =
554		    (pol == INTR_POLARITY_HIGH);
555		if (bootverbose)
556			printf("lapic%u:", apic_id);
557	}
558	if (bootverbose)
559		printf(" LINT%u polarity: %s\n", pin,
560		    pol == INTR_POLARITY_HIGH ? "high" : "low");
561	return (0);
562}
563
564int
565lapic_set_lvt_triggermode(u_int apic_id, u_int pin, enum intr_trigger trigger)
566{
567
568	if (pin > LVT_MAX || trigger == INTR_TRIGGER_CONFORM)
569		return (EINVAL);
570	if (apic_id == APIC_ID_ALL) {
571		lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE);
572		if (bootverbose)
573			printf("lapic:");
574	} else {
575		KASSERT(lapics[apic_id].la_present,
576		    ("%s: missing APIC %u", __func__, apic_id));
577		lapics[apic_id].la_lvts[pin].lvt_edgetrigger =
578		    (trigger == INTR_TRIGGER_EDGE);
579		lapics[apic_id].la_lvts[pin].lvt_active = 1;
580		if (bootverbose)
581			printf("lapic%u:", apic_id);
582	}
583	if (bootverbose)
584		printf(" LINT%u trigger: %s\n", pin,
585		    trigger == INTR_TRIGGER_EDGE ? "edge" : "level");
586	return (0);
587}
588
589/*
590 * Adjust the TPR of the current CPU so that it blocks all interrupts below
591 * the passed in vector.
592 */
593void
594lapic_set_tpr(u_int vector)
595{
596#ifdef CHEAP_TPR
597	lapic->tpr = vector;
598#else
599	u_int32_t tpr;
600
601	tpr = lapic->tpr & ~APIC_TPR_PRIO;
602	tpr |= vector;
603	lapic->tpr = tpr;
604#endif
605}
606
607void
608lapic_eoi(void)
609{
610
611	lapic->eoi = 0;
612}
613
614void
615lapic_handle_intr(int vector, struct trapframe frame)
616{
617	struct intsrc *isrc;
618
619	if (vector == -1)
620		panic("Couldn't get vector from ISR!");
621	isrc = intr_lookup_source(apic_idt_to_irq(vector));
622	intr_execute_handlers(isrc, &frame);
623}
624
625void
626lapic_handle_timer(struct trapframe frame)
627{
628	struct lapic *la;
629
630	/* Send EOI first thing. */
631	lapic_eoi();
632
633#if defined(SMP) && !defined(SCHED_ULE)
634	/*
635	 * Don't do any accounting for the disabled HTT cores, since it
636	 * will provide misleading numbers for the userland.
637	 *
638	 * No locking is necessary here, since even if we loose the race
639	 * when hlt_cpus_mask changes it is not a big deal, really.
640	 *
641	 * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask
642	 * and unlike other schedulers it actually schedules threads to
643	 * those CPUs.
644	 */
645	if ((hlt_cpus_mask & (1 << PCPU_GET(cpuid))) != 0)
646		return;
647#endif
648
649	/* Look up our local APIC structure for the tick counters. */
650	la = &lapics[PCPU_GET(apic_id)];
651	(*la->la_timer_count)++;
652	critical_enter();
653
654	/* Fire hardclock at hz. */
655	la->la_hard_ticks += hz;
656	if (la->la_hard_ticks >= lapic_timer_hz) {
657		la->la_hard_ticks -= lapic_timer_hz;
658		if (PCPU_GET(cpuid) == 0)
659			hardclock(TRAPF_USERMODE(&frame), TRAPF_PC(&frame));
660		else
661			hardclock_cpu(TRAPF_USERMODE(&frame));
662	}
663
664	/* Fire statclock at stathz. */
665	la->la_stat_ticks += stathz;
666	if (la->la_stat_ticks >= lapic_timer_hz) {
667		la->la_stat_ticks -= lapic_timer_hz;
668		statclock(TRAPF_USERMODE(&frame));
669	}
670
671	/* Fire profclock at profhz, but only when needed. */
672	la->la_prof_ticks += profhz;
673	if (la->la_prof_ticks >= lapic_timer_hz) {
674		la->la_prof_ticks -= lapic_timer_hz;
675		if (profprocs != 0)
676			profclock(TRAPF_USERMODE(&frame), TRAPF_PC(&frame));
677	}
678	critical_exit();
679}
680
681static void
682lapic_timer_set_divisor(u_int divisor)
683{
684
685	KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor));
686	KASSERT(ffs(divisor) <= sizeof(lapic_timer_divisors) /
687	    sizeof(u_int32_t), ("lapic: invalid divisor %u", divisor));
688	lapic->dcr_timer = lapic_timer_divisors[ffs(divisor) - 1];
689}
690
691static void
692lapic_timer_oneshot(u_int count)
693{
694	u_int32_t value;
695
696	value = lapic->lvt_timer;
697	value &= ~APIC_LVTT_TM;
698	value |= APIC_LVTT_TM_ONE_SHOT;
699	lapic->lvt_timer = value;
700	lapic->icr_timer = count;
701}
702
703static void
704lapic_timer_periodic(u_int count)
705{
706	u_int32_t value;
707
708	value = lapic->lvt_timer;
709	value &= ~APIC_LVTT_TM;
710	value |= APIC_LVTT_TM_PERIODIC;
711	lapic->lvt_timer = value;
712	lapic->icr_timer = count;
713}
714
715static void
716lapic_timer_enable_intr(void)
717{
718	u_int32_t value;
719
720	value = lapic->lvt_timer;
721	value &= ~APIC_LVT_M;
722	lapic->lvt_timer = value;
723}
724
725/* Request a free IDT vector to be used by the specified IRQ. */
726u_int
727apic_alloc_vector(u_int irq)
728{
729	u_int vector;
730
731	KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq));
732
733	/*
734	 * Search for a free vector.  Currently we just use a very simple
735	 * algorithm to find the first free vector.
736	 */
737	mtx_lock_spin(&icu_lock);
738	for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
739		if (ioint_irqs[vector] != 0)
740			continue;
741		ioint_irqs[vector] = irq;
742		mtx_unlock_spin(&icu_lock);
743		return (vector + APIC_IO_INTS);
744	}
745	mtx_unlock_spin(&icu_lock);
746	panic("Couldn't find an APIC vector for IRQ %u", irq);
747}
748
749/*
750 * Request 'count' free contiguous IDT vectors to be used by 'count'
751 * IRQs.  'count' must be a power of two and the vectors will be
752 * aligned on a boundary of 'align'.  If the request cannot be
753 * satisfied, 0 is returned.
754 */
755u_int
756apic_alloc_vectors(u_int *irqs, u_int count, u_int align)
757{
758	u_int first, run, vector;
759
760	KASSERT(powerof2(count), ("bad count"));
761	KASSERT(powerof2(align), ("bad align"));
762	KASSERT(align >= count, ("align < count"));
763#ifdef INVARIANTS
764	for (run = 0; run < count; run++)
765		KASSERT(irqs[run] < NUM_IO_INTS, ("Invalid IRQ %u at index %u",
766		    irqs[run], run));
767#endif
768
769	/*
770	 * Search for 'count' free vectors.  As with apic_alloc_vector(),
771	 * this just uses a simple first fit algorithm.
772	 */
773	run = 0;
774	first = 0;
775	mtx_lock_spin(&icu_lock);
776	for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
777
778		/* Vector is in use, end run. */
779		if (ioint_irqs[vector] != 0) {
780			run = 0;
781			first = 0;
782			continue;
783		}
784
785		/* Start a new run if run == 0 and vector is aligned. */
786		if (run == 0) {
787			if ((vector & (align - 1)) != 0)
788				continue;
789			first = vector;
790		}
791		run++;
792
793		/* Keep looping if the run isn't long enough yet. */
794		if (run < count)
795			continue;
796
797		/* Found a run, assign IRQs and return the first vector. */
798		for (vector = 0; vector < count; vector++)
799			ioint_irqs[first + vector] = irqs[vector];
800		mtx_unlock_spin(&icu_lock);
801		return (first + APIC_IO_INTS);
802	}
803	mtx_unlock_spin(&icu_lock);
804	printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count);
805	return (0);
806}
807
808void
809apic_enable_vector(u_int vector)
810{
811
812	KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
813	KASSERT(ioint_handlers[vector / 32] != NULL,
814	    ("No ISR handler for vector %u", vector));
815	setidt(vector, ioint_handlers[vector / 32], SDT_SYS386IGT, SEL_KPL,
816	    GSEL(GCODE_SEL, SEL_KPL));
817}
818
819/* Release an APIC vector when it's no longer in use. */
820void
821apic_free_vector(u_int vector, u_int irq)
822{
823	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
824	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
825	    ("Vector %u does not map to an IRQ line", vector));
826	KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq));
827	KASSERT(ioint_irqs[vector - APIC_IO_INTS] == irq, ("IRQ mismatch"));
828	mtx_lock_spin(&icu_lock);
829	ioint_irqs[vector - APIC_IO_INTS] = 0;
830	mtx_unlock_spin(&icu_lock);
831}
832
833/* Map an IDT vector (APIC) to an IRQ (interrupt source). */
834u_int
835apic_idt_to_irq(u_int vector)
836{
837
838	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
839	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
840	    ("Vector %u does not map to an IRQ line", vector));
841	return (ioint_irqs[vector - APIC_IO_INTS]);
842}
843
844#ifdef DDB
845/*
846 * Dump data about APIC IDT vector mappings.
847 */
848DB_SHOW_COMMAND(apic, db_show_apic)
849{
850	struct intsrc *isrc;
851	int i, verbose;
852	u_int irq;
853
854	if (strcmp(modif, "vv") == 0)
855		verbose = 2;
856	else if (strcmp(modif, "v") == 0)
857		verbose = 1;
858	else
859		verbose = 0;
860	for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) {
861		irq = ioint_irqs[i];
862		if (irq != 0 && irq != IRQ_SYSCALL) {
863			db_printf("vec 0x%2x -> ", i + APIC_IO_INTS);
864			if (irq == IRQ_TIMER)
865				db_printf("lapic timer\n");
866			else if (irq < NUM_IO_INTS) {
867				isrc = intr_lookup_source(irq);
868				if (isrc == NULL || verbose == 0)
869					db_printf("IRQ %u\n", irq);
870				else
871					db_dump_intr_event(isrc->is_event,
872					    verbose == 2);
873			} else
874				db_printf("IRQ %u ???\n", irq);
875		}
876	}
877}
878
879static void
880dump_mask(const char *prefix, uint32_t v, int base)
881{
882	int i, first;
883
884	first = 1;
885	for (i = 0; i < 32; i++)
886		if (v & (1 << i)) {
887			if (first) {
888				db_printf("%s:", prefix);
889				first = 0;
890			}
891			db_printf(" %02x", base + i);
892		}
893	if (!first)
894		db_printf("\n");
895}
896
897/* Show info from the lapic regs for this CPU. */
898DB_SHOW_COMMAND(lapic, db_show_lapic)
899{
900	uint32_t v;
901
902	db_printf("lapic ID = %d\n", lapic_id());
903	v = lapic->version;
904	db_printf("version  = %d.%d\n", (v & APIC_VER_VERSION) >> 4,
905	    v & 0xf);
906	db_printf("max LVT  = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT);
907	v = lapic->svr;
908	db_printf("SVR      = %02x (%s)\n", v & APIC_SVR_VECTOR,
909	    v & APIC_SVR_ENABLE ? "enabled" : "disabled");
910	db_printf("TPR      = %02x\n", lapic->tpr);
911
912#define dump_field(prefix, index)					\
913	dump_mask(__XSTRING(prefix ## index), lapic->prefix ## index,	\
914	    index * 32)
915
916	db_printf("In-service Interrupts:\n");
917	dump_field(isr, 0);
918	dump_field(isr, 1);
919	dump_field(isr, 2);
920	dump_field(isr, 3);
921	dump_field(isr, 4);
922	dump_field(isr, 5);
923	dump_field(isr, 6);
924	dump_field(isr, 7);
925
926	db_printf("TMR Interrupts:\n");
927	dump_field(tmr, 0);
928	dump_field(tmr, 1);
929	dump_field(tmr, 2);
930	dump_field(tmr, 3);
931	dump_field(tmr, 4);
932	dump_field(tmr, 5);
933	dump_field(tmr, 6);
934	dump_field(tmr, 7);
935
936	db_printf("IRR Interrupts:\n");
937	dump_field(irr, 0);
938	dump_field(irr, 1);
939	dump_field(irr, 2);
940	dump_field(irr, 3);
941	dump_field(irr, 4);
942	dump_field(irr, 5);
943	dump_field(irr, 6);
944	dump_field(irr, 7);
945
946#undef dump_field
947}
948#endif
949
950/*
951 * APIC probing support code.  This includes code to manage enumerators.
952 */
953
954static SLIST_HEAD(, apic_enumerator) enumerators =
955	SLIST_HEAD_INITIALIZER(enumerators);
956static struct apic_enumerator *best_enum;
957
958void
959apic_register_enumerator(struct apic_enumerator *enumerator)
960{
961#ifdef INVARIANTS
962	struct apic_enumerator *apic_enum;
963
964	SLIST_FOREACH(apic_enum, &enumerators, apic_next) {
965		if (apic_enum == enumerator)
966			panic("%s: Duplicate register of %s", __func__,
967			    enumerator->apic_name);
968	}
969#endif
970	SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next);
971}
972
973/*
974 * Probe the APIC enumerators, enumerate CPUs, and initialize the
975 * local APIC.
976 */
977static void
978apic_init(void *dummy __unused)
979{
980	struct apic_enumerator *enumerator;
981	uint64_t apic_base;
982	int retval, best;
983
984	/* We only support built in local APICs. */
985	if (!(cpu_feature & CPUID_APIC))
986		return;
987
988	/* Don't probe if APIC mode is disabled. */
989	if (resource_disabled("apic", 0))
990		return;
991
992	/* First, probe all the enumerators to find the best match. */
993	best_enum = NULL;
994	best = 0;
995	SLIST_FOREACH(enumerator, &enumerators, apic_next) {
996		retval = enumerator->apic_probe();
997		if (retval > 0)
998			continue;
999		if (best_enum == NULL || best < retval) {
1000			best_enum = enumerator;
1001			best = retval;
1002		}
1003	}
1004	if (best_enum == NULL) {
1005		if (bootverbose)
1006			printf("APIC: Could not find any APICs.\n");
1007		return;
1008	}
1009
1010	if (bootverbose)
1011		printf("APIC: Using the %s enumerator.\n",
1012		    best_enum->apic_name);
1013
1014	/*
1015	 * To work around an errata, we disable the local APIC on some
1016	 * CPUs during early startup.  We need to turn the local APIC back
1017	 * on on such CPUs now.
1018	 */
1019	if (cpu == CPU_686 && strcmp(cpu_vendor, "GenuineIntel") == 0 &&
1020	    (cpu_id & 0xff0) == 0x610) {
1021		apic_base = rdmsr(MSR_APICBASE);
1022		apic_base |= APICBASE_ENABLED;
1023		wrmsr(MSR_APICBASE, apic_base);
1024	}
1025
1026	/* Second, probe the CPU's in the system. */
1027	retval = best_enum->apic_probe_cpus();
1028	if (retval != 0)
1029		printf("%s: Failed to probe CPUs: returned %d\n",
1030		    best_enum->apic_name, retval);
1031
1032	/* Third, initialize the local APIC. */
1033	retval = best_enum->apic_setup_local();
1034	if (retval != 0)
1035		printf("%s: Failed to setup the local APIC: returned %d\n",
1036		    best_enum->apic_name, retval);
1037#ifdef SMP
1038	/* Last, setup the cpu topology now that we have probed CPUs */
1039	mp_topology();
1040#endif
1041}
1042SYSINIT(apic_init, SI_SUB_CPU, SI_ORDER_FIRST, apic_init, NULL)
1043
1044/*
1045 * Setup the I/O APICs.
1046 */
1047static void
1048apic_setup_io(void *dummy __unused)
1049{
1050	int retval;
1051
1052	if (best_enum == NULL)
1053		return;
1054	retval = best_enum->apic_setup_io();
1055	if (retval != 0)
1056		printf("%s: Failed to setup I/O APICs: returned %d\n",
1057		    best_enum->apic_name, retval);
1058
1059	/*
1060	 * Finish setting up the local APIC on the BSP once we know how to
1061	 * properly program the LINT pins.
1062	 */
1063	lapic_setup(1);
1064	intr_register_pic(&lapic_pic);
1065	if (bootverbose)
1066		lapic_dump("BSP");
1067
1068	/* Enable the MSI "pic". */
1069	msi_init();
1070}
1071SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_SECOND, apic_setup_io, NULL)
1072
1073#ifdef SMP
1074/*
1075 * Inter Processor Interrupt functions.  The lapic_ipi_*() functions are
1076 * private to the sys/i386 code.  The public interface for the rest of the
1077 * kernel is defined in mp_machdep.c.
1078 */
1079int
1080lapic_ipi_wait(int delay)
1081{
1082	int x, incr;
1083
1084	/*
1085	 * Wait delay loops for IPI to be sent.  This is highly bogus
1086	 * since this is sensitive to CPU clock speed.  If delay is
1087	 * -1, we wait forever.
1088	 */
1089	if (delay == -1) {
1090		incr = 0;
1091		delay = 1;
1092	} else
1093		incr = 1;
1094	for (x = 0; x < delay; x += incr) {
1095		if ((lapic->icr_lo & APIC_DELSTAT_MASK) == APIC_DELSTAT_IDLE)
1096			return (1);
1097		ia32_pause();
1098	}
1099	return (0);
1100}
1101
1102void
1103lapic_ipi_raw(register_t icrlo, u_int dest)
1104{
1105	register_t value, eflags;
1106
1107	/* XXX: Need more sanity checking of icrlo? */
1108	KASSERT(lapic != NULL, ("%s called too early", __func__));
1109	KASSERT((dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
1110	    ("%s: invalid dest field", __func__));
1111	KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0,
1112	    ("%s: reserved bits set in ICR LO register", __func__));
1113
1114	/* Set destination in ICR HI register if it is being used. */
1115	eflags = intr_disable();
1116	if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) {
1117		value = lapic->icr_hi;
1118		value &= ~APIC_ID_MASK;
1119		value |= dest << APIC_ID_SHIFT;
1120		lapic->icr_hi = value;
1121	}
1122
1123	/* Program the contents of the IPI and dispatch it. */
1124	value = lapic->icr_lo;
1125	value &= APIC_ICRLO_RESV_MASK;
1126	value |= icrlo;
1127	lapic->icr_lo = value;
1128	intr_restore(eflags);
1129}
1130
1131#define	BEFORE_SPIN	1000000
1132#ifdef DETECT_DEADLOCK
1133#define	AFTER_SPIN	1000
1134#endif
1135
1136void
1137lapic_ipi_vectored(u_int vector, int dest)
1138{
1139	register_t icrlo, destfield;
1140
1141	KASSERT((vector & ~APIC_VECTOR_MASK) == 0,
1142	    ("%s: invalid vector %d", __func__, vector));
1143
1144	icrlo = vector | APIC_DELMODE_FIXED | APIC_DESTMODE_PHY |
1145	    APIC_LEVEL_DEASSERT | APIC_TRIGMOD_EDGE;
1146	destfield = 0;
1147	switch (dest) {
1148	case APIC_IPI_DEST_SELF:
1149		icrlo |= APIC_DEST_SELF;
1150		break;
1151	case APIC_IPI_DEST_ALL:
1152		icrlo |= APIC_DEST_ALLISELF;
1153		break;
1154	case APIC_IPI_DEST_OTHERS:
1155		icrlo |= APIC_DEST_ALLESELF;
1156		break;
1157	default:
1158		KASSERT((dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
1159		    ("%s: invalid destination 0x%x", __func__, dest));
1160		destfield = dest;
1161	}
1162
1163	/* Wait for an earlier IPI to finish. */
1164	if (!lapic_ipi_wait(BEFORE_SPIN)) {
1165		if (panicstr != NULL)
1166			return;
1167		else
1168			panic("APIC: Previous IPI is stuck");
1169	}
1170
1171	lapic_ipi_raw(icrlo, destfield);
1172
1173#ifdef DETECT_DEADLOCK
1174	/* Wait for IPI to be delivered. */
1175	if (!lapic_ipi_wait(AFTER_SPIN)) {
1176#ifdef needsattention
1177		/*
1178		 * XXX FIXME:
1179		 *
1180		 * The above function waits for the message to actually be
1181		 * delivered.  It breaks out after an arbitrary timeout
1182		 * since the message should eventually be delivered (at
1183		 * least in theory) and that if it wasn't we would catch
1184		 * the failure with the check above when the next IPI is
1185		 * sent.
1186		 *
1187		 * We could skip this wait entirely, EXCEPT it probably
1188		 * protects us from other routines that assume that the
1189		 * message was delivered and acted upon when this function
1190		 * returns.
1191		 */
1192		printf("APIC: IPI might be stuck\n");
1193#else /* !needsattention */
1194		/* Wait until mesage is sent without a timeout. */
1195		while (lapic->icr_lo & APIC_DELSTAT_PEND)
1196			ia32_pause();
1197#endif /* needsattention */
1198	}
1199#endif /* DETECT_DEADLOCK */
1200}
1201#endif /* SMP */
1202