/* local_apic.c revision 340016 */
1/*- 2 * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org> 3 * Copyright (c) 1996, by Steve Passe 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. The name of the developer may NOT be used to endorse or promote products 12 * derived from this software without specific prior written permission. 13 * 3. Neither the name of the author nor the names of any co-contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30/* 31 * Local APIC support on Pentium and later processors. 
32 */ 33 34#include <sys/cdefs.h> 35__FBSDID("$FreeBSD: stable/11/sys/x86/x86/local_apic.c 340016 2018-11-01 18:34:26Z jhb $"); 36 37#include "opt_atpic.h" 38#include "opt_hwpmc_hooks.h" 39 40#include "opt_ddb.h" 41 42#include <sys/param.h> 43#include <sys/systm.h> 44#include <sys/bus.h> 45#include <sys/kernel.h> 46#include <sys/lock.h> 47#include <sys/mutex.h> 48#include <sys/pcpu.h> 49#include <sys/proc.h> 50#include <sys/sched.h> 51#include <sys/smp.h> 52#include <sys/sysctl.h> 53#include <sys/timeet.h> 54 55#include <vm/vm.h> 56#include <vm/pmap.h> 57 58#include <x86/apicreg.h> 59#include <machine/clock.h> 60#include <machine/cpufunc.h> 61#include <machine/cputypes.h> 62#include <machine/frame.h> 63#include <machine/intr_machdep.h> 64#include <x86/apicvar.h> 65#include <x86/mca.h> 66#include <machine/md_var.h> 67#include <machine/smp.h> 68#include <machine/specialreg.h> 69#include <x86/init.h> 70 71#ifdef DDB 72#include <sys/interrupt.h> 73#include <ddb/ddb.h> 74#endif 75 76#ifdef __amd64__ 77#define SDT_APIC SDT_SYSIGT 78#define SDT_APICT SDT_SYSIGT 79#define GSEL_APIC 0 80#else 81#define SDT_APIC SDT_SYS386IGT 82#define SDT_APICT SDT_SYS386TGT 83#define GSEL_APIC GSEL(GCODE_SEL, SEL_KPL) 84#endif 85 86/* Sanity checks on IDT vectors. */ 87CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT); 88CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS); 89CTASSERT(APIC_LOCAL_INTS == 240); 90CTASSERT(IPI_STOP < APIC_SPURIOUS_INT); 91 92/* 93 * I/O interrupts use non-negative IRQ values. These values are used 94 * to mark unused IDT entries or IDT entries reserved for a non-I/O 95 * interrupt. 96 */ 97#define IRQ_FREE -1 98#define IRQ_TIMER -2 99#define IRQ_SYSCALL -3 100#define IRQ_DTRACE_RET -4 101#define IRQ_EVTCHN -5 102 103enum lat_timer_mode { 104 LAT_MODE_UNDEF = 0, 105 LAT_MODE_PERIODIC = 1, 106 LAT_MODE_ONESHOT = 2, 107 LAT_MODE_DEADLINE = 3, 108}; 109 110/* 111 * Support for local APICs. 
Local APICs manage interrupts on each 112 * individual processor as opposed to I/O APICs which receive interrupts 113 * from I/O devices and then forward them on to the local APICs. 114 * 115 * Local APICs can also send interrupts to each other thus providing the 116 * mechanism for IPIs. 117 */ 118 119struct lvt { 120 u_int lvt_edgetrigger:1; 121 u_int lvt_activehi:1; 122 u_int lvt_masked:1; 123 u_int lvt_active:1; 124 u_int lvt_mode:16; 125 u_int lvt_vector:8; 126}; 127 128struct lapic { 129 struct lvt la_lvts[APIC_LVT_MAX + 1]; 130 struct lvt la_elvts[APIC_ELVT_MAX + 1];; 131 u_int la_id:8; 132 u_int la_cluster:4; 133 u_int la_cluster_id:2; 134 u_int la_present:1; 135 u_long *la_timer_count; 136 uint64_t la_timer_period; 137 enum lat_timer_mode la_timer_mode; 138 uint32_t lvt_timer_base; 139 uint32_t lvt_timer_last; 140 /* Include IDT_SYSCALL to make indexing easier. */ 141 int la_ioint_irqs[APIC_NUM_IOINTS + 1]; 142} static lapics[MAX_APIC_ID + 1]; 143 144/* Global defaults for local APIC LVT entries. */ 145static struct lvt lvts[APIC_LVT_MAX + 1] = { 146 { 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 }, /* LINT0: masked ExtINT */ 147 { 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 }, /* LINT1: NMI */ 148 { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_TIMER_INT }, /* Timer */ 149 { 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT }, /* Error */ 150 { 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 }, /* PMC */ 151 { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT }, /* Thermal */ 152 { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT }, /* CMCI */ 153}; 154 155/* Global defaults for AMD local APIC ELVT entries. 
*/ 156static struct lvt elvts[APIC_ELVT_MAX + 1] = { 157 { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 }, 158 { 1, 1, 1, 0, APIC_LVT_DM_FIXED, APIC_CMC_INT }, 159 { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 }, 160 { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 }, 161}; 162 163static inthand_t *ioint_handlers[] = { 164 NULL, /* 0 - 31 */ 165 IDTVEC(apic_isr1), /* 32 - 63 */ 166 IDTVEC(apic_isr2), /* 64 - 95 */ 167 IDTVEC(apic_isr3), /* 96 - 127 */ 168 IDTVEC(apic_isr4), /* 128 - 159 */ 169 IDTVEC(apic_isr5), /* 160 - 191 */ 170 IDTVEC(apic_isr6), /* 192 - 223 */ 171 IDTVEC(apic_isr7), /* 224 - 255 */ 172}; 173 174static inthand_t *ioint_pti_handlers[] = { 175 NULL, /* 0 - 31 */ 176 IDTVEC(apic_isr1_pti), /* 32 - 63 */ 177 IDTVEC(apic_isr2_pti), /* 64 - 95 */ 178 IDTVEC(apic_isr3_pti), /* 96 - 127 */ 179 IDTVEC(apic_isr4_pti), /* 128 - 159 */ 180 IDTVEC(apic_isr5_pti), /* 160 - 191 */ 181 IDTVEC(apic_isr6_pti), /* 192 - 223 */ 182 IDTVEC(apic_isr7_pti), /* 224 - 255 */ 183}; 184 185static u_int32_t lapic_timer_divisors[] = { 186 APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16, 187 APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128 188}; 189 190extern inthand_t IDTVEC(rsvd_pti), IDTVEC(rsvd); 191 192volatile char *lapic_map; 193vm_paddr_t lapic_paddr; 194int x2apic_mode; 195int lapic_eoi_suppression; 196static int lapic_timer_tsc_deadline; 197static u_long lapic_timer_divisor, count_freq; 198static struct eventtimer lapic_et; 199#ifdef SMP 200static uint64_t lapic_ipi_wait_mult; 201#endif 202 203SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD, 0, "APIC options"); 204SYSCTL_INT(_hw_apic, OID_AUTO, x2apic_mode, CTLFLAG_RD, &x2apic_mode, 0, ""); 205SYSCTL_INT(_hw_apic, OID_AUTO, eoi_suppression, CTLFLAG_RD, 206 &lapic_eoi_suppression, 0, ""); 207SYSCTL_INT(_hw_apic, OID_AUTO, timer_tsc_deadline, CTLFLAG_RD, 208 &lapic_timer_tsc_deadline, 0, ""); 209 210static void lapic_calibrate_initcount(struct lapic *la); 211static void lapic_calibrate_deadline(struct lapic *la); 212 213static uint32_t 
lapic_read32(enum LAPIC_REGISTERS reg)
{
	uint32_t res;

	/* Read a LAPIC register: MSR in x2APIC mode, MMIO otherwise. */
	if (x2apic_mode) {
		res = rdmsr32(MSR_APIC_000 + reg);
	} else {
		res = *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL);
	}
	return (res);
}

/*
 * Write a LAPIC register.  In x2APIC mode the mfence/lfence pair
 * serializes prior memory accesses before the WRMSR, which is not
 * architecturally serializing for APIC MSRs.
 */
static void
lapic_write32(enum LAPIC_REGISTERS reg, uint32_t val)
{

	if (x2apic_mode) {
		mfence();
		lfence();
		wrmsr(MSR_APIC_000 + reg, val);
	} else {
		*(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val;
	}
}

/* As lapic_write32(), but without the x2APIC serializing fences. */
static void
lapic_write32_nofence(enum LAPIC_REGISTERS reg, uint32_t val)
{

	if (x2apic_mode) {
		wrmsr(MSR_APIC_000 + reg, val);
	} else {
		*(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val;
	}
}

#ifdef SMP
/* Read the full 64-bit ICR (one MSR in x2APIC, two registers in xAPIC). */
static uint64_t
lapic_read_icr(void)
{
	uint64_t v;
	uint32_t vhi, vlo;

	if (x2apic_mode) {
		v = rdmsr(MSR_APIC_000 + LAPIC_ICR_LO);
	} else {
		vhi = lapic_read32(LAPIC_ICR_HI);
		vlo = lapic_read32(LAPIC_ICR_LO);
		v = ((uint64_t)vhi << 32) | vlo;
	}
	return (v);
}

/* Low half of the ICR; used by the xAPIC IPI-wait calibration loop. */
static uint64_t
lapic_read_icr_lo(void)
{

	return (lapic_read32(LAPIC_ICR_LO));
}

static void
lapic_write_icr(uint32_t vhi, uint32_t vlo)
{
	uint64_t v;

	if (x2apic_mode) {
		v = ((uint64_t)vhi << 32) | vlo;
		mfence();
		wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, v);
	} else {
		/* xAPIC: write high (destination) first; low write sends. */
		lapic_write32(LAPIC_ICR_HI, vhi);
		lapic_write32(LAPIC_ICR_LO, vlo);
	}
}
#endif /* SMP */

/* Turn on x2APIC mode in the APIC base MSR. */
static void
native_lapic_enable_x2apic(void)
{
	uint64_t apic_base;

	apic_base = rdmsr(MSR_APICBASE);
	apic_base |= APICBASE_X2APIC | APICBASE_ENABLED;
	wrmsr(MSR_APICBASE, apic_base);
}

/* True if the APIC is already enabled in x2APIC mode. */
static bool
native_lapic_is_x2apic(void)
{
	uint64_t apic_base;

	apic_base = rdmsr(MSR_APICBASE);
	return ((apic_base & (APICBASE_X2APIC | APICBASE_ENABLED)) ==
	    (APICBASE_X2APIC | APICBASE_ENABLED));
}

static void
lapic_enable(void);
static void	lapic_resume(struct pic *pic, bool suspend_cancelled);
static void	lapic_timer_oneshot(struct lapic *);
static void	lapic_timer_oneshot_nointr(struct lapic *, uint32_t);
static void	lapic_timer_periodic(struct lapic *);
static void	lapic_timer_deadline(struct lapic *);
static void	lapic_timer_stop(struct lapic *);
static void	lapic_timer_set_divisor(u_int divisor);
static uint32_t	lvt_mode(struct lapic *la, u_int pin, uint32_t value);
static int	lapic_et_start(struct eventtimer *et,
    sbintime_t first, sbintime_t period);
static int	lapic_et_stop(struct eventtimer *et);
static u_int	apic_idt_to_irq(u_int apic_id, u_int vector);
static void	lapic_set_tpr(u_int vector);

struct pic lapic_pic = { .pic_resume = lapic_resume };

/* Forward declarations for apic_ops */
static void	native_lapic_create(u_int apic_id, int boot_cpu);
static void	native_lapic_init(vm_paddr_t addr);
static void	native_lapic_xapic_mode(void);
static void	native_lapic_setup(int boot);
static void	native_lapic_dump(const char *str);
static void	native_lapic_disable(void);
static void	native_lapic_eoi(void);
static int	native_lapic_id(void);
static int	native_lapic_intr_pending(u_int vector);
static u_int	native_apic_cpuid(u_int apic_id);
static u_int	native_apic_alloc_vector(u_int apic_id, u_int irq);
static u_int	native_apic_alloc_vectors(u_int apic_id, u_int *irqs,
    u_int count, u_int align);
static void	native_apic_disable_vector(u_int apic_id, u_int vector);
static void	native_apic_enable_vector(u_int apic_id, u_int vector);
static void	native_apic_free_vector(u_int apic_id, u_int vector, u_int irq);
static void	native_lapic_set_logical_id(u_int apic_id, u_int cluster,
    u_int cluster_id);
static int	native_lapic_enable_pmc(void);
static void	native_lapic_disable_pmc(void);
static void	native_lapic_reenable_pmc(void);
static void	native_lapic_enable_cmc(void);
static int	native_lapic_enable_mca_elvt(void);
static int	native_lapic_set_lvt_mask(u_int apic_id, u_int lvt,
    u_char masked);
static int	native_lapic_set_lvt_mode(u_int apic_id, u_int lvt,
    uint32_t mode);
static int	native_lapic_set_lvt_polarity(u_int apic_id, u_int lvt,
    enum intr_polarity pol);
static int	native_lapic_set_lvt_triggermode(u_int apic_id, u_int lvt,
    enum intr_trigger trigger);
#ifdef SMP
static void	native_lapic_ipi_raw(register_t icrlo, u_int dest);
static void	native_lapic_ipi_vectored(u_int vector, int dest);
static int	native_lapic_ipi_wait(int delay);
#endif /* SMP */
static int	native_lapic_ipi_alloc(inthand_t *ipifunc);
static void	native_lapic_ipi_free(int vector);

/* Dispatch table used by the machine-independent APIC glue. */
struct apic_ops apic_ops = {
	.create			= native_lapic_create,
	.init			= native_lapic_init,
	.xapic_mode		= native_lapic_xapic_mode,
	.is_x2apic		= native_lapic_is_x2apic,
	.setup			= native_lapic_setup,
	.dump			= native_lapic_dump,
	.disable		= native_lapic_disable,
	.eoi			= native_lapic_eoi,
	.id			= native_lapic_id,
	.intr_pending		= native_lapic_intr_pending,
	.set_logical_id		= native_lapic_set_logical_id,
	.cpuid			= native_apic_cpuid,
	.alloc_vector		= native_apic_alloc_vector,
	.alloc_vectors		= native_apic_alloc_vectors,
	.enable_vector		= native_apic_enable_vector,
	.disable_vector		= native_apic_disable_vector,
	.free_vector		= native_apic_free_vector,
	.enable_pmc		= native_lapic_enable_pmc,
	.disable_pmc		= native_lapic_disable_pmc,
	.reenable_pmc		= native_lapic_reenable_pmc,
	.enable_cmc		= native_lapic_enable_cmc,
	.enable_mca_elvt	= native_lapic_enable_mca_elvt,
#ifdef SMP
	.ipi_raw		= native_lapic_ipi_raw,
	.ipi_vectored		= native_lapic_ipi_vectored,
	.ipi_wait		= native_lapic_ipi_wait,
#endif
	.ipi_alloc		= native_lapic_ipi_alloc,
	.ipi_free		= native_lapic_ipi_free,
	.set_lvt_mask		= native_lapic_set_lvt_mask,
	.set_lvt_mode		= native_lapic_set_lvt_mode,
	.set_lvt_polarity	= native_lapic_set_lvt_polarity,
	.set_lvt_triggermode	= native_lapic_set_lvt_triggermode,
};

/*
 * Rebuild the mask, trigger, polarity, delivery-mode, and vector fields
 * of a hardware LVT register value from the decoded struct lvt.
 */
static uint32_t
lvt_mode_impl(struct lapic *la, struct lvt *lvt, u_int pin, uint32_t value)
{

	value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM |
	    APIC_LVT_VECTOR);
	if (lvt->lvt_edgetrigger == 0)
		value |= APIC_LVT_TM;
	if (lvt->lvt_activehi == 0)
		value |= APIC_LVT_IIPP_INTALO;
	if (lvt->lvt_masked)
		value |= APIC_LVT_M;
	value |= lvt->lvt_mode;
	switch (lvt->lvt_mode) {
	case APIC_LVT_DM_NMI:
	case APIC_LVT_DM_SMI:
	case APIC_LVT_DM_INIT:
	case APIC_LVT_DM_EXTINT:
		/* These delivery modes are edge triggered by definition. */
		if (!lvt->lvt_edgetrigger && bootverbose) {
			printf("lapic%u: Forcing LINT%u to edge trigger\n",
			    la->la_id, pin);
			value &= ~APIC_LVT_TM;
		}
		/* Use a vector of 0. */
		break;
	case APIC_LVT_DM_FIXED:
		value |= lvt->lvt_vector;
		break;
	default:
		panic("bad APIC LVT delivery mode: %#x\n", value);
	}
	return (value);
}

/* Build an LVT register value, preferring a per-CPU override if active. */
static uint32_t
lvt_mode(struct lapic *la, u_int pin, uint32_t value)
{
	struct lvt *lvt;

	KASSERT(pin <= APIC_LVT_MAX,
	    ("%s: pin %u out of range", __func__, pin));
	if (la->la_lvts[pin].lvt_active)
		lvt = &la->la_lvts[pin];
	else
		lvt = &lvts[pin];

	return (lvt_mode_impl(la, lvt, pin, value));
}

/* Build an AMD extended-LVT register value; entry must be active. */
static uint32_t
elvt_mode(struct lapic *la, u_int idx, uint32_t value)
{
	struct lvt *elvt;

	KASSERT(idx <= APIC_ELVT_MAX,
	    ("%s: idx %u out of range", __func__, idx));

	elvt = &la->la_elvts[idx];
	KASSERT(elvt->lvt_active, ("%s: ELVT%u is not active", __func__, idx));
	KASSERT(elvt->lvt_edgetrigger,
	    ("%s: ELVT%u is not edge triggered", __func__, idx));
	KASSERT(elvt->lvt_activehi,
	    ("%s: ELVT%u is not active high", __func__, idx));
	return (lvt_mode_impl(la, elvt, idx, value));
}

/*
 * Map the local
 APIC and setup necessary interrupt vectors.
 */
static void
native_lapic_init(vm_paddr_t addr)
{
#ifdef SMP
	uint64_t r, r1, r2, rx;
#endif
	uint32_t ver;
	u_int regs[4];
	int i, arat;

	/*
	 * Enable x2APIC mode if possible. Map the local APIC
	 * registers page.
	 *
	 * Keep the LAPIC registers page mapped uncached for x2APIC
	 * mode too, to have direct map page attribute set to
	 * uncached.  This is needed to work around CPU errata present
	 * on all Intel processors.
	 */
	KASSERT(trunc_page(addr) == addr,
	    ("local APIC not aligned on a page boundary"));
	lapic_paddr = addr;
	lapic_map = pmap_mapdev(addr, PAGE_SIZE);
	if (x2apic_mode) {
		native_lapic_enable_x2apic();
		/* MMIO window unused in x2APIC mode. */
		lapic_map = NULL;
	}

	/* Setup the spurious interrupt handler. */
	setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL,
	    GSEL_APIC);

	/* Perform basic initialization of the BSP's local APIC. */
	lapic_enable();

	/* Set BSP's per-CPU local APIC ID. */
	PCPU_SET(apic_id, lapic_id());

	/* Local APIC timer interrupt. */
	setidt(APIC_TIMER_INT, pti ? IDTVEC(timerint_pti) : IDTVEC(timerint),
	    SDT_APIC, SEL_KPL, GSEL_APIC);

	/* Local APIC error interrupt. */
	setidt(APIC_ERROR_INT, pti ? IDTVEC(errorint_pti) : IDTVEC(errorint),
	    SDT_APIC, SEL_KPL, GSEL_APIC);

	/* XXX: Thermal interrupt */

	/* Local APIC CMCI. */
	setidt(APIC_CMC_INT, pti ? IDTVEC(cmcint_pti) : IDTVEC(cmcint),
	    SDT_APICT, SEL_KPL, GSEL_APIC);

	/* Register the LAPIC as an event timer unless disabled by tunable. */
	if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) {
		arat = 0;
		/* Intel CPUID 0x06 EAX[2] set if APIC timer runs in C3. */
		if (cpu_vendor_id == CPU_VENDOR_INTEL && cpu_high >= 6) {
			do_cpuid(0x06, regs);
			if ((regs[0] & CPUTPM1_ARAT) != 0)
				arat = 1;
		} else if (cpu_vendor_id == CPU_VENDOR_AMD &&
		    CPUID_TO_FAMILY(cpu_id) >= 0x12) {
			arat = 1;
		}
		bzero(&lapic_et, sizeof(lapic_et));
		lapic_et.et_name = "LAPIC";
		lapic_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT |
		    ET_FLAGS_PERCPU;
		lapic_et.et_quality = 600;
		if (!arat) {
			/* Timer may stop in deep C-states; degrade quality. */
			lapic_et.et_flags |= ET_FLAGS_C3STOP;
			lapic_et.et_quality = 100;
		}
		if ((cpu_feature & CPUID_TSC) != 0 &&
		    (cpu_feature2 & CPUID2_TSCDLT) != 0 &&
		    tsc_is_invariant && tsc_freq != 0) {
			lapic_timer_tsc_deadline = 1;
			TUNABLE_INT_FETCH("hw.lapic_tsc_deadline",
			    &lapic_timer_tsc_deadline);
		}

		lapic_et.et_frequency = 0;
		/* We don't know frequency yet, so trying to guess. */
		lapic_et.et_min_period = 0x00001000LL;
		lapic_et.et_max_period = SBT_1S;
		lapic_et.et_start = lapic_et_start;
		lapic_et.et_stop = lapic_et_stop;
		lapic_et.et_priv = NULL;
		et_register(&lapic_et);
	}

	/*
	 * Set lapic_eoi_suppression after lapic_enable(), to not
	 * enable suppression in the hardware prematurely.  Note that
	 * we by default enable suppression even when system only has
	 * one IO-APIC, since EOI is broadcasted to all APIC agents,
	 * including CPUs, otherwise.
	 *
	 * It seems that at least some KVM versions report
	 * EOI_SUPPRESSION bit, but auto-EOI does not work.
	 */
	ver = lapic_read32(LAPIC_VERSION);
	if ((ver & APIC_VER_EOI_SUPPRESSION) != 0) {
		lapic_eoi_suppression = 1;
		if (vm_guest == VM_GUEST_KVM) {
			if (bootverbose)
				printf(
		       "KVM -- disabling lapic eoi suppression\n");
			lapic_eoi_suppression = 0;
		}
		TUNABLE_INT_FETCH("hw.lapic_eoi_suppression",
		    &lapic_eoi_suppression);
	}

#ifdef SMP
#define	LOOPS	100000
	/*
	 * Calibrate the busy loop waiting for IPI ack in xAPIC mode.
	 * lapic_ipi_wait_mult contains the number of iterations which
	 * approximately delay execution for 1 microsecond (the
	 * argument to native_lapic_ipi_wait() is in microseconds).
	 *
	 * We assume that TSC is present and already measured.
	 * Possible TSC frequency jumps are irrelevant to the
	 * calibration loop below, the CPU clock management code is
	 * not yet started, and we do not enter sleep states.
	 */
	KASSERT((cpu_feature & CPUID_TSC) != 0 && tsc_freq != 0,
	    ("TSC not initialized"));
	if (!x2apic_mode) {
		r = rdtsc();
		for (rx = 0; rx < LOOPS; rx++) {
			(void)lapic_read_icr_lo();
			ia32_pause();
		}
		r = rdtsc() - r;
		r1 = tsc_freq * LOOPS;
		r2 = r * 1000000;
		/* iterations/us = (tsc_freq * LOOPS) / (cycles * 1e6) */
		lapic_ipi_wait_mult = r1 >= r2 ? r1 / r2 : 1;
		if (bootverbose) {
			printf("LAPIC: ipi_wait() us multiplier %ju (r %ju "
			    "tsc %ju)\n", (uintmax_t)lapic_ipi_wait_mult,
			    (uintmax_t)r, (uintmax_t)tsc_freq);
		}
	}
#undef LOOPS
#endif /* SMP */
}

/*
 * Create a local APIC instance.
 */
static void
native_lapic_create(u_int apic_id, int boot_cpu)
{
	int i;

	if (apic_id > MAX_APIC_ID) {
		/* NOTE(review): apic_id is u_int printed with %d -- harmless
		 * for valid IDs, but %u would match the type. */
		printf("APIC: Ignoring local APIC with ID %d\n", apic_id);
		if (boot_cpu)
			panic("Can't ignore BSP");
		return;
	}
	KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u",
	    apic_id));

	/*
	 * Assume no local LVT overrides and a cluster of 0 and
	 * intra-cluster ID of 0.
	 */
	lapics[apic_id].la_present = 1;
	lapics[apic_id].la_id = apic_id;
	for (i = 0; i <= APIC_LVT_MAX; i++) {
		lapics[apic_id].la_lvts[i] = lvts[i];
		lapics[apic_id].la_lvts[i].lvt_active = 0;
	}
	for (i = 0; i <= APIC_ELVT_MAX; i++) {
		lapics[apic_id].la_elvts[i] = elvts[i];
		lapics[apic_id].la_elvts[i].lvt_active = 0;
	}
	/* All I/O interrupt vectors start out free. */
	for (i = 0; i <= APIC_NUM_IOINTS; i++)
		lapics[apic_id].la_ioint_irqs[i] = IRQ_FREE;
	/* Reserve vectors that are not available for I/O interrupts. */
	lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL;
	lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] =
	    IRQ_TIMER;
#ifdef KDTRACE_HOOKS
	lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] =
	    IRQ_DTRACE_RET;
#endif
#ifdef XENHVM
	lapics[apic_id].la_ioint_irqs[IDT_EVTCHN - APIC_IO_INTS] = IRQ_EVTCHN;
#endif


#ifdef SMP
	cpu_add(apic_id, boot_cpu);
#endif
}

/*
 * Read the AMD extended feature register, or return 0 when not on AMD
 * or the extended register space is not advertised.
 */
static inline uint32_t
amd_read_ext_features(void)
{
	uint32_t version;

	if (cpu_vendor_id != CPU_VENDOR_AMD)
		return (0);
	version = lapic_read32(LAPIC_VERSION);
	if ((version & APIC_VER_AMD_EXT_SPACE) != 0)
		return (lapic_read32(LAPIC_EXT_FEATURES));
	else
		return (0);
}

/* Number of AMD extended LVT entries, clamped to what we can store. */
static inline uint32_t
amd_read_elvt_count(void)
{
	uint32_t extf;
	uint32_t count;

	extf = amd_read_ext_features();
	count = (extf & APIC_EXTF_ELVT_MASK) >> APIC_EXTF_ELVT_SHIFT;
	count = min(count, APIC_ELVT_MAX + 1);
	return (count);
}

/*
 * Dump contents of local APIC registers
 */
static void
native_lapic_dump(const char* str)
{
	uint32_t version;
	uint32_t maxlvt;
	uint32_t extf;
	int elvt_count;
	int i;

	version = lapic_read32(LAPIC_VERSION);
	maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
	printf("cpu%d %s:\n", PCPU_GET(cpuid), str);
	/* DFR does not exist in x2APIC mode. */
	printf(" ID: 0x%08x VER: 0x%08x LDR: 0x%08x DFR: 0x%08x",
	    lapic_read32(LAPIC_ID), version,
	    lapic_read32(LAPIC_LDR), x2apic_mode ? 0 : lapic_read32(LAPIC_DFR));
	if ((cpu_feature2 & CPUID2_X2APIC) != 0)
		printf(" x2APIC: %d", x2apic_mode);
	printf("\n lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
	    lapic_read32(LAPIC_LVT_LINT0), lapic_read32(LAPIC_LVT_LINT1),
	    lapic_read32(LAPIC_TPR), lapic_read32(LAPIC_SVR));
	printf(" timer: 0x%08x therm: 0x%08x err: 0x%08x",
	    lapic_read32(LAPIC_LVT_TIMER), lapic_read32(LAPIC_LVT_THERMAL),
	    lapic_read32(LAPIC_LVT_ERROR));
	if (maxlvt >= APIC_LVT_PMC)
		printf(" pmc: 0x%08x", lapic_read32(LAPIC_LVT_PCINT));
	printf("\n");
	if (maxlvt >= APIC_LVT_CMCI)
		printf(" cmci: 0x%08x\n", lapic_read32(LAPIC_LVT_CMCI));
	extf = amd_read_ext_features();
	if (extf != 0) {
		printf(" AMD ext features: 0x%08x\n", extf);
		elvt_count = amd_read_elvt_count();
		for (i = 0; i < elvt_count; i++)
			printf(" AMD elvt%d: 0x%08x\n", i,
			    lapic_read32(LAPIC_EXT_LVT0 + i));
	}
}

/* Switch the (already configured) APIC into x2APIC mode if requested. */
static void
native_lapic_xapic_mode(void)
{
	register_t saveintr;

	saveintr = intr_disable();
	if (x2apic_mode)
		native_lapic_enable_x2apic();
	intr_restore(saveintr);
}

/*
 * Program this CPU's local APIC: TPR, SVR, all LVT entries, and the
 * timer.  Runs with interrupts disabled.  'boot' is true on the first
 * call, which also calibrates the timer on the BSP.
 */
static void
native_lapic_setup(int boot)
{
	struct lapic *la;
	uint32_t version;
	uint32_t maxlvt;
	register_t saveintr;
	int elvt_count;
	int i;

	saveintr = intr_disable();

	la = &lapics[lapic_id()];
	KASSERT(la->la_present, ("missing APIC structure"));
	version = lapic_read32(LAPIC_VERSION);
	maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;

	/* Initialize the TPR to allow all interrupts. */
	lapic_set_tpr(0);

	/* Setup spurious vector and enable the local APIC. */
	lapic_enable();

	/* Program LINT[01] LVT entries.
 */
	lapic_write32(LAPIC_LVT_LINT0, lvt_mode(la, APIC_LVT_LINT0,
	    lapic_read32(LAPIC_LVT_LINT0)));
	lapic_write32(LAPIC_LVT_LINT1, lvt_mode(la, APIC_LVT_LINT1,
	    lapic_read32(LAPIC_LVT_LINT1)));

	/* Program the PMC LVT entry if present. */
	if (maxlvt >= APIC_LVT_PMC) {
		/*
		 * NOTE(review): the value argument here is the register
		 * constant LAPIC_LVT_PCINT rather than the register
		 * contents.  lvt_mode() rebuilds every relevant field, so
		 * this appears harmless, but confirm intent upstream.
		 */
		lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC,
		    LAPIC_LVT_PCINT));
	}

	/* Program timer LVT. */
	la->lvt_timer_base = lvt_mode(la, APIC_LVT_TIMER,
	    lapic_read32(LAPIC_LVT_TIMER));
	la->lvt_timer_last = la->lvt_timer_base;
	lapic_write32(LAPIC_LVT_TIMER, la->lvt_timer_base);

	/* Calibrate the timer parameters using BSP. */
	if (boot && IS_BSP()) {
		lapic_calibrate_initcount(la);
		if (lapic_timer_tsc_deadline)
			lapic_calibrate_deadline(la);
	}

	/* Setup the timer if configured. */
	if (la->la_timer_mode != LAT_MODE_UNDEF) {
		KASSERT(la->la_timer_period != 0, ("lapic%u: zero divisor",
		    lapic_id()));
		switch (la->la_timer_mode) {
		case LAT_MODE_PERIODIC:
			lapic_timer_set_divisor(lapic_timer_divisor);
			lapic_timer_periodic(la);
			break;
		case LAT_MODE_ONESHOT:
			lapic_timer_set_divisor(lapic_timer_divisor);
			lapic_timer_oneshot(la);
			break;
		case LAT_MODE_DEADLINE:
			/* TSC-deadline mode does not use the divisor. */
			lapic_timer_deadline(la);
			break;
		default:
			panic("corrupted la_timer_mode %p %d", la,
			    la->la_timer_mode);
		}
	}

	/* Program error LVT and clear any existing errors. */
	lapic_write32(LAPIC_LVT_ERROR, lvt_mode(la, APIC_LVT_ERROR,
	    lapic_read32(LAPIC_LVT_ERROR)));
	lapic_write32(LAPIC_ESR, 0);

	/* XXX: Thermal LVT */

	/* Program the CMCI LVT entry if present. */
	if (maxlvt >= APIC_LVT_CMCI) {
		lapic_write32(LAPIC_LVT_CMCI, lvt_mode(la, APIC_LVT_CMCI,
		    lapic_read32(LAPIC_LVT_CMCI)));
	}

	/* Program any active AMD extended LVT entries. */
	elvt_count = amd_read_elvt_count();
	for (i = 0; i < elvt_count; i++) {
		if (la->la_elvts[i].lvt_active)
			lapic_write32(LAPIC_EXT_LVT0 + i,
			    elvt_mode(la, i, lapic_read32(LAPIC_EXT_LVT0 + i)));
	}

	intr_restore(saveintr);
}

/* Register a per-CPU timer interrupt counter for each present APIC. */
static void
native_lapic_intrcnt(void *dummy __unused)
{
	struct pcpu *pc;
	struct lapic *la;
	char buf[MAXCOMLEN + 1];

	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
		la = &lapics[pc->pc_apic_id];
		if (!la->la_present)
			continue;

		snprintf(buf, sizeof(buf), "cpu%d:timer", pc->pc_cpuid);
		intrcnt_add(buf, &la->la_timer_count);
	}
}
SYSINIT(native_lapic_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, native_lapic_intrcnt,
    NULL);

/* Unmask the PMC LVT entry on the current CPU. */
static void
native_lapic_reenable_pmc(void)
{
#ifdef HWPMC_HOOKS
	uint32_t value;

	value = lapic_read32(LAPIC_LVT_PCINT);
	value &= ~APIC_LVT_M;
	lapic_write32(LAPIC_LVT_PCINT, value);
#endif
}

#ifdef HWPMC_HOOKS
/* Rendezvous callback: reprogram the PMC LVT from the global default. */
static void
lapic_update_pmc(void *dummy)
{
	struct lapic *la;

	la = &lapics[lapic_id()];
	lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC,
	    lapic_read32(LAPIC_LVT_PCINT)));
}
#endif

/*
 * Unmask the PMC LVT on all CPUs; returns 1 on success, 0 when the
 * APIC or its PMC LVT entry is unavailable.
 */
static int
native_lapic_enable_pmc(void)
{
#ifdef HWPMC_HOOKS
	u_int32_t maxlvt;

	/* Fail if the local APIC is not present. */
	if (!x2apic_mode && lapic_map == NULL)
		return (0);

	/* Fail if the PMC LVT is not present.
 */
	maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
	if (maxlvt < APIC_LVT_PMC)
		return (0);

	/* Unmask in the global default; APs pick this up via lvt_mode(). */
	lvts[APIC_LVT_PMC].lvt_masked = 0;

#ifdef EARLY_AP_STARTUP
	MPASS(mp_ncpus == 1 || smp_started);
	smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
#else
#ifdef SMP
	/*
	 * If hwpmc was loaded at boot time then the APs may not be
	 * started yet.  In that case, don't forward the request to
	 * them as they will program the lvt when they start.
	 */
	if (smp_started)
		smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
	else
#endif
		lapic_update_pmc(NULL);
#endif
	return (1);
#else
	return (0);
#endif
}

/* Mask the PMC LVT entry again on all CPUs. */
static void
native_lapic_disable_pmc(void)
{
#ifdef HWPMC_HOOKS
	u_int32_t maxlvt;

	/* Fail if the local APIC is not present. */
	if (!x2apic_mode && lapic_map == NULL)
		return;

	/* Fail if the PMC LVT is not present. */
	maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
	if (maxlvt < APIC_LVT_PMC)
		return;

	lvts[APIC_LVT_PMC].lvt_masked = 1;

#ifdef SMP
	/* The APs should always be started when hwpmc is unloaded. */
	KASSERT(mp_ncpus == 1 || smp_started, ("hwpmc unloaded too early"));
#endif
	smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
#endif
}

/*
 * Measure the timer tick frequency (count_freq) by timing a one-second
 * DELAY() against the current-count register, doubling the divisor
 * until the counter no longer wraps.
 */
static void
lapic_calibrate_initcount(struct lapic *la)
{
	u_long value;

	/* Start off with a divisor of 2 (power on reset default). */
	lapic_timer_divisor = 2;
	/* Try to calibrate the local APIC timer.
 */
	do {
		lapic_timer_set_divisor(lapic_timer_divisor);
		lapic_timer_oneshot_nointr(la, APIC_TIMER_MAX_COUNT);
		DELAY(1000000);
		value = APIC_TIMER_MAX_COUNT - lapic_read32(LAPIC_CCR_TIMER);
		/* value == MAX_COUNT means the counter wrapped: divisor too small. */
		if (value != APIC_TIMER_MAX_COUNT)
			break;
		lapic_timer_divisor <<= 1;
	} while (lapic_timer_divisor <= 128);
	if (lapic_timer_divisor > 128)
		panic("lapic: Divisor too big");
	if (bootverbose) {
		printf("lapic: Divisor %lu, Frequency %lu Hz\n",
		    lapic_timer_divisor, value);
	}
	count_freq = value;
}

/* TSC-deadline mode needs no calibration; report the TSC frequency. */
static void
lapic_calibrate_deadline(struct lapic *la __unused)
{

	if (bootverbose) {
		printf("lapic: deadline tsc mode, Frequency %ju Hz\n",
		    (uintmax_t)tsc_freq);
	}
}

/*
 * Switch the event timer between periodic/oneshot/deadline modes,
 * updating its frequency and the min/max period it can honor.
 */
static void
lapic_change_mode(struct eventtimer *et, struct lapic *la,
    enum lat_timer_mode newmode)
{

	if (la->la_timer_mode == newmode)
		return;
	switch (newmode) {
	case LAT_MODE_PERIODIC:
		lapic_timer_set_divisor(lapic_timer_divisor);
		et->et_frequency = count_freq;
		break;
	case LAT_MODE_DEADLINE:
		et->et_frequency = tsc_freq;
		break;
	case LAT_MODE_ONESHOT:
		lapic_timer_set_divisor(lapic_timer_divisor);
		et->et_frequency = count_freq;
		break;
	default:
		panic("lapic_change_mode %d", newmode);
	}
	la->la_timer_mode = newmode;
	et->et_min_period = (0x00000002LLU << 32) / et->et_frequency;
	et->et_max_period = (0xfffffffeLLU << 32) / et->et_frequency;
}

/*
 * eventtimer start method: arm the timer in periodic, TSC-deadline, or
 * oneshot mode.  'first'/'period' are sbintime fixed-point (32.32).
 */
static int
lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period)
{
	struct lapic *la;

	la = &lapics[PCPU_GET(apic_id)];
	if (period != 0) {
		lapic_change_mode(et, la, LAT_MODE_PERIODIC);
		/* 32-bit initial-count register: truncate the frequency. */
		la->la_timer_period = ((uint32_t)et->et_frequency * period) >>
		    32;
		lapic_timer_periodic(la);
	} else if (lapic_timer_tsc_deadline) {
		lapic_change_mode(et, la, LAT_MODE_DEADLINE);
		/* Deadline mode is 64-bit TSC: no truncating cast here. */
		la->la_timer_period = (et->et_frequency * first) >> 32;
		lapic_timer_deadline(la);
	} else {
		lapic_change_mode(et, la, LAT_MODE_ONESHOT);
		la->la_timer_period = ((uint32_t)et->et_frequency * first) >>
		    32;
		lapic_timer_oneshot(la);
	}
	return (0);
}

/* eventtimer stop method. */
static int
lapic_et_stop(struct eventtimer *et)
{
	struct lapic *la;

	la = &lapics[PCPU_GET(apic_id)];
	lapic_timer_stop(la);
	la->la_timer_mode = LAT_MODE_UNDEF;
	return (0);
}

static void
native_lapic_disable(void)
{
	uint32_t value;

	/* Software disable the local APIC. */
	value = lapic_read32(LAPIC_SVR);
	value &= ~APIC_SVR_SWEN;
	lapic_write32(LAPIC_SVR, value);
}

static void
lapic_enable(void)
{
	uint32_t value;

	/* Program the spurious vector to enable the local APIC. */
	value = lapic_read32(LAPIC_SVR);
	value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS);
	value |= APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT;
	if (lapic_eoi_suppression)
		value |= APIC_SVR_EOI_SUPPRESSION;
	lapic_write32(LAPIC_SVR, value);
}

/* Reset the local APIC on the BSP during resume. */
static void
lapic_resume(struct pic *pic, bool suspend_cancelled)
{

	lapic_setup(0);
}

/* Current CPU's APIC ID (shifted out of the ID register in xAPIC mode). */
static int
native_lapic_id(void)
{
	uint32_t v;

	KASSERT(x2apic_mode || lapic_map != NULL, ("local APIC is not mapped"));
	v = lapic_read32(LAPIC_ID);
	if (!x2apic_mode)
		v >>= APIC_ID_SHIFT;
	return (v);
}

/* Nonzero if 'vector' is pending in the IRR. */
static int
native_lapic_intr_pending(u_int vector)
{
	uint32_t irr;

	/*
	 * The IRR registers are an array of registers each of which
	 * only describes 32 interrupts in the low 32 bits.  Thus, we
	 * divide the vector by 32 to get the register index.
	 * Finally, we modulus the vector by 32 to determine the
	 * individual bit to test.
1102 */ 1103 irr = lapic_read32(LAPIC_IRR0 + vector / 32); 1104 return (irr & 1 << (vector % 32)); 1105} 1106 1107static void 1108native_lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id) 1109{ 1110 struct lapic *la; 1111 1112 KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist", 1113 __func__, apic_id)); 1114 KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big", 1115 __func__, cluster)); 1116 KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID, 1117 ("%s: intra cluster id %u too big", __func__, cluster_id)); 1118 la = &lapics[apic_id]; 1119 la->la_cluster = cluster; 1120 la->la_cluster_id = cluster_id; 1121} 1122 1123static int 1124native_lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked) 1125{ 1126 1127 if (pin > APIC_LVT_MAX) 1128 return (EINVAL); 1129 if (apic_id == APIC_ID_ALL) { 1130 lvts[pin].lvt_masked = masked; 1131 if (bootverbose) 1132 printf("lapic:"); 1133 } else { 1134 KASSERT(lapics[apic_id].la_present, 1135 ("%s: missing APIC %u", __func__, apic_id)); 1136 lapics[apic_id].la_lvts[pin].lvt_masked = masked; 1137 lapics[apic_id].la_lvts[pin].lvt_active = 1; 1138 if (bootverbose) 1139 printf("lapic%u:", apic_id); 1140 } 1141 if (bootverbose) 1142 printf(" LINT%u %s\n", pin, masked ? 
"masked" : "unmasked"); 1143 return (0); 1144} 1145 1146static int 1147native_lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode) 1148{ 1149 struct lvt *lvt; 1150 1151 if (pin > APIC_LVT_MAX) 1152 return (EINVAL); 1153 if (apic_id == APIC_ID_ALL) { 1154 lvt = &lvts[pin]; 1155 if (bootverbose) 1156 printf("lapic:"); 1157 } else { 1158 KASSERT(lapics[apic_id].la_present, 1159 ("%s: missing APIC %u", __func__, apic_id)); 1160 lvt = &lapics[apic_id].la_lvts[pin]; 1161 lvt->lvt_active = 1; 1162 if (bootverbose) 1163 printf("lapic%u:", apic_id); 1164 } 1165 lvt->lvt_mode = mode; 1166 switch (mode) { 1167 case APIC_LVT_DM_NMI: 1168 case APIC_LVT_DM_SMI: 1169 case APIC_LVT_DM_INIT: 1170 case APIC_LVT_DM_EXTINT: 1171 lvt->lvt_edgetrigger = 1; 1172 lvt->lvt_activehi = 1; 1173 if (mode == APIC_LVT_DM_EXTINT) 1174 lvt->lvt_masked = 1; 1175 else 1176 lvt->lvt_masked = 0; 1177 break; 1178 default: 1179 panic("Unsupported delivery mode: 0x%x\n", mode); 1180 } 1181 if (bootverbose) { 1182 printf(" Routing "); 1183 switch (mode) { 1184 case APIC_LVT_DM_NMI: 1185 printf("NMI"); 1186 break; 1187 case APIC_LVT_DM_SMI: 1188 printf("SMI"); 1189 break; 1190 case APIC_LVT_DM_INIT: 1191 printf("INIT"); 1192 break; 1193 case APIC_LVT_DM_EXTINT: 1194 printf("ExtINT"); 1195 break; 1196 } 1197 printf(" -> LINT%u\n", pin); 1198 } 1199 return (0); 1200} 1201 1202static int 1203native_lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol) 1204{ 1205 1206 if (pin > APIC_LVT_MAX || pol == INTR_POLARITY_CONFORM) 1207 return (EINVAL); 1208 if (apic_id == APIC_ID_ALL) { 1209 lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH); 1210 if (bootverbose) 1211 printf("lapic:"); 1212 } else { 1213 KASSERT(lapics[apic_id].la_present, 1214 ("%s: missing APIC %u", __func__, apic_id)); 1215 lapics[apic_id].la_lvts[pin].lvt_active = 1; 1216 lapics[apic_id].la_lvts[pin].lvt_activehi = 1217 (pol == INTR_POLARITY_HIGH); 1218 if (bootverbose) 1219 printf("lapic%u:", apic_id); 1220 } 1221 if 
(bootverbose) 1222 printf(" LINT%u polarity: %s\n", pin, 1223 pol == INTR_POLARITY_HIGH ? "high" : "low"); 1224 return (0); 1225} 1226 1227static int 1228native_lapic_set_lvt_triggermode(u_int apic_id, u_int pin, 1229 enum intr_trigger trigger) 1230{ 1231 1232 if (pin > APIC_LVT_MAX || trigger == INTR_TRIGGER_CONFORM) 1233 return (EINVAL); 1234 if (apic_id == APIC_ID_ALL) { 1235 lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE); 1236 if (bootverbose) 1237 printf("lapic:"); 1238 } else { 1239 KASSERT(lapics[apic_id].la_present, 1240 ("%s: missing APIC %u", __func__, apic_id)); 1241 lapics[apic_id].la_lvts[pin].lvt_edgetrigger = 1242 (trigger == INTR_TRIGGER_EDGE); 1243 lapics[apic_id].la_lvts[pin].lvt_active = 1; 1244 if (bootverbose) 1245 printf("lapic%u:", apic_id); 1246 } 1247 if (bootverbose) 1248 printf(" LINT%u trigger: %s\n", pin, 1249 trigger == INTR_TRIGGER_EDGE ? "edge" : "level"); 1250 return (0); 1251} 1252 1253/* 1254 * Adjust the TPR of the current CPU so that it blocks all interrupts below 1255 * the passed in vector. 1256 */ 1257static void 1258lapic_set_tpr(u_int vector) 1259{ 1260#ifdef CHEAP_TPR 1261 lapic_write32(LAPIC_TPR, vector); 1262#else 1263 uint32_t tpr; 1264 1265 tpr = lapic_read32(LAPIC_TPR) & ~APIC_TPR_PRIO; 1266 tpr |= vector; 1267 lapic_write32(LAPIC_TPR, tpr); 1268#endif 1269} 1270 1271static void 1272native_lapic_eoi(void) 1273{ 1274 1275 lapic_write32_nofence(LAPIC_EOI, 0); 1276} 1277 1278void 1279lapic_handle_intr(int vector, struct trapframe *frame) 1280{ 1281 struct intsrc *isrc; 1282 1283 isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id), 1284 vector)); 1285 intr_execute_handlers(isrc, frame); 1286} 1287 1288void 1289lapic_handle_timer(struct trapframe *frame) 1290{ 1291 struct lapic *la; 1292 struct trapframe *oldframe; 1293 struct thread *td; 1294 1295 /* Send EOI first thing. 
 */
	lapic_eoi();

#if defined(SMP) && !defined(SCHED_ULE)
	/*
	 * Don't do any accounting for the disabled HTT cores, since it
	 * will provide misleading numbers for the userland.
	 *
	 * No locking is necessary here, since even if we lose the race
	 * when hlt_cpus_mask changes it is not a big deal, really.
	 *
	 * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask
	 * and unlike other schedulers it actually schedules threads to
	 * those CPUs.
	 */
	if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask))
		return;
#endif

	/* Look up our local APIC structure for the tick counters. */
	la = &lapics[PCPU_GET(apic_id)];
	(*la->la_timer_count)++;
	critical_enter();
	if (lapic_et.et_active) {
		/* Run the eventtimer callback with the interrupt frame set. */
		td = curthread;
		td->td_intr_nesting_level++;
		oldframe = td->td_intr_frame;
		td->td_intr_frame = frame;
		lapic_et.et_event_cb(&lapic_et, lapic_et.et_arg);
		td->td_intr_frame = oldframe;
		td->td_intr_nesting_level--;
	}
	critical_exit();
}

/* Program the timer's divide configuration register. */
static void
lapic_timer_set_divisor(u_int divisor)
{

	KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor));
	KASSERT(ffs(divisor) <= nitems(lapic_timer_divisors),
		("lapic: invalid divisor %u", divisor));
	lapic_write32(LAPIC_DCR_TIMER, lapic_timer_divisors[ffs(divisor) - 1]);
}

/* Arm the timer in one-shot mode for la_timer_period ticks. */
static void
lapic_timer_oneshot(struct lapic *la)
{
	uint32_t value;

	value = la->lvt_timer_base;
	value &= ~(APIC_LVTT_TM | APIC_LVT_M);
	value |= APIC_LVTT_TM_ONE_SHOT;
	la->lvt_timer_last = value;
	lapic_write32(LAPIC_LVT_TIMER, value);
	lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period);
}

/*
 * Arm a one-shot count with the timer interrupt masked; used while
 * calibrating the timer frequency.
 */
static void
lapic_timer_oneshot_nointr(struct lapic *la, uint32_t count)
{
	uint32_t value;

	value = la->lvt_timer_base;
	value &= ~APIC_LVTT_TM;
	value |= APIC_LVTT_TM_ONE_SHOT | APIC_LVT_M;
	la->lvt_timer_last = value;
	lapic_write32(LAPIC_LVT_TIMER, value);
	lapic_write32(LAPIC_ICR_TIMER, count);
}

/* Arm the timer in periodic mode with a period of la_timer_period ticks. */
static void
lapic_timer_periodic(struct lapic *la)
{
	uint32_t value;

	value = la->lvt_timer_base;
	value &= ~(APIC_LVTT_TM | APIC_LVT_M);
	value |= APIC_LVTT_TM_PERIODIC;
	la->lvt_timer_last = value;
	lapic_write32(LAPIC_LVT_TIMER, value);
	lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period);
}

/*
 * Arm the timer in TSC-deadline mode, la_timer_period TSC ticks from
 * now.  The LVT write is skipped when the register already holds the
 * desired value.
 */
static void
lapic_timer_deadline(struct lapic *la)
{
	uint32_t value;

	value = la->lvt_timer_base;
	value &= ~(APIC_LVTT_TM | APIC_LVT_M);
	value |= APIC_LVTT_TM_TSCDLT;
	if (value != la->lvt_timer_last) {
		la->lvt_timer_last = value;
		lapic_write32_nofence(LAPIC_LVT_TIMER, value);
		/*
		 * NOTE(review): the fence presumably orders the LVT MMIO
		 * write ahead of the deadline WRMSR below in xAPIC mode;
		 * confirm against the SDM.
		 */
		if (!x2apic_mode)
			mfence();
	}
	wrmsr(MSR_TSC_DEADLINE, la->la_timer_period + rdtsc());
}

/* Stop the timer: clear the TSC deadline or mask the timer LVT. */
static void
lapic_timer_stop(struct lapic *la)
{
	uint32_t value;

	if (la->la_timer_mode == LAT_MODE_DEADLINE) {
		/* Writing zero disarms the deadline timer. */
		wrmsr(MSR_TSC_DEADLINE, 0);
		mfence();
	} else {
		value = la->lvt_timer_base;
		value &= ~APIC_LVTT_TM;
		value |= APIC_LVT_M;
		la->lvt_timer_last = value;
		lapic_write32(LAPIC_LVT_TIMER, value);
	}
}

/* Corrected machine check (CMC) interrupt handler. */
void
lapic_handle_cmc(void)
{

	lapic_eoi();
	cmc_intr();
}

/*
 * Called from the mca_init() to activate the CMC interrupt if this CPU is
 * responsible for monitoring any MC banks for CMC events.  Since mca_init()
 * is called prior to lapic_setup() during boot, this just needs to unmask
 * this CPU's LVT_CMCI entry.
 */
static void
native_lapic_enable_cmc(void)
{
	u_int apic_id;

#ifdef DEV_ATPIC
	if (!x2apic_mode && lapic_map == NULL)
		return;
#endif
	apic_id = PCPU_GET(apic_id);
	KASSERT(lapics[apic_id].la_present,
	    ("%s: missing APIC %u", __func__, apic_id));
	lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_masked = 0;
	lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_active = 1;
	if (bootverbose)
		printf("lapic%u: CMCI unmasked\n", apic_id);
}

/*
 * Unmask the AMD MCE Thresholding extended LVT on this CPU if it is
 * implemented.  Returns the ELVT index on success or -1 on failure.
 */
static int
native_lapic_enable_mca_elvt(void)
{
	u_int apic_id;
	uint32_t value;
	int elvt_count;

#ifdef DEV_ATPIC
	if (lapic_map == NULL)
		return (-1);
#endif

	apic_id = PCPU_GET(apic_id);
	KASSERT(lapics[apic_id].la_present,
	    ("%s: missing APIC %u", __func__, apic_id));
	elvt_count = amd_read_elvt_count();
	if (elvt_count <= APIC_ELVT_MCA)
		return (-1);

	value = lapic_read32(LAPIC_EXT_LVT0 + APIC_ELVT_MCA);
	if ((value & APIC_LVT_M) == 0) {
		/* Already unmasked — presumably claimed by firmware. */
		printf("AMD MCE Thresholding Extended LVT is already active\n");
		return (-1);
	}
	lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_masked = 0;
	lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_active = 1;
	if (bootverbose)
		printf("lapic%u: MCE Thresholding ELVT unmasked\n", apic_id);
	return (APIC_ELVT_MCA);
}

/* Local APIC error interrupt handler. */
void
lapic_handle_error(void)
{
	uint32_t esr;

	/*
	 * Read the contents of the error status register.  Write to
	 * the register first before reading from it to force the APIC
	 * to update its value to indicate any errors that have
	 * occurred since the previous write to the register.
	 */
	lapic_write32(LAPIC_ESR, 0);
	esr = lapic_read32(LAPIC_ESR);

	printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr);
	lapic_eoi();
}

/* Map an APIC ID to its FreeBSD CPU id. */
static u_int
native_apic_cpuid(u_int apic_id)
{
#ifdef SMP
	return apic_cpuids[apic_id];
#else
	return 0;
#endif
}

/* Request a free IDT vector to be used by the specified IRQ. */
static u_int
native_apic_alloc_vector(u_int apic_id, u_int irq)
{
	u_int vector;

	KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq));

	/*
	 * Search for a free vector.  Currently we just use a very simple
	 * algorithm to find the first free vector.
	 */
	mtx_lock_spin(&icu_lock);
	for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
		if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE)
			continue;
		lapics[apic_id].la_ioint_irqs[vector] = irq;
		mtx_unlock_spin(&icu_lock);
		return (vector + APIC_IO_INTS);
	}
	mtx_unlock_spin(&icu_lock);
	/* 0 signals allocation failure to the caller. */
	return (0);
}

/*
 * Request 'count' free contiguous IDT vectors to be used by 'count'
 * IRQs.  'count' must be a power of two and the vectors will be
 * aligned on a boundary of 'align'.  If the request cannot be
 * satisfied, 0 is returned.
 */
static u_int
native_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align)
{
	u_int first, run, vector;

	KASSERT(powerof2(count), ("bad count"));
	KASSERT(powerof2(align), ("bad align"));
	KASSERT(align >= count, ("align < count"));
#ifdef INVARIANTS
	for (run = 0; run < count; run++)
		KASSERT(irqs[run] < num_io_irqs, ("Invalid IRQ %u at index %u",
		    irqs[run], run));
#endif

	/*
	 * Search for 'count' free vectors.  As with apic_alloc_vector(),
	 * this just uses a simple first fit algorithm.
1551 */ 1552 run = 0; 1553 first = 0; 1554 mtx_lock_spin(&icu_lock); 1555 for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { 1556 1557 /* Vector is in use, end run. */ 1558 if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE) { 1559 run = 0; 1560 first = 0; 1561 continue; 1562 } 1563 1564 /* Start a new run if run == 0 and vector is aligned. */ 1565 if (run == 0) { 1566 if ((vector & (align - 1)) != 0) 1567 continue; 1568 first = vector; 1569 } 1570 run++; 1571 1572 /* Keep looping if the run isn't long enough yet. */ 1573 if (run < count) 1574 continue; 1575 1576 /* Found a run, assign IRQs and return the first vector. */ 1577 for (vector = 0; vector < count; vector++) 1578 lapics[apic_id].la_ioint_irqs[first + vector] = 1579 irqs[vector]; 1580 mtx_unlock_spin(&icu_lock); 1581 return (first + APIC_IO_INTS); 1582 } 1583 mtx_unlock_spin(&icu_lock); 1584 printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count); 1585 return (0); 1586} 1587 1588/* 1589 * Enable a vector for a particular apic_id. Since all lapics share idt 1590 * entries and ioint_handlers this enables the vector on all lapics. lapics 1591 * which do not have the vector configured would report spurious interrupts 1592 * should it fire. 1593 */ 1594static void 1595native_apic_enable_vector(u_int apic_id, u_int vector) 1596{ 1597 1598 KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); 1599 KASSERT(ioint_handlers[vector / 32] != NULL, 1600 ("No ISR handler for vector %u", vector)); 1601#ifdef KDTRACE_HOOKS 1602 KASSERT(vector != IDT_DTRACE_RET, 1603 ("Attempt to overwrite DTrace entry")); 1604#endif 1605 setidt(vector, (pti ? 
ioint_pti_handlers : ioint_handlers)[vector / 32], 1606 SDT_APIC, SEL_KPL, GSEL_APIC); 1607} 1608 1609static void 1610native_apic_disable_vector(u_int apic_id, u_int vector) 1611{ 1612 1613 KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); 1614#ifdef KDTRACE_HOOKS 1615 KASSERT(vector != IDT_DTRACE_RET, 1616 ("Attempt to overwrite DTrace entry")); 1617#endif 1618 KASSERT(ioint_handlers[vector / 32] != NULL, 1619 ("No ISR handler for vector %u", vector)); 1620#ifdef notyet 1621 /* 1622 * We can not currently clear the idt entry because other cpus 1623 * may have a valid vector at this offset. 1624 */ 1625 setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APICT, 1626 SEL_KPL, GSEL_APIC); 1627#endif 1628} 1629 1630/* Release an APIC vector when it's no longer in use. */ 1631static void 1632native_apic_free_vector(u_int apic_id, u_int vector, u_int irq) 1633{ 1634 struct thread *td; 1635 1636 KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && 1637 vector <= APIC_IO_INTS + APIC_NUM_IOINTS, 1638 ("Vector %u does not map to an IRQ line", vector)); 1639 KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq)); 1640 KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] == 1641 irq, ("IRQ mismatch")); 1642#ifdef KDTRACE_HOOKS 1643 KASSERT(vector != IDT_DTRACE_RET, 1644 ("Attempt to overwrite DTrace entry")); 1645#endif 1646 1647 /* 1648 * Bind us to the cpu that owned the vector before freeing it so 1649 * we don't lose an interrupt delivery race. 
1650 */ 1651 td = curthread; 1652 if (!rebooting) { 1653 thread_lock(td); 1654 if (sched_is_bound(td)) 1655 panic("apic_free_vector: Thread already bound.\n"); 1656 sched_bind(td, apic_cpuid(apic_id)); 1657 thread_unlock(td); 1658 } 1659 mtx_lock_spin(&icu_lock); 1660 lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = IRQ_FREE; 1661 mtx_unlock_spin(&icu_lock); 1662 if (!rebooting) { 1663 thread_lock(td); 1664 sched_unbind(td); 1665 thread_unlock(td); 1666 } 1667} 1668 1669/* Map an IDT vector (APIC) to an IRQ (interrupt source). */ 1670static u_int 1671apic_idt_to_irq(u_int apic_id, u_int vector) 1672{ 1673 int irq; 1674 1675 KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && 1676 vector <= APIC_IO_INTS + APIC_NUM_IOINTS, 1677 ("Vector %u does not map to an IRQ line", vector)); 1678#ifdef KDTRACE_HOOKS 1679 KASSERT(vector != IDT_DTRACE_RET, 1680 ("Attempt to overwrite DTrace entry")); 1681#endif 1682 irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS]; 1683 if (irq < 0) 1684 irq = 0; 1685 return (irq); 1686} 1687 1688#ifdef DDB 1689/* 1690 * Dump data about APIC IDT vector mappings. 
1691 */ 1692DB_SHOW_COMMAND(apic, db_show_apic) 1693{ 1694 struct intsrc *isrc; 1695 int i, verbose; 1696 u_int apic_id; 1697 u_int irq; 1698 1699 if (strcmp(modif, "vv") == 0) 1700 verbose = 2; 1701 else if (strcmp(modif, "v") == 0) 1702 verbose = 1; 1703 else 1704 verbose = 0; 1705 for (apic_id = 0; apic_id <= MAX_APIC_ID; apic_id++) { 1706 if (lapics[apic_id].la_present == 0) 1707 continue; 1708 db_printf("Interrupts bound to lapic %u\n", apic_id); 1709 for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) { 1710 irq = lapics[apic_id].la_ioint_irqs[i]; 1711 if (irq == IRQ_FREE || irq == IRQ_SYSCALL) 1712 continue; 1713#ifdef KDTRACE_HOOKS 1714 if (irq == IRQ_DTRACE_RET) 1715 continue; 1716#endif 1717#ifdef XENHVM 1718 if (irq == IRQ_EVTCHN) 1719 continue; 1720#endif 1721 db_printf("vec 0x%2x -> ", i + APIC_IO_INTS); 1722 if (irq == IRQ_TIMER) 1723 db_printf("lapic timer\n"); 1724 else if (irq < num_io_irqs) { 1725 isrc = intr_lookup_source(irq); 1726 if (isrc == NULL || verbose == 0) 1727 db_printf("IRQ %u\n", irq); 1728 else 1729 db_dump_intr_event(isrc->is_event, 1730 verbose == 2); 1731 } else 1732 db_printf("IRQ %u ???\n", irq); 1733 } 1734 } 1735} 1736 1737static void 1738dump_mask(const char *prefix, uint32_t v, int base) 1739{ 1740 int i, first; 1741 1742 first = 1; 1743 for (i = 0; i < 32; i++) 1744 if (v & (1 << i)) { 1745 if (first) { 1746 db_printf("%s:", prefix); 1747 first = 0; 1748 } 1749 db_printf(" %02x", base + i); 1750 } 1751 if (!first) 1752 db_printf("\n"); 1753} 1754 1755/* Show info from the lapic regs for this CPU. 
*/ 1756DB_SHOW_COMMAND(lapic, db_show_lapic) 1757{ 1758 uint32_t v; 1759 1760 db_printf("lapic ID = %d\n", lapic_id()); 1761 v = lapic_read32(LAPIC_VERSION); 1762 db_printf("version = %d.%d\n", (v & APIC_VER_VERSION) >> 4, 1763 v & 0xf); 1764 db_printf("max LVT = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT); 1765 v = lapic_read32(LAPIC_SVR); 1766 db_printf("SVR = %02x (%s)\n", v & APIC_SVR_VECTOR, 1767 v & APIC_SVR_ENABLE ? "enabled" : "disabled"); 1768 db_printf("TPR = %02x\n", lapic_read32(LAPIC_TPR)); 1769 1770#define dump_field(prefix, regn, index) \ 1771 dump_mask(__XSTRING(prefix ## index), \ 1772 lapic_read32(LAPIC_ ## regn ## index), \ 1773 index * 32) 1774 1775 db_printf("In-service Interrupts:\n"); 1776 dump_field(isr, ISR, 0); 1777 dump_field(isr, ISR, 1); 1778 dump_field(isr, ISR, 2); 1779 dump_field(isr, ISR, 3); 1780 dump_field(isr, ISR, 4); 1781 dump_field(isr, ISR, 5); 1782 dump_field(isr, ISR, 6); 1783 dump_field(isr, ISR, 7); 1784 1785 db_printf("TMR Interrupts:\n"); 1786 dump_field(tmr, TMR, 0); 1787 dump_field(tmr, TMR, 1); 1788 dump_field(tmr, TMR, 2); 1789 dump_field(tmr, TMR, 3); 1790 dump_field(tmr, TMR, 4); 1791 dump_field(tmr, TMR, 5); 1792 dump_field(tmr, TMR, 6); 1793 dump_field(tmr, TMR, 7); 1794 1795 db_printf("IRR Interrupts:\n"); 1796 dump_field(irr, IRR, 0); 1797 dump_field(irr, IRR, 1); 1798 dump_field(irr, IRR, 2); 1799 dump_field(irr, IRR, 3); 1800 dump_field(irr, IRR, 4); 1801 dump_field(irr, IRR, 5); 1802 dump_field(irr, IRR, 6); 1803 dump_field(irr, IRR, 7); 1804 1805#undef dump_field 1806} 1807#endif 1808 1809/* 1810 * APIC probing support code. This includes code to manage enumerators. 
1811 */ 1812 1813static SLIST_HEAD(, apic_enumerator) enumerators = 1814 SLIST_HEAD_INITIALIZER(enumerators); 1815static struct apic_enumerator *best_enum; 1816 1817void 1818apic_register_enumerator(struct apic_enumerator *enumerator) 1819{ 1820#ifdef INVARIANTS 1821 struct apic_enumerator *apic_enum; 1822 1823 SLIST_FOREACH(apic_enum, &enumerators, apic_next) { 1824 if (apic_enum == enumerator) 1825 panic("%s: Duplicate register of %s", __func__, 1826 enumerator->apic_name); 1827 } 1828#endif 1829 SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next); 1830} 1831 1832/* 1833 * We have to look for CPU's very, very early because certain subsystems 1834 * want to know how many CPU's we have extremely early on in the boot 1835 * process. 1836 */ 1837static void 1838apic_init(void *dummy __unused) 1839{ 1840 struct apic_enumerator *enumerator; 1841 int retval, best; 1842 1843 /* We only support built in local APICs. */ 1844 if (!(cpu_feature & CPUID_APIC)) 1845 return; 1846 1847 /* Don't probe if APIC mode is disabled. */ 1848 if (resource_disabled("apic", 0)) 1849 return; 1850 1851 /* Probe all the enumerators to find the best match. */ 1852 best_enum = NULL; 1853 best = 0; 1854 SLIST_FOREACH(enumerator, &enumerators, apic_next) { 1855 retval = enumerator->apic_probe(); 1856 if (retval > 0) 1857 continue; 1858 if (best_enum == NULL || best < retval) { 1859 best_enum = enumerator; 1860 best = retval; 1861 } 1862 } 1863 if (best_enum == NULL) { 1864 if (bootverbose) 1865 printf("APIC: Could not find any APICs.\n"); 1866#ifndef DEV_ATPIC 1867 panic("running without device atpic requires a local APIC"); 1868#endif 1869 return; 1870 } 1871 1872 if (bootverbose) 1873 printf("APIC: Using the %s enumerator.\n", 1874 best_enum->apic_name); 1875 1876#ifdef I686_CPU 1877 /* 1878 * To work around an errata, we disable the local APIC on some 1879 * CPUs during early startup. We need to turn the local APIC back 1880 * on on such CPUs now. 
1881 */ 1882 ppro_reenable_apic(); 1883#endif 1884 1885 /* Probe the CPU's in the system. */ 1886 retval = best_enum->apic_probe_cpus(); 1887 if (retval != 0) 1888 printf("%s: Failed to probe CPUs: returned %d\n", 1889 best_enum->apic_name, retval); 1890 1891} 1892SYSINIT(apic_init, SI_SUB_TUNABLES - 1, SI_ORDER_SECOND, apic_init, NULL); 1893 1894/* 1895 * Setup the local APIC. We have to do this prior to starting up the APs 1896 * in the SMP case. 1897 */ 1898static void 1899apic_setup_local(void *dummy __unused) 1900{ 1901 int retval; 1902 1903 if (best_enum == NULL) 1904 return; 1905 1906 /* Initialize the local APIC. */ 1907 retval = best_enum->apic_setup_local(); 1908 if (retval != 0) 1909 printf("%s: Failed to setup the local APIC: returned %d\n", 1910 best_enum->apic_name, retval); 1911} 1912SYSINIT(apic_setup_local, SI_SUB_CPU, SI_ORDER_SECOND, apic_setup_local, NULL); 1913 1914/* 1915 * Setup the I/O APICs. 1916 */ 1917static void 1918apic_setup_io(void *dummy __unused) 1919{ 1920 int retval; 1921 1922 if (best_enum == NULL) 1923 return; 1924 1925 /* 1926 * Local APIC must be registered before other PICs and pseudo PICs 1927 * for proper suspend/resume order. 1928 */ 1929 intr_register_pic(&lapic_pic); 1930 1931 retval = best_enum->apic_setup_io(); 1932 if (retval != 0) 1933 printf("%s: Failed to setup I/O APICs: returned %d\n", 1934 best_enum->apic_name, retval); 1935 1936 /* 1937 * Finish setting up the local APIC on the BSP once we know 1938 * how to properly program the LINT pins. In particular, this 1939 * enables the EOI suppression mode, if LAPIC support it and 1940 * user did not disabled the mode. 1941 */ 1942 lapic_setup(1); 1943 if (bootverbose) 1944 lapic_dump("BSP"); 1945 1946 /* Enable the MSI "pic". */ 1947 init_ops.msi_init(); 1948 1949#ifdef XENHVM 1950 xen_intr_alloc_irqs(); 1951#endif 1952} 1953SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_THIRD, apic_setup_io, NULL); 1954 1955#ifdef SMP 1956/* 1957 * Inter Processor Interrupt functions. 
The lapic_ipi_*() functions are 1958 * private to the MD code. The public interface for the rest of the 1959 * kernel is defined in mp_machdep.c. 1960 */ 1961 1962/* 1963 * Wait delay microseconds for IPI to be sent. If delay is -1, we 1964 * wait forever. 1965 */ 1966static int 1967native_lapic_ipi_wait(int delay) 1968{ 1969 uint64_t rx; 1970 1971 /* LAPIC_ICR.APIC_DELSTAT_MASK is undefined in x2APIC mode */ 1972 if (x2apic_mode) 1973 return (1); 1974 1975 for (rx = 0; delay == -1 || rx < lapic_ipi_wait_mult * delay; rx++) { 1976 if ((lapic_read_icr_lo() & APIC_DELSTAT_MASK) == 1977 APIC_DELSTAT_IDLE) 1978 return (1); 1979 ia32_pause(); 1980 } 1981 return (0); 1982} 1983 1984static void 1985native_lapic_ipi_raw(register_t icrlo, u_int dest) 1986{ 1987 uint64_t icr; 1988 uint32_t vhi, vlo; 1989 register_t saveintr; 1990 1991 /* XXX: Need more sanity checking of icrlo? */ 1992 KASSERT(x2apic_mode || lapic_map != NULL, 1993 ("%s called too early", __func__)); 1994 KASSERT(x2apic_mode || 1995 (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, 1996 ("%s: invalid dest field", __func__)); 1997 KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0, 1998 ("%s: reserved bits set in ICR LO register", __func__)); 1999 2000 /* Set destination in ICR HI register if it is being used. */ 2001 if (!x2apic_mode) { 2002 saveintr = intr_disable(); 2003 icr = lapic_read_icr(); 2004 } 2005 2006 if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) { 2007 if (x2apic_mode) { 2008 vhi = dest; 2009 } else { 2010 vhi = icr >> 32; 2011 vhi &= ~APIC_ID_MASK; 2012 vhi |= dest << APIC_ID_SHIFT; 2013 } 2014 } else { 2015 vhi = 0; 2016 } 2017 2018 /* Program the contents of the IPI and dispatch it. 
*/ 2019 if (x2apic_mode) { 2020 vlo = icrlo; 2021 } else { 2022 vlo = icr; 2023 vlo &= APIC_ICRLO_RESV_MASK; 2024 vlo |= icrlo; 2025 } 2026 lapic_write_icr(vhi, vlo); 2027 if (!x2apic_mode) 2028 intr_restore(saveintr); 2029} 2030 2031#define BEFORE_SPIN 50000 2032#ifdef DETECT_DEADLOCK 2033#define AFTER_SPIN 50 2034#endif 2035 2036static void 2037native_lapic_ipi_vectored(u_int vector, int dest) 2038{ 2039 register_t icrlo, destfield; 2040 2041 KASSERT((vector & ~APIC_VECTOR_MASK) == 0, 2042 ("%s: invalid vector %d", __func__, vector)); 2043 2044 icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT; 2045 2046 /* 2047 * NMI IPIs are just fake vectors used to send a NMI. Use special rules 2048 * regarding NMIs if passed, otherwise specify the vector. 2049 */ 2050 if (vector >= IPI_NMI_FIRST) 2051 icrlo |= APIC_DELMODE_NMI; 2052 else 2053 icrlo |= vector | APIC_DELMODE_FIXED; 2054 destfield = 0; 2055 switch (dest) { 2056 case APIC_IPI_DEST_SELF: 2057 icrlo |= APIC_DEST_SELF; 2058 break; 2059 case APIC_IPI_DEST_ALL: 2060 icrlo |= APIC_DEST_ALLISELF; 2061 break; 2062 case APIC_IPI_DEST_OTHERS: 2063 icrlo |= APIC_DEST_ALLESELF; 2064 break; 2065 default: 2066 KASSERT(x2apic_mode || 2067 (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, 2068 ("%s: invalid destination 0x%x", __func__, dest)); 2069 destfield = dest; 2070 } 2071 2072 /* Wait for an earlier IPI to finish. */ 2073 if (!lapic_ipi_wait(BEFORE_SPIN)) { 2074 if (panicstr != NULL) 2075 return; 2076 else 2077 panic("APIC: Previous IPI is stuck"); 2078 } 2079 2080 lapic_ipi_raw(icrlo, destfield); 2081 2082#ifdef DETECT_DEADLOCK 2083 /* Wait for IPI to be delivered. */ 2084 if (!lapic_ipi_wait(AFTER_SPIN)) { 2085#ifdef needsattention 2086 /* 2087 * XXX FIXME: 2088 * 2089 * The above function waits for the message to actually be 2090 * delivered. 
It breaks out after an arbitrary timeout 2091 * since the message should eventually be delivered (at 2092 * least in theory) and that if it wasn't we would catch 2093 * the failure with the check above when the next IPI is 2094 * sent. 2095 * 2096 * We could skip this wait entirely, EXCEPT it probably 2097 * protects us from other routines that assume that the 2098 * message was delivered and acted upon when this function 2099 * returns. 2100 */ 2101 printf("APIC: IPI might be stuck\n"); 2102#else /* !needsattention */ 2103 /* Wait until mesage is sent without a timeout. */ 2104 while (lapic_read_icr_lo() & APIC_DELSTAT_PEND) 2105 ia32_pause(); 2106#endif /* needsattention */ 2107 } 2108#endif /* DETECT_DEADLOCK */ 2109} 2110 2111#endif /* SMP */ 2112 2113/* 2114 * Since the IDT is shared by all CPUs the IPI slot update needs to be globally 2115 * visible. 2116 * 2117 * Consider the case where an IPI is generated immediately after allocation: 2118 * vector = lapic_ipi_alloc(ipifunc); 2119 * ipi_selected(other_cpus, vector); 2120 * 2121 * In xAPIC mode a write to ICR_LO has serializing semantics because the 2122 * APIC page is mapped as an uncached region. In x2APIC mode there is an 2123 * explicit 'mfence' before the ICR MSR is written. Therefore in both cases 2124 * the IDT slot update is globally visible before the IPI is delivered. 
2125 */ 2126static int 2127native_lapic_ipi_alloc(inthand_t *ipifunc) 2128{ 2129 struct gate_descriptor *ip; 2130 long func; 2131 int idx, vector; 2132 2133 KASSERT(ipifunc != &IDTVEC(rsvd) && ipifunc != &IDTVEC(rsvd_pti), 2134 ("invalid ipifunc %p", ipifunc)); 2135 2136 vector = -1; 2137 mtx_lock_spin(&icu_lock); 2138 for (idx = IPI_DYN_FIRST; idx <= IPI_DYN_LAST; idx++) { 2139 ip = &idt[idx]; 2140 func = (ip->gd_hioffset << 16) | ip->gd_looffset; 2141 if ((!pti && func == (uintptr_t)&IDTVEC(rsvd)) || 2142 (pti && func == (uintptr_t)&IDTVEC(rsvd_pti))) { 2143 vector = idx; 2144 setidt(vector, ipifunc, SDT_APIC, SEL_KPL, GSEL_APIC); 2145 break; 2146 } 2147 } 2148 mtx_unlock_spin(&icu_lock); 2149 return (vector); 2150} 2151 2152static void 2153native_lapic_ipi_free(int vector) 2154{ 2155 struct gate_descriptor *ip; 2156 long func; 2157 2158 KASSERT(vector >= IPI_DYN_FIRST && vector <= IPI_DYN_LAST, 2159 ("%s: invalid vector %d", __func__, vector)); 2160 2161 mtx_lock_spin(&icu_lock); 2162 ip = &idt[vector]; 2163 func = (ip->gd_hioffset << 16) | ip->gd_looffset; 2164 KASSERT(func != (uintptr_t)&IDTVEC(rsvd) && 2165 func != (uintptr_t)&IDTVEC(rsvd_pti), 2166 ("invalid idtfunc %#lx", func)); 2167 setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APICT, 2168 SEL_KPL, GSEL_APIC); 2169 mtx_unlock_spin(&icu_lock); 2170} 2171