/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/sys/amd64/vmm/io/vlapic.c 268891 2014-07-19 22:06:46Z jhb $
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/amd64/vmm/io/vlapic.c 268891 2014-07-19 22:06:46Z jhb $");

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/smp.h>

#include <x86/specialreg.h>
#include <x86/apicreg.h>

#include <machine/clock.h>
#include <machine/smp.h>

#include <machine/vmm.h>

#include "vmm_ipi.h"
#include "vmm_lapic.h"
#include "vmm_ktr.h"
#include "vmm_stat.h"

#include "vlapic.h"
#include "vlapic_priv.h"
#include "vioapic.h"

#define	PRIO(x)			((x) >> 4)

#define	VLAPIC_VERSION		(16)

#define	x2apic(vlapic)	(((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0)

/*
 * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the
 * vlapic_callout_handler() and vcpu accesses to:
 * - timer_freq_bt, timer_period_bt, timer_fire_bt
 * - timer LVT register
 */
#define	VLAPIC_TIMER_LOCK(vlapic)	mtx_lock_spin(&((vlapic)->timer_mtx))
#define	VLAPIC_TIMER_UNLOCK(vlapic)	mtx_unlock_spin(&((vlapic)->timer_mtx))
#define	VLAPIC_TIMER_LOCKED(vlapic)	mtx_owned(&((vlapic)->timer_mtx))

#define	VLAPIC_BUS_FREQ	tsc_freq

static __inline uint32_t
vlapic_get_id(struct vlapic *vlapic)
{

	if (x2apic(vlapic))
		return (vlapic->vcpuid);
	else
		return (vlapic->vcpuid << 24);
}

static uint32_t
x2apic_ldr(struct vlapic *vlapic)
{
	int apicid;
	uint32_t ldr;

	apicid = vlapic_get_id(vlapic);
	ldr = 1 << (apicid & 0xf);
	ldr |= (apicid & 0xffff0) << 12;
	return (ldr);
}

void
vlapic_dfr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	if (x2apic(vlapic)) {
		VM_CTR1(vlapic->vm, "ignoring write to DFR in x2apic mode: %#x",
		    lapic->dfr);
		lapic->dfr = 0;
		return;
	}

	lapic->dfr &= APIC_DFR_MODEL_MASK;
	lapic->dfr |= APIC_DFR_RESERVED;

	if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT)
		VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model");
	else if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER)
		VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model");
	else
		VLAPIC_CTR1(vlapic, "DFR in Unknown Model %#x", lapic->dfr);
}

void
vlapic_ldr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;

	/* LDR is read-only in x2apic mode */
	if (x2apic(vlapic)) {
		VLAPIC_CTR1(vlapic, "ignoring write to LDR in x2apic mode: %#x",
		    lapic->ldr);
		lapic->ldr = x2apic_ldr(vlapic);
	} else {
		lapic->ldr &= ~APIC_LDR_RESERVED;
		VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr);
	}
}

void
vlapic_id_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	/*
	 * We don't allow the ID register to be modified so reset it back to
	 * its default value.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
}

static int
vlapic_timer_divisor(uint32_t dcr)
{
	switch (dcr & 0xB) {
	case APIC_TDCR_1:
		return (1);
	case APIC_TDCR_2:
		return (2);
	case APIC_TDCR_4:
		return (4);
	case APIC_TDCR_8:
		return (8);
	case APIC_TDCR_16:
		return (16);
	case APIC_TDCR_32:
		return (32);
	case APIC_TDCR_64:
		return (64);
	case APIC_TDCR_128:
		return (128);
	default:
		panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
	}
}

#if 0
static inline void
vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
{
	printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
	    *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
	    *lvt & APIC_LVTT_M);
}
#endif

static uint32_t
vlapic_get_ccr(struct vlapic *vlapic)
{
	struct bintime bt_now, bt_rem;
	struct LAPIC *lapic;
	uint32_t ccr;

	ccr = 0;
	lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_active(&vlapic->callout)) {
		/*
		 * If the timer is scheduled to expire in the future then
		 * compute the value of 'ccr' based on the remaining time.
		 */
		binuptime(&bt_now);
		if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) {
			bt_rem = vlapic->timer_fire_bt;
			bintime_sub(&bt_rem, &bt_now);
			ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt);
			ccr += bt_rem.frac / vlapic->timer_freq_bt.frac;
		}
	}
	KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, "
	    "icr_timer is %#x", ccr, lapic->icr_timer));
	VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x",
	    ccr, lapic->icr_timer);
	VLAPIC_TIMER_UNLOCK(vlapic);
	return (ccr);
}

void
vlapic_dcr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	int divisor;

	lapic = vlapic->apic_page;
	VLAPIC_TIMER_LOCK(vlapic);

	divisor = vlapic_timer_divisor(lapic->dcr_timer);
	VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d",
	    lapic->dcr_timer, divisor);

	/*
	 * Update the timer frequency and the timer period.
	 *
	 * XXX changes to the frequency divider will not take effect until
	 * the timer is reloaded.
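	 *
	 * Illustrative example (numbers assumed, not taken from this file):
	 * with a 2 GHz TSC as the virtual APIC bus clock and a divide-by-8
	 * DCR setting, the timer ticks at 250 MHz, so an icr_timer value of
	 * 250000000 corresponds to a one-second period.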
	 */
	FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt);
	vlapic->timer_period_bt = vlapic->timer_freq_bt;
	bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);

	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_esr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->esr = vlapic->esr_pending;
	vlapic->esr_pending = 0;
}

int
vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *irrptr, *tmrptr, mask;
	int idx;

	KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));

	lapic = vlapic->apic_page;
	if (!(lapic->svr & APIC_SVR_ENABLE)) {
		VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring "
		    "interrupt %d", vector);
		return (0);
	}

	if (vector < 16) {
		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR);
		VLAPIC_CTR1(vlapic, "vlapic ignoring interrupt to vector %d",
		    vector);
		return (1);
	}

	if (vlapic->ops.set_intr_ready)
		return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));

	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);

	irrptr = &lapic->irr0;
	atomic_set_int(&irrptr[idx], mask);

	/*
	 * Verify that the trigger-mode of the interrupt matches with
	 * the vlapic TMR registers.
	 */
	tmrptr = &lapic->tmr0;
	if ((tmrptr[idx] & mask) != (level ? mask : 0)) {
		VLAPIC_CTR3(vlapic, "vlapic TMR[%d] is 0x%08x but "
		    "interrupt is %s-triggered", idx / 4, tmrptr[idx],
		    level ? "level" : "edge");
	}

	VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
	return (1);
}

static __inline uint32_t *
vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int i;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		return (&lapic->lvt_cmci);
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
		return ((&lapic->lvt_timer) + i);
	default:
		panic("vlapic_get_lvt: invalid LVT\n");
	}
}

static __inline int
lvt_off_to_idx(uint32_t offset)
{
	int index;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		index = APIC_LVT_CMCI;
		break;
	case APIC_OFFSET_TIMER_LVT:
		index = APIC_LVT_TIMER;
		break;
	case APIC_OFFSET_THERM_LVT:
		index = APIC_LVT_THERMAL;
		break;
	case APIC_OFFSET_PERF_LVT:
		index = APIC_LVT_PMC;
		break;
	case APIC_OFFSET_LINT0_LVT:
		index = APIC_LVT_LINT0;
		break;
	case APIC_OFFSET_LINT1_LVT:
		index = APIC_LVT_LINT1;
		break;
	case APIC_OFFSET_ERROR_LVT:
		index = APIC_LVT_ERROR;
		break;
	default:
		index = -1;
		break;
	}
	KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
	    "invalid lvt index %d for offset %#x", index, offset));

	return (index);
}

static __inline uint32_t
vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
{
	int idx;
	uint32_t val;

	idx = lvt_off_to_idx(offset);
	val = atomic_load_acq_32(&vlapic->lvt_last[idx]);
	return (val);
}

void
vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
{
	uint32_t *lvtptr, mask, val;
	struct LAPIC *lapic;
	int idx;

	lapic = vlapic->apic_page;
	lvtptr = vlapic_get_lvtptr(vlapic, offset);
	val = *lvtptr;
	idx = lvt_off_to_idx(offset);

	if (!(lapic->svr & APIC_SVR_ENABLE))
		val |= APIC_LVT_M;
	mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
	switch (offset) {
	case APIC_OFFSET_TIMER_LVT:
		mask |= APIC_LVTT_TM;
		break;
	case APIC_OFFSET_ERROR_LVT:
		break;
	case APIC_OFFSET_LINT0_LVT:
	case APIC_OFFSET_LINT1_LVT:
		mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
		/* FALLTHROUGH */
	default:
		mask |= APIC_LVT_DM;
		break;
	}
	val &= mask;
	*lvtptr = val;
	atomic_store_rel_32(&vlapic->lvt_last[idx], val);
}

static void
vlapic_mask_lvts(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	lapic->lvt_cmci |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);

	lapic->lvt_timer |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);

	lapic->lvt_thermal |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);

	lapic->lvt_pcint |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);

	lapic->lvt_lint0 |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);

	lapic->lvt_lint1 |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);

	lapic->lvt_error |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
}

static int
vlapic_fire_lvt(struct vlapic *vlapic, uint32_t lvt)
{
	uint32_t vec, mode;

	if (lvt & APIC_LVT_M)
		return (0);

	vec = lvt & APIC_LVT_VECTOR;
	mode = lvt & APIC_LVT_DM;

	switch (mode) {
	case APIC_LVT_DM_FIXED:
		if (vec < 16) {
			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR);
			return (0);
		}
		if (vlapic_set_intr_ready(vlapic, vec, false))
			vcpu_notify_event(vlapic->vm, vlapic->vcpuid, true);
		break;
	case APIC_LVT_DM_NMI:
		vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
		break;
	case APIC_LVT_DM_EXTINT:
		vm_inject_extint(vlapic->vm, vlapic->vcpuid);
		break;
	default:
		/* Other modes are ignored. */
		return (0);
	}
	return (1);
}

#if 1
static void
dump_isrvec_stk(struct vlapic *vlapic)
{
	int i;
	uint32_t *isrptr;

	isrptr = &vlapic->apic_page->isr0;
	for (i = 0; i < 8; i++)
		printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);

	for (i = 0; i <= vlapic->isrvec_stk_top; i++)
		printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]);
}
#endif

/*
 * Algorithm adopted from section "Interrupt, Task and Processor Priority"
 * in Intel Architecture Manual Vol 3a.
 */
static void
vlapic_update_ppr(struct vlapic *vlapic)
{
	int isrvec, tpr, ppr;

	/*
	 * Note that the value on the stack at index 0 is always 0.
	 *
	 * This is a placeholder for the value of ISRV when none of the
	 * bits is set in the ISRx registers.
	 */
	isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top];
	tpr = vlapic->apic_page->tpr;

#if 1
	{
		int i, lastprio, curprio, vector, idx;
		uint32_t *isrptr;

		if (vlapic->isrvec_stk_top == 0 && isrvec != 0)
			panic("isrvec_stk is corrupted: %d", isrvec);

		/*
		 * Make sure that the priority of the nested interrupts is
		 * always increasing.
		 */
		lastprio = -1;
		for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
			curprio = PRIO(vlapic->isrvec_stk[i]);
			if (curprio <= lastprio) {
				dump_isrvec_stk(vlapic);
				panic("isrvec_stk does not satisfy invariant");
			}
			lastprio = curprio;
		}

		/*
		 * Make sure that each bit set in the ISRx registers has a
		 * corresponding entry on the isrvec stack.
		 */
		i = 1;
		isrptr = &vlapic->apic_page->isr0;
		for (vector = 0; vector < 256; vector++) {
			idx = (vector / 32) * 4;
			if (isrptr[idx] & (1 << (vector % 32))) {
				if (i > vlapic->isrvec_stk_top ||
				    vlapic->isrvec_stk[i] != vector) {
					dump_isrvec_stk(vlapic);
					panic("ISR and isrvec_stk out of sync");
				}
				i++;
			}
		}
	}
#endif

	if (PRIO(tpr) >= PRIO(isrvec))
		ppr = tpr;
	else
		ppr = isrvec & 0xf0;

	vlapic->apic_page->ppr = ppr;
	VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
}

static void
vlapic_process_eoi(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *isrptr, *tmrptr;
	int i, idx, bitpos, vector;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;

	/*
	 * The x86 architecture reserves the first 32 vectors for use
	 * by the processor.
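	 * Hence the scan below walks ISR words 7 down to 1 and never
	 * examines word 0 (vectors 0-31).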
	 */
	for (i = 7; i > 0; i--) {
		idx = i * 4;
		bitpos = fls(isrptr[idx]);
		if (bitpos-- != 0) {
			if (vlapic->isrvec_stk_top <= 0) {
				panic("invalid vlapic isrvec_stk_top %d",
				    vlapic->isrvec_stk_top);
			}
			isrptr[idx] &= ~(1 << bitpos);
			VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi");
			vlapic->isrvec_stk_top--;
			vlapic_update_ppr(vlapic);
			if ((tmrptr[idx] & (1 << bitpos)) != 0) {
				vector = i * 32 + bitpos;
				vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
				    vector);
			}
			return;
		}
	}
}

static __inline int
vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
{

	return (lvt & mask);
}

static __inline int
vlapic_periodic_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);

	return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
}

static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");

void
vlapic_set_error(struct vlapic *vlapic, uint32_t mask)
{
	uint32_t lvt;

	vlapic->esr_pending |= mask;
	if (vlapic->esr_firing)
		return;
	vlapic->esr_firing = 1;

	/* The error LVT always uses the fixed delivery mode. */
	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT);
	if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1);
	}
	vlapic->esr_firing = 0;
}

static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");

static void
vlapic_fire_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked"));

	/* The timer LVT always uses the fixed delivery mode. */
	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
	if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_CMC,
    "corrected machine check interrupts generated by vlapic");

void
vlapic_fire_cmci(struct vlapic *vlapic)
{
	uint32_t lvt;

	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT);
	if (vlapic_fire_lvt(vlapic, lvt)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1);
	}
}

static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1,
    "lvts triggered");

int
vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
{
	uint32_t lvt;

	if (vlapic_enabled(vlapic) == false) {
		/*
		 * When the local APIC is global/hardware disabled,
		 * LINT[1:0] pins are configured as INTR and NMI pins,
		 * respectively.
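		 *
		 * Note that 'vector' here is an LVT index (APIC_LVT_LINT0,
		 * APIC_LVT_LINT1, ...), not an interrupt vector.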
662 */ 663 switch (vector) { 664 case APIC_LVT_LINT0: 665 vm_inject_extint(vlapic->vm, vlapic->vcpuid); 666 break; 667 case APIC_LVT_LINT1: 668 vm_inject_nmi(vlapic->vm, vlapic->vcpuid); 669 break; 670 default: 671 break; 672 } 673 return (0); 674 } 675 676 switch (vector) { 677 case APIC_LVT_LINT0: 678 lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT0_LVT); 679 break; 680 case APIC_LVT_LINT1: 681 lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT1_LVT); 682 break; 683 case APIC_LVT_TIMER: 684 lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT); 685 lvt |= APIC_LVT_DM_FIXED; 686 break; 687 case APIC_LVT_ERROR: 688 lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT); 689 lvt |= APIC_LVT_DM_FIXED; 690 break; 691 case APIC_LVT_PMC: 692 lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_PERF_LVT); 693 break; 694 case APIC_LVT_THERMAL: 695 lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_THERM_LVT); 696 break; 697 case APIC_LVT_CMCI: 698 lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT); 699 break; 700 default: 701 return (EINVAL); 702 } 703 if (vlapic_fire_lvt(vlapic, lvt)) { 704 vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, 705 LVTS_TRIGGERRED, vector, 1); 706 } 707 return (0); 708} 709 710static void 711vlapic_callout_handler(void *arg) 712{ 713 struct vlapic *vlapic; 714 struct bintime bt, btnow; 715 sbintime_t rem_sbt; 716 717 vlapic = arg; 718 719 VLAPIC_TIMER_LOCK(vlapic); 720 if (callout_pending(&vlapic->callout)) /* callout was reset */ 721 goto done; 722 723 if (!callout_active(&vlapic->callout)) /* callout was stopped */ 724 goto done; 725 726 callout_deactivate(&vlapic->callout); 727 728 vlapic_fire_timer(vlapic); 729 730 if (vlapic_periodic_timer(vlapic)) { 731 binuptime(&btnow); 732 KASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=), 733 ("vlapic callout at %#lx.%#lx, expected at %#lx.#%lx", 734 btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec, 735 vlapic->timer_fire_bt.frac)); 736 737 /* 738 * Compute the delta between when the timer was supposed to 739 * fire and the present time. 740 */ 741 bt = btnow; 742 bintime_sub(&bt, &vlapic->timer_fire_bt); 743 744 rem_sbt = bttosbt(vlapic->timer_period_bt); 745 if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) { 746 /* 747 * Adjust the time until the next countdown downward 748 * to account for the lost time. 749 */ 750 rem_sbt -= bttosbt(bt); 751 } else { 752 /* 753 * If the delta is greater than the timer period then 754 * just reset our time base instead of trying to catch 755 * up. 
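			 * (For example, with a 10 ms period a callout that
			 * fires 2 ms late is rescheduled 8 ms out; only when
			 * it is more than a full period late do we resync
			 * the time base as done here.)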
756 */ 757 vlapic->timer_fire_bt = btnow; 758 VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu " 759 "usecs, period is %lu usecs - resetting time base", 760 bttosbt(bt) / SBT_1US, 761 bttosbt(vlapic->timer_period_bt) / SBT_1US); 762 } 763 764 bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt); 765 callout_reset_sbt(&vlapic->callout, rem_sbt, 0, 766 vlapic_callout_handler, vlapic, 0); 767 } 768done: 769 VLAPIC_TIMER_UNLOCK(vlapic); 770} 771 772void 773vlapic_icrtmr_write_handler(struct vlapic *vlapic) 774{ 775 struct LAPIC *lapic; 776 sbintime_t sbt; 777 uint32_t icr_timer; 778 779 VLAPIC_TIMER_LOCK(vlapic); 780 781 lapic = vlapic->apic_page; 782 icr_timer = lapic->icr_timer; 783 784 vlapic->timer_period_bt = vlapic->timer_freq_bt; 785 bintime_mul(&vlapic->timer_period_bt, icr_timer); 786 787 if (icr_timer != 0) { 788 binuptime(&vlapic->timer_fire_bt); 789 bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt); 790 791 sbt = bttosbt(vlapic->timer_period_bt); 792 callout_reset_sbt(&vlapic->callout, sbt, 0, 793 vlapic_callout_handler, vlapic, 0); 794 } else 795 callout_stop(&vlapic->callout); 796 797 VLAPIC_TIMER_UNLOCK(vlapic); 798} 799 800/* 801 * This function populates 'dmask' with the set of vcpus that match the 802 * addressing specified by the (dest, phys, lowprio) tuple. 803 * 804 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit) 805 * or xAPIC (8-bit) destination field. 806 */ 807static void 808vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys, 809 bool lowprio, bool x2apic_dest) 810{ 811 struct vlapic *vlapic; 812 uint32_t dfr, ldr, ldest, cluster; 813 uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id; 814 cpuset_t amask; 815 int vcpuid; 816 817 if ((x2apic_dest && dest == 0xffffffff) || 818 (!x2apic_dest && dest == 0xff)) { 819 /* 820 * Broadcast in both logical and physical modes. 821 */ 822 *dmask = vm_active_cpus(vm); 823 return; 824 } 825 826 if (phys) { 827 /* 828 * Physical mode: destination is APIC ID. 829 */ 830 CPU_ZERO(dmask); 831 vcpuid = vm_apicid2vcpuid(vm, dest); 832 if (vcpuid < VM_MAXCPU) 833 CPU_SET(vcpuid, dmask); 834 } else { 835 /* 836 * In the "Flat Model" the MDA is interpreted as an 8-bit wide 837 * bitmask. This model is only avilable in the xAPIC mode. 838 */ 839 mda_flat_ldest = dest & 0xff; 840 841 /* 842 * In the "Cluster Model" the MDA is used to identify a 843 * specific cluster and a set of APICs in that cluster. 844 */ 845 if (x2apic_dest) { 846 mda_cluster_id = dest >> 16; 847 mda_cluster_ldest = dest & 0xffff; 848 } else { 849 mda_cluster_id = (dest >> 4) & 0xf; 850 mda_cluster_ldest = dest & 0xf; 851 } 852 853 /* 854 * Logical mode: match each APIC that has a bit set 855 * in it's LDR that matches a bit in the ldest. 
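		 * For example, in xAPIC flat mode an MDA of 0x3 selects the
		 * vcpus whose LDR has bit 24 or bit 25 set.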
856 */ 857 CPU_ZERO(dmask); 858 amask = vm_active_cpus(vm); 859 while ((vcpuid = CPU_FFS(&amask)) != 0) { 860 vcpuid--; 861 CPU_CLR(vcpuid, &amask); 862 863 vlapic = vm_lapic(vm, vcpuid); 864 dfr = vlapic->apic_page->dfr; 865 ldr = vlapic->apic_page->ldr; 866 867 if ((dfr & APIC_DFR_MODEL_MASK) == 868 APIC_DFR_MODEL_FLAT) { 869 ldest = ldr >> 24; 870 mda_ldest = mda_flat_ldest; 871 } else if ((dfr & APIC_DFR_MODEL_MASK) == 872 APIC_DFR_MODEL_CLUSTER) { 873 if (x2apic(vlapic)) { 874 cluster = ldr >> 16; 875 ldest = ldr & 0xffff; 876 } else { 877 cluster = ldr >> 28; 878 ldest = (ldr >> 24) & 0xf; 879 } 880 if (cluster != mda_cluster_id) 881 continue; 882 mda_ldest = mda_cluster_ldest; 883 } else { 884 /* 885 * Guest has configured a bad logical 886 * model for this vcpu - skip it. 887 */ 888 VLAPIC_CTR1(vlapic, "vlapic has bad logical " 889 "model %x - cannot deliver interrupt", dfr); 890 continue; 891 } 892 893 if ((mda_ldest & ldest) != 0) { 894 CPU_SET(vcpuid, dmask); 895 if (lowprio) 896 break; 897 } 898 } 899 } 900} 901 902static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu"); 903 904int 905vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu) 906{ 907 int i; 908 bool phys; 909 cpuset_t dmask; 910 uint64_t icrval; 911 uint32_t dest, vec, mode; 912 struct vlapic *vlapic2; 913 struct vm_exit *vmexit; 914 struct LAPIC *lapic; 915 916 lapic = vlapic->apic_page; 917 lapic->icr_lo &= ~APIC_DELSTAT_PEND; 918 icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo; 919 920 if (x2apic(vlapic)) 921 dest = icrval >> 32; 922 else 923 dest = icrval >> (32 + 24); 924 vec = icrval & APIC_VECTOR_MASK; 925 mode = icrval & APIC_DELMODE_MASK; 926 927 if (mode == APIC_DELMODE_FIXED && vec < 16) { 928 vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR); 929 VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec); 930 return (0); 931 } 932 933 VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec); 934 935 if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) { 936 switch (icrval & APIC_DEST_MASK) { 937 case APIC_DEST_DESTFLD: 938 phys = ((icrval & APIC_DESTMODE_LOG) == 0); 939 vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false, 940 x2apic(vlapic)); 941 break; 942 case APIC_DEST_SELF: 943 CPU_SETOF(vlapic->vcpuid, &dmask); 944 break; 945 case APIC_DEST_ALLISELF: 946 dmask = vm_active_cpus(vlapic->vm); 947 break; 948 case APIC_DEST_ALLESELF: 949 dmask = vm_active_cpus(vlapic->vm); 950 CPU_CLR(vlapic->vcpuid, &dmask); 951 break; 952 default: 953 CPU_ZERO(&dmask); /* satisfy gcc */ 954 break; 955 } 956 957 while ((i = CPU_FFS(&dmask)) != 0) { 958 i--; 959 CPU_CLR(i, &dmask); 960 if (mode == APIC_DELMODE_FIXED) { 961 lapic_intr_edge(vlapic->vm, i, vec); 962 vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, 963 IPIS_SENT, i, 1); 964 VLAPIC_CTR2(vlapic, "vlapic sending ipi %d " 965 "to vcpuid %d", vec, i); 966 } else { 967 vm_inject_nmi(vlapic->vm, i); 968 VLAPIC_CTR1(vlapic, "vlapic sending ipi nmi " 969 "to vcpuid %d", i); 970 } 971 } 972 973 return (0); /* handled completely in the kernel */ 974 } 975 976 if (mode == APIC_DELMODE_INIT) { 977 if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) 978 return (0); 979 980 if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) { 981 vlapic2 = vm_lapic(vlapic->vm, dest); 982 983 /* move from INIT to waiting-for-SIPI state */ 984 if (vlapic2->boot_state == BS_INIT) { 985 vlapic2->boot_state = BS_SIPI; 986 } 987 988 return (0); 989 } 990 } 991 992 if (mode == APIC_DELMODE_STARTUP) { 993 if (vlapic->vcpuid == 0 && dest != 0 && 
dest < VM_MAXCPU) { 994 vlapic2 = vm_lapic(vlapic->vm, dest); 995 996 /* 997 * Ignore SIPIs in any state other than wait-for-SIPI 998 */ 999 if (vlapic2->boot_state != BS_SIPI) 1000 return (0); 1001 1002 /* 1003 * XXX this assumes that the startup IPI always succeeds 1004 */ 1005 vlapic2->boot_state = BS_RUNNING; 1006 vm_activate_cpu(vlapic2->vm, dest); 1007 1008 *retu = true; 1009 vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid); 1010 vmexit->exitcode = VM_EXITCODE_SPINUP_AP; 1011 vmexit->u.spinup_ap.vcpu = dest; 1012 vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT; 1013 1014 return (0); 1015 } 1016 } 1017 1018 /* 1019 * This will cause a return to userland. 1020 */ 1021 return (1); 1022} 1023 1024void 1025vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val) 1026{ 1027 int vec; 1028 1029 KASSERT(x2apic(vlapic), ("SELF_IPI does not exist in xAPIC mode")); 1030 1031 vec = val & 0xff; 1032 lapic_intr_edge(vlapic->vm, vlapic->vcpuid, vec); 1033 vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, IPIS_SENT, 1034 vlapic->vcpuid, 1); 1035 VLAPIC_CTR1(vlapic, "vlapic self-ipi %d", vec); 1036} 1037 1038int 1039vlapic_pending_intr(struct vlapic *vlapic, int *vecptr) 1040{ 1041 struct LAPIC *lapic = vlapic->apic_page; 1042 int idx, i, bitpos, vector; 1043 uint32_t *irrptr, val; 1044 1045 if (vlapic->ops.pending_intr) 1046 return ((*vlapic->ops.pending_intr)(vlapic, vecptr)); 1047 1048 irrptr = &lapic->irr0; 1049 1050 /* 1051 * The x86 architecture reserves the the first 32 vectors for use 1052 * by the processor. 1053 */ 1054 for (i = 7; i > 0; i--) { 1055 idx = i * 4; 1056 val = atomic_load_acq_int(&irrptr[idx]); 1057 bitpos = fls(val); 1058 if (bitpos != 0) { 1059 vector = i * 32 + (bitpos - 1); 1060 if (PRIO(vector) > PRIO(lapic->ppr)) { 1061 VLAPIC_CTR1(vlapic, "pending intr %d", vector); 1062 if (vecptr != NULL) 1063 *vecptr = vector; 1064 return (1); 1065 } else 1066 break; 1067 } 1068 } 1069 return (0); 1070} 1071 1072void 1073vlapic_intr_accepted(struct vlapic *vlapic, int vector) 1074{ 1075 struct LAPIC *lapic = vlapic->apic_page; 1076 uint32_t *irrptr, *isrptr; 1077 int idx, stk_top; 1078 1079 if (vlapic->ops.intr_accepted) 1080 return ((*vlapic->ops.intr_accepted)(vlapic, vector)); 1081 1082 /* 1083 * clear the ready bit for vector being accepted in irr 1084 * and set the vector as in service in isr. 1085 */ 1086 idx = (vector / 32) * 4; 1087 1088 irrptr = &lapic->irr0; 1089 atomic_clear_int(&irrptr[idx], 1 << (vector % 32)); 1090 VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted"); 1091 1092 isrptr = &lapic->isr0; 1093 isrptr[idx] |= 1 << (vector % 32); 1094 VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted"); 1095 1096 /* 1097 * Update the PPR 1098 */ 1099 vlapic->isrvec_stk_top++; 1100 1101 stk_top = vlapic->isrvec_stk_top; 1102 if (stk_top >= ISRVEC_STK_SIZE) 1103 panic("isrvec_stk_top overflow %d", stk_top); 1104 1105 vlapic->isrvec_stk[stk_top] = vector; 1106 vlapic_update_ppr(vlapic); 1107} 1108 1109void 1110vlapic_svr_write_handler(struct vlapic *vlapic) 1111{ 1112 struct LAPIC *lapic; 1113 uint32_t old, new, changed; 1114 1115 lapic = vlapic->apic_page; 1116 1117 new = lapic->svr; 1118 old = vlapic->svr_last; 1119 vlapic->svr_last = new; 1120 1121 changed = old ^ new; 1122 if ((changed & APIC_SVR_ENABLE) != 0) { 1123 if ((new & APIC_SVR_ENABLE) == 0) { 1124 /* 1125 * The apic is now disabled so stop the apic timer 1126 * and mask all the LVT entries. 
1127 */ 1128 VLAPIC_CTR0(vlapic, "vlapic is software-disabled"); 1129 VLAPIC_TIMER_LOCK(vlapic); 1130 callout_stop(&vlapic->callout); 1131 VLAPIC_TIMER_UNLOCK(vlapic); 1132 vlapic_mask_lvts(vlapic); 1133 } else { 1134 /* 1135 * The apic is now enabled so restart the apic timer 1136 * if it is configured in periodic mode. 1137 */ 1138 VLAPIC_CTR0(vlapic, "vlapic is software-enabled"); 1139 if (vlapic_periodic_timer(vlapic)) 1140 vlapic_icrtmr_write_handler(vlapic); 1141 } 1142 } 1143} 1144 1145int 1146vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset, 1147 uint64_t *data, bool *retu) 1148{ 1149 struct LAPIC *lapic = vlapic->apic_page; 1150 uint32_t *reg; 1151 int i; 1152 1153 /* Ignore MMIO accesses in x2APIC mode */ 1154 if (x2apic(vlapic) && mmio_access) { 1155 VLAPIC_CTR1(vlapic, "MMIO read from offset %#lx in x2APIC mode", 1156 offset); 1157 *data = 0; 1158 goto done; 1159 } 1160 1161 if (!x2apic(vlapic) && !mmio_access) { 1162 /* 1163 * XXX Generate GP fault for MSR accesses in xAPIC mode 1164 */ 1165 VLAPIC_CTR1(vlapic, "x2APIC MSR read from offset %#lx in " 1166 "xAPIC mode", offset); 1167 *data = 0; 1168 goto done; 1169 } 1170 1171 if (offset > sizeof(*lapic)) { 1172 *data = 0; 1173 goto done; 1174 } 1175 1176 offset &= ~3; 1177 switch(offset) 1178 { 1179 case APIC_OFFSET_ID: 1180 *data = lapic->id; 1181 break; 1182 case APIC_OFFSET_VER: 1183 *data = lapic->version; 1184 break; 1185 case APIC_OFFSET_TPR: 1186 *data = lapic->tpr; 1187 break; 1188 case APIC_OFFSET_APR: 1189 *data = lapic->apr; 1190 break; 1191 case APIC_OFFSET_PPR: 1192 *data = lapic->ppr; 1193 break; 1194 case APIC_OFFSET_EOI: 1195 *data = lapic->eoi; 1196 break; 1197 case APIC_OFFSET_LDR: 1198 *data = lapic->ldr; 1199 break; 1200 case APIC_OFFSET_DFR: 1201 *data = lapic->dfr; 1202 break; 1203 case APIC_OFFSET_SVR: 1204 *data = lapic->svr; 1205 break; 1206 case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: 1207 i = (offset - APIC_OFFSET_ISR0) >> 2; 1208 reg = &lapic->isr0; 1209 *data = *(reg + i); 1210 break; 1211 case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: 1212 i = (offset - APIC_OFFSET_TMR0) >> 2; 1213 reg = &lapic->tmr0; 1214 *data = *(reg + i); 1215 break; 1216 case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: 1217 i = (offset - APIC_OFFSET_IRR0) >> 2; 1218 reg = &lapic->irr0; 1219 *data = atomic_load_acq_int(reg + i); 1220 break; 1221 case APIC_OFFSET_ESR: 1222 *data = lapic->esr; 1223 break; 1224 case APIC_OFFSET_ICR_LOW: 1225 *data = lapic->icr_lo; 1226 if (x2apic(vlapic)) 1227 *data |= (uint64_t)lapic->icr_hi << 32; 1228 break; 1229 case APIC_OFFSET_ICR_HI: 1230 *data = lapic->icr_hi; 1231 break; 1232 case APIC_OFFSET_CMCI_LVT: 1233 case APIC_OFFSET_TIMER_LVT ... 
APIC_OFFSET_ERROR_LVT: 1234 *data = vlapic_get_lvt(vlapic, offset); 1235#ifdef INVARIANTS 1236 reg = vlapic_get_lvtptr(vlapic, offset); 1237 KASSERT(*data == *reg, ("inconsistent lvt value at " 1238 "offset %#lx: %#lx/%#x", offset, *data, *reg)); 1239#endif 1240 break; 1241 case APIC_OFFSET_TIMER_ICR: 1242 *data = lapic->icr_timer; 1243 break; 1244 case APIC_OFFSET_TIMER_CCR: 1245 *data = vlapic_get_ccr(vlapic); 1246 break; 1247 case APIC_OFFSET_TIMER_DCR: 1248 *data = lapic->dcr_timer; 1249 break; 1250 case APIC_OFFSET_SELF_IPI: 1251 /* 1252 * XXX generate a GP fault if vlapic is in x2apic mode 1253 */ 1254 *data = 0; 1255 break; 1256 case APIC_OFFSET_RRR: 1257 default: 1258 *data = 0; 1259 break; 1260 } 1261done: 1262 VLAPIC_CTR2(vlapic, "vlapic read offset %#x, data %#lx", offset, *data); 1263 return 0; 1264} 1265 1266int 1267vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset, 1268 uint64_t data, bool *retu) 1269{ 1270 struct LAPIC *lapic = vlapic->apic_page; 1271 uint32_t *regptr; 1272 int retval; 1273 1274 KASSERT((offset & 0xf) == 0 && offset < PAGE_SIZE, 1275 ("vlapic_write: invalid offset %#lx", offset)); 1276 1277 VLAPIC_CTR2(vlapic, "vlapic write offset %#lx, data %#lx", 1278 offset, data); 1279 1280 if (offset > sizeof(*lapic)) 1281 return (0); 1282 1283 /* Ignore MMIO accesses in x2APIC mode */ 1284 if (x2apic(vlapic) && mmio_access) { 1285 VLAPIC_CTR2(vlapic, "MMIO write of %#lx to offset %#lx " 1286 "in x2APIC mode", data, offset); 1287 return (0); 1288 } 1289 1290 /* 1291 * XXX Generate GP fault for MSR accesses in xAPIC mode 1292 */ 1293 if (!x2apic(vlapic) && !mmio_access) { 1294 VLAPIC_CTR2(vlapic, "x2APIC MSR write of %#lx to offset %#lx " 1295 "in xAPIC mode", data, offset); 1296 return (0); 1297 } 1298 1299 retval = 0; 1300 switch(offset) 1301 { 1302 case APIC_OFFSET_ID: 1303 lapic->id = data; 1304 vlapic_id_write_handler(vlapic); 1305 break; 1306 case APIC_OFFSET_TPR: 1307 lapic->tpr = data & 0xff; 1308 vlapic_update_ppr(vlapic); 1309 break; 1310 case APIC_OFFSET_EOI: 1311 vlapic_process_eoi(vlapic); 1312 break; 1313 case APIC_OFFSET_LDR: 1314 lapic->ldr = data; 1315 vlapic_ldr_write_handler(vlapic); 1316 break; 1317 case APIC_OFFSET_DFR: 1318 lapic->dfr = data; 1319 vlapic_dfr_write_handler(vlapic); 1320 break; 1321 case APIC_OFFSET_SVR: 1322 lapic->svr = data; 1323 vlapic_svr_write_handler(vlapic); 1324 break; 1325 case APIC_OFFSET_ICR_LOW: 1326 lapic->icr_lo = data; 1327 if (x2apic(vlapic)) 1328 lapic->icr_hi = data >> 32; 1329 retval = vlapic_icrlo_write_handler(vlapic, retu); 1330 break; 1331 case APIC_OFFSET_ICR_HI: 1332 lapic->icr_hi = data; 1333 break; 1334 case APIC_OFFSET_CMCI_LVT: 1335 case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: 1336 regptr = vlapic_get_lvtptr(vlapic, offset); 1337 *regptr = data; 1338 vlapic_lvt_write_handler(vlapic, offset); 1339 break; 1340 case APIC_OFFSET_TIMER_ICR: 1341 lapic->icr_timer = data; 1342 vlapic_icrtmr_write_handler(vlapic); 1343 break; 1344 1345 case APIC_OFFSET_TIMER_DCR: 1346 lapic->dcr_timer = data; 1347 vlapic_dcr_write_handler(vlapic); 1348 break; 1349 1350 case APIC_OFFSET_ESR: 1351 vlapic_esr_write_handler(vlapic); 1352 break; 1353 1354 case APIC_OFFSET_SELF_IPI: 1355 if (x2apic(vlapic)) 1356 vlapic_self_ipi_handler(vlapic, data); 1357 break; 1358 1359 case APIC_OFFSET_VER: 1360 case APIC_OFFSET_APR: 1361 case APIC_OFFSET_PPR: 1362 case APIC_OFFSET_RRR: 1363 case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: 1364 case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: 1365 case APIC_OFFSET_IRR0 ... 
APIC_OFFSET_IRR7: 1366 case APIC_OFFSET_TIMER_CCR: 1367 default: 1368 // Read only. 1369 break; 1370 } 1371 1372 return (retval); 1373} 1374 1375static void 1376vlapic_reset(struct vlapic *vlapic) 1377{ 1378 struct LAPIC *lapic; 1379 1380 lapic = vlapic->apic_page; 1381 bzero(lapic, sizeof(struct LAPIC)); 1382 1383 lapic->id = vlapic_get_id(vlapic); 1384 lapic->version = VLAPIC_VERSION; 1385 lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT); 1386 lapic->dfr = 0xffffffff; 1387 lapic->svr = APIC_SVR_VECTOR; 1388 vlapic_mask_lvts(vlapic); 1389 vlapic_reset_tmr(vlapic); 1390 1391 lapic->dcr_timer = 0; 1392 vlapic_dcr_write_handler(vlapic); 1393 1394 if (vlapic->vcpuid == 0) 1395 vlapic->boot_state = BS_RUNNING; /* BSP */ 1396 else 1397 vlapic->boot_state = BS_INIT; /* AP */ 1398 1399 vlapic->svr_last = lapic->svr; 1400} 1401 1402void 1403vlapic_init(struct vlapic *vlapic) 1404{ 1405 KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized")); 1406 KASSERT(vlapic->vcpuid >= 0 && vlapic->vcpuid < VM_MAXCPU, 1407 ("vlapic_init: vcpuid is not initialized")); 1408 KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not " 1409 "initialized")); 1410 1411 /* 1412 * If the vlapic is configured in x2apic mode then it will be 1413 * accessed in the critical section via the MSR emulation code. 1414 * 1415 * Therefore the timer mutex must be a spinlock because blockable 1416 * mutexes cannot be acquired in a critical section. 1417 */ 1418 mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN); 1419 callout_init(&vlapic->callout, 1); 1420 1421 vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED; 1422 1423 if (vlapic->vcpuid == 0) 1424 vlapic->msr_apicbase |= APICBASE_BSP; 1425 1426 vlapic_reset(vlapic); 1427} 1428 1429void 1430vlapic_cleanup(struct vlapic *vlapic) 1431{ 1432 1433 callout_drain(&vlapic->callout); 1434} 1435 1436uint64_t 1437vlapic_get_apicbase(struct vlapic *vlapic) 1438{ 1439 1440 return (vlapic->msr_apicbase); 1441} 1442 1443int 1444vlapic_set_apicbase(struct vlapic *vlapic, uint64_t new) 1445{ 1446 1447 if (vlapic->msr_apicbase != new) { 1448 VLAPIC_CTR2(vlapic, "Changing APIC_BASE MSR from %#lx to %#lx " 1449 "not supported", vlapic->msr_apicbase, new); 1450 return (-1); 1451 } 1452 1453 return (0); 1454} 1455 1456void 1457vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) 1458{ 1459 struct vlapic *vlapic; 1460 struct LAPIC *lapic; 1461 1462 vlapic = vm_lapic(vm, vcpuid); 1463 1464 if (state == X2APIC_DISABLED) 1465 vlapic->msr_apicbase &= ~APICBASE_X2APIC; 1466 else 1467 vlapic->msr_apicbase |= APICBASE_X2APIC; 1468 1469 /* 1470 * Reset the local APIC registers whose values are mode-dependent. 1471 * 1472 * XXX this works because the APIC mode can be changed only at vcpu 1473 * initialization time. 
1474 */ 1475 lapic = vlapic->apic_page; 1476 lapic->id = vlapic_get_id(vlapic); 1477 if (x2apic(vlapic)) { 1478 lapic->ldr = x2apic_ldr(vlapic); 1479 lapic->dfr = 0; 1480 } else { 1481 lapic->ldr = 0; 1482 lapic->dfr = 0xffffffff; 1483 } 1484 1485 if (state == X2APIC_ENABLED) { 1486 if (vlapic->ops.enable_x2apic_mode) 1487 (*vlapic->ops.enable_x2apic_mode)(vlapic); 1488 } 1489} 1490 1491void 1492vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys, 1493 int delmode, int vec) 1494{ 1495 bool lowprio; 1496 int vcpuid; 1497 cpuset_t dmask; 1498 1499 if (delmode != IOART_DELFIXED && 1500 delmode != IOART_DELLOPRI && 1501 delmode != IOART_DELEXINT) { 1502 VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode); 1503 return; 1504 } 1505 lowprio = (delmode == IOART_DELLOPRI); 1506 1507 /* 1508 * We don't provide any virtual interrupt redirection hardware so 1509 * all interrupts originating from the ioapic or MSI specify the 1510 * 'dest' in the legacy xAPIC format. 1511 */ 1512 vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false); 1513 1514 while ((vcpuid = CPU_FFS(&dmask)) != 0) { 1515 vcpuid--; 1516 CPU_CLR(vcpuid, &dmask); 1517 if (delmode == IOART_DELEXINT) { 1518 vm_inject_extint(vm, vcpuid); 1519 } else { 1520 lapic_set_intr(vm, vcpuid, vec, level); 1521 } 1522 } 1523} 1524 1525void 1526vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum) 1527{ 1528 /* 1529 * Post an interrupt to the vcpu currently running on 'hostcpu'. 1530 * 1531 * This is done by leveraging features like Posted Interrupts (Intel) 1532 * Doorbell MSR (AMD AVIC) that avoid a VM exit. 1533 * 1534 * If neither of these features are available then fallback to 1535 * sending an IPI to 'hostcpu'. 1536 */ 1537 if (vlapic->ops.post_intr) 1538 (*vlapic->ops.post_intr)(vlapic, hostcpu); 1539 else 1540 ipi_cpu(hostcpu, ipinum); 1541} 1542 1543bool 1544vlapic_enabled(struct vlapic *vlapic) 1545{ 1546 struct LAPIC *lapic = vlapic->apic_page; 1547 1548 if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 && 1549 (lapic->svr & APIC_SVR_ENABLE) != 0) 1550 return (true); 1551 else 1552 return (false); 1553} 1554 1555static void 1556vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level) 1557{ 1558 struct LAPIC *lapic; 1559 uint32_t *tmrptr, mask; 1560 int idx; 1561 1562 lapic = vlapic->apic_page; 1563 tmrptr = &lapic->tmr0; 1564 idx = (vector / 32) * 4; 1565 mask = 1 << (vector % 32); 1566 if (level) 1567 tmrptr[idx] |= mask; 1568 else 1569 tmrptr[idx] &= ~mask; 1570 1571 if (vlapic->ops.set_tmr != NULL) 1572 (*vlapic->ops.set_tmr)(vlapic, vector, level); 1573} 1574 1575void 1576vlapic_reset_tmr(struct vlapic *vlapic) 1577{ 1578 int vector; 1579 1580 VLAPIC_CTR0(vlapic, "vlapic resetting all vectors to edge-triggered"); 1581 1582 for (vector = 0; vector <= 255; vector++) 1583 vlapic_set_tmr(vlapic, vector, false); 1584} 1585 1586void 1587vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys, 1588 int delmode, int vector) 1589{ 1590 cpuset_t dmask; 1591 bool lowprio; 1592 1593 KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector)); 1594 1595 /* 1596 * A level trigger is valid only for fixed and lowprio delivery modes. 
	 */
	if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) {
		VLAPIC_CTR1(vlapic, "Ignoring level trigger-mode for "
		    "delivery-mode %d", delmode);
		return;
	}

	lowprio = (delmode == APIC_DELMODE_LOWPRIO);
	vlapic_calcdest(vlapic->vm, &dmask, dest, phys, lowprio, false);

	if (!CPU_ISSET(vlapic->vcpuid, &dmask))
		return;

	VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector);
	vlapic_set_tmr(vlapic, vector, true);
}