vmx.c revision 266641
1/*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD: head/sys/amd64/vmm/intel/vmx.c 266641 2014-05-25 00:57:24Z neel $ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: head/sys/amd64/vmm/intel/vmx.c 266641 2014-05-25 00:57:24Z neel $"); 31 32#include <sys/param.h> 33#include <sys/systm.h> 34#include <sys/smp.h> 35#include <sys/kernel.h> 36#include <sys/malloc.h> 37#include <sys/pcpu.h> 38#include <sys/proc.h> 39#include <sys/sysctl.h> 40 41#include <vm/vm.h> 42#include <vm/pmap.h> 43 44#include <machine/psl.h> 45#include <machine/cpufunc.h> 46#include <machine/md_var.h> 47#include <machine/segments.h> 48#include <machine/smp.h> 49#include <machine/specialreg.h> 50#include <machine/vmparam.h> 51 52#include <machine/vmm.h> 53#include <machine/vmm_dev.h> 54#include <machine/vmm_instruction_emul.h> 55#include "vmm_host.h" 56#include "vmm_ioport.h" 57#include "vmm_ipi.h" 58#include "vmm_msr.h" 59#include "vmm_ktr.h" 60#include "vmm_stat.h" 61#include "vatpic.h" 62#include "vlapic.h" 63#include "vlapic_priv.h" 64 65#include "vmx_msr.h" 66#include "ept.h" 67#include "vmx_cpufunc.h" 68#include "vmx.h" 69#include "x86.h" 70#include "vmx_controls.h" 71 72#define PINBASED_CTLS_ONE_SETTING \ 73 (PINBASED_EXTINT_EXITING | \ 74 PINBASED_NMI_EXITING | \ 75 PINBASED_VIRTUAL_NMI) 76#define PINBASED_CTLS_ZERO_SETTING 0 77 78#define PROCBASED_CTLS_WINDOW_SETTING \ 79 (PROCBASED_INT_WINDOW_EXITING | \ 80 PROCBASED_NMI_WINDOW_EXITING) 81 82#define PROCBASED_CTLS_ONE_SETTING \ 83 (PROCBASED_SECONDARY_CONTROLS | \ 84 PROCBASED_IO_EXITING | \ 85 PROCBASED_MSR_BITMAPS | \ 86 PROCBASED_CTLS_WINDOW_SETTING) 87#define PROCBASED_CTLS_ZERO_SETTING \ 88 (PROCBASED_CR3_LOAD_EXITING | \ 89 PROCBASED_CR3_STORE_EXITING | \ 90 PROCBASED_IO_BITMAPS) 91 92#define PROCBASED_CTLS2_ONE_SETTING PROCBASED2_ENABLE_EPT 93#define PROCBASED_CTLS2_ZERO_SETTING 0 94 95#define VM_EXIT_CTLS_ONE_SETTING_NO_PAT \ 96 (VM_EXIT_HOST_LMA | \ 97 VM_EXIT_SAVE_EFER | \ 98 VM_EXIT_LOAD_EFER) 99 100#define VM_EXIT_CTLS_ONE_SETTING \ 101 (VM_EXIT_CTLS_ONE_SETTING_NO_PAT | \ 102 VM_EXIT_ACKNOWLEDGE_INTERRUPT | \ 103 VM_EXIT_SAVE_PAT | \ 104 VM_EXIT_LOAD_PAT) 105#define VM_EXIT_CTLS_ZERO_SETTING VM_EXIT_SAVE_DEBUG_CONTROLS 106 107#define 
VM_ENTRY_CTLS_ONE_SETTING_NO_PAT VM_ENTRY_LOAD_EFER 108 109#define VM_ENTRY_CTLS_ONE_SETTING \ 110 (VM_ENTRY_CTLS_ONE_SETTING_NO_PAT | \ 111 VM_ENTRY_LOAD_PAT) 112#define VM_ENTRY_CTLS_ZERO_SETTING \ 113 (VM_ENTRY_LOAD_DEBUG_CONTROLS | \ 114 VM_ENTRY_INTO_SMM | \ 115 VM_ENTRY_DEACTIVATE_DUAL_MONITOR) 116 117#define guest_msr_rw(vmx, msr) \ 118 msr_bitmap_change_access((vmx)->msr_bitmap, (msr), MSR_BITMAP_ACCESS_RW) 119 120#define guest_msr_ro(vmx, msr) \ 121 msr_bitmap_change_access((vmx)->msr_bitmap, (msr), MSR_BITMAP_ACCESS_READ) 122 123#define HANDLED 1 124#define UNHANDLED 0 125 126static MALLOC_DEFINE(M_VMX, "vmx", "vmx"); 127static MALLOC_DEFINE(M_VLAPIC, "vlapic", "vlapic"); 128 129SYSCTL_DECL(_hw_vmm); 130SYSCTL_NODE(_hw_vmm, OID_AUTO, vmx, CTLFLAG_RW, NULL, NULL); 131 132int vmxon_enabled[MAXCPU]; 133static char vmxon_region[MAXCPU][PAGE_SIZE] __aligned(PAGE_SIZE); 134 135static uint32_t pinbased_ctls, procbased_ctls, procbased_ctls2; 136static uint32_t exit_ctls, entry_ctls; 137 138static uint64_t cr0_ones_mask, cr0_zeros_mask; 139SYSCTL_ULONG(_hw_vmm_vmx, OID_AUTO, cr0_ones_mask, CTLFLAG_RD, 140 &cr0_ones_mask, 0, NULL); 141SYSCTL_ULONG(_hw_vmm_vmx, OID_AUTO, cr0_zeros_mask, CTLFLAG_RD, 142 &cr0_zeros_mask, 0, NULL); 143 144static uint64_t cr4_ones_mask, cr4_zeros_mask; 145SYSCTL_ULONG(_hw_vmm_vmx, OID_AUTO, cr4_ones_mask, CTLFLAG_RD, 146 &cr4_ones_mask, 0, NULL); 147SYSCTL_ULONG(_hw_vmm_vmx, OID_AUTO, cr4_zeros_mask, CTLFLAG_RD, 148 &cr4_zeros_mask, 0, NULL); 149 150static int vmx_no_patmsr; 151 152static int vmx_initialized; 153SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, initialized, CTLFLAG_RD, 154 &vmx_initialized, 0, "Intel VMX initialized"); 155 156/* 157 * Optional capabilities 158 */ 159static int cap_halt_exit; 160static int cap_pause_exit; 161static int cap_unrestricted_guest; 162static int cap_monitor_trap; 163static int cap_invpcid; 164 165static int virtual_interrupt_delivery; 166SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, virtual_interrupt_delivery, CTLFLAG_RD, 167 &virtual_interrupt_delivery, 0, "APICv virtual interrupt delivery support"); 168 169static int posted_interrupts; 170SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, posted_interrupts, CTLFLAG_RD, 171 &posted_interrupts, 0, "APICv posted interrupt support"); 172 173static int pirvec; 174SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, posted_interrupt_vector, CTLFLAG_RD, 175 &pirvec, 0, "APICv posted interrupt vector"); 176 177static struct unrhdr *vpid_unr; 178static u_int vpid_alloc_failed; 179SYSCTL_UINT(_hw_vmm_vmx, OID_AUTO, vpid_alloc_failed, CTLFLAG_RD, 180 &vpid_alloc_failed, 0, NULL); 181 182/* 183 * Use the last page below 4GB as the APIC access address. This address is 184 * occupied by the boot firmware so it is guaranteed that it will not conflict 185 * with a page in system memory. 
186 */ 187#define APIC_ACCESS_ADDRESS 0xFFFFF000 188 189static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc); 190static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval); 191static void vmx_inject_pir(struct vlapic *vlapic); 192 193#ifdef KTR 194static const char * 195exit_reason_to_str(int reason) 196{ 197 static char reasonbuf[32]; 198 199 switch (reason) { 200 case EXIT_REASON_EXCEPTION: 201 return "exception"; 202 case EXIT_REASON_EXT_INTR: 203 return "extint"; 204 case EXIT_REASON_TRIPLE_FAULT: 205 return "triplefault"; 206 case EXIT_REASON_INIT: 207 return "init"; 208 case EXIT_REASON_SIPI: 209 return "sipi"; 210 case EXIT_REASON_IO_SMI: 211 return "iosmi"; 212 case EXIT_REASON_SMI: 213 return "smi"; 214 case EXIT_REASON_INTR_WINDOW: 215 return "intrwindow"; 216 case EXIT_REASON_NMI_WINDOW: 217 return "nmiwindow"; 218 case EXIT_REASON_TASK_SWITCH: 219 return "taskswitch"; 220 case EXIT_REASON_CPUID: 221 return "cpuid"; 222 case EXIT_REASON_GETSEC: 223 return "getsec"; 224 case EXIT_REASON_HLT: 225 return "hlt"; 226 case EXIT_REASON_INVD: 227 return "invd"; 228 case EXIT_REASON_INVLPG: 229 return "invlpg"; 230 case EXIT_REASON_RDPMC: 231 return "rdpmc"; 232 case EXIT_REASON_RDTSC: 233 return "rdtsc"; 234 case EXIT_REASON_RSM: 235 return "rsm"; 236 case EXIT_REASON_VMCALL: 237 return "vmcall"; 238 case EXIT_REASON_VMCLEAR: 239 return "vmclear"; 240 case EXIT_REASON_VMLAUNCH: 241 return "vmlaunch"; 242 case EXIT_REASON_VMPTRLD: 243 return "vmptrld"; 244 case EXIT_REASON_VMPTRST: 245 return "vmptrst"; 246 case EXIT_REASON_VMREAD: 247 return "vmread"; 248 case EXIT_REASON_VMRESUME: 249 return "vmresume"; 250 case EXIT_REASON_VMWRITE: 251 return "vmwrite"; 252 case EXIT_REASON_VMXOFF: 253 return "vmxoff"; 254 case EXIT_REASON_VMXON: 255 return "vmxon"; 256 case EXIT_REASON_CR_ACCESS: 257 return "craccess"; 258 case EXIT_REASON_DR_ACCESS: 259 return "draccess"; 260 case EXIT_REASON_INOUT: 261 return "inout"; 262 case EXIT_REASON_RDMSR: 263 return "rdmsr"; 264 case EXIT_REASON_WRMSR: 265 return "wrmsr"; 266 case EXIT_REASON_INVAL_VMCS: 267 return "invalvmcs"; 268 case EXIT_REASON_INVAL_MSR: 269 return "invalmsr"; 270 case EXIT_REASON_MWAIT: 271 return "mwait"; 272 case EXIT_REASON_MTF: 273 return "mtf"; 274 case EXIT_REASON_MONITOR: 275 return "monitor"; 276 case EXIT_REASON_PAUSE: 277 return "pause"; 278 case EXIT_REASON_MCE: 279 return "mce"; 280 case EXIT_REASON_TPR: 281 return "tpr"; 282 case EXIT_REASON_APIC_ACCESS: 283 return "apic-access"; 284 case EXIT_REASON_GDTR_IDTR: 285 return "gdtridtr"; 286 case EXIT_REASON_LDTR_TR: 287 return "ldtrtr"; 288 case EXIT_REASON_EPT_FAULT: 289 return "eptfault"; 290 case EXIT_REASON_EPT_MISCONFIG: 291 return "eptmisconfig"; 292 case EXIT_REASON_INVEPT: 293 return "invept"; 294 case EXIT_REASON_RDTSCP: 295 return "rdtscp"; 296 case EXIT_REASON_VMX_PREEMPT: 297 return "vmxpreempt"; 298 case EXIT_REASON_INVVPID: 299 return "invvpid"; 300 case EXIT_REASON_WBINVD: 301 return "wbinvd"; 302 case EXIT_REASON_XSETBV: 303 return "xsetbv"; 304 case EXIT_REASON_APIC_WRITE: 305 return "apic-write"; 306 default: 307 snprintf(reasonbuf, sizeof(reasonbuf), "%d", reason); 308 return (reasonbuf); 309 } 310} 311#endif /* KTR */ 312 313static int 314vmx_allow_x2apic_msrs(struct vmx *vmx) 315{ 316 int i, error; 317 318 error = 0; 319 320 /* 321 * Allow readonly access to the following x2APIC MSRs from the guest. 
322 */ 323 error += guest_msr_ro(vmx, MSR_APIC_ID); 324 error += guest_msr_ro(vmx, MSR_APIC_VERSION); 325 error += guest_msr_ro(vmx, MSR_APIC_LDR); 326 error += guest_msr_ro(vmx, MSR_APIC_SVR); 327 328 for (i = 0; i < 8; i++) 329 error += guest_msr_ro(vmx, MSR_APIC_ISR0 + i); 330 331 for (i = 0; i < 8; i++) 332 error += guest_msr_ro(vmx, MSR_APIC_TMR0 + i); 333 334 for (i = 0; i < 8; i++) 335 error += guest_msr_ro(vmx, MSR_APIC_IRR0 + i); 336 337 error += guest_msr_ro(vmx, MSR_APIC_ESR); 338 error += guest_msr_ro(vmx, MSR_APIC_LVT_TIMER); 339 error += guest_msr_ro(vmx, MSR_APIC_LVT_THERMAL); 340 error += guest_msr_ro(vmx, MSR_APIC_LVT_PCINT); 341 error += guest_msr_ro(vmx, MSR_APIC_LVT_LINT0); 342 error += guest_msr_ro(vmx, MSR_APIC_LVT_LINT1); 343 error += guest_msr_ro(vmx, MSR_APIC_LVT_ERROR); 344 error += guest_msr_ro(vmx, MSR_APIC_ICR_TIMER); 345 error += guest_msr_ro(vmx, MSR_APIC_DCR_TIMER); 346 error += guest_msr_ro(vmx, MSR_APIC_ICR); 347 348 /* 349 * Allow TPR, EOI and SELF_IPI MSRs to be read and written by the guest. 350 * 351 * These registers get special treatment described in the section 352 * "Virtualizing MSR-Based APIC Accesses". 353 */ 354 error += guest_msr_rw(vmx, MSR_APIC_TPR); 355 error += guest_msr_rw(vmx, MSR_APIC_EOI); 356 error += guest_msr_rw(vmx, MSR_APIC_SELF_IPI); 357 358 return (error); 359} 360 361u_long 362vmx_fix_cr0(u_long cr0) 363{ 364 365 return ((cr0 | cr0_ones_mask) & ~cr0_zeros_mask); 366} 367 368u_long 369vmx_fix_cr4(u_long cr4) 370{ 371 372 return ((cr4 | cr4_ones_mask) & ~cr4_zeros_mask); 373} 374 375static void 376vpid_free(int vpid) 377{ 378 if (vpid < 0 || vpid > 0xffff) 379 panic("vpid_free: invalid vpid %d", vpid); 380 381 /* 382 * VPIDs [0,VM_MAXCPU] are special and are not allocated from 383 * the unit number allocator. 384 */ 385 386 if (vpid > VM_MAXCPU) 387 free_unr(vpid_unr, vpid); 388} 389 390static void 391vpid_alloc(uint16_t *vpid, int num) 392{ 393 int i, x; 394 395 if (num <= 0 || num > VM_MAXCPU) 396 panic("invalid number of vpids requested: %d", num); 397 398 /* 399 * If the "enable vpid" execution control is not enabled then the 400 * VPID is required to be 0 for all vcpus. 401 */ 402 if ((procbased_ctls2 & PROCBASED2_ENABLE_VPID) == 0) { 403 for (i = 0; i < num; i++) 404 vpid[i] = 0; 405 return; 406 } 407 408 /* 409 * Allocate a unique VPID for each vcpu from the unit number allocator. 410 */ 411 for (i = 0; i < num; i++) { 412 x = alloc_unr(vpid_unr); 413 if (x == -1) 414 break; 415 else 416 vpid[i] = x; 417 } 418 419 if (i < num) { 420 atomic_add_int(&vpid_alloc_failed, 1); 421 422 /* 423 * If the unit number allocator does not have enough unique 424 * VPIDs then we need to allocate from the [1,VM_MAXCPU] range. 425 * 426 * These VPIDs are not be unique across VMs but this does not 427 * affect correctness because the combined mappings are also 428 * tagged with the EP4TA which is unique for each VM. 429 * 430 * It is still sub-optimal because the invvpid will invalidate 431 * combined mappings for a particular VPID across all EP4TAs. 432 */ 433 while (i-- > 0) 434 vpid_free(vpid[i]); 435 436 for (i = 0; i < num; i++) 437 vpid[i] = i + 1; 438 } 439} 440 441static void 442vpid_init(void) 443{ 444 /* 445 * VPID 0 is required when the "enable VPID" execution control is 446 * disabled. 447 * 448 * VPIDs [1,VM_MAXCPU] are used as the "overflow namespace" when the 449 * unit number allocator does not have sufficient unique VPIDs to 450 * satisfy the allocation. 
451 * 452 * The remaining VPIDs are managed by the unit number allocator. 453 */ 454 vpid_unr = new_unrhdr(VM_MAXCPU + 1, 0xffff, NULL); 455} 456 457static void 458msr_save_area_init(struct msr_entry *g_area, int *g_count) 459{ 460 int cnt; 461 462 static struct msr_entry guest_msrs[] = { 463 { MSR_KGSBASE, 0, 0 }, 464 }; 465 466 cnt = sizeof(guest_msrs) / sizeof(guest_msrs[0]); 467 if (cnt > GUEST_MSR_MAX_ENTRIES) 468 panic("guest msr save area overrun"); 469 bcopy(guest_msrs, g_area, sizeof(guest_msrs)); 470 *g_count = cnt; 471} 472 473static void 474vmx_disable(void *arg __unused) 475{ 476 struct invvpid_desc invvpid_desc = { 0 }; 477 struct invept_desc invept_desc = { 0 }; 478 479 if (vmxon_enabled[curcpu]) { 480 /* 481 * See sections 25.3.3.3 and 25.3.3.4 in Intel Vol 3b. 482 * 483 * VMXON or VMXOFF are not required to invalidate any TLB 484 * caching structures. This prevents potential retention of 485 * cached information in the TLB between distinct VMX episodes. 486 */ 487 invvpid(INVVPID_TYPE_ALL_CONTEXTS, invvpid_desc); 488 invept(INVEPT_TYPE_ALL_CONTEXTS, invept_desc); 489 vmxoff(); 490 } 491 load_cr4(rcr4() & ~CR4_VMXE); 492} 493 494static int 495vmx_cleanup(void) 496{ 497 498 if (pirvec != 0) 499 vmm_ipi_free(pirvec); 500 501 if (vpid_unr != NULL) { 502 delete_unrhdr(vpid_unr); 503 vpid_unr = NULL; 504 } 505 506 smp_rendezvous(NULL, vmx_disable, NULL, NULL); 507 508 return (0); 509} 510 511static void 512vmx_enable(void *arg __unused) 513{ 514 int error; 515 516 load_cr4(rcr4() | CR4_VMXE); 517 518 *(uint32_t *)vmxon_region[curcpu] = vmx_revision(); 519 error = vmxon(vmxon_region[curcpu]); 520 if (error == 0) 521 vmxon_enabled[curcpu] = 1; 522} 523 524static void 525vmx_restore(void) 526{ 527 528 if (vmxon_enabled[curcpu]) 529 vmxon(vmxon_region[curcpu]); 530} 531 532static int 533vmx_init(int ipinum) 534{ 535 int error, use_tpr_shadow; 536 uint64_t basic, fixed0, fixed1, feature_control; 537 uint32_t tmp, procbased2_vid_bits; 538 539 /* CPUID.1:ECX[bit 5] must be 1 for processor to support VMX */ 540 if (!(cpu_feature2 & CPUID2_VMX)) { 541 printf("vmx_init: processor does not support VMX operation\n"); 542 return (ENXIO); 543 } 544 545 /* 546 * Verify that MSR_IA32_FEATURE_CONTROL lock and VMXON enable bits 547 * are set (bits 0 and 2 respectively). 
548 */ 549 feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); 550 if ((feature_control & IA32_FEATURE_CONTROL_LOCK) == 0 || 551 (feature_control & IA32_FEATURE_CONTROL_VMX_EN) == 0) { 552 printf("vmx_init: VMX operation disabled by BIOS\n"); 553 return (ENXIO); 554 } 555 556 /* 557 * Verify capabilities MSR_VMX_BASIC: 558 * - bit 54 indicates support for INS/OUTS decoding 559 */ 560 basic = rdmsr(MSR_VMX_BASIC); 561 if ((basic & (1UL << 54)) == 0) { 562 printf("vmx_init: processor does not support desired basic " 563 "capabilities\n"); 564 return (EINVAL); 565 } 566 567 /* Check support for primary processor-based VM-execution controls */ 568 error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, 569 MSR_VMX_TRUE_PROCBASED_CTLS, 570 PROCBASED_CTLS_ONE_SETTING, 571 PROCBASED_CTLS_ZERO_SETTING, &procbased_ctls); 572 if (error) { 573 printf("vmx_init: processor does not support desired primary " 574 "processor-based controls\n"); 575 return (error); 576 } 577 578 /* Clear the processor-based ctl bits that are set on demand */ 579 procbased_ctls &= ~PROCBASED_CTLS_WINDOW_SETTING; 580 581 /* Check support for secondary processor-based VM-execution controls */ 582 error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, 583 MSR_VMX_PROCBASED_CTLS2, 584 PROCBASED_CTLS2_ONE_SETTING, 585 PROCBASED_CTLS2_ZERO_SETTING, &procbased_ctls2); 586 if (error) { 587 printf("vmx_init: processor does not support desired secondary " 588 "processor-based controls\n"); 589 return (error); 590 } 591 592 /* Check support for VPID */ 593 error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2, 594 PROCBASED2_ENABLE_VPID, 0, &tmp); 595 if (error == 0) 596 procbased_ctls2 |= PROCBASED2_ENABLE_VPID; 597 598 /* Check support for pin-based VM-execution controls */ 599 error = vmx_set_ctlreg(MSR_VMX_PINBASED_CTLS, 600 MSR_VMX_TRUE_PINBASED_CTLS, 601 PINBASED_CTLS_ONE_SETTING, 602 PINBASED_CTLS_ZERO_SETTING, &pinbased_ctls); 603 if (error) { 604 printf("vmx_init: processor does not support desired " 605 "pin-based controls\n"); 606 return (error); 607 } 608 609 /* Check support for VM-exit controls */ 610 error = vmx_set_ctlreg(MSR_VMX_EXIT_CTLS, MSR_VMX_TRUE_EXIT_CTLS, 611 VM_EXIT_CTLS_ONE_SETTING, 612 VM_EXIT_CTLS_ZERO_SETTING, 613 &exit_ctls); 614 if (error) { 615 /* Try again without the PAT MSR bits */ 616 error = vmx_set_ctlreg(MSR_VMX_EXIT_CTLS, 617 MSR_VMX_TRUE_EXIT_CTLS, 618 VM_EXIT_CTLS_ONE_SETTING_NO_PAT, 619 VM_EXIT_CTLS_ZERO_SETTING, 620 &exit_ctls); 621 if (error) { 622 printf("vmx_init: processor does not support desired " 623 "exit controls\n"); 624 return (error); 625 } else { 626 if (bootverbose) 627 printf("vmm: PAT MSR access not supported\n"); 628 guest_msr_valid(MSR_PAT); 629 vmx_no_patmsr = 1; 630 } 631 } 632 633 /* Check support for VM-entry controls */ 634 if (!vmx_no_patmsr) { 635 error = vmx_set_ctlreg(MSR_VMX_ENTRY_CTLS, 636 MSR_VMX_TRUE_ENTRY_CTLS, 637 VM_ENTRY_CTLS_ONE_SETTING, 638 VM_ENTRY_CTLS_ZERO_SETTING, 639 &entry_ctls); 640 } else { 641 error = vmx_set_ctlreg(MSR_VMX_ENTRY_CTLS, 642 MSR_VMX_TRUE_ENTRY_CTLS, 643 VM_ENTRY_CTLS_ONE_SETTING_NO_PAT, 644 VM_ENTRY_CTLS_ZERO_SETTING, 645 &entry_ctls); 646 } 647 648 if (error) { 649 printf("vmx_init: processor does not support desired " 650 "entry controls\n"); 651 return (error); 652 } 653 654 /* 655 * Check support for optional features by testing them 656 * as individual bits 657 */ 658 cap_halt_exit = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, 659 MSR_VMX_TRUE_PROCBASED_CTLS, 660 PROCBASED_HLT_EXITING, 0, 661 &tmp) == 0); 662 663 
cap_monitor_trap = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, 664 MSR_VMX_PROCBASED_CTLS, 665 PROCBASED_MTF, 0, 666 &tmp) == 0); 667 668 cap_pause_exit = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, 669 MSR_VMX_TRUE_PROCBASED_CTLS, 670 PROCBASED_PAUSE_EXITING, 0, 671 &tmp) == 0); 672 673 cap_unrestricted_guest = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, 674 MSR_VMX_PROCBASED_CTLS2, 675 PROCBASED2_UNRESTRICTED_GUEST, 0, 676 &tmp) == 0); 677 678 cap_invpcid = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, 679 MSR_VMX_PROCBASED_CTLS2, PROCBASED2_ENABLE_INVPCID, 0, 680 &tmp) == 0); 681 682 /* 683 * Check support for virtual interrupt delivery. 684 */ 685 procbased2_vid_bits = (PROCBASED2_VIRTUALIZE_APIC_ACCESSES | 686 PROCBASED2_VIRTUALIZE_X2APIC_MODE | 687 PROCBASED2_APIC_REGISTER_VIRTUALIZATION | 688 PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY); 689 690 use_tpr_shadow = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, 691 MSR_VMX_TRUE_PROCBASED_CTLS, PROCBASED_USE_TPR_SHADOW, 0, 692 &tmp) == 0); 693 694 error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2, 695 procbased2_vid_bits, 0, &tmp); 696 if (error == 0 && use_tpr_shadow) { 697 virtual_interrupt_delivery = 1; 698 TUNABLE_INT_FETCH("hw.vmm.vmx.use_apic_vid", 699 &virtual_interrupt_delivery); 700 } 701 702 if (virtual_interrupt_delivery) { 703 procbased_ctls |= PROCBASED_USE_TPR_SHADOW; 704 procbased_ctls2 |= procbased2_vid_bits; 705 procbased_ctls2 &= ~PROCBASED2_VIRTUALIZE_X2APIC_MODE; 706 707 /* 708 * Check for Posted Interrupts only if Virtual Interrupt 709 * Delivery is enabled. 710 */ 711 error = vmx_set_ctlreg(MSR_VMX_PINBASED_CTLS, 712 MSR_VMX_TRUE_PINBASED_CTLS, PINBASED_POSTED_INTERRUPT, 0, 713 &tmp); 714 if (error == 0) { 715 pirvec = vmm_ipi_alloc(); 716 if (pirvec == 0) { 717 if (bootverbose) { 718 printf("vmx_init: unable to allocate " 719 "posted interrupt vector\n"); 720 } 721 } else { 722 posted_interrupts = 1; 723 TUNABLE_INT_FETCH("hw.vmm.vmx.use_apic_pir", 724 &posted_interrupts); 725 } 726 } 727 } 728 729 if (posted_interrupts) 730 pinbased_ctls |= PINBASED_POSTED_INTERRUPT; 731 732 /* Initialize EPT */ 733 error = ept_init(ipinum); 734 if (error) { 735 printf("vmx_init: ept initialization failed (%d)\n", error); 736 return (error); 737 } 738 739 /* 740 * Stash the cr0 and cr4 bits that must be fixed to 0 or 1 741 */ 742 fixed0 = rdmsr(MSR_VMX_CR0_FIXED0); 743 fixed1 = rdmsr(MSR_VMX_CR0_FIXED1); 744 cr0_ones_mask = fixed0 & fixed1; 745 cr0_zeros_mask = ~fixed0 & ~fixed1; 746 747 /* 748 * CR0_PE and CR0_PG can be set to zero in VMX non-root operation 749 * if unrestricted guest execution is allowed. 750 */ 751 if (cap_unrestricted_guest) 752 cr0_ones_mask &= ~(CR0_PG | CR0_PE); 753 754 /* 755 * Do not allow the guest to set CR0_NW or CR0_CD. 
756 */ 757 cr0_zeros_mask |= (CR0_NW | CR0_CD); 758 759 fixed0 = rdmsr(MSR_VMX_CR4_FIXED0); 760 fixed1 = rdmsr(MSR_VMX_CR4_FIXED1); 761 cr4_ones_mask = fixed0 & fixed1; 762 cr4_zeros_mask = ~fixed0 & ~fixed1; 763 764 vpid_init(); 765 766 /* enable VMX operation */ 767 smp_rendezvous(NULL, vmx_enable, NULL, NULL); 768 769 vmx_initialized = 1; 770 771 return (0); 772} 773 774static void 775vmx_trigger_hostintr(int vector) 776{ 777 uintptr_t func; 778 struct gate_descriptor *gd; 779 780 gd = &idt[vector]; 781 782 KASSERT(vector >= 32 && vector <= 255, ("vmx_trigger_hostintr: " 783 "invalid vector %d", vector)); 784 KASSERT(gd->gd_p == 1, ("gate descriptor for vector %d not present", 785 vector)); 786 KASSERT(gd->gd_type == SDT_SYSIGT, ("gate descriptor for vector %d " 787 "has invalid type %d", vector, gd->gd_type)); 788 KASSERT(gd->gd_dpl == SEL_KPL, ("gate descriptor for vector %d " 789 "has invalid dpl %d", vector, gd->gd_dpl)); 790 KASSERT(gd->gd_selector == GSEL(GCODE_SEL, SEL_KPL), ("gate descriptor " 791 "for vector %d has invalid selector %d", vector, gd->gd_selector)); 792 KASSERT(gd->gd_ist == 0, ("gate descriptor for vector %d has invalid " 793 "IST %d", vector, gd->gd_ist)); 794 795 func = ((long)gd->gd_hioffset << 16 | gd->gd_looffset); 796 vmx_call_isr(func); 797} 798 799static int 800vmx_setup_cr_shadow(int which, struct vmcs *vmcs, uint32_t initial) 801{ 802 int error, mask_ident, shadow_ident; 803 uint64_t mask_value; 804 805 if (which != 0 && which != 4) 806 panic("vmx_setup_cr_shadow: unknown cr%d", which); 807 808 if (which == 0) { 809 mask_ident = VMCS_CR0_MASK; 810 mask_value = cr0_ones_mask | cr0_zeros_mask; 811 shadow_ident = VMCS_CR0_SHADOW; 812 } else { 813 mask_ident = VMCS_CR4_MASK; 814 mask_value = cr4_ones_mask | cr4_zeros_mask; 815 shadow_ident = VMCS_CR4_SHADOW; 816 } 817 818 error = vmcs_setreg(vmcs, 0, VMCS_IDENT(mask_ident), mask_value); 819 if (error) 820 return (error); 821 822 error = vmcs_setreg(vmcs, 0, VMCS_IDENT(shadow_ident), initial); 823 if (error) 824 return (error); 825 826 return (0); 827} 828#define vmx_setup_cr0_shadow(vmcs,init) vmx_setup_cr_shadow(0, (vmcs), (init)) 829#define vmx_setup_cr4_shadow(vmcs,init) vmx_setup_cr_shadow(4, (vmcs), (init)) 830 831static void * 832vmx_vminit(struct vm *vm, pmap_t pmap) 833{ 834 uint16_t vpid[VM_MAXCPU]; 835 int i, error, guest_msr_count; 836 struct vmx *vmx; 837 struct vmcs *vmcs; 838 839 vmx = malloc(sizeof(struct vmx), M_VMX, M_WAITOK | M_ZERO); 840 if ((uintptr_t)vmx & PAGE_MASK) { 841 panic("malloc of struct vmx not aligned on %d byte boundary", 842 PAGE_SIZE); 843 } 844 vmx->vm = vm; 845 846 vmx->eptp = eptp(vtophys((vm_offset_t)pmap->pm_pml4)); 847 848 /* 849 * Clean up EPTP-tagged guest physical and combined mappings 850 * 851 * VMX transitions are not required to invalidate any guest physical 852 * mappings. So, it may be possible for stale guest physical mappings 853 * to be present in the processor TLBs. 854 * 855 * Combined mappings for this EP4TA are also invalidated for all VPIDs. 856 */ 857 ept_invalidate_mappings(vmx->eptp); 858 859 msr_bitmap_initialize(vmx->msr_bitmap); 860 861 /* 862 * It is safe to allow direct access to MSR_GSBASE and MSR_FSBASE. 863 * The guest FSBASE and GSBASE are saved and restored during 864 * vm-exit and vm-entry respectively. The host FSBASE and GSBASE are 865 * always restored from the vmcs host state area on vm-exit. 
866 * 867 * The SYSENTER_CS/ESP/EIP MSRs are identical to FS/GSBASE in 868 * how they are saved/restored so can be directly accessed by the 869 * guest. 870 * 871 * Guest KGSBASE is saved and restored in the guest MSR save area. 872 * Host KGSBASE is restored before returning to userland from the pcb. 873 * There will be a window of time when we are executing in the host 874 * kernel context with a value of KGSBASE from the guest. This is ok 875 * because the value of KGSBASE is inconsequential in kernel context. 876 * 877 * MSR_EFER is saved and restored in the guest VMCS area on a 878 * VM exit and entry respectively. It is also restored from the 879 * host VMCS area on a VM exit. 880 * 881 * The TSC MSR is exposed read-only. Writes are disallowed as that 882 * will impact the host TSC. 883 * XXX Writes would be implemented with a wrmsr trap, and 884 * then modifying the TSC offset in the VMCS. 885 */ 886 if (guest_msr_rw(vmx, MSR_GSBASE) || 887 guest_msr_rw(vmx, MSR_FSBASE) || 888 guest_msr_rw(vmx, MSR_SYSENTER_CS_MSR) || 889 guest_msr_rw(vmx, MSR_SYSENTER_ESP_MSR) || 890 guest_msr_rw(vmx, MSR_SYSENTER_EIP_MSR) || 891 guest_msr_rw(vmx, MSR_KGSBASE) || 892 guest_msr_rw(vmx, MSR_EFER) || 893 guest_msr_ro(vmx, MSR_TSC)) 894 panic("vmx_vminit: error setting guest msr access"); 895 896 /* 897 * MSR_PAT is saved and restored in the guest VMCS are on a VM exit 898 * and entry respectively. It is also restored from the host VMCS 899 * area on a VM exit. However, if running on a system with no 900 * MSR_PAT save/restore support, leave access disabled so accesses 901 * will be trapped. 902 */ 903 if (!vmx_no_patmsr && guest_msr_rw(vmx, MSR_PAT)) 904 panic("vmx_vminit: error setting guest pat msr access"); 905 906 vpid_alloc(vpid, VM_MAXCPU); 907 908 if (virtual_interrupt_delivery) { 909 error = vm_map_mmio(vm, DEFAULT_APIC_BASE, PAGE_SIZE, 910 APIC_ACCESS_ADDRESS); 911 /* XXX this should really return an error to the caller */ 912 KASSERT(error == 0, ("vm_map_mmio(apicbase) error %d", error)); 913 } 914 915 for (i = 0; i < VM_MAXCPU; i++) { 916 vmcs = &vmx->vmcs[i]; 917 vmcs->identifier = vmx_revision(); 918 error = vmclear(vmcs); 919 if (error != 0) { 920 panic("vmx_vminit: vmclear error %d on vcpu %d\n", 921 error, i); 922 } 923 924 error = vmcs_init(vmcs); 925 KASSERT(error == 0, ("vmcs_init error %d", error)); 926 927 VMPTRLD(vmcs); 928 error = 0; 929 error += vmwrite(VMCS_HOST_RSP, (u_long)&vmx->ctx[i]); 930 error += vmwrite(VMCS_EPTP, vmx->eptp); 931 error += vmwrite(VMCS_PIN_BASED_CTLS, pinbased_ctls); 932 error += vmwrite(VMCS_PRI_PROC_BASED_CTLS, procbased_ctls); 933 error += vmwrite(VMCS_SEC_PROC_BASED_CTLS, procbased_ctls2); 934 error += vmwrite(VMCS_EXIT_CTLS, exit_ctls); 935 error += vmwrite(VMCS_ENTRY_CTLS, entry_ctls); 936 error += vmwrite(VMCS_MSR_BITMAP, vtophys(vmx->msr_bitmap)); 937 error += vmwrite(VMCS_VPID, vpid[i]); 938 if (virtual_interrupt_delivery) { 939 error += vmwrite(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS); 940 error += vmwrite(VMCS_VIRTUAL_APIC, 941 vtophys(&vmx->apic_page[i])); 942 error += vmwrite(VMCS_EOI_EXIT0, 0); 943 error += vmwrite(VMCS_EOI_EXIT1, 0); 944 error += vmwrite(VMCS_EOI_EXIT2, 0); 945 error += vmwrite(VMCS_EOI_EXIT3, 0); 946 } 947 if (posted_interrupts) { 948 error += vmwrite(VMCS_PIR_VECTOR, pirvec); 949 error += vmwrite(VMCS_PIR_DESC, 950 vtophys(&vmx->pir_desc[i])); 951 } 952 VMCLEAR(vmcs); 953 KASSERT(error == 0, ("vmx_vminit: error customizing the vmcs")); 954 955 vmx->cap[i].set = 0; 956 vmx->cap[i].proc_ctls = procbased_ctls; 957 
vmx->cap[i].proc_ctls2 = procbased_ctls2; 958 959 vmx->state[i].lastcpu = -1; 960 vmx->state[i].vpid = vpid[i]; 961 962 msr_save_area_init(vmx->guest_msrs[i], &guest_msr_count); 963 964 error = vmcs_set_msr_save(vmcs, vtophys(vmx->guest_msrs[i]), 965 guest_msr_count); 966 if (error != 0) 967 panic("vmcs_set_msr_save error %d", error); 968 969 /* 970 * Set up the CR0/4 shadows, and init the read shadow 971 * to the power-on register value from the Intel Sys Arch. 972 * CR0 - 0x60000010 973 * CR4 - 0 974 */ 975 error = vmx_setup_cr0_shadow(vmcs, 0x60000010); 976 if (error != 0) 977 panic("vmx_setup_cr0_shadow %d", error); 978 979 error = vmx_setup_cr4_shadow(vmcs, 0); 980 if (error != 0) 981 panic("vmx_setup_cr4_shadow %d", error); 982 983 vmx->ctx[i].pmap = pmap; 984 } 985 986 return (vmx); 987} 988 989static int 990vmx_handle_cpuid(struct vm *vm, int vcpu, struct vmxctx *vmxctx) 991{ 992 int handled, func; 993 994 func = vmxctx->guest_rax; 995 996 handled = x86_emulate_cpuid(vm, vcpu, 997 (uint32_t*)(&vmxctx->guest_rax), 998 (uint32_t*)(&vmxctx->guest_rbx), 999 (uint32_t*)(&vmxctx->guest_rcx), 1000 (uint32_t*)(&vmxctx->guest_rdx)); 1001 return (handled); 1002} 1003 1004static __inline void 1005vmx_run_trace(struct vmx *vmx, int vcpu) 1006{ 1007#ifdef KTR 1008 VCPU_CTR1(vmx->vm, vcpu, "Resume execution at %#lx", vmcs_guest_rip()); 1009#endif 1010} 1011 1012static __inline void 1013vmx_exit_trace(struct vmx *vmx, int vcpu, uint64_t rip, uint32_t exit_reason, 1014 int handled) 1015{ 1016#ifdef KTR 1017 VCPU_CTR3(vmx->vm, vcpu, "%s %s vmexit at 0x%0lx", 1018 handled ? "handled" : "unhandled", 1019 exit_reason_to_str(exit_reason), rip); 1020#endif 1021} 1022 1023static __inline void 1024vmx_astpending_trace(struct vmx *vmx, int vcpu, uint64_t rip) 1025{ 1026#ifdef KTR 1027 VCPU_CTR1(vmx->vm, vcpu, "astpending vmexit at 0x%0lx", rip); 1028#endif 1029} 1030 1031static VMM_STAT_INTEL(VCPU_INVVPID_SAVED, "Number of vpid invalidations saved"); 1032 1033static void 1034vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu, pmap_t pmap) 1035{ 1036 struct vmxstate *vmxstate; 1037 struct invvpid_desc invvpid_desc; 1038 1039 vmxstate = &vmx->state[vcpu]; 1040 if (vmxstate->lastcpu == curcpu) 1041 return; 1042 1043 vmxstate->lastcpu = curcpu; 1044 1045 vmm_stat_incr(vmx->vm, vcpu, VCPU_MIGRATIONS, 1); 1046 1047 vmcs_write(VMCS_HOST_TR_BASE, vmm_get_host_trbase()); 1048 vmcs_write(VMCS_HOST_GDTR_BASE, vmm_get_host_gdtrbase()); 1049 vmcs_write(VMCS_HOST_GS_BASE, vmm_get_host_gsbase()); 1050 1051 /* 1052 * If we are using VPIDs then invalidate all mappings tagged with 'vpid' 1053 * 1054 * We do this because this vcpu was executing on a different host 1055 * cpu when it last ran. We do not track whether it invalidated 1056 * mappings associated with its 'vpid' during that run. So we must 1057 * assume that the mappings associated with 'vpid' on 'curcpu' are 1058 * stale and invalidate them. 1059 * 1060 * Note that we incur this penalty only when the scheduler chooses to 1061 * move the thread associated with this vcpu between host cpus. 1062 * 1063 * Note also that this will invalidate mappings tagged with 'vpid' 1064 * for "all" EP4TAs. 
1065 */ 1066 if (vmxstate->vpid != 0) { 1067 if (pmap->pm_eptgen == vmx->eptgen[curcpu]) { 1068 invvpid_desc._res1 = 0; 1069 invvpid_desc._res2 = 0; 1070 invvpid_desc.vpid = vmxstate->vpid; 1071 invvpid_desc.linear_addr = 0; 1072 invvpid(INVVPID_TYPE_SINGLE_CONTEXT, invvpid_desc); 1073 } else { 1074 /* 1075 * The invvpid can be skipped if an invept is going to 1076 * be performed before entering the guest. The invept 1077 * will invalidate combined mappings tagged with 1078 * 'vmx->eptp' for all vpids. 1079 */ 1080 vmm_stat_incr(vmx->vm, vcpu, VCPU_INVVPID_SAVED, 1); 1081 } 1082 } 1083} 1084 1085/* 1086 * We depend on 'procbased_ctls' to have the Interrupt Window Exiting bit set. 1087 */ 1088CTASSERT((PROCBASED_CTLS_ONE_SETTING & PROCBASED_INT_WINDOW_EXITING) != 0); 1089 1090static void __inline 1091vmx_set_int_window_exiting(struct vmx *vmx, int vcpu) 1092{ 1093 1094 if ((vmx->cap[vcpu].proc_ctls & PROCBASED_INT_WINDOW_EXITING) == 0) { 1095 vmx->cap[vcpu].proc_ctls |= PROCBASED_INT_WINDOW_EXITING; 1096 vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); 1097 VCPU_CTR0(vmx->vm, vcpu, "Enabling interrupt window exiting"); 1098 } 1099} 1100 1101static void __inline 1102vmx_clear_int_window_exiting(struct vmx *vmx, int vcpu) 1103{ 1104 1105 KASSERT((vmx->cap[vcpu].proc_ctls & PROCBASED_INT_WINDOW_EXITING) != 0, 1106 ("intr_window_exiting not set: %#x", vmx->cap[vcpu].proc_ctls)); 1107 vmx->cap[vcpu].proc_ctls &= ~PROCBASED_INT_WINDOW_EXITING; 1108 vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); 1109 VCPU_CTR0(vmx->vm, vcpu, "Disabling interrupt window exiting"); 1110} 1111 1112static void __inline 1113vmx_set_nmi_window_exiting(struct vmx *vmx, int vcpu) 1114{ 1115 1116 if ((vmx->cap[vcpu].proc_ctls & PROCBASED_NMI_WINDOW_EXITING) == 0) { 1117 vmx->cap[vcpu].proc_ctls |= PROCBASED_NMI_WINDOW_EXITING; 1118 vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); 1119 VCPU_CTR0(vmx->vm, vcpu, "Enabling NMI window exiting"); 1120 } 1121} 1122 1123static void __inline 1124vmx_clear_nmi_window_exiting(struct vmx *vmx, int vcpu) 1125{ 1126 1127 KASSERT((vmx->cap[vcpu].proc_ctls & PROCBASED_NMI_WINDOW_EXITING) != 0, 1128 ("nmi_window_exiting not set %#x", vmx->cap[vcpu].proc_ctls)); 1129 vmx->cap[vcpu].proc_ctls &= ~PROCBASED_NMI_WINDOW_EXITING; 1130 vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); 1131 VCPU_CTR0(vmx->vm, vcpu, "Disabling NMI window exiting"); 1132} 1133 1134#define NMI_BLOCKING (VMCS_INTERRUPTIBILITY_NMI_BLOCKING | \ 1135 VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING) 1136#define HWINTR_BLOCKING (VMCS_INTERRUPTIBILITY_STI_BLOCKING | \ 1137 VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING) 1138 1139static void 1140vmx_inject_nmi(struct vmx *vmx, int vcpu) 1141{ 1142 uint32_t gi, info; 1143 1144 gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); 1145 KASSERT((gi & NMI_BLOCKING) == 0, ("vmx_inject_nmi: invalid guest " 1146 "interruptibility-state %#x", gi)); 1147 1148 info = vmcs_read(VMCS_ENTRY_INTR_INFO); 1149 KASSERT((info & VMCS_INTR_VALID) == 0, ("vmx_inject_nmi: invalid " 1150 "VM-entry interruption information %#x", info)); 1151 1152 /* 1153 * Inject the virtual NMI. The vector must be the NMI IDT entry 1154 * or the VMCS entry check will fail. 
1155 */ 1156 info = IDT_NMI | VMCS_INTR_T_NMI | VMCS_INTR_VALID; 1157 vmcs_write(VMCS_ENTRY_INTR_INFO, info); 1158 1159 VCPU_CTR0(vmx->vm, vcpu, "Injecting vNMI"); 1160 1161 /* Clear the request */ 1162 vm_nmi_clear(vmx->vm, vcpu); 1163} 1164 1165static void 1166vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic) 1167{ 1168 struct vm_exception exc; 1169 int vector, need_nmi_exiting, extint_pending; 1170 uint64_t rflags; 1171 uint32_t gi, info; 1172 1173 if (vm_exception_pending(vmx->vm, vcpu, &exc)) { 1174 KASSERT(exc.vector >= 0 && exc.vector < 32, 1175 ("%s: invalid exception vector %d", __func__, exc.vector)); 1176 1177 info = vmcs_read(VMCS_ENTRY_INTR_INFO); 1178 KASSERT((info & VMCS_INTR_VALID) == 0, ("%s: cannot inject " 1179 "pending exception %d: %#x", __func__, exc.vector, info)); 1180 1181 info = exc.vector | VMCS_INTR_T_HWEXCEPTION | VMCS_INTR_VALID; 1182 if (exc.error_code_valid) { 1183 info |= VMCS_INTR_DEL_ERRCODE; 1184 vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR, exc.error_code); 1185 } 1186 vmcs_write(VMCS_ENTRY_INTR_INFO, info); 1187 } 1188 1189 if (vm_nmi_pending(vmx->vm, vcpu)) { 1190 /* 1191 * If there are no conditions blocking NMI injection then 1192 * inject it directly here otherwise enable "NMI window 1193 * exiting" to inject it as soon as we can. 1194 * 1195 * We also check for STI_BLOCKING because some implementations 1196 * don't allow NMI injection in this case. If we are running 1197 * on a processor that doesn't have this restriction it will 1198 * immediately exit and the NMI will be injected in the 1199 * "NMI window exiting" handler. 1200 */ 1201 need_nmi_exiting = 1; 1202 gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); 1203 if ((gi & (HWINTR_BLOCKING | NMI_BLOCKING)) == 0) { 1204 info = vmcs_read(VMCS_ENTRY_INTR_INFO); 1205 if ((info & VMCS_INTR_VALID) == 0) { 1206 vmx_inject_nmi(vmx, vcpu); 1207 need_nmi_exiting = 0; 1208 } else { 1209 VCPU_CTR1(vmx->vm, vcpu, "Cannot inject NMI " 1210 "due to VM-entry intr info %#x", info); 1211 } 1212 } else { 1213 VCPU_CTR1(vmx->vm, vcpu, "Cannot inject NMI due to " 1214 "Guest Interruptibility-state %#x", gi); 1215 } 1216 1217 if (need_nmi_exiting) 1218 vmx_set_nmi_window_exiting(vmx, vcpu); 1219 } 1220 1221 extint_pending = vm_extint_pending(vmx->vm, vcpu); 1222 1223 if (!extint_pending && virtual_interrupt_delivery) { 1224 vmx_inject_pir(vlapic); 1225 return; 1226 } 1227 1228 /* 1229 * If interrupt-window exiting is already in effect then don't bother 1230 * checking for pending interrupts. This is just an optimization and 1231 * not needed for correctness. 
1232 */ 1233 if ((vmx->cap[vcpu].proc_ctls & PROCBASED_INT_WINDOW_EXITING) != 0) { 1234 VCPU_CTR0(vmx->vm, vcpu, "Skip interrupt injection due to " 1235 "pending int_window_exiting"); 1236 return; 1237 } 1238 1239 if (!extint_pending) { 1240 /* Ask the local apic for a vector to inject */ 1241 if (!vlapic_pending_intr(vlapic, &vector)) 1242 return; 1243 } else { 1244 /* Ask the legacy pic for a vector to inject */ 1245 vatpic_pending_intr(vmx->vm, &vector); 1246 } 1247 1248 KASSERT(vector >= 32 && vector <= 255, ("invalid vector %d", vector)); 1249 1250 /* Check RFLAGS.IF and the interruptibility state of the guest */ 1251 rflags = vmcs_read(VMCS_GUEST_RFLAGS); 1252 if ((rflags & PSL_I) == 0) { 1253 VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to " 1254 "rflags %#lx", vector, rflags); 1255 goto cantinject; 1256 } 1257 1258 gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); 1259 if (gi & HWINTR_BLOCKING) { 1260 VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to " 1261 "Guest Interruptibility-state %#x", vector, gi); 1262 goto cantinject; 1263 } 1264 1265 info = vmcs_read(VMCS_ENTRY_INTR_INFO); 1266 if (info & VMCS_INTR_VALID) { 1267 /* 1268 * This is expected and could happen for multiple reasons: 1269 * - A vectoring VM-entry was aborted due to astpending 1270 * - A VM-exit happened during event injection. 1271 * - An exception was injected above. 1272 * - An NMI was injected above or after "NMI window exiting" 1273 */ 1274 VCPU_CTR2(vmx->vm, vcpu, "Cannot inject vector %d due to " 1275 "VM-entry intr info %#x", vector, info); 1276 goto cantinject; 1277 } 1278 1279 /* Inject the interrupt */ 1280 info = VMCS_INTR_T_HWINTR | VMCS_INTR_VALID; 1281 info |= vector; 1282 vmcs_write(VMCS_ENTRY_INTR_INFO, info); 1283 1284 if (!extint_pending) { 1285 /* Update the Local APIC ISR */ 1286 vlapic_intr_accepted(vlapic, vector); 1287 } else { 1288 vm_extint_clear(vmx->vm, vcpu); 1289 vatpic_intr_accepted(vmx->vm, vector); 1290 1291 /* 1292 * After we accepted the current ExtINT the PIC may 1293 * have posted another one. If that is the case, set 1294 * the Interrupt Window Exiting execution control so 1295 * we can inject that one too. 1296 */ 1297 if (vm_extint_pending(vmx->vm, vcpu)) 1298 vmx_set_int_window_exiting(vmx, vcpu); 1299 } 1300 1301 VCPU_CTR1(vmx->vm, vcpu, "Injecting hwintr at vector %d", vector); 1302 1303 return; 1304 1305cantinject: 1306 /* 1307 * Set the Interrupt Window Exiting execution control so we can inject 1308 * the interrupt as soon as blocking condition goes away. 1309 */ 1310 vmx_set_int_window_exiting(vmx, vcpu); 1311} 1312 1313/* 1314 * If the Virtual NMIs execution control is '1' then the logical processor 1315 * tracks virtual-NMI blocking in the Guest Interruptibility-state field of 1316 * the VMCS. An IRET instruction in VMX non-root operation will remove any 1317 * virtual-NMI blocking. 1318 * 1319 * This unblocking occurs even if the IRET causes a fault. In this case the 1320 * hypervisor needs to restore virtual-NMI blocking before resuming the guest. 
1321 */ 1322static void 1323vmx_restore_nmi_blocking(struct vmx *vmx, int vcpuid) 1324{ 1325 uint32_t gi; 1326 1327 VCPU_CTR0(vmx->vm, vcpuid, "Restore Virtual-NMI blocking"); 1328 gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); 1329 gi |= VMCS_INTERRUPTIBILITY_NMI_BLOCKING; 1330 vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi); 1331} 1332 1333static void 1334vmx_clear_nmi_blocking(struct vmx *vmx, int vcpuid) 1335{ 1336 uint32_t gi; 1337 1338 VCPU_CTR0(vmx->vm, vcpuid, "Clear Virtual-NMI blocking"); 1339 gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); 1340 gi &= ~VMCS_INTERRUPTIBILITY_NMI_BLOCKING; 1341 vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi); 1342} 1343 1344static int 1345vmx_emulate_xsetbv(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) 1346{ 1347 struct vmxctx *vmxctx; 1348 uint64_t xcrval; 1349 const struct xsave_limits *limits; 1350 1351 vmxctx = &vmx->ctx[vcpu]; 1352 limits = vmm_get_xsave_limits(); 1353 1354 /* 1355 * Note that the processor raises a GP# fault on its own if 1356 * xsetbv is executed for CPL != 0, so we do not have to 1357 * emulate that fault here. 1358 */ 1359 1360 /* Only xcr0 is supported. */ 1361 if (vmxctx->guest_rcx != 0) { 1362 vm_inject_gp(vmx->vm, vcpu); 1363 return (HANDLED); 1364 } 1365 1366 /* We only handle xcr0 if both the host and guest have XSAVE enabled. */ 1367 if (!limits->xsave_enabled || !(vmcs_read(VMCS_GUEST_CR4) & CR4_XSAVE)) { 1368 vm_inject_ud(vmx->vm, vcpu); 1369 return (HANDLED); 1370 } 1371 1372 xcrval = vmxctx->guest_rdx << 32 | (vmxctx->guest_rax & 0xffffffff); 1373 if ((xcrval & ~limits->xcr0_allowed) != 0) { 1374 vm_inject_gp(vmx->vm, vcpu); 1375 return (HANDLED); 1376 } 1377 1378 if (!(xcrval & XFEATURE_ENABLED_X87)) { 1379 vm_inject_gp(vmx->vm, vcpu); 1380 return (HANDLED); 1381 } 1382 1383 if ((xcrval & (XFEATURE_ENABLED_AVX | XFEATURE_ENABLED_SSE)) == 1384 XFEATURE_ENABLED_AVX) { 1385 vm_inject_gp(vmx->vm, vcpu); 1386 return (HANDLED); 1387 } 1388 1389 /* 1390 * This runs "inside" vmrun() with the guest's FPU state, so 1391 * modifying xcr0 directly modifies the guest's xcr0, not the 1392 * host's. 1393 */ 1394 load_xcr(0, xcrval); 1395 return (HANDLED); 1396} 1397 1398static int 1399vmx_emulate_cr_access(struct vmx *vmx, int vcpu, uint64_t exitqual) 1400{ 1401 int cr, vmcs_guest_cr, vmcs_shadow_cr; 1402 uint64_t crval, regval, ones_mask, zeros_mask; 1403 const struct vmxctx *vmxctx; 1404 1405 /* We only handle mov to %cr0 or %cr4 at this time */ 1406 if ((exitqual & 0xf0) != 0x00) 1407 return (UNHANDLED); 1408 1409 cr = exitqual & 0xf; 1410 if (cr != 0 && cr != 4) 1411 return (UNHANDLED); 1412 1413 regval = 0; /* silence gcc */ 1414 vmxctx = &vmx->ctx[vcpu]; 1415 1416 /* 1417 * We must use vmcs_write() directly here because vmcs_setreg() will 1418 * call vmclear(vmcs) as a side-effect which we certainly don't want. 
1419 */ 1420 switch ((exitqual >> 8) & 0xf) { 1421 case 0: 1422 regval = vmxctx->guest_rax; 1423 break; 1424 case 1: 1425 regval = vmxctx->guest_rcx; 1426 break; 1427 case 2: 1428 regval = vmxctx->guest_rdx; 1429 break; 1430 case 3: 1431 regval = vmxctx->guest_rbx; 1432 break; 1433 case 4: 1434 regval = vmcs_read(VMCS_GUEST_RSP); 1435 break; 1436 case 5: 1437 regval = vmxctx->guest_rbp; 1438 break; 1439 case 6: 1440 regval = vmxctx->guest_rsi; 1441 break; 1442 case 7: 1443 regval = vmxctx->guest_rdi; 1444 break; 1445 case 8: 1446 regval = vmxctx->guest_r8; 1447 break; 1448 case 9: 1449 regval = vmxctx->guest_r9; 1450 break; 1451 case 10: 1452 regval = vmxctx->guest_r10; 1453 break; 1454 case 11: 1455 regval = vmxctx->guest_r11; 1456 break; 1457 case 12: 1458 regval = vmxctx->guest_r12; 1459 break; 1460 case 13: 1461 regval = vmxctx->guest_r13; 1462 break; 1463 case 14: 1464 regval = vmxctx->guest_r14; 1465 break; 1466 case 15: 1467 regval = vmxctx->guest_r15; 1468 break; 1469 } 1470 1471 if (cr == 0) { 1472 ones_mask = cr0_ones_mask; 1473 zeros_mask = cr0_zeros_mask; 1474 vmcs_guest_cr = VMCS_GUEST_CR0; 1475 vmcs_shadow_cr = VMCS_CR0_SHADOW; 1476 } else { 1477 ones_mask = cr4_ones_mask; 1478 zeros_mask = cr4_zeros_mask; 1479 vmcs_guest_cr = VMCS_GUEST_CR4; 1480 vmcs_shadow_cr = VMCS_CR4_SHADOW; 1481 } 1482 vmcs_write(vmcs_shadow_cr, regval); 1483 1484 crval = regval | ones_mask; 1485 crval &= ~zeros_mask; 1486 vmcs_write(vmcs_guest_cr, crval); 1487 1488 if (cr == 0 && regval & CR0_PG) { 1489 uint64_t efer, entry_ctls; 1490 1491 /* 1492 * If CR0.PG is 1 and EFER.LME is 1 then EFER.LMA and 1493 * the "IA-32e mode guest" bit in VM-entry control must be 1494 * equal. 1495 */ 1496 efer = vmcs_read(VMCS_GUEST_IA32_EFER); 1497 if (efer & EFER_LME) { 1498 efer |= EFER_LMA; 1499 vmcs_write(VMCS_GUEST_IA32_EFER, efer); 1500 entry_ctls = vmcs_read(VMCS_ENTRY_CTLS); 1501 entry_ctls |= VM_ENTRY_GUEST_LMA; 1502 vmcs_write(VMCS_ENTRY_CTLS, entry_ctls); 1503 } 1504 } 1505 1506 return (HANDLED); 1507} 1508 1509/* 1510 * From section "Guest Register State" in the Intel SDM: CPL = SS.DPL 1511 */ 1512static int 1513vmx_cpl(void) 1514{ 1515 uint32_t ssar; 1516 1517 ssar = vmcs_read(VMCS_GUEST_SS_ACCESS_RIGHTS); 1518 return ((ssar >> 5) & 0x3); 1519} 1520 1521static enum vm_cpu_mode 1522vmx_cpu_mode(void) 1523{ 1524 1525 if (vmcs_read(VMCS_GUEST_IA32_EFER) & EFER_LMA) 1526 return (CPU_MODE_64BIT); 1527 else 1528 return (CPU_MODE_COMPATIBILITY); 1529} 1530 1531static enum vm_paging_mode 1532vmx_paging_mode(void) 1533{ 1534 1535 if (!(vmcs_read(VMCS_GUEST_CR0) & CR0_PG)) 1536 return (PAGING_MODE_FLAT); 1537 if (!(vmcs_read(VMCS_GUEST_CR4) & CR4_PAE)) 1538 return (PAGING_MODE_32); 1539 if (vmcs_read(VMCS_GUEST_IA32_EFER) & EFER_LME) 1540 return (PAGING_MODE_64); 1541 else 1542 return (PAGING_MODE_PAE); 1543} 1544 1545static uint64_t 1546inout_str_index(struct vmx *vmx, int vcpuid, int in) 1547{ 1548 uint64_t val; 1549 int error; 1550 enum vm_reg_name reg; 1551 1552 reg = in ? 
VM_REG_GUEST_RDI : VM_REG_GUEST_RSI; 1553 error = vmx_getreg(vmx, vcpuid, reg, &val); 1554 KASSERT(error == 0, ("%s: vmx_getreg error %d", __func__, error)); 1555 return (val); 1556} 1557 1558static uint64_t 1559inout_str_count(struct vmx *vmx, int vcpuid, int rep) 1560{ 1561 uint64_t val; 1562 int error; 1563 1564 if (rep) { 1565 error = vmx_getreg(vmx, vcpuid, VM_REG_GUEST_RCX, &val); 1566 KASSERT(!error, ("%s: vmx_getreg error %d", __func__, error)); 1567 } else { 1568 val = 1; 1569 } 1570 return (val); 1571} 1572 1573static int 1574inout_str_addrsize(uint32_t inst_info) 1575{ 1576 uint32_t size; 1577 1578 size = (inst_info >> 7) & 0x7; 1579 switch (size) { 1580 case 0: 1581 return (2); /* 16 bit */ 1582 case 1: 1583 return (4); /* 32 bit */ 1584 case 2: 1585 return (8); /* 64 bit */ 1586 default: 1587 panic("%s: invalid size encoding %d", __func__, size); 1588 } 1589} 1590 1591static void 1592inout_str_seginfo(struct vmx *vmx, int vcpuid, uint32_t inst_info, int in, 1593 struct vm_inout_str *vis) 1594{ 1595 int error, s; 1596 1597 if (in) { 1598 vis->seg_name = VM_REG_GUEST_ES; 1599 } else { 1600 s = (inst_info >> 15) & 0x7; 1601 vis->seg_name = vm_segment_name(s); 1602 } 1603 1604 error = vmx_getdesc(vmx, vcpuid, vis->seg_name, &vis->seg_desc); 1605 KASSERT(error == 0, ("%s: vmx_getdesc error %d", __func__, error)); 1606 1607 /* XXX modify svm.c to update bit 16 of seg_desc.access (unusable) */ 1608} 1609 1610static void 1611vmx_paging_info(struct vm_guest_paging *paging) 1612{ 1613 paging->cr3 = vmcs_guest_cr3(); 1614 paging->cpl = vmx_cpl(); 1615 paging->cpu_mode = vmx_cpu_mode(); 1616 paging->paging_mode = vmx_paging_mode(); 1617} 1618 1619static void 1620vmexit_inst_emul(struct vm_exit *vmexit, uint64_t gpa, uint64_t gla) 1621{ 1622 vmexit->exitcode = VM_EXITCODE_INST_EMUL; 1623 vmexit->u.inst_emul.gpa = gpa; 1624 vmexit->u.inst_emul.gla = gla; 1625 vmx_paging_info(&vmexit->u.inst_emul.paging); 1626} 1627 1628static int 1629ept_fault_type(uint64_t ept_qual) 1630{ 1631 int fault_type; 1632 1633 if (ept_qual & EPT_VIOLATION_DATA_WRITE) 1634 fault_type = VM_PROT_WRITE; 1635 else if (ept_qual & EPT_VIOLATION_INST_FETCH) 1636 fault_type = VM_PROT_EXECUTE; 1637 else 1638 fault_type= VM_PROT_READ; 1639 1640 return (fault_type); 1641} 1642 1643static boolean_t 1644ept_emulation_fault(uint64_t ept_qual) 1645{ 1646 int read, write; 1647 1648 /* EPT fault on an instruction fetch doesn't make sense here */ 1649 if (ept_qual & EPT_VIOLATION_INST_FETCH) 1650 return (FALSE); 1651 1652 /* EPT fault must be a read fault or a write fault */ 1653 read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0; 1654 write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0; 1655 if ((read | write) == 0) 1656 return (FALSE); 1657 1658 /* 1659 * The EPT violation must have been caused by accessing a 1660 * guest-physical address that is a translation of a guest-linear 1661 * address. 1662 */ 1663 if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 || 1664 (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) { 1665 return (FALSE); 1666 } 1667 1668 return (TRUE); 1669} 1670 1671static __inline int 1672apic_access_virtualization(struct vmx *vmx, int vcpuid) 1673{ 1674 uint32_t proc_ctls2; 1675 1676 proc_ctls2 = vmx->cap[vcpuid].proc_ctls2; 1677 return ((proc_ctls2 & PROCBASED2_VIRTUALIZE_APIC_ACCESSES) ? 
1 : 0); 1678} 1679 1680static __inline int 1681x2apic_virtualization(struct vmx *vmx, int vcpuid) 1682{ 1683 uint32_t proc_ctls2; 1684 1685 proc_ctls2 = vmx->cap[vcpuid].proc_ctls2; 1686 return ((proc_ctls2 & PROCBASED2_VIRTUALIZE_X2APIC_MODE) ? 1 : 0); 1687} 1688 1689static int 1690vmx_handle_apic_write(struct vmx *vmx, int vcpuid, struct vlapic *vlapic, 1691 uint64_t qual) 1692{ 1693 int error, handled, offset; 1694 uint32_t *apic_regs, vector; 1695 bool retu; 1696 1697 handled = HANDLED; 1698 offset = APIC_WRITE_OFFSET(qual); 1699 1700 if (!apic_access_virtualization(vmx, vcpuid)) { 1701 /* 1702 * In general there should not be any APIC write VM-exits 1703 * unless APIC-access virtualization is enabled. 1704 * 1705 * However self-IPI virtualization can legitimately trigger 1706 * an APIC-write VM-exit so treat it specially. 1707 */ 1708 if (x2apic_virtualization(vmx, vcpuid) && 1709 offset == APIC_OFFSET_SELF_IPI) { 1710 apic_regs = (uint32_t *)(vlapic->apic_page); 1711 vector = apic_regs[APIC_OFFSET_SELF_IPI / 4]; 1712 vlapic_self_ipi_handler(vlapic, vector); 1713 return (HANDLED); 1714 } else 1715 return (UNHANDLED); 1716 } 1717 1718 switch (offset) { 1719 case APIC_OFFSET_ID: 1720 vlapic_id_write_handler(vlapic); 1721 break; 1722 case APIC_OFFSET_LDR: 1723 vlapic_ldr_write_handler(vlapic); 1724 break; 1725 case APIC_OFFSET_DFR: 1726 vlapic_dfr_write_handler(vlapic); 1727 break; 1728 case APIC_OFFSET_SVR: 1729 vlapic_svr_write_handler(vlapic); 1730 break; 1731 case APIC_OFFSET_ESR: 1732 vlapic_esr_write_handler(vlapic); 1733 break; 1734 case APIC_OFFSET_ICR_LOW: 1735 retu = false; 1736 error = vlapic_icrlo_write_handler(vlapic, &retu); 1737 if (error != 0 || retu) 1738 handled = UNHANDLED; 1739 break; 1740 case APIC_OFFSET_CMCI_LVT: 1741 case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: 1742 vlapic_lvt_write_handler(vlapic, offset); 1743 break; 1744 case APIC_OFFSET_TIMER_ICR: 1745 vlapic_icrtmr_write_handler(vlapic); 1746 break; 1747 case APIC_OFFSET_TIMER_DCR: 1748 vlapic_dcr_write_handler(vlapic); 1749 break; 1750 default: 1751 handled = UNHANDLED; 1752 break; 1753 } 1754 return (handled); 1755} 1756 1757static bool 1758apic_access_fault(struct vmx *vmx, int vcpuid, uint64_t gpa) 1759{ 1760 1761 if (apic_access_virtualization(vmx, vcpuid) && 1762 (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE)) 1763 return (true); 1764 else 1765 return (false); 1766} 1767 1768static int 1769vmx_handle_apic_access(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit) 1770{ 1771 uint64_t qual; 1772 int access_type, offset, allowed; 1773 1774 if (!apic_access_virtualization(vmx, vcpuid)) 1775 return (UNHANDLED); 1776 1777 qual = vmexit->u.vmx.exit_qualification; 1778 access_type = APIC_ACCESS_TYPE(qual); 1779 offset = APIC_ACCESS_OFFSET(qual); 1780 1781 allowed = 0; 1782 if (access_type == 0) { 1783 /* 1784 * Read data access to the following registers is expected. 1785 */ 1786 switch (offset) { 1787 case APIC_OFFSET_APR: 1788 case APIC_OFFSET_PPR: 1789 case APIC_OFFSET_RRR: 1790 case APIC_OFFSET_CMCI_LVT: 1791 case APIC_OFFSET_TIMER_CCR: 1792 allowed = 1; 1793 break; 1794 default: 1795 break; 1796 } 1797 } else if (access_type == 1) { 1798 /* 1799 * Write data access to the following registers is expected. 1800 */ 1801 switch (offset) { 1802 case APIC_OFFSET_VER: 1803 case APIC_OFFSET_APR: 1804 case APIC_OFFSET_PPR: 1805 case APIC_OFFSET_RRR: 1806 case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: 1807 case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: 1808 case APIC_OFFSET_IRR0 ... 
APIC_OFFSET_IRR7: 1809 case APIC_OFFSET_CMCI_LVT: 1810 case APIC_OFFSET_TIMER_CCR: 1811 allowed = 1; 1812 break; 1813 default: 1814 break; 1815 } 1816 } 1817 1818 if (allowed) { 1819 vmexit_inst_emul(vmexit, DEFAULT_APIC_BASE + offset, 1820 VIE_INVALID_GLA); 1821 } 1822 1823 /* 1824 * Regardless of whether the APIC-access is allowed this handler 1825 * always returns UNHANDLED: 1826 * - if the access is allowed then it is handled by emulating the 1827 * instruction that caused the VM-exit (outside the critical section) 1828 * - if the access is not allowed then it will be converted to an 1829 * exitcode of VM_EXITCODE_VMX and will be dealt with in userland. 1830 */ 1831 return (UNHANDLED); 1832} 1833 1834static int 1835vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) 1836{ 1837 int error, handled, in; 1838 struct vmxctx *vmxctx; 1839 struct vlapic *vlapic; 1840 struct vm_inout_str *vis; 1841 uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info, inst_info; 1842 uint32_t reason; 1843 uint64_t qual, gpa; 1844 bool retu; 1845 1846 CTASSERT((PINBASED_CTLS_ONE_SETTING & PINBASED_VIRTUAL_NMI) != 0); 1847 CTASSERT((PINBASED_CTLS_ONE_SETTING & PINBASED_NMI_EXITING) != 0); 1848 1849 handled = UNHANDLED; 1850 vmxctx = &vmx->ctx[vcpu]; 1851 1852 qual = vmexit->u.vmx.exit_qualification; 1853 reason = vmexit->u.vmx.exit_reason; 1854 vmexit->exitcode = VM_EXITCODE_BOGUS; 1855 1856 vmm_stat_incr(vmx->vm, vcpu, VMEXIT_COUNT, 1); 1857 1858 /* 1859 * VM exits that could be triggered during event injection on the 1860 * previous VM entry need to be handled specially by re-injecting 1861 * the event. 1862 * 1863 * See "Information for VM Exits During Event Delivery" in Intel SDM 1864 * for details. 1865 */ 1866 switch (reason) { 1867 case EXIT_REASON_EPT_FAULT: 1868 case EXIT_REASON_EPT_MISCONFIG: 1869 case EXIT_REASON_APIC_ACCESS: 1870 case EXIT_REASON_TASK_SWITCH: 1871 case EXIT_REASON_EXCEPTION: 1872 idtvec_info = vmcs_idt_vectoring_info(); 1873 if (idtvec_info & VMCS_IDT_VEC_VALID) { 1874 idtvec_info &= ~(1 << 12); /* clear undefined bit */ 1875 vmcs_write(VMCS_ENTRY_INTR_INFO, idtvec_info); 1876 if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) { 1877 idtvec_err = vmcs_idt_vectoring_err(); 1878 vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR, 1879 idtvec_err); 1880 } 1881 /* 1882 * If 'virtual NMIs' are being used and the VM-exit 1883 * happened while injecting an NMI during the previous 1884 * VM-entry, then clear "blocking by NMI" in the Guest 1885 * Interruptibility-state. 
1886 */ 1887 if ((idtvec_info & VMCS_INTR_T_MASK) == 1888 VMCS_INTR_T_NMI) { 1889 vmx_clear_nmi_blocking(vmx, vcpu); 1890 } 1891 vmcs_write(VMCS_ENTRY_INST_LENGTH, vmexit->inst_length); 1892 } 1893 default: 1894 idtvec_info = 0; 1895 break; 1896 } 1897 1898 switch (reason) { 1899 case EXIT_REASON_CR_ACCESS: 1900 vmm_stat_incr(vmx->vm, vcpu, VMEXIT_CR_ACCESS, 1); 1901 handled = vmx_emulate_cr_access(vmx, vcpu, qual); 1902 break; 1903 case EXIT_REASON_RDMSR: 1904 vmm_stat_incr(vmx->vm, vcpu, VMEXIT_RDMSR, 1); 1905 retu = false; 1906 ecx = vmxctx->guest_rcx; 1907 VCPU_CTR1(vmx->vm, vcpu, "rdmsr 0x%08x", ecx); 1908 error = emulate_rdmsr(vmx->vm, vcpu, ecx, &retu); 1909 if (error) { 1910 vmexit->exitcode = VM_EXITCODE_RDMSR; 1911 vmexit->u.msr.code = ecx; 1912 } else if (!retu) { 1913 handled = HANDLED; 1914 } else { 1915 /* Return to userspace with a valid exitcode */ 1916 KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS, 1917 ("emulate_wrmsr retu with bogus exitcode")); 1918 } 1919 break; 1920 case EXIT_REASON_WRMSR: 1921 vmm_stat_incr(vmx->vm, vcpu, VMEXIT_WRMSR, 1); 1922 retu = false; 1923 eax = vmxctx->guest_rax; 1924 ecx = vmxctx->guest_rcx; 1925 edx = vmxctx->guest_rdx; 1926 VCPU_CTR2(vmx->vm, vcpu, "wrmsr 0x%08x value 0x%016lx", 1927 ecx, (uint64_t)edx << 32 | eax); 1928 error = emulate_wrmsr(vmx->vm, vcpu, ecx, 1929 (uint64_t)edx << 32 | eax, &retu); 1930 if (error) { 1931 vmexit->exitcode = VM_EXITCODE_WRMSR; 1932 vmexit->u.msr.code = ecx; 1933 vmexit->u.msr.wval = (uint64_t)edx << 32 | eax; 1934 } else if (!retu) { 1935 handled = HANDLED; 1936 } else { 1937 /* Return to userspace with a valid exitcode */ 1938 KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS, 1939 ("emulate_wrmsr retu with bogus exitcode")); 1940 } 1941 break; 1942 case EXIT_REASON_HLT: 1943 vmm_stat_incr(vmx->vm, vcpu, VMEXIT_HLT, 1); 1944 vmexit->exitcode = VM_EXITCODE_HLT; 1945 vmexit->u.hlt.rflags = vmcs_read(VMCS_GUEST_RFLAGS); 1946 break; 1947 case EXIT_REASON_MTF: 1948 vmm_stat_incr(vmx->vm, vcpu, VMEXIT_MTRAP, 1); 1949 vmexit->exitcode = VM_EXITCODE_MTRAP; 1950 break; 1951 case EXIT_REASON_PAUSE: 1952 vmm_stat_incr(vmx->vm, vcpu, VMEXIT_PAUSE, 1); 1953 vmexit->exitcode = VM_EXITCODE_PAUSE; 1954 break; 1955 case EXIT_REASON_INTR_WINDOW: 1956 vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INTR_WINDOW, 1); 1957 vmx_clear_int_window_exiting(vmx, vcpu); 1958 return (1); 1959 case EXIT_REASON_EXT_INTR: 1960 /* 1961 * External interrupts serve only to cause VM exits and allow 1962 * the host interrupt handler to run. 1963 * 1964 * If this external interrupt triggers a virtual interrupt 1965 * to a VM, then that state will be recorded by the 1966 * host interrupt handler in the VM's softc. We will inject 1967 * this virtual interrupt during the subsequent VM enter. 1968 */ 1969 intr_info = vmcs_read(VMCS_EXIT_INTR_INFO); 1970 1971 /* 1972 * XXX: Ignore this exit if VMCS_INTR_VALID is not set. 1973 * This appears to be a bug in VMware Fusion? 1974 */ 1975 if (!(intr_info & VMCS_INTR_VALID)) 1976 return (1); 1977 KASSERT((intr_info & VMCS_INTR_VALID) != 0 && 1978 (intr_info & VMCS_INTR_T_MASK) == VMCS_INTR_T_HWINTR, 1979 ("VM exit interruption info invalid: %#x", intr_info)); 1980 vmx_trigger_hostintr(intr_info & 0xff); 1981 1982 /* 1983 * This is special. We want to treat this as an 'handled' 1984 * VM-exit but not increment the instruction pointer. 
		 */
		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_EXTINT, 1);
		return (1);
	case EXIT_REASON_NMI_WINDOW:
		/* Exit to allow the pending virtual NMI to be injected */
		if (vm_nmi_pending(vmx->vm, vcpu))
			vmx_inject_nmi(vmx, vcpu);
		vmx_clear_nmi_window_exiting(vmx, vcpu);
		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NMI_WINDOW, 1);
		return (1);
	case EXIT_REASON_INOUT:
		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INOUT, 1);
		vmexit->exitcode = VM_EXITCODE_INOUT;
		vmexit->u.inout.bytes = (qual & 0x7) + 1;
		vmexit->u.inout.in = in = (qual & 0x8) ? 1 : 0;
		vmexit->u.inout.string = (qual & 0x10) ? 1 : 0;
		vmexit->u.inout.rep = (qual & 0x20) ? 1 : 0;
		vmexit->u.inout.port = (uint16_t)(qual >> 16);
		vmexit->u.inout.eax = (uint32_t)(vmxctx->guest_rax);
		if (vmexit->u.inout.string) {
			inst_info = vmcs_read(VMCS_EXIT_INSTRUCTION_INFO);
			vmexit->exitcode = VM_EXITCODE_INOUT_STR;
			vis = &vmexit->u.inout_str;
			vmx_paging_info(&vis->paging);
			vis->rflags = vmcs_read(VMCS_GUEST_RFLAGS);
			vis->cr0 = vmcs_read(VMCS_GUEST_CR0);
			vis->index = inout_str_index(vmx, vcpu, in);
			vis->count = inout_str_count(vmx, vcpu, vis->inout.rep);
			vis->addrsize = inout_str_addrsize(inst_info);
			inout_str_seginfo(vmx, vcpu, inst_info, in, vis);
		}
		break;
	case EXIT_REASON_CPUID:
		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_CPUID, 1);
		handled = vmx_handle_cpuid(vmx->vm, vcpu, vmxctx);
		break;
	case EXIT_REASON_EXCEPTION:
		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_EXCEPTION, 1);
		intr_info = vmcs_read(VMCS_EXIT_INTR_INFO);
		KASSERT((intr_info & VMCS_INTR_VALID) != 0,
		    ("VM exit interruption info invalid: %#x", intr_info));

		/*
		 * If Virtual NMIs control is 1 and the VM-exit is due to a
		 * fault encountered during the execution of IRET then we must
		 * restore the state of "virtual-NMI blocking" before resuming
		 * the guest.
		 *
		 * See "Resuming Guest Software after Handling an Exception".
		 */
		if ((idtvec_info & VMCS_IDT_VEC_VALID) == 0 &&
		    (intr_info & 0xff) != IDT_DF &&
		    (intr_info & EXIT_QUAL_NMIUDTI) != 0)
			vmx_restore_nmi_blocking(vmx, vcpu);

		/*
		 * The NMI has already been handled in vmx_exit_handle_nmi().
		 */
		if ((intr_info & VMCS_INTR_T_MASK) == VMCS_INTR_T_NMI)
			return (1);
		break;
	case EXIT_REASON_EPT_FAULT:
		/*
		 * If 'gpa' lies within the address space allocated to
		 * memory then this must be a nested page fault; otherwise
		 * it must be an instruction that accesses MMIO space.
		 */
		gpa = vmcs_gpa();
		if (vm_mem_allocated(vmx->vm, gpa) ||
		    apic_access_fault(vmx, vcpu, gpa)) {
			vmexit->exitcode = VM_EXITCODE_PAGING;
			vmexit->u.paging.gpa = gpa;
			vmexit->u.paging.fault_type = ept_fault_type(qual);
			vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NESTED_FAULT, 1);
		} else if (ept_emulation_fault(qual)) {
			vmexit_inst_emul(vmexit, gpa, vmcs_gla());
			vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INST_EMUL, 1);
		}
		/*
		 * If Virtual NMIs control is 1 and the VM-exit is due to an
		 * EPT fault during the execution of IRET then we must restore
		 * the state of "virtual-NMI blocking" before resuming.
		 *
		 * See description of "NMI unblocking due to IRET" in
		 * "Exit Qualification for EPT Violations".
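		 * As in the exception case above, the indicator is the same
		 * EXIT_QUAL_NMIUDTI bit -- there it is taken from the exit
		 * interruption information, here from the exit qualification.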
		 */
		if ((idtvec_info & VMCS_IDT_VEC_VALID) == 0 &&
		    (qual & EXIT_QUAL_NMIUDTI) != 0)
			vmx_restore_nmi_blocking(vmx, vcpu);
		break;
	case EXIT_REASON_VIRTUALIZED_EOI:
		vmexit->exitcode = VM_EXITCODE_IOAPIC_EOI;
		vmexit->u.ioapic_eoi.vector = qual & 0xFF;
		vmexit->inst_length = 0;	/* trap-like */
		break;
	case EXIT_REASON_APIC_ACCESS:
		handled = vmx_handle_apic_access(vmx, vcpu, vmexit);
		break;
	case EXIT_REASON_APIC_WRITE:
		/*
		 * APIC-write VM exit is trap-like so the %rip is already
		 * pointing to the next instruction.
		 */
		vmexit->inst_length = 0;
		vlapic = vm_lapic(vmx->vm, vcpu);
		handled = vmx_handle_apic_write(vmx, vcpu, vlapic, qual);
		break;
	case EXIT_REASON_XSETBV:
		handled = vmx_emulate_xsetbv(vmx, vcpu, vmexit);
		break;
	default:
		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_UNKNOWN, 1);
		break;
	}

	if (handled) {
		/*
		 * It is possible that control is returned to userland
		 * even though we were able to handle the VM exit in the
		 * kernel.
		 *
		 * In such a case we want to make sure that the userland
		 * restarts guest execution at the instruction *after*
		 * the one we just processed. Therefore we update the
		 * guest rip in the VMCS and in 'vmexit'.
		 */
		vmexit->rip += vmexit->inst_length;
		vmexit->inst_length = 0;
		vmcs_write(VMCS_GUEST_RIP, vmexit->rip);
	} else {
		if (vmexit->exitcode == VM_EXITCODE_BOGUS) {
			/*
			 * If this VM exit was not claimed by anybody then
			 * treat it as a generic VMX exit.
			 */
			vmexit->exitcode = VM_EXITCODE_VMX;
			vmexit->u.vmx.status = VM_SUCCESS;
			vmexit->u.vmx.inst_type = 0;
			vmexit->u.vmx.inst_error = 0;
		} else {
			/*
			 * The exitcode and collateral have been populated.
			 * The VM exit will be processed further in userland.
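			 * For example, a VM_EXITCODE_INOUT exit carries the
			 * decoded port, size and direction so the userland
			 * consumer (typically bhyve(8)) can emulate the
			 * device access and resume the guest.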
			 */
		}
	}
	return (handled);
}

static __inline int
vmx_exit_astpending(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
{

	vmexit->rip = vmcs_guest_rip();
	vmexit->inst_length = 0;
	vmexit->exitcode = VM_EXITCODE_BOGUS;
	vmx_astpending_trace(vmx, vcpu, vmexit->rip);
	vmm_stat_incr(vmx->vm, vcpu, VMEXIT_ASTPENDING, 1);

	return (HANDLED);
}

static __inline int
vmx_exit_rendezvous(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
{

	vmexit->rip = vmcs_guest_rip();
	vmexit->inst_length = 0;
	vmexit->exitcode = VM_EXITCODE_RENDEZVOUS;
	vmm_stat_incr(vmx->vm, vcpu, VMEXIT_RENDEZVOUS, 1);

	return (UNHANDLED);
}

static __inline int
vmx_exit_inst_error(struct vmxctx *vmxctx, int rc, struct vm_exit *vmexit)
{

	KASSERT(vmxctx->inst_fail_status != VM_SUCCESS,
	    ("vmx_exit_inst_error: invalid inst_fail_status %d",
	    vmxctx->inst_fail_status));

	vmexit->inst_length = 0;
	vmexit->exitcode = VM_EXITCODE_VMX;
	vmexit->u.vmx.status = vmxctx->inst_fail_status;
	vmexit->u.vmx.inst_error = vmcs_instruction_error();
	vmexit->u.vmx.exit_reason = ~0;
	vmexit->u.vmx.exit_qualification = ~0;

	switch (rc) {
	case VMX_VMRESUME_ERROR:
	case VMX_VMLAUNCH_ERROR:
	case VMX_INVEPT_ERROR:
		vmexit->u.vmx.inst_type = rc;
		break;
	default:
		panic("vm_exit_inst_error: vmx_enter_guest returned %d", rc);
	}

	return (UNHANDLED);
}

/*
 * If the NMI-exiting VM execution control is set to '1' then an NMI in
 * non-root operation causes a VM-exit. NMI blocking is in effect so it is
 * sufficient to simply vector to the NMI handler via a software interrupt.
 * However, this must be done before maskable interrupts are enabled
 * otherwise the "iret" issued by an interrupt handler will incorrectly
 * clear NMI blocking.
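 *
 * This is why vmx_exit_handle_nmi() below asserts that PSL_I is still
 * clear and vectors to the handler with "int $2" (IDT_NMI) while the
 * host is still inside the NMI-blocked window.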
 */
static __inline void
vmx_exit_handle_nmi(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit)
{
	uint32_t intr_info;

	KASSERT((read_rflags() & PSL_I) == 0, ("interrupts enabled"));

	if (vmexit->u.vmx.exit_reason != EXIT_REASON_EXCEPTION)
		return;

	intr_info = vmcs_read(VMCS_EXIT_INTR_INFO);
	KASSERT((intr_info & VMCS_INTR_VALID) != 0,
	    ("VM exit interruption info invalid: %#x", intr_info));

	if ((intr_info & VMCS_INTR_T_MASK) == VMCS_INTR_T_NMI) {
		KASSERT((intr_info & 0xff) == IDT_NMI, ("VM exit due "
		    "to NMI has invalid vector: %#x", intr_info));
		VCPU_CTR0(vmx->vm, vcpuid, "Vectoring to NMI handler");
		__asm __volatile("int $2");
	}
}

static int
vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
    void *rendezvous_cookie, void *suspend_cookie)
{
	int rc, handled, launched;
	struct vmx *vmx;
	struct vm *vm;
	struct vmxctx *vmxctx;
	struct vmcs *vmcs;
	struct vm_exit *vmexit;
	struct vlapic *vlapic;
	uint64_t rip;
	uint32_t exit_reason;

	vmx = arg;
	vm = vmx->vm;
	vmcs = &vmx->vmcs[vcpu];
	vmxctx = &vmx->ctx[vcpu];
	vlapic = vm_lapic(vm, vcpu);
	vmexit = vm_exitinfo(vm, vcpu);
	launched = 0;

	KASSERT(vmxctx->pmap == pmap,
	    ("pmap %p different than ctx pmap %p", pmap, vmxctx->pmap));

	VMPTRLD(vmcs);

	/*
	 * XXX
	 * We do this every time because we may set up the virtual machine
	 * from a different process than the one that actually runs it.
	 *
	 * If the life of a virtual machine was spent entirely in the context
	 * of a single process we could do this once in vmx_vminit().
	 */
	vmcs_write(VMCS_HOST_CR3, rcr3());

	vmcs_write(VMCS_GUEST_RIP, startrip);
	vmx_set_pcpu_defaults(vmx, vcpu, pmap);
	do {
		/*
		 * Interrupts are disabled from this point on until the
		 * guest starts executing. This is done for the following
		 * reasons:
		 *
		 * If an AST is asserted on this thread after the check below,
		 * then the IPI_AST notification will not be lost, because it
		 * will cause a VM exit due to external interrupt as soon as
		 * the guest state is loaded.
		 *
		 * A posted interrupt after 'vmx_inject_interrupts()' will
		 * not be "lost" because it will be held pending in the host
		 * APIC because interrupts are disabled. The pending interrupt
		 * will be recognized as soon as the guest state is loaded.
		 *
		 * The same reasoning applies to the IPI generated by
		 * pmap_invalidate_ept().
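		 *
		 * In other words, any notification arriving between the
		 * checks below and the actual VM-entry is parked as a
		 * pending host interrupt and forces an immediate
		 * EXIT_REASON_EXT_INTR exit once the guest is entered, so
		 * it is observed on the next iteration of this loop.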
		 */
		disable_intr();
		if (vcpu_suspended(suspend_cookie)) {
			enable_intr();
			vm_exit_suspended(vmx->vm, vcpu, vmcs_guest_rip());
			handled = UNHANDLED;
			break;
		}

		if (vcpu_rendezvous_pending(rendezvous_cookie)) {
			enable_intr();
			handled = vmx_exit_rendezvous(vmx, vcpu, vmexit);
			break;
		}

		if (curthread->td_flags & (TDF_ASTPENDING | TDF_NEEDRESCHED)) {
			enable_intr();
			handled = vmx_exit_astpending(vmx, vcpu, vmexit);
			break;
		}

		vmx_inject_interrupts(vmx, vcpu, vlapic);
		vmx_run_trace(vmx, vcpu);
		rc = vmx_enter_guest(vmxctx, vmx, launched);

		/* Collect some information for VM exit processing */
		vmexit->rip = rip = vmcs_guest_rip();
		vmexit->inst_length = vmexit_instruction_length();
		vmexit->u.vmx.exit_reason = exit_reason = vmcs_exit_reason();
		vmexit->u.vmx.exit_qualification = vmcs_exit_qualification();

		if (rc == VMX_GUEST_VMEXIT) {
			vmx_exit_handle_nmi(vmx, vcpu, vmexit);
			enable_intr();
			handled = vmx_exit_process(vmx, vcpu, vmexit);
		} else {
			enable_intr();
			handled = vmx_exit_inst_error(vmxctx, rc, vmexit);
		}
		launched = 1;
		vmx_exit_trace(vmx, vcpu, rip, exit_reason, handled);
	} while (handled);

	/*
	 * If a VM exit has been handled then the exitcode must be BOGUS.
	 * If a VM exit is not handled then the exitcode must not be BOGUS.
	 */
	if ((handled && vmexit->exitcode != VM_EXITCODE_BOGUS) ||
	    (!handled && vmexit->exitcode == VM_EXITCODE_BOGUS)) {
		panic("Mismatch between handled (%d) and exitcode (%d)",
		    handled, vmexit->exitcode);
	}

	if (!handled)
		vmm_stat_incr(vm, vcpu, VMEXIT_USERSPACE, 1);

	VCPU_CTR1(vm, vcpu, "returning from vmx_run: exitcode %d",
	    vmexit->exitcode);

	VMCLEAR(vmcs);
	return (0);
}

static void
vmx_vmcleanup(void *arg)
{
	int i;
	struct vmx *vmx = arg;

	if (apic_access_virtualization(vmx, 0))
		vm_unmap_mmio(vmx->vm, DEFAULT_APIC_BASE, PAGE_SIZE);

	for (i = 0; i < VM_MAXCPU; i++)
		vpid_free(vmx->state[i].vpid);

	free(vmx, M_VMX);

	return;
}

static register_t *
vmxctx_regptr(struct vmxctx *vmxctx, int reg)
{

	switch (reg) {
	case VM_REG_GUEST_RAX:
		return (&vmxctx->guest_rax);
	case VM_REG_GUEST_RBX:
		return (&vmxctx->guest_rbx);
	case VM_REG_GUEST_RCX:
		return (&vmxctx->guest_rcx);
	case VM_REG_GUEST_RDX:
		return (&vmxctx->guest_rdx);
	case VM_REG_GUEST_RSI:
		return (&vmxctx->guest_rsi);
	case VM_REG_GUEST_RDI:
		return (&vmxctx->guest_rdi);
	case VM_REG_GUEST_RBP:
		return (&vmxctx->guest_rbp);
	case VM_REG_GUEST_R8:
		return (&vmxctx->guest_r8);
	case VM_REG_GUEST_R9:
		return (&vmxctx->guest_r9);
	case VM_REG_GUEST_R10:
		return (&vmxctx->guest_r10);
	case VM_REG_GUEST_R11:
		return (&vmxctx->guest_r11);
	case VM_REG_GUEST_R12:
		return (&vmxctx->guest_r12);
	case VM_REG_GUEST_R13:
		return (&vmxctx->guest_r13);
	case VM_REG_GUEST_R14:
		return (&vmxctx->guest_r14);
	case VM_REG_GUEST_R15:
		return (&vmxctx->guest_r15);
	case VM_REG_GUEST_CR2:
		return (&vmxctx->guest_cr2);
	default:
		break;
	}
	return (NULL);
}

static int
vmxctx_getreg(struct vmxctx *vmxctx, int reg, uint64_t *retval)
{
	register_t *regp;

	if ((regp = vmxctx_regptr(vmxctx, reg)) != NULL) {
		*retval = *regp;
		return (0);
	} else
		return (EINVAL);
}

static int
vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val)
{
	register_t *regp;

	if ((regp = vmxctx_regptr(vmxctx, reg)) != NULL) {
		*regp = val;
		return (0);
	} else
		return (EINVAL);
}

static int
vmx_shadow_reg(int reg)
{
	int shreg;

	shreg = -1;

	switch (reg) {
	case VM_REG_GUEST_CR0:
		shreg = VMCS_CR0_SHADOW;
		break;
	case VM_REG_GUEST_CR4:
		shreg = VMCS_CR4_SHADOW;
		break;
	default:
		break;
	}

	return (shreg);
}

static int
vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval)
{
	int running, hostcpu;
	struct vmx *vmx = arg;

	running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
	if (running && hostcpu != curcpu)
		panic("vmx_getreg: %s%d is running", vm_name(vmx->vm), vcpu);

	if (vmxctx_getreg(&vmx->ctx[vcpu], reg, retval) == 0)
		return (0);

	return (vmcs_getreg(&vmx->vmcs[vcpu], running, reg, retval));
}

static int
vmx_setreg(void *arg, int vcpu, int reg, uint64_t val)
{
	int error, hostcpu, running, shadow;
	uint64_t ctls;
	struct vmx *vmx = arg;

	running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
	if (running && hostcpu != curcpu)
		panic("vmx_setreg: %s%d is running", vm_name(vmx->vm), vcpu);

	if (vmxctx_setreg(&vmx->ctx[vcpu], reg, val) == 0)
		return (0);

	error = vmcs_setreg(&vmx->vmcs[vcpu], running, reg, val);

	if (error == 0) {
		/*
		 * If the "load EFER" VM-entry control is 1 then the
		 * value of EFER.LMA must be identical to the "IA-32e mode
		 * guest" bit in the VM-entry control.
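		 *
		 * For example, writing EFER with EFER_LMA set makes the code
		 * below also set VM_ENTRY_GUEST_LMA in the VM-entry controls;
		 * clearing EFER.LMA clears it again, keeping the two in sync.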
		 */
		if ((entry_ctls & VM_ENTRY_LOAD_EFER) != 0 &&
		    (reg == VM_REG_GUEST_EFER)) {
			vmcs_getreg(&vmx->vmcs[vcpu], running,
			    VMCS_IDENT(VMCS_ENTRY_CTLS), &ctls);
			if (val & EFER_LMA)
				ctls |= VM_ENTRY_GUEST_LMA;
			else
				ctls &= ~VM_ENTRY_GUEST_LMA;
			vmcs_setreg(&vmx->vmcs[vcpu], running,
			    VMCS_IDENT(VMCS_ENTRY_CTLS), ctls);
		}

		shadow = vmx_shadow_reg(reg);
		if (shadow > 0) {
			/*
			 * Store the unmodified value in the shadow
			 */
			error = vmcs_setreg(&vmx->vmcs[vcpu], running,
			    VMCS_IDENT(shadow), val);
		}
	}

	return (error);
}

static int
vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc)
{
	int hostcpu, running;
	struct vmx *vmx = arg;

	running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
	if (running && hostcpu != curcpu)
		panic("vmx_getdesc: %s%d is running", vm_name(vmx->vm), vcpu);

	return (vmcs_getdesc(&vmx->vmcs[vcpu], running, reg, desc));
}

static int
vmx_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc)
{
	int hostcpu, running;
	struct vmx *vmx = arg;

	running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
	if (running && hostcpu != curcpu)
		panic("vmx_setdesc: %s%d is running", vm_name(vmx->vm), vcpu);

	return (vmcs_setdesc(&vmx->vmcs[vcpu], running, reg, desc));
}

static int
vmx_getcap(void *arg, int vcpu, int type, int *retval)
{
	struct vmx *vmx = arg;
	int vcap;
	int ret;

	ret = ENOENT;

	vcap = vmx->cap[vcpu].set;

	switch (type) {
	case VM_CAP_HALT_EXIT:
		if (cap_halt_exit)
			ret = 0;
		break;
	case VM_CAP_PAUSE_EXIT:
		if (cap_pause_exit)
			ret = 0;
		break;
	case VM_CAP_MTRAP_EXIT:
		if (cap_monitor_trap)
			ret = 0;
		break;
	case VM_CAP_UNRESTRICTED_GUEST:
		if (cap_unrestricted_guest)
			ret = 0;
		break;
	case VM_CAP_ENABLE_INVPCID:
		if (cap_invpcid)
			ret = 0;
		break;
	default:
		break;
	}

	if (ret == 0)
		*retval = (vcap & (1 << type)) ? 1 : 0;

	return (ret);
}

static int
vmx_setcap(void *arg, int vcpu, int type, int val)
{
	struct vmx *vmx = arg;
	struct vmcs *vmcs = &vmx->vmcs[vcpu];
	uint32_t baseval;
	uint32_t *pptr;
	int error;
	int flag;
	int reg;
	int retval;

	retval = ENOENT;
	pptr = NULL;

	switch (type) {
	case VM_CAP_HALT_EXIT:
		if (cap_halt_exit) {
			retval = 0;
			pptr = &vmx->cap[vcpu].proc_ctls;
			baseval = *pptr;
			flag = PROCBASED_HLT_EXITING;
			reg = VMCS_PRI_PROC_BASED_CTLS;
		}
		break;
	case VM_CAP_MTRAP_EXIT:
		if (cap_monitor_trap) {
			retval = 0;
			pptr = &vmx->cap[vcpu].proc_ctls;
			baseval = *pptr;
			flag = PROCBASED_MTF;
			reg = VMCS_PRI_PROC_BASED_CTLS;
		}
		break;
	case VM_CAP_PAUSE_EXIT:
		if (cap_pause_exit) {
			retval = 0;
			pptr = &vmx->cap[vcpu].proc_ctls;
			baseval = *pptr;
			flag = PROCBASED_PAUSE_EXITING;
			reg = VMCS_PRI_PROC_BASED_CTLS;
		}
		break;
	case VM_CAP_UNRESTRICTED_GUEST:
		if (cap_unrestricted_guest) {
			retval = 0;
			pptr = &vmx->cap[vcpu].proc_ctls2;
			baseval = *pptr;
			flag = PROCBASED2_UNRESTRICTED_GUEST;
			reg = VMCS_SEC_PROC_BASED_CTLS;
		}
		break;
	case VM_CAP_ENABLE_INVPCID:
		if (cap_invpcid) {
			retval = 0;
			pptr = &vmx->cap[vcpu].proc_ctls2;
			baseval = *pptr;
			flag = PROCBASED2_ENABLE_INVPCID;
			reg = VMCS_SEC_PROC_BASED_CTLS;
		}
		break;
	default:
		break;
	}

	if (retval == 0) {
		if (val) {
			baseval |= flag;
		} else {
			baseval &= ~flag;
		}
		VMPTRLD(vmcs);
		error = vmwrite(reg, baseval);
		VMCLEAR(vmcs);

		if (error) {
			retval = error;
		} else {
			/*
			 * Update optional stored flags, and record
			 * setting
			 */
			if (pptr != NULL) {
				*pptr = baseval;
			}

			if (val) {
				vmx->cap[vcpu].set |= (1 << type);
			} else {
				vmx->cap[vcpu].set &= ~(1 << type);
			}
		}
	}

	return (retval);
}

struct vlapic_vtx {
	struct vlapic	vlapic;
	struct pir_desc	*pir_desc;
	struct vmx	*vmx;
};

#define	VMX_CTR_PIR(vm, vcpuid, pir_desc, notify, vector, level, msg)	\
do {									\
	VCPU_CTR2(vm, vcpuid, msg " assert %s-triggered vector %d",	\
	    level ? "level" : "edge", vector);				\
	VCPU_CTR1(vm, vcpuid, msg " pir0 0x%016lx", pir_desc->pir[0]);	\
	VCPU_CTR1(vm, vcpuid, msg " pir1 0x%016lx", pir_desc->pir[1]);	\
	VCPU_CTR1(vm, vcpuid, msg " pir2 0x%016lx", pir_desc->pir[2]);	\
	VCPU_CTR1(vm, vcpuid, msg " pir3 0x%016lx", pir_desc->pir[3]);	\
	VCPU_CTR1(vm, vcpuid, msg " notify: %s", notify ? "yes" : "no");\
} while (0)

/*
 * vlapic->ops handlers that utilize the APICv hardware assist described in
 * Chapter 29 of the Intel SDM.
 */
static int
vmx_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
	struct vlapic_vtx *vlapic_vtx;
	struct pir_desc *pir_desc;
	uint64_t mask;
	int idx, notify;

	vlapic_vtx = (struct vlapic_vtx *)vlapic;
	pir_desc = vlapic_vtx->pir_desc;

	/*
	 * Keep track of interrupt requests in the PIR descriptor. This is
	 * because the virtual APIC page pointed to by the VMCS cannot be
	 * modified if the vcpu is running.
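	 *
	 * The index/mask math below packs the 256 possible vectors into
	 * four 64-bit words: e.g. vector 65 maps to idx = 65 / 64 = 1 and
	 * mask = 1UL << (65 % 64) = 1UL << 1, i.e. bit 1 of pir[1].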
	 */
	idx = vector / 64;
	mask = 1UL << (vector % 64);
	atomic_set_long(&pir_desc->pir[idx], mask);
	notify = atomic_cmpset_long(&pir_desc->pending, 0, 1);

	VMX_CTR_PIR(vlapic->vm, vlapic->vcpuid, pir_desc, notify, vector,
	    level, "vmx_set_intr_ready");
	return (notify);
}

static int
vmx_pending_intr(struct vlapic *vlapic, int *vecptr)
{
	struct vlapic_vtx *vlapic_vtx;
	struct pir_desc *pir_desc;
	struct LAPIC *lapic;
	uint64_t pending, pirval;
	uint32_t ppr, vpr;
	int i;

	/*
	 * This function is only expected to be called from the 'HLT' exit
	 * handler which does not care about the vector that is pending.
	 */
	KASSERT(vecptr == NULL, ("vmx_pending_intr: vecptr must be NULL"));

	vlapic_vtx = (struct vlapic_vtx *)vlapic;
	pir_desc = vlapic_vtx->pir_desc;

	pending = atomic_load_acq_long(&pir_desc->pending);
	if (!pending)
		return (0);	/* common case */

	/*
	 * If there is an interrupt pending then it will be recognized only
	 * if its priority is greater than the processor priority.
	 *
	 * Special case: if the processor priority is zero then any pending
	 * interrupt will be recognized.
	 */
	lapic = vlapic->apic_page;
	ppr = lapic->ppr & 0xf0;
	if (ppr == 0)
		return (1);

	VCPU_CTR1(vlapic->vm, vlapic->vcpuid, "HLT with non-zero PPR %d",
	    lapic->ppr);

	for (i = 3; i >= 0; i--) {
		pirval = pir_desc->pir[i];
		if (pirval != 0) {
			vpr = (i * 64 + flsl(pirval) - 1) & 0xf0;
			return (vpr > ppr);
		}
	}
	return (0);
}

static void
vmx_intr_accepted(struct vlapic *vlapic, int vector)
{

	panic("vmx_intr_accepted: not expected to be called");
}

static void
vmx_set_tmr(struct vlapic *vlapic, int vector, bool level)
{
	struct vlapic_vtx *vlapic_vtx;
	struct vmx *vmx;
	struct vmcs *vmcs;
	uint64_t mask, val;

	KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector));
	KASSERT(!vcpu_is_running(vlapic->vm, vlapic->vcpuid, NULL),
	    ("vmx_set_tmr: vcpu cannot be running"));

	vlapic_vtx = (struct vlapic_vtx *)vlapic;
	vmx = vlapic_vtx->vmx;
	vmcs = &vmx->vmcs[vlapic->vcpuid];
	mask = 1UL << (vector % 64);

	VMPTRLD(vmcs);
	val = vmcs_read(VMCS_EOI_EXIT(vector));
	if (level)
		val |= mask;
	else
		val &= ~mask;
	vmcs_write(VMCS_EOI_EXIT(vector), val);
	VMCLEAR(vmcs);
}

static void
vmx_enable_x2apic_mode(struct vlapic *vlapic)
{
	struct vmx *vmx;
	struct vmcs *vmcs;
	uint32_t proc_ctls2;
	int vcpuid, error;

	vcpuid = vlapic->vcpuid;
	vmx = ((struct vlapic_vtx *)vlapic)->vmx;
	vmcs = &vmx->vmcs[vcpuid];

	proc_ctls2 = vmx->cap[vcpuid].proc_ctls2;
	KASSERT((proc_ctls2 & PROCBASED2_VIRTUALIZE_APIC_ACCESSES) != 0,
	    ("%s: invalid proc_ctls2 %#x", __func__, proc_ctls2));

	proc_ctls2 &= ~PROCBASED2_VIRTUALIZE_APIC_ACCESSES;
	proc_ctls2 |= PROCBASED2_VIRTUALIZE_X2APIC_MODE;
	vmx->cap[vcpuid].proc_ctls2 = proc_ctls2;

	VMPTRLD(vmcs);
	vmcs_write(VMCS_SEC_PROC_BASED_CTLS, proc_ctls2);
	VMCLEAR(vmcs);

	if (vlapic->vcpuid == 0) {
		/*
		 * The nested page table mappings are shared by all vcpus
		 * so unmap the APIC access page just once.
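		 * The mapping being removed backs guest accesses to
		 * DEFAULT_APIC_BASE (the architectural xAPIC default of
		 * 0xFEE00000); in x2APIC mode the guest reaches the APIC
		 * through MSRs instead of that page.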
		 */
		error = vm_unmap_mmio(vmx->vm, DEFAULT_APIC_BASE, PAGE_SIZE);
		KASSERT(error == 0, ("%s: vm_unmap_mmio error %d",
		    __func__, error));

		/*
		 * The MSR bitmap is shared by all vcpus so modify it only
		 * once in the context of vcpu 0.
		 */
		error = vmx_allow_x2apic_msrs(vmx);
		KASSERT(error == 0, ("%s: vmx_allow_x2apic_msrs error %d",
		    __func__, error));
	}
}

static void
vmx_post_intr(struct vlapic *vlapic, int hostcpu)
{

	ipi_cpu(hostcpu, pirvec);
}

/*
 * Transfer the pending interrupts in the PIR descriptor to the IRR
 * in the virtual APIC page.
 */
static void
vmx_inject_pir(struct vlapic *vlapic)
{
	struct vlapic_vtx *vlapic_vtx;
	struct pir_desc *pir_desc;
	struct LAPIC *lapic;
	uint64_t val, pirval;
	int rvi, pirbase = -1;
	uint16_t intr_status_old, intr_status_new;

	vlapic_vtx = (struct vlapic_vtx *)vlapic;
	pir_desc = vlapic_vtx->pir_desc;
	if (atomic_cmpset_long(&pir_desc->pending, 1, 0) == 0) {
		VCPU_CTR0(vlapic->vm, vlapic->vcpuid, "vmx_inject_pir: "
		    "no posted interrupt pending");
		return;
	}

	pirval = 0;
	pirbase = -1;
	lapic = vlapic->apic_page;

	val = atomic_readandclear_long(&pir_desc->pir[0]);
	if (val != 0) {
		lapic->irr0 |= val;
		lapic->irr1 |= val >> 32;
		pirbase = 0;
		pirval = val;
	}

	val = atomic_readandclear_long(&pir_desc->pir[1]);
	if (val != 0) {
		lapic->irr2 |= val;
		lapic->irr3 |= val >> 32;
		pirbase = 64;
		pirval = val;
	}

	val = atomic_readandclear_long(&pir_desc->pir[2]);
	if (val != 0) {
		lapic->irr4 |= val;
		lapic->irr5 |= val >> 32;
		pirbase = 128;
		pirval = val;
	}

	val = atomic_readandclear_long(&pir_desc->pir[3]);
	if (val != 0) {
		lapic->irr6 |= val;
		lapic->irr7 |= val >> 32;
		pirbase = 192;
		pirval = val;
	}

	VLAPIC_CTR_IRR(vlapic, "vmx_inject_pir");

	/*
	 * Update RVI so the processor can evaluate pending virtual
	 * interrupts on VM-entry.
	 *
	 * It is possible for pirval to be 0 here, even though the
	 * pending bit has been set. The scenario is:
	 * CPU-Y is sending a posted interrupt to CPU-X, which
	 * is running a guest and processing posted interrupts in h/w.
	 * CPU-X will eventually exit and the state seen in s/w is
	 * the pending bit set, but no PIR bits set.
	 *
	 *      CPU-X                      CPU-Y
	 *   (vm running)              (host running)
	 *
	 *   rx posted interrupt
	 *   CLEAR pending bit
	 *                              SET PIR bit
	 *   READ/CLEAR PIR bits
	 *                              SET pending bit
	 *   (vm exit)
	 *   pending bit set, PIR 0
	 */
	if (pirval != 0) {
		rvi = pirbase + flsl(pirval) - 1;
		intr_status_old = vmcs_read(VMCS_GUEST_INTR_STATUS);
		intr_status_new = (intr_status_old & 0xFF00) | rvi;
		if (intr_status_new > intr_status_old) {
			vmcs_write(VMCS_GUEST_INTR_STATUS, intr_status_new);
			VCPU_CTR2(vlapic->vm, vlapic->vcpuid, "vmx_inject_pir: "
			    "guest_intr_status changed from 0x%04x to 0x%04x",
			    intr_status_old, intr_status_new);
		}
	}
}

static struct vlapic *
vmx_vlapic_init(void *arg, int vcpuid)
{
	struct vmx *vmx;
	struct vlapic *vlapic;
	struct vlapic_vtx *vlapic_vtx;

	vmx = arg;

	vlapic = malloc(sizeof(struct vlapic_vtx), M_VLAPIC, M_WAITOK | M_ZERO);
	vlapic->vm = vmx->vm;
	vlapic->vcpuid = vcpuid;
	vlapic->apic_page = (struct LAPIC *)&vmx->apic_page[vcpuid];

	vlapic_vtx = (struct vlapic_vtx *)vlapic;
	vlapic_vtx->pir_desc = &vmx->pir_desc[vcpuid];
	vlapic_vtx->vmx = vmx;

	if (virtual_interrupt_delivery) {
		vlapic->ops.set_intr_ready = vmx_set_intr_ready;
		vlapic->ops.pending_intr = vmx_pending_intr;
		vlapic->ops.intr_accepted = vmx_intr_accepted;
		vlapic->ops.set_tmr = vmx_set_tmr;
		vlapic->ops.enable_x2apic_mode = vmx_enable_x2apic_mode;
	}

	if (posted_interrupts)
		vlapic->ops.post_intr = vmx_post_intr;

	vlapic_init(vlapic);

	return (vlapic);
}

static void
vmx_vlapic_cleanup(void *arg, struct vlapic *vlapic)
{

	vlapic_cleanup(vlapic);
	free(vlapic, M_VLAPIC);
}

struct vmm_ops vmm_ops_intel = {
	vmx_init,
	vmx_cleanup,
	vmx_restore,
	vmx_vminit,
	vmx_run,
	vmx_vmcleanup,
	vmx_getreg,
	vmx_setreg,
	vmx_getdesc,
	vmx_setdesc,
	vmx_getcap,
	vmx_setcap,
	ept_vmspace_alloc,
	ept_vmspace_free,
	vmx_vlapic_init,
	vmx_vlapic_cleanup,
};