1/* 2 * Copyright 2020, Data61, CSIRO (ABN 41 687 119 230) 3 * 4 * SPDX-License-Identifier: GPL-2.0-only 5 */ 6 7#include <config.h> 8 9#ifdef CONFIG_VTX 10 11#include <types.h> 12#include <machine/io.h> 13#include <api/failures.h> 14#include <api/syscall.h> 15#include <kernel/thread.h> 16#include <object/objecttype.h> 17#include <arch/machine/cpu_registers.h> 18#include <arch/model/statedata.h> 19#include <arch/object/vcpu.h> 20#include <arch/object/ioport.h> 21#include <util.h> 22#include <sel4/arch/vmenter.h> 23 24#define VMX_EXIT_QUAL_TYPE_MOV_CR 0 25#define VMX_EXIT_QUAL_TYPE_CLTS 2 26#define VMX_EXIT_QUAL_TYPE_LMSW 3 27 28#define VMXON_REGION_SIZE 4096 29 30const vcpu_gp_register_t crExitRegs[] = { 31 VCPU_EAX, VCPU_ECX, VCPU_EDX, VCPU_EBX, VCPU_ESP, VCPU_EBP, VCPU_ESI, VCPU_EDI 32}; 33 34typedef struct msr_bitmap { 35 word_t bitmap[0x2000 / sizeof(word_t) / 8]; 36} msr_bitmap_t; 37 38typedef struct msr_bitmaps { 39 msr_bitmap_t low_msr_read; 40 msr_bitmap_t high_msr_read; 41 msr_bitmap_t low_msr_write; 42 msr_bitmap_t high_msr_write; 43} msr_bitmaps_t; 44 45static struct PACKED { 46 uint32_t revision; 47 char data[VMXON_REGION_SIZE - sizeof(uint32_t)]; 48} vmxon_region ALIGN(VMXON_REGION_SIZE); 49 50static msr_bitmaps_t msr_bitmap_region ALIGN(BIT(seL4_PageBits)); 51 52static char null_ept_space[seL4_PageBits] ALIGN(BIT(seL4_PageBits)); 53 54/* Cached value of the hardware defined vmcs revision */ 55static uint32_t vmcs_revision; 56 57/* Cached value of the VPID capability MSR */ 58static vmx_ept_vpid_cap_msr_t vpid_capability; 59 60/* Cache the values that we calculated for bits that need to be set high 61 * and low in various vmcs fields */ 62static uint32_t pin_control_high; 63static uint32_t pin_control_low; 64static uint32_t primary_control_high; 65static uint32_t primary_control_low; 66static uint32_t secondary_control_high; 67static uint32_t secondary_control_low; 68static uint32_t entry_control_high; 69static uint32_t entry_control_low; 70static uint32_t exit_control_high; 71static uint32_t exit_control_low; 72static uint32_t cr0_high; 73static uint32_t cr0_low; 74static uint32_t cr4_high; 75static uint32_t cr4_low; 76 77/* these flags indicate the presence of specific VT-x features. These features 78 * are checked for at boot time and are constant from then on */ 79static bool_t vmx_feature_vpid; 80static bool_t vmx_feature_load_perf_global_ctrl; 81static bool_t vmx_feature_ack_on_exit; 82 83static vcpu_t *x86KSVPIDTable[VPID_LAST + 1]; 84static vpid_t x86KSNextVPID = VPID_FIRST; 85 86static inline bool_t vmxon(paddr_t vmxon_region) 87{ 88 uint8_t error; 89 /* vmxon requires a 64bit memory address, so perform a 90 * cast here to guarantee this on 32-bit platforms */ 91 uint64_t vmxonreg = vmxon_region; 92 asm volatile( 93 "vmxon %1; setnae %0" 94 : "=q"(error) 95 : "m"(vmxonreg) 96 : "memory", "cc" 97 ); 98 return !!error; 99} 100 101static void vmclear(void *vmcs_ptr) 102{ 103 uint64_t physical_address; 104 physical_address = pptr_to_paddr((void *)vmcs_ptr); 105 asm volatile( 106 "vmclear %0" 107 : 108 : "m"(physical_address) 109 : "cc" 110 ); 111} 112 113void clearCurrentVCPU(void) 114{ 115 vcpu_t *vcpu = ARCH_NODE_STATE(x86KSCurrentVCPU); 116 if (vcpu) { 117 vmclear(vcpu); 118 vcpu->launched = false; 119 ARCH_NODE_STATE(x86KSCurrentVCPU) = NULL; 120 } 121} 122 123static void vmptrld(void *vmcs_ptr) 124{ 125 uint64_t physical_address; 126 uint8_t error; 127 physical_address = pptr_to_paddr(vmcs_ptr); 128 asm volatile( 129 "vmptrld %1; setna %0" 130 : "=q"(error) 131 : "m"(physical_address) 132 : "cc" 133 ); 134 /* The usage of vmptrld should be correct by construction. As there is no 135 * capacity to propogate errors where vmptrld is used we will do our best 136 * to detect bugs in debug builds by asserting */ 137 assert(!error); 138} 139 140static void switchVCPU(vcpu_t *vcpu) 141{ 142#ifdef ENABLE_SMP_SUPPORT 143 if (vcpu->last_cpu != getCurrentCPUIndex() && ARCH_NODE_STATE_ON_CORE(x86KSCurrentVCPU, vcpu->last_cpu) == vcpu) { 144 /* vcpu is currently loaded on another core, need to do vmclear on that core */ 145 doRemoteClearCurrentVCPU(vcpu->last_cpu); 146 } 147#endif 148 clearCurrentVCPU(); 149 vmptrld(vcpu); 150#ifdef ENABLE_SMP_SUPPORT 151 if (vcpu->last_cpu != getCurrentCPUIndex()) { 152 /* migrate host state */ 153 vmwrite(VMX_HOST_TR_BASE, (word_t)&x86KSGlobalState[CURRENT_CPU_INDEX()].x86KStss); 154 vmwrite(VMX_HOST_GDTR_BASE, (word_t)x86KSGlobalState[CURRENT_CPU_INDEX()].x86KSgdt); 155 vmwrite(VMX_HOST_IDTR_BASE, (word_t)x86KSGlobalState[CURRENT_CPU_INDEX()].x86KSidt); 156 vmwrite(VMX_HOST_SYSENTER_ESP, (uint64_t)(word_t)((char *)&x86KSGlobalState[CURRENT_CPU_INDEX()].x86KStss.tss.words[0] + 157 4)); 158 } 159 vcpu->last_cpu = getCurrentCPUIndex(); 160#endif 161 ARCH_NODE_STATE(x86KSCurrentVCPU) = vcpu; 162} 163 164static void print_bits(word_t bits) 165{ 166 bool_t first = true; 167 while (bits) { 168 int index = seL4_WordBits - 1 - clzl(bits); 169 if (first) { 170 printf("%d", index); 171 first = false; 172 } else { 173 printf(",%d", index); 174 } 175 bits &= ~BIT(index); 176 } 177} 178 179static bool_t check_fixed_value(word_t val, word_t low, word_t high) 180{ 181 word_t not_high; 182 word_t not_low; 183 /* check if any bits that should be high, are not 184 * high & val represents the set of bits that are 185 * correctly set to high. if this is equal to high 186 * then everything is good, to detect exactly which 187 * bits were not high we can invert and mask with 188 * high again. Now if this is 0 everythins is fine, 189 * and if not each bit set indicates a bit we had 190 * failed to set */ 191 not_high = high & ~(high & val); 192 if (not_high != 0) { 193 printf("Failed to set bits: "); 194 print_bits(not_high); 195 return false; 196 } 197 /* we can do the same thing for finding things 198 * that should be low by first inverting */ 199 not_low = ~low & ~(~low & ~val); 200 if (not_low != 0) { 201 printf("Incorrectly cleared bits: "); 202 print_bits(not_low); 203 return false; 204 } 205 return true; 206} 207 208static bool_t vtx_check_fixed_values(word_t cr0, word_t cr4) 209{ 210 if (!check_fixed_value(cr0, cr0_low, cr0_high)) { 211 printf(" of CR0\n"); 212 return false; 213 } 214 if (!check_fixed_value(cr4, cr4_low, cr4_high)) { 215 printf(" of CR4\n"); 216 return false; 217 } 218 return true; 219} 220 221static bool_t BOOT_CODE init_vtx_fixed_values(bool_t useTrueMsrs) 222{ 223 uint32_t pin_control_mask = 224 BIT(0) | //Extern interrupt exiting 225 BIT(3) | //NMI exiting 226 BIT(5); //virtual NMIs 227 uint32_t primary_control_mask = 228 BIT(25) | //Use I/O bitmaps 229 BIT(28) | //Use MSR bitmaps 230 BIT(31); //Activate secondary controls 231 uint32_t secondary_control_mask = 232 BIT(1); //Enable EPT 233 uint32_t exit_control_mask = 234 BIT(2) | //Save debug controls 235 BIT(18) | //Save guest IA32_PAT on exit 236 BIT(19) | //Load host IA32_PAT 237 BIT(20) | //Save guest IA32_EFER on exit 238 BIT(21); //Load host IA32_EFER 239#ifdef CONFIG_ARCH_X86_64 240 exit_control_mask |= BIT(9); //Host address-space size 241#endif 242 /* Read out the fixed high and low bits from the MSRs */ 243 uint32_t pinbased_ctls; 244 uint32_t procbased_ctls; 245 uint32_t exit_ctls; 246 uint32_t entry_ctls; 247 if (useTrueMsrs) { 248 pinbased_ctls = IA32_VMX_TRUE_PINBASED_CTLS_MSR; 249 procbased_ctls = IA32_VMX_TRUE_PROCBASED_CTLS_MSR; 250 exit_ctls = IA32_VMX_TRUE_EXIT_CTLS_MSR; 251 entry_ctls = IA32_VMX_TRUE_ENTRY_CTLS_MSR; 252 } else { 253 pinbased_ctls = IA32_VMX_PINBASED_CTLS_MSR; 254 procbased_ctls = IA32_VMX_PROCBASED_CTLS_MSR; 255 exit_ctls = IA32_VMX_EXIT_CTLS_MSR; 256 entry_ctls = IA32_VMX_ENTRY_CTLS_MSR; 257 } 258 pin_control_high = x86_rdmsr_low(pinbased_ctls); 259 pin_control_low = x86_rdmsr_high(pinbased_ctls); 260 primary_control_high = x86_rdmsr_low(procbased_ctls); 261 primary_control_low = x86_rdmsr_high(procbased_ctls); 262 secondary_control_high = x86_rdmsr_low(IA32_VMX_PROCBASED_CTLS2_MSR); 263 secondary_control_low = x86_rdmsr_high(IA32_VMX_PROCBASED_CTLS2_MSR); 264 exit_control_high = x86_rdmsr_low(exit_ctls); 265 exit_control_low = x86_rdmsr_high(exit_ctls); 266 entry_control_high = x86_rdmsr_low(entry_ctls); 267 entry_control_low = x86_rdmsr_high(entry_ctls); 268 269 cr0_high = x86_rdmsr_low(IA32_VMX_CR0_FIXED0_MSR); 270 cr0_low = x86_rdmsr_low(IA32_VMX_CR0_FIXED1_MSR); 271 cr4_high = x86_rdmsr_low(IA32_VMX_CR4_FIXED0_MSR); 272 cr4_low = x86_rdmsr_low(IA32_VMX_CR4_FIXED1_MSR); 273 274 /* Check for VPID support */ 275 if (!(secondary_control_low & BIT(5))) { 276 vmx_feature_vpid = 0; 277 printf("vt-x: VPIDs are not supported. Expect performance degredation\n"); 278 } else { 279 vmx_feature_vpid = 1; 280 secondary_control_mask |= BIT(5); 281 } 282 283 /* Check for load perf global control */ 284 if (!(exit_control_low & BIT(12))) { 285 vmx_feature_load_perf_global_ctrl = 0; 286 printf("vt-x: Load IA32_PERF_GLOBAL_CONTROL not supported. Hardware debugging may not work\n"); 287 } else { 288 vmx_feature_load_perf_global_ctrl = 1; 289 exit_control_mask |= BIT(12); 290 } 291 292 /* Check for external interrupt exiting */ 293 if (!(exit_control_low & BIT(15))) { 294 vmx_feature_ack_on_exit = 0; 295 printf("vt-x: Interrupt ack on exit not supported. Expect performance degredation\n"); 296 } else { 297 vmx_feature_ack_on_exit = 1; 298 exit_control_mask |= BIT(15); 299 } 300 301 /* See if the hardware requires bits that require to be high to be low */ 302 uint32_t missing; 303 missing = (~pin_control_low) & pin_control_mask; 304 if (missing) { 305 printf("vt-x: Unsupported pin control features %lx\n", (long)missing); 306 return false; 307 } 308 missing = (~primary_control_low) & primary_control_mask; 309 if (missing) { 310 printf("vt-x: Unsupported primary control features %lx\n", (long)missing); 311 return false; 312 } 313 missing = (~secondary_control_low) & secondary_control_mask; 314 if (missing) { 315 printf("vt-x: Unsupported secondary control features %lx\n", (long)missing); 316 return false; 317 } 318 missing = (~exit_control_low) & exit_control_mask; 319 if (missing) { 320 printf("vt-x: Unsupported exit control features %lx\n", (long)missing); 321 return false; 322 } 323 324 /* Force the bits we require to be high */ 325 pin_control_high |= pin_control_mask; 326 primary_control_high |= primary_control_mask; 327 secondary_control_high |= secondary_control_mask; 328 exit_control_high |= exit_control_mask; 329 330 return true; 331} 332 333static bool_t BOOT_CODE check_vtx_fixed_values(bool_t useTrueMsrs) 334{ 335 uint32_t pinbased_ctls; 336 uint32_t procbased_ctls; 337 uint32_t exit_ctls; 338 uint32_t entry_ctls; 339 if (useTrueMsrs) { 340 pinbased_ctls = IA32_VMX_TRUE_PINBASED_CTLS_MSR; 341 procbased_ctls = IA32_VMX_TRUE_PROCBASED_CTLS_MSR; 342 exit_ctls = IA32_VMX_TRUE_EXIT_CTLS_MSR; 343 entry_ctls = IA32_VMX_TRUE_ENTRY_CTLS_MSR; 344 } else { 345 pinbased_ctls = IA32_VMX_PINBASED_CTLS_MSR; 346 procbased_ctls = IA32_VMX_PROCBASED_CTLS_MSR; 347 exit_ctls = IA32_VMX_EXIT_CTLS_MSR; 348 entry_ctls = IA32_VMX_ENTRY_CTLS_MSR; 349 } 350 uint32_t local_pin_control_high = x86_rdmsr_low(pinbased_ctls); 351 uint32_t local_pin_control_low = x86_rdmsr_high(pinbased_ctls); 352 uint32_t local_primary_control_high = x86_rdmsr_low(procbased_ctls); 353 uint32_t local_primary_control_low = x86_rdmsr_high(procbased_ctls); 354 uint32_t local_secondary_control_high = x86_rdmsr_low(IA32_VMX_PROCBASED_CTLS2_MSR); 355 uint32_t local_secondary_control_low = x86_rdmsr_high(IA32_VMX_PROCBASED_CTLS2_MSR); 356 uint32_t local_exit_control_high = x86_rdmsr_low(exit_ctls); 357 uint32_t local_exit_control_low = x86_rdmsr_high(exit_ctls); 358 uint32_t local_entry_control_high = x86_rdmsr_low(entry_ctls); 359 uint32_t local_entry_control_low = x86_rdmsr_high(entry_ctls); 360 361 uint32_t local_cr0_high = x86_rdmsr_low(IA32_VMX_CR0_FIXED0_MSR); 362 uint32_t local_cr0_low = x86_rdmsr_low(IA32_VMX_CR0_FIXED1_MSR); 363 uint32_t local_cr4_high = x86_rdmsr_low(IA32_VMX_CR4_FIXED0_MSR); 364 uint32_t local_cr4_low = x86_rdmsr_low(IA32_VMX_CR4_FIXED1_MSR); 365 366 /* We want to check that any bits that there are no bits that this core 367 * requires to be high, that the BSP did not require to be high. This can 368 * be checked with 'local_high & high == local_high'. 369 * Also need to make sure that the BSP has not determined that any bits should 370 * be high that this core requires to be low. This can be checked with 371 * '~local_low & high == 0' 372 */ 373 return 374 (local_pin_control_high & pin_control_high) == local_pin_control_high && 375 (~local_pin_control_low & pin_control_high) == 0 && 376 (local_primary_control_high & primary_control_high) == local_primary_control_high && 377 (~local_primary_control_low & primary_control_high) == 0 && 378 (local_secondary_control_high & secondary_control_high) == local_secondary_control_high && 379 (~local_secondary_control_low & secondary_control_high) == 0 && 380 (local_exit_control_high & exit_control_high) == local_exit_control_high && 381 (~local_exit_control_low & exit_control_high) == 0 && 382 (local_entry_control_high & entry_control_high) == local_entry_control_high && 383 (~local_entry_control_low & entry_control_high) == 0 && 384 local_cr0_high == cr0_high && 385 local_cr0_low == cr0_low && 386 local_cr4_high == cr4_high && 387 local_cr4_low == cr4_low; 388} 389 390static inline uint32_t applyFixedBits(uint32_t original, uint32_t high, uint32_t low) 391{ 392 original |= high; 393 original &= low; 394 return original; 395} 396 397void vcpu_init(vcpu_t *vcpu) 398{ 399 vcpu->vcpuTCB = NULL; 400 vcpu->launched = false; 401 402 memcpy(vcpu->vmcs, &vmcs_revision, 4); 403 404 switchVCPU(vcpu); 405 406 vcpu->cr0 = cr0_high & cr0_low; 407 vcpu->cr0_shadow = 0; 408 vcpu->cr0_mask = 0; 409 vcpu->exception_bitmap = 0; 410 vcpu->vpid = VPID_INVALID; 411#ifdef ENABLE_SMP_SUPPORT 412 vcpu->last_cpu = getCurrentCPUIndex(); 413#endif /* ENABLE_SMP_SUPPORT */ 414 415 vmwrite(VMX_HOST_PAT, x86_rdmsr(IA32_PAT_MSR)); 416 vmwrite(VMX_HOST_EFER, x86_rdmsr(IA32_EFER_MSR)); 417 // By default we will disable performance counters when we come out 418 // of a VM. When performance counters are supported this host state 419 // needs to be updated on VM entry 420 if (vmx_feature_load_perf_global_ctrl) { 421 vmwrite(VMX_HOST_PERF_GLOBAL_CTRL, 0); 422 } 423 vmwrite(VMX_HOST_CR0, read_cr0()); 424 vmwrite(VMX_HOST_CR4, read_cr4()); 425 vmwrite(VMX_HOST_FS_BASE, 0); 426 vmwrite(VMX_HOST_GS_BASE, 0); 427 vmwrite(VMX_HOST_TR_BASE, (word_t)&x86KSGlobalState[CURRENT_CPU_INDEX()].x86KStss); 428 vmwrite(VMX_HOST_GDTR_BASE, (word_t)x86KSGlobalState[CURRENT_CPU_INDEX()].x86KSgdt); 429 vmwrite(VMX_HOST_IDTR_BASE, (word_t)x86KSGlobalState[CURRENT_CPU_INDEX()].x86KSidt); 430 vmwrite(VMX_HOST_SYSENTER_CS, (word_t)SEL_CS_0); 431 vmwrite(VMX_HOST_SYSENTER_EIP, (word_t)&handle_syscall); 432 if (!config_set(CONFIG_HARDWARE_DEBUG_API)) { 433 vmwrite(VMX_HOST_SYSENTER_ESP, (uint64_t)(word_t)((char *)&x86KSGlobalState[CURRENT_CPU_INDEX()].x86KStss.tss.words[0] + 434 4)); 435 } 436 /* Set host SP to point just beyond the first field to be stored on exit. */ 437 vmwrite(VMX_HOST_RSP, (word_t)&vcpu->gp_registers[n_vcpu_gp_register]); 438 vmwrite(VMX_HOST_RIP, (word_t)&handle_vmexit); 439#ifdef CONFIG_KERNEL_SKIM_WINDOW 440 /* if we have a skim window then our host cr3 is a constant and is always the 441 * the kernel address space, so we set it here instead of lazily in restoreVMCS */ 442 vmwrite(VMX_HOST_CR3, makeCR3(kpptr_to_paddr(x64KSKernelPML4), 0).words[0]); 443#endif /* CONFIG_KERNEL_SKIM_WINDOW */ 444 445 vmwrite(VMX_HOST_ES_SELECTOR, SEL_DS_0); 446 vmwrite(VMX_HOST_CS_SELECTOR, SEL_CS_0); 447 vmwrite(VMX_HOST_SS_SELECTOR, SEL_DS_0); 448 vmwrite(VMX_HOST_DS_SELECTOR, SEL_DS_0); 449 vmwrite(VMX_HOST_FS_SELECTOR, 0); 450 vmwrite(VMX_HOST_GS_SELECTOR, 0); 451 vmwrite(VMX_HOST_TR_SELECTOR, SEL_TSS); 452 453 /* Set fixed VMCS control fields. */ 454 vmwrite(VMX_CONTROL_PIN_EXECUTION_CONTROLS, pin_control_high & pin_control_low); 455 vmwrite(VMX_CONTROL_PRIMARY_PROCESSOR_CONTROLS, primary_control_high & primary_control_low); 456 vmwrite(VMX_CONTROL_SECONDARY_PROCESSOR_CONTROLS, secondary_control_high & secondary_control_low); 457 vmwrite(VMX_CONTROL_EXIT_CONTROLS, exit_control_high & exit_control_low); 458 vmwrite(VMX_CONTROL_ENTRY_CONTROLS, entry_control_high & entry_control_low); 459 vmwrite(VMX_CONTROL_MSR_ADDRESS, (word_t)kpptr_to_paddr(&msr_bitmap_region)); 460 vmwrite(VMX_GUEST_CR0, vcpu->cr0); 461 vmwrite(VMX_GUEST_CR4, cr4_high & cr4_low); 462 463 vmwrite(VMX_GUEST_VMCS_LINK_POINTER, ~(word_t)0); 464 vmwrite(VMX_GUEST_VMCS_LINK_POINTER_HIGH, ~(word_t)0); 465 466 memset(vcpu->io, ~(word_t)0, VCPU_IOBITMAP_SIZE); 467 vmwrite(VMX_CONTROL_IOA_ADDRESS, pptr_to_paddr(vcpu->io)); 468 vmwrite(VMX_CONTROL_IOB_ADDRESS, pptr_to_paddr((char *)vcpu->io + (VCPU_IOBITMAP_SIZE / 2))); 469} 470 471static void dissociateVcpuTcb(tcb_t *tcb, vcpu_t *vcpu) 472{ 473 assert(tcb->tcbArch.tcbVCPU == vcpu); 474 assert(vcpu->vcpuTCB == tcb); 475 tcb->tcbArch.tcbVCPU = NULL; 476 vcpu->vcpuTCB = NULL; 477} 478 479void vcpu_finalise(vcpu_t *vcpu) 480{ 481 if (vcpu->vcpuTCB) { 482 dissociateVcpuTcb(vcpu->vcpuTCB, vcpu); 483 } 484 if (ARCH_NODE_STATE_ON_CORE(x86KSCurrentVCPU, vcpu->last_cpu) == vcpu) { 485#ifdef ENABLE_SMP_SUPPORT 486 if (vcpu->last_cpu != getCurrentCPUIndex()) { 487 doRemoteClearCurrentVCPU(vcpu->last_cpu); 488 } else 489#endif /* ENABLE_SMP_SUPPORT */ 490 { 491 clearCurrentVCPU(); 492 } 493 } 494} 495 496static void associateVcpuTcb(tcb_t *tcb, vcpu_t *vcpu) 497{ 498 if (tcb->tcbArch.tcbVCPU) { 499 dissociateVcpuTcb(tcb, tcb->tcbArch.tcbVCPU); 500 } 501 if (vcpu->vcpuTCB) { 502 dissociateVcpuTcb(vcpu->vcpuTCB, vcpu); 503 } 504 vcpu->vcpuTCB = tcb; 505 tcb->tcbArch.tcbVCPU = vcpu; 506} 507 508static exception_t invokeVCPUWriteRegisters(vcpu_t *vcpu, word_t *buffer) 509{ 510 int i; 511 for (i = 0; i < n_vcpu_gp_register; i++) { 512 vcpu->gp_registers[i] = getSyscallArg(i, buffer); 513 } 514 setThreadState(NODE_STATE(ksCurThread), ThreadState_Restart); 515 return EXCEPTION_NONE; 516} 517 518static exception_t decodeVCPUWriteRegisters(cap_t cap, word_t length, word_t *buffer) 519{ 520 if (length < 7) { 521 userError("VCPU WriteRegisters: Truncated message."); 522 current_syscall_error.type = seL4_TruncatedMessage; 523 return EXCEPTION_SYSCALL_ERROR; 524 } 525 return invokeVCPUWriteRegisters(VCPU_PTR(cap_vcpu_cap_get_capVCPUPtr(cap)), buffer); 526} 527 528static exception_t invokeEnableIOPort(vcpu_t *vcpu, cte_t *slot, cap_t cap, uint16_t low, uint16_t high) 529{ 530 /* remove any existing io ports from this cap */ 531 clearVPIDIOPortMappings(cap_io_port_cap_get_capIOPortVPID(cap), 532 cap_io_port_cap_get_capIOPortFirstPort(cap), 533 cap_io_port_cap_get_capIOPortLastPort(cap)); 534 /* update the assigned vpid. If the vcpu does not have a valid vpid then 535 * this is fine as whilst the cap will not point to the vcpu, the vcpu 536 * will have its port mask cleared when it gets assigned a vpid */ 537 cap = cap_io_port_cap_set_capIOPortVPID(cap, vcpu->vpid); 538 slot->cap = cap; 539 setIOPortMask(vcpu->io, low, high, false); 540 return EXCEPTION_NONE; 541} 542 543static exception_t decodeEnableIOPort(cap_t cap, word_t length, word_t *buffer, extra_caps_t excaps) 544{ 545 vcpu_t *vcpu; 546 cap_t ioCap; 547 cte_t *ioSlot; 548 uint16_t low, high; 549 550 if (length < 2) { 551 userError("VCPU EnableIOPort: Truncated message."); 552 current_syscall_error.type = seL4_TruncatedMessage; 553 return EXCEPTION_SYSCALL_ERROR; 554 } 555 if (excaps.excaprefs[0] == NULL) { 556 userError("VCPU EnableIOPort: Truncated message."); 557 current_syscall_error.type = seL4_TruncatedMessage; 558 return EXCEPTION_SYSCALL_ERROR; 559 } 560 ioSlot = excaps.excaprefs[0]; 561 ioCap = excaps.excaprefs[0]->cap; 562 563 if (cap_get_capType(ioCap) != cap_io_port_cap) { 564 userError("VCPU EnableIOPort: IOPort cap is not a IOPort cap."); 565 current_syscall_error.type = seL4_IllegalOperation; 566 return EXCEPTION_SYSCALL_ERROR; 567 } 568 569 low = getSyscallArg(0, buffer); 570 high = getSyscallArg(1, buffer); 571 572 if (low < cap_io_port_cap_get_capIOPortFirstPort(ioCap) || high > cap_io_port_cap_get_capIOPortLastPort(ioCap)) { 573 userError("VCPU EnableIOPort: Requested range not valid for given IOPort cap"); 574 current_syscall_error.type = seL4_InvalidArgument; 575 current_syscall_error.invalidArgumentNumber = 0; 576 return EXCEPTION_SYSCALL_ERROR; 577 } 578 579 vcpu = VCPU_PTR(cap_vcpu_cap_get_capVCPUPtr(cap)); 580 581 setThreadState(NODE_STATE(ksCurThread), ThreadState_Restart); 582 return invokeEnableIOPort(vcpu, ioSlot, ioCap, low, high); 583} 584 585static exception_t invokeDisableIOPort(vcpu_t *vcpu, uint16_t low, uint16_t high) 586{ 587 setIOPortMask(vcpu->io, low, high, true); 588 setThreadState(NODE_STATE(ksCurThread), ThreadState_Restart); 589 return EXCEPTION_NONE; 590} 591 592static exception_t decodeDisableIOPort(cap_t cap, word_t length, word_t *buffer) 593{ 594 vcpu_t *vcpu; 595 uint16_t low, high; 596 597 if (length < 2) { 598 userError("VCPU DisableIOPort: Truncated message."); 599 current_syscall_error.type = seL4_TruncatedMessage; 600 return EXCEPTION_SYSCALL_ERROR; 601 } 602 603 low = getSyscallArg(0, buffer); 604 high = getSyscallArg(1, buffer); 605 606 vcpu = VCPU_PTR(cap_vcpu_cap_get_capVCPUPtr(cap)); 607 608 return invokeDisableIOPort(vcpu, low, high); 609} 610 611static exception_t invokeWriteVMCS(vcpu_t *vcpu, word_t *buffer, word_t field, word_t value) 612{ 613 tcb_t *thread; 614 thread = NODE_STATE(ksCurThread); 615 if (ARCH_NODE_STATE(x86KSCurrentVCPU) != vcpu) { 616 switchVCPU(vcpu); 617 } 618 switch (field) { 619 case VMX_CONTROL_EXCEPTION_BITMAP: 620 vcpu->exception_bitmap = vcpu->cached_exception_bitmap = value; 621 break; 622 case VMX_GUEST_CR0: 623 vcpu->cr0 = vcpu->cached_cr0 = value; 624 break; 625 case VMX_CONTROL_CR0_MASK: 626 vcpu->cr0_mask = vcpu->cached_cr0_mask = value; 627 break; 628 case VMX_CONTROL_CR0_READ_SHADOW: 629 vcpu->cr0_shadow = vcpu->cached_cr0_shadow = value; 630 break; 631 } 632 setMR(thread, buffer, 0, value); 633 vmwrite(field, value); 634 setThreadState(NODE_STATE(ksCurThread), ThreadState_Restart); 635 return EXCEPTION_NONE; 636} 637 638static exception_t decodeWriteVMCS(cap_t cap, word_t length, word_t *buffer) 639{ 640 word_t field; 641 word_t value; 642 643 if (length < 2) { 644 userError("VCPU WriteVMCS: Not enough arguments."); 645 current_syscall_error.type = seL4_IllegalOperation; 646 return EXCEPTION_SYSCALL_ERROR; 647 } 648 649 field = getSyscallArg(0, buffer); 650 value = getSyscallArg(1, buffer); 651 switch (field) { 652 case VMX_GUEST_RIP: 653 case VMX_GUEST_RSP: 654 case VMX_GUEST_ES_SELECTOR: 655 case VMX_GUEST_CS_SELECTOR: 656 case VMX_GUEST_SS_SELECTOR: 657 case VMX_GUEST_DS_SELECTOR: 658 case VMX_GUEST_FS_SELECTOR: 659 case VMX_GUEST_GS_SELECTOR: 660 case VMX_GUEST_LDTR_SELECTOR: 661 case VMX_GUEST_TR_SELECTOR: 662 case VMX_GUEST_DEBUGCTRL: 663 case VMX_GUEST_PAT: 664 case VMX_GUEST_EFER: 665 case VMX_GUEST_PERF_GLOBAL_CTRL: 666 case VMX_GUEST_PDPTE0: 667 case VMX_GUEST_PDPTE1: 668 case VMX_GUEST_PDPTE2: 669 case VMX_GUEST_PDPTE3: 670 case VMX_GUEST_ES_LIMIT: 671 case VMX_GUEST_CS_LIMIT: 672 case VMX_GUEST_SS_LIMIT: 673 case VMX_GUEST_DS_LIMIT: 674 case VMX_GUEST_FS_LIMIT: 675 case VMX_GUEST_GS_LIMIT: 676 case VMX_GUEST_LDTR_LIMIT: 677 case VMX_GUEST_TR_LIMIT: 678 case VMX_GUEST_GDTR_LIMIT: 679 case VMX_GUEST_IDTR_LIMIT: 680 case VMX_GUEST_ES_ACCESS_RIGHTS: 681 case VMX_GUEST_CS_ACCESS_RIGHTS: 682 case VMX_GUEST_SS_ACCESS_RIGHTS: 683 case VMX_GUEST_DS_ACCESS_RIGHTS: 684 case VMX_GUEST_FS_ACCESS_RIGHTS: 685 case VMX_GUEST_GS_ACCESS_RIGHTS: 686 case VMX_GUEST_LDTR_ACCESS_RIGHTS: 687 case VMX_GUEST_TR_ACCESS_RIGHTS: 688 case VMX_GUEST_INTERRUPTABILITY: 689 case VMX_GUEST_ACTIVITY: 690 case VMX_GUEST_SMBASE: 691 case VMX_GUEST_SYSENTER_CS: 692 case VMX_GUEST_PREEMPTION_TIMER_VALUE: 693 case VMX_GUEST_ES_BASE: 694 case VMX_GUEST_CS_BASE: 695 case VMX_GUEST_SS_BASE: 696 case VMX_GUEST_DS_BASE: 697 case VMX_GUEST_FS_BASE: 698 case VMX_GUEST_GS_BASE: 699 case VMX_GUEST_LDTR_BASE: 700 case VMX_GUEST_TR_BASE: 701 case VMX_GUEST_GDTR_BASE: 702 case VMX_GUEST_IDTR_BASE: 703 case VMX_GUEST_DR7: 704 case VMX_GUEST_RFLAGS: 705 case VMX_GUEST_PENDING_DEBUG_EXCEPTIONS: 706 case VMX_GUEST_SYSENTER_ESP: 707 case VMX_GUEST_SYSENTER_EIP: 708 case VMX_CONTROL_CR0_MASK: 709 case VMX_CONTROL_CR4_MASK: 710 case VMX_CONTROL_CR0_READ_SHADOW: 711 case VMX_CONTROL_CR4_READ_SHADOW: 712 case VMX_GUEST_CR3: 713 case VMX_CONTROL_EXCEPTION_BITMAP: 714 case VMX_CONTROL_ENTRY_INTERRUPTION_INFO: 715 case VMX_CONTROL_ENTRY_EXCEPTION_ERROR_CODE: 716 break; 717 case VMX_CONTROL_PIN_EXECUTION_CONTROLS: 718 value = applyFixedBits(value, pin_control_high, pin_control_low); 719 break; 720 case VMX_CONTROL_PRIMARY_PROCESSOR_CONTROLS: 721 value = applyFixedBits(value, primary_control_high, primary_control_low); 722 break; 723 case VMX_CONTROL_SECONDARY_PROCESSOR_CONTROLS: 724 value = applyFixedBits(value, secondary_control_high, secondary_control_low); 725 break; 726 case VMX_CONTROL_EXIT_CONTROLS: 727 value = applyFixedBits(value, exit_control_high, exit_control_low); 728 break; 729 case VMX_GUEST_CR0: 730 value = applyFixedBits(value, cr0_high, cr0_low); 731 break; 732 case VMX_GUEST_CR4: 733 value = applyFixedBits(value, cr4_high, cr4_low); 734 break; 735 default: 736 userError("VCPU WriteVMCS: Invalid field %lx.", (long)field); 737 current_syscall_error.type = seL4_IllegalOperation; 738 return EXCEPTION_SYSCALL_ERROR; 739 } 740 return invokeWriteVMCS(VCPU_PTR(cap_vcpu_cap_get_capVCPUPtr(cap)), buffer, field, value); 741} 742 743static word_t readVMCSField(vcpu_t *vcpu, word_t field) 744{ 745 switch (field) { 746 case VMX_CONTROL_EXCEPTION_BITMAP: 747 return vcpu->exception_bitmap; 748 case VMX_GUEST_CR0: 749 return vcpu->cr0; 750 case VMX_CONTROL_CR0_MASK: 751 return vcpu->cr0_mask; 752 case VMX_CONTROL_CR0_READ_SHADOW: 753 return vcpu->cr0_shadow; 754 } 755 if (ARCH_NODE_STATE(x86KSCurrentVCPU) != vcpu) { 756 switchVCPU(vcpu); 757 } 758 return vmread(field); 759} 760 761static exception_t invokeReadVMCS(vcpu_t *vcpu, word_t field, word_t *buffer) 762{ 763 tcb_t *thread; 764 thread = NODE_STATE(ksCurThread); 765 766 setMR(thread, buffer, 0, readVMCSField(vcpu, field)); 767 setRegister(thread, msgInfoRegister, wordFromMessageInfo( 768 seL4_MessageInfo_new(0, 0, 0, 1))); 769 setThreadState(thread, ThreadState_Restart); 770 return EXCEPTION_NONE; 771} 772 773static exception_t decodeReadVMCS(cap_t cap, word_t length, word_t *buffer) 774{ 775 if (length < 1) { 776 userError("VCPU ReadVMCS: Not enough arguments."); 777 current_syscall_error.type = seL4_IllegalOperation; 778 return EXCEPTION_SYSCALL_ERROR; 779 } 780 word_t field = getSyscallArg(0, buffer); 781 switch (field) { 782 case VMX_GUEST_RIP: 783 case VMX_GUEST_RSP: 784 case VMX_GUEST_ES_SELECTOR: 785 case VMX_GUEST_CS_SELECTOR: 786 case VMX_GUEST_SS_SELECTOR: 787 case VMX_GUEST_DS_SELECTOR: 788 case VMX_GUEST_FS_SELECTOR: 789 case VMX_GUEST_GS_SELECTOR: 790 case VMX_GUEST_LDTR_SELECTOR: 791 case VMX_GUEST_TR_SELECTOR: 792 case VMX_GUEST_DEBUGCTRL: 793 case VMX_GUEST_PAT: 794 case VMX_GUEST_EFER: 795 case VMX_GUEST_PERF_GLOBAL_CTRL: 796 case VMX_GUEST_PDPTE0: 797 case VMX_GUEST_PDPTE1: 798 case VMX_GUEST_PDPTE2: 799 case VMX_GUEST_PDPTE3: 800 case VMX_GUEST_ES_LIMIT: 801 case VMX_GUEST_CS_LIMIT: 802 case VMX_GUEST_SS_LIMIT: 803 case VMX_GUEST_DS_LIMIT: 804 case VMX_GUEST_FS_LIMIT: 805 case VMX_GUEST_GS_LIMIT: 806 case VMX_GUEST_LDTR_LIMIT: 807 case VMX_GUEST_TR_LIMIT: 808 case VMX_GUEST_GDTR_LIMIT: 809 case VMX_GUEST_IDTR_LIMIT: 810 case VMX_GUEST_ES_ACCESS_RIGHTS: 811 case VMX_GUEST_CS_ACCESS_RIGHTS: 812 case VMX_GUEST_SS_ACCESS_RIGHTS: 813 case VMX_GUEST_DS_ACCESS_RIGHTS: 814 case VMX_GUEST_FS_ACCESS_RIGHTS: 815 case VMX_GUEST_GS_ACCESS_RIGHTS: 816 case VMX_GUEST_LDTR_ACCESS_RIGHTS: 817 case VMX_GUEST_TR_ACCESS_RIGHTS: 818 case VMX_GUEST_INTERRUPTABILITY: 819 case VMX_GUEST_ACTIVITY: 820 case VMX_GUEST_SMBASE: 821 case VMX_GUEST_SYSENTER_CS: 822 case VMX_GUEST_PREEMPTION_TIMER_VALUE: 823 case VMX_GUEST_ES_BASE: 824 case VMX_GUEST_CS_BASE: 825 case VMX_GUEST_SS_BASE: 826 case VMX_GUEST_DS_BASE: 827 case VMX_GUEST_FS_BASE: 828 case VMX_GUEST_GS_BASE: 829 case VMX_GUEST_LDTR_BASE: 830 case VMX_GUEST_TR_BASE: 831 case VMX_GUEST_GDTR_BASE: 832 case VMX_GUEST_IDTR_BASE: 833 case VMX_GUEST_DR7: 834 case VMX_GUEST_RFLAGS: 835 case VMX_GUEST_PENDING_DEBUG_EXCEPTIONS: 836 case VMX_GUEST_SYSENTER_ESP: 837 case VMX_GUEST_SYSENTER_EIP: 838 case VMX_CONTROL_CR0_MASK: 839 case VMX_CONTROL_CR4_MASK: 840 case VMX_CONTROL_CR0_READ_SHADOW: 841 case VMX_CONTROL_CR4_READ_SHADOW: 842 case VMX_DATA_INSTRUCTION_ERROR: 843 case VMX_DATA_EXIT_INTERRUPT_INFO: 844 case VMX_DATA_EXIT_INTERRUPT_ERROR: 845 case VMX_DATA_IDT_VECTOR_INFO: 846 case VMX_DATA_IDT_VECTOR_ERROR: 847 case VMX_DATA_EXIT_INSTRUCTION_LENGTH: 848 case VMX_DATA_EXIT_INSTRUCTION_INFO: 849 case VMX_DATA_GUEST_PHYSICAL: 850 case VMX_DATA_IO_RCX: 851 case VMX_DATA_IO_RSI: 852 case VMX_DATA_IO_RDI: 853 case VMX_DATA_IO_RIP: 854 case VMX_DATA_GUEST_LINEAR_ADDRESS: 855 case VMX_CONTROL_ENTRY_INTERRUPTION_INFO: 856 case VMX_CONTROL_PIN_EXECUTION_CONTROLS: 857 case VMX_CONTROL_PRIMARY_PROCESSOR_CONTROLS: 858 case VMX_CONTROL_EXCEPTION_BITMAP: 859 case VMX_CONTROL_EXIT_CONTROLS: 860 case VMX_GUEST_CR0: 861 case VMX_GUEST_CR3: 862 case VMX_GUEST_CR4: 863 break; 864 default: 865 userError("VCPU ReadVMCS: Invalid field %lx.", (long)field); 866 current_syscall_error.type = seL4_IllegalOperation; 867 return EXCEPTION_SYSCALL_ERROR; 868 } 869 return invokeReadVMCS(VCPU_PTR(cap_vcpu_cap_get_capVCPUPtr(cap)), field, buffer); 870} 871 872static exception_t invokeSetTCB(vcpu_t *vcpu, tcb_t *tcb) 873{ 874 associateVcpuTcb(tcb, vcpu); 875 876 setThreadState(NODE_STATE(ksCurThread), ThreadState_Restart); 877 return EXCEPTION_NONE; 878} 879 880static exception_t decodeSetTCB(cap_t cap, word_t length, word_t *buffer, extra_caps_t excaps) 881{ 882 cap_t tcbCap; 883 if (excaps.excaprefs[0] == NULL) { 884 userError("VCPU SetTCB: Truncated message."); 885 current_syscall_error.type = seL4_TruncatedMessage; 886 return EXCEPTION_SYSCALL_ERROR; 887 } 888 tcbCap = excaps.excaprefs[0]->cap; 889 890 if (cap_get_capType(tcbCap) != cap_thread_cap) { 891 userError("TCB cap is not a TCB cap."); 892 current_syscall_error.type = seL4_IllegalOperation; 893 return EXCEPTION_SYSCALL_ERROR; 894 } 895 896 return invokeSetTCB(VCPU_PTR(cap_vcpu_cap_get_capVCPUPtr(cap)), TCB_PTR(cap_thread_cap_get_capTCBPtr(tcbCap))); 897} 898 899void vcpu_update_state_sysvmenter(vcpu_t *vcpu) 900{ 901 word_t *buffer; 902 if (ARCH_NODE_STATE(x86KSCurrentVCPU) != vcpu) { 903 switchVCPU(vcpu); 904 } 905 buffer = lookupIPCBuffer(false, NODE_STATE(ksCurThread)); 906 if (!buffer) { 907 userError("No IPC buffer."); 908 return; 909 } 910 vmwrite(VMX_GUEST_RIP, getSyscallArg(0, buffer)); 911 vmwrite(VMX_CONTROL_PRIMARY_PROCESSOR_CONTROLS, applyFixedBits(getSyscallArg(1, buffer), primary_control_high, 912 primary_control_low)); 913 vmwrite(VMX_CONTROL_ENTRY_INTERRUPTION_INFO, getSyscallArg(2, buffer)); 914} 915 916void vcpu_sysvmenter_reply_to_user(tcb_t *tcb) 917{ 918 word_t *buffer; 919 vcpu_t *vcpu; 920 921 buffer = lookupIPCBuffer(true, tcb); 922 vcpu = tcb->tcbArch.tcbVCPU; 923 924 assert(vcpu); 925 926 if (ARCH_NODE_STATE(x86KSCurrentVCPU) != vcpu) { 927 switchVCPU(vcpu); 928 } 929 930 setMR(tcb, buffer, SEL4_VMENTER_CALL_EIP_MR, vmread(VMX_GUEST_RIP)); 931 setMR(tcb, buffer, SEL4_VMENTER_CALL_CONTROL_PPC_MR, vmread(VMX_CONTROL_PRIMARY_PROCESSOR_CONTROLS)); 932 933 setMR(tcb, buffer, SEL4_VMENTER_CALL_CONTROL_ENTRY_MR, vmread(VMX_CONTROL_ENTRY_INTERRUPTION_INFO)); 934 setRegister(tcb, msgInfoRegister, 0); 935} 936 937exception_t decodeX86VCPUInvocation( 938 word_t invLabel, 939 word_t length, 940 cptr_t cptr, 941 cte_t *slot, 942 cap_t cap, 943 extra_caps_t excaps, 944 word_t *buffer 945) 946{ 947 switch (invLabel) { 948 case X86VCPUSetTCB: 949 return decodeSetTCB(cap, length, buffer, excaps); 950 case X86VCPUReadVMCS: 951 return decodeReadVMCS(cap, length, buffer); 952 case X86VCPUWriteVMCS: 953 return decodeWriteVMCS(cap, length, buffer); 954 case X86VCPUEnableIOPort: 955 return decodeEnableIOPort(cap, length, buffer, excaps); 956 case X86VCPUDisableIOPort: 957 return decodeDisableIOPort(cap, length, buffer); 958 case X86VCPUWriteRegisters: 959 return decodeVCPUWriteRegisters(cap, length, buffer); 960 default: 961 userError("VCPU: Illegal operation."); 962 current_syscall_error.type = seL4_IllegalOperation; 963 return EXCEPTION_SYSCALL_ERROR; 964 } 965} 966 967static bool_t is_vtx_supported(void) 968{ 969 /* check for VMX support in CPUID 970 * see section 23.7 of Volume 3 of the Intel manual */ 971 return !!(x86_cpuid_ecx(0x1, 0) & BIT(5)); 972} 973 974static inline void clear_bit(word_t *bitmap, word_t bit) 975{ 976 int index = bit / (sizeof(word_t) * 8); 977 int offset = bit % (sizeof(word_t) * 8); 978 bitmap[index] &= ~BIT(offset); 979} 980 981BOOT_CODE bool_t vtx_init(void) 982{ 983 if (!is_vtx_supported()) { 984 printf("vt-x: not supported\n"); 985 return false; 986 } 987 vmx_basic_msr_t vmx_basic; 988 feature_control_msr_t feature_control; 989 vmx_basic.words[0] = x86_rdmsr_low(IA32_VMX_BASIC_MSR); 990 vmx_basic.words[1] = x86_rdmsr_high(IA32_VMX_BASIC_MSR); 991 vmcs_revision = vmx_basic_msr_get_vmcs_revision(vmx_basic); 992 feature_control.words[0] = x86_rdmsr_low(IA32_FEATURE_CONTROL_MSR); 993 if (!feature_control_msr_get_vmx_outside_smx(feature_control)) { 994 /* enable if the MSR is not locked */ 995 if (feature_control_msr_get_lock(feature_control)) { 996 printf("vt-x: feature locked\n"); 997 return false; 998 } 999 feature_control = feature_control_msr_set_vmx_outside_smx(feature_control, 1); 1000 x86_wrmsr_parts(IA32_FEATURE_CONTROL_MSR, x86_rdmsr_high(IA32_FEATURE_CONTROL_MSR), feature_control.words[0]); 1001 } 1002 /* make sure the msr is locked */ 1003 if (!feature_control_msr_get_lock(feature_control)) { 1004 feature_control = feature_control_msr_set_lock(feature_control, 1); 1005 x86_wrmsr_parts(IA32_FEATURE_CONTROL_MSR, x86_rdmsr_high(IA32_FEATURE_CONTROL_MSR), feature_control.words[0]); 1006 } 1007 /* Initialize the fixed values only on the boot core. All other cores 1008 * will just check that the fixed values are valid */ 1009 if (SMP_TERNARY(getCurrentCPUIndex(), 0) == 0) { 1010 if (!init_vtx_fixed_values(vmx_basic_msr_get_true_msrs(vmx_basic))) { 1011 printf("vt-x: lack of required features\n"); 1012 return false; 1013 } 1014 } 1015 if (!check_vtx_fixed_values(vmx_basic_msr_get_true_msrs(vmx_basic))) { 1016 printf("vt-x: cores have inconsistent features\n"); 1017 return false; 1018 } 1019 write_cr4(read_cr4() | CR4_VMXE); 1020 /* we are required to set the VMCS region in the VMXON region */ 1021 vmxon_region.revision = vmcs_revision; 1022 /* Before calling vmxon, we must check that CR0 and CR4 are not set to values 1023 * that are unsupported by vt-x */ 1024 if (!vtx_check_fixed_values(read_cr0(), read_cr4())) { 1025 return false; 1026 } 1027 if (vmxon(kpptr_to_paddr(&vmxon_region))) { 1028 printf("vt-x: vmxon failure\n"); 1029 return false; 1030 } 1031 memset(&msr_bitmap_region, ~0, sizeof(msr_bitmap_region)); 1032 /* Set sysenter MSRs to writeable and readable. These are all low msrs */ 1033 clear_bit(msr_bitmap_region.low_msr_read.bitmap, IA32_SYSENTER_CS_MSR); 1034 clear_bit(msr_bitmap_region.low_msr_read.bitmap, IA32_SYSENTER_ESP_MSR); 1035 clear_bit(msr_bitmap_region.low_msr_read.bitmap, IA32_SYSENTER_EIP_MSR); 1036 clear_bit(msr_bitmap_region.low_msr_write.bitmap, IA32_SYSENTER_CS_MSR); 1037 clear_bit(msr_bitmap_region.low_msr_write.bitmap, IA32_SYSENTER_ESP_MSR); 1038 clear_bit(msr_bitmap_region.low_msr_write.bitmap, IA32_SYSENTER_EIP_MSR); 1039 1040 /* The VMX_EPT_VPID_CAP MSR exists if VMX supports EPT or VPIDs. Whilst 1041 * VPID support is optional, EPT support is not and is already checked for, 1042 * so we know that this MSR is safe to read */ 1043 vpid_capability.words[0] = x86_rdmsr_low(IA32_VMX_EPT_VPID_CAP_MSR); 1044 vpid_capability.words[1] = x86_rdmsr_high(IA32_VMX_EPT_VPID_CAP_MSR); 1045 1046 /* check for supported EPT features */ 1047 if (!vmx_ept_vpid_cap_msr_get_ept_wb(vpid_capability)) { 1048 printf("vt-x: Expected wb attribute for EPT paging structure\n"); 1049 return false; 1050 } 1051 if (!vmx_ept_vpid_cap_msr_get_ept_2m(vpid_capability)) { 1052 printf("vt-x: Expected supported for 2m pages\n"); 1053 return false; 1054 } 1055 1056 return true; 1057} 1058 1059static void setMRs_vmexit(uint32_t reason, word_t qualification) 1060{ 1061 word_t *buffer; 1062 int i; 1063 1064 buffer = lookupIPCBuffer(true, NODE_STATE(ksCurThread)); 1065 1066 setMR(NODE_STATE(ksCurThread), buffer, SEL4_VMENTER_CALL_EIP_MR, vmread(VMX_GUEST_RIP)); 1067 setMR(NODE_STATE(ksCurThread), buffer, SEL4_VMENTER_CALL_CONTROL_PPC_MR, 1068 vmread(VMX_CONTROL_PRIMARY_PROCESSOR_CONTROLS)); 1069 setMR(NODE_STATE(ksCurThread), buffer, SEL4_VMENTER_CALL_CONTROL_ENTRY_MR, vmread(VMX_CONTROL_ENTRY_INTERRUPTION_INFO)); 1070 setMR(NODE_STATE(ksCurThread), buffer, SEL4_VMENTER_FAULT_REASON_MR, reason); 1071 setMR(NODE_STATE(ksCurThread), buffer, SEL4_VMENTER_FAULT_QUALIFICATION_MR, qualification); 1072 1073 setMR(NODE_STATE(ksCurThread), buffer, SEL4_VMENTER_FAULT_INSTRUCTION_LEN_MR, vmread(VMX_DATA_EXIT_INSTRUCTION_LENGTH)); 1074 setMR(NODE_STATE(ksCurThread), buffer, SEL4_VMENTER_FAULT_GUEST_PHYSICAL_MR, vmread(VMX_DATA_GUEST_PHYSICAL)); 1075 setMR(NODE_STATE(ksCurThread), buffer, SEL4_VMENTER_FAULT_RFLAGS_MR, vmread(VMX_GUEST_RFLAGS)); 1076 setMR(NODE_STATE(ksCurThread), buffer, SEL4_VMENTER_FAULT_GUEST_INT_MR, vmread(VMX_GUEST_INTERRUPTABILITY)); 1077 setMR(NODE_STATE(ksCurThread), buffer, SEL4_VMENTER_FAULT_CR3_MR, vmread(VMX_GUEST_CR3)); 1078 1079 for (i = 0; i < n_vcpu_gp_register; i++) { 1080 setMR(NODE_STATE(ksCurThread), buffer, SEL4_VMENTER_FAULT_EAX + i, 1081 NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->gp_registers[i]); 1082 } 1083} 1084 1085static void handleVmxFault(uint32_t reason, word_t qualification) 1086{ 1087 /* Indicate that we are returning the from VMEnter with a fault */ 1088 setRegister(NODE_STATE(ksCurThread), msgInfoRegister, SEL4_VMENTER_RESULT_FAULT); 1089 1090 setMRs_vmexit(reason, qualification); 1091 1092 /* Set the thread back to running */ 1093 setThreadState(NODE_STATE(ksCurThread), ThreadState_Running); 1094 1095 /* No need to schedule because this wasn't an interrupt and 1096 * we run at the same priority */ 1097 activateThread(); 1098} 1099 1100static inline void finishVmexitSaving(void) 1101{ 1102 vcpu_t *vcpu = ARCH_NODE_STATE(x86KSCurrentVCPU); 1103 assert(vcpu == NODE_STATE(ksCurThread)->tcbArch.tcbVCPU); 1104 vcpu->launched = true; 1105 /* Update our cache of what is in the vmcs. This is the only value 1106 * that we cache that can be modified by the guest during execution */ 1107 vcpu->cached_cr0 = vmread(VMX_GUEST_CR0); 1108 if (vcpuThreadUsingFPU(NODE_STATE(ksCurThread))) { 1109 /* If the vcpu owns the fpu then we did not modify the active cr0 to anything different 1110 * to what the VCPU owner requested, so we can update it with any modifications 1111 * the guest may have made */ 1112 vcpu->cr0 = vcpu->cached_cr0; 1113 } else { 1114 /* If the vcpu does not own the fpu then we will have forced the task switched flag 1115 * to be set in the cr0 that was put into the vmcs. Since the VCPU owner (or the guest) 1116 * may believe the task switched flag is not set, and the guest may have modified other 1117 * parts of cr0 during execution then to update the desired cr0 value to be the cr0 1118 * value from the vmcs (thus pulling in any modifications the guest made) but removing 1119 * the task switched flag that we set and then adding back in the task switched flag 1120 * that may be in the desired current cr0 */ 1121 vcpu->cr0 = (vcpu->cached_cr0 & ~CR0_TASK_SWITCH) | (NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->cr0 & CR0_TASK_SWITCH); 1122 } 1123} 1124 1125exception_t handleVmexit(void) 1126{ 1127 uint32_t interrupt; 1128 /* qualification is host width, reason is defined as being 32 bit */ 1129 word_t qualification; 1130 uint32_t reason; 1131 finishVmexitSaving(); 1132 /* the basic exit reason is the bottom 16 bits of the exit reason field */ 1133 reason = vmread(VMX_DATA_EXIT_REASON) & MASK(16); 1134 if (reason == EXTERNAL_INTERRUPT) { 1135 if (vmx_feature_ack_on_exit) { 1136 interrupt = vmread(VMX_DATA_EXIT_INTERRUPT_INFO); 1137 ARCH_NODE_STATE(x86KScurInterrupt) = interrupt & 0xff; 1138 NODE_LOCK_IRQ_IF(interrupt != int_remote_call_ipi); 1139 handleInterruptEntry(); 1140 } else { 1141 /* poll for the pending irq. We will then handle it once we return back 1142 * up to restore_user_context */ 1143 receivePendingIRQ(); 1144 } 1145 return EXCEPTION_NONE; 1146 } 1147 1148 NODE_LOCK_SYS; 1149 1150 if (!vcpuThreadUsingFPU(NODE_STATE(ksCurThread))) { 1151 /* since this vcpu does not currently own the fpu state, check if the kernel should 1152 * switch the fpu owner or not. We switch if the guest performed and unimplemented device 1153 * exception AND the owner of this vcpu has not requested that these exceptions be forwarded 1154 * to them (i.e. if they have not explicitly set the unimplemented device exception in the 1155 * exception_bitmap) */ 1156 if (reason == EXCEPTION_OR_NMI && !(NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->exception_bitmap & BIT(int_unimpl_dev))) { 1157 interrupt = vmread(VMX_DATA_EXIT_INTERRUPT_INFO); 1158 /* The exception number is the bottom 8 bits of the interrupt info */ 1159 if ((interrupt & 0xff) == int_unimpl_dev) { 1160 switchLocalFpuOwner(&NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->fpuState); 1161 return EXCEPTION_NONE; 1162 } 1163 } else if (reason == CONTROL_REGISTER && !(NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->cr0_mask & CR0_TASK_SWITCH)) { 1164 /* we get here if the guest is attempting to write to a control register that is set (by 1165 * a 1 bit in the cr0 mask) as being owned by the host. If we got here then the previous check 1166 * on cr0_mask meant that the VCPU owner did not claim ownership of the the task switch bit 1167 * however we may have temporarily claimed ownership for the purposes of FPU switching. 1168 * At this point we could still have a false positive, as the guest could be attempted to 1169 * manipulate bits that are not task switch, so we still have to be careful and propogate 1170 * all or some of an attempted write */ 1171 qualification = vmread(VMX_DATA_EXIT_QUALIFICATION); 1172 vmx_data_exit_qualification_control_regster_t qual; 1173 qual.words[0] = qualification; 1174 /* We only care about some of the exit qualification cases, we handle them here 1175 * and will deliver any others through to fault handler */ 1176 switch (vmx_data_exit_qualification_control_regster_get_access_type(qual)) { 1177 case VMX_EXIT_QUAL_TYPE_MOV_CR: { 1178 /* check for cr0 */ 1179 if (vmx_data_exit_qualification_control_regster_get_cr(qual) == 0) { 1180 vcpu_gp_register_t source = crExitRegs[vmx_data_exit_qualification_control_regster_get_reg(qual)]; 1181 word_t value; 1182 if (source == VCPU_ESP) { 1183 /* ESP is the only register that is is not part of the general purpose 1184 * registers that we have to save and restore ourselves, so we need to 1185 * get this one from the vmcs */ 1186 value = vmread(VMX_GUEST_RSP); 1187 } else { 1188 value = NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->gp_registers[source]; 1189 } 1190 /* First unset the task switch bit in cr0 */ 1191 NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->cr0 &= ~CR0_TASK_SWITCH; 1192 /* now set it to the value we were given */ 1193 NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->cr0 |= value & CR0_TASK_SWITCH; 1194 /* check if there are any parts of the write remaining to forward. we only need 1195 * to consider bits that the hardware will not have handled without faulting, which 1196 * is writing any bit such that it is different to the shadow, but only considering 1197 * bits that the VCPU owner has declared that they want to own (via the cr0_shadow) 1198 */ 1199 if (!((value ^ NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->cr0_shadow) & 1200 NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->cr0_mask)) { 1201 return EXCEPTION_NONE; 1202 } 1203 } 1204 break; 1205 } 1206 case VMX_EXIT_QUAL_TYPE_CLTS: { 1207 /* Easy case. Just remove the task switch bit out of cr0 */ 1208 NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->cr0 &= ~CR0_TASK_SWITCH; 1209 return EXCEPTION_NONE; 1210 } 1211 case VMX_EXIT_QUAL_TYPE_LMSW: { 1212 uint16_t value = vmx_data_exit_qualification_control_regster_get_data(qual); 1213 /* First unset the task switch bit in cr0 */ 1214 NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->cr0 &= ~CR0_TASK_SWITCH; 1215 /* now set it to the value we were given */ 1216 NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->cr0 |= value & CR0_TASK_SWITCH; 1217 /* check if there are any parts of the write remaining to forward. we only need 1218 * to consider bits that the hardware will not have handled without faulting, which 1219 * is writing any bit such that it is different to the shadow, but only considering 1220 * bits that the VCPU owner has declared that they want to own (via the cr0_shadow). 1221 * Additionally since LMSW only loads the bottom 4 bits of CR0 we only consider 1222 * the low 4 bits 1223 */ 1224 if (!((value ^ NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->cr0_shadow) & 1225 NODE_STATE(ksCurThread)->tcbArch.tcbVCPU->cr0_mask & MASK(4))) { 1226 return EXCEPTION_NONE; 1227 } 1228 break; 1229 } 1230 } 1231 } 1232 } 1233 switch (reason) { 1234 case EXCEPTION_OR_NMI: 1235 case MOV_DR: 1236 case TASK_SWITCH: 1237 case CONTROL_REGISTER: 1238 case IO: 1239 case MWAIT: 1240 case SIPI: 1241 case INVLPG: 1242 case INVEPT: 1243 case INVVPID: 1244 case VMCLEAR: 1245 case VMPTRLD: 1246 case VMPTRST: 1247 case VMREAD: 1248 case VMWRITE: 1249 case VMXON: 1250 case EPT_VIOLATION: 1251 case GDTR_OR_IDTR: 1252 case LDTR_OR_TR: 1253 case TPR_BELOW_THRESHOLD: 1254 case APIC_ACCESS: 1255 qualification = vmread(VMX_DATA_EXIT_QUALIFICATION); 1256 break; 1257 default: 1258 qualification = 0; 1259 } 1260 1261 handleVmxFault(reason, qualification); 1262 1263 return EXCEPTION_NONE; 1264} 1265 1266exception_t handleVmEntryFail(void) 1267{ 1268 handleVmxFault(-1, -1); 1269 1270 return EXCEPTION_NONE; 1271} 1272 1273#ifdef ENABLE_SMP_SUPPORT 1274void VMCheckBoundNotification(tcb_t *tcb) 1275{ 1276 /* We want to check if the VM we are currently running has received 1277 * a message on its bound notification object. This check is done 1278 * in c_traps when we first peform a SysVMEnter, but we could presently 1279 * be running a VM and another core may have placed a message on the 1280 * endpoint 1281 */ 1282 assert(tcb->tcbAffinity == getCurrentCPUIndex()); 1283 notification_t *ntfnPtr = tcb->tcbBoundNotification; 1284 if (thread_state_ptr_get_tsType(&tcb->tcbState) == ThreadState_RunningVM 1285 && ntfnPtr && notification_ptr_get_state(ntfnPtr) == NtfnState_Active) { 1286 1287 word_t badge = notification_ptr_get_ntfnMsgIdentifier(ntfnPtr); 1288 notification_ptr_set_state(ntfnPtr, NtfnState_Idle); 1289 setThreadState(tcb, ThreadState_Running); 1290 setRegister(tcb, badgeRegister, badge); 1291 Arch_leaveVMAsyncTransfer(tcb); 1292 /* In the process of performing Arch_leavVMAsyncTransfer we will have 1293 * had to switch the active VMCS. As a result we might as well try and 1294 * run this tcb if it is permitted instead of switching VMCS contexts 1295 * back and forth */ 1296 if (tcb != NODE_STATE(ksCurThread)) { 1297 possibleSwitchTo(tcb); 1298 } 1299 } 1300} 1301#endif /* ENABLE_SMP_SUPPORT */ 1302 1303static void invvpid_context(uint16_t vpid) 1304{ 1305 struct { 1306 uint64_t vpid : 16; 1307 uint64_t rsvd : 48; 1308 uint64_t address; 1309 } PACKED operand = {vpid, 0, 0}; 1310 asm volatile("invvpid %0, %1" :: "m"(operand), "r"((word_t)1) : "cc"); 1311} 1312 1313static void setEPTRoot(cap_t vmxSpace, vcpu_t *vcpu) 1314{ 1315 paddr_t ept_root; 1316 if (cap_get_capType(vmxSpace) != cap_ept_pml4_cap || 1317 !cap_ept_pml4_cap_get_capPML4IsMapped(vmxSpace)) { 1318 ept_root = kpptr_to_paddr(null_ept_space); 1319 } else { 1320 findEPTForASID_ret_t find_ret; 1321 ept_pml4e_t *pml4; 1322 1323 pml4 = (ept_pml4e_t *)cap_ept_pml4_cap_get_capPML4BasePtr(vmxSpace); 1324 find_ret = findEPTForASID(cap_ept_pml4_cap_get_capPML4MappedASID(vmxSpace)); 1325 if (find_ret.status != EXCEPTION_NONE || find_ret.ept != pml4) { 1326 ept_root = kpptr_to_paddr(null_ept_space); 1327 } else { 1328 ept_root = pptr_to_paddr(pml4); 1329 } 1330 } 1331 if (ept_root != vcpu->last_ept_root) { 1332 vcpu->last_ept_root = ept_root; 1333 vmx_eptp_t eptp = vmx_eptp_new( 1334 ept_root, /* paddr of ept */ 1335 0, /* do not use accessed and dirty flags */ 1336 3, /* depth (4) minus 1 of desired table walking */ 1337 6 /* write back memory type */ 1338 ); 1339 vmwrite(VMX_CONTROL_EPT_POINTER, eptp.words[0]); 1340 assert(vcpu->vpid != VPID_INVALID); 1341 if (vmx_feature_vpid) { 1342 invvpid_context(vcpu->vpid); 1343 } 1344 } 1345} 1346 1347static void handleLazyFpu(void) 1348{ 1349 vcpu_t *vcpu = NODE_STATE(ksCurThread)->tcbArch.tcbVCPU; 1350 word_t cr0 = vcpu->cr0; 1351 word_t exception_bitmap = vcpu->exception_bitmap; 1352 word_t cr0_mask = vcpu->cr0_mask; 1353 word_t cr0_shadow = vcpu->cr0_shadow; 1354 /* if the vcpu actually owns the fpu then we do not need to change any bits 1355 * and so we will put into the vmcs precisely what the VCPU owner has requested */ 1356 if (!vcpuThreadUsingFPU(NODE_STATE(ksCurThread))) { 1357 /* when the vcpu doesn't own the fpu we need to force the task switched flag 1358 * so that we can take an exception and perform lazy fpu switching */ 1359 cr0 |= CR0_TASK_SWITCH; 1360 /* once we have forced the task switched flag we also need to handle 1361 * unimplemented device exceptions so we force these to trap. In the case 1362 * where the VCPU owner had already set these to trap then this well be 1363 * checked in handleVmexit and the exception will get forwarded */ 1364 exception_bitmap |= BIT(int_unimpl_dev); 1365 /* we need to claim ownership of the task switch bit so that any modifications 1366 * of it cause an exception and we must do this regardless of whether the 1367 * VCPU owner also wants to claim ownership. In the scenario where the VCPU 1368 * owner had also claimed ownership then in handleVmexit we won't actually 1369 * do any fpu switching and will forward the fault */ 1370 cr0_mask |= CR0_TASK_SWITCH; 1371 /* since we have forced a value in the cr0 mask we need to set an appropriate value 1372 * in the cr0 read shadow. If the VCPU owner is also masking this bit then 1373 * we should use the value they have put in the cr0 read shadow. If they aren't 1374 * then the read shadow will contain garbage and we should instead set the 1375 * read shadow to the actual desired cr0 value */ 1376 if (!(vcpu->cr0_mask & CR0_TASK_SWITCH)) { 1377 cr0_shadow &= ~CR0_TASK_SWITCH; 1378 cr0_shadow |= vcpu->cr0 & CR0_TASK_SWITCH; 1379 } 1380 } 1381 if (cr0 != vcpu->cached_cr0) { 1382 vmwrite(VMX_GUEST_CR0, cr0); 1383 vcpu->cached_cr0 = cr0; 1384 } 1385 if (exception_bitmap != vcpu->cached_exception_bitmap) { 1386 vmwrite(VMX_CONTROL_EXCEPTION_BITMAP, exception_bitmap); 1387 vcpu->cached_exception_bitmap = exception_bitmap; 1388 } 1389 if (cr0_mask != vcpu->cached_cr0_mask) { 1390 vmwrite(VMX_CONTROL_CR0_MASK, cr0_mask); 1391 vcpu->cached_cr0_mask = cr0_mask; 1392 } 1393 if (cr0_shadow != vcpu->cached_cr0_shadow) { 1394 vmwrite(VMX_CONTROL_CR0_READ_SHADOW, cr0_shadow); 1395 vcpu->cached_cr0_shadow = cr0_shadow; 1396 } 1397} 1398 1399void clearVPIDIOPortMappings(vpid_t vpid, uint16_t first, uint16_t last) 1400{ 1401 if (vpid == VPID_INVALID) { 1402 return; 1403 } 1404 vcpu_t *vcpu = x86KSVPIDTable[vpid]; 1405 if (vcpu == NULL) { 1406 return; 1407 } 1408 assert(vcpu->vpid == vpid); 1409 setIOPortMask(vcpu->io, first, last, true); 1410} 1411 1412static inline vpid_t nextVPID(vpid_t vpid) 1413{ 1414 if (vpid == VPID_LAST) { 1415 return VPID_FIRST; 1416 } else { 1417 return vpid + 1; 1418 } 1419} 1420 1421static void invalidateVPID(vpid_t vpid) 1422{ 1423 vcpu_t *vcpu = x86KSVPIDTable[vpid]; 1424 /* clear the IO bitmap as when we sever the VPID asignment we lose 1425 * the ability for the references in IO port capabilities to invalidate */ 1426 memset(vcpu->io, ~0, sizeof(vcpu->io)); 1427 /* invalidate the VPID context */ 1428 if (vmx_feature_vpid) { 1429 invvpid_context(vpid); 1430 } 1431} 1432 1433static vpid_t findFreeVPID(void) 1434{ 1435 vpid_t vpid; 1436 1437 vpid = x86KSNextVPID; 1438 do { 1439 if (x86KSVPIDTable[vpid] == NULL) { 1440 return vpid; 1441 } 1442 vpid = nextVPID(vpid); 1443 } while (vpid != x86KSNextVPID); 1444 1445 /* Forcively take the next VPID */ 1446 vpid = x86KSNextVPID; 1447 invalidateVPID(vpid); 1448 1449 x86KSVPIDTable[vpid]->vpid = VPID_INVALID; 1450 x86KSVPIDTable[vpid] = NULL; 1451 1452 x86KSNextVPID = nextVPID(x86KSNextVPID); 1453 return vpid; 1454} 1455 1456static void storeVPID(vcpu_t *vcpu, vpid_t vpid) 1457{ 1458 assert(x86KSVPIDTable[vpid] == NULL); 1459 assert(vcpu->vpid == VPID_INVALID); 1460 x86KSVPIDTable[vpid] = vcpu; 1461 vcpu->vpid = vpid; 1462} 1463 1464void restoreVMCS(void) 1465{ 1466 vcpu_t *expected_vmcs = NODE_STATE(ksCurThread)->tcbArch.tcbVCPU; 1467 1468 /* Check that the right VMCS is active and current. */ 1469 if (ARCH_NODE_STATE(x86KSCurrentVCPU) != expected_vmcs) { 1470 switchVCPU(expected_vmcs); 1471 } 1472 1473#ifndef CONFIG_KERNEL_SKIM_WINDOW 1474 if (getCurrentCR3().words[0] != expected_vmcs->last_host_cr3) { 1475 expected_vmcs->last_host_cr3 = getCurrentCR3().words[0]; 1476 vmwrite(VMX_HOST_CR3, getCurrentCR3().words[0]); 1477 } 1478#endif 1479 if (expected_vmcs->vpid == VPID_INVALID) { 1480 vpid_t vpid = findFreeVPID(); 1481 storeVPID(expected_vmcs, vpid); 1482 if (vmx_feature_vpid) { 1483 vmwrite(VMX_CONTROL_VPID, vpid); 1484 } 1485 } 1486 setEPTRoot(TCB_PTR_CTE_PTR(NODE_STATE(ksCurThread), tcbArchEPTRoot)->cap, expected_vmcs); 1487 handleLazyFpu(); 1488} 1489 1490void invept(ept_pml4e_t *ept_pml4) 1491{ 1492 if (vmx_ept_vpid_cap_msr_get_invept(vpid_capability)) { 1493 struct { 1494 uint64_t parts[2]; 1495 } address; 1496 word_t type; 1497 if (vmx_ept_vpid_cap_msr_get_invept_single_context(vpid_capability)) { 1498 type = 1; 1499 } else if (vmx_ept_vpid_cap_msr_get_invept_all_context(vpid_capability)) { 1500 type = 2; 1501 } else { 1502 /* hardware claims to support invept yet provides us with no actual 1503 * invept mechanism. This is probably impossible, but just silently 1504 * ignore if it happens */ 1505 userError("Hardware claimed to support invept, yet provided no mechanism"); 1506 return; 1507 } 1508 1509 address.parts[0] = pptr_to_paddr((void *)ept_pml4); 1510 address.parts[1] = 0; 1511 asm volatile( 1512 "invept %0, %1" 1513 : 1514 : "m"(address), "r"(type) 1515 : "memory" 1516 ); 1517 } 1518} 1519 1520#endif 1521