/**
 * \file
 * \brief Contains VMKit kernel interface for version using VMX extensions.
 */

/*
 * Copyright (c) 2014, University of Washington.
 * All rights reserved.
 *
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, CAB F.78, Universitaetstr. 6, CH-8092 Zurich.
 * Attn: Systems Group.
 */

#include <string.h>
#include <kernel.h>
#include <paging_kernel_arch.h>
#include <vmx_vmkit.h>
#include <vmx_checks.h>
#include <x86.h>
#include <dispatch.h>
#include <exec.h>
#include <irq.h>
#include <barrelfish_kpi/vmkit.h>
#include <barrelfish_kpi/syscalls.h>

#include <dev/ia32_dev.h>

// Execution, entry, and exit controls that we want to use
// for each VM
#ifdef CONFIG_ARRAKISMON
#define GUEST_PIN_BASE_CTLS_ENABLE \
    (PIN_CTLS_EXT_INTR | PIN_CTLS_NMI | PIN_CTLS_VIRT_NMI)

#define GUEST_PIN_BASE_CTLS_DISABLE \
    (0)

#define GUEST_PP_CTLS_ENABLE \
    (PP_CLTS_MSRBMP | PP_CLTS_IOBMP | PP_CLTS_HLT)

#define GUEST_PP_CTLS_DISABLE \
    (0)

#define GUEST_SP_CTLS_ENABLE \
    (0)

#define GUEST_SP_CTLS_DISABLE \
    (0)

#define GUEST_EXIT_CTLS_ENABLE \
    (EXIT_CLTS_HOST_SIZE | EXIT_CLTS_SAVE_EFER | EXIT_CLTS_LOAD_EFER)

#define GUEST_EXIT_CTLS_DISABLE \
    (0)

#define GUEST_ENTRY_CTLS_ENABLE \
    (ENTRY_CLTS_LOAD_EFER | ENTRY_CLTS_LOAD_DBG | ENTRY_CLTS_IA32E_MODE)

#define GUEST_ENTRY_CTLS_DISABLE \
    (0)
#else
#define GUEST_PIN_BASE_CTLS_ENABLE \
    (PIN_CTLS_EXT_INTR | PIN_CTLS_NMI | PIN_CTLS_VIRT_NMI)

#define GUEST_PIN_BASE_CTLS_DISABLE \
    (0)

#define GUEST_PP_CTLS_ENABLE \
    (PP_CLTS_MSRBMP | PP_CLTS_IOBMP | PP_CLTS_HLT | PP_CLTS_SEC_CTLS)

#define GUEST_PP_CTLS_DISABLE \
    (0)

#define GUEST_SP_CTLS_ENABLE \
    (SP_CLTS_ENABLE_EPT | SP_CLTS_UNRSTD_GUEST)

#define GUEST_SP_CTLS_DISABLE \
    (0)

#define GUEST_EXIT_CTLS_ENABLE \
    (EXIT_CLTS_HOST_SIZE | EXIT_CLTS_SAVE_EFER | EXIT_CLTS_LOAD_EFER | \
     EXIT_CLTS_SAVE_PAT | EXIT_CLTS_LOAD_PAT)

#define GUEST_EXIT_CTLS_DISABLE \
    (0)

#define GUEST_ENTRY_CTLS_ENABLE \
    (ENTRY_CLTS_LOAD_EFER)

#define GUEST_ENTRY_CTLS_DISABLE \
    (0)
#endif

extern void *vmx_return_func;

static struct guest_control *ctrl = NULL;

static int launched = 0;

#ifndef CONFIG_ARRAKISMON
// List of MSRs that are loaded on VM-exit.
static uint32_t msr_list[VMX_MSR_COUNT] =
    {MSR_KERNEL_GS_BASE, MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SFMASK};

// VM-exit MSR-load area that contains host MSR values that are saved prior
// to VM-entry and loaded on VM-exit.
static struct msr_entry host_msr_area[VMX_MSR_COUNT]
__attribute__ ((aligned(16)));
#endif

// VMX controls that are written to the VMCS. In addition to the controls
// that are requested, these values may have bits that are reserved set.
vmx_controls pin_based_ctls = 0, pp_based_ctls = 0, sp_based_ctls = 0,
    entry_ctls = 0, exit_ctls = 0;

static uint8_t vmxon_region[BASE_PAGE_SIZE]
__attribute__ ((aligned(BASE_PAGE_SIZE)));
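/*
 * The non-Arrakis configuration above assumes EPT and unrestricted-guest
 * support. A minimal runtime probe for that assumption would look like
 * this (an illustrative sketch using the same mackerel accessor as
 * msr_ctls() below; the high dword of the IA32_VMX_PROCBASED_CTLS2 MSR
 * reports the "allowed 1-settings"):
 *
 *   uint64_t sp_msr = ia32_vmx_spbased_ctls_rd(NULL);
 *   bool ept_supported = (DWORD_MS(sp_msr) & SP_CLTS_ENABLE_EPT) != 0;
 *   bool ug_supported  = (DWORD_MS(sp_msr) & SP_CLTS_UNRSTD_GUEST) != 0;
 */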
// Returns true if extended page tables (EPT) are enabled.
static inline int ept_enabled(void)
{
    return ((GUEST_SP_CTLS_ENABLE & SP_CLTS_ENABLE_EPT) != 0);
}

// Converts the outcome of the last VMX instruction, reported in RFLAGS,
// into an errval: CF set means VMfailInvalid, ZF set means VMfailValid,
// neither means success.
static inline errval_t instr_err(void)
{
    errval_t err;
    __asm volatile("jnc vmx_err_check_zf%=\n\t"
                   "mov %[VMfailInvalid], %[err]\n\t"
                   "jmp vmx_err_done%=\n\t"
                   "vmx_err_check_zf%=:\n\t"
                   "jnz vmx_err_succeed%=\n\t"
                   "mov %[VMfailValid], %[err]\n\t"
                   "jmp vmx_err_done%=\n\t"
                   "vmx_err_succeed%=:\n\t"
                   "mov %[VMsucceed], %[err]\n\t"
                   "vmx_err_done%=:\n\t"
                   : [err] "=r" (err)
                   : [VMfailInvalid] "i" (SYS_ERR_VMKIT_VMX_VMFAIL_INVALID),
                     [VMfailValid] "i" (SYS_ERR_VMKIT_VMX_VMFAIL_VALID),
                     [VMsucceed] "i" (SYS_ERR_OK)
                   : "memory");
    return err;
}

// Executes the vmptrld instruction, which makes the VMCS referenced by
// 'vmcs_base' active and current.
errval_t vmptrld(lpaddr_t vmcs_base)
{
    __asm volatile("vmptrld %[vmcs_base]\n\t"
                   :
                   : [vmcs_base] "m" (vmcs_base)
                   : "memory");
    return instr_err();
}

// Returns the physical address base of the current VMCS.
lpaddr_t vmptrst(void)
{
    lpaddr_t dest_addr;
    // vmptrst stores through its memory operand, so it must be an output
    __asm volatile("vmptrst %[dest_addr]\n\t"
                   : [dest_addr] "=m" (dest_addr)
                   :
                   : "memory");
    return dest_addr;
}

// Executes the vmclear instruction, which makes the VMCS referenced
// by 'vmcs_base' clear and inactive. This instruction also ensures
// that the referenced VMCS data is saved.
errval_t vmclear(lpaddr_t vmcs_base)
{
    __asm volatile("vmclear %[vmcs_base]\n\t"
                   :
                   : [vmcs_base] "m" (vmcs_base)
                   : "memory");
    return instr_err();
}

// Reads a component with a specified encoding from the current VMCS
// to the address 'dest_addr' using the vmread instruction.
errval_t vmread(uintptr_t encoding, lvaddr_t *dest_addr)
{
    // vmread stores a full 64-bit value through the memory operand,
    // so *dest_addr is an output constraint
    __asm volatile("vmread %[encoding], %[dest_addr]\n\t"
                   : [dest_addr] "=m" (*dest_addr)
                   : [encoding] "r" (encoding)
                   : "memory");
    return instr_err();
}

// Writes a component with a specified encoding and value to the current
// VMCS using the vmwrite instruction.
errval_t vmwrite(uintptr_t encoding, uintptr_t value)
{
    __asm volatile("vmwrite %[value], %[encoding]\n\t"
                   :
                   : [encoding] "r" (encoding), [value] "r" (value)
                   : "memory");
    return instr_err();
}

// Using a provided VMXON region, causes the logical processor to enter
// VMX root operation by executing the vmxon instruction.
errval_t vmxon(lpaddr_t base_addr)
{
    __asm volatile("vmxon %[base_addr]\n\t"
                   :
                   : [base_addr] "m" (base_addr)
                   : "memory");
    return instr_err();
}

// Exits VMX operation by executing the vmxoff instruction.
errval_t vmxoff(void)
{
    __asm volatile("vmxoff");
    return instr_err();
}
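/*
 * All of the wrappers above funnel through instr_err(), which maps the
 * RFLAGS convention of the VMX instructions to errvals: CF set means
 * VMfailInvalid (no current VMCS), ZF set means VMfailValid (a
 * VM-instruction error number is available), and neither means success.
 * A typical call site therefore looks like this (illustrative sketch):
 *
 *   errval_t err = vmptrld(vmcs_base);
 *   if (err_is_fail(err)) {
 *       uint64_t instr_error;
 *       // on VMfailValid, the VM-instruction error field says why
 *       vmread(VMX_INSTR_ERROR, &instr_error);
 *       printk(LOG_ERR, "vmptrld failed, error %lu\n", instr_error);
 *   }
 */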
// Reads and returns the MSR that reports the allowed settings
// for ALL of the bits of the controls indicated by 'type'.
static uint64_t msr_ctls_true(enum vmx_ctls_t type)
{
    uint64_t true_msr = 0;
    switch(type) {
    case VMX_CTLS_PIN_BASED:
        true_msr = ia32_vmx_true_pinbased_ctls_rd(NULL);
        break;
    case VMX_CTLS_PRIMARY_PROCESSOR:
        true_msr = ia32_vmx_true_ppbased_ctls_rd(NULL);
        break;
    case VMX_CTLS_SECONDARY_PROCESSOR:
        assert(!"No such MSR for secondary processor controls!\n");
        break;
    case VMX_CTLS_EXIT:
        true_msr = ia32_vmx_true_exit_ctls_rd(NULL);
        break;
    case VMX_CTLS_ENTRY:
        true_msr = ia32_vmx_true_entry_ctls_rd(NULL);
        break;
    }
    return true_msr;
}

// Reads and returns the MSR that reports the allowed settings
// for MOST of the bits of the controls indicated by 'type'.
static uint64_t msr_ctls(enum vmx_ctls_t type)
{
    uint64_t msr = 0;
    switch(type) {
    case VMX_CTLS_PIN_BASED:
        msr = ia32_vmx_pinbased_ctls_rd(NULL);
        break;
    case VMX_CTLS_PRIMARY_PROCESSOR:
        msr = ia32_vmx_ppbased_ctls_rd(NULL);
        break;
    case VMX_CTLS_SECONDARY_PROCESSOR:
        msr = ia32_vmx_spbased_ctls_rd(NULL);
        break;
    case VMX_CTLS_EXIT:
        msr = ia32_vmx_exit_ctls_rd(NULL);
        break;
    case VMX_CTLS_ENTRY:
        msr = ia32_vmx_entry_ctls_rd(NULL);
        break;
    }
    return msr;
}

// Computes the value of the controls indicated by 'type' for writing to
// the VMCS, using 'mask_1s' and 'mask_0s', which correspond to the
// controls that should be enabled and disabled, respectively.
static uint32_t set_vmx_controls(uint32_t mask_1s,
    uint32_t mask_0s, enum vmx_ctls_t type)
{
    uint32_t controls = 0;

    ia32_vmx_basic_t vmx_basic = ia32_vmx_basic_rd(NULL);
    bool true_ctls = !!(ia32_vmx_basic_ctls_clear_extract(vmx_basic));
    if (true_ctls && (type != VMX_CTLS_SECONDARY_PROCESSOR)) {
        uint64_t true_msr = msr_ctls_true(type);
        controls = ((DWORD_LS(true_msr) | mask_1s) & DWORD_MS(true_msr));
    } else {
        uint64_t msr = msr_ctls(type);
        controls = ((DWORD_LS(msr) | mask_1s) & DWORD_MS(msr));
    }
    assert((mask_1s & (~controls)) == 0);
    assert((mask_0s & controls) == 0);
    return controls;
}
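/*
 * Worked example for set_vmx_controls() (numbers invented for
 * illustration): if the allowed-settings MSR reads 0x00000016 in its low
 * dword (bits that must be 1) and 0xFFFFFFFE in its high dword (bits that
 * may be 1), then a request of mask_1s = 0x88 yields
 *
 *   controls = (0x16 | 0x88) & 0xFFFFFFFE = 0x9E
 *
 * Bits that the hardware forces to 1 (0x16) are kept even though they
 * were not requested; the asserts catch the cases where a requested bit
 * was refused or a forced bit appears in mask_0s.
 */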
/**
 * \brief Tries to enable hardware assisted virtualization.
 *
 * Checks whether hardware assisted virtualization is available on the
 * platform and enables this feature.
 *
 * \return SYS_ERR_OK on successful initialization of the subsystem,
 *         or SYS_ERR_VMKIT_UNAVAIL if virtualization is unavailable.
 */
errval_t vmx_enable_virtualization (void)
{
    uint32_t cpuid_ecx;
    cpuid(CPUID_VMX, NULL, NULL, &cpuid_ecx, NULL);
    if (!(cpuid_ecx & VMX_SUPPORT)) {
        return SYS_ERR_VMKIT_UNAVAIL;
    }

    // The 'lock' and 'enable VMXON outside SMX' bits of the
    // IA32_FEATURE_CONTROL MSR must be set
    ia32_feature_cntl_t feat_cntl_msr;
    feat_cntl_msr = ia32_feature_cntl_rd(NULL);
    if (!ia32_feature_cntl_lock_extract(feat_cntl_msr) ||
        !ia32_feature_cntl_vmxoutsmx_extract(feat_cntl_msr)) {
        return SYS_ERR_VMKIT_UNAVAIL;
    }

    pin_based_ctls = set_vmx_controls(
        GUEST_PIN_BASE_CTLS_ENABLE, GUEST_PIN_BASE_CTLS_DISABLE, VMX_CTLS_PIN_BASED);

    pp_based_ctls = set_vmx_controls(
        GUEST_PP_CTLS_ENABLE, GUEST_PP_CTLS_DISABLE, VMX_CTLS_PRIMARY_PROCESSOR);

    sp_based_ctls = set_vmx_controls(
        GUEST_SP_CTLS_ENABLE, GUEST_SP_CTLS_DISABLE, VMX_CTLS_SECONDARY_PROCESSOR);

    entry_ctls = set_vmx_controls(
        GUEST_ENTRY_CTLS_ENABLE, GUEST_ENTRY_CTLS_DISABLE, VMX_CTLS_ENTRY);

    exit_ctls = set_vmx_controls(
        GUEST_EXIT_CTLS_ENABLE, GUEST_EXIT_CTLS_DISABLE, VMX_CTLS_EXIT);

    // Initialize the VMXON region with the VMCS revision identifier
    memset(vmxon_region, 0x0, BASE_PAGE_SIZE);
    ia32_vmx_basic_t vmx_basic;
    vmx_basic = ia32_vmx_basic_rd(NULL);
    uint32_t vmcs_rev_id = ia32_vmx_basic_vmcs_rev_id_extract(vmx_basic);
    memcpy(vmxon_region, &vmcs_rev_id, sizeof(uint32_t));

    // The logical processor must be in protected mode with paging enabled
    uint64_t cr0 = rdcr0();
    if ((cr0 & CR0_PE) == 0 || (cr0 & CR0_PG) == 0) {
        return SYS_ERR_VMKIT_UNAVAIL;
    }

    // The CR0 register value has to support all of the CR0 fixed bits
    if (cr0 != vmx_fixed_cr0()) {
        return SYS_ERR_VMKIT_UNAVAIL;
    }

    // Enable virtualization, if not already enabled
    if (!vmx_enabled()) {
        enable_vmx();
    }
    // The CR4 register value has to support all of the CR4 fixed bits
    if (rdcr4() != vmx_fixed_cr4()) {
        return SYS_ERR_VMKIT_UNAVAIL;
    }

    // Execute VMXON to place the processor into VMX root operation
    errval_t err = vmxon(mem_to_local_phys((lvaddr_t)vmxon_region));
    assert(err_is_ok(err));

    return SYS_ERR_OK;
}

// Intercept all exceptions except vector 7 (#NM, device not available).
// Page faults (vector 14) are filtered further by the #PF error-code
// mask and match fields set in vmx_set_host_state().
static inline void vmx_set_exception_bitmap(void)
{
    errval_t err = vmwrite(VMX_EXCP_BMP, ~(1UL << 7));
    assert(err_is_ok(err));
}

#ifndef CONFIG_ARRAKISMON
static uint64_t vmx_read_msr(uint32_t index) {
    uint64_t val = 0;
    switch (index) {
    case MSR_KERNEL_GS_BASE:
        val = ia32_kernel_gs_base_rd(NULL);
        break;
    case MSR_STAR:
        val = ia32_star_rd(NULL);
        break;
    case MSR_LSTAR:
        val = ia32_lstar_rd(NULL);
        break;
    case MSR_CSTAR:
        val = ia32_cstar_rd(NULL);
        break;
    case MSR_SFMASK:
        val = ia32_fmask_rd(NULL);
        break;
    default:
        assert(!"MSR index not supported");
    }
    return val;
}

static void vmx_host_msr_area_init(struct msr_entry *msr_area)
{
    for (int i = 0; i < VMX_MSR_COUNT; i++) {
        msr_area[i].index = msr_list[i];
        msr_area[i].val = vmx_read_msr(msr_list[i]);
    }
}
#endif

static inline lpaddr_t mem_to_local_phys_no_assertion(lvaddr_t addr)
{
    return (lpaddr_t)(addr - (lpaddr_t)X86_64_MEMORY_OFFSET);
}
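/*
 * On every VM-exit the processor walks the VM-exit MSR-load area
 * (initialized above, installed in vmx_set_host_state() below) and loads
 * each {index, val} pair into the named MSR, which is how the host
 * syscall MSRs survive the guest. Extending the area means growing
 * msr_list/VMX_MSR_COUNT and teaching vmx_read_msr() about the new index,
 * roughly (an illustrative sketch, treating MSR_EFER as a hypothetical
 * addition to the list):
 *
 *   case MSR_EFER:
 *       val = ia32_efer_rd(NULL);
 *       break;
 */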
// Writes the host state, which is used after a VM-exit, to the
// current VMCS
static void vmx_set_host_state(void)
{
    // On a page-fault the processor checks whether:
    // (#PF error-code) & (#PF error-code mask) = (#PF error-code match)
    //
    // Setting the mask to 0, the match to 0xFFFFFFFF, and bit 14 in the
    // exception bitmap results in no VM-exits on guest page-faults.
    errval_t err = vmwrite(VMX_PF_ERR_MASK, 0);
    err += vmwrite(VMX_PF_ERR_MATCH, 0xFFFFFFFF);
    err += vmwrite(VMX_CR3_TARGET_CNT, 0);

    uint64_t cr0 = rdcr0(), cr3 = rdcr3(), cr4 = rdcr4();

    uint64_t cr0_fixed0 = ia32_vmx_cr0_fixed0_rd(NULL);
    uint64_t cr0_fixed1 = ia32_vmx_cr0_fixed1_rd(NULL);
    uint64_t cr4_fixed0 = ia32_vmx_cr4_fixed0_rd(NULL);
    uint64_t cr4_fixed1 = ia32_vmx_cr4_fixed1_rd(NULL);

    assert((~cr0 & cr0_fixed0) == 0);
    assert((cr0 & ~cr0_fixed1) == 0);
    assert((~cr4 & cr4_fixed0) == 0);
    assert((cr4 & ~cr4_fixed1) == 0);

    assert(((cr0 | cr0_fixed0) & cr0_fixed1) == cr0);
    assert(((cr4 | cr4_fixed0) & cr4_fixed1) == cr4);
    assert(rdcr4() & CR4_PAE);

    err += vmwrite(VMX_HOST_CR0, cr0);
    err += vmwrite(VMX_HOST_CR3, cr3);
    err += vmwrite(VMX_HOST_CR4, cr4);

    err += vmwrite(VMX_HOST_ES_SEL, rd_es() & ~0x7);
    err += vmwrite(VMX_HOST_CS_SEL, rd_cs() & ~0x7);
    err += vmwrite(VMX_HOST_SS_SEL, rd_ss() & ~0x7);
    err += vmwrite(VMX_HOST_DS_SEL, rd_ds() & ~0x7);
    err += vmwrite(VMX_HOST_TR_SEL, rd_tr() & ~0x7);

    err += vmwrite(VMX_HOST_TR_BASE, tr_addr(rd_tr(), gdtr_addr(rd_gdtr())));
    err += vmwrite(VMX_HOST_GDTR_BASE, gdtr_addr(rd_gdtr()));
    err += vmwrite(VMX_HOST_IDTR_BASE, idtr_addr(rd_idtr()));
    err += vmwrite(VMX_HOST_SYSENTER_CS, 0);
    err += vmwrite(VMX_HOST_SYSENTER_ESP, 0);
    err += vmwrite(VMX_HOST_SYSENTER_EIP, 0);
    err += vmwrite(VMX_HOST_PAT_F, ia32_cr_pat_rd(NULL));

    ia32_efer_t efer_msr = ia32_efer_rd(NULL);
    err += vmwrite(VMX_HOST_EFER_F, efer_msr);
    assert(ia32_efer_lme_extract(efer_msr));
    assert(ia32_efer_lma_extract(efer_msr));

    err += vmwrite(VMX_HOST_GS_SEL, 0x0);
    err += vmwrite(VMX_HOST_GS_BASE, 0x0);

    err += vmwrite(VMX_HOST_FS_SEL, 0x0);
    err += vmwrite(VMX_HOST_FS_BASE, 0x0);

    err += vmwrite(VMX_HOST_RIP, (uint64_t)(&vmx_return_func));
#ifndef CONFIG_ARRAKISMON
    vmx_host_msr_area_init(host_msr_area);

    lpaddr_t msr_area_base = mem_to_local_phys_no_assertion(
        (lvaddr_t) host_msr_area);
    if (!((lvaddr_t) host_msr_area >= X86_64_MEMORY_OFFSET)) {
        printk(LOG_NOTE, "assertion failed! 0x%lx >= 0x%lx\n",
               (lvaddr_t) host_msr_area,
               X86_64_MEMORY_OFFSET);
    }
    err += vmwrite(VMX_EXIT_MSR_LOAD_F, canonical_form(msr_area_base));
    err += vmwrite(VMX_EXIT_MSR_LOAD_CNT, VMX_MSR_COUNT);
#endif
    assert(err_is_ok(err));
}
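/*
 * Worked example of the page-fault filter configured above: for a guest
 * #PF with error code 0x6 (a user-mode write to a not-present page) the
 * processor evaluates
 *
 *   (error_code & VMX_PF_ERR_MASK) == VMX_PF_ERR_MATCH
 *   (0x6 & 0x0) == 0xFFFFFFFF   ->   false
 *
 * Since bit 14 of the exception bitmap is set, a #PF causes a VM-exit
 * only when this comparison holds, and with mask 0 and match 0xFFFFFFFF
 * it never does, so guest page faults are delivered directly through the
 * guest IDT.
 */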
// Writes the VMX controls to the current VMCS.
void vmx_set_exec_ctls(void)
{
    // VM-execution controls
    errval_t err = vmwrite(VMX_EXEC_PIN_BASED, pin_based_ctls);
    err += vmwrite(VMX_EXEC_PRIM_PROC, pp_based_ctls);
    err += vmwrite(VMX_EXEC_SEC_PROC, sp_based_ctls);

    // VM-entry and VM-exit control fields
    err += vmwrite(VMX_EXIT_CONTROLS, exit_ctls);
    err += vmwrite(VMX_ENTRY_CONTROLS, entry_ctls);

    vmx_set_exception_bitmap();

    err += vmwrite(VMX_ENTRY_INTR_INFO, 0);
    err += vmwrite(VMX_ENTRY_EXCP_ERR, 0);
    err += vmwrite(VMX_ENTRY_INSTR_LEN, 0);
    assert(err_is_ok(err));
}
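/*
 * Every VMCS region (and the VMXON region) begins with a 32-bit header:
 * bits 30:0 hold the VMCS revision identifier reported by IA32_VMX_BASIC
 * and bit 31 marks a shadow VMCS. The prelude written in initialize_vmcs()
 * below is assumed to mirror that layout; a sketch of the shape (the
 * authoritative definition of struct vmcs lives in vmx_vmkit.h):
 *
 *   struct vmcs_prelude_bits {
 *       uint32_t revision_id : 31;
 *       uint32_t shadow      : 1;
 *   };
 */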
errval_t initialize_vmcs(lpaddr_t vmcs_paddr)
{
    struct vmcs *vmcs = (struct vmcs *)local_phys_to_mem(vmcs_paddr);

    ia32_vmx_basic_t vmx_basic;
    vmx_basic = ia32_vmx_basic_rd(NULL);
    uint32_t vmcs_rev_id = ia32_vmx_basic_vmcs_rev_id_extract(vmx_basic);

    memset(vmcs, 0x0, BASE_PAGE_SIZE);
    vmcs->prelude.p.revision_id = vmcs_rev_id;
    vmcs->prelude.p.shadow = 0;
    errval_t err = vmclear(vmcs_paddr);
    err += vmptrld(vmcs_paddr);

    err += vmwrite(VMX_GUEST_VMCS_LPTR_F, ~0x0);
    err += vmwrite(VMX_GUEST_VMCS_LPTR_H, ~0x0);
    err += vmwrite(VMX_GUEST_SYSENTER_CS, 0x0);
    err += vmwrite(VMX_GUEST_SYSENTER_ESP, 0x0);
    err += vmwrite(VMX_GUEST_SYSENTER_EIP, 0x0);
#ifdef CONFIG_ARRAKISMON
    err += vmwrite(VMX_GUEST_DR7, 0x0);
    err += vmwrite(VMX_GUEST_EFER_F, ia32_efer_rd(NULL) | EFER_LME | EFER_LMA);

    err += vmwrite(VMX_GUEST_ACTIV_STATE, 0x0);
    err += vmwrite(VMX_GUEST_INTR_STATE, 0x0);

    err += vmwrite(VMX_GUEST_CS_LIM, 0xFFFFFFFF);
    err += vmwrite(VMX_GUEST_DS_LIM, 0xFFFFFFFF);
    err += vmwrite(VMX_GUEST_ES_LIM, 0xFFFFFFFF);
    err += vmwrite(VMX_GUEST_SS_LIM, 0xFFFFFFFF);
    err += vmwrite(VMX_GUEST_FS_LIM, 0xFFFFFFFF);
    err += vmwrite(VMX_GUEST_GS_LIM, 0xFFFFFFFF);
    err += vmwrite(VMX_GUEST_TR_LIM, 0xFFFF);
    err += vmwrite(VMX_GUEST_LDTR_LIM, 0xFFFF);
    err += vmwrite(VMX_GUEST_GDTR_LIM, 0xFFFF);
    err += vmwrite(VMX_GUEST_IDTR_LIM, 0xFFFF);

    err += vmwrite(VMX_GUEST_CS_ACCESS, 0xA09B);
    err += vmwrite(VMX_GUEST_DS_ACCESS, 0xC093);
    err += vmwrite(VMX_GUEST_ES_ACCESS, 0xC093);
    err += vmwrite(VMX_GUEST_FS_ACCESS, 0xC093);
    err += vmwrite(VMX_GUEST_GS_ACCESS, 0xC093);
    err += vmwrite(VMX_GUEST_SS_ACCESS, 0xC093);
    err += vmwrite(VMX_GUEST_TR_ACCESS, 0x8B);
    err += vmwrite(VMX_GUEST_LDTR_ACCESS, 0x82);

    err += vmwrite(VMX_GUEST_CS_SEL, 0x8);
    err += vmwrite(VMX_GUEST_SS_SEL, 0x10);
    err += vmwrite(VMX_GUEST_DS_SEL, 0x10);
    err += vmwrite(VMX_GUEST_ES_SEL, 0x10);
    err += vmwrite(VMX_GUEST_FS_SEL, 0x10);
    err += vmwrite(VMX_GUEST_GS_SEL, 0x10);
    err += vmwrite(VMX_GUEST_TR_SEL, 0x10);
    err += vmwrite(VMX_GUEST_LDTR_SEL, 0x10);

    err += vmwrite(VMX_GUEST_CS_BASE, 0x0);
    err += vmwrite(VMX_GUEST_SS_BASE, 0x0);
    err += vmwrite(VMX_GUEST_DS_BASE, 0x0);
    err += vmwrite(VMX_GUEST_ES_BASE, 0x0);
    err += vmwrite(VMX_GUEST_FS_BASE, 0x0);
    err += vmwrite(VMX_GUEST_GS_BASE, 0x0);
    err += vmwrite(VMX_GUEST_TR_BASE, 0x0);
    err += vmwrite(VMX_GUEST_LDTR_BASE, 0x0);
    err += vmwrite(VMX_GUEST_GDTR_BASE, 0x0);
    err += vmwrite(VMX_GUEST_IDTR_BASE, 0x0);

    uint64_t guest_cr0 = 0x60000010 | CR0_PE | CR0_PG;
    err += vmwrite(VMX_GUEST_CR0, (uint32_t)(guest_cr0 | ia32_vmx_cr0_fixed0_rd(NULL)) &
                   ia32_vmx_cr0_fixed1_rd(NULL));

    uint64_t guest_cr4 = CR4_PAE;
    err += vmwrite(VMX_GUEST_CR4, (guest_cr4 | ia32_vmx_cr4_fixed0_rd(NULL)) &
                   ia32_vmx_cr4_fixed1_rd(NULL));

    err += vmwrite(VMX_CR0_GH_MASK, 0UL);
    err += vmwrite(VMX_CR4_GH_MASK, 0UL);
#else
    err += vmwrite(VMX_GUEST_DR7, 0x400);       // DR7 reset value
    err += vmwrite(VMX_GUEST_EFER_F, 0x0);
    err += vmwrite(VMX_GUEST_PAT_F, 0x0007040600070406ul);  // power-on default PAT

    err += vmwrite(VMX_GUEST_ACTIV_STATE, 0x0);
    err += vmwrite(VMX_GUEST_INTR_STATE, 0x0);

    err += vmwrite(VMX_GUEST_CS_LIM, 0xFFFF);
    err += vmwrite(VMX_GUEST_DS_LIM, 0xFFFF);
    err += vmwrite(VMX_GUEST_ES_LIM, 0xFFFF);
    err += vmwrite(VMX_GUEST_FS_LIM, 0xFFFF);
    err += vmwrite(VMX_GUEST_GS_LIM, 0xFFFF);
    err += vmwrite(VMX_GUEST_SS_LIM, 0xFFFF);
    err += vmwrite(VMX_GUEST_TR_LIM, 0xFFFF);
    err += vmwrite(VMX_GUEST_LDTR_LIM, 0xFFFF);
    err += vmwrite(VMX_GUEST_GDTR_LIM, 0xFFFF);
    err += vmwrite(VMX_GUEST_IDTR_LIM, 0xFFFF);

    err += vmwrite(VMX_GUEST_CS_ACCESS, 0x9B);
    err += vmwrite(VMX_GUEST_DS_ACCESS, 0x93);
    err += vmwrite(VMX_GUEST_ES_ACCESS, 0x93);
    err += vmwrite(VMX_GUEST_FS_ACCESS, 0x93);
    err += vmwrite(VMX_GUEST_GS_ACCESS, 0x93);
    err += vmwrite(VMX_GUEST_SS_ACCESS, 0x93);
    err += vmwrite(VMX_GUEST_TR_ACCESS, 0x8B);
    err += vmwrite(VMX_GUEST_LDTR_ACCESS, 0x82);

    err += vmwrite(VMX_GUEST_CS_SEL, 0x0);
    err += vmwrite(VMX_GUEST_DS_SEL, 0x0);
    err += vmwrite(VMX_GUEST_ES_SEL, 0x0);
    err += vmwrite(VMX_GUEST_FS_SEL, 0x0);
    err += vmwrite(VMX_GUEST_GS_SEL, 0x0);
    err += vmwrite(VMX_GUEST_SS_SEL, 0x0);
    err += vmwrite(VMX_GUEST_TR_SEL, 0x0);
    err += vmwrite(VMX_GUEST_LDTR_SEL, 0x0);

    err += vmwrite(VMX_GUEST_CS_BASE, 0x0);
    err += vmwrite(VMX_GUEST_DS_BASE, 0x0);
    err += vmwrite(VMX_GUEST_ES_BASE, 0x0);
    err += vmwrite(VMX_GUEST_FS_BASE, 0x0);
    err += vmwrite(VMX_GUEST_GS_BASE, 0x0);
    err += vmwrite(VMX_GUEST_SS_BASE, 0x0);
    err += vmwrite(VMX_GUEST_TR_BASE, 0x0);
    err += vmwrite(VMX_GUEST_LDTR_BASE, 0x0);
    err += vmwrite(VMX_GUEST_GDTR_BASE, 0x0);
    err += vmwrite(VMX_GUEST_IDTR_BASE, 0x0);

    // The unrestricted guest starts in real mode: RFLAGS has reserved
    // bit 1 and the ID flag set, RIP is at the reset-vector offset
    err += vmwrite(VMX_GUEST_RFLAGS, 0x200002);
    err += vmwrite(VMX_GUEST_RIP, 0xFFF0);
    err += vmwrite(VMX_GUEST_RSP, 0x0);

    uint64_t guest_cr0 = (0x60000010 | ia32_vmx_cr0_fixed0_rd(NULL)) &
        ia32_vmx_cr0_fixed1_rd(NULL);
    err += vmwrite(VMX_GUEST_CR0, guest_cr0 & ~(CR0_PE | CR0_PG));

    uint64_t guest_cr4 = CR4_PAE;
    err += vmwrite(VMX_GUEST_CR4, (guest_cr4 | ia32_vmx_cr4_fixed0_rd(NULL)) &
        ia32_vmx_cr4_fixed1_rd(NULL));
    assert((guest_cr4 & CR4_PCIDE) == 0);

    uint64_t cr0_shadow;
    err += vmread(VMX_GUEST_CR0, &cr0_shadow);

    err += vmwrite(VMX_CR0_RD_SHADOW, cr0_shadow);
    err += vmwrite(VMX_CR0_GH_MASK, CR0_PE);
    err += vmwrite(VMX_CR4_GH_MASK, 0x20);      // CR4_PAE
#endif
    assert(err_is_ok(err));

    vmx_set_exec_ctls();

    return SYS_ERR_OK;
}

static uint32_t fail = 0;
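/*
 * VM-entry distinguishes a first launch from a resume: VMLAUNCH is only
 * legal while the current VMCS is in the "clear" state (the state after
 * vmclear()) and transitions it to "launched"; every later entry must use
 * VMRESUME instead, or the instruction VMfails. The 'launched' flag above
 * is what enter_guest() branches on to pick between the two.
 */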
static inline void enter_guest(void)
{
    // Set the host state prior to every VM-entry in case the values
    // written to the VMCS change.
    vmx_set_host_state();

    // Save the LDT selector; it is not preserved across VM-entry, and
    // failing to restore it afterwards incurs a #GPF in the monitor
    // domain.
    uint16_t ldtr_sel = rd_ldtr();

    // Perform most checks that are performed by the processor
    if (!launched) {
        check_guest_state_area();
        check_host_state_area();
        check_vmx_controls();
    }

    __asm volatile("mov %[ctrl], %%rdi\n\t"

                   // save host state
                   "mov %%rsp, %%r8\n\t"
                   "mov %[host_rsp_encoding], %%r9\n\t"
                   "vmwrite %%r8, %%r9\n\t"

                   "mov %%rbx, (148 + 1*8)(%%rdi)\n\t"
                   "mov %%rbp, (148 + 6*8)(%%rdi)\n\t"
                   "mov %%r12, (148 + 12*8)(%%rdi)\n\t"
                   "mov %%r13, (148 + 13*8)(%%rdi)\n\t"
                   "mov %%r14, (148 + 14*8)(%%rdi)\n\t"
                   "mov %%r15, (148 + 15*8)(%%rdi)\n\t"
                   "mov %%cr2, %%rsi\n\t"
                   "mov %%rsi, 38*8(%%rdi)\n\t"

                   // load guest state
                   "mov 37*8(%%rdi), %%rsi\n\t"
                   "mov %%rsi, %%cr2\n\t"

                   "mov 0*8(%%rdi), %%rax\n\t"
                   "mov 1*8(%%rdi), %%rbx\n\t"
                   "mov 2*8(%%rdi), %%rcx\n\t"
                   "mov 3*8(%%rdi), %%rdx\n\t"
                   "mov 4*8(%%rdi), %%rsi\n\t"
                   "mov 6*8(%%rdi), %%rbp\n\t"
                   "mov 8*8(%%rdi), %%r8\n\t"
                   "mov 9*8(%%rdi), %%r9\n\t"
                   "mov 10*8(%%rdi), %%r10\n\t"
                   "mov 11*8(%%rdi), %%r11\n\t"
                   "mov 12*8(%%rdi), %%r12\n\t"
                   "mov 13*8(%%rdi), %%r13\n\t"
                   "mov 14*8(%%rdi), %%r14\n\t"
                   "mov 15*8(%%rdi), %%r15\n\t"
                   "mov 5*8(%%rdi), %%rdi\n\t"

                   // enter the guest VM: vmlaunch on the first entry,
                   // vmresume on every later entry
                   "cmpl $0, %[launched]\n\t"
                   "jne 1f\n\t"
                   "sti\n\t"
                   "vmlaunch\n\t"
                   "jmp 2f\n\t"
                   "1: "
                   "sti\n\t"
                   "vmresume\n\t"
                   "2: "
                   "setbe %[fail]\n\t"
                   "vmx_return_func:\n\t"
                   "cli\n\t"

                   "push %%rdi\n\t"
                   "mov %[ctrl], %%rdi\n\t"

                   // save guest state
                   "mov %%rax, 0*8(%%rdi)\n\t"
                   "mov %%rbx, 1*8(%%rdi)\n\t"
                   "mov %%rcx, 2*8(%%rdi)\n\t"
                   "mov %%rdx, 3*8(%%rdi)\n\t"
                   "mov %%rsi, 4*8(%%rdi)\n\t"
                   "mov %%rbp, 6*8(%%rdi)\n\t"
                   "mov %%r8, 8*8(%%rdi)\n\t"
                   "mov %%r9, 9*8(%%rdi)\n\t"
                   "mov %%r10, 10*8(%%rdi)\n\t"
                   "mov %%r11, 11*8(%%rdi)\n\t"
                   "mov %%r12, 12*8(%%rdi)\n\t"
                   "mov %%r13, 13*8(%%rdi)\n\t"
                   "mov %%r14, 14*8(%%rdi)\n\t"
                   "mov %%r15, 15*8(%%rdi)\n\t"

                   "mov %%cr2, %%rsi\n\t"
                   "mov %%rsi, 37*8(%%rdi)\n\t"

                   "pop %%rsi\n\t"
                   "mov %%rsi, 5*8(%%rdi)\n\t"

                   // load host state
                   "mov (148 + 1*8)(%%rdi), %%rbx\n\t"
                   "mov (148 + 6*8)(%%rdi), %%rbp\n\t"
                   "mov (148 + 12*8)(%%rdi), %%r12\n\t"
                   "mov (148 + 13*8)(%%rdi), %%r13\n\t"
                   "mov (148 + 14*8)(%%rdi), %%r14\n\t"
                   "mov (148 + 15*8)(%%rdi), %%r15\n\t"
                   "mov 38*8(%%rdi), %%rsi\n\t"
                   "mov %%rsi, %%cr2\n\t"
                   : [fail] "=m" (fail)
                   : [ctrl] "m" (ctrl), [launched] "m" (launched),
                     [host_rsp_encoding] "i" (VMX_HOST_RSP)
                   : "memory"
                   );
    assert(!fail);
    wr_ldtr(ldtr_sel);

    launched = 1;
}
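/*
 * The hard-coded offsets in enter_guest() assume a struct guest_control
 * layout along these lines (an inferred sketch; the authoritative
 * definition lives in barrelfish_kpi/vmkit.h): the guest GPRs form an
 * array of 8-byte slots at the start (rax at 0*8 through r15 at 15*8,
 * with rdi at 5*8), the guest and host CR2 values sit at 37*8 and 38*8,
 * and the host's callee-saved registers are spilled into a block that
 * starts at byte offset 148. Any change to guest_control must be
 * mirrored in those offsets.
 */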
static inline void print_vmcs_info(struct guest_control *g)
{
    uint64_t guest_rip, guest_rsp, guest_rflags;
    uint64_t reason, exit_qual;
    uint64_t exit_intr_info, intr_err;
    uint64_t idt_vec_info, idt_vec_err;
    uint64_t instr_len, instr_info;
    uint64_t instr_error, gpaddr, gladdr;
    uint64_t entry_intr_info, activ_state, intr_state;
    uint64_t guest_cr0, guest_cr3, guest_cr4;
    uint64_t guest_efer;

    uint64_t guest_es_sel, guest_es_base, guest_es_lim, guest_es_access;
    uint64_t guest_cs_sel, guest_cs_base, guest_cs_lim, guest_cs_access;
    uint64_t guest_ss_sel, guest_ss_base, guest_ss_lim, guest_ss_access;
    uint64_t guest_ds_sel, guest_ds_base, guest_ds_lim, guest_ds_access;
    uint64_t guest_fs_sel, guest_fs_base, guest_fs_lim, guest_fs_access;
    uint64_t guest_gs_sel, guest_gs_base, guest_gs_lim, guest_gs_access;
    uint64_t guest_tr_sel, guest_tr_base, guest_tr_lim, guest_tr_access;
    uint64_t guest_ldtr_sel, guest_ldtr_base, guest_ldtr_lim, guest_ldtr_access;
    uint64_t guest_idtr_base, guest_idtr_lim;
    uint64_t guest_gdtr_base, guest_gdtr_lim;

    errval_t err = vmread(VMX_GUEST_ES_SEL, &guest_es_sel);
    err += vmread(VMX_GUEST_ES_BASE, &guest_es_base);
    err += vmread(VMX_GUEST_ES_LIM, &guest_es_lim);
    err += vmread(VMX_GUEST_ES_ACCESS, &guest_es_access);
    err += vmread(VMX_GUEST_CS_SEL, &guest_cs_sel);
    err += vmread(VMX_GUEST_CS_BASE, &guest_cs_base);
    err += vmread(VMX_GUEST_CS_LIM, &guest_cs_lim);
    err += vmread(VMX_GUEST_CS_ACCESS, &guest_cs_access);
    err += vmread(VMX_GUEST_SS_SEL, &guest_ss_sel);
    err += vmread(VMX_GUEST_SS_BASE, &guest_ss_base);
    err += vmread(VMX_GUEST_SS_LIM, &guest_ss_lim);
    err += vmread(VMX_GUEST_SS_ACCESS, &guest_ss_access);
    err += vmread(VMX_GUEST_DS_SEL, &guest_ds_sel);
    err += vmread(VMX_GUEST_DS_BASE, &guest_ds_base);
    err += vmread(VMX_GUEST_DS_LIM, &guest_ds_lim);
    err += vmread(VMX_GUEST_DS_ACCESS, &guest_ds_access);
    err += vmread(VMX_GUEST_FS_SEL, &guest_fs_sel);
    err += vmread(VMX_GUEST_FS_BASE, &guest_fs_base);
    err += vmread(VMX_GUEST_FS_LIM, &guest_fs_lim);
    err += vmread(VMX_GUEST_FS_ACCESS, &guest_fs_access);
    err += vmread(VMX_GUEST_GS_SEL, &guest_gs_sel);
    err += vmread(VMX_GUEST_GS_BASE, &guest_gs_base);
    err += vmread(VMX_GUEST_GS_LIM, &guest_gs_lim);
    err += vmread(VMX_GUEST_GS_ACCESS, &guest_gs_access);
    err += vmread(VMX_GUEST_TR_SEL, &guest_tr_sel);
    err += vmread(VMX_GUEST_TR_BASE, &guest_tr_base);
    err += vmread(VMX_GUEST_TR_LIM, &guest_tr_lim);
    err += vmread(VMX_GUEST_TR_ACCESS, &guest_tr_access);
    err += vmread(VMX_GUEST_LDTR_SEL, &guest_ldtr_sel);
    err += vmread(VMX_GUEST_LDTR_BASE, &guest_ldtr_base);
    err += vmread(VMX_GUEST_LDTR_LIM, &guest_ldtr_lim);
    err += vmread(VMX_GUEST_LDTR_ACCESS, &guest_ldtr_access);
    err += vmread(VMX_GUEST_IDTR_BASE, &guest_idtr_base);
    err += vmread(VMX_GUEST_IDTR_LIM, &guest_idtr_lim);
    err += vmread(VMX_GUEST_GDTR_BASE, &guest_gdtr_base);
    err += vmread(VMX_GUEST_GDTR_LIM, &guest_gdtr_lim);

    err += vmread(VMX_GUEST_RIP, &guest_rip);
    err += vmread(VMX_GUEST_RSP, &guest_rsp);
    err += vmread(VMX_GUEST_RFLAGS, &guest_rflags);
    err += vmread(VMX_EXIT_REASON, &reason);
    err += vmread(VMX_EXIT_QUAL, &exit_qual);
    err += vmread(VMX_EXIT_INTR_INFO, &exit_intr_info);
    err += vmread(VMX_EXIT_INTR_ERR, &intr_err);
    err += vmread(VMX_IDT_VEC_INFO, &idt_vec_info);
    err += vmread(VMX_IDT_VEC_ERR, &idt_vec_err);
    err += vmread(VMX_INSTR_ERROR, &instr_error);
    err += vmread(VMX_GPADDR_F, &gpaddr);
    err += vmread(VMX_GL_ADDR, &gladdr);
    err += vmread(VMX_ENTRY_INTR_INFO, &entry_intr_info);
    err += vmread(VMX_GUEST_ACTIV_STATE, &activ_state);
    err += vmread(VMX_GUEST_INTR_STATE, &intr_state);
    err += vmread(VMX_EXIT_INSTR_LEN, &instr_len);
    err += vmread(VMX_EXIT_INSTR_INFO, &instr_info);
    err += vmread(VMX_GUEST_CR0, &guest_cr0);
    err += vmread(VMX_GUEST_CR3, &guest_cr3);
    err += vmread(VMX_GUEST_CR4, &guest_cr4);
    err += vmread(VMX_GUEST_EFER_F, &guest_efer);
    assert(err_is_ok(err));

    printf("VMCS info:\n");
    printf("\tvmexit reason = %d\n", (int)reason & 0xFFFF);
    printf("\texit qualification = 0x%"PRIx64"\n", exit_qual);
    printf("\tBit 31 of reason = %x\n", ((int)reason >> 31) & 1);

    printf("\tVM-exit interruption information = 0x%"PRIx64"\n", exit_intr_info);
    printf("\tVM-exit interruption error = 0x%"PRIx64"\n", intr_err);

    printf("\tVM-entry interruption info = 0x%"PRIx64"\n", entry_intr_info);

    printf("\tIDT vector information = 0x%"PRIx64"\n", idt_vec_info);
    printf("\tIDT vector error = 0x%"PRIx64"\n", idt_vec_err);

    printf("\tInstruction error = 0x%"PRIx64", gpaddr = 0x%"PRIx64", gladdr = 0x%"PRIx64"\n",
           instr_error, gpaddr, gladdr);
    printf("\tActivity state = 0x%"PRIx64", Interruptibility state = 0x%"PRIx64"\n",
           activ_state, intr_state);
    printf("\tVM-exit instruction length = 0x%"PRIx64"\n", instr_len);
    printf("\tVM-exit instruction info = 0x%"PRIx64"\n", instr_info);

    printf("\tguest_rip = 0x%"PRIx64", guest_rflags = 0x%"PRIx64"\n",
           guest_rip, guest_rflags);
    printf("\tRAX=0x%"PRIx64" RBX=0x%"PRIx64" RCX=0x%"PRIx64" RDX=0x%"PRIx64"\n",
           g->regs.rax, g->regs.rbx, g->regs.rcx, g->regs.rdx);
    printf("\tRSP=0x%"PRIx64" RBP=0x%"PRIx64" RSI=0x%"PRIx64" RDI=0x%"PRIx64"\n",
           guest_rsp, g->regs.rbp, g->regs.rsi, g->regs.rdi);
    printf("\tR8 =0x%"PRIx64" R9 =0x%"PRIx64" R10=0x%"PRIx64" R11=0x%"PRIx64"\n",
           g->regs.r8, g->regs.r9, g->regs.r10, g->regs.r11);
    printf("\tR12=0x%"PRIx64" R13=0x%"PRIx64" R14=0x%"PRIx64" R15=0x%"PRIx64"\n",
           g->regs.r12, g->regs.r13, g->regs.r14, g->regs.r15);
    printf("\tCR0=0x%"PRIx64", CR3=0x%"PRIx64", CR4=0x%"PRIx64"\n",
           guest_cr0, guest_cr3, guest_cr4);

    printf("\tES: sel=0x%"PRIx64", base=0x%"PRIx64", lim=0x%"PRIx64", access=0x%"PRIx64"\n",
           guest_es_sel, guest_es_base, guest_es_lim, guest_es_access);
    printf("\tCS: sel=0x%"PRIx64", base=0x%"PRIx64", lim=0x%"PRIx64", access=0x%"PRIx64"\n",
           guest_cs_sel, guest_cs_base, guest_cs_lim, guest_cs_access);
    printf("\tSS: sel=0x%"PRIx64", base=0x%"PRIx64", lim=0x%"PRIx64", access=0x%"PRIx64"\n",
           guest_ss_sel, guest_ss_base, guest_ss_lim, guest_ss_access);
    printf("\tDS: sel=0x%"PRIx64", base=0x%"PRIx64", lim=0x%"PRIx64", access=0x%"PRIx64"\n",
           guest_ds_sel, guest_ds_base, guest_ds_lim, guest_ds_access);
    printf("\tFS: sel=0x%"PRIx64", base=0x%"PRIx64", lim=0x%"PRIx64", access=0x%"PRIx64"\n",
           guest_fs_sel, guest_fs_base, guest_fs_lim, guest_fs_access);
    printf("\tGS: sel=0x%"PRIx64", base=0x%"PRIx64", lim=0x%"PRIx64", access=0x%"PRIx64"\n",
           guest_gs_sel, guest_gs_base, guest_gs_lim, guest_gs_access);
    printf("\tTR: sel=0x%"PRIx64", base=0x%"PRIx64", lim=0x%"PRIx64", access=0x%"PRIx64"\n",
           guest_tr_sel, guest_tr_base, guest_tr_lim, guest_tr_access);
    printf("\tLDTR: sel=0x%"PRIx64", base=0x%"PRIx64", lim=0x%"PRIx64", access=0x%"PRIx64"\n",
           guest_ldtr_sel, guest_ldtr_base, guest_ldtr_lim, guest_ldtr_access);
    printf("\tIDTR: base=0x%"PRIx64", lim=0x%"PRIx64"\n",
           guest_idtr_base, guest_idtr_lim);
    printf("\tGDTR: base=0x%"PRIx64", lim=0x%"PRIx64"\n",
           guest_gdtr_base, guest_gdtr_lim);

    printf("\tEFER = 0x%"PRIx64"\n", guest_efer);
}

// Returns the interruption type (bits 10:8) from a VM-exit or VM-entry
// interruption-information field.
static inline uint64_t interruption_type(uint64_t intr_info) {
    return (intr_info >> 8) & 0x7;
}
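/*
 * Layout of the interruption-information field that interruption_type()
 * decodes (per the Intel SDM): bits 7:0 hold the vector, bits 10:8 the
 * type (2 is NMI, 3 is a hardware exception), bit 11 indicates a pushed
 * error code, and bit 31 marks the field as valid. A fuller decoder would
 * look like this (illustrative sketch):
 *
 *   static inline uint64_t interruption_vector(uint64_t intr_info) {
 *       return intr_info & 0xFF;
 *   }
 *   static inline bool interruption_info_valid(uint64_t intr_info) {
 *       return (intr_info >> 31) & 1;
 *   }
 */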
static void __attribute__ ((noreturn))
call_monitor(struct dcb *dcb)
{
    ctrl->num_vm_exits_with_monitor_invocation++;
    /* the guest exited not due to an interrupt but some condition the
     * monitor has to handle, therefore notify the monitor */

    assert(dcb->is_vm_guest);

    // disable the domain
    scheduler_remove(dcb);

    // call the monitor
    errval_t err = lmp_deliver_notification(&dcb->guest_desc.monitor_ep.cap);
    if (err_is_fail(err)) {
        printk(LOG_ERR, "Unexpected error delivering VMEXIT\n");
    }

    // run the monitor
    dispatch(dcb->guest_desc.monitor_ep.cap.u.endpoint.listener);
}

struct sysret sys_syscall(uint64_t syscall, uint64_t arg0, uint64_t arg1,
                          uint64_t *args, uint64_t rflags, uint64_t rip);

extern uint64_t user_stack_save;

void __attribute__ ((noreturn))
vmx_vmkit_vmenter (struct dcb *dcb)
{
    errval_t err;
    lpaddr_t lpaddr = gen_phys_to_local_phys(dcb->guest_desc.ctrl.cap.u.frame.base);
    ctrl = (void *)local_phys_to_mem(lpaddr);

    assert(dcb != NULL);
    assert(dcb->vspace != 0);
    assert(dcb->is_vm_guest);

    if (ept_enabled()) {
        // EPTP: bits 5:3 = 3 encode a 4-level EPT page walk; bits 2:0
        // select the memory type used for accessing the EPT structures
        err = vmwrite(VMX_EPTP_F, ((dcb->vspace) & pa_width_mask() & ~BASE_PAGE_MASK) | 0x18);
        assert(err_is_ok(err));
    } else {
        err = vmwrite(VMX_GUEST_CR3, dcb->vspace);
        assert(err_is_ok(err));
    }

 vmx_vmenter_loop:

    enter_guest();

    // The exit reason occupies bits 15:0 of the field, but vmread always
    // stores a full 64-bit value, so read into a 64-bit variable first
    uint64_t exit_reason_field;
    err = vmread(VMX_EXIT_REASON, &exit_reason_field);
    uint16_t exit_reason = exit_reason_field & 0xFFFF;

    switch(exit_reason) {
    case VMX_EXIT_REASON_INVAL_VMCS:
    {
        // One of the conditions the processor checks on VM-entry can
        // become violated while the guest executes. With the Linux guest
        // we used, the GS limit is set to 0x10ffef, which causes one of
        // the checks to fail.
        uint64_t gs_lim;
        err += vmread(VMX_GUEST_GS_LIM, &gs_lim);
        assert(gs_lim == 0x10ffef);
        err += vmwrite(VMX_GUEST_GS_LIM, 0xfffef);
        assert(err_is_ok(err));
    }
    goto vmx_vmenter_loop;

    case VMX_EXIT_REASON_EXCEPTION:
    {
        uint64_t intr_info, type;
        err += vmread(VMX_EXIT_INTR_INFO, &intr_info);
        assert(err_is_ok(err));

        type = interruption_type(intr_info);

        if (type != TYPE_NMI) {
            call_monitor(dcb);
            break;
        }
    }
    // NMIs fall through and are handled like external interrupts
    case VMX_EXIT_REASON_EXT_INTR:
    case VMX_EXIT_REASON_SMI:
    {
        ctrl->num_vm_exits_without_monitor_invocation++;

#ifdef CONFIG_ARRAKISMON
        uint64_t guest_rip, guest_rsp, guest_rflags;
        err += vmread(VMX_GUEST_RIP, &guest_rip);
        err += vmread(VMX_GUEST_RSP, &guest_rsp);
        err += vmread(VMX_GUEST_RFLAGS, &guest_rflags);

        uint64_t guest_fs_sel, guest_gs_sel;
        err += vmread(VMX_GUEST_FS_SEL, &guest_fs_sel);
        err += vmread(VMX_GUEST_GS_SEL, &guest_gs_sel);
        assert(err_is_ok(err));

        arch_registers_state_t *area = NULL;

        // Store user state into corresponding save area
        if (dispatcher_is_disabled_ip(dcb->disp, guest_rip)) {
            area = dispatcher_get_disabled_save_area(dcb->disp);
            dcb->disabled = true;
        } else {
            area = dispatcher_get_enabled_save_area(dcb->disp);
            dcb->disabled = false;
        }
        memcpy(area, &ctrl->regs, sizeof(arch_registers_state_t));
        area->rip = guest_rip;
        area->rax = ctrl->regs.rax;
        area->rsp = guest_rsp;
        area->eflags = guest_rflags;
        area->fs = guest_fs_sel;
        area->gs = guest_gs_sel;
#endif
        wait_for_interrupt();
    }
    break;
#ifdef CONFIG_ARRAKISMON
    case VMX_EXIT_REASON_VMCALL:
    {
        // Translate this to a SYSCALL
        struct registers_x86_64 *regs = &ctrl->regs;
        uint64_t args[10] = {
            regs->r10, regs->r8, regs->r9, regs->r12, regs->r13, regs->r14,
            regs->r15, regs->rax, regs->rbp, regs->rbx
        };

        /* printf("VMMCALL\n"); */
        uint64_t guest_rip, guest_rsp, guest_rflags;
        err += vmread(VMX_GUEST_RIP, &guest_rip);
        err += vmread(VMX_GUEST_RSP, &guest_rsp);
        err += vmread(VMX_GUEST_RFLAGS, &guest_rflags);
        // Advance the guest RIP past the 3-byte VMCALL instruction
        err += vmwrite(VMX_GUEST_RIP, guest_rip + 3);
        assert(err_is_ok(err));

        user_stack_save = guest_rsp;

        struct sysret ret = sys_syscall(regs->rdi, regs->rsi, regs->rdx,
                                        args, guest_rflags, guest_rip + 3);
        regs->rax = ret.error;
        regs->rdx = ret.value;
    }
    goto vmx_vmenter_loop;
#endif
    default:
        call_monitor(dcb);
        break;
    }
}
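/*
 * Guest-side counterpart of the VMCALL translation above (an illustrative
 * sketch, not part of the kernel): an Arrakis guest issues a syscall by
 * loading the registers that the handler reads out of ctrl->regs and
 * executing the 3-byte VMCALL instruction, along the lines of
 *
 *   uint64_t rax_err = 0, rdx_arg1_then_value = arg1;
 *   __asm volatile("vmcall"
 *                  : "+a" (rax_err), "+d" (rdx_arg1_then_value)
 *                  : "D" (syscall_nr), "S" (arg0)
 *                  : "memory");
 *
 * which is also why the handler advances the guest RIP by exactly three
 * bytes before re-entering the guest.
 */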