1// Copyright 2017 The Fuchsia Authors 2// 3// Use of this source code is governed by a MIT-style 4// license that can be found in the LICENSE file or at 5// https://opensource.org/licenses/MIT 6 7#include "vmx_cpu_state_priv.h" 8 9#include <assert.h> 10#include <bits.h> 11#include <string.h> 12 13#include <hypervisor/cpu.h> 14#include <kernel/auto_lock.h> 15#include <kernel/mp.h> 16 17#include <fbl/mutex.h> 18 19static fbl::Mutex guest_mutex; 20static size_t num_guests TA_GUARDED(guest_mutex) = 0; 21static fbl::Array<VmxPage> vmxon_pages TA_GUARDED(guest_mutex); 22 23static zx_status_t vmxon(paddr_t pa) { 24 uint8_t err; 25 26 __asm__ volatile( 27 "vmxon %[pa];" VMX_ERR_CHECK(err) 28 : [err] "=r"(err) 29 : [pa] "m"(pa) 30 : "cc", "memory"); 31 32 return err ? ZX_ERR_INTERNAL : ZX_OK; 33} 34 35static zx_status_t vmxoff() { 36 uint8_t err; 37 38 __asm__ volatile( 39 "vmxoff;" VMX_ERR_CHECK(err) 40 : [err] "=r"(err) 41 : 42 : "cc"); 43 44 return err ? ZX_ERR_INTERNAL : ZX_OK; 45} 46 47VmxInfo::VmxInfo() { 48 // From Volume 3, Appendix A.1. 49 uint64_t basic_info = read_msr(X86_MSR_IA32_VMX_BASIC); 50 revision_id = static_cast<uint32_t>(BITS(basic_info, 30, 0)); 51 region_size = static_cast<uint16_t>(BITS_SHIFT(basic_info, 44, 32)); 52 write_back = BITS_SHIFT(basic_info, 53, 50) == VMX_MEMORY_TYPE_WRITE_BACK; 53 io_exit_info = BIT_SHIFT(basic_info, 54); 54 vmx_controls = BIT_SHIFT(basic_info, 55); 55} 56 57EptInfo::EptInfo() { 58 // From Volume 3, Appendix A.10. 59 uint64_t ept_info = read_msr(X86_MSR_IA32_VMX_EPT_VPID_CAP); 60 page_walk_4 = BIT_SHIFT(ept_info, 6); 61 write_back = BIT_SHIFT(ept_info, 14); 62 invept = 63 // INVEPT instruction is supported. 64 BIT_SHIFT(ept_info, 20) && 65 // Single-context INVEPT type is supported. 66 BIT_SHIFT(ept_info, 25) && 67 // All-context INVEPT type is supported. 68 BIT_SHIFT(ept_info, 26); 69} 70 71zx_status_t VmxPage::Alloc(const VmxInfo& vmx_info, uint8_t fill) { 72 // From Volume 3, Appendix A.1: Bits 44:32 report the number of bytes that 73 // software should allocate for the VMXON region and any VMCS region. It is 74 // a value greater than 0 and at most 4096 (bit 44 is set if and only if 75 // bits 43:32 are clear). 76 if (vmx_info.region_size > PAGE_SIZE) 77 return ZX_ERR_NOT_SUPPORTED; 78 79 // Check use of write-back memory for VMX regions is supported. 80 if (!vmx_info.write_back) 81 return ZX_ERR_NOT_SUPPORTED; 82 83 // The maximum size for a VMXON or VMCS region is 4096, therefore 84 // unconditionally allocating a page is adequate. 85 return hypervisor::Page::Alloc(fill); 86} 87 88static zx_status_t vmxon_task(void* context, cpu_num_t cpu_num) { 89 auto pages = static_cast<fbl::Array<VmxPage>*>(context); 90 VmxPage& page = (*pages)[cpu_num]; 91 92 // Check that we have instruction information when we VM exit on IO. 93 VmxInfo vmx_info; 94 if (!vmx_info.io_exit_info) 95 return ZX_ERR_NOT_SUPPORTED; 96 97 // Check that full VMX controls are supported. 98 if (!vmx_info.vmx_controls) 99 return ZX_ERR_NOT_SUPPORTED; 100 101 // Check that a page-walk length of 4 is supported. 102 EptInfo ept_info; 103 if (!ept_info.page_walk_4) 104 return ZX_ERR_NOT_SUPPORTED; 105 106 // Check use write-back memory for EPT is supported. 107 if (!ept_info.write_back) 108 return ZX_ERR_NOT_SUPPORTED; 109 110 // Check that the INVEPT instruction is supported. 111 if (!ept_info.invept) 112 return ZX_ERR_NOT_SUPPORTED; 113 114 // Enable VMXON, if required. 115 uint64_t feature_control = read_msr(X86_MSR_IA32_FEATURE_CONTROL); 116 if (!(feature_control & X86_MSR_IA32_FEATURE_CONTROL_LOCK) || 117 !(feature_control & X86_MSR_IA32_FEATURE_CONTROL_VMXON)) { 118 if ((feature_control & X86_MSR_IA32_FEATURE_CONTROL_LOCK) && 119 !(feature_control & X86_MSR_IA32_FEATURE_CONTROL_VMXON)) { 120 return ZX_ERR_NOT_SUPPORTED; 121 } 122 feature_control |= X86_MSR_IA32_FEATURE_CONTROL_LOCK; 123 feature_control |= X86_MSR_IA32_FEATURE_CONTROL_VMXON; 124 write_msr(X86_MSR_IA32_FEATURE_CONTROL, feature_control); 125 } 126 127 // Check control registers are in a VMX-friendly state. 128 uint64_t cr0 = x86_get_cr0(); 129 if (cr_is_invalid(cr0, X86_MSR_IA32_VMX_CR0_FIXED0, X86_MSR_IA32_VMX_CR0_FIXED1)) 130 return ZX_ERR_BAD_STATE; 131 uint64_t cr4 = x86_get_cr4() | X86_CR4_VMXE; 132 if (cr_is_invalid(cr4, X86_MSR_IA32_VMX_CR4_FIXED0, X86_MSR_IA32_VMX_CR4_FIXED1)) 133 return ZX_ERR_BAD_STATE; 134 135 // Enable VMX using the VMXE bit. 136 x86_set_cr4(cr4); 137 138 // Setup VMXON page. 139 VmxRegion* region = page.VirtualAddress<VmxRegion>(); 140 region->revision_id = vmx_info.revision_id; 141 142 // Execute VMXON. 143 zx_status_t status = vmxon(page.PhysicalAddress()); 144 if (status != ZX_OK) { 145 dprintf(CRITICAL, "Failed to turn on VMX on CPU %u\n", cpu_num); 146 return status; 147 } 148 149 return ZX_OK; 150} 151 152static void vmxoff_task(void* arg) { 153 // Execute VMXOFF. 154 zx_status_t status = vmxoff(); 155 if (status != ZX_OK) { 156 dprintf(CRITICAL, "Failed to turn off VMX on CPU %u\n", arch_curr_cpu_num()); 157 return; 158 } 159 160 // Disable VMX. 161 x86_set_cr4(x86_get_cr4() & ~X86_CR4_VMXE); 162} 163 164zx_status_t alloc_vmx_state() { 165 fbl::AutoLock lock(&guest_mutex); 166 if (num_guests == 0) { 167 fbl::AllocChecker ac; 168 size_t num_cpus = arch_max_num_cpus(); 169 VmxPage* pages_ptr = new (&ac) VmxPage[num_cpus]; 170 if (!ac.check()) 171 return ZX_ERR_NO_MEMORY; 172 fbl::Array<VmxPage> pages(pages_ptr, num_cpus); 173 VmxInfo vmx_info; 174 for (auto& page : pages) { 175 zx_status_t status = page.Alloc(vmx_info, 0); 176 if (status != ZX_OK) 177 return status; 178 } 179 180 // Enable VMX for all online CPUs. 181 cpu_mask_t cpu_mask = percpu_exec(vmxon_task, &pages); 182 if (cpu_mask != mp_get_online_mask()) { 183 mp_sync_exec(MP_IPI_TARGET_MASK, cpu_mask, vmxoff_task, nullptr); 184 return ZX_ERR_NOT_SUPPORTED; 185 } 186 187 vmxon_pages = fbl::move(pages); 188 } 189 num_guests++; 190 return ZX_OK; 191} 192 193zx_status_t free_vmx_state() { 194 fbl::AutoLock lock(&guest_mutex); 195 num_guests--; 196 if (num_guests == 0) { 197 mp_sync_exec(MP_IPI_TARGET_ALL, 0, vmxoff_task, nullptr); 198 vmxon_pages.reset(); 199 } 200 return ZX_OK; 201} 202 203bool cr_is_invalid(uint64_t cr_value, uint32_t fixed0_msr, uint32_t fixed1_msr) { 204 uint64_t fixed0 = read_msr(fixed0_msr); 205 uint64_t fixed1 = read_msr(fixed1_msr); 206 return ~(cr_value | ~fixed0) != 0 || ~(~cr_value | fixed1) != 0; 207} 208