1/** 2 * \file 3 * \brief Contains VMKit kernel interface for version using SVM extensions. 4 */ 5 6/* 7 * Copyright (c) 2014, University of Washington. 8 * All rights reserved. 9 * 10 * This file is distributed under the terms in the attached LICENSE file. 11 * If you do not find this file, copies can be found by writing to: 12 * ETH Zurich D-INFK, CAB F.78, Universitaetstr. 6, CH-8092 Zurich. 13 * Attn: Systems Group. 14 */ 15 16#include <string.h> 17#include <kernel.h> 18#include <paging_kernel_arch.h> 19#include <svm_vmkit.h> 20#include <x86.h> 21#include <dispatch.h> 22#include <exec.h> 23#include <barrelfish_kpi/vmkit.h> 24#include <barrelfish_kpi/syscalls.h> 25 26#include <dev/amd_vmcb_dev.h> 27 28/** 29 * \brief The storage area where SVM puts the host state during guest exec. 30 */ 31static uint8_t host_save_area[BASE_PAGE_SIZE] 32__attribute__ ((aligned(BASE_PAGE_SIZE))); 33 34/** 35 * \brief VMCB for the host to save its state. 36 */ 37static uint8_t host_vmcb[BASE_PAGE_SIZE] 38__attribute__ ((aligned(BASE_PAGE_SIZE))); 39 40static void 41vmkit_init (void) 42{ 43 static bool executed = false; 44 45 if (executed) { 46 return; 47 } 48 49 executed = true; 50 memset(host_save_area, 0x0, BASE_PAGE_SIZE); 51 memset(host_vmcb, 0x0, BASE_PAGE_SIZE); 52} 53 54/** 55 * \brief Tries to enable hardware assisted virtualization. 56 * 57 * Checks whether hardware assisted virtualization is available on the platform 58 * and enables this feature. 59 * 60 * \Return Returns VMKIT_ERR_OK on successful initialization of the subsystem 61 * or VMKIT_ERR_UNAVAIL if virtualization is unavailable. 
 */
errval_t svm_enable_virtualization (void)
{
    vmkit_init ();

    // first check what CPUID tells us about SVM support
    uint32_t cpuid_ecx;
    cpuid(CPUID_AMD_EXTFEAT, NULL, NULL, &cpuid_ecx, NULL);
    if (!(cpuid_ecx & AMD_EXTFEAT_ECX_SVM)) {
        return SYS_ERR_VMKIT_UNAVAIL;
    }

    // check whether SVM support is deactivated (e.g. locked off by firmware)
    uint64_t msr_vmcr = rdmsr(MSR_AMD_VMCR);
    if (msr_vmcr & AMD_VMCR_SVMDIS) {
        return SYS_ERR_VMKIT_UNAVAIL;
    }

    // from here on we assume that SVM is avail and may be enabled

    // check whether SVM is already enabled
    uint64_t msr_efer = rdmsr(MSR_IA32_EFER);
    if (msr_efer & IA32_EFER_SVME) {
        // SVM is already enabled
        return SYS_ERR_OK;
    }
    // enable SVM by setting EFER.SVME
    addmsr(MSR_IA32_EFER, IA32_EFER_SVME);
    // read back to check whether SVM is now enabled
    msr_efer = rdmsr(MSR_IA32_EFER);
    if (msr_efer & IA32_EFER_SVME) {
        // SVM enabled; tell the CPU where to save host state on VMRUN
        wrmsr(MSR_AMD_VM_HSAVE, mem_to_local_phys((lvaddr_t)host_save_area));
        return SYS_ERR_OK;
    } else {
        printk(LOG_WARN, "VMKit: Unable to enable SVM although the hardware "
               "claims to support it.\n");
        return SYS_ERR_VMKIT_UNAVAIL;
    }
}

/**
 * \brief Execute the guest with VMRUN until the next \#VMEXIT.
 *
 * The general-purpose registers (other than rax and rsp, which live in the
 * VMCB) are not transferred by VMRUN, so they are shuffled by hand here:
 * each one is bound to its host register via a local register variable,
 * loaded from the guest's guest_control area before VMRUN, and written back
 * afterwards.  The VMCB physical address is passed in rax (the "a"
 * constraint), as VMRUN requires.
 *
 * rbp is special: in NDEBUG builds it is treated like any other register
 * operand (presumably because optimized builds omit the frame pointer —
 * TODO confirm build flags), otherwise the host frame pointer is spilled
 * to a static slot around the VMRUN and the guest rbp is moved in and out
 * explicitly, since the compiler cannot have rbp clobbered.
 *
 * NOTE(review): rbp/srbp are static storage — not reentrant; assumes at
 * most one guest entry in flight per image (verify per-core kernel model).
 */
static inline void
vm_exec (struct dcb *dcb)
{
    // map the guest control frame so the register area is accessible
    lpaddr_t lpaddr = gen_phys_to_local_phys(dcb->guest_desc.ctrl.cap.u.frame.base);
    struct guest_control *ctrl = (void *)local_phys_to_mem(lpaddr);
    // pin each guest GPR to its corresponding host register
    register uintptr_t rbx __asm("rbx") = ctrl->regs.rbx;
    register uintptr_t rcx __asm("rcx") = ctrl->regs.rcx;
    register uintptr_t rdx __asm("rdx") = ctrl->regs.rdx;
    register uintptr_t rsi __asm("rsi") = ctrl->regs.rsi;
    register uintptr_t rdi __asm("rdi") = ctrl->regs.rdi;
    register uintptr_t r8  __asm("r8")  = ctrl->regs.r8;
    register uintptr_t r9  __asm("r9")  = ctrl->regs.r9;
    register uintptr_t r10 __asm("r10") = ctrl->regs.r10;
    register uintptr_t r11 __asm("r11") = ctrl->regs.r11;
    register uintptr_t r12 __asm("r12") = ctrl->regs.r12;
    register uintptr_t r13 __asm("r13") = ctrl->regs.r13;
    register uintptr_t r14 __asm("r14") = ctrl->regs.r14;
    register uintptr_t r15 __asm("r15") = ctrl->regs.r15;
#ifdef NDEBUG
    // no frame pointer in this build: rbp can be a plain register operand
    register uintptr_t rbp __asm("rbp") = ctrl->regs.rbp;

    __asm volatile ("sti\n\t"       // allow intr to happen inside the host
                    "vmrun\n\t"     // execute the guest
                    "cli\n\t"       // disable intr in the host again
                    "stgi\n\t"      // enable the global intr flag
        : "+r" (rbx), "+r" (rcx), "+r" (rdx), "+r" (rbp), "+r" (rsi), "+r" (rdi),
          "+r" (r8), "+r" (r9), "+r" (r10), "+r" (r11), "+r" (r12), "+r" (r13),
          "+r" (r14), "+r" (r15)
        : "a" (dcb->guest_desc.vmcb.cap.u.frame.base)  // VMRUN takes VMCB pa in rax
        : "memory");
#else
    // rbp is the frame pointer here: stash the host's value in srbp, hand
    // the guest's value to VMRUN through memory, and restore afterwards
    static uintptr_t rbp, srbp;

    rbp = ctrl->regs.rbp;

    // save the host frame pointer
    __asm volatile ("mov %%rbp, %[srbp]\n\t" :: [srbp] "m" (srbp));

    __asm volatile ("mov %[nrbp], %%rbp\n\t" // install the guest rbp
                    "sti\n\t"       // allow intr to happen inside the host
                    "vmrun\n\t"     // execute the guest
                    "cli\n\t"       // disable intr in the host again
                    "stgi\n\t"      // enable the global intr flag
                    "mov %%rbp, %[nrbp]\n\t" // capture the guest rbp on exit
        : "+r" (rbx), "+r" (rcx), "+r" (rdx), [nrbp] "+m" (rbp),
          "+r" (rsi), "+r" (rdi), "+r" (r8), "+r" (r9), "+r" (r10),
          "+r" (r11), "+r" (r12), "+r" (r13), "+r" (r14), "+r" (r15)
        : "a" (dcb->guest_desc.vmcb.cap.u.frame.base)  // VMRUN takes VMCB pa in rax
        : "memory");

    // restore the host frame pointer
    __asm volatile ("mov %[srbp], %%rbp\n\t"
                    : [srbp] "+m" (srbp));
#endif

    // write the guest's registers back into its guest_control area
    ctrl->regs.rbx = rbx;
    ctrl->regs.rcx = rcx;
    ctrl->regs.rdx = rdx;
    ctrl->regs.rbp = rbp;
    ctrl->regs.rsi = rsi;
    ctrl->regs.rdi = rdi;
    ctrl->regs.r8 = r8;
    ctrl->regs.r9 = r9;
    ctrl->regs.r10 = r10;
    ctrl->regs.r11 = r11;
    ctrl->regs.r12 = r12;
    ctrl->regs.r13 = r13;
    ctrl->regs.r14 = r14;
    ctrl->regs.r15 = r15;
}

/**
 * \brief Execute VMLOAD for the VMCB at physical address \p vmcb (in rax).
 */
static inline void
vmload (lpaddr_t vmcb) {
    __asm volatile ("vmload" : : "a" (vmcb) : "memory");
}

/**
 * \brief Execute VMSAVE for the VMCB at physical address \p vmcb (in rax).
 */
static inline void
vmsave (lpaddr_t vmcb) {
    __asm volatile ("vmsave" : : "a" (vmcb) : "memory");
}

/**
 * \brief Switch processor state from host to guest prior to VMRUN.
 */
static inline void
vmkit_switch_to (struct dcb *dcb)
{
    assert(dcb != NULL);
    assert(dcb->is_vm_guest);

    // save the host state
    vmsave(mem_to_local_phys((lvaddr_t)host_vmcb));
    // load the guest state
    vmload(gen_phys_to_local_phys(dcb->guest_desc.vmcb.cap.u.frame.base));
}

/**
 * \brief Switch processor state from guest back to host after a \#VMEXIT.
 */
static inline void
vmkit_switch_from (struct dcb *dcb)
{
    assert(dcb != NULL);
    assert(dcb->is_vm_guest);

    // save the guest state
    vmsave(gen_phys_to_local_phys(dcb->guest_desc.vmcb.cap.u.frame.base));
    // load the host state
    vmload(mem_to_local_phys((lvaddr_t)host_vmcb));
}

// kernel syscall entry point, used to service guest VMMCALLs
struct sysret sys_syscall(uint64_t syscall, uint64_t arg0, uint64_t arg1,
                          uint64_t *args, uint64_t rflags, uint64_t rip);

// user rsp slot normally filled on kernel entry; see VMMCALL handling below
extern uint64_t user_stack_save;

/**
 * \brief Enter a VM guest and service its VM exits.  Does not return.
 *
 * Sets up the guest's translation root (ncr3 when nested paging is
 * enabled, cr3 otherwise), then runs the guest and dispatches on the
 * VMCB exit code:
 *  - INTR/NMI/SMI: account the exit and wait_for_interrupt() so the
 *    kernel's interrupt handlers run (presumably noreturn — TODO confirm);
 *  - VMMCALL: translate the hypercall into a sys_syscall() invocation and
 *    resume the guest;
 *  - anything else: deschedule the guest, notify its monitor endpoint and
 *    dispatch the monitor.
 */
void __attribute__ ((noreturn))
svm_vmkit_vmenter (struct dcb *dcb)
{
    // map the guest control frame (exit counters + register save area)
    lpaddr_t lpaddr = gen_phys_to_local_phys(dcb->guest_desc.ctrl.cap.u.frame.base);
    struct guest_control *ctrl = (void *)local_phys_to_mem(lpaddr);

    assert(dcb != NULL);
    assert(dcb->vspace != 0);
    assert(dcb->is_vm_guest);

    // map the VMCB and wrap it in its mackerel accessor
    lpaddr = gen_phys_to_local_phys(dcb->guest_desc.vmcb.cap.u.frame.base);
    amd_vmcb_t vmcb;
    amd_vmcb_initialize(&vmcb, (void *)local_phys_to_mem(lpaddr));

    /* We need to set the page translation mode. If nested paging is disabled
     * then we need to set the guest cr3 to the value of the domains vspace. If
     * nested paging is enabled then we need to copy the domains vspace into the
     * ncr3 field of the vmcb. */
    if (amd_vmcb_np_rd(&vmcb).enable) {
        amd_vmcb_ncr3_wr(&vmcb, dcb->vspace);
    } else {
        amd_vmcb_cr3_wr(&vmcb, dcb->vspace);
    }

 svm_vmenter_loop:

    /* printf("vmenter IN\n"); */

    // Enter the guest
    vmkit_switch_to(dcb);
    vm_exec(dcb);
    vmkit_switch_from(dcb);

    /* printf("vmenter OUT\n"); */

    // Here we exited the guest due to some intercept triggered a vm exit
    // our state is automatically restored by SVM

    uint64_t ec = amd_vmcb_exitcode_rd(&vmcb);

    /* We treat exits due to pysical interrupts (INTR, NMI, SMI) specially since
     * they need to be processed by the kernel interrupt service routines */
    switch(ec) {
    case VMEXIT_INTR:
    case VMEXIT_NMI:
    case VMEXIT_SMI:
        {
            ctrl->num_vm_exits_without_monitor_invocation++;

            // Store user state into corresponding save area
            // LH: This block doesnt make sense. dcb of (traditional) vm guests dont have a disp.
            // But maybe arrakis needs this?
#ifdef CONFIG_ARRAKISMON
            arch_registers_state_t *area = NULL;
            {
                // pick the enabled/disabled save area based on the guest rip
                if(dispatcher_is_disabled_ip(dcb->disp, amd_vmcb_rip_rd(&vmcb))) {
                    area = dispatcher_get_disabled_save_area(dcb->disp);
                    dcb->disabled = true;
                } else {
                    area = dispatcher_get_enabled_save_area(dcb->disp);
                    dcb->disabled = false;
                }
                // GPRs come from the control area; rax/rip/rsp/rflags/fs/gs
                // live in the VMCB and are copied out separately
                memcpy(area, &ctrl->regs, sizeof(arch_registers_state_t));
                area->rax = amd_vmcb_rax_rd(&vmcb);
                area->rip = amd_vmcb_rip_rd(&vmcb);
                area->rsp = amd_vmcb_rsp_rd(&vmcb);
                area->eflags = amd_vmcb_rflags_rd_raw(&vmcb);
                area->fs = amd_vmcb_fs_selector_rd(&vmcb);
                area->gs = amd_vmcb_gs_selector_rd(&vmcb);
            }
#endif

            // wait for interrupt will enable interrupts and therefore trigger their
            // corresponding handlers (which may be the monitor)
            wait_for_interrupt();
        }
        break;

    case VMEXIT_VMMCALL:
        {
            // Translate this to a SYSCALL
            struct registers_x86_64 *regs = &ctrl->regs;
            uint64_t args[10] = {
                regs->r10, regs->r8, regs->r9, regs->r12, regs->r13, regs->r14,
                regs->r15, amd_vmcb_rax_rd(&vmcb), regs->rbp, regs->rbx
            };

            /* printf("VMMCALL\n"); */

            // Advance guest RIP past the 3-byte VMMCALL instruction
            amd_vmcb_rip_wr(&vmcb, amd_vmcb_rip_rd(&vmcb) + 3);
            // expose the guest rsp where the syscall path expects the user
            // stack pointer (normally saved on a regular kernel entry)
            user_stack_save = amd_vmcb_rsp_rd(&vmcb);

            struct sysret ret =
                sys_syscall(regs->rdi, regs->rsi, regs->rdx, args,
                            amd_vmcb_rflags_rd_raw(&vmcb),
                            amd_vmcb_rip_rd(&vmcb));

            // return values follow the syscall ABI: error in rax, value in rdx
            amd_vmcb_rax_wr(&vmcb, ret.error);
            regs->rdx = ret.value;
        }
        goto svm_vmenter_loop;

    default:
        ctrl->num_vm_exits_with_monitor_invocation++;
        /* the guest exited not due to an interrupt but some condition the
         * monitor has to handle, therefore notify the monitor */

        /* printf("OTHER\n"); */

        assert(dcb->is_vm_guest);

        // disable the domain
        scheduler_remove(dcb);

        // call the monitor
        errval_t err = lmp_deliver_notification(&dcb->guest_desc.monitor_ep.cap);
        if (err_is_fail(err)) {
            // NOTE(review): message lacks a trailing '\n'
            printk(LOG_ERR, "Unexpected error delivering VMEXIT");
        }

        // run the monitor
        dispatch(dcb->guest_desc.monitor_ep.cap.u.endpoint.listener);
        break;
    }
}