/**
 * \file
 * \brief VMKit kernel interface for the version that uses AMD SVM extensions.
 */

/*
 * Copyright (c) 2014, University of Washington.
 * All rights reserved.
 *
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, CAB F.78, Universitaetstr. 6, CH-8092 Zurich.
 * Attn: Systems Group.
 */

#include <string.h>
#include <kernel.h>
#include <paging_kernel_arch.h>
#include <svm_vmkit.h>
#include <x86.h>
#include <dispatch.h>
#include <exec.h>
#include <barrelfish_kpi/vmkit.h>
#include <barrelfish_kpi/syscalls.h>

#include <dev/amd_vmcb_dev.h>

/**
 * \brief The storage area where SVM puts the host state during guest exec.
 */
static uint8_t host_save_area[BASE_PAGE_SIZE]
__attribute__ ((aligned(BASE_PAGE_SIZE)));

/**
 * \brief VMCB for the host to save its state.
 */
static uint8_t host_vmcb[BASE_PAGE_SIZE]
__attribute__ ((aligned(BASE_PAGE_SIZE)));
static void
vmkit_init (void)
{
    static bool executed = false;

    if (executed) {
        return;
    }

    executed = true;
    memset(host_save_area, 0x0, BASE_PAGE_SIZE);
    memset(host_vmcb, 0x0, BASE_PAGE_SIZE);
}

/**
 * \brief Tries to enable hardware-assisted virtualization.
 *
 * Checks whether hardware-assisted virtualization is available on the platform
 * and enables this feature.
 *
 * \return SYS_ERR_OK on successful initialization of the subsystem,
 *         or SYS_ERR_VMKIT_UNAVAIL if virtualization is unavailable.
 */
errval_t svm_enable_virtualization (void)
{
    vmkit_init ();

    // first check what CPUID tells us about SVM support
    uint32_t cpuid_ecx;
    cpuid(CPUID_AMD_EXTFEAT, NULL, NULL, &cpuid_ecx, NULL);
    if (!(cpuid_ecx & AMD_EXTFEAT_ECX_SVM)) {
        return SYS_ERR_VMKIT_UNAVAIL;
    }

    // check whether SVM support has been deactivated (e.g. by the BIOS)
    uint64_t msr_vmcr = rdmsr(MSR_AMD_VMCR);
    if (msr_vmcr & AMD_VMCR_SVMDIS) {
        return SYS_ERR_VMKIT_UNAVAIL;
    }

    // from here on we assume that SVM is available and may be enabled

    // check whether SVM is already enabled
    uint64_t msr_efer = rdmsr(MSR_IA32_EFER);
    if (msr_efer & IA32_EFER_SVME) {
        // SVM is already enabled
        return SYS_ERR_OK;
    }

    // enable SVM by setting EFER.SVME
    addmsr(MSR_IA32_EFER, IA32_EFER_SVME);

    // check whether SVM is now enabled
    msr_efer = rdmsr(MSR_IA32_EFER);
    if (msr_efer & IA32_EFER_SVME) {
        // SVM enabled; set the host save area
        wrmsr(MSR_AMD_VM_HSAVE, mem_to_local_phys((lvaddr_t)host_save_area));
        return SYS_ERR_OK;
    } else {
        printk(LOG_WARN, "VMKit: Unable to enable SVM although the hardware "
               "claims to support it.\n");
        return SYS_ERR_VMKIT_UNAVAIL;
    }
}
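
/*
 * Usage sketch (illustrative only, not called from this file): a core that
 * wants to run VM guests would first try to enable SVM and only then set up
 * a guest VMCB and enter it via svm_vmkit_vmenter().
 *
 *     errval_t err = svm_enable_virtualization();
 *     if (err_is_fail(err)) {
 *         // SVM absent or disabled on this core; VM guests cannot run here
 *         return err;
 *     }
 */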

static inline void
vm_exec (struct dcb *dcb)
{
    lpaddr_t lpaddr = gen_phys_to_local_phys(dcb->guest_desc.ctrl.cap.u.frame.base);
    struct guest_control *ctrl = (void *)local_phys_to_mem(lpaddr);
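
    /* VMRUN transfers only RAX, RSP and RIP (plus the architectural state
     * held in the VMCB) between host and guest; the remaining general purpose
     * registers are untouched by the hardware.  They are therefore pinned to
     * fixed registers here and copied from/to the guest_control area by hand
     * around the vmrun instruction.  RAX is additionally used to pass the
     * physical address of the VMCB to vmrun. */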
    register uintptr_t rbx __asm("rbx") = ctrl->regs.rbx;
    register uintptr_t rcx __asm("rcx") = ctrl->regs.rcx;
    register uintptr_t rdx __asm("rdx") = ctrl->regs.rdx;
    register uintptr_t rsi __asm("rsi") = ctrl->regs.rsi;
    register uintptr_t rdi __asm("rdi") = ctrl->regs.rdi;
    register uintptr_t r8  __asm("r8")  = ctrl->regs.r8;
    register uintptr_t r9  __asm("r9")  = ctrl->regs.r9;
    register uintptr_t r10 __asm("r10") = ctrl->regs.r10;
    register uintptr_t r11 __asm("r11") = ctrl->regs.r11;
    register uintptr_t r12 __asm("r12") = ctrl->regs.r12;
    register uintptr_t r13 __asm("r13") = ctrl->regs.r13;
    register uintptr_t r14 __asm("r14") = ctrl->regs.r14;
    register uintptr_t r15 __asm("r15") = ctrl->regs.r15;
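
    /* RBP needs special treatment: in optimized (NDEBUG) builds it is a free
     * register and can be pinned like the others; in debug builds the compiler
     * uses it as the frame pointer, so the host value is spilled to memory and
     * swapped with the guest value around vmrun instead. */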
#ifdef NDEBUG
    register uintptr_t rbp __asm("rbp") = ctrl->regs.rbp;

    __asm volatile ("sti\n\t"       // allow intr to happen inside the host
                    "vmrun\n\t"     // execute the guest
                    "cli\n\t"       // disable intr in the host again
                    "stgi\n\t"      // enable the global intr flag
        : "+r" (rbx), "+r" (rcx), "+r" (rdx), "+r" (rbp), "+r" (rsi), "+r" (rdi),
          "+r" (r8), "+r" (r9), "+r" (r10), "+r" (r11), "+r" (r12), "+r" (r13),
          "+r" (r14), "+r" (r15)
        : "a" (dcb->guest_desc.vmcb.cap.u.frame.base)
        : "memory");
#else
    static uintptr_t rbp, srbp;

    rbp = ctrl->regs.rbp;

    __asm volatile ("mov %%rbp, %[srbp]\n\t" :: [srbp] "m" (srbp));

    __asm volatile ("mov %[nrbp], %%rbp\n\t"
                    "sti\n\t"       // allow intr to happen inside the host
                    "vmrun\n\t"     // execute the guest
                    "cli\n\t"       // disable intr in the host again
                    "stgi\n\t"      // enable the global intr flag
                    "mov %%rbp, %[nrbp]\n\t"
        : "+r" (rbx), "+r" (rcx), "+r" (rdx), [nrbp] "+m" (rbp),
          "+r" (rsi), "+r" (rdi), "+r" (r8), "+r" (r9), "+r" (r10),
          "+r" (r11), "+r" (r12), "+r" (r13), "+r" (r14), "+r" (r15)
        : "a" (dcb->guest_desc.vmcb.cap.u.frame.base)
        : "memory");

    __asm volatile ("mov %[srbp], %%rbp\n\t"
                    : [srbp] "+m" (srbp));
#endif

    ctrl->regs.rbx = rbx;
    ctrl->regs.rcx = rcx;
    ctrl->regs.rdx = rdx;
    ctrl->regs.rbp = rbp;
    ctrl->regs.rsi = rsi;
    ctrl->regs.rdi = rdi;
    ctrl->regs.r8 = r8;
    ctrl->regs.r9 = r9;
    ctrl->regs.r10 = r10;
    ctrl->regs.r11 = r11;
    ctrl->regs.r12 = r12;
    ctrl->regs.r13 = r13;
    ctrl->regs.r14 = r14;
    ctrl->regs.r15 = r15;
}

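/* VMLOAD/VMSAVE handle the processor state that vmrun does not transfer
 * through the VMCB on its own: the hidden state of FS, GS, TR and LDTR, the
 * KernelGSBase MSR, the SYSCALL MSRs (STAR, LSTAR, CSTAR, SFMASK) and the
 * SYSENTER MSRs.  Both expect the physical address of a VMCB in RAX. */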
static inline void
vmload (lpaddr_t vmcb) {
    __asm volatile ("vmload" : : "a" (vmcb) : "memory");
}

static inline void
vmsave (lpaddr_t vmcb) {
    __asm volatile ("vmsave" : : "a" (vmcb) : "memory");
}

static inline void
vmkit_switch_to (struct dcb *dcb)
{
    assert(dcb != NULL);
    assert(dcb->is_vm_guest);

    // save the host state
    vmsave(mem_to_local_phys((lvaddr_t)host_vmcb));
    // load the guest state
    vmload(gen_phys_to_local_phys(dcb->guest_desc.vmcb.cap.u.frame.base));
}

static inline void
vmkit_switch_from (struct dcb *dcb)
{
    assert(dcb != NULL);
    assert(dcb->is_vm_guest);

    // save the guest state
    vmsave(gen_phys_to_local_phys(dcb->guest_desc.vmcb.cap.u.frame.base));
    // load the host state
    vmload(mem_to_local_phys((lvaddr_t)host_vmcb));
}

struct sysret sys_syscall(uint64_t syscall, uint64_t arg0, uint64_t arg1,
                          uint64_t *args, uint64_t rflags, uint64_t rip);

extern uint64_t user_stack_save;

void __attribute__ ((noreturn))
svm_vmkit_vmenter (struct dcb *dcb)
{
    lpaddr_t lpaddr = gen_phys_to_local_phys(dcb->guest_desc.ctrl.cap.u.frame.base);
    struct guest_control *ctrl = (void *)local_phys_to_mem(lpaddr);

    assert(dcb != NULL);
    assert(dcb->vspace != 0);
    assert(dcb->is_vm_guest);

    lpaddr = gen_phys_to_local_phys(dcb->guest_desc.vmcb.cap.u.frame.base);
    amd_vmcb_t vmcb;
    amd_vmcb_initialize(&vmcb, (void *)local_phys_to_mem(lpaddr));

    /* We need to set the page translation mode. If nested paging is disabled,
     * then we need to set the guest cr3 to the value of the domain's vspace.
     * If nested paging is enabled, then we need to copy the domain's vspace
     * into the ncr3 field of the VMCB. */
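    /* With nested paging the guest manages its own cr3 and page tables, and
     * the domain's vspace acts as the nested page table translating
     * guest-physical to host-physical addresses; without nested paging the
     * guest runs directly on the domain's page tables. */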
    if (amd_vmcb_np_rd(&vmcb).enable) {
        amd_vmcb_ncr3_wr(&vmcb, dcb->vspace);
    } else {
        amd_vmcb_cr3_wr(&vmcb, dcb->vspace);
    }

 svm_vmenter_loop:

    /* printf("vmenter IN\n"); */

    // Enter the guest
    vmkit_switch_to(dcb);
    vm_exec(dcb);
    vmkit_switch_from(dcb);

    /* printf("vmenter OUT\n"); */

    // At this point we have exited the guest because some intercept triggered
    // a VM exit; our state has been automatically restored by SVM.

    uint64_t ec = amd_vmcb_exitcode_rd(&vmcb);

    /* We treat exits due to physical interrupts (INTR, NMI, SMI) specially,
     * since they need to be processed by the kernel interrupt service routines. */
    switch(ec) {
    case VMEXIT_INTR:
    case VMEXIT_NMI:
    case VMEXIT_SMI:
    {
        ctrl->num_vm_exits_without_monitor_invocation++;

        // Store user state into the corresponding save area
        // LH: This block doesn't make sense. The dcb of (traditional) VM
        // guests doesn't have a disp. But maybe arrakis needs this?
        #ifdef CONFIG_ARRAKISMON
        arch_registers_state_t *area = NULL;
        {
            if(dispatcher_is_disabled_ip(dcb->disp, amd_vmcb_rip_rd(&vmcb))) {
                area = dispatcher_get_disabled_save_area(dcb->disp);
                dcb->disabled = true;
            } else {
                area = dispatcher_get_enabled_save_area(dcb->disp);
                dcb->disabled = false;
            }
            memcpy(area, &ctrl->regs, sizeof(arch_registers_state_t));
            area->rax = amd_vmcb_rax_rd(&vmcb);
            area->rip = amd_vmcb_rip_rd(&vmcb);
            area->rsp = amd_vmcb_rsp_rd(&vmcb);
            area->eflags = amd_vmcb_rflags_rd_raw(&vmcb);
            area->fs = amd_vmcb_fs_selector_rd(&vmcb);
            area->gs = amd_vmcb_gs_selector_rd(&vmcb);
        }
        #endif

        // wait_for_interrupt() will enable interrupts and thereby trigger the
        // corresponding handlers (which may invoke the monitor)
        wait_for_interrupt();
    }
    break;
    case VMEXIT_VMMCALL:
    {
        // Translate this to a SYSCALL
        struct registers_x86_64 *regs = &ctrl->regs;
        uint64_t args[10] = {
            regs->r10, regs->r8, regs->r9, regs->r12, regs->r13, regs->r14,
            regs->r15, amd_vmcb_rax_rd(&vmcb), regs->rbp, regs->rbx
        };

        /* printf("VMMCALL\n"); */

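        // Note: VMMCALL encodes as 0f 01 d9, a three-byte instruction, which
        // is why the guest RIP is advanced by exactly three bytes below.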
        // Advance guest RIP to next instruction
        amd_vmcb_rip_wr(&vmcb, amd_vmcb_rip_rd(&vmcb) + 3);
        user_stack_save = amd_vmcb_rsp_rd(&vmcb);

        struct sysret ret =
            sys_syscall(regs->rdi, regs->rsi, regs->rdx, args,
                        amd_vmcb_rflags_rd_raw(&vmcb),
                        amd_vmcb_rip_rd(&vmcb));

        amd_vmcb_rax_wr(&vmcb, ret.error);
        regs->rdx = ret.value;
    }
    goto svm_vmenter_loop;

    default:
        ctrl->num_vm_exits_with_monitor_invocation++;
        /* The guest exited not due to an interrupt but due to some condition
         * the monitor has to handle, therefore notify the monitor. */

        /* printf("OTHER\n"); */

        assert(dcb->is_vm_guest);

        // disable the domain
        scheduler_remove(dcb);

        // call the monitor
        errval_t err = lmp_deliver_notification(&dcb->guest_desc.monitor_ep.cap);
        if (err_is_fail(err)) {
            printk(LOG_ERR, "Unexpected error delivering VMEXIT\n");
        }

        // run the monitor
        dispatch(dcb->guest_desc.monitor_ep.cap.u.endpoint.listener);
        break;
    }
}