1
2/**
3 * \file
4 */
5
6/*
7 * Copyright (c) 2009, 2010, ETH Zurich.
8 * All rights reserved.
9 *
10 * This file is distributed under the terms in the attached LICENSE file.
11 * If you do not find this file, copies can be found by writing to:
12 * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group.
13 */
14
15#include <stdlib.h>
16#include <string.h>
17#include "vmkitmon.h"
18#include <barrelfish/lmp_endpoints.h>
19#include "x86.h"
20#ifdef CONFIG_SVM
21#include "svm.h"
22#endif
23#include "realmode.h"
24#include "hdd.h"
25#include "console.h"
26#include "pc16550d.h"
27#include "apic.h"
28#include "lpc.h"
29#include "pci.h"
30#include "pci_host.h"
31#include "pci_devices.h"
32#include "pci_ethernet.h"
33
34#define VMCB_SIZE       0x1000      // 4KB
35
36#ifdef CONFIG_SVM
37#define IOPM_SIZE       0x3000      // 12KB
38#define MSRPM_SIZE      0x2000      // 8KB
39#else
40#define IOBMP_A_SIZE    0x1000      // 4KB
41#define IOBMP_B_SIZE    0x1000      // 4KB
42#define MSRPM_SIZE      0x1000      // 4KB
43#endif
44
45#define RM_MEM_SIZE     (0x100000 + BASE_PAGE_SIZE)    // 1MB + A20 gate space
46
47#define APIC_BASE       0xfee00000
48
49#define SERIAL_DRIVER   "serial0.raw"
50
51#ifndef CONFIG_SVM
52extern uint16_t saved_exit_reason;
53extern uint64_t saved_exit_qual, saved_rip;
54
55// List of MSRs that are saved on VM-exit and loaded on VM-entry.
56static uint32_t msr_list[VMX_MSR_COUNT] =
57    {X86_MSR_KERNEL_GS_BASE, X86_MSR_STAR, X86_MSR_LSTAR, X86_MSR_CSTAR, X86_MSR_SFMASK};
58
59// Saved priority of the most recent irq that is asserted.
60uint8_t interrupt_priority = 0;
61#endif
62
63#ifndef CONFIG_SVM
64static inline int vmx_guest_msr_index(uint32_t msr_index)
65{
66    for (int i = 0; i < VMX_MSR_COUNT; i++) {
67        if (msr_list[i] == msr_index) {
68            return i;
69	}
70    }
71    return -1;
72}
73
74static void initialize_guest_msr_area(struct guest *g)
75{
76    struct msr_entry *guest_msr_area = (struct msr_entry *)g->msr_area_va;
77
78    // The values of the MSRs in the guest MSR area are all set to 0.
79    for (int i = 0; i < VMX_MSR_COUNT; i++) {
80        guest_msr_area[i].index = msr_list[i];
81	guest_msr_area[i].val = 0x0;
82    }
83
84    errval_t err = invoke_dispatcher_vmwrite(g->dcb_cap, VMX_EXIT_MSR_STORE_F, g->msr_area_pa);
85    err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_EXIT_MSR_STORE_CNT, VMX_MSR_COUNT);
86    err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_ENTRY_MSR_LOAD_F, g->msr_area_pa);
87    err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_ENTRY_MSR_LOAD_CNT, VMX_MSR_COUNT);
88    assert(err_is_ok(err));
89}
90#endif
91
92lvaddr_t guest_offset = 0;
93static struct guest __guest;
94static struct guest *__guestp = NULL;
95
96#ifdef CONFIG_SVM
97/// stores the last used guest ASID
98static uint32_t last_guest_asid = 0;
99#endif
100
101// FIXME: this is somewhat broken by design... we should emit proper exceptions
102//        to the guest opposed to just halt the VM
103#define guest_assert(g, e) \
104    ((e) ? (void)0 : (handle_vmexit_unhandeled(g), assert(e)))
105
106static errval_t
107guest_slot_alloc(struct guest *g, struct capref *ret)
108{
109    return g->slot_alloc.a.alloc(&g->slot_alloc.a, ret);
110}
111
112errval_t guest_vspace_map_wrapper(struct vspace *vspace, lvaddr_t vaddr,
113                                         struct capref frame,  size_t size)
114{
115    errval_t err;
116    struct vregion *vregion = NULL;
117    struct memobj_one_frame *memobj = NULL;
118
119    // Allocate space
120    vregion = malloc(sizeof(struct vregion));
121    if (!vregion) {
122        err = LIB_ERR_MALLOC_FAIL;
123        goto error;
124    }
125    memobj = malloc(sizeof(struct memobj_one_frame));
126    if (!memobj) {
127        err = LIB_ERR_MALLOC_FAIL;
128        goto error;
129    }
130
131    // Create the objects
132    err = memobj_create_one_frame(memobj, size, 0);
133    if (err_is_fail(err)) {
134        err = err_push(err, LIB_ERR_MEMOBJ_CREATE_ANON);
135        goto error;
136    }
137    err = memobj->m.f.fill(&memobj->m, 0, frame, size);
138    if (err_is_fail(err)) {
139        err = err_push(err, LIB_ERR_MEMOBJ_FILL);
140        goto error;
141    }
142    err = vregion_map_fixed(vregion, vspace, &memobj->m, 0, size, vaddr,
143                            VREGION_FLAGS_READ | VREGION_FLAGS_WRITE | VREGION_FLAGS_EXECUTE);
144    if (err_is_fail(err)) {
145        err = LIB_ERR_VSPACE_MAP;
146        goto error;
147    }
148    err = memobj->m.f.pagefault(&memobj->m, vregion, 0, 0);
149    if (err_is_fail(err)) {
150        err = err_push(err, LIB_ERR_MEMOBJ_PAGEFAULT_HANDLER);
151        goto error;
152    }
153
154    return SYS_ERR_OK;
155
156 error: // XXX: proper cleanup
157    if (vregion) {
158        free(vregion);
159    }
160    if (memobj) {
161        free(memobj);
162    }
163    return err;
164}
165
166#define GUEST_VSPACE_SIZE (1ul<<32) // GB
167
168static errval_t vspace_map_wrapper(lvaddr_t vaddr, struct capref frame,
169                                   size_t size)
170{
171    errval_t err;
172    static struct memobj_anon *memobj = NULL;
173    static struct vregion *vregion = NULL;
174    static bool initialized = false;
175
176    if (!initialized) {
177        // Allocate space
178        memobj = malloc(sizeof(struct memobj_anon));
179        if (!memobj) {
180            return LIB_ERR_MALLOC_FAIL;
181        }
182        vregion = malloc(sizeof(struct vregion));
183        if (!vregion) {
184            return LIB_ERR_MALLOC_FAIL;
185        }
186
187        // Create a memobj and vregion
188        err = memobj_create_anon(memobj, GUEST_VSPACE_SIZE, 0);
189        if (err_is_fail(err)) {
190            return err_push(err, LIB_ERR_MEMOBJ_CREATE_ANON);
191        }
192        err = vregion_map(vregion, get_current_vspace(), &memobj->m, 0,
193                          GUEST_VSPACE_SIZE, VREGION_FLAGS_READ_WRITE);
194        if (err_is_fail(err)) {
195            return err_push(err, LIB_ERR_VREGION_MAP);
196        }
197
198        guest_offset = vregion_get_base_addr(vregion);
199        initialized = true;
200    }
201
202    // Create mapping
203    err = memobj->m.f.fill(&memobj->m, vaddr, frame, size);
204    if (err_is_fail(err)) {
205        return err_push(err, LIB_ERR_MEMOBJ_FILL);
206    }
207    err = memobj->m.f.pagefault(&memobj->m, vregion, vaddr, 0);
208    if (err_is_fail(err)) {
209        return err_push(err, LIB_ERR_MEMOBJ_PAGEFAULT_HANDLER);
210    }
211
212    return SYS_ERR_OK;
213}
214// allocates some bytes of memory for the guest starting at a specific addr
215// also performs the mapping into the vspace of the monitor
216errval_t
217alloc_guest_mem(struct guest *g, lvaddr_t guest_paddr, size_t bytes)
218{
219    errval_t err;
220
221    // only allow multiple of page sizes to be allocated
222    assert(bytes > 0 && (bytes & BASE_PAGE_MASK) == 0);
223    // do not allow allocation outside of the guests physical memory
224    assert(guest_paddr + bytes <= g->mem_high_va);
225
226    // Allocate frame
227    struct capref cap;
228    err = guest_slot_alloc(g, &cap);
229
230
231
232
233    if (err_is_fail(err)) {
234        return err_push(err, LIB_ERR_SLOT_ALLOC);
235    }
236    err = frame_create(cap, bytes, NULL);
237    if (err_is_fail(err)) {
238        return err_push(err, LIB_ERR_FRAME_CREATE);
239    }
240
241    // Map into the guest vspace
242    err = guest_vspace_map_wrapper(&g->vspace, guest_paddr, cap, bytes);
243    if (err_is_fail(err)) {
244        return err;
245    }
246
247    // Create a copy of the capability to map in our vspace
248    struct capref host_cap;
249    err = slot_alloc(&host_cap);
250    if (err_is_fail(err)) {
251        return err;
252    }
253    err = cap_copy(host_cap, cap);
254    if (err_is_fail(err)) {
255        return err;
256    }
257
258    // Map into my vspace
259    err = vspace_map_wrapper(guest_to_host(guest_paddr), host_cap, bytes);
260    if (err_is_fail(err)) {
261        return err;
262    }
263
264	struct frame_identity frameid = { .base = 0, .bytes = 0 };
265	errval_t r = invoke_frame_identify(cap, &frameid);
266	assert(err_is_ok(r));
267	VMKIT_PCI_DEBUG("alloc_guest_mem: frameid.base: 0x%lx, frameid.bytes: %zd, "
268                "g->mem_low_va: 0x%lx, g->mem_high_va: 0x%lx\n",
269                frameid.base, frameid.bytes, g->mem_low_va, g->mem_high_va);
270
271    return SYS_ERR_OK;
272}
273
274static void
275initialize_iopm (struct guest *self) {
276    // intercept all IO port accesses (for now)
277#ifdef CONFIG_SVM
278    memset((void*)self->iopm_va, 0xFF, IOPM_SIZE);
279#else
280    memset((void*)self->iobmp_a_va, 0xFF, IOBMP_A_SIZE);
281    memset((void*)self->iobmp_b_va, 0xFF, IOBMP_B_SIZE);
282#endif
283}
284
285// access_mode: 0 all access, 1 read intercept, 2 write intercept, 3 all interc.
286static inline void
287set_msr_access (struct guest *g, uint32_t msr, int access_mode)
288{
289    assert(access_mode >= 0 && access_mode <= 3);
290
291    // a region a 2K bytes represents the access bits of 8K MSRs, therefore each
292    // MSR takes two bits (one for rdmsr and one for wrmsr)
293    uintptr_t byte_offset = (msr & 0xffff) / 4;
294    int bit_offset = ((msr & 0xffff) % 4) * 2;
295
296    if (msr < 0x2000) {
297        // do nothing
298    } else if (msr >= 0xc0000000 && msr < 0xc0002000) {
299        byte_offset += 0x800;
300    } else if (msr >= 0xc0010000 && msr < 0xc0012000) {
301        byte_offset += 0x1000;
302    } else {
303        assert(!"not reached");
304    }
305
306    assert(byte_offset < MSRPM_SIZE);
307
308    // read the byte holding the relevant bits
309    uint8_t val = *(uint8_t *)(g->msrpm_va + byte_offset);
310    // set the access params according to the arguments
311    val = (val & ~(0x3 << bit_offset)) | (access_mode << bit_offset);
312    // store the modified value back in the map
313    *(uint8_t *)(g->msrpm_va + byte_offset) = val;
314
315    //printf("MSR: msr %x, byte_offset %lx, bit_offset %x, val %x\n", msr, byte_offset, bit_offset, val);
316}
317
318static void
319initialize_msrpm (struct guest *g) {
320    // intercept all MSR accesses (for now)
321    memset((void*)g->msrpm_va, 0xff, MSRPM_SIZE);
322#if 0
323    // allow performance counters and evnets MSR accesses
324    set_msr_access (g, 0xc0010000, 0);
325    set_msr_access (g, 0xc0010001, 0);
326    set_msr_access (g, 0xc0010002, 0);
327    set_msr_access (g, 0xc0010003, 0);
328    set_msr_access (g, 0xc0010004, 0);
329    set_msr_access (g, 0xc0010005, 0);
330    set_msr_access (g, 0xc0010006, 0);
331    set_msr_access (g, 0xc0010007, 0);
332#endif
333}
334
335#define INIT_DATA_SEGREG(vmcb,x)                 \
336do {                                             \
337    amd_vmcb_seg_attrib_t __sa = {               \
338        .segtype = 3,                            \
339        .p = 1,                                  \
340        .s = 1                                   \
341    };                                           \
342    amd_vmcb_##x## _attrib_wr((vmcb), __sa);     \
343    amd_vmcb_##x## _selector_wr((vmcb), 0x0);    \
344    amd_vmcb_##x## _base_wr((vmcb), 0x0);        \
345    amd_vmcb_##x## _limit_wr((vmcb), 0xffff);    \
346} while (0)
347
348#define INIT_CODE_SEGREG(vmcb,x)                 \
349do {                                             \
350    amd_vmcb_seg_attrib_t __sa = {               \
351        .segtype = 11,                           \
352        .p = 1,                                  \
353        .s = 1                                   \
354    };                                           \
355    amd_vmcb_##x## _attrib_wr((vmcb), __sa);     \
356    amd_vmcb_##x## _selector_wr((vmcb), 0xf000); \
357    amd_vmcb_##x## _base_wr((vmcb), 0xffff0000); \
358    amd_vmcb_##x## _limit_wr((vmcb), 0xffff);    \
359} while (0)
360
361#define INIT_SYS_SEGREG(vmcb,x)                  \
362do {                                             \
363    amd_vmcb_seg_attrib_t __sa = {               \
364        .segtype = 2,                            \
365        .p = 1                                   \
366    };                                           \
367    amd_vmcb_##x## _attrib_wr((vmcb), __sa);     \
368    amd_vmcb_##x## _selector_wr((vmcb), 0x0);    \
369    amd_vmcb_##x## _base_wr((vmcb), 0x0);        \
370    amd_vmcb_##x## _limit_wr((vmcb), 0xffff);    \
371} while (0)
372
373/* This method initializes a new VMCB memory regsion and sets the initial
374 * machine state as defined by the AMD64 architecture specification */
375#ifdef CONFIG_SVM
376static void
377initialize_vmcb (struct guest *self) {
378    amd_vmcb_initialize(&self->vmcb, (mackerel_addr_t)self->vmcb_va);
379
380    // 1. Initialize intercepts
381
382    /* For now we intercept just everything */
383
384    amd_vmcb_cr_access_wr_raw(&self->vmcb, ~0u);
385    amd_vmcb_cr_access_rdcr2_wrf(&self->vmcb, 0);
386    amd_vmcb_cr_access_wrcr2_wrf(&self->vmcb, 0);
387    amd_vmcb_cr_access_rdcr4_wrf(&self->vmcb, 0);
388    amd_vmcb_cr_access_wrcr4_wrf(&self->vmcb, 0);
389
390    // FIXME: ignoring DR accesses may be insecure
391    //amd_vmcb_dr_access_wr_raw(&self->vmcb, ~0u);
392    amd_vmcb_exceptions_wr_raw(&self->vmcb, ~0u);
393    amd_vmcb_exceptions_vector7_wrf(&self->vmcb, 0);
394    amd_vmcb_exceptions_vector14_wrf(&self->vmcb, 0);
395
396    amd_vmcb_intercepts_wr_raw(&self->vmcb, 0x1fffffffffff);
397    amd_vmcb_intercepts_pushf_wrf(&self->vmcb, 0);
398    amd_vmcb_intercepts_popf_wrf(&self->vmcb, 0);
399    amd_vmcb_intercepts_invlpg_wrf(&self->vmcb, 0);
400    amd_vmcb_intercepts_rdtsc_wrf(&self->vmcb, 0);
401    amd_vmcb_intercepts_rdtscp_wrf(&self->vmcb, 0);
402    amd_vmcb_intercepts_iret_wrf(&self->vmcb, 0);
403    amd_vmcb_intercepts_wbinvd_wrf(&self->vmcb, 0);
404    amd_vmcb_intercepts_pause_wrf(&self->vmcb, 0);
405    amd_vmcb_intercepts_vintr_wrf(&self->vmcb, 0);
406
407    // 2. Setup some config fields
408
409    // physical addresses of IOPM and MSRPM_SIZE
410    amd_vmcb_iopm_base_pa_wr(&self->vmcb, self->iopm_pa);
411    amd_vmcb_msrpm_base_pa_wr(&self->vmcb, self->msrpm_pa);
412    // assign guest ASID
413    // FIXME: use real asid allocator. BF does not know about tagged TLBs atm
414    amd_vmcb_tlb_guest_asid_wrf(&self->vmcb, ++last_guest_asid);
415    // enable virtual intr masking
416    amd_vmcb_vintr_vintr_masking_wrf(&self->vmcb, 1);
417    // enable nested paging
418    amd_vmcb_np_enable_wrf(&self->vmcb, 1);
419
420    /* 3. Guest state initialization
421     * according to Intels Manual 3A: Table 9-1. */
422
423    // The second bit of rflags needs to be 1, also indicate that we support the
424    // CPUID instruction.
425    amd_vmcb_rflags_wr_raw(&self->vmcb, 0x00200002);
426    amd_vmcb_rip_wr(&self->vmcb, 0x0000fff0);
427    amd_vmcb_cr0_wr_raw(&self->vmcb, 0x60000010);
428
429    INIT_CODE_SEGREG(&self->vmcb, cs);
430    INIT_DATA_SEGREG(&self->vmcb, ss);
431    INIT_DATA_SEGREG(&self->vmcb, ds);
432    INIT_DATA_SEGREG(&self->vmcb, es);
433    INIT_DATA_SEGREG(&self->vmcb, fs);
434    INIT_DATA_SEGREG(&self->vmcb, gs);
435
436    INIT_SYS_SEGREG(&self->vmcb, gdtr);
437    INIT_SYS_SEGREG(&self->vmcb, idtr);
438    INIT_SYS_SEGREG(&self->vmcb, ldtr);
439    INIT_SYS_SEGREG(&self->vmcb, tr);
440
441    amd_vmcb_dr6_wr(&self->vmcb, 0xffff0ff0);
442    amd_vmcb_dr7_wr(&self->vmcb, 0x00000400);
443
444    // taken from the linux SVM source
445    amd_vmcb_gpat_wr(&self->vmcb, 0x0007040600070406ul);
446
447    // svm requires guest EFER.SVME to be set
448    amd_vmcb_efer_svme_wrf(&self->vmcb, 1);
449}
450#endif
451
452static void
453idc_handler(void *arg)
454{
455    struct guest *g = arg;
456    errval_t err;
457
458    // consume message
459    struct lmp_recv_buf buf = { .buflen = 0 };
460    err = lmp_endpoint_recv(g->monitor_ep, &buf, NULL);
461    assert(err_is_ok(err));
462
463    // run real handler
464    guest_handle_vmexit(g);
465
466    // re-register
467    struct event_closure cl = {
468        .handler = idc_handler,
469        .arg = arg,
470    };
471    err = lmp_endpoint_register(g->monitor_ep, get_default_waitset(), cl);
472    assert(err_is_ok(err));
473}
474
475/* This method duplicates some code from spawndomain since we need to spawn very
476 * special domains */
477static void
478spawn_guest_domain (struct guest *self) {
479    errval_t err;
480
481    // create the guest virtual address space
482    struct capref vnode_cap;
483    err = guest_slot_alloc(self, &vnode_cap);
484    assert(err_is_ok(err));
485    err = vnode_create(vnode_cap, ObjType_VNode_x86_64_pml4);
486    assert(err_is_ok(err));
487
488    struct pmap *pmap = malloc(sizeof(struct pmap_x86));
489    assert(pmap);
490    err = pmap_x86_64_init(pmap, &self->vspace, vnode_cap, NULL);
491    assert(err_is_ok(err));
492    err = vspace_init(&self->vspace, pmap);
493    assert(err_is_ok(err));
494
495    // create DCB
496    err = guest_slot_alloc(self, &self->dcb_cap);
497    assert(err_is_ok(err));
498    err = dispatcher_create(self->dcb_cap);
499    assert(err_is_ok(err));
500
501    // create end point
502    struct capref ep_cap;
503
504    // use minimum-sized endpoint, because we don't need to buffer >1 vmexit
505    err = endpoint_create(LMP_RECV_LENGTH, &ep_cap, &self->monitor_ep);
506    assert(err_is_ok(err));
507
508    // register to receive on this endpoint
509    struct event_closure cl = {
510        .handler = idc_handler,
511        .arg = self,
512    };
513    err = lmp_endpoint_register(self->monitor_ep, get_default_waitset(), cl);
514    assert(err_is_ok(err));
515
516    // setup the DCB
517    err = invoke_dispatcher_setup_guest(self->dcb_cap, ep_cap, vnode_cap,
518                                        self->vmcb_cap, self->ctrl_cap);
519    assert(err_is_ok(err));
520
521#ifndef CONFIG_SVM
522    initialize_guest_msr_area(self);
523
524    err = 0;
525    err += invoke_dispatcher_vmwrite(self->dcb_cap, VMX_IOBMP_A_F, self->iobmp_a_pa);
526    err += invoke_dispatcher_vmwrite(self->dcb_cap, VMX_IOBMP_B_F, self->iobmp_b_pa);
527    err += invoke_dispatcher_vmwrite(self->dcb_cap, VMX_MSRBMP_F, self->msrpm_pa);
528    assert(err_is_ok(err));
529#endif
530    // set up the guests physical address space
531    self->mem_low_va = 0;
532    // FIXME: Hardcoded guest memory size
533    self->mem_high_va = 0x80000000;   // 2 GiB
534    // allocate the memory used for real mode
535    // This is not 100% necessary since one could also catch the pagefaults.
536    // If we allocate the whole memory at once we use less caps and reduce
537    // the risk run out of CSpace.
538    err = alloc_guest_mem(self, 0x0, 0x80000000);
539    assert_err(err, "alloc_guest_mem");
540}
541
542static void
543install_grub_stage2 (struct guest *g, void *img, size_t img_size)
544{
545    assert(img != NULL);
546
547    /* the grub image goes to 0x8000 according to
548     * http://www.gnu.org/software/grub/manual/html_node/Memory-map.html */
549    memcpy((void *)(guest_to_host(g->mem_low_va + 0x8000)), img, img_size);
550    // according to grub stage2 source its entry point is at 0x8200
551#ifdef CONFIG_SVM
552    amd_vmcb_rip_wr(&g->vmcb, 0x8200);
553    // switch to the first segment
554    amd_vmcb_cs_selector_wr(&g->vmcb, 0x0);
555    amd_vmcb_cs_base_wr(&g->vmcb, 0x0);
556    amd_vmcb_cs_limit_wr(&g->vmcb, 0xffff);
557#else
558    errval_t err = invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, 0x8200);
559    err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_CS_SEL, 0x0);
560    err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_CS_BASE, 0x0);
561    err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_CS_LIM, 0xffff);
562    assert(err_is_ok(err));
563#endif
564
565}
566
567#if 0
568static void
569install_debug_app (struct guest *g)
570{
571    //static uint8_t app[] = { 0xcd, 0x20 };
572    static uint8_t app[] = { 0xcd, 0x20, 0x90, 0x90, 0x90, 0x90, 0x90 };
573    memcpy((void *)g->rm_mem_va, app, sizeof(app));
574    amd_vmcb_rip_wr(&g->vmcb, 0x0);
575    // disable nested pageing in real mode
576    amd_vmcb_np_enable_wrf(&g->vmcb, 0);
577    // enable paged real mode
578    //amd_vmcb_cr0_pg_wrf(&g->vmcb, 0x1);
579    //g->save_area->cr0 |= X86_CR0_PE_MASK;
580    amd_vmcb_rsp_wr(&g->vmcb, 0x1000);
581    amd_vmcb_cs_selector_wr(&g->vmcb, 0x0);
582    amd_vmcb_cs_base_wr(&g->vmcb, 0x0);
583    amd_vmcb_cs_limit_wr(&g->vmcb, 0xffff);
584    //g->save_area->cs.selector = 0x1000;
585    //g->save_area->cs.base = 0x10000;
586    //g->save_area->cs.base = 0x1ffff;
587}
588#endif
589
590static bool
591virq_pending (void *ud, uint8_t *irq, uint8_t *irq_prio)
592{
593    assert(ud != NULL);
594
595    struct guest *g = ud;
596#ifdef CONFIG_SVM
597    if (amd_vmcb_vintr_rd(&g->vmcb).virq == 1) {
598#else
599    uint64_t info;
600    errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_ENTRY_INTR_INFO, &info);
601    assert(err_is_ok(err));
602    if (!!(info & (1UL << 31))) {
603#endif
604        if (irq != NULL) {
605#ifdef CONFIG_SVM
606            *irq = amd_vmcb_vintr_rd(&g->vmcb).vintr_vector;
607#else
608	    *irq = info & 0xff;
609#endif
610        }
611        if (irq_prio != NULL) {
612#ifdef CONFIG_SVM
613            *irq_prio = amd_vmcb_vintr_rd(&g->vmcb).vintr_prio;
614#else
615	    *irq_prio = interrupt_priority;
616#endif
617        }
618        return true;
619    } else {
620        return false;
621    }
622}
623
624#ifndef CONFIG_SVM
625static bool
626virq_accepting (void *ud)
627{
628    assert(ud != NULL);
629
630    struct guest *g = ud;
631
632    uint64_t guest_rflags;
633    errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RFLAGS, &guest_rflags);
634    assert(err_is_ok(err));
635    return (guest_rflags & (1UL << 9));
636}
637#endif
638
639static void
640virq_handler (void *ud, uint8_t irq, uint8_t irq_prio)
641{
642    assert(ud != NULL);
643
644    struct guest *g = ud;
645
646    // tell the hw extensions that there is a virtual IRQ pending
647#ifdef CONFIG_SVM
648    amd_vmcb_vintr_virq_wrf(&g->vmcb, 1);
649    amd_vmcb_vintr_vintr_prio_wrf(&g->vmcb, irq_prio);
650    amd_vmcb_vintr_vintr_vector_wrf(&g->vmcb, irq);
651    amd_vmcb_vintr_v_ign_tpr_wrf(&g->vmcb, 1);
652#else
653    uint64_t guest_rflags;
654    errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RFLAGS, &guest_rflags);
655    assert(guest_rflags & (1UL << 9));
656
657    uint64_t info = (0 << 8 /*HWINTR*/) | (1UL << 31 /*INTR VALID*/) | irq;
658    err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_ENTRY_INTR_INFO, info);
659
660    err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_ACTIV_STATE, 0x0);
661    err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_INTR_STATE, 0x0);
662    assert(err_is_ok(err));
663
664    interrupt_priority = irq_prio;
665#endif
666    // if the guest is currently waiting then we have to restart it to make
667    // forward progress
668    if (!g->runnable) {
669        g->runnable = true;
670        guest_make_runnable(g, true);
671    }
672}
673
674static void
675guest_setup (struct guest *g)
676{
677    errval_t err;
678
679    // initialize the guests slot_allocator
680    err = two_level_slot_alloc_init(&g->slot_alloc);
681    assert_err(err, "two_level_slot_alloc_init");
682
683    struct frame_identity fi;
684
685    // allocate memory for the vmcb
686    err = guest_slot_alloc(g, &g->vmcb_cap);
687    assert_err(err, "guest_cspace_alloc");
688    err = frame_create(g->vmcb_cap, VMCB_SIZE, NULL);
689    assert_err(err, "frame_create");
690    err = invoke_frame_identify(g->vmcb_cap, &fi);
691    assert_err(err, "frame_identify");
692    g->vmcb_pa = fi.base;
693    err = vspace_map_one_frame_attr((void**)&g->vmcb_va, VMCB_SIZE, g->vmcb_cap,
694                                    VREGION_FLAGS_READ_WRITE_NOCACHE,
695                                    NULL, NULL);
696    if (err_is_fail(err)) {
697        DEBUG_ERR(err, "vspace_map_one_frame_attr failed");
698    }
699
700    // guest control
701    err = frame_alloc(&g->ctrl_cap, sizeof(struct guest_control), NULL);
702    assert_err(err, "frame_alloc");
703    size_t size = ROUND_UP(sizeof(struct guest_control), BASE_PAGE_SIZE);
704    err = vspace_map_one_frame_attr((void**)&g->ctrl, size, g->ctrl_cap,
705                                    VREGION_FLAGS_READ_WRITE_NOCACHE,
706                                    NULL, NULL);
707    if (err_is_fail(err)) {
708        DEBUG_ERR(err, "vspace_map_one_frame_attr failed");
709    }
710    g->ctrl->num_vm_exits_with_monitor_invocation = 0;
711    g->ctrl->num_vm_exits_without_monitor_invocation = 0;
712#ifdef CONFIG_SVM
713    // allocate memory for the iopm
714    err = frame_alloc(&g->iopm_cap, IOPM_SIZE, NULL);
715    assert_err(err, "frame_alloc");
716    err = invoke_frame_identify(g->iopm_cap, &fi);
717    assert_err(err, "frame_identify");
718    g->iopm_pa = fi.base;
719    err = vspace_map_one_frame_attr((void**)&g->iopm_va, IOPM_SIZE, g->iopm_cap,
720                                    VREGION_FLAGS_READ_WRITE_NOCACHE,
721                                    NULL, NULL);
722    if (err_is_fail(err)) {
723        DEBUG_ERR(err, "vspace_map_one_frame_attr failed");
724    }
725#else
726    // allocate memory for I/O bitmap A
727    err = frame_alloc(&g->iobmp_a_cap, IOBMP_A_SIZE, NULL);
728    assert_err(err, "frame_alloc");
729    err = invoke_frame_identify(g->iobmp_a_cap, &fi);
730    assert_err(err, "frame_identify");
731    g->iobmp_a_pa = fi.base;
732    err = vspace_map_one_frame_attr((void**)&g->iobmp_a_va, IOBMP_A_SIZE, g->iobmp_a_cap,
733                                    VREGION_FLAGS_READ_WRITE_NOCACHE,
734                                    NULL, NULL);
735    if (err_is_fail(err)) {
736        DEBUG_ERR(err, "vspace_map_one_frame_attr failed");
737    }
738
739    // allocate memory for I/O bitmap B
740    err = frame_alloc(&g->iobmp_b_cap, IOBMP_B_SIZE, NULL);
741    assert_err(err, "frame_alloc");
742    err = invoke_frame_identify(g->iobmp_b_cap, &fi);
743    assert_err(err, "frame_identify");
744    g->iobmp_b_pa = fi.base;
745    err = vspace_map_one_frame_attr((void**)&g->iobmp_b_va, IOBMP_B_SIZE, g->iobmp_b_cap,
746                                    VREGION_FLAGS_READ_WRITE_NOCACHE,
747                                    NULL, NULL);
748    if (err_is_fail(err)) {
749        DEBUG_ERR(err, "vspace_map_one_frame_attr failed");
750    }
751
752    // allocate memory for the guest MSR store/load area
753    err = frame_alloc(&g->msr_area_cap, VMX_MSR_AREA_SIZE, NULL);
754    assert_err(err, "frame_alloc");
755    err = invoke_frame_identify(g->msr_area_cap, &fi);
756    assert_err(err, "frame_identify");
757    g->msr_area_pa = fi.base;
758    err = vspace_map_one_frame_attr((void**)&g->msr_area_va, VMX_MSR_AREA_SIZE,
759                                    g->msr_area_cap,
760                                    VREGION_FLAGS_READ_WRITE_NOCACHE,
761                                    NULL, NULL);
762    if (err_is_fail(err)) {
763        DEBUG_ERR(err, "vspace_map_one_frame_attr failed");
764    }
765#endif
766    // allocate memory for the msrpm
767    err = frame_alloc(&g->msrpm_cap, MSRPM_SIZE, NULL);
768    assert_err(err, "frame_alloc");
769    err = invoke_frame_identify(g->msrpm_cap, &fi);
770    assert_err(err, "frame_identify");
771    g->msrpm_pa = fi.base;
772    err = vspace_map_one_frame_attr((void**)&g->msrpm_va, MSRPM_SIZE,
773                                    g->msrpm_cap,
774                                    VREGION_FLAGS_READ_WRITE_NOCACHE,
775                                    NULL, NULL);
776    if (err_is_fail(err)) {
777        DEBUG_ERR(err, "vspace_map_one_frame_attr failed");
778    }
779
780    // initialize the allocated structures
781    initialize_iopm(g);
782    initialize_msrpm(g);
783#ifdef CONFIG_SVM
784    initialize_vmcb(g);
785#endif
786    // spawn the guest domain
787    spawn_guest_domain(g);
788    assert (grub_image != NULL);
789    install_grub_stage2(g, grub_image, grub_image_size);
790    //install_debug_app(g);
791
792    // add virtual hardware
793    g->apic = apic_new(APIC_BASE);
794    g->lpc = lpc_new(virq_handler, virq_pending,
795#ifndef CONFIG_SVM
796		     virq_accepting,
797#endif
798		     g, g->apic);
799    if (hdd0_image != NULL) {
800        g->hdds[0] = hdd_new_from_memory(hdd0_image, hdd0_image_size);
801        g->hdd_count++;
802    }
803    g->console = console_new();
804    g->serial_ports[0] = pc16550d_new(0x3f8, 4, g->lpc);
805
806    // FIXME: Which virtual uart port is connected to which host port
807    //        should be adjustable from the command line or a configuration
808    //        file.
809    pc16550d_attach_to_host_uart(g->serial_ports[0], SERIAL_DRIVER);
810    g->serial_ports[1] = pc16550d_new(0x2f8, 3, g->lpc);
811    g->serial_ports[2] = pc16550d_new(0x3e8, 4, g->lpc);
812    g->serial_ports[3] = pc16550d_new(0x2e8, 3, g->lpc);
813    g->serial_port_count = 4;
814
815    g->pci = pci_new();
816    init_host_devices(g->pci);
817
818//    struct pci_device *ethernet = pci_ethernet_new(g->lpc, g);
819//    int r = pci_attach_device(g->pci, 0, 2, ethernet);
820//	assert(r == 0);
821//
822//	struct pci_device *vmkitmon_eth = pci_vmkitmon_eth_new(g->lpc, g);
823//	r = pci_attach_device(g->pci, 0, 3, vmkitmon_eth);
824//	assert(r==0);
825
826    // set up bios memory
827    // FIXME: find a modular way to do this
828    *(uint16_t *)guest_to_host(g->mem_low_va + 0x400) = 0x3f8;  // COM1
829    *(uint16_t *)guest_to_host(g->mem_low_va + 0x402) = 0x2f8;  // COM2
830
831    g->runnable = true;
832}
833
834/**
835 * \brief Create a new guest.
836 *
837 * This function creates a new guest. It will do everything necessary to make
838 * the guest accept images to run. It will create a new domain and assign some
839 * memory to that domain. Afterwards it will load a bios into the memory and
840 * set the guest initial IP to the POST entry of the bios.
841 *
842 * \return The pointer to the newly created structure describing the guest.
843 */
844struct guest *
845guest_create (void)
846{
847    // support the allocation of one guest for now
848    assert(__guestp == NULL);
849    __guestp = &__guest;
850    memset(__guestp, 0, sizeof(struct guest));
851    guest_setup(__guestp);
852    return __guestp;
853}
854
855static int
856run_realmode (struct guest *g)
857{
858    int r;
859
860    realmode_switch_to(g);
861    r = realmode_exec();
862    assert(r == REALMODE_ERR_OK);
863    realmode_switch_from(g);
864
865    guest_handle_vmexit(g);
866
867    return 0;
868};
869
870#ifndef CONFIG_SVM
871// Return true if the "Enable EPT" Secondary Processor-based control is
872// set in the VMCS, else false.
873static inline bool vmx_ept_enabled(struct guest *g)
874{
875    uint64_t sp_controls;
876    errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_EXEC_SEC_PROC, &sp_controls);
877    assert(err_is_ok(err));
878    return ((sp_controls & SP_CLTS_ENABLE_EPT) != 0);
879}
880
881// Set or clear the "Descriptor-table exiting" Secondary Processor-based
882// control if val is 1 or 0, respectively.
883static inline void vmx_intercept_desc_table_wrf(struct guest *g, int val)
884{
885    assert(val == 0 || val == 1);
886
887    uint64_t sec_proc_ctrls;
888    errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_EXEC_SEC_PROC, &sec_proc_ctrls);
889    if (val) {
890        uint64_t prim_proc_ctrls;
891	err += invoke_dispatcher_vmread(g->dcb_cap, VMX_EXEC_PRIM_PROC, &prim_proc_ctrls);
892	assert(prim_proc_ctrls & PP_CLTS_SEC_CTLS);
893	err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_EXEC_SEC_PROC,
894					 sec_proc_ctrls | SP_CLTS_DESC_TABLE);
895    } else {
896        err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_EXEC_SEC_PROC,
897					 sec_proc_ctrls & ~SP_CLTS_DESC_TABLE);
898    }
899    assert(err_is_ok(err));
900}
901
902
903// Before entering the guest, synchronize the CR0 shadow with the guest
904// CR0 value that is potentially changed in the real-mode emulator.
905static inline void vmx_set_cr0_shadow(struct guest *g)
906{
907    uint64_t cr0_shadow;
908    errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR0, &cr0_shadow);
909    err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_CR0_RD_SHADOW, cr0_shadow);
910    assert(err_is_ok(err));
911}
912#endif
913
914/**
915 * \brief Marks a guest as runnable.
916 *
917 * A call to this method will update the guest's runnable state and, if made
918 * runnable, yield the remaining time slice to the guest domain.
919 *
920 * \return Zero on success, non-zero on error
921 */
922errval_t
923guest_make_runnable (struct guest *g, bool run)
924{
925    assert(g->runnable);
926
927    errval_t err;
928
929    /* If the guest is currently in real mode (CR0.PE flag clear) then we do not
930     * schedule the domain to run the virtualization but run the real-mode
931     * emulation */
932#ifdef CONFIG_SVM
933    if (UNLIKELY(run && amd_vmcb_cr0_rd(&g->vmcb).pe == 0)) {
934        if (!g->emulated_before_exit) {
935            // do the inverse of the code below
936            amd_vmcb_intercepts_rdgdtr_wrf(&g->vmcb, 1);
937            amd_vmcb_intercepts_wrgdtr_wrf(&g->vmcb, 1);
938            amd_vmcb_intercepts_rdldtr_wrf(&g->vmcb, 1);
939            amd_vmcb_intercepts_wrldtr_wrf(&g->vmcb, 1);
940            amd_vmcb_intercepts_rdidtr_wrf(&g->vmcb, 1);
941            amd_vmcb_intercepts_wridtr_wrf(&g->vmcb, 1);
942            amd_vmcb_intercepts_rdtr_wrf(&g->vmcb, 1);
943            amd_vmcb_intercepts_wrtr_wrf(&g->vmcb, 1);
944            amd_vmcb_cr_access_rdcr0_wrf(&g->vmcb, 1);
945            amd_vmcb_cr_access_wrcr0_wrf(&g->vmcb, 1);
946            amd_vmcb_cr_access_rdcr3_wrf(&g->vmcb, 1);
947            amd_vmcb_cr_access_wrcr3_wrf(&g->vmcb, 1);
948            amd_vmcb_intercepts_intn_wrf(&g->vmcb, 1);
949
950            // mark guest as emulated
951            g->emulated_before_exit = true;
952        }
953#else
954    uint64_t guest_cr0;
955    err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR0, &guest_cr0);
956    assert(err_is_ok(err));
957    if (UNLIKELY(run && (guest_cr0 & CR0_PE) == 0)) {
958        if (!g->emulated_before_exit) {
959	    vmx_intercept_desc_table_wrf(g, 1);
960	    g->emulated_before_exit = true;
961	}
962#endif
963#if 0 /* why create a thread for this? it seems fine without! -AB */
964        struct thread *t = thread_create((thread_func_t)run_realmode, g);
965        assert(t != NULL);
966        err = thread_detach(t);
967        assert(err_is_ok(err));
968#else
969        run_realmode(g);
970#endif
971        return SYS_ERR_OK;
972    }
973
974    /* every time we move the machine from the emulated to virtualized we need
975     * to adjust some intercepts */
976    if (UNLIKELY(run && g->emulated_before_exit)) {
977#ifdef CONFIG_SVM
978        // we enforce NP to be enabled (no shadow paging support)
979        assert(amd_vmcb_np_rd(&g->vmcb).enable == 1);
980
981        // disable GDTR intercept
982        amd_vmcb_intercepts_rdgdtr_wrf(&g->vmcb, 0);
983        amd_vmcb_intercepts_wrgdtr_wrf(&g->vmcb, 0);
984        // disable GDTR intercept
985        amd_vmcb_intercepts_rdldtr_wrf(&g->vmcb, 0);
986        amd_vmcb_intercepts_wrldtr_wrf(&g->vmcb, 0);
987        // disable IDTR intercept
988        amd_vmcb_intercepts_rdidtr_wrf(&g->vmcb, 0);
989        amd_vmcb_intercepts_wridtr_wrf(&g->vmcb, 0);
990        // disable TR intercept
991        amd_vmcb_intercepts_rdtr_wrf(&g->vmcb, 0);
992        amd_vmcb_intercepts_wrtr_wrf(&g->vmcb, 0);
993        // disable non essential CR0 access intercepts_t
994        amd_vmcb_cr_access_rdcr0_wrf(&g->vmcb, 0);
995        amd_vmcb_cr_access_wrcr0_wrf(&g->vmcb, 0);
996        // disable CR3 access intercepts
997        assert(amd_vmcb_np_rd(&g->vmcb).enable != 0);
998        amd_vmcb_cr_access_rdcr3_wrf(&g->vmcb, 0);
999        amd_vmcb_cr_access_wrcr3_wrf(&g->vmcb, 0);
1000        // disable INTn intercept
1001        // we have to be outside of real mode for this to work
1002        assert(amd_vmcb_cr0_rd(&g->vmcb).pe != 0);
1003        amd_vmcb_intercepts_intn_wrf(&g->vmcb, 0);
1004#else
1005        bool ept_enabled = vmx_ept_enabled(g);
1006	assert(ept_enabled);
1007	vmx_intercept_desc_table_wrf(g, 0);
1008	assert(guest_cr0 & CR0_PE);
1009	vmx_set_cr0_shadow(g);
1010#endif
1011        // mark guest as not emulated
1012        g->emulated_before_exit = false;
1013    }
1014
1015    // update the guets domain's runnable state
1016    err = invoke_dispatcher(g->dcb_cap, NULL_CAP, NULL_CAP, NULL_CAP, NULL_CAP, run);
1017    assert_err(err, "dispatcher_make_runnable");
1018    // yield the dispatcher
1019    if (run) {
1020        thread_yield_dispatcher(NULL_CAP);
1021    }
1022
1023    return SYS_ERR_OK;
1024}
1025
1026/* VMEXIT hanlders */
1027
1028#define HANDLER_ERR_OK          (0)
1029#define HANDLER_ERR_FATAL       (-1)
1030
1031#ifdef CONFIG_SVM
1032static int
1033handle_vmexit_unhandeled (struct guest *g)
1034{
1035    printf("Unhandeled guest vmexit:\n");
1036    printf(" code:\t  %lx\n", amd_vmcb_exitcode_rd(&g->vmcb));
1037    printf(" info1:\t  %lx\n", amd_vmcb_exitinfo1_rd(&g->vmcb));
1038    printf(" info2:\t  %lx\n", amd_vmcb_exitinfo2_rd(&g->vmcb));
1039    printf(" intinfo: %lx\n", amd_vmcb_exitintinfo_rd(&g->vmcb));
1040
1041    printf("VMCB save area:\n");
1042    printf(" cr0:\t%lx\n", amd_vmcb_cr0_rd_raw(&g->vmcb));
1043    printf(" cr2:\t%lx\n", amd_vmcb_cr2_rd_raw(&g->vmcb));
1044    printf(" cr3:\t%lx\n", amd_vmcb_cr3_rd_raw(&g->vmcb));
1045    printf(" cr4:\t%lx\n", amd_vmcb_cr4_rd_raw(&g->vmcb));
1046    printf(" efer:\t%lx\n", amd_vmcb_efer_rd_raw(&g->vmcb));
1047    printf(" rip:\t%lx\n", amd_vmcb_rip_rd_raw(&g->vmcb));
1048    printf(" cs:\tselector %x, base %lx, limit %x, attrib %x\n",
1049           amd_vmcb_cs_selector_rd(&g->vmcb), amd_vmcb_cs_base_rd(&g->vmcb),
1050           amd_vmcb_cs_limit_rd(&g->vmcb), amd_vmcb_cs_attrib_rd_raw(&g->vmcb));
1051    printf(" ds:\tselector %x, base %lx, limit %x, attrib %x\n",
1052           amd_vmcb_ds_selector_rd(&g->vmcb), amd_vmcb_ds_base_rd(&g->vmcb),
1053           amd_vmcb_ds_limit_rd(&g->vmcb), amd_vmcb_ds_attrib_rd_raw(&g->vmcb));
1054    printf(" es:\tselector %x, base %lx, limit %x, attrib %x\n",
1055           amd_vmcb_es_selector_rd(&g->vmcb), amd_vmcb_es_base_rd(&g->vmcb),
1056           amd_vmcb_es_limit_rd(&g->vmcb), amd_vmcb_es_attrib_rd_raw(&g->vmcb));
1057    printf(" ss:\tselector %x, base %lx, limit %x, attrib %x\n",
1058           amd_vmcb_ss_selector_rd(&g->vmcb), amd_vmcb_ss_base_rd(&g->vmcb),
1059           amd_vmcb_ss_limit_rd(&g->vmcb), amd_vmcb_ss_attrib_rd_raw(&g->vmcb));
1060    printf(" rax:\t%lx\n", amd_vmcb_rax_rd_raw(&g->vmcb));
1061    printf(" rbx:\t%lx\n", g->ctrl->regs.rbx);
1062    printf(" rcx:\t%lx\n", g->ctrl->regs.rcx);
1063    printf(" rdx:\t%lx\n", g->ctrl->regs.rdx);
1064    printf(" rsi:\t%lx\n", g->ctrl->regs.rsi);
1065    printf(" rdi:\t%lx\n", g->ctrl->regs.rdi);
1066
1067    return HANDLER_ERR_FATAL;
1068}
1069#else
1070static int
1071handle_vmexit_unhandeled (struct guest *g)
1072{
1073    printf("Unhandeled guest vmexit:\n");
1074    printf(" exit reason:\t %"PRIu16"\n", saved_exit_reason);
1075    printf(" exit qualification:\t %"PRIx64"\n", saved_exit_qual);
1076    printf(" next rip (I/O instruction):\t %"PRIx64"\n", saved_rip);
1077
1078    uint64_t gpaddr;
1079    errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GPADDR_F, &gpaddr);
1080    printf(" guest physical-address:\t %"PRIx64"\n", gpaddr);
1081
1082    uint64_t guest_cr0, guest_cr3, guest_cr4;
1083    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR0, &guest_cr0);
1084    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR3, &guest_cr3);
1085    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR4, &guest_cr4);
1086
1087    uint64_t guest_efer, guest_rip;
1088    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_EFER_F, &guest_efer);
1089    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip);
1090
1091    uint64_t guest_cs_sel, guest_cs_base, guest_cs_lim,
1092        guest_cs_access;
1093    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CS_SEL, &guest_cs_sel);
1094    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CS_BASE, &guest_cs_base);
1095    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CS_LIM, &guest_cs_lim);
1096    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CS_ACCESS, &guest_cs_access);
1097
1098    uint64_t guest_ds_sel, guest_ds_base, guest_ds_lim,
1099        guest_ds_access;
1100    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_DS_SEL, &guest_ds_sel);
1101    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_DS_BASE, &guest_ds_base);
1102    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_DS_LIM, &guest_ds_lim);
1103    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_DS_ACCESS, &guest_ds_access);
1104
1105    uint64_t guest_es_sel, guest_es_base, guest_es_lim,
1106        guest_es_access;
1107    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_ES_SEL, &guest_es_sel);
1108    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_ES_BASE, &guest_es_base);
1109    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_ES_LIM, &guest_es_lim);
1110    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_ES_ACCESS, &guest_es_access);
1111
1112    uint64_t guest_ss_sel, guest_ss_base, guest_ss_lim,
1113        guest_ss_access;
1114    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_SS_SEL, &guest_ss_sel);
1115    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_SS_BASE, &guest_ss_base);
1116    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_SS_LIM, &guest_ss_lim);
1117    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_SS_ACCESS, &guest_ss_access);
1118    assert(err_is_ok(err));
1119
1120    printf("VMCS save area:\n");
1121    printf(" cr0:\t%lx\n", guest_cr0);
1122    printf(" cr3:\t%lx\n", guest_cr3);
1123    printf(" cr4:\t%lx\n", guest_cr4);
1124    printf(" efer:\t%lx\n", guest_efer);
1125    printf(" rip:\t%lx\n", guest_rip);
1126    printf(" cs:\tselector %lx, base %lx, limit %lx, access %lx\n",
1127           guest_cs_sel, guest_cs_base, guest_cs_lim, guest_cs_access);
1128    printf(" ds:\tselector %lx, base %lx, limit %lx, access %lx\n",
1129           guest_ds_sel, guest_ds_base, guest_ds_lim, guest_ds_access);
1130    printf(" es:\tselector %lx, base %lx, limit %lx, access %lx\n",
1131           guest_es_sel, guest_es_base, guest_es_lim, guest_es_access);
1132    printf(" ss:\tselector %lx, base %lx, limit %lx, access %lx\n",
1133           guest_ss_sel, guest_ss_base, guest_ss_lim, guest_ss_access);
1134    printf(" rax:\t%lx\n", g->ctrl->regs.rax);
1135    printf(" rbx:\t%lx\n", g->ctrl->regs.rbx);
1136    printf(" rcx:\t%lx\n", g->ctrl->regs.rcx);
1137    printf(" rdx:\t%lx\n", g->ctrl->regs.rdx);
1138    printf(" rsi:\t%lx\n", g->ctrl->regs.rsi);
1139    printf(" rdi:\t%lx\n", g->ctrl->regs.rdi);
1140
1141    return HANDLER_ERR_FATAL;
1142}
1143#endif
1144
1145static inline uint64_t
1146lookup_paddr_long_mode (struct guest *g, uint64_t vaddr)
1147{
1148    union x86_lm_va va = { .raw = vaddr };
1149    uint64_t *page_table;
1150
1151    // get a pointer to the pml4 table
1152#ifdef CONFIG_SVM
1153    page_table = (uint64_t *)guest_to_host(amd_vmcb_cr3_rd(&g->vmcb));
1154#else
1155    uint64_t guest_cr3;
1156    errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR3, &guest_cr3);
1157    assert(err_is_ok(err));
1158    page_table = (uint64_t *)guest_to_host(guest_cr3);
1159#endif
1160    // get pml4 entry
1161    union x86_lm_pml4_entry pml4e = { .raw = page_table[va.u.pml4_idx] };
1162    assert (pml4e.u.p == 1);
1163
1164    // get a pointer to the pdp table
1165    page_table = (uint64_t *)guest_to_host(pml4e.u.pdp_base_pa << 12);
1166    // get pdp entry
1167    union x86_lm_pdp_entry pdpe = { .raw = page_table[va.u.pdp_idx] };
1168    assert(pdpe.u.p == 1);
1169    // check for 1GB page (PS bit set)
1170    if (pdpe.u.ps == 1) {
1171        return (pdpe.u1gb.base_pa << 30) | va.u1gb.pa_offset;
1172    }
1173
1174    // get a pointer to the pd table
1175    page_table = (uint64_t *)guest_to_host(pdpe.u.pd_base_pa << 12);
1176    // get pd entry
1177    union x86_lm_pd_entry pde = { .raw = page_table[va.u.pd_idx] };
1178    if (pde.u.p == 0) {
1179        printf("g2h %lx, pml4e %p %lx, pdpe %p %lx, pde %p %lx\n",
1180	       guest_to_host(0), &pml4e, pml4e.raw, &pdpe, pdpe.raw, &pde, pde.raw);
1181    }
1182    assert(pde.u.p == 1);
1183    // check for 2MB page (PS bit set)
1184    if (pde.u.ps == 1) {
1185        return (pde.u2mb.base_pa << 21) | va.u2mb.pa_offset;
1186    }
1187
1188    // get a pointer to the page table
1189    page_table = (uint64_t *)guest_to_host(pde.u.pt_base_pa << 12);
1190    // get the page table entry
1191    union x86_lm_pt_entry pte = { .raw = page_table[va.u.pt_idx] };
1192    assert(pte.u.p == 1);
1193
1194    return (pte.u.base_pa << 12) | va.u.pa_offset;
1195}
1196
1197static inline uint32_t
1198lookup_paddr_legacy_mode (struct guest *g, uint32_t vaddr)
1199{
1200//	printf("lookup_paddr_legacy_mode enter\n");
1201    // PAE not supported
1202#ifdef CONFIG_SVM
1203    guest_assert(g, amd_vmcb_cr4_rd(&g->vmcb).pae == 0);
1204#else
1205    uint64_t guest_cr4;
1206    errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR4, &guest_cr4);
1207    guest_assert(g, (guest_cr4 & CR4_PAE) == 0);
1208#endif
1209    union x86_legm_va va = { .raw = vaddr };
1210    uint32_t *page_table;
1211
1212    // get a pointer to the pd table
1213#ifdef CONFIG_SVM
1214    page_table = (uint32_t *)guest_to_host(amd_vmcb_cr3_rd(&g->vmcb));
1215#else
1216    uint64_t guest_cr3;
1217    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR3, &guest_cr3);
1218    assert(err_is_ok(err));
1219    page_table = (uint32_t *)guest_to_host(guest_cr3);
1220#endif
1221
1222    // get pd entry
1223    union x86_legm_pd_entry pde = { .raw = page_table[va.u.pd_idx] };
1224    assert (pde.u.p == 1);
1225    // check for 4MB page (PS bit set)
1226    if (pde.u.ps == 1) {
1227        return (pde.u4mb.base_pa << 22) | va.u4mb.pa_offset;
1228    }
1229
1230    // get a pointer to the page table
1231    page_table = (uint32_t *)guest_to_host(pde.u.pt_base_pa << 12);
1232    // get the page table entry
1233    union x86_legm_pt_entry pte = { .raw = page_table[va.u.pt_idx] };
1234    assert(pte.u.p == 1);
1235
1236    return (pte.u.base_pa << 12) | va.u.pa_offset;
1237}
1238
1239// retunrs a pointer to a byte array starting at the current instruction
1240static inline int
1241get_instr_arr (struct guest *g, uint8_t **arr)
1242{
1243#ifdef CONFIG_SVM
1244    if (UNLIKELY(amd_vmcb_cr0_rd(&g->vmcb).pg == 0)) {
1245#else
1246    uint64_t guest_cr0;
1247    errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR0, &guest_cr0);
1248    if (UNLIKELY((guest_cr0 & CR0_PG) == 0)) {
1249#endif
1250    	//printf("Segmentation active!\n");
1251        // without paging
1252        // take segmentation into account
1253#ifdef CONFIG_SVM
1254        *arr = (uint8_t *)(guest_to_host(g->mem_low_va) +
1255               amd_vmcb_cs_base_rd(&g->vmcb) +
1256               amd_vmcb_rip_rd(&g->vmcb));
1257#else
1258	uint64_t guest_cs_base, guest_rip;
1259	err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CS_BASE, &guest_cs_base);
1260	err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip);
1261        *arr = (uint8_t *)(guest_to_host(g->mem_low_va) +
1262			   guest_cs_base + guest_rip);
1263#endif
1264    } else {
1265        // with paging
1266#ifdef CONFIG_SVM
1267        if (amd_vmcb_efer_rd(&g->vmcb).lma == 1) {
1268#else
1269	uint64_t guest_efer;
1270	err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_EFER_F, &guest_efer);
1271	if (guest_efer & EFER_LMA) {
1272#endif
1273            // long mode
1274#ifdef CONFIG_SVM
1275            if (amd_vmcb_cs_attrib_rd(&g->vmcb).l == 1) {
1276                // 64-bit mode
1277                *arr = (uint8_t *)guest_to_host(lookup_paddr_long_mode(g,
1278                                                amd_vmcb_rip_rd(&g->vmcb)));
1279#else
1280	    uint64_t cs_access_rights, guest_rip;
1281	    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CS_ACCESS, &cs_access_rights);
1282	    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip);
1283	    if (cs_access_rights & ACCESS_RIGHTS_LONG_MODE) {
1284                *arr = (uint8_t *)guest_to_host(lookup_paddr_long_mode(g,
1285                                                guest_rip));
1286#endif
1287            } else {
1288                // cmpatibility mode
1289                guest_assert(g, !"compatiblity mode not supported yet");
1290            }
1291        } else {
1292            // Legacy (aka. Paged Protected) Mode
1293#ifdef CONFIG_SVM
1294            assert(amd_vmcb_cr0_rd(&g->vmcb).pe == 1);
1295
1296            *arr = (uint8_t *)guest_to_host(lookup_paddr_legacy_mode(g,
1297                                            amd_vmcb_rip_rd(&g->vmcb)));
1298#else
1299	    assert(guest_cr0 & CR0_PE);
1300
1301	    uint64_t guest_rip;
1302	    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip);
1303            *arr = (uint8_t *)guest_to_host(lookup_paddr_legacy_mode(g,
1304                                            guest_rip));
1305#endif
1306        }
1307    }
1308#ifndef CONFIG_SVM
1309    assert(err_is_ok(err));
1310#endif
1311    return HANDLER_ERR_OK;
1312}
1313
1314static inline uint64_t
1315get_reg_val_by_reg_num (struct guest *g, uint8_t regnum) {
1316    switch (regnum) {
1317    case 0:
1318        return guest_get_rax(g);
1319    case 1:
1320        return guest_get_rcx(g);
1321    case 2:
1322        return guest_get_rdx(g);
1323    case 3:
1324        return guest_get_rbx(g);
1325    case 4:
1326        return guest_get_rsp(g);
1327    case 5:
1328        return guest_get_rbp(g);
1329    case 6:
1330        return guest_get_rsi(g);
1331    case 7:
1332        return guest_get_rdi(g);
1333    default:
1334        assert(!"not reached");
1335        return 0;
1336    }
1337}
1338
1339static inline void
1340set_reg_val_by_reg_num (struct guest *g, uint8_t regnum, uint64_t val) {
1341    switch (regnum) {
1342    case 0:
1343        guest_set_rax(g, val);
1344        break;
1345    case 1:
1346        guest_set_rcx(g, val);
1347        break;
1348    case 2:
1349        guest_set_rdx(g, val);
1350        break;
1351    case 3:
1352        guest_set_rbx(g, val);
1353        break;
1354    case 4:
1355        guest_set_rsp(g, val);
1356        break;
1357    case 5:
1358        guest_set_rbp(g, val);
1359        break;
1360    case 6:
1361        guest_set_rsi(g, val);
1362        break;
1363    case 7:
1364        guest_set_rdi(g, val);
1365        break;
1366    default:
1367        assert(!"not reached");
1368        break;
1369    }
1370}
1371
1372static int
1373handle_vmexit_cr_access (struct guest *g)
1374{
1375    int r;
1376    uint8_t *code = NULL;
1377#ifndef CONFIG_SVM
1378    errval_t err = 0;
1379    if (g->emulated_before_exit) {
1380        assert(saved_exit_reason == VMX_EXIT_REASON_CR_ACCESS);
1381        assert(((saved_exit_qual >> 0) & 0xf) == 0);
1382    }
1383#endif
1384    // fetch the location to the code
1385    r = get_instr_arr(g, &code);
1386    if (r != HANDLER_ERR_OK) {
1387        return r;
1388    }
1389    assert(code != NULL);
1390
1391    assert(code[0] == 0x0f && (code[1] == 0x20 || code[1] == 0x22));
1392
1393    uint64_t val;
1394    bool read = (code[1] == 0x20);
1395    union x86_modrm mod;
1396    mod.raw = code[2];
1397
1398    // FIXME: use proper exception
1399    assert(mod.u.mod == 3);
1400
1401    // source
1402    if (read) {
1403        // read from CR
1404        switch (mod.u.regop) {
1405        case 0:
1406#ifdef CONFIG_SVM
1407            val = amd_vmcb_cr0_rd_raw(&g->vmcb);
1408#else
1409	    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR0, &val);
1410#endif
1411            break;
1412        default:
1413            printf("CR access: unknown CR source register\n");
1414            return handle_vmexit_unhandeled(g);
1415        }
1416    } else {
1417        // read from GPR
1418        val = get_reg_val_by_reg_num(g, mod.u.rm);
1419    }
1420
1421    // destination
1422    if (read) {
1423        // write to GPR
1424        switch (mod.u.rm) {
1425        case 0:
1426            guest_set_rax(g, val);
1427            break;
1428        case 1:
1429            guest_set_rcx(g, val);
1430            break;
1431        case 2:
1432            guest_set_rdx(g, val);
1433            break;
1434        case 3:
1435            guest_set_rbx(g, val);
1436            break;
1437        default:
1438            printf("CR access: unknown GPR destination register\n");
1439            return handle_vmexit_unhandeled(g);
1440        }
1441    } else {
1442        // write to CR
1443        switch (mod.u.regop) {
1444        case 0:
1445#ifdef CONFIG_SVM
1446            amd_vmcb_cr0_wr_raw(&g->vmcb, val);
1447#else
1448	    err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_CR0, val);
1449#endif
1450            break;
1451
1452        case 4:
1453            // allow writing to CR4 by do nothing for this case
1454            break;
1455        default:
1456            printf("CR access: unknown CR destination register\n");
1457            return handle_vmexit_unhandeled(g);
1458        }
1459    }
1460
1461    // advance the rip beyond the instruction
1462#ifdef CONFIG_SVM
1463    amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 3);
1464#else
1465    uint64_t guest_rip;
1466    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip);
1467    err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 3);
1468    assert(err_is_ok(err));
1469#endif
1470    return HANDLER_ERR_OK;
1471}
1472
1473static int
1474handle_vmexit_ldt (struct guest *g)
1475{
1476    int r;
1477    uint8_t *code = NULL;
1478    uint8_t *mem;
1479
1480    // this handler supports only real-mode
1481#ifdef CONFIG_SVM
1482    assert(amd_vmcb_cr0_rd(&g->vmcb).pe == 0);
1483#else
1484    uint64_t guest_cr0;
1485    errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR0, &guest_cr0);
1486    assert((guest_cr0 & CR0_PE) == 0);
1487#endif
1488    // fetch the location to the code
1489    r = get_instr_arr(g, &code);
1490    if (r != HANDLER_ERR_OK) {
1491        return r;
1492    }
1493    mem = (uint8_t *)guest_to_host(g->mem_low_va);
1494    assert(code != NULL);
1495
1496    assert (code[0] == 0x0f && code[1] == 0x01);
1497
1498    // check for relevant instruction prefixes
1499    bool addr32 = code[-2] == 0x67 || code[-1] == 0x67;
1500    bool op32 = code[-2] == 0x66 || code[-1] == 0x66;
1501    // fetch modrm
1502    union x86_modrm modrm = { .raw = code[2] };
1503
1504    assert(modrm.u.regop == 2 || modrm.u.regop == 3);
1505    guest_assert(g, op32);
1506
1507    uint32_t addr;
1508    if (addr32) {
1509        // byte 3-6 hold a 32 bit address to a mem location where the first word
1510        // holds the limit and the following dword holds the base
1511        addr = *(uint32_t *)&code[3];
1512    } else {
1513        // byte 3-4 hold a 16 bit address to a mem location where the first word
1514        // holds the limit and the following dword holds the base
1515        // this address is relative to DS base
1516#ifdef CONFIG_SVM
1517        addr = *(uint16_t *)&code[3] + amd_vmcb_ds_base_rd(&g->vmcb);
1518#else
1519	uint64_t guest_ds_base;
1520	err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_DS_BASE, &guest_ds_base);
1521	addr = *(uint16_t *)&code[3] + guest_ds_base;
1522#endif
1523    }
1524
1525    // santity check on the addr
1526    // FIXME: raise a proper exception
1527    if (addr > g->mem_high_va) {
1528        printf("Memory access beyond physical address space\n");
1529        return HANDLER_ERR_FATAL;
1530    }
1531
1532    // load the actual register
1533    if (modrm.u.regop == 2) {
1534        // LGDT
1535#ifdef CONFIG_SVM
1536        amd_vmcb_gdtr_limit_wr(&g->vmcb, *(uint16_t*)(mem + addr));
1537        amd_vmcb_gdtr_base_wr(&g->vmcb, *(uint32_t*)(mem + addr + 2));
1538#else
1539	err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_GDTR_LIM,
1540					 *(uint16_t*)(mem + addr));
1541        err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_GDTR_BASE,
1542					 *(uint32_t*)(mem + addr + 2));
1543#endif
1544
1545    } else if (modrm.u.regop == 3) {
1546        // LIDT
1547#ifdef CONFIG_SVM
1548        amd_vmcb_idtr_limit_wr(&g->vmcb, *(uint16_t*)(mem + addr));
1549        amd_vmcb_idtr_base_wr(&g->vmcb, *(uint32_t*)(mem + addr + 2));
1550#else
1551	err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_IDTR_LIM,
1552					 *(uint16_t*)(mem + addr));
1553	err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_IDTR_BASE,
1554					 *(uint32_t*)(mem + addr + 2));
1555#endif
1556    } else {
1557        assert(!"not reached");
1558    }
1559
1560    // advance the rip beyond the instruction
1561#ifdef CONFIG_SVM
1562    if (addr32) {
1563        amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 7);
1564    } else {
1565        amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 5);
1566    }
1567#else
1568    uint64_t guest_rip;
1569    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip);
1570    if (addr32) {
1571        err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 7);
1572    } else {
1573        err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 5);
1574    }
1575    assert(err_is_ok(err));
1576#endif
1577    return HANDLER_ERR_OK;
1578}
1579
1580#ifndef CONFIG_SVM
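// Set or clear the carry flag in the guest's RFLAGS; the BIOS interrupt
// emulation below uses CF to report success (0) or failure (1) to the guest.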
1581static inline void vmx_vmcs_rflags_cf_wrf(struct guest *g, int val) {
1582    assert(val == 0 || val == 1);
1583    uint64_t guest_rflags;
1584    errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RFLAGS, &guest_rflags);
1585    if (val) {
1586        err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RFLAGS,
1587					 guest_rflags | RFLAGS_CF);
1588    } else {
1589        err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RFLAGS,
1590					 guest_rflags & (~RFLAGS_CF));
1591    }
1592    assert(err_is_ok(err));
1593}
1594#endif
1595
1596static int
1597handle_vmexit_swint (struct guest *g)
1598{
1599    int r;
1600    uint8_t *code = NULL;
1601
1602    r = get_instr_arr(g, &code);
1603    if (r != HANDLER_ERR_OK) {
1604        return r;
1605    }
1606    assert (code != NULL);
1607
1608    // check for the correct instruction
1609    assert(code[0] == 0xcd);
1610
1611    // the interrupt number follows the INT (0xcd) opcode
1612    uint8_t int_num = code[1];
1613
1614    // check whether the guest is in real mode
1615#ifdef CONFIG_SVM
1616    if (amd_vmcb_cr0_rd(&g->vmcb).pe == 0) {
1617#else
1618    uint64_t guest_ds_base, es_guest_base;
1619    uint64_t guest_cr0, guest_rip;
1620    errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR0, &guest_cr0);
1621    if ((guest_cr0 & CR0_PE) == 0) {
1622#endif
1623        // in real mode the interrupts starting at 0x10 have a different meaning
1624        // examine the sw interrupt
1625        switch (int_num) {
1626            case 0x10:
1627                r = console_handle_int10(g->console, g);
1628                if (r != HANDLER_ERR_OK) {
1629                    printf("Unhandled method on INT 0x10\n");
1630                    return handle_vmexit_unhandeled(g);
1631                }
1632                break;
1633            case 0x12:
1634                switch (guest_get_ax(g)) {
1635                    case 0: // GET MEMORY SIZE
1636                        // our VM always has 1MB of low memory, of which the
1637                        // first 640 KiB are conventional (base) memory; AX holds
1638                        // the number of contiguous 1 KiB blocks starting at addr 0, i.e. 640
1639                        guest_set_ax(g, 640);
1640                        break;
1641                    default:
1642                        printf("Unhandled method on INT 0x12\n");
1643                        return handle_vmexit_unhandeled(g);
1644                }
1645                break;
1646            case 0x13:
1647                // Bootable CD-ROM - GET STATUS
1648                if (guest_get_ax(g) == 0x4b01) {
1649                    // no cdrom support
1650#ifdef CONFIG_SVM
1651                    amd_vmcb_rflags_cf_wrf(&g->vmcb, 1);
1652#else
1653		    vmx_vmcs_rflags_cf_wrf(g, 1);
1654#endif
1655                }
1656                // DISK RESET
1657                else if (guest_get_ah(g) == 0) {
1658                    for (int i = 0; i < g->hdd_count; i++) {
1659                        hdd_reset(g->hdds[i]);
1660                    }
1661                }
1662                // DISK - GET DRIVE PARAMETERS (PC,XT286,CONV,PS,ESDI,SCSI)
1663                else if (guest_get_ah(g) == 0x08) {
1664                    uint8_t dl = guest_get_dl(g);
1665
1666                    // only respond to installed hard disks
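                    // (BIOS drive numbers with bit 7 set denote hard disks; the
                    //  low bits select the drive)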
1667                    if ((dl >> 7) && ((dl & 0x7f) < g->hdd_count)) {
1668                        uint16_t c;
1669                        uint8_t h, s;
1670
1671                        r = hdd_get_geometry_chs(g->hdds[dl & 0x7f], &c, &h, &s);
1672                        assert(r == 0);
1673
1674                        // set some return values for success
1675                        guest_set_ah(g, 0);
1676#ifdef CONFIG_SVM
1677                        amd_vmcb_rflags_cf_wrf(&g->vmcb, 0);
1678#else
1679			vmx_vmcs_rflags_cf_wrf(g, 0);
1680#endif
1681                        guest_set_bl(g, 0);
1682                        // store the geometry into the correct registers
1683                        guest_set_cx(g, c << 6 | (s & 0x3f));
1684                        guest_set_dh(g, h);
1685                        guest_set_dl(g, g->hdd_count);
1686                    } else {
1687#ifdef CONFIG_SVM
1688                        amd_vmcb_rflags_cf_wrf(&g->vmcb, 1);
1689#else
1690			vmx_vmcs_rflags_cf_wrf(g, 1);
1691#endif
1692                        // it is not really clear what AH should contain when
1693                        // the drive is not present, so just report a generic error
1694                        guest_set_ah(g, 1);
1695                    }
1696                }
1697                // INT 13 Extensions - INSTALLATION CHECK
1698                else if (guest_get_ah(g) == 0x41 && guest_get_bx(g) == 0x55aa) {
1699#ifdef CONFIG_SVM
1700                    amd_vmcb_rflags_cf_wrf(&g->vmcb, 0);
1701#else
1702		    vmx_vmcs_rflags_cf_wrf(g, 0);
1703#endif
1704                    guest_set_bx(g, 0xaa55);
1705                    guest_set_ah(g, 0x01); // Drive extensions 1.x
1706                    guest_set_al(g, 0);
1707                    guest_set_cx(g, 0x5);
1708                }
1709                // IBM/MS INT 13 Extensions - EXTENDED READ
1710                else if (guest_get_ah(g) == 0x42) {
1711                    uint8_t dl = guest_get_dl(g);
1712
1713                    // only respond to installed hard disks
1714                    if ((dl >> 7) && ((dl & 0x7f) < g->hdd_count)) {
1715#ifdef CONFIG_SVM
1716                        amd_vmcb_rflags_cf_wrf(&g->vmcb, 0);
1717#else
1718			vmx_vmcs_rflags_cf_wrf(g, 0);
1719#endif
1720                        guest_set_ah(g, 0);
1721
1722                        struct disk_access_block {
1723                            uint8_t     size;
1724                            uint8_t     reserved;
1725                            uint16_t    count;
1726                            // pointer to the data buffer, stored as a real-mode
1727                            // SEGMENT:OFFSET pair (offset in low word, segment in high word)
1728                            uint32_t    transfer_buffer;
1729                            uint64_t    abs_block_number;
1730                        } __attribute__ ((packed));
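                        // this mirrors the "disk address packet" (DAP) that the
                        // guest builds for the INT 13h extensions calls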
1731
1732                        // memory location of the disk access block
1733#ifdef CONFIG_SVM
1734                        uintptr_t mem = guest_to_host(g->mem_low_va) +
1735                                        amd_vmcb_ds_base_rd(&g->vmcb) +
1736                                        guest_get_si(g);
1737#else
1738			err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_DS_BASE, &guest_ds_base);
1739                        uintptr_t mem = guest_to_host(g->mem_low_va) +
1740                                        guest_ds_base + guest_get_si(g);
1741#endif
1742
1743                        struct disk_access_block *dap = (void *)mem;
1744
1745                        if (dap->size < 0x10) {
1746#ifdef CONFIG_SVM
1747                            amd_vmcb_rflags_cf_wrf(&g->vmcb, 1);
1748#else
1749			    vmx_vmcs_rflags_cf_wrf(g, 1);
1750#endif
1751                            guest_set_ah(g, 1);
1752                        } else {
1753                            // dap->transfer_buffer is a real-mode SEGMENT:OFFSET pointer;
1754                            // resolve it according to the real-mode addressing rules
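                            // real-mode linear address = segment * 16 + offset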
1755                            mem = guest_to_host(g->mem_low_va) +
1756                                  ((dap->transfer_buffer >> 16) << 4) +
1757                                  (dap->transfer_buffer & 0xffff);
1758
1759                            size_t count = dap->count;
1760                            r = hdd_read_blocks(g->hdds[dl & 0x7f],
1761                                                dap->abs_block_number,
1762                                                &count, mem);
1763                            dap->count = count;
1764
1765                            if (r != HANDLER_ERR_OK) {
1766#ifdef CONFIG_SVM
1767                                amd_vmcb_rflags_cf_wrf(&g->vmcb, 1);
1768#else
1769				vmx_vmcs_rflags_cf_wrf(g, 1);
1770#endif
1771                                guest_set_ah(g, 1);
1772                            }
1773                        }
1774                    } else {
1775#ifdef CONFIG_SVM
1776                        amd_vmcb_rflags_cf_wrf(&g->vmcb, 1);
1777#else
1778			vmx_vmcs_rflags_cf_wrf(g, 1);
1779#endif
1780                        // it is not really clear what AH should contain when
1781                        // the drive is not present, so just report a generic error
1782                        guest_set_ah(g, 1);
1783                    }
1784                }
1785                // IBM/MS INT 13 Extensions - GET DRIVE PARAMETERS
1786                else if (guest_get_ah(g) == 0x48) {
1787                    uint8_t dl = guest_get_dl(g);
1788
1789                    // only respond to installed hard disks
1790                    if ((dl >> 7) && ((dl & 0x7f) < g->hdd_count)) {
1791                        // structure to hold drive info
1792                        struct drive_params {
1793                            uint16_t size;
1794                            uint16_t flags;
1795                            uint32_t cylinders;
1796                            uint32_t heads;
1797                            uint32_t sectors;
1798                            uint64_t total_sectors;
1799                            uint16_t bytes_per_sector;
1800                        } __attribute__ ((packed));
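                        // this corresponds to the 26-byte result buffer of INT 13h
                        // AH=48h (EDD 1.x); the guest sets 'size' to the buffer size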
1801
1802                        // memory where the drive info shall be stored
1803#ifdef CONFIG_SVM
1804                        uintptr_t mem = guest_to_host(g->mem_low_va) +
1805                                        amd_vmcb_ds_base_rd(&g->vmcb) +
1806                                        guest_get_si(g);
1807#else
1808			err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_DS_BASE, &guest_ds_base);
1809                        uintptr_t mem = guest_to_host(g->mem_low_va) +
1810                                        guest_ds_base + guest_get_si(g);
1811#endif
1812
1813                        struct drive_params *drp = (void *)mem;
1814
1815                        // sanity check
1816                        if (drp->size < sizeof(struct drive_params)) {
1817#ifdef CONFIG_SVM
1818                            amd_vmcb_rflags_cf_wrf(&g->vmcb, 1);
1819#else
1820			    vmx_vmcs_rflags_cf_wrf(g, 1);
1821#endif
1822                        } else {
1823#ifdef CONFIG_SVM
1824                            amd_vmcb_rflags_cf_wrf(&g->vmcb, 0);
1825#else
1826			    vmx_vmcs_rflags_cf_wrf(g, 0);
1827#endif
1828                            guest_set_ah(g, 0);
1829
1830                            drp->size = sizeof(struct drive_params);
1831                            // CHS invalid, no removable drive, etc
1832                            drp->flags = 0;
1833                            drp->cylinders = 0;
1834                            drp->heads = 0;
1835                            drp->sectors = 0;
1836                            drp->total_sectors = hdd_get_blocks_count(
1837                                                    g->hdds[dl & 0x7f]);
1838                            drp->bytes_per_sector = 512; // FIXME: Hardcoded
1839                        }
1840                    } else {
1841#ifdef CONFIG_SVM
1842                        amd_vmcb_rflags_cf_wrf(&g->vmcb, 1);
1843#else
1844			vmx_vmcs_rflags_cf_wrf(g, 1);
1845#endif
1846                        // it is not really clear what AH should contain when
1847                        // the drive is not present, so just report a generic error
1848                        guest_set_ah(g, 0x1);
1849                    }
1850                } else {
1851                    printf("Unhandled method on INT 0x13\n");
1852                    return handle_vmexit_unhandeled(g);
1853                }
1854                break;
1855            case 0x15:
1856                // ENABLE A20 GATE
1857                if (guest_get_ax(g) == 0x2401) {
1858                    g->a20_gate_enabled = true;
1859#ifdef CONFIG_SVM
1860                    amd_vmcb_rflags_cf_wrf(&g->vmcb, 0);
1861#else
1862		    vmx_vmcs_rflags_cf_wrf(g, 0);
1863#endif
1864                    guest_set_ah(g, 0);
1865                }
1866                // APM INSTALLATION CHECK
1867                else if (guest_get_ax(g) == 0x5300) {
1868                    // we do not support APM - set carry flag to indicate error
1869#ifdef CONFIG_SVM
1870                    amd_vmcb_rflags_cf_wrf(&g->vmcb, 1);
1871#else
1872		    vmx_vmcs_rflags_cf_wrf(g, 1);
1873#endif
1874                }
1875                // APM DISCONNECT
1876                else if (guest_get_ax(g) == 0x5304) {
1877                    // we do not support APM - set carry flag to indicate error
1878#ifdef CONFIG_SVM
1879                    amd_vmcb_rflags_cf_wrf(&g->vmcb, 1);
1880#else
1881		    vmx_vmcs_rflags_cf_wrf(g, 1);
1882#endif
1883                }
1884                // GET MEMORY SIZE FOR >64M CONFIGURATIONS
1885                else if (guest_get_ax(g) == 0xe801) {
1886                    // we do not support this BIOS call
1887                    // both grub and linux may also use the 0xe820 call
1888#ifdef CONFIG_SVM
1889                    amd_vmcb_rflags_cf_wrf(&g->vmcb, 1);
1890#else
1891		    vmx_vmcs_rflags_cf_wrf(g, 1);
1892#endif
1893                }
1894                // GET SYSTEM MEMORY MAP
1895                // EDX has to contain 0x534d4150 (== 'SMAP')
1896                else if (guest_get_ax(g) == 0xe820 &&
1897                         guest_get_edx(g) == 0x534d4150) {
1898                    // for now we report only two entries: base memory and the extended memory block
1899                    if (guest_get_ebx(g) > 1 || guest_get_ecx(g) < 20) {
1900                        // wrong input params -> report error
1901#ifdef CONFIG_SVM
1902                        amd_vmcb_rflags_cf_wrf(&g->vmcb, 1);
1903#else
1904			vmx_vmcs_rflags_cf_wrf(g, 1);
1905#endif
1906                    } else {
1907                        // taken from http://www.ctyme.com/intr/rb-1741.htm
1908#ifdef CONFIG_SVM
1909                        uintptr_t addr = guest_to_host(g->mem_low_va) +
1910                                         amd_vmcb_es_base_rd(&g->vmcb) +
1911                                         guest_get_di(g);
1912#else
1913			err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_ES_BASE, &es_guest_base);
1914                        uintptr_t addr = guest_to_host(g->mem_low_va) +
1915                                         es_guest_base + guest_get_di(g);
1916#endif
1917                        // set EAX to 'SMAP'
1918                        guest_set_eax(g, 0x534D4150);
1919                        // returned bytes (always 20)
1920                        guest_set_ecx(g, 20);
1921
1922                        switch (guest_get_ebx(g)) {
1923                        case 0x0:
1924                            // base memory
1925                            assert(g->mem_low_va == 0);
1926                            // base address
1927                            *(uint64_t *)addr = 0;
1928                            // size of the memory block
1929                            *(uint64_t *)(addr + 8) = 0xa0000; // 640 KiB
1930                            // mem type, 1 == "memory, available to the OS"
1931                            *(uint32_t *)(addr + 16) = 1;
1932                            // indicate that there is more data
1933                            guest_set_ebx(g, 1);
1934                            break;
1935                        case 0x1:
1936                            // extended memory
1937                            assert(g->mem_high_va > 0x100000);
1938                            // base address
1939                            *(uint64_t *)addr = 0x100000;   // 1 MiB
1940                            // size of the memory block
1941                            *(uint64_t *)(addr + 8) = g->mem_high_va - 0x100000;
1942                            // mem type, 1 == "memory, available to the OS"
1943                            *(uint32_t *)(addr + 16) = 1;
1944                            // indicate that there is no more data
1945                            guest_set_ebx(g, 0);
1946                            break;
1947                        default:
1948                            assert(!"not reached");
1949                            break;
1950                        }
1951
1952                        // mark success
1953#ifdef CONFIG_SVM
1954                        amd_vmcb_rflags_cf_wrf(&g->vmcb, 0);
1955#else
1956			vmx_vmcs_rflags_cf_wrf(g, 0);
1957#endif
1958                    }
1959                }
1960                // SYSTEM - Get Intel SpeedStep (IST) information
1961                else if (guest_get_ax(g) == 0xe980) {
1962                    // not supported yet
1963#ifdef CONFIG_SVM
1964                    amd_vmcb_rflags_cf_wrf(&g->vmcb, 1);
1965#else
1966		    vmx_vmcs_rflags_cf_wrf(g, 1);
1967#endif
1968                }
1969                // SYSTEM - GET CONFIGURATION (XT >1986/1/10,AT mdl 3x9,
1970                // CONV,XT286,PS)
1971                // GRUB BUG: it puts 0xc0 into AX instead of AH
1972                else if (guest_get_ax(g) == 0xc0) {
1973                    // we do not support this
1974#ifdef CONFIG_SVM
1975                    amd_vmcb_rflags_cf_wrf(&g->vmcb, 1);
1976#else
1977		    vmx_vmcs_rflags_cf_wrf(g, 1);
1978#endif
1979                    guest_set_ah(g, 0x80);
1980                }
1981                // GET EXTENDED MEMORY SIZE
1982                else if (guest_get_ah(g) == 0x88) {
1983                    // calculate number of 1KB chunks starting from 1MB but not
1984                    // beyond 16MB
1985                    assert(((g->mem_high_va - g->mem_low_va) & 0x3ff) == 0);
1986                    guest_set_ax(g, MIN(0x3c00 /* 16MB */,
1987                                 (g->mem_high_va - g->mem_low_va) / 1024));
1988                    // indicate no error occurred
1989#ifdef CONFIG_SVM
1990                    amd_vmcb_rflags_cf_wrf(&g->vmcb, 0);
1991#else
1992		    vmx_vmcs_rflags_cf_wrf(g, 0);
1993#endif
1994                }
1995                // SYSTEM - GET CONFIGURATION (XT >1986/1/10,AT mdl 3x9,
1996                // CONV,XT286,PS)
1997                else if (guest_get_ah(g) == 0xc0) {
1998                    // we do not support this
1999#ifdef CONFIG_SVM
2000                    amd_vmcb_rflags_cf_wrf(&g->vmcb, 1);
2001#else
2002		    vmx_vmcs_rflags_cf_wrf(g, 1);
2003#endif
2004                    guest_set_ah(g, 0x80);
2005                // SYSTEM - SET BIOS MODE
2006                } else if (guest_get_ah(g) == 0xec) {
2007                    // the purpose of this BIOS call is unclear, and Linux
2008                    // expects no action whatsoever
2009                } else {
2010                    printf("Unhandled method on INT 0x15\n");
2011                    return handle_vmexit_unhandeled(g);
2012                }
2013                break;
2014            case 0x16:
2015                // KEYBOARD - SET TYPEMATIC RATE AND DELAY
2016                if (guest_get_ah(g) == 0x3) {
2017                    // ignore this
2018                } else if (guest_get_ah(g) == 0x2) {
2019                    // Return keyboard flags
2020                    guest_set_al(g, 0x0);
2021                } else {
2022                    printf("Unhandled method on INT 0x16\n");
2023                    return handle_vmexit_unhandeled(g);
2024                }
2025                break;
2026            case 0x1a:
2027                // TIME - GET REAL-TIME CLOCK TIME (AT,XT286,PS)
2028                if (guest_get_ah(g) == 0x2) {
2029                    uint8_t h, m, s;
2030                    lpc_rtc_get_time_bcd(g->lpc, &h, &m, &s);
2031                    guest_set_ch(g, h);
2032                    guest_set_cl(g, m);
2033                    guest_set_dh(g, s);
2034                    guest_set_dl(g, 0);
2035                    // mark success
2036#ifdef CONFIG_SVM
2037                    amd_vmcb_rflags_cf_wrf(&g->vmcb, 0);
2038#else
2039		    vmx_vmcs_rflags_cf_wrf(g, 0);
2040#endif
2041                } else {
2042                    printf("Unhandled method on INT 0x1a\n");
2043                    return handle_vmexit_unhandeled(g);
2044                }
2045                break;
2046            default:
2047                printf("handle_vmexit_swint: Unhandled real-mode interrupt "
2048                       "0x%x (%d).\n", int_num, int_num);
2049                return handle_vmexit_unhandeled(g);
2050        }
2051    } else {
2052        printf("vmkitmon: encountered INT instruction outside real mode\n");
2053        return handle_vmexit_unhandeled(g);
2054    }
2055
2056    // advance the rip beyond the instruction
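    // (INT imm8 is a two-byte instruction: 0xcd followed by the vector number)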
2057#ifdef CONFIG_SVM
2058    amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 2);
2059#else
2060    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip);
2061    err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 2);
2062    assert(err_is_ok(err));
2063#endif
2064    return HANDLER_ERR_OK;
2065}
2066
2067static inline enum opsize
2068io_access_size_to_opsize (enum x86_io_access io)
2069{
2070    if (io & X86_IO_ACCESS_SZ8) {
2071        return OPSIZE_8;
2072    } else if (io & X86_IO_ACCESS_SZ16) {
2073        return OPSIZE_16;
2074    } else if (io & X86_IO_ACCESS_SZ32) {
2075        return OPSIZE_32;
2076    } else {
2077        assert(!"NYI");
2078        return 0;
2079    }
2080}
2081
2082static int
2083handle_vmexit_ioio (struct guest *g)
2084{
2085    int r;
2086#ifdef CONFIG_SVM
2087    uint64_t info1 = amd_vmcb_exitinfo1_rd(&g->vmcb);
2088    enum x86_io_access io;
2089    uint16_t port = info1 >> 16;
2090#else
2091    errval_t err = 0;
2092    if (!g->emulated_before_exit) {
2093        err += invoke_dispatcher_vmread(g->dcb_cap, VMX_EXIT_QUAL, &saved_exit_qual);
2094	uint64_t instr_len, guest_rip;
2095	err += invoke_dispatcher_vmread(g->dcb_cap, VMX_EXIT_INSTR_LEN, &instr_len);
2096	err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip);
2097	saved_rip = guest_rip + instr_len;
2098    }
2099    uint16_t port = (saved_exit_qual >> 16) & 0xffff;
2100#endif
2101    bool write;
2102    enum opsize size;
2103    uint32_t val;
2104    bool newapi = false; // needed as a transition
2105
2106#ifdef CONFIG_SVM
2107    // copy the access flags
2108    // FIXME: this severely exploits the way the x86_io_access flags are set up
2109    io = (info1 >> 1);
2110    io |= info1 & SVM_IOIO_TYPE_MASK;
2111
2112    // gather some params for the io access
2113    write = (io & X86_IO_ACCESS_TYPE) == 0;
2114    size = OPSIZE_8; // make gcc happy
2115    if (io & X86_IO_ACCESS_SZ8) {
2116        size = OPSIZE_8;
2117    } else if (io & X86_IO_ACCESS_SZ16) {
2118        size = OPSIZE_16;
2119    } else if (io & X86_IO_ACCESS_SZ32) {
2120        size = OPSIZE_32;
2121    }
2122#else
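    // VMX I/O exit qualification layout: bits 2:0 encode the access size
    // (0 = 1 byte, 1 = 2 bytes, 3 = 4 bytes), bit 3 the direction (1 = IN),
    // bits 31:16 the port number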
2123    write = ((saved_exit_qual >> 3) & 0x1) == 0;
2124    size = OPSIZE_8;
2125    if ((saved_exit_qual & 0x7) == 0) {
2126        size = OPSIZE_8;
2127    } else if ((saved_exit_qual & 0x7) == 1) {
2128        size = OPSIZE_16;
2129    } else if ((saved_exit_qual & 0x7) == 3) {
2130        size = OPSIZE_32;
2131    } else {
2132        assert(!"Invalid size of access value");
2133    }
2134#endif
2135    // fetch the source val if necessary
2136    if (write) {
2137        switch (size) {
2138        case OPSIZE_8:
2139            val = guest_get_al(g);
2140            break;
2141        case OPSIZE_16:
2142            val = guest_get_ax(g);
2143            break;
2144        case OPSIZE_32:
2145            val = guest_get_eax(g);
2146            break;
2147        default:
2148            assert(!"not reached");
2149            break;
2150        }
2151    }
2152
2153    // assign the request to the corresponding subsystem
2154    switch (port) {
2155        // LPC devices
2156        case 0x20:  // primary PIC
2157        case 0x21:  // primary PIC
2158        case 0x40:  // Timer
2159        case 0x41:  // Timer
2160        case 0x42:  // Timer
2161        case 0x43:  // Timer
2162        case 0x61:  // NMI Controller
2163        case 0x70:  // RTC
2164        case 0x71:  // RTC
2165        case 0x72:  // RTC
2166        case 0x73:  // RTC
2167        case 0x74:  // RTC
2168        case 0x75:  // RTC
2169        case 0x76:  // RTC
2170        case 0x77:  // RTC
2171        case 0xa0:  // secondary PIC
2172        case 0xa1:  // secondary PIC
2173            if (write) {
2174                r = lpc_handle_pio_write(g->lpc, port, size, val);
2175                guest_assert(g, r == 0);
2176            } else {
2177                r = lpc_handle_pio_read(g->lpc, port, size, &val);
2178                assert(r == 0);
2179            }
2180            newapi = true;
2181            break;
2182        // Keyboard
2183        case 0x60:
2184        case 0x64:
2185            // we currently do not support a keyboard
2186            if (!write) {
2187                val = ~0;
2188            }
2189            newapi = true;
2190            break;
2191        case 0x80:
2192            // some apps use writing to this port as a method to delay execution
2193            // so we just do nothing
2194            break;
2195        // Coprocessor
2196        case 0xf0:
2197        case 0xf1:
2198            // coprocessor IGNNE# - do nothing for now
2199            break;
2200
2201        // serial COM1 port
2202        // FIXME: this should not be hardcoded !
2203        case 0x3f8:
2204        case 0x3f9:
2205        case 0x3fa:
2206        case 0x3fb:
2207        case 0x3fc:
2208        case 0x3fd:
2209        case 0x3fe:
2210        case 0x3ff:
2211        // COM2
2212        case 0x2f8:
2213        case 0x2f9:
2214        case 0x2fa:
2215        case 0x2fb:
2216        case 0x2fc:
2217        case 0x2fd:
2218        case 0x2fe:
2219        case 0x2ff:
2220        // COM3
2221        case 0x3e8:
2222        case 0x3e9:
2223        case 0x3ea:
2224        case 0x3eb:
2225        case 0x3ec:
2226        case 0x3ed:
2227        case 0x3ee:
2228        case 0x3ef:
2229        // COM4
2230        case 0x2e8:
2231        case 0x2e9:
2232        case 0x2ea:
2233        case 0x2eb:
2234        case 0x2ec:
2235        case 0x2ed:
2236        case 0x2ee:
2237        case 0x2ef: {
2238            int com;
2239
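            // map the port to a UART index: 0x3f8 -> COM1 (0), 0x2f8 -> COM2 (1),
            // 0x3e8 -> COM3 (2), 0x2e8 -> COM4 (3)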
2240            com = (port & 0xf0) == 0xf0 ? !(port & 0x100) : !(port & 0x100) + 2;
2241            assert(com >= 0 && com < 4);
2242            if (write) {
2243                r = pc16550d_handle_pio_write(g->serial_ports[com], port,
2244                                              size, val);
2245                assert(r == 0);
2246            } else {
2247                r = pc16550d_handle_pio_read(g->serial_ports[com], port,
2248                                             size, &val);
2249                assert(r == 0);
2250            }
2251            newapi = true;
2252            break;
2253        }
2254
2255            // PCI config space (address)
2256    case 0xcf8:
2257    case 0xcf9:
2258    case 0xcfa:
2259    case 0xcfb:
2260            // PCI config space (data)
2261    case 0xcfc:
2262    case 0xcfd:
2263    case 0xcfe:
2264    case 0xcff:
2265        if(write) {
2266            r = pci_handle_pio_write(g->pci, port, size, val);
2267        } else {
2268            r = pci_handle_pio_read(g->pci, port, size, &val);
2269        }
2270        assert(r == 0);
2271        newapi = true;
2272        break;
2273
2274        default:
2275            // the default is to return all ones (0xff per byte) and to ignore writes
2276            if (!write) {
2277                val = 0xffffffff;
2278            }
2279            newapi = true;
2280    };
2281
2282    // set the destination when necessary
2283    if (newapi && !write) {
2284        switch (size) {
2285        case OPSIZE_8:
2286            guest_set_al(g, val);
2287            break;
2288        case OPSIZE_16:
2289            guest_set_ax(g, val);
2290            break;
2291        case OPSIZE_32:
2292            guest_set_eax(g, val);
2293            break;
2294        default:
2295            assert(!"not reached");
2296            break;
2297        }
2298    }
2299
2300    // resume at the RIP of the following instruction (SVM provides it in exitinfo2)
2301#ifdef CONFIG_SVM
2302    amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_exitinfo2_rd(&g->vmcb));
2303#else
2304    err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, saved_rip);
2305    assert(err_is_ok(err));
2306#endif
2307    return HANDLER_ERR_OK;
2308}
2309
2310static int
2311handle_vmexit_msr (struct guest *g) {
2312#ifdef CONFIG_SVM
2313    bool write = amd_vmcb_exitinfo1_rd(&g->vmcb) == 1;
2314#else
2315    int msr_index;
2316    errval_t err = 0;
2317    bool write = (saved_exit_reason == VMX_EXIT_REASON_WRMSR);
2318    struct msr_entry *guest_msr_area = (struct msr_entry *)g->msr_area_va;
2319#endif
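    // RDMSR/WRMSR pass the MSR index in ECX and the 64-bit value in EDX:EAX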
2320    uint32_t msr = guest_get_ecx(g);
2321    uint64_t val;
2322
2323    // there may be writes or reads to MSRs
2324    if (write) {
2325        // fetch the value to write from EDX:EAX
2326        val = ((uint64_t)guest_get_edx(g) << 32) | guest_get_eax(g);
2327
2328        // store the value to be written into the corresponding location
2329        switch (msr) {
2330        case X86_MSR_SYSENTER_CS:
2331#ifdef CONFIG_SVM
2332            amd_vmcb_sysenter_cs_wr(&g->vmcb, val);
2333#else
2334	    err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_SYSENTER_CS, val);
2335#endif
2336            break;
2337        case X86_MSR_SYSENTER_ESP:
2338#ifdef CONFIG_SVM
2339            amd_vmcb_sysenter_esp_wr(&g->vmcb, val);
2340#else
2341	    err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_SYSENTER_ESP, val);
2342#endif
2343            break;
2344        case X86_MSR_SYSENTER_EIP:
2345#ifdef CONFIG_SVM
2346            amd_vmcb_sysenter_eip_wr(&g->vmcb, val);
2347#else
2348	    err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_SYSENTER_EIP, val);
2349#endif
2350            break;
2351        case X86_MSR_EFER:
2352#ifdef CONFIG_SVM
2353            amd_vmcb_efer_wr_raw(&g->vmcb, val);
2354#else
2355	    err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_EFER_F, val);
2356#endif
2357            break;
2358        case X86_MSR_FS_BASE:
2359#ifdef CONFIG_SVM
2360            amd_vmcb_fs_base_wr(&g->vmcb, val);
2361#else
2362	    err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_FS_BASE, val);
2363#endif
2364            break;
2365        case X86_MSR_GS_BASE:
2366#ifdef CONFIG_SVM
2367            amd_vmcb_gs_base_wr(&g->vmcb, val);
2368#else
2369	    err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_GS_BASE, val);
2370#endif
2371            break;
2372#ifdef CONFIG_SVM
2373        case X86_MSR_KERNEL_GS_BASE:
2374            amd_vmcb_kernel_gs_base_wr(&g->vmcb, val);
2375            break;
2376        case X86_MSR_STAR:
2377            amd_vmcb_star_wr(&g->vmcb, val);
2378            break;
2379        case X86_MSR_LSTAR:
2380            amd_vmcb_lstar_wr(&g->vmcb, val);
2381            break;
2382        case X86_MSR_CSTAR:
2383            amd_vmcb_cstar_wr(&g->vmcb, val);
2384            break;
2385        case X86_MSR_SFMASK:
2386            amd_vmcb_sfmask_wr(&g->vmcb, val);
2387            break;
2388        default:
2389            printf("MSR: unhandled MSR write access to %x\n", msr);
2390            return handle_vmexit_unhandeled(g);
2391#else
2392        case X86_MSR_BIOS_SIGN_ID:
2393            break;
2394	default:
2395	    msr_index = vmx_guest_msr_index(msr);
2396	    if (msr_index == -1) {
2397            printf("MSR: unhandled MSR write access to %x\n", msr);
2398		return handle_vmexit_unhandeled(g);
2399	    }
2400	    guest_msr_area[msr_index].val = val;
2401	    break;
2402#endif
2403        }
2404    } else {
2405        // read the value from the corresponding location
2406        switch (msr) {
2407        case X86_MSR_SYSENTER_CS:
2408#ifdef CONFIG_SVM
2409            val = amd_vmcb_sysenter_cs_rd(&g->vmcb);
2410#else
2411	    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_SYSENTER_CS, &val);
2412#endif
2413            break;
2414        case X86_MSR_SYSENTER_ESP:
2415#ifdef CONFIG_SVM
2416            val = amd_vmcb_sysenter_esp_rd(&g->vmcb);
2417#else
2418	    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_SYSENTER_ESP, &val);
2419#endif
2420            break;
2421        case X86_MSR_SYSENTER_EIP:
2422#ifdef CONFIG_SVM
2423            val = amd_vmcb_sysenter_eip_rd(&g->vmcb);
2424#else
2425	    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_SYSENTER_EIP, &val);
2426#endif
2427            break;
2428        case X86_MSR_EFER:
2429#ifdef CONFIG_SVM
2430            val = amd_vmcb_efer_rd_raw(&g->vmcb);
2431#else
2432	    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_EFER_F, &val);
2433#endif
2434            break;
2435        case X86_MSR_FS_BASE:
2436#ifdef CONFIG_SVM
2437            val = amd_vmcb_fs_base_rd(&g->vmcb);
2438#else
2439	    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_FS_BASE, &val);
2440#endif
2441            break;
2442        case X86_MSR_GS_BASE:
2443#ifdef CONFIG_SVM
2444            val = amd_vmcb_gs_base_rd(&g->vmcb);
2445#else
2446	    err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_GS_BASE, &val);
2447#endif
2448            break;
2449#ifdef CONFIG_SVM
2450        case X86_MSR_KERNEL_GS_BASE:
2451            val = amd_vmcb_kernel_gs_base_rd(&g->vmcb);
2452            break;
2453        case X86_MSR_STAR:
2454            val = amd_vmcb_star_rd(&g->vmcb);
2455            break;
2456        case X86_MSR_LSTAR:
2457            val = amd_vmcb_lstar_rd(&g->vmcb);
2458            break;
2459        case X86_MSR_CSTAR:
2460            val = amd_vmcb_cstar_rd(&g->vmcb);
2461            break;
2462        case X86_MSR_SFMASK:
2463            val = amd_vmcb_sfmask_rd(&g->vmcb);
2464            break;
2465        default:
2466            printf("MSR: unhandled MSR read access to %x\n", msr);
2467            return handle_vmexit_unhandeled(g);
2468#else
2469        case X86_MSR_APIC_BASE:
2470        case X86_MSR_BIOS_SIGN_ID:
2471        case X86_MSR_MTRRCAP:
2472        case X86_MSR_MCG_CAP:
2473        case X86_MSR_MCG_STATUS:
2474        case X86_MSR_PAT:
2475        case X86_MTRR_DEF_TYPE:
2476            val = 0x0;
2477            break;
2478        case X86_MSR_MISC_ENABLE:
2479            val = 0x1; // enable fast-string instructions
2480            break;
2481	default:
2482	    msr_index = vmx_guest_msr_index(msr);
2483	    if (msr_index == -1) {
2484            printf("MSR: unhandled MSR read access to %x\n", msr);
2485	      return handle_vmexit_unhandeled(g);
2486	    }
2487	    val = guest_msr_area[msr_index].val;
2488	    break;
2489#endif
2490        }
2491
2492        // store the value in EDX:EAX
2493        guest_set_eax(g, val);
2494        guest_set_edx(g, val >> 32);
2495    }
2496
2497    // advance the rip beyond the current instruction
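    // (RDMSR and WRMSR are both two-byte instructions: 0f 32 and 0f 30)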
2498#ifdef CONFIG_SVM
2499    amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 2);
2500#else
2501    uint64_t guest_rip;
2502    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip);
2503    err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 2);
2504    assert(err_is_ok(err));
2505#endif
2506    return HANDLER_ERR_OK;
2507}
2508
2509static int
2510handle_vmexit_cpuid (struct guest *g) {
2511    uint32_t eax, ebx, ecx, edx;
2512    uint32_t func = guest_get_eax(g);
2513
2514    switch (func) {
2515#ifdef CONFIG_SVM
2516    // Processor Vendor and Largest Standard Function Number
2517    case 0:
2518    case 0x80000000:
2519        // max standard function offset
2520        eax = func == 0 ? 0x1 : 0x80000000;
2521        // string "AuthenticAMD"
2522        ebx = 0x68747541;
2523        ecx = 0x444d4163;
2524        edx = 0x69746e65;
2525    break;
2526
2527    // Family, Model, Stepping Identifiers
2528    case 1:
2529        // we simulate an AMD K6-3D
2530        // Family 5, Model 8, Stepping 12
2531        eax = 0x58c;
2532        // no brand, clflush size 16, no multiprocessing, no local apic
2533        ebx = 0x0f00;
2534        // support the popcnt instr
2535        ecx = 0x800000;
2536        // support some basic features
2537        edx = 0x89a91b;
2538    break;
2539
2540    default:
2541        // use the answer of the host if there is any other request
2542        // FIXME: this is probably not a good idea ;)
2543        cpuid(func, &eax, &ebx, &ecx, &edx);
2544        printf("handle_vmexit_cpuid: CPUID: func %x, host reports: eax %x, "
2545                "ebx %x, ecx %x, edx %x\n", func, eax, ebx, ecx, edx);
2546        break;
2547#else
2548    case 0:
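        // max standard leaf in EAX; EBX, EDX, ECX spell out "GenuineIntel"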
2549        eax = 0x2;
2550        ebx = 0x756e6547;
2551        ecx = 0x6c65746e;
2552        edx = 0x49656e69;
2553        break;
2554    case 1:
2555        eax = 0x800;
2556        ebx = 0x800;
2557        ecx = 0x80200000;
2558        edx = 0x183fbff;
2559        break;
2560    case 2:
2561        eax = 0x1;
2562        ebx = 0x0;
2563        ecx = 0x4d;
2564        edx = 0x2c307d;
        break;
2565    default:
2566        eax = 0x0;
2567        ebx = 0x0;
2568        ecx = 0x0;
2569        edx = 0x0;
2570        break;
2571#endif
2572    }
2573
2574    guest_set_eax(g, eax);
2575    guest_set_ebx(g, ebx);
2576    guest_set_ecx(g, ecx);
2577    guest_set_edx(g, edx);
2578
2579    // advance the rip beyond the instruction
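    // (CPUID is the two-byte instruction 0f a2)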
2580#ifdef CONFIG_SVM
2581    amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 2);
2582#else
2583    uint64_t guest_rip;
2584    errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip);
2585    err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 2);
2586    assert(err_is_ok(err));
2587#endif
2588    return HANDLER_ERR_OK;
2589}
2590
2591static int
2592handle_vmexit_vmmcall (struct guest *g) {
2593    /*printf("VMMCALL: tsc %lu, exits with mon invocation %lu, exits w/o mon "
2594           "invocation %lu\n", rdtsc(),
2595           g->ctrl->num_vm_exits_with_monitor_invocation,
2596           g->ctrl->num_vm_exits_without_monitor_invocation);*/
2597
2598    // advance the rip beyond the instruction
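    // (VMCALL/VMMCALL are the three-byte instructions 0f 01 c1 / 0f 01 d9)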
2599#ifdef CONFIG_SVM
2600    amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 3);
2601#else
2602    uint64_t guest_rip;
2603    errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip);
2604    err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 3);
2605    assert(err_is_ok(err));
2606#endif
2607    return HANDLER_ERR_OK;
2608}
2609
2610static int
2611handle_vmexit_hlt (struct guest *g) {
2612    // the guest has nothing to do - poll our irq sources for pending IRQs
2613    // if they do not assert a virtual IRQ then we will do nothing
2614    lpc_pic_process_irqs(g->lpc);
2615
2616    // advance the rip beyond the instruction
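    // (HLT is the single-byte instruction 0xf4)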
2617#ifdef CONFIG_SVM
2618    amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 1);
2619#else
2620    uint64_t guest_rip;
2621    errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip);
2622    err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 1);
2623#endif
2624
2625    // running HLT with IRQs masked does not make any sense
2626    // FIXME: this assert is silly; shutting down the VM would be the right way
2627#ifdef CONFIG_SVM
2628    guest_assert(g, amd_vmcb_rflags_rd(&g->vmcb).intrf == 1);
2629#else
2630    uint64_t guest_rflags;
2631    err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RFLAGS, &guest_rflags);
2632    assert(err_is_ok(err));
2633    guest_assert(g, guest_rflags & RFLAGS_IF);
2634#endif
2635    if (virq_pending(g, NULL, NULL)) {
2636        // there is an IRQ pending, proceed as normal, the CPU will take it
2637    } else {
2638        // there is really nothing to do - stop the VM and wait
2639        g->runnable = false;
2640    }
2641
2642    return HANDLER_ERR_OK;
2643}
2644
2645static inline int
2646decode_mov_instr_length (struct guest *g, uint8_t *code)
2647{
2648    int len;
2649
2650    // we only support long mode for now
2651    //assert(amd_vmcb_efer_rd(&g->vmcb).lma == 1);
2652
2653    // all non-special MOV instructions use a one-byte opcode and at least a
2654    // ModR/M byte
2655    len = 2;
2656    // check for the REX prefix
2657    if ((code[0] >> 4) == 0x4) {
2658        len++;
2659        code++;
2660    }
2661    // precaution, because not all variants of MOV were checked; at least these two
2662    // variants are supported
2663    assert(code[0] == 0x89 || code[0] == 0x8b);
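    // 0x89 is MOV r/m,r (store to memory), 0x8b is MOV r,r/m (load from memory)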
2664
2665    union x86_modrm modrm = { .raw = code[1] };
2666    // check for displacements
2667    if (modrm.u.mod == 0x1) {
2668        // 1B displacement
2669        len++;
2670    } else if (modrm.u.mod == 0x2) {
2671        // 4B displacement
2672        len += 4;
2673    }
2674
2675    // check for SIB byte
2676    if (modrm.u.rm == 0x4 && modrm.u.mod != 0x3) {
2677        len++;
2678    }
2679
2680    return len;
2681}
2682
2683// finds out whether a move instruction is a read or a write with respect to
2684// memory
2685static inline bool
2686decode_mov_is_write (struct guest *g, uint8_t *code)
2687{
2688    // check for the REX prefix
2689    if ((code[0] >> 4) == 0x4) {
2690        code++;
2691    }
2692
2693    // we only support one move variant (in each direction) for now
2694    assert(code[0] == 0x89 || code[0] == 0x8b);
2695
2696    union x86_modrm modrm = { .raw = code[1] };
2697    // not defined for reg to reg moves
2698    assert(modrm.u.mod != 3);
2699
2700    return code[0] == 0x89; // 0x89 ==> MOV reg -> mem
2701}
2702
2703static inline enum opsize
2704decode_mov_op_size (struct guest *g, uint8_t *code)
2705{
2706    /*
2707	printf("EFER: 0x%lx\n", amd_vmcb_efer_rd_raw(&g->vmcb));
2708	printf("Code: 0x%lx\n", *((uint64_t *)code));
2709	printf("Code[0]: 0x%x, Code[1]: 0x%x, Code[2]: 0x%x, Code[3]: 0x%x\n", code[0],code[1],code[2],code[3]);
2710	printf("Guest EAX: 0x%x\n", guest_get_eax(g));
2711	printf("Guest EBX: 0x%x\n", guest_get_ebx(g));
2712	printf("Guest ECX: 0x%x\n", guest_get_ecx(g));
2713
2714	printf("Guest EDX: 0x%x\n", guest_get_edx(g));
2715	printf("Guest RDI: 0x%lx\n", guest_get_rdi(g));
2716	printf("Guest RSI: 0x%lx\n", guest_get_rsi(g));
2717	printf("Guest RSP: 0x%lx\n", guest_get_rsp(g));
2718	printf("Guest RBP: 0x%lx\n", guest_get_rbp(g));
2719    */
2720
2721    // we only support long mode for now
2722    //assert(amd_vmcb_efer_rd(&g->vmcb).lma == 1);
2723
2724    // check for a REX prefix with the W bit set (64-bit operand size)
2725    if ((code[0] >> 4) == 0x4 && (code[0] & 0x08)) {
2726        return OPSIZE_64;
2727    }
2728    return OPSIZE_32;
2729}
2730
2731
2732static inline uint64_t
2733decode_mov_src_val (struct guest *g, uint8_t *code) {
2734
2735    // we only support long mode for now
2736    //assert(amd_vmcb_efer_rd(&g->vmcb).lma == 1);
2737
2738    // check for the REX prefix
2739    if ((code[0] >> 4) == 0x4) {
2740        code++;
2741    }
2742
2743    // we only support one variant for now
2744    assert(code[0] == 0x89);
2745
2746    union x86_modrm modrm = { .raw = code[1] };
2747    return get_reg_val_by_reg_num(g, modrm.u.regop);
2748}
2749
2750
2751static inline void
2752decode_mov_dest_val (struct guest *g, uint8_t *code, uint64_t val)
2753{
2754    // we only support long mode for now
2755    //assert(amd_vmcb_efer_rd(&g->vmcb).lma == 1);
2756
2757    // check for the REX prefix
2758    if ((code[0] >> 4) == 0x4) {
2759        code++;
2760    }
2761
2762    // we only support one variant for now
2763    assert(code[0] == 0x8b);
2764
2765    union x86_modrm modrm = { .raw = code[1] };
2766    set_reg_val_by_reg_num(g, modrm.u.regop, val);
2767}
2768
2769/**** e1000
2770#define TDBAL_OFFSET 0x3800
2771#define TDBAH_OFFSET 0x3804
2772#define RDBAL_OFFSET 0x2800
2773#define RDBAH_OFFSET 0x2804
2774#define TDT_OFFSET 0x3818 //Transmit descriptor tail. Writes to this toggle transmission
2775#define TCTL_OFFSET 0x400 //Transmission Control
2776
2777#define IMS_OFFSET 0xd0 // Interrupt Mask Set/Read Register
2778#define ICS_OFFSET 0xc8 // Interrupt Cause Set Register
2779
2780static int register_needs_translation(uint64_t addr){
2781	return (
2782		addr == TDBAL_OFFSET ||
2783		addr == TDBAH_OFFSET ||
2784		addr == RDBAL_OFFSET ||
2785		addr == RDBAH_OFFSET
2786	);
2787
2788}
2789
2790**** e1000 */
2791
2792
2793
2794
2795#define MMIO_MASK(bytes) (~(~(bytes) + 1)) // equals (bytes) - 1: offset mask within a power-of-two sized region
2796
2797static int
2798handle_vmexit_npf (struct guest *g) {
2799    int r;
2800#ifdef CONFIG_SVM
2801    uint64_t fault_addr = amd_vmcb_exitinfo2_rd(&g->vmcb);
2802#else
2803    uint64_t fault_addr, guest_rip;
2804    errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GPADDR_F, &fault_addr);
2805    assert(err_is_ok(err));
2806#endif
2807    uint8_t *code = NULL;
2808
2809    // check for fault inside the guest physical memory region
2810    if (fault_addr >= g->mem_low_va && fault_addr < g->mem_high_va) {
2811        // allocate the missing memory
2812        alloc_guest_mem(g, fault_addr & ~BASE_PAGE_MASK, BASE_PAGE_SIZE);
2813        // do not advance the RIP, it is safe (and necessary) to
2814        // replay the faulting instruction
2815        return HANDLER_ERR_OK;
2816    }
2817
2818    // fetch the location of the code
2819    r = get_instr_arr(g, &code);
2820    assert (r == 0);
2821
2822    // virtual devices
2823    switch (fault_addr & ~BASE_PAGE_MASK) {
2824    case APIC_BASE: {
2825        uint64_t val;
2826        enum opsize size;
2827
2828        assert(g->apic != NULL);
2829        size = decode_mov_op_size(g, code);
2830        if (decode_mov_is_write(g, code)) {
2831            val = decode_mov_src_val(g, code);
2832            r = apic_handle_mmio_write(g->apic, fault_addr, size, val);
2833            assert(r == 0);
2834        } else {
2835            r = apic_handle_mmio_read(g->apic, fault_addr, size, &val);
2836            assert(r == 0);
2837            decode_mov_dest_val(g, code, val);
2838        }
2839
2840        // advance the rip beyond the instruction
2841#ifdef CONFIG_SVM
2842        amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) +
2843                        decode_mov_instr_length(g, code));
2844#else
2845	err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip);
2846	err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip +
2847					 decode_mov_instr_length(g, code));
2848	assert(err_is_ok(err));
2849#endif
2850        return HANDLER_ERR_OK;
2851    }
2852    }
2853
2854    // check whether this is an access to PCI device memory
2855
2856    for(int bus_i = 0; bus_i<256; bus_i++){
2857    	for(int dev_i = 0; dev_i < 32; dev_i++){
2858    		struct pci_bus *bus = g->pci->bus[bus_i];
2859			if(bus) {
2860				struct pci_device* dev = bus->device[dev_i];
2861				if(dev){
2862					for(int bar_i=0; bar_i<5; bar_i++){
2863						struct bar_info *curbar = &dev->bars[bar_i];
2864						if(curbar->paddr <= fault_addr && fault_addr < curbar->paddr + curbar->bytes){
2865							if(decode_mov_is_write(g, code)){
2866								uint64_t val = decode_mov_src_val(g, code);
2867								if(dev->mem_write) {
2868									dev->mem_write(dev, MMIO_MASK(curbar->bytes) & fault_addr, bar_i, val );
2869								} else {
2870									goto error;
2871								}
2872							} else {
2873								uint64_t val;
2874								if(dev->mem_read){
2875									dev->mem_read(dev, MMIO_MASK(curbar->bytes) & fault_addr, bar_i, (uint32_t*)&val);
2876									decode_mov_dest_val(g, code, val);
2877								} else {
2878									goto error;
2879								}
2880							}
2881#ifdef CONFIG_SVM
2882							amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) +
2883							                        decode_mov_instr_length(g, code));
2884#else
2885							err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip);
2886							err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip +
2887											 decode_mov_instr_length(g, code));
2888							assert(err_is_ok(err));
2889#endif
2890							return HANDLER_ERR_OK;
2891						}
2892					}
2893				}
2894			}
2895    	}
2896    }
2897
2898    error:
2899    printf("vmkitmon: access to an unknown memory location: %lx", fault_addr);
2900    return handle_vmexit_unhandeled(g);
2901}
2902
2903typedef int (*vmexit_handler)(struct guest *g);
2904
2905#ifdef CONFIG_SVM
2906static vmexit_handler vmexit_handlers[0x8c] = {
2907    [SVM_VMEXIT_CR0_READ] = handle_vmexit_cr_access,
2908    [SVM_VMEXIT_CR0_WRITE] = handle_vmexit_cr_access,
2909    [SVM_VMEXIT_CR0_SEL_WRITE] = handle_vmexit_cr_access,
2910    [SVM_VMEXIT_SWINT] = handle_vmexit_swint,
2911    [SVM_VMEXIT_IDTR_WRITE] = handle_vmexit_ldt,
2912    [SVM_VMEXIT_GDTR_WRITE] = handle_vmexit_ldt,
2913    [SVM_VMEXIT_IOIO] = handle_vmexit_ioio,
2914    [SVM_VMEXIT_MSR] = handle_vmexit_msr,
2915    [SVM_VMEXIT_CPUID] = handle_vmexit_cpuid,
2916    [SVM_VMEXIT_VMMCALL] = handle_vmexit_vmmcall,
2917    [SVM_VMEXIT_HLT] = handle_vmexit_hlt
2918};
2919#else
2920static vmexit_handler vmexit_handlers[0x8c] = {
2921    [VMX_EXIT_REASON_CPUID] = handle_vmexit_cpuid,
2922    [VMX_EXIT_REASON_HLT] = handle_vmexit_hlt,
2923    [VMX_EXIT_REASON_VMCALL] = handle_vmexit_vmmcall,
2924    [VMX_EXIT_REASON_CR_ACCESS] = handle_vmexit_cr_access,
2925    [VMX_EXIT_REASON_INOUT] = handle_vmexit_ioio,
2926    [VMX_EXIT_REASON_RDMSR] = handle_vmexit_msr,
2927    [VMX_EXIT_REASON_WRMSR] = handle_vmexit_msr,
2928    [VMX_EXIT_REASON_GDTR_IDTR] = handle_vmexit_ldt,
2929    [VMX_EXIT_REASON_EPT_FAULT] = handle_vmexit_npf,
2930    [VMX_EXIT_REASON_SWINT] = handle_vmexit_swint
2931};
2932#endif
2933
2934void
2935guest_handle_vmexit (struct guest *g) {
2936	//struct pci_ethernet * eth = (struct pci_ethernet * ) g->pci->bus[0]->device[2]->state;//
2937	//printf("guest_handle_vmexit\n");
2938    vmexit_handler handler;
2939#ifdef CONFIG_SVM
2940    uint64_t exitcode = amd_vmcb_exitcode_rd(&g->vmcb);
2941    if (exitcode == SVM_VMEXIT_NPF) {
2942        handler = handle_vmexit_npf;
2943    } else if (LIKELY(vmexit_handlers[exitcode] != NULL)) {
2944        handler = vmexit_handlers[exitcode];
2945    } else {
2946        handle_vmexit_unhandeled(g);
2947        return;
2948    }
2949#else
2950    if (!g->emulated_before_exit) {
2951        errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_EXIT_REASON,
2952						(uint64_t *)&saved_exit_reason);
2953	assert(err_is_ok(err));
2954    }
2955
2956    if (LIKELY(vmexit_handlers[saved_exit_reason] != NULL)) {
2957        handler = vmexit_handlers[saved_exit_reason];
2958    } else {
2959        handle_vmexit_unhandeled(g);
2960	return;
2961    }
2962#endif
2963    int r = handler(g);
2964    if (LIKELY(r == HANDLER_ERR_OK)) {
2965        if (g->runnable) {
2966            guest_make_runnable(g, true);
2967        }
2968    }
2969}
2970