1 2/** 3 * \file 4 */ 5 6/* 7 * Copyright (c) 2009, 2010, ETH Zurich. 8 * All rights reserved. 9 * 10 * This file is distributed under the terms in the attached LICENSE file. 11 * If you do not find this file, copies can be found by writing to: 12 * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group. 13 */ 14 15#include <stdlib.h> 16#include <string.h> 17#include "vmkitmon.h" 18#include <barrelfish/lmp_endpoints.h> 19#include "x86.h" 20#ifdef CONFIG_SVM 21#include "svm.h" 22#endif 23#include "realmode.h" 24#include "hdd.h" 25#include "console.h" 26#include "pc16550d.h" 27#include "apic.h" 28#include "lpc.h" 29#include "pci.h" 30#include "pci_host.h" 31#include "pci_devices.h" 32#include "pci_ethernet.h" 33 34#define VMCB_SIZE 0x1000 // 4KB 35 36#ifdef CONFIG_SVM 37#define IOPM_SIZE 0x3000 // 12KB 38#define MSRPM_SIZE 0x2000 // 8KB 39#else 40#define IOBMP_A_SIZE 0x1000 // 4KB 41#define IOBMP_B_SIZE 0x1000 // 4KB 42#define MSRPM_SIZE 0x1000 // 4KB 43#endif 44 45#define RM_MEM_SIZE (0x100000 + BASE_PAGE_SIZE) // 1MB + A20 gate space 46 47#define APIC_BASE 0xfee00000 48 49#define SERIAL_DRIVER "serial0.raw" 50 51#ifndef CONFIG_SVM 52extern uint16_t saved_exit_reason; 53extern uint64_t saved_exit_qual, saved_rip; 54 55// List of MSRs that are saved on VM-exit and loaded on VM-entry. 56static uint32_t msr_list[VMX_MSR_COUNT] = 57 {X86_MSR_KERNEL_GS_BASE, X86_MSR_STAR, X86_MSR_LSTAR, X86_MSR_CSTAR, X86_MSR_SFMASK}; 58 59// Saved priority of the most recent irq that is asserted. 60uint8_t interrupt_priority = 0; 61#endif 62 63#ifndef CONFIG_SVM 64static inline int vmx_guest_msr_index(uint32_t msr_index) 65{ 66 for (int i = 0; i < VMX_MSR_COUNT; i++) { 67 if (msr_list[i] == msr_index) { 68 return i; 69 } 70 } 71 return -1; 72} 73 74static void initialize_guest_msr_area(struct guest *g) 75{ 76 struct msr_entry *guest_msr_area = (struct msr_entry *)g->msr_area_va; 77 78 // The values of the MSRs in the guest MSR area are all set to 0. 79 for (int i = 0; i < VMX_MSR_COUNT; i++) { 80 guest_msr_area[i].index = msr_list[i]; 81 guest_msr_area[i].val = 0x0; 82 } 83 84 errval_t err = invoke_dispatcher_vmwrite(g->dcb_cap, VMX_EXIT_MSR_STORE_F, g->msr_area_pa); 85 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_EXIT_MSR_STORE_CNT, VMX_MSR_COUNT); 86 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_ENTRY_MSR_LOAD_F, g->msr_area_pa); 87 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_ENTRY_MSR_LOAD_CNT, VMX_MSR_COUNT); 88 assert(err_is_ok(err)); 89} 90#endif 91 92lvaddr_t guest_offset = 0; 93static struct guest __guest; 94static struct guest *__guestp = NULL; 95 96#ifdef CONFIG_SVM 97/// stores the last used guest ASID 98static uint32_t last_guest_asid = 0; 99#endif 100 101// FIXME: this is somewhat broken by design... we should emit proper exceptions 102// to the guest opposed to just halt the VM 103#define guest_assert(g, e) \ 104 ((e) ? (void)0 : (handle_vmexit_unhandeled(g), assert(e))) 105 106static errval_t 107guest_slot_alloc(struct guest *g, struct capref *ret) 108{ 109 return g->slot_alloc.a.alloc(&g->slot_alloc.a, ret); 110} 111 112errval_t guest_vspace_map_wrapper(struct vspace *vspace, lvaddr_t vaddr, 113 struct capref frame, size_t size) 114{ 115 errval_t err; 116 struct vregion *vregion = NULL; 117 struct memobj_one_frame *memobj = NULL; 118 119 // Allocate space 120 vregion = malloc(sizeof(struct vregion)); 121 if (!vregion) { 122 err = LIB_ERR_MALLOC_FAIL; 123 goto error; 124 } 125 memobj = malloc(sizeof(struct memobj_one_frame)); 126 if (!memobj) { 127 err = LIB_ERR_MALLOC_FAIL; 128 goto error; 129 } 130 131 // Create the objects 132 err = memobj_create_one_frame(memobj, size, 0); 133 if (err_is_fail(err)) { 134 err = err_push(err, LIB_ERR_MEMOBJ_CREATE_ANON); 135 goto error; 136 } 137 err = memobj->m.f.fill(&memobj->m, 0, frame, size); 138 if (err_is_fail(err)) { 139 err = err_push(err, LIB_ERR_MEMOBJ_FILL); 140 goto error; 141 } 142 err = vregion_map_fixed(vregion, vspace, &memobj->m, 0, size, vaddr, 143 VREGION_FLAGS_READ | VREGION_FLAGS_WRITE | VREGION_FLAGS_EXECUTE); 144 if (err_is_fail(err)) { 145 err = LIB_ERR_VSPACE_MAP; 146 goto error; 147 } 148 err = memobj->m.f.pagefault(&memobj->m, vregion, 0, 0); 149 if (err_is_fail(err)) { 150 err = err_push(err, LIB_ERR_MEMOBJ_PAGEFAULT_HANDLER); 151 goto error; 152 } 153 154 return SYS_ERR_OK; 155 156 error: // XXX: proper cleanup 157 if (vregion) { 158 free(vregion); 159 } 160 if (memobj) { 161 free(memobj); 162 } 163 return err; 164} 165 166#define GUEST_VSPACE_SIZE (1ul<<32) // GB 167 168static errval_t vspace_map_wrapper(lvaddr_t vaddr, struct capref frame, 169 size_t size) 170{ 171 errval_t err; 172 static struct memobj_anon *memobj = NULL; 173 static struct vregion *vregion = NULL; 174 static bool initialized = false; 175 176 if (!initialized) { 177 // Allocate space 178 memobj = malloc(sizeof(struct memobj_anon)); 179 if (!memobj) { 180 return LIB_ERR_MALLOC_FAIL; 181 } 182 vregion = malloc(sizeof(struct vregion)); 183 if (!vregion) { 184 return LIB_ERR_MALLOC_FAIL; 185 } 186 187 // Create a memobj and vregion 188 err = memobj_create_anon(memobj, GUEST_VSPACE_SIZE, 0); 189 if (err_is_fail(err)) { 190 return err_push(err, LIB_ERR_MEMOBJ_CREATE_ANON); 191 } 192 err = vregion_map(vregion, get_current_vspace(), &memobj->m, 0, 193 GUEST_VSPACE_SIZE, VREGION_FLAGS_READ_WRITE); 194 if (err_is_fail(err)) { 195 return err_push(err, LIB_ERR_VREGION_MAP); 196 } 197 198 guest_offset = vregion_get_base_addr(vregion); 199 initialized = true; 200 } 201 202 // Create mapping 203 err = memobj->m.f.fill(&memobj->m, vaddr, frame, size); 204 if (err_is_fail(err)) { 205 return err_push(err, LIB_ERR_MEMOBJ_FILL); 206 } 207 err = memobj->m.f.pagefault(&memobj->m, vregion, vaddr, 0); 208 if (err_is_fail(err)) { 209 return err_push(err, LIB_ERR_MEMOBJ_PAGEFAULT_HANDLER); 210 } 211 212 return SYS_ERR_OK; 213} 214// allocates some bytes of memory for the guest starting at a specific addr 215// also performs the mapping into the vspace of the monitor 216errval_t 217alloc_guest_mem(struct guest *g, lvaddr_t guest_paddr, size_t bytes) 218{ 219 errval_t err; 220 221 // only allow multiple of page sizes to be allocated 222 assert(bytes > 0 && (bytes & BASE_PAGE_MASK) == 0); 223 // do not allow allocation outside of the guests physical memory 224 assert(guest_paddr + bytes <= g->mem_high_va); 225 226 // Allocate frame 227 struct capref cap; 228 err = guest_slot_alloc(g, &cap); 229 230 231 232 233 if (err_is_fail(err)) { 234 return err_push(err, LIB_ERR_SLOT_ALLOC); 235 } 236 err = frame_create(cap, bytes, NULL); 237 if (err_is_fail(err)) { 238 return err_push(err, LIB_ERR_FRAME_CREATE); 239 } 240 241 // Map into the guest vspace 242 err = guest_vspace_map_wrapper(&g->vspace, guest_paddr, cap, bytes); 243 if (err_is_fail(err)) { 244 return err; 245 } 246 247 // Create a copy of the capability to map in our vspace 248 struct capref host_cap; 249 err = slot_alloc(&host_cap); 250 if (err_is_fail(err)) { 251 return err; 252 } 253 err = cap_copy(host_cap, cap); 254 if (err_is_fail(err)) { 255 return err; 256 } 257 258 // Map into my vspace 259 err = vspace_map_wrapper(guest_to_host(guest_paddr), host_cap, bytes); 260 if (err_is_fail(err)) { 261 return err; 262 } 263 264 struct frame_identity frameid = { .base = 0, .bytes = 0 }; 265 errval_t r = invoke_frame_identify(cap, &frameid); 266 assert(err_is_ok(r)); 267 VMKIT_PCI_DEBUG("alloc_guest_mem: frameid.base: 0x%lx, frameid.bytes: %zd, " 268 "g->mem_low_va: 0x%lx, g->mem_high_va: 0x%lx\n", 269 frameid.base, frameid.bytes, g->mem_low_va, g->mem_high_va); 270 271 return SYS_ERR_OK; 272} 273 274static void 275initialize_iopm (struct guest *self) { 276 // intercept all IO port accesses (for now) 277#ifdef CONFIG_SVM 278 memset((void*)self->iopm_va, 0xFF, IOPM_SIZE); 279#else 280 memset((void*)self->iobmp_a_va, 0xFF, IOBMP_A_SIZE); 281 memset((void*)self->iobmp_b_va, 0xFF, IOBMP_B_SIZE); 282#endif 283} 284 285// access_mode: 0 all access, 1 read intercept, 2 write intercept, 3 all interc. 286static inline void 287set_msr_access (struct guest *g, uint32_t msr, int access_mode) 288{ 289 assert(access_mode >= 0 && access_mode <= 3); 290 291 // a region a 2K bytes represents the access bits of 8K MSRs, therefore each 292 // MSR takes two bits (one for rdmsr and one for wrmsr) 293 uintptr_t byte_offset = (msr & 0xffff) / 4; 294 int bit_offset = ((msr & 0xffff) % 4) * 2; 295 296 if (msr < 0x2000) { 297 // do nothing 298 } else if (msr >= 0xc0000000 && msr < 0xc0002000) { 299 byte_offset += 0x800; 300 } else if (msr >= 0xc0010000 && msr < 0xc0012000) { 301 byte_offset += 0x1000; 302 } else { 303 assert(!"not reached"); 304 } 305 306 assert(byte_offset < MSRPM_SIZE); 307 308 // read the byte holding the relevant bits 309 uint8_t val = *(uint8_t *)(g->msrpm_va + byte_offset); 310 // set the access params according to the arguments 311 val = (val & ~(0x3 << bit_offset)) | (access_mode << bit_offset); 312 // store the modified value back in the map 313 *(uint8_t *)(g->msrpm_va + byte_offset) = val; 314 315 //printf("MSR: msr %x, byte_offset %lx, bit_offset %x, val %x\n", msr, byte_offset, bit_offset, val); 316} 317 318static void 319initialize_msrpm (struct guest *g) { 320 // intercept all MSR accesses (for now) 321 memset((void*)g->msrpm_va, 0xff, MSRPM_SIZE); 322#if 0 323 // allow performance counters and evnets MSR accesses 324 set_msr_access (g, 0xc0010000, 0); 325 set_msr_access (g, 0xc0010001, 0); 326 set_msr_access (g, 0xc0010002, 0); 327 set_msr_access (g, 0xc0010003, 0); 328 set_msr_access (g, 0xc0010004, 0); 329 set_msr_access (g, 0xc0010005, 0); 330 set_msr_access (g, 0xc0010006, 0); 331 set_msr_access (g, 0xc0010007, 0); 332#endif 333} 334 335#define INIT_DATA_SEGREG(vmcb,x) \ 336do { \ 337 amd_vmcb_seg_attrib_t __sa = { \ 338 .segtype = 3, \ 339 .p = 1, \ 340 .s = 1 \ 341 }; \ 342 amd_vmcb_##x## _attrib_wr((vmcb), __sa); \ 343 amd_vmcb_##x## _selector_wr((vmcb), 0x0); \ 344 amd_vmcb_##x## _base_wr((vmcb), 0x0); \ 345 amd_vmcb_##x## _limit_wr((vmcb), 0xffff); \ 346} while (0) 347 348#define INIT_CODE_SEGREG(vmcb,x) \ 349do { \ 350 amd_vmcb_seg_attrib_t __sa = { \ 351 .segtype = 11, \ 352 .p = 1, \ 353 .s = 1 \ 354 }; \ 355 amd_vmcb_##x## _attrib_wr((vmcb), __sa); \ 356 amd_vmcb_##x## _selector_wr((vmcb), 0xf000); \ 357 amd_vmcb_##x## _base_wr((vmcb), 0xffff0000); \ 358 amd_vmcb_##x## _limit_wr((vmcb), 0xffff); \ 359} while (0) 360 361#define INIT_SYS_SEGREG(vmcb,x) \ 362do { \ 363 amd_vmcb_seg_attrib_t __sa = { \ 364 .segtype = 2, \ 365 .p = 1 \ 366 }; \ 367 amd_vmcb_##x## _attrib_wr((vmcb), __sa); \ 368 amd_vmcb_##x## _selector_wr((vmcb), 0x0); \ 369 amd_vmcb_##x## _base_wr((vmcb), 0x0); \ 370 amd_vmcb_##x## _limit_wr((vmcb), 0xffff); \ 371} while (0) 372 373/* This method initializes a new VMCB memory regsion and sets the initial 374 * machine state as defined by the AMD64 architecture specification */ 375#ifdef CONFIG_SVM 376static void 377initialize_vmcb (struct guest *self) { 378 amd_vmcb_initialize(&self->vmcb, (mackerel_addr_t)self->vmcb_va); 379 380 // 1. Initialize intercepts 381 382 /* For now we intercept just everything */ 383 384 amd_vmcb_cr_access_wr_raw(&self->vmcb, ~0u); 385 amd_vmcb_cr_access_rdcr2_wrf(&self->vmcb, 0); 386 amd_vmcb_cr_access_wrcr2_wrf(&self->vmcb, 0); 387 amd_vmcb_cr_access_rdcr4_wrf(&self->vmcb, 0); 388 amd_vmcb_cr_access_wrcr4_wrf(&self->vmcb, 0); 389 390 // FIXME: ignoring DR accesses may be insecure 391 //amd_vmcb_dr_access_wr_raw(&self->vmcb, ~0u); 392 amd_vmcb_exceptions_wr_raw(&self->vmcb, ~0u); 393 amd_vmcb_exceptions_vector7_wrf(&self->vmcb, 0); 394 amd_vmcb_exceptions_vector14_wrf(&self->vmcb, 0); 395 396 amd_vmcb_intercepts_wr_raw(&self->vmcb, 0x1fffffffffff); 397 amd_vmcb_intercepts_pushf_wrf(&self->vmcb, 0); 398 amd_vmcb_intercepts_popf_wrf(&self->vmcb, 0); 399 amd_vmcb_intercepts_invlpg_wrf(&self->vmcb, 0); 400 amd_vmcb_intercepts_rdtsc_wrf(&self->vmcb, 0); 401 amd_vmcb_intercepts_rdtscp_wrf(&self->vmcb, 0); 402 amd_vmcb_intercepts_iret_wrf(&self->vmcb, 0); 403 amd_vmcb_intercepts_wbinvd_wrf(&self->vmcb, 0); 404 amd_vmcb_intercepts_pause_wrf(&self->vmcb, 0); 405 amd_vmcb_intercepts_vintr_wrf(&self->vmcb, 0); 406 407 // 2. Setup some config fields 408 409 // physical addresses of IOPM and MSRPM_SIZE 410 amd_vmcb_iopm_base_pa_wr(&self->vmcb, self->iopm_pa); 411 amd_vmcb_msrpm_base_pa_wr(&self->vmcb, self->msrpm_pa); 412 // assign guest ASID 413 // FIXME: use real asid allocator. BF does not know about tagged TLBs atm 414 amd_vmcb_tlb_guest_asid_wrf(&self->vmcb, ++last_guest_asid); 415 // enable virtual intr masking 416 amd_vmcb_vintr_vintr_masking_wrf(&self->vmcb, 1); 417 // enable nested paging 418 amd_vmcb_np_enable_wrf(&self->vmcb, 1); 419 420 /* 3. Guest state initialization 421 * according to Intels Manual 3A: Table 9-1. */ 422 423 // The second bit of rflags needs to be 1, also indicate that we support the 424 // CPUID instruction. 425 amd_vmcb_rflags_wr_raw(&self->vmcb, 0x00200002); 426 amd_vmcb_rip_wr(&self->vmcb, 0x0000fff0); 427 amd_vmcb_cr0_wr_raw(&self->vmcb, 0x60000010); 428 429 INIT_CODE_SEGREG(&self->vmcb, cs); 430 INIT_DATA_SEGREG(&self->vmcb, ss); 431 INIT_DATA_SEGREG(&self->vmcb, ds); 432 INIT_DATA_SEGREG(&self->vmcb, es); 433 INIT_DATA_SEGREG(&self->vmcb, fs); 434 INIT_DATA_SEGREG(&self->vmcb, gs); 435 436 INIT_SYS_SEGREG(&self->vmcb, gdtr); 437 INIT_SYS_SEGREG(&self->vmcb, idtr); 438 INIT_SYS_SEGREG(&self->vmcb, ldtr); 439 INIT_SYS_SEGREG(&self->vmcb, tr); 440 441 amd_vmcb_dr6_wr(&self->vmcb, 0xffff0ff0); 442 amd_vmcb_dr7_wr(&self->vmcb, 0x00000400); 443 444 // taken from the linux SVM source 445 amd_vmcb_gpat_wr(&self->vmcb, 0x0007040600070406ul); 446 447 // svm requires guest EFER.SVME to be set 448 amd_vmcb_efer_svme_wrf(&self->vmcb, 1); 449} 450#endif 451 452static void 453idc_handler(void *arg) 454{ 455 struct guest *g = arg; 456 errval_t err; 457 458 // consume message 459 struct lmp_recv_buf buf = { .buflen = 0 }; 460 err = lmp_endpoint_recv(g->monitor_ep, &buf, NULL); 461 assert(err_is_ok(err)); 462 463 // run real handler 464 guest_handle_vmexit(g); 465 466 // re-register 467 struct event_closure cl = { 468 .handler = idc_handler, 469 .arg = arg, 470 }; 471 err = lmp_endpoint_register(g->monitor_ep, get_default_waitset(), cl); 472 assert(err_is_ok(err)); 473} 474 475/* This method duplicates some code from spawndomain since we need to spawn very 476 * special domains */ 477static void 478spawn_guest_domain (struct guest *self) { 479 errval_t err; 480 481 // create the guest virtual address space 482 struct capref vnode_cap; 483 err = guest_slot_alloc(self, &vnode_cap); 484 assert(err_is_ok(err)); 485 err = vnode_create(vnode_cap, ObjType_VNode_x86_64_pml4); 486 assert(err_is_ok(err)); 487 488 struct pmap *pmap = malloc(sizeof(struct pmap_x86)); 489 assert(pmap); 490 err = pmap_x86_64_init(pmap, &self->vspace, vnode_cap, NULL); 491 assert(err_is_ok(err)); 492 err = vspace_init(&self->vspace, pmap); 493 assert(err_is_ok(err)); 494 495 // create DCB 496 err = guest_slot_alloc(self, &self->dcb_cap); 497 assert(err_is_ok(err)); 498 err = dispatcher_create(self->dcb_cap); 499 assert(err_is_ok(err)); 500 501 // create end point 502 struct capref ep_cap; 503 504 // use minimum-sized endpoint, because we don't need to buffer >1 vmexit 505 err = endpoint_create(LMP_RECV_LENGTH, &ep_cap, &self->monitor_ep); 506 assert(err_is_ok(err)); 507 508 // register to receive on this endpoint 509 struct event_closure cl = { 510 .handler = idc_handler, 511 .arg = self, 512 }; 513 err = lmp_endpoint_register(self->monitor_ep, get_default_waitset(), cl); 514 assert(err_is_ok(err)); 515 516 // setup the DCB 517 err = invoke_dispatcher_setup_guest(self->dcb_cap, ep_cap, vnode_cap, 518 self->vmcb_cap, self->ctrl_cap); 519 assert(err_is_ok(err)); 520 521#ifndef CONFIG_SVM 522 initialize_guest_msr_area(self); 523 524 err = 0; 525 err += invoke_dispatcher_vmwrite(self->dcb_cap, VMX_IOBMP_A_F, self->iobmp_a_pa); 526 err += invoke_dispatcher_vmwrite(self->dcb_cap, VMX_IOBMP_B_F, self->iobmp_b_pa); 527 err += invoke_dispatcher_vmwrite(self->dcb_cap, VMX_MSRBMP_F, self->msrpm_pa); 528 assert(err_is_ok(err)); 529#endif 530 // set up the guests physical address space 531 self->mem_low_va = 0; 532 // FIXME: Hardcoded guest memory size 533 self->mem_high_va = 0x80000000; // 2 GiB 534 // allocate the memory used for real mode 535 // This is not 100% necessary since one could also catch the pagefaults. 536 // If we allocate the whole memory at once we use less caps and reduce 537 // the risk run out of CSpace. 538 err = alloc_guest_mem(self, 0x0, 0x80000000); 539 assert_err(err, "alloc_guest_mem"); 540} 541 542static void 543install_grub_stage2 (struct guest *g, void *img, size_t img_size) 544{ 545 assert(img != NULL); 546 547 /* the grub image goes to 0x8000 according to 548 * http://www.gnu.org/software/grub/manual/html_node/Memory-map.html */ 549 memcpy((void *)(guest_to_host(g->mem_low_va + 0x8000)), img, img_size); 550 // according to grub stage2 source its entry point is at 0x8200 551#ifdef CONFIG_SVM 552 amd_vmcb_rip_wr(&g->vmcb, 0x8200); 553 // switch to the first segment 554 amd_vmcb_cs_selector_wr(&g->vmcb, 0x0); 555 amd_vmcb_cs_base_wr(&g->vmcb, 0x0); 556 amd_vmcb_cs_limit_wr(&g->vmcb, 0xffff); 557#else 558 errval_t err = invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, 0x8200); 559 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_CS_SEL, 0x0); 560 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_CS_BASE, 0x0); 561 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_CS_LIM, 0xffff); 562 assert(err_is_ok(err)); 563#endif 564 565} 566 567#if 0 568static void 569install_debug_app (struct guest *g) 570{ 571 //static uint8_t app[] = { 0xcd, 0x20 }; 572 static uint8_t app[] = { 0xcd, 0x20, 0x90, 0x90, 0x90, 0x90, 0x90 }; 573 memcpy((void *)g->rm_mem_va, app, sizeof(app)); 574 amd_vmcb_rip_wr(&g->vmcb, 0x0); 575 // disable nested pageing in real mode 576 amd_vmcb_np_enable_wrf(&g->vmcb, 0); 577 // enable paged real mode 578 //amd_vmcb_cr0_pg_wrf(&g->vmcb, 0x1); 579 //g->save_area->cr0 |= X86_CR0_PE_MASK; 580 amd_vmcb_rsp_wr(&g->vmcb, 0x1000); 581 amd_vmcb_cs_selector_wr(&g->vmcb, 0x0); 582 amd_vmcb_cs_base_wr(&g->vmcb, 0x0); 583 amd_vmcb_cs_limit_wr(&g->vmcb, 0xffff); 584 //g->save_area->cs.selector = 0x1000; 585 //g->save_area->cs.base = 0x10000; 586 //g->save_area->cs.base = 0x1ffff; 587} 588#endif 589 590static bool 591virq_pending (void *ud, uint8_t *irq, uint8_t *irq_prio) 592{ 593 assert(ud != NULL); 594 595 struct guest *g = ud; 596#ifdef CONFIG_SVM 597 if (amd_vmcb_vintr_rd(&g->vmcb).virq == 1) { 598#else 599 uint64_t info; 600 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_ENTRY_INTR_INFO, &info); 601 assert(err_is_ok(err)); 602 if (!!(info & (1UL << 31))) { 603#endif 604 if (irq != NULL) { 605#ifdef CONFIG_SVM 606 *irq = amd_vmcb_vintr_rd(&g->vmcb).vintr_vector; 607#else 608 *irq = info & 0xff; 609#endif 610 } 611 if (irq_prio != NULL) { 612#ifdef CONFIG_SVM 613 *irq_prio = amd_vmcb_vintr_rd(&g->vmcb).vintr_prio; 614#else 615 *irq_prio = interrupt_priority; 616#endif 617 } 618 return true; 619 } else { 620 return false; 621 } 622} 623 624#ifndef CONFIG_SVM 625static bool 626virq_accepting (void *ud) 627{ 628 assert(ud != NULL); 629 630 struct guest *g = ud; 631 632 uint64_t guest_rflags; 633 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RFLAGS, &guest_rflags); 634 assert(err_is_ok(err)); 635 return (guest_rflags & (1UL << 9)); 636} 637#endif 638 639static void 640virq_handler (void *ud, uint8_t irq, uint8_t irq_prio) 641{ 642 assert(ud != NULL); 643 644 struct guest *g = ud; 645 646 // tell the hw extensions that there is a virtual IRQ pending 647#ifdef CONFIG_SVM 648 amd_vmcb_vintr_virq_wrf(&g->vmcb, 1); 649 amd_vmcb_vintr_vintr_prio_wrf(&g->vmcb, irq_prio); 650 amd_vmcb_vintr_vintr_vector_wrf(&g->vmcb, irq); 651 amd_vmcb_vintr_v_ign_tpr_wrf(&g->vmcb, 1); 652#else 653 uint64_t guest_rflags; 654 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RFLAGS, &guest_rflags); 655 assert(guest_rflags & (1UL << 9)); 656 657 uint64_t info = (0 << 8 /*HWINTR*/) | (1UL << 31 /*INTR VALID*/) | irq; 658 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_ENTRY_INTR_INFO, info); 659 660 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_ACTIV_STATE, 0x0); 661 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_INTR_STATE, 0x0); 662 assert(err_is_ok(err)); 663 664 interrupt_priority = irq_prio; 665#endif 666 // if the guest is currently waiting then we have to restart it to make 667 // forward progress 668 if (!g->runnable) { 669 g->runnable = true; 670 guest_make_runnable(g, true); 671 } 672} 673 674static void 675guest_setup (struct guest *g) 676{ 677 errval_t err; 678 679 // initialize the guests slot_allocator 680 err = two_level_slot_alloc_init(&g->slot_alloc); 681 assert_err(err, "two_level_slot_alloc_init"); 682 683 struct frame_identity fi; 684 685 // allocate memory for the vmcb 686 err = guest_slot_alloc(g, &g->vmcb_cap); 687 assert_err(err, "guest_cspace_alloc"); 688 err = frame_create(g->vmcb_cap, VMCB_SIZE, NULL); 689 assert_err(err, "frame_create"); 690 err = invoke_frame_identify(g->vmcb_cap, &fi); 691 assert_err(err, "frame_identify"); 692 g->vmcb_pa = fi.base; 693 err = vspace_map_one_frame_attr((void**)&g->vmcb_va, VMCB_SIZE, g->vmcb_cap, 694 VREGION_FLAGS_READ_WRITE_NOCACHE, 695 NULL, NULL); 696 if (err_is_fail(err)) { 697 DEBUG_ERR(err, "vspace_map_one_frame_attr failed"); 698 } 699 700 // guest control 701 err = frame_alloc(&g->ctrl_cap, sizeof(struct guest_control), NULL); 702 assert_err(err, "frame_alloc"); 703 size_t size = ROUND_UP(sizeof(struct guest_control), BASE_PAGE_SIZE); 704 err = vspace_map_one_frame_attr((void**)&g->ctrl, size, g->ctrl_cap, 705 VREGION_FLAGS_READ_WRITE_NOCACHE, 706 NULL, NULL); 707 if (err_is_fail(err)) { 708 DEBUG_ERR(err, "vspace_map_one_frame_attr failed"); 709 } 710 g->ctrl->num_vm_exits_with_monitor_invocation = 0; 711 g->ctrl->num_vm_exits_without_monitor_invocation = 0; 712#ifdef CONFIG_SVM 713 // allocate memory for the iopm 714 err = frame_alloc(&g->iopm_cap, IOPM_SIZE, NULL); 715 assert_err(err, "frame_alloc"); 716 err = invoke_frame_identify(g->iopm_cap, &fi); 717 assert_err(err, "frame_identify"); 718 g->iopm_pa = fi.base; 719 err = vspace_map_one_frame_attr((void**)&g->iopm_va, IOPM_SIZE, g->iopm_cap, 720 VREGION_FLAGS_READ_WRITE_NOCACHE, 721 NULL, NULL); 722 if (err_is_fail(err)) { 723 DEBUG_ERR(err, "vspace_map_one_frame_attr failed"); 724 } 725#else 726 // allocate memory for I/O bitmap A 727 err = frame_alloc(&g->iobmp_a_cap, IOBMP_A_SIZE, NULL); 728 assert_err(err, "frame_alloc"); 729 err = invoke_frame_identify(g->iobmp_a_cap, &fi); 730 assert_err(err, "frame_identify"); 731 g->iobmp_a_pa = fi.base; 732 err = vspace_map_one_frame_attr((void**)&g->iobmp_a_va, IOBMP_A_SIZE, g->iobmp_a_cap, 733 VREGION_FLAGS_READ_WRITE_NOCACHE, 734 NULL, NULL); 735 if (err_is_fail(err)) { 736 DEBUG_ERR(err, "vspace_map_one_frame_attr failed"); 737 } 738 739 // allocate memory for I/O bitmap B 740 err = frame_alloc(&g->iobmp_b_cap, IOBMP_B_SIZE, NULL); 741 assert_err(err, "frame_alloc"); 742 err = invoke_frame_identify(g->iobmp_b_cap, &fi); 743 assert_err(err, "frame_identify"); 744 g->iobmp_b_pa = fi.base; 745 err = vspace_map_one_frame_attr((void**)&g->iobmp_b_va, IOBMP_B_SIZE, g->iobmp_b_cap, 746 VREGION_FLAGS_READ_WRITE_NOCACHE, 747 NULL, NULL); 748 if (err_is_fail(err)) { 749 DEBUG_ERR(err, "vspace_map_one_frame_attr failed"); 750 } 751 752 // allocate memory for the guest MSR store/load area 753 err = frame_alloc(&g->msr_area_cap, VMX_MSR_AREA_SIZE, NULL); 754 assert_err(err, "frame_alloc"); 755 err = invoke_frame_identify(g->msr_area_cap, &fi); 756 assert_err(err, "frame_identify"); 757 g->msr_area_pa = fi.base; 758 err = vspace_map_one_frame_attr((void**)&g->msr_area_va, VMX_MSR_AREA_SIZE, 759 g->msr_area_cap, 760 VREGION_FLAGS_READ_WRITE_NOCACHE, 761 NULL, NULL); 762 if (err_is_fail(err)) { 763 DEBUG_ERR(err, "vspace_map_one_frame_attr failed"); 764 } 765#endif 766 // allocate memory for the msrpm 767 err = frame_alloc(&g->msrpm_cap, MSRPM_SIZE, NULL); 768 assert_err(err, "frame_alloc"); 769 err = invoke_frame_identify(g->msrpm_cap, &fi); 770 assert_err(err, "frame_identify"); 771 g->msrpm_pa = fi.base; 772 err = vspace_map_one_frame_attr((void**)&g->msrpm_va, MSRPM_SIZE, 773 g->msrpm_cap, 774 VREGION_FLAGS_READ_WRITE_NOCACHE, 775 NULL, NULL); 776 if (err_is_fail(err)) { 777 DEBUG_ERR(err, "vspace_map_one_frame_attr failed"); 778 } 779 780 // initialize the allocated structures 781 initialize_iopm(g); 782 initialize_msrpm(g); 783#ifdef CONFIG_SVM 784 initialize_vmcb(g); 785#endif 786 // spawn the guest domain 787 spawn_guest_domain(g); 788 assert (grub_image != NULL); 789 install_grub_stage2(g, grub_image, grub_image_size); 790 //install_debug_app(g); 791 792 // add virtual hardware 793 g->apic = apic_new(APIC_BASE); 794 g->lpc = lpc_new(virq_handler, virq_pending, 795#ifndef CONFIG_SVM 796 virq_accepting, 797#endif 798 g, g->apic); 799 if (hdd0_image != NULL) { 800 g->hdds[0] = hdd_new_from_memory(hdd0_image, hdd0_image_size); 801 g->hdd_count++; 802 } 803 g->console = console_new(); 804 g->serial_ports[0] = pc16550d_new(0x3f8, 4, g->lpc); 805 806 // FIXME: Which virtual uart port is connected to which host port 807 // should be adjustable from the command line or a configuration 808 // file. 809 pc16550d_attach_to_host_uart(g->serial_ports[0], SERIAL_DRIVER); 810 g->serial_ports[1] = pc16550d_new(0x2f8, 3, g->lpc); 811 g->serial_ports[2] = pc16550d_new(0x3e8, 4, g->lpc); 812 g->serial_ports[3] = pc16550d_new(0x2e8, 3, g->lpc); 813 g->serial_port_count = 4; 814 815 g->pci = pci_new(); 816 init_host_devices(g->pci); 817 818// struct pci_device *ethernet = pci_ethernet_new(g->lpc, g); 819// int r = pci_attach_device(g->pci, 0, 2, ethernet); 820// assert(r == 0); 821// 822// struct pci_device *vmkitmon_eth = pci_vmkitmon_eth_new(g->lpc, g); 823// r = pci_attach_device(g->pci, 0, 3, vmkitmon_eth); 824// assert(r==0); 825 826 // set up bios memory 827 // FIXME: find a modular way to do this 828 *(uint16_t *)guest_to_host(g->mem_low_va + 0x400) = 0x3f8; // COM1 829 *(uint16_t *)guest_to_host(g->mem_low_va + 0x402) = 0x2f8; // COM2 830 831 g->runnable = true; 832} 833 834/** 835 * \brief Create a new guest. 836 * 837 * This function creates a new guest. It will do everything necessary to make 838 * the guest accept images to run. It will create a new domain and assign some 839 * memory to that domain. Afterwards it will load a bios into the memory and 840 * set the guest initial IP to the POST entry of the bios. 841 * 842 * \return The pointer to the newly created structure describing the guest. 843 */ 844struct guest * 845guest_create (void) 846{ 847 // support the allocation of one guest for now 848 assert(__guestp == NULL); 849 __guestp = &__guest; 850 memset(__guestp, 0, sizeof(struct guest)); 851 guest_setup(__guestp); 852 return __guestp; 853} 854 855static int 856run_realmode (struct guest *g) 857{ 858 int r; 859 860 realmode_switch_to(g); 861 r = realmode_exec(); 862 assert(r == REALMODE_ERR_OK); 863 realmode_switch_from(g); 864 865 guest_handle_vmexit(g); 866 867 return 0; 868}; 869 870#ifndef CONFIG_SVM 871// Return true if the "Enable EPT" Secondary Processor-based control is 872// set in the VMCS, else false. 873static inline bool vmx_ept_enabled(struct guest *g) 874{ 875 uint64_t sp_controls; 876 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_EXEC_SEC_PROC, &sp_controls); 877 assert(err_is_ok(err)); 878 return ((sp_controls & SP_CLTS_ENABLE_EPT) != 0); 879} 880 881// Set or clear the "Descriptor-table exiting" Secondary Processor-based 882// control if val is 1 or 0, respectively. 883static inline void vmx_intercept_desc_table_wrf(struct guest *g, int val) 884{ 885 assert(val == 0 || val == 1); 886 887 uint64_t sec_proc_ctrls; 888 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_EXEC_SEC_PROC, &sec_proc_ctrls); 889 if (val) { 890 uint64_t prim_proc_ctrls; 891 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_EXEC_PRIM_PROC, &prim_proc_ctrls); 892 assert(prim_proc_ctrls & PP_CLTS_SEC_CTLS); 893 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_EXEC_SEC_PROC, 894 sec_proc_ctrls | SP_CLTS_DESC_TABLE); 895 } else { 896 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_EXEC_SEC_PROC, 897 sec_proc_ctrls & ~SP_CLTS_DESC_TABLE); 898 } 899 assert(err_is_ok(err)); 900} 901 902 903// Before entering the guest, synchronize the CR0 shadow with the guest 904// CR0 value that is potentially changed in the real-mode emulator. 905static inline void vmx_set_cr0_shadow(struct guest *g) 906{ 907 uint64_t cr0_shadow; 908 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR0, &cr0_shadow); 909 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_CR0_RD_SHADOW, cr0_shadow); 910 assert(err_is_ok(err)); 911} 912#endif 913 914/** 915 * \brief Marks a guest as runnable. 916 * 917 * A call to this method will update the guest's runnable state and, if made 918 * runnable, yield the remaining time slice to the guest domain. 919 * 920 * \return Zero on success, non-zero on error 921 */ 922errval_t 923guest_make_runnable (struct guest *g, bool run) 924{ 925 assert(g->runnable); 926 927 errval_t err; 928 929 /* If the guest is currently in real mode (CR0.PE flag clear) then we do not 930 * schedule the domain to run the virtualization but run the real-mode 931 * emulation */ 932#ifdef CONFIG_SVM 933 if (UNLIKELY(run && amd_vmcb_cr0_rd(&g->vmcb).pe == 0)) { 934 if (!g->emulated_before_exit) { 935 // do the inverse of the code below 936 amd_vmcb_intercepts_rdgdtr_wrf(&g->vmcb, 1); 937 amd_vmcb_intercepts_wrgdtr_wrf(&g->vmcb, 1); 938 amd_vmcb_intercepts_rdldtr_wrf(&g->vmcb, 1); 939 amd_vmcb_intercepts_wrldtr_wrf(&g->vmcb, 1); 940 amd_vmcb_intercepts_rdidtr_wrf(&g->vmcb, 1); 941 amd_vmcb_intercepts_wridtr_wrf(&g->vmcb, 1); 942 amd_vmcb_intercepts_rdtr_wrf(&g->vmcb, 1); 943 amd_vmcb_intercepts_wrtr_wrf(&g->vmcb, 1); 944 amd_vmcb_cr_access_rdcr0_wrf(&g->vmcb, 1); 945 amd_vmcb_cr_access_wrcr0_wrf(&g->vmcb, 1); 946 amd_vmcb_cr_access_rdcr3_wrf(&g->vmcb, 1); 947 amd_vmcb_cr_access_wrcr3_wrf(&g->vmcb, 1); 948 amd_vmcb_intercepts_intn_wrf(&g->vmcb, 1); 949 950 // mark guest as emulated 951 g->emulated_before_exit = true; 952 } 953#else 954 uint64_t guest_cr0; 955 err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR0, &guest_cr0); 956 assert(err_is_ok(err)); 957 if (UNLIKELY(run && (guest_cr0 & CR0_PE) == 0)) { 958 if (!g->emulated_before_exit) { 959 vmx_intercept_desc_table_wrf(g, 1); 960 g->emulated_before_exit = true; 961 } 962#endif 963#if 0 /* why create a thread for this? it seems fine without! -AB */ 964 struct thread *t = thread_create((thread_func_t)run_realmode, g); 965 assert(t != NULL); 966 err = thread_detach(t); 967 assert(err_is_ok(err)); 968#else 969 run_realmode(g); 970#endif 971 return SYS_ERR_OK; 972 } 973 974 /* every time we move the machine from the emulated to virtualized we need 975 * to adjust some intercepts */ 976 if (UNLIKELY(run && g->emulated_before_exit)) { 977#ifdef CONFIG_SVM 978 // we enforce NP to be enabled (no shadow paging support) 979 assert(amd_vmcb_np_rd(&g->vmcb).enable == 1); 980 981 // disable GDTR intercept 982 amd_vmcb_intercepts_rdgdtr_wrf(&g->vmcb, 0); 983 amd_vmcb_intercepts_wrgdtr_wrf(&g->vmcb, 0); 984 // disable GDTR intercept 985 amd_vmcb_intercepts_rdldtr_wrf(&g->vmcb, 0); 986 amd_vmcb_intercepts_wrldtr_wrf(&g->vmcb, 0); 987 // disable IDTR intercept 988 amd_vmcb_intercepts_rdidtr_wrf(&g->vmcb, 0); 989 amd_vmcb_intercepts_wridtr_wrf(&g->vmcb, 0); 990 // disable TR intercept 991 amd_vmcb_intercepts_rdtr_wrf(&g->vmcb, 0); 992 amd_vmcb_intercepts_wrtr_wrf(&g->vmcb, 0); 993 // disable non essential CR0 access intercepts_t 994 amd_vmcb_cr_access_rdcr0_wrf(&g->vmcb, 0); 995 amd_vmcb_cr_access_wrcr0_wrf(&g->vmcb, 0); 996 // disable CR3 access intercepts 997 assert(amd_vmcb_np_rd(&g->vmcb).enable != 0); 998 amd_vmcb_cr_access_rdcr3_wrf(&g->vmcb, 0); 999 amd_vmcb_cr_access_wrcr3_wrf(&g->vmcb, 0); 1000 // disable INTn intercept 1001 // we have to be outside of real mode for this to work 1002 assert(amd_vmcb_cr0_rd(&g->vmcb).pe != 0); 1003 amd_vmcb_intercepts_intn_wrf(&g->vmcb, 0); 1004#else 1005 bool ept_enabled = vmx_ept_enabled(g); 1006 assert(ept_enabled); 1007 vmx_intercept_desc_table_wrf(g, 0); 1008 assert(guest_cr0 & CR0_PE); 1009 vmx_set_cr0_shadow(g); 1010#endif 1011 // mark guest as not emulated 1012 g->emulated_before_exit = false; 1013 } 1014 1015 // update the guets domain's runnable state 1016 err = invoke_dispatcher(g->dcb_cap, NULL_CAP, NULL_CAP, NULL_CAP, NULL_CAP, run); 1017 assert_err(err, "dispatcher_make_runnable"); 1018 // yield the dispatcher 1019 if (run) { 1020 thread_yield_dispatcher(NULL_CAP); 1021 } 1022 1023 return SYS_ERR_OK; 1024} 1025 1026/* VMEXIT hanlders */ 1027 1028#define HANDLER_ERR_OK (0) 1029#define HANDLER_ERR_FATAL (-1) 1030 1031#ifdef CONFIG_SVM 1032static int 1033handle_vmexit_unhandeled (struct guest *g) 1034{ 1035 printf("Unhandeled guest vmexit:\n"); 1036 printf(" code:\t %lx\n", amd_vmcb_exitcode_rd(&g->vmcb)); 1037 printf(" info1:\t %lx\n", amd_vmcb_exitinfo1_rd(&g->vmcb)); 1038 printf(" info2:\t %lx\n", amd_vmcb_exitinfo2_rd(&g->vmcb)); 1039 printf(" intinfo: %lx\n", amd_vmcb_exitintinfo_rd(&g->vmcb)); 1040 1041 printf("VMCB save area:\n"); 1042 printf(" cr0:\t%lx\n", amd_vmcb_cr0_rd_raw(&g->vmcb)); 1043 printf(" cr2:\t%lx\n", amd_vmcb_cr2_rd_raw(&g->vmcb)); 1044 printf(" cr3:\t%lx\n", amd_vmcb_cr3_rd_raw(&g->vmcb)); 1045 printf(" cr4:\t%lx\n", amd_vmcb_cr4_rd_raw(&g->vmcb)); 1046 printf(" efer:\t%lx\n", amd_vmcb_efer_rd_raw(&g->vmcb)); 1047 printf(" rip:\t%lx\n", amd_vmcb_rip_rd_raw(&g->vmcb)); 1048 printf(" cs:\tselector %x, base %lx, limit %x, attrib %x\n", 1049 amd_vmcb_cs_selector_rd(&g->vmcb), amd_vmcb_cs_base_rd(&g->vmcb), 1050 amd_vmcb_cs_limit_rd(&g->vmcb), amd_vmcb_cs_attrib_rd_raw(&g->vmcb)); 1051 printf(" ds:\tselector %x, base %lx, limit %x, attrib %x\n", 1052 amd_vmcb_ds_selector_rd(&g->vmcb), amd_vmcb_ds_base_rd(&g->vmcb), 1053 amd_vmcb_ds_limit_rd(&g->vmcb), amd_vmcb_ds_attrib_rd_raw(&g->vmcb)); 1054 printf(" es:\tselector %x, base %lx, limit %x, attrib %x\n", 1055 amd_vmcb_es_selector_rd(&g->vmcb), amd_vmcb_es_base_rd(&g->vmcb), 1056 amd_vmcb_es_limit_rd(&g->vmcb), amd_vmcb_es_attrib_rd_raw(&g->vmcb)); 1057 printf(" ss:\tselector %x, base %lx, limit %x, attrib %x\n", 1058 amd_vmcb_ss_selector_rd(&g->vmcb), amd_vmcb_ss_base_rd(&g->vmcb), 1059 amd_vmcb_ss_limit_rd(&g->vmcb), amd_vmcb_ss_attrib_rd_raw(&g->vmcb)); 1060 printf(" rax:\t%lx\n", amd_vmcb_rax_rd_raw(&g->vmcb)); 1061 printf(" rbx:\t%lx\n", g->ctrl->regs.rbx); 1062 printf(" rcx:\t%lx\n", g->ctrl->regs.rcx); 1063 printf(" rdx:\t%lx\n", g->ctrl->regs.rdx); 1064 printf(" rsi:\t%lx\n", g->ctrl->regs.rsi); 1065 printf(" rdi:\t%lx\n", g->ctrl->regs.rdi); 1066 1067 return HANDLER_ERR_FATAL; 1068} 1069#else 1070static int 1071handle_vmexit_unhandeled (struct guest *g) 1072{ 1073 printf("Unhandeled guest vmexit:\n"); 1074 printf(" exit reason:\t %"PRIu16"\n", saved_exit_reason); 1075 printf(" exit qualification:\t %"PRIx64"\n", saved_exit_qual); 1076 printf(" next rip (I/O instruction):\t %"PRIx64"\n", saved_rip); 1077 1078 uint64_t gpaddr; 1079 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GPADDR_F, &gpaddr); 1080 printf(" guest physical-address:\t %"PRIx64"\n", gpaddr); 1081 1082 uint64_t guest_cr0, guest_cr3, guest_cr4; 1083 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR0, &guest_cr0); 1084 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR3, &guest_cr3); 1085 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR4, &guest_cr4); 1086 1087 uint64_t guest_efer, guest_rip; 1088 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_EFER_F, &guest_efer); 1089 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 1090 1091 uint64_t guest_cs_sel, guest_cs_base, guest_cs_lim, 1092 guest_cs_access; 1093 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CS_SEL, &guest_cs_sel); 1094 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CS_BASE, &guest_cs_base); 1095 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CS_LIM, &guest_cs_lim); 1096 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CS_ACCESS, &guest_cs_access); 1097 1098 uint64_t guest_ds_sel, guest_ds_base, guest_ds_lim, 1099 guest_ds_access; 1100 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_DS_SEL, &guest_ds_sel); 1101 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_DS_BASE, &guest_ds_base); 1102 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_DS_LIM, &guest_ds_lim); 1103 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_DS_ACCESS, &guest_ds_access); 1104 1105 uint64_t guest_es_sel, guest_es_base, guest_es_lim, 1106 guest_es_access; 1107 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_ES_SEL, &guest_es_sel); 1108 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_ES_BASE, &guest_es_base); 1109 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_ES_LIM, &guest_es_lim); 1110 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_ES_ACCESS, &guest_es_access); 1111 1112 uint64_t guest_ss_sel, guest_ss_base, guest_ss_lim, 1113 guest_ss_access; 1114 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_SS_SEL, &guest_ss_sel); 1115 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_SS_BASE, &guest_ss_base); 1116 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_SS_LIM, &guest_ss_lim); 1117 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_SS_ACCESS, &guest_ss_access); 1118 assert(err_is_ok(err)); 1119 1120 printf("VMCS save area:\n"); 1121 printf(" cr0:\t%lx\n", guest_cr0); 1122 printf(" cr3:\t%lx\n", guest_cr3); 1123 printf(" cr4:\t%lx\n", guest_cr4); 1124 printf(" efer:\t%lx\n", guest_efer); 1125 printf(" rip:\t%lx\n", guest_rip); 1126 printf(" cs:\tselector %lx, base %lx, limit %lx, access %lx\n", 1127 guest_cs_sel, guest_cs_base, guest_cs_lim, guest_cs_access); 1128 printf(" ds:\tselector %lx, base %lx, limit %lx, access %lx\n", 1129 guest_ds_sel, guest_ds_base, guest_ds_lim, guest_ds_access); 1130 printf(" es:\tselector %lx, base %lx, limit %lx, access %lx\n", 1131 guest_es_sel, guest_es_base, guest_es_lim, guest_es_access); 1132 printf(" ss:\tselector %lx, base %lx, limit %lx, access %lx\n", 1133 guest_ss_sel, guest_ss_base, guest_ss_lim, guest_ss_access); 1134 printf(" rax:\t%lx\n", g->ctrl->regs.rax); 1135 printf(" rbx:\t%lx\n", g->ctrl->regs.rbx); 1136 printf(" rcx:\t%lx\n", g->ctrl->regs.rcx); 1137 printf(" rdx:\t%lx\n", g->ctrl->regs.rdx); 1138 printf(" rsi:\t%lx\n", g->ctrl->regs.rsi); 1139 printf(" rdi:\t%lx\n", g->ctrl->regs.rdi); 1140 1141 return HANDLER_ERR_FATAL; 1142} 1143#endif 1144 1145static inline uint64_t 1146lookup_paddr_long_mode (struct guest *g, uint64_t vaddr) 1147{ 1148 union x86_lm_va va = { .raw = vaddr }; 1149 uint64_t *page_table; 1150 1151 // get a pointer to the pml4 table 1152#ifdef CONFIG_SVM 1153 page_table = (uint64_t *)guest_to_host(amd_vmcb_cr3_rd(&g->vmcb)); 1154#else 1155 uint64_t guest_cr3; 1156 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR3, &guest_cr3); 1157 assert(err_is_ok(err)); 1158 page_table = (uint64_t *)guest_to_host(guest_cr3); 1159#endif 1160 // get pml4 entry 1161 union x86_lm_pml4_entry pml4e = { .raw = page_table[va.u.pml4_idx] }; 1162 assert (pml4e.u.p == 1); 1163 1164 // get a pointer to the pdp table 1165 page_table = (uint64_t *)guest_to_host(pml4e.u.pdp_base_pa << 12); 1166 // get pdp entry 1167 union x86_lm_pdp_entry pdpe = { .raw = page_table[va.u.pdp_idx] }; 1168 assert(pdpe.u.p == 1); 1169 // check for 1GB page (PS bit set) 1170 if (pdpe.u.ps == 1) { 1171 return (pdpe.u1gb.base_pa << 30) | va.u1gb.pa_offset; 1172 } 1173 1174 // get a pointer to the pd table 1175 page_table = (uint64_t *)guest_to_host(pdpe.u.pd_base_pa << 12); 1176 // get pd entry 1177 union x86_lm_pd_entry pde = { .raw = page_table[va.u.pd_idx] }; 1178 if (pde.u.p == 0) { 1179 printf("g2h %lx, pml4e %p %lx, pdpe %p %lx, pde %p %lx\n", 1180 guest_to_host(0), &pml4e, pml4e.raw, &pdpe, pdpe.raw, &pde, pde.raw); 1181 } 1182 assert(pde.u.p == 1); 1183 // check for 2MB page (PS bit set) 1184 if (pde.u.ps == 1) { 1185 return (pde.u2mb.base_pa << 21) | va.u2mb.pa_offset; 1186 } 1187 1188 // get a pointer to the page table 1189 page_table = (uint64_t *)guest_to_host(pde.u.pt_base_pa << 12); 1190 // get the page table entry 1191 union x86_lm_pt_entry pte = { .raw = page_table[va.u.pt_idx] }; 1192 assert(pte.u.p == 1); 1193 1194 return (pte.u.base_pa << 12) | va.u.pa_offset; 1195} 1196 1197static inline uint32_t 1198lookup_paddr_legacy_mode (struct guest *g, uint32_t vaddr) 1199{ 1200// printf("lookup_paddr_legacy_mode enter\n"); 1201 // PAE not supported 1202#ifdef CONFIG_SVM 1203 guest_assert(g, amd_vmcb_cr4_rd(&g->vmcb).pae == 0); 1204#else 1205 uint64_t guest_cr4; 1206 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR4, &guest_cr4); 1207 guest_assert(g, (guest_cr4 & CR4_PAE) == 0); 1208#endif 1209 union x86_legm_va va = { .raw = vaddr }; 1210 uint32_t *page_table; 1211 1212 // get a pointer to the pd table 1213#ifdef CONFIG_SVM 1214 page_table = (uint32_t *)guest_to_host(amd_vmcb_cr3_rd(&g->vmcb)); 1215#else 1216 uint64_t guest_cr3; 1217 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR3, &guest_cr3); 1218 assert(err_is_ok(err)); 1219 page_table = (uint32_t *)guest_to_host(guest_cr3); 1220#endif 1221 1222 // get pd entry 1223 union x86_legm_pd_entry pde = { .raw = page_table[va.u.pd_idx] }; 1224 assert (pde.u.p == 1); 1225 // check for 4MB page (PS bit set) 1226 if (pde.u.ps == 1) { 1227 return (pde.u4mb.base_pa << 22) | va.u4mb.pa_offset; 1228 } 1229 1230 // get a pointer to the page table 1231 page_table = (uint32_t *)guest_to_host(pde.u.pt_base_pa << 12); 1232 // get the page table entry 1233 union x86_legm_pt_entry pte = { .raw = page_table[va.u.pt_idx] }; 1234 assert(pte.u.p == 1); 1235 1236 return (pte.u.base_pa << 12) | va.u.pa_offset; 1237} 1238 1239// retunrs a pointer to a byte array starting at the current instruction 1240static inline int 1241get_instr_arr (struct guest *g, uint8_t **arr) 1242{ 1243#ifdef CONFIG_SVM 1244 if (UNLIKELY(amd_vmcb_cr0_rd(&g->vmcb).pg == 0)) { 1245#else 1246 uint64_t guest_cr0; 1247 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR0, &guest_cr0); 1248 if (UNLIKELY((guest_cr0 & CR0_PG) == 0)) { 1249#endif 1250 //printf("Segmentation active!\n"); 1251 // without paging 1252 // take segmentation into account 1253#ifdef CONFIG_SVM 1254 *arr = (uint8_t *)(guest_to_host(g->mem_low_va) + 1255 amd_vmcb_cs_base_rd(&g->vmcb) + 1256 amd_vmcb_rip_rd(&g->vmcb)); 1257#else 1258 uint64_t guest_cs_base, guest_rip; 1259 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CS_BASE, &guest_cs_base); 1260 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 1261 *arr = (uint8_t *)(guest_to_host(g->mem_low_va) + 1262 guest_cs_base + guest_rip); 1263#endif 1264 } else { 1265 // with paging 1266#ifdef CONFIG_SVM 1267 if (amd_vmcb_efer_rd(&g->vmcb).lma == 1) { 1268#else 1269 uint64_t guest_efer; 1270 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_EFER_F, &guest_efer); 1271 if (guest_efer & EFER_LMA) { 1272#endif 1273 // long mode 1274#ifdef CONFIG_SVM 1275 if (amd_vmcb_cs_attrib_rd(&g->vmcb).l == 1) { 1276 // 64-bit mode 1277 *arr = (uint8_t *)guest_to_host(lookup_paddr_long_mode(g, 1278 amd_vmcb_rip_rd(&g->vmcb))); 1279#else 1280 uint64_t cs_access_rights, guest_rip; 1281 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CS_ACCESS, &cs_access_rights); 1282 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 1283 if (cs_access_rights & ACCESS_RIGHTS_LONG_MODE) { 1284 *arr = (uint8_t *)guest_to_host(lookup_paddr_long_mode(g, 1285 guest_rip)); 1286#endif 1287 } else { 1288 // cmpatibility mode 1289 guest_assert(g, !"compatiblity mode not supported yet"); 1290 } 1291 } else { 1292 // Legacy (aka. Paged Protected) Mode 1293#ifdef CONFIG_SVM 1294 assert(amd_vmcb_cr0_rd(&g->vmcb).pe == 1); 1295 1296 *arr = (uint8_t *)guest_to_host(lookup_paddr_legacy_mode(g, 1297 amd_vmcb_rip_rd(&g->vmcb))); 1298#else 1299 assert(guest_cr0 & CR0_PE); 1300 1301 uint64_t guest_rip; 1302 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 1303 *arr = (uint8_t *)guest_to_host(lookup_paddr_legacy_mode(g, 1304 guest_rip)); 1305#endif 1306 } 1307 } 1308#ifndef CONFIG_SVM 1309 assert(err_is_ok(err)); 1310#endif 1311 return HANDLER_ERR_OK; 1312} 1313 1314static inline uint64_t 1315get_reg_val_by_reg_num (struct guest *g, uint8_t regnum) { 1316 switch (regnum) { 1317 case 0: 1318 return guest_get_rax(g); 1319 case 1: 1320 return guest_get_rcx(g); 1321 case 2: 1322 return guest_get_rdx(g); 1323 case 3: 1324 return guest_get_rbx(g); 1325 case 4: 1326 return guest_get_rsp(g); 1327 case 5: 1328 return guest_get_rbp(g); 1329 case 6: 1330 return guest_get_rsi(g); 1331 case 7: 1332 return guest_get_rdi(g); 1333 default: 1334 assert(!"not reached"); 1335 return 0; 1336 } 1337} 1338 1339static inline void 1340set_reg_val_by_reg_num (struct guest *g, uint8_t regnum, uint64_t val) { 1341 switch (regnum) { 1342 case 0: 1343 guest_set_rax(g, val); 1344 break; 1345 case 1: 1346 guest_set_rcx(g, val); 1347 break; 1348 case 2: 1349 guest_set_rdx(g, val); 1350 break; 1351 case 3: 1352 guest_set_rbx(g, val); 1353 break; 1354 case 4: 1355 guest_set_rsp(g, val); 1356 break; 1357 case 5: 1358 guest_set_rbp(g, val); 1359 break; 1360 case 6: 1361 guest_set_rsi(g, val); 1362 break; 1363 case 7: 1364 guest_set_rdi(g, val); 1365 break; 1366 default: 1367 assert(!"not reached"); 1368 break; 1369 } 1370} 1371 1372static int 1373handle_vmexit_cr_access (struct guest *g) 1374{ 1375 int r; 1376 uint8_t *code = NULL; 1377#ifndef CONFIG_SVM 1378 errval_t err = 0; 1379 if (g->emulated_before_exit) { 1380 assert(saved_exit_reason == VMX_EXIT_REASON_CR_ACCESS); 1381 assert(((saved_exit_qual >> 0) & 0xf) == 0); 1382 } 1383#endif 1384 // fetch the location to the code 1385 r = get_instr_arr(g, &code); 1386 if (r != HANDLER_ERR_OK) { 1387 return r; 1388 } 1389 assert(code != NULL); 1390 1391 assert(code[0] == 0x0f && (code[1] == 0x20 || code[1] == 0x22)); 1392 1393 uint64_t val; 1394 bool read = (code[1] == 0x20); 1395 union x86_modrm mod; 1396 mod.raw = code[2]; 1397 1398 // FIXME: use proper exception 1399 assert(mod.u.mod == 3); 1400 1401 // source 1402 if (read) { 1403 // read from CR 1404 switch (mod.u.regop) { 1405 case 0: 1406#ifdef CONFIG_SVM 1407 val = amd_vmcb_cr0_rd_raw(&g->vmcb); 1408#else 1409 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR0, &val); 1410#endif 1411 break; 1412 default: 1413 printf("CR access: unknown CR source register\n"); 1414 return handle_vmexit_unhandeled(g); 1415 } 1416 } else { 1417 // read from GPR 1418 val = get_reg_val_by_reg_num(g, mod.u.rm); 1419 } 1420 1421 // destination 1422 if (read) { 1423 // write to GPR 1424 switch (mod.u.rm) { 1425 case 0: 1426 guest_set_rax(g, val); 1427 break; 1428 case 1: 1429 guest_set_rcx(g, val); 1430 break; 1431 case 2: 1432 guest_set_rdx(g, val); 1433 break; 1434 case 3: 1435 guest_set_rbx(g, val); 1436 break; 1437 default: 1438 printf("CR access: unknown GPR destination register\n"); 1439 return handle_vmexit_unhandeled(g); 1440 } 1441 } else { 1442 // write to CR 1443 switch (mod.u.regop) { 1444 case 0: 1445#ifdef CONFIG_SVM 1446 amd_vmcb_cr0_wr_raw(&g->vmcb, val); 1447#else 1448 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_CR0, val); 1449#endif 1450 break; 1451 1452 case 4: 1453 // allow writing to CR4 by do nothing for this case 1454 break; 1455 default: 1456 printf("CR access: unknown CR destination register\n"); 1457 return handle_vmexit_unhandeled(g); 1458 } 1459 } 1460 1461 // advance the rip beyond the instruction 1462#ifdef CONFIG_SVM 1463 amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 3); 1464#else 1465 uint64_t guest_rip; 1466 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 1467 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 3); 1468 assert(err_is_ok(err)); 1469#endif 1470 return HANDLER_ERR_OK; 1471} 1472 1473static int 1474handle_vmexit_ldt (struct guest *g) 1475{ 1476 int r; 1477 uint8_t *code = NULL; 1478 uint8_t *mem; 1479 1480 // this handler supports only real-mode 1481#ifdef CONFIG_SVM 1482 assert(amd_vmcb_cr0_rd(&g->vmcb).pe == 0); 1483#else 1484 uint64_t guest_cr0; 1485 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR0, &guest_cr0); 1486 assert((guest_cr0 & CR0_PE) == 0); 1487#endif 1488 // fetch the location to the code 1489 r = get_instr_arr(g, &code); 1490 if (r != HANDLER_ERR_OK) { 1491 return r; 1492 } 1493 mem = (uint8_t *)guest_to_host(g->mem_low_va); 1494 assert(code != NULL); 1495 1496 assert (code[0] == 0x0f && code[1] == 0x01); 1497 1498 // check for relevant instruction prefixes 1499 bool addr32 = code[-2] == 0x67 || code[-1] == 0x67; 1500 bool op32 = code[-2] == 0x66 || code[-1] == 0x66; 1501 // fetch modrm 1502 union x86_modrm modrm = { .raw = code[2] }; 1503 1504 assert(modrm.u.regop == 2 || modrm.u.regop == 3); 1505 guest_assert(g, op32); 1506 1507 uint32_t addr; 1508 if (addr32) { 1509 // byte 3-6 hold a 32 bit address to a mem location where the first word 1510 // holds the limit and the following dword holds the base 1511 addr = *(uint32_t *)&code[3]; 1512 } else { 1513 // byte 3-4 hold a 16 bit address to a mem location where the first word 1514 // holds the limit and the following dword holds the base 1515 // this address is relative to DS base 1516#ifdef CONFIG_SVM 1517 addr = *(uint16_t *)&code[3] + amd_vmcb_ds_base_rd(&g->vmcb); 1518#else 1519 uint64_t guest_ds_base; 1520 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_DS_BASE, &guest_ds_base); 1521 addr = *(uint16_t *)&code[3] + guest_ds_base; 1522#endif 1523 } 1524 1525 // santity check on the addr 1526 // FIXME: raise a proper exception 1527 if (addr > g->mem_high_va) { 1528 printf("Memory access beyond physical address space\n"); 1529 return HANDLER_ERR_FATAL; 1530 } 1531 1532 // load the actual register 1533 if (modrm.u.regop == 2) { 1534 // LGDT 1535#ifdef CONFIG_SVM 1536 amd_vmcb_gdtr_limit_wr(&g->vmcb, *(uint16_t*)(mem + addr)); 1537 amd_vmcb_gdtr_base_wr(&g->vmcb, *(uint32_t*)(mem + addr + 2)); 1538#else 1539 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_GDTR_LIM, 1540 *(uint16_t*)(mem + addr)); 1541 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_GDTR_BASE, 1542 *(uint32_t*)(mem + addr + 2)); 1543#endif 1544 1545 } else if (modrm.u.regop == 3) { 1546 // LIDT 1547#ifdef CONFIG_SVM 1548 amd_vmcb_idtr_limit_wr(&g->vmcb, *(uint16_t*)(mem + addr)); 1549 amd_vmcb_idtr_base_wr(&g->vmcb, *(uint32_t*)(mem + addr + 2)); 1550#else 1551 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_IDTR_LIM, 1552 *(uint16_t*)(mem + addr)); 1553 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_IDTR_BASE, 1554 *(uint32_t*)(mem + addr + 2)); 1555#endif 1556 } else { 1557 assert(!"not reached"); 1558 } 1559 1560 // advance the rip beyond the instruction 1561#ifdef CONFIG_SVM 1562 if (addr32) { 1563 amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 7); 1564 } else { 1565 amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 5); 1566 } 1567#else 1568 uint64_t guest_rip; 1569 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 1570 if (addr32) { 1571 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 7); 1572 } else { 1573 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 5); 1574 } 1575 assert(err_is_ok(err)); 1576#endif 1577 return HANDLER_ERR_OK; 1578} 1579 1580#ifndef CONFIG_SVM 1581static inline void vmx_vmcs_rflags_cf_wrf(struct guest *g, int val) { 1582 assert(val == 0 || val == 1); 1583 uint64_t guest_rflags; 1584 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RFLAGS, &guest_rflags); 1585 if (val) { 1586 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RFLAGS, 1587 guest_rflags | RFLAGS_CF); 1588 } else { 1589 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RFLAGS, 1590 guest_rflags & (~RFLAGS_CF)); 1591 } 1592 assert(err_is_ok(err)); 1593} 1594#endif 1595 1596static int 1597handle_vmexit_swint (struct guest *g) 1598{ 1599 int r; 1600 uint8_t *code = NULL; 1601 1602 r = get_instr_arr(g, &code); 1603 if (r != HANDLER_ERR_OK) { 1604 return r; 1605 } 1606 assert (code != NULL); 1607 1608 // check for correct instruciton 1609 assert(code[0] == 0xcd); 1610 1611 // the number of the interrupt is followed by the INT (0xcd) opcode 1612 uint8_t int_num = code[1]; 1613 1614 // check whether the guest is in real mode 1615#ifdef CONFIG_SVM 1616 if (amd_vmcb_cr0_rd(&g->vmcb).pe == 0) { 1617#else 1618 uint64_t guest_ds_base, es_guest_base; 1619 uint64_t guest_cr0, guest_rip; 1620 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR0, &guest_cr0); 1621 if ((guest_cr0 & CR0_PE) == 0) { 1622#endif 1623 // in real mode the interrupts starting at 10 have different meaning 1624 // examine the sw interrupt 1625 switch (int_num) { 1626 case 0x10: 1627 r = console_handle_int10(g->console, g); 1628 if (r != HANDLER_ERR_OK) { 1629 printf("Unhandeled method on INT 0x10\n"); 1630 return handle_vmexit_unhandeled(g); 1631 } 1632 break; 1633 case 0x12: 1634 switch (guest_get_ax(g)) { 1635 case 0: // GET MEMORY SIZE 1636 // our VM always has 1MB of base memory 1637 // AX holds the amount of 1KB memory blocks starting at 1638 // addr 0 which is 640 (640 KiB) 1639 guest_set_ax(g, 640); 1640 break; 1641 default: 1642 printf("Unhandeled method on INT 0x12\n"); 1643 return handle_vmexit_unhandeled(g); 1644 } 1645 break; 1646 case 0x13: 1647 // Bootable CD-ROM - GET STATUS 1648 if (guest_get_ax(g) == 0x4b01) { 1649 // no cdrom support 1650#ifdef CONFIG_SVM 1651 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 1652#else 1653 vmx_vmcs_rflags_cf_wrf(g, 1); 1654#endif 1655 } 1656 // DISK RESET 1657 else if (guest_get_ah(g) == 0) { 1658 for (int i = 0; i < g->hdd_count; i++) { 1659 hdd_reset(g->hdds[i]); 1660 } 1661 } 1662 // DISK - GET DRIVE PARAMETERS (PC,XT286,CONV,PS,ESDI,SCSI) 1663 else if (guest_get_ah(g) == 0x08) { 1664 uint8_t dl = guest_get_dl(g); 1665 1666 // only respond to installed hard disks 1667 if ((dl >> 7) && ((dl & 0x7f) < g->hdd_count)) { 1668 uint16_t c; 1669 uint8_t h, s; 1670 1671 r = hdd_get_geometry_chs(g->hdds[dl & 0x7f], &c, &h, &s); 1672 assert(r == 0); 1673 1674 // set some return values for success 1675 guest_set_ah(g, 0); 1676#ifdef CONFIG_SVM 1677 amd_vmcb_rflags_cf_wrf(&g->vmcb, 0); 1678#else 1679 vmx_vmcs_rflags_cf_wrf(g, 0); 1680#endif 1681 guest_set_bl(g, 0); 1682 // store the geometry into the correct registers 1683 guest_set_cx(g, c << 6 | (s & 0x3f)); 1684 guest_set_dh(g, h); 1685 guest_set_dl(g, g->hdd_count); 1686 } else { 1687#ifdef CONFIG_SVM 1688 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 1689#else 1690 vmx_vmcs_rflags_cf_wrf(g, 1); 1691#endif 1692 // it is not really clear to me what ah should contain 1693 // when the drive is not present, so set it to FF 1694 guest_set_ah(g, 1); 1695 } 1696 } 1697 // INT 13 Extensions - INSTALLATION CHECK 1698 else if (guest_get_ah(g) == 0x41 && guest_get_bx(g) == 0x55aa) { 1699#ifdef CONFIG_SVM 1700 amd_vmcb_rflags_cf_wrf(&g->vmcb, 0); 1701#else 1702 vmx_vmcs_rflags_cf_wrf(g, 0); 1703#endif 1704 guest_set_bx(g, 0xaa55); 1705 guest_set_ah(g, 0x01); // Drive extensions 1.x 1706 guest_set_al(g, 0); 1707 guest_set_cx(g, 0x5); 1708 } 1709 // IBM/MS INT 13 Extensions - EXTENDED READ 1710 else if (guest_get_ah(g) == 0x42) { 1711 uint8_t dl = guest_get_dl(g); 1712 1713 // only respond to installed hard disks 1714 if ((dl >> 7) && ((dl & 0x7f) < g->hdd_count)) { 1715#ifdef CONFIG_SVM 1716 amd_vmcb_rflags_cf_wrf(&g->vmcb, 0); 1717#else 1718 vmx_vmcs_rflags_cf_wrf(g, 0); 1719#endif 1720 guest_set_ah(g, 0); 1721 1722 struct disk_access_block { 1723 uint8_t size; 1724 uint8_t reserved; 1725 uint16_t count; 1726 // pointer to the data buffer formated like 1727 // SEGMENT:ADDRESS 1728 uint32_t transfer_buffer; 1729 uint64_t abs_block_number; 1730 } __attribute__ ((packed)); 1731 1732 // memory location of the disk access block 1733#ifdef CONFIG_SVM 1734 uintptr_t mem = guest_to_host(g->mem_low_va) + 1735 amd_vmcb_ds_base_rd(&g->vmcb) + 1736 guest_get_si(g); 1737#else 1738 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_DS_BASE, &guest_ds_base); 1739 uintptr_t mem = guest_to_host(g->mem_low_va) + 1740 guest_ds_base + guest_get_si(g); 1741#endif 1742 1743 struct disk_access_block *dap = (void *)mem; 1744 1745 if (dap->size < 0x10) { 1746#ifdef CONFIG_SVM 1747 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 1748#else 1749 vmx_vmcs_rflags_cf_wrf(g, 1); 1750#endif 1751 guest_set_ah(g, 1); 1752 } else { 1753 // dap->transfer buffer points to a real-mode segment 1754 // resolve it according to that rules 1755 mem = guest_to_host(g->mem_low_va) + 1756 ((dap->transfer_buffer >> 16) << 4) + 1757 (dap->transfer_buffer & 0xffff); 1758 1759 size_t count = dap->count; 1760 r = hdd_read_blocks(g->hdds[dl & 0x7f], 1761 dap->abs_block_number, 1762 &count, mem); 1763 dap->count = count; 1764 1765 if (r != HANDLER_ERR_OK) { 1766#ifdef CONFIG_SVM 1767 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 1768#else 1769 vmx_vmcs_rflags_cf_wrf(g, 1); 1770#endif 1771 guest_set_ah(g, 1); 1772 } 1773 } 1774 } else { 1775#ifdef CONFIG_SVM 1776 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 1777#else 1778 vmx_vmcs_rflags_cf_wrf(g, 1); 1779#endif 1780 // it is not really clear to me what ah should contain 1781 // when the drive is not present, so set it to FF 1782 guest_set_ah(g, 1); 1783 } 1784 } 1785 // IBM/MS INT 13 Extensions - GET DRIVE PARAMETERS 1786 else if (guest_get_ah(g) == 0x48) { 1787 uint8_t dl = guest_get_dl(g); 1788 1789 // only respond to installed hard disks 1790 if ((dl >> 7) && ((dl & 0x7f) < g->hdd_count)) { 1791 // structure to hold drive info 1792 struct drive_params { 1793 uint16_t size; 1794 uint16_t flags; 1795 uint32_t cylinders; 1796 uint32_t heads; 1797 uint32_t sectors; 1798 uint64_t total_sectors; 1799 uint16_t bytes_per_sector; 1800 } __attribute__ ((packed)); 1801 1802 // memory where the drive info shall be stored 1803#ifdef CONFIG_SVM 1804 uintptr_t mem = guest_to_host(g->mem_low_va) + 1805 amd_vmcb_ds_base_rd(&g->vmcb) + 1806 guest_get_si(g); 1807#else 1808 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_DS_BASE, &guest_ds_base); 1809 uintptr_t mem = guest_to_host(g->mem_low_va) + 1810 guest_ds_base + guest_get_si(g); 1811#endif 1812 1813 struct drive_params *drp = (void *)mem; 1814 1815 // sanity check 1816 if (drp->size < sizeof(struct drive_params)) { 1817#ifdef CONFIG_SVM 1818 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 1819#else 1820 vmx_vmcs_rflags_cf_wrf(g, 1); 1821#endif 1822 } else { 1823#ifdef CONFIG_SVM 1824 amd_vmcb_rflags_cf_wrf(&g->vmcb, 0); 1825#else 1826 vmx_vmcs_rflags_cf_wrf(g, 0); 1827#endif 1828 guest_set_ah(g, 0); 1829 1830 drp->size = sizeof(struct drive_params); 1831 // CHS invalid, no removable drive, etc 1832 drp->flags = 0; 1833 drp->cylinders = 0; 1834 drp->heads = 0; 1835 drp->sectors = 0; 1836 drp->total_sectors = hdd_get_blocks_count( 1837 g->hdds[dl & 0x7f]); 1838 drp->bytes_per_sector = 512; // FIXME: Hardcoded 1839 } 1840 } else { 1841#ifdef CONFIG_SVM 1842 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 1843#else 1844 vmx_vmcs_rflags_cf_wrf(g, 1); 1845#endif 1846 // it is not really clear to me what ah should contain 1847 // when the drive is not present, so set it to FF 1848 guest_set_ah(g, 0x1); 1849 } 1850 } else { 1851 printf("Unhandeled method on INT 0x13\n"); 1852 return handle_vmexit_unhandeled(g); 1853 } 1854 break; 1855 case 0x15: 1856 // ENABLE A20 GATE 1857 if (guest_get_ax(g) == 0x2401) { 1858 g->a20_gate_enabled = true; 1859#ifdef CONFIG_SVM 1860 amd_vmcb_rflags_cf_wrf(&g->vmcb, 0); 1861#else 1862 vmx_vmcs_rflags_cf_wrf(g, 0); 1863#endif 1864 guest_set_ah(g, 0); 1865 } 1866 // APM INSTALLATION CHECK 1867 else if (guest_get_ax(g) == 0x5300) { 1868 // we do not support APM - set carry flag to indicate error 1869#ifdef CONFIG_SVM 1870 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 1871#else 1872 vmx_vmcs_rflags_cf_wrf(g, 1); 1873#endif 1874 } 1875 // APM DISCONNECT 1876 else if (guest_get_ax(g) == 0x5304) { 1877 // we do not support APM - set carry flag to indicate error 1878#ifdef CONFIG_SVM 1879 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 1880#else 1881 vmx_vmcs_rflags_cf_wrf(g, 1); 1882#endif 1883 } 1884 // GET MEMORY SIZE FOR >64M CONFIGURATIONS 1885 else if (guest_get_ax(g) == 0xe801) { 1886 // we do not support this BIOS call 1887 // both grub and linux may also use the 0xe820 call 1888#ifdef CONFIG_SVM 1889 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 1890#else 1891 vmx_vmcs_rflags_cf_wrf(g, 1); 1892#endif 1893 } 1894 // GET SYSTEM MEMORY MAP 1895 // EDX has to contain 0x534d4150 (== 'SMAP') 1896 else if (guest_get_ax(g) == 0xe820 && 1897 guest_get_edx(g) == 0x534d4150) { 1898 // for now we return only one entry containing the real mem 1899 if (guest_get_ebx(g) > 1 || guest_get_ecx(g) < 20) { 1900 // wrong input params -> report error 1901#ifdef CONFIG_SVM 1902 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 1903#else 1904 vmx_vmcs_rflags_cf_wrf(g, 1); 1905#endif 1906 } else { 1907 // taken from http://www.ctyme.com/intr/rb-1741.htm 1908#ifdef CONFIG_SVM 1909 uintptr_t addr = guest_to_host(g->mem_low_va) + 1910 amd_vmcb_es_base_rd(&g->vmcb) + 1911 guest_get_di(g); 1912#else 1913 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_ES_BASE, &es_guest_base); 1914 uintptr_t addr = guest_to_host(g->mem_low_va) + 1915 es_guest_base + guest_get_di(g); 1916#endif 1917 // set EAX to 'SMAP' 1918 guest_set_eax(g, 0x534D4150); 1919 // returned bytes (always 20) 1920 guest_set_ecx(g, 20); 1921 1922 switch (guest_get_ebx(g)) { 1923 case 0x0: 1924 // base memory 1925 assert(g->mem_low_va == 0); 1926 // base address 1927 *(uint64_t *)addr = 0; 1928 // size of the memory block 1929 *(uint64_t *)(addr + 8) = 0xa0000; // 640 KiB 1930 // mem type, 1 == "memory, available to the OS" 1931 *(uint32_t *)(addr + 16) = 1; 1932 // indicate that there is more data 1933 guest_set_ebx(g, 1); 1934 break; 1935 case 0x1: 1936 // extended memory 1937 assert(g->mem_high_va > 0x100000); 1938 // base address 1939 *(uint64_t *)addr = 0x100000; // 1 MiB 1940 // size of the memory block 1941 *(uint64_t *)(addr + 8) = g->mem_high_va - 0x100000; 1942 // mem type, 1 == "memory, available to the OS" 1943 *(uint32_t *)(addr + 16) = 1; 1944 // indicate that there is no more data 1945 guest_set_ebx(g, 0); 1946 break; 1947 default: 1948 assert(!"not reached"); 1949 break; 1950 } 1951 1952 // mark success 1953#ifdef CONFIG_SVM 1954 amd_vmcb_rflags_cf_wrf(&g->vmcb, 0); 1955#else 1956 vmx_vmcs_rflags_cf_wrf(g, 0); 1957#endif 1958 } 1959 } 1960 // SYSTEM - Get Intel SpeedStep (IST) information 1961 else if (guest_get_ax(g) == 0xe980) { 1962 // not supportet yet 1963#ifdef CONFIG_SVM 1964 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 1965#else 1966 vmx_vmcs_rflags_cf_wrf(g, 1); 1967#endif 1968 } 1969 // SYSTEM - GET CONFIGURATION (XT >1986/1/10,AT mdl 3x9, 1970 // CONV,XT286,PS) 1971 // GRUB BUG: it puts 0xc0 into AX instead of AH 1972 else if (guest_get_ax(g) == 0xc0) { 1973 // we do not support this 1974#ifdef CONFIG_SVM 1975 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 1976#else 1977 vmx_vmcs_rflags_cf_wrf(g, 1); 1978#endif 1979 guest_set_ah(g, 0x80); 1980 } 1981 // GET EXTENDED MEMORY SIZE 1982 else if (guest_get_ah(g) == 0x88) { 1983 // calculate number of 1KB chunks starting from 1MB but not 1984 // beyond 16MB 1985 assert(((g->mem_high_va - g->mem_low_va) & 0x3ff) == 0); 1986 guest_set_ax(g, MIN(0x3c00 /* 16MB */, 1987 (g->mem_high_va - g->mem_low_va) / 1024)); 1988 // indicate no error occured 1989#ifdef CONFIG_SVM 1990 amd_vmcb_rflags_cf_wrf(&g->vmcb, 0); 1991#else 1992 vmx_vmcs_rflags_cf_wrf(g, 0); 1993#endif 1994 } 1995 // SYSTEM - GET CONFIGURATION (XT >1986/1/10,AT mdl 3x9, 1996 // CONV,XT286,PS) 1997 else if (guest_get_ah(g) == 0xc0) { 1998 // we do not support this 1999#ifdef CONFIG_SVM 2000 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 2001#else 2002 vmx_vmcs_rflags_cf_wrf(g, 1); 2003#endif 2004 guest_set_ah(g, 0x80); 2005 // SYSTEM - SET BIOS MODE 2006 } else if (guest_get_ah(g) == 0xec) { 2007 // I do no really know the use of this bios call and linux 2008 // expects no action what so ever 2009 } else { 2010 printf("Unhandeled method on INT 0x15\n"); 2011 return handle_vmexit_unhandeled(g); 2012 } 2013 break; 2014 case 0x16: 2015 // KEYBOARD - SET TYPEMATIC RATE AND DELAY 2016 if (guest_get_ah(g) == 0x3) { 2017 // ignore this 2018 } else if (guest_get_ah(g) == 0x2) { 2019 // Return keyboard flags 2020 guest_set_al(g, 0x0); 2021 } else { 2022 printf("Unhandeled method on INT 0x16\n"); 2023 return handle_vmexit_unhandeled(g); 2024 } 2025 break; 2026 case 0x1a: 2027 // TIME - GET REAL-TIME CLOCK TIME (AT,XT286,PS) 2028 if (guest_get_ah(g) == 0x2) { 2029 uint8_t h, m, s; 2030 lpc_rtc_get_time_bcd(g->lpc, &h, &m, &s); 2031 guest_set_ch(g, h); 2032 guest_set_cl(g, m); 2033 guest_set_dh(g, s); 2034 guest_set_dl(g, 0); 2035 // mark success 2036#ifdef CONFIG_SVM 2037 amd_vmcb_rflags_cf_wrf(&g->vmcb, 0); 2038#else 2039 vmx_vmcs_rflags_cf_wrf(g, 0); 2040#endif 2041 } else { 2042 printf("Unhandeled method on INT 0x1a\n"); 2043 return handle_vmexit_unhandeled(g); 2044 } 2045 break; 2046 default: 2047 printf("handle_vmexit_swint: Unhandeled real-mode interrupt " 2048 "0x%x (%d).\n", int_num, int_num); 2049 return handle_vmexit_unhandeled(g); 2050 } 2051 } else { 2052 printf("vmkitmon: encountered INT instruction outside real mode\n"); 2053 return handle_vmexit_unhandeled(g); 2054 } 2055 2056 // advance the rip beyond the instruction 2057#ifdef CONFIG_SVM 2058 amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 2); 2059#else 2060 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 2061 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 2); 2062 assert(err_is_ok(err)); 2063#endif 2064 return HANDLER_ERR_OK; 2065} 2066 2067static inline enum opsize 2068io_access_size_to_opsize (enum x86_io_access io) 2069{ 2070 if (io & X86_IO_ACCESS_SZ8) { 2071 return OPSIZE_8; 2072 } else if (io & X86_IO_ACCESS_SZ16) { 2073 return OPSIZE_16; 2074 } else if (io & X86_IO_ACCESS_SZ32) { 2075 return OPSIZE_32; 2076 } else { 2077 assert(!"NYI"); 2078 return 0; 2079 } 2080} 2081 2082static int 2083handle_vmexit_ioio (struct guest *g) 2084{ 2085 int r; 2086#ifdef CONFIG_SVM 2087 uint64_t info1 = amd_vmcb_exitinfo1_rd(&g->vmcb); 2088 enum x86_io_access io; 2089 uint16_t port = info1 >> 16; 2090#else 2091 errval_t err = 0; 2092 if (!g->emulated_before_exit) { 2093 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_EXIT_QUAL, &saved_exit_qual); 2094 uint64_t instr_len, guest_rip; 2095 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_EXIT_INSTR_LEN, &instr_len); 2096 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 2097 saved_rip = guest_rip + instr_len; 2098 } 2099 uint16_t port = (saved_exit_qual >> 16) & 0xffff; 2100#endif 2101 bool write; 2102 enum opsize size; 2103 uint32_t val; 2104 bool newapi = false; // needed as a transition 2105 2106#ifdef CONFIG_SVM 2107 // copy the access flags 2108 // FIXME: this severely exploits the way the x86_io_access flags are set up 2109 io = (info1 >> 1); 2110 io |= info1 & SVM_IOIO_TYPE_MASK; 2111 2112 // gather some params for the io access 2113 write = (io & X86_IO_ACCESS_TYPE) == 0; 2114 size = OPSIZE_8; // make gcc happy 2115 if (io & X86_IO_ACCESS_SZ8) { 2116 size = OPSIZE_8; 2117 } else if (io & X86_IO_ACCESS_SZ16) { 2118 size = OPSIZE_16; 2119 } else if (io & X86_IO_ACCESS_SZ32) { 2120 size = OPSIZE_32; 2121 } 2122#else 2123 write = ((saved_exit_qual >> 3) & 0x1) == 0; 2124 size = OPSIZE_8; 2125 if ((saved_exit_qual & 0x7) == 0) { 2126 size = OPSIZE_8; 2127 } else if ((saved_exit_qual & 0x7) == 1) { 2128 size = OPSIZE_16; 2129 } else if ((saved_exit_qual & 0x7) == 3) { 2130 size = OPSIZE_32; 2131 } else { 2132 assert(!"Invalid size of access value"); 2133 } 2134#endif 2135 // fetch the source val if neccessary 2136 if (write) { 2137 switch (size) { 2138 case OPSIZE_8: 2139 val = guest_get_al(g); 2140 break; 2141 case OPSIZE_16: 2142 val = guest_get_ax(g); 2143 break; 2144 case OPSIZE_32: 2145 val = guest_get_eax(g); 2146 break; 2147 default: 2148 assert(!"not reached"); 2149 break; 2150 } 2151 } 2152 2153 // assign the request to the corresponding subsystem 2154 switch (port) { 2155 // LPC devices 2156 case 0x20: // primary PIC 2157 case 0x21: // primary PIC 2158 case 0x40: // Timer 2159 case 0x41: // Timer 2160 case 0x42: // Timer 2161 case 0x43: // Timer 2162 case 0x61: // NMI Controller 2163 case 0x70: // RTC 2164 case 0x71: // RTC 2165 case 0x72: // RTC 2166 case 0x73: // RTC 2167 case 0x74: // RTC 2168 case 0x75: // RTC 2169 case 0x76: // RTC 2170 case 0x77: // RTC 2171 case 0xa0: // secondary PIC 2172 case 0xa1: // secondary PIC 2173 if (write) { 2174 r = lpc_handle_pio_write(g->lpc, port, size, val); 2175 guest_assert(g, r == 0); 2176 } else { 2177 r = lpc_handle_pio_read(g->lpc, port, size, &val); 2178 assert(r == 0); 2179 } 2180 newapi = true; 2181 break; 2182 // Keyboard 2183 case 0x60: 2184 case 0x64: 2185 // we currently do not support a keyboard 2186 if (!write) { 2187 val = ~0; 2188 } 2189 newapi = true; 2190 break; 2191 case 0x80: 2192 // some apps use writing to this port as a method to delay execution 2193 // so we just do noting 2194 break; 2195 // Coprocessor 2196 case 0xf0: 2197 case 0xf1: 2198 // coprocessor IGNNE# - do nothing for now 2199 break; 2200 2201 // serial COM1 port 2202 // FIXME: this should not be hardcoded ! 2203 case 0x3f8: 2204 case 0x3f9: 2205 case 0x3fa: 2206 case 0x3fb: 2207 case 0x3fc: 2208 case 0x3fd: 2209 case 0x3fe: 2210 case 0x3ff: 2211 // COM2 2212 case 0x2f8: 2213 case 0x2f9: 2214 case 0x2fa: 2215 case 0x2fb: 2216 case 0x2fc: 2217 case 0x2fd: 2218 case 0x2fe: 2219 case 0x2ff: 2220 // COM3 2221 case 0x3e8: 2222 case 0x3e9: 2223 case 0x3ea: 2224 case 0x3eb: 2225 case 0x3ec: 2226 case 0x3ed: 2227 case 0x3ee: 2228 case 0x3ef: 2229 // COM4 2230 case 0x2e8: 2231 case 0x2e9: 2232 case 0x2ea: 2233 case 0x2eb: 2234 case 0x2ec: 2235 case 0x2ed: 2236 case 0x2ee: 2237 case 0x2ef: { 2238 int com; 2239 2240 com = (port & 0xf0) == 0xf0 ? !(port & 0x100) : !(port & 0x100) + 2; 2241 assert(com >= 0 && com < 4); 2242 if (write) { 2243 r = pc16550d_handle_pio_write(g->serial_ports[com], port, 2244 size, val); 2245 assert(r == 0); 2246 } else { 2247 r = pc16550d_handle_pio_read(g->serial_ports[com], port, 2248 size, &val); 2249 assert(r == 0); 2250 } 2251 newapi = true; 2252 break; 2253 } 2254 2255 // PCI config space (address) 2256 case 0xcf8: 2257 case 0xcf9: 2258 case 0xcfa: 2259 case 0xcfb: 2260 // PCI config space (data) 2261 case 0xcfc: 2262 case 0xcfd: 2263 case 0xcfe: 2264 case 0xcff: 2265 if(write) { 2266 r = pci_handle_pio_write(g->pci, port, size, val); 2267 } else { 2268 r = pci_handle_pio_read(g->pci, port, size, &val); 2269 } 2270 assert(r == 0); 2271 newapi = true; 2272 break; 2273 2274 default: 2275 // the default is to return 0xff and to ignore writes 2276 if (!write) { 2277 val = 0xffffffff; 2278 } 2279 newapi = true; 2280 }; 2281 2282 // set the destination when neccessary 2283 if (newapi && !write) { 2284 switch (size) { 2285 case OPSIZE_8: 2286 guest_set_al(g, val); 2287 break; 2288 case OPSIZE_16: 2289 guest_set_ax(g, val); 2290 break; 2291 case OPSIZE_32: 2292 guest_set_eax(g, val); 2293 break; 2294 default: 2295 assert(!"not reached"); 2296 break; 2297 } 2298 } 2299 2300 // the following IP is stored in the exitinfo2 field 2301#ifdef CONFIG_SVM 2302 amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_exitinfo2_rd(&g->vmcb)); 2303#else 2304 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, saved_rip); 2305 assert(err_is_ok(err)); 2306#endif 2307 return HANDLER_ERR_OK; 2308} 2309 2310static int 2311handle_vmexit_msr (struct guest *g) { 2312#ifdef CONFIG_SVM 2313 bool write = amd_vmcb_exitinfo1_rd(&g->vmcb) == 1; 2314#else 2315 int msr_index; 2316 errval_t err = 0; 2317 bool write = (saved_exit_reason == VMX_EXIT_REASON_WRMSR); 2318 struct msr_entry *guest_msr_area = (struct msr_entry *)g->msr_area_va; 2319#endif 2320 uint32_t msr = guest_get_ecx(g); 2321 uint64_t val; 2322 2323 // there may be writes or reads to MSRs 2324 if (write) { 2325 // fetch the value to write from EDX:EAX 2326 val = ((uint64_t)guest_get_edx(g) << 32) | guest_get_eax(g); 2327 2328 // store the read value into the corresponding location 2329 switch (msr) { 2330 case X86_MSR_SYSENTER_CS: 2331#ifdef CONFIG_SVM 2332 amd_vmcb_sysenter_cs_wr(&g->vmcb, val); 2333#else 2334 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_SYSENTER_CS, val); 2335#endif 2336 break; 2337 case X86_MSR_SYSENTER_ESP: 2338#ifdef CONFIG_SVM 2339 amd_vmcb_sysenter_esp_wr(&g->vmcb, val); 2340#else 2341 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_SYSENTER_ESP, val); 2342#endif 2343 break; 2344 case X86_MSR_SYSENTER_EIP: 2345#ifdef CONFIG_SVM 2346 amd_vmcb_sysenter_eip_wr(&g->vmcb, val); 2347#else 2348 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_SYSENTER_EIP, val); 2349#endif 2350 break; 2351 case X86_MSR_EFER: 2352#ifdef CONFIG_SVM 2353 amd_vmcb_efer_wr_raw(&g->vmcb, val); 2354#else 2355 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_EFER_F, val); 2356#endif 2357 break; 2358 case X86_MSR_FS_BASE: 2359#ifdef CONFIG_SVM 2360 amd_vmcb_fs_base_wr(&g->vmcb, val); 2361#else 2362 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_FS_BASE, val); 2363#endif 2364 break; 2365 case X86_MSR_GS_BASE: 2366#ifdef CONFIG_SVM 2367 amd_vmcb_gs_base_wr(&g->vmcb, val); 2368#else 2369 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_GS_BASE, val); 2370#endif 2371 break; 2372#ifdef CONFIG_SVM 2373 case X86_MSR_KERNEL_GS_BASE: 2374 amd_vmcb_kernel_gs_base_wr(&g->vmcb, val); 2375 break; 2376 case X86_MSR_STAR: 2377 amd_vmcb_star_wr(&g->vmcb, val); 2378 break; 2379 case X86_MSR_LSTAR: 2380 amd_vmcb_lstar_wr(&g->vmcb, val); 2381 break; 2382 case X86_MSR_CSTAR: 2383 amd_vmcb_cstar_wr(&g->vmcb, val); 2384 break; 2385 case X86_MSR_SFMASK: 2386 amd_vmcb_sfmask_wr(&g->vmcb, val); 2387 break; 2388 default: 2389 printf("MSR: unhandeled MSR write access to %x\n", msr); 2390 return handle_vmexit_unhandeled(g); 2391#else 2392 case X86_MSR_BIOS_SIGN_ID: 2393 break; 2394 default: 2395 msr_index = vmx_guest_msr_index(msr); 2396 if (msr_index == -1) { 2397 printf("MSR: unhandeled MSR write access to %x\n", msr); 2398 return handle_vmexit_unhandeled(g); 2399 } 2400 guest_msr_area[msr_index].val = val; 2401 break; 2402#endif 2403 } 2404 } else { 2405 // read the value from the corresponding location 2406 switch (msr) { 2407 case X86_MSR_SYSENTER_CS: 2408#ifdef CONFIG_SVM 2409 val = amd_vmcb_sysenter_cs_rd(&g->vmcb); 2410#else 2411 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_SYSENTER_CS, &val); 2412#endif 2413 break; 2414 case X86_MSR_SYSENTER_ESP: 2415#ifdef CONFIG_SVM 2416 val = amd_vmcb_sysenter_esp_rd(&g->vmcb); 2417#else 2418 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_SYSENTER_ESP, &val); 2419#endif 2420 break; 2421 case X86_MSR_SYSENTER_EIP: 2422#ifdef CONFIG_SVM 2423 val = amd_vmcb_sysenter_eip_rd(&g->vmcb); 2424#else 2425 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_SYSENTER_EIP, &val); 2426#endif 2427 break; 2428 case X86_MSR_EFER: 2429#ifdef CONFIG_SVM 2430 val = amd_vmcb_efer_rd_raw(&g->vmcb); 2431#else 2432 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_EFER_F, &val); 2433#endif 2434 break; 2435 case X86_MSR_FS_BASE: 2436#ifdef CONFIG_SVM 2437 val = amd_vmcb_fs_base_rd(&g->vmcb); 2438#else 2439 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_FS_BASE, &val); 2440#endif 2441 break; 2442 case X86_MSR_GS_BASE: 2443#ifdef CONFIG_SVM 2444 val = amd_vmcb_gs_base_rd(&g->vmcb); 2445#else 2446 err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_GS_BASE, &val); 2447#endif 2448 break; 2449#ifdef CONFIG_SVM 2450 case X86_MSR_KERNEL_GS_BASE: 2451 val = amd_vmcb_kernel_gs_base_rd(&g->vmcb); 2452 break; 2453 case X86_MSR_STAR: 2454 val = amd_vmcb_star_rd(&g->vmcb); 2455 break; 2456 case X86_MSR_LSTAR: 2457 val = amd_vmcb_lstar_rd(&g->vmcb); 2458 break; 2459 case X86_MSR_CSTAR: 2460 val = amd_vmcb_cstar_rd(&g->vmcb); 2461 break; 2462 case X86_MSR_SFMASK: 2463 val = amd_vmcb_sfmask_rd(&g->vmcb); 2464 break; 2465 default: 2466 printf("MSR: unhandeled MSR read access to %x\n", msr); 2467 return handle_vmexit_unhandeled(g); 2468#else 2469 case X86_MSR_APIC_BASE: 2470 case X86_MSR_BIOS_SIGN_ID: 2471 case X86_MSR_MTRRCAP: 2472 case X86_MSR_MCG_CAP: 2473 case X86_MSR_MCG_STATUS: 2474 case X86_MSR_PAT: 2475 case X86_MTRR_DEF_TYPE: 2476 val = 0x0; 2477 break; 2478 case X86_MSR_MISC_ENABLE: 2479 val = 0x1; // enable fast-string instructions 2480 break; 2481 default: 2482 msr_index = vmx_guest_msr_index(msr); 2483 if (msr_index == -1) { 2484 printf("MSR: unhandeled MSR read access to %x\n", msr); 2485 return handle_vmexit_unhandeled(g); 2486 } 2487 val = guest_msr_area[msr_index].val; 2488 break; 2489#endif 2490 } 2491 2492 // store the value in EDX:EAX 2493 guest_set_eax(g, val); 2494 guest_set_edx(g, val >> 32); 2495 } 2496 2497 // advance the rip beyond the current instruction 2498#ifdef CONFIG_SVM 2499 amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 2); 2500#else 2501 uint64_t guest_rip; 2502 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 2503 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 2); 2504 assert(err_is_ok(err)); 2505#endif 2506 return HANDLER_ERR_OK; 2507} 2508 2509static int 2510handle_vmexit_cpuid (struct guest *g) { 2511 uint32_t eax, ebx, ecx, edx; 2512 uint32_t func = guest_get_eax(g); 2513 2514 switch (func) { 2515#ifdef CONFIG_SVM 2516 // Processor Vendor and Largest Standard Function Number 2517 case 0: 2518 case 0x80000000: 2519 // max standard function offset 2520 eax = func == 0 ? 0x1 : 0x80000000; 2521 // string "AuthenticAMD" 2522 ebx = 0x68747541; 2523 ecx = 0x444d4163; 2524 edx = 0x69746e65; 2525 break; 2526 2527 // Family, Model, Stepping Identifiers 2528 case 1: 2529 // we simulate a AMD K6-3D 2530 // Family 5, Model 8, Stepping 12 2531 eax = 0x58c; 2532 // no brand, clflush size 16, no mulitprocessing, no local apic 2533 ebx = 0x0f00; 2534 // support the popcnt instr 2535 ecx = 0x800000; 2536 // support some basic features 2537 edx = 0x89a91b; 2538 break; 2539 2540 default: 2541 // use the answer of the host if there is any other request 2542 // FIXME: this is probably not a good idea ;) 2543 cpuid(func, &eax, &ebx, &ecx, &edx); 2544 printf("handle_vmexit_cpuid: CPUID: func %x, host reports: eax %x, " 2545 "ebx %x, ecx %x, edx %x\n", func, eax, ebx, ecx, edx); 2546 break; 2547#else 2548 case 0: 2549 eax = 0x2; 2550 ebx = 0x756e6547; 2551 ecx = 0x6c65746e; 2552 edx = 0x49656e69; 2553 break; 2554 case 1: 2555 eax = 0x800; 2556 ebx = 0x800; 2557 ecx = 0x80200000; 2558 edx = 0x183fbff; 2559 break; 2560 case 2: 2561 eax = 0x1; 2562 ebx = 0x0; 2563 ecx = 0x4d; 2564 edx = 0x2c307d; 2565 default: 2566 eax = 0x0; 2567 ebx = 0x0; 2568 ecx = 0x0; 2569 edx = 0x0; 2570 break; 2571#endif 2572 } 2573 2574 guest_set_eax(g, eax); 2575 guest_set_ebx(g, ebx); 2576 guest_set_ecx(g, ecx); 2577 guest_set_edx(g, edx); 2578 2579 // advance the rip beyond the instruction 2580#ifdef CONFIG_SVM 2581 amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 2); 2582#else 2583 uint64_t guest_rip; 2584 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 2585 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 2); 2586 assert(err_is_ok(err)); 2587#endif 2588 return HANDLER_ERR_OK; 2589} 2590 2591static int 2592handle_vmexit_vmmcall (struct guest *g) { 2593 /*printf("VMMCALL: tsc %lu, exits with mon invocation %lu, exits w/o mon " 2594 "invocation %lu\n", rdtsc(), 2595 g->ctrl->num_vm_exits_with_monitor_invocation, 2596 g->ctrl->num_vm_exits_without_monitor_invocation);*/ 2597 2598 // advance the rip beyond the instruction 2599#ifdef CONFIG_SVM 2600 amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 3); 2601#else 2602 uint64_t guest_rip; 2603 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 2604 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 3); 2605 assert(err_is_ok(err)); 2606#endif 2607 return HANDLER_ERR_OK; 2608} 2609 2610static int 2611handle_vmexit_hlt (struct guest *g) { 2612 // the guest has nothing to do - poll out irq sources for pending IRQs 2613 // if they do not assert a virtual IRQ then we will do nothing 2614 lpc_pic_process_irqs(g->lpc); 2615 2616 // advance the rip beyond the instruction 2617#ifdef CONFIG_SVM 2618 amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 1); 2619#else 2620 uint64_t guest_rip; 2621 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 2622 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 1); 2623#endif 2624 2625 // running HLT with IRQs masked does not make any sense 2626 // FIXME: this assert silly, shutting down the VM would be the right way 2627#ifdef CONFIG_SVM 2628 guest_assert(g, amd_vmcb_rflags_rd(&g->vmcb).intrf == 1); 2629#else 2630 uint64_t guest_rflags; 2631 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RFLAGS, &guest_rflags); 2632 assert(err_is_ok(err)); 2633 guest_assert(g, guest_rflags & RFLAGS_IF); 2634#endif 2635 if (virq_pending(g, NULL, NULL)) { 2636 // there is an IRQ pending, proceed as normal, the CPU will take it 2637 } else { 2638 // there is really nothing to do - stop the VM and wait 2639 g->runnable = false; 2640 } 2641 2642 return HANDLER_ERR_OK; 2643} 2644 2645static inline int 2646decode_mov_instr_length (struct guest *g, uint8_t *code) 2647{ 2648 int len; 2649 2650 // we only support long mode for now 2651 //assert(amd_vmcb_efer_rd(&g->vmcb).lma == 1); 2652 2653 // all non special MOV instructions use one byte as opcode and at least a 2654 // ModR/M byte 2655 len = 2; 2656 // check for the REX prefix 2657 if ((code[0] >> 4) == 0x4) { 2658 len++; 2659 code++; 2660 } 2661 // precaution because I did no check all variants of MOV, at least these two 2662 // variants are supported 2663 assert(code[0] == 0x89 || code[0] == 0x8b); 2664 2665 union x86_modrm modrm = { .raw = code[1] }; 2666 // check for displacements 2667 if (modrm.u.mod == 0x1) { 2668 // 1B displacement 2669 len++; 2670 } else if (modrm.u.mod == 0x2) { 2671 // 4B displacement 2672 len += 4; 2673 } 2674 2675 // check for SIB byte 2676 if (modrm.u.rm == 0x4 && modrm.u.mod != 0x3) { 2677 len++; 2678 } 2679 2680 return len; 2681} 2682 2683// finds out whether a move instruction is a read or a write with respect to 2684// memory 2685static inline bool 2686decode_mov_is_write (struct guest *g, uint8_t *code) 2687{ 2688 // check for the REX prefix 2689 if ((code[0] >> 4) == 0x4) { 2690 code++; 2691 } 2692 2693 // we only support one move variant (in each direction) for now 2694 assert(code[0] == 0x89 || code[0] == 0x8b); 2695 2696 union x86_modrm modrm = { .raw = code[1] }; 2697 // not defined for reg to reg moves 2698 assert(modrm.u.mod != 3); 2699 2700 return code[0] == 0x89; // 0x89 ==> MOV reg -> mem 2701} 2702 2703static inline enum opsize 2704decode_mov_op_size (struct guest *g, uint8_t *code) 2705{ 2706 /* 2707 printf("EFER: 0x%lx\n", amd_vmcb_efer_rd_raw(&g->vmcb)); 2708 printf("Code: 0x%lx\n", *((uint64_t *)code)); 2709 printf("Code[0]: 0x%x, Code[1]: 0x%x, Code[2]: 0x%x, Code[3]: 0x%x\n", code[0],code[1],code[2],code[3]); 2710 printf("Guest EAX: 0x%x\n", guest_get_eax(g)); 2711 printf("Guest EBX: 0x%x\n", guest_get_ebx(g)); 2712 printf("Guest ECX: 0x%x\n", guest_get_ecx(g)); 2713 2714 printf("Guest EDX: 0x%x\n", guest_get_edx(g)); 2715 printf("Guest RDI: 0x%lx\n", guest_get_rdi(g)); 2716 printf("Guest RSI: 0x%lx\n", guest_get_rsi(g)); 2717 printf("Guest RSP: 0x%lx\n", guest_get_rsp(g)); 2718 printf("Guest RBP: 0x%lx\n", guest_get_rbp(g)); 2719 */ 2720 2721 // we only support long mode for now 2722 //assert(amd_vmcb_efer_rd(&g->vmcb).lma == 1); 2723 2724 // check for the REX prefix 2725 if ((code[0] >> 4) == 0x4 && code[0] & 0x48) { 2726 return OPSIZE_64; 2727 } 2728 return OPSIZE_32; 2729} 2730 2731 2732static inline uint64_t 2733decode_mov_src_val (struct guest *g, uint8_t *code) { 2734 2735 // we only support long mode for now 2736 //assert(amd_vmcb_efer_rd(&g->vmcb).lma == 1); 2737 2738 // check for the REX prefix 2739 if ((code[0] >> 4) == 0x4) { 2740 code++; 2741 } 2742 2743 // we only support one variant for now 2744 assert(code[0] == 0x89); 2745 2746 union x86_modrm modrm = { .raw = code[1] }; 2747 return get_reg_val_by_reg_num(g, modrm.u.regop); 2748} 2749 2750 2751static inline void 2752decode_mov_dest_val (struct guest *g, uint8_t *code, uint64_t val) 2753{ 2754 // we only support long mode for now 2755 //assert(amd_vmcb_efer_rd(&g->vmcb).lma == 1); 2756 2757 // check for the REX prefix 2758 if ((code[0] >> 4) == 0x4) { 2759 code++; 2760 } 2761 2762 // we only support one variant for now 2763 assert(code[0] == 0x8b); 2764 2765 union x86_modrm modrm = { .raw = code[1] }; 2766 set_reg_val_by_reg_num(g, modrm.u.regop, val); 2767} 2768 2769/**** e1000 2770#define TDBAL_OFFSET 0x3800 2771#define TDBAH_OFFSET 0x3804 2772#define RDBAL_OFFSET 0x2800 2773#define RDBAH_OFFSET 0x2804 2774#define TDT_OFFSET 0x3818 //Transmit descriptor tail. Writes to this toggle transmission 2775#define TCTL_OFFSET 0x400 //Transmission Control 2776 2777#define IMS_OFFSET 0xd0 // Interrupt Mask Set/Read Register 2778#define ICS_OFFSET 0xc8 // Interrupt Cause Set Register 2779 2780static int register_needs_translation(uint64_t addr){ 2781 return ( 2782 addr == TDBAL_OFFSET || 2783 addr == TDBAH_OFFSET || 2784 addr == RDBAL_OFFSET || 2785 addr == RDBAH_OFFSET 2786 ); 2787 2788} 2789 2790**** e1000 */ 2791 2792 2793 2794 2795#define MMIO_MASK(bytes) (~(~(bytes) + 1)) // I think ~(-bytes) is also correct 2796 2797static int 2798handle_vmexit_npf (struct guest *g) { 2799 int r; 2800#ifdef CONFIG_SVM 2801 uint64_t fault_addr = amd_vmcb_exitinfo2_rd(&g->vmcb); 2802#else 2803 uint64_t fault_addr, guest_rip; 2804 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GPADDR_F, &fault_addr); 2805 assert(err_is_ok(err)); 2806#endif 2807 uint8_t *code = NULL; 2808 2809 // check for fault inside the guest physical memory region 2810 if (fault_addr >= g->mem_low_va && fault_addr < g->mem_high_va) { 2811 // allocate the missing memory 2812 alloc_guest_mem(g, fault_addr & ~BASE_PAGE_MASK, BASE_PAGE_SIZE); 2813 // do not advance the RIP, it is safe (and neccessary) to 2814 // replay the faulting instruction 2815 return HANDLER_ERR_OK; 2816 } 2817 2818 // fetch the location to the code 2819 r = get_instr_arr(g, &code); 2820 assert (r == 0); 2821 2822 // virtual devices 2823 switch (fault_addr & ~BASE_PAGE_MASK) { 2824 case APIC_BASE: { 2825 uint64_t val; 2826 enum opsize size; 2827 2828 assert(g->apic != NULL); 2829 size = decode_mov_op_size(g, code); 2830 if (decode_mov_is_write(g, code)) { 2831 val = decode_mov_src_val(g, code); 2832 r = apic_handle_mmio_write(g->apic, fault_addr, size, val); 2833 assert(r == 0); 2834 } else { 2835 r = apic_handle_mmio_read(g->apic, fault_addr, size, &val); 2836 assert(r == 0); 2837 decode_mov_dest_val(g, code, val); 2838 } 2839 2840 // advance the rip beyond the instruction 2841#ifdef CONFIG_SVM 2842 amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 2843 decode_mov_instr_length(g, code)); 2844#else 2845 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 2846 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 2847 decode_mov_instr_length(g, code)); 2848 assert(err_is_ok(err)); 2849#endif 2850 return HANDLER_ERR_OK; 2851 } 2852 } 2853 2854 //Check if this is a access to a pci device memory 2855 2856 for(int bus_i = 0; bus_i<256; bus_i++){ 2857 for(int dev_i = 0; dev_i < 32; dev_i++){ 2858 struct pci_bus *bus = g->pci->bus[bus_i]; 2859 if(bus) { 2860 struct pci_device* dev = bus->device[dev_i]; 2861 if(dev){ 2862 for(int bar_i=0; bar_i<5; bar_i++){ 2863 struct bar_info *curbar = &dev->bars[bar_i]; 2864 if(curbar->paddr <= fault_addr && fault_addr < curbar->paddr + curbar->bytes){ 2865 if(decode_mov_is_write(g, code)){ 2866 uint64_t val = decode_mov_src_val(g, code); 2867 if(dev->mem_write) { 2868 dev->mem_write(dev, MMIO_MASK(curbar->bytes) & fault_addr, bar_i, val ); 2869 } else { 2870 goto error; 2871 } 2872 } else { 2873 uint64_t val; 2874 if(dev->mem_read){ 2875 dev->mem_read(dev, MMIO_MASK(curbar->bytes) & fault_addr, bar_i, (uint32_t*)&val); 2876 decode_mov_dest_val(g, code, val); 2877 } else { 2878 goto error; 2879 } 2880 } 2881#ifdef CONFIG_SVM 2882 amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 2883 decode_mov_instr_length(g, code)); 2884#else 2885 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 2886 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 2887 decode_mov_instr_length(g, code)); 2888 assert(err_is_ok(err)); 2889#endif 2890 return HANDLER_ERR_OK; 2891 } 2892 } 2893 } 2894 } 2895 } 2896 } 2897 2898 error: 2899 printf("vmkitmon: access to an unknown memory location: %lx", fault_addr); 2900 return handle_vmexit_unhandeled(g); 2901} 2902 2903typedef int (*vmexit_handler)(struct guest *g); 2904 2905#ifdef CONFIG_SVM 2906static vmexit_handler vmexit_handlers[0x8c] = { 2907 [SVM_VMEXIT_CR0_READ] = handle_vmexit_cr_access, 2908 [SVM_VMEXIT_CR0_WRITE] = handle_vmexit_cr_access, 2909 [SVM_VMEXIT_CR0_SEL_WRITE] = handle_vmexit_cr_access, 2910 [SVM_VMEXIT_SWINT] = handle_vmexit_swint, 2911 [SVM_VMEXIT_IDTR_WRITE] = handle_vmexit_ldt, 2912 [SVM_VMEXIT_GDTR_WRITE] = handle_vmexit_ldt, 2913 [SVM_VMEXIT_IOIO] = handle_vmexit_ioio, 2914 [SVM_VMEXIT_MSR] = handle_vmexit_msr, 2915 [SVM_VMEXIT_CPUID] = handle_vmexit_cpuid, 2916 [SVM_VMEXIT_VMMCALL] = handle_vmexit_vmmcall, 2917 [SVM_VMEXIT_HLT] = handle_vmexit_hlt 2918}; 2919#else 2920static vmexit_handler vmexit_handlers[0x8c] = { 2921 [VMX_EXIT_REASON_CPUID] = handle_vmexit_cpuid, 2922 [VMX_EXIT_REASON_HLT] = handle_vmexit_hlt, 2923 [VMX_EXIT_REASON_VMCALL] = handle_vmexit_vmmcall, 2924 [VMX_EXIT_REASON_CR_ACCESS] = handle_vmexit_cr_access, 2925 [VMX_EXIT_REASON_INOUT] = handle_vmexit_ioio, 2926 [VMX_EXIT_REASON_RDMSR] = handle_vmexit_msr, 2927 [VMX_EXIT_REASON_WRMSR] = handle_vmexit_msr, 2928 [VMX_EXIT_REASON_GDTR_IDTR] = handle_vmexit_ldt, 2929 [VMX_EXIT_REASON_EPT_FAULT] = handle_vmexit_npf, 2930 [VMX_EXIT_REASON_SWINT] = handle_vmexit_swint 2931}; 2932#endif 2933 2934void 2935guest_handle_vmexit (struct guest *g) { 2936 //struct pci_ethernet * eth = (struct pci_ethernet * ) g->pci->bus[0]->device[2]->state;// 2937 //printf("guest_handle_vmexit\n"); 2938 vmexit_handler handler; 2939#ifdef CONFIG_SVM 2940 uint64_t exitcode = amd_vmcb_exitcode_rd(&g->vmcb); 2941 if (exitcode == SVM_VMEXIT_NPF) { 2942 handler = handle_vmexit_npf; 2943 } else if (LIKELY(vmexit_handlers[exitcode] != NULL)) { 2944 handler = vmexit_handlers[exitcode]; 2945 } else { 2946 handle_vmexit_unhandeled(g); 2947 return; 2948 } 2949#else 2950 if (!g->emulated_before_exit) { 2951 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_EXIT_REASON, 2952 (uint64_t *)&saved_exit_reason); 2953 assert(err_is_ok(err)); 2954 } 2955 2956 if (LIKELY(vmexit_handlers[saved_exit_reason] != NULL)) { 2957 handler = vmexit_handlers[saved_exit_reason]; 2958 } else { 2959 handle_vmexit_unhandeled(g); 2960 return; 2961 } 2962#endif 2963 int r = handler(g); 2964 if (LIKELY(r == HANDLER_ERR_OK)) { 2965 if (g->runnable) { 2966 guest_make_runnable(g, true); 2967 } 2968 } 2969} 2970