1 2/** 3 * \file 4 */ 5 6/* 7 * Copyright (c) 2009, 2010, ETH Zurich. 8 * All rights reserved. 9 * 10 * This file is distributed under the terms in the attached LICENSE file. 11 * If you do not find this file, copies can be found by writing to: 12 * ETH Zurich D-INFK, Universitaetstrasse 6, CH-8092 Zurich. Attn: Systems Group. 13 */ 14 15#include <stdlib.h> 16#include <string.h> 17#include "vmkitmon.h" 18#include <barrelfish/lmp_endpoints.h> 19#include "x86.h" 20#ifdef CONFIG_SVM 21#include "svm.h" 22#endif 23#include "realmode.h" 24#include "hdd.h" 25#include "console.h" 26#include "pc16550d.h" 27#include "apic.h" 28#include "lpc.h" 29#include "pci.h" 30#include "pci_host.h" 31#include "pci_devices.h" 32#include "pci_ethernet.h" 33#include <driverkit/hwmodel.h> 34#include <driverkit/iommu.h> 35#include <skb/skb.h> 36 37#define VMCB_SIZE 0x1000 // 4KB 38 39#ifdef CONFIG_SVM 40#define IOPM_SIZE 0x3000 // 12KB 41#define MSRPM_SIZE 0x2000 // 8KB 42#else 43#define IOBMP_A_SIZE 0x1000 // 4KB 44#define IOBMP_B_SIZE 0x1000 // 4KB 45#define MSRPM_SIZE 0x1000 // 4KB 46#endif 47 48#define RM_MEM_SIZE (0x100000 + BASE_PAGE_SIZE) // 1MB + A20 gate space 49 50#define APIC_BASE 0xfee00000 51 52#define SERIAL_DRIVER "serial0.raw" 53 54#ifndef CONFIG_SVM 55extern uint16_t saved_exit_reason; 56extern uint64_t saved_exit_qual, saved_rip; 57 58// List of MSRs that are saved on VM-exit and loaded on VM-entry. 59static uint32_t msr_list[VMX_MSR_COUNT] = 60 {X86_MSR_KERNEL_GS_BASE, X86_MSR_STAR, X86_MSR_LSTAR, X86_MSR_CSTAR, X86_MSR_SFMASK}; 61 62// Saved priority of the most recent irq that is asserted. 63uint8_t interrupt_priority = 0; 64#endif 65 66#ifndef CONFIG_SVM 67static inline int vmx_guest_msr_index(uint32_t msr_index) 68{ 69 for (int i = 0; i < VMX_MSR_COUNT; i++) { 70 if (msr_list[i] == msr_index) { 71 return i; 72 } 73 } 74 return -1; 75} 76 77static void initialize_guest_msr_area(struct guest *g) 78{ 79 struct msr_entry *guest_msr_area = (struct msr_entry *)g->msr_area_va; 80 81 // The values of the MSRs in the guest MSR area are all set to 0. 82 for (int i = 0; i < VMX_MSR_COUNT; i++) { 83 guest_msr_area[i].index = msr_list[i]; 84 guest_msr_area[i].val = 0x0; 85 } 86 87 errval_t err = invoke_dispatcher_vmwrite(g->dcb_cap, VMX_EXIT_MSR_STORE_F, g->msr_area_pa); 88 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_EXIT_MSR_STORE_CNT, VMX_MSR_COUNT); 89 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_ENTRY_MSR_LOAD_F, g->msr_area_pa); 90 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_ENTRY_MSR_LOAD_CNT, VMX_MSR_COUNT); 91 assert(err_is_ok(err)); 92} 93#endif 94 95lvaddr_t guest_offset = 0; 96static struct guest __guest; 97static struct guest *__guestp = NULL; 98 99#ifdef CONFIG_SVM 100/// stores the last used guest ASID 101static uint32_t last_guest_asid = 0; 102#endif 103 104// FIXME: this is somewhat broken by design... we should emit proper exceptions 105// to the guest opposed to just halt the VM 106#define guest_assert(g, e) \ 107 ((e) ? (void)0 : (handle_vmexit_unhandeled(g), assert(e))) 108 109static errval_t 110guest_slot_alloc(struct guest *g, struct capref *ret) 111{ 112 return g->slot_alloc.a.alloc(&g->slot_alloc.a, ret); 113} 114 115errval_t guest_vspace_map_wrapper(struct vspace *vspace, lvaddr_t vaddr, 116 struct capref frame, size_t size) 117{ 118 errval_t err; 119 struct vregion *vregion = NULL; 120 struct memobj_one_frame *memobj = NULL; 121 122 // Allocate space 123 vregion = malloc(sizeof(struct vregion)); 124 if (!vregion) { 125 err = LIB_ERR_MALLOC_FAIL; 126 goto error; 127 } 128 memobj = malloc(sizeof(struct memobj_one_frame)); 129 if (!memobj) { 130 err = LIB_ERR_MALLOC_FAIL; 131 goto error; 132 } 133 134 // Create the objects 135 err = memobj_create_one_frame(memobj, size, 0); 136 if (err_is_fail(err)) { 137 err = err_push(err, LIB_ERR_MEMOBJ_CREATE_ANON); 138 goto error; 139 } 140 err = memobj->m.f.fill(&memobj->m, 0, frame, size); 141 if (err_is_fail(err)) { 142 err = err_push(err, LIB_ERR_MEMOBJ_FILL); 143 goto error; 144 } 145 err = vregion_map_fixed(vregion, vspace, &memobj->m, 0, size, vaddr, 146 VREGION_FLAGS_READ | VREGION_FLAGS_WRITE | VREGION_FLAGS_EXECUTE); 147 if (err_is_fail(err)) { 148 err = LIB_ERR_VSPACE_MAP; 149 goto error; 150 } 151 err = memobj->m.f.pagefault(&memobj->m, vregion, 0, 0); 152 if (err_is_fail(err)) { 153 err = err_push(err, LIB_ERR_MEMOBJ_PAGEFAULT_HANDLER); 154 goto error; 155 } 156 157 return SYS_ERR_OK; 158 159 error: // XXX: proper cleanup 160 if (vregion) { 161 free(vregion); 162 } 163 if (memobj) { 164 free(memobj); 165 } 166 return err; 167} 168 169#ifdef DISABLE_MODEL 170#define GUEST_VSPACE_SIZE 1073741824UL // 1GB 171#else 172#define GUEST_VSPACE_SIZE (1ul<<32) // GB 173#endif 174static errval_t vspace_map_wrapper(lvaddr_t vaddr, struct capref frame, 175 size_t size) 176{ 177 errval_t err; 178 static struct memobj_anon *memobj = NULL; 179 static struct vregion *vregion = NULL; 180 static bool initialized = false; 181 182 if (!initialized) { 183 // Allocate space 184 memobj = malloc(sizeof(struct memobj_anon)); 185 if (!memobj) { 186 return LIB_ERR_MALLOC_FAIL; 187 } 188 vregion = malloc(sizeof(struct vregion)); 189 if (!vregion) { 190 return LIB_ERR_MALLOC_FAIL; 191 } 192 193 // Create a memobj and vregion 194 err = memobj_create_anon(memobj, GUEST_VSPACE_SIZE, 0); 195 if (err_is_fail(err)) { 196 return err_push(err, LIB_ERR_MEMOBJ_CREATE_ANON); 197 } 198 err = vregion_map(vregion, get_current_vspace(), &memobj->m, 0, 199 GUEST_VSPACE_SIZE, VREGION_FLAGS_READ_WRITE); 200 if (err_is_fail(err)) { 201 return err_push(err, LIB_ERR_VREGION_MAP); 202 } 203 204 guest_offset = vregion_get_base_addr(vregion); 205 initialized = true; 206 } 207 208 // Create mapping 209 err = memobj->m.f.fill(&memobj->m, vaddr, frame, size); 210 if (err_is_fail(err)) { 211 return err_push(err, LIB_ERR_MEMOBJ_FILL); 212 } 213 err = memobj->m.f.pagefault(&memobj->m, vregion, vaddr, 0); 214 if (err_is_fail(err)) { 215 return err_push(err, LIB_ERR_MEMOBJ_PAGEFAULT_HANDLER); 216 } 217 218 return SYS_ERR_OK; 219} 220// allocates some bytes of memory for the guest starting at a specific addr 221// also performs the mapping into the vspace of the monitor 222errval_t 223alloc_guest_mem(struct guest *g, lvaddr_t guest_paddr, size_t bytes) 224{ 225 errval_t err; 226 227 // only allow multiple of page sizes to be allocated 228 assert(bytes > 0 && (bytes & BASE_PAGE_MASK) == 0); 229 // do not allow allocation outside of the guests physical memory 230 assert(guest_paddr + bytes <= g->mem_high_va); 231 232 // Allocate frame 233 struct capref cap; 234 235#ifdef DISABLE_MODEL 236 int32_t node_id_self = driverkit_hwmodel_get_my_node_id(); 237 int32_t node_id_ram = driverkit_hwmodel_lookup_dram_node_id(); 238 int32_t nodes_data[] = {node_id_self, 0}; 239 240 err = driverkit_hwmodel_frame_alloc(&cap, bytes, node_id_ram, nodes_data); 241 if (err_is_fail(err)) { 242 return err; 243 } 244 245#else 246 if (err_is_fail(err)) { 247 return err_push(err, LIB_ERR_SLOT_ALLOC); 248 } 249 err = frame_create(cap, bytes, NULL); 250 if (err_is_fail(err)) { 251 return err_push(err, LIB_ERR_FRAME_CREATE); 252 } 253#endif 254 255 // Map into the guest vspace 256 err = guest_vspace_map_wrapper(&g->vspace, guest_paddr, cap, bytes); 257 if (err_is_fail(err)) { 258 return err; 259 } 260 261 // Create a copy of the capability to map in our vspace 262 struct capref host_cap; 263 err = slot_alloc(&host_cap); 264 if (err_is_fail(err)) { 265 return err; 266 } 267 err = cap_copy(host_cap, cap); 268 if (err_is_fail(err)) { 269 return err; 270 } 271 272 // Map into my vspace 273 err = vspace_map_wrapper(guest_to_host(guest_paddr), host_cap, bytes); 274 if (err_is_fail(err)) { 275 return err; 276 } 277 278 struct frame_identity frameid = { .base = 0, .bytes = 0 }; 279 errval_t r = frame_identify(cap, &frameid); 280 assert(err_is_ok(r)); 281 VMKIT_PCI_DEBUG("alloc_guest_mem: frameid.base: 0x%lx, frameid.bytes: %zd, " 282 "g->mem_low_va: 0x%lx, g->mem_high_va: 0x%lx\n", 283 frameid.base, frameid.bytes, g->mem_low_va, g->mem_high_va); 284 285 return SYS_ERR_OK; 286} 287 288static void 289initialize_iopm (struct guest *self) { 290 // intercept all IO port accesses (for now) 291#ifdef CONFIG_SVM 292 memset((void*)self->iopm_va, 0xFF, IOPM_SIZE); 293#else 294 memset((void*)self->iobmp_a_va, 0xFF, IOBMP_A_SIZE); 295 memset((void*)self->iobmp_b_va, 0xFF, IOBMP_B_SIZE); 296#endif 297} 298 299// access_mode: 0 all access, 1 read intercept, 2 write intercept, 3 all interc. 300static inline void 301set_msr_access (struct guest *g, uint32_t msr, int access_mode) 302{ 303 assert(access_mode >= 0 && access_mode <= 3); 304 305 // a region a 2K bytes represents the access bits of 8K MSRs, therefore each 306 // MSR takes two bits (one for rdmsr and one for wrmsr) 307 uintptr_t byte_offset = (msr & 0xffff) / 4; 308 int bit_offset = ((msr & 0xffff) % 4) * 2; 309 310 if (msr < 0x2000) { 311 // do nothing 312 } else if (msr >= 0xc0000000 && msr < 0xc0002000) { 313 byte_offset += 0x800; 314 } else if (msr >= 0xc0010000 && msr < 0xc0012000) { 315 byte_offset += 0x1000; 316 } else { 317 assert(!"not reached"); 318 } 319 320 assert(byte_offset < MSRPM_SIZE); 321 322 // read the byte holding the relevant bits 323 uint8_t val = *(uint8_t *)(g->msrpm_va + byte_offset); 324 // set the access params according to the arguments 325 val = (val & ~(0x3 << bit_offset)) | (access_mode << bit_offset); 326 // store the modified value back in the map 327 *(uint8_t *)(g->msrpm_va + byte_offset) = val; 328 329 //printf("MSR: msr %x, byte_offset %lx, bit_offset %x, val %x\n", msr, byte_offset, bit_offset, val); 330} 331 332static void 333initialize_msrpm (struct guest *g) { 334 // intercept all MSR accesses (for now) 335 memset((void*)g->msrpm_va, 0xff, MSRPM_SIZE); 336#if 0 337 // allow performance counters and evnets MSR accesses 338 set_msr_access (g, 0xc0010000, 0); 339 set_msr_access (g, 0xc0010001, 0); 340 set_msr_access (g, 0xc0010002, 0); 341 set_msr_access (g, 0xc0010003, 0); 342 set_msr_access (g, 0xc0010004, 0); 343 set_msr_access (g, 0xc0010005, 0); 344 set_msr_access (g, 0xc0010006, 0); 345 set_msr_access (g, 0xc0010007, 0); 346#endif 347} 348 349#define INIT_DATA_SEGREG(vmcb,x) \ 350do { \ 351 amd_vmcb_seg_attrib_t __sa = { \ 352 .segtype = 3, \ 353 .p = 1, \ 354 .s = 1 \ 355 }; \ 356 amd_vmcb_##x## _attrib_wr((vmcb), __sa); \ 357 amd_vmcb_##x## _selector_wr((vmcb), 0x0); \ 358 amd_vmcb_##x## _base_wr((vmcb), 0x0); \ 359 amd_vmcb_##x## _limit_wr((vmcb), 0xffff); \ 360} while (0) 361 362#define INIT_CODE_SEGREG(vmcb,x) \ 363do { \ 364 amd_vmcb_seg_attrib_t __sa = { \ 365 .segtype = 11, \ 366 .p = 1, \ 367 .s = 1 \ 368 }; \ 369 amd_vmcb_##x## _attrib_wr((vmcb), __sa); \ 370 amd_vmcb_##x## _selector_wr((vmcb), 0xf000); \ 371 amd_vmcb_##x## _base_wr((vmcb), 0xffff0000); \ 372 amd_vmcb_##x## _limit_wr((vmcb), 0xffff); \ 373} while (0) 374 375#define INIT_SYS_SEGREG(vmcb,x) \ 376do { \ 377 amd_vmcb_seg_attrib_t __sa = { \ 378 .segtype = 2, \ 379 .p = 1 \ 380 }; \ 381 amd_vmcb_##x## _attrib_wr((vmcb), __sa); \ 382 amd_vmcb_##x## _selector_wr((vmcb), 0x0); \ 383 amd_vmcb_##x## _base_wr((vmcb), 0x0); \ 384 amd_vmcb_##x## _limit_wr((vmcb), 0xffff); \ 385} while (0) 386 387/* This method initializes a new VMCB memory regsion and sets the initial 388 * machine state as defined by the AMD64 architecture specification */ 389#ifdef CONFIG_SVM 390static void 391initialize_vmcb (struct guest *self) { 392 amd_vmcb_initialize(&self->vmcb, (mackerel_addr_t)self->vmcb_va); 393 394 // 1. Initialize intercepts 395 396 /* For now we intercept just everything */ 397 398 amd_vmcb_cr_access_wr_raw(&self->vmcb, ~0u); 399 amd_vmcb_cr_access_rdcr2_wrf(&self->vmcb, 0); 400 amd_vmcb_cr_access_wrcr2_wrf(&self->vmcb, 0); 401 amd_vmcb_cr_access_rdcr4_wrf(&self->vmcb, 0); 402 amd_vmcb_cr_access_wrcr4_wrf(&self->vmcb, 0); 403 404 // FIXME: ignoring DR accesses may be insecure 405 //amd_vmcb_dr_access_wr_raw(&self->vmcb, ~0u); 406 amd_vmcb_exceptions_wr_raw(&self->vmcb, ~0u); 407 amd_vmcb_exceptions_vector7_wrf(&self->vmcb, 0); 408 amd_vmcb_exceptions_vector14_wrf(&self->vmcb, 0); 409 410 amd_vmcb_intercepts_wr_raw(&self->vmcb, 0x1fffffffffff); 411 amd_vmcb_intercepts_pushf_wrf(&self->vmcb, 0); 412 amd_vmcb_intercepts_popf_wrf(&self->vmcb, 0); 413 amd_vmcb_intercepts_invlpg_wrf(&self->vmcb, 0); 414 amd_vmcb_intercepts_rdtsc_wrf(&self->vmcb, 0); 415 amd_vmcb_intercepts_rdtscp_wrf(&self->vmcb, 0); 416 amd_vmcb_intercepts_iret_wrf(&self->vmcb, 0); 417 amd_vmcb_intercepts_wbinvd_wrf(&self->vmcb, 0); 418 amd_vmcb_intercepts_pause_wrf(&self->vmcb, 0); 419 amd_vmcb_intercepts_vintr_wrf(&self->vmcb, 0); 420 421 // 2. Setup some config fields 422 423 // physical addresses of IOPM and MSRPM_SIZE 424 amd_vmcb_iopm_base_pa_wr(&self->vmcb, self->iopm_pa); 425 amd_vmcb_msrpm_base_pa_wr(&self->vmcb, self->msrpm_pa); 426 // assign guest ASID 427 // FIXME: use real asid allocator. BF does not know about tagged TLBs atm 428 amd_vmcb_tlb_guest_asid_wrf(&self->vmcb, ++last_guest_asid); 429 // enable virtual intr masking 430 amd_vmcb_vintr_vintr_masking_wrf(&self->vmcb, 1); 431 // enable nested paging 432 amd_vmcb_np_enable_wrf(&self->vmcb, 1); 433 434 /* 3. Guest state initialization 435 * according to Intels Manual 3A: Table 9-1. */ 436 437 // The second bit of rflags needs to be 1, also indicate that we support the 438 // CPUID instruction. 439 amd_vmcb_rflags_wr_raw(&self->vmcb, 0x00200002); 440 amd_vmcb_rip_wr(&self->vmcb, 0x0000fff0); 441 amd_vmcb_cr0_wr_raw(&self->vmcb, 0x60000010); 442 443 INIT_CODE_SEGREG(&self->vmcb, cs); 444 INIT_DATA_SEGREG(&self->vmcb, ss); 445 INIT_DATA_SEGREG(&self->vmcb, ds); 446 INIT_DATA_SEGREG(&self->vmcb, es); 447 INIT_DATA_SEGREG(&self->vmcb, fs); 448 INIT_DATA_SEGREG(&self->vmcb, gs); 449 450 INIT_SYS_SEGREG(&self->vmcb, gdtr); 451 INIT_SYS_SEGREG(&self->vmcb, idtr); 452 INIT_SYS_SEGREG(&self->vmcb, ldtr); 453 INIT_SYS_SEGREG(&self->vmcb, tr); 454 455 amd_vmcb_dr6_wr(&self->vmcb, 0xffff0ff0); 456 amd_vmcb_dr7_wr(&self->vmcb, 0x00000400); 457 458 // taken from the linux SVM source 459 amd_vmcb_gpat_wr(&self->vmcb, 0x0007040600070406ul); 460 461 // svm requires guest EFER.SVME to be set 462 amd_vmcb_efer_svme_wrf(&self->vmcb, 1); 463} 464#endif 465 466static void 467idc_handler(void *arg) 468{ 469 struct guest *g = arg; 470 errval_t err; 471 472 // consume message 473 struct lmp_recv_buf buf = { .buflen = 0 }; 474 err = lmp_endpoint_recv(g->monitor_ep, &buf, NULL); 475 assert(err_is_ok(err)); 476 477 // run real handler 478 guest_handle_vmexit(g); 479 480 // re-register 481 struct event_closure cl = { 482 .handler = idc_handler, 483 .arg = arg, 484 }; 485 err = lmp_endpoint_register(g->monitor_ep, get_default_waitset(), cl); 486 assert(err_is_ok(err)); 487} 488 489/* This method duplicates some code from spawndomain since we need to spawn very 490 * special domains */ 491static void 492spawn_guest_domain (struct guest *self) { 493 errval_t err; 494 495 // create the guest virtual address space 496 struct capref vnode_cap; 497 err = guest_slot_alloc(self, &vnode_cap); 498 assert(err_is_ok(err)); 499 err = vnode_create(vnode_cap, ObjType_VNode_x86_64_pml4); 500 assert(err_is_ok(err)); 501 502 struct pmap *pmap = malloc(sizeof(struct pmap_x86)); 503 assert(pmap); 504 err = pmap_x86_64_init(pmap, &self->vspace, vnode_cap, NULL); 505 assert(err_is_ok(err)); 506 err = vspace_init(&self->vspace, pmap); 507 assert(err_is_ok(err)); 508 509 // create DCB 510 err = guest_slot_alloc(self, &self->dcb_cap); 511 assert(err_is_ok(err)); 512 err = dispatcher_create(self->dcb_cap); 513 assert(err_is_ok(err)); 514 515 // create end point 516 struct capref ep_cap; 517 518 // use minimum-sized endpoint, because we don't need to buffer >1 vmexit 519 err = endpoint_create(LMP_RECV_LENGTH, &ep_cap, &self->monitor_ep); 520 assert(err_is_ok(err)); 521 522 // register to receive on this endpoint 523 struct event_closure cl = { 524 .handler = idc_handler, 525 .arg = self, 526 }; 527 err = lmp_endpoint_register(self->monitor_ep, get_default_waitset(), cl); 528 assert(err_is_ok(err)); 529 530 // setup the DCB 531 err = invoke_dispatcher_setup_guest(self->dcb_cap, ep_cap, vnode_cap, 532 self->vmcb_cap, self->ctrl_cap); 533 assert(err_is_ok(err)); 534 535#ifndef CONFIG_SVM 536 initialize_guest_msr_area(self); 537 538 err = 0; 539 err += invoke_dispatcher_vmwrite(self->dcb_cap, VMX_IOBMP_A_F, self->iobmp_a_pa); 540 err += invoke_dispatcher_vmwrite(self->dcb_cap, VMX_IOBMP_B_F, self->iobmp_b_pa); 541 err += invoke_dispatcher_vmwrite(self->dcb_cap, VMX_MSRBMP_F, self->msrpm_pa); 542 assert(err_is_ok(err)); 543#endif 544 // set up the guests physical address space 545 self->mem_low_va = 0; 546 // FIXME: Hardcoded guest memory size 547 // allocate the memory used for real mode 548 // This is not 100% necessary since one could also catch the pagefaults. 549 // If we allocate the whole memory at once we use less caps and reduce 550 // the risk run out of CSpace. 551#ifdef DISABLE_MODEL 552 self->mem_high_va = 0x80000000; 553 err = alloc_guest_mem(self, 0x0, 0x80000000); 554#else 555 self->mem_high_va = GUEST_VSPACE_SIZE; 556 err = alloc_guest_mem(self, 0x0, GUEST_VSPACE_SIZE); 557#endif 558 assert_err(err, "alloc_guest_mem"); 559} 560 561static void 562install_grub_stage2 (struct guest *g, void *img, size_t img_size) 563{ 564 assert(img != NULL); 565 566 /* the grub image goes to 0x8000 according to 567 * http://www.gnu.org/software/grub/manual/html_node/Memory-map.html */ 568 memcpy((void *)(guest_to_host(g->mem_low_va + 0x8000)), img, img_size); 569 // according to grub stage2 source its entry point is at 0x8200 570#ifdef CONFIG_SVM 571 amd_vmcb_rip_wr(&g->vmcb, 0x8200); 572 // switch to the first segment 573 amd_vmcb_cs_selector_wr(&g->vmcb, 0x0); 574 amd_vmcb_cs_base_wr(&g->vmcb, 0x0); 575 amd_vmcb_cs_limit_wr(&g->vmcb, 0xffff); 576#else 577 errval_t err = invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, 0x8200); 578 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_CS_SEL, 0x0); 579 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_CS_BASE, 0x0); 580 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_CS_LIM, 0xffff); 581 assert(err_is_ok(err)); 582#endif 583 584} 585 586#if 0 587static void 588install_debug_app (struct guest *g) 589{ 590 //static uint8_t app[] = { 0xcd, 0x20 }; 591 static uint8_t app[] = { 0xcd, 0x20, 0x90, 0x90, 0x90, 0x90, 0x90 }; 592 memcpy((void *)g->rm_mem_va, app, sizeof(app)); 593 amd_vmcb_rip_wr(&g->vmcb, 0x0); 594 // disable nested pageing in real mode 595 amd_vmcb_np_enable_wrf(&g->vmcb, 0); 596 // enable paged real mode 597 //amd_vmcb_cr0_pg_wrf(&g->vmcb, 0x1); 598 //g->save_area->cr0 |= X86_CR0_PE_MASK; 599 amd_vmcb_rsp_wr(&g->vmcb, 0x1000); 600 amd_vmcb_cs_selector_wr(&g->vmcb, 0x0); 601 amd_vmcb_cs_base_wr(&g->vmcb, 0x0); 602 amd_vmcb_cs_limit_wr(&g->vmcb, 0xffff); 603 //g->save_area->cs.selector = 0x1000; 604 //g->save_area->cs.base = 0x10000; 605 //g->save_area->cs.base = 0x1ffff; 606} 607#endif 608 609static bool 610virq_pending (void *ud, uint8_t *irq, uint8_t *irq_prio) 611{ 612 assert(ud != NULL); 613 614 struct guest *g = ud; 615#ifdef CONFIG_SVM 616 if (amd_vmcb_vintr_rd(&g->vmcb).virq == 1) { 617#else 618 uint64_t info; 619 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_ENTRY_INTR_INFO, &info); 620 assert(err_is_ok(err)); 621 if (!!(info & (1UL << 31))) { 622#endif 623 if (irq != NULL) { 624#ifdef CONFIG_SVM 625 *irq = amd_vmcb_vintr_rd(&g->vmcb).vintr_vector; 626#else 627 *irq = info & 0xff; 628#endif 629 } 630 if (irq_prio != NULL) { 631#ifdef CONFIG_SVM 632 *irq_prio = amd_vmcb_vintr_rd(&g->vmcb).vintr_prio; 633#else 634 *irq_prio = interrupt_priority; 635#endif 636 } 637 return true; 638 } else { 639 return false; 640 } 641} 642 643#ifndef CONFIG_SVM 644static bool 645virq_accepting (void *ud) 646{ 647 assert(ud != NULL); 648 649 struct guest *g = ud; 650 651 uint64_t guest_rflags; 652 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RFLAGS, &guest_rflags); 653 assert(err_is_ok(err)); 654 return (guest_rflags & (1UL << 9)); 655} 656#endif 657 658static void 659virq_handler (void *ud, uint8_t irq, uint8_t irq_prio) 660{ 661 assert(ud != NULL); 662 663 struct guest *g = ud; 664 665 // tell the hw extensions that there is a virtual IRQ pending 666#ifdef CONFIG_SVM 667 amd_vmcb_vintr_virq_wrf(&g->vmcb, 1); 668 amd_vmcb_vintr_vintr_prio_wrf(&g->vmcb, irq_prio); 669 amd_vmcb_vintr_vintr_vector_wrf(&g->vmcb, irq); 670 amd_vmcb_vintr_v_ign_tpr_wrf(&g->vmcb, 1); 671#else 672 uint64_t guest_rflags; 673 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RFLAGS, &guest_rflags); 674 assert(guest_rflags & (1UL << 9)); 675 676 uint64_t info = (0 << 8 /*HWINTR*/) | (1UL << 31 /*INTR VALID*/) | irq; 677 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_ENTRY_INTR_INFO, info); 678 679 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_ACTIV_STATE, 0x0); 680 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_INTR_STATE, 0x0); 681 assert(err_is_ok(err)); 682 683 interrupt_priority = irq_prio; 684#endif 685 // if the guest is currently waiting then we have to restart it to make 686 // forward progress 687 if (!g->runnable) { 688 g->runnable = true; 689 guest_make_runnable(g, true); 690 } 691} 692 693static void 694guest_setup (struct guest *g) 695{ 696 errval_t err; 697 698 // initialize the guests slot_allocator 699 err = two_level_slot_alloc_init(&g->slot_alloc); 700 assert_err(err, "two_level_slot_alloc_init"); 701 702 struct frame_identity fi; 703 704 // allocate memory for the vmcb 705 err = guest_slot_alloc(g, &g->vmcb_cap); 706 assert_err(err, "guest_cspace_alloc"); 707 err = frame_create(g->vmcb_cap, VMCB_SIZE, NULL); 708 assert_err(err, "frame_create"); 709 err = frame_identify(g->vmcb_cap, &fi); 710 assert_err(err, "frame_identify"); 711 g->vmcb_pa = fi.base; 712 err = vspace_map_one_frame_attr((void**)&g->vmcb_va, VMCB_SIZE, g->vmcb_cap, 713 VREGION_FLAGS_READ_WRITE_NOCACHE, 714 NULL, NULL); 715 if (err_is_fail(err)) { 716 DEBUG_ERR(err, "vspace_map_one_frame_attr failed"); 717 } 718 719 // guest control 720 err = frame_alloc(&g->ctrl_cap, sizeof(struct guest_control), NULL); 721 assert_err(err, "frame_alloc"); 722 size_t size = ROUND_UP(sizeof(struct guest_control), BASE_PAGE_SIZE); 723 err = vspace_map_one_frame_attr((void**)&g->ctrl, size, g->ctrl_cap, 724 VREGION_FLAGS_READ_WRITE_NOCACHE, 725 NULL, NULL); 726 if (err_is_fail(err)) { 727 DEBUG_ERR(err, "vspace_map_one_frame_attr failed"); 728 } 729 g->ctrl->num_vm_exits_with_monitor_invocation = 0; 730 g->ctrl->num_vm_exits_without_monitor_invocation = 0; 731#ifdef CONFIG_SVM 732 // allocate memory for the iopm 733 err = frame_alloc(&g->iopm_cap, IOPM_SIZE, NULL); 734 assert_err(err, "frame_alloc"); 735 err = frame_identify(g->iopm_cap, &fi); 736 assert_err(err, "frame_identify"); 737 g->iopm_pa = fi.base; 738 err = vspace_map_one_frame_attr((void**)&g->iopm_va, IOPM_SIZE, g->iopm_cap, 739 VREGION_FLAGS_READ_WRITE_NOCACHE, 740 NULL, NULL); 741 if (err_is_fail(err)) { 742 DEBUG_ERR(err, "vspace_map_one_frame_attr failed"); 743 } 744#else 745 // allocate memory for I/O bitmap A 746 err = frame_alloc(&g->iobmp_a_cap, IOBMP_A_SIZE, NULL); 747 assert_err(err, "frame_alloc"); 748 err = frame_identify(g->iobmp_a_cap, &fi); 749 assert_err(err, "frame_identify"); 750 g->iobmp_a_pa = fi.base; 751 err = vspace_map_one_frame_attr((void**)&g->iobmp_a_va, IOBMP_A_SIZE, g->iobmp_a_cap, 752 VREGION_FLAGS_READ_WRITE_NOCACHE, 753 NULL, NULL); 754 if (err_is_fail(err)) { 755 DEBUG_ERR(err, "vspace_map_one_frame_attr failed"); 756 } 757 758 // allocate memory for I/O bitmap B 759 err = frame_alloc(&g->iobmp_b_cap, IOBMP_B_SIZE, NULL); 760 assert_err(err, "frame_alloc"); 761 err = frame_identify(g->iobmp_b_cap, &fi); 762 assert_err(err, "frame_identify"); 763 g->iobmp_b_pa = fi.base; 764 err = vspace_map_one_frame_attr((void**)&g->iobmp_b_va, IOBMP_B_SIZE, g->iobmp_b_cap, 765 VREGION_FLAGS_READ_WRITE_NOCACHE, 766 NULL, NULL); 767 if (err_is_fail(err)) { 768 DEBUG_ERR(err, "vspace_map_one_frame_attr failed"); 769 } 770 771 // allocate memory for the guest MSR store/load area 772 err = frame_alloc(&g->msr_area_cap, VMX_MSR_AREA_SIZE, NULL); 773 assert_err(err, "frame_alloc"); 774 err = frame_identify(g->msr_area_cap, &fi); 775 assert_err(err, "frame_identify"); 776 g->msr_area_pa = fi.base; 777 err = vspace_map_one_frame_attr((void**)&g->msr_area_va, VMX_MSR_AREA_SIZE, 778 g->msr_area_cap, 779 VREGION_FLAGS_READ_WRITE_NOCACHE, 780 NULL, NULL); 781 if (err_is_fail(err)) { 782 DEBUG_ERR(err, "vspace_map_one_frame_attr failed"); 783 } 784#endif 785 // allocate memory for the msrpm 786 err = frame_alloc(&g->msrpm_cap, MSRPM_SIZE, NULL); 787 assert_err(err, "frame_alloc"); 788 err = frame_identify(g->msrpm_cap, &fi); 789 assert_err(err, "frame_identify"); 790 g->msrpm_pa = fi.base; 791 err = vspace_map_one_frame_attr((void**)&g->msrpm_va, MSRPM_SIZE, 792 g->msrpm_cap, 793 VREGION_FLAGS_READ_WRITE_NOCACHE, 794 NULL, NULL); 795 if (err_is_fail(err)) { 796 DEBUG_ERR(err, "vspace_map_one_frame_attr failed"); 797 } 798 799 // initialize the allocated structures 800 initialize_iopm(g); 801 initialize_msrpm(g); 802#ifdef CONFIG_SVM 803 initialize_vmcb(g); 804#endif 805 // spawn the guest domain 806 spawn_guest_domain(g); 807 assert (grub_image != NULL); 808 install_grub_stage2(g, grub_image, grub_image_size); 809 //install_debug_app(g); 810 811 // add virtual hardware 812 g->apic = apic_new(APIC_BASE); 813 g->lpc = lpc_new(virq_handler, virq_pending, 814#ifndef CONFIG_SVM 815 virq_accepting, 816#endif 817 g, g->apic); 818 if (hdd0_image != NULL) { 819 g->hdds[0] = hdd_new_from_memory(hdd0_image, hdd0_image_size); 820 g->hdd_count++; 821 } 822 g->console = console_new(); 823 g->serial_ports[0] = pc16550d_new(0x3f8, 4, g->lpc); 824 825 // FIXME: Which virtual uart port is connected to which host port 826 // should be adjustable from the command line or a configuration 827 // file. 828 pc16550d_attach_to_host_uart(g->serial_ports[0], SERIAL_DRIVER); 829 g->serial_ports[1] = pc16550d_new(0x2f8, 3, g->lpc); 830 g->serial_ports[2] = pc16550d_new(0x3e8, 4, g->lpc); 831 g->serial_ports[3] = pc16550d_new(0x2e8, 3, g->lpc); 832 g->serial_port_count = 4; 833 834 g->pci = pci_new(); 835 init_host_devices(g->pci); 836 837// struct pci_device *ethernet = pci_ethernet_new(g->lpc, g); 838// int r = pci_attach_device(g->pci, 0, 2, ethernet); 839// assert(r == 0); 840// 841// struct pci_device *vmkitmon_eth = pci_vmkitmon_eth_new(g->lpc, g); 842// r = pci_attach_device(g->pci, 0, 3, vmkitmon_eth); 843// assert(r==0); 844 845 // set up bios memory 846 // FIXME: find a modular way to do this 847 *(uint16_t *)guest_to_host(g->mem_low_va + 0x400) = 0x3f8; // COM1 848 *(uint16_t *)guest_to_host(g->mem_low_va + 0x402) = 0x2f8; // COM2 849 850 g->runnable = true; 851} 852 853/** 854 * \brief Create a new guest. 855 * 856 * This function creates a new guest. It will do everything necessary to make 857 * the guest accept images to run. It will create a new domain and assign some 858 * memory to that domain. Afterwards it will load a bios into the memory and 859 * set the guest initial IP to the POST entry of the bios. 860 * 861 * \return The pointer to the newly created structure describing the guest. 862 */ 863struct guest * 864guest_create (void) 865{ 866 // support the allocation of one guest for now 867 assert(__guestp == NULL); 868 __guestp = &__guest; 869 memset(__guestp, 0, sizeof(struct guest)); 870 guest_setup(__guestp); 871 return __guestp; 872} 873 874static int 875run_realmode (struct guest *g) 876{ 877 int r; 878 879 realmode_switch_to(g); 880 r = realmode_exec(); 881 assert(r == REALMODE_ERR_OK); 882 realmode_switch_from(g); 883 884 guest_handle_vmexit(g); 885 886 return 0; 887}; 888 889#ifndef CONFIG_SVM 890// Return true if the "Enable EPT" Secondary Processor-based control is 891// set in the VMCS, else false. 892static inline bool vmx_ept_enabled(struct guest *g) 893{ 894 uint64_t sp_controls; 895 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_EXEC_SEC_PROC, &sp_controls); 896 assert(err_is_ok(err)); 897 return ((sp_controls & SP_CLTS_ENABLE_EPT) != 0); 898} 899 900// Set or clear the "Descriptor-table exiting" Secondary Processor-based 901// control if val is 1 or 0, respectively. 902static inline void vmx_intercept_desc_table_wrf(struct guest *g, int val) 903{ 904 assert(val == 0 || val == 1); 905 906 uint64_t sec_proc_ctrls; 907 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_EXEC_SEC_PROC, &sec_proc_ctrls); 908 if (val) { 909 uint64_t prim_proc_ctrls; 910 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_EXEC_PRIM_PROC, &prim_proc_ctrls); 911 assert(prim_proc_ctrls & PP_CLTS_SEC_CTLS); 912 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_EXEC_SEC_PROC, 913 sec_proc_ctrls | SP_CLTS_DESC_TABLE); 914 } else { 915 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_EXEC_SEC_PROC, 916 sec_proc_ctrls & ~SP_CLTS_DESC_TABLE); 917 } 918 assert(err_is_ok(err)); 919} 920 921 922// Before entering the guest, synchronize the CR0 shadow with the guest 923// CR0 value that is potentially changed in the real-mode emulator. 924static inline void vmx_set_cr0_shadow(struct guest *g) 925{ 926 uint64_t cr0_shadow; 927 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR0, &cr0_shadow); 928 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_CR0_RD_SHADOW, cr0_shadow); 929 assert(err_is_ok(err)); 930} 931#endif 932 933/** 934 * \brief Marks a guest as runnable. 935 * 936 * A call to this method will update the guest's runnable state and, if made 937 * runnable, yield the remaining time slice to the guest domain. 938 * 939 * \return Zero on success, non-zero on error 940 */ 941errval_t 942guest_make_runnable (struct guest *g, bool run) 943{ 944 assert(g->runnable); 945 946 errval_t err; 947 948 /* If the guest is currently in real mode (CR0.PE flag clear) then we do not 949 * schedule the domain to run the virtualization but run the real-mode 950 * emulation */ 951#ifdef CONFIG_SVM 952 if (UNLIKELY(run && amd_vmcb_cr0_rd(&g->vmcb).pe == 0)) { 953 if (!g->emulated_before_exit) { 954 // do the inverse of the code below 955 amd_vmcb_intercepts_rdgdtr_wrf(&g->vmcb, 1); 956 amd_vmcb_intercepts_wrgdtr_wrf(&g->vmcb, 1); 957 amd_vmcb_intercepts_rdldtr_wrf(&g->vmcb, 1); 958 amd_vmcb_intercepts_wrldtr_wrf(&g->vmcb, 1); 959 amd_vmcb_intercepts_rdidtr_wrf(&g->vmcb, 1); 960 amd_vmcb_intercepts_wridtr_wrf(&g->vmcb, 1); 961 amd_vmcb_intercepts_rdtr_wrf(&g->vmcb, 1); 962 amd_vmcb_intercepts_wrtr_wrf(&g->vmcb, 1); 963 amd_vmcb_cr_access_rdcr0_wrf(&g->vmcb, 1); 964 amd_vmcb_cr_access_wrcr0_wrf(&g->vmcb, 1); 965 amd_vmcb_cr_access_rdcr3_wrf(&g->vmcb, 1); 966 amd_vmcb_cr_access_wrcr3_wrf(&g->vmcb, 1); 967 amd_vmcb_intercepts_intn_wrf(&g->vmcb, 1); 968 969 // mark guest as emulated 970 g->emulated_before_exit = true; 971 } 972#else 973 uint64_t guest_cr0; 974 err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR0, &guest_cr0); 975 assert(err_is_ok(err)); 976 if (UNLIKELY(run && (guest_cr0 & CR0_PE) == 0)) { 977 if (!g->emulated_before_exit) { 978 vmx_intercept_desc_table_wrf(g, 1); 979 g->emulated_before_exit = true; 980 } 981#endif 982#if 0 /* why create a thread for this? it seems fine without! -AB */ 983 struct thread *t = thread_create((thread_func_t)run_realmode, g); 984 assert(t != NULL); 985 err = thread_detach(t); 986 assert(err_is_ok(err)); 987#else 988 run_realmode(g); 989#endif 990 return SYS_ERR_OK; 991 } 992 993 /* every time we move the machine from the emulated to virtualized we need 994 * to adjust some intercepts */ 995 if (UNLIKELY(run && g->emulated_before_exit)) { 996#ifdef CONFIG_SVM 997 // we enforce NP to be enabled (no shadow paging support) 998 assert(amd_vmcb_np_rd(&g->vmcb).enable == 1); 999 1000 // disable GDTR intercept 1001 amd_vmcb_intercepts_rdgdtr_wrf(&g->vmcb, 0); 1002 amd_vmcb_intercepts_wrgdtr_wrf(&g->vmcb, 0); 1003 // disable GDTR intercept 1004 amd_vmcb_intercepts_rdldtr_wrf(&g->vmcb, 0); 1005 amd_vmcb_intercepts_wrldtr_wrf(&g->vmcb, 0); 1006 // disable IDTR intercept 1007 amd_vmcb_intercepts_rdidtr_wrf(&g->vmcb, 0); 1008 amd_vmcb_intercepts_wridtr_wrf(&g->vmcb, 0); 1009 // disable TR intercept 1010 amd_vmcb_intercepts_rdtr_wrf(&g->vmcb, 0); 1011 amd_vmcb_intercepts_wrtr_wrf(&g->vmcb, 0); 1012 // disable non essential CR0 access intercepts_t 1013 amd_vmcb_cr_access_rdcr0_wrf(&g->vmcb, 0); 1014 amd_vmcb_cr_access_wrcr0_wrf(&g->vmcb, 0); 1015 // disable CR3 access intercepts 1016 assert(amd_vmcb_np_rd(&g->vmcb).enable != 0); 1017 amd_vmcb_cr_access_rdcr3_wrf(&g->vmcb, 0); 1018 amd_vmcb_cr_access_wrcr3_wrf(&g->vmcb, 0); 1019 // disable INTn intercept 1020 // we have to be outside of real mode for this to work 1021 assert(amd_vmcb_cr0_rd(&g->vmcb).pe != 0); 1022 amd_vmcb_intercepts_intn_wrf(&g->vmcb, 0); 1023#else 1024 bool ept_enabled = vmx_ept_enabled(g); 1025 assert(ept_enabled); 1026 vmx_intercept_desc_table_wrf(g, 0); 1027 assert(guest_cr0 & CR0_PE); 1028 vmx_set_cr0_shadow(g); 1029#endif 1030 // mark guest as not emulated 1031 g->emulated_before_exit = false; 1032 } 1033 1034 // update the guets domain's runnable state 1035 err = invoke_dispatcher(g->dcb_cap, NULL_CAP, NULL_CAP, NULL_CAP, NULL_CAP, run); 1036 assert_err(err, "dispatcher_make_runnable"); 1037 // yield the dispatcher 1038 if (run) { 1039 thread_yield_dispatcher(NULL_CAP); 1040 } 1041 1042 return SYS_ERR_OK; 1043} 1044 1045/* VMEXIT hanlders */ 1046 1047#define HANDLER_ERR_OK (0) 1048#define HANDLER_ERR_FATAL (-1) 1049 1050#ifdef CONFIG_SVM 1051static int 1052handle_vmexit_unhandeled (struct guest *g) 1053{ 1054 printf("Unhandeled guest vmexit:\n"); 1055 printf(" code:\t %lx\n", amd_vmcb_exitcode_rd(&g->vmcb)); 1056 printf(" info1:\t %lx\n", amd_vmcb_exitinfo1_rd(&g->vmcb)); 1057 printf(" info2:\t %lx\n", amd_vmcb_exitinfo2_rd(&g->vmcb)); 1058 printf(" intinfo: %lx\n", amd_vmcb_exitintinfo_rd(&g->vmcb)); 1059 1060 printf("VMCB save area:\n"); 1061 printf(" cr0:\t%lx\n", amd_vmcb_cr0_rd_raw(&g->vmcb)); 1062 printf(" cr2:\t%lx\n", amd_vmcb_cr2_rd_raw(&g->vmcb)); 1063 printf(" cr3:\t%lx\n", amd_vmcb_cr3_rd_raw(&g->vmcb)); 1064 printf(" cr4:\t%lx\n", amd_vmcb_cr4_rd_raw(&g->vmcb)); 1065 printf(" efer:\t%lx\n", amd_vmcb_efer_rd_raw(&g->vmcb)); 1066 printf(" rip:\t%lx\n", amd_vmcb_rip_rd_raw(&g->vmcb)); 1067 printf(" cs:\tselector %x, base %lx, limit %x, attrib %x\n", 1068 amd_vmcb_cs_selector_rd(&g->vmcb), amd_vmcb_cs_base_rd(&g->vmcb), 1069 amd_vmcb_cs_limit_rd(&g->vmcb), amd_vmcb_cs_attrib_rd_raw(&g->vmcb)); 1070 printf(" ds:\tselector %x, base %lx, limit %x, attrib %x\n", 1071 amd_vmcb_ds_selector_rd(&g->vmcb), amd_vmcb_ds_base_rd(&g->vmcb), 1072 amd_vmcb_ds_limit_rd(&g->vmcb), amd_vmcb_ds_attrib_rd_raw(&g->vmcb)); 1073 printf(" es:\tselector %x, base %lx, limit %x, attrib %x\n", 1074 amd_vmcb_es_selector_rd(&g->vmcb), amd_vmcb_es_base_rd(&g->vmcb), 1075 amd_vmcb_es_limit_rd(&g->vmcb), amd_vmcb_es_attrib_rd_raw(&g->vmcb)); 1076 printf(" ss:\tselector %x, base %lx, limit %x, attrib %x\n", 1077 amd_vmcb_ss_selector_rd(&g->vmcb), amd_vmcb_ss_base_rd(&g->vmcb), 1078 amd_vmcb_ss_limit_rd(&g->vmcb), amd_vmcb_ss_attrib_rd_raw(&g->vmcb)); 1079 printf(" rax:\t%lx\n", amd_vmcb_rax_rd_raw(&g->vmcb)); 1080 printf(" rbx:\t%lx\n", g->ctrl->regs.rbx); 1081 printf(" rcx:\t%lx\n", g->ctrl->regs.rcx); 1082 printf(" rdx:\t%lx\n", g->ctrl->regs.rdx); 1083 printf(" rsi:\t%lx\n", g->ctrl->regs.rsi); 1084 printf(" rdi:\t%lx\n", g->ctrl->regs.rdi); 1085 1086 return HANDLER_ERR_FATAL; 1087} 1088#else 1089static int 1090handle_vmexit_unhandeled (struct guest *g) 1091{ 1092 printf("Unhandeled guest vmexit:\n"); 1093 printf(" exit reason:\t %"PRIu16"\n", saved_exit_reason); 1094 printf(" exit qualification:\t %"PRIx64"\n", saved_exit_qual); 1095 printf(" next rip (I/O instruction):\t %"PRIx64"\n", saved_rip); 1096 1097 uint64_t gpaddr; 1098 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GPADDR_F, &gpaddr); 1099 printf(" guest physical-address:\t %"PRIx64"\n", gpaddr); 1100 1101 uint64_t guest_cr0, guest_cr3, guest_cr4; 1102 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR0, &guest_cr0); 1103 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR3, &guest_cr3); 1104 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR4, &guest_cr4); 1105 1106 uint64_t guest_efer, guest_rip; 1107 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_EFER_F, &guest_efer); 1108 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 1109 1110 uint64_t guest_cs_sel, guest_cs_base, guest_cs_lim, 1111 guest_cs_access; 1112 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CS_SEL, &guest_cs_sel); 1113 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CS_BASE, &guest_cs_base); 1114 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CS_LIM, &guest_cs_lim); 1115 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CS_ACCESS, &guest_cs_access); 1116 1117 uint64_t guest_ds_sel, guest_ds_base, guest_ds_lim, 1118 guest_ds_access; 1119 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_DS_SEL, &guest_ds_sel); 1120 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_DS_BASE, &guest_ds_base); 1121 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_DS_LIM, &guest_ds_lim); 1122 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_DS_ACCESS, &guest_ds_access); 1123 1124 uint64_t guest_es_sel, guest_es_base, guest_es_lim, 1125 guest_es_access; 1126 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_ES_SEL, &guest_es_sel); 1127 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_ES_BASE, &guest_es_base); 1128 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_ES_LIM, &guest_es_lim); 1129 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_ES_ACCESS, &guest_es_access); 1130 1131 uint64_t guest_ss_sel, guest_ss_base, guest_ss_lim, 1132 guest_ss_access; 1133 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_SS_SEL, &guest_ss_sel); 1134 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_SS_BASE, &guest_ss_base); 1135 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_SS_LIM, &guest_ss_lim); 1136 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_SS_ACCESS, &guest_ss_access); 1137 assert(err_is_ok(err)); 1138 1139 printf("VMCS save area:\n"); 1140 printf(" cr0:\t%lx\n", guest_cr0); 1141 printf(" cr3:\t%lx\n", guest_cr3); 1142 printf(" cr4:\t%lx\n", guest_cr4); 1143 printf(" efer:\t%lx\n", guest_efer); 1144 printf(" rip:\t%lx\n", guest_rip); 1145 printf(" cs:\tselector %lx, base %lx, limit %lx, access %lx\n", 1146 guest_cs_sel, guest_cs_base, guest_cs_lim, guest_cs_access); 1147 printf(" ds:\tselector %lx, base %lx, limit %lx, access %lx\n", 1148 guest_ds_sel, guest_ds_base, guest_ds_lim, guest_ds_access); 1149 printf(" es:\tselector %lx, base %lx, limit %lx, access %lx\n", 1150 guest_es_sel, guest_es_base, guest_es_lim, guest_es_access); 1151 printf(" ss:\tselector %lx, base %lx, limit %lx, access %lx\n", 1152 guest_ss_sel, guest_ss_base, guest_ss_lim, guest_ss_access); 1153 printf(" rax:\t%lx\n", g->ctrl->regs.rax); 1154 printf(" rbx:\t%lx\n", g->ctrl->regs.rbx); 1155 printf(" rcx:\t%lx\n", g->ctrl->regs.rcx); 1156 printf(" rdx:\t%lx\n", g->ctrl->regs.rdx); 1157 printf(" rsi:\t%lx\n", g->ctrl->regs.rsi); 1158 printf(" rdi:\t%lx\n", g->ctrl->regs.rdi); 1159 1160 return HANDLER_ERR_FATAL; 1161} 1162#endif 1163 1164static inline uint64_t 1165lookup_paddr_long_mode (struct guest *g, uint64_t vaddr) 1166{ 1167 union x86_lm_va va = { .raw = vaddr }; 1168 uint64_t *page_table; 1169 1170 // get a pointer to the pml4 table 1171#ifdef CONFIG_SVM 1172 page_table = (uint64_t *)guest_to_host(amd_vmcb_cr3_rd(&g->vmcb)); 1173#else 1174 uint64_t guest_cr3; 1175 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR3, &guest_cr3); 1176 assert(err_is_ok(err)); 1177 page_table = (uint64_t *)guest_to_host(guest_cr3); 1178#endif 1179 // get pml4 entry 1180 union x86_lm_pml4_entry pml4e = { .raw = page_table[va.u.pml4_idx] }; 1181 assert (pml4e.u.p == 1); 1182 1183 // get a pointer to the pdp table 1184 page_table = (uint64_t *)guest_to_host(pml4e.u.pdp_base_pa << 12); 1185 // get pdp entry 1186 union x86_lm_pdp_entry pdpe = { .raw = page_table[va.u.pdp_idx] }; 1187 assert(pdpe.u.p == 1); 1188 // check for 1GB page (PS bit set) 1189 if (pdpe.u.ps == 1) { 1190 return (pdpe.u1gb.base_pa << 30) | va.u1gb.pa_offset; 1191 } 1192 1193 // get a pointer to the pd table 1194 page_table = (uint64_t *)guest_to_host(pdpe.u.pd_base_pa << 12); 1195 // get pd entry 1196 union x86_lm_pd_entry pde = { .raw = page_table[va.u.pd_idx] }; 1197 if (pde.u.p == 0) { 1198 printf("g2h %lx, pml4e %p %lx, pdpe %p %lx, pde %p %lx\n", 1199 guest_to_host(0), &pml4e, pml4e.raw, &pdpe, pdpe.raw, &pde, pde.raw); 1200 } 1201 assert(pde.u.p == 1); 1202 // check for 2MB page (PS bit set) 1203 if (pde.u.ps == 1) { 1204 return (pde.u2mb.base_pa << 21) | va.u2mb.pa_offset; 1205 } 1206 1207 // get a pointer to the page table 1208 page_table = (uint64_t *)guest_to_host(pde.u.pt_base_pa << 12); 1209 // get the page table entry 1210 union x86_lm_pt_entry pte = { .raw = page_table[va.u.pt_idx] }; 1211 assert(pte.u.p == 1); 1212 1213 return (pte.u.base_pa << 12) | va.u.pa_offset; 1214} 1215 1216static inline uint32_t 1217lookup_paddr_legacy_mode (struct guest *g, uint32_t vaddr) 1218{ 1219// printf("lookup_paddr_legacy_mode enter\n"); 1220 // PAE not supported 1221#ifdef CONFIG_SVM 1222 guest_assert(g, amd_vmcb_cr4_rd(&g->vmcb).pae == 0); 1223#else 1224 uint64_t guest_cr4; 1225 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR4, &guest_cr4); 1226 guest_assert(g, (guest_cr4 & CR4_PAE) == 0); 1227#endif 1228 union x86_legm_va va = { .raw = vaddr }; 1229 uint32_t *page_table; 1230 1231 // get a pointer to the pd table 1232#ifdef CONFIG_SVM 1233 page_table = (uint32_t *)guest_to_host(amd_vmcb_cr3_rd(&g->vmcb)); 1234#else 1235 uint64_t guest_cr3; 1236 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR3, &guest_cr3); 1237 assert(err_is_ok(err)); 1238 page_table = (uint32_t *)guest_to_host(guest_cr3); 1239#endif 1240 1241 // get pd entry 1242 union x86_legm_pd_entry pde = { .raw = page_table[va.u.pd_idx] }; 1243 assert (pde.u.p == 1); 1244 // check for 4MB page (PS bit set) 1245 if (pde.u.ps == 1) { 1246 return (pde.u4mb.base_pa << 22) | va.u4mb.pa_offset; 1247 } 1248 1249 // get a pointer to the page table 1250 page_table = (uint32_t *)guest_to_host(pde.u.pt_base_pa << 12); 1251 // get the page table entry 1252 union x86_legm_pt_entry pte = { .raw = page_table[va.u.pt_idx] }; 1253 assert(pte.u.p == 1); 1254 1255 return (pte.u.base_pa << 12) | va.u.pa_offset; 1256} 1257 1258// retunrs a pointer to a byte array starting at the current instruction 1259static inline int 1260get_instr_arr (struct guest *g, uint8_t **arr) 1261{ 1262#ifdef CONFIG_SVM 1263 if (UNLIKELY(amd_vmcb_cr0_rd(&g->vmcb).pg == 0)) { 1264#else 1265 uint64_t guest_cr0; 1266 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR0, &guest_cr0); 1267 if (UNLIKELY((guest_cr0 & CR0_PG) == 0)) { 1268#endif 1269 //printf("Segmentation active!\n"); 1270 // without paging 1271 // take segmentation into account 1272#ifdef CONFIG_SVM 1273 *arr = (uint8_t *)(guest_to_host(g->mem_low_va) + 1274 amd_vmcb_cs_base_rd(&g->vmcb) + 1275 amd_vmcb_rip_rd(&g->vmcb)); 1276#else 1277 uint64_t guest_cs_base, guest_rip; 1278 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CS_BASE, &guest_cs_base); 1279 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 1280 *arr = (uint8_t *)(guest_to_host(g->mem_low_va) + 1281 guest_cs_base + guest_rip); 1282#endif 1283 } else { 1284 // with paging 1285#ifdef CONFIG_SVM 1286 if (amd_vmcb_efer_rd(&g->vmcb).lma == 1) { 1287#else 1288 uint64_t guest_efer; 1289 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_EFER_F, &guest_efer); 1290 if (guest_efer & EFER_LMA) { 1291#endif 1292 // long mode 1293#ifdef CONFIG_SVM 1294 if (amd_vmcb_cs_attrib_rd(&g->vmcb).l == 1) { 1295 // 64-bit mode 1296 *arr = (uint8_t *)guest_to_host(lookup_paddr_long_mode(g, 1297 amd_vmcb_rip_rd(&g->vmcb))); 1298#else 1299 uint64_t cs_access_rights, guest_rip; 1300 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CS_ACCESS, &cs_access_rights); 1301 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 1302 if (cs_access_rights & ACCESS_RIGHTS_LONG_MODE) { 1303 *arr = (uint8_t *)guest_to_host(lookup_paddr_long_mode(g, 1304 guest_rip)); 1305#endif 1306 } else { 1307 // cmpatibility mode 1308 guest_assert(g, !"compatiblity mode not supported yet"); 1309 } 1310 } else { 1311 // Legacy (aka. Paged Protected) Mode 1312#ifdef CONFIG_SVM 1313 assert(amd_vmcb_cr0_rd(&g->vmcb).pe == 1); 1314 1315 *arr = (uint8_t *)guest_to_host(lookup_paddr_legacy_mode(g, 1316 amd_vmcb_rip_rd(&g->vmcb))); 1317#else 1318 assert(guest_cr0 & CR0_PE); 1319 1320 uint64_t guest_rip; 1321 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 1322 *arr = (uint8_t *)guest_to_host(lookup_paddr_legacy_mode(g, 1323 guest_rip)); 1324#endif 1325 } 1326 } 1327#ifndef CONFIG_SVM 1328 assert(err_is_ok(err)); 1329#endif 1330 return HANDLER_ERR_OK; 1331} 1332 1333static inline uint64_t 1334get_reg_val_by_reg_num (struct guest *g, uint8_t regnum) { 1335 switch (regnum) { 1336 case 0: 1337 return guest_get_rax(g); 1338 case 1: 1339 return guest_get_rcx(g); 1340 case 2: 1341 return guest_get_rdx(g); 1342 case 3: 1343 return guest_get_rbx(g); 1344 case 4: 1345 return guest_get_rsp(g); 1346 case 5: 1347 return guest_get_rbp(g); 1348 case 6: 1349 return guest_get_rsi(g); 1350 case 7: 1351 return guest_get_rdi(g); 1352 default: 1353 assert(!"not reached"); 1354 return 0; 1355 } 1356} 1357 1358static inline void 1359set_reg_val_by_reg_num (struct guest *g, uint8_t regnum, uint64_t val) { 1360 switch (regnum) { 1361 case 0: 1362 guest_set_rax(g, val); 1363 break; 1364 case 1: 1365 guest_set_rcx(g, val); 1366 break; 1367 case 2: 1368 guest_set_rdx(g, val); 1369 break; 1370 case 3: 1371 guest_set_rbx(g, val); 1372 break; 1373 case 4: 1374 guest_set_rsp(g, val); 1375 break; 1376 case 5: 1377 guest_set_rbp(g, val); 1378 break; 1379 case 6: 1380 guest_set_rsi(g, val); 1381 break; 1382 case 7: 1383 guest_set_rdi(g, val); 1384 break; 1385 default: 1386 assert(!"not reached"); 1387 break; 1388 } 1389} 1390 1391static int 1392handle_vmexit_cr_access (struct guest *g) 1393{ 1394 int r; 1395 uint8_t *code = NULL; 1396#ifndef CONFIG_SVM 1397 errval_t err = 0; 1398 if (g->emulated_before_exit) { 1399 assert(saved_exit_reason == VMX_EXIT_REASON_CR_ACCESS); 1400 assert(((saved_exit_qual >> 0) & 0xf) == 0); 1401 } 1402#endif 1403 // fetch the location to the code 1404 r = get_instr_arr(g, &code); 1405 if (r != HANDLER_ERR_OK) { 1406 return r; 1407 } 1408 assert(code != NULL); 1409 1410 assert(code[0] == 0x0f && (code[1] == 0x20 || code[1] == 0x22)); 1411 1412 uint64_t val; 1413 bool read = (code[1] == 0x20); 1414 union x86_modrm mod; 1415 mod.raw = code[2]; 1416 1417 // FIXME: use proper exception 1418 assert(mod.u.mod == 3); 1419 1420 // source 1421 if (read) { 1422 // read from CR 1423 switch (mod.u.regop) { 1424 case 0: 1425#ifdef CONFIG_SVM 1426 val = amd_vmcb_cr0_rd_raw(&g->vmcb); 1427#else 1428 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR0, &val); 1429#endif 1430 break; 1431 default: 1432 printf("CR access: unknown CR source register\n"); 1433 return handle_vmexit_unhandeled(g); 1434 } 1435 } else { 1436 // read from GPR 1437 val = get_reg_val_by_reg_num(g, mod.u.rm); 1438 } 1439 1440 // destination 1441 if (read) { 1442 // write to GPR 1443 switch (mod.u.rm) { 1444 case 0: 1445 guest_set_rax(g, val); 1446 break; 1447 case 1: 1448 guest_set_rcx(g, val); 1449 break; 1450 case 2: 1451 guest_set_rdx(g, val); 1452 break; 1453 case 3: 1454 guest_set_rbx(g, val); 1455 break; 1456 default: 1457 printf("CR access: unknown GPR destination register\n"); 1458 return handle_vmexit_unhandeled(g); 1459 } 1460 } else { 1461 // write to CR 1462 switch (mod.u.regop) { 1463 case 0: 1464#ifdef CONFIG_SVM 1465 amd_vmcb_cr0_wr_raw(&g->vmcb, val); 1466#else 1467 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_CR0, val); 1468#endif 1469 break; 1470 1471 case 4: 1472 // ignore writing to CR4 1473 // allow writing to CR4 by do nothing for this case 1474 break; 1475 default: 1476 printf("CR access: unknown CR destination register\n"); 1477 return handle_vmexit_unhandeled(g); 1478 } 1479 } 1480 1481 // advance the rip beyond the instruction 1482#ifdef CONFIG_SVM 1483 amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 3); 1484#else 1485 uint64_t guest_rip; 1486 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 1487 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 3); 1488 assert(err_is_ok(err)); 1489#endif 1490 return HANDLER_ERR_OK; 1491} 1492 1493static int 1494handle_vmexit_ldt (struct guest *g) 1495{ 1496 int r; 1497 uint8_t *code = NULL; 1498 uint8_t *mem; 1499 1500 // this handler supports only real-mode 1501#ifdef CONFIG_SVM 1502 assert(amd_vmcb_cr0_rd(&g->vmcb).pe == 0); 1503#else 1504 uint64_t guest_cr0; 1505 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR0, &guest_cr0); 1506 assert((guest_cr0 & CR0_PE) == 0); 1507#endif 1508 // fetch the location to the code 1509 r = get_instr_arr(g, &code); 1510 if (r != HANDLER_ERR_OK) { 1511 return r; 1512 } 1513 mem = (uint8_t *)guest_to_host(g->mem_low_va); 1514 assert(code != NULL); 1515 1516 assert (code[0] == 0x0f && code[1] == 0x01); 1517 1518 // check for relevant instruction prefixes 1519 bool addr32 = code[-2] == 0x67 || code[-1] == 0x67; 1520 bool op32 = code[-2] == 0x66 || code[-1] == 0x66; 1521 // fetch modrm 1522 union x86_modrm modrm = { .raw = code[2] }; 1523 1524 assert(modrm.u.regop == 2 || modrm.u.regop == 3); 1525 guest_assert(g, op32); 1526 1527 uint32_t addr; 1528 if (addr32) { 1529 // byte 3-6 hold a 32 bit address to a mem location where the first word 1530 // holds the limit and the following dword holds the base 1531 addr = *(uint32_t *)&code[3]; 1532 } else { 1533 // byte 3-4 hold a 16 bit address to a mem location where the first word 1534 // holds the limit and the following dword holds the base 1535 // this address is relative to DS base 1536#ifdef CONFIG_SVM 1537 addr = *(uint16_t *)&code[3] + amd_vmcb_ds_base_rd(&g->vmcb); 1538#else 1539 uint64_t guest_ds_base; 1540 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_DS_BASE, &guest_ds_base); 1541 addr = *(uint16_t *)&code[3] + guest_ds_base; 1542#endif 1543 } 1544 1545 // santity check on the addr 1546 // FIXME: raise a proper exception 1547 if (addr > g->mem_high_va) { 1548 printf("Memory access beyond physical address space\n"); 1549 return HANDLER_ERR_FATAL; 1550 } 1551 1552 // load the actual register 1553 if (modrm.u.regop == 2) { 1554 // LGDT 1555#ifdef CONFIG_SVM 1556 amd_vmcb_gdtr_limit_wr(&g->vmcb, *(uint16_t*)(mem + addr)); 1557 amd_vmcb_gdtr_base_wr(&g->vmcb, *(uint32_t*)(mem + addr + 2)); 1558#else 1559 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_GDTR_LIM, 1560 *(uint16_t*)(mem + addr)); 1561 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_GDTR_BASE, 1562 *(uint32_t*)(mem + addr + 2)); 1563#endif 1564 1565 } else if (modrm.u.regop == 3) { 1566 // LIDT 1567#ifdef CONFIG_SVM 1568 amd_vmcb_idtr_limit_wr(&g->vmcb, *(uint16_t*)(mem + addr)); 1569 amd_vmcb_idtr_base_wr(&g->vmcb, *(uint32_t*)(mem + addr + 2)); 1570#else 1571 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_IDTR_LIM, 1572 *(uint16_t*)(mem + addr)); 1573 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_IDTR_BASE, 1574 *(uint32_t*)(mem + addr + 2)); 1575#endif 1576 } else { 1577 assert(!"not reached"); 1578 } 1579 1580 // advance the rip beyond the instruction 1581#ifdef CONFIG_SVM 1582 if (addr32) { 1583 amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 7); 1584 } else { 1585 amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 5); 1586 } 1587#else 1588 uint64_t guest_rip; 1589 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 1590 if (addr32) { 1591 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 7); 1592 } else { 1593 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 5); 1594 } 1595 assert(err_is_ok(err)); 1596#endif 1597 return HANDLER_ERR_OK; 1598} 1599 1600#ifndef CONFIG_SVM 1601static inline void vmx_vmcs_rflags_cf_wrf(struct guest *g, int val) { 1602 assert(val == 0 || val == 1); 1603 uint64_t guest_rflags; 1604 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RFLAGS, &guest_rflags); 1605 if (val) { 1606 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RFLAGS, 1607 guest_rflags | RFLAGS_CF); 1608 } else { 1609 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RFLAGS, 1610 guest_rflags & (~RFLAGS_CF)); 1611 } 1612 assert(err_is_ok(err)); 1613} 1614#endif 1615 1616static int 1617handle_vmexit_swint (struct guest *g) 1618{ 1619 int r; 1620 uint8_t *code = NULL; 1621 1622 r = get_instr_arr(g, &code); 1623 if (r != HANDLER_ERR_OK) { 1624 return r; 1625 } 1626 assert (code != NULL); 1627 1628 // check for correct instruciton 1629 assert(code[0] == 0xcd); 1630 1631 // the number of the interrupt is followed by the INT (0xcd) opcode 1632 uint8_t int_num = code[1]; 1633 1634 // check whether the guest is in real mode 1635#ifdef CONFIG_SVM 1636 if (amd_vmcb_cr0_rd(&g->vmcb).pe == 0) { 1637#else 1638 uint64_t guest_ds_base, es_guest_base; 1639 uint64_t guest_cr0, guest_rip; 1640 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_CR0, &guest_cr0); 1641 if ((guest_cr0 & CR0_PE) == 0) { 1642#endif 1643 // in real mode the interrupts starting at 10 have different meaning 1644 // examine the sw interrupt 1645 switch (int_num) { 1646 case 0x10: 1647 r = console_handle_int10(g->console, g); 1648 if (r != HANDLER_ERR_OK) { 1649 printf("Unhandeled method on INT 0x10\n"); 1650 return handle_vmexit_unhandeled(g); 1651 } 1652 break; 1653 case 0x12: 1654 switch (guest_get_ax(g)) { 1655 case 0: // GET MEMORY SIZE 1656 // our VM always has 1MB of base memory 1657 // AX holds the amount of 1KB memory blocks starting at 1658 // addr 0 which is 640 (640 KiB) 1659 guest_set_ax(g, 640); 1660 break; 1661 default: 1662 printf("Unhandeled method on INT 0x12\n"); 1663 return handle_vmexit_unhandeled(g); 1664 } 1665 break; 1666 case 0x13: 1667 // Bootable CD-ROM - GET STATUS 1668 if (guest_get_ax(g) == 0x4b01) { 1669 // no cdrom support 1670#ifdef CONFIG_SVM 1671 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 1672#else 1673 vmx_vmcs_rflags_cf_wrf(g, 1); 1674#endif 1675 } 1676 // DISK RESET 1677 else if (guest_get_ah(g) == 0) { 1678 for (int i = 0; i < g->hdd_count; i++) { 1679 hdd_reset(g->hdds[i]); 1680 } 1681 } 1682 // DISK - GET DRIVE PARAMETERS (PC,XT286,CONV,PS,ESDI,SCSI) 1683 else if (guest_get_ah(g) == 0x08) { 1684 uint8_t dl = guest_get_dl(g); 1685 1686 // only respond to installed hard disks 1687 if ((dl >> 7) && ((dl & 0x7f) < g->hdd_count)) { 1688 uint16_t c; 1689 uint8_t h, s; 1690 1691 r = hdd_get_geometry_chs(g->hdds[dl & 0x7f], &c, &h, &s); 1692 assert(r == 0); 1693 1694 // set some return values for success 1695 guest_set_ah(g, 0); 1696#ifdef CONFIG_SVM 1697 amd_vmcb_rflags_cf_wrf(&g->vmcb, 0); 1698#else 1699 vmx_vmcs_rflags_cf_wrf(g, 0); 1700#endif 1701 guest_set_bl(g, 0); 1702 // store the geometry into the correct registers 1703 guest_set_cx(g, c << 6 | (s & 0x3f)); 1704 guest_set_dh(g, h); 1705 guest_set_dl(g, g->hdd_count); 1706 } else { 1707#ifdef CONFIG_SVM 1708 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 1709#else 1710 vmx_vmcs_rflags_cf_wrf(g, 1); 1711#endif 1712 // it is not really clear to me what ah should contain 1713 // when the drive is not present, so set it to FF 1714 guest_set_ah(g, 1); 1715 } 1716 } 1717 // INT 13 Extensions - INSTALLATION CHECK 1718 else if (guest_get_ah(g) == 0x41 && guest_get_bx(g) == 0x55aa) { 1719#ifdef CONFIG_SVM 1720 amd_vmcb_rflags_cf_wrf(&g->vmcb, 0); 1721#else 1722 vmx_vmcs_rflags_cf_wrf(g, 0); 1723#endif 1724 guest_set_bx(g, 0xaa55); 1725 guest_set_ah(g, 0x01); // Drive extensions 1.x 1726 guest_set_al(g, 0); 1727 guest_set_cx(g, 0x5); 1728 } 1729 // IBM/MS INT 13 Extensions - EXTENDED READ 1730 else if (guest_get_ah(g) == 0x42) { 1731 uint8_t dl = guest_get_dl(g); 1732 1733 // only respond to installed hard disks 1734 if ((dl >> 7) && ((dl & 0x7f) < g->hdd_count)) { 1735#ifdef CONFIG_SVM 1736 amd_vmcb_rflags_cf_wrf(&g->vmcb, 0); 1737#else 1738 vmx_vmcs_rflags_cf_wrf(g, 0); 1739#endif 1740 guest_set_ah(g, 0); 1741 1742 struct disk_access_block { 1743 uint8_t size; 1744 uint8_t reserved; 1745 uint16_t count; 1746 // pointer to the data buffer formated like 1747 // SEGMENT:ADDRESS 1748 uint32_t transfer_buffer; 1749 uint64_t abs_block_number; 1750 } __attribute__ ((packed)); 1751 1752 // memory location of the disk access block 1753#ifdef CONFIG_SVM 1754 uintptr_t mem = guest_to_host(g->mem_low_va) + 1755 amd_vmcb_ds_base_rd(&g->vmcb) + 1756 guest_get_si(g); 1757#else 1758 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_DS_BASE, &guest_ds_base); 1759 uintptr_t mem = guest_to_host(g->mem_low_va) + 1760 guest_ds_base + guest_get_si(g); 1761#endif 1762 1763 struct disk_access_block *dap = (void *)mem; 1764 1765 if (dap->size < 0x10) { 1766#ifdef CONFIG_SVM 1767 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 1768#else 1769 vmx_vmcs_rflags_cf_wrf(g, 1); 1770#endif 1771 guest_set_ah(g, 1); 1772 } else { 1773 // dap->transfer buffer points to a real-mode segment 1774 // resolve it according to that rules 1775 mem = guest_to_host(g->mem_low_va) + 1776 ((dap->transfer_buffer >> 16) << 4) + 1777 (dap->transfer_buffer & 0xffff); 1778 1779 size_t count = dap->count; 1780 r = hdd_read_blocks(g->hdds[dl & 0x7f], 1781 dap->abs_block_number, 1782 &count, mem); 1783 dap->count = count; 1784 1785 if (r != HANDLER_ERR_OK) { 1786#ifdef CONFIG_SVM 1787 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 1788#else 1789 vmx_vmcs_rflags_cf_wrf(g, 1); 1790#endif 1791 guest_set_ah(g, 1); 1792 } 1793 } 1794 } else { 1795#ifdef CONFIG_SVM 1796 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 1797#else 1798 vmx_vmcs_rflags_cf_wrf(g, 1); 1799#endif 1800 // it is not really clear to me what ah should contain 1801 // when the drive is not present, so set it to FF 1802 guest_set_ah(g, 1); 1803 } 1804 } 1805 // IBM/MS INT 13 Extensions - GET DRIVE PARAMETERS 1806 else if (guest_get_ah(g) == 0x48) { 1807 uint8_t dl = guest_get_dl(g); 1808 1809 // only respond to installed hard disks 1810 if ((dl >> 7) && ((dl & 0x7f) < g->hdd_count)) { 1811 // structure to hold drive info 1812 struct drive_params { 1813 uint16_t size; 1814 uint16_t flags; 1815 uint32_t cylinders; 1816 uint32_t heads; 1817 uint32_t sectors; 1818 uint64_t total_sectors; 1819 uint16_t bytes_per_sector; 1820 } __attribute__ ((packed)); 1821 1822 // memory where the drive info shall be stored 1823#ifdef CONFIG_SVM 1824 uintptr_t mem = guest_to_host(g->mem_low_va) + 1825 amd_vmcb_ds_base_rd(&g->vmcb) + 1826 guest_get_si(g); 1827#else 1828 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_DS_BASE, &guest_ds_base); 1829 uintptr_t mem = guest_to_host(g->mem_low_va) + 1830 guest_ds_base + guest_get_si(g); 1831#endif 1832 1833 struct drive_params *drp = (void *)mem; 1834 1835 // sanity check 1836 if (drp->size < sizeof(struct drive_params)) { 1837#ifdef CONFIG_SVM 1838 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 1839#else 1840 vmx_vmcs_rflags_cf_wrf(g, 1); 1841#endif 1842 } else { 1843#ifdef CONFIG_SVM 1844 amd_vmcb_rflags_cf_wrf(&g->vmcb, 0); 1845#else 1846 vmx_vmcs_rflags_cf_wrf(g, 0); 1847#endif 1848 guest_set_ah(g, 0); 1849 1850 drp->size = sizeof(struct drive_params); 1851 // CHS invalid, no removable drive, etc 1852 drp->flags = 0; 1853 drp->cylinders = 0; 1854 drp->heads = 0; 1855 drp->sectors = 0; 1856 drp->total_sectors = hdd_get_blocks_count( 1857 g->hdds[dl & 0x7f]); 1858 drp->bytes_per_sector = 512; // FIXME: Hardcoded 1859 } 1860 } else { 1861#ifdef CONFIG_SVM 1862 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 1863#else 1864 vmx_vmcs_rflags_cf_wrf(g, 1); 1865#endif 1866 // it is not really clear to me what ah should contain 1867 // when the drive is not present, so set it to FF 1868 guest_set_ah(g, 0x1); 1869 } 1870 } else { 1871 printf("Unhandeled method on INT 0x13\n"); 1872 return handle_vmexit_unhandeled(g); 1873 } 1874 break; 1875 case 0x15: 1876 // ENABLE A20 GATE 1877 if (guest_get_ax(g) == 0x2401) { 1878 g->a20_gate_enabled = true; 1879#ifdef CONFIG_SVM 1880 amd_vmcb_rflags_cf_wrf(&g->vmcb, 0); 1881#else 1882 vmx_vmcs_rflags_cf_wrf(g, 0); 1883#endif 1884 guest_set_ah(g, 0); 1885 } 1886 // APM INSTALLATION CHECK 1887 else if (guest_get_ax(g) == 0x5300) { 1888 // we do not support APM - set carry flag to indicate error 1889#ifdef CONFIG_SVM 1890 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 1891#else 1892 vmx_vmcs_rflags_cf_wrf(g, 1); 1893#endif 1894 } 1895 // APM DISCONNECT 1896 else if (guest_get_ax(g) == 0x5304) { 1897 // we do not support APM - set carry flag to indicate error 1898#ifdef CONFIG_SVM 1899 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 1900#else 1901 vmx_vmcs_rflags_cf_wrf(g, 1); 1902#endif 1903 } 1904 // GET MEMORY SIZE FOR >64M CONFIGURATIONS 1905 else if (guest_get_ax(g) == 0xe801) { 1906 // we do not support this BIOS call 1907 // both grub and linux may also use the 0xe820 call 1908#ifdef CONFIG_SVM 1909 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 1910#else 1911 vmx_vmcs_rflags_cf_wrf(g, 1); 1912#endif 1913 } 1914 // GET SYSTEM MEMORY MAP 1915 // EDX has to contain 0x534d4150 (== 'SMAP') 1916 else if (guest_get_ax(g) == 0xe820 && 1917 guest_get_edx(g) == 0x534d4150) { 1918 // for now we return only one entry containing the real mem 1919 if (guest_get_ebx(g) > 1 || guest_get_ecx(g) < 20) { 1920 // wrong input params -> report error 1921#ifdef CONFIG_SVM 1922 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 1923#else 1924 vmx_vmcs_rflags_cf_wrf(g, 1); 1925#endif 1926 } else { 1927 // taken from http://www.ctyme.com/intr/rb-1741.htm 1928#ifdef CONFIG_SVM 1929 uintptr_t addr = guest_to_host(g->mem_low_va) + 1930 amd_vmcb_es_base_rd(&g->vmcb) + 1931 guest_get_di(g); 1932#else 1933 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_ES_BASE, &es_guest_base); 1934 uintptr_t addr = guest_to_host(g->mem_low_va) + 1935 es_guest_base + guest_get_di(g); 1936#endif 1937 // set EAX to 'SMAP' 1938 guest_set_eax(g, 0x534D4150); 1939 // returned bytes (always 20) 1940 guest_set_ecx(g, 20); 1941 1942 switch (guest_get_ebx(g)) { 1943 case 0x0: 1944 // base memory 1945 assert(g->mem_low_va == 0); 1946 // base address 1947 *(uint64_t *)addr = 0; 1948 // size of the memory block 1949 *(uint64_t *)(addr + 8) = 0xa0000; // 640 KiB 1950 // mem type, 1 == "memory, available to the OS" 1951 *(uint32_t *)(addr + 16) = 1; 1952 // indicate that there is more data 1953 guest_set_ebx(g, 1); 1954 break; 1955 case 0x1: 1956 // extended memory 1957 assert(g->mem_high_va > 0x100000); 1958 // base address 1959 *(uint64_t *)addr = 0x100000; // 1 MiB 1960 // size of the memory block 1961 *(uint64_t *)(addr + 8) = g->mem_high_va - 0x100000; 1962 // mem type, 1 == "memory, available to the OS" 1963 *(uint32_t *)(addr + 16) = 1; 1964 // indicate that there is no more data 1965 guest_set_ebx(g, 0); 1966 break; 1967 default: 1968 assert(!"not reached"); 1969 break; 1970 } 1971 1972 // mark success 1973#ifdef CONFIG_SVM 1974 amd_vmcb_rflags_cf_wrf(&g->vmcb, 0); 1975#else 1976 vmx_vmcs_rflags_cf_wrf(g, 0); 1977#endif 1978 } 1979 } 1980 // SYSTEM - Get Intel SpeedStep (IST) information 1981 else if (guest_get_ax(g) == 0xe980) { 1982 // not supportet yet 1983#ifdef CONFIG_SVM 1984 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 1985#else 1986 vmx_vmcs_rflags_cf_wrf(g, 1); 1987#endif 1988 } 1989 // SYSTEM - GET CONFIGURATION (XT >1986/1/10,AT mdl 3x9, 1990 // CONV,XT286,PS) 1991 // GRUB BUG: it puts 0xc0 into AX instead of AH 1992 else if (guest_get_ax(g) == 0xc0) { 1993 // we do not support this 1994#ifdef CONFIG_SVM 1995 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 1996#else 1997 vmx_vmcs_rflags_cf_wrf(g, 1); 1998#endif 1999 guest_set_ah(g, 0x80); 2000 } 2001 // GET EXTENDED MEMORY SIZE 2002 else if (guest_get_ah(g) == 0x88) { 2003 // calculate number of 1KB chunks starting from 1MB but not 2004 // beyond 16MB 2005 assert(((g->mem_high_va - g->mem_low_va) & 0x3ff) == 0); 2006 guest_set_ax(g, MIN(0x3c00 /* 16MB */, 2007 (g->mem_high_va - g->mem_low_va) / 1024)); 2008 // indicate no error occured 2009#ifdef CONFIG_SVM 2010 amd_vmcb_rflags_cf_wrf(&g->vmcb, 0); 2011#else 2012 vmx_vmcs_rflags_cf_wrf(g, 0); 2013#endif 2014 } 2015 // SYSTEM - GET CONFIGURATION (XT >1986/1/10,AT mdl 3x9, 2016 // CONV,XT286,PS) 2017 else if (guest_get_ah(g) == 0xc0) { 2018 // we do not support this 2019#ifdef CONFIG_SVM 2020 amd_vmcb_rflags_cf_wrf(&g->vmcb, 1); 2021#else 2022 vmx_vmcs_rflags_cf_wrf(g, 1); 2023#endif 2024 guest_set_ah(g, 0x80); 2025 // SYSTEM - SET BIOS MODE 2026 } else if (guest_get_ah(g) == 0xec) { 2027 // I do no really know the use of this bios call and linux 2028 // expects no action what so ever 2029 } else { 2030 printf("Unhandeled method on INT 0x15\n"); 2031 return handle_vmexit_unhandeled(g); 2032 } 2033 break; 2034 case 0x16: 2035 // KEYBOARD - SET TYPEMATIC RATE AND DELAY 2036 if (guest_get_ah(g) == 0x3) { 2037 // ignore this 2038 } else if (guest_get_ah(g) == 0x2) { 2039 // Return keyboard flags 2040 guest_set_al(g, 0x0); 2041 } else { 2042 printf("Unhandeled method on INT 0x16\n"); 2043 return handle_vmexit_unhandeled(g); 2044 } 2045 break; 2046 case 0x1a: 2047 // TIME - GET REAL-TIME CLOCK TIME (AT,XT286,PS) 2048 if (guest_get_ah(g) == 0x2) { 2049 uint8_t h, m, s; 2050 lpc_rtc_get_time_bcd(g->lpc, &h, &m, &s); 2051 guest_set_ch(g, h); 2052 guest_set_cl(g, m); 2053 guest_set_dh(g, s); 2054 guest_set_dl(g, 0); 2055 // mark success 2056#ifdef CONFIG_SVM 2057 amd_vmcb_rflags_cf_wrf(&g->vmcb, 0); 2058#else 2059 vmx_vmcs_rflags_cf_wrf(g, 0); 2060#endif 2061 } else { 2062 printf("Unhandeled method on INT 0x1a\n"); 2063 return handle_vmexit_unhandeled(g); 2064 } 2065 break; 2066 default: 2067 printf("handle_vmexit_swint: Unhandeled real-mode interrupt " 2068 "0x%x (%d).\n", int_num, int_num); 2069 return handle_vmexit_unhandeled(g); 2070 } 2071 } else { 2072 printf("vmkitmon: encountered INT instruction outside real mode\n"); 2073 return handle_vmexit_unhandeled(g); 2074 } 2075 2076 // advance the rip beyond the instruction 2077#ifdef CONFIG_SVM 2078 amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 2); 2079#else 2080 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 2081 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 2); 2082 assert(err_is_ok(err)); 2083#endif 2084 return HANDLER_ERR_OK; 2085} 2086 2087static inline enum opsize 2088io_access_size_to_opsize (enum x86_io_access io) 2089{ 2090 if (io & X86_IO_ACCESS_SZ8) { 2091 return OPSIZE_8; 2092 } else if (io & X86_IO_ACCESS_SZ16) { 2093 return OPSIZE_16; 2094 } else if (io & X86_IO_ACCESS_SZ32) { 2095 return OPSIZE_32; 2096 } else { 2097 assert(!"NYI"); 2098 return 0; 2099 } 2100} 2101 2102static int 2103handle_vmexit_ioio (struct guest *g) 2104{ 2105 int r; 2106#ifdef CONFIG_SVM 2107 uint64_t info1 = amd_vmcb_exitinfo1_rd(&g->vmcb); 2108 enum x86_io_access io; 2109 uint16_t port = info1 >> 16; 2110#else 2111 errval_t err = 0; 2112 if (!g->emulated_before_exit) { 2113 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_EXIT_QUAL, &saved_exit_qual); 2114 uint64_t instr_len, guest_rip; 2115 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_EXIT_INSTR_LEN, &instr_len); 2116 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 2117 saved_rip = guest_rip + instr_len; 2118 } 2119 uint16_t port = (saved_exit_qual >> 16) & 0xffff; 2120#endif 2121 bool write; 2122 enum opsize size; 2123 uint32_t val; 2124 bool newapi = false; // needed as a transition 2125 2126#ifdef CONFIG_SVM 2127 // copy the access flags 2128 // FIXME: this severely exploits the way the x86_io_access flags are set up 2129 io = (info1 >> 1); 2130 io |= info1 & SVM_IOIO_TYPE_MASK; 2131 2132 // gather some params for the io access 2133 write = (io & X86_IO_ACCESS_TYPE) == 0; 2134 size = OPSIZE_8; // make gcc happy 2135 if (io & X86_IO_ACCESS_SZ8) { 2136 size = OPSIZE_8; 2137 } else if (io & X86_IO_ACCESS_SZ16) { 2138 size = OPSIZE_16; 2139 } else if (io & X86_IO_ACCESS_SZ32) { 2140 size = OPSIZE_32; 2141 } 2142#else 2143 write = ((saved_exit_qual >> 3) & 0x1) == 0; 2144 size = OPSIZE_8; 2145 if ((saved_exit_qual & 0x7) == 0) { 2146 size = OPSIZE_8; 2147 } else if ((saved_exit_qual & 0x7) == 1) { 2148 size = OPSIZE_16; 2149 } else if ((saved_exit_qual & 0x7) == 3) { 2150 size = OPSIZE_32; 2151 } else { 2152 assert(!"Invalid size of access value"); 2153 } 2154#endif 2155 // fetch the source val if neccessary 2156 if (write) { 2157 switch (size) { 2158 case OPSIZE_8: 2159 val = guest_get_al(g); 2160 break; 2161 case OPSIZE_16: 2162 val = guest_get_ax(g); 2163 break; 2164 case OPSIZE_32: 2165 val = guest_get_eax(g); 2166 break; 2167 default: 2168 assert(!"not reached"); 2169 break; 2170 } 2171 } 2172 2173 // assign the request to the corresponding subsystem 2174 switch (port) { 2175 // LPC devices 2176 case 0x20: // primary PIC 2177 case 0x21: // primary PIC 2178 case 0x40: // Timer 2179 case 0x41: // Timer 2180 case 0x42: // Timer 2181 case 0x43: // Timer 2182 case 0x61: // NMI Controller 2183 case 0x70: // RTC 2184 case 0x71: // RTC 2185 case 0x72: // RTC 2186 case 0x73: // RTC 2187 case 0x74: // RTC 2188 case 0x75: // RTC 2189 case 0x76: // RTC 2190 case 0x77: // RTC 2191 case 0xa0: // secondary PIC 2192 case 0xa1: // secondary PIC 2193 if (write) { 2194 r = lpc_handle_pio_write(g->lpc, port, size, val); 2195 guest_assert(g, r == 0); 2196 } else { 2197 r = lpc_handle_pio_read(g->lpc, port, size, &val); 2198 assert(r == 0); 2199 } 2200 newapi = true; 2201 break; 2202 // Keyboard 2203 case 0x60: 2204 case 0x64: 2205 // we currently do not support a keyboard 2206 if (!write) { 2207 val = ~0; 2208 } 2209 newapi = true; 2210 break; 2211 case 0x80: 2212 // some apps use writing to this port as a method to delay execution 2213 // so we just do noting 2214 break; 2215 // Coprocessor 2216 case 0xf0: 2217 case 0xf1: 2218 // coprocessor IGNNE# - do nothing for now 2219 break; 2220 2221 // serial COM1 port 2222 // FIXME: this should not be hardcoded ! 2223 case 0x3f8: 2224 case 0x3f9: 2225 case 0x3fa: 2226 case 0x3fb: 2227 case 0x3fc: 2228 case 0x3fd: 2229 case 0x3fe: 2230 case 0x3ff: 2231 // COM2 2232 case 0x2f8: 2233 case 0x2f9: 2234 case 0x2fa: 2235 case 0x2fb: 2236 case 0x2fc: 2237 case 0x2fd: 2238 case 0x2fe: 2239 case 0x2ff: 2240 // COM3 2241 case 0x3e8: 2242 case 0x3e9: 2243 case 0x3ea: 2244 case 0x3eb: 2245 case 0x3ec: 2246 case 0x3ed: 2247 case 0x3ee: 2248 case 0x3ef: 2249 // COM4 2250 case 0x2e8: 2251 case 0x2e9: 2252 case 0x2ea: 2253 case 0x2eb: 2254 case 0x2ec: 2255 case 0x2ed: 2256 case 0x2ee: 2257 case 0x2ef: { 2258 int com; 2259 2260 com = (port & 0xf0) == 0xf0 ? !(port & 0x100) : !(port & 0x100) + 2; 2261 assert(com >= 0 && com < 4); 2262 if (write) { 2263 r = pc16550d_handle_pio_write(g->serial_ports[com], port, 2264 size, val); 2265 assert(r == 0); 2266 } else { 2267 r = pc16550d_handle_pio_read(g->serial_ports[com], port, 2268 size, &val); 2269 assert(r == 0); 2270 } 2271 newapi = true; 2272 break; 2273 } 2274 2275 // PCI config space (address) 2276 case 0xcf8: 2277 case 0xcf9: 2278 case 0xcfa: 2279 case 0xcfb: 2280 // PCI config space (data) 2281 case 0xcfc: 2282 case 0xcfd: 2283 case 0xcfe: 2284 case 0xcff: 2285 if(write) { 2286 r = pci_handle_pio_write(g->pci, port, size, val); 2287 } else { 2288 r = pci_handle_pio_read(g->pci, port, size, &val); 2289 } 2290 assert(r == 0); 2291 newapi = true; 2292 break; 2293 2294 default: 2295 // the default is to return 0xff and to ignore writes 2296 if (!write) { 2297 val = 0xffffffff; 2298 } 2299 newapi = true; 2300 }; 2301 2302 // set the destination when neccessary 2303 if (newapi && !write) { 2304 switch (size) { 2305 case OPSIZE_8: 2306 guest_set_al(g, val); 2307 break; 2308 case OPSIZE_16: 2309 guest_set_ax(g, val); 2310 break; 2311 case OPSIZE_32: 2312 guest_set_eax(g, val); 2313 break; 2314 default: 2315 assert(!"not reached"); 2316 break; 2317 } 2318 } 2319 2320 // the following IP is stored in the exitinfo2 field 2321#ifdef CONFIG_SVM 2322 amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_exitinfo2_rd(&g->vmcb)); 2323#else 2324 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, saved_rip); 2325 assert(err_is_ok(err)); 2326#endif 2327 return HANDLER_ERR_OK; 2328} 2329 2330static int 2331handle_vmexit_msr (struct guest *g) { 2332#ifdef CONFIG_SVM 2333 bool write = amd_vmcb_exitinfo1_rd(&g->vmcb) == 1; 2334#else 2335 int msr_index; 2336 errval_t err = 0; 2337 bool write = (saved_exit_reason == VMX_EXIT_REASON_WRMSR); 2338 struct msr_entry *guest_msr_area = (struct msr_entry *)g->msr_area_va; 2339#endif 2340 uint32_t msr = guest_get_ecx(g); 2341 uint64_t val; 2342 2343 // there may be writes or reads to MSRs 2344 if (write) { 2345 // fetch the value to write from EDX:EAX 2346 val = ((uint64_t)guest_get_edx(g) << 32) | guest_get_eax(g); 2347 2348 // store the read value into the corresponding location 2349 switch (msr) { 2350 case X86_MSR_SYSENTER_CS: 2351#ifdef CONFIG_SVM 2352 amd_vmcb_sysenter_cs_wr(&g->vmcb, val); 2353#else 2354 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_SYSENTER_CS, val); 2355#endif 2356 break; 2357 case X86_MSR_SYSENTER_ESP: 2358#ifdef CONFIG_SVM 2359 amd_vmcb_sysenter_esp_wr(&g->vmcb, val); 2360#else 2361 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_SYSENTER_ESP, val); 2362#endif 2363 break; 2364 case X86_MSR_SYSENTER_EIP: 2365#ifdef CONFIG_SVM 2366 amd_vmcb_sysenter_eip_wr(&g->vmcb, val); 2367#else 2368 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_SYSENTER_EIP, val); 2369#endif 2370 break; 2371 case X86_MSR_EFER: 2372#ifdef CONFIG_SVM 2373 amd_vmcb_efer_wr_raw(&g->vmcb, val); 2374#else 2375 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_EFER_F, val); 2376#endif 2377 break; 2378 case X86_MSR_FS_BASE: 2379#ifdef CONFIG_SVM 2380 amd_vmcb_fs_base_wr(&g->vmcb, val); 2381#else 2382 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_FS_BASE, val); 2383#endif 2384 break; 2385 case X86_MSR_GS_BASE: 2386#ifdef CONFIG_SVM 2387 amd_vmcb_gs_base_wr(&g->vmcb, val); 2388#else 2389 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_GS_BASE, val); 2390#endif 2391 break; 2392#ifdef CONFIG_SVM 2393 case X86_MSR_KERNEL_GS_BASE: 2394 amd_vmcb_kernel_gs_base_wr(&g->vmcb, val); 2395 break; 2396 case X86_MSR_STAR: 2397 amd_vmcb_star_wr(&g->vmcb, val); 2398 break; 2399 case X86_MSR_LSTAR: 2400 amd_vmcb_lstar_wr(&g->vmcb, val); 2401 break; 2402 case X86_MSR_CSTAR: 2403 amd_vmcb_cstar_wr(&g->vmcb, val); 2404 break; 2405 case X86_MSR_SFMASK: 2406 amd_vmcb_sfmask_wr(&g->vmcb, val); 2407 break; 2408 default: 2409 printf("MSR: unhandeled MSR write access to %x\n", msr); 2410 return handle_vmexit_unhandeled(g); 2411#else 2412 case X86_MSR_BIOS_SIGN_ID: 2413 break; 2414 default: 2415 msr_index = vmx_guest_msr_index(msr); 2416 if (msr_index == -1) { 2417 printf("MSR: unhandeled MSR write access to %x\n", msr); 2418 return handle_vmexit_unhandeled(g); 2419 } 2420 guest_msr_area[msr_index].val = val; 2421 break; 2422#endif 2423 } 2424 } else { 2425 // read the value from the corresponding location 2426 switch (msr) { 2427 case X86_MSR_SYSENTER_CS: 2428#ifdef CONFIG_SVM 2429 val = amd_vmcb_sysenter_cs_rd(&g->vmcb); 2430#else 2431 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_SYSENTER_CS, &val); 2432#endif 2433 break; 2434 case X86_MSR_SYSENTER_ESP: 2435#ifdef CONFIG_SVM 2436 val = amd_vmcb_sysenter_esp_rd(&g->vmcb); 2437#else 2438 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_SYSENTER_ESP, &val); 2439#endif 2440 break; 2441 case X86_MSR_SYSENTER_EIP: 2442#ifdef CONFIG_SVM 2443 val = amd_vmcb_sysenter_eip_rd(&g->vmcb); 2444#else 2445 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_SYSENTER_EIP, &val); 2446#endif 2447 break; 2448 case X86_MSR_EFER: 2449#ifdef CONFIG_SVM 2450 val = amd_vmcb_efer_rd_raw(&g->vmcb); 2451#else 2452 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_EFER_F, &val); 2453#endif 2454 break; 2455 case X86_MSR_FS_BASE: 2456#ifdef CONFIG_SVM 2457 val = amd_vmcb_fs_base_rd(&g->vmcb); 2458#else 2459 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_FS_BASE, &val); 2460#endif 2461 break; 2462 case X86_MSR_GS_BASE: 2463#ifdef CONFIG_SVM 2464 val = amd_vmcb_gs_base_rd(&g->vmcb); 2465#else 2466 err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_GS_BASE, &val); 2467#endif 2468 break; 2469#ifdef CONFIG_SVM 2470 case X86_MSR_KERNEL_GS_BASE: 2471 val = amd_vmcb_kernel_gs_base_rd(&g->vmcb); 2472 break; 2473 case X86_MSR_STAR: 2474 val = amd_vmcb_star_rd(&g->vmcb); 2475 break; 2476 case X86_MSR_LSTAR: 2477 val = amd_vmcb_lstar_rd(&g->vmcb); 2478 break; 2479 case X86_MSR_CSTAR: 2480 val = amd_vmcb_cstar_rd(&g->vmcb); 2481 break; 2482 case X86_MSR_SFMASK: 2483 val = amd_vmcb_sfmask_rd(&g->vmcb); 2484 break; 2485 default: 2486 printf("MSR: unhandeled MSR read access to %x\n", msr); 2487 return handle_vmexit_unhandeled(g); 2488#else 2489 case X86_MSR_APIC_BASE: 2490 case X86_MSR_BIOS_SIGN_ID: 2491 case X86_MSR_MTRRCAP: 2492 case X86_MSR_MCG_CAP: 2493 case X86_MSR_MCG_STATUS: 2494 case X86_MSR_PAT: 2495 case X86_MTRR_DEF_TYPE: 2496 val = 0x0; 2497 break; 2498 case X86_MSR_MISC_ENABLE: 2499 val = 0x1; // enable fast-string instructions 2500 break; 2501 default: 2502 msr_index = vmx_guest_msr_index(msr); 2503 if (msr_index == -1) { 2504 printf("MSR: unhandeled MSR read access to %x\n", msr); 2505 return handle_vmexit_unhandeled(g); 2506 } 2507 val = guest_msr_area[msr_index].val; 2508 break; 2509#endif 2510 } 2511 2512 // store the value in EDX:EAX 2513 guest_set_eax(g, val); 2514 guest_set_edx(g, val >> 32); 2515 } 2516 2517 // advance the rip beyond the current instruction 2518#ifdef CONFIG_SVM 2519 amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 2); 2520#else 2521 uint64_t guest_rip; 2522 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 2523 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 2); 2524 assert(err_is_ok(err)); 2525#endif 2526 return HANDLER_ERR_OK; 2527} 2528 2529static int 2530handle_vmexit_cpuid (struct guest *g) { 2531 uint32_t eax, ebx, ecx, edx; 2532 uint32_t func = guest_get_eax(g); 2533 2534 /* the register values are copied from an emuliated Pentium processor in QEMU*/ 2535 switch (func) { 2536#ifdef CONFIG_SVM 2537 // Processor Vendor and Largest Standard Function Number 2538 case 0: 2539 case 0x80000000: 2540 // max standard function offset 2541 eax = func == 0 ? 0x1 : 0x80000000; 2542 // string "AuthenticAMD" 2543 ebx = 0x68747541; 2544 ecx = 0x444d4163; 2545 edx = 0x69746e65; 2546 break; 2547 2548 // Family, Model, Stepping Identifiers 2549 case 1: 2550 // we simulate a AMD K6-3D 2551 // Family 5, Model 8, Stepping 12 2552 eax = 0x58c; 2553 // no brand, clflush size 16, no mulitprocessing, no local apic 2554 ebx = 0x0f00; 2555 // support the popcnt instr 2556 ecx = 0x800000; 2557 // support some basic features 2558 edx = 0x89a91b; 2559 break; 2560 2561 default: 2562 // use the answer of the host if there is any other request 2563 // FIXME: this is probably not a good idea ;) 2564 cpuid(func, &eax, &ebx, &ecx, &edx); 2565 printf("handle_vmexit_cpuid: CPUID: func %x, host reports: eax %x, " 2566 "ebx %x, ecx %x, edx %x\n", func, eax, ebx, ecx, edx); 2567 break; 2568#else 2569 case 0: 2570 eax = 0x2; 2571 ebx = 0x756e6547; 2572 ecx = 0x6c65746e; 2573 edx = 0x49656e69; 2574 break; 2575 case 1: 2576 eax = 0x800; 2577 ebx = 0x800; 2578 ecx = 0x80200000; 2579 edx = 0x183fbff; 2580 break; 2581 case 2: 2582 eax = 0x1; 2583 ebx = 0x0; 2584 ecx = 0x4d; 2585 edx = 0x2c307d; 2586 default: 2587 eax = 0x0; 2588 ebx = 0x0; 2589 ecx = 0x0; 2590 edx = 0x0; 2591 break; 2592#endif 2593 } 2594 2595 guest_set_eax(g, eax); 2596 guest_set_ebx(g, ebx); 2597 guest_set_ecx(g, ecx); 2598 guest_set_edx(g, edx); 2599 2600 // advance the rip beyond the instruction 2601#ifdef CONFIG_SVM 2602 amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 2); 2603#else 2604 uint64_t guest_rip; 2605 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 2606 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 2); 2607 assert(err_is_ok(err)); 2608#endif 2609 return HANDLER_ERR_OK; 2610} 2611 2612static int 2613handle_vmexit_vmmcall (struct guest *g) { 2614 /*printf("VMMCALL: tsc %lu, exits with mon invocation %lu, exits w/o mon " 2615 "invocation %lu\n", rdtsc(), 2616 g->ctrl->num_vm_exits_with_monitor_invocation, 2617 g->ctrl->num_vm_exits_without_monitor_invocation);*/ 2618 2619 // advance the rip beyond the instruction 2620#ifdef CONFIG_SVM 2621 amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 3); 2622#else 2623 uint64_t guest_rip; 2624 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 2625 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 3); 2626 assert(err_is_ok(err)); 2627#endif 2628 return HANDLER_ERR_OK; 2629} 2630 2631static int 2632handle_vmexit_hlt (struct guest *g) { 2633 // the guest has nothing to do - poll out irq sources for pending IRQs 2634 // if they do not assert a virtual IRQ then we will do nothing 2635 lpc_pic_process_irqs(g->lpc); 2636 2637 // advance the rip beyond the instruction 2638#ifdef CONFIG_SVM 2639 amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 1); 2640#else 2641 uint64_t guest_rip; 2642 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 2643 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 1); 2644#endif 2645 2646 // running HLT with IRQs masked does not make any sense 2647 // FIXME: this assert silly, shutting down the VM would be the right way 2648#ifdef CONFIG_SVM 2649 guest_assert(g, amd_vmcb_rflags_rd(&g->vmcb).intrf == 1); 2650#else 2651 uint64_t guest_rflags; 2652 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RFLAGS, &guest_rflags); 2653 assert(err_is_ok(err)); 2654 guest_assert(g, guest_rflags & RFLAGS_IF); 2655#endif 2656 if (virq_pending(g, NULL, NULL)) { 2657 // there is an IRQ pending, proceed as normal, the CPU will take it 2658 } else { 2659 // there is really nothing to do - stop the VM and wait 2660 g->runnable = false; 2661 } 2662 2663 return HANDLER_ERR_OK; 2664} 2665 2666static inline int 2667decode_mov_instr_length (struct guest *g, uint8_t *code) 2668{ 2669 int len; 2670 2671 // we only support long mode for now 2672 //assert(amd_vmcb_efer_rd(&g->vmcb).lma == 1); 2673 2674 // all non special MOV instructions use one byte as opcode and at least a 2675 // ModR/M byte 2676 len = 2; 2677 // check for the REX prefix 2678 if ((code[0] >> 4) == 0x4) { 2679 len++; 2680 code++; 2681 } 2682 // precaution because I did no check all variants of MOV, at least these two 2683 // variants are supported 2684 assert(code[0] == 0x89 || code[0] == 0x8b); 2685 2686 union x86_modrm modrm = { .raw = code[1] }; 2687 // check for displacements 2688 if (modrm.u.mod == 0x1) { 2689 // 1B displacement 2690 len++; 2691 } else if (modrm.u.mod == 0x2) { 2692 // 4B displacement 2693 len += 4; 2694 } 2695 2696 // check for SIB byte 2697 if (modrm.u.rm == 0x4 && modrm.u.mod != 0x3) { 2698 len++; 2699 } 2700 2701 return len; 2702} 2703 2704// finds out whether a move instruction is a read or a write with respect to 2705// memory 2706static inline bool 2707decode_mov_is_write (struct guest *g, uint8_t *code) 2708{ 2709 // check for the REX prefix 2710 if ((code[0] >> 4) == 0x4) { 2711 code++; 2712 } 2713 2714 // we only support one move variant (in each direction) for now 2715 assert(code[0] == 0x89 || code[0] == 0x8b); 2716 2717 union x86_modrm modrm = { .raw = code[1] }; 2718 // not defined for reg to reg moves 2719 assert(modrm.u.mod != 3); 2720 2721 return code[0] == 0x89; // 0x89 ==> MOV reg -> mem 2722} 2723 2724static inline enum opsize 2725decode_mov_op_size (struct guest *g, uint8_t *code) 2726{ 2727 /* 2728 printf("EFER: 0x%lx\n", amd_vmcb_efer_rd_raw(&g->vmcb)); 2729 printf("Code: 0x%lx\n", *((uint64_t *)code)); 2730 printf("Code[0]: 0x%x, Code[1]: 0x%x, Code[2]: 0x%x, Code[3]: 0x%x\n", code[0],code[1],code[2],code[3]); 2731 printf("Guest EAX: 0x%x\n", guest_get_eax(g)); 2732 printf("Guest EBX: 0x%x\n", guest_get_ebx(g)); 2733 printf("Guest ECX: 0x%x\n", guest_get_ecx(g)); 2734 2735 printf("Guest EDX: 0x%x\n", guest_get_edx(g)); 2736 printf("Guest RDI: 0x%lx\n", guest_get_rdi(g)); 2737 printf("Guest RSI: 0x%lx\n", guest_get_rsi(g)); 2738 printf("Guest RSP: 0x%lx\n", guest_get_rsp(g)); 2739 printf("Guest RBP: 0x%lx\n", guest_get_rbp(g)); 2740 */ 2741 2742 // we only support long mode for now 2743 //assert(amd_vmcb_efer_rd(&g->vmcb).lma == 1); 2744 2745 // check for the REX prefix 2746 if ((code[0] >> 4) == 0x4 && code[0] & 0x48) { 2747 return OPSIZE_64; 2748 } 2749 return OPSIZE_32; 2750} 2751 2752 2753static inline uint64_t 2754decode_mov_src_val (struct guest *g, uint8_t *code) { 2755 2756 // we only support long mode for now 2757 //assert(amd_vmcb_efer_rd(&g->vmcb).lma == 1); 2758 2759 // check for the REX prefix 2760 if ((code[0] >> 4) == 0x4) { 2761 code++; 2762 } 2763 2764 // we only support one variant for now 2765 assert(code[0] == 0x89); 2766 2767 union x86_modrm modrm = { .raw = code[1] }; 2768 return get_reg_val_by_reg_num(g, modrm.u.regop); 2769} 2770 2771 2772static inline void 2773decode_mov_dest_val (struct guest *g, uint8_t *code, uint64_t val) 2774{ 2775 // we only support long mode for now 2776 //assert(amd_vmcb_efer_rd(&g->vmcb).lma == 1); 2777 2778 // check for the REX prefix 2779 if ((code[0] >> 4) == 0x4) { 2780 code++; 2781 } 2782 2783 // we only support one variant for now 2784 assert(code[0] == 0x8b); 2785 2786 union x86_modrm modrm = { .raw = code[1] }; 2787 set_reg_val_by_reg_num(g, modrm.u.regop, val); 2788} 2789 2790/**** e1000 2791#define TDBAL_OFFSET 0x3800 2792#define TDBAH_OFFSET 0x3804 2793#define RDBAL_OFFSET 0x2800 2794#define RDBAH_OFFSET 0x2804 2795#define TDT_OFFSET 0x3818 //Transmit descriptor tail. Writes to this toggle transmission 2796#define TCTL_OFFSET 0x400 //Transmission Control 2797 2798#define IMS_OFFSET 0xd0 // Interrupt Mask Set/Read Register 2799#define ICS_OFFSET 0xc8 // Interrupt Cause Set Register 2800 2801static int register_needs_translation(uint64_t addr){ 2802 return ( 2803 addr == TDBAL_OFFSET || 2804 addr == TDBAH_OFFSET || 2805 addr == RDBAL_OFFSET || 2806 addr == RDBAH_OFFSET 2807 ); 2808 2809} 2810 2811**** e1000 */ 2812 2813 2814 2815 2816#define MMIO_MASK(bytes) (~(~(bytes) + 1)) // I think ~(-bytes) is also correct 2817 2818static int 2819handle_vmexit_npf (struct guest *g) { 2820 int r; 2821#ifdef CONFIG_SVM 2822 uint64_t fault_addr = amd_vmcb_exitinfo2_rd(&g->vmcb); 2823#else 2824 uint64_t fault_addr, guest_rip; 2825 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_GPADDR_F, &fault_addr); 2826 assert(err_is_ok(err)); 2827#endif 2828 uint8_t *code = NULL; 2829 2830 // check for fault inside the guest physical memory region 2831 if (fault_addr >= g->mem_low_va && fault_addr < g->mem_high_va) { 2832 // allocate the missing memory 2833 alloc_guest_mem(g, fault_addr & ~BASE_PAGE_MASK, BASE_PAGE_SIZE); 2834 // do not advance the RIP, it is safe (and neccessary) to 2835 // replay the faulting instruction 2836 return HANDLER_ERR_OK; 2837 } 2838 2839 // fetch the location to the code 2840 r = get_instr_arr(g, &code); 2841 assert (r == 0); 2842 2843 // virtual devices 2844 switch (fault_addr & ~BASE_PAGE_MASK) { 2845 case APIC_BASE: { 2846 uint64_t val; 2847 enum opsize size; 2848 2849 assert(g->apic != NULL); 2850 size = decode_mov_op_size(g, code); 2851 if (decode_mov_is_write(g, code)) { 2852 val = decode_mov_src_val(g, code); 2853 r = apic_handle_mmio_write(g->apic, fault_addr, size, val); 2854 assert(r == 0); 2855 } else { 2856 r = apic_handle_mmio_read(g->apic, fault_addr, size, &val); 2857 assert(r == 0); 2858 decode_mov_dest_val(g, code, val); 2859 } 2860 2861 // advance the rip beyond the instruction 2862#ifdef CONFIG_SVM 2863 amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 2864 decode_mov_instr_length(g, code)); 2865#else 2866 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 2867 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 2868 decode_mov_instr_length(g, code)); 2869 assert(err_is_ok(err)); 2870#endif 2871 return HANDLER_ERR_OK; 2872 } 2873 } 2874 2875 //Check if this is a access to a pci device memory 2876 2877 for(int bus_i = 0; bus_i<256; bus_i++){ 2878 for(int dev_i = 0; dev_i < 32; dev_i++){ 2879 struct pci_bus *bus = g->pci->bus[bus_i]; 2880 if(bus) { 2881 struct pci_device* dev = bus->device[dev_i]; 2882 if(dev){ 2883 for(int bar_i=0; bar_i<5; bar_i++){ 2884 struct bar_info *curbar = &dev->bars[bar_i]; 2885 if(curbar->paddr <= fault_addr && fault_addr < curbar->paddr + curbar->bytes){ 2886 if(decode_mov_is_write(g, code)){ 2887 uint64_t val = decode_mov_src_val(g, code); 2888 if(dev->mem_write) { 2889 dev->mem_write(dev, MMIO_MASK(curbar->bytes) & fault_addr, bar_i, val ); 2890 } else { 2891 goto error; 2892 } 2893 } else { 2894 uint64_t val; 2895 if(dev->mem_read){ 2896 dev->mem_read(dev, MMIO_MASK(curbar->bytes) & fault_addr, bar_i, (uint32_t*)&val); 2897 decode_mov_dest_val(g, code, val); 2898 } else { 2899 goto error; 2900 } 2901 } 2902#ifdef CONFIG_SVM 2903 amd_vmcb_rip_wr(&g->vmcb, amd_vmcb_rip_rd(&g->vmcb) + 2904 decode_mov_instr_length(g, code)); 2905#else 2906 err += invoke_dispatcher_vmread(g->dcb_cap, VMX_GUEST_RIP, &guest_rip); 2907 err += invoke_dispatcher_vmwrite(g->dcb_cap, VMX_GUEST_RIP, guest_rip + 2908 decode_mov_instr_length(g, code)); 2909 assert(err_is_ok(err)); 2910#endif 2911 return HANDLER_ERR_OK; 2912 } 2913 } 2914 } 2915 } 2916 } 2917 } 2918 2919 error: 2920 printf("vmkitmon: access to an unknown memory location: %lx", fault_addr); 2921 return handle_vmexit_unhandeled(g); 2922} 2923 2924typedef int (*vmexit_handler)(struct guest *g); 2925 2926#ifdef CONFIG_SVM 2927static vmexit_handler vmexit_handlers[0x8c] = { 2928 [SVM_VMEXIT_CR0_READ] = handle_vmexit_cr_access, 2929 [SVM_VMEXIT_CR0_WRITE] = handle_vmexit_cr_access, 2930 [SVM_VMEXIT_CR0_SEL_WRITE] = handle_vmexit_cr_access, 2931 [SVM_VMEXIT_SWINT] = handle_vmexit_swint, 2932 [SVM_VMEXIT_IDTR_WRITE] = handle_vmexit_ldt, 2933 [SVM_VMEXIT_GDTR_WRITE] = handle_vmexit_ldt, 2934 [SVM_VMEXIT_IOIO] = handle_vmexit_ioio, 2935 [SVM_VMEXIT_MSR] = handle_vmexit_msr, 2936 [SVM_VMEXIT_CPUID] = handle_vmexit_cpuid, 2937 [SVM_VMEXIT_VMMCALL] = handle_vmexit_vmmcall, 2938 [SVM_VMEXIT_HLT] = handle_vmexit_hlt 2939}; 2940#else 2941static vmexit_handler vmexit_handlers[0x8c] = { 2942 [VMX_EXIT_REASON_CPUID] = handle_vmexit_cpuid, 2943 [VMX_EXIT_REASON_HLT] = handle_vmexit_hlt, 2944 [VMX_EXIT_REASON_VMCALL] = handle_vmexit_vmmcall, 2945 [VMX_EXIT_REASON_CR_ACCESS] = handle_vmexit_cr_access, 2946 [VMX_EXIT_REASON_INOUT] = handle_vmexit_ioio, 2947 [VMX_EXIT_REASON_RDMSR] = handle_vmexit_msr, 2948 [VMX_EXIT_REASON_WRMSR] = handle_vmexit_msr, 2949 [VMX_EXIT_REASON_GDTR_IDTR] = handle_vmexit_ldt, 2950 [VMX_EXIT_REASON_EPT_FAULT] = handle_vmexit_npf, 2951 [VMX_EXIT_REASON_SWINT] = handle_vmexit_swint 2952}; 2953#endif 2954 2955void 2956guest_handle_vmexit (struct guest *g) { 2957 //struct pci_ethernet * eth = (struct pci_ethernet * ) g->pci->bus[0]->device[2]->state;// 2958 //printf("guest_handle_vmexit\n"); 2959 vmexit_handler handler; 2960#ifdef CONFIG_SVM 2961 uint64_t exitcode = amd_vmcb_exitcode_rd(&g->vmcb); 2962 if (exitcode == SVM_VMEXIT_NPF) { 2963 handler = handle_vmexit_npf; 2964 } else if (LIKELY(vmexit_handlers[exitcode] != NULL)) { 2965 handler = vmexit_handlers[exitcode]; 2966 } else { 2967 handle_vmexit_unhandeled(g); 2968 return; 2969 } 2970#else 2971 if (!g->emulated_before_exit) { 2972 errval_t err = invoke_dispatcher_vmread(g->dcb_cap, VMX_EXIT_REASON, 2973 (uint64_t *)&saved_exit_reason); 2974 assert(err_is_ok(err)); 2975 } 2976 2977 if (LIKELY(vmexit_handlers[saved_exit_reason] != NULL)) { 2978 handler = vmexit_handlers[saved_exit_reason]; 2979 } else { 2980 handle_vmexit_unhandeled(g); 2981 return; 2982 } 2983#endif 2984 int r = handler(g); 2985 if (LIKELY(r == HANDLER_ERR_OK)) { 2986 if (g->runnable) { 2987 guest_make_runnable(g, true); 2988 } 2989 } 2990} 2991