vmm.c revision 253854
1/*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD: head/sys/amd64/vmm/vmm.c 253854 2013-08-01 05:59:28Z grehan $ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: head/sys/amd64/vmm/vmm.c 253854 2013-08-01 05:59:28Z grehan $"); 31 32#include <sys/param.h> 33#include <sys/systm.h> 34#include <sys/kernel.h> 35#include <sys/module.h> 36#include <sys/sysctl.h> 37#include <sys/malloc.h> 38#include <sys/pcpu.h> 39#include <sys/lock.h> 40#include <sys/mutex.h> 41#include <sys/proc.h> 42#include <sys/sched.h> 43#include <sys/smp.h> 44#include <sys/systm.h> 45 46#include <vm/vm.h> 47 48#include <machine/vm.h> 49#include <machine/pcb.h> 50#include <machine/smp.h> 51#include <x86/apicreg.h> 52 53#include <machine/vmm.h> 54#include "vmm_host.h" 55#include "vmm_mem.h" 56#include "vmm_util.h" 57#include <machine/vmm_dev.h> 58#include "vlapic.h" 59#include "vmm_msr.h" 60#include "vmm_ipi.h" 61#include "vmm_stat.h" 62#include "vmm_lapic.h" 63 64#include "io/ppt.h" 65#include "io/iommu.h" 66 67struct vlapic; 68 69struct vcpu { 70 int flags; 71 enum vcpu_state state; 72 struct mtx mtx; 73 int hostcpu; /* host cpuid this vcpu last ran on */ 74 uint64_t guest_msrs[VMM_MSR_NUM]; 75 struct vlapic *vlapic; 76 int vcpuid; 77 struct savefpu *guestfpu; /* guest fpu state */ 78 void *stats; 79 struct vm_exit exitinfo; 80 enum x2apic_state x2apic_state; 81 int nmi_pending; 82}; 83 84#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) 85#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) 86#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) 87 88#define VM_MAX_MEMORY_SEGMENTS 2 89 90struct vm { 91 void *cookie; /* processor-specific data */ 92 void *iommu; /* iommu-specific data */ 93 struct vcpu vcpu[VM_MAXCPU]; 94 int num_mem_segs; 95 struct vm_memory_segment mem_segs[VM_MAX_MEMORY_SEGMENTS]; 96 char name[VM_MAX_NAMELEN]; 97 98 /* 99 * Set of active vcpus. 100 * An active vcpu is one that has been started implicitly (BSP) or 101 * explicitly (AP) by sending it a startup ipi. 102 */ 103 cpuset_t active_cpus; 104}; 105 106static int vmm_initialized; 107 108static struct vmm_ops *ops; 109#define VMM_INIT() (ops != NULL ? (*ops->init)() : 0) 110#define VMM_CLEANUP() (ops != NULL ? (*ops->cleanup)() : 0) 111 112#define VMINIT(vm) (ops != NULL ? (*ops->vminit)(vm): NULL) 113#define VMRUN(vmi, vcpu, rip) \ 114 (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip) : ENXIO) 115#define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL) 116#define VMMMAP_SET(vmi, gpa, hpa, len, attr, prot, spm) \ 117 (ops != NULL ? \ 118 (*ops->vmmmap_set)(vmi, gpa, hpa, len, attr, prot, spm) : \ 119 ENXIO) 120#define VMMMAP_GET(vmi, gpa) \ 121 (ops != NULL ? (*ops->vmmmap_get)(vmi, gpa) : ENXIO) 122#define VMGETREG(vmi, vcpu, num, retval) \ 123 (ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO) 124#define VMSETREG(vmi, vcpu, num, val) \ 125 (ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO) 126#define VMGETDESC(vmi, vcpu, num, desc) \ 127 (ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO) 128#define VMSETDESC(vmi, vcpu, num, desc) \ 129 (ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO) 130#define VMINJECT(vmi, vcpu, type, vec, ec, ecv) \ 131 (ops != NULL ? (*ops->vminject)(vmi, vcpu, type, vec, ec, ecv) : ENXIO) 132#define VMGETCAP(vmi, vcpu, num, retval) \ 133 (ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO) 134#define VMSETCAP(vmi, vcpu, num, val) \ 135 (ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO) 136 137#define fpu_start_emulating() load_cr0(rcr0() | CR0_TS) 138#define fpu_stop_emulating() clts() 139 140static MALLOC_DEFINE(M_VM, "vm", "vm"); 141CTASSERT(VMM_MSR_NUM <= 64); /* msr_mask can keep track of up to 64 msrs */ 142 143/* statistics */ 144static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); 145 146static void 147vcpu_cleanup(struct vcpu *vcpu) 148{ 149 vlapic_cleanup(vcpu->vlapic); 150 vmm_stat_free(vcpu->stats); 151 fpu_save_area_free(vcpu->guestfpu); 152} 153 154static void 155vcpu_init(struct vm *vm, uint32_t vcpu_id) 156{ 157 struct vcpu *vcpu; 158 159 vcpu = &vm->vcpu[vcpu_id]; 160 161 vcpu_lock_init(vcpu); 162 vcpu->hostcpu = NOCPU; 163 vcpu->vcpuid = vcpu_id; 164 vcpu->vlapic = vlapic_init(vm, vcpu_id); 165 vm_set_x2apic_state(vm, vcpu_id, X2APIC_ENABLED); 166 vcpu->guestfpu = fpu_save_area_alloc(); 167 fpu_save_area_reset(vcpu->guestfpu); 168 vcpu->stats = vmm_stat_alloc(); 169} 170 171struct vm_exit * 172vm_exitinfo(struct vm *vm, int cpuid) 173{ 174 struct vcpu *vcpu; 175 176 if (cpuid < 0 || cpuid >= VM_MAXCPU) 177 panic("vm_exitinfo: invalid cpuid %d", cpuid); 178 179 vcpu = &vm->vcpu[cpuid]; 180 181 return (&vcpu->exitinfo); 182} 183 184static int 185vmm_init(void) 186{ 187 int error; 188 189 vmm_host_state_init(); 190 vmm_ipi_init(); 191 192 error = vmm_mem_init(); 193 if (error) 194 return (error); 195 196 if (vmm_is_intel()) 197 ops = &vmm_ops_intel; 198 else if (vmm_is_amd()) 199 ops = &vmm_ops_amd; 200 else 201 return (ENXIO); 202 203 vmm_msr_init(); 204 205 return (VMM_INIT()); 206} 207 208static int 209vmm_handler(module_t mod, int what, void *arg) 210{ 211 int error; 212 213 switch (what) { 214 case MOD_LOAD: 215 vmmdev_init(); 216 iommu_init(); 217 error = vmm_init(); 218 if (error == 0) 219 vmm_initialized = 1; 220 break; 221 case MOD_UNLOAD: 222 error = vmmdev_cleanup(); 223 if (error == 0) { 224 iommu_cleanup(); 225 vmm_ipi_cleanup(); 226 error = VMM_CLEANUP(); 227 /* 228 * Something bad happened - prevent new 229 * VMs from being created 230 */ 231 if (error) 232 vmm_initialized = 0; 233 } 234 break; 235 default: 236 error = 0; 237 break; 238 } 239 return (error); 240} 241 242static moduledata_t vmm_kmod = { 243 "vmm", 244 vmm_handler, 245 NULL 246}; 247 248/* 249 * vmm initialization has the following dependencies: 250 * 251 * - iommu initialization must happen after the pci passthru driver has had 252 * a chance to attach to any passthru devices (after SI_SUB_CONFIGURE). 253 * 254 * - VT-x initialization requires smp_rendezvous() and therefore must happen 255 * after SMP is fully functional (after SI_SUB_SMP). 256 */ 257DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY); 258MODULE_VERSION(vmm, 1); 259 260SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); 261 262int 263vm_create(const char *name, struct vm **retvm) 264{ 265 int i; 266 struct vm *vm; 267 vm_paddr_t maxaddr; 268 269 const int BSP = 0; 270 271 /* 272 * If vmm.ko could not be successfully initialized then don't attempt 273 * to create the virtual machine. 274 */ 275 if (!vmm_initialized) 276 return (ENXIO); 277 278 if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) 279 return (EINVAL); 280 281 vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO); 282 strcpy(vm->name, name); 283 vm->cookie = VMINIT(vm); 284 285 for (i = 0; i < VM_MAXCPU; i++) { 286 vcpu_init(vm, i); 287 guest_msrs_init(vm, i); 288 } 289 290 maxaddr = vmm_mem_maxaddr(); 291 vm->iommu = iommu_create_domain(maxaddr); 292 vm_activate_cpu(vm, BSP); 293 294 *retvm = vm; 295 return (0); 296} 297 298static void 299vm_free_mem_seg(struct vm *vm, struct vm_memory_segment *seg) 300{ 301 size_t len; 302 vm_paddr_t hpa; 303 void *host_domain; 304 305 host_domain = iommu_host_domain(); 306 307 len = 0; 308 while (len < seg->len) { 309 hpa = vm_gpa2hpa(vm, seg->gpa + len, PAGE_SIZE); 310 if (hpa == (vm_paddr_t)-1) { 311 panic("vm_free_mem_segs: cannot free hpa " 312 "associated with gpa 0x%016lx", seg->gpa + len); 313 } 314 315 /* 316 * Remove the 'gpa' to 'hpa' mapping in VMs domain. 317 * And resurrect the 1:1 mapping for 'hpa' in 'host_domain'. 318 */ 319 iommu_remove_mapping(vm->iommu, seg->gpa + len, PAGE_SIZE); 320 iommu_create_mapping(host_domain, hpa, hpa, PAGE_SIZE); 321 322 vmm_mem_free(hpa, PAGE_SIZE); 323 324 len += PAGE_SIZE; 325 } 326 327 /* 328 * Invalidate cached translations associated with 'vm->iommu' since 329 * we have now moved some pages from it. 330 */ 331 iommu_invalidate_tlb(vm->iommu); 332 333 bzero(seg, sizeof(struct vm_memory_segment)); 334} 335 336void 337vm_destroy(struct vm *vm) 338{ 339 int i; 340 341 ppt_unassign_all(vm); 342 343 for (i = 0; i < vm->num_mem_segs; i++) 344 vm_free_mem_seg(vm, &vm->mem_segs[i]); 345 346 vm->num_mem_segs = 0; 347 348 for (i = 0; i < VM_MAXCPU; i++) 349 vcpu_cleanup(&vm->vcpu[i]); 350 351 iommu_destroy_domain(vm->iommu); 352 353 VMCLEANUP(vm->cookie); 354 355 free(vm, M_VM); 356} 357 358const char * 359vm_name(struct vm *vm) 360{ 361 return (vm->name); 362} 363 364int 365vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) 366{ 367 const boolean_t spok = TRUE; /* superpage mappings are ok */ 368 369 return (VMMMAP_SET(vm->cookie, gpa, hpa, len, VM_MEMATTR_UNCACHEABLE, 370 VM_PROT_RW, spok)); 371} 372 373int 374vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len) 375{ 376 const boolean_t spok = TRUE; /* superpage mappings are ok */ 377 378 return (VMMMAP_SET(vm->cookie, gpa, 0, len, 0, 379 VM_PROT_NONE, spok)); 380} 381 382/* 383 * Returns TRUE if 'gpa' is available for allocation and FALSE otherwise 384 */ 385static boolean_t 386vm_gpa_available(struct vm *vm, vm_paddr_t gpa) 387{ 388 int i; 389 vm_paddr_t gpabase, gpalimit; 390 391 if (gpa & PAGE_MASK) 392 panic("vm_gpa_available: gpa (0x%016lx) not page aligned", gpa); 393 394 for (i = 0; i < vm->num_mem_segs; i++) { 395 gpabase = vm->mem_segs[i].gpa; 396 gpalimit = gpabase + vm->mem_segs[i].len; 397 if (gpa >= gpabase && gpa < gpalimit) 398 return (FALSE); 399 } 400 401 return (TRUE); 402} 403 404int 405vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len) 406{ 407 int error, available, allocated; 408 struct vm_memory_segment *seg; 409 vm_paddr_t g, hpa; 410 void *host_domain; 411 412 const boolean_t spok = TRUE; /* superpage mappings are ok */ 413 414 if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0) 415 return (EINVAL); 416 417 available = allocated = 0; 418 g = gpa; 419 while (g < gpa + len) { 420 if (vm_gpa_available(vm, g)) 421 available++; 422 else 423 allocated++; 424 425 g += PAGE_SIZE; 426 } 427 428 /* 429 * If there are some allocated and some available pages in the address 430 * range then it is an error. 431 */ 432 if (allocated && available) 433 return (EINVAL); 434 435 /* 436 * If the entire address range being requested has already been 437 * allocated then there isn't anything more to do. 438 */ 439 if (allocated && available == 0) 440 return (0); 441 442 if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS) 443 return (E2BIG); 444 445 host_domain = iommu_host_domain(); 446 447 seg = &vm->mem_segs[vm->num_mem_segs]; 448 449 error = 0; 450 seg->gpa = gpa; 451 seg->len = 0; 452 while (seg->len < len) { 453 hpa = vmm_mem_alloc(PAGE_SIZE); 454 if (hpa == 0) { 455 error = ENOMEM; 456 break; 457 } 458 459 error = VMMMAP_SET(vm->cookie, gpa + seg->len, hpa, PAGE_SIZE, 460 VM_MEMATTR_WRITE_BACK, VM_PROT_ALL, spok); 461 if (error) 462 break; 463 464 /* 465 * Remove the 1:1 mapping for 'hpa' from the 'host_domain'. 466 * Add mapping for 'gpa + seg->len' to 'hpa' in the VMs domain. 467 */ 468 iommu_remove_mapping(host_domain, hpa, PAGE_SIZE); 469 iommu_create_mapping(vm->iommu, gpa + seg->len, hpa, PAGE_SIZE); 470 471 seg->len += PAGE_SIZE; 472 } 473 474 if (error) { 475 vm_free_mem_seg(vm, seg); 476 return (error); 477 } 478 479 /* 480 * Invalidate cached translations associated with 'host_domain' since 481 * we have now moved some pages from it. 482 */ 483 iommu_invalidate_tlb(host_domain); 484 485 vm->num_mem_segs++; 486 487 return (0); 488} 489 490vm_paddr_t 491vm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t len) 492{ 493 vm_paddr_t nextpage; 494 495 nextpage = rounddown(gpa + PAGE_SIZE, PAGE_SIZE); 496 if (len > nextpage - gpa) 497 panic("vm_gpa2hpa: invalid gpa/len: 0x%016lx/%lu", gpa, len); 498 499 return (VMMMAP_GET(vm->cookie, gpa)); 500} 501 502int 503vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase, 504 struct vm_memory_segment *seg) 505{ 506 int i; 507 508 for (i = 0; i < vm->num_mem_segs; i++) { 509 if (gpabase == vm->mem_segs[i].gpa) { 510 *seg = vm->mem_segs[i]; 511 return (0); 512 } 513 } 514 return (-1); 515} 516 517int 518vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval) 519{ 520 521 if (vcpu < 0 || vcpu >= VM_MAXCPU) 522 return (EINVAL); 523 524 if (reg >= VM_REG_LAST) 525 return (EINVAL); 526 527 return (VMGETREG(vm->cookie, vcpu, reg, retval)); 528} 529 530int 531vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val) 532{ 533 534 if (vcpu < 0 || vcpu >= VM_MAXCPU) 535 return (EINVAL); 536 537 if (reg >= VM_REG_LAST) 538 return (EINVAL); 539 540 return (VMSETREG(vm->cookie, vcpu, reg, val)); 541} 542 543static boolean_t 544is_descriptor_table(int reg) 545{ 546 547 switch (reg) { 548 case VM_REG_GUEST_IDTR: 549 case VM_REG_GUEST_GDTR: 550 return (TRUE); 551 default: 552 return (FALSE); 553 } 554} 555 556static boolean_t 557is_segment_register(int reg) 558{ 559 560 switch (reg) { 561 case VM_REG_GUEST_ES: 562 case VM_REG_GUEST_CS: 563 case VM_REG_GUEST_SS: 564 case VM_REG_GUEST_DS: 565 case VM_REG_GUEST_FS: 566 case VM_REG_GUEST_GS: 567 case VM_REG_GUEST_TR: 568 case VM_REG_GUEST_LDTR: 569 return (TRUE); 570 default: 571 return (FALSE); 572 } 573} 574 575int 576vm_get_seg_desc(struct vm *vm, int vcpu, int reg, 577 struct seg_desc *desc) 578{ 579 580 if (vcpu < 0 || vcpu >= VM_MAXCPU) 581 return (EINVAL); 582 583 if (!is_segment_register(reg) && !is_descriptor_table(reg)) 584 return (EINVAL); 585 586 return (VMGETDESC(vm->cookie, vcpu, reg, desc)); 587} 588 589int 590vm_set_seg_desc(struct vm *vm, int vcpu, int reg, 591 struct seg_desc *desc) 592{ 593 if (vcpu < 0 || vcpu >= VM_MAXCPU) 594 return (EINVAL); 595 596 if (!is_segment_register(reg) && !is_descriptor_table(reg)) 597 return (EINVAL); 598 599 return (VMSETDESC(vm->cookie, vcpu, reg, desc)); 600} 601 602static void 603restore_guest_fpustate(struct vcpu *vcpu) 604{ 605 606 /* flush host state to the pcb */ 607 fpuexit(curthread); 608 609 /* restore guest FPU state */ 610 fpu_stop_emulating(); 611 fpurestore(vcpu->guestfpu); 612 613 /* 614 * The FPU is now "dirty" with the guest's state so turn on emulation 615 * to trap any access to the FPU by the host. 616 */ 617 fpu_start_emulating(); 618} 619 620static void 621save_guest_fpustate(struct vcpu *vcpu) 622{ 623 624 if ((rcr0() & CR0_TS) == 0) 625 panic("fpu emulation not enabled in host!"); 626 627 /* save guest FPU state */ 628 fpu_stop_emulating(); 629 fpusave(vcpu->guestfpu); 630 fpu_start_emulating(); 631} 632 633static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle"); 634 635int 636vm_run(struct vm *vm, struct vm_run *vmrun) 637{ 638 int error, vcpuid, sleepticks, t; 639 struct vcpu *vcpu; 640 struct pcb *pcb; 641 uint64_t tscval, rip; 642 struct vm_exit *vme; 643 644 vcpuid = vmrun->cpuid; 645 646 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 647 return (EINVAL); 648 649 vcpu = &vm->vcpu[vcpuid]; 650 vme = &vmrun->vm_exit; 651 rip = vmrun->rip; 652restart: 653 critical_enter(); 654 655 tscval = rdtsc(); 656 657 pcb = PCPU_GET(curpcb); 658 set_pcb_flags(pcb, PCB_FULL_IRET); 659 660 restore_guest_msrs(vm, vcpuid); 661 restore_guest_fpustate(vcpu); 662 663 vcpu->hostcpu = curcpu; 664 error = VMRUN(vm->cookie, vcpuid, rip); 665 vcpu->hostcpu = NOCPU; 666 667 save_guest_fpustate(vcpu); 668 restore_host_msrs(vm, vcpuid); 669 670 vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval); 671 672 /* copy the exit information */ 673 bcopy(&vcpu->exitinfo, vme, sizeof(struct vm_exit)); 674 675 critical_exit(); 676 677 /* 678 * Oblige the guest's desire to 'hlt' by sleeping until the vcpu 679 * is ready to run. 680 */ 681 if (error == 0 && vme->exitcode == VM_EXITCODE_HLT) { 682 vcpu_lock(vcpu); 683 684 /* 685 * Figure out the number of host ticks until the next apic 686 * timer interrupt in the guest. 687 */ 688 sleepticks = lapic_timer_tick(vm, vcpuid); 689 690 /* 691 * If the guest local apic timer is disabled then sleep for 692 * a long time but not forever. 693 */ 694 if (sleepticks < 0) 695 sleepticks = hz; 696 697 /* 698 * Do a final check for pending NMI or interrupts before 699 * really putting this thread to sleep. 700 * 701 * These interrupts could have happened any time after we 702 * returned from VMRUN() and before we grabbed the vcpu lock. 703 */ 704 if (!vm_nmi_pending(vm, vcpuid) && 705 lapic_pending_intr(vm, vcpuid) < 0) { 706 if (sleepticks <= 0) 707 panic("invalid sleepticks %d", sleepticks); 708 t = ticks; 709 msleep_spin(vcpu, &vcpu->mtx, "vmidle", sleepticks); 710 vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t); 711 } 712 713 vcpu_unlock(vcpu); 714 715 rip = vme->rip + vme->inst_length; 716 goto restart; 717 } 718 719 return (error); 720} 721 722int 723vm_inject_event(struct vm *vm, int vcpuid, int type, 724 int vector, uint32_t code, int code_valid) 725{ 726 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 727 return (EINVAL); 728 729 if ((type > VM_EVENT_NONE && type < VM_EVENT_MAX) == 0) 730 return (EINVAL); 731 732 if (vector < 0 || vector > 255) 733 return (EINVAL); 734 735 return (VMINJECT(vm->cookie, vcpuid, type, vector, code, code_valid)); 736} 737 738static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu"); 739 740int 741vm_inject_nmi(struct vm *vm, int vcpuid) 742{ 743 struct vcpu *vcpu; 744 745 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 746 return (EINVAL); 747 748 vcpu = &vm->vcpu[vcpuid]; 749 750 vcpu->nmi_pending = 1; 751 vm_interrupt_hostcpu(vm, vcpuid); 752 return (0); 753} 754 755int 756vm_nmi_pending(struct vm *vm, int vcpuid) 757{ 758 struct vcpu *vcpu; 759 760 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 761 panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); 762 763 vcpu = &vm->vcpu[vcpuid]; 764 765 return (vcpu->nmi_pending); 766} 767 768void 769vm_nmi_clear(struct vm *vm, int vcpuid) 770{ 771 struct vcpu *vcpu; 772 773 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 774 panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); 775 776 vcpu = &vm->vcpu[vcpuid]; 777 778 if (vcpu->nmi_pending == 0) 779 panic("vm_nmi_clear: inconsistent nmi_pending state"); 780 781 vcpu->nmi_pending = 0; 782 vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1); 783} 784 785int 786vm_get_capability(struct vm *vm, int vcpu, int type, int *retval) 787{ 788 if (vcpu < 0 || vcpu >= VM_MAXCPU) 789 return (EINVAL); 790 791 if (type < 0 || type >= VM_CAP_MAX) 792 return (EINVAL); 793 794 return (VMGETCAP(vm->cookie, vcpu, type, retval)); 795} 796 797int 798vm_set_capability(struct vm *vm, int vcpu, int type, int val) 799{ 800 if (vcpu < 0 || vcpu >= VM_MAXCPU) 801 return (EINVAL); 802 803 if (type < 0 || type >= VM_CAP_MAX) 804 return (EINVAL); 805 806 return (VMSETCAP(vm->cookie, vcpu, type, val)); 807} 808 809uint64_t * 810vm_guest_msrs(struct vm *vm, int cpu) 811{ 812 return (vm->vcpu[cpu].guest_msrs); 813} 814 815struct vlapic * 816vm_lapic(struct vm *vm, int cpu) 817{ 818 return (vm->vcpu[cpu].vlapic); 819} 820 821boolean_t 822vmm_is_pptdev(int bus, int slot, int func) 823{ 824 int found, i, n; 825 int b, s, f; 826 char *val, *cp, *cp2; 827 828 /* 829 * XXX 830 * The length of an environment variable is limited to 128 bytes which 831 * puts an upper limit on the number of passthru devices that may be 832 * specified using a single environment variable. 833 * 834 * Work around this by scanning multiple environment variable 835 * names instead of a single one - yuck! 836 */ 837 const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL }; 838 839 /* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */ 840 found = 0; 841 for (i = 0; names[i] != NULL && !found; i++) { 842 cp = val = getenv(names[i]); 843 while (cp != NULL && *cp != '\0') { 844 if ((cp2 = strchr(cp, ' ')) != NULL) 845 *cp2 = '\0'; 846 847 n = sscanf(cp, "%d/%d/%d", &b, &s, &f); 848 if (n == 3 && bus == b && slot == s && func == f) { 849 found = 1; 850 break; 851 } 852 853 if (cp2 != NULL) 854 *cp2++ = ' '; 855 856 cp = cp2; 857 } 858 freeenv(val); 859 } 860 return (found); 861} 862 863void * 864vm_iommu_domain(struct vm *vm) 865{ 866 867 return (vm->iommu); 868} 869 870int 871vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state state) 872{ 873 int error; 874 struct vcpu *vcpu; 875 876 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 877 panic("vm_set_run_state: invalid vcpuid %d", vcpuid); 878 879 vcpu = &vm->vcpu[vcpuid]; 880 881 vcpu_lock(vcpu); 882 883 /* 884 * The following state transitions are allowed: 885 * IDLE -> RUNNING -> IDLE 886 * IDLE -> CANNOT_RUN -> IDLE 887 */ 888 if ((vcpu->state == VCPU_IDLE && state != VCPU_IDLE) || 889 (vcpu->state != VCPU_IDLE && state == VCPU_IDLE)) { 890 error = 0; 891 vcpu->state = state; 892 } else { 893 error = EBUSY; 894 } 895 896 vcpu_unlock(vcpu); 897 898 return (error); 899} 900 901enum vcpu_state 902vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu) 903{ 904 struct vcpu *vcpu; 905 enum vcpu_state state; 906 907 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 908 panic("vm_get_run_state: invalid vcpuid %d", vcpuid); 909 910 vcpu = &vm->vcpu[vcpuid]; 911 912 vcpu_lock(vcpu); 913 state = vcpu->state; 914 if (hostcpu != NULL) 915 *hostcpu = vcpu->hostcpu; 916 vcpu_unlock(vcpu); 917 918 return (state); 919} 920 921void 922vm_activate_cpu(struct vm *vm, int vcpuid) 923{ 924 925 if (vcpuid >= 0 && vcpuid < VM_MAXCPU) 926 CPU_SET(vcpuid, &vm->active_cpus); 927} 928 929cpuset_t 930vm_active_cpus(struct vm *vm) 931{ 932 933 return (vm->active_cpus); 934} 935 936void * 937vcpu_stats(struct vm *vm, int vcpuid) 938{ 939 940 return (vm->vcpu[vcpuid].stats); 941} 942 943int 944vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state) 945{ 946 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 947 return (EINVAL); 948 949 *state = vm->vcpu[vcpuid].x2apic_state; 950 951 return (0); 952} 953 954int 955vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) 956{ 957 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 958 return (EINVAL); 959 960 if (state >= X2APIC_STATE_LAST) 961 return (EINVAL); 962 963 vm->vcpu[vcpuid].x2apic_state = state; 964 965 vlapic_set_x2apic_state(vm, vcpuid, state); 966 967 return (0); 968} 969 970void 971vm_interrupt_hostcpu(struct vm *vm, int vcpuid) 972{ 973 int hostcpu; 974 struct vcpu *vcpu; 975 976 vcpu = &vm->vcpu[vcpuid]; 977 978 vcpu_lock(vcpu); 979 hostcpu = vcpu->hostcpu; 980 if (hostcpu == NOCPU) { 981 /* 982 * If the vcpu is 'RUNNING' but without a valid 'hostcpu' then 983 * the host thread must be sleeping waiting for an event to 984 * kick the vcpu out of 'hlt'. 985 * 986 * XXX this is racy because the condition exists right before 987 * and after calling VMRUN() in vm_run(). The wakeup() is 988 * benign in this case. 989 */ 990 if (vcpu->state == VCPU_RUNNING) 991 wakeup_one(vcpu); 992 } else { 993 if (vcpu->state != VCPU_RUNNING) 994 panic("invalid vcpu state %d", vcpu->state); 995 if (hostcpu != curcpu) 996 ipi_cpu(hostcpu, vmm_ipinum); 997 } 998 vcpu_unlock(vcpu); 999} 1000