/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/sys/amd64/vmm/vmm.c 262349 2014-02-22 23:34:39Z jhb $
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/amd64/vmm/vmm.c 262349 2014-02-22 23:34:39Z jhb $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/pcpu.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/cpu.h>
#include <machine/vm.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <x86/apicreg.h>
#include <machine/vmparam.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>

#include "vmm_ktr.h"
#include "vmm_host.h"
#include "vmm_mem.h"
#include "vmm_util.h"
#include "vhpet.h"
#include "vioapic.h"
#include "vlapic.h"
#include "vmm_msr.h"
#include "vmm_ipi.h"
#include "vmm_stat.h"
#include "vmm_lapic.h"

#include "io/ppt.h"
#include "io/iommu.h"

struct vlapic;

struct vcpu {
        int             flags;
        enum vcpu_state state;
        struct mtx      mtx;
        int             hostcpu;        /* host cpuid this vcpu last ran on */
        uint64_t        guest_msrs[VMM_MSR_NUM];
        struct vlapic   *vlapic;
        int             vcpuid;
        struct savefpu  *guestfpu;      /* guest fpu state */
        void            *stats;
        struct vm_exit  exitinfo;
        enum x2apic_state x2apic_state;
        int             nmi_pending;
};

#define vcpu_lock_init(v)       mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define vcpu_lock(v)            mtx_lock_spin(&((v)->mtx))
#define vcpu_unlock(v)          mtx_unlock_spin(&((v)->mtx))
#define vcpu_assert_locked(v)   mtx_assert(&((v)->mtx), MA_OWNED)

struct mem_seg {
        vm_paddr_t      gpa;
        size_t          len;
        boolean_t       wired;
        vm_object_t     object;
};
#define VM_MAX_MEMORY_SEGMENTS  2

struct vm {
        void            *cookie;        /* processor-specific data */
        void            *iommu;         /* iommu-specific data */
        struct vhpet    *vhpet;         /* virtual HPET */
        struct vioapic  *vioapic;       /* virtual ioapic */
        struct vmspace  *vmspace;       /* guest's address space */
        struct vcpu     vcpu[VM_MAXCPU];
        int             num_mem_segs;
        struct mem_seg  mem_segs[VM_MAX_MEMORY_SEGMENTS];
        char            name[VM_MAX_NAMELEN];

        /*
         * Set of active vcpus.
         * An active vcpu is one that has been started implicitly (BSP) or
         * explicitly (AP) by sending it a startup ipi.
         */
        cpuset_t        active_cpus;
};

static int vmm_initialized;

static struct vmm_ops *ops;
#define VMM_INIT()      (ops != NULL ? (*ops->init)() : 0)
#define VMM_CLEANUP()   (ops != NULL ? (*ops->cleanup)() : 0)
#define VMM_RESUME()    (ops != NULL ? (*ops->resume)() : 0)

#define VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap) : NULL)
#define VMRUN(vmi, vcpu, rip, pmap) \
        (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap) : ENXIO)
#define VMCLEANUP(vmi)  (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
#define VMSPACE_ALLOC(min, max) \
        (ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
#define VMSPACE_FREE(vmspace) \
        (ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO)
#define VMGETREG(vmi, vcpu, num, retval) \
        (ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
#define VMSETREG(vmi, vcpu, num, val) \
        (ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
#define VMGETDESC(vmi, vcpu, num, desc) \
        (ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define VMSETDESC(vmi, vcpu, num, desc) \
        (ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define VMINJECT(vmi, vcpu, type, vec, ec, ecv) \
        (ops != NULL ? (*ops->vminject)(vmi, vcpu, type, vec, ec, ecv) : ENXIO)
#define VMGETCAP(vmi, vcpu, num, retval) \
        (ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
#define VMSETCAP(vmi, vcpu, num, val) \
        (ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
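
/*
 * The macros above dispatch into a hardware-specific backend (Intel VT-x
 * or AMD SVM) through the 'ops' table, which stays NULL until vmm_init()
 * selects one; the NULL checks make early calls fail with ENXIO (or NULL)
 * instead of crashing.  As a rough sketch, a backend is expected to export
 * a vmm_ops with the entry points named in the macros.  The field names
 * below are inferred from the macro usage, not a definitive list:
 *
 *      static struct vmm_ops vmm_ops_example = {
 *              .init           = example_init,
 *              .cleanup        = example_cleanup,
 *              .resume         = example_resume,
 *              .vminit         = example_vminit,
 *              .vmrun          = example_vmrun,
 *              .vmcleanup      = example_vmcleanup,
 *              ...
 *      };
 */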

#define fpu_start_emulating()   load_cr0(rcr0() | CR0_TS)
#define fpu_stop_emulating()    clts()
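
/*
 * The two macros above toggle FPU trapping via the CR0.TS bit: with TS
 * set, the next FPU/SSE instruction raises a #NM exception, so any host
 * use of the FPU while it still holds guest state is caught rather than
 * silently corrupting that state.  clts() clears TS and makes the FPU
 * usable again.  See restore_guest_fpustate() and save_guest_fpustate()
 * below for how the pair is used.
 */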

static MALLOC_DEFINE(M_VM, "vm", "vm");
CTASSERT(VMM_MSR_NUM <= 64);    /* msr_mask can keep track of up to 64 msrs */

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

static void
vcpu_cleanup(struct vcpu *vcpu)
{
        vlapic_cleanup(vcpu->vlapic);
        vmm_stat_free(vcpu->stats);
        fpu_save_area_free(vcpu->guestfpu);
}

static void
vcpu_init(struct vm *vm, uint32_t vcpu_id)
{
        struct vcpu *vcpu;

        vcpu = &vm->vcpu[vcpu_id];

        vcpu_lock_init(vcpu);
        vcpu->hostcpu = NOCPU;
        vcpu->vcpuid = vcpu_id;
        vcpu->vlapic = vlapic_init(vm, vcpu_id);
        vm_set_x2apic_state(vm, vcpu_id, X2APIC_ENABLED);
        vcpu->guestfpu = fpu_save_area_alloc();
        fpu_save_area_reset(vcpu->guestfpu);
        vcpu->stats = vmm_stat_alloc();
}

struct vm_exit *
vm_exitinfo(struct vm *vm, int cpuid)
{
        struct vcpu *vcpu;

        if (cpuid < 0 || cpuid >= VM_MAXCPU)
                panic("vm_exitinfo: invalid cpuid %d", cpuid);

        vcpu = &vm->vcpu[cpuid];

        return (&vcpu->exitinfo);
}

static void
vmm_resume(void)
{
        VMM_RESUME();
}

static int
vmm_init(void)
{
        int error;

        vmm_host_state_init();
        vmm_ipi_init();

        error = vmm_mem_init();
        if (error)
                return (error);

        if (vmm_is_intel())
                ops = &vmm_ops_intel;
        else if (vmm_is_amd())
                ops = &vmm_ops_amd;
        else
                return (ENXIO);

        vmm_msr_init();
        vmm_resume_p = vmm_resume;

        return (VMM_INIT());
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
        int error;

        switch (what) {
        case MOD_LOAD:
                vmmdev_init();
                iommu_init();
                error = vmm_init();
                if (error == 0)
                        vmm_initialized = 1;
                break;
        case MOD_UNLOAD:
                error = vmmdev_cleanup();
                if (error == 0) {
                        vmm_resume_p = NULL;
                        iommu_cleanup();
                        vmm_ipi_cleanup();
                        error = VMM_CLEANUP();
                        /*
                         * Something bad happened - prevent new
                         * VMs from being created
                         */
                        if (error)
                                vmm_initialized = 0;
                }
                break;
        default:
                error = 0;
                break;
        }
        return (error);
}

static moduledata_t vmm_kmod = {
        "vmm",
        vmm_handler,
        NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - iommu initialization must happen after the pci passthru driver has had
 *   a chance to attach to any passthru devices (after SI_SUB_CONFIGURE).
 *
 * - VT-x initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 */
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

int
vm_create(const char *name, struct vm **retvm)
{
        int i;
        struct vm *vm;
        struct vmspace *vmspace;

        const int BSP = 0;

        /*
         * If vmm.ko could not be successfully initialized then don't attempt
         * to create the virtual machine.
         */
        if (!vmm_initialized)
                return (ENXIO);

        if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
                return (EINVAL);

        vmspace = VMSPACE_ALLOC(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
        if (vmspace == NULL)
                return (ENOMEM);

        vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
        strcpy(vm->name, name);
        vm->cookie = VMINIT(vm, vmspace_pmap(vmspace));
        vm->vioapic = vioapic_init(vm);
        vm->vhpet = vhpet_init(vm);

        for (i = 0; i < VM_MAXCPU; i++) {
                vcpu_init(vm, i);
                guest_msrs_init(vm, i);
        }

        vm_activate_cpu(vm, BSP);
        vm->vmspace = vmspace;

        *retvm = vm;
        return (0);
}
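
/*
 * For illustration, the expected call sequence around vm_create(), as
 * driven by the /dev/vmm ioctl layer; the VM name and memory size below
 * are made up:
 *
 *      struct vm *vm;
 *
 *      error = vm_create("example-vm", &vm);        allocate vmspace, vcpus
 *      error = vm_malloc(vm, 0, 256 * 1024 * 1024); back guest memory
 *      ...                                          vm_run() loop
 *      vm_destroy(vm);                              tear everything down
 */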

static void
vm_free_mem_seg(struct vm *vm, struct mem_seg *seg)
{

        if (seg->object != NULL)
                vmm_mem_free(vm->vmspace, seg->gpa, seg->len);

        bzero(seg, sizeof(*seg));
}

void
vm_destroy(struct vm *vm)
{
        int i;

        ppt_unassign_all(vm);

        if (vm->iommu != NULL)
                iommu_destroy_domain(vm->iommu);

        vhpet_cleanup(vm->vhpet);
        vioapic_cleanup(vm->vioapic);

        for (i = 0; i < vm->num_mem_segs; i++)
                vm_free_mem_seg(vm, &vm->mem_segs[i]);

        vm->num_mem_segs = 0;

        for (i = 0; i < VM_MAXCPU; i++)
                vcpu_cleanup(&vm->vcpu[i]);

        VMSPACE_FREE(vm->vmspace);

        VMCLEANUP(vm->cookie);

        free(vm, M_VM);
}

const char *
vm_name(struct vm *vm)
{
        return (vm->name);
}

int
vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
        vm_object_t obj;

        if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL)
                return (ENOMEM);
        else
                return (0);
}

int
vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
{

        vmm_mmio_free(vm->vmspace, gpa, len);
        return (0);
}

boolean_t
vm_mem_allocated(struct vm *vm, vm_paddr_t gpa)
{
        int i;
        vm_paddr_t gpabase, gpalimit;

        for (i = 0; i < vm->num_mem_segs; i++) {
                gpabase = vm->mem_segs[i].gpa;
                gpalimit = gpabase + vm->mem_segs[i].len;
                if (gpa >= gpabase && gpa < gpalimit)
                        return (TRUE);          /* 'gpa' is regular memory */
        }

        if (ppt_is_mmio(vm, gpa))
                return (TRUE);                  /* 'gpa' is pci passthru mmio */

        return (FALSE);
}

int
vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
{
        int available, allocated;
        struct mem_seg *seg;
        vm_object_t object;
        vm_paddr_t g;

        if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
                return (EINVAL);

        available = allocated = 0;
        g = gpa;
        while (g < gpa + len) {
                if (vm_mem_allocated(vm, g))
                        allocated++;
                else
                        available++;

                g += PAGE_SIZE;
        }

        /*
         * If there are some allocated and some available pages in the address
         * range then it is an error.
         */
        if (allocated && available)
                return (EINVAL);

        /*
         * If the entire address range being requested has already been
         * allocated then there isn't anything more to do.
         */
        if (allocated && available == 0)
                return (0);

        if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
                return (E2BIG);

        seg = &vm->mem_segs[vm->num_mem_segs];

        if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL)
                return (ENOMEM);

        seg->gpa = gpa;
        seg->len = len;
        seg->object = object;
        seg->wired = FALSE;

        vm->num_mem_segs++;

        return (0);
}
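
/*
 * Note that VM_MAX_MEMORY_SEGMENTS is only 2, so callers of vm_malloc()
 * are expected to describe guest memory in at most two contiguous chunks
 * (typically one below 4GB and, for larger guests, one above it, though
 * that particular split is a userland convention rather than something
 * enforced here).
 */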

static void
vm_gpa_unwire(struct vm *vm)
{
        int i, rv;
        struct mem_seg *seg;

        for (i = 0; i < vm->num_mem_segs; i++) {
                seg = &vm->mem_segs[i];
                if (!seg->wired)
                        continue;

                rv = vm_map_unwire(&vm->vmspace->vm_map,
                    seg->gpa, seg->gpa + seg->len,
                    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
                KASSERT(rv == KERN_SUCCESS, ("vm(%s) memory segment "
                    "%#lx/%ld could not be unwired: %d",
                    vm_name(vm), seg->gpa, seg->len, rv));

                seg->wired = FALSE;
        }
}

static int
vm_gpa_wire(struct vm *vm)
{
        int i, rv;
        struct mem_seg *seg;

        for (i = 0; i < vm->num_mem_segs; i++) {
                seg = &vm->mem_segs[i];
                if (seg->wired)
                        continue;

                /* XXX rlimits? */
                rv = vm_map_wire(&vm->vmspace->vm_map,
                    seg->gpa, seg->gpa + seg->len,
                    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
                if (rv != KERN_SUCCESS)
                        break;

                seg->wired = TRUE;
        }

        if (i < vm->num_mem_segs) {
                /*
                 * Undo the wiring before returning an error.
                 */
                vm_gpa_unwire(vm);
                return (EAGAIN);
        }

        return (0);
}
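
/*
 * The wiring above matters for pci passthru: once a device is handed to
 * the guest it can DMA into guest memory at any time, and the iommu
 * translations programmed by vm_iommu_modify() below reference fixed
 * host physical addresses.  Wiring keeps those pages resident and
 * prevents the VM system from paging or relocating them.
 */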

static void
vm_iommu_modify(struct vm *vm, boolean_t map)
{
        int i, sz;
        vm_paddr_t gpa, hpa;
        struct mem_seg *seg;
        void *vp, *cookie, *host_domain;

        sz = PAGE_SIZE;
        host_domain = iommu_host_domain();

        for (i = 0; i < vm->num_mem_segs; i++) {
                seg = &vm->mem_segs[i];
                KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired",
                    vm_name(vm), seg->gpa, seg->len));

                gpa = seg->gpa;
                while (gpa < seg->gpa + seg->len) {
                        vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE,
                            &cookie);
                        KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx",
                            vm_name(vm), gpa));

                        vm_gpa_release(cookie);

                        hpa = DMAP_TO_PHYS((uintptr_t)vp);
                        if (map) {
                                iommu_create_mapping(vm->iommu, gpa, hpa, sz);
                                iommu_remove_mapping(host_domain, hpa, sz);
                        } else {
                                iommu_remove_mapping(vm->iommu, gpa, sz);
                                iommu_create_mapping(host_domain, hpa, hpa, sz);
                        }

                        gpa += PAGE_SIZE;
                }
        }

        /*
         * Invalidate the cached translations associated with the domain
         * from which pages were removed.
         */
        if (map)
                iommu_invalidate_tlb(host_domain);
        else
                iommu_invalidate_tlb(vm->iommu);
}

#define vm_iommu_unmap(vm)      vm_iommu_modify((vm), FALSE)
#define vm_iommu_map(vm)        vm_iommu_modify((vm), TRUE)
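
/*
 * Note that vm_iommu_modify() moves each page between the host domain and
 * the guest's domain rather than mapping it in both: a passthru device
 * owned by the guest can then only DMA to the guest's own memory, and
 * host-owned devices lose access to those pages.
 */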

int
vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func)
{
        int error;

        error = ppt_unassign_device(vm, bus, slot, func);
        if (error)
                return (error);

        if (ppt_num_devices(vm) == 0) {
                vm_iommu_unmap(vm);
                vm_gpa_unwire(vm);
        }
        return (0);
}

int
vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
{
        int error;
        vm_paddr_t maxaddr;

        /*
         * Virtual machines with pci passthru devices get special treatment:
         * - the guest physical memory is wired
         * - the iommu is programmed to do the 'gpa' to 'hpa' translation
         *
         * We need to do this before the first pci passthru device is attached.
         */
        if (ppt_num_devices(vm) == 0) {
                KASSERT(vm->iommu == NULL,
                    ("vm_assign_pptdev: iommu must be NULL"));
                maxaddr = vmm_mem_maxaddr();
                vm->iommu = iommu_create_domain(maxaddr);

                error = vm_gpa_wire(vm);
                if (error)
                        return (error);

                vm_iommu_map(vm);
        }

        error = ppt_assign_device(vm, bus, slot, func);
        return (error);
}

void *
vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
        int count, pageoff;
        vm_page_t m;

        pageoff = gpa & PAGE_MASK;
        if (len > PAGE_SIZE - pageoff)
                panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);

        count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
            trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);

        if (count == 1) {
                *cookie = m;
                return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
        } else {
                *cookie = NULL;
                return (NULL);
        }
}

void
vm_gpa_release(void *cookie)
{
        vm_page_t m = cookie;

        vm_page_lock(m);
        vm_page_unhold(m);
        vm_page_unlock(m);
}
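
/*
 * Typical usage of the hold/release pair above, following the pattern
 * already used by vm_iommu_modify():
 *
 *      void *cookie;
 *      void *vp;
 *
 *      vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE, &cookie);
 *      if (vp != NULL) {
 *              ... access the page via the direct-map pointer 'vp' ...
 *              vm_gpa_release(cookie);
 *      }
 *
 * The cookie is the held vm_page_t; the mapping is only guaranteed valid
 * while the page remains held.
 */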

int
vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
    struct vm_memory_segment *seg)
{
        int i;

        for (i = 0; i < vm->num_mem_segs; i++) {
                if (gpabase == vm->mem_segs[i].gpa) {
                        seg->gpa = vm->mem_segs[i].gpa;
                        seg->len = vm->mem_segs[i].len;
                        seg->wired = vm->mem_segs[i].wired;
                        return (0);
                }
        }
        return (-1);
}

int
vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
    vm_offset_t *offset, struct vm_object **object)
{
        int i;
        size_t seg_len;
        vm_paddr_t seg_gpa;
        vm_object_t seg_obj;

        for (i = 0; i < vm->num_mem_segs; i++) {
                if ((seg_obj = vm->mem_segs[i].object) == NULL)
                        continue;

                seg_gpa = vm->mem_segs[i].gpa;
                seg_len = vm->mem_segs[i].len;

                if (gpa >= seg_gpa && gpa < seg_gpa + seg_len) {
                        *offset = gpa - seg_gpa;
                        *object = seg_obj;
                        vm_object_reference(seg_obj);
                        return (0);
                }
        }

        return (EINVAL);
}

int
vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
{

        if (vcpu < 0 || vcpu >= VM_MAXCPU)
                return (EINVAL);

        if (reg >= VM_REG_LAST)
                return (EINVAL);

        return (VMGETREG(vm->cookie, vcpu, reg, retval));
}

int
vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
{

        if (vcpu < 0 || vcpu >= VM_MAXCPU)
                return (EINVAL);

        if (reg >= VM_REG_LAST)
                return (EINVAL);

        return (VMSETREG(vm->cookie, vcpu, reg, val));
}

static boolean_t
is_descriptor_table(int reg)
{

        switch (reg) {
        case VM_REG_GUEST_IDTR:
        case VM_REG_GUEST_GDTR:
                return (TRUE);
        default:
                return (FALSE);
        }
}

static boolean_t
is_segment_register(int reg)
{

        switch (reg) {
        case VM_REG_GUEST_ES:
        case VM_REG_GUEST_CS:
        case VM_REG_GUEST_SS:
        case VM_REG_GUEST_DS:
        case VM_REG_GUEST_FS:
        case VM_REG_GUEST_GS:
        case VM_REG_GUEST_TR:
        case VM_REG_GUEST_LDTR:
                return (TRUE);
        default:
                return (FALSE);
        }
}

int
vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
    struct seg_desc *desc)
{

        if (vcpu < 0 || vcpu >= VM_MAXCPU)
                return (EINVAL);

        if (!is_segment_register(reg) && !is_descriptor_table(reg))
                return (EINVAL);

        return (VMGETDESC(vm->cookie, vcpu, reg, desc));
}

int
vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
    struct seg_desc *desc)
{
        if (vcpu < 0 || vcpu >= VM_MAXCPU)
                return (EINVAL);

        if (!is_segment_register(reg) && !is_descriptor_table(reg))
                return (EINVAL);

        return (VMSETDESC(vm->cookie, vcpu, reg, desc));
}

static void
restore_guest_fpustate(struct vcpu *vcpu)
{

        /* flush host state to the pcb */
        fpuexit(curthread);

        /* restore guest FPU state */
        fpu_stop_emulating();
        fpurestore(vcpu->guestfpu);

        /*
         * The FPU is now "dirty" with the guest's state so turn on emulation
         * to trap any access to the FPU by the host.
         */
        fpu_start_emulating();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{

        if ((rcr0() & CR0_TS) == 0)
                panic("fpu emulation not enabled in host!");

        /* save guest FPU state */
        fpu_stop_emulating();
        fpusave(vcpu->guestfpu);
        fpu_start_emulating();
}

static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");

static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
        int error;

        vcpu_assert_locked(vcpu);

        /*
         * The following state transitions are allowed:
         * IDLE -> FROZEN -> IDLE
         * FROZEN -> RUNNING -> FROZEN
         * FROZEN -> SLEEPING -> FROZEN
         */
        switch (vcpu->state) {
        case VCPU_IDLE:
        case VCPU_RUNNING:
        case VCPU_SLEEPING:
                error = (newstate != VCPU_FROZEN);
                break;
        case VCPU_FROZEN:
                error = (newstate == VCPU_FROZEN);
                break;
        default:
                error = 1;
                break;
        }

        if (error == 0)
                vcpu->state = newstate;
        else
                error = EBUSY;

        return (error);
}
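
/*
 * In other words, VCPU_FROZEN acts as a gateway state: a thread must
 * first freeze a vcpu (taking exclusive ownership of it) before moving
 * it to RUNNING or SLEEPING, and every transition out of RUNNING or
 * SLEEPING goes back through FROZEN.  A concurrent attempt to grab a
 * vcpu that is not IDLE therefore fails with EBUSY instead of racing.
 */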

static void
vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
{
        int error;

        if ((error = vcpu_set_state(vm, vcpuid, newstate)) != 0)
                panic("Error %d setting state to %d\n", error, newstate);
}

static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
        int error;

        if ((error = vcpu_set_state_locked(vcpu, newstate)) != 0)
                panic("Error %d setting state to %d", error, newstate);
}

/*
 * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
 */
static int
vm_handle_hlt(struct vm *vm, int vcpuid, boolean_t *retu)
{
        struct vcpu *vcpu;
        int sleepticks, t;

        vcpu = &vm->vcpu[vcpuid];

        vcpu_lock(vcpu);

        /*
         * Figure out the number of host ticks until the next apic
         * timer interrupt in the guest.
         */
        sleepticks = lapic_timer_tick(vm, vcpuid);

        /*
         * If the guest local apic timer is disabled then sleep for
         * a long time but not forever.
         */
        if (sleepticks < 0)
                sleepticks = hz;

        /*
         * Do a final check for pending NMI or interrupts before
         * really putting this thread to sleep.
         *
         * These interrupts could have happened any time after we
         * returned from VMRUN() and before we grabbed the vcpu lock.
         */
        if (!vm_nmi_pending(vm, vcpuid) && lapic_pending_intr(vm, vcpuid) < 0) {
                if (sleepticks <= 0)
                        panic("invalid sleepticks %d", sleepticks);
                t = ticks;
                vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
                msleep_spin(vcpu, &vcpu->mtx, "vmidle", sleepticks);
                vcpu_require_state_locked(vcpu, VCPU_FROZEN);
                vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
        }
        vcpu_unlock(vcpu);

        return (0);
}
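
/*
 * The msleep_spin() above sleeps on the vcpu pointer itself; the matching
 * wakeup_one() is in vm_interrupt_hostcpu() below, which is called when an
 * interrupt or NMI is posted to a vcpu found in the SLEEPING state.
 */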

static int
vm_handle_paging(struct vm *vm, int vcpuid, boolean_t *retu)
{
        int rv, ftype;
        struct vm_map *map;
        struct vcpu *vcpu;
        struct vm_exit *vme;

        vcpu = &vm->vcpu[vcpuid];
        vme = &vcpu->exitinfo;

        ftype = vme->u.paging.fault_type;
        KASSERT(ftype == VM_PROT_READ ||
            ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE,
            ("vm_handle_paging: invalid fault_type %d", ftype));

        if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
                rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
                    vme->u.paging.gpa, ftype);
                if (rv == 0)
                        goto done;
        }

        map = &vm->vmspace->vm_map;
        rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL);

        VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, "
            "ftype = %d", rv, vme->u.paging.gpa, ftype);

        if (rv != KERN_SUCCESS)
                return (EFAULT);
done:
        /* restart execution at the faulting instruction */
        vme->inst_length = 0;

        return (0);
}
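
/*
 * The pmap_emulate_accessed_dirty() call above is a fast path: when a read
 * or write fault is only due to software emulation of the accessed/dirty
 * bits in the nested page tables, it can be resolved there without the
 * full vm_fault() walk that follows.
 */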

static int
vm_handle_inst_emul(struct vm *vm, int vcpuid, boolean_t *retu)
{
        struct vie *vie;
        struct vcpu *vcpu;
        struct vm_exit *vme;
        int error, inst_length;
        uint64_t rip, gla, gpa, cr3;
        mem_region_read_t mread;
        mem_region_write_t mwrite;

        vcpu = &vm->vcpu[vcpuid];
        vme = &vcpu->exitinfo;

        rip = vme->rip;
        inst_length = vme->inst_length;

        gla = vme->u.inst_emul.gla;
        gpa = vme->u.inst_emul.gpa;
        cr3 = vme->u.inst_emul.cr3;
        vie = &vme->u.inst_emul.vie;

        vie_init(vie);

        /* Fetch, decode and emulate the faulting instruction */
        if (vmm_fetch_instruction(vm, vcpuid, rip, inst_length, cr3, vie) != 0)
                return (EFAULT);

        if (vmm_decode_instruction(vm, vcpuid, gla, vie) != 0)
                return (EFAULT);

        /* return to userland unless this is an in-kernel emulated device */
        if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
                mread = lapic_mmio_read;
                mwrite = lapic_mmio_write;
        } else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
                mread = vioapic_mmio_read;
                mwrite = vioapic_mmio_write;
        } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
                mread = vhpet_mmio_read;
                mwrite = vhpet_mmio_write;
        } else {
                *retu = TRUE;
                return (0);
        }

        error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, mread, mwrite, 0);

        /* return to userland to spin up the AP */
        if (error == 0 && vme->exitcode == VM_EXITCODE_SPINUP_AP)
                *retu = TRUE;

        return (error);
}

int
vm_run(struct vm *vm, struct vm_run *vmrun)
{
        int error, vcpuid;
        struct vcpu *vcpu;
        struct pcb *pcb;
        uint64_t tscval, rip;
        struct vm_exit *vme;
        boolean_t retu;
        pmap_t pmap;

        vcpuid = vmrun->cpuid;

        if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
                return (EINVAL);

        pmap = vmspace_pmap(vm->vmspace);
        vcpu = &vm->vcpu[vcpuid];
        vme = &vcpu->exitinfo;
        rip = vmrun->rip;
restart:
        critical_enter();

        KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active),
            ("vm_run: absurd pm_active"));

        tscval = rdtsc();

        pcb = PCPU_GET(curpcb);
        set_pcb_flags(pcb, PCB_FULL_IRET);

        restore_guest_msrs(vm, vcpuid);
        restore_guest_fpustate(vcpu);

        vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
        vcpu->hostcpu = curcpu;
        error = VMRUN(vm->cookie, vcpuid, rip, pmap);
        vcpu->hostcpu = NOCPU;
        vcpu_require_state(vm, vcpuid, VCPU_FROZEN);

        save_guest_fpustate(vcpu);
        restore_host_msrs(vm, vcpuid);

        vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);

        critical_exit();

        if (error == 0) {
                retu = FALSE;
                switch (vme->exitcode) {
                case VM_EXITCODE_HLT:
                        error = vm_handle_hlt(vm, vcpuid, &retu);
                        break;
                case VM_EXITCODE_PAGING:
                        error = vm_handle_paging(vm, vcpuid, &retu);
                        break;
                case VM_EXITCODE_INST_EMUL:
                        error = vm_handle_inst_emul(vm, vcpuid, &retu);
                        break;
                default:
                        retu = TRUE;    /* handled in userland */
                        break;
                }
        }

        if (error == 0 && retu == FALSE) {
                rip = vme->rip + vme->inst_length;
                goto restart;
        }

        /* copy the exit information */
        bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
        return (error);
}
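
/*
 * To summarize the loop above: HLT, PAGING and INST_EMUL exits are handled
 * in the kernel and the vcpu is resumed at the next instruction unless the
 * handler asked for a return to userland ('retu'); every other exitcode is
 * passed up to userland along with the vm_exit details.
 */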

int
vm_inject_event(struct vm *vm, int vcpuid, int type,
    int vector, uint32_t code, int code_valid)
{
        if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
                return (EINVAL);

        if ((type > VM_EVENT_NONE && type < VM_EVENT_MAX) == 0)
                return (EINVAL);

        if (vector < 0 || vector > 255)
                return (EINVAL);

        return (VMINJECT(vm->cookie, vcpuid, type, vector, code, code_valid));
}

static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");

int
vm_inject_nmi(struct vm *vm, int vcpuid)
{
        struct vcpu *vcpu;

        if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
                return (EINVAL);

        vcpu = &vm->vcpu[vcpuid];

        vcpu->nmi_pending = 1;
        vm_interrupt_hostcpu(vm, vcpuid);
        return (0);
}

int
vm_nmi_pending(struct vm *vm, int vcpuid)
{
        struct vcpu *vcpu;

        if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
                panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);

        vcpu = &vm->vcpu[vcpuid];

        return (vcpu->nmi_pending);
}

void
vm_nmi_clear(struct vm *vm, int vcpuid)
{
        struct vcpu *vcpu;

        if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
                panic("vm_nmi_clear: invalid vcpuid %d", vcpuid);

        vcpu = &vm->vcpu[vcpuid];

        if (vcpu->nmi_pending == 0)
                panic("vm_nmi_clear: inconsistent nmi_pending state");

        vcpu->nmi_pending = 0;
        vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1);
}

int
vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
{
        if (vcpu < 0 || vcpu >= VM_MAXCPU)
                return (EINVAL);

        if (type < 0 || type >= VM_CAP_MAX)
                return (EINVAL);

        return (VMGETCAP(vm->cookie, vcpu, type, retval));
}

int
vm_set_capability(struct vm *vm, int vcpu, int type, int val)
{
        if (vcpu < 0 || vcpu >= VM_MAXCPU)
                return (EINVAL);

        if (type < 0 || type >= VM_CAP_MAX)
                return (EINVAL);

        return (VMSETCAP(vm->cookie, vcpu, type, val));
}

uint64_t *
vm_guest_msrs(struct vm *vm, int cpu)
{
        return (vm->vcpu[cpu].guest_msrs);
}

struct vlapic *
vm_lapic(struct vm *vm, int cpu)
{
        return (vm->vcpu[cpu].vlapic);
}

struct vioapic *
vm_ioapic(struct vm *vm)
{

        return (vm->vioapic);
}

struct vhpet *
vm_hpet(struct vm *vm)
{

        return (vm->vhpet);
}

boolean_t
vmm_is_pptdev(int bus, int slot, int func)
{
        int found, i, n;
        int b, s, f;
        char *val, *cp, *cp2;

        /*
         * XXX
         * The length of an environment variable is limited to 128 bytes which
         * puts an upper limit on the number of passthru devices that may be
         * specified using a single environment variable.
         *
         * Work around this by scanning multiple environment variable
         * names instead of a single one - yuck!
         */
        const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL };

        /* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */
        found = 0;
        for (i = 0; names[i] != NULL && !found; i++) {
                cp = val = getenv(names[i]);
                while (cp != NULL && *cp != '\0') {
                        if ((cp2 = strchr(cp, ' ')) != NULL)
                                *cp2 = '\0';

                        n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
                        if (n == 3 && bus == b && slot == s && func == f) {
                                found = 1;
                                break;
                        }

                        if (cp2 != NULL)
                                *cp2++ = ' ';

                        cp = cp2;
                }
                freeenv(val);
        }
        return (found);
}
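
/*
 * The names scanned above are kernel environment variables, so a host
 * typically marks devices for passthru from loader.conf, e.g. (the
 * bus/slot/function values below are illustrative):
 *
 *      pptdevs="2/0/0 4/1/0"
 */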

void *
vm_iommu_domain(struct vm *vm)
{

        return (vm->iommu);
}

int
vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
{
        int error;
        struct vcpu *vcpu;

        if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
                panic("vcpu_set_state: invalid vcpuid %d", vcpuid);

        vcpu = &vm->vcpu[vcpuid];

        vcpu_lock(vcpu);
        error = vcpu_set_state_locked(vcpu, newstate);
        vcpu_unlock(vcpu);

        return (error);
}

enum vcpu_state
vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
{
        struct vcpu *vcpu;
        enum vcpu_state state;

        if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
                panic("vcpu_get_state: invalid vcpuid %d", vcpuid);

        vcpu = &vm->vcpu[vcpuid];

        vcpu_lock(vcpu);
        state = vcpu->state;
        if (hostcpu != NULL)
                *hostcpu = vcpu->hostcpu;
        vcpu_unlock(vcpu);

        return (state);
}

void
vm_activate_cpu(struct vm *vm, int vcpuid)
{

        if (vcpuid >= 0 && vcpuid < VM_MAXCPU)
                CPU_SET(vcpuid, &vm->active_cpus);
}

cpuset_t
vm_active_cpus(struct vm *vm)
{

        return (vm->active_cpus);
}

void *
vcpu_stats(struct vm *vm, int vcpuid)
{

        return (vm->vcpu[vcpuid].stats);
}

int
vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state)
{
        if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
                return (EINVAL);

        *state = vm->vcpu[vcpuid].x2apic_state;

        return (0);
}

int
vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
{
        if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
                return (EINVAL);

        if (state >= X2APIC_STATE_LAST)
                return (EINVAL);

        vm->vcpu[vcpuid].x2apic_state = state;

        vlapic_set_x2apic_state(vm, vcpuid, state);

        return (0);
}

void
vm_interrupt_hostcpu(struct vm *vm, int vcpuid)
{
        int hostcpu;
        struct vcpu *vcpu;

        vcpu = &vm->vcpu[vcpuid];

        vcpu_lock(vcpu);
        hostcpu = vcpu->hostcpu;
        if (hostcpu == NOCPU) {
                if (vcpu->state == VCPU_SLEEPING)
                        wakeup_one(vcpu);
        } else {
                if (vcpu->state != VCPU_RUNNING)
                        panic("invalid vcpu state %d", vcpu->state);
                if (hostcpu != curcpu)
                        ipi_cpu(hostcpu, vmm_ipinum);
        }
        vcpu_unlock(vcpu);
}

struct vmspace *
vm_get_vmspace(struct vm *vm)
{

        return (vm->vmspace);
}

int
vm_apicid2vcpuid(struct vm *vm, int apicid)
{
        /*
         * XXX apic id is assumed to be numerically identical to vcpu id
         */
        return (apicid);
}