1221828Sgrehan/*- 2221828Sgrehan * Copyright (c) 2011 NetApp, Inc. 3221828Sgrehan * All rights reserved. 4221828Sgrehan * 5221828Sgrehan * Redistribution and use in source and binary forms, with or without 6221828Sgrehan * modification, are permitted provided that the following conditions 7221828Sgrehan * are met: 8221828Sgrehan * 1. Redistributions of source code must retain the above copyright 9221828Sgrehan * notice, this list of conditions and the following disclaimer. 10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright 11221828Sgrehan * notice, this list of conditions and the following disclaimer in the 12221828Sgrehan * documentation and/or other materials provided with the distribution. 13221828Sgrehan * 14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17221828Sgrehan * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24221828Sgrehan * SUCH DAMAGE. 25221828Sgrehan * 26221828Sgrehan * $FreeBSD$ 27221828Sgrehan */ 28221828Sgrehan 29221828Sgrehan#include <sys/cdefs.h> 30221828Sgrehan__FBSDID("$FreeBSD$"); 31221828Sgrehan 32221828Sgrehan#include <sys/param.h> 33234695Sgrehan#include <sys/systm.h> 34221828Sgrehan#include <sys/kernel.h> 35221828Sgrehan#include <sys/module.h> 36221828Sgrehan#include <sys/sysctl.h> 37221828Sgrehan#include <sys/malloc.h> 38221828Sgrehan#include <sys/pcpu.h> 39221828Sgrehan#include <sys/lock.h> 40221828Sgrehan#include <sys/mutex.h> 41221828Sgrehan#include <sys/proc.h> 42256072Sneel#include <sys/rwlock.h> 43221828Sgrehan#include <sys/sched.h> 44221828Sgrehan#include <sys/smp.h> 45221828Sgrehan#include <sys/systm.h> 46221828Sgrehan 47221828Sgrehan#include <vm/vm.h> 48256072Sneel#include <vm/vm_object.h> 49256072Sneel#include <vm/vm_page.h> 50256072Sneel#include <vm/pmap.h> 51256072Sneel#include <vm/vm_map.h> 52256072Sneel#include <vm/vm_extern.h> 53256072Sneel#include <vm/vm_param.h> 54221828Sgrehan 55261275Sjhb#include <machine/cpu.h> 56221828Sgrehan#include <machine/vm.h> 57221828Sgrehan#include <machine/pcb.h> 58241489Sneel#include <machine/smp.h> 59262350Sjhb#include <x86/psl.h> 60221914Sjhb#include <x86/apicreg.h> 61256072Sneel#include <machine/vmparam.h> 62221828Sgrehan 63221828Sgrehan#include <machine/vmm.h> 64261088Sjhb#include <machine/vmm_dev.h> 65268976Sjhb#include <machine/vmm_instruction_emul.h> 66261088Sjhb 67268976Sjhb#include "vmm_ioport.h" 68256072Sneel#include "vmm_ktr.h" 69242275Sneel#include "vmm_host.h" 70221828Sgrehan#include "vmm_mem.h" 71221828Sgrehan#include "vmm_util.h" 72268891Sjhb#include "vatpic.h" 73268891Sjhb#include "vatpit.h" 74261088Sjhb#include "vhpet.h" 75261088Sjhb#include "vioapic.h" 76221828Sgrehan#include "vlapic.h" 77221828Sgrehan#include "vmm_msr.h" 78221828Sgrehan#include "vmm_ipi.h" 79221828Sgrehan#include "vmm_stat.h" 80242065Sneel#include "vmm_lapic.h" 81221828Sgrehan 82221828Sgrehan#include "io/ppt.h" 83221828Sgrehan#include "io/iommu.h" 84221828Sgrehan 85221828Sgrehanstruct vlapic; 86221828Sgrehan 87270071Sgrehan/* 88270071Sgrehan * Initialization: 89270071Sgrehan * (a) allocated when vcpu is created 90270071Sgrehan * (i) initialized when vcpu is created and when it is reinitialized 91270071Sgrehan * (o) initialized the first time the vcpu is created 92270071Sgrehan * (x) initialized before use 93270071Sgrehan */ 94221828Sgrehanstruct vcpu { 95270071Sgrehan struct mtx mtx; /* (o) protects 'state' and 'hostcpu' */ 96270071Sgrehan enum vcpu_state state; /* (o) vcpu state */ 97270071Sgrehan int hostcpu; /* (o) vcpu's host cpu */ 98270071Sgrehan struct vlapic *vlapic; /* (i) APIC device model */ 99270071Sgrehan enum x2apic_state x2apic_state; /* (i) APIC mode */ 100270159Sgrehan uint64_t exitintinfo; /* (i) events pending at VM exit */ 101270071Sgrehan int nmi_pending; /* (i) NMI pending */ 102270071Sgrehan int extint_pending; /* (i) INTR pending */ 103270071Sgrehan struct vm_exception exception; /* (x) exception collateral */ 104270071Sgrehan int exception_pending; /* (i) exception pending */ 105270071Sgrehan struct savefpu *guestfpu; /* (a,i) guest fpu state */ 106270071Sgrehan uint64_t guest_xcr0; /* (i) guest %xcr0 register */ 107270071Sgrehan void *stats; /* (a,i) statistics */ 108270071Sgrehan uint64_t guest_msrs[VMM_MSR_NUM]; /* (i) emulated MSRs */ 109270071Sgrehan struct vm_exit exitinfo; /* (x) exit reason and collateral */ 110221828Sgrehan}; 111221828Sgrehan 112270071Sgrehan#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) 113242065Sneel#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) 114242065Sneel#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) 115242065Sneel#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) 116256072Sneel#define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED) 117241489Sneel 118256072Sneelstruct mem_seg { 119256072Sneel vm_paddr_t gpa; 120256072Sneel size_t len; 121256072Sneel boolean_t wired; 122256072Sneel vm_object_t object; 123256072Sneel}; 124221828Sgrehan#define VM_MAX_MEMORY_SEGMENTS 2 125221828Sgrehan 126270071Sgrehan/* 127270071Sgrehan * Initialization: 128270071Sgrehan * (o) initialized the first time the VM is created 129270071Sgrehan * (i) initialized when VM is created and when it is reinitialized 130270071Sgrehan * (x) initialized before use 131270071Sgrehan */ 132221828Sgrehanstruct vm { 133270071Sgrehan void *cookie; /* (i) cpu-specific data */ 134270071Sgrehan void *iommu; /* (x) iommu-specific data */ 135270071Sgrehan struct vhpet *vhpet; /* (i) virtual HPET */ 136270071Sgrehan struct vioapic *vioapic; /* (i) virtual ioapic */ 137270071Sgrehan struct vatpic *vatpic; /* (i) virtual atpic */ 138270071Sgrehan struct vatpit *vatpit; /* (i) virtual atpit */ 139270071Sgrehan volatile cpuset_t active_cpus; /* (i) active vcpus */ 140270071Sgrehan int suspend; /* (i) stop VM execution */ 141270071Sgrehan volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ 142270071Sgrehan volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ 143270071Sgrehan cpuset_t rendezvous_req_cpus; /* (x) rendezvous requested */ 144270071Sgrehan cpuset_t rendezvous_done_cpus; /* (x) rendezvous finished */ 145270071Sgrehan void *rendezvous_arg; /* (x) rendezvous func/arg */ 146270071Sgrehan vm_rendezvous_func_t rendezvous_func; 147270071Sgrehan struct mtx rendezvous_mtx; /* (o) rendezvous lock */ 148270071Sgrehan int num_mem_segs; /* (o) guest memory segments */ 149256072Sneel struct mem_seg mem_segs[VM_MAX_MEMORY_SEGMENTS]; 150270071Sgrehan struct vmspace *vmspace; /* (o) guest's address space */ 151270071Sgrehan char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ 152270071Sgrehan struct vcpu vcpu[VM_MAXCPU]; /* (i) guest vcpus */ 153221828Sgrehan}; 154221828Sgrehan 155249396Sneelstatic int vmm_initialized; 156249396Sneel 157221828Sgrehanstatic struct vmm_ops *ops; 158266339Sjhb#define VMM_INIT(num) (ops != NULL ? (*ops->init)(num) : 0) 159221828Sgrehan#define VMM_CLEANUP() (ops != NULL ? (*ops->cleanup)() : 0) 160261275Sjhb#define VMM_RESUME() (ops != NULL ? (*ops->resume)() : 0) 161221828Sgrehan 162256072Sneel#define VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL) 163268935Sjhb#define VMRUN(vmi, vcpu, rip, pmap, rptr, sptr) \ 164268935Sjhb (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr, sptr) : ENXIO) 165221828Sgrehan#define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL) 166256072Sneel#define VMSPACE_ALLOC(min, max) \ 167256072Sneel (ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL) 168256072Sneel#define VMSPACE_FREE(vmspace) \ 169256072Sneel (ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO) 170221828Sgrehan#define VMGETREG(vmi, vcpu, num, retval) \ 171221828Sgrehan (ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO) 172221828Sgrehan#define VMSETREG(vmi, vcpu, num, val) \ 173221828Sgrehan (ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO) 174221828Sgrehan#define VMGETDESC(vmi, vcpu, num, desc) \ 175221828Sgrehan (ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO) 176221828Sgrehan#define VMSETDESC(vmi, vcpu, num, desc) \ 177221828Sgrehan (ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO) 178221828Sgrehan#define VMGETCAP(vmi, vcpu, num, retval) \ 179221828Sgrehan (ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO) 180221828Sgrehan#define VMSETCAP(vmi, vcpu, num, val) \ 181221828Sgrehan (ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO) 182266339Sjhb#define VLAPIC_INIT(vmi, vcpu) \ 183266339Sjhb (ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL) 184266339Sjhb#define VLAPIC_CLEANUP(vmi, vlapic) \ 185266339Sjhb (ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL) 186221828Sgrehan 187245021Sneel#define fpu_start_emulating() load_cr0(rcr0() | CR0_TS) 188245021Sneel#define fpu_stop_emulating() clts() 189221828Sgrehan 190221828Sgrehanstatic MALLOC_DEFINE(M_VM, "vm", "vm"); 191221828SgrehanCTASSERT(VMM_MSR_NUM <= 64); /* msr_mask can keep track of up to 64 msrs */ 192221828Sgrehan 193221828Sgrehan/* statistics */ 194248389Sneelstatic VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); 195221828Sgrehan 196266339SjhbSYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); 197266339Sjhb 198268935Sjhb/* 199268935Sjhb * Halt the guest if all vcpus are executing a HLT instruction with 200268935Sjhb * interrupts disabled. 201268935Sjhb */ 202268935Sjhbstatic int halt_detection_enabled = 1; 203268935SjhbTUNABLE_INT("hw.vmm.halt_detection", &halt_detection_enabled); 204268935SjhbSYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN, 205268935Sjhb &halt_detection_enabled, 0, 206268935Sjhb "Halt VM if all vcpus execute HLT with interrupts disabled"); 207268935Sjhb 208266339Sjhbstatic int vmm_ipinum; 209266339SjhbSYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0, 210266339Sjhb "IPI vector used for vcpu notifications"); 211266339Sjhb 212221828Sgrehanstatic void 213270071Sgrehanvcpu_cleanup(struct vm *vm, int i, bool destroy) 214221828Sgrehan{ 215266339Sjhb struct vcpu *vcpu = &vm->vcpu[i]; 216266339Sjhb 217266339Sjhb VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic); 218270071Sgrehan if (destroy) { 219270071Sgrehan vmm_stat_free(vcpu->stats); 220270071Sgrehan fpu_save_area_free(vcpu->guestfpu); 221270071Sgrehan } 222221828Sgrehan} 223221828Sgrehan 224221828Sgrehanstatic void 225270071Sgrehanvcpu_init(struct vm *vm, int vcpu_id, bool create) 226221828Sgrehan{ 227221828Sgrehan struct vcpu *vcpu; 228270071Sgrehan 229270071Sgrehan KASSERT(vcpu_id >= 0 && vcpu_id < VM_MAXCPU, 230270071Sgrehan ("vcpu_init: invalid vcpu %d", vcpu_id)); 231270071Sgrehan 232221828Sgrehan vcpu = &vm->vcpu[vcpu_id]; 233221828Sgrehan 234270071Sgrehan if (create) { 235270071Sgrehan KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already " 236270071Sgrehan "initialized", vcpu_id)); 237270071Sgrehan vcpu_lock_init(vcpu); 238270071Sgrehan vcpu->state = VCPU_IDLE; 239270071Sgrehan vcpu->hostcpu = NOCPU; 240270071Sgrehan vcpu->guestfpu = fpu_save_area_alloc(); 241270071Sgrehan vcpu->stats = vmm_stat_alloc(); 242270071Sgrehan } 243270071Sgrehan 244266339Sjhb vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id); 245267447Sjhb vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED); 246270159Sgrehan vcpu->exitintinfo = 0; 247270071Sgrehan vcpu->nmi_pending = 0; 248270071Sgrehan vcpu->extint_pending = 0; 249270071Sgrehan vcpu->exception_pending = 0; 250267427Sjhb vcpu->guest_xcr0 = XFEATURE_ENABLED_X87; 251234695Sgrehan fpu_save_area_reset(vcpu->guestfpu); 252270071Sgrehan vmm_stat_init(vcpu->stats); 253270071Sgrehan guest_msrs_init(vm, vcpu_id); 254221828Sgrehan} 255221828Sgrehan 256240894Sneelstruct vm_exit * 257240894Sneelvm_exitinfo(struct vm *vm, int cpuid) 258240894Sneel{ 259240894Sneel struct vcpu *vcpu; 260240894Sneel 261240894Sneel if (cpuid < 0 || cpuid >= VM_MAXCPU) 262240894Sneel panic("vm_exitinfo: invalid cpuid %d", cpuid); 263240894Sneel 264240894Sneel vcpu = &vm->vcpu[cpuid]; 265240894Sneel 266240894Sneel return (&vcpu->exitinfo); 267240894Sneel} 268240894Sneel 269261275Sjhbstatic void 270261275Sjhbvmm_resume(void) 271261275Sjhb{ 272261275Sjhb VMM_RESUME(); 273261275Sjhb} 274261275Sjhb 275221828Sgrehanstatic int 276221828Sgrehanvmm_init(void) 277221828Sgrehan{ 278221828Sgrehan int error; 279221828Sgrehan 280242275Sneel vmm_host_state_init(); 281221828Sgrehan 282266339Sjhb vmm_ipinum = vmm_ipi_alloc(); 283266339Sjhb if (vmm_ipinum == 0) 284266339Sjhb vmm_ipinum = IPI_AST; 285266339Sjhb 286221828Sgrehan error = vmm_mem_init(); 287221828Sgrehan if (error) 288221828Sgrehan return (error); 289221828Sgrehan 290221828Sgrehan if (vmm_is_intel()) 291221828Sgrehan ops = &vmm_ops_intel; 292221828Sgrehan else if (vmm_is_amd()) 293221828Sgrehan ops = &vmm_ops_amd; 294221828Sgrehan else 295221828Sgrehan return (ENXIO); 296221828Sgrehan 297221828Sgrehan vmm_msr_init(); 298261275Sjhb vmm_resume_p = vmm_resume; 299221828Sgrehan 300266339Sjhb return (VMM_INIT(vmm_ipinum)); 301221828Sgrehan} 302221828Sgrehan 303221828Sgrehanstatic int 304221828Sgrehanvmm_handler(module_t mod, int what, void *arg) 305221828Sgrehan{ 306221828Sgrehan int error; 307221828Sgrehan 308221828Sgrehan switch (what) { 309221828Sgrehan case MOD_LOAD: 310221828Sgrehan vmmdev_init(); 311267070Sjhb if (ppt_avail_devices() > 0) 312267070Sjhb iommu_init(); 313221828Sgrehan error = vmm_init(); 314249396Sneel if (error == 0) 315249396Sneel vmm_initialized = 1; 316221828Sgrehan break; 317221828Sgrehan case MOD_UNLOAD: 318241454Sneel error = vmmdev_cleanup(); 319241454Sneel if (error == 0) { 320261275Sjhb vmm_resume_p = NULL; 321241454Sneel iommu_cleanup(); 322266339Sjhb if (vmm_ipinum != IPI_AST) 323266339Sjhb vmm_ipi_free(vmm_ipinum); 324241454Sneel error = VMM_CLEANUP(); 325253854Sgrehan /* 326253854Sgrehan * Something bad happened - prevent new 327253854Sgrehan * VMs from being created 328253854Sgrehan */ 329253854Sgrehan if (error) 330253854Sgrehan vmm_initialized = 0; 331241454Sneel } 332221828Sgrehan break; 333221828Sgrehan default: 334221828Sgrehan error = 0; 335221828Sgrehan break; 336221828Sgrehan } 337221828Sgrehan return (error); 338221828Sgrehan} 339221828Sgrehan 340221828Sgrehanstatic moduledata_t vmm_kmod = { 341221828Sgrehan "vmm", 342221828Sgrehan vmm_handler, 343221828Sgrehan NULL 344221828Sgrehan}; 345221828Sgrehan 346221828Sgrehan/* 347245704Sneel * vmm initialization has the following dependencies: 348245704Sneel * 349245704Sneel * - iommu initialization must happen after the pci passthru driver has had 350245704Sneel * a chance to attach to any passthru devices (after SI_SUB_CONFIGURE). 351245704Sneel * 352245704Sneel * - VT-x initialization requires smp_rendezvous() and therefore must happen 353245704Sneel * after SMP is fully functional (after SI_SUB_SMP). 354221828Sgrehan */ 355245704SneelDECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY); 356221828SgrehanMODULE_VERSION(vmm, 1); 357221828Sgrehan 358270071Sgrehanstatic void 359270071Sgrehanvm_init(struct vm *vm, bool create) 360270071Sgrehan{ 361270071Sgrehan int i; 362270071Sgrehan 363270071Sgrehan vm->cookie = VMINIT(vm, vmspace_pmap(vm->vmspace)); 364270071Sgrehan vm->iommu = NULL; 365270071Sgrehan vm->vioapic = vioapic_init(vm); 366270071Sgrehan vm->vhpet = vhpet_init(vm); 367270071Sgrehan vm->vatpic = vatpic_init(vm); 368270071Sgrehan vm->vatpit = vatpit_init(vm); 369270071Sgrehan 370270071Sgrehan CPU_ZERO(&vm->active_cpus); 371270071Sgrehan 372270071Sgrehan vm->suspend = 0; 373270071Sgrehan CPU_ZERO(&vm->suspended_cpus); 374270071Sgrehan 375270071Sgrehan for (i = 0; i < VM_MAXCPU; i++) 376270071Sgrehan vcpu_init(vm, i, create); 377270071Sgrehan} 378270071Sgrehan 379249396Sneelint 380249396Sneelvm_create(const char *name, struct vm **retvm) 381221828Sgrehan{ 382221828Sgrehan struct vm *vm; 383256072Sneel struct vmspace *vmspace; 384221828Sgrehan 385249396Sneel /* 386249396Sneel * If vmm.ko could not be successfully initialized then don't attempt 387249396Sneel * to create the virtual machine. 388249396Sneel */ 389249396Sneel if (!vmm_initialized) 390249396Sneel return (ENXIO); 391249396Sneel 392221828Sgrehan if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) 393249396Sneel return (EINVAL); 394221828Sgrehan 395256072Sneel vmspace = VMSPACE_ALLOC(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS); 396256072Sneel if (vmspace == NULL) 397256072Sneel return (ENOMEM); 398256072Sneel 399221828Sgrehan vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO); 400221828Sgrehan strcpy(vm->name, name); 401270071Sgrehan vm->num_mem_segs = 0; 402266339Sjhb vm->vmspace = vmspace; 403266339Sjhb mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF); 404221828Sgrehan 405270071Sgrehan vm_init(vm, true); 406221828Sgrehan 407249396Sneel *retvm = vm; 408249396Sneel return (0); 409221828Sgrehan} 410221828Sgrehan 411241178Sneelstatic void 412256072Sneelvm_free_mem_seg(struct vm *vm, struct mem_seg *seg) 413241178Sneel{ 414241178Sneel 415256072Sneel if (seg->object != NULL) 416256072Sneel vmm_mem_free(vm->vmspace, seg->gpa, seg->len); 417241362Sneel 418256072Sneel bzero(seg, sizeof(*seg)); 419241178Sneel} 420241178Sneel 421270071Sgrehanstatic void 422270071Sgrehanvm_cleanup(struct vm *vm, bool destroy) 423221828Sgrehan{ 424221828Sgrehan int i; 425221828Sgrehan 426221828Sgrehan ppt_unassign_all(vm); 427221828Sgrehan 428256072Sneel if (vm->iommu != NULL) 429256072Sneel iommu_destroy_domain(vm->iommu); 430256072Sneel 431268891Sjhb vatpit_cleanup(vm->vatpit); 432261088Sjhb vhpet_cleanup(vm->vhpet); 433268891Sjhb vatpic_cleanup(vm->vatpic); 434261088Sjhb vioapic_cleanup(vm->vioapic); 435261088Sjhb 436270071Sgrehan for (i = 0; i < VM_MAXCPU; i++) 437270071Sgrehan vcpu_cleanup(vm, i, destroy); 438221828Sgrehan 439270071Sgrehan VMCLEANUP(vm->cookie); 440241178Sneel 441270071Sgrehan if (destroy) { 442270071Sgrehan for (i = 0; i < vm->num_mem_segs; i++) 443270071Sgrehan vm_free_mem_seg(vm, &vm->mem_segs[i]); 444221828Sgrehan 445270071Sgrehan vm->num_mem_segs = 0; 446221828Sgrehan 447270071Sgrehan VMSPACE_FREE(vm->vmspace); 448270071Sgrehan vm->vmspace = NULL; 449270071Sgrehan } 450270071Sgrehan} 451221828Sgrehan 452270071Sgrehanvoid 453270071Sgrehanvm_destroy(struct vm *vm) 454270071Sgrehan{ 455270071Sgrehan vm_cleanup(vm, true); 456221828Sgrehan free(vm, M_VM); 457221828Sgrehan} 458221828Sgrehan 459270071Sgrehanint 460270071Sgrehanvm_reinit(struct vm *vm) 461270071Sgrehan{ 462270071Sgrehan int error; 463270071Sgrehan 464270071Sgrehan /* 465270071Sgrehan * A virtual machine can be reset only if all vcpus are suspended. 466270071Sgrehan */ 467270071Sgrehan if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { 468270071Sgrehan vm_cleanup(vm, false); 469270071Sgrehan vm_init(vm, false); 470270071Sgrehan error = 0; 471270071Sgrehan } else { 472270071Sgrehan error = EBUSY; 473270071Sgrehan } 474270071Sgrehan 475270071Sgrehan return (error); 476270071Sgrehan} 477270071Sgrehan 478221828Sgrehanconst char * 479221828Sgrehanvm_name(struct vm *vm) 480221828Sgrehan{ 481221828Sgrehan return (vm->name); 482221828Sgrehan} 483221828Sgrehan 484221828Sgrehanint 485221828Sgrehanvm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) 486221828Sgrehan{ 487256072Sneel vm_object_t obj; 488221828Sgrehan 489256072Sneel if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL) 490256072Sneel return (ENOMEM); 491256072Sneel else 492256072Sneel return (0); 493221828Sgrehan} 494221828Sgrehan 495221828Sgrehanint 496221828Sgrehanvm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len) 497221828Sgrehan{ 498221828Sgrehan 499256072Sneel vmm_mmio_free(vm->vmspace, gpa, len); 500256072Sneel return (0); 501221828Sgrehan} 502221828Sgrehan 503256072Sneelboolean_t 504256072Sneelvm_mem_allocated(struct vm *vm, vm_paddr_t gpa) 505241041Sneel{ 506241041Sneel int i; 507241041Sneel vm_paddr_t gpabase, gpalimit; 508241041Sneel 509241041Sneel for (i = 0; i < vm->num_mem_segs; i++) { 510241041Sneel gpabase = vm->mem_segs[i].gpa; 511241041Sneel gpalimit = gpabase + vm->mem_segs[i].len; 512241041Sneel if (gpa >= gpabase && gpa < gpalimit) 513256072Sneel return (TRUE); /* 'gpa' is regular memory */ 514241041Sneel } 515241041Sneel 516256072Sneel if (ppt_is_mmio(vm, gpa)) 517256072Sneel return (TRUE); /* 'gpa' is pci passthru mmio */ 518256072Sneel 519256072Sneel return (FALSE); 520241041Sneel} 521241041Sneel 522221828Sgrehanint 523241041Sneelvm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len) 524221828Sgrehan{ 525256072Sneel int available, allocated; 526256072Sneel struct mem_seg *seg; 527256072Sneel vm_object_t object; 528256072Sneel vm_paddr_t g; 529221828Sgrehan 530241041Sneel if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0) 531241041Sneel return (EINVAL); 532221828Sgrehan 533241041Sneel available = allocated = 0; 534241041Sneel g = gpa; 535241041Sneel while (g < gpa + len) { 536256072Sneel if (vm_mem_allocated(vm, g)) 537256072Sneel allocated++; 538256072Sneel else 539241041Sneel available++; 540241041Sneel 541241041Sneel g += PAGE_SIZE; 542241041Sneel } 543241041Sneel 544221828Sgrehan /* 545241041Sneel * If there are some allocated and some available pages in the address 546241041Sneel * range then it is an error. 547221828Sgrehan */ 548241041Sneel if (allocated && available) 549241041Sneel return (EINVAL); 550221828Sgrehan 551241041Sneel /* 552241041Sneel * If the entire address range being requested has already been 553241041Sneel * allocated then there isn't anything more to do. 554241041Sneel */ 555241041Sneel if (allocated && available == 0) 556241041Sneel return (0); 557241041Sneel 558221828Sgrehan if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS) 559221828Sgrehan return (E2BIG); 560221828Sgrehan 561241178Sneel seg = &vm->mem_segs[vm->num_mem_segs]; 562221828Sgrehan 563256072Sneel if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL) 564256072Sneel return (ENOMEM); 565256072Sneel 566241178Sneel seg->gpa = gpa; 567256072Sneel seg->len = len; 568256072Sneel seg->object = object; 569256072Sneel seg->wired = FALSE; 570241178Sneel 571256072Sneel vm->num_mem_segs++; 572256072Sneel 573256072Sneel return (0); 574256072Sneel} 575256072Sneel 576270159Sgrehanstatic vm_paddr_t 577270159Sgrehanvm_maxmem(struct vm *vm) 578270159Sgrehan{ 579270159Sgrehan int i; 580270159Sgrehan vm_paddr_t gpa, maxmem; 581270159Sgrehan 582270159Sgrehan maxmem = 0; 583270159Sgrehan for (i = 0; i < vm->num_mem_segs; i++) { 584270159Sgrehan gpa = vm->mem_segs[i].gpa + vm->mem_segs[i].len; 585270159Sgrehan if (gpa > maxmem) 586270159Sgrehan maxmem = gpa; 587270159Sgrehan } 588270159Sgrehan return (maxmem); 589270159Sgrehan} 590270159Sgrehan 591256072Sneelstatic void 592256072Sneelvm_gpa_unwire(struct vm *vm) 593256072Sneel{ 594256072Sneel int i, rv; 595256072Sneel struct mem_seg *seg; 596256072Sneel 597256072Sneel for (i = 0; i < vm->num_mem_segs; i++) { 598256072Sneel seg = &vm->mem_segs[i]; 599256072Sneel if (!seg->wired) 600256072Sneel continue; 601256072Sneel 602256072Sneel rv = vm_map_unwire(&vm->vmspace->vm_map, 603256072Sneel seg->gpa, seg->gpa + seg->len, 604256072Sneel VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 605256072Sneel KASSERT(rv == KERN_SUCCESS, ("vm(%s) memory segment " 606256072Sneel "%#lx/%ld could not be unwired: %d", 607256072Sneel vm_name(vm), seg->gpa, seg->len, rv)); 608256072Sneel 609256072Sneel seg->wired = FALSE; 610256072Sneel } 611256072Sneel} 612256072Sneel 613256072Sneelstatic int 614256072Sneelvm_gpa_wire(struct vm *vm) 615256072Sneel{ 616256072Sneel int i, rv; 617256072Sneel struct mem_seg *seg; 618256072Sneel 619256072Sneel for (i = 0; i < vm->num_mem_segs; i++) { 620256072Sneel seg = &vm->mem_segs[i]; 621256072Sneel if (seg->wired) 622256072Sneel continue; 623256072Sneel 624256072Sneel /* XXX rlimits? */ 625256072Sneel rv = vm_map_wire(&vm->vmspace->vm_map, 626256072Sneel seg->gpa, seg->gpa + seg->len, 627256072Sneel VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 628256072Sneel if (rv != KERN_SUCCESS) 629241178Sneel break; 630241178Sneel 631256072Sneel seg->wired = TRUE; 632256072Sneel } 633256072Sneel 634256072Sneel if (i < vm->num_mem_segs) { 635241362Sneel /* 636256072Sneel * Undo the wiring before returning an error. 637241362Sneel */ 638256072Sneel vm_gpa_unwire(vm); 639256072Sneel return (EAGAIN); 640256072Sneel } 641241178Sneel 642256072Sneel return (0); 643256072Sneel} 644256072Sneel 645256072Sneelstatic void 646256072Sneelvm_iommu_modify(struct vm *vm, boolean_t map) 647256072Sneel{ 648256072Sneel int i, sz; 649256072Sneel vm_paddr_t gpa, hpa; 650256072Sneel struct mem_seg *seg; 651256072Sneel void *vp, *cookie, *host_domain; 652256072Sneel 653256072Sneel sz = PAGE_SIZE; 654256072Sneel host_domain = iommu_host_domain(); 655256072Sneel 656256072Sneel for (i = 0; i < vm->num_mem_segs; i++) { 657256072Sneel seg = &vm->mem_segs[i]; 658256072Sneel KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired", 659256072Sneel vm_name(vm), seg->gpa, seg->len)); 660256072Sneel 661256072Sneel gpa = seg->gpa; 662256072Sneel while (gpa < seg->gpa + seg->len) { 663256072Sneel vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE, 664256072Sneel &cookie); 665256072Sneel KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx", 666256072Sneel vm_name(vm), gpa)); 667256072Sneel 668256072Sneel vm_gpa_release(cookie); 669256072Sneel 670256072Sneel hpa = DMAP_TO_PHYS((uintptr_t)vp); 671256072Sneel if (map) { 672256072Sneel iommu_create_mapping(vm->iommu, gpa, hpa, sz); 673256072Sneel iommu_remove_mapping(host_domain, hpa, sz); 674256072Sneel } else { 675256072Sneel iommu_remove_mapping(vm->iommu, gpa, sz); 676256072Sneel iommu_create_mapping(host_domain, hpa, hpa, sz); 677256072Sneel } 678256072Sneel 679256072Sneel gpa += PAGE_SIZE; 680256072Sneel } 681241178Sneel } 682241178Sneel 683256072Sneel /* 684256072Sneel * Invalidate the cached translations associated with the domain 685256072Sneel * from which pages were removed. 686256072Sneel */ 687256072Sneel if (map) 688256072Sneel iommu_invalidate_tlb(host_domain); 689256072Sneel else 690256072Sneel iommu_invalidate_tlb(vm->iommu); 691256072Sneel} 692256072Sneel 693256072Sneel#define vm_iommu_unmap(vm) vm_iommu_modify((vm), FALSE) 694256072Sneel#define vm_iommu_map(vm) vm_iommu_modify((vm), TRUE) 695256072Sneel 696256072Sneelint 697256072Sneelvm_unassign_pptdev(struct vm *vm, int bus, int slot, int func) 698256072Sneel{ 699256072Sneel int error; 700256072Sneel 701256072Sneel error = ppt_unassign_device(vm, bus, slot, func); 702256072Sneel if (error) 703221828Sgrehan return (error); 704256072Sneel 705267070Sjhb if (ppt_assigned_devices(vm) == 0) { 706256072Sneel vm_iommu_unmap(vm); 707256072Sneel vm_gpa_unwire(vm); 708221828Sgrehan } 709256072Sneel return (0); 710256072Sneel} 711221828Sgrehan 712256072Sneelint 713256072Sneelvm_assign_pptdev(struct vm *vm, int bus, int slot, int func) 714256072Sneel{ 715256072Sneel int error; 716256072Sneel vm_paddr_t maxaddr; 717256072Sneel 718241362Sneel /* 719256072Sneel * Virtual machines with pci passthru devices get special treatment: 720256072Sneel * - the guest physical memory is wired 721256072Sneel * - the iommu is programmed to do the 'gpa' to 'hpa' translation 722256072Sneel * 723256072Sneel * We need to do this before the first pci passthru device is attached. 724241362Sneel */ 725267070Sjhb if (ppt_assigned_devices(vm) == 0) { 726256072Sneel KASSERT(vm->iommu == NULL, 727256072Sneel ("vm_assign_pptdev: iommu must be NULL")); 728270159Sgrehan maxaddr = vm_maxmem(vm); 729256072Sneel vm->iommu = iommu_create_domain(maxaddr); 730241362Sneel 731256072Sneel error = vm_gpa_wire(vm); 732256072Sneel if (error) 733256072Sneel return (error); 734241041Sneel 735256072Sneel vm_iommu_map(vm); 736256072Sneel } 737256072Sneel 738256072Sneel error = ppt_assign_device(vm, bus, slot, func); 739256072Sneel return (error); 740221828Sgrehan} 741221828Sgrehan 742256072Sneelvoid * 743256072Sneelvm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, 744256072Sneel void **cookie) 745221828Sgrehan{ 746256072Sneel int count, pageoff; 747256072Sneel vm_page_t m; 748221828Sgrehan 749256072Sneel pageoff = gpa & PAGE_MASK; 750256072Sneel if (len > PAGE_SIZE - pageoff) 751256072Sneel panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len); 752241148Sneel 753256072Sneel count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map, 754256072Sneel trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1); 755256072Sneel 756256072Sneel if (count == 1) { 757256072Sneel *cookie = m; 758256072Sneel return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff)); 759256072Sneel } else { 760256072Sneel *cookie = NULL; 761256072Sneel return (NULL); 762256072Sneel } 763221828Sgrehan} 764221828Sgrehan 765256072Sneelvoid 766256072Sneelvm_gpa_release(void *cookie) 767256072Sneel{ 768256072Sneel vm_page_t m = cookie; 769256072Sneel 770256072Sneel vm_page_lock(m); 771256072Sneel vm_page_unhold(m); 772256072Sneel vm_page_unlock(m); 773256072Sneel} 774256072Sneel 775221828Sgrehanint 776221828Sgrehanvm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase, 777221828Sgrehan struct vm_memory_segment *seg) 778221828Sgrehan{ 779221828Sgrehan int i; 780221828Sgrehan 781221828Sgrehan for (i = 0; i < vm->num_mem_segs; i++) { 782221828Sgrehan if (gpabase == vm->mem_segs[i].gpa) { 783256072Sneel seg->gpa = vm->mem_segs[i].gpa; 784256072Sneel seg->len = vm->mem_segs[i].len; 785256072Sneel seg->wired = vm->mem_segs[i].wired; 786221828Sgrehan return (0); 787221828Sgrehan } 788221828Sgrehan } 789221828Sgrehan return (-1); 790221828Sgrehan} 791221828Sgrehan 792221828Sgrehanint 793256072Sneelvm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len, 794256072Sneel vm_offset_t *offset, struct vm_object **object) 795256072Sneel{ 796256072Sneel int i; 797256072Sneel size_t seg_len; 798256072Sneel vm_paddr_t seg_gpa; 799256072Sneel vm_object_t seg_obj; 800256072Sneel 801256072Sneel for (i = 0; i < vm->num_mem_segs; i++) { 802256072Sneel if ((seg_obj = vm->mem_segs[i].object) == NULL) 803256072Sneel continue; 804256072Sneel 805256072Sneel seg_gpa = vm->mem_segs[i].gpa; 806256072Sneel seg_len = vm->mem_segs[i].len; 807256072Sneel 808256072Sneel if (gpa >= seg_gpa && gpa < seg_gpa + seg_len) { 809256072Sneel *offset = gpa - seg_gpa; 810256072Sneel *object = seg_obj; 811256072Sneel vm_object_reference(seg_obj); 812256072Sneel return (0); 813256072Sneel } 814256072Sneel } 815256072Sneel 816256072Sneel return (EINVAL); 817256072Sneel} 818256072Sneel 819256072Sneelint 820221828Sgrehanvm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval) 821221828Sgrehan{ 822221828Sgrehan 823221828Sgrehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 824221828Sgrehan return (EINVAL); 825221828Sgrehan 826221828Sgrehan if (reg >= VM_REG_LAST) 827221828Sgrehan return (EINVAL); 828221828Sgrehan 829221828Sgrehan return (VMGETREG(vm->cookie, vcpu, reg, retval)); 830221828Sgrehan} 831221828Sgrehan 832221828Sgrehanint 833221828Sgrehanvm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val) 834221828Sgrehan{ 835221828Sgrehan 836221828Sgrehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 837221828Sgrehan return (EINVAL); 838221828Sgrehan 839221828Sgrehan if (reg >= VM_REG_LAST) 840221828Sgrehan return (EINVAL); 841221828Sgrehan 842221828Sgrehan return (VMSETREG(vm->cookie, vcpu, reg, val)); 843221828Sgrehan} 844221828Sgrehan 845221828Sgrehanstatic boolean_t 846221828Sgrehanis_descriptor_table(int reg) 847221828Sgrehan{ 848221828Sgrehan 849221828Sgrehan switch (reg) { 850221828Sgrehan case VM_REG_GUEST_IDTR: 851221828Sgrehan case VM_REG_GUEST_GDTR: 852221828Sgrehan return (TRUE); 853221828Sgrehan default: 854221828Sgrehan return (FALSE); 855221828Sgrehan } 856221828Sgrehan} 857221828Sgrehan 858221828Sgrehanstatic boolean_t 859221828Sgrehanis_segment_register(int reg) 860221828Sgrehan{ 861221828Sgrehan 862221828Sgrehan switch (reg) { 863221828Sgrehan case VM_REG_GUEST_ES: 864221828Sgrehan case VM_REG_GUEST_CS: 865221828Sgrehan case VM_REG_GUEST_SS: 866221828Sgrehan case VM_REG_GUEST_DS: 867221828Sgrehan case VM_REG_GUEST_FS: 868221828Sgrehan case VM_REG_GUEST_GS: 869221828Sgrehan case VM_REG_GUEST_TR: 870221828Sgrehan case VM_REG_GUEST_LDTR: 871221828Sgrehan return (TRUE); 872221828Sgrehan default: 873221828Sgrehan return (FALSE); 874221828Sgrehan } 875221828Sgrehan} 876221828Sgrehan 877221828Sgrehanint 878221828Sgrehanvm_get_seg_desc(struct vm *vm, int vcpu, int reg, 879221828Sgrehan struct seg_desc *desc) 880221828Sgrehan{ 881221828Sgrehan 882221828Sgrehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 883221828Sgrehan return (EINVAL); 884221828Sgrehan 885221828Sgrehan if (!is_segment_register(reg) && !is_descriptor_table(reg)) 886221828Sgrehan return (EINVAL); 887221828Sgrehan 888221828Sgrehan return (VMGETDESC(vm->cookie, vcpu, reg, desc)); 889221828Sgrehan} 890221828Sgrehan 891221828Sgrehanint 892221828Sgrehanvm_set_seg_desc(struct vm *vm, int vcpu, int reg, 893221828Sgrehan struct seg_desc *desc) 894221828Sgrehan{ 895221828Sgrehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 896221828Sgrehan return (EINVAL); 897221828Sgrehan 898221828Sgrehan if (!is_segment_register(reg) && !is_descriptor_table(reg)) 899221828Sgrehan return (EINVAL); 900221828Sgrehan 901221828Sgrehan return (VMSETDESC(vm->cookie, vcpu, reg, desc)); 902221828Sgrehan} 903221828Sgrehan 904221828Sgrehanstatic void 905221828Sgrehanrestore_guest_fpustate(struct vcpu *vcpu) 906221828Sgrehan{ 907221828Sgrehan 908234695Sgrehan /* flush host state to the pcb */ 909234695Sgrehan fpuexit(curthread); 910242122Sneel 911242122Sneel /* restore guest FPU state */ 912221828Sgrehan fpu_stop_emulating(); 913234695Sgrehan fpurestore(vcpu->guestfpu); 914242122Sneel 915267427Sjhb /* restore guest XCR0 if XSAVE is enabled in the host */ 916267427Sjhb if (rcr4() & CR4_XSAVE) 917267427Sjhb load_xcr(0, vcpu->guest_xcr0); 918267427Sjhb 919242122Sneel /* 920242122Sneel * The FPU is now "dirty" with the guest's state so turn on emulation 921242122Sneel * to trap any access to the FPU by the host. 922242122Sneel */ 923242122Sneel fpu_start_emulating(); 924221828Sgrehan} 925221828Sgrehan 926221828Sgrehanstatic void 927221828Sgrehansave_guest_fpustate(struct vcpu *vcpu) 928221828Sgrehan{ 929221828Sgrehan 930242122Sneel if ((rcr0() & CR0_TS) == 0) 931242122Sneel panic("fpu emulation not enabled in host!"); 932242122Sneel 933267427Sjhb /* save guest XCR0 and restore host XCR0 */ 934267427Sjhb if (rcr4() & CR4_XSAVE) { 935267427Sjhb vcpu->guest_xcr0 = rxcr(0); 936267427Sjhb load_xcr(0, vmm_get_host_xcr0()); 937267427Sjhb } 938267427Sjhb 939242122Sneel /* save guest FPU state */ 940242122Sneel fpu_stop_emulating(); 941234695Sgrehan fpusave(vcpu->guestfpu); 942221828Sgrehan fpu_start_emulating(); 943221828Sgrehan} 944221828Sgrehan 945248389Sneelstatic VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle"); 946242065Sneel 947256072Sneelstatic int 948266393Sjhbvcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, 949266393Sjhb bool from_idle) 950256072Sneel{ 951256072Sneel int error; 952256072Sneel 953256072Sneel vcpu_assert_locked(vcpu); 954256072Sneel 955256072Sneel /* 956266393Sjhb * State transitions from the vmmdev_ioctl() must always begin from 957266393Sjhb * the VCPU_IDLE state. This guarantees that there is only a single 958266393Sjhb * ioctl() operating on a vcpu at any point. 959266393Sjhb */ 960266393Sjhb if (from_idle) { 961266393Sjhb while (vcpu->state != VCPU_IDLE) 962266393Sjhb msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); 963266393Sjhb } else { 964266393Sjhb KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " 965266393Sjhb "vcpu idle state")); 966266393Sjhb } 967266393Sjhb 968266393Sjhb if (vcpu->state == VCPU_RUNNING) { 969266393Sjhb KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d " 970266393Sjhb "mismatch for running vcpu", curcpu, vcpu->hostcpu)); 971266393Sjhb } else { 972266393Sjhb KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a " 973266393Sjhb "vcpu that is not running", vcpu->hostcpu)); 974266393Sjhb } 975266393Sjhb 976266393Sjhb /* 977256072Sneel * The following state transitions are allowed: 978256072Sneel * IDLE -> FROZEN -> IDLE 979256072Sneel * FROZEN -> RUNNING -> FROZEN 980256072Sneel * FROZEN -> SLEEPING -> FROZEN 981256072Sneel */ 982256072Sneel switch (vcpu->state) { 983256072Sneel case VCPU_IDLE: 984256072Sneel case VCPU_RUNNING: 985256072Sneel case VCPU_SLEEPING: 986256072Sneel error = (newstate != VCPU_FROZEN); 987256072Sneel break; 988256072Sneel case VCPU_FROZEN: 989256072Sneel error = (newstate == VCPU_FROZEN); 990256072Sneel break; 991256072Sneel default: 992256072Sneel error = 1; 993256072Sneel break; 994256072Sneel } 995256072Sneel 996266393Sjhb if (error) 997266393Sjhb return (EBUSY); 998266393Sjhb 999266393Sjhb vcpu->state = newstate; 1000266393Sjhb if (newstate == VCPU_RUNNING) 1001266393Sjhb vcpu->hostcpu = curcpu; 1002256072Sneel else 1003266393Sjhb vcpu->hostcpu = NOCPU; 1004256072Sneel 1005266393Sjhb if (newstate == VCPU_IDLE) 1006266393Sjhb wakeup(&vcpu->state); 1007266393Sjhb 1008266393Sjhb return (0); 1009256072Sneel} 1010256072Sneel 1011256072Sneelstatic void 1012256072Sneelvcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate) 1013256072Sneel{ 1014256072Sneel int error; 1015256072Sneel 1016266393Sjhb if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0) 1017256072Sneel panic("Error %d setting state to %d\n", error, newstate); 1018256072Sneel} 1019256072Sneel 1020256072Sneelstatic void 1021256072Sneelvcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) 1022256072Sneel{ 1023256072Sneel int error; 1024256072Sneel 1025266393Sjhb if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0) 1026256072Sneel panic("Error %d setting state to %d", error, newstate); 1027256072Sneel} 1028256072Sneel 1029266339Sjhbstatic void 1030266339Sjhbvm_set_rendezvous_func(struct vm *vm, vm_rendezvous_func_t func) 1031266339Sjhb{ 1032266339Sjhb 1033266339Sjhb KASSERT(mtx_owned(&vm->rendezvous_mtx), ("rendezvous_mtx not locked")); 1034266339Sjhb 1035266339Sjhb /* 1036266339Sjhb * Update 'rendezvous_func' and execute a write memory barrier to 1037266339Sjhb * ensure that it is visible across all host cpus. This is not needed 1038266339Sjhb * for correctness but it does ensure that all the vcpus will notice 1039266339Sjhb * that the rendezvous is requested immediately. 1040266339Sjhb */ 1041266339Sjhb vm->rendezvous_func = func; 1042266339Sjhb wmb(); 1043266339Sjhb} 1044266339Sjhb 1045266339Sjhb#define RENDEZVOUS_CTR0(vm, vcpuid, fmt) \ 1046266339Sjhb do { \ 1047266339Sjhb if (vcpuid >= 0) \ 1048266339Sjhb VCPU_CTR0(vm, vcpuid, fmt); \ 1049266339Sjhb else \ 1050266339Sjhb VM_CTR0(vm, fmt); \ 1051266339Sjhb } while (0) 1052266339Sjhb 1053266339Sjhbstatic void 1054266339Sjhbvm_handle_rendezvous(struct vm *vm, int vcpuid) 1055266339Sjhb{ 1056266339Sjhb 1057266339Sjhb KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU), 1058266339Sjhb ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid)); 1059266339Sjhb 1060266339Sjhb mtx_lock(&vm->rendezvous_mtx); 1061266339Sjhb while (vm->rendezvous_func != NULL) { 1062266339Sjhb /* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */ 1063266339Sjhb CPU_AND(&vm->rendezvous_req_cpus, &vm->active_cpus); 1064266339Sjhb 1065266339Sjhb if (vcpuid != -1 && 1066266339Sjhb CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) && 1067266339Sjhb !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) { 1068266339Sjhb VCPU_CTR0(vm, vcpuid, "Calling rendezvous func"); 1069266339Sjhb (*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg); 1070266339Sjhb CPU_SET(vcpuid, &vm->rendezvous_done_cpus); 1071266339Sjhb } 1072266339Sjhb if (CPU_CMP(&vm->rendezvous_req_cpus, 1073266339Sjhb &vm->rendezvous_done_cpus) == 0) { 1074266339Sjhb VCPU_CTR0(vm, vcpuid, "Rendezvous completed"); 1075266339Sjhb vm_set_rendezvous_func(vm, NULL); 1076266339Sjhb wakeup(&vm->rendezvous_func); 1077266339Sjhb break; 1078266339Sjhb } 1079266339Sjhb RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion"); 1080266339Sjhb mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0, 1081266339Sjhb "vmrndv", 0); 1082266339Sjhb } 1083266339Sjhb mtx_unlock(&vm->rendezvous_mtx); 1084266339Sjhb} 1085266339Sjhb 1086256072Sneel/* 1087256072Sneel * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run. 1088256072Sneel */ 1089256072Sneelstatic int 1090262350Sjhbvm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu) 1091256072Sneel{ 1092256072Sneel struct vcpu *vcpu; 1093268935Sjhb const char *wmesg; 1094268935Sjhb int t, vcpu_halted, vm_halted; 1095256072Sneel 1096268935Sjhb KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted")); 1097268935Sjhb 1098256072Sneel vcpu = &vm->vcpu[vcpuid]; 1099268935Sjhb vcpu_halted = 0; 1100268935Sjhb vm_halted = 0; 1101256072Sneel 1102256072Sneel vcpu_lock(vcpu); 1103268935Sjhb while (1) { 1104268935Sjhb /* 1105268935Sjhb * Do a final check for pending NMI or interrupts before 1106268935Sjhb * really putting this thread to sleep. Also check for 1107268935Sjhb * software events that would cause this vcpu to wakeup. 1108268935Sjhb * 1109268935Sjhb * These interrupts/events could have happened after the 1110268935Sjhb * vcpu returned from VMRUN() and before it acquired the 1111268935Sjhb * vcpu lock above. 1112268935Sjhb */ 1113268935Sjhb if (vm->rendezvous_func != NULL || vm->suspend) 1114268935Sjhb break; 1115268935Sjhb if (vm_nmi_pending(vm, vcpuid)) 1116268935Sjhb break; 1117268935Sjhb if (!intr_disabled) { 1118268935Sjhb if (vm_extint_pending(vm, vcpuid) || 1119268935Sjhb vlapic_pending_intr(vcpu->vlapic, NULL)) { 1120268935Sjhb break; 1121268935Sjhb } 1122268935Sjhb } 1123256072Sneel 1124270159Sgrehan /* Don't go to sleep if the vcpu thread needs to yield */ 1125270159Sgrehan if (vcpu_should_yield(vm, vcpuid)) 1126270159Sgrehan break; 1127270159Sgrehan 1128268935Sjhb /* 1129268935Sjhb * Some Linux guests implement "halt" by having all vcpus 1130268935Sjhb * execute HLT with interrupts disabled. 'halted_cpus' keeps 1131268935Sjhb * track of the vcpus that have entered this state. When all 1132268935Sjhb * vcpus enter the halted state the virtual machine is halted. 1133268935Sjhb */ 1134268935Sjhb if (intr_disabled) { 1135268935Sjhb wmesg = "vmhalt"; 1136268935Sjhb VCPU_CTR0(vm, vcpuid, "Halted"); 1137268935Sjhb if (!vcpu_halted && halt_detection_enabled) { 1138268935Sjhb vcpu_halted = 1; 1139268935Sjhb CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus); 1140268935Sjhb } 1141268935Sjhb if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) { 1142268935Sjhb vm_halted = 1; 1143268935Sjhb break; 1144268935Sjhb } 1145268935Sjhb } else { 1146268935Sjhb wmesg = "vmidle"; 1147268935Sjhb } 1148268935Sjhb 1149256072Sneel t = ticks; 1150256072Sneel vcpu_require_state_locked(vcpu, VCPU_SLEEPING); 1151270159Sgrehan /* 1152270159Sgrehan * XXX msleep_spin() cannot be interrupted by signals so 1153270159Sgrehan * wake up periodically to check pending signals. 1154270159Sgrehan */ 1155270159Sgrehan msleep_spin(vcpu, &vcpu->mtx, wmesg, hz); 1156256072Sneel vcpu_require_state_locked(vcpu, VCPU_FROZEN); 1157256072Sneel vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t); 1158256072Sneel } 1159268935Sjhb 1160268935Sjhb if (vcpu_halted) 1161268935Sjhb CPU_CLR_ATOMIC(vcpuid, &vm->halted_cpus); 1162268935Sjhb 1163256072Sneel vcpu_unlock(vcpu); 1164256072Sneel 1165268935Sjhb if (vm_halted) 1166268935Sjhb vm_suspend(vm, VM_SUSPEND_HALT); 1167266339Sjhb 1168256072Sneel return (0); 1169256072Sneel} 1170256072Sneel 1171256072Sneelstatic int 1172262350Sjhbvm_handle_paging(struct vm *vm, int vcpuid, bool *retu) 1173256072Sneel{ 1174256072Sneel int rv, ftype; 1175256072Sneel struct vm_map *map; 1176256072Sneel struct vcpu *vcpu; 1177256072Sneel struct vm_exit *vme; 1178256072Sneel 1179256072Sneel vcpu = &vm->vcpu[vcpuid]; 1180256072Sneel vme = &vcpu->exitinfo; 1181256072Sneel 1182256072Sneel ftype = vme->u.paging.fault_type; 1183256072Sneel KASSERT(ftype == VM_PROT_READ || 1184256072Sneel ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE, 1185256072Sneel ("vm_handle_paging: invalid fault_type %d", ftype)); 1186256072Sneel 1187256072Sneel if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { 1188256072Sneel rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace), 1189256072Sneel vme->u.paging.gpa, ftype); 1190256072Sneel if (rv == 0) 1191256072Sneel goto done; 1192256072Sneel } 1193256072Sneel 1194256072Sneel map = &vm->vmspace->vm_map; 1195256072Sneel rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL); 1196256072Sneel 1197261088Sjhb VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, " 1198261088Sjhb "ftype = %d", rv, vme->u.paging.gpa, ftype); 1199256072Sneel 1200256072Sneel if (rv != KERN_SUCCESS) 1201256072Sneel return (EFAULT); 1202256072Sneeldone: 1203256072Sneel /* restart execution at the faulting instruction */ 1204256072Sneel vme->inst_length = 0; 1205256072Sneel 1206256072Sneel return (0); 1207256072Sneel} 1208256072Sneel 1209256072Sneelstatic int 1210262350Sjhbvm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu) 1211256072Sneel{ 1212256072Sneel struct vie *vie; 1213256072Sneel struct vcpu *vcpu; 1214256072Sneel struct vm_exit *vme; 1215268976Sjhb uint64_t gla, gpa; 1216268976Sjhb struct vm_guest_paging *paging; 1217261088Sjhb mem_region_read_t mread; 1218261088Sjhb mem_region_write_t mwrite; 1219270159Sgrehan enum vm_cpu_mode cpu_mode; 1220270159Sgrehan int cs_d, error; 1221256072Sneel 1222256072Sneel vcpu = &vm->vcpu[vcpuid]; 1223256072Sneel vme = &vcpu->exitinfo; 1224256072Sneel 1225256072Sneel gla = vme->u.inst_emul.gla; 1226256072Sneel gpa = vme->u.inst_emul.gpa; 1227270159Sgrehan cs_d = vme->u.inst_emul.cs_d; 1228256072Sneel vie = &vme->u.inst_emul.vie; 1229268976Sjhb paging = &vme->u.inst_emul.paging; 1230270159Sgrehan cpu_mode = paging->cpu_mode; 1231256072Sneel 1232256072Sneel vie_init(vie); 1233256072Sneel 1234256072Sneel /* Fetch, decode and emulate the faulting instruction */ 1235268976Sjhb error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip, 1236268976Sjhb vme->inst_length, vie); 1237268976Sjhb if (error == 1) 1238268976Sjhb return (0); /* Resume guest to handle page fault */ 1239268976Sjhb else if (error == -1) 1240256072Sneel return (EFAULT); 1241268976Sjhb else if (error != 0) 1242268976Sjhb panic("%s: vmm_fetch_instruction error %d", __func__, error); 1243256072Sneel 1244270159Sgrehan if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0) 1245256072Sneel return (EFAULT); 1246256072Sneel 1247261088Sjhb /* return to userland unless this is an in-kernel emulated device */ 1248261088Sjhb if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { 1249261088Sjhb mread = lapic_mmio_read; 1250261088Sjhb mwrite = lapic_mmio_write; 1251261088Sjhb } else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) { 1252261088Sjhb mread = vioapic_mmio_read; 1253261088Sjhb mwrite = vioapic_mmio_write; 1254261088Sjhb } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) { 1255261088Sjhb mread = vhpet_mmio_read; 1256261088Sjhb mwrite = vhpet_mmio_write; 1257261088Sjhb } else { 1258262350Sjhb *retu = true; 1259256072Sneel return (0); 1260256072Sneel } 1261256072Sneel 1262270159Sgrehan error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, paging, 1263270159Sgrehan mread, mwrite, retu); 1264256072Sneel 1265256072Sneel return (error); 1266256072Sneel} 1267256072Sneel 1268268935Sjhbstatic int 1269268935Sjhbvm_handle_suspend(struct vm *vm, int vcpuid, bool *retu) 1270268935Sjhb{ 1271268935Sjhb int i, done; 1272268935Sjhb struct vcpu *vcpu; 1273268935Sjhb 1274268935Sjhb done = 0; 1275268935Sjhb vcpu = &vm->vcpu[vcpuid]; 1276268935Sjhb 1277268935Sjhb CPU_SET_ATOMIC(vcpuid, &vm->suspended_cpus); 1278268935Sjhb 1279268935Sjhb /* 1280268935Sjhb * Wait until all 'active_cpus' have suspended themselves. 1281268935Sjhb * 1282268935Sjhb * Since a VM may be suspended at any time including when one or 1283268935Sjhb * more vcpus are doing a rendezvous we need to call the rendezvous 1284268935Sjhb * handler while we are waiting to prevent a deadlock. 1285268935Sjhb */ 1286268935Sjhb vcpu_lock(vcpu); 1287268935Sjhb while (1) { 1288268935Sjhb if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { 1289268935Sjhb VCPU_CTR0(vm, vcpuid, "All vcpus suspended"); 1290268935Sjhb break; 1291268935Sjhb } 1292268935Sjhb 1293268935Sjhb if (vm->rendezvous_func == NULL) { 1294268935Sjhb VCPU_CTR0(vm, vcpuid, "Sleeping during suspend"); 1295268935Sjhb vcpu_require_state_locked(vcpu, VCPU_SLEEPING); 1296268935Sjhb msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); 1297268935Sjhb vcpu_require_state_locked(vcpu, VCPU_FROZEN); 1298268935Sjhb } else { 1299268935Sjhb VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend"); 1300268935Sjhb vcpu_unlock(vcpu); 1301268935Sjhb vm_handle_rendezvous(vm, vcpuid); 1302268935Sjhb vcpu_lock(vcpu); 1303268935Sjhb } 1304268935Sjhb } 1305268935Sjhb vcpu_unlock(vcpu); 1306268935Sjhb 1307268935Sjhb /* 1308268935Sjhb * Wakeup the other sleeping vcpus and return to userspace. 1309268935Sjhb */ 1310268935Sjhb for (i = 0; i < VM_MAXCPU; i++) { 1311268935Sjhb if (CPU_ISSET(i, &vm->suspended_cpus)) { 1312268935Sjhb vcpu_notify_event(vm, i, false); 1313268935Sjhb } 1314268935Sjhb } 1315268935Sjhb 1316268935Sjhb *retu = true; 1317268935Sjhb return (0); 1318268935Sjhb} 1319268935Sjhb 1320221828Sgrehanint 1321268935Sjhbvm_suspend(struct vm *vm, enum vm_suspend_how how) 1322268935Sjhb{ 1323268935Sjhb int i; 1324268935Sjhb 1325268935Sjhb if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) 1326268935Sjhb return (EINVAL); 1327268935Sjhb 1328268935Sjhb if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { 1329268935Sjhb VM_CTR2(vm, "virtual machine already suspended %d/%d", 1330268935Sjhb vm->suspend, how); 1331268935Sjhb return (EALREADY); 1332268935Sjhb } 1333268935Sjhb 1334268935Sjhb VM_CTR1(vm, "virtual machine successfully suspended %d", how); 1335268935Sjhb 1336268935Sjhb /* 1337268935Sjhb * Notify all active vcpus that they are now suspended. 1338268935Sjhb */ 1339268935Sjhb for (i = 0; i < VM_MAXCPU; i++) { 1340268935Sjhb if (CPU_ISSET(i, &vm->active_cpus)) 1341268935Sjhb vcpu_notify_event(vm, i, false); 1342268935Sjhb } 1343268935Sjhb 1344268935Sjhb return (0); 1345268935Sjhb} 1346268935Sjhb 1347268935Sjhbvoid 1348268935Sjhbvm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip) 1349268935Sjhb{ 1350268935Sjhb struct vm_exit *vmexit; 1351268935Sjhb 1352268935Sjhb KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST, 1353268935Sjhb ("vm_exit_suspended: invalid suspend type %d", vm->suspend)); 1354268935Sjhb 1355268935Sjhb vmexit = vm_exitinfo(vm, vcpuid); 1356268935Sjhb vmexit->rip = rip; 1357268935Sjhb vmexit->inst_length = 0; 1358268935Sjhb vmexit->exitcode = VM_EXITCODE_SUSPENDED; 1359268935Sjhb vmexit->u.suspended.how = vm->suspend; 1360268935Sjhb} 1361268935Sjhb 1362270074Sgrehanvoid 1363270074Sgrehanvm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip) 1364270074Sgrehan{ 1365270074Sgrehan struct vm_exit *vmexit; 1366270074Sgrehan 1367270074Sgrehan KASSERT(vm->rendezvous_func != NULL, ("rendezvous not in progress")); 1368270074Sgrehan 1369270074Sgrehan vmexit = vm_exitinfo(vm, vcpuid); 1370270074Sgrehan vmexit->rip = rip; 1371270074Sgrehan vmexit->inst_length = 0; 1372270074Sgrehan vmexit->exitcode = VM_EXITCODE_RENDEZVOUS; 1373270074Sgrehan vmm_stat_incr(vm, vcpuid, VMEXIT_RENDEZVOUS, 1); 1374270074Sgrehan} 1375270074Sgrehan 1376270074Sgrehanvoid 1377270074Sgrehanvm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip) 1378270074Sgrehan{ 1379270074Sgrehan struct vm_exit *vmexit; 1380270074Sgrehan 1381270074Sgrehan vmexit = vm_exitinfo(vm, vcpuid); 1382270074Sgrehan vmexit->rip = rip; 1383270074Sgrehan vmexit->inst_length = 0; 1384270074Sgrehan vmexit->exitcode = VM_EXITCODE_BOGUS; 1385270074Sgrehan vmm_stat_incr(vm, vcpuid, VMEXIT_ASTPENDING, 1); 1386270074Sgrehan} 1387270074Sgrehan 1388268935Sjhbint 1389221828Sgrehanvm_run(struct vm *vm, struct vm_run *vmrun) 1390221828Sgrehan{ 1391256072Sneel int error, vcpuid; 1392221828Sgrehan struct vcpu *vcpu; 1393221828Sgrehan struct pcb *pcb; 1394242065Sneel uint64_t tscval, rip; 1395242065Sneel struct vm_exit *vme; 1396262350Sjhb bool retu, intr_disabled; 1397256072Sneel pmap_t pmap; 1398268935Sjhb void *rptr, *sptr; 1399221828Sgrehan 1400221828Sgrehan vcpuid = vmrun->cpuid; 1401221828Sgrehan 1402221828Sgrehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1403221828Sgrehan return (EINVAL); 1404221828Sgrehan 1405270070Sgrehan if (!CPU_ISSET(vcpuid, &vm->active_cpus)) 1406270070Sgrehan return (EINVAL); 1407270070Sgrehan 1408270070Sgrehan if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) 1409270070Sgrehan return (EINVAL); 1410270070Sgrehan 1411268935Sjhb rptr = &vm->rendezvous_func; 1412268935Sjhb sptr = &vm->suspend; 1413256072Sneel pmap = vmspace_pmap(vm->vmspace); 1414221828Sgrehan vcpu = &vm->vcpu[vcpuid]; 1415256072Sneel vme = &vcpu->exitinfo; 1416242065Sneel rip = vmrun->rip; 1417242065Sneelrestart: 1418221828Sgrehan critical_enter(); 1419221828Sgrehan 1420256072Sneel KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active), 1421256072Sneel ("vm_run: absurd pm_active")); 1422256072Sneel 1423221828Sgrehan tscval = rdtsc(); 1424221828Sgrehan 1425221828Sgrehan pcb = PCPU_GET(curpcb); 1426221914Sjhb set_pcb_flags(pcb, PCB_FULL_IRET); 1427221828Sgrehan 1428234695Sgrehan restore_guest_msrs(vm, vcpuid); 1429221828Sgrehan restore_guest_fpustate(vcpu); 1430241489Sneel 1431256072Sneel vcpu_require_state(vm, vcpuid, VCPU_RUNNING); 1432268935Sjhb error = VMRUN(vm->cookie, vcpuid, rip, pmap, rptr, sptr); 1433256072Sneel vcpu_require_state(vm, vcpuid, VCPU_FROZEN); 1434241489Sneel 1435221828Sgrehan save_guest_fpustate(vcpu); 1436221828Sgrehan restore_host_msrs(vm, vcpuid); 1437221828Sgrehan 1438221828Sgrehan vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval); 1439221828Sgrehan 1440221828Sgrehan critical_exit(); 1441221828Sgrehan 1442256072Sneel if (error == 0) { 1443262350Sjhb retu = false; 1444256072Sneel switch (vme->exitcode) { 1445268935Sjhb case VM_EXITCODE_SUSPENDED: 1446268935Sjhb error = vm_handle_suspend(vm, vcpuid, &retu); 1447268935Sjhb break; 1448266339Sjhb case VM_EXITCODE_IOAPIC_EOI: 1449266339Sjhb vioapic_process_eoi(vm, vcpuid, 1450266339Sjhb vme->u.ioapic_eoi.vector); 1451266339Sjhb break; 1452266339Sjhb case VM_EXITCODE_RENDEZVOUS: 1453266339Sjhb vm_handle_rendezvous(vm, vcpuid); 1454266339Sjhb error = 0; 1455266339Sjhb break; 1456256072Sneel case VM_EXITCODE_HLT: 1457262350Sjhb intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0); 1458262350Sjhb error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu); 1459256072Sneel break; 1460256072Sneel case VM_EXITCODE_PAGING: 1461256072Sneel error = vm_handle_paging(vm, vcpuid, &retu); 1462256072Sneel break; 1463256072Sneel case VM_EXITCODE_INST_EMUL: 1464256072Sneel error = vm_handle_inst_emul(vm, vcpuid, &retu); 1465256072Sneel break; 1466268976Sjhb case VM_EXITCODE_INOUT: 1467268976Sjhb case VM_EXITCODE_INOUT_STR: 1468268976Sjhb error = vm_handle_inout(vm, vcpuid, vme, &retu); 1469268976Sjhb break; 1470256072Sneel default: 1471262350Sjhb retu = true; /* handled in userland */ 1472256072Sneel break; 1473242065Sneel } 1474256072Sneel } 1475242065Sneel 1476262350Sjhb if (error == 0 && retu == false) { 1477242065Sneel rip = vme->rip + vme->inst_length; 1478242065Sneel goto restart; 1479242065Sneel } 1480242065Sneel 1481256072Sneel /* copy the exit information */ 1482256072Sneel bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit)); 1483221828Sgrehan return (error); 1484221828Sgrehan} 1485221828Sgrehan 1486221828Sgrehanint 1487270159Sgrehanvm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info) 1488270159Sgrehan{ 1489270159Sgrehan struct vcpu *vcpu; 1490270159Sgrehan int type, vector; 1491270159Sgrehan 1492270159Sgrehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1493270159Sgrehan return (EINVAL); 1494270159Sgrehan 1495270159Sgrehan vcpu = &vm->vcpu[vcpuid]; 1496270159Sgrehan 1497270159Sgrehan if (info & VM_INTINFO_VALID) { 1498270159Sgrehan type = info & VM_INTINFO_TYPE; 1499270159Sgrehan vector = info & 0xff; 1500270159Sgrehan if (type == VM_INTINFO_NMI && vector != IDT_NMI) 1501270159Sgrehan return (EINVAL); 1502270159Sgrehan if (type == VM_INTINFO_HWEXCEPTION && vector >= 32) 1503270159Sgrehan return (EINVAL); 1504270159Sgrehan if (info & VM_INTINFO_RSVD) 1505270159Sgrehan return (EINVAL); 1506270159Sgrehan } else { 1507270159Sgrehan info = 0; 1508270159Sgrehan } 1509270159Sgrehan VCPU_CTR2(vm, vcpuid, "%s: info1(%#lx)", __func__, info); 1510270159Sgrehan vcpu->exitintinfo = info; 1511270159Sgrehan return (0); 1512270159Sgrehan} 1513270159Sgrehan 1514270159Sgrehanenum exc_class { 1515270159Sgrehan EXC_BENIGN, 1516270159Sgrehan EXC_CONTRIBUTORY, 1517270159Sgrehan EXC_PAGEFAULT 1518270159Sgrehan}; 1519270159Sgrehan 1520270159Sgrehan#define IDT_VE 20 /* Virtualization Exception (Intel specific) */ 1521270159Sgrehan 1522270159Sgrehanstatic enum exc_class 1523270159Sgrehanexception_class(uint64_t info) 1524270159Sgrehan{ 1525270159Sgrehan int type, vector; 1526270159Sgrehan 1527270159Sgrehan KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %#lx", info)); 1528270159Sgrehan type = info & VM_INTINFO_TYPE; 1529270159Sgrehan vector = info & 0xff; 1530270159Sgrehan 1531270159Sgrehan /* Table 6-4, "Interrupt and Exception Classes", Intel SDM, Vol 3 */ 1532270159Sgrehan switch (type) { 1533270159Sgrehan case VM_INTINFO_HWINTR: 1534270159Sgrehan case VM_INTINFO_SWINTR: 1535270159Sgrehan case VM_INTINFO_NMI: 1536270159Sgrehan return (EXC_BENIGN); 1537270159Sgrehan default: 1538270159Sgrehan /* 1539270159Sgrehan * Hardware exception. 1540270159Sgrehan * 1541270159Sgrehan * SVM and VT-x use identical type values to represent NMI, 1542270159Sgrehan * hardware interrupt and software interrupt. 1543270159Sgrehan * 1544270159Sgrehan * SVM uses type '3' for all exceptions. VT-x uses type '3' 1545270159Sgrehan * for exceptions except #BP and #OF. #BP and #OF use a type 1546270159Sgrehan * value of '5' or '6'. Therefore we don't check for explicit 1547270159Sgrehan * values of 'type' to classify 'intinfo' into a hardware 1548270159Sgrehan * exception. 1549270159Sgrehan */ 1550270159Sgrehan break; 1551270159Sgrehan } 1552270159Sgrehan 1553270159Sgrehan switch (vector) { 1554270159Sgrehan case IDT_PF: 1555270159Sgrehan case IDT_VE: 1556270159Sgrehan return (EXC_PAGEFAULT); 1557270159Sgrehan case IDT_DE: 1558270159Sgrehan case IDT_TS: 1559270159Sgrehan case IDT_NP: 1560270159Sgrehan case IDT_SS: 1561270159Sgrehan case IDT_GP: 1562270159Sgrehan return (EXC_CONTRIBUTORY); 1563270159Sgrehan default: 1564270159Sgrehan return (EXC_BENIGN); 1565270159Sgrehan } 1566270159Sgrehan} 1567270159Sgrehan 1568270159Sgrehanstatic int 1569270159Sgrehannested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2, 1570270159Sgrehan uint64_t *retinfo) 1571270159Sgrehan{ 1572270159Sgrehan enum exc_class exc1, exc2; 1573270159Sgrehan int type1, vector1; 1574270159Sgrehan 1575270159Sgrehan KASSERT(info1 & VM_INTINFO_VALID, ("info1 %#lx is not valid", info1)); 1576270159Sgrehan KASSERT(info2 & VM_INTINFO_VALID, ("info2 %#lx is not valid", info2)); 1577270159Sgrehan 1578270159Sgrehan /* 1579270159Sgrehan * If an exception occurs while attempting to call the double-fault 1580270159Sgrehan * handler the processor enters shutdown mode (aka triple fault). 1581270159Sgrehan */ 1582270159Sgrehan type1 = info1 & VM_INTINFO_TYPE; 1583270159Sgrehan vector1 = info1 & 0xff; 1584270159Sgrehan if (type1 == VM_INTINFO_HWEXCEPTION && vector1 == IDT_DF) { 1585270159Sgrehan VCPU_CTR2(vm, vcpuid, "triple fault: info1(%#lx), info2(%#lx)", 1586270159Sgrehan info1, info2); 1587270159Sgrehan vm_suspend(vm, VM_SUSPEND_TRIPLEFAULT); 1588270159Sgrehan *retinfo = 0; 1589270159Sgrehan return (0); 1590270159Sgrehan } 1591270159Sgrehan 1592270159Sgrehan /* 1593270159Sgrehan * Table 6-5 "Conditions for Generating a Double Fault", Intel SDM, Vol3 1594270159Sgrehan */ 1595270159Sgrehan exc1 = exception_class(info1); 1596270159Sgrehan exc2 = exception_class(info2); 1597270159Sgrehan if ((exc1 == EXC_CONTRIBUTORY && exc2 == EXC_CONTRIBUTORY) || 1598270159Sgrehan (exc1 == EXC_PAGEFAULT && exc2 != EXC_BENIGN)) { 1599270159Sgrehan /* Convert nested fault into a double fault. */ 1600270159Sgrehan *retinfo = IDT_DF; 1601270159Sgrehan *retinfo |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION; 1602270159Sgrehan *retinfo |= VM_INTINFO_DEL_ERRCODE; 1603270159Sgrehan } else { 1604270159Sgrehan /* Handle exceptions serially */ 1605270159Sgrehan *retinfo = info2; 1606270159Sgrehan } 1607270159Sgrehan return (1); 1608270159Sgrehan} 1609270159Sgrehan 1610270159Sgrehanstatic uint64_t 1611270159Sgrehanvcpu_exception_intinfo(struct vcpu *vcpu) 1612270159Sgrehan{ 1613270159Sgrehan uint64_t info = 0; 1614270159Sgrehan 1615270159Sgrehan if (vcpu->exception_pending) { 1616270159Sgrehan info = vcpu->exception.vector & 0xff; 1617270159Sgrehan info |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION; 1618270159Sgrehan if (vcpu->exception.error_code_valid) { 1619270159Sgrehan info |= VM_INTINFO_DEL_ERRCODE; 1620270159Sgrehan info |= (uint64_t)vcpu->exception.error_code << 32; 1621270159Sgrehan } 1622270159Sgrehan } 1623270159Sgrehan return (info); 1624270159Sgrehan} 1625270159Sgrehan 1626270159Sgrehanint 1627270159Sgrehanvm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo) 1628270159Sgrehan{ 1629270159Sgrehan struct vcpu *vcpu; 1630270159Sgrehan uint64_t info1, info2; 1631270159Sgrehan int valid; 1632270159Sgrehan 1633270159Sgrehan KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid)); 1634270159Sgrehan 1635270159Sgrehan vcpu = &vm->vcpu[vcpuid]; 1636270159Sgrehan 1637270159Sgrehan info1 = vcpu->exitintinfo; 1638270159Sgrehan vcpu->exitintinfo = 0; 1639270159Sgrehan 1640270159Sgrehan info2 = 0; 1641270159Sgrehan if (vcpu->exception_pending) { 1642270159Sgrehan info2 = vcpu_exception_intinfo(vcpu); 1643270159Sgrehan vcpu->exception_pending = 0; 1644270159Sgrehan VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %#lx", 1645270159Sgrehan vcpu->exception.vector, info2); 1646270159Sgrehan } 1647270159Sgrehan 1648270159Sgrehan if ((info1 & VM_INTINFO_VALID) && (info2 & VM_INTINFO_VALID)) { 1649270159Sgrehan valid = nested_fault(vm, vcpuid, info1, info2, retinfo); 1650270159Sgrehan } else if (info1 & VM_INTINFO_VALID) { 1651270159Sgrehan *retinfo = info1; 1652270159Sgrehan valid = 1; 1653270159Sgrehan } else if (info2 & VM_INTINFO_VALID) { 1654270159Sgrehan *retinfo = info2; 1655270159Sgrehan valid = 1; 1656270159Sgrehan } else { 1657270159Sgrehan valid = 0; 1658270159Sgrehan } 1659270159Sgrehan 1660270159Sgrehan if (valid) { 1661270159Sgrehan VCPU_CTR4(vm, vcpuid, "%s: info1(%#lx), info2(%#lx), " 1662270159Sgrehan "retinfo(%#lx)", __func__, info1, info2, *retinfo); 1663270159Sgrehan } 1664270159Sgrehan 1665270159Sgrehan return (valid); 1666270159Sgrehan} 1667270159Sgrehan 1668270159Sgrehanint 1669270159Sgrehanvm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2) 1670270159Sgrehan{ 1671270159Sgrehan struct vcpu *vcpu; 1672270159Sgrehan 1673270159Sgrehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1674270159Sgrehan return (EINVAL); 1675270159Sgrehan 1676270159Sgrehan vcpu = &vm->vcpu[vcpuid]; 1677270159Sgrehan *info1 = vcpu->exitintinfo; 1678270159Sgrehan *info2 = vcpu_exception_intinfo(vcpu); 1679270159Sgrehan return (0); 1680270159Sgrehan} 1681270159Sgrehan 1682270159Sgrehanint 1683267427Sjhbvm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception) 1684221828Sgrehan{ 1685267427Sjhb struct vcpu *vcpu; 1686267427Sjhb 1687221828Sgrehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1688221828Sgrehan return (EINVAL); 1689221828Sgrehan 1690267427Sjhb if (exception->vector < 0 || exception->vector >= 32) 1691221828Sgrehan return (EINVAL); 1692221828Sgrehan 1693270159Sgrehan /* 1694270159Sgrehan * A double fault exception should never be injected directly into 1695270159Sgrehan * the guest. It is a derived exception that results from specific 1696270159Sgrehan * combinations of nested faults. 1697270159Sgrehan */ 1698270159Sgrehan if (exception->vector == IDT_DF) 1699270159Sgrehan return (EINVAL); 1700270159Sgrehan 1701267427Sjhb vcpu = &vm->vcpu[vcpuid]; 1702221828Sgrehan 1703267427Sjhb if (vcpu->exception_pending) { 1704267427Sjhb VCPU_CTR2(vm, vcpuid, "Unable to inject exception %d due to " 1705267427Sjhb "pending exception %d", exception->vector, 1706267427Sjhb vcpu->exception.vector); 1707267427Sjhb return (EBUSY); 1708267427Sjhb } 1709267427Sjhb 1710267427Sjhb vcpu->exception_pending = 1; 1711267427Sjhb vcpu->exception = *exception; 1712267427Sjhb VCPU_CTR1(vm, vcpuid, "Exception %d pending", exception->vector); 1713267427Sjhb return (0); 1714221828Sgrehan} 1715221828Sgrehan 1716270159Sgrehanvoid 1717270159Sgrehanvm_inject_fault(void *vmarg, int vcpuid, int vector, int errcode_valid, 1718270159Sgrehan int errcode) 1719267427Sjhb{ 1720270159Sgrehan struct vm_exception exception; 1721267427Sjhb struct vm_exit *vmexit; 1722270159Sgrehan struct vm *vm; 1723267427Sjhb int error; 1724267427Sjhb 1725270159Sgrehan vm = vmarg; 1726270159Sgrehan 1727270159Sgrehan exception.vector = vector; 1728270159Sgrehan exception.error_code = errcode; 1729270159Sgrehan exception.error_code_valid = errcode_valid; 1730270159Sgrehan error = vm_inject_exception(vm, vcpuid, &exception); 1731267427Sjhb KASSERT(error == 0, ("vm_inject_exception error %d", error)); 1732267427Sjhb 1733267427Sjhb /* 1734267427Sjhb * A fault-like exception allows the instruction to be restarted 1735267427Sjhb * after the exception handler returns. 1736267427Sjhb * 1737267427Sjhb * By setting the inst_length to 0 we ensure that the instruction 1738267427Sjhb * pointer remains at the faulting instruction. 1739267427Sjhb */ 1740267427Sjhb vmexit = vm_exitinfo(vm, vcpuid); 1741267427Sjhb vmexit->inst_length = 0; 1742267427Sjhb} 1743267427Sjhb 1744267427Sjhbvoid 1745270159Sgrehanvm_inject_pf(void *vmarg, int vcpuid, int error_code, uint64_t cr2) 1746268976Sjhb{ 1747270159Sgrehan struct vm *vm; 1748268976Sjhb int error; 1749268976Sjhb 1750270159Sgrehan vm = vmarg; 1751268976Sjhb VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %#x, cr2 %#lx", 1752268976Sjhb error_code, cr2); 1753268976Sjhb 1754268976Sjhb error = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR2, cr2); 1755268976Sjhb KASSERT(error == 0, ("vm_set_register(cr2) error %d", error)); 1756268976Sjhb 1757270159Sgrehan vm_inject_fault(vm, vcpuid, IDT_PF, 1, error_code); 1758268976Sjhb} 1759268976Sjhb 1760248389Sneelstatic VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu"); 1761241982Sneel 1762221828Sgrehanint 1763241982Sneelvm_inject_nmi(struct vm *vm, int vcpuid) 1764221828Sgrehan{ 1765241982Sneel struct vcpu *vcpu; 1766221828Sgrehan 1767241982Sneel if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1768221828Sgrehan return (EINVAL); 1769221828Sgrehan 1770241982Sneel vcpu = &vm->vcpu[vcpuid]; 1771241982Sneel 1772241982Sneel vcpu->nmi_pending = 1; 1773266339Sjhb vcpu_notify_event(vm, vcpuid, false); 1774241982Sneel return (0); 1775221828Sgrehan} 1776221828Sgrehan 1777221828Sgrehanint 1778241982Sneelvm_nmi_pending(struct vm *vm, int vcpuid) 1779241982Sneel{ 1780241982Sneel struct vcpu *vcpu; 1781241982Sneel 1782241982Sneel if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1783241982Sneel panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); 1784241982Sneel 1785241982Sneel vcpu = &vm->vcpu[vcpuid]; 1786241982Sneel 1787241982Sneel return (vcpu->nmi_pending); 1788241982Sneel} 1789241982Sneel 1790241982Sneelvoid 1791241982Sneelvm_nmi_clear(struct vm *vm, int vcpuid) 1792241982Sneel{ 1793241982Sneel struct vcpu *vcpu; 1794241982Sneel 1795241982Sneel if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1796241982Sneel panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); 1797241982Sneel 1798241982Sneel vcpu = &vm->vcpu[vcpuid]; 1799241982Sneel 1800241982Sneel if (vcpu->nmi_pending == 0) 1801241982Sneel panic("vm_nmi_clear: inconsistent nmi_pending state"); 1802241982Sneel 1803241982Sneel vcpu->nmi_pending = 0; 1804241982Sneel vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1); 1805241982Sneel} 1806241982Sneel 1807268891Sjhbstatic VMM_STAT(VCPU_EXTINT_COUNT, "number of ExtINTs delivered to vcpu"); 1808268891Sjhb 1809241982Sneelint 1810268891Sjhbvm_inject_extint(struct vm *vm, int vcpuid) 1811268891Sjhb{ 1812268891Sjhb struct vcpu *vcpu; 1813268891Sjhb 1814268891Sjhb if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1815268891Sjhb return (EINVAL); 1816268891Sjhb 1817268891Sjhb vcpu = &vm->vcpu[vcpuid]; 1818268891Sjhb 1819268891Sjhb vcpu->extint_pending = 1; 1820268891Sjhb vcpu_notify_event(vm, vcpuid, false); 1821268891Sjhb return (0); 1822268891Sjhb} 1823268891Sjhb 1824268891Sjhbint 1825268891Sjhbvm_extint_pending(struct vm *vm, int vcpuid) 1826268891Sjhb{ 1827268891Sjhb struct vcpu *vcpu; 1828268891Sjhb 1829268891Sjhb if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1830268891Sjhb panic("vm_extint_pending: invalid vcpuid %d", vcpuid); 1831268891Sjhb 1832268891Sjhb vcpu = &vm->vcpu[vcpuid]; 1833268891Sjhb 1834268891Sjhb return (vcpu->extint_pending); 1835268891Sjhb} 1836268891Sjhb 1837268891Sjhbvoid 1838268891Sjhbvm_extint_clear(struct vm *vm, int vcpuid) 1839268891Sjhb{ 1840268891Sjhb struct vcpu *vcpu; 1841268891Sjhb 1842268891Sjhb if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1843268891Sjhb panic("vm_extint_pending: invalid vcpuid %d", vcpuid); 1844268891Sjhb 1845268891Sjhb vcpu = &vm->vcpu[vcpuid]; 1846268891Sjhb 1847268891Sjhb if (vcpu->extint_pending == 0) 1848268891Sjhb panic("vm_extint_clear: inconsistent extint_pending state"); 1849268891Sjhb 1850268891Sjhb vcpu->extint_pending = 0; 1851268891Sjhb vmm_stat_incr(vm, vcpuid, VCPU_EXTINT_COUNT, 1); 1852268891Sjhb} 1853268891Sjhb 1854268891Sjhbint 1855221828Sgrehanvm_get_capability(struct vm *vm, int vcpu, int type, int *retval) 1856221828Sgrehan{ 1857221828Sgrehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 1858221828Sgrehan return (EINVAL); 1859221828Sgrehan 1860221828Sgrehan if (type < 0 || type >= VM_CAP_MAX) 1861221828Sgrehan return (EINVAL); 1862221828Sgrehan 1863221828Sgrehan return (VMGETCAP(vm->cookie, vcpu, type, retval)); 1864221828Sgrehan} 1865221828Sgrehan 1866221828Sgrehanint 1867221828Sgrehanvm_set_capability(struct vm *vm, int vcpu, int type, int val) 1868221828Sgrehan{ 1869221828Sgrehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 1870221828Sgrehan return (EINVAL); 1871221828Sgrehan 1872221828Sgrehan if (type < 0 || type >= VM_CAP_MAX) 1873221828Sgrehan return (EINVAL); 1874221828Sgrehan 1875221828Sgrehan return (VMSETCAP(vm->cookie, vcpu, type, val)); 1876221828Sgrehan} 1877221828Sgrehan 1878221828Sgrehanuint64_t * 1879221828Sgrehanvm_guest_msrs(struct vm *vm, int cpu) 1880221828Sgrehan{ 1881221828Sgrehan return (vm->vcpu[cpu].guest_msrs); 1882221828Sgrehan} 1883221828Sgrehan 1884221828Sgrehanstruct vlapic * 1885221828Sgrehanvm_lapic(struct vm *vm, int cpu) 1886221828Sgrehan{ 1887221828Sgrehan return (vm->vcpu[cpu].vlapic); 1888221828Sgrehan} 1889221828Sgrehan 1890261088Sjhbstruct vioapic * 1891261088Sjhbvm_ioapic(struct vm *vm) 1892261088Sjhb{ 1893261088Sjhb 1894261088Sjhb return (vm->vioapic); 1895261088Sjhb} 1896261088Sjhb 1897261088Sjhbstruct vhpet * 1898261088Sjhbvm_hpet(struct vm *vm) 1899261088Sjhb{ 1900261088Sjhb 1901261088Sjhb return (vm->vhpet); 1902261088Sjhb} 1903261088Sjhb 1904221828Sgrehanboolean_t 1905221828Sgrehanvmm_is_pptdev(int bus, int slot, int func) 1906221828Sgrehan{ 1907246188Sneel int found, i, n; 1908246188Sneel int b, s, f; 1909221828Sgrehan char *val, *cp, *cp2; 1910221828Sgrehan 1911221828Sgrehan /* 1912246188Sneel * XXX 1913246188Sneel * The length of an environment variable is limited to 128 bytes which 1914246188Sneel * puts an upper limit on the number of passthru devices that may be 1915246188Sneel * specified using a single environment variable. 1916246188Sneel * 1917246188Sneel * Work around this by scanning multiple environment variable 1918246188Sneel * names instead of a single one - yuck! 1919221828Sgrehan */ 1920246188Sneel const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL }; 1921246188Sneel 1922246188Sneel /* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */ 1923221828Sgrehan found = 0; 1924246188Sneel for (i = 0; names[i] != NULL && !found; i++) { 1925246188Sneel cp = val = getenv(names[i]); 1926246188Sneel while (cp != NULL && *cp != '\0') { 1927246188Sneel if ((cp2 = strchr(cp, ' ')) != NULL) 1928246188Sneel *cp2 = '\0'; 1929221828Sgrehan 1930246188Sneel n = sscanf(cp, "%d/%d/%d", &b, &s, &f); 1931246188Sneel if (n == 3 && bus == b && slot == s && func == f) { 1932246188Sneel found = 1; 1933246188Sneel break; 1934246188Sneel } 1935221828Sgrehan 1936246188Sneel if (cp2 != NULL) 1937246188Sneel *cp2++ = ' '; 1938221828Sgrehan 1939246188Sneel cp = cp2; 1940246188Sneel } 1941246188Sneel freeenv(val); 1942221828Sgrehan } 1943221828Sgrehan return (found); 1944221828Sgrehan} 1945221828Sgrehan 1946221828Sgrehanvoid * 1947221828Sgrehanvm_iommu_domain(struct vm *vm) 1948221828Sgrehan{ 1949221828Sgrehan 1950221828Sgrehan return (vm->iommu); 1951221828Sgrehan} 1952221828Sgrehan 1953241489Sneelint 1954266393Sjhbvcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate, 1955266393Sjhb bool from_idle) 1956221828Sgrehan{ 1957241489Sneel int error; 1958221828Sgrehan struct vcpu *vcpu; 1959221828Sgrehan 1960221828Sgrehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1961221828Sgrehan panic("vm_set_run_state: invalid vcpuid %d", vcpuid); 1962221828Sgrehan 1963221828Sgrehan vcpu = &vm->vcpu[vcpuid]; 1964221828Sgrehan 1965241489Sneel vcpu_lock(vcpu); 1966266393Sjhb error = vcpu_set_state_locked(vcpu, newstate, from_idle); 1967241489Sneel vcpu_unlock(vcpu); 1968241489Sneel 1969241489Sneel return (error); 1970221828Sgrehan} 1971221828Sgrehan 1972241489Sneelenum vcpu_state 1973249879Sgrehanvcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu) 1974221828Sgrehan{ 1975221828Sgrehan struct vcpu *vcpu; 1976241489Sneel enum vcpu_state state; 1977221828Sgrehan 1978221828Sgrehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1979221828Sgrehan panic("vm_get_run_state: invalid vcpuid %d", vcpuid); 1980221828Sgrehan 1981221828Sgrehan vcpu = &vm->vcpu[vcpuid]; 1982221828Sgrehan 1983241489Sneel vcpu_lock(vcpu); 1984241489Sneel state = vcpu->state; 1985249879Sgrehan if (hostcpu != NULL) 1986249879Sgrehan *hostcpu = vcpu->hostcpu; 1987241489Sneel vcpu_unlock(vcpu); 1988221828Sgrehan 1989241489Sneel return (state); 1990221828Sgrehan} 1991221828Sgrehan 1992270070Sgrehanint 1993221828Sgrehanvm_activate_cpu(struct vm *vm, int vcpuid) 1994221828Sgrehan{ 1995221828Sgrehan 1996270070Sgrehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1997270070Sgrehan return (EINVAL); 1998266339Sjhb 1999270070Sgrehan if (CPU_ISSET(vcpuid, &vm->active_cpus)) 2000270070Sgrehan return (EBUSY); 2001270070Sgrehan 2002266339Sjhb VCPU_CTR0(vm, vcpuid, "activated"); 2003266339Sjhb CPU_SET_ATOMIC(vcpuid, &vm->active_cpus); 2004270070Sgrehan return (0); 2005221828Sgrehan} 2006221828Sgrehan 2007223621Sgrehancpuset_t 2008221828Sgrehanvm_active_cpus(struct vm *vm) 2009221828Sgrehan{ 2010221828Sgrehan 2011221828Sgrehan return (vm->active_cpus); 2012221828Sgrehan} 2013221828Sgrehan 2014270070Sgrehancpuset_t 2015270070Sgrehanvm_suspended_cpus(struct vm *vm) 2016270070Sgrehan{ 2017270070Sgrehan 2018270070Sgrehan return (vm->suspended_cpus); 2019270070Sgrehan} 2020270070Sgrehan 2021221828Sgrehanvoid * 2022221828Sgrehanvcpu_stats(struct vm *vm, int vcpuid) 2023221828Sgrehan{ 2024221828Sgrehan 2025221828Sgrehan return (vm->vcpu[vcpuid].stats); 2026221828Sgrehan} 2027240922Sneel 2028240922Sneelint 2029240922Sneelvm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state) 2030240922Sneel{ 2031240922Sneel if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2032240922Sneel return (EINVAL); 2033240922Sneel 2034240922Sneel *state = vm->vcpu[vcpuid].x2apic_state; 2035240922Sneel 2036240922Sneel return (0); 2037240922Sneel} 2038240922Sneel 2039240922Sneelint 2040240922Sneelvm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) 2041240922Sneel{ 2042240922Sneel if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2043240922Sneel return (EINVAL); 2044240922Sneel 2045248392Sneel if (state >= X2APIC_STATE_LAST) 2046240922Sneel return (EINVAL); 2047240922Sneel 2048240922Sneel vm->vcpu[vcpuid].x2apic_state = state; 2049240922Sneel 2050240943Sneel vlapic_set_x2apic_state(vm, vcpuid, state); 2051240943Sneel 2052240922Sneel return (0); 2053240922Sneel} 2054241489Sneel 2055262350Sjhb/* 2056262350Sjhb * This function is called to ensure that a vcpu "sees" a pending event 2057262350Sjhb * as soon as possible: 2058262350Sjhb * - If the vcpu thread is sleeping then it is woken up. 2059262350Sjhb * - If the vcpu is running on a different host_cpu then an IPI will be directed 2060262350Sjhb * to the host_cpu to cause the vcpu to trap into the hypervisor. 2061262350Sjhb */ 2062241489Sneelvoid 2063266339Sjhbvcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr) 2064241489Sneel{ 2065241489Sneel int hostcpu; 2066241489Sneel struct vcpu *vcpu; 2067241489Sneel 2068241489Sneel vcpu = &vm->vcpu[vcpuid]; 2069241489Sneel 2070242065Sneel vcpu_lock(vcpu); 2071241489Sneel hostcpu = vcpu->hostcpu; 2072266393Sjhb if (vcpu->state == VCPU_RUNNING) { 2073266393Sjhb KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); 2074266339Sjhb if (hostcpu != curcpu) { 2075266393Sjhb if (lapic_intr) { 2076266339Sjhb vlapic_post_intr(vcpu->vlapic, hostcpu, 2077266339Sjhb vmm_ipinum); 2078266393Sjhb } else { 2079266339Sjhb ipi_cpu(hostcpu, vmm_ipinum); 2080266393Sjhb } 2081266393Sjhb } else { 2082266393Sjhb /* 2083266393Sjhb * If the 'vcpu' is running on 'curcpu' then it must 2084266393Sjhb * be sending a notification to itself (e.g. SELF_IPI). 2085266393Sjhb * The pending event will be picked up when the vcpu 2086266393Sjhb * transitions back to guest context. 2087266393Sjhb */ 2088266339Sjhb } 2089266393Sjhb } else { 2090266393Sjhb KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent " 2091266393Sjhb "with hostcpu %d", vcpu->state, hostcpu)); 2092266393Sjhb if (vcpu->state == VCPU_SLEEPING) 2093266393Sjhb wakeup_one(vcpu); 2094242065Sneel } 2095242065Sneel vcpu_unlock(vcpu); 2096241489Sneel} 2097256072Sneel 2098256072Sneelstruct vmspace * 2099256072Sneelvm_get_vmspace(struct vm *vm) 2100256072Sneel{ 2101256072Sneel 2102256072Sneel return (vm->vmspace); 2103256072Sneel} 2104261088Sjhb 2105261088Sjhbint 2106261088Sjhbvm_apicid2vcpuid(struct vm *vm, int apicid) 2107261088Sjhb{ 2108261088Sjhb /* 2109261088Sjhb * XXX apic id is assumed to be numerically identical to vcpu id 2110261088Sjhb */ 2111261088Sjhb return (apicid); 2112261088Sjhb} 2113266339Sjhb 2114266339Sjhbvoid 2115266339Sjhbvm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest, 2116266339Sjhb vm_rendezvous_func_t func, void *arg) 2117266339Sjhb{ 2118266339Sjhb int i; 2119266339Sjhb 2120266339Sjhb /* 2121266339Sjhb * Enforce that this function is called without any locks 2122266339Sjhb */ 2123266339Sjhb WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous"); 2124266339Sjhb KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU), 2125266339Sjhb ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid)); 2126266339Sjhb 2127266339Sjhbrestart: 2128266339Sjhb mtx_lock(&vm->rendezvous_mtx); 2129266339Sjhb if (vm->rendezvous_func != NULL) { 2130266339Sjhb /* 2131266339Sjhb * If a rendezvous is already in progress then we need to 2132266339Sjhb * call the rendezvous handler in case this 'vcpuid' is one 2133266339Sjhb * of the targets of the rendezvous. 2134266339Sjhb */ 2135266339Sjhb RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress"); 2136266339Sjhb mtx_unlock(&vm->rendezvous_mtx); 2137266339Sjhb vm_handle_rendezvous(vm, vcpuid); 2138266339Sjhb goto restart; 2139266339Sjhb } 2140266339Sjhb KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous " 2141266339Sjhb "rendezvous is still in progress")); 2142266339Sjhb 2143266339Sjhb RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous"); 2144266339Sjhb vm->rendezvous_req_cpus = dest; 2145266339Sjhb CPU_ZERO(&vm->rendezvous_done_cpus); 2146266339Sjhb vm->rendezvous_arg = arg; 2147266339Sjhb vm_set_rendezvous_func(vm, func); 2148266339Sjhb mtx_unlock(&vm->rendezvous_mtx); 2149266339Sjhb 2150266339Sjhb /* 2151266339Sjhb * Wake up any sleeping vcpus and trigger a VM-exit in any running 2152266339Sjhb * vcpus so they handle the rendezvous as soon as possible. 2153266339Sjhb */ 2154266339Sjhb for (i = 0; i < VM_MAXCPU; i++) { 2155266339Sjhb if (CPU_ISSET(i, &dest)) 2156266339Sjhb vcpu_notify_event(vm, i, false); 2157266339Sjhb } 2158266339Sjhb 2159266339Sjhb vm_handle_rendezvous(vm, vcpuid); 2160266339Sjhb} 2161268891Sjhb 2162268891Sjhbstruct vatpic * 2163268891Sjhbvm_atpic(struct vm *vm) 2164268891Sjhb{ 2165268891Sjhb return (vm->vatpic); 2166268891Sjhb} 2167268891Sjhb 2168268891Sjhbstruct vatpit * 2169268891Sjhbvm_atpit(struct vm *vm) 2170268891Sjhb{ 2171268891Sjhb return (vm->vatpit); 2172268891Sjhb} 2173268976Sjhb 2174268976Sjhbenum vm_reg_name 2175268976Sjhbvm_segment_name(int seg) 2176268976Sjhb{ 2177268976Sjhb static enum vm_reg_name seg_names[] = { 2178268976Sjhb VM_REG_GUEST_ES, 2179268976Sjhb VM_REG_GUEST_CS, 2180268976Sjhb VM_REG_GUEST_SS, 2181268976Sjhb VM_REG_GUEST_DS, 2182268976Sjhb VM_REG_GUEST_FS, 2183268976Sjhb VM_REG_GUEST_GS 2184268976Sjhb }; 2185268976Sjhb 2186268976Sjhb KASSERT(seg >= 0 && seg < nitems(seg_names), 2187268976Sjhb ("%s: invalid segment encoding %d", __func__, seg)); 2188268976Sjhb return (seg_names[seg]); 2189268976Sjhb} 2190270074Sgrehan 2191270159Sgrehanvoid 2192270159Sgrehanvm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, 2193270159Sgrehan int num_copyinfo) 2194270159Sgrehan{ 2195270159Sgrehan int idx; 2196270074Sgrehan 2197270159Sgrehan for (idx = 0; idx < num_copyinfo; idx++) { 2198270159Sgrehan if (copyinfo[idx].cookie != NULL) 2199270159Sgrehan vm_gpa_release(copyinfo[idx].cookie); 2200270159Sgrehan } 2201270159Sgrehan bzero(copyinfo, num_copyinfo * sizeof(struct vm_copyinfo)); 2202270159Sgrehan} 2203270159Sgrehan 2204270159Sgrehanint 2205270159Sgrehanvm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, 2206270159Sgrehan uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo, 2207270159Sgrehan int num_copyinfo) 2208270159Sgrehan{ 2209270159Sgrehan int error, idx, nused; 2210270159Sgrehan size_t n, off, remaining; 2211270159Sgrehan void *hva, *cookie; 2212270159Sgrehan uint64_t gpa; 2213270159Sgrehan 2214270159Sgrehan bzero(copyinfo, sizeof(struct vm_copyinfo) * num_copyinfo); 2215270159Sgrehan 2216270159Sgrehan nused = 0; 2217270159Sgrehan remaining = len; 2218270159Sgrehan while (remaining > 0) { 2219270159Sgrehan KASSERT(nused < num_copyinfo, ("insufficient vm_copyinfo")); 2220270159Sgrehan error = vmm_gla2gpa(vm, vcpuid, paging, gla, prot, &gpa); 2221270159Sgrehan if (error) 2222270159Sgrehan return (error); 2223270159Sgrehan off = gpa & PAGE_MASK; 2224270159Sgrehan n = min(remaining, PAGE_SIZE - off); 2225270159Sgrehan copyinfo[nused].gpa = gpa; 2226270159Sgrehan copyinfo[nused].len = n; 2227270159Sgrehan remaining -= n; 2228270159Sgrehan gla += n; 2229270159Sgrehan nused++; 2230270159Sgrehan } 2231270159Sgrehan 2232270159Sgrehan for (idx = 0; idx < nused; idx++) { 2233270159Sgrehan hva = vm_gpa_hold(vm, copyinfo[idx].gpa, copyinfo[idx].len, 2234270159Sgrehan prot, &cookie); 2235270159Sgrehan if (hva == NULL) 2236270159Sgrehan break; 2237270159Sgrehan copyinfo[idx].hva = hva; 2238270159Sgrehan copyinfo[idx].cookie = cookie; 2239270159Sgrehan } 2240270159Sgrehan 2241270159Sgrehan if (idx != nused) { 2242270159Sgrehan vm_copy_teardown(vm, vcpuid, copyinfo, num_copyinfo); 2243270159Sgrehan return (-1); 2244270159Sgrehan } else { 2245270159Sgrehan return (0); 2246270159Sgrehan } 2247270159Sgrehan} 2248270159Sgrehan 2249270159Sgrehanvoid 2250270159Sgrehanvm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr, 2251270159Sgrehan size_t len) 2252270159Sgrehan{ 2253270159Sgrehan char *dst; 2254270159Sgrehan int idx; 2255270159Sgrehan 2256270159Sgrehan dst = kaddr; 2257270159Sgrehan idx = 0; 2258270159Sgrehan while (len > 0) { 2259270159Sgrehan bcopy(copyinfo[idx].hva, dst, copyinfo[idx].len); 2260270159Sgrehan len -= copyinfo[idx].len; 2261270159Sgrehan dst += copyinfo[idx].len; 2262270159Sgrehan idx++; 2263270159Sgrehan } 2264270159Sgrehan} 2265270159Sgrehan 2266270159Sgrehanvoid 2267270159Sgrehanvm_copyout(struct vm *vm, int vcpuid, const void *kaddr, 2268270159Sgrehan struct vm_copyinfo *copyinfo, size_t len) 2269270159Sgrehan{ 2270270159Sgrehan const char *src; 2271270159Sgrehan int idx; 2272270159Sgrehan 2273270159Sgrehan src = kaddr; 2274270159Sgrehan idx = 0; 2275270159Sgrehan while (len > 0) { 2276270159Sgrehan bcopy(src, copyinfo[idx].hva, copyinfo[idx].len); 2277270159Sgrehan len -= copyinfo[idx].len; 2278270159Sgrehan src += copyinfo[idx].len; 2279270159Sgrehan idx++; 2280270159Sgrehan } 2281270159Sgrehan} 2282270159Sgrehan 2283270074Sgrehan/* 2284270074Sgrehan * Return the amount of in-use and wired memory for the VM. Since 2285270074Sgrehan * these are global stats, only return the values with for vCPU 0 2286270074Sgrehan */ 2287270074SgrehanVMM_STAT_DECLARE(VMM_MEM_RESIDENT); 2288270074SgrehanVMM_STAT_DECLARE(VMM_MEM_WIRED); 2289270074Sgrehan 2290270074Sgrehanstatic void 2291270074Sgrehanvm_get_rescnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat) 2292270074Sgrehan{ 2293270074Sgrehan 2294270074Sgrehan if (vcpu == 0) { 2295270074Sgrehan vmm_stat_set(vm, vcpu, VMM_MEM_RESIDENT, 2296270074Sgrehan PAGE_SIZE * vmspace_resident_count(vm->vmspace)); 2297270074Sgrehan } 2298270074Sgrehan} 2299270074Sgrehan 2300270074Sgrehanstatic void 2301270074Sgrehanvm_get_wiredcnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat) 2302270074Sgrehan{ 2303270074Sgrehan 2304270074Sgrehan if (vcpu == 0) { 2305270074Sgrehan vmm_stat_set(vm, vcpu, VMM_MEM_WIRED, 2306270074Sgrehan PAGE_SIZE * pmap_wired_count(vmspace_pmap(vm->vmspace))); 2307270074Sgrehan } 2308270074Sgrehan} 2309270074Sgrehan 2310270074SgrehanVMM_STAT_FUNC(VMM_MEM_RESIDENT, "Resident memory", vm_get_rescnt); 2311270074SgrehanVMM_STAT_FUNC(VMM_MEM_WIRED, "Wired memory", vm_get_wiredcnt); 2312