/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/sys/amd64/vmm/vmm.c 261275 2014-01-29 21:23:37Z jhb $
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/amd64/vmm/vmm.c 261275 2014-01-29 21:23:37Z jhb $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/pcpu.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/systm.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/cpu.h>
#include <machine/vm.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <x86/apicreg.h>
#include <machine/pmap.h>
#include <machine/vmparam.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>

#include "vmm_ktr.h"
#include "vmm_host.h"
#include "vmm_mem.h"
#include "vmm_util.h"
#include "vhpet.h"
#include "vioapic.h"
#include "vlapic.h"
#include "vmm_msr.h"
#include "vmm_ipi.h"
#include "vmm_stat.h"
#include "vmm_lapic.h"

#include "io/ppt.h"
#include "io/iommu.h"
81221828Sgrehanstruct vlapic; 82221828Sgrehan 83221828Sgrehanstruct vcpu { 84221828Sgrehan int flags; 85241489Sneel enum vcpu_state state; 86241489Sneel struct mtx mtx; 87221828Sgrehan int hostcpu; /* host cpuid this vcpu last ran on */ 88221828Sgrehan uint64_t guest_msrs[VMM_MSR_NUM]; 89221828Sgrehan struct vlapic *vlapic; 90221828Sgrehan int vcpuid; 91234695Sgrehan struct savefpu *guestfpu; /* guest fpu state */ 92221828Sgrehan void *stats; 93240894Sneel struct vm_exit exitinfo; 94240922Sneel enum x2apic_state x2apic_state; 95241982Sneel int nmi_pending; 96221828Sgrehan}; 97221828Sgrehan 98242065Sneel#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) 99242065Sneel#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) 100242065Sneel#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) 101256072Sneel#define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED) 102241489Sneel 103256072Sneelstruct mem_seg { 104256072Sneel vm_paddr_t gpa; 105256072Sneel size_t len; 106256072Sneel boolean_t wired; 107256072Sneel vm_object_t object; 108256072Sneel}; 109221828Sgrehan#define VM_MAX_MEMORY_SEGMENTS 2 110221828Sgrehan 111221828Sgrehanstruct vm { 112221828Sgrehan void *cookie; /* processor-specific data */ 113221828Sgrehan void *iommu; /* iommu-specific data */ 114261088Sjhb struct vhpet *vhpet; /* virtual HPET */ 115261088Sjhb struct vioapic *vioapic; /* virtual ioapic */ 116256072Sneel struct vmspace *vmspace; /* guest's address space */ 117221828Sgrehan struct vcpu vcpu[VM_MAXCPU]; 118221828Sgrehan int num_mem_segs; 119256072Sneel struct mem_seg mem_segs[VM_MAX_MEMORY_SEGMENTS]; 120221828Sgrehan char name[VM_MAX_NAMELEN]; 121221828Sgrehan 122221828Sgrehan /* 123223621Sgrehan * Set of active vcpus. 124221828Sgrehan * An active vcpu is one that has been started implicitly (BSP) or 125221828Sgrehan * explicitly (AP) by sending it a startup ipi. 
126221828Sgrehan */ 127223621Sgrehan cpuset_t active_cpus; 128221828Sgrehan}; 129221828Sgrehan 130249396Sneelstatic int vmm_initialized; 131249396Sneel 132221828Sgrehanstatic struct vmm_ops *ops; 133221828Sgrehan#define VMM_INIT() (ops != NULL ? (*ops->init)() : 0) 134221828Sgrehan#define VMM_CLEANUP() (ops != NULL ? (*ops->cleanup)() : 0) 135261275Sjhb#define VMM_RESUME() (ops != NULL ? (*ops->resume)() : 0) 136221828Sgrehan 137256072Sneel#define VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL) 138256072Sneel#define VMRUN(vmi, vcpu, rip, pmap) \ 139256072Sneel (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap) : ENXIO) 140221828Sgrehan#define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL) 141256072Sneel#define VMSPACE_ALLOC(min, max) \ 142256072Sneel (ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL) 143256072Sneel#define VMSPACE_FREE(vmspace) \ 144256072Sneel (ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO) 145221828Sgrehan#define VMGETREG(vmi, vcpu, num, retval) \ 146221828Sgrehan (ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO) 147221828Sgrehan#define VMSETREG(vmi, vcpu, num, val) \ 148221828Sgrehan (ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO) 149221828Sgrehan#define VMGETDESC(vmi, vcpu, num, desc) \ 150221828Sgrehan (ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO) 151221828Sgrehan#define VMSETDESC(vmi, vcpu, num, desc) \ 152221828Sgrehan (ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO) 153221828Sgrehan#define VMINJECT(vmi, vcpu, type, vec, ec, ecv) \ 154221828Sgrehan (ops != NULL ? (*ops->vminject)(vmi, vcpu, type, vec, ec, ecv) : ENXIO) 155221828Sgrehan#define VMGETCAP(vmi, vcpu, num, retval) \ 156221828Sgrehan (ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO) 157221828Sgrehan#define VMSETCAP(vmi, vcpu, num, val) \ 158221828Sgrehan (ops != NULL ? 
(*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO) 159221828Sgrehan 160245021Sneel#define fpu_start_emulating() load_cr0(rcr0() | CR0_TS) 161245021Sneel#define fpu_stop_emulating() clts() 162221828Sgrehan 163221828Sgrehanstatic MALLOC_DEFINE(M_VM, "vm", "vm"); 164221828SgrehanCTASSERT(VMM_MSR_NUM <= 64); /* msr_mask can keep track of up to 64 msrs */ 165221828Sgrehan 166221828Sgrehan/* statistics */ 167248389Sneelstatic VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); 168221828Sgrehan 169221828Sgrehanstatic void 170221828Sgrehanvcpu_cleanup(struct vcpu *vcpu) 171221828Sgrehan{ 172221828Sgrehan vlapic_cleanup(vcpu->vlapic); 173234695Sgrehan vmm_stat_free(vcpu->stats); 174234695Sgrehan fpu_save_area_free(vcpu->guestfpu); 175221828Sgrehan} 176221828Sgrehan 177221828Sgrehanstatic void 178221828Sgrehanvcpu_init(struct vm *vm, uint32_t vcpu_id) 179221828Sgrehan{ 180221828Sgrehan struct vcpu *vcpu; 181221828Sgrehan 182221828Sgrehan vcpu = &vm->vcpu[vcpu_id]; 183221828Sgrehan 184241489Sneel vcpu_lock_init(vcpu); 185241489Sneel vcpu->hostcpu = NOCPU; 186221828Sgrehan vcpu->vcpuid = vcpu_id; 187221828Sgrehan vcpu->vlapic = vlapic_init(vm, vcpu_id); 188240943Sneel vm_set_x2apic_state(vm, vcpu_id, X2APIC_ENABLED); 189234695Sgrehan vcpu->guestfpu = fpu_save_area_alloc(); 190234695Sgrehan fpu_save_area_reset(vcpu->guestfpu); 191221828Sgrehan vcpu->stats = vmm_stat_alloc(); 192221828Sgrehan} 193221828Sgrehan 194240894Sneelstruct vm_exit * 195240894Sneelvm_exitinfo(struct vm *vm, int cpuid) 196240894Sneel{ 197240894Sneel struct vcpu *vcpu; 198240894Sneel 199240894Sneel if (cpuid < 0 || cpuid >= VM_MAXCPU) 200240894Sneel panic("vm_exitinfo: invalid cpuid %d", cpuid); 201240894Sneel 202240894Sneel vcpu = &vm->vcpu[cpuid]; 203240894Sneel 204240894Sneel return (&vcpu->exitinfo); 205240894Sneel} 206240894Sneel 207261275Sjhbstatic void 208261275Sjhbvmm_resume(void) 209261275Sjhb{ 210261275Sjhb VMM_RESUME(); 211261275Sjhb} 212261275Sjhb 213221828Sgrehanstatic int 
214221828Sgrehanvmm_init(void) 215221828Sgrehan{ 216221828Sgrehan int error; 217221828Sgrehan 218242275Sneel vmm_host_state_init(); 219221828Sgrehan vmm_ipi_init(); 220221828Sgrehan 221221828Sgrehan error = vmm_mem_init(); 222221828Sgrehan if (error) 223221828Sgrehan return (error); 224221828Sgrehan 225221828Sgrehan if (vmm_is_intel()) 226221828Sgrehan ops = &vmm_ops_intel; 227221828Sgrehan else if (vmm_is_amd()) 228221828Sgrehan ops = &vmm_ops_amd; 229221828Sgrehan else 230221828Sgrehan return (ENXIO); 231221828Sgrehan 232221828Sgrehan vmm_msr_init(); 233261275Sjhb vmm_resume_p = vmm_resume; 234221828Sgrehan 235221828Sgrehan return (VMM_INIT()); 236221828Sgrehan} 237221828Sgrehan 238221828Sgrehanstatic int 239221828Sgrehanvmm_handler(module_t mod, int what, void *arg) 240221828Sgrehan{ 241221828Sgrehan int error; 242221828Sgrehan 243221828Sgrehan switch (what) { 244221828Sgrehan case MOD_LOAD: 245221828Sgrehan vmmdev_init(); 246256072Sneel iommu_init(); 247221828Sgrehan error = vmm_init(); 248249396Sneel if (error == 0) 249249396Sneel vmm_initialized = 1; 250221828Sgrehan break; 251221828Sgrehan case MOD_UNLOAD: 252241454Sneel error = vmmdev_cleanup(); 253241454Sneel if (error == 0) { 254261275Sjhb vmm_resume_p = NULL; 255241454Sneel iommu_cleanup(); 256241454Sneel vmm_ipi_cleanup(); 257241454Sneel error = VMM_CLEANUP(); 258253854Sgrehan /* 259253854Sgrehan * Something bad happened - prevent new 260253854Sgrehan * VMs from being created 261253854Sgrehan */ 262253854Sgrehan if (error) 263253854Sgrehan vmm_initialized = 0; 264241454Sneel } 265221828Sgrehan break; 266221828Sgrehan default: 267221828Sgrehan error = 0; 268221828Sgrehan break; 269221828Sgrehan } 270221828Sgrehan return (error); 271221828Sgrehan} 272221828Sgrehan 273221828Sgrehanstatic moduledata_t vmm_kmod = { 274221828Sgrehan "vmm", 275221828Sgrehan vmm_handler, 276221828Sgrehan NULL 277221828Sgrehan}; 278221828Sgrehan 279221828Sgrehan/* 280245704Sneel * vmm initialization has the following 
dependencies: 281245704Sneel * 282245704Sneel * - iommu initialization must happen after the pci passthru driver has had 283245704Sneel * a chance to attach to any passthru devices (after SI_SUB_CONFIGURE). 284245704Sneel * 285245704Sneel * - VT-x initialization requires smp_rendezvous() and therefore must happen 286245704Sneel * after SMP is fully functional (after SI_SUB_SMP). 287221828Sgrehan */ 288245704SneelDECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY); 289221828SgrehanMODULE_VERSION(vmm, 1); 290221828Sgrehan 291221828SgrehanSYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); 292221828Sgrehan 293249396Sneelint 294249396Sneelvm_create(const char *name, struct vm **retvm) 295221828Sgrehan{ 296221828Sgrehan int i; 297221828Sgrehan struct vm *vm; 298256072Sneel struct vmspace *vmspace; 299221828Sgrehan 300221828Sgrehan const int BSP = 0; 301221828Sgrehan 302249396Sneel /* 303249396Sneel * If vmm.ko could not be successfully initialized then don't attempt 304249396Sneel * to create the virtual machine. 
305249396Sneel */ 306249396Sneel if (!vmm_initialized) 307249396Sneel return (ENXIO); 308249396Sneel 309221828Sgrehan if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) 310249396Sneel return (EINVAL); 311221828Sgrehan 312256072Sneel vmspace = VMSPACE_ALLOC(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS); 313256072Sneel if (vmspace == NULL) 314256072Sneel return (ENOMEM); 315256072Sneel 316221828Sgrehan vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO); 317221828Sgrehan strcpy(vm->name, name); 318256072Sneel vm->cookie = VMINIT(vm, vmspace_pmap(vmspace)); 319261088Sjhb vm->vioapic = vioapic_init(vm); 320261088Sjhb vm->vhpet = vhpet_init(vm); 321221828Sgrehan 322221828Sgrehan for (i = 0; i < VM_MAXCPU; i++) { 323221828Sgrehan vcpu_init(vm, i); 324221828Sgrehan guest_msrs_init(vm, i); 325221828Sgrehan } 326221828Sgrehan 327221828Sgrehan vm_activate_cpu(vm, BSP); 328256072Sneel vm->vmspace = vmspace; 329221828Sgrehan 330249396Sneel *retvm = vm; 331249396Sneel return (0); 332221828Sgrehan} 333221828Sgrehan 334241178Sneelstatic void 335256072Sneelvm_free_mem_seg(struct vm *vm, struct mem_seg *seg) 336241178Sneel{ 337241178Sneel 338256072Sneel if (seg->object != NULL) 339256072Sneel vmm_mem_free(vm->vmspace, seg->gpa, seg->len); 340241362Sneel 341256072Sneel bzero(seg, sizeof(*seg)); 342241178Sneel} 343241178Sneel 344221828Sgrehanvoid 345221828Sgrehanvm_destroy(struct vm *vm) 346221828Sgrehan{ 347221828Sgrehan int i; 348221828Sgrehan 349221828Sgrehan ppt_unassign_all(vm); 350221828Sgrehan 351256072Sneel if (vm->iommu != NULL) 352256072Sneel iommu_destroy_domain(vm->iommu); 353256072Sneel 354261088Sjhb vhpet_cleanup(vm->vhpet); 355261088Sjhb vioapic_cleanup(vm->vioapic); 356261088Sjhb 357221828Sgrehan for (i = 0; i < vm->num_mem_segs; i++) 358241178Sneel vm_free_mem_seg(vm, &vm->mem_segs[i]); 359221828Sgrehan 360241178Sneel vm->num_mem_segs = 0; 361241178Sneel 362221828Sgrehan for (i = 0; i < VM_MAXCPU; i++) 363221828Sgrehan vcpu_cleanup(&vm->vcpu[i]); 364221828Sgrehan 
365256072Sneel VMSPACE_FREE(vm->vmspace); 366221828Sgrehan 367221828Sgrehan VMCLEANUP(vm->cookie); 368221828Sgrehan 369221828Sgrehan free(vm, M_VM); 370221828Sgrehan} 371221828Sgrehan 372221828Sgrehanconst char * 373221828Sgrehanvm_name(struct vm *vm) 374221828Sgrehan{ 375221828Sgrehan return (vm->name); 376221828Sgrehan} 377221828Sgrehan 378221828Sgrehanint 379221828Sgrehanvm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) 380221828Sgrehan{ 381256072Sneel vm_object_t obj; 382221828Sgrehan 383256072Sneel if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL) 384256072Sneel return (ENOMEM); 385256072Sneel else 386256072Sneel return (0); 387221828Sgrehan} 388221828Sgrehan 389221828Sgrehanint 390221828Sgrehanvm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len) 391221828Sgrehan{ 392221828Sgrehan 393256072Sneel vmm_mmio_free(vm->vmspace, gpa, len); 394256072Sneel return (0); 395221828Sgrehan} 396221828Sgrehan 397256072Sneelboolean_t 398256072Sneelvm_mem_allocated(struct vm *vm, vm_paddr_t gpa) 399241041Sneel{ 400241041Sneel int i; 401241041Sneel vm_paddr_t gpabase, gpalimit; 402241041Sneel 403241041Sneel for (i = 0; i < vm->num_mem_segs; i++) { 404241041Sneel gpabase = vm->mem_segs[i].gpa; 405241041Sneel gpalimit = gpabase + vm->mem_segs[i].len; 406241041Sneel if (gpa >= gpabase && gpa < gpalimit) 407256072Sneel return (TRUE); /* 'gpa' is regular memory */ 408241041Sneel } 409241041Sneel 410256072Sneel if (ppt_is_mmio(vm, gpa)) 411256072Sneel return (TRUE); /* 'gpa' is pci passthru mmio */ 412256072Sneel 413256072Sneel return (FALSE); 414241041Sneel} 415241041Sneel 416221828Sgrehanint 417241041Sneelvm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len) 418221828Sgrehan{ 419256072Sneel int available, allocated; 420256072Sneel struct mem_seg *seg; 421256072Sneel vm_object_t object; 422256072Sneel vm_paddr_t g; 423221828Sgrehan 424241041Sneel if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0) 425241041Sneel return (EINVAL); 
426221828Sgrehan 427241041Sneel available = allocated = 0; 428241041Sneel g = gpa; 429241041Sneel while (g < gpa + len) { 430256072Sneel if (vm_mem_allocated(vm, g)) 431256072Sneel allocated++; 432256072Sneel else 433241041Sneel available++; 434241041Sneel 435241041Sneel g += PAGE_SIZE; 436241041Sneel } 437241041Sneel 438221828Sgrehan /* 439241041Sneel * If there are some allocated and some available pages in the address 440241041Sneel * range then it is an error. 441221828Sgrehan */ 442241041Sneel if (allocated && available) 443241041Sneel return (EINVAL); 444221828Sgrehan 445241041Sneel /* 446241041Sneel * If the entire address range being requested has already been 447241041Sneel * allocated then there isn't anything more to do. 448241041Sneel */ 449241041Sneel if (allocated && available == 0) 450241041Sneel return (0); 451241041Sneel 452221828Sgrehan if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS) 453221828Sgrehan return (E2BIG); 454221828Sgrehan 455241178Sneel seg = &vm->mem_segs[vm->num_mem_segs]; 456221828Sgrehan 457256072Sneel if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL) 458256072Sneel return (ENOMEM); 459256072Sneel 460241178Sneel seg->gpa = gpa; 461256072Sneel seg->len = len; 462256072Sneel seg->object = object; 463256072Sneel seg->wired = FALSE; 464241178Sneel 465256072Sneel vm->num_mem_segs++; 466256072Sneel 467256072Sneel return (0); 468256072Sneel} 469256072Sneel 470256072Sneelstatic void 471256072Sneelvm_gpa_unwire(struct vm *vm) 472256072Sneel{ 473256072Sneel int i, rv; 474256072Sneel struct mem_seg *seg; 475256072Sneel 476256072Sneel for (i = 0; i < vm->num_mem_segs; i++) { 477256072Sneel seg = &vm->mem_segs[i]; 478256072Sneel if (!seg->wired) 479256072Sneel continue; 480256072Sneel 481256072Sneel rv = vm_map_unwire(&vm->vmspace->vm_map, 482256072Sneel seg->gpa, seg->gpa + seg->len, 483256072Sneel VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 484256072Sneel KASSERT(rv == KERN_SUCCESS, ("vm(%s) memory segment " 485256072Sneel "%#lx/%ld 
could not be unwired: %d", 486256072Sneel vm_name(vm), seg->gpa, seg->len, rv)); 487256072Sneel 488256072Sneel seg->wired = FALSE; 489256072Sneel } 490256072Sneel} 491256072Sneel 492256072Sneelstatic int 493256072Sneelvm_gpa_wire(struct vm *vm) 494256072Sneel{ 495256072Sneel int i, rv; 496256072Sneel struct mem_seg *seg; 497256072Sneel 498256072Sneel for (i = 0; i < vm->num_mem_segs; i++) { 499256072Sneel seg = &vm->mem_segs[i]; 500256072Sneel if (seg->wired) 501256072Sneel continue; 502256072Sneel 503256072Sneel /* XXX rlimits? */ 504256072Sneel rv = vm_map_wire(&vm->vmspace->vm_map, 505256072Sneel seg->gpa, seg->gpa + seg->len, 506256072Sneel VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 507256072Sneel if (rv != KERN_SUCCESS) 508241178Sneel break; 509241178Sneel 510256072Sneel seg->wired = TRUE; 511256072Sneel } 512256072Sneel 513256072Sneel if (i < vm->num_mem_segs) { 514241362Sneel /* 515256072Sneel * Undo the wiring before returning an error. 516241362Sneel */ 517256072Sneel vm_gpa_unwire(vm); 518256072Sneel return (EAGAIN); 519256072Sneel } 520241178Sneel 521256072Sneel return (0); 522256072Sneel} 523256072Sneel 524256072Sneelstatic void 525256072Sneelvm_iommu_modify(struct vm *vm, boolean_t map) 526256072Sneel{ 527256072Sneel int i, sz; 528256072Sneel vm_paddr_t gpa, hpa; 529256072Sneel struct mem_seg *seg; 530256072Sneel void *vp, *cookie, *host_domain; 531256072Sneel 532256072Sneel sz = PAGE_SIZE; 533256072Sneel host_domain = iommu_host_domain(); 534256072Sneel 535256072Sneel for (i = 0; i < vm->num_mem_segs; i++) { 536256072Sneel seg = &vm->mem_segs[i]; 537256072Sneel KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired", 538256072Sneel vm_name(vm), seg->gpa, seg->len)); 539256072Sneel 540256072Sneel gpa = seg->gpa; 541256072Sneel while (gpa < seg->gpa + seg->len) { 542256072Sneel vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE, 543256072Sneel &cookie); 544256072Sneel KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx", 545256072Sneel 
vm_name(vm), gpa)); 546256072Sneel 547256072Sneel vm_gpa_release(cookie); 548256072Sneel 549256072Sneel hpa = DMAP_TO_PHYS((uintptr_t)vp); 550256072Sneel if (map) { 551256072Sneel iommu_create_mapping(vm->iommu, gpa, hpa, sz); 552256072Sneel iommu_remove_mapping(host_domain, hpa, sz); 553256072Sneel } else { 554256072Sneel iommu_remove_mapping(vm->iommu, gpa, sz); 555256072Sneel iommu_create_mapping(host_domain, hpa, hpa, sz); 556256072Sneel } 557256072Sneel 558256072Sneel gpa += PAGE_SIZE; 559256072Sneel } 560241178Sneel } 561241178Sneel 562256072Sneel /* 563256072Sneel * Invalidate the cached translations associated with the domain 564256072Sneel * from which pages were removed. 565256072Sneel */ 566256072Sneel if (map) 567256072Sneel iommu_invalidate_tlb(host_domain); 568256072Sneel else 569256072Sneel iommu_invalidate_tlb(vm->iommu); 570256072Sneel} 571256072Sneel 572256072Sneel#define vm_iommu_unmap(vm) vm_iommu_modify((vm), FALSE) 573256072Sneel#define vm_iommu_map(vm) vm_iommu_modify((vm), TRUE) 574256072Sneel 575256072Sneelint 576256072Sneelvm_unassign_pptdev(struct vm *vm, int bus, int slot, int func) 577256072Sneel{ 578256072Sneel int error; 579256072Sneel 580256072Sneel error = ppt_unassign_device(vm, bus, slot, func); 581256072Sneel if (error) 582221828Sgrehan return (error); 583256072Sneel 584256072Sneel if (ppt_num_devices(vm) == 0) { 585256072Sneel vm_iommu_unmap(vm); 586256072Sneel vm_gpa_unwire(vm); 587221828Sgrehan } 588256072Sneel return (0); 589256072Sneel} 590221828Sgrehan 591256072Sneelint 592256072Sneelvm_assign_pptdev(struct vm *vm, int bus, int slot, int func) 593256072Sneel{ 594256072Sneel int error; 595256072Sneel vm_paddr_t maxaddr; 596256072Sneel 597241362Sneel /* 598256072Sneel * Virtual machines with pci passthru devices get special treatment: 599256072Sneel * - the guest physical memory is wired 600256072Sneel * - the iommu is programmed to do the 'gpa' to 'hpa' translation 601256072Sneel * 602256072Sneel * We need to do this before 
the first pci passthru device is attached. 603241362Sneel */ 604256072Sneel if (ppt_num_devices(vm) == 0) { 605256072Sneel KASSERT(vm->iommu == NULL, 606256072Sneel ("vm_assign_pptdev: iommu must be NULL")); 607256072Sneel maxaddr = vmm_mem_maxaddr(); 608256072Sneel vm->iommu = iommu_create_domain(maxaddr); 609241362Sneel 610256072Sneel error = vm_gpa_wire(vm); 611256072Sneel if (error) 612256072Sneel return (error); 613241041Sneel 614256072Sneel vm_iommu_map(vm); 615256072Sneel } 616256072Sneel 617256072Sneel error = ppt_assign_device(vm, bus, slot, func); 618256072Sneel return (error); 619221828Sgrehan} 620221828Sgrehan 621256072Sneelvoid * 622256072Sneelvm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, 623256072Sneel void **cookie) 624221828Sgrehan{ 625256072Sneel int count, pageoff; 626256072Sneel vm_page_t m; 627221828Sgrehan 628256072Sneel pageoff = gpa & PAGE_MASK; 629256072Sneel if (len > PAGE_SIZE - pageoff) 630256072Sneel panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len); 631241148Sneel 632256072Sneel count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map, 633256072Sneel trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1); 634256072Sneel 635256072Sneel if (count == 1) { 636256072Sneel *cookie = m; 637256072Sneel return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff)); 638256072Sneel } else { 639256072Sneel *cookie = NULL; 640256072Sneel return (NULL); 641256072Sneel } 642221828Sgrehan} 643221828Sgrehan 644256072Sneelvoid 645256072Sneelvm_gpa_release(void *cookie) 646256072Sneel{ 647256072Sneel vm_page_t m = cookie; 648256072Sneel 649256072Sneel vm_page_lock(m); 650256072Sneel vm_page_unhold(m); 651256072Sneel vm_page_unlock(m); 652256072Sneel} 653256072Sneel 654221828Sgrehanint 655221828Sgrehanvm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase, 656221828Sgrehan struct vm_memory_segment *seg) 657221828Sgrehan{ 658221828Sgrehan int i; 659221828Sgrehan 660221828Sgrehan for (i = 0; i < vm->num_mem_segs; i++) { 661221828Sgrehan if 
(gpabase == vm->mem_segs[i].gpa) { 662256072Sneel seg->gpa = vm->mem_segs[i].gpa; 663256072Sneel seg->len = vm->mem_segs[i].len; 664256072Sneel seg->wired = vm->mem_segs[i].wired; 665221828Sgrehan return (0); 666221828Sgrehan } 667221828Sgrehan } 668221828Sgrehan return (-1); 669221828Sgrehan} 670221828Sgrehan 671221828Sgrehanint 672256072Sneelvm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len, 673256072Sneel vm_offset_t *offset, struct vm_object **object) 674256072Sneel{ 675256072Sneel int i; 676256072Sneel size_t seg_len; 677256072Sneel vm_paddr_t seg_gpa; 678256072Sneel vm_object_t seg_obj; 679256072Sneel 680256072Sneel for (i = 0; i < vm->num_mem_segs; i++) { 681256072Sneel if ((seg_obj = vm->mem_segs[i].object) == NULL) 682256072Sneel continue; 683256072Sneel 684256072Sneel seg_gpa = vm->mem_segs[i].gpa; 685256072Sneel seg_len = vm->mem_segs[i].len; 686256072Sneel 687256072Sneel if (gpa >= seg_gpa && gpa < seg_gpa + seg_len) { 688256072Sneel *offset = gpa - seg_gpa; 689256072Sneel *object = seg_obj; 690256072Sneel vm_object_reference(seg_obj); 691256072Sneel return (0); 692256072Sneel } 693256072Sneel } 694256072Sneel 695256072Sneel return (EINVAL); 696256072Sneel} 697256072Sneel 698256072Sneelint 699221828Sgrehanvm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval) 700221828Sgrehan{ 701221828Sgrehan 702221828Sgrehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 703221828Sgrehan return (EINVAL); 704221828Sgrehan 705221828Sgrehan if (reg >= VM_REG_LAST) 706221828Sgrehan return (EINVAL); 707221828Sgrehan 708221828Sgrehan return (VMGETREG(vm->cookie, vcpu, reg, retval)); 709221828Sgrehan} 710221828Sgrehan 711221828Sgrehanint 712221828Sgrehanvm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val) 713221828Sgrehan{ 714221828Sgrehan 715221828Sgrehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 716221828Sgrehan return (EINVAL); 717221828Sgrehan 718221828Sgrehan if (reg >= VM_REG_LAST) 719221828Sgrehan return (EINVAL); 720221828Sgrehan 721221828Sgrehan return 
(VMSETREG(vm->cookie, vcpu, reg, val)); 722221828Sgrehan} 723221828Sgrehan 724221828Sgrehanstatic boolean_t 725221828Sgrehanis_descriptor_table(int reg) 726221828Sgrehan{ 727221828Sgrehan 728221828Sgrehan switch (reg) { 729221828Sgrehan case VM_REG_GUEST_IDTR: 730221828Sgrehan case VM_REG_GUEST_GDTR: 731221828Sgrehan return (TRUE); 732221828Sgrehan default: 733221828Sgrehan return (FALSE); 734221828Sgrehan } 735221828Sgrehan} 736221828Sgrehan 737221828Sgrehanstatic boolean_t 738221828Sgrehanis_segment_register(int reg) 739221828Sgrehan{ 740221828Sgrehan 741221828Sgrehan switch (reg) { 742221828Sgrehan case VM_REG_GUEST_ES: 743221828Sgrehan case VM_REG_GUEST_CS: 744221828Sgrehan case VM_REG_GUEST_SS: 745221828Sgrehan case VM_REG_GUEST_DS: 746221828Sgrehan case VM_REG_GUEST_FS: 747221828Sgrehan case VM_REG_GUEST_GS: 748221828Sgrehan case VM_REG_GUEST_TR: 749221828Sgrehan case VM_REG_GUEST_LDTR: 750221828Sgrehan return (TRUE); 751221828Sgrehan default: 752221828Sgrehan return (FALSE); 753221828Sgrehan } 754221828Sgrehan} 755221828Sgrehan 756221828Sgrehanint 757221828Sgrehanvm_get_seg_desc(struct vm *vm, int vcpu, int reg, 758221828Sgrehan struct seg_desc *desc) 759221828Sgrehan{ 760221828Sgrehan 761221828Sgrehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 762221828Sgrehan return (EINVAL); 763221828Sgrehan 764221828Sgrehan if (!is_segment_register(reg) && !is_descriptor_table(reg)) 765221828Sgrehan return (EINVAL); 766221828Sgrehan 767221828Sgrehan return (VMGETDESC(vm->cookie, vcpu, reg, desc)); 768221828Sgrehan} 769221828Sgrehan 770221828Sgrehanint 771221828Sgrehanvm_set_seg_desc(struct vm *vm, int vcpu, int reg, 772221828Sgrehan struct seg_desc *desc) 773221828Sgrehan{ 774221828Sgrehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 775221828Sgrehan return (EINVAL); 776221828Sgrehan 777221828Sgrehan if (!is_segment_register(reg) && !is_descriptor_table(reg)) 778221828Sgrehan return (EINVAL); 779221828Sgrehan 780221828Sgrehan return (VMSETDESC(vm->cookie, vcpu, reg, desc)); 
781221828Sgrehan} 782221828Sgrehan 783221828Sgrehanstatic void 784221828Sgrehanrestore_guest_fpustate(struct vcpu *vcpu) 785221828Sgrehan{ 786221828Sgrehan 787234695Sgrehan /* flush host state to the pcb */ 788234695Sgrehan fpuexit(curthread); 789242122Sneel 790242122Sneel /* restore guest FPU state */ 791221828Sgrehan fpu_stop_emulating(); 792234695Sgrehan fpurestore(vcpu->guestfpu); 793242122Sneel 794242122Sneel /* 795242122Sneel * The FPU is now "dirty" with the guest's state so turn on emulation 796242122Sneel * to trap any access to the FPU by the host. 797242122Sneel */ 798242122Sneel fpu_start_emulating(); 799221828Sgrehan} 800221828Sgrehan 801221828Sgrehanstatic void 802221828Sgrehansave_guest_fpustate(struct vcpu *vcpu) 803221828Sgrehan{ 804221828Sgrehan 805242122Sneel if ((rcr0() & CR0_TS) == 0) 806242122Sneel panic("fpu emulation not enabled in host!"); 807242122Sneel 808242122Sneel /* save guest FPU state */ 809242122Sneel fpu_stop_emulating(); 810234695Sgrehan fpusave(vcpu->guestfpu); 811221828Sgrehan fpu_start_emulating(); 812221828Sgrehan} 813221828Sgrehan 814248389Sneelstatic VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle"); 815242065Sneel 816256072Sneelstatic int 817256072Sneelvcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) 818256072Sneel{ 819256072Sneel int error; 820256072Sneel 821256072Sneel vcpu_assert_locked(vcpu); 822256072Sneel 823256072Sneel /* 824256072Sneel * The following state transitions are allowed: 825256072Sneel * IDLE -> FROZEN -> IDLE 826256072Sneel * FROZEN -> RUNNING -> FROZEN 827256072Sneel * FROZEN -> SLEEPING -> FROZEN 828256072Sneel */ 829256072Sneel switch (vcpu->state) { 830256072Sneel case VCPU_IDLE: 831256072Sneel case VCPU_RUNNING: 832256072Sneel case VCPU_SLEEPING: 833256072Sneel error = (newstate != VCPU_FROZEN); 834256072Sneel break; 835256072Sneel case VCPU_FROZEN: 836256072Sneel error = (newstate == VCPU_FROZEN); 837256072Sneel break; 838256072Sneel default: 839256072Sneel error = 1; 
840256072Sneel break; 841256072Sneel } 842256072Sneel 843256072Sneel if (error == 0) 844256072Sneel vcpu->state = newstate; 845256072Sneel else 846256072Sneel error = EBUSY; 847256072Sneel 848256072Sneel return (error); 849256072Sneel} 850256072Sneel 851256072Sneelstatic void 852256072Sneelvcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate) 853256072Sneel{ 854256072Sneel int error; 855256072Sneel 856256072Sneel if ((error = vcpu_set_state(vm, vcpuid, newstate)) != 0) 857256072Sneel panic("Error %d setting state to %d\n", error, newstate); 858256072Sneel} 859256072Sneel 860256072Sneelstatic void 861256072Sneelvcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) 862256072Sneel{ 863256072Sneel int error; 864256072Sneel 865256072Sneel if ((error = vcpu_set_state_locked(vcpu, newstate)) != 0) 866256072Sneel panic("Error %d setting state to %d", error, newstate); 867256072Sneel} 868256072Sneel 869256072Sneel/* 870256072Sneel * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run. 871256072Sneel */ 872256072Sneelstatic int 873256072Sneelvm_handle_hlt(struct vm *vm, int vcpuid, boolean_t *retu) 874256072Sneel{ 875256072Sneel struct vcpu *vcpu; 876256072Sneel int sleepticks, t; 877256072Sneel 878256072Sneel vcpu = &vm->vcpu[vcpuid]; 879256072Sneel 880256072Sneel vcpu_lock(vcpu); 881256072Sneel 882256072Sneel /* 883256072Sneel * Figure out the number of host ticks until the next apic 884256072Sneel * timer interrupt in the guest. 885256072Sneel */ 886256072Sneel sleepticks = lapic_timer_tick(vm, vcpuid); 887256072Sneel 888256072Sneel /* 889256072Sneel * If the guest local apic timer is disabled then sleep for 890256072Sneel * a long time but not forever. 891256072Sneel */ 892256072Sneel if (sleepticks < 0) 893256072Sneel sleepticks = hz; 894256072Sneel 895256072Sneel /* 896256072Sneel * Do a final check for pending NMI or interrupts before 897256072Sneel * really putting this thread to sleep. 
898256072Sneel * 899256072Sneel * These interrupts could have happened any time after we 900256072Sneel * returned from VMRUN() and before we grabbed the vcpu lock. 901256072Sneel */ 902256072Sneel if (!vm_nmi_pending(vm, vcpuid) && lapic_pending_intr(vm, vcpuid) < 0) { 903256072Sneel if (sleepticks <= 0) 904256072Sneel panic("invalid sleepticks %d", sleepticks); 905256072Sneel t = ticks; 906256072Sneel vcpu_require_state_locked(vcpu, VCPU_SLEEPING); 907256072Sneel msleep_spin(vcpu, &vcpu->mtx, "vmidle", sleepticks); 908256072Sneel vcpu_require_state_locked(vcpu, VCPU_FROZEN); 909256072Sneel vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t); 910256072Sneel } 911256072Sneel vcpu_unlock(vcpu); 912256072Sneel 913256072Sneel return (0); 914256072Sneel} 915256072Sneel 916256072Sneelstatic int 917256072Sneelvm_handle_paging(struct vm *vm, int vcpuid, boolean_t *retu) 918256072Sneel{ 919256072Sneel int rv, ftype; 920256072Sneel struct vm_map *map; 921256072Sneel struct vcpu *vcpu; 922256072Sneel struct vm_exit *vme; 923256072Sneel 924256072Sneel vcpu = &vm->vcpu[vcpuid]; 925256072Sneel vme = &vcpu->exitinfo; 926256072Sneel 927256072Sneel ftype = vme->u.paging.fault_type; 928256072Sneel KASSERT(ftype == VM_PROT_READ || 929256072Sneel ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE, 930256072Sneel ("vm_handle_paging: invalid fault_type %d", ftype)); 931256072Sneel 932256072Sneel if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { 933256072Sneel rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace), 934256072Sneel vme->u.paging.gpa, ftype); 935256072Sneel if (rv == 0) 936256072Sneel goto done; 937256072Sneel } 938256072Sneel 939256072Sneel map = &vm->vmspace->vm_map; 940256072Sneel rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL); 941256072Sneel 942261088Sjhb VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, " 943261088Sjhb "ftype = %d", rv, vme->u.paging.gpa, ftype); 944256072Sneel 945256072Sneel if (rv != KERN_SUCCESS) 946256072Sneel 
return (EFAULT); 947256072Sneeldone: 948256072Sneel /* restart execution at the faulting instruction */ 949256072Sneel vme->inst_length = 0; 950256072Sneel 951256072Sneel return (0); 952256072Sneel} 953256072Sneel 954256072Sneelstatic int 955256072Sneelvm_handle_inst_emul(struct vm *vm, int vcpuid, boolean_t *retu) 956256072Sneel{ 957256072Sneel struct vie *vie; 958256072Sneel struct vcpu *vcpu; 959256072Sneel struct vm_exit *vme; 960256072Sneel int error, inst_length; 961256072Sneel uint64_t rip, gla, gpa, cr3; 962261088Sjhb mem_region_read_t mread; 963261088Sjhb mem_region_write_t mwrite; 964256072Sneel 965256072Sneel vcpu = &vm->vcpu[vcpuid]; 966256072Sneel vme = &vcpu->exitinfo; 967256072Sneel 968256072Sneel rip = vme->rip; 969256072Sneel inst_length = vme->inst_length; 970256072Sneel 971256072Sneel gla = vme->u.inst_emul.gla; 972256072Sneel gpa = vme->u.inst_emul.gpa; 973256072Sneel cr3 = vme->u.inst_emul.cr3; 974256072Sneel vie = &vme->u.inst_emul.vie; 975256072Sneel 976256072Sneel vie_init(vie); 977256072Sneel 978256072Sneel /* Fetch, decode and emulate the faulting instruction */ 979256072Sneel if (vmm_fetch_instruction(vm, vcpuid, rip, inst_length, cr3, vie) != 0) 980256072Sneel return (EFAULT); 981256072Sneel 982256072Sneel if (vmm_decode_instruction(vm, vcpuid, gla, vie) != 0) 983256072Sneel return (EFAULT); 984256072Sneel 985261088Sjhb /* return to userland unless this is an in-kernel emulated device */ 986261088Sjhb if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { 987261088Sjhb mread = lapic_mmio_read; 988261088Sjhb mwrite = lapic_mmio_write; 989261088Sjhb } else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) { 990261088Sjhb mread = vioapic_mmio_read; 991261088Sjhb mwrite = vioapic_mmio_write; 992261088Sjhb } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) { 993261088Sjhb mread = vhpet_mmio_read; 994261088Sjhb mwrite = vhpet_mmio_write; 995261088Sjhb } else { 996256072Sneel *retu = TRUE; 997256072Sneel 
return (0); 998256072Sneel } 999256072Sneel 1000261088Sjhb error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, mread, mwrite, 0); 1001256072Sneel 1002256072Sneel /* return to userland to spin up the AP */ 1003256072Sneel if (error == 0 && vme->exitcode == VM_EXITCODE_SPINUP_AP) 1004256072Sneel *retu = TRUE; 1005256072Sneel 1006256072Sneel return (error); 1007256072Sneel} 1008256072Sneel 1009221828Sgrehanint 1010221828Sgrehanvm_run(struct vm *vm, struct vm_run *vmrun) 1011221828Sgrehan{ 1012256072Sneel int error, vcpuid; 1013221828Sgrehan struct vcpu *vcpu; 1014221828Sgrehan struct pcb *pcb; 1015242065Sneel uint64_t tscval, rip; 1016242065Sneel struct vm_exit *vme; 1017256072Sneel boolean_t retu; 1018256072Sneel pmap_t pmap; 1019221828Sgrehan 1020221828Sgrehan vcpuid = vmrun->cpuid; 1021221828Sgrehan 1022221828Sgrehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1023221828Sgrehan return (EINVAL); 1024221828Sgrehan 1025256072Sneel pmap = vmspace_pmap(vm->vmspace); 1026221828Sgrehan vcpu = &vm->vcpu[vcpuid]; 1027256072Sneel vme = &vcpu->exitinfo; 1028242065Sneel rip = vmrun->rip; 1029242065Sneelrestart: 1030221828Sgrehan critical_enter(); 1031221828Sgrehan 1032256072Sneel KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active), 1033256072Sneel ("vm_run: absurd pm_active")); 1034256072Sneel 1035221828Sgrehan tscval = rdtsc(); 1036221828Sgrehan 1037221828Sgrehan pcb = PCPU_GET(curpcb); 1038221914Sjhb set_pcb_flags(pcb, PCB_FULL_IRET); 1039221828Sgrehan 1040234695Sgrehan restore_guest_msrs(vm, vcpuid); 1041221828Sgrehan restore_guest_fpustate(vcpu); 1042241489Sneel 1043256072Sneel vcpu_require_state(vm, vcpuid, VCPU_RUNNING); 1044241489Sneel vcpu->hostcpu = curcpu; 1045256072Sneel error = VMRUN(vm->cookie, vcpuid, rip, pmap); 1046241489Sneel vcpu->hostcpu = NOCPU; 1047256072Sneel vcpu_require_state(vm, vcpuid, VCPU_FROZEN); 1048241489Sneel 1049221828Sgrehan save_guest_fpustate(vcpu); 1050221828Sgrehan restore_host_msrs(vm, vcpuid); 1051221828Sgrehan 1052221828Sgrehan vmm_stat_incr(vm, 
vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval); 1053221828Sgrehan 1054221828Sgrehan critical_exit(); 1055221828Sgrehan 1056256072Sneel if (error == 0) { 1057256072Sneel retu = FALSE; 1058256072Sneel switch (vme->exitcode) { 1059256072Sneel case VM_EXITCODE_HLT: 1060256072Sneel error = vm_handle_hlt(vm, vcpuid, &retu); 1061256072Sneel break; 1062256072Sneel case VM_EXITCODE_PAGING: 1063256072Sneel error = vm_handle_paging(vm, vcpuid, &retu); 1064256072Sneel break; 1065256072Sneel case VM_EXITCODE_INST_EMUL: 1066256072Sneel error = vm_handle_inst_emul(vm, vcpuid, &retu); 1067256072Sneel break; 1068256072Sneel default: 1069256072Sneel retu = TRUE; /* handled in userland */ 1070256072Sneel break; 1071242065Sneel } 1072256072Sneel } 1073242065Sneel 1074256072Sneel if (error == 0 && retu == FALSE) { 1075242065Sneel rip = vme->rip + vme->inst_length; 1076242065Sneel goto restart; 1077242065Sneel } 1078242065Sneel 1079256072Sneel /* copy the exit information */ 1080256072Sneel bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit)); 1081221828Sgrehan return (error); 1082221828Sgrehan} 1083221828Sgrehan 1084221828Sgrehanint 1085221828Sgrehanvm_inject_event(struct vm *vm, int vcpuid, int type, 1086221828Sgrehan int vector, uint32_t code, int code_valid) 1087221828Sgrehan{ 1088221828Sgrehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1089221828Sgrehan return (EINVAL); 1090221828Sgrehan 1091221828Sgrehan if ((type > VM_EVENT_NONE && type < VM_EVENT_MAX) == 0) 1092221828Sgrehan return (EINVAL); 1093221828Sgrehan 1094221828Sgrehan if (vector < 0 || vector > 255) 1095221828Sgrehan return (EINVAL); 1096221828Sgrehan 1097221828Sgrehan return (VMINJECT(vm->cookie, vcpuid, type, vector, code, code_valid)); 1098221828Sgrehan} 1099221828Sgrehan 1100248389Sneelstatic VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu"); 1101241982Sneel 1102221828Sgrehanint 1103241982Sneelvm_inject_nmi(struct vm *vm, int vcpuid) 1104221828Sgrehan{ 1105241982Sneel struct vcpu *vcpu; 1106221828Sgrehan 
1107241982Sneel if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1108221828Sgrehan return (EINVAL); 1109221828Sgrehan 1110241982Sneel vcpu = &vm->vcpu[vcpuid]; 1111241982Sneel 1112241982Sneel vcpu->nmi_pending = 1; 1113241982Sneel vm_interrupt_hostcpu(vm, vcpuid); 1114241982Sneel return (0); 1115221828Sgrehan} 1116221828Sgrehan 1117221828Sgrehanint 1118241982Sneelvm_nmi_pending(struct vm *vm, int vcpuid) 1119241982Sneel{ 1120241982Sneel struct vcpu *vcpu; 1121241982Sneel 1122241982Sneel if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1123241982Sneel panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); 1124241982Sneel 1125241982Sneel vcpu = &vm->vcpu[vcpuid]; 1126241982Sneel 1127241982Sneel return (vcpu->nmi_pending); 1128241982Sneel} 1129241982Sneel 1130241982Sneelvoid 1131241982Sneelvm_nmi_clear(struct vm *vm, int vcpuid) 1132241982Sneel{ 1133241982Sneel struct vcpu *vcpu; 1134241982Sneel 1135241982Sneel if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1136241982Sneel panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); 1137241982Sneel 1138241982Sneel vcpu = &vm->vcpu[vcpuid]; 1139241982Sneel 1140241982Sneel if (vcpu->nmi_pending == 0) 1141241982Sneel panic("vm_nmi_clear: inconsistent nmi_pending state"); 1142241982Sneel 1143241982Sneel vcpu->nmi_pending = 0; 1144241982Sneel vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1); 1145241982Sneel} 1146241982Sneel 1147241982Sneelint 1148221828Sgrehanvm_get_capability(struct vm *vm, int vcpu, int type, int *retval) 1149221828Sgrehan{ 1150221828Sgrehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 1151221828Sgrehan return (EINVAL); 1152221828Sgrehan 1153221828Sgrehan if (type < 0 || type >= VM_CAP_MAX) 1154221828Sgrehan return (EINVAL); 1155221828Sgrehan 1156221828Sgrehan return (VMGETCAP(vm->cookie, vcpu, type, retval)); 1157221828Sgrehan} 1158221828Sgrehan 1159221828Sgrehanint 1160221828Sgrehanvm_set_capability(struct vm *vm, int vcpu, int type, int val) 1161221828Sgrehan{ 1162221828Sgrehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 1163221828Sgrehan return (EINVAL); 
1164221828Sgrehan 1165221828Sgrehan if (type < 0 || type >= VM_CAP_MAX) 1166221828Sgrehan return (EINVAL); 1167221828Sgrehan 1168221828Sgrehan return (VMSETCAP(vm->cookie, vcpu, type, val)); 1169221828Sgrehan} 1170221828Sgrehan 1171221828Sgrehanuint64_t * 1172221828Sgrehanvm_guest_msrs(struct vm *vm, int cpu) 1173221828Sgrehan{ 1174221828Sgrehan return (vm->vcpu[cpu].guest_msrs); 1175221828Sgrehan} 1176221828Sgrehan 1177221828Sgrehanstruct vlapic * 1178221828Sgrehanvm_lapic(struct vm *vm, int cpu) 1179221828Sgrehan{ 1180221828Sgrehan return (vm->vcpu[cpu].vlapic); 1181221828Sgrehan} 1182221828Sgrehan 1183261088Sjhbstruct vioapic * 1184261088Sjhbvm_ioapic(struct vm *vm) 1185261088Sjhb{ 1186261088Sjhb 1187261088Sjhb return (vm->vioapic); 1188261088Sjhb} 1189261088Sjhb 1190261088Sjhbstruct vhpet * 1191261088Sjhbvm_hpet(struct vm *vm) 1192261088Sjhb{ 1193261088Sjhb 1194261088Sjhb return (vm->vhpet); 1195261088Sjhb} 1196261088Sjhb 1197221828Sgrehanboolean_t 1198221828Sgrehanvmm_is_pptdev(int bus, int slot, int func) 1199221828Sgrehan{ 1200246188Sneel int found, i, n; 1201246188Sneel int b, s, f; 1202221828Sgrehan char *val, *cp, *cp2; 1203221828Sgrehan 1204221828Sgrehan /* 1205246188Sneel * XXX 1206246188Sneel * The length of an environment variable is limited to 128 bytes which 1207246188Sneel * puts an upper limit on the number of passthru devices that may be 1208246188Sneel * specified using a single environment variable. 1209246188Sneel * 1210246188Sneel * Work around this by scanning multiple environment variable 1211246188Sneel * names instead of a single one - yuck! 
1212221828Sgrehan */ 1213246188Sneel const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL }; 1214246188Sneel 1215246188Sneel /* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */ 1216221828Sgrehan found = 0; 1217246188Sneel for (i = 0; names[i] != NULL && !found; i++) { 1218246188Sneel cp = val = getenv(names[i]); 1219246188Sneel while (cp != NULL && *cp != '\0') { 1220246188Sneel if ((cp2 = strchr(cp, ' ')) != NULL) 1221246188Sneel *cp2 = '\0'; 1222221828Sgrehan 1223246188Sneel n = sscanf(cp, "%d/%d/%d", &b, &s, &f); 1224246188Sneel if (n == 3 && bus == b && slot == s && func == f) { 1225246188Sneel found = 1; 1226246188Sneel break; 1227246188Sneel } 1228221828Sgrehan 1229246188Sneel if (cp2 != NULL) 1230246188Sneel *cp2++ = ' '; 1231221828Sgrehan 1232246188Sneel cp = cp2; 1233246188Sneel } 1234246188Sneel freeenv(val); 1235221828Sgrehan } 1236221828Sgrehan return (found); 1237221828Sgrehan} 1238221828Sgrehan 1239221828Sgrehanvoid * 1240221828Sgrehanvm_iommu_domain(struct vm *vm) 1241221828Sgrehan{ 1242221828Sgrehan 1243221828Sgrehan return (vm->iommu); 1244221828Sgrehan} 1245221828Sgrehan 1246241489Sneelint 1247256072Sneelvcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate) 1248221828Sgrehan{ 1249241489Sneel int error; 1250221828Sgrehan struct vcpu *vcpu; 1251221828Sgrehan 1252221828Sgrehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1253221828Sgrehan panic("vm_set_run_state: invalid vcpuid %d", vcpuid); 1254221828Sgrehan 1255221828Sgrehan vcpu = &vm->vcpu[vcpuid]; 1256221828Sgrehan 1257241489Sneel vcpu_lock(vcpu); 1258256072Sneel error = vcpu_set_state_locked(vcpu, newstate); 1259241489Sneel vcpu_unlock(vcpu); 1260241489Sneel 1261241489Sneel return (error); 1262221828Sgrehan} 1263221828Sgrehan 1264241489Sneelenum vcpu_state 1265249879Sgrehanvcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu) 1266221828Sgrehan{ 1267221828Sgrehan struct vcpu *vcpu; 1268241489Sneel enum vcpu_state state; 1269221828Sgrehan 1270221828Sgrehan if (vcpuid < 0 || vcpuid 
>= VM_MAXCPU) 1271221828Sgrehan panic("vm_get_run_state: invalid vcpuid %d", vcpuid); 1272221828Sgrehan 1273221828Sgrehan vcpu = &vm->vcpu[vcpuid]; 1274221828Sgrehan 1275241489Sneel vcpu_lock(vcpu); 1276241489Sneel state = vcpu->state; 1277249879Sgrehan if (hostcpu != NULL) 1278249879Sgrehan *hostcpu = vcpu->hostcpu; 1279241489Sneel vcpu_unlock(vcpu); 1280221828Sgrehan 1281241489Sneel return (state); 1282221828Sgrehan} 1283221828Sgrehan 1284221828Sgrehanvoid 1285221828Sgrehanvm_activate_cpu(struct vm *vm, int vcpuid) 1286221828Sgrehan{ 1287221828Sgrehan 1288221828Sgrehan if (vcpuid >= 0 && vcpuid < VM_MAXCPU) 1289223621Sgrehan CPU_SET(vcpuid, &vm->active_cpus); 1290221828Sgrehan} 1291221828Sgrehan 1292223621Sgrehancpuset_t 1293221828Sgrehanvm_active_cpus(struct vm *vm) 1294221828Sgrehan{ 1295221828Sgrehan 1296221828Sgrehan return (vm->active_cpus); 1297221828Sgrehan} 1298221828Sgrehan 1299221828Sgrehanvoid * 1300221828Sgrehanvcpu_stats(struct vm *vm, int vcpuid) 1301221828Sgrehan{ 1302221828Sgrehan 1303221828Sgrehan return (vm->vcpu[vcpuid].stats); 1304221828Sgrehan} 1305240922Sneel 1306240922Sneelint 1307240922Sneelvm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state) 1308240922Sneel{ 1309240922Sneel if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1310240922Sneel return (EINVAL); 1311240922Sneel 1312240922Sneel *state = vm->vcpu[vcpuid].x2apic_state; 1313240922Sneel 1314240922Sneel return (0); 1315240922Sneel} 1316240922Sneel 1317240922Sneelint 1318240922Sneelvm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) 1319240922Sneel{ 1320240922Sneel if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1321240922Sneel return (EINVAL); 1322240922Sneel 1323248392Sneel if (state >= X2APIC_STATE_LAST) 1324240922Sneel return (EINVAL); 1325240922Sneel 1326240922Sneel vm->vcpu[vcpuid].x2apic_state = state; 1327240922Sneel 1328240943Sneel vlapic_set_x2apic_state(vm, vcpuid, state); 1329240943Sneel 1330240922Sneel return (0); 1331240922Sneel} 1332241489Sneel 
/*
 * Notify a vcpu that an event (e.g. an interrupt or NMI) is pending so
 * that it re-evaluates its state: a SLEEPING vcpu (parked in
 * vm_handle_hlt) is woken, a RUNNING vcpu on another host cpu is kicked
 * out of the guest with an IPI.
 */
void
vm_interrupt_hostcpu(struct vm *vm, int vcpuid)
{
	int hostcpu;
	struct vcpu *vcpu;

	vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);
	hostcpu = vcpu->hostcpu;
	if (hostcpu == NOCPU) {
		/* Not in the guest: wake it if it is sleeping in 'hlt' */
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
	} else {
		/* hostcpu is only valid while the vcpu is RUNNING */
		if (vcpu->state != VCPU_RUNNING)
			panic("invalid vcpu state %d", vcpu->state);
		/*
		 * No IPI needed when running on this cpu - the vcpu will
		 * notice the event on its way back into the guest.
		 */
		if (hostcpu != curcpu)
			ipi_cpu(hostcpu, vmm_ipinum);
	}
	vcpu_unlock(vcpu);
}

/* Accessor for the VM's vmspace (guest physical memory map). */
struct vmspace *
vm_get_vmspace(struct vm *vm)
{

	return (vm->vmspace);
}

/* Translate a local apic id to a vcpu id. */
int
vm_apicid2vcpuid(struct vm *vm, int apicid)
{
	/*
	 * XXX apic id is assumed to be numerically identical to vcpu id
	 */
	return (apicid);
}