/* vmm.c revision 261088 */
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/sys/amd64/vmm/vmm.c 261088 2014-01-23 20:21:39Z jhb $
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/amd64/vmm/vmm.c 261088 2014-01-23 20:21:39Z jhb $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/pcpu.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/systm.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/vm.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <x86/apicreg.h>
#include <machine/pmap.h>
#include <machine/vmparam.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>

#include "vmm_ktr.h"
#include "vmm_host.h"
#include "vmm_mem.h"
#include "vmm_util.h"
#include "vhpet.h"
#include "vioapic.h"
#include "vlapic.h"
#include "vmm_msr.h"
#include "vmm_ipi.h"
#include "vmm_stat.h"
#include "vmm_lapic.h"

#include "io/ppt.h"
#include "io/iommu.h"

struct vlapic;
81221828Sgrehan 82221828Sgrehanstruct vcpu { 83221828Sgrehan int flags; 84241489Sneel enum vcpu_state state; 85241489Sneel struct mtx mtx; 86221828Sgrehan int hostcpu; /* host cpuid this vcpu last ran on */ 87221828Sgrehan uint64_t guest_msrs[VMM_MSR_NUM]; 88221828Sgrehan struct vlapic *vlapic; 89221828Sgrehan int vcpuid; 90234695Sgrehan struct savefpu *guestfpu; /* guest fpu state */ 91221828Sgrehan void *stats; 92240894Sneel struct vm_exit exitinfo; 93240922Sneel enum x2apic_state x2apic_state; 94241982Sneel int nmi_pending; 95221828Sgrehan}; 96221828Sgrehan 97242065Sneel#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) 98242065Sneel#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) 99242065Sneel#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) 100256072Sneel#define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED) 101241489Sneel 102256072Sneelstruct mem_seg { 103256072Sneel vm_paddr_t gpa; 104256072Sneel size_t len; 105256072Sneel boolean_t wired; 106256072Sneel vm_object_t object; 107256072Sneel}; 108221828Sgrehan#define VM_MAX_MEMORY_SEGMENTS 2 109221828Sgrehan 110221828Sgrehanstruct vm { 111221828Sgrehan void *cookie; /* processor-specific data */ 112221828Sgrehan void *iommu; /* iommu-specific data */ 113261088Sjhb struct vhpet *vhpet; /* virtual HPET */ 114261088Sjhb struct vioapic *vioapic; /* virtual ioapic */ 115256072Sneel struct vmspace *vmspace; /* guest's address space */ 116221828Sgrehan struct vcpu vcpu[VM_MAXCPU]; 117221828Sgrehan int num_mem_segs; 118256072Sneel struct mem_seg mem_segs[VM_MAX_MEMORY_SEGMENTS]; 119221828Sgrehan char name[VM_MAX_NAMELEN]; 120221828Sgrehan 121221828Sgrehan /* 122223621Sgrehan * Set of active vcpus. 123221828Sgrehan * An active vcpu is one that has been started implicitly (BSP) or 124221828Sgrehan * explicitly (AP) by sending it a startup ipi. 
125221828Sgrehan */ 126223621Sgrehan cpuset_t active_cpus; 127221828Sgrehan}; 128221828Sgrehan 129249396Sneelstatic int vmm_initialized; 130249396Sneel 131221828Sgrehanstatic struct vmm_ops *ops; 132221828Sgrehan#define VMM_INIT() (ops != NULL ? (*ops->init)() : 0) 133221828Sgrehan#define VMM_CLEANUP() (ops != NULL ? (*ops->cleanup)() : 0) 134221828Sgrehan 135256072Sneel#define VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL) 136256072Sneel#define VMRUN(vmi, vcpu, rip, pmap) \ 137256072Sneel (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap) : ENXIO) 138221828Sgrehan#define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL) 139256072Sneel#define VMSPACE_ALLOC(min, max) \ 140256072Sneel (ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL) 141256072Sneel#define VMSPACE_FREE(vmspace) \ 142256072Sneel (ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO) 143221828Sgrehan#define VMGETREG(vmi, vcpu, num, retval) \ 144221828Sgrehan (ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO) 145221828Sgrehan#define VMSETREG(vmi, vcpu, num, val) \ 146221828Sgrehan (ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO) 147221828Sgrehan#define VMGETDESC(vmi, vcpu, num, desc) \ 148221828Sgrehan (ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO) 149221828Sgrehan#define VMSETDESC(vmi, vcpu, num, desc) \ 150221828Sgrehan (ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO) 151221828Sgrehan#define VMINJECT(vmi, vcpu, type, vec, ec, ecv) \ 152221828Sgrehan (ops != NULL ? (*ops->vminject)(vmi, vcpu, type, vec, ec, ecv) : ENXIO) 153221828Sgrehan#define VMGETCAP(vmi, vcpu, num, retval) \ 154221828Sgrehan (ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO) 155221828Sgrehan#define VMSETCAP(vmi, vcpu, num, val) \ 156221828Sgrehan (ops != NULL ? 
(*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO) 157221828Sgrehan 158245021Sneel#define fpu_start_emulating() load_cr0(rcr0() | CR0_TS) 159245021Sneel#define fpu_stop_emulating() clts() 160221828Sgrehan 161221828Sgrehanstatic MALLOC_DEFINE(M_VM, "vm", "vm"); 162221828SgrehanCTASSERT(VMM_MSR_NUM <= 64); /* msr_mask can keep track of up to 64 msrs */ 163221828Sgrehan 164221828Sgrehan/* statistics */ 165248389Sneelstatic VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); 166221828Sgrehan 167221828Sgrehanstatic void 168221828Sgrehanvcpu_cleanup(struct vcpu *vcpu) 169221828Sgrehan{ 170221828Sgrehan vlapic_cleanup(vcpu->vlapic); 171234695Sgrehan vmm_stat_free(vcpu->stats); 172234695Sgrehan fpu_save_area_free(vcpu->guestfpu); 173221828Sgrehan} 174221828Sgrehan 175221828Sgrehanstatic void 176221828Sgrehanvcpu_init(struct vm *vm, uint32_t vcpu_id) 177221828Sgrehan{ 178221828Sgrehan struct vcpu *vcpu; 179221828Sgrehan 180221828Sgrehan vcpu = &vm->vcpu[vcpu_id]; 181221828Sgrehan 182241489Sneel vcpu_lock_init(vcpu); 183241489Sneel vcpu->hostcpu = NOCPU; 184221828Sgrehan vcpu->vcpuid = vcpu_id; 185221828Sgrehan vcpu->vlapic = vlapic_init(vm, vcpu_id); 186240943Sneel vm_set_x2apic_state(vm, vcpu_id, X2APIC_ENABLED); 187234695Sgrehan vcpu->guestfpu = fpu_save_area_alloc(); 188234695Sgrehan fpu_save_area_reset(vcpu->guestfpu); 189221828Sgrehan vcpu->stats = vmm_stat_alloc(); 190221828Sgrehan} 191221828Sgrehan 192240894Sneelstruct vm_exit * 193240894Sneelvm_exitinfo(struct vm *vm, int cpuid) 194240894Sneel{ 195240894Sneel struct vcpu *vcpu; 196240894Sneel 197240894Sneel if (cpuid < 0 || cpuid >= VM_MAXCPU) 198240894Sneel panic("vm_exitinfo: invalid cpuid %d", cpuid); 199240894Sneel 200240894Sneel vcpu = &vm->vcpu[cpuid]; 201240894Sneel 202240894Sneel return (&vcpu->exitinfo); 203240894Sneel} 204240894Sneel 205221828Sgrehanstatic int 206221828Sgrehanvmm_init(void) 207221828Sgrehan{ 208221828Sgrehan int error; 209221828Sgrehan 210242275Sneel vmm_host_state_init(); 211221828Sgrehan 
vmm_ipi_init(); 212221828Sgrehan 213221828Sgrehan error = vmm_mem_init(); 214221828Sgrehan if (error) 215221828Sgrehan return (error); 216221828Sgrehan 217221828Sgrehan if (vmm_is_intel()) 218221828Sgrehan ops = &vmm_ops_intel; 219221828Sgrehan else if (vmm_is_amd()) 220221828Sgrehan ops = &vmm_ops_amd; 221221828Sgrehan else 222221828Sgrehan return (ENXIO); 223221828Sgrehan 224221828Sgrehan vmm_msr_init(); 225221828Sgrehan 226221828Sgrehan return (VMM_INIT()); 227221828Sgrehan} 228221828Sgrehan 229221828Sgrehanstatic int 230221828Sgrehanvmm_handler(module_t mod, int what, void *arg) 231221828Sgrehan{ 232221828Sgrehan int error; 233221828Sgrehan 234221828Sgrehan switch (what) { 235221828Sgrehan case MOD_LOAD: 236221828Sgrehan vmmdev_init(); 237256072Sneel iommu_init(); 238221828Sgrehan error = vmm_init(); 239249396Sneel if (error == 0) 240249396Sneel vmm_initialized = 1; 241221828Sgrehan break; 242221828Sgrehan case MOD_UNLOAD: 243241454Sneel error = vmmdev_cleanup(); 244241454Sneel if (error == 0) { 245241454Sneel iommu_cleanup(); 246241454Sneel vmm_ipi_cleanup(); 247241454Sneel error = VMM_CLEANUP(); 248253854Sgrehan /* 249253854Sgrehan * Something bad happened - prevent new 250253854Sgrehan * VMs from being created 251253854Sgrehan */ 252253854Sgrehan if (error) 253253854Sgrehan vmm_initialized = 0; 254241454Sneel } 255221828Sgrehan break; 256221828Sgrehan default: 257221828Sgrehan error = 0; 258221828Sgrehan break; 259221828Sgrehan } 260221828Sgrehan return (error); 261221828Sgrehan} 262221828Sgrehan 263221828Sgrehanstatic moduledata_t vmm_kmod = { 264221828Sgrehan "vmm", 265221828Sgrehan vmm_handler, 266221828Sgrehan NULL 267221828Sgrehan}; 268221828Sgrehan 269221828Sgrehan/* 270245704Sneel * vmm initialization has the following dependencies: 271245704Sneel * 272245704Sneel * - iommu initialization must happen after the pci passthru driver has had 273245704Sneel * a chance to attach to any passthru devices (after SI_SUB_CONFIGURE). 
274245704Sneel * 275245704Sneel * - VT-x initialization requires smp_rendezvous() and therefore must happen 276245704Sneel * after SMP is fully functional (after SI_SUB_SMP). 277221828Sgrehan */ 278245704SneelDECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY); 279221828SgrehanMODULE_VERSION(vmm, 1); 280221828Sgrehan 281221828SgrehanSYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); 282221828Sgrehan 283249396Sneelint 284249396Sneelvm_create(const char *name, struct vm **retvm) 285221828Sgrehan{ 286221828Sgrehan int i; 287221828Sgrehan struct vm *vm; 288256072Sneel struct vmspace *vmspace; 289221828Sgrehan 290221828Sgrehan const int BSP = 0; 291221828Sgrehan 292249396Sneel /* 293249396Sneel * If vmm.ko could not be successfully initialized then don't attempt 294249396Sneel * to create the virtual machine. 295249396Sneel */ 296249396Sneel if (!vmm_initialized) 297249396Sneel return (ENXIO); 298249396Sneel 299221828Sgrehan if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) 300249396Sneel return (EINVAL); 301221828Sgrehan 302256072Sneel vmspace = VMSPACE_ALLOC(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS); 303256072Sneel if (vmspace == NULL) 304256072Sneel return (ENOMEM); 305256072Sneel 306221828Sgrehan vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO); 307221828Sgrehan strcpy(vm->name, name); 308256072Sneel vm->cookie = VMINIT(vm, vmspace_pmap(vmspace)); 309261088Sjhb vm->vioapic = vioapic_init(vm); 310261088Sjhb vm->vhpet = vhpet_init(vm); 311221828Sgrehan 312221828Sgrehan for (i = 0; i < VM_MAXCPU; i++) { 313221828Sgrehan vcpu_init(vm, i); 314221828Sgrehan guest_msrs_init(vm, i); 315221828Sgrehan } 316221828Sgrehan 317221828Sgrehan vm_activate_cpu(vm, BSP); 318256072Sneel vm->vmspace = vmspace; 319221828Sgrehan 320249396Sneel *retvm = vm; 321249396Sneel return (0); 322221828Sgrehan} 323221828Sgrehan 324241178Sneelstatic void 325256072Sneelvm_free_mem_seg(struct vm *vm, struct mem_seg *seg) 326241178Sneel{ 327241178Sneel 328256072Sneel if (seg->object != 
NULL) 329256072Sneel vmm_mem_free(vm->vmspace, seg->gpa, seg->len); 330241362Sneel 331256072Sneel bzero(seg, sizeof(*seg)); 332241178Sneel} 333241178Sneel 334221828Sgrehanvoid 335221828Sgrehanvm_destroy(struct vm *vm) 336221828Sgrehan{ 337221828Sgrehan int i; 338221828Sgrehan 339221828Sgrehan ppt_unassign_all(vm); 340221828Sgrehan 341256072Sneel if (vm->iommu != NULL) 342256072Sneel iommu_destroy_domain(vm->iommu); 343256072Sneel 344261088Sjhb vhpet_cleanup(vm->vhpet); 345261088Sjhb vioapic_cleanup(vm->vioapic); 346261088Sjhb 347221828Sgrehan for (i = 0; i < vm->num_mem_segs; i++) 348241178Sneel vm_free_mem_seg(vm, &vm->mem_segs[i]); 349221828Sgrehan 350241178Sneel vm->num_mem_segs = 0; 351241178Sneel 352221828Sgrehan for (i = 0; i < VM_MAXCPU; i++) 353221828Sgrehan vcpu_cleanup(&vm->vcpu[i]); 354221828Sgrehan 355256072Sneel VMSPACE_FREE(vm->vmspace); 356221828Sgrehan 357221828Sgrehan VMCLEANUP(vm->cookie); 358221828Sgrehan 359221828Sgrehan free(vm, M_VM); 360221828Sgrehan} 361221828Sgrehan 362221828Sgrehanconst char * 363221828Sgrehanvm_name(struct vm *vm) 364221828Sgrehan{ 365221828Sgrehan return (vm->name); 366221828Sgrehan} 367221828Sgrehan 368221828Sgrehanint 369221828Sgrehanvm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) 370221828Sgrehan{ 371256072Sneel vm_object_t obj; 372221828Sgrehan 373256072Sneel if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL) 374256072Sneel return (ENOMEM); 375256072Sneel else 376256072Sneel return (0); 377221828Sgrehan} 378221828Sgrehan 379221828Sgrehanint 380221828Sgrehanvm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len) 381221828Sgrehan{ 382221828Sgrehan 383256072Sneel vmm_mmio_free(vm->vmspace, gpa, len); 384256072Sneel return (0); 385221828Sgrehan} 386221828Sgrehan 387256072Sneelboolean_t 388256072Sneelvm_mem_allocated(struct vm *vm, vm_paddr_t gpa) 389241041Sneel{ 390241041Sneel int i; 391241041Sneel vm_paddr_t gpabase, gpalimit; 392241041Sneel 393241041Sneel for (i = 0; i < 
vm->num_mem_segs; i++) { 394241041Sneel gpabase = vm->mem_segs[i].gpa; 395241041Sneel gpalimit = gpabase + vm->mem_segs[i].len; 396241041Sneel if (gpa >= gpabase && gpa < gpalimit) 397256072Sneel return (TRUE); /* 'gpa' is regular memory */ 398241041Sneel } 399241041Sneel 400256072Sneel if (ppt_is_mmio(vm, gpa)) 401256072Sneel return (TRUE); /* 'gpa' is pci passthru mmio */ 402256072Sneel 403256072Sneel return (FALSE); 404241041Sneel} 405241041Sneel 406221828Sgrehanint 407241041Sneelvm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len) 408221828Sgrehan{ 409256072Sneel int available, allocated; 410256072Sneel struct mem_seg *seg; 411256072Sneel vm_object_t object; 412256072Sneel vm_paddr_t g; 413221828Sgrehan 414241041Sneel if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0) 415241041Sneel return (EINVAL); 416221828Sgrehan 417241041Sneel available = allocated = 0; 418241041Sneel g = gpa; 419241041Sneel while (g < gpa + len) { 420256072Sneel if (vm_mem_allocated(vm, g)) 421256072Sneel allocated++; 422256072Sneel else 423241041Sneel available++; 424241041Sneel 425241041Sneel g += PAGE_SIZE; 426241041Sneel } 427241041Sneel 428221828Sgrehan /* 429241041Sneel * If there are some allocated and some available pages in the address 430241041Sneel * range then it is an error. 431221828Sgrehan */ 432241041Sneel if (allocated && available) 433241041Sneel return (EINVAL); 434221828Sgrehan 435241041Sneel /* 436241041Sneel * If the entire address range being requested has already been 437241041Sneel * allocated then there isn't anything more to do. 
438241041Sneel */ 439241041Sneel if (allocated && available == 0) 440241041Sneel return (0); 441241041Sneel 442221828Sgrehan if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS) 443221828Sgrehan return (E2BIG); 444221828Sgrehan 445241178Sneel seg = &vm->mem_segs[vm->num_mem_segs]; 446221828Sgrehan 447256072Sneel if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL) 448256072Sneel return (ENOMEM); 449256072Sneel 450241178Sneel seg->gpa = gpa; 451256072Sneel seg->len = len; 452256072Sneel seg->object = object; 453256072Sneel seg->wired = FALSE; 454241178Sneel 455256072Sneel vm->num_mem_segs++; 456256072Sneel 457256072Sneel return (0); 458256072Sneel} 459256072Sneel 460256072Sneelstatic void 461256072Sneelvm_gpa_unwire(struct vm *vm) 462256072Sneel{ 463256072Sneel int i, rv; 464256072Sneel struct mem_seg *seg; 465256072Sneel 466256072Sneel for (i = 0; i < vm->num_mem_segs; i++) { 467256072Sneel seg = &vm->mem_segs[i]; 468256072Sneel if (!seg->wired) 469256072Sneel continue; 470256072Sneel 471256072Sneel rv = vm_map_unwire(&vm->vmspace->vm_map, 472256072Sneel seg->gpa, seg->gpa + seg->len, 473256072Sneel VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 474256072Sneel KASSERT(rv == KERN_SUCCESS, ("vm(%s) memory segment " 475256072Sneel "%#lx/%ld could not be unwired: %d", 476256072Sneel vm_name(vm), seg->gpa, seg->len, rv)); 477256072Sneel 478256072Sneel seg->wired = FALSE; 479256072Sneel } 480256072Sneel} 481256072Sneel 482256072Sneelstatic int 483256072Sneelvm_gpa_wire(struct vm *vm) 484256072Sneel{ 485256072Sneel int i, rv; 486256072Sneel struct mem_seg *seg; 487256072Sneel 488256072Sneel for (i = 0; i < vm->num_mem_segs; i++) { 489256072Sneel seg = &vm->mem_segs[i]; 490256072Sneel if (seg->wired) 491256072Sneel continue; 492256072Sneel 493256072Sneel /* XXX rlimits? 
*/ 494256072Sneel rv = vm_map_wire(&vm->vmspace->vm_map, 495256072Sneel seg->gpa, seg->gpa + seg->len, 496256072Sneel VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 497256072Sneel if (rv != KERN_SUCCESS) 498241178Sneel break; 499241178Sneel 500256072Sneel seg->wired = TRUE; 501256072Sneel } 502256072Sneel 503256072Sneel if (i < vm->num_mem_segs) { 504241362Sneel /* 505256072Sneel * Undo the wiring before returning an error. 506241362Sneel */ 507256072Sneel vm_gpa_unwire(vm); 508256072Sneel return (EAGAIN); 509256072Sneel } 510241178Sneel 511256072Sneel return (0); 512256072Sneel} 513256072Sneel 514256072Sneelstatic void 515256072Sneelvm_iommu_modify(struct vm *vm, boolean_t map) 516256072Sneel{ 517256072Sneel int i, sz; 518256072Sneel vm_paddr_t gpa, hpa; 519256072Sneel struct mem_seg *seg; 520256072Sneel void *vp, *cookie, *host_domain; 521256072Sneel 522256072Sneel sz = PAGE_SIZE; 523256072Sneel host_domain = iommu_host_domain(); 524256072Sneel 525256072Sneel for (i = 0; i < vm->num_mem_segs; i++) { 526256072Sneel seg = &vm->mem_segs[i]; 527256072Sneel KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired", 528256072Sneel vm_name(vm), seg->gpa, seg->len)); 529256072Sneel 530256072Sneel gpa = seg->gpa; 531256072Sneel while (gpa < seg->gpa + seg->len) { 532256072Sneel vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE, 533256072Sneel &cookie); 534256072Sneel KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx", 535256072Sneel vm_name(vm), gpa)); 536256072Sneel 537256072Sneel vm_gpa_release(cookie); 538256072Sneel 539256072Sneel hpa = DMAP_TO_PHYS((uintptr_t)vp); 540256072Sneel if (map) { 541256072Sneel iommu_create_mapping(vm->iommu, gpa, hpa, sz); 542256072Sneel iommu_remove_mapping(host_domain, hpa, sz); 543256072Sneel } else { 544256072Sneel iommu_remove_mapping(vm->iommu, gpa, sz); 545256072Sneel iommu_create_mapping(host_domain, hpa, hpa, sz); 546256072Sneel } 547256072Sneel 548256072Sneel gpa += PAGE_SIZE; 549256072Sneel } 550241178Sneel } 551241178Sneel 
552256072Sneel /* 553256072Sneel * Invalidate the cached translations associated with the domain 554256072Sneel * from which pages were removed. 555256072Sneel */ 556256072Sneel if (map) 557256072Sneel iommu_invalidate_tlb(host_domain); 558256072Sneel else 559256072Sneel iommu_invalidate_tlb(vm->iommu); 560256072Sneel} 561256072Sneel 562256072Sneel#define vm_iommu_unmap(vm) vm_iommu_modify((vm), FALSE) 563256072Sneel#define vm_iommu_map(vm) vm_iommu_modify((vm), TRUE) 564256072Sneel 565256072Sneelint 566256072Sneelvm_unassign_pptdev(struct vm *vm, int bus, int slot, int func) 567256072Sneel{ 568256072Sneel int error; 569256072Sneel 570256072Sneel error = ppt_unassign_device(vm, bus, slot, func); 571256072Sneel if (error) 572221828Sgrehan return (error); 573256072Sneel 574256072Sneel if (ppt_num_devices(vm) == 0) { 575256072Sneel vm_iommu_unmap(vm); 576256072Sneel vm_gpa_unwire(vm); 577221828Sgrehan } 578256072Sneel return (0); 579256072Sneel} 580221828Sgrehan 581256072Sneelint 582256072Sneelvm_assign_pptdev(struct vm *vm, int bus, int slot, int func) 583256072Sneel{ 584256072Sneel int error; 585256072Sneel vm_paddr_t maxaddr; 586256072Sneel 587241362Sneel /* 588256072Sneel * Virtual machines with pci passthru devices get special treatment: 589256072Sneel * - the guest physical memory is wired 590256072Sneel * - the iommu is programmed to do the 'gpa' to 'hpa' translation 591256072Sneel * 592256072Sneel * We need to do this before the first pci passthru device is attached. 
593241362Sneel */ 594256072Sneel if (ppt_num_devices(vm) == 0) { 595256072Sneel KASSERT(vm->iommu == NULL, 596256072Sneel ("vm_assign_pptdev: iommu must be NULL")); 597256072Sneel maxaddr = vmm_mem_maxaddr(); 598256072Sneel vm->iommu = iommu_create_domain(maxaddr); 599241362Sneel 600256072Sneel error = vm_gpa_wire(vm); 601256072Sneel if (error) 602256072Sneel return (error); 603241041Sneel 604256072Sneel vm_iommu_map(vm); 605256072Sneel } 606256072Sneel 607256072Sneel error = ppt_assign_device(vm, bus, slot, func); 608256072Sneel return (error); 609221828Sgrehan} 610221828Sgrehan 611256072Sneelvoid * 612256072Sneelvm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, 613256072Sneel void **cookie) 614221828Sgrehan{ 615256072Sneel int count, pageoff; 616256072Sneel vm_page_t m; 617221828Sgrehan 618256072Sneel pageoff = gpa & PAGE_MASK; 619256072Sneel if (len > PAGE_SIZE - pageoff) 620256072Sneel panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len); 621241148Sneel 622256072Sneel count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map, 623256072Sneel trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1); 624256072Sneel 625256072Sneel if (count == 1) { 626256072Sneel *cookie = m; 627256072Sneel return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff)); 628256072Sneel } else { 629256072Sneel *cookie = NULL; 630256072Sneel return (NULL); 631256072Sneel } 632221828Sgrehan} 633221828Sgrehan 634256072Sneelvoid 635256072Sneelvm_gpa_release(void *cookie) 636256072Sneel{ 637256072Sneel vm_page_t m = cookie; 638256072Sneel 639256072Sneel vm_page_lock(m); 640256072Sneel vm_page_unhold(m); 641256072Sneel vm_page_unlock(m); 642256072Sneel} 643256072Sneel 644221828Sgrehanint 645221828Sgrehanvm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase, 646221828Sgrehan struct vm_memory_segment *seg) 647221828Sgrehan{ 648221828Sgrehan int i; 649221828Sgrehan 650221828Sgrehan for (i = 0; i < vm->num_mem_segs; i++) { 651221828Sgrehan if (gpabase == vm->mem_segs[i].gpa) { 
652256072Sneel seg->gpa = vm->mem_segs[i].gpa; 653256072Sneel seg->len = vm->mem_segs[i].len; 654256072Sneel seg->wired = vm->mem_segs[i].wired; 655221828Sgrehan return (0); 656221828Sgrehan } 657221828Sgrehan } 658221828Sgrehan return (-1); 659221828Sgrehan} 660221828Sgrehan 661221828Sgrehanint 662256072Sneelvm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len, 663256072Sneel vm_offset_t *offset, struct vm_object **object) 664256072Sneel{ 665256072Sneel int i; 666256072Sneel size_t seg_len; 667256072Sneel vm_paddr_t seg_gpa; 668256072Sneel vm_object_t seg_obj; 669256072Sneel 670256072Sneel for (i = 0; i < vm->num_mem_segs; i++) { 671256072Sneel if ((seg_obj = vm->mem_segs[i].object) == NULL) 672256072Sneel continue; 673256072Sneel 674256072Sneel seg_gpa = vm->mem_segs[i].gpa; 675256072Sneel seg_len = vm->mem_segs[i].len; 676256072Sneel 677256072Sneel if (gpa >= seg_gpa && gpa < seg_gpa + seg_len) { 678256072Sneel *offset = gpa - seg_gpa; 679256072Sneel *object = seg_obj; 680256072Sneel vm_object_reference(seg_obj); 681256072Sneel return (0); 682256072Sneel } 683256072Sneel } 684256072Sneel 685256072Sneel return (EINVAL); 686256072Sneel} 687256072Sneel 688256072Sneelint 689221828Sgrehanvm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval) 690221828Sgrehan{ 691221828Sgrehan 692221828Sgrehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 693221828Sgrehan return (EINVAL); 694221828Sgrehan 695221828Sgrehan if (reg >= VM_REG_LAST) 696221828Sgrehan return (EINVAL); 697221828Sgrehan 698221828Sgrehan return (VMGETREG(vm->cookie, vcpu, reg, retval)); 699221828Sgrehan} 700221828Sgrehan 701221828Sgrehanint 702221828Sgrehanvm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val) 703221828Sgrehan{ 704221828Sgrehan 705221828Sgrehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 706221828Sgrehan return (EINVAL); 707221828Sgrehan 708221828Sgrehan if (reg >= VM_REG_LAST) 709221828Sgrehan return (EINVAL); 710221828Sgrehan 711221828Sgrehan return (VMSETREG(vm->cookie, vcpu, reg, 
val)); 712221828Sgrehan} 713221828Sgrehan 714221828Sgrehanstatic boolean_t 715221828Sgrehanis_descriptor_table(int reg) 716221828Sgrehan{ 717221828Sgrehan 718221828Sgrehan switch (reg) { 719221828Sgrehan case VM_REG_GUEST_IDTR: 720221828Sgrehan case VM_REG_GUEST_GDTR: 721221828Sgrehan return (TRUE); 722221828Sgrehan default: 723221828Sgrehan return (FALSE); 724221828Sgrehan } 725221828Sgrehan} 726221828Sgrehan 727221828Sgrehanstatic boolean_t 728221828Sgrehanis_segment_register(int reg) 729221828Sgrehan{ 730221828Sgrehan 731221828Sgrehan switch (reg) { 732221828Sgrehan case VM_REG_GUEST_ES: 733221828Sgrehan case VM_REG_GUEST_CS: 734221828Sgrehan case VM_REG_GUEST_SS: 735221828Sgrehan case VM_REG_GUEST_DS: 736221828Sgrehan case VM_REG_GUEST_FS: 737221828Sgrehan case VM_REG_GUEST_GS: 738221828Sgrehan case VM_REG_GUEST_TR: 739221828Sgrehan case VM_REG_GUEST_LDTR: 740221828Sgrehan return (TRUE); 741221828Sgrehan default: 742221828Sgrehan return (FALSE); 743221828Sgrehan } 744221828Sgrehan} 745221828Sgrehan 746221828Sgrehanint 747221828Sgrehanvm_get_seg_desc(struct vm *vm, int vcpu, int reg, 748221828Sgrehan struct seg_desc *desc) 749221828Sgrehan{ 750221828Sgrehan 751221828Sgrehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 752221828Sgrehan return (EINVAL); 753221828Sgrehan 754221828Sgrehan if (!is_segment_register(reg) && !is_descriptor_table(reg)) 755221828Sgrehan return (EINVAL); 756221828Sgrehan 757221828Sgrehan return (VMGETDESC(vm->cookie, vcpu, reg, desc)); 758221828Sgrehan} 759221828Sgrehan 760221828Sgrehanint 761221828Sgrehanvm_set_seg_desc(struct vm *vm, int vcpu, int reg, 762221828Sgrehan struct seg_desc *desc) 763221828Sgrehan{ 764221828Sgrehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 765221828Sgrehan return (EINVAL); 766221828Sgrehan 767221828Sgrehan if (!is_segment_register(reg) && !is_descriptor_table(reg)) 768221828Sgrehan return (EINVAL); 769221828Sgrehan 770221828Sgrehan return (VMSETDESC(vm->cookie, vcpu, reg, desc)); 771221828Sgrehan} 772221828Sgrehan 
773221828Sgrehanstatic void 774221828Sgrehanrestore_guest_fpustate(struct vcpu *vcpu) 775221828Sgrehan{ 776221828Sgrehan 777234695Sgrehan /* flush host state to the pcb */ 778234695Sgrehan fpuexit(curthread); 779242122Sneel 780242122Sneel /* restore guest FPU state */ 781221828Sgrehan fpu_stop_emulating(); 782234695Sgrehan fpurestore(vcpu->guestfpu); 783242122Sneel 784242122Sneel /* 785242122Sneel * The FPU is now "dirty" with the guest's state so turn on emulation 786242122Sneel * to trap any access to the FPU by the host. 787242122Sneel */ 788242122Sneel fpu_start_emulating(); 789221828Sgrehan} 790221828Sgrehan 791221828Sgrehanstatic void 792221828Sgrehansave_guest_fpustate(struct vcpu *vcpu) 793221828Sgrehan{ 794221828Sgrehan 795242122Sneel if ((rcr0() & CR0_TS) == 0) 796242122Sneel panic("fpu emulation not enabled in host!"); 797242122Sneel 798242122Sneel /* save guest FPU state */ 799242122Sneel fpu_stop_emulating(); 800234695Sgrehan fpusave(vcpu->guestfpu); 801221828Sgrehan fpu_start_emulating(); 802221828Sgrehan} 803221828Sgrehan 804248389Sneelstatic VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle"); 805242065Sneel 806256072Sneelstatic int 807256072Sneelvcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) 808256072Sneel{ 809256072Sneel int error; 810256072Sneel 811256072Sneel vcpu_assert_locked(vcpu); 812256072Sneel 813256072Sneel /* 814256072Sneel * The following state transitions are allowed: 815256072Sneel * IDLE -> FROZEN -> IDLE 816256072Sneel * FROZEN -> RUNNING -> FROZEN 817256072Sneel * FROZEN -> SLEEPING -> FROZEN 818256072Sneel */ 819256072Sneel switch (vcpu->state) { 820256072Sneel case VCPU_IDLE: 821256072Sneel case VCPU_RUNNING: 822256072Sneel case VCPU_SLEEPING: 823256072Sneel error = (newstate != VCPU_FROZEN); 824256072Sneel break; 825256072Sneel case VCPU_FROZEN: 826256072Sneel error = (newstate == VCPU_FROZEN); 827256072Sneel break; 828256072Sneel default: 829256072Sneel error = 1; 830256072Sneel break; 831256072Sneel 
} 832256072Sneel 833256072Sneel if (error == 0) 834256072Sneel vcpu->state = newstate; 835256072Sneel else 836256072Sneel error = EBUSY; 837256072Sneel 838256072Sneel return (error); 839256072Sneel} 840256072Sneel 841256072Sneelstatic void 842256072Sneelvcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate) 843256072Sneel{ 844256072Sneel int error; 845256072Sneel 846256072Sneel if ((error = vcpu_set_state(vm, vcpuid, newstate)) != 0) 847256072Sneel panic("Error %d setting state to %d\n", error, newstate); 848256072Sneel} 849256072Sneel 850256072Sneelstatic void 851256072Sneelvcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) 852256072Sneel{ 853256072Sneel int error; 854256072Sneel 855256072Sneel if ((error = vcpu_set_state_locked(vcpu, newstate)) != 0) 856256072Sneel panic("Error %d setting state to %d", error, newstate); 857256072Sneel} 858256072Sneel 859256072Sneel/* 860256072Sneel * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run. 861256072Sneel */ 862256072Sneelstatic int 863256072Sneelvm_handle_hlt(struct vm *vm, int vcpuid, boolean_t *retu) 864256072Sneel{ 865256072Sneel struct vcpu *vcpu; 866256072Sneel int sleepticks, t; 867256072Sneel 868256072Sneel vcpu = &vm->vcpu[vcpuid]; 869256072Sneel 870256072Sneel vcpu_lock(vcpu); 871256072Sneel 872256072Sneel /* 873256072Sneel * Figure out the number of host ticks until the next apic 874256072Sneel * timer interrupt in the guest. 875256072Sneel */ 876256072Sneel sleepticks = lapic_timer_tick(vm, vcpuid); 877256072Sneel 878256072Sneel /* 879256072Sneel * If the guest local apic timer is disabled then sleep for 880256072Sneel * a long time but not forever. 881256072Sneel */ 882256072Sneel if (sleepticks < 0) 883256072Sneel sleepticks = hz; 884256072Sneel 885256072Sneel /* 886256072Sneel * Do a final check for pending NMI or interrupts before 887256072Sneel * really putting this thread to sleep. 
888256072Sneel * 889256072Sneel * These interrupts could have happened any time after we 890256072Sneel * returned from VMRUN() and before we grabbed the vcpu lock. 891256072Sneel */ 892256072Sneel if (!vm_nmi_pending(vm, vcpuid) && lapic_pending_intr(vm, vcpuid) < 0) { 893256072Sneel if (sleepticks <= 0) 894256072Sneel panic("invalid sleepticks %d", sleepticks); 895256072Sneel t = ticks; 896256072Sneel vcpu_require_state_locked(vcpu, VCPU_SLEEPING); 897256072Sneel msleep_spin(vcpu, &vcpu->mtx, "vmidle", sleepticks); 898256072Sneel vcpu_require_state_locked(vcpu, VCPU_FROZEN); 899256072Sneel vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t); 900256072Sneel } 901256072Sneel vcpu_unlock(vcpu); 902256072Sneel 903256072Sneel return (0); 904256072Sneel} 905256072Sneel 906256072Sneelstatic int 907256072Sneelvm_handle_paging(struct vm *vm, int vcpuid, boolean_t *retu) 908256072Sneel{ 909256072Sneel int rv, ftype; 910256072Sneel struct vm_map *map; 911256072Sneel struct vcpu *vcpu; 912256072Sneel struct vm_exit *vme; 913256072Sneel 914256072Sneel vcpu = &vm->vcpu[vcpuid]; 915256072Sneel vme = &vcpu->exitinfo; 916256072Sneel 917256072Sneel ftype = vme->u.paging.fault_type; 918256072Sneel KASSERT(ftype == VM_PROT_READ || 919256072Sneel ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE, 920256072Sneel ("vm_handle_paging: invalid fault_type %d", ftype)); 921256072Sneel 922256072Sneel if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { 923256072Sneel rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace), 924256072Sneel vme->u.paging.gpa, ftype); 925256072Sneel if (rv == 0) 926256072Sneel goto done; 927256072Sneel } 928256072Sneel 929256072Sneel map = &vm->vmspace->vm_map; 930256072Sneel rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL); 931256072Sneel 932261088Sjhb VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, " 933261088Sjhb "ftype = %d", rv, vme->u.paging.gpa, ftype); 934256072Sneel 935256072Sneel if (rv != KERN_SUCCESS) 936256072Sneel 
return (EFAULT); 937256072Sneeldone: 938256072Sneel /* restart execution at the faulting instruction */ 939256072Sneel vme->inst_length = 0; 940256072Sneel 941256072Sneel return (0); 942256072Sneel} 943256072Sneel 944256072Sneelstatic int 945256072Sneelvm_handle_inst_emul(struct vm *vm, int vcpuid, boolean_t *retu) 946256072Sneel{ 947256072Sneel struct vie *vie; 948256072Sneel struct vcpu *vcpu; 949256072Sneel struct vm_exit *vme; 950256072Sneel int error, inst_length; 951256072Sneel uint64_t rip, gla, gpa, cr3; 952261088Sjhb mem_region_read_t mread; 953261088Sjhb mem_region_write_t mwrite; 954256072Sneel 955256072Sneel vcpu = &vm->vcpu[vcpuid]; 956256072Sneel vme = &vcpu->exitinfo; 957256072Sneel 958256072Sneel rip = vme->rip; 959256072Sneel inst_length = vme->inst_length; 960256072Sneel 961256072Sneel gla = vme->u.inst_emul.gla; 962256072Sneel gpa = vme->u.inst_emul.gpa; 963256072Sneel cr3 = vme->u.inst_emul.cr3; 964256072Sneel vie = &vme->u.inst_emul.vie; 965256072Sneel 966256072Sneel vie_init(vie); 967256072Sneel 968256072Sneel /* Fetch, decode and emulate the faulting instruction */ 969256072Sneel if (vmm_fetch_instruction(vm, vcpuid, rip, inst_length, cr3, vie) != 0) 970256072Sneel return (EFAULT); 971256072Sneel 972256072Sneel if (vmm_decode_instruction(vm, vcpuid, gla, vie) != 0) 973256072Sneel return (EFAULT); 974256072Sneel 975261088Sjhb /* return to userland unless this is an in-kernel emulated device */ 976261088Sjhb if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { 977261088Sjhb mread = lapic_mmio_read; 978261088Sjhb mwrite = lapic_mmio_write; 979261088Sjhb } else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) { 980261088Sjhb mread = vioapic_mmio_read; 981261088Sjhb mwrite = vioapic_mmio_write; 982261088Sjhb } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) { 983261088Sjhb mread = vhpet_mmio_read; 984261088Sjhb mwrite = vhpet_mmio_write; 985261088Sjhb } else { 986256072Sneel *retu = TRUE; 987256072Sneel 
return (0); 988256072Sneel } 989256072Sneel 990261088Sjhb error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, mread, mwrite, 0); 991256072Sneel 992256072Sneel /* return to userland to spin up the AP */ 993256072Sneel if (error == 0 && vme->exitcode == VM_EXITCODE_SPINUP_AP) 994256072Sneel *retu = TRUE; 995256072Sneel 996256072Sneel return (error); 997256072Sneel} 998256072Sneel 999221828Sgrehanint 1000221828Sgrehanvm_run(struct vm *vm, struct vm_run *vmrun) 1001221828Sgrehan{ 1002256072Sneel int error, vcpuid; 1003221828Sgrehan struct vcpu *vcpu; 1004221828Sgrehan struct pcb *pcb; 1005242065Sneel uint64_t tscval, rip; 1006242065Sneel struct vm_exit *vme; 1007256072Sneel boolean_t retu; 1008256072Sneel pmap_t pmap; 1009221828Sgrehan 1010221828Sgrehan vcpuid = vmrun->cpuid; 1011221828Sgrehan 1012221828Sgrehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1013221828Sgrehan return (EINVAL); 1014221828Sgrehan 1015256072Sneel pmap = vmspace_pmap(vm->vmspace); 1016221828Sgrehan vcpu = &vm->vcpu[vcpuid]; 1017256072Sneel vme = &vcpu->exitinfo; 1018242065Sneel rip = vmrun->rip; 1019242065Sneelrestart: 1020221828Sgrehan critical_enter(); 1021221828Sgrehan 1022256072Sneel KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active), 1023256072Sneel ("vm_run: absurd pm_active")); 1024256072Sneel 1025221828Sgrehan tscval = rdtsc(); 1026221828Sgrehan 1027221828Sgrehan pcb = PCPU_GET(curpcb); 1028221914Sjhb set_pcb_flags(pcb, PCB_FULL_IRET); 1029221828Sgrehan 1030234695Sgrehan restore_guest_msrs(vm, vcpuid); 1031221828Sgrehan restore_guest_fpustate(vcpu); 1032241489Sneel 1033256072Sneel vcpu_require_state(vm, vcpuid, VCPU_RUNNING); 1034241489Sneel vcpu->hostcpu = curcpu; 1035256072Sneel error = VMRUN(vm->cookie, vcpuid, rip, pmap); 1036241489Sneel vcpu->hostcpu = NOCPU; 1037256072Sneel vcpu_require_state(vm, vcpuid, VCPU_FROZEN); 1038241489Sneel 1039221828Sgrehan save_guest_fpustate(vcpu); 1040221828Sgrehan restore_host_msrs(vm, vcpuid); 1041221828Sgrehan 1042221828Sgrehan vmm_stat_incr(vm, vcpuid, 
VCPU_TOTAL_RUNTIME, rdtsc() - tscval); 1043221828Sgrehan 1044221828Sgrehan critical_exit(); 1045221828Sgrehan 1046256072Sneel if (error == 0) { 1047256072Sneel retu = FALSE; 1048256072Sneel switch (vme->exitcode) { 1049256072Sneel case VM_EXITCODE_HLT: 1050256072Sneel error = vm_handle_hlt(vm, vcpuid, &retu); 1051256072Sneel break; 1052256072Sneel case VM_EXITCODE_PAGING: 1053256072Sneel error = vm_handle_paging(vm, vcpuid, &retu); 1054256072Sneel break; 1055256072Sneel case VM_EXITCODE_INST_EMUL: 1056256072Sneel error = vm_handle_inst_emul(vm, vcpuid, &retu); 1057256072Sneel break; 1058256072Sneel default: 1059256072Sneel retu = TRUE; /* handled in userland */ 1060256072Sneel break; 1061242065Sneel } 1062256072Sneel } 1063242065Sneel 1064256072Sneel if (error == 0 && retu == FALSE) { 1065242065Sneel rip = vme->rip + vme->inst_length; 1066242065Sneel goto restart; 1067242065Sneel } 1068242065Sneel 1069256072Sneel /* copy the exit information */ 1070256072Sneel bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit)); 1071221828Sgrehan return (error); 1072221828Sgrehan} 1073221828Sgrehan 1074221828Sgrehanint 1075221828Sgrehanvm_inject_event(struct vm *vm, int vcpuid, int type, 1076221828Sgrehan int vector, uint32_t code, int code_valid) 1077221828Sgrehan{ 1078221828Sgrehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1079221828Sgrehan return (EINVAL); 1080221828Sgrehan 1081221828Sgrehan if ((type > VM_EVENT_NONE && type < VM_EVENT_MAX) == 0) 1082221828Sgrehan return (EINVAL); 1083221828Sgrehan 1084221828Sgrehan if (vector < 0 || vector > 255) 1085221828Sgrehan return (EINVAL); 1086221828Sgrehan 1087221828Sgrehan return (VMINJECT(vm->cookie, vcpuid, type, vector, code, code_valid)); 1088221828Sgrehan} 1089221828Sgrehan 1090248389Sneelstatic VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu"); 1091241982Sneel 1092221828Sgrehanint 1093241982Sneelvm_inject_nmi(struct vm *vm, int vcpuid) 1094221828Sgrehan{ 1095241982Sneel struct vcpu *vcpu; 1096221828Sgrehan 
1097241982Sneel if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1098221828Sgrehan return (EINVAL); 1099221828Sgrehan 1100241982Sneel vcpu = &vm->vcpu[vcpuid]; 1101241982Sneel 1102241982Sneel vcpu->nmi_pending = 1; 1103241982Sneel vm_interrupt_hostcpu(vm, vcpuid); 1104241982Sneel return (0); 1105221828Sgrehan} 1106221828Sgrehan 1107221828Sgrehanint 1108241982Sneelvm_nmi_pending(struct vm *vm, int vcpuid) 1109241982Sneel{ 1110241982Sneel struct vcpu *vcpu; 1111241982Sneel 1112241982Sneel if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1113241982Sneel panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); 1114241982Sneel 1115241982Sneel vcpu = &vm->vcpu[vcpuid]; 1116241982Sneel 1117241982Sneel return (vcpu->nmi_pending); 1118241982Sneel} 1119241982Sneel 1120241982Sneelvoid 1121241982Sneelvm_nmi_clear(struct vm *vm, int vcpuid) 1122241982Sneel{ 1123241982Sneel struct vcpu *vcpu; 1124241982Sneel 1125241982Sneel if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1126241982Sneel panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); 1127241982Sneel 1128241982Sneel vcpu = &vm->vcpu[vcpuid]; 1129241982Sneel 1130241982Sneel if (vcpu->nmi_pending == 0) 1131241982Sneel panic("vm_nmi_clear: inconsistent nmi_pending state"); 1132241982Sneel 1133241982Sneel vcpu->nmi_pending = 0; 1134241982Sneel vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1); 1135241982Sneel} 1136241982Sneel 1137241982Sneelint 1138221828Sgrehanvm_get_capability(struct vm *vm, int vcpu, int type, int *retval) 1139221828Sgrehan{ 1140221828Sgrehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 1141221828Sgrehan return (EINVAL); 1142221828Sgrehan 1143221828Sgrehan if (type < 0 || type >= VM_CAP_MAX) 1144221828Sgrehan return (EINVAL); 1145221828Sgrehan 1146221828Sgrehan return (VMGETCAP(vm->cookie, vcpu, type, retval)); 1147221828Sgrehan} 1148221828Sgrehan 1149221828Sgrehanint 1150221828Sgrehanvm_set_capability(struct vm *vm, int vcpu, int type, int val) 1151221828Sgrehan{ 1152221828Sgrehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 1153221828Sgrehan return (EINVAL); 
1154221828Sgrehan 1155221828Sgrehan if (type < 0 || type >= VM_CAP_MAX) 1156221828Sgrehan return (EINVAL); 1157221828Sgrehan 1158221828Sgrehan return (VMSETCAP(vm->cookie, vcpu, type, val)); 1159221828Sgrehan} 1160221828Sgrehan 1161221828Sgrehanuint64_t * 1162221828Sgrehanvm_guest_msrs(struct vm *vm, int cpu) 1163221828Sgrehan{ 1164221828Sgrehan return (vm->vcpu[cpu].guest_msrs); 1165221828Sgrehan} 1166221828Sgrehan 1167221828Sgrehanstruct vlapic * 1168221828Sgrehanvm_lapic(struct vm *vm, int cpu) 1169221828Sgrehan{ 1170221828Sgrehan return (vm->vcpu[cpu].vlapic); 1171221828Sgrehan} 1172221828Sgrehan 1173261088Sjhbstruct vioapic * 1174261088Sjhbvm_ioapic(struct vm *vm) 1175261088Sjhb{ 1176261088Sjhb 1177261088Sjhb return (vm->vioapic); 1178261088Sjhb} 1179261088Sjhb 1180261088Sjhbstruct vhpet * 1181261088Sjhbvm_hpet(struct vm *vm) 1182261088Sjhb{ 1183261088Sjhb 1184261088Sjhb return (vm->vhpet); 1185261088Sjhb} 1186261088Sjhb 1187221828Sgrehanboolean_t 1188221828Sgrehanvmm_is_pptdev(int bus, int slot, int func) 1189221828Sgrehan{ 1190246188Sneel int found, i, n; 1191246188Sneel int b, s, f; 1192221828Sgrehan char *val, *cp, *cp2; 1193221828Sgrehan 1194221828Sgrehan /* 1195246188Sneel * XXX 1196246188Sneel * The length of an environment variable is limited to 128 bytes which 1197246188Sneel * puts an upper limit on the number of passthru devices that may be 1198246188Sneel * specified using a single environment variable. 1199246188Sneel * 1200246188Sneel * Work around this by scanning multiple environment variable 1201246188Sneel * names instead of a single one - yuck! 
1202221828Sgrehan */ 1203246188Sneel const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL }; 1204246188Sneel 1205246188Sneel /* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */ 1206221828Sgrehan found = 0; 1207246188Sneel for (i = 0; names[i] != NULL && !found; i++) { 1208246188Sneel cp = val = getenv(names[i]); 1209246188Sneel while (cp != NULL && *cp != '\0') { 1210246188Sneel if ((cp2 = strchr(cp, ' ')) != NULL) 1211246188Sneel *cp2 = '\0'; 1212221828Sgrehan 1213246188Sneel n = sscanf(cp, "%d/%d/%d", &b, &s, &f); 1214246188Sneel if (n == 3 && bus == b && slot == s && func == f) { 1215246188Sneel found = 1; 1216246188Sneel break; 1217246188Sneel } 1218221828Sgrehan 1219246188Sneel if (cp2 != NULL) 1220246188Sneel *cp2++ = ' '; 1221221828Sgrehan 1222246188Sneel cp = cp2; 1223246188Sneel } 1224246188Sneel freeenv(val); 1225221828Sgrehan } 1226221828Sgrehan return (found); 1227221828Sgrehan} 1228221828Sgrehan 1229221828Sgrehanvoid * 1230221828Sgrehanvm_iommu_domain(struct vm *vm) 1231221828Sgrehan{ 1232221828Sgrehan 1233221828Sgrehan return (vm->iommu); 1234221828Sgrehan} 1235221828Sgrehan 1236241489Sneelint 1237256072Sneelvcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate) 1238221828Sgrehan{ 1239241489Sneel int error; 1240221828Sgrehan struct vcpu *vcpu; 1241221828Sgrehan 1242221828Sgrehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1243221828Sgrehan panic("vm_set_run_state: invalid vcpuid %d", vcpuid); 1244221828Sgrehan 1245221828Sgrehan vcpu = &vm->vcpu[vcpuid]; 1246221828Sgrehan 1247241489Sneel vcpu_lock(vcpu); 1248256072Sneel error = vcpu_set_state_locked(vcpu, newstate); 1249241489Sneel vcpu_unlock(vcpu); 1250241489Sneel 1251241489Sneel return (error); 1252221828Sgrehan} 1253221828Sgrehan 1254241489Sneelenum vcpu_state 1255249879Sgrehanvcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu) 1256221828Sgrehan{ 1257221828Sgrehan struct vcpu *vcpu; 1258241489Sneel enum vcpu_state state; 1259221828Sgrehan 1260221828Sgrehan if (vcpuid < 0 || vcpuid 
>= VM_MAXCPU) 1261221828Sgrehan panic("vm_get_run_state: invalid vcpuid %d", vcpuid); 1262221828Sgrehan 1263221828Sgrehan vcpu = &vm->vcpu[vcpuid]; 1264221828Sgrehan 1265241489Sneel vcpu_lock(vcpu); 1266241489Sneel state = vcpu->state; 1267249879Sgrehan if (hostcpu != NULL) 1268249879Sgrehan *hostcpu = vcpu->hostcpu; 1269241489Sneel vcpu_unlock(vcpu); 1270221828Sgrehan 1271241489Sneel return (state); 1272221828Sgrehan} 1273221828Sgrehan 1274221828Sgrehanvoid 1275221828Sgrehanvm_activate_cpu(struct vm *vm, int vcpuid) 1276221828Sgrehan{ 1277221828Sgrehan 1278221828Sgrehan if (vcpuid >= 0 && vcpuid < VM_MAXCPU) 1279223621Sgrehan CPU_SET(vcpuid, &vm->active_cpus); 1280221828Sgrehan} 1281221828Sgrehan 1282223621Sgrehancpuset_t 1283221828Sgrehanvm_active_cpus(struct vm *vm) 1284221828Sgrehan{ 1285221828Sgrehan 1286221828Sgrehan return (vm->active_cpus); 1287221828Sgrehan} 1288221828Sgrehan 1289221828Sgrehanvoid * 1290221828Sgrehanvcpu_stats(struct vm *vm, int vcpuid) 1291221828Sgrehan{ 1292221828Sgrehan 1293221828Sgrehan return (vm->vcpu[vcpuid].stats); 1294221828Sgrehan} 1295240922Sneel 1296240922Sneelint 1297240922Sneelvm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state) 1298240922Sneel{ 1299240922Sneel if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1300240922Sneel return (EINVAL); 1301240922Sneel 1302240922Sneel *state = vm->vcpu[vcpuid].x2apic_state; 1303240922Sneel 1304240922Sneel return (0); 1305240922Sneel} 1306240922Sneel 1307240922Sneelint 1308240922Sneelvm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) 1309240922Sneel{ 1310240922Sneel if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1311240922Sneel return (EINVAL); 1312240922Sneel 1313248392Sneel if (state >= X2APIC_STATE_LAST) 1314240922Sneel return (EINVAL); 1315240922Sneel 1316240922Sneel vm->vcpu[vcpuid].x2apic_state = state; 1317240922Sneel 1318240943Sneel vlapic_set_x2apic_state(vm, vcpuid, state); 1319240943Sneel 1320240922Sneel return (0); 1321240922Sneel} 1322241489Sneel 
/*
 * Nudge a vcpu so it notices a newly pending event.  If the vcpu is not
 * resident on any host cpu (hostcpu == NOCPU) and is SLEEPING, wake the
 * thread sleeping on the vcpu; if it is resident and RUNNING on another
 * cpu, send an IPI to force a VM exit.  All checks are made under the
 * vcpu lock so the state and hostcpu fields are read consistently.
 */
void
vm_interrupt_hostcpu(struct vm *vm, int vcpuid)
{
	int hostcpu;
	struct vcpu *vcpu;

	vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);
	hostcpu = vcpu->hostcpu;
	if (hostcpu == NOCPU) {
		/* matches the msleep_spin() on 'vcpu' in vm_handle_hlt() */
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
	} else {
		if (vcpu->state != VCPU_RUNNING)
			panic("invalid vcpu state %d", vcpu->state);
		/* no IPI needed if we are already on the vcpu's host cpu */
		if (hostcpu != curcpu)
			ipi_cpu(hostcpu, vmm_ipinum);
	}
	vcpu_unlock(vcpu);
}

/* Accessor for the VM's vmspace. */
struct vmspace *
vm_get_vmspace(struct vm *vm)
{

	return (vm->vmspace);
}

/* Map an apic id to a vcpu id. */
int
vm_apicid2vcpuid(struct vm *vm, int apicid)
{
	/*
	 * XXX apic id is assumed to be numerically identical to vcpu id
	 */
	return (apicid);
}