sys/amd64/vmm/vmm.c, revision 270070 (FreeBSD stable/10)
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/sys/amd64/vmm/vmm.c 270070 2014-08-17 00:52:07Z grehan $
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/amd64/vmm/vmm.c 270070 2014-08-17 00:52:07Z grehan $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/pcpu.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/systm.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/cpu.h>
#include <machine/vm.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <x86/psl.h>
#include <x86/apicreg.h>
#include <machine/vmparam.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>

#include "vmm_ioport.h"
#include "vmm_ktr.h"
#include "vmm_host.h"
#include "vmm_mem.h"
#include "vmm_util.h"
#include "vatpic.h"
#include "vatpit.h"
#include "vhpet.h"
#include "vioapic.h"
#include "vlapic.h"
#include "vmm_msr.h"
#include "vmm_ipi.h"
#include "vmm_stat.h"
#include "vmm_lapic.h"

#include "io/ppt.h"
#include "io/iommu.h"

struct vlapic;

struct vcpu {
	int		flags;
	enum vcpu_state	state;
	struct mtx	mtx;
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	uint64_t	guest_msrs[VMM_MSR_NUM];
	struct vlapic	*vlapic;
	int		vcpuid;
	struct savefpu	*guestfpu;	/* guest fpu state */
	uint64_t	guest_xcr0;
	void		*stats;
	struct vm_exit	exitinfo;
	enum x2apic_state x2apic_state;
	int		nmi_pending;
	int		extint_pending;
	struct vm_exception exception;
	int		exception_pending;
};

#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)

struct mem_seg {
	vm_paddr_t	gpa;
	size_t		len;
	boolean_t	wired;
	vm_object_t	object;
};
#define	VM_MAX_MEMORY_SEGMENTS	2

struct vm {
	void		*cookie;	/* processor-specific data */
	void		*iommu;		/* iommu-specific data */
	struct vhpet	*vhpet;		/* virtual HPET */
	struct vioapic	*vioapic;	/* virtual ioapic */
	struct vatpic	*vatpic;	/* virtual atpic */
	struct vatpit	*vatpit;	/* virtual atpit */
	struct vmspace	*vmspace;	/* guest's address space */
	struct vcpu	vcpu[VM_MAXCPU];
	int		num_mem_segs;
	struct mem_seg	mem_segs[VM_MAX_MEMORY_SEGMENTS];
	char		name[VM_MAX_NAMELEN];

	/*
	 * Set of active vcpus.
	 * An active vcpu is one that has been started implicitly (BSP) or
	 * explicitly (AP) by sending it a startup ipi.
	 */
	volatile cpuset_t active_cpus;

	struct mtx	rendezvous_mtx;
	cpuset_t	rendezvous_req_cpus;
	cpuset_t	rendezvous_done_cpus;
	void		*rendezvous_arg;
	vm_rendezvous_func_t rendezvous_func;

	int		suspend;
	volatile cpuset_t suspended_cpus;

	volatile cpuset_t halted_cpus;
};

static int vmm_initialized;

static struct vmm_ops *ops;
#define	VMM_INIT(num)	(ops != NULL ? (*ops->init)(num) : 0)
#define	VMM_CLEANUP()	(ops != NULL ? (*ops->cleanup)() : 0)
#define	VMM_RESUME()	(ops != NULL ? (*ops->resume)() : 0)

#define	VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
#define	VMRUN(vmi, vcpu, rip, pmap, rptr, sptr) \
	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr, sptr) : ENXIO)
#define	VMCLEANUP(vmi)	(ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
#define	VMSPACE_ALLOC(min, max) \
	(ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
#define	VMSPACE_FREE(vmspace) \
	(ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO)
#define	VMGETREG(vmi, vcpu, num, retval) \
	(ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETREG(vmi, vcpu, num, val) \
	(ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
#define	VMGETDESC(vmi, vcpu, num, desc) \
	(ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMSETDESC(vmi, vcpu, num, desc) \
	(ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMGETCAP(vmi, vcpu, num, retval) \
	(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETCAP(vmi, vcpu, num, val) \
	(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
#define	VLAPIC_INIT(vmi, vcpu) \
	(ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL)
#define	VLAPIC_CLEANUP(vmi, vlapic) \
	(ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL)

#define	fpu_start_emulating()	load_cr0(rcr0() | CR0_TS)
#define	fpu_stop_emulating()	clts()

static MALLOC_DEFINE(M_VM, "vm", "vm");
CTASSERT(VMM_MSR_NUM <= 64);	/* msr_mask can keep track of up to 64 msrs */

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

/*
 * Halt the guest if all vcpus are executing a HLT instruction with
 * interrupts disabled.
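 * The behavior is controlled by the hw.vmm.halt_detection loader tunable
 * defined below and is implemented in vm_handle_hlt().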
 */
static int halt_detection_enabled = 1;
TUNABLE_INT("hw.vmm.halt_detection", &halt_detection_enabled);
SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN,
    &halt_detection_enabled, 0,
    "Halt VM if all vcpus execute HLT with interrupts disabled");

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

static void
vcpu_cleanup(struct vm *vm, int i)
{
	struct vcpu *vcpu = &vm->vcpu[i];

	VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic);
	vmm_stat_free(vcpu->stats);
	fpu_save_area_free(vcpu->guestfpu);
}

static void
vcpu_init(struct vm *vm, uint32_t vcpu_id)
{
	struct vcpu *vcpu;

	vcpu = &vm->vcpu[vcpu_id];

	vcpu_lock_init(vcpu);
	vcpu->hostcpu = NOCPU;
	vcpu->vcpuid = vcpu_id;
	vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
	vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
	vcpu->guest_xcr0 = XFEATURE_ENABLED_X87;
	vcpu->guestfpu = fpu_save_area_alloc();
	fpu_save_area_reset(vcpu->guestfpu);
	vcpu->stats = vmm_stat_alloc();
}

struct vm_exit *
vm_exitinfo(struct vm *vm, int cpuid)
{
	struct vcpu *vcpu;

	if (cpuid < 0 || cpuid >= VM_MAXCPU)
		panic("vm_exitinfo: invalid cpuid %d", cpuid);

	vcpu = &vm->vcpu[cpuid];

	return (&vcpu->exitinfo);
}

static void
vmm_resume(void)
{
	VMM_RESUME();
}

static int
vmm_init(void)
{
	int error;

	vmm_host_state_init();

	vmm_ipinum = vmm_ipi_alloc();
	if (vmm_ipinum == 0)
		vmm_ipinum = IPI_AST;

	error = vmm_mem_init();
	if (error)
		return (error);

	if (vmm_is_intel())
		ops = &vmm_ops_intel;
	else if (vmm_is_amd())
		ops = &vmm_ops_amd;
	else
		return (ENXIO);

	vmm_msr_init();
	vmm_resume_p = vmm_resume;

	return (VMM_INIT(vmm_ipinum));
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		vmmdev_init();
		if (ppt_avail_devices() > 0)
			iommu_init();
		error = vmm_init();
		if (error == 0)
			vmm_initialized = 1;
		break;
	case MOD_UNLOAD:
		error = vmmdev_cleanup();
		if (error == 0) {
			vmm_resume_p = NULL;
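			/*
			 * Undo the MOD_LOAD work, roughly in reverse:
			 * the iommu, the notification IPI vector and the
			 * cpu-vendor-specific state via VMM_CLEANUP().
			 */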
			iommu_cleanup();
			if (vmm_ipinum != IPI_AST)
				vmm_ipi_free(vmm_ipinum);
			error = VMM_CLEANUP();
			/*
			 * Something bad happened - prevent new
			 * VMs from being created
			 */
			if (error)
				vmm_initialized = 0;
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - iommu initialization must happen after the pci passthru driver has had
 *   a chance to attach to any passthru devices (after SI_SUB_CONFIGURE).
 *
 * - VT-x initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 */
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

int
vm_create(const char *name, struct vm **retvm)
{
	int i;
	struct vm *vm;
	struct vmspace *vmspace;

	/*
	 * If vmm.ko could not be successfully initialized then don't attempt
	 * to create the virtual machine.
	 */
	if (!vmm_initialized)
		return (ENXIO);

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (EINVAL);

	vmspace = VMSPACE_ALLOC(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
	if (vmspace == NULL)
		return (ENOMEM);

	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->vmspace = vmspace;
	mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
	vm->cookie = VMINIT(vm, vmspace_pmap(vmspace));
	vm->vioapic = vioapic_init(vm);
	vm->vhpet = vhpet_init(vm);
	vm->vatpic = vatpic_init(vm);
	vm->vatpit = vatpit_init(vm);

	for (i = 0; i < VM_MAXCPU; i++) {
		vcpu_init(vm, i);
		guest_msrs_init(vm, i);
	}

	*retvm = vm;
	return (0);
}

static void
vm_free_mem_seg(struct vm *vm, struct mem_seg *seg)
{

	if (seg->object != NULL)
		vmm_mem_free(vm->vmspace, seg->gpa, seg->len);

	bzero(seg, sizeof(*seg));
}

void
vm_destroy(struct vm *vm)
{
	int i;

	ppt_unassign_all(vm);

	if (vm->iommu != NULL)
		iommu_destroy_domain(vm->iommu);

	vatpit_cleanup(vm->vatpit);
	vhpet_cleanup(vm->vhpet);
	vatpic_cleanup(vm->vatpic);
	vioapic_cleanup(vm->vioapic);
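
	/*
	 * With the virtual devices torn down, release the guest memory
	 * segments and per-vcpu state before freeing the vmspace and the
	 * processor-specific cookie.
	 */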

	for (i = 0; i < vm->num_mem_segs; i++)
		vm_free_mem_seg(vm, &vm->mem_segs[i]);

	vm->num_mem_segs = 0;

	for (i = 0; i < VM_MAXCPU; i++)
		vcpu_cleanup(vm, i);

	VMSPACE_FREE(vm->vmspace);

	VMCLEANUP(vm->cookie);

	free(vm, M_VM);
}

const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}

int
vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
	vm_object_t obj;

	if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL)
		return (ENOMEM);
	else
		return (0);
}

int
vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
{

	vmm_mmio_free(vm->vmspace, gpa, len);
	return (0);
}

boolean_t
vm_mem_allocated(struct vm *vm, vm_paddr_t gpa)
{
	int i;
	vm_paddr_t gpabase, gpalimit;

	for (i = 0; i < vm->num_mem_segs; i++) {
		gpabase = vm->mem_segs[i].gpa;
		gpalimit = gpabase + vm->mem_segs[i].len;
		if (gpa >= gpabase && gpa < gpalimit)
			return (TRUE);		/* 'gpa' is regular memory */
	}

	if (ppt_is_mmio(vm, gpa))
		return (TRUE);			/* 'gpa' is pci passthru mmio */

	return (FALSE);
}

int
vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	int available, allocated;
	struct mem_seg *seg;
	vm_object_t object;
	vm_paddr_t g;

	if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
		return (EINVAL);

	available = allocated = 0;
	g = gpa;
	while (g < gpa + len) {
		if (vm_mem_allocated(vm, g))
			allocated++;
		else
			available++;

		g += PAGE_SIZE;
	}

	/*
	 * If there are some allocated and some available pages in the address
	 * range then it is an error.
	 */
	if (allocated && available)
		return (EINVAL);

	/*
	 * If the entire address range being requested has already been
	 * allocated then there isn't anything more to do.
	 */
	if (allocated && available == 0)
		return (0);

	if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
		return (E2BIG);

	seg = &vm->mem_segs[vm->num_mem_segs];

	if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL)
		return (ENOMEM);

	seg->gpa = gpa;
	seg->len = len;
	seg->object = object;
	seg->wired = FALSE;

	vm->num_mem_segs++;

	return (0);
}

static void
vm_gpa_unwire(struct vm *vm)
{
	int i, rv;
	struct mem_seg *seg;

	for (i = 0; i < vm->num_mem_segs; i++) {
		seg = &vm->mem_segs[i];
		if (!seg->wired)
			continue;

		rv = vm_map_unwire(&vm->vmspace->vm_map,
		    seg->gpa, seg->gpa + seg->len,
		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
		KASSERT(rv == KERN_SUCCESS, ("vm(%s) memory segment "
		    "%#lx/%ld could not be unwired: %d",
		    vm_name(vm), seg->gpa, seg->len, rv));

		seg->wired = FALSE;
	}
}

static int
vm_gpa_wire(struct vm *vm)
{
	int i, rv;
	struct mem_seg *seg;

	for (i = 0; i < vm->num_mem_segs; i++) {
		seg = &vm->mem_segs[i];
		if (seg->wired)
			continue;

		/* XXX rlimits? */
		rv = vm_map_wire(&vm->vmspace->vm_map,
		    seg->gpa, seg->gpa + seg->len,
		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
		if (rv != KERN_SUCCESS)
			break;

		seg->wired = TRUE;
	}

	if (i < vm->num_mem_segs) {
		/*
		 * Undo the wiring before returning an error.
		 */
		vm_gpa_unwire(vm);
		return (EAGAIN);
	}

	return (0);
}

static void
vm_iommu_modify(struct vm *vm, boolean_t map)
{
	int i, sz;
	vm_paddr_t gpa, hpa;
	struct mem_seg *seg;
	void *vp, *cookie, *host_domain;

	sz = PAGE_SIZE;
	host_domain = iommu_host_domain();

	for (i = 0; i < vm->num_mem_segs; i++) {
		seg = &vm->mem_segs[i];
		KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired",
		    vm_name(vm), seg->gpa, seg->len));

		gpa = seg->gpa;
		while (gpa < seg->gpa + seg->len) {
			vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE,
			    &cookie);
			KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx",
			    vm_name(vm), gpa));

			vm_gpa_release(cookie);

			hpa = DMAP_TO_PHYS((uintptr_t)vp);
			if (map) {
				iommu_create_mapping(vm->iommu, gpa, hpa, sz);
				iommu_remove_mapping(host_domain, hpa, sz);
			} else {
				iommu_remove_mapping(vm->iommu, gpa, sz);
				iommu_create_mapping(host_domain, hpa, hpa, sz);
			}

			gpa += PAGE_SIZE;
		}
	}

	/*
	 * Invalidate the cached translations associated with the domain
	 * from which pages were removed.
	 */
	if (map)
		iommu_invalidate_tlb(host_domain);
	else
		iommu_invalidate_tlb(vm->iommu);
}

#define	vm_iommu_unmap(vm)	vm_iommu_modify((vm), FALSE)
#define	vm_iommu_map(vm)	vm_iommu_modify((vm), TRUE)

int
vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func)
{
	int error;

	error = ppt_unassign_device(vm, bus, slot, func);
	if (error)
		return (error);

	if (ppt_assigned_devices(vm) == 0) {
		vm_iommu_unmap(vm);
		vm_gpa_unwire(vm);
	}
	return (0);
}

int
vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
{
	int error;
	vm_paddr_t maxaddr;

	/*
	 * Virtual machines with pci passthru devices get special treatment:
	 * - the guest physical memory is wired
	 * - the iommu is programmed to do the 'gpa' to 'hpa' translation
	 *
	 * We need to do this before the first pci passthru device is attached.
	 */
	if (ppt_assigned_devices(vm) == 0) {
		KASSERT(vm->iommu == NULL,
		    ("vm_assign_pptdev: iommu must be NULL"));
		maxaddr = vmm_mem_maxaddr();
		vm->iommu = iommu_create_domain(maxaddr);

		error = vm_gpa_wire(vm);
		if (error)
			return (error);

		vm_iommu_map(vm);
	}

	error = ppt_assign_device(vm, bus, slot, func);
	return (error);
}

void *
vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
    void **cookie)
{
	int count, pageoff;
	vm_page_t m;

	pageoff = gpa & PAGE_MASK;
	if (len > PAGE_SIZE - pageoff)
		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);

	count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
	    trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);

	if (count == 1) {
		*cookie = m;
		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
	} else {
		*cookie = NULL;
		return (NULL);
	}
}

void
vm_gpa_release(void *cookie)
{
	vm_page_t m = cookie;

	vm_page_lock(m);
	vm_page_unhold(m);
	vm_page_unlock(m);
}

int
vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
    struct vm_memory_segment *seg)
{
	int i;

	for (i = 0; i < vm->num_mem_segs; i++) {
		if (gpabase == vm->mem_segs[i].gpa) {
			seg->gpa = vm->mem_segs[i].gpa;
			seg->len = vm->mem_segs[i].len;
			seg->wired = vm->mem_segs[i].wired;
			return (0);
		}
	}
	return (-1);
}

int
vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
    vm_offset_t *offset, struct vm_object **object)
{
	int i;
	size_t seg_len;
	vm_paddr_t seg_gpa;
	vm_object_t seg_obj;

	for (i = 0; i < vm->num_mem_segs; i++) {
		if ((seg_obj = vm->mem_segs[i].object) == NULL)
			continue;

		seg_gpa = vm->mem_segs[i].gpa;
		seg_len = vm->mem_segs[i].len;

		if (gpa >= seg_gpa && gpa < seg_gpa + seg_len) {
			*offset = gpa - seg_gpa;
			*object = seg_obj;
			vm_object_reference(seg_obj);
			return (0);
		}
	}

	return (EINVAL);
}

int
vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (VMGETREG(vm->cookie, vcpu, reg, retval));
}

int
vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (VMSETREG(vm->cookie, vcpu, reg, val));
}

static boolean_t
is_descriptor_table(int reg)
{

	switch (reg) {
	case VM_REG_GUEST_IDTR:
	case VM_REG_GUEST_GDTR:
		return (TRUE);
	default:
		return (FALSE);
	}
}

static boolean_t
is_segment_register(int reg)
{

	switch (reg) {
	case VM_REG_GUEST_ES:
	case VM_REG_GUEST_CS:
	case VM_REG_GUEST_SS:
	case VM_REG_GUEST_DS:
	case VM_REG_GUEST_FS:
	case VM_REG_GUEST_GS:
	case VM_REG_GUEST_TR:
	case VM_REG_GUEST_LDTR:
		return (TRUE);
	default:
		return (FALSE);
	}
}

int
vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
    struct seg_desc *desc)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (!is_segment_register(reg) && !is_descriptor_table(reg))
		return (EINVAL);

	return (VMGETDESC(vm->cookie, vcpu, reg, desc));
}

int
vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
    struct seg_desc *desc)
{
	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (!is_segment_register(reg) && !is_descriptor_table(reg))
		return (EINVAL);

	return (VMSETDESC(vm->cookie, vcpu, reg, desc));
}

static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* flush host state to the pcb */
	fpuexit(curthread);

	/* restore guest FPU state */
	fpu_stop_emulating();
	fpurestore(vcpu->guestfpu);

	/* restore guest XCR0 if XSAVE is enabled in the host */
	if (rcr4() & CR4_XSAVE)
		load_xcr(0, vcpu->guest_xcr0);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	fpu_start_emulating();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{

	if ((rcr0() & CR0_TS) == 0)
		panic("fpu emulation not enabled in host!");

	/* save guest XCR0 and restore host XCR0 */
	if (rcr4() & CR4_XSAVE) {
		vcpu->guest_xcr0 = rxcr(0);
		load_xcr(0, vmm_get_host_xcr0());
	}

	/* save guest FPU state */
	fpu_stop_emulating();
	fpusave(vcpu->guestfpu);
	fpu_start_emulating();
}

static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");

static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;

	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state. This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE)
			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	default:
		error = 1;
		break;
	}

	if (error)
		return (EBUSY);

	vcpu->state = newstate;
	if (newstate == VCPU_RUNNING)
		vcpu->hostcpu = curcpu;
	else
		vcpu->hostcpu = NOCPU;

	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}

static void
vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0)
		panic("Error %d setting state to %d\n", error, newstate);
}

static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}

static void
vm_set_rendezvous_func(struct vm *vm, vm_rendezvous_func_t func)
{

	KASSERT(mtx_owned(&vm->rendezvous_mtx), ("rendezvous_mtx not locked"));

	/*
	 * Update 'rendezvous_func' and execute a write memory barrier to
	 * ensure that it is visible across all host cpus. This is not needed
	 * for correctness but it does ensure that all the vcpus will notice
	 * that the rendezvous is requested immediately.
	 */
	vm->rendezvous_func = func;
	wmb();
}

#define	RENDEZVOUS_CTR0(vm, vcpuid, fmt)				\
	do {								\
		if (vcpuid >= 0)					\
			VCPU_CTR0(vm, vcpuid, fmt);			\
		else							\
			VM_CTR0(vm, fmt);				\
	} while (0)

static void
vm_handle_rendezvous(struct vm *vm, int vcpuid)
{

	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
	    ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid));

	mtx_lock(&vm->rendezvous_mtx);
	while (vm->rendezvous_func != NULL) {
		/* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
		CPU_AND(&vm->rendezvous_req_cpus, &vm->active_cpus);

		if (vcpuid != -1 &&
		    CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
		    !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
			VCPU_CTR0(vm, vcpuid, "Calling rendezvous func");
			(*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg);
			CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
		}
		if (CPU_CMP(&vm->rendezvous_req_cpus,
		    &vm->rendezvous_done_cpus) == 0) {
			VCPU_CTR0(vm, vcpuid, "Rendezvous completed");
			vm_set_rendezvous_func(vm, NULL);
			wakeup(&vm->rendezvous_func);
			break;
		}
		RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion");
		mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
		    "vmrndv", 0);
	}
	mtx_unlock(&vm->rendezvous_mtx);
}

/*
 * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
 */
static int
vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
{
	struct vcpu *vcpu;
	const char *wmesg;
	int t, vcpu_halted, vm_halted;

	KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted"));

	vcpu = &vm->vcpu[vcpuid];
	vcpu_halted = 0;
	vm_halted = 0;

	vcpu_lock(vcpu);
	while (1) {
		/*
		 * Do a final check for pending NMI or interrupts before
		 * really putting this thread to sleep. Also check for
		 * software events that would cause this vcpu to wakeup.
		 *
		 * These interrupts/events could have happened after the
		 * vcpu returned from VMRUN() and before it acquired the
		 * vcpu lock above.
		 */
		if (vm->rendezvous_func != NULL || vm->suspend)
			break;
		if (vm_nmi_pending(vm, vcpuid))
			break;
		if (!intr_disabled) {
			if (vm_extint_pending(vm, vcpuid) ||
			    vlapic_pending_intr(vcpu->vlapic, NULL)) {
				break;
			}
		}

		/*
		 * Some Linux guests implement "halt" by having all vcpus
		 * execute HLT with interrupts disabled. 'halted_cpus' keeps
		 * track of the vcpus that have entered this state. When all
		 * vcpus enter the halted state the virtual machine is halted.
		 */
		if (intr_disabled) {
			wmesg = "vmhalt";
			VCPU_CTR0(vm, vcpuid, "Halted");
			if (!vcpu_halted && halt_detection_enabled) {
				vcpu_halted = 1;
				CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus);
			}
			if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) {
				vm_halted = 1;
				break;
			}
		} else {
			wmesg = "vmidle";
		}

		t = ticks;
		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		msleep_spin(vcpu, &vcpu->mtx, wmesg, 0);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
	}

	if (vcpu_halted)
		CPU_CLR_ATOMIC(vcpuid, &vm->halted_cpus);

	vcpu_unlock(vcpu);

	if (vm_halted)
		vm_suspend(vm, VM_SUSPEND_HALT);

	return (0);
}

static int
vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
{
	int rv, ftype;
	struct vm_map *map;
	struct vcpu *vcpu;
	struct vm_exit *vme;

	vcpu = &vm->vcpu[vcpuid];
	vme = &vcpu->exitinfo;

	ftype = vme->u.paging.fault_type;
	KASSERT(ftype == VM_PROT_READ ||
	    ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE,
	    ("vm_handle_paging: invalid fault_type %d", ftype));

	if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
		rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
		    vme->u.paging.gpa, ftype);
		if (rv == 0)
			goto done;
	}

	map = &vm->vmspace->vm_map;
	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL);

	VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, "
	    "ftype = %d", rv, vme->u.paging.gpa, ftype);

	if (rv != KERN_SUCCESS)
		return (EFAULT);
done:
	/* restart execution at the faulting instruction */
	vme->inst_length = 0;

	return (0);
}

static int
vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
{
	struct vie *vie;
	struct vcpu *vcpu;
	struct vm_exit *vme;
	uint64_t gla, gpa;
	struct vm_guest_paging *paging;
	mem_region_read_t mread;
	mem_region_write_t mwrite;
	int error;

	vcpu = &vm->vcpu[vcpuid];
	vme = &vcpu->exitinfo;

	gla = vme->u.inst_emul.gla;
	gpa = vme->u.inst_emul.gpa;
	vie = &vme->u.inst_emul.vie;
	paging = &vme->u.inst_emul.paging;

	vie_init(vie);

	/* Fetch, decode and emulate the faulting instruction */
	error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip,
	    vme->inst_length, vie);
	if (error == 1)
		return (0);		/* Resume guest to handle page fault */
	else if (error == -1)
		return (EFAULT);
	else if (error != 0)
		panic("%s: vmm_fetch_instruction error %d", __func__, error);

	if (vmm_decode_instruction(vm, vcpuid, gla, paging->cpu_mode, vie) != 0)
		return (EFAULT);

	/* return to userland unless this is an in-kernel emulated device */
	if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
		mread = lapic_mmio_read;
		mwrite = lapic_mmio_write;
	} else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
		mread = vioapic_mmio_read;
		mwrite = vioapic_mmio_write;
	} else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
		mread = vhpet_mmio_read;
		mwrite = vhpet_mmio_write;
	} else {
		*retu = true;
		return (0);
	}

	error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, mread, mwrite,
	    retu);

	return (error);
}

static int
vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu)
{
	int i, done;
	struct vcpu *vcpu;

	done = 0;
	vcpu = &vm->vcpu[vcpuid];

	CPU_SET_ATOMIC(vcpuid, &vm->suspended_cpus);

	/*
	 * Wait until all 'active_cpus' have suspended themselves.
	 *
	 * Since a VM may be suspended at any time including when one or
	 * more vcpus are doing a rendezvous we need to call the rendezvous
	 * handler while we are waiting to prevent a deadlock.
	 */
	vcpu_lock(vcpu);
	while (1) {
		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
			VCPU_CTR0(vm, vcpuid, "All vcpus suspended");
			break;
		}

		if (vm->rendezvous_func == NULL) {
			VCPU_CTR0(vm, vcpuid, "Sleeping during suspend");
			vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
			msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
			vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		} else {
			VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend");
			vcpu_unlock(vcpu);
			vm_handle_rendezvous(vm, vcpuid);
			vcpu_lock(vcpu);
		}
	}
	vcpu_unlock(vcpu);

	/*
	 * Wakeup the other sleeping vcpus and return to userspace.
	 */
	for (i = 0; i < VM_MAXCPU; i++) {
		if (CPU_ISSET(i, &vm->suspended_cpus)) {
			vcpu_notify_event(vm, i, false);
		}
	}

	*retu = true;
	return (0);
}

int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	int i;

	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
		VM_CTR2(vm, "virtual machine already suspended %d/%d",
		    vm->suspend, how);
		return (EALREADY);
	}

	VM_CTR1(vm, "virtual machine successfully suspended %d", how);

	/*
	 * Notify all active vcpus that they are now suspended.
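	 * A notified vcpu returns from VMRUN() with a VM_EXITCODE_SUSPENDED
	 * exit (see vm_exit_suspended()) that vm_run() passes on to
	 * vm_handle_suspend().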
	 */
	for (i = 0; i < VM_MAXCPU; i++) {
		if (CPU_ISSET(i, &vm->active_cpus))
			vcpu_notify_event(vm, i, false);
	}

	return (0);
}

void
vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip)
{
	struct vm_exit *vmexit;

	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));

	vmexit = vm_exitinfo(vm, vcpuid);
	vmexit->rip = rip;
	vmexit->inst_length = 0;
	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
	vmexit->u.suspended.how = vm->suspend;
}

int
vm_run(struct vm *vm, struct vm_run *vmrun)
{
	int error, vcpuid;
	struct vcpu *vcpu;
	struct pcb *pcb;
	uint64_t tscval, rip;
	struct vm_exit *vme;
	bool retu, intr_disabled;
	pmap_t pmap;
	void *rptr, *sptr;

	vcpuid = vmrun->cpuid;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
		return (EINVAL);

	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
		return (EINVAL);

	rptr = &vm->rendezvous_func;
	sptr = &vm->suspend;
	pmap = vmspace_pmap(vm->vmspace);
	vcpu = &vm->vcpu[vcpuid];
	vme = &vcpu->exitinfo;
	rip = vmrun->rip;
restart:
	critical_enter();

	KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active),
	    ("vm_run: absurd pm_active"));

	tscval = rdtsc();

	pcb = PCPU_GET(curpcb);
	set_pcb_flags(pcb, PCB_FULL_IRET);

	restore_guest_msrs(vm, vcpuid);
	restore_guest_fpustate(vcpu);

	vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
	error = VMRUN(vm->cookie, vcpuid, rip, pmap, rptr, sptr);
	vcpu_require_state(vm, vcpuid, VCPU_FROZEN);

	save_guest_fpustate(vcpu);
	restore_host_msrs(vm, vcpuid);

	vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);

	critical_exit();

	if (error == 0) {
		retu = false;
		switch (vme->exitcode) {
		case VM_EXITCODE_SUSPENDED:
			error = vm_handle_suspend(vm, vcpuid, &retu);
			break;
		case VM_EXITCODE_IOAPIC_EOI:
			vioapic_process_eoi(vm, vcpuid,
			    vme->u.ioapic_eoi.vector);
			break;
		case VM_EXITCODE_RENDEZVOUS:
			vm_handle_rendezvous(vm, vcpuid);
			error = 0;
			break;
		case VM_EXITCODE_HLT:
			intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
			error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu);
			break;
		case VM_EXITCODE_PAGING:
			error = vm_handle_paging(vm, vcpuid, &retu);
			break;
		case VM_EXITCODE_INST_EMUL:
			error = vm_handle_inst_emul(vm, vcpuid, &retu);
			break;
		case VM_EXITCODE_INOUT:
		case VM_EXITCODE_INOUT_STR:
			error = vm_handle_inout(vm, vcpuid, vme, &retu);
			break;
		default:
			retu = true;	/* handled in userland */
			break;
		}
	}

	if (error == 0 && retu == false) {
		rip = vme->rip + vme->inst_length;
		goto restart;
	}

	/* copy the exit information */
	bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
	return (error);
}

int
vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	if (exception->vector < 0 || exception->vector >= 32)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];

	if (vcpu->exception_pending) {
		VCPU_CTR2(vm, vcpuid, "Unable to inject exception %d due to "
		    "pending exception %d", exception->vector,
		    vcpu->exception.vector);
		return (EBUSY);
	}

	vcpu->exception_pending = 1;
	vcpu->exception = *exception;
	VCPU_CTR1(vm, vcpuid, "Exception %d pending", exception->vector);
	return (0);
}

int
vm_exception_pending(struct vm *vm, int vcpuid, struct vm_exception *exception)
{
	struct vcpu *vcpu;
	int pending;

	KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid));

	vcpu = &vm->vcpu[vcpuid];
	pending = vcpu->exception_pending;
	if (pending) {
		vcpu->exception_pending = 0;
		*exception = vcpu->exception;
		VCPU_CTR1(vm, vcpuid, "Exception %d delivered",
		    exception->vector);
	}
	return (pending);
}

static void
vm_inject_fault(struct vm *vm, int vcpuid, struct vm_exception *exception)
{
	struct vm_exit *vmexit;
	int error;

	error = vm_inject_exception(vm, vcpuid, exception);
	KASSERT(error == 0, ("vm_inject_exception error %d", error));

	/*
	 * A fault-like exception allows the instruction to be restarted
	 * after the exception handler returns.
	 *
	 * By setting the inst_length to 0 we ensure that the instruction
	 * pointer remains at the faulting instruction.
	 */
	vmexit = vm_exitinfo(vm, vcpuid);
	vmexit->inst_length = 0;
}

void
vm_inject_pf(struct vm *vm, int vcpuid, int error_code, uint64_t cr2)
{
	struct vm_exception pf = {
		.vector = IDT_PF,
		.error_code_valid = 1,
		.error_code = error_code
	};
	int error;

	VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %#x, cr2 %#lx",
	    error_code, cr2);

	error = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR2, cr2);
	KASSERT(error == 0, ("vm_set_register(cr2) error %d", error));

	vm_inject_fault(vm, vcpuid, &pf);
}

void
vm_inject_gp(struct vm *vm, int vcpuid)
{
	struct vm_exception gpf = {
		.vector = IDT_GP,
		.error_code_valid = 1,
		.error_code = 0
	};

	vm_inject_fault(vm, vcpuid, &gpf);
}

void
vm_inject_ud(struct vm *vm, int vcpuid)
{
	struct vm_exception udf = {
		.vector = IDT_UD,
		.error_code_valid = 0
	};

	vm_inject_fault(vm, vcpuid, &udf);
}

static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");

int
vm_inject_nmi(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];

	vcpu->nmi_pending = 1;
	vcpu_notify_event(vm, vcpuid, false);
	return (0);
}

int
vm_nmi_pending(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	return (vcpu->nmi_pending);
}

void
vm_nmi_clear(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	if (vcpu->nmi_pending == 0)
		panic("vm_nmi_clear: inconsistent nmi_pending state");

	vcpu->nmi_pending = 0;
	vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1);
}

static VMM_STAT(VCPU_EXTINT_COUNT, "number of ExtINTs delivered to vcpu");

int
vm_inject_extint(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];

	vcpu->extint_pending = 1;
int
vm_extint_pending(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vm_extint_pending: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	return (vcpu->extint_pending);
}

void
vm_extint_clear(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vm_extint_clear: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	if (vcpu->extint_pending == 0)
		panic("vm_extint_clear: inconsistent extint_pending state");

	vcpu->extint_pending = 0;
	vmm_stat_incr(vm, vcpuid, VCPU_EXTINT_COUNT, 1);
}

int
vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
{
	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (VMGETCAP(vm->cookie, vcpu, type, retval));
}

int
vm_set_capability(struct vm *vm, int vcpu, int type, int val)
{
	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (VMSETCAP(vm->cookie, vcpu, type, val));
}

uint64_t *
vm_guest_msrs(struct vm *vm, int cpu)
{
	return (vm->vcpu[cpu].guest_msrs);
}

struct vlapic *
vm_lapic(struct vm *vm, int cpu)
{
	return (vm->vcpu[cpu].vlapic);
}

struct vioapic *
vm_ioapic(struct vm *vm)
{

	return (vm->vioapic);
}

struct vhpet *
vm_hpet(struct vm *vm)
{

	return (vm->vhpet);
}
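
/*
 * Illustrative sketch (not part of the original source): typical use of the
 * capability accessors above.  A caller would normally probe a capability
 * with vm_get_capability() before enabling it with vm_set_capability().
 * VM_CAP_HALT_EXIT is assumed here to be one of the vm_cap_type values from
 * <machine/vmm.h>; example_enable_halt_exit is a hypothetical name.
 */
#if 0	/* example only; not compiled */
static int
example_enable_halt_exit(struct vm *vm, int vcpuid)
{
	int error, val;

	/* An error from the backend means the capability is not supported. */
	error = vm_get_capability(vm, vcpuid, VM_CAP_HALT_EXIT, &val);
	if (error)
		return (error);

	/* Already enabled: nothing to do. */
	if (val != 0)
		return (0);

	return (vm_set_capability(vm, vcpuid, VM_CAP_HALT_EXIT, 1));
}
#endif
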
boolean_t
vmm_is_pptdev(int bus, int slot, int func)
{
	int found, i, n;
	int b, s, f;
	char *val, *cp, *cp2;

	/*
	 * XXX
	 * The length of an environment variable is limited to 128 bytes which
	 * puts an upper limit on the number of passthru devices that may be
	 * specified using a single environment variable.
	 *
	 * Work around this by scanning multiple environment variable
	 * names instead of a single one - yuck!
	 */
	const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL };

	/* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */
	found = 0;
	for (i = 0; names[i] != NULL && !found; i++) {
		cp = val = getenv(names[i]);
		while (cp != NULL && *cp != '\0') {
			if ((cp2 = strchr(cp, ' ')) != NULL)
				*cp2 = '\0';

			n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
			if (n == 3 && bus == b && slot == s && func == f) {
				found = 1;
				break;
			}

			if (cp2 != NULL)
				*cp2++ = ' ';

			cp = cp2;
		}
		freeenv(val);
	}
	return (found);
}

void *
vm_iommu_domain(struct vm *vm)
{

	return (vm->iommu);
}

int
vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate,
    bool from_idle)
{
	int error;
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vcpu_set_state: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);
	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
	vcpu_unlock(vcpu);

	return (error);
}

enum vcpu_state
vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
{
	struct vcpu *vcpu;
	enum vcpu_state state;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vcpu_get_state: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);
	state = vcpu->state;
	if (hostcpu != NULL)
		*hostcpu = vcpu->hostcpu;
	vcpu_unlock(vcpu);

	return (state);
}

int
vm_activate_cpu(struct vm *vm, int vcpuid)
{

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	if (CPU_ISSET(vcpuid, &vm->active_cpus))
		return (EBUSY);

	VCPU_CTR0(vm, vcpuid, "activated");
	CPU_SET_ATOMIC(vcpuid, &vm->active_cpus);
	return (0);
}

cpuset_t
vm_active_cpus(struct vm *vm)
{

	return (vm->active_cpus);
}

cpuset_t
vm_suspended_cpus(struct vm *vm)
{

	return (vm->suspended_cpus);
}

void *
vcpu_stats(struct vm *vm, int vcpuid)
{

	return (vm->vcpu[vcpuid].stats);
}
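
/*
 * Illustrative sketch (not part of the original source): the freeze/unfreeze
 * pattern built on vcpu_set_state() above.  A caller that needs to modify a
 * vcpu from outside its run loop would typically move the vcpu from IDLE to
 * FROZEN, do its work, and then return it to IDLE.  example_modify_vcpu is a
 * hypothetical name; the 'from_idle' semantics are as assumed in the comments.
 */
#if 0	/* example only; not compiled */
static int
example_modify_vcpu(struct vm *vm, int vcpuid)
{
	int error;

	/* 'from_idle' is true: the transition must start from VCPU_IDLE. */
	error = vcpu_set_state(vm, vcpuid, VCPU_FROZEN, true);
	if (error)
		return (error);

	/* ... safe to touch the vcpu's software state here ... */

	/* Drop back to IDLE; this transition does not start from IDLE. */
	return (vcpu_set_state(vm, vcpuid, VCPU_IDLE, false));
}
#endif
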
int
vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state)
{
	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	*state = vm->vcpu[vcpuid].x2apic_state;

	return (0);
}

int
vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
{
	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	if (state >= X2APIC_STATE_LAST)
		return (EINVAL);

	vm->vcpu[vcpuid].x2apic_state = state;

	vlapic_set_x2apic_state(vm, vcpuid, state);

	return (0);
}
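
/*
 * Illustrative sketch (not part of the original source): a caller of
 * vm_set_x2apic_state() above, e.g. ioctl or reset code forcing a vcpu's
 * local APIC back to xAPIC mode.  X2APIC_DISABLED is assumed to be one of
 * the enum x2apic_state values; example_disable_x2apic is a hypothetical name.
 */
#if 0	/* example only; not compiled */
static int
example_disable_x2apic(struct vm *vm, int vcpuid)
{

	/* Rejected with EINVAL if the vcpuid or state is out of range. */
	return (vm_set_x2apic_state(vm, vcpuid, X2APIC_DISABLED));
}
#endif
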
/*
 * This function is called to ensure that a vcpu "sees" a pending event
 * as soon as possible:
 * - If the vcpu thread is sleeping then it is woken up.
 * - If the vcpu is running on a different host_cpu then an IPI will be directed
 *   to the host_cpu to cause the vcpu to trap into the hypervisor.
 */
void
vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
{
	int hostcpu;
	struct vcpu *vcpu;

	vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);
	hostcpu = vcpu->hostcpu;
	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
		if (hostcpu != curcpu) {
			if (lapic_intr) {
				vlapic_post_intr(vcpu->vlapic, hostcpu,
				    vmm_ipinum);
			} else {
				ipi_cpu(hostcpu, vmm_ipinum);
			}
		} else {
			/*
			 * If the 'vcpu' is running on 'curcpu' then it must
			 * be sending a notification to itself (e.g. SELF_IPI).
			 * The pending event will be picked up when the vcpu
			 * transitions back to guest context.
			 */
		}
	} else {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
	}
	vcpu_unlock(vcpu);
}

struct vmspace *
vm_get_vmspace(struct vm *vm)
{

	return (vm->vmspace);
}

int
vm_apicid2vcpuid(struct vm *vm, int apicid)
{
	/*
	 * XXX apic id is assumed to be numerically identical to vcpu id
	 */
	return (apicid);
}
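
/*
 * Illustrative sketch (not part of the original source): the usual pattern
 * around vcpu_notify_event().  A virtual interrupt source records the pending
 * event first and only then notifies, so the target vcpu cannot observe the
 * notification without also seeing the event.  'lapic_intr' would be true
 * only for interrupts latched in the vlapic that may use posted-interrupt
 * delivery.  struct example_softc and example_assert_intr are hypothetical.
 */
#if 0	/* example only; not compiled */
struct example_softc {
	int	intr_pending[VM_MAXCPU];	/* hypothetical device state */
};

static void
example_assert_intr(struct vm *vm, int vcpuid, struct example_softc *sc)
{

	/* Record the event before notifying. */
	sc->intr_pending[vcpuid] = 1;

	/* Wake a sleeping vcpu, or IPI a running one out of guest context. */
	vcpu_notify_event(vm, vcpuid, false);
}
#endif
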
void
vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
    vm_rendezvous_func_t func, void *arg)
{
	int i;

	/*
	 * Enforce that this function is called without any locks
	 */
	WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous");
	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
	    ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid));

restart:
	mtx_lock(&vm->rendezvous_mtx);
	if (vm->rendezvous_func != NULL) {
		/*
		 * If a rendezvous is already in progress then we need to
		 * call the rendezvous handler in case this 'vcpuid' is one
		 * of the targets of the rendezvous.
		 */
		RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress");
		mtx_unlock(&vm->rendezvous_mtx);
		vm_handle_rendezvous(vm, vcpuid);
		goto restart;
	}
	KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous "
	    "rendezvous is still in progress"));

	RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous");
	vm->rendezvous_req_cpus = dest;
	CPU_ZERO(&vm->rendezvous_done_cpus);
	vm->rendezvous_arg = arg;
	vm_set_rendezvous_func(vm, func);
	mtx_unlock(&vm->rendezvous_mtx);

	/*
	 * Wake up any sleeping vcpus and trigger a VM-exit in any running
	 * vcpus so they handle the rendezvous as soon as possible.
	 */
	for (i = 0; i < VM_MAXCPU; i++) {
		if (CPU_ISSET(i, &dest))
			vcpu_notify_event(vm, i, false);
	}

	vm_handle_rendezvous(vm, vcpuid);
}

struct vatpic *
vm_atpic(struct vm *vm)
{
	return (vm->vatpic);
}

struct vatpit *
vm_atpit(struct vm *vm)
{
	return (vm->vatpit);
}

enum vm_reg_name
vm_segment_name(int seg)
{
	static enum vm_reg_name seg_names[] = {
		VM_REG_GUEST_ES,
		VM_REG_GUEST_CS,
		VM_REG_GUEST_SS,
		VM_REG_GUEST_DS,
		VM_REG_GUEST_FS,
		VM_REG_GUEST_GS
	};

	KASSERT(seg >= 0 && seg < nitems(seg_names),
	    ("%s: invalid segment encoding %d", __func__, seg));
	return (seg_names[seg]);
}
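
/*
 * Illustrative sketch (not part of the original source): driving the
 * rendezvous machinery above.  The callback runs once for every vcpu named
 * in 'dest', in that vcpu's own context; it is assumed here to have the
 * (struct vm *, int, void *) shape of vm_rendezvous_func_t.  The names
 * example_flush_cb and example_flush_all_vcpus are hypothetical.
 */
#if 0	/* example only; not compiled */
static void
example_flush_cb(struct vm *vm, int vcpuid, void *arg)
{

	/* ... per-vcpu work, e.g. invalidating cached per-vcpu state ... */
}

static void
example_flush_all_vcpus(struct vm *vm, int vcpuid)
{
	cpuset_t dest;

	/* Target every vcpu that has been activated so far. */
	dest = vm_active_cpus(vm);
	vm_smp_rendezvous(vm, vcpuid, dest, example_flush_cb, NULL);
}
#endif
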