vmm.c revision 249879
1221828Sgrehan/*- 2221828Sgrehan * Copyright (c) 2011 NetApp, Inc. 3221828Sgrehan * All rights reserved. 4221828Sgrehan * 5221828Sgrehan * Redistribution and use in source and binary forms, with or without 6221828Sgrehan * modification, are permitted provided that the following conditions 7221828Sgrehan * are met: 8221828Sgrehan * 1. Redistributions of source code must retain the above copyright 9221828Sgrehan * notice, this list of conditions and the following disclaimer. 10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright 11221828Sgrehan * notice, this list of conditions and the following disclaimer in the 12221828Sgrehan * documentation and/or other materials provided with the distribution. 13221828Sgrehan * 14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17221828Sgrehan * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24221828Sgrehan * SUCH DAMAGE. 25221828Sgrehan * 26221828Sgrehan * $FreeBSD: head/sys/amd64/vmm/vmm.c 249879 2013-04-25 04:56:43Z grehan $ 27221828Sgrehan */ 28221828Sgrehan 29221828Sgrehan#include <sys/cdefs.h> 30221828Sgrehan__FBSDID("$FreeBSD: head/sys/amd64/vmm/vmm.c 249879 2013-04-25 04:56:43Z grehan $"); 31221828Sgrehan 32221828Sgrehan#include <sys/param.h> 33234695Sgrehan#include <sys/systm.h> 34221828Sgrehan#include <sys/kernel.h> 35221828Sgrehan#include <sys/module.h> 36221828Sgrehan#include <sys/sysctl.h> 37221828Sgrehan#include <sys/malloc.h> 38221828Sgrehan#include <sys/pcpu.h> 39221828Sgrehan#include <sys/lock.h> 40221828Sgrehan#include <sys/mutex.h> 41221828Sgrehan#include <sys/proc.h> 42221828Sgrehan#include <sys/sched.h> 43221828Sgrehan#include <sys/smp.h> 44221828Sgrehan#include <sys/systm.h> 45221828Sgrehan 46221828Sgrehan#include <vm/vm.h> 47221828Sgrehan 48221828Sgrehan#include <machine/vm.h> 49221828Sgrehan#include <machine/pcb.h> 50241489Sneel#include <machine/smp.h> 51221914Sjhb#include <x86/apicreg.h> 52221828Sgrehan 53221828Sgrehan#include <machine/vmm.h> 54242275Sneel#include "vmm_host.h" 55221828Sgrehan#include "vmm_mem.h" 56221828Sgrehan#include "vmm_util.h" 57221828Sgrehan#include <machine/vmm_dev.h> 58221828Sgrehan#include "vlapic.h" 59221828Sgrehan#include "vmm_msr.h" 60221828Sgrehan#include "vmm_ipi.h" 61221828Sgrehan#include "vmm_stat.h" 62242065Sneel#include "vmm_lapic.h" 63221828Sgrehan 64221828Sgrehan#include "io/ppt.h" 65221828Sgrehan#include "io/iommu.h" 66221828Sgrehan 67221828Sgrehanstruct vlapic; 68221828Sgrehan 69221828Sgrehanstruct vcpu { 70221828Sgrehan int flags; 71241489Sneel enum vcpu_state state; 72241489Sneel struct mtx mtx; 73221828Sgrehan int hostcpu; /* host cpuid this vcpu last ran on */ 74221828Sgrehan uint64_t guest_msrs[VMM_MSR_NUM]; 75221828Sgrehan struct vlapic *vlapic; 76221828Sgrehan int vcpuid; 77234695Sgrehan struct savefpu *guestfpu; /* guest fpu state */ 78221828Sgrehan void *stats; 79240894Sneel struct vm_exit exitinfo; 80240922Sneel enum x2apic_state x2apic_state; 81241982Sneel int nmi_pending; 82221828Sgrehan}; 83221828Sgrehan 84242065Sneel#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) 85242065Sneel#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) 86242065Sneel#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) 87241489Sneel 88221828Sgrehan#define VM_MAX_MEMORY_SEGMENTS 2 89221828Sgrehan 90221828Sgrehanstruct vm { 91221828Sgrehan void *cookie; /* processor-specific data */ 92221828Sgrehan void *iommu; /* iommu-specific data */ 93221828Sgrehan struct vcpu vcpu[VM_MAXCPU]; 94221828Sgrehan int num_mem_segs; 95221828Sgrehan struct vm_memory_segment mem_segs[VM_MAX_MEMORY_SEGMENTS]; 96221828Sgrehan char name[VM_MAX_NAMELEN]; 97221828Sgrehan 98221828Sgrehan /* 99223621Sgrehan * Set of active vcpus. 100221828Sgrehan * An active vcpu is one that has been started implicitly (BSP) or 101221828Sgrehan * explicitly (AP) by sending it a startup ipi. 102221828Sgrehan */ 103223621Sgrehan cpuset_t active_cpus; 104221828Sgrehan}; 105221828Sgrehan 106249396Sneelstatic int vmm_initialized; 107249396Sneel 108221828Sgrehanstatic struct vmm_ops *ops; 109221828Sgrehan#define VMM_INIT() (ops != NULL ? (*ops->init)() : 0) 110221828Sgrehan#define VMM_CLEANUP() (ops != NULL ? (*ops->cleanup)() : 0) 111221828Sgrehan 112221828Sgrehan#define VMINIT(vm) (ops != NULL ? (*ops->vminit)(vm): NULL) 113240894Sneel#define VMRUN(vmi, vcpu, rip) \ 114240894Sneel (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip) : ENXIO) 115221828Sgrehan#define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL) 116241147Sneel#define VMMMAP_SET(vmi, gpa, hpa, len, attr, prot, spm) \ 117241147Sneel (ops != NULL ? \ 118241147Sneel (*ops->vmmmap_set)(vmi, gpa, hpa, len, attr, prot, spm) : \ 119241147Sneel ENXIO) 120241147Sneel#define VMMMAP_GET(vmi, gpa) \ 121241147Sneel (ops != NULL ? (*ops->vmmmap_get)(vmi, gpa) : ENXIO) 122221828Sgrehan#define VMGETREG(vmi, vcpu, num, retval) \ 123221828Sgrehan (ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO) 124221828Sgrehan#define VMSETREG(vmi, vcpu, num, val) \ 125221828Sgrehan (ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO) 126221828Sgrehan#define VMGETDESC(vmi, vcpu, num, desc) \ 127221828Sgrehan (ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO) 128221828Sgrehan#define VMSETDESC(vmi, vcpu, num, desc) \ 129221828Sgrehan (ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO) 130221828Sgrehan#define VMINJECT(vmi, vcpu, type, vec, ec, ecv) \ 131221828Sgrehan (ops != NULL ? (*ops->vminject)(vmi, vcpu, type, vec, ec, ecv) : ENXIO) 132221828Sgrehan#define VMGETCAP(vmi, vcpu, num, retval) \ 133221828Sgrehan (ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO) 134221828Sgrehan#define VMSETCAP(vmi, vcpu, num, val) \ 135221828Sgrehan (ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO) 136221828Sgrehan 137245021Sneel#define fpu_start_emulating() load_cr0(rcr0() | CR0_TS) 138245021Sneel#define fpu_stop_emulating() clts() 139221828Sgrehan 140221828Sgrehanstatic MALLOC_DEFINE(M_VM, "vm", "vm"); 141221828SgrehanCTASSERT(VMM_MSR_NUM <= 64); /* msr_mask can keep track of up to 64 msrs */ 142221828Sgrehan 143221828Sgrehan/* statistics */ 144248389Sneelstatic VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); 145221828Sgrehan 146221828Sgrehanstatic void 147221828Sgrehanvcpu_cleanup(struct vcpu *vcpu) 148221828Sgrehan{ 149221828Sgrehan vlapic_cleanup(vcpu->vlapic); 150234695Sgrehan vmm_stat_free(vcpu->stats); 151234695Sgrehan fpu_save_area_free(vcpu->guestfpu); 152221828Sgrehan} 153221828Sgrehan 154221828Sgrehanstatic void 155221828Sgrehanvcpu_init(struct vm *vm, uint32_t vcpu_id) 156221828Sgrehan{ 157221828Sgrehan struct vcpu *vcpu; 158221828Sgrehan 159221828Sgrehan vcpu = &vm->vcpu[vcpu_id]; 160221828Sgrehan 161241489Sneel vcpu_lock_init(vcpu); 162241489Sneel vcpu->hostcpu = NOCPU; 163221828Sgrehan vcpu->vcpuid = vcpu_id; 164221828Sgrehan vcpu->vlapic = vlapic_init(vm, vcpu_id); 165240943Sneel vm_set_x2apic_state(vm, vcpu_id, X2APIC_ENABLED); 166234695Sgrehan vcpu->guestfpu = fpu_save_area_alloc(); 167234695Sgrehan fpu_save_area_reset(vcpu->guestfpu); 168221828Sgrehan vcpu->stats = vmm_stat_alloc(); 169221828Sgrehan} 170221828Sgrehan 171240894Sneelstruct vm_exit * 172240894Sneelvm_exitinfo(struct vm *vm, int cpuid) 173240894Sneel{ 174240894Sneel struct vcpu *vcpu; 175240894Sneel 176240894Sneel if (cpuid < 0 || cpuid >= VM_MAXCPU) 177240894Sneel panic("vm_exitinfo: invalid cpuid %d", cpuid); 178240894Sneel 179240894Sneel vcpu = &vm->vcpu[cpuid]; 180240894Sneel 181240894Sneel return (&vcpu->exitinfo); 182240894Sneel} 183240894Sneel 184221828Sgrehanstatic int 185221828Sgrehanvmm_init(void) 186221828Sgrehan{ 187221828Sgrehan int error; 188221828Sgrehan 189242275Sneel vmm_host_state_init(); 190221828Sgrehan vmm_ipi_init(); 191221828Sgrehan 192221828Sgrehan error = vmm_mem_init(); 193221828Sgrehan if (error) 194221828Sgrehan return (error); 195221828Sgrehan 196221828Sgrehan if (vmm_is_intel()) 197221828Sgrehan ops = &vmm_ops_intel; 198221828Sgrehan else if (vmm_is_amd()) 199221828Sgrehan ops = &vmm_ops_amd; 200221828Sgrehan else 201221828Sgrehan return (ENXIO); 202221828Sgrehan 203221828Sgrehan vmm_msr_init(); 204221828Sgrehan 205221828Sgrehan return (VMM_INIT()); 206221828Sgrehan} 207221828Sgrehan 208221828Sgrehanstatic int 209221828Sgrehanvmm_handler(module_t mod, int what, void *arg) 210221828Sgrehan{ 211221828Sgrehan int error; 212221828Sgrehan 213221828Sgrehan switch (what) { 214221828Sgrehan case MOD_LOAD: 215221828Sgrehan vmmdev_init(); 216221828Sgrehan iommu_init(); 217221828Sgrehan error = vmm_init(); 218249396Sneel if (error == 0) 219249396Sneel vmm_initialized = 1; 220221828Sgrehan break; 221221828Sgrehan case MOD_UNLOAD: 222241454Sneel error = vmmdev_cleanup(); 223241454Sneel if (error == 0) { 224241454Sneel iommu_cleanup(); 225241454Sneel vmm_ipi_cleanup(); 226241454Sneel error = VMM_CLEANUP(); 227241454Sneel } 228249396Sneel vmm_initialized = 0; 229221828Sgrehan break; 230221828Sgrehan default: 231221828Sgrehan error = 0; 232221828Sgrehan break; 233221828Sgrehan } 234221828Sgrehan return (error); 235221828Sgrehan} 236221828Sgrehan 237221828Sgrehanstatic moduledata_t vmm_kmod = { 238221828Sgrehan "vmm", 239221828Sgrehan vmm_handler, 240221828Sgrehan NULL 241221828Sgrehan}; 242221828Sgrehan 243221828Sgrehan/* 244245704Sneel * vmm initialization has the following dependencies: 245245704Sneel * 246245704Sneel * - iommu initialization must happen after the pci passthru driver has had 247245704Sneel * a chance to attach to any passthru devices (after SI_SUB_CONFIGURE). 248245704Sneel * 249245704Sneel * - VT-x initialization requires smp_rendezvous() and therefore must happen 250245704Sneel * after SMP is fully functional (after SI_SUB_SMP). 251221828Sgrehan */ 252245704SneelDECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY); 253221828SgrehanMODULE_VERSION(vmm, 1); 254221828Sgrehan 255221828SgrehanSYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); 256221828Sgrehan 257249396Sneelint 258249396Sneelvm_create(const char *name, struct vm **retvm) 259221828Sgrehan{ 260221828Sgrehan int i; 261221828Sgrehan struct vm *vm; 262221828Sgrehan vm_paddr_t maxaddr; 263221828Sgrehan 264221828Sgrehan const int BSP = 0; 265221828Sgrehan 266249396Sneel /* 267249396Sneel * If vmm.ko could not be successfully initialized then don't attempt 268249396Sneel * to create the virtual machine. 269249396Sneel */ 270249396Sneel if (!vmm_initialized) 271249396Sneel return (ENXIO); 272249396Sneel 273221828Sgrehan if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) 274249396Sneel return (EINVAL); 275221828Sgrehan 276221828Sgrehan vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO); 277221828Sgrehan strcpy(vm->name, name); 278221828Sgrehan vm->cookie = VMINIT(vm); 279221828Sgrehan 280221828Sgrehan for (i = 0; i < VM_MAXCPU; i++) { 281221828Sgrehan vcpu_init(vm, i); 282221828Sgrehan guest_msrs_init(vm, i); 283221828Sgrehan } 284221828Sgrehan 285221828Sgrehan maxaddr = vmm_mem_maxaddr(); 286221828Sgrehan vm->iommu = iommu_create_domain(maxaddr); 287221828Sgrehan vm_activate_cpu(vm, BSP); 288221828Sgrehan 289249396Sneel *retvm = vm; 290249396Sneel return (0); 291221828Sgrehan} 292221828Sgrehan 293241178Sneelstatic void 294241178Sneelvm_free_mem_seg(struct vm *vm, struct vm_memory_segment *seg) 295241178Sneel{ 296241178Sneel size_t len; 297241178Sneel vm_paddr_t hpa; 298241362Sneel void *host_domain; 299241178Sneel 300241362Sneel host_domain = iommu_host_domain(); 301241362Sneel 302241178Sneel len = 0; 303241178Sneel while (len < seg->len) { 304241178Sneel hpa = vm_gpa2hpa(vm, seg->gpa + len, PAGE_SIZE); 305241178Sneel if (hpa == (vm_paddr_t)-1) { 306241178Sneel panic("vm_free_mem_segs: cannot free hpa " 307241178Sneel "associated with gpa 0x%016lx", seg->gpa + len); 308241178Sneel } 309241178Sneel 310241362Sneel /* 311241362Sneel * Remove the 'gpa' to 'hpa' mapping in VMs domain. 312241362Sneel * And resurrect the 1:1 mapping for 'hpa' in 'host_domain'. 313241362Sneel */ 314241362Sneel iommu_remove_mapping(vm->iommu, seg->gpa + len, PAGE_SIZE); 315241362Sneel iommu_create_mapping(host_domain, hpa, hpa, PAGE_SIZE); 316241362Sneel 317241178Sneel vmm_mem_free(hpa, PAGE_SIZE); 318241178Sneel 319241178Sneel len += PAGE_SIZE; 320241178Sneel } 321241178Sneel 322241362Sneel /* 323241362Sneel * Invalidate cached translations associated with 'vm->iommu' since 324241362Sneel * we have now moved some pages from it. 325241362Sneel */ 326241362Sneel iommu_invalidate_tlb(vm->iommu); 327241362Sneel 328241178Sneel bzero(seg, sizeof(struct vm_memory_segment)); 329241178Sneel} 330241178Sneel 331221828Sgrehanvoid 332221828Sgrehanvm_destroy(struct vm *vm) 333221828Sgrehan{ 334221828Sgrehan int i; 335221828Sgrehan 336221828Sgrehan ppt_unassign_all(vm); 337221828Sgrehan 338221828Sgrehan for (i = 0; i < vm->num_mem_segs; i++) 339241178Sneel vm_free_mem_seg(vm, &vm->mem_segs[i]); 340221828Sgrehan 341241178Sneel vm->num_mem_segs = 0; 342241178Sneel 343221828Sgrehan for (i = 0; i < VM_MAXCPU; i++) 344221828Sgrehan vcpu_cleanup(&vm->vcpu[i]); 345221828Sgrehan 346221828Sgrehan iommu_destroy_domain(vm->iommu); 347221828Sgrehan 348221828Sgrehan VMCLEANUP(vm->cookie); 349221828Sgrehan 350221828Sgrehan free(vm, M_VM); 351221828Sgrehan} 352221828Sgrehan 353221828Sgrehanconst char * 354221828Sgrehanvm_name(struct vm *vm) 355221828Sgrehan{ 356221828Sgrehan return (vm->name); 357221828Sgrehan} 358221828Sgrehan 359221828Sgrehanint 360221828Sgrehanvm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) 361221828Sgrehan{ 362221828Sgrehan const boolean_t spok = TRUE; /* superpage mappings are ok */ 363221828Sgrehan 364241147Sneel return (VMMMAP_SET(vm->cookie, gpa, hpa, len, VM_MEMATTR_UNCACHEABLE, 365241147Sneel VM_PROT_RW, spok)); 366221828Sgrehan} 367221828Sgrehan 368221828Sgrehanint 369221828Sgrehanvm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len) 370221828Sgrehan{ 371221828Sgrehan const boolean_t spok = TRUE; /* superpage mappings are ok */ 372221828Sgrehan 373241147Sneel return (VMMMAP_SET(vm->cookie, gpa, 0, len, 0, 374241147Sneel VM_PROT_NONE, spok)); 375221828Sgrehan} 376221828Sgrehan 377241041Sneel/* 378241041Sneel * Returns TRUE if 'gpa' is available for allocation and FALSE otherwise 379241041Sneel */ 380241041Sneelstatic boolean_t 381241041Sneelvm_gpa_available(struct vm *vm, vm_paddr_t gpa) 382241041Sneel{ 383241041Sneel int i; 384241041Sneel vm_paddr_t gpabase, gpalimit; 385241041Sneel 386241041Sneel if (gpa & PAGE_MASK) 387241041Sneel panic("vm_gpa_available: gpa (0x%016lx) not page aligned", gpa); 388241041Sneel 389241041Sneel for (i = 0; i < vm->num_mem_segs; i++) { 390241041Sneel gpabase = vm->mem_segs[i].gpa; 391241041Sneel gpalimit = gpabase + vm->mem_segs[i].len; 392241041Sneel if (gpa >= gpabase && gpa < gpalimit) 393241041Sneel return (FALSE); 394241041Sneel } 395241041Sneel 396241041Sneel return (TRUE); 397241041Sneel} 398241041Sneel 399221828Sgrehanint 400241041Sneelvm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len) 401221828Sgrehan{ 402241041Sneel int error, available, allocated; 403241178Sneel struct vm_memory_segment *seg; 404241041Sneel vm_paddr_t g, hpa; 405241362Sneel void *host_domain; 406221828Sgrehan 407221828Sgrehan const boolean_t spok = TRUE; /* superpage mappings are ok */ 408241041Sneel 409241041Sneel if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0) 410241041Sneel return (EINVAL); 411221828Sgrehan 412241041Sneel available = allocated = 0; 413241041Sneel g = gpa; 414241041Sneel while (g < gpa + len) { 415241041Sneel if (vm_gpa_available(vm, g)) 416241041Sneel available++; 417241041Sneel else 418241041Sneel allocated++; 419241041Sneel 420241041Sneel g += PAGE_SIZE; 421241041Sneel } 422241041Sneel 423221828Sgrehan /* 424241041Sneel * If there are some allocated and some available pages in the address 425241041Sneel * range then it is an error. 426221828Sgrehan */ 427241041Sneel if (allocated && available) 428241041Sneel return (EINVAL); 429221828Sgrehan 430241041Sneel /* 431241041Sneel * If the entire address range being requested has already been 432241041Sneel * allocated then there isn't anything more to do. 433241041Sneel */ 434241041Sneel if (allocated && available == 0) 435241041Sneel return (0); 436241041Sneel 437221828Sgrehan if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS) 438221828Sgrehan return (E2BIG); 439221828Sgrehan 440241362Sneel host_domain = iommu_host_domain(); 441241362Sneel 442241178Sneel seg = &vm->mem_segs[vm->num_mem_segs]; 443221828Sgrehan 444241362Sneel error = 0; 445241178Sneel seg->gpa = gpa; 446241178Sneel seg->len = 0; 447241178Sneel while (seg->len < len) { 448241178Sneel hpa = vmm_mem_alloc(PAGE_SIZE); 449241178Sneel if (hpa == 0) { 450241178Sneel error = ENOMEM; 451241178Sneel break; 452241178Sneel } 453241178Sneel 454241178Sneel error = VMMMAP_SET(vm->cookie, gpa + seg->len, hpa, PAGE_SIZE, 455241178Sneel VM_MEMATTR_WRITE_BACK, VM_PROT_ALL, spok); 456241178Sneel if (error) 457241178Sneel break; 458241178Sneel 459241362Sneel /* 460241362Sneel * Remove the 1:1 mapping for 'hpa' from the 'host_domain'. 461241362Sneel * Add mapping for 'gpa + seg->len' to 'hpa' in the VMs domain. 462241362Sneel */ 463241362Sneel iommu_remove_mapping(host_domain, hpa, PAGE_SIZE); 464241178Sneel iommu_create_mapping(vm->iommu, gpa + seg->len, hpa, PAGE_SIZE); 465241178Sneel 466241178Sneel seg->len += PAGE_SIZE; 467241178Sneel } 468241178Sneel 469241362Sneel if (error) { 470241178Sneel vm_free_mem_seg(vm, seg); 471221828Sgrehan return (error); 472221828Sgrehan } 473221828Sgrehan 474241362Sneel /* 475241362Sneel * Invalidate cached translations associated with 'host_domain' since 476241362Sneel * we have now moved some pages from it. 477241362Sneel */ 478241362Sneel iommu_invalidate_tlb(host_domain); 479241362Sneel 480221828Sgrehan vm->num_mem_segs++; 481241041Sneel 482221828Sgrehan return (0); 483221828Sgrehan} 484221828Sgrehan 485221828Sgrehanvm_paddr_t 486221828Sgrehanvm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t len) 487221828Sgrehan{ 488241148Sneel vm_paddr_t nextpage; 489221828Sgrehan 490241148Sneel nextpage = rounddown(gpa + PAGE_SIZE, PAGE_SIZE); 491241148Sneel if (len > nextpage - gpa) 492241148Sneel panic("vm_gpa2hpa: invalid gpa/len: 0x%016lx/%lu", gpa, len); 493241148Sneel 494241147Sneel return (VMMMAP_GET(vm->cookie, gpa)); 495221828Sgrehan} 496221828Sgrehan 497221828Sgrehanint 498221828Sgrehanvm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase, 499221828Sgrehan struct vm_memory_segment *seg) 500221828Sgrehan{ 501221828Sgrehan int i; 502221828Sgrehan 503221828Sgrehan for (i = 0; i < vm->num_mem_segs; i++) { 504221828Sgrehan if (gpabase == vm->mem_segs[i].gpa) { 505221828Sgrehan *seg = vm->mem_segs[i]; 506221828Sgrehan return (0); 507221828Sgrehan } 508221828Sgrehan } 509221828Sgrehan return (-1); 510221828Sgrehan} 511221828Sgrehan 512221828Sgrehanint 513221828Sgrehanvm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval) 514221828Sgrehan{ 515221828Sgrehan 516221828Sgrehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 517221828Sgrehan return (EINVAL); 518221828Sgrehan 519221828Sgrehan if (reg >= VM_REG_LAST) 520221828Sgrehan return (EINVAL); 521221828Sgrehan 522221828Sgrehan return (VMGETREG(vm->cookie, vcpu, reg, retval)); 523221828Sgrehan} 524221828Sgrehan 525221828Sgrehanint 526221828Sgrehanvm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val) 527221828Sgrehan{ 528221828Sgrehan 529221828Sgrehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 530221828Sgrehan return (EINVAL); 531221828Sgrehan 532221828Sgrehan if (reg >= VM_REG_LAST) 533221828Sgrehan return (EINVAL); 534221828Sgrehan 535221828Sgrehan return (VMSETREG(vm->cookie, vcpu, reg, val)); 536221828Sgrehan} 537221828Sgrehan 538221828Sgrehanstatic boolean_t 539221828Sgrehanis_descriptor_table(int reg) 540221828Sgrehan{ 541221828Sgrehan 542221828Sgrehan switch (reg) { 543221828Sgrehan case VM_REG_GUEST_IDTR: 544221828Sgrehan case VM_REG_GUEST_GDTR: 545221828Sgrehan return (TRUE); 546221828Sgrehan default: 547221828Sgrehan return (FALSE); 548221828Sgrehan } 549221828Sgrehan} 550221828Sgrehan 551221828Sgrehanstatic boolean_t 552221828Sgrehanis_segment_register(int reg) 553221828Sgrehan{ 554221828Sgrehan 555221828Sgrehan switch (reg) { 556221828Sgrehan case VM_REG_GUEST_ES: 557221828Sgrehan case VM_REG_GUEST_CS: 558221828Sgrehan case VM_REG_GUEST_SS: 559221828Sgrehan case VM_REG_GUEST_DS: 560221828Sgrehan case VM_REG_GUEST_FS: 561221828Sgrehan case VM_REG_GUEST_GS: 562221828Sgrehan case VM_REG_GUEST_TR: 563221828Sgrehan case VM_REG_GUEST_LDTR: 564221828Sgrehan return (TRUE); 565221828Sgrehan default: 566221828Sgrehan return (FALSE); 567221828Sgrehan } 568221828Sgrehan} 569221828Sgrehan 570221828Sgrehanint 571221828Sgrehanvm_get_seg_desc(struct vm *vm, int vcpu, int reg, 572221828Sgrehan struct seg_desc *desc) 573221828Sgrehan{ 574221828Sgrehan 575221828Sgrehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 576221828Sgrehan return (EINVAL); 577221828Sgrehan 578221828Sgrehan if (!is_segment_register(reg) && !is_descriptor_table(reg)) 579221828Sgrehan return (EINVAL); 580221828Sgrehan 581221828Sgrehan return (VMGETDESC(vm->cookie, vcpu, reg, desc)); 582221828Sgrehan} 583221828Sgrehan 584221828Sgrehanint 585221828Sgrehanvm_set_seg_desc(struct vm *vm, int vcpu, int reg, 586221828Sgrehan struct seg_desc *desc) 587221828Sgrehan{ 588221828Sgrehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 589221828Sgrehan return (EINVAL); 590221828Sgrehan 591221828Sgrehan if (!is_segment_register(reg) && !is_descriptor_table(reg)) 592221828Sgrehan return (EINVAL); 593221828Sgrehan 594221828Sgrehan return (VMSETDESC(vm->cookie, vcpu, reg, desc)); 595221828Sgrehan} 596221828Sgrehan 597221828Sgrehanstatic void 598221828Sgrehanrestore_guest_fpustate(struct vcpu *vcpu) 599221828Sgrehan{ 600221828Sgrehan 601234695Sgrehan /* flush host state to the pcb */ 602234695Sgrehan fpuexit(curthread); 603242122Sneel 604242122Sneel /* restore guest FPU state */ 605221828Sgrehan fpu_stop_emulating(); 606234695Sgrehan fpurestore(vcpu->guestfpu); 607242122Sneel 608242122Sneel /* 609242122Sneel * The FPU is now "dirty" with the guest's state so turn on emulation 610242122Sneel * to trap any access to the FPU by the host. 611242122Sneel */ 612242122Sneel fpu_start_emulating(); 613221828Sgrehan} 614221828Sgrehan 615221828Sgrehanstatic void 616221828Sgrehansave_guest_fpustate(struct vcpu *vcpu) 617221828Sgrehan{ 618221828Sgrehan 619242122Sneel if ((rcr0() & CR0_TS) == 0) 620242122Sneel panic("fpu emulation not enabled in host!"); 621242122Sneel 622242122Sneel /* save guest FPU state */ 623242122Sneel fpu_stop_emulating(); 624234695Sgrehan fpusave(vcpu->guestfpu); 625221828Sgrehan fpu_start_emulating(); 626221828Sgrehan} 627221828Sgrehan 628248389Sneelstatic VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle"); 629242065Sneel 630221828Sgrehanint 631221828Sgrehanvm_run(struct vm *vm, struct vm_run *vmrun) 632221828Sgrehan{ 633242065Sneel int error, vcpuid, sleepticks, t; 634221828Sgrehan struct vcpu *vcpu; 635221828Sgrehan struct pcb *pcb; 636242065Sneel uint64_t tscval, rip; 637242065Sneel struct vm_exit *vme; 638221828Sgrehan 639221828Sgrehan vcpuid = vmrun->cpuid; 640221828Sgrehan 641221828Sgrehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 642221828Sgrehan return (EINVAL); 643221828Sgrehan 644221828Sgrehan vcpu = &vm->vcpu[vcpuid]; 645242065Sneel vme = &vmrun->vm_exit; 646242065Sneel rip = vmrun->rip; 647242065Sneelrestart: 648221828Sgrehan critical_enter(); 649221828Sgrehan 650221828Sgrehan tscval = rdtsc(); 651221828Sgrehan 652221828Sgrehan pcb = PCPU_GET(curpcb); 653221914Sjhb set_pcb_flags(pcb, PCB_FULL_IRET); 654221828Sgrehan 655234695Sgrehan restore_guest_msrs(vm, vcpuid); 656221828Sgrehan restore_guest_fpustate(vcpu); 657241489Sneel 658241489Sneel vcpu->hostcpu = curcpu; 659242065Sneel error = VMRUN(vm->cookie, vcpuid, rip); 660241489Sneel vcpu->hostcpu = NOCPU; 661241489Sneel 662221828Sgrehan save_guest_fpustate(vcpu); 663221828Sgrehan restore_host_msrs(vm, vcpuid); 664221828Sgrehan 665221828Sgrehan vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval); 666221828Sgrehan 667240894Sneel /* copy the exit information */ 668242065Sneel bcopy(&vcpu->exitinfo, vme, sizeof(struct vm_exit)); 669240894Sneel 670221828Sgrehan critical_exit(); 671221828Sgrehan 672242065Sneel /* 673242065Sneel * Oblige the guest's desire to 'hlt' by sleeping until the vcpu 674242065Sneel * is ready to run. 675242065Sneel */ 676242065Sneel if (error == 0 && vme->exitcode == VM_EXITCODE_HLT) { 677242065Sneel vcpu_lock(vcpu); 678242065Sneel 679242065Sneel /* 680242065Sneel * Figure out the number of host ticks until the next apic 681242065Sneel * timer interrupt in the guest. 682242065Sneel */ 683242065Sneel sleepticks = lapic_timer_tick(vm, vcpuid); 684242065Sneel 685242065Sneel /* 686242065Sneel * If the guest local apic timer is disabled then sleep for 687242065Sneel * a long time but not forever. 688242065Sneel */ 689242065Sneel if (sleepticks < 0) 690242065Sneel sleepticks = hz; 691242065Sneel 692242065Sneel /* 693242065Sneel * Do a final check for pending NMI or interrupts before 694242065Sneel * really putting this thread to sleep. 695242065Sneel * 696242065Sneel * These interrupts could have happened any time after we 697242065Sneel * returned from VMRUN() and before we grabbed the vcpu lock. 698242065Sneel */ 699242065Sneel if (!vm_nmi_pending(vm, vcpuid) && 700242065Sneel lapic_pending_intr(vm, vcpuid) < 0) { 701242065Sneel if (sleepticks <= 0) 702242065Sneel panic("invalid sleepticks %d", sleepticks); 703242065Sneel t = ticks; 704242065Sneel msleep_spin(vcpu, &vcpu->mtx, "vmidle", sleepticks); 705242065Sneel vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t); 706242065Sneel } 707242065Sneel 708242065Sneel vcpu_unlock(vcpu); 709242065Sneel 710242065Sneel rip = vme->rip + vme->inst_length; 711242065Sneel goto restart; 712242065Sneel } 713242065Sneel 714221828Sgrehan return (error); 715221828Sgrehan} 716221828Sgrehan 717221828Sgrehanint 718221828Sgrehanvm_inject_event(struct vm *vm, int vcpuid, int type, 719221828Sgrehan int vector, uint32_t code, int code_valid) 720221828Sgrehan{ 721221828Sgrehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 722221828Sgrehan return (EINVAL); 723221828Sgrehan 724221828Sgrehan if ((type > VM_EVENT_NONE && type < VM_EVENT_MAX) == 0) 725221828Sgrehan return (EINVAL); 726221828Sgrehan 727221828Sgrehan if (vector < 0 || vector > 255) 728221828Sgrehan return (EINVAL); 729221828Sgrehan 730221828Sgrehan return (VMINJECT(vm->cookie, vcpuid, type, vector, code, code_valid)); 731221828Sgrehan} 732221828Sgrehan 733248389Sneelstatic VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu"); 734241982Sneel 735221828Sgrehanint 736241982Sneelvm_inject_nmi(struct vm *vm, int vcpuid) 737221828Sgrehan{ 738241982Sneel struct vcpu *vcpu; 739221828Sgrehan 740241982Sneel if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 741221828Sgrehan return (EINVAL); 742221828Sgrehan 743241982Sneel vcpu = &vm->vcpu[vcpuid]; 744241982Sneel 745241982Sneel vcpu->nmi_pending = 1; 746241982Sneel vm_interrupt_hostcpu(vm, vcpuid); 747241982Sneel return (0); 748221828Sgrehan} 749221828Sgrehan 750221828Sgrehanint 751241982Sneelvm_nmi_pending(struct vm *vm, int vcpuid) 752241982Sneel{ 753241982Sneel struct vcpu *vcpu; 754241982Sneel 755241982Sneel if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 756241982Sneel panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); 757241982Sneel 758241982Sneel vcpu = &vm->vcpu[vcpuid]; 759241982Sneel 760241982Sneel return (vcpu->nmi_pending); 761241982Sneel} 762241982Sneel 763241982Sneelvoid 764241982Sneelvm_nmi_clear(struct vm *vm, int vcpuid) 765241982Sneel{ 766241982Sneel struct vcpu *vcpu; 767241982Sneel 768241982Sneel if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 769241982Sneel panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); 770241982Sneel 771241982Sneel vcpu = &vm->vcpu[vcpuid]; 772241982Sneel 773241982Sneel if (vcpu->nmi_pending == 0) 774241982Sneel panic("vm_nmi_clear: inconsistent nmi_pending state"); 775241982Sneel 776241982Sneel vcpu->nmi_pending = 0; 777241982Sneel vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1); 778241982Sneel} 779241982Sneel 780241982Sneelint 781221828Sgrehanvm_get_capability(struct vm *vm, int vcpu, int type, int *retval) 782221828Sgrehan{ 783221828Sgrehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 784221828Sgrehan return (EINVAL); 785221828Sgrehan 786221828Sgrehan if (type < 0 || type >= VM_CAP_MAX) 787221828Sgrehan return (EINVAL); 788221828Sgrehan 789221828Sgrehan return (VMGETCAP(vm->cookie, vcpu, type, retval)); 790221828Sgrehan} 791221828Sgrehan 792221828Sgrehanint 793221828Sgrehanvm_set_capability(struct vm *vm, int vcpu, int type, int val) 794221828Sgrehan{ 795221828Sgrehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 796221828Sgrehan return (EINVAL); 797221828Sgrehan 798221828Sgrehan if (type < 0 || type >= VM_CAP_MAX) 799221828Sgrehan return (EINVAL); 800221828Sgrehan 801221828Sgrehan return (VMSETCAP(vm->cookie, vcpu, type, val)); 802221828Sgrehan} 803221828Sgrehan 804221828Sgrehanuint64_t * 805221828Sgrehanvm_guest_msrs(struct vm *vm, int cpu) 806221828Sgrehan{ 807221828Sgrehan return (vm->vcpu[cpu].guest_msrs); 808221828Sgrehan} 809221828Sgrehan 810221828Sgrehanstruct vlapic * 811221828Sgrehanvm_lapic(struct vm *vm, int cpu) 812221828Sgrehan{ 813221828Sgrehan return (vm->vcpu[cpu].vlapic); 814221828Sgrehan} 815221828Sgrehan 816221828Sgrehanboolean_t 817221828Sgrehanvmm_is_pptdev(int bus, int slot, int func) 818221828Sgrehan{ 819246188Sneel int found, i, n; 820246188Sneel int b, s, f; 821221828Sgrehan char *val, *cp, *cp2; 822221828Sgrehan 823221828Sgrehan /* 824246188Sneel * XXX 825246188Sneel * The length of an environment variable is limited to 128 bytes which 826246188Sneel * puts an upper limit on the number of passthru devices that may be 827246188Sneel * specified using a single environment variable. 828246188Sneel * 829246188Sneel * Work around this by scanning multiple environment variable 830246188Sneel * names instead of a single one - yuck! 831221828Sgrehan */ 832246188Sneel const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL }; 833246188Sneel 834246188Sneel /* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */ 835221828Sgrehan found = 0; 836246188Sneel for (i = 0; names[i] != NULL && !found; i++) { 837246188Sneel cp = val = getenv(names[i]); 838246188Sneel while (cp != NULL && *cp != '\0') { 839246188Sneel if ((cp2 = strchr(cp, ' ')) != NULL) 840246188Sneel *cp2 = '\0'; 841221828Sgrehan 842246188Sneel n = sscanf(cp, "%d/%d/%d", &b, &s, &f); 843246188Sneel if (n == 3 && bus == b && slot == s && func == f) { 844246188Sneel found = 1; 845246188Sneel break; 846246188Sneel } 847221828Sgrehan 848246188Sneel if (cp2 != NULL) 849246188Sneel *cp2++ = ' '; 850221828Sgrehan 851246188Sneel cp = cp2; 852246188Sneel } 853246188Sneel freeenv(val); 854221828Sgrehan } 855221828Sgrehan return (found); 856221828Sgrehan} 857221828Sgrehan 858221828Sgrehanvoid * 859221828Sgrehanvm_iommu_domain(struct vm *vm) 860221828Sgrehan{ 861221828Sgrehan 862221828Sgrehan return (vm->iommu); 863221828Sgrehan} 864221828Sgrehan 865241489Sneelint 866241489Sneelvcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state state) 867221828Sgrehan{ 868241489Sneel int error; 869221828Sgrehan struct vcpu *vcpu; 870221828Sgrehan 871221828Sgrehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 872221828Sgrehan panic("vm_set_run_state: invalid vcpuid %d", vcpuid); 873221828Sgrehan 874221828Sgrehan vcpu = &vm->vcpu[vcpuid]; 875221828Sgrehan 876241489Sneel vcpu_lock(vcpu); 877241489Sneel 878241489Sneel /* 879241489Sneel * The following state transitions are allowed: 880241489Sneel * IDLE -> RUNNING -> IDLE 881241489Sneel * IDLE -> CANNOT_RUN -> IDLE 882241489Sneel */ 883241489Sneel if ((vcpu->state == VCPU_IDLE && state != VCPU_IDLE) || 884241489Sneel (vcpu->state != VCPU_IDLE && state == VCPU_IDLE)) { 885241489Sneel error = 0; 886241489Sneel vcpu->state = state; 887221828Sgrehan } else { 888241489Sneel error = EBUSY; 889221828Sgrehan } 890241489Sneel 891241489Sneel vcpu_unlock(vcpu); 892241489Sneel 893241489Sneel return (error); 894221828Sgrehan} 895221828Sgrehan 896241489Sneelenum vcpu_state 897249879Sgrehanvcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu) 898221828Sgrehan{ 899221828Sgrehan struct vcpu *vcpu; 900241489Sneel enum vcpu_state state; 901221828Sgrehan 902221828Sgrehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 903221828Sgrehan panic("vm_get_run_state: invalid vcpuid %d", vcpuid); 904221828Sgrehan 905221828Sgrehan vcpu = &vm->vcpu[vcpuid]; 906221828Sgrehan 907241489Sneel vcpu_lock(vcpu); 908241489Sneel state = vcpu->state; 909249879Sgrehan if (hostcpu != NULL) 910249879Sgrehan *hostcpu = vcpu->hostcpu; 911241489Sneel vcpu_unlock(vcpu); 912221828Sgrehan 913241489Sneel return (state); 914221828Sgrehan} 915221828Sgrehan 916221828Sgrehanvoid 917221828Sgrehanvm_activate_cpu(struct vm *vm, int vcpuid) 918221828Sgrehan{ 919221828Sgrehan 920221828Sgrehan if (vcpuid >= 0 && vcpuid < VM_MAXCPU) 921223621Sgrehan CPU_SET(vcpuid, &vm->active_cpus); 922221828Sgrehan} 923221828Sgrehan 924223621Sgrehancpuset_t 925221828Sgrehanvm_active_cpus(struct vm *vm) 926221828Sgrehan{ 927221828Sgrehan 928221828Sgrehan return (vm->active_cpus); 929221828Sgrehan} 930221828Sgrehan 931221828Sgrehanvoid * 932221828Sgrehanvcpu_stats(struct vm *vm, int vcpuid) 933221828Sgrehan{ 934221828Sgrehan 935221828Sgrehan return (vm->vcpu[vcpuid].stats); 936221828Sgrehan} 937240922Sneel 938240922Sneelint 939240922Sneelvm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state) 940240922Sneel{ 941240922Sneel if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 942240922Sneel return (EINVAL); 943240922Sneel 944240922Sneel *state = vm->vcpu[vcpuid].x2apic_state; 945240922Sneel 946240922Sneel return (0); 947240922Sneel} 948240922Sneel 949240922Sneelint 950240922Sneelvm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) 951240922Sneel{ 952240922Sneel if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 953240922Sneel return (EINVAL); 954240922Sneel 955248392Sneel if (state >= X2APIC_STATE_LAST) 956240922Sneel return (EINVAL); 957240922Sneel 958240922Sneel vm->vcpu[vcpuid].x2apic_state = state; 959240922Sneel 960240943Sneel vlapic_set_x2apic_state(vm, vcpuid, state); 961240943Sneel 962240922Sneel return (0); 963240922Sneel} 964241489Sneel 965241489Sneelvoid 966241489Sneelvm_interrupt_hostcpu(struct vm *vm, int vcpuid) 967241489Sneel{ 968241489Sneel int hostcpu; 969241489Sneel struct vcpu *vcpu; 970241489Sneel 971241489Sneel vcpu = &vm->vcpu[vcpuid]; 972241489Sneel 973242065Sneel vcpu_lock(vcpu); 974241489Sneel hostcpu = vcpu->hostcpu; 975242065Sneel if (hostcpu == NOCPU) { 976242065Sneel /* 977242065Sneel * If the vcpu is 'RUNNING' but without a valid 'hostcpu' then 978242065Sneel * the host thread must be sleeping waiting for an event to 979242065Sneel * kick the vcpu out of 'hlt'. 980242065Sneel * 981242065Sneel * XXX this is racy because the condition exists right before 982242065Sneel * and after calling VMRUN() in vm_run(). The wakeup() is 983242065Sneel * benign in this case. 984242065Sneel */ 985242065Sneel if (vcpu->state == VCPU_RUNNING) 986242065Sneel wakeup_one(vcpu); 987242065Sneel } else { 988242065Sneel if (vcpu->state != VCPU_RUNNING) 989242065Sneel panic("invalid vcpu state %d", vcpu->state); 990242065Sneel if (hostcpu != curcpu) 991242065Sneel ipi_cpu(hostcpu, vmm_ipinum); 992242065Sneel } 993242065Sneel vcpu_unlock(vcpu); 994241489Sneel} 995