vmm.c revision 284899
1221828Sgrehan/*- 2221828Sgrehan * Copyright (c) 2011 NetApp, Inc. 3221828Sgrehan * All rights reserved. 4221828Sgrehan * 5221828Sgrehan * Redistribution and use in source and binary forms, with or without 6221828Sgrehan * modification, are permitted provided that the following conditions 7221828Sgrehan * are met: 8221828Sgrehan * 1. Redistributions of source code must retain the above copyright 9221828Sgrehan * notice, this list of conditions and the following disclaimer. 10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright 11221828Sgrehan * notice, this list of conditions and the following disclaimer in the 12221828Sgrehan * documentation and/or other materials provided with the distribution. 13221828Sgrehan * 14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17221828Sgrehan * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24221828Sgrehan * SUCH DAMAGE. 
25221828Sgrehan * 26221828Sgrehan * $FreeBSD: stable/10/sys/amd64/vmm/vmm.c 284899 2015-06-28 01:21:55Z neel $ 27221828Sgrehan */ 28221828Sgrehan 29221828Sgrehan#include <sys/cdefs.h> 30221828Sgrehan__FBSDID("$FreeBSD: stable/10/sys/amd64/vmm/vmm.c 284899 2015-06-28 01:21:55Z neel $"); 31221828Sgrehan 32221828Sgrehan#include <sys/param.h> 33234695Sgrehan#include <sys/systm.h> 34221828Sgrehan#include <sys/kernel.h> 35221828Sgrehan#include <sys/module.h> 36221828Sgrehan#include <sys/sysctl.h> 37221828Sgrehan#include <sys/malloc.h> 38221828Sgrehan#include <sys/pcpu.h> 39221828Sgrehan#include <sys/lock.h> 40221828Sgrehan#include <sys/mutex.h> 41221828Sgrehan#include <sys/proc.h> 42256072Sneel#include <sys/rwlock.h> 43221828Sgrehan#include <sys/sched.h> 44221828Sgrehan#include <sys/smp.h> 45221828Sgrehan#include <sys/systm.h> 46221828Sgrehan 47221828Sgrehan#include <vm/vm.h> 48256072Sneel#include <vm/vm_object.h> 49256072Sneel#include <vm/vm_page.h> 50256072Sneel#include <vm/pmap.h> 51256072Sneel#include <vm/vm_map.h> 52256072Sneel#include <vm/vm_extern.h> 53256072Sneel#include <vm/vm_param.h> 54221828Sgrehan 55261275Sjhb#include <machine/cpu.h> 56221828Sgrehan#include <machine/vm.h> 57221828Sgrehan#include <machine/pcb.h> 58241489Sneel#include <machine/smp.h> 59262350Sjhb#include <x86/psl.h> 60221914Sjhb#include <x86/apicreg.h> 61256072Sneel#include <machine/vmparam.h> 62221828Sgrehan 63221828Sgrehan#include <machine/vmm.h> 64261088Sjhb#include <machine/vmm_dev.h> 65268976Sjhb#include <machine/vmm_instruction_emul.h> 66261088Sjhb 67268976Sjhb#include "vmm_ioport.h" 68256072Sneel#include "vmm_ktr.h" 69242275Sneel#include "vmm_host.h" 70221828Sgrehan#include "vmm_mem.h" 71221828Sgrehan#include "vmm_util.h" 72268891Sjhb#include "vatpic.h" 73268891Sjhb#include "vatpit.h" 74261088Sjhb#include "vhpet.h" 75261088Sjhb#include "vioapic.h" 76221828Sgrehan#include "vlapic.h" 77276429Sneel#include "vpmtmr.h" 78284894Sneel#include "vrtc.h" 79221828Sgrehan#include "vmm_ipi.h" 
80221828Sgrehan#include "vmm_stat.h" 81242065Sneel#include "vmm_lapic.h" 82221828Sgrehan 83221828Sgrehan#include "io/ppt.h" 84221828Sgrehan#include "io/iommu.h" 85221828Sgrehan 86221828Sgrehanstruct vlapic; 87221828Sgrehan 88270071Sgrehan/* 89270071Sgrehan * Initialization: 90270071Sgrehan * (a) allocated when vcpu is created 91270071Sgrehan * (i) initialized when vcpu is created and when it is reinitialized 92270071Sgrehan * (o) initialized the first time the vcpu is created 93270071Sgrehan * (x) initialized before use 94270071Sgrehan */ 95221828Sgrehanstruct vcpu { 96270071Sgrehan struct mtx mtx; /* (o) protects 'state' and 'hostcpu' */ 97270071Sgrehan enum vcpu_state state; /* (o) vcpu state */ 98270071Sgrehan int hostcpu; /* (o) vcpu's host cpu */ 99270071Sgrehan struct vlapic *vlapic; /* (i) APIC device model */ 100270071Sgrehan enum x2apic_state x2apic_state; /* (i) APIC mode */ 101270159Sgrehan uint64_t exitintinfo; /* (i) events pending at VM exit */ 102270071Sgrehan int nmi_pending; /* (i) NMI pending */ 103270071Sgrehan int extint_pending; /* (i) INTR pending */ 104270071Sgrehan int exception_pending; /* (i) exception pending */ 105284894Sneel int exc_vector; /* (x) exception collateral */ 106284894Sneel int exc_errcode_valid; 107284894Sneel uint32_t exc_errcode; 108270071Sgrehan struct savefpu *guestfpu; /* (a,i) guest fpu state */ 109270071Sgrehan uint64_t guest_xcr0; /* (i) guest %xcr0 register */ 110270071Sgrehan void *stats; /* (a,i) statistics */ 111270071Sgrehan struct vm_exit exitinfo; /* (x) exit reason and collateral */ 112284894Sneel uint64_t nextrip; /* (x) next instruction to execute */ 113221828Sgrehan}; 114221828Sgrehan 115270071Sgrehan#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) 116242065Sneel#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) 117242065Sneel#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) 118242065Sneel#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) 119256072Sneel#define 
vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED) 120241489Sneel 121256072Sneelstruct mem_seg { 122256072Sneel vm_paddr_t gpa; 123256072Sneel size_t len; 124256072Sneel boolean_t wired; 125256072Sneel vm_object_t object; 126256072Sneel}; 127221828Sgrehan#define VM_MAX_MEMORY_SEGMENTS 2 128221828Sgrehan 129270071Sgrehan/* 130270071Sgrehan * Initialization: 131270071Sgrehan * (o) initialized the first time the VM is created 132270071Sgrehan * (i) initialized when VM is created and when it is reinitialized 133270071Sgrehan * (x) initialized before use 134270071Sgrehan */ 135221828Sgrehanstruct vm { 136270071Sgrehan void *cookie; /* (i) cpu-specific data */ 137270071Sgrehan void *iommu; /* (x) iommu-specific data */ 138270071Sgrehan struct vhpet *vhpet; /* (i) virtual HPET */ 139270071Sgrehan struct vioapic *vioapic; /* (i) virtual ioapic */ 140270071Sgrehan struct vatpic *vatpic; /* (i) virtual atpic */ 141270071Sgrehan struct vatpit *vatpit; /* (i) virtual atpit */ 142276429Sneel struct vpmtmr *vpmtmr; /* (i) virtual ACPI PM timer */ 143284894Sneel struct vrtc *vrtc; /* (o) virtual RTC */ 144270071Sgrehan volatile cpuset_t active_cpus; /* (i) active vcpus */ 145270071Sgrehan int suspend; /* (i) stop VM execution */ 146270071Sgrehan volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ 147270071Sgrehan volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ 148270071Sgrehan cpuset_t rendezvous_req_cpus; /* (x) rendezvous requested */ 149270071Sgrehan cpuset_t rendezvous_done_cpus; /* (x) rendezvous finished */ 150270071Sgrehan void *rendezvous_arg; /* (x) rendezvous func/arg */ 151270071Sgrehan vm_rendezvous_func_t rendezvous_func; 152270071Sgrehan struct mtx rendezvous_mtx; /* (o) rendezvous lock */ 153270071Sgrehan int num_mem_segs; /* (o) guest memory segments */ 154256072Sneel struct mem_seg mem_segs[VM_MAX_MEMORY_SEGMENTS]; 155270071Sgrehan struct vmspace *vmspace; /* (o) guest's address space */ 156270071Sgrehan char name[VM_MAX_NAMELEN]; /* (o) 
virtual machine name */ 157270071Sgrehan struct vcpu vcpu[VM_MAXCPU]; /* (i) guest vcpus */ 158221828Sgrehan}; 159221828Sgrehan 160249396Sneelstatic int vmm_initialized; 161249396Sneel 162221828Sgrehanstatic struct vmm_ops *ops; 163266339Sjhb#define VMM_INIT(num) (ops != NULL ? (*ops->init)(num) : 0) 164221828Sgrehan#define VMM_CLEANUP() (ops != NULL ? (*ops->cleanup)() : 0) 165261275Sjhb#define VMM_RESUME() (ops != NULL ? (*ops->resume)() : 0) 166221828Sgrehan 167256072Sneel#define VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL) 168268935Sjhb#define VMRUN(vmi, vcpu, rip, pmap, rptr, sptr) \ 169268935Sjhb (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr, sptr) : ENXIO) 170221828Sgrehan#define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL) 171256072Sneel#define VMSPACE_ALLOC(min, max) \ 172256072Sneel (ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL) 173256072Sneel#define VMSPACE_FREE(vmspace) \ 174256072Sneel (ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO) 175221828Sgrehan#define VMGETREG(vmi, vcpu, num, retval) \ 176221828Sgrehan (ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO) 177221828Sgrehan#define VMSETREG(vmi, vcpu, num, val) \ 178221828Sgrehan (ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO) 179221828Sgrehan#define VMGETDESC(vmi, vcpu, num, desc) \ 180221828Sgrehan (ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO) 181221828Sgrehan#define VMSETDESC(vmi, vcpu, num, desc) \ 182221828Sgrehan (ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO) 183221828Sgrehan#define VMGETCAP(vmi, vcpu, num, retval) \ 184221828Sgrehan (ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO) 185221828Sgrehan#define VMSETCAP(vmi, vcpu, num, val) \ 186221828Sgrehan (ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO) 187266339Sjhb#define VLAPIC_INIT(vmi, vcpu) \ 188266339Sjhb (ops != NULL ? 
(*ops->vlapic_init)(vmi, vcpu) : NULL) 189266339Sjhb#define VLAPIC_CLEANUP(vmi, vlapic) \ 190266339Sjhb (ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL) 191221828Sgrehan 192245021Sneel#define fpu_start_emulating() load_cr0(rcr0() | CR0_TS) 193245021Sneel#define fpu_stop_emulating() clts() 194221828Sgrehan 195221828Sgrehanstatic MALLOC_DEFINE(M_VM, "vm", "vm"); 196221828Sgrehan 197221828Sgrehan/* statistics */ 198248389Sneelstatic VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); 199221828Sgrehan 200266339SjhbSYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); 201266339Sjhb 202268935Sjhb/* 203268935Sjhb * Halt the guest if all vcpus are executing a HLT instruction with 204268935Sjhb * interrupts disabled. 205268935Sjhb */ 206268935Sjhbstatic int halt_detection_enabled = 1; 207268935SjhbTUNABLE_INT("hw.vmm.halt_detection", &halt_detection_enabled); 208268935SjhbSYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN, 209268935Sjhb &halt_detection_enabled, 0, 210268935Sjhb "Halt VM if all vcpus execute HLT with interrupts disabled"); 211268935Sjhb 212266339Sjhbstatic int vmm_ipinum; 213266339SjhbSYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0, 214266339Sjhb "IPI vector used for vcpu notifications"); 215266339Sjhb 216276403Sneelstatic int trace_guest_exceptions; 217276403SneelSYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN, 218276403Sneel &trace_guest_exceptions, 0, 219276403Sneel "Trap into hypervisor on all guest exceptions and reflect them back"); 220276403Sneel 221284899Sneelstatic int vmm_force_iommu = 0; 222284899SneelTUNABLE_INT("hw.vmm.force_iommu", &vmm_force_iommu); 223284899SneelSYSCTL_INT(_hw_vmm, OID_AUTO, force_iommu, CTLFLAG_RDTUN, &vmm_force_iommu, 0, 224284899Sneel "Force use of I/O MMU even if no passthrough devices were found."); 225284899Sneel 226221828Sgrehanstatic void 227270071Sgrehanvcpu_cleanup(struct vm *vm, int i, bool destroy) 228221828Sgrehan{ 229266339Sjhb struct vcpu *vcpu = 
&vm->vcpu[i]; 230266339Sjhb 231266339Sjhb VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic); 232270071Sgrehan if (destroy) { 233270071Sgrehan vmm_stat_free(vcpu->stats); 234270071Sgrehan fpu_save_area_free(vcpu->guestfpu); 235270071Sgrehan } 236221828Sgrehan} 237221828Sgrehan 238221828Sgrehanstatic void 239270071Sgrehanvcpu_init(struct vm *vm, int vcpu_id, bool create) 240221828Sgrehan{ 241221828Sgrehan struct vcpu *vcpu; 242270071Sgrehan 243270071Sgrehan KASSERT(vcpu_id >= 0 && vcpu_id < VM_MAXCPU, 244270071Sgrehan ("vcpu_init: invalid vcpu %d", vcpu_id)); 245270071Sgrehan 246221828Sgrehan vcpu = &vm->vcpu[vcpu_id]; 247221828Sgrehan 248270071Sgrehan if (create) { 249270071Sgrehan KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already " 250270071Sgrehan "initialized", vcpu_id)); 251270071Sgrehan vcpu_lock_init(vcpu); 252270071Sgrehan vcpu->state = VCPU_IDLE; 253270071Sgrehan vcpu->hostcpu = NOCPU; 254270071Sgrehan vcpu->guestfpu = fpu_save_area_alloc(); 255270071Sgrehan vcpu->stats = vmm_stat_alloc(); 256270071Sgrehan } 257270071Sgrehan 258266339Sjhb vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id); 259267447Sjhb vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED); 260270159Sgrehan vcpu->exitintinfo = 0; 261270071Sgrehan vcpu->nmi_pending = 0; 262270071Sgrehan vcpu->extint_pending = 0; 263270071Sgrehan vcpu->exception_pending = 0; 264267427Sjhb vcpu->guest_xcr0 = XFEATURE_ENABLED_X87; 265234695Sgrehan fpu_save_area_reset(vcpu->guestfpu); 266270071Sgrehan vmm_stat_init(vcpu->stats); 267221828Sgrehan} 268221828Sgrehan 269276403Sneelint 270276403Sneelvcpu_trace_exceptions(struct vm *vm, int vcpuid) 271276403Sneel{ 272276403Sneel 273276403Sneel return (trace_guest_exceptions); 274276403Sneel} 275276403Sneel 276240894Sneelstruct vm_exit * 277240894Sneelvm_exitinfo(struct vm *vm, int cpuid) 278240894Sneel{ 279240894Sneel struct vcpu *vcpu; 280240894Sneel 281240894Sneel if (cpuid < 0 || cpuid >= VM_MAXCPU) 282240894Sneel panic("vm_exitinfo: invalid cpuid %d", cpuid); 283240894Sneel 
284240894Sneel vcpu = &vm->vcpu[cpuid]; 285240894Sneel 286240894Sneel return (&vcpu->exitinfo); 287240894Sneel} 288240894Sneel 289261275Sjhbstatic void 290261275Sjhbvmm_resume(void) 291261275Sjhb{ 292261275Sjhb VMM_RESUME(); 293261275Sjhb} 294261275Sjhb 295221828Sgrehanstatic int 296221828Sgrehanvmm_init(void) 297221828Sgrehan{ 298221828Sgrehan int error; 299221828Sgrehan 300242275Sneel vmm_host_state_init(); 301221828Sgrehan 302266339Sjhb vmm_ipinum = vmm_ipi_alloc(); 303266339Sjhb if (vmm_ipinum == 0) 304266339Sjhb vmm_ipinum = IPI_AST; 305266339Sjhb 306221828Sgrehan error = vmm_mem_init(); 307221828Sgrehan if (error) 308221828Sgrehan return (error); 309221828Sgrehan 310221828Sgrehan if (vmm_is_intel()) 311221828Sgrehan ops = &vmm_ops_intel; 312221828Sgrehan else if (vmm_is_amd()) 313221828Sgrehan ops = &vmm_ops_amd; 314221828Sgrehan else 315221828Sgrehan return (ENXIO); 316221828Sgrehan 317261275Sjhb vmm_resume_p = vmm_resume; 318221828Sgrehan 319266339Sjhb return (VMM_INIT(vmm_ipinum)); 320221828Sgrehan} 321221828Sgrehan 322221828Sgrehanstatic int 323221828Sgrehanvmm_handler(module_t mod, int what, void *arg) 324221828Sgrehan{ 325221828Sgrehan int error; 326221828Sgrehan 327221828Sgrehan switch (what) { 328221828Sgrehan case MOD_LOAD: 329221828Sgrehan vmmdev_init(); 330284899Sneel if (vmm_force_iommu || ppt_avail_devices() > 0) 331267070Sjhb iommu_init(); 332221828Sgrehan error = vmm_init(); 333249396Sneel if (error == 0) 334249396Sneel vmm_initialized = 1; 335221828Sgrehan break; 336221828Sgrehan case MOD_UNLOAD: 337241454Sneel error = vmmdev_cleanup(); 338241454Sneel if (error == 0) { 339261275Sjhb vmm_resume_p = NULL; 340241454Sneel iommu_cleanup(); 341266339Sjhb if (vmm_ipinum != IPI_AST) 342266339Sjhb vmm_ipi_free(vmm_ipinum); 343241454Sneel error = VMM_CLEANUP(); 344253854Sgrehan /* 345253854Sgrehan * Something bad happened - prevent new 346253854Sgrehan * VMs from being created 347253854Sgrehan */ 348253854Sgrehan if (error) 349253854Sgrehan 
vmm_initialized = 0; 350241454Sneel } 351221828Sgrehan break; 352221828Sgrehan default: 353221828Sgrehan error = 0; 354221828Sgrehan break; 355221828Sgrehan } 356221828Sgrehan return (error); 357221828Sgrehan} 358221828Sgrehan 359221828Sgrehanstatic moduledata_t vmm_kmod = { 360221828Sgrehan "vmm", 361221828Sgrehan vmm_handler, 362221828Sgrehan NULL 363221828Sgrehan}; 364221828Sgrehan 365221828Sgrehan/* 366245704Sneel * vmm initialization has the following dependencies: 367245704Sneel * 368245704Sneel * - iommu initialization must happen after the pci passthru driver has had 369245704Sneel * a chance to attach to any passthru devices (after SI_SUB_CONFIGURE). 370245704Sneel * 371245704Sneel * - VT-x initialization requires smp_rendezvous() and therefore must happen 372245704Sneel * after SMP is fully functional (after SI_SUB_SMP). 373221828Sgrehan */ 374245704SneelDECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY); 375221828SgrehanMODULE_VERSION(vmm, 1); 376221828Sgrehan 377270071Sgrehanstatic void 378270071Sgrehanvm_init(struct vm *vm, bool create) 379270071Sgrehan{ 380270071Sgrehan int i; 381270071Sgrehan 382270071Sgrehan vm->cookie = VMINIT(vm, vmspace_pmap(vm->vmspace)); 383270071Sgrehan vm->iommu = NULL; 384270071Sgrehan vm->vioapic = vioapic_init(vm); 385270071Sgrehan vm->vhpet = vhpet_init(vm); 386270071Sgrehan vm->vatpic = vatpic_init(vm); 387270071Sgrehan vm->vatpit = vatpit_init(vm); 388276429Sneel vm->vpmtmr = vpmtmr_init(vm); 389284894Sneel if (create) 390284894Sneel vm->vrtc = vrtc_init(vm); 391270071Sgrehan 392270071Sgrehan CPU_ZERO(&vm->active_cpus); 393270071Sgrehan 394270071Sgrehan vm->suspend = 0; 395270071Sgrehan CPU_ZERO(&vm->suspended_cpus); 396270071Sgrehan 397270071Sgrehan for (i = 0; i < VM_MAXCPU; i++) 398270071Sgrehan vcpu_init(vm, i, create); 399270071Sgrehan} 400270071Sgrehan 401249396Sneelint 402249396Sneelvm_create(const char *name, struct vm **retvm) 403221828Sgrehan{ 404221828Sgrehan struct vm *vm; 405256072Sneel struct 
vmspace *vmspace; 406221828Sgrehan 407249396Sneel /* 408249396Sneel * If vmm.ko could not be successfully initialized then don't attempt 409249396Sneel * to create the virtual machine. 410249396Sneel */ 411249396Sneel if (!vmm_initialized) 412249396Sneel return (ENXIO); 413249396Sneel 414221828Sgrehan if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) 415249396Sneel return (EINVAL); 416221828Sgrehan 417276429Sneel vmspace = VMSPACE_ALLOC(0, VM_MAXUSER_ADDRESS); 418256072Sneel if (vmspace == NULL) 419256072Sneel return (ENOMEM); 420256072Sneel 421221828Sgrehan vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO); 422221828Sgrehan strcpy(vm->name, name); 423270071Sgrehan vm->num_mem_segs = 0; 424266339Sjhb vm->vmspace = vmspace; 425266339Sjhb mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF); 426221828Sgrehan 427270071Sgrehan vm_init(vm, true); 428221828Sgrehan 429249396Sneel *retvm = vm; 430249396Sneel return (0); 431221828Sgrehan} 432221828Sgrehan 433241178Sneelstatic void 434256072Sneelvm_free_mem_seg(struct vm *vm, struct mem_seg *seg) 435241178Sneel{ 436241178Sneel 437256072Sneel if (seg->object != NULL) 438256072Sneel vmm_mem_free(vm->vmspace, seg->gpa, seg->len); 439241362Sneel 440256072Sneel bzero(seg, sizeof(*seg)); 441241178Sneel} 442241178Sneel 443270071Sgrehanstatic void 444270071Sgrehanvm_cleanup(struct vm *vm, bool destroy) 445221828Sgrehan{ 446221828Sgrehan int i; 447221828Sgrehan 448221828Sgrehan ppt_unassign_all(vm); 449221828Sgrehan 450256072Sneel if (vm->iommu != NULL) 451256072Sneel iommu_destroy_domain(vm->iommu); 452256072Sneel 453284894Sneel if (destroy) 454284894Sneel vrtc_cleanup(vm->vrtc); 455284894Sneel else 456284894Sneel vrtc_reset(vm->vrtc); 457276429Sneel vpmtmr_cleanup(vm->vpmtmr); 458268891Sjhb vatpit_cleanup(vm->vatpit); 459261088Sjhb vhpet_cleanup(vm->vhpet); 460268891Sjhb vatpic_cleanup(vm->vatpic); 461261088Sjhb vioapic_cleanup(vm->vioapic); 462261088Sjhb 463270071Sgrehan for (i = 0; i < VM_MAXCPU; i++) 
464270071Sgrehan vcpu_cleanup(vm, i, destroy); 465221828Sgrehan 466270071Sgrehan VMCLEANUP(vm->cookie); 467241178Sneel 468270071Sgrehan if (destroy) { 469270071Sgrehan for (i = 0; i < vm->num_mem_segs; i++) 470270071Sgrehan vm_free_mem_seg(vm, &vm->mem_segs[i]); 471221828Sgrehan 472270071Sgrehan vm->num_mem_segs = 0; 473221828Sgrehan 474270071Sgrehan VMSPACE_FREE(vm->vmspace); 475270071Sgrehan vm->vmspace = NULL; 476270071Sgrehan } 477270071Sgrehan} 478221828Sgrehan 479270071Sgrehanvoid 480270071Sgrehanvm_destroy(struct vm *vm) 481270071Sgrehan{ 482270071Sgrehan vm_cleanup(vm, true); 483221828Sgrehan free(vm, M_VM); 484221828Sgrehan} 485221828Sgrehan 486270071Sgrehanint 487270071Sgrehanvm_reinit(struct vm *vm) 488270071Sgrehan{ 489270071Sgrehan int error; 490270071Sgrehan 491270071Sgrehan /* 492270071Sgrehan * A virtual machine can be reset only if all vcpus are suspended. 493270071Sgrehan */ 494270071Sgrehan if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { 495270071Sgrehan vm_cleanup(vm, false); 496270071Sgrehan vm_init(vm, false); 497270071Sgrehan error = 0; 498270071Sgrehan } else { 499270071Sgrehan error = EBUSY; 500270071Sgrehan } 501270071Sgrehan 502270071Sgrehan return (error); 503270071Sgrehan} 504270071Sgrehan 505221828Sgrehanconst char * 506221828Sgrehanvm_name(struct vm *vm) 507221828Sgrehan{ 508221828Sgrehan return (vm->name); 509221828Sgrehan} 510221828Sgrehan 511221828Sgrehanint 512221828Sgrehanvm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) 513221828Sgrehan{ 514256072Sneel vm_object_t obj; 515221828Sgrehan 516256072Sneel if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL) 517256072Sneel return (ENOMEM); 518256072Sneel else 519256072Sneel return (0); 520221828Sgrehan} 521221828Sgrehan 522221828Sgrehanint 523221828Sgrehanvm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len) 524221828Sgrehan{ 525221828Sgrehan 526256072Sneel vmm_mmio_free(vm->vmspace, gpa, len); 527256072Sneel return (0); 528221828Sgrehan} 
529221828Sgrehan 530256072Sneelboolean_t 531256072Sneelvm_mem_allocated(struct vm *vm, vm_paddr_t gpa) 532241041Sneel{ 533241041Sneel int i; 534241041Sneel vm_paddr_t gpabase, gpalimit; 535241041Sneel 536241041Sneel for (i = 0; i < vm->num_mem_segs; i++) { 537241041Sneel gpabase = vm->mem_segs[i].gpa; 538241041Sneel gpalimit = gpabase + vm->mem_segs[i].len; 539241041Sneel if (gpa >= gpabase && gpa < gpalimit) 540256072Sneel return (TRUE); /* 'gpa' is regular memory */ 541241041Sneel } 542241041Sneel 543256072Sneel if (ppt_is_mmio(vm, gpa)) 544256072Sneel return (TRUE); /* 'gpa' is pci passthru mmio */ 545256072Sneel 546256072Sneel return (FALSE); 547241041Sneel} 548241041Sneel 549221828Sgrehanint 550241041Sneelvm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len) 551221828Sgrehan{ 552256072Sneel int available, allocated; 553256072Sneel struct mem_seg *seg; 554256072Sneel vm_object_t object; 555256072Sneel vm_paddr_t g; 556221828Sgrehan 557241041Sneel if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0) 558241041Sneel return (EINVAL); 559221828Sgrehan 560241041Sneel available = allocated = 0; 561241041Sneel g = gpa; 562241041Sneel while (g < gpa + len) { 563256072Sneel if (vm_mem_allocated(vm, g)) 564256072Sneel allocated++; 565256072Sneel else 566241041Sneel available++; 567241041Sneel 568241041Sneel g += PAGE_SIZE; 569241041Sneel } 570241041Sneel 571221828Sgrehan /* 572241041Sneel * If there are some allocated and some available pages in the address 573241041Sneel * range then it is an error. 574221828Sgrehan */ 575241041Sneel if (allocated && available) 576241041Sneel return (EINVAL); 577221828Sgrehan 578241041Sneel /* 579241041Sneel * If the entire address range being requested has already been 580241041Sneel * allocated then there isn't anything more to do. 
581241041Sneel */ 582241041Sneel if (allocated && available == 0) 583241041Sneel return (0); 584241041Sneel 585221828Sgrehan if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS) 586221828Sgrehan return (E2BIG); 587221828Sgrehan 588241178Sneel seg = &vm->mem_segs[vm->num_mem_segs]; 589221828Sgrehan 590256072Sneel if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL) 591256072Sneel return (ENOMEM); 592256072Sneel 593241178Sneel seg->gpa = gpa; 594256072Sneel seg->len = len; 595256072Sneel seg->object = object; 596256072Sneel seg->wired = FALSE; 597241178Sneel 598256072Sneel vm->num_mem_segs++; 599256072Sneel 600256072Sneel return (0); 601256072Sneel} 602256072Sneel 603270159Sgrehanstatic vm_paddr_t 604270159Sgrehanvm_maxmem(struct vm *vm) 605270159Sgrehan{ 606270159Sgrehan int i; 607270159Sgrehan vm_paddr_t gpa, maxmem; 608270159Sgrehan 609270159Sgrehan maxmem = 0; 610270159Sgrehan for (i = 0; i < vm->num_mem_segs; i++) { 611270159Sgrehan gpa = vm->mem_segs[i].gpa + vm->mem_segs[i].len; 612270159Sgrehan if (gpa > maxmem) 613270159Sgrehan maxmem = gpa; 614270159Sgrehan } 615270159Sgrehan return (maxmem); 616270159Sgrehan} 617270159Sgrehan 618256072Sneelstatic void 619256072Sneelvm_gpa_unwire(struct vm *vm) 620256072Sneel{ 621256072Sneel int i, rv; 622256072Sneel struct mem_seg *seg; 623256072Sneel 624256072Sneel for (i = 0; i < vm->num_mem_segs; i++) { 625256072Sneel seg = &vm->mem_segs[i]; 626256072Sneel if (!seg->wired) 627256072Sneel continue; 628256072Sneel 629256072Sneel rv = vm_map_unwire(&vm->vmspace->vm_map, 630256072Sneel seg->gpa, seg->gpa + seg->len, 631256072Sneel VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 632256072Sneel KASSERT(rv == KERN_SUCCESS, ("vm(%s) memory segment " 633256072Sneel "%#lx/%ld could not be unwired: %d", 634256072Sneel vm_name(vm), seg->gpa, seg->len, rv)); 635256072Sneel 636256072Sneel seg->wired = FALSE; 637256072Sneel } 638256072Sneel} 639256072Sneel 640256072Sneelstatic int 641256072Sneelvm_gpa_wire(struct vm *vm) 642256072Sneel{ 
643256072Sneel int i, rv; 644256072Sneel struct mem_seg *seg; 645256072Sneel 646256072Sneel for (i = 0; i < vm->num_mem_segs; i++) { 647256072Sneel seg = &vm->mem_segs[i]; 648256072Sneel if (seg->wired) 649256072Sneel continue; 650256072Sneel 651256072Sneel /* XXX rlimits? */ 652256072Sneel rv = vm_map_wire(&vm->vmspace->vm_map, 653256072Sneel seg->gpa, seg->gpa + seg->len, 654256072Sneel VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 655256072Sneel if (rv != KERN_SUCCESS) 656241178Sneel break; 657241178Sneel 658256072Sneel seg->wired = TRUE; 659256072Sneel } 660256072Sneel 661256072Sneel if (i < vm->num_mem_segs) { 662241362Sneel /* 663256072Sneel * Undo the wiring before returning an error. 664241362Sneel */ 665256072Sneel vm_gpa_unwire(vm); 666256072Sneel return (EAGAIN); 667256072Sneel } 668241178Sneel 669256072Sneel return (0); 670256072Sneel} 671256072Sneel 672256072Sneelstatic void 673256072Sneelvm_iommu_modify(struct vm *vm, boolean_t map) 674256072Sneel{ 675256072Sneel int i, sz; 676256072Sneel vm_paddr_t gpa, hpa; 677256072Sneel struct mem_seg *seg; 678256072Sneel void *vp, *cookie, *host_domain; 679256072Sneel 680256072Sneel sz = PAGE_SIZE; 681256072Sneel host_domain = iommu_host_domain(); 682256072Sneel 683256072Sneel for (i = 0; i < vm->num_mem_segs; i++) { 684256072Sneel seg = &vm->mem_segs[i]; 685256072Sneel KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired", 686256072Sneel vm_name(vm), seg->gpa, seg->len)); 687256072Sneel 688256072Sneel gpa = seg->gpa; 689256072Sneel while (gpa < seg->gpa + seg->len) { 690256072Sneel vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE, 691256072Sneel &cookie); 692256072Sneel KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx", 693256072Sneel vm_name(vm), gpa)); 694256072Sneel 695256072Sneel vm_gpa_release(cookie); 696256072Sneel 697256072Sneel hpa = DMAP_TO_PHYS((uintptr_t)vp); 698256072Sneel if (map) { 699256072Sneel iommu_create_mapping(vm->iommu, gpa, hpa, sz); 700256072Sneel 
iommu_remove_mapping(host_domain, hpa, sz); 701256072Sneel } else { 702256072Sneel iommu_remove_mapping(vm->iommu, gpa, sz); 703256072Sneel iommu_create_mapping(host_domain, hpa, hpa, sz); 704256072Sneel } 705256072Sneel 706256072Sneel gpa += PAGE_SIZE; 707256072Sneel } 708241178Sneel } 709241178Sneel 710256072Sneel /* 711256072Sneel * Invalidate the cached translations associated with the domain 712256072Sneel * from which pages were removed. 713256072Sneel */ 714256072Sneel if (map) 715256072Sneel iommu_invalidate_tlb(host_domain); 716256072Sneel else 717256072Sneel iommu_invalidate_tlb(vm->iommu); 718256072Sneel} 719256072Sneel 720256072Sneel#define vm_iommu_unmap(vm) vm_iommu_modify((vm), FALSE) 721256072Sneel#define vm_iommu_map(vm) vm_iommu_modify((vm), TRUE) 722256072Sneel 723256072Sneelint 724256072Sneelvm_unassign_pptdev(struct vm *vm, int bus, int slot, int func) 725256072Sneel{ 726256072Sneel int error; 727256072Sneel 728256072Sneel error = ppt_unassign_device(vm, bus, slot, func); 729256072Sneel if (error) 730221828Sgrehan return (error); 731256072Sneel 732267070Sjhb if (ppt_assigned_devices(vm) == 0) { 733256072Sneel vm_iommu_unmap(vm); 734256072Sneel vm_gpa_unwire(vm); 735221828Sgrehan } 736256072Sneel return (0); 737256072Sneel} 738221828Sgrehan 739256072Sneelint 740256072Sneelvm_assign_pptdev(struct vm *vm, int bus, int slot, int func) 741256072Sneel{ 742256072Sneel int error; 743256072Sneel vm_paddr_t maxaddr; 744256072Sneel 745241362Sneel /* 746256072Sneel * Virtual machines with pci passthru devices get special treatment: 747256072Sneel * - the guest physical memory is wired 748256072Sneel * - the iommu is programmed to do the 'gpa' to 'hpa' translation 749256072Sneel * 750256072Sneel * We need to do this before the first pci passthru device is attached. 
751241362Sneel */ 752267070Sjhb if (ppt_assigned_devices(vm) == 0) { 753256072Sneel KASSERT(vm->iommu == NULL, 754256072Sneel ("vm_assign_pptdev: iommu must be NULL")); 755270159Sgrehan maxaddr = vm_maxmem(vm); 756256072Sneel vm->iommu = iommu_create_domain(maxaddr); 757241362Sneel 758256072Sneel error = vm_gpa_wire(vm); 759256072Sneel if (error) 760256072Sneel return (error); 761241041Sneel 762256072Sneel vm_iommu_map(vm); 763256072Sneel } 764256072Sneel 765256072Sneel error = ppt_assign_device(vm, bus, slot, func); 766256072Sneel return (error); 767221828Sgrehan} 768221828Sgrehan 769256072Sneelvoid * 770256072Sneelvm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, 771256072Sneel void **cookie) 772221828Sgrehan{ 773256072Sneel int count, pageoff; 774256072Sneel vm_page_t m; 775221828Sgrehan 776256072Sneel pageoff = gpa & PAGE_MASK; 777256072Sneel if (len > PAGE_SIZE - pageoff) 778256072Sneel panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len); 779241148Sneel 780256072Sneel count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map, 781256072Sneel trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1); 782256072Sneel 783256072Sneel if (count == 1) { 784256072Sneel *cookie = m; 785256072Sneel return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff)); 786256072Sneel } else { 787256072Sneel *cookie = NULL; 788256072Sneel return (NULL); 789256072Sneel } 790221828Sgrehan} 791221828Sgrehan 792256072Sneelvoid 793256072Sneelvm_gpa_release(void *cookie) 794256072Sneel{ 795256072Sneel vm_page_t m = cookie; 796256072Sneel 797256072Sneel vm_page_lock(m); 798256072Sneel vm_page_unhold(m); 799256072Sneel vm_page_unlock(m); 800256072Sneel} 801256072Sneel 802221828Sgrehanint 803221828Sgrehanvm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase, 804221828Sgrehan struct vm_memory_segment *seg) 805221828Sgrehan{ 806221828Sgrehan int i; 807221828Sgrehan 808221828Sgrehan for (i = 0; i < vm->num_mem_segs; i++) { 809221828Sgrehan if (gpabase == vm->mem_segs[i].gpa) { 
			seg->gpa = vm->mem_segs[i].gpa;
			seg->len = vm->mem_segs[i].len;
			seg->wired = vm->mem_segs[i].wired;
			return (0);
		}
	}
	return (-1);
}

/*
 * Look up the VM object backing guest physical address 'gpa'.
 *
 * On success returns 0 with '*object' set to a referenced vm_object
 * (the caller is responsible for dropping the reference) and '*offset'
 * set to the offset of 'gpa' within that object.  Returns EINVAL if no
 * memory segment covers 'gpa'.
 *
 * NOTE(review): 'len' is not used to validate that [gpa, gpa + len)
 * fits entirely inside the segment -- presumably callers guarantee
 * this; confirm against callers.
 */
int
vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
	      vm_offset_t *offset, struct vm_object **object)
{
	int i;
	size_t seg_len;
	vm_paddr_t seg_gpa;
	vm_object_t seg_obj;

	for (i = 0; i < vm->num_mem_segs; i++) {
		/* Segments without a backing object cannot satisfy a lookup */
		if ((seg_obj = vm->mem_segs[i].object) == NULL)
			continue;

		seg_gpa = vm->mem_segs[i].gpa;
		seg_len = vm->mem_segs[i].len;

		if (gpa >= seg_gpa && gpa < seg_gpa + seg_len) {
			*offset = gpa - seg_gpa;
			*object = seg_obj;
			/* Reference is transferred to the caller */
			vm_object_reference(seg_obj);
			return (0);
		}
	}

	return (EINVAL);
}

/*
 * Read guest register 'reg' of 'vcpu' into '*retval' via the
 * backend-specific VMGETREG operation.
 */
int
vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (VMGETREG(vm->cookie, vcpu, reg, retval));
}

/*
 * Write guest register 'reg' of 'vcpuid' via the backend-specific
 * VMSETREG operation.  A successful write of %rip additionally updates
 * the vcpu's cached 'nextrip' so the next VMRUN() resumes at the new
 * instruction pointer.
 */
int
vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val)
{
	struct vcpu *vcpu;
	int error;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	error = VMSETREG(vm->cookie, vcpuid, reg, val);
	if (error || reg != VM_REG_GUEST_RIP)
		return (error);

	/* Set 'nextrip' to match the value of %rip */
	VCPU_CTR1(vm, vcpuid, "Setting nextrip to %#lx", val);
	vcpu = &vm->vcpu[vcpuid];
	vcpu->nextrip = val;
	return (0);
}

/*
 * Return TRUE if 'reg' names one of the descriptor-table registers
 * (IDTR/GDTR).
 */
static boolean_t
is_descriptor_table(int reg)
{

	switch (reg) {
	case VM_REG_GUEST_IDTR:
	case VM_REG_GUEST_GDTR:
		return (TRUE);
	default:
		return (FALSE);
	}
}

/*
 * Return TRUE if 'reg' names a segment register (including TR and
 * LDTR).
 */
static boolean_t
is_segment_register(int reg)
{

	switch (reg) {
	case VM_REG_GUEST_ES:
	case VM_REG_GUEST_CS:
	case VM_REG_GUEST_SS:
	case VM_REG_GUEST_DS:
	case VM_REG_GUEST_FS:
	case VM_REG_GUEST_GS:
	case VM_REG_GUEST_TR:
	case VM_REG_GUEST_LDTR:
		return (TRUE);
	default:
		return (FALSE);
	}
}

/*
 * Read the segment descriptor for segment/descriptor-table register
 * 'reg' of 'vcpu' into '*desc'.  Only segment and descriptor-table
 * registers are valid.
 */
int
vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
		struct seg_desc *desc)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (!is_segment_register(reg) && !is_descriptor_table(reg))
		return (EINVAL);

	return (VMGETDESC(vm->cookie, vcpu, reg, desc));
}

/*
 * Write the segment descriptor for segment/descriptor-table register
 * 'reg' of 'vcpu' from '*desc'.  Only segment and descriptor-table
 * registers are valid.
 */
int
vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
		struct seg_desc *desc)
{
	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (!is_segment_register(reg) && !is_descriptor_table(reg))
		return (EINVAL);

	return (VMSETDESC(vm->cookie, vcpu, reg, desc));
}

/*
 * Switch the FPU/XSAVE state from the host to the guest before
 * entering the guest via VMRUN().  Pairs with save_guest_fpustate().
 */
static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* flush host state to the pcb */
	fpuexit(curthread);

	/* restore guest FPU state */
	fpu_stop_emulating();
	fpurestore(vcpu->guestfpu);

	/* restore guest XCR0 if XSAVE is enabled in the host */
	if (rcr4() & CR4_XSAVE)
		load_xcr(0, vcpu->guest_xcr0);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	fpu_start_emulating();
}

/*
 * Save the guest's FPU/XSAVE state after returning from VMRUN() and
 * restore the host XCR0.  Expects FPU emulation (CR0.TS) to still be
 * enabled from restore_guest_fpustate().
 */
static void
save_guest_fpustate(struct vcpu *vcpu)
{

	if ((rcr0() & CR0_TS) == 0)
		panic("fpu emulation not enabled in host!");

	/* save guest XCR0 and restore host XCR0 */
	if (rcr4() & CR4_XSAVE) {
		vcpu->guest_xcr0 = rxcr(0);
		load_xcr(0, vmm_get_host_xcr0());
	}

	/* save guest FPU state */
	fpu_stop_emulating();
	fpusave(vcpu->guestfpu);
	fpu_start_emulating();
}

static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");

/*
 * Core vcpu state-machine transition.  The caller must hold the vcpu
 * spinlock (asserted below).  'from_idle' distinguishes transitions
 * initiated by an ioctl (which must wait for VCPU_IDLE) from internal
 * transitions.
 */
static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;

	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state. This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE)
			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	/* 'hostcpu' must be consistent with the current state */
	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	default:
		error = 1;
		break;
	}

	if (error)
		return (EBUSY);

	vcpu->state = newstate;
	if (newstate == VCPU_RUNNING)
		vcpu->hostcpu = curcpu;
	else
		vcpu->hostcpu = NOCPU;

	/* Wake any ioctl waiting above for the vcpu to become idle */
	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}

/*
 * Transition 'vcpuid' to 'newstate' or panic.  Used where a failed
 * transition indicates a programming error, not a recoverable
 * condition.  Takes and drops the vcpu lock via vcpu_set_state().
 */
static void
vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0)
		panic("Error %d setting state to %d\n", error, newstate);
}

/*
 * Like vcpu_require_state() but the caller already holds the vcpu
 * spinlock.
 */
static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}

/*
 * Install (or clear, with NULL) the active rendezvous function.  The
 * caller must hold 'rendezvous_mtx'.
 */
static void
vm_set_rendezvous_func(struct vm *vm, vm_rendezvous_func_t func)
{

	KASSERT(mtx_owned(&vm->rendezvous_mtx), ("rendezvous_mtx not locked"));

	/*
	 * Update 'rendezvous_func' and execute a write memory barrier to
	 * ensure that it is visible across all host cpus. This is not needed
	 * for correctness but it does ensure that all the vcpus will notice
	 * that the rendezvous is requested immediately.
	 */
	vm->rendezvous_func = func;
	wmb();
}

/*
 * Trace helper that tolerates vcpuid == -1 (a host thread rather than
 * a vcpu) by falling back to the VM-wide tracepoint.
 */
#define	RENDEZVOUS_CTR0(vm, vcpuid, fmt)				\
	do {								\
		if (vcpuid >= 0)					\
			VCPU_CTR0(vm, vcpuid, fmt);			\
		else							\
			VM_CTR0(vm, fmt);				\
	} while (0)

/*
 * Participate in an active rendezvous: run the rendezvous function on
 * behalf of this vcpu (if requested and not yet done) and sleep until
 * all requested vcpus have completed.  'vcpuid' may be -1 when called
 * from a host thread that is not a vcpu.
 */
static void
vm_handle_rendezvous(struct vm *vm, int vcpuid)
{

	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
	    ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid));

	mtx_lock(&vm->rendezvous_mtx);
	while (vm->rendezvous_func != NULL) {
		/* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
		CPU_AND(&vm->rendezvous_req_cpus, &vm->active_cpus);

		if (vcpuid != -1 &&
		    CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
		    !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
			VCPU_CTR0(vm, vcpuid, "Calling rendezvous func");
			(*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg);
			CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
		}
		/* Last vcpu to finish clears the func and wakes the rest */
		if (CPU_CMP(&vm->rendezvous_req_cpus,
		    &vm->rendezvous_done_cpus) == 0) {
			VCPU_CTR0(vm, vcpuid, "Rendezvous completed");
			vm_set_rendezvous_func(vm, NULL);
			wakeup(&vm->rendezvous_func);
			break;
		}
		RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion");
		mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
		    "vmrndv", 0);
	}
	mtx_unlock(&vm->rendezvous_mtx);
}

/*
 * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
 */
static int
vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
{
	struct vcpu *vcpu;
	const char *wmesg;
	int t, vcpu_halted, vm_halted;

	KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted"));

	vcpu = &vm->vcpu[vcpuid];
	vcpu_halted = 0;
	vm_halted = 0;

	vcpu_lock(vcpu);
	while (1) {
		/*
		 * Do a final check for pending NMI or interrupts before
		 * really putting this thread to sleep. Also check for
		 * software events that would cause this vcpu to wakeup.
		 *
		 * These interrupts/events could have happened after the
		 * vcpu returned from VMRUN() and before it acquired the
		 * vcpu lock above.
		 */
		if (vm->rendezvous_func != NULL || vm->suspend)
			break;
		if (vm_nmi_pending(vm, vcpuid))
			break;
		if (!intr_disabled) {
			if (vm_extint_pending(vm, vcpuid) ||
			    vlapic_pending_intr(vcpu->vlapic, NULL)) {
				break;
			}
		}

		/* Don't go to sleep if the vcpu thread needs to yield */
		if (vcpu_should_yield(vm, vcpuid))
			break;

		/*
		 * Some Linux guests implement "halt" by having all vcpus
		 * execute HLT with interrupts disabled. 'halted_cpus' keeps
		 * track of the vcpus that have entered this state. When all
		 * vcpus enter the halted state the virtual machine is halted.
		 */
		if (intr_disabled) {
			wmesg = "vmhalt";
			VCPU_CTR0(vm, vcpuid, "Halted");
			if (!vcpu_halted && halt_detection_enabled) {
				vcpu_halted = 1;
				CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus);
			}
			/* All active vcpus halted: the whole VM is halted */
			if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) {
				vm_halted = 1;
				break;
			}
		} else {
			wmesg = "vmidle";
		}

		t = ticks;
		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		/*
		 * XXX msleep_spin() cannot be interrupted by signals so
		 * wake up periodically to check pending signals.
		 */
		msleep_spin(vcpu, &vcpu->mtx, wmesg, hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
	}

	if (vcpu_halted)
		CPU_CLR_ATOMIC(vcpuid, &vm->halted_cpus);

	vcpu_unlock(vcpu);

	if (vm_halted)
		vm_suspend(vm, VM_SUSPEND_HALT);

	return (0);
}

/*
 * Handle a guest nested-page-fault exit.  First try to emulate
 * accessed/dirty bit updates in the pmap; if that does not resolve the
 * fault, fault the page in through the vmspace's vm_map.  Returns 0 on
 * success or EFAULT if the fault could not be resolved.
 */
static int
vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
{
	int rv, ftype;
	struct vm_map *map;
	struct vcpu *vcpu;
	struct vm_exit *vme;

	vcpu = &vm->vcpu[vcpuid];
	vme = &vcpu->exitinfo;

	/* The faulting instruction will be restarted, not skipped */
	KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d",
	    __func__, vme->inst_length));

	ftype = vme->u.paging.fault_type;
	KASSERT(ftype == VM_PROT_READ ||
	    ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE,
	    ("vm_handle_paging: invalid fault_type %d", ftype));

	if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
		rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
		    vme->u.paging.gpa, ftype);
		if (rv == 0) {
			VCPU_CTR2(vm, vcpuid, "%s bit emulation for gpa %#lx",
			    ftype == VM_PROT_READ ? "accessed" : "dirty",
			    vme->u.paging.gpa);
			goto done;
		}
	}

	map = &vm->vmspace->vm_map;
	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL);

	VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, "
	    "ftype = %d", rv, vme->u.paging.gpa, ftype);

	if (rv != KERN_SUCCESS)
		return (EFAULT);
done:
	return (0);
}

/*
 * Handle an instruction-emulation exit: fetch and decode the faulting
 * instruction and, if the target gpa is an in-kernel emulated device
 * (local APIC, I/O APIC, HPET), emulate it here.  Otherwise set
 * '*retu' so the exit is forwarded to userland.
 */
static int
vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
{
	struct vie *vie;
	struct vcpu *vcpu;
	struct vm_exit *vme;
	uint64_t gla, gpa, cs_base;
	struct vm_guest_paging *paging;
	mem_region_read_t mread;
	mem_region_write_t mwrite;
	enum vm_cpu_mode cpu_mode;
	int cs_d, error, length;

	vcpu = &vm->vcpu[vcpuid];
	vme = &vcpu->exitinfo;

	gla = vme->u.inst_emul.gla;
	gpa = vme->u.inst_emul.gpa;
	cs_base = vme->u.inst_emul.cs_base;
	cs_d = vme->u.inst_emul.cs_d;
	vie = &vme->u.inst_emul.vie;
	paging = &vme->u.inst_emul.paging;
	cpu_mode = paging->cpu_mode;

	VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %#lx", gpa);

	/* Fetch, decode and emulate the faulting instruction */
	if (vie->num_valid == 0) {
		/*
		 * If the instruction length is not known then assume a
		 * maximum size instruction.
		 */
		length = vme->inst_length ? vme->inst_length : VIE_INST_SIZE;
		error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip +
		    cs_base, length, vie);
	} else {
		/*
		 * The instruction bytes have already been copied into 'vie'
		 */
		error = 0;
	}
	if (error == 1)
		return (0);	/* Resume guest to handle page fault */
	else if (error == -1)
		return (EFAULT);
	else if (error != 0)
		panic("%s: vmm_fetch_instruction error %d", __func__, error);

	if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0)
		return (EFAULT);

	/*
	 * If the instruction length was not specified then update it now
	 * along with 'nextrip'.
	 */
	if (vme->inst_length == 0) {
		vme->inst_length = vie->num_processed;
		vcpu->nextrip += vie->num_processed;
	}

	/* return to userland unless this is an in-kernel emulated device */
	if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
		mread = lapic_mmio_read;
		mwrite = lapic_mmio_write;
	} else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
		mread = vioapic_mmio_read;
		mwrite = vioapic_mmio_write;
	} else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
		mread = vhpet_mmio_read;
		mwrite = vhpet_mmio_write;
	} else {
		*retu = true;
		return (0);
	}

	error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, paging,
	    mread, mwrite, retu);

	return (error);
}

/*
 * Handle a suspend exit: mark this vcpu suspended and wait for all
 * other active vcpus to do the same, then return to userspace.
 */
static int
vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu)
{
	/* NOTE(review): 'done' is initialized but never read here. */
	int i, done;
	struct vcpu *vcpu;

	done = 0;
	vcpu = &vm->vcpu[vcpuid];

	CPU_SET_ATOMIC(vcpuid, &vm->suspended_cpus);

	/*
	 * Wait until all 'active_cpus' have suspended themselves.
	 *
	 * Since a VM may be suspended at any time including when one or
	 * more vcpus are doing a rendezvous we need to call the rendezvous
	 * handler while we are waiting to prevent a deadlock.
	 */
	vcpu_lock(vcpu);
	while (1) {
		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
			VCPU_CTR0(vm, vcpuid, "All vcpus suspended");
			break;
		}

		if (vm->rendezvous_func == NULL) {
			VCPU_CTR0(vm, vcpuid, "Sleeping during suspend");
			vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
			msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
			vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		} else {
			/* Drop the vcpu lock while servicing the rendezvous */
			VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend");
			vcpu_unlock(vcpu);
			vm_handle_rendezvous(vm, vcpuid);
			vcpu_lock(vcpu);
		}
	}
	vcpu_unlock(vcpu);

	/*
	 * Wakeup the other sleeping vcpus and return to userspace.
	 */
	for (i = 0; i < VM_MAXCPU; i++) {
		if (CPU_ISSET(i, &vm->suspended_cpus)) {
			vcpu_notify_event(vm, i, false);
		}
	}

	*retu = true;
	return (0);
}

/*
 * Initiate suspension of the virtual machine with reason 'how'.
 * Returns EALREADY if a suspend is already in progress; otherwise
 * notifies all active vcpus so they exit the guest and observe the
 * suspend.
 */
int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	int i;

	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	/* Only the first caller gets to set the suspend reason */
	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
		VM_CTR2(vm, "virtual machine already suspended %d/%d",
		    vm->suspend, how);
		return (EALREADY);
	}

	VM_CTR1(vm, "virtual machine successfully suspended %d", how);

	/*
	 * Notify all active vcpus that they are now suspended.
	 */
	for (i = 0; i < VM_MAXCPU; i++) {
		if (CPU_ISSET(i, &vm->active_cpus))
			vcpu_notify_event(vm, i, false);
	}

	return (0);
}

/*
 * Populate the vcpu's exitinfo with a SUSPENDED exit at 'rip'.
 */
void
vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip)
{
	struct vm_exit *vmexit;

	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));

	vmexit = vm_exitinfo(vm, vcpuid);
	vmexit->rip = rip;
	vmexit->inst_length = 0;
	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
	vmexit->u.suspended.how = vm->suspend;
}

/*
 * Populate the vcpu's exitinfo with a RENDEZVOUS exit at 'rip'.
 */
void
vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip)
{
	struct vm_exit *vmexit;

	KASSERT(vm->rendezvous_func != NULL, ("rendezvous not in progress"));

	vmexit = vm_exitinfo(vm, vcpuid);
	vmexit->rip = rip;
	vmexit->inst_length = 0;
	vmexit->exitcode = VM_EXITCODE_RENDEZVOUS;
	vmm_stat_incr(vm, vcpuid, VMEXIT_RENDEZVOUS, 1);
}

/*
 * Populate the vcpu's exitinfo with a BOGUS exit at 'rip' (used when
 * an AST is pending on the vcpu thread).
 */
void
vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip)
{
	struct vm_exit *vmexit;

	vmexit = vm_exitinfo(vm, vcpuid);
	vmexit->rip = rip;
	vmexit->inst_length = 0;
	vmexit->exitcode = VM_EXITCODE_BOGUS;
	vmm_stat_incr(vm, vcpuid, VMEXIT_ASTPENDING, 1);
}

/*
 * Main vcpu execution loop for the VM_RUN ioctl: repeatedly enter the
 * guest via the backend VMRUN() and service exits in-kernel where
 * possible, returning to userland (with the exit information copied
 * into 'vmrun') only when an exit cannot be handled here.
 */
int
vm_run(struct vm *vm, struct vm_run *vmrun)
{
	int error, vcpuid;
	struct vcpu *vcpu;
	struct pcb *pcb;
	uint64_t tscval;
	struct vm_exit *vme;
	bool retu, intr_disabled;
	pmap_t pmap;
	void *rptr, *sptr;

	vcpuid = vmrun->cpuid;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
		return (EINVAL);

	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
		return (EINVAL);

	/* Addresses the backend polls for rendezvous/suspend requests */
	rptr = &vm->rendezvous_func;
	sptr = &vm->suspend;
	pmap = vmspace_pmap(vm->vmspace);
	vcpu = &vm->vcpu[vcpuid];
	vme = &vcpu->exitinfo;
restart:
	critical_enter();

	KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active),
	    ("vm_run: absurd pm_active"));

	tscval = rdtsc();

	pcb = PCPU_GET(curpcb);
	set_pcb_flags(pcb, PCB_FULL_IRET);

	restore_guest_fpustate(vcpu);

	vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
	error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap, rptr, sptr);
	vcpu_require_state(vm, vcpuid, VCPU_FROZEN);

	save_guest_fpustate(vcpu);

	vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);

	critical_exit();

	if (error == 0) {
		retu = false;
		/* Default resume point: just past the exiting instruction */
		vcpu->nextrip = vme->rip + vme->inst_length;
		switch (vme->exitcode) {
		case VM_EXITCODE_SUSPENDED:
			error = vm_handle_suspend(vm, vcpuid, &retu);
			break;
		case VM_EXITCODE_IOAPIC_EOI:
			vioapic_process_eoi(vm, vcpuid,
			    vme->u.ioapic_eoi.vector);
			break;
		case VM_EXITCODE_RENDEZVOUS:
			vm_handle_rendezvous(vm, vcpuid);
			error = 0;
			break;
		case VM_EXITCODE_HLT:
			intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
			error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu);
			break;
		case VM_EXITCODE_PAGING:
			error = vm_handle_paging(vm, vcpuid, &retu);
			break;
		case VM_EXITCODE_INST_EMUL:
			error = vm_handle_inst_emul(vm, vcpuid, &retu);
			break;
		case VM_EXITCODE_INOUT:
		case VM_EXITCODE_INOUT_STR:
			error = vm_handle_inout(vm, vcpuid, vme, &retu);
			break;
		case VM_EXITCODE_MONITOR:
		case VM_EXITCODE_MWAIT:
			vm_inject_ud(vm, vcpuid);
			break;
		default:
			retu = true;	/* handled in userland */
			break;
		}
	}

	if (error == 0 && retu == false)
		goto restart;

	/* copy the exit information */
	bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
	return (error);
}

/*
 * Arrange for the current instruction of 'vcpuid' to be re-executed on
 * the next guest entry.  The mechanism depends on the vcpu state (see
 * the comments below); any other state is a programming error.
 */
int
vm_restart_instruction(void *arg, int vcpuid)
{
	struct vm *vm;
	struct vcpu *vcpu;
	enum vcpu_state state;
	uint64_t rip;
	int error;

	vm = arg;
	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];
	state = vcpu_get_state(vm, vcpuid, NULL);
	if (state == VCPU_RUNNING) {
		/*
		 * When a vcpu is "running" the next instruction is determined
		 * by adding 'rip' and 'inst_length' in the vcpu's 'exitinfo'.
		 * Thus setting 'inst_length' to zero will cause the current
		 * instruction to be restarted.
		 */
		vcpu->exitinfo.inst_length = 0;
		VCPU_CTR1(vm, vcpuid, "restarting instruction at %#lx by "
		    "setting inst_length to zero", vcpu->exitinfo.rip);
	} else if (state == VCPU_FROZEN) {
		/*
		 * When a vcpu is "frozen" it is outside the critical section
		 * around VMRUN() and 'nextrip' points to the next instruction.
		 * Thus instruction restart is achieved by setting 'nextrip'
		 * to the vcpu's %rip.
		 */
		error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RIP, &rip);
		KASSERT(!error, ("%s: error %d getting rip", __func__, error));
		VCPU_CTR2(vm, vcpuid, "restarting instruction by updating "
		    "nextrip from %#lx to %#lx", vcpu->nextrip, rip);
		vcpu->nextrip = rip;
	} else {
		panic("%s: invalid state %d", __func__, state);
	}
	return (0);
}

/*
 * Record exit interruption information ('info') for 'vcpuid' after
 * validating its type/vector/reserved-bit encoding.  An entry without
 * VM_INTINFO_VALID clears the saved state.
 */
int
vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info)
{
	struct vcpu *vcpu;
	int type, vector;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];

	if (info & VM_INTINFO_VALID) {
		type = info & VM_INTINFO_TYPE;
		vector = info & 0xff;
		/* NMI must use the NMI vector */
		if (type == VM_INTINFO_NMI && vector != IDT_NMI)
			return (EINVAL);
		/* Hardware exception vectors are 0-31 */
		if (type == VM_INTINFO_HWEXCEPTION && vector >= 32)
			return (EINVAL);
		if (info & VM_INTINFO_RSVD)
			return (EINVAL);
	} else {
		info = 0;
	}
	VCPU_CTR2(vm, vcpuid, "%s: info1(%#lx)", __func__, info);
	vcpu->exitintinfo = info;
	return (0);
}

/* Exception classes used to decide double-fault generation */
enum exc_class {
	EXC_BENIGN,
	EXC_CONTRIBUTORY,
	EXC_PAGEFAULT
};

#define	IDT_VE	20	/* Virtualization Exception (Intel specific) */

/*
 * Classify interruption information 'info' into benign, contributory
 * or page-fault class per the Intel SDM.  'info' must have
 * VM_INTINFO_VALID set.
 */
static enum exc_class
exception_class(uint64_t info)
{
	int type, vector;

	KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %#lx", info));
	type = info & VM_INTINFO_TYPE;
	vector = info & 0xff;

	/* Table 6-4, "Interrupt and Exception Classes", Intel SDM, Vol 3 */
	switch (type) {
	case VM_INTINFO_HWINTR:
	case VM_INTINFO_SWINTR:
	case VM_INTINFO_NMI:
		return (EXC_BENIGN);
	default:
		/*
		 * Hardware exception.
		 *
		 * SVM and VT-x use identical type values to represent NMI,
		 * hardware interrupt and software interrupt.
		 *
		 * SVM uses type '3' for all exceptions. VT-x uses type '3'
		 * for exceptions except #BP and #OF. #BP and #OF use a type
		 * value of '5' or '6'. Therefore we don't check for explicit
		 * values of 'type' to classify 'intinfo' into a hardware
		 * exception.
		 */
		break;
	}

	switch (vector) {
	case IDT_PF:
	case IDT_VE:
		return (EXC_PAGEFAULT);
	case IDT_DE:
	case IDT_TS:
	case IDT_NP:
	case IDT_SS:
	case IDT_GP:
		return (EXC_CONTRIBUTORY);
	default:
		return (EXC_BENIGN);
	}
}

/*
 * Combine two pending events 'info1' (first) and 'info2' (second)
 * according to the Intel SDM double-fault rules.  Returns 0 with
 * '*retinfo' cleared when the combination is a triple fault (the VM is
 * suspended), otherwise returns 1 with '*retinfo' set to the event to
 * deliver (either a synthesized #DF or 'info2' handled serially).
 */
static int
nested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2,
    uint64_t *retinfo)
{
	enum exc_class exc1, exc2;
	int type1, vector1;

	KASSERT(info1 & VM_INTINFO_VALID, ("info1 %#lx is not valid", info1));
	KASSERT(info2 & VM_INTINFO_VALID, ("info2 %#lx is not valid", info2));

	/*
	 * If an exception occurs while attempting to call the double-fault
	 * handler the processor enters shutdown mode (aka triple fault).
	 */
	type1 = info1 & VM_INTINFO_TYPE;
	vector1 = info1 & 0xff;
	if (type1 == VM_INTINFO_HWEXCEPTION && vector1 == IDT_DF) {
		VCPU_CTR2(vm, vcpuid, "triple fault: info1(%#lx), info2(%#lx)",
		    info1, info2);
		vm_suspend(vm, VM_SUSPEND_TRIPLEFAULT);
		*retinfo = 0;
		return (0);
	}

	/*
	 * Table 6-5 "Conditions for Generating a Double Fault", Intel SDM, Vol3
	 */
	exc1 = exception_class(info1);
	exc2 = exception_class(info2);
	if ((exc1 == EXC_CONTRIBUTORY && exc2 == EXC_CONTRIBUTORY) ||
	    (exc1 == EXC_PAGEFAULT && exc2 != EXC_BENIGN)) {
		/* Convert nested fault into a double fault. */
		*retinfo = IDT_DF;
		*retinfo |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION;
		*retinfo |= VM_INTINFO_DEL_ERRCODE;
	} else {
		/* Handle exceptions serially */
		*retinfo = info2;
	}
	return (1);
}

/*
 * Encode the vcpu's pending exception (vector and optional error
 * code) into the intinfo format, or return 0 if none is pending.
 */
static uint64_t
vcpu_exception_intinfo(struct vcpu *vcpu)
{
	uint64_t info = 0;

	if (vcpu->exception_pending) {
		info = vcpu->exc_vector & 0xff;
		info |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION;
		if (vcpu->exc_errcode_valid) {
			info |= VM_INTINFO_DEL_ERRCODE;
			/* Error code lives in bits 63:32 of intinfo */
			info |= (uint64_t)vcpu->exc_errcode << 32;
		}
	}
	return (info);
}

/*
 * Compute the event to inject on the next guest entry by merging the
 * saved exit intinfo with any pending exception (consuming both).
 * Returns non-zero with '*retinfo' set if an event should be injected.
 */
int
vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo)
{
	struct vcpu *vcpu;
	uint64_t info1, info2;
	int valid;

	KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid));

	vcpu = &vm->vcpu[vcpuid];

	/* Both sources of pending events are consumed here */
	info1 = vcpu->exitintinfo;
	vcpu->exitintinfo = 0;

	info2 = 0;
	if (vcpu->exception_pending) {
		info2 = vcpu_exception_intinfo(vcpu);
		vcpu->exception_pending = 0;
		VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %#lx",
		    vcpu->exc_vector, info2);
	}

	if ((info1 & VM_INTINFO_VALID) && (info2 & VM_INTINFO_VALID)) {
		valid = nested_fault(vm, vcpuid, info1, info2, retinfo);
	} else if (info1 & VM_INTINFO_VALID) {
		*retinfo = info1;
		valid = 1;
	} else if (info2 & VM_INTINFO_VALID) {
		*retinfo = info2;
		valid = 1;
	} else {
		valid = 0;
	}

	if (valid) {
		VCPU_CTR4(vm, vcpuid, "%s: info1(%#lx), info2(%#lx), "
		    "retinfo(%#lx)", __func__, info1, info2, *retinfo);
	}

	return (valid);
}

/*
 * Read (without consuming) the vcpu's saved exit intinfo and pending
 * exception intinfo.
 */
int
vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];
	*info1 = vcpu->exitintinfo;
	*info2 = vcpu_exception_intinfo(vcpu);
	return (0);
}

int
vm_inject_exception(struct vm *vm, int vcpuid, int vector, int errcode_valid,
    uint32_t errcode, int restart_instruction)
{
	struct vcpu *vcpu;
	int error;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	if (vector < 0 || vector >= 32)
		return (EINVAL);

	/*
	 * A double fault exception should never be injected directly into
	 * the guest. It is a derived exception that results from specific
	 * combinations of nested faults.
	 */
	if (vector == IDT_DF)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];

	if (vcpu->exception_pending) {
		VCPU_CTR2(vm, vcpuid, "Unable to inject exception %d due to "
		    "pending exception %d", vector, vcpu->exc_vector);
		return (EBUSY);
	}

	/*
	 * From section 26.6.1 "Interruptibility State" in Intel SDM:
	 *
	 * Event blocking by "STI" or "MOV SS" is cleared after guest executes
	 * one instruction or incurs an exception.
1822284894Sneel */ 1823284894Sneel error = vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0); 1824284894Sneel KASSERT(error == 0, ("%s: error %d clearing interrupt shadow", 1825284894Sneel __func__, error)); 1826284894Sneel 1827284894Sneel if (restart_instruction) 1828284894Sneel vm_restart_instruction(vm, vcpuid); 1829284894Sneel 1830267427Sjhb vcpu->exception_pending = 1; 1831284894Sneel vcpu->exc_vector = vector; 1832284894Sneel vcpu->exc_errcode = errcode; 1833284894Sneel vcpu->exc_errcode_valid = errcode_valid; 1834284894Sneel VCPU_CTR1(vm, vcpuid, "Exception %d pending", vector); 1835267427Sjhb return (0); 1836221828Sgrehan} 1837221828Sgrehan 1838270159Sgrehanvoid 1839270159Sgrehanvm_inject_fault(void *vmarg, int vcpuid, int vector, int errcode_valid, 1840270159Sgrehan int errcode) 1841267427Sjhb{ 1842270159Sgrehan struct vm *vm; 1843284894Sneel int error, restart_instruction; 1844267427Sjhb 1845270159Sgrehan vm = vmarg; 1846284894Sneel restart_instruction = 1; 1847270159Sgrehan 1848284894Sneel error = vm_inject_exception(vm, vcpuid, vector, errcode_valid, 1849284894Sneel errcode, restart_instruction); 1850267427Sjhb KASSERT(error == 0, ("vm_inject_exception error %d", error)); 1851267427Sjhb} 1852267427Sjhb 1853267427Sjhbvoid 1854270159Sgrehanvm_inject_pf(void *vmarg, int vcpuid, int error_code, uint64_t cr2) 1855268976Sjhb{ 1856270159Sgrehan struct vm *vm; 1857268976Sjhb int error; 1858268976Sjhb 1859270159Sgrehan vm = vmarg; 1860268976Sjhb VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %#x, cr2 %#lx", 1861268976Sjhb error_code, cr2); 1862268976Sjhb 1863268976Sjhb error = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR2, cr2); 1864268976Sjhb KASSERT(error == 0, ("vm_set_register(cr2) error %d", error)); 1865268976Sjhb 1866270159Sgrehan vm_inject_fault(vm, vcpuid, IDT_PF, 1, error_code); 1867268976Sjhb} 1868268976Sjhb 1869248389Sneelstatic VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu"); 1870241982Sneel 1871221828Sgrehanint 
1872241982Sneelvm_inject_nmi(struct vm *vm, int vcpuid) 1873221828Sgrehan{ 1874241982Sneel struct vcpu *vcpu; 1875221828Sgrehan 1876241982Sneel if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1877221828Sgrehan return (EINVAL); 1878221828Sgrehan 1879241982Sneel vcpu = &vm->vcpu[vcpuid]; 1880241982Sneel 1881241982Sneel vcpu->nmi_pending = 1; 1882266339Sjhb vcpu_notify_event(vm, vcpuid, false); 1883241982Sneel return (0); 1884221828Sgrehan} 1885221828Sgrehan 1886221828Sgrehanint 1887241982Sneelvm_nmi_pending(struct vm *vm, int vcpuid) 1888241982Sneel{ 1889241982Sneel struct vcpu *vcpu; 1890241982Sneel 1891241982Sneel if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1892241982Sneel panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); 1893241982Sneel 1894241982Sneel vcpu = &vm->vcpu[vcpuid]; 1895241982Sneel 1896241982Sneel return (vcpu->nmi_pending); 1897241982Sneel} 1898241982Sneel 1899241982Sneelvoid 1900241982Sneelvm_nmi_clear(struct vm *vm, int vcpuid) 1901241982Sneel{ 1902241982Sneel struct vcpu *vcpu; 1903241982Sneel 1904241982Sneel if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1905241982Sneel panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); 1906241982Sneel 1907241982Sneel vcpu = &vm->vcpu[vcpuid]; 1908241982Sneel 1909241982Sneel if (vcpu->nmi_pending == 0) 1910241982Sneel panic("vm_nmi_clear: inconsistent nmi_pending state"); 1911241982Sneel 1912241982Sneel vcpu->nmi_pending = 0; 1913241982Sneel vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1); 1914241982Sneel} 1915241982Sneel 1916268891Sjhbstatic VMM_STAT(VCPU_EXTINT_COUNT, "number of ExtINTs delivered to vcpu"); 1917268891Sjhb 1918241982Sneelint 1919268891Sjhbvm_inject_extint(struct vm *vm, int vcpuid) 1920268891Sjhb{ 1921268891Sjhb struct vcpu *vcpu; 1922268891Sjhb 1923268891Sjhb if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1924268891Sjhb return (EINVAL); 1925268891Sjhb 1926268891Sjhb vcpu = &vm->vcpu[vcpuid]; 1927268891Sjhb 1928268891Sjhb vcpu->extint_pending = 1; 1929268891Sjhb vcpu_notify_event(vm, vcpuid, false); 1930268891Sjhb return (0); 
1931268891Sjhb} 1932268891Sjhb 1933268891Sjhbint 1934268891Sjhbvm_extint_pending(struct vm *vm, int vcpuid) 1935268891Sjhb{ 1936268891Sjhb struct vcpu *vcpu; 1937268891Sjhb 1938268891Sjhb if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1939268891Sjhb panic("vm_extint_pending: invalid vcpuid %d", vcpuid); 1940268891Sjhb 1941268891Sjhb vcpu = &vm->vcpu[vcpuid]; 1942268891Sjhb 1943268891Sjhb return (vcpu->extint_pending); 1944268891Sjhb} 1945268891Sjhb 1946268891Sjhbvoid 1947268891Sjhbvm_extint_clear(struct vm *vm, int vcpuid) 1948268891Sjhb{ 1949268891Sjhb struct vcpu *vcpu; 1950268891Sjhb 1951268891Sjhb if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 1952268891Sjhb panic("vm_extint_pending: invalid vcpuid %d", vcpuid); 1953268891Sjhb 1954268891Sjhb vcpu = &vm->vcpu[vcpuid]; 1955268891Sjhb 1956268891Sjhb if (vcpu->extint_pending == 0) 1957268891Sjhb panic("vm_extint_clear: inconsistent extint_pending state"); 1958268891Sjhb 1959268891Sjhb vcpu->extint_pending = 0; 1960268891Sjhb vmm_stat_incr(vm, vcpuid, VCPU_EXTINT_COUNT, 1); 1961268891Sjhb} 1962268891Sjhb 1963268891Sjhbint 1964221828Sgrehanvm_get_capability(struct vm *vm, int vcpu, int type, int *retval) 1965221828Sgrehan{ 1966221828Sgrehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 1967221828Sgrehan return (EINVAL); 1968221828Sgrehan 1969221828Sgrehan if (type < 0 || type >= VM_CAP_MAX) 1970221828Sgrehan return (EINVAL); 1971221828Sgrehan 1972221828Sgrehan return (VMGETCAP(vm->cookie, vcpu, type, retval)); 1973221828Sgrehan} 1974221828Sgrehan 1975221828Sgrehanint 1976221828Sgrehanvm_set_capability(struct vm *vm, int vcpu, int type, int val) 1977221828Sgrehan{ 1978221828Sgrehan if (vcpu < 0 || vcpu >= VM_MAXCPU) 1979221828Sgrehan return (EINVAL); 1980221828Sgrehan 1981221828Sgrehan if (type < 0 || type >= VM_CAP_MAX) 1982221828Sgrehan return (EINVAL); 1983221828Sgrehan 1984221828Sgrehan return (VMSETCAP(vm->cookie, vcpu, type, val)); 1985221828Sgrehan} 1986221828Sgrehan 1987221828Sgrehanstruct vlapic * 1988221828Sgrehanvm_lapic(struct vm 
*vm, int cpu) 1989221828Sgrehan{ 1990221828Sgrehan return (vm->vcpu[cpu].vlapic); 1991221828Sgrehan} 1992221828Sgrehan 1993261088Sjhbstruct vioapic * 1994261088Sjhbvm_ioapic(struct vm *vm) 1995261088Sjhb{ 1996261088Sjhb 1997261088Sjhb return (vm->vioapic); 1998261088Sjhb} 1999261088Sjhb 2000261088Sjhbstruct vhpet * 2001261088Sjhbvm_hpet(struct vm *vm) 2002261088Sjhb{ 2003261088Sjhb 2004261088Sjhb return (vm->vhpet); 2005261088Sjhb} 2006261088Sjhb 2007221828Sgrehanboolean_t 2008221828Sgrehanvmm_is_pptdev(int bus, int slot, int func) 2009221828Sgrehan{ 2010246188Sneel int found, i, n; 2011246188Sneel int b, s, f; 2012221828Sgrehan char *val, *cp, *cp2; 2013221828Sgrehan 2014221828Sgrehan /* 2015246188Sneel * XXX 2016246188Sneel * The length of an environment variable is limited to 128 bytes which 2017246188Sneel * puts an upper limit on the number of passthru devices that may be 2018246188Sneel * specified using a single environment variable. 2019246188Sneel * 2020246188Sneel * Work around this by scanning multiple environment variable 2021246188Sneel * names instead of a single one - yuck! 
2022221828Sgrehan */ 2023246188Sneel const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL }; 2024246188Sneel 2025246188Sneel /* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */ 2026221828Sgrehan found = 0; 2027246188Sneel for (i = 0; names[i] != NULL && !found; i++) { 2028246188Sneel cp = val = getenv(names[i]); 2029246188Sneel while (cp != NULL && *cp != '\0') { 2030246188Sneel if ((cp2 = strchr(cp, ' ')) != NULL) 2031246188Sneel *cp2 = '\0'; 2032221828Sgrehan 2033246188Sneel n = sscanf(cp, "%d/%d/%d", &b, &s, &f); 2034246188Sneel if (n == 3 && bus == b && slot == s && func == f) { 2035246188Sneel found = 1; 2036246188Sneel break; 2037246188Sneel } 2038221828Sgrehan 2039246188Sneel if (cp2 != NULL) 2040246188Sneel *cp2++ = ' '; 2041221828Sgrehan 2042246188Sneel cp = cp2; 2043246188Sneel } 2044246188Sneel freeenv(val); 2045221828Sgrehan } 2046221828Sgrehan return (found); 2047221828Sgrehan} 2048221828Sgrehan 2049221828Sgrehanvoid * 2050221828Sgrehanvm_iommu_domain(struct vm *vm) 2051221828Sgrehan{ 2052221828Sgrehan 2053221828Sgrehan return (vm->iommu); 2054221828Sgrehan} 2055221828Sgrehan 2056241489Sneelint 2057266393Sjhbvcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate, 2058266393Sjhb bool from_idle) 2059221828Sgrehan{ 2060241489Sneel int error; 2061221828Sgrehan struct vcpu *vcpu; 2062221828Sgrehan 2063221828Sgrehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2064221828Sgrehan panic("vm_set_run_state: invalid vcpuid %d", vcpuid); 2065221828Sgrehan 2066221828Sgrehan vcpu = &vm->vcpu[vcpuid]; 2067221828Sgrehan 2068241489Sneel vcpu_lock(vcpu); 2069266393Sjhb error = vcpu_set_state_locked(vcpu, newstate, from_idle); 2070241489Sneel vcpu_unlock(vcpu); 2071241489Sneel 2072241489Sneel return (error); 2073221828Sgrehan} 2074221828Sgrehan 2075241489Sneelenum vcpu_state 2076249879Sgrehanvcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu) 2077221828Sgrehan{ 2078221828Sgrehan struct vcpu *vcpu; 2079241489Sneel enum vcpu_state state; 2080221828Sgrehan 
2081221828Sgrehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2082221828Sgrehan panic("vm_get_run_state: invalid vcpuid %d", vcpuid); 2083221828Sgrehan 2084221828Sgrehan vcpu = &vm->vcpu[vcpuid]; 2085221828Sgrehan 2086241489Sneel vcpu_lock(vcpu); 2087241489Sneel state = vcpu->state; 2088249879Sgrehan if (hostcpu != NULL) 2089249879Sgrehan *hostcpu = vcpu->hostcpu; 2090241489Sneel vcpu_unlock(vcpu); 2091221828Sgrehan 2092241489Sneel return (state); 2093221828Sgrehan} 2094221828Sgrehan 2095270070Sgrehanint 2096221828Sgrehanvm_activate_cpu(struct vm *vm, int vcpuid) 2097221828Sgrehan{ 2098221828Sgrehan 2099270070Sgrehan if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2100270070Sgrehan return (EINVAL); 2101266339Sjhb 2102270070Sgrehan if (CPU_ISSET(vcpuid, &vm->active_cpus)) 2103270070Sgrehan return (EBUSY); 2104270070Sgrehan 2105266339Sjhb VCPU_CTR0(vm, vcpuid, "activated"); 2106266339Sjhb CPU_SET_ATOMIC(vcpuid, &vm->active_cpus); 2107270070Sgrehan return (0); 2108221828Sgrehan} 2109221828Sgrehan 2110223621Sgrehancpuset_t 2111221828Sgrehanvm_active_cpus(struct vm *vm) 2112221828Sgrehan{ 2113221828Sgrehan 2114221828Sgrehan return (vm->active_cpus); 2115221828Sgrehan} 2116221828Sgrehan 2117270070Sgrehancpuset_t 2118270070Sgrehanvm_suspended_cpus(struct vm *vm) 2119270070Sgrehan{ 2120270070Sgrehan 2121270070Sgrehan return (vm->suspended_cpus); 2122270070Sgrehan} 2123270070Sgrehan 2124221828Sgrehanvoid * 2125221828Sgrehanvcpu_stats(struct vm *vm, int vcpuid) 2126221828Sgrehan{ 2127221828Sgrehan 2128221828Sgrehan return (vm->vcpu[vcpuid].stats); 2129221828Sgrehan} 2130240922Sneel 2131240922Sneelint 2132240922Sneelvm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state) 2133240922Sneel{ 2134240922Sneel if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2135240922Sneel return (EINVAL); 2136240922Sneel 2137240922Sneel *state = vm->vcpu[vcpuid].x2apic_state; 2138240922Sneel 2139240922Sneel return (0); 2140240922Sneel} 2141240922Sneel 2142240922Sneelint 
2143240922Sneelvm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) 2144240922Sneel{ 2145240922Sneel if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 2146240922Sneel return (EINVAL); 2147240922Sneel 2148248392Sneel if (state >= X2APIC_STATE_LAST) 2149240922Sneel return (EINVAL); 2150240922Sneel 2151240922Sneel vm->vcpu[vcpuid].x2apic_state = state; 2152240922Sneel 2153240943Sneel vlapic_set_x2apic_state(vm, vcpuid, state); 2154240943Sneel 2155240922Sneel return (0); 2156240922Sneel} 2157241489Sneel 2158262350Sjhb/* 2159262350Sjhb * This function is called to ensure that a vcpu "sees" a pending event 2160262350Sjhb * as soon as possible: 2161262350Sjhb * - If the vcpu thread is sleeping then it is woken up. 2162262350Sjhb * - If the vcpu is running on a different host_cpu then an IPI will be directed 2163262350Sjhb * to the host_cpu to cause the vcpu to trap into the hypervisor. 2164262350Sjhb */ 2165241489Sneelvoid 2166266339Sjhbvcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr) 2167241489Sneel{ 2168241489Sneel int hostcpu; 2169241489Sneel struct vcpu *vcpu; 2170241489Sneel 2171241489Sneel vcpu = &vm->vcpu[vcpuid]; 2172241489Sneel 2173242065Sneel vcpu_lock(vcpu); 2174241489Sneel hostcpu = vcpu->hostcpu; 2175266393Sjhb if (vcpu->state == VCPU_RUNNING) { 2176266393Sjhb KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); 2177266339Sjhb if (hostcpu != curcpu) { 2178266393Sjhb if (lapic_intr) { 2179266339Sjhb vlapic_post_intr(vcpu->vlapic, hostcpu, 2180266339Sjhb vmm_ipinum); 2181266393Sjhb } else { 2182266339Sjhb ipi_cpu(hostcpu, vmm_ipinum); 2183266393Sjhb } 2184266393Sjhb } else { 2185266393Sjhb /* 2186266393Sjhb * If the 'vcpu' is running on 'curcpu' then it must 2187266393Sjhb * be sending a notification to itself (e.g. SELF_IPI). 2188266393Sjhb * The pending event will be picked up when the vcpu 2189266393Sjhb * transitions back to guest context. 
2190266393Sjhb */ 2191266339Sjhb } 2192266393Sjhb } else { 2193266393Sjhb KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent " 2194266393Sjhb "with hostcpu %d", vcpu->state, hostcpu)); 2195266393Sjhb if (vcpu->state == VCPU_SLEEPING) 2196266393Sjhb wakeup_one(vcpu); 2197242065Sneel } 2198242065Sneel vcpu_unlock(vcpu); 2199241489Sneel} 2200256072Sneel 2201256072Sneelstruct vmspace * 2202256072Sneelvm_get_vmspace(struct vm *vm) 2203256072Sneel{ 2204256072Sneel 2205256072Sneel return (vm->vmspace); 2206256072Sneel} 2207261088Sjhb 2208261088Sjhbint 2209261088Sjhbvm_apicid2vcpuid(struct vm *vm, int apicid) 2210261088Sjhb{ 2211261088Sjhb /* 2212261088Sjhb * XXX apic id is assumed to be numerically identical to vcpu id 2213261088Sjhb */ 2214261088Sjhb return (apicid); 2215261088Sjhb} 2216266339Sjhb 2217266339Sjhbvoid 2218266339Sjhbvm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest, 2219266339Sjhb vm_rendezvous_func_t func, void *arg) 2220266339Sjhb{ 2221266339Sjhb int i; 2222266339Sjhb 2223266339Sjhb /* 2224266339Sjhb * Enforce that this function is called without any locks 2225266339Sjhb */ 2226266339Sjhb WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous"); 2227266339Sjhb KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU), 2228266339Sjhb ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid)); 2229266339Sjhb 2230266339Sjhbrestart: 2231266339Sjhb mtx_lock(&vm->rendezvous_mtx); 2232266339Sjhb if (vm->rendezvous_func != NULL) { 2233266339Sjhb /* 2234266339Sjhb * If a rendezvous is already in progress then we need to 2235266339Sjhb * call the rendezvous handler in case this 'vcpuid' is one 2236266339Sjhb * of the targets of the rendezvous. 
2237266339Sjhb */ 2238266339Sjhb RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress"); 2239266339Sjhb mtx_unlock(&vm->rendezvous_mtx); 2240266339Sjhb vm_handle_rendezvous(vm, vcpuid); 2241266339Sjhb goto restart; 2242266339Sjhb } 2243266339Sjhb KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous " 2244266339Sjhb "rendezvous is still in progress")); 2245266339Sjhb 2246266339Sjhb RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous"); 2247266339Sjhb vm->rendezvous_req_cpus = dest; 2248266339Sjhb CPU_ZERO(&vm->rendezvous_done_cpus); 2249266339Sjhb vm->rendezvous_arg = arg; 2250266339Sjhb vm_set_rendezvous_func(vm, func); 2251266339Sjhb mtx_unlock(&vm->rendezvous_mtx); 2252266339Sjhb 2253266339Sjhb /* 2254266339Sjhb * Wake up any sleeping vcpus and trigger a VM-exit in any running 2255266339Sjhb * vcpus so they handle the rendezvous as soon as possible. 2256266339Sjhb */ 2257266339Sjhb for (i = 0; i < VM_MAXCPU; i++) { 2258266339Sjhb if (CPU_ISSET(i, &dest)) 2259266339Sjhb vcpu_notify_event(vm, i, false); 2260266339Sjhb } 2261266339Sjhb 2262266339Sjhb vm_handle_rendezvous(vm, vcpuid); 2263266339Sjhb} 2264268891Sjhb 2265268891Sjhbstruct vatpic * 2266268891Sjhbvm_atpic(struct vm *vm) 2267268891Sjhb{ 2268268891Sjhb return (vm->vatpic); 2269268891Sjhb} 2270268891Sjhb 2271268891Sjhbstruct vatpit * 2272268891Sjhbvm_atpit(struct vm *vm) 2273268891Sjhb{ 2274268891Sjhb return (vm->vatpit); 2275268891Sjhb} 2276268976Sjhb 2277276429Sneelstruct vpmtmr * 2278276429Sneelvm_pmtmr(struct vm *vm) 2279276429Sneel{ 2280276429Sneel 2281276429Sneel return (vm->vpmtmr); 2282276429Sneel} 2283276429Sneel 2284284894Sneelstruct vrtc * 2285284894Sneelvm_rtc(struct vm *vm) 2286284894Sneel{ 2287284894Sneel 2288284894Sneel return (vm->vrtc); 2289284894Sneel} 2290284894Sneel 2291268976Sjhbenum vm_reg_name 2292268976Sjhbvm_segment_name(int seg) 2293268976Sjhb{ 2294268976Sjhb static enum vm_reg_name seg_names[] = { 2295268976Sjhb VM_REG_GUEST_ES, 2296268976Sjhb VM_REG_GUEST_CS, 
2297268976Sjhb VM_REG_GUEST_SS, 2298268976Sjhb VM_REG_GUEST_DS, 2299268976Sjhb VM_REG_GUEST_FS, 2300268976Sjhb VM_REG_GUEST_GS 2301268976Sjhb }; 2302268976Sjhb 2303268976Sjhb KASSERT(seg >= 0 && seg < nitems(seg_names), 2304268976Sjhb ("%s: invalid segment encoding %d", __func__, seg)); 2305268976Sjhb return (seg_names[seg]); 2306268976Sjhb} 2307270074Sgrehan 2308270159Sgrehanvoid 2309270159Sgrehanvm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, 2310270159Sgrehan int num_copyinfo) 2311270159Sgrehan{ 2312270159Sgrehan int idx; 2313270074Sgrehan 2314270159Sgrehan for (idx = 0; idx < num_copyinfo; idx++) { 2315270159Sgrehan if (copyinfo[idx].cookie != NULL) 2316270159Sgrehan vm_gpa_release(copyinfo[idx].cookie); 2317270159Sgrehan } 2318270159Sgrehan bzero(copyinfo, num_copyinfo * sizeof(struct vm_copyinfo)); 2319270159Sgrehan} 2320270159Sgrehan 2321270159Sgrehanint 2322270159Sgrehanvm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, 2323270159Sgrehan uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo, 2324270159Sgrehan int num_copyinfo) 2325270159Sgrehan{ 2326270159Sgrehan int error, idx, nused; 2327270159Sgrehan size_t n, off, remaining; 2328270159Sgrehan void *hva, *cookie; 2329270159Sgrehan uint64_t gpa; 2330270159Sgrehan 2331270159Sgrehan bzero(copyinfo, sizeof(struct vm_copyinfo) * num_copyinfo); 2332270159Sgrehan 2333270159Sgrehan nused = 0; 2334270159Sgrehan remaining = len; 2335270159Sgrehan while (remaining > 0) { 2336270159Sgrehan KASSERT(nused < num_copyinfo, ("insufficient vm_copyinfo")); 2337284899Sneel error = vm_gla2gpa(vm, vcpuid, paging, gla, prot, &gpa); 2338270159Sgrehan if (error) 2339270159Sgrehan return (error); 2340270159Sgrehan off = gpa & PAGE_MASK; 2341270159Sgrehan n = min(remaining, PAGE_SIZE - off); 2342270159Sgrehan copyinfo[nused].gpa = gpa; 2343270159Sgrehan copyinfo[nused].len = n; 2344270159Sgrehan remaining -= n; 2345270159Sgrehan gla += n; 2346270159Sgrehan nused++; 
2347270159Sgrehan } 2348270159Sgrehan 2349270159Sgrehan for (idx = 0; idx < nused; idx++) { 2350270159Sgrehan hva = vm_gpa_hold(vm, copyinfo[idx].gpa, copyinfo[idx].len, 2351270159Sgrehan prot, &cookie); 2352270159Sgrehan if (hva == NULL) 2353270159Sgrehan break; 2354270159Sgrehan copyinfo[idx].hva = hva; 2355270159Sgrehan copyinfo[idx].cookie = cookie; 2356270159Sgrehan } 2357270159Sgrehan 2358270159Sgrehan if (idx != nused) { 2359270159Sgrehan vm_copy_teardown(vm, vcpuid, copyinfo, num_copyinfo); 2360270159Sgrehan return (-1); 2361270159Sgrehan } else { 2362270159Sgrehan return (0); 2363270159Sgrehan } 2364270159Sgrehan} 2365270159Sgrehan 2366270159Sgrehanvoid 2367270159Sgrehanvm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr, 2368270159Sgrehan size_t len) 2369270159Sgrehan{ 2370270159Sgrehan char *dst; 2371270159Sgrehan int idx; 2372270159Sgrehan 2373270159Sgrehan dst = kaddr; 2374270159Sgrehan idx = 0; 2375270159Sgrehan while (len > 0) { 2376270159Sgrehan bcopy(copyinfo[idx].hva, dst, copyinfo[idx].len); 2377270159Sgrehan len -= copyinfo[idx].len; 2378270159Sgrehan dst += copyinfo[idx].len; 2379270159Sgrehan idx++; 2380270159Sgrehan } 2381270159Sgrehan} 2382270159Sgrehan 2383270159Sgrehanvoid 2384270159Sgrehanvm_copyout(struct vm *vm, int vcpuid, const void *kaddr, 2385270159Sgrehan struct vm_copyinfo *copyinfo, size_t len) 2386270159Sgrehan{ 2387270159Sgrehan const char *src; 2388270159Sgrehan int idx; 2389270159Sgrehan 2390270159Sgrehan src = kaddr; 2391270159Sgrehan idx = 0; 2392270159Sgrehan while (len > 0) { 2393270159Sgrehan bcopy(src, copyinfo[idx].hva, copyinfo[idx].len); 2394270159Sgrehan len -= copyinfo[idx].len; 2395270159Sgrehan src += copyinfo[idx].len; 2396270159Sgrehan idx++; 2397270159Sgrehan } 2398270159Sgrehan} 2399270159Sgrehan 2400270074Sgrehan/* 2401270074Sgrehan * Return the amount of in-use and wired memory for the VM. 
Since 2402270074Sgrehan * these are global stats, only return the values with for vCPU 0 2403270074Sgrehan */ 2404270074SgrehanVMM_STAT_DECLARE(VMM_MEM_RESIDENT); 2405270074SgrehanVMM_STAT_DECLARE(VMM_MEM_WIRED); 2406270074Sgrehan 2407270074Sgrehanstatic void 2408270074Sgrehanvm_get_rescnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat) 2409270074Sgrehan{ 2410270074Sgrehan 2411270074Sgrehan if (vcpu == 0) { 2412270074Sgrehan vmm_stat_set(vm, vcpu, VMM_MEM_RESIDENT, 2413270074Sgrehan PAGE_SIZE * vmspace_resident_count(vm->vmspace)); 2414270074Sgrehan } 2415270074Sgrehan} 2416270074Sgrehan 2417270074Sgrehanstatic void 2418270074Sgrehanvm_get_wiredcnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat) 2419270074Sgrehan{ 2420270074Sgrehan 2421270074Sgrehan if (vcpu == 0) { 2422270074Sgrehan vmm_stat_set(vm, vcpu, VMM_MEM_WIRED, 2423270074Sgrehan PAGE_SIZE * pmap_wired_count(vmspace_pmap(vm->vmspace))); 2424270074Sgrehan } 2425270074Sgrehan} 2426270074Sgrehan 2427270074SgrehanVMM_STAT_FUNC(VMM_MEM_RESIDENT, "Resident memory", vm_get_rescnt); 2428270074SgrehanVMM_STAT_FUNC(VMM_MEM_WIRED, "Wired memory", vm_get_wiredcnt); 2429