/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/sys/amd64/vmm/vmm.c 276349 2014-12-28 21:27:13Z neel $
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/amd64/vmm/vmm.c 276349 2014-12-28 21:27:13Z neel $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/pcpu.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/cpu.h>
#include <machine/vm.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <x86/psl.h>
#include <x86/apicreg.h>
#include <machine/vmparam.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>

#include "vmm_ioport.h"
#include "vmm_ktr.h"
#include "vmm_host.h"
#include "vmm_mem.h"
#include "vmm_util.h"
#include "vatpic.h"
#include "vatpit.h"
#include "vhpet.h"
#include "vioapic.h"
#include "vlapic.h"
#include "vmm_ipi.h"
#include "vmm_stat.h"
#include "vmm_lapic.h"

#include "io/ppt.h"
#include "io/iommu.h"

struct vlapic;

/*
 * Initialization:
 * (a) allocated when vcpu is created
 * (i) initialized when vcpu is created and when it is reinitialized
 * (o) initialized the first time the vcpu is created
 * (x) initialized before use
 */
struct vcpu {
	struct mtx 	mtx;		/* (o) protects 'state' and 'hostcpu' */
	enum vcpu_state	state;		/* (o) vcpu state */
	int		hostcpu;	/* (o) vcpu's host cpu */
	struct vlapic	*vlapic;	/* (i) APIC device model */
	enum x2apic_state x2apic_state;	/* (i) APIC mode */
	uint64_t	exitintinfo;	/* (i) events pending at VM exit */
	int		nmi_pending;	/* (i) NMI pending */
	int		extint_pending;	/* (i) INTR pending */
	struct vm_exception exception;	/* (x) exception collateral */
	int	exception_pending;	/* (i) exception pending */
	struct savefpu	*guestfpu;	/* (a,i) guest fpu state */
	uint64_t	guest_xcr0;	/* (i) guest %xcr0 register */
	void		*stats;		/* (a,i) statistics */
	struct vm_exit	exitinfo;	/* (x) exit reason and collateral */
};

#define	vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)

struct mem_seg {
	vm_paddr_t	gpa;
	size_t		len;
	boolean_t	wired;
	vm_object_t	object;
};
#define	VM_MAX_MEMORY_SEGMENTS	2

/*
 * Initialization:
 * (o) initialized the first time the VM is created
 * (i) initialized when VM is created and when it is reinitialized
 * (x) initialized before use
 */
struct vm {
	void		*cookie;		/* (i) cpu-specific data */
	void		*iommu;			/* (x) iommu-specific data */
	struct vhpet	*vhpet;			/* (i) virtual HPET */
	struct vioapic	*vioapic;		/* (i) virtual ioapic */
	struct vatpic	*vatpic;		/* (i) virtual atpic */
	struct vatpit	*vatpit;		/* (i) virtual atpit */
	volatile cpuset_t active_cpus;		/* (i) active vcpus */
	int		suspend;		/* (i) stop VM execution */
	volatile cpuset_t suspended_cpus;	/* (i) suspended vcpus */
	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
	cpuset_t	rendezvous_req_cpus;	/* (x) rendezvous requested */
	cpuset_t	rendezvous_done_cpus;	/* (x) rendezvous finished */
	void		*rendezvous_arg;	/* (x) rendezvous func/arg */
	vm_rendezvous_func_t rendezvous_func;
	struct mtx	rendezvous_mtx;		/* (o) rendezvous lock */
	int		num_mem_segs;		/* (o) guest memory segments */
	struct mem_seg	mem_segs[VM_MAX_MEMORY_SEGMENTS];
	struct vmspace	*vmspace;		/* (o) guest's address space */
	char		name[VM_MAX_NAMELEN];	/* (o) virtual machine name */
	struct vcpu	vcpu[VM_MAXCPU];	/* (i) guest vcpus */
};

static int vmm_initialized;

static struct vmm_ops *ops;
#define	VMM_INIT(num)	(ops != NULL ? (*ops->init)(num) : 0)
#define	VMM_CLEANUP()	(ops != NULL ? (*ops->cleanup)() : 0)
#define	VMM_RESUME()	(ops != NULL ? (*ops->resume)() : 0)

#define	VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap) : NULL)
#define	VMRUN(vmi, vcpu, rip, pmap, rptr, sptr) \
	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr, sptr) : ENXIO)
#define	VMCLEANUP(vmi)	(ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
#define	VMSPACE_ALLOC(min, max) \
	(ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
#define	VMSPACE_FREE(vmspace) \
	(ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO)
#define	VMGETREG(vmi, vcpu, num, retval)		\
	(ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETREG(vmi, vcpu, num, val)		\
	(ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
#define	VMGETDESC(vmi, vcpu, num, desc)		\
	(ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMSETDESC(vmi, vcpu, num, desc)		\
	(ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMGETCAP(vmi, vcpu, num, retval)	\
	(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETCAP(vmi, vcpu, num, val)		\
	(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
#define	VLAPIC_INIT(vmi, vcpu)			\
	(ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL)
#define	VLAPIC_CLEANUP(vmi, vlapic)		\
	(ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL)
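
/*
 * Illustrative only: each wrapper above dispatches through the backend
 * registered in 'ops' by vmm_init() (vmm_ops_intel or vmm_ops_amd).  For
 * example, with the Intel backend loaded,
 *
 *	error = VMGETREG(vm->cookie, vcpuid, VM_REG_GUEST_RIP, &rip);
 *
 * ends up calling (*vmm_ops_intel.vmgetreg)(vm->cookie, vcpuid, ...), and
 * degrades to ENXIO if no backend ever registered itself.
 */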

#define	fpu_start_emulating()	load_cr0(rcr0() | CR0_TS)
#define	fpu_stop_emulating()	clts()

static MALLOC_DEFINE(M_VM, "vm", "vm");

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

/*
 * Halt the guest if all vcpus are executing a HLT instruction with
 * interrupts disabled.
 */
static int halt_detection_enabled = 1;
TUNABLE_INT("hw.vmm.halt_detection", &halt_detection_enabled);
SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN,
    &halt_detection_enabled, 0,
    "Halt VM if all vcpus execute HLT with interrupts disabled");
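
/*
 * Illustrative only: this knob is a loader tunable, so a hypothetical host
 * that wants to disable halt detection would set, in loader.conf:
 *
 *	hw.vmm.halt_detection=0
 */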

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

static void
vcpu_cleanup(struct vm *vm, int i, bool destroy)
{
	struct vcpu *vcpu = &vm->vcpu[i];

	VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic);
	if (destroy) {
		vmm_stat_free(vcpu->stats);
		fpu_save_area_free(vcpu->guestfpu);
	}
}

static void
vcpu_init(struct vm *vm, int vcpu_id, bool create)
{
	struct vcpu *vcpu;

	KASSERT(vcpu_id >= 0 && vcpu_id < VM_MAXCPU,
	    ("vcpu_init: invalid vcpu %d", vcpu_id));

	vcpu = &vm->vcpu[vcpu_id];

	if (create) {
		KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already "
		    "initialized", vcpu_id));
		vcpu_lock_init(vcpu);
		vcpu->state = VCPU_IDLE;
		vcpu->hostcpu = NOCPU;
		vcpu->guestfpu = fpu_save_area_alloc();
		vcpu->stats = vmm_stat_alloc();
	}

	vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
	vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
	vcpu->exitintinfo = 0;
	vcpu->nmi_pending = 0;
	vcpu->extint_pending = 0;
	vcpu->exception_pending = 0;
	vcpu->guest_xcr0 = XFEATURE_ENABLED_X87;
	fpu_save_area_reset(vcpu->guestfpu);
	vmm_stat_init(vcpu->stats);
}

struct vm_exit *
vm_exitinfo(struct vm *vm, int cpuid)
{
	struct vcpu *vcpu;

	if (cpuid < 0 || cpuid >= VM_MAXCPU)
		panic("vm_exitinfo: invalid cpuid %d", cpuid);

	vcpu = &vm->vcpu[cpuid];

	return (&vcpu->exitinfo);
}

static void
vmm_resume(void)
{
	VMM_RESUME();
}

static int
vmm_init(void)
{
	int error;

	vmm_host_state_init();

	vmm_ipinum = vmm_ipi_alloc();
	if (vmm_ipinum == 0)
		vmm_ipinum = IPI_AST;

	error = vmm_mem_init();
	if (error)
		return (error);

	if (vmm_is_intel())
		ops = &vmm_ops_intel;
	else if (vmm_is_amd())
		ops = &vmm_ops_amd;
	else
		return (ENXIO);

	vmm_resume_p = vmm_resume;

	return (VMM_INIT(vmm_ipinum));
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		vmmdev_init();
		if (ppt_avail_devices() > 0)
			iommu_init();
		error = vmm_init();
		if (error == 0)
			vmm_initialized = 1;
		break;
	case MOD_UNLOAD:
		error = vmmdev_cleanup();
		if (error == 0) {
			vmm_resume_p = NULL;
			iommu_cleanup();
			if (vmm_ipinum != IPI_AST)
				vmm_ipi_free(vmm_ipinum);
			error = VMM_CLEANUP();
			/*
			 * Something bad happened - prevent new
			 * VMs from being created
			 */
			if (error)
				vmm_initialized = 0;
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - iommu initialization must happen after the pci passthru driver has had
 *   a chance to attach to any passthru devices (after SI_SUB_CONFIGURE).
 *
 * - VT-x initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 */
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);
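
/*
 * Illustrative only: the module is normally loaded from userland with
 * "kldload vmm" (or vmm_load="YES" in loader.conf), which invokes
 * vmm_handler() above with MOD_LOAD.
 */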

static void
vm_init(struct vm *vm, bool create)
{
	int i;

	vm->cookie = VMINIT(vm, vmspace_pmap(vm->vmspace));
	vm->iommu = NULL;
	vm->vioapic = vioapic_init(vm);
	vm->vhpet = vhpet_init(vm);
	vm->vatpic = vatpic_init(vm);
	vm->vatpit = vatpit_init(vm);

	CPU_ZERO(&vm->active_cpus);

	vm->suspend = 0;
	CPU_ZERO(&vm->suspended_cpus);

	for (i = 0; i < VM_MAXCPU; i++)
		vcpu_init(vm, i, create);
}

int
vm_create(const char *name, struct vm **retvm)
{
	struct vm *vm;
	struct vmspace *vmspace;

	/*
	 * If vmm.ko could not be successfully initialized then don't attempt
	 * to create the virtual machine.
	 */
	if (!vmm_initialized)
		return (ENXIO);

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (EINVAL);

	vmspace = VMSPACE_ALLOC(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
	if (vmspace == NULL)
		return (ENOMEM);

	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->num_mem_segs = 0;
	vm->vmspace = vmspace;
	mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);

	vm_init(vm, true);

	*retvm = vm;
	return (0);
}
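
/*
 * Illustrative only: a hypothetical in-kernel caller pairs vm_create()
 * with vm_destroy() (defined below):
 *
 *	struct vm *vm;
 *	int error;
 *
 *	error = vm_create("guest0", &vm);
 *	if (error == 0) {
 *		...
 *		vm_destroy(vm);
 *	}
 */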

static void
vm_free_mem_seg(struct vm *vm, struct mem_seg *seg)
{

	if (seg->object != NULL)
		vmm_mem_free(vm->vmspace, seg->gpa, seg->len);

	bzero(seg, sizeof(*seg));
}

static void
vm_cleanup(struct vm *vm, bool destroy)
{
	int i;

	ppt_unassign_all(vm);

	if (vm->iommu != NULL)
		iommu_destroy_domain(vm->iommu);

	vatpit_cleanup(vm->vatpit);
	vhpet_cleanup(vm->vhpet);
	vatpic_cleanup(vm->vatpic);
	vioapic_cleanup(vm->vioapic);

	for (i = 0; i < VM_MAXCPU; i++)
		vcpu_cleanup(vm, i, destroy);

	VMCLEANUP(vm->cookie);

	if (destroy) {
		for (i = 0; i < vm->num_mem_segs; i++)
			vm_free_mem_seg(vm, &vm->mem_segs[i]);

		vm->num_mem_segs = 0;

		VMSPACE_FREE(vm->vmspace);
		vm->vmspace = NULL;
	}
}

void
vm_destroy(struct vm *vm)
{
	vm_cleanup(vm, true);
	free(vm, M_VM);
}

int
vm_reinit(struct vm *vm)
{
	int error;

	/*
	 * A virtual machine can be reset only if all vcpus are suspended.
	 */
	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
		vm_cleanup(vm, false);
		vm_init(vm, false);
		error = 0;
	} else {
		error = EBUSY;
	}

	return (error);
}

const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}

int
vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
	vm_object_t obj;

	if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL)
		return (ENOMEM);
	else
		return (0);
}

int
vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
{

	vmm_mmio_free(vm->vmspace, gpa, len);
	return (0);
}

boolean_t
vm_mem_allocated(struct vm *vm, vm_paddr_t gpa)
{
	int i;
	vm_paddr_t gpabase, gpalimit;

	for (i = 0; i < vm->num_mem_segs; i++) {
		gpabase = vm->mem_segs[i].gpa;
		gpalimit = gpabase + vm->mem_segs[i].len;
		if (gpa >= gpabase && gpa < gpalimit)
			return (TRUE);		/* 'gpa' is regular memory */
	}

	if (ppt_is_mmio(vm, gpa))
		return (TRUE);			/* 'gpa' is pci passthru mmio */

	return (FALSE);
}

int
vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	int available, allocated;
	struct mem_seg *seg;
	vm_object_t object;
	vm_paddr_t g;

	if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
		return (EINVAL);

	available = allocated = 0;
	g = gpa;
	while (g < gpa + len) {
		if (vm_mem_allocated(vm, g))
			allocated++;
		else
			available++;

		g += PAGE_SIZE;
	}

	/*
	 * If there are some allocated and some available pages in the address
	 * range then it is an error.
	 */
	if (allocated && available)
		return (EINVAL);

	/*
	 * If the entire address range being requested has already been
	 * allocated then there isn't anything more to do.
	 */
	if (allocated && available == 0)
		return (0);

	if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
		return (E2BIG);

	seg = &vm->mem_segs[vm->num_mem_segs];

	if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL)
		return (ENOMEM);

	seg->gpa = gpa;
	seg->len = len;
	seg->object = object;
	seg->wired = FALSE;

	vm->num_mem_segs++;

	return (0);
}
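
/*
 * Illustrative only: a hypothetical caller backing the first 256MB of
 * guest physical address space would use:
 *
 *	error = vm_malloc(vm, 0, 256 * 1024 * 1024);
 *
 * Both 'gpa' and 'len' must be page-aligned, and at most
 * VM_MAX_MEMORY_SEGMENTS segments can be created per VM.
 */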

static vm_paddr_t
vm_maxmem(struct vm *vm)
{
	int i;
	vm_paddr_t gpa, maxmem;

	maxmem = 0;
	for (i = 0; i < vm->num_mem_segs; i++) {
		gpa = vm->mem_segs[i].gpa + vm->mem_segs[i].len;
		if (gpa > maxmem)
			maxmem = gpa;
	}
	return (maxmem);
}

static void
vm_gpa_unwire(struct vm *vm)
{
	int i, rv;
	struct mem_seg *seg;

	for (i = 0; i < vm->num_mem_segs; i++) {
		seg = &vm->mem_segs[i];
		if (!seg->wired)
			continue;

		rv = vm_map_unwire(&vm->vmspace->vm_map,
				   seg->gpa, seg->gpa + seg->len,
				   VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
		KASSERT(rv == KERN_SUCCESS, ("vm(%s) memory segment "
		    "%#lx/%ld could not be unwired: %d",
		    vm_name(vm), seg->gpa, seg->len, rv));

		seg->wired = FALSE;
	}
}

static int
vm_gpa_wire(struct vm *vm)
{
	int i, rv;
	struct mem_seg *seg;

	for (i = 0; i < vm->num_mem_segs; i++) {
		seg = &vm->mem_segs[i];
		if (seg->wired)
			continue;

		/* XXX rlimits? */
		rv = vm_map_wire(&vm->vmspace->vm_map,
				 seg->gpa, seg->gpa + seg->len,
				 VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
		if (rv != KERN_SUCCESS)
			break;

		seg->wired = TRUE;
	}

	if (i < vm->num_mem_segs) {
		/*
		 * Undo the wiring before returning an error.
		 */
		vm_gpa_unwire(vm);
		return (EAGAIN);
	}

	return (0);
}

static void
vm_iommu_modify(struct vm *vm, boolean_t map)
{
	int i, sz;
	vm_paddr_t gpa, hpa;
	struct mem_seg *seg;
	void *vp, *cookie, *host_domain;

	sz = PAGE_SIZE;
	host_domain = iommu_host_domain();

	for (i = 0; i < vm->num_mem_segs; i++) {
		seg = &vm->mem_segs[i];
		KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired",
		    vm_name(vm), seg->gpa, seg->len));

		gpa = seg->gpa;
		while (gpa < seg->gpa + seg->len) {
			vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE,
					 &cookie);
			KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx",
			    vm_name(vm), gpa));

			vm_gpa_release(cookie);

			hpa = DMAP_TO_PHYS((uintptr_t)vp);
			if (map) {
				iommu_create_mapping(vm->iommu, gpa, hpa, sz);
				iommu_remove_mapping(host_domain, hpa, sz);
			} else {
				iommu_remove_mapping(vm->iommu, gpa, sz);
				iommu_create_mapping(host_domain, hpa, hpa, sz);
			}

			gpa += PAGE_SIZE;
		}
	}

	/*
	 * Invalidate the cached translations associated with the domain
	 * from which pages were removed.
	 */
	if (map)
		iommu_invalidate_tlb(host_domain);
	else
		iommu_invalidate_tlb(vm->iommu);
}

#define	vm_iommu_unmap(vm)	vm_iommu_modify((vm), FALSE)
#define	vm_iommu_map(vm)	vm_iommu_modify((vm), TRUE)

int
vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func)
{
	int error;

	error = ppt_unassign_device(vm, bus, slot, func);
	if (error)
		return (error);

	if (ppt_assigned_devices(vm) == 0) {
		vm_iommu_unmap(vm);
		vm_gpa_unwire(vm);
	}
	return (0);
}

int
vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
{
	int error;
	vm_paddr_t maxaddr;

	/*
	 * Virtual machines with pci passthru devices get special treatment:
	 * - the guest physical memory is wired
	 * - the iommu is programmed to do the 'gpa' to 'hpa' translation
	 *
	 * We need to do this before the first pci passthru device is attached.
	 */
	if (ppt_assigned_devices(vm) == 0) {
		KASSERT(vm->iommu == NULL,
		    ("vm_assign_pptdev: iommu must be NULL"));
		maxaddr = vm_maxmem(vm);
		vm->iommu = iommu_create_domain(maxaddr);

		error = vm_gpa_wire(vm);
		if (error)
			return (error);

		vm_iommu_map(vm);
	}

	error = ppt_assign_device(vm, bus, slot, func);
	return (error);
}

void *
vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
	    void **cookie)
{
	int count, pageoff;
	vm_page_t m;

	pageoff = gpa & PAGE_MASK;
	if (len > PAGE_SIZE - pageoff)
		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);

	count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
	    trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);

	if (count == 1) {
		*cookie = m;
		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
	} else {
		*cookie = NULL;
		return (NULL);
	}
}

void
vm_gpa_release(void *cookie)
{
	vm_page_t m = cookie;

	vm_page_lock(m);
	vm_page_unhold(m);
	vm_page_unlock(m);
}
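
/*
 * Illustrative only: vm_gpa_hold() and vm_gpa_release() must be paired.
 * A hypothetical caller reading one byte of guest memory:
 *
 *	void *cookie;
 *	uint8_t *p, val;
 *
 *	p = vm_gpa_hold(vm, gpa, 1, VM_PROT_READ, &cookie);
 *	if (p != NULL) {
 *		val = *p;
 *		vm_gpa_release(cookie);
 *	}
 */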

int
vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
		  struct vm_memory_segment *seg)
{
	int i;

	for (i = 0; i < vm->num_mem_segs; i++) {
		if (gpabase == vm->mem_segs[i].gpa) {
			seg->gpa = vm->mem_segs[i].gpa;
			seg->len = vm->mem_segs[i].len;
			seg->wired = vm->mem_segs[i].wired;
			return (0);
		}
	}
	return (-1);
}

int
vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
	      vm_offset_t *offset, struct vm_object **object)
{
	int i;
	size_t seg_len;
	vm_paddr_t seg_gpa;
	vm_object_t seg_obj;

	for (i = 0; i < vm->num_mem_segs; i++) {
		if ((seg_obj = vm->mem_segs[i].object) == NULL)
			continue;

		seg_gpa = vm->mem_segs[i].gpa;
		seg_len = vm->mem_segs[i].len;

		if (gpa >= seg_gpa && gpa < seg_gpa + seg_len) {
			*offset = gpa - seg_gpa;
			*object = seg_obj;
			vm_object_reference(seg_obj);
			return (0);
		}
	}

	return (EINVAL);
}

int
vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (VMGETREG(vm->cookie, vcpu, reg, retval));
}

int
vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (VMSETREG(vm->cookie, vcpu, reg, val));
}
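
/*
 * Illustrative only: these accessors back the VM_GET_REGISTER and
 * VM_SET_REGISTER vmm device ioctls.  A hypothetical in-kernel caller:
 *
 *	uint64_t rax;
 *
 *	error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RAX, &rax);
 *	if (error == 0)
 *		error = vm_set_register(vm, vcpuid, VM_REG_GUEST_RAX, rax + 1);
 */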

static boolean_t
is_descriptor_table(int reg)
{

	switch (reg) {
	case VM_REG_GUEST_IDTR:
	case VM_REG_GUEST_GDTR:
		return (TRUE);
	default:
		return (FALSE);
	}
}

static boolean_t
is_segment_register(int reg)
{

	switch (reg) {
	case VM_REG_GUEST_ES:
	case VM_REG_GUEST_CS:
	case VM_REG_GUEST_SS:
	case VM_REG_GUEST_DS:
	case VM_REG_GUEST_FS:
	case VM_REG_GUEST_GS:
	case VM_REG_GUEST_TR:
	case VM_REG_GUEST_LDTR:
		return (TRUE);
	default:
		return (FALSE);
	}
}

int
vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
		struct seg_desc *desc)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (!is_segment_register(reg) && !is_descriptor_table(reg))
		return (EINVAL);

	return (VMGETDESC(vm->cookie, vcpu, reg, desc));
}

int
vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
		struct seg_desc *desc)
{
	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (!is_segment_register(reg) && !is_descriptor_table(reg))
		return (EINVAL);

	return (VMSETDESC(vm->cookie, vcpu, reg, desc));
}

static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* flush host state to the pcb */
	fpuexit(curthread);

	/* restore guest FPU state */
	fpu_stop_emulating();
	fpurestore(vcpu->guestfpu);

	/* restore guest XCR0 if XSAVE is enabled in the host */
	if (rcr4() & CR4_XSAVE)
		load_xcr(0, vcpu->guest_xcr0);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	fpu_start_emulating();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{

	if ((rcr0() & CR0_TS) == 0)
		panic("fpu emulation not enabled in host!");

	/* save guest XCR0 and restore host XCR0 */
	if (rcr4() & CR4_XSAVE) {
		vcpu->guest_xcr0 = rxcr(0);
		load_xcr(0, vmm_get_host_xcr0());
	}

	/* save guest FPU state */
	fpu_stop_emulating();
	fpusave(vcpu->guestfpu);
	fpu_start_emulating();
}
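
/*
 * Illustrative only: the two functions above must bracket guest execution,
 * as vm_run() does below:
 *
 *	restore_guest_fpustate(vcpu);
 *	error = VMRUN(...);
 *	save_guest_fpustate(vcpu);
 *
 * Between the two calls CR0.TS is set, so any stray host FPU access traps
 * instead of silently corrupting the guest's FPU state.
 */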

static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");

static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;

	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state. This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE)
			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	default:
		error = 1;
		break;
	}

	if (error)
		return (EBUSY);

	vcpu->state = newstate;
	if (newstate == VCPU_RUNNING)
		vcpu->hostcpu = curcpu;
	else
		vcpu->hostcpu = NOCPU;

	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}

static void
vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0)
		panic("Error %d setting state to %d\n", error, newstate);
}

static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}

static void
vm_set_rendezvous_func(struct vm *vm, vm_rendezvous_func_t func)
{

	KASSERT(mtx_owned(&vm->rendezvous_mtx), ("rendezvous_mtx not locked"));

	/*
	 * Update 'rendezvous_func' and execute a write memory barrier to
	 * ensure that it is visible across all host cpus. This is not needed
	 * for correctness but it does ensure that all the vcpus will notice
	 * that the rendezvous is requested immediately.
	 */
	vm->rendezvous_func = func;
	wmb();
}

#define	RENDEZVOUS_CTR0(vm, vcpuid, fmt)				\
	do {								\
		if (vcpuid >= 0)					\
			VCPU_CTR0(vm, vcpuid, fmt);			\
		else							\
			VM_CTR0(vm, fmt);				\
	} while (0)

static void
vm_handle_rendezvous(struct vm *vm, int vcpuid)
{

	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
	    ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid));

	mtx_lock(&vm->rendezvous_mtx);
	while (vm->rendezvous_func != NULL) {
		/* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
		CPU_AND(&vm->rendezvous_req_cpus, &vm->active_cpus);

		if (vcpuid != -1 &&
		    CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
		    !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
			VCPU_CTR0(vm, vcpuid, "Calling rendezvous func");
			(*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg);
			CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
		}
		if (CPU_CMP(&vm->rendezvous_req_cpus,
		    &vm->rendezvous_done_cpus) == 0) {
			VCPU_CTR0(vm, vcpuid, "Rendezvous completed");
			vm_set_rendezvous_func(vm, NULL);
			wakeup(&vm->rendezvous_func);
			break;
		}
		RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion");
		mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
		    "vmrndv", 0);
	}
	mtx_unlock(&vm->rendezvous_mtx);
}
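
/*
 * Illustrative only: a rendezvous is initiated elsewhere (e.g. by
 * vm_smp_rendezvous(), not part of this excerpt) by populating
 * 'rendezvous_req_cpus' and installing 'rendezvous_func'; every target
 * vcpu then funnels through vm_handle_rendezvous() until the 'done'
 * cpuset matches the 'req' cpuset.
 */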

/*
 * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
 */
static int
vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
{
	struct vcpu *vcpu;
	const char *wmesg;
	int error, t, vcpu_halted, vm_halted;

	KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted"));

	vcpu = &vm->vcpu[vcpuid];
	vcpu_halted = 0;
	vm_halted = 0;

	/*
	 * The typical way to halt a cpu is to execute: "sti; hlt"
	 *
	 * STI sets RFLAGS.IF to enable interrupts. However, the processor
	 * remains in an "interrupt shadow" for an additional instruction
	 * following the STI. This guarantees that the "sti; hlt" sequence is
	 * atomic and a pending interrupt will be recognized after the HLT.
	 *
	 * After the HLT emulation is done the vcpu is no longer in an
	 * interrupt shadow and a pending interrupt can be injected on
	 * the next entry into the guest.
	 */
	error = vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0);
	KASSERT(error == 0, ("%s: error %d clearing interrupt shadow",
	    __func__, error));

	vcpu_lock(vcpu);
	while (1) {
		/*
		 * Do a final check for pending NMI or interrupts before
		 * really putting this thread to sleep. Also check for
		 * software events that would cause this vcpu to wake up.
		 *
		 * These interrupts/events could have happened after the
		 * vcpu returned from VMRUN() and before it acquired the
		 * vcpu lock above.
		 */
		if (vm->rendezvous_func != NULL || vm->suspend)
			break;
		if (vm_nmi_pending(vm, vcpuid))
			break;
		if (!intr_disabled) {
			if (vm_extint_pending(vm, vcpuid) ||
			    vlapic_pending_intr(vcpu->vlapic, NULL)) {
				break;
			}
		}

		/* Don't go to sleep if the vcpu thread needs to yield */
		if (vcpu_should_yield(vm, vcpuid))
			break;

		/*
		 * Some Linux guests implement "halt" by having all vcpus
		 * execute HLT with interrupts disabled. 'halted_cpus' keeps
		 * track of the vcpus that have entered this state. When all
		 * vcpus enter the halted state the virtual machine is halted.
		 */
		if (intr_disabled) {
			wmesg = "vmhalt";
			VCPU_CTR0(vm, vcpuid, "Halted");
			if (!vcpu_halted && halt_detection_enabled) {
				vcpu_halted = 1;
				CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus);
			}
			if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) {
				vm_halted = 1;
				break;
			}
		} else {
			wmesg = "vmidle";
		}

		t = ticks;
		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		/*
		 * XXX msleep_spin() cannot be interrupted by signals so
		 * wake up periodically to check pending signals.
		 */
		msleep_spin(vcpu, &vcpu->mtx, wmesg, hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
	}

	if (vcpu_halted)
		CPU_CLR_ATOMIC(vcpuid, &vm->halted_cpus);

	vcpu_unlock(vcpu);

	if (vm_halted)
		vm_suspend(vm, VM_SUSPEND_HALT);

	return (0);
}

static int
vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
{
	int rv, ftype;
	struct vm_map *map;
	struct vcpu *vcpu;
	struct vm_exit *vme;

	vcpu = &vm->vcpu[vcpuid];
	vme = &vcpu->exitinfo;

	ftype = vme->u.paging.fault_type;
	KASSERT(ftype == VM_PROT_READ ||
	    ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE,
	    ("vm_handle_paging: invalid fault_type %d", ftype));

	if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
		rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
		    vme->u.paging.gpa, ftype);
		if (rv == 0) {
			VCPU_CTR2(vm, vcpuid, "%s bit emulation for gpa %#lx",
			    ftype == VM_PROT_READ ? "accessed" : "dirty",
			    vme->u.paging.gpa);
			goto done;
		}
	}

	map = &vm->vmspace->vm_map;
	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL);

	VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, "
	    "ftype = %d", rv, vme->u.paging.gpa, ftype);

	if (rv != KERN_SUCCESS)
		return (EFAULT);
done:
	/* restart execution at the faulting instruction */
	vme->inst_length = 0;

	return (0);
}

static int
vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
{
	struct vie *vie;
	struct vcpu *vcpu;
	struct vm_exit *vme;
	uint64_t gla, gpa;
	struct vm_guest_paging *paging;
	mem_region_read_t mread;
	mem_region_write_t mwrite;
	enum vm_cpu_mode cpu_mode;
	int cs_d, error;

	vcpu = &vm->vcpu[vcpuid];
	vme = &vcpu->exitinfo;

	gla = vme->u.inst_emul.gla;
	gpa = vme->u.inst_emul.gpa;
	cs_d = vme->u.inst_emul.cs_d;
	vie = &vme->u.inst_emul.vie;
	paging = &vme->u.inst_emul.paging;
	cpu_mode = paging->cpu_mode;

	VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %#lx", gpa);

	vie_init(vie);

	/* Fetch, decode and emulate the faulting instruction */
	error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip,
	    vme->inst_length, vie);
	if (error == 1)
		return (0);		/* Resume guest to handle page fault */
	else if (error == -1)
		return (EFAULT);
	else if (error != 0)
		panic("%s: vmm_fetch_instruction error %d", __func__, error);

	if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0)
		return (EFAULT);

	/* return to userland unless this is an in-kernel emulated device */
	if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
		mread = lapic_mmio_read;
		mwrite = lapic_mmio_write;
	} else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
		mread = vioapic_mmio_read;
		mwrite = vioapic_mmio_write;
	} else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
		mread = vhpet_mmio_read;
		mwrite = vhpet_mmio_write;
	} else {
		*retu = true;
		return (0);
	}

	error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, paging,
	    mread, mwrite, retu);

	return (error);
}

static int
vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu)
{
	int i;
	struct vcpu *vcpu;

	vcpu = &vm->vcpu[vcpuid];

	CPU_SET_ATOMIC(vcpuid, &vm->suspended_cpus);

	/*
	 * Wait until all 'active_cpus' have suspended themselves.
	 *
	 * Since a VM may be suspended at any time, including when one or
	 * more vcpus are doing a rendezvous, we need to call the rendezvous
	 * handler while we are waiting to prevent a deadlock.
	 */
	vcpu_lock(vcpu);
	while (1) {
		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
			VCPU_CTR0(vm, vcpuid, "All vcpus suspended");
			break;
		}

		if (vm->rendezvous_func == NULL) {
			VCPU_CTR0(vm, vcpuid, "Sleeping during suspend");
			vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
			msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
			vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		} else {
			VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend");
			vcpu_unlock(vcpu);
			vm_handle_rendezvous(vm, vcpuid);
			vcpu_lock(vcpu);
		}
	}
	vcpu_unlock(vcpu);

	/*
	 * Wake up the other sleeping vcpus and return to userspace.
	 */
	for (i = 0; i < VM_MAXCPU; i++) {
		if (CPU_ISSET(i, &vm->suspended_cpus)) {
			vcpu_notify_event(vm, i, false);
		}
	}

	*retu = true;
	return (0);
}

int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	int i;

	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
		VM_CTR2(vm, "virtual machine already suspended %d/%d",
		    vm->suspend, how);
		return (EALREADY);
	}

	VM_CTR1(vm, "virtual machine successfully suspended %d", how);

	/*
	 * Notify all active vcpus that they are now suspended.
	 */
	for (i = 0; i < VM_MAXCPU; i++) {
		if (CPU_ISSET(i, &vm->active_cpus))
			vcpu_notify_event(vm, i, false);
	}

	return (0);
}
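
/*
 * Illustrative only: the in-kernel halt detection above invokes
 * vm_suspend(vm, VM_SUSPEND_HALT); userland can likewise request a
 * VM_SUSPEND_RESET or VM_SUSPEND_POWEROFF through the vmm device ioctl,
 * after which every vcpu eventually exits to userland with
 * VM_EXITCODE_SUSPENDED.
 */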

void
vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip)
{
	struct vm_exit *vmexit;

	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));

	vmexit = vm_exitinfo(vm, vcpuid);
	vmexit->rip = rip;
	vmexit->inst_length = 0;
	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
	vmexit->u.suspended.how = vm->suspend;
}

void
vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip)
{
	struct vm_exit *vmexit;

	KASSERT(vm->rendezvous_func != NULL, ("rendezvous not in progress"));

	vmexit = vm_exitinfo(vm, vcpuid);
	vmexit->rip = rip;
	vmexit->inst_length = 0;
	vmexit->exitcode = VM_EXITCODE_RENDEZVOUS;
	vmm_stat_incr(vm, vcpuid, VMEXIT_RENDEZVOUS, 1);
}

void
vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip)
{
	struct vm_exit *vmexit;

	vmexit = vm_exitinfo(vm, vcpuid);
	vmexit->rip = rip;
	vmexit->inst_length = 0;
	vmexit->exitcode = VM_EXITCODE_BOGUS;
	vmm_stat_incr(vm, vcpuid, VMEXIT_ASTPENDING, 1);
}

int
vm_run(struct vm *vm, struct vm_run *vmrun)
{
	int error, vcpuid;
	struct vcpu *vcpu;
	struct pcb *pcb;
	uint64_t tscval, rip;
	struct vm_exit *vme;
	bool retu, intr_disabled;
	pmap_t pmap;
	void *rptr, *sptr;

	vcpuid = vmrun->cpuid;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
		return (EINVAL);

	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
		return (EINVAL);

	rptr = &vm->rendezvous_func;
	sptr = &vm->suspend;
	pmap = vmspace_pmap(vm->vmspace);
	vcpu = &vm->vcpu[vcpuid];
	vme = &vcpu->exitinfo;
	rip = vmrun->rip;
restart:
	critical_enter();

	KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active),
	    ("vm_run: absurd pm_active"));

	tscval = rdtsc();

	pcb = PCPU_GET(curpcb);
	set_pcb_flags(pcb, PCB_FULL_IRET);

	restore_guest_fpustate(vcpu);

	vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
	error = VMRUN(vm->cookie, vcpuid, rip, pmap, rptr, sptr);
	vcpu_require_state(vm, vcpuid, VCPU_FROZEN);

	save_guest_fpustate(vcpu);

	vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);

	critical_exit();

	if (error == 0) {
		retu = false;
		switch (vme->exitcode) {
		case VM_EXITCODE_SUSPENDED:
			error = vm_handle_suspend(vm, vcpuid, &retu);
			break;
		case VM_EXITCODE_IOAPIC_EOI:
			vioapic_process_eoi(vm, vcpuid,
			    vme->u.ioapic_eoi.vector);
			break;
		case VM_EXITCODE_RENDEZVOUS:
			vm_handle_rendezvous(vm, vcpuid);
			error = 0;
			break;
		case VM_EXITCODE_HLT:
			intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
			error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu);
			break;
		case VM_EXITCODE_PAGING:
			error = vm_handle_paging(vm, vcpuid, &retu);
			break;
		case VM_EXITCODE_INST_EMUL:
			error = vm_handle_inst_emul(vm, vcpuid, &retu);
			break;
		case VM_EXITCODE_INOUT:
		case VM_EXITCODE_INOUT_STR:
			error = vm_handle_inout(vm, vcpuid, vme, &retu);
			break;
		case VM_EXITCODE_MONITOR:
		case VM_EXITCODE_MWAIT:
			vm_inject_ud(vm, vcpuid);
			break;
		default:
			retu = true;	/* handled in userland */
			break;
		}
	}

	if (error == 0 && retu == false) {
		rip = vme->rip + vme->inst_length;
		goto restart;
	}

	/* copy the exit information */
	bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
	return (error);
}
1504221828Sgrehan
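/*
 * Latch the VM_INTINFO_* encoded description of an event that was being
 * delivered when the vcpu exited ('exitintinfo'). The event is validated
 * here and replayed on the next entry into the guest by
 * vm_entry_intinfo() below.
 */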
int
vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info)
{
	struct vcpu *vcpu;
	int type, vector;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];

	if (info & VM_INTINFO_VALID) {
		type = info & VM_INTINFO_TYPE;
		vector = info & 0xff;
		if (type == VM_INTINFO_NMI && vector != IDT_NMI)
			return (EINVAL);
		if (type == VM_INTINFO_HWEXCEPTION && vector >= 32)
			return (EINVAL);
		if (info & VM_INTINFO_RSVD)
			return (EINVAL);
	} else {
		info = 0;
	}
	VCPU_CTR2(vm, vcpuid, "%s: info1(%#lx)", __func__, info);
	vcpu->exitintinfo = info;
	return (0);
}

enum exc_class {
	EXC_BENIGN,
	EXC_CONTRIBUTORY,
	EXC_PAGEFAULT
};

#define	IDT_VE	20	/* Virtualization Exception (Intel specific) */

static enum exc_class
exception_class(uint64_t info)
{
	int type, vector;

	KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %#lx", info));
	type = info & VM_INTINFO_TYPE;
	vector = info & 0xff;

	/* Table 6-4, "Interrupt and Exception Classes", Intel SDM, Vol 3 */
	switch (type) {
	case VM_INTINFO_HWINTR:
	case VM_INTINFO_SWINTR:
	case VM_INTINFO_NMI:
		return (EXC_BENIGN);
	default:
		/*
		 * Hardware exception.
		 *
		 * SVM and VT-x use identical type values to represent NMI,
		 * hardware interrupt and software interrupt.
		 *
		 * SVM uses type '3' for all exceptions. VT-x uses type '3'
		 * for exceptions except #BP and #OF. #BP and #OF use a type
		 * value of '5' or '6'. Therefore we don't check for explicit
		 * values of 'type' to classify 'intinfo' into a hardware
		 * exception.
		 */
		break;
	}

	switch (vector) {
	case IDT_PF:
	case IDT_VE:
		return (EXC_PAGEFAULT);
	case IDT_DE:
	case IDT_TS:
	case IDT_NP:
	case IDT_SS:
	case IDT_GP:
		return (EXC_CONTRIBUTORY);
	default:
		return (EXC_BENIGN);
	}
}

static int
nested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2,
    uint64_t *retinfo)
{
	enum exc_class exc1, exc2;
	int type1, vector1;

	KASSERT(info1 & VM_INTINFO_VALID, ("info1 %#lx is not valid", info1));
	KASSERT(info2 & VM_INTINFO_VALID, ("info2 %#lx is not valid", info2));

	/*
	 * If an exception occurs while attempting to call the double-fault
	 * handler the processor enters shutdown mode (aka triple fault).
	 */
	type1 = info1 & VM_INTINFO_TYPE;
	vector1 = info1 & 0xff;
	if (type1 == VM_INTINFO_HWEXCEPTION && vector1 == IDT_DF) {
		VCPU_CTR2(vm, vcpuid, "triple fault: info1(%#lx), info2(%#lx)",
		    info1, info2);
		vm_suspend(vm, VM_SUSPEND_TRIPLEFAULT);
		*retinfo = 0;
		return (0);
	}

	/*
	 * Table 6-5 "Conditions for Generating a Double Fault", Intel SDM,
	 * Vol 3.
	 */
	exc1 = exception_class(info1);
	exc2 = exception_class(info2);
	if ((exc1 == EXC_CONTRIBUTORY && exc2 == EXC_CONTRIBUTORY) ||
	    (exc1 == EXC_PAGEFAULT && exc2 != EXC_BENIGN)) {
		/* Convert nested fault into a double fault. */
		*retinfo = IDT_DF;
		*retinfo |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION;
		*retinfo |= VM_INTINFO_DEL_ERRCODE;
	} else {
		/* Handle exceptions serially */
		*retinfo = info2;
	}
	return (1);
}

static uint64_t
vcpu_exception_intinfo(struct vcpu *vcpu)
{
	uint64_t info = 0;

	if (vcpu->exception_pending) {
		info = vcpu->exception.vector & 0xff;
		info |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION;
		if (vcpu->exception.error_code_valid) {
			info |= VM_INTINFO_DEL_ERRCODE;
			info |= (uint64_t)vcpu->exception.error_code << 32;
		}
	}
	return (info);
}

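/*
 * Compute the event, if any, to inject on the next entry into the
 * guest. A latched 'exitintinfo' is merged with any pending exception:
 * nested_fault() collapses the pair into a double fault, or suspends
 * the VM on a triple fault, per the Intel SDM rules above. Returns 1
 * if '*retinfo' contains a valid event.
 */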
int
vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo)
{
	struct vcpu *vcpu;
	uint64_t info1, info2;
	int valid;

	KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid));

	vcpu = &vm->vcpu[vcpuid];

	info1 = vcpu->exitintinfo;
	vcpu->exitintinfo = 0;

	info2 = 0;
	if (vcpu->exception_pending) {
		info2 = vcpu_exception_intinfo(vcpu);
		vcpu->exception_pending = 0;
		VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %#lx",
		    vcpu->exception.vector, info2);
	}

	if ((info1 & VM_INTINFO_VALID) && (info2 & VM_INTINFO_VALID)) {
		valid = nested_fault(vm, vcpuid, info1, info2, retinfo);
	} else if (info1 & VM_INTINFO_VALID) {
		*retinfo = info1;
		valid = 1;
	} else if (info2 & VM_INTINFO_VALID) {
		*retinfo = info2;
		valid = 1;
	} else {
		valid = 0;
	}

	if (valid) {
		VCPU_CTR4(vm, vcpuid, "%s: info1(%#lx), info2(%#lx), "
		    "retinfo(%#lx)", __func__, info1, info2, *retinfo);
	}

	return (valid);
}

int
vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];
	*info1 = vcpu->exitintinfo;
	*info2 = vcpu_exception_intinfo(vcpu);
	return (0);
}

int
vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	if (exception->vector < 0 || exception->vector >= 32)
		return (EINVAL);

	/*
	 * A double fault exception should never be injected directly into
	 * the guest. It is a derived exception that results from specific
	 * combinations of nested faults.
	 */
	if (exception->vector == IDT_DF)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];

	if (vcpu->exception_pending) {
		VCPU_CTR2(vm, vcpuid, "Unable to inject exception %d due to "
		    "pending exception %d", exception->vector,
		    vcpu->exception.vector);
		return (EBUSY);
	}

	vcpu->exception_pending = 1;
	vcpu->exception = *exception;
	VCPU_CTR1(vm, vcpuid, "Exception %d pending", exception->vector);
	return (0);
}

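/*
 * Queue a fault-like exception for injection into the guest. This is
 * the common backend for wrappers such as the vm_inject_ud() call used
 * in vm_run() above.
 */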
void
vm_inject_fault(void *vmarg, int vcpuid, int vector, int errcode_valid,
    int errcode)
{
	struct vm_exception exception;
	struct vm_exit *vmexit;
	struct vm *vm;
	int error;

	vm = vmarg;

	exception.vector = vector;
	exception.error_code = errcode;
	exception.error_code_valid = errcode_valid;
	error = vm_inject_exception(vm, vcpuid, &exception);
	KASSERT(error == 0, ("vm_inject_exception error %d", error));

	/*
	 * A fault-like exception allows the instruction to be restarted
	 * after the exception handler returns.
	 *
	 * By setting the inst_length to 0 we ensure that the instruction
	 * pointer remains at the faulting instruction.
	 */
	vmexit = vm_exitinfo(vm, vcpuid);
	vmexit->inst_length = 0;
}

void
vm_inject_pf(void *vmarg, int vcpuid, int error_code, uint64_t cr2)
{
	struct vm *vm;
	int error;

	vm = vmarg;
	VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %#x, cr2 %#lx",
	    error_code, cr2);

	error = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR2, cr2);
	KASSERT(error == 0, ("vm_set_register(cr2) error %d", error));

	vm_inject_fault(vm, vcpuid, IDT_PF, 1, error_code);
}

static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");

int
vm_inject_nmi(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];

	vcpu->nmi_pending = 1;
	vcpu_notify_event(vm, vcpuid, false);
	return (0);
}

int
vm_nmi_pending(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	return (vcpu->nmi_pending);
}

void
vm_nmi_clear(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vm_nmi_clear: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	if (vcpu->nmi_pending == 0)
		panic("vm_nmi_clear: inconsistent nmi_pending state");

	vcpu->nmi_pending = 0;
	vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1);
}

static VMM_STAT(VCPU_EXTINT_COUNT, "number of ExtINTs delivered to vcpu");

int
vm_inject_extint(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];

	vcpu->extint_pending = 1;
	vcpu_notify_event(vm, vcpuid, false);
	return (0);
}

int
vm_extint_pending(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vm_extint_pending: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	return (vcpu->extint_pending);
}

void
vm_extint_clear(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vm_extint_clear: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	if (vcpu->extint_pending == 0)
		panic("vm_extint_clear: inconsistent extint_pending state");

	vcpu->extint_pending = 0;
	vmm_stat_incr(vm, vcpuid, VCPU_EXTINT_COUNT, 1);
}

int
vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
{
	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (VMGETCAP(vm->cookie, vcpu, type, retval));
}

int
vm_set_capability(struct vm *vm, int vcpu, int type, int val)
{
	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (VMSETCAP(vm->cookie, vcpu, type, val));
}

struct vlapic *
vm_lapic(struct vm *vm, int cpu)
{
	return (vm->vcpu[cpu].vlapic);
}

struct vioapic *
vm_ioapic(struct vm *vm)
{

	return (vm->vioapic);
}

struct vhpet *
vm_hpet(struct vm *vm)
{

	return (vm->vhpet);
}

boolean_t
vmm_is_pptdev(int bus, int slot, int func)
{
	int found, i, n;
	int b, s, f;
	char *val, *cp, *cp2;

	/*
	 * XXX
	 * The length of an environment variable is limited to 128 bytes which
	 * puts an upper limit on the number of passthru devices that may be
	 * specified using a single environment variable.
	 *
	 * Work around this by scanning multiple environment variable
	 * names instead of a single one - yuck!
	 */
	const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL };

	/* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */
	found = 0;
	for (i = 0; names[i] != NULL && !found; i++) {
		cp = val = getenv(names[i]);
		while (cp != NULL && *cp != '\0') {
			if ((cp2 = strchr(cp, ' ')) != NULL)
				*cp2 = '\0';

			n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
			if (n == 3 && bus == b && slot == s && func == f) {
				found = 1;
				break;
			}

			if (cp2 != NULL)
				*cp2++ = ' ';

			cp = cp2;
		}
		freeenv(val);
	}
	return (found);
}

void *
vm_iommu_domain(struct vm *vm)
{

	return (vm->iommu);
}

int
vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate,
    bool from_idle)
{
	int error;
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vcpu_set_state: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);
	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
	vcpu_unlock(vcpu);

	return (error);
}

enum vcpu_state
vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
{
	struct vcpu *vcpu;
	enum vcpu_state state;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vcpu_get_state: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);
	state = vcpu->state;
	if (hostcpu != NULL)
		*hostcpu = vcpu->hostcpu;
	vcpu_unlock(vcpu);

	return (state);
}

int
vm_activate_cpu(struct vm *vm, int vcpuid)
{

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	if (CPU_ISSET(vcpuid, &vm->active_cpus))
		return (EBUSY);

	VCPU_CTR0(vm, vcpuid, "activated");
	CPU_SET_ATOMIC(vcpuid, &vm->active_cpus);
	return (0);
}

cpuset_t
vm_active_cpus(struct vm *vm)
{

	return (vm->active_cpus);
}

cpuset_t
vm_suspended_cpus(struct vm *vm)
{

	return (vm->suspended_cpus);
}

void *
vcpu_stats(struct vm *vm, int vcpuid)
{

	return (vm->vcpu[vcpuid].stats);
}

int
vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state)
{
	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	*state = vm->vcpu[vcpuid].x2apic_state;

	return (0);
}

int
vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
{
	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	if (state >= X2APIC_STATE_LAST)
		return (EINVAL);

	vm->vcpu[vcpuid].x2apic_state = state;

	vlapic_set_x2apic_state(vm, vcpuid, state);

	return (0);
}

/*
 * This function is called to ensure that a vcpu "sees" a pending event
 * as soon as possible:
 * - If the vcpu thread is sleeping then it is woken up.
 * - If the vcpu is running on a different host_cpu then an IPI will be directed
 *   to the host_cpu to cause the vcpu to trap into the hypervisor.
 */
void
vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
{
	int hostcpu;
	struct vcpu *vcpu;

	vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);
	hostcpu = vcpu->hostcpu;
	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
		if (hostcpu != curcpu) {
			if (lapic_intr) {
				vlapic_post_intr(vcpu->vlapic, hostcpu,
				    vmm_ipinum);
			} else {
				ipi_cpu(hostcpu, vmm_ipinum);
			}
		} else {
			/*
			 * If the 'vcpu' is running on 'curcpu' then it must
			 * be sending a notification to itself (e.g. SELF_IPI).
			 * The pending event will be picked up when the vcpu
			 * transitions back to guest context.
			 */
		}
	} else {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
	}
	vcpu_unlock(vcpu);
}

struct vmspace *
vm_get_vmspace(struct vm *vm)
{

	return (vm->vmspace);
}

int
vm_apicid2vcpuid(struct vm *vm, int apicid)
{
	/*
	 * XXX apic id is assumed to be numerically identical to vcpu id
	 */
	return (apicid);
}

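/*
 * Initiate a rendezvous of all vcpus in 'dest': publish the handler and
 * its argument, kick each target vcpu so that it traps into the
 * hypervisor, and then take part in the rendezvous on behalf of
 * 'vcpuid' (which may be -1 when called from a host thread that is not
 * a vcpu).
 */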
void
vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
    vm_rendezvous_func_t func, void *arg)
{
	int i;

	/*
	 * Enforce that this function is called without any locks
	 */
	WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous");
	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
	    ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid));

restart:
	mtx_lock(&vm->rendezvous_mtx);
	if (vm->rendezvous_func != NULL) {
		/*
		 * If a rendezvous is already in progress then we need to
		 * call the rendezvous handler in case this 'vcpuid' is one
		 * of the targets of the rendezvous.
		 */
		RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress");
		mtx_unlock(&vm->rendezvous_mtx);
		vm_handle_rendezvous(vm, vcpuid);
		goto restart;
	}
	KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous "
	    "rendezvous is still in progress"));

	RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous");
	vm->rendezvous_req_cpus = dest;
	CPU_ZERO(&vm->rendezvous_done_cpus);
	vm->rendezvous_arg = arg;
	vm_set_rendezvous_func(vm, func);
	mtx_unlock(&vm->rendezvous_mtx);

	/*
	 * Wake up any sleeping vcpus and trigger a VM-exit in any running
	 * vcpus so they handle the rendezvous as soon as possible.
	 */
	for (i = 0; i < VM_MAXCPU; i++) {
		if (CPU_ISSET(i, &dest))
			vcpu_notify_event(vm, i, false);
	}

	vm_handle_rendezvous(vm, vcpuid);
}

struct vatpic *
vm_atpic(struct vm *vm)
{
	return (vm->vatpic);
}

struct vatpit *
vm_atpit(struct vm *vm)
{
	return (vm->vatpit);
}

enum vm_reg_name
vm_segment_name(int seg)
{
	static enum vm_reg_name seg_names[] = {
		VM_REG_GUEST_ES,
		VM_REG_GUEST_CS,
		VM_REG_GUEST_SS,
		VM_REG_GUEST_DS,
		VM_REG_GUEST_FS,
		VM_REG_GUEST_GS
	};

	KASSERT(seg >= 0 && seg < nitems(seg_names),
	    ("%s: invalid segment encoding %d", __func__, seg));
	return (seg_names[seg]);
}

void
vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
    int num_copyinfo)
{
	int idx;

	for (idx = 0; idx < num_copyinfo; idx++) {
		if (copyinfo[idx].cookie != NULL)
			vm_gpa_release(copyinfo[idx].cookie);
	}
	bzero(copyinfo, num_copyinfo * sizeof(struct vm_copyinfo));
}

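/*
 * Set up 'copyinfo[]' to copy between the kernel and the guest linear
 * address range ['gla', 'gla' + 'len'). The range is split into
 * page-sized chunks, each translated to a guest physical address and
 * wired with vm_gpa_hold(). Returns 0 on success, a positive errno if
 * the address translation fails, or -1 if the guest pages could not be
 * held.
 *
 * A minimal usage sketch for a hypothetical caller (error handling
 * elided; 'paging' and 'gla' are assumed to describe a readable guest
 * mapping):
 *
 *	struct vm_copyinfo copyinfo[2];
 *	uint64_t val;
 *
 *	if (vm_copy_setup(vm, vcpuid, paging, gla, sizeof(val),
 *	    PROT_READ, copyinfo, nitems(copyinfo)) == 0) {
 *		vm_copyin(vm, vcpuid, copyinfo, &val, sizeof(val));
 *		vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
 *	}
 */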
int
vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
    uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo,
    int num_copyinfo)
{
	int error, idx, nused;
	size_t n, off, remaining;
	void *hva, *cookie;
	uint64_t gpa;

	bzero(copyinfo, sizeof(struct vm_copyinfo) * num_copyinfo);

	nused = 0;
	remaining = len;
	while (remaining > 0) {
		KASSERT(nused < num_copyinfo, ("insufficient vm_copyinfo"));
		error = vmm_gla2gpa(vm, vcpuid, paging, gla, prot, &gpa);
		if (error)
			return (error);
		off = gpa & PAGE_MASK;
		n = min(remaining, PAGE_SIZE - off);
		copyinfo[nused].gpa = gpa;
		copyinfo[nused].len = n;
		remaining -= n;
		gla += n;
		nused++;
	}

	for (idx = 0; idx < nused; idx++) {
		hva = vm_gpa_hold(vm, copyinfo[idx].gpa, copyinfo[idx].len,
		    prot, &cookie);
		if (hva == NULL)
			break;
		copyinfo[idx].hva = hva;
		copyinfo[idx].cookie = cookie;
	}

	if (idx != nused) {
		vm_copy_teardown(vm, vcpuid, copyinfo, num_copyinfo);
		return (-1);
	} else {
		return (0);
	}
}

void
vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr,
    size_t len)
{
	char *dst;
	int idx;

	dst = kaddr;
	idx = 0;
	while (len > 0) {
		bcopy(copyinfo[idx].hva, dst, copyinfo[idx].len);
		len -= copyinfo[idx].len;
		dst += copyinfo[idx].len;
		idx++;
	}
}

void
vm_copyout(struct vm *vm, int vcpuid, const void *kaddr,
    struct vm_copyinfo *copyinfo, size_t len)
{
	const char *src;
	int idx;

	src = kaddr;
	idx = 0;
	while (len > 0) {
		bcopy(src, copyinfo[idx].hva, copyinfo[idx].len);
		len -= copyinfo[idx].len;
		src += copyinfo[idx].len;
		idx++;
	}
}

/*
 * Return the amount of in-use and wired memory for the VM. Since
 * these are global stats, only return the values for vCPU 0.
 */
VMM_STAT_DECLARE(VMM_MEM_RESIDENT);
VMM_STAT_DECLARE(VMM_MEM_WIRED);

static void
vm_get_rescnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat)
{

	if (vcpu == 0) {
		vmm_stat_set(vm, vcpu, VMM_MEM_RESIDENT,
		    PAGE_SIZE * vmspace_resident_count(vm->vmspace));
	}
}

static void
vm_get_wiredcnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat)
{

	if (vcpu == 0) {
		vmm_stat_set(vm, vcpu, VMM_MEM_WIRED,
		    PAGE_SIZE * pmap_wired_count(vmspace_pmap(vm->vmspace)));
	}
}

VMM_STAT_FUNC(VMM_MEM_RESIDENT, "Resident memory", vm_get_rescnt);
VMM_STAT_FUNC(VMM_MEM_WIRED, "Wired memory", vm_get_wiredcnt);