vmm.c revision 276403
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/sys/amd64/vmm/vmm.c 276403 2014-12-30 08:24:14Z neel $
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/amd64/vmm/vmm.c 276403 2014-12-30 08:24:14Z neel $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/pcpu.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/cpu.h>
#include <machine/vm.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <x86/psl.h>
#include <x86/apicreg.h>
#include <machine/vmparam.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>

#include "vmm_ioport.h"
#include "vmm_ktr.h"
#include "vmm_host.h"
#include "vmm_mem.h"
#include "vmm_util.h"
#include "vatpic.h"
#include "vatpit.h"
#include "vhpet.h"
#include "vioapic.h"
#include "vlapic.h"
#include "vmm_ipi.h"
#include "vmm_stat.h"
#include "vmm_lapic.h"

#include "io/ppt.h"
#include "io/iommu.h"

struct vlapic;

/*
 * Initialization:
 * (a) allocated when vcpu is created
 * (i) initialized when vcpu is created and when it is reinitialized
 * (o) initialized the first time the vcpu is created
 * (x) initialized before use
 */
struct vcpu {
	struct mtx 	mtx;		/* (o) protects 'state' and 'hostcpu' */
	enum vcpu_state	state;		/* (o) vcpu state */
	int		hostcpu;	/* (o) vcpu's host cpu */
	struct vlapic	*vlapic;	/* (i) APIC device model */
	enum x2apic_state x2apic_state;	/* (i) APIC mode */
	uint64_t	exitintinfo;	/* (i) events pending at VM exit */
	int		nmi_pending;	/* (i) NMI pending */
	int		extint_pending;	/* (i) INTR pending */
	struct vm_exception exception;	/* (x) exception collateral */
	int	exception_pending;	/* (i) exception pending */
	struct savefpu	*guestfpu;	/* (a,i) guest fpu state */
	uint64_t	guest_xcr0;	/* (i) guest %xcr0 register */
	void		*stats;		/* (a,i) statistics */
	struct vm_exit	exitinfo;	/* (x) exit reason and collateral */
};

#define	vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)

struct mem_seg {
	vm_paddr_t	gpa;
	size_t		len;
	boolean_t	wired;
	vm_object_t	object;
};
#define	VM_MAX_MEMORY_SEGMENTS	2

/*
 * Initialization:
 * (o) initialized the first time the VM is created
 * (i) initialized when VM is created and when it is reinitialized
 * (x) initialized before use
 */
struct vm {
	void		*cookie;		/* (i) cpu-specific data */
	void		*iommu;			/* (x) iommu-specific data */
	struct vhpet	*vhpet;			/* (i) virtual HPET */
	struct vioapic	*vioapic;		/* (i) virtual ioapic */
	struct vatpic	*vatpic;		/* (i) virtual atpic */
	struct vatpit	*vatpit;		/* (i) virtual atpit */
	volatile cpuset_t active_cpus;		/* (i) active vcpus */
	int		suspend;		/* (i) stop VM execution */
	volatile cpuset_t suspended_cpus; 	/* (i) suspended vcpus */
	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
	cpuset_t	rendezvous_req_cpus;	/* (x) rendezvous requested */
	cpuset_t	rendezvous_done_cpus;	/* (x) rendezvous finished */
	void		*rendezvous_arg;	/* (x) rendezvous func/arg */
	vm_rendezvous_func_t rendezvous_func;
	struct mtx	rendezvous_mtx;		/* (o) rendezvous lock */
	int		num_mem_segs;		/* (o) guest memory segments */
	struct mem_seg	mem_segs[VM_MAX_MEMORY_SEGMENTS];
	struct vmspace	*vmspace;		/* (o) guest's address space */
	char		name[VM_MAX_NAMELEN];	/* (o) virtual machine name */
	struct vcpu	vcpu[VM_MAXCPU];	/* (i) guest vcpus */
};

static int vmm_initialized;

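/*
 * 'ops' points at the hardware-specific backend (Intel VT-x or AMD SVM)
 * selected in vmm_init(). The macros below dispatch through it and fall
 * back to ENXIO/NULL/0 if no backend was initialized.
 */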
static struct vmm_ops *ops;
#define	VMM_INIT(num)	(ops != NULL ? (*ops->init)(num) : 0)
#define	VMM_CLEANUP()	(ops != NULL ? (*ops->cleanup)() : 0)
#define	VMM_RESUME()	(ops != NULL ? (*ops->resume)() : 0)

#define	VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
#define	VMRUN(vmi, vcpu, rip, pmap, rptr, sptr) \
	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr, sptr) : ENXIO)
#define	VMCLEANUP(vmi)	(ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
#define	VMSPACE_ALLOC(min, max) \
	(ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
#define	VMSPACE_FREE(vmspace) \
	(ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO)
#define	VMGETREG(vmi, vcpu, num, retval)		\
	(ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETREG(vmi, vcpu, num, val)		\
	(ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
#define	VMGETDESC(vmi, vcpu, num, desc)		\
	(ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMSETDESC(vmi, vcpu, num, desc)		\
	(ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMGETCAP(vmi, vcpu, num, retval)	\
	(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETCAP(vmi, vcpu, num, val)		\
	(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
#define	VLAPIC_INIT(vmi, vcpu)			\
	(ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL)
#define	VLAPIC_CLEANUP(vmi, vlapic)		\
	(ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL)

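/*
 * Setting CR0.TS causes the next FPU/SSE access to trap with a #NM fault,
 * which is how stray host access to the guest's FPU state is caught;
 * the 'clts' instruction clears the bit again.
 */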
#define	fpu_start_emulating()	load_cr0(rcr0() | CR0_TS)
#define	fpu_stop_emulating()	clts()

static MALLOC_DEFINE(M_VM, "vm", "vm");

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

/*
 * Halt the guest if all vcpus are executing a HLT instruction with
 * interrupts disabled.
 */
static int halt_detection_enabled = 1;
TUNABLE_INT("hw.vmm.halt_detection", &halt_detection_enabled);
SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN,
    &halt_detection_enabled, 0,
    "Halt VM if all vcpus execute HLT with interrupts disabled");

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

static int trace_guest_exceptions;
SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN,
    &trace_guest_exceptions, 0,
    "Trap into hypervisor on all guest exceptions and reflect them back");

static void
vcpu_cleanup(struct vm *vm, int i, bool destroy)
{
	struct vcpu *vcpu = &vm->vcpu[i];

	VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic);
	if (destroy) {
		vmm_stat_free(vcpu->stats);
		fpu_save_area_free(vcpu->guestfpu);
	}
}

static void
vcpu_init(struct vm *vm, int vcpu_id, bool create)
{
	struct vcpu *vcpu;

	KASSERT(vcpu_id >= 0 && vcpu_id < VM_MAXCPU,
	    ("vcpu_init: invalid vcpu %d", vcpu_id));

	vcpu = &vm->vcpu[vcpu_id];

	if (create) {
		KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already "
		    "initialized", vcpu_id));
		vcpu_lock_init(vcpu);
		vcpu->state = VCPU_IDLE;
		vcpu->hostcpu = NOCPU;
		vcpu->guestfpu = fpu_save_area_alloc();
		vcpu->stats = vmm_stat_alloc();
	}

	vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
	vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
	vcpu->exitintinfo = 0;
	vcpu->nmi_pending = 0;
	vcpu->extint_pending = 0;
	vcpu->exception_pending = 0;
	vcpu->guest_xcr0 = XFEATURE_ENABLED_X87;
	fpu_save_area_reset(vcpu->guestfpu);
	vmm_stat_init(vcpu->stats);
}

int
vcpu_trace_exceptions(struct vm *vm, int vcpuid)
{

	return (trace_guest_exceptions);
}

struct vm_exit *
vm_exitinfo(struct vm *vm, int cpuid)
{
	struct vcpu *vcpu;

	if (cpuid < 0 || cpuid >= VM_MAXCPU)
		panic("vm_exitinfo: invalid cpuid %d", cpuid);

	vcpu = &vm->vcpu[cpuid];

	return (&vcpu->exitinfo);
}

static void
vmm_resume(void)
{
	VMM_RESUME();
}

static int
vmm_init(void)
{
	int error;

	vmm_host_state_init();

	vmm_ipinum = vmm_ipi_alloc();
	if (vmm_ipinum == 0)
		vmm_ipinum = IPI_AST;

	error = vmm_mem_init();
	if (error)
		return (error);

	if (vmm_is_intel())
		ops = &vmm_ops_intel;
	else if (vmm_is_amd())
		ops = &vmm_ops_amd;
	else
		return (ENXIO);

	vmm_resume_p = vmm_resume;

	return (VMM_INIT(vmm_ipinum));
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		vmmdev_init();
		if (ppt_avail_devices() > 0)
			iommu_init();
		error = vmm_init();
		if (error == 0)
			vmm_initialized = 1;
		break;
	case MOD_UNLOAD:
		error = vmmdev_cleanup();
		if (error == 0) {
			vmm_resume_p = NULL;
			iommu_cleanup();
			if (vmm_ipinum != IPI_AST)
				vmm_ipi_free(vmm_ipinum);
			error = VMM_CLEANUP();
			/*
			 * Something bad happened - prevent new
			 * VMs from being created
			 */
			if (error)
				vmm_initialized = 0;
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - iommu initialization must happen after the pci passthru driver has had
 *   a chance to attach to any passthru devices (after SI_SUB_CONFIGURE).
 *
 * - VT-x initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 */
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

static void
vm_init(struct vm *vm, bool create)
{
	int i;

	vm->cookie = VMINIT(vm, vmspace_pmap(vm->vmspace));
	vm->iommu = NULL;
	vm->vioapic = vioapic_init(vm);
	vm->vhpet = vhpet_init(vm);
	vm->vatpic = vatpic_init(vm);
	vm->vatpit = vatpit_init(vm);

	CPU_ZERO(&vm->active_cpus);

	vm->suspend = 0;
	CPU_ZERO(&vm->suspended_cpus);

	for (i = 0; i < VM_MAXCPU; i++)
		vcpu_init(vm, i, create);
}

int
vm_create(const char *name, struct vm **retvm)
{
	struct vm *vm;
	struct vmspace *vmspace;

	/*
	 * If vmm.ko could not be successfully initialized then don't attempt
	 * to create the virtual machine.
	 */
	if (!vmm_initialized)
		return (ENXIO);

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (EINVAL);

	vmspace = VMSPACE_ALLOC(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
	if (vmspace == NULL)
		return (ENOMEM);

	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->num_mem_segs = 0;
	vm->vmspace = vmspace;
	mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);

	vm_init(vm, true);

	*retvm = vm;
	return (0);
}

static void
vm_free_mem_seg(struct vm *vm, struct mem_seg *seg)
{

	if (seg->object != NULL)
		vmm_mem_free(vm->vmspace, seg->gpa, seg->len);

	bzero(seg, sizeof(*seg));
}

static void
vm_cleanup(struct vm *vm, bool destroy)
{
	int i;

	ppt_unassign_all(vm);

	if (vm->iommu != NULL)
		iommu_destroy_domain(vm->iommu);

	vatpit_cleanup(vm->vatpit);
	vhpet_cleanup(vm->vhpet);
	vatpic_cleanup(vm->vatpic);
	vioapic_cleanup(vm->vioapic);

	for (i = 0; i < VM_MAXCPU; i++)
		vcpu_cleanup(vm, i, destroy);

	VMCLEANUP(vm->cookie);

	if (destroy) {
		for (i = 0; i < vm->num_mem_segs; i++)
			vm_free_mem_seg(vm, &vm->mem_segs[i]);

		vm->num_mem_segs = 0;

		VMSPACE_FREE(vm->vmspace);
		vm->vmspace = NULL;
	}
}

void
vm_destroy(struct vm *vm)
{
	vm_cleanup(vm, true);
	free(vm, M_VM);
}

int
vm_reinit(struct vm *vm)
{
	int error;

	/*
	 * A virtual machine can be reset only if all vcpus are suspended.
	 */
	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
		vm_cleanup(vm, false);
		vm_init(vm, false);
		error = 0;
	} else {
		error = EBUSY;
	}

	return (error);
}

const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}

int
vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
	vm_object_t obj;

	if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL)
		return (ENOMEM);
	else
		return (0);
}

int
vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
{

	vmm_mmio_free(vm->vmspace, gpa, len);
	return (0);
}

boolean_t
vm_mem_allocated(struct vm *vm, vm_paddr_t gpa)
{
	int i;
	vm_paddr_t gpabase, gpalimit;

	for (i = 0; i < vm->num_mem_segs; i++) {
		gpabase = vm->mem_segs[i].gpa;
		gpalimit = gpabase + vm->mem_segs[i].len;
		if (gpa >= gpabase && gpa < gpalimit)
			return (TRUE);		/* 'gpa' is regular memory */
	}

	if (ppt_is_mmio(vm, gpa))
		return (TRUE);			/* 'gpa' is pci passthru mmio */

	return (FALSE);
}

int
vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	int available, allocated;
	struct mem_seg *seg;
	vm_object_t object;
	vm_paddr_t g;

	if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
		return (EINVAL);

	available = allocated = 0;
	g = gpa;
	while (g < gpa + len) {
		if (vm_mem_allocated(vm, g))
			allocated++;
		else
			available++;

		g += PAGE_SIZE;
	}

	/*
	 * If there are some allocated and some available pages in the address
	 * range then it is an error.
	 */
	if (allocated && available)
		return (EINVAL);

	/*
	 * If the entire address range being requested has already been
	 * allocated then there isn't anything more to do.
	 */
	if (allocated && available == 0)
		return (0);

	if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
		return (E2BIG);

	seg = &vm->mem_segs[vm->num_mem_segs];

	if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL)
		return (ENOMEM);

	seg->gpa = gpa;
	seg->len = len;
	seg->object = object;
	seg->wired = FALSE;

	vm->num_mem_segs++;

	return (0);
}

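/*
 * Return the highest guest physical address covered by the memory
 * segments; used to size the iommu domain created for pci passthru.
 */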
static vm_paddr_t
vm_maxmem(struct vm *vm)
{
	int i;
	vm_paddr_t gpa, maxmem;

	maxmem = 0;
	for (i = 0; i < vm->num_mem_segs; i++) {
		gpa = vm->mem_segs[i].gpa + vm->mem_segs[i].len;
		if (gpa > maxmem)
			maxmem = gpa;
	}
	return (maxmem);
}

static void
vm_gpa_unwire(struct vm *vm)
{
	int i, rv;
	struct mem_seg *seg;

	for (i = 0; i < vm->num_mem_segs; i++) {
		seg = &vm->mem_segs[i];
		if (!seg->wired)
			continue;

		rv = vm_map_unwire(&vm->vmspace->vm_map,
				   seg->gpa, seg->gpa + seg->len,
				   VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
		KASSERT(rv == KERN_SUCCESS, ("vm(%s) memory segment "
		    "%#lx/%ld could not be unwired: %d",
		    vm_name(vm), seg->gpa, seg->len, rv));

		seg->wired = FALSE;
	}
}

static int
vm_gpa_wire(struct vm *vm)
{
	int i, rv;
	struct mem_seg *seg;

	for (i = 0; i < vm->num_mem_segs; i++) {
		seg = &vm->mem_segs[i];
		if (seg->wired)
			continue;

		/* XXX rlimits? */
		rv = vm_map_wire(&vm->vmspace->vm_map,
				 seg->gpa, seg->gpa + seg->len,
				 VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
		if (rv != KERN_SUCCESS)
			break;

		seg->wired = TRUE;
	}

	if (i < vm->num_mem_segs) {
		/*
		 * Undo the wiring before returning an error.
		 */
		vm_gpa_unwire(vm);
		return (EAGAIN);
	}

	return (0);
}

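/*
 * Establish (or tear down) iommu mappings so that pci passthru devices
 * can DMA directly into guest memory: pages mapped into the VM's iommu
 * domain are removed from the host's domain, and vice versa.
 */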
static void
vm_iommu_modify(struct vm *vm, boolean_t map)
{
	int i, sz;
	vm_paddr_t gpa, hpa;
	struct mem_seg *seg;
	void *vp, *cookie, *host_domain;

	sz = PAGE_SIZE;
	host_domain = iommu_host_domain();

	for (i = 0; i < vm->num_mem_segs; i++) {
		seg = &vm->mem_segs[i];
		KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired",
		    vm_name(vm), seg->gpa, seg->len));

		gpa = seg->gpa;
		while (gpa < seg->gpa + seg->len) {
			vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE,
					 &cookie);
			KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx",
			    vm_name(vm), gpa));

			vm_gpa_release(cookie);

			hpa = DMAP_TO_PHYS((uintptr_t)vp);
			if (map) {
				iommu_create_mapping(vm->iommu, gpa, hpa, sz);
				iommu_remove_mapping(host_domain, hpa, sz);
			} else {
				iommu_remove_mapping(vm->iommu, gpa, sz);
				iommu_create_mapping(host_domain, hpa, hpa, sz);
			}

			gpa += PAGE_SIZE;
		}
	}

	/*
	 * Invalidate the cached translations associated with the domain
	 * from which pages were removed.
	 */
	if (map)
		iommu_invalidate_tlb(host_domain);
	else
		iommu_invalidate_tlb(vm->iommu);
}

#define	vm_iommu_unmap(vm)	vm_iommu_modify((vm), FALSE)
#define	vm_iommu_map(vm)	vm_iommu_modify((vm), TRUE)

int
vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func)
{
	int error;

	error = ppt_unassign_device(vm, bus, slot, func);
	if (error)
		return (error);

	if (ppt_assigned_devices(vm) == 0) {
		vm_iommu_unmap(vm);
		vm_gpa_unwire(vm);
	}
	return (0);
}

int
vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
{
	int error;
	vm_paddr_t maxaddr;

	/*
	 * Virtual machines with pci passthru devices get special treatment:
	 * - the guest physical memory is wired
	 * - the iommu is programmed to do the 'gpa' to 'hpa' translation
	 *
	 * We need to do this before the first pci passthru device is attached.
	 */
	if (ppt_assigned_devices(vm) == 0) {
		KASSERT(vm->iommu == NULL,
		    ("vm_assign_pptdev: iommu must be NULL"));
		maxaddr = vm_maxmem(vm);
		vm->iommu = iommu_create_domain(maxaddr);

		error = vm_gpa_wire(vm);
		if (error)
			return (error);

		vm_iommu_map(vm);
	}

	error = ppt_assign_device(vm, bus, slot, func);
	return (error);
}

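/*
 * Wire the page backing 'gpa' and return a host kernel address for it via
 * the direct map. The page stays held until the caller passes the returned
 * 'cookie' to vm_gpa_release().
 */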
void *
vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
	    void **cookie)
{
	int count, pageoff;
	vm_page_t m;

	pageoff = gpa & PAGE_MASK;
	if (len > PAGE_SIZE - pageoff)
		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);

	count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
	    trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);

	if (count == 1) {
		*cookie = m;
		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
	} else {
		*cookie = NULL;
		return (NULL);
	}
}

void
vm_gpa_release(void *cookie)
{
	vm_page_t m = cookie;

	vm_page_lock(m);
	vm_page_unhold(m);
	vm_page_unlock(m);
}

int
vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
		  struct vm_memory_segment *seg)
{
	int i;

	for (i = 0; i < vm->num_mem_segs; i++) {
		if (gpabase == vm->mem_segs[i].gpa) {
			seg->gpa = vm->mem_segs[i].gpa;
			seg->len = vm->mem_segs[i].len;
			seg->wired = vm->mem_segs[i].wired;
			return (0);
		}
	}
	return (-1);
}

int
vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
	      vm_offset_t *offset, struct vm_object **object)
{
	int i;
	size_t seg_len;
	vm_paddr_t seg_gpa;
	vm_object_t seg_obj;

	for (i = 0; i < vm->num_mem_segs; i++) {
		if ((seg_obj = vm->mem_segs[i].object) == NULL)
			continue;

		seg_gpa = vm->mem_segs[i].gpa;
		seg_len = vm->mem_segs[i].len;

		if (gpa >= seg_gpa && gpa < seg_gpa + seg_len) {
			*offset = gpa - seg_gpa;
			*object = seg_obj;
			vm_object_reference(seg_obj);
			return (0);
		}
	}

	return (EINVAL);
}

int
vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (VMGETREG(vm->cookie, vcpu, reg, retval));
}

int
vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (VMSETREG(vm->cookie, vcpu, reg, val));
}

static boolean_t
is_descriptor_table(int reg)
{

	switch (reg) {
	case VM_REG_GUEST_IDTR:
	case VM_REG_GUEST_GDTR:
		return (TRUE);
	default:
		return (FALSE);
	}
}

static boolean_t
is_segment_register(int reg)
{

	switch (reg) {
	case VM_REG_GUEST_ES:
	case VM_REG_GUEST_CS:
	case VM_REG_GUEST_SS:
	case VM_REG_GUEST_DS:
	case VM_REG_GUEST_FS:
	case VM_REG_GUEST_GS:
	case VM_REG_GUEST_TR:
	case VM_REG_GUEST_LDTR:
		return (TRUE);
	default:
		return (FALSE);
	}
}

int
vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
		struct seg_desc *desc)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (!is_segment_register(reg) && !is_descriptor_table(reg))
		return (EINVAL);

	return (VMGETDESC(vm->cookie, vcpu, reg, desc));
}

int
vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
		struct seg_desc *desc)
{
	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (!is_segment_register(reg) && !is_descriptor_table(reg))
		return (EINVAL);

	return (VMSETDESC(vm->cookie, vcpu, reg, desc));
}

static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* flush host state to the pcb */
	fpuexit(curthread);

	/* restore guest FPU state */
	fpu_stop_emulating();
	fpurestore(vcpu->guestfpu);

	/* restore guest XCR0 if XSAVE is enabled in the host */
	if (rcr4() & CR4_XSAVE)
		load_xcr(0, vcpu->guest_xcr0);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	fpu_start_emulating();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{

	if ((rcr0() & CR0_TS) == 0)
		panic("fpu emulation not enabled in host!");

	/* save guest XCR0 and restore host XCR0 */
	if (rcr4() & CR4_XSAVE) {
		vcpu->guest_xcr0 = rxcr(0);
		load_xcr(0, vmm_get_host_xcr0());
	}

	/* save guest FPU state */
	fpu_stop_emulating();
	fpusave(vcpu->guestfpu);
	fpu_start_emulating();
}

static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");

static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;

	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state. This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE)
			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	default:
		error = 1;
		break;
	}

	if (error)
		return (EBUSY);

	vcpu->state = newstate;
	if (newstate == VCPU_RUNNING)
		vcpu->hostcpu = curcpu;
	else
		vcpu->hostcpu = NOCPU;

	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}

static void
vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0)
		panic("Error %d setting state to %d\n", error, newstate);
}

static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}

static void
vm_set_rendezvous_func(struct vm *vm, vm_rendezvous_func_t func)
{

	KASSERT(mtx_owned(&vm->rendezvous_mtx), ("rendezvous_mtx not locked"));

	/*
	 * Update 'rendezvous_func' and execute a write memory barrier to
	 * ensure that it is visible across all host cpus. This is not needed
	 * for correctness but it does ensure that all the vcpus will notice
	 * that the rendezvous is requested immediately.
	 */
	vm->rendezvous_func = func;
	wmb();
}

#define	RENDEZVOUS_CTR0(vm, vcpuid, fmt)				\
	do {								\
		if (vcpuid >= 0)					\
			VCPU_CTR0(vm, vcpuid, fmt);			\
		else							\
			VM_CTR0(vm, fmt);				\
	} while (0)

static void
vm_handle_rendezvous(struct vm *vm, int vcpuid)
{

	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
	    ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid));

	mtx_lock(&vm->rendezvous_mtx);
	while (vm->rendezvous_func != NULL) {
		/* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
		CPU_AND(&vm->rendezvous_req_cpus, &vm->active_cpus);

		if (vcpuid != -1 &&
		    CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
		    !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
			VCPU_CTR0(vm, vcpuid, "Calling rendezvous func");
			(*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg);
			CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
		}
		if (CPU_CMP(&vm->rendezvous_req_cpus,
		    &vm->rendezvous_done_cpus) == 0) {
			VCPU_CTR0(vm, vcpuid, "Rendezvous completed");
			vm_set_rendezvous_func(vm, NULL);
			wakeup(&vm->rendezvous_func);
			break;
		}
		RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion");
		mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
		    "vmrndv", 0);
	}
	mtx_unlock(&vm->rendezvous_mtx);
}

/*
 * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
 */
static int
vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
{
	struct vcpu *vcpu;
	const char *wmesg;
	int error, t, vcpu_halted, vm_halted;

	KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted"));

	vcpu = &vm->vcpu[vcpuid];
	vcpu_halted = 0;
	vm_halted = 0;

	/*
	 * The typical way to halt a cpu is to execute: "sti; hlt"
	 *
	 * STI sets RFLAGS.IF to enable interrupts. However, the processor
	 * remains in an "interrupt shadow" for an additional instruction
	 * following the STI. This guarantees that the "sti; hlt" sequence is
	 * atomic and a pending interrupt will be recognized after the HLT.
	 *
	 * After the HLT emulation is done the vcpu is no longer in an
	 * interrupt shadow and a pending interrupt can be injected on
	 * the next entry into the guest.
	 */
	error = vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0);
	KASSERT(error == 0, ("%s: error %d clearing interrupt shadow",
	    __func__, error));

	vcpu_lock(vcpu);
	while (1) {
		/*
		 * Do a final check for pending NMI or interrupts before
		 * really putting this thread to sleep. Also check for
		 * software events that would cause this vcpu to wake up.
		 *
		 * These interrupts/events could have happened after the
		 * vcpu returned from VMRUN() and before it acquired the
		 * vcpu lock above.
		 */
		if (vm->rendezvous_func != NULL || vm->suspend)
			break;
		if (vm_nmi_pending(vm, vcpuid))
			break;
		if (!intr_disabled) {
			if (vm_extint_pending(vm, vcpuid) ||
			    vlapic_pending_intr(vcpu->vlapic, NULL)) {
				break;
			}
		}

		/* Don't go to sleep if the vcpu thread needs to yield */
		if (vcpu_should_yield(vm, vcpuid))
			break;

		/*
		 * Some Linux guests implement "halt" by having all vcpus
		 * execute HLT with interrupts disabled. 'halted_cpus' keeps
		 * track of the vcpus that have entered this state. When all
		 * vcpus enter the halted state the virtual machine is halted.
		 */
		if (intr_disabled) {
			wmesg = "vmhalt";
			VCPU_CTR0(vm, vcpuid, "Halted");
			if (!vcpu_halted && halt_detection_enabled) {
				vcpu_halted = 1;
				CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus);
			}
			if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) {
				vm_halted = 1;
				break;
			}
		} else {
			wmesg = "vmidle";
		}

		t = ticks;
		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		/*
		 * XXX msleep_spin() cannot be interrupted by signals so
		 * wake up periodically to check pending signals.
		 */
		msleep_spin(vcpu, &vcpu->mtx, wmesg, hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
	}

	if (vcpu_halted)
		CPU_CLR_ATOMIC(vcpuid, &vm->halted_cpus);

	vcpu_unlock(vcpu);

	if (vm_halted)
		vm_suspend(vm, VM_SUSPEND_HALT);

	return (0);
}

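/*
 * Handle an exit due to a fault in the guest's nested page tables: first
 * try to emulate the accessed/dirty bit updates, and otherwise fault the
 * page in through the vmspace backing the guest.
 */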
static int
vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
{
	int rv, ftype;
	struct vm_map *map;
	struct vcpu *vcpu;
	struct vm_exit *vme;

	vcpu = &vm->vcpu[vcpuid];
	vme = &vcpu->exitinfo;

	ftype = vme->u.paging.fault_type;
	KASSERT(ftype == VM_PROT_READ ||
	    ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE,
	    ("vm_handle_paging: invalid fault_type %d", ftype));

	if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
		rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
		    vme->u.paging.gpa, ftype);
		if (rv == 0) {
			VCPU_CTR2(vm, vcpuid, "%s bit emulation for gpa %#lx",
			    ftype == VM_PROT_READ ? "accessed" : "dirty",
			    vme->u.paging.gpa);
			goto done;
		}
	}

	map = &vm->vmspace->vm_map;
	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL);

	VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, "
	    "ftype = %d", rv, vme->u.paging.gpa, ftype);

	if (rv != KERN_SUCCESS)
		return (EFAULT);
done:
	/* restart execution at the faulting instruction */
	vme->inst_length = 0;

	return (0);
}

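/*
 * Handle an exit that requires instruction emulation: fetch and decode the
 * faulting instruction, then emulate accesses to the in-kernel device
 * models (local APIC, ioapic, HPET) and bounce everything else to userland.
 */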
static int
vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
{
	struct vie *vie;
	struct vcpu *vcpu;
	struct vm_exit *vme;
	uint64_t gla, gpa;
	struct vm_guest_paging *paging;
	mem_region_read_t mread;
	mem_region_write_t mwrite;
	enum vm_cpu_mode cpu_mode;
	int cs_d, error, length;

	vcpu = &vm->vcpu[vcpuid];
	vme = &vcpu->exitinfo;

	gla = vme->u.inst_emul.gla;
	gpa = vme->u.inst_emul.gpa;
	cs_d = vme->u.inst_emul.cs_d;
	vie = &vme->u.inst_emul.vie;
	paging = &vme->u.inst_emul.paging;
	cpu_mode = paging->cpu_mode;

	VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %#lx", gpa);

	/* Fetch, decode and emulate the faulting instruction */
	if (vie->num_valid == 0) {
		/*
		 * If the instruction length is not known then assume a
		 * maximum size instruction.
		 */
		length = vme->inst_length ? vme->inst_length : VIE_INST_SIZE;
		error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip,
		    length, vie);
	} else {
		/*
		 * The instruction bytes have already been copied into 'vie'
		 */
		error = 0;
	}
	if (error == 1)
		return (0);		/* Resume guest to handle page fault */
	else if (error == -1)
		return (EFAULT);
	else if (error != 0)
		panic("%s: vmm_fetch_instruction error %d", __func__, error);

	if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0)
		return (EFAULT);

	/*
	 * If the instruction length was not specified then update it now.
	 */
	if (vme->inst_length == 0)
		vme->inst_length = vie->num_processed;

	/* return to userland unless this is an in-kernel emulated device */
	if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
		mread = lapic_mmio_read;
		mwrite = lapic_mmio_write;
	} else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
		mread = vioapic_mmio_read;
		mwrite = vioapic_mmio_write;
	} else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
		mread = vhpet_mmio_read;
		mwrite = vhpet_mmio_write;
	} else {
		*retu = true;
		return (0);
	}

	error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, paging,
	    mread, mwrite, retu);

	return (error);
}

static int
vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu)
{
	int i, done;
	struct vcpu *vcpu;

	done = 0;
	vcpu = &vm->vcpu[vcpuid];

	CPU_SET_ATOMIC(vcpuid, &vm->suspended_cpus);

	/*
	 * Wait until all 'active_cpus' have suspended themselves.
	 *
	 * Since a VM may be suspended at any time including when one or
	 * more vcpus are doing a rendezvous we need to call the rendezvous
	 * handler while we are waiting to prevent a deadlock.
	 */
	vcpu_lock(vcpu);
	while (1) {
		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
			VCPU_CTR0(vm, vcpuid, "All vcpus suspended");
			break;
		}

		if (vm->rendezvous_func == NULL) {
			VCPU_CTR0(vm, vcpuid, "Sleeping during suspend");
			vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
			msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
			vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		} else {
			VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend");
			vcpu_unlock(vcpu);
			vm_handle_rendezvous(vm, vcpuid);
			vcpu_lock(vcpu);
		}
	}
	vcpu_unlock(vcpu);

	/*
	 * Wake up the other sleeping vcpus and return to userspace.
	 */
	for (i = 0; i < VM_MAXCPU; i++) {
		if (CPU_ISSET(i, &vm->suspended_cpus)) {
			vcpu_notify_event(vm, i, false);
		}
	}

	*retu = true;
	return (0);
}

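/*
 * Record the reason for the suspend and nudge every active vcpu out of
 * the guest so that it can observe the pending suspend request.
 */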
1365221828Sgrehanint
1366268935Sjhbvm_suspend(struct vm *vm, enum vm_suspend_how how)
1367268935Sjhb{
1368268935Sjhb	int i;
1369268935Sjhb
1370268935Sjhb	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
1371268935Sjhb		return (EINVAL);
1372268935Sjhb
1373268935Sjhb	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
1374268935Sjhb		VM_CTR2(vm, "virtual machine already suspended %d/%d",
1375268935Sjhb		    vm->suspend, how);
1376268935Sjhb		return (EALREADY);
1377268935Sjhb	}
1378268935Sjhb
1379268935Sjhb	VM_CTR1(vm, "virtual machine successfully suspended %d", how);
1380268935Sjhb
1381268935Sjhb	/*
1382268935Sjhb	 * Notify all active vcpus that they are now suspended.
1383268935Sjhb	 */
1384268935Sjhb	for (i = 0; i < VM_MAXCPU; i++) {
1385268935Sjhb		if (CPU_ISSET(i, &vm->active_cpus))
1386268935Sjhb			vcpu_notify_event(vm, i, false);
1387268935Sjhb	}
1388268935Sjhb
1389268935Sjhb	return (0);
1390268935Sjhb}
1391268935Sjhb
1392268935Sjhbvoid
1393268935Sjhbvm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip)
1394268935Sjhb{
1395268935Sjhb	struct vm_exit *vmexit;
1396268935Sjhb
1397268935Sjhb	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
1398268935Sjhb	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));
1399268935Sjhb
1400268935Sjhb	vmexit = vm_exitinfo(vm, vcpuid);
1401268935Sjhb	vmexit->rip = rip;
1402268935Sjhb	vmexit->inst_length = 0;
1403268935Sjhb	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
1404268935Sjhb	vmexit->u.suspended.how = vm->suspend;
1405268935Sjhb}
1406268935Sjhb
1407270074Sgrehanvoid
1408270074Sgrehanvm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip)
1409270074Sgrehan{
1410270074Sgrehan	struct vm_exit *vmexit;
1411270074Sgrehan
1412270074Sgrehan	KASSERT(vm->rendezvous_func != NULL, ("rendezvous not in progress"));
1413270074Sgrehan
1414270074Sgrehan	vmexit = vm_exitinfo(vm, vcpuid);
1415270074Sgrehan	vmexit->rip = rip;
1416270074Sgrehan	vmexit->inst_length = 0;
1417270074Sgrehan	vmexit->exitcode = VM_EXITCODE_RENDEZVOUS;
1418270074Sgrehan	vmm_stat_incr(vm, vcpuid, VMEXIT_RENDEZVOUS, 1);
1419270074Sgrehan}
1420270074Sgrehan
1421270074Sgrehanvoid
1422270074Sgrehanvm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip)
1423270074Sgrehan{
1424270074Sgrehan	struct vm_exit *vmexit;
1425270074Sgrehan
1426270074Sgrehan	vmexit = vm_exitinfo(vm, vcpuid);
1427270074Sgrehan	vmexit->rip = rip;
1428270074Sgrehan	vmexit->inst_length = 0;
1429270074Sgrehan	vmexit->exitcode = VM_EXITCODE_BOGUS;
1430270074Sgrehan	vmm_stat_incr(vm, vcpuid, VMEXIT_ASTPENDING, 1);
1431270074Sgrehan}
1432270074Sgrehan
1433268935Sjhbint
1434221828Sgrehanvm_run(struct vm *vm, struct vm_run *vmrun)
1435221828Sgrehan{
1436256072Sneel	int error, vcpuid;
1437221828Sgrehan	struct vcpu *vcpu;
1438221828Sgrehan	struct pcb *pcb;
1439242065Sneel	uint64_t tscval, rip;
1440242065Sneel	struct vm_exit *vme;
1441262350Sjhb	bool retu, intr_disabled;
1442256072Sneel	pmap_t pmap;
1443268935Sjhb	void *rptr, *sptr;
1444221828Sgrehan
1445221828Sgrehan	vcpuid = vmrun->cpuid;
1446221828Sgrehan
1447221828Sgrehan	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1448221828Sgrehan		return (EINVAL);
1449221828Sgrehan
1450270070Sgrehan	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
1451270070Sgrehan		return (EINVAL);
1452270070Sgrehan
1453270070Sgrehan	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
1454270070Sgrehan		return (EINVAL);
1455270070Sgrehan
1456268935Sjhb	rptr = &vm->rendezvous_func;
1457268935Sjhb	sptr = &vm->suspend;
1458256072Sneel	pmap = vmspace_pmap(vm->vmspace);
1459221828Sgrehan	vcpu = &vm->vcpu[vcpuid];
1460256072Sneel	vme = &vcpu->exitinfo;
1461242065Sneel	rip = vmrun->rip;
1462242065Sneelrestart:
1463221828Sgrehan	critical_enter();
1464221828Sgrehan
1465256072Sneel	KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active),
1466256072Sneel	    ("vm_run: absurd pm_active"));
1467256072Sneel
1468221828Sgrehan	tscval = rdtsc();
1469221828Sgrehan
1470221828Sgrehan	pcb = PCPU_GET(curpcb);
1471221914Sjhb	set_pcb_flags(pcb, PCB_FULL_IRET);
1472221828Sgrehan
1473221828Sgrehan	restore_guest_fpustate(vcpu);
1474241489Sneel
1475256072Sneel	vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
1476268935Sjhb	error = VMRUN(vm->cookie, vcpuid, rip, pmap, rptr, sptr);
1477256072Sneel	vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
1478241489Sneel
1479221828Sgrehan	save_guest_fpustate(vcpu);
1480221828Sgrehan
1481221828Sgrehan	vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);
1482221828Sgrehan
1483221828Sgrehan	critical_exit();
1484221828Sgrehan
1485256072Sneel	if (error == 0) {
1486262350Sjhb		retu = false;
1487256072Sneel		switch (vme->exitcode) {
1488268935Sjhb		case VM_EXITCODE_SUSPENDED:
1489268935Sjhb			error = vm_handle_suspend(vm, vcpuid, &retu);
1490268935Sjhb			break;
1491266339Sjhb		case VM_EXITCODE_IOAPIC_EOI:
1492266339Sjhb			vioapic_process_eoi(vm, vcpuid,
1493266339Sjhb			    vme->u.ioapic_eoi.vector);
1494266339Sjhb			break;
1495266339Sjhb		case VM_EXITCODE_RENDEZVOUS:
1496266339Sjhb			vm_handle_rendezvous(vm, vcpuid);
1497266339Sjhb			error = 0;
1498266339Sjhb			break;
1499256072Sneel		case VM_EXITCODE_HLT:
1500262350Sjhb			intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
1501262350Sjhb			error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu);
1502256072Sneel			break;
1503256072Sneel		case VM_EXITCODE_PAGING:
1504256072Sneel			error = vm_handle_paging(vm, vcpuid, &retu);
1505256072Sneel			break;
1506256072Sneel		case VM_EXITCODE_INST_EMUL:
1507256072Sneel			error = vm_handle_inst_emul(vm, vcpuid, &retu);
1508256072Sneel			break;
1509268976Sjhb		case VM_EXITCODE_INOUT:
1510268976Sjhb		case VM_EXITCODE_INOUT_STR:
1511268976Sjhb			error = vm_handle_inout(vm, vcpuid, vme, &retu);
1512268976Sjhb			break;
1513276349Sneel		case VM_EXITCODE_MONITOR:
1514276349Sneel		case VM_EXITCODE_MWAIT:
1515276349Sneel			vm_inject_ud(vm, vcpuid);
1516276349Sneel			break;
1517256072Sneel		default:
1518262350Sjhb			retu = true;	/* handled in userland */
1519256072Sneel			break;
1520242065Sneel		}
1521256072Sneel	}
1522242065Sneel
1523262350Sjhb	if (error == 0 && retu == false) {
1524242065Sneel		rip = vme->rip + vme->inst_length;
1525242065Sneel		goto restart;
1526242065Sneel	}
1527242065Sneel
1528256072Sneel	/* copy the exit information */
1529256072Sneel	bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
1530221828Sgrehan	return (error);
1531221828Sgrehan}
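
/*
 * For illustration, a userland vcpu loop is expected to drive vm_run()
 * through the vmm device node roughly as sketched below; the 'vmfd'
 * descriptor and the handle_exit() helper are assumptions, but the
 * 'struct vm_run' fields match the ones consumed above:
 *
 *	struct vm_run vmrun;
 *
 *	vmrun.cpuid = vcpuid;
 *	vmrun.rip = rip;
 *	if (ioctl(vmfd, VM_RUN, &vmrun) == 0) {
 *		// vmrun.vm_exit now holds the exit details copied out
 *		// by vm_run() for userland to act on
 *		handle_exit(&vmrun.vm_exit);
 *	}
 */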
1532221828Sgrehan
1533221828Sgrehanint
1534270159Sgrehanvm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info)
1535270159Sgrehan{
1536270159Sgrehan	struct vcpu *vcpu;
1537270159Sgrehan	int type, vector;
1538270159Sgrehan
1539270159Sgrehan	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1540270159Sgrehan		return (EINVAL);
1541270159Sgrehan
1542270159Sgrehan	vcpu = &vm->vcpu[vcpuid];
1543270159Sgrehan
1544270159Sgrehan	if (info & VM_INTINFO_VALID) {
1545270159Sgrehan		type = info & VM_INTINFO_TYPE;
1546270159Sgrehan		vector = info & 0xff;
1547270159Sgrehan		if (type == VM_INTINFO_NMI && vector != IDT_NMI)
1548270159Sgrehan			return (EINVAL);
1549270159Sgrehan		if (type == VM_INTINFO_HWEXCEPTION && vector >= 32)
1550270159Sgrehan			return (EINVAL);
1551270159Sgrehan		if (info & VM_INTINFO_RSVD)
1552270159Sgrehan			return (EINVAL);
1553270159Sgrehan	} else {
1554270159Sgrehan		info = 0;
1555270159Sgrehan	}
1556270159Sgrehan	VCPU_CTR2(vm, vcpuid, "%s: info1(%#lx)", __func__, info);
1557270159Sgrehan	vcpu->exitintinfo = info;
1558270159Sgrehan	return (0);
1559270159Sgrehan}
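
/*
 * For illustration: the validation above expects 'info' in the
 * hardware event-injection layout that vcpu_exception_intinfo() below
 * also produces; e.g. a valid #GP with a delivered error code of zero
 * could be encoded (sketch) as:
 *
 *	info = IDT_GP | VM_INTINFO_HWEXCEPTION | VM_INTINFO_DEL_ERRCODE |
 *	    VM_INTINFO_VALID;	// the error code occupies bits 63:32
 */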
1560270159Sgrehan
1561270159Sgrehanenum exc_class {
1562270159Sgrehan	EXC_BENIGN,
1563270159Sgrehan	EXC_CONTRIBUTORY,
1564270159Sgrehan	EXC_PAGEFAULT
1565270159Sgrehan};
1566270159Sgrehan
1567270159Sgrehan#define	IDT_VE	20	/* Virtualization Exception (Intel specific) */
1568270159Sgrehan
1569270159Sgrehanstatic enum exc_class
1570270159Sgrehanexception_class(uint64_t info)
1571270159Sgrehan{
1572270159Sgrehan	int type, vector;
1573270159Sgrehan
1574270159Sgrehan	KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %#lx", info));
1575270159Sgrehan	type = info & VM_INTINFO_TYPE;
1576270159Sgrehan	vector = info & 0xff;
1577270159Sgrehan
1578270159Sgrehan	/* Table 6-4, "Interrupt and Exception Classes", Intel SDM, Vol 3 */
1579270159Sgrehan	switch (type) {
1580270159Sgrehan	case VM_INTINFO_HWINTR:
1581270159Sgrehan	case VM_INTINFO_SWINTR:
1582270159Sgrehan	case VM_INTINFO_NMI:
1583270159Sgrehan		return (EXC_BENIGN);
1584270159Sgrehan	default:
1585270159Sgrehan		/*
1586270159Sgrehan		 * Hardware exception.
1587270159Sgrehan		 *
1588270159Sgrehan		 * SVM and VT-x use identical type values to represent NMI,
1589270159Sgrehan		 * hardware interrupt and software interrupt.
1590270159Sgrehan		 *
1591270159Sgrehan		 * SVM uses type '3' for all exceptions. VT-x uses type '3'
1592270159Sgrehan		 * for all exceptions except #BP and #OF, which use a type
1593270159Sgrehan		 * value of '5' or '6'. Therefore we don't check for explicit
1594270159Sgrehan		 * values of 'type' when classifying 'intinfo' as a hardware
1595270159Sgrehan		 * exception.
1596270159Sgrehan		 */
1597270159Sgrehan		break;
1598270159Sgrehan	}
1599270159Sgrehan
1600270159Sgrehan	switch (vector) {
1601270159Sgrehan	case IDT_PF:
1602270159Sgrehan	case IDT_VE:
1603270159Sgrehan		return (EXC_PAGEFAULT);
1604270159Sgrehan	case IDT_DE:
1605270159Sgrehan	case IDT_TS:
1606270159Sgrehan	case IDT_NP:
1607270159Sgrehan	case IDT_SS:
1608270159Sgrehan	case IDT_GP:
1609270159Sgrehan		return (EXC_CONTRIBUTORY);
1610270159Sgrehan	default:
1611270159Sgrehan		return (EXC_BENIGN);
1612270159Sgrehan	}
1613270159Sgrehan}
1614270159Sgrehan
1615270159Sgrehanstatic int
1616270159Sgrehannested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2,
1617270159Sgrehan    uint64_t *retinfo)
1618270159Sgrehan{
1619270159Sgrehan	enum exc_class exc1, exc2;
1620270159Sgrehan	int type1, vector1;
1621270159Sgrehan
1622270159Sgrehan	KASSERT(info1 & VM_INTINFO_VALID, ("info1 %#lx is not valid", info1));
1623270159Sgrehan	KASSERT(info2 & VM_INTINFO_VALID, ("info2 %#lx is not valid", info2));
1624270159Sgrehan
1625270159Sgrehan	/*
1626270159Sgrehan	 * If an exception occurs while attempting to call the double-fault
1627270159Sgrehan	 * handler the processor enters shutdown mode (aka triple fault).
1628270159Sgrehan	 */
1629270159Sgrehan	type1 = info1 & VM_INTINFO_TYPE;
1630270159Sgrehan	vector1 = info1 & 0xff;
1631270159Sgrehan	if (type1 == VM_INTINFO_HWEXCEPTION && vector1 == IDT_DF) {
1632270159Sgrehan		VCPU_CTR2(vm, vcpuid, "triple fault: info1(%#lx), info2(%#lx)",
1633270159Sgrehan		    info1, info2);
1634270159Sgrehan		vm_suspend(vm, VM_SUSPEND_TRIPLEFAULT);
1635270159Sgrehan		*retinfo = 0;
1636270159Sgrehan		return (0);
1637270159Sgrehan	}
1638270159Sgrehan
1639270159Sgrehan	/*
1640270159Sgrehan	 * Table 6-5 "Conditions for Generating a Double Fault", Intel SDM, Vol3
1641270159Sgrehan	 */
1642270159Sgrehan	exc1 = exception_class(info1);
1643270159Sgrehan	exc2 = exception_class(info2);
1644270159Sgrehan	if ((exc1 == EXC_CONTRIBUTORY && exc2 == EXC_CONTRIBUTORY) ||
1645270159Sgrehan	    (exc1 == EXC_PAGEFAULT && exc2 != EXC_BENIGN)) {
1646270159Sgrehan		/* Convert nested fault into a double fault. */
1647270159Sgrehan		*retinfo = IDT_DF;
1648270159Sgrehan		*retinfo |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION;
1649270159Sgrehan		*retinfo |= VM_INTINFO_DEL_ERRCODE;
1650270159Sgrehan	} else {
1651270159Sgrehan		/* Handle exceptions serially */
1652270159Sgrehan		*retinfo = info2;
1653270159Sgrehan	}
1654270159Sgrehan	return (1);
1655270159Sgrehan}
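
/*
 * Worked example of the rules above: a #GP (contributory) raised while
 * delivering a #PF (page-fault class) collapses into a #DF with a zero
 * error code, whereas a #PF raised while delivering a #DB (benign) is
 * simply queued to be delivered serially.
 */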
1656270159Sgrehan
1657270159Sgrehanstatic uint64_t
1658270159Sgrehanvcpu_exception_intinfo(struct vcpu *vcpu)
1659270159Sgrehan{
1660270159Sgrehan	uint64_t info = 0;
1661270159Sgrehan
1662270159Sgrehan	if (vcpu->exception_pending) {
1663270159Sgrehan		info = vcpu->exception.vector & 0xff;
1664270159Sgrehan		info |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION;
1665270159Sgrehan		if (vcpu->exception.error_code_valid) {
1666270159Sgrehan			info |= VM_INTINFO_DEL_ERRCODE;
1667270159Sgrehan			info |= (uint64_t)vcpu->exception.error_code << 32;
1668270159Sgrehan		}
1669270159Sgrehan	}
1670270159Sgrehan	return (info);
1671270159Sgrehan}
1672270159Sgrehan
1673270159Sgrehanint
1674270159Sgrehanvm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo)
1675270159Sgrehan{
1676270159Sgrehan	struct vcpu *vcpu;
1677270159Sgrehan	uint64_t info1, info2;
1678270159Sgrehan	int valid;
1679270159Sgrehan
1680270159Sgrehan	KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid));
1681270159Sgrehan
1682270159Sgrehan	vcpu = &vm->vcpu[vcpuid];
1683270159Sgrehan
1684270159Sgrehan	info1 = vcpu->exitintinfo;
1685270159Sgrehan	vcpu->exitintinfo = 0;
1686270159Sgrehan
1687270159Sgrehan	info2 = 0;
1688270159Sgrehan	if (vcpu->exception_pending) {
1689270159Sgrehan		info2 = vcpu_exception_intinfo(vcpu);
1690270159Sgrehan		vcpu->exception_pending = 0;
1691270159Sgrehan		VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %#lx",
1692270159Sgrehan		    vcpu->exception.vector, info2);
1693270159Sgrehan	}
1694270159Sgrehan
1695270159Sgrehan	if ((info1 & VM_INTINFO_VALID) && (info2 & VM_INTINFO_VALID)) {
1696270159Sgrehan		valid = nested_fault(vm, vcpuid, info1, info2, retinfo);
1697270159Sgrehan	} else if (info1 & VM_INTINFO_VALID) {
1698270159Sgrehan		*retinfo = info1;
1699270159Sgrehan		valid = 1;
1700270159Sgrehan	} else if (info2 & VM_INTINFO_VALID) {
1701270159Sgrehan		*retinfo = info2;
1702270159Sgrehan		valid = 1;
1703270159Sgrehan	} else {
1704270159Sgrehan		valid = 0;
1705270159Sgrehan	}
1706270159Sgrehan
1707270159Sgrehan	if (valid) {
1708270159Sgrehan		VCPU_CTR4(vm, vcpuid, "%s: info1(%#lx), info2(%#lx), "
1709270159Sgrehan		    "retinfo(%#lx)", __func__, info1, info2, *retinfo);
1710270159Sgrehan	}
1711270159Sgrehan
1712270159Sgrehan	return (valid);
1713270159Sgrehan}
1714270159Sgrehan
1715270159Sgrehanint
1716270159Sgrehanvm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2)
1717270159Sgrehan{
1718270159Sgrehan	struct vcpu *vcpu;
1719270159Sgrehan
1720270159Sgrehan	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1721270159Sgrehan		return (EINVAL);
1722270159Sgrehan
1723270159Sgrehan	vcpu = &vm->vcpu[vcpuid];
1724270159Sgrehan	*info1 = vcpu->exitintinfo;
1725270159Sgrehan	*info2 = vcpu_exception_intinfo(vcpu);
1726270159Sgrehan	return (0);
1727270159Sgrehan}
1728270159Sgrehan
1729270159Sgrehanint
1730267427Sjhbvm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception)
1731221828Sgrehan{
1732267427Sjhb	struct vcpu *vcpu;
1733267427Sjhb
1734221828Sgrehan	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1735221828Sgrehan		return (EINVAL);
1736221828Sgrehan
1737267427Sjhb	if (exception->vector < 0 || exception->vector >= 32)
1738221828Sgrehan		return (EINVAL);
1739221828Sgrehan
1740270159Sgrehan	/*
1741270159Sgrehan	 * A double fault exception should never be injected directly into
1742270159Sgrehan	 * the guest. It is a derived exception that results from specific
1743270159Sgrehan	 * combinations of nested faults.
1744270159Sgrehan	 */
1745270159Sgrehan	if (exception->vector == IDT_DF)
1746270159Sgrehan		return (EINVAL);
1747270159Sgrehan
1748267427Sjhb	vcpu = &vm->vcpu[vcpuid];
1749221828Sgrehan
1750267427Sjhb	if (vcpu->exception_pending) {
1751267427Sjhb		VCPU_CTR2(vm, vcpuid, "Unable to inject exception %d due to "
1752267427Sjhb		    "pending exception %d", exception->vector,
1753267427Sjhb		    vcpu->exception.vector);
1754267427Sjhb		return (EBUSY);
1755267427Sjhb	}
1756267427Sjhb
1757267427Sjhb	vcpu->exception_pending = 1;
1758267427Sjhb	vcpu->exception = *exception;
1759267427Sjhb	VCPU_CTR1(vm, vcpuid, "Exception %d pending", exception->vector);
1760267427Sjhb	return (0);
1761221828Sgrehan}
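
/*
 * Example (sketch): a caller queueing a #GP with a zero error code
 * could fill in the vm_exception like so; a second call before the
 * first exception is delivered fails with EBUSY per the check above:
 *
 *	struct vm_exception gpf = {
 *		.vector = IDT_GP,
 *		.error_code_valid = 1,
 *		.error_code = 0,
 *	};
 *	error = vm_inject_exception(vm, vcpuid, &gpf);
 */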
1762221828Sgrehan
1763270159Sgrehanvoid
1764270159Sgrehanvm_inject_fault(void *vmarg, int vcpuid, int vector, int errcode_valid,
1765270159Sgrehan    int errcode)
1766267427Sjhb{
1767270159Sgrehan	struct vm_exception exception;
1768267427Sjhb	struct vm_exit *vmexit;
1769270159Sgrehan	struct vm *vm;
1770267427Sjhb	int error;
1771267427Sjhb
1772270159Sgrehan	vm = vmarg;
1773270159Sgrehan
1774270159Sgrehan	exception.vector = vector;
1775270159Sgrehan	exception.error_code = errcode;
1776270159Sgrehan	exception.error_code_valid = errcode_valid;
1777270159Sgrehan	error = vm_inject_exception(vm, vcpuid, &exception);
1778267427Sjhb	KASSERT(error == 0, ("vm_inject_exception error %d", error));
1779267427Sjhb
1780267427Sjhb	/*
1781267427Sjhb	 * A fault-like exception allows the instruction to be restarted
1782267427Sjhb	 * after the exception handler returns.
1783267427Sjhb	 *
1784267427Sjhb	 * By setting the inst_length to 0 we ensure that the instruction
1785267427Sjhb	 * pointer remains at the faulting instruction.
1786267427Sjhb	 */
1787267427Sjhb	vmexit = vm_exitinfo(vm, vcpuid);
1788267427Sjhb	vmexit->inst_length = 0;
1789267427Sjhb}
1790267427Sjhb
1791267427Sjhbvoid
1792270159Sgrehanvm_inject_pf(void *vmarg, int vcpuid, int error_code, uint64_t cr2)
1793268976Sjhb{
1794270159Sgrehan	struct vm *vm;
1795268976Sjhb	int error;
1796268976Sjhb
1797270159Sgrehan	vm = vmarg;
1798268976Sjhb	VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %#x, cr2 %#lx",
1799268976Sjhb	    error_code, cr2);
1800268976Sjhb
1801268976Sjhb	error = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR2, cr2);
1802268976Sjhb	KASSERT(error == 0, ("vm_set_register(cr2) error %d", error));
1803268976Sjhb
1804270159Sgrehan	vm_inject_fault(vm, vcpuid, IDT_PF, 1, error_code);
1805268976Sjhb}
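
/*
 * For example, a failed guest page-table walk during instruction
 * emulation might be reported as (sketch):
 *
 *	vm_inject_pf(vm, vcpuid, pf_error, gla);
 *
 * so that the guest observes %cr2 pointing at the faulting linear
 * address alongside the #PF error code.
 */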
1806268976Sjhb
1807248389Sneelstatic VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");
1808241982Sneel
1809221828Sgrehanint
1810241982Sneelvm_inject_nmi(struct vm *vm, int vcpuid)
1811221828Sgrehan{
1812241982Sneel	struct vcpu *vcpu;
1813221828Sgrehan
1814241982Sneel	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1815221828Sgrehan		return (EINVAL);
1816221828Sgrehan
1817241982Sneel	vcpu = &vm->vcpu[vcpuid];
1818241982Sneel
1819241982Sneel	vcpu->nmi_pending = 1;
1820266339Sjhb	vcpu_notify_event(vm, vcpuid, false);
1821241982Sneel	return (0);
1822221828Sgrehan}
1823221828Sgrehan
1824221828Sgrehanint
1825241982Sneelvm_nmi_pending(struct vm *vm, int vcpuid)
1826241982Sneel{
1827241982Sneel	struct vcpu *vcpu;
1828241982Sneel
1829241982Sneel	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1830241982Sneel		panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);
1831241982Sneel
1832241982Sneel	vcpu = &vm->vcpu[vcpuid];
1833241982Sneel
1834241982Sneel	return (vcpu->nmi_pending);
1835241982Sneel}
1836241982Sneel
1837241982Sneelvoid
1838241982Sneelvm_nmi_clear(struct vm *vm, int vcpuid)
1839241982Sneel{
1840241982Sneel	struct vcpu *vcpu;
1841241982Sneel
1842241982Sneel	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1843241982Sneel		panic("vm_nmi_clear: invalid vcpuid %d", vcpuid);
1844241982Sneel
1845241982Sneel	vcpu = &vm->vcpu[vcpuid];
1846241982Sneel
1847241982Sneel	if (vcpu->nmi_pending == 0)
1848241982Sneel		panic("vm_nmi_clear: inconsistent nmi_pending state");
1849241982Sneel
1850241982Sneel	vcpu->nmi_pending = 0;
1851241982Sneel	vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1);
1852241982Sneel}
1853241982Sneel
1854268891Sjhbstatic VMM_STAT(VCPU_EXTINT_COUNT, "number of ExtINTs delivered to vcpu");
1855268891Sjhb
1856241982Sneelint
1857268891Sjhbvm_inject_extint(struct vm *vm, int vcpuid)
1858268891Sjhb{
1859268891Sjhb	struct vcpu *vcpu;
1860268891Sjhb
1861268891Sjhb	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1862268891Sjhb		return (EINVAL);
1863268891Sjhb
1864268891Sjhb	vcpu = &vm->vcpu[vcpuid];
1865268891Sjhb
1866268891Sjhb	vcpu->extint_pending = 1;
1867268891Sjhb	vcpu_notify_event(vm, vcpuid, false);
1868268891Sjhb	return (0);
1869268891Sjhb}
1870268891Sjhb
1871268891Sjhbint
1872268891Sjhbvm_extint_pending(struct vm *vm, int vcpuid)
1873268891Sjhb{
1874268891Sjhb	struct vcpu *vcpu;
1875268891Sjhb
1876268891Sjhb	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1877268891Sjhb		panic("vm_extint_pending: invalid vcpuid %d", vcpuid);
1878268891Sjhb
1879268891Sjhb	vcpu = &vm->vcpu[vcpuid];
1880268891Sjhb
1881268891Sjhb	return (vcpu->extint_pending);
1882268891Sjhb}
1883268891Sjhb
1884268891Sjhbvoid
1885268891Sjhbvm_extint_clear(struct vm *vm, int vcpuid)
1886268891Sjhb{
1887268891Sjhb	struct vcpu *vcpu;
1888268891Sjhb
1889268891Sjhb	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1890268891Sjhb		panic("vm_extint_clear: invalid vcpuid %d", vcpuid);
1891268891Sjhb
1892268891Sjhb	vcpu = &vm->vcpu[vcpuid];
1893268891Sjhb
1894268891Sjhb	if (vcpu->extint_pending == 0)
1895268891Sjhb		panic("vm_extint_clear: inconsistent extint_pending state");
1896268891Sjhb
1897268891Sjhb	vcpu->extint_pending = 0;
1898268891Sjhb	vmm_stat_incr(vm, vcpuid, VCPU_EXTINT_COUNT, 1);
1899268891Sjhb}
1900268891Sjhb
1901268891Sjhbint
1902221828Sgrehanvm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
1903221828Sgrehan{
1904221828Sgrehan	if (vcpu < 0 || vcpu >= VM_MAXCPU)
1905221828Sgrehan		return (EINVAL);
1906221828Sgrehan
1907221828Sgrehan	if (type < 0 || type >= VM_CAP_MAX)
1908221828Sgrehan		return (EINVAL);
1909221828Sgrehan
1910221828Sgrehan	return (VMGETCAP(vm->cookie, vcpu, type, retval));
1911221828Sgrehan}
1912221828Sgrehan
1913221828Sgrehanint
1914221828Sgrehanvm_set_capability(struct vm *vm, int vcpu, int type, int val)
1915221828Sgrehan{
1916221828Sgrehan	if (vcpu < 0 || vcpu >= VM_MAXCPU)
1917221828Sgrehan		return (EINVAL);
1918221828Sgrehan
1919221828Sgrehan	if (type < 0 || type >= VM_CAP_MAX)
1920221828Sgrehan		return (EINVAL);
1921221828Sgrehan
1922221828Sgrehan	return (VMSETCAP(vm->cookie, vcpu, type, val));
1923221828Sgrehan}
1924221828Sgrehan
1925221828Sgrehanstruct vlapic *
1926221828Sgrehanvm_lapic(struct vm *vm, int cpu)
1927221828Sgrehan{
1928221828Sgrehan	return (vm->vcpu[cpu].vlapic);
1929221828Sgrehan}
1930221828Sgrehan
1931261088Sjhbstruct vioapic *
1932261088Sjhbvm_ioapic(struct vm *vm)
1933261088Sjhb{
1934261088Sjhb
1935261088Sjhb	return (vm->vioapic);
1936261088Sjhb}
1937261088Sjhb
1938261088Sjhbstruct vhpet *
1939261088Sjhbvm_hpet(struct vm *vm)
1940261088Sjhb{
1941261088Sjhb
1942261088Sjhb	return (vm->vhpet);
1943261088Sjhb}
1944261088Sjhb
1945221828Sgrehanboolean_t
1946221828Sgrehanvmm_is_pptdev(int bus, int slot, int func)
1947221828Sgrehan{
1948246188Sneel	int found, i, n;
1949246188Sneel	int b, s, f;
1950221828Sgrehan	char *val, *cp, *cp2;
1951221828Sgrehan
1952221828Sgrehan	/*
1953246188Sneel	 * XXX
1954246188Sneel	 * The length of an environment variable is limited to 128 bytes, which
1955246188Sneel	 * puts an upper limit on the number of passthru devices that may be
1956246188Sneel	 * specified using a single environment variable.
1957246188Sneel	 *
1958246188Sneel	 * Work around this by scanning multiple environment variable
1959246188Sneel	 * names instead of a single one - yuck!
1960221828Sgrehan	 */
1961246188Sneel	const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL };
1962246188Sneel
1963246188Sneel	/* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */
1964221828Sgrehan	found = 0;
1965246188Sneel	for (i = 0; names[i] != NULL && !found; i++) {
1966246188Sneel		cp = val = getenv(names[i]);
1967246188Sneel		while (cp != NULL && *cp != '\0') {
1968246188Sneel			if ((cp2 = strchr(cp, ' ')) != NULL)
1969246188Sneel				*cp2 = '\0';
1970221828Sgrehan
1971246188Sneel			n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
1972246188Sneel			if (n == 3 && bus == b && slot == s && func == f) {
1973246188Sneel				found = 1;
1974246188Sneel				break;
1975246188Sneel			}
1976221828Sgrehan
1977246188Sneel			if (cp2 != NULL)
1978246188Sneel				*cp2++ = ' ';
1979221828Sgrehan
1980246188Sneel			cp = cp2;
1981246188Sneel		}
1982246188Sneel		freeenv(val);
1983221828Sgrehan	}
1984221828Sgrehan	return (found);
1985221828Sgrehan}
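
/*
 * For example, marking PCI devices 4/5/6 and 6/0/0 for passthru could
 * be done with a loader tunable like:
 *
 *	pptdevs="4/5/6 6/0/0"
 *
 * spilling additional devices into pptdevs2/pptdevs3 once the 128-byte
 * limit described above is reached.
 */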
1986221828Sgrehan
1987221828Sgrehanvoid *
1988221828Sgrehanvm_iommu_domain(struct vm *vm)
1989221828Sgrehan{
1990221828Sgrehan
1991221828Sgrehan	return (vm->iommu);
1992221828Sgrehan}
1993221828Sgrehan
1994241489Sneelint
1995266393Sjhbvcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate,
1996266393Sjhb    bool from_idle)
1997221828Sgrehan{
1998241489Sneel	int error;
1999221828Sgrehan	struct vcpu *vcpu;
2000221828Sgrehan
2001221828Sgrehan	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
2002221828Sgrehan		panic("vcpu_set_state: invalid vcpuid %d", vcpuid);
2003221828Sgrehan
2004221828Sgrehan	vcpu = &vm->vcpu[vcpuid];
2005221828Sgrehan
2006241489Sneel	vcpu_lock(vcpu);
2007266393Sjhb	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
2008241489Sneel	vcpu_unlock(vcpu);
2009241489Sneel
2010241489Sneel	return (error);
2011221828Sgrehan}
2012221828Sgrehan
2013241489Sneelenum vcpu_state
2014249879Sgrehanvcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
2015221828Sgrehan{
2016221828Sgrehan	struct vcpu *vcpu;
2017241489Sneel	enum vcpu_state state;
2018221828Sgrehan
2019221828Sgrehan	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
2020221828Sgrehan		panic("vcpu_get_state: invalid vcpuid %d", vcpuid);
2021221828Sgrehan
2022221828Sgrehan	vcpu = &vm->vcpu[vcpuid];
2023221828Sgrehan
2024241489Sneel	vcpu_lock(vcpu);
2025241489Sneel	state = vcpu->state;
2026249879Sgrehan	if (hostcpu != NULL)
2027249879Sgrehan		*hostcpu = vcpu->hostcpu;
2028241489Sneel	vcpu_unlock(vcpu);
2029221828Sgrehan
2030241489Sneel	return (state);
2031221828Sgrehan}
2032221828Sgrehan
2033270070Sgrehanint
2034221828Sgrehanvm_activate_cpu(struct vm *vm, int vcpuid)
2035221828Sgrehan{
2036221828Sgrehan
2037270070Sgrehan	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
2038270070Sgrehan		return (EINVAL);
2039266339Sjhb
2040270070Sgrehan	if (CPU_ISSET(vcpuid, &vm->active_cpus))
2041270070Sgrehan		return (EBUSY);
2042270070Sgrehan
2043266339Sjhb	VCPU_CTR0(vm, vcpuid, "activated");
2044266339Sjhb	CPU_SET_ATOMIC(vcpuid, &vm->active_cpus);
2045270070Sgrehan	return (0);
2046221828Sgrehan}
2047221828Sgrehan
2048223621Sgrehancpuset_t
2049221828Sgrehanvm_active_cpus(struct vm *vm)
2050221828Sgrehan{
2051221828Sgrehan
2052221828Sgrehan	return (vm->active_cpus);
2053221828Sgrehan}
2054221828Sgrehan
2055270070Sgrehancpuset_t
2056270070Sgrehanvm_suspended_cpus(struct vm *vm)
2057270070Sgrehan{
2058270070Sgrehan
2059270070Sgrehan	return (vm->suspended_cpus);
2060270070Sgrehan}
2061270070Sgrehan
2062221828Sgrehanvoid *
2063221828Sgrehanvcpu_stats(struct vm *vm, int vcpuid)
2064221828Sgrehan{
2065221828Sgrehan
2066221828Sgrehan	return (vm->vcpu[vcpuid].stats);
2067221828Sgrehan}
2068240922Sneel
2069240922Sneelint
2070240922Sneelvm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state)
2071240922Sneel{
2072240922Sneel	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
2073240922Sneel		return (EINVAL);
2074240922Sneel
2075240922Sneel	*state = vm->vcpu[vcpuid].x2apic_state;
2076240922Sneel
2077240922Sneel	return (0);
2078240922Sneel}
2079240922Sneel
2080240922Sneelint
2081240922Sneelvm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
2082240922Sneel{
2083240922Sneel	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
2084240922Sneel		return (EINVAL);
2085240922Sneel
2086248392Sneel	if (state >= X2APIC_STATE_LAST)
2087240922Sneel		return (EINVAL);
2088240922Sneel
2089240922Sneel	vm->vcpu[vcpuid].x2apic_state = state;
2090240922Sneel
2091240943Sneel	vlapic_set_x2apic_state(vm, vcpuid, state);
2092240943Sneel
2093240922Sneel	return (0);
2094240922Sneel}
2095241489Sneel
2096262350Sjhb/*
2097262350Sjhb * This function is called to ensure that a vcpu "sees" a pending event
2098262350Sjhb * as soon as possible:
2099262350Sjhb * - If the vcpu thread is sleeping then it is woken up.
2100262350Sjhb * - If the vcpu is running on a different host_cpu then an IPI will be directed
2101262350Sjhb *   to the host_cpu to cause the vcpu to trap into the hypervisor.
2102262350Sjhb */
2103241489Sneelvoid
2104266339Sjhbvcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
2105241489Sneel{
2106241489Sneel	int hostcpu;
2107241489Sneel	struct vcpu *vcpu;
2108241489Sneel
2109241489Sneel	vcpu = &vm->vcpu[vcpuid];
2110241489Sneel
2111242065Sneel	vcpu_lock(vcpu);
2112241489Sneel	hostcpu = vcpu->hostcpu;
2113266393Sjhb	if (vcpu->state == VCPU_RUNNING) {
2114266393Sjhb		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
2115266339Sjhb		if (hostcpu != curcpu) {
2116266393Sjhb			if (lapic_intr) {
2117266339Sjhb				vlapic_post_intr(vcpu->vlapic, hostcpu,
2118266339Sjhb				    vmm_ipinum);
2119266393Sjhb			} else {
2120266339Sjhb				ipi_cpu(hostcpu, vmm_ipinum);
2121266393Sjhb			}
2122266393Sjhb		} else {
2123266393Sjhb			/*
2124266393Sjhb			 * If the 'vcpu' is running on 'curcpu' then it must
2125266393Sjhb			 * be sending a notification to itself (e.g. SELF_IPI).
2126266393Sjhb			 * The pending event will be picked up when the vcpu
2127266393Sjhb			 * transitions back to guest context.
2128266393Sjhb			 */
2129266339Sjhb		}
2130266393Sjhb	} else {
2131266393Sjhb		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
2132266393Sjhb		    "with hostcpu %d", vcpu->state, hostcpu));
2133266393Sjhb		if (vcpu->state == VCPU_SLEEPING)
2134266393Sjhb			wakeup_one(vcpu);
2135242065Sneel	}
2136242065Sneel	vcpu_unlock(vcpu);
2137241489Sneel}
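
/*
 * For example, code posting a local APIC interrupt is expected to call
 * (sketch) vcpu_notify_event(vm, vcpuid, true) so that a running vcpu
 * is nudged with a posted interrupt via vlapic_post_intr(), while the
 * other event sources in this file (NMI, ExtINT, suspend, rendezvous)
 * pass 'false' and fall back to a plain IPI or wakeup.
 */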
2138256072Sneel
2139256072Sneelstruct vmspace *
2140256072Sneelvm_get_vmspace(struct vm *vm)
2141256072Sneel{
2142256072Sneel
2143256072Sneel	return (vm->vmspace);
2144256072Sneel}
2145261088Sjhb
2146261088Sjhbint
2147261088Sjhbvm_apicid2vcpuid(struct vm *vm, int apicid)
2148261088Sjhb{
2149261088Sjhb	/*
2150261088Sjhb	 * XXX apic id is assumed to be numerically identical to vcpu id
2151261088Sjhb	 */
2152261088Sjhb	return (apicid);
2153261088Sjhb}
2154266339Sjhb
2155266339Sjhbvoid
2156266339Sjhbvm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
2157266339Sjhb    vm_rendezvous_func_t func, void *arg)
2158266339Sjhb{
2159266339Sjhb	int i;
2160266339Sjhb
2161266339Sjhb	/*
2162266339Sjhb	 * Enforce that this function is called without any locks
2163266339Sjhb	 */
2164266339Sjhb	WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous");
2165266339Sjhb	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
2166266339Sjhb	    ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid));
2167266339Sjhb
2168266339Sjhbrestart:
2169266339Sjhb	mtx_lock(&vm->rendezvous_mtx);
2170266339Sjhb	if (vm->rendezvous_func != NULL) {
2171266339Sjhb		/*
2172266339Sjhb		 * If a rendezvous is already in progress then we need to
2173266339Sjhb		 * call the rendezvous handler in case this 'vcpuid' is one
2174266339Sjhb		 * of the targets of the rendezvous.
2175266339Sjhb		 */
2176266339Sjhb		RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress");
2177266339Sjhb		mtx_unlock(&vm->rendezvous_mtx);
2178266339Sjhb		vm_handle_rendezvous(vm, vcpuid);
2179266339Sjhb		goto restart;
2180266339Sjhb	}
2181266339Sjhb	KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous "
2182266339Sjhb	    "rendezvous is still in progress"));
2183266339Sjhb
2184266339Sjhb	RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous");
2185266339Sjhb	vm->rendezvous_req_cpus = dest;
2186266339Sjhb	CPU_ZERO(&vm->rendezvous_done_cpus);
2187266339Sjhb	vm->rendezvous_arg = arg;
2188266339Sjhb	vm_set_rendezvous_func(vm, func);
2189266339Sjhb	mtx_unlock(&vm->rendezvous_mtx);
2190266339Sjhb
2191266339Sjhb	/*
2192266339Sjhb	 * Wake up any sleeping vcpus and trigger a VM-exit in any running
2193266339Sjhb	 * vcpus so they handle the rendezvous as soon as possible.
2194266339Sjhb	 */
2195266339Sjhb	for (i = 0; i < VM_MAXCPU; i++) {
2196266339Sjhb		if (CPU_ISSET(i, &dest))
2197266339Sjhb			vcpu_notify_event(vm, i, false);
2198266339Sjhb	}
2199266339Sjhb
2200266339Sjhb	vm_handle_rendezvous(vm, vcpuid);
2201266339Sjhb}
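
/*
 * A hedged sketch of a rendezvous user: the callback receives the vm,
 * the vcpuid currently executing it, and the opaque 'arg' passed in,
 * and runs once for every vcpu in 'dest' before the rendezvous
 * completes:
 *
 *	static void
 *	example_rendezvous_cb(struct vm *vm, int vcpuid, void *arg)
 *	{
 *		// per-vcpu work goes here
 *	}
 *
 *	vm_smp_rendezvous(vm, vcpuid, vm_active_cpus(vm),
 *	    example_rendezvous_cb, NULL);
 */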
2202268891Sjhb
2203268891Sjhbstruct vatpic *
2204268891Sjhbvm_atpic(struct vm *vm)
2205268891Sjhb{
2206268891Sjhb	return (vm->vatpic);
2207268891Sjhb}
2208268891Sjhb
2209268891Sjhbstruct vatpit *
2210268891Sjhbvm_atpit(struct vm *vm)
2211268891Sjhb{
2212268891Sjhb	return (vm->vatpit);
2213268891Sjhb}
2214268976Sjhb
2215268976Sjhbenum vm_reg_name
2216268976Sjhbvm_segment_name(int seg)
2217268976Sjhb{
2218268976Sjhb	static enum vm_reg_name seg_names[] = {
2219268976Sjhb		VM_REG_GUEST_ES,
2220268976Sjhb		VM_REG_GUEST_CS,
2221268976Sjhb		VM_REG_GUEST_SS,
2222268976Sjhb		VM_REG_GUEST_DS,
2223268976Sjhb		VM_REG_GUEST_FS,
2224268976Sjhb		VM_REG_GUEST_GS
2225268976Sjhb	};
2226268976Sjhb
2227268976Sjhb	KASSERT(seg >= 0 && seg < nitems(seg_names),
2228268976Sjhb	    ("%s: invalid segment encoding %d", __func__, seg));
2229268976Sjhb	return (seg_names[seg]);
2230268976Sjhb}
2231270074Sgrehan
2232270159Sgrehanvoid
2233270159Sgrehanvm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
2234270159Sgrehan    int num_copyinfo)
2235270159Sgrehan{
2236270159Sgrehan	int idx;
2237270074Sgrehan
2238270159Sgrehan	for (idx = 0; idx < num_copyinfo; idx++) {
2239270159Sgrehan		if (copyinfo[idx].cookie != NULL)
2240270159Sgrehan			vm_gpa_release(copyinfo[idx].cookie);
2241270159Sgrehan	}
2242270159Sgrehan	bzero(copyinfo, num_copyinfo * sizeof(struct vm_copyinfo));
2243270159Sgrehan}
2244270159Sgrehan
2245270159Sgrehanint
2246270159Sgrehanvm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
2247270159Sgrehan    uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo,
2248270159Sgrehan    int num_copyinfo)
2249270159Sgrehan{
2250270159Sgrehan	int error, idx, nused;
2251270159Sgrehan	size_t n, off, remaining;
2252270159Sgrehan	void *hva, *cookie;
2253270159Sgrehan	uint64_t gpa;
2254270159Sgrehan
2255270159Sgrehan	bzero(copyinfo, sizeof(struct vm_copyinfo) * num_copyinfo);
2256270159Sgrehan
2257270159Sgrehan	nused = 0;
2258270159Sgrehan	remaining = len;
2259270159Sgrehan	while (remaining > 0) {
2260270159Sgrehan		KASSERT(nused < num_copyinfo, ("insufficient vm_copyinfo"));
2261270159Sgrehan		error = vmm_gla2gpa(vm, vcpuid, paging, gla, prot, &gpa);
2262270159Sgrehan		if (error)
2263270159Sgrehan			return (error);
2264270159Sgrehan		off = gpa & PAGE_MASK;
2265270159Sgrehan		n = min(remaining, PAGE_SIZE - off);
2266270159Sgrehan		copyinfo[nused].gpa = gpa;
2267270159Sgrehan		copyinfo[nused].len = n;
2268270159Sgrehan		remaining -= n;
2269270159Sgrehan		gla += n;
2270270159Sgrehan		nused++;
2271270159Sgrehan	}
2272270159Sgrehan
2273270159Sgrehan	for (idx = 0; idx < nused; idx++) {
2274270159Sgrehan		hva = vm_gpa_hold(vm, copyinfo[idx].gpa, copyinfo[idx].len,
2275270159Sgrehan		    prot, &cookie);
2276270159Sgrehan		if (hva == NULL)
2277270159Sgrehan			break;
2278270159Sgrehan		copyinfo[idx].hva = hva;
2279270159Sgrehan		copyinfo[idx].cookie = cookie;
2280270159Sgrehan	}
2281270159Sgrehan
2282270159Sgrehan	if (idx != nused) {
2283270159Sgrehan		vm_copy_teardown(vm, vcpuid, copyinfo, num_copyinfo);
2284270159Sgrehan		return (-1);
2285270159Sgrehan	} else {
2286270159Sgrehan		return (0);
2287270159Sgrehan	}
2288270159Sgrehan}
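
/*
 * Typical usage (a sketch, assuming a read-only copy): resolve and pin
 * the guest buffer once, copy through the held mappings, then release
 * them. Two copyinfo entries suffice for a buffer of up to a page that
 * crosses at most one page boundary:
 *
 *	struct vm_copyinfo copyinfo[2];
 *
 *	if (vm_copy_setup(vm, vcpuid, paging, gla, len, VM_PROT_READ,
 *	    copyinfo, nitems(copyinfo)) == 0) {
 *		vm_copyin(vm, vcpuid, copyinfo, buf, len);
 *		vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
 *	}
 */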
2289270159Sgrehan
2290270159Sgrehanvoid
2291270159Sgrehanvm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr,
2292270159Sgrehan    size_t len)
2293270159Sgrehan{
2294270159Sgrehan	char *dst;
2295270159Sgrehan	int idx;
2296270159Sgrehan
2297270159Sgrehan	dst = kaddr;
2298270159Sgrehan	idx = 0;
2299270159Sgrehan	while (len > 0) {
2300270159Sgrehan		bcopy(copyinfo[idx].hva, dst, copyinfo[idx].len);
2301270159Sgrehan		len -= copyinfo[idx].len;
2302270159Sgrehan		dst += copyinfo[idx].len;
2303270159Sgrehan		idx++;
2304270159Sgrehan	}
2305270159Sgrehan}
2306270159Sgrehan
2307270159Sgrehanvoid
2308270159Sgrehanvm_copyout(struct vm *vm, int vcpuid, const void *kaddr,
2309270159Sgrehan    struct vm_copyinfo *copyinfo, size_t len)
2310270159Sgrehan{
2311270159Sgrehan	const char *src;
2312270159Sgrehan	int idx;
2313270159Sgrehan
2314270159Sgrehan	src = kaddr;
2315270159Sgrehan	idx = 0;
2316270159Sgrehan	while (len > 0) {
2317270159Sgrehan		bcopy(src, copyinfo[idx].hva, copyinfo[idx].len);
2318270159Sgrehan		len -= copyinfo[idx].len;
2319270159Sgrehan		src += copyinfo[idx].len;
2320270159Sgrehan		idx++;
2321270159Sgrehan	}
2322270159Sgrehan}
2323270159Sgrehan
2324270074Sgrehan/*
2325270074Sgrehan * Return the amount of in-use and wired memory for the VM. Since
2326270074Sgrehan * these are global stats, only return the values for vCPU 0.
2327270074Sgrehan */
2328270074SgrehanVMM_STAT_DECLARE(VMM_MEM_RESIDENT);
2329270074SgrehanVMM_STAT_DECLARE(VMM_MEM_WIRED);
2330270074Sgrehan
2331270074Sgrehanstatic void
2332270074Sgrehanvm_get_rescnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat)
2333270074Sgrehan{
2334270074Sgrehan
2335270074Sgrehan	if (vcpu == 0) {
2336270074Sgrehan		vmm_stat_set(vm, vcpu, VMM_MEM_RESIDENT,
2337270074Sgrehan		    PAGE_SIZE * vmspace_resident_count(vm->vmspace));
2338270074Sgrehan	}
2339270074Sgrehan}
2340270074Sgrehan
2341270074Sgrehanstatic void
2342270074Sgrehanvm_get_wiredcnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat)
2343270074Sgrehan{
2344270074Sgrehan
2345270074Sgrehan	if (vcpu == 0) {
2346270074Sgrehan		vmm_stat_set(vm, vcpu, VMM_MEM_WIRED,
2347270074Sgrehan		    PAGE_SIZE * pmap_wired_count(vmspace_pmap(vm->vmspace)));
2348270074Sgrehan	}
2349270074Sgrehan}
2350270074Sgrehan
2351270074SgrehanVMM_STAT_FUNC(VMM_MEM_RESIDENT, "Resident memory", vm_get_rescnt);
2352270074SgrehanVMM_STAT_FUNC(VMM_MEM_WIRED, "Wired memory", vm_get_wiredcnt);
2353