vmm.c revision 249879
1221828Sgrehan/*-
2221828Sgrehan * Copyright (c) 2011 NetApp, Inc.
3221828Sgrehan * All rights reserved.
4221828Sgrehan *
5221828Sgrehan * Redistribution and use in source and binary forms, with or without
6221828Sgrehan * modification, are permitted provided that the following conditions
7221828Sgrehan * are met:
8221828Sgrehan * 1. Redistributions of source code must retain the above copyright
9221828Sgrehan *    notice, this list of conditions and the following disclaimer.
10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright
11221828Sgrehan *    notice, this list of conditions and the following disclaimer in the
12221828Sgrehan *    documentation and/or other materials provided with the distribution.
13221828Sgrehan *
14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17221828Sgrehan * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24221828Sgrehan * SUCH DAMAGE.
25221828Sgrehan *
26221828Sgrehan * $FreeBSD: head/sys/amd64/vmm/vmm.c 249879 2013-04-25 04:56:43Z grehan $
27221828Sgrehan */
28221828Sgrehan
29221828Sgrehan#include <sys/cdefs.h>
30221828Sgrehan__FBSDID("$FreeBSD: head/sys/amd64/vmm/vmm.c 249879 2013-04-25 04:56:43Z grehan $");
31221828Sgrehan
32221828Sgrehan#include <sys/param.h>
33234695Sgrehan#include <sys/systm.h>
34221828Sgrehan#include <sys/kernel.h>
35221828Sgrehan#include <sys/module.h>
36221828Sgrehan#include <sys/sysctl.h>
37221828Sgrehan#include <sys/malloc.h>
38221828Sgrehan#include <sys/pcpu.h>
39221828Sgrehan#include <sys/lock.h>
40221828Sgrehan#include <sys/mutex.h>
41221828Sgrehan#include <sys/proc.h>
42221828Sgrehan#include <sys/sched.h>
43221828Sgrehan#include <sys/smp.h>
44221828Sgrehan#include <sys/systm.h>
45221828Sgrehan
46221828Sgrehan#include <vm/vm.h>
47221828Sgrehan
48221828Sgrehan#include <machine/vm.h>
49221828Sgrehan#include <machine/pcb.h>
50241489Sneel#include <machine/smp.h>
51221914Sjhb#include <x86/apicreg.h>
52221828Sgrehan
53221828Sgrehan#include <machine/vmm.h>
54242275Sneel#include "vmm_host.h"
55221828Sgrehan#include "vmm_mem.h"
56221828Sgrehan#include "vmm_util.h"
57221828Sgrehan#include <machine/vmm_dev.h>
58221828Sgrehan#include "vlapic.h"
59221828Sgrehan#include "vmm_msr.h"
60221828Sgrehan#include "vmm_ipi.h"
61221828Sgrehan#include "vmm_stat.h"
62242065Sneel#include "vmm_lapic.h"
63221828Sgrehan
64221828Sgrehan#include "io/ppt.h"
65221828Sgrehan#include "io/iommu.h"
66221828Sgrehan
67221828Sgrehanstruct vlapic;
68221828Sgrehan
struct vcpu {
	int		flags;
	enum vcpu_state	state;		/* protected by 'mtx' below */
	struct mtx	mtx;		/* spin lock for state/sleep-wakeup */
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	uint64_t	guest_msrs[VMM_MSR_NUM];	/* guest MSR shadow values */
	struct vlapic	*vlapic;	/* virtual local APIC */
	int		 vcpuid;	/* index of this vcpu in vm->vcpu[] */
	struct savefpu	*guestfpu;	/* guest fpu state */
	void		*stats;		/* opaque vmm_stat buffer */
	struct vm_exit	exitinfo;	/* copied out to vmrun by vm_run() */
	enum x2apic_state x2apic_state;	/* per-vcpu x2apic mode */
	int		nmi_pending;	/* NMI posted but not yet delivered */
};
83221828Sgrehan
84242065Sneel#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
85242065Sneel#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
86242065Sneel#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
87241489Sneel
88221828Sgrehan#define	VM_MAX_MEMORY_SEGMENTS	2
89221828Sgrehan
struct vm {
	void		*cookie;	/* processor-specific data */
	void		*iommu;		/* iommu-specific data */
	struct vcpu	vcpu[VM_MAXCPU];	/* per-vcpu state */
	int		num_mem_segs;	/* valid entries in mem_segs[] */
	struct vm_memory_segment mem_segs[VM_MAX_MEMORY_SEGMENTS];
	char		name[VM_MAX_NAMELEN];	/* NUL-terminated vm name */

	/*
	 * Set of active vcpus.
	 * An active vcpu is one that has been started implicitly (BSP) or
	 * explicitly (AP) by sending it a startup ipi.
	 */
	cpuset_t	active_cpus;
};
105221828Sgrehan
106249396Sneelstatic int vmm_initialized;
107249396Sneel
108221828Sgrehanstatic struct vmm_ops *ops;
109221828Sgrehan#define	VMM_INIT()	(ops != NULL ? (*ops->init)() : 0)
110221828Sgrehan#define	VMM_CLEANUP()	(ops != NULL ? (*ops->cleanup)() : 0)
111221828Sgrehan
112221828Sgrehan#define	VMINIT(vm)	(ops != NULL ? (*ops->vminit)(vm): NULL)
113240894Sneel#define	VMRUN(vmi, vcpu, rip) \
114240894Sneel	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip) : ENXIO)
115221828Sgrehan#define	VMCLEANUP(vmi)	(ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
116241147Sneel#define	VMMMAP_SET(vmi, gpa, hpa, len, attr, prot, spm)			\
117241147Sneel    	(ops != NULL ? 							\
118241147Sneel    	(*ops->vmmmap_set)(vmi, gpa, hpa, len, attr, prot, spm) :	\
119241147Sneel	ENXIO)
120241147Sneel#define	VMMMAP_GET(vmi, gpa) \
121241147Sneel	(ops != NULL ? (*ops->vmmmap_get)(vmi, gpa) : ENXIO)
122221828Sgrehan#define	VMGETREG(vmi, vcpu, num, retval)		\
123221828Sgrehan	(ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
124221828Sgrehan#define	VMSETREG(vmi, vcpu, num, val)		\
125221828Sgrehan	(ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
126221828Sgrehan#define	VMGETDESC(vmi, vcpu, num, desc)		\
127221828Sgrehan	(ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
128221828Sgrehan#define	VMSETDESC(vmi, vcpu, num, desc)		\
129221828Sgrehan	(ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
130221828Sgrehan#define	VMINJECT(vmi, vcpu, type, vec, ec, ecv)	\
131221828Sgrehan	(ops != NULL ? (*ops->vminject)(vmi, vcpu, type, vec, ec, ecv) : ENXIO)
132221828Sgrehan#define	VMGETCAP(vmi, vcpu, num, retval)	\
133221828Sgrehan	(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
134221828Sgrehan#define	VMSETCAP(vmi, vcpu, num, val)		\
135221828Sgrehan	(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
136221828Sgrehan
137245021Sneel#define	fpu_start_emulating()	load_cr0(rcr0() | CR0_TS)
138245021Sneel#define	fpu_stop_emulating()	clts()
139221828Sgrehan
140221828Sgrehanstatic MALLOC_DEFINE(M_VM, "vm", "vm");
141221828SgrehanCTASSERT(VMM_MSR_NUM <= 64);	/* msr_mask can keep track of up to 64 msrs */
142221828Sgrehan
143221828Sgrehan/* statistics */
144248389Sneelstatic VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
145221828Sgrehan
/*
 * Release the per-vcpu resources allocated by vcpu_init(): the virtual
 * local APIC, the statistics buffer and the guest FPU save area.
 */
static void
vcpu_cleanup(struct vcpu *vcpu)
{
	vlapic_cleanup(vcpu->vlapic);
	vmm_stat_free(vcpu->stats);
	fpu_save_area_free(vcpu->guestfpu);
}
153221828Sgrehan
/*
 * One-time initialization of the vcpu with index 'vcpu_id' in 'vm':
 * set up its lock, virtual local APIC, x2apic mode, guest FPU save
 * area and statistics buffer.
 */
static void
vcpu_init(struct vm *vm, uint32_t vcpu_id)
{
	struct vcpu *vcpu;

	vcpu = &vm->vcpu[vcpu_id];

	vcpu_lock_init(vcpu);
	vcpu->hostcpu = NOCPU;		/* not currently running on any host cpu */
	vcpu->vcpuid = vcpu_id;
	vcpu->vlapic = vlapic_init(vm, vcpu_id);
	vm_set_x2apic_state(vm, vcpu_id, X2APIC_ENABLED);
	vcpu->guestfpu = fpu_save_area_alloc();
	fpu_save_area_reset(vcpu->guestfpu);
	vcpu->stats = vmm_stat_alloc();
}
170221828Sgrehan
171240894Sneelstruct vm_exit *
172240894Sneelvm_exitinfo(struct vm *vm, int cpuid)
173240894Sneel{
174240894Sneel	struct vcpu *vcpu;
175240894Sneel
176240894Sneel	if (cpuid < 0 || cpuid >= VM_MAXCPU)
177240894Sneel		panic("vm_exitinfo: invalid cpuid %d", cpuid);
178240894Sneel
179240894Sneel	vcpu = &vm->vcpu[cpuid];
180240894Sneel
181240894Sneel	return (&vcpu->exitinfo);
182240894Sneel}
183240894Sneel
/*
 * Module-wide initialization: set up host state tracking, the IPI vector,
 * the physical memory allocator and the hardware-specific backend (VT-x
 * or SVM).  Returns 0 on success or an errno value; ENXIO means the CPU
 * is neither Intel nor AMD.
 */
static int
vmm_init(void)
{
	int error;

	vmm_host_state_init();
	vmm_ipi_init();

	error = vmm_mem_init();
	if (error)
		return (error);

	/* Select the processor-specific backend ops table. */
	if (vmm_is_intel())
		ops = &vmm_ops_intel;
	else if (vmm_is_amd())
		ops = &vmm_ops_amd;
	else
		return (ENXIO);

	vmm_msr_init();

	/* Dispatches to the backend's init via the 'ops' table set above. */
	return (VMM_INIT());
}
207221828Sgrehan
/*
 * Module event handler for vmm.ko.  On load, initialize the character
 * device, the iommu and the hypervisor core; 'vmm_initialized' is set
 * only if all of that succeeds (vm_create() checks it).  On unload, the
 * device cleanup must succeed before the rest is torn down.
 */
static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		vmmdev_init();
		iommu_init();
		error = vmm_init();
		if (error == 0)
			vmm_initialized = 1;
		break;
	case MOD_UNLOAD:
		error = vmmdev_cleanup();
		if (error == 0) {
			iommu_cleanup();
			vmm_ipi_cleanup();
			error = VMM_CLEANUP();
		}
		/* Cleared unconditionally, even if cleanup failed. */
		vmm_initialized = 0;
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}
236221828Sgrehan
237221828Sgrehanstatic moduledata_t vmm_kmod = {
238221828Sgrehan	"vmm",
239221828Sgrehan	vmm_handler,
240221828Sgrehan	NULL
241221828Sgrehan};
242221828Sgrehan
243221828Sgrehan/*
244245704Sneel * vmm initialization has the following dependencies:
245245704Sneel *
246245704Sneel * - iommu initialization must happen after the pci passthru driver has had
247245704Sneel *   a chance to attach to any passthru devices (after SI_SUB_CONFIGURE).
248245704Sneel *
249245704Sneel * - VT-x initialization requires smp_rendezvous() and therefore must happen
250245704Sneel *   after SMP is fully functional (after SI_SUB_SMP).
251221828Sgrehan */
252245704SneelDECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
253221828SgrehanMODULE_VERSION(vmm, 1);
254221828Sgrehan
255221828SgrehanSYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
256221828Sgrehan
/*
 * Create a new virtual machine named 'name' and return it in '*retvm'.
 * The BSP (vcpu 0) is marked active; memory segments are added later
 * via vm_malloc().  Returns 0 on success, ENXIO if the module failed to
 * initialize, or EINVAL for a bad name.
 */
int
vm_create(const char *name, struct vm **retvm)
{
	int i;
	struct vm *vm;
	vm_paddr_t maxaddr;

	const int BSP = 0;

	/*
	 * If vmm.ko could not be successfully initialized then don't attempt
	 * to create the virtual machine.
	 */
	if (!vmm_initialized)
		return (ENXIO);

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (EINVAL);

	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	/* NOTE(review): VMINIT result is not checked for NULL here. */
	vm->cookie = VMINIT(vm);

	for (i = 0; i < VM_MAXCPU; i++) {
		vcpu_init(vm, i);
		guest_msrs_init(vm, i);
	}

	maxaddr = vmm_mem_maxaddr();
	vm->iommu = iommu_create_domain(maxaddr);
	vm_activate_cpu(vm, BSP);

	*retvm = vm;
	return (0);
}
292221828Sgrehan
/*
 * Free the host pages backing the memory segment 'seg' page-by-page.
 * Each page is unmapped from the VM's iommu domain, restored to the
 * host domain's 1:1 mapping, and returned to the vmm memory allocator.
 * The segment descriptor is zeroed on completion.
 */
static void
vm_free_mem_seg(struct vm *vm, struct vm_memory_segment *seg)
{
	size_t len;
	vm_paddr_t hpa;
	void *host_domain;

	host_domain = iommu_host_domain();

	len = 0;
	while (len < seg->len) {
		hpa = vm_gpa2hpa(vm, seg->gpa + len, PAGE_SIZE);
		if (hpa == (vm_paddr_t)-1) {
			panic("vm_free_mem_segs: cannot free hpa "
			      "associated with gpa 0x%016lx", seg->gpa + len);
		}

		/*
		 * Remove the 'gpa' to 'hpa' mapping in VMs domain.
		 * And resurrect the 1:1 mapping for 'hpa' in 'host_domain'.
		 */
		iommu_remove_mapping(vm->iommu, seg->gpa + len, PAGE_SIZE);
		iommu_create_mapping(host_domain, hpa, hpa, PAGE_SIZE);

		vmm_mem_free(hpa, PAGE_SIZE);

		len += PAGE_SIZE;
	}

	/*
	 * Invalidate cached translations associated with 'vm->iommu' since
	 * we have now moved some pages from it.
	 */
	iommu_invalidate_tlb(vm->iommu);

	bzero(seg, sizeof(struct vm_memory_segment));
}
330241178Sneel
/*
 * Tear down a virtual machine: unassign passthru devices, free all
 * guest memory segments, clean up each vcpu, destroy the iommu domain
 * and the backend state, then free the vm structure itself.  The order
 * matters: memory segments reference the iommu domain, so they must be
 * freed before the domain is destroyed.
 */
void
vm_destroy(struct vm *vm)
{
	int i;

	ppt_unassign_all(vm);

	for (i = 0; i < vm->num_mem_segs; i++)
		vm_free_mem_seg(vm, &vm->mem_segs[i]);

	vm->num_mem_segs = 0;

	for (i = 0; i < VM_MAXCPU; i++)
		vcpu_cleanup(&vm->vcpu[i]);

	iommu_destroy_domain(vm->iommu);

	VMCLEANUP(vm->cookie);

	free(vm, M_VM);
}
352221828Sgrehan
/* Return the NUL-terminated name the vm was created with. */
const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}
358221828Sgrehan
359221828Sgrehanint
360221828Sgrehanvm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
361221828Sgrehan{
362221828Sgrehan	const boolean_t spok = TRUE;	/* superpage mappings are ok */
363221828Sgrehan
364241147Sneel	return (VMMMAP_SET(vm->cookie, gpa, hpa, len, VM_MEMATTR_UNCACHEABLE,
365241147Sneel			   VM_PROT_RW, spok));
366221828Sgrehan}
367221828Sgrehan
368221828Sgrehanint
369221828Sgrehanvm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
370221828Sgrehan{
371221828Sgrehan	const boolean_t spok = TRUE;	/* superpage mappings are ok */
372221828Sgrehan
373241147Sneel	return (VMMMAP_SET(vm->cookie, gpa, 0, len, 0,
374241147Sneel			   VM_PROT_NONE, spok));
375221828Sgrehan}
376221828Sgrehan
377241041Sneel/*
378241041Sneel * Returns TRUE if 'gpa' is available for allocation and FALSE otherwise
379241041Sneel */
380241041Sneelstatic boolean_t
381241041Sneelvm_gpa_available(struct vm *vm, vm_paddr_t gpa)
382241041Sneel{
383241041Sneel	int i;
384241041Sneel	vm_paddr_t gpabase, gpalimit;
385241041Sneel
386241041Sneel	if (gpa & PAGE_MASK)
387241041Sneel		panic("vm_gpa_available: gpa (0x%016lx) not page aligned", gpa);
388241041Sneel
389241041Sneel	for (i = 0; i < vm->num_mem_segs; i++) {
390241041Sneel		gpabase = vm->mem_segs[i].gpa;
391241041Sneel		gpalimit = gpabase + vm->mem_segs[i].len;
392241041Sneel		if (gpa >= gpabase && gpa < gpalimit)
393241041Sneel			return (FALSE);
394241041Sneel	}
395241041Sneel
396241041Sneel	return (TRUE);
397241041Sneel}
398241041Sneel
/*
 * Allocate host memory backing the guest physical range [gpa, gpa + len)
 * and record it as a new memory segment.  The range must be page aligned
 * and either entirely free or entirely allocated already (the latter is
 * a no-op).  Each page is mapped into the VM's backend page tables and
 * moved from the host iommu domain into the VM's iommu domain.  On any
 * failure the partially-built segment is freed.  Returns 0, EINVAL,
 * E2BIG or ENOMEM.
 */
int
vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	int error, available, allocated;
	struct vm_memory_segment *seg;
	vm_paddr_t g, hpa;
	void *host_domain;

	const boolean_t spok = TRUE;	/* superpage mappings are ok */

	if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
		return (EINVAL);

	/* Classify each page of the requested range as free or in use. */
	available = allocated = 0;
	g = gpa;
	while (g < gpa + len) {
		if (vm_gpa_available(vm, g))
			available++;
		else
			allocated++;

		g += PAGE_SIZE;
	}

	/*
	 * If there are some allocated and some available pages in the address
	 * range then it is an error.
	 */
	if (allocated && available)
		return (EINVAL);

	/*
	 * If the entire address range being requested has already been
	 * allocated then there isn't anything more to do.
	 */
	if (allocated && available == 0)
		return (0);

	if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
		return (E2BIG);

	host_domain = iommu_host_domain();

	seg = &vm->mem_segs[vm->num_mem_segs];

	/* Grow the segment one page at a time, bailing out on failure. */
	error = 0;
	seg->gpa = gpa;
	seg->len = 0;
	while (seg->len < len) {
		hpa = vmm_mem_alloc(PAGE_SIZE);
		if (hpa == 0) {
			error = ENOMEM;
			break;
		}

		error = VMMMAP_SET(vm->cookie, gpa + seg->len, hpa, PAGE_SIZE,
				   VM_MEMATTR_WRITE_BACK, VM_PROT_ALL, spok);
		if (error)
			break;

		/*
		 * Remove the 1:1 mapping for 'hpa' from the 'host_domain'.
		 * Add mapping for 'gpa + seg->len' to 'hpa' in the VMs domain.
		 */
		iommu_remove_mapping(host_domain, hpa, PAGE_SIZE);
		iommu_create_mapping(vm->iommu, gpa + seg->len, hpa, PAGE_SIZE);

		seg->len += PAGE_SIZE;
	}

	if (error) {
		/* Unwind the pages mapped so far; seg->len tracks them. */
		vm_free_mem_seg(vm, seg);
		return (error);
	}

	/*
	 * Invalidate cached translations associated with 'host_domain' since
	 * we have now moved some pages from it.
	 */
	iommu_invalidate_tlb(host_domain);

	vm->num_mem_segs++;

	return (0);
}
484221828Sgrehan
/*
 * Translate guest physical address 'gpa' to a host physical address.
 * The range [gpa, gpa + len) may not cross a page boundary.  Callers
 * treat a return of (vm_paddr_t)-1 as "no mapping" (see
 * vm_free_mem_seg()).
 */
vm_paddr_t
vm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	vm_paddr_t nextpage;

	nextpage = rounddown(gpa + PAGE_SIZE, PAGE_SIZE);
	if (len > nextpage - gpa)
		panic("vm_gpa2hpa: invalid gpa/len: 0x%016lx/%lu", gpa, len);

	return (VMMMAP_GET(vm->cookie, gpa));
}
496221828Sgrehan
497221828Sgrehanint
498221828Sgrehanvm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
499221828Sgrehan		  struct vm_memory_segment *seg)
500221828Sgrehan{
501221828Sgrehan	int i;
502221828Sgrehan
503221828Sgrehan	for (i = 0; i < vm->num_mem_segs; i++) {
504221828Sgrehan		if (gpabase == vm->mem_segs[i].gpa) {
505221828Sgrehan			*seg = vm->mem_segs[i];
506221828Sgrehan			return (0);
507221828Sgrehan		}
508221828Sgrehan	}
509221828Sgrehan	return (-1);
510221828Sgrehan}
511221828Sgrehan
512221828Sgrehanint
513221828Sgrehanvm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
514221828Sgrehan{
515221828Sgrehan
516221828Sgrehan	if (vcpu < 0 || vcpu >= VM_MAXCPU)
517221828Sgrehan		return (EINVAL);
518221828Sgrehan
519221828Sgrehan	if (reg >= VM_REG_LAST)
520221828Sgrehan		return (EINVAL);
521221828Sgrehan
522221828Sgrehan	return (VMGETREG(vm->cookie, vcpu, reg, retval));
523221828Sgrehan}
524221828Sgrehan
525221828Sgrehanint
526221828Sgrehanvm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
527221828Sgrehan{
528221828Sgrehan
529221828Sgrehan	if (vcpu < 0 || vcpu >= VM_MAXCPU)
530221828Sgrehan		return (EINVAL);
531221828Sgrehan
532221828Sgrehan	if (reg >= VM_REG_LAST)
533221828Sgrehan		return (EINVAL);
534221828Sgrehan
535221828Sgrehan	return (VMSETREG(vm->cookie, vcpu, reg, val));
536221828Sgrehan}
537221828Sgrehan
538221828Sgrehanstatic boolean_t
539221828Sgrehanis_descriptor_table(int reg)
540221828Sgrehan{
541221828Sgrehan
542221828Sgrehan	switch (reg) {
543221828Sgrehan	case VM_REG_GUEST_IDTR:
544221828Sgrehan	case VM_REG_GUEST_GDTR:
545221828Sgrehan		return (TRUE);
546221828Sgrehan	default:
547221828Sgrehan		return (FALSE);
548221828Sgrehan	}
549221828Sgrehan}
550221828Sgrehan
551221828Sgrehanstatic boolean_t
552221828Sgrehanis_segment_register(int reg)
553221828Sgrehan{
554221828Sgrehan
555221828Sgrehan	switch (reg) {
556221828Sgrehan	case VM_REG_GUEST_ES:
557221828Sgrehan	case VM_REG_GUEST_CS:
558221828Sgrehan	case VM_REG_GUEST_SS:
559221828Sgrehan	case VM_REG_GUEST_DS:
560221828Sgrehan	case VM_REG_GUEST_FS:
561221828Sgrehan	case VM_REG_GUEST_GS:
562221828Sgrehan	case VM_REG_GUEST_TR:
563221828Sgrehan	case VM_REG_GUEST_LDTR:
564221828Sgrehan		return (TRUE);
565221828Sgrehan	default:
566221828Sgrehan		return (FALSE);
567221828Sgrehan	}
568221828Sgrehan}
569221828Sgrehan
570221828Sgrehanint
571221828Sgrehanvm_get_seg_desc(struct vm *vm, int vcpu, int reg,
572221828Sgrehan		struct seg_desc *desc)
573221828Sgrehan{
574221828Sgrehan
575221828Sgrehan	if (vcpu < 0 || vcpu >= VM_MAXCPU)
576221828Sgrehan		return (EINVAL);
577221828Sgrehan
578221828Sgrehan	if (!is_segment_register(reg) && !is_descriptor_table(reg))
579221828Sgrehan		return (EINVAL);
580221828Sgrehan
581221828Sgrehan	return (VMGETDESC(vm->cookie, vcpu, reg, desc));
582221828Sgrehan}
583221828Sgrehan
584221828Sgrehanint
585221828Sgrehanvm_set_seg_desc(struct vm *vm, int vcpu, int reg,
586221828Sgrehan		struct seg_desc *desc)
587221828Sgrehan{
588221828Sgrehan	if (vcpu < 0 || vcpu >= VM_MAXCPU)
589221828Sgrehan		return (EINVAL);
590221828Sgrehan
591221828Sgrehan	if (!is_segment_register(reg) && !is_descriptor_table(reg))
592221828Sgrehan		return (EINVAL);
593221828Sgrehan
594221828Sgrehan	return (VMSETDESC(vm->cookie, vcpu, reg, desc));
595221828Sgrehan}
596221828Sgrehan
/*
 * Load the guest's FPU state onto the CPU just before entering the
 * guest.  The ordering is critical: flush host state first, restore
 * guest state with emulation disabled, then re-enable emulation so the
 * host cannot silently corrupt the guest's FPU context.
 */
static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* flush host state to the pcb */
	fpuexit(curthread);

	/* restore guest FPU state */
	fpu_stop_emulating();
	fpurestore(vcpu->guestfpu);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	fpu_start_emulating();
}
614221828Sgrehan
/*
 * Save the guest's FPU state after returning from the guest.  FPU
 * emulation (CR0.TS) must still be enabled from restore_guest_fpustate();
 * it is briefly disabled to perform the save and re-enabled afterwards.
 */
static void
save_guest_fpustate(struct vcpu *vcpu)
{

	if ((rcr0() & CR0_TS) == 0)
		panic("fpu emulation not enabled in host!");

	/* save guest FPU state */
	fpu_stop_emulating();
	fpusave(vcpu->guestfpu);
	fpu_start_emulating();
}
627221828Sgrehan
628248389Sneelstatic VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
629242065Sneel
/*
 * Run the vcpu named by 'vmrun->cpuid' starting at 'vmrun->rip' until it
 * exits for a reason that must be handled by the caller.  The exit
 * information is copied into 'vmrun->vm_exit'.  If the guest executed
 * 'hlt', this function sleeps until the vcpu is runnable again (apic
 * timer tick, NMI or interrupt) and then re-enters the guest at the
 * instruction following the 'hlt'.
 */
int
vm_run(struct vm *vm, struct vm_run *vmrun)
{
	int error, vcpuid, sleepticks, t;
	struct vcpu *vcpu;
	struct pcb *pcb;
	uint64_t tscval, rip;
	struct vm_exit *vme;

	vcpuid = vmrun->cpuid;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];
	vme = &vmrun->vm_exit;
	rip = vmrun->rip;
restart:
	/* No preemption between MSR/FPU swap and guest entry/exit. */
	critical_enter();

	tscval = rdtsc();

	pcb = PCPU_GET(curpcb);
	set_pcb_flags(pcb, PCB_FULL_IRET);

	restore_guest_msrs(vm, vcpuid);
	restore_guest_fpustate(vcpu);

	/* Publish the host cpu while the guest is running on it. */
	vcpu->hostcpu = curcpu;
	error = VMRUN(vm->cookie, vcpuid, rip);
	vcpu->hostcpu = NOCPU;

	save_guest_fpustate(vcpu);
	restore_host_msrs(vm, vcpuid);

	vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);

	/* copy the exit information */
	bcopy(&vcpu->exitinfo, vme, sizeof(struct vm_exit));

	critical_exit();

	/*
	 * Oblige the guest's desire to 'hlt' by sleeping until the vcpu
	 * is ready to run.
	 */
	if (error == 0 && vme->exitcode == VM_EXITCODE_HLT) {
		vcpu_lock(vcpu);

		/*
		 * Figure out the number of host ticks until the next apic
		 * timer interrupt in the guest.
		 */
		sleepticks = lapic_timer_tick(vm, vcpuid);

		/*
		 * If the guest local apic timer is disabled then sleep for
		 * a long time but not forever.
		 */
		if (sleepticks < 0)
			sleepticks = hz;

		/*
		 * Do a final check for pending NMI or interrupts before
		 * really putting this thread to sleep.
		 *
		 * These interrupts could have happened any time after we
		 * returned from VMRUN() and before we grabbed the vcpu lock.
		 */
		if (!vm_nmi_pending(vm, vcpuid) &&
		    lapic_pending_intr(vm, vcpuid) < 0) {
			if (sleepticks <= 0)
				panic("invalid sleepticks %d", sleepticks);
			t = ticks;
			msleep_spin(vcpu, &vcpu->mtx, "vmidle", sleepticks);
			vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
		}

		vcpu_unlock(vcpu);

		/* Resume at the instruction after the 'hlt'. */
		rip = vme->rip + vme->inst_length;
		goto restart;
	}

	return (error);
}
716221828Sgrehan
717221828Sgrehanint
718221828Sgrehanvm_inject_event(struct vm *vm, int vcpuid, int type,
719221828Sgrehan		int vector, uint32_t code, int code_valid)
720221828Sgrehan{
721221828Sgrehan	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
722221828Sgrehan		return (EINVAL);
723221828Sgrehan
724221828Sgrehan	if ((type > VM_EVENT_NONE && type < VM_EVENT_MAX) == 0)
725221828Sgrehan		return (EINVAL);
726221828Sgrehan
727221828Sgrehan	if (vector < 0 || vector > 255)
728221828Sgrehan		return (EINVAL);
729221828Sgrehan
730221828Sgrehan	return (VMINJECT(vm->cookie, vcpuid, type, vector, code, code_valid));
731221828Sgrehan}
732221828Sgrehan
733248389Sneelstatic VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");
734241982Sneel
735221828Sgrehanint
736241982Sneelvm_inject_nmi(struct vm *vm, int vcpuid)
737221828Sgrehan{
738241982Sneel	struct vcpu *vcpu;
739221828Sgrehan
740241982Sneel	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
741221828Sgrehan		return (EINVAL);
742221828Sgrehan
743241982Sneel	vcpu = &vm->vcpu[vcpuid];
744241982Sneel
745241982Sneel	vcpu->nmi_pending = 1;
746241982Sneel	vm_interrupt_hostcpu(vm, vcpuid);
747241982Sneel	return (0);
748221828Sgrehan}
749221828Sgrehan
750221828Sgrehanint
751241982Sneelvm_nmi_pending(struct vm *vm, int vcpuid)
752241982Sneel{
753241982Sneel	struct vcpu *vcpu;
754241982Sneel
755241982Sneel	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
756241982Sneel		panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);
757241982Sneel
758241982Sneel	vcpu = &vm->vcpu[vcpuid];
759241982Sneel
760241982Sneel	return (vcpu->nmi_pending);
761241982Sneel}
762241982Sneel
763241982Sneelvoid
764241982Sneelvm_nmi_clear(struct vm *vm, int vcpuid)
765241982Sneel{
766241982Sneel	struct vcpu *vcpu;
767241982Sneel
768241982Sneel	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
769241982Sneel		panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);
770241982Sneel
771241982Sneel	vcpu = &vm->vcpu[vcpuid];
772241982Sneel
773241982Sneel	if (vcpu->nmi_pending == 0)
774241982Sneel		panic("vm_nmi_clear: inconsistent nmi_pending state");
775241982Sneel
776241982Sneel	vcpu->nmi_pending = 0;
777241982Sneel	vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1);
778241982Sneel}
779241982Sneel
780241982Sneelint
781221828Sgrehanvm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
782221828Sgrehan{
783221828Sgrehan	if (vcpu < 0 || vcpu >= VM_MAXCPU)
784221828Sgrehan		return (EINVAL);
785221828Sgrehan
786221828Sgrehan	if (type < 0 || type >= VM_CAP_MAX)
787221828Sgrehan		return (EINVAL);
788221828Sgrehan
789221828Sgrehan	return (VMGETCAP(vm->cookie, vcpu, type, retval));
790221828Sgrehan}
791221828Sgrehan
792221828Sgrehanint
793221828Sgrehanvm_set_capability(struct vm *vm, int vcpu, int type, int val)
794221828Sgrehan{
795221828Sgrehan	if (vcpu < 0 || vcpu >= VM_MAXCPU)
796221828Sgrehan		return (EINVAL);
797221828Sgrehan
798221828Sgrehan	if (type < 0 || type >= VM_CAP_MAX)
799221828Sgrehan		return (EINVAL);
800221828Sgrehan
801221828Sgrehan	return (VMSETCAP(vm->cookie, vcpu, type, val));
802221828Sgrehan}
803221828Sgrehan
/* Return the guest MSR shadow array of vcpu 'cpu' (index is unchecked). */
uint64_t *
vm_guest_msrs(struct vm *vm, int cpu)
{
	return (vm->vcpu[cpu].guest_msrs);
}
809221828Sgrehan
/* Return the virtual local APIC of vcpu 'cpu' (index is unchecked). */
struct vlapic *
vm_lapic(struct vm *vm, int cpu)
{
	return (vm->vcpu[cpu].vlapic);
}
815221828Sgrehan
/*
 * Return TRUE iff the pci device at bus/slot/func is listed in one of
 * the 'pptdevs*' kernel environment variables as a passthru device.
 * Each variable holds space-separated "bus/slot/func" triples; the scan
 * temporarily splits the string in place and restores it afterwards.
 */
boolean_t
vmm_is_pptdev(int bus, int slot, int func)
{
	int found, i, n;
	int b, s, f;
	char *val, *cp, *cp2;

	/*
	 * XXX
	 * The length of an environment variable is limited to 128 bytes which
	 * puts an upper limit on the number of passthru devices that may be
	 * specified using a single environment variable.
	 *
	 * Work around this by scanning multiple environment variable
	 * names instead of a single one - yuck!
	 */
	const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL };

	/* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */
	found = 0;
	for (i = 0; names[i] != NULL && !found; i++) {
		cp = val = getenv(names[i]);
		while (cp != NULL && *cp != '\0') {
			/* Terminate the current triple at the next space. */
			if ((cp2 = strchr(cp, ' ')) != NULL)
				*cp2 = '\0';

			n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
			if (n == 3 && bus == b && slot == s && func == f) {
				found = 1;
				break;
			}

			/* Restore the separator and advance past it. */
			if (cp2 != NULL)
				*cp2++ = ' ';

			cp = cp2;
		}
		freeenv(val);
	}
	return (found);
}
857221828Sgrehan
/* Return the opaque iommu domain created for this vm. */
void *
vm_iommu_domain(struct vm *vm)
{

	return (vm->iommu);
}
864221828Sgrehan
865241489Sneelint
866241489Sneelvcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state state)
867221828Sgrehan{
868241489Sneel	int error;
869221828Sgrehan	struct vcpu *vcpu;
870221828Sgrehan
871221828Sgrehan	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
872221828Sgrehan		panic("vm_set_run_state: invalid vcpuid %d", vcpuid);
873221828Sgrehan
874221828Sgrehan	vcpu = &vm->vcpu[vcpuid];
875221828Sgrehan
876241489Sneel	vcpu_lock(vcpu);
877241489Sneel
878241489Sneel	/*
879241489Sneel	 * The following state transitions are allowed:
880241489Sneel	 * IDLE -> RUNNING -> IDLE
881241489Sneel	 * IDLE -> CANNOT_RUN -> IDLE
882241489Sneel	 */
883241489Sneel	if ((vcpu->state == VCPU_IDLE && state != VCPU_IDLE) ||
884241489Sneel	    (vcpu->state != VCPU_IDLE && state == VCPU_IDLE)) {
885241489Sneel		error = 0;
886241489Sneel		vcpu->state = state;
887221828Sgrehan	} else {
888241489Sneel		error = EBUSY;
889221828Sgrehan	}
890241489Sneel
891241489Sneel	vcpu_unlock(vcpu);
892241489Sneel
893241489Sneel	return (error);
894221828Sgrehan}
895221828Sgrehan
896241489Sneelenum vcpu_state
897249879Sgrehanvcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
898221828Sgrehan{
899221828Sgrehan	struct vcpu *vcpu;
900241489Sneel	enum vcpu_state state;
901221828Sgrehan
902221828Sgrehan	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
903221828Sgrehan		panic("vm_get_run_state: invalid vcpuid %d", vcpuid);
904221828Sgrehan
905221828Sgrehan	vcpu = &vm->vcpu[vcpuid];
906221828Sgrehan
907241489Sneel	vcpu_lock(vcpu);
908241489Sneel	state = vcpu->state;
909249879Sgrehan	if (hostcpu != NULL)
910249879Sgrehan		*hostcpu = vcpu->hostcpu;
911241489Sneel	vcpu_unlock(vcpu);
912221828Sgrehan
913241489Sneel	return (state);
914221828Sgrehan}
915221828Sgrehan
916221828Sgrehanvoid
917221828Sgrehanvm_activate_cpu(struct vm *vm, int vcpuid)
918221828Sgrehan{
919221828Sgrehan
920221828Sgrehan	if (vcpuid >= 0 && vcpuid < VM_MAXCPU)
921223621Sgrehan		CPU_SET(vcpuid, &vm->active_cpus);
922221828Sgrehan}
923221828Sgrehan
924223621Sgrehancpuset_t
925221828Sgrehanvm_active_cpus(struct vm *vm)
926221828Sgrehan{
927221828Sgrehan
928221828Sgrehan	return (vm->active_cpus);
929221828Sgrehan}
930221828Sgrehan
931221828Sgrehanvoid *
932221828Sgrehanvcpu_stats(struct vm *vm, int vcpuid)
933221828Sgrehan{
934221828Sgrehan
935221828Sgrehan	return (vm->vcpu[vcpuid].stats);
936221828Sgrehan}
937240922Sneel
938240922Sneelint
939240922Sneelvm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state)
940240922Sneel{
941240922Sneel	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
942240922Sneel		return (EINVAL);
943240922Sneel
944240922Sneel	*state = vm->vcpu[vcpuid].x2apic_state;
945240922Sneel
946240922Sneel	return (0);
947240922Sneel}
948240922Sneel
949240922Sneelint
950240922Sneelvm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
951240922Sneel{
952240922Sneel	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
953240922Sneel		return (EINVAL);
954240922Sneel
955248392Sneel	if (state >= X2APIC_STATE_LAST)
956240922Sneel		return (EINVAL);
957240922Sneel
958240922Sneel	vm->vcpu[vcpuid].x2apic_state = state;
959240922Sneel
960240943Sneel	vlapic_set_x2apic_state(vm, vcpuid, state);
961240943Sneel
962240922Sneel	return (0);
963240922Sneel}
964241489Sneel
965241489Sneelvoid
966241489Sneelvm_interrupt_hostcpu(struct vm *vm, int vcpuid)
967241489Sneel{
968241489Sneel	int hostcpu;
969241489Sneel	struct vcpu *vcpu;
970241489Sneel
971241489Sneel	vcpu = &vm->vcpu[vcpuid];
972241489Sneel
973242065Sneel	vcpu_lock(vcpu);
974241489Sneel	hostcpu = vcpu->hostcpu;
975242065Sneel	if (hostcpu == NOCPU) {
976242065Sneel		/*
977242065Sneel		 * If the vcpu is 'RUNNING' but without a valid 'hostcpu' then
978242065Sneel		 * the host thread must be sleeping waiting for an event to
979242065Sneel		 * kick the vcpu out of 'hlt'.
980242065Sneel		 *
981242065Sneel		 * XXX this is racy because the condition exists right before
982242065Sneel		 * and after calling VMRUN() in vm_run(). The wakeup() is
983242065Sneel		 * benign in this case.
984242065Sneel		 */
985242065Sneel		if (vcpu->state == VCPU_RUNNING)
986242065Sneel			wakeup_one(vcpu);
987242065Sneel	} else {
988242065Sneel		if (vcpu->state != VCPU_RUNNING)
989242065Sneel			panic("invalid vcpu state %d", vcpu->state);
990242065Sneel		if (hostcpu != curcpu)
991242065Sneel			ipi_cpu(hostcpu, vmm_ipinum);
992242065Sneel	}
993242065Sneel	vcpu_unlock(vcpu);
994241489Sneel}
995