/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/sys/amd64/vmm/vmm.c 270070 2014-08-17 00:52:07Z grehan $
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/amd64/vmm/vmm.c 270070 2014-08-17 00:52:07Z grehan $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/pcpu.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/cpu.h>
#include <machine/vm.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <x86/psl.h>
#include <x86/apicreg.h>
#include <machine/vmparam.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>

#include "vmm_ioport.h"
#include "vmm_ktr.h"
#include "vmm_host.h"
#include "vmm_mem.h"
#include "vmm_util.h"
#include "vatpic.h"
#include "vatpit.h"
#include "vhpet.h"
#include "vioapic.h"
#include "vlapic.h"
#include "vmm_msr.h"
#include "vmm_ipi.h"
#include "vmm_stat.h"
#include "vmm_lapic.h"

#include "io/ppt.h"
#include "io/iommu.h"

struct vlapic;

struct vcpu {
	int		flags;
	enum vcpu_state	state;
	struct mtx	mtx;
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	uint64_t	guest_msrs[VMM_MSR_NUM];
	struct vlapic	*vlapic;
	int		 vcpuid;
	struct savefpu	*guestfpu;	/* guest fpu state */
	uint64_t	guest_xcr0;
	void		*stats;
	struct vm_exit	exitinfo;
	enum x2apic_state x2apic_state;
	int		nmi_pending;
	int		extint_pending;
	struct vm_exception exception;
	int		exception_pending;
};

#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)

struct mem_seg {
	vm_paddr_t	gpa;
	size_t		len;
	boolean_t	wired;
	vm_object_t	object;
};
#define	VM_MAX_MEMORY_SEGMENTS	2

struct vm {
	void		*cookie;	/* processor-specific data */
	void		*iommu;		/* iommu-specific data */
	struct vhpet	*vhpet;		/* virtual HPET */
	struct vioapic	*vioapic;	/* virtual ioapic */
	struct vatpic	*vatpic;	/* virtual atpic */
	struct vatpit	*vatpit;	/* virtual atpit */
	struct vmspace	*vmspace;	/* guest's address space */
	struct vcpu	vcpu[VM_MAXCPU];
	int		num_mem_segs;
	struct mem_seg	mem_segs[VM_MAX_MEMORY_SEGMENTS];
	char		name[VM_MAX_NAMELEN];

	/*
	 * Set of active vcpus.
	 * An active vcpu is one that has been started implicitly (BSP) or
	 * explicitly (AP) by sending it a startup ipi.
	 */
	volatile cpuset_t active_cpus;

	struct mtx	rendezvous_mtx;
	cpuset_t	rendezvous_req_cpus;
	cpuset_t	rendezvous_done_cpus;
	void		*rendezvous_arg;
	vm_rendezvous_func_t rendezvous_func;

	int		suspend;
	volatile cpuset_t suspended_cpus;

	volatile cpuset_t halted_cpus;
};

static int vmm_initialized;

static struct vmm_ops *ops;
#define	VMM_INIT(num)	(ops != NULL ? (*ops->init)(num) : 0)
#define	VMM_CLEANUP()	(ops != NULL ? (*ops->cleanup)() : 0)
#define	VMM_RESUME()	(ops != NULL ? (*ops->resume)() : 0)

#define	VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
#define	VMRUN(vmi, vcpu, rip, pmap, rptr, sptr) \
	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr, sptr) : ENXIO)
#define	VMCLEANUP(vmi)	(ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
#define	VMSPACE_ALLOC(min, max) \
	(ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
#define	VMSPACE_FREE(vmspace) \
	(ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO)
#define	VMGETREG(vmi, vcpu, num, retval)		\
	(ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETREG(vmi, vcpu, num, val)		\
	(ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
#define	VMGETDESC(vmi, vcpu, num, desc)		\
	(ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMSETDESC(vmi, vcpu, num, desc)		\
	(ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMGETCAP(vmi, vcpu, num, retval)	\
	(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETCAP(vmi, vcpu, num, val)		\
	(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
#define	VLAPIC_INIT(vmi, vcpu)			\
	(ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL)
#define	VLAPIC_CLEANUP(vmi, vlapic)		\
	(ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL)

#define	fpu_start_emulating()	load_cr0(rcr0() | CR0_TS)
#define	fpu_stop_emulating()	clts()

static MALLOC_DEFINE(M_VM, "vm", "vm");
CTASSERT(VMM_MSR_NUM <= 64);	/* msr_mask can keep track of up to 64 msrs */

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

/*
 * Halt the guest if all vcpus are executing a HLT instruction with
 * interrupts disabled.
 */
static int halt_detection_enabled = 1;
TUNABLE_INT("hw.vmm.halt_detection", &halt_detection_enabled);
SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN,
    &halt_detection_enabled, 0,
    "Halt VM if all vcpus execute HLT with interrupts disabled");

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

static void
vcpu_cleanup(struct vm *vm, int i)
{
	struct vcpu *vcpu = &vm->vcpu[i];

	VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic);
	vmm_stat_free(vcpu->stats);
	fpu_save_area_free(vcpu->guestfpu);
}

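/*
 * Initialize the state of a vcpu: its spin lock, vlapic, x2apic mode,
 * guest FPU save area and statistics buffer. Called once for each vcpu
 * slot when the virtual machine is created.
 */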
static void
vcpu_init(struct vm *vm, uint32_t vcpu_id)
{
	struct vcpu *vcpu;

	vcpu = &vm->vcpu[vcpu_id];

	vcpu_lock_init(vcpu);
	vcpu->hostcpu = NOCPU;
	vcpu->vcpuid = vcpu_id;
	vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
	vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
	vcpu->guest_xcr0 = XFEATURE_ENABLED_X87;
	vcpu->guestfpu = fpu_save_area_alloc();
	fpu_save_area_reset(vcpu->guestfpu);
	vcpu->stats = vmm_stat_alloc();
}

struct vm_exit *
vm_exitinfo(struct vm *vm, int cpuid)
{
	struct vcpu *vcpu;

	if (cpuid < 0 || cpuid >= VM_MAXCPU)
		panic("vm_exitinfo: invalid cpuid %d", cpuid);

	vcpu = &vm->vcpu[cpuid];

	return (&vcpu->exitinfo);
}

static void
vmm_resume(void)
{
	VMM_RESUME();
}

static int
vmm_init(void)
{
	int error;

	vmm_host_state_init();

	vmm_ipinum = vmm_ipi_alloc();
	if (vmm_ipinum == 0)
		vmm_ipinum = IPI_AST;

	error = vmm_mem_init();
	if (error)
		return (error);

	if (vmm_is_intel())
		ops = &vmm_ops_intel;
	else if (vmm_is_amd())
		ops = &vmm_ops_amd;
	else
		return (ENXIO);

	vmm_msr_init();
	vmm_resume_p = vmm_resume;

	return (VMM_INIT(vmm_ipinum));
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		vmmdev_init();
		if (ppt_avail_devices() > 0)
			iommu_init();
		error = vmm_init();
		if (error == 0)
			vmm_initialized = 1;
		break;
	case MOD_UNLOAD:
		error = vmmdev_cleanup();
		if (error == 0) {
			vmm_resume_p = NULL;
			iommu_cleanup();
			if (vmm_ipinum != IPI_AST)
				vmm_ipi_free(vmm_ipinum);
			error = VMM_CLEANUP();
			/*
			 * Something bad happened - prevent new
			 * VMs from being created
			 */
			if (error)
				vmm_initialized = 0;
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - iommu initialization must happen after the pci passthru driver has had
 *   a chance to attach to any passthru devices (after SI_SUB_CONFIGURE).
 *
 * - VT-x initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 */
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

int
vm_create(const char *name, struct vm **retvm)
{
	int i;
	struct vm *vm;
	struct vmspace *vmspace;

	/*
	 * If vmm.ko could not be successfully initialized then don't attempt
	 * to create the virtual machine.
	 */
	if (!vmm_initialized)
		return (ENXIO);

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (EINVAL);

	vmspace = VMSPACE_ALLOC(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
	if (vmspace == NULL)
		return (ENOMEM);

	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->vmspace = vmspace;
	mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
	vm->cookie = VMINIT(vm, vmspace_pmap(vmspace));
	vm->vioapic = vioapic_init(vm);
	vm->vhpet = vhpet_init(vm);
	vm->vatpic = vatpic_init(vm);
	vm->vatpit = vatpit_init(vm);

	for (i = 0; i < VM_MAXCPU; i++) {
		vcpu_init(vm, i);
		guest_msrs_init(vm, i);
	}

	*retvm = vm;
	return (0);
}

static void
vm_free_mem_seg(struct vm *vm, struct mem_seg *seg)
{

	if (seg->object != NULL)
		vmm_mem_free(vm->vmspace, seg->gpa, seg->len);

	bzero(seg, sizeof(*seg));
}

void
vm_destroy(struct vm *vm)
{
	int i;

	ppt_unassign_all(vm);

	if (vm->iommu != NULL)
		iommu_destroy_domain(vm->iommu);

	vatpit_cleanup(vm->vatpit);
	vhpet_cleanup(vm->vhpet);
	vatpic_cleanup(vm->vatpic);
	vioapic_cleanup(vm->vioapic);

	for (i = 0; i < vm->num_mem_segs; i++)
		vm_free_mem_seg(vm, &vm->mem_segs[i]);

	vm->num_mem_segs = 0;

	for (i = 0; i < VM_MAXCPU; i++)
		vcpu_cleanup(vm, i);

	VMSPACE_FREE(vm->vmspace);

	VMCLEANUP(vm->cookie);

	free(vm, M_VM);
}

const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}

int
vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
	vm_object_t obj;

	if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL)
		return (ENOMEM);
	else
		return (0);
}

int
vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
{

	vmm_mmio_free(vm->vmspace, gpa, len);
	return (0);
}

boolean_t
vm_mem_allocated(struct vm *vm, vm_paddr_t gpa)
{
	int i;
	vm_paddr_t gpabase, gpalimit;

	for (i = 0; i < vm->num_mem_segs; i++) {
		gpabase = vm->mem_segs[i].gpa;
		gpalimit = gpabase + vm->mem_segs[i].len;
		if (gpa >= gpabase && gpa < gpalimit)
			return (TRUE);		/* 'gpa' is regular memory */
	}

	if (ppt_is_mmio(vm, gpa))
		return (TRUE);			/* 'gpa' is pci passthru mmio */

	return (FALSE);
}

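/*
 * Allocate 'len' bytes of guest physical memory starting at 'gpa'. Both
 * 'gpa' and 'len' must be page-aligned. The requested range must either
 * be entirely allocated already (a no-op) or entirely free; a partial
 * overlap with an existing segment is an error. At most
 * VM_MAX_MEMORY_SEGMENTS segments can be created per virtual machine.
 */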
int
vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	int available, allocated;
	struct mem_seg *seg;
	vm_object_t object;
	vm_paddr_t g;

	if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
		return (EINVAL);

	available = allocated = 0;
	g = gpa;
	while (g < gpa + len) {
		if (vm_mem_allocated(vm, g))
			allocated++;
		else
			available++;

		g += PAGE_SIZE;
	}

	/*
	 * If there are some allocated and some available pages in the address
	 * range then it is an error.
	 */
	if (allocated && available)
		return (EINVAL);

	/*
	 * If the entire address range being requested has already been
	 * allocated then there isn't anything more to do.
	 */
	if (allocated && available == 0)
		return (0);

	if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
		return (E2BIG);

	seg = &vm->mem_segs[vm->num_mem_segs];

	if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL)
		return (ENOMEM);

	seg->gpa = gpa;
	seg->len = len;
	seg->object = object;
	seg->wired = FALSE;

	vm->num_mem_segs++;

	return (0);
}

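/*
 * The two helpers below wire and unwire a guest's physical memory in
 * the host. Wiring is required while pci passthru devices are assigned
 * because the iommu translations programmed by vm_iommu_modify() refer
 * to host physical addresses that must not change.
 */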
static void
vm_gpa_unwire(struct vm *vm)
{
	int i, rv;
	struct mem_seg *seg;

	for (i = 0; i < vm->num_mem_segs; i++) {
		seg = &vm->mem_segs[i];
		if (!seg->wired)
			continue;

		rv = vm_map_unwire(&vm->vmspace->vm_map,
				   seg->gpa, seg->gpa + seg->len,
				   VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
		KASSERT(rv == KERN_SUCCESS, ("vm(%s) memory segment "
		    "%#lx/%ld could not be unwired: %d",
		    vm_name(vm), seg->gpa, seg->len, rv));

		seg->wired = FALSE;
	}
}

static int
vm_gpa_wire(struct vm *vm)
{
	int i, rv;
	struct mem_seg *seg;

	for (i = 0; i < vm->num_mem_segs; i++) {
		seg = &vm->mem_segs[i];
		if (seg->wired)
			continue;

		/* XXX rlimits? */
		rv = vm_map_wire(&vm->vmspace->vm_map,
				 seg->gpa, seg->gpa + seg->len,
				 VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
		if (rv != KERN_SUCCESS)
			break;

		seg->wired = TRUE;
	}

	if (i < vm->num_mem_segs) {
		/*
		 * Undo the wiring before returning an error.
		 */
		vm_gpa_unwire(vm);
		return (EAGAIN);
	}

	return (0);
}

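/*
 * Walk all pages of guest memory and move each one into the VM's iommu
 * domain (map == TRUE) or back into the host's domain (map == FALSE).
 * The TLB of the domain that pages were removed from is invalidated
 * once the walk is complete.
 */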
static void
vm_iommu_modify(struct vm *vm, boolean_t map)
{
	int i, sz;
	vm_paddr_t gpa, hpa;
	struct mem_seg *seg;
	void *vp, *cookie, *host_domain;

	sz = PAGE_SIZE;
	host_domain = iommu_host_domain();

	for (i = 0; i < vm->num_mem_segs; i++) {
		seg = &vm->mem_segs[i];
		KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired",
		    vm_name(vm), seg->gpa, seg->len));

		gpa = seg->gpa;
		while (gpa < seg->gpa + seg->len) {
			vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE,
					 &cookie);
			KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx",
			    vm_name(vm), gpa));

			vm_gpa_release(cookie);

			hpa = DMAP_TO_PHYS((uintptr_t)vp);
			if (map) {
				iommu_create_mapping(vm->iommu, gpa, hpa, sz);
				iommu_remove_mapping(host_domain, hpa, sz);
			} else {
				iommu_remove_mapping(vm->iommu, gpa, sz);
				iommu_create_mapping(host_domain, hpa, hpa, sz);
			}

			gpa += PAGE_SIZE;
		}
	}

	/*
	 * Invalidate the cached translations associated with the domain
	 * from which pages were removed.
	 */
	if (map)
		iommu_invalidate_tlb(host_domain);
	else
		iommu_invalidate_tlb(vm->iommu);
}

#define	vm_iommu_unmap(vm)	vm_iommu_modify((vm), FALSE)
#define	vm_iommu_map(vm)	vm_iommu_modify((vm), TRUE)

int
vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func)
{
	int error;

	error = ppt_unassign_device(vm, bus, slot, func);
	if (error)
		return (error);

	if (ppt_assigned_devices(vm) == 0) {
		vm_iommu_unmap(vm);
		vm_gpa_unwire(vm);
	}
	return (0);
}

int
vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
{
	int error;
	vm_paddr_t maxaddr;

	/*
	 * Virtual machines with pci passthru devices get special treatment:
	 * - the guest physical memory is wired
	 * - the iommu is programmed to do the 'gpa' to 'hpa' translation
	 *
	 * We need to do this before the first pci passthru device is attached.
	 */
	if (ppt_assigned_devices(vm) == 0) {
		KASSERT(vm->iommu == NULL,
		    ("vm_assign_pptdev: iommu must be NULL"));
		maxaddr = vmm_mem_maxaddr();
		vm->iommu = iommu_create_domain(maxaddr);

		error = vm_gpa_wire(vm);
		if (error)
			return (error);

		vm_iommu_map(vm);
	}

	error = ppt_assign_device(vm, bus, slot, func);
	return (error);
}

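/*
 * Hold the page containing the guest physical address 'gpa' and return
 * a pointer to it in the kernel's direct map. The requested range must
 * not cross a page boundary. The page remains held until the returned
 * '*cookie' is passed to vm_gpa_release().
 */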
void *
vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
	    void **cookie)
{
	int count, pageoff;
	vm_page_t m;

	pageoff = gpa & PAGE_MASK;
	if (len > PAGE_SIZE - pageoff)
		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);

	count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
	    trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);

	if (count == 1) {
		*cookie = m;
		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
	} else {
		*cookie = NULL;
		return (NULL);
	}
}

void
vm_gpa_release(void *cookie)
{
	vm_page_t m = cookie;

	vm_page_lock(m);
	vm_page_unhold(m);
	vm_page_unlock(m);
}

int
vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
		  struct vm_memory_segment *seg)
{
	int i;

	for (i = 0; i < vm->num_mem_segs; i++) {
		if (gpabase == vm->mem_segs[i].gpa) {
			seg->gpa = vm->mem_segs[i].gpa;
			seg->len = vm->mem_segs[i].len;
			seg->wired = vm->mem_segs[i].wired;
			return (0);
		}
	}
	return (-1);
}

int
vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
	      vm_offset_t *offset, struct vm_object **object)
{
	int i;
	size_t seg_len;
	vm_paddr_t seg_gpa;
	vm_object_t seg_obj;

	for (i = 0; i < vm->num_mem_segs; i++) {
		if ((seg_obj = vm->mem_segs[i].object) == NULL)
			continue;

		seg_gpa = vm->mem_segs[i].gpa;
		seg_len = vm->mem_segs[i].len;

		if (gpa >= seg_gpa && gpa < seg_gpa + seg_len) {
			*offset = gpa - seg_gpa;
			*object = seg_obj;
			vm_object_reference(seg_obj);
			return (0);
		}
	}

	return (EINVAL);
}

int
vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (VMGETREG(vm->cookie, vcpu, reg, retval));
}

int
vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (VMSETREG(vm->cookie, vcpu, reg, val));
}

static boolean_t
is_descriptor_table(int reg)
{

	switch (reg) {
	case VM_REG_GUEST_IDTR:
	case VM_REG_GUEST_GDTR:
		return (TRUE);
	default:
		return (FALSE);
	}
}

static boolean_t
is_segment_register(int reg)
{

	switch (reg) {
	case VM_REG_GUEST_ES:
	case VM_REG_GUEST_CS:
	case VM_REG_GUEST_SS:
	case VM_REG_GUEST_DS:
	case VM_REG_GUEST_FS:
	case VM_REG_GUEST_GS:
	case VM_REG_GUEST_TR:
	case VM_REG_GUEST_LDTR:
		return (TRUE);
	default:
		return (FALSE);
	}
}

int
vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
		struct seg_desc *desc)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (!is_segment_register(reg) && !is_descriptor_table(reg))
		return (EINVAL);

	return (VMGETDESC(vm->cookie, vcpu, reg, desc));
}

int
vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
		struct seg_desc *desc)
{
	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (!is_segment_register(reg) && !is_descriptor_table(reg))
		return (EINVAL);

	return (VMSETDESC(vm->cookie, vcpu, reg, desc));
}

static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* flush host state to the pcb */
	fpuexit(curthread);

	/* restore guest FPU state */
	fpu_stop_emulating();
	fpurestore(vcpu->guestfpu);

	/* restore guest XCR0 if XSAVE is enabled in the host */
	if (rcr4() & CR4_XSAVE)
		load_xcr(0, vcpu->guest_xcr0);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	fpu_start_emulating();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{

	if ((rcr0() & CR0_TS) == 0)
		panic("fpu emulation not enabled in host!");

	/* save guest XCR0 and restore host XCR0 */
	if (rcr4() & CR4_XSAVE) {
		vcpu->guest_xcr0 = rxcr(0);
		load_xcr(0, vmm_get_host_xcr0());
	}

	/* save guest FPU state */
	fpu_stop_emulating();
	fpusave(vcpu->guestfpu);
	fpu_start_emulating();
}

static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");

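/*
 * Move a vcpu between the IDLE, FROZEN, RUNNING and SLEEPING states.
 * All transitions pass through FROZEN. A transition initiated by an
 * ioctl ('from_idle' is true) must start from IDLE, which serializes
 * the ioctls operating on a vcpu. Returns EBUSY if the requested
 * transition is not permitted.
 */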
static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;

	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state. This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE)
			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	default:
		error = 1;
		break;
	}

	if (error)
		return (EBUSY);

	vcpu->state = newstate;
	if (newstate == VCPU_RUNNING)
		vcpu->hostcpu = curcpu;
	else
		vcpu->hostcpu = NOCPU;

	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}

static void
vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}

static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}

static void
vm_set_rendezvous_func(struct vm *vm, vm_rendezvous_func_t func)
{

	KASSERT(mtx_owned(&vm->rendezvous_mtx), ("rendezvous_mtx not locked"));

	/*
	 * Update 'rendezvous_func' and execute a write memory barrier to
	 * ensure that it is visible across all host cpus. This is not needed
	 * for correctness but it does ensure that all the vcpus will notice
	 * that the rendezvous is requested immediately.
	 */
	vm->rendezvous_func = func;
	wmb();
}

#define	RENDEZVOUS_CTR0(vm, vcpuid, fmt)				\
	do {								\
		if (vcpuid >= 0)					\
			VCPU_CTR0(vm, vcpuid, fmt);			\
		else							\
			VM_CTR0(vm, fmt);				\
	} while (0)

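/*
 * Called by a vcpu that has noticed a pending rendezvous request. The
 * vcpu executes the rendezvous callback on its own behalf if it is a
 * target, then sleeps until the remaining targets have checked in. The
 * last vcpu to complete clears 'rendezvous_func' and wakes the others.
 */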
static void
vm_handle_rendezvous(struct vm *vm, int vcpuid)
{

	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
	    ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid));

	mtx_lock(&vm->rendezvous_mtx);
	while (vm->rendezvous_func != NULL) {
		/* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
		CPU_AND(&vm->rendezvous_req_cpus, &vm->active_cpus);

		if (vcpuid != -1 &&
		    CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
		    !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
			VCPU_CTR0(vm, vcpuid, "Calling rendezvous func");
			(*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg);
			CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
		}
		if (CPU_CMP(&vm->rendezvous_req_cpus,
		    &vm->rendezvous_done_cpus) == 0) {
			VCPU_CTR0(vm, vcpuid, "Rendezvous completed");
			vm_set_rendezvous_func(vm, NULL);
			wakeup(&vm->rendezvous_func);
			break;
		}
		RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion");
		mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
		    "vmrndv", 0);
	}
	mtx_unlock(&vm->rendezvous_mtx);
}

/*
 * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
 */
static int
vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
{
	struct vcpu *vcpu;
	const char *wmesg;
	int t, vcpu_halted, vm_halted;

	KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted"));

	vcpu = &vm->vcpu[vcpuid];
	vcpu_halted = 0;
	vm_halted = 0;

	vcpu_lock(vcpu);
	while (1) {
		/*
		 * Do a final check for pending NMI or interrupts before
		 * really putting this thread to sleep. Also check for
		 * software events that would cause this vcpu to wakeup.
		 *
		 * These interrupts/events could have happened after the
		 * vcpu returned from VMRUN() and before it acquired the
		 * vcpu lock above.
		 */
		if (vm->rendezvous_func != NULL || vm->suspend)
			break;
		if (vm_nmi_pending(vm, vcpuid))
			break;
		if (!intr_disabled) {
			if (vm_extint_pending(vm, vcpuid) ||
			    vlapic_pending_intr(vcpu->vlapic, NULL)) {
				break;
			}
		}

		/*
		 * Some Linux guests implement "halt" by having all vcpus
		 * execute HLT with interrupts disabled. 'halted_cpus' keeps
		 * track of the vcpus that have entered this state. When all
		 * vcpus enter the halted state the virtual machine is halted.
		 */
		if (intr_disabled) {
			wmesg = "vmhalt";
			VCPU_CTR0(vm, vcpuid, "Halted");
			if (!vcpu_halted && halt_detection_enabled) {
				vcpu_halted = 1;
				CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus);
			}
			if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) {
				vm_halted = 1;
				break;
			}
		} else {
			wmesg = "vmidle";
		}

		t = ticks;
		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		msleep_spin(vcpu, &vcpu->mtx, wmesg, 0);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
	}

	if (vcpu_halted)
		CPU_CLR_ATOMIC(vcpuid, &vm->halted_cpus);

	vcpu_unlock(vcpu);

	if (vm_halted)
		vm_suspend(vm, VM_SUSPEND_HALT);

	return (0);
}

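/*
 * Handle an exit due to a nested page fault. A fault caused by missing
 * accessed/dirty bits is resolved directly in the pmap; everything else
 * is pushed through vm_fault() on the guest's vmspace so that the page
 * is faulted in. The guest restarts at the faulting instruction.
 */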
static int
vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
{
	int rv, ftype;
	struct vm_map *map;
	struct vcpu *vcpu;
	struct vm_exit *vme;

	vcpu = &vm->vcpu[vcpuid];
	vme = &vcpu->exitinfo;

	ftype = vme->u.paging.fault_type;
	KASSERT(ftype == VM_PROT_READ ||
	    ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE,
	    ("vm_handle_paging: invalid fault_type %d", ftype));

	if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
		rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
		    vme->u.paging.gpa, ftype);
		if (rv == 0)
			goto done;
	}

	map = &vm->vmspace->vm_map;
	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL);

	VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, "
	    "ftype = %d", rv, vme->u.paging.gpa, ftype);

	if (rv != KERN_SUCCESS)
		return (EFAULT);
done:
	/* restart execution at the faulting instruction */
	vme->inst_length = 0;

	return (0);
}

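/*
 * Handle an exit that requires instruction emulation. The faulting
 * instruction is fetched and decoded here. Accesses that target the
 * local APIC, ioapic or HPET are emulated in the kernel; anything else
 * is bounced to userspace by setting '*retu'.
 */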
static int
vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
{
	struct vie *vie;
	struct vcpu *vcpu;
	struct vm_exit *vme;
	uint64_t gla, gpa;
	struct vm_guest_paging *paging;
	mem_region_read_t mread;
	mem_region_write_t mwrite;
	int error;

	vcpu = &vm->vcpu[vcpuid];
	vme = &vcpu->exitinfo;

	gla = vme->u.inst_emul.gla;
	gpa = vme->u.inst_emul.gpa;
	vie = &vme->u.inst_emul.vie;
	paging = &vme->u.inst_emul.paging;

	vie_init(vie);

	/* Fetch, decode and emulate the faulting instruction */
	error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip,
	    vme->inst_length, vie);
	if (error == 1)
		return (0);		/* Resume guest to handle page fault */
	else if (error == -1)
		return (EFAULT);
	else if (error != 0)
		panic("%s: vmm_fetch_instruction error %d", __func__, error);

	if (vmm_decode_instruction(vm, vcpuid, gla, paging->cpu_mode, vie) != 0)
		return (EFAULT);

	/* return to userland unless this is an in-kernel emulated device */
	if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
		mread = lapic_mmio_read;
		mwrite = lapic_mmio_write;
	} else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
		mread = vioapic_mmio_read;
		mwrite = vioapic_mmio_write;
	} else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
		mread = vhpet_mmio_read;
		mwrite = vhpet_mmio_write;
	} else {
		*retu = true;
		return (0);
	}

	error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, mread, mwrite,
	    retu);

	return (error);
}

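/*
 * Called by a vcpu that noticed the VM is being suspended. The vcpu
 * marks itself suspended, waits for the remaining active vcpus to do
 * the same (servicing any rendezvous in the meantime to avoid
 * deadlock), and then returns to userspace with the SUSPENDED exit.
 */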
static int
vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu)
{
	int i, done;
	struct vcpu *vcpu;

	done = 0;
	vcpu = &vm->vcpu[vcpuid];

	CPU_SET_ATOMIC(vcpuid, &vm->suspended_cpus);

	/*
	 * Wait until all 'active_cpus' have suspended themselves.
	 *
	 * Since a VM may be suspended at any time including when one or
	 * more vcpus are doing a rendezvous we need to call the rendezvous
	 * handler while we are waiting to prevent a deadlock.
	 */
	vcpu_lock(vcpu);
	while (1) {
		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
			VCPU_CTR0(vm, vcpuid, "All vcpus suspended");
			break;
		}

		if (vm->rendezvous_func == NULL) {
			VCPU_CTR0(vm, vcpuid, "Sleeping during suspend");
			vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
			msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
			vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		} else {
			VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend");
			vcpu_unlock(vcpu);
			vm_handle_rendezvous(vm, vcpuid);
			vcpu_lock(vcpu);
		}
	}
	vcpu_unlock(vcpu);

	/*
	 * Wakeup the other sleeping vcpus and return to userspace.
	 */
	for (i = 0; i < VM_MAXCPU; i++) {
		if (CPU_ISSET(i, &vm->suspended_cpus)) {
			vcpu_notify_event(vm, i, false);
		}
	}

	*retu = true;
	return (0);
}

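/*
 * Initiate a suspend of the virtual machine for the reason 'how'. Only
 * the first caller succeeds; every active vcpu is then notified so that
 * it exits the guest and winds down in vm_handle_suspend().
 */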
int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	int i;

	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
		VM_CTR2(vm, "virtual machine already suspended %d/%d",
		    vm->suspend, how);
		return (EALREADY);
	}

	VM_CTR1(vm, "virtual machine successfully suspended %d", how);

	/*
	 * Notify all active vcpus that they are now suspended.
	 */
	for (i = 0; i < VM_MAXCPU; i++) {
		if (CPU_ISSET(i, &vm->active_cpus))
			vcpu_notify_event(vm, i, false);
	}

	return (0);
}

void
vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip)
{
	struct vm_exit *vmexit;

	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));

	vmexit = vm_exitinfo(vm, vcpuid);
	vmexit->rip = rip;
	vmexit->inst_length = 0;
	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
	vmexit->u.suspended.how = vm->suspend;
}

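/*
 * Run a vcpu until it exits for a reason that must be handled in
 * userspace. Exits that can be serviced in the kernel (rendezvous,
 * hlt, paging, in-kernel device emulation) are handled here and the
 * guest is resumed at the appropriate instruction.
 */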
int
vm_run(struct vm *vm, struct vm_run *vmrun)
{
	int error, vcpuid;
	struct vcpu *vcpu;
	struct pcb *pcb;
	uint64_t tscval, rip;
	struct vm_exit *vme;
	bool retu, intr_disabled;
	pmap_t pmap;
	void *rptr, *sptr;

	vcpuid = vmrun->cpuid;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
		return (EINVAL);

	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
		return (EINVAL);

	rptr = &vm->rendezvous_func;
	sptr = &vm->suspend;
	pmap = vmspace_pmap(vm->vmspace);
	vcpu = &vm->vcpu[vcpuid];
	vme = &vcpu->exitinfo;
	rip = vmrun->rip;
restart:
	critical_enter();

	KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active),
	    ("vm_run: absurd pm_active"));

	tscval = rdtsc();

	pcb = PCPU_GET(curpcb);
	set_pcb_flags(pcb, PCB_FULL_IRET);

	restore_guest_msrs(vm, vcpuid);
	restore_guest_fpustate(vcpu);

	vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
	error = VMRUN(vm->cookie, vcpuid, rip, pmap, rptr, sptr);
	vcpu_require_state(vm, vcpuid, VCPU_FROZEN);

	save_guest_fpustate(vcpu);
	restore_host_msrs(vm, vcpuid);

	vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);

	critical_exit();

	if (error == 0) {
		retu = false;
		switch (vme->exitcode) {
		case VM_EXITCODE_SUSPENDED:
			error = vm_handle_suspend(vm, vcpuid, &retu);
			break;
		case VM_EXITCODE_IOAPIC_EOI:
			vioapic_process_eoi(vm, vcpuid,
			    vme->u.ioapic_eoi.vector);
			break;
		case VM_EXITCODE_RENDEZVOUS:
			vm_handle_rendezvous(vm, vcpuid);
			error = 0;
			break;
		case VM_EXITCODE_HLT:
			intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
			error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu);
			break;
		case VM_EXITCODE_PAGING:
			error = vm_handle_paging(vm, vcpuid, &retu);
			break;
		case VM_EXITCODE_INST_EMUL:
			error = vm_handle_inst_emul(vm, vcpuid, &retu);
			break;
		case VM_EXITCODE_INOUT:
		case VM_EXITCODE_INOUT_STR:
			error = vm_handle_inout(vm, vcpuid, vme, &retu);
			break;
		default:
			retu = true;	/* handled in userland */
			break;
		}
	}

	if (error == 0 && retu == false) {
		rip = vme->rip + vme->inst_length;
		goto restart;
	}

	/* copy the exit information */
	bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
	return (error);
}

int
vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	if (exception->vector < 0 || exception->vector >= 32)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];

	if (vcpu->exception_pending) {
		VCPU_CTR2(vm, vcpuid, "Unable to inject exception %d due to "
		    "pending exception %d", exception->vector,
		    vcpu->exception.vector);
		return (EBUSY);
	}

	vcpu->exception_pending = 1;
	vcpu->exception = *exception;
	VCPU_CTR1(vm, vcpuid, "Exception %d pending", exception->vector);
	return (0);
}

int
vm_exception_pending(struct vm *vm, int vcpuid, struct vm_exception *exception)
{
	struct vcpu *vcpu;
	int pending;

	KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid));

	vcpu = &vm->vcpu[vcpuid];
	pending = vcpu->exception_pending;
	if (pending) {
		vcpu->exception_pending = 0;
		*exception = vcpu->exception;
		VCPU_CTR1(vm, vcpuid, "Exception %d delivered",
		    exception->vector);
	}
	return (pending);
}

static void
vm_inject_fault(struct vm *vm, int vcpuid, struct vm_exception *exception)
{
	struct vm_exit *vmexit;
	int error;

	error = vm_inject_exception(vm, vcpuid, exception);
	KASSERT(error == 0, ("vm_inject_exception error %d", error));

	/*
	 * A fault-like exception allows the instruction to be restarted
	 * after the exception handler returns.
	 *
	 * By setting the inst_length to 0 we ensure that the instruction
	 * pointer remains at the faulting instruction.
	 */
	vmexit = vm_exitinfo(vm, vcpuid);
	vmexit->inst_length = 0;
}

void
vm_inject_pf(struct vm *vm, int vcpuid, int error_code, uint64_t cr2)
{
	struct vm_exception pf = {
		.vector = IDT_PF,
		.error_code_valid = 1,
		.error_code = error_code
	};
	int error;

	VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %#x, cr2 %#lx",
	    error_code, cr2);

	error = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR2, cr2);
	KASSERT(error == 0, ("vm_set_register(cr2) error %d", error));

	vm_inject_fault(vm, vcpuid, &pf);
}

void
vm_inject_gp(struct vm *vm, int vcpuid)
{
	struct vm_exception gpf = {
		.vector = IDT_GP,
		.error_code_valid = 1,
		.error_code = 0
	};

	vm_inject_fault(vm, vcpuid, &gpf);
}

void
vm_inject_ud(struct vm *vm, int vcpuid)
{
	struct vm_exception udf = {
		.vector = IDT_UD,
		.error_code_valid = 0
	};

	vm_inject_fault(vm, vcpuid, &udf);
}

static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");

int
vm_inject_nmi(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];

	vcpu->nmi_pending = 1;
	vcpu_notify_event(vm, vcpuid, false);
	return (0);
}

int
vm_nmi_pending(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	return (vcpu->nmi_pending);
}

void
vm_nmi_clear(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1517241982Sneel		panic("vm_nmi_clear: invalid vcpuid %d", vcpuid);
1518241982Sneel
1519241982Sneel	vcpu = &vm->vcpu[vcpuid];
1520241982Sneel
1521241982Sneel	if (vcpu->nmi_pending == 0)
1522241982Sneel		panic("vm_nmi_clear: inconsistent nmi_pending state");
1523241982Sneel
1524241982Sneel	vcpu->nmi_pending = 0;
1525241982Sneel	vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1);
1526241982Sneel}
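
/*
 * Illustrative sketch, not part of the original source: the functions
 * above form a producer/consumer pair.  vm_inject_nmi() marks the NMI
 * pending and kicks the vcpu; the processor-specific backend is then
 * expected to poll vm_nmi_pending() before guest entry and acknowledge
 * with vm_nmi_clear() once the NMI has actually been injected (the
 * VMM_EXAMPLE_SKETCHES guard and the helper name are hypothetical).
 */
#ifdef VMM_EXAMPLE_SKETCHES
static void
example_deliver_pending_nmi(struct vm *vm, int vcpuid)
{

	if (vm_nmi_pending(vm, vcpuid)) {
		/* Backend-specific NMI injection would happen here. */
		vm_nmi_clear(vm, vcpuid);
	}
}
#endif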
1527241982Sneel
1528268891Sjhbstatic VMM_STAT(VCPU_EXTINT_COUNT, "number of ExtINTs delivered to vcpu");
1529268891Sjhb
1530241982Sneelint
1531268891Sjhbvm_inject_extint(struct vm *vm, int vcpuid)
1532268891Sjhb{
1533268891Sjhb	struct vcpu *vcpu;
1534268891Sjhb
1535268891Sjhb	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1536268891Sjhb		return (EINVAL);
1537268891Sjhb
1538268891Sjhb	vcpu = &vm->vcpu[vcpuid];
1539268891Sjhb
1540268891Sjhb	vcpu->extint_pending = 1;
1541268891Sjhb	vcpu_notify_event(vm, vcpuid, false);
1542268891Sjhb	return (0);
1543268891Sjhb}
1544268891Sjhb
1545268891Sjhbint
1546268891Sjhbvm_extint_pending(struct vm *vm, int vcpuid)
1547268891Sjhb{
1548268891Sjhb	struct vcpu *vcpu;
1549268891Sjhb
1550268891Sjhb	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1551268891Sjhb		panic("vm_extint_pending: invalid vcpuid %d", vcpuid);
1552268891Sjhb
1553268891Sjhb	vcpu = &vm->vcpu[vcpuid];
1554268891Sjhb
1555268891Sjhb	return (vcpu->extint_pending);
1556268891Sjhb}
1557268891Sjhb
1558268891Sjhbvoid
1559268891Sjhbvm_extint_clear(struct vm *vm, int vcpuid)
1560268891Sjhb{
1561268891Sjhb	struct vcpu *vcpu;
1562268891Sjhb
1563268891Sjhb	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1564268891Sjhb		panic("vm_extint_clear: invalid vcpuid %d", vcpuid);
1565268891Sjhb
1566268891Sjhb	vcpu = &vm->vcpu[vcpuid];
1567268891Sjhb
1568268891Sjhb	if (vcpu->extint_pending == 0)
1569268891Sjhb		panic("vm_extint_clear: inconsistent extint_pending state");
1570268891Sjhb
1571268891Sjhb	vcpu->extint_pending = 0;
1572268891Sjhb	vmm_stat_incr(vm, vcpuid, VCPU_EXTINT_COUNT, 1);
1573268891Sjhb}
1574268891Sjhb
1575268891Sjhbint
1576221828Sgrehanvm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
1577221828Sgrehan{
1578221828Sgrehan	if (vcpu < 0 || vcpu >= VM_MAXCPU)
1579221828Sgrehan		return (EINVAL);
1580221828Sgrehan
1581221828Sgrehan	if (type < 0 || type >= VM_CAP_MAX)
1582221828Sgrehan		return (EINVAL);
1583221828Sgrehan
1584221828Sgrehan	return (VMGETCAP(vm->cookie, vcpu, type, retval));
1585221828Sgrehan}
1586221828Sgrehan
1587221828Sgrehanint
1588221828Sgrehanvm_set_capability(struct vm *vm, int vcpu, int type, int val)
1589221828Sgrehan{
1590221828Sgrehan	if (vcpu < 0 || vcpu >= VM_MAXCPU)
1591221828Sgrehan		return (EINVAL);
1592221828Sgrehan
1593221828Sgrehan	if (type < 0 || type >= VM_CAP_MAX)
1594221828Sgrehan		return (EINVAL);
1595221828Sgrehan
1596221828Sgrehan	return (VMSETCAP(vm->cookie, vcpu, type, val));
1597221828Sgrehan}
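
/*
 * Illustrative sketch, not part of the original source: capabilities
 * are backend-specific, so vm_get_capability() fails for ones the
 * backend does not implement.  A caller can use that to probe before
 * setting, as in this hypothetical helper (the VMM_EXAMPLE_SKETCHES
 * guard and the helper name are assumptions; VM_CAP_HALT_EXIT is a
 * real capability).
 */
#ifdef VMM_EXAMPLE_SKETCHES
static int
example_enable_halt_exits(struct vm *vm, int vcpuid)
{
	int current, error;

	error = vm_get_capability(vm, vcpuid, VM_CAP_HALT_EXIT, &current);
	if (error)
		return (error);		/* capability not supported */
	return (vm_set_capability(vm, vcpuid, VM_CAP_HALT_EXIT, 1));
}
#endif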
1598221828Sgrehan
1599221828Sgrehanuint64_t *
1600221828Sgrehanvm_guest_msrs(struct vm *vm, int cpu)
1601221828Sgrehan{
1602221828Sgrehan	return (vm->vcpu[cpu].guest_msrs);
1603221828Sgrehan}
1604221828Sgrehan
1605221828Sgrehanstruct vlapic *
1606221828Sgrehanvm_lapic(struct vm *vm, int cpu)
1607221828Sgrehan{
1608221828Sgrehan	return (vm->vcpu[cpu].vlapic);
1609221828Sgrehan}
1610221828Sgrehan
1611261088Sjhbstruct vioapic *
1612261088Sjhbvm_ioapic(struct vm *vm)
1613261088Sjhb{
1614261088Sjhb
1615261088Sjhb	return (vm->vioapic);
1616261088Sjhb}
1617261088Sjhb
1618261088Sjhbstruct vhpet *
1619261088Sjhbvm_hpet(struct vm *vm)
1620261088Sjhb{
1621261088Sjhb
1622261088Sjhb	return (vm->vhpet);
1623261088Sjhb}
1624261088Sjhb
1625221828Sgrehanboolean_t
1626221828Sgrehanvmm_is_pptdev(int bus, int slot, int func)
1627221828Sgrehan{
1628246188Sneel	int found, i, n;
1629246188Sneel	int b, s, f;
1630221828Sgrehan	char *val, *cp, *cp2;
1631221828Sgrehan
1632221828Sgrehan	/*
1633246188Sneel	 * XXX
1634246188Sneel	 * The length of an environment variable is limited to 128 bytes,
1635246188Sneel	 * which puts an upper limit on the number of passthru devices
1636246188Sneel	 * that may be specified using a single environment variable.
1637246188Sneel	 *
1638246188Sneel	 * Work around this by scanning multiple environment variable
1639246188Sneel	 * names instead of a single one - yuck!
1640221828Sgrehan	 */
1641246188Sneel	const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL };
1642246188Sneel
1643246188Sneel	/* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */
1644221828Sgrehan	found = 0;
1645246188Sneel	for (i = 0; names[i] != NULL && !found; i++) {
1646246188Sneel		cp = val = getenv(names[i]);
1647246188Sneel		while (cp != NULL && *cp != '\0') {
1648246188Sneel			if ((cp2 = strchr(cp, ' ')) != NULL)
1649246188Sneel				*cp2 = '\0';
1650221828Sgrehan
1651246188Sneel			n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
1652246188Sneel			if (n == 3 && bus == b && slot == s && func == f) {
1653246188Sneel				found = 1;
1654246188Sneel				break;
1655246188Sneel			}
1656221828Sgrehan
1657246188Sneel			if (cp2 != NULL)
1658246188Sneel				*cp2++ = ' ';
1659221828Sgrehan
1660246188Sneel			cp = cp2;
1661246188Sneel		}
1662246188Sneel		freeenv(val);
1663221828Sgrehan	}
1664221828Sgrehan	return (found);
1665221828Sgrehan}
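
/*
 * Illustrative configuration, not part of the original source: given
 * the 128-byte limit described above, a long passthru list can be
 * split across the scanned variable names, e.g. in /boot/loader.conf
 * (the bus/slot/function values below are made up):
 *
 *	pptdevs="2/0/0 3/0/0 4/0/0"
 *	pptdevs2="5/0/0 6/0/0"
 */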
1666221828Sgrehan
1667221828Sgrehanvoid *
1668221828Sgrehanvm_iommu_domain(struct vm *vm)
1669221828Sgrehan{
1670221828Sgrehan
1671221828Sgrehan	return (vm->iommu);
1672221828Sgrehan}
1673221828Sgrehan
1674241489Sneelint
1675266393Sjhbvcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate,
1676266393Sjhb    bool from_idle)
1677221828Sgrehan{
1678241489Sneel	int error;
1679221828Sgrehan	struct vcpu *vcpu;
1680221828Sgrehan
1681221828Sgrehan	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1682221828Sgrehan		panic("vcpu_set_state: invalid vcpuid %d", vcpuid);
1683221828Sgrehan
1684221828Sgrehan	vcpu = &vm->vcpu[vcpuid];
1685221828Sgrehan
1686241489Sneel	vcpu_lock(vcpu);
1687266393Sjhb	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
1688241489Sneel	vcpu_unlock(vcpu);
1689241489Sneel
1690241489Sneel	return (error);
1691221828Sgrehan}
1692221828Sgrehan
1693241489Sneelenum vcpu_state
1694249879Sgrehanvcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
1695221828Sgrehan{
1696221828Sgrehan	struct vcpu *vcpu;
1697241489Sneel	enum vcpu_state state;
1698221828Sgrehan
1699221828Sgrehan	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1700221828Sgrehan		panic("vcpu_get_state: invalid vcpuid %d", vcpuid);
1701221828Sgrehan
1702221828Sgrehan	vcpu = &vm->vcpu[vcpuid];
1703221828Sgrehan
1704241489Sneel	vcpu_lock(vcpu);
1705241489Sneel	state = vcpu->state;
1706249879Sgrehan	if (hostcpu != NULL)
1707249879Sgrehan		*hostcpu = vcpu->hostcpu;
1708241489Sneel	vcpu_unlock(vcpu);
1709221828Sgrehan
1710241489Sneel	return (state);
1711221828Sgrehan}
1712221828Sgrehan
1713270070Sgrehanint
1714221828Sgrehanvm_activate_cpu(struct vm *vm, int vcpuid)
1715221828Sgrehan{
1716221828Sgrehan
1717270070Sgrehan	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1718270070Sgrehan		return (EINVAL);
1719266339Sjhb
1720270070Sgrehan	if (CPU_ISSET(vcpuid, &vm->active_cpus))
1721270070Sgrehan		return (EBUSY);
1722270070Sgrehan
1723266339Sjhb	VCPU_CTR0(vm, vcpuid, "activated");
1724266339Sjhb	CPU_SET_ATOMIC(vcpuid, &vm->active_cpus);
1725270070Sgrehan	return (0);
1726221828Sgrehan}
1727221828Sgrehan
1728223621Sgrehancpuset_t
1729221828Sgrehanvm_active_cpus(struct vm *vm)
1730221828Sgrehan{
1731221828Sgrehan
1732221828Sgrehan	return (vm->active_cpus);
1733221828Sgrehan}
1734221828Sgrehan
1735270070Sgrehancpuset_t
1736270070Sgrehanvm_suspended_cpus(struct vm *vm)
1737270070Sgrehan{
1738270070Sgrehan
1739270070Sgrehan	return (vm->suspended_cpus);
1740270070Sgrehan}
1741270070Sgrehan
1742221828Sgrehanvoid *
1743221828Sgrehanvcpu_stats(struct vm *vm, int vcpuid)
1744221828Sgrehan{
1745221828Sgrehan
1746221828Sgrehan	return (vm->vcpu[vcpuid].stats);
1747221828Sgrehan}
1748240922Sneel
1749240922Sneelint
1750240922Sneelvm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state)
1751240922Sneel{
1752240922Sneel	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1753240922Sneel		return (EINVAL);
1754240922Sneel
1755240922Sneel	*state = vm->vcpu[vcpuid].x2apic_state;
1756240922Sneel
1757240922Sneel	return (0);
1758240922Sneel}
1759240922Sneel
1760240922Sneelint
1761240922Sneelvm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
1762240922Sneel{
1763240922Sneel	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1764240922Sneel		return (EINVAL);
1765240922Sneel
1766248392Sneel	if (state >= X2APIC_STATE_LAST)
1767240922Sneel		return (EINVAL);
1768240922Sneel
1769240922Sneel	vm->vcpu[vcpuid].x2apic_state = state;
1770240922Sneel
1771240943Sneel	vlapic_set_x2apic_state(vm, vcpuid, state);
1772240943Sneel
1773240922Sneel	return (0);
1774240922Sneel}
1775241489Sneel
1776262350Sjhb/*
1777262350Sjhb * This function is called to ensure that a vcpu "sees" a pending event
1778262350Sjhb * as soon as possible:
1779262350Sjhb * - If the vcpu thread is sleeping then it is woken up.
1780262350Sjhb * - If the vcpu is running on a different host_cpu then an IPI will be directed
1781262350Sjhb *   to the host_cpu to cause the vcpu to trap into the hypervisor.
1782262350Sjhb */
1783241489Sneelvoid
1784266339Sjhbvcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
1785241489Sneel{
1786241489Sneel	int hostcpu;
1787241489Sneel	struct vcpu *vcpu;
1788241489Sneel
1789241489Sneel	vcpu = &vm->vcpu[vcpuid];
1790241489Sneel
1791242065Sneel	vcpu_lock(vcpu);
1792241489Sneel	hostcpu = vcpu->hostcpu;
1793266393Sjhb	if (vcpu->state == VCPU_RUNNING) {
1794266393Sjhb		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
1795266339Sjhb		if (hostcpu != curcpu) {
1796266393Sjhb			if (lapic_intr) {
1797266339Sjhb				vlapic_post_intr(vcpu->vlapic, hostcpu,
1798266339Sjhb				    vmm_ipinum);
1799266393Sjhb			} else {
1800266339Sjhb				ipi_cpu(hostcpu, vmm_ipinum);
1801266393Sjhb			}
1802266393Sjhb		} else {
1803266393Sjhb			/*
1804266393Sjhb			 * If the 'vcpu' is running on 'curcpu' then it must
1805266393Sjhb			 * be sending a notification to itself (e.g. SELF_IPI).
1806266393Sjhb			 * The pending event will be picked up when the vcpu
1807266393Sjhb			 * transitions back to guest context.
1808266393Sjhb			 */
1809266339Sjhb		}
1810266393Sjhb	} else {
1811266393Sjhb		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
1812266393Sjhb		    "with hostcpu %d", vcpu->state, hostcpu));
1813266393Sjhb		if (vcpu->state == VCPU_SLEEPING)
1814266393Sjhb			wakeup_one(vcpu);
1815242065Sneel	}
1816242065Sneel	vcpu_unlock(vcpu);
1817241489Sneel}
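
/*
 * Illustrative sleep-side counterpart, not part of the original source
 * and modeled on the vm_handle_hlt() path earlier in this file: an idle
 * vcpu parks itself in VCPU_SLEEPING with 'vcpu' as the wait channel,
 * which is exactly what the wakeup_one(vcpu) above targets (the
 * VMM_EXAMPLE_SKETCHES guard and the helper name are hypothetical).
 */
#ifdef VMM_EXAMPLE_SKETCHES
static void
example_idle_until_notified(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	vcpu = &vm->vcpu[vcpuid];
	vcpu_lock(vcpu);
	vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
	/* 'vcpu' is the wait channel used by vcpu_notify_event(). */
	msleep_spin(vcpu, &vcpu->mtx, "vmidle", 0);
	vcpu_require_state_locked(vcpu, VCPU_FROZEN);
	vcpu_unlock(vcpu);
}
#endif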
1818256072Sneel
1819256072Sneelstruct vmspace *
1820256072Sneelvm_get_vmspace(struct vm *vm)
1821256072Sneel{
1822256072Sneel
1823256072Sneel	return (vm->vmspace);
1824256072Sneel}
1825261088Sjhb
1826261088Sjhbint
1827261088Sjhbvm_apicid2vcpuid(struct vm *vm, int apicid)
1828261088Sjhb{
1829261088Sjhb	/*
1830261088Sjhb	 * XXX apic id is assumed to be numerically identical to vcpu id
1831261088Sjhb	 */
1832261088Sjhb	return (apicid);
1833261088Sjhb}
1834266339Sjhb
1835266339Sjhbvoid
1836266339Sjhbvm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
1837266339Sjhb    vm_rendezvous_func_t func, void *arg)
1838266339Sjhb{
1839266339Sjhb	int i;
1840266339Sjhb
1841266339Sjhb	/*
1842266339Sjhb	 * Enforce that this function is called without any locks
1843266339Sjhb	 */
1844266339Sjhb	WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous");
1845266339Sjhb	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
1846266339Sjhb	    ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid));
1847266339Sjhb
1848266339Sjhbrestart:
1849266339Sjhb	mtx_lock(&vm->rendezvous_mtx);
1850266339Sjhb	if (vm->rendezvous_func != NULL) {
1851266339Sjhb		/*
1852266339Sjhb		 * If a rendezvous is already in progress then we need to
1853266339Sjhb		 * call the rendezvous handler in case this 'vcpuid' is one
1854266339Sjhb		 * of the targets of the rendezvous.
1855266339Sjhb		 */
1856266339Sjhb		RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress");
1857266339Sjhb		mtx_unlock(&vm->rendezvous_mtx);
1858266339Sjhb		vm_handle_rendezvous(vm, vcpuid);
1859266339Sjhb		goto restart;
1860266339Sjhb	}
1861266339Sjhb	KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous "
1862266339Sjhb	    "rendezvous is still in progress"));
1863266339Sjhb
1864266339Sjhb	RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous");
1865266339Sjhb	vm->rendezvous_req_cpus = dest;
1866266339Sjhb	CPU_ZERO(&vm->rendezvous_done_cpus);
1867266339Sjhb	vm->rendezvous_arg = arg;
1868266339Sjhb	vm_set_rendezvous_func(vm, func);
1869266339Sjhb	mtx_unlock(&vm->rendezvous_mtx);
1870266339Sjhb
1871266339Sjhb	/*
1872266339Sjhb	 * Wake up any sleeping vcpus and trigger a VM-exit in any running
1873266339Sjhb	 * vcpus so they handle the rendezvous as soon as possible.
1874266339Sjhb	 */
1875266339Sjhb	for (i = 0; i < VM_MAXCPU; i++) {
1876266339Sjhb		if (CPU_ISSET(i, &dest))
1877266339Sjhb			vcpu_notify_event(vm, i, false);
1878266339Sjhb	}
1879266339Sjhb
1880266339Sjhb	vm_handle_rendezvous(vm, vcpuid);
1881266339Sjhb}
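
/*
 * Illustrative caller sketch, not part of the original source: run a
 * callback on every active vcpu and wait until all of them have
 * executed it.  vm_smp_rendezvous() does not return until the
 * rendezvous completes, so stack storage for 'arg' is safe (the
 * VMM_EXAMPLE_SKETCHES guard and both helper names are hypothetical).
 */
#ifdef VMM_EXAMPLE_SKETCHES
static void
example_rendezvous_cb(struct vm *vm, int vcpuid, void *arg)
{

	/* Runs once in the context of each target vcpu. */
	atomic_add_int(arg, 1);
}

static void
example_run_on_all_vcpus(struct vm *vm, int vcpuid)
{
	u_int visited = 0;

	vm_smp_rendezvous(vm, vcpuid, vm_active_cpus(vm),
	    example_rendezvous_cb, &visited);
	KASSERT(visited > 0, ("no vcpu executed the callback"));
}
#endif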
1882268891Sjhb
1883268891Sjhbstruct vatpic *
1884268891Sjhbvm_atpic(struct vm *vm)
1885268891Sjhb{
1886268891Sjhb	return (vm->vatpic);
1887268891Sjhb}
1888268891Sjhb
1889268891Sjhbstruct vatpit *
1890268891Sjhbvm_atpit(struct vm *vm)
1891268891Sjhb{
1892268891Sjhb	return (vm->vatpit);
1893268891Sjhb}
1894268976Sjhb
1895268976Sjhbenum vm_reg_name
1896268976Sjhbvm_segment_name(int seg)
1897268976Sjhb{
1898268976Sjhb	static enum vm_reg_name seg_names[] = {
1899268976Sjhb		VM_REG_GUEST_ES,
1900268976Sjhb		VM_REG_GUEST_CS,
1901268976Sjhb		VM_REG_GUEST_SS,
1902268976Sjhb		VM_REG_GUEST_DS,
1903268976Sjhb		VM_REG_GUEST_FS,
1904268976Sjhb		VM_REG_GUEST_GS
1905268976Sjhb	};
1906268976Sjhb
1907268976Sjhb	KASSERT(seg >= 0 && seg < nitems(seg_names),
1908268976Sjhb	    ("%s: invalid segment encoding %d", __func__, seg));
1909268976Sjhb	return (seg_names[seg]);
1910268976Sjhb}
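
/*
 * Illustrative sketch, not part of the original source: an instruction
 * decoder can map a raw segment encoding (0-5) to its register name and
 * then fetch the descriptor through the vm_get_seg_desc() API declared
 * in machine/vmm.h (the VMM_EXAMPLE_SKETCHES guard and the helper name
 * are hypothetical).
 */
#ifdef VMM_EXAMPLE_SKETCHES
static int
example_fetch_seg_desc(struct vm *vm, int vcpuid, int seg_encoding,
    struct seg_desc *desc)
{

	return (vm_get_seg_desc(vm, vcpuid, vm_segment_name(seg_encoding),
	    desc));
}
#endif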
1911