vmm.c revision 223621
1221828Sgrehan/*-
2221828Sgrehan * Copyright (c) 2011 NetApp, Inc.
3221828Sgrehan * All rights reserved.
4221828Sgrehan *
5221828Sgrehan * Redistribution and use in source and binary forms, with or without
6221828Sgrehan * modification, are permitted provided that the following conditions
7221828Sgrehan * are met:
8221828Sgrehan * 1. Redistributions of source code must retain the above copyright
9221828Sgrehan *    notice, this list of conditions and the following disclaimer.
10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright
11221828Sgrehan *    notice, this list of conditions and the following disclaimer in the
12221828Sgrehan *    documentation and/or other materials provided with the distribution.
13221828Sgrehan *
14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17221828Sgrehan * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24221828Sgrehan * SUCH DAMAGE.
25221828Sgrehan *
26221828Sgrehan * $FreeBSD$
27221828Sgrehan */
28221828Sgrehan
29221828Sgrehan#include <sys/cdefs.h>
30221828Sgrehan__FBSDID("$FreeBSD$");
31221828Sgrehan
32221828Sgrehan#include <sys/param.h>
33221828Sgrehan#include <sys/kernel.h>
34221828Sgrehan#include <sys/module.h>
35221828Sgrehan#include <sys/sysctl.h>
36221828Sgrehan#include <sys/malloc.h>
37221828Sgrehan#include <sys/pcpu.h>
38221828Sgrehan#include <sys/lock.h>
39221828Sgrehan#include <sys/mutex.h>
40221828Sgrehan#include <sys/proc.h>
41221828Sgrehan#include <sys/sched.h>
42221828Sgrehan#include <sys/smp.h>
43221828Sgrehan#include <sys/systm.h>
44221828Sgrehan
45221828Sgrehan#include <vm/vm.h>
46221828Sgrehan
47221828Sgrehan#include <machine/vm.h>
48221828Sgrehan#include <machine/pcb.h>
49221914Sjhb#include <x86/apicreg.h>
50221828Sgrehan
51221828Sgrehan#include <machine/vmm.h>
52221828Sgrehan#include "vmm_mem.h"
53221828Sgrehan#include "vmm_util.h"
54221828Sgrehan#include <machine/vmm_dev.h>
55221828Sgrehan#include "vlapic.h"
56221828Sgrehan#include "vmm_msr.h"
57221828Sgrehan#include "vmm_ipi.h"
58221828Sgrehan#include "vmm_stat.h"
59221828Sgrehan
60221828Sgrehan#include "io/ppt.h"
61221828Sgrehan#include "io/iommu.h"
62221828Sgrehan
63221828Sgrehanstruct vlapic;
64221828Sgrehan
/*
 * Per-virtual-cpu state kept by this file.  'struct vm' embeds an array of
 * VM_MAXCPU of these; each entry is filled in by vcpu_init().
 */
65221828Sgrehanstruct vcpu {
66221828Sgrehan	int		flags;		/* VCPU_F_* bits */
67221828Sgrehan	int		pincpu;		/* host cpuid this vcpu is bound to */
68221828Sgrehan	int		hostcpu;	/* host cpuid this vcpu last ran on */
69221828Sgrehan	uint64_t	guest_msrs[VMM_MSR_NUM]; /* exposed via vm_guest_msrs() */
70221828Sgrehan	struct vlapic	*vlapic;	/* from vlapic_init() */
71221828Sgrehan	int		 vcpuid;	/* index of this entry in vm->vcpu[] */
72221828Sgrehan	struct savefpu	savefpu;	/* guest fpu state */
73221828Sgrehan	void		*stats;		/* from vmm_stat_alloc() */
74221828Sgrehan};
/* Bits stored in 'struct vcpu'.flags. */
#define	VCPU_F_PINNED	0x0001
#define	VCPU_F_RUNNING	0x0002

/*
 * Pinning helpers.  'vm' is a pointer to a 'struct vm' and 'vcpuid' an
 * index into its vcpu[] array.  Every macro argument is parenthesized so
 * that arbitrary expressions (e.g. '&vm_obj' or 'base + 1') expand
 * correctly.  Note that 'vm' and 'vcpuid' are evaluated more than once,
 * so arguments must not have side effects.
 */

/* Host cpu the vcpu is pinned to, or -1 if it is not pinned. */
#define	VCPU_PINCPU(vm, vcpuid)						\
    (((vm)->vcpu[(vcpuid)].flags & VCPU_F_PINNED) ?			\
	(vm)->vcpu[(vcpuid)].pincpu : -1)

/* Clear the pinned flag; 'pincpu' itself is left untouched. */
#define	VCPU_UNPIN(vm, vcpuid)						\
    ((vm)->vcpu[(vcpuid)].flags &= ~VCPU_F_PINNED)

/* Mark the vcpu as pinned and record the host cpu it is pinned to. */
#define	VCPU_PIN(vm, vcpuid, host_cpuid)				\
do {									\
	(vm)->vcpu[(vcpuid)].flags |= VCPU_F_PINNED;			\
	(vm)->vcpu[(vcpuid)].pincpu = (host_cpuid);			\
} while (0)
88221828Sgrehan
/* Capacity of the mem_segs[] array in 'struct vm'. */
89221828Sgrehan#define	VM_MAX_MEMORY_SEGMENTS	2
90221828Sgrehan
/*
 * State of one virtual machine.  Allocated zeroed by vm_create() and
 * released by vm_destroy().
 */
91221828Sgrehanstruct vm {
92221828Sgrehan	void		*cookie;	/* processor-specific data */
93221828Sgrehan	void		*iommu;		/* iommu-specific data */
94221828Sgrehan	struct vcpu	vcpu[VM_MAXCPU];
95221828Sgrehan	int		num_mem_segs;	/* number of valid mem_segs[] entries */
96221828Sgrehan	struct vm_memory_segment mem_segs[VM_MAX_MEMORY_SEGMENTS];
97221828Sgrehan	char		name[VM_MAX_NAMELEN];	/* NUL-terminated, set by vm_create() */
98221828Sgrehan
99221828Sgrehan	/*
100223621Sgrehan	 * Set of active vcpus.
101221828Sgrehan	 * An active vcpu is one that has been started implicitly (BSP) or
102221828Sgrehan	 * explicitly (AP) by sending it a startup ipi.
103221828Sgrehan	 */
104223621Sgrehan	cpuset_t	active_cpus;
105221828Sgrehan};
106221828Sgrehan
/*
 * Processor-specific backend.  'ops' is selected in vmm_init() and stays
 * NULL if no backend was chosen.  Each wrapper below calls the matching
 * 'ops' method when 'ops' is non-NULL; otherwise it evaluates to a fixed
 * fallback: 0 for VMM_INIT/VMM_CLEANUP, NULL for VMINIT/VMCLEANUP and
 * ENXIO for all the others.
 */
107221828Sgrehanstatic struct vmm_ops *ops;
108221828Sgrehan#define	VMM_INIT()	(ops != NULL ? (*ops->init)() : 0)
109221828Sgrehan#define	VMM_CLEANUP()	(ops != NULL ? (*ops->cleanup)() : 0)
110221828Sgrehan
111221828Sgrehan#define	VMINIT(vm)	(ops != NULL ? (*ops->vminit)(vm): NULL)
112221828Sgrehan#define	VMRUN(vmi, vcpu, rip, vmexit) \
113221828Sgrehan	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, vmexit) : ENXIO)
114221828Sgrehan#define	VMCLEANUP(vmi)	(ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
115221828Sgrehan#define	VMMMAP(vmi, gpa, hpa, len, attr, prot, spm)	\
116221828Sgrehan    (ops != NULL ? (*ops->vmmmap)(vmi, gpa, hpa, len, attr, prot, spm) : ENXIO)
117221828Sgrehan#define	VMGETREG(vmi, vcpu, num, retval)		\
118221828Sgrehan	(ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
119221828Sgrehan#define	VMSETREG(vmi, vcpu, num, val)		\
120221828Sgrehan	(ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
121221828Sgrehan#define	VMGETDESC(vmi, vcpu, num, desc)		\
122221828Sgrehan	(ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
123221828Sgrehan#define	VMSETDESC(vmi, vcpu, num, desc)		\
124221828Sgrehan	(ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
125221828Sgrehan#define	VMINJECT(vmi, vcpu, type, vec, ec, ecv)	\
126221828Sgrehan	(ops != NULL ? (*ops->vminject)(vmi, vcpu, type, vec, ec, ecv) : ENXIO)
127221828Sgrehan#define	VMNMI(vmi, vcpu)	\
128221828Sgrehan	(ops != NULL ? (*ops->vmnmi)(vmi, vcpu) : ENXIO)
129221828Sgrehan#define	VMGETCAP(vmi, vcpu, num, retval)	\
130221828Sgrehan	(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
131221828Sgrehan#define	VMSETCAP(vmi, vcpu, num, val)		\
132221828Sgrehan	(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
133221828Sgrehan
/*
 * Raw FPU helpers used by restore_guest_fpustate()/save_guest_fpustate():
 *   fxrstor(addr)         - execute FXRSTOR with *addr as its memory operand
 *   fxsave(addr)          - execute FXSAVE with *addr as its memory operand
 *   fpu_start_emulating() - OR CR0_TS into the machine status word
 *                           (smsw/lmsw; clobbers %ax)
 *   fpu_stop_emulating()  - execute CLTS
 */
134221828Sgrehan#define	fxrstor(addr)		__asm("fxrstor %0" : : "m" (*(addr)))
135221828Sgrehan#define	fxsave(addr)		__asm __volatile("fxsave %0" : "=m" (*(addr)))
136221828Sgrehan#define	fpu_start_emulating()	__asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \
137221828Sgrehan				      : : "n" (CR0_TS) : "ax")
138221828Sgrehan#define	fpu_stop_emulating()	__asm("clts")
139221828Sgrehan
/* malloc type used for 'struct vm' allocations in vm_create()/vm_destroy(). */
140221828Sgrehanstatic MALLOC_DEFINE(M_VM, "vm", "vm");
141221828SgrehanCTASSERT(VMM_MSR_NUM <= 64);	/* msr_mask can keep track of up to 64 msrs */
142221828Sgrehan
143221828Sgrehan/* statistics */
/* Incremented in vm_run() by the rdtsc() delta measured around each run. */
144221828Sgrehanstatic VMM_STAT_DEFINE(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
145221828Sgrehan
146221828Sgrehanstatic void
147221828Sgrehanvcpu_cleanup(struct vcpu *vcpu)
148221828Sgrehan{
149221828Sgrehan	vlapic_cleanup(vcpu->vlapic);
150221828Sgrehan	vmm_stat_free(vcpu->stats);
151221828Sgrehan}
152221828Sgrehan
153221828Sgrehanstatic void
154221828Sgrehanvcpu_init(struct vm *vm, uint32_t vcpu_id)
155221828Sgrehan{
156221828Sgrehan	struct vcpu *vcpu;
157221828Sgrehan
158221828Sgrehan	vcpu = &vm->vcpu[vcpu_id];
159221828Sgrehan
160221828Sgrehan	vcpu->hostcpu = -1;
161221828Sgrehan	vcpu->vcpuid = vcpu_id;
162221828Sgrehan	vcpu->vlapic = vlapic_init(vm, vcpu_id);
163221914Sjhb	fpugetregs(curthread);
164221914Sjhb	vcpu->savefpu = curthread->td_pcb->pcb_user_save;
165221828Sgrehan	vcpu->stats = vmm_stat_alloc();
166221828Sgrehan}
167221828Sgrehan
168221828Sgrehanstatic int
169221828Sgrehanvmm_init(void)
170221828Sgrehan{
171221828Sgrehan	int error;
172221828Sgrehan
173221828Sgrehan	vmm_ipi_init();
174221828Sgrehan
175221828Sgrehan	error = vmm_mem_init();
176221828Sgrehan	if (error)
177221828Sgrehan		return (error);
178221828Sgrehan
179221828Sgrehan	if (vmm_is_intel())
180221828Sgrehan		ops = &vmm_ops_intel;
181221828Sgrehan	else if (vmm_is_amd())
182221828Sgrehan		ops = &vmm_ops_amd;
183221828Sgrehan	else
184221828Sgrehan		return (ENXIO);
185221828Sgrehan
186221828Sgrehan	vmm_msr_init();
187221828Sgrehan
188221828Sgrehan	return (VMM_INIT());
189221828Sgrehan}
190221828Sgrehan
191221828Sgrehanstatic int
192221828Sgrehanvmm_handler(module_t mod, int what, void *arg)
193221828Sgrehan{
194221828Sgrehan	int error;
195221828Sgrehan
196221828Sgrehan	switch (what) {
197221828Sgrehan	case MOD_LOAD:
198221828Sgrehan		vmmdev_init();
199221828Sgrehan		iommu_init();
200221828Sgrehan		error = vmm_init();
201221828Sgrehan		break;
202221828Sgrehan	case MOD_UNLOAD:
203221828Sgrehan		vmmdev_cleanup();
204221828Sgrehan		iommu_cleanup();
205221828Sgrehan		vmm_ipi_cleanup();
206221828Sgrehan		error = VMM_CLEANUP();
207221828Sgrehan		break;
208221828Sgrehan	default:
209221828Sgrehan		error = 0;
210221828Sgrehan		break;
211221828Sgrehan	}
212221828Sgrehan	return (error);
213221828Sgrehan}
214221828Sgrehan
/* Module description: name "vmm", event handler vmm_handler, no extra data. */
215221828Sgrehanstatic moduledata_t vmm_kmod = {
216221828Sgrehan	"vmm",
217221828Sgrehan	vmm_handler,
218221828Sgrehan	NULL
219221828Sgrehan};
220221828Sgrehan
221221828Sgrehan/*
222221828Sgrehan * Execute the module load handler after the pci passthru driver has had
223221828Sgrehan * a chance to claim devices. We need this information at the time we do
224221828Sgrehan * iommu initialization.
225221828Sgrehan */
226221828SgrehanDECLARE_MODULE(vmm, vmm_kmod, SI_SUB_CONFIGURE + 1, SI_ORDER_ANY);
227221828SgrehanMODULE_VERSION(vmm, 1);
228221828Sgrehan
/* Root sysctl node "hw.vmm". */
229221828SgrehanSYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
230221828Sgrehan
231221828Sgrehanstruct vm *
232221828Sgrehanvm_create(const char *name)
233221828Sgrehan{
234221828Sgrehan	int i;
235221828Sgrehan	struct vm *vm;
236221828Sgrehan	vm_paddr_t maxaddr;
237221828Sgrehan
238221828Sgrehan	const int BSP = 0;
239221828Sgrehan
240221828Sgrehan	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
241221828Sgrehan		return (NULL);
242221828Sgrehan
243221828Sgrehan	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
244221828Sgrehan	strcpy(vm->name, name);
245221828Sgrehan	vm->cookie = VMINIT(vm);
246221828Sgrehan
247221828Sgrehan	for (i = 0; i < VM_MAXCPU; i++) {
248221828Sgrehan		vcpu_init(vm, i);
249221828Sgrehan		guest_msrs_init(vm, i);
250221828Sgrehan	}
251221828Sgrehan
252221828Sgrehan	maxaddr = vmm_mem_maxaddr();
253221828Sgrehan	vm->iommu = iommu_create_domain(maxaddr);
254221828Sgrehan	vm_activate_cpu(vm, BSP);
255221828Sgrehan
256221828Sgrehan	return (vm);
257221828Sgrehan}
258221828Sgrehan
259221828Sgrehanvoid
260221828Sgrehanvm_destroy(struct vm *vm)
261221828Sgrehan{
262221828Sgrehan	int i;
263221828Sgrehan
264221828Sgrehan	ppt_unassign_all(vm);
265221828Sgrehan
266221828Sgrehan	for (i = 0; i < vm->num_mem_segs; i++)
267221828Sgrehan		vmm_mem_free(vm->mem_segs[i].hpa, vm->mem_segs[i].len);
268221828Sgrehan
269221828Sgrehan	for (i = 0; i < VM_MAXCPU; i++)
270221828Sgrehan		vcpu_cleanup(&vm->vcpu[i]);
271221828Sgrehan
272221828Sgrehan	iommu_destroy_domain(vm->iommu);
273221828Sgrehan
274221828Sgrehan	VMCLEANUP(vm->cookie);
275221828Sgrehan
276221828Sgrehan	free(vm, M_VM);
277221828Sgrehan}
278221828Sgrehan
279221828Sgrehanconst char *
280221828Sgrehanvm_name(struct vm *vm)
281221828Sgrehan{
282221828Sgrehan	return (vm->name);
283221828Sgrehan}
284221828Sgrehan
285221828Sgrehanint
286221828Sgrehanvm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
287221828Sgrehan{
288221828Sgrehan	const boolean_t spok = TRUE;	/* superpage mappings are ok */
289221828Sgrehan
290221828Sgrehan	return (VMMMAP(vm->cookie, gpa, hpa, len, VM_MEMATTR_UNCACHEABLE,
291221828Sgrehan		       VM_PROT_RW, spok));
292221828Sgrehan}
293221828Sgrehan
294221828Sgrehanint
295221828Sgrehanvm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
296221828Sgrehan{
297221828Sgrehan	const boolean_t spok = TRUE;	/* superpage mappings are ok */
298221828Sgrehan
299221828Sgrehan	return (VMMMAP(vm->cookie, gpa, 0, len, VM_MEMATTR_UNCACHEABLE,
300221828Sgrehan		       VM_PROT_NONE, spok));
301221828Sgrehan}
302221828Sgrehan
303221828Sgrehanint
304221828Sgrehanvm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t *ret_hpa)
305221828Sgrehan{
306221828Sgrehan	int error;
307221828Sgrehan	vm_paddr_t hpa;
308221828Sgrehan
309221828Sgrehan	const boolean_t spok = TRUE;	/* superpage mappings are ok */
310221828Sgrehan
311221828Sgrehan	/*
312221828Sgrehan	 * find the hpa if already it was already vm_malloc'd.
313221828Sgrehan	 */
314221828Sgrehan	hpa = vm_gpa2hpa(vm, gpa, len);
315221828Sgrehan	if (hpa != ((vm_paddr_t)-1))
316221828Sgrehan		goto out;
317221828Sgrehan
318221828Sgrehan	if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
319221828Sgrehan		return (E2BIG);
320221828Sgrehan
321221828Sgrehan	hpa = vmm_mem_alloc(len);
322221828Sgrehan	if (hpa == 0)
323221828Sgrehan		return (ENOMEM);
324221828Sgrehan
325221828Sgrehan	error = VMMMAP(vm->cookie, gpa, hpa, len, VM_MEMATTR_WRITE_BACK,
326221828Sgrehan		       VM_PROT_ALL, spok);
327221828Sgrehan	if (error) {
328221828Sgrehan		vmm_mem_free(hpa, len);
329221828Sgrehan		return (error);
330221828Sgrehan	}
331221828Sgrehan
332221828Sgrehan	iommu_create_mapping(vm->iommu, gpa, hpa, len);
333221828Sgrehan
334221828Sgrehan	vm->mem_segs[vm->num_mem_segs].gpa = gpa;
335221828Sgrehan	vm->mem_segs[vm->num_mem_segs].hpa = hpa;
336221828Sgrehan	vm->mem_segs[vm->num_mem_segs].len = len;
337221828Sgrehan	vm->num_mem_segs++;
338221828Sgrehanout:
339221828Sgrehan	*ret_hpa = hpa;
340221828Sgrehan	return (0);
341221828Sgrehan}
342221828Sgrehan
343221828Sgrehanvm_paddr_t
344221828Sgrehanvm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t len)
345221828Sgrehan{
346221828Sgrehan	int i;
347221828Sgrehan	vm_paddr_t gpabase, gpalimit, hpabase;
348221828Sgrehan
349221828Sgrehan	for (i = 0; i < vm->num_mem_segs; i++) {
350221828Sgrehan		hpabase = vm->mem_segs[i].hpa;
351221828Sgrehan		gpabase = vm->mem_segs[i].gpa;
352221828Sgrehan		gpalimit = gpabase + vm->mem_segs[i].len;
353221828Sgrehan		if (gpa >= gpabase && gpa + len <= gpalimit)
354221828Sgrehan			return ((gpa - gpabase) + hpabase);
355221828Sgrehan	}
356221828Sgrehan	return ((vm_paddr_t)-1);
357221828Sgrehan}
358221828Sgrehan
359221828Sgrehanint
360221828Sgrehanvm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
361221828Sgrehan		  struct vm_memory_segment *seg)
362221828Sgrehan{
363221828Sgrehan	int i;
364221828Sgrehan
365221828Sgrehan	for (i = 0; i < vm->num_mem_segs; i++) {
366221828Sgrehan		if (gpabase == vm->mem_segs[i].gpa) {
367221828Sgrehan			*seg = vm->mem_segs[i];
368221828Sgrehan			return (0);
369221828Sgrehan		}
370221828Sgrehan	}
371221828Sgrehan	return (-1);
372221828Sgrehan}
373221828Sgrehan
374221828Sgrehanint
375221828Sgrehanvm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
376221828Sgrehan{
377221828Sgrehan
378221828Sgrehan	if (vcpu < 0 || vcpu >= VM_MAXCPU)
379221828Sgrehan		return (EINVAL);
380221828Sgrehan
381221828Sgrehan	if (reg >= VM_REG_LAST)
382221828Sgrehan		return (EINVAL);
383221828Sgrehan
384221828Sgrehan	return (VMGETREG(vm->cookie, vcpu, reg, retval));
385221828Sgrehan}
386221828Sgrehan
387221828Sgrehanint
388221828Sgrehanvm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
389221828Sgrehan{
390221828Sgrehan
391221828Sgrehan	if (vcpu < 0 || vcpu >= VM_MAXCPU)
392221828Sgrehan		return (EINVAL);
393221828Sgrehan
394221828Sgrehan	if (reg >= VM_REG_LAST)
395221828Sgrehan		return (EINVAL);
396221828Sgrehan
397221828Sgrehan	return (VMSETREG(vm->cookie, vcpu, reg, val));
398221828Sgrehan}
399221828Sgrehan
400221828Sgrehanstatic boolean_t
401221828Sgrehanis_descriptor_table(int reg)
402221828Sgrehan{
403221828Sgrehan
404221828Sgrehan	switch (reg) {
405221828Sgrehan	case VM_REG_GUEST_IDTR:
406221828Sgrehan	case VM_REG_GUEST_GDTR:
407221828Sgrehan		return (TRUE);
408221828Sgrehan	default:
409221828Sgrehan		return (FALSE);
410221828Sgrehan	}
411221828Sgrehan}
412221828Sgrehan
413221828Sgrehanstatic boolean_t
414221828Sgrehanis_segment_register(int reg)
415221828Sgrehan{
416221828Sgrehan
417221828Sgrehan	switch (reg) {
418221828Sgrehan	case VM_REG_GUEST_ES:
419221828Sgrehan	case VM_REG_GUEST_CS:
420221828Sgrehan	case VM_REG_GUEST_SS:
421221828Sgrehan	case VM_REG_GUEST_DS:
422221828Sgrehan	case VM_REG_GUEST_FS:
423221828Sgrehan	case VM_REG_GUEST_GS:
424221828Sgrehan	case VM_REG_GUEST_TR:
425221828Sgrehan	case VM_REG_GUEST_LDTR:
426221828Sgrehan		return (TRUE);
427221828Sgrehan	default:
428221828Sgrehan		return (FALSE);
429221828Sgrehan	}
430221828Sgrehan}
431221828Sgrehan
432221828Sgrehanint
433221828Sgrehanvm_get_seg_desc(struct vm *vm, int vcpu, int reg,
434221828Sgrehan		struct seg_desc *desc)
435221828Sgrehan{
436221828Sgrehan
437221828Sgrehan	if (vcpu < 0 || vcpu >= VM_MAXCPU)
438221828Sgrehan		return (EINVAL);
439221828Sgrehan
440221828Sgrehan	if (!is_segment_register(reg) && !is_descriptor_table(reg))
441221828Sgrehan		return (EINVAL);
442221828Sgrehan
443221828Sgrehan	return (VMGETDESC(vm->cookie, vcpu, reg, desc));
444221828Sgrehan}
445221828Sgrehan
446221828Sgrehanint
447221828Sgrehanvm_set_seg_desc(struct vm *vm, int vcpu, int reg,
448221828Sgrehan		struct seg_desc *desc)
449221828Sgrehan{
450221828Sgrehan	if (vcpu < 0 || vcpu >= VM_MAXCPU)
451221828Sgrehan		return (EINVAL);
452221828Sgrehan
453221828Sgrehan	if (!is_segment_register(reg) && !is_descriptor_table(reg))
454221828Sgrehan		return (EINVAL);
455221828Sgrehan
456221828Sgrehan	return (VMSETDESC(vm->cookie, vcpu, reg, desc));
457221828Sgrehan}
458221828Sgrehan
459221828Sgrehanint
460221828Sgrehanvm_get_pinning(struct vm *vm, int vcpuid, int *cpuid)
461221828Sgrehan{
462221828Sgrehan
463221828Sgrehan	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
464221828Sgrehan		return (EINVAL);
465221828Sgrehan
466221828Sgrehan	*cpuid = VCPU_PINCPU(vm, vcpuid);
467221828Sgrehan
468221828Sgrehan	return (0);
469221828Sgrehan}
470221828Sgrehan
471221828Sgrehanint
472221828Sgrehanvm_set_pinning(struct vm *vm, int vcpuid, int host_cpuid)
473221828Sgrehan{
474221828Sgrehan	struct thread *td;
475221828Sgrehan
476221828Sgrehan	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
477221828Sgrehan		return (EINVAL);
478221828Sgrehan
479221828Sgrehan	td = curthread;		/* XXXSMP only safe when muxing vcpus */
480221828Sgrehan
481221828Sgrehan	/* unpin */
482221828Sgrehan	if (host_cpuid < 0) {
483221828Sgrehan		VCPU_UNPIN(vm, vcpuid);
484221828Sgrehan		thread_lock(td);
485221828Sgrehan		sched_unbind(td);
486221828Sgrehan		thread_unlock(td);
487221828Sgrehan		return (0);
488221828Sgrehan	}
489221828Sgrehan
490221828Sgrehan	if (CPU_ABSENT(host_cpuid))
491221828Sgrehan		return (EINVAL);
492221828Sgrehan
493221828Sgrehan	/*
494221828Sgrehan	 * XXX we should check that 'host_cpuid' has not already been pinned
495221828Sgrehan	 * by another vm.
496221828Sgrehan	 */
497221828Sgrehan	thread_lock(td);
498221828Sgrehan	sched_bind(td, host_cpuid);
499221828Sgrehan	thread_unlock(td);
500221828Sgrehan	VCPU_PIN(vm, vcpuid, host_cpuid);
501221828Sgrehan
502221828Sgrehan	return (0);
503221828Sgrehan}
504221828Sgrehan
505221828Sgrehanstatic void
506221828Sgrehanrestore_guest_fpustate(struct vcpu *vcpu)
507221828Sgrehan{
508221828Sgrehan	register_t s;
509221828Sgrehan
510221828Sgrehan	s = intr_disable();
511221828Sgrehan	fpu_stop_emulating();
512221828Sgrehan	fxrstor(&vcpu->savefpu);
513221828Sgrehan	fpu_start_emulating();
514221828Sgrehan	intr_restore(s);
515221828Sgrehan}
516221828Sgrehan
517221828Sgrehanstatic void
518221828Sgrehansave_guest_fpustate(struct vcpu *vcpu)
519221828Sgrehan{
520221828Sgrehan	register_t s;
521221828Sgrehan
522221828Sgrehan	s = intr_disable();
523221828Sgrehan	fpu_stop_emulating();
524221828Sgrehan	fxsave(&vcpu->savefpu);
525221828Sgrehan	fpu_start_emulating();
526221828Sgrehan	intr_restore(s);
527221828Sgrehan}
528221828Sgrehan
529221828Sgrehanint
530221828Sgrehanvm_run(struct vm *vm, struct vm_run *vmrun)
531221828Sgrehan{
532221828Sgrehan	int error, vcpuid;
533221828Sgrehan	struct vcpu *vcpu;
534221828Sgrehan	struct pcb *pcb;
535221828Sgrehan	uint64_t tscval;
536221828Sgrehan
537221828Sgrehan	vcpuid = vmrun->cpuid;
538221828Sgrehan
539221828Sgrehan	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
540221828Sgrehan		return (EINVAL);
541221828Sgrehan
542221828Sgrehan	vcpu = &vm->vcpu[vcpuid];
543221828Sgrehan
544221828Sgrehan	critical_enter();
545221828Sgrehan
546221828Sgrehan	tscval = rdtsc();
547221828Sgrehan
548221828Sgrehan	pcb = PCPU_GET(curpcb);
549221914Sjhb	set_pcb_flags(pcb, PCB_FULL_IRET);
550221828Sgrehan
551221828Sgrehan	vcpu->hostcpu = curcpu;
552221828Sgrehan
553221828Sgrehan	fpuexit(curthread);
554221828Sgrehan	restore_guest_msrs(vm, vcpuid);
555221828Sgrehan	restore_guest_fpustate(vcpu);
556221828Sgrehan	error = VMRUN(vm->cookie, vcpuid, vmrun->rip, &vmrun->vm_exit);
557221828Sgrehan	save_guest_fpustate(vcpu);
558221828Sgrehan	restore_host_msrs(vm, vcpuid);
559221828Sgrehan
560221828Sgrehan	vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);
561221828Sgrehan
562221828Sgrehan	critical_exit();
563221828Sgrehan
564221828Sgrehan	return (error);
565221828Sgrehan}
566221828Sgrehan
567221828Sgrehanint
568221828Sgrehanvm_inject_event(struct vm *vm, int vcpuid, int type,
569221828Sgrehan		int vector, uint32_t code, int code_valid)
570221828Sgrehan{
571221828Sgrehan	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
572221828Sgrehan		return (EINVAL);
573221828Sgrehan
574221828Sgrehan	if ((type > VM_EVENT_NONE && type < VM_EVENT_MAX) == 0)
575221828Sgrehan		return (EINVAL);
576221828Sgrehan
577221828Sgrehan	if (vector < 0 || vector > 255)
578221828Sgrehan		return (EINVAL);
579221828Sgrehan
580221828Sgrehan	return (VMINJECT(vm->cookie, vcpuid, type, vector, code, code_valid));
581221828Sgrehan}
582221828Sgrehan
583221828Sgrehanint
584221828Sgrehanvm_inject_nmi(struct vm *vm, int vcpu)
585221828Sgrehan{
586221828Sgrehan	int error;
587221828Sgrehan
588221828Sgrehan	if (vcpu < 0 || vcpu >= VM_MAXCPU)
589221828Sgrehan		return (EINVAL);
590221828Sgrehan
591221828Sgrehan	error = VMNMI(vm->cookie, vcpu);
592221828Sgrehan	vm_interrupt_hostcpu(vm, vcpu);
593221828Sgrehan	return (error);
594221828Sgrehan}
595221828Sgrehan
596221828Sgrehanint
597221828Sgrehanvm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
598221828Sgrehan{
599221828Sgrehan	if (vcpu < 0 || vcpu >= VM_MAXCPU)
600221828Sgrehan		return (EINVAL);
601221828Sgrehan
602221828Sgrehan	if (type < 0 || type >= VM_CAP_MAX)
603221828Sgrehan		return (EINVAL);
604221828Sgrehan
605221828Sgrehan	return (VMGETCAP(vm->cookie, vcpu, type, retval));
606221828Sgrehan}
607221828Sgrehan
608221828Sgrehanint
609221828Sgrehanvm_set_capability(struct vm *vm, int vcpu, int type, int val)
610221828Sgrehan{
611221828Sgrehan	if (vcpu < 0 || vcpu >= VM_MAXCPU)
612221828Sgrehan		return (EINVAL);
613221828Sgrehan
614221828Sgrehan	if (type < 0 || type >= VM_CAP_MAX)
615221828Sgrehan		return (EINVAL);
616221828Sgrehan
617221828Sgrehan	return (VMSETCAP(vm->cookie, vcpu, type, val));
618221828Sgrehan}
619221828Sgrehan
620221828Sgrehanuint64_t *
621221828Sgrehanvm_guest_msrs(struct vm *vm, int cpu)
622221828Sgrehan{
623221828Sgrehan	return (vm->vcpu[cpu].guest_msrs);
624221828Sgrehan}
625221828Sgrehan
626221828Sgrehanstruct vlapic *
627221828Sgrehanvm_lapic(struct vm *vm, int cpu)
628221828Sgrehan{
629221828Sgrehan	return (vm->vcpu[cpu].vlapic);
630221828Sgrehan}
631221828Sgrehan
632221828Sgrehanboolean_t
633221828Sgrehanvmm_is_pptdev(int bus, int slot, int func)
634221828Sgrehan{
635221828Sgrehan	int found, b, s, f, n;
636221828Sgrehan	char *val, *cp, *cp2;
637221828Sgrehan
638221828Sgrehan	/*
639221828Sgrehan	 * setenv pptdevs "1/2/3 4/5/6 7/8/9 10/11/12"
640221828Sgrehan	 */
641221828Sgrehan	found = 0;
642221828Sgrehan	cp = val = getenv("pptdevs");
643221828Sgrehan	while (cp != NULL && *cp != '\0') {
644221828Sgrehan		if ((cp2 = strchr(cp, ' ')) != NULL)
645221828Sgrehan			*cp2 = '\0';
646221828Sgrehan
647221828Sgrehan		n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
648221828Sgrehan		if (n == 3 && bus == b && slot == s && func == f) {
649221828Sgrehan			found = 1;
650221828Sgrehan			break;
651221828Sgrehan		}
652221828Sgrehan
653221828Sgrehan		if (cp2 != NULL)
654221828Sgrehan			*cp2++ = ' ';
655221828Sgrehan
656221828Sgrehan		cp = cp2;
657221828Sgrehan	}
658221828Sgrehan	freeenv(val);
659221828Sgrehan	return (found);
660221828Sgrehan}
661221828Sgrehan
662221828Sgrehanvoid *
663221828Sgrehanvm_iommu_domain(struct vm *vm)
664221828Sgrehan{
665221828Sgrehan
666221828Sgrehan	return (vm->iommu);
667221828Sgrehan}
668221828Sgrehan
669221828Sgrehanvoid
670221828Sgrehanvm_set_run_state(struct vm *vm, int vcpuid, int state)
671221828Sgrehan{
672221828Sgrehan	struct vcpu *vcpu;
673221828Sgrehan
674221828Sgrehan	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
675221828Sgrehan		panic("vm_set_run_state: invalid vcpuid %d", vcpuid);
676221828Sgrehan
677221828Sgrehan	vcpu = &vm->vcpu[vcpuid];
678221828Sgrehan
679221828Sgrehan	if (state == VCPU_RUNNING) {
680221828Sgrehan		if (vcpu->flags & VCPU_F_RUNNING) {
681221828Sgrehan			panic("vm_set_run_state: %s[%d] is already running",
682221828Sgrehan			      vm_name(vm), vcpuid);
683221828Sgrehan		}
684221828Sgrehan		vcpu->flags |= VCPU_F_RUNNING;
685221828Sgrehan	} else {
686221828Sgrehan		if ((vcpu->flags & VCPU_F_RUNNING) == 0) {
687221828Sgrehan			panic("vm_set_run_state: %s[%d] is already stopped",
688221828Sgrehan			      vm_name(vm), vcpuid);
689221828Sgrehan		}
690221828Sgrehan		vcpu->flags &= ~VCPU_F_RUNNING;
691221828Sgrehan	}
692221828Sgrehan}
693221828Sgrehan
694221828Sgrehanint
695221828Sgrehanvm_get_run_state(struct vm *vm, int vcpuid, int *cpuptr)
696221828Sgrehan{
697221828Sgrehan	int retval, hostcpu;
698221828Sgrehan	struct vcpu *vcpu;
699221828Sgrehan
700221828Sgrehan	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
701221828Sgrehan		panic("vm_get_run_state: invalid vcpuid %d", vcpuid);
702221828Sgrehan
703221828Sgrehan	vcpu = &vm->vcpu[vcpuid];
704221828Sgrehan	if (vcpu->flags & VCPU_F_RUNNING) {
705221828Sgrehan		retval = VCPU_RUNNING;
706221828Sgrehan		hostcpu = vcpu->hostcpu;
707221828Sgrehan	} else {
708221828Sgrehan		retval = VCPU_STOPPED;
709221828Sgrehan		hostcpu = -1;
710221828Sgrehan	}
711221828Sgrehan
712221828Sgrehan	if (cpuptr)
713221828Sgrehan		*cpuptr = hostcpu;
714221828Sgrehan
715221828Sgrehan	return (retval);
716221828Sgrehan}
717221828Sgrehan
718221828Sgrehanvoid
719221828Sgrehanvm_activate_cpu(struct vm *vm, int vcpuid)
720221828Sgrehan{
721221828Sgrehan
722221828Sgrehan	if (vcpuid >= 0 && vcpuid < VM_MAXCPU)
723223621Sgrehan		CPU_SET(vcpuid, &vm->active_cpus);
724221828Sgrehan}
725221828Sgrehan
726223621Sgrehancpuset_t
727221828Sgrehanvm_active_cpus(struct vm *vm)
728221828Sgrehan{
729221828Sgrehan
730221828Sgrehan	return (vm->active_cpus);
731221828Sgrehan}
732221828Sgrehan
733221828Sgrehanvoid *
734221828Sgrehanvcpu_stats(struct vm *vm, int vcpuid)
735221828Sgrehan{
736221828Sgrehan
737221828Sgrehan	return (vm->vcpu[vcpuid].stats);
738221828Sgrehan}
739