vmm.c revision 241148
1221828Sgrehan/*-
2221828Sgrehan * Copyright (c) 2011 NetApp, Inc.
3221828Sgrehan * All rights reserved.
4221828Sgrehan *
5221828Sgrehan * Redistribution and use in source and binary forms, with or without
6221828Sgrehan * modification, are permitted provided that the following conditions
7221828Sgrehan * are met:
8221828Sgrehan * 1. Redistributions of source code must retain the above copyright
9221828Sgrehan *    notice, this list of conditions and the following disclaimer.
10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright
11221828Sgrehan *    notice, this list of conditions and the following disclaimer in the
12221828Sgrehan *    documentation and/or other materials provided with the distribution.
13221828Sgrehan *
14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17221828Sgrehan * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24221828Sgrehan * SUCH DAMAGE.
25221828Sgrehan *
26221828Sgrehan * $FreeBSD$
27221828Sgrehan */
28221828Sgrehan
29221828Sgrehan#include <sys/cdefs.h>
30221828Sgrehan__FBSDID("$FreeBSD$");
31221828Sgrehan
32221828Sgrehan#include <sys/param.h>
33234695Sgrehan#include <sys/systm.h>
34221828Sgrehan#include <sys/kernel.h>
35221828Sgrehan#include <sys/module.h>
36221828Sgrehan#include <sys/sysctl.h>
37221828Sgrehan#include <sys/malloc.h>
38221828Sgrehan#include <sys/pcpu.h>
39221828Sgrehan#include <sys/lock.h>
40221828Sgrehan#include <sys/mutex.h>
41221828Sgrehan#include <sys/proc.h>
42221828Sgrehan#include <sys/sched.h>
43221828Sgrehan#include <sys/smp.h>
44221828Sgrehan#include <sys/systm.h>
45221828Sgrehan
46221828Sgrehan#include <vm/vm.h>
47221828Sgrehan
48221828Sgrehan#include <machine/vm.h>
49221828Sgrehan#include <machine/pcb.h>
50221914Sjhb#include <x86/apicreg.h>
51221828Sgrehan
52221828Sgrehan#include <machine/vmm.h>
53221828Sgrehan#include "vmm_mem.h"
54221828Sgrehan#include "vmm_util.h"
55221828Sgrehan#include <machine/vmm_dev.h>
56221828Sgrehan#include "vlapic.h"
57221828Sgrehan#include "vmm_msr.h"
58221828Sgrehan#include "vmm_ipi.h"
59221828Sgrehan#include "vmm_stat.h"
60221828Sgrehan
61221828Sgrehan#include "io/ppt.h"
62221828Sgrehan#include "io/iommu.h"
63221828Sgrehan
struct vlapic;

/*
 * Per-vcpu state maintained by the generic vmm layer.
 */
struct vcpu {
	int		flags;		/* VCPU_F_* flags below */
	int		pincpu;		/* host cpuid this vcpu is bound to */
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	uint64_t	guest_msrs[VMM_MSR_NUM];	/* saved guest MSR values */
	struct vlapic	*vlapic;	/* virtual local APIC */
	int		 vcpuid;	/* index of this vcpu in vm->vcpu[] */
	struct savefpu	*guestfpu;	/* guest fpu state */
	void		*stats;		/* opaque vmm_stat buffer */
	struct vm_exit	exitinfo;	/* exit info from the last VMRUN */
	enum x2apic_state x2apic_state;	/* current x2APIC mode of this vcpu */
};
#define	VCPU_F_PINNED	0x0001		/* vcpu is pinned to 'pincpu' */
#define	VCPU_F_RUNNING	0x0002		/* vcpu is executing guest code */

/* Host cpu the vcpu is pinned to, or -1 if it is not pinned. */
#define	VCPU_PINCPU(vm, vcpuid)	\
    ((vm->vcpu[vcpuid].flags & VCPU_F_PINNED) ? vm->vcpu[vcpuid].pincpu : -1)

#define	VCPU_UNPIN(vm, vcpuid)	(vm->vcpu[vcpuid].flags &= ~VCPU_F_PINNED)

#define	VCPU_PIN(vm, vcpuid, host_cpuid)				\
do {									\
	vm->vcpu[vcpuid].flags |= VCPU_F_PINNED;			\
	vm->vcpu[vcpuid].pincpu = host_cpuid;				\
} while(0)

#define	VM_MAX_MEMORY_SEGMENTS	2

/*
 * State of a single virtual machine.
 */
struct vm {
	void		*cookie;	/* processor-specific data */
	void		*iommu;		/* iommu-specific data */
	struct vcpu	vcpu[VM_MAXCPU];
	int		num_mem_segs;	/* valid entries in mem_segs[] */
	struct vm_memory_segment mem_segs[VM_MAX_MEMORY_SEGMENTS];
	char		name[VM_MAX_NAMELEN];

	/*
	 * Set of active vcpus.
	 * An active vcpu is one that has been started implicitly (BSP) or
	 * explicitly (AP) by sending it a startup ipi.
	 */
	cpuset_t	active_cpus;
};
109221828Sgrehan
static struct vmm_ops *ops;	/* hardware backend op vector, set by vmm_init() */

/*
 * Wrappers that dispatch through the backend op vector.  Each evaluates
 * to ENXIO (or a NULL/zero result) when no backend has been registered.
 */
#define	VMM_INIT()	(ops != NULL ? (*ops->init)() : 0)
#define	VMM_CLEANUP()	(ops != NULL ? (*ops->cleanup)() : 0)

#define	VMINIT(vm)	(ops != NULL ? (*ops->vminit)(vm): NULL)
#define	VMRUN(vmi, vcpu, rip) \
	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip) : ENXIO)
#define	VMCLEANUP(vmi)	(ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
#define	VMMMAP_SET(vmi, gpa, hpa, len, attr, prot, spm)			\
    	(ops != NULL ? 							\
    	(*ops->vmmmap_set)(vmi, gpa, hpa, len, attr, prot, spm) :	\
	ENXIO)
#define	VMMMAP_GET(vmi, gpa) \
	(ops != NULL ? (*ops->vmmmap_get)(vmi, gpa) : ENXIO)
#define	VMGETREG(vmi, vcpu, num, retval)		\
	(ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETREG(vmi, vcpu, num, val)		\
	(ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
#define	VMGETDESC(vmi, vcpu, num, desc)		\
	(ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMSETDESC(vmi, vcpu, num, desc)		\
	(ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMINJECT(vmi, vcpu, type, vec, ec, ecv)	\
	(ops != NULL ? (*ops->vminject)(vmi, vcpu, type, vec, ec, ecv) : ENXIO)
#define	VMNMI(vmi, vcpu)	\
	(ops != NULL ? (*ops->vmnmi)(vmi, vcpu) : ENXIO)
#define	VMGETCAP(vmi, vcpu, num, retval)	\
	(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETCAP(vmi, vcpu, num, val)		\
	(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)

/* local aliases for the machine-dependent FPU emulation toggles */
#define	fpu_start_emulating()	start_emulating()
#define	fpu_stop_emulating()	stop_emulating()

static MALLOC_DEFINE(M_VM, "vm", "vm");
CTASSERT(VMM_MSR_NUM <= 64);	/* msr_mask can keep track of up to 64 msrs */

/* statistics */
static VMM_STAT_DEFINE(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
149221828Sgrehan
150221828Sgrehanstatic void
151221828Sgrehanvcpu_cleanup(struct vcpu *vcpu)
152221828Sgrehan{
153221828Sgrehan	vlapic_cleanup(vcpu->vlapic);
154234695Sgrehan	vmm_stat_free(vcpu->stats);
155234695Sgrehan	fpu_save_area_free(vcpu->guestfpu);
156221828Sgrehan}
157221828Sgrehan
158221828Sgrehanstatic void
159221828Sgrehanvcpu_init(struct vm *vm, uint32_t vcpu_id)
160221828Sgrehan{
161221828Sgrehan	struct vcpu *vcpu;
162221828Sgrehan
163221828Sgrehan	vcpu = &vm->vcpu[vcpu_id];
164221828Sgrehan
165221828Sgrehan	vcpu->hostcpu = -1;
166221828Sgrehan	vcpu->vcpuid = vcpu_id;
167221828Sgrehan	vcpu->vlapic = vlapic_init(vm, vcpu_id);
168240943Sneel	vm_set_x2apic_state(vm, vcpu_id, X2APIC_ENABLED);
169234695Sgrehan	vcpu->guestfpu = fpu_save_area_alloc();
170234695Sgrehan	fpu_save_area_reset(vcpu->guestfpu);
171221828Sgrehan	vcpu->stats = vmm_stat_alloc();
172221828Sgrehan}
173221828Sgrehan
174240894Sneelstruct vm_exit *
175240894Sneelvm_exitinfo(struct vm *vm, int cpuid)
176240894Sneel{
177240894Sneel	struct vcpu *vcpu;
178240894Sneel
179240894Sneel	if (cpuid < 0 || cpuid >= VM_MAXCPU)
180240894Sneel		panic("vm_exitinfo: invalid cpuid %d", cpuid);
181240894Sneel
182240894Sneel	vcpu = &vm->vcpu[cpuid];
183240894Sneel
184240894Sneel	return (&vcpu->exitinfo);
185240894Sneel}
186240894Sneel
/*
 * One-time module initialization: set up the IPI and memory subsystems,
 * select the hardware backend (Intel VT-x or AMD SVM) and initialize it.
 * Returns 0 on success or an errno value.
 */
static int
vmm_init(void)
{
	int error;

	vmm_ipi_init();

	error = vmm_mem_init();
	if (error)
		return (error);

	if (vmm_is_intel())
		ops = &vmm_ops_intel;
	else if (vmm_is_amd())
		ops = &vmm_ops_amd;
	else
		return (ENXIO);	/* no supported virtualization extensions */

	vmm_msr_init();

	return (VMM_INIT());
}
209221828Sgrehan
210221828Sgrehanstatic int
211221828Sgrehanvmm_handler(module_t mod, int what, void *arg)
212221828Sgrehan{
213221828Sgrehan	int error;
214221828Sgrehan
215221828Sgrehan	switch (what) {
216221828Sgrehan	case MOD_LOAD:
217221828Sgrehan		vmmdev_init();
218221828Sgrehan		iommu_init();
219221828Sgrehan		error = vmm_init();
220221828Sgrehan		break;
221221828Sgrehan	case MOD_UNLOAD:
222221828Sgrehan		vmmdev_cleanup();
223221828Sgrehan		iommu_cleanup();
224221828Sgrehan		vmm_ipi_cleanup();
225221828Sgrehan		error = VMM_CLEANUP();
226221828Sgrehan		break;
227221828Sgrehan	default:
228221828Sgrehan		error = 0;
229221828Sgrehan		break;
230221828Sgrehan	}
231221828Sgrehan	return (error);
232221828Sgrehan}
233221828Sgrehan
/* Module linkage for the vmm(4) kernel module. */
static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * Execute the module load handler after the pci passthru driver has had
 * a chance to claim devices. We need this information at the time we do
 * iommu initialization.
 */
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_CONFIGURE + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

/* sysctl root for vmm tunables: hw.vmm.* */
SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
249221828Sgrehan
/*
 * Create and initialize a new virtual machine named 'name'.
 *
 * Returns NULL if the name is missing or too long.  All vcpus are
 * initialized up front and the BSP (vcpu 0) is marked active.
 *
 * NOTE(review): the results of VMINIT() and iommu_create_domain() are
 * not checked here -- presumably they cannot fail in practice; confirm
 * against the backend implementations.
 */
struct vm *
vm_create(const char *name)
{
	int i;
	struct vm *vm;
	vm_paddr_t maxaddr;

	const int BSP = 0;

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (NULL);

	/* M_WAITOK means this allocation cannot fail */
	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->cookie = VMINIT(vm);

	for (i = 0; i < VM_MAXCPU; i++) {
		vcpu_init(vm, i);
		guest_msrs_init(vm, i);
	}

	maxaddr = vmm_mem_maxaddr();
	vm->iommu = iommu_create_domain(maxaddr);
	vm_activate_cpu(vm, BSP);

	return (vm);
}
277221828Sgrehan
/*
 * Tear down a virtual machine: release passthru devices first so the
 * iommu mappings are no longer referenced, then free guest memory,
 * per-vcpu state, the iommu domain, the backend cookie and finally the
 * vm structure itself.
 */
void
vm_destroy(struct vm *vm)
{
	int i;

	ppt_unassign_all(vm);

	for (i = 0; i < vm->num_mem_segs; i++)
		vmm_mem_free(vm->mem_segs[i].hpa, vm->mem_segs[i].len);

	for (i = 0; i < VM_MAXCPU; i++)
		vcpu_cleanup(&vm->vcpu[i]);

	iommu_destroy_domain(vm->iommu);

	VMCLEANUP(vm->cookie);

	free(vm, M_VM);
}
297221828Sgrehan
298221828Sgrehanconst char *
299221828Sgrehanvm_name(struct vm *vm)
300221828Sgrehan{
301221828Sgrehan	return (vm->name);
302221828Sgrehan}
303221828Sgrehan
304221828Sgrehanint
305221828Sgrehanvm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
306221828Sgrehan{
307221828Sgrehan	const boolean_t spok = TRUE;	/* superpage mappings are ok */
308221828Sgrehan
309241147Sneel	return (VMMMAP_SET(vm->cookie, gpa, hpa, len, VM_MEMATTR_UNCACHEABLE,
310241147Sneel			   VM_PROT_RW, spok));
311221828Sgrehan}
312221828Sgrehan
313221828Sgrehanint
314221828Sgrehanvm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
315221828Sgrehan{
316221828Sgrehan	const boolean_t spok = TRUE;	/* superpage mappings are ok */
317221828Sgrehan
318241147Sneel	return (VMMMAP_SET(vm->cookie, gpa, 0, len, 0,
319241147Sneel			   VM_PROT_NONE, spok));
320221828Sgrehan}
321221828Sgrehan
322241041Sneel/*
323241041Sneel * Returns TRUE if 'gpa' is available for allocation and FALSE otherwise
324241041Sneel */
325241041Sneelstatic boolean_t
326241041Sneelvm_gpa_available(struct vm *vm, vm_paddr_t gpa)
327241041Sneel{
328241041Sneel	int i;
329241041Sneel	vm_paddr_t gpabase, gpalimit;
330241041Sneel
331241041Sneel	if (gpa & PAGE_MASK)
332241041Sneel		panic("vm_gpa_available: gpa (0x%016lx) not page aligned", gpa);
333241041Sneel
334241041Sneel	for (i = 0; i < vm->num_mem_segs; i++) {
335241041Sneel		gpabase = vm->mem_segs[i].gpa;
336241041Sneel		gpalimit = gpabase + vm->mem_segs[i].len;
337241041Sneel		if (gpa >= gpabase && gpa < gpalimit)
338241041Sneel			return (FALSE);
339241041Sneel	}
340241041Sneel
341241041Sneel	return (TRUE);
342241041Sneel}
343241041Sneel
/*
 * Allocate host memory to back the guest physical address range
 * [gpa, gpa + len) and map it write-back cacheable with full access.
 *
 * 'gpa' and 'len' must be page aligned and 'len' non-zero.  The range
 * must either be entirely unallocated (a new segment is created) or
 * entirely allocated already (a no-op).  A mix of the two is an error.
 *
 * Returns 0 on success, EINVAL/E2BIG/ENOMEM or a backend mapping error.
 */
int
vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	int error, available, allocated;
	vm_paddr_t g, hpa;

	const boolean_t spok = TRUE;	/* superpage mappings are ok */

	if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
		return (EINVAL);

	/* classify every page in the range as available or allocated */
	available = allocated = 0;
	g = gpa;
	while (g < gpa + len) {
		if (vm_gpa_available(vm, g))
			available++;
		else
			allocated++;

		g += PAGE_SIZE;
	}

	/*
	 * If there are some allocated and some available pages in the address
	 * range then it is an error.
	 */
	if (allocated && available)
		return (EINVAL);

	/*
	 * If the entire address range being requested has already been
	 * allocated then there isn't anything more to do.
	 */
	if (allocated && available == 0)
		return (0);

	if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
		return (E2BIG);

	hpa = vmm_mem_alloc(len);
	if (hpa == 0)
		return (ENOMEM);

	error = VMMMAP_SET(vm->cookie, gpa, hpa, len, VM_MEMATTR_WRITE_BACK,
			   VM_PROT_ALL, spok);
	if (error) {
		/* undo the allocation if the mapping failed */
		vmm_mem_free(hpa, len);
		return (error);
	}

	/* allow passthru devices to DMA into this segment */
	iommu_create_mapping(vm->iommu, gpa, hpa, len);

	/* record the new memory segment */
	vm->mem_segs[vm->num_mem_segs].gpa = gpa;
	vm->mem_segs[vm->num_mem_segs].hpa = hpa;
	vm->mem_segs[vm->num_mem_segs].len = len;
	vm->num_mem_segs++;

	return (0);
}
403221828Sgrehan
404221828Sgrehanvm_paddr_t
405221828Sgrehanvm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t len)
406221828Sgrehan{
407241148Sneel	vm_paddr_t nextpage;
408221828Sgrehan
409241148Sneel	nextpage = rounddown(gpa + PAGE_SIZE, PAGE_SIZE);
410241148Sneel	if (len > nextpage - gpa)
411241148Sneel		panic("vm_gpa2hpa: invalid gpa/len: 0x%016lx/%lu", gpa, len);
412241148Sneel
413241147Sneel	return (VMMMAP_GET(vm->cookie, gpa));
414221828Sgrehan}
415221828Sgrehan
416221828Sgrehanint
417221828Sgrehanvm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
418221828Sgrehan		  struct vm_memory_segment *seg)
419221828Sgrehan{
420221828Sgrehan	int i;
421221828Sgrehan
422221828Sgrehan	for (i = 0; i < vm->num_mem_segs; i++) {
423221828Sgrehan		if (gpabase == vm->mem_segs[i].gpa) {
424221828Sgrehan			*seg = vm->mem_segs[i];
425221828Sgrehan			return (0);
426221828Sgrehan		}
427221828Sgrehan	}
428221828Sgrehan	return (-1);
429221828Sgrehan}
430221828Sgrehan
431221828Sgrehanint
432221828Sgrehanvm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
433221828Sgrehan{
434221828Sgrehan
435221828Sgrehan	if (vcpu < 0 || vcpu >= VM_MAXCPU)
436221828Sgrehan		return (EINVAL);
437221828Sgrehan
438221828Sgrehan	if (reg >= VM_REG_LAST)
439221828Sgrehan		return (EINVAL);
440221828Sgrehan
441221828Sgrehan	return (VMGETREG(vm->cookie, vcpu, reg, retval));
442221828Sgrehan}
443221828Sgrehan
444221828Sgrehanint
445221828Sgrehanvm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
446221828Sgrehan{
447221828Sgrehan
448221828Sgrehan	if (vcpu < 0 || vcpu >= VM_MAXCPU)
449221828Sgrehan		return (EINVAL);
450221828Sgrehan
451221828Sgrehan	if (reg >= VM_REG_LAST)
452221828Sgrehan		return (EINVAL);
453221828Sgrehan
454221828Sgrehan	return (VMSETREG(vm->cookie, vcpu, reg, val));
455221828Sgrehan}
456221828Sgrehan
457221828Sgrehanstatic boolean_t
458221828Sgrehanis_descriptor_table(int reg)
459221828Sgrehan{
460221828Sgrehan
461221828Sgrehan	switch (reg) {
462221828Sgrehan	case VM_REG_GUEST_IDTR:
463221828Sgrehan	case VM_REG_GUEST_GDTR:
464221828Sgrehan		return (TRUE);
465221828Sgrehan	default:
466221828Sgrehan		return (FALSE);
467221828Sgrehan	}
468221828Sgrehan}
469221828Sgrehan
470221828Sgrehanstatic boolean_t
471221828Sgrehanis_segment_register(int reg)
472221828Sgrehan{
473221828Sgrehan
474221828Sgrehan	switch (reg) {
475221828Sgrehan	case VM_REG_GUEST_ES:
476221828Sgrehan	case VM_REG_GUEST_CS:
477221828Sgrehan	case VM_REG_GUEST_SS:
478221828Sgrehan	case VM_REG_GUEST_DS:
479221828Sgrehan	case VM_REG_GUEST_FS:
480221828Sgrehan	case VM_REG_GUEST_GS:
481221828Sgrehan	case VM_REG_GUEST_TR:
482221828Sgrehan	case VM_REG_GUEST_LDTR:
483221828Sgrehan		return (TRUE);
484221828Sgrehan	default:
485221828Sgrehan		return (FALSE);
486221828Sgrehan	}
487221828Sgrehan}
488221828Sgrehan
489221828Sgrehanint
490221828Sgrehanvm_get_seg_desc(struct vm *vm, int vcpu, int reg,
491221828Sgrehan		struct seg_desc *desc)
492221828Sgrehan{
493221828Sgrehan
494221828Sgrehan	if (vcpu < 0 || vcpu >= VM_MAXCPU)
495221828Sgrehan		return (EINVAL);
496221828Sgrehan
497221828Sgrehan	if (!is_segment_register(reg) && !is_descriptor_table(reg))
498221828Sgrehan		return (EINVAL);
499221828Sgrehan
500221828Sgrehan	return (VMGETDESC(vm->cookie, vcpu, reg, desc));
501221828Sgrehan}
502221828Sgrehan
503221828Sgrehanint
504221828Sgrehanvm_set_seg_desc(struct vm *vm, int vcpu, int reg,
505221828Sgrehan		struct seg_desc *desc)
506221828Sgrehan{
507221828Sgrehan	if (vcpu < 0 || vcpu >= VM_MAXCPU)
508221828Sgrehan		return (EINVAL);
509221828Sgrehan
510221828Sgrehan	if (!is_segment_register(reg) && !is_descriptor_table(reg))
511221828Sgrehan		return (EINVAL);
512221828Sgrehan
513221828Sgrehan	return (VMSETDESC(vm->cookie, vcpu, reg, desc));
514221828Sgrehan}
515221828Sgrehan
516221828Sgrehanint
517221828Sgrehanvm_get_pinning(struct vm *vm, int vcpuid, int *cpuid)
518221828Sgrehan{
519221828Sgrehan
520221828Sgrehan	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
521221828Sgrehan		return (EINVAL);
522221828Sgrehan
523221828Sgrehan	*cpuid = VCPU_PINCPU(vm, vcpuid);
524221828Sgrehan
525221828Sgrehan	return (0);
526221828Sgrehan}
527221828Sgrehan
/*
 * Pin vcpu 'vcpuid' to host cpu 'host_cpuid', or unpin it when
 * 'host_cpuid' is negative.  Pinning is implemented by binding the
 * calling thread to the host cpu with sched_bind().
 */
int
vm_set_pinning(struct vm *vm, int vcpuid, int host_cpuid)
{
	struct thread *td;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	td = curthread;		/* XXXSMP only safe when muxing vcpus */

	/* unpin */
	if (host_cpuid < 0) {
		VCPU_UNPIN(vm, vcpuid);
		thread_lock(td);
		sched_unbind(td);
		thread_unlock(td);
		return (0);
	}

	if (CPU_ABSENT(host_cpuid))
		return (EINVAL);

	/*
	 * XXX we should check that 'host_cpuid' has not already been pinned
	 * by another vm.
	 */
	/* sched_bind() must be called with the thread lock held */
	thread_lock(td);
	sched_bind(td, host_cpuid);
	thread_unlock(td);
	VCPU_PIN(vm, vcpuid, host_cpuid);

	return (0);
}
561221828Sgrehan
/*
 * Load the guest FPU state onto the cpu just before entering the guest.
 * The host FPU state is first flushed to the pcb so it can be restored
 * later by the normal context switch machinery.
 */
static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* flush host state to the pcb */
	fpuexit(curthread);
	/* disable FPU emulation so fpurestore() does not trap */
	fpu_stop_emulating();
	fpurestore(vcpu->guestfpu);
}
571221828Sgrehan
/*
 * Save the guest FPU state after returning from the guest and re-enable
 * FPU emulation so the host state is lazily restored on next use.
 */
static void
save_guest_fpustate(struct vcpu *vcpu)
{

	fpusave(vcpu->guestfpu);
	fpu_start_emulating();
}
579221828Sgrehan
/*
 * Run vcpu 'vmrun->cpuid' of 'vm' until the backend exits back to us.
 *
 * The whole sequence runs inside a critical section so the thread is
 * not preempted while guest MSR and FPU state is loaded on this cpu.
 * On return the exit reason is copied into 'vmrun->vm_exit'.
 *
 * Returns 0 or an errno value from the backend VMRUN.
 */
int
vm_run(struct vm *vm, struct vm_run *vmrun)
{
	int error, vcpuid;
	struct vcpu *vcpu;
	struct pcb *pcb;
	uint64_t tscval;

	vcpuid = vmrun->cpuid;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];

	critical_enter();

	tscval = rdtsc();	/* start of the runtime measurement */

	pcb = PCPU_GET(curpcb);
	set_pcb_flags(pcb, PCB_FULL_IRET);

	vcpu->hostcpu = curcpu;

	/* swap in guest MSR/FPU state around the backend run */
	restore_guest_msrs(vm, vcpuid);
	restore_guest_fpustate(vcpu);
	error = VMRUN(vm->cookie, vcpuid, vmrun->rip);
	save_guest_fpustate(vcpu);
	restore_host_msrs(vm, vcpuid);

	vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);

	/* copy the exit information */
	bcopy(&vcpu->exitinfo, &vmrun->vm_exit, sizeof(struct vm_exit));

	critical_exit();

	return (error);
}
619221828Sgrehan
620221828Sgrehanint
621221828Sgrehanvm_inject_event(struct vm *vm, int vcpuid, int type,
622221828Sgrehan		int vector, uint32_t code, int code_valid)
623221828Sgrehan{
624221828Sgrehan	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
625221828Sgrehan		return (EINVAL);
626221828Sgrehan
627221828Sgrehan	if ((type > VM_EVENT_NONE && type < VM_EVENT_MAX) == 0)
628221828Sgrehan		return (EINVAL);
629221828Sgrehan
630221828Sgrehan	if (vector < 0 || vector > 255)
631221828Sgrehan		return (EINVAL);
632221828Sgrehan
633221828Sgrehan	return (VMINJECT(vm->cookie, vcpuid, type, vector, code, code_valid));
634221828Sgrehan}
635221828Sgrehan
636221828Sgrehanint
637221828Sgrehanvm_inject_nmi(struct vm *vm, int vcpu)
638221828Sgrehan{
639221828Sgrehan	int error;
640221828Sgrehan
641221828Sgrehan	if (vcpu < 0 || vcpu >= VM_MAXCPU)
642221828Sgrehan		return (EINVAL);
643221828Sgrehan
644221828Sgrehan	error = VMNMI(vm->cookie, vcpu);
645221828Sgrehan	vm_interrupt_hostcpu(vm, vcpu);
646221828Sgrehan	return (error);
647221828Sgrehan}
648221828Sgrehan
649221828Sgrehanint
650221828Sgrehanvm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
651221828Sgrehan{
652221828Sgrehan	if (vcpu < 0 || vcpu >= VM_MAXCPU)
653221828Sgrehan		return (EINVAL);
654221828Sgrehan
655221828Sgrehan	if (type < 0 || type >= VM_CAP_MAX)
656221828Sgrehan		return (EINVAL);
657221828Sgrehan
658221828Sgrehan	return (VMGETCAP(vm->cookie, vcpu, type, retval));
659221828Sgrehan}
660221828Sgrehan
661221828Sgrehanint
662221828Sgrehanvm_set_capability(struct vm *vm, int vcpu, int type, int val)
663221828Sgrehan{
664221828Sgrehan	if (vcpu < 0 || vcpu >= VM_MAXCPU)
665221828Sgrehan		return (EINVAL);
666221828Sgrehan
667221828Sgrehan	if (type < 0 || type >= VM_CAP_MAX)
668221828Sgrehan		return (EINVAL);
669221828Sgrehan
670221828Sgrehan	return (VMSETCAP(vm->cookie, vcpu, type, val));
671221828Sgrehan}
672221828Sgrehan
673221828Sgrehanuint64_t *
674221828Sgrehanvm_guest_msrs(struct vm *vm, int cpu)
675221828Sgrehan{
676221828Sgrehan	return (vm->vcpu[cpu].guest_msrs);
677221828Sgrehan}
678221828Sgrehan
679221828Sgrehanstruct vlapic *
680221828Sgrehanvm_lapic(struct vm *vm, int cpu)
681221828Sgrehan{
682221828Sgrehan	return (vm->vcpu[cpu].vlapic);
683221828Sgrehan}
684221828Sgrehan
/*
 * Check whether PCI device bus/slot/func has been reserved for PCI
 * passthru via the "pptdevs" tunable.  The string is tokenized in
 * place: each space is temporarily replaced with NUL and restored
 * before advancing to the next token.
 *
 * NOTE(review): getenv() may return NULL when "pptdevs" is unset; the
 * loop condition handles that, and freeenv(NULL) is presumably a no-op
 * -- confirm against kenv(9).
 */
boolean_t
vmm_is_pptdev(int bus, int slot, int func)
{
	int found, b, s, f, n;
	char *val, *cp, *cp2;

	/*
	 * setenv pptdevs "1/2/3 4/5/6 7/8/9 10/11/12"
	 */
	found = 0;
	cp = val = getenv("pptdevs");
	while (cp != NULL && *cp != '\0') {
		if ((cp2 = strchr(cp, ' ')) != NULL)
			*cp2 = '\0';	/* terminate the current token */

		n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
		if (n == 3 && bus == b && slot == s && func == f) {
			found = 1;
			break;
		}

		if (cp2 != NULL)
			*cp2++ = ' ';	/* restore the separator */

		cp = cp2;
	}
	freeenv(val);
	return (found);
}
714221828Sgrehan
715221828Sgrehanvoid *
716221828Sgrehanvm_iommu_domain(struct vm *vm)
717221828Sgrehan{
718221828Sgrehan
719221828Sgrehan	return (vm->iommu);
720221828Sgrehan}
721221828Sgrehan
722221828Sgrehanvoid
723221828Sgrehanvm_set_run_state(struct vm *vm, int vcpuid, int state)
724221828Sgrehan{
725221828Sgrehan	struct vcpu *vcpu;
726221828Sgrehan
727221828Sgrehan	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
728221828Sgrehan		panic("vm_set_run_state: invalid vcpuid %d", vcpuid);
729221828Sgrehan
730221828Sgrehan	vcpu = &vm->vcpu[vcpuid];
731221828Sgrehan
732221828Sgrehan	if (state == VCPU_RUNNING) {
733221828Sgrehan		if (vcpu->flags & VCPU_F_RUNNING) {
734221828Sgrehan			panic("vm_set_run_state: %s[%d] is already running",
735221828Sgrehan			      vm_name(vm), vcpuid);
736221828Sgrehan		}
737221828Sgrehan		vcpu->flags |= VCPU_F_RUNNING;
738221828Sgrehan	} else {
739221828Sgrehan		if ((vcpu->flags & VCPU_F_RUNNING) == 0) {
740221828Sgrehan			panic("vm_set_run_state: %s[%d] is already stopped",
741221828Sgrehan			      vm_name(vm), vcpuid);
742221828Sgrehan		}
743221828Sgrehan		vcpu->flags &= ~VCPU_F_RUNNING;
744221828Sgrehan	}
745221828Sgrehan}
746221828Sgrehan
747221828Sgrehanint
748221828Sgrehanvm_get_run_state(struct vm *vm, int vcpuid, int *cpuptr)
749221828Sgrehan{
750221828Sgrehan	int retval, hostcpu;
751221828Sgrehan	struct vcpu *vcpu;
752221828Sgrehan
753221828Sgrehan	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
754221828Sgrehan		panic("vm_get_run_state: invalid vcpuid %d", vcpuid);
755221828Sgrehan
756221828Sgrehan	vcpu = &vm->vcpu[vcpuid];
757221828Sgrehan	if (vcpu->flags & VCPU_F_RUNNING) {
758221828Sgrehan		retval = VCPU_RUNNING;
759221828Sgrehan		hostcpu = vcpu->hostcpu;
760221828Sgrehan	} else {
761221828Sgrehan		retval = VCPU_STOPPED;
762221828Sgrehan		hostcpu = -1;
763221828Sgrehan	}
764221828Sgrehan
765221828Sgrehan	if (cpuptr)
766221828Sgrehan		*cpuptr = hostcpu;
767221828Sgrehan
768221828Sgrehan	return (retval);
769221828Sgrehan}
770221828Sgrehan
771221828Sgrehanvoid
772221828Sgrehanvm_activate_cpu(struct vm *vm, int vcpuid)
773221828Sgrehan{
774221828Sgrehan
775221828Sgrehan	if (vcpuid >= 0 && vcpuid < VM_MAXCPU)
776223621Sgrehan		CPU_SET(vcpuid, &vm->active_cpus);
777221828Sgrehan}
778221828Sgrehan
779223621Sgrehancpuset_t
780221828Sgrehanvm_active_cpus(struct vm *vm)
781221828Sgrehan{
782221828Sgrehan
783221828Sgrehan	return (vm->active_cpus);
784221828Sgrehan}
785221828Sgrehan
786221828Sgrehanvoid *
787221828Sgrehanvcpu_stats(struct vm *vm, int vcpuid)
788221828Sgrehan{
789221828Sgrehan
790221828Sgrehan	return (vm->vcpu[vcpuid].stats);
791221828Sgrehan}
792240922Sneel
793240922Sneelint
794240922Sneelvm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state)
795240922Sneel{
796240922Sneel	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
797240922Sneel		return (EINVAL);
798240922Sneel
799240922Sneel	*state = vm->vcpu[vcpuid].x2apic_state;
800240922Sneel
801240922Sneel	return (0);
802240922Sneel}
803240922Sneel
804240922Sneelint
805240922Sneelvm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
806240922Sneel{
807240922Sneel	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
808240922Sneel		return (EINVAL);
809240922Sneel
810240922Sneel	if (state < 0 || state >= X2APIC_STATE_LAST)
811240922Sneel		return (EINVAL);
812240922Sneel
813240922Sneel	vm->vcpu[vcpuid].x2apic_state = state;
814240922Sneel
815240943Sneel	vlapic_set_x2apic_state(vm, vcpuid, state);
816240943Sneel
817240922Sneel	return (0);
818240922Sneel}
819