/* vmm.h revision 263744 */
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/amd64/include/vmm.h 263744 2014-03-25 19:20:34Z tychon $
 */

#ifndef _VMM_H_
#define	_VMM_H_

#ifdef _KERNEL

/*
 * Size limit used for virtual machine names.
 * NOTE(review): whether this counts the terminating NUL is not visible in
 * this header -- confirm against the users of the macro.
 */
#define	VM_MAX_NAMELEN	32

/*
 * Forward declarations for types that the prototypes below only use
 * through pointers; their definitions live in other headers.
 */
struct vm;
struct vm_exception;
struct vm_memory_segment;
struct seg_desc;
struct vm_exit;
struct vm_run;
struct vhpet;
struct vioapic;
struct vlapic;
struct vatpic;		/* returned by vm_atpic() */
struct vatpit;		/* returned by vm_atpit() */
struct vmspace;
struct vm_object;
struct pmap;

/*
 * NOTE(review): ISO C does not allow an enum to be referenced before it is
 * complete; this line relies on a compiler extension.  The full definition
 * of 'enum x2apic_state' appears further down in this header.
 */
enum x2apic_state;

5139217Sgibbstypedef int	(*vmm_init_func_t)(int ipinum);
52241492Sjhbtypedef int	(*vmm_cleanup_func_t)(void);
5339217Sgibbstypedef void	(*vmm_resume_func_t)(void);
5439217Sgibbstypedef void *	(*vmi_init_func_t)(struct vm *vm, struct pmap *pmap);
5539217Sgibbstypedef int	(*vmi_run_func_t)(void *vmi, int vcpu, register_t rip,
56117126Sscottl				  struct pmap *pmap, void *rendezvous_cookie);
57165102Smjacobtypedef void	(*vmi_cleanup_func_t)(void *vmi);
58117126Sscottltypedef int	(*vmi_get_register_t)(void *vmi, int vcpu, int num,
5939217Sgibbs				      uint64_t *retval);
6039217Sgibbstypedef int	(*vmi_set_register_t)(void *vmi, int vcpu, int num,
6159082Snyan				      uint64_t val);
6259082Snyantypedef int	(*vmi_get_desc_t)(void *vmi, int vcpu, int num,
6359082Snyan				  struct seg_desc *desc);
6439217Sgibbstypedef int	(*vmi_set_desc_t)(void *vmi, int vcpu, int num,
6539217Sgibbs				  struct seg_desc *desc);
6639217Sgibbstypedef int	(*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval);
6739217Sgibbstypedef int	(*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val);
6839217Sgibbstypedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max);
6939217Sgibbstypedef void	(*vmi_vmspace_free)(struct vmspace *vmspace);
7039217Sgibbstypedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu);
7139217Sgibbstypedef void	(*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic);
7239217Sgibbs
7339217Sgibbsstruct vmm_ops {
7439217Sgibbs	vmm_init_func_t		init;		/* module wide initialization */
7539217Sgibbs	vmm_cleanup_func_t	cleanup;
7639217Sgibbs	vmm_resume_func_t	resume;
7739217Sgibbs
7839217Sgibbs	vmi_init_func_t		vminit;		/* vm-specific initialization */
7939217Sgibbs	vmi_run_func_t		vmrun;
8039217Sgibbs	vmi_cleanup_func_t	vmcleanup;
8139217Sgibbs	vmi_get_register_t	vmgetreg;
8239217Sgibbs	vmi_set_register_t	vmsetreg;
83241492Sjhb	vmi_get_desc_t		vmgetdesc;
8439217Sgibbs	vmi_set_desc_t		vmsetdesc;
8539217Sgibbs	vmi_get_cap_t		vmgetcap;
8639217Sgibbs	vmi_set_cap_t		vmsetcap;
8739217Sgibbs	vmi_vmspace_alloc	vmspace_alloc;
8839217Sgibbs	vmi_vmspace_free	vmspace_free;
8940420Sgibbs	vmi_vlapic_init		vlapic_init;
9039217Sgibbs	vmi_vlapic_cleanup	vlapic_cleanup;
9139217Sgibbs};
9239217Sgibbs
/* Backend operation tables; the objects themselves are defined elsewhere. */
extern struct vmm_ops vmm_ops_intel;
extern struct vmm_ops vmm_ops_amd;

9655945Sgibbsint vm_create(const char *name, struct vm **retvm);
9739217Sgibbsvoid vm_destroy(struct vm *vm);
9839217Sgibbsconst char *vm_name(struct vm *vm);
9939217Sgibbsint vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len);
10039217Sgibbsint vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
10139217Sgibbsint vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
10239217Sgibbsvoid *vm_gpa_hold(struct vm *, vm_paddr_t gpa, size_t len, int prot,
103241492Sjhb		  void **cookie);
104241492Sjhbvoid vm_gpa_release(void *cookie);
10539217Sgibbsint vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
10639217Sgibbs	      struct vm_memory_segment *seg);
10739217Sgibbsint vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
10839217Sgibbs		  vm_offset_t *offset, struct vm_object **object);
10939217Sgibbsboolean_t vm_mem_allocated(struct vm *vm, vm_paddr_t gpa);
11039217Sgibbsint vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
11139217Sgibbsint vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
11239217Sgibbsint vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
11339217Sgibbs		    struct seg_desc *ret_desc);
11439217Sgibbsint vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
11539217Sgibbs		    struct seg_desc *desc);
11639217Sgibbsint vm_run(struct vm *vm, struct vm_run *vmrun);
11739217Sgibbsint vm_inject_nmi(struct vm *vm, int vcpu);
118241492Sjhbint vm_nmi_pending(struct vm *vm, int vcpuid);
119241492Sjhbvoid vm_nmi_clear(struct vm *vm, int vcpuid);
12039217Sgibbsint vm_inject_extint(struct vm *vm, int vcpu);
12139217Sgibbsint vm_extint_pending(struct vm *vm, int vcpuid);
12239217Sgibbsvoid vm_extint_clear(struct vm *vm, int vcpuid);
12339217Sgibbsuint64_t *vm_guest_msrs(struct vm *vm, int cpu);
12455945Sgibbsstruct vlapic *vm_lapic(struct vm *vm, int cpu);
12555945Sgibbsstruct vioapic *vm_ioapic(struct vm *vm);
12655945Sgibbsstruct vhpet *vm_hpet(struct vm *vm);
12755945Sgibbsint vm_get_capability(struct vm *vm, int vcpu, int type, int *val);
12855945Sgibbsint vm_set_capability(struct vm *vm, int vcpu, int type, int val);
12955945Sgibbsint vm_get_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state *state);
13055945Sgibbsint vm_set_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state state);
13155945Sgibbsint vm_apicid2vcpuid(struct vm *vm, int apicid);
13255945Sgibbsvoid vm_activate_cpu(struct vm *vm, int vcpu);
13355945Sgibbscpuset_t vm_active_cpus(struct vm *vm);
13455945Sgibbsstruct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
13555945Sgibbs
13655945Sgibbs/*
13755945Sgibbs * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'.
13855945Sgibbs * The rendezvous 'func(arg)' is not allowed to do anything that will
13955945Sgibbs * cause the thread to be put to sleep.
14055945Sgibbs *
14155945Sgibbs * If the rendezvous is being initiated from a vcpu context then the
142241492Sjhb * 'vcpuid' must refer to that vcpu, otherwise it should be set to -1.
143241492Sjhb *
144241492Sjhb * The caller cannot hold any locks when initiating the rendezvous.
14555945Sgibbs *
14655945Sgibbs * The implementation of this API may cause vcpus other than those specified
14755945Sgibbs * by 'dest' to be stalled. The caller should not rely on any vcpus making
14855945Sgibbs * forward progress when the rendezvous is in progress.
14955945Sgibbs */
15055945Sgibbstypedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg);
15155945Sgibbsvoid vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
15255945Sgibbs    vm_rendezvous_func_t func, void *arg);
15355945Sgibbs
/*
 * Return 1 if the pointer-sized word that 'rendezvous_cookie' points to is
 * non-zero, 0 otherwise.
 *
 * 'rendezvous_cookie' must be non-NULL and must point to a readable
 * uintptr_t; it is dereferenced unconditionally.
 */
static __inline int
vcpu_rendezvous_pending(void *rendezvous_cookie)
{

	return (*(uintptr_t *)rendezvous_cookie != 0);
}

/*
 * Return 1 if device indicated by bus/slot/func is supposed to be a
 * pci passthrough device.
 *
 * Return 0 otherwise.
 */
int vmm_is_pptdev(int bus, int slot, int func);

/* Returns an opaque pointer for 'vm'; its meaning is defined by the caller. */
void *vm_iommu_domain(struct vm *vm);

/*
 * States used with vcpu_set_state() and vcpu_get_state().  Values are
 * assigned implicitly starting at 0, in declaration order.
 */
enum vcpu_state {
	VCPU_IDLE,
	VCPU_FROZEN,
	VCPU_RUNNING,
	VCPU_SLEEPING,
};

/*
 * Set or query the state of a vcpu.  vcpu_get_state() also takes a 'hostcpu'
 * out-pointer; what it stores there is defined by the implementation, which
 * is not visible in this header.
 */
int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state,
    bool from_idle);
enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu, int *hostcpu);

18255945Sgibbsstatic int __inline
18355945Sgibbsvcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
18439217Sgibbs{
18539217Sgibbs	return (vcpu_get_state(vm, vcpu, hostcpu) == VCPU_RUNNING);
18639217Sgibbs}
18739217Sgibbs
/* Per-vcpu helpers, passthrough device assignment and device accessors. */
void *vcpu_stats(struct vm *vm, int vcpu);
void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr);
struct vmspace *vm_get_vmspace(struct vm *vm);
int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
struct vatpic *vm_atpic(struct vm *vm);
struct vatpit *vm_atpit(struct vm *vm);

/*
 * Inject exception 'vme' into the guest vcpu. This function returns 0 on
 * success and non-zero on failure.
 *
 * Wrapper functions like 'vm_inject_gp()' should be preferred to calling
 * this function directly because they enforce the trap-like or fault-like
 * behavior of an exception.
 *
 * This function should only be called in the context of the thread that is
 * executing this vcpu.
 */
int vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *vme);

/*
 * Returns 0 if there is no exception pending for this vcpu. Returns 1 if an
 * exception is pending and also updates 'vme'. The pending exception is
 * cleared when this function returns.
 *
 * This function should only be called in the context of the thread that is
 * executing this vcpu.
 */
int vm_exception_pending(struct vm *vm, int vcpuid, struct vm_exception *vme);

/* Convenience wrappers for specific exceptions. */
void vm_inject_gp(struct vm *vm, int vcpuid); /* general protection fault */
void vm_inject_ud(struct vm *vm, int vcpuid); /* undefined instruction fault */

#endif	/* _KERNEL */

#include <machine/vmm_instruction_emul.h>

#define	VM_MAXCPU	16			/* maximum virtual cpus */

/*
 * Identifiers for architecturally defined registers.
 *
 * Values are assigned implicitly starting at 0, so the order of the
 * enumerators is part of the interface.  VM_REG_LAST is not a register; it
 * is the count of identifiers that precede it.
 */
enum vm_reg_name {
	/* general purpose registers (RSP is listed further down) */
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15,
	/* control and debug registers */
	VM_REG_GUEST_CR0,
	VM_REG_GUEST_CR3,
	VM_REG_GUEST_CR4,
	VM_REG_GUEST_DR7,
	/* stack pointer, instruction pointer and flags */
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_RIP,
	VM_REG_GUEST_RFLAGS,
	/* segment and descriptor-table registers */
	VM_REG_GUEST_ES,
	VM_REG_GUEST_CS,
	VM_REG_GUEST_SS,
	VM_REG_GUEST_DS,
	VM_REG_GUEST_FS,
	VM_REG_GUEST_GS,
	VM_REG_GUEST_LDTR,
	VM_REG_GUEST_TR,
	VM_REG_GUEST_IDTR,
	VM_REG_GUEST_GDTR,
	VM_REG_GUEST_EFER,
	VM_REG_LAST
};

/*
 * Identifiers for optional vmm capabilities
 *
 * Values are assigned implicitly starting at 0.  VM_CAP_MAX is the count of
 * capabilities that precede it, not a capability itself.
 */
enum vm_cap_type {
	VM_CAP_HALT_EXIT,
	VM_CAP_MTRAP_EXIT,
	VM_CAP_PAUSE_EXIT,
	VM_CAP_UNRESTRICTED_GUEST,
	VM_CAP_ENABLE_INVPCID,
	VM_CAP_MAX
};

/*
 * x2apic state values used by vm_get_x2apic_state() and
 * vm_set_x2apic_state().  X2APIC_STATE_LAST is the count of states that
 * precede it.
 */
enum x2apic_state {
	X2APIC_DISABLED,
	X2APIC_ENABLED,
	X2APIC_STATE_LAST
};

/*
 * Segment descriptor as exchanged through vm_get_seg_desc() and
 * vm_set_seg_desc().
 *
 * The 'access' field has the format specified in Table 21-2 of the Intel
 * Architecture Manual vol 3b.
 *
 * XXX The contents of the 'access' field are architecturally defined except
 * bit 16 - Segment Unusable.
 */
struct seg_desc {
	uint64_t	base;
	uint32_t	limit;
	uint32_t	access;
};

/*
 * Exit codes stored in 'struct vm_exit'.  Values are assigned implicitly
 * starting at 0; VM_EXITCODE_MAX is the count of codes that precede it.
 */
enum vm_exitcode {
	VM_EXITCODE_INOUT,
	VM_EXITCODE_VMX,
	VM_EXITCODE_BOGUS,
	VM_EXITCODE_RDMSR,
	VM_EXITCODE_WRMSR,
	VM_EXITCODE_HLT,
	VM_EXITCODE_MTRAP,
	VM_EXITCODE_PAUSE,
	VM_EXITCODE_PAGING,
	VM_EXITCODE_INST_EMUL,
	VM_EXITCODE_SPINUP_AP,
	VM_EXITCODE_SPINDOWN_CPU,
	VM_EXITCODE_RENDEZVOUS,
	VM_EXITCODE_IOAPIC_EOI,
	VM_EXITCODE_MAX
};

317163816Smjacobstruct vm_exit {
318163816Smjacob	enum vm_exitcode	exitcode;
319163816Smjacob	int			inst_length;	/* 0 means unknown */
320163816Smjacob	uint64_t		rip;
321163816Smjacob	union {
322163816Smjacob		struct {
323163816Smjacob			uint16_t	bytes:3;	/* 1 or 2 or 4 */
324163816Smjacob			uint16_t	in:1;		/* out is 0, in is 1 */
325163816Smjacob			uint16_t	string:1;
326163816Smjacob			uint16_t	rep:1;
327163816Smjacob			uint16_t	port;
328163816Smjacob			uint32_t	eax;		/* valid for out */
329163816Smjacob		} inout;
330163816Smjacob		struct {
331163816Smjacob			uint64_t	gpa;
332163816Smjacob			int		fault_type;
333163816Smjacob		} paging;
334163816Smjacob		struct {
335163816Smjacob			uint64_t	gpa;
336163816Smjacob			uint64_t	gla;
337163816Smjacob			uint64_t	cr3;
338163816Smjacob			enum vie_cpu_mode cpu_mode;
339163816Smjacob			enum vie_paging_mode paging_mode;
34039217Sgibbs			struct vie	vie;
34139217Sgibbs		} inst_emul;
34239217Sgibbs		/*
34339217Sgibbs		 * VMX specific payload. Used when there is no "better"
34439217Sgibbs		 * exitcode to represent the VM-exit.
34539217Sgibbs		 */
34639217Sgibbs		struct {
34739217Sgibbs			int		status;		/* vmx inst status */
348163816Smjacob			/*
349163816Smjacob			 * 'exit_reason' and 'exit_qualification' are valid
35039217Sgibbs			 * only if 'status' is zero.
35139217Sgibbs			 */
35239217Sgibbs			uint32_t	exit_reason;
35339217Sgibbs			uint64_t	exit_qualification;
35439217Sgibbs			/*
35539217Sgibbs			 * 'inst_error' and 'inst_type' are valid
35639217Sgibbs			 * only if 'status' is non-zero.
357163816Smjacob			 */
358163816Smjacob			int		inst_type;
35939217Sgibbs			int		inst_error;
360163816Smjacob		} vmx;
361163816Smjacob		struct {
362163816Smjacob			uint32_t	code;		/* ecx value */
363163816Smjacob			uint64_t	wval;
364163816Smjacob		} msr;
365163816Smjacob		struct {
366163816Smjacob			int		vcpu;
367163816Smjacob			uint64_t	rip;
368163816Smjacob		} spinup_ap;
369163816Smjacob		struct {
370163816Smjacob			uint64_t	rflags;
371163816Smjacob		} hlt;
372163816Smjacob		struct {
373163816Smjacob			int		vector;
374163816Smjacob		} ioapic_eoi;
375163816Smjacob	} u;
376163816Smjacob};
377163816Smjacob
#endif	/* _VMM_H_ */