/* vmm.h revision 276428 */
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/amd64/include/vmm.h 276428 2014-12-30 22:19:34Z neel $
 */
2837535Sdes
#ifndef _VMM_H_
#define	_VMM_H_

#include <x86/segments.h>
3337571Sdes
/*
 * How a virtual machine is being suspended.  Taken by vm_suspend() and
 * carried in the 'suspended' member of 'struct vm_exit'.
 */
enum vm_suspend_how {
	VM_SUSPEND_NONE,
	VM_SUSPEND_RESET,
	VM_SUSPEND_POWEROFF,
	VM_SUSPEND_HALT,
	VM_SUSPEND_TRIPLEFAULT,
	VM_SUSPEND_LAST		/* presumably an end marker; keep last */
};
4237535Sdes
/*
 * Identifiers for architecturally defined registers.
 *
 * The enumerators take consecutive values starting at 0, so inserting a
 * new entry anywhere but immediately before VM_REG_LAST renumbers every
 * identifier that follows it.
 */
enum vm_reg_name {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15,
	VM_REG_GUEST_CR0,
	VM_REG_GUEST_CR3,
	VM_REG_GUEST_CR4,
	VM_REG_GUEST_DR7,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_RIP,
	VM_REG_GUEST_RFLAGS,
	VM_REG_GUEST_ES,
	VM_REG_GUEST_CS,
	VM_REG_GUEST_SS,
	VM_REG_GUEST_DS,
	VM_REG_GUEST_FS,
	VM_REG_GUEST_GS,
	VM_REG_GUEST_LDTR,
	VM_REG_GUEST_TR,
	VM_REG_GUEST_IDTR,
	VM_REG_GUEST_GDTR,
	VM_REG_GUEST_EFER,
	VM_REG_GUEST_CR2,
	VM_REG_GUEST_PDPTE0,
	VM_REG_GUEST_PDPTE1,
	VM_REG_GUEST_PDPTE2,
	VM_REG_GUEST_PDPTE3,
	VM_REG_GUEST_INTR_SHADOW,
	VM_REG_LAST		/* presumably an end marker; keep last */
};
8837573Sdes
/*
 * x2APIC state of a vcpu, as read and written by vm_get_x2apic_state()
 * and vm_set_x2apic_state().
 */
enum x2apic_state {
	X2APIC_DISABLED,
	X2APIC_ENABLED,
	X2APIC_STATE_LAST	/* presumably an end marker; keep last */
};
94148986Sdes
/*
 * Fields of an 'intinfo' event descriptor:
 *
 *	bits  7:0	vector
 *	bits 10:8	event type (one of the VM_INTINFO_* type values below)
 *	bit  11		deliver an error code
 *	bits 30:12	reserved
 *	bit  31		descriptor is valid
 */
#define	VM_INTINFO_VECTOR(info)	((info) & 0xff)
#define	VM_INTINFO_DEL_ERRCODE	0x800
#define	VM_INTINFO_RSVD		0x7ffff000
#define	VM_INTINFO_VALID	0x80000000
#define	VM_INTINFO_TYPE		0x700
#define	VM_INTINFO_HWINTR	(0 << 8)
#define	VM_INTINFO_NMI		(2 << 8)
#define	VM_INTINFO_HWEXCEPTION	(3 << 8)
#define	VM_INTINFO_SWINTR	(4 << 8)
10437535Sdes
#ifdef _KERNEL
106174761Sdes
/* NOTE(review): presumably bounds the 'name' given to vm_create() - confirm. */
#define	VM_MAX_NAMELEN	32
108174761Sdes
/*
 * Forward declarations.  The prototypes below only pass pointers to these
 * types, so their full definitions are not needed here.
 */
struct vm;
struct vm_exception;
struct vm_memory_segment;
struct seg_desc;
struct vm_exit;
struct vm_run;
struct vhpet;
struct vioapic;
struct vlapic;
struct vmspace;
struct vm_object;
struct vm_guest_paging;
struct pmap;
12290267Sdes
12390267Sdestypedef int	(*vmm_init_func_t)(int ipinum);
12490267Sdestypedef int	(*vmm_cleanup_func_t)(void);
12560737Sumetypedef void	(*vmm_resume_func_t)(void);
12690267Sdestypedef void *	(*vmi_init_func_t)(struct vm *vm, struct pmap *pmap);
12790267Sdestypedef int	(*vmi_run_func_t)(void *vmi, int vcpu, register_t rip,
12890267Sdes				  struct pmap *pmap, void *rendezvous_cookie,
12990267Sdes				  void *suspend_cookie);
13090267Sdestypedef void	(*vmi_cleanup_func_t)(void *vmi);
13190267Sdestypedef int	(*vmi_get_register_t)(void *vmi, int vcpu, int num,
13290267Sdes				      uint64_t *retval);
13390267Sdestypedef int	(*vmi_set_register_t)(void *vmi, int vcpu, int num,
13490267Sdes				      uint64_t val);
13590267Sdestypedef int	(*vmi_get_desc_t)(void *vmi, int vcpu, int num,
13690267Sdes				  struct seg_desc *desc);
13760737Sumetypedef int	(*vmi_set_desc_t)(void *vmi, int vcpu, int num,
13860737Sume				  struct seg_desc *desc);
13937571Sdestypedef int	(*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval);
14055557Sdestypedef int	(*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val);
14137535Sdestypedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max);
14237535Sdestypedef void	(*vmi_vmspace_free)(struct vmspace *vmspace);
143174588Sdestypedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu);
14437535Sdestypedef void	(*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic);
145174588Sdes
146174588Sdesstruct vmm_ops {
14790267Sdes	vmm_init_func_t		init;		/* module wide initialization */
14862215Sdes	vmm_cleanup_func_t	cleanup;
14997856Sdes	vmm_resume_func_t	resume;
15097856Sdes
151174588Sdes	vmi_init_func_t		vminit;		/* vm-specific initialization */
152174588Sdes	vmi_run_func_t		vmrun;
15390267Sdes	vmi_cleanup_func_t	vmcleanup;
15490267Sdes	vmi_get_register_t	vmgetreg;
15590267Sdes	vmi_set_register_t	vmsetreg;
15690267Sdes	vmi_get_desc_t		vmgetdesc;
15755557Sdes	vmi_set_desc_t		vmsetdesc;
158174761Sdes	vmi_get_cap_t		vmgetcap;
159174761Sdes	vmi_set_cap_t		vmsetcap;
16097856Sdes	vmi_vmspace_alloc	vmspace_alloc;
16197856Sdes	vmi_vmspace_free	vmspace_free;
16237535Sdes	vmi_vlapic_init		vlapic_init;
16397856Sdes	vmi_vlapic_cleanup	vlapic_cleanup;
164174588Sdes};
16590267Sdes
/* Backend operation tables; defined outside this header. */
extern struct vmm_ops vmm_ops_intel;
extern struct vmm_ops vmm_ops_amd;
16897856Sdes
16997856Sdesint vm_create(const char *name, struct vm **retvm);
17097856Sdesvoid vm_destroy(struct vm *vm);
17190267Sdesint vm_reinit(struct vm *vm);
17297856Sdesconst char *vm_name(struct vm *vm);
17337535Sdesint vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len);
17437535Sdesint vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
17537535Sdesint vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
17637573Sdesvoid *vm_gpa_hold(struct vm *, vm_paddr_t gpa, size_t len, int prot,
17737535Sdes		  void **cookie);
17837535Sdesvoid vm_gpa_release(void *cookie);
179174588Sdesint vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
18037535Sdes	      struct vm_memory_segment *seg);
18190267Sdesint vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
18290267Sdes		  vm_offset_t *offset, struct vm_object **object);
18390267Sdesboolean_t vm_mem_allocated(struct vm *vm, vm_paddr_t gpa);
18490267Sdesint vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
18537573Sdesint vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
18690267Sdesint vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
18790267Sdes		    struct seg_desc *ret_desc);
18890267Sdesint vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
18990267Sdes		    struct seg_desc *desc);
19090267Sdesint vm_run(struct vm *vm, struct vm_run *vmrun);
19190267Sdesint vm_suspend(struct vm *vm, enum vm_suspend_how how);
192174588Sdesint vm_inject_nmi(struct vm *vm, int vcpu);
19390267Sdesint vm_nmi_pending(struct vm *vm, int vcpuid);
19490267Sdesvoid vm_nmi_clear(struct vm *vm, int vcpuid);
19590267Sdesint vm_inject_extint(struct vm *vm, int vcpu);
196174588Sdesint vm_extint_pending(struct vm *vm, int vcpuid);
19790267Sdesvoid vm_extint_clear(struct vm *vm, int vcpuid);
19890267Sdesstruct vlapic *vm_lapic(struct vm *vm, int cpu);
19990267Sdesstruct vioapic *vm_ioapic(struct vm *vm);
200174588Sdesstruct vhpet *vm_hpet(struct vm *vm);
20190267Sdesint vm_get_capability(struct vm *vm, int vcpu, int type, int *val);
20290267Sdesint vm_set_capability(struct vm *vm, int vcpu, int type, int val);
20390267Sdesint vm_get_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state *state);
204174588Sdesint vm_set_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state state);
20537535Sdesint vm_apicid2vcpuid(struct vm *vm, int apicid);
20637535Sdesint vm_activate_cpu(struct vm *vm, int vcpu);
20737535Sdescpuset_t vm_active_cpus(struct vm *vm);
20863340Sdescpuset_t vm_suspended_cpus(struct vm *vm);
20963340Sdesstruct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
21075891Sarchievoid vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
211174588Sdesvoid vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip);
21263340Sdesvoid vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
213148986Sdes
21490267Sdes/*
21590267Sdes * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'.
216148986Sdes * The rendezvous 'func(arg)' is not allowed to do anything that will
21790267Sdes * cause the thread to be put to sleep.
218148986Sdes *
219148986Sdes * If the rendezvous is being initiated from a vcpu context then the
220148986Sdes * 'vcpuid' must refer to that vcpu, otherwise it should be set to -1.
221148986Sdes *
222148986Sdes * The caller cannot hold any locks when initiating the rendezvous.
223148986Sdes *
224148986Sdes * The implementation of this API may cause vcpus other than those specified
225148986Sdes * by 'dest' to be stalled. The caller should not rely on any vcpus making
226148986Sdes * forward progress when the rendezvous is in progress.
22763340Sdes */
22863340Sdestypedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg);
22963340Sdesvoid vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
230148986Sdes    vm_rendezvous_func_t func, void *arg);
231148986Sdes
/*
 * Return 1 if the uintptr_t that 'rendezvous_cookie' points to is non-zero
 * and 0 otherwise.  'rendezvous_cookie' must not be NULL; it is
 * dereferenced unconditionally.
 */
static __inline int
vcpu_rendezvous_pending(void *rendezvous_cookie)
{

	return (*(uintptr_t *)rendezvous_cookie != 0);
}
238148986Sdes
/*
 * Return the int that 'suspend_cookie' points to; callers treat a non-zero
 * value as "suspended".  'suspend_cookie' must not be NULL; it is
 * dereferenced unconditionally.
 */
static __inline int
vcpu_suspended(void *suspend_cookie)
{

	return (*(int *)suspend_cookie);
}
245148986Sdes
/*
 * Return 1 if the device indicated by bus/slot/func is supposed to be a
 * PCI passthrough device.
 *
 * Return 0 otherwise.
 */
int vmm_is_pptdev(int bus, int slot, int func);

/* NOTE(review): the returned pointer is opaque at this level. */
void *vm_iommu_domain(struct vm *vm);
255148986Sdes
/* States of a vcpu, as used by vcpu_set_state() and vcpu_get_state(). */
enum vcpu_state {
	VCPU_IDLE,
	VCPU_FROZEN,
	VCPU_RUNNING,
	VCPU_SLEEPING,
};
262148986Sdes
/*
 * Set and query the state of a vcpu.  NOTE(review): 'hostcpu' looks like
 * an output parameter of vcpu_get_state() - confirm against the
 * implementation.
 */
int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state,
    bool from_idle);
enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu, int *hostcpu);
26690267Sdes
26790267Sdesstatic int __inline
26863340Sdesvcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
26963340Sdes{
270174588Sdes	return (vcpu_get_state(vm, vcpu, hostcpu) == VCPU_RUNNING);
27163340Sdes}
272148986Sdes
#ifdef _SYS_PROC_H_
/*
 * Return non-zero if the current thread has TDF_ASTPENDING or
 * TDF_NEEDRESCHED set in its 'td_flags'.  The 'vm' and 'vcpu' arguments
 * are not used.
 *
 * Only available when _SYS_PROC_H_ is defined, because the body needs
 * 'curthread' and the TDF_* flags.
 */
static __inline int
vcpu_should_yield(struct vm *vm, int vcpu)
{

	return (curthread->td_flags & (TDF_ASTPENDING | TDF_NEEDRESCHED));
}
#endif
280174588Sdes
/*
 * Per-vcpu statistics and event notification, address space lookup, PCI
 * passthrough device assignment, and accessors for emulated devices.
 */
void *vcpu_stats(struct vm *vm, int vcpu);
void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr);
struct vmspace *vm_get_vmspace(struct vm *vm);
int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
struct vatpic *vm_atpic(struct vm *vm);
struct vatpit *vm_atpit(struct vm *vm);
struct vpmtmr *vm_pmtmr(struct vm *vm);
struct vrtc *vm_rtc(struct vm *vm);
290148986Sdes
/*
 * Inject exception 'vme' into the guest vcpu. This function returns 0 on
 * success and non-zero on failure.
 *
 * Wrapper functions like 'vm_inject_gp()' should be preferred to calling
 * this function directly because they enforce the trap-like or fault-like
 * behavior of an exception.
 *
 * This function should only be called in the context of the thread that is
 * executing this vcpu.
 */
int vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *vme);
303148986Sdes
/*
 * This function is called after a VM-exit that occurred during exception or
 * interrupt delivery through the IDT. The format of 'intinfo' is described
 * in Figure 15-1, "EXITINTINFO for All Intercepts", APM, Vol 2.
 *
 * If a VM-exit handler completes the event delivery successfully then it
 * should call vm_exit_intinfo() to extinguish the pending event. For
 * example, if the task switch emulation is triggered via a task gate then
 * it should call this function with 'intinfo=0' to indicate that the
 * external event is not pending anymore.
 *
 * Return value is 0 on success and non-zero on failure.
 */
int vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t intinfo);
318168960Snjl
/*
 * This function is called before every VM-entry to retrieve a pending
 * event that should be injected into the guest. This function combines
 * nested events into a double or triple fault.
 *
 * Returns 0 if there are no events that need to be injected into the guest
 * and non-zero otherwise.
 */
int vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *info);

int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2);

/* NOTE(review): presumably maps a segment encoding to VM_REG_GUEST_xS. */
enum vm_reg_name vm_segment_name(int seg_encoding);
332148986Sdes
/*
 * One element of the 'copyinfo[]' array filled in by vm_copy_setup() and
 * consumed by vm_copyin(), vm_copyout() and vm_copy_teardown().
 *
 * NOTE(review): 'gpa'/'hva' read as guest-physical and host-virtual
 * addresses of the piece, 'len' as its size in bytes - confirm against the
 * implementation.
 */
struct vm_copyinfo {
	uint64_t	gpa;
	size_t		len;
	void		*hva;
	void		*cookie;
};
339148986Sdes
/*
 * Set up 'copyinfo[]' to copy to/from guest linear address space starting
 * at 'gla' and 'len' bytes long. The 'prot' should be set to PROT_READ for
 * a copyin or PROT_WRITE for a copyout.
 *
 * Returns 0 on success.
 * Returns 1 if an exception was injected into the guest.
 * Returns -1 otherwise.
 *
 * The 'copyinfo[]' can be passed to 'vm_copyin()' or 'vm_copyout()' only if
 * the return value is 0. The 'copyinfo[]' resources should be freed by calling
 * 'vm_copy_teardown()' after the copy is done.
 */
int vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
    uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo,
    int num_copyinfo);
void vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
    int num_copyinfo);
void vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
    void *kaddr, size_t len);
void vm_copyout(struct vm *vm, int vcpuid, const void *kaddr,
    struct vm_copyinfo *copyinfo, size_t len);

int vcpu_trace_exceptions(struct vm *vm, int vcpuid);
#endif	/* _KERNEL */
365154550Sdes
#define	VM_MAXCPU	16			/* maximum virtual cpus */
367154550Sdes
/*
 * Identifiers for optional vmm capabilities
 */
enum vm_cap_type {
	VM_CAP_HALT_EXIT,
	VM_CAP_MTRAP_EXIT,
	VM_CAP_PAUSE_EXIT,
	VM_CAP_UNRESTRICTED_GUEST,
	VM_CAP_ENABLE_INVPCID,
	VM_CAP_MAX		/* presumably an end marker; keep last */
};
379148986Sdes
/* Interrupt trigger mode. */
enum vm_intr_trigger {
	EDGE_TRIGGER,
	LEVEL_TRIGGER
};
384148986Sdes
/*
 * The 'access' field has the format specified in Table 21-2 of the Intel
 * Architecture Manual vol 3b.
 *
 * XXX The contents of the 'access' field are architecturally defined except
 * bit 16 - Segment Unusable.
 *
 * The SEG_DESC_*() accessors below extract:
 *	TYPE		bits 4:0
 *	DPL		bits 6:5
 *	PRESENT		bit 7
 *	DEF32		bit 14
 *	GRANULARITY	bit 15
 *	UNUSABLE	bit 16
 * Every accessor other than TYPE and DPL yields exactly 0 or 1.
 */
struct seg_desc {
	uint64_t	base;
	uint32_t	limit;
	uint32_t	access;
};
#define	SEG_DESC_TYPE(access)		((access) & 0x001f)
#define	SEG_DESC_DPL(access)		(((access) >> 5) & 0x3)
#define	SEG_DESC_PRESENT(access)	(((access) & 0x0080) ? 1 : 0)
#define	SEG_DESC_DEF32(access)		(((access) & 0x4000) ? 1 : 0)
#define	SEG_DESC_GRANULARITY(access)	(((access) & 0x8000) ? 1 : 0)
#define	SEG_DESC_UNUSABLE(access)	(((access) & 0x10000) ? 1 : 0)
403148986Sdes
/* Guest CPU operating mode, recorded in 'struct vm_guest_paging'. */
enum vm_cpu_mode {
	CPU_MODE_REAL,
	CPU_MODE_PROTECTED,
	CPU_MODE_COMPATIBILITY,		/* IA-32E mode (CS.L = 0) */
	CPU_MODE_64BIT,			/* IA-32E mode (CS.L = 1) */
};
41063340Sdes
/* Guest paging mode, recorded in 'struct vm_guest_paging'. */
enum vm_paging_mode {
	PAGING_MODE_FLAT,
	PAGING_MODE_32,
	PAGING_MODE_PAE,
	PAGING_MODE_64,
};
417148986Sdes
418174588Sdesstruct vm_guest_paging {
419148986Sdes	uint64_t	cr3;
420174588Sdes	int		cpl;
421148986Sdes	enum vm_cpu_mode cpu_mode;
422148986Sdes	enum vm_paging_mode paging_mode;
423174761Sdes};
42490267Sdes
/*
 * The data structures 'vie' and 'vie_op' are meant to be opaque to the
 * consumers of instruction decoding. The only reason why their contents
 * need to be exposed is because they are part of the 'vm_exit' structure.
 */
struct vie_op {
	uint8_t		op_byte;	/* actual opcode byte */
	uint8_t		op_type;	/* type of operation (e.g. MOV) */
	uint16_t	op_flags;
};

#define	VIE_INST_SIZE	15
struct vie {
	uint8_t		inst[VIE_INST_SIZE];	/* instruction bytes */
	uint8_t		num_valid;		/* size of the instruction */
	uint8_t		num_processed;

	uint8_t		addrsize:4, opsize:4;	/* address and operand sizes */
	uint8_t		rex_w:1,		/* REX prefix */
			rex_r:1,
			rex_x:1,
			rex_b:1,
			rex_present:1,
			opsize_override:1,	/* Operand size override */
			addrsize_override:1;	/* Address size override */

	/*
	 * 'reg', 'rm', 'index' and 'base' are 4 bits wide, one more than
	 * the 3-bit fields of the encoded ModRM and SIB bytes.
	 */
	uint8_t		mod:2,			/* ModRM byte */
			reg:4,
			rm:4;

	uint8_t		ss:2,			/* SIB byte */
			index:4,
			base:4;

	uint8_t		disp_bytes;
	uint8_t		imm_bytes;

	uint8_t		scale;
	int		base_register;		/* VM_REG_GUEST_xyz */
	int		index_register;		/* VM_REG_GUEST_xyz */

	int64_t		displacement;		/* optional addr displacement */
	int64_t		immediate;		/* optional immediate operand */

	uint8_t		decoded;	/* set to 1 if successfully decoded */

	struct vie_op	op;			/* opcode description */
};
47390267Sdes
/*
 * Reason for a VM exit, stored in the 'exitcode' member of
 * 'struct vm_exit'.
 *
 * The enumerators take consecutive values starting at 0, so a retired
 * code keeps its slot (see VM_EXITCODE_DEPRECATED1) and the later values
 * stay unchanged.
 */
enum vm_exitcode {
	VM_EXITCODE_INOUT,
	VM_EXITCODE_VMX,
	VM_EXITCODE_BOGUS,
	VM_EXITCODE_RDMSR,
	VM_EXITCODE_WRMSR,
	VM_EXITCODE_HLT,
	VM_EXITCODE_MTRAP,
	VM_EXITCODE_PAUSE,
	VM_EXITCODE_PAGING,
	VM_EXITCODE_INST_EMUL,
	VM_EXITCODE_SPINUP_AP,
	VM_EXITCODE_DEPRECATED1,	/* used to be SPINDOWN_CPU */
	VM_EXITCODE_RENDEZVOUS,
	VM_EXITCODE_IOAPIC_EOI,
	VM_EXITCODE_SUSPENDED,
	VM_EXITCODE_INOUT_STR,
	VM_EXITCODE_TASK_SWITCH,
	VM_EXITCODE_MONITOR,
	VM_EXITCODE_MWAIT,
	VM_EXITCODE_SVM,
	VM_EXITCODE_MAX
};
49767430Sdes
/*
 * Description of an in/out instruction that caused a VM exit.
 *
 * NOTE(review): 'in', 'string' and 'rep' are one-bit flags; their exact
 * meaning is inferred from the names only - confirm against the exit
 * handlers.
 */
struct vm_inout {
	uint16_t	bytes:3;	/* 1 or 2 or 4 */
	uint16_t	in:1;
	uint16_t	string:1;
	uint16_t	rep:1;
	uint16_t	port;
	uint32_t	eax;		/* valid for out */
};
50690267Sdes
50790267Sdesstruct vm_inout_str {
50890267Sdes	struct vm_inout	inout;		/* must be the first element */
50990267Sdes	struct vm_guest_paging paging;
51090267Sdes	uint64_t	rflags;
51190267Sdes	uint64_t	cr0;
51290267Sdes	uint64_t	index;
513174588Sdes	uint64_t	count;		/* rep=1 (%rcx), rep=0 (1) */
51490267Sdes	int		addrsize;
51590267Sdes	enum vm_reg_name seg_name;
51690267Sdes	struct seg_desc seg_desc;
51790267Sdes};
51890267Sdes
/* What triggered a task switch; see 'struct vm_task_switch'. */
enum task_switch_reason {
	TSR_CALL,
	TSR_IRET,
	TSR_JMP,
	TSR_IDT_GATE,	/* task gate in IDT */
};
52567430Sdes
526174588Sdesstruct vm_task_switch {
52767430Sdes	uint16_t	tsssel;		/* new TSS selector */
52890267Sdes	int		ext;		/* task switch due to external event */
52990267Sdes	uint32_t	errcode;
53090267Sdes	int		errcode_valid;	/* push 'errcode' on the new stack */
53190267Sdes	enum task_switch_reason reason;
53290267Sdes	struct vm_guest_paging paging;
53390267Sdes};
53490267Sdes
53590267Sdesstruct vm_exit {
53697866Sdes	enum vm_exitcode	exitcode;
53790267Sdes	int			inst_length;	/* 0 means unknown */
53890267Sdes	uint64_t		rip;
53990267Sdes	union {
54090267Sdes		struct vm_inout	inout;
54190267Sdes		struct vm_inout_str inout_str;
54290267Sdes		struct {
54390267Sdes			uint64_t	gpa;
544174588Sdes			int		fault_type;
54590267Sdes		} paging;
54690267Sdes		struct {
54790267Sdes			uint64_t	gpa;
54890267Sdes			uint64_t	gla;
54990267Sdes			int		cs_d;		/* CS.D */
55067430Sdes			struct vm_guest_paging paging;
55167430Sdes			struct vie	vie;
55267430Sdes		} inst_emul;
553174588Sdes		/*
55467430Sdes		 * VMX specific payload. Used when there is no "better"
55590267Sdes		 * exitcode to represent the VM-exit.
55690267Sdes		 */
55790267Sdes		struct {
55890267Sdes			int		status;		/* vmx inst status */
55990267Sdes			/*
56090267Sdes			 * 'exit_reason' and 'exit_qualification' are valid
56190267Sdes			 * only if 'status' is zero.
56290267Sdes			 */
56390267Sdes			uint32_t	exit_reason;
56467430Sdes			uint64_t	exit_qualification;
56567430Sdes			/*
56667430Sdes			 * 'inst_error' and 'inst_type' are valid
567174588Sdes			 * only if 'status' is non-zero.
56867430Sdes			 */
56990267Sdes			int		inst_type;
57090267Sdes			int		inst_error;
57167430Sdes		} vmx;
57290267Sdes		/*
57390267Sdes		 * SVM specific payload.
57490267Sdes		 */
57590267Sdes		struct {
57690267Sdes			uint64_t	exitcode;
57790267Sdes			uint64_t	exitinfo1;
57890267Sdes			uint64_t	exitinfo2;
57997866Sdes		} svm;
58090267Sdes		struct {
58190267Sdes			uint32_t	code;		/* ecx value */
58290267Sdes			uint64_t	wval;
583174588Sdes		} msr;
58490267Sdes		struct {
58597866Sdes			int		vcpu;
58690267Sdes			uint64_t	rip;
587174588Sdes		} spinup_ap;
588105903Snjl		struct {
589105903Snjl			uint64_t	rflags;
590174588Sdes		} hlt;
59190267Sdes		struct {
59290267Sdes			int		vector;
59367430Sdes		} ioapic_eoi;
59467430Sdes		struct {
59567430Sdes			enum vm_suspend_how how;
596174588Sdes		} suspended;
59767430Sdes		struct vm_task_switch task_switch;
59890267Sdes	} u;
59990267Sdes};
60067430Sdes
/*
 * APIs to inject faults into the guest.
 *
 * 'errcode' is only meaningful when 'errcode_valid' is non-zero.
 * NOTE(review): 'vm' is a 'void *' here rather than a 'struct vm *';
 * presumably so that the declaration also works outside _KERNEL - confirm.
 */
void vm_inject_fault(void *vm, int vcpuid, int vector, int errcode_valid,
    int errcode);
60490267Sdes
60597866Sdesstatic __inline void
60697866Sdesvm_inject_ud(void *vm, int vcpuid)
60790267Sdes{
60890267Sdes	vm_inject_fault(vm, vcpuid, IDT_UD, 0, 0);
609174588Sdes}
61090267Sdes
61190267Sdesstatic __inline void
61290267Sdesvm_inject_gp(void *vm, int vcpuid)
61367430Sdes{
61467430Sdes	vm_inject_fault(vm, vcpuid, IDT_GP, 1, 0);
61567430Sdes}
61637608Sdes
61737535Sdesstatic __inline void
61837535Sdesvm_inject_ac(void *vm, int vcpuid, int errcode)
619174588Sdes{
62090267Sdes	vm_inject_fault(vm, vcpuid, IDT_AC, 1, errcode);
62137535Sdes}
62290267Sdes
62390267Sdesstatic __inline void
62490267Sdesvm_inject_ss(void *vm, int vcpuid, int errcode)
625159566Sdes{
626148986Sdes	vm_inject_fault(vm, vcpuid, IDT_SS, 1, errcode);
627148986Sdes}
62890267Sdes
/*
 * Page fault injection.  NOTE(review): 'cr2' is presumably the faulting
 * address to be reported to the guest - confirm against the implementation.
 */
void vm_inject_pf(void *vm, int vcpuid, int error_code, uint64_t cr2);
63090267Sdes
#endif	/* _VMM_H_ */
63290267Sdes