/* vmm.h — FreeBSD head revision 290102 */
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
25238384Sjkim * 26238384Sjkim * $FreeBSD: head/sys/amd64/include/vmm.h 290102 2015-10-28 12:30:29Z bapt $ 27238384Sjkim */ 28238384Sjkim 29238384Sjkim#ifndef _VMM_H_ 30238384Sjkim#define _VMM_H_ 31238384Sjkim 32238384Sjkim#include <x86/segments.h> 33238384Sjkim 34238384Sjkimenum vm_suspend_how { 35238384Sjkim VM_SUSPEND_NONE, 36238384Sjkim VM_SUSPEND_RESET, 37238384Sjkim VM_SUSPEND_POWEROFF, 38238384Sjkim VM_SUSPEND_HALT, 39238384Sjkim VM_SUSPEND_TRIPLEFAULT, 40238384Sjkim VM_SUSPEND_LAST 41238384Sjkim}; 42238384Sjkim 43238384Sjkim/* 44238384Sjkim * Identifiers for architecturally defined registers. 45238384Sjkim */ 46238384Sjkimenum vm_reg_name { 47238384Sjkim VM_REG_GUEST_RAX, 48238384Sjkim VM_REG_GUEST_RBX, 49238384Sjkim VM_REG_GUEST_RCX, 50238384Sjkim VM_REG_GUEST_RDX, 51238384Sjkim VM_REG_GUEST_RSI, 52238384Sjkim VM_REG_GUEST_RDI, 53238384Sjkim VM_REG_GUEST_RBP, 54238384Sjkim VM_REG_GUEST_R8, 55238384Sjkim VM_REG_GUEST_R9, 56238384Sjkim VM_REG_GUEST_R10, 57238384Sjkim VM_REG_GUEST_R11, 58238384Sjkim VM_REG_GUEST_R12, 59238384Sjkim VM_REG_GUEST_R13, 60238384Sjkim VM_REG_GUEST_R14, 61238384Sjkim VM_REG_GUEST_R15, 62238384Sjkim VM_REG_GUEST_CR0, 63238384Sjkim VM_REG_GUEST_CR3, 64238384Sjkim VM_REG_GUEST_CR4, 65238384Sjkim VM_REG_GUEST_DR7, 66238384Sjkim VM_REG_GUEST_RSP, 67238384Sjkim VM_REG_GUEST_RIP, 68238384Sjkim VM_REG_GUEST_RFLAGS, 69238384Sjkim VM_REG_GUEST_ES, 70238384Sjkim VM_REG_GUEST_CS, 71238384Sjkim VM_REG_GUEST_SS, 72238384Sjkim VM_REG_GUEST_DS, 73238384Sjkim VM_REG_GUEST_FS, 74238384Sjkim VM_REG_GUEST_GS, 75238384Sjkim VM_REG_GUEST_LDTR, 76238384Sjkim VM_REG_GUEST_TR, 77238384Sjkim VM_REG_GUEST_IDTR, 78238384Sjkim VM_REG_GUEST_GDTR, 79238384Sjkim VM_REG_GUEST_EFER, 80238384Sjkim VM_REG_GUEST_CR2, 81238384Sjkim VM_REG_GUEST_PDPTE0, 82238384Sjkim VM_REG_GUEST_PDPTE1, 83238384Sjkim VM_REG_GUEST_PDPTE2, 84238384Sjkim VM_REG_GUEST_PDPTE3, 85238384Sjkim VM_REG_GUEST_INTR_SHADOW, 86238384Sjkim VM_REG_LAST 87238384Sjkim}; 88238384Sjkim 89238384Sjkimenum 
x2apic_state { 90238384Sjkim X2APIC_DISABLED, 91238384Sjkim X2APIC_ENABLED, 92238384Sjkim X2APIC_STATE_LAST 93238384Sjkim}; 94238384Sjkim 95238384Sjkim#define VM_INTINFO_VECTOR(info) ((info) & 0xff) 96238384Sjkim#define VM_INTINFO_DEL_ERRCODE 0x800 97238384Sjkim#define VM_INTINFO_RSVD 0x7ffff000 98238384Sjkim#define VM_INTINFO_VALID 0x80000000 99238384Sjkim#define VM_INTINFO_TYPE 0x700 100238384Sjkim#define VM_INTINFO_HWINTR (0 << 8) 101238384Sjkim#define VM_INTINFO_NMI (2 << 8) 102238384Sjkim#define VM_INTINFO_HWEXCEPTION (3 << 8) 103238384Sjkim#define VM_INTINFO_SWINTR (4 << 8) 104238384Sjkim 105238384Sjkim#ifdef _KERNEL 106238384Sjkim 107238384Sjkim#define VM_MAX_NAMELEN 32 108238384Sjkim 109238384Sjkimstruct vm; 110238384Sjkimstruct vm_exception; 111238384Sjkimstruct seg_desc; 112238384Sjkimstruct vm_exit; 113238384Sjkimstruct vm_run; 114238384Sjkimstruct vhpet; 115238384Sjkimstruct vioapic; 116238384Sjkimstruct vlapic; 117238384Sjkimstruct vmspace; 118238384Sjkimstruct vm_object; 119238384Sjkimstruct vm_guest_paging; 120238384Sjkimstruct pmap; 121238384Sjkim 122238384Sjkimstruct vm_eventinfo { 123238384Sjkim void *rptr; /* rendezvous cookie */ 124238384Sjkim int *sptr; /* suspend cookie */ 125238384Sjkim int *iptr; /* reqidle cookie */ 126238384Sjkim}; 127238384Sjkim 128238384Sjkimtypedef int (*vmm_init_func_t)(int ipinum); 129238384Sjkimtypedef int (*vmm_cleanup_func_t)(void); 130238384Sjkimtypedef void (*vmm_resume_func_t)(void); 131238384Sjkimtypedef void * (*vmi_init_func_t)(struct vm *vm, struct pmap *pmap); 132238384Sjkimtypedef int (*vmi_run_func_t)(void *vmi, int vcpu, register_t rip, 133238384Sjkim struct pmap *pmap, struct vm_eventinfo *info); 134238384Sjkimtypedef void (*vmi_cleanup_func_t)(void *vmi); 135238384Sjkimtypedef int (*vmi_get_register_t)(void *vmi, int vcpu, int num, 136238384Sjkim uint64_t *retval); 137238384Sjkimtypedef int (*vmi_set_register_t)(void *vmi, int vcpu, int num, 138238384Sjkim uint64_t val); 139238384Sjkimtypedef int 
(*vmi_get_desc_t)(void *vmi, int vcpu, int num, 140238384Sjkim struct seg_desc *desc); 141238384Sjkimtypedef int (*vmi_set_desc_t)(void *vmi, int vcpu, int num, 142238384Sjkim struct seg_desc *desc); 143238384Sjkimtypedef int (*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval); 144238384Sjkimtypedef int (*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val); 145238384Sjkimtypedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max); 146238384Sjkimtypedef void (*vmi_vmspace_free)(struct vmspace *vmspace); 147238384Sjkimtypedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu); 148238384Sjkimtypedef void (*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic); 149238384Sjkim 150238384Sjkimstruct vmm_ops { 151238384Sjkim vmm_init_func_t init; /* module wide initialization */ 152238384Sjkim vmm_cleanup_func_t cleanup; 153238384Sjkim vmm_resume_func_t resume; 154238384Sjkim 155238384Sjkim vmi_init_func_t vminit; /* vm-specific initialization */ 156238384Sjkim vmi_run_func_t vmrun; 157238384Sjkim vmi_cleanup_func_t vmcleanup; 158238384Sjkim vmi_get_register_t vmgetreg; 159238384Sjkim vmi_set_register_t vmsetreg; 160238384Sjkim vmi_get_desc_t vmgetdesc; 161238384Sjkim vmi_set_desc_t vmsetdesc; 162238384Sjkim vmi_get_cap_t vmgetcap; 163238384Sjkim vmi_set_cap_t vmsetcap; 164238384Sjkim vmi_vmspace_alloc vmspace_alloc; 165238384Sjkim vmi_vmspace_free vmspace_free; 166238384Sjkim vmi_vlapic_init vlapic_init; 167238384Sjkim vmi_vlapic_cleanup vlapic_cleanup; 168238384Sjkim}; 169238384Sjkim 170238384Sjkimextern struct vmm_ops vmm_ops_intel; 171238384Sjkimextern struct vmm_ops vmm_ops_amd; 172238384Sjkim 173238384Sjkimint vm_create(const char *name, struct vm **retvm); 174238384Sjkimvoid vm_destroy(struct vm *vm); 175238384Sjkimint vm_reinit(struct vm *vm); 176238384Sjkimconst char *vm_name(struct vm *vm); 177238384Sjkim 178238384Sjkim/* 179238384Sjkim * APIs that modify the guest memory map require all vcpus to be frozen. 
180238384Sjkim */ 181238384Sjkimint vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off, 182238384Sjkim size_t len, int prot, int flags); 183238384Sjkimint vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem); 184238384Sjkimvoid vm_free_memseg(struct vm *vm, int ident); 185238384Sjkimint vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa); 186238384Sjkimint vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len); 187238384Sjkimint vm_assign_pptdev(struct vm *vm, int bus, int slot, int func); 188238384Sjkimint vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func); 189238384Sjkim 190238384Sjkim/* 191238384Sjkim * APIs that inspect the guest memory map require only a *single* vcpu to 192238384Sjkim * be frozen. This acts like a read lock on the guest memory map since any 193238384Sjkim * modification requires *all* vcpus to be frozen. 194238384Sjkim */ 195238384Sjkimint vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid, 196238384Sjkim vm_ooffset_t *segoff, size_t *len, int *prot, int *flags); 197238384Sjkimint vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem, 198238384Sjkim struct vm_object **objptr); 199238384Sjkimvoid *vm_gpa_hold(struct vm *, int vcpuid, vm_paddr_t gpa, size_t len, 200238384Sjkim int prot, void **cookie); 201238384Sjkimvoid vm_gpa_release(void *cookie); 202238384Sjkimbool vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa); 203238384Sjkim 204238384Sjkimint vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval); 205238384Sjkimint vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val); 206238384Sjkimint vm_get_seg_desc(struct vm *vm, int vcpu, int reg, 207238384Sjkim struct seg_desc *ret_desc); 208238384Sjkimint vm_set_seg_desc(struct vm *vm, int vcpu, int reg, 209238384Sjkim struct seg_desc *desc); 210238384Sjkimint vm_run(struct vm *vm, struct vm_run *vmrun); 211238384Sjkimint vm_suspend(struct vm *vm, enum vm_suspend_how how); 
212238384Sjkimint vm_inject_nmi(struct vm *vm, int vcpu); 213238384Sjkimint vm_nmi_pending(struct vm *vm, int vcpuid); 214238384Sjkimvoid vm_nmi_clear(struct vm *vm, int vcpuid); 215238384Sjkimint vm_inject_extint(struct vm *vm, int vcpu); 216238384Sjkimint vm_extint_pending(struct vm *vm, int vcpuid); 217238384Sjkimvoid vm_extint_clear(struct vm *vm, int vcpuid); 218238384Sjkimstruct vlapic *vm_lapic(struct vm *vm, int cpu); 219238384Sjkimstruct vioapic *vm_ioapic(struct vm *vm); 220238384Sjkimstruct vhpet *vm_hpet(struct vm *vm); 221238384Sjkimint vm_get_capability(struct vm *vm, int vcpu, int type, int *val); 222238384Sjkimint vm_set_capability(struct vm *vm, int vcpu, int type, int val); 223238384Sjkimint vm_get_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state *state); 224238384Sjkimint vm_set_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state state); 225238384Sjkimint vm_apicid2vcpuid(struct vm *vm, int apicid); 226238384Sjkimint vm_activate_cpu(struct vm *vm, int vcpu); 227238384Sjkimstruct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid); 228238384Sjkimvoid vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip); 229238384Sjkimvoid vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip); 230238384Sjkimvoid vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip); 231238384Sjkimvoid vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip); 232238384Sjkim 233238384Sjkim#ifdef _SYS__CPUSET_H_ 234238384Sjkim/* 235238384Sjkim * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'. 236238384Sjkim * The rendezvous 'func(arg)' is not allowed to do anything that will 237238384Sjkim * cause the thread to be put to sleep. 238238384Sjkim * 239238384Sjkim * If the rendezvous is being initiated from a vcpu context then the 240238384Sjkim * 'vcpuid' must refer to that vcpu, otherwise it should be set to -1. 241238384Sjkim * 242238384Sjkim * The caller cannot hold any locks when initiating the rendezvous. 
243238384Sjkim * 244238384Sjkim * The implementation of this API may cause vcpus other than those specified 245238384Sjkim * by 'dest' to be stalled. The caller should not rely on any vcpus making 246238384Sjkim * forward progress when the rendezvous is in progress. 247238384Sjkim */ 248238384Sjkimtypedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg); 249238384Sjkimvoid vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest, 250238384Sjkim vm_rendezvous_func_t func, void *arg); 251238384Sjkimcpuset_t vm_active_cpus(struct vm *vm); 252238384Sjkimcpuset_t vm_suspended_cpus(struct vm *vm); 253238384Sjkim#endif /* _SYS__CPUSET_H_ */ 254238384Sjkim 255238384Sjkimstatic __inline int 256238384Sjkimvcpu_rendezvous_pending(struct vm_eventinfo *info) 257238384Sjkim{ 258238384Sjkim 259238384Sjkim return (*((uintptr_t *)(info->rptr)) != 0); 260238384Sjkim} 261238384Sjkim 262238384Sjkimstatic __inline int 263238384Sjkimvcpu_suspended(struct vm_eventinfo *info) 264238384Sjkim{ 265238384Sjkim 266238384Sjkim return (*info->sptr); 267238384Sjkim} 268238384Sjkim 269238384Sjkimstatic __inline int 270238384Sjkimvcpu_reqidle(struct vm_eventinfo *info) 271238384Sjkim{ 272238384Sjkim 273238384Sjkim return (*info->iptr); 274238384Sjkim} 275238384Sjkim 276238384Sjkim/* 277238384Sjkim * Return 1 if device indicated by bus/slot/func is supposed to be a 278238384Sjkim * pci passthrough device. 279238384Sjkim * 280238384Sjkim * Return 0 otherwise. 
281238384Sjkim */ 282238384Sjkimint vmm_is_pptdev(int bus, int slot, int func); 283238384Sjkim 284238384Sjkimvoid *vm_iommu_domain(struct vm *vm); 285238384Sjkim 286238384Sjkimenum vcpu_state { 287238384Sjkim VCPU_IDLE, 288238384Sjkim VCPU_FROZEN, 289238384Sjkim VCPU_RUNNING, 290238384Sjkim VCPU_SLEEPING, 291238384Sjkim}; 292238384Sjkim 293238384Sjkimint vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state, 294238384Sjkim bool from_idle); 295238384Sjkimenum vcpu_state vcpu_get_state(struct vm *vm, int vcpu, int *hostcpu); 296238384Sjkim 297238384Sjkimstatic int __inline 298238384Sjkimvcpu_is_running(struct vm *vm, int vcpu, int *hostcpu) 299238384Sjkim{ 300238384Sjkim return (vcpu_get_state(vm, vcpu, hostcpu) == VCPU_RUNNING); 301238384Sjkim} 302238384Sjkim 303238384Sjkim#ifdef _SYS_PROC_H_ 304238384Sjkimstatic int __inline 305238384Sjkimvcpu_should_yield(struct vm *vm, int vcpu) 306238384Sjkim{ 307238384Sjkim 308238384Sjkim if (curthread->td_flags & (TDF_ASTPENDING | TDF_NEEDRESCHED)) 309238384Sjkim return (1); 310238384Sjkim else if (curthread->td_owepreempt) 311238384Sjkim return (1); 312238384Sjkim else 313238384Sjkim return (0); 314238384Sjkim} 315238384Sjkim#endif 316238384Sjkim 317238384Sjkimvoid *vcpu_stats(struct vm *vm, int vcpu); 318238384Sjkimvoid vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr); 319238384Sjkimstruct vmspace *vm_get_vmspace(struct vm *vm); 320238384Sjkimstruct vatpic *vm_atpic(struct vm *vm); 321238384Sjkimstruct vatpit *vm_atpit(struct vm *vm); 322238384Sjkimstruct vpmtmr *vm_pmtmr(struct vm *vm); 323238384Sjkimstruct vrtc *vm_rtc(struct vm *vm); 324238384Sjkim 325238384Sjkim/* 326238384Sjkim * Inject exception 'vector' into the guest vcpu. This function returns 0 on 327238384Sjkim * success and non-zero on failure. 
328238384Sjkim * 329238384Sjkim * Wrapper functions like 'vm_inject_gp()' should be preferred to calling 330238384Sjkim * this function directly because they enforce the trap-like or fault-like 331238384Sjkim * behavior of an exception. 332238384Sjkim * 333238384Sjkim * This function should only be called in the context of the thread that is 334238384Sjkim * executing this vcpu. 335238384Sjkim */ 336238384Sjkimint vm_inject_exception(struct vm *vm, int vcpuid, int vector, int err_valid, 337238384Sjkim uint32_t errcode, int restart_instruction); 338238384Sjkim 339238384Sjkim/* 340238384Sjkim * This function is called after a VM-exit that occurred during exception or 341238384Sjkim * interrupt delivery through the IDT. The format of 'intinfo' is described 342238384Sjkim * in Figure 15-1, "EXITINTINFO for All Intercepts", APM, Vol 2. 343238384Sjkim * 344238384Sjkim * If a VM-exit handler completes the event delivery successfully then it 345238384Sjkim * should call vm_exit_intinfo() to extinguish the pending event. For e.g., 346238384Sjkim * if the task switch emulation is triggered via a task gate then it should 347238384Sjkim * call this function with 'intinfo=0' to indicate that the external event 348238384Sjkim * is not pending anymore. 349238384Sjkim * 350238384Sjkim * Return value is 0 on success and non-zero on failure. 351238384Sjkim */ 352238384Sjkimint vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t intinfo); 353238384Sjkim 354238384Sjkim/* 355238384Sjkim * This function is called before every VM-entry to retrieve a pending 356238384Sjkim * event that should be injected into the guest. This function combines 357238384Sjkim * nested events into a double or triple fault. 358238384Sjkim * 359238384Sjkim * Returns 0 if there are no events that need to be injected into the guest 360238384Sjkim * and non-zero otherwise. 
361238384Sjkim */ 362238384Sjkimint vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *info); 363238384Sjkim 364238384Sjkimint vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2); 365238384Sjkim 366238384Sjkimenum vm_reg_name vm_segment_name(int seg_encoding); 367238384Sjkim 368238384Sjkimstruct vm_copyinfo { 369238384Sjkim uint64_t gpa; 370238384Sjkim size_t len; 371238384Sjkim void *hva; 372238384Sjkim void *cookie; 373238384Sjkim}; 374238384Sjkim 375238384Sjkim/* 376238384Sjkim * Set up 'copyinfo[]' to copy to/from guest linear address space starting 377238384Sjkim * at 'gla' and 'len' bytes long. The 'prot' should be set to PROT_READ for 378238384Sjkim * a copyin or PROT_WRITE for a copyout. 379238384Sjkim * 380238384Sjkim * retval is_fault Intepretation 381238384Sjkim * 0 0 Success 382238384Sjkim * 0 1 An exception was injected into the guest 383238384Sjkim * EFAULT N/A Unrecoverable error 384238384Sjkim * 385238384Sjkim * The 'copyinfo[]' can be passed to 'vm_copyin()' or 'vm_copyout()' only if 386238384Sjkim * the return value is 0. The 'copyinfo[]' resources should be freed by calling 387238384Sjkim * 'vm_copy_teardown()' after the copy is done. 
388238384Sjkim */ 389238384Sjkimint vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, 390238384Sjkim uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo, 391238384Sjkim int num_copyinfo, int *is_fault); 392238384Sjkimvoid vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, 393238384Sjkim int num_copyinfo); 394238384Sjkimvoid vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, 395238384Sjkim void *kaddr, size_t len); 396238384Sjkimvoid vm_copyout(struct vm *vm, int vcpuid, const void *kaddr, 397238384Sjkim struct vm_copyinfo *copyinfo, size_t len); 398238384Sjkim 399238384Sjkimint vcpu_trace_exceptions(struct vm *vm, int vcpuid); 400238384Sjkim#endif /* KERNEL */ 401238384Sjkim 402238384Sjkim#define VM_MAXCPU 16 /* maximum virtual cpus */ 403238384Sjkim 404238384Sjkim/* 405238384Sjkim * Identifiers for optional vmm capabilities 406238384Sjkim */ 407238384Sjkimenum vm_cap_type { 408238384Sjkim VM_CAP_HALT_EXIT, 409238384Sjkim VM_CAP_MTRAP_EXIT, 410238384Sjkim VM_CAP_PAUSE_EXIT, 411238384Sjkim VM_CAP_UNRESTRICTED_GUEST, 412238384Sjkim VM_CAP_ENABLE_INVPCID, 413238384Sjkim VM_CAP_MAX 414238384Sjkim}; 415238384Sjkim 416238384Sjkimenum vm_intr_trigger { 417238384Sjkim EDGE_TRIGGER, 418238384Sjkim LEVEL_TRIGGER 419238384Sjkim}; 420238384Sjkim 421238384Sjkim/* 422238384Sjkim * The 'access' field has the format specified in Table 21-2 of the Intel 423238384Sjkim * Architecture Manual vol 3b. 424238384Sjkim * 425238384Sjkim * XXX The contents of the 'access' field are architecturally defined except 426238384Sjkim * bit 16 - Segment Unusable. 427238384Sjkim */ 428238384Sjkimstruct seg_desc { 429238384Sjkim uint64_t base; 430238384Sjkim uint32_t limit; 431238384Sjkim uint32_t access; 432238384Sjkim}; 433238384Sjkim#define SEG_DESC_TYPE(access) ((access) & 0x001f) 434238384Sjkim#define SEG_DESC_DPL(access) (((access) >> 5) & 0x3) 435238384Sjkim#define SEG_DESC_PRESENT(access) (((access) & 0x0080) ? 
1 : 0) 436238384Sjkim#define SEG_DESC_DEF32(access) (((access) & 0x4000) ? 1 : 0) 437238384Sjkim#define SEG_DESC_GRANULARITY(access) (((access) & 0x8000) ? 1 : 0) 438238384Sjkim#define SEG_DESC_UNUSABLE(access) (((access) & 0x10000) ? 1 : 0) 439238384Sjkim 440238384Sjkimenum vm_cpu_mode { 441238384Sjkim CPU_MODE_REAL, 442238384Sjkim CPU_MODE_PROTECTED, 443238384Sjkim CPU_MODE_COMPATIBILITY, /* IA-32E mode (CS.L = 0) */ 444238384Sjkim CPU_MODE_64BIT, /* IA-32E mode (CS.L = 1) */ 445238384Sjkim}; 446238384Sjkim 447238384Sjkimenum vm_paging_mode { 448238384Sjkim PAGING_MODE_FLAT, 449238384Sjkim PAGING_MODE_32, 450238384Sjkim PAGING_MODE_PAE, 451238384Sjkim PAGING_MODE_64, 452238384Sjkim}; 453238384Sjkim 454238384Sjkimstruct vm_guest_paging { 455238384Sjkim uint64_t cr3; 456238384Sjkim int cpl; 457238384Sjkim enum vm_cpu_mode cpu_mode; 458238384Sjkim enum vm_paging_mode paging_mode; 459238384Sjkim}; 460238384Sjkim 461238384Sjkim/* 462238384Sjkim * The data structures 'vie' and 'vie_op' are meant to be opaque to the 463238384Sjkim * consumers of instruction decoding. The only reason why their contents 464238384Sjkim * need to be exposed is because they are part of the 'vm_exit' structure. 465238384Sjkim */ 466238384Sjkimstruct vie_op { 467238384Sjkim uint8_t op_byte; /* actual opcode byte */ 468238384Sjkim uint8_t op_type; /* type of operation (e.g. 
MOV) */ 469238384Sjkim uint16_t op_flags; 470238384Sjkim}; 471238384Sjkim 472238384Sjkim#define VIE_INST_SIZE 15 473238384Sjkimstruct vie { 474238384Sjkim uint8_t inst[VIE_INST_SIZE]; /* instruction bytes */ 475238384Sjkim uint8_t num_valid; /* size of the instruction */ 476238384Sjkim uint8_t num_processed; 477238384Sjkim 478238384Sjkim uint8_t addrsize:4, opsize:4; /* address and operand sizes */ 479238384Sjkim uint8_t rex_w:1, /* REX prefix */ 480238384Sjkim rex_r:1, 481238384Sjkim rex_x:1, 482238384Sjkim rex_b:1, 483238384Sjkim rex_present:1, 484238384Sjkim repz_present:1, /* REP/REPE/REPZ prefix */ 485238384Sjkim repnz_present:1, /* REPNE/REPNZ prefix */ 486238384Sjkim opsize_override:1, /* Operand size override */ 487238384Sjkim addrsize_override:1, /* Address size override */ 488238384Sjkim segment_override:1; /* Segment override */ 489238384Sjkim 490238384Sjkim uint8_t mod:2, /* ModRM byte */ 491238384Sjkim reg:4, 492238384Sjkim rm:4; 493238384Sjkim 494238384Sjkim uint8_t ss:2, /* SIB byte */ 495238384Sjkim index:4, 496238384Sjkim base:4; 497238384Sjkim 498238384Sjkim uint8_t disp_bytes; 499238384Sjkim uint8_t imm_bytes; 500238384Sjkim 501238384Sjkim uint8_t scale; 502238384Sjkim int base_register; /* VM_REG_GUEST_xyz */ 503238384Sjkim int index_register; /* VM_REG_GUEST_xyz */ 504238384Sjkim int segment_register; /* VM_REG_GUEST_xyz */ 505238384Sjkim 506238384Sjkim int64_t displacement; /* optional addr displacement */ 507238384Sjkim int64_t immediate; /* optional immediate operand */ 508238384Sjkim 509238384Sjkim uint8_t decoded; /* set to 1 if successfully decoded */ 510238384Sjkim 511238384Sjkim struct vie_op op; /* opcode description */ 512238384Sjkim}; 513238384Sjkim 514238384Sjkimenum vm_exitcode { 515238384Sjkim VM_EXITCODE_INOUT, 516238384Sjkim VM_EXITCODE_VMX, 517238384Sjkim VM_EXITCODE_BOGUS, 518238384Sjkim VM_EXITCODE_RDMSR, 519238384Sjkim VM_EXITCODE_WRMSR, 520238384Sjkim VM_EXITCODE_HLT, 521238384Sjkim VM_EXITCODE_MTRAP, 522238384Sjkim 
VM_EXITCODE_PAUSE, 523238384Sjkim VM_EXITCODE_PAGING, 524238384Sjkim VM_EXITCODE_INST_EMUL, 525238384Sjkim VM_EXITCODE_SPINUP_AP, 526238384Sjkim VM_EXITCODE_DEPRECATED1, /* used to be SPINDOWN_CPU */ 527238384Sjkim VM_EXITCODE_RENDEZVOUS, 528238384Sjkim VM_EXITCODE_IOAPIC_EOI, 529238384Sjkim VM_EXITCODE_SUSPENDED, 530238384Sjkim VM_EXITCODE_INOUT_STR, 531238384Sjkim VM_EXITCODE_TASK_SWITCH, 532238384Sjkim VM_EXITCODE_MONITOR, 533238384Sjkim VM_EXITCODE_MWAIT, 534238384Sjkim VM_EXITCODE_SVM, 535238384Sjkim VM_EXITCODE_REQIDLE, 536238384Sjkim VM_EXITCODE_MAX 537238384Sjkim}; 538238384Sjkim 539238384Sjkimstruct vm_inout { 540238384Sjkim uint16_t bytes:3; /* 1 or 2 or 4 */ 541238384Sjkim uint16_t in:1; 542238384Sjkim uint16_t string:1; 543238384Sjkim uint16_t rep:1; 544238384Sjkim uint16_t port; 545238384Sjkim uint32_t eax; /* valid for out */ 546238384Sjkim}; 547238384Sjkim 548238384Sjkimstruct vm_inout_str { 549238384Sjkim struct vm_inout inout; /* must be the first element */ 550238384Sjkim struct vm_guest_paging paging; 551238384Sjkim uint64_t rflags; 552238384Sjkim uint64_t cr0; 553238384Sjkim uint64_t index; 554238384Sjkim uint64_t count; /* rep=1 (%rcx), rep=0 (1) */ 555238384Sjkim int addrsize; 556238384Sjkim enum vm_reg_name seg_name; 557238384Sjkim struct seg_desc seg_desc; 558238384Sjkim}; 559238384Sjkim 560238384Sjkimenum task_switch_reason { 561238384Sjkim TSR_CALL, 562238384Sjkim TSR_IRET, 563238384Sjkim TSR_JMP, 564238384Sjkim TSR_IDT_GATE, /* task gate in IDT */ 565238384Sjkim}; 566238384Sjkim 567238384Sjkimstruct vm_task_switch { 568238384Sjkim uint16_t tsssel; /* new TSS selector */ 569238384Sjkim int ext; /* task switch due to external event */ 570238384Sjkim uint32_t errcode; 571238384Sjkim int errcode_valid; /* push 'errcode' on the new stack */ 572238384Sjkim enum task_switch_reason reason; 573238384Sjkim struct vm_guest_paging paging; 574238384Sjkim}; 575238384Sjkim 576238384Sjkimstruct vm_exit { 577238384Sjkim enum vm_exitcode exitcode; 
578238384Sjkim int inst_length; /* 0 means unknown */ 579238384Sjkim uint64_t rip; 580238384Sjkim union { 581238384Sjkim struct vm_inout inout; 582238384Sjkim struct vm_inout_str inout_str; 583238384Sjkim struct { 584238384Sjkim uint64_t gpa; 585238384Sjkim int fault_type; 586238384Sjkim } paging; 587238384Sjkim struct { 588238384Sjkim uint64_t gpa; 589238384Sjkim uint64_t gla; 590238384Sjkim uint64_t cs_base; 591238384Sjkim int cs_d; /* CS.D */ 592238384Sjkim struct vm_guest_paging paging; 593238384Sjkim struct vie vie; 594238384Sjkim } inst_emul; 595238384Sjkim /* 596238384Sjkim * VMX specific payload. Used when there is no "better" 597238384Sjkim * exitcode to represent the VM-exit. 598238384Sjkim */ 599238384Sjkim struct { 600238384Sjkim int status; /* vmx inst status */ 601238384Sjkim /* 602238384Sjkim * 'exit_reason' and 'exit_qualification' are valid 603238384Sjkim * only if 'status' is zero. 604238384Sjkim */ 605238384Sjkim uint32_t exit_reason; 606238384Sjkim uint64_t exit_qualification; 607238384Sjkim /* 608238384Sjkim * 'inst_error' and 'inst_type' are valid 609238384Sjkim * only if 'status' is non-zero. 610238384Sjkim */ 611238384Sjkim int inst_type; 612238384Sjkim int inst_error; 613238384Sjkim } vmx; 614238384Sjkim /* 615238384Sjkim * SVM specific payload. 
616238384Sjkim */ 617238384Sjkim struct { 618238384Sjkim uint64_t exitcode; 619238384Sjkim uint64_t exitinfo1; 620238384Sjkim uint64_t exitinfo2; 621238384Sjkim } svm; 622238384Sjkim struct { 623238384Sjkim uint32_t code; /* ecx value */ 624238384Sjkim uint64_t wval; 625238384Sjkim } msr; 626238384Sjkim struct { 627238384Sjkim int vcpu; 628238384Sjkim uint64_t rip; 629238384Sjkim } spinup_ap; 630238384Sjkim struct { 631238384Sjkim uint64_t rflags; 632238384Sjkim } hlt; 633238384Sjkim struct { 634238384Sjkim int vector; 635238384Sjkim } ioapic_eoi; 636238384Sjkim struct { 637238384Sjkim enum vm_suspend_how how; 638238384Sjkim } suspended; 639238384Sjkim struct vm_task_switch task_switch; 640238384Sjkim } u; 641238384Sjkim}; 642238384Sjkim 643238384Sjkim/* APIs to inject faults into the guest */ 644238384Sjkimvoid vm_inject_fault(void *vm, int vcpuid, int vector, int errcode_valid, 645238384Sjkim int errcode); 646238384Sjkim 647238384Sjkimstatic __inline void 648238384Sjkimvm_inject_ud(void *vm, int vcpuid) 649238384Sjkim{ 650238384Sjkim vm_inject_fault(vm, vcpuid, IDT_UD, 0, 0); 651238384Sjkim} 652238384Sjkim 653238384Sjkimstatic __inline void 654238384Sjkimvm_inject_gp(void *vm, int vcpuid) 655238384Sjkim{ 656238384Sjkim vm_inject_fault(vm, vcpuid, IDT_GP, 1, 0); 657238384Sjkim} 658238384Sjkim 659238384Sjkimstatic __inline void 660238384Sjkimvm_inject_ac(void *vm, int vcpuid, int errcode) 661238384Sjkim{ 662238384Sjkim vm_inject_fault(vm, vcpuid, IDT_AC, 1, errcode); 663238384Sjkim} 664238384Sjkim 665238384Sjkimstatic __inline void 666238384Sjkimvm_inject_ss(void *vm, int vcpuid, int errcode) 667238384Sjkim{ 668238384Sjkim vm_inject_fault(vm, vcpuid, IDT_SS, 1, errcode); 669238384Sjkim} 670238384Sjkim 671238384Sjkimvoid vm_inject_pf(void *vm, int vcpuid, int error_code, uint64_t cr2); 672238384Sjkim 673238384Sjkimint vm_restart_instruction(void *vm, int vcpuid); 674238384Sjkim 675238384Sjkim#endif /* _VMM_H_ */ 676238384Sjkim