/* vmm.h revision 268701 */
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/amd64/include/vmm.h 268701 2014-07-15 17:37:17Z neel $
 */

#ifndef _VMM_H_
#define	_VMM_H_

/*
 * Argument type of vm_suspend() and payload of the 'suspended' VM-exit.
 * VM_SUSPEND_LAST is the final enumerator, so it equals the number of
 * entries that precede it.
 */
enum vm_suspend_how {
	VM_SUSPEND_NONE,
	VM_SUSPEND_RESET,
	VM_SUSPEND_POWEROFF,
	VM_SUSPEND_HALT,
	VM_SUSPEND_LAST
};

/*
 * Identifiers for architecturally defined registers.
 *
 * The enumerators are implicitly numbered from 0; VM_REG_LAST is the final
 * enumerator and therefore equals the number of register identifiers.
 */
enum vm_reg_name {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15,
	VM_REG_GUEST_CR0,
	VM_REG_GUEST_CR3,
	VM_REG_GUEST_CR4,
	VM_REG_GUEST_DR7,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_RIP,
	VM_REG_GUEST_RFLAGS,
	VM_REG_GUEST_ES,
	VM_REG_GUEST_CS,
	VM_REG_GUEST_SS,
	VM_REG_GUEST_DS,
	VM_REG_GUEST_FS,
	VM_REG_GUEST_GS,
	VM_REG_GUEST_LDTR,
	VM_REG_GUEST_TR,
	VM_REG_GUEST_IDTR,
	VM_REG_GUEST_GDTR,
	VM_REG_GUEST_EFER,
	VM_REG_GUEST_CR2,
	VM_REG_LAST
};

/* State argument of vm_get_x2apic_state() / vm_set_x2apic_state(). */
enum x2apic_state {
	X2APIC_DISABLED,
	X2APIC_ENABLED,
	X2APIC_STATE_LAST
};

#ifdef _KERNEL

#define	VM_MAX_NAMELEN	32

/*
 * Forward declarations: the kernel API below only passes these types
 * around by pointer.
 */
struct vm;
struct vm_exception;
struct vm_memory_segment;
struct seg_desc;
struct vm_exit;
struct vm_run;
struct vhpet;
struct vioapic;
struct vlapic;
struct vatpic;
struct vatpit;
struct vmspace;
struct vm_object;
struct pmap;

/*
 * Signatures of the entry points collected in 'struct vmm_ops'.
 * The 'vmi' argument is the opaque pointer returned by vmi_init_func_t.
 */
typedef int	(*vmm_init_func_t)(int ipinum);
typedef int	(*vmm_cleanup_func_t)(void);
typedef void	(*vmm_resume_func_t)(void);
typedef void *	(*vmi_init_func_t)(struct vm *vm, struct pmap *pmap);
typedef int	(*vmi_run_func_t)(void *vmi, int vcpu, register_t rip,
				  struct pmap *pmap, void *rendezvous_cookie,
				  void *suspend_cookie);
typedef void	(*vmi_cleanup_func_t)(void *vmi);
typedef int	(*vmi_get_register_t)(void *vmi, int vcpu, int num,
				      uint64_t *retval);
typedef int	(*vmi_set_register_t)(void *vmi, int vcpu, int num,
				      uint64_t val);
typedef int	(*vmi_get_desc_t)(void *vmi, int vcpu, int num,
				  struct seg_desc *desc);
typedef int	(*vmi_set_desc_t)(void *vmi, int vcpu, int num,
				  struct seg_desc *desc);
typedef int	(*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval);
typedef int	(*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val);
typedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max);
typedef void	(*vmi_vmspace_free)(struct vmspace *vmspace);
typedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu);
typedef void	(*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic);

/*
 * Table of function pointers; two instances (vmm_ops_intel and
 * vmm_ops_amd) are declared right after it.
 */
struct vmm_ops {
	vmm_init_func_t		init;		/* module wide initialization */
	vmm_cleanup_func_t	cleanup;
	vmm_resume_func_t	resume;

	vmi_init_func_t		vminit;		/* vm-specific initialization */
	vmi_run_func_t		vmrun;
	vmi_cleanup_func_t	vmcleanup;
	vmi_get_register_t	vmgetreg;
	vmi_set_register_t	vmsetreg;
	vmi_get_desc_t		vmgetdesc;
	vmi_set_desc_t		vmsetdesc;
	vmi_get_cap_t		vmgetcap;
	vmi_set_cap_t		vmsetcap;
	vmi_vmspace_alloc	vmspace_alloc;
	vmi_vmspace_free	vmspace_free;
	vmi_vlapic_init		vlapic_init;
	vmi_vlapic_cleanup	vlapic_cleanup;
};

extern struct vmm_ops vmm_ops_intel;
extern struct vmm_ops vmm_ops_amd;

int vm_create(const char *name, struct vm **retvm);
void vm_destroy(struct vm *vm);
int vm_reinit(struct vm *vm);
const char *vm_name(struct vm *vm);
int vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len);
int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
void *vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int prot,
		  void **cookie);
void vm_gpa_release(void *cookie);
int vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
	      struct vm_memory_segment *seg);
int vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
		  vm_offset_t *offset, struct vm_object **object);
boolean_t vm_mem_allocated(struct vm *vm, vm_paddr_t gpa);
int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
		    struct seg_desc *ret_desc);
int vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
		    struct seg_desc *desc);
int vm_run(struct vm *vm, struct vm_run *vmrun);
int vm_suspend(struct vm *vm, enum vm_suspend_how how);
int vm_inject_nmi(struct vm *vm, int vcpu);
int vm_nmi_pending(struct vm *vm, int vcpuid);
void vm_nmi_clear(struct vm *vm, int vcpuid);
int vm_inject_extint(struct vm *vm, int vcpu);
int vm_extint_pending(struct vm *vm, int vcpuid);
void vm_extint_clear(struct vm *vm, int vcpuid);
uint64_t *vm_guest_msrs(struct vm *vm, int cpu);
struct vlapic *vm_lapic(struct vm *vm, int cpu);
struct vioapic *vm_ioapic(struct vm *vm);
struct vhpet *vm_hpet(struct vm *vm);
int vm_get_capability(struct vm *vm, int vcpu, int type, int *val);
int vm_set_capability(struct vm *vm, int vcpu, int type, int val);
int vm_get_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state *state);
int vm_set_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state state);
int vm_apicid2vcpuid(struct vm *vm, int apicid);
int vm_activate_cpu(struct vm *vm, int vcpu);
cpuset_t vm_active_cpus(struct vm *vm);
cpuset_t vm_suspended_cpus(struct vm *vm);
struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);

/*
 * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'.
 * The rendezvous 'func(arg)' is not allowed to do anything that will
 * cause the thread to be put to sleep.
 *
 * If the rendezvous is being initiated from a vcpu context then the
 * 'vcpuid' must refer to that vcpu, otherwise it should be set to -1.
 *
 * The caller cannot hold any locks when initiating the rendezvous.
 *
 * The implementation of this API may cause vcpus other than those specified
 * by 'dest' to be stalled. The caller should not rely on any vcpus making
 * forward progress when the rendezvous is in progress.
 */
typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg);
void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
    vm_rendezvous_func_t func, void *arg);

/*
 * Return non-zero if the uintptr_t that 'rendezvous_cookie' points to is
 * non-zero, and 0 otherwise.
 */
static __inline int
vcpu_rendezvous_pending(void *rendezvous_cookie)
{

	return (*(uintptr_t *)rendezvous_cookie != 0);
}

/*
 * Return the value of the int that 'suspend_cookie' points to.
 */
static __inline int
vcpu_suspended(void *suspend_cookie)
{

	return (*(int *)suspend_cookie);
}

/*
 * Return 1 if device indicated by bus/slot/func is supposed to be a
 * pci passthrough device.
 *
 * Return 0 otherwise.
 */
int vmm_is_pptdev(int bus, int slot, int func);

void *vm_iommu_domain(struct vm *vm);

/* States accepted by vcpu_set_state() and reported by vcpu_get_state(). */
enum vcpu_state {
	VCPU_IDLE,
	VCPU_FROZEN,
	VCPU_RUNNING,
	VCPU_SLEEPING,
};

int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state,
    bool from_idle);
enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu, int *hostcpu);

/*
 * Return non-zero if vcpu_get_state() reports VCPU_RUNNING for 'vcpu'.
 * 'hostcpu' is handed to vcpu_get_state() unchanged.
 */
static __inline int
vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
{

	return (vcpu_get_state(vm, vcpu, hostcpu) == VCPU_RUNNING);
}

void *vcpu_stats(struct vm *vm, int vcpu);
void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr);
struct vmspace *vm_get_vmspace(struct vm *vm);
int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
struct vatpic *vm_atpic(struct vm *vm);
struct vatpit *vm_atpit(struct vm *vm);

/*
 * Inject exception 'vme' into the guest vcpu. This function returns 0 on
 * success and non-zero on failure.
 *
 * Wrapper functions like 'vm_inject_gp()' should be preferred to calling
 * this function directly because they enforce the trap-like or fault-like
 * behavior of an exception.
 *
 * This function should only be called in the context of the thread that is
 * executing this vcpu.
 */
int vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *vme);

/*
 * Returns 0 if there is no exception pending for this vcpu. Returns 1 if an
 * exception is pending and also updates 'vme'. The pending exception is
 * cleared when this function returns.
 *
 * This function should only be called in the context of the thread that is
 * executing this vcpu.
 */
int vm_exception_pending(struct vm *vm, int vcpuid, struct vm_exception *vme);

void vm_inject_gp(struct vm *vm, int vcpuid); /* general protection fault */
void vm_inject_ud(struct vm *vm, int vcpuid); /* undefined instruction fault */
void vm_inject_pf(struct vm *vm, int vcpuid, int error_code, uint64_t cr2);

enum vm_reg_name vm_segment_name(int seg_encoding);

#endif	/* _KERNEL */

#define	VM_MAXCPU	16			/* maximum virtual cpus */

/*
 * Identifiers for optional vmm capabilities
 */
enum vm_cap_type {
	VM_CAP_HALT_EXIT,
	VM_CAP_MTRAP_EXIT,
	VM_CAP_PAUSE_EXIT,
	VM_CAP_UNRESTRICTED_GUEST,
	VM_CAP_ENABLE_INVPCID,
	VM_CAP_MAX
};

enum vm_intr_trigger {
	EDGE_TRIGGER,
	LEVEL_TRIGGER
};

/*
 * The 'access' field has the format specified in Table 21-2 of the Intel
 * Architecture Manual vol 3b.
 *
 * XXX The contents of the 'access' field are architecturally defined except
 * bit 16 - Segment Unusable.
 */
struct seg_desc {
	uint64_t	base;
	uint32_t	limit;
	uint32_t	access;
};

/*
 * Accessors for 'seg_desc.access'. SEG_DESC_TYPE() yields the low five
 * bits; the others yield 1 or 0 depending on whether a single bit is set:
 * bit 7 (PRESENT), bit 14 (DEF32), bit 15 (GRANULARITY), bit 16 (UNUSABLE).
 */
#define	SEG_DESC_TYPE(access)		((access) & 0x001f)
#define	SEG_DESC_PRESENT(access)	(((access) & 0x0080) ? 1 : 0)
#define	SEG_DESC_DEF32(access)		(((access) & 0x4000) ? 1 : 0)
#define	SEG_DESC_GRANULARITY(access)	(((access) & 0x8000) ? 1 : 0)
#define	SEG_DESC_UNUSABLE(access)	(((access) & 0x10000) ? 1 : 0)

enum vm_cpu_mode {
	CPU_MODE_REAL,
	CPU_MODE_PROTECTED,
	CPU_MODE_COMPATIBILITY,		/* IA-32E mode (CS.L = 0) */
	CPU_MODE_64BIT,			/* IA-32E mode (CS.L = 1) */
};

enum vm_paging_mode {
	PAGING_MODE_FLAT,
	PAGING_MODE_32,
	PAGING_MODE_PAE,
	PAGING_MODE_64,
};

/*
 * Guest paging context; embedded in the 'inst_emul' and 'inout_str'
 * VM-exit payloads below.
 */
struct vm_guest_paging {
	uint64_t	cr3;
	int		cpl;
	enum vm_cpu_mode cpu_mode;
	enum vm_paging_mode paging_mode;
};

/*
 * The data structures 'vie' and 'vie_op' are meant to be opaque to the
 * consumers of instruction decoding. The only reason why their contents
 * need to be exposed is because they are part of the 'vm_exit' structure.
 */
struct vie_op {
	uint8_t		op_byte;	/* actual opcode byte */
	uint8_t		op_type;	/* type of operation (e.g. MOV) */
	uint16_t	op_flags;
};

#define	VIE_INST_SIZE	15
struct vie {
	uint8_t		inst[VIE_INST_SIZE];	/* instruction bytes */
	uint8_t		num_valid;		/* size of the instruction */
	uint8_t		num_processed;

	uint8_t		addrsize:4, opsize:4;	/* address and operand sizes */
	uint8_t		rex_w:1,		/* REX prefix */
			rex_r:1,
			rex_x:1,
			rex_b:1,
			rex_present:1,
			opsize_override:1,	/* Operand size override */
			addrsize_override:1;	/* Address size override */

	/*
	 * NOTE(review): 'reg', 'rm', 'index' and 'base' are 4 bits wide,
	 * presumably so they can hold the field extended by a REX bit --
	 * confirm against the instruction decoder.
	 */
	uint8_t		mod:2,			/* ModRM byte */
			reg:4,
			rm:4;

	uint8_t		ss:2,			/* SIB byte */
			index:4,
			base:4;

	uint8_t		disp_bytes;
	uint8_t		imm_bytes;

	uint8_t		scale;
	int		base_register;		/* VM_REG_GUEST_xyz */
	int		index_register;		/* VM_REG_GUEST_xyz */

	int64_t		displacement;		/* optional addr displacement */
	int64_t		immediate;		/* optional immediate operand */

	uint8_t		decoded;	/* set to 1 if successfully decoded */

	struct vie_op	op;			/* opcode description */
};

/*
 * Value of 'vm_exit.exitcode'. Enumerators are implicitly numbered from 0,
 * so new codes must be appended before VM_EXITCODE_MAX to keep the existing
 * values stable.
 */
enum vm_exitcode {
	VM_EXITCODE_INOUT,
	VM_EXITCODE_VMX,
	VM_EXITCODE_BOGUS,
	VM_EXITCODE_RDMSR,
	VM_EXITCODE_WRMSR,
	VM_EXITCODE_HLT,
	VM_EXITCODE_MTRAP,
	VM_EXITCODE_PAUSE,
	VM_EXITCODE_PAGING,
	VM_EXITCODE_INST_EMUL,
	VM_EXITCODE_SPINUP_AP,
	VM_EXITCODE_DEPRECATED1,	/* used to be SPINDOWN_CPU */
	VM_EXITCODE_RENDEZVOUS,
	VM_EXITCODE_IOAPIC_EOI,
	VM_EXITCODE_SUSPENDED,
	VM_EXITCODE_INOUT_STR,
	VM_EXITCODE_MAX
};

struct vm_inout {
	uint16_t	bytes:3;	/* 1 or 2 or 4 */
	uint16_t	in:1;
	uint16_t	string:1;
	uint16_t	rep:1;
	uint16_t	port;
	uint32_t	eax;		/* valid for out */
};

struct vm_inout_str {
	struct vm_inout	inout;		/* must be the first element */
	struct vm_guest_paging paging;
	uint64_t	rflags;
	uint64_t	cr0;
	uint64_t	index;
	uint64_t	count;		/* rep=1 (%rcx), rep=0 (1) */
	int		addrsize;
	enum vm_reg_name seg_name;
	struct seg_desc seg_desc;
};

/*
 * Description of a VM-exit: the exit code, the instruction length and rip,
 * and a union 'u' holding one payload per kind of exit.
 */
struct vm_exit {
	enum vm_exitcode	exitcode;
	int			inst_length;	/* 0 means unknown */
	uint64_t		rip;
	union {
		struct vm_inout	inout;
		struct vm_inout_str inout_str;
		struct {
			uint64_t	gpa;
			int		fault_type;
		} paging;
		struct {
			uint64_t	gpa;
			uint64_t	gla;
			int		cs_d;		/* CS.D */
			struct vm_guest_paging paging;
			struct vie	vie;
		} inst_emul;
		/*
		 * VMX specific payload. Used when there is no "better"
		 * exitcode to represent the VM-exit.
		 */
		struct {
			int		status;		/* vmx inst status */
			/*
			 * 'exit_reason' and 'exit_qualification' are valid
			 * only if 'status' is zero.
			 */
			uint32_t	exit_reason;
			uint64_t	exit_qualification;
			/*
			 * 'inst_error' and 'inst_type' are valid
			 * only if 'status' is non-zero.
			 */
			int		inst_type;
			int		inst_error;
		} vmx;
		struct {
			uint32_t	code;		/* ecx value */
			uint64_t	wval;
		} msr;
		struct {
			int		vcpu;
			uint64_t	rip;
		} spinup_ap;
		struct {
			uint64_t	rflags;
		} hlt;
		struct {
			int		vector;
		} ioapic_eoi;
		struct {
			enum vm_suspend_how how;
		} suspended;
	} u;
};

#endif	/* _VMM_H_ */