vmm.c revision 242275
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/pcpu.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>

#include <vm/vm.h>

#include <machine/vm.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <x86/apicreg.h>

#include <machine/vmm.h>
#include "vmm_host.h"
#include "vmm_mem.h"
#include "vmm_util.h"
#include <machine/vmm_dev.h>
#include "vlapic.h"
#include "vmm_msr.h"
#include "vmm_ipi.h"
#include "vmm_stat.h"
#include "vmm_lapic.h"

#include "io/ppt.h"
#include "io/iommu.h"

struct vlapic;

struct vcpu {
	int		flags;
	enum vcpu_state	state;
	struct mtx	mtx;
	int		pincpu;		/* host cpuid this vcpu is bound to */
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	uint64_t	guest_msrs[VMM_MSR_NUM];
	struct vlapic	*vlapic;
	int		vcpuid;
	struct savefpu	*guestfpu;	/* guest fpu state */
	void		*stats;
	struct vm_exit	exitinfo;
	enum x2apic_state x2apic_state;
	int		nmi_pending;
};
#define	VCPU_F_PINNED	0x0001

#define	VCPU_PINCPU(vm, vcpuid)	\
    ((vm->vcpu[vcpuid].flags & VCPU_F_PINNED) ? vm->vcpu[vcpuid].pincpu : -1)

#define	VCPU_UNPIN(vm, vcpuid)	(vm->vcpu[vcpuid].flags &= ~VCPU_F_PINNED)

#define	VCPU_PIN(vm, vcpuid, host_cpuid)				\
do {									\
	vm->vcpu[vcpuid].flags |= VCPU_F_PINNED;			\
	vm->vcpu[vcpuid].pincpu = host_cpuid;				\
} while (0)

#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))

#define	VM_MAX_MEMORY_SEGMENTS	2

struct vm {
	void		*cookie;	/* processor-specific data */
	void		*iommu;		/* iommu-specific data */
	struct vcpu	vcpu[VM_MAXCPU];
	int		num_mem_segs;
	struct vm_memory_segment mem_segs[VM_MAX_MEMORY_SEGMENTS];
	char		name[VM_MAX_NAMELEN];

	/*
	 * Set of active vcpus.
	 * An active vcpu is one that has been started implicitly (BSP) or
	 * explicitly (AP) by sending it a startup ipi.
	 */
	cpuset_t	active_cpus;
};
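/*
 * All processor-specific operations are dispatched through 'ops', which
 * vmm_init() points at either the Intel (VT-x) or the AMD (SVM) backend.
 * Each wrapper below fails safe (ENXIO or NULL) if no backend has been
 * registered.
 */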
static struct vmm_ops *ops;
#define	VMM_INIT()	(ops != NULL ? (*ops->init)() : 0)
#define	VMM_CLEANUP()	(ops != NULL ? (*ops->cleanup)() : 0)

#define	VMINIT(vm)	(ops != NULL ? (*ops->vminit)(vm) : NULL)
#define	VMRUN(vmi, vcpu, rip) \
	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip) : ENXIO)
#define	VMCLEANUP(vmi)	(ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
#define	VMMMAP_SET(vmi, gpa, hpa, len, attr, prot, spm)			\
	(ops != NULL ?							\
	 (*ops->vmmmap_set)(vmi, gpa, hpa, len, attr, prot, spm) :	\
	 ENXIO)
#define	VMMMAP_GET(vmi, gpa) \
	(ops != NULL ? (*ops->vmmmap_get)(vmi, gpa) : ENXIO)
#define	VMGETREG(vmi, vcpu, num, retval) \
	(ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETREG(vmi, vcpu, num, val) \
	(ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
#define	VMGETDESC(vmi, vcpu, num, desc) \
	(ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMSETDESC(vmi, vcpu, num, desc) \
	(ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMINJECT(vmi, vcpu, type, vec, ec, ecv) \
	(ops != NULL ? (*ops->vminject)(vmi, vcpu, type, vec, ec, ecv) : ENXIO)
#define	VMGETCAP(vmi, vcpu, num, retval) \
	(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETCAP(vmi, vcpu, num, val) \
	(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)

#define	fpu_start_emulating()	start_emulating()
#define	fpu_stop_emulating()	stop_emulating()

static MALLOC_DEFINE(M_VM, "vm", "vm");
CTASSERT(VMM_MSR_NUM <= 64);	/* msr_mask can keep track of up to 64 msrs */

/* statistics */
static VMM_STAT_DEFINE(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

static void
vcpu_cleanup(struct vcpu *vcpu)
{
	vlapic_cleanup(vcpu->vlapic);
	vmm_stat_free(vcpu->stats);
	fpu_save_area_free(vcpu->guestfpu);
}

static void
vcpu_init(struct vm *vm, uint32_t vcpu_id)
{
	struct vcpu *vcpu;

	vcpu = &vm->vcpu[vcpu_id];

	vcpu_lock_init(vcpu);
	vcpu->hostcpu = NOCPU;
	vcpu->vcpuid = vcpu_id;
	vcpu->vlapic = vlapic_init(vm, vcpu_id);
	vm_set_x2apic_state(vm, vcpu_id, X2APIC_ENABLED);
	vcpu->guestfpu = fpu_save_area_alloc();
	fpu_save_area_reset(vcpu->guestfpu);
	vcpu->stats = vmm_stat_alloc();
}

struct vm_exit *
vm_exitinfo(struct vm *vm, int cpuid)
{
	struct vcpu *vcpu;

	if (cpuid < 0 || cpuid >= VM_MAXCPU)
		panic("vm_exitinfo: invalid cpuid %d", cpuid);

	vcpu = &vm->vcpu[cpuid];

	return (&vcpu->exitinfo);
}

static int
vmm_init(void)
{
	int error;

	vmm_host_state_init();
	vmm_ipi_init();

	error = vmm_mem_init();
	if (error)
		return (error);

	if (vmm_is_intel())
		ops = &vmm_ops_intel;
	else if (vmm_is_amd())
		ops = &vmm_ops_amd;
	else
		return (ENXIO);

	vmm_msr_init();

	return (VMM_INIT());
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		vmmdev_init();
		iommu_init();
		error = vmm_init();
		break;
	case MOD_UNLOAD:
		error = vmmdev_cleanup();
		if (error == 0) {
			iommu_cleanup();
			vmm_ipi_cleanup();
			error = VMM_CLEANUP();
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}
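/*
 * Note that on MOD_UNLOAD the iommu, IPI and hardware backend state are
 * torn down only if vmmdev_cleanup() succeeds; otherwise its error is
 * propagated to the module system and the module stays loaded.
 */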
static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * Execute the module load handler after the pci passthru driver has had
 * a chance to claim devices. We need this information at the time we do
 * iommu initialization.
 */
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_CONFIGURE + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

struct vm *
vm_create(const char *name)
{
	int i;
	struct vm *vm;
	vm_paddr_t maxaddr;

	const int BSP = 0;

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (NULL);

	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->cookie = VMINIT(vm);

	for (i = 0; i < VM_MAXCPU; i++) {
		vcpu_init(vm, i);
		guest_msrs_init(vm, i);
	}

	maxaddr = vmm_mem_maxaddr();
	vm->iommu = iommu_create_domain(maxaddr);
	vm_activate_cpu(vm, BSP);

	return (vm);
}

static void
vm_free_mem_seg(struct vm *vm, struct vm_memory_segment *seg)
{
	size_t len;
	vm_paddr_t hpa;
	void *host_domain;

	host_domain = iommu_host_domain();

	len = 0;
	while (len < seg->len) {
		hpa = vm_gpa2hpa(vm, seg->gpa + len, PAGE_SIZE);
		if (hpa == (vm_paddr_t)-1) {
			panic("vm_free_mem_seg: cannot free hpa "
			      "associated with gpa 0x%016lx", seg->gpa + len);
		}

		/*
		 * Remove the 'gpa' to 'hpa' mapping in the VM's domain.
		 * And resurrect the 1:1 mapping for 'hpa' in 'host_domain'.
		 */
		iommu_remove_mapping(vm->iommu, seg->gpa + len, PAGE_SIZE);
		iommu_create_mapping(host_domain, hpa, hpa, PAGE_SIZE);

		vmm_mem_free(hpa, PAGE_SIZE);

		len += PAGE_SIZE;
	}

	/*
	 * Invalidate cached translations associated with 'vm->iommu' since
	 * we have now moved some pages from it.
	 */
	iommu_invalidate_tlb(vm->iommu);

	bzero(seg, sizeof(struct vm_memory_segment));
}
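/*
 * Summary of the page ownership model implied above: vm_malloc() moves a
 * page out of the host's 1:1 iommu domain and maps it gpa->hpa in the
 * VM's domain; vm_free_mem_seg() reverses both steps before handing the
 * page back with vmm_mem_free().
 */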
void
vm_destroy(struct vm *vm)
{
	int i;

	ppt_unassign_all(vm);

	for (i = 0; i < vm->num_mem_segs; i++)
		vm_free_mem_seg(vm, &vm->mem_segs[i]);

	vm->num_mem_segs = 0;

	for (i = 0; i < VM_MAXCPU; i++)
		vcpu_cleanup(&vm->vcpu[i]);

	iommu_destroy_domain(vm->iommu);

	VMCLEANUP(vm->cookie);

	free(vm, M_VM);
}

const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}

int
vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
	const boolean_t spok = TRUE;	/* superpage mappings are ok */

	return (VMMMAP_SET(vm->cookie, gpa, hpa, len, VM_MEMATTR_UNCACHEABLE,
			   VM_PROT_RW, spok));
}

int
vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	const boolean_t spok = TRUE;	/* superpage mappings are ok */

	return (VMMMAP_SET(vm->cookie, gpa, 0, len, 0,
			   VM_PROT_NONE, spok));
}

/*
 * Returns TRUE if 'gpa' is available for allocation and FALSE otherwise.
 */
static boolean_t
vm_gpa_available(struct vm *vm, vm_paddr_t gpa)
{
	int i;
	vm_paddr_t gpabase, gpalimit;

	if (gpa & PAGE_MASK)
		panic("vm_gpa_available: gpa (0x%016lx) not page aligned", gpa);

	for (i = 0; i < vm->num_mem_segs; i++) {
		gpabase = vm->mem_segs[i].gpa;
		gpalimit = gpabase + vm->mem_segs[i].len;
		if (gpa >= gpabase && gpa < gpalimit)
			return (FALSE);
	}

	return (TRUE);
}

int
vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	int error, available, allocated;
	struct vm_memory_segment *seg;
	vm_paddr_t g, hpa;
	void *host_domain;

	const boolean_t spok = TRUE;	/* superpage mappings are ok */

	if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
		return (EINVAL);

	available = allocated = 0;
	g = gpa;
	while (g < gpa + len) {
		if (vm_gpa_available(vm, g))
			available++;
		else
			allocated++;

		g += PAGE_SIZE;
	}

	/*
	 * If there are some allocated and some available pages in the address
	 * range then it is an error.
	 */
	if (allocated && available)
		return (EINVAL);

	/*
	 * If the entire address range being requested has already been
	 * allocated then there isn't anything more to do.
	 */
	if (allocated && available == 0)
		return (0);

	if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
		return (E2BIG);

	host_domain = iommu_host_domain();

	seg = &vm->mem_segs[vm->num_mem_segs];

	error = 0;
	seg->gpa = gpa;
	seg->len = 0;
	while (seg->len < len) {
		hpa = vmm_mem_alloc(PAGE_SIZE);
		if (hpa == 0) {
			error = ENOMEM;
			break;
		}

		error = VMMMAP_SET(vm->cookie, gpa + seg->len, hpa, PAGE_SIZE,
				   VM_MEMATTR_WRITE_BACK, VM_PROT_ALL, spok);
		if (error)
			break;

		/*
		 * Remove the 1:1 mapping for 'hpa' from the 'host_domain'.
		 * Add a mapping for 'gpa + seg->len' to 'hpa' in the VM's
		 * domain.
		 */
		iommu_remove_mapping(host_domain, hpa, PAGE_SIZE);
		iommu_create_mapping(vm->iommu, gpa + seg->len, hpa, PAGE_SIZE);

		seg->len += PAGE_SIZE;
	}

	if (error) {
		vm_free_mem_seg(vm, seg);
		return (error);
	}

	/*
	 * Invalidate cached translations associated with 'host_domain' since
	 * we have now moved some pages from it.
	 */
	iommu_invalidate_tlb(host_domain);

	vm->num_mem_segs++;

	return (0);
}
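/*
 * Illustrative only: wiring 64KB of guest memory at guest physical
 * address 0 would be requested as
 *
 *	error = vm_malloc(vm, 0, 64 * 1024);
 *
 * Both 'gpa' and 'len' must be page-aligned, and a range that is
 * partially allocated and partially free fails with EINVAL.
 */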
vm_paddr_t
vm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	vm_paddr_t nextpage;

	nextpage = rounddown(gpa + PAGE_SIZE, PAGE_SIZE);
	if (len > nextpage - gpa)
		panic("vm_gpa2hpa: invalid gpa/len: 0x%016lx/%lu", gpa, len);

	return (VMMMAP_GET(vm->cookie, gpa));
}

int
vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
		  struct vm_memory_segment *seg)
{
	int i;

	for (i = 0; i < vm->num_mem_segs; i++) {
		if (gpabase == vm->mem_segs[i].gpa) {
			*seg = vm->mem_segs[i];
			return (0);
		}
	}
	return (-1);
}

int
vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (VMGETREG(vm->cookie, vcpu, reg, retval));
}

int
vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (VMSETREG(vm->cookie, vcpu, reg, val));
}

static boolean_t
is_descriptor_table(int reg)
{

	switch (reg) {
	case VM_REG_GUEST_IDTR:
	case VM_REG_GUEST_GDTR:
		return (TRUE);
	default:
		return (FALSE);
	}
}

static boolean_t
is_segment_register(int reg)
{

	switch (reg) {
	case VM_REG_GUEST_ES:
	case VM_REG_GUEST_CS:
	case VM_REG_GUEST_SS:
	case VM_REG_GUEST_DS:
	case VM_REG_GUEST_FS:
	case VM_REG_GUEST_GS:
	case VM_REG_GUEST_TR:
	case VM_REG_GUEST_LDTR:
		return (TRUE);
	default:
		return (FALSE);
	}
}

int
vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
		struct seg_desc *desc)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (!is_segment_register(reg) && !is_descriptor_table(reg))
		return (EINVAL);

	return (VMGETDESC(vm->cookie, vcpu, reg, desc));
}

int
vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
		struct seg_desc *desc)
{
	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (!is_segment_register(reg) && !is_descriptor_table(reg))
		return (EINVAL);

	return (VMSETDESC(vm->cookie, vcpu, reg, desc));
}

int
vm_get_pinning(struct vm *vm, int vcpuid, int *cpuid)
{

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	*cpuid = VCPU_PINCPU(vm, vcpuid);

	return (0);
}

int
vm_set_pinning(struct vm *vm, int vcpuid, int host_cpuid)
{
	struct thread *td;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	td = curthread;		/* XXXSMP only safe when muxing vcpus */

	/* unpin */
	if (host_cpuid < 0) {
		VCPU_UNPIN(vm, vcpuid);
		thread_lock(td);
		sched_unbind(td);
		thread_unlock(td);
		return (0);
	}

	if (CPU_ABSENT(host_cpuid))
		return (EINVAL);

	/*
	 * XXX we should check that 'host_cpuid' has not already been pinned
	 * by another vm.
	 */
	thread_lock(td);
	sched_bind(td, host_cpuid);
	thread_unlock(td);
	VCPU_PIN(vm, vcpuid, host_cpuid);

	return (0);
}
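/*
 * Illustrative only: vm_set_pinning(vm, 0, 2) binds the current thread
 * (and hence vcpu 0) to host cpu 2 via sched_bind(); a negative
 * host_cpuid, e.g. vm_set_pinning(vm, 0, -1), undoes the binding.
 */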
static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* flush host state to the pcb */
	fpuexit(curthread);

	/* restore guest FPU state */
	fpu_stop_emulating();
	fpurestore(vcpu->guestfpu);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	fpu_start_emulating();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{

	if ((rcr0() & CR0_TS) == 0)
		panic("fpu emulation not enabled in host!");

	/* save guest FPU state */
	fpu_stop_emulating();
	fpusave(vcpu->guestfpu);
	fpu_start_emulating();
}

static VMM_STAT_DEFINE(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");

int
vm_run(struct vm *vm, struct vm_run *vmrun)
{
	int error, vcpuid, sleepticks, t;
	struct vcpu *vcpu;
	struct pcb *pcb;
	uint64_t tscval, rip;
	struct vm_exit *vme;

	vcpuid = vmrun->cpuid;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];
	vme = &vmrun->vm_exit;
	rip = vmrun->rip;
restart:
	critical_enter();

	tscval = rdtsc();

	pcb = PCPU_GET(curpcb);
	set_pcb_flags(pcb, PCB_FULL_IRET);

	restore_guest_msrs(vm, vcpuid);
	restore_guest_fpustate(vcpu);

	vcpu->hostcpu = curcpu;
	error = VMRUN(vm->cookie, vcpuid, rip);
	vcpu->hostcpu = NOCPU;

	save_guest_fpustate(vcpu);
	restore_host_msrs(vm, vcpuid);

	vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);

	/* copy the exit information */
	bcopy(&vcpu->exitinfo, vme, sizeof(struct vm_exit));

	critical_exit();

	/*
	 * Oblige the guest's desire to 'hlt' by sleeping until the vcpu
	 * is ready to run.
	 */
	if (error == 0 && vme->exitcode == VM_EXITCODE_HLT) {
		vcpu_lock(vcpu);

		/*
		 * Figure out the number of host ticks until the next apic
		 * timer interrupt in the guest.
		 */
		sleepticks = lapic_timer_tick(vm, vcpuid);

		/*
		 * If the guest local apic timer is disabled then sleep for
		 * a long time but not forever.
		 */
		if (sleepticks < 0)
			sleepticks = hz;

		/*
		 * Do a final check for pending NMI or interrupts before
		 * really putting this thread to sleep.
		 *
		 * These interrupts could have happened any time after we
		 * returned from VMRUN() and before we grabbed the vcpu lock.
		 */
		if (!vm_nmi_pending(vm, vcpuid) &&
		    lapic_pending_intr(vm, vcpuid) < 0) {
			if (sleepticks <= 0)
				panic("invalid sleepticks %d", sleepticks);
			t = ticks;
			msleep_spin(vcpu, &vcpu->mtx, "vmidle", sleepticks);
			vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
		}

		vcpu_unlock(vcpu);

		rip = vme->rip + vme->inst_length;
		goto restart;
	}

	return (error);
}
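/*
 * Illustrative flow for the loop above: a guest 'hlt' surfaces as
 * VM_EXITCODE_HLT, the thread sleeps on the vcpu wait channel (bounded
 * by the next apic timer tick, or 'hz' ticks if the guest timer is
 * disabled) until vm_interrupt_hostcpu() wakes it, and the guest is
 * resumed at vme->rip + vme->inst_length, i.e. the instruction after
 * the hlt.
 */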
int
vm_inject_event(struct vm *vm, int vcpuid, int type,
		int vector, uint32_t code, int code_valid)
{
	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	if ((type > VM_EVENT_NONE && type < VM_EVENT_MAX) == 0)
		return (EINVAL);

	if (vector < 0 || vector > 255)
		return (EINVAL);

	return (VMINJECT(vm->cookie, vcpuid, type, vector, code, code_valid));
}

static VMM_STAT_DEFINE(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");

int
vm_inject_nmi(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];

	vcpu->nmi_pending = 1;
	vm_interrupt_hostcpu(vm, vcpuid);
	return (0);
}

int
vm_nmi_pending(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	return (vcpu->nmi_pending);
}

void
vm_nmi_clear(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vm_nmi_clear: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	if (vcpu->nmi_pending == 0)
		panic("vm_nmi_clear: inconsistent nmi_pending state");

	vcpu->nmi_pending = 0;
	vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1);
}

int
vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
{
	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (VMGETCAP(vm->cookie, vcpu, type, retval));
}

int
vm_set_capability(struct vm *vm, int vcpu, int type, int val)
{
	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (VMSETCAP(vm->cookie, vcpu, type, val));
}

uint64_t *
vm_guest_msrs(struct vm *vm, int cpu)
{
	return (vm->vcpu[cpu].guest_msrs);
}

struct vlapic *
vm_lapic(struct vm *vm, int cpu)
{
	return (vm->vcpu[cpu].vlapic);
}

boolean_t
vmm_is_pptdev(int bus, int slot, int func)
{
	int found, b, s, f, n;
	char *val, *cp, *cp2;

	/*
	 * setenv pptdevs "1/2/3 4/5/6 7/8/9 10/11/12"
	 */
	found = 0;
	cp = val = getenv("pptdevs");
	while (cp != NULL && *cp != '\0') {
		if ((cp2 = strchr(cp, ' ')) != NULL)
			*cp2 = '\0';

		n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
		if (n == 3 && bus == b && slot == s && func == f) {
			found = 1;
			break;
		}

		if (cp2 != NULL)
			*cp2++ = ' ';

		cp = cp2;
	}
	freeenv(val);
	return (found);
}
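/*
 * Illustrative only: with the tunable set as, for example,
 *
 *	pptdevs="2/0/0 4/5/0"
 *
 * vmm_is_pptdev(2, 0, 0) and vmm_is_pptdev(4, 5, 0) return TRUE and all
 * other bus/slot/function tuples return FALSE.
 */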
void *
vm_iommu_domain(struct vm *vm)
{

	return (vm->iommu);
}

int
vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state state)
{
	int error;
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vcpu_set_state: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);

	/*
	 * The following state transitions are allowed:
	 * IDLE -> RUNNING -> IDLE
	 * IDLE -> CANNOT_RUN -> IDLE
	 */
	if ((vcpu->state == VCPU_IDLE && state != VCPU_IDLE) ||
	    (vcpu->state != VCPU_IDLE && state == VCPU_IDLE)) {
		error = 0;
		vcpu->state = state;
	} else {
		error = EBUSY;
	}

	vcpu_unlock(vcpu);

	return (error);
}

enum vcpu_state
vcpu_get_state(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;
	enum vcpu_state state;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vcpu_get_state: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);
	state = vcpu->state;
	vcpu_unlock(vcpu);

	return (state);
}

void
vm_activate_cpu(struct vm *vm, int vcpuid)
{

	if (vcpuid >= 0 && vcpuid < VM_MAXCPU)
		CPU_SET(vcpuid, &vm->active_cpus);
}

cpuset_t
vm_active_cpus(struct vm *vm)
{

	return (vm->active_cpus);
}

void *
vcpu_stats(struct vm *vm, int vcpuid)
{

	return (vm->vcpu[vcpuid].stats);
}

int
vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state)
{
	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	*state = vm->vcpu[vcpuid].x2apic_state;

	return (0);
}

int
vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
{
	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	if (state < 0 || state >= X2APIC_STATE_LAST)
		return (EINVAL);

	vm->vcpu[vcpuid].x2apic_state = state;

	vlapic_set_x2apic_state(vm, vcpuid, state);

	return (0);
}

void
vm_interrupt_hostcpu(struct vm *vm, int vcpuid)
{
	int hostcpu;
	struct vcpu *vcpu;

	vcpu = &vm->vcpu[vcpuid];

	vcpu_lock(vcpu);
	hostcpu = vcpu->hostcpu;
	if (hostcpu == NOCPU) {
		/*
		 * If the vcpu is 'RUNNING' but without a valid 'hostcpu' then
		 * the host thread must be sleeping waiting for an event to
		 * kick the vcpu out of 'hlt'.
		 *
		 * XXX this is racy because the condition exists right before
		 * and after calling VMRUN() in vm_run(). The wakeup() is
		 * benign in this case.
		 */
		if (vcpu->state == VCPU_RUNNING)
			wakeup_one(vcpu);
	} else {
		if (vcpu->state != VCPU_RUNNING)
			panic("invalid vcpu state %d", vcpu->state);
		if (hostcpu != curcpu)
			ipi_cpu(hostcpu, vmm_ipinum);
	}
	vcpu_unlock(vcpu);
}
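/*
 * Summary of the kick mechanism in vm_interrupt_hostcpu(): a vcpu idling
 * in 'hlt' (RUNNING but with hostcpu == NOCPU) is woken with
 * wakeup_one(), while a vcpu actually executing guest code is forced out
 * of VMRUN() by sending vmm_ipinum to the host cpu it is running on.
 */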