vmm.c revision 242065
1/*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD$"); 31 32#include <sys/param.h> 33#include <sys/systm.h> 34#include <sys/kernel.h> 35#include <sys/module.h> 36#include <sys/sysctl.h> 37#include <sys/malloc.h> 38#include <sys/pcpu.h> 39#include <sys/lock.h> 40#include <sys/mutex.h> 41#include <sys/proc.h> 42#include <sys/sched.h> 43#include <sys/smp.h> 44#include <sys/systm.h> 45 46#include <vm/vm.h> 47 48#include <machine/vm.h> 49#include <machine/pcb.h> 50#include <machine/smp.h> 51#include <x86/apicreg.h> 52 53#include <machine/vmm.h> 54#include "vmm_mem.h" 55#include "vmm_util.h" 56#include <machine/vmm_dev.h> 57#include "vlapic.h" 58#include "vmm_msr.h" 59#include "vmm_ipi.h" 60#include "vmm_stat.h" 61#include "vmm_lapic.h" 62 63#include "io/ppt.h" 64#include "io/iommu.h" 65 66struct vlapic; 67 68struct vcpu { 69 int flags; 70 enum vcpu_state state; 71 struct mtx mtx; 72 int pincpu; /* host cpuid this vcpu is bound to */ 73 int hostcpu; /* host cpuid this vcpu last ran on */ 74 uint64_t guest_msrs[VMM_MSR_NUM]; 75 struct vlapic *vlapic; 76 int vcpuid; 77 struct savefpu *guestfpu; /* guest fpu state */ 78 void *stats; 79 struct vm_exit exitinfo; 80 enum x2apic_state x2apic_state; 81 int nmi_pending; 82}; 83#define VCPU_F_PINNED 0x0001 84 85#define VCPU_PINCPU(vm, vcpuid) \ 86 ((vm->vcpu[vcpuid].flags & VCPU_F_PINNED) ? vm->vcpu[vcpuid].pincpu : -1) 87 88#define VCPU_UNPIN(vm, vcpuid) (vm->vcpu[vcpuid].flags &= ~VCPU_F_PINNED) 89 90#define VCPU_PIN(vm, vcpuid, host_cpuid) \ 91do { \ 92 vm->vcpu[vcpuid].flags |= VCPU_F_PINNED; \ 93 vm->vcpu[vcpuid].pincpu = host_cpuid; \ 94} while(0) 95 96#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) 97#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) 98#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) 99 100#define VM_MAX_MEMORY_SEGMENTS 2 101 102struct vm { 103 void *cookie; /* processor-specific data */ 104 void *iommu; /* iommu-specific data */ 105 struct vcpu vcpu[VM_MAXCPU]; 106 int num_mem_segs; 107 struct vm_memory_segment mem_segs[VM_MAX_MEMORY_SEGMENTS]; 108 char name[VM_MAX_NAMELEN]; 109 110 /* 111 * Set of active vcpus. 112 * An active vcpu is one that has been started implicitly (BSP) or 113 * explicitly (AP) by sending it a startup ipi. 114 */ 115 cpuset_t active_cpus; 116}; 117 118static struct vmm_ops *ops; 119#define VMM_INIT() (ops != NULL ? (*ops->init)() : 0) 120#define VMM_CLEANUP() (ops != NULL ? (*ops->cleanup)() : 0) 121 122#define VMINIT(vm) (ops != NULL ? (*ops->vminit)(vm): NULL) 123#define VMRUN(vmi, vcpu, rip) \ 124 (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip) : ENXIO) 125#define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL) 126#define VMMMAP_SET(vmi, gpa, hpa, len, attr, prot, spm) \ 127 (ops != NULL ? \ 128 (*ops->vmmmap_set)(vmi, gpa, hpa, len, attr, prot, spm) : \ 129 ENXIO) 130#define VMMMAP_GET(vmi, gpa) \ 131 (ops != NULL ? (*ops->vmmmap_get)(vmi, gpa) : ENXIO) 132#define VMGETREG(vmi, vcpu, num, retval) \ 133 (ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO) 134#define VMSETREG(vmi, vcpu, num, val) \ 135 (ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO) 136#define VMGETDESC(vmi, vcpu, num, desc) \ 137 (ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO) 138#define VMSETDESC(vmi, vcpu, num, desc) \ 139 (ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO) 140#define VMINJECT(vmi, vcpu, type, vec, ec, ecv) \ 141 (ops != NULL ? (*ops->vminject)(vmi, vcpu, type, vec, ec, ecv) : ENXIO) 142#define VMGETCAP(vmi, vcpu, num, retval) \ 143 (ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO) 144#define VMSETCAP(vmi, vcpu, num, val) \ 145 (ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO) 146 147#define fpu_start_emulating() start_emulating() 148#define fpu_stop_emulating() stop_emulating() 149 150static MALLOC_DEFINE(M_VM, "vm", "vm"); 151CTASSERT(VMM_MSR_NUM <= 64); /* msr_mask can keep track of up to 64 msrs */ 152 153/* statistics */ 154static VMM_STAT_DEFINE(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); 155 156static void 157vcpu_cleanup(struct vcpu *vcpu) 158{ 159 vlapic_cleanup(vcpu->vlapic); 160 vmm_stat_free(vcpu->stats); 161 fpu_save_area_free(vcpu->guestfpu); 162} 163 164static void 165vcpu_init(struct vm *vm, uint32_t vcpu_id) 166{ 167 struct vcpu *vcpu; 168 169 vcpu = &vm->vcpu[vcpu_id]; 170 171 vcpu_lock_init(vcpu); 172 vcpu->hostcpu = NOCPU; 173 vcpu->vcpuid = vcpu_id; 174 vcpu->vlapic = vlapic_init(vm, vcpu_id); 175 vm_set_x2apic_state(vm, vcpu_id, X2APIC_ENABLED); 176 vcpu->guestfpu = fpu_save_area_alloc(); 177 fpu_save_area_reset(vcpu->guestfpu); 178 vcpu->stats = vmm_stat_alloc(); 179} 180 181struct vm_exit * 182vm_exitinfo(struct vm *vm, int cpuid) 183{ 184 struct vcpu *vcpu; 185 186 if (cpuid < 0 || cpuid >= VM_MAXCPU) 187 panic("vm_exitinfo: invalid cpuid %d", cpuid); 188 189 vcpu = &vm->vcpu[cpuid]; 190 191 return (&vcpu->exitinfo); 192} 193 194static int 195vmm_init(void) 196{ 197 int error; 198 199 vmm_ipi_init(); 200 201 error = vmm_mem_init(); 202 if (error) 203 return (error); 204 205 if (vmm_is_intel()) 206 ops = &vmm_ops_intel; 207 else if (vmm_is_amd()) 208 ops = &vmm_ops_amd; 209 else 210 return (ENXIO); 211 212 vmm_msr_init(); 213 214 return (VMM_INIT()); 215} 216 217static int 218vmm_handler(module_t mod, int what, void *arg) 219{ 220 int error; 221 222 switch (what) { 223 case MOD_LOAD: 224 vmmdev_init(); 225 iommu_init(); 226 error = vmm_init(); 227 break; 228 case MOD_UNLOAD: 229 error = vmmdev_cleanup(); 230 if (error == 0) { 231 iommu_cleanup(); 232 vmm_ipi_cleanup(); 233 error = VMM_CLEANUP(); 234 } 235 break; 236 default: 237 error = 0; 238 break; 239 } 240 return (error); 241} 242 243static moduledata_t vmm_kmod = { 244 "vmm", 245 vmm_handler, 246 NULL 247}; 248 249/* 250 * Execute the module load handler after the pci passthru driver has had 251 * a chance to claim devices. We need this information at the time we do 252 * iommu initialization. 253 */ 254DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_CONFIGURE + 1, SI_ORDER_ANY); 255MODULE_VERSION(vmm, 1); 256 257SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); 258 259struct vm * 260vm_create(const char *name) 261{ 262 int i; 263 struct vm *vm; 264 vm_paddr_t maxaddr; 265 266 const int BSP = 0; 267 268 if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) 269 return (NULL); 270 271 vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO); 272 strcpy(vm->name, name); 273 vm->cookie = VMINIT(vm); 274 275 for (i = 0; i < VM_MAXCPU; i++) { 276 vcpu_init(vm, i); 277 guest_msrs_init(vm, i); 278 } 279 280 maxaddr = vmm_mem_maxaddr(); 281 vm->iommu = iommu_create_domain(maxaddr); 282 vm_activate_cpu(vm, BSP); 283 284 return (vm); 285} 286 287static void 288vm_free_mem_seg(struct vm *vm, struct vm_memory_segment *seg) 289{ 290 size_t len; 291 vm_paddr_t hpa; 292 void *host_domain; 293 294 host_domain = iommu_host_domain(); 295 296 len = 0; 297 while (len < seg->len) { 298 hpa = vm_gpa2hpa(vm, seg->gpa + len, PAGE_SIZE); 299 if (hpa == (vm_paddr_t)-1) { 300 panic("vm_free_mem_segs: cannot free hpa " 301 "associated with gpa 0x%016lx", seg->gpa + len); 302 } 303 304 /* 305 * Remove the 'gpa' to 'hpa' mapping in VMs domain. 306 * And resurrect the 1:1 mapping for 'hpa' in 'host_domain'. 307 */ 308 iommu_remove_mapping(vm->iommu, seg->gpa + len, PAGE_SIZE); 309 iommu_create_mapping(host_domain, hpa, hpa, PAGE_SIZE); 310 311 vmm_mem_free(hpa, PAGE_SIZE); 312 313 len += PAGE_SIZE; 314 } 315 316 /* 317 * Invalidate cached translations associated with 'vm->iommu' since 318 * we have now moved some pages from it. 319 */ 320 iommu_invalidate_tlb(vm->iommu); 321 322 bzero(seg, sizeof(struct vm_memory_segment)); 323} 324 325void 326vm_destroy(struct vm *vm) 327{ 328 int i; 329 330 ppt_unassign_all(vm); 331 332 for (i = 0; i < vm->num_mem_segs; i++) 333 vm_free_mem_seg(vm, &vm->mem_segs[i]); 334 335 vm->num_mem_segs = 0; 336 337 for (i = 0; i < VM_MAXCPU; i++) 338 vcpu_cleanup(&vm->vcpu[i]); 339 340 iommu_destroy_domain(vm->iommu); 341 342 VMCLEANUP(vm->cookie); 343 344 free(vm, M_VM); 345} 346 347const char * 348vm_name(struct vm *vm) 349{ 350 return (vm->name); 351} 352 353int 354vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) 355{ 356 const boolean_t spok = TRUE; /* superpage mappings are ok */ 357 358 return (VMMMAP_SET(vm->cookie, gpa, hpa, len, VM_MEMATTR_UNCACHEABLE, 359 VM_PROT_RW, spok)); 360} 361 362int 363vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len) 364{ 365 const boolean_t spok = TRUE; /* superpage mappings are ok */ 366 367 return (VMMMAP_SET(vm->cookie, gpa, 0, len, 0, 368 VM_PROT_NONE, spok)); 369} 370 371/* 372 * Returns TRUE if 'gpa' is available for allocation and FALSE otherwise 373 */ 374static boolean_t 375vm_gpa_available(struct vm *vm, vm_paddr_t gpa) 376{ 377 int i; 378 vm_paddr_t gpabase, gpalimit; 379 380 if (gpa & PAGE_MASK) 381 panic("vm_gpa_available: gpa (0x%016lx) not page aligned", gpa); 382 383 for (i = 0; i < vm->num_mem_segs; i++) { 384 gpabase = vm->mem_segs[i].gpa; 385 gpalimit = gpabase + vm->mem_segs[i].len; 386 if (gpa >= gpabase && gpa < gpalimit) 387 return (FALSE); 388 } 389 390 return (TRUE); 391} 392 393int 394vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len) 395{ 396 int error, available, allocated; 397 struct vm_memory_segment *seg; 398 vm_paddr_t g, hpa; 399 void *host_domain; 400 401 const boolean_t spok = TRUE; /* superpage mappings are ok */ 402 403 if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0) 404 return (EINVAL); 405 406 available = allocated = 0; 407 g = gpa; 408 while (g < gpa + len) { 409 if (vm_gpa_available(vm, g)) 410 available++; 411 else 412 allocated++; 413 414 g += PAGE_SIZE; 415 } 416 417 /* 418 * If there are some allocated and some available pages in the address 419 * range then it is an error. 420 */ 421 if (allocated && available) 422 return (EINVAL); 423 424 /* 425 * If the entire address range being requested has already been 426 * allocated then there isn't anything more to do. 427 */ 428 if (allocated && available == 0) 429 return (0); 430 431 if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS) 432 return (E2BIG); 433 434 host_domain = iommu_host_domain(); 435 436 seg = &vm->mem_segs[vm->num_mem_segs]; 437 438 error = 0; 439 seg->gpa = gpa; 440 seg->len = 0; 441 while (seg->len < len) { 442 hpa = vmm_mem_alloc(PAGE_SIZE); 443 if (hpa == 0) { 444 error = ENOMEM; 445 break; 446 } 447 448 error = VMMMAP_SET(vm->cookie, gpa + seg->len, hpa, PAGE_SIZE, 449 VM_MEMATTR_WRITE_BACK, VM_PROT_ALL, spok); 450 if (error) 451 break; 452 453 /* 454 * Remove the 1:1 mapping for 'hpa' from the 'host_domain'. 455 * Add mapping for 'gpa + seg->len' to 'hpa' in the VMs domain. 456 */ 457 iommu_remove_mapping(host_domain, hpa, PAGE_SIZE); 458 iommu_create_mapping(vm->iommu, gpa + seg->len, hpa, PAGE_SIZE); 459 460 seg->len += PAGE_SIZE; 461 } 462 463 if (error) { 464 vm_free_mem_seg(vm, seg); 465 return (error); 466 } 467 468 /* 469 * Invalidate cached translations associated with 'host_domain' since 470 * we have now moved some pages from it. 471 */ 472 iommu_invalidate_tlb(host_domain); 473 474 vm->num_mem_segs++; 475 476 return (0); 477} 478 479vm_paddr_t 480vm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t len) 481{ 482 vm_paddr_t nextpage; 483 484 nextpage = rounddown(gpa + PAGE_SIZE, PAGE_SIZE); 485 if (len > nextpage - gpa) 486 panic("vm_gpa2hpa: invalid gpa/len: 0x%016lx/%lu", gpa, len); 487 488 return (VMMMAP_GET(vm->cookie, gpa)); 489} 490 491int 492vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase, 493 struct vm_memory_segment *seg) 494{ 495 int i; 496 497 for (i = 0; i < vm->num_mem_segs; i++) { 498 if (gpabase == vm->mem_segs[i].gpa) { 499 *seg = vm->mem_segs[i]; 500 return (0); 501 } 502 } 503 return (-1); 504} 505 506int 507vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval) 508{ 509 510 if (vcpu < 0 || vcpu >= VM_MAXCPU) 511 return (EINVAL); 512 513 if (reg >= VM_REG_LAST) 514 return (EINVAL); 515 516 return (VMGETREG(vm->cookie, vcpu, reg, retval)); 517} 518 519int 520vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val) 521{ 522 523 if (vcpu < 0 || vcpu >= VM_MAXCPU) 524 return (EINVAL); 525 526 if (reg >= VM_REG_LAST) 527 return (EINVAL); 528 529 return (VMSETREG(vm->cookie, vcpu, reg, val)); 530} 531 532static boolean_t 533is_descriptor_table(int reg) 534{ 535 536 switch (reg) { 537 case VM_REG_GUEST_IDTR: 538 case VM_REG_GUEST_GDTR: 539 return (TRUE); 540 default: 541 return (FALSE); 542 } 543} 544 545static boolean_t 546is_segment_register(int reg) 547{ 548 549 switch (reg) { 550 case VM_REG_GUEST_ES: 551 case VM_REG_GUEST_CS: 552 case VM_REG_GUEST_SS: 553 case VM_REG_GUEST_DS: 554 case VM_REG_GUEST_FS: 555 case VM_REG_GUEST_GS: 556 case VM_REG_GUEST_TR: 557 case VM_REG_GUEST_LDTR: 558 return (TRUE); 559 default: 560 return (FALSE); 561 } 562} 563 564int 565vm_get_seg_desc(struct vm *vm, int vcpu, int reg, 566 struct seg_desc *desc) 567{ 568 569 if (vcpu < 0 || vcpu >= VM_MAXCPU) 570 return (EINVAL); 571 572 if (!is_segment_register(reg) && !is_descriptor_table(reg)) 573 return (EINVAL); 574 575 return (VMGETDESC(vm->cookie, vcpu, reg, desc)); 576} 577 578int 579vm_set_seg_desc(struct vm *vm, int vcpu, int reg, 580 struct seg_desc *desc) 581{ 582 if (vcpu < 0 || vcpu >= VM_MAXCPU) 583 return (EINVAL); 584 585 if (!is_segment_register(reg) && !is_descriptor_table(reg)) 586 return (EINVAL); 587 588 return (VMSETDESC(vm->cookie, vcpu, reg, desc)); 589} 590 591int 592vm_get_pinning(struct vm *vm, int vcpuid, int *cpuid) 593{ 594 595 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 596 return (EINVAL); 597 598 *cpuid = VCPU_PINCPU(vm, vcpuid); 599 600 return (0); 601} 602 603int 604vm_set_pinning(struct vm *vm, int vcpuid, int host_cpuid) 605{ 606 struct thread *td; 607 608 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 609 return (EINVAL); 610 611 td = curthread; /* XXXSMP only safe when muxing vcpus */ 612 613 /* unpin */ 614 if (host_cpuid < 0) { 615 VCPU_UNPIN(vm, vcpuid); 616 thread_lock(td); 617 sched_unbind(td); 618 thread_unlock(td); 619 return (0); 620 } 621 622 if (CPU_ABSENT(host_cpuid)) 623 return (EINVAL); 624 625 /* 626 * XXX we should check that 'host_cpuid' has not already been pinned 627 * by another vm. 628 */ 629 thread_lock(td); 630 sched_bind(td, host_cpuid); 631 thread_unlock(td); 632 VCPU_PIN(vm, vcpuid, host_cpuid); 633 634 return (0); 635} 636 637static void 638restore_guest_fpustate(struct vcpu *vcpu) 639{ 640 641 /* flush host state to the pcb */ 642 fpuexit(curthread); 643 fpu_stop_emulating(); 644 fpurestore(vcpu->guestfpu); 645} 646 647static void 648save_guest_fpustate(struct vcpu *vcpu) 649{ 650 651 fpusave(vcpu->guestfpu); 652 fpu_start_emulating(); 653} 654 655static VMM_STAT_DEFINE(VCPU_IDLE_TICKS, "number of ticks vcpu was idle"); 656 657int 658vm_run(struct vm *vm, struct vm_run *vmrun) 659{ 660 int error, vcpuid, sleepticks, t; 661 struct vcpu *vcpu; 662 struct pcb *pcb; 663 uint64_t tscval, rip; 664 struct vm_exit *vme; 665 666 vcpuid = vmrun->cpuid; 667 668 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 669 return (EINVAL); 670 671 vcpu = &vm->vcpu[vcpuid]; 672 vme = &vmrun->vm_exit; 673 rip = vmrun->rip; 674restart: 675 critical_enter(); 676 677 tscval = rdtsc(); 678 679 pcb = PCPU_GET(curpcb); 680 set_pcb_flags(pcb, PCB_FULL_IRET); 681 682 restore_guest_msrs(vm, vcpuid); 683 restore_guest_fpustate(vcpu); 684 685 vcpu->hostcpu = curcpu; 686 error = VMRUN(vm->cookie, vcpuid, rip); 687 vcpu->hostcpu = NOCPU; 688 689 save_guest_fpustate(vcpu); 690 restore_host_msrs(vm, vcpuid); 691 692 vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval); 693 694 /* copy the exit information */ 695 bcopy(&vcpu->exitinfo, vme, sizeof(struct vm_exit)); 696 697 critical_exit(); 698 699 /* 700 * Oblige the guest's desire to 'hlt' by sleeping until the vcpu 701 * is ready to run. 702 */ 703 if (error == 0 && vme->exitcode == VM_EXITCODE_HLT) { 704 vcpu_lock(vcpu); 705 706 /* 707 * Figure out the number of host ticks until the next apic 708 * timer interrupt in the guest. 709 */ 710 sleepticks = lapic_timer_tick(vm, vcpuid); 711 712 /* 713 * If the guest local apic timer is disabled then sleep for 714 * a long time but not forever. 715 */ 716 if (sleepticks < 0) 717 sleepticks = hz; 718 719 /* 720 * Do a final check for pending NMI or interrupts before 721 * really putting this thread to sleep. 722 * 723 * These interrupts could have happened any time after we 724 * returned from VMRUN() and before we grabbed the vcpu lock. 725 */ 726 if (!vm_nmi_pending(vm, vcpuid) && 727 lapic_pending_intr(vm, vcpuid) < 0) { 728 if (sleepticks <= 0) 729 panic("invalid sleepticks %d", sleepticks); 730 t = ticks; 731 msleep_spin(vcpu, &vcpu->mtx, "vmidle", sleepticks); 732 vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t); 733 } 734 735 vcpu_unlock(vcpu); 736 737 rip = vme->rip + vme->inst_length; 738 goto restart; 739 } 740 741 return (error); 742} 743 744int 745vm_inject_event(struct vm *vm, int vcpuid, int type, 746 int vector, uint32_t code, int code_valid) 747{ 748 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 749 return (EINVAL); 750 751 if ((type > VM_EVENT_NONE && type < VM_EVENT_MAX) == 0) 752 return (EINVAL); 753 754 if (vector < 0 || vector > 255) 755 return (EINVAL); 756 757 return (VMINJECT(vm->cookie, vcpuid, type, vector, code, code_valid)); 758} 759 760static VMM_STAT_DEFINE(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu"); 761 762int 763vm_inject_nmi(struct vm *vm, int vcpuid) 764{ 765 struct vcpu *vcpu; 766 767 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 768 return (EINVAL); 769 770 vcpu = &vm->vcpu[vcpuid]; 771 772 vcpu->nmi_pending = 1; 773 vm_interrupt_hostcpu(vm, vcpuid); 774 return (0); 775} 776 777int 778vm_nmi_pending(struct vm *vm, int vcpuid) 779{ 780 struct vcpu *vcpu; 781 782 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 783 panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); 784 785 vcpu = &vm->vcpu[vcpuid]; 786 787 return (vcpu->nmi_pending); 788} 789 790void 791vm_nmi_clear(struct vm *vm, int vcpuid) 792{ 793 struct vcpu *vcpu; 794 795 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 796 panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); 797 798 vcpu = &vm->vcpu[vcpuid]; 799 800 if (vcpu->nmi_pending == 0) 801 panic("vm_nmi_clear: inconsistent nmi_pending state"); 802 803 vcpu->nmi_pending = 0; 804 vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1); 805} 806 807int 808vm_get_capability(struct vm *vm, int vcpu, int type, int *retval) 809{ 810 if (vcpu < 0 || vcpu >= VM_MAXCPU) 811 return (EINVAL); 812 813 if (type < 0 || type >= VM_CAP_MAX) 814 return (EINVAL); 815 816 return (VMGETCAP(vm->cookie, vcpu, type, retval)); 817} 818 819int 820vm_set_capability(struct vm *vm, int vcpu, int type, int val) 821{ 822 if (vcpu < 0 || vcpu >= VM_MAXCPU) 823 return (EINVAL); 824 825 if (type < 0 || type >= VM_CAP_MAX) 826 return (EINVAL); 827 828 return (VMSETCAP(vm->cookie, vcpu, type, val)); 829} 830 831uint64_t * 832vm_guest_msrs(struct vm *vm, int cpu) 833{ 834 return (vm->vcpu[cpu].guest_msrs); 835} 836 837struct vlapic * 838vm_lapic(struct vm *vm, int cpu) 839{ 840 return (vm->vcpu[cpu].vlapic); 841} 842 843boolean_t 844vmm_is_pptdev(int bus, int slot, int func) 845{ 846 int found, b, s, f, n; 847 char *val, *cp, *cp2; 848 849 /* 850 * setenv pptdevs "1/2/3 4/5/6 7/8/9 10/11/12" 851 */ 852 found = 0; 853 cp = val = getenv("pptdevs"); 854 while (cp != NULL && *cp != '\0') { 855 if ((cp2 = strchr(cp, ' ')) != NULL) 856 *cp2 = '\0'; 857 858 n = sscanf(cp, "%d/%d/%d", &b, &s, &f); 859 if (n == 3 && bus == b && slot == s && func == f) { 860 found = 1; 861 break; 862 } 863 864 if (cp2 != NULL) 865 *cp2++ = ' '; 866 867 cp = cp2; 868 } 869 freeenv(val); 870 return (found); 871} 872 873void * 874vm_iommu_domain(struct vm *vm) 875{ 876 877 return (vm->iommu); 878} 879 880int 881vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state state) 882{ 883 int error; 884 struct vcpu *vcpu; 885 886 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 887 panic("vm_set_run_state: invalid vcpuid %d", vcpuid); 888 889 vcpu = &vm->vcpu[vcpuid]; 890 891 vcpu_lock(vcpu); 892 893 /* 894 * The following state transitions are allowed: 895 * IDLE -> RUNNING -> IDLE 896 * IDLE -> CANNOT_RUN -> IDLE 897 */ 898 if ((vcpu->state == VCPU_IDLE && state != VCPU_IDLE) || 899 (vcpu->state != VCPU_IDLE && state == VCPU_IDLE)) { 900 error = 0; 901 vcpu->state = state; 902 } else { 903 error = EBUSY; 904 } 905 906 vcpu_unlock(vcpu); 907 908 return (error); 909} 910 911enum vcpu_state 912vcpu_get_state(struct vm *vm, int vcpuid) 913{ 914 struct vcpu *vcpu; 915 enum vcpu_state state; 916 917 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 918 panic("vm_get_run_state: invalid vcpuid %d", vcpuid); 919 920 vcpu = &vm->vcpu[vcpuid]; 921 922 vcpu_lock(vcpu); 923 state = vcpu->state; 924 vcpu_unlock(vcpu); 925 926 return (state); 927} 928 929void 930vm_activate_cpu(struct vm *vm, int vcpuid) 931{ 932 933 if (vcpuid >= 0 && vcpuid < VM_MAXCPU) 934 CPU_SET(vcpuid, &vm->active_cpus); 935} 936 937cpuset_t 938vm_active_cpus(struct vm *vm) 939{ 940 941 return (vm->active_cpus); 942} 943 944void * 945vcpu_stats(struct vm *vm, int vcpuid) 946{ 947 948 return (vm->vcpu[vcpuid].stats); 949} 950 951int 952vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state) 953{ 954 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 955 return (EINVAL); 956 957 *state = vm->vcpu[vcpuid].x2apic_state; 958 959 return (0); 960} 961 962int 963vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) 964{ 965 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 966 return (EINVAL); 967 968 if (state < 0 || state >= X2APIC_STATE_LAST) 969 return (EINVAL); 970 971 vm->vcpu[vcpuid].x2apic_state = state; 972 973 vlapic_set_x2apic_state(vm, vcpuid, state); 974 975 return (0); 976} 977 978void 979vm_interrupt_hostcpu(struct vm *vm, int vcpuid) 980{ 981 int hostcpu; 982 struct vcpu *vcpu; 983 984 vcpu = &vm->vcpu[vcpuid]; 985 986 vcpu_lock(vcpu); 987 hostcpu = vcpu->hostcpu; 988 if (hostcpu == NOCPU) { 989 /* 990 * If the vcpu is 'RUNNING' but without a valid 'hostcpu' then 991 * the host thread must be sleeping waiting for an event to 992 * kick the vcpu out of 'hlt'. 993 * 994 * XXX this is racy because the condition exists right before 995 * and after calling VMRUN() in vm_run(). The wakeup() is 996 * benign in this case. 997 */ 998 if (vcpu->state == VCPU_RUNNING) 999 wakeup_one(vcpu); 1000 } else { 1001 if (vcpu->state != VCPU_RUNNING) 1002 panic("invalid vcpu state %d", vcpu->state); 1003 if (hostcpu != curcpu) 1004 ipi_cpu(hostcpu, vmm_ipinum); 1005 } 1006 vcpu_unlock(vcpu); 1007} 1008