vmm.c revision 246188
1/*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD: head/sys/amd64/vmm/vmm.c 246188 2013-02-01 01:16:26Z neel $ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: head/sys/amd64/vmm/vmm.c 246188 2013-02-01 01:16:26Z neel $"); 31 32#include <sys/param.h> 33#include <sys/systm.h> 34#include <sys/kernel.h> 35#include <sys/module.h> 36#include <sys/sysctl.h> 37#include <sys/malloc.h> 38#include <sys/pcpu.h> 39#include <sys/lock.h> 40#include <sys/mutex.h> 41#include <sys/proc.h> 42#include <sys/sched.h> 43#include <sys/smp.h> 44#include <sys/systm.h> 45 46#include <vm/vm.h> 47 48#include <machine/vm.h> 49#include <machine/pcb.h> 50#include <machine/smp.h> 51#include <x86/apicreg.h> 52 53#include <machine/vmm.h> 54#include "vmm_host.h" 55#include "vmm_mem.h" 56#include "vmm_util.h" 57#include <machine/vmm_dev.h> 58#include "vlapic.h" 59#include "vmm_msr.h" 60#include "vmm_ipi.h" 61#include "vmm_stat.h" 62#include "vmm_lapic.h" 63 64#include "io/ppt.h" 65#include "io/iommu.h" 66 67struct vlapic; 68 69struct vcpu { 70 int flags; 71 enum vcpu_state state; 72 struct mtx mtx; 73 int pincpu; /* host cpuid this vcpu is bound to */ 74 int hostcpu; /* host cpuid this vcpu last ran on */ 75 uint64_t guest_msrs[VMM_MSR_NUM]; 76 struct vlapic *vlapic; 77 int vcpuid; 78 struct savefpu *guestfpu; /* guest fpu state */ 79 void *stats; 80 struct vm_exit exitinfo; 81 enum x2apic_state x2apic_state; 82 int nmi_pending; 83}; 84#define VCPU_F_PINNED 0x0001 85 86#define VCPU_PINCPU(vm, vcpuid) \ 87 ((vm->vcpu[vcpuid].flags & VCPU_F_PINNED) ? vm->vcpu[vcpuid].pincpu : -1) 88 89#define VCPU_UNPIN(vm, vcpuid) (vm->vcpu[vcpuid].flags &= ~VCPU_F_PINNED) 90 91#define VCPU_PIN(vm, vcpuid, host_cpuid) \ 92do { \ 93 vm->vcpu[vcpuid].flags |= VCPU_F_PINNED; \ 94 vm->vcpu[vcpuid].pincpu = host_cpuid; \ 95} while(0) 96 97#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) 98#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) 99#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) 100 101#define VM_MAX_MEMORY_SEGMENTS 2 102 103struct vm { 104 void *cookie; /* processor-specific data */ 105 void *iommu; /* iommu-specific data */ 106 struct vcpu vcpu[VM_MAXCPU]; 107 int num_mem_segs; 108 struct vm_memory_segment mem_segs[VM_MAX_MEMORY_SEGMENTS]; 109 char name[VM_MAX_NAMELEN]; 110 111 /* 112 * Set of active vcpus. 113 * An active vcpu is one that has been started implicitly (BSP) or 114 * explicitly (AP) by sending it a startup ipi. 115 */ 116 cpuset_t active_cpus; 117}; 118 119static struct vmm_ops *ops; 120#define VMM_INIT() (ops != NULL ? (*ops->init)() : 0) 121#define VMM_CLEANUP() (ops != NULL ? (*ops->cleanup)() : 0) 122 123#define VMINIT(vm) (ops != NULL ? (*ops->vminit)(vm): NULL) 124#define VMRUN(vmi, vcpu, rip) \ 125 (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip) : ENXIO) 126#define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL) 127#define VMMMAP_SET(vmi, gpa, hpa, len, attr, prot, spm) \ 128 (ops != NULL ? \ 129 (*ops->vmmmap_set)(vmi, gpa, hpa, len, attr, prot, spm) : \ 130 ENXIO) 131#define VMMMAP_GET(vmi, gpa) \ 132 (ops != NULL ? (*ops->vmmmap_get)(vmi, gpa) : ENXIO) 133#define VMGETREG(vmi, vcpu, num, retval) \ 134 (ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO) 135#define VMSETREG(vmi, vcpu, num, val) \ 136 (ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO) 137#define VMGETDESC(vmi, vcpu, num, desc) \ 138 (ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO) 139#define VMSETDESC(vmi, vcpu, num, desc) \ 140 (ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO) 141#define VMINJECT(vmi, vcpu, type, vec, ec, ecv) \ 142 (ops != NULL ? (*ops->vminject)(vmi, vcpu, type, vec, ec, ecv) : ENXIO) 143#define VMGETCAP(vmi, vcpu, num, retval) \ 144 (ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO) 145#define VMSETCAP(vmi, vcpu, num, val) \ 146 (ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO) 147 148#define fpu_start_emulating() load_cr0(rcr0() | CR0_TS) 149#define fpu_stop_emulating() clts() 150 151static MALLOC_DEFINE(M_VM, "vm", "vm"); 152CTASSERT(VMM_MSR_NUM <= 64); /* msr_mask can keep track of up to 64 msrs */ 153 154/* statistics */ 155static VMM_STAT_DEFINE(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); 156 157static void 158vcpu_cleanup(struct vcpu *vcpu) 159{ 160 vlapic_cleanup(vcpu->vlapic); 161 vmm_stat_free(vcpu->stats); 162 fpu_save_area_free(vcpu->guestfpu); 163} 164 165static void 166vcpu_init(struct vm *vm, uint32_t vcpu_id) 167{ 168 struct vcpu *vcpu; 169 170 vcpu = &vm->vcpu[vcpu_id]; 171 172 vcpu_lock_init(vcpu); 173 vcpu->hostcpu = NOCPU; 174 vcpu->vcpuid = vcpu_id; 175 vcpu->vlapic = vlapic_init(vm, vcpu_id); 176 vm_set_x2apic_state(vm, vcpu_id, X2APIC_ENABLED); 177 vcpu->guestfpu = fpu_save_area_alloc(); 178 fpu_save_area_reset(vcpu->guestfpu); 179 vcpu->stats = vmm_stat_alloc(); 180} 181 182struct vm_exit * 183vm_exitinfo(struct vm *vm, int cpuid) 184{ 185 struct vcpu *vcpu; 186 187 if (cpuid < 0 || cpuid >= VM_MAXCPU) 188 panic("vm_exitinfo: invalid cpuid %d", cpuid); 189 190 vcpu = &vm->vcpu[cpuid]; 191 192 return (&vcpu->exitinfo); 193} 194 195static int 196vmm_init(void) 197{ 198 int error; 199 200 vmm_host_state_init(); 201 vmm_ipi_init(); 202 203 error = vmm_mem_init(); 204 if (error) 205 return (error); 206 207 if (vmm_is_intel()) 208 ops = &vmm_ops_intel; 209 else if (vmm_is_amd()) 210 ops = &vmm_ops_amd; 211 else 212 return (ENXIO); 213 214 vmm_msr_init(); 215 216 return (VMM_INIT()); 217} 218 219static int 220vmm_handler(module_t mod, int what, void *arg) 221{ 222 int error; 223 224 switch (what) { 225 case MOD_LOAD: 226 vmmdev_init(); 227 iommu_init(); 228 error = vmm_init(); 229 break; 230 case MOD_UNLOAD: 231 error = vmmdev_cleanup(); 232 if (error == 0) { 233 iommu_cleanup(); 234 vmm_ipi_cleanup(); 235 error = VMM_CLEANUP(); 236 } 237 break; 238 default: 239 error = 0; 240 break; 241 } 242 return (error); 243} 244 245static moduledata_t vmm_kmod = { 246 "vmm", 247 vmm_handler, 248 NULL 249}; 250 251/* 252 * vmm initialization has the following dependencies: 253 * 254 * - iommu initialization must happen after the pci passthru driver has had 255 * a chance to attach to any passthru devices (after SI_SUB_CONFIGURE). 256 * 257 * - VT-x initialization requires smp_rendezvous() and therefore must happen 258 * after SMP is fully functional (after SI_SUB_SMP). 259 */ 260DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY); 261MODULE_VERSION(vmm, 1); 262 263SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); 264 265struct vm * 266vm_create(const char *name) 267{ 268 int i; 269 struct vm *vm; 270 vm_paddr_t maxaddr; 271 272 const int BSP = 0; 273 274 if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) 275 return (NULL); 276 277 vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO); 278 strcpy(vm->name, name); 279 vm->cookie = VMINIT(vm); 280 281 for (i = 0; i < VM_MAXCPU; i++) { 282 vcpu_init(vm, i); 283 guest_msrs_init(vm, i); 284 } 285 286 maxaddr = vmm_mem_maxaddr(); 287 vm->iommu = iommu_create_domain(maxaddr); 288 vm_activate_cpu(vm, BSP); 289 290 return (vm); 291} 292 293static void 294vm_free_mem_seg(struct vm *vm, struct vm_memory_segment *seg) 295{ 296 size_t len; 297 vm_paddr_t hpa; 298 void *host_domain; 299 300 host_domain = iommu_host_domain(); 301 302 len = 0; 303 while (len < seg->len) { 304 hpa = vm_gpa2hpa(vm, seg->gpa + len, PAGE_SIZE); 305 if (hpa == (vm_paddr_t)-1) { 306 panic("vm_free_mem_segs: cannot free hpa " 307 "associated with gpa 0x%016lx", seg->gpa + len); 308 } 309 310 /* 311 * Remove the 'gpa' to 'hpa' mapping in VMs domain. 312 * And resurrect the 1:1 mapping for 'hpa' in 'host_domain'. 313 */ 314 iommu_remove_mapping(vm->iommu, seg->gpa + len, PAGE_SIZE); 315 iommu_create_mapping(host_domain, hpa, hpa, PAGE_SIZE); 316 317 vmm_mem_free(hpa, PAGE_SIZE); 318 319 len += PAGE_SIZE; 320 } 321 322 /* 323 * Invalidate cached translations associated with 'vm->iommu' since 324 * we have now moved some pages from it. 325 */ 326 iommu_invalidate_tlb(vm->iommu); 327 328 bzero(seg, sizeof(struct vm_memory_segment)); 329} 330 331void 332vm_destroy(struct vm *vm) 333{ 334 int i; 335 336 ppt_unassign_all(vm); 337 338 for (i = 0; i < vm->num_mem_segs; i++) 339 vm_free_mem_seg(vm, &vm->mem_segs[i]); 340 341 vm->num_mem_segs = 0; 342 343 for (i = 0; i < VM_MAXCPU; i++) 344 vcpu_cleanup(&vm->vcpu[i]); 345 346 iommu_destroy_domain(vm->iommu); 347 348 VMCLEANUP(vm->cookie); 349 350 free(vm, M_VM); 351} 352 353const char * 354vm_name(struct vm *vm) 355{ 356 return (vm->name); 357} 358 359int 360vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) 361{ 362 const boolean_t spok = TRUE; /* superpage mappings are ok */ 363 364 return (VMMMAP_SET(vm->cookie, gpa, hpa, len, VM_MEMATTR_UNCACHEABLE, 365 VM_PROT_RW, spok)); 366} 367 368int 369vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len) 370{ 371 const boolean_t spok = TRUE; /* superpage mappings are ok */ 372 373 return (VMMMAP_SET(vm->cookie, gpa, 0, len, 0, 374 VM_PROT_NONE, spok)); 375} 376 377/* 378 * Returns TRUE if 'gpa' is available for allocation and FALSE otherwise 379 */ 380static boolean_t 381vm_gpa_available(struct vm *vm, vm_paddr_t gpa) 382{ 383 int i; 384 vm_paddr_t gpabase, gpalimit; 385 386 if (gpa & PAGE_MASK) 387 panic("vm_gpa_available: gpa (0x%016lx) not page aligned", gpa); 388 389 for (i = 0; i < vm->num_mem_segs; i++) { 390 gpabase = vm->mem_segs[i].gpa; 391 gpalimit = gpabase + vm->mem_segs[i].len; 392 if (gpa >= gpabase && gpa < gpalimit) 393 return (FALSE); 394 } 395 396 return (TRUE); 397} 398 399int 400vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len) 401{ 402 int error, available, allocated; 403 struct vm_memory_segment *seg; 404 vm_paddr_t g, hpa; 405 void *host_domain; 406 407 const boolean_t spok = TRUE; /* superpage mappings are ok */ 408 409 if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0) 410 return (EINVAL); 411 412 available = allocated = 0; 413 g = gpa; 414 while (g < gpa + len) { 415 if (vm_gpa_available(vm, g)) 416 available++; 417 else 418 allocated++; 419 420 g += PAGE_SIZE; 421 } 422 423 /* 424 * If there are some allocated and some available pages in the address 425 * range then it is an error. 426 */ 427 if (allocated && available) 428 return (EINVAL); 429 430 /* 431 * If the entire address range being requested has already been 432 * allocated then there isn't anything more to do. 433 */ 434 if (allocated && available == 0) 435 return (0); 436 437 if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS) 438 return (E2BIG); 439 440 host_domain = iommu_host_domain(); 441 442 seg = &vm->mem_segs[vm->num_mem_segs]; 443 444 error = 0; 445 seg->gpa = gpa; 446 seg->len = 0; 447 while (seg->len < len) { 448 hpa = vmm_mem_alloc(PAGE_SIZE); 449 if (hpa == 0) { 450 error = ENOMEM; 451 break; 452 } 453 454 error = VMMMAP_SET(vm->cookie, gpa + seg->len, hpa, PAGE_SIZE, 455 VM_MEMATTR_WRITE_BACK, VM_PROT_ALL, spok); 456 if (error) 457 break; 458 459 /* 460 * Remove the 1:1 mapping for 'hpa' from the 'host_domain'. 461 * Add mapping for 'gpa + seg->len' to 'hpa' in the VMs domain. 462 */ 463 iommu_remove_mapping(host_domain, hpa, PAGE_SIZE); 464 iommu_create_mapping(vm->iommu, gpa + seg->len, hpa, PAGE_SIZE); 465 466 seg->len += PAGE_SIZE; 467 } 468 469 if (error) { 470 vm_free_mem_seg(vm, seg); 471 return (error); 472 } 473 474 /* 475 * Invalidate cached translations associated with 'host_domain' since 476 * we have now moved some pages from it. 477 */ 478 iommu_invalidate_tlb(host_domain); 479 480 vm->num_mem_segs++; 481 482 return (0); 483} 484 485vm_paddr_t 486vm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t len) 487{ 488 vm_paddr_t nextpage; 489 490 nextpage = rounddown(gpa + PAGE_SIZE, PAGE_SIZE); 491 if (len > nextpage - gpa) 492 panic("vm_gpa2hpa: invalid gpa/len: 0x%016lx/%lu", gpa, len); 493 494 return (VMMMAP_GET(vm->cookie, gpa)); 495} 496 497int 498vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase, 499 struct vm_memory_segment *seg) 500{ 501 int i; 502 503 for (i = 0; i < vm->num_mem_segs; i++) { 504 if (gpabase == vm->mem_segs[i].gpa) { 505 *seg = vm->mem_segs[i]; 506 return (0); 507 } 508 } 509 return (-1); 510} 511 512int 513vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval) 514{ 515 516 if (vcpu < 0 || vcpu >= VM_MAXCPU) 517 return (EINVAL); 518 519 if (reg >= VM_REG_LAST) 520 return (EINVAL); 521 522 return (VMGETREG(vm->cookie, vcpu, reg, retval)); 523} 524 525int 526vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val) 527{ 528 529 if (vcpu < 0 || vcpu >= VM_MAXCPU) 530 return (EINVAL); 531 532 if (reg >= VM_REG_LAST) 533 return (EINVAL); 534 535 return (VMSETREG(vm->cookie, vcpu, reg, val)); 536} 537 538static boolean_t 539is_descriptor_table(int reg) 540{ 541 542 switch (reg) { 543 case VM_REG_GUEST_IDTR: 544 case VM_REG_GUEST_GDTR: 545 return (TRUE); 546 default: 547 return (FALSE); 548 } 549} 550 551static boolean_t 552is_segment_register(int reg) 553{ 554 555 switch (reg) { 556 case VM_REG_GUEST_ES: 557 case VM_REG_GUEST_CS: 558 case VM_REG_GUEST_SS: 559 case VM_REG_GUEST_DS: 560 case VM_REG_GUEST_FS: 561 case VM_REG_GUEST_GS: 562 case VM_REG_GUEST_TR: 563 case VM_REG_GUEST_LDTR: 564 return (TRUE); 565 default: 566 return (FALSE); 567 } 568} 569 570int 571vm_get_seg_desc(struct vm *vm, int vcpu, int reg, 572 struct seg_desc *desc) 573{ 574 575 if (vcpu < 0 || vcpu >= VM_MAXCPU) 576 return (EINVAL); 577 578 if (!is_segment_register(reg) && !is_descriptor_table(reg)) 579 return (EINVAL); 580 581 return (VMGETDESC(vm->cookie, vcpu, reg, desc)); 582} 583 584int 585vm_set_seg_desc(struct vm *vm, int vcpu, int reg, 586 struct seg_desc *desc) 587{ 588 if (vcpu < 0 || vcpu >= VM_MAXCPU) 589 return (EINVAL); 590 591 if (!is_segment_register(reg) && !is_descriptor_table(reg)) 592 return (EINVAL); 593 594 return (VMSETDESC(vm->cookie, vcpu, reg, desc)); 595} 596 597int 598vm_get_pinning(struct vm *vm, int vcpuid, int *cpuid) 599{ 600 601 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 602 return (EINVAL); 603 604 *cpuid = VCPU_PINCPU(vm, vcpuid); 605 606 return (0); 607} 608 609int 610vm_set_pinning(struct vm *vm, int vcpuid, int host_cpuid) 611{ 612 struct thread *td; 613 614 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 615 return (EINVAL); 616 617 td = curthread; /* XXXSMP only safe when muxing vcpus */ 618 619 /* unpin */ 620 if (host_cpuid < 0) { 621 VCPU_UNPIN(vm, vcpuid); 622 thread_lock(td); 623 sched_unbind(td); 624 thread_unlock(td); 625 return (0); 626 } 627 628 if (CPU_ABSENT(host_cpuid)) 629 return (EINVAL); 630 631 /* 632 * XXX we should check that 'host_cpuid' has not already been pinned 633 * by another vm. 634 */ 635 thread_lock(td); 636 sched_bind(td, host_cpuid); 637 thread_unlock(td); 638 VCPU_PIN(vm, vcpuid, host_cpuid); 639 640 return (0); 641} 642 643static void 644restore_guest_fpustate(struct vcpu *vcpu) 645{ 646 647 /* flush host state to the pcb */ 648 fpuexit(curthread); 649 650 /* restore guest FPU state */ 651 fpu_stop_emulating(); 652 fpurestore(vcpu->guestfpu); 653 654 /* 655 * The FPU is now "dirty" with the guest's state so turn on emulation 656 * to trap any access to the FPU by the host. 657 */ 658 fpu_start_emulating(); 659} 660 661static void 662save_guest_fpustate(struct vcpu *vcpu) 663{ 664 665 if ((rcr0() & CR0_TS) == 0) 666 panic("fpu emulation not enabled in host!"); 667 668 /* save guest FPU state */ 669 fpu_stop_emulating(); 670 fpusave(vcpu->guestfpu); 671 fpu_start_emulating(); 672} 673 674static VMM_STAT_DEFINE(VCPU_IDLE_TICKS, "number of ticks vcpu was idle"); 675 676int 677vm_run(struct vm *vm, struct vm_run *vmrun) 678{ 679 int error, vcpuid, sleepticks, t; 680 struct vcpu *vcpu; 681 struct pcb *pcb; 682 uint64_t tscval, rip; 683 struct vm_exit *vme; 684 685 vcpuid = vmrun->cpuid; 686 687 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 688 return (EINVAL); 689 690 vcpu = &vm->vcpu[vcpuid]; 691 vme = &vmrun->vm_exit; 692 rip = vmrun->rip; 693restart: 694 critical_enter(); 695 696 tscval = rdtsc(); 697 698 pcb = PCPU_GET(curpcb); 699 set_pcb_flags(pcb, PCB_FULL_IRET); 700 701 restore_guest_msrs(vm, vcpuid); 702 restore_guest_fpustate(vcpu); 703 704 vcpu->hostcpu = curcpu; 705 error = VMRUN(vm->cookie, vcpuid, rip); 706 vcpu->hostcpu = NOCPU; 707 708 save_guest_fpustate(vcpu); 709 restore_host_msrs(vm, vcpuid); 710 711 vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval); 712 713 /* copy the exit information */ 714 bcopy(&vcpu->exitinfo, vme, sizeof(struct vm_exit)); 715 716 critical_exit(); 717 718 /* 719 * Oblige the guest's desire to 'hlt' by sleeping until the vcpu 720 * is ready to run. 721 */ 722 if (error == 0 && vme->exitcode == VM_EXITCODE_HLT) { 723 vcpu_lock(vcpu); 724 725 /* 726 * Figure out the number of host ticks until the next apic 727 * timer interrupt in the guest. 728 */ 729 sleepticks = lapic_timer_tick(vm, vcpuid); 730 731 /* 732 * If the guest local apic timer is disabled then sleep for 733 * a long time but not forever. 734 */ 735 if (sleepticks < 0) 736 sleepticks = hz; 737 738 /* 739 * Do a final check for pending NMI or interrupts before 740 * really putting this thread to sleep. 741 * 742 * These interrupts could have happened any time after we 743 * returned from VMRUN() and before we grabbed the vcpu lock. 744 */ 745 if (!vm_nmi_pending(vm, vcpuid) && 746 lapic_pending_intr(vm, vcpuid) < 0) { 747 if (sleepticks <= 0) 748 panic("invalid sleepticks %d", sleepticks); 749 t = ticks; 750 msleep_spin(vcpu, &vcpu->mtx, "vmidle", sleepticks); 751 vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t); 752 } 753 754 vcpu_unlock(vcpu); 755 756 rip = vme->rip + vme->inst_length; 757 goto restart; 758 } 759 760 return (error); 761} 762 763int 764vm_inject_event(struct vm *vm, int vcpuid, int type, 765 int vector, uint32_t code, int code_valid) 766{ 767 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 768 return (EINVAL); 769 770 if ((type > VM_EVENT_NONE && type < VM_EVENT_MAX) == 0) 771 return (EINVAL); 772 773 if (vector < 0 || vector > 255) 774 return (EINVAL); 775 776 return (VMINJECT(vm->cookie, vcpuid, type, vector, code, code_valid)); 777} 778 779static VMM_STAT_DEFINE(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu"); 780 781int 782vm_inject_nmi(struct vm *vm, int vcpuid) 783{ 784 struct vcpu *vcpu; 785 786 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 787 return (EINVAL); 788 789 vcpu = &vm->vcpu[vcpuid]; 790 791 vcpu->nmi_pending = 1; 792 vm_interrupt_hostcpu(vm, vcpuid); 793 return (0); 794} 795 796int 797vm_nmi_pending(struct vm *vm, int vcpuid) 798{ 799 struct vcpu *vcpu; 800 801 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 802 panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); 803 804 vcpu = &vm->vcpu[vcpuid]; 805 806 return (vcpu->nmi_pending); 807} 808 809void 810vm_nmi_clear(struct vm *vm, int vcpuid) 811{ 812 struct vcpu *vcpu; 813 814 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 815 panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); 816 817 vcpu = &vm->vcpu[vcpuid]; 818 819 if (vcpu->nmi_pending == 0) 820 panic("vm_nmi_clear: inconsistent nmi_pending state"); 821 822 vcpu->nmi_pending = 0; 823 vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1); 824} 825 826int 827vm_get_capability(struct vm *vm, int vcpu, int type, int *retval) 828{ 829 if (vcpu < 0 || vcpu >= VM_MAXCPU) 830 return (EINVAL); 831 832 if (type < 0 || type >= VM_CAP_MAX) 833 return (EINVAL); 834 835 return (VMGETCAP(vm->cookie, vcpu, type, retval)); 836} 837 838int 839vm_set_capability(struct vm *vm, int vcpu, int type, int val) 840{ 841 if (vcpu < 0 || vcpu >= VM_MAXCPU) 842 return (EINVAL); 843 844 if (type < 0 || type >= VM_CAP_MAX) 845 return (EINVAL); 846 847 return (VMSETCAP(vm->cookie, vcpu, type, val)); 848} 849 850uint64_t * 851vm_guest_msrs(struct vm *vm, int cpu) 852{ 853 return (vm->vcpu[cpu].guest_msrs); 854} 855 856struct vlapic * 857vm_lapic(struct vm *vm, int cpu) 858{ 859 return (vm->vcpu[cpu].vlapic); 860} 861 862boolean_t 863vmm_is_pptdev(int bus, int slot, int func) 864{ 865 int found, i, n; 866 int b, s, f; 867 char *val, *cp, *cp2; 868 869 /* 870 * XXX 871 * The length of an environment variable is limited to 128 bytes which 872 * puts an upper limit on the number of passthru devices that may be 873 * specified using a single environment variable. 874 * 875 * Work around this by scanning multiple environment variable 876 * names instead of a single one - yuck! 877 */ 878 const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL }; 879 880 /* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */ 881 found = 0; 882 for (i = 0; names[i] != NULL && !found; i++) { 883 cp = val = getenv(names[i]); 884 while (cp != NULL && *cp != '\0') { 885 if ((cp2 = strchr(cp, ' ')) != NULL) 886 *cp2 = '\0'; 887 888 n = sscanf(cp, "%d/%d/%d", &b, &s, &f); 889 if (n == 3 && bus == b && slot == s && func == f) { 890 found = 1; 891 break; 892 } 893 894 if (cp2 != NULL) 895 *cp2++ = ' '; 896 897 cp = cp2; 898 } 899 freeenv(val); 900 } 901 return (found); 902} 903 904void * 905vm_iommu_domain(struct vm *vm) 906{ 907 908 return (vm->iommu); 909} 910 911int 912vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state state) 913{ 914 int error; 915 struct vcpu *vcpu; 916 917 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 918 panic("vm_set_run_state: invalid vcpuid %d", vcpuid); 919 920 vcpu = &vm->vcpu[vcpuid]; 921 922 vcpu_lock(vcpu); 923 924 /* 925 * The following state transitions are allowed: 926 * IDLE -> RUNNING -> IDLE 927 * IDLE -> CANNOT_RUN -> IDLE 928 */ 929 if ((vcpu->state == VCPU_IDLE && state != VCPU_IDLE) || 930 (vcpu->state != VCPU_IDLE && state == VCPU_IDLE)) { 931 error = 0; 932 vcpu->state = state; 933 } else { 934 error = EBUSY; 935 } 936 937 vcpu_unlock(vcpu); 938 939 return (error); 940} 941 942enum vcpu_state 943vcpu_get_state(struct vm *vm, int vcpuid) 944{ 945 struct vcpu *vcpu; 946 enum vcpu_state state; 947 948 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 949 panic("vm_get_run_state: invalid vcpuid %d", vcpuid); 950 951 vcpu = &vm->vcpu[vcpuid]; 952 953 vcpu_lock(vcpu); 954 state = vcpu->state; 955 vcpu_unlock(vcpu); 956 957 return (state); 958} 959 960void 961vm_activate_cpu(struct vm *vm, int vcpuid) 962{ 963 964 if (vcpuid >= 0 && vcpuid < VM_MAXCPU) 965 CPU_SET(vcpuid, &vm->active_cpus); 966} 967 968cpuset_t 969vm_active_cpus(struct vm *vm) 970{ 971 972 return (vm->active_cpus); 973} 974 975void * 976vcpu_stats(struct vm *vm, int vcpuid) 977{ 978 979 return (vm->vcpu[vcpuid].stats); 980} 981 982int 983vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state) 984{ 985 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 986 return (EINVAL); 987 988 *state = vm->vcpu[vcpuid].x2apic_state; 989 990 return (0); 991} 992 993int 994vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) 995{ 996 if (vcpuid < 0 || vcpuid >= VM_MAXCPU) 997 return (EINVAL); 998 999 if (state < 0 || state >= X2APIC_STATE_LAST) 1000 return (EINVAL); 1001 1002 vm->vcpu[vcpuid].x2apic_state = state; 1003 1004 vlapic_set_x2apic_state(vm, vcpuid, state); 1005 1006 return (0); 1007} 1008 1009void 1010vm_interrupt_hostcpu(struct vm *vm, int vcpuid) 1011{ 1012 int hostcpu; 1013 struct vcpu *vcpu; 1014 1015 vcpu = &vm->vcpu[vcpuid]; 1016 1017 vcpu_lock(vcpu); 1018 hostcpu = vcpu->hostcpu; 1019 if (hostcpu == NOCPU) { 1020 /* 1021 * If the vcpu is 'RUNNING' but without a valid 'hostcpu' then 1022 * the host thread must be sleeping waiting for an event to 1023 * kick the vcpu out of 'hlt'. 1024 * 1025 * XXX this is racy because the condition exists right before 1026 * and after calling VMRUN() in vm_run(). The wakeup() is 1027 * benign in this case. 1028 */ 1029 if (vcpu->state == VCPU_RUNNING) 1030 wakeup_one(vcpu); 1031 } else { 1032 if (vcpu->state != VCPU_RUNNING) 1033 panic("invalid vcpu state %d", vcpu->state); 1034 if (hostcpu != curcpu) 1035 ipi_cpu(hostcpu, vmm_ipinum); 1036 } 1037 vcpu_unlock(vcpu); 1038} 1039