1/*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD: stable/11/sys/amd64/vmm/vmm_dev.c 348271 2019-05-25 11:27:56Z rgrimes $ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: stable/11/sys/amd64/vmm/vmm_dev.c 348271 2019-05-25 11:27:56Z rgrimes $"); 31 32#include <sys/param.h> 33#include <sys/kernel.h> 34#include <sys/queue.h> 35#include <sys/lock.h> 36#include <sys/mutex.h> 37#include <sys/malloc.h> 38#include <sys/conf.h> 39#include <sys/sysctl.h> 40#include <sys/libkern.h> 41#include <sys/ioccom.h> 42#include <sys/mman.h> 43#include <sys/uio.h> 44 45#include <vm/vm.h> 46#include <vm/pmap.h> 47#include <vm/vm_map.h> 48#include <vm/vm_object.h> 49 50#include <machine/vmparam.h> 51#include <machine/vmm.h> 52#include <machine/vmm_instruction_emul.h> 53#include <machine/vmm_dev.h> 54 55#include "vmm_lapic.h" 56#include "vmm_stat.h" 57#include "vmm_mem.h" 58#include "io/ppt.h" 59#include "io/vatpic.h" 60#include "io/vioapic.h" 61#include "io/vhpet.h" 62#include "io/vrtc.h" 63 64struct devmem_softc { 65 int segid; 66 char *name; 67 struct cdev *cdev; 68 struct vmmdev_softc *sc; 69 SLIST_ENTRY(devmem_softc) link; 70}; 71 72struct vmmdev_softc { 73 struct vm *vm; /* vm instance cookie */ 74 struct cdev *cdev; 75 SLIST_ENTRY(vmmdev_softc) link; 76 SLIST_HEAD(, devmem_softc) devmem; 77 int flags; 78}; 79#define VSC_LINKED 0x01 80 81static SLIST_HEAD(, vmmdev_softc) head; 82 83static struct mtx vmmdev_mtx; 84 85static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev"); 86 87SYSCTL_DECL(_hw_vmm); 88 89static int devmem_create_cdev(const char *vmname, int id, char *devmem); 90static void devmem_destroy(void *arg); 91 92static int 93vcpu_lock_one(struct vmmdev_softc *sc, int vcpu) 94{ 95 int error; 96 97 if (vcpu < 0 || vcpu >= vm_get_maxcpus(sc->vm)) 98 return (EINVAL); 99 100 error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true); 101 return (error); 102} 103 104static void 105vcpu_unlock_one(struct vmmdev_softc *sc, int vcpu) 106{ 107 enum vcpu_state state; 108 109 state = vcpu_get_state(sc->vm, vcpu, NULL); 110 if (state != VCPU_FROZEN) { 111 panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm), 112 vcpu, state); 113 } 114 115 vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false); 116} 117 118static int 119vcpu_lock_all(struct vmmdev_softc *sc) 120{ 121 int error, vcpu; 122 uint16_t maxcpus; 123 124 maxcpus = vm_get_maxcpus(sc->vm); 125 for (vcpu = 0; vcpu < maxcpus; vcpu++) { 126 error = vcpu_lock_one(sc, vcpu); 127 if (error) 128 break; 129 } 130 131 if (error) { 132 while (--vcpu >= 0) 133 vcpu_unlock_one(sc, vcpu); 134 } 135 136 return (error); 137} 138 139static void 140vcpu_unlock_all(struct vmmdev_softc *sc) 141{ 142 int vcpu; 143 uint16_t maxcpus; 144 145 maxcpus = vm_get_maxcpus(sc->vm); 146 for (vcpu = 0; vcpu < maxcpus; vcpu++) 147 vcpu_unlock_one(sc, vcpu); 148} 149 150static struct vmmdev_softc * 151vmmdev_lookup(const char *name) 152{ 153 struct vmmdev_softc *sc; 154 155#ifdef notyet /* XXX kernel is not compiled with invariants */ 156 mtx_assert(&vmmdev_mtx, MA_OWNED); 157#endif 158 159 SLIST_FOREACH(sc, &head, link) { 160 if (strcmp(name, vm_name(sc->vm)) == 0) 161 break; 162 } 163 164 return (sc); 165} 166 167static struct vmmdev_softc * 168vmmdev_lookup2(struct cdev *cdev) 169{ 170 171 return (cdev->si_drv1); 172} 173 174static int 175vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags) 176{ 177 int error, off, c, prot; 178 vm_paddr_t gpa, maxaddr; 179 void *hpa, *cookie; 180 struct vmmdev_softc *sc; 181 uint16_t lastcpu; 182 183 sc = vmmdev_lookup2(cdev); 184 if (sc == NULL) 185 return (ENXIO); 186 187 /* 188 * Get a read lock on the guest memory map by freezing any vcpu. 189 */ 190 lastcpu = vm_get_maxcpus(sc->vm) - 1; 191 error = vcpu_lock_one(sc, lastcpu); 192 if (error) 193 return (error); 194 195 prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ); 196 maxaddr = vmm_sysmem_maxaddr(sc->vm); 197 while (uio->uio_resid > 0 && error == 0) { 198 gpa = uio->uio_offset; 199 off = gpa & PAGE_MASK; 200 c = min(uio->uio_resid, PAGE_SIZE - off); 201 202 /* 203 * The VM has a hole in its physical memory map. If we want to 204 * use 'dd' to inspect memory beyond the hole we need to 205 * provide bogus data for memory that lies in the hole. 206 * 207 * Since this device does not support lseek(2), dd(1) will 208 * read(2) blocks of data to simulate the lseek(2). 209 */ 210 hpa = vm_gpa_hold(sc->vm, lastcpu, gpa, c, 211 prot, &cookie); 212 if (hpa == NULL) { 213 if (uio->uio_rw == UIO_READ && gpa < maxaddr) 214 error = uiomove(__DECONST(void *, zero_region), 215 c, uio); 216 else 217 error = EFAULT; 218 } else { 219 error = uiomove(hpa, c, uio); 220 vm_gpa_release(cookie); 221 } 222 } 223 vcpu_unlock_one(sc, lastcpu); 224 return (error); 225} 226 227CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= SPECNAMELEN + 1); 228 229static int 230get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg) 231{ 232 struct devmem_softc *dsc; 233 int error; 234 bool sysmem; 235 236 error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL); 237 if (error || mseg->len == 0) 238 return (error); 239 240 if (!sysmem) { 241 SLIST_FOREACH(dsc, &sc->devmem, link) { 242 if (dsc->segid == mseg->segid) 243 break; 244 } 245 KASSERT(dsc != NULL, ("%s: devmem segment %d not found", 246 __func__, mseg->segid)); 247 error = copystr(dsc->name, mseg->name, SPECNAMELEN + 1, NULL); 248 } else { 249 bzero(mseg->name, sizeof(mseg->name)); 250 } 251 252 return (error); 253} 254 255static int 256alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg) 257{ 258 char *name; 259 int error; 260 bool sysmem; 261 262 error = 0; 263 name = NULL; 264 sysmem = true; 265 266 if (VM_MEMSEG_NAME(mseg)) { 267 sysmem = false; 268 name = malloc(SPECNAMELEN + 1, M_VMMDEV, M_WAITOK); 269 error = copystr(mseg->name, name, SPECNAMELEN + 1, 0); 270 if (error) 271 goto done; 272 } 273 274 error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem); 275 if (error) 276 goto done; 277 278 if (VM_MEMSEG_NAME(mseg)) { 279 error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name); 280 if (error) 281 vm_free_memseg(sc->vm, mseg->segid); 282 else 283 name = NULL; /* freed when 'cdev' is destroyed */ 284 } 285done: 286 free(name, M_VMMDEV); 287 return (error); 288} 289 290static int 291vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, 292 struct thread *td) 293{ 294 int error, vcpu, state_changed, size; 295 cpuset_t *cpuset; 296 struct vmmdev_softc *sc; 297 struct vm_register *vmreg; 298 struct vm_seg_desc *vmsegdesc; 299 struct vm_run *vmrun; 300 struct vm_exception *vmexc; 301 struct vm_lapic_irq *vmirq; 302 struct vm_lapic_msi *vmmsi; 303 struct vm_ioapic_irq *ioapic_irq; 304 struct vm_isa_irq *isa_irq; 305 struct vm_isa_irq_trigger *isa_irq_trigger; 306 struct vm_capability *vmcap; 307 struct vm_pptdev *pptdev; 308 struct vm_pptdev_mmio *pptmmio; 309 struct vm_pptdev_msi *pptmsi; 310 struct vm_pptdev_msix *pptmsix; 311 struct vm_nmi *vmnmi; 312 struct vm_stats *vmstats; 313 struct vm_stat_desc *statdesc; 314 struct vm_x2apic *x2apic; 315 struct vm_gpa_pte *gpapte; 316 struct vm_suspend *vmsuspend; 317 struct vm_gla2gpa *gg; 318 struct vm_activate_cpu *vac; 319 struct vm_cpuset *vm_cpuset; 320 struct vm_intinfo *vmii; 321 struct vm_rtc_time *rtctime; 322 struct vm_rtc_data *rtcdata; 323 struct vm_memmap *mm; 324 struct vm_cpu_topology *topology; 325 326 sc = vmmdev_lookup2(cdev); 327 if (sc == NULL) 328 return (ENXIO); 329 330 error = 0; 331 vcpu = -1; 332 state_changed = 0; 333 334 /* 335 * Some VMM ioctls can operate only on vcpus that are not running. 336 */ 337 switch (cmd) { 338 case VM_RUN: 339 case VM_GET_REGISTER: 340 case VM_SET_REGISTER: 341 case VM_GET_SEGMENT_DESCRIPTOR: 342 case VM_SET_SEGMENT_DESCRIPTOR: 343 case VM_INJECT_EXCEPTION: 344 case VM_GET_CAPABILITY: 345 case VM_SET_CAPABILITY: 346 case VM_PPTDEV_MSI: 347 case VM_PPTDEV_MSIX: 348 case VM_SET_X2APIC_STATE: 349 case VM_GLA2GPA: 350 case VM_ACTIVATE_CPU: 351 case VM_SET_INTINFO: 352 case VM_GET_INTINFO: 353 case VM_RESTART_INSTRUCTION: 354 /* 355 * XXX fragile, handle with care 356 * Assumes that the first field of the ioctl data is the vcpu. 357 */ 358 vcpu = *(int *)data; 359 error = vcpu_lock_one(sc, vcpu); 360 if (error) 361 goto done; 362 state_changed = 1; 363 break; 364 365 case VM_MAP_PPTDEV_MMIO: 366 case VM_BIND_PPTDEV: 367 case VM_UNBIND_PPTDEV: 368 case VM_ALLOC_MEMSEG: 369 case VM_MMAP_MEMSEG: 370 case VM_REINIT: 371 /* 372 * ioctls that operate on the entire virtual machine must 373 * prevent all vcpus from running. 374 */ 375 error = vcpu_lock_all(sc); 376 if (error) 377 goto done; 378 state_changed = 2; 379 break; 380 381 case VM_GET_MEMSEG: 382 case VM_MMAP_GETNEXT: 383 /* 384 * Lock a vcpu to make sure that the memory map cannot be 385 * modified while it is being inspected. 386 */ 387 vcpu = vm_get_maxcpus(sc->vm) - 1; 388 error = vcpu_lock_one(sc, vcpu); 389 if (error) 390 goto done; 391 state_changed = 1; 392 break; 393 394 default: 395 break; 396 } 397 398 switch(cmd) { 399 case VM_RUN: 400 vmrun = (struct vm_run *)data; 401 error = vm_run(sc->vm, vmrun); 402 break; 403 case VM_SUSPEND: 404 vmsuspend = (struct vm_suspend *)data; 405 error = vm_suspend(sc->vm, vmsuspend->how); 406 break; 407 case VM_REINIT: 408 error = vm_reinit(sc->vm); 409 break; 410 case VM_STAT_DESC: { 411 statdesc = (struct vm_stat_desc *)data; 412 error = vmm_stat_desc_copy(statdesc->index, 413 statdesc->desc, sizeof(statdesc->desc)); 414 break; 415 } 416 case VM_STATS: { 417 CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS); 418 vmstats = (struct vm_stats *)data; 419 getmicrotime(&vmstats->tv); 420 error = vmm_stat_copy(sc->vm, vmstats->cpuid, 421 &vmstats->num_entries, vmstats->statbuf); 422 break; 423 } 424 case VM_PPTDEV_MSI: 425 pptmsi = (struct vm_pptdev_msi *)data; 426 error = ppt_setup_msi(sc->vm, pptmsi->vcpu, 427 pptmsi->bus, pptmsi->slot, pptmsi->func, 428 pptmsi->addr, pptmsi->msg, 429 pptmsi->numvec); 430 break; 431 case VM_PPTDEV_MSIX: 432 pptmsix = (struct vm_pptdev_msix *)data; 433 error = ppt_setup_msix(sc->vm, pptmsix->vcpu, 434 pptmsix->bus, pptmsix->slot, 435 pptmsix->func, pptmsix->idx, 436 pptmsix->addr, pptmsix->msg, 437 pptmsix->vector_control); 438 break; 439 case VM_MAP_PPTDEV_MMIO: 440 pptmmio = (struct vm_pptdev_mmio *)data; 441 error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot, 442 pptmmio->func, pptmmio->gpa, pptmmio->len, 443 pptmmio->hpa); 444 break; 445 case VM_BIND_PPTDEV: 446 pptdev = (struct vm_pptdev *)data; 447 error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot, 448 pptdev->func); 449 break; 450 case VM_UNBIND_PPTDEV: 451 pptdev = (struct vm_pptdev *)data; 452 error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot, 453 pptdev->func); 454 break; 455 case VM_INJECT_EXCEPTION: 456 vmexc = (struct vm_exception *)data; 457 error = vm_inject_exception(sc->vm, vmexc->cpuid, 458 vmexc->vector, vmexc->error_code_valid, vmexc->error_code, 459 vmexc->restart_instruction); 460 break; 461 case VM_INJECT_NMI: 462 vmnmi = (struct vm_nmi *)data; 463 error = vm_inject_nmi(sc->vm, vmnmi->cpuid); 464 break; 465 case VM_LAPIC_IRQ: 466 vmirq = (struct vm_lapic_irq *)data; 467 error = lapic_intr_edge(sc->vm, vmirq->cpuid, vmirq->vector); 468 break; 469 case VM_LAPIC_LOCAL_IRQ: 470 vmirq = (struct vm_lapic_irq *)data; 471 error = lapic_set_local_intr(sc->vm, vmirq->cpuid, 472 vmirq->vector); 473 break; 474 case VM_LAPIC_MSI: 475 vmmsi = (struct vm_lapic_msi *)data; 476 error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg); 477 break; 478 case VM_IOAPIC_ASSERT_IRQ: 479 ioapic_irq = (struct vm_ioapic_irq *)data; 480 error = vioapic_assert_irq(sc->vm, ioapic_irq->irq); 481 break; 482 case VM_IOAPIC_DEASSERT_IRQ: 483 ioapic_irq = (struct vm_ioapic_irq *)data; 484 error = vioapic_deassert_irq(sc->vm, ioapic_irq->irq); 485 break; 486 case VM_IOAPIC_PULSE_IRQ: 487 ioapic_irq = (struct vm_ioapic_irq *)data; 488 error = vioapic_pulse_irq(sc->vm, ioapic_irq->irq); 489 break; 490 case VM_IOAPIC_PINCOUNT: 491 *(int *)data = vioapic_pincount(sc->vm); 492 break; 493 case VM_ISA_ASSERT_IRQ: 494 isa_irq = (struct vm_isa_irq *)data; 495 error = vatpic_assert_irq(sc->vm, isa_irq->atpic_irq); 496 if (error == 0 && isa_irq->ioapic_irq != -1) 497 error = vioapic_assert_irq(sc->vm, 498 isa_irq->ioapic_irq); 499 break; 500 case VM_ISA_DEASSERT_IRQ: 501 isa_irq = (struct vm_isa_irq *)data; 502 error = vatpic_deassert_irq(sc->vm, isa_irq->atpic_irq); 503 if (error == 0 && isa_irq->ioapic_irq != -1) 504 error = vioapic_deassert_irq(sc->vm, 505 isa_irq->ioapic_irq); 506 break; 507 case VM_ISA_PULSE_IRQ: 508 isa_irq = (struct vm_isa_irq *)data; 509 error = vatpic_pulse_irq(sc->vm, isa_irq->atpic_irq); 510 if (error == 0 && isa_irq->ioapic_irq != -1) 511 error = vioapic_pulse_irq(sc->vm, isa_irq->ioapic_irq); 512 break; 513 case VM_ISA_SET_IRQ_TRIGGER: 514 isa_irq_trigger = (struct vm_isa_irq_trigger *)data; 515 error = vatpic_set_irq_trigger(sc->vm, 516 isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger); 517 break; 518 case VM_MMAP_GETNEXT: 519 mm = (struct vm_memmap *)data; 520 error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid, 521 &mm->segoff, &mm->len, &mm->prot, &mm->flags); 522 break; 523 case VM_MMAP_MEMSEG: 524 mm = (struct vm_memmap *)data; 525 error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff, 526 mm->len, mm->prot, mm->flags); 527 break; 528 case VM_ALLOC_MEMSEG: 529 error = alloc_memseg(sc, (struct vm_memseg *)data); 530 break; 531 case VM_GET_MEMSEG: 532 error = get_memseg(sc, (struct vm_memseg *)data); 533 break; 534 case VM_GET_REGISTER: 535 vmreg = (struct vm_register *)data; 536 error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum, 537 &vmreg->regval); 538 break; 539 case VM_SET_REGISTER: 540 vmreg = (struct vm_register *)data; 541 error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum, 542 vmreg->regval); 543 break; 544 case VM_SET_SEGMENT_DESCRIPTOR: 545 vmsegdesc = (struct vm_seg_desc *)data; 546 error = vm_set_seg_desc(sc->vm, vmsegdesc->cpuid, 547 vmsegdesc->regnum, 548 &vmsegdesc->desc); 549 break; 550 case VM_GET_SEGMENT_DESCRIPTOR: 551 vmsegdesc = (struct vm_seg_desc *)data; 552 error = vm_get_seg_desc(sc->vm, vmsegdesc->cpuid, 553 vmsegdesc->regnum, 554 &vmsegdesc->desc); 555 break; 556 case VM_GET_CAPABILITY: 557 vmcap = (struct vm_capability *)data; 558 error = vm_get_capability(sc->vm, vmcap->cpuid, 559 vmcap->captype, 560 &vmcap->capval); 561 break; 562 case VM_SET_CAPABILITY: 563 vmcap = (struct vm_capability *)data; 564 error = vm_set_capability(sc->vm, vmcap->cpuid, 565 vmcap->captype, 566 vmcap->capval); 567 break; 568 case VM_SET_X2APIC_STATE: 569 x2apic = (struct vm_x2apic *)data; 570 error = vm_set_x2apic_state(sc->vm, 571 x2apic->cpuid, x2apic->state); 572 break; 573 case VM_GET_X2APIC_STATE: 574 x2apic = (struct vm_x2apic *)data; 575 error = vm_get_x2apic_state(sc->vm, 576 x2apic->cpuid, &x2apic->state); 577 break; 578 case VM_GET_GPA_PMAP: 579 gpapte = (struct vm_gpa_pte *)data; 580 pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)), 581 gpapte->gpa, gpapte->pte, &gpapte->ptenum); 582 error = 0; 583 break; 584 case VM_GET_HPET_CAPABILITIES: 585 error = vhpet_getcap((struct vm_hpet_cap *)data); 586 break; 587 case VM_GLA2GPA: { 588 CTASSERT(PROT_READ == VM_PROT_READ); 589 CTASSERT(PROT_WRITE == VM_PROT_WRITE); 590 CTASSERT(PROT_EXEC == VM_PROT_EXECUTE); 591 gg = (struct vm_gla2gpa *)data; 592 error = vm_gla2gpa(sc->vm, gg->vcpuid, &gg->paging, gg->gla, 593 gg->prot, &gg->gpa, &gg->fault); 594 KASSERT(error == 0 || error == EFAULT, 595 ("%s: vm_gla2gpa unknown error %d", __func__, error)); 596 break; 597 } 598 case VM_ACTIVATE_CPU: 599 vac = (struct vm_activate_cpu *)data; 600 error = vm_activate_cpu(sc->vm, vac->vcpuid); 601 break; 602 case VM_GET_CPUS: 603 error = 0; 604 vm_cpuset = (struct vm_cpuset *)data; 605 size = vm_cpuset->cpusetsize; 606 if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) { 607 error = ERANGE; 608 break; 609 } 610 cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO); 611 if (vm_cpuset->which == VM_ACTIVE_CPUS) 612 *cpuset = vm_active_cpus(sc->vm); 613 else if (vm_cpuset->which == VM_SUSPENDED_CPUS) 614 *cpuset = vm_suspended_cpus(sc->vm); 615 else 616 error = EINVAL; 617 if (error == 0) 618 error = copyout(cpuset, vm_cpuset->cpus, size); 619 free(cpuset, M_TEMP); 620 break; 621 case VM_SET_INTINFO: 622 vmii = (struct vm_intinfo *)data; 623 error = vm_exit_intinfo(sc->vm, vmii->vcpuid, vmii->info1); 624 break; 625 case VM_GET_INTINFO: 626 vmii = (struct vm_intinfo *)data; 627 error = vm_get_intinfo(sc->vm, vmii->vcpuid, &vmii->info1, 628 &vmii->info2); 629 break; 630 case VM_RTC_WRITE: 631 rtcdata = (struct vm_rtc_data *)data; 632 error = vrtc_nvram_write(sc->vm, rtcdata->offset, 633 rtcdata->value); 634 break; 635 case VM_RTC_READ: 636 rtcdata = (struct vm_rtc_data *)data; 637 error = vrtc_nvram_read(sc->vm, rtcdata->offset, 638 &rtcdata->value); 639 break; 640 case VM_RTC_SETTIME: 641 rtctime = (struct vm_rtc_time *)data; 642 error = vrtc_set_time(sc->vm, rtctime->secs); 643 break; 644 case VM_RTC_GETTIME: 645 error = 0; 646 rtctime = (struct vm_rtc_time *)data; 647 rtctime->secs = vrtc_get_time(sc->vm); 648 break; 649 case VM_RESTART_INSTRUCTION: 650 error = vm_restart_instruction(sc->vm, vcpu); 651 break; 652 case VM_SET_TOPOLOGY: 653 topology = (struct vm_cpu_topology *)data; 654 error = vm_set_topology(sc->vm, topology->sockets, 655 topology->cores, topology->threads, topology->maxcpus); 656 break; 657 case VM_GET_TOPOLOGY: 658 topology = (struct vm_cpu_topology *)data; 659 vm_get_topology(sc->vm, &topology->sockets, &topology->cores, 660 &topology->threads, &topology->maxcpus); 661 error = 0; 662 break; 663 default: 664 error = ENOTTY; 665 break; 666 } 667 668 if (state_changed == 1) 669 vcpu_unlock_one(sc, vcpu); 670 else if (state_changed == 2) 671 vcpu_unlock_all(sc); 672 673done: 674 /* Make sure that no handler returns a bogus value like ERESTART */ 675 KASSERT(error >= 0, ("vmmdev_ioctl: invalid error return %d", error)); 676 return (error); 677} 678 679static int 680vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize, 681 struct vm_object **objp, int nprot) 682{ 683 struct vmmdev_softc *sc; 684 vm_paddr_t gpa; 685 size_t len; 686 vm_ooffset_t segoff, first, last; 687 int error, found, segid; 688 uint16_t lastcpu; 689 bool sysmem; 690 691 first = *offset; 692 last = first + mapsize; 693 if ((nprot & PROT_EXEC) || first < 0 || first >= last) 694 return (EINVAL); 695 696 sc = vmmdev_lookup2(cdev); 697 if (sc == NULL) { 698 /* virtual machine is in the process of being created */ 699 return (EINVAL); 700 } 701 702 /* 703 * Get a read lock on the guest memory map by freezing any vcpu. 704 */ 705 lastcpu = vm_get_maxcpus(sc->vm) - 1; 706 error = vcpu_lock_one(sc, lastcpu); 707 if (error) 708 return (error); 709 710 gpa = 0; 711 found = 0; 712 while (!found) { 713 error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len, 714 NULL, NULL); 715 if (error) 716 break; 717 718 if (first >= gpa && last <= gpa + len) 719 found = 1; 720 else 721 gpa += len; 722 } 723 724 if (found) { 725 error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp); 726 KASSERT(error == 0 && *objp != NULL, 727 ("%s: invalid memory segment %d", __func__, segid)); 728 if (sysmem) { 729 vm_object_reference(*objp); 730 *offset = segoff + (first - gpa); 731 } else { 732 error = EINVAL; 733 } 734 } 735 vcpu_unlock_one(sc, lastcpu); 736 return (error); 737} 738 739static void 740vmmdev_destroy(void *arg) 741{ 742 struct vmmdev_softc *sc = arg; 743 struct devmem_softc *dsc; 744 int error; 745 746 error = vcpu_lock_all(sc); 747 KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error)); 748 749 while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) { 750 KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__)); 751 SLIST_REMOVE_HEAD(&sc->devmem, link); 752 free(dsc->name, M_VMMDEV); 753 free(dsc, M_VMMDEV); 754 } 755 756 if (sc->cdev != NULL) 757 destroy_dev(sc->cdev); 758 759 if (sc->vm != NULL) 760 vm_destroy(sc->vm); 761 762 if ((sc->flags & VSC_LINKED) != 0) { 763 mtx_lock(&vmmdev_mtx); 764 SLIST_REMOVE(&head, sc, vmmdev_softc, link); 765 mtx_unlock(&vmmdev_mtx); 766 } 767 768 free(sc, M_VMMDEV); 769} 770 771static int 772sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS) 773{ 774 int error; 775 char buf[VM_MAX_NAMELEN]; 776 struct devmem_softc *dsc; 777 struct vmmdev_softc *sc; 778 struct cdev *cdev; 779 780 strlcpy(buf, "beavis", sizeof(buf)); 781 error = sysctl_handle_string(oidp, buf, sizeof(buf), req); 782 if (error != 0 || req->newptr == NULL) 783 return (error); 784 785 mtx_lock(&vmmdev_mtx); 786 sc = vmmdev_lookup(buf); 787 if (sc == NULL || sc->cdev == NULL) { 788 mtx_unlock(&vmmdev_mtx); 789 return (EINVAL); 790 } 791 792 /* 793 * The 'cdev' will be destroyed asynchronously when 'si_threadcount' 794 * goes down to 0 so we should not do it again in the callback. 795 * 796 * Setting 'sc->cdev' to NULL is also used to indicate that the VM 797 * is scheduled for destruction. 798 */ 799 cdev = sc->cdev; 800 sc->cdev = NULL; 801 mtx_unlock(&vmmdev_mtx); 802 803 /* 804 * Schedule all cdevs to be destroyed: 805 * 806 * - any new operations on the 'cdev' will return an error (ENXIO). 807 * 808 * - when the 'si_threadcount' dwindles down to zero the 'cdev' will 809 * be destroyed and the callback will be invoked in a taskqueue 810 * context. 811 * 812 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev' 813 */ 814 SLIST_FOREACH(dsc, &sc->devmem, link) { 815 KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed")); 816 destroy_dev_sched_cb(dsc->cdev, devmem_destroy, dsc); 817 } 818 destroy_dev_sched_cb(cdev, vmmdev_destroy, sc); 819 return (0); 820} 821SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW, 822 NULL, 0, sysctl_vmm_destroy, "A", NULL); 823 824static struct cdevsw vmmdevsw = { 825 .d_name = "vmmdev", 826 .d_version = D_VERSION, 827 .d_ioctl = vmmdev_ioctl, 828 .d_mmap_single = vmmdev_mmap_single, 829 .d_read = vmmdev_rw, 830 .d_write = vmmdev_rw, 831}; 832 833static int 834sysctl_vmm_create(SYSCTL_HANDLER_ARGS) 835{ 836 int error; 837 struct vm *vm; 838 struct cdev *cdev; 839 struct vmmdev_softc *sc, *sc2; 840 char buf[VM_MAX_NAMELEN]; 841 842 strlcpy(buf, "beavis", sizeof(buf)); 843 error = sysctl_handle_string(oidp, buf, sizeof(buf), req); 844 if (error != 0 || req->newptr == NULL) 845 return (error); 846 847 mtx_lock(&vmmdev_mtx); 848 sc = vmmdev_lookup(buf); 849 mtx_unlock(&vmmdev_mtx); 850 if (sc != NULL) 851 return (EEXIST); 852 853 error = vm_create(buf, &vm); 854 if (error != 0) 855 return (error); 856 857 sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO); 858 sc->vm = vm; 859 SLIST_INIT(&sc->devmem); 860 861 /* 862 * Lookup the name again just in case somebody sneaked in when we 863 * dropped the lock. 864 */ 865 mtx_lock(&vmmdev_mtx); 866 sc2 = vmmdev_lookup(buf); 867 if (sc2 == NULL) { 868 SLIST_INSERT_HEAD(&head, sc, link); 869 sc->flags |= VSC_LINKED; 870 } 871 mtx_unlock(&vmmdev_mtx); 872 873 if (sc2 != NULL) { 874 vmmdev_destroy(sc); 875 return (EEXIST); 876 } 877 878 error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, NULL, 879 UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf); 880 if (error != 0) { 881 vmmdev_destroy(sc); 882 return (error); 883 } 884 885 mtx_lock(&vmmdev_mtx); 886 sc->cdev = cdev; 887 sc->cdev->si_drv1 = sc; 888 mtx_unlock(&vmmdev_mtx); 889 890 return (0); 891} 892SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW, 893 NULL, 0, sysctl_vmm_create, "A", NULL); 894 895void 896vmmdev_init(void) 897{ 898 mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF); 899} 900 901int 902vmmdev_cleanup(void) 903{ 904 int error; 905 906 if (SLIST_EMPTY(&head)) 907 error = 0; 908 else 909 error = EBUSY; 910 911 return (error); 912} 913 914static int 915devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len, 916 struct vm_object **objp, int nprot) 917{ 918 struct devmem_softc *dsc; 919 vm_ooffset_t first, last; 920 size_t seglen; 921 int error; 922 uint16_t lastcpu; 923 bool sysmem; 924 925 dsc = cdev->si_drv1; 926 if (dsc == NULL) { 927 /* 'cdev' has been created but is not ready for use */ 928 return (ENXIO); 929 } 930 931 first = *offset; 932 last = *offset + len; 933 if ((nprot & PROT_EXEC) || first < 0 || first >= last) 934 return (EINVAL); 935 936 lastcpu = vm_get_maxcpus(dsc->sc->vm) - 1; 937 error = vcpu_lock_one(dsc->sc, lastcpu); 938 if (error) 939 return (error); 940 941 error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp); 942 KASSERT(error == 0 && !sysmem && *objp != NULL, 943 ("%s: invalid devmem segment %d", __func__, dsc->segid)); 944 945 vcpu_unlock_one(dsc->sc, lastcpu); 946 947 if (seglen >= last) { 948 vm_object_reference(*objp); 949 return (0); 950 } else { 951 return (EINVAL); 952 } 953} 954 955static struct cdevsw devmemsw = { 956 .d_name = "devmem", 957 .d_version = D_VERSION, 958 .d_mmap_single = devmem_mmap_single, 959}; 960 961static int 962devmem_create_cdev(const char *vmname, int segid, char *devname) 963{ 964 struct devmem_softc *dsc; 965 struct vmmdev_softc *sc; 966 struct cdev *cdev; 967 int error; 968 969 error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL, 970 UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname); 971 if (error) 972 return (error); 973 974 dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO); 975 976 mtx_lock(&vmmdev_mtx); 977 sc = vmmdev_lookup(vmname); 978 KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname)); 979 if (sc->cdev == NULL) { 980 /* virtual machine is being created or destroyed */ 981 mtx_unlock(&vmmdev_mtx); 982 free(dsc, M_VMMDEV); 983 destroy_dev_sched_cb(cdev, NULL, 0); 984 return (ENODEV); 985 } 986 987 dsc->segid = segid; 988 dsc->name = devname; 989 dsc->cdev = cdev; 990 dsc->sc = sc; 991 SLIST_INSERT_HEAD(&sc->devmem, dsc, link); 992 mtx_unlock(&vmmdev_mtx); 993 994 /* The 'cdev' is ready for use after 'si_drv1' is initialized */ 995 cdev->si_drv1 = dsc; 996 return (0); 997} 998 999static void 1000devmem_destroy(void *arg) 1001{ 1002 struct devmem_softc *dsc = arg; 1003 1004 KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__)); 1005 dsc->cdev = NULL; 1006 dsc->sc = NULL; 1007} 1008