/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "opt_bhyve_snapshot.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/jail.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/sysctl.h>
#include <sys/libkern.h>
#include <sys/ioccom.h>
#include <sys/mman.h>
#include <sys/uio.h>
#include <sys/proc.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>
#include <machine/vmm_snapshot.h>
#include <x86/apicreg.h>

#include "vmm_lapic.h"
#include "vmm_stat.h"
#include "vmm_mem.h"
#include "io/ppt.h"
#include "io/vatpic.h"
#include "io/vioapic.h"
#include "io/vhpet.h"
#include "io/vrtc.h"

#ifdef COMPAT_FREEBSD13
/*
 * Legacy ioctl ABI shims for binaries built against the FreeBSD 13
 * layouts of the stats, snapshot and run structures.  The old ioctl
 * numbers are re-derived here from the old struct sizes so that both
 * generations of each ioctl can be dispatched side by side.
 */
struct vm_stats_old {
	int		cpuid;				/* in */
	int		num_entries;			/* out */
	struct timeval	tv;
	uint64_t	statbuf[MAX_VM_STATS];
};

#define	VM_STATS_OLD \
	_IOWR('v', IOCNUM_VM_STATS, struct vm_stats_old)

struct vm_snapshot_meta_old {
	void *ctx;			/* unused */
	void *dev_data;
	const char *dev_name;		/* identify userspace devices */
	enum snapshot_req dev_req;	/* identify kernel structs */

	struct vm_snapshot_buffer buffer;

	enum vm_snapshot_op op;
};

#define	VM_SNAPSHOT_REQ_OLD \
	_IOWR('v', IOCNUM_SNAPSHOT_REQ, struct vm_snapshot_meta_old)

/* Old IPI exit payload: destination mask was a fixed 256-bit set. */
struct vm_exit_ipi_13 {
	uint32_t	mode;
	uint8_t		vector;
	__BITSET_DEFINE(, 256) dmask;
};

struct vm_exit_13 {
	uint32_t	exitcode;
	int32_t		inst_length;
	uint64_t	rip;
	uint64_t	u[120 / sizeof(uint64_t)];
};

struct vm_run_13 {
	int		cpuid;
	struct vm_exit_13 vm_exit;
};

#define	VM_RUN_13 \
	_IOWR('v', IOCNUM_RUN, struct vm_run_13)

#endif /* COMPAT_FREEBSD13 */

/*
 * State for one named ("devmem") memory segment; each such segment is
 * exposed to userspace through its own character device (created by
 * devmem_create_cdev()).
 */
struct devmem_softc {
	int	segid;		/* memory segment id within the VM */
	char	*name;		/* segment name; owned by this struct */
	struct cdev *cdev;	/* device node for this segment */
	struct vmmdev_softc *sc; /* back pointer to the owning VM device */
	SLIST_ENTRY(devmem_softc) link;
};

/* Per-VM state backing a /dev/vmm/<name> character device. */
struct vmmdev_softc {
	struct vm	*vm;		/* vm instance cookie */
	struct cdev	*cdev;
	struct ucred	*ucred;
	SLIST_ENTRY(vmmdev_softc) link;
	SLIST_HEAD(, devmem_softc) devmem;
	int		flags;
};
#define	VSC_LINKED		0x01	/* softc is on the global 'head' list */

static SLIST_HEAD(, vmmdev_softc) head;

static unsigned pr_allow_flag;
static struct mtx vmmdev_mtx;
MTX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex", MTX_DEF);

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static int vmm_priv_check(struct ucred *ucred);
static int devmem_create_cdev(const char *vmname, int id, char *devmem);
static void devmem_destroy(void *arg);

/*
 * Deny access to jailed processes unless the jail carries the vmm
 * allow flag (pr_allow_flag).
 */
static int
vmm_priv_check(struct ucred *ucred)
{

	if (jailed(ucred) &&
	    !(ucred->cr_prison->pr_allow & pr_allow_flag))
		return (EPERM);

	return (0);
}

/* Freeze a single vcpu; fails if the vcpu cannot be transitioned. */
static int
vcpu_lock_one(struct vcpu *vcpu)
{
	return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
}

/*
 * Undo vcpu_lock_one().  The vcpu must currently be FROZEN; anything
 * else indicates a locking bug, hence the panic.
 */
static void
vcpu_unlock_one(struct vcpu *vcpu)
{
	enum vcpu_state state;

	state = vcpu_get_state(vcpu, NULL);
	if (state != VCPU_FROZEN) {
		panic("vcpu %s(%d) has invalid state %d",
		    vm_name(vcpu_vm(vcpu)), vcpu_vcpuid(vcpu), state);
	}

	vcpu_set_state(vcpu, VCPU_IDLE, false);
}

/*
 * Freeze every created vcpu of the VM.  On failure all vcpus frozen so
 * far are thawed again and the vcpu list lock is dropped; on success
 * the caller owns all vcpus plus the (shared) vcpu list lock and must
 * release them with vcpu_unlock_all().
 */
static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	int error;
	uint16_t i, j, maxcpus;

	error = 0;
	vm_slock_vcpus(sc->vm);
	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		error = vcpu_lock_one(vcpu);
		if (error)
			break;
	}

	if (error) {
		/* Roll back the vcpus locked before the failure. */
		for (j = 0; j < i; j++) {
			vcpu = vm_vcpu(sc->vm, j);
			if (vcpu == NULL)
				continue;
			vcpu_unlock_one(vcpu);
		}
		vm_unlock_vcpus(sc->vm);
	}

	return (error);
}

/* Thaw every vcpu frozen by a successful vcpu_lock_all(). */
static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
	struct vcpu *vcpu;
	uint16_t i, maxcpus;

	maxcpus = vm_get_maxcpus(sc->vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(sc->vm, i);
		if (vcpu == NULL)
			continue;
		vcpu_unlock_one(vcpu);
	}
	vm_unlock_vcpus(sc->vm);
}

/*
 * Find the softc for the VM named 'name'.  Returns NULL if there is no
 * such VM or if the caller's credentials may not see its owner
 * (cr_cansee).  Caller is expected to hold vmmdev_mtx.
 */
static struct vmmdev_softc *
vmmdev_lookup(const char *name)
{
	struct vmmdev_softc *sc;

#ifdef notyet	/* XXX kernel is not compiled with invariants */
	mtx_assert(&vmmdev_mtx, MA_OWNED);
#endif

	SLIST_FOREACH(sc, &head, link) {
		if (strcmp(name, vm_name(sc->vm)) == 0)
			break;
	}

	if (sc == NULL)
		return (NULL);

	if (cr_cansee(curthread->td_ucred, sc->ucred))
		return (NULL);

	return (sc);
}

/* Softc attached to the cdev at creation time (NULL while creating). */
static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{

	return (cdev->si_drv1);
}

/*
 * read(2)/write(2) handler for /dev/vmm/<name>: copies guest physical
 * memory at uio_offset to/from userspace, one page fragment at a time.
 */
static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa, maxaddr;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;

	error = vmm_priv_check(curthread->td_ucred);
	if (error)
		return (error);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/*
	 * Get a read lock on the guest memory map.
	 */
	vm_slock_memsegs(sc->vm);

	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	maxaddr = vmm_sysmem_maxaddr(sc->vm);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map. If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
		if (hpa == NULL) {
			/* Reads from holes below maxaddr return zeros. */
			if (uio->uio_rw == UIO_READ && gpa < maxaddr)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	vm_unlock_memsegs(sc->vm);
	return (error);
}

CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);

/*
 * Fill in 'mseg' with the length and (for devmem segments) the name of
 * memory segment 'mseg->segid'.  For anonymous sysmem segments the name
 * is zeroed.
 */
static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	struct devmem_softc *dsc;
	int error;
	bool sysmem;

	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
	if (error || mseg->len == 0)
		return (error);

	if (!sysmem) {
		SLIST_FOREACH(dsc, &sc->devmem, link) {
			if (dsc->segid == mseg->segid)
				break;
		}
		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
		    __func__, mseg->segid));
		error = copystr(dsc->name, mseg->name, len, NULL);
	} else {
		bzero(mseg->name, len);
	}

	return (error);
}

/*
 * Allocate memory segment 'mseg->segid'.  A named segment becomes a
 * devmem segment and gets its own cdev; an unnamed one is sysmem.
 */
static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
{
	char *name;
	int error;
	bool sysmem;

	error = 0;
	name = NULL;
	sysmem = true;

	/*
	 * The allocation is lengthened by 1 to hold a terminating NUL.  It'll
	 * be stripped off when devfs processes the full string.
	 */
	if (VM_MEMSEG_NAME(mseg)) {
		sysmem = false;
		name = malloc(len, M_VMMDEV, M_WAITOK);
		error = copystr(mseg->name, name, len, NULL);
		if (error)
			goto done;
	}

	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
	if (error)
		goto done;

	if (VM_MEMSEG_NAME(mseg)) {
		error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
		if (error)
			vm_free_memseg(sc->vm, mseg->segid);
		else
			name = NULL;	/* freed when 'cdev' is destroyed */
	}
done:
	free(name, M_VMMDEV);
	return (error);
}

/*
 * Read 'count' registers identified by 'regnum' into 'regval'.  Stops
 * at the first failure and returns that error.
 */
static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_get_register(vcpu, regnum[i], &regval[i]);
		if (error)
			break;
	}
	return (error);
}

/*
 * Write 'count' registers identified by 'regnum' from 'regval'.  Stops
 * at the first failure; earlier registers remain written.
 */
static int
vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	int error, i;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_set_register(vcpu, regnum[i], regval[i]);
		if (error)
			break;
	}
	return (error);
}

/*
 * Main ioctl dispatcher for /dev/vmm/<name>.  A first switch classifies
 * the command and takes the locks it needs (single vcpu, all vcpus,
 * and/or the memseg lock); a second switch performs the operation.  The
 * 'done' label releases whatever was taken.
 */
static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	int error, vcpuid, size;
	cpuset_t *cpuset;
	struct vmmdev_softc *sc;
	struct vcpu *vcpu;
	struct vm_register *vmreg;
	struct vm_seg_desc *vmsegdesc;
	struct vm_register_set *vmregset;
	struct vm_run *vmrun;
#ifdef COMPAT_FREEBSD13
	struct vm_run_13 *vmrun_13;
#endif
	struct vm_exception *vmexc;
	struct vm_lapic_irq *vmirq;
	struct vm_lapic_msi *vmmsi;
	struct vm_ioapic_irq *ioapic_irq;
	struct vm_isa_irq *isa_irq;
	struct vm_isa_irq_trigger *isa_irq_trigger;
	struct vm_capability *vmcap;
	struct vm_pptdev *pptdev;
	struct vm_pptdev_mmio *pptmmio;
	struct vm_pptdev_msi *pptmsi;
	struct vm_pptdev_msix *pptmsix;
#ifdef COMPAT_FREEBSD13
	struct vm_stats_old *vmstats_old;
#endif
	struct vm_stats *vmstats;
	struct vm_stat_desc *statdesc;
	struct vm_x2apic *x2apic;
	struct vm_gpa_pte *gpapte;
	struct vm_suspend *vmsuspend;
	struct vm_gla2gpa *gg;
	struct vm_cpuset *vm_cpuset;
	struct vm_intinfo *vmii;
	struct vm_rtc_time *rtctime;
	struct vm_rtc_data *rtcdata;
	struct vm_memmap *mm;
	struct vm_munmap *mu;
	struct vm_cpu_topology *topology;
	struct vm_readwrite_kernemu_device *kernemu;
	uint64_t *regvals;
	int *regnums;
	enum { NONE, SINGLE, ALL } vcpus_locked;
	bool memsegs_locked;
#ifdef BHYVE_SNAPSHOT
	struct vm_snapshot_meta *snapshot_meta;
#ifdef COMPAT_FREEBSD13
	struct vm_snapshot_meta_old *snapshot_old;
#endif
#endif

	error = vmm_priv_check(curthread->td_ucred);
	if (error)
		return (error);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	vcpuid = -1;
	vcpu = NULL;
	vcpus_locked = NONE;
	memsegs_locked = false;

	/*
	 * For VMM ioctls that operate on a single vCPU, lookup the
	 * vcpu.  For VMM ioctls which require one or more vCPUs to
	 * not be running, lock necessary vCPUs.
	 *
	 * XXX fragile, handle with care
	 * Most of these assume that the first field of the ioctl data
	 * is the vcpuid.
	 */
	switch (cmd) {
	case VM_RUN:
#ifdef COMPAT_FREEBSD13
	case VM_RUN_13:
#endif
	case VM_GET_REGISTER:
	case VM_SET_REGISTER:
	case VM_GET_SEGMENT_DESCRIPTOR:
	case VM_SET_SEGMENT_DESCRIPTOR:
	case VM_GET_REGISTER_SET:
	case VM_SET_REGISTER_SET:
	case VM_INJECT_EXCEPTION:
	case VM_GET_CAPABILITY:
	case VM_SET_CAPABILITY:
	case VM_SET_X2APIC_STATE:
	case VM_GLA2GPA:
	case VM_GLA2GPA_NOFAULT:
	case VM_ACTIVATE_CPU:
	case VM_SET_INTINFO:
	case VM_GET_INTINFO:
	case VM_RESTART_INSTRUCTION:
	case VM_GET_KERNEMU_DEV:
	case VM_SET_KERNEMU_DEV:
		/*
		 * ioctls that can operate only on vcpus that are not running.
		 */
		vcpuid = *(int *)data;
		vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
		if (vcpu == NULL) {
			error = EINVAL;
			goto done;
		}
		error = vcpu_lock_one(vcpu);
		if (error)
			goto done;
		vcpus_locked = SINGLE;
		break;

#ifdef COMPAT_FREEBSD12
	case VM_ALLOC_MEMSEG_FBSD12:
#endif
	case VM_ALLOC_MEMSEG:
	case VM_BIND_PPTDEV:
	case VM_UNBIND_PPTDEV:
	case VM_MMAP_MEMSEG:
	case VM_MUNMAP_MEMSEG:
	case VM_REINIT:
		/*
		 * ioctls that modify the memory map must lock memory
		 * segments exclusively.
		 */
		vm_xlock_memsegs(sc->vm);
		memsegs_locked = true;
		/* FALLTHROUGH */
	case VM_MAP_PPTDEV_MMIO:
	case VM_UNMAP_PPTDEV_MMIO:
#ifdef BHYVE_SNAPSHOT
	case VM_SNAPSHOT_REQ:
#ifdef COMPAT_FREEBSD13
	case VM_SNAPSHOT_REQ_OLD:
#endif
	case VM_RESTORE_TIME:
#endif
		/*
		 * ioctls that operate on the entire virtual machine must
		 * prevent all vcpus from running.
		 */
		error = vcpu_lock_all(sc);
		if (error)
			goto done;
		vcpus_locked = ALL;
		break;

#ifdef COMPAT_FREEBSD12
	case VM_GET_MEMSEG_FBSD12:
#endif
	case VM_GET_MEMSEG:
	case VM_MMAP_GETNEXT:
		/*
		 * Lock the memory map while it is being inspected.
		 */
		vm_slock_memsegs(sc->vm);
		memsegs_locked = true;
		break;

#ifdef COMPAT_FREEBSD13
	case VM_STATS_OLD:
#endif
	case VM_STATS:
	case VM_INJECT_NMI:
	case VM_LAPIC_IRQ:
	case VM_GET_X2APIC_STATE:
		/*
		 * These do not need the vCPU locked but do operate on
		 * a specific vCPU.
		 */
		vcpuid = *(int *)data;
		vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
		if (vcpu == NULL) {
			error = EINVAL;
			goto done;
		}
		break;

	case VM_LAPIC_LOCAL_IRQ:
	case VM_SUSPEND_CPU:
	case VM_RESUME_CPU:
		/*
		 * These can either operate on all CPUs via a vcpuid of
		 * -1 or on a specific vCPU.
		 */
		vcpuid = *(int *)data;
		if (vcpuid == -1)
			break;
		vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
		if (vcpu == NULL) {
			error = EINVAL;
			goto done;
		}
		break;

	default:
		break;
	}

	switch (cmd) {
	case VM_RUN: {
		struct vm_exit *vme;

		vmrun = (struct vm_run *)data;
		vme = vm_exitinfo(vcpu);

		error = vm_run(vcpu);
		if (error != 0)
			break;

		error = copyout(vme, vmrun->vm_exit, sizeof(*vme));
		if (error != 0)
			break;
		if (vme->exitcode == VM_EXITCODE_IPI) {
			error = copyout(vm_exitinfo_cpuset(vcpu),
			    vmrun->cpuset,
			    min(vmrun->cpusetsize, sizeof(cpuset_t)));
			if (error != 0)
				break;
			/*
			 * If the user's cpuset is larger than the kernel's,
			 * zero-fill the tail byte by byte.
			 */
			if (sizeof(cpuset_t) < vmrun->cpusetsize) {
				uint8_t *p;

				p = (uint8_t *)vmrun->cpuset +
				    sizeof(cpuset_t);
				while (p < (uint8_t *)vmrun->cpuset +
				    vmrun->cpusetsize) {
					if (subyte(p++, 0) != 0) {
						error = EFAULT;
						break;
					}
				}
			}
		}
		break;
	}
#ifdef COMPAT_FREEBSD13
	case VM_RUN_13: {
		struct vm_exit *vme;
		struct vm_exit_13 *vme_13;

		vmrun_13 = (struct vm_run_13 *)data;
		vme_13 = &vmrun_13->vm_exit;
		vme = vm_exitinfo(vcpu);

		error = vm_run(vcpu);
		if (error == 0) {
			/* Convert to the FreeBSD 13 exit layout. */
			vme_13->exitcode = vme->exitcode;
			vme_13->inst_length = vme->inst_length;
			vme_13->rip = vme->rip;
			memcpy(vme_13->u, &vme->u, sizeof(vme_13->u));
			if (vme->exitcode == VM_EXITCODE_IPI) {
				struct vm_exit_ipi_13 *ipi;
				cpuset_t *dmask;
				int cpu;

				/*
				 * The old ABI carried the IPI destination
				 * mask inline, truncated to 256 CPUs.
				 */
				dmask = vm_exitinfo_cpuset(vcpu);
				ipi = (struct vm_exit_ipi_13 *)&vme_13->u[0];
				BIT_ZERO(256, &ipi->dmask);
				CPU_FOREACH_ISSET(cpu, dmask) {
					if (cpu >= 256)
						break;
					BIT_SET(256, cpu, &ipi->dmask);
				}
			}
		}
		break;
	}
#endif
	case VM_SUSPEND:
		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index,
		    statdesc->desc, sizeof(statdesc->desc));
		break;
	}
#ifdef COMPAT_FREEBSD13
	case VM_STATS_OLD:
		vmstats_old = (struct vm_stats_old *)data;
		getmicrotime(&vmstats_old->tv);
		error = vmm_stat_copy(vcpu, 0,
		    nitems(vmstats_old->statbuf),
		    &vmstats_old->num_entries,
		    vmstats_old->statbuf);
		break;
#endif
	case VM_STATS: {
		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(vcpu, vmstats->index,
		    nitems(vmstats->statbuf),
		    &vmstats->num_entries, vmstats->statbuf);
		break;
	}
	case VM_PPTDEV_MSI:
		pptmsi = (struct vm_pptdev_msi *)data;
		error = ppt_setup_msi(sc->vm,
		    pptmsi->bus, pptmsi->slot, pptmsi->func,
		    pptmsi->addr, pptmsi->msg,
		    pptmsi->numvec);
		break;
	case VM_PPTDEV_MSIX:
		pptmsix = (struct vm_pptdev_msix *)data;
		error = ppt_setup_msix(sc->vm,
		    pptmsix->bus, pptmsix->slot,
		    pptmsix->func, pptmsix->idx,
		    pptmsix->addr, pptmsix->msg,
		    pptmsix->vector_control);
		break;
	case VM_PPTDEV_DISABLE_MSIX:
		pptdev = (struct vm_pptdev *)data;
		error = ppt_disable_msix(sc->vm, pptdev->bus, pptdev->slot,
		    pptdev->func);
		break;
	case VM_MAP_PPTDEV_MMIO:
		pptmmio = (struct vm_pptdev_mmio *)data;
		error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
		    pptmmio->func, pptmmio->gpa, pptmmio->len,
		    pptmmio->hpa);
		break;
	case VM_UNMAP_PPTDEV_MMIO:
		pptmmio = (struct vm_pptdev_mmio *)data;
		error = ppt_unmap_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
		    pptmmio->func, pptmmio->gpa, pptmmio->len);
		break;
	case VM_BIND_PPTDEV:
		pptdev = (struct vm_pptdev *)data;
		error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
		    pptdev->func);
		break;
	case VM_UNBIND_PPTDEV:
		pptdev = (struct vm_pptdev *)data;
		error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
		    pptdev->func);
		break;
	case VM_INJECT_EXCEPTION:
		vmexc = (struct vm_exception *)data;
		error = vm_inject_exception(vcpu,
		    vmexc->vector, vmexc->error_code_valid, vmexc->error_code,
		    vmexc->restart_instruction);
		break;
	case VM_INJECT_NMI:
		error = vm_inject_nmi(vcpu);
		break;
	case VM_LAPIC_IRQ:
		vmirq = (struct vm_lapic_irq *)data;
		error = lapic_intr_edge(vcpu, vmirq->vector);
		break;
	case VM_LAPIC_LOCAL_IRQ:
		vmirq = (struct vm_lapic_irq *)data;
		error = lapic_set_local_intr(sc->vm, vcpu, vmirq->vector);
		break;
	case VM_LAPIC_MSI:
		vmmsi = (struct vm_lapic_msi *)data;
		error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg);
		break;
	case VM_IOAPIC_ASSERT_IRQ:
		ioapic_irq = (struct vm_ioapic_irq *)data;
		error = vioapic_assert_irq(sc->vm, ioapic_irq->irq);
		break;
	case VM_IOAPIC_DEASSERT_IRQ:
		ioapic_irq = (struct vm_ioapic_irq *)data;
		error = vioapic_deassert_irq(sc->vm, ioapic_irq->irq);
		break;
	case VM_IOAPIC_PULSE_IRQ:
		ioapic_irq = (struct vm_ioapic_irq *)data;
		error = vioapic_pulse_irq(sc->vm, ioapic_irq->irq);
		break;
	case VM_IOAPIC_PINCOUNT:
		*(int *)data = vioapic_pincount(sc->vm);
		break;
	case VM_SET_KERNEMU_DEV:
	case VM_GET_KERNEMU_DEV: {
		mem_region_write_t mwrite;
		mem_region_read_t mread;
		bool arg;

		kernemu = (void *)data;

		if (kernemu->access_width > 0)
			size = (1u << kernemu->access_width);
		else
			size = 1;

		/* Route the access to the matching emulated device. */
		if (kernemu->gpa >= DEFAULT_APIC_BASE &&
		    kernemu->gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
			mread = lapic_mmio_read;
			mwrite = lapic_mmio_write;
		} else if (kernemu->gpa >= VIOAPIC_BASE &&
		    kernemu->gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
			mread = vioapic_mmio_read;
			mwrite = vioapic_mmio_write;
		} else if (kernemu->gpa >= VHPET_BASE &&
		    kernemu->gpa < VHPET_BASE + VHPET_SIZE) {
			mread = vhpet_mmio_read;
			mwrite = vhpet_mmio_write;
		} else {
			error = EINVAL;
			break;
		}

		if (cmd == VM_SET_KERNEMU_DEV)
			error = mwrite(vcpu, kernemu->gpa,
			    kernemu->value, size, &arg);
		else
			error = mread(vcpu, kernemu->gpa,
			    &kernemu->value, size, &arg);
		break;
	}
	case VM_ISA_ASSERT_IRQ:
		isa_irq = (struct vm_isa_irq *)data;
		error = vatpic_assert_irq(sc->vm, isa_irq->atpic_irq);
		if (error == 0 && isa_irq->ioapic_irq != -1)
			error = vioapic_assert_irq(sc->vm,
			    isa_irq->ioapic_irq);
		break;
	case VM_ISA_DEASSERT_IRQ:
		isa_irq = (struct vm_isa_irq *)data;
		error = vatpic_deassert_irq(sc->vm, isa_irq->atpic_irq);
		if (error == 0 && isa_irq->ioapic_irq != -1)
			error = vioapic_deassert_irq(sc->vm,
			    isa_irq->ioapic_irq);
		break;
	case VM_ISA_PULSE_IRQ:
		isa_irq = (struct vm_isa_irq *)data;
		error = vatpic_pulse_irq(sc->vm, isa_irq->atpic_irq);
		if (error == 0 && isa_irq->ioapic_irq != -1)
			error = vioapic_pulse_irq(sc->vm, isa_irq->ioapic_irq);
		break;
	case VM_ISA_SET_IRQ_TRIGGER:
		isa_irq_trigger = (struct vm_isa_irq_trigger *)data;
		error = vatpic_set_irq_trigger(sc->vm,
		    isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger);
		break;
	case VM_MMAP_GETNEXT:
		mm = (struct vm_memmap *)data;
		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
		break;
	case VM_MMAP_MEMSEG:
		mm = (struct vm_memmap *)data;
		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
		    mm->len, mm->prot, mm->flags);
		break;
	case VM_MUNMAP_MEMSEG:
		mu = (struct vm_munmap *)data;
		error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
		break;
#ifdef COMPAT_FREEBSD12
	case VM_ALLOC_MEMSEG_FBSD12:
		error = alloc_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg_fbsd12 *)0)->name));
		break;
#endif
	case VM_ALLOC_MEMSEG:
		error = alloc_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
#ifdef COMPAT_FREEBSD12
	case VM_GET_MEMSEG_FBSD12:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg_fbsd12 *)0)->name));
		break;
#endif
	case VM_GET_MEMSEG:
		error = get_memseg(sc, (struct vm_memseg *)data,
		    sizeof(((struct vm_memseg *)0)->name));
		break;
	case VM_GET_REGISTER:
		vmreg = (struct vm_register *)data;
		error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
		break;
	case VM_SET_REGISTER:
		vmreg = (struct vm_register *)data;
		error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
		break;
	case VM_SET_SEGMENT_DESCRIPTOR:
		vmsegdesc = (struct vm_seg_desc *)data;
		error = vm_set_seg_desc(vcpu,
		    vmsegdesc->regnum,
		    &vmsegdesc->desc);
		break;
	case VM_GET_SEGMENT_DESCRIPTOR:
		vmsegdesc = (struct vm_seg_desc *)data;
		error = vm_get_seg_desc(vcpu,
		    vmsegdesc->regnum,
		    &vmsegdesc->desc);
		break;
	case VM_GET_REGISTER_SET:
		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = vm_get_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		if (error == 0)
			error = copyout(regvals, vmregset->regvals,
			    sizeof(regvals[0]) * vmregset->count);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	case VM_SET_REGISTER_SET:
		vmregset = (struct vm_register_set *)data;
		if (vmregset->count > VM_REG_LAST) {
			error = EINVAL;
			break;
		}
		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
		    M_WAITOK);
		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
		    vmregset->count);
		if (error == 0)
			error = copyin(vmregset->regvals, regvals,
			    sizeof(regvals[0]) * vmregset->count);
		if (error == 0)
			error = vm_set_register_set(vcpu,
			    vmregset->count, regnums, regvals);
		free(regvals, M_VMMDEV);
		free(regnums, M_VMMDEV);
		break;
	case VM_GET_CAPABILITY:
		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(vcpu,
		    vmcap->captype,
		    &vmcap->capval);
		break;
	case VM_SET_CAPABILITY:
		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(vcpu,
		    vmcap->captype,
		    vmcap->capval);
		break;
	case VM_SET_X2APIC_STATE:
		x2apic = (struct vm_x2apic *)data;
		error = vm_set_x2apic_state(vcpu, x2apic->state);
		break;
	case VM_GET_X2APIC_STATE:
		x2apic = (struct vm_x2apic *)data;
		error = vm_get_x2apic_state(vcpu, &x2apic->state);
		break;
	case VM_GET_GPA_PMAP:
		gpapte = (struct vm_gpa_pte *)data;
		pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)),
		    gpapte->gpa, gpapte->pte, &gpapte->ptenum);
		error = 0;
		break;
	case VM_GET_HPET_CAPABILITIES:
		error = vhpet_getcap((struct vm_hpet_cap *)data);
		break;
	case VM_GLA2GPA: {
		/* The ioctl prot bits must match the VM_PROT_* values. */
		CTASSERT(PROT_READ == VM_PROT_READ);
		CTASSERT(PROT_WRITE == VM_PROT_WRITE);
		CTASSERT(PROT_EXEC == VM_PROT_EXECUTE);
		gg = (struct vm_gla2gpa *)data;
		error = vm_gla2gpa(vcpu, &gg->paging, gg->gla,
		    gg->prot, &gg->gpa, &gg->fault);
		KASSERT(error == 0 || error == EFAULT,
		    ("%s: vm_gla2gpa unknown error %d", __func__, error));
		break;
	}
	case VM_GLA2GPA_NOFAULT:
		gg = (struct vm_gla2gpa *)data;
		error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla,
		    gg->prot, &gg->gpa, &gg->fault);
		KASSERT(error == 0 || error == EFAULT,
		    ("%s: vm_gla2gpa unknown error %d", __func__, error));
		break;
	case VM_ACTIVATE_CPU:
		error = vm_activate_cpu(vcpu);
		break;
	case VM_GET_CPUS:
		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		if (size < 1 || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
		    M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else if (vm_cpuset->which == VM_DEBUG_CPUS)
			*cpuset = vm_debug_cpus(sc->vm);
		else
			error = EINVAL;
		/* Refuse to silently truncate a set bit beyond 'size'. */
		if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
			error = ERANGE;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	case VM_SUSPEND_CPU:
		error = vm_suspend_cpu(sc->vm, vcpu);
		break;
	case VM_RESUME_CPU:
		error = vm_resume_cpu(sc->vm, vcpu);
		break;
	case VM_SET_INTINFO:
		vmii = (struct vm_intinfo *)data;
		error = vm_exit_intinfo(vcpu, vmii->info1);
		break;
	case VM_GET_INTINFO:
		vmii = (struct vm_intinfo *)data;
		error = vm_get_intinfo(vcpu, &vmii->info1, &vmii->info2);
		break;
	case VM_RTC_WRITE:
		rtcdata = (struct vm_rtc_data *)data;
		error = vrtc_nvram_write(sc->vm, rtcdata->offset,
		    rtcdata->value);
		break;
	case VM_RTC_READ:
		rtcdata = (struct vm_rtc_data *)data;
		error = vrtc_nvram_read(sc->vm, rtcdata->offset,
		    &rtcdata->value);
		break;
	case VM_RTC_SETTIME:
		rtctime = (struct vm_rtc_time *)data;
		error = vrtc_set_time(sc->vm, rtctime->secs);
		break;
	case VM_RTC_GETTIME:
		error = 0;
		rtctime = (struct vm_rtc_time *)data;
		rtctime->secs = vrtc_get_time(sc->vm);
		break;
	case VM_RESTART_INSTRUCTION:
		error = vm_restart_instruction(vcpu);
		break;
	case VM_SET_TOPOLOGY:
		topology = (struct vm_cpu_topology *)data;
		error = vm_set_topology(sc->vm, topology->sockets,
		    topology->cores, topology->threads, topology->maxcpus);
		break;
	case VM_GET_TOPOLOGY:
		topology = (struct vm_cpu_topology *)data;
		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
		    &topology->threads, &topology->maxcpus);
		error = 0;
		break;
#ifdef BHYVE_SNAPSHOT
	case VM_SNAPSHOT_REQ:
		snapshot_meta = (struct vm_snapshot_meta *)data;
		error = vm_snapshot_req(sc->vm, snapshot_meta);
		break;
#ifdef COMPAT_FREEBSD13
	case VM_SNAPSHOT_REQ_OLD:
		/*
		 * The old structure just has an additional pointer at
		 * the start that is ignored.
		 */
		snapshot_old = (struct vm_snapshot_meta_old *)data;
		snapshot_meta =
		    (struct vm_snapshot_meta *)&snapshot_old->dev_data;
		error = vm_snapshot_req(sc->vm, snapshot_meta);
		break;
#endif
	case VM_RESTORE_TIME:
		error = vm_restore_time(sc->vm);
		break;
#endif
	default:
		error = ENOTTY;
		break;
	}

done:
	if (vcpus_locked == SINGLE)
		vcpu_unlock_one(vcpu);
	else if (vcpus_locked == ALL)
		vcpu_unlock_all(sc);
	if (memsegs_locked)
		vm_unlock_memsegs(sc->vm);

	/*
	 * Make sure that no handler returns a kernel-internal
	 * error value to userspace.
1097 */ 1098 KASSERT(error == ERESTART || error >= 0, 1099 ("vmmdev_ioctl: invalid error return %d", error)); 1100 return (error); 1101} 1102 1103static int 1104vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize, 1105 struct vm_object **objp, int nprot) 1106{ 1107 struct vmmdev_softc *sc; 1108 vm_paddr_t gpa; 1109 size_t len; 1110 vm_ooffset_t segoff, first, last; 1111 int error, found, segid; 1112 bool sysmem; 1113 1114 error = vmm_priv_check(curthread->td_ucred); 1115 if (error) 1116 return (error); 1117 1118 first = *offset; 1119 last = first + mapsize; 1120 if ((nprot & PROT_EXEC) || first < 0 || first >= last) 1121 return (EINVAL); 1122 1123 sc = vmmdev_lookup2(cdev); 1124 if (sc == NULL) { 1125 /* virtual machine is in the process of being created */ 1126 return (EINVAL); 1127 } 1128 1129 /* 1130 * Get a read lock on the guest memory map. 1131 */ 1132 vm_slock_memsegs(sc->vm); 1133 1134 gpa = 0; 1135 found = 0; 1136 while (!found) { 1137 error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len, 1138 NULL, NULL); 1139 if (error) 1140 break; 1141 1142 if (first >= gpa && last <= gpa + len) 1143 found = 1; 1144 else 1145 gpa += len; 1146 } 1147 1148 if (found) { 1149 error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp); 1150 KASSERT(error == 0 && *objp != NULL, 1151 ("%s: invalid memory segment %d", __func__, segid)); 1152 if (sysmem) { 1153 vm_object_reference(*objp); 1154 *offset = segoff + (first - gpa); 1155 } else { 1156 error = EINVAL; 1157 } 1158 } 1159 vm_unlock_memsegs(sc->vm); 1160 return (error); 1161} 1162 1163static void 1164vmmdev_destroy(void *arg) 1165{ 1166 struct vmmdev_softc *sc = arg; 1167 struct devmem_softc *dsc; 1168 int error __diagused; 1169 1170 vm_disable_vcpu_creation(sc->vm); 1171 error = vcpu_lock_all(sc); 1172 KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error)); 1173 vm_unlock_vcpus(sc->vm); 1174 1175 while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) { 1176 KASSERT(dsc->cdev == 
		    NULL, ("%s: devmem not free", __func__));
		SLIST_REMOVE_HEAD(&sc->devmem, link);
		/* dsc->name ownership was transferred in devmem_create_cdev. */
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	if (sc->cdev != NULL)
		destroy_dev(sc->cdev);

	if (sc->vm != NULL)
		vm_destroy(sc->vm);

	if (sc->ucred != NULL)
		crfree(sc->ucred);

	/* Unlink from the global softc list if it was ever inserted. */
	if ((sc->flags & VSC_LINKED) != 0) {
		mtx_lock(&vmmdev_mtx);
		SLIST_REMOVE(&head, sc, vmmdev_softc, link);
		mtx_unlock(&vmmdev_mtx);
	}

	free(sc, M_VMMDEV);
}

/*
 * Sysctl handler for hw.vmm.destroy: destroy the named virtual
 * machine along with its devmem cdevs and its own cdev.
 */
static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
	struct devmem_softc *dsc;
	struct vmmdev_softc *sc;
	struct cdev *cdev;
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error)
		return (error);

	/*
	 * Fetch the VM name written by userspace.  The placeholder
	 * string is returned on reads of this sysctl.
	 */
	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	strlcpy(buf, "beavis", buflen);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error != 0 || req->newptr == NULL)
		goto out;

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(buf);
	if (sc == NULL || sc->cdev == NULL) {
		/* No such VM, or it is already being destroyed. */
		mtx_unlock(&vmmdev_mtx);
		error = EINVAL;
		goto out;
	}

	/*
	 * Setting 'sc->cdev' to NULL is used to indicate that the VM
	 * is scheduled for destruction.
	 */
	cdev = sc->cdev;
	sc->cdev = NULL;
	mtx_unlock(&vmmdev_mtx);

	/*
	 * Destroy all cdevs:
	 *
	 * - any new operations on the 'cdev' will return an error (ENXIO).
	 *
	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
	 */
	SLIST_FOREACH(dsc, &sc->devmem, link) {
		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
		destroy_dev(dsc->cdev);
		devmem_destroy(dsc);
	}
	destroy_dev(cdev);
	vmmdev_destroy(sc);
	error = 0;

out:
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_destroy, "A",
    NULL);

/* Character device switch for /dev/vmm/<name> nodes. */
static struct cdevsw vmmdevsw = {
	.d_name		= "vmmdev",
	.d_version	= D_VERSION,
	.d_ioctl	= vmmdev_ioctl,
	.d_mmap_single	= vmmdev_mmap_single,
	.d_read		= vmmdev_rw,
	.d_write	= vmmdev_rw,
};

/*
 * Sysctl handler for hw.vmm.create: create a new virtual machine with
 * the name written by userspace, along with its softc and cdev.
 */
static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
	struct vm *vm;
	struct cdev *cdev;
	struct vmmdev_softc *sc, *sc2;
	char *buf;
	int error, buflen;

	error = vmm_priv_check(req->td->td_ucred);
	if (error)
		return (error);

	/*
	 * Fetch the VM name written by userspace.  The placeholder
	 * string is returned on reads of this sysctl.
	 */
	buflen = VM_MAX_NAMELEN + 1;
	buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
	strlcpy(buf, "beavis", buflen);
	error = sysctl_handle_string(oidp, buf, buflen, req);
	if (error != 0 || req->newptr == NULL)
		goto out;

	/* Cheap pre-check for a duplicate name before creating the VM. */
	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(buf);
	mtx_unlock(&vmmdev_mtx);
	if (sc != NULL) {
		error = EEXIST;
		goto out;
	}

	error = vm_create(buf, &vm);
	if (error != 0)
		goto out;

	sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
	sc->ucred = crhold(curthread->td_ucred);
	sc->vm = vm;
	SLIST_INIT(&sc->devmem);

	/*
	 * Lookup the name again just in case somebody sneaked in when we
	 * dropped the lock.
	 */
	mtx_lock(&vmmdev_mtx);
	sc2 = vmmdev_lookup(buf);
	if (sc2 == NULL) {
		SLIST_INSERT_HEAD(&head, sc, link);
		sc->flags |= VSC_LINKED;
	}
	mtx_unlock(&vmmdev_mtx);

	if (sc2 != NULL) {
		/* Lost the race: another VM with this name appeared. */
		vmmdev_destroy(sc);
		error = EEXIST;
		goto out;
	}

	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, sc->ucred,
	    UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
	if (error != 0) {
		vmmdev_destroy(sc);
		goto out;
	}

	/* Publishing sc->cdev marks the VM as fully created. */
	mtx_lock(&vmmdev_mtx);
	sc->cdev = cdev;
	sc->cdev->si_drv1 = sc;
	mtx_unlock(&vmmdev_mtx);

out:
	free(buf, M_VMMDEV);
	return (error);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_vmm_create, "A",
    NULL);

/* Module initialization: register the "vmm" jail permission bit. */
void
vmmdev_init(void)
{
	pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
	    "Allow use of vmm in a jail.");
}

/*
 * Module teardown check: refuse unload (EBUSY) while any virtual
 * machine softc is still on the global list.
 */
int
vmmdev_cleanup(void)
{
	int error;

	if (SLIST_EMPTY(&head))
		error = 0;
	else
		error = EBUSY;

	return (error);
}

/*
 * d_mmap_single handler for a devmem cdev: map a range of the
 * non-sysmem segment backing this device node.  The requested range
 * must lie entirely within the segment.
 */
static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
	struct devmem_softc *dsc;
	vm_ooffset_t first, last;
	size_t seglen;
	int error;
	bool sysmem;

	dsc = cdev->si_drv1;
	if (dsc == NULL) {
		/* 'cdev' has been created but is not ready for use */
		return (ENXIO);
	}

	/* Reject executable mappings and degenerate/negative ranges. */
	first = *offset;
	last = *offset + len;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	vm_slock_memsegs(dsc->sc->vm);

	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
	KASSERT(error == 0 && !sysmem && *objp != NULL,
	    ("%s: invalid devmem segment %d", __func__, dsc->segid));

	if (seglen >= last)
		vm_object_reference(*objp);
	else
		error = EINVAL;

	vm_unlock_memsegs(dsc->sc->vm);
	return (error);
}

/* Character device switch for /dev/vmm.io/<vm>.<seg> nodes. */
static struct cdevsw devmemsw = {
	.d_name		= "devmem",
	.d_version	= D_VERSION,
	.d_mmap_single	= devmem_mmap_single,
};

/*
 * Create the cdev for a devmem segment of the named VM and link a new
 * devmem softc into the VM softc's list.  On success takes ownership
 * of 'devname' (freed later in vmmdev_destroy).  Returns ENODEV if
 * the VM is being created or destroyed.
 */
static int
devmem_create_cdev(const char *vmname, int segid, char *devname)
{
	struct devmem_softc *dsc;
	struct vmmdev_softc *sc;
	struct cdev *cdev;
	int error;

	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL,
	    UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
	if (error)
		return (error);

	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(vmname);
	KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname));
	if (sc->cdev == NULL) {
		/* virtual machine is being created or destroyed */
		mtx_unlock(&vmmdev_mtx);
		free(dsc, M_VMMDEV);
		/* Defer cdev destruction; it cannot be done under the mtx. */
		destroy_dev_sched_cb(cdev, NULL, 0);
		return (ENODEV);
	}

	dsc->segid = segid;
	dsc->name = devname;
	dsc->cdev = cdev;
	dsc->sc = sc;
	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
	mtx_unlock(&vmmdev_mtx);

	/* The 'cdev' is ready for use after 'si_drv1' is initialized */
	cdev->si_drv1 = dsc;
	return (0);
}

/*
 * Mark a devmem softc as destroyed by clearing its cdev and backptr.
 * The softc itself (and its name) are freed later in vmmdev_destroy.
 */
static void
devmem_destroy(void *arg)
{
	struct devmem_softc *dsc = arg;

	KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
	dsc->cdev = NULL;
	dsc->sc = NULL;
}