/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * Copyright (C) 2015 Mihai Carabas <mihai.carabas@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
28 */ 29 30#include <sys/param.h> 31#include <sys/kernel.h> 32#include <sys/jail.h> 33#include <sys/queue.h> 34#include <sys/lock.h> 35#include <sys/mutex.h> 36#include <sys/malloc.h> 37#include <sys/conf.h> 38#include <sys/sysctl.h> 39#include <sys/libkern.h> 40#include <sys/ioccom.h> 41#include <sys/mman.h> 42#include <sys/uio.h> 43#include <sys/proc.h> 44 45#include <vm/vm.h> 46#include <vm/pmap.h> 47#include <vm/vm_map.h> 48#include <vm/vm_object.h> 49 50#include <machine/machdep.h> 51#include <machine/vmparam.h> 52#include <machine/vmm.h> 53#include <machine/vmm_dev.h> 54 55#include "vmm_stat.h" 56 57#include "io/vgic.h" 58 59struct devmem_softc { 60 int segid; 61 char *name; 62 struct cdev *cdev; 63 struct vmmdev_softc *sc; 64 SLIST_ENTRY(devmem_softc) link; 65}; 66 67struct vmmdev_softc { 68 struct vm *vm; /* vm instance cookie */ 69 struct cdev *cdev; 70 struct ucred *ucred; 71 SLIST_ENTRY(vmmdev_softc) link; 72 SLIST_HEAD(, devmem_softc) devmem; 73 int flags; 74}; 75#define VSC_LINKED 0x01 76 77static SLIST_HEAD(, vmmdev_softc) head; 78 79static unsigned pr_allow_flag; 80static struct mtx vmmdev_mtx; 81MTX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex", MTX_DEF); 82 83static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev"); 84 85SYSCTL_DECL(_hw_vmm); 86 87static int vmm_priv_check(struct ucred *ucred); 88static int devmem_create_cdev(const char *vmname, int id, char *devmem); 89static void devmem_destroy(void *arg); 90 91static int 92vmm_priv_check(struct ucred *ucred) 93{ 94 95 if (jailed(ucred) && 96 !(ucred->cr_prison->pr_allow & pr_allow_flag)) 97 return (EPERM); 98 99 return (0); 100} 101 102static int 103vcpu_lock_one(struct vcpu *vcpu) 104{ 105 return (vcpu_set_state(vcpu, VCPU_FROZEN, true)); 106} 107 108static void 109vcpu_unlock_one(struct vcpu *vcpu) 110{ 111 enum vcpu_state state; 112 113 state = vcpu_get_state(vcpu, NULL); 114 if (state != VCPU_FROZEN) { 115 panic("vcpu %s(%d) has invalid state %d", 116 vm_name(vcpu_vm(vcpu)), 
vcpu_vcpuid(vcpu), state); 117 } 118 119 vcpu_set_state(vcpu, VCPU_IDLE, false); 120} 121 122static int 123vcpu_lock_all(struct vmmdev_softc *sc) 124{ 125 struct vcpu *vcpu; 126 int error; 127 uint16_t i, j, maxcpus; 128 129 error = 0; 130 vm_slock_vcpus(sc->vm); 131 maxcpus = vm_get_maxcpus(sc->vm); 132 for (i = 0; i < maxcpus; i++) { 133 vcpu = vm_vcpu(sc->vm, i); 134 if (vcpu == NULL) 135 continue; 136 error = vcpu_lock_one(vcpu); 137 if (error) 138 break; 139 } 140 141 if (error) { 142 for (j = 0; j < i; j++) { 143 vcpu = vm_vcpu(sc->vm, j); 144 if (vcpu == NULL) 145 continue; 146 vcpu_unlock_one(vcpu); 147 } 148 vm_unlock_vcpus(sc->vm); 149 } 150 151 return (error); 152} 153 154static void 155vcpu_unlock_all(struct vmmdev_softc *sc) 156{ 157 struct vcpu *vcpu; 158 uint16_t i, maxcpus; 159 160 maxcpus = vm_get_maxcpus(sc->vm); 161 for (i = 0; i < maxcpus; i++) { 162 vcpu = vm_vcpu(sc->vm, i); 163 if (vcpu == NULL) 164 continue; 165 vcpu_unlock_one(vcpu); 166 } 167 vm_unlock_vcpus(sc->vm); 168} 169 170static struct vmmdev_softc * 171vmmdev_lookup(const char *name) 172{ 173 struct vmmdev_softc *sc; 174 175#ifdef notyet /* XXX kernel is not compiled with invariants */ 176 mtx_assert(&vmmdev_mtx, MA_OWNED); 177#endif 178 179 SLIST_FOREACH(sc, &head, link) { 180 if (strcmp(name, vm_name(sc->vm)) == 0) 181 break; 182 } 183 184 if (sc == NULL) 185 return (NULL); 186 187 if (cr_cansee(curthread->td_ucred, sc->ucred)) 188 return (NULL); 189 190 return (sc); 191} 192 193static struct vmmdev_softc * 194vmmdev_lookup2(struct cdev *cdev) 195{ 196 197 return (cdev->si_drv1); 198} 199 200static int 201vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags) 202{ 203 int error, off, c, prot; 204 vm_paddr_t gpa, maxaddr; 205 void *hpa, *cookie; 206 struct vmmdev_softc *sc; 207 208 error = vmm_priv_check(curthread->td_ucred); 209 if (error) 210 return (error); 211 212 sc = vmmdev_lookup2(cdev); 213 if (sc == NULL) 214 return (ENXIO); 215 216 /* 217 * Get a read lock on the guest 
memory map. 218 */ 219 vm_slock_memsegs(sc->vm); 220 221 prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ); 222 maxaddr = vmm_sysmem_maxaddr(sc->vm); 223 while (uio->uio_resid > 0 && error == 0) { 224 gpa = uio->uio_offset; 225 off = gpa & PAGE_MASK; 226 c = min(uio->uio_resid, PAGE_SIZE - off); 227 228 /* 229 * The VM has a hole in its physical memory map. If we want to 230 * use 'dd' to inspect memory beyond the hole we need to 231 * provide bogus data for memory that lies in the hole. 232 * 233 * Since this device does not support lseek(2), dd(1) will 234 * read(2) blocks of data to simulate the lseek(2). 235 */ 236 hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie); 237 if (hpa == NULL) { 238 if (uio->uio_rw == UIO_READ && gpa < maxaddr) 239 error = uiomove(__DECONST(void *, zero_region), 240 c, uio); 241 else 242 error = EFAULT; 243 } else { 244 error = uiomove(hpa, c, uio); 245 vm_gpa_release(cookie); 246 } 247 } 248 vm_unlock_memsegs(sc->vm); 249 return (error); 250} 251 252CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1); 253 254static int 255get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len) 256{ 257 struct devmem_softc *dsc; 258 int error; 259 bool sysmem; 260 261 error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL); 262 if (error || mseg->len == 0) 263 return (error); 264 265 if (!sysmem) { 266 SLIST_FOREACH(dsc, &sc->devmem, link) { 267 if (dsc->segid == mseg->segid) 268 break; 269 } 270 KASSERT(dsc != NULL, ("%s: devmem segment %d not found", 271 __func__, mseg->segid)); 272 error = copystr(dsc->name, mseg->name, len, NULL); 273 } else { 274 bzero(mseg->name, len); 275 } 276 277 return (error); 278} 279 280static int 281alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len) 282{ 283 char *name; 284 int error; 285 bool sysmem; 286 287 error = 0; 288 name = NULL; 289 sysmem = true; 290 291 /* 292 * The allocation is lengthened by 1 to hold a terminating 
NUL. It'll 293 * by stripped off when devfs processes the full string. 294 */ 295 if (VM_MEMSEG_NAME(mseg)) { 296 sysmem = false; 297 name = malloc(len, M_VMMDEV, M_WAITOK); 298 error = copystr(mseg->name, name, len, NULL); 299 if (error) 300 goto done; 301 } 302 303 error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem); 304 if (error) 305 goto done; 306 307 if (VM_MEMSEG_NAME(mseg)) { 308 error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name); 309 if (error) 310 vm_free_memseg(sc->vm, mseg->segid); 311 else 312 name = NULL; /* freed when 'cdev' is destroyed */ 313 } 314done: 315 free(name, M_VMMDEV); 316 return (error); 317} 318 319static int 320vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum, 321 uint64_t *regval) 322{ 323 int error, i; 324 325 error = 0; 326 for (i = 0; i < count; i++) { 327 error = vm_get_register(vcpu, regnum[i], ®val[i]); 328 if (error) 329 break; 330 } 331 return (error); 332} 333 334static int 335vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum, 336 uint64_t *regval) 337{ 338 int error, i; 339 340 error = 0; 341 for (i = 0; i < count; i++) { 342 error = vm_set_register(vcpu, regnum[i], regval[i]); 343 if (error) 344 break; 345 } 346 return (error); 347} 348 349static int 350vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, 351 struct thread *td) 352{ 353 int error, vcpuid, size; 354 cpuset_t *cpuset; 355 struct vmmdev_softc *sc; 356 struct vcpu *vcpu; 357 struct vm_register *vmreg; 358 struct vm_register_set *vmregset; 359 struct vm_run *vmrun; 360 struct vm_vgic_version *vgv; 361 struct vm_vgic_descr *vgic; 362 struct vm_cpuset *vm_cpuset; 363 struct vm_irq *vi; 364 struct vm_capability *vmcap; 365 struct vm_stats *vmstats; 366 struct vm_stat_desc *statdesc; 367 struct vm_suspend *vmsuspend; 368 struct vm_exception *vmexc; 369 struct vm_gla2gpa *gg; 370 struct vm_memmap *mm; 371 struct vm_munmap *mu; 372 struct vm_msi *vmsi; 373 struct vm_cpu_topology 
*topology; 374 uint64_t *regvals; 375 int *regnums; 376 enum { NONE, SINGLE, ALL } vcpus_locked; 377 bool memsegs_locked; 378 379 error = vmm_priv_check(curthread->td_ucred); 380 if (error) 381 return (error); 382 383 sc = vmmdev_lookup2(cdev); 384 if (sc == NULL) 385 return (ENXIO); 386 387 error = 0; 388 vcpuid = -1; 389 vcpu = NULL; 390 vcpus_locked = NONE; 391 memsegs_locked = false; 392 393 /* 394 * Some VMM ioctls can operate only on vcpus that are not running. 395 */ 396 switch (cmd) { 397 case VM_RUN: 398 case VM_GET_REGISTER: 399 case VM_SET_REGISTER: 400 case VM_GET_REGISTER_SET: 401 case VM_SET_REGISTER_SET: 402 case VM_INJECT_EXCEPTION: 403 case VM_GET_CAPABILITY: 404 case VM_SET_CAPABILITY: 405 case VM_GLA2GPA_NOFAULT: 406 case VM_ACTIVATE_CPU: 407 /* 408 * ioctls that can operate only on vcpus that are not running. 409 */ 410 vcpuid = *(int *)data; 411 vcpu = vm_alloc_vcpu(sc->vm, vcpuid); 412 if (vcpu == NULL) { 413 error = EINVAL; 414 goto done; 415 } 416 error = vcpu_lock_one(vcpu); 417 if (error) 418 goto done; 419 vcpus_locked = SINGLE; 420 break; 421 422 case VM_ALLOC_MEMSEG: 423 case VM_MMAP_MEMSEG: 424 case VM_MUNMAP_MEMSEG: 425 case VM_REINIT: 426 case VM_ATTACH_VGIC: 427 /* 428 * ioctls that modify the memory map must lock memory 429 * segments exclusively. 430 */ 431 vm_xlock_memsegs(sc->vm); 432 memsegs_locked = true; 433 434 /* 435 * ioctls that operate on the entire virtual machine must 436 * prevent all vcpus from running. 437 */ 438 error = vcpu_lock_all(sc); 439 if (error) 440 goto done; 441 vcpus_locked = ALL; 442 break; 443 case VM_GET_MEMSEG: 444 case VM_MMAP_GETNEXT: 445 /* 446 * Lock the memory map while it is being inspected. 447 */ 448 vm_slock_memsegs(sc->vm); 449 memsegs_locked = true; 450 break; 451 452 case VM_STATS: 453 /* 454 * These do not need the vCPU locked but do operate on 455 * a specific vCPU. 
456 */ 457 vcpuid = *(int *)data; 458 vcpu = vm_alloc_vcpu(sc->vm, vcpuid); 459 if (vcpu == NULL) { 460 error = EINVAL; 461 goto done; 462 } 463 break; 464 465 case VM_SUSPEND_CPU: 466 case VM_RESUME_CPU: 467 /* 468 * These can either operate on all CPUs via a vcpuid of 469 * -1 or on a specific vCPU. 470 */ 471 vcpuid = *(int *)data; 472 if (vcpuid == -1) 473 break; 474 vcpu = vm_alloc_vcpu(sc->vm, vcpuid); 475 if (vcpu == NULL) { 476 error = EINVAL; 477 goto done; 478 } 479 break; 480 481 case VM_ASSERT_IRQ: 482 vi = (struct vm_irq *)data; 483 error = vm_assert_irq(sc->vm, vi->irq); 484 break; 485 case VM_DEASSERT_IRQ: 486 vi = (struct vm_irq *)data; 487 error = vm_deassert_irq(sc->vm, vi->irq); 488 break; 489 default: 490 break; 491 } 492 493 switch (cmd) { 494 case VM_RUN: { 495 struct vm_exit *vme; 496 497 vmrun = (struct vm_run *)data; 498 vme = vm_exitinfo(vcpu); 499 500 error = vm_run(vcpu); 501 if (error != 0) 502 break; 503 504 error = copyout(vme, vmrun->vm_exit, sizeof(*vme)); 505 if (error != 0) 506 break; 507 break; 508 } 509 case VM_SUSPEND: 510 vmsuspend = (struct vm_suspend *)data; 511 error = vm_suspend(sc->vm, vmsuspend->how); 512 break; 513 case VM_REINIT: 514 error = vm_reinit(sc->vm); 515 break; 516 case VM_STAT_DESC: { 517 statdesc = (struct vm_stat_desc *)data; 518 error = vmm_stat_desc_copy(statdesc->index, 519 statdesc->desc, sizeof(statdesc->desc)); 520 break; 521 } 522 case VM_STATS: { 523 CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS); 524 vmstats = (struct vm_stats *)data; 525 getmicrotime(&vmstats->tv); 526 error = vmm_stat_copy(vcpu, vmstats->index, 527 nitems(vmstats->statbuf), 528 &vmstats->num_entries, vmstats->statbuf); 529 break; 530 } 531 case VM_MMAP_GETNEXT: 532 mm = (struct vm_memmap *)data; 533 error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid, 534 &mm->segoff, &mm->len, &mm->prot, &mm->flags); 535 break; 536 case VM_MMAP_MEMSEG: 537 mm = (struct vm_memmap *)data; 538 error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, 
mm->segoff, 539 mm->len, mm->prot, mm->flags); 540 break; 541 case VM_MUNMAP_MEMSEG: 542 mu = (struct vm_munmap *)data; 543 error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len); 544 break; 545 case VM_ALLOC_MEMSEG: 546 error = alloc_memseg(sc, (struct vm_memseg *)data, 547 sizeof(((struct vm_memseg *)0)->name)); 548 break; 549 case VM_GET_MEMSEG: 550 error = get_memseg(sc, (struct vm_memseg *)data, 551 sizeof(((struct vm_memseg *)0)->name)); 552 break; 553 case VM_GET_REGISTER: 554 vmreg = (struct vm_register *)data; 555 error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval); 556 break; 557 case VM_SET_REGISTER: 558 vmreg = (struct vm_register *)data; 559 error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval); 560 break; 561 case VM_GET_REGISTER_SET: 562 vmregset = (struct vm_register_set *)data; 563 if (vmregset->count > VM_REG_LAST) { 564 error = EINVAL; 565 break; 566 } 567 regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV, 568 M_WAITOK); 569 regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV, 570 M_WAITOK); 571 error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * 572 vmregset->count); 573 if (error == 0) 574 error = vm_get_register_set(vcpu, vmregset->count, 575 regnums, regvals); 576 if (error == 0) 577 error = copyout(regvals, vmregset->regvals, 578 sizeof(regvals[0]) * vmregset->count); 579 free(regvals, M_VMMDEV); 580 free(regnums, M_VMMDEV); 581 break; 582 case VM_SET_REGISTER_SET: 583 vmregset = (struct vm_register_set *)data; 584 if (vmregset->count > VM_REG_LAST) { 585 error = EINVAL; 586 break; 587 } 588 regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV, 589 M_WAITOK); 590 regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV, 591 M_WAITOK); 592 error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * 593 vmregset->count); 594 if (error == 0) 595 error = copyin(vmregset->regvals, regvals, 596 sizeof(regvals[0]) * vmregset->count); 597 if (error == 0) 598 error = 
vm_set_register_set(vcpu, vmregset->count, 599 regnums, regvals); 600 free(regvals, M_VMMDEV); 601 free(regnums, M_VMMDEV); 602 break; 603 case VM_GET_CAPABILITY: 604 vmcap = (struct vm_capability *)data; 605 error = vm_get_capability(vcpu, 606 vmcap->captype, 607 &vmcap->capval); 608 break; 609 case VM_SET_CAPABILITY: 610 vmcap = (struct vm_capability *)data; 611 error = vm_set_capability(vcpu, 612 vmcap->captype, 613 vmcap->capval); 614 break; 615 case VM_INJECT_EXCEPTION: 616 vmexc = (struct vm_exception *)data; 617 error = vm_inject_exception(vcpu, vmexc->esr, vmexc->far); 618 break; 619 case VM_GLA2GPA_NOFAULT: 620 gg = (struct vm_gla2gpa *)data; 621 error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla, 622 gg->prot, &gg->gpa, &gg->fault); 623 KASSERT(error == 0 || error == EFAULT, 624 ("%s: vm_gla2gpa unknown error %d", __func__, error)); 625 break; 626 case VM_ACTIVATE_CPU: 627 error = vm_activate_cpu(vcpu); 628 break; 629 case VM_GET_CPUS: 630 error = 0; 631 vm_cpuset = (struct vm_cpuset *)data; 632 size = vm_cpuset->cpusetsize; 633 if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) { 634 error = ERANGE; 635 break; 636 } 637 cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO); 638 if (vm_cpuset->which == VM_ACTIVE_CPUS) 639 *cpuset = vm_active_cpus(sc->vm); 640 else if (vm_cpuset->which == VM_SUSPENDED_CPUS) 641 *cpuset = vm_suspended_cpus(sc->vm); 642 else if (vm_cpuset->which == VM_DEBUG_CPUS) 643 *cpuset = vm_debug_cpus(sc->vm); 644 else 645 error = EINVAL; 646 if (error == 0) 647 error = copyout(cpuset, vm_cpuset->cpus, size); 648 free(cpuset, M_TEMP); 649 break; 650 case VM_SUSPEND_CPU: 651 error = vm_suspend_cpu(sc->vm, vcpu); 652 break; 653 case VM_RESUME_CPU: 654 error = vm_resume_cpu(sc->vm, vcpu); 655 break; 656 case VM_GET_VGIC_VERSION: 657 vgv = (struct vm_vgic_version *)data; 658 /* TODO: Query the vgic driver for this */ 659 vgv->version = 3; 660 vgv->flags = 0; 661 error = 0; 662 break; 663 case VM_ATTACH_VGIC: 664 vgic = (struct 
vm_vgic_descr *)data; 665 error = vm_attach_vgic(sc->vm, vgic); 666 break; 667 case VM_RAISE_MSI: 668 vmsi = (struct vm_msi *)data; 669 error = vm_raise_msi(sc->vm, vmsi->msg, vmsi->addr, vmsi->bus, 670 vmsi->slot, vmsi->func); 671 break; 672 case VM_SET_TOPOLOGY: 673 topology = (struct vm_cpu_topology *)data; 674 error = vm_set_topology(sc->vm, topology->sockets, 675 topology->cores, topology->threads, topology->maxcpus); 676 break; 677 case VM_GET_TOPOLOGY: 678 topology = (struct vm_cpu_topology *)data; 679 vm_get_topology(sc->vm, &topology->sockets, &topology->cores, 680 &topology->threads, &topology->maxcpus); 681 error = 0; 682 break; 683 default: 684 error = ENOTTY; 685 break; 686 } 687 688done: 689 if (vcpus_locked == SINGLE) 690 vcpu_unlock_one(vcpu); 691 else if (vcpus_locked == ALL) 692 vcpu_unlock_all(sc); 693 if (memsegs_locked) 694 vm_unlock_memsegs(sc->vm); 695 696 /* 697 * Make sure that no handler returns a kernel-internal 698 * error value to userspace. 699 */ 700 KASSERT(error == ERESTART || error >= 0, 701 ("vmmdev_ioctl: invalid error return %d", error)); 702 return (error); 703} 704 705static int 706vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize, 707 struct vm_object **objp, int nprot) 708{ 709 struct vmmdev_softc *sc; 710 vm_paddr_t gpa; 711 size_t len; 712 vm_ooffset_t segoff, first, last; 713 int error, found, segid; 714 bool sysmem; 715 716 error = vmm_priv_check(curthread->td_ucred); 717 if (error) 718 return (error); 719 720 first = *offset; 721 last = first + mapsize; 722 if ((nprot & PROT_EXEC) || first < 0 || first >= last) 723 return (EINVAL); 724 725 sc = vmmdev_lookup2(cdev); 726 if (sc == NULL) { 727 /* virtual machine is in the process of being created */ 728 return (EINVAL); 729 } 730 731 /* 732 * Get a read lock on the guest memory map. 
733 */ 734 vm_slock_memsegs(sc->vm); 735 736 gpa = 0; 737 found = 0; 738 while (!found) { 739 error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len, 740 NULL, NULL); 741 if (error) 742 break; 743 744 if (first >= gpa && last <= gpa + len) 745 found = 1; 746 else 747 gpa += len; 748 } 749 750 if (found) { 751 error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp); 752 KASSERT(error == 0 && *objp != NULL, 753 ("%s: invalid memory segment %d", __func__, segid)); 754 if (sysmem) { 755 vm_object_reference(*objp); 756 *offset = segoff + (first - gpa); 757 } else { 758 error = EINVAL; 759 } 760 } 761 vm_unlock_memsegs(sc->vm); 762 return (error); 763} 764 765static void 766vmmdev_destroy(void *arg) 767{ 768 struct vmmdev_softc *sc = arg; 769 struct devmem_softc *dsc; 770 int error __diagused; 771 772 error = vcpu_lock_all(sc); 773 KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error)); 774 vm_unlock_vcpus(sc->vm); 775 776 while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) { 777 KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__)); 778 SLIST_REMOVE_HEAD(&sc->devmem, link); 779 free(dsc->name, M_VMMDEV); 780 free(dsc, M_VMMDEV); 781 } 782 783 if (sc->cdev != NULL) 784 destroy_dev(sc->cdev); 785 786 if (sc->vm != NULL) 787 vm_destroy(sc->vm); 788 789 if (sc->ucred != NULL) 790 crfree(sc->ucred); 791 792 if ((sc->flags & VSC_LINKED) != 0) { 793 mtx_lock(&vmmdev_mtx); 794 SLIST_REMOVE(&head, sc, vmmdev_softc, link); 795 mtx_unlock(&vmmdev_mtx); 796 } 797 798 free(sc, M_VMMDEV); 799} 800 801static int 802sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS) 803{ 804 struct devmem_softc *dsc; 805 struct vmmdev_softc *sc; 806 struct cdev *cdev; 807 char *buf; 808 int error, buflen; 809 810 error = vmm_priv_check(req->td->td_ucred); 811 if (error) 812 return (error); 813 814 buflen = VM_MAX_NAMELEN + 1; 815 buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO); 816 strlcpy(buf, "beavis", buflen); 817 error = sysctl_handle_string(oidp, buf, buflen, req); 818 if (error 
!= 0 || req->newptr == NULL) 819 goto out; 820 821 mtx_lock(&vmmdev_mtx); 822 sc = vmmdev_lookup(buf); 823 if (sc == NULL || sc->cdev == NULL) { 824 mtx_unlock(&vmmdev_mtx); 825 error = EINVAL; 826 goto out; 827 } 828 829 /* 830 * Setting 'sc->cdev' to NULL is used to indicate that the VM 831 * is scheduled for destruction. 832 */ 833 cdev = sc->cdev; 834 sc->cdev = NULL; 835 mtx_unlock(&vmmdev_mtx); 836 837 /* 838 * Destroy all cdevs: 839 * 840 * - any new operations on the 'cdev' will return an error (ENXIO). 841 * 842 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev' 843 */ 844 SLIST_FOREACH(dsc, &sc->devmem, link) { 845 KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed")); 846 destroy_dev(dsc->cdev); 847 devmem_destroy(dsc); 848 } 849 destroy_dev(cdev); 850 vmmdev_destroy(sc); 851 error = 0; 852 853out: 854 free(buf, M_VMMDEV); 855 return (error); 856} 857SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, 858 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 859 NULL, 0, sysctl_vmm_destroy, "A", 860 NULL); 861 862static struct cdevsw vmmdevsw = { 863 .d_name = "vmmdev", 864 .d_version = D_VERSION, 865 .d_ioctl = vmmdev_ioctl, 866 .d_mmap_single = vmmdev_mmap_single, 867 .d_read = vmmdev_rw, 868 .d_write = vmmdev_rw, 869}; 870 871static int 872sysctl_vmm_create(SYSCTL_HANDLER_ARGS) 873{ 874 struct vm *vm; 875 struct cdev *cdev; 876 struct vmmdev_softc *sc, *sc2; 877 char *buf; 878 int error, buflen; 879 880 error = vmm_priv_check(req->td->td_ucred); 881 if (error) 882 return (error); 883 884 buflen = VM_MAX_NAMELEN + 1; 885 buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO); 886 strlcpy(buf, "beavis", buflen); 887 error = sysctl_handle_string(oidp, buf, buflen, req); 888 if (error != 0 || req->newptr == NULL) 889 goto out; 890 891 mtx_lock(&vmmdev_mtx); 892 sc = vmmdev_lookup(buf); 893 mtx_unlock(&vmmdev_mtx); 894 if (sc != NULL) { 895 error = EEXIST; 896 goto out; 897 } 898 899 error = vm_create(buf, &vm); 900 if (error != 0) 901 
goto out; 902 903 sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO); 904 sc->ucred = crhold(curthread->td_ucred); 905 sc->vm = vm; 906 SLIST_INIT(&sc->devmem); 907 908 /* 909 * Lookup the name again just in case somebody sneaked in when we 910 * dropped the lock. 911 */ 912 mtx_lock(&vmmdev_mtx); 913 sc2 = vmmdev_lookup(buf); 914 if (sc2 == NULL) { 915 SLIST_INSERT_HEAD(&head, sc, link); 916 sc->flags |= VSC_LINKED; 917 } 918 mtx_unlock(&vmmdev_mtx); 919 920 if (sc2 != NULL) { 921 vmmdev_destroy(sc); 922 error = EEXIST; 923 goto out; 924 } 925 926 error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, sc->ucred, 927 UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf); 928 if (error != 0) { 929 vmmdev_destroy(sc); 930 goto out; 931 } 932 933 mtx_lock(&vmmdev_mtx); 934 sc->cdev = cdev; 935 sc->cdev->si_drv1 = sc; 936 mtx_unlock(&vmmdev_mtx); 937 938out: 939 free(buf, M_VMMDEV); 940 return (error); 941} 942SYSCTL_PROC(_hw_vmm, OID_AUTO, create, 943 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 944 NULL, 0, sysctl_vmm_create, "A", 945 NULL); 946 947void 948vmmdev_init(void) 949{ 950 pr_allow_flag = prison_add_allow(NULL, "vmm", NULL, 951 "Allow use of vmm in a jail."); 952} 953 954int 955vmmdev_cleanup(void) 956{ 957 int error; 958 959 if (SLIST_EMPTY(&head)) 960 error = 0; 961 else 962 error = EBUSY; 963 964 return (error); 965} 966 967static int 968devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len, 969 struct vm_object **objp, int nprot) 970{ 971 struct devmem_softc *dsc; 972 vm_ooffset_t first, last; 973 size_t seglen; 974 int error; 975 bool sysmem; 976 977 dsc = cdev->si_drv1; 978 if (dsc == NULL) { 979 /* 'cdev' has been created but is not ready for use */ 980 return (ENXIO); 981 } 982 983 first = *offset; 984 last = *offset + len; 985 if ((nprot & PROT_EXEC) || first < 0 || first >= last) 986 return (EINVAL); 987 988 vm_slock_memsegs(dsc->sc->vm); 989 990 error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, 
&sysmem, objp); 991 KASSERT(error == 0 && !sysmem && *objp != NULL, 992 ("%s: invalid devmem segment %d", __func__, dsc->segid)); 993 994 if (seglen >= last) 995 vm_object_reference(*objp); 996 else 997 error = EINVAL; 998 999 vm_unlock_memsegs(dsc->sc->vm); 1000 return (error); 1001} 1002 1003static struct cdevsw devmemsw = { 1004 .d_name = "devmem", 1005 .d_version = D_VERSION, 1006 .d_mmap_single = devmem_mmap_single, 1007}; 1008 1009static int 1010devmem_create_cdev(const char *vmname, int segid, char *devname) 1011{ 1012 struct devmem_softc *dsc; 1013 struct vmmdev_softc *sc; 1014 struct cdev *cdev; 1015 int error; 1016 1017 error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL, 1018 UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname); 1019 if (error) 1020 return (error); 1021 1022 dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO); 1023 1024 mtx_lock(&vmmdev_mtx); 1025 sc = vmmdev_lookup(vmname); 1026 KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname)); 1027 if (sc->cdev == NULL) { 1028 /* virtual machine is being created or destroyed */ 1029 mtx_unlock(&vmmdev_mtx); 1030 free(dsc, M_VMMDEV); 1031 destroy_dev_sched_cb(cdev, NULL, 0); 1032 return (ENODEV); 1033 } 1034 1035 dsc->segid = segid; 1036 dsc->name = devname; 1037 dsc->cdev = cdev; 1038 dsc->sc = sc; 1039 SLIST_INSERT_HEAD(&sc->devmem, dsc, link); 1040 mtx_unlock(&vmmdev_mtx); 1041 1042 /* The 'cdev' is ready for use after 'si_drv1' is initialized */ 1043 cdev->si_drv1 = dsc; 1044 return (0); 1045} 1046 1047static void 1048devmem_destroy(void *arg) 1049{ 1050 struct devmem_softc *dsc = arg; 1051 1052 KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__)); 1053 dsc->cdev = NULL; 1054 dsc->sc = NULL; 1055} 1056