1/*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31#include <sys/cdefs.h> 32__FBSDID("$FreeBSD$"); 33 34#include <sys/param.h> 35#include <sys/kernel.h> 36#include <sys/jail.h> 37#include <sys/queue.h> 38#include <sys/lock.h> 39#include <sys/mutex.h> 40#include <sys/malloc.h> 41#include <sys/conf.h> 42#include <sys/sysctl.h> 43#include <sys/libkern.h> 44#include <sys/ioccom.h> 45#include <sys/mman.h> 46#include <sys/uio.h> 47#include <sys/proc.h> 48 49#include <vm/vm.h> 50#include <vm/pmap.h> 51#include <vm/vm_map.h> 52#include <vm/vm_object.h> 53 54#include <machine/vmparam.h> 55#include <machine/vmm.h> 56#include <machine/vmm_instruction_emul.h> 57#include <machine/vmm_dev.h> 58 59#include "vmm_lapic.h" 60#include "vmm_stat.h" 61#include "vmm_mem.h" 62#include "io/ppt.h" 63#include "io/vatpic.h" 64#include "io/vioapic.h" 65#include "io/vhpet.h" 66#include "io/vrtc.h" 67 68struct devmem_softc { 69 int segid; 70 char *name; 71 struct cdev *cdev; 72 struct vmmdev_softc *sc; 73 SLIST_ENTRY(devmem_softc) link; 74}; 75 76struct vmmdev_softc { 77 struct vm *vm; /* vm instance cookie */ 78 struct cdev *cdev; 79 SLIST_ENTRY(vmmdev_softc) link; 80 SLIST_HEAD(, devmem_softc) devmem; 81 int flags; 82}; 83#define VSC_LINKED 0x01 84 85static SLIST_HEAD(, vmmdev_softc) head; 86 87static unsigned pr_allow_flag; 88static struct mtx vmmdev_mtx; 89 90static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev"); 91 92SYSCTL_DECL(_hw_vmm); 93 94static int vmm_priv_check(struct ucred *ucred); 95static int devmem_create_cdev(const char *vmname, int id, char *devmem); 96static void devmem_destroy(void *arg); 97 98static int 99vmm_priv_check(struct ucred *ucred) 100{ 101 102 if (jailed(ucred) && 103 !(ucred->cr_prison->pr_allow & pr_allow_flag)) 104 return (EPERM); 105 106 return (0); 107} 108 109static int 110vcpu_lock_one(struct vmmdev_softc *sc, int vcpu) 111{ 112 int error; 113 114 if (vcpu < 0 || vcpu >= vm_get_maxcpus(sc->vm)) 115 return (EINVAL); 116 117 error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true); 118 return (error); 119} 120 121static void 122vcpu_unlock_one(struct vmmdev_softc *sc, int vcpu) 123{ 124 enum vcpu_state state; 125 126 state = vcpu_get_state(sc->vm, vcpu, NULL); 127 if (state != VCPU_FROZEN) { 128 panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm), 129 vcpu, state); 130 } 131 132 vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false); 133} 134 135static int 136vcpu_lock_all(struct vmmdev_softc *sc) 137{ 138 int error, vcpu; 139 uint16_t maxcpus; 140 141 maxcpus = vm_get_maxcpus(sc->vm); 142 for (vcpu = 0; vcpu < maxcpus; vcpu++) { 143 error = vcpu_lock_one(sc, vcpu); 144 if (error) 145 break; 146 } 147 148 if (error) { 149 while (--vcpu >= 0) 150 vcpu_unlock_one(sc, vcpu); 151 } 152 153 return (error); 154} 155 156static void 157vcpu_unlock_all(struct vmmdev_softc *sc) 158{ 159 int vcpu; 160 uint16_t maxcpus; 161 162 maxcpus = vm_get_maxcpus(sc->vm); 163 for (vcpu = 0; vcpu < maxcpus; vcpu++) 164 vcpu_unlock_one(sc, vcpu); 165} 166 167static struct vmmdev_softc * 168vmmdev_lookup(const char *name) 169{ 170 struct vmmdev_softc *sc; 171 172#ifdef notyet /* XXX kernel is not compiled with invariants */ 173 mtx_assert(&vmmdev_mtx, MA_OWNED); 174#endif 175 176 SLIST_FOREACH(sc, &head, link) { 177 if (strcmp(name, vm_name(sc->vm)) == 0) 178 break; 179 } 180 181 return (sc); 182} 183 184static struct vmmdev_softc * 185vmmdev_lookup2(struct cdev *cdev) 186{ 187 188 return (cdev->si_drv1); 189} 190 191static int 192vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags) 193{ 194 int error, off, c, prot; 195 vm_paddr_t gpa, maxaddr; 196 void *hpa, *cookie; 197 struct vmmdev_softc *sc; 198 uint16_t lastcpu; 199 200 error = vmm_priv_check(curthread->td_ucred); 201 if (error) 202 return (error); 203 204 sc = vmmdev_lookup2(cdev); 205 if (sc == NULL) 206 return (ENXIO); 207 208 /* 209 * Get a read lock on the guest memory map by freezing any vcpu. 210 */ 211 lastcpu = vm_get_maxcpus(sc->vm) - 1; 212 error = vcpu_lock_one(sc, lastcpu); 213 if (error) 214 return (error); 215 216 prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ); 217 maxaddr = vmm_sysmem_maxaddr(sc->vm); 218 while (uio->uio_resid > 0 && error == 0) { 219 gpa = uio->uio_offset; 220 off = gpa & PAGE_MASK; 221 c = min(uio->uio_resid, PAGE_SIZE - off); 222 223 /* 224 * The VM has a hole in its physical memory map. If we want to 225 * use 'dd' to inspect memory beyond the hole we need to 226 * provide bogus data for memory that lies in the hole. 227 * 228 * Since this device does not support lseek(2), dd(1) will 229 * read(2) blocks of data to simulate the lseek(2). 230 */ 231 hpa = vm_gpa_hold(sc->vm, lastcpu, gpa, c, 232 prot, &cookie); 233 if (hpa == NULL) { 234 if (uio->uio_rw == UIO_READ && gpa < maxaddr) 235 error = uiomove(__DECONST(void *, zero_region), 236 c, uio); 237 else 238 error = EFAULT; 239 } else { 240 error = uiomove(hpa, c, uio); 241 vm_gpa_release(cookie); 242 } 243 } 244 vcpu_unlock_one(sc, lastcpu); 245 return (error); 246} 247 248CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= SPECNAMELEN + 1); 249 250static int 251get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg) 252{ 253 struct devmem_softc *dsc; 254 int error; 255 bool sysmem; 256 257 error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL); 258 if (error || mseg->len == 0) 259 return (error); 260 261 if (!sysmem) { 262 SLIST_FOREACH(dsc, &sc->devmem, link) { 263 if (dsc->segid == mseg->segid) 264 break; 265 } 266 KASSERT(dsc != NULL, ("%s: devmem segment %d not found", 267 __func__, mseg->segid)); 268 error = copystr(dsc->name, mseg->name, SPECNAMELEN + 1, NULL); 269 } else { 270 bzero(mseg->name, sizeof(mseg->name)); 271 } 272 273 return (error); 274} 275 276static int 277alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg) 278{ 279 char *name; 280 int error; 281 bool sysmem; 282 283 error = 0; 284 name = NULL; 285 sysmem = true; 286 287 if (VM_MEMSEG_NAME(mseg)) { 288 sysmem = false; 289 name = malloc(SPECNAMELEN + 1, M_VMMDEV, M_WAITOK); 290 error = copystr(mseg->name, name, SPECNAMELEN + 1, 0); 291 if (error) 292 goto done; 293 } 294 295 error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem); 296 if (error) 297 goto done; 298 299 if (VM_MEMSEG_NAME(mseg)) { 300 error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name); 301 if (error) 302 vm_free_memseg(sc->vm, mseg->segid); 303 else 304 name = NULL; /* freed when 'cdev' is destroyed */ 305 } 306done: 307 free(name, M_VMMDEV); 308 return (error); 309} 310 311static int 312vm_get_register_set(struct vm *vm, int vcpu, unsigned int count, int *regnum, 313 uint64_t *regval) 314{ 315 int error, i; 316 317 error = 0; 318 for (i = 0; i < count; i++) { 319 error = vm_get_register(vm, vcpu, regnum[i], ®val[i]); 320 if (error) 321 break; 322 } 323 return (error); 324} 325 326static int 327vm_set_register_set(struct vm *vm, int vcpu, unsigned int count, int *regnum, 328 uint64_t *regval) 329{ 330 int error, i; 331 332 error = 0; 333 for (i = 0; i < count; i++) { 334 error = vm_set_register(vm, vcpu, regnum[i], regval[i]); 335 if (error) 336 break; 337 } 338 return (error); 339} 340 341static int 342vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, 343 struct thread *td) 344{ 345 int error, vcpu, state_changed, size; 346 cpuset_t *cpuset; 347 struct vmmdev_softc *sc; 348 struct vm_register *vmreg; 349 struct vm_seg_desc *vmsegdesc; 350 struct vm_register_set *vmregset; 351 struct vm_run *vmrun; 352 struct vm_exception *vmexc; 353 struct vm_lapic_irq *vmirq; 354 struct vm_lapic_msi *vmmsi; 355 struct vm_ioapic_irq *ioapic_irq; 356 struct vm_isa_irq *isa_irq; 357 struct vm_isa_irq_trigger *isa_irq_trigger; 358 struct vm_capability *vmcap; 359 struct vm_pptdev *pptdev; 360 struct vm_pptdev_mmio *pptmmio; 361 struct vm_pptdev_msi *pptmsi; 362 struct vm_pptdev_msix *pptmsix; 363 struct vm_nmi *vmnmi; 364 struct vm_stats *vmstats; 365 struct vm_stat_desc *statdesc; 366 struct vm_x2apic *x2apic; 367 struct vm_gpa_pte *gpapte; 368 struct vm_suspend *vmsuspend; 369 struct vm_gla2gpa *gg; 370 struct vm_activate_cpu *vac; 371 struct vm_cpuset *vm_cpuset; 372 struct vm_intinfo *vmii; 373 struct vm_rtc_time *rtctime; 374 struct vm_rtc_data *rtcdata; 375 struct vm_memmap *mm; 376 struct vm_cpu_topology *topology; 377 uint64_t *regvals; 378 int *regnums; 379 380 error = vmm_priv_check(curthread->td_ucred); 381 if (error) 382 return (error); 383 384 sc = vmmdev_lookup2(cdev); 385 if (sc == NULL) 386 return (ENXIO); 387 388 vcpu = -1; 389 state_changed = 0; 390 391 /* 392 * Some VMM ioctls can operate only on vcpus that are not running. 393 */ 394 switch (cmd) { 395 case VM_RUN: 396 case VM_GET_REGISTER: 397 case VM_SET_REGISTER: 398 case VM_GET_SEGMENT_DESCRIPTOR: 399 case VM_SET_SEGMENT_DESCRIPTOR: 400 case VM_GET_REGISTER_SET: 401 case VM_SET_REGISTER_SET: 402 case VM_INJECT_EXCEPTION: 403 case VM_GET_CAPABILITY: 404 case VM_SET_CAPABILITY: 405 case VM_PPTDEV_MSI: 406 case VM_PPTDEV_MSIX: 407 case VM_SET_X2APIC_STATE: 408 case VM_GLA2GPA: 409 case VM_GLA2GPA_NOFAULT: 410 case VM_ACTIVATE_CPU: 411 case VM_SET_INTINFO: 412 case VM_GET_INTINFO: 413 case VM_RESTART_INSTRUCTION: 414 /* 415 * XXX fragile, handle with care 416 * Assumes that the first field of the ioctl data is the vcpu. 417 */ 418 vcpu = *(int *)data; 419 error = vcpu_lock_one(sc, vcpu); 420 if (error) 421 goto done; 422 state_changed = 1; 423 break; 424 425 case VM_MAP_PPTDEV_MMIO: 426 case VM_BIND_PPTDEV: 427 case VM_UNBIND_PPTDEV: 428 case VM_ALLOC_MEMSEG: 429 case VM_MMAP_MEMSEG: 430 case VM_REINIT: 431 /* 432 * ioctls that operate on the entire virtual machine must 433 * prevent all vcpus from running. 434 */ 435 error = vcpu_lock_all(sc); 436 if (error) 437 goto done; 438 state_changed = 2; 439 break; 440 441 case VM_GET_MEMSEG: 442 case VM_MMAP_GETNEXT: 443 /* 444 * Lock a vcpu to make sure that the memory map cannot be 445 * modified while it is being inspected. 446 */ 447 vcpu = vm_get_maxcpus(sc->vm) - 1; 448 error = vcpu_lock_one(sc, vcpu); 449 if (error) 450 goto done; 451 state_changed = 1; 452 break; 453 454 default: 455 break; 456 } 457 458 switch(cmd) { 459 case VM_RUN: 460 vmrun = (struct vm_run *)data; 461 error = vm_run(sc->vm, vmrun); 462 break; 463 case VM_SUSPEND: 464 vmsuspend = (struct vm_suspend *)data; 465 error = vm_suspend(sc->vm, vmsuspend->how); 466 break; 467 case VM_REINIT: 468 error = vm_reinit(sc->vm); 469 break; 470 case VM_STAT_DESC: { 471 statdesc = (struct vm_stat_desc *)data; 472 error = vmm_stat_desc_copy(statdesc->index, 473 statdesc->desc, sizeof(statdesc->desc)); 474 break; 475 } 476 case VM_STATS: { 477 CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS); 478 vmstats = (struct vm_stats *)data; 479 getmicrotime(&vmstats->tv); 480 error = vmm_stat_copy(sc->vm, vmstats->cpuid, 481 &vmstats->num_entries, vmstats->statbuf); 482 break; 483 } 484 case VM_PPTDEV_MSI: 485 pptmsi = (struct vm_pptdev_msi *)data; 486 error = ppt_setup_msi(sc->vm, pptmsi->vcpu, 487 pptmsi->bus, pptmsi->slot, pptmsi->func, 488 pptmsi->addr, pptmsi->msg, 489 pptmsi->numvec); 490 break; 491 case VM_PPTDEV_MSIX: 492 pptmsix = (struct vm_pptdev_msix *)data; 493 error = ppt_setup_msix(sc->vm, pptmsix->vcpu, 494 pptmsix->bus, pptmsix->slot, 495 pptmsix->func, pptmsix->idx, 496 pptmsix->addr, pptmsix->msg, 497 pptmsix->vector_control); 498 break; 499 case VM_PPTDEV_DISABLE_MSIX: 500 pptdev = (struct vm_pptdev *)data; 501 error = ppt_disable_msix(sc->vm, pptdev->bus, pptdev->slot, 502 pptdev->func); 503 break; 504 case VM_MAP_PPTDEV_MMIO: 505 pptmmio = (struct vm_pptdev_mmio *)data; 506 error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot, 507 pptmmio->func, pptmmio->gpa, pptmmio->len, 508 pptmmio->hpa); 509 break; 510 case VM_BIND_PPTDEV: 511 pptdev = (struct vm_pptdev *)data; 512 error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot, 513 pptdev->func); 514 break; 515 case VM_UNBIND_PPTDEV: 516 pptdev = (struct vm_pptdev *)data; 517 error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot, 518 pptdev->func); 519 break; 520 case VM_INJECT_EXCEPTION: 521 vmexc = (struct vm_exception *)data; 522 error = vm_inject_exception(sc->vm, vmexc->cpuid, 523 vmexc->vector, vmexc->error_code_valid, vmexc->error_code, 524 vmexc->restart_instruction); 525 break; 526 case VM_INJECT_NMI: 527 vmnmi = (struct vm_nmi *)data; 528 error = vm_inject_nmi(sc->vm, vmnmi->cpuid); 529 break; 530 case VM_LAPIC_IRQ: 531 vmirq = (struct vm_lapic_irq *)data; 532 error = lapic_intr_edge(sc->vm, vmirq->cpuid, vmirq->vector); 533 break; 534 case VM_LAPIC_LOCAL_IRQ: 535 vmirq = (struct vm_lapic_irq *)data; 536 error = lapic_set_local_intr(sc->vm, vmirq->cpuid, 537 vmirq->vector); 538 break; 539 case VM_LAPIC_MSI: 540 vmmsi = (struct vm_lapic_msi *)data; 541 error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg); 542 break; 543 case VM_IOAPIC_ASSERT_IRQ: 544 ioapic_irq = (struct vm_ioapic_irq *)data; 545 error = vioapic_assert_irq(sc->vm, ioapic_irq->irq); 546 break; 547 case VM_IOAPIC_DEASSERT_IRQ: 548 ioapic_irq = (struct vm_ioapic_irq *)data; 549 error = vioapic_deassert_irq(sc->vm, ioapic_irq->irq); 550 break; 551 case VM_IOAPIC_PULSE_IRQ: 552 ioapic_irq = (struct vm_ioapic_irq *)data; 553 error = vioapic_pulse_irq(sc->vm, ioapic_irq->irq); 554 break; 555 case VM_IOAPIC_PINCOUNT: 556 *(int *)data = vioapic_pincount(sc->vm); 557 break; 558 case VM_ISA_ASSERT_IRQ: 559 isa_irq = (struct vm_isa_irq *)data; 560 error = vatpic_assert_irq(sc->vm, isa_irq->atpic_irq); 561 if (error == 0 && isa_irq->ioapic_irq != -1) 562 error = vioapic_assert_irq(sc->vm, 563 isa_irq->ioapic_irq); 564 break; 565 case VM_ISA_DEASSERT_IRQ: 566 isa_irq = (struct vm_isa_irq *)data; 567 error = vatpic_deassert_irq(sc->vm, isa_irq->atpic_irq); 568 if (error == 0 && isa_irq->ioapic_irq != -1) 569 error = vioapic_deassert_irq(sc->vm, 570 isa_irq->ioapic_irq); 571 break; 572 case VM_ISA_PULSE_IRQ: 573 isa_irq = (struct vm_isa_irq *)data; 574 error = vatpic_pulse_irq(sc->vm, isa_irq->atpic_irq); 575 if (error == 0 && isa_irq->ioapic_irq != -1) 576 error = vioapic_pulse_irq(sc->vm, isa_irq->ioapic_irq); 577 break; 578 case VM_ISA_SET_IRQ_TRIGGER: 579 isa_irq_trigger = (struct vm_isa_irq_trigger *)data; 580 error = vatpic_set_irq_trigger(sc->vm, 581 isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger); 582 break; 583 case VM_MMAP_GETNEXT: 584 mm = (struct vm_memmap *)data; 585 error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid, 586 &mm->segoff, &mm->len, &mm->prot, &mm->flags); 587 break; 588 case VM_MMAP_MEMSEG: 589 mm = (struct vm_memmap *)data; 590 error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff, 591 mm->len, mm->prot, mm->flags); 592 break; 593 case VM_ALLOC_MEMSEG: 594 error = alloc_memseg(sc, (struct vm_memseg *)data); 595 break; 596 case VM_GET_MEMSEG: 597 error = get_memseg(sc, (struct vm_memseg *)data); 598 break; 599 case VM_GET_REGISTER: 600 vmreg = (struct vm_register *)data; 601 error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum, 602 &vmreg->regval); 603 break; 604 case VM_SET_REGISTER: 605 vmreg = (struct vm_register *)data; 606 error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum, 607 vmreg->regval); 608 break; 609 case VM_SET_SEGMENT_DESCRIPTOR: 610 vmsegdesc = (struct vm_seg_desc *)data; 611 error = vm_set_seg_desc(sc->vm, vmsegdesc->cpuid, 612 vmsegdesc->regnum, 613 &vmsegdesc->desc); 614 break; 615 case VM_GET_SEGMENT_DESCRIPTOR: 616 vmsegdesc = (struct vm_seg_desc *)data; 617 error = vm_get_seg_desc(sc->vm, vmsegdesc->cpuid, 618 vmsegdesc->regnum, 619 &vmsegdesc->desc); 620 break; 621 case VM_GET_REGISTER_SET: 622 vmregset = (struct vm_register_set *)data; 623 if (vmregset->count > VM_REG_LAST) { 624 error = EINVAL; 625 break; 626 } 627 regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV, 628 M_WAITOK); 629 regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV, 630 M_WAITOK); 631 error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * 632 vmregset->count); 633 if (error == 0) 634 error = vm_get_register_set(sc->vm, vmregset->cpuid, 635 vmregset->count, regnums, regvals); 636 if (error == 0) 637 error = copyout(regvals, vmregset->regvals, 638 sizeof(regvals[0]) * vmregset->count); 639 free(regvals, M_VMMDEV); 640 free(regnums, M_VMMDEV); 641 break; 642 case VM_SET_REGISTER_SET: 643 vmregset = (struct vm_register_set *)data; 644 if (vmregset->count > VM_REG_LAST) { 645 error = EINVAL; 646 break; 647 } 648 regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV, 649 M_WAITOK); 650 regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV, 651 M_WAITOK); 652 error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) * 653 vmregset->count); 654 if (error == 0) 655 error = copyin(vmregset->regvals, regvals, 656 sizeof(regvals[0]) * vmregset->count); 657 if (error == 0) 658 error = vm_set_register_set(sc->vm, vmregset->cpuid, 659 vmregset->count, regnums, regvals); 660 free(regvals, M_VMMDEV); 661 free(regnums, M_VMMDEV); 662 break; 663 case VM_GET_CAPABILITY: 664 vmcap = (struct vm_capability *)data; 665 error = vm_get_capability(sc->vm, vmcap->cpuid, 666 vmcap->captype, 667 &vmcap->capval); 668 break; 669 case VM_SET_CAPABILITY: 670 vmcap = (struct vm_capability *)data; 671 error = vm_set_capability(sc->vm, vmcap->cpuid, 672 vmcap->captype, 673 vmcap->capval); 674 break; 675 case VM_SET_X2APIC_STATE: 676 x2apic = (struct vm_x2apic *)data; 677 error = vm_set_x2apic_state(sc->vm, 678 x2apic->cpuid, x2apic->state); 679 break; 680 case VM_GET_X2APIC_STATE: 681 x2apic = (struct vm_x2apic *)data; 682 error = vm_get_x2apic_state(sc->vm, 683 x2apic->cpuid, &x2apic->state); 684 break; 685 case VM_GET_GPA_PMAP: 686 gpapte = (struct vm_gpa_pte *)data; 687 pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)), 688 gpapte->gpa, gpapte->pte, &gpapte->ptenum); 689 error = 0; 690 break; 691 case VM_GET_HPET_CAPABILITIES: 692 error = vhpet_getcap((struct vm_hpet_cap *)data); 693 break; 694 case VM_GLA2GPA: { 695 CTASSERT(PROT_READ == VM_PROT_READ); 696 CTASSERT(PROT_WRITE == VM_PROT_WRITE); 697 CTASSERT(PROT_EXEC == VM_PROT_EXECUTE); 698 gg = (struct vm_gla2gpa *)data; 699 error = vm_gla2gpa(sc->vm, gg->vcpuid, &gg->paging, gg->gla, 700 gg->prot, &gg->gpa, &gg->fault); 701 KASSERT(error == 0 || error == EFAULT, 702 ("%s: vm_gla2gpa unknown error %d", __func__, error)); 703 break; 704 } 705 case VM_GLA2GPA_NOFAULT: 706 gg = (struct vm_gla2gpa *)data; 707 error = vm_gla2gpa_nofault(sc->vm, gg->vcpuid, &gg->paging, 708 gg->gla, gg->prot, &gg->gpa, &gg->fault); 709 KASSERT(error == 0 || error == EFAULT, 710 ("%s: vm_gla2gpa unknown error %d", __func__, error)); 711 break; 712 case VM_ACTIVATE_CPU: 713 vac = (struct vm_activate_cpu *)data; 714 error = vm_activate_cpu(sc->vm, vac->vcpuid); 715 break; 716 case VM_GET_CPUS: 717 error = 0; 718 vm_cpuset = (struct vm_cpuset *)data; 719 size = vm_cpuset->cpusetsize; 720 if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) { 721 error = ERANGE; 722 break; 723 } 724 cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO); 725 if (vm_cpuset->which == VM_ACTIVE_CPUS) 726 *cpuset = vm_active_cpus(sc->vm); 727 else if (vm_cpuset->which == VM_SUSPENDED_CPUS) 728 *cpuset = vm_suspended_cpus(sc->vm); 729 else if (vm_cpuset->which == VM_DEBUG_CPUS) 730 *cpuset = vm_debug_cpus(sc->vm); 731 else 732 error = EINVAL; 733 if (error == 0) 734 error = copyout(cpuset, vm_cpuset->cpus, size); 735 free(cpuset, M_TEMP); 736 break; 737 case VM_SUSPEND_CPU: 738 vac = (struct vm_activate_cpu *)data; 739 error = vm_suspend_cpu(sc->vm, vac->vcpuid); 740 break; 741 case VM_RESUME_CPU: 742 vac = (struct vm_activate_cpu *)data; 743 error = vm_resume_cpu(sc->vm, vac->vcpuid); 744 break; 745 case VM_SET_INTINFO: 746 vmii = (struct vm_intinfo *)data; 747 error = vm_exit_intinfo(sc->vm, vmii->vcpuid, vmii->info1); 748 break; 749 case VM_GET_INTINFO: 750 vmii = (struct vm_intinfo *)data; 751 error = vm_get_intinfo(sc->vm, vmii->vcpuid, &vmii->info1, 752 &vmii->info2); 753 break; 754 case VM_RTC_WRITE: 755 rtcdata = (struct vm_rtc_data *)data; 756 error = vrtc_nvram_write(sc->vm, rtcdata->offset, 757 rtcdata->value); 758 break; 759 case VM_RTC_READ: 760 rtcdata = (struct vm_rtc_data *)data; 761 error = vrtc_nvram_read(sc->vm, rtcdata->offset, 762 &rtcdata->value); 763 break; 764 case VM_RTC_SETTIME: 765 rtctime = (struct vm_rtc_time *)data; 766 error = vrtc_set_time(sc->vm, rtctime->secs); 767 break; 768 case VM_RTC_GETTIME: 769 error = 0; 770 rtctime = (struct vm_rtc_time *)data; 771 rtctime->secs = vrtc_get_time(sc->vm); 772 break; 773 case VM_RESTART_INSTRUCTION: 774 error = vm_restart_instruction(sc->vm, vcpu); 775 break; 776 case VM_SET_TOPOLOGY: 777 topology = (struct vm_cpu_topology *)data; 778 error = vm_set_topology(sc->vm, topology->sockets, 779 topology->cores, topology->threads, topology->maxcpus); 780 break; 781 case VM_GET_TOPOLOGY: 782 topology = (struct vm_cpu_topology *)data; 783 vm_get_topology(sc->vm, &topology->sockets, &topology->cores, 784 &topology->threads, &topology->maxcpus); 785 error = 0; 786 break; 787 default: 788 error = ENOTTY; 789 break; 790 } 791 792 if (state_changed == 1) 793 vcpu_unlock_one(sc, vcpu); 794 else if (state_changed == 2) 795 vcpu_unlock_all(sc); 796 797done: 798 /* 799 * Make sure that no handler returns a kernel-internal 800 * error value to userspace. 801 */ 802 KASSERT(error == ERESTART || error >= 0, 803 ("vmmdev_ioctl: invalid error return %d", error)); 804 return (error); 805} 806 807static int 808vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize, 809 struct vm_object **objp, int nprot) 810{ 811 struct vmmdev_softc *sc; 812 vm_paddr_t gpa; 813 size_t len; 814 vm_ooffset_t segoff, first, last; 815 int error, found, segid; 816 uint16_t lastcpu; 817 bool sysmem; 818 819 error = vmm_priv_check(curthread->td_ucred); 820 if (error) 821 return (error); 822 823 first = *offset; 824 last = first + mapsize; 825 if ((nprot & PROT_EXEC) || first < 0 || first >= last) 826 return (EINVAL); 827 828 sc = vmmdev_lookup2(cdev); 829 if (sc == NULL) { 830 /* virtual machine is in the process of being created */ 831 return (EINVAL); 832 } 833 834 /* 835 * Get a read lock on the guest memory map by freezing any vcpu. 836 */ 837 lastcpu = vm_get_maxcpus(sc->vm) - 1; 838 error = vcpu_lock_one(sc, lastcpu); 839 if (error) 840 return (error); 841 842 gpa = 0; 843 found = 0; 844 while (!found) { 845 error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len, 846 NULL, NULL); 847 if (error) 848 break; 849 850 if (first >= gpa && last <= gpa + len) 851 found = 1; 852 else 853 gpa += len; 854 } 855 856 if (found) { 857 error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp); 858 KASSERT(error == 0 && *objp != NULL, 859 ("%s: invalid memory segment %d", __func__, segid)); 860 if (sysmem) { 861 vm_object_reference(*objp); 862 *offset = segoff + (first - gpa); 863 } else { 864 error = EINVAL; 865 } 866 } 867 vcpu_unlock_one(sc, lastcpu); 868 return (error); 869} 870 871static void 872vmmdev_destroy(void *arg) 873{ 874 struct vmmdev_softc *sc = arg; 875 struct devmem_softc *dsc; 876 int error; 877 878 error = vcpu_lock_all(sc); 879 KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error)); 880 881 while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) { 882 KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__)); 883 SLIST_REMOVE_HEAD(&sc->devmem, link); 884 free(dsc->name, M_VMMDEV); 885 free(dsc, M_VMMDEV); 886 } 887 888 if (sc->cdev != NULL) 889 destroy_dev(sc->cdev); 890 891 if (sc->vm != NULL) 892 vm_destroy(sc->vm); 893 894 if ((sc->flags & VSC_LINKED) != 0) { 895 mtx_lock(&vmmdev_mtx); 896 SLIST_REMOVE(&head, sc, vmmdev_softc, link); 897 mtx_unlock(&vmmdev_mtx); 898 } 899 900 free(sc, M_VMMDEV); 901} 902 903static int 904sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS) 905{ 906 int error; 907 char buf[VM_MAX_NAMELEN]; 908 struct devmem_softc *dsc; 909 struct vmmdev_softc *sc; 910 struct cdev *cdev; 911 912 error = vmm_priv_check(req->td->td_ucred); 913 if (error) 914 return (error); 915 916 strlcpy(buf, "beavis", sizeof(buf)); 917 error = sysctl_handle_string(oidp, buf, sizeof(buf), req); 918 if (error != 0 || req->newptr == NULL) 919 return (error); 920 921 mtx_lock(&vmmdev_mtx); 922 sc = vmmdev_lookup(buf); 923 if (sc == NULL || sc->cdev == NULL) { 924 mtx_unlock(&vmmdev_mtx); 925 return (EINVAL); 926 } 927 928 /* 929 * The 'cdev' will be destroyed asynchronously when 'si_threadcount' 930 * goes down to 0 so we should not do it again in the callback. 931 * 932 * Setting 'sc->cdev' to NULL is also used to indicate that the VM 933 * is scheduled for destruction. 934 */ 935 cdev = sc->cdev; 936 sc->cdev = NULL; 937 mtx_unlock(&vmmdev_mtx); 938 939 /* 940 * Schedule all cdevs to be destroyed: 941 * 942 * - any new operations on the 'cdev' will return an error (ENXIO). 943 * 944 * - when the 'si_threadcount' dwindles down to zero the 'cdev' will 945 * be destroyed and the callback will be invoked in a taskqueue 946 * context. 947 * 948 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev' 949 */ 950 SLIST_FOREACH(dsc, &sc->devmem, link) { 951 KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed")); 952 destroy_dev_sched_cb(dsc->cdev, devmem_destroy, dsc); 953 } 954 destroy_dev_sched_cb(cdev, vmmdev_destroy, sc); 955 return (0); 956} 957SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, 958 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON, 959 NULL, 0, sysctl_vmm_destroy, "A", NULL); 960 961static struct cdevsw vmmdevsw = { 962 .d_name = "vmmdev", 963 .d_version = D_VERSION, 964 .d_ioctl = vmmdev_ioctl, 965 .d_mmap_single = vmmdev_mmap_single, 966 .d_read = vmmdev_rw, 967 .d_write = vmmdev_rw, 968}; 969 970static int 971sysctl_vmm_create(SYSCTL_HANDLER_ARGS) 972{ 973 int error; 974 struct vm *vm; 975 struct cdev *cdev; 976 struct vmmdev_softc *sc, *sc2; 977 char buf[VM_MAX_NAMELEN]; 978 979 error = vmm_priv_check(req->td->td_ucred); 980 if (error) 981 return (error); 982 983 strlcpy(buf, "beavis", sizeof(buf)); 984 error = sysctl_handle_string(oidp, buf, sizeof(buf), req); 985 if (error != 0 || req->newptr == NULL) 986 return (error); 987 988 mtx_lock(&vmmdev_mtx); 989 sc = vmmdev_lookup(buf); 990 mtx_unlock(&vmmdev_mtx); 991 if (sc != NULL) 992 return (EEXIST); 993 994 error = vm_create(buf, &vm); 995 if (error != 0) 996 return (error); 997 998 sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO); 999 sc->vm = vm; 1000 SLIST_INIT(&sc->devmem); 1001 1002 /* 1003 * Lookup the name again just in case somebody sneaked in when we 1004 * dropped the lock. 1005 */ 1006 mtx_lock(&vmmdev_mtx); 1007 sc2 = vmmdev_lookup(buf); 1008 if (sc2 == NULL) { 1009 SLIST_INSERT_HEAD(&head, sc, link); 1010 sc->flags |= VSC_LINKED; 1011 } 1012 mtx_unlock(&vmmdev_mtx); 1013 1014 if (sc2 != NULL) { 1015 vmmdev_destroy(sc); 1016 return (EEXIST); 1017 } 1018 1019 error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, NULL, 1020 UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf); 1021 if (error != 0) { 1022 vmmdev_destroy(sc); 1023 return (error); 1024 } 1025 1026 mtx_lock(&vmmdev_mtx); 1027 sc->cdev = cdev; 1028 sc->cdev->si_drv1 = sc; 1029 mtx_unlock(&vmmdev_mtx); 1030 1031 return (0); 1032} 1033SYSCTL_PROC(_hw_vmm, OID_AUTO, create, 1034 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON, 1035 NULL, 0, sysctl_vmm_create, "A", NULL); 1036 1037void 1038vmmdev_init(void) 1039{ 1040 mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF); 1041 pr_allow_flag = prison_add_allow(NULL, "vmm", NULL, 1042 "Allow use of vmm in a jail."); 1043} 1044 1045int 1046vmmdev_cleanup(void) 1047{ 1048 int error; 1049 1050 if (SLIST_EMPTY(&head)) 1051 error = 0; 1052 else 1053 error = EBUSY; 1054 1055 return (error); 1056} 1057 1058static int 1059devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len, 1060 struct vm_object **objp, int nprot) 1061{ 1062 struct devmem_softc *dsc; 1063 vm_ooffset_t first, last; 1064 size_t seglen; 1065 int error; 1066 uint16_t lastcpu; 1067 bool sysmem; 1068 1069 dsc = cdev->si_drv1; 1070 if (dsc == NULL) { 1071 /* 'cdev' has been created but is not ready for use */ 1072 return (ENXIO); 1073 } 1074 1075 first = *offset; 1076 last = *offset + len; 1077 if ((nprot & PROT_EXEC) || first < 0 || first >= last) 1078 return (EINVAL); 1079 1080 lastcpu = vm_get_maxcpus(dsc->sc->vm) - 1; 1081 error = vcpu_lock_one(dsc->sc, lastcpu); 1082 if (error) 1083 return (error); 1084 1085 error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp); 1086 KASSERT(error == 0 && !sysmem && *objp != NULL, 1087 ("%s: invalid devmem segment %d", __func__, dsc->segid)); 1088 1089 vcpu_unlock_one(dsc->sc, lastcpu); 1090 1091 if (seglen >= last) { 1092 vm_object_reference(*objp); 1093 return (0); 1094 } else { 1095 return (EINVAL); 1096 } 1097} 1098 1099static struct cdevsw devmemsw = { 1100 .d_name = "devmem", 1101 .d_version = D_VERSION, 1102 .d_mmap_single = devmem_mmap_single, 1103}; 1104 1105static int 1106devmem_create_cdev(const char *vmname, int segid, char *devname) 1107{ 1108 struct devmem_softc *dsc; 1109 struct vmmdev_softc *sc; 1110 struct cdev *cdev; 1111 int error; 1112 1113 error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL, 1114 UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname); 1115 if (error) 1116 return (error); 1117 1118 dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO); 1119 1120 mtx_lock(&vmmdev_mtx); 1121 sc = vmmdev_lookup(vmname); 1122 KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname)); 1123 if (sc->cdev == NULL) { 1124 /* virtual machine is being created or destroyed */ 1125 mtx_unlock(&vmmdev_mtx); 1126 free(dsc, M_VMMDEV); 1127 destroy_dev_sched_cb(cdev, NULL, 0); 1128 return (ENODEV); 1129 } 1130 1131 dsc->segid = segid; 1132 dsc->name = devname; 1133 dsc->cdev = cdev; 1134 dsc->sc = sc; 1135 SLIST_INSERT_HEAD(&sc->devmem, dsc, link); 1136 mtx_unlock(&vmmdev_mtx); 1137 1138 /* The 'cdev' is ready for use after 'si_drv1' is initialized */ 1139 cdev->si_drv1 = dsc; 1140 return (0); 1141} 1142 1143static void 1144devmem_destroy(void *arg) 1145{ 1146 struct devmem_softc *dsc = arg; 1147 1148 KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__)); 1149 dsc->cdev = NULL; 1150 dsc->sc = NULL; 1151} 1152