vmm_dev.c revision 241454
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/sysctl.h>
#include <sys/libkern.h>
#include <sys/ioccom.h>
#include <sys/mman.h>
#include <sys/uio.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/pmap.h>
#include <machine/vmparam.h>

#include <machine/vmm.h>
#include "vmm_lapic.h"
#include "vmm_stat.h"
#include "vmm_mem.h"
#include "io/ppt.h"
#include <machine/vmm_dev.h>

struct vmmdev_softc {
    struct vm   *vm;        /* vm instance cookie */
    struct cdev *cdev;
    SLIST_ENTRY(vmmdev_softc) link;
};
static SLIST_HEAD(, vmmdev_softc) head;

static struct mtx vmmdev_mtx;

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static struct vmmdev_softc *
vmmdev_lookup(const char *name)
{
    struct vmmdev_softc *sc;

#ifdef notyet   /* XXX kernel is not compiled with invariants */
    mtx_assert(&vmmdev_mtx, MA_OWNED);
#endif

    SLIST_FOREACH(sc, &head, link) {
        if (strcmp(name, vm_name(sc->vm)) == 0)
            break;
    }

    return (sc);
}

static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{
#ifdef notyet   /* XXX kernel is not compiled with invariants */
    mtx_assert(&vmmdev_mtx, MA_OWNED);
#endif

    return (cdev->si_drv1);
}

static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
    int error, off, c;
    vm_paddr_t hpa, gpa;
    struct vmmdev_softc *sc;
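    /*
     * Page of zeroes used to satisfy reads that fall into holes in the
     * guest's physical memory map (see the comment in the loop below).
     */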
    static char zerobuf[PAGE_SIZE];

    error = 0;
    mtx_lock(&vmmdev_mtx);
    sc = vmmdev_lookup2(cdev);
    if (sc == NULL)
        error = ENXIO;

    while (uio->uio_resid > 0 && error == 0) {
        gpa = uio->uio_offset;
        off = gpa & PAGE_MASK;
        c = min(uio->uio_resid, PAGE_SIZE - off);

        /*
         * The VM has a hole in its physical memory map. If we want to
         * use 'dd' to inspect memory beyond the hole we need to
         * provide bogus data for memory that lies in the hole.
         *
         * Since this device does not support lseek(2), dd(1) will
         * read(2) blocks of data to simulate the lseek(2).
         */
        hpa = vm_gpa2hpa(sc->vm, gpa, c);
        if (hpa == (vm_paddr_t)-1) {
            if (uio->uio_rw == UIO_READ)
                error = uiomove(zerobuf, c, uio);
            else
                error = EFAULT;
        } else
            error = uiomove((void *)PHYS_TO_DMAP(hpa), c, uio);
    }

    mtx_unlock(&vmmdev_mtx);
    return (error);
}

static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
             struct thread *td)
{
    int error, vcpu;
    struct vmmdev_softc *sc;
    struct vm_memory_segment *seg;
    struct vm_register *vmreg;
    struct vm_seg_desc *vmsegdesc;
    struct vm_pin *vmpin;
    struct vm_run *vmrun;
    struct vm_event *vmevent;
    struct vm_lapic_irq *vmirq;
    struct vm_capability *vmcap;
    struct vm_pptdev *pptdev;
    struct vm_pptdev_mmio *pptmmio;
    struct vm_pptdev_msi *pptmsi;
    struct vm_pptdev_msix *pptmsix;
    struct vm_nmi *vmnmi;
    struct vm_stats *vmstats;
    struct vm_stat_desc *statdesc;
    struct vm_x2apic *x2apic;

    mtx_lock(&vmmdev_mtx);
    sc = vmmdev_lookup2(cdev);
    if (sc == NULL) {
        mtx_unlock(&vmmdev_mtx);
        return (ENXIO);
    }

    /*
     * Some VMM ioctls can operate only on vcpus that are not running.
     */
    switch (cmd) {
    case VM_RUN:
    case VM_SET_PINNING:
    case VM_GET_REGISTER:
    case VM_SET_REGISTER:
    case VM_GET_SEGMENT_DESCRIPTOR:
    case VM_SET_SEGMENT_DESCRIPTOR:
    case VM_INJECT_EVENT:
    case VM_GET_CAPABILITY:
    case VM_SET_CAPABILITY:
    case VM_PPTDEV_MSI:
    case VM_SET_X2APIC_STATE:
        /*
         * XXX fragile, handle with care
         * Assumes that the first field of the ioctl data is the vcpu.
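         * (e.g. 'cpuid' in struct vm_run, 'vcpu' in struct vm_pptdev_msi).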
         */
        vcpu = *(int *)data;
        if (vcpu < 0 || vcpu >= VM_MAXCPU) {
            error = EINVAL;
            goto done;
        }

        if (vcpu_is_running(sc->vm, vcpu, NULL)) {
            error = EBUSY;
            goto done;
        }
        break;
    default:
        break;
    }

    switch (cmd) {
    case VM_RUN:
        vmrun = (struct vm_run *)data;

        vm_set_run_state(sc->vm, vmrun->cpuid, VCPU_RUNNING);
        mtx_unlock(&vmmdev_mtx);

        error = vm_run(sc->vm, vmrun);

        mtx_lock(&vmmdev_mtx);
        vm_set_run_state(sc->vm, vmrun->cpuid, VCPU_STOPPED);
        break;
    case VM_STAT_DESC: {
        const char *desc;
        statdesc = (struct vm_stat_desc *)data;
        desc = vmm_stat_desc(statdesc->index);
        if (desc != NULL) {
            error = 0;
            strlcpy(statdesc->desc, desc, sizeof(statdesc->desc));
        } else
            error = EINVAL;
        break;
    }
    case VM_STATS: {
        CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_TYPES);
        vmstats = (struct vm_stats *)data;
        getmicrotime(&vmstats->tv);
        error = vmm_stat_copy(sc->vm, vmstats->cpuid,
                              &vmstats->num_entries, vmstats->statbuf);
        break;
    }
    case VM_PPTDEV_MSI:
        pptmsi = (struct vm_pptdev_msi *)data;
        error = ppt_setup_msi(sc->vm, pptmsi->vcpu,
                              pptmsi->bus, pptmsi->slot, pptmsi->func,
                              pptmsi->destcpu, pptmsi->vector,
                              pptmsi->numvec);
        break;
    case VM_PPTDEV_MSIX:
        pptmsix = (struct vm_pptdev_msix *)data;
        error = ppt_setup_msix(sc->vm, pptmsix->vcpu,
                               pptmsix->bus, pptmsix->slot,
                               pptmsix->func, pptmsix->idx,
                               pptmsix->msg, pptmsix->vector_control,
                               pptmsix->addr);
        break;
    case VM_MAP_PPTDEV_MMIO:
        pptmmio = (struct vm_pptdev_mmio *)data;
        error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
                             pptmmio->func, pptmmio->gpa, pptmmio->len,
                             pptmmio->hpa);
        break;
    case VM_BIND_PPTDEV:
        pptdev = (struct vm_pptdev *)data;
        error = ppt_assign_device(sc->vm, pptdev->bus, pptdev->slot,
                                  pptdev->func);
        break;
    case VM_UNBIND_PPTDEV:
        pptdev = (struct vm_pptdev *)data;
        error = ppt_unassign_device(sc->vm, pptdev->bus, pptdev->slot,
                                    pptdev->func);
        break;
    case VM_INJECT_EVENT:
        vmevent = (struct vm_event *)data;
        error = vm_inject_event(sc->vm, vmevent->cpuid, vmevent->type,
                                vmevent->vector,
                                vmevent->error_code,
                                vmevent->error_code_valid);
        break;
    case VM_INJECT_NMI:
        vmnmi = (struct vm_nmi *)data;
        error = vm_inject_nmi(sc->vm, vmnmi->cpuid);
        break;
    case VM_LAPIC_IRQ:
        vmirq = (struct vm_lapic_irq *)data;
        error = lapic_set_intr(sc->vm, vmirq->cpuid, vmirq->vector);
        break;
    case VM_SET_PINNING:
        vmpin = (struct vm_pin *)data;
        error = vm_set_pinning(sc->vm, vmpin->vm_cpuid,
                               vmpin->host_cpuid);
        break;
    case VM_GET_PINNING:
        vmpin = (struct vm_pin *)data;
        error = vm_get_pinning(sc->vm, vmpin->vm_cpuid,
                               &vmpin->host_cpuid);
        break;
    case VM_MAP_MEMORY:
        seg = (struct vm_memory_segment *)data;
        error = vm_malloc(sc->vm, seg->gpa, seg->len);
        break;
    case VM_GET_MEMORY_SEG:
        seg = (struct vm_memory_segment *)data;
        seg->len = 0;
        (void)vm_gpabase2memseg(sc->vm, seg->gpa, seg);
        error = 0;
        break;
    case VM_GET_REGISTER:
        vmreg = (struct vm_register *)data;
        error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum,
                                &vmreg->regval);
        break;
    case VM_SET_REGISTER:
        vmreg = (struct vm_register *)data;
        error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum,
                                vmreg->regval);
        break;
    case VM_SET_SEGMENT_DESCRIPTOR:
        vmsegdesc = (struct vm_seg_desc *)data;
        error = vm_set_seg_desc(sc->vm, vmsegdesc->cpuid,
                                vmsegdesc->regnum,
                                &vmsegdesc->desc);
        break;
    case VM_GET_SEGMENT_DESCRIPTOR:
        vmsegdesc = (struct vm_seg_desc *)data;
        error = vm_get_seg_desc(sc->vm, vmsegdesc->cpuid,
                                vmsegdesc->regnum,
                                &vmsegdesc->desc);
        break;
    case VM_GET_CAPABILITY:
        vmcap = (struct vm_capability *)data;
        error = vm_get_capability(sc->vm, vmcap->cpuid,
                                  vmcap->captype,
                                  &vmcap->capval);
        break;
    case VM_SET_CAPABILITY:
        vmcap = (struct vm_capability *)data;
        error = vm_set_capability(sc->vm, vmcap->cpuid,
                                  vmcap->captype,
                                  vmcap->capval);
        break;
    case VM_SET_X2APIC_STATE:
        x2apic = (struct vm_x2apic *)data;
        error = vm_set_x2apic_state(sc->vm,
                                    x2apic->cpuid, x2apic->state);
        break;
    case VM_GET_X2APIC_STATE:
        x2apic = (struct vm_x2apic *)data;
        error = vm_get_x2apic_state(sc->vm,
                                    x2apic->cpuid, &x2apic->state);
        break;
    default:
        error = ENOTTY;
        break;
    }
done:
    mtx_unlock(&vmmdev_mtx);

    return (error);
}

static int
vmmdev_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr,
            int nprot, vm_memattr_t *memattr)
{
    int error;
    struct vmmdev_softc *sc;

    error = -1;
    mtx_lock(&vmmdev_mtx);

    sc = vmmdev_lookup2(cdev);
    if (sc != NULL && (nprot & PROT_EXEC) == 0) {
        *paddr = vm_gpa2hpa(sc->vm, (vm_paddr_t)offset, PAGE_SIZE);
        if (*paddr != (vm_paddr_t)-1)
            error = 0;
    }

    mtx_unlock(&vmmdev_mtx);

    return (error);
}

static void
vmmdev_destroy(struct vmmdev_softc *sc, boolean_t unlink)
{

    /*
     * XXX must stop virtual machine instances that may be still
     * running and cleanup their state.
     */
    if (sc->cdev)
        destroy_dev(sc->cdev);

    if (sc->vm)
        vm_destroy(sc->vm);

    if (unlink) {
        mtx_lock(&vmmdev_mtx);
        SLIST_REMOVE(&head, sc, vmmdev_softc, link);
        mtx_unlock(&vmmdev_mtx);
    }

    free(sc, M_VMMDEV);
}

static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
    int error;
    char buf[VM_MAX_NAMELEN];
    struct vmmdev_softc *sc;

    strlcpy(buf, "beavis", sizeof(buf));
    error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
    if (error != 0 || req->newptr == NULL)
        return (error);

    /*
     * XXX TODO if any process has this device open then fail
     */

    mtx_lock(&vmmdev_mtx);
    sc = vmmdev_lookup(buf);
    if (sc == NULL) {
        mtx_unlock(&vmmdev_mtx);
        return (EINVAL);
    }

    sc->cdev->si_drv1 = NULL;
    mtx_unlock(&vmmdev_mtx);

    vmmdev_destroy(sc, TRUE);

    return (0);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW,
            NULL, 0, sysctl_vmm_destroy, "A", NULL);

static struct cdevsw vmmdevsw = {
    .d_name     = "vmmdev",
    .d_version  = D_VERSION,
    .d_ioctl    = vmmdev_ioctl,
    .d_mmap     = vmmdev_mmap,
    .d_read     = vmmdev_rw,
    .d_write    = vmmdev_rw,
};

static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
    int error;
    struct vm *vm;
    struct vmmdev_softc *sc, *sc2;
    char buf[VM_MAX_NAMELEN];

    strlcpy(buf, "beavis", sizeof(buf));
    error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
    if (error != 0 || req->newptr == NULL)
        return (error);

    mtx_lock(&vmmdev_mtx);
    sc = vmmdev_lookup(buf);
    mtx_unlock(&vmmdev_mtx);
    if (sc != NULL)
        return (EEXIST);

    vm = vm_create(buf);
    if (vm == NULL)
        return (EINVAL);

    sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
    sc->vm = vm;

    /*
     * Lookup the name again just in case somebody sneaked in when we
     * dropped the lock.
     */
    mtx_lock(&vmmdev_mtx);
    sc2 = vmmdev_lookup(buf);
    if (sc2 == NULL)
        SLIST_INSERT_HEAD(&head, sc, link);
    mtx_unlock(&vmmdev_mtx);

    if (sc2 != NULL) {
        vmmdev_destroy(sc, FALSE);
        return (EEXIST);
    }

    sc->cdev = make_dev(&vmmdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
                        "vmm/%s", buf);
    sc->cdev->si_drv1 = sc;

    return (0);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW,
            NULL, 0, sysctl_vmm_create, "A", NULL);

void
vmmdev_init(void)
{
    mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF);
}

int
vmmdev_cleanup(void)
{
    int error;

    if (SLIST_EMPTY(&head))
        error = 0;
    else
        error = EBUSY;

    return (error);
}
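
Usage sketch (userland): the create/destroy sysctl handlers, the ioctl handler, and the mmap handler above imply the following control flow for a consumer process. This is an illustrative sketch, not bhyve's libvmmapi; it assumes the ioctl numbers and structure layouts published by <machine/vmm_dev.h> at this revision (in particular that struct vm_memory_segment carries 'gpa' and 'len'), and the VM name "testvm" is arbitrary.

#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>

#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
    struct vm_memory_segment seg;
    char path[64];
    const char *name = "testvm";    /* hypothetical VM name */
    char *base;
    int fd;

    /* hw.vmm.create (sysctl_vmm_create) creates the VM and /dev/vmm/testvm. */
    if (sysctlbyname("hw.vmm.create", NULL, NULL, name, strlen(name)) != 0)
        err(1, "hw.vmm.create");

    snprintf(path, sizeof(path), "/dev/vmm/%s", name);
    if ((fd = open(path, O_RDWR)) < 0)
        err(1, "%s", path);

    /* VM_MAP_MEMORY -> vm_malloc(): back guest physical [0, 1MB). */
    seg.gpa = 0;
    seg.len = 1024 * 1024;
    if (ioctl(fd, VM_MAP_MEMORY, &seg) != 0)
        err(1, "VM_MAP_MEMORY");

    /*
     * The mmap offset names a guest physical address; vmmdev_mmap()
     * translates each faulted page via vm_gpa2hpa() and rejects
     * PROT_EXEC mappings.
     */
    base = mmap(NULL, seg.len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (base == MAP_FAILED)
        err(1, "mmap");
    base[0] = 0x42;     /* touch guest physical address 0 */

    munmap(base, seg.len);
    close(fd);

    /* hw.vmm.destroy (sysctl_vmm_destroy) tears the instance down again. */
    if (sysctlbyname("hw.vmm.destroy", NULL, NULL, name, strlen(name)) != 0)
        err(1, "hw.vmm.destroy");
    return (0);
}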