/* vmm_dev.c revision 221828 */
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
25 * 26 * $FreeBSD$ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD$"); 31 32#include <sys/param.h> 33#include <sys/kernel.h> 34#include <sys/queue.h> 35#include <sys/lock.h> 36#include <sys/mutex.h> 37#include <sys/malloc.h> 38#include <sys/conf.h> 39#include <sys/sysctl.h> 40#include <sys/libkern.h> 41#include <sys/ioccom.h> 42#include <sys/mman.h> 43#include <sys/uio.h> 44 45#include <vm/vm.h> 46#include <vm/pmap.h> 47 48#include <machine/pmap.h> 49#include <machine/vmparam.h> 50 51#include <machine/vmm.h> 52#include "vmm_lapic.h" 53#include "vmm_stat.h" 54#include "io/ppt.h" 55#include <machine/vmm_dev.h> 56 57struct vmmdev_softc { 58 struct vm *vm; /* vm instance cookie */ 59 struct cdev *cdev; 60 SLIST_ENTRY(vmmdev_softc) link; 61}; 62static SLIST_HEAD(, vmmdev_softc) head; 63 64static struct mtx vmmdev_mtx; 65 66static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev"); 67 68SYSCTL_DECL(_hw_vmm); 69 70static struct vmmdev_softc * 71vmmdev_lookup(const char *name) 72{ 73 struct vmmdev_softc *sc; 74 75#ifdef notyet /* XXX kernel is not compiled with invariants */ 76 mtx_assert(&vmmdev_mtx, MA_OWNED); 77#endif 78 79 SLIST_FOREACH(sc, &head, link) { 80 if (strcmp(name, vm_name(sc->vm)) == 0) 81 break; 82 } 83 84 return (sc); 85} 86 87static struct vmmdev_softc * 88vmmdev_lookup2(struct cdev *cdev) 89{ 90 struct vmmdev_softc *sc; 91 92#ifdef notyet /* XXX kernel is not compiled with invariants */ 93 mtx_assert(&vmmdev_mtx, MA_OWNED); 94#endif 95 96 SLIST_FOREACH(sc, &head, link) { 97 if (sc->cdev == cdev) 98 break; 99 } 100 101 return (sc); 102} 103 104static int 105vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags) 106{ 107 int error, off, c; 108 vm_paddr_t hpa, gpa; 109 struct vmmdev_softc *sc; 110 111 static char zerobuf[PAGE_SIZE]; 112 113 error = 0; 114 mtx_lock(&vmmdev_mtx); 115 sc = vmmdev_lookup2(cdev); 116 117 while (uio->uio_resid > 0 && error == 0) { 118 gpa = uio->uio_offset; 119 off = gpa & PAGE_MASK; 120 c = min(uio->uio_resid, PAGE_SIZE - 
off); 121 122 /* 123 * The VM has a hole in its physical memory map. If we want to 124 * use 'dd' to inspect memory beyond the hole we need to 125 * provide bogus data for memory that lies in the hole. 126 * 127 * Since this device does not support lseek(2), dd(1) will 128 * read(2) blocks of data to simulate the lseek(2). 129 */ 130 hpa = vm_gpa2hpa(sc->vm, gpa, c); 131 if (hpa == (vm_paddr_t)-1) { 132 if (uio->uio_rw == UIO_READ) 133 error = uiomove(zerobuf, c, uio); 134 else 135 error = EFAULT; 136 } else 137 error = uiomove((void *)PHYS_TO_DMAP(hpa), c, uio); 138 } 139 140 mtx_unlock(&vmmdev_mtx); 141 return (error); 142} 143 144static int 145vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, 146 struct thread *td) 147{ 148 int error, vcpu; 149 struct vmmdev_softc *sc; 150 struct vm_memory_segment *seg; 151 struct vm_register *vmreg; 152 struct vm_seg_desc* vmsegdesc; 153 struct vm_pin *vmpin; 154 struct vm_run *vmrun; 155 struct vm_event *vmevent; 156 struct vm_lapic_irq *vmirq; 157 struct vm_capability *vmcap; 158 struct vm_pptdev *pptdev; 159 struct vm_pptdev_mmio *pptmmio; 160 struct vm_pptdev_msi *pptmsi; 161 struct vm_nmi *vmnmi; 162 struct vm_stats *vmstats; 163 struct vm_stat_desc *statdesc; 164 165 mtx_lock(&vmmdev_mtx); 166 sc = vmmdev_lookup2(cdev); 167 if (sc == NULL) { 168 mtx_unlock(&vmmdev_mtx); 169 return (ENXIO); 170 } 171 172 /* 173 * Some VMM ioctls can operate only on vcpus that are not running. 174 */ 175 switch (cmd) { 176 case VM_RUN: 177 case VM_SET_PINNING: 178 case VM_GET_REGISTER: 179 case VM_SET_REGISTER: 180 case VM_GET_SEGMENT_DESCRIPTOR: 181 case VM_SET_SEGMENT_DESCRIPTOR: 182 case VM_INJECT_EVENT: 183 case VM_GET_CAPABILITY: 184 case VM_SET_CAPABILITY: 185 case VM_PPTDEV_MSI: 186 /* 187 * XXX fragile, handle with care 188 * Assumes that the first field of the ioctl data is the vcpu. 
189 */ 190 vcpu = *(int *)data; 191 if (vcpu < 0 || vcpu >= VM_MAXCPU) { 192 error = EINVAL; 193 goto done; 194 } 195 196 if (vcpu_is_running(sc->vm, vcpu, NULL)) { 197 error = EBUSY; 198 goto done; 199 } 200 break; 201 default: 202 break; 203 } 204 205 switch(cmd) { 206 case VM_RUN: 207 vmrun = (struct vm_run *)data; 208 209 vm_set_run_state(sc->vm, vmrun->cpuid, VCPU_RUNNING); 210 mtx_unlock(&vmmdev_mtx); 211 212 error = vm_run(sc->vm, vmrun); 213 214 mtx_lock(&vmmdev_mtx); 215 vm_set_run_state(sc->vm, vmrun->cpuid, VCPU_STOPPED); 216 break; 217 case VM_STAT_DESC: { 218 const char *desc; 219 statdesc = (struct vm_stat_desc *)data; 220 desc = vmm_stat_desc(statdesc->index); 221 if (desc != NULL) { 222 error = 0; 223 strlcpy(statdesc->desc, desc, sizeof(statdesc->desc)); 224 } else 225 error = EINVAL; 226 break; 227 } 228 case VM_STATS: { 229 CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_TYPES); 230 vmstats = (struct vm_stats *)data; 231 getmicrotime(&vmstats->tv); 232 error = vmm_stat_copy(sc->vm, vmstats->cpuid, 233 &vmstats->num_entries, vmstats->statbuf); 234 break; 235 } 236 case VM_PPTDEV_MSI: 237 pptmsi = (struct vm_pptdev_msi *)data; 238 error = ppt_setup_msi(sc->vm, pptmsi->vcpu, 239 pptmsi->bus, pptmsi->slot, pptmsi->func, 240 pptmsi->destcpu, pptmsi->vector, 241 pptmsi->numvec); 242 break; 243 case VM_MAP_PPTDEV_MMIO: 244 pptmmio = (struct vm_pptdev_mmio *)data; 245 error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot, 246 pptmmio->func, pptmmio->gpa, pptmmio->len, 247 pptmmio->hpa); 248 break; 249 case VM_BIND_PPTDEV: 250 pptdev = (struct vm_pptdev *)data; 251 error = ppt_assign_device(sc->vm, pptdev->bus, pptdev->slot, 252 pptdev->func); 253 break; 254 case VM_UNBIND_PPTDEV: 255 pptdev = (struct vm_pptdev *)data; 256 error = ppt_unassign_device(sc->vm, pptdev->bus, pptdev->slot, 257 pptdev->func); 258 break; 259 case VM_INJECT_EVENT: 260 vmevent = (struct vm_event *)data; 261 error = vm_inject_event(sc->vm, vmevent->cpuid, vmevent->type, 262 
vmevent->vector, 263 vmevent->error_code, 264 vmevent->error_code_valid); 265 break; 266 case VM_INJECT_NMI: 267 vmnmi = (struct vm_nmi *)data; 268 error = vm_inject_nmi(sc->vm, vmnmi->cpuid); 269 break; 270 case VM_LAPIC_IRQ: 271 vmirq = (struct vm_lapic_irq *)data; 272 error = lapic_set_intr(sc->vm, vmirq->cpuid, vmirq->vector); 273 break; 274 case VM_SET_PINNING: 275 vmpin = (struct vm_pin *)data; 276 error = vm_set_pinning(sc->vm, vmpin->vm_cpuid, 277 vmpin->host_cpuid); 278 break; 279 case VM_GET_PINNING: 280 vmpin = (struct vm_pin *)data; 281 error = vm_get_pinning(sc->vm, vmpin->vm_cpuid, 282 &vmpin->host_cpuid); 283 break; 284 case VM_MAP_MEMORY: 285 seg = (struct vm_memory_segment *)data; 286 error = vm_malloc(sc->vm, seg->gpa, seg->len, &seg->hpa); 287 break; 288 case VM_GET_MEMORY_SEG: 289 seg = (struct vm_memory_segment *)data; 290 seg->hpa = seg->len = 0; 291 (void)vm_gpabase2memseg(sc->vm, seg->gpa, seg); 292 error = 0; 293 break; 294 case VM_GET_REGISTER: 295 vmreg = (struct vm_register *)data; 296 error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum, 297 &vmreg->regval); 298 break; 299 case VM_SET_REGISTER: 300 vmreg = (struct vm_register *)data; 301 error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum, 302 vmreg->regval); 303 break; 304 case VM_SET_SEGMENT_DESCRIPTOR: 305 vmsegdesc = (struct vm_seg_desc *)data; 306 error = vm_set_seg_desc(sc->vm, vmsegdesc->cpuid, 307 vmsegdesc->regnum, 308 &vmsegdesc->desc); 309 break; 310 case VM_GET_SEGMENT_DESCRIPTOR: 311 vmsegdesc = (struct vm_seg_desc *)data; 312 error = vm_get_seg_desc(sc->vm, vmsegdesc->cpuid, 313 vmsegdesc->regnum, 314 &vmsegdesc->desc); 315 break; 316 case VM_GET_CAPABILITY: 317 vmcap = (struct vm_capability *)data; 318 error = vm_get_capability(sc->vm, vmcap->cpuid, 319 vmcap->captype, 320 &vmcap->capval); 321 break; 322 case VM_SET_CAPABILITY: 323 vmcap = (struct vm_capability *)data; 324 error = vm_set_capability(sc->vm, vmcap->cpuid, 325 vmcap->captype, 326 
vmcap->capval); 327 break; 328 default: 329 error = ENOTTY; 330 break; 331 } 332done: 333 mtx_unlock(&vmmdev_mtx); 334 335 return (error); 336} 337 338static int 339vmmdev_mmap(struct cdev *cdev, vm_offset_t offset, vm_paddr_t *paddr, int nprot) 340{ 341 int error; 342 struct vmmdev_softc *sc; 343 344 error = -1; 345 mtx_lock(&vmmdev_mtx); 346 347 sc = vmmdev_lookup2(cdev); 348 if (sc != NULL && (nprot & PROT_EXEC) == 0) { 349 *paddr = vm_gpa2hpa(sc->vm, (vm_paddr_t)offset, PAGE_SIZE); 350 if (*paddr != (vm_paddr_t)-1) 351 error = 0; 352 } 353 354 mtx_unlock(&vmmdev_mtx); 355 356 return (error); 357} 358 359static void 360vmmdev_destroy(struct vmmdev_softc *sc) 361{ 362 363#ifdef notyet /* XXX kernel is not compiled with invariants */ 364 mtx_assert(&vmmdev_mtx, MA_OWNED); 365#endif 366 367 /* 368 * XXX must stop virtual machine instances that may be still 369 * running and cleanup their state. 370 */ 371 SLIST_REMOVE(&head, sc, vmmdev_softc, link); 372 destroy_dev(sc->cdev); 373 vm_destroy(sc->vm); 374 free(sc, M_VMMDEV); 375} 376 377static int 378sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS) 379{ 380 int error; 381 char buf[VM_MAX_NAMELEN]; 382 struct vmmdev_softc *sc; 383 384 strlcpy(buf, "beavis", sizeof(buf)); 385 error = sysctl_handle_string(oidp, buf, sizeof(buf), req); 386 if (error != 0 || req->newptr == NULL) 387 return (error); 388 389 mtx_lock(&vmmdev_mtx); 390 sc = vmmdev_lookup(buf); 391 if (sc == NULL) { 392 mtx_unlock(&vmmdev_mtx); 393 return (EINVAL); 394 } 395 vmmdev_destroy(sc); 396 mtx_unlock(&vmmdev_mtx); 397 return (0); 398} 399SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW, 400 NULL, 0, sysctl_vmm_destroy, "A", NULL); 401 402static struct cdevsw vmmdevsw = { 403 .d_name = "vmmdev", 404 .d_version = D_VERSION, 405 .d_ioctl = vmmdev_ioctl, 406 .d_mmap = vmmdev_mmap, 407 .d_read = vmmdev_rw, 408 .d_write = vmmdev_rw, 409}; 410 411static int 412sysctl_vmm_create(SYSCTL_HANDLER_ARGS) 413{ 414 int error; 415 struct vm *vm; 416 struct 
vmmdev_softc *sc; 417 char buf[VM_MAX_NAMELEN]; 418 419 strlcpy(buf, "beavis", sizeof(buf)); 420 error = sysctl_handle_string(oidp, buf, sizeof(buf), req); 421 if (error != 0 || req->newptr == NULL) 422 return (error); 423 424 mtx_lock(&vmmdev_mtx); 425 426 sc = vmmdev_lookup(buf); 427 if (sc != NULL) { 428 mtx_unlock(&vmmdev_mtx); 429 return (EEXIST); 430 } 431 432 vm = vm_create(buf); 433 if (vm == NULL) { 434 mtx_unlock(&vmmdev_mtx); 435 return (EINVAL); 436 } 437 438 sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO); 439 sc->vm = vm; 440 sc->cdev = make_dev(&vmmdevsw, 0, UID_ROOT, GID_WHEEL, 0600, 441 "vmm/%s", buf); 442 sc->cdev->si_drv1 = sc; 443 SLIST_INSERT_HEAD(&head, sc, link); 444 445 mtx_unlock(&vmmdev_mtx); 446 return (0); 447} 448SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW, 449 NULL, 0, sysctl_vmm_create, "A", NULL); 450 451void 452vmmdev_init(void) 453{ 454 mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF); 455} 456 457void 458vmmdev_cleanup(void) 459{ 460 struct vmmdev_softc *sc, *sc2; 461 462 mtx_lock(&vmmdev_mtx); 463 464 SLIST_FOREACH_SAFE(sc, &head, link, sc2) 465 vmmdev_destroy(sc); 466 467 mtx_unlock(&vmmdev_mtx); 468} 469