ppt.c revision 234761
1/*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD$"); 31 32#include <sys/param.h> 33#include <sys/systm.h> 34#include <sys/kernel.h> 35#include <sys/malloc.h> 36#include <sys/module.h> 37#include <sys/bus.h> 38#include <sys/pciio.h> 39#include <sys/rman.h> 40#include <sys/smp.h> 41 42#include <dev/pci/pcivar.h> 43#include <dev/pci/pcireg.h> 44 45#include <machine/resource.h> 46 47#include <machine/vmm.h> 48#include <machine/vmm_dev.h> 49 50#include "vmm_lapic.h" 51#include "vmm_ktr.h" 52 53#include "iommu.h" 54#include "ppt.h" 55 56#define MAX_PPTDEVS (sizeof(pptdevs) / sizeof(pptdevs[0])) 57#define MAX_MMIOSEGS (PCIR_MAX_BAR_0 + 1) 58#define MAX_MSIMSGS 32 59 60MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources"); 61 62struct pptintr_arg { /* pptintr(pptintr_arg) */ 63 struct pptdev *pptdev; 64 int vec; 65 int vcpu; 66}; 67 68static struct pptdev { 69 device_t dev; 70 struct vm *vm; /* owner of this device */ 71 struct vm_memory_segment mmio[MAX_MMIOSEGS]; 72 struct { 73 int num_msgs; /* guest state */ 74 int vector; 75 int vcpu; 76 77 int startrid; /* host state */ 78 struct resource *res[MAX_MSIMSGS]; 79 void *cookie[MAX_MSIMSGS]; 80 struct pptintr_arg arg[MAX_MSIMSGS]; 81 } msi; 82 83 struct { 84 int num_msgs; 85 int startrid; 86 int msix_table_rid; 87 struct resource *msix_table_res; 88 struct resource **res; 89 void **cookie; 90 struct pptintr_arg *arg; 91 } msix; 92} pptdevs[32]; 93 94static int num_pptdevs; 95 96static int 97ppt_probe(device_t dev) 98{ 99 int bus, slot, func; 100 struct pci_devinfo *dinfo; 101 102 dinfo = (struct pci_devinfo *)device_get_ivars(dev); 103 104 bus = pci_get_bus(dev); 105 slot = pci_get_slot(dev); 106 func = pci_get_function(dev); 107 108 /* 109 * To qualify as a pci passthrough device a device must: 110 * - be allowed by administrator to be used in this role 111 * - be an endpoint device 112 */ 113 if (vmm_is_pptdev(bus, slot, func) && 114 (dinfo->cfg.hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_NORMAL) 115 return (0); 116 else 117 return (ENXIO); 118} 119 120static int 121ppt_attach(device_t dev) 122{ 123 int n; 124 125 if (num_pptdevs >= MAX_PPTDEVS) { 126 printf("ppt_attach: maximum number of pci passthrough devices " 127 "exceeded\n"); 128 return (ENXIO); 129 } 130 131 n = num_pptdevs++; 132 pptdevs[n].dev = dev; 133 134 if (bootverbose) 135 device_printf(dev, "attached\n"); 136 137 return (0); 138} 139 140static int 141ppt_detach(device_t dev) 142{ 143 /* 144 * XXX check whether there are any pci passthrough devices assigned 145 * to guests before we allow this driver to detach. 146 */ 147 148 return (0); 149} 150 151static device_method_t ppt_methods[] = { 152 /* Device interface */ 153 DEVMETHOD(device_probe, ppt_probe), 154 DEVMETHOD(device_attach, ppt_attach), 155 DEVMETHOD(device_detach, ppt_detach), 156 {0, 0} 157}; 158 159static devclass_t ppt_devclass; 160DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, 0); 161DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL); 162 163static struct pptdev * 164ppt_find(int bus, int slot, int func) 165{ 166 device_t dev; 167 int i, b, s, f; 168 169 for (i = 0; i < num_pptdevs; i++) { 170 dev = pptdevs[i].dev; 171 b = pci_get_bus(dev); 172 s = pci_get_slot(dev); 173 f = pci_get_function(dev); 174 if (bus == b && slot == s && func == f) 175 return (&pptdevs[i]); 176 } 177 return (NULL); 178} 179 180static void 181ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt) 182{ 183 int i; 184 struct vm_memory_segment *seg; 185 186 for (i = 0; i < MAX_MMIOSEGS; i++) { 187 seg = &ppt->mmio[i]; 188 if (seg->len == 0) 189 continue; 190 (void)vm_unmap_mmio(vm, seg->gpa, seg->len); 191 bzero(seg, sizeof(struct vm_memory_segment)); 192 } 193} 194 195static void 196ppt_teardown_msi(struct pptdev *ppt) 197{ 198 int i, rid; 199 void *cookie; 200 struct resource *res; 201 202 if (ppt->msi.num_msgs == 0) 203 return; 204 205 for (i = 0; i < ppt->msi.num_msgs; i++) { 206 rid = ppt->msi.startrid + i; 207 res = ppt->msi.res[i]; 208 cookie = ppt->msi.cookie[i]; 209 210 if (cookie != NULL) 211 bus_teardown_intr(ppt->dev, res, cookie); 212 213 if (res != NULL) 214 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res); 215 216 ppt->msi.res[i] = NULL; 217 ppt->msi.cookie[i] = NULL; 218 } 219 220 if (ppt->msi.startrid == 1) 221 pci_release_msi(ppt->dev); 222 223 ppt->msi.num_msgs = 0; 224} 225 226static void 227ppt_teardown_msix_intr(struct pptdev *ppt, int idx) 228{ 229 int rid; 230 struct resource *res; 231 void *cookie; 232 233 rid = ppt->msix.startrid + idx; 234 res = ppt->msix.res[idx]; 235 cookie = ppt->msix.cookie[idx]; 236 237 if (cookie != NULL) 238 bus_teardown_intr(ppt->dev, res, cookie); 239 240 if (res != NULL) 241 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res); 242 243 ppt->msix.res[idx] = NULL; 244 ppt->msix.cookie[idx] = NULL; 245} 246 247static void 248ppt_teardown_msix(struct pptdev *ppt) 249{ 250 int i, error; 251 252 if (ppt->msix.num_msgs == 0) 253 return; 254 255 for (i = 0; i < ppt->msix.num_msgs; i++) 256 ppt_teardown_msix_intr(ppt, i); 257 258 if (ppt->msix.msix_table_res) { 259 bus_release_resource(ppt->dev, SYS_RES_MEMORY, 260 ppt->msix.msix_table_rid, 261 ppt->msix.msix_table_res); 262 ppt->msix.msix_table_res = NULL; 263 ppt->msix.msix_table_rid = 0; 264 } 265 266 free(ppt->msix.res, M_PPTMSIX); 267 free(ppt->msix.cookie, M_PPTMSIX); 268 free(ppt->msix.arg, M_PPTMSIX); 269 270 error = pci_release_msi(ppt->dev); 271 if (error) 272 printf("ppt_teardown_msix: Failed to release MSI-X resources (error %i)\n", error); 273 274 ppt->msix.num_msgs = 0; 275} 276 277int 278ppt_assign_device(struct vm *vm, int bus, int slot, int func) 279{ 280 struct pptdev *ppt; 281 282 ppt = ppt_find(bus, slot, func); 283 if (ppt != NULL) { 284 /* 285 * If this device is owned by a different VM then we 286 * cannot change its owner. 287 */ 288 if (ppt->vm != NULL && ppt->vm != vm) 289 return (EBUSY); 290 291 ppt->vm = vm; 292 iommu_add_device(vm_iommu_domain(vm), bus, slot, func); 293 return (0); 294 } 295 return (ENOENT); 296} 297 298int 299ppt_unassign_device(struct vm *vm, int bus, int slot, int func) 300{ 301 struct pptdev *ppt; 302 303 ppt = ppt_find(bus, slot, func); 304 if (ppt != NULL) { 305 /* 306 * If this device is not owned by this 'vm' then bail out. 307 */ 308 if (ppt->vm != vm) 309 return (EBUSY); 310 ppt_unmap_mmio(vm, ppt); 311 ppt_teardown_msi(ppt); 312 ppt_teardown_msix(ppt); 313 iommu_remove_device(vm_iommu_domain(vm), bus, slot, func); 314 ppt->vm = NULL; 315 return (0); 316 } 317 return (ENOENT); 318} 319 320int 321ppt_unassign_all(struct vm *vm) 322{ 323 int i, bus, slot, func; 324 device_t dev; 325 326 for (i = 0; i < num_pptdevs; i++) { 327 if (pptdevs[i].vm == vm) { 328 dev = pptdevs[i].dev; 329 bus = pci_get_bus(dev); 330 slot = pci_get_slot(dev); 331 func = pci_get_function(dev); 332 ppt_unassign_device(vm, bus, slot, func); 333 } 334 } 335 336 return (0); 337} 338 339int 340ppt_map_mmio(struct vm *vm, int bus, int slot, int func, 341 vm_paddr_t gpa, size_t len, vm_paddr_t hpa) 342{ 343 int i, error; 344 struct vm_memory_segment *seg; 345 struct pptdev *ppt; 346 347 ppt = ppt_find(bus, slot, func); 348 if (ppt != NULL) { 349 if (ppt->vm != vm) 350 return (EBUSY); 351 352 for (i = 0; i < MAX_MMIOSEGS; i++) { 353 seg = &ppt->mmio[i]; 354 if (seg->len == 0) { 355 error = vm_map_mmio(vm, gpa, len, hpa); 356 if (error == 0) { 357 seg->gpa = gpa; 358 seg->len = len; 359 seg->hpa = hpa; 360 } 361 return (error); 362 } 363 } 364 return (ENOSPC); 365 } 366 return (ENOENT); 367} 368 369static int 370pptintr(void *arg) 371{ 372 int vec; 373 struct pptdev *ppt; 374 struct pptintr_arg *pptarg; 375 376 pptarg = arg; 377 ppt = pptarg->pptdev; 378 vec = pptarg->vec; 379 380 if (ppt->vm != NULL) 381 (void) lapic_set_intr(ppt->vm, pptarg->vcpu, vec); 382 else { 383 /* 384 * XXX 385 * This is not expected to happen - panic? 386 */ 387 } 388 389 /* 390 * For legacy interrupts give other filters a chance in case 391 * the interrupt was not generated by the passthrough device. 392 */ 393 if (ppt->msi.startrid == 0) 394 return (FILTER_STRAY); 395 else 396 return (FILTER_HANDLED); 397} 398 399/* 400 * XXX 401 * When we try to free the MSI resource the kernel will bind the thread to 402 * the host cpu was originally handling the MSI. The function freeing the 403 * MSI vector (apic_free_vector()) will panic the kernel if the thread 404 * is already bound to a cpu. 405 * 406 * So, we temporarily unbind the vcpu thread before freeing the MSI resource. 407 */ 408static void 409PPT_TEARDOWN_MSI(struct vm *vm, int vcpu, struct pptdev *ppt) 410{ 411 int pincpu = -1; 412 413 vm_get_pinning(vm, vcpu, &pincpu); 414 415 if (pincpu >= 0) 416 vm_set_pinning(vm, vcpu, -1); 417 418 ppt_teardown_msi(ppt); 419 420 if (pincpu >= 0) 421 vm_set_pinning(vm, vcpu, pincpu); 422} 423 424int 425ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func, 426 int destcpu, int vector, int numvec) 427{ 428 int i, rid, flags; 429 int msi_count, startrid, error, tmp; 430 struct pptdev *ppt; 431 432 if ((destcpu >= VM_MAXCPU || destcpu < 0) || 433 (vector < 0 || vector > 255) || 434 (numvec < 0 || numvec > MAX_MSIMSGS)) 435 return (EINVAL); 436 437 ppt = ppt_find(bus, slot, func); 438 if (ppt == NULL) 439 return (ENOENT); 440 if (ppt->vm != vm) /* Make sure we own this device */ 441 return (EBUSY); 442 443 /* Free any allocated resources */ 444 PPT_TEARDOWN_MSI(vm, vcpu, ppt); 445 446 if (numvec == 0) /* nothing more to do */ 447 return (0); 448 449 flags = RF_ACTIVE; 450 msi_count = pci_msi_count(ppt->dev); 451 if (msi_count == 0) { 452 startrid = 0; /* legacy interrupt */ 453 msi_count = 1; 454 flags |= RF_SHAREABLE; 455 } else 456 startrid = 1; /* MSI */ 457 458 /* 459 * The device must be capable of supporting the number of vectors 460 * the guest wants to allocate. 461 */ 462 if (numvec > msi_count) 463 return (EINVAL); 464 465 /* 466 * Make sure that we can allocate all the MSI vectors that are needed 467 * by the guest. 468 */ 469 if (startrid == 1) { 470 tmp = numvec; 471 error = pci_alloc_msi(ppt->dev, &tmp); 472 if (error) 473 return (error); 474 else if (tmp != numvec) { 475 pci_release_msi(ppt->dev); 476 return (ENOSPC); 477 } else { 478 /* success */ 479 } 480 } 481 482 ppt->msi.vector = vector; 483 ppt->msi.vcpu = destcpu; 484 ppt->msi.startrid = startrid; 485 486 /* 487 * Allocate the irq resource and attach it to the interrupt handler. 488 */ 489 for (i = 0; i < numvec; i++) { 490 ppt->msi.num_msgs = i + 1; 491 ppt->msi.cookie[i] = NULL; 492 493 rid = startrid + i; 494 ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ, 495 &rid, flags); 496 if (ppt->msi.res[i] == NULL) 497 break; 498 499 ppt->msi.arg[i].pptdev = ppt; 500 ppt->msi.arg[i].vec = vector + i; 501 502 error = bus_setup_intr(ppt->dev, ppt->msi.res[i], 503 INTR_TYPE_NET | INTR_MPSAFE, 504 pptintr, NULL, &ppt->msi.arg[i], 505 &ppt->msi.cookie[i]); 506 if (error != 0) 507 break; 508 } 509 510 if (i < numvec) { 511 PPT_TEARDOWN_MSI(vm, vcpu, ppt); 512 return (ENXIO); 513 } 514 515 return (0); 516} 517 518int 519ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func, 520 int idx, uint32_t msg, uint32_t vector_control, uint64_t addr) 521{ 522 struct pptdev *ppt; 523 struct pci_devinfo *dinfo; 524 int numvec, vector_count, rid, error; 525 size_t res_size, cookie_size, arg_size; 526 527 ppt = ppt_find(bus, slot, func); 528 if (ppt == NULL) 529 return (ENOENT); 530 if (ppt->vm != vm) /* Make sure we own this device */ 531 return (EBUSY); 532 533 dinfo = device_get_ivars(ppt->dev); 534 if (!dinfo) 535 return (ENXIO); 536 537 /* 538 * First-time configuration: 539 * Allocate the MSI-X table 540 * Allocate the IRQ resources 541 * Set up some variables in ppt->msix 542 */ 543 if (!ppt->msix.msix_table_res) { 544 ppt->msix.res = NULL; 545 ppt->msix.cookie = NULL; 546 ppt->msix.arg = NULL; 547 548 rid = dinfo->cfg.msix.msix_table_bar; 549 ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev, SYS_RES_MEMORY, 550 &rid, RF_ACTIVE); 551 if (ppt->msix.msix_table_res == NULL) 552 return (ENOSPC); 553 554 ppt->msix.msix_table_rid = rid; 555 556 vector_count = numvec = pci_msix_count(ppt->dev); 557 558 error = pci_alloc_msix(ppt->dev, &numvec); 559 if (error) 560 return (error); 561 else if (vector_count != numvec) { 562 pci_release_msi(ppt->dev); 563 return (ENOSPC); 564 } 565 566 ppt->msix.num_msgs = numvec; 567 568 ppt->msix.startrid = 1; 569 570 res_size = numvec * sizeof(ppt->msix.res[0]); 571 cookie_size = numvec * sizeof(ppt->msix.cookie[0]); 572 arg_size = numvec * sizeof(ppt->msix.arg[0]); 573 574 ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK); 575 ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX, M_WAITOK); 576 ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK); 577 if (ppt->msix.res == NULL || ppt->msix.cookie == NULL || 578 ppt->msix.arg == NULL) { 579 ppt_teardown_msix(ppt); 580 return (ENOSPC); 581 } 582 bzero(ppt->msix.res, res_size); 583 bzero(ppt->msix.cookie, cookie_size); 584 bzero(ppt->msix.arg, arg_size); 585 } 586 587 if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { 588 /* Tear down the IRQ if it's already set up */ 589 ppt_teardown_msix_intr(ppt, idx); 590 591 /* Allocate the IRQ resource */ 592 ppt->msix.cookie[idx] = NULL; 593 rid = ppt->msix.startrid + idx; 594 ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ, 595 &rid, RF_ACTIVE); 596 if (ppt->msix.res[idx] == NULL) 597 return (ENXIO); 598 599 ppt->msix.arg[idx].pptdev = ppt; 600 ppt->msix.arg[idx].vec = msg; 601 ppt->msix.arg[idx].vcpu = (addr >> 12) & 0xFF; 602 603 /* Setup the MSI-X interrupt */ 604 error = bus_setup_intr(ppt->dev, ppt->msix.res[idx], 605 INTR_TYPE_NET | INTR_MPSAFE, 606 pptintr, NULL, &ppt->msix.arg[idx], 607 &ppt->msix.cookie[idx]); 608 609 if (error != 0) { 610 bus_teardown_intr(ppt->dev, ppt->msix.res[idx], ppt->msix.cookie[idx]); 611 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, ppt->msix.res[idx]); 612 ppt->msix.cookie[idx] = NULL; 613 ppt->msix.res[idx] = NULL; 614 return (ENXIO); 615 } 616 } else { 617 /* Masked, tear it down if it's already been set up */ 618 ppt_teardown_msix_intr(ppt, idx); 619 } 620 621 return (0); 622} 623 624