/* ppt.c revision 241452 */
1/*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
25 * 26 * $FreeBSD$ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD$"); 31 32#include <sys/param.h> 33#include <sys/systm.h> 34#include <sys/kernel.h> 35#include <sys/malloc.h> 36#include <sys/module.h> 37#include <sys/bus.h> 38#include <sys/pciio.h> 39#include <sys/rman.h> 40#include <sys/smp.h> 41 42#include <dev/pci/pcivar.h> 43#include <dev/pci/pcireg.h> 44 45#include <machine/resource.h> 46 47#include <machine/vmm.h> 48#include <machine/vmm_dev.h> 49 50#include "vmm_lapic.h" 51#include "vmm_ktr.h" 52 53#include "iommu.h" 54#include "ppt.h" 55 56#define MAX_PPTDEVS (sizeof(pptdevs) / sizeof(pptdevs[0])) 57#define MAX_MMIOSEGS (PCIR_MAX_BAR_0 + 1) 58#define MAX_MSIMSGS 32 59 60MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources"); 61 62struct pptintr_arg { /* pptintr(pptintr_arg) */ 63 struct pptdev *pptdev; 64 int vec; 65 int vcpu; 66}; 67 68static struct pptdev { 69 device_t dev; 70 struct vm *vm; /* owner of this device */ 71 struct vm_memory_segment mmio[MAX_MMIOSEGS]; 72 struct { 73 int num_msgs; /* guest state */ 74 75 int startrid; /* host state */ 76 struct resource *res[MAX_MSIMSGS]; 77 void *cookie[MAX_MSIMSGS]; 78 struct pptintr_arg arg[MAX_MSIMSGS]; 79 } msi; 80 81 struct { 82 int num_msgs; 83 int startrid; 84 int msix_table_rid; 85 struct resource *msix_table_res; 86 struct resource **res; 87 void **cookie; 88 struct pptintr_arg *arg; 89 } msix; 90} pptdevs[32]; 91 92static int num_pptdevs; 93 94static int 95ppt_probe(device_t dev) 96{ 97 int bus, slot, func; 98 struct pci_devinfo *dinfo; 99 100 dinfo = (struct pci_devinfo *)device_get_ivars(dev); 101 102 bus = pci_get_bus(dev); 103 slot = pci_get_slot(dev); 104 func = pci_get_function(dev); 105 106 /* 107 * To qualify as a pci passthrough device a device must: 108 * - be allowed by administrator to be used in this role 109 * - be an endpoint device 110 */ 111 if (vmm_is_pptdev(bus, slot, func) && 112 (dinfo->cfg.hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_NORMAL) 113 return (0); 114 
else 115 return (ENXIO); 116} 117 118static int 119ppt_attach(device_t dev) 120{ 121 int n; 122 123 if (num_pptdevs >= MAX_PPTDEVS) { 124 printf("ppt_attach: maximum number of pci passthrough devices " 125 "exceeded\n"); 126 return (ENXIO); 127 } 128 129 n = num_pptdevs++; 130 pptdevs[n].dev = dev; 131 132 if (bootverbose) 133 device_printf(dev, "attached\n"); 134 135 return (0); 136} 137 138static int 139ppt_detach(device_t dev) 140{ 141 /* 142 * XXX check whether there are any pci passthrough devices assigned 143 * to guests before we allow this driver to detach. 144 */ 145 146 return (0); 147} 148 149static device_method_t ppt_methods[] = { 150 /* Device interface */ 151 DEVMETHOD(device_probe, ppt_probe), 152 DEVMETHOD(device_attach, ppt_attach), 153 DEVMETHOD(device_detach, ppt_detach), 154 {0, 0} 155}; 156 157static devclass_t ppt_devclass; 158DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, 0); 159DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL); 160 161static struct pptdev * 162ppt_find(int bus, int slot, int func) 163{ 164 device_t dev; 165 int i, b, s, f; 166 167 for (i = 0; i < num_pptdevs; i++) { 168 dev = pptdevs[i].dev; 169 b = pci_get_bus(dev); 170 s = pci_get_slot(dev); 171 f = pci_get_function(dev); 172 if (bus == b && slot == s && func == f) 173 return (&pptdevs[i]); 174 } 175 return (NULL); 176} 177 178static void 179ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt) 180{ 181 int i; 182 struct vm_memory_segment *seg; 183 184 for (i = 0; i < MAX_MMIOSEGS; i++) { 185 seg = &ppt->mmio[i]; 186 if (seg->len == 0) 187 continue; 188 (void)vm_unmap_mmio(vm, seg->gpa, seg->len); 189 bzero(seg, sizeof(struct vm_memory_segment)); 190 } 191} 192 193static void 194ppt_teardown_msi(struct pptdev *ppt) 195{ 196 int i, rid; 197 void *cookie; 198 struct resource *res; 199 200 if (ppt->msi.num_msgs == 0) 201 return; 202 203 for (i = 0; i < ppt->msi.num_msgs; i++) { 204 rid = ppt->msi.startrid + i; 205 res = ppt->msi.res[i]; 206 cookie = ppt->msi.cookie[i]; 
207 208 if (cookie != NULL) 209 bus_teardown_intr(ppt->dev, res, cookie); 210 211 if (res != NULL) 212 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res); 213 214 ppt->msi.res[i] = NULL; 215 ppt->msi.cookie[i] = NULL; 216 } 217 218 if (ppt->msi.startrid == 1) 219 pci_release_msi(ppt->dev); 220 221 ppt->msi.num_msgs = 0; 222} 223 224static void 225ppt_teardown_msix_intr(struct pptdev *ppt, int idx) 226{ 227 int rid; 228 struct resource *res; 229 void *cookie; 230 231 rid = ppt->msix.startrid + idx; 232 res = ppt->msix.res[idx]; 233 cookie = ppt->msix.cookie[idx]; 234 235 if (cookie != NULL) 236 bus_teardown_intr(ppt->dev, res, cookie); 237 238 if (res != NULL) 239 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res); 240 241 ppt->msix.res[idx] = NULL; 242 ppt->msix.cookie[idx] = NULL; 243} 244 245static void 246ppt_teardown_msix(struct pptdev *ppt) 247{ 248 int i, error; 249 250 if (ppt->msix.num_msgs == 0) 251 return; 252 253 for (i = 0; i < ppt->msix.num_msgs; i++) 254 ppt_teardown_msix_intr(ppt, i); 255 256 if (ppt->msix.msix_table_res) { 257 bus_release_resource(ppt->dev, SYS_RES_MEMORY, 258 ppt->msix.msix_table_rid, 259 ppt->msix.msix_table_res); 260 ppt->msix.msix_table_res = NULL; 261 ppt->msix.msix_table_rid = 0; 262 } 263 264 free(ppt->msix.res, M_PPTMSIX); 265 free(ppt->msix.cookie, M_PPTMSIX); 266 free(ppt->msix.arg, M_PPTMSIX); 267 268 error = pci_release_msi(ppt->dev); 269 if (error) 270 printf("ppt_teardown_msix: Failed to release MSI-X resources (error %i)\n", error); 271 272 ppt->msix.num_msgs = 0; 273} 274 275int 276ppt_assign_device(struct vm *vm, int bus, int slot, int func) 277{ 278 struct pptdev *ppt; 279 280 ppt = ppt_find(bus, slot, func); 281 if (ppt != NULL) { 282 /* 283 * If this device is owned by a different VM then we 284 * cannot change its owner. 
285 */ 286 if (ppt->vm != NULL && ppt->vm != vm) 287 return (EBUSY); 288 289 ppt->vm = vm; 290 iommu_add_device(vm_iommu_domain(vm), bus, slot, func); 291 return (0); 292 } 293 return (ENOENT); 294} 295 296int 297ppt_unassign_device(struct vm *vm, int bus, int slot, int func) 298{ 299 struct pptdev *ppt; 300 301 ppt = ppt_find(bus, slot, func); 302 if (ppt != NULL) { 303 /* 304 * If this device is not owned by this 'vm' then bail out. 305 */ 306 if (ppt->vm != vm) 307 return (EBUSY); 308 ppt_unmap_mmio(vm, ppt); 309 ppt_teardown_msi(ppt); 310 ppt_teardown_msix(ppt); 311 iommu_remove_device(vm_iommu_domain(vm), bus, slot, func); 312 ppt->vm = NULL; 313 return (0); 314 } 315 return (ENOENT); 316} 317 318int 319ppt_unassign_all(struct vm *vm) 320{ 321 int i, bus, slot, func; 322 device_t dev; 323 324 for (i = 0; i < num_pptdevs; i++) { 325 if (pptdevs[i].vm == vm) { 326 dev = pptdevs[i].dev; 327 bus = pci_get_bus(dev); 328 slot = pci_get_slot(dev); 329 func = pci_get_function(dev); 330 ppt_unassign_device(vm, bus, slot, func); 331 } 332 } 333 334 return (0); 335} 336 337int 338ppt_map_mmio(struct vm *vm, int bus, int slot, int func, 339 vm_paddr_t gpa, size_t len, vm_paddr_t hpa) 340{ 341 int i, error; 342 struct vm_memory_segment *seg; 343 struct pptdev *ppt; 344 345 ppt = ppt_find(bus, slot, func); 346 if (ppt != NULL) { 347 if (ppt->vm != vm) 348 return (EBUSY); 349 350 for (i = 0; i < MAX_MMIOSEGS; i++) { 351 seg = &ppt->mmio[i]; 352 if (seg->len == 0) { 353 error = vm_map_mmio(vm, gpa, len, hpa); 354 if (error == 0) { 355 seg->gpa = gpa; 356 seg->len = len; 357 } 358 return (error); 359 } 360 } 361 return (ENOSPC); 362 } 363 return (ENOENT); 364} 365 366static int 367pptintr(void *arg) 368{ 369 int vec; 370 struct pptdev *ppt; 371 struct pptintr_arg *pptarg; 372 373 pptarg = arg; 374 ppt = pptarg->pptdev; 375 vec = pptarg->vec; 376 377 if (ppt->vm != NULL) 378 (void) lapic_set_intr(ppt->vm, pptarg->vcpu, vec); 379 else { 380 /* 381 * XXX 382 * This is not 
expected to happen - panic? 383 */ 384 } 385 386 /* 387 * For legacy interrupts give other filters a chance in case 388 * the interrupt was not generated by the passthrough device. 389 */ 390 if (ppt->msi.startrid == 0) 391 return (FILTER_STRAY); 392 else 393 return (FILTER_HANDLED); 394} 395 396/* 397 * XXX 398 * When we try to free the MSI resource the kernel will bind the thread to 399 * the host cpu was originally handling the MSI. The function freeing the 400 * MSI vector (apic_free_vector()) will panic the kernel if the thread 401 * is already bound to a cpu. 402 * 403 * So, we temporarily unbind the vcpu thread before freeing the MSI resource. 404 */ 405static void 406PPT_TEARDOWN_MSI(struct vm *vm, int vcpu, struct pptdev *ppt) 407{ 408 int pincpu = -1; 409 410 vm_get_pinning(vm, vcpu, &pincpu); 411 412 if (pincpu >= 0) 413 vm_set_pinning(vm, vcpu, -1); 414 415 ppt_teardown_msi(ppt); 416 417 if (pincpu >= 0) 418 vm_set_pinning(vm, vcpu, pincpu); 419} 420 421int 422ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func, 423 int destcpu, int vector, int numvec) 424{ 425 int i, rid, flags; 426 int msi_count, startrid, error, tmp; 427 struct pptdev *ppt; 428 429 if ((destcpu >= VM_MAXCPU || destcpu < 0) || 430 (vector < 0 || vector > 255) || 431 (numvec < 0 || numvec > MAX_MSIMSGS)) 432 return (EINVAL); 433 434 ppt = ppt_find(bus, slot, func); 435 if (ppt == NULL) 436 return (ENOENT); 437 if (ppt->vm != vm) /* Make sure we own this device */ 438 return (EBUSY); 439 440 /* Free any allocated resources */ 441 PPT_TEARDOWN_MSI(vm, vcpu, ppt); 442 443 if (numvec == 0) /* nothing more to do */ 444 return (0); 445 446 flags = RF_ACTIVE; 447 msi_count = pci_msi_count(ppt->dev); 448 if (msi_count == 0) { 449 startrid = 0; /* legacy interrupt */ 450 msi_count = 1; 451 flags |= RF_SHAREABLE; 452 } else 453 startrid = 1; /* MSI */ 454 455 /* 456 * The device must be capable of supporting the number of vectors 457 * the guest wants to allocate. 
458 */ 459 if (numvec > msi_count) 460 return (EINVAL); 461 462 /* 463 * Make sure that we can allocate all the MSI vectors that are needed 464 * by the guest. 465 */ 466 if (startrid == 1) { 467 tmp = numvec; 468 error = pci_alloc_msi(ppt->dev, &tmp); 469 if (error) 470 return (error); 471 else if (tmp != numvec) { 472 pci_release_msi(ppt->dev); 473 return (ENOSPC); 474 } else { 475 /* success */ 476 } 477 } 478 479 ppt->msi.startrid = startrid; 480 481 /* 482 * Allocate the irq resource and attach it to the interrupt handler. 483 */ 484 for (i = 0; i < numvec; i++) { 485 ppt->msi.num_msgs = i + 1; 486 ppt->msi.cookie[i] = NULL; 487 488 rid = startrid + i; 489 ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ, 490 &rid, flags); 491 if (ppt->msi.res[i] == NULL) 492 break; 493 494 ppt->msi.arg[i].pptdev = ppt; 495 ppt->msi.arg[i].vec = vector + i; 496 ppt->msi.arg[i].vcpu = destcpu; 497 498 error = bus_setup_intr(ppt->dev, ppt->msi.res[i], 499 INTR_TYPE_NET | INTR_MPSAFE, 500 pptintr, NULL, &ppt->msi.arg[i], 501 &ppt->msi.cookie[i]); 502 if (error != 0) 503 break; 504 } 505 506 if (i < numvec) { 507 PPT_TEARDOWN_MSI(vm, vcpu, ppt); 508 return (ENXIO); 509 } 510 511 return (0); 512} 513 514int 515ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func, 516 int idx, uint32_t msg, uint32_t vector_control, uint64_t addr) 517{ 518 struct pptdev *ppt; 519 struct pci_devinfo *dinfo; 520 int numvec, vector_count, rid, error; 521 size_t res_size, cookie_size, arg_size; 522 523 ppt = ppt_find(bus, slot, func); 524 if (ppt == NULL) 525 return (ENOENT); 526 if (ppt->vm != vm) /* Make sure we own this device */ 527 return (EBUSY); 528 529 dinfo = device_get_ivars(ppt->dev); 530 if (!dinfo) 531 return (ENXIO); 532 533 /* 534 * First-time configuration: 535 * Allocate the MSI-X table 536 * Allocate the IRQ resources 537 * Set up some variables in ppt->msix 538 */ 539 if (!ppt->msix.msix_table_res) { 540 ppt->msix.res = NULL; 541 ppt->msix.cookie = NULL; 
542 ppt->msix.arg = NULL; 543 544 rid = dinfo->cfg.msix.msix_table_bar; 545 ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev, SYS_RES_MEMORY, 546 &rid, RF_ACTIVE); 547 if (ppt->msix.msix_table_res == NULL) 548 return (ENOSPC); 549 550 ppt->msix.msix_table_rid = rid; 551 552 vector_count = numvec = pci_msix_count(ppt->dev); 553 554 error = pci_alloc_msix(ppt->dev, &numvec); 555 if (error) 556 return (error); 557 else if (vector_count != numvec) { 558 pci_release_msi(ppt->dev); 559 return (ENOSPC); 560 } 561 562 ppt->msix.num_msgs = numvec; 563 564 ppt->msix.startrid = 1; 565 566 res_size = numvec * sizeof(ppt->msix.res[0]); 567 cookie_size = numvec * sizeof(ppt->msix.cookie[0]); 568 arg_size = numvec * sizeof(ppt->msix.arg[0]); 569 570 ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK); 571 ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX, M_WAITOK); 572 ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK); 573 if (ppt->msix.res == NULL || ppt->msix.cookie == NULL || 574 ppt->msix.arg == NULL) { 575 ppt_teardown_msix(ppt); 576 return (ENOSPC); 577 } 578 bzero(ppt->msix.res, res_size); 579 bzero(ppt->msix.cookie, cookie_size); 580 bzero(ppt->msix.arg, arg_size); 581 } 582 583 if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { 584 /* Tear down the IRQ if it's already set up */ 585 ppt_teardown_msix_intr(ppt, idx); 586 587 /* Allocate the IRQ resource */ 588 ppt->msix.cookie[idx] = NULL; 589 rid = ppt->msix.startrid + idx; 590 ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ, 591 &rid, RF_ACTIVE); 592 if (ppt->msix.res[idx] == NULL) 593 return (ENXIO); 594 595 ppt->msix.arg[idx].pptdev = ppt; 596 ppt->msix.arg[idx].vec = msg; 597 ppt->msix.arg[idx].vcpu = (addr >> 12) & 0xFF; 598 599 /* Setup the MSI-X interrupt */ 600 error = bus_setup_intr(ppt->dev, ppt->msix.res[idx], 601 INTR_TYPE_NET | INTR_MPSAFE, 602 pptintr, NULL, &ppt->msix.arg[idx], 603 &ppt->msix.cookie[idx]); 604 605 if (error != 0) { 606 bus_teardown_intr(ppt->dev, 
ppt->msix.res[idx], ppt->msix.cookie[idx]); 607 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, ppt->msix.res[idx]); 608 ppt->msix.cookie[idx] = NULL; 609 ppt->msix.res[idx] = NULL; 610 return (ENXIO); 611 } 612 } else { 613 /* Masked, tear it down if it's already been set up */ 614 ppt_teardown_msix_intr(ppt, idx); 615 } 616 617 return (0); 618} 619 620