pci_iov.c revision 279449
1/*- 2 * Copyright (c) 2013-2015 Sandvine Inc. All rights reserved. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: head/sys/dev/pci/pci_iov.c 279449 2015-03-01 00:40:26Z rstone $"); 29 30#include "opt_bus.h" 31 32#include <sys/param.h> 33#include <sys/conf.h> 34#include <sys/kernel.h> 35#include <sys/systm.h> 36#include <sys/bus.h> 37#include <sys/fcntl.h> 38#include <sys/ioccom.h> 39#include <sys/iov.h> 40#include <sys/linker.h> 41#include <sys/malloc.h> 42#include <sys/module.h> 43#include <sys/pciio.h> 44#include <sys/queue.h> 45#include <sys/rman.h> 46#include <sys/sysctl.h> 47 48#include <machine/bus.h> 49 50#include <dev/pci/pcireg.h> 51#include <dev/pci/pcivar.h> 52#include <dev/pci/pci_private.h> 53#include <dev/pci/pci_iov_private.h> 54 55#include "pci_if.h" 56#include "pcib_if.h" 57 58static MALLOC_DEFINE(M_SRIOV, "sr_iov", "PCI SR-IOV allocations"); 59 60static d_ioctl_t pci_iov_ioctl; 61 62static struct cdevsw iov_cdevsw = { 63 .d_version = D_VERSION, 64 .d_name = "iov", 65 .d_ioctl = pci_iov_ioctl 66}; 67 68#define IOV_READ(d, r, w) \ 69 pci_read_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + r, w) 70 71#define IOV_WRITE(d, r, v, w) \ 72 pci_write_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + r, v, w) 73 74int 75pci_iov_attach_method(device_t bus, device_t dev) 76{ 77 device_t pcib; 78 struct pci_devinfo *dinfo; 79 struct pcicfg_iov *iov; 80 uint32_t version; 81 int error; 82 int iov_pos; 83 84 dinfo = device_get_ivars(dev); 85 pcib = device_get_parent(bus); 86 87 error = pci_find_extcap(dev, PCIZ_SRIOV, &iov_pos); 88 89 if (error != 0) 90 return (error); 91 92 version = pci_read_config(dev, iov_pos, 4); 93 if (PCI_EXTCAP_VER(version) != 1) { 94 if (bootverbose) 95 device_printf(dev, 96 "Unsupported version of SR-IOV (%d) detected\n", 97 PCI_EXTCAP_VER(version)); 98 99 return (ENXIO); 100 } 101 102 iov = malloc(sizeof(*dinfo->cfg.iov), M_SRIOV, M_WAITOK | M_ZERO); 103 104 mtx_lock(&Giant); 105 if (dinfo->cfg.iov != NULL) { 106 error = EBUSY; 107 goto cleanup; 108 } 109 iov->iov_pos = iov_pos; 110 111 iov->iov_cdev = make_dev(&iov_cdevsw, device_get_unit(dev), 112 UID_ROOT, GID_WHEEL, 0600, "iov/%s", device_get_nameunit(dev)); 113 114 if (iov->iov_cdev == NULL) { 115 error = ENOMEM; 116 goto cleanup; 117 } 118 119 dinfo->cfg.iov = iov; 120 iov->iov_cdev->si_drv1 = dinfo; 121 mtx_unlock(&Giant); 122 123 return (0); 124 125cleanup: 126 free(iov, M_SRIOV); 127 mtx_unlock(&Giant); 128 return (error); 129} 130 131int 132pci_iov_detach_method(device_t bus, device_t dev) 133{ 134 struct pci_devinfo *dinfo; 135 struct pcicfg_iov *iov; 136 137 mtx_lock(&Giant); 138 dinfo = device_get_ivars(dev); 139 iov = dinfo->cfg.iov; 140 141 if (iov == NULL) { 142 mtx_unlock(&Giant); 143 return (0); 144 } 145 146 if (iov->iov_num_vfs != 0) { 147 mtx_unlock(&Giant); 148 return (EBUSY); 149 } 150 151 dinfo->cfg.iov = NULL; 152 153 if (iov->iov_cdev) { 154 destroy_dev(iov->iov_cdev); 155 iov->iov_cdev = NULL; 156 } 157 158 free(iov, M_SRIOV); 159 mtx_unlock(&Giant); 160 161 return (0); 162} 163 164static int 165pci_iov_alloc_bar(struct pci_devinfo *dinfo, int bar, pci_addr_t bar_shift) 166{ 167 struct resource *res; 168 struct pcicfg_iov *iov; 169 device_t dev, bus; 170 u_long start, end; 171 pci_addr_t bar_size; 172 int rid; 173 174 iov = dinfo->cfg.iov; 175 dev = dinfo->cfg.dev; 176 bus = device_get_parent(dev); 177 rid = iov->iov_pos + PCIR_SRIOV_BAR(bar); 178 bar_size = 1 << bar_shift; 179 180 res = pci_alloc_multi_resource(bus, dev, SYS_RES_MEMORY, &rid, 0ul, 181 ~0ul, 1, iov->iov_num_vfs, RF_ACTIVE); 182 183 if (res == NULL) 184 return (ENXIO); 185 186 iov->iov_bar[bar].res = res; 187 iov->iov_bar[bar].bar_size = bar_size; 188 iov->iov_bar[bar].bar_shift = bar_shift; 189 190 start = rman_get_start(res); 191 end = rman_get_end(res); 192 return (rman_manage_region(&iov->rman, start, end)); 193} 194 195static void 196pci_iov_add_bars(struct pcicfg_iov *iov, struct pci_devinfo *dinfo) 197{ 198 struct pci_iov_bar *bar; 199 uint64_t bar_start; 200 int i; 201 202 for (i = 0; i <= PCIR_MAX_BAR_0; i++) { 203 bar = &iov->iov_bar[i]; 204 if (bar->res != NULL) { 205 bar_start = rman_get_start(bar->res) + 206 dinfo->cfg.vf.index * bar->bar_size; 207 208 pci_add_bar(dinfo->cfg.dev, PCIR_BAR(i), bar_start, 209 bar->bar_shift); 210 } 211 } 212} 213 214/* 215 * Set the ARI_EN bit in the lowest-numbered PCI function with the SR-IOV 216 * capability. This bit is only writeable on the lowest-numbered PF but 217 * affects all PFs on the device. 218 */ 219static int 220pci_iov_set_ari(device_t bus) 221{ 222 device_t lowest; 223 device_t *devlist; 224 int i, error, devcount, lowest_func, lowest_pos, iov_pos, dev_func; 225 uint16_t iov_ctl; 226 227 /* If ARI is disabled on the downstream port there is nothing to do. */ 228 if (!PCIB_ARI_ENABLED(device_get_parent(bus))) 229 return (0); 230 231 error = device_get_children(bus, &devlist, &devcount); 232 233 if (error != 0) 234 return (error); 235 236 lowest = NULL; 237 for (i = 0; i < devcount; i++) { 238 if (pci_find_extcap(devlist[i], PCIZ_SRIOV, &iov_pos) == 0) { 239 dev_func = pci_get_function(devlist[i]); 240 if (lowest == NULL || dev_func < lowest_func) { 241 lowest = devlist[i]; 242 lowest_func = dev_func; 243 lowest_pos = iov_pos; 244 } 245 } 246 } 247 248 /* 249 * If we called this function some device must have the SR-IOV 250 * capability. 251 */ 252 KASSERT(lowest != NULL, 253 ("Could not find child of %s with SR-IOV capability", 254 device_get_nameunit(bus))); 255 256 iov_ctl = pci_read_config(lowest, iov_pos + PCIR_SRIOV_CTL, 2); 257 iov_ctl |= PCIM_SRIOV_ARI_EN; 258 pci_write_config(lowest, iov_pos + PCIR_SRIOV_CTL, iov_ctl, 2); 259 free(devlist, M_TEMP); 260 return (0); 261} 262 263static int 264pci_iov_config_page_size(struct pci_devinfo *dinfo) 265{ 266 uint32_t page_cap, page_size; 267 268 page_cap = IOV_READ(dinfo, PCIR_SRIOV_PAGE_CAP, 4); 269 270 /* 271 * If the system page size is less than the smallest SR-IOV page size 272 * then round up to the smallest SR-IOV page size. 273 */ 274 if (PAGE_SHIFT < PCI_SRIOV_BASE_PAGE_SHIFT) 275 page_size = (1 << 0); 276 else 277 page_size = (1 << (PAGE_SHIFT - PCI_SRIOV_BASE_PAGE_SHIFT)); 278 279 /* Check that the device supports the system page size. */ 280 if (!(page_size & page_cap)) 281 return (ENXIO); 282 283 IOV_WRITE(dinfo, PCIR_SRIOV_PAGE_SIZE, page_size, 4); 284 return (0); 285} 286 287static int 288pci_iov_init_rman(device_t pf, struct pcicfg_iov *iov) 289{ 290 int error; 291 292 iov->rman.rm_start = 0; 293 iov->rman.rm_end = ~0ul; 294 iov->rman.rm_type = RMAN_ARRAY; 295 snprintf(iov->rman_name, sizeof(iov->rman_name), "%s VF I/O memory", 296 device_get_nameunit(pf)); 297 iov->rman.rm_descr = iov->rman_name; 298 299 error = rman_init(&iov->rman); 300 if (error != 0) 301 return (error); 302 303 iov->iov_flags |= IOV_RMAN_INITED; 304 return (0); 305} 306 307static int 308pci_iov_setup_bars(struct pci_devinfo *dinfo) 309{ 310 device_t dev; 311 struct pcicfg_iov *iov; 312 pci_addr_t bar_value, testval; 313 int i, last_64, error; 314 315 iov = dinfo->cfg.iov; 316 dev = dinfo->cfg.dev; 317 last_64 = 0; 318 319 for (i = 0; i <= PCIR_MAX_BAR_0; i++) { 320 /* 321 * If a PCI BAR is a 64-bit wide BAR, then it spans two 322 * consecutive registers. Therefore if the last BAR that 323 * we looked at was a 64-bit BAR, we need to skip this 324 * register as it's the second half of the last BAR. 325 */ 326 if (!last_64) { 327 pci_read_bar(dev, 328 iov->iov_pos + PCIR_SRIOV_BAR(i), 329 &bar_value, &testval, &last_64); 330 331 if (testval != 0) { 332 error = pci_iov_alloc_bar(dinfo, i, 333 pci_mapsize(testval)); 334 if (error != 0) 335 return (error); 336 } 337 } else 338 last_64 = 0; 339 } 340 341 return (0); 342} 343 344static void 345pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const char *driver, 346 uint16_t first_rid, uint16_t rid_stride) 347{ 348 device_t bus, dev, vf; 349 struct pcicfg_iov *iov; 350 struct pci_devinfo *vfinfo; 351 size_t size; 352 int i, error; 353 uint16_t vid, did, next_rid; 354 355 iov = dinfo->cfg.iov; 356 dev = dinfo->cfg.dev; 357 bus = device_get_parent(dev); 358 size = dinfo->cfg.devinfo_size; 359 next_rid = first_rid; 360 vid = pci_get_vendor(dev); 361 did = IOV_READ(dinfo, PCIR_SRIOV_VF_DID, 2); 362 363 for (i = 0; i < iov->iov_num_vfs; i++, next_rid += rid_stride) { 364 365 366 vf = PCI_CREATE_IOV_CHILD(bus, dev, next_rid, vid, did); 367 if (vf == NULL) 368 break; 369 370 vfinfo = device_get_ivars(vf); 371 372 vfinfo->cfg.iov = iov; 373 vfinfo->cfg.vf.index = i; 374 375 pci_iov_add_bars(iov, vfinfo); 376 377 error = PCI_ADD_VF(dev, i); 378 if (error != 0) { 379 device_printf(dev, "Failed to add VF %d\n", i); 380 pci_delete_child(bus, vf); 381 } 382 } 383 384 bus_generic_attach(bus); 385} 386 387static int 388pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg) 389{ 390 device_t bus, dev; 391 const char *driver; 392 struct pci_devinfo *dinfo; 393 struct pcicfg_iov *iov; 394 int i, error; 395 uint16_t rid_off, rid_stride; 396 uint16_t first_rid, last_rid; 397 uint16_t iov_ctl; 398 uint16_t total_vfs; 399 int iov_inited; 400 401 mtx_lock(&Giant); 402 dinfo = cdev->si_drv1; 403 iov = dinfo->cfg.iov; 404 dev = dinfo->cfg.dev; 405 bus = device_get_parent(dev); 406 iov_inited = 0; 407 408 if (iov->iov_num_vfs != 0) { 409 mtx_unlock(&Giant); 410 return (EBUSY); 411 } 412 413 total_vfs = IOV_READ(dinfo, PCIR_SRIOV_TOTAL_VFS, 2); 414 415 if (arg->num_vfs > total_vfs) { 416 error = EINVAL; 417 goto out; 418 } 419 420 /* 421 * If we are creating passthrough devices then force the ppt driver to 422 * attach to prevent a VF driver from claming the VFs. 423 */ 424 if (arg->passthrough) 425 driver = "ppt"; 426 else 427 driver = NULL; 428 429 error = pci_iov_config_page_size(dinfo); 430 if (error != 0) 431 goto out; 432 433 error = pci_iov_set_ari(bus); 434 if (error != 0) 435 goto out; 436 437 error = PCI_INIT_IOV(dev, arg->num_vfs); 438 439 if (error != 0) 440 goto out; 441 442 iov_inited = 1; 443 IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, arg->num_vfs, 2); 444 445 rid_off = IOV_READ(dinfo, PCIR_SRIOV_VF_OFF, 2); 446 rid_stride = IOV_READ(dinfo, PCIR_SRIOV_VF_STRIDE, 2); 447 448 first_rid = pci_get_rid(dev) + rid_off; 449 last_rid = first_rid + (arg->num_vfs - 1) * rid_stride; 450 451 /* We don't yet support allocating extra bus numbers for VFs. */ 452 if (pci_get_bus(dev) != PCI_RID2BUS(last_rid)) { 453 error = ENOSPC; 454 goto out; 455 } 456 457 iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2); 458 iov_ctl &= ~(PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE); 459 IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2); 460 461 error = pci_iov_init_rman(dev, iov); 462 if (error != 0) 463 goto out; 464 465 iov->iov_num_vfs = arg->num_vfs; 466 467 error = pci_iov_setup_bars(dinfo); 468 if (error != 0) 469 goto out; 470 471 iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2); 472 iov_ctl |= PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE; 473 IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2); 474 475 /* Per specification, we must wait 100ms before accessing VFs. */ 476 pause("iov", roundup(hz, 10)); 477 pci_iov_enumerate_vfs(dinfo, driver, first_rid, rid_stride); 478 mtx_unlock(&Giant); 479 480 return (0); 481out: 482 if (iov_inited) 483 PCI_UNINIT_IOV(dev); 484 485 for (i = 0; i <= PCIR_MAX_BAR_0; i++) { 486 if (iov->iov_bar[i].res != NULL) { 487 pci_release_resource(bus, dev, SYS_RES_MEMORY, 488 iov->iov_pos + PCIR_SRIOV_BAR(i), 489 iov->iov_bar[i].res); 490 pci_delete_resource(bus, dev, SYS_RES_MEMORY, 491 iov->iov_pos + PCIR_SRIOV_BAR(i)); 492 iov->iov_bar[i].res = NULL; 493 } 494 } 495 496 if (iov->iov_flags & IOV_RMAN_INITED) { 497 rman_fini(&iov->rman); 498 iov->iov_flags &= ~IOV_RMAN_INITED; 499 } 500 iov->iov_num_vfs = 0; 501 mtx_unlock(&Giant); 502 return (error); 503} 504 505static int 506pci_iov_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, 507 struct thread *td) 508{ 509 510 switch (cmd) { 511 case IOV_CONFIG: 512 return (pci_iov_config(dev, (struct pci_iov_arg *)data)); 513 default: 514 return (EINVAL); 515 } 516} 517 518struct resource * 519pci_vf_alloc_mem_resource(device_t dev, device_t child, int *rid, u_long start, 520 u_long end, u_long count, u_int flags) 521{ 522 struct pci_devinfo *dinfo; 523 struct pcicfg_iov *iov; 524 struct pci_map *map; 525 struct resource *res; 526 struct resource_list_entry *rle; 527 u_long bar_start, bar_end; 528 pci_addr_t bar_length; 529 int error; 530 531 dinfo = device_get_ivars(child); 532 iov = dinfo->cfg.iov; 533 534 map = pci_find_bar(child, *rid); 535 if (map == NULL) 536 return (NULL); 537 538 bar_length = 1 << map->pm_size; 539 bar_start = map->pm_value; 540 bar_end = bar_start + bar_length - 1; 541 542 /* Make sure that the resource fits the constraints. */ 543 if (bar_start >= end || bar_end <= bar_start || count != 1) 544 return (NULL); 545 546 /* Clamp the resource to the constraints if necessary. */ 547 if (bar_start < start) 548 bar_start = start; 549 if (bar_end > end) 550 bar_end = end; 551 bar_length = bar_end - bar_start + 1; 552 553 res = rman_reserve_resource(&iov->rman, bar_start, bar_end, 554 bar_length, flags, child); 555 if (res == NULL) 556 return (NULL); 557 558 rle = resource_list_add(&dinfo->resources, SYS_RES_MEMORY, *rid, 559 bar_start, bar_end, 1); 560 if (rle == NULL) { 561 rman_release_resource(res); 562 return (NULL); 563 } 564 565 rman_set_rid(res, *rid); 566 567 if (flags & RF_ACTIVE) { 568 error = bus_activate_resource(child, SYS_RES_MEMORY, *rid, res); 569 if (error != 0) { 570 resource_list_delete(&dinfo->resources, SYS_RES_MEMORY, 571 *rid); 572 rman_release_resource(res); 573 return (NULL); 574 } 575 } 576 rle->res = res; 577 578 return (res); 579} 580 581int 582pci_vf_release_mem_resource(device_t dev, device_t child, int rid, 583 struct resource *r) 584{ 585 struct pci_devinfo *dinfo; 586 struct resource_list_entry *rle; 587 int error; 588 589 dinfo = device_get_ivars(child); 590 591 if (rman_get_flags(r) & RF_ACTIVE) { 592 error = bus_deactivate_resource(child, SYS_RES_MEMORY, rid, r); 593 if (error != 0) 594 return (error); 595 } 596 597 rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY, rid); 598 if (rle != NULL) { 599 rle->res = NULL; 600 resource_list_delete(&dinfo->resources, SYS_RES_MEMORY, 601 rid); 602 } 603 604 return (rman_release_resource(r)); 605} 606 607