pci_iov.c revision 279447
1/*- 2 * Copyright (c) 2013-2015 Sandvine Inc. All rights reserved. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: head/sys/dev/pci/pci_iov.c 279447 2015-03-01 00:40:09Z rstone $"); 29 30#include "opt_bus.h" 31 32#include <sys/param.h> 33#include <sys/conf.h> 34#include <sys/kernel.h> 35#include <sys/systm.h> 36#include <sys/bus.h> 37#include <sys/fcntl.h> 38#include <sys/ioccom.h> 39#include <sys/iov.h> 40#include <sys/linker.h> 41#include <sys/malloc.h> 42#include <sys/module.h> 43#include <sys/pciio.h> 44#include <sys/queue.h> 45#include <sys/rman.h> 46#include <sys/sysctl.h> 47 48#include <machine/bus.h> 49 50#include <dev/pci/pcireg.h> 51#include <dev/pci/pcivar.h> 52#include <dev/pci/pci_private.h> 53#include <dev/pci/pci_iov_private.h> 54 55#include "pci_if.h" 56#include "pcib_if.h" 57 58static MALLOC_DEFINE(M_SRIOV, "sr_iov", "PCI SR-IOV allocations"); 59 60static d_ioctl_t pci_iov_ioctl; 61 62static struct cdevsw iov_cdevsw = { 63 .d_version = D_VERSION, 64 .d_name = "iov", 65 .d_ioctl = pci_iov_ioctl 66}; 67 68#define IOV_READ(d, r, w) \ 69 pci_read_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + r, w) 70 71#define IOV_WRITE(d, r, v, w) \ 72 pci_write_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + r, v, w) 73 74int 75pci_iov_attach_method(device_t bus, device_t dev) 76{ 77 device_t pcib; 78 struct pci_devinfo *dinfo; 79 struct pcicfg_iov *iov; 80 uint32_t version; 81 int error; 82 int iov_pos; 83 84 dinfo = device_get_ivars(dev); 85 pcib = device_get_parent(bus); 86 87 error = pci_find_extcap(dev, PCIZ_SRIOV, &iov_pos); 88 89 if (error != 0) 90 return (error); 91 92 version = pci_read_config(dev, iov_pos, 4); 93 if (PCI_EXTCAP_VER(version) != 1) { 94 if (bootverbose) 95 device_printf(dev, 96 "Unsupported version of SR-IOV (%d) detected\n", 97 PCI_EXTCAP_VER(version)); 98 99 return (ENXIO); 100 } 101 102 iov = malloc(sizeof(*dinfo->cfg.iov), M_SRIOV, M_WAITOK | M_ZERO); 103 104 mtx_lock(&Giant); 105 if (dinfo->cfg.iov != NULL) { 106 error = EBUSY; 107 goto cleanup; 108 } 109 110 iov->iov_pos = iov_pos; 111 112 iov->iov_cdev = make_dev(&iov_cdevsw, device_get_unit(dev), 113 UID_ROOT, GID_WHEEL, 0600, "iov/%s", device_get_nameunit(dev)); 114 115 if (iov->iov_cdev == NULL) { 116 error = ENOMEM; 117 goto cleanup; 118 } 119 120 dinfo->cfg.iov = iov; 121 iov->iov_cdev->si_drv1 = dinfo; 122 mtx_unlock(&Giant); 123 124 return (0); 125 126cleanup: 127 free(iov, M_SRIOV); 128 mtx_unlock(&Giant); 129 return (error); 130} 131 132int 133pci_iov_detach_method(device_t bus, device_t dev) 134{ 135 struct pci_devinfo *dinfo; 136 struct pcicfg_iov *iov; 137 138 mtx_lock(&Giant); 139 dinfo = device_get_ivars(dev); 140 iov = dinfo->cfg.iov; 141 142 if (iov == NULL) { 143 mtx_unlock(&Giant); 144 return (0); 145 } 146 147 if (iov->iov_num_vfs != 0) { 148 mtx_unlock(&Giant); 149 return (EBUSY); 150 } 151 152 dinfo->cfg.iov = NULL; 153 154 if (iov->iov_cdev) { 155 destroy_dev(iov->iov_cdev); 156 iov->iov_cdev = NULL; 157 } 158 159 free(iov, M_SRIOV); 160 mtx_unlock(&Giant); 161 162 return (0); 163} 164 165/* 166 * Set the ARI_EN bit in the lowest-numbered PCI function with the SR-IOV 167 * capability. This bit is only writeable on the lowest-numbered PF but 168 * affects all PFs on the device. 169 */ 170static int 171pci_iov_set_ari(device_t bus) 172{ 173 device_t lowest; 174 device_t *devlist; 175 int i, error, devcount, lowest_func, lowest_pos, iov_pos, dev_func; 176 uint16_t iov_ctl; 177 178 /* If ARI is disabled on the downstream port there is nothing to do. */ 179 if (!PCIB_ARI_ENABLED(device_get_parent(bus))) 180 return (0); 181 182 error = device_get_children(bus, &devlist, &devcount); 183 184 if (error != 0) 185 return (error); 186 187 lowest = NULL; 188 for (i = 0; i < devcount; i++) { 189 if (pci_find_extcap(devlist[i], PCIZ_SRIOV, &iov_pos) == 0) { 190 dev_func = pci_get_function(devlist[i]); 191 if (lowest == NULL || dev_func < lowest_func) { 192 lowest = devlist[i]; 193 lowest_func = dev_func; 194 lowest_pos = iov_pos; 195 } 196 } 197 } 198 199 /* 200 * If we called this function some device must have the SR-IOV 201 * capability. 202 */ 203 KASSERT(lowest != NULL, 204 ("Could not find child of %s with SR-IOV capability", 205 device_get_nameunit(bus))); 206 207 iov_ctl = pci_read_config(lowest, iov_pos + PCIR_SRIOV_CTL, 2); 208 iov_ctl |= PCIM_SRIOV_ARI_EN; 209 pci_write_config(lowest, iov_pos + PCIR_SRIOV_CTL, iov_ctl, 2); 210 free(devlist, M_TEMP); 211 return (0); 212} 213 214static int 215pci_iov_config_page_size(struct pci_devinfo *dinfo) 216{ 217 uint32_t page_cap, page_size; 218 219 page_cap = IOV_READ(dinfo, PCIR_SRIOV_PAGE_CAP, 4); 220 221 /* 222 * If the system page size is less than the smallest SR-IOV page size 223 * then round up to the smallest SR-IOV page size. 224 */ 225 if (PAGE_SHIFT < PCI_SRIOV_BASE_PAGE_SHIFT) 226 page_size = (1 << 0); 227 else 228 page_size = (1 << (PAGE_SHIFT - PCI_SRIOV_BASE_PAGE_SHIFT)); 229 230 /* Check that the device supports the system page size. */ 231 if (!(page_size & page_cap)) 232 return (ENXIO); 233 234 IOV_WRITE(dinfo, PCIR_SRIOV_PAGE_SIZE, page_size, 4); 235 return (0); 236} 237 238static void 239pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const char *driver, 240 uint16_t first_rid, uint16_t rid_stride) 241{ 242 device_t bus, dev, vf; 243 struct pcicfg_iov *iov; 244 struct pci_devinfo *vfinfo; 245 size_t size; 246 int i, error; 247 uint16_t vid, did, next_rid; 248 249 iov = dinfo->cfg.iov; 250 dev = dinfo->cfg.dev; 251 bus = device_get_parent(dev); 252 size = dinfo->cfg.devinfo_size; 253 next_rid = first_rid; 254 vid = pci_get_vendor(dev); 255 did = IOV_READ(dinfo, PCIR_SRIOV_VF_DID, 2); 256 257 for (i = 0; i < iov->iov_num_vfs; i++, next_rid += rid_stride) { 258 259 260 vf = PCI_CREATE_IOV_CHILD(bus, dev, next_rid, vid, did); 261 if (vf == NULL) 262 break; 263 264 vfinfo = device_get_ivars(vf); 265 266 vfinfo->cfg.iov = iov; 267 vfinfo->cfg.vf.index = i; 268 269 error = PCI_ADD_VF(dev, i); 270 if (error != 0) { 271 device_printf(dev, "Failed to add VF %d\n", i); 272 pci_delete_child(bus, vf); 273 } 274 } 275 276 bus_generic_attach(bus); 277} 278 279static int 280pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg) 281{ 282 device_t bus, dev; 283 const char *driver; 284 struct pci_devinfo *dinfo; 285 struct pcicfg_iov *iov; 286 int error; 287 uint16_t rid_off, rid_stride; 288 uint16_t first_rid, last_rid; 289 uint16_t iov_ctl; 290 uint16_t total_vfs; 291 int iov_inited; 292 293 mtx_lock(&Giant); 294 dinfo = cdev->si_drv1; 295 iov = dinfo->cfg.iov; 296 dev = dinfo->cfg.dev; 297 bus = device_get_parent(dev); 298 iov_inited = 0; 299 300 if (iov->iov_num_vfs != 0) { 301 mtx_unlock(&Giant); 302 return (EBUSY); 303 } 304 305 total_vfs = IOV_READ(dinfo, PCIR_SRIOV_TOTAL_VFS, 2); 306 307 if (arg->num_vfs > total_vfs) { 308 error = EINVAL; 309 goto out; 310 } 311 312 /* 313 * If we are creating passthrough devices then force the ppt driver to 314 * attach to prevent a VF driver from claming the VFs. 315 */ 316 if (arg->passthrough) 317 driver = "ppt"; 318 else 319 driver = NULL; 320 321 error = pci_iov_config_page_size(dinfo); 322 if (error != 0) 323 goto out; 324 325 error = pci_iov_set_ari(bus); 326 if (error != 0) 327 goto out; 328 329 error = PCI_INIT_IOV(dev, arg->num_vfs); 330 331 if (error != 0) 332 goto out; 333 334 iov_inited = 1; 335 IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, arg->num_vfs, 2); 336 337 rid_off = IOV_READ(dinfo, PCIR_SRIOV_VF_OFF, 2); 338 rid_stride = IOV_READ(dinfo, PCIR_SRIOV_VF_STRIDE, 2); 339 340 first_rid = pci_get_rid(dev) + rid_off; 341 last_rid = first_rid + (arg->num_vfs - 1) * rid_stride; 342 343 /* We don't yet support allocating extra bus numbers for VFs. */ 344 if (pci_get_bus(dev) != PCI_RID2BUS(last_rid)) { 345 error = ENOSPC; 346 goto out; 347 } 348 349 iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2); 350 iov_ctl &= ~(PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE); 351 IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2); 352 353 iov->iov_num_vfs = arg->num_vfs; 354 355 iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2); 356 iov_ctl |= PCIM_SRIOV_VF_EN; 357 IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2); 358 359 /* Per specification, we must wait 100ms before accessing VFs. */ 360 pause("iov", roundup(hz, 10)); 361 pci_iov_enumerate_vfs(dinfo, driver, first_rid, rid_stride); 362 mtx_unlock(&Giant); 363 364 return (0); 365out: 366 if (iov_inited) 367 PCI_UNINIT_IOV(dev); 368 iov->iov_num_vfs = 0; 369 mtx_unlock(&Giant); 370 return (error); 371} 372 373static int 374pci_iov_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, 375 struct thread *td) 376{ 377 378 switch (cmd) { 379 case IOV_CONFIG: 380 return (pci_iov_config(dev, (struct pci_iov_arg *)data)); 381 default: 382 return (EINVAL); 383 } 384} 385 386