pci_iov.c revision 279447
1/*-
2 * Copyright (c) 2013-2015 Sandvine Inc.  All rights reserved.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/dev/pci/pci_iov.c 279447 2015-03-01 00:40:09Z rstone $");
29
30#include "opt_bus.h"
31
32#include <sys/param.h>
33#include <sys/conf.h>
34#include <sys/kernel.h>
35#include <sys/systm.h>
36#include <sys/bus.h>
37#include <sys/fcntl.h>
38#include <sys/ioccom.h>
39#include <sys/iov.h>
40#include <sys/linker.h>
41#include <sys/malloc.h>
42#include <sys/module.h>
43#include <sys/pciio.h>
44#include <sys/queue.h>
45#include <sys/rman.h>
46#include <sys/sysctl.h>
47
48#include <machine/bus.h>
49
50#include <dev/pci/pcireg.h>
51#include <dev/pci/pcivar.h>
52#include <dev/pci/pci_private.h>
53#include <dev/pci/pci_iov_private.h>
54
55#include "pci_if.h"
56#include "pcib_if.h"
57
58static MALLOC_DEFINE(M_SRIOV, "sr_iov", "PCI SR-IOV allocations");
59
60static d_ioctl_t pci_iov_ioctl;
61
62static struct cdevsw iov_cdevsw = {
63	.d_version = D_VERSION,
64	.d_name = "iov",
65	.d_ioctl = pci_iov_ioctl
66};
67
/*
 * Accessors for registers inside a device's SR-IOV extended capability.
 *
 * d is a struct pci_devinfo pointer, r is the register offset relative to the
 * start of the capability (d->cfg.iov->iov_pos), v is the value to write and
 * w is the access width handed to pci_read_config()/pci_write_config().
 *
 * Every argument is parenthesized so that an expression argument such as
 * "A | B" is combined with the capability base using the intended
 * precedence.  Note that d is evaluated twice, so it must not have side
 * effects.
 */
#define IOV_READ(d, r, w) \
	pci_read_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + (r), (w))

#define IOV_WRITE(d, r, v, w) \
	pci_write_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + (r), (v), (w))
73
74int
75pci_iov_attach_method(device_t bus, device_t dev)
76{
77	device_t pcib;
78	struct pci_devinfo *dinfo;
79	struct pcicfg_iov *iov;
80	uint32_t version;
81	int error;
82	int iov_pos;
83
84	dinfo = device_get_ivars(dev);
85	pcib = device_get_parent(bus);
86
87	error = pci_find_extcap(dev, PCIZ_SRIOV, &iov_pos);
88
89	if (error != 0)
90		return (error);
91
92	version = pci_read_config(dev, iov_pos, 4);
93	if (PCI_EXTCAP_VER(version) != 1) {
94		if (bootverbose)
95			device_printf(dev,
96			    "Unsupported version of SR-IOV (%d) detected\n",
97			    PCI_EXTCAP_VER(version));
98
99		return (ENXIO);
100	}
101
102	iov = malloc(sizeof(*dinfo->cfg.iov), M_SRIOV, M_WAITOK | M_ZERO);
103
104	mtx_lock(&Giant);
105	if (dinfo->cfg.iov != NULL) {
106		error = EBUSY;
107		goto cleanup;
108	}
109
110	iov->iov_pos = iov_pos;
111
112	iov->iov_cdev = make_dev(&iov_cdevsw, device_get_unit(dev),
113	    UID_ROOT, GID_WHEEL, 0600, "iov/%s", device_get_nameunit(dev));
114
115	if (iov->iov_cdev == NULL) {
116		error = ENOMEM;
117		goto cleanup;
118	}
119
120	dinfo->cfg.iov = iov;
121	iov->iov_cdev->si_drv1 = dinfo;
122	mtx_unlock(&Giant);
123
124	return (0);
125
126cleanup:
127	free(iov, M_SRIOV);
128	mtx_unlock(&Giant);
129	return (error);
130}
131
132int
133pci_iov_detach_method(device_t bus, device_t dev)
134{
135	struct pci_devinfo *dinfo;
136	struct pcicfg_iov *iov;
137
138	mtx_lock(&Giant);
139	dinfo = device_get_ivars(dev);
140	iov = dinfo->cfg.iov;
141
142	if (iov == NULL) {
143		mtx_unlock(&Giant);
144		return (0);
145	}
146
147	if (iov->iov_num_vfs != 0) {
148		mtx_unlock(&Giant);
149		return (EBUSY);
150	}
151
152	dinfo->cfg.iov = NULL;
153
154	if (iov->iov_cdev) {
155		destroy_dev(iov->iov_cdev);
156		iov->iov_cdev = NULL;
157	}
158
159	free(iov, M_SRIOV);
160	mtx_unlock(&Giant);
161
162	return (0);
163}
164
165/*
166 * Set the ARI_EN bit in the lowest-numbered PCI function with the SR-IOV
167 * capability.  This bit is only writeable on the lowest-numbered PF but
168 * affects all PFs on the device.
169 */
170static int
171pci_iov_set_ari(device_t bus)
172{
173	device_t lowest;
174	device_t *devlist;
175	int i, error, devcount, lowest_func, lowest_pos, iov_pos, dev_func;
176	uint16_t iov_ctl;
177
178	/* If ARI is disabled on the downstream port there is nothing to do. */
179	if (!PCIB_ARI_ENABLED(device_get_parent(bus)))
180		return (0);
181
182	error = device_get_children(bus, &devlist, &devcount);
183
184	if (error != 0)
185		return (error);
186
187	lowest = NULL;
188	for (i = 0; i < devcount; i++) {
189		if (pci_find_extcap(devlist[i], PCIZ_SRIOV, &iov_pos) == 0) {
190			dev_func = pci_get_function(devlist[i]);
191			if (lowest == NULL || dev_func < lowest_func) {
192				lowest = devlist[i];
193				lowest_func = dev_func;
194				lowest_pos = iov_pos;
195			}
196		}
197	}
198
199	/*
200	 * If we called this function some device must have the SR-IOV
201	 * capability.
202	 */
203	KASSERT(lowest != NULL,
204	    ("Could not find child of %s with SR-IOV capability",
205	    device_get_nameunit(bus)));
206
207	iov_ctl = pci_read_config(lowest, iov_pos + PCIR_SRIOV_CTL, 2);
208	iov_ctl |= PCIM_SRIOV_ARI_EN;
209	pci_write_config(lowest, iov_pos + PCIR_SRIOV_CTL, iov_ctl, 2);
210	free(devlist, M_TEMP);
211	return (0);
212}
213
214static int
215pci_iov_config_page_size(struct pci_devinfo *dinfo)
216{
217	uint32_t page_cap, page_size;
218
219	page_cap = IOV_READ(dinfo, PCIR_SRIOV_PAGE_CAP, 4);
220
221	/*
222	 * If the system page size is less than the smallest SR-IOV page size
223	 * then round up to the smallest SR-IOV page size.
224	 */
225	if (PAGE_SHIFT < PCI_SRIOV_BASE_PAGE_SHIFT)
226		page_size = (1 << 0);
227	else
228		page_size = (1 << (PAGE_SHIFT - PCI_SRIOV_BASE_PAGE_SHIFT));
229
230	/* Check that the device supports the system page size. */
231	if (!(page_size & page_cap))
232		return (ENXIO);
233
234	IOV_WRITE(dinfo, PCIR_SRIOV_PAGE_SIZE, page_size, 4);
235	return (0);
236}
237
238static void
239pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const char *driver,
240    uint16_t first_rid, uint16_t rid_stride)
241{
242	device_t bus, dev, vf;
243	struct pcicfg_iov *iov;
244	struct pci_devinfo *vfinfo;
245	size_t size;
246	int i, error;
247	uint16_t vid, did, next_rid;
248
249	iov = dinfo->cfg.iov;
250	dev = dinfo->cfg.dev;
251	bus = device_get_parent(dev);
252	size = dinfo->cfg.devinfo_size;
253	next_rid = first_rid;
254	vid = pci_get_vendor(dev);
255	did = IOV_READ(dinfo, PCIR_SRIOV_VF_DID, 2);
256
257	for (i = 0; i < iov->iov_num_vfs; i++, next_rid += rid_stride) {
258
259
260		vf = PCI_CREATE_IOV_CHILD(bus, dev, next_rid, vid, did);
261		if (vf == NULL)
262			break;
263
264		vfinfo = device_get_ivars(vf);
265
266		vfinfo->cfg.iov = iov;
267		vfinfo->cfg.vf.index = i;
268
269		error = PCI_ADD_VF(dev, i);
270		if (error != 0) {
271			device_printf(dev, "Failed to add VF %d\n", i);
272			pci_delete_child(bus, vf);
273		}
274	}
275
276	bus_generic_attach(bus);
277}
278
279static int
280pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
281{
282	device_t bus, dev;
283	const char *driver;
284	struct pci_devinfo *dinfo;
285	struct pcicfg_iov *iov;
286	int error;
287	uint16_t rid_off, rid_stride;
288	uint16_t first_rid, last_rid;
289	uint16_t iov_ctl;
290	uint16_t total_vfs;
291	int iov_inited;
292
293	mtx_lock(&Giant);
294	dinfo = cdev->si_drv1;
295	iov = dinfo->cfg.iov;
296	dev = dinfo->cfg.dev;
297	bus = device_get_parent(dev);
298	iov_inited = 0;
299
300	if (iov->iov_num_vfs != 0) {
301		mtx_unlock(&Giant);
302		return (EBUSY);
303	}
304
305	total_vfs = IOV_READ(dinfo, PCIR_SRIOV_TOTAL_VFS, 2);
306
307	if (arg->num_vfs > total_vfs) {
308		error = EINVAL;
309		goto out;
310	}
311
312	/*
313	 * If we are creating passthrough devices then force the ppt driver to
314	 * attach to prevent a VF driver from claming the VFs.
315	 */
316	if (arg->passthrough)
317		driver = "ppt";
318	else
319		driver = NULL;
320
321	error = pci_iov_config_page_size(dinfo);
322	if (error != 0)
323		goto out;
324
325	error = pci_iov_set_ari(bus);
326	if (error != 0)
327		goto out;
328
329	error = PCI_INIT_IOV(dev, arg->num_vfs);
330
331	if (error != 0)
332		goto out;
333
334	iov_inited = 1;
335	IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, arg->num_vfs, 2);
336
337	rid_off = IOV_READ(dinfo, PCIR_SRIOV_VF_OFF, 2);
338	rid_stride = IOV_READ(dinfo, PCIR_SRIOV_VF_STRIDE, 2);
339
340	first_rid = pci_get_rid(dev) + rid_off;
341	last_rid = first_rid + (arg->num_vfs - 1) * rid_stride;
342
343	/* We don't yet support allocating extra bus numbers for VFs. */
344	if (pci_get_bus(dev) != PCI_RID2BUS(last_rid)) {
345		error = ENOSPC;
346		goto out;
347	}
348
349	iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
350	iov_ctl &= ~(PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE);
351	IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);
352
353	iov->iov_num_vfs = arg->num_vfs;
354
355	iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
356	iov_ctl |= PCIM_SRIOV_VF_EN;
357	IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);
358
359	/* Per specification, we must wait 100ms before accessing VFs. */
360	pause("iov", roundup(hz, 10));
361	pci_iov_enumerate_vfs(dinfo, driver, first_rid, rid_stride);
362	mtx_unlock(&Giant);
363
364	return (0);
365out:
366	if (iov_inited)
367		PCI_UNINIT_IOV(dev);
368	iov->iov_num_vfs = 0;
369	mtx_unlock(&Giant);
370	return (error);
371}
372
373static int
374pci_iov_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
375    struct thread *td)
376{
377
378	switch (cmd) {
379	case IOV_CONFIG:
380		return (pci_iov_config(dev, (struct pci_iov_arg *)data));
381	default:
382		return (EINVAL);
383	}
384}
385
386