/* pci_iov.c revision 279451 */
1/*-
2 * Copyright (c) 2013-2015 Sandvine Inc.  All rights reserved.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/dev/pci/pci_iov.c 279451 2015-03-01 00:40:42Z rstone $");
29
30#include "opt_bus.h"
31
32#include <sys/param.h>
33#include <sys/conf.h>
34#include <sys/kernel.h>
35#include <sys/systm.h>
36#include <sys/bus.h>
37#include <sys/fcntl.h>
38#include <sys/ioccom.h>
39#include <sys/iov.h>
40#include <sys/linker.h>
41#include <sys/malloc.h>
42#include <sys/module.h>
43#include <sys/pciio.h>
44#include <sys/queue.h>
45#include <sys/rman.h>
46#include <sys/sysctl.h>
47
48#include <machine/bus.h>
49#include <machine/stdarg.h>
50
51#include <sys/nv.h>
52#include <sys/iov_schema.h>
53
54#include <dev/pci/pcireg.h>
55#include <dev/pci/pcivar.h>
56#include <dev/pci/pci_private.h>
57#include <dev/pci/pci_iov_private.h>
58#include <dev/pci/schema_private.h>
59
60#include "pci_if.h"
61#include "pcib_if.h"
62
/* Allocation tag for all SR-IOV related kernel memory. */
static MALLOC_DEFINE(M_SRIOV, "sr_iov", "PCI SR-IOV allocations");

static d_ioctl_t pci_iov_ioctl;

/*
 * Character-device switch for the per-PF /dev/iov/<name> node; all
 * SR-IOV configuration is driven through ioctls on this node.
 */
static struct cdevsw iov_cdevsw = {
	.d_version = D_VERSION,
	.d_name = "iov",
	.d_ioctl = pci_iov_ioctl
};
72
/*
 * Read/write a register in a device's SR-IOV capability.  "d" is a
 * struct pci_devinfo *, "r" is the register offset relative to the start
 * of the SR-IOV capability, "v" a value to write, and "w" the access
 * width in bytes.  All arguments are parenthesized so that expression
 * arguments (e.g. "base + off") expand safely.
 */
#define IOV_READ(d, r, w) \
	pci_read_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + (r), (w))

#define IOV_WRITE(d, r, v, w) \
	pci_write_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + (r), (v), (w))
78
/* Forward declarations for the configuration-schema construction helpers. */
static nvlist_t	*pci_iov_build_schema(nvlist_t **pf_schema,
		    nvlist_t **vf_schema);
static void	pci_iov_build_pf_schema(nvlist_t *schema,
		    nvlist_t **driver_schema);
static void	pci_iov_build_vf_schema(nvlist_t *schema,
		    nvlist_t **driver_schema);
static nvlist_t	*pci_iov_get_pf_subsystem_schema(void);
static nvlist_t	*pci_iov_get_vf_subsystem_schema(void);
87
88int
89pci_iov_attach_method(device_t bus, device_t dev, nvlist_t *pf_schema,
90    nvlist_t *vf_schema)
91{
92	device_t pcib;
93	struct pci_devinfo *dinfo;
94	struct pcicfg_iov *iov;
95	nvlist_t *schema;
96	uint32_t version;
97	int error;
98	int iov_pos;
99
100	dinfo = device_get_ivars(dev);
101	pcib = device_get_parent(bus);
102	schema = NULL;
103
104	error = pci_find_extcap(dev, PCIZ_SRIOV, &iov_pos);
105
106	if (error != 0)
107		return (error);
108
109	version = pci_read_config(dev, iov_pos, 4);
110	if (PCI_EXTCAP_VER(version) != 1) {
111		if (bootverbose)
112			device_printf(dev,
113			    "Unsupported version of SR-IOV (%d) detected\n",
114			    PCI_EXTCAP_VER(version));
115
116		return (ENXIO);
117	}
118
119	iov = malloc(sizeof(*dinfo->cfg.iov), M_SRIOV, M_WAITOK | M_ZERO);
120
121	mtx_lock(&Giant);
122	if (dinfo->cfg.iov != NULL) {
123		error = EBUSY;
124		goto cleanup;
125	}
126	iov->iov_pos = iov_pos;
127
128	schema = pci_iov_build_schema(&pf_schema, &vf_schema);
129	if (schema == NULL) {
130		error = ENOMEM;
131		goto cleanup;
132	}
133	iov->iov_schema = schema;
134
135	iov->iov_cdev = make_dev(&iov_cdevsw, device_get_unit(dev),
136	    UID_ROOT, GID_WHEEL, 0600, "iov/%s", device_get_nameunit(dev));
137
138	if (iov->iov_cdev == NULL) {
139		error = ENOMEM;
140		goto cleanup;
141	}
142
143	dinfo->cfg.iov = iov;
144	iov->iov_cdev->si_drv1 = dinfo;
145	mtx_unlock(&Giant);
146
147	return (0);
148
149cleanup:
150	nvlist_destroy(schema);
151	nvlist_destroy(pf_schema);
152	nvlist_destroy(vf_schema);
153	free(iov, M_SRIOV);
154	mtx_unlock(&Giant);
155	return (error);
156}
157
158int
159pci_iov_detach_method(device_t bus, device_t dev)
160{
161	struct pci_devinfo *dinfo;
162	struct pcicfg_iov *iov;
163
164	mtx_lock(&Giant);
165	dinfo = device_get_ivars(dev);
166	iov = dinfo->cfg.iov;
167
168	if (iov == NULL) {
169		mtx_unlock(&Giant);
170		return (0);
171	}
172
173	if (iov->iov_num_vfs != 0 || iov->iov_flags & IOV_BUSY) {
174		mtx_unlock(&Giant);
175		return (EBUSY);
176	}
177
178	dinfo->cfg.iov = NULL;
179
180	if (iov->iov_cdev) {
181		destroy_dev(iov->iov_cdev);
182		iov->iov_cdev = NULL;
183	}
184	nvlist_destroy(iov->iov_schema);
185
186	free(iov, M_SRIOV);
187	mtx_unlock(&Giant);
188
189	return (0);
190}
191
192static nvlist_t *
193pci_iov_build_schema(nvlist_t **pf, nvlist_t **vf)
194{
195	nvlist_t *schema, *pf_driver, *vf_driver;
196
197	/* We always take ownership of the schemas. */
198	pf_driver = *pf;
199	*pf = NULL;
200	vf_driver = *vf;
201	*vf = NULL;
202
203	schema = pci_iov_schema_alloc_node();
204	if (schema == NULL)
205		goto cleanup;
206
207	pci_iov_build_pf_schema(schema, &pf_driver);
208	pci_iov_build_vf_schema(schema, &vf_driver);
209
210	if (nvlist_error(schema) != 0)
211		goto cleanup;
212
213	return (schema);
214
215cleanup:
216	nvlist_destroy(schema);
217	nvlist_destroy(pf_driver);
218	nvlist_destroy(vf_driver);
219	return (NULL);
220}
221
222static void
223pci_iov_build_pf_schema(nvlist_t *schema, nvlist_t **driver_schema)
224{
225	nvlist_t *pf_schema, *iov_schema;
226
227	pf_schema = pci_iov_schema_alloc_node();
228	if (pf_schema == NULL) {
229		nvlist_set_error(schema, ENOMEM);
230		return;
231	}
232
233	iov_schema = pci_iov_get_pf_subsystem_schema();
234
235	/*
236	 * Note that if either *driver_schema or iov_schema is NULL, then
237	 * nvlist_move_nvlist will put the schema in the error state and
238	 * SR-IOV will fail to initialize later, so we don't have to explicitly
239	 * handle that case.
240	 */
241	nvlist_move_nvlist(pf_schema, DRIVER_CONFIG_NAME, *driver_schema);
242	nvlist_move_nvlist(pf_schema, IOV_CONFIG_NAME, iov_schema);
243	nvlist_move_nvlist(schema, PF_CONFIG_NAME, pf_schema);
244	*driver_schema = NULL;
245}
246
247static void
248pci_iov_build_vf_schema(nvlist_t *schema, nvlist_t **driver_schema)
249{
250	nvlist_t *vf_schema, *iov_schema;
251
252	vf_schema = pci_iov_schema_alloc_node();
253	if (vf_schema == NULL) {
254		nvlist_set_error(schema, ENOMEM);
255		return;
256	}
257
258	iov_schema = pci_iov_get_vf_subsystem_schema();
259
260	/*
261	 * Note that if either *driver_schema or iov_schema is NULL, then
262	 * nvlist_move_nvlist will put the schema in the error state and
263	 * SR-IOV will fail to initialize later, so we don't have to explicitly
264	 * handle that case.
265	 */
266	nvlist_move_nvlist(vf_schema, DRIVER_CONFIG_NAME, *driver_schema);
267	nvlist_move_nvlist(vf_schema, IOV_CONFIG_NAME, iov_schema);
268	nvlist_move_nvlist(schema, VF_SCHEMA_NAME, vf_schema);
269	*driver_schema = NULL;
270}
271
272static nvlist_t *
273pci_iov_get_pf_subsystem_schema(void)
274{
275	nvlist_t *pf;
276
277	pf = pci_iov_schema_alloc_node();
278	if (pf == NULL)
279		return (NULL);
280
281	pci_iov_schema_add_uint16(pf, "num_vfs", IOV_SCHEMA_REQUIRED, -1);
282	pci_iov_schema_add_string(pf, "device", IOV_SCHEMA_REQUIRED, NULL);
283
284	return (pf);
285}
286
287static nvlist_t *
288pci_iov_get_vf_subsystem_schema(void)
289{
290	nvlist_t *vf;
291
292	vf = pci_iov_schema_alloc_node();
293	if (vf == NULL)
294		return (NULL);
295
296	pci_iov_schema_add_bool(vf, "passthrough", IOV_SCHEMA_HASDEFAULT, 0);
297
298	return (vf);
299}
300
301static int
302pci_iov_alloc_bar(struct pci_devinfo *dinfo, int bar, pci_addr_t bar_shift)
303{
304	struct resource *res;
305	struct pcicfg_iov *iov;
306	device_t dev, bus;
307	u_long start, end;
308	pci_addr_t bar_size;
309	int rid;
310
311	iov = dinfo->cfg.iov;
312	dev = dinfo->cfg.dev;
313	bus = device_get_parent(dev);
314	rid = iov->iov_pos + PCIR_SRIOV_BAR(bar);
315	bar_size = 1 << bar_shift;
316
317	res = pci_alloc_multi_resource(bus, dev, SYS_RES_MEMORY, &rid, 0ul,
318	    ~0ul, 1, iov->iov_num_vfs, RF_ACTIVE);
319
320	if (res == NULL)
321		return (ENXIO);
322
323	iov->iov_bar[bar].res = res;
324	iov->iov_bar[bar].bar_size = bar_size;
325	iov->iov_bar[bar].bar_shift = bar_shift;
326
327	start = rman_get_start(res);
328	end = rman_get_end(res);
329	return (rman_manage_region(&iov->rman, start, end));
330}
331
332static void
333pci_iov_add_bars(struct pcicfg_iov *iov, struct pci_devinfo *dinfo)
334{
335	struct pci_iov_bar *bar;
336	uint64_t bar_start;
337	int i;
338
339	for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
340		bar = &iov->iov_bar[i];
341		if (bar->res != NULL) {
342			bar_start = rman_get_start(bar->res) +
343			    dinfo->cfg.vf.index * bar->bar_size;
344
345			pci_add_bar(dinfo->cfg.dev, PCIR_BAR(i), bar_start,
346			    bar->bar_shift);
347		}
348	}
349}
350
351/*
352 * Set the ARI_EN bit in the lowest-numbered PCI function with the SR-IOV
353 * capability.  This bit is only writeable on the lowest-numbered PF but
354 * affects all PFs on the device.
355 */
356static int
357pci_iov_set_ari(device_t bus)
358{
359	device_t lowest;
360	device_t *devlist;
361	int i, error, devcount, lowest_func, lowest_pos, iov_pos, dev_func;
362	uint16_t iov_ctl;
363
364	/* If ARI is disabled on the downstream port there is nothing to do. */
365	if (!PCIB_ARI_ENABLED(device_get_parent(bus)))
366		return (0);
367
368	error = device_get_children(bus, &devlist, &devcount);
369
370	if (error != 0)
371		return (error);
372
373	lowest = NULL;
374	for (i = 0; i < devcount; i++) {
375		if (pci_find_extcap(devlist[i], PCIZ_SRIOV, &iov_pos) == 0) {
376			dev_func = pci_get_function(devlist[i]);
377			if (lowest == NULL || dev_func < lowest_func) {
378				lowest = devlist[i];
379				lowest_func = dev_func;
380				lowest_pos = iov_pos;
381			}
382		}
383	}
384
385	/*
386	 * If we called this function some device must have the SR-IOV
387	 * capability.
388	 */
389	KASSERT(lowest != NULL,
390	    ("Could not find child of %s with SR-IOV capability",
391	    device_get_nameunit(bus)));
392
393	iov_ctl = pci_read_config(lowest, iov_pos + PCIR_SRIOV_CTL, 2);
394	iov_ctl |= PCIM_SRIOV_ARI_EN;
395	pci_write_config(lowest, iov_pos + PCIR_SRIOV_CTL, iov_ctl, 2);
396	free(devlist, M_TEMP);
397	return (0);
398}
399
400static int
401pci_iov_config_page_size(struct pci_devinfo *dinfo)
402{
403	uint32_t page_cap, page_size;
404
405	page_cap = IOV_READ(dinfo, PCIR_SRIOV_PAGE_CAP, 4);
406
407	/*
408	 * If the system page size is less than the smallest SR-IOV page size
409	 * then round up to the smallest SR-IOV page size.
410	 */
411	if (PAGE_SHIFT < PCI_SRIOV_BASE_PAGE_SHIFT)
412		page_size = (1 << 0);
413	else
414		page_size = (1 << (PAGE_SHIFT - PCI_SRIOV_BASE_PAGE_SHIFT));
415
416	/* Check that the device supports the system page size. */
417	if (!(page_size & page_cap))
418		return (ENXIO);
419
420	IOV_WRITE(dinfo, PCIR_SRIOV_PAGE_SIZE, page_size, 4);
421	return (0);
422}
423
424static int
425pci_iov_init_rman(device_t pf, struct pcicfg_iov *iov)
426{
427	int error;
428
429	iov->rman.rm_start = 0;
430	iov->rman.rm_end = ~0ul;
431	iov->rman.rm_type = RMAN_ARRAY;
432	snprintf(iov->rman_name, sizeof(iov->rman_name), "%s VF I/O memory",
433	    device_get_nameunit(pf));
434	iov->rman.rm_descr = iov->rman_name;
435
436	error = rman_init(&iov->rman);
437	if (error != 0)
438		return (error);
439
440	iov->iov_flags |= IOV_RMAN_INITED;
441	return (0);
442}
443
/*
 * Size each SR-IOV BAR on the PF and allocate the backing memory for all
 * VFs' copies of it.  Returns 0 on success or the first allocation error.
 */
static int
pci_iov_setup_bars(struct pci_devinfo *dinfo)
{
	device_t dev;
	struct pcicfg_iov *iov;
	pci_addr_t bar_value, testval;
	int i, last_64, error;

	iov = dinfo->cfg.iov;
	dev = dinfo->cfg.dev;
	last_64 = 0;

	for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
		/*
		 * If a PCI BAR is a 64-bit wide BAR, then it spans two
		 * consecutive registers.  Therefore if the last BAR that
		 * we looked at was a 64-bit BAR, we need to skip this
		 * register as it's the second half of the last BAR.
		 */
		if (!last_64) {
			/* Sizes the BAR and reports whether it is 64-bit. */
			pci_read_bar(dev,
			    iov->iov_pos + PCIR_SRIOV_BAR(i),
			    &bar_value, &testval, &last_64);

			/* testval == 0 means the BAR is unimplemented. */
			if (testval != 0) {
				error = pci_iov_alloc_bar(dinfo, i,
				   pci_mapsize(testval));
				if (error != 0)
					return (error);
			}
		} else
			last_64 = 0;
	}

	return (0);
}
480
481static void
482pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const char *driver,
483    uint16_t first_rid, uint16_t rid_stride)
484{
485	device_t bus, dev, vf;
486	struct pcicfg_iov *iov;
487	struct pci_devinfo *vfinfo;
488	size_t size;
489	int i, error;
490	uint16_t vid, did, next_rid;
491
492	iov = dinfo->cfg.iov;
493	dev = dinfo->cfg.dev;
494	bus = device_get_parent(dev);
495	size = dinfo->cfg.devinfo_size;
496	next_rid = first_rid;
497	vid = pci_get_vendor(dev);
498	did = IOV_READ(dinfo, PCIR_SRIOV_VF_DID, 2);
499
500	for (i = 0; i < iov->iov_num_vfs; i++, next_rid += rid_stride) {
501
502
503		vf = PCI_CREATE_IOV_CHILD(bus, dev, next_rid, vid, did);
504		if (vf == NULL)
505			break;
506
507		vfinfo = device_get_ivars(vf);
508
509		vfinfo->cfg.iov = iov;
510		vfinfo->cfg.vf.index = i;
511
512		pci_iov_add_bars(iov, vfinfo);
513
514		error = PCI_ADD_VF(dev, i);
515		if (error != 0) {
516			device_printf(dev, "Failed to add VF %d\n", i);
517			pci_delete_child(bus, vf);
518		}
519	}
520
521	bus_generic_attach(bus);
522}
523
/*
 * Handler for the IOV_CONFIG ioctl: enable SR-IOV on the PF behind
 * "cdev" and create arg->num_vfs VF devices.  Giant together with the
 * IOV_BUSY flag serializes this against other SR-IOV operations.  On
 * failure, all partially initialized state is rolled back at "out".
 */
static int
pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
{
	device_t bus, dev;
	const char *driver;
	struct pci_devinfo *dinfo;
	struct pcicfg_iov *iov;
	int i, error;
	uint16_t rid_off, rid_stride;
	uint16_t first_rid, last_rid;
	uint16_t iov_ctl;
	uint16_t total_vfs;
	int iov_inited;

	mtx_lock(&Giant);
	dinfo = cdev->si_drv1;
	iov = dinfo->cfg.iov;
	dev = dinfo->cfg.dev;
	bus = device_get_parent(dev);
	iov_inited = 0;

	/* Refuse to reconfigure while busy or already configured. */
	if ((iov->iov_flags & IOV_BUSY) || iov->iov_num_vfs != 0) {
		mtx_unlock(&Giant);
		return (EBUSY);
	}
	iov->iov_flags |= IOV_BUSY;

	/* The capability caps how many VFs the device can expose. */
	total_vfs = IOV_READ(dinfo, PCIR_SRIOV_TOTAL_VFS, 2);

	if (arg->num_vfs > total_vfs) {
		error = EINVAL;
		goto out;
	}

	/*
	 * If we are creating passthrough devices then force the ppt driver to
	 * attach to prevent a VF driver from claiming the VFs.
	 */
	if (arg->passthrough)
		driver = "ppt";
	else
		driver = NULL;

	error = pci_iov_config_page_size(dinfo);
	if (error != 0)
		goto out;

	/* ARI is needed so VF routing IDs beyond function 7 resolve. */
	error = pci_iov_set_ari(bus);
	if (error != 0)
		goto out;

	/* Let the PF driver prepare for the requested number of VFs. */
	error = PCI_INIT_IOV(dev, arg->num_vfs);

	if (error != 0)
		goto out;

	iov_inited = 1;
	IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, arg->num_vfs, 2);

	/* VF routing IDs derive from the PF RID plus offset and stride. */
	rid_off = IOV_READ(dinfo, PCIR_SRIOV_VF_OFF, 2);
	rid_stride = IOV_READ(dinfo, PCIR_SRIOV_VF_STRIDE, 2);

	first_rid = pci_get_rid(dev) + rid_off;
	last_rid = first_rid + (arg->num_vfs - 1) * rid_stride;

	/* We don't yet support allocating extra bus numbers for VFs. */
	if (pci_get_bus(dev) != PCI_RID2BUS(last_rid)) {
		error = ENOSPC;
		goto out;
	}

	/* Keep VFs disabled while their memory resources are set up. */
	iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
	iov_ctl &= ~(PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE);
	IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);

	error = pci_iov_init_rman(dev, iov);
	if (error != 0)
		goto out;

	iov->iov_num_vfs = arg->num_vfs;

	error = pci_iov_setup_bars(dinfo);
	if (error != 0)
		goto out;

	/* Enable the VFs and allow them to respond to memory space. */
	iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
	iov_ctl |= PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE;
	IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);

	/* Per specification, we must wait 100ms before accessing VFs. */
	pause("iov", roundup(hz, 10));
	pci_iov_enumerate_vfs(dinfo, driver, first_rid, rid_stride);
	mtx_unlock(&Giant);

	return (0);
out:
	/* Unwind in reverse order of initialization. */
	if (iov_inited)
		PCI_UNINIT_IOV(dev);

	for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
		if (iov->iov_bar[i].res != NULL) {
			pci_release_resource(bus, dev, SYS_RES_MEMORY,
			    iov->iov_pos + PCIR_SRIOV_BAR(i),
			    iov->iov_bar[i].res);
			pci_delete_resource(bus, dev, SYS_RES_MEMORY,
			    iov->iov_pos + PCIR_SRIOV_BAR(i));
			iov->iov_bar[i].res = NULL;
		}
	}

	if (iov->iov_flags & IOV_RMAN_INITED) {
		rman_fini(&iov->rman);
		iov->iov_flags &= ~IOV_RMAN_INITED;
	}
	iov->iov_num_vfs = 0;
	iov->iov_flags &= ~IOV_BUSY;
	mtx_unlock(&Giant);
	return (error);
}
643
644/* Return true if child is a VF of the given PF. */
645static int
646pci_iov_is_child_vf(struct pcicfg_iov *pf, device_t child)
647{
648	struct pci_devinfo *vfinfo;
649
650	vfinfo = device_get_ivars(child);
651
652	if (!(vfinfo->cfg.flags & PCICFG_VF))
653		return (0);
654
655	return (pf == vfinfo->cfg.iov);
656}
657
/*
 * Handler for the IOV_DELETE ioctl: detach and delete every VF child of
 * the PF behind "cdev", disable SR-IOV in the capability registers and
 * release the memory that backed the VF BARs.  Returns EBUSY while
 * another SR-IOV operation is in flight, ECHILD when SR-IOV is not
 * configured, or the error from a VF driver that refused to detach (in
 * which case SR-IOV remains enabled).
 */
static int
pci_iov_delete(struct cdev *cdev)
{
	device_t bus, dev, vf, *devlist;
	struct pci_devinfo *dinfo;
	struct pcicfg_iov *iov;
	int i, error, devcount;
	uint32_t iov_ctl;

	mtx_lock(&Giant);
	dinfo = cdev->si_drv1;
	iov = dinfo->cfg.iov;
	dev = dinfo->cfg.dev;
	bus = device_get_parent(dev);
	devlist = NULL;

	if (iov->iov_flags & IOV_BUSY) {
		mtx_unlock(&Giant);
		return (EBUSY);
	}

	if (iov->iov_num_vfs == 0) {
		mtx_unlock(&Giant);
		return (ECHILD);
	}

	iov->iov_flags |= IOV_BUSY;

	error = device_get_children(bus, &devlist, &devcount);

	if (error != 0)
		goto out;

	/*
	 * First pass: detach the drivers from all VFs.  A failure here
	 * aborts the whole operation before any device is destroyed.
	 */
	for (i = 0; i < devcount; i++) {
		vf = devlist[i];

		if (!pci_iov_is_child_vf(iov, vf))
			continue;

		error = device_detach(vf);
		if (error != 0) {
			device_printf(dev,
			   "Could not disable SR-IOV: failed to detach VF %s\n",
			    device_get_nameunit(vf));
			goto out;
		}
	}

	/* Second pass: all drivers are detached, destroy the VF devices. */
	for (i = 0; i < devcount; i++) {
		vf = devlist[i];

		if (pci_iov_is_child_vf(iov, vf))
			pci_delete_child(bus, vf);
	}
	PCI_UNINIT_IOV(dev);

	/* Disable the VFs and zero the configured VF count. */
	iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
	iov_ctl &= ~(PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE);
	IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);
	IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, 0, 2);

	iov->iov_num_vfs = 0;

	/* Release and delete the PF-side resources backing the VF BARs. */
	for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
		if (iov->iov_bar[i].res != NULL) {
			pci_release_resource(bus, dev, SYS_RES_MEMORY,
			    iov->iov_pos + PCIR_SRIOV_BAR(i),
			    iov->iov_bar[i].res);
			pci_delete_resource(bus, dev, SYS_RES_MEMORY,
			    iov->iov_pos + PCIR_SRIOV_BAR(i));
			iov->iov_bar[i].res = NULL;
		}
	}

	if (iov->iov_flags & IOV_RMAN_INITED) {
		rman_fini(&iov->rman);
		iov->iov_flags &= ~IOV_RMAN_INITED;
	}

	error = 0;
out:
	free(devlist, M_TEMP);
	iov->iov_flags &= ~IOV_BUSY;
	mtx_unlock(&Giant);
	return (error);
}
744
/*
 * Handler for the IOV_GET_SCHEMA ioctl: pack the PF's combined config
 * schema and copy it out to the user buffer described by "output".  If
 * the buffer is too small, output->len is still updated to the required
 * size and the failure is reported via output->error (see below) rather
 * than the ioctl return value.
 */
static int
pci_iov_get_schema_ioctl(struct cdev *cdev, struct pci_iov_schema *output)
{
	struct pci_devinfo *dinfo;
	void *packed;
	size_t output_len, size;
	int error;

	packed = NULL;

	/* Giant protects cfg.iov; drop it before touching userland memory. */
	mtx_lock(&Giant);
	dinfo = cdev->si_drv1;
	packed = nvlist_pack(dinfo->cfg.iov->iov_schema, &size);
	mtx_unlock(&Giant);

	if (packed == NULL) {
		error = ENOMEM;
		goto fail;
	}

	/* Always report the packed size back to the caller. */
	output_len = output->len;
	output->len = size;
	if (size <= output_len) {
		error = copyout(packed, output->schema, size);

		if (error != 0)
			goto fail;

		output->error = 0;
	} else
		/*
		 * If we return an error then the ioctl code won't copyout
		 * output back to userland, so we flag the error in the struct
		 * instead.
		 */
		output->error = EMSGSIZE;

	error = 0;

fail:
	/* free() is a no-op on NULL, covering the nvlist_pack failure path. */
	free(packed, M_NVLIST);

	return (error);
}
789
790static int
791pci_iov_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
792    struct thread *td)
793{
794
795	switch (cmd) {
796	case IOV_CONFIG:
797		return (pci_iov_config(dev, (struct pci_iov_arg *)data));
798	case IOV_DELETE:
799		return (pci_iov_delete(dev));
800	case IOV_GET_SCHEMA:
801		return (pci_iov_get_schema_ioctl(dev,
802		    (struct pci_iov_schema *)data));
803	default:
804		return (EINVAL);
805	}
806}
807
/*
 * Bus method used by VF children to allocate a memory BAR resource.  VF
 * BARs are not programmed through registers on the VF itself; each VF's
 * BAR is a fixed slice of the contiguous region allocated on the PF, so
 * the request is satisfied from the PF's VF rman rather than the system
 * memory rman.  Returns the resource, or NULL on any failure.
 */
struct resource *
pci_vf_alloc_mem_resource(device_t dev, device_t child, int *rid, u_long start,
    u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	struct pcicfg_iov *iov;
	struct pci_map *map;
	struct resource *res;
	struct resource_list_entry *rle;
	u_long bar_start, bar_end;
	pci_addr_t bar_length;
	int error;

	dinfo = device_get_ivars(child);
	iov = dinfo->cfg.iov;

	map = pci_find_bar(child, *rid);
	if (map == NULL)
		return (NULL);

	/* The VF's window was precomputed by pci_iov_add_bars(). */
	bar_length = 1 << map->pm_size;
	bar_start = map->pm_value;
	bar_end = bar_start + bar_length - 1;

	/*
	 * Make sure that the resource fits the constraints.  Only
	 * count == 1 "any address" style requests are supported, since
	 * the VF BAR address is fixed by the PF allocation.
	 */
	if (bar_start >= end || bar_end <= bar_start || count != 1)
		return (NULL);

	/* Clamp the resource to the constraints if necessary. */
	if (bar_start < start)
		bar_start = start;
	if (bar_end > end)
		bar_end = end;
	bar_length = bar_end - bar_start + 1;

	res = rman_reserve_resource(&iov->rman, bar_start, bar_end,
	    bar_length, flags, child);
	if (res == NULL)
		return (NULL);

	/* Track the allocation in the VF's resource list for rls etc. */
	rle = resource_list_add(&dinfo->resources, SYS_RES_MEMORY, *rid,
	    bar_start, bar_end, 1);
	if (rle == NULL) {
		rman_release_resource(res);
		return (NULL);
	}

	rman_set_rid(res, *rid);

	if (flags & RF_ACTIVE) {
		error = bus_activate_resource(child, SYS_RES_MEMORY, *rid, res);
		if (error != 0) {
			/* Roll back the list entry and the reservation. */
			resource_list_delete(&dinfo->resources, SYS_RES_MEMORY,
			    *rid);
			rman_release_resource(res);
			return (NULL);
		}
	}
	rle->res = res;

	return (res);
}
870
871int
872pci_vf_release_mem_resource(device_t dev, device_t child, int rid,
873    struct resource *r)
874{
875	struct pci_devinfo *dinfo;
876	struct resource_list_entry *rle;
877	int error;
878
879	dinfo = device_get_ivars(child);
880
881	if (rman_get_flags(r) & RF_ACTIVE) {
882		error = bus_deactivate_resource(child, SYS_RES_MEMORY, rid, r);
883		if (error != 0)
884			return (error);
885	}
886
887	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY, rid);
888	if (rle != NULL) {
889		rle->res = NULL;
890		resource_list_delete(&dinfo->resources, SYS_RES_MEMORY,
891		    rid);
892	}
893
894	return (rman_release_resource(r));
895}
896
897