ppt.c revision 351060
1/*-
2 * Copyright (c) 2011 NetApp, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: stable/11/sys/amd64/vmm/io/ppt.c 351060 2019-08-14 23:31:53Z jhb $
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/11/sys/amd64/vmm/io/ppt.c 351060 2019-08-14 23:31:53Z jhb $");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/kernel.h>
35#include <sys/malloc.h>
36#include <sys/module.h>
37#include <sys/bus.h>
38#include <sys/pciio.h>
39#include <sys/rman.h>
40#include <sys/smp.h>
41#include <sys/sysctl.h>
42
43#include <dev/pci/pcivar.h>
44#include <dev/pci/pcireg.h>
45
46#include <machine/resource.h>
47
48#include <machine/vmm.h>
49#include <machine/vmm_dev.h>
50
51#include "vmm_lapic.h"
52#include "vmm_ktr.h"
53
54#include "iommu.h"
55#include "ppt.h"
56
57/* XXX locking */
58
59#define	MAX_MSIMSGS	32
60
61/*
62 * If the MSI-X table is located in the middle of a BAR then that MMIO
63 * region gets split into two segments - one segment above the MSI-X table
64 * and the other segment below the MSI-X table - with a hole in place of
65 * the MSI-X table so accesses to it can be trapped and emulated.
66 *
67 * So, allocate a MMIO segment for each BAR register + 1 additional segment.
68 */
69#define	MAX_MMIOSEGS	((PCIR_MAX_BAR_0 + 1) + 1)
70
71MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources");
72
73struct pptintr_arg {				/* pptintr(pptintr_arg) */
74	struct pptdev	*pptdev;
75	uint64_t	addr;
76	uint64_t	msg_data;
77};
78
79struct pptseg {
80	vm_paddr_t	gpa;
81	size_t		len;
82	int		wired;
83};
84
85struct pptdev {
86	device_t	dev;
87	struct vm	*vm;			/* owner of this device */
88	TAILQ_ENTRY(pptdev)	next;
89	struct pptseg mmio[MAX_MMIOSEGS];
90	struct {
91		int	num_msgs;		/* guest state */
92
93		int	startrid;		/* host state */
94		struct resource *res[MAX_MSIMSGS];
95		void	*cookie[MAX_MSIMSGS];
96		struct pptintr_arg arg[MAX_MSIMSGS];
97	} msi;
98
99	struct {
100		int num_msgs;
101		int startrid;
102		int msix_table_rid;
103		int msix_pba_rid;
104		struct resource *msix_table_res;
105		struct resource *msix_pba_res;
106		struct resource **res;
107		void **cookie;
108		struct pptintr_arg *arg;
109	} msix;
110};
111
112SYSCTL_DECL(_hw_vmm);
113SYSCTL_NODE(_hw_vmm, OID_AUTO, ppt, CTLFLAG_RW, 0, "bhyve passthru devices");
114
115static int num_pptdevs;
116SYSCTL_INT(_hw_vmm_ppt, OID_AUTO, devices, CTLFLAG_RD, &num_pptdevs, 0,
117    "number of pci passthru devices");
118
119static TAILQ_HEAD(, pptdev) pptdev_list = TAILQ_HEAD_INITIALIZER(pptdev_list);
120
121static int
122ppt_probe(device_t dev)
123{
124	int bus, slot, func;
125	struct pci_devinfo *dinfo;
126
127	dinfo = (struct pci_devinfo *)device_get_ivars(dev);
128
129	bus = pci_get_bus(dev);
130	slot = pci_get_slot(dev);
131	func = pci_get_function(dev);
132
133	/*
134	 * To qualify as a pci passthrough device a device must:
135	 * - be allowed by administrator to be used in this role
136	 * - be an endpoint device
137	 */
138	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
139		return (ENXIO);
140	else if (vmm_is_pptdev(bus, slot, func))
141		return (0);
142	else
143		/*
144		 * Returning BUS_PROBE_NOWILDCARD here matches devices that the
145		 * SR-IOV infrastructure specified as "ppt" passthrough devices.
146		 * All normal devices that did not have "ppt" specified as their
147		 * driver will not be matched by this.
148		 */
149		return (BUS_PROBE_NOWILDCARD);
150}
151
152static int
153ppt_attach(device_t dev)
154{
155	struct pptdev *ppt;
156
157	ppt = device_get_softc(dev);
158
159	iommu_remove_device(iommu_host_domain(), pci_get_rid(dev));
160	num_pptdevs++;
161	TAILQ_INSERT_TAIL(&pptdev_list, ppt, next);
162	ppt->dev = dev;
163
164	if (bootverbose)
165		device_printf(dev, "attached\n");
166
167	return (0);
168}
169
170static int
171ppt_detach(device_t dev)
172{
173	struct pptdev *ppt;
174
175	ppt = device_get_softc(dev);
176
177	if (ppt->vm != NULL)
178		return (EBUSY);
179	num_pptdevs--;
180	TAILQ_REMOVE(&pptdev_list, ppt, next);
181	pci_disable_busmaster(dev);
182	iommu_add_device(iommu_host_domain(), pci_get_rid(dev));
183
184	return (0);
185}
186
187static device_method_t ppt_methods[] = {
188	/* Device interface */
189	DEVMETHOD(device_probe,		ppt_probe),
190	DEVMETHOD(device_attach,	ppt_attach),
191	DEVMETHOD(device_detach,	ppt_detach),
192	{0, 0}
193};
194
195static devclass_t ppt_devclass;
196DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, sizeof(struct pptdev));
197DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL);
198
199static struct pptdev *
200ppt_find(int bus, int slot, int func)
201{
202	device_t dev;
203	struct pptdev *ppt;
204	int b, s, f;
205
206	TAILQ_FOREACH(ppt, &pptdev_list, next) {
207		dev = ppt->dev;
208		b = pci_get_bus(dev);
209		s = pci_get_slot(dev);
210		f = pci_get_function(dev);
211		if (bus == b && slot == s && func == f)
212			return (ppt);
213	}
214	return (NULL);
215}
216
217static void
218ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
219{
220	int i;
221	struct pptseg *seg;
222
223	for (i = 0; i < MAX_MMIOSEGS; i++) {
224		seg = &ppt->mmio[i];
225		if (seg->len == 0)
226			continue;
227		(void)vm_unmap_mmio(vm, seg->gpa, seg->len);
228		bzero(seg, sizeof(struct pptseg));
229	}
230}
231
232static void
233ppt_teardown_msi(struct pptdev *ppt)
234{
235	int i, rid;
236	void *cookie;
237	struct resource *res;
238
239	if (ppt->msi.num_msgs == 0)
240		return;
241
242	for (i = 0; i < ppt->msi.num_msgs; i++) {
243		rid = ppt->msi.startrid + i;
244		res = ppt->msi.res[i];
245		cookie = ppt->msi.cookie[i];
246
247		if (cookie != NULL)
248			bus_teardown_intr(ppt->dev, res, cookie);
249
250		if (res != NULL)
251			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
252
253		ppt->msi.res[i] = NULL;
254		ppt->msi.cookie[i] = NULL;
255	}
256
257	if (ppt->msi.startrid == 1)
258		pci_release_msi(ppt->dev);
259
260	ppt->msi.num_msgs = 0;
261}
262
263static void
264ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
265{
266	int rid;
267	struct resource *res;
268	void *cookie;
269
270	rid = ppt->msix.startrid + idx;
271	res = ppt->msix.res[idx];
272	cookie = ppt->msix.cookie[idx];
273
274	if (cookie != NULL)
275		bus_teardown_intr(ppt->dev, res, cookie);
276
277	if (res != NULL)
278		bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
279
280	ppt->msix.res[idx] = NULL;
281	ppt->msix.cookie[idx] = NULL;
282}
283
284static void
285ppt_teardown_msix(struct pptdev *ppt)
286{
287	int i;
288
289	if (ppt->msix.num_msgs == 0)
290		return;
291
292	for (i = 0; i < ppt->msix.num_msgs; i++)
293		ppt_teardown_msix_intr(ppt, i);
294
295	free(ppt->msix.res, M_PPTMSIX);
296	free(ppt->msix.cookie, M_PPTMSIX);
297	free(ppt->msix.arg, M_PPTMSIX);
298
299	pci_release_msi(ppt->dev);
300
301	if (ppt->msix.msix_table_res) {
302		bus_release_resource(ppt->dev, SYS_RES_MEMORY,
303				     ppt->msix.msix_table_rid,
304				     ppt->msix.msix_table_res);
305		ppt->msix.msix_table_res = NULL;
306		ppt->msix.msix_table_rid = 0;
307	}
308	if (ppt->msix.msix_pba_res) {
309		bus_release_resource(ppt->dev, SYS_RES_MEMORY,
310				     ppt->msix.msix_pba_rid,
311				     ppt->msix.msix_pba_res);
312		ppt->msix.msix_pba_res = NULL;
313		ppt->msix.msix_pba_rid = 0;
314	}
315
316	ppt->msix.num_msgs = 0;
317}
318
319int
320ppt_avail_devices(void)
321{
322
323	return (num_pptdevs);
324}
325
326int
327ppt_assigned_devices(struct vm *vm)
328{
329	struct pptdev *ppt;
330	int num;
331
332	num = 0;
333	TAILQ_FOREACH(ppt, &pptdev_list, next) {
334		if (ppt->vm == vm)
335			num++;
336	}
337	return (num);
338}
339
340boolean_t
341ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
342{
343	int i;
344	struct pptdev *ppt;
345	struct pptseg *seg;
346
347	TAILQ_FOREACH(ppt, &pptdev_list, next) {
348		if (ppt->vm != vm)
349			continue;
350
351		for (i = 0; i < MAX_MMIOSEGS; i++) {
352			seg = &ppt->mmio[i];
353			if (seg->len == 0)
354				continue;
355			if (gpa >= seg->gpa && gpa < seg->gpa + seg->len)
356				return (TRUE);
357		}
358	}
359
360	return (FALSE);
361}
362
363static void
364ppt_pci_reset(device_t dev)
365{
366
367	if (pcie_flr(dev,
368	     max(pcie_get_max_completion_timeout(dev) / 1000, 10), true))
369		return;
370
371	pci_power_reset(dev);
372}
373
374int
375ppt_assign_device(struct vm *vm, int bus, int slot, int func)
376{
377	struct pptdev *ppt;
378
379	ppt = ppt_find(bus, slot, func);
380	if (ppt != NULL) {
381		/*
382		 * If this device is owned by a different VM then we
383		 * cannot change its owner.
384		 */
385		if (ppt->vm != NULL && ppt->vm != vm)
386			return (EBUSY);
387
388		pci_save_state(ppt->dev);
389		ppt_pci_reset(ppt->dev);
390		pci_restore_state(ppt->dev);
391		ppt->vm = vm;
392		iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
393		return (0);
394	}
395	return (ENOENT);
396}
397
398int
399ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
400{
401	struct pptdev *ppt;
402
403	ppt = ppt_find(bus, slot, func);
404	if (ppt != NULL) {
405		/*
406		 * If this device is not owned by this 'vm' then bail out.
407		 */
408		if (ppt->vm != vm)
409			return (EBUSY);
410
411		pci_save_state(ppt->dev);
412		ppt_pci_reset(ppt->dev);
413		pci_restore_state(ppt->dev);
414		ppt_unmap_mmio(vm, ppt);
415		ppt_teardown_msi(ppt);
416		ppt_teardown_msix(ppt);
417		iommu_remove_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
418		ppt->vm = NULL;
419		return (0);
420	}
421	return (ENOENT);
422}
423
424int
425ppt_unassign_all(struct vm *vm)
426{
427	struct pptdev *ppt;
428	int bus, slot, func;
429	device_t dev;
430
431	TAILQ_FOREACH(ppt, &pptdev_list, next) {
432		if (ppt->vm == vm) {
433			dev = ppt->dev;
434			bus = pci_get_bus(dev);
435			slot = pci_get_slot(dev);
436			func = pci_get_function(dev);
437			vm_unassign_pptdev(vm, bus, slot, func);
438		}
439	}
440
441	return (0);
442}
443
444int
445ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
446	     vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
447{
448	int i, error;
449	struct pptseg *seg;
450	struct pptdev *ppt;
451
452	ppt = ppt_find(bus, slot, func);
453	if (ppt != NULL) {
454		if (ppt->vm != vm)
455			return (EBUSY);
456
457		for (i = 0; i < MAX_MMIOSEGS; i++) {
458			seg = &ppt->mmio[i];
459			if (seg->len == 0) {
460				error = vm_map_mmio(vm, gpa, len, hpa);
461				if (error == 0) {
462					seg->gpa = gpa;
463					seg->len = len;
464				}
465				return (error);
466			}
467		}
468		return (ENOSPC);
469	}
470	return (ENOENT);
471}
472
473static int
474pptintr(void *arg)
475{
476	struct pptdev *ppt;
477	struct pptintr_arg *pptarg;
478
479	pptarg = arg;
480	ppt = pptarg->pptdev;
481
482	if (ppt->vm != NULL)
483		lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data);
484	else {
485		/*
486		 * XXX
487		 * This is not expected to happen - panic?
488		 */
489	}
490
491	/*
492	 * For legacy interrupts give other filters a chance in case
493	 * the interrupt was not generated by the passthrough device.
494	 */
495	if (ppt->msi.startrid == 0)
496		return (FILTER_STRAY);
497	else
498		return (FILTER_HANDLED);
499}
500
501int
502ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
503	      uint64_t addr, uint64_t msg, int numvec)
504{
505	int i, rid, flags;
506	int msi_count, startrid, error, tmp;
507	struct pptdev *ppt;
508
509	if (numvec < 0 || numvec > MAX_MSIMSGS)
510		return (EINVAL);
511
512	ppt = ppt_find(bus, slot, func);
513	if (ppt == NULL)
514		return (ENOENT);
515	if (ppt->vm != vm)		/* Make sure we own this device */
516		return (EBUSY);
517
518	/* Free any allocated resources */
519	ppt_teardown_msi(ppt);
520
521	if (numvec == 0)		/* nothing more to do */
522		return (0);
523
524	flags = RF_ACTIVE;
525	msi_count = pci_msi_count(ppt->dev);
526	if (msi_count == 0) {
527		startrid = 0;		/* legacy interrupt */
528		msi_count = 1;
529		flags |= RF_SHAREABLE;
530	} else
531		startrid = 1;		/* MSI */
532
533	/*
534	 * The device must be capable of supporting the number of vectors
535	 * the guest wants to allocate.
536	 */
537	if (numvec > msi_count)
538		return (EINVAL);
539
540	/*
541	 * Make sure that we can allocate all the MSI vectors that are needed
542	 * by the guest.
543	 */
544	if (startrid == 1) {
545		tmp = numvec;
546		error = pci_alloc_msi(ppt->dev, &tmp);
547		if (error)
548			return (error);
549		else if (tmp != numvec) {
550			pci_release_msi(ppt->dev);
551			return (ENOSPC);
552		} else {
553			/* success */
554		}
555	}
556
557	ppt->msi.startrid = startrid;
558
559	/*
560	 * Allocate the irq resource and attach it to the interrupt handler.
561	 */
562	for (i = 0; i < numvec; i++) {
563		ppt->msi.num_msgs = i + 1;
564		ppt->msi.cookie[i] = NULL;
565
566		rid = startrid + i;
567		ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
568							 &rid, flags);
569		if (ppt->msi.res[i] == NULL)
570			break;
571
572		ppt->msi.arg[i].pptdev = ppt;
573		ppt->msi.arg[i].addr = addr;
574		ppt->msi.arg[i].msg_data = msg + i;
575
576		error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
577				       INTR_TYPE_NET | INTR_MPSAFE,
578				       pptintr, NULL, &ppt->msi.arg[i],
579				       &ppt->msi.cookie[i]);
580		if (error != 0)
581			break;
582	}
583
584	if (i < numvec) {
585		ppt_teardown_msi(ppt);
586		return (ENXIO);
587	}
588
589	return (0);
590}
591
592int
593ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
594	       int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
595{
596	struct pptdev *ppt;
597	struct pci_devinfo *dinfo;
598	int numvec, alloced, rid, error;
599	size_t res_size, cookie_size, arg_size;
600
601	ppt = ppt_find(bus, slot, func);
602	if (ppt == NULL)
603		return (ENOENT);
604	if (ppt->vm != vm)		/* Make sure we own this device */
605		return (EBUSY);
606
607	dinfo = device_get_ivars(ppt->dev);
608	if (!dinfo)
609		return (ENXIO);
610
611	/*
612	 * First-time configuration:
613	 * 	Allocate the MSI-X table
614	 *	Allocate the IRQ resources
615	 *	Set up some variables in ppt->msix
616	 */
617	if (ppt->msix.num_msgs == 0) {
618		numvec = pci_msix_count(ppt->dev);
619		if (numvec <= 0)
620			return (EINVAL);
621
622		ppt->msix.startrid = 1;
623		ppt->msix.num_msgs = numvec;
624
625		res_size = numvec * sizeof(ppt->msix.res[0]);
626		cookie_size = numvec * sizeof(ppt->msix.cookie[0]);
627		arg_size = numvec * sizeof(ppt->msix.arg[0]);
628
629		ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK | M_ZERO);
630		ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX,
631					  M_WAITOK | M_ZERO);
632		ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK | M_ZERO);
633
634		rid = dinfo->cfg.msix.msix_table_bar;
635		ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev,
636					       SYS_RES_MEMORY, &rid, RF_ACTIVE);
637
638		if (ppt->msix.msix_table_res == NULL) {
639			ppt_teardown_msix(ppt);
640			return (ENOSPC);
641		}
642		ppt->msix.msix_table_rid = rid;
643
644		if (dinfo->cfg.msix.msix_table_bar !=
645		    dinfo->cfg.msix.msix_pba_bar) {
646			rid = dinfo->cfg.msix.msix_pba_bar;
647			ppt->msix.msix_pba_res = bus_alloc_resource_any(
648			    ppt->dev, SYS_RES_MEMORY, &rid, RF_ACTIVE);
649
650			if (ppt->msix.msix_pba_res == NULL) {
651				ppt_teardown_msix(ppt);
652				return (ENOSPC);
653			}
654			ppt->msix.msix_pba_rid = rid;
655		}
656
657		alloced = numvec;
658		error = pci_alloc_msix(ppt->dev, &alloced);
659		if (error || alloced != numvec) {
660			ppt_teardown_msix(ppt);
661			return (error == 0 ? ENOSPC: error);
662		}
663	}
664
665	if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
666		/* Tear down the IRQ if it's already set up */
667		ppt_teardown_msix_intr(ppt, idx);
668
669		/* Allocate the IRQ resource */
670		ppt->msix.cookie[idx] = NULL;
671		rid = ppt->msix.startrid + idx;
672		ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
673							    &rid, RF_ACTIVE);
674		if (ppt->msix.res[idx] == NULL)
675			return (ENXIO);
676
677		ppt->msix.arg[idx].pptdev = ppt;
678		ppt->msix.arg[idx].addr = addr;
679		ppt->msix.arg[idx].msg_data = msg;
680
681		/* Setup the MSI-X interrupt */
682		error = bus_setup_intr(ppt->dev, ppt->msix.res[idx],
683				       INTR_TYPE_NET | INTR_MPSAFE,
684				       pptintr, NULL, &ppt->msix.arg[idx],
685				       &ppt->msix.cookie[idx]);
686
687		if (error != 0) {
688			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, ppt->msix.res[idx]);
689			ppt->msix.cookie[idx] = NULL;
690			ppt->msix.res[idx] = NULL;
691			return (ENXIO);
692		}
693	} else {
694		/* Masked, tear it down if it's already been set up */
695		ppt_teardown_msix_intr(ppt, idx);
696	}
697
698	return (0);
699}
700