/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/11/sys/amd64/vmm/io/ppt.c 330897 2018-03-14 03:19:51Z eadler $
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/amd64/vmm/io/ppt.c 330897 2018-03-14 03:19:51Z eadler $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/pciio.h>
#include <sys/rman.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include <machine/resource.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>

#include "vmm_lapic.h"
#include "vmm_ktr.h"

#include "iommu.h"
#include "ppt.h"

/* XXX locking */

#define	MAX_MSIMSGS	32

/*
 * If the MSI-X table is located in the middle of a BAR then that MMIO
 * region gets split into two segments - one segment above the MSI-X table
 * and the other segment below the MSI-X table - with a hole in place of
 * the MSI-X table so accesses to it can be trapped and emulated.
 *
 * So, allocate a MMIO segment for each BAR register + 1 additional segment.
 */
#define	MAX_MMIOSEGS	((PCIR_MAX_BAR_0 + 1) + 1)
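
/*
 * Illustrative sketch of the split described above (hypothetical numbers,
 * not taken from any real device): a 16KB BAR whose MSI-X table lives in
 * the page at offset 0x2000 is exposed to the guest as two segments,
 * [gpa, gpa + 0x2000) and [gpa + 0x3000, gpa + 0x4000), leaving the table
 * page unmapped so guest accesses to it trap into the MSI-X emulation.
 */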

MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources");

struct pptintr_arg {				/* pptintr(pptintr_arg) */
	struct pptdev	*pptdev;
	uint64_t	addr;
	uint64_t	msg_data;
};

struct pptseg {
	vm_paddr_t	gpa;
	size_t		len;
	int		wired;
};

struct pptdev {
	device_t	dev;
	struct vm	*vm;			/* owner of this device */
	TAILQ_ENTRY(pptdev)	next;
	struct pptseg mmio[MAX_MMIOSEGS];
	struct {
		int	num_msgs;		/* guest state */

		int	startrid;		/* host state */
		struct resource *res[MAX_MSIMSGS];
		void	*cookie[MAX_MSIMSGS];
		struct pptintr_arg arg[MAX_MSIMSGS];
	} msi;

	struct {
		int num_msgs;
		int startrid;
		int msix_table_rid;
		struct resource *msix_table_res;
		struct resource **res;
		void **cookie;
		struct pptintr_arg *arg;
	} msix;
};

SYSCTL_DECL(_hw_vmm);
SYSCTL_NODE(_hw_vmm, OID_AUTO, ppt, CTLFLAG_RW, 0, "bhyve passthru devices");

static int num_pptdevs;
SYSCTL_INT(_hw_vmm_ppt, OID_AUTO, devices, CTLFLAG_RD, &num_pptdevs, 0,
    "number of pci passthru devices");

static TAILQ_HEAD(, pptdev) pptdev_list = TAILQ_HEAD_INITIALIZER(pptdev_list);

static int
ppt_probe(device_t dev)
{
	int bus, slot, func;
	struct pci_devinfo *dinfo;

	dinfo = (struct pci_devinfo *)device_get_ivars(dev);

	bus = pci_get_bus(dev);
	slot = pci_get_slot(dev);
	func = pci_get_function(dev);

	/*
	 * To qualify as a pci passthrough device a device must:
	 * - be allowed by administrator to be used in this role
	 * - be an endpoint device
	 */
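	/*
	 * (The administrator typically grants this via the "pptdevs"
	 * loader tunable, e.g. pptdevs="2/0/0"; vmm_is_pptdev() below
	 * consults that list.  The example value is for illustration
	 * only.)
	 */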
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return (ENXIO);
	else if (vmm_is_pptdev(bus, slot, func))
		return (0);
	else
		/*
		 * Returning BUS_PROBE_NOWILDCARD here matches devices that the
		 * SR-IOV infrastructure specified as "ppt" passthrough devices.
		 * All normal devices that did not have "ppt" specified as their
		 * driver will not be matched by this.
		 */
		return (BUS_PROBE_NOWILDCARD);
}

static int
ppt_attach(device_t dev)
{
	struct pptdev *ppt;

	ppt = device_get_softc(dev);

	iommu_remove_device(iommu_host_domain(), pci_get_rid(dev));
	num_pptdevs++;
	TAILQ_INSERT_TAIL(&pptdev_list, ppt, next);
	ppt->dev = dev;

	if (bootverbose)
		device_printf(dev, "attached\n");

	return (0);
}

static int
ppt_detach(device_t dev)
{
	struct pptdev *ppt;

	ppt = device_get_softc(dev);

	if (ppt->vm != NULL)
		return (EBUSY);
	num_pptdevs--;
	TAILQ_REMOVE(&pptdev_list, ppt, next);
	pci_disable_busmaster(dev);
	iommu_add_device(iommu_host_domain(), pci_get_rid(dev));

	return (0);
}

static device_method_t ppt_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		ppt_probe),
	DEVMETHOD(device_attach,	ppt_attach),
	DEVMETHOD(device_detach,	ppt_detach),
	{0, 0}
};

static devclass_t ppt_devclass;
DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, sizeof(struct pptdev));
DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL);

static struct pptdev *
ppt_find(int bus, int slot, int func)
{
	device_t dev;
	struct pptdev *ppt;
	int b, s, f;

	TAILQ_FOREACH(ppt, &pptdev_list, next) {
		dev = ppt->dev;
		b = pci_get_bus(dev);
		s = pci_get_slot(dev);
		f = pci_get_function(dev);
		if (bus == b && slot == s && func == f)
			return (ppt);
	}
	return (NULL);
}

static void
ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
{
	int i;
	struct pptseg *seg;

	for (i = 0; i < MAX_MMIOSEGS; i++) {
		seg = &ppt->mmio[i];
		if (seg->len == 0)
			continue;
		(void)vm_unmap_mmio(vm, seg->gpa, seg->len);
		bzero(seg, sizeof(struct pptseg));
	}
}

static void
ppt_teardown_msi(struct pptdev *ppt)
{
	int i, rid;
	void *cookie;
	struct resource *res;

	if (ppt->msi.num_msgs == 0)
		return;

	for (i = 0; i < ppt->msi.num_msgs; i++) {
		rid = ppt->msi.startrid + i;
		res = ppt->msi.res[i];
		cookie = ppt->msi.cookie[i];

		if (cookie != NULL)
			bus_teardown_intr(ppt->dev, res, cookie);

		if (res != NULL)
			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);

		ppt->msi.res[i] = NULL;
		ppt->msi.cookie[i] = NULL;
	}

	if (ppt->msi.startrid == 1)
		pci_release_msi(ppt->dev);

	ppt->msi.num_msgs = 0;
}

static void
ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
{
	int rid;
	struct resource *res;
	void *cookie;

	rid = ppt->msix.startrid + idx;
	res = ppt->msix.res[idx];
	cookie = ppt->msix.cookie[idx];

	if (cookie != NULL)
		bus_teardown_intr(ppt->dev, res, cookie);

	if (res != NULL)
		bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);

	ppt->msix.res[idx] = NULL;
	ppt->msix.cookie[idx] = NULL;
}

static void
ppt_teardown_msix(struct pptdev *ppt)
{
	int i;

	if (ppt->msix.num_msgs == 0)
		return;

	for (i = 0; i < ppt->msix.num_msgs; i++)
		ppt_teardown_msix_intr(ppt, i);

	if (ppt->msix.msix_table_res) {
		bus_release_resource(ppt->dev, SYS_RES_MEMORY,
				     ppt->msix.msix_table_rid,
				     ppt->msix.msix_table_res);
		ppt->msix.msix_table_res = NULL;
		ppt->msix.msix_table_rid = 0;
	}

	free(ppt->msix.res, M_PPTMSIX);
	free(ppt->msix.cookie, M_PPTMSIX);
	free(ppt->msix.arg, M_PPTMSIX);

	pci_release_msi(ppt->dev);

	ppt->msix.num_msgs = 0;
}

int
ppt_avail_devices(void)
{

	return (num_pptdevs);
}

int
ppt_assigned_devices(struct vm *vm)
{
	struct pptdev *ppt;
	int num;

	num = 0;
	TAILQ_FOREACH(ppt, &pptdev_list, next) {
		if (ppt->vm == vm)
			num++;
	}
	return (num);
}

boolean_t
ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
{
	int i;
	struct pptdev *ppt;
	struct pptseg *seg;

	TAILQ_FOREACH(ppt, &pptdev_list, next) {
		if (ppt->vm != vm)
			continue;

		for (i = 0; i < MAX_MMIOSEGS; i++) {
			seg = &ppt->mmio[i];
			if (seg->len == 0)
				continue;
			if (gpa >= seg->gpa && gpa < seg->gpa + seg->len)
				return (TRUE);
		}
	}

	return (FALSE);
}

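/*
 * Assign the device at bus/slot/func to 'vm'.  The device is reset with an
 * FLR (its config space is saved and restored around the reset) and then
 * added to the VM's IOMMU domain; it was already removed from the host
 * domain when the ppt driver attached.  ppt_unassign_device() reverses
 * these steps.
 */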
int
ppt_assign_device(struct vm *vm, int bus, int slot, int func)
{
	struct pptdev *ppt;

	ppt = ppt_find(bus, slot, func);
	if (ppt != NULL) {
		/*
		 * If this device is owned by a different VM then we
		 * cannot change its owner.
		 */
		if (ppt->vm != NULL && ppt->vm != vm)
			return (EBUSY);

		pci_save_state(ppt->dev);
		pcie_flr(ppt->dev,
		    max(pcie_get_max_completion_timeout(ppt->dev) / 1000, 10),
		    true);
		pci_restore_state(ppt->dev);
		ppt->vm = vm;
		iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
		return (0);
	}
	return (ENOENT);
}

int
ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
{
	struct pptdev *ppt;

	ppt = ppt_find(bus, slot, func);
	if (ppt != NULL) {
		/*
		 * If this device is not owned by this 'vm' then bail out.
		 */
		if (ppt->vm != vm)
			return (EBUSY);

		pci_save_state(ppt->dev);
		pcie_flr(ppt->dev,
		    max(pcie_get_max_completion_timeout(ppt->dev) / 1000, 10),
		    true);
		pci_restore_state(ppt->dev);
		ppt_unmap_mmio(vm, ppt);
		ppt_teardown_msi(ppt);
		ppt_teardown_msix(ppt);
		iommu_remove_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
		ppt->vm = NULL;
		return (0);
	}
	return (ENOENT);
}

int
ppt_unassign_all(struct vm *vm)
{
	struct pptdev *ppt;
	int bus, slot, func;
	device_t dev;

	TAILQ_FOREACH(ppt, &pptdev_list, next) {
		if (ppt->vm == vm) {
			dev = ppt->dev;
			bus = pci_get_bus(dev);
			slot = pci_get_slot(dev);
			func = pci_get_function(dev);
			vm_unassign_pptdev(vm, bus, slot, func);
		}
	}

	return (0);
}

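/*
 * Map a segment of the device's MMIO space into the guest: record the
 * mapping in the first free pptseg slot and have vmm map guest physical
 * address 'gpa' to host physical address 'hpa' for 'len' bytes.  Returns
 * ENOSPC once all MAX_MMIOSEGS slots are in use.
 */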
int
ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
	     vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
	int i, error;
	struct pptseg *seg;
	struct pptdev *ppt;

	ppt = ppt_find(bus, slot, func);
	if (ppt != NULL) {
		if (ppt->vm != vm)
			return (EBUSY);

		for (i = 0; i < MAX_MMIOSEGS; i++) {
			seg = &ppt->mmio[i];
			if (seg->len == 0) {
				error = vm_map_mmio(vm, gpa, len, hpa);
				if (error == 0) {
					seg->gpa = gpa;
					seg->len = len;
				}
				return (error);
			}
		}
		return (ENOSPC);
	}
	return (ENOENT);
}

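/*
 * Interrupt filter for a passthrough MSI/MSI-X vector (or a shared legacy
 * INTx).  The host interrupt is forwarded to the guest by injecting the
 * guest-programmed message (addr/msg_data) via the virtual local APIC.
 */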
static int
pptintr(void *arg)
{
	struct pptdev *ppt;
	struct pptintr_arg *pptarg;

	pptarg = arg;
	ppt = pptarg->pptdev;

	if (ppt->vm != NULL)
		lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data);
	else {
		/*
		 * XXX
		 * This is not expected to happen - panic?
		 */
	}

	/*
	 * For legacy interrupts give other filters a chance in case
	 * the interrupt was not generated by the passthrough device.
	 */
	if (ppt->msi.startrid == 0)
		return (FILTER_STRAY);
	else
		return (FILTER_HANDLED);
}

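/*
 * Configure 'numvec' passthrough MSI vectors (or a single shared legacy
 * interrupt if the device has no MSI capability) on behalf of the guest.
 * Any previously allocated vectors are torn down first, so a 'numvec' of
 * zero simply releases the current configuration.  'addr' and 'msg' are
 * the guest-specified MSI address and data; vector i is delivered with
 * message data 'msg + i'.
 */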
int
ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
	      uint64_t addr, uint64_t msg, int numvec)
{
	int i, rid, flags;
	int msi_count, startrid, error, tmp;
	struct pptdev *ppt;

	if (numvec < 0 || numvec > MAX_MSIMSGS)
		return (EINVAL);

	ppt = ppt_find(bus, slot, func);
	if (ppt == NULL)
		return (ENOENT);
	if (ppt->vm != vm)		/* Make sure we own this device */
		return (EBUSY);

	/* Free any allocated resources */
	ppt_teardown_msi(ppt);

	if (numvec == 0)		/* nothing more to do */
		return (0);

	flags = RF_ACTIVE;
	msi_count = pci_msi_count(ppt->dev);
	if (msi_count == 0) {
		startrid = 0;		/* legacy interrupt */
		msi_count = 1;
		flags |= RF_SHAREABLE;
	} else
		startrid = 1;		/* MSI */

	/*
	 * The device must be capable of supporting the number of vectors
	 * the guest wants to allocate.
	 */
	if (numvec > msi_count)
		return (EINVAL);

	/*
	 * Make sure that we can allocate all the MSI vectors that are needed
	 * by the guest.
	 */
	if (startrid == 1) {
		tmp = numvec;
		error = pci_alloc_msi(ppt->dev, &tmp);
		if (error)
			return (error);
		else if (tmp != numvec) {
			pci_release_msi(ppt->dev);
			return (ENOSPC);
		} else {
			/* success */
		}
	}

	ppt->msi.startrid = startrid;

	/*
	 * Allocate the irq resource and attach it to the interrupt handler.
	 */
	for (i = 0; i < numvec; i++) {
		ppt->msi.num_msgs = i + 1;
		ppt->msi.cookie[i] = NULL;

		rid = startrid + i;
		ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
							 &rid, flags);
		if (ppt->msi.res[i] == NULL)
			break;

		ppt->msi.arg[i].pptdev = ppt;
		ppt->msi.arg[i].addr = addr;
		ppt->msi.arg[i].msg_data = msg + i;

		error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
				       INTR_TYPE_NET | INTR_MPSAFE,
				       pptintr, NULL, &ppt->msi.arg[i],
				       &ppt->msi.cookie[i]);
		if (error != 0)
			break;
	}

	if (i < numvec) {
		ppt_teardown_msi(ppt);
		return (ENXIO);
	}

	return (0);
}

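/*
 * Configure a single MSI-X vector ('idx') for the passthrough device.  On
 * the first call the driver claims the BAR containing the MSI-X table and
 * allocates all of the device's MSI-X vectors; thereafter each call either
 * sets up or tears down one vector, depending on whether the guest has
 * masked it in 'vector_control'.
 */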
int
ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
	       int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
{
	struct pptdev *ppt;
	struct pci_devinfo *dinfo;
	int numvec, alloced, rid, error;
	size_t res_size, cookie_size, arg_size;

	ppt = ppt_find(bus, slot, func);
	if (ppt == NULL)
		return (ENOENT);
	if (ppt->vm != vm)		/* Make sure we own this device */
		return (EBUSY);

	dinfo = device_get_ivars(ppt->dev);
	if (!dinfo)
		return (ENXIO);

	/*
	 * First-time configuration:
	 *	Allocate the MSI-X table
	 *	Allocate the IRQ resources
	 *	Set up some variables in ppt->msix
	 */
	if (ppt->msix.num_msgs == 0) {
		numvec = pci_msix_count(ppt->dev);
		if (numvec <= 0)
			return (EINVAL);

		ppt->msix.startrid = 1;
		ppt->msix.num_msgs = numvec;

		res_size = numvec * sizeof(ppt->msix.res[0]);
		cookie_size = numvec * sizeof(ppt->msix.cookie[0]);
		arg_size = numvec * sizeof(ppt->msix.arg[0]);

		ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK | M_ZERO);
		ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX,
					  M_WAITOK | M_ZERO);
		ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK | M_ZERO);

		rid = dinfo->cfg.msix.msix_table_bar;
		ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev,
					       SYS_RES_MEMORY, &rid, RF_ACTIVE);

		if (ppt->msix.msix_table_res == NULL) {
			ppt_teardown_msix(ppt);
			return (ENOSPC);
		}
		ppt->msix.msix_table_rid = rid;

		alloced = numvec;
		error = pci_alloc_msix(ppt->dev, &alloced);
		if (error || alloced != numvec) {
			ppt_teardown_msix(ppt);
			return (error == 0 ? ENOSPC : error);
		}
	}

	if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
		/* Tear down the IRQ if it's already set up */
		ppt_teardown_msix_intr(ppt, idx);

		/* Allocate the IRQ resource */
		ppt->msix.cookie[idx] = NULL;
		rid = ppt->msix.startrid + idx;
		ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
							    &rid, RF_ACTIVE);
		if (ppt->msix.res[idx] == NULL)
			return (ENXIO);

		ppt->msix.arg[idx].pptdev = ppt;
		ppt->msix.arg[idx].addr = addr;
		ppt->msix.arg[idx].msg_data = msg;

		/* Setup the MSI-X interrupt */
		error = bus_setup_intr(ppt->dev, ppt->msix.res[idx],
				       INTR_TYPE_NET | INTR_MPSAFE,
				       pptintr, NULL, &ppt->msix.arg[idx],
				       &ppt->msix.cookie[idx]);

		if (error != 0) {
			bus_teardown_intr(ppt->dev, ppt->msix.res[idx],
					  ppt->msix.cookie[idx]);
			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid,
					     ppt->msix.res[idx]);
			ppt->msix.cookie[idx] = NULL;
			ppt->msix.res[idx] = NULL;
			return (ENXIO);
		}
	} else {
		/* Masked, tear it down if it's already been set up */
		ppt_teardown_msix_intr(ppt, idx);
	}

	return (0);
}