/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/pciio.h>
#include <sys/rman.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include <machine/resource.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>

#include "vmm_lapic.h"
#include "vmm_ktr.h"

#include "iommu.h"
#include "ppt.h"

/* XXX locking */

#define	MAX_MSIMSGS	32

/*
 * If the MSI-X table is located in the middle of a BAR then that MMIO
 * region gets split into two segments - one segment above the MSI-X table
 * and the other segment below the MSI-X table - with a hole in place of
 * the MSI-X table so accesses to it can be trapped and emulated.
 *
 * So, allocate an MMIO segment for each BAR register + 1 additional segment.
 */
#define	MAX_MMIOSEGS	((PCIR_MAX_BAR_0 + 1) + 1)

MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources");

struct pptintr_arg {				/* pptintr(pptintr_arg) */
	struct pptdev	*pptdev;
	uint64_t	addr;
	uint64_t	msg_data;
};

struct pptseg {
	vm_paddr_t	gpa;
	size_t		len;
	int		wired;
};

struct pptdev {
	device_t	dev;
	struct vm	*vm;			/* owner of this device */
	TAILQ_ENTRY(pptdev)	next;
	struct pptseg mmio[MAX_MMIOSEGS];
	struct {
		int	num_msgs;		/* guest state */

		int	startrid;		/* host state */
		struct resource *res[MAX_MSIMSGS];
		void	*cookie[MAX_MSIMSGS];
		struct pptintr_arg arg[MAX_MSIMSGS];
	} msi;

	struct {
		int num_msgs;
		int startrid;
		int msix_table_rid;
		int msix_pba_rid;
		struct resource *msix_table_res;
		struct resource *msix_pba_res;
		struct resource **res;
		void **cookie;
		struct pptintr_arg *arg;
	} msix;
};

SYSCTL_DECL(_hw_vmm);
SYSCTL_NODE(_hw_vmm, OID_AUTO, ppt, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "bhyve passthru devices");

static int num_pptdevs;
SYSCTL_INT(_hw_vmm_ppt, OID_AUTO, devices, CTLFLAG_RD, &num_pptdevs, 0,
    "number of pci passthru devices");

static TAILQ_HEAD(, pptdev) pptdev_list = TAILQ_HEAD_INITIALIZER(pptdev_list);

static int
ppt_probe(device_t dev)
{
	int bus, slot, func;
	struct pci_devinfo *dinfo;

	dinfo = (struct pci_devinfo *)device_get_ivars(dev);

	bus = pci_get_bus(dev);
	slot = pci_get_slot(dev);
	func = pci_get_function(dev);

	/*
	 * To qualify as a PCI passthrough device a device must:
	 * - be allowed by the administrator to be used in this role
	 * - be an endpoint device
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return (ENXIO);
	else if (vmm_is_pptdev(bus, slot, func))
		return (0);
	else
		/*
		 * Returning BUS_PROBE_NOWILDCARD here matches devices that the
		 * SR-IOV infrastructure specified as "ppt" passthrough devices.
		 * All normal devices that did not have "ppt" specified as their
		 * driver will not be matched by this.
		 */
		return (BUS_PROBE_NOWILDCARD);
}

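/*
 * Attach to a device that probed as a passthru candidate: remove it from
 * the host IOMMU domain, account for it, and put it on the global pptdev
 * list so that a VM can claim it later.
 */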
static int
ppt_attach(device_t dev)
{
	struct pptdev *ppt;

	ppt = device_get_softc(dev);

	iommu_remove_device(iommu_host_domain(), pci_get_rid(dev));
	num_pptdevs++;
	TAILQ_INSERT_TAIL(&pptdev_list, ppt, next);
	ppt->dev = dev;

	if (bootverbose)
		device_printf(dev, "attached\n");

	return (0);
}

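/*
 * Detach is only allowed once the device is no longer owned by a VM; the
 * device is then handed back to the host IOMMU domain.
 */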
static int
ppt_detach(device_t dev)
{
	struct pptdev *ppt;

	ppt = device_get_softc(dev);

	if (ppt->vm != NULL)
		return (EBUSY);
	num_pptdevs--;
	TAILQ_REMOVE(&pptdev_list, ppt, next);
	pci_disable_busmaster(dev);
	iommu_add_device(iommu_host_domain(), pci_get_rid(dev));

	return (0);
}

static device_method_t ppt_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		ppt_probe),
	DEVMETHOD(device_attach,	ppt_attach),
	DEVMETHOD(device_detach,	ppt_detach),
	{0, 0}
};

static devclass_t ppt_devclass;
DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, sizeof(struct pptdev));
DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL);

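/*
 * Look up a passthru device by bus/slot/function.  'vm' must match the
 * current owner of the device; passing NULL requires the device to be
 * unowned.
 */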
static int
ppt_find(struct vm *vm, int bus, int slot, int func, struct pptdev **pptp)
{
	device_t dev;
	struct pptdev *ppt;
	int b, s, f;

	TAILQ_FOREACH(ppt, &pptdev_list, next) {
		dev = ppt->dev;
		b = pci_get_bus(dev);
		s = pci_get_slot(dev);
		f = pci_get_function(dev);
		if (bus == b && slot == s && func == f)
			break;
	}

	if (ppt == NULL)
		return (ENOENT);
	if (ppt->vm != vm)		/* Make sure we own this device */
		return (EBUSY);
	*pptp = ppt;
	return (0);
}

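/*
 * Remove every MMIO mapping that was established on behalf of 'vm' for
 * this device and clear the corresponding pptseg slots.
 */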
static void
ppt_unmap_all_mmio(struct vm *vm, struct pptdev *ppt)
{
	int i;
	struct pptseg *seg;

	for (i = 0; i < MAX_MMIOSEGS; i++) {
		seg = &ppt->mmio[i];
		if (seg->len == 0)
			continue;
		(void)vm_unmap_mmio(vm, seg->gpa, seg->len);
		bzero(seg, sizeof(struct pptseg));
	}
}

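/*
 * Release all MSI (or legacy INTx) interrupt resources allocated by
 * ppt_setup_msi().
 */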
static void
ppt_teardown_msi(struct pptdev *ppt)
{
	int i, rid;
	void *cookie;
	struct resource *res;

	if (ppt->msi.num_msgs == 0)
		return;

	for (i = 0; i < ppt->msi.num_msgs; i++) {
		rid = ppt->msi.startrid + i;
		res = ppt->msi.res[i];
		cookie = ppt->msi.cookie[i];

		if (cookie != NULL)
			bus_teardown_intr(ppt->dev, res, cookie);

		if (res != NULL)
			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);

		ppt->msi.res[i] = NULL;
		ppt->msi.cookie[i] = NULL;
	}

	if (ppt->msi.startrid == 1)
		pci_release_msi(ppt->dev);

	ppt->msi.num_msgs = 0;
}

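/*
 * Tear down a single MSI-X vector: detach the interrupt handler and
 * release the associated IRQ resource.
 */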
static void
ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
{
	int rid;
	struct resource *res;
	void *cookie;

	rid = ppt->msix.startrid + idx;
	res = ppt->msix.res[idx];
	cookie = ppt->msix.cookie[idx];

	if (cookie != NULL)
		bus_teardown_intr(ppt->dev, res, cookie);

	if (res != NULL)
		bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);

	ppt->msix.res[idx] = NULL;
	ppt->msix.cookie[idx] = NULL;
}

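/*
 * Tear down every MSI-X vector and release the MSI-X table/PBA BAR
 * resources along with the per-vector bookkeeping arrays.
 */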
static void
ppt_teardown_msix(struct pptdev *ppt)
{
	int i;

	if (ppt->msix.num_msgs == 0)
		return;

	for (i = 0; i < ppt->msix.num_msgs; i++)
		ppt_teardown_msix_intr(ppt, i);

	free(ppt->msix.res, M_PPTMSIX);
	free(ppt->msix.cookie, M_PPTMSIX);
	free(ppt->msix.arg, M_PPTMSIX);

	pci_release_msi(ppt->dev);

	if (ppt->msix.msix_table_res) {
		bus_release_resource(ppt->dev, SYS_RES_MEMORY,
				     ppt->msix.msix_table_rid,
				     ppt->msix.msix_table_res);
		ppt->msix.msix_table_res = NULL;
		ppt->msix.msix_table_rid = 0;
	}
	if (ppt->msix.msix_pba_res) {
		bus_release_resource(ppt->dev, SYS_RES_MEMORY,
				     ppt->msix.msix_pba_rid,
				     ppt->msix.msix_pba_res);
		ppt->msix.msix_pba_res = NULL;
		ppt->msix.msix_pba_rid = 0;
	}

	ppt->msix.num_msgs = 0;
}

int
ppt_avail_devices(void)
{

	return (num_pptdevs);
}

int
ppt_assigned_devices(struct vm *vm)
{
	struct pptdev *ppt;
	int num;

	num = 0;
	TAILQ_FOREACH(ppt, &pptdev_list, next) {
		if (ppt->vm == vm)
			num++;
	}
	return (num);
}

bool
ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
{
	int i;
	struct pptdev *ppt;
	struct pptseg *seg;

	TAILQ_FOREACH(ppt, &pptdev_list, next) {
		if (ppt->vm != vm)
			continue;

		for (i = 0; i < MAX_MMIOSEGS; i++) {
			seg = &ppt->mmio[i];
			if (seg->len == 0)
				continue;
			if (gpa >= seg->gpa && gpa < seg->gpa + seg->len)
				return (true);
		}
	}

	return (false);
}

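/*
 * Reset the device, preferring a Function Level Reset (FLR) when the
 * device supports one and falling back to a power reset otherwise.
 */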
static void
ppt_pci_reset(device_t dev)
{

	if (pcie_flr(dev,
	     max(pcie_get_max_completion_timeout(dev) / 1000, 10), true))
		return;

	pci_power_reset(dev);
}

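/*
 * Assign an unowned passthru device to 'vm'.  The device is reset across a
 * config-space save/restore and then placed in the VM's IOMMU domain.
 */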
int
ppt_assign_device(struct vm *vm, int bus, int slot, int func)
{
	struct pptdev *ppt;
	int error;

	/* Passing NULL requires the device to be unowned. */
	error = ppt_find(NULL, bus, slot, func, &ppt);
	if (error)
		return (error);

	pci_save_state(ppt->dev);
	ppt_pci_reset(ppt->dev);
	pci_restore_state(ppt->dev);
	ppt->vm = vm;
	iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
	return (0);
}

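/*
 * Give a device back to the host: reset it, undo any MMIO mappings and
 * interrupt resources set up for the guest, and remove it from the VM's
 * IOMMU domain.
 */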
int
ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
{
	struct pptdev *ppt;
	int error;

	error = ppt_find(vm, bus, slot, func, &ppt);
	if (error)
		return (error);

	pci_save_state(ppt->dev);
	ppt_pci_reset(ppt->dev);
	pci_restore_state(ppt->dev);
	ppt_unmap_all_mmio(vm, ppt);
	ppt_teardown_msi(ppt);
	ppt_teardown_msix(ppt);
	iommu_remove_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
	ppt->vm = NULL;
	return (0);
}

int
ppt_unassign_all(struct vm *vm)
{
	struct pptdev *ppt;
	int bus, slot, func;
	device_t dev;

	TAILQ_FOREACH(ppt, &pptdev_list, next) {
		if (ppt->vm == vm) {
			dev = ppt->dev;
			bus = pci_get_bus(dev);
			slot = pci_get_slot(dev);
			func = pci_get_function(dev);
			vm_unassign_pptdev(vm, bus, slot, func);
		}
	}

	return (0);
}

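/*
 * Map a passthru device's MMIO range at 'gpa' in the guest address space,
 * recording the mapping in the first free pptseg slot.
 */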
int
ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
	     vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
	int i, error;
	struct pptseg *seg;
	struct pptdev *ppt;

	error = ppt_find(vm, bus, slot, func, &ppt);
	if (error)
		return (error);

	for (i = 0; i < MAX_MMIOSEGS; i++) {
		seg = &ppt->mmio[i];
		if (seg->len == 0) {
			error = vm_map_mmio(vm, gpa, len, hpa);
			if (error == 0) {
				seg->gpa = gpa;
				seg->len = len;
			}
			return (error);
		}
	}
	return (ENOSPC);
}

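/*
 * Undo a mapping previously created by ppt_map_mmio(); the (gpa, len)
 * pair must match an existing segment exactly.
 */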
int
ppt_unmap_mmio(struct vm *vm, int bus, int slot, int func,
	       vm_paddr_t gpa, size_t len)
{
	int i, error;
	struct pptseg *seg;
	struct pptdev *ppt;

	error = ppt_find(vm, bus, slot, func, &ppt);
	if (error)
		return (error);

	for (i = 0; i < MAX_MMIOSEGS; i++) {
		seg = &ppt->mmio[i];
		if (seg->gpa == gpa && seg->len == len) {
			error = vm_unmap_mmio(vm, seg->gpa, seg->len);
			if (error == 0) {
				seg->gpa = 0;
				seg->len = 0;
			}
			return (error);
		}
	}
	return (ENOENT);
}

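/*
 * Interrupt filter for a passthru device: forward the host interrupt to
 * the guest as an MSI message at the address/data the guest programmed.
 */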
static int
pptintr(void *arg)
{
	struct pptdev *ppt;
	struct pptintr_arg *pptarg;

	pptarg = arg;
	ppt = pptarg->pptdev;

	if (ppt->vm != NULL)
		lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data);
	else {
		/*
		 * XXX
		 * This is not expected to happen - panic?
		 */
	}

	/*
	 * For legacy interrupts, give other filters a chance in case
	 * the interrupt was not generated by the passthrough device.
	 */
	if (ppt->msi.startrid == 0)
		return (FILTER_STRAY);
	else
		return (FILTER_HANDLED);
}

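/*
 * Configure guest MSI delivery.  'numvec' vectors are allocated on the
 * host (or a single shared legacy IRQ if the device has no MSI capability)
 * and each one is wired to pptintr() with consecutive MSI data values
 * starting at 'msg'.  A 'numvec' of zero tears down any existing MSI
 * state.
 */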
int
ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
	      uint64_t addr, uint64_t msg, int numvec)
{
	int i, rid, flags;
	int msi_count, startrid, error, tmp;
	struct pptdev *ppt;

	if (numvec < 0 || numvec > MAX_MSIMSGS)
		return (EINVAL);

	error = ppt_find(vm, bus, slot, func, &ppt);
	if (error)
		return (error);

	/* Reject attempts to enable MSI while MSI-X is active. */
	if (ppt->msix.num_msgs != 0 && numvec != 0)
		return (EBUSY);

	/* Free any allocated resources */
	ppt_teardown_msi(ppt);

	if (numvec == 0)		/* nothing more to do */
		return (0);

	flags = RF_ACTIVE;
	msi_count = pci_msi_count(ppt->dev);
	if (msi_count == 0) {
		startrid = 0;		/* legacy interrupt */
		msi_count = 1;
		flags |= RF_SHAREABLE;
	} else
		startrid = 1;		/* MSI */

	/*
	 * The device must be capable of supporting the number of vectors
	 * the guest wants to allocate.
	 */
	if (numvec > msi_count)
		return (EINVAL);

	/*
	 * Make sure that we can allocate all the MSI vectors that are needed
	 * by the guest.
	 */
	if (startrid == 1) {
		tmp = numvec;
		error = pci_alloc_msi(ppt->dev, &tmp);
		if (error)
			return (error);
		else if (tmp != numvec) {
			pci_release_msi(ppt->dev);
			return (ENOSPC);
		} else {
			/* success */
		}
	}

	ppt->msi.startrid = startrid;

	/*
	 * Allocate the irq resource and attach it to the interrupt handler.
	 */
	for (i = 0; i < numvec; i++) {
		ppt->msi.num_msgs = i + 1;
		ppt->msi.cookie[i] = NULL;

		rid = startrid + i;
		ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
							 &rid, flags);
		if (ppt->msi.res[i] == NULL)
			break;

		ppt->msi.arg[i].pptdev = ppt;
		ppt->msi.arg[i].addr = addr;
		ppt->msi.arg[i].msg_data = msg + i;

		error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
				       INTR_TYPE_NET | INTR_MPSAFE,
				       pptintr, NULL, &ppt->msi.arg[i],
				       &ppt->msi.cookie[i]);
		if (error != 0)
			break;
	}

	if (i < numvec) {
		ppt_teardown_msi(ppt);
		return (ENXIO);
	}

	return (0);
}

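/*
 * Configure a single MSI-X vector ('idx') for the guest.  On first use the
 * MSI-X table/PBA BARs and the per-vector bookkeeping are allocated; an
 * unmasked vector is then wired to pptintr(), while a masked vector has
 * any previously configured interrupt torn down.
 */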
int
ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
	       int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
{
	struct pptdev *ppt;
	struct pci_devinfo *dinfo;
	int numvec, alloced, rid, error;
	size_t res_size, cookie_size, arg_size;

	error = ppt_find(vm, bus, slot, func, &ppt);
	if (error)
		return (error);

	/* Reject attempts to enable MSI-X while MSI is active. */
	if (ppt->msi.num_msgs != 0)
		return (EBUSY);

	dinfo = device_get_ivars(ppt->dev);
	if (!dinfo)
		return (ENXIO);

	/*
	 * First-time configuration:
	 *	Allocate the MSI-X table
	 *	Allocate the IRQ resources
	 *	Set up some variables in ppt->msix
	 */
	if (ppt->msix.num_msgs == 0) {
		numvec = pci_msix_count(ppt->dev);
		if (numvec <= 0)
			return (EINVAL);

		ppt->msix.startrid = 1;
		ppt->msix.num_msgs = numvec;

		res_size = numvec * sizeof(ppt->msix.res[0]);
		cookie_size = numvec * sizeof(ppt->msix.cookie[0]);
		arg_size = numvec * sizeof(ppt->msix.arg[0]);

		ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK | M_ZERO);
		ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX,
					  M_WAITOK | M_ZERO);
		ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK | M_ZERO);

		rid = dinfo->cfg.msix.msix_table_bar;
		ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev,
					       SYS_RES_MEMORY, &rid, RF_ACTIVE);

		if (ppt->msix.msix_table_res == NULL) {
			ppt_teardown_msix(ppt);
			return (ENOSPC);
		}
		ppt->msix.msix_table_rid = rid;

		if (dinfo->cfg.msix.msix_table_bar !=
		    dinfo->cfg.msix.msix_pba_bar) {
			rid = dinfo->cfg.msix.msix_pba_bar;
			ppt->msix.msix_pba_res = bus_alloc_resource_any(
			    ppt->dev, SYS_RES_MEMORY, &rid, RF_ACTIVE);

			if (ppt->msix.msix_pba_res == NULL) {
				ppt_teardown_msix(ppt);
				return (ENOSPC);
			}
			ppt->msix.msix_pba_rid = rid;
		}

		alloced = numvec;
		error = pci_alloc_msix(ppt->dev, &alloced);
		if (error || alloced != numvec) {
			ppt_teardown_msix(ppt);
			return (error == 0 ? ENOSPC : error);
		}
	}

	if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
		/* Tear down the IRQ if it's already set up */
		ppt_teardown_msix_intr(ppt, idx);

		/* Allocate the IRQ resource */
		ppt->msix.cookie[idx] = NULL;
		rid = ppt->msix.startrid + idx;
		ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev,
		    SYS_RES_IRQ, &rid, RF_ACTIVE);
		if (ppt->msix.res[idx] == NULL)
			return (ENXIO);

		ppt->msix.arg[idx].pptdev = ppt;
		ppt->msix.arg[idx].addr = addr;
		ppt->msix.arg[idx].msg_data = msg;

		/* Setup the MSI-X interrupt */
		error = bus_setup_intr(ppt->dev, ppt->msix.res[idx],
				       INTR_TYPE_NET | INTR_MPSAFE,
				       pptintr, NULL, &ppt->msix.arg[idx],
				       &ppt->msix.cookie[idx]);

		if (error != 0) {
			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid,
			    ppt->msix.res[idx]);
			ppt->msix.cookie[idx] = NULL;
			ppt->msix.res[idx] = NULL;
			return (ENXIO);
		}
	} else {
		/* Masked, tear it down if it's already been set up */
		ppt_teardown_msix_intr(ppt, idx);
	}

	return (0);
}

int
ppt_disable_msix(struct vm *vm, int bus, int slot, int func)
{
	struct pptdev *ppt;
	int error;

	error = ppt_find(vm, bus, slot, func, &ppt);
	if (error)
		return (error);

	ppt_teardown_msix(ppt);
	return (0);
}