ppt.c revision 284539
1/*-
2 * Copyright (c) 2011 NetApp, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: head/sys/amd64/vmm/io/ppt.c 284539 2015-06-18 06:00:17Z neel $
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/amd64/vmm/io/ppt.c 284539 2015-06-18 06:00:17Z neel $");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/kernel.h>
35#include <sys/malloc.h>
36#include <sys/module.h>
37#include <sys/bus.h>
38#include <sys/pciio.h>
39#include <sys/rman.h>
40#include <sys/smp.h>
41#include <sys/sysctl.h>
42
43#include <dev/pci/pcivar.h>
44#include <dev/pci/pcireg.h>
45
46#include <machine/resource.h>
47
48#include <machine/vmm.h>
49#include <machine/vmm_dev.h>
50
51#include "vmm_lapic.h"
52#include "vmm_ktr.h"
53
54#include "iommu.h"
55#include "ppt.h"
56
57/* XXX locking */
58
59#define	MAX_MSIMSGS	32
60
61/*
62 * If the MSI-X table is located in the middle of a BAR then that MMIO
63 * region gets split into two segments - one segment above the MSI-X table
64 * and the other segment below the MSI-X table - with a hole in place of
65 * the MSI-X table so accesses to it can be trapped and emulated.
66 *
67 * So, allocate a MMIO segment for each BAR register + 1 additional segment.
68 */
69#define	MAX_MMIOSEGS	((PCIR_MAX_BAR_0 + 1) + 1)
70
71MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources");
72
/*
 * Argument registered with each interrupt handler; pptintr() receives a
 * pointer to one of these and forwards addr/msg_data to lapic_intr_msi().
 */
struct pptintr_arg {
	struct pptdev	*pptdev;	/* device the vector belongs to */
	uint64_t	addr;		/* MSI address to deliver */
	uint64_t	msg_data;	/* MSI data to deliver */
};
78
79struct pptseg {
80	vm_paddr_t	gpa;
81	size_t		len;
82	int		wired;
83};
84
85struct pptdev {
86	device_t	dev;
87	struct vm	*vm;			/* owner of this device */
88	TAILQ_ENTRY(pptdev)	next;
89	struct pptseg mmio[MAX_MMIOSEGS];
90	struct {
91		int	num_msgs;		/* guest state */
92
93		int	startrid;		/* host state */
94		struct resource *res[MAX_MSIMSGS];
95		void	*cookie[MAX_MSIMSGS];
96		struct pptintr_arg arg[MAX_MSIMSGS];
97	} msi;
98
99	struct {
100		int num_msgs;
101		int startrid;
102		int msix_table_rid;
103		struct resource *msix_table_res;
104		struct resource **res;
105		void **cookie;
106		struct pptintr_arg *arg;
107	} msix;
108};
109
110SYSCTL_DECL(_hw_vmm);
111SYSCTL_NODE(_hw_vmm, OID_AUTO, ppt, CTLFLAG_RW, 0, "bhyve passthru devices");
112
113static int num_pptdevs;
114SYSCTL_INT(_hw_vmm_ppt, OID_AUTO, devices, CTLFLAG_RD, &num_pptdevs, 0,
115    "number of pci passthru devices");
116
117static TAILQ_HEAD(, pptdev) pptdev_list = TAILQ_HEAD_INITIALIZER(pptdev_list);
118
119static int
120ppt_probe(device_t dev)
121{
122	int bus, slot, func;
123	struct pci_devinfo *dinfo;
124
125	dinfo = (struct pci_devinfo *)device_get_ivars(dev);
126
127	bus = pci_get_bus(dev);
128	slot = pci_get_slot(dev);
129	func = pci_get_function(dev);
130
131	/*
132	 * To qualify as a pci passthrough device a device must:
133	 * - be allowed by administrator to be used in this role
134	 * - be an endpoint device
135	 */
136	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
137		return (ENXIO);
138	else if (vmm_is_pptdev(bus, slot, func))
139		return (0);
140	else
141		/*
142		 * Returning BUS_PROBE_NOWILDCARD here matches devices that the
143		 * SR-IOV infrastructure specified as "ppt" passthrough devices.
144		 * All normal devices that did not have "ppt" specified as their
145		 * driver will not be matched by this.
146		 */
147		return (BUS_PROBE_NOWILDCARD);
148}
149
150static int
151ppt_attach(device_t dev)
152{
153	struct pptdev *ppt;
154
155	ppt = device_get_softc(dev);
156
157	num_pptdevs++;
158	TAILQ_INSERT_TAIL(&pptdev_list, ppt, next);
159	ppt->dev = dev;
160
161	if (bootverbose)
162		device_printf(dev, "attached\n");
163
164	return (0);
165}
166
167static int
168ppt_detach(device_t dev)
169{
170	struct pptdev *ppt;
171
172	ppt = device_get_softc(dev);
173
174	if (ppt->vm != NULL)
175		return (EBUSY);
176	num_pptdevs--;
177	TAILQ_REMOVE(&pptdev_list, ppt, next);
178
179	return (0);
180}
181
182static device_method_t ppt_methods[] = {
183	/* Device interface */
184	DEVMETHOD(device_probe,		ppt_probe),
185	DEVMETHOD(device_attach,	ppt_attach),
186	DEVMETHOD(device_detach,	ppt_detach),
187	{0, 0}
188};
189
190static devclass_t ppt_devclass;
191DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, sizeof(struct pptdev));
192DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL);
193
194static struct pptdev *
195ppt_find(int bus, int slot, int func)
196{
197	device_t dev;
198	struct pptdev *ppt;
199	int b, s, f;
200
201	TAILQ_FOREACH(ppt, &pptdev_list, next) {
202		dev = ppt->dev;
203		b = pci_get_bus(dev);
204		s = pci_get_slot(dev);
205		f = pci_get_function(dev);
206		if (bus == b && slot == s && func == f)
207			return (ppt);
208	}
209	return (NULL);
210}
211
212static void
213ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
214{
215	int i;
216	struct pptseg *seg;
217
218	for (i = 0; i < MAX_MMIOSEGS; i++) {
219		seg = &ppt->mmio[i];
220		if (seg->len == 0)
221			continue;
222		(void)vm_unmap_mmio(vm, seg->gpa, seg->len);
223		bzero(seg, sizeof(struct pptseg));
224	}
225}
226
227static void
228ppt_teardown_msi(struct pptdev *ppt)
229{
230	int i, rid;
231	void *cookie;
232	struct resource *res;
233
234	if (ppt->msi.num_msgs == 0)
235		return;
236
237	for (i = 0; i < ppt->msi.num_msgs; i++) {
238		rid = ppt->msi.startrid + i;
239		res = ppt->msi.res[i];
240		cookie = ppt->msi.cookie[i];
241
242		if (cookie != NULL)
243			bus_teardown_intr(ppt->dev, res, cookie);
244
245		if (res != NULL)
246			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
247
248		ppt->msi.res[i] = NULL;
249		ppt->msi.cookie[i] = NULL;
250	}
251
252	if (ppt->msi.startrid == 1)
253		pci_release_msi(ppt->dev);
254
255	ppt->msi.num_msgs = 0;
256}
257
258static void
259ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
260{
261	int rid;
262	struct resource *res;
263	void *cookie;
264
265	rid = ppt->msix.startrid + idx;
266	res = ppt->msix.res[idx];
267	cookie = ppt->msix.cookie[idx];
268
269	if (cookie != NULL)
270		bus_teardown_intr(ppt->dev, res, cookie);
271
272	if (res != NULL)
273		bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
274
275	ppt->msix.res[idx] = NULL;
276	ppt->msix.cookie[idx] = NULL;
277}
278
279static void
280ppt_teardown_msix(struct pptdev *ppt)
281{
282	int i;
283
284	if (ppt->msix.num_msgs == 0)
285		return;
286
287	for (i = 0; i < ppt->msix.num_msgs; i++)
288		ppt_teardown_msix_intr(ppt, i);
289
290	if (ppt->msix.msix_table_res) {
291		bus_release_resource(ppt->dev, SYS_RES_MEMORY,
292				     ppt->msix.msix_table_rid,
293				     ppt->msix.msix_table_res);
294		ppt->msix.msix_table_res = NULL;
295		ppt->msix.msix_table_rid = 0;
296	}
297
298	free(ppt->msix.res, M_PPTMSIX);
299	free(ppt->msix.cookie, M_PPTMSIX);
300	free(ppt->msix.arg, M_PPTMSIX);
301
302	pci_release_msi(ppt->dev);
303
304	ppt->msix.num_msgs = 0;
305}
306
307int
308ppt_avail_devices(void)
309{
310
311	return (num_pptdevs);
312}
313
314int
315ppt_assigned_devices(struct vm *vm)
316{
317	struct pptdev *ppt;
318	int num;
319
320	num = 0;
321	TAILQ_FOREACH(ppt, &pptdev_list, next) {
322		if (ppt->vm == vm)
323			num++;
324	}
325	return (num);
326}
327
328boolean_t
329ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
330{
331	int i;
332	struct pptdev *ppt;
333	struct pptseg *seg;
334
335	TAILQ_FOREACH(ppt, &pptdev_list, next) {
336		if (ppt->vm != vm)
337			continue;
338
339		for (i = 0; i < MAX_MMIOSEGS; i++) {
340			seg = &ppt->mmio[i];
341			if (seg->len == 0)
342				continue;
343			if (gpa >= seg->gpa && gpa < seg->gpa + seg->len)
344				return (TRUE);
345		}
346	}
347
348	return (FALSE);
349}
350
351int
352ppt_assign_device(struct vm *vm, int bus, int slot, int func)
353{
354	struct pptdev *ppt;
355
356	ppt = ppt_find(bus, slot, func);
357	if (ppt != NULL) {
358		/*
359		 * If this device is owned by a different VM then we
360		 * cannot change its owner.
361		 */
362		if (ppt->vm != NULL && ppt->vm != vm)
363			return (EBUSY);
364
365		ppt->vm = vm;
366		iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
367		return (0);
368	}
369	return (ENOENT);
370}
371
372int
373ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
374{
375	struct pptdev *ppt;
376
377	ppt = ppt_find(bus, slot, func);
378	if (ppt != NULL) {
379		/*
380		 * If this device is not owned by this 'vm' then bail out.
381		 */
382		if (ppt->vm != vm)
383			return (EBUSY);
384		ppt_unmap_mmio(vm, ppt);
385		ppt_teardown_msi(ppt);
386		ppt_teardown_msix(ppt);
387		iommu_remove_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
388		ppt->vm = NULL;
389		return (0);
390	}
391	return (ENOENT);
392}
393
394int
395ppt_unassign_all(struct vm *vm)
396{
397	struct pptdev *ppt;
398	int bus, slot, func;
399	device_t dev;
400
401	TAILQ_FOREACH(ppt, &pptdev_list, next) {
402		if (ppt->vm == vm) {
403			dev = ppt->dev;
404			bus = pci_get_bus(dev);
405			slot = pci_get_slot(dev);
406			func = pci_get_function(dev);
407			vm_unassign_pptdev(vm, bus, slot, func);
408		}
409	}
410
411	return (0);
412}
413
414int
415ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
416	     vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
417{
418	int i, error;
419	struct pptseg *seg;
420	struct pptdev *ppt;
421
422	ppt = ppt_find(bus, slot, func);
423	if (ppt != NULL) {
424		if (ppt->vm != vm)
425			return (EBUSY);
426
427		for (i = 0; i < MAX_MMIOSEGS; i++) {
428			seg = &ppt->mmio[i];
429			if (seg->len == 0) {
430				error = vm_map_mmio(vm, gpa, len, hpa);
431				if (error == 0) {
432					seg->gpa = gpa;
433					seg->len = len;
434				}
435				return (error);
436			}
437		}
438		return (ENOSPC);
439	}
440	return (ENOENT);
441}
442
443static int
444pptintr(void *arg)
445{
446	struct pptdev *ppt;
447	struct pptintr_arg *pptarg;
448
449	pptarg = arg;
450	ppt = pptarg->pptdev;
451
452	if (ppt->vm != NULL)
453		lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data);
454	else {
455		/*
456		 * XXX
457		 * This is not expected to happen - panic?
458		 */
459	}
460
461	/*
462	 * For legacy interrupts give other filters a chance in case
463	 * the interrupt was not generated by the passthrough device.
464	 */
465	if (ppt->msi.startrid == 0)
466		return (FILTER_STRAY);
467	else
468		return (FILTER_HANDLED);
469}
470
471int
472ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
473	      uint64_t addr, uint64_t msg, int numvec)
474{
475	int i, rid, flags;
476	int msi_count, startrid, error, tmp;
477	struct pptdev *ppt;
478
479	if (numvec < 0 || numvec > MAX_MSIMSGS)
480		return (EINVAL);
481
482	ppt = ppt_find(bus, slot, func);
483	if (ppt == NULL)
484		return (ENOENT);
485	if (ppt->vm != vm)		/* Make sure we own this device */
486		return (EBUSY);
487
488	/* Free any allocated resources */
489	ppt_teardown_msi(ppt);
490
491	if (numvec == 0)		/* nothing more to do */
492		return (0);
493
494	flags = RF_ACTIVE;
495	msi_count = pci_msi_count(ppt->dev);
496	if (msi_count == 0) {
497		startrid = 0;		/* legacy interrupt */
498		msi_count = 1;
499		flags |= RF_SHAREABLE;
500	} else
501		startrid = 1;		/* MSI */
502
503	/*
504	 * The device must be capable of supporting the number of vectors
505	 * the guest wants to allocate.
506	 */
507	if (numvec > msi_count)
508		return (EINVAL);
509
510	/*
511	 * Make sure that we can allocate all the MSI vectors that are needed
512	 * by the guest.
513	 */
514	if (startrid == 1) {
515		tmp = numvec;
516		error = pci_alloc_msi(ppt->dev, &tmp);
517		if (error)
518			return (error);
519		else if (tmp != numvec) {
520			pci_release_msi(ppt->dev);
521			return (ENOSPC);
522		} else {
523			/* success */
524		}
525	}
526
527	ppt->msi.startrid = startrid;
528
529	/*
530	 * Allocate the irq resource and attach it to the interrupt handler.
531	 */
532	for (i = 0; i < numvec; i++) {
533		ppt->msi.num_msgs = i + 1;
534		ppt->msi.cookie[i] = NULL;
535
536		rid = startrid + i;
537		ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
538							 &rid, flags);
539		if (ppt->msi.res[i] == NULL)
540			break;
541
542		ppt->msi.arg[i].pptdev = ppt;
543		ppt->msi.arg[i].addr = addr;
544		ppt->msi.arg[i].msg_data = msg + i;
545
546		error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
547				       INTR_TYPE_NET | INTR_MPSAFE,
548				       pptintr, NULL, &ppt->msi.arg[i],
549				       &ppt->msi.cookie[i]);
550		if (error != 0)
551			break;
552	}
553
554	if (i < numvec) {
555		ppt_teardown_msi(ppt);
556		return (ENXIO);
557	}
558
559	return (0);
560}
561
562int
563ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
564	       int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
565{
566	struct pptdev *ppt;
567	struct pci_devinfo *dinfo;
568	int numvec, alloced, rid, error;
569	size_t res_size, cookie_size, arg_size;
570
571	ppt = ppt_find(bus, slot, func);
572	if (ppt == NULL)
573		return (ENOENT);
574	if (ppt->vm != vm)		/* Make sure we own this device */
575		return (EBUSY);
576
577	dinfo = device_get_ivars(ppt->dev);
578	if (!dinfo)
579		return (ENXIO);
580
581	/*
582	 * First-time configuration:
583	 * 	Allocate the MSI-X table
584	 *	Allocate the IRQ resources
585	 *	Set up some variables in ppt->msix
586	 */
587	if (ppt->msix.num_msgs == 0) {
588		numvec = pci_msix_count(ppt->dev);
589		if (numvec <= 0)
590			return (EINVAL);
591
592		ppt->msix.startrid = 1;
593		ppt->msix.num_msgs = numvec;
594
595		res_size = numvec * sizeof(ppt->msix.res[0]);
596		cookie_size = numvec * sizeof(ppt->msix.cookie[0]);
597		arg_size = numvec * sizeof(ppt->msix.arg[0]);
598
599		ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK | M_ZERO);
600		ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX,
601					  M_WAITOK | M_ZERO);
602		ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK | M_ZERO);
603
604		rid = dinfo->cfg.msix.msix_table_bar;
605		ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev,
606					       SYS_RES_MEMORY, &rid, RF_ACTIVE);
607
608		if (ppt->msix.msix_table_res == NULL) {
609			ppt_teardown_msix(ppt);
610			return (ENOSPC);
611		}
612		ppt->msix.msix_table_rid = rid;
613
614		alloced = numvec;
615		error = pci_alloc_msix(ppt->dev, &alloced);
616		if (error || alloced != numvec) {
617			ppt_teardown_msix(ppt);
618			return (error == 0 ? ENOSPC: error);
619		}
620	}
621
622	if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
623		/* Tear down the IRQ if it's already set up */
624		ppt_teardown_msix_intr(ppt, idx);
625
626		/* Allocate the IRQ resource */
627		ppt->msix.cookie[idx] = NULL;
628		rid = ppt->msix.startrid + idx;
629		ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
630							    &rid, RF_ACTIVE);
631		if (ppt->msix.res[idx] == NULL)
632			return (ENXIO);
633
634		ppt->msix.arg[idx].pptdev = ppt;
635		ppt->msix.arg[idx].addr = addr;
636		ppt->msix.arg[idx].msg_data = msg;
637
638		/* Setup the MSI-X interrupt */
639		error = bus_setup_intr(ppt->dev, ppt->msix.res[idx],
640				       INTR_TYPE_NET | INTR_MPSAFE,
641				       pptintr, NULL, &ppt->msix.arg[idx],
642				       &ppt->msix.cookie[idx]);
643
644		if (error != 0) {
645			bus_teardown_intr(ppt->dev, ppt->msix.res[idx], ppt->msix.cookie[idx]);
646			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, ppt->msix.res[idx]);
647			ppt->msix.cookie[idx] = NULL;
648			ppt->msix.res[idx] = NULL;
649			return (ENXIO);
650		}
651	} else {
652		/* Masked, tear it down if it's already been set up */
653		ppt_teardown_msix_intr(ppt, idx);
654	}
655
656	return (0);
657}
658