ppt.c revision 284899
1/*-
2 * Copyright (c) 2011 NetApp, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: stable/10/sys/amd64/vmm/io/ppt.c 284899 2015-06-28 01:21:55Z neel $
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/sys/amd64/vmm/io/ppt.c 284899 2015-06-28 01:21:55Z neel $");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/kernel.h>
35#include <sys/malloc.h>
36#include <sys/module.h>
37#include <sys/bus.h>
38#include <sys/pciio.h>
39#include <sys/rman.h>
40#include <sys/smp.h>
41#include <sys/sysctl.h>
42
43#include <dev/pci/pcivar.h>
44#include <dev/pci/pcireg.h>
45
46#include <machine/resource.h>
47
48#include <machine/vmm.h>
49#include <machine/vmm_dev.h>
50
51#include "vmm_lapic.h"
52#include "vmm_ktr.h"
53
54#include "iommu.h"
55#include "ppt.h"
56
57/* XXX locking */
58
59#define	MAX_MSIMSGS	32
60
61/*
62 * If the MSI-X table is located in the middle of a BAR then that MMIO
63 * region gets split into two segments - one segment above the MSI-X table
64 * and the other segment below the MSI-X table - with a hole in place of
65 * the MSI-X table so accesses to it can be trapped and emulated.
66 *
67 * So, allocate a MMIO segment for each BAR register + 1 additional segment.
68 */
69#define	MAX_MMIOSEGS	((PCIR_MAX_BAR_0 + 1) + 1)
70
71MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources");
72
73struct pptintr_arg {				/* pptintr(pptintr_arg) */
74	struct pptdev	*pptdev;
75	uint64_t	addr;
76	uint64_t	msg_data;
77};
78
79struct pptdev {
80	device_t	dev;
81	struct vm	*vm;			/* owner of this device */
82	TAILQ_ENTRY(pptdev)	next;
83	struct vm_memory_segment mmio[MAX_MMIOSEGS];
84	struct {
85		int	num_msgs;		/* guest state */
86
87		int	startrid;		/* host state */
88		struct resource *res[MAX_MSIMSGS];
89		void	*cookie[MAX_MSIMSGS];
90		struct pptintr_arg arg[MAX_MSIMSGS];
91	} msi;
92
93	struct {
94		int num_msgs;
95		int startrid;
96		int msix_table_rid;
97		struct resource *msix_table_res;
98		struct resource **res;
99		void **cookie;
100		struct pptintr_arg *arg;
101	} msix;
102};
103
104SYSCTL_DECL(_hw_vmm);
105SYSCTL_NODE(_hw_vmm, OID_AUTO, ppt, CTLFLAG_RW, 0, "bhyve passthru devices");
106
107static int num_pptdevs;
108SYSCTL_INT(_hw_vmm_ppt, OID_AUTO, devices, CTLFLAG_RD, &num_pptdevs, 0,
109    "number of pci passthru devices");
110
111static TAILQ_HEAD(, pptdev) pptdev_list = TAILQ_HEAD_INITIALIZER(pptdev_list);
112
113static int
114ppt_probe(device_t dev)
115{
116	int bus, slot, func;
117	struct pci_devinfo *dinfo;
118
119	dinfo = (struct pci_devinfo *)device_get_ivars(dev);
120
121	bus = pci_get_bus(dev);
122	slot = pci_get_slot(dev);
123	func = pci_get_function(dev);
124
125	/*
126	 * To qualify as a pci passthrough device a device must:
127	 * - be allowed by administrator to be used in this role
128	 * - be an endpoint device
129	 */
130	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
131		return (ENXIO);
132	else if (vmm_is_pptdev(bus, slot, func))
133		return (0);
134	else
135		/*
136		 * Returning BUS_PROBE_NOWILDCARD here matches devices that the
137		 * SR-IOV infrastructure specified as "ppt" passthrough devices.
138		 * All normal devices that did not have "ppt" specified as their
139		 * driver will not be matched by this.
140		 */
141		return (BUS_PROBE_NOWILDCARD);
142}
143
144static int
145ppt_attach(device_t dev)
146{
147	struct pptdev *ppt;
148
149	ppt = device_get_softc(dev);
150
151	num_pptdevs++;
152	TAILQ_INSERT_TAIL(&pptdev_list, ppt, next);
153	ppt->dev = dev;
154
155	if (bootverbose)
156		device_printf(dev, "attached\n");
157
158	return (0);
159}
160
161static int
162ppt_detach(device_t dev)
163{
164	struct pptdev *ppt;
165
166	ppt = device_get_softc(dev);
167
168	if (ppt->vm != NULL)
169		return (EBUSY);
170	num_pptdevs--;
171	TAILQ_REMOVE(&pptdev_list, ppt, next);
172
173	return (0);
174}
175
176static device_method_t ppt_methods[] = {
177	/* Device interface */
178	DEVMETHOD(device_probe,		ppt_probe),
179	DEVMETHOD(device_attach,	ppt_attach),
180	DEVMETHOD(device_detach,	ppt_detach),
181	{0, 0}
182};
183
184static devclass_t ppt_devclass;
185DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, sizeof(struct pptdev));
186DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL);
187
188static struct pptdev *
189ppt_find(int bus, int slot, int func)
190{
191	device_t dev;
192	struct pptdev *ppt;
193	int b, s, f;
194
195	TAILQ_FOREACH(ppt, &pptdev_list, next) {
196		dev = ppt->dev;
197		b = pci_get_bus(dev);
198		s = pci_get_slot(dev);
199		f = pci_get_function(dev);
200		if (bus == b && slot == s && func == f)
201			return (ppt);
202	}
203	return (NULL);
204}
205
206static void
207ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
208{
209	int i;
210	struct vm_memory_segment *seg;
211
212	for (i = 0; i < MAX_MMIOSEGS; i++) {
213		seg = &ppt->mmio[i];
214		if (seg->len == 0)
215			continue;
216		(void)vm_unmap_mmio(vm, seg->gpa, seg->len);
217		bzero(seg, sizeof(struct vm_memory_segment));
218	}
219}
220
221static void
222ppt_teardown_msi(struct pptdev *ppt)
223{
224	int i, rid;
225	void *cookie;
226	struct resource *res;
227
228	if (ppt->msi.num_msgs == 0)
229		return;
230
231	for (i = 0; i < ppt->msi.num_msgs; i++) {
232		rid = ppt->msi.startrid + i;
233		res = ppt->msi.res[i];
234		cookie = ppt->msi.cookie[i];
235
236		if (cookie != NULL)
237			bus_teardown_intr(ppt->dev, res, cookie);
238
239		if (res != NULL)
240			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
241
242		ppt->msi.res[i] = NULL;
243		ppt->msi.cookie[i] = NULL;
244	}
245
246	if (ppt->msi.startrid == 1)
247		pci_release_msi(ppt->dev);
248
249	ppt->msi.num_msgs = 0;
250}
251
252static void
253ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
254{
255	int rid;
256	struct resource *res;
257	void *cookie;
258
259	rid = ppt->msix.startrid + idx;
260	res = ppt->msix.res[idx];
261	cookie = ppt->msix.cookie[idx];
262
263	if (cookie != NULL)
264		bus_teardown_intr(ppt->dev, res, cookie);
265
266	if (res != NULL)
267		bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
268
269	ppt->msix.res[idx] = NULL;
270	ppt->msix.cookie[idx] = NULL;
271}
272
273static void
274ppt_teardown_msix(struct pptdev *ppt)
275{
276	int i;
277
278	if (ppt->msix.num_msgs == 0)
279		return;
280
281	for (i = 0; i < ppt->msix.num_msgs; i++)
282		ppt_teardown_msix_intr(ppt, i);
283
284	if (ppt->msix.msix_table_res) {
285		bus_release_resource(ppt->dev, SYS_RES_MEMORY,
286				     ppt->msix.msix_table_rid,
287				     ppt->msix.msix_table_res);
288		ppt->msix.msix_table_res = NULL;
289		ppt->msix.msix_table_rid = 0;
290	}
291
292	free(ppt->msix.res, M_PPTMSIX);
293	free(ppt->msix.cookie, M_PPTMSIX);
294	free(ppt->msix.arg, M_PPTMSIX);
295
296	pci_release_msi(ppt->dev);
297
298	ppt->msix.num_msgs = 0;
299}
300
301int
302ppt_avail_devices(void)
303{
304
305	return (num_pptdevs);
306}
307
308int
309ppt_assigned_devices(struct vm *vm)
310{
311	struct pptdev *ppt;
312	int num;
313
314	num = 0;
315	TAILQ_FOREACH(ppt, &pptdev_list, next) {
316		if (ppt->vm == vm)
317			num++;
318	}
319	return (num);
320}
321
322boolean_t
323ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
324{
325	int i;
326	struct pptdev *ppt;
327	struct vm_memory_segment *seg;
328
329	TAILQ_FOREACH(ppt, &pptdev_list, next) {
330		if (ppt->vm != vm)
331			continue;
332
333		for (i = 0; i < MAX_MMIOSEGS; i++) {
334			seg = &ppt->mmio[i];
335			if (seg->len == 0)
336				continue;
337			if (gpa >= seg->gpa && gpa < seg->gpa + seg->len)
338				return (TRUE);
339		}
340	}
341
342	return (FALSE);
343}
344
345int
346ppt_assign_device(struct vm *vm, int bus, int slot, int func)
347{
348	struct pptdev *ppt;
349
350	ppt = ppt_find(bus, slot, func);
351	if (ppt != NULL) {
352		/*
353		 * If this device is owned by a different VM then we
354		 * cannot change its owner.
355		 */
356		if (ppt->vm != NULL && ppt->vm != vm)
357			return (EBUSY);
358
359		ppt->vm = vm;
360		iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
361		return (0);
362	}
363	return (ENOENT);
364}
365
366int
367ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
368{
369	struct pptdev *ppt;
370
371	ppt = ppt_find(bus, slot, func);
372	if (ppt != NULL) {
373		/*
374		 * If this device is not owned by this 'vm' then bail out.
375		 */
376		if (ppt->vm != vm)
377			return (EBUSY);
378		ppt_unmap_mmio(vm, ppt);
379		ppt_teardown_msi(ppt);
380		ppt_teardown_msix(ppt);
381		iommu_remove_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
382		ppt->vm = NULL;
383		return (0);
384	}
385	return (ENOENT);
386}
387
388int
389ppt_unassign_all(struct vm *vm)
390{
391	struct pptdev *ppt;
392	int bus, slot, func;
393	device_t dev;
394
395	TAILQ_FOREACH(ppt, &pptdev_list, next) {
396		if (ppt->vm == vm) {
397			dev = ppt->dev;
398			bus = pci_get_bus(dev);
399			slot = pci_get_slot(dev);
400			func = pci_get_function(dev);
401			vm_unassign_pptdev(vm, bus, slot, func);
402		}
403	}
404
405	return (0);
406}
407
408int
409ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
410	     vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
411{
412	int i, error;
413	struct vm_memory_segment *seg;
414	struct pptdev *ppt;
415
416	ppt = ppt_find(bus, slot, func);
417	if (ppt != NULL) {
418		if (ppt->vm != vm)
419			return (EBUSY);
420
421		for (i = 0; i < MAX_MMIOSEGS; i++) {
422			seg = &ppt->mmio[i];
423			if (seg->len == 0) {
424				error = vm_map_mmio(vm, gpa, len, hpa);
425				if (error == 0) {
426					seg->gpa = gpa;
427					seg->len = len;
428				}
429				return (error);
430			}
431		}
432		return (ENOSPC);
433	}
434	return (ENOENT);
435}
436
437static int
438pptintr(void *arg)
439{
440	struct pptdev *ppt;
441	struct pptintr_arg *pptarg;
442
443	pptarg = arg;
444	ppt = pptarg->pptdev;
445
446	if (ppt->vm != NULL)
447		lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data);
448	else {
449		/*
450		 * XXX
451		 * This is not expected to happen - panic?
452		 */
453	}
454
455	/*
456	 * For legacy interrupts give other filters a chance in case
457	 * the interrupt was not generated by the passthrough device.
458	 */
459	if (ppt->msi.startrid == 0)
460		return (FILTER_STRAY);
461	else
462		return (FILTER_HANDLED);
463}
464
465int
466ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
467	      uint64_t addr, uint64_t msg, int numvec)
468{
469	int i, rid, flags;
470	int msi_count, startrid, error, tmp;
471	struct pptdev *ppt;
472
473	if (numvec < 0 || numvec > MAX_MSIMSGS)
474		return (EINVAL);
475
476	ppt = ppt_find(bus, slot, func);
477	if (ppt == NULL)
478		return (ENOENT);
479	if (ppt->vm != vm)		/* Make sure we own this device */
480		return (EBUSY);
481
482	/* Free any allocated resources */
483	ppt_teardown_msi(ppt);
484
485	if (numvec == 0)		/* nothing more to do */
486		return (0);
487
488	flags = RF_ACTIVE;
489	msi_count = pci_msi_count(ppt->dev);
490	if (msi_count == 0) {
491		startrid = 0;		/* legacy interrupt */
492		msi_count = 1;
493		flags |= RF_SHAREABLE;
494	} else
495		startrid = 1;		/* MSI */
496
497	/*
498	 * The device must be capable of supporting the number of vectors
499	 * the guest wants to allocate.
500	 */
501	if (numvec > msi_count)
502		return (EINVAL);
503
504	/*
505	 * Make sure that we can allocate all the MSI vectors that are needed
506	 * by the guest.
507	 */
508	if (startrid == 1) {
509		tmp = numvec;
510		error = pci_alloc_msi(ppt->dev, &tmp);
511		if (error)
512			return (error);
513		else if (tmp != numvec) {
514			pci_release_msi(ppt->dev);
515			return (ENOSPC);
516		} else {
517			/* success */
518		}
519	}
520
521	ppt->msi.startrid = startrid;
522
523	/*
524	 * Allocate the irq resource and attach it to the interrupt handler.
525	 */
526	for (i = 0; i < numvec; i++) {
527		ppt->msi.num_msgs = i + 1;
528		ppt->msi.cookie[i] = NULL;
529
530		rid = startrid + i;
531		ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
532							 &rid, flags);
533		if (ppt->msi.res[i] == NULL)
534			break;
535
536		ppt->msi.arg[i].pptdev = ppt;
537		ppt->msi.arg[i].addr = addr;
538		ppt->msi.arg[i].msg_data = msg + i;
539
540		error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
541				       INTR_TYPE_NET | INTR_MPSAFE,
542				       pptintr, NULL, &ppt->msi.arg[i],
543				       &ppt->msi.cookie[i]);
544		if (error != 0)
545			break;
546	}
547
548	if (i < numvec) {
549		ppt_teardown_msi(ppt);
550		return (ENXIO);
551	}
552
553	return (0);
554}
555
556int
557ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
558	       int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
559{
560	struct pptdev *ppt;
561	struct pci_devinfo *dinfo;
562	int numvec, alloced, rid, error;
563	size_t res_size, cookie_size, arg_size;
564
565	ppt = ppt_find(bus, slot, func);
566	if (ppt == NULL)
567		return (ENOENT);
568	if (ppt->vm != vm)		/* Make sure we own this device */
569		return (EBUSY);
570
571	dinfo = device_get_ivars(ppt->dev);
572	if (!dinfo)
573		return (ENXIO);
574
575	/*
576	 * First-time configuration:
577	 * 	Allocate the MSI-X table
578	 *	Allocate the IRQ resources
579	 *	Set up some variables in ppt->msix
580	 */
581	if (ppt->msix.num_msgs == 0) {
582		numvec = pci_msix_count(ppt->dev);
583		if (numvec <= 0)
584			return (EINVAL);
585
586		ppt->msix.startrid = 1;
587		ppt->msix.num_msgs = numvec;
588
589		res_size = numvec * sizeof(ppt->msix.res[0]);
590		cookie_size = numvec * sizeof(ppt->msix.cookie[0]);
591		arg_size = numvec * sizeof(ppt->msix.arg[0]);
592
593		ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK | M_ZERO);
594		ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX,
595					  M_WAITOK | M_ZERO);
596		ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK | M_ZERO);
597
598		rid = dinfo->cfg.msix.msix_table_bar;
599		ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev,
600					       SYS_RES_MEMORY, &rid, RF_ACTIVE);
601
602		if (ppt->msix.msix_table_res == NULL) {
603			ppt_teardown_msix(ppt);
604			return (ENOSPC);
605		}
606		ppt->msix.msix_table_rid = rid;
607
608		alloced = numvec;
609		error = pci_alloc_msix(ppt->dev, &alloced);
610		if (error || alloced != numvec) {
611			ppt_teardown_msix(ppt);
612			return (error == 0 ? ENOSPC: error);
613		}
614	}
615
616	if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
617		/* Tear down the IRQ if it's already set up */
618		ppt_teardown_msix_intr(ppt, idx);
619
620		/* Allocate the IRQ resource */
621		ppt->msix.cookie[idx] = NULL;
622		rid = ppt->msix.startrid + idx;
623		ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
624							    &rid, RF_ACTIVE);
625		if (ppt->msix.res[idx] == NULL)
626			return (ENXIO);
627
628		ppt->msix.arg[idx].pptdev = ppt;
629		ppt->msix.arg[idx].addr = addr;
630		ppt->msix.arg[idx].msg_data = msg;
631
632		/* Setup the MSI-X interrupt */
633		error = bus_setup_intr(ppt->dev, ppt->msix.res[idx],
634				       INTR_TYPE_NET | INTR_MPSAFE,
635				       pptintr, NULL, &ppt->msix.arg[idx],
636				       &ppt->msix.cookie[idx]);
637
638		if (error != 0) {
639			bus_teardown_intr(ppt->dev, ppt->msix.res[idx], ppt->msix.cookie[idx]);
640			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, ppt->msix.res[idx]);
641			ppt->msix.cookie[idx] = NULL;
642			ppt->msix.res[idx] = NULL;
643			return (ENXIO);
644		}
645	} else {
646		/* Masked, tear it down if it's already been set up */
647		ppt_teardown_msix_intr(ppt, idx);
648	}
649
650	return (0);
651}
652