ppt.c revision 258699
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/amd64/vmm/io/ppt.c 258699 2013-11-27 22:18:08Z neel $
 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/amd64/vmm/io/ppt.c 258699 2013-11-27 22:18:08Z neel $");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/kernel.h>
35#include <sys/malloc.h>
36#include <sys/module.h>
37#include <sys/bus.h>
38#include <sys/pciio.h>
39#include <sys/rman.h>
40#include <sys/smp.h>
41
42#include <dev/pci/pcivar.h>
43#include <dev/pci/pcireg.h>
44
45#include <machine/resource.h>
46
47#include <machine/vmm.h>
48#include <machine/vmm_dev.h>
49
50#include "vmm_lapic.h"
51#include "vmm_ktr.h"
52
53#include "iommu.h"
54#include "ppt.h"
55
56/* XXX locking */
57
58#define	MAX_PPTDEVS	(sizeof(pptdevs) / sizeof(pptdevs[0]))
59#define	MAX_MSIMSGS	32
60
61/*
62 * If the MSI-X table is located in the middle of a BAR then that MMIO
63 * region gets split into two segments - one segment above the MSI-X table
64 * and the other segment below the MSI-X table - with a hole in place of
65 * the MSI-X table so accesses to it can be trapped and emulated.
66 *
67 * So, allocate a MMIO segment for each BAR register + 1 additional segment.
68 */
69#define	MAX_MMIOSEGS	((PCIR_MAX_BAR_0 + 1) + 1)
70
71MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources");
72
/*
 * Per-vector context handed to pptintr() as its argument.  One instance is
 * filled in for every interrupt registered by ppt_setup_msi() or
 * ppt_setup_msix().
 */
struct pptintr_arg {
	struct pptdev	*pptdev;	/* passthrough device owning the vector */
	int		vec;		/* vector passed to lapic_intr_edge() */
	int		vcpu;		/* vcpu passed to lapic_intr_edge() */
};
78
79static struct pptdev {
80	device_t	dev;
81	struct vm	*vm;			/* owner of this device */
82	struct vm_memory_segment mmio[MAX_MMIOSEGS];
83	struct {
84		int	num_msgs;		/* guest state */
85
86		int	startrid;		/* host state */
87		struct resource *res[MAX_MSIMSGS];
88		void	*cookie[MAX_MSIMSGS];
89		struct pptintr_arg arg[MAX_MSIMSGS];
90	} msi;
91
92	struct {
93		int num_msgs;
94		int startrid;
95		int msix_table_rid;
96		struct resource *msix_table_res;
97		struct resource **res;
98		void **cookie;
99		struct pptintr_arg *arg;
100	} msix;
101} pptdevs[64];
102
103static int num_pptdevs;
104
105static int
106ppt_probe(device_t dev)
107{
108	int bus, slot, func;
109	struct pci_devinfo *dinfo;
110
111	dinfo = (struct pci_devinfo *)device_get_ivars(dev);
112
113	bus = pci_get_bus(dev);
114	slot = pci_get_slot(dev);
115	func = pci_get_function(dev);
116
117	/*
118	 * To qualify as a pci passthrough device a device must:
119	 * - be allowed by administrator to be used in this role
120	 * - be an endpoint device
121	 */
122	if (vmm_is_pptdev(bus, slot, func) &&
123	    (dinfo->cfg.hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_NORMAL)
124		return (0);
125	else
126		return (ENXIO);
127}
128
129static int
130ppt_attach(device_t dev)
131{
132	int n;
133
134	if (num_pptdevs >= MAX_PPTDEVS) {
135		printf("ppt_attach: maximum number of pci passthrough devices "
136		       "exceeded\n");
137		return (ENXIO);
138	}
139
140	n = num_pptdevs++;
141	pptdevs[n].dev = dev;
142
143	if (bootverbose)
144		device_printf(dev, "attached\n");
145
146	return (0);
147}
148
149static int
150ppt_detach(device_t dev)
151{
152	/*
153	 * XXX check whether there are any pci passthrough devices assigned
154	 * to guests before we allow this driver to detach.
155	 */
156
157	return (0);
158}
159
160static device_method_t ppt_methods[] = {
161	/* Device interface */
162	DEVMETHOD(device_probe,		ppt_probe),
163	DEVMETHOD(device_attach,	ppt_attach),
164	DEVMETHOD(device_detach,	ppt_detach),
165	{0, 0}
166};
167
168static devclass_t ppt_devclass;
169DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, 0);
170DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL);
171
172static struct pptdev *
173ppt_find(int bus, int slot, int func)
174{
175	device_t dev;
176	int i, b, s, f;
177
178	for (i = 0; i < num_pptdevs; i++) {
179		dev = pptdevs[i].dev;
180		b = pci_get_bus(dev);
181		s = pci_get_slot(dev);
182		f = pci_get_function(dev);
183		if (bus == b && slot == s && func == f)
184			return (&pptdevs[i]);
185	}
186	return (NULL);
187}
188
189static void
190ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
191{
192	int i;
193	struct vm_memory_segment *seg;
194
195	for (i = 0; i < MAX_MMIOSEGS; i++) {
196		seg = &ppt->mmio[i];
197		if (seg->len == 0)
198			continue;
199		(void)vm_unmap_mmio(vm, seg->gpa, seg->len);
200		bzero(seg, sizeof(struct vm_memory_segment));
201	}
202}
203
204static void
205ppt_teardown_msi(struct pptdev *ppt)
206{
207	int i, rid;
208	void *cookie;
209	struct resource *res;
210
211	if (ppt->msi.num_msgs == 0)
212		return;
213
214	for (i = 0; i < ppt->msi.num_msgs; i++) {
215		rid = ppt->msi.startrid + i;
216		res = ppt->msi.res[i];
217		cookie = ppt->msi.cookie[i];
218
219		if (cookie != NULL)
220			bus_teardown_intr(ppt->dev, res, cookie);
221
222		if (res != NULL)
223			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
224
225		ppt->msi.res[i] = NULL;
226		ppt->msi.cookie[i] = NULL;
227	}
228
229	if (ppt->msi.startrid == 1)
230		pci_release_msi(ppt->dev);
231
232	ppt->msi.num_msgs = 0;
233}
234
235static void
236ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
237{
238	int rid;
239	struct resource *res;
240	void *cookie;
241
242	rid = ppt->msix.startrid + idx;
243	res = ppt->msix.res[idx];
244	cookie = ppt->msix.cookie[idx];
245
246	if (cookie != NULL)
247		bus_teardown_intr(ppt->dev, res, cookie);
248
249	if (res != NULL)
250		bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
251
252	ppt->msix.res[idx] = NULL;
253	ppt->msix.cookie[idx] = NULL;
254}
255
256static void
257ppt_teardown_msix(struct pptdev *ppt)
258{
259	int i;
260
261	if (ppt->msix.num_msgs == 0)
262		return;
263
264	for (i = 0; i < ppt->msix.num_msgs; i++)
265		ppt_teardown_msix_intr(ppt, i);
266
267	if (ppt->msix.msix_table_res) {
268		bus_release_resource(ppt->dev, SYS_RES_MEMORY,
269				     ppt->msix.msix_table_rid,
270				     ppt->msix.msix_table_res);
271		ppt->msix.msix_table_res = NULL;
272		ppt->msix.msix_table_rid = 0;
273	}
274
275	free(ppt->msix.res, M_PPTMSIX);
276	free(ppt->msix.cookie, M_PPTMSIX);
277	free(ppt->msix.arg, M_PPTMSIX);
278
279	pci_release_msi(ppt->dev);
280
281	ppt->msix.num_msgs = 0;
282}
283
284int
285ppt_num_devices(struct vm *vm)
286{
287	int i, num;
288
289	num = 0;
290	for (i = 0; i < num_pptdevs; i++) {
291		if (pptdevs[i].vm == vm)
292			num++;
293	}
294	return (num);
295}
296
297boolean_t
298ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
299{
300	int i, n;
301	struct pptdev *ppt;
302	struct vm_memory_segment *seg;
303
304	for (n = 0; n < num_pptdevs; n++) {
305		ppt = &pptdevs[n];
306		if (ppt->vm != vm)
307			continue;
308
309		for (i = 0; i < MAX_MMIOSEGS; i++) {
310			seg = &ppt->mmio[i];
311			if (seg->len == 0)
312				continue;
313			if (gpa >= seg->gpa && gpa < seg->gpa + seg->len)
314				return (TRUE);
315		}
316	}
317
318	return (FALSE);
319}
320
321int
322ppt_assign_device(struct vm *vm, int bus, int slot, int func)
323{
324	struct pptdev *ppt;
325
326	ppt = ppt_find(bus, slot, func);
327	if (ppt != NULL) {
328		/*
329		 * If this device is owned by a different VM then we
330		 * cannot change its owner.
331		 */
332		if (ppt->vm != NULL && ppt->vm != vm)
333			return (EBUSY);
334
335		ppt->vm = vm;
336		iommu_add_device(vm_iommu_domain(vm), bus, slot, func);
337		return (0);
338	}
339	return (ENOENT);
340}
341
342int
343ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
344{
345	struct pptdev *ppt;
346
347	ppt = ppt_find(bus, slot, func);
348	if (ppt != NULL) {
349		/*
350		 * If this device is not owned by this 'vm' then bail out.
351		 */
352		if (ppt->vm != vm)
353			return (EBUSY);
354		ppt_unmap_mmio(vm, ppt);
355		ppt_teardown_msi(ppt);
356		ppt_teardown_msix(ppt);
357		iommu_remove_device(vm_iommu_domain(vm), bus, slot, func);
358		ppt->vm = NULL;
359		return (0);
360	}
361	return (ENOENT);
362}
363
364int
365ppt_unassign_all(struct vm *vm)
366{
367	int i, bus, slot, func;
368	device_t dev;
369
370	for (i = 0; i < num_pptdevs; i++) {
371		if (pptdevs[i].vm == vm) {
372			dev = pptdevs[i].dev;
373			bus = pci_get_bus(dev);
374			slot = pci_get_slot(dev);
375			func = pci_get_function(dev);
376			vm_unassign_pptdev(vm, bus, slot, func);
377		}
378	}
379
380	return (0);
381}
382
383int
384ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
385	     vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
386{
387	int i, error;
388	struct vm_memory_segment *seg;
389	struct pptdev *ppt;
390
391	ppt = ppt_find(bus, slot, func);
392	if (ppt != NULL) {
393		if (ppt->vm != vm)
394			return (EBUSY);
395
396		for (i = 0; i < MAX_MMIOSEGS; i++) {
397			seg = &ppt->mmio[i];
398			if (seg->len == 0) {
399				error = vm_map_mmio(vm, gpa, len, hpa);
400				if (error == 0) {
401					seg->gpa = gpa;
402					seg->len = len;
403				}
404				return (error);
405			}
406		}
407		return (ENOSPC);
408	}
409	return (ENOENT);
410}
411
412static int
413pptintr(void *arg)
414{
415	int vec;
416	struct pptdev *ppt;
417	struct pptintr_arg *pptarg;
418
419	pptarg = arg;
420	ppt = pptarg->pptdev;
421	vec = pptarg->vec;
422
423	if (ppt->vm != NULL)
424		lapic_intr_edge(ppt->vm, pptarg->vcpu, vec);
425	else {
426		/*
427		 * XXX
428		 * This is not expected to happen - panic?
429		 */
430	}
431
432	/*
433	 * For legacy interrupts give other filters a chance in case
434	 * the interrupt was not generated by the passthrough device.
435	 */
436	if (ppt->msi.startrid == 0)
437		return (FILTER_STRAY);
438	else
439		return (FILTER_HANDLED);
440}
441
442int
443ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
444	      int destcpu, int vector, int numvec)
445{
446	int i, rid, flags;
447	int msi_count, startrid, error, tmp;
448	struct pptdev *ppt;
449
450	if ((destcpu >= VM_MAXCPU || destcpu < 0) ||
451	    (vector < 0 || vector > 255) ||
452	    (numvec < 0 || numvec > MAX_MSIMSGS))
453		return (EINVAL);
454
455	ppt = ppt_find(bus, slot, func);
456	if (ppt == NULL)
457		return (ENOENT);
458	if (ppt->vm != vm)		/* Make sure we own this device */
459		return (EBUSY);
460
461	/* Free any allocated resources */
462	ppt_teardown_msi(ppt);
463
464	if (numvec == 0)		/* nothing more to do */
465		return (0);
466
467	flags = RF_ACTIVE;
468	msi_count = pci_msi_count(ppt->dev);
469	if (msi_count == 0) {
470		startrid = 0;		/* legacy interrupt */
471		msi_count = 1;
472		flags |= RF_SHAREABLE;
473	} else
474		startrid = 1;		/* MSI */
475
476	/*
477	 * The device must be capable of supporting the number of vectors
478	 * the guest wants to allocate.
479	 */
480	if (numvec > msi_count)
481		return (EINVAL);
482
483	/*
484	 * Make sure that we can allocate all the MSI vectors that are needed
485	 * by the guest.
486	 */
487	if (startrid == 1) {
488		tmp = numvec;
489		error = pci_alloc_msi(ppt->dev, &tmp);
490		if (error)
491			return (error);
492		else if (tmp != numvec) {
493			pci_release_msi(ppt->dev);
494			return (ENOSPC);
495		} else {
496			/* success */
497		}
498	}
499
500	ppt->msi.startrid = startrid;
501
502	/*
503	 * Allocate the irq resource and attach it to the interrupt handler.
504	 */
505	for (i = 0; i < numvec; i++) {
506		ppt->msi.num_msgs = i + 1;
507		ppt->msi.cookie[i] = NULL;
508
509		rid = startrid + i;
510		ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
511							 &rid, flags);
512		if (ppt->msi.res[i] == NULL)
513			break;
514
515		ppt->msi.arg[i].pptdev = ppt;
516		ppt->msi.arg[i].vec = vector + i;
517		ppt->msi.arg[i].vcpu = destcpu;
518
519		error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
520				       INTR_TYPE_NET | INTR_MPSAFE,
521				       pptintr, NULL, &ppt->msi.arg[i],
522				       &ppt->msi.cookie[i]);
523		if (error != 0)
524			break;
525	}
526
527	if (i < numvec) {
528		ppt_teardown_msi(ppt);
529		return (ENXIO);
530	}
531
532	return (0);
533}
534
535int
536ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
537	       int idx, uint32_t msg, uint32_t vector_control, uint64_t addr)
538{
539	struct pptdev *ppt;
540	struct pci_devinfo *dinfo;
541	int numvec, alloced, rid, error;
542	size_t res_size, cookie_size, arg_size;
543
544	ppt = ppt_find(bus, slot, func);
545	if (ppt == NULL)
546		return (ENOENT);
547	if (ppt->vm != vm)		/* Make sure we own this device */
548		return (EBUSY);
549
550	dinfo = device_get_ivars(ppt->dev);
551	if (!dinfo)
552		return (ENXIO);
553
554	/*
555	 * First-time configuration:
556	 * 	Allocate the MSI-X table
557	 *	Allocate the IRQ resources
558	 *	Set up some variables in ppt->msix
559	 */
560	if (ppt->msix.num_msgs == 0) {
561		numvec = pci_msix_count(ppt->dev);
562		if (numvec <= 0)
563			return (EINVAL);
564
565		ppt->msix.startrid = 1;
566		ppt->msix.num_msgs = numvec;
567
568		res_size = numvec * sizeof(ppt->msix.res[0]);
569		cookie_size = numvec * sizeof(ppt->msix.cookie[0]);
570		arg_size = numvec * sizeof(ppt->msix.arg[0]);
571
572		ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK | M_ZERO);
573		ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX,
574					  M_WAITOK | M_ZERO);
575		ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK | M_ZERO);
576
577		rid = dinfo->cfg.msix.msix_table_bar;
578		ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev,
579					       SYS_RES_MEMORY, &rid, RF_ACTIVE);
580
581		if (ppt->msix.msix_table_res == NULL) {
582			ppt_teardown_msix(ppt);
583			return (ENOSPC);
584		}
585		ppt->msix.msix_table_rid = rid;
586
587		alloced = numvec;
588		error = pci_alloc_msix(ppt->dev, &alloced);
589		if (error || alloced != numvec) {
590			ppt_teardown_msix(ppt);
591			return (error == 0 ? ENOSPC: error);
592		}
593	}
594
595	if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
596		/* Tear down the IRQ if it's already set up */
597		ppt_teardown_msix_intr(ppt, idx);
598
599		/* Allocate the IRQ resource */
600		ppt->msix.cookie[idx] = NULL;
601		rid = ppt->msix.startrid + idx;
602		ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
603							    &rid, RF_ACTIVE);
604		if (ppt->msix.res[idx] == NULL)
605			return (ENXIO);
606
607		ppt->msix.arg[idx].pptdev = ppt;
608		ppt->msix.arg[idx].vec = msg & 0xFF;
609		ppt->msix.arg[idx].vcpu = (addr >> 12) & 0xFF;
610
611		/* Setup the MSI-X interrupt */
612		error = bus_setup_intr(ppt->dev, ppt->msix.res[idx],
613				       INTR_TYPE_NET | INTR_MPSAFE,
614				       pptintr, NULL, &ppt->msix.arg[idx],
615				       &ppt->msix.cookie[idx]);
616
617		if (error != 0) {
618			bus_teardown_intr(ppt->dev, ppt->msix.res[idx], ppt->msix.cookie[idx]);
619			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, ppt->msix.res[idx]);
620			ppt->msix.cookie[idx] = NULL;
621			ppt->msix.res[idx] = NULL;
622			return (ENXIO);
623		}
624	} else {
625		/* Masked, tear it down if it's already been set up */
626		ppt_teardown_msix_intr(ppt, idx);
627	}
628
629	return (0);
630}
631