ppt.c revision 241452
1/*-
2 * Copyright (c) 2011 NetApp, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD$
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD$");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/kernel.h>
35#include <sys/malloc.h>
36#include <sys/module.h>
37#include <sys/bus.h>
38#include <sys/pciio.h>
39#include <sys/rman.h>
40#include <sys/smp.h>
41
42#include <dev/pci/pcivar.h>
43#include <dev/pci/pcireg.h>
44
45#include <machine/resource.h>
46
47#include <machine/vmm.h>
48#include <machine/vmm_dev.h>
49
50#include "vmm_lapic.h"
51#include "vmm_ktr.h"
52
53#include "iommu.h"
54#include "ppt.h"
55
56#define	MAX_PPTDEVS	(sizeof(pptdevs) / sizeof(pptdevs[0]))
57#define	MAX_MMIOSEGS	(PCIR_MAX_BAR_0 + 1)
58#define	MAX_MSIMSGS	32
59
60MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources");
61
/*
 * Per-vector context registered with bus_setup_intr() and handed back to
 * pptintr() as its argument.
 */
struct pptintr_arg {
	struct pptdev	*pptdev;	/* device this vector belongs to */
	int		vec;		/* vector given to lapic_set_intr() */
	int		vcpu;		/* vcpu given to lapic_set_intr() */
};
67
68static struct pptdev {
69	device_t	dev;
70	struct vm	*vm;			/* owner of this device */
71	struct vm_memory_segment mmio[MAX_MMIOSEGS];
72	struct {
73		int	num_msgs;		/* guest state */
74
75		int	startrid;		/* host state */
76		struct resource *res[MAX_MSIMSGS];
77		void	*cookie[MAX_MSIMSGS];
78		struct pptintr_arg arg[MAX_MSIMSGS];
79	} msi;
80
81	struct {
82		int num_msgs;
83		int startrid;
84		int msix_table_rid;
85		struct resource *msix_table_res;
86		struct resource **res;
87		void **cookie;
88		struct pptintr_arg *arg;
89	} msix;
90} pptdevs[32];
91
92static int num_pptdevs;
93
94static int
95ppt_probe(device_t dev)
96{
97	int bus, slot, func;
98	struct pci_devinfo *dinfo;
99
100	dinfo = (struct pci_devinfo *)device_get_ivars(dev);
101
102	bus = pci_get_bus(dev);
103	slot = pci_get_slot(dev);
104	func = pci_get_function(dev);
105
106	/*
107	 * To qualify as a pci passthrough device a device must:
108	 * - be allowed by administrator to be used in this role
109	 * - be an endpoint device
110	 */
111	if (vmm_is_pptdev(bus, slot, func) &&
112	    (dinfo->cfg.hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_NORMAL)
113		return (0);
114	else
115		return (ENXIO);
116}
117
118static int
119ppt_attach(device_t dev)
120{
121	int n;
122
123	if (num_pptdevs >= MAX_PPTDEVS) {
124		printf("ppt_attach: maximum number of pci passthrough devices "
125		       "exceeded\n");
126		return (ENXIO);
127	}
128
129	n = num_pptdevs++;
130	pptdevs[n].dev = dev;
131
132	if (bootverbose)
133		device_printf(dev, "attached\n");
134
135	return (0);
136}
137
138static int
139ppt_detach(device_t dev)
140{
141	/*
142	 * XXX check whether there are any pci passthrough devices assigned
143	 * to guests before we allow this driver to detach.
144	 */
145
146	return (0);
147}
148
149static device_method_t ppt_methods[] = {
150	/* Device interface */
151	DEVMETHOD(device_probe,		ppt_probe),
152	DEVMETHOD(device_attach,	ppt_attach),
153	DEVMETHOD(device_detach,	ppt_detach),
154	{0, 0}
155};
156
157static devclass_t ppt_devclass;
158DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, 0);
159DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL);
160
161static struct pptdev *
162ppt_find(int bus, int slot, int func)
163{
164	device_t dev;
165	int i, b, s, f;
166
167	for (i = 0; i < num_pptdevs; i++) {
168		dev = pptdevs[i].dev;
169		b = pci_get_bus(dev);
170		s = pci_get_slot(dev);
171		f = pci_get_function(dev);
172		if (bus == b && slot == s && func == f)
173			return (&pptdevs[i]);
174	}
175	return (NULL);
176}
177
178static void
179ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
180{
181	int i;
182	struct vm_memory_segment *seg;
183
184	for (i = 0; i < MAX_MMIOSEGS; i++) {
185		seg = &ppt->mmio[i];
186		if (seg->len == 0)
187			continue;
188		(void)vm_unmap_mmio(vm, seg->gpa, seg->len);
189		bzero(seg, sizeof(struct vm_memory_segment));
190	}
191}
192
193static void
194ppt_teardown_msi(struct pptdev *ppt)
195{
196	int i, rid;
197	void *cookie;
198	struct resource *res;
199
200	if (ppt->msi.num_msgs == 0)
201		return;
202
203	for (i = 0; i < ppt->msi.num_msgs; i++) {
204		rid = ppt->msi.startrid + i;
205		res = ppt->msi.res[i];
206		cookie = ppt->msi.cookie[i];
207
208		if (cookie != NULL)
209			bus_teardown_intr(ppt->dev, res, cookie);
210
211		if (res != NULL)
212			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
213
214		ppt->msi.res[i] = NULL;
215		ppt->msi.cookie[i] = NULL;
216	}
217
218	if (ppt->msi.startrid == 1)
219		pci_release_msi(ppt->dev);
220
221	ppt->msi.num_msgs = 0;
222}
223
224static void
225ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
226{
227	int rid;
228	struct resource *res;
229	void *cookie;
230
231	rid = ppt->msix.startrid + idx;
232	res = ppt->msix.res[idx];
233	cookie = ppt->msix.cookie[idx];
234
235	if (cookie != NULL)
236		bus_teardown_intr(ppt->dev, res, cookie);
237
238	if (res != NULL)
239		bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
240
241	ppt->msix.res[idx] = NULL;
242	ppt->msix.cookie[idx] = NULL;
243}
244
245static void
246ppt_teardown_msix(struct pptdev *ppt)
247{
248	int i, error;
249
250	if (ppt->msix.num_msgs == 0)
251		return;
252
253	for (i = 0; i < ppt->msix.num_msgs; i++)
254		ppt_teardown_msix_intr(ppt, i);
255
256	if (ppt->msix.msix_table_res) {
257		bus_release_resource(ppt->dev, SYS_RES_MEMORY,
258				     ppt->msix.msix_table_rid,
259				     ppt->msix.msix_table_res);
260		ppt->msix.msix_table_res = NULL;
261		ppt->msix.msix_table_rid = 0;
262	}
263
264	free(ppt->msix.res, M_PPTMSIX);
265	free(ppt->msix.cookie, M_PPTMSIX);
266	free(ppt->msix.arg, M_PPTMSIX);
267
268	error = pci_release_msi(ppt->dev);
269	if (error)
270		printf("ppt_teardown_msix: Failed to release MSI-X resources (error %i)\n", error);
271
272	ppt->msix.num_msgs = 0;
273}
274
275int
276ppt_assign_device(struct vm *vm, int bus, int slot, int func)
277{
278	struct pptdev *ppt;
279
280	ppt = ppt_find(bus, slot, func);
281	if (ppt != NULL) {
282		/*
283		 * If this device is owned by a different VM then we
284		 * cannot change its owner.
285		 */
286		if (ppt->vm != NULL && ppt->vm != vm)
287			return (EBUSY);
288
289		ppt->vm = vm;
290		iommu_add_device(vm_iommu_domain(vm), bus, slot, func);
291		return (0);
292	}
293	return (ENOENT);
294}
295
296int
297ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
298{
299	struct pptdev *ppt;
300
301	ppt = ppt_find(bus, slot, func);
302	if (ppt != NULL) {
303		/*
304		 * If this device is not owned by this 'vm' then bail out.
305		 */
306		if (ppt->vm != vm)
307			return (EBUSY);
308		ppt_unmap_mmio(vm, ppt);
309		ppt_teardown_msi(ppt);
310		ppt_teardown_msix(ppt);
311		iommu_remove_device(vm_iommu_domain(vm), bus, slot, func);
312		ppt->vm = NULL;
313		return (0);
314	}
315	return (ENOENT);
316}
317
318int
319ppt_unassign_all(struct vm *vm)
320{
321	int i, bus, slot, func;
322	device_t dev;
323
324	for (i = 0; i < num_pptdevs; i++) {
325		if (pptdevs[i].vm == vm) {
326			dev = pptdevs[i].dev;
327			bus = pci_get_bus(dev);
328			slot = pci_get_slot(dev);
329			func = pci_get_function(dev);
330			ppt_unassign_device(vm, bus, slot, func);
331		}
332	}
333
334	return (0);
335}
336
337int
338ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
339	     vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
340{
341	int i, error;
342	struct vm_memory_segment *seg;
343	struct pptdev *ppt;
344
345	ppt = ppt_find(bus, slot, func);
346	if (ppt != NULL) {
347		if (ppt->vm != vm)
348			return (EBUSY);
349
350		for (i = 0; i < MAX_MMIOSEGS; i++) {
351			seg = &ppt->mmio[i];
352			if (seg->len == 0) {
353				error = vm_map_mmio(vm, gpa, len, hpa);
354				if (error == 0) {
355					seg->gpa = gpa;
356					seg->len = len;
357				}
358				return (error);
359			}
360		}
361		return (ENOSPC);
362	}
363	return (ENOENT);
364}
365
366static int
367pptintr(void *arg)
368{
369	int vec;
370	struct pptdev *ppt;
371	struct pptintr_arg *pptarg;
372
373	pptarg = arg;
374	ppt = pptarg->pptdev;
375	vec = pptarg->vec;
376
377	if (ppt->vm != NULL)
378		(void) lapic_set_intr(ppt->vm, pptarg->vcpu, vec);
379	else {
380		/*
381		 * XXX
382		 * This is not expected to happen - panic?
383		 */
384	}
385
386	/*
387	 * For legacy interrupts give other filters a chance in case
388	 * the interrupt was not generated by the passthrough device.
389	 */
390	if (ppt->msi.startrid == 0)
391		return (FILTER_STRAY);
392	else
393		return (FILTER_HANDLED);
394}
395
/*
 * XXX
 * Freeing an MSI resource makes the kernel bind the current thread to the
 * host cpu that was originally handling the MSI, and the function that
 * frees the MSI vector (apic_free_vector()) panics if the thread is
 * already bound to a cpu.
 *
 * To avoid that, the vcpu thread's pinning is dropped for the duration of
 * ppt_teardown_msi() and put back afterwards.
 */
static void
PPT_TEARDOWN_MSI(struct vm *vm, int vcpu, struct pptdev *ppt)
{
	int saved_cpu;

	saved_cpu = -1;
	vm_get_pinning(vm, vcpu, &saved_cpu);

	/* Not pinned (or pinning unknown): nothing to save and restore. */
	if (saved_cpu < 0) {
		ppt_teardown_msi(ppt);
		return;
	}

	vm_set_pinning(vm, vcpu, -1);
	ppt_teardown_msi(ppt);
	vm_set_pinning(vm, vcpu, saved_cpu);
}
420
421int
422ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
423	      int destcpu, int vector, int numvec)
424{
425	int i, rid, flags;
426	int msi_count, startrid, error, tmp;
427	struct pptdev *ppt;
428
429	if ((destcpu >= VM_MAXCPU || destcpu < 0) ||
430	    (vector < 0 || vector > 255) ||
431	    (numvec < 0 || numvec > MAX_MSIMSGS))
432		return (EINVAL);
433
434	ppt = ppt_find(bus, slot, func);
435	if (ppt == NULL)
436		return (ENOENT);
437	if (ppt->vm != vm)		/* Make sure we own this device */
438		return (EBUSY);
439
440	/* Free any allocated resources */
441	PPT_TEARDOWN_MSI(vm, vcpu, ppt);
442
443	if (numvec == 0)		/* nothing more to do */
444		return (0);
445
446	flags = RF_ACTIVE;
447	msi_count = pci_msi_count(ppt->dev);
448	if (msi_count == 0) {
449		startrid = 0;		/* legacy interrupt */
450		msi_count = 1;
451		flags |= RF_SHAREABLE;
452	} else
453		startrid = 1;		/* MSI */
454
455	/*
456	 * The device must be capable of supporting the number of vectors
457	 * the guest wants to allocate.
458	 */
459	if (numvec > msi_count)
460		return (EINVAL);
461
462	/*
463	 * Make sure that we can allocate all the MSI vectors that are needed
464	 * by the guest.
465	 */
466	if (startrid == 1) {
467		tmp = numvec;
468		error = pci_alloc_msi(ppt->dev, &tmp);
469		if (error)
470			return (error);
471		else if (tmp != numvec) {
472			pci_release_msi(ppt->dev);
473			return (ENOSPC);
474		} else {
475			/* success */
476		}
477	}
478
479	ppt->msi.startrid = startrid;
480
481	/*
482	 * Allocate the irq resource and attach it to the interrupt handler.
483	 */
484	for (i = 0; i < numvec; i++) {
485		ppt->msi.num_msgs = i + 1;
486		ppt->msi.cookie[i] = NULL;
487
488		rid = startrid + i;
489		ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
490							 &rid, flags);
491		if (ppt->msi.res[i] == NULL)
492			break;
493
494		ppt->msi.arg[i].pptdev = ppt;
495		ppt->msi.arg[i].vec = vector + i;
496		ppt->msi.arg[i].vcpu = destcpu;
497
498		error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
499				       INTR_TYPE_NET | INTR_MPSAFE,
500				       pptintr, NULL, &ppt->msi.arg[i],
501				       &ppt->msi.cookie[i]);
502		if (error != 0)
503			break;
504	}
505
506	if (i < numvec) {
507		PPT_TEARDOWN_MSI(vm, vcpu, ppt);
508		return (ENXIO);
509	}
510
511	return (0);
512}
513
514int
515ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
516	       int idx, uint32_t msg, uint32_t vector_control, uint64_t addr)
517{
518	struct pptdev *ppt;
519	struct pci_devinfo *dinfo;
520	int numvec, vector_count, rid, error;
521	size_t res_size, cookie_size, arg_size;
522
523	ppt = ppt_find(bus, slot, func);
524	if (ppt == NULL)
525		return (ENOENT);
526	if (ppt->vm != vm)		/* Make sure we own this device */
527		return (EBUSY);
528
529	dinfo = device_get_ivars(ppt->dev);
530	if (!dinfo)
531		return (ENXIO);
532
533	/*
534	 * First-time configuration:
535	 * 	Allocate the MSI-X table
536	 *	Allocate the IRQ resources
537	 *	Set up some variables in ppt->msix
538	 */
539	if (!ppt->msix.msix_table_res) {
540		ppt->msix.res = NULL;
541		ppt->msix.cookie = NULL;
542		ppt->msix.arg = NULL;
543
544		rid = dinfo->cfg.msix.msix_table_bar;
545		ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev, SYS_RES_MEMORY,
546								  &rid, RF_ACTIVE);
547		if (ppt->msix.msix_table_res == NULL)
548			return (ENOSPC);
549
550		ppt->msix.msix_table_rid = rid;
551
552		vector_count = numvec = pci_msix_count(ppt->dev);
553
554		error = pci_alloc_msix(ppt->dev, &numvec);
555		if (error)
556			return (error);
557		else if (vector_count != numvec) {
558			pci_release_msi(ppt->dev);
559			return (ENOSPC);
560		}
561
562		ppt->msix.num_msgs = numvec;
563
564		ppt->msix.startrid = 1;
565
566		res_size = numvec * sizeof(ppt->msix.res[0]);
567		cookie_size = numvec * sizeof(ppt->msix.cookie[0]);
568		arg_size = numvec * sizeof(ppt->msix.arg[0]);
569
570		ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK);
571		ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX, M_WAITOK);
572		ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK);
573		if (ppt->msix.res == NULL || ppt->msix.cookie == NULL ||
574		    ppt->msix.arg == NULL) {
575			ppt_teardown_msix(ppt);
576			return (ENOSPC);
577		}
578		bzero(ppt->msix.res, res_size);
579		bzero(ppt->msix.cookie, cookie_size);
580		bzero(ppt->msix.arg, arg_size);
581	}
582
583	if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
584		/* Tear down the IRQ if it's already set up */
585		ppt_teardown_msix_intr(ppt, idx);
586
587		/* Allocate the IRQ resource */
588		ppt->msix.cookie[idx] = NULL;
589		rid = ppt->msix.startrid + idx;
590		ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
591							    &rid, RF_ACTIVE);
592		if (ppt->msix.res[idx] == NULL)
593			return (ENXIO);
594
595		ppt->msix.arg[idx].pptdev = ppt;
596		ppt->msix.arg[idx].vec = msg;
597		ppt->msix.arg[idx].vcpu = (addr >> 12) & 0xFF;
598
599		/* Setup the MSI-X interrupt */
600		error = bus_setup_intr(ppt->dev, ppt->msix.res[idx],
601				       INTR_TYPE_NET | INTR_MPSAFE,
602				       pptintr, NULL, &ppt->msix.arg[idx],
603				       &ppt->msix.cookie[idx]);
604
605		if (error != 0) {
606			bus_teardown_intr(ppt->dev, ppt->msix.res[idx], ppt->msix.cookie[idx]);
607			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, ppt->msix.res[idx]);
608			ppt->msix.cookie[idx] = NULL;
609			ppt->msix.res[idx] = NULL;
610			return (ENXIO);
611		}
612	} else {
613		/* Masked, tear it down if it's already been set up */
614		ppt_teardown_msix_intr(ppt, idx);
615	}
616
617	return (0);
618}
619
620