ppt.c revision 234761
1/*-
2 * Copyright (c) 2011 NetApp, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD$
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD$");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/kernel.h>
35#include <sys/malloc.h>
36#include <sys/module.h>
37#include <sys/bus.h>
38#include <sys/pciio.h>
39#include <sys/rman.h>
40#include <sys/smp.h>
41
42#include <dev/pci/pcivar.h>
43#include <dev/pci/pcireg.h>
44
45#include <machine/resource.h>
46
47#include <machine/vmm.h>
48#include <machine/vmm_dev.h>
49
50#include "vmm_lapic.h"
51#include "vmm_ktr.h"
52
53#include "iommu.h"
54#include "ppt.h"
55
/*
 * Capacity of the pptdevs[] table, computed from its declaration.  This
 * expands to a sizeof expression, so it can only be used where the
 * pptdevs array is in scope.
 */
#define	MAX_PPTDEVS	(sizeof(pptdevs) / sizeof(pptdevs[0]))
/* Number of MMIO segment slots kept per device (sizes pptdev.mmio[]). */
#define	MAX_MMIOSEGS	(PCIR_MAX_BAR_0 + 1)
/* Upper bound on MSI messages per device (sizes the pptdev.msi arrays). */
#define	MAX_MSIMSGS	32

/* malloc type for the dynamically allocated MSI-X bookkeeping arrays. */
MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources");
61
/*
 * Per-interrupt argument registered with bus_setup_intr() and received by
 * pptintr().  One instance exists for each MSI message / MSI-X vector.
 */
struct pptintr_arg {				/* pptintr(pptintr_arg) */
	struct pptdev	*pptdev;	/* device that owns this interrupt */
	int		vec;		/* vector handed to lapic_set_intr() */
	int 		vcpu;		/* vcpu handed to lapic_set_intr() */
};
67
/*
 * Table of PCI passthrough devices claimed by this driver.  ppt_attach()
 * fills the slots in order; a slot's 'vm' is NULL while the device is not
 * assigned to any guest.
 */
static struct pptdev {
	device_t	dev;
	struct vm	*vm;			/* owner of this device */
	/* MMIO ranges mapped into the guest; len == 0 marks a free slot */
	struct vm_memory_segment mmio[MAX_MMIOSEGS];
	struct {
		int	num_msgs;		/* guest state */
		int	vector;
		int	vcpu;

		/* startrid: 0 for a legacy interrupt, 1 for MSI */
		int	startrid;		/* host state */
		struct resource *res[MAX_MSIMSGS];
		void	*cookie[MAX_MSIMSGS];
		struct pptintr_arg arg[MAX_MSIMSGS];
	} msi;

	/*
	 * MSI-X state.  res/cookie/arg are malloc'ed arrays holding
	 * num_msgs entries each; msix_table_res is the memory resource
	 * allocated for the BAR named by the device's msix_table_bar.
	 */
	struct {
		int num_msgs;
		int startrid;
		int msix_table_rid;
		struct resource *msix_table_res;
		struct resource **res;
		void **cookie;
		struct pptintr_arg *arg;
	} msix;
} pptdevs[32];

/* Number of pptdevs[] slots currently in use. */
static int num_pptdevs;
95
96static int
97ppt_probe(device_t dev)
98{
99	int bus, slot, func;
100	struct pci_devinfo *dinfo;
101
102	dinfo = (struct pci_devinfo *)device_get_ivars(dev);
103
104	bus = pci_get_bus(dev);
105	slot = pci_get_slot(dev);
106	func = pci_get_function(dev);
107
108	/*
109	 * To qualify as a pci passthrough device a device must:
110	 * - be allowed by administrator to be used in this role
111	 * - be an endpoint device
112	 */
113	if (vmm_is_pptdev(bus, slot, func) &&
114	    (dinfo->cfg.hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_NORMAL)
115		return (0);
116	else
117		return (ENXIO);
118}
119
120static int
121ppt_attach(device_t dev)
122{
123	int n;
124
125	if (num_pptdevs >= MAX_PPTDEVS) {
126		printf("ppt_attach: maximum number of pci passthrough devices "
127		       "exceeded\n");
128		return (ENXIO);
129	}
130
131	n = num_pptdevs++;
132	pptdevs[n].dev = dev;
133
134	if (bootverbose)
135		device_printf(dev, "attached\n");
136
137	return (0);
138}
139
140static int
141ppt_detach(device_t dev)
142{
143	/*
144	 * XXX check whether there are any pci passthrough devices assigned
145	 * to guests before we allow this driver to detach.
146	 */
147
148	return (0);
149}
150
/*
 * Method table for the "ppt" driver: only the probe, attach and detach
 * device methods are provided.
 */
static device_method_t ppt_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		ppt_probe),
	DEVMETHOD(device_attach,	ppt_attach),
	DEVMETHOD(device_detach,	ppt_detach),
	{0, 0}				/* end of table */
};

static devclass_t ppt_devclass;
/*
 * Per-device state is kept in the static pptdevs[] table; the trailing 0
 * is presumably the per-instance (softc) size -- confirm against bus.h.
 */
DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, 0);
/* Register the driver under the "pci" bus; no module event handler. */
DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL);
162
163static struct pptdev *
164ppt_find(int bus, int slot, int func)
165{
166	device_t dev;
167	int i, b, s, f;
168
169	for (i = 0; i < num_pptdevs; i++) {
170		dev = pptdevs[i].dev;
171		b = pci_get_bus(dev);
172		s = pci_get_slot(dev);
173		f = pci_get_function(dev);
174		if (bus == b && slot == s && func == f)
175			return (&pptdevs[i]);
176	}
177	return (NULL);
178}
179
180static void
181ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
182{
183	int i;
184	struct vm_memory_segment *seg;
185
186	for (i = 0; i < MAX_MMIOSEGS; i++) {
187		seg = &ppt->mmio[i];
188		if (seg->len == 0)
189			continue;
190		(void)vm_unmap_mmio(vm, seg->gpa, seg->len);
191		bzero(seg, sizeof(struct vm_memory_segment));
192	}
193}
194
195static void
196ppt_teardown_msi(struct pptdev *ppt)
197{
198	int i, rid;
199	void *cookie;
200	struct resource *res;
201
202	if (ppt->msi.num_msgs == 0)
203		return;
204
205	for (i = 0; i < ppt->msi.num_msgs; i++) {
206		rid = ppt->msi.startrid + i;
207		res = ppt->msi.res[i];
208		cookie = ppt->msi.cookie[i];
209
210		if (cookie != NULL)
211			bus_teardown_intr(ppt->dev, res, cookie);
212
213		if (res != NULL)
214			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
215
216		ppt->msi.res[i] = NULL;
217		ppt->msi.cookie[i] = NULL;
218	}
219
220	if (ppt->msi.startrid == 1)
221		pci_release_msi(ppt->dev);
222
223	ppt->msi.num_msgs = 0;
224}
225
226static void
227ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
228{
229	int rid;
230	struct resource *res;
231	void *cookie;
232
233	rid = ppt->msix.startrid + idx;
234	res = ppt->msix.res[idx];
235	cookie = ppt->msix.cookie[idx];
236
237	if (cookie != NULL)
238		bus_teardown_intr(ppt->dev, res, cookie);
239
240	if (res != NULL)
241		bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
242
243	ppt->msix.res[idx] = NULL;
244	ppt->msix.cookie[idx] = NULL;
245}
246
247static void
248ppt_teardown_msix(struct pptdev *ppt)
249{
250	int i, error;
251
252	if (ppt->msix.num_msgs == 0)
253		return;
254
255	for (i = 0; i < ppt->msix.num_msgs; i++)
256		ppt_teardown_msix_intr(ppt, i);
257
258	if (ppt->msix.msix_table_res) {
259		bus_release_resource(ppt->dev, SYS_RES_MEMORY,
260				     ppt->msix.msix_table_rid,
261				     ppt->msix.msix_table_res);
262		ppt->msix.msix_table_res = NULL;
263		ppt->msix.msix_table_rid = 0;
264	}
265
266	free(ppt->msix.res, M_PPTMSIX);
267	free(ppt->msix.cookie, M_PPTMSIX);
268	free(ppt->msix.arg, M_PPTMSIX);
269
270	error = pci_release_msi(ppt->dev);
271	if (error)
272		printf("ppt_teardown_msix: Failed to release MSI-X resources (error %i)\n", error);
273
274	ppt->msix.num_msgs = 0;
275}
276
277int
278ppt_assign_device(struct vm *vm, int bus, int slot, int func)
279{
280	struct pptdev *ppt;
281
282	ppt = ppt_find(bus, slot, func);
283	if (ppt != NULL) {
284		/*
285		 * If this device is owned by a different VM then we
286		 * cannot change its owner.
287		 */
288		if (ppt->vm != NULL && ppt->vm != vm)
289			return (EBUSY);
290
291		ppt->vm = vm;
292		iommu_add_device(vm_iommu_domain(vm), bus, slot, func);
293		return (0);
294	}
295	return (ENOENT);
296}
297
298int
299ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
300{
301	struct pptdev *ppt;
302
303	ppt = ppt_find(bus, slot, func);
304	if (ppt != NULL) {
305		/*
306		 * If this device is not owned by this 'vm' then bail out.
307		 */
308		if (ppt->vm != vm)
309			return (EBUSY);
310		ppt_unmap_mmio(vm, ppt);
311		ppt_teardown_msi(ppt);
312		ppt_teardown_msix(ppt);
313		iommu_remove_device(vm_iommu_domain(vm), bus, slot, func);
314		ppt->vm = NULL;
315		return (0);
316	}
317	return (ENOENT);
318}
319
320int
321ppt_unassign_all(struct vm *vm)
322{
323	int i, bus, slot, func;
324	device_t dev;
325
326	for (i = 0; i < num_pptdevs; i++) {
327		if (pptdevs[i].vm == vm) {
328			dev = pptdevs[i].dev;
329			bus = pci_get_bus(dev);
330			slot = pci_get_slot(dev);
331			func = pci_get_function(dev);
332			ppt_unassign_device(vm, bus, slot, func);
333		}
334	}
335
336	return (0);
337}
338
339int
340ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
341	     vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
342{
343	int i, error;
344	struct vm_memory_segment *seg;
345	struct pptdev *ppt;
346
347	ppt = ppt_find(bus, slot, func);
348	if (ppt != NULL) {
349		if (ppt->vm != vm)
350			return (EBUSY);
351
352		for (i = 0; i < MAX_MMIOSEGS; i++) {
353			seg = &ppt->mmio[i];
354			if (seg->len == 0) {
355				error = vm_map_mmio(vm, gpa, len, hpa);
356				if (error == 0) {
357					seg->gpa = gpa;
358					seg->len = len;
359					seg->hpa = hpa;
360				}
361				return (error);
362			}
363		}
364		return (ENOSPC);
365	}
366	return (ENOENT);
367}
368
369static int
370pptintr(void *arg)
371{
372	int vec;
373	struct pptdev *ppt;
374	struct pptintr_arg *pptarg;
375
376	pptarg = arg;
377	ppt = pptarg->pptdev;
378	vec = pptarg->vec;
379
380	if (ppt->vm != NULL)
381		(void) lapic_set_intr(ppt->vm, pptarg->vcpu, vec);
382	else {
383		/*
384		 * XXX
385		 * This is not expected to happen - panic?
386		 */
387	}
388
389	/*
390	 * For legacy interrupts give other filters a chance in case
391	 * the interrupt was not generated by the passthrough device.
392	 */
393	if (ppt->msi.startrid == 0)
394		return (FILTER_STRAY);
395	else
396		return (FILTER_HANDLED);
397}
398
/*
 * XXX
 * When we try to free the MSI resource the kernel will bind the thread to
 * the host cpu that was originally handling the MSI. The function that
 * frees the MSI vector (apic_free_vector()) will panic the kernel if the
 * thread is already bound to a cpu.
 *
 * So the vcpu thread is temporarily unpinned around ppt_teardown_msi() and
 * its previous pinning is restored afterwards.  If the vcpu was not pinned
 * the teardown is performed directly.
 */
static void
PPT_TEARDOWN_MSI(struct vm *vm, int vcpu, struct pptdev *ppt)
{
	int saved_cpu;

	saved_cpu = -1;
	vm_get_pinning(vm, vcpu, &saved_cpu);

	if (saved_cpu < 0) {
		/* Not pinned: nothing to undo or restore. */
		ppt_teardown_msi(ppt);
		return;
	}

	vm_set_pinning(vm, vcpu, -1);
	ppt_teardown_msi(ppt);
	vm_set_pinning(vm, vcpu, saved_cpu);
}
423
424int
425ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
426	      int destcpu, int vector, int numvec)
427{
428	int i, rid, flags;
429	int msi_count, startrid, error, tmp;
430	struct pptdev *ppt;
431
432	if ((destcpu >= VM_MAXCPU || destcpu < 0) ||
433	    (vector < 0 || vector > 255) ||
434	    (numvec < 0 || numvec > MAX_MSIMSGS))
435		return (EINVAL);
436
437	ppt = ppt_find(bus, slot, func);
438	if (ppt == NULL)
439		return (ENOENT);
440	if (ppt->vm != vm)		/* Make sure we own this device */
441		return (EBUSY);
442
443	/* Free any allocated resources */
444	PPT_TEARDOWN_MSI(vm, vcpu, ppt);
445
446	if (numvec == 0)		/* nothing more to do */
447		return (0);
448
449	flags = RF_ACTIVE;
450	msi_count = pci_msi_count(ppt->dev);
451	if (msi_count == 0) {
452		startrid = 0;		/* legacy interrupt */
453		msi_count = 1;
454		flags |= RF_SHAREABLE;
455	} else
456		startrid = 1;		/* MSI */
457
458	/*
459	 * The device must be capable of supporting the number of vectors
460	 * the guest wants to allocate.
461	 */
462	if (numvec > msi_count)
463		return (EINVAL);
464
465	/*
466	 * Make sure that we can allocate all the MSI vectors that are needed
467	 * by the guest.
468	 */
469	if (startrid == 1) {
470		tmp = numvec;
471		error = pci_alloc_msi(ppt->dev, &tmp);
472		if (error)
473			return (error);
474		else if (tmp != numvec) {
475			pci_release_msi(ppt->dev);
476			return (ENOSPC);
477		} else {
478			/* success */
479		}
480	}
481
482	ppt->msi.vector = vector;
483	ppt->msi.vcpu = destcpu;
484	ppt->msi.startrid = startrid;
485
486	/*
487	 * Allocate the irq resource and attach it to the interrupt handler.
488	 */
489	for (i = 0; i < numvec; i++) {
490		ppt->msi.num_msgs = i + 1;
491		ppt->msi.cookie[i] = NULL;
492
493		rid = startrid + i;
494		ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
495							 &rid, flags);
496		if (ppt->msi.res[i] == NULL)
497			break;
498
499		ppt->msi.arg[i].pptdev = ppt;
500		ppt->msi.arg[i].vec = vector + i;
501
502		error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
503				       INTR_TYPE_NET | INTR_MPSAFE,
504				       pptintr, NULL, &ppt->msi.arg[i],
505				       &ppt->msi.cookie[i]);
506		if (error != 0)
507			break;
508	}
509
510	if (i < numvec) {
511		PPT_TEARDOWN_MSI(vm, vcpu, ppt);
512		return (ENXIO);
513	}
514
515	return (0);
516}
517
518int
519ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
520	       int idx, uint32_t msg, uint32_t vector_control, uint64_t addr)
521{
522	struct pptdev *ppt;
523	struct pci_devinfo *dinfo;
524	int numvec, vector_count, rid, error;
525	size_t res_size, cookie_size, arg_size;
526
527	ppt = ppt_find(bus, slot, func);
528	if (ppt == NULL)
529		return (ENOENT);
530	if (ppt->vm != vm)		/* Make sure we own this device */
531		return (EBUSY);
532
533	dinfo = device_get_ivars(ppt->dev);
534	if (!dinfo)
535		return (ENXIO);
536
537	/*
538	 * First-time configuration:
539	 * 	Allocate the MSI-X table
540	 *	Allocate the IRQ resources
541	 *	Set up some variables in ppt->msix
542	 */
543	if (!ppt->msix.msix_table_res) {
544		ppt->msix.res = NULL;
545		ppt->msix.cookie = NULL;
546		ppt->msix.arg = NULL;
547
548		rid = dinfo->cfg.msix.msix_table_bar;
549		ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev, SYS_RES_MEMORY,
550								  &rid, RF_ACTIVE);
551		if (ppt->msix.msix_table_res == NULL)
552			return (ENOSPC);
553
554		ppt->msix.msix_table_rid = rid;
555
556		vector_count = numvec = pci_msix_count(ppt->dev);
557
558		error = pci_alloc_msix(ppt->dev, &numvec);
559		if (error)
560			return (error);
561		else if (vector_count != numvec) {
562			pci_release_msi(ppt->dev);
563			return (ENOSPC);
564		}
565
566		ppt->msix.num_msgs = numvec;
567
568		ppt->msix.startrid = 1;
569
570		res_size = numvec * sizeof(ppt->msix.res[0]);
571		cookie_size = numvec * sizeof(ppt->msix.cookie[0]);
572		arg_size = numvec * sizeof(ppt->msix.arg[0]);
573
574		ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK);
575		ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX, M_WAITOK);
576		ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK);
577		if (ppt->msix.res == NULL || ppt->msix.cookie == NULL ||
578		    ppt->msix.arg == NULL) {
579			ppt_teardown_msix(ppt);
580			return (ENOSPC);
581		}
582		bzero(ppt->msix.res, res_size);
583		bzero(ppt->msix.cookie, cookie_size);
584		bzero(ppt->msix.arg, arg_size);
585	}
586
587	if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
588		/* Tear down the IRQ if it's already set up */
589		ppt_teardown_msix_intr(ppt, idx);
590
591		/* Allocate the IRQ resource */
592		ppt->msix.cookie[idx] = NULL;
593		rid = ppt->msix.startrid + idx;
594		ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
595							    &rid, RF_ACTIVE);
596		if (ppt->msix.res[idx] == NULL)
597			return (ENXIO);
598
599		ppt->msix.arg[idx].pptdev = ppt;
600		ppt->msix.arg[idx].vec = msg;
601		ppt->msix.arg[idx].vcpu = (addr >> 12) & 0xFF;
602
603		/* Setup the MSI-X interrupt */
604		error = bus_setup_intr(ppt->dev, ppt->msix.res[idx],
605				       INTR_TYPE_NET | INTR_MPSAFE,
606				       pptintr, NULL, &ppt->msix.arg[idx],
607				       &ppt->msix.cookie[idx]);
608
609		if (error != 0) {
610			bus_teardown_intr(ppt->dev, ppt->msix.res[idx], ppt->msix.cookie[idx]);
611			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, ppt->msix.res[idx]);
612			ppt->msix.cookie[idx] = NULL;
613			ppt->msix.res[idx] = NULL;
614			return (ENXIO);
615		}
616	} else {
617		/* Masked, tear it down if it's already been set up */
618		ppt_teardown_msix_intr(ppt, idx);
619	}
620
621	return (0);
622}
623
624