ppt.c revision 221828
1/*-
2 * Copyright (c) 2011 NetApp, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD$
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD$");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/kernel.h>
35#include <sys/module.h>
36#include <sys/bus.h>
37#include <sys/pciio.h>
38#include <sys/rman.h>
39
40#include <dev/pci/pcivar.h>
41#include <dev/pci/pcireg.h>
42
43#include <machine/resource.h>
44
45#include <machine/vmm.h>
46#include <machine/vmm_dev.h>
47
48#include "vmm_lapic.h"
49#include "vmm_ktr.h"
50
51#include "iommu.h"
52#include "ppt.h"
53
/*
 * Sizing limits for the passthrough device table and its per-device state.
 */
/* Capacity of the pptdevs[] table, computed from the array's own size. */
#define	MAX_PPTDEVS	(sizeof(pptdevs) / sizeof(pptdevs[0]))
/*
 * Number of mmio[] slots kept per device.
 * NOTE(review): presumably one slot per BAR of a type 0 header - confirm.
 */
#define	MAX_MMIOSEGS	(PCIR_MAX_BAR_0 + 1)
/*
 * Size of the per-device MSI arrays; also the largest 'numvec' that
 * ppt_setup_msi() accepts.
 */
#define	MAX_MSIMSGS	32
57
/*
 * Per-message argument handed to the interrupt filter pptintr().
 * One of these exists for every MSI message slot of a device.
 */
struct pptintr_arg {				/* pptintr(pptintr_arg) */
	struct pptdev	*pptdev;		/* device that owns the message */
	int		msg;			/* message index, added to msi.vector */
};
62
/*
 * State kept for each pci passthrough device, stored in a fixed-size table.
 * Entries [0, num_pptdevs) are populated by ppt_attach().
 */
static struct pptdev {
	device_t	dev;			/* host pci device */
	struct vm	*vm;			/* owner of this device */
						/* (NULL when unassigned) */
	/* guest mmio mappings; a slot with len == 0 is unused */
	struct vm_memory_segment mmio[MAX_MMIOSEGS];
	struct {
		int	num_msgs;		/* guest state */
		int	vector;			/* guest vector of message 0 */
		int	vcpu;			/* vcpu that receives the interrupt */

		int	startrid;		/* host state */
						/* 0: legacy interrupt, 1: MSI */
		struct resource *res[MAX_MSIMSGS];	/* irq resource per message */
		void	*cookie[MAX_MSIMSGS];	/* bus_setup_intr() cookie */
		struct pptintr_arg arg[MAX_MSIMSGS];	/* pptintr() arguments */
	} msi;
} pptdevs[32];
78
/*
 * Number of pptdevs[] entries in use. In the code shown here it is only
 * ever incremented, by ppt_attach().
 */
static int num_pptdevs;
80
81static int
82ppt_probe(device_t dev)
83{
84	int bus, slot, func;
85	struct pci_devinfo *dinfo;
86
87	dinfo = (struct pci_devinfo *)device_get_ivars(dev);
88
89	bus = pci_get_bus(dev);
90	slot = pci_get_slot(dev);
91	func = pci_get_function(dev);
92
93	/*
94	 * To qualify as a pci passthrough device a device must:
95	 * - be allowed by administrator to be used in this role
96	 * - be an endpoint device
97	 */
98	if (vmm_is_pptdev(bus, slot, func) &&
99	    (dinfo->cfg.hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_NORMAL)
100		return (0);
101	else
102		return (ENXIO);
103}
104
105static int
106ppt_attach(device_t dev)
107{
108	int n;
109
110	if (num_pptdevs >= MAX_PPTDEVS) {
111		printf("ppt_attach: maximum number of pci passthrough devices "
112		       "exceeded\n");
113		return (ENXIO);
114	}
115
116	n = num_pptdevs++;
117	pptdevs[n].dev = dev;
118
119	if (bootverbose)
120		device_printf(dev, "attached\n");
121
122	return (0);
123}
124
125static int
126ppt_detach(device_t dev)
127{
128	/*
129	 * XXX check whether there are any pci passthrough devices assigned
130	 * to guests before we allow this driver to detach.
131	 */
132
133	return (0);
134}
135
/*
 * Method table for the ppt driver. Only the probe, attach and detach
 * device methods are provided; the trailing {0, 0} entry ends the table.
 */
static device_method_t ppt_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		ppt_probe),
	DEVMETHOD(device_attach,	ppt_attach),
	DEVMETHOD(device_detach,	ppt_detach),
	{0, 0}
};
143
/*
 * Define the "ppt" driver class from ppt_methods and register it as a
 * child driver of the pci bus. No module event handler is supplied
 * (both trailing DRIVER_MODULE arguments are NULL).
 * NOTE(review): the 0 passed to DEFINE_CLASS_0 is presumably the softc
 * size, i.e. no per-device softc is allocated - confirm.
 */
static devclass_t ppt_devclass;
DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, 0);
DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL);
147
148static struct pptdev *
149ppt_find(int bus, int slot, int func)
150{
151	device_t dev;
152	int i, b, s, f;
153
154	for (i = 0; i < num_pptdevs; i++) {
155		dev = pptdevs[i].dev;
156		b = pci_get_bus(dev);
157		s = pci_get_slot(dev);
158		f = pci_get_function(dev);
159		if (bus == b && slot == s && func == f)
160			return (&pptdevs[i]);
161	}
162	return (NULL);
163}
164
165static void
166ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
167{
168	int i;
169	struct vm_memory_segment *seg;
170
171	for (i = 0; i < MAX_MMIOSEGS; i++) {
172		seg = &ppt->mmio[i];
173		if (seg->len == 0)
174			continue;
175		(void)vm_unmap_mmio(vm, seg->gpa, seg->len);
176		bzero(seg, sizeof(struct vm_memory_segment));
177	}
178}
179
180static void
181ppt_teardown_msi(struct pptdev *ppt)
182{
183	int i, rid;
184	void *cookie;
185	struct resource *res;
186
187	if (ppt->msi.num_msgs == 0)
188		return;
189
190	for (i = 0; i < ppt->msi.num_msgs; i++) {
191		rid = ppt->msi.startrid + i;
192		res = ppt->msi.res[i];
193		cookie = ppt->msi.cookie[i];
194
195		if (cookie != NULL)
196			bus_teardown_intr(ppt->dev, res, cookie);
197
198		if (res != NULL)
199			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
200
201		ppt->msi.res[i] = NULL;
202		ppt->msi.cookie[i] = NULL;
203	}
204
205	if (ppt->msi.startrid == 1)
206		pci_release_msi(ppt->dev);
207
208	ppt->msi.num_msgs = 0;
209}
210
211int
212ppt_assign_device(struct vm *vm, int bus, int slot, int func)
213{
214	struct pptdev *ppt;
215
216	ppt = ppt_find(bus, slot, func);
217	if (ppt != NULL) {
218		/*
219		 * If this device is owned by a different VM then we
220		 * cannot change its owner.
221		 */
222		if (ppt->vm != NULL && ppt->vm != vm)
223			return (EBUSY);
224
225		ppt->vm = vm;
226		iommu_add_device(vm_iommu_domain(vm), bus, slot, func);
227		return (0);
228	}
229	return (ENOENT);
230}
231
232int
233ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
234{
235	struct pptdev *ppt;
236
237	ppt = ppt_find(bus, slot, func);
238	if (ppt != NULL) {
239		/*
240		 * If this device is not owned by this 'vm' then bail out.
241		 */
242		if (ppt->vm != vm)
243			return (EBUSY);
244		ppt_unmap_mmio(vm, ppt);
245		ppt_teardown_msi(ppt);
246		iommu_remove_device(vm_iommu_domain(vm), bus, slot, func);
247		ppt->vm = NULL;
248		return (0);
249	}
250	return (ENOENT);
251}
252
253int
254ppt_unassign_all(struct vm *vm)
255{
256	int i, bus, slot, func;
257	device_t dev;
258
259	for (i = 0; i < num_pptdevs; i++) {
260		if (pptdevs[i].vm == vm) {
261			dev = pptdevs[i].dev;
262			bus = pci_get_bus(dev);
263			slot = pci_get_slot(dev);
264			func = pci_get_function(dev);
265			ppt_unassign_device(vm, bus, slot, func);
266		}
267	}
268
269	return (0);
270}
271
272int
273ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
274	     vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
275{
276	int i, error;
277	struct vm_memory_segment *seg;
278	struct pptdev *ppt;
279
280	ppt = ppt_find(bus, slot, func);
281	if (ppt != NULL) {
282		if (ppt->vm != vm)
283			return (EBUSY);
284
285		for (i = 0; i < MAX_MMIOSEGS; i++) {
286			seg = &ppt->mmio[i];
287			if (seg->len == 0) {
288				error = vm_map_mmio(vm, gpa, len, hpa);
289				if (error == 0) {
290					seg->gpa = gpa;
291					seg->len = len;
292					seg->hpa = hpa;
293				}
294				return (error);
295			}
296		}
297		return (ENOSPC);
298	}
299	return (ENOENT);
300}
301
302static int
303pptintr(void *arg)
304{
305	int vec;
306	struct pptdev *ppt;
307	struct pptintr_arg *pptarg;
308
309	pptarg = arg;
310	ppt = pptarg->pptdev;
311	vec = ppt->msi.vector + pptarg->msg;
312
313	if (ppt->vm != NULL)
314		(void) lapic_set_intr(ppt->vm, ppt->msi.vcpu, vec);
315	else {
316		/*
317		 * XXX
318		 * This is not expected to happen - panic?
319		 */
320	}
321
322	/*
323	 * For legacy interrupts give other filters a chance in case
324	 * the interrupt was not generated by the passthrough device.
325	 */
326	if (ppt->msi.startrid == 0)
327		return (FILTER_STRAY);
328	else
329		return (FILTER_HANDLED);
330}
331
/*
 * XXX
 * When we try to free the MSI resource the kernel will bind the thread to
 * the host cpu that was originally handling the MSI. The function freeing
 * the MSI vector (apic_free_vector()) will panic the kernel if the thread
 * is already bound to a cpu.
 *
 * So the vcpu thread is temporarily unpinned around ppt_teardown_msi() and
 * its previous pinning is restored afterwards.
 */
static void
PPT_TEARDOWN_MSI(struct vm *vm, int vcpu, struct pptdev *ppt)
{
	int saved_cpu;
	int was_pinned;

	/* Stays at -1 unless vm_get_pinning() reports a pinning. */
	saved_cpu = -1;
	vm_get_pinning(vm, vcpu, &saved_cpu);
	was_pinned = (saved_cpu >= 0);

	if (was_pinned)
		vm_set_pinning(vm, vcpu, -1);

	ppt_teardown_msi(ppt);

	if (was_pinned)
		vm_set_pinning(vm, vcpu, saved_cpu);
}
356
357int
358ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
359	      int destcpu, int vector, int numvec)
360{
361	int i, rid, flags;
362	int msi_count, startrid, error, tmp;
363	struct pptdev *ppt;
364
365	if ((destcpu >= VM_MAXCPU || destcpu < 0) ||
366	    (vector < 0 || vector > 255) ||
367	    (numvec < 0 || numvec > MAX_MSIMSGS))
368		return (EINVAL);
369
370	ppt = ppt_find(bus, slot, func);
371	if (ppt == NULL)
372		return (ENOENT);
373	if (ppt->vm != vm)		/* Make sure we own this device */
374		return (EBUSY);
375
376	/* Free any allocated resources */
377	PPT_TEARDOWN_MSI(vm, vcpu, ppt);
378
379	if (numvec == 0)		/* nothing more to do */
380		return (0);
381
382	flags = RF_ACTIVE;
383	msi_count = pci_msi_count(ppt->dev);
384	if (msi_count == 0) {
385		startrid = 0;		/* legacy interrupt */
386		msi_count = 1;
387		flags |= RF_SHAREABLE;
388	} else
389		startrid = 1;		/* MSI */
390
391	/*
392	 * The device must be capable of supporting the number of vectors
393	 * the guest wants to allocate.
394	 */
395	if (numvec > msi_count)
396		return (EINVAL);
397
398	/*
399	 * Make sure that we can allocate all the MSI vectors that are needed
400	 * by the guest.
401	 */
402	if (startrid == 1) {
403		tmp = numvec;
404		error = pci_alloc_msi(ppt->dev, &tmp);
405		if (error)
406			return (error);
407		else if (tmp != numvec) {
408			pci_release_msi(ppt->dev);
409			return (ENOSPC);
410		} else {
411			/* success */
412		}
413	}
414
415	ppt->msi.vector = vector;
416	ppt->msi.vcpu = destcpu;
417	ppt->msi.startrid = startrid;
418
419	/*
420	 * Allocate the irq resource and attach it to the interrupt handler.
421	 */
422	for (i = 0; i < numvec; i++) {
423		ppt->msi.num_msgs = i + 1;
424		ppt->msi.cookie[i] = NULL;
425
426		rid = startrid + i;
427		ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
428							 &rid, flags);
429		if (ppt->msi.res[i] == NULL)
430			break;
431
432		ppt->msi.arg[i].pptdev = ppt;
433		ppt->msi.arg[i].msg = i;
434
435		error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
436				       INTR_TYPE_NET | INTR_MPSAFE | INTR_FAST,
437				       pptintr, NULL, &ppt->msi.arg[i],
438				       &ppt->msi.cookie[i]);
439		if (error != 0)
440			break;
441	}
442
443	if (i < numvec) {
444		PPT_TEARDOWN_MSI(vm, vcpu, ppt);
445		return (ENXIO);
446	}
447
448	return (0);
449}
450