ppt.c revision 223621
1/*-
2 * Copyright (c) 2011 NetApp, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD$
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD$");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/kernel.h>
35#include <sys/module.h>
36#include <sys/bus.h>
37#include <sys/pciio.h>
38#include <sys/rman.h>
39#include <sys/smp.h>
40
41#include <dev/pci/pcivar.h>
42#include <dev/pci/pcireg.h>
43
44#include <machine/resource.h>
45
46#include <machine/vmm.h>
47#include <machine/vmm_dev.h>
48
49#include "vmm_lapic.h"
50#include "vmm_ktr.h"
51
52#include "iommu.h"
53#include "ppt.h"
54
/* Capacity of the statically sized pptdevs[] table. */
#define	MAX_PPTDEVS	(sizeof(pptdevs) / sizeof(*pptdevs))

/*
 * Number of MMIO segments tracked per passthrough device (size of
 * pptdev.mmio[]); presumably one per BAR of a normal PCI header.
 */
#define	MAX_MMIOSEGS	(PCIR_MAX_BAR_0 + 1)

/*
 * Upper bound on the number of interrupt messages per device; sizes the
 * per-message arrays in pptdev.msi and limits 'numvec' in ppt_setup_msi().
 */
#define	MAX_MSIMSGS	32
58
/*
 * Argument handed to pptintr() for each interrupt message that has been
 * set up: identifies the passthrough device and which of its messages fired.
 */
struct pptintr_arg {
	struct pptdev	*pptdev;	/* device the message belongs to */
	int		msg;		/* message index, offset from msi.vector */
};
63
64static struct pptdev {
65	device_t	dev;
66	struct vm	*vm;			/* owner of this device */
67	struct vm_memory_segment mmio[MAX_MMIOSEGS];
68	struct {
69		int	num_msgs;		/* guest state */
70		int	vector;
71		int	vcpu;
72
73		int	startrid;		/* host state */
74		struct resource *res[MAX_MSIMSGS];
75		void	*cookie[MAX_MSIMSGS];
76		struct pptintr_arg arg[MAX_MSIMSGS];
77	} msi;
78} pptdevs[32];
79
80static int num_pptdevs;
81
82static int
83ppt_probe(device_t dev)
84{
85	int bus, slot, func;
86	struct pci_devinfo *dinfo;
87
88	dinfo = (struct pci_devinfo *)device_get_ivars(dev);
89
90	bus = pci_get_bus(dev);
91	slot = pci_get_slot(dev);
92	func = pci_get_function(dev);
93
94	/*
95	 * To qualify as a pci passthrough device a device must:
96	 * - be allowed by administrator to be used in this role
97	 * - be an endpoint device
98	 */
99	if (vmm_is_pptdev(bus, slot, func) &&
100	    (dinfo->cfg.hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_NORMAL)
101		return (0);
102	else
103		return (ENXIO);
104}
105
106static int
107ppt_attach(device_t dev)
108{
109	int n;
110
111	if (num_pptdevs >= MAX_PPTDEVS) {
112		printf("ppt_attach: maximum number of pci passthrough devices "
113		       "exceeded\n");
114		return (ENXIO);
115	}
116
117	n = num_pptdevs++;
118	pptdevs[n].dev = dev;
119
120	if (bootverbose)
121		device_printf(dev, "attached\n");
122
123	return (0);
124}
125
126static int
127ppt_detach(device_t dev)
128{
129	/*
130	 * XXX check whether there are any pci passthrough devices assigned
131	 * to guests before we allow this driver to detach.
132	 */
133
134	return (0);
135}
136
137static device_method_t ppt_methods[] = {
138	/* Device interface */
139	DEVMETHOD(device_probe,		ppt_probe),
140	DEVMETHOD(device_attach,	ppt_attach),
141	DEVMETHOD(device_detach,	ppt_detach),
142	{0, 0}
143};
144
145static devclass_t ppt_devclass;
146DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, 0);
147DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL);
148
149static struct pptdev *
150ppt_find(int bus, int slot, int func)
151{
152	device_t dev;
153	int i, b, s, f;
154
155	for (i = 0; i < num_pptdevs; i++) {
156		dev = pptdevs[i].dev;
157		b = pci_get_bus(dev);
158		s = pci_get_slot(dev);
159		f = pci_get_function(dev);
160		if (bus == b && slot == s && func == f)
161			return (&pptdevs[i]);
162	}
163	return (NULL);
164}
165
166static void
167ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
168{
169	int i;
170	struct vm_memory_segment *seg;
171
172	for (i = 0; i < MAX_MMIOSEGS; i++) {
173		seg = &ppt->mmio[i];
174		if (seg->len == 0)
175			continue;
176		(void)vm_unmap_mmio(vm, seg->gpa, seg->len);
177		bzero(seg, sizeof(struct vm_memory_segment));
178	}
179}
180
181static void
182ppt_teardown_msi(struct pptdev *ppt)
183{
184	int i, rid;
185	void *cookie;
186	struct resource *res;
187
188	if (ppt->msi.num_msgs == 0)
189		return;
190
191	for (i = 0; i < ppt->msi.num_msgs; i++) {
192		rid = ppt->msi.startrid + i;
193		res = ppt->msi.res[i];
194		cookie = ppt->msi.cookie[i];
195
196		if (cookie != NULL)
197			bus_teardown_intr(ppt->dev, res, cookie);
198
199		if (res != NULL)
200			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
201
202		ppt->msi.res[i] = NULL;
203		ppt->msi.cookie[i] = NULL;
204	}
205
206	if (ppt->msi.startrid == 1)
207		pci_release_msi(ppt->dev);
208
209	ppt->msi.num_msgs = 0;
210}
211
212int
213ppt_assign_device(struct vm *vm, int bus, int slot, int func)
214{
215	struct pptdev *ppt;
216
217	ppt = ppt_find(bus, slot, func);
218	if (ppt != NULL) {
219		/*
220		 * If this device is owned by a different VM then we
221		 * cannot change its owner.
222		 */
223		if (ppt->vm != NULL && ppt->vm != vm)
224			return (EBUSY);
225
226		ppt->vm = vm;
227		iommu_add_device(vm_iommu_domain(vm), bus, slot, func);
228		return (0);
229	}
230	return (ENOENT);
231}
232
233int
234ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
235{
236	struct pptdev *ppt;
237
238	ppt = ppt_find(bus, slot, func);
239	if (ppt != NULL) {
240		/*
241		 * If this device is not owned by this 'vm' then bail out.
242		 */
243		if (ppt->vm != vm)
244			return (EBUSY);
245		ppt_unmap_mmio(vm, ppt);
246		ppt_teardown_msi(ppt);
247		iommu_remove_device(vm_iommu_domain(vm), bus, slot, func);
248		ppt->vm = NULL;
249		return (0);
250	}
251	return (ENOENT);
252}
253
254int
255ppt_unassign_all(struct vm *vm)
256{
257	int i, bus, slot, func;
258	device_t dev;
259
260	for (i = 0; i < num_pptdevs; i++) {
261		if (pptdevs[i].vm == vm) {
262			dev = pptdevs[i].dev;
263			bus = pci_get_bus(dev);
264			slot = pci_get_slot(dev);
265			func = pci_get_function(dev);
266			ppt_unassign_device(vm, bus, slot, func);
267		}
268	}
269
270	return (0);
271}
272
273int
274ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
275	     vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
276{
277	int i, error;
278	struct vm_memory_segment *seg;
279	struct pptdev *ppt;
280
281	ppt = ppt_find(bus, slot, func);
282	if (ppt != NULL) {
283		if (ppt->vm != vm)
284			return (EBUSY);
285
286		for (i = 0; i < MAX_MMIOSEGS; i++) {
287			seg = &ppt->mmio[i];
288			if (seg->len == 0) {
289				error = vm_map_mmio(vm, gpa, len, hpa);
290				if (error == 0) {
291					seg->gpa = gpa;
292					seg->len = len;
293					seg->hpa = hpa;
294				}
295				return (error);
296			}
297		}
298		return (ENOSPC);
299	}
300	return (ENOENT);
301}
302
303static int
304pptintr(void *arg)
305{
306	int vec;
307	struct pptdev *ppt;
308	struct pptintr_arg *pptarg;
309
310	pptarg = arg;
311	ppt = pptarg->pptdev;
312	vec = ppt->msi.vector + pptarg->msg;
313
314	if (ppt->vm != NULL)
315		(void) lapic_set_intr(ppt->vm, ppt->msi.vcpu, vec);
316	else {
317		/*
318		 * XXX
319		 * This is not expected to happen - panic?
320		 */
321	}
322
323	/*
324	 * For legacy interrupts give other filters a chance in case
325	 * the interrupt was not generated by the passthrough device.
326	 */
327	if (ppt->msi.startrid == 0)
328		return (FILTER_STRAY);
329	else
330		return (FILTER_HANDLED);
331}
332
/*
 * XXX
 * When we try to free the MSI resource the kernel will bind the thread to
 * the host cpu that was originally handling the MSI. The function freeing
 * the MSI vector (apic_free_vector()) will panic the kernel if the thread
 * is already bound to a cpu.
 *
 * So the vcpu thread is temporarily unpinned around ppt_teardown_msi() and
 * its previous pinning is restored afterwards.
 */
static void
PPT_TEARDOWN_MSI(struct vm *vm, int vcpu, struct pptdev *ppt)
{
	int pincpu;

	/* Stays -1 unless vm_get_pinning() reports a pinned cpu. */
	pincpu = -1;
	vm_get_pinning(vm, vcpu, &pincpu);

	if (pincpu < 0) {
		/* Not pinned: nothing to undo or restore. */
		ppt_teardown_msi(ppt);
		return;
	}

	vm_set_pinning(vm, vcpu, -1);
	ppt_teardown_msi(ppt);
	vm_set_pinning(vm, vcpu, pincpu);
}
357
358int
359ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
360	      int destcpu, int vector, int numvec)
361{
362	int i, rid, flags;
363	int msi_count, startrid, error, tmp;
364	struct pptdev *ppt;
365
366	if ((destcpu >= VM_MAXCPU || destcpu < 0) ||
367	    (vector < 0 || vector > 255) ||
368	    (numvec < 0 || numvec > MAX_MSIMSGS))
369		return (EINVAL);
370
371	ppt = ppt_find(bus, slot, func);
372	if (ppt == NULL)
373		return (ENOENT);
374	if (ppt->vm != vm)		/* Make sure we own this device */
375		return (EBUSY);
376
377	/* Free any allocated resources */
378	PPT_TEARDOWN_MSI(vm, vcpu, ppt);
379
380	if (numvec == 0)		/* nothing more to do */
381		return (0);
382
383	flags = RF_ACTIVE;
384	msi_count = pci_msi_count(ppt->dev);
385	if (msi_count == 0) {
386		startrid = 0;		/* legacy interrupt */
387		msi_count = 1;
388		flags |= RF_SHAREABLE;
389	} else
390		startrid = 1;		/* MSI */
391
392	/*
393	 * The device must be capable of supporting the number of vectors
394	 * the guest wants to allocate.
395	 */
396	if (numvec > msi_count)
397		return (EINVAL);
398
399	/*
400	 * Make sure that we can allocate all the MSI vectors that are needed
401	 * by the guest.
402	 */
403	if (startrid == 1) {
404		tmp = numvec;
405		error = pci_alloc_msi(ppt->dev, &tmp);
406		if (error)
407			return (error);
408		else if (tmp != numvec) {
409			pci_release_msi(ppt->dev);
410			return (ENOSPC);
411		} else {
412			/* success */
413		}
414	}
415
416	ppt->msi.vector = vector;
417	ppt->msi.vcpu = destcpu;
418	ppt->msi.startrid = startrid;
419
420	/*
421	 * Allocate the irq resource and attach it to the interrupt handler.
422	 */
423	for (i = 0; i < numvec; i++) {
424		ppt->msi.num_msgs = i + 1;
425		ppt->msi.cookie[i] = NULL;
426
427		rid = startrid + i;
428		ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
429							 &rid, flags);
430		if (ppt->msi.res[i] == NULL)
431			break;
432
433		ppt->msi.arg[i].pptdev = ppt;
434		ppt->msi.arg[i].msg = i;
435
436		error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
437				       INTR_TYPE_NET | INTR_MPSAFE,
438				       pptintr, NULL, &ppt->msi.arg[i],
439				       &ppt->msi.cookie[i]);
440		if (error != 0)
441			break;
442	}
443
444	if (i < numvec) {
445		PPT_TEARDOWN_MSI(vm, vcpu, ppt);
446		return (ENXIO);
447	}
448
449	return (0);
450}
451