ppt.c revision 223621
1/*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD$"); 31 32#include <sys/param.h> 33#include <sys/systm.h> 34#include <sys/kernel.h> 35#include <sys/module.h> 36#include <sys/bus.h> 37#include <sys/pciio.h> 38#include <sys/rman.h> 39#include <sys/smp.h> 40 41#include <dev/pci/pcivar.h> 42#include <dev/pci/pcireg.h> 43 44#include <machine/resource.h> 45 46#include <machine/vmm.h> 47#include <machine/vmm_dev.h> 48 49#include "vmm_lapic.h" 50#include "vmm_ktr.h" 51 52#include "iommu.h" 53#include "ppt.h" 54 55#define MAX_PPTDEVS (sizeof(pptdevs) / sizeof(pptdevs[0])) 56#define MAX_MMIOSEGS (PCIR_MAX_BAR_0 + 1) 57#define MAX_MSIMSGS 32 58 59struct pptintr_arg { /* pptintr(pptintr_arg) */ 60 struct pptdev *pptdev; 61 int msg; 62}; 63 64static struct pptdev { 65 device_t dev; 66 struct vm *vm; /* owner of this device */ 67 struct vm_memory_segment mmio[MAX_MMIOSEGS]; 68 struct { 69 int num_msgs; /* guest state */ 70 int vector; 71 int vcpu; 72 73 int startrid; /* host state */ 74 struct resource *res[MAX_MSIMSGS]; 75 void *cookie[MAX_MSIMSGS]; 76 struct pptintr_arg arg[MAX_MSIMSGS]; 77 } msi; 78} pptdevs[32]; 79 80static int num_pptdevs; 81 82static int 83ppt_probe(device_t dev) 84{ 85 int bus, slot, func; 86 struct pci_devinfo *dinfo; 87 88 dinfo = (struct pci_devinfo *)device_get_ivars(dev); 89 90 bus = pci_get_bus(dev); 91 slot = pci_get_slot(dev); 92 func = pci_get_function(dev); 93 94 /* 95 * To qualify as a pci passthrough device a device must: 96 * - be allowed by administrator to be used in this role 97 * - be an endpoint device 98 */ 99 if (vmm_is_pptdev(bus, slot, func) && 100 (dinfo->cfg.hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_NORMAL) 101 return (0); 102 else 103 return (ENXIO); 104} 105 106static int 107ppt_attach(device_t dev) 108{ 109 int n; 110 111 if (num_pptdevs >= MAX_PPTDEVS) { 112 printf("ppt_attach: maximum number of pci passthrough devices " 113 "exceeded\n"); 114 return (ENXIO); 115 } 116 117 n = num_pptdevs++; 118 pptdevs[n].dev = dev; 119 120 if (bootverbose) 121 device_printf(dev, "attached\n"); 122 123 return (0); 124} 125 126static int 127ppt_detach(device_t dev) 128{ 129 /* 130 * XXX check whether there are any pci passthrough devices assigned 131 * to guests before we allow this driver to detach. 132 */ 133 134 return (0); 135} 136 137static device_method_t ppt_methods[] = { 138 /* Device interface */ 139 DEVMETHOD(device_probe, ppt_probe), 140 DEVMETHOD(device_attach, ppt_attach), 141 DEVMETHOD(device_detach, ppt_detach), 142 {0, 0} 143}; 144 145static devclass_t ppt_devclass; 146DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, 0); 147DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL); 148 149static struct pptdev * 150ppt_find(int bus, int slot, int func) 151{ 152 device_t dev; 153 int i, b, s, f; 154 155 for (i = 0; i < num_pptdevs; i++) { 156 dev = pptdevs[i].dev; 157 b = pci_get_bus(dev); 158 s = pci_get_slot(dev); 159 f = pci_get_function(dev); 160 if (bus == b && slot == s && func == f) 161 return (&pptdevs[i]); 162 } 163 return (NULL); 164} 165 166static void 167ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt) 168{ 169 int i; 170 struct vm_memory_segment *seg; 171 172 for (i = 0; i < MAX_MMIOSEGS; i++) { 173 seg = &ppt->mmio[i]; 174 if (seg->len == 0) 175 continue; 176 (void)vm_unmap_mmio(vm, seg->gpa, seg->len); 177 bzero(seg, sizeof(struct vm_memory_segment)); 178 } 179} 180 181static void 182ppt_teardown_msi(struct pptdev *ppt) 183{ 184 int i, rid; 185 void *cookie; 186 struct resource *res; 187 188 if (ppt->msi.num_msgs == 0) 189 return; 190 191 for (i = 0; i < ppt->msi.num_msgs; i++) { 192 rid = ppt->msi.startrid + i; 193 res = ppt->msi.res[i]; 194 cookie = ppt->msi.cookie[i]; 195 196 if (cookie != NULL) 197 bus_teardown_intr(ppt->dev, res, cookie); 198 199 if (res != NULL) 200 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res); 201 202 ppt->msi.res[i] = NULL; 203 ppt->msi.cookie[i] = NULL; 204 } 205 206 if (ppt->msi.startrid == 1) 207 pci_release_msi(ppt->dev); 208 209 ppt->msi.num_msgs = 0; 210} 211 212int 213ppt_assign_device(struct vm *vm, int bus, int slot, int func) 214{ 215 struct pptdev *ppt; 216 217 ppt = ppt_find(bus, slot, func); 218 if (ppt != NULL) { 219 /* 220 * If this device is owned by a different VM then we 221 * cannot change its owner. 222 */ 223 if (ppt->vm != NULL && ppt->vm != vm) 224 return (EBUSY); 225 226 ppt->vm = vm; 227 iommu_add_device(vm_iommu_domain(vm), bus, slot, func); 228 return (0); 229 } 230 return (ENOENT); 231} 232 233int 234ppt_unassign_device(struct vm *vm, int bus, int slot, int func) 235{ 236 struct pptdev *ppt; 237 238 ppt = ppt_find(bus, slot, func); 239 if (ppt != NULL) { 240 /* 241 * If this device is not owned by this 'vm' then bail out. 242 */ 243 if (ppt->vm != vm) 244 return (EBUSY); 245 ppt_unmap_mmio(vm, ppt); 246 ppt_teardown_msi(ppt); 247 iommu_remove_device(vm_iommu_domain(vm), bus, slot, func); 248 ppt->vm = NULL; 249 return (0); 250 } 251 return (ENOENT); 252} 253 254int 255ppt_unassign_all(struct vm *vm) 256{ 257 int i, bus, slot, func; 258 device_t dev; 259 260 for (i = 0; i < num_pptdevs; i++) { 261 if (pptdevs[i].vm == vm) { 262 dev = pptdevs[i].dev; 263 bus = pci_get_bus(dev); 264 slot = pci_get_slot(dev); 265 func = pci_get_function(dev); 266 ppt_unassign_device(vm, bus, slot, func); 267 } 268 } 269 270 return (0); 271} 272 273int 274ppt_map_mmio(struct vm *vm, int bus, int slot, int func, 275 vm_paddr_t gpa, size_t len, vm_paddr_t hpa) 276{ 277 int i, error; 278 struct vm_memory_segment *seg; 279 struct pptdev *ppt; 280 281 ppt = ppt_find(bus, slot, func); 282 if (ppt != NULL) { 283 if (ppt->vm != vm) 284 return (EBUSY); 285 286 for (i = 0; i < MAX_MMIOSEGS; i++) { 287 seg = &ppt->mmio[i]; 288 if (seg->len == 0) { 289 error = vm_map_mmio(vm, gpa, len, hpa); 290 if (error == 0) { 291 seg->gpa = gpa; 292 seg->len = len; 293 seg->hpa = hpa; 294 } 295 return (error); 296 } 297 } 298 return (ENOSPC); 299 } 300 return (ENOENT); 301} 302 303static int 304pptintr(void *arg) 305{ 306 int vec; 307 struct pptdev *ppt; 308 struct pptintr_arg *pptarg; 309 310 pptarg = arg; 311 ppt = pptarg->pptdev; 312 vec = ppt->msi.vector + pptarg->msg; 313 314 if (ppt->vm != NULL) 315 (void) lapic_set_intr(ppt->vm, ppt->msi.vcpu, vec); 316 else { 317 /* 318 * XXX 319 * This is not expected to happen - panic? 320 */ 321 } 322 323 /* 324 * For legacy interrupts give other filters a chance in case 325 * the interrupt was not generated by the passthrough device. 326 */ 327 if (ppt->msi.startrid == 0) 328 return (FILTER_STRAY); 329 else 330 return (FILTER_HANDLED); 331} 332 333/* 334 * XXX 335 * When we try to free the MSI resource the kernel will bind the thread to 336 * the host cpu was originally handling the MSI. The function freeing the 337 * MSI vector (apic_free_vector()) will panic the kernel if the thread 338 * is already bound to a cpu. 339 * 340 * So, we temporarily unbind the vcpu thread before freeing the MSI resource. 341 */ 342static void 343PPT_TEARDOWN_MSI(struct vm *vm, int vcpu, struct pptdev *ppt) 344{ 345 int pincpu = -1; 346 347 vm_get_pinning(vm, vcpu, &pincpu); 348 349 if (pincpu >= 0) 350 vm_set_pinning(vm, vcpu, -1); 351 352 ppt_teardown_msi(ppt); 353 354 if (pincpu >= 0) 355 vm_set_pinning(vm, vcpu, pincpu); 356} 357 358int 359ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func, 360 int destcpu, int vector, int numvec) 361{ 362 int i, rid, flags; 363 int msi_count, startrid, error, tmp; 364 struct pptdev *ppt; 365 366 if ((destcpu >= VM_MAXCPU || destcpu < 0) || 367 (vector < 0 || vector > 255) || 368 (numvec < 0 || numvec > MAX_MSIMSGS)) 369 return (EINVAL); 370 371 ppt = ppt_find(bus, slot, func); 372 if (ppt == NULL) 373 return (ENOENT); 374 if (ppt->vm != vm) /* Make sure we own this device */ 375 return (EBUSY); 376 377 /* Free any allocated resources */ 378 PPT_TEARDOWN_MSI(vm, vcpu, ppt); 379 380 if (numvec == 0) /* nothing more to do */ 381 return (0); 382 383 flags = RF_ACTIVE; 384 msi_count = pci_msi_count(ppt->dev); 385 if (msi_count == 0) { 386 startrid = 0; /* legacy interrupt */ 387 msi_count = 1; 388 flags |= RF_SHAREABLE; 389 } else 390 startrid = 1; /* MSI */ 391 392 /* 393 * The device must be capable of supporting the number of vectors 394 * the guest wants to allocate. 395 */ 396 if (numvec > msi_count) 397 return (EINVAL); 398 399 /* 400 * Make sure that we can allocate all the MSI vectors that are needed 401 * by the guest. 402 */ 403 if (startrid == 1) { 404 tmp = numvec; 405 error = pci_alloc_msi(ppt->dev, &tmp); 406 if (error) 407 return (error); 408 else if (tmp != numvec) { 409 pci_release_msi(ppt->dev); 410 return (ENOSPC); 411 } else { 412 /* success */ 413 } 414 } 415 416 ppt->msi.vector = vector; 417 ppt->msi.vcpu = destcpu; 418 ppt->msi.startrid = startrid; 419 420 /* 421 * Allocate the irq resource and attach it to the interrupt handler. 422 */ 423 for (i = 0; i < numvec; i++) { 424 ppt->msi.num_msgs = i + 1; 425 ppt->msi.cookie[i] = NULL; 426 427 rid = startrid + i; 428 ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ, 429 &rid, flags); 430 if (ppt->msi.res[i] == NULL) 431 break; 432 433 ppt->msi.arg[i].pptdev = ppt; 434 ppt->msi.arg[i].msg = i; 435 436 error = bus_setup_intr(ppt->dev, ppt->msi.res[i], 437 INTR_TYPE_NET | INTR_MPSAFE, 438 pptintr, NULL, &ppt->msi.arg[i], 439 &ppt->msi.cookie[i]); 440 if (error != 0) 441 break; 442 } 443 444 if (i < numvec) { 445 PPT_TEARDOWN_MSI(vm, vcpu, ppt); 446 return (ENXIO); 447 } 448 449 return (0); 450} 451