ppt.c revision 221828
1/*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD$"); 31 32#include <sys/param.h> 33#include <sys/systm.h> 34#include <sys/kernel.h> 35#include <sys/module.h> 36#include <sys/bus.h> 37#include <sys/pciio.h> 38#include <sys/rman.h> 39 40#include <dev/pci/pcivar.h> 41#include <dev/pci/pcireg.h> 42 43#include <machine/resource.h> 44 45#include <machine/vmm.h> 46#include <machine/vmm_dev.h> 47 48#include "vmm_lapic.h" 49#include "vmm_ktr.h" 50 51#include "iommu.h" 52#include "ppt.h" 53 54#define MAX_PPTDEVS (sizeof(pptdevs) / sizeof(pptdevs[0])) 55#define MAX_MMIOSEGS (PCIR_MAX_BAR_0 + 1) 56#define MAX_MSIMSGS 32 57 58struct pptintr_arg { /* pptintr(pptintr_arg) */ 59 struct pptdev *pptdev; 60 int msg; 61}; 62 63static struct pptdev { 64 device_t dev; 65 struct vm *vm; /* owner of this device */ 66 struct vm_memory_segment mmio[MAX_MMIOSEGS]; 67 struct { 68 int num_msgs; /* guest state */ 69 int vector; 70 int vcpu; 71 72 int startrid; /* host state */ 73 struct resource *res[MAX_MSIMSGS]; 74 void *cookie[MAX_MSIMSGS]; 75 struct pptintr_arg arg[MAX_MSIMSGS]; 76 } msi; 77} pptdevs[32]; 78 79static int num_pptdevs; 80 81static int 82ppt_probe(device_t dev) 83{ 84 int bus, slot, func; 85 struct pci_devinfo *dinfo; 86 87 dinfo = (struct pci_devinfo *)device_get_ivars(dev); 88 89 bus = pci_get_bus(dev); 90 slot = pci_get_slot(dev); 91 func = pci_get_function(dev); 92 93 /* 94 * To qualify as a pci passthrough device a device must: 95 * - be allowed by administrator to be used in this role 96 * - be an endpoint device 97 */ 98 if (vmm_is_pptdev(bus, slot, func) && 99 (dinfo->cfg.hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_NORMAL) 100 return (0); 101 else 102 return (ENXIO); 103} 104 105static int 106ppt_attach(device_t dev) 107{ 108 int n; 109 110 if (num_pptdevs >= MAX_PPTDEVS) { 111 printf("ppt_attach: maximum number of pci passthrough devices " 112 "exceeded\n"); 113 return (ENXIO); 114 } 115 116 n = num_pptdevs++; 117 pptdevs[n].dev = dev; 118 119 if (bootverbose) 120 device_printf(dev, "attached\n"); 121 122 return (0); 123} 124 125static int 126ppt_detach(device_t dev) 127{ 128 /* 129 * XXX check whether there are any pci passthrough devices assigned 130 * to guests before we allow this driver to detach. 131 */ 132 133 return (0); 134} 135 136static device_method_t ppt_methods[] = { 137 /* Device interface */ 138 DEVMETHOD(device_probe, ppt_probe), 139 DEVMETHOD(device_attach, ppt_attach), 140 DEVMETHOD(device_detach, ppt_detach), 141 {0, 0} 142}; 143 144static devclass_t ppt_devclass; 145DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, 0); 146DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL); 147 148static struct pptdev * 149ppt_find(int bus, int slot, int func) 150{ 151 device_t dev; 152 int i, b, s, f; 153 154 for (i = 0; i < num_pptdevs; i++) { 155 dev = pptdevs[i].dev; 156 b = pci_get_bus(dev); 157 s = pci_get_slot(dev); 158 f = pci_get_function(dev); 159 if (bus == b && slot == s && func == f) 160 return (&pptdevs[i]); 161 } 162 return (NULL); 163} 164 165static void 166ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt) 167{ 168 int i; 169 struct vm_memory_segment *seg; 170 171 for (i = 0; i < MAX_MMIOSEGS; i++) { 172 seg = &ppt->mmio[i]; 173 if (seg->len == 0) 174 continue; 175 (void)vm_unmap_mmio(vm, seg->gpa, seg->len); 176 bzero(seg, sizeof(struct vm_memory_segment)); 177 } 178} 179 180static void 181ppt_teardown_msi(struct pptdev *ppt) 182{ 183 int i, rid; 184 void *cookie; 185 struct resource *res; 186 187 if (ppt->msi.num_msgs == 0) 188 return; 189 190 for (i = 0; i < ppt->msi.num_msgs; i++) { 191 rid = ppt->msi.startrid + i; 192 res = ppt->msi.res[i]; 193 cookie = ppt->msi.cookie[i]; 194 195 if (cookie != NULL) 196 bus_teardown_intr(ppt->dev, res, cookie); 197 198 if (res != NULL) 199 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res); 200 201 ppt->msi.res[i] = NULL; 202 ppt->msi.cookie[i] = NULL; 203 } 204 205 if (ppt->msi.startrid == 1) 206 pci_release_msi(ppt->dev); 207 208 ppt->msi.num_msgs = 0; 209} 210 211int 212ppt_assign_device(struct vm *vm, int bus, int slot, int func) 213{ 214 struct pptdev *ppt; 215 216 ppt = ppt_find(bus, slot, func); 217 if (ppt != NULL) { 218 /* 219 * If this device is owned by a different VM then we 220 * cannot change its owner. 221 */ 222 if (ppt->vm != NULL && ppt->vm != vm) 223 return (EBUSY); 224 225 ppt->vm = vm; 226 iommu_add_device(vm_iommu_domain(vm), bus, slot, func); 227 return (0); 228 } 229 return (ENOENT); 230} 231 232int 233ppt_unassign_device(struct vm *vm, int bus, int slot, int func) 234{ 235 struct pptdev *ppt; 236 237 ppt = ppt_find(bus, slot, func); 238 if (ppt != NULL) { 239 /* 240 * If this device is not owned by this 'vm' then bail out. 241 */ 242 if (ppt->vm != vm) 243 return (EBUSY); 244 ppt_unmap_mmio(vm, ppt); 245 ppt_teardown_msi(ppt); 246 iommu_remove_device(vm_iommu_domain(vm), bus, slot, func); 247 ppt->vm = NULL; 248 return (0); 249 } 250 return (ENOENT); 251} 252 253int 254ppt_unassign_all(struct vm *vm) 255{ 256 int i, bus, slot, func; 257 device_t dev; 258 259 for (i = 0; i < num_pptdevs; i++) { 260 if (pptdevs[i].vm == vm) { 261 dev = pptdevs[i].dev; 262 bus = pci_get_bus(dev); 263 slot = pci_get_slot(dev); 264 func = pci_get_function(dev); 265 ppt_unassign_device(vm, bus, slot, func); 266 } 267 } 268 269 return (0); 270} 271 272int 273ppt_map_mmio(struct vm *vm, int bus, int slot, int func, 274 vm_paddr_t gpa, size_t len, vm_paddr_t hpa) 275{ 276 int i, error; 277 struct vm_memory_segment *seg; 278 struct pptdev *ppt; 279 280 ppt = ppt_find(bus, slot, func); 281 if (ppt != NULL) { 282 if (ppt->vm != vm) 283 return (EBUSY); 284 285 for (i = 0; i < MAX_MMIOSEGS; i++) { 286 seg = &ppt->mmio[i]; 287 if (seg->len == 0) { 288 error = vm_map_mmio(vm, gpa, len, hpa); 289 if (error == 0) { 290 seg->gpa = gpa; 291 seg->len = len; 292 seg->hpa = hpa; 293 } 294 return (error); 295 } 296 } 297 return (ENOSPC); 298 } 299 return (ENOENT); 300} 301 302static int 303pptintr(void *arg) 304{ 305 int vec; 306 struct pptdev *ppt; 307 struct pptintr_arg *pptarg; 308 309 pptarg = arg; 310 ppt = pptarg->pptdev; 311 vec = ppt->msi.vector + pptarg->msg; 312 313 if (ppt->vm != NULL) 314 (void) lapic_set_intr(ppt->vm, ppt->msi.vcpu, vec); 315 else { 316 /* 317 * XXX 318 * This is not expected to happen - panic? 319 */ 320 } 321 322 /* 323 * For legacy interrupts give other filters a chance in case 324 * the interrupt was not generated by the passthrough device. 325 */ 326 if (ppt->msi.startrid == 0) 327 return (FILTER_STRAY); 328 else 329 return (FILTER_HANDLED); 330} 331 332/* 333 * XXX 334 * When we try to free the MSI resource the kernel will bind the thread to 335 * the host cpu was originally handling the MSI. The function freeing the 336 * MSI vector (apic_free_vector()) will panic the kernel if the thread 337 * is already bound to a cpu. 338 * 339 * So, we temporarily unbind the vcpu thread before freeing the MSI resource. 340 */ 341static void 342PPT_TEARDOWN_MSI(struct vm *vm, int vcpu, struct pptdev *ppt) 343{ 344 int pincpu = -1; 345 346 vm_get_pinning(vm, vcpu, &pincpu); 347 348 if (pincpu >= 0) 349 vm_set_pinning(vm, vcpu, -1); 350 351 ppt_teardown_msi(ppt); 352 353 if (pincpu >= 0) 354 vm_set_pinning(vm, vcpu, pincpu); 355} 356 357int 358ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func, 359 int destcpu, int vector, int numvec) 360{ 361 int i, rid, flags; 362 int msi_count, startrid, error, tmp; 363 struct pptdev *ppt; 364 365 if ((destcpu >= VM_MAXCPU || destcpu < 0) || 366 (vector < 0 || vector > 255) || 367 (numvec < 0 || numvec > MAX_MSIMSGS)) 368 return (EINVAL); 369 370 ppt = ppt_find(bus, slot, func); 371 if (ppt == NULL) 372 return (ENOENT); 373 if (ppt->vm != vm) /* Make sure we own this device */ 374 return (EBUSY); 375 376 /* Free any allocated resources */ 377 PPT_TEARDOWN_MSI(vm, vcpu, ppt); 378 379 if (numvec == 0) /* nothing more to do */ 380 return (0); 381 382 flags = RF_ACTIVE; 383 msi_count = pci_msi_count(ppt->dev); 384 if (msi_count == 0) { 385 startrid = 0; /* legacy interrupt */ 386 msi_count = 1; 387 flags |= RF_SHAREABLE; 388 } else 389 startrid = 1; /* MSI */ 390 391 /* 392 * The device must be capable of supporting the number of vectors 393 * the guest wants to allocate. 394 */ 395 if (numvec > msi_count) 396 return (EINVAL); 397 398 /* 399 * Make sure that we can allocate all the MSI vectors that are needed 400 * by the guest. 401 */ 402 if (startrid == 1) { 403 tmp = numvec; 404 error = pci_alloc_msi(ppt->dev, &tmp); 405 if (error) 406 return (error); 407 else if (tmp != numvec) { 408 pci_release_msi(ppt->dev); 409 return (ENOSPC); 410 } else { 411 /* success */ 412 } 413 } 414 415 ppt->msi.vector = vector; 416 ppt->msi.vcpu = destcpu; 417 ppt->msi.startrid = startrid; 418 419 /* 420 * Allocate the irq resource and attach it to the interrupt handler. 421 */ 422 for (i = 0; i < numvec; i++) { 423 ppt->msi.num_msgs = i + 1; 424 ppt->msi.cookie[i] = NULL; 425 426 rid = startrid + i; 427 ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ, 428 &rid, flags); 429 if (ppt->msi.res[i] == NULL) 430 break; 431 432 ppt->msi.arg[i].pptdev = ppt; 433 ppt->msi.arg[i].msg = i; 434 435 error = bus_setup_intr(ppt->dev, ppt->msi.res[i], 436 INTR_TYPE_NET | INTR_MPSAFE | INTR_FAST, 437 pptintr, NULL, &ppt->msi.arg[i], 438 &ppt->msi.cookie[i]); 439 if (error != 0) 440 break; 441 } 442 443 if (i < numvec) { 444 PPT_TEARDOWN_MSI(vm, vcpu, ppt); 445 return (ENXIO); 446 } 447 448 return (0); 449} 450