/* pci_passthru.c revision 295124 */
1238384Sjkim/*- 2238384Sjkim * Copyright (c) 2011 NetApp, Inc. 3238384Sjkim * All rights reserved. 4238384Sjkim * 5238384Sjkim * Redistribution and use in source and binary forms, with or without 6238384Sjkim * modification, are permitted provided that the following conditions 7238384Sjkim * are met: 8238384Sjkim * 1. Redistributions of source code must retain the above copyright 9238384Sjkim * notice, this list of conditions and the following disclaimer. 10238384Sjkim * 2. Redistributions in binary form must reproduce the above copyright 11238384Sjkim * notice, this list of conditions and the following disclaimer in the 12238384Sjkim * documentation and/or other materials provided with the distribution. 13238384Sjkim * 14238384Sjkim * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15238384Sjkim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16238384Sjkim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17238384Sjkim * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18238384Sjkim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19238384Sjkim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20238384Sjkim * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21238384Sjkim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22238384Sjkim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23238384Sjkim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24238384Sjkim * SUCH DAMAGE. 
25238384Sjkim * 26238384Sjkim * $FreeBSD: stable/10/usr.sbin/bhyve/pci_passthru.c 295124 2016-02-01 14:56:11Z grehan $ 27238384Sjkim */ 28238384Sjkim 29238384Sjkim#include <sys/cdefs.h> 30238384Sjkim__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_passthru.c 295124 2016-02-01 14:56:11Z grehan $"); 31238384Sjkim 32238384Sjkim#include <sys/param.h> 33238384Sjkim#include <sys/types.h> 34238384Sjkim#include <sys/pciio.h> 35238384Sjkim#include <sys/ioctl.h> 36238384Sjkim 37238384Sjkim#include <dev/io/iodev.h> 38238384Sjkim#include <dev/pci/pcireg.h> 39238384Sjkim 40238384Sjkim#include <machine/iodev.h> 41238384Sjkim 42238384Sjkim#include <stdio.h> 43238384Sjkim#include <stdlib.h> 44238384Sjkim#include <string.h> 45238384Sjkim#include <errno.h> 46238384Sjkim#include <fcntl.h> 47238384Sjkim#include <unistd.h> 48238384Sjkim 49238384Sjkim#include <machine/vmm.h> 50238384Sjkim#include <vmmapi.h> 51238384Sjkim#include "pci_emul.h" 52238384Sjkim#include "mem.h" 53238384Sjkim 54238384Sjkim#ifndef _PATH_DEVPCI 55238384Sjkim#define _PATH_DEVPCI "/dev/pci" 56238384Sjkim#endif 57238384Sjkim 58238384Sjkim#ifndef _PATH_DEVIO 59238384Sjkim#define _PATH_DEVIO "/dev/io" 60238384Sjkim#endif 61238384Sjkim 62238384Sjkim#define LEGACY_SUPPORT 1 63238384Sjkim 64238384Sjkim#define MSIX_TABLE_COUNT(ctrl) (((ctrl) & PCIM_MSIXCTRL_TABLE_SIZE) + 1) 65238384Sjkim#define MSIX_CAPLEN 12 66238384Sjkim 67238384Sjkimstatic int pcifd = -1; 68238384Sjkimstatic int iofd = -1; 69238384Sjkim 70238384Sjkimstruct passthru_softc { 71238384Sjkim struct pci_devinst *psc_pi; 72238384Sjkim struct pcibar psc_bar[PCI_BARMAX + 1]; 73238384Sjkim struct { 74238384Sjkim int capoff; 75238384Sjkim int msgctrl; 76238384Sjkim int emulated; 77238384Sjkim } psc_msi; 78238384Sjkim struct { 79238384Sjkim int capoff; 80238384Sjkim } psc_msix; 81238384Sjkim struct pcisel psc_sel; 82238384Sjkim}; 83238384Sjkim 84238384Sjkimstatic int 85238384Sjkimmsi_caplen(int msgctrl) 86238384Sjkim{ 87238384Sjkim int len; 88238384Sjkim 
89238384Sjkim len = 10; /* minimum length of msi capability */ 90238384Sjkim 91238384Sjkim if (msgctrl & PCIM_MSICTRL_64BIT) 92238384Sjkim len += 4; 93238384Sjkim 94238384Sjkim#if 0 95238384Sjkim /* 96238384Sjkim * Ignore the 'mask' and 'pending' bits in the MSI capability. 97238384Sjkim * We'll let the guest manipulate them directly. 98238384Sjkim */ 99238384Sjkim if (msgctrl & PCIM_MSICTRL_VECTOR) 100238384Sjkim len += 10; 101238384Sjkim#endif 102238384Sjkim 103238384Sjkim return (len); 104238384Sjkim} 105238384Sjkim 106238384Sjkimstatic uint32_t 107238384Sjkimread_config(const struct pcisel *sel, long reg, int width) 108238384Sjkim{ 109238384Sjkim struct pci_io pi; 110238384Sjkim 111238384Sjkim bzero(&pi, sizeof(pi)); 112238384Sjkim pi.pi_sel = *sel; 113238384Sjkim pi.pi_reg = reg; 114238384Sjkim pi.pi_width = width; 115238384Sjkim 116238384Sjkim if (ioctl(pcifd, PCIOCREAD, &pi) < 0) 117238384Sjkim return (0); /* XXX */ 118238384Sjkim else 119238384Sjkim return (pi.pi_data); 120238384Sjkim} 121238384Sjkim 122238384Sjkimstatic void 123238384Sjkimwrite_config(const struct pcisel *sel, long reg, int width, uint32_t data) 124238384Sjkim{ 125238384Sjkim struct pci_io pi; 126238384Sjkim 127238384Sjkim bzero(&pi, sizeof(pi)); 128238384Sjkim pi.pi_sel = *sel; 129238384Sjkim pi.pi_reg = reg; 130238384Sjkim pi.pi_width = width; 131238384Sjkim pi.pi_data = data; 132238384Sjkim 133238384Sjkim (void)ioctl(pcifd, PCIOCWRITE, &pi); /* XXX */ 134238384Sjkim} 135238384Sjkim 136238384Sjkim#ifdef LEGACY_SUPPORT 137238384Sjkimstatic int 138238384Sjkimpassthru_add_msicap(struct pci_devinst *pi, int msgnum, int nextptr) 139238384Sjkim{ 140238384Sjkim int capoff, i; 141238384Sjkim struct msicap msicap; 142238384Sjkim u_char *capdata; 143238384Sjkim 144238384Sjkim pci_populate_msicap(&msicap, msgnum, nextptr); 145238384Sjkim 146238384Sjkim /* 147238384Sjkim * XXX 148238384Sjkim * Copy the msi capability structure in the last 16 bytes of the 149238384Sjkim * config space. 
This is wrong because it could shadow something 150238384Sjkim * useful to the device. 151238384Sjkim */ 152238384Sjkim capoff = 256 - roundup(sizeof(msicap), 4); 153238384Sjkim capdata = (u_char *)&msicap; 154238384Sjkim for (i = 0; i < sizeof(msicap); i++) 155238384Sjkim pci_set_cfgdata8(pi, capoff + i, capdata[i]); 156238384Sjkim 157238384Sjkim return (capoff); 158238384Sjkim} 159238384Sjkim#endif /* LEGACY_SUPPORT */ 160238384Sjkim 161238384Sjkimstatic int 162238384Sjkimcfginitmsi(struct passthru_softc *sc) 163238384Sjkim{ 164238384Sjkim int i, ptr, capptr, cap, sts, caplen, table_size; 165238384Sjkim uint32_t u32; 166238384Sjkim struct pcisel sel; 167238384Sjkim struct pci_devinst *pi; 168238384Sjkim struct msixcap msixcap; 169238384Sjkim uint32_t *msixcap_ptr; 170238384Sjkim 171238384Sjkim pi = sc->psc_pi; 172238384Sjkim sel = sc->psc_sel; 173238384Sjkim 174238384Sjkim /* 175238384Sjkim * Parse the capabilities and cache the location of the MSI 176238384Sjkim * and MSI-X capabilities. 
177238384Sjkim */ 178238384Sjkim sts = read_config(&sel, PCIR_STATUS, 2); 179238384Sjkim if (sts & PCIM_STATUS_CAPPRESENT) { 180238384Sjkim ptr = read_config(&sel, PCIR_CAP_PTR, 1); 181238384Sjkim while (ptr != 0 && ptr != 0xff) { 182238384Sjkim cap = read_config(&sel, ptr + PCICAP_ID, 1); 183238384Sjkim if (cap == PCIY_MSI) { 184238384Sjkim /* 185238384Sjkim * Copy the MSI capability into the config 186238384Sjkim * space of the emulated pci device 187238384Sjkim */ 188238384Sjkim sc->psc_msi.capoff = ptr; 189238384Sjkim sc->psc_msi.msgctrl = read_config(&sel, 190238384Sjkim ptr + 2, 2); 191238384Sjkim sc->psc_msi.emulated = 0; 192238384Sjkim caplen = msi_caplen(sc->psc_msi.msgctrl); 193238384Sjkim capptr = ptr; 194238384Sjkim while (caplen > 0) { 195238384Sjkim u32 = read_config(&sel, capptr, 4); 196238384Sjkim pci_set_cfgdata32(pi, capptr, u32); 197238384Sjkim caplen -= 4; 198238384Sjkim capptr += 4; 199238384Sjkim } 200238384Sjkim } else if (cap == PCIY_MSIX) { 201238384Sjkim /* 202238384Sjkim * Copy the MSI-X capability 203238384Sjkim */ 204238384Sjkim sc->psc_msix.capoff = ptr; 205238384Sjkim caplen = 12; 206238384Sjkim msixcap_ptr = (uint32_t*) &msixcap; 207238384Sjkim capptr = ptr; 208238384Sjkim while (caplen > 0) { 209238384Sjkim u32 = read_config(&sel, capptr, 4); 210238384Sjkim *msixcap_ptr = u32; 211238384Sjkim pci_set_cfgdata32(pi, capptr, u32); 212238384Sjkim caplen -= 4; 213238384Sjkim capptr += 4; 214238384Sjkim msixcap_ptr++; 215238384Sjkim } 216238384Sjkim } 217238384Sjkim ptr = read_config(&sel, ptr + PCICAP_NEXTPTR, 1); 218238384Sjkim } 219238384Sjkim } 220238384Sjkim 221238384Sjkim if (sc->psc_msix.capoff != 0) { 222238384Sjkim pi->pi_msix.pba_bar = 223238384Sjkim msixcap.pba_info & PCIM_MSIX_BIR_MASK; 224238384Sjkim pi->pi_msix.pba_offset = 225238384Sjkim msixcap.pba_info & ~PCIM_MSIX_BIR_MASK; 226238384Sjkim pi->pi_msix.table_bar = 227238384Sjkim msixcap.table_info & PCIM_MSIX_BIR_MASK; 228238384Sjkim pi->pi_msix.table_offset = 
229238384Sjkim msixcap.table_info & ~PCIM_MSIX_BIR_MASK; 230238384Sjkim pi->pi_msix.table_count = MSIX_TABLE_COUNT(msixcap.msgctrl); 231238384Sjkim pi->pi_msix.pba_size = PBA_SIZE(pi->pi_msix.table_count); 232238384Sjkim 233238384Sjkim /* Allocate the emulated MSI-X table array */ 234238384Sjkim table_size = pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE; 235238384Sjkim pi->pi_msix.table = calloc(1, table_size); 236238384Sjkim 237238384Sjkim /* Mask all table entries */ 238238384Sjkim for (i = 0; i < pi->pi_msix.table_count; i++) { 239238384Sjkim pi->pi_msix.table[i].vector_control |= 240238384Sjkim PCIM_MSIX_VCTRL_MASK; 241238384Sjkim } 242238384Sjkim } 243238384Sjkim 244238384Sjkim#ifdef LEGACY_SUPPORT 245238384Sjkim /* 246238384Sjkim * If the passthrough device does not support MSI then craft a 247238384Sjkim * MSI capability for it. We link the new MSI capability at the 248238384Sjkim * head of the list of capabilities. 249238384Sjkim */ 250238384Sjkim if ((sts & PCIM_STATUS_CAPPRESENT) != 0 && sc->psc_msi.capoff == 0) { 251238384Sjkim int origptr, msiptr; 252238384Sjkim origptr = read_config(&sel, PCIR_CAP_PTR, 1); 253238384Sjkim msiptr = passthru_add_msicap(pi, 1, origptr); 254238384Sjkim sc->psc_msi.capoff = msiptr; 255238384Sjkim sc->psc_msi.msgctrl = pci_get_cfgdata16(pi, msiptr + 2); 256238384Sjkim sc->psc_msi.emulated = 1; 257238384Sjkim pci_set_cfgdata8(pi, PCIR_CAP_PTR, msiptr); 258238384Sjkim } 259238384Sjkim#endif 260238384Sjkim 261238384Sjkim /* Make sure one of the capabilities is present */ 262238384Sjkim if (sc->psc_msi.capoff == 0 && sc->psc_msix.capoff == 0) 263238384Sjkim return (-1); 264238384Sjkim else 265238384Sjkim return (0); 266238384Sjkim} 267238384Sjkim 268238384Sjkimstatic uint64_t 269238384Sjkimmsix_table_read(struct passthru_softc *sc, uint64_t offset, int size) 270238384Sjkim{ 271238384Sjkim struct pci_devinst *pi; 272238384Sjkim struct msix_table_entry *entry; 273238384Sjkim uint8_t *src8; 274238384Sjkim uint16_t *src16; 
275238384Sjkim uint32_t *src32; 276238384Sjkim uint64_t *src64; 277238384Sjkim uint64_t data; 278238384Sjkim size_t entry_offset; 279238384Sjkim int index; 280238384Sjkim 281238384Sjkim pi = sc->psc_pi; 282238384Sjkim if (offset < pi->pi_msix.table_offset) 283238384Sjkim return (-1); 284238384Sjkim 285238384Sjkim offset -= pi->pi_msix.table_offset; 286238384Sjkim index = offset / MSIX_TABLE_ENTRY_SIZE; 287238384Sjkim if (index >= pi->pi_msix.table_count) 288238384Sjkim return (-1); 289238384Sjkim 290238384Sjkim entry = &pi->pi_msix.table[index]; 291238384Sjkim entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 292238384Sjkim 293238384Sjkim switch(size) { 294238384Sjkim case 1: 295238384Sjkim src8 = (uint8_t *)((void *)entry + entry_offset); 296238384Sjkim data = *src8; 297238384Sjkim break; 298238384Sjkim case 2: 299238384Sjkim src16 = (uint16_t *)((void *)entry + entry_offset); 300238384Sjkim data = *src16; 301238384Sjkim break; 302238384Sjkim case 4: 303238384Sjkim src32 = (uint32_t *)((void *)entry + entry_offset); 304238384Sjkim data = *src32; 305238384Sjkim break; 306238384Sjkim case 8: 307238384Sjkim src64 = (uint64_t *)((void *)entry + entry_offset); 308238384Sjkim data = *src64; 309238384Sjkim break; 310238384Sjkim default: 311238384Sjkim return (-1); 312238384Sjkim } 313238384Sjkim 314238384Sjkim return (data); 315238384Sjkim} 316238384Sjkim 317238384Sjkimstatic void 318238384Sjkimmsix_table_write(struct vmctx *ctx, int vcpu, struct passthru_softc *sc, 319238384Sjkim uint64_t offset, int size, uint64_t data) 320238384Sjkim{ 321238384Sjkim struct pci_devinst *pi; 322238384Sjkim struct msix_table_entry *entry; 323238384Sjkim uint32_t *dest; 324238384Sjkim size_t entry_offset; 325238384Sjkim uint32_t vector_control; 326238384Sjkim int error, index; 327238384Sjkim 328238384Sjkim pi = sc->psc_pi; 329238384Sjkim if (offset < pi->pi_msix.table_offset) 330238384Sjkim return; 331238384Sjkim 332238384Sjkim offset -= pi->pi_msix.table_offset; 333238384Sjkim index = 
offset / MSIX_TABLE_ENTRY_SIZE; 334238384Sjkim if (index >= pi->pi_msix.table_count) 335238384Sjkim return; 336238384Sjkim 337238384Sjkim entry = &pi->pi_msix.table[index]; 338238384Sjkim entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 339238384Sjkim 340238384Sjkim /* Only 4 byte naturally-aligned writes are supported */ 341238384Sjkim assert(size == 4); 342238384Sjkim assert(entry_offset % 4 == 0); 343238384Sjkim 344238384Sjkim vector_control = entry->vector_control; 345238384Sjkim dest = (uint32_t *)((void *)entry + entry_offset); 346238384Sjkim *dest = data; 347238384Sjkim /* If MSI-X hasn't been enabled, do nothing */ 348238384Sjkim if (pi->pi_msix.enabled) { 349238384Sjkim /* If the entry is masked, don't set it up */ 350238384Sjkim if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 || 351238384Sjkim (vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { 352238384Sjkim error = vm_setup_pptdev_msix(ctx, vcpu, 353238384Sjkim sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, 354238384Sjkim sc->psc_sel.pc_func, index, entry->addr, 355238384Sjkim entry->msg_data, entry->vector_control); 356238384Sjkim } 357238384Sjkim } 358238384Sjkim} 359238384Sjkim 360238384Sjkimstatic int 361238384Sjkiminit_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base) 362238384Sjkim{ 363238384Sjkim int b, s, f; 364238384Sjkim int error, idx; 365238384Sjkim size_t len, remaining; 366238384Sjkim uint32_t table_size, table_offset; 367238384Sjkim uint32_t pba_size, pba_offset; 368238384Sjkim vm_paddr_t start; 369238384Sjkim struct pci_devinst *pi = sc->psc_pi; 370238384Sjkim 371238384Sjkim assert(pci_msix_table_bar(pi) >= 0 && pci_msix_pba_bar(pi) >= 0); 372238384Sjkim 373238384Sjkim b = sc->psc_sel.pc_bus; 374238384Sjkim s = sc->psc_sel.pc_dev; 375238384Sjkim f = sc->psc_sel.pc_func; 376238384Sjkim 377238384Sjkim /* 378238384Sjkim * If the MSI-X table BAR maps memory intended for 379238384Sjkim * other uses, it is at least assured that the table 380238384Sjkim * either resides in its 
own page within the region, 381238384Sjkim * or it resides in a page shared with only the PBA. 382238384Sjkim */ 383238384Sjkim table_offset = rounddown2(pi->pi_msix.table_offset, 4096); 384238384Sjkim 385238384Sjkim table_size = pi->pi_msix.table_offset - table_offset; 386238384Sjkim table_size += pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE; 387238384Sjkim table_size = roundup2(table_size, 4096); 388238384Sjkim 389238384Sjkim if (pi->pi_msix.pba_bar == pi->pi_msix.table_bar) { 390238384Sjkim pba_offset = pi->pi_msix.pba_offset; 391238384Sjkim pba_size = pi->pi_msix.pba_size; 392238384Sjkim if (pba_offset >= table_offset + table_size || 393238384Sjkim table_offset >= pba_offset + pba_size) { 394238384Sjkim /* 395238384Sjkim * The PBA can reside in the same BAR as the MSI-x 396238384Sjkim * tables as long as it does not overlap with any 397238384Sjkim * naturally aligned page occupied by the tables. 398238384Sjkim */ 399238384Sjkim } else { 400238384Sjkim /* Need to also emulate the PBA, not supported yet */ 401238384Sjkim printf("Unsupported MSI-X configuration: %d/%d/%d\n", 402238384Sjkim b, s, f); 403238384Sjkim return (-1); 404238384Sjkim } 405238384Sjkim } 406238384Sjkim 407238384Sjkim idx = pi->pi_msix.table_bar; 408238384Sjkim start = pi->pi_bar[idx].addr; 409238384Sjkim remaining = pi->pi_bar[idx].size; 410238384Sjkim 411238384Sjkim /* Map everything before the MSI-X table */ 412238384Sjkim if (table_offset > 0) { 413238384Sjkim len = table_offset; 414238384Sjkim error = vm_map_pptdev_mmio(ctx, b, s, f, start, len, base); 415238384Sjkim if (error) 416238384Sjkim return (error); 417238384Sjkim 418238384Sjkim base += len; 419238384Sjkim start += len; 420238384Sjkim remaining -= len; 421238384Sjkim } 422238384Sjkim 423238384Sjkim /* Skip the MSI-X table */ 424238384Sjkim base += table_size; 425238384Sjkim start += table_size; 426238384Sjkim remaining -= table_size; 427238384Sjkim 428238384Sjkim /* Map everything beyond the end of the MSI-X table */ 
429238384Sjkim if (remaining > 0) { 430238384Sjkim len = remaining; 431238384Sjkim error = vm_map_pptdev_mmio(ctx, b, s, f, start, len, base); 432238384Sjkim if (error) 433238384Sjkim return (error); 434238384Sjkim } 435238384Sjkim 436238384Sjkim return (0); 437238384Sjkim} 438238384Sjkim 439238384Sjkimstatic int 440238384Sjkimcfginitbar(struct vmctx *ctx, struct passthru_softc *sc) 441238384Sjkim{ 442238384Sjkim int i, error; 443238384Sjkim struct pci_devinst *pi; 444238384Sjkim struct pci_bar_io bar; 445238384Sjkim enum pcibar_type bartype; 446238384Sjkim uint64_t base, size; 447238384Sjkim 448238384Sjkim pi = sc->psc_pi; 449238384Sjkim 450238384Sjkim /* 451238384Sjkim * Initialize BAR registers 452238384Sjkim */ 453238384Sjkim for (i = 0; i <= PCI_BARMAX; i++) { 454238384Sjkim bzero(&bar, sizeof(bar)); 455238384Sjkim bar.pbi_sel = sc->psc_sel; 456238384Sjkim bar.pbi_reg = PCIR_BAR(i); 457238384Sjkim 458238384Sjkim if (ioctl(pcifd, PCIOCGETBAR, &bar) < 0) 459238384Sjkim continue; 460238384Sjkim 461238384Sjkim if (PCI_BAR_IO(bar.pbi_base)) { 462238384Sjkim bartype = PCIBAR_IO; 463238384Sjkim base = bar.pbi_base & PCIM_BAR_IO_BASE; 464238384Sjkim } else { 465238384Sjkim switch (bar.pbi_base & PCIM_BAR_MEM_TYPE) { 466238384Sjkim case PCIM_BAR_MEM_64: 467238384Sjkim bartype = PCIBAR_MEM64; 468238384Sjkim break; 469238384Sjkim default: 470238384Sjkim bartype = PCIBAR_MEM32; 471238384Sjkim break; 472238384Sjkim } 473238384Sjkim base = bar.pbi_base & PCIM_BAR_MEM_BASE; 474238384Sjkim } 475238384Sjkim size = bar.pbi_length; 476238384Sjkim 477238384Sjkim if (bartype != PCIBAR_IO) { 478238384Sjkim if (((base | size) & PAGE_MASK) != 0) { 479238384Sjkim printf("passthru device %d/%d/%d BAR %d: " 480238384Sjkim "base %#lx or size %#lx not page aligned\n", 481238384Sjkim sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, 482238384Sjkim sc->psc_sel.pc_func, i, base, size); 483238384Sjkim return (-1); 484238384Sjkim } 485238384Sjkim } 486238384Sjkim 487238384Sjkim /* Cache information 
about the "real" BAR */ 488238384Sjkim sc->psc_bar[i].type = bartype; 489238384Sjkim sc->psc_bar[i].size = size; 490238384Sjkim sc->psc_bar[i].addr = base; 491238384Sjkim 492238384Sjkim /* Allocate the BAR in the guest I/O or MMIO space */ 493238384Sjkim error = pci_emul_alloc_pbar(pi, i, base, bartype, size); 494238384Sjkim if (error) 495238384Sjkim return (-1); 496238384Sjkim 497238384Sjkim /* The MSI-X table needs special handling */ 498238384Sjkim if (i == pci_msix_table_bar(pi)) { 499238384Sjkim error = init_msix_table(ctx, sc, base); 500238384Sjkim if (error) 501238384Sjkim return (-1); 502238384Sjkim } else if (bartype != PCIBAR_IO) { 503238384Sjkim /* Map the physical BAR in the guest MMIO space */ 504238384Sjkim error = vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus, 505238384Sjkim sc->psc_sel.pc_dev, sc->psc_sel.pc_func, 506238384Sjkim pi->pi_bar[i].addr, pi->pi_bar[i].size, base); 507238384Sjkim if (error) 508238384Sjkim return (-1); 509238384Sjkim } 510238384Sjkim 511238384Sjkim /* 512238384Sjkim * 64-bit BAR takes up two slots so skip the next one. 
513238384Sjkim */ 514238384Sjkim if (bartype == PCIBAR_MEM64) { 515238384Sjkim i++; 516238384Sjkim assert(i <= PCI_BARMAX); 517238384Sjkim sc->psc_bar[i].type = PCIBAR_MEMHI64; 518238384Sjkim } 519238384Sjkim } 520238384Sjkim return (0); 521238384Sjkim} 522238384Sjkim 523238384Sjkimstatic int 524238384Sjkimcfginit(struct vmctx *ctx, struct pci_devinst *pi, int bus, int slot, int func) 525238384Sjkim{ 526238384Sjkim int error; 527238384Sjkim struct passthru_softc *sc; 528238384Sjkim 529238384Sjkim error = 1; 530238384Sjkim sc = pi->pi_arg; 531238384Sjkim 532238384Sjkim bzero(&sc->psc_sel, sizeof(struct pcisel)); 533238384Sjkim sc->psc_sel.pc_bus = bus; 534238384Sjkim sc->psc_sel.pc_dev = slot; 535238384Sjkim sc->psc_sel.pc_func = func; 536238384Sjkim 537238384Sjkim if (cfginitmsi(sc) != 0) 538238384Sjkim goto done; 539238384Sjkim 540238384Sjkim if (cfginitbar(ctx, sc) != 0) 541238384Sjkim goto done; 542238384Sjkim 543238384Sjkim error = 0; /* success */ 544238384Sjkimdone: 545238384Sjkim return (error); 546238384Sjkim} 547238384Sjkim 548238384Sjkimstatic int 549238384Sjkimpassthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 550238384Sjkim{ 551238384Sjkim int bus, slot, func, error, memflags; 552238384Sjkim struct passthru_softc *sc; 553238384Sjkim 554238384Sjkim sc = NULL; 555238384Sjkim error = 1; 556238384Sjkim 557238384Sjkim memflags = vm_get_memflags(ctx); 558238384Sjkim if (!(memflags & VM_MEM_F_WIRED)) { 559238384Sjkim fprintf(stderr, "passthru requires guest memory to be wired\n"); 560238384Sjkim goto done; 561238384Sjkim } 562238384Sjkim 563238384Sjkim if (pcifd < 0) { 564238384Sjkim pcifd = open(_PATH_DEVPCI, O_RDWR, 0); 565238384Sjkim if (pcifd < 0) 566238384Sjkim goto done; 567238384Sjkim } 568238384Sjkim 569238384Sjkim if (iofd < 0) { 570238384Sjkim iofd = open(_PATH_DEVIO, O_RDWR, 0); 571238384Sjkim if (iofd < 0) 572238384Sjkim goto done; 573238384Sjkim } 574238384Sjkim 575238384Sjkim if (opts == NULL || 576238384Sjkim sscanf(opts, 
"%d/%d/%d", &bus, &slot, &func) != 3) 577238384Sjkim goto done; 578238384Sjkim 579238384Sjkim if (vm_assign_pptdev(ctx, bus, slot, func) != 0) 580238384Sjkim goto done; 581238384Sjkim 582238384Sjkim sc = calloc(1, sizeof(struct passthru_softc)); 583238384Sjkim 584238384Sjkim pi->pi_arg = sc; 585238384Sjkim sc->psc_pi = pi; 586238384Sjkim 587238384Sjkim /* initialize config space */ 588238384Sjkim if ((error = cfginit(ctx, pi, bus, slot, func)) != 0) 589238384Sjkim goto done; 590238384Sjkim 591238384Sjkim error = 0; /* success */ 592238384Sjkimdone: 593238384Sjkim if (error) { 594 free(sc); 595 vm_unassign_pptdev(ctx, bus, slot, func); 596 } 597 return (error); 598} 599 600static int 601bar_access(int coff) 602{ 603 if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) 604 return (1); 605 else 606 return (0); 607} 608 609static int 610msicap_access(struct passthru_softc *sc, int coff) 611{ 612 int caplen; 613 614 if (sc->psc_msi.capoff == 0) 615 return (0); 616 617 caplen = msi_caplen(sc->psc_msi.msgctrl); 618 619 if (coff >= sc->psc_msi.capoff && coff < sc->psc_msi.capoff + caplen) 620 return (1); 621 else 622 return (0); 623} 624 625static int 626msixcap_access(struct passthru_softc *sc, int coff) 627{ 628 if (sc->psc_msix.capoff == 0) 629 return (0); 630 631 return (coff >= sc->psc_msix.capoff && 632 coff < sc->psc_msix.capoff + MSIX_CAPLEN); 633} 634 635static int 636passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, 637 int coff, int bytes, uint32_t *rv) 638{ 639 struct passthru_softc *sc; 640 641 sc = pi->pi_arg; 642 643 /* 644 * PCI BARs and MSI capability is emulated. 645 */ 646 if (bar_access(coff) || msicap_access(sc, coff)) 647 return (-1); 648 649#ifdef LEGACY_SUPPORT 650 /* 651 * Emulate PCIR_CAP_PTR if this device does not support MSI capability 652 * natively. 
653 */ 654 if (sc->psc_msi.emulated) { 655 if (coff >= PCIR_CAP_PTR && coff < PCIR_CAP_PTR + 4) 656 return (-1); 657 } 658#endif 659 660 /* Everything else just read from the device's config space */ 661 *rv = read_config(&sc->psc_sel, coff, bytes); 662 663 return (0); 664} 665 666static int 667passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, 668 int coff, int bytes, uint32_t val) 669{ 670 int error, msix_table_entries, i; 671 struct passthru_softc *sc; 672 673 sc = pi->pi_arg; 674 675 /* 676 * PCI BARs are emulated 677 */ 678 if (bar_access(coff)) 679 return (-1); 680 681 /* 682 * MSI capability is emulated 683 */ 684 if (msicap_access(sc, coff)) { 685 msicap_cfgwrite(pi, sc->psc_msi.capoff, coff, bytes, val); 686 687 error = vm_setup_pptdev_msi(ctx, vcpu, sc->psc_sel.pc_bus, 688 sc->psc_sel.pc_dev, sc->psc_sel.pc_func, 689 pi->pi_msi.addr, pi->pi_msi.msg_data, 690 pi->pi_msi.maxmsgnum); 691 if (error != 0) { 692 printf("vm_setup_pptdev_msi error %d\r\n", errno); 693 exit(1); 694 } 695 return (0); 696 } 697 698 if (msixcap_access(sc, coff)) { 699 msixcap_cfgwrite(pi, sc->psc_msix.capoff, coff, bytes, val); 700 if (pi->pi_msix.enabled) { 701 msix_table_entries = pi->pi_msix.table_count; 702 for (i = 0; i < msix_table_entries; i++) { 703 error = vm_setup_pptdev_msix(ctx, vcpu, 704 sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, 705 sc->psc_sel.pc_func, i, 706 pi->pi_msix.table[i].addr, 707 pi->pi_msix.table[i].msg_data, 708 pi->pi_msix.table[i].vector_control); 709 710 if (error) { 711 printf("vm_setup_pptdev_msix error " 712 "%d\r\n", errno); 713 exit(1); 714 } 715 } 716 } 717 return (0); 718 } 719 720#ifdef LEGACY_SUPPORT 721 /* 722 * If this device does not support MSI natively then we cannot let 723 * the guest disable legacy interrupts from the device. It is the 724 * legacy interrupt that is triggering the virtual MSI to the guest. 
725 */ 726 if (sc->psc_msi.emulated && pci_msi_enabled(pi)) { 727 if (coff == PCIR_COMMAND && bytes == 2) 728 val &= ~PCIM_CMD_INTxDIS; 729 } 730#endif 731 732 write_config(&sc->psc_sel, coff, bytes, val); 733 734 return (0); 735} 736 737static void 738passthru_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 739 uint64_t offset, int size, uint64_t value) 740{ 741 struct passthru_softc *sc; 742 struct iodev_pio_req pio; 743 744 sc = pi->pi_arg; 745 746 if (baridx == pci_msix_table_bar(pi)) { 747 msix_table_write(ctx, vcpu, sc, offset, size, value); 748 } else { 749 assert(pi->pi_bar[baridx].type == PCIBAR_IO); 750 bzero(&pio, sizeof(struct iodev_pio_req)); 751 pio.access = IODEV_PIO_WRITE; 752 pio.port = sc->psc_bar[baridx].addr + offset; 753 pio.width = size; 754 pio.val = value; 755 756 (void)ioctl(iofd, IODEV_PIO, &pio); 757 } 758} 759 760static uint64_t 761passthru_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 762 uint64_t offset, int size) 763{ 764 struct passthru_softc *sc; 765 struct iodev_pio_req pio; 766 uint64_t val; 767 768 sc = pi->pi_arg; 769 770 if (baridx == pci_msix_table_bar(pi)) { 771 val = msix_table_read(sc, offset, size); 772 } else { 773 assert(pi->pi_bar[baridx].type == PCIBAR_IO); 774 bzero(&pio, sizeof(struct iodev_pio_req)); 775 pio.access = IODEV_PIO_READ; 776 pio.port = sc->psc_bar[baridx].addr + offset; 777 pio.width = size; 778 pio.val = 0; 779 780 (void)ioctl(iofd, IODEV_PIO, &pio); 781 782 val = pio.val; 783 } 784 785 return (val); 786} 787 788struct pci_devemu passthru = { 789 .pe_emu = "passthru", 790 .pe_init = passthru_init, 791 .pe_cfgwrite = passthru_cfgwrite, 792 .pe_cfgread = passthru_cfgread, 793 .pe_barwrite = passthru_write, 794 .pe_barread = passthru_read, 795}; 796PCI_EMUL_SET(passthru); 797