1221828Sgrehan/*- 2221828Sgrehan * Copyright (c) 2011 NetApp, Inc. 3221828Sgrehan * All rights reserved. 4221828Sgrehan * 5221828Sgrehan * Redistribution and use in source and binary forms, with or without 6221828Sgrehan * modification, are permitted provided that the following conditions 7221828Sgrehan * are met: 8221828Sgrehan * 1. Redistributions of source code must retain the above copyright 9221828Sgrehan * notice, this list of conditions and the following disclaimer. 10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright 11221828Sgrehan * notice, this list of conditions and the following disclaimer in the 12221828Sgrehan * documentation and/or other materials provided with the distribution. 13221828Sgrehan * 14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17221828Sgrehan * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24221828Sgrehan * SUCH DAMAGE. 25221828Sgrehan * 26221828Sgrehan * $FreeBSD: releng/11.0/usr.sbin/bhyve/pci_passthru.c 302365 2016-07-06 05:05:03Z ngie $ 27221828Sgrehan */ 28221828Sgrehan 29221828Sgrehan#include <sys/cdefs.h> 30221828Sgrehan__FBSDID("$FreeBSD: releng/11.0/usr.sbin/bhyve/pci_passthru.c 302365 2016-07-06 05:05:03Z ngie $"); 31221828Sgrehan 32221828Sgrehan#include <sys/param.h> 33221828Sgrehan#include <sys/types.h> 34297932Sjhb#include <sys/mman.h> 35221828Sgrehan#include <sys/pciio.h> 36221828Sgrehan#include <sys/ioctl.h> 37221828Sgrehan 38221828Sgrehan#include <dev/io/iodev.h> 39245749Sneel#include <dev/pci/pcireg.h> 40245749Sneel 41221828Sgrehan#include <machine/iodev.h> 42221828Sgrehan 43221828Sgrehan#include <stdio.h> 44221828Sgrehan#include <stdlib.h> 45221828Sgrehan#include <string.h> 46298295Sjhb#include <err.h> 47221828Sgrehan#include <fcntl.h> 48221828Sgrehan#include <unistd.h> 49221828Sgrehan 50221828Sgrehan#include <machine/vmm.h> 51221828Sgrehan#include <vmmapi.h> 52221828Sgrehan#include "pci_emul.h" 53241744Sgrehan#include "mem.h" 54221828Sgrehan 55221828Sgrehan#ifndef _PATH_DEVPCI 56221828Sgrehan#define _PATH_DEVPCI "/dev/pci" 57221828Sgrehan#endif 58221828Sgrehan 59221828Sgrehan#ifndef _PATH_DEVIO 60221828Sgrehan#define _PATH_DEVIO "/dev/io" 61221828Sgrehan#endif 62221828Sgrehan 63297932Sjhb#ifndef _PATH_MEM 64297932Sjhb#define _PATH_MEM "/dev/mem" 65297932Sjhb#endif 66297932Sjhb 67221828Sgrehan#define LEGACY_SUPPORT 1 68221828Sgrehan 69245749Sneel#define MSIX_TABLE_COUNT(ctrl) (((ctrl) & PCIM_MSIXCTRL_TABLE_SIZE) + 1) 70234761Sgrehan#define MSIX_CAPLEN 12 71234761Sgrehan 72221828Sgrehanstatic int pcifd = -1; 73221828Sgrehanstatic int iofd = -1; 74297932Sjhbstatic int memfd = -1; 75221828Sgrehan 76221828Sgrehanstruct passthru_softc { 77221828Sgrehan struct pci_devinst *psc_pi; 78221828Sgrehan struct pcibar psc_bar[PCI_BARMAX + 1]; 79221828Sgrehan struct { 80221828Sgrehan int capoff; 81221828Sgrehan int msgctrl; 82221828Sgrehan int emulated; 83221828Sgrehan } psc_msi; 84234761Sgrehan struct { 85234761Sgrehan int capoff; 86234761Sgrehan } psc_msix; 87221828Sgrehan struct pcisel psc_sel; 88221828Sgrehan}; 89221828Sgrehan 90221828Sgrehanstatic int 91221828Sgrehanmsi_caplen(int msgctrl) 92221828Sgrehan{ 93221828Sgrehan int len; 94221828Sgrehan 95221828Sgrehan len = 10; /* minimum length of msi capability */ 96221828Sgrehan 97221828Sgrehan if (msgctrl & PCIM_MSICTRL_64BIT) 98221828Sgrehan len += 4; 99221828Sgrehan 100221828Sgrehan#if 0 101221828Sgrehan /* 102221828Sgrehan * Ignore the 'mask' and 'pending' bits in the MSI capability. 103221828Sgrehan * We'll let the guest manipulate them directly. 104221828Sgrehan */ 105221828Sgrehan if (msgctrl & PCIM_MSICTRL_VECTOR) 106221828Sgrehan len += 10; 107221828Sgrehan#endif 108221828Sgrehan 109221828Sgrehan return (len); 110221828Sgrehan} 111221828Sgrehan 112221828Sgrehanstatic uint32_t 113221828Sgrehanread_config(const struct pcisel *sel, long reg, int width) 114221828Sgrehan{ 115221828Sgrehan struct pci_io pi; 116221828Sgrehan 117221828Sgrehan bzero(&pi, sizeof(pi)); 118221828Sgrehan pi.pi_sel = *sel; 119221828Sgrehan pi.pi_reg = reg; 120221828Sgrehan pi.pi_width = width; 121221828Sgrehan 122221828Sgrehan if (ioctl(pcifd, PCIOCREAD, &pi) < 0) 123221828Sgrehan return (0); /* XXX */ 124221828Sgrehan else 125221828Sgrehan return (pi.pi_data); 126221828Sgrehan} 127221828Sgrehan 128221828Sgrehanstatic void 129221828Sgrehanwrite_config(const struct pcisel *sel, long reg, int width, uint32_t data) 130221828Sgrehan{ 131221828Sgrehan struct pci_io pi; 132221828Sgrehan 133221828Sgrehan bzero(&pi, sizeof(pi)); 134221828Sgrehan pi.pi_sel = *sel; 135221828Sgrehan pi.pi_reg = reg; 136221828Sgrehan pi.pi_width = width; 137221828Sgrehan pi.pi_data = data; 138221828Sgrehan 139221828Sgrehan (void)ioctl(pcifd, PCIOCWRITE, &pi); /* XXX */ 140221828Sgrehan} 141221828Sgrehan 142221828Sgrehan#ifdef LEGACY_SUPPORT 143221828Sgrehanstatic int 144221828Sgrehanpassthru_add_msicap(struct pci_devinst *pi, int msgnum, int nextptr) 145221828Sgrehan{ 146221828Sgrehan int capoff, i; 147221828Sgrehan struct msicap msicap; 148221828Sgrehan u_char *capdata; 149221828Sgrehan 150221828Sgrehan pci_populate_msicap(&msicap, msgnum, nextptr); 151221828Sgrehan 152221828Sgrehan /* 153221828Sgrehan * XXX 154221828Sgrehan * Copy the msi capability structure in the last 16 bytes of the 155221828Sgrehan * config space. This is wrong because it could shadow something 156221828Sgrehan * useful to the device. 157221828Sgrehan */ 158221828Sgrehan capoff = 256 - roundup(sizeof(msicap), 4); 159221828Sgrehan capdata = (u_char *)&msicap; 160221828Sgrehan for (i = 0; i < sizeof(msicap); i++) 161221828Sgrehan pci_set_cfgdata8(pi, capoff + i, capdata[i]); 162221828Sgrehan 163221828Sgrehan return (capoff); 164221828Sgrehan} 165221828Sgrehan#endif /* LEGACY_SUPPORT */ 166221828Sgrehan 167221828Sgrehanstatic int 168221828Sgrehancfginitmsi(struct passthru_softc *sc) 169221828Sgrehan{ 170245749Sneel int i, ptr, capptr, cap, sts, caplen, table_size; 171221828Sgrehan uint32_t u32; 172221828Sgrehan struct pcisel sel; 173221828Sgrehan struct pci_devinst *pi; 174234761Sgrehan struct msixcap msixcap; 175234761Sgrehan uint32_t *msixcap_ptr; 176221828Sgrehan 177221828Sgrehan pi = sc->psc_pi; 178221828Sgrehan sel = sc->psc_sel; 179221828Sgrehan 180221828Sgrehan /* 181221828Sgrehan * Parse the capabilities and cache the location of the MSI 182234761Sgrehan * and MSI-X capabilities. 183221828Sgrehan */ 184221828Sgrehan sts = read_config(&sel, PCIR_STATUS, 2); 185221828Sgrehan if (sts & PCIM_STATUS_CAPPRESENT) { 186221828Sgrehan ptr = read_config(&sel, PCIR_CAP_PTR, 1); 187221828Sgrehan while (ptr != 0 && ptr != 0xff) { 188221828Sgrehan cap = read_config(&sel, ptr + PCICAP_ID, 1); 189221828Sgrehan if (cap == PCIY_MSI) { 190221828Sgrehan /* 191221828Sgrehan * Copy the MSI capability into the config 192221828Sgrehan * space of the emulated pci device 193221828Sgrehan */ 194221828Sgrehan sc->psc_msi.capoff = ptr; 195221828Sgrehan sc->psc_msi.msgctrl = read_config(&sel, 196221828Sgrehan ptr + 2, 2); 197221828Sgrehan sc->psc_msi.emulated = 0; 198221828Sgrehan caplen = msi_caplen(sc->psc_msi.msgctrl); 199234761Sgrehan capptr = ptr; 200221828Sgrehan while (caplen > 0) { 201234761Sgrehan u32 = read_config(&sel, capptr, 4); 202234761Sgrehan pci_set_cfgdata32(pi, capptr, u32); 203221828Sgrehan caplen -= 4; 204234761Sgrehan capptr += 4; 205221828Sgrehan } 206234761Sgrehan } else if (cap == PCIY_MSIX) { 207234761Sgrehan /* 208234761Sgrehan * Copy the MSI-X capability 209234761Sgrehan */ 210234761Sgrehan sc->psc_msix.capoff = ptr; 211234761Sgrehan caplen = 12; 212234761Sgrehan msixcap_ptr = (uint32_t*) &msixcap; 213234761Sgrehan capptr = ptr; 214234761Sgrehan while (caplen > 0) { 215234761Sgrehan u32 = read_config(&sel, capptr, 4); 216234761Sgrehan *msixcap_ptr = u32; 217234761Sgrehan pci_set_cfgdata32(pi, capptr, u32); 218234761Sgrehan caplen -= 4; 219234761Sgrehan capptr += 4; 220234761Sgrehan msixcap_ptr++; 221234761Sgrehan } 222221828Sgrehan } 223221828Sgrehan ptr = read_config(&sel, ptr + PCICAP_NEXTPTR, 1); 224221828Sgrehan } 225221828Sgrehan } 226221828Sgrehan 227241744Sgrehan if (sc->psc_msix.capoff != 0) { 228241744Sgrehan pi->pi_msix.pba_bar = 229245749Sneel msixcap.pba_info & PCIM_MSIX_BIR_MASK; 230241744Sgrehan pi->pi_msix.pba_offset = 231245749Sneel msixcap.pba_info & ~PCIM_MSIX_BIR_MASK; 232241744Sgrehan pi->pi_msix.table_bar = 233245749Sneel msixcap.table_info & PCIM_MSIX_BIR_MASK; 234241744Sgrehan pi->pi_msix.table_offset = 235245749Sneel msixcap.table_info & ~PCIM_MSIX_BIR_MASK; 236241744Sgrehan pi->pi_msix.table_count = MSIX_TABLE_COUNT(msixcap.msgctrl); 237262184Sneel pi->pi_msix.pba_size = PBA_SIZE(pi->pi_msix.table_count); 238245749Sneel 239245749Sneel /* Allocate the emulated MSI-X table array */ 240245749Sneel table_size = pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE; 241264770Sdelphij pi->pi_msix.table = calloc(1, table_size); 242245749Sneel 243245749Sneel /* Mask all table entries */ 244245749Sneel for (i = 0; i < pi->pi_msix.table_count; i++) { 245245749Sneel pi->pi_msix.table[i].vector_control |= 246245749Sneel PCIM_MSIX_VCTRL_MASK; 247245749Sneel } 248241744Sgrehan } 249234761Sgrehan 250221828Sgrehan#ifdef LEGACY_SUPPORT 251221828Sgrehan /* 252221828Sgrehan * If the passthrough device does not support MSI then craft a 253221828Sgrehan * MSI capability for it. We link the new MSI capability at the 254221828Sgrehan * head of the list of capabilities. 255221828Sgrehan */ 256221828Sgrehan if ((sts & PCIM_STATUS_CAPPRESENT) != 0 && sc->psc_msi.capoff == 0) { 257221828Sgrehan int origptr, msiptr; 258221828Sgrehan origptr = read_config(&sel, PCIR_CAP_PTR, 1); 259221828Sgrehan msiptr = passthru_add_msicap(pi, 1, origptr); 260221828Sgrehan sc->psc_msi.capoff = msiptr; 261221828Sgrehan sc->psc_msi.msgctrl = pci_get_cfgdata16(pi, msiptr + 2); 262221828Sgrehan sc->psc_msi.emulated = 1; 263221828Sgrehan pci_set_cfgdata8(pi, PCIR_CAP_PTR, msiptr); 264221828Sgrehan } 265221828Sgrehan#endif 266221828Sgrehan 267234761Sgrehan /* Make sure one of the capabilities is present */ 268234761Sgrehan if (sc->psc_msi.capoff == 0 && sc->psc_msix.capoff == 0) 269221828Sgrehan return (-1); 270221828Sgrehan else 271221828Sgrehan return (0); 272221828Sgrehan} 273221828Sgrehan 274241744Sgrehanstatic uint64_t 275241744Sgrehanmsix_table_read(struct passthru_softc *sc, uint64_t offset, int size) 276234761Sgrehan{ 277234761Sgrehan struct pci_devinst *pi; 278241744Sgrehan struct msix_table_entry *entry; 279234761Sgrehan uint8_t *src8; 280234761Sgrehan uint16_t *src16; 281234761Sgrehan uint32_t *src32; 282234761Sgrehan uint64_t *src64; 283241744Sgrehan uint64_t data; 284241744Sgrehan size_t entry_offset; 285241744Sgrehan int index; 286234761Sgrehan 287234761Sgrehan pi = sc->psc_pi; 288297932Sjhb if (offset >= pi->pi_msix.pba_offset && 289297932Sjhb offset < pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { 290297932Sjhb switch(size) { 291297932Sjhb case 1: 292297932Sjhb src8 = (uint8_t *)(pi->pi_msix.pba_page + offset - 293297932Sjhb pi->pi_msix.pba_page_offset); 294297932Sjhb data = *src8; 295297932Sjhb break; 296297932Sjhb case 2: 297297932Sjhb src16 = (uint16_t *)(pi->pi_msix.pba_page + offset - 298297932Sjhb pi->pi_msix.pba_page_offset); 299297932Sjhb data = *src16; 300297932Sjhb break; 301297932Sjhb case 4: 302297932Sjhb src32 = (uint32_t *)(pi->pi_msix.pba_page + offset - 303297932Sjhb pi->pi_msix.pba_page_offset); 304297932Sjhb data = *src32; 305297932Sjhb break; 306297932Sjhb case 8: 307297932Sjhb src64 = (uint64_t *)(pi->pi_msix.pba_page + offset - 308297932Sjhb pi->pi_msix.pba_page_offset); 309297932Sjhb data = *src64; 310297932Sjhb break; 311297932Sjhb default: 312297932Sjhb return (-1); 313297932Sjhb } 314297932Sjhb return (data); 315297932Sjhb } 316297932Sjhb 317262184Sneel if (offset < pi->pi_msix.table_offset) 318262184Sneel return (-1); 319262184Sneel 320248171Sneel offset -= pi->pi_msix.table_offset; 321234761Sgrehan index = offset / MSIX_TABLE_ENTRY_SIZE; 322245749Sneel if (index >= pi->pi_msix.table_count) 323245749Sneel return (-1); 324245749Sneel 325234761Sgrehan entry = &pi->pi_msix.table[index]; 326245749Sneel entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 327234761Sgrehan 328234761Sgrehan switch(size) { 329234761Sgrehan case 1: 330241744Sgrehan src8 = (uint8_t *)((void *)entry + entry_offset); 331241744Sgrehan data = *src8; 332234761Sgrehan break; 333234761Sgrehan case 2: 334241744Sgrehan src16 = (uint16_t *)((void *)entry + entry_offset); 335241744Sgrehan data = *src16; 336234761Sgrehan break; 337234761Sgrehan case 4: 338241744Sgrehan src32 = (uint32_t *)((void *)entry + entry_offset); 339241744Sgrehan data = *src32; 340234761Sgrehan break; 341234761Sgrehan case 8: 342241744Sgrehan src64 = (uint64_t *)((void *)entry + entry_offset); 343241744Sgrehan data = *src64; 344234761Sgrehan break; 345234761Sgrehan default: 346234761Sgrehan return (-1); 347234761Sgrehan } 348234761Sgrehan 349241744Sgrehan return (data); 350234761Sgrehan} 351234761Sgrehan 352241744Sgrehanstatic void 353241744Sgrehanmsix_table_write(struct vmctx *ctx, int vcpu, struct passthru_softc *sc, 354241744Sgrehan uint64_t offset, int size, uint64_t data) 355234761Sgrehan{ 356234761Sgrehan struct pci_devinst *pi; 357241744Sgrehan struct msix_table_entry *entry; 358297932Sjhb uint8_t *dest8; 359297932Sjhb uint16_t *dest16; 360297932Sjhb uint32_t *dest32; 361297932Sjhb uint64_t *dest64; 362241744Sgrehan size_t entry_offset; 363234761Sgrehan uint32_t vector_control; 364302365Sngie int index; 365234761Sgrehan 366234761Sgrehan pi = sc->psc_pi; 367297932Sjhb if (offset >= pi->pi_msix.pba_offset && 368297932Sjhb offset < pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { 369297932Sjhb switch(size) { 370297932Sjhb case 1: 371297932Sjhb dest8 = (uint8_t *)(pi->pi_msix.pba_page + offset - 372297932Sjhb pi->pi_msix.pba_page_offset); 373297932Sjhb *dest8 = data; 374297932Sjhb break; 375297932Sjhb case 2: 376297932Sjhb dest16 = (uint16_t *)(pi->pi_msix.pba_page + offset - 377297932Sjhb pi->pi_msix.pba_page_offset); 378297932Sjhb *dest16 = data; 379297932Sjhb break; 380297932Sjhb case 4: 381297932Sjhb dest32 = (uint32_t *)(pi->pi_msix.pba_page + offset - 382297932Sjhb pi->pi_msix.pba_page_offset); 383297932Sjhb *dest32 = data; 384297932Sjhb break; 385297932Sjhb case 8: 386297932Sjhb dest64 = (uint64_t *)(pi->pi_msix.pba_page + offset - 387297932Sjhb pi->pi_msix.pba_page_offset); 388297932Sjhb *dest64 = data; 389297932Sjhb break; 390297932Sjhb default: 391297932Sjhb break; 392297932Sjhb } 393297932Sjhb return; 394297932Sjhb } 395297932Sjhb 396262184Sneel if (offset < pi->pi_msix.table_offset) 397262184Sneel return; 398262184Sneel 399248171Sneel offset -= pi->pi_msix.table_offset; 400234761Sgrehan index = offset / MSIX_TABLE_ENTRY_SIZE; 401245749Sneel if (index >= pi->pi_msix.table_count) 402245749Sneel return; 403245749Sneel 404234761Sgrehan entry = &pi->pi_msix.table[index]; 405245749Sneel entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 406234761Sgrehan 407234761Sgrehan /* Only 4 byte naturally-aligned writes are supported */ 408241744Sgrehan assert(size == 4); 409241744Sgrehan assert(entry_offset % 4 == 0); 410241744Sgrehan 411241744Sgrehan vector_control = entry->vector_control; 412297932Sjhb dest32 = (uint32_t *)((void *)entry + entry_offset); 413297932Sjhb *dest32 = data; 414241744Sgrehan /* If MSI-X hasn't been enabled, do nothing */ 415241744Sgrehan if (pi->pi_msix.enabled) { 416241744Sgrehan /* If the entry is masked, don't set it up */ 417241744Sgrehan if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 || 418241744Sgrehan (vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { 419302365Sngie (void)vm_setup_pptdev_msix(ctx, vcpu, 420302365Sngie sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, 421259537Sneel sc->psc_sel.pc_func, index, entry->addr, 422259537Sneel entry->msg_data, entry->vector_control); 423234761Sgrehan } 424234761Sgrehan } 425234761Sgrehan} 426234761Sgrehan 427234761Sgrehanstatic int 428234761Sgrehaninit_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base) 429234761Sgrehan{ 430246191Sneel int b, s, f; 431246191Sneel int error, idx; 432262184Sneel size_t len, remaining; 433262184Sneel uint32_t table_size, table_offset; 434262184Sneel uint32_t pba_size, pba_offset; 435234761Sgrehan vm_paddr_t start; 436234761Sgrehan struct pci_devinst *pi = sc->psc_pi; 437234761Sgrehan 438246190Sneel assert(pci_msix_table_bar(pi) >= 0 && pci_msix_pba_bar(pi) >= 0); 439246190Sneel 440246191Sneel b = sc->psc_sel.pc_bus; 441246191Sneel s = sc->psc_sel.pc_dev; 442246191Sneel f = sc->psc_sel.pc_func; 443246191Sneel 444234761Sgrehan /* 445234761Sgrehan * If the MSI-X table BAR maps memory intended for 446234761Sgrehan * other uses, it is at least assured that the table 447234761Sgrehan * either resides in its own page within the region, 448234761Sgrehan * or it resides in a page shared with only the PBA. 449234761Sgrehan */ 450262184Sneel table_offset = rounddown2(pi->pi_msix.table_offset, 4096); 451241744Sgrehan 452262184Sneel table_size = pi->pi_msix.table_offset - table_offset; 453262184Sneel table_size += pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE; 454246191Sneel table_size = roundup2(table_size, 4096); 455246191Sneel 456297932Sjhb idx = pi->pi_msix.table_bar; 457297932Sjhb start = pi->pi_bar[idx].addr; 458297932Sjhb remaining = pi->pi_bar[idx].size; 459297932Sjhb 460262184Sneel if (pi->pi_msix.pba_bar == pi->pi_msix.table_bar) { 461262184Sneel pba_offset = pi->pi_msix.pba_offset; 462262184Sneel pba_size = pi->pi_msix.pba_size; 463262184Sneel if (pba_offset >= table_offset + table_size || 464262184Sneel table_offset >= pba_offset + pba_size) { 465262184Sneel /* 466297932Sjhb * If the PBA does not share a page with the MSI-x 467297932Sjhb * tables, no PBA emulation is required. 468262184Sneel */ 469297932Sjhb pi->pi_msix.pba_page = NULL; 470297932Sjhb pi->pi_msix.pba_page_offset = 0; 471262184Sneel } else { 472297932Sjhb /* 473297932Sjhb * The PBA overlaps with either the first or last 474297932Sjhb * page of the MSI-X table region. Map the 475297932Sjhb * appropriate page. 476297932Sjhb */ 477297932Sjhb if (pba_offset <= table_offset) 478297932Sjhb pi->pi_msix.pba_page_offset = table_offset; 479297932Sjhb else 480297932Sjhb pi->pi_msix.pba_page_offset = table_offset + 481297932Sjhb table_size - 4096; 482297932Sjhb pi->pi_msix.pba_page = mmap(NULL, 4096, PROT_READ | 483297932Sjhb PROT_WRITE, MAP_SHARED, memfd, start + 484297932Sjhb pi->pi_msix.pba_page_offset); 485297932Sjhb if (pi->pi_msix.pba_page == MAP_FAILED) { 486298295Sjhb warn( 487298295Sjhb "Failed to map PBA page for MSI-X on %d/%d/%d", 488298295Sjhb b, s, f); 489297932Sjhb return (-1); 490297932Sjhb } 491262184Sneel } 492262184Sneel } 493262184Sneel 494246191Sneel /* Map everything before the MSI-X table */ 495262184Sneel if (table_offset > 0) { 496262184Sneel len = table_offset; 497246191Sneel error = vm_map_pptdev_mmio(ctx, b, s, f, start, len, base); 498246191Sneel if (error) 499246191Sneel return (error); 500246191Sneel 501246191Sneel base += len; 502246191Sneel start += len; 503246191Sneel remaining -= len; 504234761Sgrehan } 505246191Sneel 506246191Sneel /* Skip the MSI-X table */ 507246191Sneel base += table_size; 508246191Sneel start += table_size; 509246191Sneel remaining -= table_size; 510246191Sneel 511246191Sneel /* Map everything beyond the end of the MSI-X table */ 512246191Sneel if (remaining > 0) { 513246191Sneel len = remaining; 514246191Sneel error = vm_map_pptdev_mmio(ctx, b, s, f, start, len, base); 515246191Sneel if (error) 516246191Sneel return (error); 517246191Sneel } 518246191Sneel 519246191Sneel return (0); 520234761Sgrehan} 521234761Sgrehan 522234761Sgrehanstatic int 523221828Sgrehancfginitbar(struct vmctx *ctx, struct passthru_softc *sc) 524221828Sgrehan{ 525221828Sgrehan int i, error; 526221828Sgrehan struct pci_devinst *pi; 527221828Sgrehan struct pci_bar_io bar; 528221828Sgrehan enum pcibar_type bartype; 529262184Sneel uint64_t base, size; 530221828Sgrehan 531221828Sgrehan pi = sc->psc_pi; 532221828Sgrehan 533221828Sgrehan /* 534221828Sgrehan * Initialize BAR registers 535221828Sgrehan */ 536221828Sgrehan for (i = 0; i <= PCI_BARMAX; i++) { 537221828Sgrehan bzero(&bar, sizeof(bar)); 538221828Sgrehan bar.pbi_sel = sc->psc_sel; 539221828Sgrehan bar.pbi_reg = PCIR_BAR(i); 540221828Sgrehan 541221828Sgrehan if (ioctl(pcifd, PCIOCGETBAR, &bar) < 0) 542221828Sgrehan continue; 543221828Sgrehan 544221828Sgrehan if (PCI_BAR_IO(bar.pbi_base)) { 545221828Sgrehan bartype = PCIBAR_IO; 546221828Sgrehan base = bar.pbi_base & PCIM_BAR_IO_BASE; 547221828Sgrehan } else { 548221828Sgrehan switch (bar.pbi_base & PCIM_BAR_MEM_TYPE) { 549221828Sgrehan case PCIM_BAR_MEM_64: 550221828Sgrehan bartype = PCIBAR_MEM64; 551221828Sgrehan break; 552221828Sgrehan default: 553221828Sgrehan bartype = PCIBAR_MEM32; 554221828Sgrehan break; 555221828Sgrehan } 556221828Sgrehan base = bar.pbi_base & PCIM_BAR_MEM_BASE; 557221828Sgrehan } 558262184Sneel size = bar.pbi_length; 559221828Sgrehan 560262184Sneel if (bartype != PCIBAR_IO) { 561262184Sneel if (((base | size) & PAGE_MASK) != 0) { 562298295Sjhb warnx("passthru device %d/%d/%d BAR %d: " 563262184Sneel "base %#lx or size %#lx not page aligned\n", 564262184Sneel sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, 565262184Sneel sc->psc_sel.pc_func, i, base, size); 566262184Sneel return (-1); 567262184Sneel } 568262184Sneel } 569262184Sneel 570221828Sgrehan /* Cache information about the "real" BAR */ 571221828Sgrehan sc->psc_bar[i].type = bartype; 572262184Sneel sc->psc_bar[i].size = size; 573221828Sgrehan sc->psc_bar[i].addr = base; 574221828Sgrehan 575221828Sgrehan /* Allocate the BAR in the guest I/O or MMIO space */ 576262184Sneel error = pci_emul_alloc_pbar(pi, i, base, bartype, size); 577221828Sgrehan if (error) 578221828Sgrehan return (-1); 579221828Sgrehan 580234761Sgrehan /* The MSI-X table needs special handling */ 581246190Sneel if (i == pci_msix_table_bar(pi)) { 582234761Sgrehan error = init_msix_table(ctx, sc, base); 583234761Sgrehan if (error) 584234761Sgrehan return (-1); 585234761Sgrehan } else if (bartype != PCIBAR_IO) { 586262184Sneel /* Map the physical BAR in the guest MMIO space */ 587221828Sgrehan error = vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus, 588221828Sgrehan sc->psc_sel.pc_dev, sc->psc_sel.pc_func, 589221828Sgrehan pi->pi_bar[i].addr, pi->pi_bar[i].size, base); 590221828Sgrehan if (error) 591221828Sgrehan return (-1); 592221828Sgrehan } 593221828Sgrehan 594221828Sgrehan /* 595221828Sgrehan * 64-bit BAR takes up two slots so skip the next one. 596221828Sgrehan */ 597221828Sgrehan if (bartype == PCIBAR_MEM64) { 598221828Sgrehan i++; 599221828Sgrehan assert(i <= PCI_BARMAX); 600221828Sgrehan sc->psc_bar[i].type = PCIBAR_MEMHI64; 601221828Sgrehan } 602221828Sgrehan } 603221828Sgrehan return (0); 604221828Sgrehan} 605221828Sgrehan 606221828Sgrehanstatic int 607221828Sgrehancfginit(struct vmctx *ctx, struct pci_devinst *pi, int bus, int slot, int func) 608221828Sgrehan{ 609221828Sgrehan int error; 610221828Sgrehan struct passthru_softc *sc; 611221828Sgrehan 612221828Sgrehan error = 1; 613221828Sgrehan sc = pi->pi_arg; 614221828Sgrehan 615221828Sgrehan bzero(&sc->psc_sel, sizeof(struct pcisel)); 616221828Sgrehan sc->psc_sel.pc_bus = bus; 617221828Sgrehan sc->psc_sel.pc_dev = slot; 618221828Sgrehan sc->psc_sel.pc_func = func; 619221828Sgrehan 620298295Sjhb if (cfginitmsi(sc) != 0) { 621298295Sjhb warnx("failed to initialize MSI for PCI %d/%d/%d", 622298295Sjhb bus, slot, func); 623234761Sgrehan goto done; 624298295Sjhb } 625234761Sgrehan 626298295Sjhb if (cfginitbar(ctx, sc) != 0) { 627298295Sjhb warnx("failed to initialize BARs for PCI %d/%d/%d", 628298295Sjhb bus, slot, func); 629221828Sgrehan goto done; 630298295Sjhb } 631221828Sgrehan 632221828Sgrehan error = 0; /* success */ 633221828Sgrehandone: 634221828Sgrehan return (error); 635221828Sgrehan} 636221828Sgrehan 637221828Sgrehanstatic int 638221828Sgrehanpassthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 639221828Sgrehan{ 640284539Sneel int bus, slot, func, error, memflags; 641221828Sgrehan struct passthru_softc *sc; 642221828Sgrehan 643221828Sgrehan sc = NULL; 644221828Sgrehan error = 1; 645221828Sgrehan 646284539Sneel memflags = vm_get_memflags(ctx); 647284539Sneel if (!(memflags & VM_MEM_F_WIRED)) { 648298295Sjhb warnx("passthru requires guest memory to be wired"); 649284539Sneel goto done; 650284539Sneel } 651284539Sneel 652221828Sgrehan if (pcifd < 0) { 653221828Sgrehan pcifd = open(_PATH_DEVPCI, O_RDWR, 0); 654298295Sjhb if (pcifd < 0) { 655298295Sjhb warn("failed to open %s", _PATH_DEVPCI); 656221828Sgrehan goto done; 657298295Sjhb } 658221828Sgrehan } 659221828Sgrehan 660221828Sgrehan if (iofd < 0) { 661221828Sgrehan iofd = open(_PATH_DEVIO, O_RDWR, 0); 662298295Sjhb if (iofd < 0) { 663298295Sjhb warn("failed to open %s", _PATH_DEVIO); 664221828Sgrehan goto done; 665298295Sjhb } 666221828Sgrehan } 667221828Sgrehan 668297932Sjhb if (memfd < 0) { 669297932Sjhb memfd = open(_PATH_MEM, O_RDWR, 0); 670298295Sjhb if (memfd < 0) { 671298295Sjhb warn("failed to open %s", _PATH_MEM); 672297932Sjhb goto done; 673298295Sjhb } 674297932Sjhb } 675297932Sjhb 676241744Sgrehan if (opts == NULL || 677298295Sjhb sscanf(opts, "%d/%d/%d", &bus, &slot, &func) != 3) { 678298295Sjhb warnx("invalid passthru options"); 679221828Sgrehan goto done; 680298295Sjhb } 681221828Sgrehan 682298295Sjhb if (vm_assign_pptdev(ctx, bus, slot, func) != 0) { 683298295Sjhb warnx("PCI device at %d/%d/%d is not using the ppt(4) driver", 684298295Sjhb bus, slot, func); 685221828Sgrehan goto done; 686298295Sjhb } 687221828Sgrehan 688264770Sdelphij sc = calloc(1, sizeof(struct passthru_softc)); 689221828Sgrehan 690221828Sgrehan pi->pi_arg = sc; 691221828Sgrehan sc->psc_pi = pi; 692221828Sgrehan 693221828Sgrehan /* initialize config space */ 694241744Sgrehan if ((error = cfginit(ctx, pi, bus, slot, func)) != 0) 695221828Sgrehan goto done; 696221828Sgrehan 697221828Sgrehan error = 0; /* success */ 698221828Sgrehandone: 699221828Sgrehan if (error) { 700221828Sgrehan free(sc); 701221828Sgrehan vm_unassign_pptdev(ctx, bus, slot, func); 702221828Sgrehan } 703221828Sgrehan return (error); 704221828Sgrehan} 705221828Sgrehan 706221828Sgrehanstatic int 707221828Sgrehanbar_access(int coff) 708221828Sgrehan{ 709221828Sgrehan if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) 710221828Sgrehan return (1); 711221828Sgrehan else 712221828Sgrehan return (0); 713221828Sgrehan} 714221828Sgrehan 715221828Sgrehanstatic int 716221828Sgrehanmsicap_access(struct passthru_softc *sc, int coff) 717221828Sgrehan{ 718221828Sgrehan int caplen; 719221828Sgrehan 720221828Sgrehan if (sc->psc_msi.capoff == 0) 721221828Sgrehan return (0); 722221828Sgrehan 723221828Sgrehan caplen = msi_caplen(sc->psc_msi.msgctrl); 724221828Sgrehan 725221828Sgrehan if (coff >= sc->psc_msi.capoff && coff < sc->psc_msi.capoff + caplen) 726221828Sgrehan return (1); 727221828Sgrehan else 728221828Sgrehan return (0); 729221828Sgrehan} 730221828Sgrehan 731234761Sgrehanstatic int 732234761Sgrehanmsixcap_access(struct passthru_softc *sc, int coff) 733234761Sgrehan{ 734234761Sgrehan if (sc->psc_msix.capoff == 0) 735234761Sgrehan return (0); 736234761Sgrehan 737234761Sgrehan return (coff >= sc->psc_msix.capoff && 738234761Sgrehan coff < sc->psc_msix.capoff + MSIX_CAPLEN); 739234761Sgrehan} 740234761Sgrehan 741221828Sgrehanstatic int 742241744Sgrehanpassthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, 743241744Sgrehan int coff, int bytes, uint32_t *rv) 744221828Sgrehan{ 745221828Sgrehan struct passthru_softc *sc; 746221828Sgrehan 747221828Sgrehan sc = pi->pi_arg; 748221828Sgrehan 749221828Sgrehan /* 750221828Sgrehan * PCI BARs and MSI capability is emulated. 751221828Sgrehan */ 752221828Sgrehan if (bar_access(coff) || msicap_access(sc, coff)) 753221828Sgrehan return (-1); 754221828Sgrehan 755221828Sgrehan#ifdef LEGACY_SUPPORT 756221828Sgrehan /* 757221828Sgrehan * Emulate PCIR_CAP_PTR if this device does not support MSI capability 758221828Sgrehan * natively. 759221828Sgrehan */ 760221828Sgrehan if (sc->psc_msi.emulated) { 761221828Sgrehan if (coff >= PCIR_CAP_PTR && coff < PCIR_CAP_PTR + 4) 762221828Sgrehan return (-1); 763221828Sgrehan } 764221828Sgrehan#endif 765221828Sgrehan 766221828Sgrehan /* Everything else just read from the device's config space */ 767221828Sgrehan *rv = read_config(&sc->psc_sel, coff, bytes); 768221828Sgrehan 769221828Sgrehan return (0); 770221828Sgrehan} 771221828Sgrehan 772221828Sgrehanstatic int 773241744Sgrehanpassthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, 774241744Sgrehan int coff, int bytes, uint32_t val) 775221828Sgrehan{ 776234761Sgrehan int error, msix_table_entries, i; 777221828Sgrehan struct passthru_softc *sc; 778221828Sgrehan 779221828Sgrehan sc = pi->pi_arg; 780221828Sgrehan 781221828Sgrehan /* 782221828Sgrehan * PCI BARs are emulated 783221828Sgrehan */ 784221828Sgrehan if (bar_access(coff)) 785221828Sgrehan return (-1); 786221828Sgrehan 787221828Sgrehan /* 788221828Sgrehan * MSI capability is emulated 789221828Sgrehan */ 790221828Sgrehan if (msicap_access(sc, coff)) { 791221828Sgrehan msicap_cfgwrite(pi, sc->psc_msi.capoff, coff, bytes, val); 792221828Sgrehan 793259537Sneel error = vm_setup_pptdev_msi(ctx, vcpu, sc->psc_sel.pc_bus, 794259482Sneel sc->psc_sel.pc_dev, sc->psc_sel.pc_func, 795259482Sneel pi->pi_msi.addr, pi->pi_msi.msg_data, 796259482Sneel pi->pi_msi.maxmsgnum); 797298295Sjhb if (error != 0) 798298295Sjhb err(1, "vm_setup_pptdev_msi"); 799221828Sgrehan return (0); 800221828Sgrehan } 801221828Sgrehan 802234761Sgrehan if (msixcap_access(sc, coff)) { 803234761Sgrehan msixcap_cfgwrite(pi, sc->psc_msix.capoff, coff, bytes, val); 804234761Sgrehan if (pi->pi_msix.enabled) { 805234761Sgrehan msix_table_entries = pi->pi_msix.table_count; 806234761Sgrehan for (i = 0; i < msix_table_entries; i++) { 807259537Sneel error = vm_setup_pptdev_msix(ctx, vcpu, 808259482Sneel sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, 809259482Sneel sc->psc_sel.pc_func, i, 810259482Sneel pi->pi_msix.table[i].addr, 811259482Sneel pi->pi_msix.table[i].msg_data, 812259482Sneel pi->pi_msix.table[i].vector_control); 813234761Sgrehan 814298295Sjhb if (error) 815298295Sjhb err(1, "vm_setup_pptdev_msix"); 816234761Sgrehan } 817234761Sgrehan } 818234761Sgrehan return (0); 819234761Sgrehan } 820234761Sgrehan 821221828Sgrehan#ifdef LEGACY_SUPPORT 822221828Sgrehan /* 823221828Sgrehan * If this device does not support MSI natively then we cannot let 824221828Sgrehan * the guest disable legacy interrupts from the device. It is the 825221828Sgrehan * legacy interrupt that is triggering the virtual MSI to the guest. 826221828Sgrehan */ 827221828Sgrehan if (sc->psc_msi.emulated && pci_msi_enabled(pi)) { 828221828Sgrehan if (coff == PCIR_COMMAND && bytes == 2) 829221828Sgrehan val &= ~PCIM_CMD_INTxDIS; 830221828Sgrehan } 831221828Sgrehan#endif 832221828Sgrehan 833221828Sgrehan write_config(&sc->psc_sel, coff, bytes, val); 834221828Sgrehan 835221828Sgrehan return (0); 836221828Sgrehan} 837221828Sgrehan 838221828Sgrehanstatic void 839241744Sgrehanpassthru_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 840241744Sgrehan uint64_t offset, int size, uint64_t value) 841221828Sgrehan{ 842221828Sgrehan struct passthru_softc *sc; 843221828Sgrehan struct iodev_pio_req pio; 844221828Sgrehan 845221828Sgrehan sc = pi->pi_arg; 846221828Sgrehan 847246190Sneel if (baridx == pci_msix_table_bar(pi)) { 848241744Sgrehan msix_table_write(ctx, vcpu, sc, offset, size, value); 849241744Sgrehan } else { 850241744Sgrehan assert(pi->pi_bar[baridx].type == PCIBAR_IO); 851241744Sgrehan bzero(&pio, sizeof(struct iodev_pio_req)); 852241744Sgrehan pio.access = IODEV_PIO_WRITE; 853241744Sgrehan pio.port = sc->psc_bar[baridx].addr + offset; 854241744Sgrehan pio.width = size; 855241744Sgrehan pio.val = value; 856241744Sgrehan 857241744Sgrehan (void)ioctl(iofd, IODEV_PIO, &pio); 858241744Sgrehan } 859221828Sgrehan} 860221828Sgrehan 861241744Sgrehanstatic uint64_t 862241744Sgrehanpassthru_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 863241744Sgrehan uint64_t offset, int size) 864221828Sgrehan{ 865221828Sgrehan struct passthru_softc *sc; 866221828Sgrehan struct iodev_pio_req pio; 867241744Sgrehan uint64_t val; 868221828Sgrehan 869221828Sgrehan sc = pi->pi_arg; 870221828Sgrehan 871246190Sneel if (baridx == pci_msix_table_bar(pi)) { 872241744Sgrehan val = msix_table_read(sc, offset, size); 873241744Sgrehan } else { 874241744Sgrehan assert(pi->pi_bar[baridx].type == PCIBAR_IO); 875241744Sgrehan bzero(&pio, sizeof(struct iodev_pio_req)); 876241744Sgrehan pio.access = IODEV_PIO_READ; 877241744Sgrehan pio.port = sc->psc_bar[baridx].addr + offset; 878241744Sgrehan pio.width = size; 879241744Sgrehan pio.val = 0; 880221828Sgrehan 881241744Sgrehan (void)ioctl(iofd, IODEV_PIO, &pio); 882221828Sgrehan 883241744Sgrehan val = pio.val; 884241744Sgrehan } 885241744Sgrehan 886241744Sgrehan return (val); 887221828Sgrehan} 888221828Sgrehan 889221828Sgrehanstruct pci_devemu passthru = { 890221828Sgrehan .pe_emu = "passthru", 891221828Sgrehan .pe_init = passthru_init, 892221828Sgrehan .pe_cfgwrite = passthru_cfgwrite, 893221828Sgrehan .pe_cfgread = passthru_cfgread, 894241744Sgrehan .pe_barwrite = passthru_write, 895241744Sgrehan .pe_barread = passthru_read, 896221828Sgrehan}; 897221828SgrehanPCI_EMUL_SET(passthru); 898