pci_passthru.c revision 295124
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/usr.sbin/bhyve/pci_passthru.c 295124 2016-02-01 14:56:11Z grehan $
 */
28238384Sjkim
29238384Sjkim#include <sys/cdefs.h>
30238384Sjkim__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_passthru.c 295124 2016-02-01 14:56:11Z grehan $");
31238384Sjkim
32238384Sjkim#include <sys/param.h>
33238384Sjkim#include <sys/types.h>
34238384Sjkim#include <sys/pciio.h>
35238384Sjkim#include <sys/ioctl.h>
36238384Sjkim
37238384Sjkim#include <dev/io/iodev.h>
38238384Sjkim#include <dev/pci/pcireg.h>
39238384Sjkim
40238384Sjkim#include <machine/iodev.h>
41238384Sjkim
42238384Sjkim#include <stdio.h>
43238384Sjkim#include <stdlib.h>
44238384Sjkim#include <string.h>
45238384Sjkim#include <errno.h>
46238384Sjkim#include <fcntl.h>
47238384Sjkim#include <unistd.h>
48238384Sjkim
49238384Sjkim#include <machine/vmm.h>
50238384Sjkim#include <vmmapi.h>
51238384Sjkim#include "pci_emul.h"
52238384Sjkim#include "mem.h"
53238384Sjkim
/* Host control devices; the build may override either path. */
#ifndef _PATH_DEVPCI
#define	_PATH_DEVPCI	"/dev/pci"
#endif

#ifndef	_PATH_DEVIO
#define	_PATH_DEVIO	"/dev/io"
#endif

/*
 * When defined, a device that lacks a native MSI capability gets an
 * emulated one crafted for it (see the LEGACY_SUPPORT sections below).
 */
#define	LEGACY_SUPPORT	1

/* Number of MSI-X table entries encoded in the message control word. */
#define MSIX_TABLE_COUNT(ctrl) (((ctrl) & PCIM_MSIXCTRL_TABLE_SIZE) + 1)
/* Size in bytes of the MSI-X capability structure in config space. */
#define MSIX_CAPLEN 12

/* Descriptors for _PATH_DEVPCI and _PATH_DEVIO; -1 until first opened. */
static int pcifd = -1;
static int iofd = -1;
69238384Sjkim
70238384Sjkimstruct passthru_softc {
71238384Sjkim	struct pci_devinst *psc_pi;
72238384Sjkim	struct pcibar psc_bar[PCI_BARMAX + 1];
73238384Sjkim	struct {
74238384Sjkim		int		capoff;
75238384Sjkim		int		msgctrl;
76238384Sjkim		int		emulated;
77238384Sjkim	} psc_msi;
78238384Sjkim	struct {
79238384Sjkim		int		capoff;
80238384Sjkim	} psc_msix;
81238384Sjkim	struct pcisel psc_sel;
82238384Sjkim};
83238384Sjkim
84238384Sjkimstatic int
85238384Sjkimmsi_caplen(int msgctrl)
86238384Sjkim{
87238384Sjkim	int len;
88238384Sjkim
89238384Sjkim	len = 10;		/* minimum length of msi capability */
90238384Sjkim
91238384Sjkim	if (msgctrl & PCIM_MSICTRL_64BIT)
92238384Sjkim		len += 4;
93238384Sjkim
94238384Sjkim#if 0
95238384Sjkim	/*
96238384Sjkim	 * Ignore the 'mask' and 'pending' bits in the MSI capability.
97238384Sjkim	 * We'll let the guest manipulate them directly.
98238384Sjkim	 */
99238384Sjkim	if (msgctrl & PCIM_MSICTRL_VECTOR)
100238384Sjkim		len += 10;
101238384Sjkim#endif
102238384Sjkim
103238384Sjkim	return (len);
104238384Sjkim}
105238384Sjkim
106238384Sjkimstatic uint32_t
107238384Sjkimread_config(const struct pcisel *sel, long reg, int width)
108238384Sjkim{
109238384Sjkim	struct pci_io pi;
110238384Sjkim
111238384Sjkim	bzero(&pi, sizeof(pi));
112238384Sjkim	pi.pi_sel = *sel;
113238384Sjkim	pi.pi_reg = reg;
114238384Sjkim	pi.pi_width = width;
115238384Sjkim
116238384Sjkim	if (ioctl(pcifd, PCIOCREAD, &pi) < 0)
117238384Sjkim		return (0);				/* XXX */
118238384Sjkim	else
119238384Sjkim		return (pi.pi_data);
120238384Sjkim}
121238384Sjkim
122238384Sjkimstatic void
123238384Sjkimwrite_config(const struct pcisel *sel, long reg, int width, uint32_t data)
124238384Sjkim{
125238384Sjkim	struct pci_io pi;
126238384Sjkim
127238384Sjkim	bzero(&pi, sizeof(pi));
128238384Sjkim	pi.pi_sel = *sel;
129238384Sjkim	pi.pi_reg = reg;
130238384Sjkim	pi.pi_width = width;
131238384Sjkim	pi.pi_data = data;
132238384Sjkim
133238384Sjkim	(void)ioctl(pcifd, PCIOCWRITE, &pi);		/* XXX */
134238384Sjkim}
135238384Sjkim
136238384Sjkim#ifdef LEGACY_SUPPORT
137238384Sjkimstatic int
138238384Sjkimpassthru_add_msicap(struct pci_devinst *pi, int msgnum, int nextptr)
139238384Sjkim{
140238384Sjkim	int capoff, i;
141238384Sjkim	struct msicap msicap;
142238384Sjkim	u_char *capdata;
143238384Sjkim
144238384Sjkim	pci_populate_msicap(&msicap, msgnum, nextptr);
145238384Sjkim
146238384Sjkim	/*
147238384Sjkim	 * XXX
148238384Sjkim	 * Copy the msi capability structure in the last 16 bytes of the
149238384Sjkim	 * config space. This is wrong because it could shadow something
150238384Sjkim	 * useful to the device.
151238384Sjkim	 */
152238384Sjkim	capoff = 256 - roundup(sizeof(msicap), 4);
153238384Sjkim	capdata = (u_char *)&msicap;
154238384Sjkim	for (i = 0; i < sizeof(msicap); i++)
155238384Sjkim		pci_set_cfgdata8(pi, capoff + i, capdata[i]);
156238384Sjkim
157238384Sjkim	return (capoff);
158238384Sjkim}
159238384Sjkim#endif	/* LEGACY_SUPPORT */
160238384Sjkim
161238384Sjkimstatic int
162238384Sjkimcfginitmsi(struct passthru_softc *sc)
163238384Sjkim{
164238384Sjkim	int i, ptr, capptr, cap, sts, caplen, table_size;
165238384Sjkim	uint32_t u32;
166238384Sjkim	struct pcisel sel;
167238384Sjkim	struct pci_devinst *pi;
168238384Sjkim	struct msixcap msixcap;
169238384Sjkim	uint32_t *msixcap_ptr;
170238384Sjkim
171238384Sjkim	pi = sc->psc_pi;
172238384Sjkim	sel = sc->psc_sel;
173238384Sjkim
174238384Sjkim	/*
175238384Sjkim	 * Parse the capabilities and cache the location of the MSI
176238384Sjkim	 * and MSI-X capabilities.
177238384Sjkim	 */
178238384Sjkim	sts = read_config(&sel, PCIR_STATUS, 2);
179238384Sjkim	if (sts & PCIM_STATUS_CAPPRESENT) {
180238384Sjkim		ptr = read_config(&sel, PCIR_CAP_PTR, 1);
181238384Sjkim		while (ptr != 0 && ptr != 0xff) {
182238384Sjkim			cap = read_config(&sel, ptr + PCICAP_ID, 1);
183238384Sjkim			if (cap == PCIY_MSI) {
184238384Sjkim				/*
185238384Sjkim				 * Copy the MSI capability into the config
186238384Sjkim				 * space of the emulated pci device
187238384Sjkim				 */
188238384Sjkim				sc->psc_msi.capoff = ptr;
189238384Sjkim				sc->psc_msi.msgctrl = read_config(&sel,
190238384Sjkim								  ptr + 2, 2);
191238384Sjkim				sc->psc_msi.emulated = 0;
192238384Sjkim				caplen = msi_caplen(sc->psc_msi.msgctrl);
193238384Sjkim				capptr = ptr;
194238384Sjkim				while (caplen > 0) {
195238384Sjkim					u32 = read_config(&sel, capptr, 4);
196238384Sjkim					pci_set_cfgdata32(pi, capptr, u32);
197238384Sjkim					caplen -= 4;
198238384Sjkim					capptr += 4;
199238384Sjkim				}
200238384Sjkim			} else if (cap == PCIY_MSIX) {
201238384Sjkim				/*
202238384Sjkim				 * Copy the MSI-X capability
203238384Sjkim				 */
204238384Sjkim				sc->psc_msix.capoff = ptr;
205238384Sjkim				caplen = 12;
206238384Sjkim				msixcap_ptr = (uint32_t*) &msixcap;
207238384Sjkim				capptr = ptr;
208238384Sjkim				while (caplen > 0) {
209238384Sjkim					u32 = read_config(&sel, capptr, 4);
210238384Sjkim					*msixcap_ptr = u32;
211238384Sjkim					pci_set_cfgdata32(pi, capptr, u32);
212238384Sjkim					caplen -= 4;
213238384Sjkim					capptr += 4;
214238384Sjkim					msixcap_ptr++;
215238384Sjkim				}
216238384Sjkim			}
217238384Sjkim			ptr = read_config(&sel, ptr + PCICAP_NEXTPTR, 1);
218238384Sjkim		}
219238384Sjkim	}
220238384Sjkim
221238384Sjkim	if (sc->psc_msix.capoff != 0) {
222238384Sjkim		pi->pi_msix.pba_bar =
223238384Sjkim		    msixcap.pba_info & PCIM_MSIX_BIR_MASK;
224238384Sjkim		pi->pi_msix.pba_offset =
225238384Sjkim		    msixcap.pba_info & ~PCIM_MSIX_BIR_MASK;
226238384Sjkim		pi->pi_msix.table_bar =
227238384Sjkim		    msixcap.table_info & PCIM_MSIX_BIR_MASK;
228238384Sjkim		pi->pi_msix.table_offset =
229238384Sjkim		    msixcap.table_info & ~PCIM_MSIX_BIR_MASK;
230238384Sjkim		pi->pi_msix.table_count = MSIX_TABLE_COUNT(msixcap.msgctrl);
231238384Sjkim		pi->pi_msix.pba_size = PBA_SIZE(pi->pi_msix.table_count);
232238384Sjkim
233238384Sjkim		/* Allocate the emulated MSI-X table array */
234238384Sjkim		table_size = pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE;
235238384Sjkim		pi->pi_msix.table = calloc(1, table_size);
236238384Sjkim
237238384Sjkim		/* Mask all table entries */
238238384Sjkim		for (i = 0; i < pi->pi_msix.table_count; i++) {
239238384Sjkim			pi->pi_msix.table[i].vector_control |=
240238384Sjkim						PCIM_MSIX_VCTRL_MASK;
241238384Sjkim		}
242238384Sjkim	}
243238384Sjkim
244238384Sjkim#ifdef LEGACY_SUPPORT
245238384Sjkim	/*
246238384Sjkim	 * If the passthrough device does not support MSI then craft a
247238384Sjkim	 * MSI capability for it. We link the new MSI capability at the
248238384Sjkim	 * head of the list of capabilities.
249238384Sjkim	 */
250238384Sjkim	if ((sts & PCIM_STATUS_CAPPRESENT) != 0 && sc->psc_msi.capoff == 0) {
251238384Sjkim		int origptr, msiptr;
252238384Sjkim		origptr = read_config(&sel, PCIR_CAP_PTR, 1);
253238384Sjkim		msiptr = passthru_add_msicap(pi, 1, origptr);
254238384Sjkim		sc->psc_msi.capoff = msiptr;
255238384Sjkim		sc->psc_msi.msgctrl = pci_get_cfgdata16(pi, msiptr + 2);
256238384Sjkim		sc->psc_msi.emulated = 1;
257238384Sjkim		pci_set_cfgdata8(pi, PCIR_CAP_PTR, msiptr);
258238384Sjkim	}
259238384Sjkim#endif
260238384Sjkim
261238384Sjkim	/* Make sure one of the capabilities is present */
262238384Sjkim	if (sc->psc_msi.capoff == 0 && sc->psc_msix.capoff == 0)
263238384Sjkim		return (-1);
264238384Sjkim	else
265238384Sjkim		return (0);
266238384Sjkim}
267238384Sjkim
268238384Sjkimstatic uint64_t
269238384Sjkimmsix_table_read(struct passthru_softc *sc, uint64_t offset, int size)
270238384Sjkim{
271238384Sjkim	struct pci_devinst *pi;
272238384Sjkim	struct msix_table_entry *entry;
273238384Sjkim	uint8_t *src8;
274238384Sjkim	uint16_t *src16;
275238384Sjkim	uint32_t *src32;
276238384Sjkim	uint64_t *src64;
277238384Sjkim	uint64_t data;
278238384Sjkim	size_t entry_offset;
279238384Sjkim	int index;
280238384Sjkim
281238384Sjkim	pi = sc->psc_pi;
282238384Sjkim	if (offset < pi->pi_msix.table_offset)
283238384Sjkim		return (-1);
284238384Sjkim
285238384Sjkim	offset -= pi->pi_msix.table_offset;
286238384Sjkim	index = offset / MSIX_TABLE_ENTRY_SIZE;
287238384Sjkim	if (index >= pi->pi_msix.table_count)
288238384Sjkim		return (-1);
289238384Sjkim
290238384Sjkim	entry = &pi->pi_msix.table[index];
291238384Sjkim	entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;
292238384Sjkim
293238384Sjkim	switch(size) {
294238384Sjkim	case 1:
295238384Sjkim		src8 = (uint8_t *)((void *)entry + entry_offset);
296238384Sjkim		data = *src8;
297238384Sjkim		break;
298238384Sjkim	case 2:
299238384Sjkim		src16 = (uint16_t *)((void *)entry + entry_offset);
300238384Sjkim		data = *src16;
301238384Sjkim		break;
302238384Sjkim	case 4:
303238384Sjkim		src32 = (uint32_t *)((void *)entry + entry_offset);
304238384Sjkim		data = *src32;
305238384Sjkim		break;
306238384Sjkim	case 8:
307238384Sjkim		src64 = (uint64_t *)((void *)entry + entry_offset);
308238384Sjkim		data = *src64;
309238384Sjkim		break;
310238384Sjkim	default:
311238384Sjkim		return (-1);
312238384Sjkim	}
313238384Sjkim
314238384Sjkim	return (data);
315238384Sjkim}
316238384Sjkim
317238384Sjkimstatic void
318238384Sjkimmsix_table_write(struct vmctx *ctx, int vcpu, struct passthru_softc *sc,
319238384Sjkim		 uint64_t offset, int size, uint64_t data)
320238384Sjkim{
321238384Sjkim	struct pci_devinst *pi;
322238384Sjkim	struct msix_table_entry *entry;
323238384Sjkim	uint32_t *dest;
324238384Sjkim	size_t entry_offset;
325238384Sjkim	uint32_t vector_control;
326238384Sjkim	int error, index;
327238384Sjkim
328238384Sjkim	pi = sc->psc_pi;
329238384Sjkim	if (offset < pi->pi_msix.table_offset)
330238384Sjkim		return;
331238384Sjkim
332238384Sjkim	offset -= pi->pi_msix.table_offset;
333238384Sjkim	index = offset / MSIX_TABLE_ENTRY_SIZE;
334238384Sjkim	if (index >= pi->pi_msix.table_count)
335238384Sjkim		return;
336238384Sjkim
337238384Sjkim	entry = &pi->pi_msix.table[index];
338238384Sjkim	entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;
339238384Sjkim
340238384Sjkim	/* Only 4 byte naturally-aligned writes are supported */
341238384Sjkim	assert(size == 4);
342238384Sjkim	assert(entry_offset % 4 == 0);
343238384Sjkim
344238384Sjkim	vector_control = entry->vector_control;
345238384Sjkim	dest = (uint32_t *)((void *)entry + entry_offset);
346238384Sjkim	*dest = data;
347238384Sjkim	/* If MSI-X hasn't been enabled, do nothing */
348238384Sjkim	if (pi->pi_msix.enabled) {
349238384Sjkim		/* If the entry is masked, don't set it up */
350238384Sjkim		if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 ||
351238384Sjkim		    (vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
352238384Sjkim			error = vm_setup_pptdev_msix(ctx, vcpu,
353238384Sjkim			    sc->psc_sel.pc_bus, sc->psc_sel.pc_dev,
354238384Sjkim			    sc->psc_sel.pc_func, index, entry->addr,
355238384Sjkim			    entry->msg_data, entry->vector_control);
356238384Sjkim		}
357238384Sjkim	}
358238384Sjkim}
359238384Sjkim
360238384Sjkimstatic int
361238384Sjkiminit_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base)
362238384Sjkim{
363238384Sjkim	int b, s, f;
364238384Sjkim	int error, idx;
365238384Sjkim	size_t len, remaining;
366238384Sjkim	uint32_t table_size, table_offset;
367238384Sjkim	uint32_t pba_size, pba_offset;
368238384Sjkim	vm_paddr_t start;
369238384Sjkim	struct pci_devinst *pi = sc->psc_pi;
370238384Sjkim
371238384Sjkim	assert(pci_msix_table_bar(pi) >= 0 && pci_msix_pba_bar(pi) >= 0);
372238384Sjkim
373238384Sjkim	b = sc->psc_sel.pc_bus;
374238384Sjkim	s = sc->psc_sel.pc_dev;
375238384Sjkim	f = sc->psc_sel.pc_func;
376238384Sjkim
377238384Sjkim	/*
378238384Sjkim	 * If the MSI-X table BAR maps memory intended for
379238384Sjkim	 * other uses, it is at least assured that the table
380238384Sjkim	 * either resides in its own page within the region,
381238384Sjkim	 * or it resides in a page shared with only the PBA.
382238384Sjkim	 */
383238384Sjkim	table_offset = rounddown2(pi->pi_msix.table_offset, 4096);
384238384Sjkim
385238384Sjkim	table_size = pi->pi_msix.table_offset - table_offset;
386238384Sjkim	table_size += pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE;
387238384Sjkim	table_size = roundup2(table_size, 4096);
388238384Sjkim
389238384Sjkim	if (pi->pi_msix.pba_bar == pi->pi_msix.table_bar) {
390238384Sjkim		pba_offset = pi->pi_msix.pba_offset;
391238384Sjkim		pba_size = pi->pi_msix.pba_size;
392238384Sjkim		if (pba_offset >= table_offset + table_size ||
393238384Sjkim		    table_offset >= pba_offset + pba_size) {
394238384Sjkim			/*
395238384Sjkim			 * The PBA can reside in the same BAR as the MSI-x
396238384Sjkim			 * tables as long as it does not overlap with any
397238384Sjkim			 * naturally aligned page occupied by the tables.
398238384Sjkim			 */
399238384Sjkim		} else {
400238384Sjkim			/* Need to also emulate the PBA, not supported yet */
401238384Sjkim			printf("Unsupported MSI-X configuration: %d/%d/%d\n",
402238384Sjkim		            b, s, f);
403238384Sjkim			return (-1);
404238384Sjkim		}
405238384Sjkim	}
406238384Sjkim
407238384Sjkim	idx = pi->pi_msix.table_bar;
408238384Sjkim	start = pi->pi_bar[idx].addr;
409238384Sjkim	remaining = pi->pi_bar[idx].size;
410238384Sjkim
411238384Sjkim	/* Map everything before the MSI-X table */
412238384Sjkim	if (table_offset > 0) {
413238384Sjkim		len = table_offset;
414238384Sjkim		error = vm_map_pptdev_mmio(ctx, b, s, f, start, len, base);
415238384Sjkim		if (error)
416238384Sjkim			return (error);
417238384Sjkim
418238384Sjkim		base += len;
419238384Sjkim		start += len;
420238384Sjkim		remaining -= len;
421238384Sjkim	}
422238384Sjkim
423238384Sjkim	/* Skip the MSI-X table */
424238384Sjkim	base += table_size;
425238384Sjkim	start += table_size;
426238384Sjkim	remaining -= table_size;
427238384Sjkim
428238384Sjkim	/* Map everything beyond the end of the MSI-X table */
429238384Sjkim	if (remaining > 0) {
430238384Sjkim		len = remaining;
431238384Sjkim		error = vm_map_pptdev_mmio(ctx, b, s, f, start, len, base);
432238384Sjkim		if (error)
433238384Sjkim			return (error);
434238384Sjkim	}
435238384Sjkim
436238384Sjkim	return (0);
437238384Sjkim}
438238384Sjkim
439238384Sjkimstatic int
440238384Sjkimcfginitbar(struct vmctx *ctx, struct passthru_softc *sc)
441238384Sjkim{
442238384Sjkim	int i, error;
443238384Sjkim	struct pci_devinst *pi;
444238384Sjkim	struct pci_bar_io bar;
445238384Sjkim	enum pcibar_type bartype;
446238384Sjkim	uint64_t base, size;
447238384Sjkim
448238384Sjkim	pi = sc->psc_pi;
449238384Sjkim
450238384Sjkim	/*
451238384Sjkim	 * Initialize BAR registers
452238384Sjkim	 */
453238384Sjkim	for (i = 0; i <= PCI_BARMAX; i++) {
454238384Sjkim		bzero(&bar, sizeof(bar));
455238384Sjkim		bar.pbi_sel = sc->psc_sel;
456238384Sjkim		bar.pbi_reg = PCIR_BAR(i);
457238384Sjkim
458238384Sjkim		if (ioctl(pcifd, PCIOCGETBAR, &bar) < 0)
459238384Sjkim			continue;
460238384Sjkim
461238384Sjkim		if (PCI_BAR_IO(bar.pbi_base)) {
462238384Sjkim			bartype = PCIBAR_IO;
463238384Sjkim			base = bar.pbi_base & PCIM_BAR_IO_BASE;
464238384Sjkim		} else {
465238384Sjkim			switch (bar.pbi_base & PCIM_BAR_MEM_TYPE) {
466238384Sjkim			case PCIM_BAR_MEM_64:
467238384Sjkim				bartype = PCIBAR_MEM64;
468238384Sjkim				break;
469238384Sjkim			default:
470238384Sjkim				bartype = PCIBAR_MEM32;
471238384Sjkim				break;
472238384Sjkim			}
473238384Sjkim			base = bar.pbi_base & PCIM_BAR_MEM_BASE;
474238384Sjkim		}
475238384Sjkim		size = bar.pbi_length;
476238384Sjkim
477238384Sjkim		if (bartype != PCIBAR_IO) {
478238384Sjkim			if (((base | size) & PAGE_MASK) != 0) {
479238384Sjkim				printf("passthru device %d/%d/%d BAR %d: "
480238384Sjkim				    "base %#lx or size %#lx not page aligned\n",
481238384Sjkim				    sc->psc_sel.pc_bus, sc->psc_sel.pc_dev,
482238384Sjkim				    sc->psc_sel.pc_func, i, base, size);
483238384Sjkim				return (-1);
484238384Sjkim			}
485238384Sjkim		}
486238384Sjkim
487238384Sjkim		/* Cache information about the "real" BAR */
488238384Sjkim		sc->psc_bar[i].type = bartype;
489238384Sjkim		sc->psc_bar[i].size = size;
490238384Sjkim		sc->psc_bar[i].addr = base;
491238384Sjkim
492238384Sjkim		/* Allocate the BAR in the guest I/O or MMIO space */
493238384Sjkim		error = pci_emul_alloc_pbar(pi, i, base, bartype, size);
494238384Sjkim		if (error)
495238384Sjkim			return (-1);
496238384Sjkim
497238384Sjkim		/* The MSI-X table needs special handling */
498238384Sjkim		if (i == pci_msix_table_bar(pi)) {
499238384Sjkim			error = init_msix_table(ctx, sc, base);
500238384Sjkim			if (error)
501238384Sjkim				return (-1);
502238384Sjkim		} else if (bartype != PCIBAR_IO) {
503238384Sjkim			/* Map the physical BAR in the guest MMIO space */
504238384Sjkim			error = vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
505238384Sjkim				sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
506238384Sjkim				pi->pi_bar[i].addr, pi->pi_bar[i].size, base);
507238384Sjkim			if (error)
508238384Sjkim				return (-1);
509238384Sjkim		}
510238384Sjkim
511238384Sjkim		/*
512238384Sjkim		 * 64-bit BAR takes up two slots so skip the next one.
513238384Sjkim		 */
514238384Sjkim		if (bartype == PCIBAR_MEM64) {
515238384Sjkim			i++;
516238384Sjkim			assert(i <= PCI_BARMAX);
517238384Sjkim			sc->psc_bar[i].type = PCIBAR_MEMHI64;
518238384Sjkim		}
519238384Sjkim	}
520238384Sjkim	return (0);
521238384Sjkim}
522238384Sjkim
523238384Sjkimstatic int
524238384Sjkimcfginit(struct vmctx *ctx, struct pci_devinst *pi, int bus, int slot, int func)
525238384Sjkim{
526238384Sjkim	int error;
527238384Sjkim	struct passthru_softc *sc;
528238384Sjkim
529238384Sjkim	error = 1;
530238384Sjkim	sc = pi->pi_arg;
531238384Sjkim
532238384Sjkim	bzero(&sc->psc_sel, sizeof(struct pcisel));
533238384Sjkim	sc->psc_sel.pc_bus = bus;
534238384Sjkim	sc->psc_sel.pc_dev = slot;
535238384Sjkim	sc->psc_sel.pc_func = func;
536238384Sjkim
537238384Sjkim	if (cfginitmsi(sc) != 0)
538238384Sjkim		goto done;
539238384Sjkim
540238384Sjkim	if (cfginitbar(ctx, sc) != 0)
541238384Sjkim		goto done;
542238384Sjkim
543238384Sjkim	error = 0;				/* success */
544238384Sjkimdone:
545238384Sjkim	return (error);
546238384Sjkim}
547238384Sjkim
548238384Sjkimstatic int
549238384Sjkimpassthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
550238384Sjkim{
551238384Sjkim	int bus, slot, func, error, memflags;
552238384Sjkim	struct passthru_softc *sc;
553238384Sjkim
554238384Sjkim	sc = NULL;
555238384Sjkim	error = 1;
556238384Sjkim
557238384Sjkim	memflags = vm_get_memflags(ctx);
558238384Sjkim	if (!(memflags & VM_MEM_F_WIRED)) {
559238384Sjkim		fprintf(stderr, "passthru requires guest memory to be wired\n");
560238384Sjkim		goto done;
561238384Sjkim	}
562238384Sjkim
563238384Sjkim	if (pcifd < 0) {
564238384Sjkim		pcifd = open(_PATH_DEVPCI, O_RDWR, 0);
565238384Sjkim		if (pcifd < 0)
566238384Sjkim			goto done;
567238384Sjkim	}
568238384Sjkim
569238384Sjkim	if (iofd < 0) {
570238384Sjkim		iofd = open(_PATH_DEVIO, O_RDWR, 0);
571238384Sjkim		if (iofd < 0)
572238384Sjkim			goto done;
573238384Sjkim	}
574238384Sjkim
575238384Sjkim	if (opts == NULL ||
576238384Sjkim	    sscanf(opts, "%d/%d/%d", &bus, &slot, &func) != 3)
577238384Sjkim		goto done;
578238384Sjkim
579238384Sjkim	if (vm_assign_pptdev(ctx, bus, slot, func) != 0)
580238384Sjkim		goto done;
581238384Sjkim
582238384Sjkim	sc = calloc(1, sizeof(struct passthru_softc));
583238384Sjkim
584238384Sjkim	pi->pi_arg = sc;
585238384Sjkim	sc->psc_pi = pi;
586238384Sjkim
587238384Sjkim	/* initialize config space */
588238384Sjkim	if ((error = cfginit(ctx, pi, bus, slot, func)) != 0)
589238384Sjkim		goto done;
590238384Sjkim
591238384Sjkim	error = 0;		/* success */
592238384Sjkimdone:
593238384Sjkim	if (error) {
594		free(sc);
595		vm_unassign_pptdev(ctx, bus, slot, func);
596	}
597	return (error);
598}
599
600static int
601bar_access(int coff)
602{
603	if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1))
604		return (1);
605	else
606		return (0);
607}
608
609static int
610msicap_access(struct passthru_softc *sc, int coff)
611{
612	int caplen;
613
614	if (sc->psc_msi.capoff == 0)
615		return (0);
616
617	caplen = msi_caplen(sc->psc_msi.msgctrl);
618
619	if (coff >= sc->psc_msi.capoff && coff < sc->psc_msi.capoff + caplen)
620		return (1);
621	else
622		return (0);
623}
624
625static int
626msixcap_access(struct passthru_softc *sc, int coff)
627{
628	if (sc->psc_msix.capoff == 0)
629		return (0);
630
631	return (coff >= sc->psc_msix.capoff &&
632	        coff < sc->psc_msix.capoff + MSIX_CAPLEN);
633}
634
635static int
636passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
637		 int coff, int bytes, uint32_t *rv)
638{
639	struct passthru_softc *sc;
640
641	sc = pi->pi_arg;
642
643	/*
644	 * PCI BARs and MSI capability is emulated.
645	 */
646	if (bar_access(coff) || msicap_access(sc, coff))
647		return (-1);
648
649#ifdef LEGACY_SUPPORT
650	/*
651	 * Emulate PCIR_CAP_PTR if this device does not support MSI capability
652	 * natively.
653	 */
654	if (sc->psc_msi.emulated) {
655		if (coff >= PCIR_CAP_PTR && coff < PCIR_CAP_PTR + 4)
656			return (-1);
657	}
658#endif
659
660	/* Everything else just read from the device's config space */
661	*rv = read_config(&sc->psc_sel, coff, bytes);
662
663	return (0);
664}
665
666static int
667passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
668		  int coff, int bytes, uint32_t val)
669{
670	int error, msix_table_entries, i;
671	struct passthru_softc *sc;
672
673	sc = pi->pi_arg;
674
675	/*
676	 * PCI BARs are emulated
677	 */
678	if (bar_access(coff))
679		return (-1);
680
681	/*
682	 * MSI capability is emulated
683	 */
684	if (msicap_access(sc, coff)) {
685		msicap_cfgwrite(pi, sc->psc_msi.capoff, coff, bytes, val);
686
687		error = vm_setup_pptdev_msi(ctx, vcpu, sc->psc_sel.pc_bus,
688			sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
689			pi->pi_msi.addr, pi->pi_msi.msg_data,
690			pi->pi_msi.maxmsgnum);
691		if (error != 0) {
692			printf("vm_setup_pptdev_msi error %d\r\n", errno);
693			exit(1);
694		}
695		return (0);
696	}
697
698	if (msixcap_access(sc, coff)) {
699		msixcap_cfgwrite(pi, sc->psc_msix.capoff, coff, bytes, val);
700		if (pi->pi_msix.enabled) {
701			msix_table_entries = pi->pi_msix.table_count;
702			for (i = 0; i < msix_table_entries; i++) {
703				error = vm_setup_pptdev_msix(ctx, vcpu,
704				    sc->psc_sel.pc_bus, sc->psc_sel.pc_dev,
705				    sc->psc_sel.pc_func, i,
706				    pi->pi_msix.table[i].addr,
707				    pi->pi_msix.table[i].msg_data,
708				    pi->pi_msix.table[i].vector_control);
709
710				if (error) {
711					printf("vm_setup_pptdev_msix error "
712					    "%d\r\n", errno);
713					exit(1);
714				}
715			}
716		}
717		return (0);
718	}
719
720#ifdef LEGACY_SUPPORT
721	/*
722	 * If this device does not support MSI natively then we cannot let
723	 * the guest disable legacy interrupts from the device. It is the
724	 * legacy interrupt that is triggering the virtual MSI to the guest.
725	 */
726	if (sc->psc_msi.emulated && pci_msi_enabled(pi)) {
727		if (coff == PCIR_COMMAND && bytes == 2)
728			val &= ~PCIM_CMD_INTxDIS;
729	}
730#endif
731
732	write_config(&sc->psc_sel, coff, bytes, val);
733
734	return (0);
735}
736
737static void
738passthru_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
739	       uint64_t offset, int size, uint64_t value)
740{
741	struct passthru_softc *sc;
742	struct iodev_pio_req pio;
743
744	sc = pi->pi_arg;
745
746	if (baridx == pci_msix_table_bar(pi)) {
747		msix_table_write(ctx, vcpu, sc, offset, size, value);
748	} else {
749		assert(pi->pi_bar[baridx].type == PCIBAR_IO);
750		bzero(&pio, sizeof(struct iodev_pio_req));
751		pio.access = IODEV_PIO_WRITE;
752		pio.port = sc->psc_bar[baridx].addr + offset;
753		pio.width = size;
754		pio.val = value;
755
756		(void)ioctl(iofd, IODEV_PIO, &pio);
757	}
758}
759
760static uint64_t
761passthru_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
762	      uint64_t offset, int size)
763{
764	struct passthru_softc *sc;
765	struct iodev_pio_req pio;
766	uint64_t val;
767
768	sc = pi->pi_arg;
769
770	if (baridx == pci_msix_table_bar(pi)) {
771		val = msix_table_read(sc, offset, size);
772	} else {
773		assert(pi->pi_bar[baridx].type == PCIBAR_IO);
774		bzero(&pio, sizeof(struct iodev_pio_req));
775		pio.access = IODEV_PIO_READ;
776		pio.port = sc->psc_bar[baridx].addr + offset;
777		pio.width = size;
778		pio.val = 0;
779
780		(void)ioctl(iofd, IODEV_PIO, &pio);
781
782		val = pio.val;
783	}
784
785	return (val);
786}
787
788struct pci_devemu passthru = {
789	.pe_emu		= "passthru",
790	.pe_init	= passthru_init,
791	.pe_cfgwrite	= passthru_cfgwrite,
792	.pe_cfgread	= passthru_cfgread,
793	.pe_barwrite 	= passthru_write,
794	.pe_barread    	= passthru_read,
795};
796PCI_EMUL_SET(passthru);
797