/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: releng/11.0/usr.sbin/bhyve/pci_passthru.c 302365 2016-07-06 05:05:03Z ngie $
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/11.0/usr.sbin/bhyve/pci_passthru.c 302365 2016-07-06 05:05:03Z ngie $");

#include <sys/param.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/pciio.h>
#include <sys/ioctl.h>

#include <dev/io/iodev.h>
#include <dev/pci/pcireg.h>

#include <machine/iodev.h>

#include <assert.h>	/* required by the assert() calls below */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <err.h>
#include <fcntl.h>
#include <unistd.h>

#include <machine/vmm.h>
#include <vmmapi.h>
#include "pci_emul.h"
#include "mem.h"

#ifndef _PATH_DEVPCI
#define	_PATH_DEVPCI	"/dev/pci"
#endif

#ifndef	_PATH_DEVIO
#define	_PATH_DEVIO	"/dev/io"
#endif

#ifndef _PATH_MEM
#define	_PATH_MEM	"/dev/mem"
#endif

#define	LEGACY_SUPPORT	1

#define MSIX_TABLE_COUNT(ctrl) (((ctrl) & PCIM_MSIXCTRL_TABLE_SIZE) + 1)
#define MSIX_CAPLEN 12

static int pcifd = -1;
static int iofd = -1;
static int memfd = -1;

struct passthru_softc {
	struct pci_devinst *psc_pi;
	struct pcibar psc_bar[PCI_BARMAX + 1];
	struct {
		int		capoff;
		int		msgctrl;
		int		emulated;
	} psc_msi;
	struct {
		int		capoff;
	} psc_msix;
	struct pcisel psc_sel;
};

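/*
 * Return the size in bytes of the MSI capability structure, which varies
 * with the optional 64-bit address field advertised in the message
 * control register.
 */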
static int
msi_caplen(int msgctrl)
{
	int len;

	len = 10;		/* minimum length of msi capability */

	if (msgctrl & PCIM_MSICTRL_64BIT)
		len += 4;

#if 0
	/*
	 * Ignore the 'mask' and 'pending' bits in the MSI capability.
	 * We'll let the guest manipulate them directly.
	 */
	if (msgctrl & PCIM_MSICTRL_VECTOR)
		len += 10;
#endif

	return (len);
}

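/*
 * Read a register in the physical device's config space via /dev/pci.
 * Returns 0 if the PCIOCREAD ioctl fails.
 */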
static uint32_t
read_config(const struct pcisel *sel, long reg, int width)
{
	struct pci_io pi;

	bzero(&pi, sizeof(pi));
	pi.pi_sel = *sel;
	pi.pi_reg = reg;
	pi.pi_width = width;

	if (ioctl(pcifd, PCIOCREAD, &pi) < 0)
		return (0);				/* XXX */
	else
		return (pi.pi_data);
}

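/*
 * Write a register in the physical device's config space via /dev/pci.
 * Errors from the PCIOCWRITE ioctl are ignored.
 */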
static void
write_config(const struct pcisel *sel, long reg, int width, uint32_t data)
{
	struct pci_io pi;

	bzero(&pi, sizeof(pi));
	pi.pi_sel = *sel;
	pi.pi_reg = reg;
	pi.pi_width = width;
	pi.pi_data = data;

	(void)ioctl(pcifd, PCIOCWRITE, &pi);		/* XXX */
}

#ifdef LEGACY_SUPPORT
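/*
 * Synthesize an MSI capability for a device that lacks one and splice it
 * into the emulated config space, returning its offset.
 */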
static int
passthru_add_msicap(struct pci_devinst *pi, int msgnum, int nextptr)
{
	int capoff, i;
	struct msicap msicap;
	u_char *capdata;

	pci_populate_msicap(&msicap, msgnum, nextptr);

	/*
	 * XXX
	 * Copy the msi capability structure in the last 16 bytes of the
	 * config space. This is wrong because it could shadow something
	 * useful to the device.
	 */
	capoff = 256 - roundup(sizeof(msicap), 4);
	capdata = (u_char *)&msicap;
	for (i = 0; i < sizeof(msicap); i++)
		pci_set_cfgdata8(pi, capoff + i, capdata[i]);

	return (capoff);
}
#endif	/* LEGACY_SUPPORT */

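/*
 * Walk the physical device's capability list, mirror the MSI and MSI-X
 * capabilities into the emulated config space, and set up the emulated
 * MSI-X table.  Returns -1 if the device supports neither MSI nor MSI-X.
 */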
static int
cfginitmsi(struct passthru_softc *sc)
{
	int i, ptr, capptr, cap, sts, caplen, table_size;
	uint32_t u32;
	struct pcisel sel;
	struct pci_devinst *pi;
	struct msixcap msixcap;
	uint32_t *msixcap_ptr;

	pi = sc->psc_pi;
	sel = sc->psc_sel;

	/*
	 * Parse the capabilities and cache the location of the MSI
	 * and MSI-X capabilities.
	 */
	sts = read_config(&sel, PCIR_STATUS, 2);
	if (sts & PCIM_STATUS_CAPPRESENT) {
		ptr = read_config(&sel, PCIR_CAP_PTR, 1);
		while (ptr != 0 && ptr != 0xff) {
			cap = read_config(&sel, ptr + PCICAP_ID, 1);
			if (cap == PCIY_MSI) {
				/*
				 * Copy the MSI capability into the config
				 * space of the emulated pci device
				 */
				sc->psc_msi.capoff = ptr;
				sc->psc_msi.msgctrl = read_config(&sel,
								  ptr + 2, 2);
				sc->psc_msi.emulated = 0;
				caplen = msi_caplen(sc->psc_msi.msgctrl);
				capptr = ptr;
				while (caplen > 0) {
					u32 = read_config(&sel, capptr, 4);
					pci_set_cfgdata32(pi, capptr, u32);
					caplen -= 4;
					capptr += 4;
				}
			} else if (cap == PCIY_MSIX) {
				/*
				 * Copy the MSI-X capability
				 */
				sc->psc_msix.capoff = ptr;
				caplen = MSIX_CAPLEN;
				msixcap_ptr = (uint32_t *)&msixcap;
				capptr = ptr;
				while (caplen > 0) {
					u32 = read_config(&sel, capptr, 4);
					*msixcap_ptr = u32;
					pci_set_cfgdata32(pi, capptr, u32);
					caplen -= 4;
					capptr += 4;
					msixcap_ptr++;
				}
			}
			ptr = read_config(&sel, ptr + PCICAP_NEXTPTR, 1);
		}
	}

	if (sc->psc_msix.capoff != 0) {
		pi->pi_msix.pba_bar =
		    msixcap.pba_info & PCIM_MSIX_BIR_MASK;
		pi->pi_msix.pba_offset =
		    msixcap.pba_info & ~PCIM_MSIX_BIR_MASK;
		pi->pi_msix.table_bar =
		    msixcap.table_info & PCIM_MSIX_BIR_MASK;
		pi->pi_msix.table_offset =
		    msixcap.table_info & ~PCIM_MSIX_BIR_MASK;
		pi->pi_msix.table_count = MSIX_TABLE_COUNT(msixcap.msgctrl);
		pi->pi_msix.pba_size = PBA_SIZE(pi->pi_msix.table_count);

		/* Allocate the emulated MSI-X table array */
		table_size = pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE;
		pi->pi_msix.table = calloc(1, table_size);
		if (pi->pi_msix.table == NULL)
			return (-1);

		/* Mask all table entries */
		for (i = 0; i < pi->pi_msix.table_count; i++) {
			pi->pi_msix.table[i].vector_control |=
						PCIM_MSIX_VCTRL_MASK;
		}
	}

#ifdef LEGACY_SUPPORT
	/*
	 * If the passthrough device does not support MSI then craft an
	 * MSI capability for it. We link the new MSI capability at the
	 * head of the list of capabilities.
	 */
	if ((sts & PCIM_STATUS_CAPPRESENT) != 0 && sc->psc_msi.capoff == 0) {
		int origptr, msiptr;
		origptr = read_config(&sel, PCIR_CAP_PTR, 1);
		msiptr = passthru_add_msicap(pi, 1, origptr);
		sc->psc_msi.capoff = msiptr;
		sc->psc_msi.msgctrl = pci_get_cfgdata16(pi, msiptr + 2);
		sc->psc_msi.emulated = 1;
		pci_set_cfgdata8(pi, PCIR_CAP_PTR, msiptr);
	}
#endif

	/* Make sure one of the capabilities is present */
	if (sc->psc_msi.capoff == 0 && sc->psc_msix.capoff == 0)
		return (-1);
	else
		return (0);
}

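/*
 * Handle a guest read of the MSI-X table BAR.  Reads that fall within the
 * PBA are serviced from the device page mapped via /dev/mem; reads of the
 * table itself come from the emulated copy.  Returns -1 for out-of-range
 * or unsupported accesses.
 */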
static uint64_t
msix_table_read(struct passthru_softc *sc, uint64_t offset, int size)
{
	struct pci_devinst *pi;
	struct msix_table_entry *entry;
	uint8_t *src8;
	uint16_t *src16;
	uint32_t *src32;
	uint64_t *src64;
	uint64_t data;
	size_t entry_offset;
	int index;

	pi = sc->psc_pi;
	if (offset >= pi->pi_msix.pba_offset &&
	    offset < pi->pi_msix.pba_offset + pi->pi_msix.pba_size) {
		switch (size) {
		case 1:
			src8 = (uint8_t *)(pi->pi_msix.pba_page + offset -
			    pi->pi_msix.pba_page_offset);
			data = *src8;
			break;
		case 2:
			src16 = (uint16_t *)(pi->pi_msix.pba_page + offset -
			    pi->pi_msix.pba_page_offset);
			data = *src16;
			break;
		case 4:
			src32 = (uint32_t *)(pi->pi_msix.pba_page + offset -
			    pi->pi_msix.pba_page_offset);
			data = *src32;
			break;
		case 8:
			src64 = (uint64_t *)(pi->pi_msix.pba_page + offset -
			    pi->pi_msix.pba_page_offset);
			data = *src64;
			break;
		default:
			return (-1);
		}
		return (data);
	}

	if (offset < pi->pi_msix.table_offset)
		return (-1);

	offset -= pi->pi_msix.table_offset;
	index = offset / MSIX_TABLE_ENTRY_SIZE;
	if (index >= pi->pi_msix.table_count)
		return (-1);

	entry = &pi->pi_msix.table[index];
	entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;

	switch (size) {
	case 1:
		src8 = (uint8_t *)((void *)entry + entry_offset);
		data = *src8;
		break;
	case 2:
		src16 = (uint16_t *)((void *)entry + entry_offset);
		data = *src16;
		break;
	case 4:
		src32 = (uint32_t *)((void *)entry + entry_offset);
		data = *src32;
		break;
	case 8:
		src64 = (uint64_t *)((void *)entry + entry_offset);
		data = *src64;
		break;
	default:
		return (-1);
	}

	return (data);
}

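/*
 * Handle a guest write to the MSI-X table BAR.  PBA writes go straight to
 * the mapped device page.  Table writes update the emulated copy and, when
 * MSI-X is enabled and the entry is (or just was) unmasked, are pushed to
 * the host via vm_setup_pptdev_msix().
 */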
static void
msix_table_write(struct vmctx *ctx, int vcpu, struct passthru_softc *sc,
		 uint64_t offset, int size, uint64_t data)
{
	struct pci_devinst *pi;
	struct msix_table_entry *entry;
	uint8_t *dest8;
	uint16_t *dest16;
	uint32_t *dest32;
	uint64_t *dest64;
	size_t entry_offset;
	uint32_t vector_control;
	int index;

	pi = sc->psc_pi;
	if (offset >= pi->pi_msix.pba_offset &&
	    offset < pi->pi_msix.pba_offset + pi->pi_msix.pba_size) {
		switch (size) {
		case 1:
			dest8 = (uint8_t *)(pi->pi_msix.pba_page + offset -
			    pi->pi_msix.pba_page_offset);
			*dest8 = data;
			break;
		case 2:
			dest16 = (uint16_t *)(pi->pi_msix.pba_page + offset -
			    pi->pi_msix.pba_page_offset);
			*dest16 = data;
			break;
		case 4:
			dest32 = (uint32_t *)(pi->pi_msix.pba_page + offset -
			    pi->pi_msix.pba_page_offset);
			*dest32 = data;
			break;
		case 8:
			dest64 = (uint64_t *)(pi->pi_msix.pba_page + offset -
			    pi->pi_msix.pba_page_offset);
			*dest64 = data;
			break;
		default:
			break;
		}
		return;
	}

	if (offset < pi->pi_msix.table_offset)
		return;

	offset -= pi->pi_msix.table_offset;
	index = offset / MSIX_TABLE_ENTRY_SIZE;
	if (index >= pi->pi_msix.table_count)
		return;

	entry = &pi->pi_msix.table[index];
	entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;

	/* Only 4 byte naturally-aligned writes are supported */
	assert(size == 4);
	assert(entry_offset % 4 == 0);

	vector_control = entry->vector_control;
	dest32 = (uint32_t *)((void *)entry + entry_offset);
	*dest32 = data;
	/* If MSI-X hasn't been enabled, do nothing */
	if (pi->pi_msix.enabled) {
		/* If the entry is masked, don't set it up */
		if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 ||
		    (vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
			(void)vm_setup_pptdev_msix(ctx, vcpu,
			    sc->psc_sel.pc_bus, sc->psc_sel.pc_dev,
			    sc->psc_sel.pc_func, index, entry->addr,
			    entry->msg_data, entry->vector_control);
		}
	}
}

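/*
 * Set up passthrough mappings for the BAR that holds the MSI-X table.
 * The pages containing the table are emulated rather than mapped through,
 * and if the PBA shares a page with the table that page is mapped from
 * /dev/mem so PBA accesses still reach the device.  'base' is the guest
 * address at which the BAR was allocated.
 */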
static int
init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base)
{
	int b, s, f;
	int error, idx;
	size_t len, remaining;
	uint32_t table_size, table_offset;
	uint32_t pba_size, pba_offset;
	vm_paddr_t start;
	struct pci_devinst *pi = sc->psc_pi;

	assert(pci_msix_table_bar(pi) >= 0 && pci_msix_pba_bar(pi) >= 0);

	b = sc->psc_sel.pc_bus;
	s = sc->psc_sel.pc_dev;
	f = sc->psc_sel.pc_func;

	/*
	 * If the MSI-X table BAR maps memory intended for
	 * other uses, it is at least assured that the table
	 * either resides in its own page within the region,
	 * or it resides in a page shared with only the PBA.
	 */
	table_offset = rounddown2(pi->pi_msix.table_offset, 4096);

	table_size = pi->pi_msix.table_offset - table_offset;
	table_size += pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE;
	table_size = roundup2(table_size, 4096);

	idx = pi->pi_msix.table_bar;
	start = pi->pi_bar[idx].addr;
	remaining = pi->pi_bar[idx].size;

	if (pi->pi_msix.pba_bar == pi->pi_msix.table_bar) {
		pba_offset = pi->pi_msix.pba_offset;
		pba_size = pi->pi_msix.pba_size;
		if (pba_offset >= table_offset + table_size ||
		    table_offset >= pba_offset + pba_size) {
			/*
			 * If the PBA does not share a page with the MSI-x
			 * tables, no PBA emulation is required.
			 */
			pi->pi_msix.pba_page = NULL;
			pi->pi_msix.pba_page_offset = 0;
		} else {
			/*
			 * The PBA overlaps with either the first or last
			 * page of the MSI-X table region.  Map the
			 * appropriate page.
			 */
			if (pba_offset <= table_offset)
				pi->pi_msix.pba_page_offset = table_offset;
			else
				pi->pi_msix.pba_page_offset = table_offset +
				    table_size - 4096;
			pi->pi_msix.pba_page = mmap(NULL, 4096, PROT_READ |
			    PROT_WRITE, MAP_SHARED, memfd, start +
			    pi->pi_msix.pba_page_offset);
			if (pi->pi_msix.pba_page == MAP_FAILED) {
				warn("Failed to map PBA page for MSI-X on "
				    "%d/%d/%d", b, s, f);
				return (-1);
			}
		}
	}

	/* Map everything before the MSI-X table */
	if (table_offset > 0) {
		len = table_offset;
		error = vm_map_pptdev_mmio(ctx, b, s, f, start, len, base);
		if (error)
			return (error);

		base += len;
		start += len;
		remaining -= len;
	}

	/* Skip the MSI-X table */
	base += table_size;
	start += table_size;
	remaining -= table_size;

	/* Map everything beyond the end of the MSI-X table */
	if (remaining > 0) {
		len = remaining;
		error = vm_map_pptdev_mmio(ctx, b, s, f, start, len, base);
		if (error)
			return (error);
	}

	return (0);
}

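/*
 * Discover the physical device's BARs via PCIOCGETBAR, allocate matching
 * BARs in the guest, and map MMIO BARs through to the device (with the
 * MSI-X table BAR getting special treatment).
 */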
static int
cfginitbar(struct vmctx *ctx, struct passthru_softc *sc)
{
	int i, error;
	struct pci_devinst *pi;
	struct pci_bar_io bar;
	enum pcibar_type bartype;
	uint64_t base, size;

	pi = sc->psc_pi;

	/*
	 * Initialize BAR registers
	 */
	for (i = 0; i <= PCI_BARMAX; i++) {
		bzero(&bar, sizeof(bar));
		bar.pbi_sel = sc->psc_sel;
		bar.pbi_reg = PCIR_BAR(i);

		if (ioctl(pcifd, PCIOCGETBAR, &bar) < 0)
			continue;

		if (PCI_BAR_IO(bar.pbi_base)) {
			bartype = PCIBAR_IO;
			base = bar.pbi_base & PCIM_BAR_IO_BASE;
		} else {
			switch (bar.pbi_base & PCIM_BAR_MEM_TYPE) {
			case PCIM_BAR_MEM_64:
				bartype = PCIBAR_MEM64;
				break;
			default:
				bartype = PCIBAR_MEM32;
				break;
			}
			base = bar.pbi_base & PCIM_BAR_MEM_BASE;
		}
		size = bar.pbi_length;

		if (bartype != PCIBAR_IO) {
			if (((base | size) & PAGE_MASK) != 0) {
				warnx("passthru device %d/%d/%d BAR %d: "
				    "base %#lx or size %#lx not page aligned",
				    sc->psc_sel.pc_bus, sc->psc_sel.pc_dev,
				    sc->psc_sel.pc_func, i, base, size);
				return (-1);
			}
		}

		/* Cache information about the "real" BAR */
		sc->psc_bar[i].type = bartype;
		sc->psc_bar[i].size = size;
		sc->psc_bar[i].addr = base;

		/* Allocate the BAR in the guest I/O or MMIO space */
		error = pci_emul_alloc_pbar(pi, i, base, bartype, size);
		if (error)
			return (-1);

		/* The MSI-X table needs special handling */
		if (i == pci_msix_table_bar(pi)) {
			error = init_msix_table(ctx, sc, base);
			if (error)
				return (-1);
		} else if (bartype != PCIBAR_IO) {
			/* Map the physical BAR in the guest MMIO space */
			error = vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
				sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
				pi->pi_bar[i].addr, pi->pi_bar[i].size, base);
			if (error)
				return (-1);
		}

		/*
		 * 64-bit BAR takes up two slots so skip the next one.
		 */
		if (bartype == PCIBAR_MEM64) {
			i++;
			assert(i <= PCI_BARMAX);
			sc->psc_bar[i].type = PCIBAR_MEMHI64;
		}
	}
	return (0);
}

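/*
 * Record the bus/slot/func selector for the physical device and initialize
 * the emulated capabilities and BARs.  Returns 0 on success.
 */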
static int
cfginit(struct vmctx *ctx, struct pci_devinst *pi, int bus, int slot, int func)
{
	int error;
	struct passthru_softc *sc;

	error = 1;
	sc = pi->pi_arg;

	bzero(&sc->psc_sel, sizeof(struct pcisel));
	sc->psc_sel.pc_bus = bus;
	sc->psc_sel.pc_dev = slot;
	sc->psc_sel.pc_func = func;

	if (cfginitmsi(sc) != 0) {
		warnx("failed to initialize MSI for PCI %d/%d/%d",
		    bus, slot, func);
		goto done;
	}

	if (cfginitbar(ctx, sc) != 0) {
		warnx("failed to initialize BARs for PCI %d/%d/%d",
		    bus, slot, func);
		goto done;
	}

	error = 0;				/* success */
done:
	return (error);
}

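/*
 * Device model init routine.  Opens the /dev/pci, /dev/io and /dev/mem
 * backends, parses the "bus/slot/func" option string, claims the device
 * from the ppt(4) driver and initializes the emulated config space.
 */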
static int
passthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	int bus, slot, func, error, memflags;
	struct passthru_softc *sc;

	sc = NULL;
	error = 1;

	/*
	 * Zero the selector so the error path below cannot hand
	 * uninitialized values to vm_unassign_pptdev() if we fail
	 * before 'opts' has been parsed.
	 */
	bus = slot = func = 0;

	memflags = vm_get_memflags(ctx);
	if (!(memflags & VM_MEM_F_WIRED)) {
		warnx("passthru requires guest memory to be wired");
		goto done;
	}

	if (pcifd < 0) {
		pcifd = open(_PATH_DEVPCI, O_RDWR, 0);
		if (pcifd < 0) {
			warn("failed to open %s", _PATH_DEVPCI);
			goto done;
		}
	}

	if (iofd < 0) {
		iofd = open(_PATH_DEVIO, O_RDWR, 0);
		if (iofd < 0) {
			warn("failed to open %s", _PATH_DEVIO);
			goto done;
		}
	}

	if (memfd < 0) {
		memfd = open(_PATH_MEM, O_RDWR, 0);
		if (memfd < 0) {
			warn("failed to open %s", _PATH_MEM);
			goto done;
		}
	}

	if (opts == NULL ||
	    sscanf(opts, "%d/%d/%d", &bus, &slot, &func) != 3) {
		warnx("invalid passthru options");
		goto done;
	}

	if (vm_assign_pptdev(ctx, bus, slot, func) != 0) {
		warnx("PCI device at %d/%d/%d is not using the ppt(4) driver",
		    bus, slot, func);
		goto done;
	}

	sc = calloc(1, sizeof(struct passthru_softc));
	if (sc == NULL)
		goto done;

	pi->pi_arg = sc;
	sc->psc_pi = pi;

	/* initialize config space */
	if ((error = cfginit(ctx, pi, bus, slot, func)) != 0)
		goto done;

	error = 0;		/* success */
done:
	if (error) {
		free(sc);
		vm_unassign_pptdev(ctx, bus, slot, func);
	}
	return (error);
}

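/*
 * Return true if 'coff' falls within the BAR registers in config space.
 */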
static int
bar_access(int coff)
{
	if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1))
		return (1);
	else
		return (0);
}

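/*
 * Return true if 'coff' falls within the emulated MSI capability.
 */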
static int
msicap_access(struct passthru_softc *sc, int coff)
{
	int caplen;

	if (sc->psc_msi.capoff == 0)
		return (0);

	caplen = msi_caplen(sc->psc_msi.msgctrl);

	if (coff >= sc->psc_msi.capoff && coff < sc->psc_msi.capoff + caplen)
		return (1);
	else
		return (0);
}

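/*
 * Return true if 'coff' falls within the emulated MSI-X capability.
 */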
static int
msixcap_access(struct passthru_softc *sc, int coff)
{
	if (sc->psc_msix.capoff == 0)
		return (0);

	return (coff >= sc->psc_msix.capoff &&
	        coff < sc->psc_msix.capoff + MSIX_CAPLEN);
}

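/*
 * Config space read handler.  Returning -1 tells the caller to satisfy the
 * access from the emulated config space; returning 0 means *rv holds data
 * read from the physical device.
 */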
static int
passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
		 int coff, int bytes, uint32_t *rv)
{
	struct passthru_softc *sc;

	sc = pi->pi_arg;

	/*
	 * PCI BARs and the MSI capability are emulated.
	 */
	if (bar_access(coff) || msicap_access(sc, coff))
		return (-1);

#ifdef LEGACY_SUPPORT
	/*
	 * Emulate PCIR_CAP_PTR if this device does not support MSI capability
	 * natively.
	 */
	if (sc->psc_msi.emulated) {
		if (coff >= PCIR_CAP_PTR && coff < PCIR_CAP_PTR + 4)
			return (-1);
	}
#endif

	/* Everything else just read from the device's config space */
	*rv = read_config(&sc->psc_sel, coff, bytes);

	return (0);
}

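/*
 * Config space write handler.  BAR writes are left to the emulation layer
 * (return -1); writes to the MSI/MSI-X capabilities update the emulated
 * capability and reprogram the host interrupt routing; everything else is
 * written through to the physical device.
 */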
static int
passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
		  int coff, int bytes, uint32_t val)
{
	int error, msix_table_entries, i;
	struct passthru_softc *sc;

	sc = pi->pi_arg;

	/*
	 * PCI BARs are emulated
	 */
	if (bar_access(coff))
		return (-1);

	/*
	 * MSI capability is emulated
	 */
	if (msicap_access(sc, coff)) {
		msicap_cfgwrite(pi, sc->psc_msi.capoff, coff, bytes, val);

		error = vm_setup_pptdev_msi(ctx, vcpu, sc->psc_sel.pc_bus,
			sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
			pi->pi_msi.addr, pi->pi_msi.msg_data,
			pi->pi_msi.maxmsgnum);
		if (error != 0)
			err(1, "vm_setup_pptdev_msi");
		return (0);
	}

	if (msixcap_access(sc, coff)) {
		msixcap_cfgwrite(pi, sc->psc_msix.capoff, coff, bytes, val);
		if (pi->pi_msix.enabled) {
			msix_table_entries = pi->pi_msix.table_count;
			for (i = 0; i < msix_table_entries; i++) {
				error = vm_setup_pptdev_msix(ctx, vcpu,
				    sc->psc_sel.pc_bus, sc->psc_sel.pc_dev,
				    sc->psc_sel.pc_func, i,
				    pi->pi_msix.table[i].addr,
				    pi->pi_msix.table[i].msg_data,
				    pi->pi_msix.table[i].vector_control);

				if (error)
					err(1, "vm_setup_pptdev_msix");
			}
		}
		return (0);
	}

#ifdef LEGACY_SUPPORT
	/*
	 * If this device does not support MSI natively then we cannot let
	 * the guest disable legacy interrupts from the device. It is the
	 * legacy interrupt that is triggering the virtual MSI to the guest.
	 */
	if (sc->psc_msi.emulated && pci_msi_enabled(pi)) {
		if (coff == PCIR_COMMAND && bytes == 2)
			val &= ~PCIM_CMD_INTxDIS;
	}
#endif

	write_config(&sc->psc_sel, coff, bytes, val);

	return (0);
}

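/*
 * BAR write handler.  MSI-X table accesses are emulated; I/O port BARs are
 * forwarded to the physical device through /dev/io.
 */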
static void
passthru_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
	       uint64_t offset, int size, uint64_t value)
{
	struct passthru_softc *sc;
	struct iodev_pio_req pio;

	sc = pi->pi_arg;

	if (baridx == pci_msix_table_bar(pi)) {
		msix_table_write(ctx, vcpu, sc, offset, size, value);
	} else {
		assert(pi->pi_bar[baridx].type == PCIBAR_IO);
		bzero(&pio, sizeof(struct iodev_pio_req));
		pio.access = IODEV_PIO_WRITE;
		pio.port = sc->psc_bar[baridx].addr + offset;
		pio.width = size;
		pio.val = value;

		(void)ioctl(iofd, IODEV_PIO, &pio);
	}
}

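/*
 * BAR read handler, the read-side counterpart of passthru_write() above.
 */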
static uint64_t
passthru_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
	      uint64_t offset, int size)
{
	struct passthru_softc *sc;
	struct iodev_pio_req pio;
	uint64_t val;

	sc = pi->pi_arg;

	if (baridx == pci_msix_table_bar(pi)) {
		val = msix_table_read(sc, offset, size);
	} else {
		assert(pi->pi_bar[baridx].type == PCIBAR_IO);
		bzero(&pio, sizeof(struct iodev_pio_req));
		pio.access = IODEV_PIO_READ;
		pio.port = sc->psc_bar[baridx].addr + offset;
		pio.width = size;
		pio.val = 0;

		(void)ioctl(iofd, IODEV_PIO, &pio);

		val = pio.val;
	}

	return (val);
}

struct pci_devemu passthru = {
	.pe_emu		= "passthru",
	.pe_init	= passthru_init,
	.pe_cfgwrite	= passthru_cfgwrite,
	.pe_cfgread	= passthru_cfgread,
	.pe_barwrite	= passthru_write,
	.pe_barread	= passthru_read,
};
PCI_EMUL_SET(passthru);