1/*	$OpenBSD: virtio_pci.c,v 1.38 2024/06/26 01:40:49 jsg Exp $	*/
2/*	$NetBSD: virtio.c,v 1.3 2011/11/02 23:05:52 njoly Exp $	*/
3
4/*
5 * Copyright (c) 2012 Stefan Fritsch.
6 * Copyright (c) 2010 Minoura Makoto.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/device.h>
33#include <sys/mutex.h>
34
35#include <dev/pci/pcidevs.h>
36#include <dev/pci/pcireg.h>
37#include <dev/pci/pcivar.h>
38#include <dev/pci/virtio_pcireg.h>
39
40#include <dev/pv/virtioreg.h>
41#include <dev/pv/virtiovar.h>
42
/*
 * Debug printf: emitted only when the compile-time VIRTIO_DEBUG level
 * is at least n; compiles away entirely when VIRTIO_DEBUG is 0.
 */
#define DNPRINTF(n,x...)				\
    do { if (VIRTIO_DEBUG >= n) printf(x); } while(0)
45
46
47/*
48 * XXX: Before being used on big endian arches, the access to config registers
49 * XXX: needs to be reviewed/fixed. The non-device specific registers are
50 * XXX: PCI-endian while the device specific registers are native endian.
51 */
52
53#define MAX_MSIX_VECS	8
54
55struct virtio_pci_softc;
56
57int		virtio_pci_match(struct device *, void *, void *);
58void		virtio_pci_attach(struct device *, struct device *, void *);
59int		virtio_pci_attach_09(struct virtio_pci_softc *sc, struct pci_attach_args *pa);
60int		virtio_pci_attach_10(struct virtio_pci_softc *sc, struct pci_attach_args *pa);
61int		virtio_pci_detach(struct device *, int);
62
63void		virtio_pci_kick(struct virtio_softc *, uint16_t);
64int		virtio_pci_adjust_config_region(struct virtio_pci_softc *);
65uint8_t		virtio_pci_read_device_config_1(struct virtio_softc *, int);
66uint16_t	virtio_pci_read_device_config_2(struct virtio_softc *, int);
67uint32_t	virtio_pci_read_device_config_4(struct virtio_softc *, int);
68uint64_t	virtio_pci_read_device_config_8(struct virtio_softc *, int);
69void		virtio_pci_write_device_config_1(struct virtio_softc *, int, uint8_t);
70void		virtio_pci_write_device_config_2(struct virtio_softc *, int, uint16_t);
71void		virtio_pci_write_device_config_4(struct virtio_softc *, int, uint32_t);
72void		virtio_pci_write_device_config_8(struct virtio_softc *, int, uint64_t);
73uint16_t	virtio_pci_read_queue_size(struct virtio_softc *, uint16_t);
74void		virtio_pci_setup_queue(struct virtio_softc *, struct virtqueue *, uint64_t);
75int		virtio_pci_get_status(struct virtio_softc *);
76void		virtio_pci_set_status(struct virtio_softc *, int);
77int		virtio_pci_negotiate_features(struct virtio_softc *, const struct virtio_feature_name *);
78int		virtio_pci_negotiate_features_10(struct virtio_softc *, const struct virtio_feature_name *);
79void		virtio_pci_set_msix_queue_vector(struct virtio_pci_softc *, uint32_t, uint16_t);
80void		virtio_pci_set_msix_config_vector(struct virtio_pci_softc *, uint16_t);
81int		virtio_pci_msix_establish(struct virtio_pci_softc *, struct pci_attach_args *, int, int (*)(void *), void *);
82int		virtio_pci_setup_msix(struct virtio_pci_softc *, struct pci_attach_args *, int);
83void		virtio_pci_free_irqs(struct virtio_pci_softc *);
84int		virtio_pci_poll_intr(void *);
85int		virtio_pci_legacy_intr(void *);
86int		virtio_pci_legacy_intr_mpsafe(void *);
87int		virtio_pci_config_intr(void *);
88int		virtio_pci_queue_intr(void *);
89int		virtio_pci_shared_queue_intr(void *);
90int		virtio_pci_find_cap(struct virtio_pci_softc *sc, int cfg_type, void *buf, int buflen);
91#if VIRTIO_DEBUG
92void virtio_pci_dump_caps(struct virtio_pci_softc *sc);
93#endif
94
95enum irq_type {
96	IRQ_NO_MSIX,
97	IRQ_MSIX_SHARED, /* vec 0: config irq, vec 1 shared by all vqs */
98	IRQ_MSIX_PER_VQ, /* vec 0: config irq, vec n: irq of vq[n-1] */
99};
100
struct virtio_pci_softc {
	/* Generic virtio state; the casts in this file rely on it being first. */
	struct virtio_softc	sc_sc;
	pci_chipset_tag_t	sc_pc;
	pcitag_t		sc_ptag;

	/* 0.9: the whole legacy bar; 1.0: the common cfg region. */
	bus_space_tag_t		sc_iot;
	bus_space_handle_t	sc_ioh;
	bus_size_t		sc_iosize;

	/* 1.0 only: mappings of the bars referenced by virtio capabilities. */
	bus_space_tag_t		sc_bars_iot[4];
	bus_space_handle_t	sc_bars_ioh[4];
	bus_size_t		sc_bars_iosize[4];

	/* Queue notify ("kick") region. */
	bus_space_tag_t		sc_notify_iot;
	bus_space_handle_t	sc_notify_ioh;
	bus_size_t		sc_notify_iosize;
	unsigned int		sc_notify_off_multiplier;

	/* Device-specific configuration region. */
	bus_space_tag_t		sc_devcfg_iot;
	bus_space_handle_t	sc_devcfg_ioh;
	bus_size_t		sc_devcfg_iosize;
	/*
	 * With 0.9, the offset of the devcfg region in the io bar changes
	 * depending on MSI-X being enabled or not.
	 * With 1.0, this field is still used to remember if MSI-X is enabled
	 * or not.
	 */
	unsigned int		sc_devcfg_offset;

	/* ISR status register; reading it acks legacy interrupts. */
	bus_space_tag_t		sc_isr_iot;
	bus_space_handle_t	sc_isr_ioh;
	bus_size_t		sc_isr_iosize;

	/* Interrupt cookies from pci_intr_establish(). */
	void			*sc_ih[MAX_MSIX_VECS];

	enum irq_type		sc_irq_type;
};
138
/* Autoconf glue: softc size, match, attach, detach; no activate hook. */
const struct cfattach virtio_pci_ca = {
	sizeof(struct virtio_pci_softc),
	virtio_pci_match,
	virtio_pci_attach,
	virtio_pci_detach,
	NULL
};
146
/*
 * PCI transport backends for the bus-independent virtio layer
 * (see virtiovar.h for the slot meanings).
 */
struct virtio_ops virtio_pci_ops = {
	virtio_pci_kick,
	virtio_pci_read_device_config_1,
	virtio_pci_read_device_config_2,
	virtio_pci_read_device_config_4,
	virtio_pci_read_device_config_8,
	virtio_pci_write_device_config_1,
	virtio_pci_write_device_config_2,
	virtio_pci_write_device_config_4,
	virtio_pci_write_device_config_8,
	virtio_pci_read_queue_size,
	virtio_pci_setup_queue,
	virtio_pci_get_status,
	virtio_pci_set_status,
	virtio_pci_negotiate_features,
	virtio_pci_poll_intr,
};
164
165static inline uint64_t
166_cread(struct virtio_pci_softc *sc, unsigned off, unsigned size)
167{
168	uint64_t val;
169	switch (size) {
170	case 1:
171		val = bus_space_read_1(sc->sc_iot, sc->sc_ioh, off);
172		break;
173	case 2:
174		val = bus_space_read_2(sc->sc_iot, sc->sc_ioh, off);
175		break;
176	case 4:
177		val = bus_space_read_4(sc->sc_iot, sc->sc_ioh, off);
178		break;
179	case 8:
180		val = bus_space_read_4(sc->sc_iot, sc->sc_ioh,
181		    off + sizeof(uint32_t));
182		val <<= 32;
183		val += bus_space_read_4(sc->sc_iot, sc->sc_ioh, off);
184		break;
185	}
186	return val;
187}
188
/*
 * CREAD/CWRITE access a member of struct virtio_pci_common_cfg (the 1.0
 * common configuration region) through sc_iot/sc_ioh, using the member's
 * offset and size to select the bus_space access width.
 */
#define CREAD(sc, memb)  _cread(sc, offsetof(struct virtio_pci_common_cfg, memb), \
    sizeof(((struct virtio_pci_common_cfg *)0)->memb))

/*
 * 64-bit members are written as two 32-bit writes, low word first;
 * the write is not atomic with respect to the device.
 */
#define CWRITE(sc, memb, val)							\
	do {									\
		struct virtio_pci_common_cfg c;					\
		size_t off = offsetof(struct virtio_pci_common_cfg, memb);	\
		size_t size = sizeof(c.memb);					\
										\
		DNPRINTF(2, "%s: %d: off %#zx size %#zx write %#llx\n",		\
		    __func__, __LINE__, off, size, (unsigned long long)val);	\
		switch (size) {							\
		case 1:								\
			bus_space_write_1(sc->sc_iot, sc->sc_ioh, off, val);	\
			break;							\
		case 2:								\
			bus_space_write_2(sc->sc_iot, sc->sc_ioh, off, val);	\
			break;							\
		case 4:								\
			bus_space_write_4(sc->sc_iot, sc->sc_ioh, off, val);	\
			break;							\
		case 8:								\
			bus_space_write_4(sc->sc_iot, sc->sc_ioh, off,		\
			    (val) & 0xffffffff);				\
			bus_space_write_4(sc->sc_iot, sc->sc_ioh,		\
			    (off) + sizeof(uint32_t), (uint64_t)(val) >> 32);	\
			break;							\
		}								\
	} while (0)
218
219uint16_t
220virtio_pci_read_queue_size(struct virtio_softc *vsc, uint16_t idx)
221{
222	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
223	uint16_t ret;
224	if (sc->sc_sc.sc_version_1) {
225		CWRITE(sc, queue_select, idx);
226		ret = CREAD(sc, queue_size);
227	} else {
228		bus_space_write_2(sc->sc_iot, sc->sc_ioh,
229		    VIRTIO_CONFIG_QUEUE_SELECT, idx);
230		ret = bus_space_read_2(sc->sc_iot, sc->sc_ioh,
231		    VIRTIO_CONFIG_QUEUE_SIZE);
232	}
233	return ret;
234}
235
/*
 * Program virtqueue vq's ring addresses into the device.
 * addr == 0 tears the queue down; otherwise addr is the DMA address of
 * the descriptor table, with the avail/used rings following at
 * vq_availoffset/vq_usedoffset.
 */
void
virtio_pci_setup_queue(struct virtio_softc *vsc, struct virtqueue *vq,
    uint64_t addr)
{
	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
	if (sc->sc_sc.sc_version_1) {
		CWRITE(sc, queue_select, vq->vq_index);
		if (addr == 0) {
			/* Disable first, then clear the ring addresses. */
			CWRITE(sc, queue_enable, 0);
			CWRITE(sc, queue_desc, 0);
			CWRITE(sc, queue_avail, 0);
			CWRITE(sc, queue_used, 0);
		} else {
			/* Addresses must be set before queue_enable. */
			CWRITE(sc, queue_desc, addr);
			CWRITE(sc, queue_avail, addr + vq->vq_availoffset);
			CWRITE(sc, queue_used, addr + vq->vq_usedoffset);
			CWRITE(sc, queue_enable, 1);
			vq->vq_notify_off = CREAD(sc, queue_notify_off);
		}
	} else {
		/* 0.9 takes a single page frame number for the whole ring. */
		bus_space_write_2(sc->sc_iot, sc->sc_ioh,
		    VIRTIO_CONFIG_QUEUE_SELECT, vq->vq_index);
		bus_space_write_4(sc->sc_iot, sc->sc_ioh,
		    VIRTIO_CONFIG_QUEUE_ADDRESS, addr / VIRTIO_PAGE_SIZE);
	}

	/*
	 * This path is only executed if this function is called after
	 * the child's attach function has finished. In other cases,
	 * it's done in virtio_pci_setup_msix().
	 */
	if (sc->sc_irq_type != IRQ_NO_MSIX) {
		int vec = 1;
		if (sc->sc_irq_type == IRQ_MSIX_PER_VQ)
		       vec += vq->vq_index;
		if (sc->sc_sc.sc_version_1) {
			CWRITE(sc, queue_msix_vector, vec);
		} else {
			bus_space_write_2(sc->sc_iot, sc->sc_ioh,
			    VIRTIO_MSI_QUEUE_VECTOR, vec);
		}
	}
}
279
280int
281virtio_pci_get_status(struct virtio_softc *vsc)
282{
283	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
284
285	if (sc->sc_sc.sc_version_1)
286		return CREAD(sc, device_status);
287	else
288		return bus_space_read_1(sc->sc_iot, sc->sc_ioh,
289		    VIRTIO_CONFIG_DEVICE_STATUS);
290}
291
292void
293virtio_pci_set_status(struct virtio_softc *vsc, int status)
294{
295	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
296	int old = 0;
297
298	if (sc->sc_sc.sc_version_1) {
299		if (status == 0) {
300			CWRITE(sc, device_status, 0);
301			while (CREAD(sc, device_status) != 0) {
302				CPU_BUSY_CYCLE();
303			}
304		} else {
305			old = CREAD(sc, device_status);
306			CWRITE(sc, device_status, status|old);
307		}
308	} else {
309		if (status == 0) {
310			bus_space_write_1(sc->sc_iot, sc->sc_ioh,
311			    VIRTIO_CONFIG_DEVICE_STATUS, status|old);
312			while (bus_space_read_1(sc->sc_iot, sc->sc_ioh,
313			    VIRTIO_CONFIG_DEVICE_STATUS) != 0) {
314				CPU_BUSY_CYCLE();
315			}
316		} else {
317			old = bus_space_read_1(sc->sc_iot, sc->sc_ioh,
318			    VIRTIO_CONFIG_DEVICE_STATUS);
319			bus_space_write_1(sc->sc_iot, sc->sc_ioh,
320			    VIRTIO_CONFIG_DEVICE_STATUS, status|old);
321		}
322	}
323}
324
325int
326virtio_pci_match(struct device *parent, void *match, void *aux)
327{
328	struct pci_attach_args *pa;
329
330	pa = (struct pci_attach_args *)aux;
331	if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_OPENBSD &&
332	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_OPENBSD_CONTROL)
333		return 1;
334	if (PCI_VENDOR(pa->pa_id) != PCI_VENDOR_QUMRANET)
335		return 0;
336	/* virtio 0.9 */
337	if (PCI_PRODUCT(pa->pa_id) >= 0x1000 &&
338	    PCI_PRODUCT(pa->pa_id) <= 0x103f &&
339	    PCI_REVISION(pa->pa_class) == 0)
340		return 1;
341	/* virtio 1.0 */
342	if (PCI_PRODUCT(pa->pa_id) >= 0x1040 &&
343	    PCI_PRODUCT(pa->pa_id) <= 0x107f &&
344	    PCI_REVISION(pa->pa_class) == 1)
345		return 1;
346	return 0;
347}
348
#if VIRTIO_DEBUG
/*
 * Debug helper: walk the vendor-specific PCI capability chain and print
 * each entry interpreted as a struct virtio_pci_cap.
 * NOTE(review): no loop detection on the cap_next chain — assumes the
 * device presents a well-formed list.
 */
void
virtio_pci_dump_caps(struct virtio_pci_softc *sc)
{
	pci_chipset_tag_t pc = sc->sc_pc;
	pcitag_t tag = sc->sc_ptag;
	int offset;
	union {
		pcireg_t reg[4];
		struct virtio_pci_cap vcap;
	} v;

	if (!pci_get_capability(pc, tag, PCI_CAP_VENDSPEC, &offset, &v.reg[0]))
		return;

	printf("\n");
	do {
		/* Read the 16-byte cap header one 32-bit register at a time. */
		for (int i = 0; i < 4; i++)
			v.reg[i] = pci_conf_read(pc, tag, offset + i * 4);
		printf("%s: cfgoff %#x len %#x type %#x bar %#x: off %#x len %#x\n",
			__func__, offset, v.vcap.cap_len, v.vcap.cfg_type, v.vcap.bar,
			v.vcap.offset, v.vcap.length);
		offset = v.vcap.cap_next;
	} while (offset != 0);
}
#endif
375
/*
 * Find the vendor-specific PCI capability with the given virtio cfg_type
 * and copy it into buf (buflen bytes). Returns 0 on success, ENOENT if
 * no such capability exists, ERANGE if buf is too small for it.
 */
int
virtio_pci_find_cap(struct virtio_pci_softc *sc, int cfg_type, void *buf, int buflen)
{
	pci_chipset_tag_t pc = sc->sc_pc;
	pcitag_t tag = sc->sc_ptag;
	unsigned int offset, i, len;
	union {
		pcireg_t reg[8];
		struct virtio_pci_cap vcap;
	} *v = buf;

	/* The caller must at least provide room for the generic header. */
	if (buflen < sizeof(struct virtio_pci_cap))
		return ERANGE;

	if (!pci_get_capability(pc, tag, PCI_CAP_VENDSPEC, &offset, &v->reg[0]))
		return ENOENT;

	/* Walk the vendor-specific capability chain looking for cfg_type. */
	do {
		for (i = 0; i < 4; i++)
			v->reg[i] = pci_conf_read(pc, tag, offset + i * 4);
		if (v->vcap.cfg_type == cfg_type)
			break;
		offset = v->vcap.cap_next;
	} while (offset != 0);

	if (offset == 0)
		return ENOENT;

	/* Copy any cap-specific trailer (e.g. notify_off_multiplier). */
	if (v->vcap.cap_len > sizeof(struct virtio_pci_cap)) {
		len = roundup(v->vcap.cap_len, sizeof(pcireg_t));
		if (len > buflen) {
			printf("%s: cap too large\n", __func__);
			return ERANGE;
		}
		for (i = 4; i < len / sizeof(pcireg_t);  i++)
			v->reg[i] = pci_conf_read(pc, tag, offset + i * 4);
	}

	return 0;
}
416
417
/* Number of 32-bit BAR registers in PCI config space. */
#define NMAPREG		((PCI_MAPREG_END - PCI_MAPREG_START) / \
				sizeof(pcireg_t))
420
/*
 * Attach using the virtio 1.0 ("modern") PCI interface: locate the
 * virtio capabilities, map the bars they reference, and carve the
 * common/notify/isr/devcfg subregions out of those mappings.
 * Returns 0 on success or an errno; on failure the bars stay mapped
 * (there is no pci_mapreg_unmap()).
 */
int
virtio_pci_attach_10(struct virtio_pci_softc *sc, struct pci_attach_args *pa)
{
	struct virtio_pci_cap common, isr, device;
	struct virtio_pci_notify_cap notify;
	int have_device_cfg = 0;
	bus_size_t bars[NMAPREG] = { 0 };
	int bars_idx[NMAPREG] = { 0 };
	struct virtio_pci_cap *caps[] = { &common, &isr, &device, &notify.cap };
	int i, j = 0, ret = 0;

	/* common, notify and isr caps are mandatory for a 1.0 device. */
	if (virtio_pci_find_cap(sc, VIRTIO_PCI_CAP_COMMON_CFG, &common, sizeof(common)) != 0)
		return ENODEV;

	if (virtio_pci_find_cap(sc, VIRTIO_PCI_CAP_NOTIFY_CFG, &notify, sizeof(notify)) != 0)
		return ENODEV;
	if (virtio_pci_find_cap(sc, VIRTIO_PCI_CAP_ISR_CFG, &isr, sizeof(isr)) != 0)
		return ENODEV;
	/* The device-specific config area is optional. */
	if (virtio_pci_find_cap(sc, VIRTIO_PCI_CAP_DEVICE_CFG, &device, sizeof(device)) != 0)
		memset(&device, 0, sizeof(device));
	else
		have_device_cfg = 1;

	/*
	 * XXX Maybe there are devices that offer the pci caps but not the
	 * XXX VERSION_1 feature bit? Then we should check the feature bit
	 * XXX here and fall back to 0.9 out if not present.
	 */

	/* Figure out which bars we need to map */
	for (i = 0; i < nitems(caps); i++) {
		int bar = caps[i]->bar;
		bus_size_t len = caps[i]->offset + caps[i]->length;
		if (caps[i]->length == 0)
			continue;
		if (bars[bar] < len)
			bars[bar] = len;
	}

	/* Map each needed bar; bars_idx[] maps bar number -> sc_bars_* slot. */
	for (i = 0; i < nitems(bars); i++) {
		int reg;
		pcireg_t type;
		if (bars[i] == 0)
			continue;
		reg = PCI_MAPREG_START + i * 4;
		type = pci_mapreg_type(sc->sc_pc, sc->sc_ptag, reg);
		if (pci_mapreg_map(pa, reg, type, 0, &sc->sc_bars_iot[j],
		    &sc->sc_bars_ioh[j], NULL, &sc->sc_bars_iosize[j],
		    bars[i])) {
			printf("%s: can't map bar %u \n",
			    sc->sc_sc.sc_dev.dv_xname, i);
			ret = EIO;
			goto err;
		}
		bars_idx[i] = j;
		j++;
	}

	/* Carve the notify region out of its bar. */
	i = bars_idx[notify.cap.bar];
	if (bus_space_subregion(sc->sc_bars_iot[i], sc->sc_bars_ioh[i],
	    notify.cap.offset, notify.cap.length, &sc->sc_notify_ioh) != 0) {
		printf("%s: can't map notify i/o space\n",
		    sc->sc_sc.sc_dev.dv_xname);
		ret = EIO;
		goto err;
	}
	sc->sc_notify_iosize = notify.cap.length;
	sc->sc_notify_iot = sc->sc_bars_iot[i];
	sc->sc_notify_off_multiplier = notify.notify_off_multiplier;

	if (have_device_cfg) {
		i = bars_idx[device.bar];
		if (bus_space_subregion(sc->sc_bars_iot[i], sc->sc_bars_ioh[i],
		    device.offset, device.length, &sc->sc_devcfg_ioh) != 0) {
			printf("%s: can't map devcfg i/o space\n",
			    sc->sc_sc.sc_dev.dv_xname);
			ret = EIO;
			goto err;
		}
		sc->sc_devcfg_iosize = device.length;
		sc->sc_devcfg_iot = sc->sc_bars_iot[i];
	}

	i = bars_idx[isr.bar];
	if (bus_space_subregion(sc->sc_bars_iot[i], sc->sc_bars_ioh[i],
	    isr.offset, isr.length, &sc->sc_isr_ioh) != 0) {
		printf("%s: can't map isr i/o space\n",
		    sc->sc_sc.sc_dev.dv_xname);
		ret = EIO;
		goto err;
	}
	sc->sc_isr_iosize = isr.length;
	sc->sc_isr_iot = sc->sc_bars_iot[i];

	/* The common cfg region lands in the generic sc_iot/sc_ioh. */
	i = bars_idx[common.bar];
	if (bus_space_subregion(sc->sc_bars_iot[i], sc->sc_bars_ioh[i],
	    common.offset, common.length, &sc->sc_ioh) != 0) {
		printf("%s: can't map common i/o space\n",
		    sc->sc_sc.sc_dev.dv_xname);
		ret = EIO;
		goto err;
	}
	sc->sc_iosize = common.length;
	sc->sc_iot = sc->sc_bars_iot[i];

	sc->sc_sc.sc_version_1 = 1;
	return 0;

err:
	/* there is no pci_mapreg_unmap() */
	return ret;
}
533
534int
535virtio_pci_attach_09(struct virtio_pci_softc *sc, struct pci_attach_args *pa)
536{
537	struct virtio_softc *vsc = &sc->sc_sc;
538	pcireg_t type;
539
540	type = pci_mapreg_type(pa->pa_pc, pa->pa_tag, PCI_MAPREG_START);
541	if (pci_mapreg_map(pa, PCI_MAPREG_START, type, 0,
542	    &sc->sc_iot, &sc->sc_ioh, NULL, &sc->sc_iosize, 0)) {
543		printf("%s: can't map i/o space\n", vsc->sc_dev.dv_xname);
544		return EIO;
545	}
546
547	if (bus_space_subregion(sc->sc_iot, sc->sc_ioh,
548	    VIRTIO_CONFIG_QUEUE_NOTIFY, 2, &sc->sc_notify_ioh) != 0) {
549		printf("%s: can't map notify i/o space\n",
550		    vsc->sc_dev.dv_xname);
551		return EIO;
552	}
553	sc->sc_notify_iosize = 2;
554	sc->sc_notify_iot = sc->sc_iot;
555
556	if (bus_space_subregion(sc->sc_iot, sc->sc_ioh,
557	    VIRTIO_CONFIG_ISR_STATUS, 1, &sc->sc_isr_ioh) != 0) {
558		printf("%s: can't map isr i/o space\n",
559		    vsc->sc_dev.dv_xname);
560		return EIO;
561	}
562	sc->sc_isr_iosize = 1;
563	sc->sc_isr_iot = sc->sc_iot;
564
565	return 0;
566}
567
/*
 * Attach: identify the child device type, map the transport registers
 * (1.0 preferred when allowed, else 0.9), reset and announce the driver,
 * attach the child, and finally establish interrupts (per-VQ MSI-X,
 * shared MSI-X, then MSI/legacy INTx in order of preference).
 */
void
virtio_pci_attach(struct device *parent, struct device *self, void *aux)
{
	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)self;
	struct virtio_softc *vsc = &sc->sc_sc;
	struct pci_attach_args *pa = (struct pci_attach_args *)aux;
	pci_chipset_tag_t pc = pa->pa_pc;
	pcitag_t tag = pa->pa_tag;
	int revision, ret = ENODEV;
	pcireg_t id;
	char const *intrstr;
	pci_intr_handle_t ih;

	/* Revision 0: transitional (0.9-capable); revision 1: 1.0 only. */
	revision = PCI_REVISION(pa->pa_class);
	switch (revision) {
	case 0:
		/* subsystem ID shows what I am */
		id = PCI_PRODUCT(pci_conf_read(pc, tag, PCI_SUBSYS_ID_REG));
		break;
	case 1:
		/* modern devices encode the type in the product ID */
		id = PCI_PRODUCT(pa->pa_id) - 0x1040;
		break;
	default:
		printf("unknown revision 0x%02x; giving up\n", revision);
		return;
	}

	sc->sc_pc = pc;
	sc->sc_ptag = pa->pa_tag;
	vsc->sc_dmat = pa->pa_dmat;

#if defined(__i386__) || defined(__amd64__)
	/*
	 * For virtio, ignore normal MSI black/white-listing depending on the
	 * PCI bridge but enable it unconditionally.
	 */
	pa->pa_flags |= PCI_FLAGS_MSI_ENABLED;
#endif

#if VIRTIO_DEBUG
	virtio_pci_dump_caps(sc);
#endif

	vsc->sc_ops = &virtio_pci_ops;
	/* Try the 1.0 interface first unless disabled via UKC flags. */
	if ((vsc->sc_dev.dv_cfdata->cf_flags & VIRTIO_CF_NO_VERSION_1) == 0 &&
	    (revision == 1 ||
	     (vsc->sc_dev.dv_cfdata->cf_flags & VIRTIO_CF_PREFER_VERSION_1))) {
		ret = virtio_pci_attach_10(sc, pa);
	}
	if (ret != 0 && revision == 0) {
		/* revision 0 means 0.9 only or both 0.9 and 1.0 */
		ret = virtio_pci_attach_09(sc, pa);
	}
	if (ret != 0) {
		printf(": Cannot attach (%d)\n", ret);
		return;
	}

	sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI;
	sc->sc_irq_type = IRQ_NO_MSIX;
	if (virtio_pci_adjust_config_region(sc) != 0)
		return;

	/* Reset, then announce ourselves to the device. */
	virtio_device_reset(vsc);
	virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
	virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);

	printf("\n");
	vsc->sc_childdevid = id;
	vsc->sc_child = NULL;
	/* The matching child driver sets vsc->sc_child during config_found(). */
	config_found(self, sc, NULL);
	if (vsc->sc_child == NULL) {
		printf("%s: no matching child driver; not configured\n",
		    vsc->sc_dev.dv_xname);
		goto fail_1;
	}
	if (vsc->sc_child == VIRTIO_CHILD_ERROR) {
		printf("%s: virtio configuration failed\n",
		    vsc->sc_dev.dv_xname);
		goto fail_1;
	}

	if (virtio_pci_setup_msix(sc, pa, 0) == 0) {
		sc->sc_irq_type = IRQ_MSIX_PER_VQ;
		intrstr = "msix per-VQ";
	} else if (virtio_pci_setup_msix(sc, pa, 1) == 0) {
		sc->sc_irq_type = IRQ_MSIX_SHARED;
		intrstr = "msix shared";
	} else {
		int (*ih_func)(void *) = virtio_pci_legacy_intr;
		if (pci_intr_map_msi(pa, &ih) != 0 && pci_intr_map(pa, &ih) != 0) {
			printf("%s: couldn't map interrupt\n", vsc->sc_dev.dv_xname);
			goto fail_2;
		}
		intrstr = pci_intr_string(pc, ih);
		/*
		 * We always set the IPL_MPSAFE flag in order to do the relatively
		 * expensive ISR read without lock, and then grab the kernel lock in
		 * the interrupt handler.
		 */
		if (vsc->sc_ipl & IPL_MPSAFE)
			ih_func = virtio_pci_legacy_intr_mpsafe;
		sc->sc_ih[0] = pci_intr_establish(pc, ih, vsc->sc_ipl | IPL_MPSAFE,
		    ih_func, sc, vsc->sc_dev.dv_xname);
		if (sc->sc_ih[0] == NULL) {
			printf("%s: couldn't establish interrupt", vsc->sc_dev.dv_xname);
			if (intrstr != NULL)
				printf(" at %s", intrstr);
			printf("\n");
			goto fail_2;
		}
	}
	printf("%s: %s\n", vsc->sc_dev.dv_xname, intrstr);

	return;

fail_2:
	config_detach(vsc->sc_child, 0);
fail_1:
	/* no pci_mapreg_unmap() or pci_intr_unmap() */
	virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
}
690
691int
692virtio_pci_detach(struct device *self, int flags)
693{
694	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)self;
695	struct virtio_softc *vsc = &sc->sc_sc;
696	int r;
697
698	if (vsc->sc_child != 0 && vsc->sc_child != VIRTIO_CHILD_ERROR) {
699		r = config_detach(vsc->sc_child, flags);
700		if (r)
701			return r;
702	}
703	KASSERT(vsc->sc_child == 0 || vsc->sc_child == VIRTIO_CHILD_ERROR);
704	KASSERT(vsc->sc_vqs == 0);
705	virtio_pci_free_irqs(sc);
706	if (sc->sc_iosize)
707		bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_iosize);
708	sc->sc_iosize = 0;
709
710	return 0;
711}
712
713int
714virtio_pci_adjust_config_region(struct virtio_pci_softc *sc)
715{
716	if (sc->sc_sc.sc_version_1)
717		return 0;
718	sc->sc_devcfg_iosize = sc->sc_iosize - sc->sc_devcfg_offset;
719	sc->sc_devcfg_iot = sc->sc_iot;
720	if (bus_space_subregion(sc->sc_iot, sc->sc_ioh, sc->sc_devcfg_offset,
721	    sc->sc_devcfg_iosize, &sc->sc_devcfg_ioh) != 0) {
722		printf("%s: can't map config i/o space\n",
723		    sc->sc_sc.sc_dev.dv_xname);
724		return 1;
725	}
726	return 0;
727}
728
729/*
730 * Feature negotiation.
731 * Prints available / negotiated features if guest_feature_names != NULL and
732 * VIRTIO_DEBUG is 1
733 */
int
virtio_pci_negotiate_features(struct virtio_softc *vsc,
    const struct virtio_feature_name *guest_feature_names)
{
	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
	uint64_t host, negotiated;

	vsc->sc_active_features = 0;

	/*
	 * We enable indirect descriptors by default. They can be switched
	 * off by setting bit 1 in the driver flags, see config(8)
	 */
	if (!(vsc->sc_dev.dv_cfdata->cf_flags & VIRTIO_CF_NO_INDIRECT) &&
	    !(vsc->sc_child->dv_cfdata->cf_flags & VIRTIO_CF_NO_INDIRECT)) {
		vsc->sc_driver_features |= VIRTIO_F_RING_INDIRECT_DESC;
	} else if (guest_feature_names != NULL) {
		printf(" RingIndirectDesc disabled by UKC");
	}

	/*
	 * The driver must add VIRTIO_F_RING_EVENT_IDX if it supports it.
	 * If it did, check if it is disabled by bit 2 in the driver flags.
	 */
	if ((vsc->sc_driver_features & VIRTIO_F_RING_EVENT_IDX) &&
	    ((vsc->sc_dev.dv_cfdata->cf_flags & VIRTIO_CF_NO_EVENT_IDX) ||
	    (vsc->sc_child->dv_cfdata->cf_flags & VIRTIO_CF_NO_EVENT_IDX))) {
		if (guest_feature_names != NULL)
			printf(" RingEventIdx disabled by UKC");
		vsc->sc_driver_features &= ~VIRTIO_F_RING_EVENT_IDX;
	}

	/* 1.0 devices use the windowed 64-bit feature registers. */
	if (vsc->sc_version_1) {
		return virtio_pci_negotiate_features_10(vsc,
		    guest_feature_names);
	}

	/* virtio 0.9 only */
	host = bus_space_read_4(sc->sc_iot, sc->sc_ioh,
				VIRTIO_CONFIG_DEVICE_FEATURES);
	negotiated = host & vsc->sc_driver_features;
#if VIRTIO_DEBUG
	if (guest_feature_names)
		virtio_log_features(host, negotiated, guest_feature_names);
#endif
	bus_space_write_4(sc->sc_iot, sc->sc_ioh,
			  VIRTIO_CONFIG_GUEST_FEATURES, negotiated);
	vsc->sc_active_features = negotiated;
	if (negotiated & VIRTIO_F_RING_INDIRECT_DESC)
		vsc->sc_indirect = 1;
	else
		vsc->sc_indirect = 0;
	return 0;
}
788
/*
 * Virtio 1.0 feature negotiation: read the 64-bit device feature set
 * (two 32-bit windows selected via *_feature_select), write back the
 * intersection with our driver features, then set FEATURES_OK and
 * verify the device accepted it. Returns 0, ENXIO on device rejection,
 * or EINVAL if VERSION_1 was not accepted.
 */
int
virtio_pci_negotiate_features_10(struct virtio_softc *vsc,
    const struct virtio_feature_name *guest_feature_names)
{
	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
	uint64_t host, negotiated;

	vsc->sc_driver_features |= VIRTIO_F_VERSION_1;
	/* notify on empty is 0.9 only */
	vsc->sc_driver_features &= ~VIRTIO_F_NOTIFY_ON_EMPTY;
	CWRITE(sc, device_feature_select, 0);
	host = CREAD(sc, device_feature);
	CWRITE(sc, device_feature_select, 1);
	host |= (uint64_t)CREAD(sc, device_feature) << 32;

	negotiated = host & vsc->sc_driver_features;
#if VIRTIO_DEBUG
	if (guest_feature_names)
		virtio_log_features(host, negotiated, guest_feature_names);
#endif
	CWRITE(sc, driver_feature_select, 0);
	CWRITE(sc, driver_feature, negotiated & 0xffffffff);
	CWRITE(sc, driver_feature_select, 1);
	CWRITE(sc, driver_feature, negotiated >> 32);
	/* Tell the device we are done; it may reject the selection. */
	virtio_pci_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_FEATURES_OK);

	if ((CREAD(sc, device_status) &
	    VIRTIO_CONFIG_DEVICE_STATUS_FEATURES_OK) == 0) {
		printf("%s: Feature negotiation failed\n",
		    vsc->sc_dev.dv_xname);
		CWRITE(sc, device_status, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
		return ENXIO;
	}
	vsc->sc_active_features = negotiated;

	if (negotiated & VIRTIO_F_RING_INDIRECT_DESC)
		vsc->sc_indirect = 1;
	else
		vsc->sc_indirect = 0;

	if ((negotiated & VIRTIO_F_VERSION_1) == 0) {
#if VIRTIO_DEBUG
		printf("%s: Host rejected Version_1\n", __func__);
#endif
		CWRITE(sc, device_status, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
		return EINVAL;
	}
	return 0;
}
838
839/*
840 * Device configuration registers.
841 */
842uint8_t
843virtio_pci_read_device_config_1(struct virtio_softc *vsc, int index)
844{
845	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
846	return bus_space_read_1(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index);
847}
848
849uint16_t
850virtio_pci_read_device_config_2(struct virtio_softc *vsc, int index)
851{
852	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
853	return bus_space_read_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index);
854}
855
856uint32_t
857virtio_pci_read_device_config_4(struct virtio_softc *vsc, int index)
858{
859	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
860	return bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index);
861}
862
863uint64_t
864virtio_pci_read_device_config_8(struct virtio_softc *vsc, int index)
865{
866	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
867	uint64_t r;
868
869	r = bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
870	    index + sizeof(uint32_t));
871	r <<= 32;
872	r += bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index);
873	return r;
874}
875
876void
877virtio_pci_write_device_config_1(struct virtio_softc *vsc, int index,
878    uint8_t value)
879{
880	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
881	bus_space_write_1(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index, value);
882}
883
884void
885virtio_pci_write_device_config_2(struct virtio_softc *vsc, int index,
886    uint16_t value)
887{
888	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
889	bus_space_write_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index, value);
890}
891
892void
893virtio_pci_write_device_config_4(struct virtio_softc *vsc,
894			     int index, uint32_t value)
895{
896	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
897	bus_space_write_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index, value);
898}
899
900void
901virtio_pci_write_device_config_8(struct virtio_softc *vsc,
902			     int index, uint64_t value)
903{
904	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
905	bus_space_write_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
906	    index, value & 0xffffffff);
907	bus_space_write_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
908	    index + sizeof(uint32_t), value >> 32);
909}
910
911int
912virtio_pci_msix_establish(struct virtio_pci_softc *sc,
913    struct pci_attach_args *pa, int idx, int (*handler)(void *), void *ih_arg)
914{
915	struct virtio_softc *vsc = &sc->sc_sc;
916	pci_intr_handle_t ih;
917
918	if (pci_intr_map_msix(pa, idx, &ih) != 0) {
919#if VIRTIO_DEBUG
920		printf("%s[%d]: pci_intr_map_msix failed\n",
921		    vsc->sc_dev.dv_xname, idx);
922#endif
923		return 1;
924	}
925	sc->sc_ih[idx] = pci_intr_establish(sc->sc_pc, ih, vsc->sc_ipl,
926	    handler, ih_arg, vsc->sc_dev.dv_xname);
927	if (sc->sc_ih[idx] == NULL) {
928		printf("%s[%d]: couldn't establish msix interrupt\n",
929		    vsc->sc_dev.dv_xname, idx);
930		return 1;
931	}
932	return 0;
933}
934
935void
936virtio_pci_set_msix_queue_vector(struct virtio_pci_softc *sc, uint32_t idx, uint16_t vector)
937{
938	if (sc->sc_sc.sc_version_1) {
939		CWRITE(sc, queue_select, idx);
940		CWRITE(sc, queue_msix_vector, vector);
941	} else {
942		bus_space_write_2(sc->sc_iot, sc->sc_ioh,
943		    VIRTIO_CONFIG_QUEUE_SELECT, idx);
944		bus_space_write_2(sc->sc_iot, sc->sc_ioh,
945		    VIRTIO_MSI_QUEUE_VECTOR, vector);
946	}
947}
948
949void
950virtio_pci_set_msix_config_vector(struct virtio_pci_softc *sc, uint16_t vector)
951{
952	if (sc->sc_sc.sc_version_1) {
953		CWRITE(sc, config_msix_vector, vector);
954	} else {
955		bus_space_write_2(sc->sc_iot, sc->sc_ioh,
956		    VIRTIO_MSI_CONFIG_VECTOR, vector);
957	}
958}
959
960
961void
962virtio_pci_free_irqs(struct virtio_pci_softc *sc)
963{
964	struct virtio_softc *vsc = &sc->sc_sc;
965	int i;
966
967	if (sc->sc_devcfg_offset == VIRTIO_CONFIG_DEVICE_CONFIG_MSI) {
968		for (i = 0; i < vsc->sc_nvqs; i++) {
969			virtio_pci_set_msix_queue_vector(sc, i,
970			    VIRTIO_MSI_NO_VECTOR);
971		}
972	}
973
974	for (i = 0; i < MAX_MSIX_VECS; i++) {
975		if (sc->sc_ih[i]) {
976			pci_intr_disestablish(sc->sc_pc, sc->sc_ih[i]);
977			sc->sc_ih[i] = NULL;
978		}
979	}
980
981	sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI;
982	virtio_pci_adjust_config_region(sc);
983}
984
/*
 * Establish MSI-X interrupts: vector 0 is always the config-change
 * interrupt; with shared != 0, vector 1 serves all queues, otherwise
 * vector i + 1 serves queue i. Returns 0 on success, 1 on failure
 * (all vectors freed again).
 */
int
virtio_pci_setup_msix(struct virtio_pci_softc *sc, struct pci_attach_args *pa,
    int shared)
{
	struct virtio_softc *vsc = &sc->sc_sc;
	int i;

	/* Shared needs config + queue */
	if (shared && pci_intr_msix_count(pa) < 1 + 1)
		return 1;
	/* Per VQ needs config + N * queue */
	if (!shared && pci_intr_msix_count(pa) < 1 + vsc->sc_nvqs)
		return 1;

	if (virtio_pci_msix_establish(sc, pa, 0, virtio_pci_config_intr, vsc))
		return 1;
	/* With MSI-X enabled, the devcfg region moves (0.9). */
	sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_MSI;
	virtio_pci_adjust_config_region(sc);
	virtio_pci_set_msix_config_vector(sc, 0);

	if (shared) {
		if (virtio_pci_msix_establish(sc, pa, 1,
		    virtio_pci_shared_queue_intr, vsc)) {
			goto fail;
		}

		for (i = 0; i < vsc->sc_nvqs; i++)
			virtio_pci_set_msix_queue_vector(sc, i, 1);
	} else {
		for (i = 0; i < vsc->sc_nvqs; i++) {
			if (virtio_pci_msix_establish(sc, pa, i + 1,
			    virtio_pci_queue_intr, &vsc->sc_vqs[i])) {
				goto fail;
			}
			virtio_pci_set_msix_queue_vector(sc, i, i + 1);
		}
	}

	return 0;
fail:
	virtio_pci_free_irqs(sc);
	return 1;
}
1028
1029/*
1030 * Interrupt handler.
1031 */
1032
1033/*
1034 * Only used without MSI-X
1035 */
1036int
1037virtio_pci_legacy_intr(void *arg)
1038{
1039	struct virtio_pci_softc *sc = arg;
1040	struct virtio_softc *vsc = &sc->sc_sc;
1041	int isr, r = 0;
1042
1043	/* check and ack the interrupt */
1044	isr = bus_space_read_1(sc->sc_isr_iot, sc->sc_isr_ioh, 0);
1045	if (isr == 0)
1046		return 0;
1047	KERNEL_LOCK();
1048	if ((isr & VIRTIO_CONFIG_ISR_CONFIG_CHANGE) &&
1049	    (vsc->sc_config_change != NULL)) {
1050		r = (vsc->sc_config_change)(vsc);
1051	}
1052	r |= virtio_check_vqs(vsc);
1053	KERNEL_UNLOCK();
1054
1055	return r;
1056}
1057
1058int
1059virtio_pci_legacy_intr_mpsafe(void *arg)
1060{
1061	struct virtio_pci_softc *sc = arg;
1062	struct virtio_softc *vsc = &sc->sc_sc;
1063	int isr, r = 0;
1064
1065	/* check and ack the interrupt */
1066	isr = bus_space_read_1(sc->sc_isr_iot, sc->sc_isr_ioh, 0);
1067	if (isr == 0)
1068		return 0;
1069	if ((isr & VIRTIO_CONFIG_ISR_CONFIG_CHANGE) &&
1070	    (vsc->sc_config_change != NULL)) {
1071		r = (vsc->sc_config_change)(vsc);
1072	}
1073	r |= virtio_check_vqs(vsc);
1074	return r;
1075}
1076
1077/*
1078 * Only used with MSI-X
1079 */
1080int
1081virtio_pci_config_intr(void *arg)
1082{
1083	struct virtio_softc *vsc = arg;
1084
1085	if (vsc->sc_config_change != NULL)
1086		return vsc->sc_config_change(vsc);
1087	return 0;
1088}
1089
1090/*
1091 * Only used with MSI-X
1092 */
1093int
1094virtio_pci_queue_intr(void *arg)
1095{
1096	struct virtqueue *vq = arg;
1097	struct virtio_softc *vsc = vq->vq_owner;
1098
1099	return virtio_check_vq(vsc, vq);
1100}
1101
/* Shared MSI-X queue handler: one vector for all vqs, so scan them all. */
int
virtio_pci_shared_queue_intr(void *arg)
{
	struct virtio_softc *vsc = arg;

	return virtio_check_vqs(vsc);
}
1109
1110/*
1111 * Interrupt handler to be used when polling.
1112 * We cannot use isr here because it is not defined in MSI-X mode.
1113 */
1114int
1115virtio_pci_poll_intr(void *arg)
1116{
1117	struct virtio_pci_softc *sc = arg;
1118	struct virtio_softc *vsc = &sc->sc_sc;
1119	int r = 0;
1120
1121	if (vsc->sc_config_change != NULL)
1122		r = (vsc->sc_config_change)(vsc);
1123
1124	r |= virtio_check_vqs(vsc);
1125
1126	return r;
1127}
1128
1129void
1130virtio_pci_kick(struct virtio_softc *vsc, uint16_t idx)
1131{
1132	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
1133	unsigned offset = 0;
1134	if (vsc->sc_version_1) {
1135		offset = vsc->sc_vqs[idx].vq_notify_off *
1136		    sc->sc_notify_off_multiplier;
1137	}
1138	bus_space_write_2(sc->sc_notify_iot, sc->sc_notify_ioh, offset, idx);
1139}
1140