1/*	$OpenBSD: virtio_pci.c,v 1.29 2021/06/12 13:08:30 kettenis Exp $	*/
2/*	$NetBSD: virtio.c,v 1.3 2011/11/02 23:05:52 njoly Exp $	*/
3
4/*
5 * Copyright (c) 2012 Stefan Fritsch.
6 * Copyright (c) 2010 Minoura Makoto.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/device.h>
33#include <sys/mutex.h>
34
35#include <dev/pci/pcidevs.h>
36#include <dev/pci/pcireg.h>
37#include <dev/pci/pcivar.h>
38#include <dev/pci/virtio_pcireg.h>
39
40#include <dev/pv/virtioreg.h>
41#include <dev/pv/virtiovar.h>
43
44#define DNPRINTF(n,x...)				\
45    do { if (VIRTIO_DEBUG >= n) printf(x); } while(0)
46
47
48/*
49 * XXX: Before being used on big endian arches, the access to config registers
50 * XXX: needs to be reviewed/fixed. The non-device specific registers are
51 * XXX: PCI-endian while the device specific registers are native endian.
52 */
53
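/* Vector 0 is used for config change interrupts, vectors 1..n for the vqs. */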
54#define MAX_MSIX_VECS	8
55
56struct virtio_pci_softc;
57
58int		virtio_pci_match(struct device *, void *, void *);
59void		virtio_pci_attach(struct device *, struct device *, void *);
60int		virtio_pci_attach_09(struct virtio_pci_softc *sc, struct pci_attach_args *pa);
61int		virtio_pci_attach_10(struct virtio_pci_softc *sc, struct pci_attach_args *pa);
62int		virtio_pci_detach(struct device *, int);
63
64void		virtio_pci_kick(struct virtio_softc *, uint16_t);
65int		virtio_pci_adjust_config_region(struct virtio_pci_softc *);
66uint8_t		virtio_pci_read_device_config_1(struct virtio_softc *, int);
67uint16_t	virtio_pci_read_device_config_2(struct virtio_softc *, int);
68uint32_t	virtio_pci_read_device_config_4(struct virtio_softc *, int);
69uint64_t	virtio_pci_read_device_config_8(struct virtio_softc *, int);
70void		virtio_pci_write_device_config_1(struct virtio_softc *, int, uint8_t);
71void		virtio_pci_write_device_config_2(struct virtio_softc *, int, uint16_t);
72void		virtio_pci_write_device_config_4(struct virtio_softc *, int, uint32_t);
73void		virtio_pci_write_device_config_8(struct virtio_softc *, int, uint64_t);
74uint16_t	virtio_pci_read_queue_size(struct virtio_softc *, uint16_t);
75void		virtio_pci_setup_queue(struct virtio_softc *, struct virtqueue *, uint64_t);
76void		virtio_pci_set_status(struct virtio_softc *, int);
77int		virtio_pci_negotiate_features(struct virtio_softc *, const struct virtio_feature_name *);
78int		virtio_pci_negotiate_features_10(struct virtio_softc *, const struct virtio_feature_name *);
79void		virtio_pci_set_msix_queue_vector(struct virtio_pci_softc *, uint32_t, uint16_t);
80void		virtio_pci_set_msix_config_vector(struct virtio_pci_softc *, uint16_t);
81int		virtio_pci_msix_establish(struct virtio_pci_softc *, struct pci_attach_args *, int, int (*)(void *), void *);
82int		virtio_pci_setup_msix(struct virtio_pci_softc *, struct pci_attach_args *, int);
83void		virtio_pci_free_irqs(struct virtio_pci_softc *);
84int		virtio_pci_poll_intr(void *);
85int		virtio_pci_legacy_intr(void *);
86int		virtio_pci_legacy_intr_mpsafe(void *);
87int		virtio_pci_config_intr(void *);
88int		virtio_pci_queue_intr(void *);
89int		virtio_pci_shared_queue_intr(void *);
90int		virtio_pci_find_cap(struct virtio_pci_softc *sc, int cfg_type, void *buf, int buflen);
91#if VIRTIO_DEBUG
92void virtio_pci_dump_caps(struct virtio_pci_softc *sc);
93#endif
94
95enum irq_type {
96	IRQ_NO_MSIX,
97	IRQ_MSIX_SHARED, /* vec 0: config irq, vec 1 shared by all vqs */
98	IRQ_MSIX_PER_VQ, /* vec 0: config irq, vec n: irq of vq[n-1] */
99};
100
101struct virtio_pci_softc {
102	struct virtio_softc	sc_sc;
103	pci_chipset_tag_t	sc_pc;
104	pcitag_t		sc_ptag;
105
106	bus_space_tag_t		sc_iot;
107	bus_space_handle_t	sc_ioh;
108	bus_size_t		sc_iosize;
109
110	bus_space_tag_t		sc_bars_iot[4];
111	bus_space_handle_t	sc_bars_ioh[4];
112	bus_size_t		sc_bars_iosize[4];
113
114	bus_space_tag_t		sc_notify_iot;
115	bus_space_handle_t	sc_notify_ioh;
116	bus_size_t		sc_notify_iosize;
117	unsigned int		sc_notify_off_multiplier;
118
119	bus_space_tag_t		sc_devcfg_iot;
120	bus_space_handle_t	sc_devcfg_ioh;
121	bus_size_t		sc_devcfg_iosize;
122	/*
123	 * With 0.9, the offset of the devcfg region in the io bar changes
124	 * depending on MSI-X being enabled or not.
125	 * With 1.0, this field is still used to remember if MSI-X is enabled
126	 * or not.
127	 */
128	unsigned int		sc_devcfg_offset;
129
130	bus_space_tag_t		sc_isr_iot;
131	bus_space_handle_t	sc_isr_ioh;
132	bus_size_t		sc_isr_iosize;
133
134	void			*sc_ih[MAX_MSIX_VECS];
135
136	enum irq_type		sc_irq_type;
137};
138
139struct cfattach virtio_pci_ca = {
140	sizeof(struct virtio_pci_softc),
141	virtio_pci_match,
142	virtio_pci_attach,
143	virtio_pci_detach,
144	NULL
145};
146
147struct virtio_ops virtio_pci_ops = {
148	virtio_pci_kick,
149	virtio_pci_read_device_config_1,
150	virtio_pci_read_device_config_2,
151	virtio_pci_read_device_config_4,
152	virtio_pci_read_device_config_8,
153	virtio_pci_write_device_config_1,
154	virtio_pci_write_device_config_2,
155	virtio_pci_write_device_config_4,
156	virtio_pci_write_device_config_8,
157	virtio_pci_read_queue_size,
158	virtio_pci_setup_queue,
159	virtio_pci_set_status,
160	virtio_pci_negotiate_features,
161	virtio_pci_poll_intr,
162};
163
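/*
 * Read an 8/16/32/64 bit field of the virtio 1.0 common config region.
 * 64 bit fields are composed of two 32 bit accesses: low dword at off,
 * high dword at off + sizeof(uint32_t).
 */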
164static inline
165uint64_t _cread(struct virtio_pci_softc *sc, unsigned off, unsigned size)
166{
167	uint64_t val;
168	switch (size) {
169	case 1:
170		val = bus_space_read_1(sc->sc_iot, sc->sc_ioh, off);
171		break;
172	case 2:
173		val = bus_space_read_2(sc->sc_iot, sc->sc_ioh, off);
174		break;
175	case 4:
176		val = bus_space_read_4(sc->sc_iot, sc->sc_ioh, off);
177		break;
178	case 8:
179		val = bus_space_read_4(sc->sc_iot, sc->sc_ioh,
180		    off + sizeof(uint32_t));
181		val <<= 32;
182		val += bus_space_read_4(sc->sc_iot, sc->sc_ioh, off);
183		break;
184	}
185	return val;
186}
187
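/*
 * CREAD()/CWRITE() access a member of struct virtio_pci_common_cfg by name;
 * the register offset and access width are derived from the struct layout.
 * 64 bit writes are split into two 32 bit writes.
 */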
188#define CREAD(sc, memb)  _cread(sc, offsetof(struct virtio_pci_common_cfg, memb), \
189    sizeof(((struct virtio_pci_common_cfg *)0)->memb))
190
191#define CWRITE(sc, memb, val)							\
192	do {									\
193		struct virtio_pci_common_cfg c;					\
194		size_t off = offsetof(struct virtio_pci_common_cfg, memb);	\
195		size_t size = sizeof(c.memb);					\
196										\
197		DNPRINTF(2, "%s: %d: off %#zx size %#zx write %#llx\n",		\
198		    __func__, __LINE__, off, size, (unsigned long long)val);	\
199		switch (size) {							\
200		case 1:								\
201			bus_space_write_1(sc->sc_iot, sc->sc_ioh, off, val);	\
202			break;							\
203		case 2:								\
204			bus_space_write_2(sc->sc_iot, sc->sc_ioh, off, val);	\
205			break;							\
206		case 4:								\
207			bus_space_write_4(sc->sc_iot, sc->sc_ioh, off, val);	\
208			break;							\
209		case 8:								\
210			bus_space_write_4(sc->sc_iot, sc->sc_ioh, off,		\
211			    (val) & 0xffffffff);				\
212			bus_space_write_4(sc->sc_iot, sc->sc_ioh,		\
213			    (off) + sizeof(uint32_t), (uint64_t)(val) >> 32);	\
214			break;							\
215		}								\
216	} while (0)
217
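/* Return the queue size of virtqueue idx as reported by the device. */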
218uint16_t
219virtio_pci_read_queue_size(struct virtio_softc *vsc, uint16_t idx)
220{
221	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
222	uint16_t ret;
223	if (sc->sc_sc.sc_version_1) {
224		CWRITE(sc, queue_select, idx);
225		ret = CREAD(sc, queue_size);
226	} else {
227		bus_space_write_2(sc->sc_iot, sc->sc_ioh,
228		    VIRTIO_CONFIG_QUEUE_SELECT, idx);
229		ret = bus_space_read_2(sc->sc_iot, sc->sc_ioh,
230		    VIRTIO_CONFIG_QUEUE_SIZE);
231	}
232	return ret;
233}
234
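/*
 * Program the ring addresses of vq into the device, or disable the queue
 * if addr == 0. If MSI-X is in use, also assign the queue's interrupt
 * vector.
 */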
235void
236virtio_pci_setup_queue(struct virtio_softc *vsc, struct virtqueue *vq,
237    uint64_t addr)
238{
239	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
240	if (sc->sc_sc.sc_version_1) {
241		CWRITE(sc, queue_select, vq->vq_index);
242		if (addr == 0) {
243			CWRITE(sc, queue_enable, 0);
244			CWRITE(sc, queue_desc, 0);
245			CWRITE(sc, queue_avail, 0);
246			CWRITE(sc, queue_used, 0);
247		} else {
248			CWRITE(sc, queue_desc, addr);
249			CWRITE(sc, queue_avail, addr + vq->vq_availoffset);
250			CWRITE(sc, queue_used, addr + vq->vq_usedoffset);
251			CWRITE(sc, queue_enable, 1);
252			vq->vq_notify_off = CREAD(sc, queue_notify_off);
253		}
254	} else {
255		bus_space_write_2(sc->sc_iot, sc->sc_ioh,
256		    VIRTIO_CONFIG_QUEUE_SELECT, vq->vq_index);
257		bus_space_write_4(sc->sc_iot, sc->sc_ioh,
258		    VIRTIO_CONFIG_QUEUE_ADDRESS, addr / VIRTIO_PAGE_SIZE);
259	}
260
261	/*
262	 * This path is only executed if this function is called after
263	 * the child's attach function has finished. In other cases,
264	 * it's done in virtio_pci_setup_msix().
265	 */
266	if (sc->sc_irq_type != IRQ_NO_MSIX) {
267		int vec = 1;
268		if (sc->sc_irq_type == IRQ_MSIX_PER_VQ)
 269			vec += vq->vq_index;
270		if (sc->sc_sc.sc_version_1) {
271			CWRITE(sc, queue_msix_vector, vec);
272		} else {
273			bus_space_write_2(sc->sc_iot, sc->sc_ioh,
274			    VIRTIO_MSI_QUEUE_VECTOR, vec);
275		}
276	}
277}
278
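/*
 * Write the device status register. A non-zero status is OR'ed into the
 * current value; writing 0 resets the device.
 */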
279void
280virtio_pci_set_status(struct virtio_softc *vsc, int status)
281{
282	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
283	int old = 0;
284
285	if (sc->sc_sc.sc_version_1) {
286		if (status != 0)
287			old = CREAD(sc, device_status);
288		CWRITE(sc, device_status, status|old);
289	} else {
290		if (status != 0)
291			old = bus_space_read_1(sc->sc_iot, sc->sc_ioh,
292			    VIRTIO_CONFIG_DEVICE_STATUS);
293		bus_space_write_1(sc->sc_iot, sc->sc_ioh,
294		    VIRTIO_CONFIG_DEVICE_STATUS, status|old);
295	}
296}
297
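/*
 * Match transitional (0.9, IDs 0x1000-0x103f, revision 0) and modern
 * (1.0, IDs 0x1040-0x107f, revision 1) virtio PCI devices. The
 * OpenBSD-specific control device is matched as well.
 */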
298int
299virtio_pci_match(struct device *parent, void *match, void *aux)
300{
301	struct pci_attach_args *pa;
302
303	pa = (struct pci_attach_args *)aux;
304	if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_OPENBSD &&
305	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_OPENBSD_CONTROL)
306		return 1;
307	if (PCI_VENDOR(pa->pa_id) != PCI_VENDOR_QUMRANET)
308		return 0;
309	/* virtio 0.9 */
310	if (PCI_PRODUCT(pa->pa_id) >= 0x1000 &&
311	    PCI_PRODUCT(pa->pa_id) <= 0x103f &&
312	    PCI_REVISION(pa->pa_class) == 0)
313		return 1;
314	/* virtio 1.0 */
315	if (PCI_PRODUCT(pa->pa_id) >= 0x1040 &&
316	    PCI_PRODUCT(pa->pa_id) <= 0x107f &&
317	    PCI_REVISION(pa->pa_class) == 1)
318		return 1;
319	return 0;
320}
321
322#if VIRTIO_DEBUG
323void
324virtio_pci_dump_caps(struct virtio_pci_softc *sc)
325{
326	pci_chipset_tag_t pc = sc->sc_pc;
327	pcitag_t tag = sc->sc_ptag;
328	int offset;
329	union {
330		pcireg_t reg[4];
331		struct virtio_pci_cap vcap;
332	} v;
333
334	if (!pci_get_capability(pc, tag, PCI_CAP_VENDSPEC, &offset, &v.reg[0]))
335		return;
336
337	printf("\n");
338	do {
339		for (int i = 0; i < 4; i++)
340			v.reg[i] = pci_conf_read(pc, tag, offset + i * 4);
341		printf("%s: cfgoff %#x len %#x type %#x bar %#x: off %#x len %#x\n",
342			__func__, offset, v.vcap.cap_len, v.vcap.cfg_type, v.vcap.bar,
343			v.vcap.offset, v.vcap.length);
344		offset = v.vcap.cap_next;
345	} while (offset != 0);
346}
347#endif
348
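/*
 * Find the virtio capability of type cfg_type in the vendor-specific PCI
 * capability list and copy it into buf. Returns 0 on success, ENOENT if
 * no such capability exists and ERANGE if buf is too small.
 */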
349int
350virtio_pci_find_cap(struct virtio_pci_softc *sc, int cfg_type, void *buf, int buflen)
351{
352	pci_chipset_tag_t pc = sc->sc_pc;
353	pcitag_t tag = sc->sc_ptag;
354	unsigned int offset, i, len;
355	union {
356		pcireg_t reg[8];
357		struct virtio_pci_cap vcap;
358	} *v = buf;
359
360	if (buflen < sizeof(struct virtio_pci_cap))
361		return ERANGE;
362
363	if (!pci_get_capability(pc, tag, PCI_CAP_VENDSPEC, &offset, &v->reg[0]))
364		return ENOENT;
365
366	do {
367		for (i = 0; i < 4; i++)
368			v->reg[i] = pci_conf_read(pc, tag, offset + i * 4);
369		if (v->vcap.cfg_type == cfg_type)
370			break;
371		offset = v->vcap.cap_next;
372	} while (offset != 0);
373
374	if (offset == 0)
375		return ENOENT;
376
377	if (v->vcap.cap_len > sizeof(struct virtio_pci_cap)) {
378		len = roundup(v->vcap.cap_len, sizeof(pcireg_t));
379		if (len > buflen) {
380			printf("%s: cap too large\n", __func__);
381			return ERANGE;
382		}
383		for (i = 4; i < len / sizeof(pcireg_t);  i++)
384			v->reg[i] = pci_conf_read(pc, tag, offset + i * 4);
385	}
386
387	return 0;
388}
389
390
391#define NMAPREG		((PCI_MAPREG_END - PCI_MAPREG_START) / \
392				sizeof(pcireg_t))
393
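/*
 * Attach as a virtio 1.0 (modern) device: locate the common, notify, ISR
 * and (optional) device-specific config capabilities and map the BARs they
 * reference. Returns ENODEV if a required capability is missing.
 */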
394int
395virtio_pci_attach_10(struct virtio_pci_softc *sc, struct pci_attach_args *pa)
396{
397	struct virtio_pci_cap common, isr, device;
398	struct virtio_pci_notify_cap notify;
399	int have_device_cfg = 0;
400	bus_size_t bars[NMAPREG] = { 0 };
401	int bars_idx[NMAPREG] = { 0 };
402	struct virtio_pci_cap *caps[] = { &common, &isr, &device, &notify.cap };
403	int i, j = 0, ret = 0;
404
405	if (virtio_pci_find_cap(sc, VIRTIO_PCI_CAP_COMMON_CFG, &common, sizeof(common)) != 0)
406		return ENODEV;
407
408	if (virtio_pci_find_cap(sc, VIRTIO_PCI_CAP_NOTIFY_CFG, &notify, sizeof(notify)) != 0)
409		return ENODEV;
410	if (virtio_pci_find_cap(sc, VIRTIO_PCI_CAP_ISR_CFG, &isr, sizeof(isr)) != 0)
411		return ENODEV;
412	if (virtio_pci_find_cap(sc, VIRTIO_PCI_CAP_DEVICE_CFG, &device, sizeof(device)) != 0)
413		memset(&device, 0, sizeof(device));
414	else
415		have_device_cfg = 1;
416
417	/*
418	 * XXX Maybe there are devices that offer the pci caps but not the
419	 * XXX VERSION_1 feature bit? Then we should check the feature bit
 420	 * XXX here and fall back to 0.9 if it is not present.
421	 */
422
423	/* Figure out which bars we need to map */
424	for (i = 0; i < nitems(caps); i++) {
425		int bar = caps[i]->bar;
426		bus_size_t len = caps[i]->offset + caps[i]->length;
427		if (caps[i]->length == 0)
428			continue;
429		if (bars[bar] < len)
430			bars[bar] = len;
431	}
432
433	for (i = 0; i < nitems(bars); i++) {
434		int reg;
435		pcireg_t type;
436		if (bars[i] == 0)
437			continue;
438		reg = PCI_MAPREG_START + i * 4;
439		type = pci_mapreg_type(sc->sc_pc, sc->sc_ptag, reg);
440		if (pci_mapreg_map(pa, reg, type, 0, &sc->sc_bars_iot[j],
441		    &sc->sc_bars_ioh[j], NULL, &sc->sc_bars_iosize[j],
442		    bars[i])) {
 443			printf("%s: can't map bar %u\n",
444			    sc->sc_sc.sc_dev.dv_xname, i);
445			ret = EIO;
446			goto err;
447		}
448		bars_idx[i] = j;
449		j++;
450	}
451
452	i = bars_idx[notify.cap.bar];
453	if (bus_space_subregion(sc->sc_bars_iot[i], sc->sc_bars_ioh[i],
454	    notify.cap.offset, notify.cap.length, &sc->sc_notify_ioh) != 0) {
455		printf("%s: can't map notify i/o space\n",
456		    sc->sc_sc.sc_dev.dv_xname);
457		ret = EIO;
458		goto err;
459	}
460	sc->sc_notify_iosize = notify.cap.length;
461	sc->sc_notify_iot = sc->sc_bars_iot[i];
462	sc->sc_notify_off_multiplier = notify.notify_off_multiplier;
463
464	if (have_device_cfg) {
465		i = bars_idx[device.bar];
466		if (bus_space_subregion(sc->sc_bars_iot[i], sc->sc_bars_ioh[i],
467		    device.offset, device.length, &sc->sc_devcfg_ioh) != 0) {
468			printf("%s: can't map devcfg i/o space\n",
469			    sc->sc_sc.sc_dev.dv_xname);
470			ret = EIO;
471			goto err;
472		}
473		sc->sc_devcfg_iosize = device.length;
474		sc->sc_devcfg_iot = sc->sc_bars_iot[i];
475	}
476
477	i = bars_idx[isr.bar];
478	if (bus_space_subregion(sc->sc_bars_iot[i], sc->sc_bars_ioh[i],
479	    isr.offset, isr.length, &sc->sc_isr_ioh) != 0) {
480		printf("%s: can't map isr i/o space\n",
481		    sc->sc_sc.sc_dev.dv_xname);
482		ret = EIO;
483		goto err;
484	}
485	sc->sc_isr_iosize = isr.length;
486	sc->sc_isr_iot = sc->sc_bars_iot[i];
487
488	i = bars_idx[common.bar];
489	if (bus_space_subregion(sc->sc_bars_iot[i], sc->sc_bars_ioh[i],
490	    common.offset, common.length, &sc->sc_ioh) != 0) {
491		printf("%s: can't map common i/o space\n",
492		    sc->sc_sc.sc_dev.dv_xname);
493		ret = EIO;
494		goto err;
495	}
496	sc->sc_iosize = common.length;
497	sc->sc_iot = sc->sc_bars_iot[i];
498
499	sc->sc_sc.sc_version_1 = 1;
500	return 0;
501
502err:
503	/* there is no pci_mapreg_unmap() */
504	return ret;
505}
506
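/*
 * Attach as a legacy 0.9 device: map the single i/o BAR and carve out
 * subregions for the queue notify and ISR status registers.
 */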
507int
508virtio_pci_attach_09(struct virtio_pci_softc *sc, struct pci_attach_args *pa)
509{
510	struct virtio_softc *vsc = &sc->sc_sc;
511	if (pci_mapreg_map(pa, PCI_MAPREG_START, PCI_MAPREG_TYPE_IO, 0,
512	    &sc->sc_iot, &sc->sc_ioh, NULL, &sc->sc_iosize, 0)) {
513		printf("%s: can't map i/o space\n", vsc->sc_dev.dv_xname);
514		return EIO;
515	}
516
517	if (bus_space_subregion(sc->sc_iot, sc->sc_ioh,
518	    VIRTIO_CONFIG_QUEUE_NOTIFY, 2, &sc->sc_notify_ioh) != 0) {
519		printf("%s: can't map notify i/o space\n",
520		    vsc->sc_dev.dv_xname);
521		return EIO;
522	}
523	sc->sc_notify_iosize = 2;
524	sc->sc_notify_iot = sc->sc_iot;
525
526	if (bus_space_subregion(sc->sc_iot, sc->sc_ioh,
527	    VIRTIO_CONFIG_ISR_STATUS, 1, &sc->sc_isr_ioh) != 0) {
528		printf("%s: can't map isr i/o space\n",
529		    vsc->sc_dev.dv_xname);
530		return EIO;
531	}
532	sc->sc_isr_iosize = 1;
533	sc->sc_isr_iot = sc->sc_iot;
534
535	return 0;
536}
537
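/*
 * Choose the 1.0 layout for revision 1 devices (or when
 * VIRTIO_CF_PREFER_VERSION_1 is set) unless disabled via
 * VIRTIO_CF_NO_VERSION_1, and fall back to 0.9 for revision 0 devices.
 * Then reset the device, attach the matching child driver and establish
 * interrupts: per-virtqueue MSI-X if possible, otherwise shared MSI-X,
 * otherwise a single MSI or INTx interrupt.
 */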
538void
539virtio_pci_attach(struct device *parent, struct device *self, void *aux)
540{
541	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)self;
542	struct virtio_softc *vsc = &sc->sc_sc;
543	struct pci_attach_args *pa = (struct pci_attach_args *)aux;
544	pci_chipset_tag_t pc = pa->pa_pc;
545	pcitag_t tag = pa->pa_tag;
546	int revision, ret = ENODEV;
547	pcireg_t id;
548	char const *intrstr;
549	pci_intr_handle_t ih;
550
551	revision = PCI_REVISION(pa->pa_class);
552	switch (revision) {
553	case 0:
554		/* subsystem ID shows what I am */
555		id = PCI_PRODUCT(pci_conf_read(pc, tag, PCI_SUBSYS_ID_REG));
556		break;
557	case 1:
558		id = PCI_PRODUCT(pa->pa_id) - 0x1040;
559		break;
560	default:
561		printf("unknown revision 0x%02x; giving up\n", revision);
562		return;
563	}
564
565	sc->sc_pc = pc;
566	sc->sc_ptag = pa->pa_tag;
567	vsc->sc_dmat = pa->pa_dmat;
568
569#if defined(__i386__) || defined(__amd64__)
570	/*
571	 * For virtio, ignore normal MSI black/white-listing depending on the
572	 * PCI bridge but enable it unconditionally.
573	 */
574	pa->pa_flags |= PCI_FLAGS_MSI_ENABLED;
575#endif
576
577#if VIRTIO_DEBUG
578	virtio_pci_dump_caps(sc);
579#endif
580
581	vsc->sc_ops = &virtio_pci_ops;
582	if ((vsc->sc_dev.dv_cfdata->cf_flags & VIRTIO_CF_NO_VERSION_1) == 0 &&
583	    (revision == 1 ||
584	     (vsc->sc_dev.dv_cfdata->cf_flags & VIRTIO_CF_PREFER_VERSION_1))) {
585		ret = virtio_pci_attach_10(sc, pa);
586	}
587	if (ret != 0 && revision == 0) {
588		/* revision 0 means 0.9 only or both 0.9 and 1.0 */
589		ret = virtio_pci_attach_09(sc, pa);
590	}
591	if (ret != 0) {
592		printf(": Cannot attach (%d)\n", ret);
593		return;
594	}
595
596	sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI;
597	sc->sc_irq_type = IRQ_NO_MSIX;
598	if (virtio_pci_adjust_config_region(sc) != 0)
599		return;
600
601	virtio_device_reset(vsc);
602	virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
603	virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
604
605	printf("\n");
606	vsc->sc_childdevid = id;
607	vsc->sc_child = NULL;
608	config_found(self, sc, NULL);
609	if (vsc->sc_child == NULL) {
610		printf("%s: no matching child driver; not configured\n",
611		    vsc->sc_dev.dv_xname);
612		goto fail_1;
613	}
614	if (vsc->sc_child == VIRTIO_CHILD_ERROR) {
615		printf("%s: virtio configuration failed\n",
616		    vsc->sc_dev.dv_xname);
617		goto fail_1;
618	}
619
620	if (virtio_pci_setup_msix(sc, pa, 0) == 0) {
621		sc->sc_irq_type = IRQ_MSIX_PER_VQ;
622		intrstr = "msix per-VQ";
623	} else if (virtio_pci_setup_msix(sc, pa, 1) == 0) {
624		sc->sc_irq_type = IRQ_MSIX_SHARED;
625		intrstr = "msix shared";
626	} else {
627		int (*ih_func)(void *) = virtio_pci_legacy_intr;
628		if (pci_intr_map_msi(pa, &ih) != 0 && pci_intr_map(pa, &ih) != 0) {
629			printf("%s: couldn't map interrupt\n", vsc->sc_dev.dv_xname);
630			goto fail_2;
631		}
632		intrstr = pci_intr_string(pc, ih);
633		/*
634		 * We always set the IPL_MPSAFE flag in order to do the relatively
635		 * expensive ISR read without lock, and then grab the kernel lock in
636		 * the interrupt handler.
637		 */
638		if (vsc->sc_ipl & IPL_MPSAFE)
639			ih_func = virtio_pci_legacy_intr_mpsafe;
640		sc->sc_ih[0] = pci_intr_establish(pc, ih, vsc->sc_ipl | IPL_MPSAFE,
641		    ih_func, sc, vsc->sc_dev.dv_xname);
642		if (sc->sc_ih[0] == NULL) {
643			printf("%s: couldn't establish interrupt", vsc->sc_dev.dv_xname);
644			if (intrstr != NULL)
645				printf(" at %s", intrstr);
646			printf("\n");
647			goto fail_2;
648		}
649	}
650	printf("%s: %s\n", vsc->sc_dev.dv_xname, intrstr);
651
652	virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
653	return;
654
655fail_2:
656	config_detach(vsc->sc_child, 0);
657fail_1:
658	/* no pci_mapreg_unmap() or pci_intr_unmap() */
659	virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
660}
661
662int
663virtio_pci_detach(struct device *self, int flags)
664{
665	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)self;
666	struct virtio_softc *vsc = &sc->sc_sc;
667	int r;
668
669	if (vsc->sc_child != 0 && vsc->sc_child != VIRTIO_CHILD_ERROR) {
670		r = config_detach(vsc->sc_child, flags);
671		if (r)
672			return r;
673	}
674	KASSERT(vsc->sc_child == 0 || vsc->sc_child == VIRTIO_CHILD_ERROR);
675	KASSERT(vsc->sc_vqs == 0);
676	virtio_pci_free_irqs(sc);
677	if (sc->sc_iosize)
678		bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_iosize);
679	sc->sc_iosize = 0;
680
681	return 0;
682}
683
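/*
 * For 0.9 devices the location of the device-specific config within the
 * i/o BAR depends on whether MSI-X is enabled (sc_devcfg_offset); remap
 * the devcfg subregion accordingly. 1.0 devices map it from their own
 * capability, so there is nothing to do.
 */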
684int
685virtio_pci_adjust_config_region(struct virtio_pci_softc *sc)
686{
687	if (sc->sc_sc.sc_version_1)
688		return 0;
689	sc->sc_devcfg_iosize = sc->sc_iosize - sc->sc_devcfg_offset;
690	sc->sc_devcfg_iot = sc->sc_iot;
691	if (bus_space_subregion(sc->sc_iot, sc->sc_ioh, sc->sc_devcfg_offset,
692	    sc->sc_devcfg_iosize, &sc->sc_devcfg_ioh) != 0) {
693		printf("%s: can't map config i/o space\n",
694		    sc->sc_sc.sc_dev.dv_xname);
695		return 1;
696	}
697	return 0;
698}
699
700/*
701 * Feature negotiation.
702 * Prints available / negotiated features if guest_feature_names != NULL and
703 * VIRTIO_DEBUG is 1
704 */
705int
706virtio_pci_negotiate_features(struct virtio_softc *vsc,
707    const struct virtio_feature_name *guest_feature_names)
708{
709	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
710	uint64_t host, negotiated;
711
712	vsc->sc_active_features = 0;
713
714	/*
715	 * We enable indirect descriptors by default. They can be switched
716	 * off by setting bit 1 in the driver flags, see config(8)
717	 */
718	if (!(vsc->sc_dev.dv_cfdata->cf_flags & VIRTIO_CF_NO_INDIRECT) &&
719	    !(vsc->sc_child->dv_cfdata->cf_flags & VIRTIO_CF_NO_INDIRECT)) {
720		vsc->sc_driver_features |= VIRTIO_F_RING_INDIRECT_DESC;
721	} else if (guest_feature_names != NULL) {
722		printf(" RingIndirectDesc disabled by UKC");
723	}
724
725	/*
726	 * The driver must add VIRTIO_F_RING_EVENT_IDX if it supports it.
727	 * If it did, check if it is disabled by bit 2 in the driver flags.
728	 */
729	if ((vsc->sc_driver_features & VIRTIO_F_RING_EVENT_IDX) &&
730	    ((vsc->sc_dev.dv_cfdata->cf_flags & VIRTIO_CF_NO_EVENT_IDX) ||
731	    (vsc->sc_child->dv_cfdata->cf_flags & VIRTIO_CF_NO_EVENT_IDX))) {
732		if (guest_feature_names != NULL)
733			printf(" RingEventIdx disabled by UKC");
734		vsc->sc_driver_features &= ~VIRTIO_F_RING_EVENT_IDX;
735	}
736
737	if (vsc->sc_version_1) {
738		return virtio_pci_negotiate_features_10(vsc,
739		    guest_feature_names);
740	}
741
742	/* virtio 0.9 only */
743	host = bus_space_read_4(sc->sc_iot, sc->sc_ioh,
744				VIRTIO_CONFIG_DEVICE_FEATURES);
745	negotiated = host & vsc->sc_driver_features;
746#if VIRTIO_DEBUG
747	if (guest_feature_names)
748		virtio_log_features(host, negotiated, guest_feature_names);
749#endif
750	bus_space_write_4(sc->sc_iot, sc->sc_ioh,
751			  VIRTIO_CONFIG_GUEST_FEATURES, negotiated);
752	vsc->sc_active_features = negotiated;
753	if (negotiated & VIRTIO_F_RING_INDIRECT_DESC)
754		vsc->sc_indirect = 1;
755	else
756		vsc->sc_indirect = 0;
757	return 0;
758}
759
760int
761virtio_pci_negotiate_features_10(struct virtio_softc *vsc,
762    const struct virtio_feature_name *guest_feature_names)
763{
764	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
765	uint64_t host, negotiated;
766
767	vsc->sc_driver_features |= VIRTIO_F_VERSION_1;
768	/* notify on empty is 0.9 only */
769	vsc->sc_driver_features &= ~VIRTIO_F_NOTIFY_ON_EMPTY;
770	CWRITE(sc, device_feature_select, 0);
771	host = CREAD(sc, device_feature);
772	CWRITE(sc, device_feature_select, 1);
773	host |= (uint64_t)CREAD(sc, device_feature) << 32;
774
775	negotiated = host & vsc->sc_driver_features;
776#if VIRTIO_DEBUG
777	if (guest_feature_names)
778		virtio_log_features(host, negotiated, guest_feature_names);
779#endif
780	CWRITE(sc, driver_feature_select, 0);
781	CWRITE(sc, driver_feature, negotiated & 0xffffffff);
782	CWRITE(sc, driver_feature_select, 1);
783	CWRITE(sc, driver_feature, negotiated >> 32);
784	virtio_pci_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_FEATURES_OK);
785
786	if ((CREAD(sc, device_status) &
787	    VIRTIO_CONFIG_DEVICE_STATUS_FEATURES_OK) == 0) {
788		printf("%s: Feature negotiation failed\n",
789		    vsc->sc_dev.dv_xname);
790		CWRITE(sc, device_status, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
791		return ENXIO;
792	}
793	vsc->sc_active_features = negotiated;
794
795	if (negotiated & VIRTIO_F_RING_INDIRECT_DESC)
796		vsc->sc_indirect = 1;
797	else
798		vsc->sc_indirect = 0;
799
800	if ((negotiated & VIRTIO_F_VERSION_1) == 0) {
801#if VIRTIO_DEBUG
802		printf("%s: Host rejected Version_1\n", __func__);
803#endif
804		CWRITE(sc, device_status, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
805		return EINVAL;
806	}
807	return 0;
808}
809
810/*
811 * Device configuration registers.
812 */
813uint8_t
814virtio_pci_read_device_config_1(struct virtio_softc *vsc, int index)
815{
816	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
817	return bus_space_read_1(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index);
818}
819
820uint16_t
821virtio_pci_read_device_config_2(struct virtio_softc *vsc, int index)
822{
823	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
824	return bus_space_read_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index);
825}
826
827uint32_t
828virtio_pci_read_device_config_4(struct virtio_softc *vsc, int index)
829{
830	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
831	return bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index);
832}
833
834uint64_t
835virtio_pci_read_device_config_8(struct virtio_softc *vsc, int index)
836{
837	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
838	uint64_t r;
839
840	r = bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
841	    index + sizeof(uint32_t));
842	r <<= 32;
843	r += bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index);
844	return r;
845}
846
847void
848virtio_pci_write_device_config_1(struct virtio_softc *vsc, int index,
849    uint8_t value)
850{
851	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
852	bus_space_write_1(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index, value);
853}
854
855void
856virtio_pci_write_device_config_2(struct virtio_softc *vsc, int index,
857    uint16_t value)
858{
859	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
860	bus_space_write_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index, value);
861}
862
863void
864virtio_pci_write_device_config_4(struct virtio_softc *vsc,
865			     int index, uint32_t value)
866{
867	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
868	bus_space_write_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index, value);
869}
870
871void
872virtio_pci_write_device_config_8(struct virtio_softc *vsc,
873			     int index, uint64_t value)
874{
875	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
876	bus_space_write_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
877	    index, value & 0xffffffff);
878	bus_space_write_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
879	    index + sizeof(uint32_t), value >> 32);
880}
881
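/*
 * Map MSI-X vector idx and establish handler for it.
 * Returns 0 on success, 1 on failure.
 */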
882int
883virtio_pci_msix_establish(struct virtio_pci_softc *sc,
884    struct pci_attach_args *pa, int idx, int (*handler)(void *), void *ih_arg)
885{
886	struct virtio_softc *vsc = &sc->sc_sc;
887	pci_intr_handle_t ih;
888
889	if (pci_intr_map_msix(pa, idx, &ih) != 0) {
890#if VIRTIO_DEBUG
891		printf("%s[%d]: pci_intr_map_msix failed\n",
892		    vsc->sc_dev.dv_xname, idx);
893#endif
894		return 1;
895	}
896	sc->sc_ih[idx] = pci_intr_establish(sc->sc_pc, ih, vsc->sc_ipl,
897	    handler, ih_arg, vsc->sc_dev.dv_xname);
898	if (sc->sc_ih[idx] == NULL) {
899		printf("%s[%d]: couldn't establish msix interrupt\n",
900		    vsc->sc_dev.dv_xname, idx);
901		return 1;
902	}
903	return 0;
904}
905
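/* Tell the device which MSI-X vector to use for virtqueue idx. */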
906void
907virtio_pci_set_msix_queue_vector(struct virtio_pci_softc *sc, uint32_t idx, uint16_t vector)
908{
909	if (sc->sc_sc.sc_version_1) {
910		CWRITE(sc, queue_select, idx);
911		CWRITE(sc, queue_msix_vector, vector);
912	} else {
913		bus_space_write_2(sc->sc_iot, sc->sc_ioh,
914		    VIRTIO_CONFIG_QUEUE_SELECT, idx);
915		bus_space_write_2(sc->sc_iot, sc->sc_ioh,
916		    VIRTIO_MSI_QUEUE_VECTOR, vector);
917	}
918}
919
920void
921virtio_pci_set_msix_config_vector(struct virtio_pci_softc *sc, uint16_t vector)
922{
923	if (sc->sc_sc.sc_version_1) {
924		CWRITE(sc, config_msix_vector, vector);
925	} else {
926		bus_space_write_2(sc->sc_iot, sc->sc_ioh,
927		    VIRTIO_MSI_CONFIG_VECTOR, vector);
928	}
929}
930
931
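/*
 * Disestablish all interrupt handlers, detach the queues from their MSI-X
 * vectors and switch the device config region back to the non-MSI-X layout.
 */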
932void
933virtio_pci_free_irqs(struct virtio_pci_softc *sc)
934{
935	struct virtio_softc *vsc = &sc->sc_sc;
936	int i;
937
938	if (sc->sc_devcfg_offset == VIRTIO_CONFIG_DEVICE_CONFIG_MSI) {
939		for (i = 0; i < vsc->sc_nvqs; i++) {
940			virtio_pci_set_msix_queue_vector(sc, i,
941			    VIRTIO_MSI_NO_VECTOR);
942		}
943	}
944
945	for (i = 0; i < MAX_MSIX_VECS; i++) {
946		if (sc->sc_ih[i]) {
947			pci_intr_disestablish(sc->sc_pc, sc->sc_ih[i]);
948			sc->sc_ih[i] = NULL;
949		}
950	}
951
952	sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI;
953	virtio_pci_adjust_config_region(sc);
954}
955
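/*
 * Set up MSI-X: vector 0 for config changes and either one shared vector
 * for all virtqueues (shared != 0) or one vector per virtqueue.
 * Returns 0 on success and 1 on failure.
 */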
956int
957virtio_pci_setup_msix(struct virtio_pci_softc *sc, struct pci_attach_args *pa,
958    int shared)
959{
960	struct virtio_softc *vsc = &sc->sc_sc;
961	int i;
962
963	if (virtio_pci_msix_establish(sc, pa, 0, virtio_pci_config_intr, vsc))
964		return 1;
965	sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_MSI;
966	virtio_pci_adjust_config_region(sc);
967	virtio_pci_set_msix_config_vector(sc, 0);
968
969	if (shared) {
970		if (virtio_pci_msix_establish(sc, pa, 1,
971		    virtio_pci_shared_queue_intr, vsc)) {
972			goto fail;
973		}
974
975		for (i = 0; i < vsc->sc_nvqs; i++)
976			virtio_pci_set_msix_queue_vector(sc, i, 1);
977	} else {
 978		for (i = 0; i < vsc->sc_nvqs; i++) {
979			if (virtio_pci_msix_establish(sc, pa, i + 1,
980			    virtio_pci_queue_intr, &vsc->sc_vqs[i])) {
981				goto fail;
982			}
983			virtio_pci_set_msix_queue_vector(sc, i, i + 1);
984		}
985	}
986
987	return 0;
988fail:
989	virtio_pci_free_irqs(sc);
990	return 1;
991}
992
993/*
994 * Interrupt handler.
995 */
996
997/*
998 * Only used without MSI-X
999 */
1000int
1001virtio_pci_legacy_intr(void *arg)
1002{
1003	struct virtio_pci_softc *sc = arg;
1004	struct virtio_softc *vsc = &sc->sc_sc;
1005	int isr, r = 0;
1006
1007	/* check and ack the interrupt */
1008	isr = bus_space_read_1(sc->sc_isr_iot, sc->sc_isr_ioh, 0);
1009	if (isr == 0)
1010		return 0;
1011	KERNEL_LOCK();
1012	if ((isr & VIRTIO_CONFIG_ISR_CONFIG_CHANGE) &&
1013	    (vsc->sc_config_change != NULL)) {
1014		r = (vsc->sc_config_change)(vsc);
1015	}
1016	r |= virtio_check_vqs(vsc);
1017	KERNEL_UNLOCK();
1018
1019	return r;
1020}
1021
1022int
1023virtio_pci_legacy_intr_mpsafe(void *arg)
1024{
1025	struct virtio_pci_softc *sc = arg;
1026	struct virtio_softc *vsc = &sc->sc_sc;
1027	int isr, r = 0;
1028
1029	/* check and ack the interrupt */
1030	isr = bus_space_read_1(sc->sc_isr_iot, sc->sc_isr_ioh, 0);
1031	if (isr == 0)
1032		return 0;
1033	if ((isr & VIRTIO_CONFIG_ISR_CONFIG_CHANGE) &&
1034	    (vsc->sc_config_change != NULL)) {
1035		r = (vsc->sc_config_change)(vsc);
1036	}
1037	r |= virtio_check_vqs(vsc);
1038	return r;
1039}
1040
1041/*
1042 * Only used with MSI-X
1043 */
1044int
1045virtio_pci_config_intr(void *arg)
1046{
1047	struct virtio_softc *vsc = arg;
1048
1049	if (vsc->sc_config_change != NULL)
1050		return vsc->sc_config_change(vsc);
1051	return 0;
1052}
1053
1054/*
1055 * Only used with MSI-X
1056 */
1057int
1058virtio_pci_queue_intr(void *arg)
1059{
1060	struct virtqueue *vq = arg;
1061
1062	if (vq->vq_done)
1063		return (vq->vq_done)(vq);
1064	return 0;
1065}
1066
1067int
1068virtio_pci_shared_queue_intr(void *arg)
1069{
1070	struct virtio_softc *vsc = arg;
1071
1072	return virtio_check_vqs(vsc);
1073}
1074
1075/*
1076 * Interrupt handler to be used when polling.
1077 * We cannot use isr here because it is not defined in MSI-X mode.
1078 */
1079int
1080virtio_pci_poll_intr(void *arg)
1081{
1082	struct virtio_pci_softc *sc = arg;
1083	struct virtio_softc *vsc = &sc->sc_sc;
1084	int r = 0;
1085
1086	if (vsc->sc_config_change != NULL)
1087		r = (vsc->sc_config_change)(vsc);
1088
1089	r |= virtio_check_vqs(vsc);
1090
1091	return r;
1092}
1093
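/*
 * Notify the device that queue idx has new buffers. With 1.0, the notify
 * address is scaled by the queue's notify offset.
 */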
1094void
1095virtio_pci_kick(struct virtio_softc *vsc, uint16_t idx)
1096{
1097	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
1098	unsigned offset = 0;
1099	if (vsc->sc_version_1) {
1100		offset = vsc->sc_vqs[idx].vq_notify_off *
1101		    sc->sc_notify_off_multiplier;
1102	}
1103	bus_space_write_2(sc->sc_notify_iot, sc->sc_notify_ioh, offset, idx);
1104}
1105