virtio.c revision 1.63
1/*	$NetBSD: virtio.c,v 1.63 2022/10/31 13:00:34 simonb Exp $	*/
2
3/*
4 * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 * Copyright (c) 2012 Stefan Fritsch, Alexander Fiveg.
6 * Copyright (c) 2010 Minoura Makoto.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__KERNEL_RCSID(0, "$NetBSD: virtio.c,v 1.63 2022/10/31 13:00:34 simonb Exp $");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/kernel.h>
36#include <sys/atomic.h>
37#include <sys/bus.h>
38#include <sys/device.h>
39#include <sys/kmem.h>
40#include <sys/module.h>
41
42#define VIRTIO_PRIVATE
43
44#include <dev/pci/virtioreg.h> /* XXX: move to non-pci */
45#include <dev/pci/virtiovar.h> /* XXX: move to non-pci */
46
47#define MINSEG_INDIRECT		2 /* use indirect if nsegs >= this value */
48
/*
 * Printable names for virtio device types.  Each entry is placed at the
 * array index given by its designator (presumably the virtio device type
 * number; the lookup site is outside this part of the file).  The list is
 * incomplete.
 */
static const char *virtio_device_name[] = {
	[0] = "unknown (0)",
	[1] = "network",
	[2] = "block",
	[3] = "console",
	[4] = "entropy",
	[5] = "memory balloon",
	[6] = "I/O memory",
	[7] = "remote processor messaging",
	[8] = "SCSI",
	[9] = "9P transport",
};
#define NDEVNAMES	__arraycount(virtio_device_name)
63
64static void	virtio_init_vq(struct virtio_softc *,
65		    struct virtqueue *, const bool);
66
67void
68virtio_set_status(struct virtio_softc *sc, int status)
69{
70	sc->sc_ops->set_status(sc, status);
71}
72
/*
 * Reset the device.
 *
 * To bring the device back to a known state, use this sequence:
 *	virtio_reset(sc);	     // this will stop the device activity
 *	<dequeue finished requests>; // virtio_dequeue() can still be called
 *	<revoke pending requests in the vqs if any>;
 *	virtio_reinit_start(sc);     // dequeue prohibited
 *	virtio_negotiate_features(sc, requestedfeatures);
 *	<some other initialization>;
 *	virtio_reinit_end(sc);	     // device activated; enqueue allowed
 * Once attached, feature negotiation is only allowed after virtio_reset.
 */
void
virtio_reset(struct virtio_softc *sc)
{

	virtio_device_reset(sc);
}
92
93int
94virtio_reinit_start(struct virtio_softc *sc)
95{
96	int i, r;
97
98	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
99	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
100	for (i = 0; i < sc->sc_nvqs; i++) {
101		int n;
102		struct virtqueue *vq = &sc->sc_vqs[i];
103		n = sc->sc_ops->read_queue_size(sc, vq->vq_index);
104		if (n == 0)	/* vq disappeared */
105			continue;
106		if (n != vq->vq_num) {
107			panic("%s: virtqueue size changed, vq index %d\n",
108			    device_xname(sc->sc_dev),
109			    vq->vq_index);
110		}
111		virtio_init_vq(sc, vq, true);
112		sc->sc_ops->setup_queue(sc, vq->vq_index,
113		    vq->vq_dmamap->dm_segs[0].ds_addr);
114	}
115
116	r = sc->sc_ops->setup_interrupts(sc, 1);
117	if (r != 0)
118		goto fail;
119
120	return 0;
121
122fail:
123	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
124
125	return 1;
126}
127
128void
129virtio_reinit_end(struct virtio_softc *sc)
130{
131	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
132}
133
134/*
135 * Feature negotiation.
136 */
137void
138virtio_negotiate_features(struct virtio_softc *sc, uint64_t guest_features)
139{
140	if (!(device_cfdata(sc->sc_dev)->cf_flags & 1) &&
141	    !(device_cfdata(sc->sc_child)->cf_flags & 1)) /* XXX */
142		guest_features |= VIRTIO_F_RING_INDIRECT_DESC;
143	sc->sc_ops->neg_features(sc, guest_features);
144	if (sc->sc_active_features & VIRTIO_F_RING_INDIRECT_DESC)
145		sc->sc_indirect = true;
146	else
147		sc->sc_indirect = false;
148}
149
150
151/*
152 * Device configuration registers readers/writers
153 */
/*
 * Optional tracing of device config register accesses; change the
 * "#if 0" to enable it.  DPRINTFR dumps `num' raw bytes starting at
 * `index' followed by the decoded value; it expects a variable named `sc'
 * in the calling scope.  DPRINTFR2 prints a stream-read value next to a
 * normal-read value.
 *
 * Every variant expands to a single do { } while (0) statement so that
 * the macros stay safe inside an unbraced if/else.
 */
#if 0
#define DPRINTFR(n, fmt, val, index, num) \
	do { \
		printf("\n%s (", n); \
		for (int i = 0; i < num; i++) \
			printf("%02x ", bus_space_read_1(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index+i)); \
		printf(") -> "); printf(fmt, val); printf("\n"); \
	} while (0)
#define DPRINTFR2(n, fmt, val_s, val_n) \
	do { \
		printf("%s ", n); \
		printf("\n        stream "); printf(fmt, val_s); printf(" norm "); printf(fmt, val_n); printf("\n"); \
	} while (0)
#else
#define DPRINTFR(n, fmt, val, index, num)	do { } while (0)
#define DPRINTFR2(n, fmt, val_s, val_n)		do { } while (0)
#endif
167
168
169uint8_t
170virtio_read_device_config_1(struct virtio_softc *sc, int index)
171{
172	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
173	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
174	uint8_t val;
175
176	val = bus_space_read_1(iot, ioh, index);
177
178	DPRINTFR("read_1", "%02x", val, index, 1);
179	return val;
180}
181
182uint16_t
183virtio_read_device_config_2(struct virtio_softc *sc, int index)
184{
185	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
186	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
187	uint16_t val;
188
189	val = bus_space_read_2(iot, ioh, index);
190	if (BYTE_ORDER != sc->sc_bus_endian)
191		val = bswap16(val);
192
193	DPRINTFR("read_2", "%04x", val, index, 2);
194	DPRINTFR2("read_2", "%04x",
195	    bus_space_read_stream_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
196		index),
197	    bus_space_read_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index));
198	return val;
199}
200
201uint32_t
202virtio_read_device_config_4(struct virtio_softc *sc, int index)
203{
204	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
205	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
206	uint32_t val;
207
208	val = bus_space_read_4(iot, ioh, index);
209	if (BYTE_ORDER != sc->sc_bus_endian)
210		val = bswap32(val);
211
212	DPRINTFR("read_4", "%08x", val, index, 4);
213	DPRINTFR2("read_4", "%08x",
214	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
215		index),
216	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index));
217	return val;
218}
219
220/*
221 * The Virtio spec explicitly tells that reading and writing 8 bytes are not
222 * considered atomic and no triggers may be connected to reading or writing
223 * it. We access it using two 32 reads. See virtio spec 4.1.3.1.
224 */
225uint64_t
226virtio_read_device_config_8(struct virtio_softc *sc, int index)
227{
228	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
229	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
230	union {
231		uint64_t u64;
232		uint32_t l[2];
233	} v;
234	uint64_t val;
235
236	v.l[0] = bus_space_read_4(iot, ioh, index);
237	v.l[1] = bus_space_read_4(iot, ioh, index + 4);
238	if (sc->sc_bus_endian != sc->sc_struct_endian) {
239		v.l[0] = bswap32(v.l[0]);
240		v.l[1] = bswap32(v.l[1]);
241	}
242	val = v.u64;
243
244	if (BYTE_ORDER != sc->sc_struct_endian)
245		val = bswap64(val);
246
247	DPRINTFR("read_8", "%08"PRIx64, val, index, 8);
248	DPRINTFR2("read_8 low ", "%08x",
249	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
250		index),
251	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index));
252	DPRINTFR2("read_8 high ", "%08x",
253	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
254		index + 4),
255	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index + 4));
256	return val;
257}
258
259/*
260 * In the older virtio spec, device config registers are host endian. On newer
261 * they are little endian. Some newer devices however explicitly specify their
262 * register to always be little endian. These functions cater for these.
263 */
264uint16_t
265virtio_read_device_config_le_2(struct virtio_softc *sc, int index)
266{
267	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
268	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
269	uint16_t val;
270
271	val = bus_space_read_2(iot, ioh, index);
272	if (sc->sc_bus_endian != LITTLE_ENDIAN)
273		val = bswap16(val);
274
275	DPRINTFR("read_le_2", "%04x", val, index, 2);
276	DPRINTFR2("read_le_2", "%04x",
277	    bus_space_read_stream_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0),
278	    bus_space_read_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0));
279	return val;
280}
281
282uint32_t
283virtio_read_device_config_le_4(struct virtio_softc *sc, int index)
284{
285	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
286	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
287	uint32_t val;
288
289	val = bus_space_read_4(iot, ioh, index);
290	if (sc->sc_bus_endian != LITTLE_ENDIAN)
291		val = bswap32(val);
292
293	DPRINTFR("read_le_4", "%08x", val, index, 4);
294	DPRINTFR2("read_le_4", "%08x",
295	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0),
296	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0));
297	return val;
298}
299
300void
301virtio_write_device_config_1(struct virtio_softc *sc, int index, uint8_t value)
302{
303	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
304	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
305
306	bus_space_write_1(iot, ioh, index, value);
307}
308
309void
310virtio_write_device_config_2(struct virtio_softc *sc, int index,
311    uint16_t value)
312{
313	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
314	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
315
316	if (BYTE_ORDER != sc->sc_bus_endian)
317		value = bswap16(value);
318	bus_space_write_2(iot, ioh, index, value);
319}
320
321void
322virtio_write_device_config_4(struct virtio_softc *sc, int index,
323    uint32_t value)
324{
325	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
326	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
327
328	if (BYTE_ORDER != sc->sc_bus_endian)
329		value = bswap32(value);
330	bus_space_write_4(iot, ioh, index, value);
331}
332
333/*
334 * The Virtio spec explicitly tells that reading and writing 8 bytes are not
335 * considered atomic and no triggers may be connected to reading or writing
336 * it. We access it using two 32 bit writes. For good measure it is stated to
337 * always write lsb first just in case of a hypervisor bug. See See virtio
338 * spec 4.1.3.1.
339 */
340void
341virtio_write_device_config_8(struct virtio_softc *sc, int index,
342    uint64_t value)
343{
344	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
345	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
346	union {
347		uint64_t u64;
348		uint32_t l[2];
349	} v;
350
351	if (BYTE_ORDER != sc->sc_struct_endian)
352		value = bswap64(value);
353
354	v.u64 = value;
355	if (sc->sc_bus_endian != sc->sc_struct_endian) {
356		v.l[0] = bswap32(v.l[0]);
357		v.l[1] = bswap32(v.l[1]);
358	}
359
360	if (sc->sc_struct_endian == LITTLE_ENDIAN) {
361		bus_space_write_4(iot, ioh, index,     v.l[0]);
362		bus_space_write_4(iot, ioh, index + 4, v.l[1]);
363	} else {
364		bus_space_write_4(iot, ioh, index + 4, v.l[1]);
365		bus_space_write_4(iot, ioh, index,     v.l[0]);
366	}
367}
368
369/*
370 * In the older virtio spec, device config registers are host endian. On newer
371 * they are little endian. Some newer devices however explicitly specify their
372 * register to always be little endian. These functions cater for these.
373 */
374void
375virtio_write_device_config_le_2(struct virtio_softc *sc, int index,
376    uint16_t value)
377{
378	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
379	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
380
381	if (sc->sc_bus_endian != LITTLE_ENDIAN)
382		value = bswap16(value);
383	bus_space_write_2(iot, ioh, index, value);
384}
385
386void
387virtio_write_device_config_le_4(struct virtio_softc *sc, int index,
388    uint32_t value)
389{
390	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
391	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
392
393	if (sc->sc_bus_endian != LITTLE_ENDIAN)
394		value = bswap32(value);
395	bus_space_write_4(iot, ioh, index, value);
396}
397
398
399/*
400 * data structures endian helpers
401 */
402uint16_t
403virtio_rw16(struct virtio_softc *sc, uint16_t val)
404{
405	KASSERT(sc);
406	return BYTE_ORDER != sc->sc_struct_endian ? bswap16(val) : val;
407}
408
409uint32_t
410virtio_rw32(struct virtio_softc *sc, uint32_t val)
411{
412	KASSERT(sc);
413	return BYTE_ORDER != sc->sc_struct_endian ? bswap32(val) : val;
414}
415
416uint64_t
417virtio_rw64(struct virtio_softc *sc, uint64_t val)
418{
419	KASSERT(sc);
420	return BYTE_ORDER != sc->sc_struct_endian ? bswap64(val) : val;
421}
422
423
424/*
425 * Interrupt handler.
426 */
427static void
428virtio_soft_intr(void *arg)
429{
430	struct virtio_softc *sc = arg;
431
432	KASSERT(sc->sc_intrhand != NULL);
433
434	(*sc->sc_intrhand)(sc);
435}
436
437/*
438 * dmamap sync operations for a virtqueue.
439 */
440static inline void
441vq_sync_descs(struct virtio_softc *sc, struct virtqueue *vq, int ops)
442{
443
444	/* availoffset == sizeof(vring_desc) * vq_num */
445	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, 0, vq->vq_availoffset,
446	    ops);
447}
448
449static inline void
450vq_sync_aring_all(struct virtio_softc *sc, struct virtqueue *vq, int ops)
451{
452	uint16_t hdrlen = offsetof(struct vring_avail, ring);
453	size_t payloadlen = sc->sc_nvqs * sizeof(uint16_t);
454	size_t usedlen = 0;
455
456	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX)
457		usedlen = sizeof(uint16_t);
458	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
459	    vq->vq_availoffset, hdrlen + payloadlen + usedlen, ops);
460}
461
462static inline void
463vq_sync_aring_header(struct virtio_softc *sc, struct virtqueue *vq, int ops)
464{
465	uint16_t hdrlen = offsetof(struct vring_avail, ring);
466
467	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
468	    vq->vq_availoffset, hdrlen, ops);
469}
470
471static inline void
472vq_sync_aring_payload(struct virtio_softc *sc, struct virtqueue *vq, int ops)
473{
474	uint16_t hdrlen = offsetof(struct vring_avail, ring);
475	size_t payloadlen = sc->sc_nvqs * sizeof(uint16_t);
476
477	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
478	    vq->vq_availoffset + hdrlen, payloadlen, ops);
479}
480
481static inline void
482vq_sync_aring_used(struct virtio_softc *sc, struct virtqueue *vq, int ops)
483{
484	uint16_t hdrlen = offsetof(struct vring_avail, ring);
485	size_t payloadlen = sc->sc_nvqs * sizeof(uint16_t);
486	size_t usedlen = sizeof(uint16_t);
487
488	if ((sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) == 0)
489		return;
490	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
491	    vq->vq_availoffset + hdrlen + payloadlen, usedlen, ops);
492}
493
494static inline void
495vq_sync_uring_all(struct virtio_softc *sc, struct virtqueue *vq, int ops)
496{
497	uint16_t hdrlen = offsetof(struct vring_used, ring);
498	size_t payloadlen = sc->sc_nvqs * sizeof(struct vring_used_elem);
499	size_t availlen = 0;
500
501	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX)
502		availlen = sizeof(uint16_t);
503	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
504	    vq->vq_usedoffset, hdrlen + payloadlen + availlen, ops);
505}
506
507static inline void
508vq_sync_uring_header(struct virtio_softc *sc, struct virtqueue *vq, int ops)
509{
510	uint16_t hdrlen = offsetof(struct vring_used, ring);
511
512	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
513	    vq->vq_usedoffset, hdrlen, ops);
514}
515
516static inline void
517vq_sync_uring_payload(struct virtio_softc *sc, struct virtqueue *vq, int ops)
518{
519	uint16_t hdrlen = offsetof(struct vring_used, ring);
520	size_t payloadlen = sc->sc_nvqs * sizeof(struct vring_used_elem);
521
522	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
523	    vq->vq_usedoffset + hdrlen, payloadlen, ops);
524}
525
526static inline void
527vq_sync_uring_avail(struct virtio_softc *sc, struct virtqueue *vq, int ops)
528{
529	uint16_t hdrlen = offsetof(struct vring_used, ring);
530	size_t payloadlen = sc->sc_nvqs * sizeof(struct vring_used_elem);
531	size_t availlen = sizeof(uint16_t);
532
533	if ((sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) == 0)
534		return;
535	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
536	    vq->vq_usedoffset + hdrlen + payloadlen, availlen, ops);
537}
538
539static inline void
540vq_sync_indirect(struct virtio_softc *sc, struct virtqueue *vq, int slot,
541    int ops)
542{
543	int offset = vq->vq_indirectoffset +
544	    sizeof(struct vring_desc) * vq->vq_maxnsegs * slot;
545
546	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
547	    offset, sizeof(struct vring_desc) * vq->vq_maxnsegs, ops);
548}
549
550bool
551virtio_vq_is_enqueued(struct virtio_softc *sc, struct virtqueue *vq)
552{
553
554	if (vq->vq_queued) {
555		vq->vq_queued = 0;
556		vq_sync_aring_all(sc, vq, BUS_DMASYNC_POSTWRITE);
557	}
558
559	vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
560	if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
561		return 0;
562	vq_sync_uring_payload(sc, vq, BUS_DMASYNC_POSTREAD);
563	return 1;
564}
565
566/*
567 * Scan vq, bus_dmamap_sync for the vqs (not for the payload),
568 * and calls (*vq_done)() if some entries are consumed.
569 *
570 * Can be used as sc_intrhand.
571 */
572int
573virtio_vq_intr(struct virtio_softc *sc)
574{
575	struct virtqueue *vq;
576	int i, r = 0;
577
578	for (i = 0; i < sc->sc_nvqs; i++) {
579		vq = &sc->sc_vqs[i];
580		if (virtio_vq_is_enqueued(sc, vq) == 1) {
581			if (vq->vq_done)
582				r |= (*vq->vq_done)(vq);
583		}
584	}
585
586	return r;
587}
588
589int
590virtio_vq_intrhand(struct virtio_softc *sc)
591{
592	struct virtqueue *vq;
593	int i, r = 0;
594
595	for (i = 0; i < sc->sc_nvqs; i++) {
596		vq = &sc->sc_vqs[i];
597		r |= (*vq->vq_intrhand)(vq->vq_intrhand_arg);
598	}
599
600	return r;
601}
602
603
604/*
605 * Increase the event index in order to delay interrupts.
606 */
607int
608virtio_postpone_intr(struct virtio_softc *sc, struct virtqueue *vq,
609    uint16_t nslots)
610{
611	uint16_t	idx, nused;
612
613	idx = vq->vq_used_idx + nslots;
614
615	/* set the new event index: avail_ring->used_event = idx */
616	*vq->vq_used_event = virtio_rw16(sc, idx);
617	vq_sync_aring_used(vq->vq_owner, vq, BUS_DMASYNC_PREWRITE);
618	vq->vq_queued++;
619
620	nused = (uint16_t)
621	    (virtio_rw16(sc, vq->vq_used->idx) - vq->vq_used_idx);
622	KASSERT(nused <= vq->vq_num);
623
624	return nslots < nused;
625}
626
627/*
628 * Postpone interrupt until 3/4 of the available descriptors have been
629 * consumed.
630 */
631int
632virtio_postpone_intr_smart(struct virtio_softc *sc, struct virtqueue *vq)
633{
634	uint16_t	nslots;
635
636	nslots = (uint16_t)
637	    (virtio_rw16(sc, vq->vq_avail->idx) - vq->vq_used_idx) * 3 / 4;
638
639	return virtio_postpone_intr(sc, vq, nslots);
640}
641
642/*
643 * Postpone interrupt until all of the available descriptors have been
644 * consumed.
645 */
646int
647virtio_postpone_intr_far(struct virtio_softc *sc, struct virtqueue *vq)
648{
649	uint16_t	nslots;
650
651	nslots = (uint16_t)
652	    (virtio_rw16(sc, vq->vq_avail->idx) - vq->vq_used_idx);
653
654	return virtio_postpone_intr(sc, vq, nslots);
655}
656
657/*
658 * Start/stop vq interrupt.  No guarantee.
659 */
660void
661virtio_stop_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
662{
663
664	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
665		/*
666		 * No way to disable the interrupt completely with
667		 * RingEventIdx. Instead advance used_event by half the
668		 * possible value. This won't happen soon and is far enough in
669		 * the past to not trigger a spurios interrupt.
670		 */
671		*vq->vq_used_event = virtio_rw16(sc, vq->vq_used_idx + 0x8000);
672		vq_sync_aring_used(sc, vq, BUS_DMASYNC_PREWRITE);
673	} else {
674		vq->vq_avail->flags |=
675		    virtio_rw16(sc, VRING_AVAIL_F_NO_INTERRUPT);
676		vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
677	}
678	vq->vq_queued++;
679}
680
681int
682virtio_start_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
683{
684
685	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
686		/*
687		 * If event index feature is negotiated, enabling interrupts
688		 * is done through setting the latest consumed index in the
689		 * used_event field
690		 */
691		*vq->vq_used_event = virtio_rw16(sc, vq->vq_used_idx);
692		vq_sync_aring_used(sc, vq, BUS_DMASYNC_PREWRITE);
693	} else {
694		vq->vq_avail->flags &=
695		    ~virtio_rw16(sc, VRING_AVAIL_F_NO_INTERRUPT);
696		vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
697	}
698	vq->vq_queued++;
699
700	vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
701	if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
702		return 0;
703	vq_sync_uring_payload(sc, vq, BUS_DMASYNC_POSTREAD);
704	return 1;
705}
706
707/*
708 * Initialize vq structure.
709 */
710static void
711virtio_init_vq(struct virtio_softc *sc, struct virtqueue *vq,
712    const bool reinit)
713{
714	int i, j;
715	int vq_size = vq->vq_num;
716
717	memset(vq->vq_vaddr, 0, vq->vq_bytesize);
718
719	/* build the indirect descriptor chain */
720	if (vq->vq_indirect != NULL) {
721		struct vring_desc *vd;
722
723		for (i = 0; i < vq_size; i++) {
724			vd = vq->vq_indirect;
725			vd += vq->vq_maxnsegs * i;
726			for (j = 0; j < vq->vq_maxnsegs - 1; j++) {
727				vd[j].next = virtio_rw16(sc, j + 1);
728			}
729		}
730	}
731
732	/* free slot management */
733	SIMPLEQ_INIT(&vq->vq_freelist);
734	for (i = 0; i < vq_size; i++) {
735		SIMPLEQ_INSERT_TAIL(&vq->vq_freelist, &vq->vq_entries[i],
736		    qe_list);
737		vq->vq_entries[i].qe_index = i;
738	}
739	if (!reinit)
740		mutex_init(&vq->vq_freelist_lock, MUTEX_SPIN, sc->sc_ipl);
741
742	/* enqueue/dequeue status */
743	vq->vq_avail_idx = 0;
744	vq->vq_used_idx = 0;
745	vq->vq_queued = 0;
746	if (!reinit) {
747		mutex_init(&vq->vq_aring_lock, MUTEX_SPIN, sc->sc_ipl);
748		mutex_init(&vq->vq_uring_lock, MUTEX_SPIN, sc->sc_ipl);
749	}
750	vq_sync_uring_all(sc, vq, BUS_DMASYNC_PREREAD);
751	vq->vq_queued++;
752}
753
754/*
755 * Allocate/free a vq.
756 */
757int
758virtio_alloc_vq(struct virtio_softc *sc, struct virtqueue *vq, int index,
759    int maxsegsize, int maxnsegs, const char *name)
760{
761	int vq_size, allocsize1, allocsize2, allocsize3, allocsize = 0;
762	int rsegs, r, hdrlen;
763#define VIRTQUEUE_ALIGN(n)	roundup(n, VIRTIO_PAGE_SIZE)
764
765	/* Make sure callers allocate vqs in order */
766	KASSERT(sc->sc_nvqs == index);
767
768	memset(vq, 0, sizeof(*vq));
769
770	vq_size = sc->sc_ops->read_queue_size(sc, index);
771	if (vq_size == 0) {
772		aprint_error_dev(sc->sc_dev,
773		    "virtqueue not exist, index %d for %s\n",
774		    index, name);
775		goto err;
776	}
777
778	hdrlen = sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX ? 3 : 2;
779
780	/* allocsize1: descriptor table + avail ring + pad */
781	allocsize1 = VIRTQUEUE_ALIGN(sizeof(struct vring_desc) * vq_size
782	    + sizeof(uint16_t) * (hdrlen + vq_size));
783	/* allocsize2: used ring + pad */
784	allocsize2 = VIRTQUEUE_ALIGN(sizeof(uint16_t) * hdrlen
785	    + sizeof(struct vring_used_elem) * vq_size);
786	/* allocsize3: indirect table */
787	if (sc->sc_indirect && maxnsegs >= MINSEG_INDIRECT)
788		allocsize3 = sizeof(struct vring_desc) * maxnsegs * vq_size;
789	else
790		allocsize3 = 0;
791	allocsize = allocsize1 + allocsize2 + allocsize3;
792
793	/* alloc and map the memory */
794	r = bus_dmamem_alloc(sc->sc_dmat, allocsize, VIRTIO_PAGE_SIZE, 0,
795	    &vq->vq_segs[0], 1, &rsegs, BUS_DMA_WAITOK);
796	if (r != 0) {
797		aprint_error_dev(sc->sc_dev,
798		    "virtqueue %d for %s allocation failed, "
799		    "error code %d\n", index, name, r);
800		goto err;
801	}
802	r = bus_dmamem_map(sc->sc_dmat, &vq->vq_segs[0], rsegs, allocsize,
803	    &vq->vq_vaddr, BUS_DMA_WAITOK);
804	if (r != 0) {
805		aprint_error_dev(sc->sc_dev,
806		    "virtqueue %d for %s map failed, "
807		    "error code %d\n", index, name, r);
808		goto err;
809	}
810	r = bus_dmamap_create(sc->sc_dmat, allocsize, 1, allocsize, 0,
811	    BUS_DMA_WAITOK, &vq->vq_dmamap);
812	if (r != 0) {
813		aprint_error_dev(sc->sc_dev,
814		    "virtqueue %d for %s dmamap creation failed, "
815		    "error code %d\n", index, name, r);
816		goto err;
817	}
818	r = bus_dmamap_load(sc->sc_dmat, vq->vq_dmamap,
819	    vq->vq_vaddr, allocsize, NULL, BUS_DMA_WAITOK);
820	if (r != 0) {
821		aprint_error_dev(sc->sc_dev,
822		    "virtqueue %d for %s dmamap load failed, "
823		    "error code %d\n", index, name, r);
824		goto err;
825	}
826
827	/* remember addresses and offsets for later use */
828	vq->vq_owner = sc;
829	vq->vq_num = vq_size;
830	vq->vq_index = index;
831	vq->vq_desc = vq->vq_vaddr;
832	vq->vq_availoffset = sizeof(struct vring_desc) * vq_size;
833	vq->vq_avail = (void *)(((char *)vq->vq_desc) + vq->vq_availoffset);
834	vq->vq_used_event = (uint16_t *)((char *)vq->vq_avail +
835	    offsetof(struct vring_avail, ring[vq->vq_num]));
836	vq->vq_usedoffset = allocsize1;
837	vq->vq_used = (void *)(((char *)vq->vq_desc) + vq->vq_usedoffset);
838	vq->vq_avail_event = (uint16_t *)((char *)vq->vq_used +
839	    offsetof(struct vring_used, ring[vq->vq_num]));
840
841	if (allocsize3 > 0) {
842		vq->vq_indirectoffset = allocsize1 + allocsize2;
843		vq->vq_indirect = (void *)(((char *)vq->vq_desc)
844		    + vq->vq_indirectoffset);
845	}
846	vq->vq_bytesize = allocsize;
847	vq->vq_maxsegsize = maxsegsize;
848	vq->vq_maxnsegs = maxnsegs;
849
850	/* free slot management */
851	vq->vq_entries = kmem_zalloc(sizeof(struct vq_entry) * vq_size,
852	    KM_SLEEP);
853	virtio_init_vq(sc, vq, false);
854
855	/* set the vq address */
856	sc->sc_ops->setup_queue(sc, index,
857	    vq->vq_dmamap->dm_segs[0].ds_addr);
858
859	aprint_verbose_dev(sc->sc_dev,
860	    "allocated %u byte for virtqueue %d for %s, size %d\n",
861	    allocsize, index, name, vq_size);
862	if (allocsize3 > 0)
863		aprint_verbose_dev(sc->sc_dev,
864		    "using %d byte (%d entries) indirect descriptors\n",
865		    allocsize3, maxnsegs * vq_size);
866
867	sc->sc_nvqs++;
868
869	return 0;
870
871err:
872	sc->sc_ops->setup_queue(sc, index, 0);
873	if (vq->vq_dmamap)
874		bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
875	if (vq->vq_vaddr)
876		bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, allocsize);
877	if (vq->vq_segs[0].ds_addr)
878		bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
879	memset(vq, 0, sizeof(*vq));
880
881	return -1;
882}
883
884int
885virtio_free_vq(struct virtio_softc *sc, struct virtqueue *vq)
886{
887	struct vq_entry *qe;
888	int i = 0;
889
890	/* device must be already deactivated */
891	/* confirm the vq is empty */
892	SIMPLEQ_FOREACH(qe, &vq->vq_freelist, qe_list) {
893		i++;
894	}
895	if (i != vq->vq_num) {
896		printf("%s: freeing non-empty vq, index %d\n",
897		    device_xname(sc->sc_dev), vq->vq_index);
898		return EBUSY;
899	}
900
901	/* tell device that there's no virtqueue any longer */
902	sc->sc_ops->setup_queue(sc, vq->vq_index, 0);
903
904	vq_sync_aring_all(sc, vq, BUS_DMASYNC_POSTWRITE);
905
906	kmem_free(vq->vq_entries, sizeof(*vq->vq_entries) * vq->vq_num);
907	bus_dmamap_unload(sc->sc_dmat, vq->vq_dmamap);
908	bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
909	bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, vq->vq_bytesize);
910	bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
911	mutex_destroy(&vq->vq_freelist_lock);
912	mutex_destroy(&vq->vq_uring_lock);
913	mutex_destroy(&vq->vq_aring_lock);
914	memset(vq, 0, sizeof(*vq));
915
916	sc->sc_nvqs--;
917
918	return 0;
919}
920
921/*
922 * Free descriptor management.
923 */
924static struct vq_entry *
925vq_alloc_entry(struct virtqueue *vq)
926{
927	struct vq_entry *qe;
928
929	mutex_enter(&vq->vq_freelist_lock);
930	if (SIMPLEQ_EMPTY(&vq->vq_freelist)) {
931		mutex_exit(&vq->vq_freelist_lock);
932		return NULL;
933	}
934	qe = SIMPLEQ_FIRST(&vq->vq_freelist);
935	SIMPLEQ_REMOVE_HEAD(&vq->vq_freelist, qe_list);
936	mutex_exit(&vq->vq_freelist_lock);
937
938	return qe;
939}
940
941static void
942vq_free_entry(struct virtqueue *vq, struct vq_entry *qe)
943{
944	mutex_enter(&vq->vq_freelist_lock);
945	SIMPLEQ_INSERT_TAIL(&vq->vq_freelist, qe, qe_list);
946	mutex_exit(&vq->vq_freelist_lock);
947
948	return;
949}
950
951/*
952 * Enqueue several dmamaps as a single request.
953 */
954/*
955 * Typical usage:
 *  <queue size> instances of each of the following are kept in arrays
957 *  - command blocks (in dmamem) should be pre-allocated and mapped
958 *  - dmamaps for command blocks should be pre-allocated and loaded
959 *  - dmamaps for payload should be pre-allocated
960 *      r = virtio_enqueue_prep(sc, vq, &slot);		// allocate a slot
961 *	if (r)		// currently 0 or EAGAIN
962 *		return r;
963 *	r = bus_dmamap_load(dmat, dmamap_payload[slot], data, count, ..);
964 *	if (r) {
965 *		virtio_enqueue_abort(sc, vq, slot);
966 *		return r;
967 *	}
968 *	r = virtio_enqueue_reserve(sc, vq, slot,
969 *	    dmamap_payload[slot]->dm_nsegs + 1);
970 *							// ^ +1 for command
971 *	if (r) {	// currently 0 or EAGAIN
972 *		bus_dmamap_unload(dmat, dmamap_payload[slot]);
973 *		return r;				// do not call abort()
974 *	}
975 *	<setup and prepare commands>
976 *	bus_dmamap_sync(dmat, dmamap_cmd[slot],... BUS_DMASYNC_PREWRITE);
977 *	bus_dmamap_sync(dmat, dmamap_payload[slot],...);
978 *	virtio_enqueue(sc, vq, slot, dmamap_cmd[slot], false);
979 *	virtio_enqueue(sc, vq, slot, dmamap_payload[slot], iswrite);
980 *	virtio_enqueue_commit(sc, vq, slot, true);
981 */
982
983/*
984 * enqueue_prep: allocate a slot number
985 */
986int
987virtio_enqueue_prep(struct virtio_softc *sc, struct virtqueue *vq, int *slotp)
988{
989	struct vq_entry *qe1;
990
991	KASSERT(slotp != NULL);
992
993	qe1 = vq_alloc_entry(vq);
994	if (qe1 == NULL)
995		return EAGAIN;
996	/* next slot is not allocated yet */
997	qe1->qe_next = -1;
998	*slotp = qe1->qe_index;
999
1000	return 0;
1001}
1002
1003/*
1004 * enqueue_reserve: allocate remaining slots and build the descriptor chain.
1005 */
1006int
1007virtio_enqueue_reserve(struct virtio_softc *sc, struct virtqueue *vq,
1008    int slot, int nsegs)
1009{
1010	int indirect;
1011	struct vq_entry *qe1 = &vq->vq_entries[slot];
1012
1013	KASSERT(qe1->qe_next == -1);
1014	KASSERT(1 <= nsegs && nsegs <= vq->vq_num);
1015
1016	if ((vq->vq_indirect != NULL) &&
1017	    (nsegs >= MINSEG_INDIRECT) &&
1018	    (nsegs <= vq->vq_maxnsegs))
1019		indirect = 1;
1020	else
1021		indirect = 0;
1022	qe1->qe_indirect = indirect;
1023
1024	if (indirect) {
1025		struct vring_desc *vd;
1026		uint64_t addr;
1027		int i;
1028
1029		vd = &vq->vq_desc[qe1->qe_index];
1030		addr = vq->vq_dmamap->dm_segs[0].ds_addr
1031		    + vq->vq_indirectoffset;
1032		addr += sizeof(struct vring_desc)
1033		    * vq->vq_maxnsegs * qe1->qe_index;
1034		vd->addr  = virtio_rw64(sc, addr);
1035		vd->len   = virtio_rw32(sc, sizeof(struct vring_desc) * nsegs);
1036		vd->flags = virtio_rw16(sc, VRING_DESC_F_INDIRECT);
1037
1038		vd = vq->vq_indirect;
1039		vd += vq->vq_maxnsegs * qe1->qe_index;
1040		qe1->qe_desc_base = vd;
1041
1042		for (i = 0; i < nsegs - 1; i++) {
1043			vd[i].flags = virtio_rw16(sc, VRING_DESC_F_NEXT);
1044		}
1045		vd[i].flags  = virtio_rw16(sc, 0);
1046		qe1->qe_next = 0;
1047
1048		return 0;
1049	} else {
1050		struct vring_desc *vd;
1051		struct vq_entry *qe;
1052		int i, s;
1053
1054		vd = &vq->vq_desc[0];
1055		qe1->qe_desc_base = vd;
1056		qe1->qe_next = qe1->qe_index;
1057		s = slot;
1058		for (i = 0; i < nsegs - 1; i++) {
1059			qe = vq_alloc_entry(vq);
1060			if (qe == NULL) {
1061				vd[s].flags = virtio_rw16(sc, 0);
1062				virtio_enqueue_abort(sc, vq, slot);
1063				return EAGAIN;
1064			}
1065			vd[s].flags = virtio_rw16(sc, VRING_DESC_F_NEXT);
1066			vd[s].next  = virtio_rw16(sc, qe->qe_index);
1067			s = qe->qe_index;
1068		}
1069		vd[s].flags = virtio_rw16(sc, 0);
1070
1071		return 0;
1072	}
1073}
1074
1075/*
1076 * enqueue: enqueue a single dmamap.
1077 */
1078int
1079virtio_enqueue(struct virtio_softc *sc, struct virtqueue *vq, int slot,
1080    bus_dmamap_t dmamap, bool write)
1081{
1082	struct vq_entry *qe1 = &vq->vq_entries[slot];
1083	struct vring_desc *vd = qe1->qe_desc_base;
1084	int i;
1085	int s = qe1->qe_next;
1086
1087	KASSERT(s >= 0);
1088	KASSERT(dmamap->dm_nsegs > 0);
1089
1090	for (i = 0; i < dmamap->dm_nsegs; i++) {
1091		vd[s].addr = virtio_rw64(sc, dmamap->dm_segs[i].ds_addr);
1092		vd[s].len  = virtio_rw32(sc, dmamap->dm_segs[i].ds_len);
1093		if (!write)
1094			vd[s].flags |= virtio_rw16(sc, VRING_DESC_F_WRITE);
1095		s = virtio_rw16(sc, vd[s].next);
1096	}
1097	qe1->qe_next = s;
1098
1099	return 0;
1100}
1101
1102int
1103virtio_enqueue_p(struct virtio_softc *sc, struct virtqueue *vq, int slot,
1104    bus_dmamap_t dmamap, bus_addr_t start, bus_size_t len,
1105    bool write)
1106{
1107	struct vq_entry *qe1 = &vq->vq_entries[slot];
1108	struct vring_desc *vd = qe1->qe_desc_base;
1109	int s = qe1->qe_next;
1110
1111	KASSERT(s >= 0);
1112	KASSERT(dmamap->dm_nsegs == 1); /* XXX */
1113	KASSERT(dmamap->dm_segs[0].ds_len > start);
1114	KASSERT(dmamap->dm_segs[0].ds_len >= start + len);
1115
1116	vd[s].addr = virtio_rw64(sc, dmamap->dm_segs[0].ds_addr + start);
1117	vd[s].len  = virtio_rw32(sc, len);
1118	if (!write)
1119		vd[s].flags |= virtio_rw16(sc, VRING_DESC_F_WRITE);
1120	qe1->qe_next = virtio_rw16(sc, vd[s].next);
1121
1122	return 0;
1123}
1124
1125/*
1126 * enqueue_commit: add it to the aring.
1127 */
1128int
1129virtio_enqueue_commit(struct virtio_softc *sc, struct virtqueue *vq, int slot,
1130    bool notifynow)
1131{
1132	struct vq_entry *qe1;
1133
1134	if (slot < 0) {
1135		mutex_enter(&vq->vq_aring_lock);
1136		goto notify;
1137	}
1138	vq_sync_descs(sc, vq, BUS_DMASYNC_PREWRITE);
1139	qe1 = &vq->vq_entries[slot];
1140	if (qe1->qe_indirect)
1141		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_PREWRITE);
1142	mutex_enter(&vq->vq_aring_lock);
1143	vq->vq_avail->ring[(vq->vq_avail_idx++) % vq->vq_num] =
1144	    virtio_rw16(sc, slot);
1145
1146notify:
1147	if (notifynow) {
1148		uint16_t o, n, t;
1149		uint16_t flags;
1150
1151		o = virtio_rw16(sc, vq->vq_avail->idx);
1152		n = vq->vq_avail_idx;
1153
1154		/*
1155		 * Prepare for `device->CPU' (host->guest) transfer
1156		 * into the buffer.  This must happen before we commit
1157		 * the vq->vq_avail->idx update to ensure we're not
1158		 * still using the buffer in case program-prior loads
1159		 * or stores in it get delayed past the store to
1160		 * vq->vq_avail->idx.
1161		 */
1162		vq_sync_uring_all(sc, vq, BUS_DMASYNC_PREREAD);
1163
1164		/* ensure payload is published, then avail idx */
1165		vq_sync_aring_payload(sc, vq, BUS_DMASYNC_PREWRITE);
1166		vq->vq_avail->idx = virtio_rw16(sc, vq->vq_avail_idx);
1167		vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
1168		vq->vq_queued++;
1169
1170		if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
1171			vq_sync_uring_avail(sc, vq, BUS_DMASYNC_POSTREAD);
1172			t = virtio_rw16(sc, *vq->vq_avail_event) + 1;
1173			if ((uint16_t) (n - t) < (uint16_t) (n - o))
1174				sc->sc_ops->kick(sc, vq->vq_index);
1175		} else {
1176			vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
1177			flags = virtio_rw16(sc, vq->vq_used->flags);
1178			if (!(flags & VRING_USED_F_NO_NOTIFY))
1179				sc->sc_ops->kick(sc, vq->vq_index);
1180		}
1181	}
1182	mutex_exit(&vq->vq_aring_lock);
1183
1184	return 0;
1185}
1186
1187/*
1188 * enqueue_abort: rollback.
1189 */
1190int
1191virtio_enqueue_abort(struct virtio_softc *sc, struct virtqueue *vq, int slot)
1192{
1193	struct vq_entry *qe = &vq->vq_entries[slot];
1194	struct vring_desc *vd;
1195	int s;
1196
1197	if (qe->qe_next < 0) {
1198		vq_free_entry(vq, qe);
1199		return 0;
1200	}
1201
1202	s = slot;
1203	vd = &vq->vq_desc[0];
1204	while (virtio_rw16(sc, vd[s].flags) & VRING_DESC_F_NEXT) {
1205		s = virtio_rw16(sc, vd[s].next);
1206		vq_free_entry(vq, qe);
1207		qe = &vq->vq_entries[s];
1208	}
1209	vq_free_entry(vq, qe);
1210	return 0;
1211}
1212
1213/*
1214 * Dequeue a request.
1215 */
1216/*
1217 * dequeue: dequeue a request from uring; dmamap_sync for uring is
1218 *	    already done in the interrupt handler.
1219 */
1220int
1221virtio_dequeue(struct virtio_softc *sc, struct virtqueue *vq,
1222    int *slotp, int *lenp)
1223{
1224	uint16_t slot, usedidx;
1225	struct vq_entry *qe;
1226
1227	if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
1228		return ENOENT;
1229	mutex_enter(&vq->vq_uring_lock);
1230	usedidx = vq->vq_used_idx++;
1231	mutex_exit(&vq->vq_uring_lock);
1232	usedidx %= vq->vq_num;
1233	slot = virtio_rw32(sc, vq->vq_used->ring[usedidx].id);
1234	qe = &vq->vq_entries[slot];
1235
1236	if (qe->qe_indirect)
1237		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_POSTWRITE);
1238
1239	if (slotp)
1240		*slotp = slot;
1241	if (lenp)
1242		*lenp = virtio_rw32(sc, vq->vq_used->ring[usedidx].len);
1243
1244	return 0;
1245}
1246
1247/*
1248 * dequeue_commit: complete dequeue; the slot is recycled for future use.
1249 *                 if you forget to call this the slot will be leaked.
1250 */
1251int
1252virtio_dequeue_commit(struct virtio_softc *sc, struct virtqueue *vq, int slot)
1253{
1254	struct vq_entry *qe = &vq->vq_entries[slot];
1255	struct vring_desc *vd = &vq->vq_desc[0];
1256	int s = slot;
1257
1258	while (virtio_rw16(sc, vd[s].flags) & VRING_DESC_F_NEXT) {
1259		s = virtio_rw16(sc, vd[s].next);
1260		vq_free_entry(vq, qe);
1261		qe = &vq->vq_entries[s];
1262	}
1263	vq_free_entry(vq, qe);
1264
1265	return 0;
1266}
1267
1268/*
1269 * Attach a child, fill all the members.
1270 */
1271void
1272virtio_child_attach_start(struct virtio_softc *sc, device_t child, int ipl,
1273    struct virtqueue *vqs,
1274    virtio_callback config_change,
1275    virtio_callback intr_hand,
1276    int req_flags, int req_features, const char *feat_bits)
1277{
1278	char buf[1024];
1279
1280	sc->sc_child = child;
1281	sc->sc_ipl = ipl;
1282	sc->sc_vqs = vqs;
1283	sc->sc_config_change = config_change;
1284	sc->sc_intrhand = intr_hand;
1285	sc->sc_flags = req_flags;
1286
1287	virtio_negotiate_features(sc, req_features);
1288	snprintb(buf, sizeof(buf), feat_bits, sc->sc_active_features);
1289	aprint_normal(": features: %s\n", buf);
1290	aprint_naive("\n");
1291}
1292
1293void
1294virtio_child_attach_set_vqs(struct virtio_softc *sc,
1295    struct virtqueue *vqs, int nvq_pairs)
1296{
1297
1298	KASSERT(nvq_pairs == 1 ||
1299	    (sc->sc_flags & VIRTIO_F_INTR_SOFTINT) == 0);
1300	if (nvq_pairs > 1)
1301		sc->sc_child_mq = true;
1302
1303	sc->sc_vqs = vqs;
1304}
1305
1306int
1307virtio_child_attach_finish(struct virtio_softc *sc)
1308{
1309	int r;
1310
1311	sc->sc_finished_called = true;
1312	r = sc->sc_ops->alloc_interrupts(sc);
1313	if (r != 0) {
1314		aprint_error_dev(sc->sc_dev,
1315		    "failed to allocate interrupts\n");
1316		goto fail;
1317	}
1318
1319	r = sc->sc_ops->setup_interrupts(sc, 0);
1320	if (r != 0) {
1321		aprint_error_dev(sc->sc_dev, "failed to setup interrupts\n");
1322		goto fail;
1323	}
1324
1325	KASSERT(sc->sc_soft_ih == NULL);
1326	if (sc->sc_flags & VIRTIO_F_INTR_SOFTINT) {
1327		u_int flags = SOFTINT_NET;
1328		if (sc->sc_flags & VIRTIO_F_INTR_MPSAFE)
1329			flags |= SOFTINT_MPSAFE;
1330
1331		sc->sc_soft_ih = softint_establish(flags, virtio_soft_intr,
1332		    sc);
1333		if (sc->sc_soft_ih == NULL) {
1334			sc->sc_ops->free_interrupts(sc);
1335			aprint_error_dev(sc->sc_dev,
1336			    "failed to establish soft interrupt\n");
1337			goto fail;
1338		}
1339	}
1340
1341	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
1342	return 0;
1343
1344fail:
1345	if (sc->sc_soft_ih) {
1346		softint_disestablish(sc->sc_soft_ih);
1347		sc->sc_soft_ih = NULL;
1348	}
1349
1350	sc->sc_ops->free_interrupts(sc);
1351
1352	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1353	return 1;
1354}
1355
1356void
1357virtio_child_detach(struct virtio_softc *sc)
1358{
1359	sc->sc_child = NULL;
1360	sc->sc_vqs = NULL;
1361
1362	virtio_device_reset(sc);
1363
1364	sc->sc_ops->free_interrupts(sc);
1365
1366	if (sc->sc_soft_ih) {
1367		softint_disestablish(sc->sc_soft_ih);
1368		sc->sc_soft_ih = NULL;
1369	}
1370}
1371
1372void
1373virtio_child_attach_failed(struct virtio_softc *sc)
1374{
1375	virtio_child_detach(sc);
1376
1377	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1378
1379	sc->sc_child = VIRTIO_CHILD_FAILED;
1380}
1381
1382bus_dma_tag_t
1383virtio_dmat(struct virtio_softc *sc)
1384{
1385	return sc->sc_dmat;
1386}
1387
1388device_t
1389virtio_child(struct virtio_softc *sc)
1390{
1391	return sc->sc_child;
1392}
1393
1394int
1395virtio_intrhand(struct virtio_softc *sc)
1396{
1397	return (*sc->sc_intrhand)(sc);
1398}
1399
1400uint64_t
1401virtio_features(struct virtio_softc *sc)
1402{
1403	return sc->sc_active_features;
1404}
1405
1406int
1407virtio_attach_failed(struct virtio_softc *sc)
1408{
1409	device_t self = sc->sc_dev;
1410
1411	/* no error if its not connected, but its failed */
1412	if (sc->sc_childdevid == 0)
1413		return 1;
1414
1415	if (sc->sc_child == NULL) {
1416		aprint_error_dev(self,
1417		    "no matching child driver; not configured\n");
1418		return 1;
1419	}
1420
1421	if (sc->sc_child == VIRTIO_CHILD_FAILED) {
1422		aprint_error_dev(self, "virtio configuration failed\n");
1423		return 1;
1424	}
1425
1426	/* sanity check */
1427	if (!sc->sc_finished_called) {
1428		aprint_error_dev(self, "virtio internal error, child driver "
1429		    "signaled OK but didn't initialize interrupts\n");
1430		return 1;
1431	}
1432
1433	return 0;
1434}
1435
1436void
1437virtio_print_device_type(device_t self, int id, int revision)
1438{
1439	aprint_normal_dev(self, "%s device (id %d, rev. 0x%02x)\n",
1440	    (id < NDEVNAMES ? virtio_device_name[id] : "Unknown"),
1441	    id,
1442	    revision);
1443}
1444
1445
1446MODULE(MODULE_CLASS_DRIVER, virtio, NULL);
1447
1448#ifdef _MODULE
1449#include "ioconf.c"
1450#endif
1451
1452static int
1453virtio_modcmd(modcmd_t cmd, void *opaque)
1454{
1455	int error = 0;
1456
1457#ifdef _MODULE
1458	switch (cmd) {
1459	case MODULE_CMD_INIT:
1460		error = config_init_component(cfdriver_ioconf_virtio,
1461		    cfattach_ioconf_virtio, cfdata_ioconf_virtio);
1462		break;
1463	case MODULE_CMD_FINI:
1464		error = config_fini_component(cfdriver_ioconf_virtio,
1465		    cfattach_ioconf_virtio, cfdata_ioconf_virtio);
1466		break;
1467	default:
1468		error = ENOTTY;
1469		break;
1470	}
1471#endif
1472
1473	return error;
1474}
1475