virtio.c revision 1.69
1/*	$NetBSD: virtio.c,v 1.69 2023/03/25 02:59:23 yamaguchi Exp $	*/
2
3/*
4 * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 * Copyright (c) 2012 Stefan Fritsch, Alexander Fiveg.
6 * Copyright (c) 2010 Minoura Makoto.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__KERNEL_RCSID(0, "$NetBSD: virtio.c,v 1.69 2023/03/25 02:59:23 yamaguchi Exp $");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/kernel.h>
36#include <sys/atomic.h>
37#include <sys/bus.h>
38#include <sys/device.h>
39#include <sys/kmem.h>
40#include <sys/module.h>
41
42#define VIRTIO_PRIVATE
43
44#include <dev/pci/virtioreg.h> /* XXX: move to non-pci */
45#include <dev/pci/virtiovar.h> /* XXX: move to non-pci */
46
47#define MINSEG_INDIRECT		2 /* use indirect if nsegs >= this value */
48
/*
 * Device type names, indexed by the numeric ID given on each entry
 * (presumably the virtio device ID -- confirm against the users of this
 * table).  The list is incomplete; NDEVNAMES is the number of entries.
 */
static const char *virtio_device_name[] = {
	[0] = "unknown (0)",
	[1] = "network",
	[2] = "block",
	[3] = "console",
	[4] = "entropy",
	[5] = "memory balloon",
	[6] = "I/O memory",
	[7] = "remote processor messaging",
	[8] = "SCSI",
	[9] = "9P transport",
};
#define NDEVNAMES	__arraycount(virtio_device_name)
63
/* Forward declaration; the definition appears further down in this file. */
static void
virtio_reset_vq(struct virtio_softc *sc, struct virtqueue *vq);
66
67void
68virtio_set_status(struct virtio_softc *sc, int status)
69{
70	sc->sc_ops->set_status(sc, status);
71}
72
73/*
74 * Reset the device.
75 */
76/*
77 * To reset the device to a known state, do following:
78 *	virtio_reset(sc);	     // this will stop the device activity
79 *	<dequeue finished requests>; // virtio_dequeue() still can be called
80 *	<revoke pending requests in the vqs if any>;
 *	virtio_reinit_start(sc);     // dequeue prohibited
82 *	newfeatures = virtio_negotiate_features(sc, requestedfeatures);
83 *	<some other initialization>;
84 *	virtio_reinit_end(sc);	     // device activated; enqueue allowed
85 * Once attached, feature negotiation can only be allowed after virtio_reset.
86 */
void
virtio_reset(struct virtio_softc *sc)
{

	/* All of the work is delegated to virtio_device_reset(). */
	virtio_device_reset(sc);
}
92
93int
94virtio_reinit_start(struct virtio_softc *sc)
95{
96	int i, r;
97
98	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
99	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
100	for (i = 0; i < sc->sc_nvqs; i++) {
101		int n;
102		struct virtqueue *vq = &sc->sc_vqs[i];
103		n = sc->sc_ops->read_queue_size(sc, vq->vq_index);
104		if (n == 0)	/* vq disappeared */
105			continue;
106		if (n != vq->vq_num) {
107			panic("%s: virtqueue size changed, vq index %d\n",
108			    device_xname(sc->sc_dev),
109			    vq->vq_index);
110		}
111		virtio_reset_vq(sc, vq);
112		sc->sc_ops->setup_queue(sc, vq->vq_index,
113		    vq->vq_dmamap->dm_segs[0].ds_addr);
114	}
115
116	r = sc->sc_ops->setup_interrupts(sc, 1);
117	if (r != 0)
118		goto fail;
119
120	return 0;
121
122fail:
123	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
124
125	return 1;
126}
127
128void
129virtio_reinit_end(struct virtio_softc *sc)
130{
131	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
132}
133
134/*
135 * Feature negotiation.
136 */
137void
138virtio_negotiate_features(struct virtio_softc *sc, uint64_t guest_features)
139{
140	if (!(device_cfdata(sc->sc_dev)->cf_flags & 1) &&
141	    !(device_cfdata(sc->sc_child)->cf_flags & 1)) /* XXX */
142		guest_features |= VIRTIO_F_RING_INDIRECT_DESC;
143	sc->sc_ops->neg_features(sc, guest_features);
144	if (sc->sc_active_features & VIRTIO_F_RING_INDIRECT_DESC)
145		sc->sc_indirect = true;
146	else
147		sc->sc_indirect = false;
148}
149
150
151/*
152 * Device configuration registers readers/writers
153 */
/*
 * Debug tracing for the device config accessors; compiled out by default.
 *
 * When enabled, DPRINTFR dumps `num' raw bytes starting at `index' and
 * then the decoded value, and DPRINTFR2 prints a "stream" and a "norm"
 * value side by side.  DPRINTFR relies on a variable named `sc' being in
 * scope at the call site.  Both expand to a single statement so they are
 * safe in unbraced if/else bodies.
 */
#if 0
#define DPRINTFR(n, fmt, val, index, num) \
	do { \
		printf("\n%s (", n); \
		for (int i = 0; i < (num); i++) \
			printf("%02x ", bus_space_read_1(sc->sc_devcfg_iot, \
			    sc->sc_devcfg_ioh, (index) + i)); \
		printf(") -> "); printf(fmt, val); printf("\n"); \
	} while (0)
#define DPRINTFR2(n, fmt, val_s, val_n) \
	do { \
		printf("%s ", n); \
		printf("\n        stream "); printf(fmt, val_s); \
		printf(" norm "); printf(fmt, val_n); printf("\n"); \
	} while (0)
#else
#define DPRINTFR(n, fmt, val, index, num)
#define DPRINTFR2(n, fmt, val_s, val_n)
#endif
167
168
169uint8_t
170virtio_read_device_config_1(struct virtio_softc *sc, int index)
171{
172	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
173	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
174	uint8_t val;
175
176	val = bus_space_read_1(iot, ioh, index);
177
178	DPRINTFR("read_1", "%02x", val, index, 1);
179	return val;
180}
181
182uint16_t
183virtio_read_device_config_2(struct virtio_softc *sc, int index)
184{
185	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
186	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
187	uint16_t val;
188
189	val = bus_space_read_2(iot, ioh, index);
190	if (BYTE_ORDER != sc->sc_bus_endian)
191		val = bswap16(val);
192
193	DPRINTFR("read_2", "%04x", val, index, 2);
194	DPRINTFR2("read_2", "%04x",
195	    bus_space_read_stream_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
196		index),
197	    bus_space_read_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index));
198	return val;
199}
200
201uint32_t
202virtio_read_device_config_4(struct virtio_softc *sc, int index)
203{
204	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
205	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
206	uint32_t val;
207
208	val = bus_space_read_4(iot, ioh, index);
209	if (BYTE_ORDER != sc->sc_bus_endian)
210		val = bswap32(val);
211
212	DPRINTFR("read_4", "%08x", val, index, 4);
213	DPRINTFR2("read_4", "%08x",
214	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
215		index),
216	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index));
217	return val;
218}
219
220/*
 * The Virtio spec explicitly states that 8-byte reads and writes are not
 * considered atomic and that no triggers may be connected to reading or
 * writing them. We access it using two 32-bit reads. See virtio spec 4.1.3.1.
224 */
225uint64_t
226virtio_read_device_config_8(struct virtio_softc *sc, int index)
227{
228	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
229	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
230	union {
231		uint64_t u64;
232		uint32_t l[2];
233	} v;
234	uint64_t val;
235
236	v.l[0] = bus_space_read_4(iot, ioh, index);
237	v.l[1] = bus_space_read_4(iot, ioh, index + 4);
238	if (sc->sc_bus_endian != sc->sc_struct_endian) {
239		v.l[0] = bswap32(v.l[0]);
240		v.l[1] = bswap32(v.l[1]);
241	}
242	val = v.u64;
243
244	if (BYTE_ORDER != sc->sc_struct_endian)
245		val = bswap64(val);
246
247	DPRINTFR("read_8", "%08"PRIx64, val, index, 8);
248	DPRINTFR2("read_8 low ", "%08x",
249	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
250		index),
251	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index));
252	DPRINTFR2("read_8 high ", "%08x",
253	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
254		index + 4),
255	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index + 4));
256	return val;
257}
258
259/*
 * In the older virtio spec, device config registers are host endian; in the
 * newer spec they are little endian. Some devices, however, explicitly specify
 * that their registers are always little endian. These functions cater for those.
263 */
264uint16_t
265virtio_read_device_config_le_2(struct virtio_softc *sc, int index)
266{
267	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
268	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
269	uint16_t val;
270
271	val = bus_space_read_2(iot, ioh, index);
272	if (sc->sc_bus_endian != LITTLE_ENDIAN)
273		val = bswap16(val);
274
275	DPRINTFR("read_le_2", "%04x", val, index, 2);
276	DPRINTFR2("read_le_2", "%04x",
277	    bus_space_read_stream_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0),
278	    bus_space_read_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0));
279	return val;
280}
281
282uint32_t
283virtio_read_device_config_le_4(struct virtio_softc *sc, int index)
284{
285	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
286	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
287	uint32_t val;
288
289	val = bus_space_read_4(iot, ioh, index);
290	if (sc->sc_bus_endian != LITTLE_ENDIAN)
291		val = bswap32(val);
292
293	DPRINTFR("read_le_4", "%08x", val, index, 4);
294	DPRINTFR2("read_le_4", "%08x",
295	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0),
296	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0));
297	return val;
298}
299
300void
301virtio_write_device_config_1(struct virtio_softc *sc, int index, uint8_t value)
302{
303	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
304	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
305
306	bus_space_write_1(iot, ioh, index, value);
307}
308
309void
310virtio_write_device_config_2(struct virtio_softc *sc, int index,
311    uint16_t value)
312{
313	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
314	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
315
316	if (BYTE_ORDER != sc->sc_bus_endian)
317		value = bswap16(value);
318	bus_space_write_2(iot, ioh, index, value);
319}
320
321void
322virtio_write_device_config_4(struct virtio_softc *sc, int index,
323    uint32_t value)
324{
325	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
326	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
327
328	if (BYTE_ORDER != sc->sc_bus_endian)
329		value = bswap32(value);
330	bus_space_write_4(iot, ioh, index, value);
331}
332
333/*
 * The Virtio spec explicitly states that 8-byte reads and writes are not
 * considered atomic and that no triggers may be connected to reading or
 * writing them. We access it using two 32-bit writes. For good measure it is
 * stated to always write the lsb first, just in case of a hypervisor bug. See
 * virtio spec 4.1.3.1.
339 */
340void
341virtio_write_device_config_8(struct virtio_softc *sc, int index,
342    uint64_t value)
343{
344	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
345	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
346	union {
347		uint64_t u64;
348		uint32_t l[2];
349	} v;
350
351	if (BYTE_ORDER != sc->sc_struct_endian)
352		value = bswap64(value);
353
354	v.u64 = value;
355	if (sc->sc_bus_endian != sc->sc_struct_endian) {
356		v.l[0] = bswap32(v.l[0]);
357		v.l[1] = bswap32(v.l[1]);
358	}
359
360	if (sc->sc_struct_endian == LITTLE_ENDIAN) {
361		bus_space_write_4(iot, ioh, index,     v.l[0]);
362		bus_space_write_4(iot, ioh, index + 4, v.l[1]);
363	} else {
364		bus_space_write_4(iot, ioh, index + 4, v.l[1]);
365		bus_space_write_4(iot, ioh, index,     v.l[0]);
366	}
367}
368
369/*
 * In the older virtio spec, device config registers are host endian; in the
 * newer spec they are little endian. Some devices, however, explicitly specify
 * that their registers are always little endian. These functions cater for those.
373 */
374void
375virtio_write_device_config_le_2(struct virtio_softc *sc, int index,
376    uint16_t value)
377{
378	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
379	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
380
381	if (sc->sc_bus_endian != LITTLE_ENDIAN)
382		value = bswap16(value);
383	bus_space_write_2(iot, ioh, index, value);
384}
385
386void
387virtio_write_device_config_le_4(struct virtio_softc *sc, int index,
388    uint32_t value)
389{
390	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
391	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
392
393	if (sc->sc_bus_endian != LITTLE_ENDIAN)
394		value = bswap32(value);
395	bus_space_write_4(iot, ioh, index, value);
396}
397
398
399/*
400 * data structures endian helpers
401 */
402uint16_t
403virtio_rw16(struct virtio_softc *sc, uint16_t val)
404{
405	KASSERT(sc);
406	return BYTE_ORDER != sc->sc_struct_endian ? bswap16(val) : val;
407}
408
409uint32_t
410virtio_rw32(struct virtio_softc *sc, uint32_t val)
411{
412	KASSERT(sc);
413	return BYTE_ORDER != sc->sc_struct_endian ? bswap32(val) : val;
414}
415
416uint64_t
417virtio_rw64(struct virtio_softc *sc, uint64_t val)
418{
419	KASSERT(sc);
420	return BYTE_ORDER != sc->sc_struct_endian ? bswap64(val) : val;
421}
422
423
424/*
425 * Interrupt handler.
426 */
427static void
428virtio_soft_intr(void *arg)
429{
430	struct virtio_softc *sc = arg;
431
432	KASSERT(sc->sc_intrhand != NULL);
433
434	(*sc->sc_intrhand)(sc);
435}
436
437/* set to vq->vq_intrhand in virtio_init_vq_vqdone() */
438static int
439virtio_vq_done(void *xvq)
440{
441	struct virtqueue *vq = xvq;
442
443	return vq->vq_done(vq);
444}
445
446static int
447virtio_vq_intr(struct virtio_softc *sc)
448{
449	struct virtqueue *vq;
450	int i, r = 0;
451
452	for (i = 0; i < sc->sc_nvqs; i++) {
453		vq = &sc->sc_vqs[i];
454		if (virtio_vq_is_enqueued(sc, vq) == 1) {
455			r |= (*vq->vq_intrhand)(vq->vq_intrhand_arg);
456		}
457	}
458
459	return r;
460}
461
462/*
463 * dmamap sync operations for a virtqueue.
464 */
465static inline void
466vq_sync_descs(struct virtio_softc *sc, struct virtqueue *vq, int ops)
467{
468
469	/* availoffset == sizeof(vring_desc) * vq_num */
470	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, 0, vq->vq_availoffset,
471	    ops);
472}
473
474static inline void
475vq_sync_aring_all(struct virtio_softc *sc, struct virtqueue *vq, int ops)
476{
477	uint16_t hdrlen = offsetof(struct vring_avail, ring);
478	size_t payloadlen = vq->vq_num * sizeof(uint16_t);
479	size_t usedlen = 0;
480
481	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX)
482		usedlen = sizeof(uint16_t);
483	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
484	    vq->vq_availoffset, hdrlen + payloadlen + usedlen, ops);
485}
486
487static inline void
488vq_sync_aring_header(struct virtio_softc *sc, struct virtqueue *vq, int ops)
489{
490	uint16_t hdrlen = offsetof(struct vring_avail, ring);
491
492	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
493	    vq->vq_availoffset, hdrlen, ops);
494}
495
496static inline void
497vq_sync_aring_payload(struct virtio_softc *sc, struct virtqueue *vq, int ops)
498{
499	uint16_t hdrlen = offsetof(struct vring_avail, ring);
500	size_t payloadlen = vq->vq_num * sizeof(uint16_t);
501
502	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
503	    vq->vq_availoffset + hdrlen, payloadlen, ops);
504}
505
506static inline void
507vq_sync_aring_used(struct virtio_softc *sc, struct virtqueue *vq, int ops)
508{
509	uint16_t hdrlen = offsetof(struct vring_avail, ring);
510	size_t payloadlen = vq->vq_num * sizeof(uint16_t);
511	size_t usedlen = sizeof(uint16_t);
512
513	if ((sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) == 0)
514		return;
515	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
516	    vq->vq_availoffset + hdrlen + payloadlen, usedlen, ops);
517}
518
519static inline void
520vq_sync_uring_all(struct virtio_softc *sc, struct virtqueue *vq, int ops)
521{
522	uint16_t hdrlen = offsetof(struct vring_used, ring);
523	size_t payloadlen = vq->vq_num * sizeof(struct vring_used_elem);
524	size_t availlen = 0;
525
526	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX)
527		availlen = sizeof(uint16_t);
528	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
529	    vq->vq_usedoffset, hdrlen + payloadlen + availlen, ops);
530}
531
532static inline void
533vq_sync_uring_header(struct virtio_softc *sc, struct virtqueue *vq, int ops)
534{
535	uint16_t hdrlen = offsetof(struct vring_used, ring);
536
537	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
538	    vq->vq_usedoffset, hdrlen, ops);
539}
540
541static inline void
542vq_sync_uring_payload(struct virtio_softc *sc, struct virtqueue *vq, int ops)
543{
544	uint16_t hdrlen = offsetof(struct vring_used, ring);
545	size_t payloadlen = vq->vq_num * sizeof(struct vring_used_elem);
546
547	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
548	    vq->vq_usedoffset + hdrlen, payloadlen, ops);
549}
550
551static inline void
552vq_sync_uring_avail(struct virtio_softc *sc, struct virtqueue *vq, int ops)
553{
554	uint16_t hdrlen = offsetof(struct vring_used, ring);
555	size_t payloadlen = vq->vq_num * sizeof(struct vring_used_elem);
556	size_t availlen = sizeof(uint16_t);
557
558	if ((sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) == 0)
559		return;
560	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
561	    vq->vq_usedoffset + hdrlen + payloadlen, availlen, ops);
562}
563
564static inline void
565vq_sync_indirect(struct virtio_softc *sc, struct virtqueue *vq, int slot,
566    int ops)
567{
568	int offset = vq->vq_indirectoffset +
569	    sizeof(struct vring_desc) * vq->vq_maxnsegs * slot;
570
571	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
572	    offset, sizeof(struct vring_desc) * vq->vq_maxnsegs, ops);
573}
574
575bool
576virtio_vq_is_enqueued(struct virtio_softc *sc, struct virtqueue *vq)
577{
578
579	if (vq->vq_queued) {
580		vq->vq_queued = 0;
581		vq_sync_aring_all(sc, vq, BUS_DMASYNC_POSTWRITE);
582	}
583
584	vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
585	if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
586		return 0;
587	vq_sync_uring_payload(sc, vq, BUS_DMASYNC_POSTREAD);
588	return 1;
589}
590
591/*
592 * Increase the event index in order to delay interrupts.
593 */
594int
595virtio_postpone_intr(struct virtio_softc *sc, struct virtqueue *vq,
596    uint16_t nslots)
597{
598	uint16_t	idx, nused;
599
600	idx = vq->vq_used_idx + nslots;
601
602	/* set the new event index: avail_ring->used_event = idx */
603	*vq->vq_used_event = virtio_rw16(sc, idx);
604	vq_sync_aring_used(vq->vq_owner, vq, BUS_DMASYNC_PREWRITE);
605	vq->vq_queued++;
606
607	nused = (uint16_t)
608	    (virtio_rw16(sc, vq->vq_used->idx) - vq->vq_used_idx);
609	KASSERT(nused <= vq->vq_num);
610
611	return nslots < nused;
612}
613
614/*
615 * Postpone interrupt until 3/4 of the available descriptors have been
616 * consumed.
617 */
618int
619virtio_postpone_intr_smart(struct virtio_softc *sc, struct virtqueue *vq)
620{
621	uint16_t	nslots;
622
623	nslots = (uint16_t)
624	    (virtio_rw16(sc, vq->vq_avail->idx) - vq->vq_used_idx) * 3 / 4;
625
626	return virtio_postpone_intr(sc, vq, nslots);
627}
628
629/*
630 * Postpone interrupt until all of the available descriptors have been
631 * consumed.
632 */
633int
634virtio_postpone_intr_far(struct virtio_softc *sc, struct virtqueue *vq)
635{
636	uint16_t	nslots;
637
638	nslots = (uint16_t)
639	    (virtio_rw16(sc, vq->vq_avail->idx) - vq->vq_used_idx);
640
641	return virtio_postpone_intr(sc, vq, nslots);
642}
643
644/*
645 * Start/stop vq interrupt.  No guarantee.
646 */
647void
648virtio_stop_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
649{
650
651	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
652		/*
653		 * No way to disable the interrupt completely with
654		 * RingEventIdx. Instead advance used_event by half the
655		 * possible value. This won't happen soon and is far enough in
656		 * the past to not trigger a spurios interrupt.
657		 */
658		*vq->vq_used_event = virtio_rw16(sc, vq->vq_used_idx + 0x8000);
659		vq_sync_aring_used(sc, vq, BUS_DMASYNC_PREWRITE);
660	} else {
661		vq->vq_avail->flags |=
662		    virtio_rw16(sc, VRING_AVAIL_F_NO_INTERRUPT);
663		vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
664	}
665	vq->vq_queued++;
666}
667
668int
669virtio_start_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
670{
671
672	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
673		/*
674		 * If event index feature is negotiated, enabling interrupts
675		 * is done through setting the latest consumed index in the
676		 * used_event field
677		 */
678		*vq->vq_used_event = virtio_rw16(sc, vq->vq_used_idx);
679		vq_sync_aring_used(sc, vq, BUS_DMASYNC_PREWRITE);
680	} else {
681		vq->vq_avail->flags &=
682		    ~virtio_rw16(sc, VRING_AVAIL_F_NO_INTERRUPT);
683		vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
684	}
685	vq->vq_queued++;
686
687	vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
688	if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
689		return 0;
690	vq_sync_uring_payload(sc, vq, BUS_DMASYNC_POSTREAD);
691	return 1;
692}
693
694/*
695 * Initialize vq structure.
696 */
697/*
698 * Reset virtqueue parameters
699 */
700static void
701virtio_reset_vq(struct virtio_softc *sc, struct virtqueue *vq)
702{
703	int i, j;
704	int vq_size = vq->vq_num;
705
706	memset(vq->vq_vaddr, 0, vq->vq_bytesize);
707
708	/* build the indirect descriptor chain */
709	if (vq->vq_indirect != NULL) {
710		struct vring_desc *vd;
711
712		for (i = 0; i < vq_size; i++) {
713			vd = vq->vq_indirect;
714			vd += vq->vq_maxnsegs * i;
715			for (j = 0; j < vq->vq_maxnsegs - 1; j++) {
716				vd[j].next = virtio_rw16(sc, j + 1);
717			}
718		}
719	}
720
721	/* free slot management */
722	SIMPLEQ_INIT(&vq->vq_freelist);
723	for (i = 0; i < vq_size; i++) {
724		SIMPLEQ_INSERT_TAIL(&vq->vq_freelist, &vq->vq_entries[i],
725		    qe_list);
726		vq->vq_entries[i].qe_index = i;
727	}
728
729	/* enqueue/dequeue status */
730	vq->vq_avail_idx = 0;
731	vq->vq_used_idx = 0;
732	vq->vq_queued = 0;
733	vq_sync_uring_all(sc, vq, BUS_DMASYNC_PREREAD);
734	vq->vq_queued++;
735}
736
737/* Initialize vq */
738void
739virtio_init_vq_vqdone(struct virtio_softc *sc, struct virtqueue *vq,
740    int index, int (*vq_done)(struct virtqueue *))
741{
742
743	virtio_init_vq(sc, vq, index, virtio_vq_done, vq);
744	vq->vq_done = vq_done;
745}
746
747void
748virtio_init_vq(struct virtio_softc *sc, struct virtqueue *vq, int index,
749   int (*intrhand)(void *), void *arg)
750{
751
752	memset(vq, 0, sizeof(*vq));
753
754	vq->vq_owner = sc;
755	vq->vq_num = sc->sc_ops->read_queue_size(sc, index);
756	vq->vq_index = index;
757	vq->vq_intrhand = intrhand;
758	vq->vq_intrhand_arg = arg;
759}
760
761/*
762 * Allocate/free a vq.
763 */
764int
765virtio_alloc_vq(struct virtio_softc *sc, struct virtqueue *vq,
766    int maxsegsize, int maxnsegs, const char *name)
767{
768	bus_size_t size_desc, size_avail, size_used, size_indirect;
769	bus_size_t allocsize = 0, size_desc_avail;
770	int rsegs, r, hdrlen;
771	unsigned int vq_num;
772#define VIRTQUEUE_ALIGN(n)	roundup(n, VIRTIO_PAGE_SIZE)
773
774	vq_num = vq->vq_num;
775
776	if (vq_num == 0) {
777		aprint_error_dev(sc->sc_dev,
778		    "virtqueue not exist, index %d for %s\n",
779		    vq->vq_index, name);
780		goto err;
781	}
782
783	hdrlen = sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX ? 3 : 2;
784
785	size_desc = sizeof(vq->vq_desc[0]) * vq_num;
786	size_avail = sizeof(uint16_t) * hdrlen
787	    + sizeof(vq->vq_avail[0].ring) * vq_num;
788	size_used = sizeof(uint16_t) *hdrlen
789	    + sizeof(vq->vq_used[0].ring) * vq_num;
790	size_indirect = (sc->sc_indirect && maxnsegs >= MINSEG_INDIRECT) ?
791	    sizeof(struct vring_desc) * maxnsegs * vq_num : 0;
792
793	size_desc_avail = VIRTQUEUE_ALIGN(size_desc + size_avail);
794	size_used = VIRTQUEUE_ALIGN(size_used);
795
796	allocsize = size_desc_avail + size_used + size_indirect;
797
798	/* alloc and map the memory */
799	r = bus_dmamem_alloc(sc->sc_dmat, allocsize, VIRTIO_PAGE_SIZE, 0,
800	    &vq->vq_segs[0], 1, &rsegs, BUS_DMA_WAITOK);
801	if (r != 0) {
802		aprint_error_dev(sc->sc_dev,
803		    "virtqueue %d for %s allocation failed, "
804		    "error code %d\n", vq->vq_index, name, r);
805		goto err;
806	}
807
808	r = bus_dmamem_map(sc->sc_dmat, &vq->vq_segs[0], rsegs, allocsize,
809	    &vq->vq_vaddr, BUS_DMA_WAITOK);
810	if (r != 0) {
811		aprint_error_dev(sc->sc_dev,
812		    "virtqueue %d for %s map failed, "
813		    "error code %d\n", vq->vq_index, name, r);
814		goto err;
815	}
816
817	r = bus_dmamap_create(sc->sc_dmat, allocsize, 1, allocsize, 0,
818	    BUS_DMA_WAITOK, &vq->vq_dmamap);
819	if (r != 0) {
820		aprint_error_dev(sc->sc_dev,
821		    "virtqueue %d for %s dmamap creation failed, "
822		    "error code %d\n", vq->vq_index, name, r);
823		goto err;
824	}
825
826	r = bus_dmamap_load(sc->sc_dmat, vq->vq_dmamap,
827	    vq->vq_vaddr, allocsize, NULL, BUS_DMA_WAITOK);
828	if (r != 0) {
829		aprint_error_dev(sc->sc_dev,
830		    "virtqueue %d for %s dmamap load failed, "
831		    "error code %d\n", vq->vq_index, name, r);
832		goto err;
833	}
834
835	vq->vq_bytesize = allocsize;
836	vq->vq_maxsegsize = maxsegsize;
837	vq->vq_maxnsegs = maxnsegs;
838
839#define VIRTIO_PTR(base, offset)	(void *)((intptr_t)(base) + (offset))
840	/* initialize vring pointers */
841	vq->vq_desc = VIRTIO_PTR(vq->vq_vaddr, 0);
842	vq->vq_availoffset = size_desc;
843	vq->vq_avail = VIRTIO_PTR(vq->vq_vaddr, vq->vq_availoffset);
844	vq->vq_used_event = VIRTIO_PTR(vq->vq_avail,
845	    offsetof(struct vring_avail, ring[vq_num]));
846	vq->vq_usedoffset = size_desc_avail;
847	vq->vq_used = VIRTIO_PTR(vq->vq_vaddr, vq->vq_usedoffset);
848	vq->vq_avail_event = VIRTIO_PTR(vq->vq_used,
849	    offsetof(struct vring_used, ring[vq_num]));
850
851	if (size_indirect > 0) {
852		vq->vq_indirectoffset = size_desc_avail + size_used;
853		vq->vq_indirect = VIRTIO_PTR(vq->vq_vaddr,
854		    vq->vq_indirectoffset);
855	}
856#undef VIRTIO_PTR
857
858	/* free slot management */
859	vq->vq_entries = kmem_zalloc(sizeof(struct vq_entry) * vq_num,
860	    KM_SLEEP);
861
862	mutex_init(&vq->vq_freelist_lock, MUTEX_SPIN, sc->sc_ipl);
863	mutex_init(&vq->vq_aring_lock, MUTEX_SPIN, sc->sc_ipl);
864	mutex_init(&vq->vq_uring_lock, MUTEX_SPIN, sc->sc_ipl);
865
866	virtio_reset_vq(sc, vq);
867
868	aprint_verbose_dev(sc->sc_dev,
869	    "allocated %zu byte for virtqueue %d for %s, size %d\n",
870	    allocsize, vq->vq_index, name, vq_num);
871	if (size_indirect > 0)
872		aprint_verbose_dev(sc->sc_dev,
873		    "using %zu byte (%d entries) indirect descriptors\n",
874		    size_indirect, maxnsegs * vq_num);
875
876	return 0;
877
878err:
879	sc->sc_ops->setup_queue(sc, vq->vq_index, 0);
880	if (vq->vq_dmamap)
881		bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
882	if (vq->vq_vaddr)
883		bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, allocsize);
884	if (vq->vq_segs[0].ds_addr)
885		bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
886	memset(vq, 0, sizeof(*vq));
887
888	return -1;
889}
890
891int
892virtio_free_vq(struct virtio_softc *sc, struct virtqueue *vq)
893{
894	struct vq_entry *qe;
895	int i = 0;
896
897	if (vq->vq_vaddr == NULL)
898		return 0;
899
900	/* device must be already deactivated */
901	/* confirm the vq is empty */
902	SIMPLEQ_FOREACH(qe, &vq->vq_freelist, qe_list) {
903		i++;
904	}
905	if (i != vq->vq_num) {
906		printf("%s: freeing non-empty vq, index %d\n",
907		    device_xname(sc->sc_dev), vq->vq_index);
908		return EBUSY;
909	}
910
911	/* tell device that there's no virtqueue any longer */
912	sc->sc_ops->setup_queue(sc, vq->vq_index, 0);
913
914	vq_sync_aring_all(sc, vq, BUS_DMASYNC_POSTWRITE);
915
916	kmem_free(vq->vq_entries, sizeof(*vq->vq_entries) * vq->vq_num);
917	bus_dmamap_unload(sc->sc_dmat, vq->vq_dmamap);
918	bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
919	bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, vq->vq_bytesize);
920	bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
921	mutex_destroy(&vq->vq_freelist_lock);
922	mutex_destroy(&vq->vq_uring_lock);
923	mutex_destroy(&vq->vq_aring_lock);
924	memset(vq, 0, sizeof(*vq));
925
926	return 0;
927}
928
929/*
930 * Free descriptor management.
931 */
932static struct vq_entry *
933vq_alloc_entry(struct virtqueue *vq)
934{
935	struct vq_entry *qe;
936
937	mutex_enter(&vq->vq_freelist_lock);
938	if (SIMPLEQ_EMPTY(&vq->vq_freelist)) {
939		mutex_exit(&vq->vq_freelist_lock);
940		return NULL;
941	}
942	qe = SIMPLEQ_FIRST(&vq->vq_freelist);
943	SIMPLEQ_REMOVE_HEAD(&vq->vq_freelist, qe_list);
944	mutex_exit(&vq->vq_freelist_lock);
945
946	return qe;
947}
948
949static void
950vq_free_entry(struct virtqueue *vq, struct vq_entry *qe)
951{
952	mutex_enter(&vq->vq_freelist_lock);
953	SIMPLEQ_INSERT_TAIL(&vq->vq_freelist, qe, qe_list);
954	mutex_exit(&vq->vq_freelist_lock);
955
956	return;
957}
958
959/*
960 * Enqueue several dmamaps as a single request.
961 */
962/*
963 * Typical usage:
 *  <queue size> instances of each of the following are stored in arrays
965 *  - command blocks (in dmamem) should be pre-allocated and mapped
966 *  - dmamaps for command blocks should be pre-allocated and loaded
967 *  - dmamaps for payload should be pre-allocated
968 *      r = virtio_enqueue_prep(sc, vq, &slot);		// allocate a slot
969 *	if (r)		// currently 0 or EAGAIN
970 *		return r;
971 *	r = bus_dmamap_load(dmat, dmamap_payload[slot], data, count, ..);
972 *	if (r) {
973 *		virtio_enqueue_abort(sc, vq, slot);
974 *		return r;
975 *	}
976 *	r = virtio_enqueue_reserve(sc, vq, slot,
977 *	    dmamap_payload[slot]->dm_nsegs + 1);
978 *							// ^ +1 for command
979 *	if (r) {	// currently 0 or EAGAIN
980 *		bus_dmamap_unload(dmat, dmamap_payload[slot]);
981 *		return r;				// do not call abort()
982 *	}
983 *	<setup and prepare commands>
984 *	bus_dmamap_sync(dmat, dmamap_cmd[slot],... BUS_DMASYNC_PREWRITE);
985 *	bus_dmamap_sync(dmat, dmamap_payload[slot],...);
986 *	virtio_enqueue(sc, vq, slot, dmamap_cmd[slot], false);
987 *	virtio_enqueue(sc, vq, slot, dmamap_payload[slot], iswrite);
988 *	virtio_enqueue_commit(sc, vq, slot, true);
989 */
990
991/*
992 * enqueue_prep: allocate a slot number
993 */
994int
995virtio_enqueue_prep(struct virtio_softc *sc, struct virtqueue *vq, int *slotp)
996{
997	struct vq_entry *qe1;
998
999	KASSERT(slotp != NULL);
1000
1001	qe1 = vq_alloc_entry(vq);
1002	if (qe1 == NULL)
1003		return EAGAIN;
1004	/* next slot is not allocated yet */
1005	qe1->qe_next = -1;
1006	*slotp = qe1->qe_index;
1007
1008	return 0;
1009}
1010
1011/*
1012 * enqueue_reserve: allocate remaining slots and build the descriptor chain.
1013 */
1014int
1015virtio_enqueue_reserve(struct virtio_softc *sc, struct virtqueue *vq,
1016    int slot, int nsegs)
1017{
1018	int indirect;
1019	struct vq_entry *qe1 = &vq->vq_entries[slot];
1020
1021	KASSERT(qe1->qe_next == -1);
1022	KASSERT(1 <= nsegs && nsegs <= vq->vq_num);
1023
1024	if ((vq->vq_indirect != NULL) &&
1025	    (nsegs >= MINSEG_INDIRECT) &&
1026	    (nsegs <= vq->vq_maxnsegs))
1027		indirect = 1;
1028	else
1029		indirect = 0;
1030	qe1->qe_indirect = indirect;
1031
1032	if (indirect) {
1033		struct vring_desc *vd;
1034		uint64_t addr;
1035		int i;
1036
1037		vd = &vq->vq_desc[qe1->qe_index];
1038		addr = vq->vq_dmamap->dm_segs[0].ds_addr
1039		    + vq->vq_indirectoffset;
1040		addr += sizeof(struct vring_desc)
1041		    * vq->vq_maxnsegs * qe1->qe_index;
1042		vd->addr  = virtio_rw64(sc, addr);
1043		vd->len   = virtio_rw32(sc, sizeof(struct vring_desc) * nsegs);
1044		vd->flags = virtio_rw16(sc, VRING_DESC_F_INDIRECT);
1045
1046		vd = vq->vq_indirect;
1047		vd += vq->vq_maxnsegs * qe1->qe_index;
1048		qe1->qe_desc_base = vd;
1049
1050		for (i = 0; i < nsegs - 1; i++) {
1051			vd[i].flags = virtio_rw16(sc, VRING_DESC_F_NEXT);
1052		}
1053		vd[i].flags  = virtio_rw16(sc, 0);
1054		qe1->qe_next = 0;
1055
1056		return 0;
1057	} else {
1058		struct vring_desc *vd;
1059		struct vq_entry *qe;
1060		int i, s;
1061
1062		vd = &vq->vq_desc[0];
1063		qe1->qe_desc_base = vd;
1064		qe1->qe_next = qe1->qe_index;
1065		s = slot;
1066		for (i = 0; i < nsegs - 1; i++) {
1067			qe = vq_alloc_entry(vq);
1068			if (qe == NULL) {
1069				vd[s].flags = virtio_rw16(sc, 0);
1070				virtio_enqueue_abort(sc, vq, slot);
1071				return EAGAIN;
1072			}
1073			vd[s].flags = virtio_rw16(sc, VRING_DESC_F_NEXT);
1074			vd[s].next  = virtio_rw16(sc, qe->qe_index);
1075			s = qe->qe_index;
1076		}
1077		vd[s].flags = virtio_rw16(sc, 0);
1078
1079		return 0;
1080	}
1081}
1082
1083/*
1084 * enqueue: enqueue a single dmamap.
1085 */
1086int
1087virtio_enqueue(struct virtio_softc *sc, struct virtqueue *vq, int slot,
1088    bus_dmamap_t dmamap, bool write)
1089{
1090	struct vq_entry *qe1 = &vq->vq_entries[slot];
1091	struct vring_desc *vd = qe1->qe_desc_base;
1092	int i;
1093	int s = qe1->qe_next;
1094
1095	KASSERT(s >= 0);
1096	KASSERT(dmamap->dm_nsegs > 0);
1097
1098	for (i = 0; i < dmamap->dm_nsegs; i++) {
1099		vd[s].addr = virtio_rw64(sc, dmamap->dm_segs[i].ds_addr);
1100		vd[s].len  = virtio_rw32(sc, dmamap->dm_segs[i].ds_len);
1101		if (!write)
1102			vd[s].flags |= virtio_rw16(sc, VRING_DESC_F_WRITE);
1103		s = virtio_rw16(sc, vd[s].next);
1104	}
1105	qe1->qe_next = s;
1106
1107	return 0;
1108}
1109
1110int
1111virtio_enqueue_p(struct virtio_softc *sc, struct virtqueue *vq, int slot,
1112    bus_dmamap_t dmamap, bus_addr_t start, bus_size_t len,
1113    bool write)
1114{
1115	struct vq_entry *qe1 = &vq->vq_entries[slot];
1116	struct vring_desc *vd = qe1->qe_desc_base;
1117	int s = qe1->qe_next;
1118
1119	KASSERT(s >= 0);
1120	KASSERT(dmamap->dm_nsegs == 1); /* XXX */
1121	KASSERT(dmamap->dm_segs[0].ds_len > start);
1122	KASSERT(dmamap->dm_segs[0].ds_len >= start + len);
1123
1124	vd[s].addr = virtio_rw64(sc, dmamap->dm_segs[0].ds_addr + start);
1125	vd[s].len  = virtio_rw32(sc, len);
1126	if (!write)
1127		vd[s].flags |= virtio_rw16(sc, VRING_DESC_F_WRITE);
1128	qe1->qe_next = virtio_rw16(sc, vd[s].next);
1129
1130	return 0;
1131}
1132
1133/*
1134 * enqueue_commit: add it to the aring.
1135 */
1136int
1137virtio_enqueue_commit(struct virtio_softc *sc, struct virtqueue *vq, int slot,
1138    bool notifynow)
1139{
1140	struct vq_entry *qe1;
1141
1142	if (slot < 0) {
1143		mutex_enter(&vq->vq_aring_lock);
1144		goto notify;
1145	}
1146	vq_sync_descs(sc, vq, BUS_DMASYNC_PREWRITE);
1147	qe1 = &vq->vq_entries[slot];
1148	if (qe1->qe_indirect)
1149		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_PREWRITE);
1150	mutex_enter(&vq->vq_aring_lock);
1151	vq->vq_avail->ring[(vq->vq_avail_idx++) % vq->vq_num] =
1152	    virtio_rw16(sc, slot);
1153
1154notify:
1155	if (notifynow) {
1156		uint16_t o, n, t;
1157		uint16_t flags;
1158
1159		o = virtio_rw16(sc, vq->vq_avail->idx) - 1;
1160		n = vq->vq_avail_idx;
1161
1162		/*
1163		 * Prepare for `device->CPU' (host->guest) transfer
1164		 * into the buffer.  This must happen before we commit
1165		 * the vq->vq_avail->idx update to ensure we're not
1166		 * still using the buffer in case program-prior loads
1167		 * or stores in it get delayed past the store to
1168		 * vq->vq_avail->idx.
1169		 */
1170		vq_sync_uring_all(sc, vq, BUS_DMASYNC_PREREAD);
1171
1172		/* ensure payload is published, then avail idx */
1173		vq_sync_aring_payload(sc, vq, BUS_DMASYNC_PREWRITE);
1174		vq->vq_avail->idx = virtio_rw16(sc, vq->vq_avail_idx);
1175		vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
1176		vq->vq_queued++;
1177
1178		if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
1179			vq_sync_uring_avail(sc, vq, BUS_DMASYNC_POSTREAD);
1180			t = virtio_rw16(sc, *vq->vq_avail_event) + 1;
1181			if ((uint16_t) (n - t) < (uint16_t) (n - o))
1182				sc->sc_ops->kick(sc, vq->vq_index);
1183		} else {
1184			vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
1185			flags = virtio_rw16(sc, vq->vq_used->flags);
1186			if (!(flags & VRING_USED_F_NO_NOTIFY))
1187				sc->sc_ops->kick(sc, vq->vq_index);
1188		}
1189	}
1190	mutex_exit(&vq->vq_aring_lock);
1191
1192	return 0;
1193}
1194
1195/*
1196 * enqueue_abort: rollback.
1197 */
1198int
1199virtio_enqueue_abort(struct virtio_softc *sc, struct virtqueue *vq, int slot)
1200{
1201	struct vq_entry *qe = &vq->vq_entries[slot];
1202	struct vring_desc *vd;
1203	int s;
1204
1205	if (qe->qe_next < 0) {
1206		vq_free_entry(vq, qe);
1207		return 0;
1208	}
1209
1210	s = slot;
1211	vd = &vq->vq_desc[0];
1212	while (virtio_rw16(sc, vd[s].flags) & VRING_DESC_F_NEXT) {
1213		s = virtio_rw16(sc, vd[s].next);
1214		vq_free_entry(vq, qe);
1215		qe = &vq->vq_entries[s];
1216	}
1217	vq_free_entry(vq, qe);
1218	return 0;
1219}
1220
1221/*
1222 * Dequeue a request.
1223 */
1224/*
1225 * dequeue: dequeue a request from uring; dmamap_sync for uring is
1226 *	    already done in the interrupt handler.
1227 */
1228int
1229virtio_dequeue(struct virtio_softc *sc, struct virtqueue *vq,
1230    int *slotp, int *lenp)
1231{
1232	uint16_t slot, usedidx;
1233	struct vq_entry *qe;
1234
1235	if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
1236		return ENOENT;
1237	mutex_enter(&vq->vq_uring_lock);
1238	usedidx = vq->vq_used_idx++;
1239	mutex_exit(&vq->vq_uring_lock);
1240	usedidx %= vq->vq_num;
1241	slot = virtio_rw32(sc, vq->vq_used->ring[usedidx].id);
1242	qe = &vq->vq_entries[slot];
1243
1244	if (qe->qe_indirect)
1245		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_POSTWRITE);
1246
1247	if (slotp)
1248		*slotp = slot;
1249	if (lenp)
1250		*lenp = virtio_rw32(sc, vq->vq_used->ring[usedidx].len);
1251
1252	return 0;
1253}
1254
1255/*
1256 * dequeue_commit: complete dequeue; the slot is recycled for future use.
1257 *                 if you forget to call this the slot will be leaked.
1258 */
1259int
1260virtio_dequeue_commit(struct virtio_softc *sc, struct virtqueue *vq, int slot)
1261{
1262	struct vq_entry *qe = &vq->vq_entries[slot];
1263	struct vring_desc *vd = &vq->vq_desc[0];
1264	int s = slot;
1265
1266	while (virtio_rw16(sc, vd[s].flags) & VRING_DESC_F_NEXT) {
1267		s = virtio_rw16(sc, vd[s].next);
1268		vq_free_entry(vq, qe);
1269		qe = &vq->vq_entries[s];
1270	}
1271	vq_free_entry(vq, qe);
1272
1273	return 0;
1274}
1275
1276/*
1277 * Attach a child, fill all the members.
1278 */
1279void
1280virtio_child_attach_start(struct virtio_softc *sc, device_t child, int ipl,
1281    uint64_t req_features, const char *feat_bits)
1282{
1283	char buf[1024];
1284
1285	sc->sc_child = child;
1286	sc->sc_ipl = ipl;
1287
1288	virtio_negotiate_features(sc, req_features);
1289	snprintb(buf, sizeof(buf), feat_bits, sc->sc_active_features);
1290	aprint_normal(": features: %s\n", buf);
1291	aprint_naive("\n");
1292}
1293
1294int
1295virtio_child_attach_finish(struct virtio_softc *sc,
1296    struct virtqueue *vqs, size_t nvqs,
1297    virtio_callback config_change,
1298    int req_flags)
1299{
1300	size_t i;
1301	int r;
1302
1303#ifdef DIAGNOSTIC
1304	KASSERT(nvqs > 0);
1305#define VIRTIO_ASSERT_FLAGS	(VIRTIO_F_INTR_SOFTINT | VIRTIO_F_INTR_PERVQ)
1306	KASSERT((req_flags & VIRTIO_ASSERT_FLAGS) != VIRTIO_ASSERT_FLAGS);
1307#undef VIRTIO_ASSERT_FLAGS
1308
1309	for (i = 0; i < nvqs; i++){
1310		KASSERT(vqs[i].vq_index == i);
1311		KASSERT(vqs[i].vq_intrhand != NULL);
1312		KASSERT(vqs[i].vq_done == NULL ||
1313		    vqs[i].vq_intrhand == virtio_vq_done);
1314	}
1315#endif
1316
1317	sc->sc_finished_called = true;
1318
1319	sc->sc_vqs = vqs;
1320	sc->sc_nvqs = nvqs;
1321	sc->sc_config_change = config_change;
1322	sc->sc_intrhand = virtio_vq_intr;
1323	sc->sc_flags = req_flags;
1324
1325	/* set the vq address */
1326	for (i = 0; i < nvqs; i++) {
1327		sc->sc_ops->setup_queue(sc, vqs[i].vq_index,
1328		    vqs[i].vq_dmamap->dm_segs[0].ds_addr);
1329	}
1330
1331	r = sc->sc_ops->alloc_interrupts(sc);
1332	if (r != 0) {
1333		aprint_error_dev(sc->sc_dev,
1334		    "failed to allocate interrupts\n");
1335		goto fail;
1336	}
1337
1338	r = sc->sc_ops->setup_interrupts(sc, 0);
1339	if (r != 0) {
1340		aprint_error_dev(sc->sc_dev, "failed to setup interrupts\n");
1341		goto fail;
1342	}
1343
1344	KASSERT(sc->sc_soft_ih == NULL);
1345	if (sc->sc_flags & VIRTIO_F_INTR_SOFTINT) {
1346		u_int flags = SOFTINT_NET;
1347		if (sc->sc_flags & VIRTIO_F_INTR_MPSAFE)
1348			flags |= SOFTINT_MPSAFE;
1349
1350		sc->sc_soft_ih = softint_establish(flags, virtio_soft_intr,
1351		    sc);
1352		if (sc->sc_soft_ih == NULL) {
1353			sc->sc_ops->free_interrupts(sc);
1354			aprint_error_dev(sc->sc_dev,
1355			    "failed to establish soft interrupt\n");
1356			goto fail;
1357		}
1358	}
1359
1360	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
1361	return 0;
1362
1363fail:
1364	if (sc->sc_soft_ih) {
1365		softint_disestablish(sc->sc_soft_ih);
1366		sc->sc_soft_ih = NULL;
1367	}
1368
1369	sc->sc_ops->free_interrupts(sc);
1370
1371	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1372	return 1;
1373}
1374
1375void
1376virtio_child_detach(struct virtio_softc *sc)
1377{
1378	sc->sc_child = NULL;
1379	sc->sc_vqs = NULL;
1380
1381	virtio_device_reset(sc);
1382
1383	sc->sc_ops->free_interrupts(sc);
1384
1385	if (sc->sc_soft_ih) {
1386		softint_disestablish(sc->sc_soft_ih);
1387		sc->sc_soft_ih = NULL;
1388	}
1389}
1390
1391void
1392virtio_child_attach_failed(struct virtio_softc *sc)
1393{
1394	virtio_child_detach(sc);
1395
1396	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1397
1398	sc->sc_child = VIRTIO_CHILD_FAILED;
1399}
1400
1401bus_dma_tag_t
1402virtio_dmat(struct virtio_softc *sc)
1403{
1404	return sc->sc_dmat;
1405}
1406
1407device_t
1408virtio_child(struct virtio_softc *sc)
1409{
1410	return sc->sc_child;
1411}
1412
1413int
1414virtio_intrhand(struct virtio_softc *sc)
1415{
1416	return (*sc->sc_intrhand)(sc);
1417}
1418
1419uint64_t
1420virtio_features(struct virtio_softc *sc)
1421{
1422	return sc->sc_active_features;
1423}
1424
1425int
1426virtio_attach_failed(struct virtio_softc *sc)
1427{
1428	device_t self = sc->sc_dev;
1429
1430	/* no error if its not connected, but its failed */
1431	if (sc->sc_childdevid == 0)
1432		return 1;
1433
1434	if (sc->sc_child == NULL) {
1435		aprint_error_dev(self,
1436		    "no matching child driver; not configured\n");
1437		return 1;
1438	}
1439
1440	if (sc->sc_child == VIRTIO_CHILD_FAILED) {
1441		aprint_error_dev(self, "virtio configuration failed\n");
1442		return 1;
1443	}
1444
1445	/* sanity check */
1446	if (!sc->sc_finished_called) {
1447		aprint_error_dev(self, "virtio internal error, child driver "
1448		    "signaled OK but didn't initialize interrupts\n");
1449		return 1;
1450	}
1451
1452	return 0;
1453}
1454
1455void
1456virtio_print_device_type(device_t self, int id, int revision)
1457{
1458	aprint_normal_dev(self, "%s device (id %d, rev. 0x%02x)\n",
1459	    (id < NDEVNAMES ? virtio_device_name[id] : "Unknown"),
1460	    id,
1461	    revision);
1462}
1463
1464
1465MODULE(MODULE_CLASS_DRIVER, virtio, NULL);
1466
1467#ifdef _MODULE
1468#include "ioconf.c"
1469#endif
1470
1471static int
1472virtio_modcmd(modcmd_t cmd, void *opaque)
1473{
1474	int error = 0;
1475
1476#ifdef _MODULE
1477	switch (cmd) {
1478	case MODULE_CMD_INIT:
1479		error = config_init_component(cfdriver_ioconf_virtio,
1480		    cfattach_ioconf_virtio, cfdata_ioconf_virtio);
1481		break;
1482	case MODULE_CMD_FINI:
1483		error = config_fini_component(cfdriver_ioconf_virtio,
1484		    cfattach_ioconf_virtio, cfdata_ioconf_virtio);
1485		break;
1486	default:
1487		error = ENOTTY;
1488		break;
1489	}
1490#endif
1491
1492	return error;
1493}
1494