virtio.c revision 1.57
1/*	$NetBSD: virtio.c,v 1.57 2022/08/12 10:49:57 riastradh Exp $	*/
2
3/*
4 * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 * Copyright (c) 2012 Stefan Fritsch, Alexander Fiveg.
6 * Copyright (c) 2010 Minoura Makoto.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__KERNEL_RCSID(0, "$NetBSD: virtio.c,v 1.57 2022/08/12 10:49:57 riastradh Exp $");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/kernel.h>
36#include <sys/atomic.h>
37#include <sys/bus.h>
38#include <sys/device.h>
39#include <sys/kmem.h>
40#include <sys/module.h>
41
42#define VIRTIO_PRIVATE
43
44#include <dev/pci/virtioreg.h> /* XXX: move to non-pci */
45#include <dev/pci/virtiovar.h> /* XXX: move to non-pci */
46
47#define MINSEG_INDIRECT		2 /* use indirect if nsegs >= this value */
48
/*
 * Printable names, indexed by device ID (presumably the virtio device
 * type number).  The list is incomplete.
 */
static const char *virtio_device_name[] = {
	[0] = "unknown (0)",
	[1] = "network",
	[2] = "block",
	[3] = "console",
	[4] = "entropy",
	[5] = "memory balloon",
	[6] = "I/O memory",
	[7] = "remote processor messaging",
	[8] = "SCSI",
	[9] = "9P transport",
};
#define NDEVNAMES	__arraycount(virtio_device_name)
63
64static void	virtio_init_vq(struct virtio_softc *,
65		    struct virtqueue *, const bool);
66
67void
68virtio_set_status(struct virtio_softc *sc, int status)
69{
70	sc->sc_ops->set_status(sc, status);
71}
72
73/*
74 * Reset the device.
75 */
76/*
77 * To reset the device to a known state, do following:
78 *	virtio_reset(sc);	     // this will stop the device activity
79 *	<dequeue finished requests>; // virtio_dequeue() still can be called
80 *	<revoke pending requests in the vqs if any>;
 *	virtio_reinit_start(sc);     // dequeue prohibited
82 *	newfeatures = virtio_negotiate_features(sc, requestedfeatures);
83 *	<some other initialization>;
84 *	virtio_reinit_end(sc);	     // device activated; enqueue allowed
85 * Once attached, feature negotiation can only be allowed after virtio_reset.
86 */
void
virtio_reset(struct virtio_softc *sc)
{

	/* Thin public wrapper: the work is done by virtio_device_reset(). */
	virtio_device_reset(sc);
}
92
93int
94virtio_reinit_start(struct virtio_softc *sc)
95{
96	int i, r;
97
98	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
99	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
100	for (i = 0; i < sc->sc_nvqs; i++) {
101		int n;
102		struct virtqueue *vq = &sc->sc_vqs[i];
103		n = sc->sc_ops->read_queue_size(sc, vq->vq_index);
104		if (n == 0)	/* vq disappeared */
105			continue;
106		if (n != vq->vq_num) {
107			panic("%s: virtqueue size changed, vq index %d\n",
108			      device_xname(sc->sc_dev),
109			      vq->vq_index);
110		}
111		virtio_init_vq(sc, vq, true);
112		sc->sc_ops->setup_queue(sc, vq->vq_index,
113		    vq->vq_dmamap->dm_segs[0].ds_addr);
114	}
115
116	r = sc->sc_ops->setup_interrupts(sc, 1);
117	if (r != 0)
118		goto fail;
119
120	return 0;
121
122fail:
123	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
124
125	return 1;
126}
127
128void
129virtio_reinit_end(struct virtio_softc *sc)
130{
131	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
132}
133
134/*
135 * Feature negotiation.
136 */
137void
138virtio_negotiate_features(struct virtio_softc *sc, uint64_t guest_features)
139{
140	if (!(device_cfdata(sc->sc_dev)->cf_flags & 1) &&
141	    !(device_cfdata(sc->sc_child)->cf_flags & 1)) /* XXX */
142		guest_features |= VIRTIO_F_RING_INDIRECT_DESC;
143	sc->sc_ops->neg_features(sc, guest_features);
144	if (sc->sc_active_features & VIRTIO_F_RING_INDIRECT_DESC)
145		sc->sc_indirect = true;
146	else
147		sc->sc_indirect = false;
148}
149
150
151/*
152 * Device configuration registers readers/writers
153 */
/*
 * Debug tracing for the config-space accessors below.  Disabled by default
 * (#if 0), in which case both macros expand to nothing.
 *
 * When enabled, DPRINTFR dumps `num' raw bytes starting at `index' followed
 * by the decoded value, and DPRINTFR2 prints a stream read next to a normal
 * read.  The enabled forms expand to several statements and refer to a
 * variable named `sc' at the call site.
 */
#if 0
#define DPRINTFR(n, fmt, val, index, num) \
	printf("\n%s (", n); \
	for (int i = 0; i < num; i++) \
		printf("%02x ", bus_space_read_1(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index+i)); \
	printf(") -> "); printf(fmt, val); printf("\n");
#define DPRINTFR2(n, fmt, val_s, val_n) \
	printf("%s ", n); \
	printf("\n        stream "); printf(fmt, val_s); printf(" norm "); printf(fmt, val_n); printf("\n");
#else
#define DPRINTFR(n, fmt, val, index, num)
#define DPRINTFR2(n, fmt, val_s, val_n)
#endif
167
168
169uint8_t
170virtio_read_device_config_1(struct virtio_softc *sc, int index) {
171	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
172	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
173	uint8_t val;
174
175	val = bus_space_read_1(iot, ioh, index);
176
177	DPRINTFR("read_1", "%02x", val, index, 1);
178	return val;
179}
180
181uint16_t
182virtio_read_device_config_2(struct virtio_softc *sc, int index) {
183	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
184	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
185	uint16_t val;
186
187	val = bus_space_read_2(iot, ioh, index);
188	if (BYTE_ORDER != sc->sc_bus_endian)
189		val = bswap16(val);
190
191	DPRINTFR("read_2", "%04x", val, index, 2);
192	DPRINTFR2("read_2", "%04x",
193		bus_space_read_stream_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index),
194		bus_space_read_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index));
195	return val;
196}
197
198uint32_t
199virtio_read_device_config_4(struct virtio_softc *sc, int index) {
200	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
201	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
202	uint32_t val;
203
204	val = bus_space_read_4(iot, ioh, index);
205	if (BYTE_ORDER != sc->sc_bus_endian)
206		val = bswap32(val);
207
208	DPRINTFR("read_4", "%08x", val, index, 4);
209	DPRINTFR2("read_4", "%08x",
210		bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index),
211		bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index));
212	return val;
213}
214
215/*
216 * The Virtio spec explicitly tells that reading and writing 8 bytes are not
217 * considered atomic and no triggers may be connected to reading or writing
 * it. We access it using two 32 bit reads. See virtio spec 4.1.3.1.
219 */
220uint64_t
221virtio_read_device_config_8(struct virtio_softc *sc, int index) {
222	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
223	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
224	union {
225		uint64_t u64;
226		uint32_t l[2];
227	} v;
228	uint64_t val;
229
230	v.l[0] = bus_space_read_4(iot, ioh, index);
231	v.l[1] = bus_space_read_4(iot, ioh, index + 4);
232	if (sc->sc_bus_endian != sc->sc_struct_endian) {
233		v.l[0] = bswap32(v.l[0]);
234		v.l[1] = bswap32(v.l[1]);
235	}
236	val = v.u64;
237
238	if (BYTE_ORDER != sc->sc_struct_endian)
239		val = bswap64(val);
240
241	DPRINTFR("read_8", "%08lx", val, index, 8);
242	DPRINTFR2("read_8 low ", "%08x",
243		bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index),
244		bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index));
245	DPRINTFR2("read_8 high ", "%08x",
246		bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index + 4),
247		bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index + 4));
248	return val;
249}
250
251/*
252 * In the older virtio spec, device config registers are host endian. On newer
253 * they are little endian. Some newer devices however explicitly specify their
254 * register to always be little endian. These functions cater for these.
255 */
256uint16_t
257virtio_read_device_config_le_2(struct virtio_softc *sc, int index) {
258	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
259	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
260	uint16_t val;
261
262	val = bus_space_read_2(iot, ioh, index);
263	if (sc->sc_bus_endian != LITTLE_ENDIAN)
264		val = bswap16(val);
265
266	DPRINTFR("read_le_2", "%04x", val, index, 2);
267	DPRINTFR2("read_le_2", "%04x",
268		bus_space_read_stream_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0),
269		bus_space_read_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0));
270	return val;
271}
272
273uint32_t
274virtio_read_device_config_le_4(struct virtio_softc *sc, int index) {
275	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
276	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
277	uint32_t val;
278
279	val = bus_space_read_4(iot, ioh, index);
280	if (sc->sc_bus_endian != LITTLE_ENDIAN)
281		val = bswap32(val);
282
283	DPRINTFR("read_le_4", "%08x", val, index, 4);
284	DPRINTFR2("read_le_4", "%08x",
285		bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0),
286		bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0));
287	return val;
288}
289
290void
291virtio_write_device_config_1(struct virtio_softc *sc, int index, uint8_t value)
292{
293	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
294	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
295
296	bus_space_write_1(iot, ioh, index, value);
297}
298
299void
300virtio_write_device_config_2(struct virtio_softc *sc, int index, uint16_t value)
301{
302	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
303	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
304
305	if (BYTE_ORDER != sc->sc_bus_endian)
306		value = bswap16(value);
307	bus_space_write_2(iot, ioh, index, value);
308}
309
310void
311virtio_write_device_config_4(struct virtio_softc *sc, int index, uint32_t value)
312{
313	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
314	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
315
316	if (BYTE_ORDER != sc->sc_bus_endian)
317		value = bswap32(value);
318	bus_space_write_4(iot, ioh, index, value);
319}
320
321/*
322 * The Virtio spec explicitly tells that reading and writing 8 bytes are not
323 * considered atomic and no triggers may be connected to reading or writing
324 * it. We access it using two 32 bit writes. For good measure it is stated to
 * always write lsb first just in case of a hypervisor bug. See virtio
326 * spec 4.1.3.1.
327 */
328void
329virtio_write_device_config_8(struct virtio_softc *sc, int index, uint64_t value)
330{
331	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
332	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
333	union {
334		uint64_t u64;
335		uint32_t l[2];
336	} v;
337
338	if (BYTE_ORDER != sc->sc_struct_endian)
339		value = bswap64(value);
340
341	v.u64 = value;
342	if (sc->sc_bus_endian != sc->sc_struct_endian) {
343		v.l[0] = bswap32(v.l[0]);
344		v.l[1] = bswap32(v.l[1]);
345	}
346
347	if (sc->sc_struct_endian == LITTLE_ENDIAN) {
348		bus_space_write_4(iot, ioh, index,     v.l[0]);
349		bus_space_write_4(iot, ioh, index + 4, v.l[1]);
350	} else {
351		bus_space_write_4(iot, ioh, index + 4, v.l[1]);
352		bus_space_write_4(iot, ioh, index,     v.l[0]);
353	}
354}
355
356/*
357 * In the older virtio spec, device config registers are host endian. On newer
358 * they are little endian. Some newer devices however explicitly specify their
359 * register to always be little endian. These functions cater for these.
360 */
361void
362virtio_write_device_config_le_2(struct virtio_softc *sc, int index, uint16_t value)
363{
364	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
365	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
366
367	if (sc->sc_bus_endian != LITTLE_ENDIAN)
368		value = bswap16(value);
369	bus_space_write_2(iot, ioh, index, value);
370}
371
372void
373virtio_write_device_config_le_4(struct virtio_softc *sc, int index, uint32_t value)
374{
375	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
376	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
377
378	if (sc->sc_bus_endian != LITTLE_ENDIAN)
379		value = bswap32(value);
380	bus_space_write_4(iot, ioh, index, value);
381}
382
383
384/*
385 * data structures endian helpers
386 */
387uint16_t virtio_rw16(struct virtio_softc *sc, uint16_t val)
388{
389	KASSERT(sc);
390	return BYTE_ORDER != sc->sc_struct_endian ? bswap16(val) : val;
391}
392
393uint32_t virtio_rw32(struct virtio_softc *sc, uint32_t val)
394{
395	KASSERT(sc);
396	return BYTE_ORDER != sc->sc_struct_endian ? bswap32(val) : val;
397}
398
399uint64_t virtio_rw64(struct virtio_softc *sc, uint64_t val)
400{
401	KASSERT(sc);
402	return BYTE_ORDER != sc->sc_struct_endian ? bswap64(val) : val;
403}
404
405
406/*
407 * Interrupt handler.
408 */
409static void
410virtio_soft_intr(void *arg)
411{
412	struct virtio_softc *sc = arg;
413
414	KASSERT(sc->sc_intrhand != NULL);
415
416	(*sc->sc_intrhand)(sc);
417}
418
419/*
420 * dmamap sync operations for a virtqueue.
421 */
422static inline void
423vq_sync_descs(struct virtio_softc *sc, struct virtqueue *vq, int ops)
424{
425
426	/* availoffset == sizeof(vring_desc)*vq_num */
427	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, 0, vq->vq_availoffset,
428	    ops);
429}
430
431static inline void
432vq_sync_aring_all(struct virtio_softc *sc, struct virtqueue *vq, int ops)
433{
434	uint16_t hdrlen = offsetof(struct vring_avail, ring);
435	size_t payloadlen = sc->sc_nvqs * sizeof(uint16_t);
436	size_t usedlen = 0;
437
438	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX)
439		usedlen = sizeof(uint16_t);
440	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
441	    vq->vq_availoffset, hdrlen + payloadlen + usedlen, ops);
442}
443
444static inline void
445vq_sync_aring_header(struct virtio_softc *sc, struct virtqueue *vq, int ops)
446{
447	uint16_t hdrlen = offsetof(struct vring_avail, ring);
448
449	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
450	    vq->vq_availoffset, hdrlen, ops);
451}
452
453static inline void
454vq_sync_aring_payload(struct virtio_softc *sc, struct virtqueue *vq, int ops)
455{
456	uint16_t hdrlen = offsetof(struct vring_avail, ring);
457	size_t payloadlen = sc->sc_nvqs * sizeof(uint16_t);
458
459	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
460	    vq->vq_availoffset + hdrlen, payloadlen, ops);
461}
462
463static inline void
464vq_sync_aring_used(struct virtio_softc *sc, struct virtqueue *vq, int ops)
465{
466	uint16_t hdrlen = offsetof(struct vring_avail, ring);
467	size_t payloadlen = sc->sc_nvqs * sizeof(uint16_t);
468	size_t usedlen = sizeof(uint16_t);
469
470	if ((sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) == 0)
471		return;
472	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
473	    vq->vq_availoffset + hdrlen + payloadlen, usedlen, ops);
474}
475
476static inline void
477vq_sync_uring_all(struct virtio_softc *sc, struct virtqueue *vq, int ops)
478{
479	uint16_t hdrlen = offsetof(struct vring_used, ring);
480	size_t payloadlen = sc->sc_nvqs * sizeof(struct vring_used_elem);
481	size_t availlen = 0;
482
483	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX)
484		availlen = sizeof(uint16_t);
485	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
486	    vq->vq_usedoffset, hdrlen + payloadlen + availlen, ops);
487}
488
489static inline void
490vq_sync_uring_header(struct virtio_softc *sc, struct virtqueue *vq, int ops)
491{
492	uint16_t hdrlen = offsetof(struct vring_used, ring);
493
494	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
495	    vq->vq_usedoffset, hdrlen, ops);
496}
497
498static inline void
499vq_sync_uring_payload(struct virtio_softc *sc, struct virtqueue *vq, int ops)
500{
501	uint16_t hdrlen = offsetof(struct vring_used, ring);
502	size_t payloadlen = sc->sc_nvqs * sizeof(struct vring_used_elem);
503
504	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
505	    vq->vq_usedoffset + hdrlen, payloadlen, ops);
506}
507
508static inline void
509vq_sync_uring_avail(struct virtio_softc *sc, struct virtqueue *vq, int ops)
510{
511	uint16_t hdrlen = offsetof(struct vring_used, ring);
512	size_t payloadlen = sc->sc_nvqs * sizeof(struct vring_used_elem);
513	size_t availlen = sizeof(uint16_t);
514
515	if ((sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) == 0)
516		return;
517	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
518	    vq->vq_usedoffset + hdrlen + payloadlen, availlen, ops);
519}
520
521static inline void
522vq_sync_indirect(struct virtio_softc *sc, struct virtqueue *vq, int slot,
523    int ops)
524{
525	int offset = vq->vq_indirectoffset +
526	    sizeof(struct vring_desc) * vq->vq_maxnsegs * slot;
527
528	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
529	    offset, sizeof(struct vring_desc) * vq->vq_maxnsegs, ops);
530}
531
532bool
533virtio_vq_is_enqueued(struct virtio_softc *sc, struct virtqueue *vq)
534{
535
536	if (vq->vq_queued) {
537		vq->vq_queued = 0;
538		vq_sync_aring_all(sc, vq, BUS_DMASYNC_POSTWRITE);
539	}
540
541	vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
542	if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
543		return 0;
544	vq_sync_uring_payload(sc, vq, BUS_DMASYNC_POSTREAD);
545	return 1;
546}
547
548/*
549 * Scan vq, bus_dmamap_sync for the vqs (not for the payload),
550 * and calls (*vq_done)() if some entries are consumed.
551 *
552 * Can be used as sc_intrhand.
553 */
554int
555virtio_vq_intr(struct virtio_softc *sc)
556{
557	struct virtqueue *vq;
558	int i, r = 0;
559
560	for (i = 0; i < sc->sc_nvqs; i++) {
561		vq = &sc->sc_vqs[i];
562		if (virtio_vq_is_enqueued(sc, vq) == 1) {
563			if (vq->vq_done)
564				r |= (*vq->vq_done)(vq);
565		}
566	}
567
568	return r;
569}
570
571int
572virtio_vq_intrhand(struct virtio_softc *sc)
573{
574	struct virtqueue *vq;
575	int i, r = 0;
576
577	for (i = 0; i < sc->sc_nvqs; i++) {
578		vq = &sc->sc_vqs[i];
579		r |= (*vq->vq_intrhand)(vq->vq_intrhand_arg);
580	}
581
582	return r;
583}
584
585
586/*
587 * Increase the event index in order to delay interrupts.
588 */
589int
590virtio_postpone_intr(struct virtio_softc *sc, struct virtqueue *vq,
591		uint16_t nslots)
592{
593	uint16_t	idx, nused;
594
595	idx = vq->vq_used_idx + nslots;
596
597	/* set the new event index: avail_ring->used_event = idx */
598	*vq->vq_used_event = virtio_rw16(sc, idx);
599	vq_sync_aring_used(vq->vq_owner, vq, BUS_DMASYNC_PREWRITE);
600	vq->vq_queued++;
601
602	nused = (uint16_t)
603		(virtio_rw16(sc, vq->vq_used->idx) - vq->vq_used_idx);
604	KASSERT(nused <= vq->vq_num);
605
606	return nslots < nused;
607}
608
609/*
610 * Postpone interrupt until 3/4 of the available descriptors have been
611 * consumed.
612 */
613int
614virtio_postpone_intr_smart(struct virtio_softc *sc, struct virtqueue *vq)
615{
616	uint16_t	nslots;
617
618	nslots = (uint16_t)
619		(virtio_rw16(sc, vq->vq_avail->idx) - vq->vq_used_idx) * 3 / 4;
620
621	return virtio_postpone_intr(sc, vq, nslots);
622}
623
624/*
625 * Postpone interrupt until all of the available descriptors have been
626 * consumed.
627 */
628int
629virtio_postpone_intr_far(struct virtio_softc *sc, struct virtqueue *vq)
630{
631	uint16_t	nslots;
632
633	nslots = (uint16_t)
634		(virtio_rw16(sc, vq->vq_avail->idx) - vq->vq_used_idx);
635
636	return virtio_postpone_intr(sc, vq, nslots);
637}
638
639/*
640 * Start/stop vq interrupt.  No guarantee.
641 */
642void
643virtio_stop_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
644{
645
646	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
647		/*
648		 * No way to disable the interrupt completely with
649		 * RingEventIdx. Instead advance used_event by half the
650		 * possible value. This won't happen soon and is far enough in
651		 * the past to not trigger a spurios interrupt.
652		 */
653		*vq->vq_used_event = virtio_rw16(sc, vq->vq_used_idx + 0x8000);
654		vq_sync_aring_used(sc, vq, BUS_DMASYNC_PREWRITE);
655	} else {
656		vq->vq_avail->flags |=
657		    virtio_rw16(sc, VRING_AVAIL_F_NO_INTERRUPT);
658		vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
659	}
660	vq->vq_queued++;
661}
662
663int
664virtio_start_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
665{
666
667	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
668		/*
669		 * If event index feature is negotiated, enabling interrupts
670		 * is done through setting the latest consumed index in the
671		 * used_event field
672		 */
673		*vq->vq_used_event = virtio_rw16(sc, vq->vq_used_idx);
674		vq_sync_aring_used(sc, vq, BUS_DMASYNC_PREWRITE);
675	} else {
676		vq->vq_avail->flags &=
677		    ~virtio_rw16(sc, VRING_AVAIL_F_NO_INTERRUPT);
678		vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
679	}
680	vq->vq_queued++;
681
682	vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
683	if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
684		return 0;
685	vq_sync_uring_payload(sc, vq, BUS_DMASYNC_POSTREAD);
686	return 1;
687}
688
689/*
690 * Initialize vq structure.
691 */
692static void
693virtio_init_vq(struct virtio_softc *sc, struct virtqueue *vq,
694    const bool reinit)
695{
696	int i, j;
697	int vq_size = vq->vq_num;
698
699	memset(vq->vq_vaddr, 0, vq->vq_bytesize);
700
701	/* build the indirect descriptor chain */
702	if (vq->vq_indirect != NULL) {
703		struct vring_desc *vd;
704
705		for (i = 0; i < vq_size; i++) {
706			vd = vq->vq_indirect;
707			vd += vq->vq_maxnsegs * i;
708			for (j = 0; j < vq->vq_maxnsegs-1; j++) {
709				vd[j].next = virtio_rw16(sc, j + 1);
710			}
711		}
712	}
713
714	/* free slot management */
715	SIMPLEQ_INIT(&vq->vq_freelist);
716	for (i = 0; i < vq_size; i++) {
717		SIMPLEQ_INSERT_TAIL(&vq->vq_freelist,
718				    &vq->vq_entries[i], qe_list);
719		vq->vq_entries[i].qe_index = i;
720	}
721	if (!reinit)
722		mutex_init(&vq->vq_freelist_lock, MUTEX_SPIN, sc->sc_ipl);
723
724	/* enqueue/dequeue status */
725	vq->vq_avail_idx = 0;
726	vq->vq_used_idx = 0;
727	vq->vq_queued = 0;
728	if (!reinit) {
729		mutex_init(&vq->vq_aring_lock, MUTEX_SPIN, sc->sc_ipl);
730		mutex_init(&vq->vq_uring_lock, MUTEX_SPIN, sc->sc_ipl);
731	}
732	vq_sync_uring_all(sc, vq, BUS_DMASYNC_PREREAD);
733	vq->vq_queued++;
734}
735
736/*
737 * Allocate/free a vq.
738 */
739int
740virtio_alloc_vq(struct virtio_softc *sc, struct virtqueue *vq, int index,
741    int maxsegsize, int maxnsegs, const char *name)
742{
743	int vq_size, allocsize1, allocsize2, allocsize3, allocsize = 0;
744	int rsegs, r, hdrlen;
745#define VIRTQUEUE_ALIGN(n)	(((n)+(VIRTIO_PAGE_SIZE-1))&	\
746				 ~(VIRTIO_PAGE_SIZE-1))
747
748	/* Make sure callers allocate vqs in order */
749	KASSERT(sc->sc_nvqs == index);
750
751	memset(vq, 0, sizeof(*vq));
752
753	vq_size = sc->sc_ops->read_queue_size(sc, index);
754	if (vq_size == 0) {
755		aprint_error_dev(sc->sc_dev,
756				 "virtqueue not exist, index %d for %s\n",
757				 index, name);
758		goto err;
759	}
760
761	hdrlen = sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX ? 3 : 2;
762
763	/* allocsize1: descriptor table + avail ring + pad */
764	allocsize1 = VIRTQUEUE_ALIGN(sizeof(struct vring_desc)*vq_size
765			     + sizeof(uint16_t)*(hdrlen + vq_size));
766	/* allocsize2: used ring + pad */
767	allocsize2 = VIRTQUEUE_ALIGN(sizeof(uint16_t) * hdrlen
768			     + sizeof(struct vring_used_elem)*vq_size);
769	/* allocsize3: indirect table */
770	if (sc->sc_indirect && maxnsegs >= MINSEG_INDIRECT)
771		allocsize3 = sizeof(struct vring_desc) * maxnsegs * vq_size;
772	else
773		allocsize3 = 0;
774	allocsize = allocsize1 + allocsize2 + allocsize3;
775
776	/* alloc and map the memory */
777	r = bus_dmamem_alloc(sc->sc_dmat, allocsize, VIRTIO_PAGE_SIZE, 0,
778			     &vq->vq_segs[0], 1, &rsegs, BUS_DMA_WAITOK);
779	if (r != 0) {
780		aprint_error_dev(sc->sc_dev,
781				 "virtqueue %d for %s allocation failed, "
782				 "error code %d\n", index, name, r);
783		goto err;
784	}
785	r = bus_dmamem_map(sc->sc_dmat, &vq->vq_segs[0], rsegs, allocsize,
786			   &vq->vq_vaddr, BUS_DMA_WAITOK);
787	if (r != 0) {
788		aprint_error_dev(sc->sc_dev,
789				 "virtqueue %d for %s map failed, "
790				 "error code %d\n", index, name, r);
791		goto err;
792	}
793	r = bus_dmamap_create(sc->sc_dmat, allocsize, 1, allocsize, 0,
794			      BUS_DMA_WAITOK, &vq->vq_dmamap);
795	if (r != 0) {
796		aprint_error_dev(sc->sc_dev,
797				 "virtqueue %d for %s dmamap creation failed, "
798				 "error code %d\n", index, name, r);
799		goto err;
800	}
801	r = bus_dmamap_load(sc->sc_dmat, vq->vq_dmamap,
802			    vq->vq_vaddr, allocsize, NULL, BUS_DMA_WAITOK);
803	if (r != 0) {
804		aprint_error_dev(sc->sc_dev,
805				 "virtqueue %d for %s dmamap load failed, "
806				 "error code %d\n", index, name, r);
807		goto err;
808	}
809
810	/* remember addresses and offsets for later use */
811	vq->vq_owner = sc;
812	vq->vq_num = vq_size;
813	vq->vq_index = index;
814	vq->vq_desc = vq->vq_vaddr;
815	vq->vq_availoffset = sizeof(struct vring_desc)*vq_size;
816	vq->vq_avail = (void*)(((char*)vq->vq_desc) + vq->vq_availoffset);
817	vq->vq_used_event = (uint16_t *) ((char *)vq->vq_avail +
818		 offsetof(struct vring_avail, ring[vq->vq_num]));
819	vq->vq_usedoffset = allocsize1;
820	vq->vq_used = (void*)(((char*)vq->vq_desc) + vq->vq_usedoffset);
821	vq->vq_avail_event = (uint16_t *)((char *)vq->vq_used +
822		 offsetof(struct vring_used, ring[vq->vq_num]));
823
824	if (allocsize3 > 0) {
825		vq->vq_indirectoffset = allocsize1 + allocsize2;
826		vq->vq_indirect = (void*)(((char*)vq->vq_desc)
827					  + vq->vq_indirectoffset);
828	}
829	vq->vq_bytesize = allocsize;
830	vq->vq_maxsegsize = maxsegsize;
831	vq->vq_maxnsegs = maxnsegs;
832
833	/* free slot management */
834	vq->vq_entries = kmem_zalloc(sizeof(struct vq_entry)*vq_size,
835				     KM_SLEEP);
836	virtio_init_vq(sc, vq, false);
837
838	/* set the vq address */
839	sc->sc_ops->setup_queue(sc, index,
840	    vq->vq_dmamap->dm_segs[0].ds_addr);
841
842	aprint_verbose_dev(sc->sc_dev,
843			   "allocated %u byte for virtqueue %d for %s, "
844			   "size %d\n", allocsize, index, name, vq_size);
845	if (allocsize3 > 0)
846		aprint_verbose_dev(sc->sc_dev,
847				   "using %d byte (%d entries) "
848				   "indirect descriptors\n",
849				   allocsize3, maxnsegs * vq_size);
850
851	sc->sc_nvqs++;
852
853	return 0;
854
855err:
856	sc->sc_ops->setup_queue(sc, index, 0);
857	if (vq->vq_dmamap)
858		bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
859	if (vq->vq_vaddr)
860		bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, allocsize);
861	if (vq->vq_segs[0].ds_addr)
862		bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
863	memset(vq, 0, sizeof(*vq));
864
865	return -1;
866}
867
868int
869virtio_free_vq(struct virtio_softc *sc, struct virtqueue *vq)
870{
871	struct vq_entry *qe;
872	int i = 0;
873
874	/* device must be already deactivated */
875	/* confirm the vq is empty */
876	SIMPLEQ_FOREACH(qe, &vq->vq_freelist, qe_list) {
877		i++;
878	}
879	if (i != vq->vq_num) {
880		printf("%s: freeing non-empty vq, index %d\n",
881		       device_xname(sc->sc_dev), vq->vq_index);
882		return EBUSY;
883	}
884
885	/* tell device that there's no virtqueue any longer */
886	sc->sc_ops->setup_queue(sc, vq->vq_index, 0);
887
888	vq_sync_aring_all(sc, vq, BUS_DMASYNC_POSTWRITE);
889
890	kmem_free(vq->vq_entries, sizeof(*vq->vq_entries) * vq->vq_num);
891	bus_dmamap_unload(sc->sc_dmat, vq->vq_dmamap);
892	bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
893	bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, vq->vq_bytesize);
894	bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
895	mutex_destroy(&vq->vq_freelist_lock);
896	mutex_destroy(&vq->vq_uring_lock);
897	mutex_destroy(&vq->vq_aring_lock);
898	memset(vq, 0, sizeof(*vq));
899
900	sc->sc_nvqs--;
901
902	return 0;
903}
904
905/*
906 * Free descriptor management.
907 */
908static struct vq_entry *
909vq_alloc_entry(struct virtqueue *vq)
910{
911	struct vq_entry *qe;
912
913	mutex_enter(&vq->vq_freelist_lock);
914	if (SIMPLEQ_EMPTY(&vq->vq_freelist)) {
915		mutex_exit(&vq->vq_freelist_lock);
916		return NULL;
917	}
918	qe = SIMPLEQ_FIRST(&vq->vq_freelist);
919	SIMPLEQ_REMOVE_HEAD(&vq->vq_freelist, qe_list);
920	mutex_exit(&vq->vq_freelist_lock);
921
922	return qe;
923}
924
925static void
926vq_free_entry(struct virtqueue *vq, struct vq_entry *qe)
927{
928	mutex_enter(&vq->vq_freelist_lock);
929	SIMPLEQ_INSERT_TAIL(&vq->vq_freelist, qe, qe_list);
930	mutex_exit(&vq->vq_freelist_lock);
931
932	return;
933}
934
935/*
936 * Enqueue several dmamaps as a single request.
937 */
938/*
939 * Typical usage:
940 *  <queue size> number of followings are stored in arrays
941 *  - command blocks (in dmamem) should be pre-allocated and mapped
942 *  - dmamaps for command blocks should be pre-allocated and loaded
943 *  - dmamaps for payload should be pre-allocated
944 *      r = virtio_enqueue_prep(sc, vq, &slot);		// allocate a slot
945 *	if (r)		// currently 0 or EAGAIN
946 *	  return r;
947 *	r = bus_dmamap_load(dmat, dmamap_payload[slot], data, count, ..);
948 *	if (r) {
949 *	  virtio_enqueue_abort(sc, vq, slot);
950 *	  return r;
951 *	}
952 *	r = virtio_enqueue_reserve(sc, vq, slot,
953 *				   dmamap_payload[slot]->dm_nsegs+1);
954 *							// ^ +1 for command
955 *	if (r) {	// currently 0 or EAGAIN
956 *	  bus_dmamap_unload(dmat, dmamap_payload[slot]);
957 *	  return r;					// do not call abort()
958 *	}
959 *	<setup and prepare commands>
960 *	bus_dmamap_sync(dmat, dmamap_cmd[slot],... BUS_DMASYNC_PREWRITE);
961 *	bus_dmamap_sync(dmat, dmamap_payload[slot],...);
962 *	virtio_enqueue(sc, vq, slot, dmamap_cmd[slot], false);
963 *	virtio_enqueue(sc, vq, slot, dmamap_payload[slot], iswrite);
964 *	virtio_enqueue_commit(sc, vq, slot, true);
965 */
966
967/*
968 * enqueue_prep: allocate a slot number
969 */
970int
971virtio_enqueue_prep(struct virtio_softc *sc, struct virtqueue *vq, int *slotp)
972{
973	struct vq_entry *qe1;
974
975	KASSERT(slotp != NULL);
976
977	qe1 = vq_alloc_entry(vq);
978	if (qe1 == NULL)
979		return EAGAIN;
980	/* next slot is not allocated yet */
981	qe1->qe_next = -1;
982	*slotp = qe1->qe_index;
983
984	return 0;
985}
986
987/*
988 * enqueue_reserve: allocate remaining slots and build the descriptor chain.
989 */
990int
991virtio_enqueue_reserve(struct virtio_softc *sc, struct virtqueue *vq,
992		       int slot, int nsegs)
993{
994	int indirect;
995	struct vq_entry *qe1 = &vq->vq_entries[slot];
996
997	KASSERT(qe1->qe_next == -1);
998	KASSERT(1 <= nsegs && nsegs <= vq->vq_num);
999
1000	if ((vq->vq_indirect != NULL) &&
1001	    (nsegs >= MINSEG_INDIRECT) &&
1002	    (nsegs <= vq->vq_maxnsegs))
1003		indirect = 1;
1004	else
1005		indirect = 0;
1006	qe1->qe_indirect = indirect;
1007
1008	if (indirect) {
1009		struct vring_desc *vd;
1010		uint64_t addr;
1011		int i;
1012
1013		vd = &vq->vq_desc[qe1->qe_index];
1014		addr = vq->vq_dmamap->dm_segs[0].ds_addr
1015			+ vq->vq_indirectoffset;
1016		addr += sizeof(struct vring_desc)
1017			* vq->vq_maxnsegs * qe1->qe_index;
1018		vd->addr  = virtio_rw64(sc, addr);
1019		vd->len   = virtio_rw32(sc, sizeof(struct vring_desc) * nsegs);
1020		vd->flags = virtio_rw16(sc, VRING_DESC_F_INDIRECT);
1021
1022		vd = vq->vq_indirect;
1023		vd += vq->vq_maxnsegs * qe1->qe_index;
1024		qe1->qe_desc_base = vd;
1025
1026		for (i = 0; i < nsegs-1; i++) {
1027			vd[i].flags = virtio_rw16(sc, VRING_DESC_F_NEXT);
1028		}
1029		vd[i].flags  = virtio_rw16(sc, 0);
1030		qe1->qe_next = 0;
1031
1032		return 0;
1033	} else {
1034		struct vring_desc *vd;
1035		struct vq_entry *qe;
1036		int i, s;
1037
1038		vd = &vq->vq_desc[0];
1039		qe1->qe_desc_base = vd;
1040		qe1->qe_next = qe1->qe_index;
1041		s = slot;
1042		for (i = 0; i < nsegs - 1; i++) {
1043			qe = vq_alloc_entry(vq);
1044			if (qe == NULL) {
1045				vd[s].flags = virtio_rw16(sc, 0);
1046				virtio_enqueue_abort(sc, vq, slot);
1047				return EAGAIN;
1048			}
1049			vd[s].flags = virtio_rw16(sc, VRING_DESC_F_NEXT);
1050			vd[s].next  = virtio_rw16(sc, qe->qe_index);
1051			s = qe->qe_index;
1052		}
1053		vd[s].flags = virtio_rw16(sc, 0);
1054
1055		return 0;
1056	}
1057}
1058
1059/*
1060 * enqueue: enqueue a single dmamap.
1061 */
1062int
1063virtio_enqueue(struct virtio_softc *sc, struct virtqueue *vq, int slot,
1064	       bus_dmamap_t dmamap, bool write)
1065{
1066	struct vq_entry *qe1 = &vq->vq_entries[slot];
1067	struct vring_desc *vd = qe1->qe_desc_base;
1068	int i;
1069	int s = qe1->qe_next;
1070
1071	KASSERT(s >= 0);
1072	KASSERT(dmamap->dm_nsegs > 0);
1073
1074	for (i = 0; i < dmamap->dm_nsegs; i++) {
1075		vd[s].addr = virtio_rw64(sc, dmamap->dm_segs[i].ds_addr);
1076		vd[s].len  = virtio_rw32(sc, dmamap->dm_segs[i].ds_len);
1077		if (!write)
1078			vd[s].flags |= virtio_rw16(sc, VRING_DESC_F_WRITE);
1079		s = virtio_rw16(sc, vd[s].next);
1080	}
1081	qe1->qe_next = s;
1082
1083	return 0;
1084}
1085
1086int
1087virtio_enqueue_p(struct virtio_softc *sc, struct virtqueue *vq, int slot,
1088		 bus_dmamap_t dmamap, bus_addr_t start, bus_size_t len,
1089		 bool write)
1090{
1091	struct vq_entry *qe1 = &vq->vq_entries[slot];
1092	struct vring_desc *vd = qe1->qe_desc_base;
1093	int s = qe1->qe_next;
1094
1095	KASSERT(s >= 0);
1096	KASSERT(dmamap->dm_nsegs == 1); /* XXX */
1097	KASSERT((dmamap->dm_segs[0].ds_len > start) &&
1098		(dmamap->dm_segs[0].ds_len >= start + len));
1099
1100	vd[s].addr = virtio_rw64(sc, dmamap->dm_segs[0].ds_addr + start);
1101	vd[s].len  = virtio_rw32(sc, len);
1102	if (!write)
1103		vd[s].flags |= virtio_rw16(sc, VRING_DESC_F_WRITE);
1104	qe1->qe_next = virtio_rw16(sc, vd[s].next);
1105
1106	return 0;
1107}
1108
/*
 * enqueue_commit: add it to the aring.
 *
 * With slot >= 0 the descriptors (and, for an indirect request, the
 * indirect table) are synced and the slot number is stored in the next
 * position of the avail ring under vq_aring_lock.  With slot < 0 nothing
 * is added and the function only performs the notification step.
 *
 * When `notifynow' is true the locally tracked avail index is published
 * to vq_avail->idx and the device is kicked unless it has asked not to
 * be (event index or VRING_USED_F_NO_NOTIFY, depending on the negotiated
 * features).  When it is false the entry is only placed in the ring; the
 * index is not published by this call.
 *
 * Always returns 0.
 */
int
virtio_enqueue_commit(struct virtio_softc *sc, struct virtqueue *vq, int slot,
		      bool notifynow)
{
	struct vq_entry *qe1;

	/* nothing to add: just take the lock and go to the notify step */
	if (slot < 0) {
		mutex_enter(&vq->vq_aring_lock);
		goto notify;
	}
	vq_sync_descs(sc, vq, BUS_DMASYNC_PREWRITE);
	qe1 = &vq->vq_entries[slot];
	if (qe1->qe_indirect)
		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_PREWRITE);
	mutex_enter(&vq->vq_aring_lock);
	/* vq_avail_idx is free-running; only the ring position wraps */
	vq->vq_avail->ring[(vq->vq_avail_idx++) % vq->vq_num] =
	    virtio_rw16(sc, slot);

notify:
	if (notifynow) {
		uint16_t o, n, t;
		uint16_t flags;

		/* o: index currently published, n: index about to be published */
		o = virtio_rw16(sc, vq->vq_avail->idx);
		n = vq->vq_avail_idx;

		/*
		 * Prepare for `device->CPU' (host->guest) transfer
		 * into the buffer.  This must happen before we commit
		 * the vq->vq_avail->idx update to ensure we're not
		 * still using the buffer in case program-prior loads
		 * or stores in it get delayed past the store to
		 * vq->vq_avail->idx.
		 */
		vq_sync_uring_all(sc, vq, BUS_DMASYNC_PREREAD);

		/* ensure payload is published, then avail idx */
		vq_sync_aring_payload(sc, vq, BUS_DMASYNC_PREWRITE);
		vq->vq_avail->idx = virtio_rw16(sc, vq->vq_avail_idx);
		vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
		vq->vq_queued++;

		if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
			vq_sync_uring_avail(sc, vq, BUS_DMASYNC_POSTREAD);
			/*
			 * Kick only if the index the device asked to be
			 * told about (avail_event + 1) falls within the
			 * range (o, n] just published; the uint16_t
			 * arithmetic makes the comparison wrap-safe.
			 */
			t = virtio_rw16(sc, *vq->vq_avail_event) + 1;
			if ((uint16_t) (n - t) < (uint16_t) (n - o))
				sc->sc_ops->kick(sc, vq->vq_index);
		} else {
			vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
			/* kick unless the device set VRING_USED_F_NO_NOTIFY */
			flags = virtio_rw16(sc, vq->vq_used->flags);
			if (!(flags & VRING_USED_F_NO_NOTIFY))
				sc->sc_ops->kick(sc, vq->vq_index);
		}
	}
	mutex_exit(&vq->vq_aring_lock);

	return 0;
}
1170
1171/*
1172 * enqueue_abort: rollback.
1173 */
1174int
1175virtio_enqueue_abort(struct virtio_softc *sc, struct virtqueue *vq, int slot)
1176{
1177	struct vq_entry *qe = &vq->vq_entries[slot];
1178	struct vring_desc *vd;
1179	int s;
1180
1181	if (qe->qe_next < 0) {
1182		vq_free_entry(vq, qe);
1183		return 0;
1184	}
1185
1186	s = slot;
1187	vd = &vq->vq_desc[0];
1188	while (virtio_rw16(sc, vd[s].flags) & VRING_DESC_F_NEXT) {
1189		s = virtio_rw16(sc, vd[s].next);
1190		vq_free_entry(vq, qe);
1191		qe = &vq->vq_entries[s];
1192	}
1193	vq_free_entry(vq, qe);
1194	return 0;
1195}
1196
1197/*
1198 * Dequeue a request.
1199 */
1200/*
1201 * dequeue: dequeue a request from uring; dmamap_sync for uring is
1202 *	    already done in the interrupt handler.
1203 */
1204int
1205virtio_dequeue(struct virtio_softc *sc, struct virtqueue *vq,
1206	       int *slotp, int *lenp)
1207{
1208	uint16_t slot, usedidx;
1209	struct vq_entry *qe;
1210
1211	if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
1212		return ENOENT;
1213	mutex_enter(&vq->vq_uring_lock);
1214	usedidx = vq->vq_used_idx++;
1215	mutex_exit(&vq->vq_uring_lock);
1216	usedidx %= vq->vq_num;
1217	slot = virtio_rw32(sc, vq->vq_used->ring[usedidx].id);
1218	qe = &vq->vq_entries[slot];
1219
1220	if (qe->qe_indirect)
1221		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_POSTWRITE);
1222
1223	if (slotp)
1224		*slotp = slot;
1225	if (lenp)
1226		*lenp = virtio_rw32(sc, vq->vq_used->ring[usedidx].len);
1227
1228	return 0;
1229}
1230
1231/*
1232 * dequeue_commit: complete dequeue; the slot is recycled for future use.
1233 *                 if you forget to call this the slot will be leaked.
1234 */
1235int
1236virtio_dequeue_commit(struct virtio_softc *sc, struct virtqueue *vq, int slot)
1237{
1238	struct vq_entry *qe = &vq->vq_entries[slot];
1239	struct vring_desc *vd = &vq->vq_desc[0];
1240	int s = slot;
1241
1242	while (virtio_rw16(sc, vd[s].flags) & VRING_DESC_F_NEXT) {
1243		s = virtio_rw16(sc, vd[s].next);
1244		vq_free_entry(vq, qe);
1245		qe = &vq->vq_entries[s];
1246	}
1247	vq_free_entry(vq, qe);
1248
1249	return 0;
1250}
1251
1252/*
1253 * Attach a child, fill all the members.
1254 */
1255void
1256virtio_child_attach_start(struct virtio_softc *sc, device_t child, int ipl,
1257		    struct virtqueue *vqs,
1258		    virtio_callback config_change,
1259		    virtio_callback intr_hand,
1260		    int req_flags, int req_features, const char *feat_bits)
1261{
1262	char buf[1024];
1263
1264	sc->sc_child = child;
1265	sc->sc_ipl = ipl;
1266	sc->sc_vqs = vqs;
1267	sc->sc_config_change = config_change;
1268	sc->sc_intrhand = intr_hand;
1269	sc->sc_flags = req_flags;
1270
1271	virtio_negotiate_features(sc, req_features);
1272	snprintb(buf, sizeof(buf), feat_bits, sc->sc_active_features);
1273	aprint_normal(": features: %s\n", buf);
1274	aprint_naive("\n");
1275}
1276
1277void
1278virtio_child_attach_set_vqs(struct virtio_softc *sc,
1279    struct virtqueue *vqs, int nvq_pairs)
1280{
1281
1282	KASSERT(nvq_pairs == 1 ||
1283	    (sc->sc_flags & VIRTIO_F_INTR_SOFTINT) == 0);
1284	if (nvq_pairs > 1)
1285		sc->sc_child_mq = true;
1286
1287	sc->sc_vqs = vqs;
1288}
1289
1290int
1291virtio_child_attach_finish(struct virtio_softc *sc)
1292{
1293	int r;
1294
1295	sc->sc_finished_called = true;
1296	r = sc->sc_ops->alloc_interrupts(sc);
1297	if (r != 0) {
1298		aprint_error_dev(sc->sc_dev, "failed to allocate interrupts\n");
1299		goto fail;
1300	}
1301
1302	r = sc->sc_ops->setup_interrupts(sc, 0);
1303	if (r != 0) {
1304		aprint_error_dev(sc->sc_dev, "failed to setup interrupts\n");
1305		goto fail;
1306	}
1307
1308	KASSERT(sc->sc_soft_ih == NULL);
1309	if (sc->sc_flags & VIRTIO_F_INTR_SOFTINT) {
1310		u_int flags = SOFTINT_NET;
1311		if (sc->sc_flags & VIRTIO_F_INTR_MPSAFE)
1312			flags |= SOFTINT_MPSAFE;
1313
1314		sc->sc_soft_ih = softint_establish(flags, virtio_soft_intr, sc);
1315		if (sc->sc_soft_ih == NULL) {
1316			sc->sc_ops->free_interrupts(sc);
1317			aprint_error_dev(sc->sc_dev,
1318			    "failed to establish soft interrupt\n");
1319			goto fail;
1320		}
1321	}
1322
1323	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
1324	return 0;
1325
1326fail:
1327	if (sc->sc_soft_ih) {
1328		softint_disestablish(sc->sc_soft_ih);
1329		sc->sc_soft_ih = NULL;
1330	}
1331
1332	sc->sc_ops->free_interrupts(sc);
1333
1334	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1335	return 1;
1336}
1337
1338void
1339virtio_child_detach(struct virtio_softc *sc)
1340{
1341	sc->sc_child = NULL;
1342	sc->sc_vqs = NULL;
1343
1344	virtio_device_reset(sc);
1345
1346	sc->sc_ops->free_interrupts(sc);
1347
1348	if (sc->sc_soft_ih) {
1349		softint_disestablish(sc->sc_soft_ih);
1350		sc->sc_soft_ih = NULL;
1351	}
1352}
1353
1354void
1355virtio_child_attach_failed(struct virtio_softc *sc)
1356{
1357	virtio_child_detach(sc);
1358
1359	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1360
1361	sc->sc_child = VIRTIO_CHILD_FAILED;
1362}
1363
1364bus_dma_tag_t
1365virtio_dmat(struct virtio_softc *sc)
1366{
1367	return sc->sc_dmat;
1368}
1369
1370device_t
1371virtio_child(struct virtio_softc *sc)
1372{
1373	return sc->sc_child;
1374}
1375
1376int
1377virtio_intrhand(struct virtio_softc *sc)
1378{
1379	return (*sc->sc_intrhand)(sc);
1380}
1381
1382uint64_t
1383virtio_features(struct virtio_softc *sc)
1384{
1385	return sc->sc_active_features;
1386}
1387
1388int
1389virtio_attach_failed(struct virtio_softc *sc)
1390{
1391	device_t self = sc->sc_dev;
1392
1393	/* no error if its not connected, but its failed */
1394	if (sc->sc_childdevid == 0)
1395		return 1;
1396
1397	if (sc->sc_child == NULL) {
1398		aprint_error_dev(self,
1399			"no matching child driver; not configured\n");
1400		return 1;
1401	}
1402
1403	if (sc->sc_child == VIRTIO_CHILD_FAILED) {
1404		aprint_error_dev(self, "virtio configuration failed\n");
1405		return 1;
1406	}
1407
1408	/* sanity check */
1409	if (!sc->sc_finished_called) {
1410		aprint_error_dev(self, "virtio internal error, child driver "
1411			"signaled OK but didn't initialize interrupts\n");
1412		return 1;
1413	}
1414
1415	return 0;
1416}
1417
1418void
1419virtio_print_device_type(device_t self, int id, int revision)
1420{
1421	aprint_normal_dev(self, "%s device (rev. 0x%02x)\n",
1422		  (id < NDEVNAMES ? virtio_device_name[id] : "Unknown"),
1423		  revision);
1424}
1425
1426
/*
 * Kernel module declaration for `virtio' (class MODULE_CLASS_DRIVER).
 * NOTE(review): the NULL third argument is presumably the list of
 * required modules, i.e. none -- confirm against module(9).
 */
MODULE(MODULE_CLASS_DRIVER, virtio, NULL);

/*
 * Only when built as a loadable module.
 * NOTE(review): ioconf.c presumably supplies the cfdriver_ioconf_virtio,
 * cfattach_ioconf_virtio and cfdata_ioconf_virtio tables that
 * virtio_modcmd() passes to config_init_component() -- confirm.
 */
#ifdef _MODULE
#include "ioconf.c"
#endif
1432
1433static int
1434virtio_modcmd(modcmd_t cmd, void *opaque)
1435{
1436	int error = 0;
1437
1438#ifdef _MODULE
1439	switch (cmd) {
1440	case MODULE_CMD_INIT:
1441		error = config_init_component(cfdriver_ioconf_virtio,
1442		    cfattach_ioconf_virtio, cfdata_ioconf_virtio);
1443		break;
1444	case MODULE_CMD_FINI:
1445		error = config_fini_component(cfdriver_ioconf_virtio,
1446		    cfattach_ioconf_virtio, cfdata_ioconf_virtio);
1447		break;
1448	default:
1449		error = ENOTTY;
1450		break;
1451	}
1452#endif
1453
1454	return error;
1455}
1456