virtio.c revision 1.77
1/*	$NetBSD: virtio.c,v 1.77 2023/04/19 00:40:30 yamaguchi Exp $	*/
2
3/*
4 * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 * Copyright (c) 2012 Stefan Fritsch, Alexander Fiveg.
6 * Copyright (c) 2010 Minoura Makoto.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__KERNEL_RCSID(0, "$NetBSD: virtio.c,v 1.77 2023/04/19 00:40:30 yamaguchi Exp $");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/kernel.h>
36#include <sys/atomic.h>
37#include <sys/bus.h>
38#include <sys/device.h>
39#include <sys/kmem.h>
40#include <sys/module.h>
41
42#define VIRTIO_PRIVATE
43
44#include <dev/pci/virtioreg.h> /* XXX: move to non-pci */
45#include <dev/pci/virtiovar.h> /* XXX: move to non-pci */
46
47#define MINSEG_INDIRECT		2 /* use indirect if nsegs >= this value */
48
49/*
50 * The maximum descriptor size is 2^15. Use that value as the end of
51 * descriptor chain terminator since it will never be a valid index
52 * in the descriptor table.
53 */
54#define VRING_DESC_CHAIN_END		32768
55
/*
 * Printable names of virtio device types; the array index is the device
 * type number.  The list is incomplete.
 */
static const char *virtio_device_name[] = {
	[0] = "unknown (0)",
	[1] = "network",
	[2] = "block",
	[3] = "console",
	[4] = "entropy",
	[5] = "memory balloon",
	[6] = "I/O memory",
	[7] = "remote processor messaging",
	[8] = "SCSI",
	[9] = "9P transport",
};
#define NDEVNAMES	__arraycount(virtio_device_name)
70
71static void	virtio_reset_vq(struct virtio_softc *,
72		    struct virtqueue *);
73
/*
 * Pass a device status value to the transport's set_status operation.
 */
void
virtio_set_status(struct virtio_softc *sc, int status)
{
	sc->sc_ops->set_status(sc, status);
}
79
80/*
81 * Reset the device.
82 */
/*
 * To reset the device to a known state, do the following:
 *	virtio_reset(sc);	     // this will stop the device activity
 *	<dequeue finished requests>; // virtio_dequeue() still can be called
 *	<revoke pending requests in the vqs if any>;
 *	virtio_reinit_start(sc);     // dequeue prohibited
 *	virtio_negotiate_features(sc, requestedfeatures);
 *	<some other initialization>;
 *	virtio_reinit_end(sc);	     // device activated; enqueue allowed
 * Once attached, feature negotiation can only be allowed after virtio_reset.
 */
/* Reset the device by way of virtio_device_reset(). */
void
virtio_reset(struct virtio_softc *sc)
{
	virtio_device_reset(sc);
}
99
100int
101virtio_reinit_start(struct virtio_softc *sc)
102{
103	int i, r;
104
105	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
106	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
107	for (i = 0; i < sc->sc_nvqs; i++) {
108		int n;
109		struct virtqueue *vq = &sc->sc_vqs[i];
110		n = sc->sc_ops->read_queue_size(sc, vq->vq_index);
111		if (n == 0)	/* vq disappeared */
112			continue;
113		if (n != vq->vq_num) {
114			panic("%s: virtqueue size changed, vq index %d\n",
115			    device_xname(sc->sc_dev),
116			    vq->vq_index);
117		}
118		virtio_reset_vq(sc, vq);
119		sc->sc_ops->setup_queue(sc, vq->vq_index,
120		    vq->vq_dmamap->dm_segs[0].ds_addr);
121	}
122
123	r = sc->sc_ops->setup_interrupts(sc, 1);
124	if (r != 0)
125		goto fail;
126
127	return 0;
128
129fail:
130	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
131
132	return 1;
133}
134
/*
 * Second half of re-initialization: set the DRIVER_OK status.
 */
void
virtio_reinit_end(struct virtio_softc *sc)
{
	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
}
140
141/*
142 * Feature negotiation.
143 */
144void
145virtio_negotiate_features(struct virtio_softc *sc, uint64_t guest_features)
146{
147	if (!(device_cfdata(sc->sc_dev)->cf_flags & 1) &&
148	    !(device_cfdata(sc->sc_child)->cf_flags & 1)) /* XXX */
149		guest_features |= VIRTIO_F_RING_INDIRECT_DESC;
150	sc->sc_ops->neg_features(sc, guest_features);
151	if (sc->sc_active_features & VIRTIO_F_RING_INDIRECT_DESC)
152		sc->sc_indirect = true;
153	else
154		sc->sc_indirect = false;
155}
156
157
158/*
159 * Device configuration registers readers/writers
160 */
/*
 * Debug tracing for the config space accessors; compiled out by default.
 * The enabled variants expect a `sc' variable in the caller's scope.
 * Both variants expand to a single statement so that they stay safe
 * inside unbraced if/else bodies.
 */
#if 0
#define DPRINTFR(n, fmt, val, index, num) do { \
	printf("\n%s (", n); \
	for (int i = 0; i < (num); i++) \
		printf("%02x ", bus_space_read_1(sc->sc_devcfg_iot, \
		    sc->sc_devcfg_ioh, (index) + i)); \
	printf(") -> "); printf(fmt, val); printf("\n"); \
} while (0)
#define DPRINTFR2(n, fmt, val_s, val_n) do { \
	printf("%s ", n); \
	printf("\n        stream "); printf(fmt, val_s); \
	printf(" norm "); printf(fmt, val_n); printf("\n"); \
} while (0)
#else
#define DPRINTFR(n, fmt, val, index, num)	do { } while (0)
#define DPRINTFR2(n, fmt, val_s, val_n)	do { } while (0)
#endif
174
175
176uint8_t
177virtio_read_device_config_1(struct virtio_softc *sc, int index)
178{
179	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
180	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
181	uint8_t val;
182
183	val = bus_space_read_1(iot, ioh, index);
184
185	DPRINTFR("read_1", "%02x", val, index, 1);
186	return val;
187}
188
189uint16_t
190virtio_read_device_config_2(struct virtio_softc *sc, int index)
191{
192	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
193	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
194	uint16_t val;
195
196	val = bus_space_read_2(iot, ioh, index);
197	if (BYTE_ORDER != sc->sc_bus_endian)
198		val = bswap16(val);
199
200	DPRINTFR("read_2", "%04x", val, index, 2);
201	DPRINTFR2("read_2", "%04x",
202	    bus_space_read_stream_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
203		index),
204	    bus_space_read_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index));
205	return val;
206}
207
208uint32_t
209virtio_read_device_config_4(struct virtio_softc *sc, int index)
210{
211	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
212	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
213	uint32_t val;
214
215	val = bus_space_read_4(iot, ioh, index);
216	if (BYTE_ORDER != sc->sc_bus_endian)
217		val = bswap32(val);
218
219	DPRINTFR("read_4", "%08x", val, index, 4);
220	DPRINTFR2("read_4", "%08x",
221	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
222		index),
223	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index));
224	return val;
225}
226
/*
 * The Virtio spec explicitly states that reading and writing 8 bytes is not
 * considered atomic and that no triggers may be connected to reading or
 * writing it. We access it using two 32-bit reads. See virtio spec 4.1.3.1.
 */
232uint64_t
233virtio_read_device_config_8(struct virtio_softc *sc, int index)
234{
235	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
236	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
237	union {
238		uint64_t u64;
239		uint32_t l[2];
240	} v;
241	uint64_t val;
242
243	v.l[0] = bus_space_read_4(iot, ioh, index);
244	v.l[1] = bus_space_read_4(iot, ioh, index + 4);
245	if (sc->sc_bus_endian != sc->sc_struct_endian) {
246		v.l[0] = bswap32(v.l[0]);
247		v.l[1] = bswap32(v.l[1]);
248	}
249	val = v.u64;
250
251	if (BYTE_ORDER != sc->sc_struct_endian)
252		val = bswap64(val);
253
254	DPRINTFR("read_8", "%08"PRIx64, val, index, 8);
255	DPRINTFR2("read_8 low ", "%08x",
256	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
257		index),
258	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index));
259	DPRINTFR2("read_8 high ", "%08x",
260	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
261		index + 4),
262	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index + 4));
263	return val;
264}
265
266/*
267 * In the older virtio spec, device config registers are host endian. On newer
268 * they are little endian. Some newer devices however explicitly specify their
269 * register to always be little endian. These functions cater for these.
270 */
271uint16_t
272virtio_read_device_config_le_2(struct virtio_softc *sc, int index)
273{
274	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
275	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
276	uint16_t val;
277
278	val = bus_space_read_2(iot, ioh, index);
279	if (sc->sc_bus_endian != LITTLE_ENDIAN)
280		val = bswap16(val);
281
282	DPRINTFR("read_le_2", "%04x", val, index, 2);
283	DPRINTFR2("read_le_2", "%04x",
284	    bus_space_read_stream_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0),
285	    bus_space_read_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0));
286	return val;
287}
288
289uint32_t
290virtio_read_device_config_le_4(struct virtio_softc *sc, int index)
291{
292	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
293	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
294	uint32_t val;
295
296	val = bus_space_read_4(iot, ioh, index);
297	if (sc->sc_bus_endian != LITTLE_ENDIAN)
298		val = bswap32(val);
299
300	DPRINTFR("read_le_4", "%08x", val, index, 4);
301	DPRINTFR2("read_le_4", "%08x",
302	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0),
303	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0));
304	return val;
305}
306
307void
308virtio_write_device_config_1(struct virtio_softc *sc, int index, uint8_t value)
309{
310	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
311	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
312
313	bus_space_write_1(iot, ioh, index, value);
314}
315
316void
317virtio_write_device_config_2(struct virtio_softc *sc, int index,
318    uint16_t value)
319{
320	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
321	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
322
323	if (BYTE_ORDER != sc->sc_bus_endian)
324		value = bswap16(value);
325	bus_space_write_2(iot, ioh, index, value);
326}
327
328void
329virtio_write_device_config_4(struct virtio_softc *sc, int index,
330    uint32_t value)
331{
332	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
333	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
334
335	if (BYTE_ORDER != sc->sc_bus_endian)
336		value = bswap32(value);
337	bus_space_write_4(iot, ioh, index, value);
338}
339
/*
 * The Virtio spec explicitly states that reading and writing 8 bytes is not
 * considered atomic and that no triggers may be connected to reading or
 * writing it. We access it using two 32-bit writes. For good measure it is
 * stated to always write lsb first just in case of a hypervisor bug. See
 * virtio spec 4.1.3.1.
 */
347void
348virtio_write_device_config_8(struct virtio_softc *sc, int index,
349    uint64_t value)
350{
351	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
352	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
353	union {
354		uint64_t u64;
355		uint32_t l[2];
356	} v;
357
358	if (BYTE_ORDER != sc->sc_struct_endian)
359		value = bswap64(value);
360
361	v.u64 = value;
362	if (sc->sc_bus_endian != sc->sc_struct_endian) {
363		v.l[0] = bswap32(v.l[0]);
364		v.l[1] = bswap32(v.l[1]);
365	}
366
367	if (sc->sc_struct_endian == LITTLE_ENDIAN) {
368		bus_space_write_4(iot, ioh, index,     v.l[0]);
369		bus_space_write_4(iot, ioh, index + 4, v.l[1]);
370	} else {
371		bus_space_write_4(iot, ioh, index + 4, v.l[1]);
372		bus_space_write_4(iot, ioh, index,     v.l[0]);
373	}
374}
375
376/*
377 * In the older virtio spec, device config registers are host endian. On newer
378 * they are little endian. Some newer devices however explicitly specify their
379 * register to always be little endian. These functions cater for these.
380 */
381void
382virtio_write_device_config_le_2(struct virtio_softc *sc, int index,
383    uint16_t value)
384{
385	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
386	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
387
388	if (sc->sc_bus_endian != LITTLE_ENDIAN)
389		value = bswap16(value);
390	bus_space_write_2(iot, ioh, index, value);
391}
392
393void
394virtio_write_device_config_le_4(struct virtio_softc *sc, int index,
395    uint32_t value)
396{
397	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
398	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
399
400	if (sc->sc_bus_endian != LITTLE_ENDIAN)
401		value = bswap32(value);
402	bus_space_write_4(iot, ioh, index, value);
403}
404
405
406/*
407 * data structures endian helpers
408 */
409uint16_t
410virtio_rw16(struct virtio_softc *sc, uint16_t val)
411{
412	KASSERT(sc);
413	return BYTE_ORDER != sc->sc_struct_endian ? bswap16(val) : val;
414}
415
416uint32_t
417virtio_rw32(struct virtio_softc *sc, uint32_t val)
418{
419	KASSERT(sc);
420	return BYTE_ORDER != sc->sc_struct_endian ? bswap32(val) : val;
421}
422
423uint64_t
424virtio_rw64(struct virtio_softc *sc, uint64_t val)
425{
426	KASSERT(sc);
427	return BYTE_ORDER != sc->sc_struct_endian ? bswap64(val) : val;
428}
429
430
431/*
432 * Interrupt handler.
433 */
434static void
435virtio_soft_intr(void *arg)
436{
437	struct virtio_softc *sc = arg;
438
439	KASSERT(sc->sc_intrhand != NULL);
440
441	(*sc->sc_intrhand)(sc);
442}
443
/*
 * set to vq->vq_intrhand in virtio_init_vq_vqdone()
 *
 * Adapter from the generic (void *) handler signature to the
 * per-virtqueue vq_done callback: `xvq' is the virtqueue itself.
 * Returns whatever vq_done returns.
 */
static int
virtio_vq_done(void *xvq)
{
	struct virtqueue *vq = xvq;

	return vq->vq_done(vq);
}
452
453static int
454virtio_vq_intr(struct virtio_softc *sc)
455{
456	struct virtqueue *vq;
457	int i, r = 0;
458
459	for (i = 0; i < sc->sc_nvqs; i++) {
460		vq = &sc->sc_vqs[i];
461		if (virtio_vq_is_enqueued(sc, vq) == 1) {
462			r |= (*vq->vq_intrhand)(vq->vq_intrhand_arg);
463		}
464	}
465
466	return r;
467}
468
469/*
470 * dmamap sync operations for a virtqueue.
471 */
472static inline void
473vq_sync_descs(struct virtio_softc *sc, struct virtqueue *vq, int ops)
474{
475
476	/* availoffset == sizeof(vring_desc) * vq_num */
477	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, 0, vq->vq_availoffset,
478	    ops);
479}
480
481static inline void
482vq_sync_aring_all(struct virtio_softc *sc, struct virtqueue *vq, int ops)
483{
484	uint16_t hdrlen = offsetof(struct vring_avail, ring);
485	size_t payloadlen = vq->vq_num * sizeof(uint16_t);
486	size_t usedlen = 0;
487
488	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX)
489		usedlen = sizeof(uint16_t);
490	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
491	    vq->vq_availoffset, hdrlen + payloadlen + usedlen, ops);
492}
493
494static inline void
495vq_sync_aring_header(struct virtio_softc *sc, struct virtqueue *vq, int ops)
496{
497	uint16_t hdrlen = offsetof(struct vring_avail, ring);
498
499	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
500	    vq->vq_availoffset, hdrlen, ops);
501}
502
503static inline void
504vq_sync_aring_payload(struct virtio_softc *sc, struct virtqueue *vq, int ops)
505{
506	uint16_t hdrlen = offsetof(struct vring_avail, ring);
507	size_t payloadlen = vq->vq_num * sizeof(uint16_t);
508
509	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
510	    vq->vq_availoffset + hdrlen, payloadlen, ops);
511}
512
513static inline void
514vq_sync_aring_used(struct virtio_softc *sc, struct virtqueue *vq, int ops)
515{
516	uint16_t hdrlen = offsetof(struct vring_avail, ring);
517	size_t payloadlen = vq->vq_num * sizeof(uint16_t);
518	size_t usedlen = sizeof(uint16_t);
519
520	if ((sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) == 0)
521		return;
522	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
523	    vq->vq_availoffset + hdrlen + payloadlen, usedlen, ops);
524}
525
526static inline void
527vq_sync_uring_all(struct virtio_softc *sc, struct virtqueue *vq, int ops)
528{
529	uint16_t hdrlen = offsetof(struct vring_used, ring);
530	size_t payloadlen = vq->vq_num * sizeof(struct vring_used_elem);
531	size_t availlen = 0;
532
533	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX)
534		availlen = sizeof(uint16_t);
535	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
536	    vq->vq_usedoffset, hdrlen + payloadlen + availlen, ops);
537}
538
539static inline void
540vq_sync_uring_header(struct virtio_softc *sc, struct virtqueue *vq, int ops)
541{
542	uint16_t hdrlen = offsetof(struct vring_used, ring);
543
544	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
545	    vq->vq_usedoffset, hdrlen, ops);
546}
547
548static inline void
549vq_sync_uring_payload(struct virtio_softc *sc, struct virtqueue *vq, int ops)
550{
551	uint16_t hdrlen = offsetof(struct vring_used, ring);
552	size_t payloadlen = vq->vq_num * sizeof(struct vring_used_elem);
553
554	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
555	    vq->vq_usedoffset + hdrlen, payloadlen, ops);
556}
557
558static inline void
559vq_sync_uring_avail(struct virtio_softc *sc, struct virtqueue *vq, int ops)
560{
561	uint16_t hdrlen = offsetof(struct vring_used, ring);
562	size_t payloadlen = vq->vq_num * sizeof(struct vring_used_elem);
563	size_t availlen = sizeof(uint16_t);
564
565	if ((sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) == 0)
566		return;
567	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
568	    vq->vq_usedoffset + hdrlen + payloadlen, availlen, ops);
569}
570
571static inline void
572vq_sync_indirect(struct virtio_softc *sc, struct virtqueue *vq, int slot,
573    int ops)
574{
575	int offset = vq->vq_indirectoffset +
576	    sizeof(struct vring_desc) * vq->vq_maxnsegs * slot;
577
578	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
579	    offset, sizeof(struct vring_desc) * vq->vq_maxnsegs, ops);
580}
581
582bool
583virtio_vq_is_enqueued(struct virtio_softc *sc, struct virtqueue *vq)
584{
585
586	if (vq->vq_queued) {
587		vq->vq_queued = 0;
588		vq_sync_aring_all(sc, vq, BUS_DMASYNC_POSTWRITE);
589	}
590
591	vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
592	if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
593		return 0;
594	vq_sync_uring_payload(sc, vq, BUS_DMASYNC_POSTREAD);
595	return 1;
596}
597
598/*
599 * Increase the event index in order to delay interrupts.
600 */
601int
602virtio_postpone_intr(struct virtio_softc *sc, struct virtqueue *vq,
603    uint16_t nslots)
604{
605	uint16_t	idx, nused;
606
607	idx = vq->vq_used_idx + nslots;
608
609	/* set the new event index: avail_ring->used_event = idx */
610	*vq->vq_used_event = virtio_rw16(sc, idx);
611	vq_sync_aring_used(vq->vq_owner, vq, BUS_DMASYNC_PREWRITE);
612	vq->vq_queued++;
613
614	nused = (uint16_t)
615	    (virtio_rw16(sc, vq->vq_used->idx) - vq->vq_used_idx);
616	KASSERT(nused <= vq->vq_num);
617
618	return nslots < nused;
619}
620
621/*
622 * Postpone interrupt until 3/4 of the available descriptors have been
623 * consumed.
624 */
625int
626virtio_postpone_intr_smart(struct virtio_softc *sc, struct virtqueue *vq)
627{
628	uint16_t	nslots;
629
630	nslots = (uint16_t)
631	    (virtio_rw16(sc, vq->vq_avail->idx) - vq->vq_used_idx) * 3 / 4;
632
633	return virtio_postpone_intr(sc, vq, nslots);
634}
635
636/*
637 * Postpone interrupt until all of the available descriptors have been
638 * consumed.
639 */
640int
641virtio_postpone_intr_far(struct virtio_softc *sc, struct virtqueue *vq)
642{
643	uint16_t	nslots;
644
645	nslots = (uint16_t)
646	    (virtio_rw16(sc, vq->vq_avail->idx) - vq->vq_used_idx);
647
648	return virtio_postpone_intr(sc, vq, nslots);
649}
650
651/*
652 * Start/stop vq interrupt.  No guarantee.
653 */
654void
655virtio_stop_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
656{
657
658	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
659		/*
660		 * No way to disable the interrupt completely with
661		 * RingEventIdx. Instead advance used_event by half the
662		 * possible value. This won't happen soon and is far enough in
663		 * the past to not trigger a spurios interrupt.
664		 */
665		*vq->vq_used_event = virtio_rw16(sc, vq->vq_used_idx + 0x8000);
666		vq_sync_aring_used(sc, vq, BUS_DMASYNC_PREWRITE);
667	} else {
668		vq->vq_avail->flags |=
669		    virtio_rw16(sc, VRING_AVAIL_F_NO_INTERRUPT);
670		vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
671	}
672	vq->vq_queued++;
673}
674
675int
676virtio_start_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
677{
678
679	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
680		/*
681		 * If event index feature is negotiated, enabling interrupts
682		 * is done through setting the latest consumed index in the
683		 * used_event field
684		 */
685		*vq->vq_used_event = virtio_rw16(sc, vq->vq_used_idx);
686		vq_sync_aring_used(sc, vq, BUS_DMASYNC_PREWRITE);
687	} else {
688		vq->vq_avail->flags &=
689		    ~virtio_rw16(sc, VRING_AVAIL_F_NO_INTERRUPT);
690		vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
691	}
692	vq->vq_queued++;
693
694	vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
695	if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
696		return 0;
697	vq_sync_uring_payload(sc, vq, BUS_DMASYNC_POSTREAD);
698	return 1;
699}
700
701/*
702 * Initialize vq structure.
703 */
704/*
705 * Reset virtqueue parameters
706 */
707static void
708virtio_reset_vq(struct virtio_softc *sc, struct virtqueue *vq)
709{
710	struct vring_desc *vds;
711	int i, j;
712	int vq_size = vq->vq_num;
713
714	memset(vq->vq_vaddr, 0, vq->vq_bytesize);
715
716	/* build the descriptor chain for free slot management */
717	vds = vq->vq_desc;
718	for (i = 0; i < vq_size - 1; i++) {
719		vds[i].next = virtio_rw16(sc, i + 1);
720	}
721	vds[i].next = virtio_rw16(sc, VRING_DESC_CHAIN_END);
722	vq->vq_free_idx = 0;
723
724	/* build the indirect descriptor chain */
725	if (vq->vq_indirect != NULL) {
726		struct vring_desc *vd;
727
728		for (i = 0; i < vq_size; i++) {
729			vd = vq->vq_indirect;
730			vd += vq->vq_maxnsegs * i;
731			for (j = 0; j < vq->vq_maxnsegs - 1; j++) {
732				vd[j].next = virtio_rw16(sc, j + 1);
733			}
734		}
735	}
736
737	/* enqueue/dequeue status */
738	vq->vq_avail_idx = 0;
739	vq->vq_used_idx = 0;
740	vq->vq_queued = 0;
741	vq_sync_uring_all(sc, vq, BUS_DMASYNC_PREREAD);
742	vq->vq_queued++;
743}
744
/*
 * Initialize vq with a per-virtqueue completion callback.
 *
 * The queue is set up through virtio_init_vq() with virtio_vq_done() as
 * the interrupt handler and the vq itself as its argument; vq_done is
 * the callback that handler ends up calling.  vq_done is stored after
 * virtio_init_vq() because that function zeroes the whole structure.
 */
void
virtio_init_vq_vqdone(struct virtio_softc *sc, struct virtqueue *vq,
    int index, int (*vq_done)(struct virtqueue *))
{

	virtio_init_vq(sc, vq, index, virtio_vq_done, vq);
	vq->vq_done = vq_done;
}
754
755void
756virtio_init_vq(struct virtio_softc *sc, struct virtqueue *vq, int index,
757   int (*func)(void *), void *arg)
758{
759
760	memset(vq, 0, sizeof(*vq));
761
762	vq->vq_owner = sc;
763	vq->vq_num = sc->sc_ops->read_queue_size(sc, index);
764	vq->vq_index = index;
765	vq->vq_intrhand = func;
766	vq->vq_intrhand_arg = arg;
767}
768
769/*
770 * Allocate/free a vq.
771 */
/*
 * Allocate, map and load the DMA memory backing a virtqueue and set up
 * the ring pointers and bookkeeping in *vq.
 *
 * vq->vq_num and vq->vq_index are read from *vq, so they must have been
 * filled in beforehand (virtio_init_vq() does that).  `name' is used in
 * messages only.
 *
 * The single allocation is laid out as: descriptor table plus avail ring
 * (rounded up to VIRTIO_PAGE_SIZE), used ring (rounded up likewise) and,
 * when indirect descriptors are in use and maxnsegs >= MINSEG_INDIRECT,
 * one indirect table of maxnsegs descriptors per slot.
 *
 * Returns 0 on success.  On failure returns -1 after setting the queue
 * address to 0 through setup_queue, releasing whatever DMA resources
 * were acquired and zeroing *vq.
 */
int
virtio_alloc_vq(struct virtio_softc *sc, struct virtqueue *vq,
    int maxsegsize, int maxnsegs, const char *name)
{
	bus_size_t size_desc, size_avail, size_used, size_indirect;
	bus_size_t allocsize = 0, size_desc_avail;
	int rsegs, r, hdrlen;
	unsigned int vq_num;
#define VIRTQUEUE_ALIGN(n)	roundup(n, VIRTIO_PAGE_SIZE)

	vq_num = vq->vq_num;

	if (vq_num == 0) {
		aprint_error_dev(sc->sc_dev,
		    "virtqueue not exist, index %d for %s\n",
		    vq->vq_index, name);
		goto err;
	}

	/*
	 * Number of 16-bit fields besides the ring entries: two, plus one
	 * more when the event index feature is active.
	 */
	hdrlen = sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX ? 3 : 2;

	size_desc = sizeof(vq->vq_desc[0]) * vq_num;
	size_avail = sizeof(uint16_t) * hdrlen
	    + sizeof(vq->vq_avail[0].ring[0]) * vq_num;
	size_used = sizeof(uint16_t) *hdrlen
	    + sizeof(vq->vq_used[0].ring[0]) * vq_num;
	size_indirect = (sc->sc_indirect && maxnsegs >= MINSEG_INDIRECT) ?
	    sizeof(struct vring_desc) * maxnsegs * vq_num : 0;

	/* the used ring starts on its own VIRTIO_PAGE_SIZE boundary */
	size_desc_avail = VIRTQUEUE_ALIGN(size_desc + size_avail);
	size_used = VIRTQUEUE_ALIGN(size_used);

	allocsize = size_desc_avail + size_used + size_indirect;

	/* alloc and map the memory */
	r = bus_dmamem_alloc(sc->sc_dmat, allocsize, VIRTIO_PAGE_SIZE, 0,
	    &vq->vq_segs[0], 1, &rsegs, BUS_DMA_WAITOK);
	if (r != 0) {
		aprint_error_dev(sc->sc_dev,
		    "virtqueue %d for %s allocation failed, "
		    "error code %d\n", vq->vq_index, name, r);
		goto err;
	}

	r = bus_dmamem_map(sc->sc_dmat, &vq->vq_segs[0], rsegs, allocsize,
	    &vq->vq_vaddr, BUS_DMA_WAITOK);
	if (r != 0) {
		aprint_error_dev(sc->sc_dev,
		    "virtqueue %d for %s map failed, "
		    "error code %d\n", vq->vq_index, name, r);
		goto err;
	}

	r = bus_dmamap_create(sc->sc_dmat, allocsize, 1, allocsize, 0,
	    BUS_DMA_WAITOK, &vq->vq_dmamap);
	if (r != 0) {
		aprint_error_dev(sc->sc_dev,
		    "virtqueue %d for %s dmamap creation failed, "
		    "error code %d\n", vq->vq_index, name, r);
		goto err;
	}

	r = bus_dmamap_load(sc->sc_dmat, vq->vq_dmamap,
	    vq->vq_vaddr, allocsize, NULL, BUS_DMA_WAITOK);
	if (r != 0) {
		aprint_error_dev(sc->sc_dev,
		    "virtqueue %d for %s dmamap load failed, "
		    "error code %d\n", vq->vq_index, name, r);
		goto err;
	}

	vq->vq_bytesize = allocsize;
	vq->vq_maxsegsize = maxsegsize;
	vq->vq_maxnsegs = maxnsegs;

#define VIRTIO_PTR(base, offset)	(void *)((intptr_t)(base) + (offset))
	/* initialize vring pointers */
	vq->vq_desc = VIRTIO_PTR(vq->vq_vaddr, 0);
	vq->vq_availoffset = size_desc;
	vq->vq_avail = VIRTIO_PTR(vq->vq_vaddr, vq->vq_availoffset);
	/* used_event sits right behind the last avail ring entry */
	vq->vq_used_event = VIRTIO_PTR(vq->vq_avail,
	    offsetof(struct vring_avail, ring[vq_num]));
	vq->vq_usedoffset = size_desc_avail;
	vq->vq_used = VIRTIO_PTR(vq->vq_vaddr, vq->vq_usedoffset);
	/* avail_event sits right behind the last used ring element */
	vq->vq_avail_event = VIRTIO_PTR(vq->vq_used,
	    offsetof(struct vring_used, ring[vq_num]));

	if (size_indirect > 0) {
		vq->vq_indirectoffset = size_desc_avail + size_used;
		vq->vq_indirect = VIRTIO_PTR(vq->vq_vaddr,
		    vq->vq_indirectoffset);
	}
#undef VIRTIO_PTR

	/* per-descriptor driver-side bookkeeping, one entry per slot */
	vq->vq_descx = kmem_zalloc(sizeof(vq->vq_descx[0]) * vq_num,
	    KM_SLEEP);

	mutex_init(&vq->vq_freedesc_lock, MUTEX_SPIN, sc->sc_ipl);
	mutex_init(&vq->vq_aring_lock, MUTEX_SPIN, sc->sc_ipl);
	mutex_init(&vq->vq_uring_lock, MUTEX_SPIN, sc->sc_ipl);

	virtio_reset_vq(sc, vq);

	aprint_verbose_dev(sc->sc_dev,
	    "allocated %" PRIuBUSSIZE " byte for virtqueue %d for %s, "
	    "size %d\n", allocsize, vq->vq_index, name, vq_num);
	if (size_indirect > 0)
		aprint_verbose_dev(sc->sc_dev,
		    "using %" PRIuBUSSIZE " byte (%d entries) indirect "
		    "descriptors\n", size_indirect, maxnsegs * vq_num);

	return 0;

err:
	/*
	 * Shared failure path: each resource is released only if the
	 * corresponding field in *vq has been set.
	 */
	sc->sc_ops->setup_queue(sc, vq->vq_index, 0);
	if (vq->vq_dmamap)
		bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
	if (vq->vq_vaddr)
		bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, allocsize);
	if (vq->vq_segs[0].ds_addr)
		bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
	memset(vq, 0, sizeof(*vq));

	return -1;
}
897
898int
899virtio_free_vq(struct virtio_softc *sc, struct virtqueue *vq)
900{
901	uint16_t s;
902	size_t i;
903
904	if (vq->vq_vaddr == NULL)
905		return 0;
906
907	/* device must be already deactivated */
908	/* confirm the vq is empty */
909	s = vq->vq_free_idx;
910	i = 0;
911	while (s != virtio_rw16(sc, VRING_DESC_CHAIN_END)) {
912		s = vq->vq_desc[s].next;
913		i++;
914	}
915	if (i != vq->vq_num) {
916		printf("%s: freeing non-empty vq, index %d\n",
917		    device_xname(sc->sc_dev), vq->vq_index);
918		return EBUSY;
919	}
920
921	/* tell device that there's no virtqueue any longer */
922	sc->sc_ops->setup_queue(sc, vq->vq_index, 0);
923
924	vq_sync_aring_all(sc, vq, BUS_DMASYNC_POSTWRITE);
925
926	kmem_free(vq->vq_descx, sizeof(vq->vq_descx[0]) * vq->vq_num);
927	bus_dmamap_unload(sc->sc_dmat, vq->vq_dmamap);
928	bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
929	bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, vq->vq_bytesize);
930	bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
931	mutex_destroy(&vq->vq_freedesc_lock);
932	mutex_destroy(&vq->vq_uring_lock);
933	mutex_destroy(&vq->vq_aring_lock);
934	memset(vq, 0, sizeof(*vq));
935
936	return 0;
937}
938
939/*
940 * Free descriptor management.
941 */
942static int
943vq_alloc_slot_locked(struct virtio_softc *sc, struct virtqueue *vq,
944    size_t nslots)
945{
946	struct vring_desc *vd;
947	uint16_t head, tail;
948	size_t i;
949
950	KASSERT(mutex_owned(&vq->vq_freedesc_lock));
951
952	head = tail = virtio_rw16(sc, vq->vq_free_idx);
953	for (i = 0; i < nslots - 1; i++) {
954		if (tail == VRING_DESC_CHAIN_END)
955			return VRING_DESC_CHAIN_END;
956
957		vd = &vq->vq_desc[tail];
958		vd->flags = virtio_rw16(sc, VRING_DESC_F_NEXT);
959		tail = virtio_rw16(sc, vd->next);
960	}
961
962	if (tail == VRING_DESC_CHAIN_END)
963		return VRING_DESC_CHAIN_END;
964
965	vd = &vq->vq_desc[tail];
966	vd->flags = virtio_rw16(sc, 0);
967	vq->vq_free_idx = vd->next;
968
969	return head;
970}
/*
 * Allocate a chain of `nslots' descriptors, taking vq_freedesc_lock
 * around vq_alloc_slot_locked().  Returns the index of the first
 * descriptor or VRING_DESC_CHAIN_END when not enough are free.
 */
static uint16_t
vq_alloc_slot(struct virtio_softc *sc, struct virtqueue *vq, size_t nslots)
{
	uint16_t rv;

	mutex_enter(&vq->vq_freedesc_lock);
	rv = vq_alloc_slot_locked(sc, vq, nslots);
	mutex_exit(&vq->vq_freedesc_lock);

	return rv;
}
982
983static void
984vq_free_slot(struct virtio_softc *sc, struct virtqueue *vq, uint16_t slot)
985{
986	struct vring_desc *vd;
987	uint16_t s;
988
989	mutex_enter(&vq->vq_freedesc_lock);
990	vd = &vq->vq_desc[slot];
991	while ((vd->flags & virtio_rw16(sc, VRING_DESC_F_NEXT)) != 0) {
992		s = virtio_rw16(sc, vd->next);
993		vd = &vq->vq_desc[s];
994	}
995	vd->next = vq->vq_free_idx;
996	vq->vq_free_idx = virtio_rw16(sc, slot);
997	mutex_exit(&vq->vq_freedesc_lock);
998}
999
1000/*
1001 * Enqueue several dmamaps as a single request.
1002 */
1003/*
1004 * Typical usage:
1005 *  <queue size> number of followings are stored in arrays
1006 *  - command blocks (in dmamem) should be pre-allocated and mapped
1007 *  - dmamaps for command blocks should be pre-allocated and loaded
1008 *  - dmamaps for payload should be pre-allocated
1009 *      r = virtio_enqueue_prep(sc, vq, &slot);		// allocate a slot
1010 *	if (r)		// currently 0 or EAGAIN
1011 *		return r;
1012 *	r = bus_dmamap_load(dmat, dmamap_payload[slot], data, count, ..);
1013 *	if (r) {
1014 *		virtio_enqueue_abort(sc, vq, slot);
1015 *		return r;
1016 *	}
1017 *	r = virtio_enqueue_reserve(sc, vq, slot,
1018 *	    dmamap_payload[slot]->dm_nsegs + 1);
1019 *							// ^ +1 for command
1020 *	if (r) {	// currently 0 or EAGAIN
1021 *		bus_dmamap_unload(dmat, dmamap_payload[slot]);
1022 *		return r;				// do not call abort()
1023 *	}
1024 *	<setup and prepare commands>
1025 *	bus_dmamap_sync(dmat, dmamap_cmd[slot],... BUS_DMASYNC_PREWRITE);
1026 *	bus_dmamap_sync(dmat, dmamap_payload[slot],...);
1027 *	virtio_enqueue(sc, vq, slot, dmamap_cmd[slot], false);
1028 *	virtio_enqueue(sc, vq, slot, dmamap_payload[slot], iswrite);
1029 *	virtio_enqueue_commit(sc, vq, slot, true);
1030 */
1031
1032/*
1033 * enqueue_prep: allocate a slot number
1034 */
1035int
1036virtio_enqueue_prep(struct virtio_softc *sc, struct virtqueue *vq, int *slotp)
1037{
1038	uint16_t slot;
1039
1040	KASSERT(slotp != NULL);
1041
1042	slot = vq_alloc_slot(sc, vq, 1);
1043	if (slot == VRING_DESC_CHAIN_END)
1044		return EAGAIN;
1045
1046	*slotp = slot;
1047
1048	return 0;
1049}
1050
1051/*
1052 * enqueue_reserve: allocate remaining slots and build the descriptor chain.
1053 */
1054int
1055virtio_enqueue_reserve(struct virtio_softc *sc, struct virtqueue *vq,
1056    int slot, int nsegs)
1057{
1058	struct vring_desc *vd;
1059	struct vring_desc_extra *vdx;
1060	int i;
1061
1062	KASSERT(1 <= nsegs && nsegs <= vq->vq_num);
1063
1064	vdx = &vq->vq_descx[slot];
1065	vd = &vq->vq_desc[slot];
1066
1067	KASSERT((vd->flags & virtio_rw16(sc, VRING_DESC_F_NEXT)) == 0);
1068
1069	if ((vq->vq_indirect != NULL) &&
1070	    (nsegs >= MINSEG_INDIRECT) &&
1071	    (nsegs <= vq->vq_maxnsegs))
1072		vdx->use_indirect = true;
1073	else
1074		vdx->use_indirect = false;
1075
1076	if (vdx->use_indirect) {
1077		uint64_t addr;
1078
1079		addr = vq->vq_dmamap->dm_segs[0].ds_addr
1080		    + vq->vq_indirectoffset;
1081		addr += sizeof(struct vring_desc)
1082		    * vq->vq_maxnsegs * slot;
1083
1084		vd->addr  = virtio_rw64(sc, addr);
1085		vd->len   = virtio_rw32(sc, sizeof(struct vring_desc) * nsegs);
1086		vd->flags = virtio_rw16(sc, VRING_DESC_F_INDIRECT);
1087
1088		vd = &vq->vq_indirect[vq->vq_maxnsegs * slot];
1089		vdx->desc_base = vd;
1090		vdx->desc_free_idx = 0;
1091
1092		for (i = 0; i < nsegs - 1; i++) {
1093			vd[i].flags = virtio_rw16(sc, VRING_DESC_F_NEXT);
1094		}
1095		vd[i].flags  = virtio_rw16(sc, 0);
1096	} else {
1097		if (nsegs > 1) {
1098			uint16_t s;
1099
1100			s = vq_alloc_slot(sc, vq, nsegs - 1);
1101			if (s == VRING_DESC_CHAIN_END) {
1102				vq_free_slot(sc, vq, slot);
1103				return EAGAIN;
1104			}
1105			vd->next = virtio_rw16(sc, s);
1106			vd->flags = virtio_rw16(sc, VRING_DESC_F_NEXT);
1107		}
1108
1109		vdx->desc_base = &vq->vq_desc[0];
1110		vdx->desc_free_idx = slot;
1111	}
1112
1113	return 0;
1114}
1115
1116/*
1117 * enqueue: enqueue a single dmamap.
1118 */
1119int
1120virtio_enqueue(struct virtio_softc *sc, struct virtqueue *vq, int slot,
1121    bus_dmamap_t dmamap, bool write)
1122{
1123	struct vring_desc *vds;
1124	struct vring_desc_extra *vdx;
1125	uint16_t s;
1126	int i;
1127
1128	KASSERT(dmamap->dm_nsegs > 0);
1129
1130	vdx = &vq->vq_descx[slot];
1131	vds = vdx->desc_base;
1132	s = vdx->desc_free_idx;
1133
1134	KASSERT(vds != NULL);
1135
1136	for (i = 0; i < dmamap->dm_nsegs; i++) {
1137		KASSERT(s != VRING_DESC_CHAIN_END);
1138
1139		vds[s].addr = virtio_rw64(sc, dmamap->dm_segs[i].ds_addr);
1140		vds[s].len  = virtio_rw32(sc, dmamap->dm_segs[i].ds_len);
1141		if (!write)
1142			vds[s].flags |= virtio_rw16(sc, VRING_DESC_F_WRITE);
1143
1144		if ((vds[s].flags & virtio_rw16(sc, VRING_DESC_F_NEXT)) == 0) {
1145			s = VRING_DESC_CHAIN_END;
1146		} else {
1147			s = virtio_rw16(sc, vds[s].next);
1148		}
1149	}
1150
1151	vdx->desc_free_idx = s;
1152
1153	return 0;
1154}
1155
1156int
1157virtio_enqueue_p(struct virtio_softc *sc, struct virtqueue *vq, int slot,
1158    bus_dmamap_t dmamap, bus_addr_t start, bus_size_t len,
1159    bool write)
1160{
1161	struct vring_desc_extra *vdx;
1162	struct vring_desc *vds;
1163	uint16_t s;
1164
1165	vdx = &vq->vq_descx[slot];
1166	vds = vdx->desc_base;
1167	s = vdx->desc_free_idx;
1168
1169	KASSERT(s != VRING_DESC_CHAIN_END);
1170	KASSERT(vds != NULL);
1171	KASSERT(dmamap->dm_nsegs == 1); /* XXX */
1172	KASSERT(dmamap->dm_segs[0].ds_len > start);
1173	KASSERT(dmamap->dm_segs[0].ds_len >= start + len);
1174
1175	vds[s].addr = virtio_rw64(sc, dmamap->dm_segs[0].ds_addr + start);
1176	vds[s].len  = virtio_rw32(sc, len);
1177	if (!write)
1178		vds[s].flags |= virtio_rw16(sc, VRING_DESC_F_WRITE);
1179
1180	if ((vds[s].flags & virtio_rw16(sc, VRING_DESC_F_NEXT)) == 0) {
1181		s = VRING_DESC_CHAIN_END;
1182	} else {
1183		s = virtio_rw16(sc, vds[s].next);
1184	}
1185
1186	vdx->desc_free_idx = s;
1187
1188	return 0;
1189}
1190
1191/*
1192 * enqueue_commit: add it to the aring.
1193 */
1194int
1195virtio_enqueue_commit(struct virtio_softc *sc, struct virtqueue *vq, int slot,
1196    bool notifynow)
1197{
1198
1199	if (slot < 0) {
1200		mutex_enter(&vq->vq_aring_lock);
1201		goto notify;
1202	}
1203
1204	vq_sync_descs(sc, vq, BUS_DMASYNC_PREWRITE);
1205	if (vq->vq_descx[slot].use_indirect)
1206		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_PREWRITE);
1207
1208	mutex_enter(&vq->vq_aring_lock);
1209	vq->vq_avail->ring[(vq->vq_avail_idx++) % vq->vq_num] =
1210	    virtio_rw16(sc, slot);
1211
1212notify:
1213	if (notifynow) {
1214		uint16_t o, n, t;
1215		uint16_t flags;
1216
1217		o = virtio_rw16(sc, vq->vq_avail->idx) - 1;
1218		n = vq->vq_avail_idx;
1219
1220		/*
1221		 * Prepare for `device->CPU' (host->guest) transfer
1222		 * into the buffer.  This must happen before we commit
1223		 * the vq->vq_avail->idx update to ensure we're not
1224		 * still using the buffer in case program-prior loads
1225		 * or stores in it get delayed past the store to
1226		 * vq->vq_avail->idx.
1227		 */
1228		vq_sync_uring_all(sc, vq, BUS_DMASYNC_PREREAD);
1229
1230		/* ensure payload is published, then avail idx */
1231		vq_sync_aring_payload(sc, vq, BUS_DMASYNC_PREWRITE);
1232		vq->vq_avail->idx = virtio_rw16(sc, vq->vq_avail_idx);
1233		vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
1234		vq->vq_queued++;
1235
1236		if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
1237			vq_sync_uring_avail(sc, vq, BUS_DMASYNC_POSTREAD);
1238			t = virtio_rw16(sc, *vq->vq_avail_event) + 1;
1239			if ((uint16_t) (n - t) < (uint16_t) (n - o))
1240				sc->sc_ops->kick(sc, vq->vq_index);
1241		} else {
1242			vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
1243			flags = virtio_rw16(sc, vq->vq_used->flags);
1244			if (!(flags & VRING_USED_F_NO_NOTIFY))
1245				sc->sc_ops->kick(sc, vq->vq_index);
1246		}
1247	}
1248	mutex_exit(&vq->vq_aring_lock);
1249
1250	return 0;
1251}
1252
1253/*
1254 * enqueue_abort: rollback.
1255 */
1256int
1257virtio_enqueue_abort(struct virtio_softc *sc, struct virtqueue *vq, int slot)
1258{
1259	struct vring_desc_extra *vdx;
1260
1261	vq_free_slot(sc, vq, slot);
1262
1263	vdx = &vq->vq_descx[slot];
1264	vdx->desc_free_idx = VRING_DESC_CHAIN_END;
1265	vdx->desc_base = NULL;
1266
1267	return 0;
1268}
1269
1270/*
1271 * Dequeue a request.
1272 */
1273/*
1274 * dequeue: dequeue a request from uring; dmamap_sync for uring is
1275 *	    already done in the interrupt handler.
1276 */
1277int
1278virtio_dequeue(struct virtio_softc *sc, struct virtqueue *vq,
1279    int *slotp, int *lenp)
1280{
1281	uint16_t slot, usedidx;
1282
1283	if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
1284		return ENOENT;
1285	mutex_enter(&vq->vq_uring_lock);
1286	usedidx = vq->vq_used_idx++;
1287	mutex_exit(&vq->vq_uring_lock);
1288	usedidx %= vq->vq_num;
1289	slot = virtio_rw32(sc, vq->vq_used->ring[usedidx].id);
1290
1291	if (vq->vq_descx[slot].use_indirect)
1292		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_POSTWRITE);
1293
1294	if (slotp)
1295		*slotp = slot;
1296	if (lenp)
1297		*lenp = virtio_rw32(sc, vq->vq_used->ring[usedidx].len);
1298
1299	return 0;
1300}
1301
1302/*
1303 * dequeue_commit: complete dequeue; the slot is recycled for future use.
1304 *                 if you forget to call this the slot will be leaked.
1305 */
1306int
1307virtio_dequeue_commit(struct virtio_softc *sc, struct virtqueue *vq, int slot)
1308{
1309	struct vring_desc_extra *vdx;
1310
1311	vq_free_slot(sc, vq, slot);
1312
1313	vdx = &vq->vq_descx[slot];
1314	vdx->desc_base = NULL;
1315	vdx->desc_free_idx = VRING_DESC_CHAIN_END;
1316
1317	return 0;
1318}
1319
1320/*
1321 * Attach a child, fill all the members.
1322 */
1323void
1324virtio_child_attach_start(struct virtio_softc *sc, device_t child, int ipl,
1325    uint64_t req_features, const char *feat_bits)
1326{
1327	char buf[1024];
1328
1329	KASSERT(sc->sc_child == NULL);
1330	KASSERT(sc->sc_child_state == VIRTIO_NO_CHILD);
1331
1332	sc->sc_child = child;
1333	sc->sc_ipl = ipl;
1334
1335	virtio_negotiate_features(sc, req_features);
1336	snprintb(buf, sizeof(buf), feat_bits, sc->sc_active_features);
1337	aprint_normal(": features: %s\n", buf);
1338	aprint_naive("\n");
1339}
1340
1341int
1342virtio_child_attach_finish(struct virtio_softc *sc,
1343    struct virtqueue *vqs, size_t nvqs,
1344    virtio_callback config_change,
1345    int req_flags)
1346{
1347	size_t i;
1348	int r;
1349
1350#ifdef DIAGNOSTIC
1351	KASSERT(nvqs > 0);
1352#define VIRTIO_ASSERT_FLAGS	(VIRTIO_F_INTR_SOFTINT | VIRTIO_F_INTR_PERVQ)
1353	KASSERT((req_flags & VIRTIO_ASSERT_FLAGS) != VIRTIO_ASSERT_FLAGS);
1354#undef VIRTIO_ASSERT_FLAGS
1355
1356	for (i = 0; i < nvqs; i++){
1357		KASSERT(vqs[i].vq_index == i);
1358		KASSERT(vqs[i].vq_intrhand != NULL);
1359		KASSERT(vqs[i].vq_done == NULL ||
1360		    vqs[i].vq_intrhand == virtio_vq_done);
1361	}
1362#endif
1363
1364
1365	sc->sc_vqs = vqs;
1366	sc->sc_nvqs = nvqs;
1367	sc->sc_config_change = config_change;
1368	sc->sc_intrhand = virtio_vq_intr;
1369	sc->sc_flags = req_flags;
1370
1371	/* set the vq address */
1372	for (i = 0; i < nvqs; i++) {
1373		sc->sc_ops->setup_queue(sc, vqs[i].vq_index,
1374		    vqs[i].vq_dmamap->dm_segs[0].ds_addr);
1375	}
1376
1377	r = sc->sc_ops->alloc_interrupts(sc);
1378	if (r != 0) {
1379		aprint_error_dev(sc->sc_dev,
1380		    "failed to allocate interrupts\n");
1381		goto fail;
1382	}
1383
1384	r = sc->sc_ops->setup_interrupts(sc, 0);
1385	if (r != 0) {
1386		aprint_error_dev(sc->sc_dev, "failed to setup interrupts\n");
1387		goto fail;
1388	}
1389
1390	KASSERT(sc->sc_soft_ih == NULL);
1391	if (sc->sc_flags & VIRTIO_F_INTR_SOFTINT) {
1392		u_int flags = SOFTINT_NET;
1393		if (sc->sc_flags & VIRTIO_F_INTR_MPSAFE)
1394			flags |= SOFTINT_MPSAFE;
1395
1396		sc->sc_soft_ih = softint_establish(flags, virtio_soft_intr,
1397		    sc);
1398		if (sc->sc_soft_ih == NULL) {
1399			sc->sc_ops->free_interrupts(sc);
1400			aprint_error_dev(sc->sc_dev,
1401			    "failed to establish soft interrupt\n");
1402			goto fail;
1403		}
1404	}
1405
1406	sc->sc_child_state = VIRTIO_CHILD_ATTACH_FINISHED;
1407	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
1408	return 0;
1409
1410fail:
1411	if (sc->sc_soft_ih) {
1412		softint_disestablish(sc->sc_soft_ih);
1413		sc->sc_soft_ih = NULL;
1414	}
1415
1416	sc->sc_ops->free_interrupts(sc);
1417
1418	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1419	return 1;
1420}
1421
1422void
1423virtio_child_detach(struct virtio_softc *sc)
1424{
1425
1426	/* already detached */
1427	if (sc->sc_child == NULL)
1428		return;
1429
1430
1431	virtio_device_reset(sc);
1432
1433	sc->sc_ops->free_interrupts(sc);
1434
1435	if (sc->sc_soft_ih) {
1436		softint_disestablish(sc->sc_soft_ih);
1437		sc->sc_soft_ih = NULL;
1438	}
1439
1440	sc->sc_vqs = NULL;
1441	sc->sc_child = NULL;
1442}
1443
1444void
1445virtio_child_attach_failed(struct virtio_softc *sc)
1446{
1447	virtio_child_detach(sc);
1448
1449	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1450
1451	sc->sc_child_state = VIRTIO_CHILD_ATTACH_FAILED;
1452}
1453
1454bus_dma_tag_t
1455virtio_dmat(struct virtio_softc *sc)
1456{
1457	return sc->sc_dmat;
1458}
1459
1460device_t
1461virtio_child(struct virtio_softc *sc)
1462{
1463	return sc->sc_child;
1464}
1465
1466int
1467virtio_intrhand(struct virtio_softc *sc)
1468{
1469	return (*sc->sc_intrhand)(sc);
1470}
1471
1472uint64_t
1473virtio_features(struct virtio_softc *sc)
1474{
1475	return sc->sc_active_features;
1476}
1477
1478int
1479virtio_attach_failed(struct virtio_softc *sc)
1480{
1481	device_t self = sc->sc_dev;
1482
1483	/* no error if its not connected, but its failed */
1484	if (sc->sc_childdevid == 0)
1485		return 1;
1486
1487	if (sc->sc_child == NULL) {
1488		switch (sc->sc_child_state) {
1489		case VIRTIO_CHILD_ATTACH_FAILED:
1490			aprint_error_dev(self,
1491			    "virtio configuration failed\n");
1492			break;
1493		case VIRTIO_NO_CHILD:
1494			aprint_error_dev(self,
1495			    "no matching child driver; not configured\n");
1496			break;
1497		default:
1498			/* sanity check */
1499			aprint_error_dev(self,
1500			    "virtio internal error, "
1501			    "child driver is not configured\n");
1502			break;
1503		}
1504
1505		return 1;
1506	}
1507
1508	/* sanity check */
1509	if (sc->sc_child_state != VIRTIO_CHILD_ATTACH_FINISHED) {
1510		aprint_error_dev(self, "virtio internal error, child driver "
1511		    "signaled OK but didn't initialize interrupts\n");
1512		return 1;
1513	}
1514
1515	return 0;
1516}
1517
1518void
1519virtio_print_device_type(device_t self, int id, int revision)
1520{
1521	aprint_normal_dev(self, "%s device (id %d, rev. 0x%02x)\n",
1522	    (id < NDEVNAMES ? virtio_device_name[id] : "Unknown"),
1523	    id,
1524	    revision);
1525}
1526
1527
1528MODULE(MODULE_CLASS_DRIVER, virtio, NULL);
1529
1530#ifdef _MODULE
1531#include "ioconf.c"
1532#endif
1533
1534static int
1535virtio_modcmd(modcmd_t cmd, void *opaque)
1536{
1537	int error = 0;
1538
1539#ifdef _MODULE
1540	switch (cmd) {
1541	case MODULE_CMD_INIT:
1542		error = config_init_component(cfdriver_ioconf_virtio,
1543		    cfattach_ioconf_virtio, cfdata_ioconf_virtio);
1544		break;
1545	case MODULE_CMD_FINI:
1546		error = config_fini_component(cfdriver_ioconf_virtio,
1547		    cfattach_ioconf_virtio, cfdata_ioconf_virtio);
1548		break;
1549	default:
1550		error = ENOTTY;
1551		break;
1552	}
1553#endif
1554
1555	return error;
1556}
1557