1/*	$NetBSD: virtio.c,v 1.81 2024/02/10 02:25:15 isaki Exp $	*/
2
3/*
4 * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 * Copyright (c) 2012 Stefan Fritsch, Alexander Fiveg.
6 * Copyright (c) 2010 Minoura Makoto.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__KERNEL_RCSID(0, "$NetBSD: virtio.c,v 1.81 2024/02/10 02:25:15 isaki Exp $");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/kernel.h>
36#include <sys/atomic.h>
37#include <sys/bus.h>
38#include <sys/device.h>
39#include <sys/kmem.h>
40#include <sys/module.h>
41
42#define VIRTIO_PRIVATE
43
44#include <dev/pci/virtioreg.h> /* XXX: move to non-pci */
45#include <dev/pci/virtiovar.h> /* XXX: move to non-pci */
46
47#define MINSEG_INDIRECT		2 /* use indirect if nsegs >= this value */
48
49/*
50 * The maximum descriptor size is 2^15. Use that value as the end of
51 * descriptor chain terminator since it will never be a valid index
52 * in the descriptor table.
53 */
54#define VRING_DESC_CHAIN_END		32768
55
/*
 * Human-readable names of virtio device types, indexed by device ID.
 * The list is incomplete: only IDs 0 through 9 have an entry, so callers
 * must bound-check against NDEVNAMES before indexing.
 *
 * Both the strings and the pointer table itself are constant.
 */
static const char * const virtio_device_name[] = {
	"unknown (0)",			/*  0 */
	"network",			/*  1 */
	"block",			/*  2 */
	"console",			/*  3 */
	"entropy",			/*  4 */
	"memory balloon",		/*  5 */
	"I/O memory",			/*  6 */
	"remote processor messaging",	/*  7 */
	"SCSI",				/*  8 */
	"9P transport",			/*  9 */
};
#define NDEVNAMES	__arraycount(virtio_device_name)
70
71static void	virtio_reset_vq(struct virtio_softc *,
72		    struct virtqueue *);
73
74void
75virtio_set_status(struct virtio_softc *sc, int status)
76{
77	sc->sc_ops->set_status(sc, status);
78}
79
80/*
81 * Reset the device.
82 */
/*
 * To reset the device to a known state, do the following:
 *	virtio_reset(sc);	     // this will stop the device activity
 *	<dequeue finished requests>; // virtio_dequeue() can still be called
 *	<revoke pending requests in the vqs if any>;
 *	virtio_reinit_start(sc);     // dequeue prohibited
 *	virtio_negotiate_features(sc, requestedfeatures);
 *	<some other initialization>;
 *	virtio_reinit_end(sc);	     // device activated; enqueue allowed
 * Once attached, feature negotiation is only allowed after virtio_reset.
 */
void
virtio_reset(struct virtio_softc *sc)
{

	/* The reset itself is delegated entirely to virtio_device_reset(). */
	virtio_device_reset(sc);
}
99
100int
101virtio_reinit_start(struct virtio_softc *sc)
102{
103	int i, r;
104
105	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
106	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
107	for (i = 0; i < sc->sc_nvqs; i++) {
108		int n;
109		struct virtqueue *vq = &sc->sc_vqs[i];
110		n = sc->sc_ops->read_queue_size(sc, vq->vq_index);
111		if (n == 0)	/* vq disappeared */
112			continue;
113		if (n != vq->vq_num) {
114			panic("%s: virtqueue size changed, vq index %d\n",
115			    device_xname(sc->sc_dev),
116			    vq->vq_index);
117		}
118		virtio_reset_vq(sc, vq);
119		sc->sc_ops->setup_queue(sc, vq->vq_index,
120		    vq->vq_dmamap->dm_segs[0].ds_addr);
121	}
122
123	r = sc->sc_ops->setup_interrupts(sc, 1);
124	if (r != 0)
125		goto fail;
126
127	return 0;
128
129fail:
130	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
131
132	return 1;
133}
134
135void
136virtio_reinit_end(struct virtio_softc *sc)
137{
138	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
139}
140
141/*
142 * Feature negotiation.
143 */
144void
145virtio_negotiate_features(struct virtio_softc *sc, uint64_t guest_features)
146{
147	if (!(device_cfdata(sc->sc_dev)->cf_flags & 1) &&
148	    !(device_cfdata(sc->sc_child)->cf_flags & 1)) /* XXX */
149		guest_features |= VIRTIO_F_RING_INDIRECT_DESC;
150	sc->sc_ops->neg_features(sc, guest_features);
151	if (sc->sc_active_features & VIRTIO_F_RING_INDIRECT_DESC)
152		sc->sc_indirect = true;
153	else
154		sc->sc_indirect = false;
155}
156
157
158/*
159 * Device configuration registers readers/writers
160 */
/*
 * Debug tracing for the device configuration accessors, disabled by
 * default (change "#if 0" to "#if 1" to enable).  When enabled, both
 * macros expect a `struct virtio_softc *sc' in the calling scope.
 *
 * The enabled forms are wrapped in do { } while (0) so that each use
 * behaves as a single statement, and their arguments are parenthesized.
 */
#if 0
#define DPRINTFR(n, fmt, val, index, num) do {				\
	printf("\n%s (", (n));						\
	for (int i_ = 0; i_ < (num); i_++)				\
		printf("%02x ", bus_space_read_1(sc->sc_devcfg_iot,	\
		    sc->sc_devcfg_ioh, (index) + i_));			\
	printf(") -> ");						\
	printf(fmt, (val));						\
	printf("\n");							\
} while (0)
#define DPRINTFR2(n, fmt, val_s, val_n) do {				\
	printf("%s ", (n));						\
	printf("\n        stream ");					\
	printf(fmt, (val_s));						\
	printf(" norm ");						\
	printf(fmt, (val_n));						\
	printf("\n");							\
} while (0)
#else
#define DPRINTFR(n, fmt, val, index, num)
#define DPRINTFR2(n, fmt, val_s, val_n)
#endif
174
175
176uint8_t
177virtio_read_device_config_1(struct virtio_softc *sc, int index)
178{
179	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
180	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
181	uint8_t val;
182
183	val = bus_space_read_1(iot, ioh, index);
184
185	DPRINTFR("read_1", "%02x", val, index, 1);
186	return val;
187}
188
189uint16_t
190virtio_read_device_config_2(struct virtio_softc *sc, int index)
191{
192	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
193	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
194	uint16_t val;
195
196	val = bus_space_read_2(iot, ioh, index);
197	if (BYTE_ORDER != sc->sc_bus_endian)
198		val = bswap16(val);
199
200	DPRINTFR("read_2", "%04x", val, index, 2);
201	DPRINTFR2("read_2", "%04x",
202	    bus_space_read_stream_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
203		index),
204	    bus_space_read_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index));
205	return val;
206}
207
208uint32_t
209virtio_read_device_config_4(struct virtio_softc *sc, int index)
210{
211	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
212	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
213	uint32_t val;
214
215	val = bus_space_read_4(iot, ioh, index);
216	if (BYTE_ORDER != sc->sc_bus_endian)
217		val = bswap32(val);
218
219	DPRINTFR("read_4", "%08x", val, index, 4);
220	DPRINTFR2("read_4", "%08x",
221	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
222		index),
223	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index));
224	return val;
225}
226
227/*
228 * The Virtio spec explicitly tells that reading and writing 8 bytes are not
229 * considered atomic and no triggers may be connected to reading or writing
230 * it. We access it using two 32 reads. See virtio spec 4.1.3.1.
231 */
232uint64_t
233virtio_read_device_config_8(struct virtio_softc *sc, int index)
234{
235	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
236	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
237	union {
238		uint64_t u64;
239		uint32_t l[2];
240	} v;
241	uint64_t val;
242
243	v.l[0] = bus_space_read_4(iot, ioh, index);
244	v.l[1] = bus_space_read_4(iot, ioh, index + 4);
245	if (sc->sc_bus_endian != sc->sc_struct_endian) {
246		v.l[0] = bswap32(v.l[0]);
247		v.l[1] = bswap32(v.l[1]);
248	}
249	val = v.u64;
250
251	if (BYTE_ORDER != sc->sc_struct_endian)
252		val = bswap64(val);
253
254	DPRINTFR("read_8", "%08"PRIx64, val, index, 8);
255	DPRINTFR2("read_8 low ", "%08x",
256	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
257		index),
258	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index));
259	DPRINTFR2("read_8 high ", "%08x",
260	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
261		index + 4),
262	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index + 4));
263	return val;
264}
265
266/*
267 * In the older virtio spec, device config registers are host endian. On newer
268 * they are little endian. Some newer devices however explicitly specify their
269 * register to always be little endian. These functions cater for these.
270 */
271uint16_t
272virtio_read_device_config_le_2(struct virtio_softc *sc, int index)
273{
274	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
275	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
276	uint16_t val;
277
278	val = bus_space_read_2(iot, ioh, index);
279#if !defined(__aarch64__) && !defined(__arm__)
280	/*
281	 * For big-endian aarch64/armv7, bus endian is always LSB, but
282	 * byte-order is automatically swapped by bus_space(9) (see also
283	 * comments in virtio_pci.c). Therefore, no need to swap here.
284	 */
285	if (sc->sc_bus_endian != LITTLE_ENDIAN)
286		val = bswap16(val);
287#endif
288
289	DPRINTFR("read_le_2", "%04x", val, index, 2);
290	DPRINTFR2("read_le_2", "%04x",
291	    bus_space_read_stream_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0),
292	    bus_space_read_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0));
293	return val;
294}
295
296uint32_t
297virtio_read_device_config_le_4(struct virtio_softc *sc, int index)
298{
299	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
300	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
301	uint32_t val;
302
303	val = bus_space_read_4(iot, ioh, index);
304#if !defined(__aarch64__) && !defined(__arm__)
305	/* See virtio_read_device_config_le_2() above. */
306	if (sc->sc_bus_endian != LITTLE_ENDIAN)
307		val = bswap32(val);
308#endif
309
310	DPRINTFR("read_le_4", "%08x", val, index, 4);
311	DPRINTFR2("read_le_4", "%08x",
312	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0),
313	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0));
314	return val;
315}
316
317void
318virtio_write_device_config_1(struct virtio_softc *sc, int index, uint8_t value)
319{
320	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
321	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
322
323	bus_space_write_1(iot, ioh, index, value);
324}
325
326void
327virtio_write_device_config_2(struct virtio_softc *sc, int index,
328    uint16_t value)
329{
330	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
331	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
332
333	if (BYTE_ORDER != sc->sc_bus_endian)
334		value = bswap16(value);
335	bus_space_write_2(iot, ioh, index, value);
336}
337
338void
339virtio_write_device_config_4(struct virtio_softc *sc, int index,
340    uint32_t value)
341{
342	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
343	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
344
345	if (BYTE_ORDER != sc->sc_bus_endian)
346		value = bswap32(value);
347	bus_space_write_4(iot, ioh, index, value);
348}
349
350/*
351 * The Virtio spec explicitly tells that reading and writing 8 bytes are not
352 * considered atomic and no triggers may be connected to reading or writing
353 * it. We access it using two 32 bit writes. For good measure it is stated to
354 * always write lsb first just in case of a hypervisor bug. See See virtio
355 * spec 4.1.3.1.
356 */
357void
358virtio_write_device_config_8(struct virtio_softc *sc, int index,
359    uint64_t value)
360{
361	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
362	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
363	union {
364		uint64_t u64;
365		uint32_t l[2];
366	} v;
367
368	if (BYTE_ORDER != sc->sc_struct_endian)
369		value = bswap64(value);
370
371	v.u64 = value;
372	if (sc->sc_bus_endian != sc->sc_struct_endian) {
373		v.l[0] = bswap32(v.l[0]);
374		v.l[1] = bswap32(v.l[1]);
375	}
376
377	if (sc->sc_struct_endian == LITTLE_ENDIAN) {
378		bus_space_write_4(iot, ioh, index,     v.l[0]);
379		bus_space_write_4(iot, ioh, index + 4, v.l[1]);
380	} else {
381		bus_space_write_4(iot, ioh, index + 4, v.l[1]);
382		bus_space_write_4(iot, ioh, index,     v.l[0]);
383	}
384}
385
386/*
387 * In the older virtio spec, device config registers are host endian. On newer
388 * they are little endian. Some newer devices however explicitly specify their
389 * register to always be little endian. These functions cater for these.
390 */
391void
392virtio_write_device_config_le_2(struct virtio_softc *sc, int index,
393    uint16_t value)
394{
395	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
396	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
397
398	if (sc->sc_bus_endian != LITTLE_ENDIAN)
399		value = bswap16(value);
400	bus_space_write_2(iot, ioh, index, value);
401}
402
403void
404virtio_write_device_config_le_4(struct virtio_softc *sc, int index,
405    uint32_t value)
406{
407	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
408	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
409
410	if (sc->sc_bus_endian != LITTLE_ENDIAN)
411		value = bswap32(value);
412	bus_space_write_4(iot, ioh, index, value);
413}
414
415
416/*
417 * data structures endian helpers
418 */
419uint16_t
420virtio_rw16(struct virtio_softc *sc, uint16_t val)
421{
422	KASSERT(sc);
423	return BYTE_ORDER != sc->sc_struct_endian ? bswap16(val) : val;
424}
425
426uint32_t
427virtio_rw32(struct virtio_softc *sc, uint32_t val)
428{
429	KASSERT(sc);
430	return BYTE_ORDER != sc->sc_struct_endian ? bswap32(val) : val;
431}
432
433uint64_t
434virtio_rw64(struct virtio_softc *sc, uint64_t val)
435{
436	KASSERT(sc);
437	return BYTE_ORDER != sc->sc_struct_endian ? bswap64(val) : val;
438}
439
440
441/*
442 * Interrupt handler.
443 */
444static void
445virtio_soft_intr(void *arg)
446{
447	struct virtio_softc *sc = arg;
448
449	KASSERT(sc->sc_intrhand != NULL);
450
451	(*sc->sc_intrhand)(sc);
452}
453
454/* set to vq->vq_intrhand in virtio_init_vq_vqdone() */
455static int
456virtio_vq_done(void *xvq)
457{
458	struct virtqueue *vq = xvq;
459
460	return vq->vq_done(vq);
461}
462
463static int
464virtio_vq_intr(struct virtio_softc *sc)
465{
466	struct virtqueue *vq;
467	int i, r = 0;
468
469	for (i = 0; i < sc->sc_nvqs; i++) {
470		vq = &sc->sc_vqs[i];
471		if (virtio_vq_is_enqueued(sc, vq) == 1) {
472			r |= (*vq->vq_intrhand)(vq->vq_intrhand_arg);
473		}
474	}
475
476	return r;
477}
478
479/*
480 * dmamap sync operations for a virtqueue.
481 */
482static inline void
483vq_sync_descs(struct virtio_softc *sc, struct virtqueue *vq, int ops)
484{
485
486	/* availoffset == sizeof(vring_desc) * vq_num */
487	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, 0, vq->vq_availoffset,
488	    ops);
489}
490
491static inline void
492vq_sync_aring_all(struct virtio_softc *sc, struct virtqueue *vq, int ops)
493{
494	uint16_t hdrlen = offsetof(struct vring_avail, ring);
495	size_t payloadlen = vq->vq_num * sizeof(uint16_t);
496	size_t usedlen = 0;
497
498	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX)
499		usedlen = sizeof(uint16_t);
500	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
501	    vq->vq_availoffset, hdrlen + payloadlen + usedlen, ops);
502}
503
504static inline void
505vq_sync_aring_header(struct virtio_softc *sc, struct virtqueue *vq, int ops)
506{
507	uint16_t hdrlen = offsetof(struct vring_avail, ring);
508
509	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
510	    vq->vq_availoffset, hdrlen, ops);
511}
512
513static inline void
514vq_sync_aring_payload(struct virtio_softc *sc, struct virtqueue *vq, int ops)
515{
516	uint16_t hdrlen = offsetof(struct vring_avail, ring);
517	size_t payloadlen = vq->vq_num * sizeof(uint16_t);
518
519	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
520	    vq->vq_availoffset + hdrlen, payloadlen, ops);
521}
522
523static inline void
524vq_sync_aring_used(struct virtio_softc *sc, struct virtqueue *vq, int ops)
525{
526	uint16_t hdrlen = offsetof(struct vring_avail, ring);
527	size_t payloadlen = vq->vq_num * sizeof(uint16_t);
528	size_t usedlen = sizeof(uint16_t);
529
530	if ((sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) == 0)
531		return;
532	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
533	    vq->vq_availoffset + hdrlen + payloadlen, usedlen, ops);
534}
535
536static inline void
537vq_sync_uring_all(struct virtio_softc *sc, struct virtqueue *vq, int ops)
538{
539	uint16_t hdrlen = offsetof(struct vring_used, ring);
540	size_t payloadlen = vq->vq_num * sizeof(struct vring_used_elem);
541	size_t availlen = 0;
542
543	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX)
544		availlen = sizeof(uint16_t);
545	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
546	    vq->vq_usedoffset, hdrlen + payloadlen + availlen, ops);
547}
548
549static inline void
550vq_sync_uring_header(struct virtio_softc *sc, struct virtqueue *vq, int ops)
551{
552	uint16_t hdrlen = offsetof(struct vring_used, ring);
553
554	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
555	    vq->vq_usedoffset, hdrlen, ops);
556}
557
558static inline void
559vq_sync_uring_payload(struct virtio_softc *sc, struct virtqueue *vq, int ops)
560{
561	uint16_t hdrlen = offsetof(struct vring_used, ring);
562	size_t payloadlen = vq->vq_num * sizeof(struct vring_used_elem);
563
564	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
565	    vq->vq_usedoffset + hdrlen, payloadlen, ops);
566}
567
568static inline void
569vq_sync_uring_avail(struct virtio_softc *sc, struct virtqueue *vq, int ops)
570{
571	uint16_t hdrlen = offsetof(struct vring_used, ring);
572	size_t payloadlen = vq->vq_num * sizeof(struct vring_used_elem);
573	size_t availlen = sizeof(uint16_t);
574
575	if ((sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) == 0)
576		return;
577	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
578	    vq->vq_usedoffset + hdrlen + payloadlen, availlen, ops);
579}
580
581static inline void
582vq_sync_indirect(struct virtio_softc *sc, struct virtqueue *vq, int slot,
583    int ops)
584{
585	int offset = vq->vq_indirectoffset +
586	    sizeof(struct vring_desc) * vq->vq_maxnsegs * slot;
587
588	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
589	    offset, sizeof(struct vring_desc) * vq->vq_maxnsegs, ops);
590}
591
592bool
593virtio_vq_is_enqueued(struct virtio_softc *sc, struct virtqueue *vq)
594{
595
596	if (vq->vq_queued) {
597		vq->vq_queued = 0;
598		vq_sync_aring_all(sc, vq, BUS_DMASYNC_POSTWRITE);
599	}
600
601	vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
602	if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
603		return 0;
604	vq_sync_uring_payload(sc, vq, BUS_DMASYNC_POSTREAD);
605	return 1;
606}
607
608/*
609 * Increase the event index in order to delay interrupts.
610 */
611int
612virtio_postpone_intr(struct virtio_softc *sc, struct virtqueue *vq,
613    uint16_t nslots)
614{
615	uint16_t	idx, nused;
616
617	idx = vq->vq_used_idx + nslots;
618
619	/* set the new event index: avail_ring->used_event = idx */
620	*vq->vq_used_event = virtio_rw16(sc, idx);
621	vq_sync_aring_used(vq->vq_owner, vq, BUS_DMASYNC_PREWRITE);
622	vq->vq_queued++;
623
624	nused = (uint16_t)
625	    (virtio_rw16(sc, vq->vq_used->idx) - vq->vq_used_idx);
626	KASSERT(nused <= vq->vq_num);
627
628	return nslots < nused;
629}
630
631/*
632 * Postpone interrupt until 3/4 of the available descriptors have been
633 * consumed.
634 */
635int
636virtio_postpone_intr_smart(struct virtio_softc *sc, struct virtqueue *vq)
637{
638	uint16_t	nslots;
639
640	nslots = (uint16_t)
641	    (virtio_rw16(sc, vq->vq_avail->idx) - vq->vq_used_idx) * 3 / 4;
642
643	return virtio_postpone_intr(sc, vq, nslots);
644}
645
646/*
647 * Postpone interrupt until all of the available descriptors have been
648 * consumed.
649 */
650int
651virtio_postpone_intr_far(struct virtio_softc *sc, struct virtqueue *vq)
652{
653	uint16_t	nslots;
654
655	nslots = (uint16_t)
656	    (virtio_rw16(sc, vq->vq_avail->idx) - vq->vq_used_idx);
657
658	return virtio_postpone_intr(sc, vq, nslots);
659}
660
661/*
662 * Start/stop vq interrupt.  No guarantee.
663 */
664void
665virtio_stop_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
666{
667
668	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
669		/*
670		 * No way to disable the interrupt completely with
671		 * RingEventIdx. Instead advance used_event by half the
672		 * possible value. This won't happen soon and is far enough in
673		 * the past to not trigger a spurious interrupt.
674		 */
675		*vq->vq_used_event = virtio_rw16(sc, vq->vq_used_idx + 0x8000);
676		vq_sync_aring_used(sc, vq, BUS_DMASYNC_PREWRITE);
677	} else {
678		vq->vq_avail->flags |=
679		    virtio_rw16(sc, VRING_AVAIL_F_NO_INTERRUPT);
680		vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
681	}
682	vq->vq_queued++;
683}
684
685int
686virtio_start_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
687{
688
689	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
690		/*
691		 * If event index feature is negotiated, enabling interrupts
692		 * is done through setting the latest consumed index in the
693		 * used_event field
694		 */
695		*vq->vq_used_event = virtio_rw16(sc, vq->vq_used_idx);
696		vq_sync_aring_used(sc, vq, BUS_DMASYNC_PREWRITE);
697	} else {
698		vq->vq_avail->flags &=
699		    ~virtio_rw16(sc, VRING_AVAIL_F_NO_INTERRUPT);
700		vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
701	}
702	vq->vq_queued++;
703
704	vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
705	if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
706		return 0;
707	vq_sync_uring_payload(sc, vq, BUS_DMASYNC_POSTREAD);
708	return 1;
709}
710
711/*
712 * Initialize vq structure.
713 */
714/*
715 * Reset virtqueue parameters
716 */
717static void
718virtio_reset_vq(struct virtio_softc *sc, struct virtqueue *vq)
719{
720	struct vring_desc *vds;
721	int i, j;
722	int vq_size = vq->vq_num;
723
724	memset(vq->vq_vaddr, 0, vq->vq_bytesize);
725
726	/* build the descriptor chain for free slot management */
727	vds = vq->vq_desc;
728	for (i = 0; i < vq_size - 1; i++) {
729		vds[i].next = virtio_rw16(sc, i + 1);
730	}
731	vds[i].next = virtio_rw16(sc, VRING_DESC_CHAIN_END);
732	vq->vq_free_idx = 0;
733
734	/* build the indirect descriptor chain */
735	if (vq->vq_indirect != NULL) {
736		struct vring_desc *vd;
737
738		for (i = 0; i < vq_size; i++) {
739			vd = vq->vq_indirect;
740			vd += vq->vq_maxnsegs * i;
741			for (j = 0; j < vq->vq_maxnsegs - 1; j++) {
742				vd[j].next = virtio_rw16(sc, j + 1);
743			}
744		}
745	}
746
747	/* enqueue/dequeue status */
748	vq->vq_avail_idx = 0;
749	vq->vq_used_idx = 0;
750	vq->vq_queued = 0;
751	vq_sync_uring_all(sc, vq, BUS_DMASYNC_PREREAD);
752	vq->vq_queued++;
753}
754
755/* Initialize vq */
756void
757virtio_init_vq_vqdone(struct virtio_softc *sc, struct virtqueue *vq,
758    int index, int (*vq_done)(struct virtqueue *))
759{
760
761	virtio_init_vq(sc, vq, index, virtio_vq_done, vq);
762	vq->vq_done = vq_done;
763}
764
765void
766virtio_init_vq(struct virtio_softc *sc, struct virtqueue *vq, int index,
767   int (*func)(void *), void *arg)
768{
769
770	memset(vq, 0, sizeof(*vq));
771
772	vq->vq_owner = sc;
773	vq->vq_num = sc->sc_ops->read_queue_size(sc, index);
774	vq->vq_index = index;
775	vq->vq_intrhand = func;
776	vq->vq_intrhand_arg = arg;
777}
778
779/*
780 * Allocate/free a vq.
781 */
782int
783virtio_alloc_vq(struct virtio_softc *sc, struct virtqueue *vq,
784    int maxsegsize, int maxnsegs, const char *name)
785{
786	bus_size_t size_desc, size_avail, size_used, size_indirect;
787	bus_size_t allocsize = 0, size_desc_avail;
788	int rsegs, r, hdrlen;
789	unsigned int vq_num;
790#define VIRTQUEUE_ALIGN(n)	roundup(n, VIRTIO_PAGE_SIZE)
791
792	vq_num = vq->vq_num;
793
794	if (vq_num == 0) {
795		aprint_error_dev(sc->sc_dev,
796		    "virtqueue not exist, index %d for %s\n",
797		    vq->vq_index, name);
798		goto err;
799	}
800
801	hdrlen = sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX ? 3 : 2;
802
803	size_desc = sizeof(vq->vq_desc[0]) * vq_num;
804	size_avail = sizeof(uint16_t) * hdrlen
805	    + sizeof(vq->vq_avail[0].ring[0]) * vq_num;
806	size_used = sizeof(uint16_t) *hdrlen
807	    + sizeof(vq->vq_used[0].ring[0]) * vq_num;
808	size_indirect = (sc->sc_indirect && maxnsegs >= MINSEG_INDIRECT) ?
809	    sizeof(struct vring_desc) * maxnsegs * vq_num : 0;
810
811	size_desc_avail = VIRTQUEUE_ALIGN(size_desc + size_avail);
812	size_used = VIRTQUEUE_ALIGN(size_used);
813
814	allocsize = size_desc_avail + size_used + size_indirect;
815
816	/* alloc and map the memory */
817	r = bus_dmamem_alloc(sc->sc_dmat, allocsize, VIRTIO_PAGE_SIZE, 0,
818	    &vq->vq_segs[0], 1, &rsegs, BUS_DMA_WAITOK);
819	if (r != 0) {
820		aprint_error_dev(sc->sc_dev,
821		    "virtqueue %d for %s allocation failed, "
822		    "error code %d\n", vq->vq_index, name, r);
823		goto err;
824	}
825
826	r = bus_dmamem_map(sc->sc_dmat, &vq->vq_segs[0], rsegs, allocsize,
827	    &vq->vq_vaddr, BUS_DMA_WAITOK);
828	if (r != 0) {
829		aprint_error_dev(sc->sc_dev,
830		    "virtqueue %d for %s map failed, "
831		    "error code %d\n", vq->vq_index, name, r);
832		goto err;
833	}
834
835	r = bus_dmamap_create(sc->sc_dmat, allocsize, 1, allocsize, 0,
836	    BUS_DMA_WAITOK, &vq->vq_dmamap);
837	if (r != 0) {
838		aprint_error_dev(sc->sc_dev,
839		    "virtqueue %d for %s dmamap creation failed, "
840		    "error code %d\n", vq->vq_index, name, r);
841		goto err;
842	}
843
844	r = bus_dmamap_load(sc->sc_dmat, vq->vq_dmamap,
845	    vq->vq_vaddr, allocsize, NULL, BUS_DMA_WAITOK);
846	if (r != 0) {
847		aprint_error_dev(sc->sc_dev,
848		    "virtqueue %d for %s dmamap load failed, "
849		    "error code %d\n", vq->vq_index, name, r);
850		goto err;
851	}
852
853	vq->vq_bytesize = allocsize;
854	vq->vq_maxsegsize = maxsegsize;
855	vq->vq_maxnsegs = maxnsegs;
856
857#define VIRTIO_PTR(base, offset)	(void *)((intptr_t)(base) + (offset))
858	/* initialize vring pointers */
859	vq->vq_desc = VIRTIO_PTR(vq->vq_vaddr, 0);
860	vq->vq_availoffset = size_desc;
861	vq->vq_avail = VIRTIO_PTR(vq->vq_vaddr, vq->vq_availoffset);
862	vq->vq_used_event = VIRTIO_PTR(vq->vq_avail,
863	    offsetof(struct vring_avail, ring[vq_num]));
864	vq->vq_usedoffset = size_desc_avail;
865	vq->vq_used = VIRTIO_PTR(vq->vq_vaddr, vq->vq_usedoffset);
866	vq->vq_avail_event = VIRTIO_PTR(vq->vq_used,
867	    offsetof(struct vring_used, ring[vq_num]));
868
869	if (size_indirect > 0) {
870		vq->vq_indirectoffset = size_desc_avail + size_used;
871		vq->vq_indirect = VIRTIO_PTR(vq->vq_vaddr,
872		    vq->vq_indirectoffset);
873	}
874#undef VIRTIO_PTR
875
876	vq->vq_descx = kmem_zalloc(sizeof(vq->vq_descx[0]) * vq_num,
877	    KM_SLEEP);
878
879	mutex_init(&vq->vq_freedesc_lock, MUTEX_SPIN, sc->sc_ipl);
880	mutex_init(&vq->vq_aring_lock, MUTEX_SPIN, sc->sc_ipl);
881	mutex_init(&vq->vq_uring_lock, MUTEX_SPIN, sc->sc_ipl);
882
883	virtio_reset_vq(sc, vq);
884
885	aprint_verbose_dev(sc->sc_dev,
886	    "allocated %" PRIuBUSSIZE " byte for virtqueue %d for %s, "
887	    "size %d\n", allocsize, vq->vq_index, name, vq_num);
888	if (size_indirect > 0)
889		aprint_verbose_dev(sc->sc_dev,
890		    "using %" PRIuBUSSIZE " byte (%d entries) indirect "
891		    "descriptors\n", size_indirect, maxnsegs * vq_num);
892
893	return 0;
894
895err:
896	sc->sc_ops->setup_queue(sc, vq->vq_index, 0);
897	if (vq->vq_dmamap)
898		bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
899	if (vq->vq_vaddr)
900		bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, allocsize);
901	if (vq->vq_segs[0].ds_addr)
902		bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
903	memset(vq, 0, sizeof(*vq));
904
905	return -1;
906}
907
908int
909virtio_free_vq(struct virtio_softc *sc, struct virtqueue *vq)
910{
911	uint16_t s;
912	size_t i;
913
914	if (vq->vq_vaddr == NULL)
915		return 0;
916
917	/* device must be already deactivated */
918	/* confirm the vq is empty */
919	s = vq->vq_free_idx;
920	i = 0;
921	while (s != virtio_rw16(sc, VRING_DESC_CHAIN_END)) {
922		s = vq->vq_desc[s].next;
923		i++;
924	}
925	if (i != vq->vq_num) {
926		printf("%s: freeing non-empty vq, index %d\n",
927		    device_xname(sc->sc_dev), vq->vq_index);
928		return EBUSY;
929	}
930
931	/* tell device that there's no virtqueue any longer */
932	sc->sc_ops->setup_queue(sc, vq->vq_index, 0);
933
934	vq_sync_aring_all(sc, vq, BUS_DMASYNC_POSTWRITE);
935
936	kmem_free(vq->vq_descx, sizeof(vq->vq_descx[0]) * vq->vq_num);
937	bus_dmamap_unload(sc->sc_dmat, vq->vq_dmamap);
938	bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
939	bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, vq->vq_bytesize);
940	bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
941	mutex_destroy(&vq->vq_freedesc_lock);
942	mutex_destroy(&vq->vq_uring_lock);
943	mutex_destroy(&vq->vq_aring_lock);
944	memset(vq, 0, sizeof(*vq));
945
946	return 0;
947}
948
949/*
950 * Free descriptor management.
951 */
952static int
953vq_alloc_slot_locked(struct virtio_softc *sc, struct virtqueue *vq,
954    size_t nslots)
955{
956	struct vring_desc *vd;
957	uint16_t head, tail;
958	size_t i;
959
960	KASSERT(mutex_owned(&vq->vq_freedesc_lock));
961
962	head = tail = virtio_rw16(sc, vq->vq_free_idx);
963	for (i = 0; i < nslots - 1; i++) {
964		if (tail == VRING_DESC_CHAIN_END)
965			return VRING_DESC_CHAIN_END;
966
967		vd = &vq->vq_desc[tail];
968		vd->flags = virtio_rw16(sc, VRING_DESC_F_NEXT);
969		tail = virtio_rw16(sc, vd->next);
970	}
971
972	if (tail == VRING_DESC_CHAIN_END)
973		return VRING_DESC_CHAIN_END;
974
975	vd = &vq->vq_desc[tail];
976	vd->flags = virtio_rw16(sc, 0);
977	vq->vq_free_idx = vd->next;
978
979	return head;
980}
981static uint16_t
982vq_alloc_slot(struct virtio_softc *sc, struct virtqueue *vq, size_t nslots)
983{
984	uint16_t rv;
985
986	mutex_enter(&vq->vq_freedesc_lock);
987	rv = vq_alloc_slot_locked(sc, vq, nslots);
988	mutex_exit(&vq->vq_freedesc_lock);
989
990	return rv;
991}
992
993static void
994vq_free_slot(struct virtio_softc *sc, struct virtqueue *vq, uint16_t slot)
995{
996	struct vring_desc *vd;
997	uint16_t s;
998
999	mutex_enter(&vq->vq_freedesc_lock);
1000	vd = &vq->vq_desc[slot];
1001	while ((vd->flags & virtio_rw16(sc, VRING_DESC_F_NEXT)) != 0) {
1002		s = virtio_rw16(sc, vd->next);
1003		vd = &vq->vq_desc[s];
1004	}
1005	vd->next = vq->vq_free_idx;
1006	vq->vq_free_idx = virtio_rw16(sc, slot);
1007	mutex_exit(&vq->vq_freedesc_lock);
1008}
1009
1010/*
1011 * Enqueue several dmamaps as a single request.
1012 */
1013/*
1014 * Typical usage:
1015 *  <queue size> number of followings are stored in arrays
1016 *  - command blocks (in dmamem) should be pre-allocated and mapped
1017 *  - dmamaps for command blocks should be pre-allocated and loaded
1018 *  - dmamaps for payload should be pre-allocated
1019 *      r = virtio_enqueue_prep(sc, vq, &slot);		// allocate a slot
1020 *	if (r)		// currently 0 or EAGAIN
1021 *		return r;
1022 *	r = bus_dmamap_load(dmat, dmamap_payload[slot], data, count, ..);
1023 *	if (r) {
1024 *		virtio_enqueue_abort(sc, vq, slot);
1025 *		return r;
1026 *	}
1027 *	r = virtio_enqueue_reserve(sc, vq, slot,
1028 *	    dmamap_payload[slot]->dm_nsegs + 1);
1029 *							// ^ +1 for command
1030 *	if (r) {	// currently 0 or EAGAIN
1031 *		bus_dmamap_unload(dmat, dmamap_payload[slot]);
1032 *		return r;				// do not call abort()
1033 *	}
1034 *	<setup and prepare commands>
1035 *	bus_dmamap_sync(dmat, dmamap_cmd[slot],... BUS_DMASYNC_PREWRITE);
1036 *	bus_dmamap_sync(dmat, dmamap_payload[slot],...);
1037 *	virtio_enqueue(sc, vq, slot, dmamap_cmd[slot], false);
1038 *	virtio_enqueue(sc, vq, slot, dmamap_payload[slot], iswrite);
1039 *	virtio_enqueue_commit(sc, vq, slot, true);
1040 */
1041
1042/*
1043 * enqueue_prep: allocate a slot number
1044 */
1045int
1046virtio_enqueue_prep(struct virtio_softc *sc, struct virtqueue *vq, int *slotp)
1047{
1048	uint16_t slot;
1049
1050	KASSERT(slotp != NULL);
1051
1052	slot = vq_alloc_slot(sc, vq, 1);
1053	if (slot == VRING_DESC_CHAIN_END)
1054		return EAGAIN;
1055
1056	*slotp = slot;
1057
1058	return 0;
1059}
1060
1061/*
1062 * enqueue_reserve: allocate remaining slots and build the descriptor chain.
1063 */
1064int
1065virtio_enqueue_reserve(struct virtio_softc *sc, struct virtqueue *vq,
1066    int slot, int nsegs)
1067{
1068	struct vring_desc *vd;
1069	struct vring_desc_extra *vdx;
1070	int i;
1071
1072	KASSERT(1 <= nsegs);
1073	KASSERT(nsegs <= vq->vq_num);
1074
1075	vdx = &vq->vq_descx[slot];
1076	vd = &vq->vq_desc[slot];
1077
1078	KASSERT((vd->flags & virtio_rw16(sc, VRING_DESC_F_NEXT)) == 0);
1079
1080	if ((vq->vq_indirect != NULL) &&
1081	    (nsegs >= MINSEG_INDIRECT) &&
1082	    (nsegs <= vq->vq_maxnsegs))
1083		vdx->use_indirect = true;
1084	else
1085		vdx->use_indirect = false;
1086
1087	if (vdx->use_indirect) {
1088		uint64_t addr;
1089
1090		addr = vq->vq_dmamap->dm_segs[0].ds_addr
1091		    + vq->vq_indirectoffset;
1092		addr += sizeof(struct vring_desc)
1093		    * vq->vq_maxnsegs * slot;
1094
1095		vd->addr  = virtio_rw64(sc, addr);
1096		vd->len   = virtio_rw32(sc, sizeof(struct vring_desc) * nsegs);
1097		vd->flags = virtio_rw16(sc, VRING_DESC_F_INDIRECT);
1098
1099		vd = &vq->vq_indirect[vq->vq_maxnsegs * slot];
1100		vdx->desc_base = vd;
1101		vdx->desc_free_idx = 0;
1102
1103		for (i = 0; i < nsegs - 1; i++) {
1104			vd[i].flags = virtio_rw16(sc, VRING_DESC_F_NEXT);
1105		}
1106		vd[i].flags  = virtio_rw16(sc, 0);
1107	} else {
1108		if (nsegs > 1) {
1109			uint16_t s;
1110
1111			s = vq_alloc_slot(sc, vq, nsegs - 1);
1112			if (s == VRING_DESC_CHAIN_END) {
1113				vq_free_slot(sc, vq, slot);
1114				return EAGAIN;
1115			}
1116			vd->next = virtio_rw16(sc, s);
1117			vd->flags = virtio_rw16(sc, VRING_DESC_F_NEXT);
1118		}
1119
1120		vdx->desc_base = &vq->vq_desc[0];
1121		vdx->desc_free_idx = slot;
1122	}
1123
1124	return 0;
1125}
1126
1127/*
1128 * enqueue: enqueue a single dmamap.
1129 */
1130int
1131virtio_enqueue(struct virtio_softc *sc, struct virtqueue *vq, int slot,
1132    bus_dmamap_t dmamap, bool write)
1133{
1134	struct vring_desc *vds;
1135	struct vring_desc_extra *vdx;
1136	uint16_t s;
1137	int i;
1138
1139	KASSERT(dmamap->dm_nsegs > 0);
1140
1141	vdx = &vq->vq_descx[slot];
1142	vds = vdx->desc_base;
1143	s = vdx->desc_free_idx;
1144
1145	KASSERT(vds != NULL);
1146
1147	for (i = 0; i < dmamap->dm_nsegs; i++) {
1148		KASSERT(s != VRING_DESC_CHAIN_END);
1149
1150		vds[s].addr = virtio_rw64(sc, dmamap->dm_segs[i].ds_addr);
1151		vds[s].len  = virtio_rw32(sc, dmamap->dm_segs[i].ds_len);
1152		if (!write)
1153			vds[s].flags |= virtio_rw16(sc, VRING_DESC_F_WRITE);
1154
1155		if ((vds[s].flags & virtio_rw16(sc, VRING_DESC_F_NEXT)) == 0) {
1156			s = VRING_DESC_CHAIN_END;
1157		} else {
1158			s = virtio_rw16(sc, vds[s].next);
1159		}
1160	}
1161
1162	vdx->desc_free_idx = s;
1163
1164	return 0;
1165}
1166
1167int
1168virtio_enqueue_p(struct virtio_softc *sc, struct virtqueue *vq, int slot,
1169    bus_dmamap_t dmamap, bus_addr_t start, bus_size_t len,
1170    bool write)
1171{
1172	struct vring_desc_extra *vdx;
1173	struct vring_desc *vds;
1174	uint16_t s;
1175
1176	vdx = &vq->vq_descx[slot];
1177	vds = vdx->desc_base;
1178	s = vdx->desc_free_idx;
1179
1180	KASSERT(s != VRING_DESC_CHAIN_END);
1181	KASSERT(vds != NULL);
1182	KASSERT(dmamap->dm_nsegs == 1); /* XXX */
1183	KASSERT(dmamap->dm_segs[0].ds_len > start);
1184	KASSERT(dmamap->dm_segs[0].ds_len >= start + len);
1185
1186	vds[s].addr = virtio_rw64(sc, dmamap->dm_segs[0].ds_addr + start);
1187	vds[s].len  = virtio_rw32(sc, len);
1188	if (!write)
1189		vds[s].flags |= virtio_rw16(sc, VRING_DESC_F_WRITE);
1190
1191	if ((vds[s].flags & virtio_rw16(sc, VRING_DESC_F_NEXT)) == 0) {
1192		s = VRING_DESC_CHAIN_END;
1193	} else {
1194		s = virtio_rw16(sc, vds[s].next);
1195	}
1196
1197	vdx->desc_free_idx = s;
1198
1199	return 0;
1200}
1201
1202/*
1203 * enqueue_commit: add it to the aring.
1204 */
1205int
1206virtio_enqueue_commit(struct virtio_softc *sc, struct virtqueue *vq, int slot,
1207    bool notifynow)
1208{
1209
1210	if (slot < 0) {
1211		mutex_enter(&vq->vq_aring_lock);
1212		goto notify;
1213	}
1214
1215	vq_sync_descs(sc, vq, BUS_DMASYNC_PREWRITE);
1216	if (vq->vq_descx[slot].use_indirect)
1217		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_PREWRITE);
1218
1219	mutex_enter(&vq->vq_aring_lock);
1220	vq->vq_avail->ring[(vq->vq_avail_idx++) % vq->vq_num] =
1221	    virtio_rw16(sc, slot);
1222
1223notify:
1224	if (notifynow) {
1225		uint16_t o, n, t;
1226		uint16_t flags;
1227
1228		o = virtio_rw16(sc, vq->vq_avail->idx) - 1;
1229		n = vq->vq_avail_idx;
1230
1231		/*
1232		 * Prepare for `device->CPU' (host->guest) transfer
1233		 * into the buffer.  This must happen before we commit
1234		 * the vq->vq_avail->idx update to ensure we're not
1235		 * still using the buffer in case program-prior loads
1236		 * or stores in it get delayed past the store to
1237		 * vq->vq_avail->idx.
1238		 */
1239		vq_sync_uring_all(sc, vq, BUS_DMASYNC_PREREAD);
1240
1241		/* ensure payload is published, then avail idx */
1242		vq_sync_aring_payload(sc, vq, BUS_DMASYNC_PREWRITE);
1243		vq->vq_avail->idx = virtio_rw16(sc, vq->vq_avail_idx);
1244		vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
1245		vq->vq_queued++;
1246
1247		if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
1248			vq_sync_uring_avail(sc, vq, BUS_DMASYNC_POSTREAD);
1249			t = virtio_rw16(sc, *vq->vq_avail_event) + 1;
1250			if ((uint16_t) (n - t) < (uint16_t) (n - o))
1251				sc->sc_ops->kick(sc, vq->vq_index);
1252		} else {
1253			vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
1254			flags = virtio_rw16(sc, vq->vq_used->flags);
1255			if (!(flags & VRING_USED_F_NO_NOTIFY))
1256				sc->sc_ops->kick(sc, vq->vq_index);
1257		}
1258	}
1259	mutex_exit(&vq->vq_aring_lock);
1260
1261	return 0;
1262}
1263
1264/*
1265 * enqueue_abort: rollback.
1266 */
1267int
1268virtio_enqueue_abort(struct virtio_softc *sc, struct virtqueue *vq, int slot)
1269{
1270	struct vring_desc_extra *vdx;
1271
1272	vdx = &vq->vq_descx[slot];
1273	vdx->desc_free_idx = VRING_DESC_CHAIN_END;
1274	vdx->desc_base = NULL;
1275
1276	vq_free_slot(sc, vq, slot);
1277
1278	return 0;
1279}
1280
1281/*
1282 * Dequeue a request.
1283 */
1284/*
1285 * dequeue: dequeue a request from uring; dmamap_sync for uring is
1286 *	    already done in the interrupt handler.
1287 */
1288int
1289virtio_dequeue(struct virtio_softc *sc, struct virtqueue *vq,
1290    int *slotp, int *lenp)
1291{
1292	uint16_t slot, usedidx;
1293
1294	if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
1295		return ENOENT;
1296	mutex_enter(&vq->vq_uring_lock);
1297	usedidx = vq->vq_used_idx++;
1298	mutex_exit(&vq->vq_uring_lock);
1299	usedidx %= vq->vq_num;
1300	slot = virtio_rw32(sc, vq->vq_used->ring[usedidx].id);
1301
1302	if (vq->vq_descx[slot].use_indirect)
1303		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_POSTWRITE);
1304
1305	if (slotp)
1306		*slotp = slot;
1307	if (lenp)
1308		*lenp = virtio_rw32(sc, vq->vq_used->ring[usedidx].len);
1309
1310	return 0;
1311}
1312
1313/*
1314 * dequeue_commit: complete dequeue; the slot is recycled for future use.
1315 *                 if you forget to call this the slot will be leaked.
1316 */
1317int
1318virtio_dequeue_commit(struct virtio_softc *sc, struct virtqueue *vq, int slot)
1319{
1320	struct vring_desc_extra *vdx;
1321
1322	vdx = &vq->vq_descx[slot];
1323	vdx->desc_base = NULL;
1324	vdx->desc_free_idx = VRING_DESC_CHAIN_END;
1325
1326	vq_free_slot(sc, vq, slot);
1327
1328	return 0;
1329}
1330
1331/*
1332 * Attach a child, fill all the members.
1333 */
1334void
1335virtio_child_attach_start(struct virtio_softc *sc, device_t child, int ipl,
1336    uint64_t req_features, const char *feat_bits)
1337{
1338	char buf[1024];
1339
1340	KASSERT(sc->sc_child == NULL);
1341	KASSERT(sc->sc_child_state == VIRTIO_NO_CHILD);
1342
1343	sc->sc_child = child;
1344	sc->sc_ipl = ipl;
1345
1346	virtio_negotiate_features(sc, req_features);
1347	snprintb(buf, sizeof(buf), feat_bits, sc->sc_active_features);
1348	aprint_normal(": features: %s\n", buf);
1349	aprint_naive("\n");
1350}
1351
1352int
1353virtio_child_attach_finish(struct virtio_softc *sc,
1354    struct virtqueue *vqs, size_t nvqs,
1355    virtio_callback config_change,
1356    int req_flags)
1357{
1358	size_t i;
1359	int r;
1360
1361#ifdef DIAGNOSTIC
1362	KASSERT(nvqs > 0);
1363#define VIRTIO_ASSERT_FLAGS	(VIRTIO_F_INTR_SOFTINT | VIRTIO_F_INTR_PERVQ)
1364	KASSERT((req_flags & VIRTIO_ASSERT_FLAGS) != VIRTIO_ASSERT_FLAGS);
1365#undef VIRTIO_ASSERT_FLAGS
1366
1367	for (i = 0; i < nvqs; i++){
1368		KASSERT(vqs[i].vq_index == i);
1369		KASSERT(vqs[i].vq_intrhand != NULL);
1370		KASSERT(vqs[i].vq_done == NULL ||
1371		    vqs[i].vq_intrhand == virtio_vq_done);
1372	}
1373#endif
1374
1375
1376	sc->sc_vqs = vqs;
1377	sc->sc_nvqs = nvqs;
1378	sc->sc_config_change = config_change;
1379	sc->sc_intrhand = virtio_vq_intr;
1380	sc->sc_flags = req_flags;
1381
1382	/* set the vq address */
1383	for (i = 0; i < nvqs; i++) {
1384		sc->sc_ops->setup_queue(sc, vqs[i].vq_index,
1385		    vqs[i].vq_dmamap->dm_segs[0].ds_addr);
1386	}
1387
1388	r = sc->sc_ops->alloc_interrupts(sc);
1389	if (r != 0) {
1390		aprint_error_dev(sc->sc_dev,
1391		    "failed to allocate interrupts\n");
1392		goto fail;
1393	}
1394
1395	r = sc->sc_ops->setup_interrupts(sc, 0);
1396	if (r != 0) {
1397		aprint_error_dev(sc->sc_dev, "failed to setup interrupts\n");
1398		goto fail;
1399	}
1400
1401	KASSERT(sc->sc_soft_ih == NULL);
1402	if (sc->sc_flags & VIRTIO_F_INTR_SOFTINT) {
1403		u_int flags = SOFTINT_NET;
1404		if (sc->sc_flags & VIRTIO_F_INTR_MPSAFE)
1405			flags |= SOFTINT_MPSAFE;
1406
1407		sc->sc_soft_ih = softint_establish(flags, virtio_soft_intr,
1408		    sc);
1409		if (sc->sc_soft_ih == NULL) {
1410			sc->sc_ops->free_interrupts(sc);
1411			aprint_error_dev(sc->sc_dev,
1412			    "failed to establish soft interrupt\n");
1413			goto fail;
1414		}
1415	}
1416
1417	sc->sc_child_state = VIRTIO_CHILD_ATTACH_FINISHED;
1418	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
1419	return 0;
1420
1421fail:
1422	if (sc->sc_soft_ih) {
1423		softint_disestablish(sc->sc_soft_ih);
1424		sc->sc_soft_ih = NULL;
1425	}
1426
1427	sc->sc_ops->free_interrupts(sc);
1428
1429	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1430	return 1;
1431}
1432
1433void
1434virtio_child_detach(struct virtio_softc *sc)
1435{
1436
1437	/* already detached */
1438	if (sc->sc_child == NULL)
1439		return;
1440
1441
1442	virtio_device_reset(sc);
1443
1444	sc->sc_ops->free_interrupts(sc);
1445
1446	if (sc->sc_soft_ih) {
1447		softint_disestablish(sc->sc_soft_ih);
1448		sc->sc_soft_ih = NULL;
1449	}
1450
1451	sc->sc_vqs = NULL;
1452	sc->sc_child = NULL;
1453}
1454
1455void
1456virtio_child_attach_failed(struct virtio_softc *sc)
1457{
1458	virtio_child_detach(sc);
1459
1460	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1461
1462	sc->sc_child_state = VIRTIO_CHILD_ATTACH_FAILED;
1463}
1464
1465bus_dma_tag_t
1466virtio_dmat(struct virtio_softc *sc)
1467{
1468	return sc->sc_dmat;
1469}
1470
1471device_t
1472virtio_child(struct virtio_softc *sc)
1473{
1474	return sc->sc_child;
1475}
1476
1477int
1478virtio_intrhand(struct virtio_softc *sc)
1479{
1480	return (*sc->sc_intrhand)(sc);
1481}
1482
1483uint64_t
1484virtio_features(struct virtio_softc *sc)
1485{
1486	return sc->sc_active_features;
1487}
1488
1489int
1490virtio_attach_failed(struct virtio_softc *sc)
1491{
1492	device_t self = sc->sc_dev;
1493
1494	/* no error if its not connected, but its failed */
1495	if (sc->sc_childdevid == 0)
1496		return 1;
1497
1498	if (sc->sc_child == NULL) {
1499		switch (sc->sc_child_state) {
1500		case VIRTIO_CHILD_ATTACH_FAILED:
1501			aprint_error_dev(self,
1502			    "virtio configuration failed\n");
1503			break;
1504		case VIRTIO_NO_CHILD:
1505			aprint_error_dev(self,
1506			    "no matching child driver; not configured\n");
1507			break;
1508		default:
1509			/* sanity check */
1510			aprint_error_dev(self,
1511			    "virtio internal error, "
1512			    "child driver is not configured\n");
1513			break;
1514		}
1515
1516		return 1;
1517	}
1518
1519	/* sanity check */
1520	if (sc->sc_child_state != VIRTIO_CHILD_ATTACH_FINISHED) {
1521		aprint_error_dev(self, "virtio internal error, child driver "
1522		    "signaled OK but didn't initialize interrupts\n");
1523		return 1;
1524	}
1525
1526	return 0;
1527}
1528
1529void
1530virtio_print_device_type(device_t self, int id, int revision)
1531{
1532	aprint_normal_dev(self, "%s device (id %d, rev. 0x%02x)\n",
1533	    (id < NDEVNAMES ? virtio_device_name[id] : "Unknown"),
1534	    id,
1535	    revision);
1536}
1537
1538
1539MODULE(MODULE_CLASS_DRIVER, virtio, NULL);
1540
1541#ifdef _MODULE
1542#include "ioconf.c"
1543#endif
1544
1545static int
1546virtio_modcmd(modcmd_t cmd, void *opaque)
1547{
1548	int error = 0;
1549
1550#ifdef _MODULE
1551	switch (cmd) {
1552	case MODULE_CMD_INIT:
1553		error = config_init_component(cfdriver_ioconf_virtio,
1554		    cfattach_ioconf_virtio, cfdata_ioconf_virtio);
1555		break;
1556	case MODULE_CMD_FINI:
1557		error = config_fini_component(cfdriver_ioconf_virtio,
1558		    cfattach_ioconf_virtio, cfdata_ioconf_virtio);
1559		break;
1560	default:
1561		error = ENOTTY;
1562		break;
1563	}
1564#endif
1565
1566	return error;
1567}
1568