1/*	$NetBSD: virtio.c,v 1.43 2021/01/20 19:46:48 reinoud Exp $	*/
2
3/*
4 * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 * Copyright (c) 2012 Stefan Fritsch, Alexander Fiveg.
6 * Copyright (c) 2010 Minoura Makoto.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__KERNEL_RCSID(0, "$NetBSD: virtio.c,v 1.43 2021/01/20 19:46:48 reinoud Exp $");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/kernel.h>
36#include <sys/atomic.h>
37#include <sys/bus.h>
38#include <sys/device.h>
39#include <sys/kmem.h>
40#include <sys/module.h>
41
42#define VIRTIO_PRIVATE
43
44#include <dev/pci/virtioreg.h> /* XXX: move to non-pci */
45#include <dev/pci/virtiovar.h> /* XXX: move to non-pci */
46
47#define MINSEG_INDIRECT		2 /* use indirect if nsegs >= this value */
48
49/* incomplete list */
50static const char *virtio_device_name[] = {
51	"unknown (0)",			/*  0 */
52	"network",			/*  1 */
53	"block",			/*  2 */
54	"console",			/*  3 */
55	"entropy",			/*  4 */
56	"memory balloon",		/*  5 */
57	"I/O memory",			/*  6 */
58	"remote processor messaging",	/*  7 */
59	"SCSI",				/*  8 */
60	"9P transport",			/*  9 */
61};
62#define NDEVNAMES	__arraycount(virtio_device_name)
63
64static void	virtio_init_vq(struct virtio_softc *,
65		    struct virtqueue *, const bool);
66
67void
68virtio_set_status(struct virtio_softc *sc, int status)
69{
70	sc->sc_ops->set_status(sc, status);
71}
72
73/*
74 * Reset the device.
75 */
76/*
77 * To reset the device to a known state, do the following:
78 *	virtio_reset(sc);	     // this will stop the device activity
79 *	<dequeue finished requests>; // virtio_dequeue() still can be called
80 *	<revoke pending requests in the vqs if any>;
81 *	virtio_reinit_start(sc);     // dequeue prohibited
82 *	virtio_negotiate_features(sc, requestedfeatures);
83 *	<some other initialization>;
84 *	virtio_reinit_end(sc);	     // device activated; enqueue allowed
85 * Once attached, features can only be renegotiated after a virtio_reset().
86 */
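/*
 * For illustration, a minimal sketch of the sequence above as it could
 * appear in a hypothetical child driver's resume path.  The xxx_* and
 * XXX_* names are placeholders, not part of this API.
 *
 *	static void
 *	xxx_resume(struct xxx_softc *xsc)
 *	{
 *		struct virtio_softc *vsc = xsc->sc_virtio;
 *
 *		virtio_reset(vsc);		// stop device activity
 *		xxx_drain_vqs(xsc);		// dequeue/revoke old requests
 *		virtio_reinit_start(vsc);	// dequeue prohibited from here
 *		virtio_negotiate_features(vsc, XXX_REQUESTED_FEATURES);
 *		// <re-read device config, refill the rings, ...>
 *		virtio_reinit_end(vsc);		// device active again
 *	}
 */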
87void
88virtio_reset(struct virtio_softc *sc)
89{
90	virtio_device_reset(sc);
91}
92
93void
94virtio_reinit_start(struct virtio_softc *sc)
95{
96	int i;
97
98	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
99	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
100	for (i = 0; i < sc->sc_nvqs; i++) {
101		int n;
102		struct virtqueue *vq = &sc->sc_vqs[i];
103		n = sc->sc_ops->read_queue_size(sc, vq->vq_index);
104		if (n == 0)	/* vq disappeared */
105			continue;
106		if (n != vq->vq_num) {
107			panic("%s: virtqueue size changed, vq index %d\n",
108			      device_xname(sc->sc_dev),
109			      vq->vq_index);
110		}
111		virtio_init_vq(sc, vq, true);
112		sc->sc_ops->setup_queue(sc, vq->vq_index,
113		    vq->vq_dmamap->dm_segs[0].ds_addr);
114	}
115}
116
117void
118virtio_reinit_end(struct virtio_softc *sc)
119{
120	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
121}
122
123/*
124 * Feature negotiation.
125 */
126void
127virtio_negotiate_features(struct virtio_softc *sc, uint64_t guest_features)
128{
129	if (!(device_cfdata(sc->sc_dev)->cf_flags & 1) &&
130	    !(device_cfdata(sc->sc_child)->cf_flags & 1)) /* XXX */
131		guest_features |= VIRTIO_F_RING_INDIRECT_DESC;
132	sc->sc_ops->neg_features(sc, guest_features);
133	if (sc->sc_active_features & VIRTIO_F_RING_INDIRECT_DESC)
134		sc->sc_indirect = true;
135	else
136		sc->sc_indirect = false;
137}
138
139
140/*
141 * Device configuration registers readers/writers
142 */
143#if 0
144#define DPRINTFR(n, fmt, val, index, num) \
145	printf("\n%s (", n); \
146	for (int i = 0; i < num; i++) \
147		printf("%02x ", bus_space_read_1(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index+i)); \
148	printf(") -> "); printf(fmt, val); printf("\n");
149#else
150#define DPRINTFR(n, fmt, val, index, num)
151#endif
152
153uint8_t
154virtio_read_device_config_1(struct virtio_softc *sc, int index) {
155	uint8_t val;
156	val = sc->sc_ops->read_dev_cfg_1(sc, index);
157	DPRINTFR("read_1", "%02x", val, index, 1);
158	return val;
159}
160
161uint16_t
162virtio_read_device_config_2(struct virtio_softc *sc, int index) {
163	uint16_t val;
164	val = sc->sc_ops->read_dev_cfg_2(sc, index);
165	DPRINTFR("read_2", "%04x", val, index, 2);
166	return val;
167}
168
169uint32_t
170virtio_read_device_config_4(struct virtio_softc *sc, int index) {
171	uint32_t val;
172	val = sc->sc_ops->read_dev_cfg_4(sc, index);
173	DPRINTFR("read_4", "%08x", val, index, 4);
174	return val;
175}
176
177uint64_t
178virtio_read_device_config_8(struct virtio_softc *sc, int index) {
179	uint64_t val;
180	val = sc->sc_ops->read_dev_cfg_8(sc, index);
181	DPRINTFR("read_8", "%08lx", val, index, 8);
182	return val;
183}
184
185/*
186 * In the older virtio spec, device config registers are host endian. In newer
187 * versions they are little endian. The normal logic caters for this. However,
188 * some devices explicitly state that their fields are always little endian
189 * and will still need to be swapped.
190 */
191uint16_t
192virtio_read_device_config_le_2(struct virtio_softc *sc, int index) {
193	bool virtio_v1 = (sc->sc_active_features & VIRTIO_F_VERSION_1);
194	uint16_t val;
195
196	val = sc->sc_ops->read_dev_cfg_2(sc, index);
197	val = virtio_v1 ? val : le16toh(val);
198	DPRINTFR("read_le_2", "%08x", val, index, 2);
199	return val;
200}
201
202uint32_t
203virtio_read_device_config_le_4(struct virtio_softc *sc, int index) {
204	bool virtio_v1 = (sc->sc_active_features & VIRTIO_F_VERSION_1);
205	uint32_t val;
206
207	val = sc->sc_ops->read_dev_cfg_4(sc, index);
208	val = virtio_v1 ? val : le32toh(val);
209	DPRINTFR("read_le_4", "%08x", val, index, 4);
210	return val;
211}
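/*
 * Usage sketch (hypothetical): a child driver reading fields from its
 * device-specific config area.  The offsets below are illustrative
 * only; real offsets come from the device's register definitions.
 *
 *	uint64_t capacity;
 *	uint32_t blk_size;
 *
 *	capacity = virtio_read_device_config_8(vsc, 0);
 *	blk_size = virtio_read_device_config_4(vsc, 20);
 *
 * Fields that the device specification declares as "always little
 * endian" would use virtio_read_device_config_le_2/le_4 instead.
 */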
212
213void
214virtio_write_device_config_1(struct virtio_softc *sc, int index, uint8_t value)
215{
216	sc->sc_ops->write_dev_cfg_1(sc, index, value);
217}
218
219void
220virtio_write_device_config_2(struct virtio_softc *sc, int index, uint16_t value)
221{
222	sc->sc_ops->write_dev_cfg_2(sc, index, value);
223}
224
225void
226virtio_write_device_config_4(struct virtio_softc *sc, int index, uint32_t value)
227{
228	sc->sc_ops->write_dev_cfg_4(sc, index, value);
229}
230
231void
232virtio_write_device_config_8(struct virtio_softc *sc, int index, uint64_t value)
233{
234	sc->sc_ops->write_dev_cfg_8(sc, index, value);
235}
236
237/*
238 * In the older virtio spec, device config registers are host endian. In newer
239 * versions they are little endian. The normal logic caters for this. However,
240 * some devices explicitly state that their fields are always little endian
241 * and will still need to be swapped.
242 */
243void
244virtio_write_device_config_le_2(struct virtio_softc *sc, int index, uint16_t value)
245{
246	bool virtio_v1 = (sc->sc_active_features & VIRTIO_F_VERSION_1);
247	value = virtio_v1 ? value : htole16(value);
248	sc->sc_ops->write_dev_cfg_2(sc, index, value);
249}
250
251void
252virtio_write_device_config_le_4(struct virtio_softc *sc, int index, uint32_t value)
253{
254	bool virtio_v1 = (sc->sc_active_features & VIRTIO_F_VERSION_1);
255	value = virtio_v1 ? value : htole32(value);
256	sc->sc_ops->write_dev_cfg_4(sc, index, value);
257}
258
259/*
260 * data structures endian helpers
261 */
262uint16_t virtio_rw16(struct virtio_softc *sc, uint16_t val)
263{
264	KASSERT(sc);
265	return (sc->sc_devcfg_swap) ? bswap16(val) : val;
266}
267
268uint32_t virtio_rw32(struct virtio_softc *sc, uint32_t val)
269{
270	KASSERT(sc);
271	return (sc->sc_devcfg_swap) ? bswap32(val) : val;
272}
273
274uint64_t virtio_rw64(struct virtio_softc *sc, uint64_t val)
275{
276	KASSERT(sc);
277	return (sc->sc_devcfg_swap) ? bswap64(val) : val;
278}
279
280
281/*
282 * Interrupt handler.
283 */
284static void
285virtio_soft_intr(void *arg)
286{
287	struct virtio_softc *sc = arg;
288
289	KASSERT(sc->sc_intrhand != NULL);
290
291	(sc->sc_intrhand)(sc);
292}
293
294/*
295 * dmamap sync operations for a virtqueue.
296 */
297static inline void
298vq_sync_descs(struct virtio_softc *sc, struct virtqueue *vq, int ops)
299{
300	/* availoffset == sizeof(vring_desc)*vq_num */
301	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, 0, vq->vq_availoffset,
302			ops);
303}
304
305static inline void
306vq_sync_aring(struct virtio_softc *sc, struct virtqueue *vq, int ops)
307{
308	uint16_t hdrlen = offsetof(struct vring_avail, ring);
309	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX)
310		hdrlen += sizeof(uint16_t);
311
312	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
313			vq->vq_availoffset,
314			hdrlen + vq->vq_num * sizeof(uint16_t),
315			ops);
316}
317
318static inline void
319vq_sync_uring(struct virtio_softc *sc, struct virtqueue *vq, int ops)
320{
321	uint16_t hdrlen = offsetof(struct vring_used, ring);
322	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX)
323		hdrlen += sizeof(uint16_t);
324
325	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
326			vq->vq_usedoffset,
327			hdrlen + vq->vq_num * sizeof(struct vring_used_elem),
328			ops);
329}
330
331static inline void
332vq_sync_indirect(struct virtio_softc *sc, struct virtqueue *vq, int slot,
333		     int ops)
334{
335	int offset = vq->vq_indirectoffset
336		      + sizeof(struct vring_desc) * vq->vq_maxnsegs * slot;
337
338	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
339			offset, sizeof(struct vring_desc) * vq->vq_maxnsegs,
340			ops);
341}
342
343/*
344 * Can be used as sc_intrhand.
345 */
346/*
347 * Scans the vqs, does bus_dmamap_sync for them (not for the payload),
348 * and calls (*vq_done)() if some entries have been consumed.
349 */
350bool
351virtio_vq_is_enqueued(struct virtio_softc *sc, struct virtqueue *vq)
352{
353
354	if (vq->vq_queued) {
355		vq->vq_queued = 0;
356		vq_sync_aring(sc, vq, BUS_DMASYNC_POSTWRITE);
357	}
358	vq_sync_uring(sc, vq, BUS_DMASYNC_POSTREAD);
359	membar_consumer();
360
361	return (vq->vq_used_idx != virtio_rw16(sc, vq->vq_used->idx)) ? 1 : 0;
362}
363
364int
365virtio_vq_intr(struct virtio_softc *sc)
366{
367	struct virtqueue *vq;
368	int i, r = 0;
369
370	for (i = 0; i < sc->sc_nvqs; i++) {
371		vq = &sc->sc_vqs[i];
372		if (virtio_vq_is_enqueued(sc, vq) == 1) {
373			if (vq->vq_done)
374				r |= (vq->vq_done)(vq);
375		}
376	}
377
378	return r;
379}
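/*
 * Sketch of a vq_done handler as virtio_vq_intr() would call it
 * (hypothetical driver; xxx_* names are placeholders).  The used ring
 * has already been synced by virtio_vq_is_enqueued(), so the handler
 * only needs to dequeue and complete the finished slots:
 *
 *	static int
 *	xxx_vq_done(struct virtqueue *vq)
 *	{
 *		struct virtio_softc *vsc = vq->vq_owner;
 *		int slot, len, more = 0;
 *
 *		while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
 *			// sync/unload the payload dmamap and complete
 *			// the request that was enqueued in this slot
 *			virtio_dequeue_commit(vsc, vq, slot);
 *			more = 1;
 *		}
 *		return more;
 *	}
 *
 * virtio_vq_intr itself is then passed as the intr_hand callback to
 * virtio_child_attach_start() (see the attach sketch further below).
 */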
380
381int
382virtio_vq_intrhand(struct virtio_softc *sc)
383{
384	struct virtqueue *vq;
385	int i, r = 0;
386
387	for (i = 0; i < sc->sc_nvqs; i++) {
388		vq = &sc->sc_vqs[i];
389		r |= (vq->vq_intrhand)(vq->vq_intrhand_arg);
390	}
391
392	return r;
393}
394
395
396/*
397 * Increase the event index in order to delay interrupts.
398 */
399int
400virtio_postpone_intr(struct virtio_softc *sc, struct virtqueue *vq,
401		uint16_t nslots)
402{
403	uint16_t	idx, nused;
404
405	idx = vq->vq_used_idx + nslots;
406
407	/* set the new event index: avail_ring->used_event = idx */
408	*vq->vq_used_event = virtio_rw16(sc, idx);
409	membar_producer();
410
411	vq_sync_aring(vq->vq_owner, vq, BUS_DMASYNC_PREWRITE);
412	vq->vq_queued++;
413
414	nused = (uint16_t)
415		(virtio_rw16(sc, vq->vq_used->idx) - vq->vq_used_idx);
416	KASSERT(nused <= vq->vq_num);
417
418	return nslots < nused;
419}
420
421/*
422 * Postpone interrupt until 3/4 of the available descriptors have been
423 * consumed.
424 */
425int
426virtio_postpone_intr_smart(struct virtio_softc *sc, struct virtqueue *vq)
427{
428	uint16_t	nslots;
429
430	nslots = (uint16_t)
431		(virtio_rw16(sc, vq->vq_avail->idx) - vq->vq_used_idx) * 3 / 4;
432
433	return virtio_postpone_intr(sc, vq, nslots);
434}
435
436/*
437 * Postpone interrupt until all of the available descriptors have been
438 * consumed.
439 */
440int
441virtio_postpone_intr_far(struct virtio_softc *sc, struct virtqueue *vq)
442{
443	uint16_t	nslots;
444
445	nslots = (uint16_t)
446		(virtio_rw16(sc, vq->vq_avail->idx) - vq->vq_used_idx);
447
448	return virtio_postpone_intr(sc, vq, nslots);
449}
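/*
 * Usage sketch (hypothetical driver): after completing requests, the
 * next interrupt can be postponed instead of re-armed immediately.  A
 * non-zero return means the device already consumed past the new event
 * index, so the pending entries must be processed right away:
 *
 *	if (virtio_postpone_intr_smart(vsc, vq))
 *		xxx_process_used_ring(xsc);
 */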
450
451/*
452 * Start/stop vq interrupt.  No guarantee.
453 */
454void
455virtio_stop_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
456{
457	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
458		/*
459		 * No way to disable the interrupt completely with
460		 * RingEventIdx. Instead advance used_event by half the
461		 * possible value. This won't happen soon and is far enough in
462		 * the past to not trigger a spurios interrupt.
463		 */
464		*vq->vq_used_event = virtio_rw16(sc, vq->vq_used_idx + 0x8000);
465	} else {
466		vq->vq_avail->flags |= virtio_rw16(sc, VRING_AVAIL_F_NO_INTERRUPT);
467	}
468	vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
469	vq->vq_queued++;
470}
471
472int
473virtio_start_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
474{
475	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
476		/*
477		 * If event index feature is negotiated, enabling interrupts
478		 * is done through setting the latest consumed index in the
479		 * used_event field
480		 */
481		*vq->vq_used_event = virtio_rw16(sc, vq->vq_used_idx);
482	} else {
483		vq->vq_avail->flags &= ~virtio_rw16(sc, VRING_AVAIL_F_NO_INTERRUPT);
484	}
485	vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
486	vq->vq_queued++;
487
488	return vq->vq_used_idx != virtio_rw16(sc, vq->vq_used->idx);
489}
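/*
 * Typical pattern (hypothetical driver): keep vq interrupts off while
 * draining the used ring, and re-check after re-enabling to close the
 * race with entries that arrived in between:
 *
 *	for (;;) {
 *		virtio_stop_vq_intr(vsc, vq);
 *		xxx_process_used_ring(xsc);
 *		if (virtio_start_vq_intr(vsc, vq) == 0)
 *			break;		// nothing new arrived
 *	}
 */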
490
491/*
492 * Initialize vq structure.
493 */
494static void
495virtio_init_vq(struct virtio_softc *sc, struct virtqueue *vq,
496    const bool reinit)
497{
498	int i, j;
499	int vq_size = vq->vq_num;
500
501	memset(vq->vq_vaddr, 0, vq->vq_bytesize);
502
503	/* build the indirect descriptor chain */
504	if (vq->vq_indirect != NULL) {
505		struct vring_desc *vd;
506
507		for (i = 0; i < vq_size; i++) {
508			vd = vq->vq_indirect;
509			vd += vq->vq_maxnsegs * i;
510			for (j = 0; j < vq->vq_maxnsegs-1; j++) {
511				vd[j].next = virtio_rw16(sc, j + 1);
512			}
513		}
514	}
515
516	/* free slot management */
517	SIMPLEQ_INIT(&vq->vq_freelist);
518	for (i = 0; i < vq_size; i++) {
519		SIMPLEQ_INSERT_TAIL(&vq->vq_freelist,
520				    &vq->vq_entries[i], qe_list);
521		vq->vq_entries[i].qe_index = i;
522	}
523	if (!reinit)
524		mutex_init(&vq->vq_freelist_lock, MUTEX_SPIN, sc->sc_ipl);
525
526	/* enqueue/dequeue status */
527	vq->vq_avail_idx = 0;
528	vq->vq_used_idx = 0;
529	vq->vq_queued = 0;
530	if (!reinit) {
531		mutex_init(&vq->vq_aring_lock, MUTEX_SPIN, sc->sc_ipl);
532		mutex_init(&vq->vq_uring_lock, MUTEX_SPIN, sc->sc_ipl);
533	}
534	vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
535	vq_sync_uring(sc, vq, BUS_DMASYNC_PREREAD);
536	vq->vq_queued++;
537}
538
539/*
540 * Allocate/free a vq.
541 */
542int
543virtio_alloc_vq(struct virtio_softc *sc, struct virtqueue *vq, int index,
544    int maxsegsize, int maxnsegs, const char *name)
545{
546	int vq_size, allocsize1, allocsize2, allocsize3, allocsize = 0;
547	int rsegs, r, hdrlen;
548#define VIRTQUEUE_ALIGN(n)	(((n)+(VIRTIO_PAGE_SIZE-1))&	\
549				 ~(VIRTIO_PAGE_SIZE-1))
550
551	/* Make sure callers allocate vqs in order */
552	KASSERT(sc->sc_nvqs == index);
553
554	memset(vq, 0, sizeof(*vq));
555
556	vq_size = sc->sc_ops->read_queue_size(sc, index);
557	if (vq_size == 0) {
558		aprint_error_dev(sc->sc_dev,
559				 "virtqueue does not exist, index %d for %s\n",
560				 index, name);
561		goto err;
562	}
563
564	hdrlen = sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX ? 3 : 2;
565
566	/* allocsize1: descriptor table + avail ring + pad */
567	allocsize1 = VIRTQUEUE_ALIGN(sizeof(struct vring_desc)*vq_size
568			     + sizeof(uint16_t)*(hdrlen + vq_size));
569	/* allocsize2: used ring + pad */
570	allocsize2 = VIRTQUEUE_ALIGN(sizeof(uint16_t) * hdrlen
571			     + sizeof(struct vring_used_elem)*vq_size);
572	/* allocsize3: indirect table */
573	if (sc->sc_indirect && maxnsegs >= MINSEG_INDIRECT)
574		allocsize3 = sizeof(struct vring_desc) * maxnsegs * vq_size;
575	else
576		allocsize3 = 0;
577	allocsize = allocsize1 + allocsize2 + allocsize3;
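	/*
	 * Worked example with illustrative numbers only, assuming the
	 * standard 4096 byte VIRTIO_PAGE_SIZE, 16 byte vring_desc and
	 * 8 byte vring_used_elem: for vq_size = 256, maxnsegs = 8 and
	 * EVENT_IDX negotiated (hdrlen = 3):
	 *	allocsize1 = ALIGN(16*256 + 2*(3+256)) = ALIGN(4614) =  8192
	 *	allocsize2 = ALIGN(2*3 + 8*256)        = ALIGN(2054) =  4096
	 *	allocsize3 = 16 * 8 * 256                             = 32768
	 *	allocsize  = 8192 + 4096 + 32768                      = 45056
	 */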
578
579	/* alloc and map the memory */
580	r = bus_dmamem_alloc(sc->sc_dmat, allocsize, VIRTIO_PAGE_SIZE, 0,
581			     &vq->vq_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
582	if (r != 0) {
583		aprint_error_dev(sc->sc_dev,
584				 "virtqueue %d for %s allocation failed, "
585				 "error code %d\n", index, name, r);
586		goto err;
587	}
588	r = bus_dmamem_map(sc->sc_dmat, &vq->vq_segs[0], rsegs, allocsize,
589			   &vq->vq_vaddr, BUS_DMA_NOWAIT);
590	if (r != 0) {
591		aprint_error_dev(sc->sc_dev,
592				 "virtqueue %d for %s map failed, "
593				 "error code %d\n", index, name, r);
594		goto err;
595	}
596	r = bus_dmamap_create(sc->sc_dmat, allocsize, 1, allocsize, 0,
597			      BUS_DMA_NOWAIT, &vq->vq_dmamap);
598	if (r != 0) {
599		aprint_error_dev(sc->sc_dev,
600				 "virtqueue %d for %s dmamap creation failed, "
601				 "error code %d\n", index, name, r);
602		goto err;
603	}
604	r = bus_dmamap_load(sc->sc_dmat, vq->vq_dmamap,
605			    vq->vq_vaddr, allocsize, NULL, BUS_DMA_NOWAIT);
606	if (r != 0) {
607		aprint_error_dev(sc->sc_dev,
608				 "virtqueue %d for %s dmamap load failed, "
609				 "error code %d\n", index, name, r);
610		goto err;
611	}
612
613	/* remember addresses and offsets for later use */
614	vq->vq_owner = sc;
615	vq->vq_num = vq_size;
616	vq->vq_index = index;
617	vq->vq_desc = vq->vq_vaddr;
618	vq->vq_availoffset = sizeof(struct vring_desc)*vq_size;
619	vq->vq_avail = (void*)(((char*)vq->vq_desc) + vq->vq_availoffset);
620	vq->vq_used_event = (uint16_t *) ((char *)vq->vq_avail +
621		 offsetof(struct vring_avail, ring[vq->vq_num]));
622	vq->vq_usedoffset = allocsize1;
623	vq->vq_used = (void*)(((char*)vq->vq_desc) + vq->vq_usedoffset);
624	vq->vq_avail_event = (uint16_t *)((char *)vq->vq_used +
625		 offsetof(struct vring_used, ring[vq->vq_num]));
626
627	if (allocsize3 > 0) {
628		vq->vq_indirectoffset = allocsize1 + allocsize2;
629		vq->vq_indirect = (void*)(((char*)vq->vq_desc)
630					  + vq->vq_indirectoffset);
631	}
632	vq->vq_bytesize = allocsize;
633	vq->vq_maxsegsize = maxsegsize;
634	vq->vq_maxnsegs = maxnsegs;
635
636	/* free slot management */
637	vq->vq_entries = kmem_zalloc(sizeof(struct vq_entry)*vq_size,
638				     KM_SLEEP);
639	virtio_init_vq(sc, vq, false);
640
641	/* set the vq address */
642	sc->sc_ops->setup_queue(sc, index,
643	    vq->vq_dmamap->dm_segs[0].ds_addr);
644
645	aprint_verbose_dev(sc->sc_dev,
646			   "allocated %u bytes for virtqueue %d for %s, "
647			   "size %d\n", allocsize, index, name, vq_size);
648	if (allocsize3 > 0)
649		aprint_verbose_dev(sc->sc_dev,
650				   "using %d bytes (%d entries) of "
651				   "indirect descriptors\n",
652				   allocsize3, maxnsegs * vq_size);
653
654	sc->sc_nvqs++;
655
656	return 0;
657
658err:
659	sc->sc_ops->setup_queue(sc, index, 0);
660	if (vq->vq_dmamap)
661		bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
662	if (vq->vq_vaddr)
663		bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, allocsize);
664	if (vq->vq_segs[0].ds_addr)
665		bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
666	memset(vq, 0, sizeof(*vq));
667
668	return -1;
669}
670
671int
672virtio_free_vq(struct virtio_softc *sc, struct virtqueue *vq)
673{
674	struct vq_entry *qe;
675	int i = 0;
676
677	/* device must be already deactivated */
678	/* confirm the vq is empty */
679	SIMPLEQ_FOREACH(qe, &vq->vq_freelist, qe_list) {
680		i++;
681	}
682	if (i != vq->vq_num) {
683		printf("%s: freeing non-empty vq, index %d\n",
684		       device_xname(sc->sc_dev), vq->vq_index);
685		return EBUSY;
686	}
687
688	/* tell device that there's no virtqueue any longer */
689	sc->sc_ops->setup_queue(sc, vq->vq_index, 0);
690
691	kmem_free(vq->vq_entries, sizeof(*vq->vq_entries) * vq->vq_num);
692	bus_dmamap_unload(sc->sc_dmat, vq->vq_dmamap);
693	bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
694	bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, vq->vq_bytesize);
695	bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
696	mutex_destroy(&vq->vq_freelist_lock);
697	mutex_destroy(&vq->vq_uring_lock);
698	mutex_destroy(&vq->vq_aring_lock);
699	memset(vq, 0, sizeof(*vq));
700
701	sc->sc_nvqs--;
702
703	return 0;
704}
705
706/*
707 * Free descriptor management.
708 */
709static struct vq_entry *
710vq_alloc_entry(struct virtqueue *vq)
711{
712	struct vq_entry *qe;
713
714	mutex_enter(&vq->vq_freelist_lock);
715	if (SIMPLEQ_EMPTY(&vq->vq_freelist)) {
716		mutex_exit(&vq->vq_freelist_lock);
717		return NULL;
718	}
719	qe = SIMPLEQ_FIRST(&vq->vq_freelist);
720	SIMPLEQ_REMOVE_HEAD(&vq->vq_freelist, qe_list);
721	mutex_exit(&vq->vq_freelist_lock);
722
723	return qe;
724}
725
726static void
727vq_free_entry(struct virtqueue *vq, struct vq_entry *qe)
728{
729	mutex_enter(&vq->vq_freelist_lock);
730	SIMPLEQ_INSERT_TAIL(&vq->vq_freelist, qe, qe_list);
731	mutex_exit(&vq->vq_freelist_lock);
732
733	return;
734}
735
736/*
737 * Enqueue several dmamaps as a single request.
738 */
739/*
740 * Typical usage:
741 *  <queue size> instances of the following are stored in arrays:
742 *  - command blocks (in dmamem) should be pre-allocated and mapped
743 *  - dmamaps for command blocks should be pre-allocated and loaded
744 *  - dmamaps for payload should be pre-allocated
745 *      r = virtio_enqueue_prep(sc, vq, &slot);		// allocate a slot
746 *	if (r)		// currently 0 or EAGAIN
747 *	  return r;
748 *	r = bus_dmamap_load(dmat, dmamap_payload[slot], data, count, ..);
749 *	if (r) {
750 *	  virtio_enqueue_abort(sc, vq, slot);
751 *	  return r;
752 *	}
753 *	r = virtio_enqueue_reserve(sc, vq, slot,
754 *				   dmamap_payload[slot]->dm_nsegs+1);
755 *							// ^ +1 for command
756 *	if (r) {	// currently 0 or EAGAIN
757 *	  bus_dmamap_unload(dmat, dmamap_payload[slot]);
758 *	  return r;					// do not call abort()
759 *	}
760 *	<setup and prepare commands>
761 *	bus_dmamap_sync(dmat, dmamap_cmd[slot],... BUS_DMASYNC_PREWRITE);
762 *	bus_dmamap_sync(dmat, dmamap_payload[slot],...);
763 *	virtio_enqueue(sc, vq, slot, dmamap_cmd[slot], false);
764 *	virtio_enqueue(sc, vq, slot, dmamap_payload[slot], iswrite);
765 *	virtio_enqueue_commit(sc, vq, slot, true);
766 */
767
768/*
769 * enqueue_prep: allocate a slot number
770 */
771int
772virtio_enqueue_prep(struct virtio_softc *sc, struct virtqueue *vq, int *slotp)
773{
774	struct vq_entry *qe1;
775
776	KASSERT(slotp != NULL);
777
778	qe1 = vq_alloc_entry(vq);
779	if (qe1 == NULL)
780		return EAGAIN;
781	/* next slot is not allocated yet */
782	qe1->qe_next = -1;
783	*slotp = qe1->qe_index;
784
785	return 0;
786}
787
788/*
789 * enqueue_reserve: allocate remaining slots and build the descriptor chain.
790 */
791int
792virtio_enqueue_reserve(struct virtio_softc *sc, struct virtqueue *vq,
793		       int slot, int nsegs)
794{
795	int indirect;
796	struct vq_entry *qe1 = &vq->vq_entries[slot];
797
798	KASSERT(qe1->qe_next == -1);
799	KASSERT(1 <= nsegs && nsegs <= vq->vq_num);
800
801	if ((vq->vq_indirect != NULL) &&
802	    (nsegs >= MINSEG_INDIRECT) &&
803	    (nsegs <= vq->vq_maxnsegs))
804		indirect = 1;
805	else
806		indirect = 0;
807	qe1->qe_indirect = indirect;
808
809	if (indirect) {
810		struct vring_desc *vd;
811		uint64_t addr;
812		int i;
813
814		vd = &vq->vq_desc[qe1->qe_index];
815		addr = vq->vq_dmamap->dm_segs[0].ds_addr
816			+ vq->vq_indirectoffset;
817		addr += sizeof(struct vring_desc)
818			* vq->vq_maxnsegs * qe1->qe_index;
819		vd->addr  = virtio_rw64(sc, addr);
820		vd->len   = virtio_rw32(sc, sizeof(struct vring_desc) * nsegs);
821		vd->flags = virtio_rw16(sc, VRING_DESC_F_INDIRECT);
822
823		vd = vq->vq_indirect;
824		vd += vq->vq_maxnsegs * qe1->qe_index;
825		qe1->qe_desc_base = vd;
826
827		for (i = 0; i < nsegs-1; i++) {
828			vd[i].flags = virtio_rw16(sc, VRING_DESC_F_NEXT);
829		}
830		vd[i].flags  = virtio_rw16(sc, 0);
831		qe1->qe_next = 0;
832
833		return 0;
834	} else {
835		struct vring_desc *vd;
836		struct vq_entry *qe;
837		int i, s;
838
839		vd = &vq->vq_desc[0];
840		qe1->qe_desc_base = vd;
841		qe1->qe_next = qe1->qe_index;
842		s = slot;
843		for (i = 0; i < nsegs - 1; i++) {
844			qe = vq_alloc_entry(vq);
845			if (qe == NULL) {
846				vd[s].flags = virtio_rw16(sc, 0);
847				virtio_enqueue_abort(sc, vq, slot);
848				return EAGAIN;
849			}
850			vd[s].flags = virtio_rw16(sc, VRING_DESC_F_NEXT);
851			vd[s].next  = virtio_rw16(sc, qe->qe_index);
852			s = qe->qe_index;
853		}
854		vd[s].flags = virtio_rw16(sc, 0);
855
856		return 0;
857	}
858}
859
860/*
861 * enqueue: enqueue a single dmamap.
862 */
863int
864virtio_enqueue(struct virtio_softc *sc, struct virtqueue *vq, int slot,
865	       bus_dmamap_t dmamap, bool write)
866{
867	struct vq_entry *qe1 = &vq->vq_entries[slot];
868	struct vring_desc *vd = qe1->qe_desc_base;
869	int i;
870	int s = qe1->qe_next;
871
872	KASSERT(s >= 0);
873	KASSERT(dmamap->dm_nsegs > 0);
874
875	for (i = 0; i < dmamap->dm_nsegs; i++) {
876		vd[s].addr = virtio_rw64(sc, dmamap->dm_segs[i].ds_addr);
877		vd[s].len  = virtio_rw32(sc, dmamap->dm_segs[i].ds_len);
878		if (!write)
879			vd[s].flags |= virtio_rw16(sc, VRING_DESC_F_WRITE);
880		s = virtio_rw16(sc, vd[s].next);
881	}
882	qe1->qe_next = s;
883
884	return 0;
885}
886
887int
888virtio_enqueue_p(struct virtio_softc *sc, struct virtqueue *vq, int slot,
889		 bus_dmamap_t dmamap, bus_addr_t start, bus_size_t len,
890		 bool write)
891{
892	struct vq_entry *qe1 = &vq->vq_entries[slot];
893	struct vring_desc *vd = qe1->qe_desc_base;
894	int s = qe1->qe_next;
895
896	KASSERT(s >= 0);
897	KASSERT(dmamap->dm_nsegs == 1); /* XXX */
898	KASSERT((dmamap->dm_segs[0].ds_len > start) &&
899		(dmamap->dm_segs[0].ds_len >= start + len));
900
901	vd[s].addr = virtio_rw64(sc, dmamap->dm_segs[0].ds_addr + start);
902	vd[s].len  = virtio_rw32(sc, len);
903	if (!write)
904		vd[s].flags |= virtio_rw16(sc, VRING_DESC_F_WRITE);
905	qe1->qe_next = virtio_rw16(sc, vd[s].next);
906
907	return 0;
908}
909
910/*
911 * enqueue_commit: add it to the aring.
912 */
913int
914virtio_enqueue_commit(struct virtio_softc *sc, struct virtqueue *vq, int slot,
915		      bool notifynow)
916{
917	struct vq_entry *qe1;
918
919	if (slot < 0) {
920		mutex_enter(&vq->vq_aring_lock);
921		goto notify;
922	}
923	vq_sync_descs(sc, vq, BUS_DMASYNC_PREWRITE);
924	qe1 = &vq->vq_entries[slot];
925	if (qe1->qe_indirect)
926		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_PREWRITE);
927	mutex_enter(&vq->vq_aring_lock);
928	vq->vq_avail->ring[(vq->vq_avail_idx++) % vq->vq_num] =
929		virtio_rw16(sc, slot);
930
931notify:
932	if (notifynow) {
933		uint16_t o, n, t;
934		uint16_t flags;
935		o = virtio_rw16(sc, vq->vq_avail->idx);
936		n = vq->vq_avail_idx;
937
938		/* publish avail idx */
939		membar_producer();
940		vq->vq_avail->idx = virtio_rw16(sc, vq->vq_avail_idx);
941		vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
942		vq->vq_queued++;
943
944		membar_consumer();
945		vq_sync_uring(sc, vq, BUS_DMASYNC_PREREAD);
946		if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
947			t = virtio_rw16(sc, *vq->vq_avail_event) + 1;
948			if ((uint16_t) (n - t) < (uint16_t) (n - o))
949				sc->sc_ops->kick(sc, vq->vq_index);
950		} else {
951			flags = virtio_rw16(sc, vq->vq_used->flags);
952			if (!(flags & VRING_USED_F_NO_NOTIFY))
953				sc->sc_ops->kick(sc, vq->vq_index);
954		}
955		vq_sync_uring(sc, vq, BUS_DMASYNC_POSTREAD);
956		vq_sync_aring(sc, vq, BUS_DMASYNC_POSTWRITE);
957	}
958	mutex_exit(&vq->vq_aring_lock);
959
960	return 0;
961}
962
963/*
964 * enqueue_abort: rollback.
965 */
966int
967virtio_enqueue_abort(struct virtio_softc *sc, struct virtqueue *vq, int slot)
968{
969	struct vq_entry *qe = &vq->vq_entries[slot];
970	struct vring_desc *vd;
971	int s;
972
973	if (qe->qe_next < 0) {
974		vq_free_entry(vq, qe);
975		return 0;
976	}
977
978	s = slot;
979	vd = &vq->vq_desc[0];
980	while (virtio_rw16(sc, vd[s].flags) & VRING_DESC_F_NEXT) {
981		s = virtio_rw16(sc, vd[s].next);
982		vq_free_entry(vq, qe);
983		qe = &vq->vq_entries[s];
984	}
985	vq_free_entry(vq, qe);
986	return 0;
987}
988
989/*
990 * Dequeue a request.
991 */
992/*
993 * dequeue: dequeue a request from uring; dmamap_sync for uring is
994 *	    already done in the interrupt handler.
995 */
996int
997virtio_dequeue(struct virtio_softc *sc, struct virtqueue *vq,
998	       int *slotp, int *lenp)
999{
1000	uint16_t slot, usedidx;
1001	struct vq_entry *qe;
1002
1003	if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
1004		return ENOENT;
1005	mutex_enter(&vq->vq_uring_lock);
1006	usedidx = vq->vq_used_idx++;
1007	mutex_exit(&vq->vq_uring_lock);
1008	usedidx %= vq->vq_num;
1009	slot = virtio_rw32(sc, vq->vq_used->ring[usedidx].id);
1010	qe = &vq->vq_entries[slot];
1011
1012	if (qe->qe_indirect)
1013		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_POSTWRITE);
1014
1015	if (slotp)
1016		*slotp = slot;
1017	if (lenp)
1018		*lenp = virtio_rw32(sc, vq->vq_used->ring[usedidx].len);
1019
1020	return 0;
1021}
1022
1023/*
1024 * dequeue_commit: complete dequeue; the slot is recycled for future use.
1025 *                 If you forget to call this, the slot will be leaked.
1026 */
1027int
1028virtio_dequeue_commit(struct virtio_softc *sc, struct virtqueue *vq, int slot)
1029{
1030	struct vq_entry *qe = &vq->vq_entries[slot];
1031	struct vring_desc *vd = &vq->vq_desc[0];
1032	int s = slot;
1033
1034	while (virtio_rw16(sc, vd[s].flags) & VRING_DESC_F_NEXT) {
1035		s = virtio_rw16(sc, vd[s].next);
1036		vq_free_entry(vq, qe);
1037		qe = &vq->vq_entries[s];
1038	}
1039	vq_free_entry(vq, qe);
1040
1041	return 0;
1042}
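/*
 * Completion-side counterpart to the "Typical usage" enqueue sequence
 * documented above virtio_enqueue_prep() (sketch only; dmamap_cmd,
 * dmamap_payload and iswrite are the same placeholders used there):
 *
 *	while (virtio_dequeue(sc, vq, &slot, &len) == 0) {
 *		bus_dmamap_sync(dmat, dmamap_cmd[slot], ...,
 *		    BUS_DMASYNC_POSTWRITE);
 *		bus_dmamap_sync(dmat, dmamap_payload[slot], ...,
 *		    iswrite ? BUS_DMASYNC_POSTWRITE : BUS_DMASYNC_POSTREAD);
 *		bus_dmamap_unload(dmat, dmamap_payload[slot]);
 *		<complete the command; len is the number of bytes the
 *		 device wrote back>;
 *		virtio_dequeue_commit(sc, vq, slot);	// recycle the slot
 *	}
 */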
1043
1044/*
1045 * Attach a child, fill all the members.
1046 */
1047void
1048virtio_child_attach_start(struct virtio_softc *sc, device_t child, int ipl,
1049		    struct virtqueue *vqs,
1050		    virtio_callback config_change,
1051		    virtio_callback intr_hand,
1052		    int req_flags, int req_features, const char *feat_bits)
1053{
1054	char buf[1024];
1055
1056	sc->sc_child = child;
1057	sc->sc_ipl = ipl;
1058	sc->sc_vqs = vqs;
1059	sc->sc_config_change = config_change;
1060	sc->sc_intrhand = intr_hand;
1061	sc->sc_flags = req_flags;
1062
1063	virtio_negotiate_features(sc, req_features);
1064	snprintb(buf, sizeof(buf), feat_bits, sc->sc_active_features);
1065	aprint_normal(": features: %s\n", buf);
1066	aprint_naive("\n");
1067}
1068
1069void
1070virtio_child_attach_set_vqs(struct virtio_softc *sc,
1071    struct virtqueue *vqs, int nvq_pairs)
1072{
1073
1074	KASSERT(nvq_pairs == 1 ||
1075	    (sc->sc_flags & VIRTIO_F_INTR_SOFTINT) == 0);
1076	if (nvq_pairs > 1)
1077		sc->sc_child_mq = true;
1078
1079	sc->sc_vqs = vqs;
1080}
1081
1082int
1083virtio_child_attach_finish(struct virtio_softc *sc)
1084{
1085	int r;
1086
1087	r = sc->sc_ops->setup_interrupts(sc);
1088	if (r != 0) {
1089		aprint_error_dev(sc->sc_dev, "failed to setup interrupts\n");
1090		goto fail;
1091	}
1092
1093	KASSERT(sc->sc_soft_ih == NULL);
1094	if (sc->sc_flags & VIRTIO_F_INTR_SOFTINT) {
1095		u_int flags = SOFTINT_NET;
1096		if (sc->sc_flags & VIRTIO_F_INTR_MPSAFE)
1097			flags |= SOFTINT_MPSAFE;
1098
1099		sc->sc_soft_ih = softint_establish(flags, virtio_soft_intr, sc);
1100		if (sc->sc_soft_ih == NULL) {
1101			sc->sc_ops->free_interrupts(sc);
1102			aprint_error_dev(sc->sc_dev,
1103			    "failed to establish soft interrupt\n");
1104			goto fail;
1105		}
1106	}
1107
1108	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
1109	return 0;
1110
1111fail:
1112	if (sc->sc_soft_ih) {
1113		softint_disestablish(sc->sc_soft_ih);
1114		sc->sc_soft_ih = NULL;
1115	}
1116
1117	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1118	return 1;
1119}
1120
1121void
1122virtio_child_detach(struct virtio_softc *sc)
1123{
1124	sc->sc_child = NULL;
1125	sc->sc_vqs = NULL;
1126
1127	virtio_device_reset(sc);
1128
1129	sc->sc_ops->free_interrupts(sc);
1130
1131	if (sc->sc_soft_ih) {
1132		softint_disestablish(sc->sc_soft_ih);
1133		sc->sc_soft_ih = NULL;
1134	}
1135}
1136
1137void
1138virtio_child_attach_failed(struct virtio_softc *sc)
1139{
1140	virtio_child_detach(sc);
1141
1142	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1143
1144	sc->sc_child = VIRTIO_CHILD_FAILED;
1145}
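/*
 * Putting the attach helpers together: a hypothetical child driver's
 * attach function might look roughly like this (sketch only; xxx_* and
 * XXX_* names are placeholders, error unwinding abbreviated):
 *
 *	virtio_child_attach_start(vsc, self, IPL_XXX, &xsc->sc_vq,
 *	    xxx_config_change, virtio_vq_intr, 0,
 *	    XXX_REQUESTED_FEATURES, XXX_FLAG_BITS);
 *	if (virtio_alloc_vq(vsc, &xsc->sc_vq, 0, MAXPHYS,
 *	    XXX_MAX_SEGMENTS, "xxx request") != 0)
 *		goto fail;
 *	xsc->sc_vq.vq_done = xxx_vq_done;
 *	if (virtio_child_attach_finish(vsc) != 0)
 *		goto fail;
 *	return;
 * fail:
 *	virtio_child_attach_failed(vsc);
 */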
1146
1147bus_dma_tag_t
1148virtio_dmat(struct virtio_softc *sc)
1149{
1150	return sc->sc_dmat;
1151}
1152
1153device_t
1154virtio_child(struct virtio_softc *sc)
1155{
1156	return sc->sc_child;
1157}
1158
1159int
1160virtio_intrhand(struct virtio_softc *sc)
1161{
1162	return (sc->sc_intrhand)(sc);
1163}
1164
1165uint64_t
1166virtio_features(struct virtio_softc *sc)
1167{
1168	return sc->sc_active_features;
1169}
1170
1171int
1172virtio_attach_failed(struct virtio_softc *sc)
1173{
1174	device_t self = sc->sc_dev;
1175
1176	/* no error if it's not connected, but it has still failed */
1177	if (sc->sc_childdevid == 0)
1178		return 1;
1179
1180	if (sc->sc_child == NULL) {
1181		aprint_error_dev(self,
1182			"no matching child driver; not configured\n");
1183		return 1;
1184	}
1185
1186	if (sc->sc_child == VIRTIO_CHILD_FAILED) {
1187		aprint_error_dev(self, "virtio configuration failed\n");
1188		return 1;
1189	}
1190	return 0;
1191}
1192
1193void
1194virtio_print_device_type(device_t self, int id, int revision)
1195{
1196	aprint_normal_dev(self, "%s device (rev. 0x%02x)\n",
1197		  (id < NDEVNAMES ? virtio_device_name[id] : "Unknown"),
1198		  revision);
1199}
1200
1201
1202MODULE(MODULE_CLASS_DRIVER, virtio, NULL);
1203
1204#ifdef _MODULE
1205#include "ioconf.c"
1206#endif
1207
1208static int
1209virtio_modcmd(modcmd_t cmd, void *opaque)
1210{
1211	int error = 0;
1212
1213#ifdef _MODULE
1214	switch (cmd) {
1215	case MODULE_CMD_INIT:
1216		error = config_init_component(cfdriver_ioconf_virtio,
1217		    cfattach_ioconf_virtio, cfdata_ioconf_virtio);
1218		break;
1219	case MODULE_CMD_FINI:
1220		error = config_fini_component(cfdriver_ioconf_virtio,
1221		    cfattach_ioconf_virtio, cfdata_ioconf_virtio);
1222		break;
1223	default:
1224		error = ENOTTY;
1225		break;
1226	}
1227#endif
1228
1229	return error;
1230}
1231