/*	$NetBSD: virtio.c,v 1.1 2011/10/30 12:12:21 hannken Exp $	*/

/*
 * Copyright (c) 2010 Minoura Makoto.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: virtio.c,v 1.1 2011/10/30 12:12:21 hannken Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/atomic.h>
#include <sys/bus.h>
#include <sys/device.h>
#include <sys/kmem.h>

#include <dev/pci/pcidevs.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <dev/pci/virtioreg.h>
#include <dev/pci/virtiovar.h>

#define MINSEG_INDIRECT		2 /* use indirect if nsegs >= this value */

static int	virtio_match(device_t, cfdata_t, void *);
static void	virtio_attach(device_t, device_t, void *);
static int	virtio_detach(device_t, int);
static int	virtio_intr(void *arg);
static void	virtio_init_vq(struct virtio_softc *,
		    struct virtqueue *, const bool);

CFATTACH_DECL3_NEW(virtio, sizeof(struct virtio_softc),
    virtio_match, virtio_attach, virtio_detach, NULL, NULL, NULL,
    DVF_DETACH_SHUTDOWN);

static void
virtio_set_status(struct virtio_softc *sc, int status)
{
	int old = 0;

	if (status != 0)
		old = bus_space_read_1(sc->sc_iot, sc->sc_ioh,
				       VIRTIO_CONFIG_DEVICE_STATUS);
	bus_space_write_1(sc->sc_iot, sc->sc_ioh, VIRTIO_CONFIG_DEVICE_STATUS,
			  status|old);
}

#define virtio_device_reset(sc)	virtio_set_status((sc), 0)

static int
virtio_match(device_t parent, cfdata_t match, void *aux)
{
	struct pci_attach_args *pa;

	pa = (struct pci_attach_args *)aux;
	switch (PCI_VENDOR(pa->pa_id)) {
	case PCI_VENDOR_QUMRANET:
		if ((0x1000 <= PCI_PRODUCT(pa->pa_id)) &&
		    (PCI_PRODUCT(pa->pa_id) <= 0x103f))
			return 1;
		break;
	}

	return 0;
}

static const char *virtio_device_name[] = {
	"Unknown (0)",		/* 0 */
	"Network",		/* 1 */
	"Block",		/* 2 */
	"Console",		/* 3 */
	"Entropy",		/* 4 */
	"Memory Balloon",	/* 5 */
	"Unknown (6)",		/* 6 */
	"Unknown (7)",		/* 7 */
	"Unknown (8)",		/* 8 */
	"9P Transport"		/* 9 */
};
#define NDEVNAMES	(sizeof(virtio_device_name)/sizeof(char*))

static void
virtio_attach(device_t parent, device_t self, void *aux)
{
	struct virtio_softc *sc = device_private(self);
	struct pci_attach_args *pa = (struct pci_attach_args *)aux;
	pci_chipset_tag_t pc = pa->pa_pc;
	pcitag_t tag = pa->pa_tag;
	int revision;
	pcireg_t id;
	char const *intrstr;
	pci_intr_handle_t ih;

	revision = PCI_REVISION(pa->pa_class);
	if (revision != 0) {
		aprint_normal(": unknown revision 0x%02x; giving up\n",
			      revision);
		return;
	}
	aprint_normal("\n");
	aprint_naive("\n");

	/* the subsystem ID identifies the virtio device type */
	id = pci_conf_read(pc, tag, PCI_SUBSYS_ID_REG);
	aprint_normal_dev(self, "Virtio %s Device (rev. 0x%02x)\n",
			  (PCI_PRODUCT(id) < NDEVNAMES?
			   virtio_device_name[PCI_PRODUCT(id)] : "Unknown"),
			  revision);

	sc->sc_dev = self;
	sc->sc_pc = pc;
	sc->sc_tag = tag;
	sc->sc_iot = pa->pa_iot;
	sc->sc_dmat = pa->pa_dmat;
	sc->sc_config_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI;

	if (pci_mapreg_map(pa, PCI_MAPREG_START, PCI_MAPREG_TYPE_IO, 0,
			   &sc->sc_iot, &sc->sc_ioh, NULL, &sc->sc_iosize)) {
		aprint_error_dev(self, "can't map i/o space\n");
		return;
	}

	virtio_device_reset(sc);
	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);

	/* XXX: use softc as aux... */
	sc->sc_childdevid = PCI_PRODUCT(id);
	sc->sc_child = NULL;
	config_found(self, sc, NULL);
	if (sc->sc_child == NULL) {
		aprint_error_dev(self,
				 "no matching child driver; not configured\n");
		return;
	}
	if (sc->sc_child == (void*)1) { /* this indicates an error */
		aprint_error_dev(self,
				 "virtio configuration failed\n");
		virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
		return;
	}

	if (pci_intr_map(pa, &ih)) {
		aprint_error_dev(self, "couldn't map interrupt\n");
		virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
		return;
	}
	intrstr = pci_intr_string(pc, ih);
	sc->sc_ih = pci_intr_establish(pc, ih, sc->sc_ipl, virtio_intr, sc);
	if (sc->sc_ih == NULL) {
		aprint_error_dev(self, "couldn't establish interrupt");
		if (intrstr != NULL)
			aprint_error(" at %s", intrstr);
		aprint_error("\n");
		virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
		return;
	}
	aprint_normal_dev(self, "interrupting at %s\n", intrstr);

	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);

	return;
}

static int
virtio_detach(device_t self, int flags)
{
	struct virtio_softc *sc = device_private(self);
	int r;

	if (sc->sc_child != 0 && sc->sc_child != (void*)1) {
		r = config_detach(sc->sc_child, flags);
		if (r)
			return r;
	}
	KASSERT(sc->sc_child == 0 || sc->sc_child == (void*)1);
	KASSERT(sc->sc_vqs == 0);
	pci_intr_disestablish(sc->sc_pc, sc->sc_ih);
	sc->sc_ih = 0;
	if (sc->sc_iosize)
		bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_iosize);
	sc->sc_iosize = 0;

	return 0;
}

/*
 * Reset the device.
 */
/*
 * To reset the device to a known state, do the following:
 *	virtio_reset(sc);	     // this will stop the device activity
 *	<dequeue finished requests>; // virtio_dequeue() still can be called
 *	<revoke pending requests in the vqs if any>;
 *	virtio_reinit_start(sc);     // dequeue prohibited
 *	newfeatures = virtio_negotiate_features(sc, requestedfeatures);
 *	<some other initialization>;
 *	virtio_reinit_end(sc);	     // device activated; enqueue allowed
 * Once attached, features can be renegotiated only after a virtio_reset().
 */
void
virtio_reset(struct virtio_softc *sc)
{
	virtio_device_reset(sc);
}

void
virtio_reinit_start(struct virtio_softc *sc)
{
	int i;

	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
	for (i = 0; i < sc->sc_nvqs; i++) {
		int n;
		struct virtqueue *vq = &sc->sc_vqs[i];
		bus_space_write_2(sc->sc_iot, sc->sc_ioh,
				  VIRTIO_CONFIG_QUEUE_SELECT,
				  vq->vq_index);
		n = bus_space_read_2(sc->sc_iot, sc->sc_ioh,
				     VIRTIO_CONFIG_QUEUE_SIZE);
		if (n == 0)	/* vq disappeared */
			continue;
		if (n != vq->vq_num) {
			panic("%s: virtqueue size changed, vq index %d\n",
			      device_xname(sc->sc_dev),
			      vq->vq_index);
		}
		virtio_init_vq(sc, vq, true);
		bus_space_write_4(sc->sc_iot, sc->sc_ioh,
				  VIRTIO_CONFIG_QUEUE_ADDRESS,
				  (vq->vq_dmamap->dm_segs[0].ds_addr
				   / VIRTIO_PAGE_SIZE));
	}
}

void
virtio_reinit_end(struct virtio_softc *sc)
{
	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
}

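/*
 * Example: a minimal sketch of the reset/reinit sequence described above,
 * as a hypothetical child driver might use it (e.g. after a suspend/resume
 * or a fatal ring error).  The softc layout and the drain routine are
 * assumptions for illustration only, not part of this API.
 *
 *	static void
 *	vioxx_reinit(struct vioxx_softc *xsc)
 *	{
 *		struct virtio_softc *vsc = xsc->sc_virtio;
 *
 *		virtio_reset(vsc);		// stop device activity
 *		vioxx_drain(xsc);		// dequeue/revoke pending requests
 *		virtio_reinit_start(vsc);	// no dequeue from here on
 *		(void)virtio_negotiate_features(vsc, 0); // requested bits here
 *		// <re-read device config, re-fill rings, etc.>
 *		virtio_reinit_end(vsc);		// DRIVER_OK; enqueue allowed
 *	}
 */
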
265/*
266 * Feature negotiation.
267 */
268uint32_t
269virtio_negotiate_features(struct virtio_softc *sc, uint32_t guest_features)
270{
271	uint32_t r;
272
273	if (!(device_cfdata(sc->sc_dev)->cf_flags & 1) &&
274	    !(device_cfdata(sc->sc_child)->cf_flags & 1)) /* XXX */
275		guest_features |= VIRTIO_F_RING_INDIRECT_DESC;
276	r = bus_space_read_4(sc->sc_iot, sc->sc_ioh,
277			     VIRTIO_CONFIG_DEVICE_FEATURES);
278	r &= guest_features;
279	bus_space_write_4(sc->sc_iot, sc->sc_ioh,
280			  VIRTIO_CONFIG_GUEST_FEATURES, r);
281	sc->sc_features = r;
282	if (r & VIRTIO_F_RING_INDIRECT_DESC)
283		sc->sc_indirect = true;
284	else
285		sc->sc_indirect = false;
286
287	return r;
288}
289
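/*
 * Example: how a child driver typically negotiates features during attach.
 * The requested bits are masked with what the host offers, so the driver
 * must check the result before relying on any optional feature.  The
 * VIRTIO_DUMMY_F_FOO macro below is a placeholder for illustration, not a
 * definition from virtioreg.h.
 *
 *	uint32_t features;
 *
 *	features = virtio_negotiate_features(vsc, VIRTIO_DUMMY_F_FOO);
 *	if (features & VIRTIO_DUMMY_F_FOO)
 *		xsc->sc_has_foo = true;		// host granted the feature
 *	else
 *		xsc->sc_has_foo = false;	// fall back to baseline behaviour
 */
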
/*
 * Device configuration registers.
 */
uint8_t
virtio_read_device_config_1(struct virtio_softc *sc, int index)
{
	return bus_space_read_1(sc->sc_iot, sc->sc_ioh,
				sc->sc_config_offset + index);
}

uint16_t
virtio_read_device_config_2(struct virtio_softc *sc, int index)
{
	return bus_space_read_2(sc->sc_iot, sc->sc_ioh,
				sc->sc_config_offset + index);
}

uint32_t
virtio_read_device_config_4(struct virtio_softc *sc, int index)
{
	return bus_space_read_4(sc->sc_iot, sc->sc_ioh,
				sc->sc_config_offset + index);
}

uint64_t
virtio_read_device_config_8(struct virtio_softc *sc, int index)
{
	uint64_t r;

	r = bus_space_read_4(sc->sc_iot, sc->sc_ioh,
			     sc->sc_config_offset + index + sizeof(uint32_t));
	r <<= 32;
	r += bus_space_read_4(sc->sc_iot, sc->sc_ioh,
			      sc->sc_config_offset + index);
	return r;
}

void
virtio_write_device_config_1(struct virtio_softc *sc,
			     int index, uint8_t value)
{
	bus_space_write_1(sc->sc_iot, sc->sc_ioh,
			  sc->sc_config_offset + index, value);
}

void
virtio_write_device_config_2(struct virtio_softc *sc,
			     int index, uint16_t value)
{
	bus_space_write_2(sc->sc_iot, sc->sc_ioh,
			  sc->sc_config_offset + index, value);
}

void
virtio_write_device_config_4(struct virtio_softc *sc,
			     int index, uint32_t value)
{
	bus_space_write_4(sc->sc_iot, sc->sc_ioh,
			  sc->sc_config_offset + index, value);
}

void
virtio_write_device_config_8(struct virtio_softc *sc,
			     int index, uint64_t value)
{
	bus_space_write_4(sc->sc_iot, sc->sc_ioh,
			  sc->sc_config_offset + index,
			  value & 0xffffffff);
	bus_space_write_4(sc->sc_iot, sc->sc_ioh,
			  sc->sc_config_offset + index + sizeof(uint32_t),
			  value >> 32);
}

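/*
 * Example: reading a device-specific configuration field.  The 8-byte
 * accessors above transfer the low 32-bit half first and the high half
 * second.  A hypothetical child driver with a 64-bit "capacity" field at
 * offset 0 of its config space (the offset macro is an assumption, not
 * taken from virtioreg.h) could fetch it like this:
 *
 *	#define VIRTIO_DUMMY_CONFIG_CAPACITY	0	// hypothetical offset
 *
 *	uint64_t capacity;
 *
 *	capacity = virtio_read_device_config_8(vsc,
 *	    VIRTIO_DUMMY_CONFIG_CAPACITY);
 *	aprint_normal_dev(self, "capacity %" PRIu64 " blocks\n", capacity);
 */
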
/*
 * Interrupt handler.
 */
static int
virtio_intr(void *arg)
{
	struct virtio_softc *sc = arg;
	int isr, r = 0;

	/* check and ack the interrupt */
	isr = bus_space_read_1(sc->sc_iot, sc->sc_ioh,
			       VIRTIO_CONFIG_ISR_STATUS);
	if (isr == 0)
		return 0;
	if ((isr & VIRTIO_CONFIG_ISR_CONFIG_CHANGE) &&
	    (sc->sc_config_change != NULL))
		r = (sc->sc_config_change)(sc);
	if (sc->sc_intrhand != NULL)
		r |= (sc->sc_intrhand)(sc);

	return r;
}

/*
 * dmamap sync operations for a virtqueue.
 */
static inline void
vq_sync_descs(struct virtio_softc *sc, struct virtqueue *vq, int ops)
{
	/* availoffset == sizeof(vring_desc)*vq_num */
	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, 0, vq->vq_availoffset,
			ops);
}

static inline void
vq_sync_aring(struct virtio_softc *sc, struct virtqueue *vq, int ops)
{
	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
			vq->vq_availoffset,
			offsetof(struct vring_avail, ring)
			 + vq->vq_num * sizeof(uint16_t),
			ops);
}

static inline void
vq_sync_uring(struct virtio_softc *sc, struct virtqueue *vq, int ops)
{
	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
			vq->vq_usedoffset,
			offsetof(struct vring_used, ring)
			 + vq->vq_num * sizeof(struct vring_used_elem),
			ops);
}

static inline void
vq_sync_indirect(struct virtio_softc *sc, struct virtqueue *vq, int slot,
		     int ops)
{
	int offset = vq->vq_indirectoffset
		      + sizeof(struct vring_desc) * vq->vq_maxnsegs * slot;

	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
			offset, sizeof(struct vring_desc) * vq->vq_maxnsegs,
			ops);
}

/*
 * Can be used as sc_intrhand.
 */
/*
 * Scan the vqs, do bus_dmamap_sync for the rings (not for the payload),
 * and call (*vq_done)() if some entries have been consumed.
 */
int
virtio_vq_intr(struct virtio_softc *sc)
{
	struct virtqueue *vq;
	int i, r = 0;

	for (i = 0; i < sc->sc_nvqs; i++) {
		vq = &sc->sc_vqs[i];
		if (vq->vq_queued) {
			vq->vq_queued = 0;
			vq_sync_aring(sc, vq, BUS_DMASYNC_POSTWRITE);
		}
		vq_sync_uring(sc, vq, BUS_DMASYNC_POSTREAD);
		membar_consumer();
		if (vq->vq_used_idx != vq->vq_used->idx) {
			if (vq->vq_done)
				r |= (vq->vq_done)(vq);
		}
	}

	return r;
}

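/*
 * Example: how a child driver typically wires up interrupt handling in its
 * attach routine.  The child fills in sc_ipl, sc_intrhand, sc_config_change
 * and sc_vqs/sc_nvqs in the parent softc (the aux passed via config_found),
 * and virtio_intr() then dispatches to those hooks.  The vioxx_* names and
 * softc fields are hypothetical.
 *
 *	static int
 *	vioxx_vq_done(struct virtqueue *vq)	// called via vq->vq_done
 *	{
 *		// drain the used ring with virtio_dequeue()/_dequeue_commit()
 *		return 1;			// claimed the interrupt
 *	}
 *
 *	// in vioxx_attach(), before returning:
 *	vsc->sc_ipl = IPL_NET;			// or IPL_BIO, as appropriate
 *	vsc->sc_intrhand = virtio_vq_intr;	// per-vq dispatch shown above
 *	vsc->sc_config_change = NULL;		// no config-change handler
 *	vsc->sc_vqs = &xsc->sc_vq;
 *	vsc->sc_nvqs = 1;
 *	xsc->sc_vq.vq_done = vioxx_vq_done;
 *	vsc->sc_child = self;			// tell virtio(4) attach succeeded
 */
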
/*
 * Start/stop vq interrupts.  This is only a hint to the host; there is
 * no guarantee that interrupts are actually suppressed.
 */
void
virtio_stop_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
{
	vq->vq_avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
	vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
	vq->vq_queued++;
}

void
virtio_start_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
{
	vq->vq_avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
	vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
	vq->vq_queued++;
}

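/*
 * Example: the usual polling pattern built on the two calls above.  Because
 * VRING_AVAIL_F_NO_INTERRUPT is only advisory, a driver that polls should
 * re-check the used ring after re-enabling interrupts to close the race.
 * vioxx_poll_used() is a hypothetical helper that consumes completions.
 *
 *	virtio_stop_vq_intr(vsc, vq);	// ask the host not to interrupt
 *	while (vioxx_poll_used(xsc, vq))
 *		;			// consume completions by polling
 *	virtio_start_vq_intr(vsc, vq);	// re-enable interrupts
 *	(void)virtio_vq_intr(vsc);	// catch entries that raced in
 */
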
/*
 * Initialize vq structure.
 */
static void
virtio_init_vq(struct virtio_softc *sc, struct virtqueue *vq, const bool reinit)
{
	int i, j;
	int vq_size = vq->vq_num;

	memset(vq->vq_vaddr, 0, vq->vq_bytesize);

	/* build the indirect descriptor chain */
	if (vq->vq_indirect != NULL) {
		struct vring_desc *vd;

		for (i = 0; i < vq_size; i++) {
			vd = vq->vq_indirect;
			vd += vq->vq_maxnsegs * i;
			for (j = 0; j < vq->vq_maxnsegs-1; j++)
				vd[j].next = j + 1;
		}
	}

	/* free slot management */
	SIMPLEQ_INIT(&vq->vq_freelist);
	for (i = 0; i < vq_size; i++) {
		SIMPLEQ_INSERT_TAIL(&vq->vq_freelist,
				    &vq->vq_entries[i], qe_list);
		vq->vq_entries[i].qe_index = i;
	}
	if (!reinit)
		mutex_init(&vq->vq_freelist_lock, MUTEX_SPIN, sc->sc_ipl);

	/* enqueue/dequeue status */
	vq->vq_avail_idx = 0;
	vq->vq_used_idx = 0;
	vq->vq_queued = 0;
	if (!reinit) {
		mutex_init(&vq->vq_aring_lock, MUTEX_SPIN, sc->sc_ipl);
		mutex_init(&vq->vq_uring_lock, MUTEX_SPIN, sc->sc_ipl);
	}
	vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
	vq_sync_uring(sc, vq, BUS_DMASYNC_PREREAD);
	vq->vq_queued++;
}

/*
 * Allocate/free a vq.
 */
int
virtio_alloc_vq(struct virtio_softc *sc,
		struct virtqueue *vq, int index, int maxsegsize, int maxnsegs,
		const char *name)
{
	int vq_size, allocsize1, allocsize2, allocsize3, allocsize = 0;
	int rsegs, r;
#define VIRTQUEUE_ALIGN(n)	(((n)+(VIRTIO_PAGE_SIZE-1))&	\
				 ~(VIRTIO_PAGE_SIZE-1))

	memset(vq, 0, sizeof(*vq));

	bus_space_write_2(sc->sc_iot, sc->sc_ioh,
			  VIRTIO_CONFIG_QUEUE_SELECT, index);
	vq_size = bus_space_read_2(sc->sc_iot, sc->sc_ioh,
				   VIRTIO_CONFIG_QUEUE_SIZE);
	if (vq_size == 0) {
		aprint_error_dev(sc->sc_dev,
				 "virtqueue does not exist, index %d for %s\n",
				 index, name);
		goto err;
	}
	/* allocsize1: descriptor table + avail ring + pad */
	allocsize1 = VIRTQUEUE_ALIGN(sizeof(struct vring_desc)*vq_size
				     + sizeof(uint16_t)*(2+vq_size));
	/* allocsize2: used ring + pad */
	allocsize2 = VIRTQUEUE_ALIGN(sizeof(uint16_t)*2
				     + sizeof(struct vring_used_elem)*vq_size);
	/* allocsize3: indirect table */
	if (sc->sc_indirect && maxnsegs >= MINSEG_INDIRECT)
		allocsize3 = sizeof(struct vring_desc) * maxnsegs * vq_size;
	else
		allocsize3 = 0;
	allocsize = allocsize1 + allocsize2 + allocsize3;

	/* alloc and map the memory */
	r = bus_dmamem_alloc(sc->sc_dmat, allocsize, VIRTIO_PAGE_SIZE, 0,
			     &vq->vq_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
	if (r != 0) {
		aprint_error_dev(sc->sc_dev,
				 "virtqueue %d for %s allocation failed, "
				 "error code %d\n", index, name, r);
		goto err;
	}
	r = bus_dmamem_map(sc->sc_dmat, &vq->vq_segs[0], 1, allocsize,
			   &vq->vq_vaddr, BUS_DMA_NOWAIT);
	if (r != 0) {
		aprint_error_dev(sc->sc_dev,
				 "virtqueue %d for %s map failed, "
				 "error code %d\n", index, name, r);
		goto err;
	}
	r = bus_dmamap_create(sc->sc_dmat, allocsize, 1, allocsize, 0,
			      BUS_DMA_NOWAIT, &vq->vq_dmamap);
	if (r != 0) {
		aprint_error_dev(sc->sc_dev,
				 "virtqueue %d for %s dmamap creation failed, "
				 "error code %d\n", index, name, r);
		goto err;
	}
	r = bus_dmamap_load(sc->sc_dmat, vq->vq_dmamap,
			    vq->vq_vaddr, allocsize, NULL, BUS_DMA_NOWAIT);
	if (r != 0) {
		aprint_error_dev(sc->sc_dev,
				 "virtqueue %d for %s dmamap load failed, "
				 "error code %d\n", index, name, r);
		goto err;
	}

	/* set the vq address */
	bus_space_write_4(sc->sc_iot, sc->sc_ioh,
			  VIRTIO_CONFIG_QUEUE_ADDRESS,
			  (vq->vq_dmamap->dm_segs[0].ds_addr
			   / VIRTIO_PAGE_SIZE));

	/* remember addresses and offsets for later use */
	vq->vq_owner = sc;
	vq->vq_num = vq_size;
	vq->vq_index = index;
	vq->vq_desc = vq->vq_vaddr;
	vq->vq_availoffset = sizeof(struct vring_desc)*vq_size;
	vq->vq_avail = (void*)(((char*)vq->vq_desc) + vq->vq_availoffset);
	vq->vq_usedoffset = allocsize1;
	vq->vq_used = (void*)(((char*)vq->vq_desc) + vq->vq_usedoffset);
	if (allocsize3 > 0) {
		vq->vq_indirectoffset = allocsize1 + allocsize2;
		vq->vq_indirect = (void*)(((char*)vq->vq_desc)
					  + vq->vq_indirectoffset);
	}
	vq->vq_bytesize = allocsize;
	vq->vq_maxsegsize = maxsegsize;
	vq->vq_maxnsegs = maxnsegs;

	/* free slot management */
	vq->vq_entries = kmem_zalloc(sizeof(struct vq_entry)*vq_size,
				     KM_NOSLEEP);
	if (vq->vq_entries == NULL) {
		r = ENOMEM;
		goto err;
	}

	virtio_init_vq(sc, vq, false);

	aprint_verbose_dev(sc->sc_dev,
			   "allocated %u bytes for virtqueue %d for %s, "
			   "size %d\n", allocsize, index, name, vq_size);
	if (allocsize3 > 0)
		aprint_verbose_dev(sc->sc_dev,
				   "using %d bytes (%d entries) "
				   "indirect descriptors\n",
				   allocsize3, maxnsegs * vq_size);
	return 0;

err:
	bus_space_write_4(sc->sc_iot, sc->sc_ioh,
			  VIRTIO_CONFIG_QUEUE_ADDRESS, 0);
	if (vq->vq_dmamap)
		bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
	if (vq->vq_vaddr)
		bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, allocsize);
	if (vq->vq_segs[0].ds_addr)
		bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
	memset(vq, 0, sizeof(*vq));

	return -1;
}

int
virtio_free_vq(struct virtio_softc *sc, struct virtqueue *vq)
{
	struct vq_entry *qe;
	int i = 0;

	/* the device must already be deactivated */
	/* confirm the vq is empty */
	SIMPLEQ_FOREACH(qe, &vq->vq_freelist, qe_list) {
		i++;
	}
	if (i != vq->vq_num) {
		printf("%s: freeing non-empty vq, index %d\n",
		       device_xname(sc->sc_dev), vq->vq_index);
		return EBUSY;
	}

	/* tell the device that there is no virtqueue any longer */
	bus_space_write_2(sc->sc_iot, sc->sc_ioh,
			  VIRTIO_CONFIG_QUEUE_SELECT, vq->vq_index);
	bus_space_write_4(sc->sc_iot, sc->sc_ioh,
			  VIRTIO_CONFIG_QUEUE_ADDRESS, 0);

	/* the entries array was allocated with this size, not vq_bytesize */
	kmem_free(vq->vq_entries, sizeof(struct vq_entry) * vq->vq_num);
	bus_dmamap_unload(sc->sc_dmat, vq->vq_dmamap);
	bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
	bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, vq->vq_bytesize);
	bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
	mutex_destroy(&vq->vq_freelist_lock);
	mutex_destroy(&vq->vq_uring_lock);
	mutex_destroy(&vq->vq_aring_lock);
	memset(vq, 0, sizeof(*vq));

	return 0;
}

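/*
 * Example: allocating a virtqueue from a child driver's attach routine and
 * releasing it again on detach.  Queue index 0, the segment limits and the
 * "request" name are illustrative values only.
 *
 *	if (virtio_alloc_vq(vsc, &xsc->sc_vq, 0, MAXPHYS,
 *	    MAXPHYS / NBPG + 2, "request") != 0)
 *		return;				// attach fails
 *	xsc->sc_vq.vq_done = vioxx_vq_done;
 *	...
 *	// on detach, after the device has been reset/deactivated:
 *	virtio_free_vq(vsc, &xsc->sc_vq);
 */
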
/*
 * Free descriptor management.
 */
static struct vq_entry *
vq_alloc_entry(struct virtqueue *vq)
{
	struct vq_entry *qe;

	mutex_enter(&vq->vq_freelist_lock);
	if (SIMPLEQ_EMPTY(&vq->vq_freelist)) {
		mutex_exit(&vq->vq_freelist_lock);
		return NULL;
	}
	qe = SIMPLEQ_FIRST(&vq->vq_freelist);
	SIMPLEQ_REMOVE_HEAD(&vq->vq_freelist, qe_list);
	mutex_exit(&vq->vq_freelist_lock);

	return qe;
}

static void
vq_free_entry(struct virtqueue *vq, struct vq_entry *qe)
{
	mutex_enter(&vq->vq_freelist_lock);
	SIMPLEQ_INSERT_TAIL(&vq->vq_freelist, qe, qe_list);
	mutex_exit(&vq->vq_freelist_lock);

	return;
}

/*
 * Enqueue several dmamaps as a single request.
 */
/*
 * Typical usage:
 *  <queue size> instances of each of the following are stored in arrays:
 *  - command blocks (in dmamem) should be pre-allocated and mapped
 *  - dmamaps for command blocks should be pre-allocated and loaded
 *  - dmamaps for payload should be pre-allocated
 *      r = virtio_enqueue_prep(sc, vq, &slot);		// allocate a slot
 *	if (r)		// currently 0 or EAGAIN
 *	  return r;
 *	r = bus_dmamap_load(dmat, dmamap_payload[slot], data, count, ..);
 *	if (r) {
 *	  virtio_enqueue_abort(sc, vq, slot);
 *	  bus_dmamap_unload(dmat, dmamap_payload[slot]);
 *	  return r;
 *	}
 *	r = virtio_enqueue_reserve(sc, vq, slot,
 *				   dmamap_payload[slot]->dm_nsegs+1);
 *							// ^ +1 for command
 *	if (r) {	// currently 0 or EAGAIN
 *	  bus_dmamap_unload(dmat, dmamap_payload[slot]);
 *	  return r;					// do not call abort()
 *	}
 *	<setup and prepare commands>
 *	bus_dmamap_sync(dmat, dmamap_cmd[slot],... BUS_DMASYNC_PREWRITE);
 *	bus_dmamap_sync(dmat, dmamap_payload[slot],...);
 *	virtio_enqueue(sc, vq, slot, dmamap_cmd[slot], false);
 *	virtio_enqueue(sc, vq, slot, dmamap_payload[slot], iswrite);
 *	virtio_enqueue_commit(sc, vq, slot, true);
 */

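/*
 * Example: the sequence above spelled out as one function.  The command
 * structure, the per-slot arrays and the error-handling policy are
 * assumptions for illustration; a real driver would use its own softc
 * layout and would likely defer and retry instead of returning EAGAIN
 * to its caller.
 *
 *	static int
 *	vioxx_start_request(struct vioxx_softc *xsc, void *data, size_t count,
 *	    bool iswrite)
 *	{
 *		struct virtio_softc *vsc = xsc->sc_virtio;
 *		struct virtqueue *vq = &xsc->sc_vq;
 *		int slot, r;
 *
 *		r = virtio_enqueue_prep(vsc, vq, &slot);
 *		if (r != 0)
 *			return r;		// EAGAIN: no free slot
 *		r = bus_dmamap_load(vsc->sc_dmat, xsc->sc_dma_payload[slot],
 *		    data, count, NULL, BUS_DMA_NOWAIT);
 *		if (r != 0) {
 *			virtio_enqueue_abort(vsc, vq, slot);
 *			return r;
 *		}
 *		r = virtio_enqueue_reserve(vsc, vq, slot,
 *		    xsc->sc_dma_payload[slot]->dm_nsegs + 1);
 *		if (r != 0) {			// do not call abort() here
 *			bus_dmamap_unload(vsc->sc_dmat,
 *			    xsc->sc_dma_payload[slot]);
 *			return r;
 *		}
 *		// <fill in the command block xsc->sc_cmds[slot] here>
 *		bus_dmamap_sync(vsc->sc_dmat, xsc->sc_dma_cmd[slot], 0,
 *		    sizeof(xsc->sc_cmds[slot]), BUS_DMASYNC_PREWRITE);
 *		bus_dmamap_sync(vsc->sc_dmat, xsc->sc_dma_payload[slot], 0,
 *		    count, iswrite ? BUS_DMASYNC_PREWRITE : BUS_DMASYNC_PREREAD);
 *		virtio_enqueue(vsc, vq, slot, xsc->sc_dma_cmd[slot], false);
 *		virtio_enqueue(vsc, vq, slot, xsc->sc_dma_payload[slot], iswrite);
 *		return virtio_enqueue_commit(vsc, vq, slot, true);
 *	}
 */
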
/*
 * enqueue_prep: allocate a slot number
 */
int
virtio_enqueue_prep(struct virtio_softc *sc, struct virtqueue *vq, int *slotp)
{
	struct vq_entry *qe1;

	KASSERT(slotp != NULL);

	qe1 = vq_alloc_entry(vq);
	if (qe1 == NULL)
		return EAGAIN;
	/* next slot is not allocated yet */
	qe1->qe_next = -1;
	*slotp = qe1->qe_index;

	return 0;
}

/*
 * enqueue_reserve: allocate remaining slots and build the descriptor chain.
 */
int
virtio_enqueue_reserve(struct virtio_softc *sc, struct virtqueue *vq,
		       int slot, int nsegs)
{
	int indirect;
	struct vq_entry *qe1 = &vq->vq_entries[slot];

	KASSERT(qe1->qe_next == -1);
	KASSERT(1 <= nsegs && nsegs <= vq->vq_num);

	if ((vq->vq_indirect != NULL) &&
	    (nsegs >= MINSEG_INDIRECT) &&
	    (nsegs <= vq->vq_maxnsegs))
		indirect = 1;
	else
		indirect = 0;
	qe1->qe_indirect = indirect;

	if (indirect) {
		struct vring_desc *vd;
		int i;

		vd = &vq->vq_desc[qe1->qe_index];
		vd->addr = vq->vq_dmamap->dm_segs[0].ds_addr
			+ vq->vq_indirectoffset;
		vd->addr += sizeof(struct vring_desc)
			* vq->vq_maxnsegs * qe1->qe_index;
		vd->len = sizeof(struct vring_desc) * nsegs;
		vd->flags = VRING_DESC_F_INDIRECT;

		vd = vq->vq_indirect;
		vd += vq->vq_maxnsegs * qe1->qe_index;
		qe1->qe_desc_base = vd;

		for (i = 0; i < nsegs-1; i++) {
			vd[i].flags = VRING_DESC_F_NEXT;
		}
		vd[i].flags = 0;
		qe1->qe_next = 0;

		return 0;
	} else {
		struct vring_desc *vd;
		struct vq_entry *qe;
		int i, s;

		vd = &vq->vq_desc[0];
		qe1->qe_desc_base = vd;
		qe1->qe_next = qe1->qe_index;
		s = slot;
		for (i = 0; i < nsegs - 1; i++) {
			qe = vq_alloc_entry(vq);
			if (qe == NULL) {
				vd[s].flags = 0;
				virtio_enqueue_abort(sc, vq, slot);
				return EAGAIN;
			}
			vd[s].flags = VRING_DESC_F_NEXT;
			vd[s].next = qe->qe_index;
			s = qe->qe_index;
		}
		vd[s].flags = 0;

		return 0;
	}
}

/*
 * enqueue: enqueue a single dmamap.
 */
int
virtio_enqueue(struct virtio_softc *sc, struct virtqueue *vq, int slot,
	       bus_dmamap_t dmamap, bool write)
{
	struct vq_entry *qe1 = &vq->vq_entries[slot];
	struct vring_desc *vd = qe1->qe_desc_base;
	int i;
	int s = qe1->qe_next;

	KASSERT(s >= 0);
	KASSERT(dmamap->dm_nsegs > 0);

	for (i = 0; i < dmamap->dm_nsegs; i++) {
		vd[s].addr = dmamap->dm_segs[i].ds_addr;
		vd[s].len = dmamap->dm_segs[i].ds_len;
		if (!write)
			vd[s].flags |= VRING_DESC_F_WRITE;
		s = vd[s].next;
	}
	qe1->qe_next = s;

	return 0;
}

int
virtio_enqueue_p(struct virtio_softc *sc, struct virtqueue *vq, int slot,
		 bus_dmamap_t dmamap, bus_addr_t start, bus_size_t len,
		 bool write)
{
	struct vq_entry *qe1 = &vq->vq_entries[slot];
	struct vring_desc *vd = qe1->qe_desc_base;
	int s = qe1->qe_next;

	KASSERT(s >= 0);
	KASSERT(dmamap->dm_nsegs == 1); /* XXX */
	KASSERT((dmamap->dm_segs[0].ds_len > start) &&
		(dmamap->dm_segs[0].ds_len >= start + len));

	vd[s].addr = dmamap->dm_segs[0].ds_addr + start;
	vd[s].len = len;
	if (!write)
		vd[s].flags |= VRING_DESC_F_WRITE;
	qe1->qe_next = vd[s].next;

	return 0;
}

/*
 * enqueue_commit: add it to the aring.
 */
int
virtio_enqueue_commit(struct virtio_softc *sc, struct virtqueue *vq, int slot,
		      bool notifynow)
{
	struct vq_entry *qe1;

	if (slot < 0) {
		mutex_enter(&vq->vq_aring_lock);
		goto notify;
	}
	vq_sync_descs(sc, vq, BUS_DMASYNC_PREWRITE);
	qe1 = &vq->vq_entries[slot];
	if (qe1->qe_indirect)
		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_PREWRITE);
	mutex_enter(&vq->vq_aring_lock);
	vq->vq_avail->ring[(vq->vq_avail_idx++) % vq->vq_num] = slot;

notify:
	if (notifynow) {
		vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
		vq_sync_uring(sc, vq, BUS_DMASYNC_PREREAD);
		membar_producer();
		vq->vq_avail->idx = vq->vq_avail_idx;
		vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
		membar_producer();
		vq->vq_queued++;
		vq_sync_uring(sc, vq, BUS_DMASYNC_POSTREAD);
		membar_consumer();
		if (!(vq->vq_used->flags & VRING_USED_F_NO_NOTIFY))
			bus_space_write_2(sc->sc_iot, sc->sc_ioh,
					  VIRTIO_CONFIG_QUEUE_NOTIFY,
					  vq->vq_index);
	}
	mutex_exit(&vq->vq_aring_lock);

	return 0;
}

/*
 * enqueue_abort: rollback.
 */
int
virtio_enqueue_abort(struct virtio_softc *sc, struct virtqueue *vq, int slot)
{
	struct vq_entry *qe = &vq->vq_entries[slot];
	struct vring_desc *vd;
	int s;

	if (qe->qe_next < 0) {
		vq_free_entry(vq, qe);
		return 0;
	}

	s = slot;
	vd = &vq->vq_desc[0];
	while (vd[s].flags & VRING_DESC_F_NEXT) {
		s = vd[s].next;
		vq_free_entry(vq, qe);
		qe = &vq->vq_entries[s];
	}
	vq_free_entry(vq, qe);
	return 0;
}

/*
 * Dequeue a request.
 */
/*
 * dequeue: dequeue a request from uring; dmamap_sync for uring is
 *	    already done in the interrupt handler.
 */
int
virtio_dequeue(struct virtio_softc *sc, struct virtqueue *vq,
	       int *slotp, int *lenp)
{
	uint16_t slot, usedidx;
	struct vq_entry *qe;

	if (vq->vq_used_idx == vq->vq_used->idx)
		return ENOENT;
	mutex_enter(&vq->vq_uring_lock);
	usedidx = vq->vq_used_idx++;
	mutex_exit(&vq->vq_uring_lock);
	usedidx %= vq->vq_num;
	slot = vq->vq_used->ring[usedidx].id;
	qe = &vq->vq_entries[slot];

	if (qe->qe_indirect)
		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_POSTWRITE);

	if (slotp)
		*slotp = slot;
	if (lenp)
		*lenp = vq->vq_used->ring[usedidx].len;

	return 0;
}

/*
 * dequeue_commit: complete dequeue; the slot is recycled for future use.
 *                 If you forget to call this the slot will be leaked.
 */
int
virtio_dequeue_commit(struct virtio_softc *sc, struct virtqueue *vq, int slot)
{
	struct vq_entry *qe = &vq->vq_entries[slot];
	struct vring_desc *vd = &vq->vq_desc[0];
	int s = slot;

	while (vd[s].flags & VRING_DESC_F_NEXT) {
		s = vd[s].next;
		vq_free_entry(vq, qe);
		qe = &vq->vq_entries[s];
	}
	vq_free_entry(vq, qe);

	return 0;
}

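/*
 * Example: a typical vq_done callback built on virtio_dequeue() and
 * virtio_dequeue_commit().  It drains the used ring completely, syncing
 * and unloading the payload dmamap for each finished slot.  The softc
 * fields and vioxx_complete() are hypothetical.
 *
 *	static int
 *	vioxx_vq_done(struct virtqueue *vq)
 *	{
 *		struct virtio_softc *vsc = vq->vq_owner;
 *		struct vioxx_softc *xsc = device_private(vsc->sc_child);
 *		int slot, len, r = 0;
 *
 *		while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
 *			bus_dmamap_sync(vsc->sc_dmat,
 *			    xsc->sc_dma_payload[slot], 0, len,
 *			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 *			bus_dmamap_unload(vsc->sc_dmat,
 *			    xsc->sc_dma_payload[slot]);
 *			vioxx_complete(xsc, slot, len);
 *			virtio_dequeue_commit(vsc, vq, slot);	// recycle slot
 *			r = 1;
 *		}
 *		return r;
 *	}
 */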