virtqueue.c revision 252702
/*-
 * Copyright (c) 2011, Bryan Venteicher <bryanv@daemoninthecloset.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Implements the virtqueue interface as basically described
 * in the original VirtIO paper.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/virtio/virtqueue.c 252702 2013-07-04 17:50:11Z bryanv $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/sglist.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/cpu.h>
#include <machine/bus.h>
#include <machine/atomic.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/virtio/virtio.h>
#include <dev/virtio/virtqueue.h>
#include <dev/virtio/virtio_ring.h>

#include "virtio_bus_if.h"

struct virtqueue {
	device_t		 vq_dev;
	char			 vq_name[VIRTQUEUE_MAX_NAME_SZ];
	uint16_t		 vq_queue_index;
	uint16_t		 vq_nentries;
	uint32_t		 vq_flags;
#define	VIRTQUEUE_FLAG_INDIRECT	 0x0001
#define	VIRTQUEUE_FLAG_EVENT_IDX 0x0002

	int			 vq_alignment;
	int			 vq_ring_size;
	void			*vq_ring_mem;
	int			 vq_max_indirect_size;
	int			 vq_indirect_mem_size;
	virtqueue_intr_t	*vq_intrhand;
	void			*vq_intrhand_arg;

	struct vring		 vq_ring;
	uint16_t		 vq_free_cnt;
	uint16_t		 vq_queued_cnt;
	/*
	 * Head of the free chain in the descriptor table. If
	 * there are no free descriptors, this will be set to
	 * VQ_RING_DESC_CHAIN_END.
	 */
	uint16_t		 vq_desc_head_idx;
	/*
	 * Last consumed descriptor in the used table,
	 * trails vq_ring.used->idx.
	 */
	uint16_t		 vq_used_cons_idx;

	struct vq_desc_extra {
		void		  *cookie;
		struct vring_desc *indirect;
		vm_paddr_t	   indirect_paddr;
		uint16_t	   ndescs;
	} vq_descx[0];
};

/*
 * The maximum virtqueue size is 2^15. Use that value as the end of
 * descriptor chain terminator since it will never be a valid index
 * in the descriptor table. This is used to verify we are correctly
 * handling vq_free_cnt.
 */
#define VQ_RING_DESC_CHAIN_END 32768

#define VQASSERT(_vq, _exp, _msg, ...)				\
    KASSERT((_exp),("%s: %s - "_msg, __func__, (_vq)->vq_name,	\
	##__VA_ARGS__))

#define VQ_RING_ASSERT_VALID_IDX(_vq, _idx)			\
    VQASSERT((_vq), (_idx) < (_vq)->vq_nentries,		\
	"invalid ring index: %d, max: %d", (_idx),		\
	(_vq)->vq_nentries)

#define VQ_RING_ASSERT_CHAIN_TERM(_vq)				\
    VQASSERT((_vq), (_vq)->vq_desc_head_idx ==			\
	VQ_RING_DESC_CHAIN_END,	"full ring terminated "		\
	"incorrectly: head idx: %d", (_vq)->vq_desc_head_idx)

static int	virtqueue_init_indirect(struct virtqueue *vq, int);
static void	virtqueue_free_indirect(struct virtqueue *vq);
static void	virtqueue_init_indirect_list(struct virtqueue *,
		    struct vring_desc *);

static void	vq_ring_init(struct virtqueue *);
static void	vq_ring_update_avail(struct virtqueue *, uint16_t);
static uint16_t	vq_ring_enqueue_segments(struct virtqueue *,
		    struct vring_desc *, uint16_t, struct sglist *, int, int);
static int	vq_ring_use_indirect(struct virtqueue *, int);
static void	vq_ring_enqueue_indirect(struct virtqueue *, void *,
		    struct sglist *, int, int);
static int	vq_ring_enable_interrupt(struct virtqueue *, uint16_t);
static int	vq_ring_must_notify_host(struct virtqueue *);
static void	vq_ring_notify_host(struct virtqueue *);
static void	vq_ring_free_chain(struct virtqueue *, uint16_t);

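/*
 * Mask the negotiated features down to those this virtqueue code
 * understands: everything below VIRTIO_TRANSPORT_F_START plus the
 * indirect descriptor and event index ring features.
 */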
uint64_t
virtqueue_filter_features(uint64_t features)
{
	uint64_t mask;

	mask = (1 << VIRTIO_TRANSPORT_F_START) - 1;
	mask |= VIRTIO_RING_F_INDIRECT_DESC;
	mask |= VIRTIO_RING_F_EVENT_IDX;

	return (features & mask);
}

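/*
 * Allocate and initialize a virtqueue: validate the device-provided size,
 * set up the optional indirect descriptor lists, and allocate physically
 * contiguous, page-aligned memory for the ring below 'highaddr'. On
 * failure the partially constructed queue is freed and *vqp remains NULL.
 */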
int
virtqueue_alloc(device_t dev, uint16_t queue, uint16_t size, int align,
    vm_paddr_t highaddr, struct vq_alloc_info *info, struct virtqueue **vqp)
{
	struct virtqueue *vq;
	int error;

	*vqp = NULL;
	error = 0;

	if (size == 0) {
		device_printf(dev,
		    "virtqueue %d (%s) does not exist (size is zero)\n",
		    queue, info->vqai_name);
		return (ENODEV);
	} else if (!powerof2(size)) {
		device_printf(dev,
		    "virtqueue %d (%s) size is not a power of 2: %d\n",
		    queue, info->vqai_name, size);
		return (ENXIO);
	} else if (info->vqai_maxindirsz > VIRTIO_MAX_INDIRECT) {
		device_printf(dev, "virtqueue %d (%s) requested too many "
		    "indirect descriptors: %d, max %d\n",
		    queue, info->vqai_name, info->vqai_maxindirsz,
		    VIRTIO_MAX_INDIRECT);
		return (EINVAL);
	}

	vq = malloc(sizeof(struct virtqueue) +
	    size * sizeof(struct vq_desc_extra), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (vq == NULL) {
		device_printf(dev, "cannot allocate virtqueue\n");
		return (ENOMEM);
	}

	vq->vq_dev = dev;
	strlcpy(vq->vq_name, info->vqai_name, sizeof(vq->vq_name));
	vq->vq_queue_index = queue;
	vq->vq_alignment = align;
	vq->vq_nentries = size;
	vq->vq_free_cnt = size;
	vq->vq_intrhand = info->vqai_intr;
	vq->vq_intrhand_arg = info->vqai_intr_arg;

	if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_RING_F_EVENT_IDX) != 0)
		vq->vq_flags |= VIRTQUEUE_FLAG_EVENT_IDX;

	if (info->vqai_maxindirsz > 1) {
		error = virtqueue_init_indirect(vq, info->vqai_maxindirsz);
		if (error)
			goto fail;
	}

	vq->vq_ring_size = round_page(vring_size(size, align));
	vq->vq_ring_mem = contigmalloc(vq->vq_ring_size, M_DEVBUF,
	    M_NOWAIT | M_ZERO, 0, highaddr, PAGE_SIZE, 0);
	if (vq->vq_ring_mem == NULL) {
		device_printf(dev,
		    "cannot allocate memory for virtqueue ring\n");
		error = ENOMEM;
		goto fail;
	}

	vq_ring_init(vq);
	virtqueue_disable_intr(vq);

	*vqp = vq;

fail:
	if (error)
		virtqueue_free(vq);

	return (error);
}

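/*
 * Allocate one indirect descriptor list per ring entry when the
 * VIRTIO_RING_F_INDIRECT_DESC feature was negotiated. If the host did not
 * accept the feature, return success and simply run without indirect
 * descriptors.
 */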
static int
virtqueue_init_indirect(struct virtqueue *vq, int indirect_size)
{
	device_t dev;
	struct vq_desc_extra *dxp;
	int i, size;

	dev = vq->vq_dev;

	if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_RING_F_INDIRECT_DESC) == 0) {
		/*
		 * Indirect descriptors requested by the driver but not
		 * negotiated. Return zero to keep the initialization
		 * going: we'll run fine without.
		 */
		if (bootverbose)
			device_printf(dev, "virtqueue %d (%s) requested "
			    "indirect descriptors but not negotiated\n",
			    vq->vq_queue_index, vq->vq_name);
		return (0);
	}

	size = indirect_size * sizeof(struct vring_desc);
	vq->vq_max_indirect_size = indirect_size;
	vq->vq_indirect_mem_size = size;
	vq->vq_flags |= VIRTQUEUE_FLAG_INDIRECT;

	for (i = 0; i < vq->vq_nentries; i++) {
		dxp = &vq->vq_descx[i];

		dxp->indirect = malloc(size, M_DEVBUF, M_NOWAIT);
		if (dxp->indirect == NULL) {
			device_printf(dev, "cannot allocate indirect list\n");
			return (ENOMEM);
		}

		dxp->indirect_paddr = vtophys(dxp->indirect);
		virtqueue_init_indirect_list(vq, dxp->indirect);
	}

	return (0);
}

static void
virtqueue_free_indirect(struct virtqueue *vq)
{
	struct vq_desc_extra *dxp;
	int i;

	for (i = 0; i < vq->vq_nentries; i++) {
		dxp = &vq->vq_descx[i];

		if (dxp->indirect == NULL)
			break;

		free(dxp->indirect, M_DEVBUF);
		dxp->indirect = NULL;
		dxp->indirect_paddr = 0;
	}

	vq->vq_flags &= ~VIRTQUEUE_FLAG_INDIRECT;
	vq->vq_indirect_mem_size = 0;
}

static void
virtqueue_init_indirect_list(struct virtqueue *vq,
    struct vring_desc *indirect)
{
	int i;

	bzero(indirect, vq->vq_indirect_mem_size);

	for (i = 0; i < vq->vq_max_indirect_size - 1; i++)
		indirect[i].next = i + 1;
	indirect[i].next = VQ_RING_DESC_CHAIN_END;
}

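/*
 * Return an existing virtqueue to its just-allocated state, for example
 * after the device has been reset. The ring memory is zeroed and the free
 * descriptor chain and indirect lists are rebuilt; no memory is allocated
 * or freed.
 */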
int
virtqueue_reinit(struct virtqueue *vq, uint16_t size)
{
	struct vq_desc_extra *dxp;
	int i;

	if (vq->vq_nentries != size) {
		device_printf(vq->vq_dev,
		    "%s: '%s' changed size; old=%hu, new=%hu\n",
		    __func__, vq->vq_name, vq->vq_nentries, size);
		return (EINVAL);
	}

	/* Warn if the virtqueue was not properly cleaned up. */
	if (vq->vq_free_cnt != vq->vq_nentries) {
		device_printf(vq->vq_dev,
		    "%s: warning '%s' virtqueue not empty, "
		    "leaking %d entries\n", __func__, vq->vq_name,
		    vq->vq_nentries - vq->vq_free_cnt);
	}

	vq->vq_desc_head_idx = 0;
	vq->vq_used_cons_idx = 0;
	vq->vq_queued_cnt = 0;
	vq->vq_free_cnt = vq->vq_nentries;

	/* To be safe, reset all our allocated memory. */
	bzero(vq->vq_ring_mem, vq->vq_ring_size);
	for (i = 0; i < vq->vq_nentries; i++) {
		dxp = &vq->vq_descx[i];
		dxp->cookie = NULL;
		dxp->ndescs = 0;
		if (vq->vq_flags & VIRTQUEUE_FLAG_INDIRECT)
			virtqueue_init_indirect_list(vq, dxp->indirect);
	}

	vq_ring_init(vq);
	virtqueue_disable_intr(vq);

	return (0);
}

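/*
 * Release everything allocated by virtqueue_alloc(): the indirect
 * descriptor lists, the contiguous ring memory, and the virtqueue
 * structure itself. Outstanding descriptors are reported as leaked.
 */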
void
virtqueue_free(struct virtqueue *vq)
{

	if (vq->vq_free_cnt != vq->vq_nentries) {
		device_printf(vq->vq_dev, "%s: freeing non-empty virtqueue, "
		    "leaking %d entries\n", vq->vq_name,
		    vq->vq_nentries - vq->vq_free_cnt);
	}

	if (vq->vq_flags & VIRTQUEUE_FLAG_INDIRECT)
		virtqueue_free_indirect(vq);

	if (vq->vq_ring_mem != NULL) {
		contigfree(vq->vq_ring_mem, vq->vq_ring_size, M_DEVBUF);
		vq->vq_ring_size = 0;
		vq->vq_ring_mem = NULL;
	}

	free(vq, M_DEVBUF);
}

vm_paddr_t
virtqueue_paddr(struct virtqueue *vq)
{

	return (vtophys(vq->vq_ring_mem));
}

int
virtqueue_size(struct virtqueue *vq)
{

	return (vq->vq_nentries);
}

int
virtqueue_empty(struct virtqueue *vq)
{

	return (vq->vq_nentries == vq->vq_free_cnt);
}

int
virtqueue_full(struct virtqueue *vq)
{

	return (vq->vq_free_cnt == 0);
}

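/*
 * Tell the host about buffers queued since the last notification. The
 * updated avail index is made visible first; the actual bus notification
 * is skipped when the host has suppressed it (via VRING_USED_F_NO_NOTIFY
 * or the avail event index).
 */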
void
virtqueue_notify(struct virtqueue *vq)
{

	/* Ensure updated avail->idx is visible to host. */
	mb();

	if (vq_ring_must_notify_host(vq))
		vq_ring_notify_host(vq);
	vq->vq_queued_cnt = 0;
}

int
virtqueue_nused(struct virtqueue *vq)
{
	uint16_t used_idx, nused;

	used_idx = vq->vq_ring.used->idx;

	nused = (uint16_t)(used_idx - vq->vq_used_cons_idx);
	VQASSERT(vq, nused <= vq->vq_nentries, "used more than available");

	return (nused);
}

int
virtqueue_intr_filter(struct virtqueue *vq)
{

	if (__predict_false(vq->vq_intrhand == NULL))
		return (0);
	if (vq->vq_used_cons_idx == vq->vq_ring.used->idx)
		return (0);

	virtqueue_disable_intr(vq);

	return (1);
}

void
virtqueue_intr(struct virtqueue *vq)
{

	if (__predict_true(vq->vq_intrhand != NULL))
		vq->vq_intrhand(vq->vq_intrhand_arg);
}

int
virtqueue_enable_intr(struct virtqueue *vq)
{

	return (vq_ring_enable_interrupt(vq, 0));
}

int
virtqueue_postpone_intr(struct virtqueue *vq)
{
	uint16_t ndesc, avail_idx;

	/*
	 * Request the next interrupt be postponed until at least half
	 * of the available descriptors have been consumed.
	 */
	avail_idx = vq->vq_ring.avail->idx;
	ndesc = (uint16_t)(avail_idx - vq->vq_used_cons_idx) / 2;

	return (vq_ring_enable_interrupt(vq, ndesc));
}

void
virtqueue_disable_intr(struct virtqueue *vq)
{

	/*
	 * Note this is only considered a hint to the host.
	 */
	if ((vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) == 0)
		vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
}

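/*
 * Enqueue the buffer described by the sglist 'sg' onto the virtqueue.
 * The first 'readable' segments are device-readable, the remaining
 * 'writable' segments are device-writable, and 'cookie' is handed back
 * by virtqueue_dequeue() once the host has completed the buffer.
 * Indirect descriptors are used when the feature was negotiated and the
 * chain needs at least two descriptors that fit the indirect table.
 *
 * A minimal usage sketch; the 'req' layout and its sglist are
 * hypothetical, real drivers build these from their own structures:
 *
 *	struct sglist_seg segs[2];
 *	struct sglist sg;
 *
 *	sglist_init(&sg, 2, segs);
 *	sglist_append(&sg, &req->hdr, sizeof(req->hdr));   (device-readable)
 *	sglist_append(&sg, &req->ack, sizeof(req->ack));   (device-writable)
 *	if (virtqueue_enqueue(vq, req, &sg, 1, 1) == 0)
 *		virtqueue_notify(vq);
 */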
int
virtqueue_enqueue(struct virtqueue *vq, void *cookie, struct sglist *sg,
    int readable, int writable)
{
	struct vq_desc_extra *dxp;
	int needed;
	uint16_t head_idx, idx;

	needed = readable + writable;

	VQASSERT(vq, cookie != NULL, "enqueuing with no cookie");
	VQASSERT(vq, needed == sg->sg_nseg,
	    "segment count mismatch, %d, %d", needed, sg->sg_nseg);
	VQASSERT(vq,
	    needed <= vq->vq_nentries || needed <= vq->vq_max_indirect_size,
	    "too many segments to enqueue: %d, %d/%d", needed,
	    vq->vq_nentries, vq->vq_max_indirect_size);

	if (needed < 1)
		return (EINVAL);
	if (vq->vq_free_cnt == 0)
		return (ENOSPC);

	if (vq_ring_use_indirect(vq, needed)) {
		vq_ring_enqueue_indirect(vq, cookie, sg, readable, writable);
		return (0);
	} else if (vq->vq_free_cnt < needed)
		return (EMSGSIZE);

	head_idx = vq->vq_desc_head_idx;
	VQ_RING_ASSERT_VALID_IDX(vq, head_idx);
	dxp = &vq->vq_descx[head_idx];

	VQASSERT(vq, dxp->cookie == NULL,
	    "cookie already exists for index %d", head_idx);
	dxp->cookie = cookie;
	dxp->ndescs = needed;

	idx = vq_ring_enqueue_segments(vq, vq->vq_ring.desc, head_idx,
	    sg, readable, writable);

	vq->vq_desc_head_idx = idx;
	vq->vq_free_cnt -= needed;
	if (vq->vq_free_cnt == 0)
		VQ_RING_ASSERT_CHAIN_TERM(vq);
	else
		VQ_RING_ASSERT_VALID_IDX(vq, idx);

	vq_ring_update_avail(vq, head_idx);

	return (0);
}

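/*
 * Dequeue one completed buffer from the used ring. Returns the cookie
 * passed to virtqueue_enqueue(), or NULL if the host has not consumed
 * anything new. The length written by the host is returned through 'len'
 * when it is non-NULL, and the descriptor chain is returned to the free
 * list.
 */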
void *
virtqueue_dequeue(struct virtqueue *vq, uint32_t *len)
{
	struct vring_used_elem *uep;
	void *cookie;
	uint16_t used_idx, desc_idx;

	if (vq->vq_used_cons_idx == vq->vq_ring.used->idx)
		return (NULL);

	used_idx = vq->vq_used_cons_idx++ & (vq->vq_nentries - 1);
	uep = &vq->vq_ring.used->ring[used_idx];

	rmb();
	desc_idx = (uint16_t) uep->id;
	if (len != NULL)
		*len = uep->len;

	vq_ring_free_chain(vq, desc_idx);

	cookie = vq->vq_descx[desc_idx].cookie;
	VQASSERT(vq, cookie != NULL, "no cookie for index %d", desc_idx);
	vq->vq_descx[desc_idx].cookie = NULL;

	return (cookie);
}

void *
virtqueue_poll(struct virtqueue *vq, uint32_t *len)
{
	void *cookie;

	while ((cookie = virtqueue_dequeue(vq, len)) == NULL)
		cpu_spinwait();

	return (cookie);
}

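/*
 * Walk the descriptor table starting at '*last' and return the next
 * outstanding cookie, releasing its descriptor chain. Intended for
 * reclaiming buffers while a queue is being torn down; returns NULL once
 * every entry has been visited.
 */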
void *
virtqueue_drain(struct virtqueue *vq, int *last)
{
	void *cookie;
	int idx;

	cookie = NULL;
	idx = *last;

	while (idx < vq->vq_nentries && cookie == NULL) {
		if ((cookie = vq->vq_descx[idx].cookie) != NULL) {
			vq->vq_descx[idx].cookie = NULL;
			/* Free chain to keep free count consistent. */
			vq_ring_free_chain(vq, idx);
		}
		idx++;
	}

	*last = idx;

	return (cookie);
}

void
virtqueue_dump(struct virtqueue *vq)
{

	if (vq == NULL)
		return;

	printf("VQ: %s - size=%d; free=%d; used=%d; queued=%d; "
	    "desc_head_idx=%d; avail.idx=%d; used_cons_idx=%d; "
	    "used.idx=%d; avail.flags=0x%x; used.flags=0x%x\n",
	    vq->vq_name, vq->vq_nentries, vq->vq_free_cnt,
	    virtqueue_nused(vq), vq->vq_queued_cnt, vq->vq_desc_head_idx,
	    vq->vq_ring.avail->idx, vq->vq_used_cons_idx,
	    vq->vq_ring.used->idx, vq->vq_ring.avail->flags,
	    vq->vq_ring.used->flags);
}

static void
vq_ring_init(struct virtqueue *vq)
{
	struct vring *vr;
	char *ring_mem;
	int i, size;

	ring_mem = vq->vq_ring_mem;
	size = vq->vq_nentries;
	vr = &vq->vq_ring;

	vring_init(vr, size, ring_mem, vq->vq_alignment);

	for (i = 0; i < size - 1; i++)
		vr->desc[i].next = i + 1;
	vr->desc[i].next = VQ_RING_DESC_CHAIN_END;
}

static void
vq_ring_update_avail(struct virtqueue *vq, uint16_t desc_idx)
{
	uint16_t avail_idx;

	/*
	 * Place the head of the descriptor chain into the next slot and make
	 * it usable to the host. The chain is made available now rather than
	 * deferring to virtqueue_notify() in the hopes that if the host is
	 * currently running on another CPU, we can keep it processing the new
	 * descriptor.
	 */
	avail_idx = vq->vq_ring.avail->idx & (vq->vq_nentries - 1);
	vq->vq_ring.avail->ring[avail_idx] = desc_idx;

	wmb();
	vq->vq_ring.avail->idx++;

	/* Keep pending count until virtqueue_notify(). */
	vq->vq_queued_cnt++;
}

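/*
 * Fill a chain of descriptors in 'desc' (either the ring's own table or
 * an indirect list) from the sglist, starting at 'head_idx' and following
 * the pre-linked next pointers. Device-readable segments come first, then
 * device-writable ones. Returns the index following the end of the chain.
 */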
static uint16_t
vq_ring_enqueue_segments(struct virtqueue *vq, struct vring_desc *desc,
    uint16_t head_idx, struct sglist *sg, int readable, int writable)
{
	struct sglist_seg *seg;
	struct vring_desc *dp;
	int i, needed;
	uint16_t idx;

	needed = readable + writable;

	for (i = 0, idx = head_idx, seg = sg->sg_segs;
	     i < needed;
	     i++, idx = dp->next, seg++) {
		VQASSERT(vq, idx != VQ_RING_DESC_CHAIN_END,
		    "premature end of free desc chain");

		dp = &desc[idx];
		dp->addr = seg->ss_paddr;
		dp->len = seg->ss_len;
		dp->flags = 0;

		if (i < needed - 1)
			dp->flags |= VRING_DESC_F_NEXT;
		if (i >= readable)
			dp->flags |= VRING_DESC_F_WRITE;
	}

	return (idx);
}

static int
vq_ring_use_indirect(struct virtqueue *vq, int needed)
{

	if ((vq->vq_flags & VIRTQUEUE_FLAG_INDIRECT) == 0)
		return (0);

	if (vq->vq_max_indirect_size < needed)
		return (0);

	if (needed < 2)
		return (0);

	return (1);
}

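/*
 * Enqueue using indirect descriptors: the scatter/gather segments are
 * written into this entry's indirect table, so only one descriptor in the
 * main ring, flagged VRING_DESC_F_INDIRECT, is consumed to point at it.
 */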
static void
vq_ring_enqueue_indirect(struct virtqueue *vq, void *cookie,
    struct sglist *sg, int readable, int writable)
{
	struct vring_desc *dp;
	struct vq_desc_extra *dxp;
	int needed;
	uint16_t head_idx;

	needed = readable + writable;
	VQASSERT(vq, needed <= vq->vq_max_indirect_size,
	    "enqueuing too many indirect descriptors");

	head_idx = vq->vq_desc_head_idx;
	VQ_RING_ASSERT_VALID_IDX(vq, head_idx);
	dp = &vq->vq_ring.desc[head_idx];
	dxp = &vq->vq_descx[head_idx];

	VQASSERT(vq, dxp->cookie == NULL,
	    "cookie already exists for index %d", head_idx);
	dxp->cookie = cookie;
	dxp->ndescs = 1;

	dp->addr = dxp->indirect_paddr;
	dp->len = needed * sizeof(struct vring_desc);
	dp->flags = VRING_DESC_F_INDIRECT;

	vq_ring_enqueue_segments(vq, dxp->indirect, 0,
	    sg, readable, writable);

	vq->vq_desc_head_idx = dp->next;
	vq->vq_free_cnt--;
	if (vq->vq_free_cnt == 0)
		VQ_RING_ASSERT_CHAIN_TERM(vq);
	else
		VQ_RING_ASSERT_VALID_IDX(vq, vq->vq_desc_head_idx);

	vq_ring_update_avail(vq, head_idx);
}

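/*
 * Re-enable interrupts, asking the host to delay the next one until
 * 'ndesc' additional descriptors have been consumed when the event index
 * feature is in use. Returns 1 if enough completions are already pending
 * and the caller should process the ring again.
 */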
static int
vq_ring_enable_interrupt(struct virtqueue *vq, uint16_t ndesc)
{

	/*
	 * Enable interrupts, making sure we get the latest index of
	 * what's already been consumed.
	 */
	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX)
		vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx + ndesc;
	else
		vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;

	mb();

	/*
	 * Enough items may have already been consumed to meet our threshold
	 * since we last checked. Let our caller know so it processes the new
	 * entries.
	 */
	if (virtqueue_nused(vq) > ndesc)
		return (1);

	return (0);
}

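/*
 * Decide whether the host must be notified. With the event index feature
 * this uses vring_need_event() over the entries queued since the last
 * notification; otherwise the host's VRING_USED_F_NO_NOTIFY flag is
 * honored.
 */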
static int
vq_ring_must_notify_host(struct virtqueue *vq)
{
	uint16_t new_idx, prev_idx, event_idx;

	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) {
		new_idx = vq->vq_ring.avail->idx;
		prev_idx = new_idx - vq->vq_queued_cnt;
		event_idx = vring_avail_event(&vq->vq_ring);

		return (vring_need_event(event_idx, new_idx, prev_idx) != 0);
	}

	return ((vq->vq_ring.used->flags & VRING_USED_F_NO_NOTIFY) == 0);
}

static void
vq_ring_notify_host(struct virtqueue *vq)
{

	VIRTIO_BUS_NOTIFY_VQ(vq->vq_dev, vq->vq_queue_index);
}

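/*
 * Return the descriptor chain starting at 'desc_idx' to the free list.
 * The freed chain is spliced in front of the existing free chain and
 * becomes the new head, and vq_free_cnt is credited with the full chain
 * length.
 */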
static void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
	struct vring_desc *dp;
	struct vq_desc_extra *dxp;

	VQ_RING_ASSERT_VALID_IDX(vq, desc_idx);
	dp = &vq->vq_ring.desc[desc_idx];
	dxp = &vq->vq_descx[desc_idx];

	if (vq->vq_free_cnt == 0)
		VQ_RING_ASSERT_CHAIN_TERM(vq);

	vq->vq_free_cnt += dxp->ndescs;
	dxp->ndescs--;

	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
		while (dp->flags & VRING_DESC_F_NEXT) {
			VQ_RING_ASSERT_VALID_IDX(vq, dp->next);
			dp = &vq->vq_ring.desc[dp->next];
			dxp->ndescs--;
		}
	}

	VQASSERT(vq, dxp->ndescs == 0,
	    "failed to free entire desc chain, remaining: %d", dxp->ndescs);

	/*
	 * We must append the existing free chain, if any, to the end of
	 * the newly freed chain. If the virtqueue was completely used, then
	 * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
	 */
	dp->next = vq->vq_desc_head_idx;
	vq->vq_desc_head_idx = desc_idx;
}