/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Implements the virtqueue interface as basically described
 * in the original VirtIO paper.
 */
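/*
 * A rough sketch of typical driver usage (error handling omitted; the
 * buffer names are illustrative only):
 *
 *	sglist_reset(sg);
 *	sglist_append(sg, hdr, hdr_size);	(device-readable segment)
 *	sglist_append(sg, buf, buf_size);	(device-writable segment)
 *	virtqueue_enqueue(vq, cookie, sg, 1, 1);
 *	virtqueue_notify(vq);
 *	...
 *	cookie = virtqueue_dequeue(vq, &len);	(or virtqueue_poll(vq, &len))
 */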

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/sdt.h>
#include <sys/sglist.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/cpu.h>
#include <machine/bus.h>
#include <machine/atomic.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/virtio/virtio.h>
#include <dev/virtio/virtqueue.h>
#include <dev/virtio/virtio_ring.h>

#include "virtio_bus_if.h"

struct virtqueue {
	device_t		 vq_dev;
	uint16_t		 vq_queue_index;
	uint16_t		 vq_nentries;
	uint32_t		 vq_flags;
#define	VIRTQUEUE_FLAG_MODERN	 0x0001
#define	VIRTQUEUE_FLAG_INDIRECT	 0x0002
#define	VIRTQUEUE_FLAG_EVENT_IDX 0x0004

	int			 vq_max_indirect_size;
	bus_size_t		 vq_notify_offset;
	virtqueue_intr_t	*vq_intrhand;
	void			*vq_intrhand_arg;

	struct vring		 vq_ring;
	uint16_t		 vq_free_cnt;
	uint16_t		 vq_queued_cnt;
	/*
	 * Head of the free chain in the descriptor table. If
	 * there are no free descriptors, this will be set to
	 * VQ_RING_DESC_CHAIN_END.
	 */
	uint16_t		 vq_desc_head_idx;
	/*
	 * Last consumed descriptor in the used table,
	 * trails vq_ring.used->idx.
	 */
	uint16_t		 vq_used_cons_idx;

	void			*vq_ring_mem;
	int			 vq_indirect_mem_size;
	int			 vq_alignment;
	int			 vq_ring_size;
	char			 vq_name[VIRTQUEUE_MAX_NAME_SZ];

	struct vq_desc_extra {
		void		  *cookie;
		struct vring_desc *indirect;
		vm_paddr_t	   indirect_paddr;
		uint16_t	   ndescs;
	} vq_descx[0];
};

/*
 * The maximum virtqueue size is 2^15. Use that value as the end of
 * descriptor chain terminator since it will never be a valid index
 * in the descriptor table. This is used to verify we are correctly
 * handling vq_free_cnt.
 */
#define VQ_RING_DESC_CHAIN_END 32768

#define VQASSERT(_vq, _exp, _msg, ...)				\
    KASSERT((_exp),("%s: %s - "_msg, __func__, (_vq)->vq_name,	\
	##__VA_ARGS__))

#define VQ_RING_ASSERT_VALID_IDX(_vq, _idx)			\
    VQASSERT((_vq), (_idx) < (_vq)->vq_nentries,		\
	"invalid ring index: %d, max: %d", (_idx),		\
	(_vq)->vq_nentries)

#define VQ_RING_ASSERT_CHAIN_TERM(_vq)				\
    VQASSERT((_vq), (_vq)->vq_desc_head_idx ==			\
	VQ_RING_DESC_CHAIN_END,	"full ring terminated "		\
	"incorrectly: head idx: %d", (_vq)->vq_desc_head_idx)

static int	virtqueue_init_indirect(struct virtqueue *vq, int);
static void	virtqueue_free_indirect(struct virtqueue *vq);
static void	virtqueue_init_indirect_list(struct virtqueue *,
		    struct vring_desc *);

static void	vq_ring_init(struct virtqueue *);
static void	vq_ring_update_avail(struct virtqueue *, uint16_t);
static uint16_t	vq_ring_enqueue_segments(struct virtqueue *,
		    struct vring_desc *, uint16_t, struct sglist *, int, int);
static bool	vq_ring_use_indirect(struct virtqueue *, int);
static void	vq_ring_enqueue_indirect(struct virtqueue *, void *,
		    struct sglist *, int, int);
static int	vq_ring_enable_interrupt(struct virtqueue *, uint16_t);
static int	vq_ring_must_notify_host(struct virtqueue *);
static void	vq_ring_notify_host(struct virtqueue *);
static void	vq_ring_free_chain(struct virtqueue *, uint16_t);

SDT_PROVIDER_DEFINE(virtqueue);
SDT_PROBE_DEFINE6(virtqueue, , enqueue_segments, entry, "struct virtqueue *",
    "struct vring_desc *", "uint16_t", "struct sglist *", "int", "int");
SDT_PROBE_DEFINE1(virtqueue, , enqueue_segments, return, "uint16_t");

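/*
 * Legacy (pre-1.0) devices use guest-native endianness for the ring,
 * while modern (VIRTIO_F_VERSION_1) devices always use little-endian.
 * These helpers convert ring fields accordingly.
 */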
#define vq_modern(_vq) 		(((_vq)->vq_flags & VIRTQUEUE_FLAG_MODERN) != 0)
#define vq_htog16(_vq, _val) 	virtio_htog16(vq_modern(_vq), _val)
#define vq_htog32(_vq, _val) 	virtio_htog32(vq_modern(_vq), _val)
#define vq_htog64(_vq, _val) 	virtio_htog64(vq_modern(_vq), _val)
#define vq_gtoh16(_vq, _val) 	virtio_gtoh16(vq_modern(_vq), _val)
#define vq_gtoh32(_vq, _val) 	virtio_gtoh32(vq_modern(_vq), _val)
#define vq_gtoh64(_vq, _val) 	virtio_gtoh64(vq_modern(_vq), _val)

int
virtqueue_alloc(device_t dev, uint16_t queue, uint16_t size,
    bus_size_t notify_offset, int align, vm_paddr_t highaddr,
    struct vq_alloc_info *info, struct virtqueue **vqp)
{
	struct virtqueue *vq;
	int error;

	*vqp = NULL;
	error = 0;

	if (size == 0) {
		device_printf(dev,
		    "virtqueue %d (%s) does not exist (size is zero)\n",
		    queue, info->vqai_name);
		return (ENODEV);
	} else if (!powerof2(size)) {
		device_printf(dev,
		    "virtqueue %d (%s) size is not a power of 2: %d\n",
		    queue, info->vqai_name, size);
		return (ENXIO);
	} else if (info->vqai_maxindirsz > VIRTIO_MAX_INDIRECT) {
		device_printf(dev, "virtqueue %d (%s) requested too many "
		    "indirect descriptors: %d, max %d\n",
		    queue, info->vqai_name, info->vqai_maxindirsz,
		    VIRTIO_MAX_INDIRECT);
		return (EINVAL);
	}

	vq = malloc(sizeof(struct virtqueue) +
	    size * sizeof(struct vq_desc_extra), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (vq == NULL) {
		device_printf(dev, "cannot allocate virtqueue\n");
		return (ENOMEM);
	}

	vq->vq_dev = dev;
	strlcpy(vq->vq_name, info->vqai_name, sizeof(vq->vq_name));
	vq->vq_queue_index = queue;
	vq->vq_notify_offset = notify_offset;
	vq->vq_alignment = align;
	vq->vq_nentries = size;
	vq->vq_free_cnt = size;
	vq->vq_intrhand = info->vqai_intr;
	vq->vq_intrhand_arg = info->vqai_intr_arg;

	if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_F_VERSION_1) != 0)
		vq->vq_flags |= VIRTQUEUE_FLAG_MODERN;
	if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_RING_F_EVENT_IDX) != 0)
		vq->vq_flags |= VIRTQUEUE_FLAG_EVENT_IDX;

	if (info->vqai_maxindirsz > 1) {
		error = virtqueue_init_indirect(vq, info->vqai_maxindirsz);
		if (error)
			goto fail;
	}

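	/*
	 * Allocate the ring as physically contiguous, page-aligned
	 * memory below highaddr.
	 */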
	vq->vq_ring_size = round_page(vring_size(size, align));
	vq->vq_ring_mem = contigmalloc(vq->vq_ring_size, M_DEVBUF,
	    M_NOWAIT | M_ZERO, 0, highaddr, PAGE_SIZE, 0);
	if (vq->vq_ring_mem == NULL) {
		device_printf(dev,
		    "cannot allocate memory for virtqueue ring\n");
		error = ENOMEM;
		goto fail;
	}

	vq_ring_init(vq);
	virtqueue_disable_intr(vq);

	*vqp = vq;

fail:
	if (error)
		virtqueue_free(vq);

	return (error);
}

static int
virtqueue_init_indirect(struct virtqueue *vq, int indirect_size)
{
	device_t dev;
	struct vq_desc_extra *dxp;
	int i, size;

	dev = vq->vq_dev;

	if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_RING_F_INDIRECT_DESC) == 0) {
		/*
		 * Indirect descriptors requested by the driver but not
		 * negotiated. Return zero to keep the initialization
		 * going: we'll run fine without.
		 */
		if (bootverbose)
			device_printf(dev, "virtqueue %d (%s) requested "
			    "indirect descriptors but not negotiated\n",
			    vq->vq_queue_index, vq->vq_name);
		return (0);
	}

	size = indirect_size * sizeof(struct vring_desc);
	vq->vq_max_indirect_size = indirect_size;
	vq->vq_indirect_mem_size = size;
	vq->vq_flags |= VIRTQUEUE_FLAG_INDIRECT;

	for (i = 0; i < vq->vq_nentries; i++) {
		dxp = &vq->vq_descx[i];

		dxp->indirect = malloc(size, M_DEVBUF, M_NOWAIT);
		if (dxp->indirect == NULL) {
			device_printf(dev, "cannot allocate indirect list\n");
			return (ENOMEM);
		}

		dxp->indirect_paddr = vtophys(dxp->indirect);
		virtqueue_init_indirect_list(vq, dxp->indirect);
	}

	return (0);
}

static void
virtqueue_free_indirect(struct virtqueue *vq)
{
	struct vq_desc_extra *dxp;
	int i;

	for (i = 0; i < vq->vq_nentries; i++) {
		dxp = &vq->vq_descx[i];

		if (dxp->indirect == NULL)
			break;

		free(dxp->indirect, M_DEVBUF);
		dxp->indirect = NULL;
		dxp->indirect_paddr = 0;
	}

	vq->vq_flags &= ~VIRTQUEUE_FLAG_INDIRECT;
	vq->vq_indirect_mem_size = 0;
}

static void
virtqueue_init_indirect_list(struct virtqueue *vq,
    struct vring_desc *indirect)
{
	int i;

	bzero(indirect, vq->vq_indirect_mem_size);

	for (i = 0; i < vq->vq_max_indirect_size - 1; i++)
		indirect[i].next = vq_gtoh16(vq, i + 1);
	indirect[i].next = vq_gtoh16(vq, VQ_RING_DESC_CHAIN_END);
}

int
virtqueue_reinit(struct virtqueue *vq, uint16_t size)
{
	struct vq_desc_extra *dxp;
	int i;

	if (vq->vq_nentries != size) {
		device_printf(vq->vq_dev,
		    "%s: '%s' changed size; old=%hu, new=%hu\n",
		    __func__, vq->vq_name, vq->vq_nentries, size);
		return (EINVAL);
	}

	/* Warn if the virtqueue was not properly cleaned up. */
	if (vq->vq_free_cnt != vq->vq_nentries) {
		device_printf(vq->vq_dev,
		    "%s: warning '%s' virtqueue not empty, "
		    "leaking %d entries\n", __func__, vq->vq_name,
		    vq->vq_nentries - vq->vq_free_cnt);
	}

	vq->vq_desc_head_idx = 0;
	vq->vq_used_cons_idx = 0;
	vq->vq_queued_cnt = 0;
	vq->vq_free_cnt = vq->vq_nentries;

	/* To be safe, reset all our allocated memory. */
	bzero(vq->vq_ring_mem, vq->vq_ring_size);
	for (i = 0; i < vq->vq_nentries; i++) {
		dxp = &vq->vq_descx[i];
		dxp->cookie = NULL;
		dxp->ndescs = 0;
		if (vq->vq_flags & VIRTQUEUE_FLAG_INDIRECT)
			virtqueue_init_indirect_list(vq, dxp->indirect);
	}

	vq_ring_init(vq);
	virtqueue_disable_intr(vq);

	return (0);
}

void
virtqueue_free(struct virtqueue *vq)
{

	if (vq->vq_free_cnt != vq->vq_nentries) {
		device_printf(vq->vq_dev, "%s: freeing non-empty virtqueue, "
		    "leaking %d entries\n", vq->vq_name,
		    vq->vq_nentries - vq->vq_free_cnt);
	}

	if (vq->vq_flags & VIRTQUEUE_FLAG_INDIRECT)
		virtqueue_free_indirect(vq);

	if (vq->vq_ring_mem != NULL) {
		contigfree(vq->vq_ring_mem, vq->vq_ring_size, M_DEVBUF);
		vq->vq_ring_size = 0;
		vq->vq_ring_mem = NULL;
	}

	free(vq, M_DEVBUF);
}

vm_paddr_t
virtqueue_paddr(struct virtqueue *vq)
{

	return (vtophys(vq->vq_ring_mem));
}

vm_paddr_t
virtqueue_desc_paddr(struct virtqueue *vq)
{

	return (vtophys(vq->vq_ring.desc));
}

vm_paddr_t
virtqueue_avail_paddr(struct virtqueue *vq)
{

	return (vtophys(vq->vq_ring.avail));
}

vm_paddr_t
virtqueue_used_paddr(struct virtqueue *vq)
{

	return (vtophys(vq->vq_ring.used));
}

uint16_t
virtqueue_index(struct virtqueue *vq)
{

	return (vq->vq_queue_index);
}

int
virtqueue_size(struct virtqueue *vq)
{

	return (vq->vq_nentries);
}

int
virtqueue_nfree(struct virtqueue *vq)
{

	return (vq->vq_free_cnt);
}

bool
virtqueue_empty(struct virtqueue *vq)
{

	return (vq->vq_nentries == vq->vq_free_cnt);
}

bool
virtqueue_full(struct virtqueue *vq)
{

	return (vq->vq_free_cnt == 0);
}

void
virtqueue_notify(struct virtqueue *vq)
{

	/* Ensure updated avail->idx is visible to host. */
	mb();

	if (vq_ring_must_notify_host(vq))
		vq_ring_notify_host(vq);
	vq->vq_queued_cnt = 0;
}

int
virtqueue_nused(struct virtqueue *vq)
{
	uint16_t used_idx, nused;

	used_idx = vq_htog16(vq, vq->vq_ring.used->idx);

	nused = (uint16_t)(used_idx - vq->vq_used_cons_idx);
	VQASSERT(vq, nused <= vq->vq_nentries, "used more than available");

	return (nused);
}

int
virtqueue_intr_filter(struct virtqueue *vq)
{

	if (vq->vq_used_cons_idx == vq_htog16(vq, vq->vq_ring.used->idx))
		return (0);

	virtqueue_disable_intr(vq);

	return (1);
}

void
virtqueue_intr(struct virtqueue *vq)
{

	vq->vq_intrhand(vq->vq_intrhand_arg);
}

int
virtqueue_enable_intr(struct virtqueue *vq)
{

	return (vq_ring_enable_interrupt(vq, 0));
}

int
virtqueue_postpone_intr(struct virtqueue *vq, vq_postpone_t hint)
{
	uint16_t ndesc, avail_idx;

	avail_idx = vq_htog16(vq, vq->vq_ring.avail->idx);
	ndesc = (uint16_t)(avail_idx - vq->vq_used_cons_idx);

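	/*
	 * Scale how many more used entries should accumulate before the
	 * next interrupt; only effective when EVENT_IDX was negotiated.
	 */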
	switch (hint) {
	case VQ_POSTPONE_SHORT:
		ndesc = ndesc / 4;
		break;
	case VQ_POSTPONE_LONG:
		ndesc = (ndesc * 3) / 4;
		break;
	case VQ_POSTPONE_EMPTIED:
		break;
	}

	return (vq_ring_enable_interrupt(vq, ndesc));
}

/*
 * Note this is only considered a hint to the host.
 */
void
virtqueue_disable_intr(struct virtqueue *vq)
{

	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) {
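		/*
		 * With EVENT_IDX there is no explicit disable flag;
		 * instead, park used_event at an index the device will
		 * not reach for a long time.
		 */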
		vring_used_event(&vq->vq_ring) = vq_gtoh16(vq,
		    vq->vq_used_cons_idx - vq->vq_nentries - 1);
		return;
	}

	vq->vq_ring.avail->flags |= vq_gtoh16(vq, VRING_AVAIL_F_NO_INTERRUPT);
}

int
virtqueue_enqueue(struct virtqueue *vq, void *cookie, struct sglist *sg,
    int readable, int writable)
{
	struct vq_desc_extra *dxp;
	int needed;
	uint16_t head_idx, idx;

	needed = readable + writable;

	VQASSERT(vq, cookie != NULL, "enqueuing with no cookie");
	VQASSERT(vq, needed == sg->sg_nseg,
	    "segment count mismatch, %d, %d", needed, sg->sg_nseg);
	VQASSERT(vq,
	    needed <= vq->vq_nentries || needed <= vq->vq_max_indirect_size,
	    "too many segments to enqueue: %d, %d/%d", needed,
	    vq->vq_nentries, vq->vq_max_indirect_size);

	if (needed < 1)
		return (EINVAL);
	if (vq->vq_free_cnt == 0)
		return (ENOSPC);

	if (vq_ring_use_indirect(vq, needed)) {
		vq_ring_enqueue_indirect(vq, cookie, sg, readable, writable);
		return (0);
	} else if (vq->vq_free_cnt < needed)
		return (EMSGSIZE);

	head_idx = vq->vq_desc_head_idx;
	VQ_RING_ASSERT_VALID_IDX(vq, head_idx);
	dxp = &vq->vq_descx[head_idx];

	VQASSERT(vq, dxp->cookie == NULL,
	    "cookie already exists for index %d", head_idx);
	dxp->cookie = cookie;
	dxp->ndescs = needed;

	idx = vq_ring_enqueue_segments(vq, vq->vq_ring.desc, head_idx,
	    sg, readable, writable);

	vq->vq_desc_head_idx = idx;
	vq->vq_free_cnt -= needed;
	if (vq->vq_free_cnt == 0)
		VQ_RING_ASSERT_CHAIN_TERM(vq);
	else
		VQ_RING_ASSERT_VALID_IDX(vq, idx);

	vq_ring_update_avail(vq, head_idx);

	return (0);
}

void *
virtqueue_dequeue(struct virtqueue *vq, uint32_t *len)
{
	struct vring_used_elem *uep;
	void *cookie;
	uint16_t used_idx, desc_idx;

	if (vq->vq_used_cons_idx == vq_htog16(vq, vq->vq_ring.used->idx))
		return (NULL);

	used_idx = vq->vq_used_cons_idx++ & (vq->vq_nentries - 1);
	uep = &vq->vq_ring.used->ring[used_idx];

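	/* Read the used element only after observing the updated used index. */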
	rmb();
	desc_idx = (uint16_t) vq_htog32(vq, uep->id);
	if (len != NULL)
		*len = vq_htog32(vq, uep->len);

	vq_ring_free_chain(vq, desc_idx);

	cookie = vq->vq_descx[desc_idx].cookie;
	VQASSERT(vq, cookie != NULL, "no cookie for index %d", desc_idx);
	vq->vq_descx[desc_idx].cookie = NULL;

	return (cookie);
}

void *
virtqueue_poll(struct virtqueue *vq, uint32_t *len)
{
	void *cookie;

	VIRTIO_BUS_POLL(vq->vq_dev);
	while ((cookie = virtqueue_dequeue(vq, len)) == NULL) {
		cpu_spinwait();
		VIRTIO_BUS_POLL(vq->vq_dev);
	}

	return (cookie);
}

void *
virtqueue_drain(struct virtqueue *vq, int *last)
{
	void *cookie;
	int idx;

	cookie = NULL;
	idx = *last;

	while (idx < vq->vq_nentries && cookie == NULL) {
		if ((cookie = vq->vq_descx[idx].cookie) != NULL) {
			vq->vq_descx[idx].cookie = NULL;
			/* Free chain to keep free count consistent. */
			vq_ring_free_chain(vq, idx);
		}
		idx++;
	}

	*last = idx;

	return (cookie);
}

void
virtqueue_dump(struct virtqueue *vq)
{

	if (vq == NULL)
		return;

	printf("VQ: %s - size=%d; free=%d; used=%d; queued=%d; "
	    "desc_head_idx=%d; avail.idx=%d; used_cons_idx=%d; "
	    "used.idx=%d; used_event_idx=%d; avail.flags=0x%x; used.flags=0x%x\n",
	    vq->vq_name, vq->vq_nentries, vq->vq_free_cnt, virtqueue_nused(vq),
	    vq->vq_queued_cnt, vq->vq_desc_head_idx,
	    vq_htog16(vq, vq->vq_ring.avail->idx), vq->vq_used_cons_idx,
	    vq_htog16(vq, vq->vq_ring.used->idx),
	    vq_htog16(vq, vring_used_event(&vq->vq_ring)),
	    vq_htog16(vq, vq->vq_ring.avail->flags),
	    vq_htog16(vq, vq->vq_ring.used->flags));
}

static void
vq_ring_init(struct virtqueue *vq)
{
	struct vring *vr;
	char *ring_mem;
	int i, size;

	ring_mem = vq->vq_ring_mem;
	size = vq->vq_nentries;
	vr = &vq->vq_ring;

	vring_init(vr, size, ring_mem, vq->vq_alignment);

	for (i = 0; i < size - 1; i++)
		vr->desc[i].next = vq_gtoh16(vq, i + 1);
	vr->desc[i].next = vq_gtoh16(vq, VQ_RING_DESC_CHAIN_END);
}

static void
vq_ring_update_avail(struct virtqueue *vq, uint16_t desc_idx)
{
	uint16_t avail_idx, avail_ring_idx;

	/*
	 * Place the head of the descriptor chain into the next slot and make
	 * it usable to the host. The chain is made available now rather than
	 * deferring to virtqueue_notify() in the hopes that if the host is
	 * currently running on another CPU, we can keep it processing the new
	 * descriptor.
	 */
	avail_idx = vq_htog16(vq, vq->vq_ring.avail->idx);
	avail_ring_idx = avail_idx & (vq->vq_nentries - 1);
	vq->vq_ring.avail->ring[avail_ring_idx] = vq_gtoh16(vq, desc_idx);

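	/* Publish the ring entry before the host can see the new avail index. */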
	wmb();
	vq->vq_ring.avail->idx = vq_gtoh16(vq, avail_idx + 1);

	/* Keep pending count until virtqueue_notify(). */
	vq->vq_queued_cnt++;
}

static uint16_t
vq_ring_enqueue_segments(struct virtqueue *vq, struct vring_desc *desc,
    uint16_t head_idx, struct sglist *sg, int readable, int writable)
{
	struct sglist_seg *seg;
	struct vring_desc *dp;
	int i, needed;
	uint16_t idx;

	SDT_PROBE6(virtqueue, , enqueue_segments, entry, vq, desc, head_idx,
	    sg, readable, writable);

	needed = readable + writable;

	for (i = 0, idx = head_idx, seg = sg->sg_segs;
	     i < needed;
	     i++, idx = vq_htog16(vq, dp->next), seg++) {
		VQASSERT(vq, idx != VQ_RING_DESC_CHAIN_END,
		    "premature end of free desc chain");

		dp = &desc[idx];
		dp->addr = vq_gtoh64(vq, seg->ss_paddr);
		dp->len = vq_gtoh32(vq, seg->ss_len);
		dp->flags = 0;

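		/*
		 * Chain all but the last segment; segments past the
		 * readable count are device-writable.
		 */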
		if (i < needed - 1)
			dp->flags |= vq_gtoh16(vq, VRING_DESC_F_NEXT);
		if (i >= readable)
			dp->flags |= vq_gtoh16(vq, VRING_DESC_F_WRITE);
	}

	SDT_PROBE1(virtqueue, , enqueue_segments, return, idx);
	return (idx);
}

static bool
vq_ring_use_indirect(struct virtqueue *vq, int needed)
{

	if ((vq->vq_flags & VIRTQUEUE_FLAG_INDIRECT) == 0)
		return (false);

	if (vq->vq_max_indirect_size < needed)
		return (false);

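	/* An indirect table gains nothing for a single-segment request. */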
	if (needed < 2)
		return (false);

	return (true);
}

static void
vq_ring_enqueue_indirect(struct virtqueue *vq, void *cookie,
    struct sglist *sg, int readable, int writable)
{
	struct vring_desc *dp;
	struct vq_desc_extra *dxp;
	int needed;
	uint16_t head_idx;

	needed = readable + writable;
	VQASSERT(vq, needed <= vq->vq_max_indirect_size,
	    "enqueuing too many indirect descriptors");

	head_idx = vq->vq_desc_head_idx;
	VQ_RING_ASSERT_VALID_IDX(vq, head_idx);
	dp = &vq->vq_ring.desc[head_idx];
	dxp = &vq->vq_descx[head_idx];

	VQASSERT(vq, dxp->cookie == NULL,
	    "cookie already exists for index %d", head_idx);
	dxp->cookie = cookie;
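	/*
	 * Only one ring descriptor is consumed; the rest of the chain
	 * lives in this entry's indirect table.
	 */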
	dxp->ndescs = 1;

	dp->addr = vq_gtoh64(vq, dxp->indirect_paddr);
	dp->len = vq_gtoh32(vq, needed * sizeof(struct vring_desc));
	dp->flags = vq_gtoh16(vq, VRING_DESC_F_INDIRECT);

	vq_ring_enqueue_segments(vq, dxp->indirect, 0,
	    sg, readable, writable);

	vq->vq_desc_head_idx = vq_htog16(vq, dp->next);
	vq->vq_free_cnt--;
	if (vq->vq_free_cnt == 0)
		VQ_RING_ASSERT_CHAIN_TERM(vq);
	else
		VQ_RING_ASSERT_VALID_IDX(vq, vq->vq_desc_head_idx);

	vq_ring_update_avail(vq, head_idx);
}

static int
vq_ring_enable_interrupt(struct virtqueue *vq, uint16_t ndesc)
{

	/*
	 * Enable interrupts, making sure we get the latest index of
	 * what's already been consumed.
	 */
	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) {
		vring_used_event(&vq->vq_ring) =
		    vq_gtoh16(vq, vq->vq_used_cons_idx + ndesc);
	} else {
		vq->vq_ring.avail->flags &=
		    vq_gtoh16(vq, ~VRING_AVAIL_F_NO_INTERRUPT);
	}

	mb();

	/*
	 * Enough items may have already been consumed to meet our threshold
	 * since we last checked. Let our caller know so it processes the new
	 * entries.
	 */
	if (virtqueue_nused(vq) > ndesc)
		return (1);

	return (0);
}

static int
vq_ring_must_notify_host(struct virtqueue *vq)
{
	uint16_t new_idx, prev_idx, event_idx, flags;

	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) {
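		/*
		 * Notify only if the device asked to be kicked at an
		 * index within the window of entries queued since the
		 * last notification.
		 */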
		new_idx = vq_htog16(vq, vq->vq_ring.avail->idx);
		prev_idx = new_idx - vq->vq_queued_cnt;
		event_idx = vq_htog16(vq, vring_avail_event(&vq->vq_ring));

		return (vring_need_event(event_idx, new_idx, prev_idx) != 0);
	}

	flags = vq->vq_ring.used->flags;
	return ((flags & vq_gtoh16(vq, VRING_USED_F_NO_NOTIFY)) == 0);
}

static void
vq_ring_notify_host(struct virtqueue *vq)
{

	VIRTIO_BUS_NOTIFY_VQ(vq->vq_dev, vq->vq_queue_index,
	    vq->vq_notify_offset);
}

static void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
	struct vring_desc *dp;
	struct vq_desc_extra *dxp;

	VQ_RING_ASSERT_VALID_IDX(vq, desc_idx);
	dp = &vq->vq_ring.desc[desc_idx];
	dxp = &vq->vq_descx[desc_idx];

	if (vq->vq_free_cnt == 0)
		VQ_RING_ASSERT_CHAIN_TERM(vq);

	vq->vq_free_cnt += dxp->ndescs;
	dxp->ndescs--;

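	/*
	 * An indirect chain occupies only a single ring descriptor;
	 * otherwise walk and account for each chained descriptor.
	 */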
	if ((dp->flags & vq_gtoh16(vq, VRING_DESC_F_INDIRECT)) == 0) {
		while (dp->flags & vq_gtoh16(vq, VRING_DESC_F_NEXT)) {
			uint16_t next_idx = vq_htog16(vq, dp->next);
			VQ_RING_ASSERT_VALID_IDX(vq, next_idx);
			dp = &vq->vq_ring.desc[next_idx];
			dxp->ndescs--;
		}
	}

	VQASSERT(vq, dxp->ndescs == 0,
	    "failed to free entire desc chain, remaining: %d", dxp->ndescs);

	/*
	 * We must append the existing free chain, if any, to the end of
	 * the newly freed chain. If the virtqueue was completely used,
	 * then head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
	 */
	dp->next = vq_gtoh16(vq, vq->vq_desc_head_idx);
	vq->vq_desc_head_idx = desc_idx;
}