/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Driver for VirtIO block devices. */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sglist.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/queue.h>

#include <geom/geom.h>
#include <geom/geom_disk.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/virtio/virtio.h>
#include <dev/virtio/virtqueue.h>
#include <dev/virtio/block/virtio_blk.h>

#include "virtio_if.h"

struct vtblk_request {
	struct virtio_blk_outhdr	 vbr_hdr;
	struct bio			*vbr_bp;
	uint8_t				 vbr_ack;
	TAILQ_ENTRY(vtblk_request)	 vbr_link;
};

enum vtblk_cache_mode {
	VTBLK_CACHE_WRITETHROUGH,
	VTBLK_CACHE_WRITEBACK,
	VTBLK_CACHE_MAX
};

struct vtblk_softc {
	device_t		 vtblk_dev;
	struct mtx		 vtblk_mtx;
	uint64_t		 vtblk_features;
	uint32_t		 vtblk_flags;
#define VTBLK_FLAG_INDIRECT	0x0001
#define VTBLK_FLAG_DETACH	0x0002
#define VTBLK_FLAG_SUSPEND	0x0004
#define VTBLK_FLAG_BARRIER	0x0008
#define VTBLK_FLAG_WCE_CONFIG	0x0010

	struct virtqueue	*vtblk_vq;
	struct sglist		*vtblk_sglist;
	struct disk		*vtblk_disk;

	struct bio_queue_head	 vtblk_bioq;
	TAILQ_HEAD(, vtblk_request)
				 vtblk_req_free;
	TAILQ_HEAD(, vtblk_request)
				 vtblk_req_ready;
	struct vtblk_request	*vtblk_req_ordered;

	int			 vtblk_max_nsegs;
	int			 vtblk_request_count;
	enum vtblk_cache_mode	 vtblk_write_cache;

	struct bio_queue	 vtblk_dump_queue;
	struct vtblk_request	 vtblk_dump_request;
};

static struct virtio_feature_desc vtblk_feature_desc[] = {
	{ VIRTIO_BLK_F_BARRIER,		"HostBarrier"	},
	{ VIRTIO_BLK_F_SIZE_MAX,	"MaxSegSize"	},
	{ VIRTIO_BLK_F_SEG_MAX,		"MaxNumSegs"	},
	{ VIRTIO_BLK_F_GEOMETRY,	"DiskGeometry"	},
	{ VIRTIO_BLK_F_RO,		"ReadOnly"	},
	{ VIRTIO_BLK_F_BLK_SIZE,	"BlockSize"	},
	{ VIRTIO_BLK_F_SCSI,		"SCSICmds"	},
	{ VIRTIO_BLK_F_FLUSH,		"FlushCmd"	},
	{ VIRTIO_BLK_F_TOPOLOGY,	"Topology"	},
	{ VIRTIO_BLK_F_CONFIG_WCE,	"ConfigWCE"	},
	{ VIRTIO_BLK_F_MQ,		"Multiqueue"	},
	{ VIRTIO_BLK_F_DISCARD,		"Discard"	},
	{ VIRTIO_BLK_F_WRITE_ZEROES,	"WriteZeros"	},

	{ 0, NULL }
};

static int	vtblk_modevent(module_t, int, void *);

static int	vtblk_probe(device_t);
static int	vtblk_attach(device_t);
static int	vtblk_detach(device_t);
static int	vtblk_suspend(device_t);
static int	vtblk_resume(device_t);
static int	vtblk_shutdown(device_t);
static int	vtblk_config_change(device_t);

static int	vtblk_open(struct disk *);
static int	vtblk_close(struct disk *);
static int	vtblk_ioctl(struct disk *, u_long, void *, int,
		    struct thread *);
static int	vtblk_dump(void *, void *, vm_offset_t, off_t, size_t);
static void	vtblk_strategy(struct bio *);

static int	vtblk_negotiate_features(struct vtblk_softc *);
static int	vtblk_setup_features(struct vtblk_softc *);
static int	vtblk_maximum_segments(struct vtblk_softc *,
		    struct virtio_blk_config *);
static int	vtblk_alloc_virtqueue(struct vtblk_softc *);
static void	vtblk_resize_disk(struct vtblk_softc *, uint64_t);
static void	vtblk_alloc_disk(struct vtblk_softc *,
		    struct virtio_blk_config *);
static void	vtblk_create_disk(struct vtblk_softc *);

static int	vtblk_request_prealloc(struct vtblk_softc *);
static void	vtblk_request_free(struct vtblk_softc *);
static struct vtblk_request *
		vtblk_request_dequeue(struct vtblk_softc *);
static void	vtblk_request_enqueue(struct vtblk_softc *,
		    struct vtblk_request *);
static struct vtblk_request *
		vtblk_request_next_ready(struct vtblk_softc *);
static void	vtblk_request_requeue_ready(struct vtblk_softc *,
		    struct vtblk_request *);
static struct vtblk_request *
		vtblk_request_next(struct vtblk_softc *);
static struct vtblk_request *
		vtblk_request_bio(struct vtblk_softc *);
static int	vtblk_request_execute(struct vtblk_softc *,
		    struct vtblk_request *);
static int	vtblk_request_error(struct vtblk_request *);

static void	vtblk_queue_completed(struct vtblk_softc *,
		    struct bio_queue *);
static void	vtblk_done_completed(struct vtblk_softc *,
		    struct bio_queue *);
static void	vtblk_drain_vq(struct vtblk_softc *);
static void	vtblk_drain(struct vtblk_softc *);

static void	vtblk_startio(struct vtblk_softc *);
static void	vtblk_bio_done(struct vtblk_softc *, struct bio *, int);

static void	vtblk_read_config(struct vtblk_softc *,
		    struct virtio_blk_config *);
static void	vtblk_ident(struct vtblk_softc *);
static int	vtblk_poll_request(struct vtblk_softc *,
		    struct vtblk_request *);
static int	vtblk_quiesce(struct vtblk_softc *);
static void	vtblk_vq_intr(void *);
static void	vtblk_stop(struct vtblk_softc *);

static void	vtblk_dump_quiesce(struct vtblk_softc *);
static int	vtblk_dump_write(struct vtblk_softc *, void *, off_t, size_t);
static int	vtblk_dump_flush(struct vtblk_softc *);
static void	vtblk_dump_complete(struct vtblk_softc *);

static void	vtblk_set_write_cache(struct vtblk_softc *, int);
static int	vtblk_write_cache_enabled(struct vtblk_softc *sc,
		    struct virtio_blk_config *);
static int	vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS);

static void	vtblk_setup_sysctl(struct vtblk_softc *);
static int	vtblk_tunable_int(struct vtblk_softc *, const char *, int);

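/*
 * Guest/host byte order conversion helpers. Modern (VIRTIO_F_VERSION_1)
 * devices use little-endian config and header fields, while legacy devices
 * use guest-native endianness, so each conversion is keyed off the
 * negotiated mode.
 */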
#define vtblk_modern(_sc) (((_sc)->vtblk_features & VIRTIO_F_VERSION_1) != 0)
#define vtblk_htog16(_sc, _val)	virtio_htog16(vtblk_modern(_sc), _val)
#define vtblk_htog32(_sc, _val)	virtio_htog32(vtblk_modern(_sc), _val)
#define vtblk_htog64(_sc, _val)	virtio_htog64(vtblk_modern(_sc), _val)
#define vtblk_gtoh16(_sc, _val)	virtio_gtoh16(vtblk_modern(_sc), _val)
#define vtblk_gtoh32(_sc, _val)	virtio_gtoh32(vtblk_modern(_sc), _val)
#define vtblk_gtoh64(_sc, _val)	virtio_gtoh64(vtblk_modern(_sc), _val)

/* Tunables. */
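/*
 * For example, these can be set from loader.conf(5) (the values shown are
 * illustrative):
 *
 *	hw.vtblk.no_ident="1"		# skip the GET_ID ident request
 *	hw.vtblk.writecache_mode="0"	# force writethrough
 *
 * vtblk_tunable_int() below also honors a per-device override of the
 * form hw.vtblk.<unit>.<knob>, e.g. hw.vtblk.0.writecache_mode.
 */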
static int vtblk_no_ident = 0;
TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);
static int vtblk_writecache_mode = -1;
TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writecache_mode);

#define VTBLK_COMMON_FEATURES \
    (VIRTIO_BLK_F_SIZE_MAX		| \
     VIRTIO_BLK_F_SEG_MAX		| \
     VIRTIO_BLK_F_GEOMETRY		| \
     VIRTIO_BLK_F_RO			| \
     VIRTIO_BLK_F_BLK_SIZE		| \
     VIRTIO_BLK_F_FLUSH		| \
     VIRTIO_BLK_F_TOPOLOGY		| \
     VIRTIO_BLK_F_CONFIG_WCE		| \
     VIRTIO_BLK_F_DISCARD		| \
     VIRTIO_RING_F_INDIRECT_DESC)

#define VTBLK_MODERN_FEATURES	(VTBLK_COMMON_FEATURES)
#define VTBLK_LEGACY_FEATURES	(VIRTIO_BLK_F_BARRIER | VTBLK_COMMON_FEATURES)

#define VTBLK_MTX(_sc)		&(_sc)->vtblk_mtx
#define VTBLK_LOCK_INIT(_sc, _name) \
				mtx_init(VTBLK_MTX((_sc)), (_name), \
				    "VirtIO Block Lock", MTX_DEF)
#define VTBLK_LOCK(_sc)		mtx_lock(VTBLK_MTX((_sc)))
#define VTBLK_UNLOCK(_sc)	mtx_unlock(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_DESTROY(_sc)	mtx_destroy(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_ASSERT(_sc)	mtx_assert(VTBLK_MTX((_sc)), MA_OWNED)
#define VTBLK_LOCK_ASSERT_NOTOWNED(_sc) \
				mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)

#define VTBLK_DISK_NAME		"vtbd"
#define VTBLK_QUIESCE_TIMEOUT	(30 * hz)
#define VTBLK_BSIZE		512

/*
 * Each block request uses at least two segments - one for the header
 * and one for the status.
 */
#define VTBLK_MIN_SEGMENTS	2

static device_method_t vtblk_methods[] = {
	/* Device methods. */
	DEVMETHOD(device_probe,		vtblk_probe),
	DEVMETHOD(device_attach,	vtblk_attach),
	DEVMETHOD(device_detach,	vtblk_detach),
	DEVMETHOD(device_suspend,	vtblk_suspend),
	DEVMETHOD(device_resume,	vtblk_resume),
	DEVMETHOD(device_shutdown,	vtblk_shutdown),

	/* VirtIO methods. */
	DEVMETHOD(virtio_config_change,	vtblk_config_change),

	DEVMETHOD_END
};

static driver_t vtblk_driver = {
	"vtblk",
	vtblk_methods,
	sizeof(struct vtblk_softc)
};
static devclass_t vtblk_devclass;

VIRTIO_DRIVER_MODULE(virtio_blk, vtblk_driver, vtblk_devclass,
    vtblk_modevent, 0);
MODULE_VERSION(virtio_blk, 1);
MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);

VIRTIO_SIMPLE_PNPINFO(virtio_blk, VIRTIO_ID_BLOCK, "VirtIO Block Adapter");

static int
vtblk_modevent(module_t mod, int type, void *unused)
{
	int error;

	error = 0;

	switch (type) {
	case MOD_LOAD:
	case MOD_QUIESCE:
	case MOD_UNLOAD:
	case MOD_SHUTDOWN:
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}

	return (error);
}

static int
vtblk_probe(device_t dev)
{
	return (VIRTIO_SIMPLE_PROBE(dev, virtio_blk));
}

static int
vtblk_attach(device_t dev)
{
	struct vtblk_softc *sc;
	struct virtio_blk_config blkcfg;
	int error;

	sc = device_get_softc(dev);
	sc->vtblk_dev = dev;
	virtio_set_feature_desc(dev, vtblk_feature_desc);

	VTBLK_LOCK_INIT(sc, device_get_nameunit(dev));
	bioq_init(&sc->vtblk_bioq);
	TAILQ_INIT(&sc->vtblk_dump_queue);
	TAILQ_INIT(&sc->vtblk_req_free);
	TAILQ_INIT(&sc->vtblk_req_ready);

	vtblk_setup_sysctl(sc);

	error = vtblk_setup_features(sc);
	if (error) {
		device_printf(dev, "cannot setup features\n");
		goto fail;
	}

	vtblk_read_config(sc, &blkcfg);

	/*
	 * With the current sglist(9) implementation, it is not easy
	 * for us to support a maximum segment size as adjacent
	 * segments are coalesced. For now, just make sure it's larger
	 * than the maximum supported transfer size.
	 */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
		if (blkcfg.size_max < maxphys) {
			error = ENOTSUP;
			device_printf(dev, "host requires unsupported "
			    "maximum segment size feature\n");
			goto fail;
		}
	}

	sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
	if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
		error = EINVAL;
		device_printf(dev, "fewer than minimum number of segments "
		    "allowed: %d\n", sc->vtblk_max_nsegs);
		goto fail;
	}

	sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
	if (sc->vtblk_sglist == NULL) {
		error = ENOMEM;
		device_printf(dev, "cannot allocate sglist\n");
		goto fail;
	}

	error = vtblk_alloc_virtqueue(sc);
	if (error) {
		device_printf(dev, "cannot allocate virtqueue\n");
		goto fail;
	}

	error = vtblk_request_prealloc(sc);
	if (error) {
		device_printf(dev, "cannot preallocate requests\n");
		goto fail;
	}

	vtblk_alloc_disk(sc, &blkcfg);

	error = virtio_setup_intr(dev, INTR_TYPE_BIO | INTR_ENTROPY);
	if (error) {
		device_printf(dev, "cannot setup virtqueue interrupt\n");
		goto fail;
	}

	vtblk_create_disk(sc);

	virtqueue_enable_intr(sc->vtblk_vq);

fail:
	if (error)
		vtblk_detach(dev);

	return (error);
}

static int
vtblk_detach(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	sc->vtblk_flags |= VTBLK_FLAG_DETACH;
	if (device_is_attached(dev))
		vtblk_stop(sc);
	VTBLK_UNLOCK(sc);

	vtblk_drain(sc);

	if (sc->vtblk_disk != NULL) {
		disk_destroy(sc->vtblk_disk);
		sc->vtblk_disk = NULL;
	}

	if (sc->vtblk_sglist != NULL) {
		sglist_free(sc->vtblk_sglist);
		sc->vtblk_sglist = NULL;
	}

	VTBLK_LOCK_DESTROY(sc);

	return (0);
}

static int
vtblk_suspend(device_t dev)
{
	struct vtblk_softc *sc;
	int error;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
	/* XXX BMV: virtio_stop(), etc needed here? */
	error = vtblk_quiesce(sc);
	if (error)
		sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
	VTBLK_UNLOCK(sc);

	return (error);
}

static int
vtblk_resume(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	/* XXX BMV: virtio_reinit(), etc needed here? */
	sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
	vtblk_startio(sc);
	VTBLK_UNLOCK(sc);

	return (0);
}

static int
vtblk_shutdown(device_t dev)
{

	return (0);
}

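/*
 * Handle a config change interrupt from the host, e.g. after the backing
 * storage has been grown. Only a capacity change is acted upon here: the
 * disk is resized to match the new media size.
 */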
static int
vtblk_config_change(device_t dev)
{
	struct vtblk_softc *sc;
	struct virtio_blk_config blkcfg;
	uint64_t capacity;

	sc = device_get_softc(dev);

	vtblk_read_config(sc, &blkcfg);

	/* Capacity is always in 512-byte units. */
	capacity = blkcfg.capacity * VTBLK_BSIZE;

	if (sc->vtblk_disk->d_mediasize != capacity)
		vtblk_resize_disk(sc, capacity);

	return (0);
}

static int
vtblk_open(struct disk *dp)
{
	struct vtblk_softc *sc;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
}

static int
vtblk_close(struct disk *dp)
{
	struct vtblk_softc *sc;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	return (0);
}

static int
vtblk_ioctl(struct disk *dp, u_long cmd, void *addr, int flag,
    struct thread *td)
{
	struct vtblk_softc *sc;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	return (ENOTTY);
}

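/*
 * Kernel dump entry point (d_dump), typically invoked during a panic.
 * In-flight requests are drained first, and each write is then issued
 * synchronously via vtblk_poll_request() since the interrupt path cannot
 * be relied upon in this context.
 */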
static int
vtblk_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
    size_t length)
{
	struct disk *dp;
	struct vtblk_softc *sc;
	int error;

	dp = arg;
	error = 0;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	VTBLK_LOCK(sc);

	vtblk_dump_quiesce(sc);

	if (length > 0)
		error = vtblk_dump_write(sc, virtual, offset, length);
	if (error || (virtual == NULL && offset == 0))
		vtblk_dump_complete(sc);

	VTBLK_UNLOCK(sc);

	return (error);
}

static void
vtblk_strategy(struct bio *bp)
{
	struct vtblk_softc *sc;

	if ((sc = bp->bio_disk->d_drv1) == NULL) {
		vtblk_bio_done(NULL, bp, EINVAL);
		return;
	}

	if ((bp->bio_cmd != BIO_READ) && (bp->bio_cmd != BIO_WRITE) &&
	    (bp->bio_cmd != BIO_FLUSH) && (bp->bio_cmd != BIO_DELETE)) {
		vtblk_bio_done(sc, bp, EOPNOTSUPP);
		return;
	}

	VTBLK_LOCK(sc);

	if (sc->vtblk_flags & VTBLK_FLAG_DETACH) {
		VTBLK_UNLOCK(sc);
		vtblk_bio_done(sc, bp, ENXIO);
		return;
	}

	bioq_insert_tail(&sc->vtblk_bioq, bp);
	vtblk_startio(sc);

	VTBLK_UNLOCK(sc);
}

static int
vtblk_negotiate_features(struct vtblk_softc *sc)
{
	device_t dev;
	uint64_t features;

	dev = sc->vtblk_dev;
	features = virtio_bus_is_modern(dev) ? VTBLK_MODERN_FEATURES :
	    VTBLK_LEGACY_FEATURES;

	sc->vtblk_features = virtio_negotiate_features(dev, features);
	return (virtio_finalize_features(dev));
}

static int
vtblk_setup_features(struct vtblk_softc *sc)
{
	device_t dev;
	int error;

	dev = sc->vtblk_dev;

	error = vtblk_negotiate_features(sc);
	if (error)
		return (error);

	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
		sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;
	if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
		sc->vtblk_flags |= VTBLK_FLAG_WCE_CONFIG;

	/* Legacy. */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_BARRIER))
		sc->vtblk_flags |= VTBLK_FLAG_BARRIER;

	return (0);
}

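/*
 * Compute the scatter/gather segment limit per request: the two
 * VTBLK_MIN_SEGMENTS (header and status byte) plus the data segments,
 * bounded by the host's seg_max and, when indirect descriptors are in
 * use, by VIRTIO_MAX_INDIRECT.
 */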
static int
vtblk_maximum_segments(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
	device_t dev;
	int nsegs;

	dev = sc->vtblk_dev;
	nsegs = VTBLK_MIN_SEGMENTS;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
		nsegs += MIN(blkcfg->seg_max, maxphys / PAGE_SIZE + 1);
		if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
			nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT);
	} else
		nsegs += 1;

	return (nsegs);
}

static int
vtblk_alloc_virtqueue(struct vtblk_softc *sc)
{
	device_t dev;
	struct vq_alloc_info vq_info;

	dev = sc->vtblk_dev;

	VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
	    vtblk_vq_intr, sc, &sc->vtblk_vq,
	    "%s request", device_get_nameunit(dev));

	return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
}

static void
vtblk_resize_disk(struct vtblk_softc *sc, uint64_t new_capacity)
{
	device_t dev;
	struct disk *dp;
	int error;

	dev = sc->vtblk_dev;
	dp = sc->vtblk_disk;

	dp->d_mediasize = new_capacity;
	if (bootverbose) {
		device_printf(dev, "resized to %juMB (%ju %u byte sectors)\n",
		    (uintmax_t) dp->d_mediasize >> 20,
		    (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
		    dp->d_sectorsize);
	}

	error = disk_resize(dp, M_NOWAIT);
	if (error) {
		device_printf(dev,
		    "disk_resize(9) failed, error: %d\n", error);
	}
}

static void
vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
	device_t dev;
	struct disk *dp;

	dev = sc->vtblk_dev;

	sc->vtblk_disk = dp = disk_alloc();
	dp->d_open = vtblk_open;
	dp->d_close = vtblk_close;
	dp->d_ioctl = vtblk_ioctl;
	dp->d_strategy = vtblk_strategy;
	dp->d_name = VTBLK_DISK_NAME;
	dp->d_unit = device_get_unit(dev);
	dp->d_drv1 = sc;
	dp->d_flags = DISKFLAG_UNMAPPED_BIO | DISKFLAG_DIRECT_COMPLETION;
	dp->d_hba_vendor = virtio_get_vendor(dev);
	dp->d_hba_device = virtio_get_device(dev);
	dp->d_hba_subvendor = virtio_get_subvendor(dev);
	dp->d_hba_subdevice = virtio_get_subdevice(dev);

	if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
		dp->d_flags |= DISKFLAG_WRITE_PROTECT;
	else {
		if (virtio_with_feature(dev, VIRTIO_BLK_F_FLUSH))
			dp->d_flags |= DISKFLAG_CANFLUSHCACHE;
		dp->d_dump = vtblk_dump;
	}

	/* Capacity is always in 512-byte units. */
	dp->d_mediasize = blkcfg->capacity * VTBLK_BSIZE;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_BLK_SIZE))
		dp->d_sectorsize = blkcfg->blk_size;
	else
		dp->d_sectorsize = VTBLK_BSIZE;

	/*
	 * The VirtIO maximum I/O size is given in terms of segments.
	 * However, FreeBSD limits I/O size by logical buffer size, not
	 * by physically contiguous pages. Therefore, we have to assume
	 * no pages are contiguous. This may impose an artificially low
	 * maximum I/O size. But in practice, since QEMU advertises 128
	 * segments, this gives us a maximum IO size of 125 * PAGE_SIZE,
	 * which is typically greater than maxphys. Eventually we should
	 * just advertise maxphys and split buffers that are too big.
	 *
	 * Note we must subtract one additional segment in case of non
	 * page aligned buffers.
	 */
	dp->d_maxsize = (sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS - 1) *
	    PAGE_SIZE;
	if (dp->d_maxsize < PAGE_SIZE)
		dp->d_maxsize = PAGE_SIZE; /* XXX */

	if (virtio_with_feature(dev, VIRTIO_BLK_F_GEOMETRY)) {
		dp->d_fwsectors = blkcfg->geometry.sectors;
		dp->d_fwheads = blkcfg->geometry.heads;
	}

	if (virtio_with_feature(dev, VIRTIO_BLK_F_TOPOLOGY) &&
	    blkcfg->topology.physical_block_exp > 0) {
		dp->d_stripesize = dp->d_sectorsize *
		    (1 << blkcfg->topology.physical_block_exp);
		dp->d_stripeoffset = (dp->d_stripesize -
		    blkcfg->topology.alignment_offset * dp->d_sectorsize) %
		    dp->d_stripesize;
	}

	if (virtio_with_feature(dev, VIRTIO_BLK_F_DISCARD)) {
		dp->d_flags |= DISKFLAG_CANDELETE;
		dp->d_delmaxsize = blkcfg->max_discard_sectors * VTBLK_BSIZE;
	}

	if (vtblk_write_cache_enabled(sc, blkcfg) != 0)
		sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK;
	else
		sc->vtblk_write_cache = VTBLK_CACHE_WRITETHROUGH;
}

static void
vtblk_create_disk(struct vtblk_softc *sc)
{
	struct disk *dp;

	dp = sc->vtblk_disk;

	vtblk_ident(sc);

	device_printf(sc->vtblk_dev, "%juMB (%ju %u byte sectors)\n",
	    (uintmax_t) dp->d_mediasize >> 20,
	    (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
	    dp->d_sectorsize);

	disk_create(dp, DISK_VERSION);
}

static int
vtblk_request_prealloc(struct vtblk_softc *sc)
{
	struct vtblk_request *req;
	int i, nreqs;

	nreqs = virtqueue_size(sc->vtblk_vq);

	/*
	 * Preallocate sufficient requests to keep the virtqueue full. Each
	 * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
	 * the number allocated when indirect descriptors are not available.
	 */
	if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
		nreqs /= VTBLK_MIN_SEGMENTS;

	for (i = 0; i < nreqs; i++) {
		req = malloc(sizeof(struct vtblk_request), M_DEVBUF, M_NOWAIT);
		if (req == NULL)
			return (ENOMEM);

		MPASS(sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr)) == 1);
		MPASS(sglist_count(&req->vbr_ack, sizeof(req->vbr_ack)) == 1);

		sc->vtblk_request_count++;
		vtblk_request_enqueue(sc, req);
	}

	return (0);
}

static void
vtblk_request_free(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	MPASS(TAILQ_EMPTY(&sc->vtblk_req_ready));

	while ((req = vtblk_request_dequeue(sc)) != NULL) {
		sc->vtblk_request_count--;
		free(req, M_DEVBUF);
	}

	KASSERT(sc->vtblk_request_count == 0,
	    ("%s: leaked %d requests", __func__, sc->vtblk_request_count));
}

static struct vtblk_request *
vtblk_request_dequeue(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = TAILQ_FIRST(&sc->vtblk_req_free);
	if (req != NULL) {
		TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);
		bzero(req, sizeof(struct vtblk_request));
	}

	return (req);
}

static void
vtblk_request_enqueue(struct vtblk_softc *sc, struct vtblk_request *req)
{

	TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
}

static struct vtblk_request *
vtblk_request_next_ready(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = TAILQ_FIRST(&sc->vtblk_req_ready);
	if (req != NULL)
		TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);

	return (req);
}

static void
vtblk_request_requeue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
{

	/* NOTE: Currently, there will be at most one request in the queue. */
	TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
}

static struct vtblk_request *
vtblk_request_next(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = vtblk_request_next_ready(sc);
	if (req != NULL)
		return (req);

	return (vtblk_request_bio(sc));
}

static struct vtblk_request *
vtblk_request_bio(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bp;

	bioq = &sc->vtblk_bioq;

	if (bioq_first(bioq) == NULL)
		return (NULL);

	req = vtblk_request_dequeue(sc);
	if (req == NULL)
		return (NULL);

	bp = bioq_takefirst(bioq);
	req->vbr_bp = bp;
	req->vbr_ack = -1;
	req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);

	switch (bp->bio_cmd) {
	case BIO_FLUSH:
		req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_FLUSH);
		req->vbr_hdr.sector = 0;
		break;
	case BIO_READ:
		req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_IN);
		req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
		break;
	case BIO_WRITE:
		req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_OUT);
		req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
		break;
	case BIO_DELETE:
		req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_DISCARD);
		req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
		break;
	default:
		panic("%s: bio with unhandled cmd: %d", __func__, bp->bio_cmd);
	}

	if (bp->bio_flags & BIO_ORDERED)
		req->vbr_hdr.type |= vtblk_gtoh32(sc, VIRTIO_BLK_T_BARRIER);

	return (req);
}

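/*
 * Enqueue a request as a single descriptor chain laid out as:
 *
 *	[outhdr][data segment(s), if any][ack byte]
 *
 * The header is device-readable and the ack byte device-writable. For
 * BIO_READ the data segments are also device-writable, which is why
 * "writable" counts the data segments only for reads.
 */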
static int
vtblk_request_execute(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct virtqueue *vq;
	struct sglist *sg;
	struct bio *bp;
	int ordered, readable, writable, error;

	vq = sc->vtblk_vq;
	sg = sc->vtblk_sglist;
	bp = req->vbr_bp;
	ordered = 0;
	writable = 0;

	/*
	 * Some hosts (such as bhyve) do not implement the barrier feature,
	 * so we emulate it in the driver by allowing the barrier request
	 * to be the only one in flight.
	 */
	if ((sc->vtblk_flags & VTBLK_FLAG_BARRIER) == 0) {
		if (sc->vtblk_req_ordered != NULL)
			return (EBUSY);
		if (bp->bio_flags & BIO_ORDERED) {
			if (!virtqueue_empty(vq))
				return (EBUSY);
			ordered = 1;
			req->vbr_hdr.type &= vtblk_gtoh32(sc,
				~VIRTIO_BLK_T_BARRIER);
		}
	}

	sglist_reset(sg);
	sglist_append(sg, &req->vbr_hdr, sizeof(struct virtio_blk_outhdr));

	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
		error = sglist_append_bio(sg, bp);
		if (error || sg->sg_nseg == sg->sg_maxseg) {
			panic("%s: bio %p data buffer too big %d",
			    __func__, bp, error);
		}

		/* BIO_READ means the host writes into our buffer. */
		if (bp->bio_cmd == BIO_READ)
			writable = sg->sg_nseg - 1;
	} else if (bp->bio_cmd == BIO_DELETE) {
		struct virtio_blk_discard_write_zeroes *discard;

		discard = malloc(sizeof(*discard), M_DEVBUF, M_NOWAIT | M_ZERO);
		if (discard == NULL)
			return (ENOMEM);

		bp->bio_driver1 = discard;
		discard->sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
		discard->num_sectors = vtblk_gtoh32(sc, bp->bio_bcount / VTBLK_BSIZE);
		error = sglist_append(sg, discard, sizeof(*discard));
		if (error || sg->sg_nseg == sg->sg_maxseg) {
			panic("%s: bio %p data buffer too big %d",
			    __func__, bp, error);
		}
	}

	writable++;
	sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
	readable = sg->sg_nseg - writable;

	error = virtqueue_enqueue(vq, req, sg, readable, writable);
	if (error == 0 && ordered)
		sc->vtblk_req_ordered = req;

	return (error);
}

static int
vtblk_request_error(struct vtblk_request *req)
{
	int error;

	switch (req->vbr_ack) {
	case VIRTIO_BLK_S_OK:
		error = 0;
		break;
	case VIRTIO_BLK_S_UNSUPP:
		error = ENOTSUP;
		break;
	default:
		error = EIO;
		break;
	}

	return (error);
}

static void
vtblk_queue_completed(struct vtblk_softc *sc, struct bio_queue *queue)
{
	struct vtblk_request *req;
	struct bio *bp;

	while ((req = virtqueue_dequeue(sc->vtblk_vq, NULL)) != NULL) {
		if (sc->vtblk_req_ordered != NULL) {
			MPASS(sc->vtblk_req_ordered == req);
			sc->vtblk_req_ordered = NULL;
		}

		bp = req->vbr_bp;
		bp->bio_error = vtblk_request_error(req);
		TAILQ_INSERT_TAIL(queue, bp, bio_queue);

		vtblk_request_enqueue(sc, req);
	}
}

static void
vtblk_done_completed(struct vtblk_softc *sc, struct bio_queue *queue)
{
	struct bio *bp, *tmp;

	TAILQ_FOREACH_SAFE(bp, queue, bio_queue, tmp) {
		if (bp->bio_error != 0)
			disk_err(bp, "hard error", -1, 1);
		vtblk_bio_done(sc, bp, bp->bio_error);
	}
}

static void
vtblk_drain_vq(struct vtblk_softc *sc)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int last;

	vq = sc->vtblk_vq;
	last = 0;

	while ((req = virtqueue_drain(vq, &last)) != NULL) {
		vtblk_bio_done(sc, req->vbr_bp, ENXIO);
		vtblk_request_enqueue(sc, req);
	}

	sc->vtblk_req_ordered = NULL;
	KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
}

static void
vtblk_drain(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bp;

	bioq = &sc->vtblk_bioq;

	if (sc->vtblk_vq != NULL) {
		struct bio_queue queue;

		TAILQ_INIT(&queue);
		vtblk_queue_completed(sc, &queue);
		vtblk_done_completed(sc, &queue);

		vtblk_drain_vq(sc);
	}

	while ((req = vtblk_request_next_ready(sc)) != NULL) {
		vtblk_bio_done(sc, req->vbr_bp, ENXIO);
		vtblk_request_enqueue(sc, req);
	}

	while (bioq_first(bioq) != NULL) {
		bp = bioq_takefirst(bioq);
		vtblk_bio_done(sc, bp, ENXIO);
	}

	vtblk_request_free(sc);
}

static void
vtblk_startio(struct vtblk_softc *sc)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int enq;

	VTBLK_LOCK_ASSERT(sc);
	vq = sc->vtblk_vq;
	enq = 0;

	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
		return;

	while (!virtqueue_full(vq)) {
		req = vtblk_request_next(sc);
		if (req == NULL)
			break;

		if (vtblk_request_execute(sc, req) != 0) {
			vtblk_request_requeue_ready(sc, req);
			break;
		}

		enq++;
	}

	if (enq > 0)
		virtqueue_notify(vq);
}

static void
vtblk_bio_done(struct vtblk_softc *sc, struct bio *bp, int error)
{

	/* Because of GEOM direct dispatch, we cannot hold any locks. */
	if (sc != NULL)
		VTBLK_LOCK_ASSERT_NOTOWNED(sc);

	if (error) {
		bp->bio_resid = bp->bio_bcount;
		bp->bio_error = error;
		bp->bio_flags |= BIO_ERROR;
	}

	if (bp->bio_driver1 != NULL) {
		free(bp->bio_driver1, M_DEVBUF);
		bp->bio_driver1 = NULL;
	}

	biodone(bp);
}

#define VTBLK_GET_CONFIG(_dev, _feature, _field, _cfg)			\
	if (virtio_with_feature(_dev, _feature)) {			\
		virtio_read_device_config(_dev,				\
		    offsetof(struct virtio_blk_config, _field),		\
		    &(_cfg)->_field, sizeof((_cfg)->_field));		\
	}

static void
vtblk_read_config(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
	device_t dev;

	dev = sc->vtblk_dev;

	bzero(blkcfg, sizeof(struct virtio_blk_config));

	/* The capacity is always available. */
	virtio_read_device_config(dev, offsetof(struct virtio_blk_config,
	    capacity), &blkcfg->capacity, sizeof(blkcfg->capacity));

	/* Read the configuration if the feature was negotiated. */
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SIZE_MAX, size_max, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SEG_MAX, seg_max, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
	    geometry.cylinders, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
	    geometry.heads, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
	    geometry.sectors, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_BLK_SIZE, blk_size, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
	    topology.physical_block_exp, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
	    topology.alignment_offset, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
	    topology.min_io_size, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
	    topology.opt_io_size, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, wce, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_sectors,
	    blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_seg, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, discard_sector_alignment,
	    blkcfg);
}

#undef VTBLK_GET_CONFIG

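/*
 * Fetch the device identity string with a VIRTIO_BLK_T_GET_ID request,
 * polled synchronously since this runs at attach time before the disk is
 * exposed to GEOM. It may be disabled with the hw.vtblk.no_ident tunable.
 */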
static void
vtblk_ident(struct vtblk_softc *sc)
{
	struct bio buf;
	struct disk *dp;
	struct vtblk_request *req;
	int len, error;

	dp = sc->vtblk_disk;
	len = MIN(VIRTIO_BLK_ID_BYTES, DISK_IDENT_SIZE);

	if (vtblk_tunable_int(sc, "no_ident", vtblk_no_ident) != 0)
		return;

	req = vtblk_request_dequeue(sc);
	if (req == NULL)
		return;

	req->vbr_ack = -1;
	req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_GET_ID);
	req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
	req->vbr_hdr.sector = 0;

	req->vbr_bp = &buf;
	g_reset_bio(&buf);

	buf.bio_cmd = BIO_READ;
	buf.bio_data = dp->d_ident;
	buf.bio_bcount = len;

	VTBLK_LOCK(sc);
	error = vtblk_poll_request(sc, req);
	VTBLK_UNLOCK(sc);

	vtblk_request_enqueue(sc, req);

	if (error) {
		device_printf(sc->vtblk_dev,
		    "error getting device identifier: %d\n", error);
	}
}

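/*
 * Synchronously execute a single request, spinning in virtqueue_poll()
 * for its completion. The virtqueue must be otherwise empty; this path
 * is only used for the ident request and for kernel dumps.
 */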
static int
vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct virtqueue *vq;
	int error;

	vq = sc->vtblk_vq;

	if (!virtqueue_empty(vq))
		return (EBUSY);

	error = vtblk_request_execute(sc, req);
	if (error)
		return (error);

	virtqueue_notify(vq);
	virtqueue_poll(vq, NULL);

	error = vtblk_request_error(req);
	if (error && bootverbose) {
		device_printf(sc->vtblk_dev,
		    "%s: IO error: %d\n", __func__, error);
	}

	return (error);
}

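/*
 * Wait, up to VTBLK_QUIESCE_TIMEOUT, for the in-flight requests to
 * complete. The interrupt handler issues the wakeup while the suspend
 * flag is set; the loop then re-checks whether the virtqueue is empty.
 */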
static int
vtblk_quiesce(struct vtblk_softc *sc)
{
	int error;

	VTBLK_LOCK_ASSERT(sc);
	error = 0;

	while (!virtqueue_empty(sc->vtblk_vq)) {
		if (mtx_sleep(&sc->vtblk_vq, VTBLK_MTX(sc), PRIBIO, "vtblkq",
		    VTBLK_QUIESCE_TIMEOUT) == EWOULDBLOCK) {
			error = EBUSY;
			break;
		}
	}

	return (error);
}

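/*
 * Virtqueue interrupt handler. Completed requests are collected onto a
 * local queue and their bios finished only after the lock is dropped,
 * since GEOM direct dispatch does not allow biodone() to be called with
 * locks held. Looping back when virtqueue_enable_intr() reports more
 * work closes the race with requests that complete between the final
 * dequeue and re-enabling the interrupt.
 */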
static void
vtblk_vq_intr(void *xsc)
{
	struct vtblk_softc *sc;
	struct virtqueue *vq;
	struct bio_queue queue;

	sc = xsc;
	vq = sc->vtblk_vq;
	TAILQ_INIT(&queue);

	VTBLK_LOCK(sc);

again:
	if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
		goto out;

	vtblk_queue_completed(sc, &queue);
	vtblk_startio(sc);

	if (virtqueue_enable_intr(vq) != 0) {
		virtqueue_disable_intr(vq);
		goto again;
	}

	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
		wakeup(&sc->vtblk_vq);

out:
	VTBLK_UNLOCK(sc);
	vtblk_done_completed(sc, &queue);
}

static void
vtblk_stop(struct vtblk_softc *sc)
{

	virtqueue_disable_intr(sc->vtblk_vq);
	virtio_stop(sc->vtblk_dev);
}

static void
vtblk_dump_quiesce(struct vtblk_softc *sc)
{

	/*
	 * Spin here until all the requests in-flight at the time of the
	 * dump are completed and queued. The queued requests will be
	 * biodone'd once the dump is finished.
	 */
	while (!virtqueue_empty(sc->vtblk_vq))
		vtblk_queue_completed(sc, &sc->vtblk_dump_queue);
}

static int
vtblk_dump_write(struct vtblk_softc *sc, void *virtual, off_t offset,
    size_t length)
{
	struct bio buf;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_ack = -1;
	req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_OUT);
	req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
	req->vbr_hdr.sector = vtblk_gtoh64(sc, offset / VTBLK_BSIZE);

	req->vbr_bp = &buf;
	g_reset_bio(&buf);

	buf.bio_cmd = BIO_WRITE;
	buf.bio_data = virtual;
	buf.bio_bcount = length;

	return (vtblk_poll_request(sc, req));
}

static int
vtblk_dump_flush(struct vtblk_softc *sc)
{
	struct bio buf;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_ack = -1;
	req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_FLUSH);
	req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
	req->vbr_hdr.sector = 0;

	req->vbr_bp = &buf;
	g_reset_bio(&buf);

	buf.bio_cmd = BIO_FLUSH;

	return (vtblk_poll_request(sc, req));
}

static void
vtblk_dump_complete(struct vtblk_softc *sc)
{

	vtblk_dump_flush(sc);

	VTBLK_UNLOCK(sc);
	vtblk_done_completed(sc, &sc->vtblk_dump_queue);
	VTBLK_LOCK(sc);
}

static void
vtblk_set_write_cache(struct vtblk_softc *sc, int wc)
{

	/* Set either writeback (1) or writethrough (0) mode. */
	virtio_write_dev_config_1(sc->vtblk_dev,
	    offsetof(struct virtio_blk_config, wce), wc);
}

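/*
 * Determine the initial write cache mode. When the cache is configurable
 * (VIRTIO_BLK_F_CONFIG_WCE), an explicit writecache_mode tunable wins,
 * otherwise the device's current wce value is used. Without CONFIG_WCE,
 * writeback is assumed whenever the flush command is available.
 */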
static int
vtblk_write_cache_enabled(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
	int wc;

	if (sc->vtblk_flags & VTBLK_FLAG_WCE_CONFIG) {
		wc = vtblk_tunable_int(sc, "writecache_mode",
		    vtblk_writecache_mode);
		if (wc >= 0 && wc < VTBLK_CACHE_MAX)
			vtblk_set_write_cache(sc, wc);
		else
			wc = blkcfg->wce;
	} else
		wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_FLUSH);

	return (wc);
}

static int
vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct vtblk_softc *sc;
	int wc, error;

	sc = oidp->oid_arg1;
	wc = sc->vtblk_write_cache;

	error = sysctl_handle_int(oidp, &wc, 0, req);
	if (error || req->newptr == NULL)
		return (error);
	if ((sc->vtblk_flags & VTBLK_FLAG_WCE_CONFIG) == 0)
		return (EPERM);
	if (wc < 0 || wc >= VTBLK_CACHE_MAX)
		return (EINVAL);

	VTBLK_LOCK(sc);
	sc->vtblk_write_cache = wc;
	vtblk_set_write_cache(sc, sc->vtblk_write_cache);
	VTBLK_UNLOCK(sc);

	return (0);
}

static void
vtblk_setup_sysctl(struct vtblk_softc *sc)
{
	device_t dev;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree;
	struct sysctl_oid_list *child;

	dev = sc->vtblk_dev;
	ctx = device_get_sysctl_ctx(dev);
	tree = device_get_sysctl_tree(dev);
	child = SYSCTL_CHILDREN(tree);

	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "writecache_mode",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    vtblk_write_cache_sysctl, "I",
	    "Write cache mode (writethrough (0) or writeback (1))");
}

static int
vtblk_tunable_int(struct vtblk_softc *sc, const char *knob, int def)
{
	char path[64];

	snprintf(path, sizeof(path),
	    "hw.vtblk.%d.%s", device_get_unit(sc->vtblk_dev), knob);
	TUNABLE_INT_FETCH(path, &def);

	return (def);
}