virtio_blk.c revision 276487
1/*-
2 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice unmodified, this list of conditions, and the following
10 *    disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27/* Driver for VirtIO block devices. */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/sys/dev/virtio/block/virtio_blk.c 276487 2015-01-01 01:43:00Z bryanv $");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/kernel.h>
35#include <sys/bio.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/sglist.h>
39#include <sys/sysctl.h>
40#include <sys/lock.h>
41#include <sys/mutex.h>
42#include <sys/queue.h>
43
44#include <geom/geom_disk.h>
45
46#include <machine/bus.h>
47#include <machine/resource.h>
48#include <sys/bus.h>
49#include <sys/rman.h>
50
51#include <dev/virtio/virtio.h>
52#include <dev/virtio/virtqueue.h>
53#include <dev/virtio/block/virtio_blk.h>
54
55#include "virtio_if.h"
56
/*
 * One block request: the virtio header sent to the host, the bio it
 * services, and the one-byte status the host writes back on completion.
 * Requests are linked through vbr_link on the softc free/ready lists.
 */
struct vtblk_request {
	struct virtio_blk_outhdr	 vbr_hdr;	/* request header (read by host) */
	struct bio			*vbr_bp;	/* associated bio, if any */
	uint8_t				 vbr_ack;	/* status byte (written by host) */
	TAILQ_ENTRY(vtblk_request)	 vbr_link;	/* free or ready list linkage */
};
63
/* Write cache modes selectable through the writeback sysctl/tunable. */
enum vtblk_cache_mode {
	VTBLK_CACHE_WRITETHROUGH,	/* flush each write to media */
	VTBLK_CACHE_WRITEBACK,		/* host may cache writes */
	VTBLK_CACHE_MAX
};
69
/*
 * Per-device software context.  Mutable state is protected by
 * vtblk_mtx (see the VTBLK_LOCK* macros below).
 */
struct vtblk_softc {
	device_t		 vtblk_dev;	/* bus device handle */
	struct mtx		 vtblk_mtx;	/* softc lock */
	uint64_t		 vtblk_features;	/* negotiated feature bits */
	uint32_t		 vtblk_flags;
#define VTBLK_FLAG_INDIRECT	0x0001	/* indirect descriptors negotiated */
#define VTBLK_FLAG_READONLY	0x0002	/* host exported device read-only */
#define VTBLK_FLAG_DETACH	0x0004	/* detach in progress */
#define VTBLK_FLAG_SUSPEND	0x0008	/* suspend in progress */
#define VTBLK_FLAG_DUMPING	0x0010	/* kernel dump in progress */
#define VTBLK_FLAG_BARRIER	0x0020	/* host supports barrier requests */
#define VTBLK_FLAG_WC_CONFIG	0x0040	/* write cache is configurable */

	struct virtqueue	*vtblk_vq;	/* single request virtqueue */
	struct sglist		*vtblk_sglist;	/* scratch sg list for enqueue */
	struct disk		*vtblk_disk;	/* GEOM disk */

	struct bio_queue_head	 vtblk_bioq;	/* bios queued by GEOM */
	TAILQ_HEAD(, vtblk_request)
				 vtblk_req_free;	/* preallocated free requests */
	TAILQ_HEAD(, vtblk_request)
				 vtblk_req_ready;	/* requests deferred by full vq */
	struct vtblk_request	*vtblk_req_ordered;	/* in-flight emulated barrier */

	int			 vtblk_max_nsegs;	/* max segments per request */
	int			 vtblk_request_count;	/* # preallocated requests */
	enum vtblk_cache_mode	 vtblk_write_cache;	/* current cache mode */

	struct vtblk_request	 vtblk_dump_request;	/* reserved for dumping */
};
100
/* Human-readable names for feature bits, used in boot-time messages. */
static struct virtio_feature_desc vtblk_feature_desc[] = {
	{ VIRTIO_BLK_F_BARRIER,		"HostBarrier"	},
	{ VIRTIO_BLK_F_SIZE_MAX,	"MaxSegSize"	},
	{ VIRTIO_BLK_F_SEG_MAX,		"MaxNumSegs"	},
	{ VIRTIO_BLK_F_GEOMETRY,	"DiskGeometry"	},
	{ VIRTIO_BLK_F_RO,		"ReadOnly"	},
	{ VIRTIO_BLK_F_BLK_SIZE,	"BlockSize"	},
	{ VIRTIO_BLK_F_SCSI,		"SCSICmds"	},
	{ VIRTIO_BLK_F_WCE,		"WriteCache"	},
	{ VIRTIO_BLK_F_TOPOLOGY,	"Topology"	},
	{ VIRTIO_BLK_F_CONFIG_WCE,	"ConfigWCE"	},

	{ 0, NULL }
};
115
116static int	vtblk_modevent(module_t, int, void *);
117
118static int	vtblk_probe(device_t);
119static int	vtblk_attach(device_t);
120static int	vtblk_detach(device_t);
121static int	vtblk_suspend(device_t);
122static int	vtblk_resume(device_t);
123static int	vtblk_shutdown(device_t);
124static int	vtblk_config_change(device_t);
125
126static int	vtblk_open(struct disk *);
127static int	vtblk_close(struct disk *);
128static int	vtblk_ioctl(struct disk *, u_long, void *, int,
129		    struct thread *);
130static int	vtblk_dump(void *, void *, vm_offset_t, off_t, size_t);
131static void	vtblk_strategy(struct bio *);
132
133static void	vtblk_negotiate_features(struct vtblk_softc *);
134static void	vtblk_setup_features(struct vtblk_softc *);
135static int	vtblk_maximum_segments(struct vtblk_softc *,
136		    struct virtio_blk_config *);
137static int	vtblk_alloc_virtqueue(struct vtblk_softc *);
138static void	vtblk_resize_disk(struct vtblk_softc *, uint64_t);
139static void	vtblk_alloc_disk(struct vtblk_softc *,
140		    struct virtio_blk_config *);
141static void	vtblk_create_disk(struct vtblk_softc *);
142
143static int	vtblk_request_prealloc(struct vtblk_softc *);
144static void	vtblk_request_free(struct vtblk_softc *);
145static struct vtblk_request *
146		vtblk_request_dequeue(struct vtblk_softc *);
147static void	vtblk_request_enqueue(struct vtblk_softc *,
148		    struct vtblk_request *);
149static struct vtblk_request *
150		vtblk_request_next_ready(struct vtblk_softc *);
151static void	vtblk_request_requeue_ready(struct vtblk_softc *,
152		    struct vtblk_request *);
153static struct vtblk_request *
154		vtblk_request_next(struct vtblk_softc *);
155static struct vtblk_request *
156		vtblk_request_bio(struct vtblk_softc *);
157static int	vtblk_request_execute(struct vtblk_softc *,
158		    struct vtblk_request *);
159static int	vtblk_request_error(struct vtblk_request *);
160
161static void	vtblk_queue_completed(struct vtblk_softc *,
162		    struct bio_queue *);
163static void	vtblk_done_completed(struct vtblk_softc *,
164		    struct bio_queue *);
165static void	vtblk_drain_vq(struct vtblk_softc *, int);
166static void	vtblk_drain(struct vtblk_softc *);
167
168static void	vtblk_startio(struct vtblk_softc *);
169static void	vtblk_bio_done(struct vtblk_softc *, struct bio *, int);
170
171static void	vtblk_read_config(struct vtblk_softc *,
172		    struct virtio_blk_config *);
173static void	vtblk_ident(struct vtblk_softc *);
174static int	vtblk_poll_request(struct vtblk_softc *,
175		    struct vtblk_request *);
176static int	vtblk_quiesce(struct vtblk_softc *);
177static void	vtblk_vq_intr(void *);
178static void	vtblk_stop(struct vtblk_softc *);
179
180static void	vtblk_dump_prepare(struct vtblk_softc *);
181static int	vtblk_dump_write(struct vtblk_softc *, void *, off_t, size_t);
182static int	vtblk_dump_flush(struct vtblk_softc *);
183
184static void	vtblk_set_write_cache(struct vtblk_softc *, int);
185static int	vtblk_write_cache_enabled(struct vtblk_softc *sc,
186		    struct virtio_blk_config *);
187static int	vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS);
188
189static void	vtblk_setup_sysctl(struct vtblk_softc *);
190static int	vtblk_tunable_int(struct vtblk_softc *, const char *, int);
191
192/* Tunables. */
193static int vtblk_no_ident = 0;
194TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);
195static int vtblk_writecache_mode = -1;
196TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writecache_mode);
197
198/* Features desired/implemented by this driver. */
199#define VTBLK_FEATURES \
200    (VIRTIO_BLK_F_BARRIER		| \
201     VIRTIO_BLK_F_SIZE_MAX		| \
202     VIRTIO_BLK_F_SEG_MAX		| \
203     VIRTIO_BLK_F_GEOMETRY		| \
204     VIRTIO_BLK_F_RO			| \
205     VIRTIO_BLK_F_BLK_SIZE		| \
206     VIRTIO_BLK_F_WCE			| \
207     VIRTIO_BLK_F_CONFIG_WCE		| \
208     VIRTIO_RING_F_INDIRECT_DESC)
209
210#define VTBLK_MTX(_sc)		&(_sc)->vtblk_mtx
211#define VTBLK_LOCK_INIT(_sc, _name) \
212				mtx_init(VTBLK_MTX((_sc)), (_name), \
213				    "VirtIO Block Lock", MTX_DEF)
214#define VTBLK_LOCK(_sc)		mtx_lock(VTBLK_MTX((_sc)))
215#define VTBLK_UNLOCK(_sc)	mtx_unlock(VTBLK_MTX((_sc)))
216#define VTBLK_LOCK_DESTROY(_sc)	mtx_destroy(VTBLK_MTX((_sc)))
217#define VTBLK_LOCK_ASSERT(_sc)	mtx_assert(VTBLK_MTX((_sc)), MA_OWNED)
218#define VTBLK_LOCK_ASSERT_NOTOWNED(_sc) \
219				mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)
220
221#define VTBLK_DISK_NAME		"vtbd"
222#define VTBLK_QUIESCE_TIMEOUT	(30 * hz)
223
224/*
225 * Each block request uses at least two segments - one for the header
226 * and one for the status.
227 */
228#define VTBLK_MIN_SEGMENTS	2
229
static device_method_t vtblk_methods[] = {
	/* Device methods. */
	DEVMETHOD(device_probe,		vtblk_probe),
	DEVMETHOD(device_attach,	vtblk_attach),
	DEVMETHOD(device_detach,	vtblk_detach),
	DEVMETHOD(device_suspend,	vtblk_suspend),
	DEVMETHOD(device_resume,	vtblk_resume),
	DEVMETHOD(device_shutdown,	vtblk_shutdown),

	/* VirtIO methods. */
	DEVMETHOD(virtio_config_change,	vtblk_config_change),

	DEVMETHOD_END
};

static driver_t vtblk_driver = {
	"vtblk",
	vtblk_methods,
	sizeof(struct vtblk_softc)
};
static devclass_t vtblk_devclass;

/* Register on the virtio_pci bus and declare the virtio dependency. */
DRIVER_MODULE(virtio_blk, virtio_pci, vtblk_driver, vtblk_devclass,
    vtblk_modevent, 0);
MODULE_VERSION(virtio_blk, 1);
MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);
256
257static int
258vtblk_modevent(module_t mod, int type, void *unused)
259{
260	int error;
261
262	error = 0;
263
264	switch (type) {
265	case MOD_LOAD:
266	case MOD_QUIESCE:
267	case MOD_UNLOAD:
268	case MOD_SHUTDOWN:
269		break;
270	default:
271		error = EOPNOTSUPP;
272		break;
273	}
274
275	return (error);
276}
277
278static int
279vtblk_probe(device_t dev)
280{
281
282	if (virtio_get_device_type(dev) != VIRTIO_ID_BLOCK)
283		return (ENXIO);
284
285	device_set_desc(dev, "VirtIO Block Adapter");
286
287	return (BUS_PROBE_DEFAULT);
288}
289
/*
 * Attach: negotiate features, size the scatter/gather list from the
 * host limits, allocate the virtqueue and request pool, then publish
 * the GEOM disk.  Any failure unwinds by calling vtblk_detach().
 */
static int
vtblk_attach(device_t dev)
{
	struct vtblk_softc *sc;
	struct virtio_blk_config blkcfg;
	int error;

	virtio_set_feature_desc(dev, vtblk_feature_desc);

	sc = device_get_softc(dev);
	sc->vtblk_dev = dev;
	VTBLK_LOCK_INIT(sc, device_get_nameunit(dev));
	bioq_init(&sc->vtblk_bioq);
	TAILQ_INIT(&sc->vtblk_req_free);
	TAILQ_INIT(&sc->vtblk_req_ready);

	vtblk_setup_sysctl(sc);
	vtblk_setup_features(sc);

	vtblk_read_config(sc, &blkcfg);

	/*
	 * With the current sglist(9) implementation, it is not easy
	 * for us to support a maximum segment size as adjacent
	 * segments are coalesced. For now, just make sure it's larger
	 * than the maximum supported transfer size.
	 */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
		if (blkcfg.size_max < MAXPHYS) {
			error = ENOTSUP;
			device_printf(dev, "host requires unsupported "
			    "maximum segment size feature\n");
			goto fail;
		}
	}

	/* Need room for data segments beyond the header/status pair. */
	sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
	if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
		error = EINVAL;
		device_printf(dev, "fewer than minimum number of segments "
		    "allowed: %d\n", sc->vtblk_max_nsegs);
		goto fail;
	}

	sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
	if (sc->vtblk_sglist == NULL) {
		error = ENOMEM;
		device_printf(dev, "cannot allocate sglist\n");
		goto fail;
	}

	error = vtblk_alloc_virtqueue(sc);
	if (error) {
		device_printf(dev, "cannot allocate virtqueue\n");
		goto fail;
	}

	error = vtblk_request_prealloc(sc);
	if (error) {
		device_printf(dev, "cannot preallocate requests\n");
		goto fail;
	}

	vtblk_alloc_disk(sc, &blkcfg);

	/* Interrupts must work before the disk is exposed to GEOM. */
	error = virtio_setup_intr(dev, INTR_TYPE_BIO | INTR_ENTROPY);
	if (error) {
		device_printf(dev, "cannot setup virtqueue interrupt\n");
		goto fail;
	}

	vtblk_create_disk(sc);

	virtqueue_enable_intr(sc->vtblk_vq);

fail:
	if (error)
		vtblk_detach(dev);

	return (error);
}
371
/*
 * Detach: mark the device so new I/O is refused, stop the host side,
 * fail all outstanding/queued bios, and release resources.  Also used
 * as the error-unwind path from vtblk_attach(), so each teardown step
 * tolerates state that was never set up.
 */
static int
vtblk_detach(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	sc->vtblk_flags |= VTBLK_FLAG_DETACH;
	if (device_is_attached(dev))
		vtblk_stop(sc);
	VTBLK_UNLOCK(sc);

	/* Complete everything in flight or queued with ENXIO. */
	vtblk_drain(sc);

	if (sc->vtblk_disk != NULL) {
		disk_destroy(sc->vtblk_disk);
		sc->vtblk_disk = NULL;
	}

	if (sc->vtblk_sglist != NULL) {
		sglist_free(sc->vtblk_sglist);
		sc->vtblk_sglist = NULL;
	}

	VTBLK_LOCK_DESTROY(sc);

	return (0);
}
401
/*
 * Suspend: block further I/O submission and wait for the virtqueue to
 * drain.  If the quiesce times out, clear the suspend flag so I/O can
 * resume and report the failure.
 */
static int
vtblk_suspend(device_t dev)
{
	struct vtblk_softc *sc;
	int error;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
	/* XXX BMV: virtio_stop(), etc needed here? */
	error = vtblk_quiesce(sc);
	if (error)
		sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
	VTBLK_UNLOCK(sc);

	return (error);
}
420
/*
 * Resume: clear the suspend flag and restart submission of any bios
 * that queued up while suspended.
 */
static int
vtblk_resume(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	/* XXX BMV: virtio_reinit(), etc needed here? */
	sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
	vtblk_startio(sc);
	VTBLK_UNLOCK(sc);

	return (0);
}
436
437static int
438vtblk_shutdown(device_t dev)
439{
440
441	return (0);
442}
443
/*
 * Config-change interrupt: re-read the device config and, if the host
 * changed the exported capacity, resize the GEOM disk to match.
 */
static int
vtblk_config_change(device_t dev)
{
	struct vtblk_softc *sc;
	struct virtio_blk_config blkcfg;
	uint64_t capacity;

	sc = device_get_softc(dev);

	vtblk_read_config(sc, &blkcfg);

	/* Capacity is always in 512-byte units. */
	capacity = blkcfg.capacity * 512;

	if (sc->vtblk_disk->d_mediasize != capacity)
		vtblk_resize_disk(sc, capacity);

	return (0);
}
463
464static int
465vtblk_open(struct disk *dp)
466{
467	struct vtblk_softc *sc;
468
469	if ((sc = dp->d_drv1) == NULL)
470		return (ENXIO);
471
472	return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
473}
474
475static int
476vtblk_close(struct disk *dp)
477{
478	struct vtblk_softc *sc;
479
480	if ((sc = dp->d_drv1) == NULL)
481		return (ENXIO);
482
483	return (0);
484}
485
486static int
487vtblk_ioctl(struct disk *dp, u_long cmd, void *addr, int flag,
488    struct thread *td)
489{
490	struct vtblk_softc *sc;
491
492	if ((sc = dp->d_drv1) == NULL)
493		return (ENXIO);
494
495	return (ENOTTY);
496}
497
/*
 * Kernel crash-dump entry point.  On the first call, reinitialize the
 * device for polled operation.  length > 0 writes a chunk; a final
 * call with NULL/0 flushes; anything else is an invalid sequence and
 * aborts dump mode.
 */
static int
vtblk_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
    size_t length)
{
	struct disk *dp;
	struct vtblk_softc *sc;
	int error;

	dp = arg;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	VTBLK_LOCK(sc);

	/* One-time switch to polled dump mode. */
	if ((sc->vtblk_flags & VTBLK_FLAG_DUMPING) == 0) {
		vtblk_dump_prepare(sc);
		sc->vtblk_flags |= VTBLK_FLAG_DUMPING;
	}

	if (length > 0)
		error = vtblk_dump_write(sc, virtual, offset, length);
	else if (virtual == NULL && offset == 0)
		error = vtblk_dump_flush(sc);
	else {
		error = EINVAL;
		sc->vtblk_flags &= ~VTBLK_FLAG_DUMPING;
	}

	VTBLK_UNLOCK(sc);

	return (error);
}
531
/*
 * GEOM strategy routine: queue the bio and kick the virtqueue.
 * Completion (vtblk_bio_done) must run unlocked because GEOM direct
 * dispatch is enabled, hence the early unlocked error returns.
 */
static void
vtblk_strategy(struct bio *bp)
{
	struct vtblk_softc *sc;

	if ((sc = bp->bio_disk->d_drv1) == NULL) {
		vtblk_bio_done(NULL, bp, EINVAL);
		return;
	}

	/*
	 * Fail any write if RO. Unfortunately, there does not seem to
	 * be a better way to report our readonly'ness to GEOM above.
	 */
	if (sc->vtblk_flags & VTBLK_FLAG_READONLY &&
	    (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH)) {
		vtblk_bio_done(sc, bp, EROFS);
		return;
	}

	VTBLK_LOCK(sc);

	if (sc->vtblk_flags & VTBLK_FLAG_DETACH) {
		VTBLK_UNLOCK(sc);
		vtblk_bio_done(sc, bp, ENXIO);
		return;
	}

	bioq_insert_tail(&sc->vtblk_bioq, bp);
	vtblk_startio(sc);

	VTBLK_UNLOCK(sc);
}
565
566static void
567vtblk_negotiate_features(struct vtblk_softc *sc)
568{
569	device_t dev;
570	uint64_t features;
571
572	dev = sc->vtblk_dev;
573	features = VTBLK_FEATURES;
574
575	sc->vtblk_features = virtio_negotiate_features(dev, features);
576}
577
/*
 * Negotiate features and cache the negotiated ones that influence the
 * driver's behavior as softc flags.
 */
static void
vtblk_setup_features(struct vtblk_softc *sc)
{
	device_t dev;

	dev = sc->vtblk_dev;

	vtblk_negotiate_features(sc);

	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
		sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;
	if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
		sc->vtblk_flags |= VTBLK_FLAG_READONLY;
	if (virtio_with_feature(dev, VIRTIO_BLK_F_BARRIER))
		sc->vtblk_flags |= VTBLK_FLAG_BARRIER;
	if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
		sc->vtblk_flags |= VTBLK_FLAG_WC_CONFIG;
}
596
/*
 * Compute the maximum number of sg segments per request: the
 * header/status pair plus the data segments allowed by the host's
 * seg_max (clamped to what MAXPHYS can use, and to the indirect
 * descriptor limit when indirect descriptors were negotiated).
 * Without SEG_MAX, only a single data segment is assumed.
 */
static int
vtblk_maximum_segments(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
	device_t dev;
	int nsegs;

	dev = sc->vtblk_dev;
	nsegs = VTBLK_MIN_SEGMENTS;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
		nsegs += MIN(blkcfg->seg_max, MAXPHYS / PAGE_SIZE + 1);
		if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
			nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT);
	} else
		nsegs += 1;

	return (nsegs);
}
616
/*
 * Allocate the single request virtqueue, sized for vtblk_max_nsegs
 * segments per request and wired to vtblk_vq_intr().
 */
static int
vtblk_alloc_virtqueue(struct vtblk_softc *sc)
{
	device_t dev;
	struct vq_alloc_info vq_info;

	dev = sc->vtblk_dev;

	VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
	    vtblk_vq_intr, sc, &sc->vtblk_vq,
	    "%s request", device_get_nameunit(dev));

	return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
}
631
/*
 * Propagate a host-initiated capacity change to GEOM via
 * disk_resize(9), logging the new size when booting verbosely.
 */
static void
vtblk_resize_disk(struct vtblk_softc *sc, uint64_t new_capacity)
{
	device_t dev;
	struct disk *dp;
	int error;

	dev = sc->vtblk_dev;
	dp = sc->vtblk_disk;

	dp->d_mediasize = new_capacity;
	if (bootverbose) {
		device_printf(dev, "resized to %juMB (%ju %u byte sectors)\n",
		    (uintmax_t) dp->d_mediasize >> 20,
		    (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
		    dp->d_sectorsize);
	}

	error = disk_resize(dp, M_NOWAIT);
	if (error) {
		device_printf(dev,
		    "disk_resize(9) failed, error: %d\n", error);
	}
}
656
/*
 * Allocate and populate the GEOM disk from the negotiated features and
 * device config.  The disk is not published until vtblk_create_disk().
 */
static void
vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
	device_t dev;
	struct disk *dp;

	dev = sc->vtblk_dev;

	sc->vtblk_disk = dp = disk_alloc();
	dp->d_open = vtblk_open;
	dp->d_close = vtblk_close;
	dp->d_ioctl = vtblk_ioctl;
	dp->d_strategy = vtblk_strategy;
	dp->d_name = VTBLK_DISK_NAME;
	dp->d_unit = device_get_unit(dev);
	dp->d_drv1 = sc;
	dp->d_flags = DISKFLAG_CANFLUSHCACHE | DISKFLAG_UNMAPPED_BIO |
	    DISKFLAG_DIRECT_COMPLETION;
	dp->d_hba_vendor = virtio_get_vendor(dev);
	dp->d_hba_device = virtio_get_device(dev);
	dp->d_hba_subvendor = virtio_get_subvendor(dev);
	dp->d_hba_subdevice = virtio_get_subdevice(dev);

	/* A read-only device cannot be a dump target. */
	if ((sc->vtblk_flags & VTBLK_FLAG_READONLY) == 0)
		dp->d_dump = vtblk_dump;

	/* Capacity is always in 512-byte units. */
	dp->d_mediasize = blkcfg->capacity * 512;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_BLK_SIZE))
		dp->d_sectorsize = blkcfg->blk_size;
	else
		dp->d_sectorsize = 512;

	/*
	 * The VirtIO maximum I/O size is given in terms of segments.
	 * However, FreeBSD limits I/O size by logical buffer size, not
	 * by physically contiguous pages. Therefore, we have to assume
	 * no pages are contiguous. This may impose an artificially low
	 * maximum I/O size. But in practice, since QEMU advertises 128
	 * segments, this gives us a maximum IO size of 125 * PAGE_SIZE,
	 * which is typically greater than MAXPHYS. Eventually we should
	 * just advertise MAXPHYS and split buffers that are too big.
	 *
	 * Note we must subtract one additional segment in case of non
	 * page aligned buffers.
	 */
	dp->d_maxsize = (sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS - 1) *
	    PAGE_SIZE;
	if (dp->d_maxsize < PAGE_SIZE)
		dp->d_maxsize = PAGE_SIZE; /* XXX */

	if (virtio_with_feature(dev, VIRTIO_BLK_F_GEOMETRY)) {
		dp->d_fwsectors = blkcfg->geometry.sectors;
		dp->d_fwheads = blkcfg->geometry.heads;
	}

	/* Derive stripe size/offset from the reported physical topology. */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_TOPOLOGY)) {
		dp->d_stripesize = dp->d_sectorsize *
		    (1 << blkcfg->topology.physical_block_exp);
		dp->d_stripeoffset = (dp->d_stripesize -
		    blkcfg->topology.alignment_offset * dp->d_sectorsize) %
		    dp->d_stripesize;
	}

	if (vtblk_write_cache_enabled(sc, blkcfg) != 0)
		sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK;
	else
		sc->vtblk_write_cache = VTBLK_CACHE_WRITETHROUGH;
}
727
/*
 * Fetch the device identifier, announce the disk size, and publish the
 * disk to GEOM.
 */
static void
vtblk_create_disk(struct vtblk_softc *sc)
{
	struct disk *dp;

	dp = sc->vtblk_disk;

	vtblk_ident(sc);

	device_printf(sc->vtblk_dev, "%juMB (%ju %u byte sectors)\n",
	    (uintmax_t) dp->d_mediasize >> 20,
	    (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
	    dp->d_sectorsize);

	disk_create(dp, DISK_VERSION);
}
744
/*
 * Preallocate the request pool.  Returns ENOMEM if any allocation
 * fails; the caller (attach) unwinds via vtblk_detach()/vtblk_drain().
 */
static int
vtblk_request_prealloc(struct vtblk_softc *sc)
{
	struct vtblk_request *req;
	int i, nreqs;

	nreqs = virtqueue_size(sc->vtblk_vq);

	/*
	 * Preallocate sufficient requests to keep the virtqueue full. Each
	 * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
	 * the number allocated when indirect descriptors are not available.
	 */
	if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
		nreqs /= VTBLK_MIN_SEGMENTS;

	for (i = 0; i < nreqs; i++) {
		req = malloc(sizeof(struct vtblk_request), M_DEVBUF, M_NOWAIT);
		if (req == NULL)
			return (ENOMEM);

		/* Header and ack must each fit in a single sg segment. */
		MPASS(sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr)) == 1);
		MPASS(sglist_count(&req->vbr_ack, sizeof(req->vbr_ack)) == 1);

		sc->vtblk_request_count++;
		vtblk_request_enqueue(sc, req);
	}

	return (0);
}
775
/*
 * Free the entire request pool.  All requests must already be back on
 * the free list (the ready list must be empty).
 */
static void
vtblk_request_free(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	MPASS(TAILQ_EMPTY(&sc->vtblk_req_ready));

	while ((req = vtblk_request_dequeue(sc)) != NULL) {
		sc->vtblk_request_count--;
		free(req, M_DEVBUF);
	}

	KASSERT(sc->vtblk_request_count == 0,
	    ("%s: leaked %d requests", __func__, sc->vtblk_request_count));
}
791
792static struct vtblk_request *
793vtblk_request_dequeue(struct vtblk_softc *sc)
794{
795	struct vtblk_request *req;
796
797	req = TAILQ_FIRST(&sc->vtblk_req_free);
798	if (req != NULL) {
799		TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);
800		bzero(req, sizeof(struct vtblk_request));
801	}
802
803	return (req);
804}
805
/*
 * Return a request to the free list.
 */
static void
vtblk_request_enqueue(struct vtblk_softc *sc, struct vtblk_request *req)
{

	TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
}
812
813static struct vtblk_request *
814vtblk_request_next_ready(struct vtblk_softc *sc)
815{
816	struct vtblk_request *req;
817
818	req = TAILQ_FIRST(&sc->vtblk_req_ready);
819	if (req != NULL)
820		TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);
821
822	return (req);
823}
824
/*
 * Put a request back on the ready list after a failed enqueue attempt.
 */
static void
vtblk_request_requeue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
{

	/* NOTE: Currently, there will be at most one request in the queue. */
	TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
}
832
833static struct vtblk_request *
834vtblk_request_next(struct vtblk_softc *sc)
835{
836	struct vtblk_request *req;
837
838	req = vtblk_request_next_ready(sc);
839	if (req != NULL)
840		return (req);
841
842	return (vtblk_request_bio(sc));
843}
844
/*
 * Build a request from the next queued bio.  Returns NULL if no bio is
 * queued or the request pool is empty; in the latter case the bio is
 * left on the queue for a later attempt.
 */
static struct vtblk_request *
vtblk_request_bio(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bp;

	bioq = &sc->vtblk_bioq;

	if (bioq_first(bioq) == NULL)
		return (NULL);

	req = vtblk_request_dequeue(sc);
	if (req == NULL)
		return (NULL);

	bp = bioq_takefirst(bioq);
	req->vbr_bp = bp;
	req->vbr_ack = -1;	/* poisoned; host overwrites on completion */
	req->vbr_hdr.ioprio = 1;

	switch (bp->bio_cmd) {
	case BIO_FLUSH:
		req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
		break;
	case BIO_READ:
		req->vbr_hdr.type = VIRTIO_BLK_T_IN;
		req->vbr_hdr.sector = bp->bio_offset / 512;
		break;
	case BIO_WRITE:
		req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
		req->vbr_hdr.sector = bp->bio_offset / 512;
		break;
	default:
		panic("%s: bio with unhandled cmd: %d", __func__, bp->bio_cmd);
	}

	if (bp->bio_flags & BIO_ORDERED)
		req->vbr_hdr.type |= VIRTIO_BLK_T_BARRIER;

	return (req);
}
887
/*
 * Enqueue a request on the virtqueue.  The sg layout is fixed by the
 * virtio-blk protocol: header (readable), optional data (readable for
 * writes, writable for reads), then the status byte (writable).
 * Returns EBUSY when the request must wait (barrier emulation or a
 * full virtqueue); the caller requeues it.
 */
static int
vtblk_request_execute(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct virtqueue *vq;
	struct sglist *sg;
	struct bio *bp;
	int ordered, readable, writable, error;

	vq = sc->vtblk_vq;
	sg = sc->vtblk_sglist;
	bp = req->vbr_bp;
	ordered = 0;
	writable = 0;

	/*
	 * Some hosts (such as bhyve) do not implement the barrier feature,
	 * so we emulate it in the driver by allowing the barrier request
	 * to be the only one in flight.
	 */
	if ((sc->vtblk_flags & VTBLK_FLAG_BARRIER) == 0) {
		if (sc->vtblk_req_ordered != NULL)
			return (EBUSY);
		if (bp->bio_flags & BIO_ORDERED) {
			if (!virtqueue_empty(vq))
				return (EBUSY);
			ordered = 1;
			req->vbr_hdr.type &= ~VIRTIO_BLK_T_BARRIER;
		}
	}

	sglist_reset(sg);
	sglist_append(sg, &req->vbr_hdr, sizeof(struct virtio_blk_outhdr));

	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
		error = sglist_append_bio(sg, bp);
		/* A full sg list leaves no room for the status segment. */
		if (error || sg->sg_nseg == sg->sg_maxseg) {
			panic("%s: bio %p data buffer too big %d",
			    __func__, bp, error);
		}

		/* BIO_READ means the host writes into our buffer. */
		if (bp->bio_cmd == BIO_READ)
			writable = sg->sg_nseg - 1;
	}

	/* The status byte is always host-writable. */
	writable++;
	sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
	readable = sg->sg_nseg - writable;

	error = virtqueue_enqueue(vq, req, sg, readable, writable);
	if (error == 0 && ordered)
		sc->vtblk_req_ordered = req;

	return (error);
}
943
944static int
945vtblk_request_error(struct vtblk_request *req)
946{
947	int error;
948
949	switch (req->vbr_ack) {
950	case VIRTIO_BLK_S_OK:
951		error = 0;
952		break;
953	case VIRTIO_BLK_S_UNSUPP:
954		error = ENOTSUP;
955		break;
956	default:
957		error = EIO;
958		break;
959	}
960
961	return (error);
962}
963
/*
 * Harvest completed requests from the virtqueue, recording each bio's
 * error status and collecting the bios on the caller's queue so they
 * can be biodone()'d later without the softc lock held.
 */
static void
vtblk_queue_completed(struct vtblk_softc *sc, struct bio_queue *queue)
{
	struct vtblk_request *req;
	struct bio *bp;

	while ((req = virtqueue_dequeue(sc->vtblk_vq, NULL)) != NULL) {
		/* An emulated barrier completes alone; clear the marker. */
		if (sc->vtblk_req_ordered != NULL) {
			MPASS(sc->vtblk_req_ordered == req);
			sc->vtblk_req_ordered = NULL;
		}

		bp = req->vbr_bp;
		bp->bio_error = vtblk_request_error(req);
		TAILQ_INSERT_TAIL(queue, bp, bio_queue);

		vtblk_request_enqueue(sc, req);
	}
}
983
/*
 * biodone() every bio collected by vtblk_queue_completed().  Must be
 * called without the softc lock (GEOM direct dispatch).
 */
static void
vtblk_done_completed(struct vtblk_softc *sc, struct bio_queue *queue)
{
	struct bio *bp, *tmp;

	TAILQ_FOREACH_SAFE(bp, queue, bio_queue, tmp) {
		if (bp->bio_error != 0)
			disk_err(bp, "hard error", -1, 1);
		vtblk_bio_done(sc, bp, bp->bio_error);
	}
}
995
/*
 * Remove every in-flight request from the virtqueue, completing each
 * bio with ENXIO unless skip_done is set (the dump path polls for
 * completions itself), and return the requests to the free list.
 */
static void
vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int last;

	vq = sc->vtblk_vq;
	last = 0;

	while ((req = virtqueue_drain(vq, &last)) != NULL) {
		if (!skip_done)
			vtblk_bio_done(sc, req->vbr_bp, ENXIO);

		vtblk_request_enqueue(sc, req);
	}

	sc->vtblk_req_ordered = NULL;
	KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
}
1016
/*
 * Fail all outstanding I/O with ENXIO — completed requests, in-flight
 * requests, deferred ready requests, and queued bios — then free the
 * request pool.  Used by detach and attach-failure unwind, so it
 * tolerates a partially initialized softc.
 */
static void
vtblk_drain(struct vtblk_softc *sc)
{
	struct bio_queue queue;
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bp;

	bioq = &sc->vtblk_bioq;
	TAILQ_INIT(&queue);

	if (sc->vtblk_vq != NULL) {
		/* Complete anything the host already finished... */
		vtblk_queue_completed(sc, &queue);
		vtblk_done_completed(sc, &queue);

		/* ...then fail what was still in flight. */
		vtblk_drain_vq(sc, 0);
	}

	while ((req = vtblk_request_next_ready(sc)) != NULL) {
		vtblk_bio_done(sc, req->vbr_bp, ENXIO);
		vtblk_request_enqueue(sc, req);
	}

	while (bioq_first(bioq) != NULL) {
		bp = bioq_takefirst(bioq);
		vtblk_bio_done(sc, bp, ENXIO);
	}

	vtblk_request_free(sc);
}
1047
/*
 * Submit as many requests as the virtqueue will hold, then notify the
 * host if anything was enqueued.  A request that cannot be enqueued
 * (EBUSY from barrier emulation or a full ring) is parked on the ready
 * list and retried on the next completion interrupt.
 */
static void
vtblk_startio(struct vtblk_softc *sc)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int enq;

	VTBLK_LOCK_ASSERT(sc);
	vq = sc->vtblk_vq;
	enq = 0;

	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
		return;

	while (!virtqueue_full(vq)) {
		req = vtblk_request_next(sc);
		if (req == NULL)
			break;

		if (vtblk_request_execute(sc, req) != 0) {
			vtblk_request_requeue_ready(sc, req);
			break;
		}

		enq++;
	}

	if (enq > 0)
		virtqueue_notify(vq);
}
1078
/*
 * Complete a bio, recording the error (if any) before biodone().
 * sc may be NULL when called before the softc could be resolved.
 */
static void
vtblk_bio_done(struct vtblk_softc *sc, struct bio *bp, int error)
{

	/* Because of GEOM direct dispatch, we cannot hold any locks. */
	if (sc != NULL)
		VTBLK_LOCK_ASSERT_NOTOWNED(sc);

	if (error) {
		bp->bio_resid = bp->bio_bcount;
		bp->bio_error = error;
		bp->bio_flags |= BIO_ERROR;
	}

	biodone(bp);
}
1095
1096#define VTBLK_GET_CONFIG(_dev, _feature, _field, _cfg)			\
1097	if (virtio_with_feature(_dev, _feature)) {			\
1098		virtio_read_device_config(_dev,				\
1099		    offsetof(struct virtio_blk_config, _field),		\
1100		    &(_cfg)->_field, sizeof((_cfg)->_field));		\
1101	}
1102
/*
 * Read the device configuration space into blkcfg.  Only fields whose
 * corresponding feature was negotiated are read; the rest stay zero.
 */
static void
vtblk_read_config(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
	device_t dev;

	dev = sc->vtblk_dev;

	bzero(blkcfg, sizeof(struct virtio_blk_config));

	/* The capacity is always available. */
	virtio_read_device_config(dev, offsetof(struct virtio_blk_config,
	    capacity), &blkcfg->capacity, sizeof(blkcfg->capacity));

	/* Read the configuration if the feature was negotiated. */
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SIZE_MAX, size_max, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SEG_MAX, seg_max, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY, geometry, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_BLK_SIZE, blk_size, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY, topology, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, writeback, blkcfg);
}
1124
1125#undef VTBLK_GET_CONFIG
1126
/*
 * Fetch the device identifier string (VIRTIO_BLK_T_GET_ID) into the
 * disk's d_ident via a polled request using a stack-allocated bio.
 * Best effort: skipped via the no_ident tunable, a depleted request
 * pool, or reported (but otherwise ignored) on I/O error.
 */
static void
vtblk_ident(struct vtblk_softc *sc)
{
	struct bio buf;
	struct disk *dp;
	struct vtblk_request *req;
	int len, error;

	dp = sc->vtblk_disk;
	len = MIN(VIRTIO_BLK_ID_BYTES, DISK_IDENT_SIZE);

	if (vtblk_tunable_int(sc, "no_ident", vtblk_no_ident) != 0)
		return;

	req = vtblk_request_dequeue(sc);
	if (req == NULL)
		return;

	req->vbr_ack = -1;
	req->vbr_hdr.type = VIRTIO_BLK_T_GET_ID;
	req->vbr_hdr.ioprio = 1;
	req->vbr_hdr.sector = 0;

	req->vbr_bp = &buf;
	bzero(&buf, sizeof(struct bio));

	/* GET_ID is handled like a read into d_ident. */
	buf.bio_cmd = BIO_READ;
	buf.bio_data = dp->d_ident;
	buf.bio_bcount = len;

	VTBLK_LOCK(sc);
	error = vtblk_poll_request(sc, req);
	VTBLK_UNLOCK(sc);

	vtblk_request_enqueue(sc, req);

	if (error) {
		device_printf(sc->vtblk_dev,
		    "error getting device identifier: %d\n", error);
	}
}
1168
/*
 * Synchronously execute a single request by polling the virtqueue.
 * Requires an empty virtqueue (the poll would otherwise harvest an
 * unrelated completion).  Returns the request's completion status.
 */
static int
vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct virtqueue *vq;
	int error;

	vq = sc->vtblk_vq;

	if (!virtqueue_empty(vq))
		return (EBUSY);

	error = vtblk_request_execute(sc, req);
	if (error)
		return (error);

	virtqueue_notify(vq);
	virtqueue_poll(vq, NULL);

	error = vtblk_request_error(req);
	if (error && bootverbose) {
		device_printf(sc->vtblk_dev,
		    "%s: IO error: %d\n", __func__, error);
	}

	return (error);
}
1195
1196static int
1197vtblk_quiesce(struct vtblk_softc *sc)
1198{
1199	int error;
1200
1201	VTBLK_LOCK_ASSERT(sc);
1202	error = 0;
1203
1204	while (!virtqueue_empty(sc->vtblk_vq)) {
1205		if (mtx_sleep(&sc->vtblk_vq, VTBLK_MTX(sc), PRIBIO, "vtblkq",
1206		    VTBLK_QUIESCE_TIMEOUT) == EWOULDBLOCK) {
1207			error = EBUSY;
1208			break;
1209		}
1210	}
1211
1212	return (error);
1213}
1214
/*
 * Virtqueue interrupt handler: harvest completed requests, restart
 * queued I/O, and rearm the interrupt, retrying if completions race in.
 */
static void
vtblk_vq_intr(void *xsc)
{
	struct vtblk_softc *sc;
	struct virtqueue *vq;
	struct bio_queue queue;

	sc = xsc;
	vq = sc->vtblk_vq;
	/* Completed bios are collected here and finished after unlocking. */
	TAILQ_INIT(&queue);

	VTBLK_LOCK(sc);

again:
	/* Nothing to do if the device is being detached. */
	if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
		goto out;

	vtblk_queue_completed(sc, &queue);
	vtblk_startio(sc);

	/*
	 * Rearm the interrupt; a nonzero return means more completions
	 * arrived while interrupts were off, so service them now.
	 */
	if (virtqueue_enable_intr(vq) != 0) {
		virtqueue_disable_intr(vq);
		goto again;
	}

	/* Wake a thread waiting in vtblk_quiesce() for the queue to drain. */
	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
		wakeup(&sc->vtblk_vq);

out:
	VTBLK_UNLOCK(sc);
	/* biodone() is called without the softc lock held. */
	vtblk_done_completed(sc, &queue);
}
1247
/*
 * Quiesce the device: silence virtqueue interrupts first, then reset
 * the VirtIO device itself.
 */
static void
vtblk_stop(struct vtblk_softc *sc)
{

	virtqueue_disable_intr(sc->vtblk_vq);
	virtio_stop(sc->vtblk_dev);
}
1255
/*
 * Prepare the device for a kernel crash dump: stop it, discard any
 * in-flight requests, and reinitialize it for polled operation.
 */
static void
vtblk_dump_prepare(struct vtblk_softc *sc)
{
	device_t dev;
	struct virtqueue *vq;

	dev = sc->vtblk_dev;
	vq = sc->vtblk_vq;

	vtblk_stop(sc);

	/*
	 * Drain all requests caught in-flight in the virtqueue,
	 * skipping biodone(). When dumping, only one request is
	 * outstanding at a time, and we just poll the virtqueue
	 * for the response.
	 */
	vtblk_drain_vq(sc, 1);

	if (virtio_reinit(dev, sc->vtblk_features) != 0) {
		panic("%s: cannot reinit VirtIO block device during dump",
		    device_get_nameunit(dev));
	}

	/* Interrupts stay disabled: dump requests are serviced by polling. */
	virtqueue_disable_intr(vq);
	virtio_reinit_complete(dev);
}
1283
1284static int
1285vtblk_dump_write(struct vtblk_softc *sc, void *virtual, off_t offset,
1286    size_t length)
1287{
1288	struct bio buf;
1289	struct vtblk_request *req;
1290
1291	req = &sc->vtblk_dump_request;
1292	req->vbr_ack = -1;
1293	req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
1294	req->vbr_hdr.ioprio = 1;
1295	req->vbr_hdr.sector = offset / 512;
1296
1297	req->vbr_bp = &buf;
1298	bzero(&buf, sizeof(struct bio));
1299
1300	buf.bio_cmd = BIO_WRITE;
1301	buf.bio_data = virtual;
1302	buf.bio_bcount = length;
1303
1304	return (vtblk_poll_request(sc, req));
1305}
1306
1307static int
1308vtblk_dump_flush(struct vtblk_softc *sc)
1309{
1310	struct bio buf;
1311	struct vtblk_request *req;
1312
1313	req = &sc->vtblk_dump_request;
1314	req->vbr_ack = -1;
1315	req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
1316	req->vbr_hdr.ioprio = 1;
1317	req->vbr_hdr.sector = 0;
1318
1319	req->vbr_bp = &buf;
1320	bzero(&buf, sizeof(struct bio));
1321
1322	buf.bio_cmd = BIO_FLUSH;
1323
1324	return (vtblk_poll_request(sc, req));
1325}
1326
1327static void
1328vtblk_set_write_cache(struct vtblk_softc *sc, int wc)
1329{
1330
1331	/* Set either writeback (1) or writethrough (0) mode. */
1332	virtio_write_dev_config_1(sc->vtblk_dev,
1333	    offsetof(struct virtio_blk_config, writeback), wc);
1334}
1335
1336static int
1337vtblk_write_cache_enabled(struct vtblk_softc *sc,
1338    struct virtio_blk_config *blkcfg)
1339{
1340	int wc;
1341
1342	if (sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) {
1343		wc = vtblk_tunable_int(sc, "writecache_mode",
1344		    vtblk_writecache_mode);
1345		if (wc >= 0 && wc < VTBLK_CACHE_MAX)
1346			vtblk_set_write_cache(sc, wc);
1347		else
1348			wc = blkcfg->writeback;
1349	} else
1350		wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_WCE);
1351
1352	return (wc);
1353}
1354
1355static int
1356vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS)
1357{
1358	struct vtblk_softc *sc;
1359	int wc, error;
1360
1361	sc = oidp->oid_arg1;
1362	wc = sc->vtblk_write_cache;
1363
1364	error = sysctl_handle_int(oidp, &wc, 0, req);
1365	if (error || req->newptr == NULL)
1366		return (error);
1367	if ((sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) == 0)
1368		return (EPERM);
1369	if (wc < 0 || wc >= VTBLK_CACHE_MAX)
1370		return (EINVAL);
1371
1372	VTBLK_LOCK(sc);
1373	sc->vtblk_write_cache = wc;
1374	vtblk_set_write_cache(sc, sc->vtblk_write_cache);
1375	VTBLK_UNLOCK(sc);
1376
1377	return (0);
1378}
1379
1380static void
1381vtblk_setup_sysctl(struct vtblk_softc *sc)
1382{
1383	device_t dev;
1384	struct sysctl_ctx_list *ctx;
1385	struct sysctl_oid *tree;
1386	struct sysctl_oid_list *child;
1387
1388	dev = sc->vtblk_dev;
1389	ctx = device_get_sysctl_ctx(dev);
1390	tree = device_get_sysctl_tree(dev);
1391	child = SYSCTL_CHILDREN(tree);
1392
1393	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "writecache_mode",
1394	    CTLTYPE_INT | CTLFLAG_RW, sc, 0, vtblk_write_cache_sysctl,
1395	    "I", "Write cache mode (writethrough (0) or writeback (1))");
1396}
1397
1398static int
1399vtblk_tunable_int(struct vtblk_softc *sc, const char *knob, int def)
1400{
1401	char path[64];
1402
1403	snprintf(path, sizeof(path),
1404	    "hw.vtblk.%d.%s", device_get_unit(sc->vtblk_dev), knob);
1405	TUNABLE_INT_FETCH(path, &def);
1406
1407	return (def);
1408}
1409