1227652Sgrehan/*-
2252707Sbryanv * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
3227652Sgrehan * All rights reserved.
4227652Sgrehan *
5227652Sgrehan * Redistribution and use in source and binary forms, with or without
6227652Sgrehan * modification, are permitted provided that the following conditions
7227652Sgrehan * are met:
8227652Sgrehan * 1. Redistributions of source code must retain the above copyright
9227652Sgrehan *    notice unmodified, this list of conditions, and the following
10227652Sgrehan *    disclaimer.
11227652Sgrehan * 2. Redistributions in binary form must reproduce the above copyright
12227652Sgrehan *    notice, this list of conditions and the following disclaimer in the
13227652Sgrehan *    documentation and/or other materials provided with the distribution.
14227652Sgrehan *
15227652Sgrehan * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16227652Sgrehan * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17227652Sgrehan * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18227652Sgrehan * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19227652Sgrehan * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20227652Sgrehan * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21227652Sgrehan * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22227652Sgrehan * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23227652Sgrehan * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24227652Sgrehan * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25227652Sgrehan */
26227652Sgrehan
27227652Sgrehan/* Driver for VirtIO block devices. */
28227652Sgrehan
29227652Sgrehan#include <sys/cdefs.h>
30227652Sgrehan__FBSDID("$FreeBSD$");
31227652Sgrehan
32227652Sgrehan#include <sys/param.h>
33227652Sgrehan#include <sys/systm.h>
34227652Sgrehan#include <sys/kernel.h>
35227652Sgrehan#include <sys/bio.h>
36227652Sgrehan#include <sys/malloc.h>
37227652Sgrehan#include <sys/module.h>
38227652Sgrehan#include <sys/sglist.h>
39252703Sbryanv#include <sys/sysctl.h>
40227652Sgrehan#include <sys/lock.h>
41227652Sgrehan#include <sys/mutex.h>
42227652Sgrehan#include <sys/queue.h>
43227652Sgrehan
44295707Simp#include <geom/geom.h>
45227652Sgrehan#include <geom/geom_disk.h>
46227652Sgrehan
47227652Sgrehan#include <machine/bus.h>
48227652Sgrehan#include <machine/resource.h>
49227652Sgrehan#include <sys/bus.h>
50227652Sgrehan#include <sys/rman.h>
51227652Sgrehan
52227652Sgrehan#include <dev/virtio/virtio.h>
53227652Sgrehan#include <dev/virtio/virtqueue.h>
54227652Sgrehan#include <dev/virtio/block/virtio_blk.h>
55227652Sgrehan
56227652Sgrehan#include "virtio_if.h"
57227652Sgrehan
58227652Sgrehanstruct vtblk_request {
59227652Sgrehan	struct virtio_blk_outhdr	 vbr_hdr;
60227652Sgrehan	struct bio			*vbr_bp;
61227652Sgrehan	uint8_t				 vbr_ack;
62227652Sgrehan	TAILQ_ENTRY(vtblk_request)	 vbr_link;
63227652Sgrehan};
64227652Sgrehan
/*
 * Write cache modes. VTBLK_CACHE_MAX is one past the last valid mode and
 * therefore equals the number of modes.
 */
enum vtblk_cache_mode {
	VTBLK_CACHE_WRITETHROUGH = 0,
	VTBLK_CACHE_WRITEBACK = 1,
	VTBLK_CACHE_MAX = 2
};
70252703Sbryanv
71227652Sgrehanstruct vtblk_softc {
72227652Sgrehan	device_t		 vtblk_dev;
73227652Sgrehan	struct mtx		 vtblk_mtx;
74227652Sgrehan	uint64_t		 vtblk_features;
75227652Sgrehan	uint32_t		 vtblk_flags;
76227652Sgrehan#define VTBLK_FLAG_INDIRECT	0x0001
77227652Sgrehan#define VTBLK_FLAG_READONLY	0x0002
78234270Sgrehan#define VTBLK_FLAG_DETACH	0x0004
79234270Sgrehan#define VTBLK_FLAG_SUSPEND	0x0008
80277788Sbryanv#define VTBLK_FLAG_BARRIER	0x0010
81277788Sbryanv#define VTBLK_FLAG_WC_CONFIG	0x0020
82227652Sgrehan
83227652Sgrehan	struct virtqueue	*vtblk_vq;
84227652Sgrehan	struct sglist		*vtblk_sglist;
85227652Sgrehan	struct disk		*vtblk_disk;
86227652Sgrehan
87227652Sgrehan	struct bio_queue_head	 vtblk_bioq;
88227652Sgrehan	TAILQ_HEAD(, vtblk_request)
89227652Sgrehan				 vtblk_req_free;
90227652Sgrehan	TAILQ_HEAD(, vtblk_request)
91247829Sbryanv				 vtblk_req_ready;
92247829Sbryanv	struct vtblk_request	*vtblk_req_ordered;
93227652Sgrehan
94227652Sgrehan	int			 vtblk_max_nsegs;
95227652Sgrehan	int			 vtblk_request_count;
96252703Sbryanv	enum vtblk_cache_mode	 vtblk_write_cache;
97227652Sgrehan
98277788Sbryanv	struct bio_queue	 vtblk_dump_queue;
99227652Sgrehan	struct vtblk_request	 vtblk_dump_request;
100227652Sgrehan};
101227652Sgrehan
102227652Sgrehanstatic struct virtio_feature_desc vtblk_feature_desc[] = {
103227652Sgrehan	{ VIRTIO_BLK_F_BARRIER,		"HostBarrier"	},
104227652Sgrehan	{ VIRTIO_BLK_F_SIZE_MAX,	"MaxSegSize"	},
105227652Sgrehan	{ VIRTIO_BLK_F_SEG_MAX,		"MaxNumSegs"	},
106227652Sgrehan	{ VIRTIO_BLK_F_GEOMETRY,	"DiskGeometry"	},
107227652Sgrehan	{ VIRTIO_BLK_F_RO,		"ReadOnly"	},
108227652Sgrehan	{ VIRTIO_BLK_F_BLK_SIZE,	"BlockSize"	},
109227652Sgrehan	{ VIRTIO_BLK_F_SCSI,		"SCSICmds"	},
110252703Sbryanv	{ VIRTIO_BLK_F_WCE,		"WriteCache"	},
111227652Sgrehan	{ VIRTIO_BLK_F_TOPOLOGY,	"Topology"	},
112252703Sbryanv	{ VIRTIO_BLK_F_CONFIG_WCE,	"ConfigWCE"	},
113227652Sgrehan
114227652Sgrehan	{ 0, NULL }
115227652Sgrehan};
116227652Sgrehan
117227652Sgrehanstatic int	vtblk_modevent(module_t, int, void *);
118227652Sgrehan
119227652Sgrehanstatic int	vtblk_probe(device_t);
120227652Sgrehanstatic int	vtblk_attach(device_t);
121227652Sgrehanstatic int	vtblk_detach(device_t);
122227652Sgrehanstatic int	vtblk_suspend(device_t);
123227652Sgrehanstatic int	vtblk_resume(device_t);
124227652Sgrehanstatic int	vtblk_shutdown(device_t);
125252703Sbryanvstatic int	vtblk_config_change(device_t);
126227652Sgrehan
127234270Sgrehanstatic int	vtblk_open(struct disk *);
128234270Sgrehanstatic int	vtblk_close(struct disk *);
129234270Sgrehanstatic int	vtblk_ioctl(struct disk *, u_long, void *, int,
130238360Sgrehan		    struct thread *);
131234270Sgrehanstatic int	vtblk_dump(void *, void *, vm_offset_t, off_t, size_t);
132234270Sgrehanstatic void	vtblk_strategy(struct bio *);
133234270Sgrehan
134227652Sgrehanstatic void	vtblk_negotiate_features(struct vtblk_softc *);
135275335Sbryanvstatic void	vtblk_setup_features(struct vtblk_softc *);
136227652Sgrehanstatic int	vtblk_maximum_segments(struct vtblk_softc *,
137227652Sgrehan		    struct virtio_blk_config *);
138227652Sgrehanstatic int	vtblk_alloc_virtqueue(struct vtblk_softc *);
139252703Sbryanvstatic void	vtblk_resize_disk(struct vtblk_softc *, uint64_t);
140227652Sgrehanstatic void	vtblk_alloc_disk(struct vtblk_softc *,
141227652Sgrehan		    struct virtio_blk_config *);
142227652Sgrehanstatic void	vtblk_create_disk(struct vtblk_softc *);
143227652Sgrehan
144275335Sbryanvstatic int	vtblk_request_prealloc(struct vtblk_softc *);
145275335Sbryanvstatic void	vtblk_request_free(struct vtblk_softc *);
146275335Sbryanvstatic struct vtblk_request *
147275335Sbryanv		vtblk_request_dequeue(struct vtblk_softc *);
148275335Sbryanvstatic void	vtblk_request_enqueue(struct vtblk_softc *,
149227652Sgrehan		    struct vtblk_request *);
150275335Sbryanvstatic struct vtblk_request *
151275335Sbryanv		vtblk_request_next_ready(struct vtblk_softc *);
152275335Sbryanvstatic void	vtblk_request_requeue_ready(struct vtblk_softc *,
153275335Sbryanv		    struct vtblk_request *);
154275335Sbryanvstatic struct vtblk_request *
155275335Sbryanv		vtblk_request_next(struct vtblk_softc *);
156275335Sbryanvstatic struct vtblk_request *
157275335Sbryanv		vtblk_request_bio(struct vtblk_softc *);
158275335Sbryanvstatic int	vtblk_request_execute(struct vtblk_softc *,
159275335Sbryanv		    struct vtblk_request *);
160275335Sbryanvstatic int	vtblk_request_error(struct vtblk_request *);
161227652Sgrehan
162275335Sbryanvstatic void	vtblk_queue_completed(struct vtblk_softc *,
163275335Sbryanv		    struct bio_queue *);
164275335Sbryanvstatic void	vtblk_done_completed(struct vtblk_softc *,
165275335Sbryanv		    struct bio_queue *);
166277788Sbryanvstatic void	vtblk_drain_vq(struct vtblk_softc *);
167275335Sbryanvstatic void	vtblk_drain(struct vtblk_softc *);
168227652Sgrehan
169275335Sbryanvstatic void	vtblk_startio(struct vtblk_softc *);
170275335Sbryanvstatic void	vtblk_bio_done(struct vtblk_softc *, struct bio *, int);
171227652Sgrehan
172252703Sbryanvstatic void	vtblk_read_config(struct vtblk_softc *,
173252703Sbryanv		    struct virtio_blk_config *);
174275335Sbryanvstatic void	vtblk_ident(struct vtblk_softc *);
175227652Sgrehanstatic int	vtblk_poll_request(struct vtblk_softc *,
176227652Sgrehan		    struct vtblk_request *);
177275335Sbryanvstatic int	vtblk_quiesce(struct vtblk_softc *);
178275335Sbryanvstatic void	vtblk_vq_intr(void *);
179275335Sbryanvstatic void	vtblk_stop(struct vtblk_softc *);
180227652Sgrehan
181277788Sbryanvstatic void	vtblk_dump_quiesce(struct vtblk_softc *);
182275335Sbryanvstatic int	vtblk_dump_write(struct vtblk_softc *, void *, off_t, size_t);
183275335Sbryanvstatic int	vtblk_dump_flush(struct vtblk_softc *);
184277788Sbryanvstatic void	vtblk_dump_complete(struct vtblk_softc *);
185227652Sgrehan
186275335Sbryanvstatic void	vtblk_set_write_cache(struct vtblk_softc *, int);
187275335Sbryanvstatic int	vtblk_write_cache_enabled(struct vtblk_softc *sc,
188275335Sbryanv		    struct virtio_blk_config *);
189275335Sbryanvstatic int	vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS);
190227652Sgrehan
191252703Sbryanvstatic void	vtblk_setup_sysctl(struct vtblk_softc *);
192252703Sbryanvstatic int	vtblk_tunable_int(struct vtblk_softc *, const char *, int);
193252703Sbryanv
194227652Sgrehan/* Tunables. */
195227652Sgrehanstatic int vtblk_no_ident = 0;
196227652SgrehanTUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);
197252703Sbryanvstatic int vtblk_writecache_mode = -1;
198252703SbryanvTUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writecache_mode);
199227652Sgrehan
/*
 * The set of feature bits this driver offers during negotiation (see
 * vtblk_negotiate_features()).
 */
#define VTBLK_FEATURES					\
    (VIRTIO_BLK_F_BARRIER	| VIRTIO_BLK_F_SIZE_MAX	| \
     VIRTIO_BLK_F_SEG_MAX	| VIRTIO_BLK_F_GEOMETRY	| \
     VIRTIO_BLK_F_RO		| VIRTIO_BLK_F_BLK_SIZE	| \
     VIRTIO_BLK_F_WCE		| VIRTIO_BLK_F_TOPOLOGY	| \
     VIRTIO_BLK_F_CONFIG_WCE	| VIRTIO_RING_F_INDIRECT_DESC)
212227652Sgrehan
/*
 * Softc lock helpers.
 *
 * VTBLK_MTX() yields a pointer to the softc mutex. Its replacement list is
 * fully parenthesized so that the result can safely be combined with
 * postfix operators (e.g. VTBLK_MTX(sc)->field) or sizeof without the
 * address-of operator binding to the wrong sub-expression.
 */
#define VTBLK_MTX(_sc)		(&(_sc)->vtblk_mtx)
#define VTBLK_LOCK_INIT(_sc, _name) \
				mtx_init(VTBLK_MTX((_sc)), (_name), \
				    "VirtIO Block Lock", MTX_DEF)
#define VTBLK_LOCK(_sc)		mtx_lock(VTBLK_MTX((_sc)))
#define VTBLK_UNLOCK(_sc)	mtx_unlock(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_DESTROY(_sc)	mtx_destroy(VTBLK_MTX((_sc)))
/* Assert that the calling thread does / does not hold the softc lock. */
#define VTBLK_LOCK_ASSERT(_sc)	mtx_assert(VTBLK_MTX((_sc)), MA_OWNED)
#define VTBLK_LOCK_ASSERT_NOTOWNED(_sc) \
				mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)
223227652Sgrehan
/* Value stored in the disk's d_name. */
#define VTBLK_DISK_NAME		"vtbd"

/* Thirty times hz; presumably the wait bound used by vtblk_quiesce(). */
#define VTBLK_QUIESCE_TIMEOUT	(30 * hz)

/*
 * A block request always carries a header segment and a status segment,
 * so two segments are consumed before any data is described.
 */
#define VTBLK_MIN_SEGMENTS	2
232227652Sgrehan
233227652Sgrehanstatic device_method_t vtblk_methods[] = {
234227652Sgrehan	/* Device methods. */
235227652Sgrehan	DEVMETHOD(device_probe,		vtblk_probe),
236227652Sgrehan	DEVMETHOD(device_attach,	vtblk_attach),
237227652Sgrehan	DEVMETHOD(device_detach,	vtblk_detach),
238227652Sgrehan	DEVMETHOD(device_suspend,	vtblk_suspend),
239227652Sgrehan	DEVMETHOD(device_resume,	vtblk_resume),
240227652Sgrehan	DEVMETHOD(device_shutdown,	vtblk_shutdown),
241227652Sgrehan
242252703Sbryanv	/* VirtIO methods. */
243252703Sbryanv	DEVMETHOD(virtio_config_change,	vtblk_config_change),
244252703Sbryanv
245234270Sgrehan	DEVMETHOD_END
246227652Sgrehan};
247227652Sgrehan
248227652Sgrehanstatic driver_t vtblk_driver = {
249227652Sgrehan	"vtblk",
250227652Sgrehan	vtblk_methods,
251227652Sgrehan	sizeof(struct vtblk_softc)
252227652Sgrehan};
253227652Sgrehanstatic devclass_t vtblk_devclass;
254227652Sgrehan
255274655SbrDRIVER_MODULE(virtio_blk, virtio_mmio, vtblk_driver, vtblk_devclass,
256274655Sbr    vtblk_modevent, 0);
257227652SgrehanDRIVER_MODULE(virtio_blk, virtio_pci, vtblk_driver, vtblk_devclass,
258227652Sgrehan    vtblk_modevent, 0);
259227652SgrehanMODULE_VERSION(virtio_blk, 1);
260227652SgrehanMODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);
261227652Sgrehan
262227652Sgrehanstatic int
263227652Sgrehanvtblk_modevent(module_t mod, int type, void *unused)
264227652Sgrehan{
265227652Sgrehan	int error;
266227652Sgrehan
267227652Sgrehan	error = 0;
268227652Sgrehan
269227652Sgrehan	switch (type) {
270227652Sgrehan	case MOD_LOAD:
271227652Sgrehan	case MOD_QUIESCE:
272227652Sgrehan	case MOD_UNLOAD:
273227652Sgrehan	case MOD_SHUTDOWN:
274227652Sgrehan		break;
275227652Sgrehan	default:
276227652Sgrehan		error = EOPNOTSUPP;
277227652Sgrehan		break;
278227652Sgrehan	}
279227652Sgrehan
280227652Sgrehan	return (error);
281227652Sgrehan}
282227652Sgrehan
283227652Sgrehanstatic int
284227652Sgrehanvtblk_probe(device_t dev)
285227652Sgrehan{
286227652Sgrehan
287227652Sgrehan	if (virtio_get_device_type(dev) != VIRTIO_ID_BLOCK)
288227652Sgrehan		return (ENXIO);
289227652Sgrehan
290227652Sgrehan	device_set_desc(dev, "VirtIO Block Adapter");
291227652Sgrehan
292227652Sgrehan	return (BUS_PROBE_DEFAULT);
293227652Sgrehan}
294227652Sgrehan
295227652Sgrehanstatic int
296227652Sgrehanvtblk_attach(device_t dev)
297227652Sgrehan{
298227652Sgrehan	struct vtblk_softc *sc;
299227652Sgrehan	struct virtio_blk_config blkcfg;
300227652Sgrehan	int error;
301227652Sgrehan
302275335Sbryanv	virtio_set_feature_desc(dev, vtblk_feature_desc);
303275335Sbryanv
304227652Sgrehan	sc = device_get_softc(dev);
305227652Sgrehan	sc->vtblk_dev = dev;
306227652Sgrehan	VTBLK_LOCK_INIT(sc, device_get_nameunit(dev));
307227652Sgrehan	bioq_init(&sc->vtblk_bioq);
308277788Sbryanv	TAILQ_INIT(&sc->vtblk_dump_queue);
309227652Sgrehan	TAILQ_INIT(&sc->vtblk_req_free);
310227652Sgrehan	TAILQ_INIT(&sc->vtblk_req_ready);
311227652Sgrehan
312252703Sbryanv	vtblk_setup_sysctl(sc);
313275335Sbryanv	vtblk_setup_features(sc);
314252703Sbryanv
315252703Sbryanv	vtblk_read_config(sc, &blkcfg);
316227652Sgrehan
317227652Sgrehan	/*
318227652Sgrehan	 * With the current sglist(9) implementation, it is not easy
319227652Sgrehan	 * for us to support a maximum segment size as adjacent
320227652Sgrehan	 * segments are coalesced. For now, just make sure it's larger
321227652Sgrehan	 * than the maximum supported transfer size.
322227652Sgrehan	 */
323227652Sgrehan	if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
324227652Sgrehan		if (blkcfg.size_max < MAXPHYS) {
325227652Sgrehan			error = ENOTSUP;
326227652Sgrehan			device_printf(dev, "host requires unsupported "
327227652Sgrehan			    "maximum segment size feature\n");
328227652Sgrehan			goto fail;
329227652Sgrehan		}
330227652Sgrehan	}
331227652Sgrehan
332227652Sgrehan	sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
333238360Sgrehan	if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
334234270Sgrehan		error = EINVAL;
335234270Sgrehan		device_printf(dev, "fewer than minimum number of segments "
336234270Sgrehan		    "allowed: %d\n", sc->vtblk_max_nsegs);
337234270Sgrehan		goto fail;
338234270Sgrehan	}
339227652Sgrehan
340227652Sgrehan	sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
341227652Sgrehan	if (sc->vtblk_sglist == NULL) {
342227652Sgrehan		error = ENOMEM;
343227652Sgrehan		device_printf(dev, "cannot allocate sglist\n");
344227652Sgrehan		goto fail;
345227652Sgrehan	}
346227652Sgrehan
347227652Sgrehan	error = vtblk_alloc_virtqueue(sc);
348227652Sgrehan	if (error) {
349227652Sgrehan		device_printf(dev, "cannot allocate virtqueue\n");
350227652Sgrehan		goto fail;
351227652Sgrehan	}
352227652Sgrehan
353275335Sbryanv	error = vtblk_request_prealloc(sc);
354227652Sgrehan	if (error) {
355227652Sgrehan		device_printf(dev, "cannot preallocate requests\n");
356227652Sgrehan		goto fail;
357227652Sgrehan	}
358227652Sgrehan
359227652Sgrehan	vtblk_alloc_disk(sc, &blkcfg);
360227652Sgrehan
361227652Sgrehan	error = virtio_setup_intr(dev, INTR_TYPE_BIO | INTR_ENTROPY);
362227652Sgrehan	if (error) {
363227652Sgrehan		device_printf(dev, "cannot setup virtqueue interrupt\n");
364227652Sgrehan		goto fail;
365227652Sgrehan	}
366227652Sgrehan
367227652Sgrehan	vtblk_create_disk(sc);
368227652Sgrehan
369227652Sgrehan	virtqueue_enable_intr(sc->vtblk_vq);
370227652Sgrehan
371227652Sgrehanfail:
372227652Sgrehan	if (error)
373227652Sgrehan		vtblk_detach(dev);
374227652Sgrehan
375227652Sgrehan	return (error);
376227652Sgrehan}
377227652Sgrehan
378227652Sgrehanstatic int
379227652Sgrehanvtblk_detach(device_t dev)
380227652Sgrehan{
381227652Sgrehan	struct vtblk_softc *sc;
382227652Sgrehan
383227652Sgrehan	sc = device_get_softc(dev);
384227652Sgrehan
385227652Sgrehan	VTBLK_LOCK(sc);
386234270Sgrehan	sc->vtblk_flags |= VTBLK_FLAG_DETACH;
387227652Sgrehan	if (device_is_attached(dev))
388227652Sgrehan		vtblk_stop(sc);
389227652Sgrehan	VTBLK_UNLOCK(sc);
390227652Sgrehan
391227652Sgrehan	vtblk_drain(sc);
392227652Sgrehan
393227652Sgrehan	if (sc->vtblk_disk != NULL) {
394227652Sgrehan		disk_destroy(sc->vtblk_disk);
395227652Sgrehan		sc->vtblk_disk = NULL;
396227652Sgrehan	}
397227652Sgrehan
398227652Sgrehan	if (sc->vtblk_sglist != NULL) {
399227652Sgrehan		sglist_free(sc->vtblk_sglist);
400227652Sgrehan		sc->vtblk_sglist = NULL;
401227652Sgrehan	}
402227652Sgrehan
403227652Sgrehan	VTBLK_LOCK_DESTROY(sc);
404227652Sgrehan
405227652Sgrehan	return (0);
406227652Sgrehan}
407227652Sgrehan
408227652Sgrehanstatic int
409227652Sgrehanvtblk_suspend(device_t dev)
410227652Sgrehan{
411227652Sgrehan	struct vtblk_softc *sc;
412234270Sgrehan	int error;
413227652Sgrehan
414227652Sgrehan	sc = device_get_softc(dev);
415227652Sgrehan
416227652Sgrehan	VTBLK_LOCK(sc);
417234270Sgrehan	sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
418234270Sgrehan	/* XXX BMV: virtio_stop(), etc needed here? */
419234270Sgrehan	error = vtblk_quiesce(sc);
420234270Sgrehan	if (error)
421234270Sgrehan		sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
422227652Sgrehan	VTBLK_UNLOCK(sc);
423227652Sgrehan
424234270Sgrehan	return (error);
425227652Sgrehan}
426227652Sgrehan
427227652Sgrehanstatic int
428227652Sgrehanvtblk_resume(device_t dev)
429227652Sgrehan{
430227652Sgrehan	struct vtblk_softc *sc;
431227652Sgrehan
432227652Sgrehan	sc = device_get_softc(dev);
433227652Sgrehan
434227652Sgrehan	VTBLK_LOCK(sc);
435234270Sgrehan	/* XXX BMV: virtio_reinit(), etc needed here? */
436234270Sgrehan	sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
437234270Sgrehan	vtblk_startio(sc);
438227652Sgrehan	VTBLK_UNLOCK(sc);
439227652Sgrehan
440227652Sgrehan	return (0);
441227652Sgrehan}
442227652Sgrehan
443227652Sgrehanstatic int
444227652Sgrehanvtblk_shutdown(device_t dev)
445227652Sgrehan{
446227652Sgrehan
447227652Sgrehan	return (0);
448227652Sgrehan}
449227652Sgrehan
450227652Sgrehanstatic int
451252703Sbryanvvtblk_config_change(device_t dev)
452252703Sbryanv{
453252703Sbryanv	struct vtblk_softc *sc;
454252703Sbryanv	struct virtio_blk_config blkcfg;
455252703Sbryanv	uint64_t capacity;
456252703Sbryanv
457252703Sbryanv	sc = device_get_softc(dev);
458252703Sbryanv
459252703Sbryanv	vtblk_read_config(sc, &blkcfg);
460252703Sbryanv
461252703Sbryanv	/* Capacity is always in 512-byte units. */
462252703Sbryanv	capacity = blkcfg.capacity * 512;
463252703Sbryanv
464252703Sbryanv	if (sc->vtblk_disk->d_mediasize != capacity)
465252703Sbryanv		vtblk_resize_disk(sc, capacity);
466252703Sbryanv
467252703Sbryanv	return (0);
468252703Sbryanv}
469252703Sbryanv
470252703Sbryanvstatic int
471227652Sgrehanvtblk_open(struct disk *dp)
472227652Sgrehan{
473227652Sgrehan	struct vtblk_softc *sc;
474227652Sgrehan
475227652Sgrehan	if ((sc = dp->d_drv1) == NULL)
476227652Sgrehan		return (ENXIO);
477227652Sgrehan
478234270Sgrehan	return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
479227652Sgrehan}
480227652Sgrehan
481227652Sgrehanstatic int
482227652Sgrehanvtblk_close(struct disk *dp)
483227652Sgrehan{
484227652Sgrehan	struct vtblk_softc *sc;
485227652Sgrehan
486227652Sgrehan	if ((sc = dp->d_drv1) == NULL)
487227652Sgrehan		return (ENXIO);
488227652Sgrehan
489227652Sgrehan	return (0);
490227652Sgrehan}
491227652Sgrehan
492227652Sgrehanstatic int
493227652Sgrehanvtblk_ioctl(struct disk *dp, u_long cmd, void *addr, int flag,
494227652Sgrehan    struct thread *td)
495227652Sgrehan{
496227652Sgrehan	struct vtblk_softc *sc;
497227652Sgrehan
498227652Sgrehan	if ((sc = dp->d_drv1) == NULL)
499227652Sgrehan		return (ENXIO);
500227652Sgrehan
501227652Sgrehan	return (ENOTTY);
502227652Sgrehan}
503227652Sgrehan
504227652Sgrehanstatic int
505227652Sgrehanvtblk_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
506227652Sgrehan    size_t length)
507227652Sgrehan{
508227652Sgrehan	struct disk *dp;
509227652Sgrehan	struct vtblk_softc *sc;
510227652Sgrehan	int error;
511227652Sgrehan
512227652Sgrehan	dp = arg;
513277788Sbryanv	error = 0;
514227652Sgrehan
515227652Sgrehan	if ((sc = dp->d_drv1) == NULL)
516227652Sgrehan		return (ENXIO);
517227652Sgrehan
518234270Sgrehan	VTBLK_LOCK(sc);
519234270Sgrehan
520277788Sbryanv	vtblk_dump_quiesce(sc);
521227652Sgrehan
522227652Sgrehan	if (length > 0)
523275335Sbryanv		error = vtblk_dump_write(sc, virtual, offset, length);
524277788Sbryanv	if (error || (virtual == NULL && offset == 0))
525277788Sbryanv		vtblk_dump_complete(sc);
526227652Sgrehan
527227652Sgrehan	VTBLK_UNLOCK(sc);
528227652Sgrehan
529227652Sgrehan	return (error);
530227652Sgrehan}
531227652Sgrehan
532227652Sgrehanstatic void
533227652Sgrehanvtblk_strategy(struct bio *bp)
534227652Sgrehan{
535227652Sgrehan	struct vtblk_softc *sc;
536227652Sgrehan
537227652Sgrehan	if ((sc = bp->bio_disk->d_drv1) == NULL) {
538275335Sbryanv		vtblk_bio_done(NULL, bp, EINVAL);
539227652Sgrehan		return;
540227652Sgrehan	}
541227652Sgrehan
542227652Sgrehan	/*
543227652Sgrehan	 * Fail any write if RO. Unfortunately, there does not seem to
544227652Sgrehan	 * be a better way to report our readonly'ness to GEOM above.
545227652Sgrehan	 */
546227652Sgrehan	if (sc->vtblk_flags & VTBLK_FLAG_READONLY &&
547227652Sgrehan	    (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH)) {
548275335Sbryanv		vtblk_bio_done(sc, bp, EROFS);
549227652Sgrehan		return;
550227652Sgrehan	}
551227652Sgrehan
552275335Sbryanv	VTBLK_LOCK(sc);
553238360Sgrehan
554275335Sbryanv	if (sc->vtblk_flags & VTBLK_FLAG_DETACH) {
555275335Sbryanv		VTBLK_UNLOCK(sc);
556275335Sbryanv		vtblk_bio_done(sc, bp, ENXIO);
557275335Sbryanv		return;
558227652Sgrehan	}
559227652Sgrehan
560275335Sbryanv	bioq_insert_tail(&sc->vtblk_bioq, bp);
561275335Sbryanv	vtblk_startio(sc);
562234270Sgrehan
563227652Sgrehan	VTBLK_UNLOCK(sc);
564227652Sgrehan}
565227652Sgrehan
566227652Sgrehanstatic void
567227652Sgrehanvtblk_negotiate_features(struct vtblk_softc *sc)
568227652Sgrehan{
569227652Sgrehan	device_t dev;
570227652Sgrehan	uint64_t features;
571227652Sgrehan
572227652Sgrehan	dev = sc->vtblk_dev;
573227652Sgrehan	features = VTBLK_FEATURES;
574227652Sgrehan
575227652Sgrehan	sc->vtblk_features = virtio_negotiate_features(dev, features);
576227652Sgrehan}
577227652Sgrehan
578275335Sbryanvstatic void
579275335Sbryanvvtblk_setup_features(struct vtblk_softc *sc)
580275335Sbryanv{
581275335Sbryanv	device_t dev;
582275335Sbryanv
583275335Sbryanv	dev = sc->vtblk_dev;
584275335Sbryanv
585275335Sbryanv	vtblk_negotiate_features(sc);
586275335Sbryanv
587275335Sbryanv	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
588275335Sbryanv		sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;
589275335Sbryanv	if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
590275335Sbryanv		sc->vtblk_flags |= VTBLK_FLAG_READONLY;
591275335Sbryanv	if (virtio_with_feature(dev, VIRTIO_BLK_F_BARRIER))
592275335Sbryanv		sc->vtblk_flags |= VTBLK_FLAG_BARRIER;
593275335Sbryanv	if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
594275335Sbryanv		sc->vtblk_flags |= VTBLK_FLAG_WC_CONFIG;
595275335Sbryanv}
596275335Sbryanv
597227652Sgrehanstatic int
598227652Sgrehanvtblk_maximum_segments(struct vtblk_softc *sc,
599227652Sgrehan    struct virtio_blk_config *blkcfg)
600227652Sgrehan{
601227652Sgrehan	device_t dev;
602227652Sgrehan	int nsegs;
603227652Sgrehan
604227652Sgrehan	dev = sc->vtblk_dev;
605227652Sgrehan	nsegs = VTBLK_MIN_SEGMENTS;
606227652Sgrehan
607227652Sgrehan	if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
608227652Sgrehan		nsegs += MIN(blkcfg->seg_max, MAXPHYS / PAGE_SIZE + 1);
609227652Sgrehan		if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
610227652Sgrehan			nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT);
611227652Sgrehan	} else
612227652Sgrehan		nsegs += 1;
613227652Sgrehan
614227652Sgrehan	return (nsegs);
615227652Sgrehan}
616227652Sgrehan
617227652Sgrehanstatic int
618227652Sgrehanvtblk_alloc_virtqueue(struct vtblk_softc *sc)
619227652Sgrehan{
620227652Sgrehan	device_t dev;
621227652Sgrehan	struct vq_alloc_info vq_info;
622227652Sgrehan
623227652Sgrehan	dev = sc->vtblk_dev;
624227652Sgrehan
625227652Sgrehan	VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
626227652Sgrehan	    vtblk_vq_intr, sc, &sc->vtblk_vq,
627227652Sgrehan	    "%s request", device_get_nameunit(dev));
628227652Sgrehan
629227652Sgrehan	return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
630227652Sgrehan}
631227652Sgrehan
632227652Sgrehanstatic void
633252703Sbryanvvtblk_resize_disk(struct vtblk_softc *sc, uint64_t new_capacity)
634252703Sbryanv{
635252703Sbryanv	device_t dev;
636252703Sbryanv	struct disk *dp;
637252703Sbryanv	int error;
638252703Sbryanv
639252703Sbryanv	dev = sc->vtblk_dev;
640252703Sbryanv	dp = sc->vtblk_disk;
641252703Sbryanv
642252703Sbryanv	dp->d_mediasize = new_capacity;
643252703Sbryanv	if (bootverbose) {
644252703Sbryanv		device_printf(dev, "resized to %juMB (%ju %u byte sectors)\n",
645252703Sbryanv		    (uintmax_t) dp->d_mediasize >> 20,
646252703Sbryanv		    (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
647252703Sbryanv		    dp->d_sectorsize);
648252703Sbryanv	}
649252703Sbryanv
650252703Sbryanv	error = disk_resize(dp, M_NOWAIT);
651252703Sbryanv	if (error) {
652252703Sbryanv		device_printf(dev,
653252703Sbryanv		    "disk_resize(9) failed, error: %d\n", error);
654252703Sbryanv	}
655252703Sbryanv}
656252703Sbryanv
657252703Sbryanvstatic void
658227652Sgrehanvtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
659227652Sgrehan{
660227652Sgrehan	device_t dev;
661227652Sgrehan	struct disk *dp;
662227652Sgrehan
663227652Sgrehan	dev = sc->vtblk_dev;
664227652Sgrehan
665227652Sgrehan	sc->vtblk_disk = dp = disk_alloc();
666227652Sgrehan	dp->d_open = vtblk_open;
667227652Sgrehan	dp->d_close = vtblk_close;
668227652Sgrehan	dp->d_ioctl = vtblk_ioctl;
669227652Sgrehan	dp->d_strategy = vtblk_strategy;
670227652Sgrehan	dp->d_name = VTBLK_DISK_NAME;
671228301Sgrehan	dp->d_unit = device_get_unit(dev);
672227652Sgrehan	dp->d_drv1 = sc;
673275335Sbryanv	dp->d_flags = DISKFLAG_CANFLUSHCACHE | DISKFLAG_UNMAPPED_BIO |
674275335Sbryanv	    DISKFLAG_DIRECT_COMPLETION;
675252703Sbryanv	dp->d_hba_vendor = virtio_get_vendor(dev);
676252703Sbryanv	dp->d_hba_device = virtio_get_device(dev);
677252703Sbryanv	dp->d_hba_subvendor = virtio_get_subvendor(dev);
678252703Sbryanv	dp->d_hba_subdevice = virtio_get_subdevice(dev);
679227652Sgrehan
680227652Sgrehan	if ((sc->vtblk_flags & VTBLK_FLAG_READONLY) == 0)
681227652Sgrehan		dp->d_dump = vtblk_dump;
682227652Sgrehan
683227652Sgrehan	/* Capacity is always in 512-byte units. */
684227652Sgrehan	dp->d_mediasize = blkcfg->capacity * 512;
685227652Sgrehan
686227652Sgrehan	if (virtio_with_feature(dev, VIRTIO_BLK_F_BLK_SIZE))
687228301Sgrehan		dp->d_sectorsize = blkcfg->blk_size;
688227652Sgrehan	else
689228301Sgrehan		dp->d_sectorsize = 512;
690227652Sgrehan
691227652Sgrehan	/*
692227652Sgrehan	 * The VirtIO maximum I/O size is given in terms of segments.
693227652Sgrehan	 * However, FreeBSD limits I/O size by logical buffer size, not
694227652Sgrehan	 * by physically contiguous pages. Therefore, we have to assume
695227652Sgrehan	 * no pages are contiguous. This may impose an artificially low
696227652Sgrehan	 * maximum I/O size. But in practice, since QEMU advertises 128
697227652Sgrehan	 * segments, this gives us a maximum IO size of 125 * PAGE_SIZE,
698227652Sgrehan	 * which is typically greater than MAXPHYS. Eventually we should
699227652Sgrehan	 * just advertise MAXPHYS and split buffers that are too big.
700227652Sgrehan	 *
701227652Sgrehan	 * Note we must subtract one additional segment in case of non
702227652Sgrehan	 * page aligned buffers.
703227652Sgrehan	 */
704227652Sgrehan	dp->d_maxsize = (sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS - 1) *
705227652Sgrehan	    PAGE_SIZE;
706227652Sgrehan	if (dp->d_maxsize < PAGE_SIZE)
707227652Sgrehan		dp->d_maxsize = PAGE_SIZE; /* XXX */
708227652Sgrehan
709227652Sgrehan	if (virtio_with_feature(dev, VIRTIO_BLK_F_GEOMETRY)) {
710227652Sgrehan		dp->d_fwsectors = blkcfg->geometry.sectors;
711227652Sgrehan		dp->d_fwheads = blkcfg->geometry.heads;
712227652Sgrehan	}
713227652Sgrehan
714281698Smav	if (virtio_with_feature(dev, VIRTIO_BLK_F_TOPOLOGY) &&
715281698Smav	    blkcfg->topology.physical_block_exp > 0) {
716252703Sbryanv		dp->d_stripesize = dp->d_sectorsize *
717252703Sbryanv		    (1 << blkcfg->topology.physical_block_exp);
718252703Sbryanv		dp->d_stripeoffset = (dp->d_stripesize -
719252703Sbryanv		    blkcfg->topology.alignment_offset * dp->d_sectorsize) %
720252703Sbryanv		    dp->d_stripesize;
721252703Sbryanv	}
722252703Sbryanv
723252703Sbryanv	if (vtblk_write_cache_enabled(sc, blkcfg) != 0)
724252703Sbryanv		sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK;
725252703Sbryanv	else
726252703Sbryanv		sc->vtblk_write_cache = VTBLK_CACHE_WRITETHROUGH;
727227652Sgrehan}
728227652Sgrehan
729227652Sgrehanstatic void
730227652Sgrehanvtblk_create_disk(struct vtblk_softc *sc)
731227652Sgrehan{
732227652Sgrehan	struct disk *dp;
733227652Sgrehan
734227652Sgrehan	dp = sc->vtblk_disk;
735227652Sgrehan
736275335Sbryanv	vtblk_ident(sc);
737227652Sgrehan
738227652Sgrehan	device_printf(sc->vtblk_dev, "%juMB (%ju %u byte sectors)\n",
739227652Sgrehan	    (uintmax_t) dp->d_mediasize >> 20,
740227652Sgrehan	    (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
741227652Sgrehan	    dp->d_sectorsize);
742227652Sgrehan
743227652Sgrehan	disk_create(dp, DISK_VERSION);
744227652Sgrehan}
745227652Sgrehan
746234270Sgrehanstatic int
747275335Sbryanvvtblk_request_prealloc(struct vtblk_softc *sc)
748234270Sgrehan{
749275335Sbryanv	struct vtblk_request *req;
750275335Sbryanv	int i, nreqs;
751234270Sgrehan
752275335Sbryanv	nreqs = virtqueue_size(sc->vtblk_vq);
753234270Sgrehan
754275335Sbryanv	/*
755275335Sbryanv	 * Preallocate sufficient requests to keep the virtqueue full. Each
756275335Sbryanv	 * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
757275335Sbryanv	 * the number allocated when indirect descriptors are not available.
758275335Sbryanv	 */
759275335Sbryanv	if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
760275335Sbryanv		nreqs /= VTBLK_MIN_SEGMENTS;
761234270Sgrehan
762275335Sbryanv	for (i = 0; i < nreqs; i++) {
763275335Sbryanv		req = malloc(sizeof(struct vtblk_request), M_DEVBUF, M_NOWAIT);
764275335Sbryanv		if (req == NULL)
765275335Sbryanv			return (ENOMEM);
766275335Sbryanv
767275335Sbryanv		MPASS(sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr)) == 1);
768275335Sbryanv		MPASS(sglist_count(&req->vbr_ack, sizeof(req->vbr_ack)) == 1);
769275335Sbryanv
770275335Sbryanv		sc->vtblk_request_count++;
771275335Sbryanv		vtblk_request_enqueue(sc, req);
772234270Sgrehan	}
773234270Sgrehan
774275335Sbryanv	return (0);
775234270Sgrehan}
776234270Sgrehan
777227652Sgrehanstatic void
778275335Sbryanvvtblk_request_free(struct vtblk_softc *sc)
779227652Sgrehan{
780227652Sgrehan	struct vtblk_request *req;
781227652Sgrehan
782275335Sbryanv	MPASS(TAILQ_EMPTY(&sc->vtblk_req_ready));
783227652Sgrehan
784275335Sbryanv	while ((req = vtblk_request_dequeue(sc)) != NULL) {
785275335Sbryanv		sc->vtblk_request_count--;
786275335Sbryanv		free(req, M_DEVBUF);
787275335Sbryanv	}
788227652Sgrehan
789275335Sbryanv	KASSERT(sc->vtblk_request_count == 0,
790275335Sbryanv	    ("%s: leaked %d requests", __func__, sc->vtblk_request_count));
791275335Sbryanv}
792227652Sgrehan
793275335Sbryanvstatic struct vtblk_request *
794275335Sbryanvvtblk_request_dequeue(struct vtblk_softc *sc)
795275335Sbryanv{
796275335Sbryanv	struct vtblk_request *req;
797227652Sgrehan
798275335Sbryanv	req = TAILQ_FIRST(&sc->vtblk_req_free);
799275335Sbryanv	if (req != NULL) {
800275335Sbryanv		TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);
801275335Sbryanv		bzero(req, sizeof(struct vtblk_request));
802227652Sgrehan	}
803227652Sgrehan
804275335Sbryanv	return (req);
805227652Sgrehan}
806227652Sgrehan
807275335Sbryanvstatic void
808275335Sbryanvvtblk_request_enqueue(struct vtblk_softc *sc, struct vtblk_request *req)
809275335Sbryanv{
810275335Sbryanv
811275335Sbryanv	TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
812275335Sbryanv}
813275335Sbryanv
814227652Sgrehanstatic struct vtblk_request *
815275335Sbryanvvtblk_request_next_ready(struct vtblk_softc *sc)
816227652Sgrehan{
817275335Sbryanv	struct vtblk_request *req;
818275335Sbryanv
819275335Sbryanv	req = TAILQ_FIRST(&sc->vtblk_req_ready);
820275335Sbryanv	if (req != NULL)
821275335Sbryanv		TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);
822275335Sbryanv
823275335Sbryanv	return (req);
824275335Sbryanv}
825275335Sbryanv
826275335Sbryanvstatic void
827275335Sbryanvvtblk_request_requeue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
828275335Sbryanv{
829275335Sbryanv
830275335Sbryanv	/* NOTE: Currently, there will be at most one request in the queue. */
831275335Sbryanv	TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
832275335Sbryanv}
833275335Sbryanv
/*
 * Pick the next request to submit: a previously deferred request from the
 * ready queue takes priority, otherwise one is built from the bio queue.
 * Returns NULL when neither source yields a request.
 */
static struct vtblk_request *
vtblk_request_next(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = vtblk_request_next_ready(sc);
	if (req == NULL)
		req = vtblk_request_bio(sc);

	return (req);
}
845275335Sbryanv
846275335Sbryanvstatic struct vtblk_request *
847275335Sbryanvvtblk_request_bio(struct vtblk_softc *sc)
848275335Sbryanv{
849227652Sgrehan	struct bio_queue_head *bioq;
850227652Sgrehan	struct vtblk_request *req;
851227652Sgrehan	struct bio *bp;
852227652Sgrehan
853227652Sgrehan	bioq = &sc->vtblk_bioq;
854227652Sgrehan
855227652Sgrehan	if (bioq_first(bioq) == NULL)
856227652Sgrehan		return (NULL);
857227652Sgrehan
858275335Sbryanv	req = vtblk_request_dequeue(sc);
859227652Sgrehan	if (req == NULL)
860227652Sgrehan		return (NULL);
861227652Sgrehan
862227652Sgrehan	bp = bioq_takefirst(bioq);
863227652Sgrehan	req->vbr_bp = bp;
864227652Sgrehan	req->vbr_ack = -1;
865227652Sgrehan	req->vbr_hdr.ioprio = 1;
866227652Sgrehan
867227652Sgrehan	switch (bp->bio_cmd) {
868227652Sgrehan	case BIO_FLUSH:
869227652Sgrehan		req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
870227652Sgrehan		break;
871227652Sgrehan	case BIO_READ:
872227652Sgrehan		req->vbr_hdr.type = VIRTIO_BLK_T_IN;
873227652Sgrehan		req->vbr_hdr.sector = bp->bio_offset / 512;
874227652Sgrehan		break;
875227652Sgrehan	case BIO_WRITE:
876227652Sgrehan		req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
877227652Sgrehan		req->vbr_hdr.sector = bp->bio_offset / 512;
878227652Sgrehan		break;
879227652Sgrehan	default:
880252703Sbryanv		panic("%s: bio with unhandled cmd: %d", __func__, bp->bio_cmd);
881227652Sgrehan	}
882227652Sgrehan
883275335Sbryanv	if (bp->bio_flags & BIO_ORDERED)
884275335Sbryanv		req->vbr_hdr.type |= VIRTIO_BLK_T_BARRIER;
885275335Sbryanv
886227652Sgrehan	return (req);
887227652Sgrehan}
888227652Sgrehan
889227652Sgrehanstatic int
890275335Sbryanvvtblk_request_execute(struct vtblk_softc *sc, struct vtblk_request *req)
891227652Sgrehan{
892247829Sbryanv	struct virtqueue *vq;
893227652Sgrehan	struct sglist *sg;
894227652Sgrehan	struct bio *bp;
895247829Sbryanv	int ordered, readable, writable, error;
896227652Sgrehan
897247829Sbryanv	vq = sc->vtblk_vq;
898227652Sgrehan	sg = sc->vtblk_sglist;
899227652Sgrehan	bp = req->vbr_bp;
900247829Sbryanv	ordered = 0;
901227652Sgrehan	writable = 0;
902227652Sgrehan
903247829Sbryanv	/*
904275335Sbryanv	 * Some hosts (such as bhyve) do not implement the barrier feature,
905275335Sbryanv	 * so we emulate it in the driver by allowing the barrier request
906275335Sbryanv	 * to be the only one in flight.
907247829Sbryanv	 */
908275335Sbryanv	if ((sc->vtblk_flags & VTBLK_FLAG_BARRIER) == 0) {
909275335Sbryanv		if (sc->vtblk_req_ordered != NULL)
910275335Sbryanv			return (EBUSY);
911275335Sbryanv		if (bp->bio_flags & BIO_ORDERED) {
912247829Sbryanv			if (!virtqueue_empty(vq))
913247829Sbryanv				return (EBUSY);
914247829Sbryanv			ordered = 1;
915275335Sbryanv			req->vbr_hdr.type &= ~VIRTIO_BLK_T_BARRIER;
916275335Sbryanv		}
917247829Sbryanv	}
918247829Sbryanv
919227652Sgrehan	sglist_reset(sg);
920238360Sgrehan	sglist_append(sg, &req->vbr_hdr, sizeof(struct virtio_blk_outhdr));
921238360Sgrehan
922227652Sgrehan	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
923260582Sbryanv		error = sglist_append_bio(sg, bp);
924260582Sbryanv		if (error || sg->sg_nseg == sg->sg_maxseg) {
925275335Sbryanv			panic("%s: bio %p data buffer too big %d",
926252703Sbryanv			    __func__, bp, error);
927260582Sbryanv		}
928227652Sgrehan
929227652Sgrehan		/* BIO_READ means the host writes into our buffer. */
930227652Sgrehan		if (bp->bio_cmd == BIO_READ)
931238360Sgrehan			writable = sg->sg_nseg - 1;
932227652Sgrehan	}
933227652Sgrehan
934227652Sgrehan	writable++;
935238360Sgrehan	sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
936234270Sgrehan	readable = sg->sg_nseg - writable;
937227652Sgrehan
938247829Sbryanv	error = virtqueue_enqueue(vq, req, sg, readable, writable);
939247829Sbryanv	if (error == 0 && ordered)
940247829Sbryanv		sc->vtblk_req_ordered = req;
941247829Sbryanv
942247829Sbryanv	return (error);
943227652Sgrehan}
944227652Sgrehan
945275335Sbryanvstatic int
946275335Sbryanvvtblk_request_error(struct vtblk_request *req)
947275335Sbryanv{
948275335Sbryanv	int error;
949275335Sbryanv
950275335Sbryanv	switch (req->vbr_ack) {
951275335Sbryanv	case VIRTIO_BLK_S_OK:
952275335Sbryanv		error = 0;
953275335Sbryanv		break;
954275335Sbryanv	case VIRTIO_BLK_S_UNSUPP:
955275335Sbryanv		error = ENOTSUP;
956275335Sbryanv		break;
957275335Sbryanv	default:
958275335Sbryanv		error = EIO;
959275335Sbryanv		break;
960275335Sbryanv	}
961275335Sbryanv
962275335Sbryanv	return (error);
963275335Sbryanv}
964275335Sbryanv
965252702Sbryanvstatic void
966275335Sbryanvvtblk_queue_completed(struct vtblk_softc *sc, struct bio_queue *queue)
967227652Sgrehan{
968275335Sbryanv	struct vtblk_request *req;
969275335Sbryanv	struct bio *bp;
970275335Sbryanv
971275335Sbryanv	while ((req = virtqueue_dequeue(sc->vtblk_vq, NULL)) != NULL) {
972275335Sbryanv		if (sc->vtblk_req_ordered != NULL) {
973275335Sbryanv			MPASS(sc->vtblk_req_ordered == req);
974275335Sbryanv			sc->vtblk_req_ordered = NULL;
975275335Sbryanv		}
976275335Sbryanv
977275335Sbryanv		bp = req->vbr_bp;
978275335Sbryanv		bp->bio_error = vtblk_request_error(req);
979275335Sbryanv		TAILQ_INSERT_TAIL(queue, bp, bio_queue);
980275335Sbryanv
981275335Sbryanv		vtblk_request_enqueue(sc, req);
982275335Sbryanv	}
983275335Sbryanv}
984275335Sbryanv
985275335Sbryanvstatic void
986275335Sbryanvvtblk_done_completed(struct vtblk_softc *sc, struct bio_queue *queue)
987275335Sbryanv{
988275335Sbryanv	struct bio *bp, *tmp;
989275335Sbryanv
990275335Sbryanv	TAILQ_FOREACH_SAFE(bp, queue, bio_queue, tmp) {
991275335Sbryanv		if (bp->bio_error != 0)
992275335Sbryanv			disk_err(bp, "hard error", -1, 1);
993275335Sbryanv		vtblk_bio_done(sc, bp, bp->bio_error);
994275335Sbryanv	}
995275335Sbryanv}
996275335Sbryanv
997275335Sbryanvstatic void
998277788Sbryanvvtblk_drain_vq(struct vtblk_softc *sc)
999275335Sbryanv{
1000252702Sbryanv	struct virtqueue *vq;
1001275335Sbryanv	struct vtblk_request *req;
1002275335Sbryanv	int last;
1003227652Sgrehan
1004227652Sgrehan	vq = sc->vtblk_vq;
1005275335Sbryanv	last = 0;
1006227652Sgrehan
1007275335Sbryanv	while ((req = virtqueue_drain(vq, &last)) != NULL) {
1008277788Sbryanv		vtblk_bio_done(sc, req->vbr_bp, ENXIO);
1009275335Sbryanv		vtblk_request_enqueue(sc, req);
1010227652Sgrehan	}
1011227652Sgrehan
1012275335Sbryanv	sc->vtblk_req_ordered = NULL;
1013275335Sbryanv	KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
1014275335Sbryanv}
1015227652Sgrehan
1016275335Sbryanvstatic void
1017275335Sbryanvvtblk_drain(struct vtblk_softc *sc)
1018275335Sbryanv{
1019275335Sbryanv	struct bio_queue queue;
1020275335Sbryanv	struct bio_queue_head *bioq;
1021275335Sbryanv	struct vtblk_request *req;
1022275335Sbryanv	struct bio *bp;
1023227652Sgrehan
1024275335Sbryanv	bioq = &sc->vtblk_bioq;
1025275335Sbryanv	TAILQ_INIT(&queue);
1026275335Sbryanv
1027275335Sbryanv	if (sc->vtblk_vq != NULL) {
1028275335Sbryanv		vtblk_queue_completed(sc, &queue);
1029275335Sbryanv		vtblk_done_completed(sc, &queue);
1030275335Sbryanv
1031277788Sbryanv		vtblk_drain_vq(sc);
1032227652Sgrehan	}
1033227652Sgrehan
1034275335Sbryanv	while ((req = vtblk_request_next_ready(sc)) != NULL) {
1035275335Sbryanv		vtblk_bio_done(sc, req->vbr_bp, ENXIO);
1036275335Sbryanv		vtblk_request_enqueue(sc, req);
1037275335Sbryanv	}
1038275335Sbryanv
1039275335Sbryanv	while (bioq_first(bioq) != NULL) {
1040275335Sbryanv		bp = bioq_takefirst(bioq);
1041275335Sbryanv		vtblk_bio_done(sc, bp, ENXIO);
1042275335Sbryanv	}
1043275335Sbryanv
1044275335Sbryanv	vtblk_request_free(sc);
1045227652Sgrehan}
1046227652Sgrehan
1047227652Sgrehanstatic void
1048275335Sbryanvvtblk_startio(struct vtblk_softc *sc)
1049227652Sgrehan{
1050275335Sbryanv	struct virtqueue *vq;
1051275335Sbryanv	struct vtblk_request *req;
1052275335Sbryanv	int enq;
1053227652Sgrehan
1054275335Sbryanv	VTBLK_LOCK_ASSERT(sc);
1055275335Sbryanv	vq = sc->vtblk_vq;
1056275335Sbryanv	enq = 0;
1057275335Sbryanv
1058275335Sbryanv	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
1059275335Sbryanv		return;
1060275335Sbryanv
1061275335Sbryanv	while (!virtqueue_full(vq)) {
1062275335Sbryanv		req = vtblk_request_next(sc);
1063275335Sbryanv		if (req == NULL)
1064275335Sbryanv			break;
1065275335Sbryanv
1066275335Sbryanv		if (vtblk_request_execute(sc, req) != 0) {
1067275335Sbryanv			vtblk_request_requeue_ready(sc, req);
1068275335Sbryanv			break;
1069275335Sbryanv		}
1070275335Sbryanv
1071275335Sbryanv		enq++;
1072275335Sbryanv	}
1073275335Sbryanv
1074275335Sbryanv	if (enq > 0)
1075275335Sbryanv		virtqueue_notify(vq);
1076227652Sgrehan}
1077227652Sgrehan
1078275335Sbryanvstatic void
1079275335Sbryanvvtblk_bio_done(struct vtblk_softc *sc, struct bio *bp, int error)
1080275335Sbryanv{
1081275335Sbryanv
1082275335Sbryanv	/* Because of GEOM direct dispatch, we cannot hold any locks. */
1083275335Sbryanv	if (sc != NULL)
1084275335Sbryanv		VTBLK_LOCK_ASSERT_NOTOWNED(sc);
1085275335Sbryanv
1086275335Sbryanv	if (error) {
1087275335Sbryanv		bp->bio_resid = bp->bio_bcount;
1088275335Sbryanv		bp->bio_error = error;
1089275335Sbryanv		bp->bio_flags |= BIO_ERROR;
1090275335Sbryanv	}
1091275335Sbryanv
1092275335Sbryanv	biodone(bp);
1093275335Sbryanv}
1094275335Sbryanv
1095252703Sbryanv#define VTBLK_GET_CONFIG(_dev, _feature, _field, _cfg)			\
1096252703Sbryanv	if (virtio_with_feature(_dev, _feature)) {			\
1097252703Sbryanv		virtio_read_device_config(_dev,				\
1098252703Sbryanv		    offsetof(struct virtio_blk_config, _field),		\
1099252703Sbryanv		    &(_cfg)->_field, sizeof((_cfg)->_field));		\
1100252703Sbryanv	}
1101252703Sbryanv
1102227652Sgrehanstatic void
1103252703Sbryanvvtblk_read_config(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
1104252703Sbryanv{
1105252703Sbryanv	device_t dev;
1106252703Sbryanv
1107252703Sbryanv	dev = sc->vtblk_dev;
1108252703Sbryanv
1109252703Sbryanv	bzero(blkcfg, sizeof(struct virtio_blk_config));
1110252703Sbryanv
1111252703Sbryanv	/* The capacity is always available. */
1112252703Sbryanv	virtio_read_device_config(dev, offsetof(struct virtio_blk_config,
1113252703Sbryanv	    capacity), &blkcfg->capacity, sizeof(blkcfg->capacity));
1114252703Sbryanv
1115252703Sbryanv	/* Read the configuration if the feature was negotiated. */
1116252703Sbryanv	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SIZE_MAX, size_max, blkcfg);
1117252703Sbryanv	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SEG_MAX, seg_max, blkcfg);
1118252703Sbryanv	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY, geometry, blkcfg);
1119252703Sbryanv	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_BLK_SIZE, blk_size, blkcfg);
1120252703Sbryanv	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY, topology, blkcfg);
1121252703Sbryanv	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, writeback, blkcfg);
1122252703Sbryanv}
1123252703Sbryanv
1124252703Sbryanv#undef VTBLK_GET_CONFIG
1125252703Sbryanv
1126252703Sbryanvstatic void
1127275335Sbryanvvtblk_ident(struct vtblk_softc *sc)
1128227652Sgrehan{
1129227652Sgrehan	struct bio buf;
1130227652Sgrehan	struct disk *dp;
1131227652Sgrehan	struct vtblk_request *req;
1132227652Sgrehan	int len, error;
1133227652Sgrehan
1134227652Sgrehan	dp = sc->vtblk_disk;
1135227652Sgrehan	len = MIN(VIRTIO_BLK_ID_BYTES, DISK_IDENT_SIZE);
1136227652Sgrehan
1137252703Sbryanv	if (vtblk_tunable_int(sc, "no_ident", vtblk_no_ident) != 0)
1138227652Sgrehan		return;
1139227652Sgrehan
1140275335Sbryanv	req = vtblk_request_dequeue(sc);
1141227652Sgrehan	if (req == NULL)
1142227652Sgrehan		return;
1143227652Sgrehan
1144227652Sgrehan	req->vbr_ack = -1;
1145227652Sgrehan	req->vbr_hdr.type = VIRTIO_BLK_T_GET_ID;
1146227652Sgrehan	req->vbr_hdr.ioprio = 1;
1147227652Sgrehan	req->vbr_hdr.sector = 0;
1148227652Sgrehan
1149227652Sgrehan	req->vbr_bp = &buf;
1150295707Simp	g_reset_bio(&buf);
1151227652Sgrehan
1152227652Sgrehan	buf.bio_cmd = BIO_READ;
1153227652Sgrehan	buf.bio_data = dp->d_ident;
1154227652Sgrehan	buf.bio_bcount = len;
1155227652Sgrehan
1156227652Sgrehan	VTBLK_LOCK(sc);
1157227652Sgrehan	error = vtblk_poll_request(sc, req);
1158227652Sgrehan	VTBLK_UNLOCK(sc);
1159227652Sgrehan
1160275335Sbryanv	vtblk_request_enqueue(sc, req);
1161228301Sgrehan
1162227652Sgrehan	if (error) {
1163227652Sgrehan		device_printf(sc->vtblk_dev,
1164227652Sgrehan		    "error getting device identifier: %d\n", error);
1165227652Sgrehan	}
1166227652Sgrehan}
1167227652Sgrehan
1168227652Sgrehanstatic int
1169227652Sgrehanvtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
1170227652Sgrehan{
1171227652Sgrehan	struct virtqueue *vq;
1172227652Sgrehan	int error;
1173227652Sgrehan
1174227652Sgrehan	vq = sc->vtblk_vq;
1175227652Sgrehan
1176227652Sgrehan	if (!virtqueue_empty(vq))
1177227652Sgrehan		return (EBUSY);
1178227652Sgrehan
1179275335Sbryanv	error = vtblk_request_execute(sc, req);
1180227652Sgrehan	if (error)
1181227652Sgrehan		return (error);
1182227652Sgrehan
1183227652Sgrehan	virtqueue_notify(vq);
1184252703Sbryanv	virtqueue_poll(vq, NULL);
1185227652Sgrehan
1186234270Sgrehan	error = vtblk_request_error(req);
1187234270Sgrehan	if (error && bootverbose) {
1188238360Sgrehan		device_printf(sc->vtblk_dev,
1189252703Sbryanv		    "%s: IO error: %d\n", __func__, error);
1190227652Sgrehan	}
1191227652Sgrehan
1192227652Sgrehan	return (error);
1193227652Sgrehan}
1194227652Sgrehan
1195275335Sbryanvstatic int
1196275335Sbryanvvtblk_quiesce(struct vtblk_softc *sc)
1197234270Sgrehan{
1198234270Sgrehan	int error;
1199234270Sgrehan
1200275335Sbryanv	VTBLK_LOCK_ASSERT(sc);
1201275335Sbryanv	error = 0;
1202234270Sgrehan
1203275335Sbryanv	while (!virtqueue_empty(sc->vtblk_vq)) {
1204275335Sbryanv		if (mtx_sleep(&sc->vtblk_vq, VTBLK_MTX(sc), PRIBIO, "vtblkq",
1205275335Sbryanv		    VTBLK_QUIESCE_TIMEOUT) == EWOULDBLOCK) {
1206275335Sbryanv			error = EBUSY;
1207275335Sbryanv			break;
1208247829Sbryanv		}
1209275335Sbryanv	}
1210247829Sbryanv
1211275335Sbryanv	return (error);
1212234270Sgrehan}
1213234270Sgrehan
1214234270Sgrehanstatic void
1215275335Sbryanvvtblk_vq_intr(void *xsc)
1216227652Sgrehan{
1217275335Sbryanv	struct vtblk_softc *sc;
1218227652Sgrehan	struct virtqueue *vq;
1219275335Sbryanv	struct bio_queue queue;
1220227652Sgrehan
1221275335Sbryanv	sc = xsc;
1222227652Sgrehan	vq = sc->vtblk_vq;
1223275335Sbryanv	TAILQ_INIT(&queue);
1224227652Sgrehan
1225275335Sbryanv	VTBLK_LOCK(sc);
1226227652Sgrehan
1227275335Sbryanvagain:
1228275335Sbryanv	if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
1229275335Sbryanv		goto out;
1230227652Sgrehan
1231275335Sbryanv	vtblk_queue_completed(sc, &queue);
1232275335Sbryanv	vtblk_startio(sc);
1233227652Sgrehan
1234275335Sbryanv	if (virtqueue_enable_intr(vq) != 0) {
1235275335Sbryanv		virtqueue_disable_intr(vq);
1236275335Sbryanv		goto again;
1237234270Sgrehan	}
1238227652Sgrehan
1239275335Sbryanv	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
1240275335Sbryanv		wakeup(&sc->vtblk_vq);
1241227652Sgrehan
1242275335Sbryanvout:
1243275335Sbryanv	VTBLK_UNLOCK(sc);
1244275335Sbryanv	vtblk_done_completed(sc, &queue);
1245227652Sgrehan}
1246227652Sgrehan
1247238360Sgrehanstatic void
1248275335Sbryanvvtblk_stop(struct vtblk_softc *sc)
1249238360Sgrehan{
1250238360Sgrehan
1251275335Sbryanv	virtqueue_disable_intr(sc->vtblk_vq);
1252275335Sbryanv	virtio_stop(sc->vtblk_dev);
1253238360Sgrehan}
1254238360Sgrehan
1255275335Sbryanvstatic void
1256277788Sbryanvvtblk_dump_quiesce(struct vtblk_softc *sc)
1257227652Sgrehan{
1258227652Sgrehan
1259227652Sgrehan	/*
1260277788Sbryanv	 * Spin here until all the requests in-flight at the time of the
1261277788Sbryanv	 * dump are completed and queued. The queued requests will be
1262277788Sbryanv	 * biodone'd once the dump is finished.
1263227652Sgrehan	 */
1264277788Sbryanv	while (!virtqueue_empty(sc->vtblk_vq))
1265277788Sbryanv		vtblk_queue_completed(sc, &sc->vtblk_dump_queue);
1266227652Sgrehan}
1267227652Sgrehan
1268275335Sbryanvstatic int
1269275335Sbryanvvtblk_dump_write(struct vtblk_softc *sc, void *virtual, off_t offset,
1270275335Sbryanv    size_t length)
1271227652Sgrehan{
1272275335Sbryanv	struct bio buf;
1273227652Sgrehan	struct vtblk_request *req;
1274227652Sgrehan
1275275335Sbryanv	req = &sc->vtblk_dump_request;
1276275335Sbryanv	req->vbr_ack = -1;
1277275335Sbryanv	req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
1278275335Sbryanv	req->vbr_hdr.ioprio = 1;
1279275335Sbryanv	req->vbr_hdr.sector = offset / 512;
1280234270Sgrehan
1281275335Sbryanv	req->vbr_bp = &buf;
1282295707Simp	g_reset_bio(&buf);
1283227652Sgrehan
1284275335Sbryanv	buf.bio_cmd = BIO_WRITE;
1285275335Sbryanv	buf.bio_data = virtual;
1286275335Sbryanv	buf.bio_bcount = length;
1287275335Sbryanv
1288275335Sbryanv	return (vtblk_poll_request(sc, req));
1289227652Sgrehan}
1290227652Sgrehan
1291275335Sbryanvstatic int
1292275335Sbryanvvtblk_dump_flush(struct vtblk_softc *sc)
1293227652Sgrehan{
1294275335Sbryanv	struct bio buf;
1295227652Sgrehan	struct vtblk_request *req;
1296227652Sgrehan
1297275335Sbryanv	req = &sc->vtblk_dump_request;
1298275335Sbryanv	req->vbr_ack = -1;
1299275335Sbryanv	req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
1300275335Sbryanv	req->vbr_hdr.ioprio = 1;
1301275335Sbryanv	req->vbr_hdr.sector = 0;
1302227652Sgrehan
1303275335Sbryanv	req->vbr_bp = &buf;
1304295707Simp	g_reset_bio(&buf);
1305275335Sbryanv
1306275335Sbryanv	buf.bio_cmd = BIO_FLUSH;
1307275335Sbryanv
1308275335Sbryanv	return (vtblk_poll_request(sc, req));
1309227652Sgrehan}
1310227652Sgrehan
1311227652Sgrehanstatic void
1312277788Sbryanvvtblk_dump_complete(struct vtblk_softc *sc)
1313277788Sbryanv{
1314277788Sbryanv
1315277788Sbryanv	vtblk_dump_flush(sc);
1316277788Sbryanv
1317277788Sbryanv	VTBLK_UNLOCK(sc);
1318277788Sbryanv	vtblk_done_completed(sc, &sc->vtblk_dump_queue);
1319277788Sbryanv	VTBLK_LOCK(sc);
1320277788Sbryanv}
1321277788Sbryanv
1322277788Sbryanvstatic void
1323275335Sbryanvvtblk_set_write_cache(struct vtblk_softc *sc, int wc)
1324227652Sgrehan{
1325227652Sgrehan
1326275335Sbryanv	/* Set either writeback (1) or writethrough (0) mode. */
1327275335Sbryanv	virtio_write_dev_config_1(sc->vtblk_dev,
1328275335Sbryanv	    offsetof(struct virtio_blk_config, writeback), wc);
1329227652Sgrehan}
1330227652Sgrehan
1331275335Sbryanvstatic int
1332275335Sbryanvvtblk_write_cache_enabled(struct vtblk_softc *sc,
1333275335Sbryanv    struct virtio_blk_config *blkcfg)
1334227652Sgrehan{
1335275335Sbryanv	int wc;
1336227652Sgrehan
1337275335Sbryanv	if (sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) {
1338275335Sbryanv		wc = vtblk_tunable_int(sc, "writecache_mode",
1339275335Sbryanv		    vtblk_writecache_mode);
1340275335Sbryanv		if (wc >= 0 && wc < VTBLK_CACHE_MAX)
1341275335Sbryanv			vtblk_set_write_cache(sc, wc);
1342275335Sbryanv		else
1343275335Sbryanv			wc = blkcfg->writeback;
1344275335Sbryanv	} else
1345275335Sbryanv		wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_WCE);
1346227652Sgrehan
1347275335Sbryanv	return (wc);
1348227652Sgrehan}
1349227652Sgrehan
1350234270Sgrehanstatic int
1351275335Sbryanvvtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS)
1352234270Sgrehan{
1353275335Sbryanv	struct vtblk_softc *sc;
1354275335Sbryanv	int wc, error;
1355234270Sgrehan
1356275335Sbryanv	sc = oidp->oid_arg1;
1357275335Sbryanv	wc = sc->vtblk_write_cache;
1358234270Sgrehan
1359275335Sbryanv	error = sysctl_handle_int(oidp, &wc, 0, req);
1360275335Sbryanv	if (error || req->newptr == NULL)
1361275335Sbryanv		return (error);
1362275335Sbryanv	if ((sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) == 0)
1363275335Sbryanv		return (EPERM);
1364275335Sbryanv	if (wc < 0 || wc >= VTBLK_CACHE_MAX)
1365275335Sbryanv		return (EINVAL);
1366234270Sgrehan
1367275335Sbryanv	VTBLK_LOCK(sc);
1368275335Sbryanv	sc->vtblk_write_cache = wc;
1369275335Sbryanv	vtblk_set_write_cache(sc, sc->vtblk_write_cache);
1370275335Sbryanv	VTBLK_UNLOCK(sc);
1371227652Sgrehan
1372275335Sbryanv	return (0);
1373227652Sgrehan}
1374252703Sbryanv
1375252703Sbryanvstatic void
1376252703Sbryanvvtblk_setup_sysctl(struct vtblk_softc *sc)
1377252703Sbryanv{
1378252703Sbryanv	device_t dev;
1379252703Sbryanv	struct sysctl_ctx_list *ctx;
1380252703Sbryanv	struct sysctl_oid *tree;
1381252703Sbryanv	struct sysctl_oid_list *child;
1382252703Sbryanv
1383252703Sbryanv	dev = sc->vtblk_dev;
1384252703Sbryanv	ctx = device_get_sysctl_ctx(dev);
1385252703Sbryanv	tree = device_get_sysctl_tree(dev);
1386252703Sbryanv	child = SYSCTL_CHILDREN(tree);
1387252703Sbryanv
1388252703Sbryanv	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "writecache_mode",
1389252703Sbryanv	    CTLTYPE_INT | CTLFLAG_RW, sc, 0, vtblk_write_cache_sysctl,
1390252703Sbryanv	    "I", "Write cache mode (writethrough (0) or writeback (1))");
1391252703Sbryanv}
1392252703Sbryanv
1393252703Sbryanvstatic int
1394252703Sbryanvvtblk_tunable_int(struct vtblk_softc *sc, const char *knob, int def)
1395252703Sbryanv{
1396252703Sbryanv	char path[64];
1397252703Sbryanv
1398252703Sbryanv	snprintf(path, sizeof(path),
1399252703Sbryanv	    "hw.vtblk.%d.%s", device_get_unit(sc->vtblk_dev), knob);
1400252703Sbryanv	TUNABLE_INT_FETCH(path, &def);
1401252703Sbryanv
1402252703Sbryanv	return (def);
1403252703Sbryanv}
1404