/* ctl_backend_block.c revision 287760 */
1255643Snwhitehorn/*-
2255643Snwhitehorn * Copyright (c) 2003 Silicon Graphics International Corp.
3255643Snwhitehorn * Copyright (c) 2009-2011 Spectra Logic Corporation
4255643Snwhitehorn * Copyright (c) 2012 The FreeBSD Foundation
5255643Snwhitehorn * All rights reserved.
6255643Snwhitehorn *
7255643Snwhitehorn * Portions of this software were developed by Edward Tomasz Napierala
8255643Snwhitehorn * under sponsorship from the FreeBSD Foundation.
9255643Snwhitehorn *
10255643Snwhitehorn * Redistribution and use in source and binary forms, with or without
11255643Snwhitehorn * modification, are permitted provided that the following conditions
12255643Snwhitehorn * are met:
13255643Snwhitehorn * 1. Redistributions of source code must retain the above copyright
14255643Snwhitehorn *    notice, this list of conditions, and the following disclaimer,
15255643Snwhitehorn *    without modification.
16255643Snwhitehorn * 2. Redistributions in binary form must reproduce at minimum a disclaimer
17255643Snwhitehorn *    substantially similar to the "NO WARRANTY" disclaimer below
18255643Snwhitehorn *    ("Disclaimer") and any redistribution must be conditioned upon
19255643Snwhitehorn *    including a substantially similar Disclaimer requirement for further
20255643Snwhitehorn *    binary redistribution.
21255643Snwhitehorn *
22255643Snwhitehorn * NO WARRANTY
23255643Snwhitehorn * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24255643Snwhitehorn * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25255643Snwhitehorn * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
26255643Snwhitehorn * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27255643Snwhitehorn * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28255643Snwhitehorn * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29255643Snwhitehorn * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30255643Snwhitehorn * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
31255643Snwhitehorn * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
32255643Snwhitehorn * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33255643Snwhitehorn * POSSIBILITY OF SUCH DAMAGES.
34255643Snwhitehorn *
35255643Snwhitehorn * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $
36255643Snwhitehorn */
37255643Snwhitehorn/*
38255643Snwhitehorn * CAM Target Layer driver backend for block devices.
39255643Snwhitehorn *
40255643Snwhitehorn * Author: Ken Merry <ken@FreeBSD.org>
41255643Snwhitehorn */
42255643Snwhitehorn#include <sys/cdefs.h>
43255643Snwhitehorn__FBSDID("$FreeBSD: head/sys/cam/ctl/ctl_backend_block.c 287760 2015-09-13 16:49:41Z mav $");
44255643Snwhitehorn
45255643Snwhitehorn#include <sys/param.h>
46255643Snwhitehorn#include <sys/systm.h>
47255643Snwhitehorn#include <sys/kernel.h>
48255643Snwhitehorn#include <sys/types.h>
49255643Snwhitehorn#include <sys/kthread.h>
50255643Snwhitehorn#include <sys/bio.h>
51255643Snwhitehorn#include <sys/fcntl.h>
52255643Snwhitehorn#include <sys/limits.h>
53255643Snwhitehorn#include <sys/lock.h>
54255643Snwhitehorn#include <sys/mutex.h>
55255643Snwhitehorn#include <sys/condvar.h>
56255643Snwhitehorn#include <sys/malloc.h>
57255643Snwhitehorn#include <sys/conf.h>
58255643Snwhitehorn#include <sys/ioccom.h>
59255643Snwhitehorn#include <sys/queue.h>
60255643Snwhitehorn#include <sys/sbuf.h>
61255643Snwhitehorn#include <sys/endian.h>
62255643Snwhitehorn#include <sys/uio.h>
63255643Snwhitehorn#include <sys/buf.h>
64255643Snwhitehorn#include <sys/taskqueue.h>
65255643Snwhitehorn#include <sys/vnode.h>
66255643Snwhitehorn#include <sys/namei.h>
67255643Snwhitehorn#include <sys/mount.h>
68255643Snwhitehorn#include <sys/disk.h>
69255643Snwhitehorn#include <sys/fcntl.h>
70255643Snwhitehorn#include <sys/filedesc.h>
71255643Snwhitehorn#include <sys/filio.h>
72255643Snwhitehorn#include <sys/proc.h>
73255643Snwhitehorn#include <sys/pcpu.h>
74255643Snwhitehorn#include <sys/module.h>
75255643Snwhitehorn#include <sys/sdt.h>
76255643Snwhitehorn#include <sys/devicestat.h>
77255643Snwhitehorn#include <sys/sysctl.h>
78255643Snwhitehorn
79255643Snwhitehorn#include <geom/geom.h>
80255643Snwhitehorn
81255643Snwhitehorn#include <cam/cam.h>
82255643Snwhitehorn#include <cam/scsi/scsi_all.h>
83255643Snwhitehorn#include <cam/scsi/scsi_da.h>
84255643Snwhitehorn#include <cam/ctl/ctl_io.h>
85255643Snwhitehorn#include <cam/ctl/ctl.h>
86255643Snwhitehorn#include <cam/ctl/ctl_backend.h>
87255643Snwhitehorn#include <cam/ctl/ctl_ioctl.h>
88255643Snwhitehorn#include <cam/ctl/ctl_ha.h>
89255643Snwhitehorn#include <cam/ctl/ctl_scsi_all.h>
90255643Snwhitehorn#include <cam/ctl/ctl_private.h>
91255643Snwhitehorn#include <cam/ctl/ctl_error.h>
92255643Snwhitehorn
93255643Snwhitehorn/*
94255643Snwhitehorn * The idea here is that we'll allocate enough S/G space to hold a 1MB
95255643Snwhitehorn * I/O.  If we get an I/O larger than that, we'll split it.
96255643Snwhitehorn */
97255643Snwhitehorn#define	CTLBLK_HALF_IO_SIZE	(512 * 1024)
98255643Snwhitehorn#define	CTLBLK_MAX_IO_SIZE	(CTLBLK_HALF_IO_SIZE * 2)
99255643Snwhitehorn#define	CTLBLK_MAX_SEG		MAXPHYS
100255643Snwhitehorn#define	CTLBLK_HALF_SEGS	MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MAX_SEG, 1)
101255643Snwhitehorn#define	CTLBLK_MAX_SEGS		(CTLBLK_HALF_SEGS * 2)
102255643Snwhitehorn
103255643Snwhitehorn#ifdef CTLBLK_DEBUG
104255643Snwhitehorn#define DPRINTF(fmt, args...) \
105255643Snwhitehorn    printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
106255643Snwhitehorn#else
107255643Snwhitehorn#define DPRINTF(fmt, args...) do {} while(0)
108255643Snwhitehorn#endif
109255643Snwhitehorn
110255643Snwhitehorn#define PRIV(io)	\
111255643Snwhitehorn    ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
112255643Snwhitehorn#define ARGS(io)	\
113255643Snwhitehorn    ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])
114255643Snwhitehorn
115255643SnwhitehornSDT_PROVIDER_DEFINE(cbb);
116255643Snwhitehorn
/* State flags for a backend block LUN (names suggest lifecycle states). */
typedef enum {
	CTL_BE_BLOCK_LUN_UNCONFIGURED	= 0x01,	/* not yet configured with CTL */
	CTL_BE_BLOCK_LUN_CONFIG_ERR	= 0x02,	/* configuration reported an error */
	CTL_BE_BLOCK_LUN_WAITING	= 0x04,	/* someone is waiting on this LUN */
} ctl_be_block_lun_flags;

/* Kind of backing store behind a LUN. */
typedef enum {
	CTL_BE_BLOCK_NONE,		/* no backing store attached */
	CTL_BE_BLOCK_DEV,		/* backed by a device node */
	CTL_BE_BLOCK_FILE		/* backed by a regular file */
} ctl_be_block_type;
128255643Snwhitehorn
/* Private data for a file-backed LUN. */
struct ctl_be_block_filedata {
	struct ucred *cred;	/* credentials passed to VOP_READ/VOP_WRITE on the file */
};

/* Per-backing-type private data; selected by ctl_be_block_type. */
union ctl_be_block_bedata {
	struct ctl_be_block_filedata file;
};
136255643Snwhitehorn
137255643Snwhitehornstruct ctl_be_block_io;
138255643Snwhitehornstruct ctl_be_block_lun;
139255643Snwhitehorn
140255643Snwhitehorntypedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
141255643Snwhitehorn			       struct ctl_be_block_io *beio);
142255643Snwhitehorntypedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun,
143255643Snwhitehorn				  const char *attrname);
144255643Snwhitehorn
145255643Snwhitehorn/*
146255643Snwhitehorn * Backend LUN structure.  There is a 1:1 mapping between a block device
147255643Snwhitehorn * and a backend block LUN, and between a backend block LUN and a CTL LUN.
148255643Snwhitehorn */
struct ctl_be_block_lun {
	struct ctl_lun_create_params params;	/* creation parameters from the user request */
	char lunname[32];			/* backend name for this LUN */
	char *dev_path;				/* path to the backing file or device */
	ctl_be_block_type dev_type;		/* CTL_BE_BLOCK_{NONE,DEV,FILE} */
	struct vnode *vn;			/* vnode of the backing object */
	union ctl_be_block_bedata backend;	/* backing-type specific data */
	cbb_dispatch_t dispatch;		/* read/write handler for this backing type */
	cbb_dispatch_t lun_flush;		/* flush (sync cache) handler */
	cbb_dispatch_t unmap;			/* unmap/delete handler */
	cbb_dispatch_t get_lba_status;		/* GET LBA STATUS handler */
	cbb_getattr_t getattr;			/* attribute query handler */
	uma_zone_t lun_zone;			/* UMA zone the S/G segment buffers come from */
	uint64_t size_blocks;			/* LUN size in blocks */
	uint64_t size_bytes;			/* LUN size in bytes */
	struct ctl_be_block_softc *softc;	/* backpointer to the backend softc */
	struct devstat *disk_stats;		/* devstat(9) statistics for this LUN */
	ctl_be_block_lun_flags flags;		/* lifecycle state flags */
	STAILQ_ENTRY(ctl_be_block_lun) links;	/* linkage on the softc's LUN list */
	struct ctl_be_lun cbe_lun;		/* the CTL-visible LUN */
	struct taskqueue *io_taskqueue;		/* taskqueue used to run backend I/O */
	struct task io_task;			/* task enqueued to drain the queues below */
	int num_threads;			/* worker threads for this LUN */
	/* Queues of pending ctl_io headers, drained by the worker task. */
	STAILQ_HEAD(, ctl_io_hdr) input_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_read_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;	/* writes whose DMA finished (see move_done) */
	struct mtx_padalign io_lock;		/* held around devstat start/end transactions */
	struct mtx_padalign queue_lock;		/* protects the four queues above */
};
179255643Snwhitehorn
180255643Snwhitehorn/*
181255643Snwhitehorn * Overall softc structure for the block backend module.
182255643Snwhitehorn */
struct ctl_be_block_softc {
	struct mtx			 lock;		/* NOTE(review): presumably guards num_luns/lun_list — confirm at use sites */
	int				 num_luns;	/* count of LUNs on lun_list */
	STAILQ_HEAD(, ctl_be_block_lun)	 lun_list;	/* all LUNs owned by this backend */
};
188255643Snwhitehorn
189255643Snwhitehornstatic struct ctl_be_block_softc backend_block_softc;
190255643Snwhitehorn
191255643Snwhitehorn/*
192255643Snwhitehorn * Per-I/O information.
193255643Snwhitehorn */
struct ctl_be_block_io {
	union ctl_io			*io;		/* the CTL I/O this beio services */
	struct ctl_sg_entry		sg_segs[CTLBLK_MAX_SEGS]; /* S/G list; COMPARE mirrors data in the upper half */
	struct iovec			xiovecs[CTLBLK_MAX_SEGS]; /* iovec copies of sg_segs for uio-based backends */
	int				bio_cmd;	/* BIO_READ/BIO_WRITE/BIO_FLUSH/BIO_DELETE */
	int				num_segs;	/* valid entries in sg_segs/xiovecs */
	int				num_bios_sent;	/* bios issued to the backing device */
	int				num_bios_done;	/* bios completed so far */
	int				send_complete;	/* all bios have been issued */
	int				num_errors;	/* bios that completed with an error */
	struct bintime			ds_t0;		/* devstat transaction start time */
	devstat_tag_type		ds_tag_type;	/* devstat tag type for this I/O */
	devstat_trans_flags		ds_trans_type;	/* devstat transaction type */
	uint64_t			io_len;		/* transfer length in bytes */
	uint64_t			io_offset;	/* byte offset into the backing store */
	int				io_arg;		/* per-command argument (e.g. nonzero selects MNT_NOWAIT in flush) */
	struct ctl_be_block_softc	*softc;		/* owning backend softc */
	struct ctl_be_block_lun		*lun;		/* LUN this I/O targets */
	void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
};
214255643Snwhitehorn
215255643Snwhitehornextern struct ctl_softc *control_softc;
216255643Snwhitehorn
217255643Snwhitehornstatic int cbb_num_threads = 14;
218255643SnwhitehornSYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD, 0,
219255643Snwhitehorn	    "CAM Target Layer Block Backend");
220255643SnwhitehornSYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN,
221255643Snwhitehorn           &cbb_num_threads, 0, "Number of threads per backing file");
222255643Snwhitehorn
223255643Snwhitehornstatic struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
224255643Snwhitehornstatic void ctl_free_beio(struct ctl_be_block_io *beio);
225255643Snwhitehornstatic void ctl_complete_beio(struct ctl_be_block_io *beio);
226255643Snwhitehornstatic int ctl_be_block_move_done(union ctl_io *io);
227255643Snwhitehornstatic void ctl_be_block_biodone(struct bio *bio);
228255643Snwhitehornstatic void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
229255643Snwhitehorn				    struct ctl_be_block_io *beio);
230255643Snwhitehornstatic void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
231255643Snwhitehorn				       struct ctl_be_block_io *beio);
232255643Snwhitehornstatic void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
233255643Snwhitehorn				  struct ctl_be_block_io *beio);
234255643Snwhitehornstatic uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun,
235255643Snwhitehorn					 const char *attrname);
236255643Snwhitehornstatic void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
237255643Snwhitehorn				   struct ctl_be_block_io *beio);
238255643Snwhitehornstatic void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
239255643Snwhitehorn				   struct ctl_be_block_io *beio);
240255643Snwhitehornstatic void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
241255643Snwhitehorn				      struct ctl_be_block_io *beio);
242255643Snwhitehornstatic uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun,
243255643Snwhitehorn					 const char *attrname);
244255643Snwhitehornstatic void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
245255643Snwhitehorn				    union ctl_io *io);
246255643Snwhitehornstatic void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
247255643Snwhitehorn				    union ctl_io *io);
248255643Snwhitehornstatic void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
249255643Snwhitehorn				  union ctl_io *io);
250255643Snwhitehornstatic void ctl_be_block_worker(void *context, int pending);
251255643Snwhitehornstatic int ctl_be_block_submit(union ctl_io *io);
252255643Snwhitehornstatic int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
253255643Snwhitehorn				   int flag, struct thread *td);
254255643Snwhitehornstatic int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
255255643Snwhitehorn				  struct ctl_lun_req *req);
256255643Snwhitehornstatic int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
257255643Snwhitehorn				 struct ctl_lun_req *req);
258255643Snwhitehornstatic int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
259255643Snwhitehornstatic int ctl_be_block_open(struct ctl_be_block_softc *softc,
260255643Snwhitehorn			     struct ctl_be_block_lun *be_lun,
261255643Snwhitehorn			     struct ctl_lun_req *req);
262255643Snwhitehornstatic int ctl_be_block_create(struct ctl_be_block_softc *softc,
263255643Snwhitehorn			       struct ctl_lun_req *req);
264255643Snwhitehornstatic int ctl_be_block_rm(struct ctl_be_block_softc *softc,
265255643Snwhitehorn			   struct ctl_lun_req *req);
266255643Snwhitehornstatic int ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun,
267255643Snwhitehorn				  struct ctl_lun_req *req);
268255643Snwhitehornstatic int ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun,
269255643Snwhitehorn				 struct ctl_lun_req *req);
270255643Snwhitehornstatic int ctl_be_block_modify(struct ctl_be_block_softc *softc,
271255643Snwhitehorn			   struct ctl_lun_req *req);
272255643Snwhitehornstatic void ctl_be_block_lun_shutdown(void *be_lun);
273255643Snwhitehornstatic void ctl_be_block_lun_config_status(void *be_lun,
274255643Snwhitehorn					   ctl_lun_config_status status);
275255643Snwhitehornstatic int ctl_be_block_config_write(union ctl_io *io);
276255643Snwhitehornstatic int ctl_be_block_config_read(union ctl_io *io);
277255643Snwhitehornstatic int ctl_be_block_lun_info(void *be_lun, struct sbuf *sb);
278255643Snwhitehornstatic uint64_t ctl_be_block_lun_attr(void *be_lun, const char *attrname);
279255643Snwhitehornint ctl_be_block_init(void);
280255643Snwhitehorn
281255643Snwhitehornstatic struct ctl_backend_driver ctl_be_block_driver =
282255643Snwhitehorn{
283255643Snwhitehorn	.name = "block",
284255643Snwhitehorn	.flags = CTL_BE_FLAG_HAS_CONFIG,
285255643Snwhitehorn	.init = ctl_be_block_init,
286255643Snwhitehorn	.data_submit = ctl_be_block_submit,
287255643Snwhitehorn	.data_move_done = ctl_be_block_move_done,
288255643Snwhitehorn	.config_read = ctl_be_block_config_read,
289255643Snwhitehorn	.config_write = ctl_be_block_config_write,
290255643Snwhitehorn	.ioctl = ctl_be_block_ioctl,
291255643Snwhitehorn	.lun_info = ctl_be_block_lun_info,
292255643Snwhitehorn	.lun_attr = ctl_be_block_lun_attr
293255643Snwhitehorn};
294255643Snwhitehorn
295255643SnwhitehornMALLOC_DEFINE(M_CTLBLK, "ctlblk", "Memory used for CTL block backend");
296255643SnwhitehornCTL_BACKEND_DECLARE(cbb, ctl_be_block_driver);
297255643Snwhitehorn
298255643Snwhitehornstatic uma_zone_t beio_zone;
299255643Snwhitehorn
300255643Snwhitehornstatic struct ctl_be_block_io *
301255643Snwhitehornctl_alloc_beio(struct ctl_be_block_softc *softc)
302255643Snwhitehorn{
303255643Snwhitehorn	struct ctl_be_block_io *beio;
304255643Snwhitehorn
305255643Snwhitehorn	beio = uma_zalloc(beio_zone, M_WAITOK | M_ZERO);
306	beio->softc = softc;
307	return (beio);
308}
309
310static void
311ctl_free_beio(struct ctl_be_block_io *beio)
312{
313	int duplicate_free;
314	int i;
315
316	duplicate_free = 0;
317
318	for (i = 0; i < beio->num_segs; i++) {
319		if (beio->sg_segs[i].addr == NULL)
320			duplicate_free++;
321
322		uma_zfree(beio->lun->lun_zone, beio->sg_segs[i].addr);
323		beio->sg_segs[i].addr = NULL;
324
325		/* For compare we had two equal S/G lists. */
326		if (ARGS(beio->io)->flags & CTL_LLF_COMPARE) {
327			uma_zfree(beio->lun->lun_zone,
328			    beio->sg_segs[i + CTLBLK_HALF_SEGS].addr);
329			beio->sg_segs[i + CTLBLK_HALF_SEGS].addr = NULL;
330		}
331	}
332
333	if (duplicate_free > 0) {
334		printf("%s: %d duplicate frees out of %d segments\n", __func__,
335		       duplicate_free, beio->num_segs);
336	}
337
338	uma_zfree(beio_zone, beio);
339}
340
341static void
342ctl_complete_beio(struct ctl_be_block_io *beio)
343{
344	union ctl_io *io = beio->io;
345
346	if (beio->beio_cont != NULL) {
347		beio->beio_cont(beio);
348	} else {
349		ctl_free_beio(beio);
350		ctl_data_submit_done(io);
351	}
352}
353
/*
 * Called by CTL when a DMA between the frontend and our S/G list has
 * completed.  Reads (and writes that were aborted or already carry an
 * error status) are completed here; successful writes are queued to
 * the LUN's taskqueue so the backing-store I/O can be issued from a
 * sleepable context, since this routine may run in interrupt context.
 */
static int
ctl_be_block_move_done(union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_lun *be_lun;
	struct ctl_lba_len_flags *lbalen;
#ifdef CTL_TIME_IO
	struct bintime cur_bt;
#endif
	int i;

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	be_lun = beio->lun;

	DPRINTF("entered\n");

#ifdef CTL_TIME_IO
	/* Account the elapsed DMA time in the I/O's timing statistics. */
	getbintime(&cur_bt);
	bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt);
	bintime_add(&io->io_hdr.dma_bt, &cur_bt);
	io->io_hdr.num_dmas++;
#endif
	io->scsiio.kern_rel_offset += io->scsiio.kern_data_len;

	/*
	 * We set status at this point for read commands, and write
	 * commands with errors.
	 */
	if (io->io_hdr.flags & CTL_FLAG_ABORT) {
		;
	} else if ((io->io_hdr.port_status == 0) &&
	    ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) {
		lbalen = ARGS(beio->io);
		if (lbalen->flags & CTL_LLF_READ) {
			ctl_set_success(&io->scsiio);
		} else if (lbalen->flags & CTL_LLF_COMPARE) {
			/* We have two data blocks ready for comparison. */
			for (i = 0; i < beio->num_segs; i++) {
				if (memcmp(beio->sg_segs[i].addr,
				    beio->sg_segs[i + CTLBLK_HALF_SEGS].addr,
				    beio->sg_segs[i].len) != 0)
					break;
			}
			if (i < beio->num_segs)
				ctl_set_sense(&io->scsiio,
				    /*current_error*/ 1,
				    /*sense_key*/ SSD_KEY_MISCOMPARE,
				    /*asc*/ 0x1D,
				    /*ascq*/ 0x00,
				    SSD_ELEM_NONE);
			else
				ctl_set_success(&io->scsiio);
		}
	} else if ((io->io_hdr.port_status != 0) &&
	    ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE ||
	     (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) {
		/*
		 * For hardware error sense keys, the sense key
		 * specific value is defined to be a retry count,
		 * but we use it to pass back an internal FETD
		 * error code.  XXX KDM  Hopefully the FETD is only
		 * using 16 bits for an error code, since that's
		 * all the space we have in the sks field.
		 */
		ctl_set_internal_failure(&io->scsiio,
					 /*sks_valid*/ 1,
					 /*retry_count*/
					 io->io_hdr.port_status);
	}

	/*
	 * If this is a read, or a write with errors, it is done.
	 */
	if ((beio->bio_cmd == BIO_READ)
	 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)
	 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) {
		ctl_complete_beio(beio);
		return (0);
	}

	/*
	 * At this point, we have a write and the DMA completed
	 * successfully.  We now have to queue it to the task queue to
	 * execute the backend I/O.  That is because we do blocking
	 * memory allocations, and in the file backing case, blocking I/O.
	 * This move done routine is generally called in the SIM's
	 * interrupt context, and therefore we cannot block.
	 */
	mtx_lock(&be_lun->queue_lock);
	/*
	 * XXX KDM make sure that links is okay to use at this point.
	 * Otherwise, we either need to add another field to ctl_io_hdr,
	 * or deal with resource allocation here.
	 */
	STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links);
	mtx_unlock(&be_lun->queue_lock);

	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);

	return (0);
}
455
/*
 * Completion callback for bios issued to a device-backed LUN.  Counts
 * errors and completed bios under io_lock; only the last bio of the
 * batch (send_complete set and all bios back) proceeds.  Errors are
 * mapped to SCSI sense; otherwise writes/flushes/deletes/verifies are
 * completed and reads start the DMA of data back to the frontend.
 */
static void
ctl_be_block_biodone(struct bio *bio)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_lun *be_lun;
	union ctl_io *io;
	int error;

	beio = bio->bio_caller1;
	be_lun = beio->lun;
	io = beio->io;

	DPRINTF("entered\n");

	error = bio->bio_error;
	mtx_lock(&be_lun->io_lock);
	if (error != 0)
		beio->num_errors++;

	beio->num_bios_done++;

	/*
	 * XXX KDM will this cause WITNESS to complain?  Holding a lock
	 * during the free might cause it to complain.
	 */
	g_destroy_bio(bio);

	/*
	 * If the send complete bit isn't set, or we aren't the last I/O to
	 * complete, then we're done.
	 */
	if ((beio->send_complete == 0)
	 || (beio->num_bios_done < beio->num_bios_sent)) {
		mtx_unlock(&be_lun->io_lock);
		return;
	}

	/*
	 * At this point, we've verified that we are the last I/O to
	 * complete, so it's safe to drop the lock.
	 */
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If there are any errors from the backing device, we fail the
	 * entire I/O with a medium error.
	 */
	if (beio->num_errors > 0) {
		/* Note: 'error' is from the last bio to complete only. */
		if (error == EOPNOTSUPP) {
			ctl_set_invalid_opcode(&io->scsiio);
		} else if (error == ENOSPC || error == EDQUOT) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else if (error == EROFS || error == EACCES) {
			ctl_set_hw_write_protected(&io->scsiio);
		} else if (beio->bio_cmd == BIO_FLUSH) {
			/* XXX KDM is there is a better error here? */
			ctl_set_internal_failure(&io->scsiio,
						 /*sks_valid*/ 1,
						 /*retry_count*/ 0xbad2);
		} else
			ctl_set_medium_error(&io->scsiio);
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write, a flush, a delete or verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE)
	 || (beio->bio_cmd == BIO_FLUSH)
	 || (beio->bio_cmd == BIO_DELETE)
	 || (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL)
			ctl_set_success(&io->scsiio);
#ifdef CTL_TIME_IO
        	getbintime(&io->io_hdr.dma_start_bt);
#endif
		ctl_datamove(io);
	}
}
544
/*
 * Flush a file-backed LUN by fsync'ing the backing vnode.  The
 * beio->io_arg flag selects MNT_NOWAIT vs MNT_WAIT for VOP_FSYNC.
 * The vn_start_write/vn_lock ordering mirrors the write path below.
 */
static void
ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
			struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct mount *mountpoint;
	int error, lock_flags;

	DPRINTF("entered\n");

	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	(void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);

	/* A shared vnode lock suffices if the filesystem allows shared writes. */
	if (MNT_SHARED_WRITES(mountpoint)
	 || ((mountpoint == NULL)
	  && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
		lock_flags = LK_SHARED;
	else
		lock_flags = LK_EXCLUSIVE;

	vn_lock(be_lun->vn, lock_flags | LK_RETRY);

	error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT,
	    curthread);
	VOP_UNLOCK(be_lun->vn, 0);

	vn_finished_write(mountpoint);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	if (error == 0)
		ctl_set_success(&io->scsiio);
	else {
		/* XXX KDM is there is a better error here? */
		ctl_set_internal_failure(&io->scsiio,
					 /*sks_valid*/ 1,
					 /*retry_count*/ 0xbad1);
	}

	ctl_complete_beio(beio);
}
594
595SDT_PROBE_DEFINE1(cbb, kernel, read, file_start, "uint64_t");
596SDT_PROBE_DEFINE1(cbb, kernel, write, file_start, "uint64_t");
597SDT_PROBE_DEFINE1(cbb, kernel, read, file_done,"uint64_t");
598SDT_PROBE_DEFINE1(cbb, kernel, write, file_done, "uint64_t");
599
/*
 * Issue a read or write against a file-backed LUN via VOP_READ or
 * VOP_WRITE, using a kernel uio built from the beio's S/G list.
 * DPO maps to IO_DIRECT and FUA (writes) to IO_SYNC.  Errors are
 * translated to SCSI sense; on success a read starts the DMA of
 * data to the frontend while writes/verifies are completed here.
 */
static void
ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
			   struct ctl_be_block_io *beio)
{
	struct ctl_be_block_filedata *file_data;
	union ctl_io *io;
	struct uio xuio;
	struct iovec *xiovec;
	int flags;
	int error, i;

	DPRINTF("entered\n");

	file_data = &be_lun->backend.file;
	io = beio->io;
	flags = 0;
	if (ARGS(io)->flags & CTL_LLF_DPO)
		flags |= IO_DIRECT;
	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
		flags |= IO_SYNC;

	bzero(&xuio, sizeof(xuio));
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0);
		xuio.uio_rw = UIO_READ;
	} else {
		SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0);
		xuio.uio_rw = UIO_WRITE;
	}
	xuio.uio_offset = beio->io_offset;
	xuio.uio_resid = beio->io_len;
	xuio.uio_segflg = UIO_SYSSPACE;
	xuio.uio_iov = beio->xiovecs;
	xuio.uio_iovcnt = beio->num_segs;
	xuio.uio_td = curthread;

	/* Mirror the S/G list into the iovec array for the uio. */
	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
		xiovec->iov_base = beio->sg_segs[i].addr;
		xiovec->iov_len = beio->sg_segs[i].len;
	}

	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	if (beio->bio_cmd == BIO_READ) {
		vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);

		/*
		 * UFS pays attention to IO_DIRECT for reads.  If the
		 * DIRECTIO option is configured into the kernel, it calls
		 * ffs_rawread().  But that only works for single-segment
		 * uios with user space addresses.  In our case, with a
		 * kernel uio, it still reads into the buffer cache, but it
		 * will just try to release the buffer from the cache later
		 * on in ffs_read().
		 *
		 * ZFS does not pay attention to IO_DIRECT for reads.
		 *
		 * UFS does not pay attention to IO_SYNC for reads.
		 *
		 * ZFS pays attention to IO_SYNC (which translates into the
		 * Solaris define FRSYNC for zfs_read()) for reads.  It
		 * attempts to sync the file before reading.
		 */
		error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred);

		VOP_UNLOCK(be_lun->vn, 0);
		SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0);
	} else {
		struct mount *mountpoint;
		int lock_flags;

		(void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);

		/* Shared vnode lock if the filesystem supports shared writes. */
		if (MNT_SHARED_WRITES(mountpoint)
		 || ((mountpoint == NULL)
		  && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
			lock_flags = LK_SHARED;
		else
			lock_flags = LK_EXCLUSIVE;

		vn_lock(be_lun->vn, lock_flags | LK_RETRY);

		/*
		 * UFS pays attention to IO_DIRECT for writes.  The write
		 * is done asynchronously.  (Normally the write would just
		 * get put into cache.
		 *
		 * UFS pays attention to IO_SYNC for writes.  It will
		 * attempt to write the buffer out synchronously if that
		 * flag is set.
		 *
		 * ZFS does not pay attention to IO_DIRECT for writes.
		 *
		 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC)
		 * for writes.  It will flush the transaction from the
		 * cache before returning.
		 */
		error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred);
		VOP_UNLOCK(be_lun->vn, 0);

		vn_finished_write(mountpoint);
		SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0);
        }

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data to "MEDIUM ERROR" and
	 * return the I/O to the user.
	 */
	if (error != 0) {
		char path_str[32];

		ctl_scsi_path_string(io, path_str, sizeof(path_str));
		printf("%s%s command returned errno %d\n", path_str,
		       (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE", error);
		if (error == ENOSPC || error == EDQUOT) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else if (error == EROFS || error == EACCES) {
			ctl_set_hw_write_protected(&io->scsiio);
		} else
			ctl_set_medium_error(&io->scsiio);
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write or a verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE) ||
	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL)
			ctl_set_success(&io->scsiio);
#ifdef CTL_TIME_IO
        	getbintime(&io->io_hdr.dma_start_bt);
#endif
		ctl_datamove(io);
	}
}
751
/*
 * Implement GET LBA STATUS for a file-backed LUN using the
 * FIOSEEKHOLE/FIOSEEKDATA vnode ioctls: a hole past the requested
 * offset means the range is mapped (status 0); data past the offset
 * means it is deallocated (status 1); otherwise the status up to end
 * of the LUN is reported as unknown/mapped.
 */
static void
ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
			struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct ctl_lba_len_flags *lbalen = ARGS(io);
	struct scsi_get_lba_status_data *data;
	off_t roff, off;
	int error, status;

	DPRINTF("entered\n");

	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
	error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off,
	    0, curthread->td_ucred, curthread);
	if (error == 0 && off > roff)
		status = 0;	/* mapped up to off */
	else {
		error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off,
		    0, curthread->td_ucred, curthread);
		if (error == 0 && off > roff)
			status = 1;	/* deallocated up to off */
		else {
			status = 0;	/* unknown up to the end */
			off = be_lun->size_bytes;
		}
	}
	VOP_UNLOCK(be_lun->vn, 0);

	/* Fill in the single descriptor, clamping the length to 32 bits. */
	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
	    lbalen->lba), data->descr[0].length);
	data->descr[0].status = status;

	ctl_complete_beio(beio);
}
790
791static uint64_t
792ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname)
793{
794	struct vattr		vattr;
795	struct statfs		statfs;
796	uint64_t		val;
797	int			error;
798
799	val = UINT64_MAX;
800	if (be_lun->vn == NULL)
801		return (val);
802	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
803	if (strcmp(attrname, "blocksused") == 0) {
804		error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
805		if (error == 0)
806			val = vattr.va_bytes / be_lun->cbe_lun.blocksize;
807	}
808	if (strcmp(attrname, "blocksavail") == 0 &&
809	    (be_lun->vn->v_iflag & VI_DOOMED) == 0) {
810		error = VFS_STATFS(be_lun->vn->v_mount, &statfs);
811		if (error == 0)
812			val = statfs.f_bavail * statfs.f_bsize /
813			    be_lun->cbe_lun.blocksize;
814	}
815	VOP_UNLOCK(be_lun->vn, 0);
816	return (val);
817}
818
/*
 * Read/write dispatcher for ZVOL-backed LUNs.  Builds a struct uio over
 * the beio's S/G segments and calls the zvol cdev's d_read/d_write entry
 * point directly.  On error the I/O is completed with sense data; on a
 * successful read the data is moved back to the initiator via
 * ctl_datamove().
 */
static void
ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
			   struct ctl_be_block_io *beio)
{
	union ctl_io *io;
	struct cdevsw *csw;
	struct cdev *dev;
	struct uio xuio;
	struct iovec *xiovec;
	int error, flags, i, ref;

	DPRINTF("entered\n");

	io = beio->io;
	flags = 0;
	/* SCSI DPO maps to direct I/O; FUA on a write forces synchronous I/O. */
	if (ARGS(io)->flags & CTL_LLF_DPO)
		flags |= IO_DIRECT;
	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
		flags |= IO_SYNC;

	bzero(&xuio, sizeof(xuio));
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0);
		xuio.uio_rw = UIO_READ;
	} else {
		SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0);
		xuio.uio_rw = UIO_WRITE;
	}
	xuio.uio_offset = beio->io_offset;
	xuio.uio_resid = beio->io_len;
	xuio.uio_segflg = UIO_SYSSPACE;
	xuio.uio_iov = beio->xiovecs;
	xuio.uio_iovcnt = beio->num_segs;
	xuio.uio_td = curthread;

	/* Mirror the beio scatter/gather list into the iovec array. */
	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
		xiovec->iov_base = beio->sg_segs[i].addr;
		xiovec->iov_len = beio->sg_segs[i].len;
	}

	/* Start devstat accounting before issuing the I/O. */
	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/* Hold a thread reference on the cdev across the d_read/d_write call. */
	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw) {
		if (beio->bio_cmd == BIO_READ)
			error = csw->d_read(dev, &xuio, flags);
		else
			error = csw->d_write(dev, &xuio, flags);
		dev_relthread(dev, ref);
	} else
		error = ENXIO;

	if (beio->bio_cmd == BIO_READ)
		SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0);
	else
		SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data to "MEDIUM ERROR" and
	 * return the I/O to the user.
	 */
	if (error != 0) {
		if (error == ENOSPC || error == EDQUOT) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else if (error == EROFS || error == EACCES) {
			ctl_set_hw_write_protected(&io->scsiio);
		} else
			ctl_set_medium_error(&io->scsiio);
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write or a verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE) ||
	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
		/* Read with no continuation: success can be set right away. */
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL)
			ctl_set_success(&io->scsiio);
#ifdef CTL_TIME_IO
        	getbintime(&io->io_hdr.dma_start_bt);
#endif
		ctl_datamove(io);
	}
}
918
/*
 * GET LBA STATUS implementation for ZVOL-backed LUNs.  Probes the device
 * with the FIOSEEKHOLE/FIOSEEKDATA ioctls to classify the extent that
 * starts at the requested LBA: descriptor status 0 means mapped (or
 * unknown), 1 means deallocated.  If neither probe advances the offset,
 * the extent is reported as unknown up to the end of the LUN.
 */
static void
ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun,
			struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct cdevsw *csw;
	struct cdev *dev;
	struct ctl_lba_len_flags *lbalen = ARGS(io);
	struct scsi_get_lba_status_data *data;
	off_t roff, off;
	int error, ref, status;

	DPRINTF("entered\n");

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw == NULL) {
		/* Device is gone; report "unknown" for the whole remainder. */
		status = 0;	/* unknown up to the end */
		off = be_lun->size_bytes;
		goto done;
	}
	/* roff is the byte offset of the starting LBA; off is advanced by seek. */
	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
	error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD,
	    curthread);
	if (error == 0 && off > roff)
		status = 0;	/* mapped up to off */
	else {
		error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD,
		    curthread);
		if (error == 0 && off > roff)
			status = 1;	/* deallocated up to off */
		else {
			status = 0;	/* unknown up to the end */
			off = be_lun->size_bytes;
		}
	}
	dev_relthread(dev, ref);

done:
	/* Fill in the single LBA status descriptor, clamped to UINT32_MAX blocks. */
	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
	    lbalen->lba), data->descr[0].length);
	data->descr[0].status = status;

	ctl_complete_beio(beio);
}
965
/*
 * Issue a cache flush (BIO_FLUSH) to a device-backed LUN.  A single bio
 * is allocated and sent via the cdev's d_strategy; completion (including
 * the ENXIO case when the device has disappeared) is handled by
 * ctl_be_block_biodone().
 */
static void
ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
		       struct ctl_be_block_io *beio)
{
	struct bio *bio;
	union ctl_io *io;
	struct cdevsw *csw;
	struct cdev *dev;
	int ref;

	io = beio->io;

	DPRINTF("entered\n");

	/* This can't fail, it's a blocking allocation. */
	bio = g_alloc_bio();

	bio->bio_cmd	    = BIO_FLUSH;
	bio->bio_offset	    = 0;
	bio->bio_data	    = 0;
	bio->bio_done	    = ctl_be_block_biodone;
	bio->bio_caller1    = beio;
	bio->bio_pblkno	    = 0;

	/*
	 * We don't need to acquire the LUN lock here, because we are only
	 * sending one bio, and so there is no other context to synchronize
	 * with.
	 */
	beio->num_bios_sent = 1;
	beio->send_complete = 1;

	/* Start devstat accounting before handing the bio to the driver. */
	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw) {
		bio->bio_dev = dev;
		csw->d_strategy(bio);
		dev_relthread(dev, ref);
	} else {
		/* Device went away: complete the bio ourselves with ENXIO. */
		bio->bio_error = ENXIO;
		ctl_be_block_biodone(bio);
	}
}
1013
/*
 * Send BIO_DELETE requests covering the byte range [off, off + len) to a
 * device-backed LUN, splitting it into chunks no larger than the biggest
 * blocksize-aligned length that fits in bio_length (a long).  The "last"
 * flag marks the final range of the whole unmap operation so that
 * send_complete is set once the last bio has been counted.
 */
static void
ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
		       struct ctl_be_block_io *beio,
		       uint64_t off, uint64_t len, int last)
{
	struct bio *bio;
	uint64_t maxlen;
	struct cdevsw *csw;
	struct cdev *dev;
	int ref;

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	/* Largest chunk that is a multiple of the blocksize and fits a long. */
	maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize);
	while (len > 0) {
		bio = g_alloc_bio();
		bio->bio_cmd	    = BIO_DELETE;
		bio->bio_dev	    = dev;
		bio->bio_offset	    = off;
		bio->bio_length	    = MIN(len, maxlen);
		bio->bio_data	    = 0;
		bio->bio_done	    = ctl_be_block_biodone;
		bio->bio_caller1    = beio;
		bio->bio_pblkno     = off / be_lun->cbe_lun.blocksize;

		off += bio->bio_length;
		len -= bio->bio_length;

		/*
		 * Account for this bio under the LUN I/O lock so the
		 * completion path sees a consistent count, and mark the
		 * operation fully sent on the very last chunk.
		 */
		mtx_lock(&be_lun->io_lock);
		beio->num_bios_sent++;
		if (last && len == 0)
			beio->send_complete = 1;
		mtx_unlock(&be_lun->io_lock);

		if (csw) {
			csw->d_strategy(bio);
		} else {
			/* Device is gone; fail the bio with ENXIO. */
			bio->bio_error = ENXIO;
			ctl_be_block_biodone(bio);
		}
	}
	if (csw)
		dev_relthread(dev, ref);
}
1057
/*
 * UNMAP / WRITE SAME(unmap) backend for device-backed LUNs.  An
 * io_offset of -1 means the request carries a list of scsi_unmap_desc
 * descriptors (UNMAP); otherwise a single contiguous range is freed
 * (WRITE SAME with the unmap bit).  Each range is handed to
 * ctl_be_block_unmap_dev_range(), with "last" set only for the final
 * descriptor.
 */
static void
ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
		       struct ctl_be_block_io *beio)
{
	union ctl_io *io;
	struct ctl_ptr_len_flags *ptrlen;
	struct scsi_unmap_desc *buf, *end;
	uint64_t len;

	io = beio->io;

	DPRINTF("entered\n");

	/* Start devstat accounting before any bios go out. */
	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	if (beio->io_offset == -1) {
		/* UNMAP: walk the descriptor list, accumulating io_len. */
		beio->io_len = 0;
		ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
		buf = (struct scsi_unmap_desc *)ptrlen->ptr;
		end = buf + ptrlen->len / sizeof(*buf);
		for (; buf < end; buf++) {
			len = (uint64_t)scsi_4btoul(buf->length) *
			    be_lun->cbe_lun.blocksize;
			beio->io_len += len;
			ctl_be_block_unmap_dev_range(be_lun, beio,
			    scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize,
			    len, (end - buf < 2) ? TRUE : FALSE);
		}
	} else
		ctl_be_block_unmap_dev_range(be_lun, beio,
		    beio->io_offset, beio->io_len, TRUE);
}
1093
/*
 * Read/write dispatcher for device-backed LUNs.  Converts the beio S/G
 * list into a chain of bios (each at most the device's max I/O size),
 * queues them locally, then fires them all through d_strategy after
 * devstat accounting has started and send_complete is set.  Completion
 * is collected by ctl_be_block_biodone().
 */
static void
ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
			  struct ctl_be_block_io *beio)
{
	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
	struct bio *bio;
	struct cdevsw *csw;
	struct cdev *dev;
	off_t cur_offset;
	int i, max_iosize, ref;

	DPRINTF("entered\n");
	csw = devvn_refthread(be_lun->vn, &dev, &ref);

	/*
	 * We have to limit our I/O size to the maximum supported by the
	 * backend device.  Hopefully it is MAXPHYS.  If the driver doesn't
	 * set it properly, use DFLTPHYS.
	 */
	if (csw) {
		max_iosize = dev->si_iosize_max;
		if (max_iosize < PAGE_SIZE)
			max_iosize = DFLTPHYS;
	} else
		max_iosize = DFLTPHYS;

	/* Split each S/G segment into bios of at most max_iosize bytes. */
	cur_offset = beio->io_offset;
	for (i = 0; i < beio->num_segs; i++) {
		size_t cur_size;
		uint8_t *cur_ptr;

		cur_size = beio->sg_segs[i].len;
		cur_ptr = beio->sg_segs[i].addr;

		while (cur_size > 0) {
			/* This can't fail, it's a blocking allocation. */
			bio = g_alloc_bio();

			KASSERT(bio != NULL, ("g_alloc_bio() failed!\n"));

			bio->bio_cmd = beio->bio_cmd;
			bio->bio_dev = dev;
			bio->bio_caller1 = beio;
			bio->bio_length = min(cur_size, max_iosize);
			bio->bio_offset = cur_offset;
			bio->bio_data = cur_ptr;
			bio->bio_done = ctl_be_block_biodone;
			bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize;

			cur_offset += bio->bio_length;
			cur_ptr += bio->bio_length;
			cur_size -= bio->bio_length;

			TAILQ_INSERT_TAIL(&queue, bio, bio_queue);
			beio->num_bios_sent++;
		}
	}
	/*
	 * All bios are counted before any are sent, so send_complete can
	 * be set up front under the I/O lock.
	 */
	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
	beio->send_complete = 1;
	mtx_unlock(&be_lun->io_lock);

	/*
	 * Fire off all allocated requests!
	 */
	while ((bio = TAILQ_FIRST(&queue)) != NULL) {
		TAILQ_REMOVE(&queue, bio, bio_queue);
		if (csw)
			csw->d_strategy(bio);
		else {
			/* Device is gone; complete each bio with ENXIO. */
			bio->bio_error = ENXIO;
			ctl_be_block_biodone(bio);
		}
	}
	if (csw)
		dev_relthread(dev, ref);
}
1172
1173static uint64_t
1174ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname)
1175{
1176	struct diocgattr_arg	arg;
1177	struct cdevsw *csw;
1178	struct cdev *dev;
1179	int error, ref;
1180
1181	csw = devvn_refthread(be_lun->vn, &dev, &ref);
1182	if (csw == NULL)
1183		return (UINT64_MAX);
1184	strlcpy(arg.name, attrname, sizeof(arg.name));
1185	arg.len = sizeof(arg.value.off);
1186	if (csw->d_ioctl) {
1187		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
1188		    curthread);
1189	} else
1190		error = ENODEV;
1191	dev_relthread(dev, ref);
1192	if (error != 0)
1193		return (UINT64_MAX);
1194	return (arg.value.off);
1195}
1196
1197static void
1198ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun,
1199			    union ctl_io *io)
1200{
1201	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1202	struct ctl_be_block_io *beio;
1203	struct ctl_lba_len_flags *lbalen;
1204
1205	DPRINTF("entered\n");
1206	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1207	lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1208
1209	beio->io_len = lbalen->len * cbe_lun->blocksize;
1210	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1211	beio->io_arg = (lbalen->flags & SSC_IMMED) != 0;
1212	beio->bio_cmd = BIO_FLUSH;
1213	beio->ds_trans_type = DEVSTAT_NO_DATA;
1214	DPRINTF("SYNC\n");
1215	be_lun->lun_flush(be_lun, beio);
1216}
1217
1218static void
1219ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio)
1220{
1221	union ctl_io *io;
1222
1223	io = beio->io;
1224	ctl_free_beio(beio);
1225	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
1226	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
1227	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
1228		ctl_config_write_done(io);
1229		return;
1230	}
1231
1232	ctl_be_block_config_write(io);
1233}
1234
/*
 * WRITE SAME dispatcher.  Rejects unsupported flag combinations, routes
 * unmap/anchor requests to the backend unmap method, and otherwise
 * materializes the repeated block pattern into S/G segments (aligned to
 * the physical block geometry where possible) and dispatches it as a
 * regular write.  If the range doesn't fit in one beio, the remainder is
 * rescheduled via the ctl_be_block_cw_done_ws continuation.
 */
static void
ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
			    union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_lba_len_flags *lbalen;
	uint64_t len_left, lba;
	uint32_t pb, pbo, adj;
	int i, seglen;
	uint8_t *buf, *end;

	DPRINTF("entered\n");

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	lbalen = ARGS(beio->io);

	/* Unknown flags, or unmap requested on a LUN that can't unmap. */
	if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) ||
	    (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) {
		ctl_free_beio(beio);
		ctl_set_invalid_field(&io->scsiio,
				      /*sks_valid*/ 1,
				      /*command*/ 1,
				      /*field*/ 1,
				      /*bit_valid*/ 0,
				      /*bit*/ 0);
		ctl_config_write_done(io);
		return;
	}

	/* UNMAP/ANCHOR: free the range instead of writing a pattern. */
	if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) {
		beio->io_offset = lbalen->lba * cbe_lun->blocksize;
		beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize;
		beio->bio_cmd = BIO_DELETE;
		beio->ds_trans_type = DEVSTAT_FREE;

		be_lun->unmap(be_lun, beio);
		return;
	}

	beio->bio_cmd = BIO_WRITE;
	beio->ds_trans_type = DEVSTAT_WRITE;

	DPRINTF("WRITE SAME at LBA %jx len %u\n",
	       (uintmax_t)lbalen->lba, lbalen->len);

	/* pb = physical block size in bytes; pbo = byte offset of its start. */
	pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp;
	if (be_lun->cbe_lun.pblockoff > 0)
		pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff;
	else
		pbo = 0;
	len_left = (uint64_t)lbalen->len * cbe_lun->blocksize;
	for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {

		/*
		 * Setup the S/G entry for this chunk.
		 */
		seglen = MIN(CTLBLK_MAX_SEG, len_left);
		if (pb > cbe_lun->blocksize) {
			/* Trim the segment to end on a physical-block boundary. */
			adj = ((lbalen->lba + lba) * cbe_lun->blocksize +
			    seglen - pbo) % pb;
			if (seglen > adj)
				seglen -= adj;
			else
				seglen -= seglen % cbe_lun->blocksize;
		} else
			seglen -= seglen % cbe_lun->blocksize;
		beio->sg_segs[i].len = seglen;
		beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);

		DPRINTF("segment %d addr %p len %zd\n", i,
			beio->sg_segs[i].addr, beio->sg_segs[i].len);

		beio->num_segs++;
		len_left -= seglen;

		/*
		 * Replicate the caller's single-block pattern into every
		 * block of the segment; with SWS_LBDATA the first four
		 * bytes of each block carry that block's LBA.
		 */
		buf = beio->sg_segs[i].addr;
		end = buf + seglen;
		for (; buf < end; buf += cbe_lun->blocksize) {
			memcpy(buf, io->scsiio.kern_data_ptr, cbe_lun->blocksize);
			if (lbalen->flags & SWS_LBDATA)
				scsi_ulto4b(lbalen->lba + lba, buf);
			lba++;
		}
	}

	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
	beio->io_len = lba * cbe_lun->blocksize;

	/* We can not do all in one run. Correct and schedule rerun. */
	if (len_left > 0) {
		lbalen->lba += lba;
		lbalen->len -= lba;
		beio->beio_cont = ctl_be_block_cw_done_ws;
	}

	be_lun->dispatch(be_lun, beio);
}
1333
1334static void
1335ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun,
1336			    union ctl_io *io)
1337{
1338	struct ctl_be_block_io *beio;
1339	struct ctl_ptr_len_flags *ptrlen;
1340
1341	DPRINTF("entered\n");
1342
1343	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1344	ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1345
1346	if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) {
1347		ctl_free_beio(beio);
1348		ctl_set_invalid_field(&io->scsiio,
1349				      /*sks_valid*/ 0,
1350				      /*command*/ 1,
1351				      /*field*/ 0,
1352				      /*bit_valid*/ 0,
1353				      /*bit*/ 0);
1354		ctl_config_write_done(io);
1355		return;
1356	}
1357
1358	beio->io_len = 0;
1359	beio->io_offset = -1;
1360	beio->bio_cmd = BIO_DELETE;
1361	beio->ds_trans_type = DEVSTAT_FREE;
1362	DPRINTF("UNMAP\n");
1363	be_lun->unmap(be_lun, beio);
1364}
1365
1366static void
1367ctl_be_block_cr_done(struct ctl_be_block_io *beio)
1368{
1369	union ctl_io *io;
1370
1371	io = beio->io;
1372	ctl_free_beio(beio);
1373	ctl_config_read_done(io);
1374}
1375
1376static void
1377ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
1378			 union ctl_io *io)
1379{
1380	struct ctl_be_block_io *beio;
1381	struct ctl_be_block_softc *softc;
1382
1383	DPRINTF("entered\n");
1384
1385	softc = be_lun->softc;
1386	beio = ctl_alloc_beio(softc);
1387	beio->io = io;
1388	beio->lun = be_lun;
1389	beio->beio_cont = ctl_be_block_cr_done;
1390	PRIV(io)->ptr = (void *)beio;
1391
1392	switch (io->scsiio.cdb[0]) {
1393	case SERVICE_ACTION_IN:		/* GET LBA STATUS */
1394		beio->bio_cmd = -1;
1395		beio->ds_trans_type = DEVSTAT_NO_DATA;
1396		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1397		beio->io_len = 0;
1398		if (be_lun->get_lba_status)
1399			be_lun->get_lba_status(be_lun, beio);
1400		else
1401			ctl_be_block_cr_done(beio);
1402		break;
1403	default:
1404		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
1405		break;
1406	}
1407}
1408
1409static void
1410ctl_be_block_cw_done(struct ctl_be_block_io *beio)
1411{
1412	union ctl_io *io;
1413
1414	io = beio->io;
1415	ctl_free_beio(beio);
1416	ctl_config_write_done(io);
1417}
1418
1419static void
1420ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
1421			 union ctl_io *io)
1422{
1423	struct ctl_be_block_io *beio;
1424	struct ctl_be_block_softc *softc;
1425
1426	DPRINTF("entered\n");
1427
1428	softc = be_lun->softc;
1429	beio = ctl_alloc_beio(softc);
1430	beio->io = io;
1431	beio->lun = be_lun;
1432	beio->beio_cont = ctl_be_block_cw_done;
1433	switch (io->scsiio.tag_type) {
1434	case CTL_TAG_ORDERED:
1435		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1436		break;
1437	case CTL_TAG_HEAD_OF_QUEUE:
1438		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
1439		break;
1440	case CTL_TAG_UNTAGGED:
1441	case CTL_TAG_SIMPLE:
1442	case CTL_TAG_ACA:
1443	default:
1444		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1445		break;
1446	}
1447	PRIV(io)->ptr = (void *)beio;
1448
1449	switch (io->scsiio.cdb[0]) {
1450	case SYNCHRONIZE_CACHE:
1451	case SYNCHRONIZE_CACHE_16:
1452		ctl_be_block_cw_dispatch_sync(be_lun, io);
1453		break;
1454	case WRITE_SAME_10:
1455	case WRITE_SAME_16:
1456		ctl_be_block_cw_dispatch_ws(be_lun, io);
1457		break;
1458	case UNMAP:
1459		ctl_be_block_cw_dispatch_unmap(be_lun, io);
1460		break;
1461	default:
1462		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
1463		break;
1464	}
1465}
1466
1467SDT_PROBE_DEFINE1(cbb, kernel, read, start, "uint64_t");
1468SDT_PROBE_DEFINE1(cbb, kernel, write, start, "uint64_t");
1469SDT_PROBE_DEFINE1(cbb, kernel, read, alloc_done, "uint64_t");
1470SDT_PROBE_DEFINE1(cbb, kernel, write, alloc_done, "uint64_t");
1471
1472static void
1473ctl_be_block_next(struct ctl_be_block_io *beio)
1474{
1475	struct ctl_be_block_lun *be_lun;
1476	union ctl_io *io;
1477
1478	io = beio->io;
1479	be_lun = beio->lun;
1480	ctl_free_beio(beio);
1481	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
1482	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
1483	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
1484		ctl_data_submit_done(io);
1485		return;
1486	}
1487
1488	io->io_hdr.status &= ~CTL_STATUS_MASK;
1489	io->io_hdr.status |= CTL_STATUS_NONE;
1490
1491	mtx_lock(&be_lun->queue_lock);
1492	/*
1493	 * XXX KDM make sure that links is okay to use at this point.
1494	 * Otherwise, we either need to add another field to ctl_io_hdr,
1495	 * or deal with resource allocation here.
1496	 */
1497	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1498	mtx_unlock(&be_lun->queue_lock);
1499
1500	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1501}
1502
/*
 * Main read/write dispatcher.  Allocates a beio, sizes this pass of the
 * I/O (a large request may take several passes, chained through
 * ctl_be_block_next), builds the S/G list -- doubled for COMPARE so the
 * read data and the initiator data land in separate halves -- and then
 * either issues the backend read or starts the datamove to fetch write
 * data from the initiator.
 */
static void
ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
			   union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;
	struct ctl_lba_len_flags *lbalen;
	struct ctl_ptr_len_flags *bptrlen;
	uint64_t len_left, lbas;
	int i;

	softc = be_lun->softc;

	DPRINTF("entered\n");

	lbalen = ARGS(io);
	if (lbalen->flags & CTL_LLF_WRITE) {
		SDT_PROBE(cbb, kernel, write, start, 0, 0, 0, 0, 0);
	} else {
		SDT_PROBE(cbb, kernel, read, start, 0, 0, 0, 0, 0);
	}

	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	/* bptrlen->len tracks how many LBAs previous passes already covered. */
	bptrlen = PRIV(io);
	bptrlen->ptr = (void *)beio;

	switch (io->scsiio.tag_type) {
	case CTL_TAG_ORDERED:
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		break;
	case CTL_TAG_HEAD_OF_QUEUE:
		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
		break;
	case CTL_TAG_UNTAGGED:
	case CTL_TAG_SIMPLE:
	case CTL_TAG_ACA:
	default:
		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
		break;
	}

	if (lbalen->flags & CTL_LLF_WRITE) {
		beio->bio_cmd = BIO_WRITE;
		beio->ds_trans_type = DEVSTAT_WRITE;
	} else {
		beio->bio_cmd = BIO_READ;
		beio->ds_trans_type = DEVSTAT_READ;
	}

	DPRINTF("%s at LBA %jx len %u @%ju\n",
	       (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE",
	       (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len);
	/* COMPARE needs two buffers, so each pass covers half the max size. */
	if (lbalen->flags & CTL_LLF_COMPARE)
		lbas = CTLBLK_HALF_IO_SIZE;
	else
		lbas = CTLBLK_MAX_IO_SIZE;
	lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize);
	beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize;
	beio->io_len = lbas * cbe_lun->blocksize;
	bptrlen->len += lbas;

	for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
		KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)",
		    i, CTLBLK_MAX_SEGS));

		/*
		 * Setup the S/G entry for this chunk.
		 */
		beio->sg_segs[i].len = min(CTLBLK_MAX_SEG, len_left);
		beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);

		DPRINTF("segment %d addr %p len %zd\n", i,
			beio->sg_segs[i].addr, beio->sg_segs[i].len);

		/* Set up second segment for compare operation. */
		if (lbalen->flags & CTL_LLF_COMPARE) {
			beio->sg_segs[i + CTLBLK_HALF_SEGS].len =
			    beio->sg_segs[i].len;
			beio->sg_segs[i + CTLBLK_HALF_SEGS].addr =
			    uma_zalloc(be_lun->lun_zone, M_WAITOK);
		}

		beio->num_segs++;
		len_left -= beio->sg_segs[i].len;
	}
	/* More LBAs remain after this pass: chain to the continuation. */
	if (bptrlen->len < lbalen->len)
		beio->beio_cont = ctl_be_block_next;
	io->scsiio.be_move_done = ctl_be_block_move_done;
	/* For compare we have separate S/G lists for read and datamove. */
	if (lbalen->flags & CTL_LLF_COMPARE)
		io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS];
	else
		io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs;
	io->scsiio.kern_data_len = beio->io_len;
	io->scsiio.kern_data_resid = 0;
	io->scsiio.kern_sg_entries = beio->num_segs;
	io->io_hdr.flags |= CTL_FLAG_ALLOCATED | CTL_FLAG_KDPTR_SGLIST;

	/*
	 * For the read case, we need to read the data into our buffers and
	 * then we can send it back to the user.  For the write case, we
	 * need to get the data from the user first.
	 */
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE(cbb, kernel, read, alloc_done, 0, 0, 0, 0, 0);
		be_lun->dispatch(be_lun, beio);
	} else {
		SDT_PROBE(cbb, kernel, write, alloc_done, 0, 0, 0, 0, 0);
#ifdef CTL_TIME_IO
        	getbintime(&io->io_hdr.dma_start_bt);
#endif
		ctl_datamove(io);
	}
}
1620
/*
 * Taskqueue worker for a LUN.  Drains the four request queues in strict
 * priority order (datamove, config write, config read, new input),
 * dropping the queue lock before dispatching each request and re-taking
 * it on the next loop iteration.
 */
static void
ctl_be_block_worker(void *context, int pending)
{
	struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context;
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	union ctl_io *io;
	struct ctl_be_block_io *beio;

	DPRINTF("entered\n");
	/*
	 * Fetch and process I/Os from all queues.  If we detect LUN
	 * CTL_LUN_FLAG_OFFLINE status here -- it is result of a race,
	 * so make response maximally opaque to not confuse initiator.
	 */
	for (;;) {
		mtx_lock(&be_lun->queue_lock);
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
		if (io != NULL) {
			DPRINTF("datamove queue\n");
			STAILQ_REMOVE(&be_lun->datamove_queue, &io->io_hdr,
				      ctl_io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);
			beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
			if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) {
				ctl_set_busy(&io->scsiio);
				ctl_complete_beio(beio);
				return;
			}
			be_lun->dispatch(be_lun, beio);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
		if (io != NULL) {
			DPRINTF("config write queue\n");
			STAILQ_REMOVE(&be_lun->config_write_queue, &io->io_hdr,
				      ctl_io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) {
				ctl_set_busy(&io->scsiio);
				ctl_config_write_done(io);
				return;
			}
			ctl_be_block_cw_dispatch(be_lun, io);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue);
		if (io != NULL) {
			DPRINTF("config read queue\n");
			STAILQ_REMOVE(&be_lun->config_read_queue, &io->io_hdr,
				      ctl_io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) {
				ctl_set_busy(&io->scsiio);
				ctl_config_read_done(io);
				return;
			}
			ctl_be_block_cr_dispatch(be_lun, io);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
		if (io != NULL) {
			DPRINTF("input queue\n");
			STAILQ_REMOVE(&be_lun->input_queue, &io->io_hdr,
				      ctl_io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) {
				ctl_set_busy(&io->scsiio);
				ctl_data_submit_done(io);
				return;
			}
			ctl_be_block_dispatch(be_lun, io);
			continue;
		}

		/*
		 * If we get here, there is no work left in the queues, so
		 * just break out and let the task queue go to sleep.
		 */
		mtx_unlock(&be_lun->queue_lock);
		break;
	}
}
1703
1704/*
1705 * Entry point from CTL to the backend for I/O.  We queue everything to a
1706 * work thread, so this just puts the I/O on a queue and wakes up the
1707 * thread.
1708 */
1709static int
1710ctl_be_block_submit(union ctl_io *io)
1711{
1712	struct ctl_be_block_lun *be_lun;
1713	struct ctl_be_lun *cbe_lun;
1714
1715	DPRINTF("entered\n");
1716
1717	cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
1718		CTL_PRIV_BACKEND_LUN].ptr;
1719	be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;
1720
1721	/*
1722	 * Make sure we only get SCSI I/O.
1723	 */
1724	KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, ("Non-SCSI I/O (type "
1725		"%#x) encountered", io->io_hdr.io_type));
1726
1727	PRIV(io)->len = 0;
1728
1729	mtx_lock(&be_lun->queue_lock);
1730	/*
1731	 * XXX KDM make sure that links is okay to use at this point.
1732	 * Otherwise, we either need to add another field to ctl_io_hdr,
1733	 * or deal with resource allocation here.
1734	 */
1735	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1736	mtx_unlock(&be_lun->queue_lock);
1737	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1738
1739	return (CTL_RETVAL_COMPLETE);
1740}
1741
1742static int
1743ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
1744			int flag, struct thread *td)
1745{
1746	struct ctl_be_block_softc *softc;
1747	int error;
1748
1749	softc = &backend_block_softc;
1750
1751	error = 0;
1752
1753	switch (cmd) {
1754	case CTL_LUN_REQ: {
1755		struct ctl_lun_req *lun_req;
1756
1757		lun_req = (struct ctl_lun_req *)addr;
1758
1759		switch (lun_req->reqtype) {
1760		case CTL_LUNREQ_CREATE:
1761			error = ctl_be_block_create(softc, lun_req);
1762			break;
1763		case CTL_LUNREQ_RM:
1764			error = ctl_be_block_rm(softc, lun_req);
1765			break;
1766		case CTL_LUNREQ_MODIFY:
1767			error = ctl_be_block_modify(softc, lun_req);
1768			break;
1769		default:
1770			lun_req->status = CTL_LUN_ERROR;
1771			snprintf(lun_req->error_str, sizeof(lun_req->error_str),
1772				 "invalid LUN request type %d",
1773				 lun_req->reqtype);
1774			break;
1775		}
1776		break;
1777	}
1778	default:
1779		error = ENOTTY;
1780		break;
1781	}
1782
1783	return (error);
1784}
1785
/*
 * Configure a LUN backed by a regular file: install the file-based
 * method table, size the LUN from the file (or the explicit request),
 * pick the logical block size, and derive physical/unmap block geometry
 * from the filesystem and the pblocksize/pblockoffset and
 * ublocksize/ublockoffset options.  The vnode is expected to be locked
 * on entry and remains locked on return (including error returns).
 */
static int
ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun;
	struct ctl_be_block_filedata *file_data;
	struct ctl_lun_create_params *params;
	char			     *value;
	struct vattr		      vattr;
	off_t			      ps, pss, po, pos, us, uss, uo, uos;
	int			      error;

	error = 0;
	cbe_lun = &be_lun->cbe_lun;
	file_data = &be_lun->backend.file;
	params = &be_lun->params;

	/* Install the file-backed method table; files cannot unmap. */
	be_lun->dev_type = CTL_BE_BLOCK_FILE;
	be_lun->dispatch = ctl_be_block_dispatch_file;
	be_lun->lun_flush = ctl_be_block_flush_file;
	be_lun->get_lba_status = ctl_be_block_gls_file;
	be_lun->getattr = ctl_be_block_getattr_file;
	be_lun->unmap = NULL;
	cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;

	error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
	if (error != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "error calling VOP_GETATTR() for file %s",
			 be_lun->dev_path);
		return (error);
	}

	/*
	 * Verify that we have the ability to upgrade to exclusive
	 * access on this file so we can trap errors at open instead
	 * of reporting them during first access.
	 */
	if (VOP_ISLOCKED(be_lun->vn) != LK_EXCLUSIVE) {
		vn_lock(be_lun->vn, LK_UPGRADE | LK_RETRY);
		/* Vnode was reclaimed while we waited for the upgrade. */
		if (be_lun->vn->v_iflag & VI_DOOMED) {
			error = EBADF;
			snprintf(req->error_str, sizeof(req->error_str),
				 "error locking file %s", be_lun->dev_path);
			return (error);
		}
	}

	file_data->cred = crhold(curthread->td_ucred);
	/* Explicit size from the request wins over the file's own size. */
	if (params->lun_size_bytes != 0)
		be_lun->size_bytes = params->lun_size_bytes;
	else
		be_lun->size_bytes = vattr.va_size;

	/*
	 * For files we can use any logical block size.  Prefer 512 bytes
	 * for compatibility reasons.  If file's vattr.va_blocksize
	 * (preferred I/O block size) is bigger and multiple to chosen
	 * logical block size -- report it as physical block size.
	 */
	if (params->blocksize_bytes != 0)
		cbe_lun->blocksize = params->blocksize_bytes;
	else
		cbe_lun->blocksize = 512;
	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
	    0 : (be_lun->size_blocks - 1);

	/* Defaults: filesystem preferred block size, zero offset. */
	us = ps = vattr.va_blocksize;
	uo = po = 0;

	value = ctl_get_opt(&cbe_lun->options, "pblocksize");
	if (value != NULL)
		ctl_expand_number(value, &ps);
	value = ctl_get_opt(&cbe_lun->options, "pblockoffset");
	if (value != NULL)
		ctl_expand_number(value, &po);
	/*
	 * Accept the physical geometry only if the size is a power-of-two
	 * multiple of the logical block size and the offset is consistent.
	 */
	pss = ps / cbe_lun->blocksize;
	pos = po / cbe_lun->blocksize;
	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
		cbe_lun->pblockexp = fls(pss) - 1;
		cbe_lun->pblockoff = (pss - pos) % pss;
	}

	value = ctl_get_opt(&cbe_lun->options, "ublocksize");
	if (value != NULL)
		ctl_expand_number(value, &us);
	value = ctl_get_opt(&cbe_lun->options, "ublockoffset");
	if (value != NULL)
		ctl_expand_number(value, &uo);
	/* Same validation for the unmap (deallocation) granularity. */
	uss = us / cbe_lun->blocksize;
	uos = uo / cbe_lun->blocksize;
	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
		cbe_lun->ublockexp = fls(uss) - 1;
		cbe_lun->ublockoff = (uss - uos) % uss;
	}

	/*
	 * Sanity check.  The media size has to be at least one
	 * sector long.
	 */
	if (be_lun->size_bytes < cbe_lun->blocksize) {
		error = EINVAL;
		snprintf(req->error_str, sizeof(req->error_str),
			 "file %s size %ju < block size %u", be_lun->dev_path,
			 (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize);
	}

	cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize;
	return (error);
}
1898
/*
 * Open-time setup for a LUN backed by a disk (character) device.
 *
 * Chooses the dispatch/flush/getattr/unmap handlers (ZFS zvols get a
 * specialized dispatch path with atomic I/O support), queries the
 * device's sector and media sizes via ioctls, derives the physical
 * and "UMA" block geometry from the GEOM stripe parameters and the
 * pblocksize/pblockoffset/ublocksize/ublockoffset options, and probes
 * UNMAP support via GEOM::candelete.
 *
 * Returns 0 on success or an errno; req->error_str is filled in on
 * failure.  The devvn thread reference is dropped on every exit path.
 */
static int
ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_lun_create_params *params;
	struct cdevsw		     *csw;
	struct cdev		     *dev;
	char			     *value;
	int			      error, atomic, maxio, ref, unmap, tmp;
	off_t			      ps, pss, po, pos, us, uss, uo, uos, otmp;

	params = &be_lun->params;

	be_lun->dev_type = CTL_BE_BLOCK_DEV;
	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw == NULL)
		return (ENXIO);
	/* zvols take a direct dispatch path and can do atomic writes. */
	if (strcmp(csw->d_name, "zvol") == 0) {
		be_lun->dispatch = ctl_be_block_dispatch_zvol;
		be_lun->get_lba_status = ctl_be_block_gls_zvol;
		atomic = maxio = CTLBLK_MAX_IO_SIZE;
	} else {
		be_lun->dispatch = ctl_be_block_dispatch_dev;
		be_lun->get_lba_status = NULL;
		atomic = 0;
		/* Honor the driver's I/O size limit, clamped to our own. */
		maxio = dev->si_iosize_max;
		if (maxio <= 0)
			maxio = DFLTPHYS;
		if (maxio > CTLBLK_MAX_IO_SIZE)
			maxio = CTLBLK_MAX_IO_SIZE;
	}
	be_lun->lun_flush = ctl_be_block_flush_dev;
	be_lun->getattr = ctl_be_block_getattr_dev;
	be_lun->unmap = ctl_be_block_unmap_dev;

	if (!csw->d_ioctl) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "no d_ioctl for device %s!", be_lun->dev_path);
		return (ENODEV);
	}

	/* Device sector size; the LUN blocksize must be a multiple of it. */
	error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD,
			       curthread);
	if (error) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "error %d returned for DIOCGSECTORSIZE ioctl "
			 "on %s!", error, be_lun->dev_path);
		return (error);
	}

	/*
	 * If the user has asked for a blocksize that is greater than the
	 * backing device's blocksize, we can do it only if the blocksize
	 * the user is asking for is an even multiple of the underlying
	 * device's blocksize.
	 */
	if ((params->blocksize_bytes != 0) &&
	    (params->blocksize_bytes >= tmp)) {
		if (params->blocksize_bytes % tmp == 0) {
			cbe_lun->blocksize = params->blocksize_bytes;
		} else {
			dev_relthread(dev, ref);
			snprintf(req->error_str, sizeof(req->error_str),
				 "requested blocksize %u is not an even "
				 "multiple of backing device blocksize %u",
				 params->blocksize_bytes, tmp);
			return (EINVAL);
		}
	} else if (params->blocksize_bytes != 0) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "requested blocksize %u < backing device "
			 "blocksize %u", params->blocksize_bytes, tmp);
		return (EINVAL);
	} else
		cbe_lun->blocksize = tmp;

	/* Total media size; a user-requested LUN size must fit inside it. */
	error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD,
			     curthread);
	if (error) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "error %d returned for DIOCGMEDIASIZE "
			 " ioctl on %s!", error,
			 be_lun->dev_path);
		return (error);
	}

	if (params->lun_size_bytes != 0) {
		if (params->lun_size_bytes > otmp) {
			dev_relthread(dev, ref);
			snprintf(req->error_str, sizeof(req->error_str),
				 "requested LUN size %ju > backing device "
				 "size %ju",
				 (uintmax_t)params->lun_size_bytes,
				 (uintmax_t)otmp);
			return (EINVAL);
		}

		be_lun->size_bytes = params->lun_size_bytes;
	} else
		be_lun->size_bytes = otmp;
	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
	    0 : (be_lun->size_blocks - 1);

	/*
	 * Default the physical/UMA geometry to the GEOM stripe size and
	 * offset; ioctl failures just mean "no stripe information".
	 */
	error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD,
	    curthread);
	if (error)
		ps = po = 0;
	else {
		error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po,
		    FREAD, curthread);
		if (error)
			po = 0;
	}
	us = ps;
	uo = po;

	/* User options override the probed physical geometry. */
	value = ctl_get_opt(&cbe_lun->options, "pblocksize");
	if (value != NULL)
		ctl_expand_number(value, &ps);
	value = ctl_get_opt(&cbe_lun->options, "pblockoffset");
	if (value != NULL)
		ctl_expand_number(value, &po);
	pss = ps / cbe_lun->blocksize;
	pos = po / cbe_lun->blocksize;
	/*
	 * Only report the geometry if it is expressible: a power-of-two
	 * number of logical blocks per physical block, with both size
	 * and offset an exact multiple of the logical block size.
	 */
	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
		cbe_lun->pblockexp = fls(pss) - 1;
		cbe_lun->pblockoff = (pss - pos) % pss;
	}

	/* Same validation for the UNMAP granularity ("UMA") geometry. */
	value = ctl_get_opt(&cbe_lun->options, "ublocksize");
	if (value != NULL)
		ctl_expand_number(value, &us);
	value = ctl_get_opt(&cbe_lun->options, "ublockoffset");
	if (value != NULL)
		ctl_expand_number(value, &uo);
	uss = us / cbe_lun->blocksize;
	uos = uo / cbe_lun->blocksize;
	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
		cbe_lun->ublockexp = fls(uss) - 1;
		cbe_lun->ublockoff = (uss - uos) % uss;
	}

	cbe_lun->atomicblock = atomic / cbe_lun->blocksize;
	cbe_lun->opttxferlen = maxio / cbe_lun->blocksize;

	/*
	 * Decide whether to advertise UNMAP: zvols always support it;
	 * other devices are asked via GEOM::candelete.  The "unmap"
	 * option overrides either answer.
	 */
	if (be_lun->dispatch == ctl_be_block_dispatch_zvol) {
		unmap = 1;
	} else {
		struct diocgattr_arg	arg;

		strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
		arg.len = sizeof(arg.value.i);
		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
		    curthread);
		unmap = (error == 0) ? arg.value.i : 0;
	}
	value = ctl_get_opt(&cbe_lun->options, "unmap");
	if (value != NULL)
		unmap = (strcmp(value, "on") == 0);
	if (unmap)
		cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
	else
		cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;

	dev_relthread(dev, ref);
	return (0);
}
2073
2074static int
2075ctl_be_block_close(struct ctl_be_block_lun *be_lun)
2076{
2077	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2078	int flags;
2079
2080	if (be_lun->vn) {
2081		flags = FREAD;
2082		if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0)
2083			flags |= FWRITE;
2084		(void)vn_close(be_lun->vn, flags, NOCRED, curthread);
2085		be_lun->vn = NULL;
2086
2087		switch (be_lun->dev_type) {
2088		case CTL_BE_BLOCK_DEV:
2089			break;
2090		case CTL_BE_BLOCK_FILE:
2091			if (be_lun->backend.file.cred != NULL) {
2092				crfree(be_lun->backend.file.cred);
2093				be_lun->backend.file.cred = NULL;
2094			}
2095			break;
2096		case CTL_BE_BLOCK_NONE:
2097			break;
2098		default:
2099			panic("Unexpected backend type.");
2100			break;
2101		}
2102		be_lun->dev_type = CTL_BE_BLOCK_NONE;
2103	}
2104	return (0);
2105}
2106
/*
 * Open the backing store named by the "file" option and finish
 * backend-specific setup.
 *
 * The open is retried read-only if a writable open fails with EROFS
 * or EACCES, and retried once with a "/dev/" prefix if a relative
 * path fails.  Dispatches to ctl_be_block_open_dev() or
 * ctl_be_block_open_file() according to the vnode type, then derives
 * the serialize-sequential policy (default, or from the "serseq"
 * option).
 *
 * Returns 0 on success, or a non-zero error with req->error_str set.
 */
static int
ctl_be_block_open(struct ctl_be_block_softc *softc,
		  struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct nameidata nd;
	char		*value;
	int		 error, flags;

	error = 0;
	/* Path lookup needs a mounted root filesystem. */
	if (rootvnode == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "Root filesystem is not mounted");
		return (1);
	}
	pwd_ensure_dirs();

	value = ctl_get_opt(&cbe_lun->options, "file");
	if (value == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "no file argument specified");
		return (1);
	}
	/* Replace any previous path with the one from this request. */
	free(be_lun->dev_path, M_CTLBLK);
	be_lun->dev_path = strdup(value, M_CTLBLK);

	flags = FREAD;
	value = ctl_get_opt(&cbe_lun->options, "readonly");
	if (value == NULL || strcmp(value, "on") != 0)
		flags |= FWRITE;

again:
	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread);
	error = vn_open(&nd, &flags, 0, NULL);
	/* Fall back to a read-only open if writing is denied. */
	if ((error == EROFS || error == EACCES) && (flags & FWRITE)) {
		flags &= ~FWRITE;
		goto again;
	}
	if (error) {
		/*
		 * This is the only reasonable guess we can make as far as
		 * path if the user doesn't give us a fully qualified path.
		 * If they want to specify a file, they need to specify the
		 * full path.
		 */
		if (be_lun->dev_path[0] != '/') {
			char *dev_name;

			asprintf(&dev_name, M_CTLBLK, "/dev/%s",
				be_lun->dev_path);
			free(be_lun->dev_path, M_CTLBLK);
			be_lun->dev_path = dev_name;
			goto again;
		}
		snprintf(req->error_str, sizeof(req->error_str),
		    "error opening %s: %d", be_lun->dev_path, error);
		return (error);
	}
	/* Record the effective access mode on the LUN. */
	if (flags & FWRITE)
		cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY;
	else
		cbe_lun->flags |= CTL_LUN_FLAG_READONLY;

	NDFREE(&nd, NDF_ONLY_PNBUF);
	be_lun->vn = nd.ni_vp;

	/* We only support disks and files. */
	if (vn_isdisk(be_lun->vn, &error)) {
		error = ctl_be_block_open_dev(be_lun, req);
	} else if (be_lun->vn->v_type == VREG) {
		error = ctl_be_block_open_file(be_lun, req);
	} else {
		error = EINVAL;
		snprintf(req->error_str, sizeof(req->error_str),
			 "%s is not a disk or plain file", be_lun->dev_path);
	}
	VOP_UNLOCK(be_lun->vn, 0);

	if (error != 0)
		ctl_be_block_close(be_lun);
	/* Serialize-sequential default, overridable by the "serseq" option. */
	cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
	if (be_lun->dispatch != ctl_be_block_dispatch_dev)
		cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
	value = ctl_get_opt(&cbe_lun->options, "serseq");
	if (value != NULL && strcmp(value, "on") == 0)
		cbe_lun->serseq = CTL_LUN_SERSEQ_ON;
	else if (value != NULL && strcmp(value, "read") == 0)
		cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
	else if (value != NULL && strcmp(value, "off") == 0)
		cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
	return (0);
}
2199
2200static int
2201ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2202{
2203	struct ctl_be_lun *cbe_lun;
2204	struct ctl_be_block_lun *be_lun;
2205	struct ctl_lun_create_params *params;
2206	char num_thread_str[16];
2207	char tmpstr[32];
2208	char *value;
2209	int retval, num_threads;
2210	int tmp_num_threads;
2211
2212	params = &req->reqdata.create;
2213	retval = 0;
2214	req->status = CTL_LUN_OK;
2215
2216	be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK);
2217	cbe_lun = &be_lun->cbe_lun;
2218	cbe_lun->be_lun = be_lun;
2219	be_lun->params = req->reqdata.create;
2220	be_lun->softc = softc;
2221	STAILQ_INIT(&be_lun->input_queue);
2222	STAILQ_INIT(&be_lun->config_read_queue);
2223	STAILQ_INIT(&be_lun->config_write_queue);
2224	STAILQ_INIT(&be_lun->datamove_queue);
2225	sprintf(be_lun->lunname, "cblk%d", softc->num_luns);
2226	mtx_init(&be_lun->io_lock, "cblk io lock", NULL, MTX_DEF);
2227	mtx_init(&be_lun->queue_lock, "cblk queue lock", NULL, MTX_DEF);
2228	ctl_init_opts(&cbe_lun->options,
2229	    req->num_be_args, req->kern_be_args);
2230	be_lun->lun_zone = uma_zcreate(be_lun->lunname, CTLBLK_MAX_SEG,
2231	    NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
2232	if (be_lun->lun_zone == NULL) {
2233		snprintf(req->error_str, sizeof(req->error_str),
2234			 "error allocating UMA zone");
2235		goto bailout_error;
2236	}
2237
2238	if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
2239		cbe_lun->lun_type = params->device_type;
2240	else
2241		cbe_lun->lun_type = T_DIRECT;
2242	be_lun->flags = CTL_BE_BLOCK_LUN_UNCONFIGURED;
2243	cbe_lun->flags = 0;
2244	value = ctl_get_opt(&cbe_lun->options, "ha_role");
2245	if (value != NULL) {
2246		if (strcmp(value, "primary") == 0)
2247			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2248	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
2249		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2250
2251	if (cbe_lun->lun_type == T_DIRECT) {
2252		be_lun->size_bytes = params->lun_size_bytes;
2253		if (params->blocksize_bytes != 0)
2254			cbe_lun->blocksize = params->blocksize_bytes;
2255		else
2256			cbe_lun->blocksize = 512;
2257		be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2258		cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2259		    0 : (be_lun->size_blocks - 1);
2260
2261		if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
2262		    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
2263			retval = ctl_be_block_open(softc, be_lun, req);
2264			if (retval != 0) {
2265				retval = 0;
2266				req->status = CTL_LUN_WARNING;
2267			}
2268		}
2269		num_threads = cbb_num_threads;
2270	} else {
2271		num_threads = 1;
2272	}
2273
2274	/*
2275	 * XXX This searching loop might be refactored to be combined with
2276	 * the loop above,
2277	 */
2278	value = ctl_get_opt(&cbe_lun->options, "num_threads");
2279	if (value != NULL) {
2280		tmp_num_threads = strtol(value, NULL, 0);
2281
2282		/*
2283		 * We don't let the user specify less than one
2284		 * thread, but hope he's clueful enough not to
2285		 * specify 1000 threads.
2286		 */
2287		if (tmp_num_threads < 1) {
2288			snprintf(req->error_str, sizeof(req->error_str),
2289				 "invalid number of threads %s",
2290				 num_thread_str);
2291			goto bailout_error;
2292		}
2293		num_threads = tmp_num_threads;
2294	}
2295
2296	if (be_lun->vn == NULL)
2297		cbe_lun->flags |= CTL_LUN_FLAG_OFFLINE;
2298	/* Tell the user the blocksize we ended up using */
2299	params->lun_size_bytes = be_lun->size_bytes;
2300	params->blocksize_bytes = cbe_lun->blocksize;
2301	if (params->flags & CTL_LUN_FLAG_ID_REQ) {
2302		cbe_lun->req_lun_id = params->req_lun_id;
2303		cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ;
2304	} else
2305		cbe_lun->req_lun_id = 0;
2306
2307	cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown;
2308	cbe_lun->lun_config_status = ctl_be_block_lun_config_status;
2309	cbe_lun->be = &ctl_be_block_driver;
2310
2311	if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
2312		snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%4d",
2313			 softc->num_luns);
2314		strncpy((char *)cbe_lun->serial_num, tmpstr,
2315			MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr)));
2316
2317		/* Tell the user what we used for a serial number */
2318		strncpy((char *)params->serial_num, tmpstr,
2319			MIN(sizeof(params->serial_num), sizeof(tmpstr)));
2320	} else {
2321		strncpy((char *)cbe_lun->serial_num, params->serial_num,
2322			MIN(sizeof(cbe_lun->serial_num),
2323			sizeof(params->serial_num)));
2324	}
2325	if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
2326		snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%4d", softc->num_luns);
2327		strncpy((char *)cbe_lun->device_id, tmpstr,
2328			MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr)));
2329
2330		/* Tell the user what we used for a device ID */
2331		strncpy((char *)params->device_id, tmpstr,
2332			MIN(sizeof(params->device_id), sizeof(tmpstr)));
2333	} else {
2334		strncpy((char *)cbe_lun->device_id, params->device_id,
2335			MIN(sizeof(cbe_lun->device_id),
2336			    sizeof(params->device_id)));
2337	}
2338
2339	TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun);
2340
2341	be_lun->io_taskqueue = taskqueue_create(be_lun->lunname, M_WAITOK,
2342	    taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);
2343
2344	if (be_lun->io_taskqueue == NULL) {
2345		snprintf(req->error_str, sizeof(req->error_str),
2346			 "unable to create taskqueue");
2347		goto bailout_error;
2348	}
2349
2350	/*
2351	 * Note that we start the same number of threads by default for
2352	 * both the file case and the block device case.  For the file
2353	 * case, we need multiple threads to allow concurrency, because the
2354	 * vnode interface is designed to be a blocking interface.  For the
2355	 * block device case, ZFS zvols at least will block the caller's
2356	 * context in many instances, and so we need multiple threads to
2357	 * overcome that problem.  Other block devices don't need as many
2358	 * threads, but they shouldn't cause too many problems.
2359	 *
2360	 * If the user wants to just have a single thread for a block
2361	 * device, he can specify that when the LUN is created, or change
2362	 * the tunable/sysctl to alter the default number of threads.
2363	 */
2364	retval = taskqueue_start_threads(&be_lun->io_taskqueue,
2365					 /*num threads*/num_threads,
2366					 /*priority*/PWAIT,
2367					 /*thread name*/
2368					 "%s taskq", be_lun->lunname);
2369
2370	if (retval != 0)
2371		goto bailout_error;
2372
2373	be_lun->num_threads = num_threads;
2374
2375	mtx_lock(&softc->lock);
2376	softc->num_luns++;
2377	STAILQ_INSERT_TAIL(&softc->lun_list, be_lun, links);
2378
2379	mtx_unlock(&softc->lock);
2380
2381	retval = ctl_add_lun(&be_lun->cbe_lun);
2382	if (retval != 0) {
2383		mtx_lock(&softc->lock);
2384		STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
2385			      links);
2386		softc->num_luns--;
2387		mtx_unlock(&softc->lock);
2388		snprintf(req->error_str, sizeof(req->error_str),
2389			 "ctl_add_lun() returned error %d, see dmesg for "
2390			 "details", retval);
2391		retval = 0;
2392		goto bailout_error;
2393	}
2394
2395	mtx_lock(&softc->lock);
2396
2397	/*
2398	 * Tell the config_status routine that we're waiting so it won't
2399	 * clean up the LUN in the event of an error.
2400	 */
2401	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2402
2403	while (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) {
2404		retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
2405		if (retval == EINTR)
2406			break;
2407	}
2408	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2409
2410	if (be_lun->flags & CTL_BE_BLOCK_LUN_CONFIG_ERR) {
2411		snprintf(req->error_str, sizeof(req->error_str),
2412			 "LUN configuration error, see dmesg for details");
2413		STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
2414			      links);
2415		softc->num_luns--;
2416		mtx_unlock(&softc->lock);
2417		goto bailout_error;
2418	} else {
2419		params->req_lun_id = cbe_lun->lun_id;
2420	}
2421
2422	mtx_unlock(&softc->lock);
2423
2424	be_lun->disk_stats = devstat_new_entry("cbb", params->req_lun_id,
2425					       cbe_lun->blocksize,
2426					       DEVSTAT_ALL_SUPPORTED,
2427					       cbe_lun->lun_type
2428					       | DEVSTAT_TYPE_IF_OTHER,
2429					       DEVSTAT_PRIORITY_OTHER);
2430
2431	return (retval);
2432
2433bailout_error:
2434	req->status = CTL_LUN_ERROR;
2435
2436	if (be_lun->io_taskqueue != NULL)
2437		taskqueue_free(be_lun->io_taskqueue);
2438	ctl_be_block_close(be_lun);
2439	if (be_lun->dev_path != NULL)
2440		free(be_lun->dev_path, M_CTLBLK);
2441	if (be_lun->lun_zone != NULL)
2442		uma_zdestroy(be_lun->lun_zone);
2443	ctl_free_opts(&cbe_lun->options);
2444	mtx_destroy(&be_lun->queue_lock);
2445	mtx_destroy(&be_lun->io_lock);
2446	free(be_lun, M_CTLBLK);
2447
2448	return (retval);
2449}
2450
/*
 * Remove an existing block-backend LUN.
 *
 * Looks the LUN up by id, disables it, closes the backing store,
 * invalidates it with CTL, and then sleeps until the shutdown
 * callback marks it unconfigured before freeing all resources.
 * req->status is set to CTL_LUN_OK or CTL_LUN_ERROR (with
 * req->error_str); the function itself always returns 0.
 */
static int
ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
{
	struct ctl_lun_rm_params *params;
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *cbe_lun;
	int retval;

	params = &req->reqdata.rm;

	/* Find the LUN by id under the softc lock. */
	mtx_lock(&softc->lock);
	STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
		if (be_lun->cbe_lun.lun_id == params->lun_id)
			break;
	}
	mtx_unlock(&softc->lock);

	if (be_lun == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "LUN %u is not managed by the block backend",
			 params->lun_id);
		goto bailout_error;
	}
	cbe_lun = &be_lun->cbe_lun;

	retval = ctl_disable_lun(cbe_lun);
	if (retval != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "error %d returned from ctl_disable_lun() for "
			 "LUN %d", retval, params->lun_id);
		goto bailout_error;
	}

	/* Drain in-flight I/O and close the backing store, if open. */
	if (be_lun->vn != NULL) {
		cbe_lun->flags |= CTL_LUN_FLAG_OFFLINE;
		ctl_lun_offline(cbe_lun);
		taskqueue_drain_all(be_lun->io_taskqueue);
		ctl_be_block_close(be_lun);
	}

	retval = ctl_invalidate_lun(cbe_lun);
	if (retval != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "error %d returned from ctl_invalidate_lun() for "
			 "LUN %d", retval, params->lun_id);
		goto bailout_error;
	}

	/*
	 * Wait for the shutdown callback to flag the LUN unconfigured;
	 * a signal (EINTR) aborts the wait and the removal fails below.
	 */
	mtx_lock(&softc->lock);
	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
	while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
                retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
                if (retval == EINTR)
                        break;
        }
	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;

	if ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "interrupted waiting for LUN to be freed");
		mtx_unlock(&softc->lock);
		goto bailout_error;
	}

	STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, links);

	softc->num_luns--;
	mtx_unlock(&softc->lock);

	/* Tear down the remaining per-LUN resources. */
	taskqueue_drain_all(be_lun->io_taskqueue);
	taskqueue_free(be_lun->io_taskqueue);

	if (be_lun->disk_stats != NULL)
		devstat_remove_entry(be_lun->disk_stats);

	uma_zdestroy(be_lun->lun_zone);

	ctl_free_opts(&cbe_lun->options);
	free(be_lun->dev_path, M_CTLBLK);
	mtx_destroy(&be_lun->queue_lock);
	mtx_destroy(&be_lun->io_lock);
	free(be_lun, M_CTLBLK);

	req->status = CTL_LUN_OK;

	return (0);

bailout_error:

	req->status = CTL_LUN_ERROR;

	return (0);
}
2544
2545static int
2546ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun,
2547			 struct ctl_lun_req *req)
2548{
2549	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2550	struct vattr vattr;
2551	int error;
2552	struct ctl_lun_create_params *params = &be_lun->params;
2553
2554	if (params->lun_size_bytes != 0) {
2555		be_lun->size_bytes = params->lun_size_bytes;
2556	} else  {
2557		vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
2558		error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
2559		VOP_UNLOCK(be_lun->vn, 0);
2560		if (error != 0) {
2561			snprintf(req->error_str, sizeof(req->error_str),
2562				 "error calling VOP_GETATTR() for file %s",
2563				 be_lun->dev_path);
2564			return (error);
2565		}
2566		be_lun->size_bytes = vattr.va_size;
2567	}
2568	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2569	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2570	    0 : (be_lun->size_blocks - 1);
2571	return (0);
2572}
2573
2574static int
2575ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun,
2576			struct ctl_lun_req *req)
2577{
2578	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2579	struct ctl_lun_create_params *params = &be_lun->params;
2580	struct cdevsw *csw;
2581	struct cdev *dev;
2582	uint64_t size_bytes;
2583	int error, ref;
2584
2585	csw = devvn_refthread(be_lun->vn, &dev, &ref);
2586	if (csw == NULL)
2587		return (ENXIO);
2588	if (csw->d_ioctl == NULL) {
2589		dev_relthread(dev, ref);
2590		snprintf(req->error_str, sizeof(req->error_str),
2591			 "no d_ioctl for device %s!", be_lun->dev_path);
2592		return (ENODEV);
2593	}
2594
2595	error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&size_bytes, FREAD,
2596	    curthread);
2597	dev_relthread(dev, ref);
2598	if (error) {
2599		snprintf(req->error_str, sizeof(req->error_str),
2600			 "error %d returned for DIOCGMEDIASIZE ioctl "
2601			 "on %s!", error, be_lun->dev_path);
2602		return (error);
2603	}
2604
2605	if (params->lun_size_bytes != 0) {
2606		if (params->lun_size_bytes > size_bytes) {
2607			snprintf(req->error_str, sizeof(req->error_str),
2608				 "requested LUN size %ju > backing device "
2609				 "size %ju",
2610				 (uintmax_t)params->lun_size_bytes,
2611				 (uintmax_t)size_bytes);
2612			return (EINVAL);
2613		}
2614		be_lun->size_bytes = params->lun_size_bytes;
2615	} else {
2616		be_lun->size_bytes = size_bytes;
2617	}
2618	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2619	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2620	    0 : (be_lun->size_blocks - 1);
2621	return (0);
2622}
2623
/*
 * Modify an existing block-backend LUN.
 *
 * Updates the LUN's options, re-evaluates the HA primary/secondary
 * role, and on the active node (re)opens or resizes the backing
 * store; on a passive node the backing store is closed and the LUN
 * taken offline.  Notifies CTL if the capacity changed.  req->status
 * is set to CTL_LUN_OK, CTL_LUN_WARNING, or CTL_LUN_ERROR; the
 * function itself always returns 0.
 */
static int
ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
{
	struct ctl_lun_modify_params *params;
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *cbe_lun;
	char *value;
	uint64_t oldsize;
	int error, wasprim;

	params = &req->reqdata.modify;

	/* Find the LUN by id under the softc lock. */
	mtx_lock(&softc->lock);
	STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
		if (be_lun->cbe_lun.lun_id == params->lun_id)
			break;
	}
	mtx_unlock(&softc->lock);

	if (be_lun == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "LUN %u is not managed by the block backend",
			 params->lun_id);
		goto bailout_error;
	}
	cbe_lun = &be_lun->cbe_lun;

	if (params->lun_size_bytes != 0)
		be_lun->params.lun_size_bytes = params->lun_size_bytes;
	ctl_update_opts(&cbe_lun->options, req->num_be_args, req->kern_be_args);

	/* Recompute the HA role and notify CTL if it flipped. */
	wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY);
	value = ctl_get_opt(&cbe_lun->options, "ha_role");
	if (value != NULL) {
		if (strcmp(value, "primary") == 0)
			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
		else
			cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
	else
		cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
	if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) {
		if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)
			ctl_lun_primary(cbe_lun);
		else
			ctl_lun_secondary(cbe_lun);
	}

	oldsize = be_lun->size_blocks;
	if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
	    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
		/* Active node: (re)open or resize the backing store. */
		if (be_lun->vn == NULL)
			error = ctl_be_block_open(softc, be_lun, req);
		else if (vn_isdisk(be_lun->vn, &error))
			error = ctl_be_block_modify_dev(be_lun, req);
		else if (be_lun->vn->v_type == VREG)
			error = ctl_be_block_modify_file(be_lun, req);
		else
			error = EINVAL;
		if ((cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) &&
		    be_lun->vn != NULL) {
			cbe_lun->flags &= ~CTL_LUN_FLAG_OFFLINE;
			ctl_lun_online(cbe_lun);
		}
	} else {
		/* Passive node: close the backing store and go offline. */
		if (be_lun->vn != NULL) {
			cbe_lun->flags |= CTL_LUN_FLAG_OFFLINE;
			ctl_lun_offline(cbe_lun);
			taskqueue_drain_all(be_lun->io_taskqueue);
			error = ctl_be_block_close(be_lun);
		} else
			error = 0;
	}
	if (be_lun->size_blocks != oldsize)
		ctl_lun_capacity_changed(cbe_lun);

	/* Tell the user the exact size we ended up using */
	params->lun_size_bytes = be_lun->size_bytes;

	req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK;
	return (0);

bailout_error:
	req->status = CTL_LUN_ERROR;
	return (0);
}
2711
2712static void
2713ctl_be_block_lun_shutdown(void *be_lun)
2714{
2715	struct ctl_be_block_lun *lun;
2716	struct ctl_be_block_softc *softc;
2717
2718	lun = (struct ctl_be_block_lun *)be_lun;
2719
2720	softc = lun->softc;
2721
2722	mtx_lock(&softc->lock);
2723	lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED;
2724	if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2725		wakeup(lun);
2726	mtx_unlock(&softc->lock);
2727
2728}
2729
2730static void
2731ctl_be_block_lun_config_status(void *be_lun, ctl_lun_config_status status)
2732{
2733	struct ctl_be_block_lun *lun;
2734	struct ctl_be_block_softc *softc;
2735
2736	lun = (struct ctl_be_block_lun *)be_lun;
2737	softc = lun->softc;
2738
2739	if (status == CTL_LUN_CONFIG_OK) {
2740		mtx_lock(&softc->lock);
2741		lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
2742		if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2743			wakeup(lun);
2744		mtx_unlock(&softc->lock);
2745
2746		/*
2747		 * We successfully added the LUN, attempt to enable it.
2748		 */
2749		if (ctl_enable_lun(&lun->cbe_lun) != 0) {
2750			printf("%s: ctl_enable_lun() failed!\n", __func__);
2751			if (ctl_invalidate_lun(&lun->cbe_lun) != 0) {
2752				printf("%s: ctl_invalidate_lun() failed!\n",
2753				       __func__);
2754			}
2755		}
2756
2757		return;
2758	}
2759
2760
2761	mtx_lock(&softc->lock);
2762	lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
2763	lun->flags |= CTL_BE_BLOCK_LUN_CONFIG_ERR;
2764	wakeup(lun);
2765	mtx_unlock(&softc->lock);
2766}
2767
2768
2769static int
2770ctl_be_block_config_write(union ctl_io *io)
2771{
2772	struct ctl_be_block_lun *be_lun;
2773	struct ctl_be_lun *cbe_lun;
2774	int retval;
2775
2776	retval = 0;
2777
2778	DPRINTF("entered\n");
2779
2780	cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
2781		CTL_PRIV_BACKEND_LUN].ptr;
2782	be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;
2783
2784	switch (io->scsiio.cdb[0]) {
2785	case SYNCHRONIZE_CACHE:
2786	case SYNCHRONIZE_CACHE_16:
2787	case WRITE_SAME_10:
2788	case WRITE_SAME_16:
2789	case UNMAP:
2790		/*
2791		 * The upper level CTL code will filter out any CDBs with
2792		 * the immediate bit set and return the proper error.
2793		 *
2794		 * We don't really need to worry about what LBA range the
2795		 * user asked to be synced out.  When they issue a sync
2796		 * cache command, we'll sync out the whole thing.
2797		 */
2798		mtx_lock(&be_lun->queue_lock);
2799		STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
2800				   links);
2801		mtx_unlock(&be_lun->queue_lock);
2802		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
2803		break;
2804	case START_STOP_UNIT: {
2805		struct scsi_start_stop_unit *cdb;
2806
2807		cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
2808
2809		if (cdb->how & SSS_START)
2810			retval = ctl_start_lun(cbe_lun);
2811		else {
2812			retval = ctl_stop_lun(cbe_lun);
2813			/*
2814			 * XXX KDM Copan-specific offline behavior.
2815			 * Figure out a reasonable way to port this?
2816			 */
2817#ifdef NEEDTOPORT
2818			if ((retval == 0)
2819			 && (cdb->byte2 & SSS_ONOFFLINE))
2820				retval = ctl_lun_offline(cbe_lun);
2821#endif
2822		}
2823
2824		/*
2825		 * In general, the above routines should not fail.  They
2826		 * just set state for the LUN.  So we've got something
2827		 * pretty wrong here if we can't start or stop the LUN.
2828		 */
2829		if (retval != 0) {
2830			ctl_set_internal_failure(&io->scsiio,
2831						 /*sks_valid*/ 1,
2832						 /*retry_count*/ 0xf051);
2833			retval = CTL_RETVAL_COMPLETE;
2834		} else {
2835			ctl_set_success(&io->scsiio);
2836		}
2837		ctl_config_write_done(io);
2838		break;
2839	}
2840	default:
2841		ctl_set_invalid_opcode(&io->scsiio);
2842		ctl_config_write_done(io);
2843		retval = CTL_RETVAL_COMPLETE;
2844		break;
2845	}
2846
2847	return (retval);
2848}
2849
2850static int
2851ctl_be_block_config_read(union ctl_io *io)
2852{
2853	struct ctl_be_block_lun *be_lun;
2854	struct ctl_be_lun *cbe_lun;
2855	int retval = 0;
2856
2857	DPRINTF("entered\n");
2858
2859	cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
2860		CTL_PRIV_BACKEND_LUN].ptr;
2861	be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;
2862
2863	switch (io->scsiio.cdb[0]) {
2864	case SERVICE_ACTION_IN:
2865		if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) {
2866			mtx_lock(&be_lun->queue_lock);
2867			STAILQ_INSERT_TAIL(&be_lun->config_read_queue,
2868			    &io->io_hdr, links);
2869			mtx_unlock(&be_lun->queue_lock);
2870			taskqueue_enqueue(be_lun->io_taskqueue,
2871			    &be_lun->io_task);
2872			retval = CTL_RETVAL_QUEUED;
2873			break;
2874		}
2875		ctl_set_invalid_field(&io->scsiio,
2876				      /*sks_valid*/ 1,
2877				      /*command*/ 1,
2878				      /*field*/ 1,
2879				      /*bit_valid*/ 1,
2880				      /*bit*/ 4);
2881		ctl_config_read_done(io);
2882		retval = CTL_RETVAL_COMPLETE;
2883		break;
2884	default:
2885		ctl_set_invalid_opcode(&io->scsiio);
2886		ctl_config_read_done(io);
2887		retval = CTL_RETVAL_COMPLETE;
2888		break;
2889	}
2890
2891	return (retval);
2892}
2893
2894static int
2895ctl_be_block_lun_info(void *be_lun, struct sbuf *sb)
2896{
2897	struct ctl_be_block_lun *lun;
2898	int retval;
2899
2900	lun = (struct ctl_be_block_lun *)be_lun;
2901	retval = 0;
2902
2903	retval = sbuf_printf(sb, "\t<num_threads>");
2904
2905	if (retval != 0)
2906		goto bailout;
2907
2908	retval = sbuf_printf(sb, "%d", lun->num_threads);
2909
2910	if (retval != 0)
2911		goto bailout;
2912
2913	retval = sbuf_printf(sb, "</num_threads>\n");
2914
2915bailout:
2916
2917	return (retval);
2918}
2919
2920static uint64_t
2921ctl_be_block_lun_attr(void *be_lun, const char *attrname)
2922{
2923	struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)be_lun;
2924
2925	if (lun->getattr == NULL)
2926		return (UINT64_MAX);
2927	return (lun->getattr(lun, attrname));
2928}
2929
2930int
2931ctl_be_block_init(void)
2932{
2933	struct ctl_be_block_softc *softc;
2934	int retval;
2935
2936	softc = &backend_block_softc;
2937	retval = 0;
2938
2939	mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF);
2940	beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
2941	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2942	STAILQ_INIT(&softc->lun_list);
2943
2944	return (retval);
2945}
2946