ctl_backend_block.c revision 267992
/*-
 * Copyright (c) 2003 Silicon Graphics International Corp.
 * Copyright (c) 2009-2011 Spectra Logic Corporation
 * Copyright (c) 2012 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by Edward Tomasz Napierala
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification.
 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
 *    substantially similar to the "NO WARRANTY" disclaimer below
 *    ("Disclaimer") and any redistribution must be conditioned upon
 *    including a substantially similar Disclaimer requirement for further
 *    binary redistribution.
 *
 * NO WARRANTY
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGES.
 *
 * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $
 */
/*
 * CAM Target Layer driver backend for block devices.
 *
 * Author: Ken Merry <ken@FreeBSD.org>
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/cam/ctl/ctl_backend_block.c 267992 2014-06-28 03:56:17Z hselasky $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/kthread.h>
#include <sys/bio.h>
#include <sys/fcntl.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/condvar.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/ioccom.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/endian.h>
#include <sys/uio.h>
#include <sys/buf.h>
#include <sys/taskqueue.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/mount.h>
#include <sys/disk.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/proc.h>
#include <sys/pcpu.h>
#include <sys/module.h>
#include <sys/sdt.h>
#include <sys/devicestat.h>
#include <sys/sysctl.h>

#include <geom/geom.h>

#include <cam/cam.h>
#include <cam/scsi/scsi_all.h>
#include <cam/scsi/scsi_da.h>
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_backend.h>
#include <cam/ctl/ctl_frontend_internal.h>
#include <cam/ctl/ctl_ioctl.h>
#include <cam/ctl/ctl_scsi_all.h>
#include <cam/ctl/ctl_error.h>

/*
 * The idea here is that we'll allocate enough S/G space to hold a 1MB
 * I/O.  If we get an I/O larger than that, we'll split it.
 */
#define	CTLBLK_HALF_IO_SIZE	(512 * 1024)
#define	CTLBLK_MAX_IO_SIZE	(CTLBLK_HALF_IO_SIZE * 2)
#define	CTLBLK_MAX_SEG		MAXPHYS
#define	CTLBLK_HALF_SEGS	MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MAX_SEG, 1)
#define	CTLBLK_MAX_SEGS		(CTLBLK_HALF_SEGS * 2)
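/*
 * The "half" constants exist because a COMPARE/VERIFY request keeps two
 * copies of the data in a single beio (the copy read from the backing store
 * and the copy DMAed from the initiator), so each copy may use at most half
 * of the S/G space.
 */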

#ifdef CTLBLK_DEBUG
#define DPRINTF(fmt, args...) \
    printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
#else
#define DPRINTF(fmt, args...) do {} while(0)
#endif

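/*
 * PRIV() returns this backend's scratch area in the I/O header (a pointer to
 * the active beio plus a running LBA count), while ARGS() returns the
 * LBA/length/flags that the CTL core decoded from the CDB.
 */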
#define PRIV(io)	\
    ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
#define ARGS(io)	\
    ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])

SDT_PROVIDER_DEFINE(cbb);

typedef enum {
	CTL_BE_BLOCK_LUN_UNCONFIGURED	= 0x01,
	CTL_BE_BLOCK_LUN_CONFIG_ERR	= 0x02,
	CTL_BE_BLOCK_LUN_WAITING	= 0x04,
	CTL_BE_BLOCK_LUN_MULTI_THREAD	= 0x08
} ctl_be_block_lun_flags;

typedef enum {
	CTL_BE_BLOCK_NONE,
	CTL_BE_BLOCK_DEV,
	CTL_BE_BLOCK_FILE
} ctl_be_block_type;

struct ctl_be_block_devdata {
	struct cdev *cdev;
	struct cdevsw *csw;
	int dev_ref;
};

struct ctl_be_block_filedata {
	struct ucred *cred;
};

union ctl_be_block_bedata {
	struct ctl_be_block_devdata dev;
	struct ctl_be_block_filedata file;
};

struct ctl_be_block_io;
struct ctl_be_block_lun;

typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
			       struct ctl_be_block_io *beio);

/*
 * Backend LUN structure.  There is a 1:1 mapping between a block device
 * and a backend block LUN, and between a backend block LUN and a CTL LUN.
 */
struct ctl_be_block_lun {
	struct ctl_block_disk *disk;
	char lunname[32];
	char *dev_path;
	ctl_be_block_type dev_type;
	struct vnode *vn;
	union ctl_be_block_bedata backend;
	cbb_dispatch_t dispatch;
	cbb_dispatch_t lun_flush;
	cbb_dispatch_t unmap;
	uma_zone_t lun_zone;
	uint64_t size_blocks;
	uint64_t size_bytes;
	uint32_t blocksize;
	int blocksize_shift;
	uint16_t pblockexp;
	uint16_t pblockoff;
	struct ctl_be_block_softc *softc;
	struct devstat *disk_stats;
	ctl_be_block_lun_flags flags;
	STAILQ_ENTRY(ctl_be_block_lun) links;
	struct ctl_be_lun ctl_be_lun;
	struct taskqueue *io_taskqueue;
	struct task io_task;
	int num_threads;
	STAILQ_HEAD(, ctl_io_hdr) input_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
	struct mtx_padalign io_lock;
	struct mtx_padalign queue_lock;
};

/*
 * Overall softc structure for the block backend module.
 */
struct ctl_be_block_softc {
	struct mtx			 lock;
	int				 num_disks;
	STAILQ_HEAD(, ctl_block_disk)	 disk_list;
	int				 num_luns;
	STAILQ_HEAD(, ctl_be_block_lun)	 lun_list;
};

static struct ctl_be_block_softc backend_block_softc;

/*
 * Per-I/O information.
 */
struct ctl_be_block_io {
	union ctl_io			*io;
	struct ctl_sg_entry		sg_segs[CTLBLK_MAX_SEGS];
	struct iovec			xiovecs[CTLBLK_MAX_SEGS];
	int				bio_cmd;
	int				bio_flags;
	int				num_segs;
	int				num_bios_sent;
	int				num_bios_done;
	int				send_complete;
	int				num_errors;
	struct bintime			ds_t0;
	devstat_tag_type		ds_tag_type;
	devstat_trans_flags		ds_trans_type;
	uint64_t			io_len;
	uint64_t			io_offset;
	struct ctl_be_block_softc	*softc;
	struct ctl_be_block_lun		*lun;
	void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
};

static int cbb_num_threads = 14;
SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD, 0,
	    "CAM Target Layer Block Backend");
SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN,
           &cbb_num_threads, 0, "Number of threads per backing file");

static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
static void ctl_free_beio(struct ctl_be_block_io *beio);
static void ctl_complete_beio(struct ctl_be_block_io *beio);
static int ctl_be_block_move_done(union ctl_io *io);
static void ctl_be_block_biodone(struct bio *bio);
static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
				    struct ctl_be_block_io *beio);
static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
				       struct ctl_be_block_io *beio);
static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
				   struct ctl_be_block_io *beio);
static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
				   struct ctl_be_block_io *beio);
static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
				      struct ctl_be_block_io *beio);
static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
				    union ctl_io *io);
static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
				  union ctl_io *io);
static void ctl_be_block_worker(void *context, int pending);
static int ctl_be_block_submit(union ctl_io *io);
static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
				   int flag, struct thread *td);
static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
				  struct ctl_lun_req *req);
static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
				 struct ctl_lun_req *req);
static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
static int ctl_be_block_open(struct ctl_be_block_softc *softc,
			     struct ctl_be_block_lun *be_lun,
			     struct ctl_lun_req *req);
static int ctl_be_block_create(struct ctl_be_block_softc *softc,
			       struct ctl_lun_req *req);
static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
			   struct ctl_lun_req *req);
static int ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun,
				  struct ctl_lun_req *req);
static int ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun,
				 struct ctl_lun_req *req);
static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
			   struct ctl_lun_req *req);
static void ctl_be_block_lun_shutdown(void *be_lun);
static void ctl_be_block_lun_config_status(void *be_lun,
					   ctl_lun_config_status status);
static int ctl_be_block_config_write(union ctl_io *io);
static int ctl_be_block_config_read(union ctl_io *io);
static int ctl_be_block_lun_info(void *be_lun, struct sbuf *sb);
int ctl_be_block_init(void);

static struct ctl_backend_driver ctl_be_block_driver =
{
	.name = "block",
	.flags = CTL_BE_FLAG_HAS_CONFIG,
	.init = ctl_be_block_init,
	.data_submit = ctl_be_block_submit,
	.data_move_done = ctl_be_block_move_done,
	.config_read = ctl_be_block_config_read,
	.config_write = ctl_be_block_config_write,
	.ioctl = ctl_be_block_ioctl,
	.lun_info = ctl_be_block_lun_info
};

MALLOC_DEFINE(M_CTLBLK, "ctlblk", "Memory used for CTL block backend");
CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver);

static uma_zone_t beio_zone;

static struct ctl_be_block_io *
ctl_alloc_beio(struct ctl_be_block_softc *softc)
{
	struct ctl_be_block_io *beio;

	beio = uma_zalloc(beio_zone, M_WAITOK | M_ZERO);
	beio->softc = softc;
	return (beio);
}

static void
ctl_free_beio(struct ctl_be_block_io *beio)
{
	int duplicate_free;
	int i;

	duplicate_free = 0;

	for (i = 0; i < beio->num_segs; i++) {
		if (beio->sg_segs[i].addr == NULL)
			duplicate_free++;

		uma_zfree(beio->lun->lun_zone, beio->sg_segs[i].addr);
		beio->sg_segs[i].addr = NULL;

		/* For compare we had two equal S/G lists. */
		if (ARGS(beio->io)->flags & CTL_LLF_COMPARE) {
			uma_zfree(beio->lun->lun_zone,
			    beio->sg_segs[i + CTLBLK_HALF_SEGS].addr);
			beio->sg_segs[i + CTLBLK_HALF_SEGS].addr = NULL;
		}
	}

	if (duplicate_free > 0) {
		printf("%s: %d duplicate frees out of %d segments\n", __func__,
		       duplicate_free, beio->num_segs);
	}

	uma_zfree(beio_zone, beio);
}

static void
ctl_complete_beio(struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;

	if (beio->beio_cont != NULL) {
		beio->beio_cont(beio);
	} else {
		ctl_free_beio(beio);
		ctl_data_submit_done(io);
	}
}

static int
ctl_be_block_move_done(union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_lun *be_lun;
	struct ctl_lba_len_flags *lbalen;
#ifdef CTL_TIME_IO
	struct bintime cur_bt;
#endif
	int i;

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	be_lun = beio->lun;

	DPRINTF("entered\n");

#ifdef CTL_TIME_IO
	getbintime(&cur_bt);
	bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt);
	bintime_add(&io->io_hdr.dma_bt, &cur_bt);
	io->io_hdr.num_dmas++;
#endif
	io->scsiio.kern_rel_offset += io->scsiio.kern_data_len;

	/*
	 * We set status at this point for read commands, and write
	 * commands with errors.
	 */
	if ((io->io_hdr.port_status == 0) &&
	    ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0) &&
	    ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) {
		lbalen = ARGS(beio->io);
		if (lbalen->flags & CTL_LLF_READ) {
			ctl_set_success(&io->scsiio);
		} else if (lbalen->flags & CTL_LLF_COMPARE) {
			/* We have two data blocks ready for comparison. */
			for (i = 0; i < beio->num_segs; i++) {
				if (memcmp(beio->sg_segs[i].addr,
				    beio->sg_segs[i + CTLBLK_HALF_SEGS].addr,
				    beio->sg_segs[i].len) != 0)
					break;
			}
			if (i < beio->num_segs)
				ctl_set_sense(&io->scsiio,
				    /*current_error*/ 1,
				    /*sense_key*/ SSD_KEY_MISCOMPARE,
				    /*asc*/ 0x1D,
				    /*ascq*/ 0x00,
				    SSD_ELEM_NONE);
			else
				ctl_set_success(&io->scsiio);
		}
	}
	else if ((io->io_hdr.port_status != 0)
	      && ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0)
	      && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) {
		/*
		 * For hardware error sense keys, the sense key
		 * specific value is defined to be a retry count,
		 * but we use it to pass back an internal FETD
		 * error code.  XXX KDM  Hopefully the FETD is only
		 * using 16 bits for an error code, since that's
		 * all the space we have in the sks field.
		 */
		ctl_set_internal_failure(&io->scsiio,
					 /*sks_valid*/ 1,
					 /*retry_count*/
					 io->io_hdr.port_status);
	}

	/*
	 * If this is a read, or a write with errors, it is done.
	 */
	if ((beio->bio_cmd == BIO_READ)
	 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)
	 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) {
		ctl_complete_beio(beio);
		return (0);
	}

	/*
	 * At this point, we have a write and the DMA completed
	 * successfully.  We now have to queue it to the task queue to
	 * execute the backend I/O.  That is because we do blocking
	 * memory allocations, and in the file backing case, blocking I/O.
	 * This move done routine is generally called in the SIM's
	 * interrupt context, and therefore we cannot block.
	 */
	mtx_lock(&be_lun->queue_lock);
	/*
	 * XXX KDM make sure that links is okay to use at this point.
	 * Otherwise, we either need to add another field to ctl_io_hdr,
	 * or deal with resource allocation here.
	 */
	STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links);
	mtx_unlock(&be_lun->queue_lock);

	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);

	return (0);
}

static void
ctl_be_block_biodone(struct bio *bio)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_lun *be_lun;
	union ctl_io *io;
	int error;

	beio = bio->bio_caller1;
	be_lun = beio->lun;
	io = beio->io;

	DPRINTF("entered\n");

	error = bio->bio_error;
	mtx_lock(&be_lun->io_lock);
	if (error != 0)
		beio->num_errors++;

	beio->num_bios_done++;

	/*
	 * XXX KDM will this cause WITNESS to complain?  Holding a lock
	 * during the free might cause it to complain.
	 */
	g_destroy_bio(bio);

	/*
	 * If the send complete bit isn't set, or we aren't the last I/O to
	 * complete, then we're done.
	 */
	if ((beio->send_complete == 0)
	 || (beio->num_bios_done < beio->num_bios_sent)) {
		mtx_unlock(&be_lun->io_lock);
		return;
	}

	/*
	 * At this point, we've verified that we are the last I/O to
	 * complete, so it's safe to drop the lock.
	 */
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If there are any errors from the backing device, we fail the
	 * entire I/O with a medium error.
	 */
	if (beio->num_errors > 0) {
		if (error == EOPNOTSUPP) {
			ctl_set_invalid_opcode(&io->scsiio);
		} else if (beio->bio_cmd == BIO_FLUSH) {
			/* XXX KDM is there a better error here? */
			ctl_set_internal_failure(&io->scsiio,
						 /*sks_valid*/ 1,
						 /*retry_count*/ 0xbad2);
		} else
			ctl_set_medium_error(&io->scsiio);
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write, a flush, a delete or verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE)
	 || (beio->bio_cmd == BIO_FLUSH)
	 || (beio->bio_cmd == BIO_DELETE)
	 || (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
#ifdef CTL_TIME_IO
		getbintime(&io->io_hdr.dma_start_bt);
#endif
		ctl_datamove(io);
	}
}

static void
ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
			struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct mount *mountpoint;
	int error, lock_flags;

	DPRINTF("entered\n");

	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	(void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);

	if (MNT_SHARED_WRITES(mountpoint)
	 || ((mountpoint == NULL)
	  && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
		lock_flags = LK_SHARED;
	else
		lock_flags = LK_EXCLUSIVE;

	vn_lock(be_lun->vn, lock_flags | LK_RETRY);

	error = VOP_FSYNC(be_lun->vn, MNT_WAIT, curthread);
	VOP_UNLOCK(be_lun->vn, 0);

	vn_finished_write(mountpoint);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	if (error == 0)
		ctl_set_success(&io->scsiio);
	else {
		/* XXX KDM is there a better error here? */
		ctl_set_internal_failure(&io->scsiio,
					 /*sks_valid*/ 1,
					 /*retry_count*/ 0xbad1);
	}

	ctl_complete_beio(beio);
}

SDT_PROBE_DEFINE1(cbb, kernel, read, file_start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, write, file_start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, read, file_done, "uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, write, file_done, "uint64_t");

static void
ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
			   struct ctl_be_block_io *beio)
{
	struct ctl_be_block_filedata *file_data;
	union ctl_io *io;
	struct uio xuio;
	struct iovec *xiovec;
	int flags;
	int error, i;

	DPRINTF("entered\n");

	file_data = &be_lun->backend.file;
	io = beio->io;
	flags = beio->bio_flags;

	bzero(&xuio, sizeof(xuio));
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0);
		xuio.uio_rw = UIO_READ;
	} else {
		SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0);
		xuio.uio_rw = UIO_WRITE;
	}
	xuio.uio_offset = beio->io_offset;
	xuio.uio_resid = beio->io_len;
	xuio.uio_segflg = UIO_SYSSPACE;
	xuio.uio_iov = beio->xiovecs;
	xuio.uio_iovcnt = beio->num_segs;
	xuio.uio_td = curthread;

	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
		xiovec->iov_base = beio->sg_segs[i].addr;
		xiovec->iov_len = beio->sg_segs[i].len;
	}

	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	if (beio->bio_cmd == BIO_READ) {
		vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);

		/*
		 * UFS pays attention to IO_DIRECT for reads.  If the
		 * DIRECTIO option is configured into the kernel, it calls
		 * ffs_rawread().  But that only works for single-segment
		 * uios with user space addresses.  In our case, with a
		 * kernel uio, it still reads into the buffer cache, but it
		 * will just try to release the buffer from the cache later
		 * on in ffs_read().
		 *
		 * ZFS does not pay attention to IO_DIRECT for reads.
		 *
		 * UFS does not pay attention to IO_SYNC for reads.
		 *
		 * ZFS pays attention to IO_SYNC (which translates into the
		 * Solaris define FRSYNC for zfs_read()) for reads.  It
		 * attempts to sync the file before reading.
		 *
		 * So, to attempt to provide some barrier semantics in the
		 * BIO_ORDERED case, set both IO_DIRECT and IO_SYNC.
		 */
		error = VOP_READ(be_lun->vn, &xuio, (flags & BIO_ORDERED) ?
				 (IO_DIRECT|IO_SYNC) : 0, file_data->cred);

		VOP_UNLOCK(be_lun->vn, 0);
		SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0);
	} else {
		struct mount *mountpoint;
		int lock_flags;

		(void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);

		if (MNT_SHARED_WRITES(mountpoint)
		 || ((mountpoint == NULL)
		  && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
			lock_flags = LK_SHARED;
		else
			lock_flags = LK_EXCLUSIVE;

		vn_lock(be_lun->vn, lock_flags | LK_RETRY);

		/*
		 * UFS pays attention to IO_DIRECT for writes.  The write
		 * is done asynchronously.  (Normally the write would just
		 * get put into the cache.)
		 *
		 * UFS pays attention to IO_SYNC for writes.  It will
		 * attempt to write the buffer out synchronously if that
		 * flag is set.
		 *
		 * ZFS does not pay attention to IO_DIRECT for writes.
		 *
		 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC)
		 * for writes.  It will flush the transaction from the
		 * cache before returning.
		 *
		 * So if we've got the BIO_ORDERED flag set, we want
		 * IO_SYNC in either the UFS or ZFS case.
		 */
		error = VOP_WRITE(be_lun->vn, &xuio, (flags & BIO_ORDERED) ?
				  IO_SYNC : 0, file_data->cred);
		VOP_UNLOCK(be_lun->vn, 0);

		vn_finished_write(mountpoint);
		SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0);
	}

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data to "MEDIUM ERROR" and
	 * return the I/O to the user.
	 */
	if (error != 0) {
		char path_str[32];

		ctl_scsi_path_string(io, path_str, sizeof(path_str));
		/*
		 * XXX KDM ZFS returns ENOSPC when the underlying
		 * filesystem fills up.  What kind of SCSI error should we
		 * return for that?
		 */
		printf("%s%s command returned errno %d\n", path_str,
		       (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE", error);
		ctl_set_medium_error(&io->scsiio);
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if (ARGS(io)->flags & (CTL_LLF_WRITE | CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
#ifdef CTL_TIME_IO
		getbintime(&io->io_hdr.dma_start_bt);
#endif
		ctl_datamove(io);
	}
}

static void
ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
		       struct ctl_be_block_io *beio)
{
	struct bio *bio;
	union ctl_io *io;
	struct ctl_be_block_devdata *dev_data;

	dev_data = &be_lun->backend.dev;
	io = beio->io;

	DPRINTF("entered\n");

	/* This can't fail, it's a blocking allocation. */
	bio = g_alloc_bio();

	bio->bio_cmd	    = BIO_FLUSH;
	bio->bio_flags	   |= BIO_ORDERED;
	bio->bio_dev	    = dev_data->cdev;
	bio->bio_offset	    = 0;
	bio->bio_data	    = 0;
	bio->bio_done	    = ctl_be_block_biodone;
	bio->bio_caller1    = beio;
	bio->bio_pblkno	    = 0;

	/*
	 * We don't need to acquire the LUN lock here, because we are only
	 * sending one bio, and so there is no other context to synchronize
	 * with.
	 */
	beio->num_bios_sent = 1;
	beio->send_complete = 1;

	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	(*dev_data->csw->d_strategy)(bio);
}

static void
ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
		       struct ctl_be_block_io *beio,
		       uint64_t off, uint64_t len, int last)
{
	struct bio *bio;
	struct ctl_be_block_devdata *dev_data;
	uint64_t maxlen;

	dev_data = &be_lun->backend.dev;
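	/*
	 * Issue the delete as a chain of BIO_DELETE bios, each capped at the
	 * largest blocksize-aligned length that does not exceed LONG_MAX.
	 * Only the bio that finishes the last range sets send_complete.
	 */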
	maxlen = LONG_MAX - (LONG_MAX % be_lun->blocksize);
	while (len > 0) {
		bio = g_alloc_bio();
		bio->bio_cmd	    = BIO_DELETE;
		bio->bio_flags	   |= beio->bio_flags;
		bio->bio_dev	    = dev_data->cdev;
		bio->bio_offset	    = off;
		bio->bio_length	    = MIN(len, maxlen);
		bio->bio_data	    = 0;
		bio->bio_done	    = ctl_be_block_biodone;
		bio->bio_caller1    = beio;
		bio->bio_pblkno     = off / be_lun->blocksize;

		off += bio->bio_length;
		len -= bio->bio_length;

		mtx_lock(&be_lun->io_lock);
		beio->num_bios_sent++;
		if (last && len == 0)
			beio->send_complete = 1;
		mtx_unlock(&be_lun->io_lock);

		(*dev_data->csw->d_strategy)(bio);
	}
}

static void
ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
		       struct ctl_be_block_io *beio)
{
	union ctl_io *io;
	struct ctl_be_block_devdata *dev_data;
	struct ctl_ptr_len_flags *ptrlen;
	struct scsi_unmap_desc *buf, *end;
	uint64_t len;

	dev_data = &be_lun->backend.dev;
	io = beio->io;

	DPRINTF("entered\n");

	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	if (beio->io_offset == -1) {
		beio->io_len = 0;
		ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
		buf = (struct scsi_unmap_desc *)ptrlen->ptr;
		end = buf + ptrlen->len / sizeof(*buf);
		for (; buf < end; buf++) {
			len = (uint64_t)scsi_4btoul(buf->length) *
			    be_lun->blocksize;
			beio->io_len += len;
			ctl_be_block_unmap_dev_range(be_lun, beio,
			    scsi_8btou64(buf->lba) * be_lun->blocksize, len,
			    (end - buf < 2) ? TRUE : FALSE);
		}
	} else
		ctl_be_block_unmap_dev_range(be_lun, beio,
		    beio->io_offset, beio->io_len, TRUE);
}

static void
ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
			  struct ctl_be_block_io *beio)
{
	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
	int i;
	struct bio *bio;
	struct ctl_be_block_devdata *dev_data;
	off_t cur_offset;
	int max_iosize;

	DPRINTF("entered\n");

	dev_data = &be_lun->backend.dev;

	/*
	 * We have to limit our I/O size to the maximum supported by the
	 * backend device.  Hopefully it is MAXPHYS.  If the driver doesn't
	 * set it properly, use DFLTPHYS.
	 */
	max_iosize = dev_data->cdev->si_iosize_max;
	if (max_iosize < PAGE_SIZE)
		max_iosize = DFLTPHYS;

	cur_offset = beio->io_offset;
	for (i = 0; i < beio->num_segs; i++) {
		size_t cur_size;
		uint8_t *cur_ptr;

		cur_size = beio->sg_segs[i].len;
		cur_ptr = beio->sg_segs[i].addr;

		while (cur_size > 0) {
			/* This can't fail, it's a blocking allocation. */
			bio = g_alloc_bio();

			KASSERT(bio != NULL, ("g_alloc_bio() failed!\n"));

			bio->bio_cmd = beio->bio_cmd;
			bio->bio_flags |= beio->bio_flags;
			bio->bio_dev = dev_data->cdev;
			bio->bio_caller1 = beio;
			bio->bio_length = min(cur_size, max_iosize);
			bio->bio_offset = cur_offset;
			bio->bio_data = cur_ptr;
			bio->bio_done = ctl_be_block_biodone;
			bio->bio_pblkno = cur_offset / be_lun->blocksize;

			cur_offset += bio->bio_length;
			cur_ptr += bio->bio_length;
			cur_size -= bio->bio_length;

			TAILQ_INSERT_TAIL(&queue, bio, bio_queue);
			beio->num_bios_sent++;
		}
	}
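	/*
	 * All bios are staged on a local queue first; the devstat transaction
	 * is started and send_complete is set under the I/O lock before any
	 * of them are handed to the driver, so the final completion in
	 * ctl_be_block_biodone() cannot race with submission.
	 */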
	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
	beio->send_complete = 1;
	mtx_unlock(&be_lun->io_lock);

	/*
	 * Fire off all allocated requests!
	 */
	while ((bio = TAILQ_FIRST(&queue)) != NULL) {
		TAILQ_REMOVE(&queue, bio, bio_queue);
		(*dev_data->csw->d_strategy)(bio);
	}
}

static void
ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio)
{
	union ctl_io *io;

	io = beio->io;
	ctl_free_beio(beio);
	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
		ctl_config_write_done(io);
		return;
	}

	ctl_be_block_config_write(io);
}

static void
ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
			    union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;
	struct ctl_lba_len_flags *lbalen;
	uint64_t len_left, lba;
	int i, seglen;
	uint8_t *buf, *end;

	DPRINTF("entered\n");

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	softc = be_lun->softc;
	lbalen = ARGS(beio->io);

	if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP) ||
	    (lbalen->flags & SWS_UNMAP && be_lun->unmap == NULL)) {
		ctl_free_beio(beio);
		ctl_set_invalid_field(&io->scsiio,
				      /*sks_valid*/ 1,
				      /*command*/ 1,
				      /*field*/ 1,
				      /*bit_valid*/ 0,
				      /*bit*/ 0);
		ctl_config_write_done(io);
		return;
	}

	/*
	 * If the I/O came down with an ordered or head of queue tag, set
	 * the BIO_ORDERED attribute.  For head of queue tags, that's
	 * pretty much the best we can do.
	 */
	if ((io->scsiio.tag_type == CTL_TAG_ORDERED)
	 || (io->scsiio.tag_type == CTL_TAG_HEAD_OF_QUEUE))
		beio->bio_flags = BIO_ORDERED;

	switch (io->scsiio.tag_type) {
	case CTL_TAG_ORDERED:
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		break;
	case CTL_TAG_HEAD_OF_QUEUE:
		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
		break;
	case CTL_TAG_UNTAGGED:
	case CTL_TAG_SIMPLE:
	case CTL_TAG_ACA:
	default:
		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
		break;
	}

	if (lbalen->flags & SWS_UNMAP) {
		beio->io_offset = lbalen->lba * be_lun->blocksize;
		beio->io_len = (uint64_t)lbalen->len * be_lun->blocksize;
		beio->bio_cmd = BIO_DELETE;
		beio->ds_trans_type = DEVSTAT_FREE;

		be_lun->unmap(be_lun, beio);
		return;
	}

	beio->bio_cmd = BIO_WRITE;
	beio->ds_trans_type = DEVSTAT_WRITE;

	DPRINTF("WRITE SAME at LBA %jx len %u\n",
	       (uintmax_t)lbalen->lba, lbalen->len);

	len_left = (uint64_t)lbalen->len * be_lun->blocksize;
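	/*
	 * Build the data by replicating the single block supplied by the
	 * initiator across each allocated segment; with SWS_LBDATA, the LBA
	 * is stamped into the first four bytes of every block.
	 */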
	for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {

		/*
		 * Setup the S/G entry for this chunk.
		 */
		seglen = MIN(CTLBLK_MAX_SEG, len_left);
		seglen -= seglen % be_lun->blocksize;
		beio->sg_segs[i].len = seglen;
		beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);

		DPRINTF("segment %d addr %p len %zd\n", i,
			beio->sg_segs[i].addr, beio->sg_segs[i].len);

		beio->num_segs++;
		len_left -= seglen;

		buf = beio->sg_segs[i].addr;
		end = buf + seglen;
		for (; buf < end; buf += be_lun->blocksize) {
			memcpy(buf, io->scsiio.kern_data_ptr, be_lun->blocksize);
			if (lbalen->flags & SWS_LBDATA)
				scsi_ulto4b(lbalen->lba + lba, buf);
			lba++;
		}
	}

	beio->io_offset = lbalen->lba * be_lun->blocksize;
	beio->io_len = lba * be_lun->blocksize;

	/* We cannot do it all in one run. Correct the range and schedule a rerun. */
	if (len_left > 0) {
		lbalen->lba += lba;
		lbalen->len -= lba;
		beio->beio_cont = ctl_be_block_cw_done_ws;
	}

	be_lun->dispatch(be_lun, beio);
}

static void
ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun,
			    union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;
	struct ctl_ptr_len_flags *ptrlen;

	DPRINTF("entered\n");

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	softc = be_lun->softc;
	ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];

	if (ptrlen->flags != 0 || be_lun->unmap == NULL) {
		ctl_free_beio(beio);
		ctl_set_invalid_field(&io->scsiio,
				      /*sks_valid*/ 0,
				      /*command*/ 1,
				      /*field*/ 0,
				      /*bit_valid*/ 0,
				      /*bit*/ 0);
		ctl_config_write_done(io);
		return;
	}

	/*
	 * If the I/O came down with an ordered or head of queue tag, set
	 * the BIO_ORDERED attribute.  For head of queue tags, that's
	 * pretty much the best we can do.
	 */
	if ((io->scsiio.tag_type == CTL_TAG_ORDERED)
	 || (io->scsiio.tag_type == CTL_TAG_HEAD_OF_QUEUE))
		beio->bio_flags = BIO_ORDERED;

	switch (io->scsiio.tag_type) {
	case CTL_TAG_ORDERED:
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		break;
	case CTL_TAG_HEAD_OF_QUEUE:
		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
		break;
	case CTL_TAG_UNTAGGED:
	case CTL_TAG_SIMPLE:
	case CTL_TAG_ACA:
	default:
		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
		break;
	}

	beio->io_len = 0;
	beio->io_offset = -1;

	beio->bio_cmd = BIO_DELETE;
	beio->ds_trans_type = DEVSTAT_FREE;

	DPRINTF("UNMAP\n");

	be_lun->unmap(be_lun, beio);
}

static void
ctl_be_block_cw_done(struct ctl_be_block_io *beio)
{
	union ctl_io *io;

	io = beio->io;
	ctl_free_beio(beio);
	ctl_config_write_done(io);
}

static void
ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
			 union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;

	DPRINTF("entered\n");

	softc = be_lun->softc;
	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	beio->beio_cont = ctl_be_block_cw_done;
	PRIV(io)->ptr = (void *)beio;

	switch (io->scsiio.cdb[0]) {
	case SYNCHRONIZE_CACHE:
	case SYNCHRONIZE_CACHE_16:
		beio->bio_cmd = BIO_FLUSH;
		beio->ds_trans_type = DEVSTAT_NO_DATA;
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		beio->io_len = 0;
		be_lun->lun_flush(be_lun, beio);
		break;
	case WRITE_SAME_10:
	case WRITE_SAME_16:
		ctl_be_block_cw_dispatch_ws(be_lun, io);
		break;
	case UNMAP:
		ctl_be_block_cw_dispatch_unmap(be_lun, io);
		break;
	default:
		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
		break;
	}
}

SDT_PROBE_DEFINE1(cbb, kernel, read, start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, write, start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, read, alloc_done, "uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, write, alloc_done, "uint64_t");

static void
ctl_be_block_next(struct ctl_be_block_io *beio)
{
	struct ctl_be_block_lun *be_lun;
	union ctl_io *io;

	io = beio->io;
	be_lun = beio->lun;
	ctl_free_beio(beio);
	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
		ctl_data_submit_done(io);
		return;
	}

	io->io_hdr.status &= ~CTL_STATUS_MASK;
	io->io_hdr.status |= CTL_STATUS_NONE;

	mtx_lock(&be_lun->queue_lock);
	/*
	 * XXX KDM make sure that links is okay to use at this point.
	 * Otherwise, we either need to add another field to ctl_io_hdr,
	 * or deal with resource allocation here.
	 */
	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
	mtx_unlock(&be_lun->queue_lock);

	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
}

static void
ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
			   union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;
	struct ctl_lba_len_flags *lbalen;
	struct ctl_ptr_len_flags *bptrlen;
	uint64_t len_left, lbas;
	int i;

	softc = be_lun->softc;

	DPRINTF("entered\n");

	lbalen = ARGS(io);
	if (lbalen->flags & CTL_LLF_WRITE) {
		SDT_PROBE(cbb, kernel, write, start, 0, 0, 0, 0, 0);
	} else {
		SDT_PROBE(cbb, kernel, read, start, 0, 0, 0, 0, 0);
	}

	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	bptrlen = PRIV(io);
	bptrlen->ptr = (void *)beio;

	/*
	 * If the I/O came down with an ordered or head of queue tag, set
	 * the BIO_ORDERED attribute.  For head of queue tags, that's
	 * pretty much the best we can do.
	 *
	 * XXX KDM we don't have a great way to easily know about the FUA
	 * bit right now (it is decoded in ctl_read_write(), but we don't
	 * pass that knowledge to the backend), and in any case we would
	 * need to determine how to handle it.
	 */
	if ((io->scsiio.tag_type == CTL_TAG_ORDERED)
	 || (io->scsiio.tag_type == CTL_TAG_HEAD_OF_QUEUE))
		beio->bio_flags = BIO_ORDERED;

	switch (io->scsiio.tag_type) {
	case CTL_TAG_ORDERED:
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		break;
	case CTL_TAG_HEAD_OF_QUEUE:
		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
		break;
	case CTL_TAG_UNTAGGED:
	case CTL_TAG_SIMPLE:
	case CTL_TAG_ACA:
	default:
		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
		break;
	}

	if (lbalen->flags & CTL_LLF_WRITE) {
		beio->bio_cmd = BIO_WRITE;
		beio->ds_trans_type = DEVSTAT_WRITE;
	} else {
		beio->bio_cmd = BIO_READ;
		beio->ds_trans_type = DEVSTAT_READ;
	}

	DPRINTF("%s at LBA %jx len %u @%ju\n",
	       (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE",
	       (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len);
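	/*
	 * Each pass handles at most CTLBLK_MAX_IO_SIZE worth of data (or half
	 * of that for a compare, which needs a second buffer); if the request
	 * is larger, ctl_be_block_next() re-queues it for another pass.
	 */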
	if (lbalen->flags & CTL_LLF_COMPARE)
		lbas = CTLBLK_HALF_IO_SIZE;
	else
		lbas = CTLBLK_MAX_IO_SIZE;
	lbas = MIN(lbalen->len - bptrlen->len, lbas / be_lun->blocksize);
	beio->io_offset = (lbalen->lba + bptrlen->len) * be_lun->blocksize;
	beio->io_len = lbas * be_lun->blocksize;
	bptrlen->len += lbas;

	for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
		KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)",
		    i, CTLBLK_MAX_SEGS));

		/*
		 * Setup the S/G entry for this chunk.
		 */
		beio->sg_segs[i].len = min(CTLBLK_MAX_SEG, len_left);
		beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);

		DPRINTF("segment %d addr %p len %zd\n", i,
			beio->sg_segs[i].addr, beio->sg_segs[i].len);

		/* Set up second segment for compare operation. */
		if (lbalen->flags & CTL_LLF_COMPARE) {
			beio->sg_segs[i + CTLBLK_HALF_SEGS].len =
			    beio->sg_segs[i].len;
			beio->sg_segs[i + CTLBLK_HALF_SEGS].addr =
			    uma_zalloc(be_lun->lun_zone, M_WAITOK);
		}

		beio->num_segs++;
		len_left -= beio->sg_segs[i].len;
	}
	if (bptrlen->len < lbalen->len)
		beio->beio_cont = ctl_be_block_next;
	io->scsiio.be_move_done = ctl_be_block_move_done;
	/* For compare we have separate S/G lists for read and datamove. */
	if (lbalen->flags & CTL_LLF_COMPARE)
		io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS];
	else
		io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs;
	io->scsiio.kern_data_len = beio->io_len;
	io->scsiio.kern_data_resid = 0;
	io->scsiio.kern_sg_entries = beio->num_segs;
	io->io_hdr.flags |= CTL_FLAG_ALLOCATED | CTL_FLAG_KDPTR_SGLIST;

	/*
	 * For the read case, we need to read the data into our buffers and
	 * then we can send it back to the user.  For the write case, we
	 * need to get the data from the user first.
	 */
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE(cbb, kernel, read, alloc_done, 0, 0, 0, 0, 0);
		be_lun->dispatch(be_lun, beio);
	} else {
		SDT_PROBE(cbb, kernel, write, alloc_done, 0, 0, 0, 0, 0);
#ifdef CTL_TIME_IO
		getbintime(&io->io_hdr.dma_start_bt);
#endif
		ctl_datamove(io);
	}
}

static void
ctl_be_block_worker(void *context, int pending)
{
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_block_softc *softc;
	union ctl_io *io;

	be_lun = (struct ctl_be_block_lun *)context;
	softc = be_lun->softc;

	DPRINTF("entered\n");

	mtx_lock(&be_lun->queue_lock);
1340267877Smav	mtx_lock(&be_lun->queue_lock);
1341229997Sken	for (;;) {
1342229997Sken		io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
1343229997Sken		if (io != NULL) {
1344229997Sken			struct ctl_be_block_io *beio;
1345229997Sken
1346229997Sken			DPRINTF("datamove queue\n");
1347229997Sken
1348229997Sken			STAILQ_REMOVE(&be_lun->datamove_queue, &io->io_hdr,
1349229997Sken				      ctl_io_hdr, links);
1350229997Sken
1351267877Smav			mtx_unlock(&be_lun->queue_lock);
1352229997Sken
1353267519Smav			beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1354229997Sken
1355229997Sken			be_lun->dispatch(be_lun, beio);
1356229997Sken
1357267877Smav			mtx_lock(&be_lun->queue_lock);
1358229997Sken			continue;
1359229997Sken		}
1360229997Sken		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
1361229997Sken		if (io != NULL) {
1362229997Sken
1363229997Sken			DPRINTF("config write queue\n");
1364229997Sken
1365229997Sken			STAILQ_REMOVE(&be_lun->config_write_queue, &io->io_hdr,
1366229997Sken				      ctl_io_hdr, links);
1367229997Sken
1368267877Smav			mtx_unlock(&be_lun->queue_lock);
1369229997Sken
1370229997Sken			ctl_be_block_cw_dispatch(be_lun, io);
1371229997Sken
1372267877Smav			mtx_lock(&be_lun->queue_lock);
1373229997Sken			continue;
1374229997Sken		}
1375229997Sken		io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
1376229997Sken		if (io != NULL) {
1377229997Sken			DPRINTF("input queue\n");
1378229997Sken
1379229997Sken			STAILQ_REMOVE(&be_lun->input_queue, &io->io_hdr,
1380229997Sken				      ctl_io_hdr, links);
1381267877Smav			mtx_unlock(&be_lun->queue_lock);
1382229997Sken
1383229997Sken			/*
1384229997Sken			 * We must drop the lock, since this routine and
1385229997Sken			 * its children may sleep.
1386229997Sken			 */
1387229997Sken			ctl_be_block_dispatch(be_lun, io);
1388229997Sken
1389267877Smav			mtx_lock(&be_lun->queue_lock);
1390229997Sken			continue;
1391229997Sken		}
1392229997Sken
1393229997Sken		/*
1394229997Sken		 * If we get here, there is no work left in the queues, so
1395229997Sken		 * just break out and let the task queue go to sleep.
1396229997Sken		 */
1397229997Sken		break;
1398229997Sken	}
1399267877Smav	mtx_unlock(&be_lun->queue_lock);
1400229997Sken}
1401229997Sken
1402229997Sken/*
1403229997Sken * Entry point from CTL to the backend for I/O.  We queue everything to a
1404229997Sken * work thread, so this just puts the I/O on a queue and wakes up the
1405229997Sken * thread.
1406229997Sken */
1407229997Skenstatic int
1408229997Skenctl_be_block_submit(union ctl_io *io)
1409229997Sken{
1410229997Sken	struct ctl_be_block_lun *be_lun;
1411229997Sken	struct ctl_be_lun *ctl_be_lun;
1412229997Sken
1413229997Sken	DPRINTF("entered\n");
1414229997Sken
1415229997Sken	ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
1416229997Sken		CTL_PRIV_BACKEND_LUN].ptr;
1417229997Sken	be_lun = (struct ctl_be_block_lun *)ctl_be_lun->be_lun;
1418229997Sken
1419229997Sken	/*
1420229997Sken	 * Make sure we only get SCSI I/O.
1421229997Sken	 */
1422229997Sken	KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, ("Non-SCSI I/O (type "
1423229997Sken		"%#x) encountered", io->io_hdr.io_type));
1424229997Sken
1425267519Smav	PRIV(io)->len = 0;
1426267519Smav
1427267877Smav	mtx_lock(&be_lun->queue_lock);
1428229997Sken	/*
1429229997Sken	 * XXX KDM make sure that links is okay to use at this point.
1430229997Sken	 * Otherwise, we either need to add another field to ctl_io_hdr,
1431229997Sken	 * or deal with resource allocation here.
1432229997Sken	 */
1433229997Sken	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1434267877Smav	mtx_unlock(&be_lun->queue_lock);
1435229997Sken	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1436229997Sken
1437267514Smav	return (CTL_RETVAL_COMPLETE);
1438229997Sken}
1439229997Sken
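/*
 * Character device ioctl handler for the block backend, typically
 * invoked through ctladm(8).  Only CTL_LUN_REQ is supported; it
 * dispatches LUN create, remove, and modify requests.
 */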
1440229997Skenstatic int
1441229997Skenctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
1442229997Sken			int flag, struct thread *td)
1443229997Sken{
1444229997Sken	struct ctl_be_block_softc *softc;
1445229997Sken	int error;
1446229997Sken
1447229997Sken	softc = &backend_block_softc;
1448229997Sken
1449229997Sken	error = 0;
1450229997Sken
1451229997Sken	switch (cmd) {
1452229997Sken	case CTL_LUN_REQ: {
1453229997Sken		struct ctl_lun_req *lun_req;
1454229997Sken
1455229997Sken		lun_req = (struct ctl_lun_req *)addr;
1456229997Sken
1457229997Sken		switch (lun_req->reqtype) {
1458229997Sken		case CTL_LUNREQ_CREATE:
1459229997Sken			error = ctl_be_block_create(softc, lun_req);
1460229997Sken			break;
1461229997Sken		case CTL_LUNREQ_RM:
1462229997Sken			error = ctl_be_block_rm(softc, lun_req);
1463229997Sken			break;
1464232604Strasz		case CTL_LUNREQ_MODIFY:
1465232604Strasz			error = ctl_be_block_modify(softc, lun_req);
1466232604Strasz			break;
1467229997Sken		default:
1468229997Sken			lun_req->status = CTL_LUN_ERROR;
1469229997Sken			snprintf(lun_req->error_str, sizeof(lun_req->error_str),
1470229997Sken				 "%s: invalid LUN request type %d", __func__,
1471229997Sken				 lun_req->reqtype);
1472229997Sken			break;
1473229997Sken		}
1474229997Sken		break;
1475229997Sken	}
1476229997Sken	default:
1477229997Sken		error = ENOTTY;
1478229997Sken		break;
1479229997Sken	}
1480229997Sken
1481229997Sken	return (error);
1482229997Sken}
1483229997Sken
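/*
 * Set up a LUN backed by a regular file: install the file dispatch and
 * flush handlers, verify that the vnode lock can be upgraded, hold the
 * caller's credentials for later I/O, and size the LUN from the request
 * parameters or from the file itself.
 */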
1484229997Skenstatic int
1485229997Skenctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1486229997Sken{
1487229997Sken	struct ctl_be_block_filedata *file_data;
1488229997Sken	struct ctl_lun_create_params *params;
1489229997Sken	struct vattr		      vattr;
1490229997Sken	int			      error;
1491229997Sken
1492229997Sken	error = 0;
1493229997Sken	file_data = &be_lun->backend.file;
1494229997Sken	params = &req->reqdata.create;
1495229997Sken
1496229997Sken	be_lun->dev_type = CTL_BE_BLOCK_FILE;
1497229997Sken	be_lun->dispatch = ctl_be_block_dispatch_file;
1498229997Sken	be_lun->lun_flush = ctl_be_block_flush_file;
1499229997Sken
1500229997Sken	error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
1501229997Sken	if (error != 0) {
1502229997Sken		snprintf(req->error_str, sizeof(req->error_str),
1503229997Sken			 "error calling VOP_GETATTR() for file %s",
1504229997Sken			 be_lun->dev_path);
1505229997Sken		return (error);
1506229997Sken	}
1507229997Sken
1508229997Sken	/*
1509229997Sken	 * Verify that we have the ability to upgrade to exclusive
1510229997Sken	 * access on this file so we can trap errors at open instead
1511229997Sken	 * of reporting them during first access.
1512229997Sken	 */
1513229997Sken	if (VOP_ISLOCKED(be_lun->vn) != LK_EXCLUSIVE) {
1514229997Sken		vn_lock(be_lun->vn, LK_UPGRADE | LK_RETRY);
1515229997Sken		if (be_lun->vn->v_iflag & VI_DOOMED) {
1516229997Sken			error = EBADF;
1517229997Sken			snprintf(req->error_str, sizeof(req->error_str),
1518229997Sken				 "error locking file %s", be_lun->dev_path);
1519229997Sken			return (error);
1520229997Sken		}
1521229997Sken	}
1522229997Sken
1523229997Sken
1524229997Sken	file_data->cred = crhold(curthread->td_ucred);
1525232604Strasz	if (params->lun_size_bytes != 0)
1526232604Strasz		be_lun->size_bytes = params->lun_size_bytes;
1527232604Strasz	else
1528232604Strasz		be_lun->size_bytes = vattr.va_size;
1529229997Sken	/*
1530229997Sken	 * We set the multi thread flag for file operations because all
1531229997Sken	 * filesystems (in theory) are capable of allowing multiple readers
1532229997Sken	 * of a file at once.  So we want to get the maximum possible
1533229997Sken	 * concurrency.
1534229997Sken	 */
1535229997Sken	be_lun->flags |= CTL_BE_BLOCK_LUN_MULTI_THREAD;
1536229997Sken
1537229997Sken	/*
1538229997Sken	 * XXX KDM vattr.va_blocksize may be larger than 512 bytes here.
1539229997Sken	 * With ZFS, it is 131072 bytes.  Block sizes that large don't work
1540229997Sken	 * with disklabel and UFS on FreeBSD at least.  Large block sizes
1541229997Sken	 * may not work with other OSes as well.  So just export a sector
1542229997Sken	 * size of 512 bytes, which should work with any OS or
1543229997Sken	 * application.  Since our backing is a file, any block size will
1544229997Sken	 * work fine for the backing store.
1545229997Sken	 */
1546229997Sken#if 0
1547229997Sken	be_lun->blocksize = vattr.va_blocksize;
1548229997Sken#endif
1549229997Sken	if (params->blocksize_bytes != 0)
1550229997Sken		be_lun->blocksize = params->blocksize_bytes;
1551229997Sken	else
1552229997Sken		be_lun->blocksize = 512;
1553229997Sken
1554229997Sken	/*
1555229997Sken	 * Sanity check.  The media size has to be at least one
1556229997Sken	 * sector long.
1557229997Sken	 */
1558229997Sken	if (be_lun->size_bytes < be_lun->blocksize) {
1559229997Sken		error = EINVAL;
1560229997Sken		snprintf(req->error_str, sizeof(req->error_str),
1561229997Sken			 "file %s size %ju < block size %u", be_lun->dev_path,
1562229997Sken			 (uintmax_t)be_lun->size_bytes, be_lun->blocksize);
1563229997Sken	}
1564229997Sken	return (error);
1565229997Sken}
1566229997Sken
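/*
 * Set up a LUN backed by a device node: install the device dispatch,
 * flush, and unmap handlers, take a reference on the device switch,
 * query the sector and media sizes via ioctls, validate any requested
 * block or LUN size, and derive the physical block geometry from the
 * reported stripe size and offset.
 */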
1567229997Skenstatic int
1568229997Skenctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1569229997Sken{
1570229997Sken	struct ctl_lun_create_params *params;
1571229997Sken	struct vattr		      vattr;
1572229997Sken	struct cdev		     *dev;
1573229997Sken	struct cdevsw		     *devsw;
1574229997Sken	int			      error;
1575264191Smav	off_t			      ps, pss, po, pos;
1576229997Sken
1577229997Sken	params = &req->reqdata.create;
1578229997Sken
1579229997Sken	be_lun->dev_type = CTL_BE_BLOCK_DEV;
1580229997Sken	be_lun->dispatch = ctl_be_block_dispatch_dev;
1581229997Sken	be_lun->lun_flush = ctl_be_block_flush_dev;
1582264274Smav	be_lun->unmap = ctl_be_block_unmap_dev;
1583229997Sken	be_lun->backend.dev.cdev = be_lun->vn->v_rdev;
1584229997Sken	be_lun->backend.dev.csw = dev_refthread(be_lun->backend.dev.cdev,
1585229997Sken					     &be_lun->backend.dev.dev_ref);
1586229997Sken	if (be_lun->backend.dev.csw == NULL)
1587229997Sken		panic("Unable to retrieve device switch");
1588229997Sken
1589229997Sken	error = VOP_GETATTR(be_lun->vn, &vattr, NOCRED);
1590229997Sken	if (error) {
1591229997Sken		snprintf(req->error_str, sizeof(req->error_str),
1592229997Sken			 "%s: error getting vnode attributes for device %s",
1593229997Sken			 __func__, be_lun->dev_path);
1594229997Sken		return (error);
1595229997Sken	}
1596229997Sken
1597229997Sken	dev = be_lun->vn->v_rdev;
1598229997Sken	devsw = dev->si_devsw;
1599229997Sken	if (!devsw->d_ioctl) {
1600229997Sken		snprintf(req->error_str, sizeof(req->error_str),
1601229997Sken			 "%s: no d_ioctl for device %s!", __func__,
1602229997Sken			 be_lun->dev_path);
1603229997Sken		return (ENODEV);
1604229997Sken	}
1605229997Sken
1606229997Sken	error = devsw->d_ioctl(dev, DIOCGSECTORSIZE,
1607229997Sken			       (caddr_t)&be_lun->blocksize, FREAD,
1608229997Sken			       curthread);
1609229997Sken	if (error) {
1610229997Sken		snprintf(req->error_str, sizeof(req->error_str),
1611229997Sken			 "%s: error %d returned for DIOCGSECTORSIZE ioctl "
1612229997Sken			 "on %s!", __func__, error, be_lun->dev_path);
1613229997Sken		return (error);
1614229997Sken	}
1615229997Sken
1616229997Sken	/*
1617229997Sken	 * If the user has asked for a blocksize that is greater than the
1618229997Sken	 * backing device's blocksize, we can do it only if the blocksize
1619229997Sken	 * the user is asking for is an even multiple of the underlying
1620229997Sken	 * device's blocksize.
1621229997Sken	 */
1622229997Sken	if ((params->blocksize_bytes != 0)
1623229997Sken	 && (params->blocksize_bytes > be_lun->blocksize)) {
1624229997Sken		uint32_t bs_multiple, tmp_blocksize;
1625229997Sken
1626229997Sken		bs_multiple = params->blocksize_bytes / be_lun->blocksize;
1627229997Sken
1628229997Sken		tmp_blocksize = bs_multiple * be_lun->blocksize;
1629229997Sken
1630229997Sken		if (tmp_blocksize == params->blocksize_bytes) {
1631229997Sken			be_lun->blocksize = params->blocksize_bytes;
1632229997Sken		} else {
1633229997Sken			snprintf(req->error_str, sizeof(req->error_str),
1634229997Sken				 "%s: requested blocksize %u is not an even "
1635229997Sken				 "multiple of backing device blocksize %u",
1636229997Sken				 __func__, params->blocksize_bytes,
1637229997Sken				 be_lun->blocksize);
1638229997Sken			return (EINVAL);
1639229997Sken
1640229997Sken		}
1641229997Sken	} else if ((params->blocksize_bytes != 0)
1642229997Sken		&& (params->blocksize_bytes != be_lun->blocksize)) {
1643229997Sken		snprintf(req->error_str, sizeof(req->error_str),
1644229997Sken			 "%s: requested blocksize %u < backing device "
1645229997Sken			 "blocksize %u", __func__, params->blocksize_bytes,
1646229997Sken			 be_lun->blocksize);
1647229997Sken		return (EINVAL);
1648229997Sken	}
1649229997Sken
1650229997Sken	error = devsw->d_ioctl(dev, DIOCGMEDIASIZE,
1651229997Sken			       (caddr_t)&be_lun->size_bytes, FREAD,
1652229997Sken			       curthread);
1653229997Sken	if (error) {
1654229997Sken		snprintf(req->error_str, sizeof(req->error_str),
1655232604Strasz			 "%s: error %d returned for DIOCGMEDIASIZE ioctl "
1656232604Strasz			 "on %s!", __func__, error, be_lun->dev_path);
1658229997Sken		return (error);
1659229997Sken	}
1660229997Sken
1661232604Strasz	if (params->lun_size_bytes != 0) {
1662232604Strasz		if (params->lun_size_bytes > be_lun->size_bytes) {
1663232604Strasz			snprintf(req->error_str, sizeof(req->error_str),
1664232604Strasz				 "%s: requested LUN size %ju > backing device "
1665232604Strasz				 "size %ju", __func__,
1666232604Strasz				 (uintmax_t)params->lun_size_bytes,
1667232604Strasz				 (uintmax_t)be_lun->size_bytes);
1668232604Strasz			return (EINVAL);
1669232604Strasz		}
1670232604Strasz
1671232604Strasz		be_lun->size_bytes = params->lun_size_bytes;
1672232604Strasz	}
1673232604Strasz
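	/*
	 * If the device reports a sensible stripe geometry, advertise it as
	 * a physical block size (a power-of-two multiple of the logical
	 * block size) plus an alignment offset.  For example, a 4096-byte
	 * stripe at offset 0 on a 512-byte sector device gives pss = 8, so
	 * pblockexp = 3 and pblockoff = 0.
	 */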
1674264191Smav	error = devsw->d_ioctl(dev, DIOCGSTRIPESIZE,
1675264191Smav			       (caddr_t)&ps, FREAD, curthread);
1676264191Smav	if (error)
1677264191Smav		ps = po = 0;
1678264191Smav	else {
1679264191Smav		error = devsw->d_ioctl(dev, DIOCGSTRIPEOFFSET,
1680264191Smav				       (caddr_t)&po, FREAD, curthread);
1681264191Smav		if (error)
1682264191Smav			po = 0;
1683264191Smav	}
1684264191Smav	pss = ps / be_lun->blocksize;
1685264191Smav	pos = po / be_lun->blocksize;
1686264191Smav	if ((pss > 0) && (pss * be_lun->blocksize == ps) && (pss >= pos) &&
1687264191Smav	    ((pss & (pss - 1)) == 0) && (pos * be_lun->blocksize == po)) {
1688264191Smav		be_lun->pblockexp = fls(pss) - 1;
1689264191Smav		be_lun->pblockoff = (pss - pos) % pss;
1690264191Smav	}
1691264191Smav
1692229997Sken	return (0);
1693229997Sken}
1694229997Sken
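/*
 * Undo ctl_be_block_open(): drop the device switch reference or the
 * file credentials as appropriate, then close the backing vnode.
 */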
1695229997Skenstatic int
1696229997Skenctl_be_block_close(struct ctl_be_block_lun *be_lun)
1697229997Sken{
1698229997Sken	DROP_GIANT();
1699229997Sken	if (be_lun->vn) {
1700229997Sken		int flags = FREAD | FWRITE;
1701229997Sken
1702229997Sken		switch (be_lun->dev_type) {
1703229997Sken		case CTL_BE_BLOCK_DEV:
1704229997Sken			if (be_lun->backend.dev.csw) {
1705229997Sken				dev_relthread(be_lun->backend.dev.cdev,
1706229997Sken					      be_lun->backend.dev.dev_ref);
1707229997Sken				be_lun->backend.dev.csw  = NULL;
1708229997Sken				be_lun->backend.dev.cdev = NULL;
1709229997Sken			}
1710229997Sken			break;
1711229997Sken		case CTL_BE_BLOCK_FILE:
1712229997Sken			break;
1713229997Sken		case CTL_BE_BLOCK_NONE:
1714258871Strasz			break;
1715229997Sken		default:
1716229997Sken			panic("Unexpected backend type.");
1717229997Sken			break;
1718229997Sken		}
1719229997Sken
1720229997Sken		(void)vn_close(be_lun->vn, flags, NOCRED, curthread);
1721229997Sken		be_lun->vn = NULL;
1722229997Sken
1723229997Sken		switch (be_lun->dev_type) {
1724229997Sken		case CTL_BE_BLOCK_DEV:
1725229997Sken			break;
1726229997Sken		case CTL_BE_BLOCK_FILE:
1727229997Sken			if (be_lun->backend.file.cred != NULL) {
1728229997Sken				crfree(be_lun->backend.file.cred);
1729229997Sken				be_lun->backend.file.cred = NULL;
1730229997Sken			}
1731229997Sken			break;
1732229997Sken		case CTL_BE_BLOCK_NONE:
1733258871Strasz			break;
1734229997Sken		default:
1735229997Sken			panic("Unexpected backend type.");
1736229997Sken			break;
1737229997Sken		}
1738229997Sken	}
1739229997Sken	PICKUP_GIANT();
1740229997Sken
1741229997Sken	return (0);
1742229997Sken}
1743229997Sken
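/*
 * Open the backing store named in dev_path.  If a bare name does not
 * resolve, retry with "/dev/" prepended.  Disks are handed to
 * ctl_be_block_open_dev() and regular files to ctl_be_block_open_file();
 * anything else is rejected.
 */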
1744229997Skenstatic int
1745229997Skenctl_be_block_open(struct ctl_be_block_softc *softc,
1746229997Sken		       struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1747229997Sken{
1748229997Sken	struct nameidata nd;
1749229997Sken	int		 flags;
1750229997Sken	int		 error;
1751229997Sken
1752229997Sken	/*
1753229997Sken	 * XXX KDM allow a read-only option?
1754229997Sken	 */
1755229997Sken	flags = FREAD | FWRITE;
1756229997Sken	error = 0;
1757229997Sken
1758229997Sken	if (rootvnode == NULL) {
1759229997Sken		snprintf(req->error_str, sizeof(req->error_str),
1760229997Sken			 "%s: Root filesystem is not mounted", __func__);
1761229997Sken		return (1);
1762229997Sken	}
1763229997Sken
1764229997Sken	if (!curthread->td_proc->p_fd->fd_cdir) {
1765229997Sken		curthread->td_proc->p_fd->fd_cdir = rootvnode;
1766229997Sken		VREF(rootvnode);
1767229997Sken	}
1768229997Sken	if (!curthread->td_proc->p_fd->fd_rdir) {
1769229997Sken		curthread->td_proc->p_fd->fd_rdir = rootvnode;
1770229997Sken		VREF(rootvnode);
1771229997Sken	}
1772229997Sken	if (!curthread->td_proc->p_fd->fd_jdir) {
1773229997Sken		curthread->td_proc->p_fd->fd_jdir = rootvnode;
1774229997Sken		VREF(rootvnode);
1775229997Sken	}
1776229997Sken
1777229997Sken again:
1778229997Sken	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread);
1779229997Sken	error = vn_open(&nd, &flags, 0, NULL);
1780229997Sken	if (error) {
1781229997Sken		/*
1782229997Sken		 * This is the only reasonable guess we can make as far as
1783229997Sken		 * path if the user doesn't give us a fully qualified path.
1784229997Sken		 * If they want to specify a file, they need to specify the
1785229997Sken		 * full path.
1786229997Sken		 */
1787229997Sken		if (be_lun->dev_path[0] != '/') {
1788229997Sken			char *dev_path = "/dev/";
1789229997Sken			char *dev_name;
1790229997Sken
1791229997Sken			/* Try adding device path at beginning of name */
1792229997Sken			dev_name = malloc(strlen(be_lun->dev_path)
1793229997Sken					+ strlen(dev_path) + 1,
1794229997Sken					  M_CTLBLK, M_WAITOK);
1795229997Sken			if (dev_name) {
1796229997Sken				sprintf(dev_name, "%s%s", dev_path,
1797229997Sken					be_lun->dev_path);
1798229997Sken				free(be_lun->dev_path, M_CTLBLK);
1799229997Sken				be_lun->dev_path = dev_name;
1800229997Sken				goto again;
1801229997Sken			}
1802229997Sken		}
1803229997Sken		snprintf(req->error_str, sizeof(req->error_str),
1804229997Sken			 "%s: error opening %s", __func__, be_lun->dev_path);
1805229997Sken		return (error);
1806229997Sken	}
1807229997Sken
1808229997Sken	NDFREE(&nd, NDF_ONLY_PNBUF);
1809229997Sken
1810229997Sken	be_lun->vn = nd.ni_vp;
1811229997Sken
1812229997Sken	/* We only support disks and files. */
1813229997Sken	if (vn_isdisk(be_lun->vn, &error)) {
1814229997Sken		error = ctl_be_block_open_dev(be_lun, req);
1815229997Sken	} else if (be_lun->vn->v_type == VREG) {
1816229997Sken		error = ctl_be_block_open_file(be_lun, req);
1817229997Sken	} else {
1818229997Sken		error = EINVAL;
1819229997Sken		snprintf(req->error_str, sizeof(req->error_str),
1820258871Strasz			 "%s is not a disk or plain file", be_lun->dev_path);
1821229997Sken	}
1822229997Sken	VOP_UNLOCK(be_lun->vn, 0);
1823229997Sken
1824229997Sken	if (error != 0) {
1825229997Sken		ctl_be_block_close(be_lun);
1826229997Sken		return (error);
1827229997Sken	}
1828229997Sken
1829229997Sken	be_lun->blocksize_shift = fls(be_lun->blocksize) - 1;
1830229997Sken	be_lun->size_blocks = be_lun->size_bytes >> be_lun->blocksize_shift;
1831229997Sken
1832229997Sken	return (0);
1833229997Sken}
1834229997Sken
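/*
 * Handle a LUN create request: allocate and initialize the backend LUN,
 * open the backing store named by the "file" option for T_DIRECT LUNs,
 * honor the optional "num_threads" and "unmap" options, register the
 * LUN with CTL, start the task queue threads, and wait for the
 * configuration callback before reporting status.
 */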
1835229997Skenstatic int
1836229997Skenctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
1837229997Sken{
1838229997Sken	struct ctl_be_block_lun *be_lun;
1839229997Sken	struct ctl_lun_create_params *params;
1841229997Sken	char tmpstr[32];
1842267481Smav	char *value;
1843264274Smav	int retval, num_threads, unmap;
1844267481Smav	int tmp_num_threads;
1845229997Sken
1846229997Sken	params = &req->reqdata.create;
1847229997Sken	retval = 0;
1848229997Sken
1849229997Sken	num_threads = cbb_num_threads;
1850229997Sken
1851229997Sken	be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK);
1852229997Sken
1853229997Sken	be_lun->softc = softc;
1854229997Sken	STAILQ_INIT(&be_lun->input_queue);
1855229997Sken	STAILQ_INIT(&be_lun->config_write_queue);
1856229997Sken	STAILQ_INIT(&be_lun->datamove_queue);
1857229997Sken	sprintf(be_lun->lunname, "cblk%d", softc->num_luns);
1858267877Smav	mtx_init(&be_lun->io_lock, "cblk io lock", NULL, MTX_DEF);
1859267877Smav	mtx_init(&be_lun->queue_lock, "cblk queue lock", NULL, MTX_DEF);
1860267481Smav	ctl_init_opts(&be_lun->ctl_be_lun, req);
1861229997Sken
1862264886Smav	be_lun->lun_zone = uma_zcreate(be_lun->lunname, CTLBLK_MAX_SEG,
1863256995Smav	    NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
1864229997Sken
1865229997Sken	if (be_lun->lun_zone == NULL) {
1866229997Sken		snprintf(req->error_str, sizeof(req->error_str),
1867229997Sken			 "%s: error allocating UMA zone", __func__);
1868229997Sken		goto bailout_error;
1869229997Sken	}
1870229997Sken
1871229997Sken	if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
1872229997Sken		be_lun->ctl_be_lun.lun_type = params->device_type;
1873229997Sken	else
1874229997Sken		be_lun->ctl_be_lun.lun_type = T_DIRECT;
1875229997Sken
1876229997Sken	if (be_lun->ctl_be_lun.lun_type == T_DIRECT) {
1877267499Smav		value = ctl_get_opt(&be_lun->ctl_be_lun, "file");
1878267499Smav		if (value == NULL) {
1879229997Sken			snprintf(req->error_str, sizeof(req->error_str),
1880229997Sken				 "%s: no file argument specified", __func__);
1881229997Sken			goto bailout_error;
1882229997Sken		}
1883267499Smav		be_lun->dev_path = strdup(value, M_CTLBLK);
1884229997Sken
1885229997Sken		retval = ctl_be_block_open(softc, be_lun, req);
1886229997Sken		if (retval != 0) {
1887229997Sken			retval = 0;
1888229997Sken			goto bailout_error;
1889229997Sken		}
1890229997Sken
1891229997Sken		/*
1892229997Sken		 * Tell the user the size of the file/device.
1893229997Sken		 */
1894229997Sken		params->lun_size_bytes = be_lun->size_bytes;
1895229997Sken
1896229997Sken		/*
1897229997Sken		 * The maximum LBA is the size - 1.
1898229997Sken		 */
1899229997Sken		be_lun->ctl_be_lun.maxlba = be_lun->size_blocks - 1;
1900229997Sken	} else {
1901229997Sken		/*
1902229997Sken		 * For processor devices, we don't have any size.
1903229997Sken		 */
1904229997Sken		be_lun->blocksize = 0;
1905264191Smav		be_lun->pblockexp = 0;
1906264191Smav		be_lun->pblockoff = 0;
1907229997Sken		be_lun->size_blocks = 0;
1908229997Sken		be_lun->size_bytes = 0;
1909229997Sken		be_lun->ctl_be_lun.maxlba = 0;
1910229997Sken		params->lun_size_bytes = 0;
1911229997Sken
1912229997Sken		/*
1913229997Sken		 * Default to just 1 thread for processor devices.
1914229997Sken		 */
1915229997Sken		num_threads = 1;
1916229997Sken	}
1917229997Sken
1918229997Sken	/*
1919229997Sken	 * Handle the optional "num_threads" argument, which overrides the
1920229997Sken	 * default thread count computed above.
1921229997Sken	 */
1922267481Smav	value = ctl_get_opt(&be_lun->ctl_be_lun, "num_threads");
1923267481Smav	if (value != NULL) {
1924267481Smav		tmp_num_threads = strtol(value, NULL, 0);
1925229997Sken
1926267481Smav		/*
1927267481Smav		 * We don't let the user specify less than one
1928267481Smav		 * thread, but hope he's clueful enough not to
1929267481Smav		 * specify 1000 threads.
1930267481Smav		 */
1931267481Smav		if (tmp_num_threads < 1) {
1932267481Smav			snprintf(req->error_str, sizeof(req->error_str),
1933267481Smav				 "%s: invalid number of threads %s",
1934267481Smav				 __func__, value);
1935267481Smav			goto bailout_error;
1936229997Sken		}
1937267481Smav		num_threads = tmp_num_threads;
1938229997Sken	}
1939267481Smav	unmap = 0;
1940267481Smav	value = ctl_get_opt(&be_lun->ctl_be_lun, "unmap");
1941267481Smav	if (value != NULL && strcmp(value, "on") == 0)
1942267481Smav		unmap = 1;
1943229997Sken
1944229997Sken	be_lun->flags = CTL_BE_BLOCK_LUN_UNCONFIGURED;
1945229997Sken	be_lun->ctl_be_lun.flags = CTL_LUN_FLAG_PRIMARY;
1946264274Smav	if (unmap)
1947264274Smav		be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_UNMAP;
1948229997Sken	be_lun->ctl_be_lun.be_lun = be_lun;
1949229997Sken	be_lun->ctl_be_lun.blocksize = be_lun->blocksize;
1950264191Smav	be_lun->ctl_be_lun.pblockexp = be_lun->pblockexp;
1951264191Smav	be_lun->ctl_be_lun.pblockoff = be_lun->pblockoff;
1952229997Sken	/* Tell the user the blocksize we ended up using */
1953229997Sken	params->blocksize_bytes = be_lun->blocksize;
1954229997Sken	if (params->flags & CTL_LUN_FLAG_ID_REQ) {
1955229997Sken		be_lun->ctl_be_lun.req_lun_id = params->req_lun_id;
1956229997Sken		be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_ID_REQ;
1957229997Sken	} else
1958229997Sken		be_lun->ctl_be_lun.req_lun_id = 0;
1959229997Sken
1960229997Sken	be_lun->ctl_be_lun.lun_shutdown = ctl_be_block_lun_shutdown;
1961229997Sken	be_lun->ctl_be_lun.lun_config_status =
1962229997Sken		ctl_be_block_lun_config_status;
1963229997Sken	be_lun->ctl_be_lun.be = &ctl_be_block_driver;
1964229997Sken
1965229997Sken	if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
1966229997Sken		snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%4d",
1967229997Sken			 softc->num_luns);
1968229997Sken		strncpy((char *)be_lun->ctl_be_lun.serial_num, tmpstr,
1969229997Sken			ctl_min(sizeof(be_lun->ctl_be_lun.serial_num),
1970229997Sken			sizeof(tmpstr)));
1971229997Sken
1972229997Sken		/* Tell the user what we used for a serial number */
1973229997Sken		strncpy((char *)params->serial_num, tmpstr,
1974229997Sken			ctl_min(sizeof(params->serial_num), sizeof(tmpstr)));
1975229997Sken	} else {
1976229997Sken		strncpy((char *)be_lun->ctl_be_lun.serial_num,
1977229997Sken			params->serial_num,
1978229997Sken			ctl_min(sizeof(be_lun->ctl_be_lun.serial_num),
1979229997Sken			sizeof(params->serial_num)));
1980229997Sken	}
1981229997Sken	if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
1982229997Sken		snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%4d", softc->num_luns);
1983229997Sken		strncpy((char *)be_lun->ctl_be_lun.device_id, tmpstr,
1984229997Sken			ctl_min(sizeof(be_lun->ctl_be_lun.device_id),
1985229997Sken			sizeof(tmpstr)));
1986229997Sken
1987229997Sken		/* Tell the user what we used for a device ID */
1988229997Sken		strncpy((char *)params->device_id, tmpstr,
1989229997Sken			ctl_min(sizeof(params->device_id), sizeof(tmpstr)));
1990229997Sken	} else {
1991229997Sken		strncpy((char *)be_lun->ctl_be_lun.device_id,
1992229997Sken			params->device_id,
1993229997Sken			ctl_min(sizeof(be_lun->ctl_be_lun.device_id),
1994229997Sken				sizeof(params->device_id)));
1995229997Sken	}
1996229997Sken
1997229997Sken	TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun);
1998229997Sken
1999229997Sken	be_lun->io_taskqueue = taskqueue_create(be_lun->lunname, M_WAITOK,
2000229997Sken	    taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);
2001229997Sken
2002229997Sken	if (be_lun->io_taskqueue == NULL) {
2003229997Sken		snprintf(req->error_str, sizeof(req->error_str),
2004229997Sken			 "%s: Unable to create taskqueue", __func__);
2005229997Sken		goto bailout_error;
2006229997Sken	}
2007229997Sken
2008229997Sken	/*
2009229997Sken	 * Note that we start the same number of threads by default for
2010229997Sken	 * both the file case and the block device case.  For the file
2011229997Sken	 * case, we need multiple threads to allow concurrency, because the
2012229997Sken	 * vnode interface is designed to be a blocking interface.  For the
2013229997Sken	 * block device case, ZFS zvols at least will block the caller's
2014229997Sken	 * context in many instances, and so we need multiple threads to
2015229997Sken	 * overcome that problem.  Other block devices don't need as many
2016229997Sken	 * threads, but they shouldn't cause too many problems.
2017229997Sken	 *
2018229997Sken	 * If the user wants to just have a single thread for a block
2019229997Sken	 * device, he can specify that when the LUN is created, or change
2020229997Sken	 * the tunable/sysctl to alter the default number of threads.
2021229997Sken	 */
2022229997Sken	retval = taskqueue_start_threads(&be_lun->io_taskqueue,
2023229997Sken					 /*num threads*/num_threads,
2024229997Sken					 /*priority*/PWAIT,
2025229997Sken					 /*thread name*/
2026229997Sken					 "%s taskq", be_lun->lunname);
2027229997Sken
2028229997Sken	if (retval != 0)
2029229997Sken		goto bailout_error;
2030229997Sken
2031229997Sken	be_lun->num_threads = num_threads;
2032229997Sken
2033229997Sken	mtx_lock(&softc->lock);
2034229997Sken	softc->num_luns++;
2035229997Sken	STAILQ_INSERT_TAIL(&softc->lun_list, be_lun, links);
2036229997Sken
2037229997Sken	mtx_unlock(&softc->lock);
2038229997Sken
2039229997Sken	retval = ctl_add_lun(&be_lun->ctl_be_lun);
2040229997Sken	if (retval != 0) {
2041229997Sken		mtx_lock(&softc->lock);
2042229997Sken		STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
2043229997Sken			      links);
2044229997Sken		softc->num_luns--;
2045229997Sken		mtx_unlock(&softc->lock);
2046229997Sken		snprintf(req->error_str, sizeof(req->error_str),
2047229997Sken			 "%s: ctl_add_lun() returned error %d, see dmesg for "
2048229997Sken			 "details", __func__, retval);
2049229997Sken		retval = 0;
2050229997Sken		goto bailout_error;
2051229997Sken	}
2052229997Sken
2053229997Sken	mtx_lock(&softc->lock);
2054229997Sken
2055229997Sken	/*
2056229997Sken	 * Tell the config_status routine that we're waiting so it won't
2057229997Sken	 * clean up the LUN in the event of an error.
2058229997Sken	 */
2059229997Sken	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2060229997Sken
2061229997Sken	while (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) {
2062229997Sken		retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
2063229997Sken		if (retval == EINTR)
2064229997Sken			break;
2065229997Sken	}
2066229997Sken	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2067229997Sken
2068229997Sken	if (be_lun->flags & CTL_BE_BLOCK_LUN_CONFIG_ERR) {
2069229997Sken		snprintf(req->error_str, sizeof(req->error_str),
2070229997Sken			 "%s: LUN configuration error, see dmesg for details",
2071229997Sken			 __func__);
2072229997Sken		STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
2073229997Sken			      links);
2074229997Sken		softc->num_luns--;
2075229997Sken		mtx_unlock(&softc->lock);
2076229997Sken		goto bailout_error;
2077229997Sken	} else {
2078229997Sken		params->req_lun_id = be_lun->ctl_be_lun.lun_id;
2079229997Sken	}
2080229997Sken
2081229997Sken	mtx_unlock(&softc->lock);
2082229997Sken
2083229997Sken	be_lun->disk_stats = devstat_new_entry("cbb", params->req_lun_id,
2084229997Sken					       be_lun->blocksize,
2085229997Sken					       DEVSTAT_ALL_SUPPORTED,
2086229997Sken					       be_lun->ctl_be_lun.lun_type
2087229997Sken					       | DEVSTAT_TYPE_IF_OTHER,
2088229997Sken					       DEVSTAT_PRIORITY_OTHER);
2089229997Sken
2090229997Sken
2091229997Sken	req->status = CTL_LUN_OK;
2092229997Sken
2093229997Sken	return (retval);
2094229997Sken
2095229997Skenbailout_error:
2096229997Sken	req->status = CTL_LUN_ERROR;
2097229997Sken
2098267429Smav	if (be_lun->io_taskqueue != NULL)
2099267429Smav		taskqueue_free(be_lun->io_taskqueue);
2100229997Sken	ctl_be_block_close(be_lun);
2101267429Smav	if (be_lun->dev_path != NULL)
2102267429Smav		free(be_lun->dev_path, M_CTLBLK);
2103267429Smav	if (be_lun->lun_zone != NULL)
2104267429Smav		uma_zdestroy(be_lun->lun_zone);
2105267481Smav	ctl_free_opts(&be_lun->ctl_be_lun);
2106267877Smav	mtx_destroy(&be_lun->queue_lock);
2107267877Smav	mtx_destroy(&be_lun->io_lock);
2108229997Sken	free(be_lun, M_CTLBLK);
2109229997Sken
2110229997Sken	return (retval);
2111229997Sken}
2112229997Sken
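/*
 * Handle a LUN remove request: disable and invalidate the LUN in CTL,
 * wait for it to shut down, then release the task queue, backing store,
 * devstat entry, and memory.
 */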
2113229997Skenstatic int
2114229997Skenctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2115229997Sken{
2116229997Sken	struct ctl_lun_rm_params *params;
2117229997Sken	struct ctl_be_block_lun *be_lun;
2118229997Sken	int retval;
2119229997Sken
2120229997Sken	params = &req->reqdata.rm;
2121229997Sken
2122229997Sken	mtx_lock(&softc->lock);
2123229997Sken
2124229997Sken	be_lun = NULL;
2125229997Sken
2126229997Sken	STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
2127229997Sken		if (be_lun->ctl_be_lun.lun_id == params->lun_id)
2128229997Sken			break;
2129229997Sken	}
2130229997Sken	mtx_unlock(&softc->lock);
2131229997Sken
2132229997Sken	if (be_lun == NULL) {
2133229997Sken		snprintf(req->error_str, sizeof(req->error_str),
2134229997Sken			 "%s: LUN %u is not managed by the block backend",
2135229997Sken			 __func__, params->lun_id);
2136229997Sken		goto bailout_error;
2137229997Sken	}
2138229997Sken
2139229997Sken	retval = ctl_disable_lun(&be_lun->ctl_be_lun);
2140229997Sken
2141229997Sken	if (retval != 0) {
2142229997Sken		snprintf(req->error_str, sizeof(req->error_str),
2143229997Sken			 "%s: error %d returned from ctl_disable_lun() for "
2144229997Sken			 "LUN %d", __func__, retval, params->lun_id);
2145229997Sken		goto bailout_error;
2146229997Sken
2147229997Sken	}
2148229997Sken
2149229997Sken	retval = ctl_invalidate_lun(&be_lun->ctl_be_lun);
2150229997Sken	if (retval != 0) {
2151229997Sken		snprintf(req->error_str, sizeof(req->error_str),
2152229997Sken			 "%s: error %d returned from ctl_invalidate_lun() for "
2153229997Sken			 "LUN %d", __func__, retval, params->lun_id);
2154229997Sken		goto bailout_error;
2155229997Sken	}
2156229997Sken
2157229997Sken	mtx_lock(&softc->lock);
2158229997Sken
2159229997Sken	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2160229997Sken
2161229997Sken	while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
2162229997Sken		retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
2163229997Sken		if (retval == EINTR)
2164229997Sken			break;
2165229997Sken	}
2166229997Sken
2167229997Sken	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2168229997Sken
2169229997Sken	if ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
2170229997Sken		snprintf(req->error_str, sizeof(req->error_str),
2171229997Sken			 "%s: interrupted waiting for LUN to be freed",
2172229997Sken			 __func__);
2173229997Sken		mtx_unlock(&softc->lock);
2174229997Sken		goto bailout_error;
2175229997Sken	}
2176229997Sken
2177229997Sken	STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, links);
2178229997Sken
2179229997Sken	softc->num_luns--;
2180229997Sken	mtx_unlock(&softc->lock);
2181229997Sken
2182229997Sken	taskqueue_drain(be_lun->io_taskqueue, &be_lun->io_task);
2183229997Sken
2184229997Sken	taskqueue_free(be_lun->io_taskqueue);
2185229997Sken
2186229997Sken	ctl_be_block_close(be_lun);
2187229997Sken
2188229997Sken	if (be_lun->disk_stats != NULL)
2189229997Sken		devstat_remove_entry(be_lun->disk_stats);
2190229997Sken
2191229997Sken	uma_zdestroy(be_lun->lun_zone);
2192229997Sken
2193267481Smav	ctl_free_opts(&be_lun->ctl_be_lun);
2194229997Sken	free(be_lun->dev_path, M_CTLBLK);
2195267877Smav	mtx_destroy(&be_lun->queue_lock);
2196267877Smav	mtx_destroy(&be_lun->io_lock);
2197229997Sken	free(be_lun, M_CTLBLK);
2198229997Sken
2199229997Sken	req->status = CTL_LUN_OK;
2200229997Sken
2201229997Sken	return (0);
2202229997Sken
2203229997Skenbailout_error:
2204229997Sken
2205229997Sken	req->status = CTL_LUN_ERROR;
2206229997Sken
2207229997Sken	return (0);
2208229997Sken}
2209229997Sken
2210232604Straszstatic int
2211232604Straszctl_be_block_modify_file(struct ctl_be_block_lun *be_lun,
2212232604Strasz			 struct ctl_lun_req *req)
2213232604Strasz{
2214232604Strasz	struct vattr vattr;
2215232604Strasz	int error;
2216232604Strasz	struct ctl_lun_modify_params *params;
2217232604Strasz
2218232604Strasz	params = &req->reqdata.modify;
2219232604Strasz
2220232604Strasz	if (params->lun_size_bytes != 0) {
2221232604Strasz		be_lun->size_bytes = params->lun_size_bytes;
2222232604Strasz	} else  {
2223232604Strasz		error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
2224232604Strasz		if (error != 0) {
2225232604Strasz			snprintf(req->error_str, sizeof(req->error_str),
2226232604Strasz				 "error calling VOP_GETATTR() for file %s",
2227232604Strasz				 be_lun->dev_path);
2228232604Strasz			return (error);
2229232604Strasz		}
2230232604Strasz
2231232604Strasz		be_lun->size_bytes = vattr.va_size;
2232232604Strasz	}
2233232604Strasz
2234232604Strasz	return (0);
2235232604Strasz}
2236232604Strasz
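/*
 * Update the size of a device-backed LUN from DIOCGMEDIASIZE, honoring
 * a requested size only if it fits within the media.
 */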
2237232604Straszstatic int
2238232604Straszctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun,
2239232604Strasz			struct ctl_lun_req *req)
2240232604Strasz{
2241232604Strasz	struct cdev *dev;
2242232604Strasz	struct cdevsw *devsw;
2243232604Strasz	int error;
2244232604Strasz	struct ctl_lun_modify_params *params;
2245232604Strasz	uint64_t size_bytes;
2246232604Strasz
2247232604Strasz	params = &req->reqdata.modify;
2248232604Strasz
2249232604Strasz	dev = be_lun->vn->v_rdev;
2250232604Strasz	devsw = dev->si_devsw;
2251232604Strasz	if (!devsw->d_ioctl) {
2252232604Strasz		snprintf(req->error_str, sizeof(req->error_str),
2253232604Strasz			 "%s: no d_ioctl for device %s!", __func__,
2254232604Strasz			 be_lun->dev_path);
2255232604Strasz		return (ENODEV);
2256232604Strasz	}
2257232604Strasz
2258232604Strasz	error = devsw->d_ioctl(dev, DIOCGMEDIASIZE,
2259232604Strasz			       (caddr_t)&size_bytes, FREAD,
2260232604Strasz			       curthread);
2261232604Strasz	if (error) {
2262232604Strasz		snprintf(req->error_str, sizeof(req->error_str),
2263232604Strasz			 "%s: error %d returned for DIOCGMEDIASIZE ioctl "
2264232604Strasz			 "on %s!", __func__, error, be_lun->dev_path);
2265232604Strasz		return (error);
2266232604Strasz	}
2267232604Strasz
2268232604Strasz	if (params->lun_size_bytes != 0) {
2269232604Strasz		if (params->lun_size_bytes > size_bytes) {
2270232604Strasz			snprintf(req->error_str, sizeof(req->error_str),
2271232604Strasz				 "%s: requested LUN size %ju > backing device "
2272232604Strasz				 "size %ju", __func__,
2273232604Strasz				 (uintmax_t)params->lun_size_bytes,
2274232604Strasz				 (uintmax_t)size_bytes);
2275232604Strasz			return (EINVAL);
2276232604Strasz		}
2277232604Strasz
2278232604Strasz		be_lun->size_bytes = params->lun_size_bytes;
2279232604Strasz	} else {
2280232604Strasz		be_lun->size_bytes = size_bytes;
2281232604Strasz	}
2282232604Strasz
2283232604Strasz	return (0);
2284232604Strasz}
2285232604Strasz
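/*
 * Handle a LUN modify request: locate the LUN, re-read the backing
 * store size, and notify CTL that the capacity changed.
 */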
2286232604Straszstatic int
2287232604Straszctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2288232604Strasz{
2289232604Strasz	struct ctl_lun_modify_params *params;
2290232604Strasz	struct ctl_be_block_lun *be_lun;
2291241896Skib	int error;
2292232604Strasz
2293232604Strasz	params = &req->reqdata.modify;
2294232604Strasz
2295232604Strasz	mtx_lock(&softc->lock);
2296232604Strasz
2297232604Strasz	be_lun = NULL;
2298232604Strasz
2299232604Strasz	STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
2300232604Strasz		if (be_lun->ctl_be_lun.lun_id == params->lun_id)
2301232604Strasz			break;
2302232604Strasz	}
2303232604Strasz	mtx_unlock(&softc->lock);
2304232604Strasz
2305232604Strasz	if (be_lun == NULL) {
2306232604Strasz		snprintf(req->error_str, sizeof(req->error_str),
2307232604Strasz			 "%s: LUN %u is not managed by the block backend",
2308232604Strasz			 __func__, params->lun_id);
2309232604Strasz		goto bailout_error;
2310232604Strasz	}
2311232604Strasz
2312232604Strasz	if (params->lun_size_bytes != 0) {
2313232604Strasz		if (params->lun_size_bytes < be_lun->blocksize) {
2314232604Strasz			snprintf(req->error_str, sizeof(req->error_str),
2315232604Strasz				"%s: LUN size %ju < blocksize %u", __func__,
2316232604Strasz				params->lun_size_bytes, be_lun->blocksize);
2317232604Strasz				(uintmax_t)params->lun_size_bytes, be_lun->blocksize);
2318232604Strasz		}
2319232604Strasz	}
2320232604Strasz
2321232604Strasz	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
2322232604Strasz
2323232604Strasz	if (be_lun->vn->v_type == VREG)
2324232604Strasz		error = ctl_be_block_modify_file(be_lun, req);
2325232604Strasz	else
2326232604Strasz		error = ctl_be_block_modify_dev(be_lun, req);
2327232604Strasz
2328232604Strasz	VOP_UNLOCK(be_lun->vn, 0);
2329232604Strasz
2330232604Strasz	if (error != 0)
2331232604Strasz		goto bailout_error;
2332232604Strasz
2333232604Strasz	be_lun->size_blocks = be_lun->size_bytes >> be_lun->blocksize_shift;
2334232604Strasz
2335232604Strasz	/*
2336232604Strasz	 * The maximum LBA is the size - 1.
2337232604Strasz	 *
2338232604Strasz	 * XXX: Note that this field is being updated without locking,
2339232604Strasz	 * 	which might cause problems on 32-bit architectures.
2340232604Strasz	 */
2341232604Strasz	be_lun->ctl_be_lun.maxlba = be_lun->size_blocks - 1;
2342232604Strasz	ctl_lun_capacity_changed(&be_lun->ctl_be_lun);
2343232604Strasz
2344232604Strasz	/* Tell the user the exact size we ended up using */
2345232604Strasz	params->lun_size_bytes = be_lun->size_bytes;
2346232604Strasz
2347232604Strasz	req->status = CTL_LUN_OK;
2348232604Strasz
2349232604Strasz	return (0);
2350232604Strasz
2351232604Straszbailout_error:
2352232604Strasz	req->status = CTL_LUN_ERROR;
2353232604Strasz
2354232604Strasz	return (0);
2355232604Strasz}
2356232604Strasz
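/*
 * Callback from CTL once the LUN has been shut down.  Mark it
 * unconfigured and wake anyone waiting in create or remove.
 */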
2357229997Skenstatic void
2358229997Skenctl_be_block_lun_shutdown(void *be_lun)
2359229997Sken{
2360229997Sken	struct ctl_be_block_lun *lun;
2361229997Sken	struct ctl_be_block_softc *softc;
2362229997Sken
2363229997Sken	lun = (struct ctl_be_block_lun *)be_lun;
2364229997Sken
2365229997Sken	softc = lun->softc;
2366229997Sken
2367229997Sken	mtx_lock(&softc->lock);
2368229997Sken	lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED;
2369229997Sken	if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2370229997Sken		wakeup(lun);
2371229997Sken	mtx_unlock(&softc->lock);
2372229997Sken
2373229997Sken}
2374229997Sken
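/*
 * Callback from CTL with the result of LUN configuration.  On success,
 * clear the unconfigured flag and try to enable the LUN; on failure,
 * record the error.  Either way, wake any waiting thread.
 */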
2375229997Skenstatic void
2376229997Skenctl_be_block_lun_config_status(void *be_lun, ctl_lun_config_status status)
2377229997Sken{
2378229997Sken	struct ctl_be_block_lun *lun;
2379229997Sken	struct ctl_be_block_softc *softc;
2380229997Sken
2381229997Sken	lun = (struct ctl_be_block_lun *)be_lun;
2382229997Sken	softc = lun->softc;
2383229997Sken
2384229997Sken	if (status == CTL_LUN_CONFIG_OK) {
2385229997Sken		mtx_lock(&softc->lock);
2386229997Sken		lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
2387229997Sken		if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2388229997Sken			wakeup(lun);
2389229997Sken		mtx_unlock(&softc->lock);
2390229997Sken
2391229997Sken		/*
2392229997Sken		 * We successfully added the LUN, attempt to enable it.
2393229997Sken		 */
2394229997Sken		if (ctl_enable_lun(&lun->ctl_be_lun) != 0) {
2395229997Sken			printf("%s: ctl_enable_lun() failed!\n", __func__);
2396229997Sken			if (ctl_invalidate_lun(&lun->ctl_be_lun) != 0) {
2397229997Sken				printf("%s: ctl_invalidate_lun() failed!\n",
2398229997Sken				       __func__);
2399229997Sken			}
2400229997Sken		}
2401229997Sken
2402229997Sken		return;
2403229997Sken	}
2404229997Sken
2405229997Sken
2406229997Sken	mtx_lock(&softc->lock);
2407229997Sken	lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
2408229997Sken	lun->flags |= CTL_BE_BLOCK_LUN_CONFIG_ERR;
2409229997Sken	wakeup(lun);
2410229997Sken	mtx_unlock(&softc->lock);
2411229997Sken}
2412229997Sken
2413229997Sken
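/*
 * Handle configuration (non-data) writes.  Cache sync, WRITE SAME, and
 * UNMAP commands are queued to the worker thread; START STOP UNIT is
 * handled inline.
 */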
2414229997Skenstatic int
2415229997Skenctl_be_block_config_write(union ctl_io *io)
2416229997Sken{
2417229997Sken	struct ctl_be_block_lun *be_lun;
2418229997Sken	struct ctl_be_lun *ctl_be_lun;
2419229997Sken	int retval;
2420229997Sken
2421229997Sken	retval = 0;
2422229997Sken
2423229997Sken	DPRINTF("entered\n");
2424229997Sken
2425229997Sken	ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
2426229997Sken		CTL_PRIV_BACKEND_LUN].ptr;
2427229997Sken	be_lun = (struct ctl_be_block_lun *)ctl_be_lun->be_lun;
2428229997Sken
2429229997Sken	switch (io->scsiio.cdb[0]) {
2430229997Sken	case SYNCHRONIZE_CACHE:
2431229997Sken	case SYNCHRONIZE_CACHE_16:
2432264274Smav	case WRITE_SAME_10:
2433264274Smav	case WRITE_SAME_16:
2434264274Smav	case UNMAP:
2435229997Sken		/*
2436229997Sken		 * The upper level CTL code will filter out any CDBs with
2437229997Sken		 * the immediate bit set and return the proper error.
2438229997Sken		 *
2439229997Sken		 * We don't really need to worry about what LBA range the
2440229997Sken		 * user asked to be synced out.  When they issue a sync
2441229997Sken		 * cache command, we'll sync out the whole thing.
2442229997Sken		 */
2443267877Smav		mtx_lock(&be_lun->queue_lock);
2444229997Sken		STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
2445229997Sken				   links);
2446267877Smav		mtx_unlock(&be_lun->queue_lock);
2447229997Sken		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
2448229997Sken		break;
2449229997Sken	case START_STOP_UNIT: {
2450229997Sken		struct scsi_start_stop_unit *cdb;
2451229997Sken
2452229997Sken		cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
2453229997Sken
2454229997Sken		if (cdb->how & SSS_START)
2455229997Sken			retval = ctl_start_lun(ctl_be_lun);
2456229997Sken		else {
2457229997Sken			retval = ctl_stop_lun(ctl_be_lun);
2458229997Sken			/*
2459229997Sken			 * XXX KDM Copan-specific offline behavior.
2460229997Sken			 * Figure out a reasonable way to port this?
2461229997Sken			 */
2462229997Sken#ifdef NEEDTOPORT
2463229997Sken			if ((retval == 0)
2464229997Sken			 && (cdb->byte2 & SSS_ONOFFLINE))
2465229997Sken				retval = ctl_lun_offline(ctl_be_lun);
2466229997Sken#endif
2467229997Sken		}
2468229997Sken
2469229997Sken		/*
2470229997Sken		 * In general, the above routines should not fail.  They
2471229997Sken		 * just set state for the LUN.  So we've got something
2472229997Sken		 * pretty wrong here if we can't start or stop the LUN.
2473229997Sken		 */
2474229997Sken		if (retval != 0) {
2475229997Sken			ctl_set_internal_failure(&io->scsiio,
2476229997Sken						 /*sks_valid*/ 1,
2477229997Sken						 /*retry_count*/ 0xf051);
2478229997Sken			retval = CTL_RETVAL_COMPLETE;
2479229997Sken		} else {
2480229997Sken			ctl_set_success(&io->scsiio);
2481229997Sken		}
2482229997Sken		ctl_config_write_done(io);
2483229997Sken		break;
2484229997Sken	}
2485229997Sken	default:
2486229997Sken		ctl_set_invalid_opcode(&io->scsiio);
2487229997Sken		ctl_config_write_done(io);
2488229997Sken		retval = CTL_RETVAL_COMPLETE;
2489229997Sken		break;
2490229997Sken	}
2491229997Sken
2492229997Sken	return (retval);
2493229997Sken
2494229997Sken}
2495229997Sken
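/*
 * No configuration reads are handled by this backend yet.
 */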
2496229997Skenstatic int
2497229997Skenctl_be_block_config_read(union ctl_io *io)
2498229997Sken{
2499229997Sken	return (0);
2500229997Sken}
2501229997Sken
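/*
 * Emit backend-specific LUN status (currently just the thread count) as
 * XML elements into the caller's sbuf.
 */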
2502229997Skenstatic int
2503229997Skenctl_be_block_lun_info(void *be_lun, struct sbuf *sb)
2504229997Sken{
2505229997Sken	struct ctl_be_block_lun *lun;
2506229997Sken	int retval;
2507229997Sken
2508229997Sken	lun = (struct ctl_be_block_lun *)be_lun;
2509229997Sken	retval = 0;
2510229997Sken
2511229997Sken	retval = sbuf_printf(sb, "<num_threads>");
2512229997Sken
2513229997Sken	if (retval != 0)
2514229997Sken		goto bailout;
2515229997Sken
2516229997Sken	retval = sbuf_printf(sb, "%d", lun->num_threads);
2517229997Sken
2518229997Sken	if (retval != 0)
2519229997Sken		goto bailout;
2520229997Sken
2521229997Sken	retval = sbuf_printf(sb, "</num_threads>");
2522229997Sken
2523229997Skenbailout:
2524229997Sken
2525229997Sken	return (retval);
2526229997Sken}
2527229997Sken
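/*
 * Backend module initialization: set up the softc lock, the beio UMA
 * zone, and the disk and LUN lists.
 */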
2528229997Skenint
2529229997Skenctl_be_block_init(void)
2530229997Sken{
2531229997Sken	struct ctl_be_block_softc *softc;
2532229997Sken	int retval;
2533229997Sken
2534229997Sken	softc = &backend_block_softc;
2535229997Sken	retval = 0;
2536229997Sken
2537267877Smav	mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF);
2538264020Strasz	beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
2539264020Strasz	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2540229997Sken	STAILQ_INIT(&softc->disk_list);
2541229997Sken	STAILQ_INIT(&softc->lun_list);
2542229997Sken
2543229997Sken	return (retval);
2544229997Sken}
2545