/* ctl_backend_block.c revision 312835 */
1/*-
2 * Copyright (c) 2003 Silicon Graphics International Corp.
3 * Copyright (c) 2009-2011 Spectra Logic Corporation
4 * Copyright (c) 2012 The FreeBSD Foundation
5 * Copyright (c) 2014-2015 Alexander Motin <mav@FreeBSD.org>
6 * All rights reserved.
7 *
8 * Portions of this software were developed by Edward Tomasz Napierala
9 * under sponsorship from the FreeBSD Foundation.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions, and the following disclaimer,
16 *    without modification.
17 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
18 *    substantially similar to the "NO WARRANTY" disclaimer below
19 *    ("Disclaimer") and any redistribution must be conditioned upon
20 *    including a substantially similar Disclaimer requirement for further
21 *    binary redistribution.
22 *
23 * NO WARRANTY
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
27 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGES.
35 *
36 * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $
37 */
38/*
39 * CAM Target Layer driver backend for block devices.
40 *
41 * Author: Ken Merry <ken@FreeBSD.org>
42 */
43#include <sys/cdefs.h>
44__FBSDID("$FreeBSD: stable/10/sys/cam/ctl/ctl_backend_block.c 312835 2017-01-26 20:50:01Z mav $");
45
46#include <opt_kdtrace.h>
47
48#include <sys/param.h>
49#include <sys/systm.h>
50#include <sys/kernel.h>
51#include <sys/types.h>
52#include <sys/kthread.h>
53#include <sys/bio.h>
54#include <sys/fcntl.h>
55#include <sys/limits.h>
56#include <sys/lock.h>
57#include <sys/mutex.h>
58#include <sys/condvar.h>
59#include <sys/malloc.h>
60#include <sys/conf.h>
61#include <sys/ioccom.h>
62#include <sys/queue.h>
63#include <sys/sbuf.h>
64#include <sys/endian.h>
65#include <sys/uio.h>
66#include <sys/buf.h>
67#include <sys/taskqueue.h>
68#include <sys/vnode.h>
69#include <sys/namei.h>
70#include <sys/mount.h>
71#include <sys/disk.h>
72#include <sys/fcntl.h>
73#include <sys/filedesc.h>
74#include <sys/filio.h>
75#include <sys/proc.h>
76#include <sys/pcpu.h>
77#include <sys/module.h>
78#include <sys/sdt.h>
79#include <sys/devicestat.h>
80#include <sys/sysctl.h>
81
82#include <geom/geom.h>
83
84#include <cam/cam.h>
85#include <cam/scsi/scsi_all.h>
86#include <cam/scsi/scsi_da.h>
87#include <cam/ctl/ctl_io.h>
88#include <cam/ctl/ctl.h>
89#include <cam/ctl/ctl_backend.h>
90#include <cam/ctl/ctl_ioctl.h>
91#include <cam/ctl/ctl_ha.h>
92#include <cam/ctl/ctl_scsi_all.h>
93#include <cam/ctl/ctl_private.h>
94#include <cam/ctl/ctl_error.h>
95
96/*
97 * The idea here is that we'll allocate enough S/G space to hold a 1MB
98 * I/O.  If we get an I/O larger than that, we'll split it.
99 */
/* Half of the largest single backend transfer; larger I/Os are split. */
#define	CTLBLK_HALF_IO_SIZE	(512 * 1024)
#define	CTLBLK_MAX_IO_SIZE	(CTLBLK_HALF_IO_SIZE * 2)
/* One S/G segment covers at most MAXPHYS bytes. */
#define	CTLBLK_MAX_SEG		MAXPHYS
/*
 * Segments needed for half an I/O; COMPARE keeps two equal S/G lists
 * side by side (see ctl_free_beio()/ctl_be_block_compare()).
 */
#define	CTLBLK_HALF_SEGS	MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MAX_SEG, 1)
#define	CTLBLK_MAX_SEGS		(CTLBLK_HALF_SEGS * 2)
105
106#ifdef CTLBLK_DEBUG
107#define DPRINTF(fmt, args...) \
108    printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
109#else
110#define DPRINTF(fmt, args...) do {} while(0)
111#endif
112
113#define PRIV(io)	\
114    ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
115#define ARGS(io)	\
116    ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])
117
118SDT_PROVIDER_DEFINE(cbb);
119
/* Run-time state flags for a backend block LUN (bitmask). */
typedef enum {
	CTL_BE_BLOCK_LUN_UNCONFIGURED	= 0x01,
	CTL_BE_BLOCK_LUN_CONFIG_ERR	= 0x02,
	CTL_BE_BLOCK_LUN_WAITING	= 0x04,
} ctl_be_block_lun_flags;
125
/* Kind of backing store behind a LUN. */
typedef enum {
	CTL_BE_BLOCK_NONE,	/* no backing store attached */
	CTL_BE_BLOCK_DEV,	/* device node (dispatched via cdevsw/GEOM) */
	CTL_BE_BLOCK_FILE	/* regular file (dispatched via VOP_READ/WRITE) */
} ctl_be_block_type;
131
/* File-backend private data. */
struct ctl_be_block_filedata {
	struct ucred *cred;	/* credentials passed to VOP_READ/VOP_WRITE */
};
135
/* Per-backend-type private data stored inside the LUN. */
union ctl_be_block_bedata {
	struct ctl_be_block_filedata file;
};
139
140struct ctl_be_block_io;
141struct ctl_be_block_lun;
142
143typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
144			       struct ctl_be_block_io *beio);
145typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun,
146				  const char *attrname);
147
/*
 * Backend LUN structure.  There is a 1:1 mapping between a block device
 * and a backend block LUN, and between a backend block LUN and a CTL LUN.
 */
struct ctl_be_block_lun {
	struct ctl_lun_create_params params;	/* creation-time parameters */
	char lunname[32];
	char *dev_path;				/* path of backing file/device */
	ctl_be_block_type dev_type;		/* NONE, DEV or FILE */
	struct vnode *vn;			/* vnode of the backing store */
	union ctl_be_block_bedata backend;	/* backend-type private data */
	cbb_dispatch_t dispatch;		/* read/write handler */
	cbb_dispatch_t lun_flush;		/* flush/sync handler */
	cbb_dispatch_t unmap;			/* UNMAP/DELETE handler */
	cbb_dispatch_t get_lba_status;		/* GET LBA STATUS handler */
	cbb_getattr_t getattr;			/* attribute query handler */
	uma_zone_t lun_zone;			/* zone for S/G segment buffers */
	uint64_t size_blocks;			/* LUN size in blocks */
	uint64_t size_bytes;			/* LUN size in bytes */
	struct ctl_be_block_softc *softc;	/* back pointer to module softc */
	struct devstat *disk_stats;		/* devstat(9) statistics */
	ctl_be_block_lun_flags flags;
	STAILQ_ENTRY(ctl_be_block_lun) links;	/* softc lun_list linkage */
	struct ctl_be_lun cbe_lun;		/* generic CTL LUN data */
	struct taskqueue *io_taskqueue;		/* runs io_task for this LUN */
	struct task io_task;
	int num_threads;
	/*
	 * Pending-I/O queues; ctl_be_block_move_done() appends completed
	 * write DMAs to datamove_queue.  queue_lock protects all four.
	 */
	STAILQ_HEAD(, ctl_io_hdr) input_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_read_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
	struct mtx_padalign io_lock;		/* held around devstat and bio counters */
	struct mtx_padalign queue_lock;		/* held around the queues above */
};
182
/*
 * Overall softc structure for the block backend module.
 */
struct ctl_be_block_softc {
	/* NOTE(review): presumably guards num_luns/lun_list; confirm at use sites. */
	struct mtx			 lock;
	int				 num_luns;	/* LUNs currently configured */
	STAILQ_HEAD(, ctl_be_block_lun)	 lun_list;	/* all LUNs of this backend */
};
191
192static struct ctl_be_block_softc backend_block_softc;
193
/*
 * Per-I/O information (a "beio"), allocated from beio_zone for each
 * backend I/O and freed in ctl_free_beio().
 */
struct ctl_be_block_io {
	union ctl_io			*io;		/* associated CTL I/O */
	struct ctl_sg_entry		sg_segs[CTLBLK_MAX_SEGS]; /* data S/G list */
	struct iovec			xiovecs[CTLBLK_MAX_SEGS]; /* iovec mirror of sg_segs */
	int				bio_cmd;	/* BIO_READ/WRITE/FLUSH/DELETE */
	int				num_segs;	/* valid entries in sg_segs */
	int				num_bios_sent;	/* bios issued to the device */
	int				num_bios_done;	/* bios completed so far */
	int				send_complete;	/* set once all bios are issued */
	int				first_error;	/* errno of lowest-offset bio error */
	uint64_t			first_error_offset; /* offset of that error */
	struct bintime			ds_t0;		/* devstat start timestamp */
	devstat_tag_type		ds_tag_type;
	devstat_trans_flags		ds_trans_type;
	uint64_t			io_len;		/* total transfer length in bytes */
	uint64_t			io_offset;	/* byte offset into backing store */
	int				io_arg;		/* per-command argument (e.g. sync mode) */
	struct ctl_be_block_softc	*softc;
	struct ctl_be_block_lun		*lun;
	void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
};
218
219extern struct ctl_softc *control_softc;
220
221static int cbb_num_threads = 14;
222TUNABLE_INT("kern.cam.ctl.block.num_threads", &cbb_num_threads);
223SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD, 0,
224	    "CAM Target Layer Block Backend");
225SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RW,
226           &cbb_num_threads, 0, "Number of threads per backing file");
227
228static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
229static void ctl_free_beio(struct ctl_be_block_io *beio);
230static void ctl_complete_beio(struct ctl_be_block_io *beio);
231static int ctl_be_block_move_done(union ctl_io *io);
232static void ctl_be_block_biodone(struct bio *bio);
233static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
234				    struct ctl_be_block_io *beio);
235static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
236				       struct ctl_be_block_io *beio);
237static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
238				  struct ctl_be_block_io *beio);
239static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun,
240					 const char *attrname);
241static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
242				   struct ctl_be_block_io *beio);
243static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
244				   struct ctl_be_block_io *beio);
245static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
246				      struct ctl_be_block_io *beio);
247static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun,
248					 const char *attrname);
249static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
250				    union ctl_io *io);
251static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
252				    union ctl_io *io);
253static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
254				  union ctl_io *io);
255static void ctl_be_block_worker(void *context, int pending);
256static int ctl_be_block_submit(union ctl_io *io);
257static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
258				   int flag, struct thread *td);
259static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
260				  struct ctl_lun_req *req);
261static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
262				 struct ctl_lun_req *req);
263static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
264static int ctl_be_block_open(struct ctl_be_block_lun *be_lun,
265			     struct ctl_lun_req *req);
266static int ctl_be_block_create(struct ctl_be_block_softc *softc,
267			       struct ctl_lun_req *req);
268static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
269			   struct ctl_lun_req *req);
270static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
271			   struct ctl_lun_req *req);
272static void ctl_be_block_lun_shutdown(void *be_lun);
273static void ctl_be_block_lun_config_status(void *be_lun,
274					   ctl_lun_config_status status);
275static int ctl_be_block_config_write(union ctl_io *io);
276static int ctl_be_block_config_read(union ctl_io *io);
277static int ctl_be_block_lun_info(void *be_lun, struct sbuf *sb);
278static uint64_t ctl_be_block_lun_attr(void *be_lun, const char *attrname);
279int ctl_be_block_init(void);
280
/*
 * Backend driver method table, registered with CTL via
 * CTL_BACKEND_DECLARE() below.
 */
static struct ctl_backend_driver ctl_be_block_driver =
{
	.name = "block",
	.flags = CTL_BE_FLAG_HAS_CONFIG,
	.init = ctl_be_block_init,
	.data_submit = ctl_be_block_submit,
	.data_move_done = ctl_be_block_move_done,
	.config_read = ctl_be_block_config_read,
	.config_write = ctl_be_block_config_write,
	.ioctl = ctl_be_block_ioctl,
	.lun_info = ctl_be_block_lun_info,
	.lun_attr = ctl_be_block_lun_attr
};
294
295MALLOC_DEFINE(M_CTLBLK, "ctlblk", "Memory used for CTL block backend");
296CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver);
297
298static uma_zone_t beio_zone;
299
/*
 * Allocate a zeroed per-I/O context from the beio zone.  M_WAITOK means
 * this sleeps until memory is available rather than fail, so the return
 * value is never NULL.
 */
static struct ctl_be_block_io *
ctl_alloc_beio(struct ctl_be_block_softc *softc)
{
	struct ctl_be_block_io *beio;

	beio = uma_zalloc(beio_zone, M_WAITOK | M_ZERO);
	beio->softc = softc;
	return (beio);
}
309
/*
 * Release a beio: return every S/G segment buffer to the LUN's UMA zone,
 * then free the beio itself.  For COMPARE commands the upper half of the
 * S/G list (offset CTLBLK_HALF_SEGS) holds the second data copy and is
 * freed as well.  Segments whose address is already NULL are counted and
 * reported, since that indicates a duplicate free.
 */
static void
ctl_free_beio(struct ctl_be_block_io *beio)
{
	int duplicate_free;
	int i;

	duplicate_free = 0;

	for (i = 0; i < beio->num_segs; i++) {
		if (beio->sg_segs[i].addr == NULL)
			duplicate_free++;

		uma_zfree(beio->lun->lun_zone, beio->sg_segs[i].addr);
		beio->sg_segs[i].addr = NULL;

		/* For compare we had two equal S/G lists. */
		if (ARGS(beio->io)->flags & CTL_LLF_COMPARE) {
			uma_zfree(beio->lun->lun_zone,
			    beio->sg_segs[i + CTLBLK_HALF_SEGS].addr);
			beio->sg_segs[i + CTLBLK_HALF_SEGS].addr = NULL;
		}
	}

	if (duplicate_free > 0) {
		printf("%s: %d duplicate frees out of %d segments\n", __func__,
		       duplicate_free, beio->num_segs);
	}

	uma_zfree(beio_zone, beio);
}
340
341static void
342ctl_complete_beio(struct ctl_be_block_io *beio)
343{
344	union ctl_io *io = beio->io;
345
346	if (beio->beio_cont != NULL) {
347		beio->beio_cont(beio);
348	} else {
349		ctl_free_beio(beio);
350		ctl_data_submit_done(io);
351	}
352}
353
/*
 * Return the count of leading bytes that are identical in the two
 * buffers, i.e. the index of the first mismatch (== size when the
 * buffers are equal over the whole range).
 */
static size_t
cmp(uint8_t *a, uint8_t *b, size_t size)
{
	size_t pos = 0;

	while (pos < size && a[pos] == b[pos])
		pos++;
	return (pos);
}
365
/*
 * Compare the two halves of the beio S/G list (initiator data in the
 * lower half vs. data read from the media in the upper half).  On the
 * first miscompare, report MISCOMPARE sense with the byte offset of the
 * difference in the INFORMATION field; otherwise report success.
 */
static void
ctl_be_block_compare(union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	uint64_t off, res;
	int i;
	uint8_t info[8];

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	off = 0;
	for (i = 0; i < beio->num_segs; i++) {
		/* cmp() returns the index of the first differing byte. */
		res = cmp(beio->sg_segs[i].addr,
		    beio->sg_segs[i + CTLBLK_HALF_SEGS].addr,
		    beio->sg_segs[i].len);
		off += res;
		if (res < beio->sg_segs[i].len)
			break;
	}
	if (i < beio->num_segs) {
		scsi_u64to8b(off, info);
		ctl_set_sense(&io->scsiio, /*current_error*/ 1,
		    /*sense_key*/ SSD_KEY_MISCOMPARE,
		    /*asc*/ 0x1D, /*ascq*/ 0x00,
		    /*type*/ SSD_ELEM_INFO,
		    /*size*/ sizeof(info), /*data*/ &info,
		    /*type*/ SSD_ELEM_NONE);
	} else
		ctl_set_success(&io->scsiio);
}
395
/*
 * DMA-completion callback (the backend's data_move_done method).
 * Updates DMA accounting, sets SCSI status for reads/compares and for
 * frontend (port) errors, and completes the beio if nothing remains to
 * be done.  Otherwise — a successfully DMA'd write — the I/O is queued
 * to the LUN's task queue, because this routine is generally called in
 * interrupt context while the backend I/O path may block.
 */
static int
ctl_be_block_move_done(union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_lun *be_lun;
	struct ctl_lba_len_flags *lbalen;
#ifdef CTL_TIME_IO
	struct bintime cur_bt;
#endif

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	be_lun = beio->lun;

	DPRINTF("entered\n");

#ifdef CTL_TIME_IO
	/* Accumulate the time spent in this DMA into the I/O's totals. */
	getbinuptime(&cur_bt);
	bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt);
	bintime_add(&io->io_hdr.dma_bt, &cur_bt);
#endif
	io->io_hdr.num_dmas++;
	io->scsiio.kern_rel_offset += io->scsiio.kern_data_len;

	/*
	 * We set status at this point for read commands, and write
	 * commands with errors.
	 */
	if (io->io_hdr.flags & CTL_FLAG_ABORT) {
		;
	} else if ((io->io_hdr.port_status == 0) &&
	    ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) {
		lbalen = ARGS(beio->io);
		if (lbalen->flags & CTL_LLF_READ) {
			ctl_set_success(&io->scsiio);
		} else if (lbalen->flags & CTL_LLF_COMPARE) {
			/* We have two data blocks ready for comparison. */
			ctl_be_block_compare(io);
		}
	} else if ((io->io_hdr.port_status != 0) &&
	    ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE ||
	     (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) {
		/*
		 * For hardware error sense keys, the sense key
		 * specific value is defined to be a retry count,
		 * but we use it to pass back an internal FETD
		 * error code.  XXX KDM  Hopefully the FETD is only
		 * using 16 bits for an error code, since that's
		 * all the space we have in the sks field.
		 */
		ctl_set_internal_failure(&io->scsiio,
					 /*sks_valid*/ 1,
					 /*retry_count*/
					 io->io_hdr.port_status);
	}

	/*
	 * If this is a read, or a write with errors, it is done.
	 */
	if ((beio->bio_cmd == BIO_READ)
	 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)
	 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) {
		ctl_complete_beio(beio);
		return (0);
	}

	/*
	 * At this point, we have a write and the DMA completed
	 * successfully.  We now have to queue it to the task queue to
	 * execute the backend I/O.  That is because we do blocking
	 * memory allocations, and in the file backing case, blocking I/O.
	 * This move done routine is generally called in the SIM's
	 * interrupt context, and therefore we cannot block.
	 */
	mtx_lock(&be_lun->queue_lock);
	STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links);
	mtx_unlock(&be_lun->queue_lock);
	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);

	return (0);
}
476
/*
 * Completion handler for bios issued by the dev backend.  Records the
 * first (lowest-offset) error; when the last outstanding bio of the
 * beio completes, finishes devstat accounting and either fails the
 * whole I/O with appropriate sense data or continues it (completion
 * for writes/flushes/deletes/verifies, datamove for reads).  Runs once
 * per bio, possibly concurrently; io_lock serializes the counters.
 */
static void
ctl_be_block_biodone(struct bio *bio)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_lun *be_lun;
	union ctl_io *io;
	int error;

	beio = bio->bio_caller1;
	be_lun = beio->lun;
	io = beio->io;

	DPRINTF("entered\n");

	error = bio->bio_error;
	mtx_lock(&be_lun->io_lock);
	/* Keep the error of the lowest-offset failing bio. */
	if (error != 0 &&
	    (beio->first_error == 0 ||
	     bio->bio_offset < beio->first_error_offset)) {
		beio->first_error = error;
		beio->first_error_offset = bio->bio_offset;
	}

	beio->num_bios_done++;

	/*
	 * XXX KDM will this cause WITNESS to complain?  Holding a lock
	 * during the free might cause it to complain.
	 */
	g_destroy_bio(bio);

	/*
	 * If the send complete bit isn't set, or we aren't the last I/O to
	 * complete, then we're done.
	 */
	if ((beio->send_complete == 0)
	 || (beio->num_bios_done < beio->num_bios_sent)) {
		mtx_unlock(&be_lun->io_lock);
		return;
	}

	/*
	 * At this point, we've verified that we are the last I/O to
	 * complete, so it's safe to drop the lock.
	 */
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If there are any errors from the backing device, we fail the
	 * entire I/O with a medium error.
	 */
	error = beio->first_error;
	if (error != 0) {
		if (error == EOPNOTSUPP) {
			ctl_set_invalid_opcode(&io->scsiio);
		} else if (error == ENOSPC || error == EDQUOT) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else if (error == EROFS || error == EACCES) {
			ctl_set_hw_write_protected(&io->scsiio);
		} else if (beio->bio_cmd == BIO_FLUSH) {
			/* XXX KDM is there is a better error here? */
			ctl_set_internal_failure(&io->scsiio,
						 /*sks_valid*/ 1,
						 /*retry_count*/ 0xbad2);
		} else {
			ctl_set_medium_error(&io->scsiio,
			    beio->bio_cmd == BIO_READ);
		}
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write, a flush, a delete or verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE)
	 || (beio->bio_cmd == BIO_FLUSH)
	 || (beio->bio_cmd == BIO_DELETE)
	 || (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL) {
			ctl_set_success(&io->scsiio);
			ctl_serseq_done(io);
		}
#ifdef CTL_TIME_IO
		getbinuptime(&io->io_hdr.dma_start_bt);
#endif
		ctl_datamove(io);
	}
}
574
/*
 * Flush handler for the file backend: synchronize the backing vnode to
 * stable storage with VOP_FSYNC().  beio->io_arg selects MNT_NOWAIT
 * (asynchronous) vs. MNT_WAIT (synchronous).  Completion status is set
 * here and the beio completed directly.
 */
static void
ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
			struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct mount *mountpoint;
	int error, lock_flags;

	DPRINTF("entered\n");

	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	(void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);

	/* A shared vnode lock suffices if the filesystem allows shared writes. */
	if (MNT_SHARED_WRITES(mountpoint) ||
	    ((mountpoint == NULL) && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
		lock_flags = LK_SHARED;
	else
		lock_flags = LK_EXCLUSIVE;
	vn_lock(be_lun->vn, lock_flags | LK_RETRY);
	error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT,
	    curthread);
	VOP_UNLOCK(be_lun->vn, 0);

	vn_finished_write(mountpoint);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	if (error == 0)
		ctl_set_success(&io->scsiio);
	else {
		/* XXX KDM is there is a better error here? */
		ctl_set_internal_failure(&io->scsiio,
					 /*sks_valid*/ 1,
					 /*retry_count*/ 0xbad1);
	}

	ctl_complete_beio(beio);
}
621
622SDT_PROBE_DEFINE1(cbb, , read, file_start, "uint64_t");
623SDT_PROBE_DEFINE1(cbb, , write, file_start, "uint64_t");
624SDT_PROBE_DEFINE1(cbb, , read, file_done,"uint64_t");
625SDT_PROBE_DEFINE1(cbb, , write, file_done, "uint64_t");
626
/*
 * Read/write handler for the file backend.  Builds a kernel uio over
 * the beio S/G list and issues VOP_READ()/VOP_WRITE() on the backing
 * vnode, then either completes the I/O (writes, verifies, errors) or
 * starts the datamove to the initiator (reads).  The SCSI DPO hint maps
 * to IO_DIRECT and FUA (writes only) to IO_SYNC, to the extent the
 * underlying filesystem honors them (see inline notes).
 */
static void
ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
			   struct ctl_be_block_io *beio)
{
	struct ctl_be_block_filedata *file_data;
	union ctl_io *io;
	struct uio xuio;
	struct iovec *xiovec;
	size_t s;
	int error, flags, i;

	DPRINTF("entered\n");

	file_data = &be_lun->backend.file;
	io = beio->io;
	flags = 0;
	if (ARGS(io)->flags & CTL_LLF_DPO)
		flags |= IO_DIRECT;
	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
		flags |= IO_SYNC;

	bzero(&xuio, sizeof(xuio));
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE0(cbb, , read, file_start);
		xuio.uio_rw = UIO_READ;
	} else {
		SDT_PROBE0(cbb, , write, file_start);
		xuio.uio_rw = UIO_WRITE;
	}
	xuio.uio_offset = beio->io_offset;
	xuio.uio_resid = beio->io_len;
	xuio.uio_segflg = UIO_SYSSPACE;
	xuio.uio_iov = beio->xiovecs;
	xuio.uio_iovcnt = beio->num_segs;
	xuio.uio_td = curthread;

	/* Mirror the S/G list into the iovec array backing the uio. */
	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
		xiovec->iov_base = beio->sg_segs[i].addr;
		xiovec->iov_len = beio->sg_segs[i].len;
	}

	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	if (beio->bio_cmd == BIO_READ) {
		vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);

		/*
		 * UFS pays attention to IO_DIRECT for reads.  If the
		 * DIRECTIO option is configured into the kernel, it calls
		 * ffs_rawread().  But that only works for single-segment
		 * uios with user space addresses.  In our case, with a
		 * kernel uio, it still reads into the buffer cache, but it
		 * will just try to release the buffer from the cache later
		 * on in ffs_read().
		 *
		 * ZFS does not pay attention to IO_DIRECT for reads.
		 *
		 * UFS does not pay attention to IO_SYNC for reads.
		 *
		 * ZFS pays attention to IO_SYNC (which translates into the
		 * Solaris define FRSYNC for zfs_read()) for reads.  It
		 * attempts to sync the file before reading.
		 */
		error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred);

		VOP_UNLOCK(be_lun->vn, 0);
		SDT_PROBE0(cbb, , read, file_done);
		if (error == 0 && xuio.uio_resid > 0) {
			/*
			 * If we read less than requested (EOF), then
			 * we should clean the rest of the buffer.
			 */
			s = beio->io_len - xuio.uio_resid;
			for (i = 0; i < beio->num_segs; i++) {
				if (s >= beio->sg_segs[i].len) {
					s -= beio->sg_segs[i].len;
					continue;
				}
				bzero((uint8_t *)beio->sg_segs[i].addr + s,
				    beio->sg_segs[i].len - s);
				s = 0;
			}
		}
	} else {
		struct mount *mountpoint;
		int lock_flags;

		(void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);

		/* A shared vnode lock suffices if the FS allows shared writes. */
		if (MNT_SHARED_WRITES(mountpoint) || ((mountpoint == NULL)
		  && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
			lock_flags = LK_SHARED;
		else
			lock_flags = LK_EXCLUSIVE;
		vn_lock(be_lun->vn, lock_flags | LK_RETRY);

		/*
		 * UFS pays attention to IO_DIRECT for writes.  The write
		 * is done asynchronously.  (Normally the write would just
		 * get put into cache.
		 *
		 * UFS pays attention to IO_SYNC for writes.  It will
		 * attempt to write the buffer out synchronously if that
		 * flag is set.
		 *
		 * ZFS does not pay attention to IO_DIRECT for writes.
		 *
		 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC)
		 * for writes.  It will flush the transaction from the
		 * cache before returning.
		 */
		error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred);
		VOP_UNLOCK(be_lun->vn, 0);

		vn_finished_write(mountpoint);
		SDT_PROBE0(cbb, , write, file_done);
	}

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data to "MEDIUM ERROR" and
	 * return the I/O to the user.
	 */
	if (error != 0) {
		if (error == ENOSPC || error == EDQUOT) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else if (error == EROFS || error == EACCES) {
			ctl_set_hw_write_protected(&io->scsiio);
		} else {
			ctl_set_medium_error(&io->scsiio,
			    beio->bio_cmd == BIO_READ);
		}
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write or a verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE) ||
	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL) {
			ctl_set_success(&io->scsiio);
			ctl_serseq_done(io);
		}
#ifdef CTL_TIME_IO
		getbinuptime(&io->io_hdr.dma_start_bt);
#endif
		ctl_datamove(io);
	}
}
791
/*
 * GET LBA STATUS handler for the file backend.  Uses FIOSEEKHOLE /
 * FIOSEEKDATA ioctls on the backing vnode to report whether the range
 * starting at lbalen->lba is mapped (status 0) or deallocated (status
 * 1) and how far that state extends; if neither query advances the
 * offset, the rest of the LUN is reported as status 0 ("unknown up to
 * the end").
 */
static void
ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
			struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct ctl_lba_len_flags *lbalen = ARGS(io);
	struct scsi_get_lba_status_data *data;
	off_t roff, off;
	int error, status;

	DPRINTF("entered\n");

	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
	error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off,
	    0, curthread->td_ucred, curthread);
	if (error == 0 && off > roff)
		status = 0;	/* mapped up to off */
	else {
		error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off,
		    0, curthread->td_ucred, curthread);
		if (error == 0 && off > roff)
			status = 1;	/* deallocated up to off */
		else {
			status = 0;	/* unknown up to the end */
			off = be_lun->size_bytes;
		}
	}
	VOP_UNLOCK(be_lun->vn, 0);

	/* Fill in the single LBA status descriptor, clamping to UINT32_MAX blocks. */
	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
	    lbalen->lba), data->descr[0].length);
	data->descr[0].status = status;

	ctl_complete_beio(beio);
}
830
831static uint64_t
832ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname)
833{
834	struct vattr		vattr;
835	struct statfs		statfs;
836	uint64_t		val;
837	int			error;
838
839	val = UINT64_MAX;
840	if (be_lun->vn == NULL)
841		return (val);
842	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
843	if (strcmp(attrname, "blocksused") == 0) {
844		error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
845		if (error == 0)
846			val = vattr.va_bytes / be_lun->cbe_lun.blocksize;
847	}
848	if (strcmp(attrname, "blocksavail") == 0 &&
849	    (be_lun->vn->v_iflag & VI_DOOMED) == 0) {
850		error = VFS_STATFS(be_lun->vn->v_mount, &statfs);
851		if (error == 0)
852			val = statfs.f_bavail * statfs.f_bsize /
853			    be_lun->cbe_lun.blocksize;
854	}
855	VOP_UNLOCK(be_lun->vn, 0);
856	return (val);
857}
858
/*
 * Read/write handler for ZVOL-style device backends.  Builds a kernel
 * uio over the beio S/G list and calls the character device's d_read /
 * d_write entry points directly (via devvn_refthread()), then either
 * completes the I/O or starts the datamove for reads.  DPO maps to
 * IO_DIRECT and FUA (writes) to IO_SYNC.
 */
static void
ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
			   struct ctl_be_block_io *beio)
{
	union ctl_io *io;
	struct cdevsw *csw;
	struct cdev *dev;
	struct uio xuio;
	struct iovec *xiovec;
	int error, flags, i, ref;

	DPRINTF("entered\n");

	io = beio->io;
	flags = 0;
	if (ARGS(io)->flags & CTL_LLF_DPO)
		flags |= IO_DIRECT;
	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
		flags |= IO_SYNC;

	bzero(&xuio, sizeof(xuio));
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE0(cbb, , read, file_start);
		xuio.uio_rw = UIO_READ;
	} else {
		SDT_PROBE0(cbb, , write, file_start);
		xuio.uio_rw = UIO_WRITE;
	}
	xuio.uio_offset = beio->io_offset;
	xuio.uio_resid = beio->io_len;
	xuio.uio_segflg = UIO_SYSSPACE;
	xuio.uio_iov = beio->xiovecs;
	xuio.uio_iovcnt = beio->num_segs;
	xuio.uio_td = curthread;

	/* Mirror the S/G list into the iovec array backing the uio. */
	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
		xiovec->iov_base = beio->sg_segs[i].addr;
		xiovec->iov_len = beio->sg_segs[i].len;
	}

	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/* Hold a thread reference on the cdev across the call; ENXIO if gone. */
	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw) {
		if (beio->bio_cmd == BIO_READ)
			error = csw->d_read(dev, &xuio, flags);
		else
			error = csw->d_write(dev, &xuio, flags);
		dev_relthread(dev, ref);
	} else
		error = ENXIO;

	if (beio->bio_cmd == BIO_READ)
		SDT_PROBE0(cbb, , read, file_done);
	else
		SDT_PROBE0(cbb, , write, file_done);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data to "MEDIUM ERROR" and
	 * return the I/O to the user.
	 */
	if (error != 0) {
		if (error == ENOSPC || error == EDQUOT) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else if (error == EROFS || error == EACCES) {
			ctl_set_hw_write_protected(&io->scsiio);
		} else {
			ctl_set_medium_error(&io->scsiio,
			    beio->bio_cmd == BIO_READ);
		}
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write or a verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE) ||
	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL) {
			ctl_set_success(&io->scsiio);
			ctl_serseq_done(io);
		}
#ifdef CTL_TIME_IO
		getbinuptime(&io->io_hdr.dma_start_bt);
#endif
		ctl_datamove(io);
	}
}
962
/*
 * GET LBA STATUS handler for ZVOL-style device backends.  Same logic
 * as ctl_be_block_gls_file(), but issues FIOSEEKHOLE / FIOSEEKDATA via
 * the character device's d_ioctl entry point.  If the device is gone
 * (no cdevsw), the rest of the LUN is reported as status 0.
 */
static void
ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun,
			struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct cdevsw *csw;
	struct cdev *dev;
	struct ctl_lba_len_flags *lbalen = ARGS(io);
	struct scsi_get_lba_status_data *data;
	off_t roff, off;
	int error, ref, status;

	DPRINTF("entered\n");

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw == NULL) {
		status = 0;	/* unknown up to the end */
		off = be_lun->size_bytes;
		goto done;
	}
	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
	error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD,
	    curthread);
	if (error == 0 && off > roff)
		status = 0;	/* mapped up to off */
	else {
		error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD,
		    curthread);
		if (error == 0 && off > roff)
			status = 1;	/* deallocated up to off */
		else {
			status = 0;	/* unknown up to the end */
			off = be_lun->size_bytes;
		}
	}
	dev_relthread(dev, ref);

done:
	/* Fill in the single LBA status descriptor, clamping to UINT32_MAX blocks. */
	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
	    lbalen->lba), data->descr[0].length);
	data->descr[0].status = status;

	ctl_complete_beio(beio);
}
1009
static void
ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
		       struct ctl_be_block_io *beio)
{
	struct bio *bio;
	struct cdevsw *csw;
	struct cdev *dev;
	int ref;

	DPRINTF("entered\n");

	/*
	 * Implement cache flush for a device-backed LUN by sending a
	 * single BIO_FLUSH to the backing device.  Completion is reported
	 * asynchronously through ctl_be_block_biodone().
	 */

	/* This can't fail, it's a blocking allocation. */
	bio = g_alloc_bio();

	bio->bio_cmd	    = BIO_FLUSH;
	bio->bio_offset	    = 0;
	bio->bio_data	    = 0;
	bio->bio_done	    = ctl_be_block_biodone;
	bio->bio_caller1    = beio;
	bio->bio_pblkno	    = 0;

	/*
	 * We don't need to acquire the LUN lock here, because we are only
	 * sending one bio, and so there is no other context to synchronize
	 * with.
	 */
	beio->num_bios_sent = 1;
	beio->send_complete = 1;

	/* Start devstat accounting before the bio is dispatched. */
	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If the device has disappeared, complete the bio by hand with
	 * ENXIO instead of calling into the driver.
	 */
	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw) {
		bio->bio_dev = dev;
		csw->d_strategy(bio);
		dev_relthread(dev, ref);
	} else {
		bio->bio_error = ENXIO;
		ctl_be_block_biodone(bio);
	}
}
1054
static void
ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
		       struct ctl_be_block_io *beio,
		       uint64_t off, uint64_t len, int last)
{
	struct bio *bio;
	uint64_t maxlen;
	struct cdevsw *csw;
	struct cdev *dev;
	int ref;

	/*
	 * Send BIO_DELETE requests covering [off, off + len).  The range
	 * is split into chunks no larger than maxlen: LONG_MAX rounded
	 * down to a multiple of the LUN block size, since bio_length has
	 * a long-sized limit.  'last' marks the final range of the whole
	 * UNMAP operation, so send_complete is only set on its last chunk.
	 */
	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize);
	while (len > 0) {
		bio = g_alloc_bio();
		bio->bio_cmd	    = BIO_DELETE;
		bio->bio_dev	    = dev;
		bio->bio_offset	    = off;
		bio->bio_length	    = MIN(len, maxlen);
		bio->bio_data	    = 0;
		bio->bio_done	    = ctl_be_block_biodone;
		bio->bio_caller1    = beio;
		bio->bio_pblkno     = off / be_lun->cbe_lun.blocksize;

		off += bio->bio_length;
		len -= bio->bio_length;

		/*
		 * Account for the bio under the I/O lock so the completion
		 * path can tell when all bios of the batch are out.
		 */
		mtx_lock(&be_lun->io_lock);
		beio->num_bios_sent++;
		if (last && len == 0)
			beio->send_complete = 1;
		mtx_unlock(&be_lun->io_lock);

		if (csw) {
			csw->d_strategy(bio);
		} else {
			/* Device disappeared; fail the bio by hand. */
			bio->bio_error = ENXIO;
			ctl_be_block_biodone(bio);
		}
	}
	if (csw)
		dev_relthread(dev, ref);
}
1098
static void
ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
		       struct ctl_be_block_io *beio)
{
	union ctl_io *io;
	struct ctl_ptr_len_flags *ptrlen;
	struct scsi_unmap_desc *buf, *end;
	uint64_t len;

	io = beio->io;

	DPRINTF("entered\n");

	/* Start devstat accounting for the whole UNMAP operation. */
	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * io_offset == -1 means the ranges come from a SCSI UNMAP
	 * descriptor list; otherwise a single pre-computed range (e.g.
	 * from WRITE SAME with the UNMAP bit) is used.
	 */
	if (beio->io_offset == -1) {
		beio->io_len = 0;
		ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
		buf = (struct scsi_unmap_desc *)ptrlen->ptr;
		end = buf + ptrlen->len / sizeof(*buf);
		for (; buf < end; buf++) {
			len = (uint64_t)scsi_4btoul(buf->length) *
			    be_lun->cbe_lun.blocksize;
			beio->io_len += len;
			/* Pass 'last' only for the final descriptor. */
			ctl_be_block_unmap_dev_range(be_lun, beio,
			    scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize,
			    len, (end - buf < 2) ? TRUE : FALSE);
		}
	} else
		ctl_be_block_unmap_dev_range(be_lun, beio,
		    beio->io_offset, beio->io_len, TRUE);
}
1134
static void
ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
			  struct ctl_be_block_io *beio)
{
	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
	struct bio *bio;
	struct cdevsw *csw;
	struct cdev *dev;
	off_t cur_offset;
	int i, max_iosize, ref;

	DPRINTF("entered\n");
	csw = devvn_refthread(be_lun->vn, &dev, &ref);

	/*
	 * We have to limit our I/O size to the maximum supported by the
	 * backend device.  Hopefully it is MAXPHYS.  If the driver doesn't
	 * set it properly, use DFLTPHYS.
	 */
	if (csw) {
		max_iosize = dev->si_iosize_max;
		if (max_iosize < PAGE_SIZE)
			max_iosize = DFLTPHYS;
	} else
		max_iosize = DFLTPHYS;

	/*
	 * Build a bio for every S/G segment, splitting segments larger
	 * than max_iosize.  Collect them on a local queue first so that
	 * num_bios_sent and send_complete are final before any bio can
	 * possibly complete.
	 */
	cur_offset = beio->io_offset;
	for (i = 0; i < beio->num_segs; i++) {
		size_t cur_size;
		uint8_t *cur_ptr;

		cur_size = beio->sg_segs[i].len;
		cur_ptr = beio->sg_segs[i].addr;

		while (cur_size > 0) {
			/* This can't fail, it's a blocking allocation. */
			bio = g_alloc_bio();

			KASSERT(bio != NULL, ("g_alloc_bio() failed!\n"));

			bio->bio_cmd = beio->bio_cmd;
			bio->bio_dev = dev;
			bio->bio_caller1 = beio;
			bio->bio_length = min(cur_size, max_iosize);
			bio->bio_offset = cur_offset;
			bio->bio_data = cur_ptr;
			bio->bio_done = ctl_be_block_biodone;
			bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize;

			cur_offset += bio->bio_length;
			cur_ptr += bio->bio_length;
			cur_size -= bio->bio_length;

			TAILQ_INSERT_TAIL(&queue, bio, bio_queue);
			beio->num_bios_sent++;
		}
	}
	/* All bios are counted; begin accounting and mark the batch closed. */
	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
	beio->send_complete = 1;
	mtx_unlock(&be_lun->io_lock);

	/*
	 * Fire off all allocated requests!
	 */
	while ((bio = TAILQ_FIRST(&queue)) != NULL) {
		TAILQ_REMOVE(&queue, bio, bio_queue);
		if (csw)
			csw->d_strategy(bio);
		else {
			/* Device disappeared; fail the bio by hand. */
			bio->bio_error = ENXIO;
			ctl_be_block_biodone(bio);
		}
	}
	if (csw)
		dev_relthread(dev, ref);
}
1213
1214static uint64_t
1215ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname)
1216{
1217	struct diocgattr_arg	arg;
1218	struct cdevsw *csw;
1219	struct cdev *dev;
1220	int error, ref;
1221
1222	csw = devvn_refthread(be_lun->vn, &dev, &ref);
1223	if (csw == NULL)
1224		return (UINT64_MAX);
1225	strlcpy(arg.name, attrname, sizeof(arg.name));
1226	arg.len = sizeof(arg.value.off);
1227	if (csw->d_ioctl) {
1228		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
1229		    curthread);
1230	} else
1231		error = ENODEV;
1232	dev_relthread(dev, ref);
1233	if (error != 0)
1234		return (UINT64_MAX);
1235	return (arg.value.off);
1236}
1237
1238static void
1239ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun,
1240			    union ctl_io *io)
1241{
1242	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1243	struct ctl_be_block_io *beio;
1244	struct ctl_lba_len_flags *lbalen;
1245
1246	DPRINTF("entered\n");
1247	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1248	lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1249
1250	beio->io_len = lbalen->len * cbe_lun->blocksize;
1251	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1252	beio->io_arg = (lbalen->flags & SSC_IMMED) != 0;
1253	beio->bio_cmd = BIO_FLUSH;
1254	beio->ds_trans_type = DEVSTAT_NO_DATA;
1255	DPRINTF("SYNC\n");
1256	be_lun->lun_flush(be_lun, beio);
1257}
1258
1259static void
1260ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio)
1261{
1262	union ctl_io *io;
1263
1264	io = beio->io;
1265	ctl_free_beio(beio);
1266	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
1267	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
1268	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
1269		ctl_config_write_done(io);
1270		return;
1271	}
1272
1273	ctl_be_block_config_write(io);
1274}
1275
static void
ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
			    union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_lba_len_flags *lbalen;
	uint64_t len_left, lba;
	uint32_t pb, pbo, adj;
	int i, seglen;
	uint8_t *buf, *end;

	DPRINTF("entered\n");

	/*
	 * Handle WRITE SAME (10/16).  With UNMAP/ANCHOR set it becomes a
	 * BIO_DELETE for the whole range; otherwise the pattern block is
	 * replicated into S/G buffers and written out, possibly in
	 * multiple chunks with ctl_be_block_cw_done_ws as continuation.
	 */
	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	lbalen = ARGS(beio->io);

	/* Reject unsupported flags, and unmap flags on non-unmap backends. */
	if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) ||
	    (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) {
		ctl_free_beio(beio);
		ctl_set_invalid_field(&io->scsiio,
				      /*sks_valid*/ 1,
				      /*command*/ 1,
				      /*field*/ 1,
				      /*bit_valid*/ 0,
				      /*bit*/ 0);
		ctl_config_write_done(io);
		return;
	}

	/* UNMAP/ANCHOR variant: delete the whole range in one shot. */
	if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) {
		beio->io_offset = lbalen->lba * cbe_lun->blocksize;
		beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize;
		beio->bio_cmd = BIO_DELETE;
		beio->ds_trans_type = DEVSTAT_FREE;

		be_lun->unmap(be_lun, beio);
		return;
	}

	beio->bio_cmd = BIO_WRITE;
	beio->ds_trans_type = DEVSTAT_WRITE;

	DPRINTF("WRITE SAME at LBA %jx len %u\n",
	       (uintmax_t)lbalen->lba, lbalen->len);

	/*
	 * Compute the physical block size (pb) and the byte offset of the
	 * first physical block boundary (pbo) so segment boundaries can be
	 * aligned to physical blocks where possible.
	 */
	pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp;
	if (be_lun->cbe_lun.pblockoff > 0)
		pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff;
	else
		pbo = 0;
	len_left = (uint64_t)lbalen->len * cbe_lun->blocksize;
	for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {

		/*
		 * Setup the S/G entry for this chunk.
		 */
		seglen = MIN(CTLBLK_MAX_SEG, len_left);
		if (pb > cbe_lun->blocksize) {
			/*
			 * Trim the segment so it ends on a physical block
			 * boundary when it is long enough; otherwise just
			 * round it down to a logical block multiple.
			 */
			adj = ((lbalen->lba + lba) * cbe_lun->blocksize +
			    seglen - pbo) % pb;
			if (seglen > adj)
				seglen -= adj;
			else
				seglen -= seglen % cbe_lun->blocksize;
		} else
			seglen -= seglen % cbe_lun->blocksize;
		beio->sg_segs[i].len = seglen;
		beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);

		DPRINTF("segment %d addr %p len %zd\n", i,
			beio->sg_segs[i].addr, beio->sg_segs[i].len);

		beio->num_segs++;
		len_left -= seglen;

		/*
		 * Fill the segment block by block: zeros for NDOB, the
		 * user-supplied pattern otherwise; LBDATA stamps each
		 * block with its LBA.
		 */
		buf = beio->sg_segs[i].addr;
		end = buf + seglen;
		for (; buf < end; buf += cbe_lun->blocksize) {
			if (lbalen->flags & SWS_NDOB) {
				memset(buf, 0, cbe_lun->blocksize);
			} else {
				memcpy(buf, io->scsiio.kern_data_ptr,
				    cbe_lun->blocksize);
			}
			if (lbalen->flags & SWS_LBDATA)
				scsi_ulto4b(lbalen->lba + lba, buf);
			lba++;
		}
	}

	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
	beio->io_len = lba * cbe_lun->blocksize;

	/* We can not do all in one run. Correct and schedule rerun. */
	if (len_left > 0) {
		lbalen->lba += lba;
		lbalen->len -= lba;
		beio->beio_cont = ctl_be_block_cw_done_ws;
	}

	be_lun->dispatch(be_lun, beio);
}
1379
1380static void
1381ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun,
1382			    union ctl_io *io)
1383{
1384	struct ctl_be_block_io *beio;
1385	struct ctl_ptr_len_flags *ptrlen;
1386
1387	DPRINTF("entered\n");
1388
1389	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1390	ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1391
1392	if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) {
1393		ctl_free_beio(beio);
1394		ctl_set_invalid_field(&io->scsiio,
1395				      /*sks_valid*/ 0,
1396				      /*command*/ 1,
1397				      /*field*/ 0,
1398				      /*bit_valid*/ 0,
1399				      /*bit*/ 0);
1400		ctl_config_write_done(io);
1401		return;
1402	}
1403
1404	beio->io_len = 0;
1405	beio->io_offset = -1;
1406	beio->bio_cmd = BIO_DELETE;
1407	beio->ds_trans_type = DEVSTAT_FREE;
1408	DPRINTF("UNMAP\n");
1409	be_lun->unmap(be_lun, beio);
1410}
1411
1412static void
1413ctl_be_block_cr_done(struct ctl_be_block_io *beio)
1414{
1415	union ctl_io *io;
1416
1417	io = beio->io;
1418	ctl_free_beio(beio);
1419	ctl_config_read_done(io);
1420}
1421
1422static void
1423ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
1424			 union ctl_io *io)
1425{
1426	struct ctl_be_block_io *beio;
1427	struct ctl_be_block_softc *softc;
1428
1429	DPRINTF("entered\n");
1430
1431	softc = be_lun->softc;
1432	beio = ctl_alloc_beio(softc);
1433	beio->io = io;
1434	beio->lun = be_lun;
1435	beio->beio_cont = ctl_be_block_cr_done;
1436	PRIV(io)->ptr = (void *)beio;
1437
1438	switch (io->scsiio.cdb[0]) {
1439	case SERVICE_ACTION_IN:		/* GET LBA STATUS */
1440		beio->bio_cmd = -1;
1441		beio->ds_trans_type = DEVSTAT_NO_DATA;
1442		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1443		beio->io_len = 0;
1444		if (be_lun->get_lba_status)
1445			be_lun->get_lba_status(be_lun, beio);
1446		else
1447			ctl_be_block_cr_done(beio);
1448		break;
1449	default:
1450		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
1451		break;
1452	}
1453}
1454
1455static void
1456ctl_be_block_cw_done(struct ctl_be_block_io *beio)
1457{
1458	union ctl_io *io;
1459
1460	io = beio->io;
1461	ctl_free_beio(beio);
1462	ctl_config_write_done(io);
1463}
1464
1465static void
1466ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
1467			 union ctl_io *io)
1468{
1469	struct ctl_be_block_io *beio;
1470	struct ctl_be_block_softc *softc;
1471
1472	DPRINTF("entered\n");
1473
1474	softc = be_lun->softc;
1475	beio = ctl_alloc_beio(softc);
1476	beio->io = io;
1477	beio->lun = be_lun;
1478	beio->beio_cont = ctl_be_block_cw_done;
1479	switch (io->scsiio.tag_type) {
1480	case CTL_TAG_ORDERED:
1481		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1482		break;
1483	case CTL_TAG_HEAD_OF_QUEUE:
1484		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
1485		break;
1486	case CTL_TAG_UNTAGGED:
1487	case CTL_TAG_SIMPLE:
1488	case CTL_TAG_ACA:
1489	default:
1490		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1491		break;
1492	}
1493	PRIV(io)->ptr = (void *)beio;
1494
1495	switch (io->scsiio.cdb[0]) {
1496	case SYNCHRONIZE_CACHE:
1497	case SYNCHRONIZE_CACHE_16:
1498		ctl_be_block_cw_dispatch_sync(be_lun, io);
1499		break;
1500	case WRITE_SAME_10:
1501	case WRITE_SAME_16:
1502		ctl_be_block_cw_dispatch_ws(be_lun, io);
1503		break;
1504	case UNMAP:
1505		ctl_be_block_cw_dispatch_unmap(be_lun, io);
1506		break;
1507	default:
1508		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
1509		break;
1510	}
1511}
1512
/* DTrace probes marking the start and buffer-allocation completion of R/W I/O. */
SDT_PROBE_DEFINE1(cbb, , read, start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , read, alloc_done, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, alloc_done, "uint64_t");
1517
static void
ctl_be_block_next(struct ctl_be_block_io *beio)
{
	struct ctl_be_block_lun *be_lun;
	union ctl_io *io;

	/*
	 * Continuation for I/Os too large for a single beio: free the
	 * finished beio and, unless the I/O was aborted or failed,
	 * requeue the ctl_io so the worker processes the next chunk.
	 */
	io = beio->io;
	be_lun = beio->lun;
	ctl_free_beio(beio);
	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
		ctl_data_submit_done(io);
		return;
	}

	/* Clear the status so the next pass starts with a clean slate. */
	io->io_hdr.status &= ~CTL_STATUS_MASK;
	io->io_hdr.status |= CTL_STATUS_NONE;

	mtx_lock(&be_lun->queue_lock);
	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
	mtx_unlock(&be_lun->queue_lock);
	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
}
1542
static void
ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
			   union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;
	struct ctl_lba_len_flags *lbalen;
	struct ctl_ptr_len_flags *bptrlen;
	uint64_t len_left, lbas;
	int i;

	softc = be_lun->softc;

	DPRINTF("entered\n");

	/*
	 * Main READ/WRITE/COMPARE/VERIFY path: allocate a beio, size it to
	 * at most CTLBLK_MAX_IO_SIZE (half for compare, which needs two
	 * buffer sets), carve out the S/G buffers, and either dispatch the
	 * read or start the datamove for a write.  bptrlen->len tracks how
	 * many LBAs of the request have already been handled, so a large
	 * I/O is processed in chunks via ctl_be_block_next().
	 */
	lbalen = ARGS(io);
	if (lbalen->flags & CTL_LLF_WRITE) {
		SDT_PROBE0(cbb, , write, start);
	} else {
		SDT_PROBE0(cbb, , read, start);
	}

	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	bptrlen = PRIV(io);
	bptrlen->ptr = (void *)beio;

	/* Map the CTL tag type onto the devstat tag type. */
	switch (io->scsiio.tag_type) {
	case CTL_TAG_ORDERED:
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		break;
	case CTL_TAG_HEAD_OF_QUEUE:
		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
		break;
	case CTL_TAG_UNTAGGED:
	case CTL_TAG_SIMPLE:
	case CTL_TAG_ACA:
	default:
		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
		break;
	}

	if (lbalen->flags & CTL_LLF_WRITE) {
		beio->bio_cmd = BIO_WRITE;
		beio->ds_trans_type = DEVSTAT_WRITE;
	} else {
		beio->bio_cmd = BIO_READ;
		beio->ds_trans_type = DEVSTAT_READ;
	}

	DPRINTF("%s at LBA %jx len %u @%ju\n",
	       (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE",
	       (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len);
	/* Compare needs two buffer sets, so only half the space per pass. */
	if (lbalen->flags & CTL_LLF_COMPARE)
		lbas = CTLBLK_HALF_IO_SIZE;
	else
		lbas = CTLBLK_MAX_IO_SIZE;
	lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize);
	beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize;
	beio->io_len = lbas * cbe_lun->blocksize;
	bptrlen->len += lbas;

	for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
		KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)",
		    i, CTLBLK_MAX_SEGS));

		/*
		 * Setup the S/G entry for this chunk.
		 */
		beio->sg_segs[i].len = min(CTLBLK_MAX_SEG, len_left);
		beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);

		DPRINTF("segment %d addr %p len %zd\n", i,
			beio->sg_segs[i].addr, beio->sg_segs[i].len);

		/* Set up second segment for compare operation. */
		if (lbalen->flags & CTL_LLF_COMPARE) {
			beio->sg_segs[i + CTLBLK_HALF_SEGS].len =
			    beio->sg_segs[i].len;
			beio->sg_segs[i + CTLBLK_HALF_SEGS].addr =
			    uma_zalloc(be_lun->lun_zone, M_WAITOK);
		}

		beio->num_segs++;
		len_left -= beio->sg_segs[i].len;
	}
	/* Chain another pass if this chunk doesn't cover the whole request. */
	if (bptrlen->len < lbalen->len)
		beio->beio_cont = ctl_be_block_next;
	io->scsiio.be_move_done = ctl_be_block_move_done;
	/* For compare we have separate S/G lists for read and datamove. */
	if (lbalen->flags & CTL_LLF_COMPARE)
		io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS];
	else
		io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs;
	io->scsiio.kern_data_len = beio->io_len;
	io->scsiio.kern_data_resid = 0;
	io->scsiio.kern_sg_entries = beio->num_segs;
	io->io_hdr.flags |= CTL_FLAG_ALLOCATED;

	/*
	 * For the read case, we need to read the data into our buffers and
	 * then we can send it back to the user.  For the write case, we
	 * need to get the data from the user first.
	 */
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE0(cbb, , read, alloc_done);
		be_lun->dispatch(be_lun, beio);
	} else {
		SDT_PROBE0(cbb, , write, alloc_done);
#ifdef CTL_TIME_IO
		getbinuptime(&io->io_hdr.dma_start_bt);
#endif
		ctl_datamove(io);
	}
}
1660
static void
ctl_be_block_worker(void *context, int pending)
{
	struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context;
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	union ctl_io *io;
	struct ctl_be_block_io *beio;

	DPRINTF("entered\n");
	/*
	 * Fetch and process I/Os from all queues.  If we detect LUN
	 * CTL_LUN_FLAG_NO_MEDIA status here -- it is result of a race,
	 * so make response maximally opaque to not confuse initiator.
	 */
	/*
	 * Queues are checked in priority order: datamove completions
	 * first, then config writes, config reads, and finally new I/O.
	 * The queue lock is dropped before dispatching each item and
	 * re-taken at the top of the loop.
	 */
	for (;;) {
		mtx_lock(&be_lun->queue_lock);
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
		if (io != NULL) {
			DPRINTF("datamove queue\n");
			STAILQ_REMOVE(&be_lun->datamove_queue, &io->io_hdr,
				      ctl_io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);
			beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
				ctl_set_busy(&io->scsiio);
				ctl_complete_beio(beio);
				return;
			}
			be_lun->dispatch(be_lun, beio);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
		if (io != NULL) {
			DPRINTF("config write queue\n");
			STAILQ_REMOVE(&be_lun->config_write_queue, &io->io_hdr,
				      ctl_io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
				ctl_set_busy(&io->scsiio);
				ctl_config_write_done(io);
				return;
			}
			ctl_be_block_cw_dispatch(be_lun, io);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue);
		if (io != NULL) {
			DPRINTF("config read queue\n");
			STAILQ_REMOVE(&be_lun->config_read_queue, &io->io_hdr,
				      ctl_io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
				ctl_set_busy(&io->scsiio);
				ctl_config_read_done(io);
				return;
			}
			ctl_be_block_cr_dispatch(be_lun, io);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
		if (io != NULL) {
			DPRINTF("input queue\n");
			STAILQ_REMOVE(&be_lun->input_queue, &io->io_hdr,
				      ctl_io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
				ctl_set_busy(&io->scsiio);
				ctl_data_submit_done(io);
				return;
			}
			ctl_be_block_dispatch(be_lun, io);
			continue;
		}

		/*
		 * If we get here, there is no work left in the queues, so
		 * just break out and let the task queue go to sleep.
		 */
		mtx_unlock(&be_lun->queue_lock);
		break;
	}
}
1743
1744/*
1745 * Entry point from CTL to the backend for I/O.  We queue everything to a
1746 * work thread, so this just puts the I/O on a queue and wakes up the
1747 * thread.
1748 */
1749static int
1750ctl_be_block_submit(union ctl_io *io)
1751{
1752	struct ctl_be_block_lun *be_lun;
1753	struct ctl_be_lun *cbe_lun;
1754
1755	DPRINTF("entered\n");
1756
1757	cbe_lun = CTL_BACKEND_LUN(io);
1758	be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;
1759
1760	/*
1761	 * Make sure we only get SCSI I/O.
1762	 */
1763	KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, ("Non-SCSI I/O (type "
1764		"%#x) encountered", io->io_hdr.io_type));
1765
1766	PRIV(io)->len = 0;
1767
1768	mtx_lock(&be_lun->queue_lock);
1769	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1770	mtx_unlock(&be_lun->queue_lock);
1771	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1772
1773	return (CTL_RETVAL_COMPLETE);
1774}
1775
1776static int
1777ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
1778			int flag, struct thread *td)
1779{
1780	struct ctl_be_block_softc *softc;
1781	int error;
1782
1783	softc = &backend_block_softc;
1784
1785	error = 0;
1786
1787	switch (cmd) {
1788	case CTL_LUN_REQ: {
1789		struct ctl_lun_req *lun_req;
1790
1791		lun_req = (struct ctl_lun_req *)addr;
1792
1793		switch (lun_req->reqtype) {
1794		case CTL_LUNREQ_CREATE:
1795			error = ctl_be_block_create(softc, lun_req);
1796			break;
1797		case CTL_LUNREQ_RM:
1798			error = ctl_be_block_rm(softc, lun_req);
1799			break;
1800		case CTL_LUNREQ_MODIFY:
1801			error = ctl_be_block_modify(softc, lun_req);
1802			break;
1803		default:
1804			lun_req->status = CTL_LUN_ERROR;
1805			snprintf(lun_req->error_str, sizeof(lun_req->error_str),
1806				 "invalid LUN request type %d",
1807				 lun_req->reqtype);
1808			break;
1809		}
1810		break;
1811	}
1812	default:
1813		error = ENOTTY;
1814		break;
1815	}
1816
1817	return (error);
1818}
1819
static int
ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun;
	struct ctl_be_block_filedata *file_data;
	struct ctl_lun_create_params *params;
	char			     *value;
	struct vattr		      vattr;
	off_t			      ps, pss, po, pos, us, uss, uo, uos;
	int			      error;

	/*
	 * Finish setup for a file-backed LUN: install the file-based
	 * method vectors, derive size and logical block size, and compute
	 * the reported physical/UNMAP block geometry from vattr and
	 * user-supplied options.
	 */
	cbe_lun = &be_lun->cbe_lun;
	file_data = &be_lun->backend.file;
	params = &be_lun->params;

	be_lun->dev_type = CTL_BE_BLOCK_FILE;
	be_lun->dispatch = ctl_be_block_dispatch_file;
	be_lun->lun_flush = ctl_be_block_flush_file;
	be_lun->get_lba_status = ctl_be_block_gls_file;
	be_lun->getattr = ctl_be_block_getattr_file;
	be_lun->unmap = NULL;
	cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;

	error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
	if (error != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "error calling VOP_GETATTR() for file %s",
			 be_lun->dev_path);
		return (error);
	}

	/* Hold a credential reference for later file I/O on this LUN. */
	file_data->cred = crhold(curthread->td_ucred);
	if (params->lun_size_bytes != 0)
		be_lun->size_bytes = params->lun_size_bytes;
	else
		be_lun->size_bytes = vattr.va_size;

	/*
	 * For files we can use any logical block size.  Prefer 512 bytes
	 * for compatibility reasons.  If file's vattr.va_blocksize
	 * (preferred I/O block size) is bigger and multiple to chosen
	 * logical block size -- report it as physical block size.
	 */
	if (params->blocksize_bytes != 0)
		cbe_lun->blocksize = params->blocksize_bytes;
	else if (cbe_lun->lun_type == T_CDROM)
		cbe_lun->blocksize = 2048;
	else
		cbe_lun->blocksize = 512;
	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
	    0 : (be_lun->size_blocks - 1);

	/* Defaults for physical/UNMAP geometry; options may override below. */
	us = ps = vattr.va_blocksize;
	uo = po = 0;

	value = ctl_get_opt(&cbe_lun->options, "pblocksize");
	if (value != NULL)
		ctl_expand_number(value, &ps);
	value = ctl_get_opt(&cbe_lun->options, "pblockoffset");
	if (value != NULL)
		ctl_expand_number(value, &po);
	/*
	 * Accept the physical geometry only if the size is a power-of-two
	 * multiple of the logical block size and the offset lies inside
	 * one physical block; exponent/offset are stored in SCSI form.
	 */
	pss = ps / cbe_lun->blocksize;
	pos = po / cbe_lun->blocksize;
	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
		cbe_lun->pblockexp = fls(pss) - 1;
		cbe_lun->pblockoff = (pss - pos) % pss;
	}

	value = ctl_get_opt(&cbe_lun->options, "ublocksize");
	if (value != NULL)
		ctl_expand_number(value, &us);
	value = ctl_get_opt(&cbe_lun->options, "ublockoffset");
	if (value != NULL)
		ctl_expand_number(value, &uo);
	/* Same validation for the UNMAP granularity geometry. */
	uss = us / cbe_lun->blocksize;
	uos = uo / cbe_lun->blocksize;
	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
		cbe_lun->ublockexp = fls(uss) - 1;
		cbe_lun->ublockoff = (uss - uos) % uss;
	}

	/*
	 * Sanity check.  The media size has to be at least one
	 * sector long.
	 */
	if (be_lun->size_bytes < cbe_lun->blocksize) {
		error = EINVAL;
		snprintf(req->error_str, sizeof(req->error_str),
			 "file %s size %ju < block size %u", be_lun->dev_path,
			 (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize);
	}

	cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize;
	return (error);
}
1918
static int
ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_lun_create_params *params;
	struct cdevsw		     *csw;
	struct cdev		     *dev;
	char			     *value;
	int			      error, atomic, maxio, ref, unmap, tmp;
	off_t			      ps, pss, po, pos, us, uss, uo, uos, otmp;

	/*
	 * Finish setup for a device-backed LUN: pick the dispatch methods
	 * (zvols get a cheaper path and GET LBA STATUS support), validate
	 * block size and media size against the device, derive the
	 * physical/UNMAP geometry, and probe for delete/unmap support.
	 */
	params = &be_lun->params;

	be_lun->dev_type = CTL_BE_BLOCK_DEV;
	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw == NULL)
		return (ENXIO);
	if (strcmp(csw->d_name, "zvol") == 0) {
		be_lun->dispatch = ctl_be_block_dispatch_zvol;
		be_lun->get_lba_status = ctl_be_block_gls_zvol;
		atomic = maxio = CTLBLK_MAX_IO_SIZE;
	} else {
		be_lun->dispatch = ctl_be_block_dispatch_dev;
		be_lun->get_lba_status = NULL;
		atomic = 0;
		maxio = dev->si_iosize_max;
		if (maxio <= 0)
			maxio = DFLTPHYS;
		if (maxio > CTLBLK_MAX_IO_SIZE)
			maxio = CTLBLK_MAX_IO_SIZE;
	}
	be_lun->lun_flush = ctl_be_block_flush_dev;
	be_lun->getattr = ctl_be_block_getattr_dev;
	be_lun->unmap = ctl_be_block_unmap_dev;

	if (!csw->d_ioctl) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "no d_ioctl for device %s!", be_lun->dev_path);
		return (ENODEV);
	}

	/* Ask the device for its native sector size. */
	error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD,
			       curthread);
	if (error) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "error %d returned for DIOCGSECTORSIZE ioctl "
			 "on %s!", error, be_lun->dev_path);
		return (error);
	}

	/*
	 * If the user has asked for a blocksize that is greater than the
	 * backing device's blocksize, we can do it only if the blocksize
	 * the user is asking for is an even multiple of the underlying
	 * device's blocksize.
	 */
	if ((params->blocksize_bytes != 0) &&
	    (params->blocksize_bytes >= tmp)) {
		if (params->blocksize_bytes % tmp == 0) {
			cbe_lun->blocksize = params->blocksize_bytes;
		} else {
			dev_relthread(dev, ref);
			snprintf(req->error_str, sizeof(req->error_str),
				 "requested blocksize %u is not an even "
				 "multiple of backing device blocksize %u",
				 params->blocksize_bytes, tmp);
			return (EINVAL);
		}
	} else if (params->blocksize_bytes != 0) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "requested blocksize %u < backing device "
			 "blocksize %u", params->blocksize_bytes, tmp);
		return (EINVAL);
	} else if (cbe_lun->lun_type == T_CDROM)
		cbe_lun->blocksize = MAX(tmp, 2048);
	else
		cbe_lun->blocksize = tmp;

	/* The requested LUN size may not exceed the device media size. */
	error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD,
			     curthread);
	if (error) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "error %d returned for DIOCGMEDIASIZE "
			 " ioctl on %s!", error,
			 be_lun->dev_path);
		return (error);
	}

	if (params->lun_size_bytes != 0) {
		if (params->lun_size_bytes > otmp) {
			dev_relthread(dev, ref);
			snprintf(req->error_str, sizeof(req->error_str),
				 "requested LUN size %ju > backing device "
				 "size %ju",
				 (uintmax_t)params->lun_size_bytes,
				 (uintmax_t)otmp);
			return (EINVAL);
		}

		be_lun->size_bytes = params->lun_size_bytes;
	} else
		be_lun->size_bytes = otmp;
	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
	    0 : (be_lun->size_blocks - 1);

	/*
	 * Default the physical geometry from the device stripe size and
	 * offset; failures here are non-fatal (geometry just stays 0).
	 */
	error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD,
	    curthread);
	if (error)
		ps = po = 0;
	else {
		error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po,
		    FREAD, curthread);
		if (error)
			po = 0;
	}
	us = ps;
	uo = po;

	value = ctl_get_opt(&cbe_lun->options, "pblocksize");
	if (value != NULL)
		ctl_expand_number(value, &ps);
	value = ctl_get_opt(&cbe_lun->options, "pblockoffset");
	if (value != NULL)
		ctl_expand_number(value, &po);
	/*
	 * Accept the physical geometry only if the size is a power-of-two
	 * multiple of the logical block size and the offset lies inside
	 * one physical block; exponent/offset are stored in SCSI form.
	 */
	pss = ps / cbe_lun->blocksize;
	pos = po / cbe_lun->blocksize;
	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
		cbe_lun->pblockexp = fls(pss) - 1;
		cbe_lun->pblockoff = (pss - pos) % pss;
	}

	value = ctl_get_opt(&cbe_lun->options, "ublocksize");
	if (value != NULL)
		ctl_expand_number(value, &us);
	value = ctl_get_opt(&cbe_lun->options, "ublockoffset");
	if (value != NULL)
		ctl_expand_number(value, &uo);
	/* Same validation for the UNMAP granularity geometry. */
	uss = us / cbe_lun->blocksize;
	uos = uo / cbe_lun->blocksize;
	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
		cbe_lun->ublockexp = fls(uss) - 1;
		cbe_lun->ublockoff = (uss - uos) % uss;
	}

	cbe_lun->atomicblock = atomic / cbe_lun->blocksize;
	cbe_lun->opttxferlen = maxio / cbe_lun->blocksize;

	/*
	 * Determine UNMAP support: zvols always can; other devices are
	 * probed via the GEOM::candelete attribute.  The "unmap" option
	 * overrides either result.
	 */
	if (be_lun->dispatch == ctl_be_block_dispatch_zvol) {
		unmap = 1;
	} else {
		struct diocgattr_arg	arg;

		strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
		arg.len = sizeof(arg.value.i);
		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
		    curthread);
		unmap = (error == 0) ? arg.value.i : 0;
	}
	value = ctl_get_opt(&cbe_lun->options, "unmap");
	if (value != NULL)
		unmap = (strcmp(value, "on") == 0);
	if (unmap)
		cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
	else
		cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;

	dev_relthread(dev, ref);
	return (0);
}
2095
2096static int
2097ctl_be_block_close(struct ctl_be_block_lun *be_lun)
2098{
2099	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2100	int flags;
2101
2102	if (be_lun->vn) {
2103		flags = FREAD;
2104		if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0)
2105			flags |= FWRITE;
2106		(void)vn_close(be_lun->vn, flags, NOCRED, curthread);
2107		be_lun->vn = NULL;
2108
2109		switch (be_lun->dev_type) {
2110		case CTL_BE_BLOCK_DEV:
2111			break;
2112		case CTL_BE_BLOCK_FILE:
2113			if (be_lun->backend.file.cred != NULL) {
2114				crfree(be_lun->backend.file.cred);
2115				be_lun->backend.file.cred = NULL;
2116			}
2117			break;
2118		case CTL_BE_BLOCK_NONE:
2119			break;
2120		default:
2121			panic("Unexpected backend type %d", be_lun->dev_type);
2122			break;
2123		}
2124		be_lun->dev_type = CTL_BE_BLOCK_NONE;
2125	}
2126	return (0);
2127}
2128
/*
 * Open the backing store named by the LUN's "file" option and dispatch
 * to the disk- or file-specific open routine.  On failure an error
 * message is left in req->error_str.  Note that open errors after the
 * vnode has been obtained do not fail this function; the LUN is closed
 * again and 0 is returned (the caller inspects be_lun->vn to detect
 * missing media).
 */
static int
ctl_be_block_open(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct nameidata nd;
	char		*value;
	int		 error, flags;

	error = 0;
	if (rootvnode == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "Root filesystem is not mounted");
		return (1);
	}
	/*
	 * This may run from a kernel context that never had its
	 * current/root/jail directories set; point any missing ones at
	 * the root vnode so the namei() lookup below can proceed.
	 */
	if (!curthread->td_proc->p_fd->fd_cdir) {
		curthread->td_proc->p_fd->fd_cdir = rootvnode;
		VREF(rootvnode);
	}
	if (!curthread->td_proc->p_fd->fd_rdir) {
		curthread->td_proc->p_fd->fd_rdir = rootvnode;
		VREF(rootvnode);
	}
	if (!curthread->td_proc->p_fd->fd_jdir) {
		curthread->td_proc->p_fd->fd_jdir = rootvnode;
		VREF(rootvnode);
	}

	value = ctl_get_opt(&cbe_lun->options, "file");
	if (value == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "no file argument specified");
		return (1);
	}
	/* Replace any previously stored path (e.g. on re-open). */
	free(be_lun->dev_path, M_CTLBLK);
	be_lun->dev_path = strdup(value, M_CTLBLK);

	/* Open read/write unless "readonly" is set or this is not a disk. */
	flags = FREAD;
	value = ctl_get_opt(&cbe_lun->options, "readonly");
	if (value != NULL) {
		if (strcmp(value, "on") != 0)
			flags |= FWRITE;
	} else if (cbe_lun->lun_type == T_DIRECT)
		flags |= FWRITE;

again:
	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread);
	error = vn_open(&nd, &flags, 0, NULL);
	if ((error == EROFS || error == EACCES) && (flags & FWRITE)) {
		/* Writable open failed; retry read-only. */
		flags &= ~FWRITE;
		goto again;
	}
	if (error) {
		/*
		 * This is the only reasonable guess we can make as far as
		 * path if the user doesn't give us a fully qualified path.
		 * If they want to specify a file, they need to specify the
		 * full path.
		 */
		if (be_lun->dev_path[0] != '/') {
			char *dev_name;

			asprintf(&dev_name, M_CTLBLK, "/dev/%s",
				be_lun->dev_path);
			free(be_lun->dev_path, M_CTLBLK);
			be_lun->dev_path = dev_name;
			goto again;
		}
		snprintf(req->error_str, sizeof(req->error_str),
		    "error opening %s: %d", be_lun->dev_path, error);
		return (error);
	}
	/* Record whether we actually got write access. */
	if (flags & FWRITE)
		cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY;
	else
		cbe_lun->flags |= CTL_LUN_FLAG_READONLY;

	NDFREE(&nd, NDF_ONLY_PNBUF);
	be_lun->vn = nd.ni_vp;

	/* We only support disks and files. */
	if (vn_isdisk(be_lun->vn, &error)) {
		error = ctl_be_block_open_dev(be_lun, req);
	} else if (be_lun->vn->v_type == VREG) {
		error = ctl_be_block_open_file(be_lun, req);
	} else {
		error = EINVAL;
		snprintf(req->error_str, sizeof(req->error_str),
			 "%s is not a disk or plain file", be_lun->dev_path);
	}
	/* vn_open() returned the vnode locked; drop the lock. */
	VOP_UNLOCK(be_lun->vn, 0);

	if (error != 0)
		ctl_be_block_close(be_lun);
	/*
	 * Non-device backends (files, zvols via VFS) require serialized
	 * reads by default; the "serseq" option can override either way.
	 */
	cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
	if (be_lun->dispatch != ctl_be_block_dispatch_dev)
		cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
	value = ctl_get_opt(&cbe_lun->options, "serseq");
	if (value != NULL && strcmp(value, "on") == 0)
		cbe_lun->serseq = CTL_LUN_SERSEQ_ON;
	else if (value != NULL && strcmp(value, "read") == 0)
		cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
	else if (value != NULL && strcmp(value, "off") == 0)
		cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
	return (0);
}
2234
2235static int
2236ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2237{
2238	struct ctl_be_lun *cbe_lun;
2239	struct ctl_be_block_lun *be_lun;
2240	struct ctl_lun_create_params *params;
2241	char num_thread_str[16];
2242	char tmpstr[32];
2243	char *value;
2244	int retval, num_threads;
2245	int tmp_num_threads;
2246
2247	params = &req->reqdata.create;
2248	retval = 0;
2249	req->status = CTL_LUN_OK;
2250
2251	be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK);
2252	cbe_lun = &be_lun->cbe_lun;
2253	cbe_lun->be_lun = be_lun;
2254	be_lun->params = req->reqdata.create;
2255	be_lun->softc = softc;
2256	STAILQ_INIT(&be_lun->input_queue);
2257	STAILQ_INIT(&be_lun->config_read_queue);
2258	STAILQ_INIT(&be_lun->config_write_queue);
2259	STAILQ_INIT(&be_lun->datamove_queue);
2260	sprintf(be_lun->lunname, "cblk%d", softc->num_luns);
2261	mtx_init(&be_lun->io_lock, "cblk io lock", NULL, MTX_DEF);
2262	mtx_init(&be_lun->queue_lock, "cblk queue lock", NULL, MTX_DEF);
2263	ctl_init_opts(&cbe_lun->options,
2264	    req->num_be_args, req->kern_be_args);
2265	be_lun->lun_zone = uma_zcreate(be_lun->lunname, CTLBLK_MAX_SEG,
2266	    NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
2267	if (be_lun->lun_zone == NULL) {
2268		snprintf(req->error_str, sizeof(req->error_str),
2269			 "error allocating UMA zone");
2270		goto bailout_error;
2271	}
2272
2273	if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
2274		cbe_lun->lun_type = params->device_type;
2275	else
2276		cbe_lun->lun_type = T_DIRECT;
2277	be_lun->flags = CTL_BE_BLOCK_LUN_UNCONFIGURED;
2278	cbe_lun->flags = 0;
2279	value = ctl_get_opt(&cbe_lun->options, "ha_role");
2280	if (value != NULL) {
2281		if (strcmp(value, "primary") == 0)
2282			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2283	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
2284		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2285
2286	if (cbe_lun->lun_type == T_DIRECT ||
2287	    cbe_lun->lun_type == T_CDROM) {
2288		be_lun->size_bytes = params->lun_size_bytes;
2289		if (params->blocksize_bytes != 0)
2290			cbe_lun->blocksize = params->blocksize_bytes;
2291		else if (cbe_lun->lun_type == T_CDROM)
2292			cbe_lun->blocksize = 2048;
2293		else
2294			cbe_lun->blocksize = 512;
2295		be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2296		cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2297		    0 : (be_lun->size_blocks - 1);
2298
2299		if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
2300		    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
2301			retval = ctl_be_block_open(be_lun, req);
2302			if (retval != 0) {
2303				retval = 0;
2304				req->status = CTL_LUN_WARNING;
2305			}
2306		}
2307		num_threads = cbb_num_threads;
2308	} else {
2309		num_threads = 1;
2310	}
2311
2312	value = ctl_get_opt(&cbe_lun->options, "num_threads");
2313	if (value != NULL) {
2314		tmp_num_threads = strtol(value, NULL, 0);
2315
2316		/*
2317		 * We don't let the user specify less than one
2318		 * thread, but hope he's clueful enough not to
2319		 * specify 1000 threads.
2320		 */
2321		if (tmp_num_threads < 1) {
2322			snprintf(req->error_str, sizeof(req->error_str),
2323				 "invalid number of threads %s",
2324				 num_thread_str);
2325			goto bailout_error;
2326		}
2327		num_threads = tmp_num_threads;
2328	}
2329
2330	if (be_lun->vn == NULL)
2331		cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2332	/* Tell the user the blocksize we ended up using */
2333	params->lun_size_bytes = be_lun->size_bytes;
2334	params->blocksize_bytes = cbe_lun->blocksize;
2335	if (params->flags & CTL_LUN_FLAG_ID_REQ) {
2336		cbe_lun->req_lun_id = params->req_lun_id;
2337		cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ;
2338	} else
2339		cbe_lun->req_lun_id = 0;
2340
2341	cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown;
2342	cbe_lun->lun_config_status = ctl_be_block_lun_config_status;
2343	cbe_lun->be = &ctl_be_block_driver;
2344
2345	if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
2346		snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%4d",
2347			 softc->num_luns);
2348		strncpy((char *)cbe_lun->serial_num, tmpstr,
2349			MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr)));
2350
2351		/* Tell the user what we used for a serial number */
2352		strncpy((char *)params->serial_num, tmpstr,
2353			MIN(sizeof(params->serial_num), sizeof(tmpstr)));
2354	} else {
2355		strncpy((char *)cbe_lun->serial_num, params->serial_num,
2356			MIN(sizeof(cbe_lun->serial_num),
2357			sizeof(params->serial_num)));
2358	}
2359	if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
2360		snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%4d", softc->num_luns);
2361		strncpy((char *)cbe_lun->device_id, tmpstr,
2362			MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr)));
2363
2364		/* Tell the user what we used for a device ID */
2365		strncpy((char *)params->device_id, tmpstr,
2366			MIN(sizeof(params->device_id), sizeof(tmpstr)));
2367	} else {
2368		strncpy((char *)cbe_lun->device_id, params->device_id,
2369			MIN(sizeof(cbe_lun->device_id),
2370			    sizeof(params->device_id)));
2371	}
2372
2373	TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun);
2374
2375	be_lun->io_taskqueue = taskqueue_create(be_lun->lunname, M_WAITOK,
2376	    taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);
2377
2378	if (be_lun->io_taskqueue == NULL) {
2379		snprintf(req->error_str, sizeof(req->error_str),
2380			 "unable to create taskqueue");
2381		goto bailout_error;
2382	}
2383
2384	/*
2385	 * Note that we start the same number of threads by default for
2386	 * both the file case and the block device case.  For the file
2387	 * case, we need multiple threads to allow concurrency, because the
2388	 * vnode interface is designed to be a blocking interface.  For the
2389	 * block device case, ZFS zvols at least will block the caller's
2390	 * context in many instances, and so we need multiple threads to
2391	 * overcome that problem.  Other block devices don't need as many
2392	 * threads, but they shouldn't cause too many problems.
2393	 *
2394	 * If the user wants to just have a single thread for a block
2395	 * device, he can specify that when the LUN is created, or change
2396	 * the tunable/sysctl to alter the default number of threads.
2397	 */
2398	retval = taskqueue_start_threads(&be_lun->io_taskqueue,
2399					 /*num threads*/num_threads,
2400					 /*priority*/PWAIT,
2401					 /*thread name*/
2402					 "%s taskq", be_lun->lunname);
2403
2404	if (retval != 0)
2405		goto bailout_error;
2406
2407	be_lun->num_threads = num_threads;
2408
2409	mtx_lock(&softc->lock);
2410	softc->num_luns++;
2411	STAILQ_INSERT_TAIL(&softc->lun_list, be_lun, links);
2412
2413	mtx_unlock(&softc->lock);
2414
2415	retval = ctl_add_lun(&be_lun->cbe_lun);
2416	if (retval != 0) {
2417		mtx_lock(&softc->lock);
2418		STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
2419			      links);
2420		softc->num_luns--;
2421		mtx_unlock(&softc->lock);
2422		snprintf(req->error_str, sizeof(req->error_str),
2423			 "ctl_add_lun() returned error %d, see dmesg for "
2424			 "details", retval);
2425		retval = 0;
2426		goto bailout_error;
2427	}
2428
2429	mtx_lock(&softc->lock);
2430
2431	/*
2432	 * Tell the config_status routine that we're waiting so it won't
2433	 * clean up the LUN in the event of an error.
2434	 */
2435	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2436
2437	while (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) {
2438		retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
2439		if (retval == EINTR)
2440			break;
2441	}
2442	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2443
2444	if (be_lun->flags & CTL_BE_BLOCK_LUN_CONFIG_ERR) {
2445		snprintf(req->error_str, sizeof(req->error_str),
2446			 "LUN configuration error, see dmesg for details");
2447		STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
2448			      links);
2449		softc->num_luns--;
2450		mtx_unlock(&softc->lock);
2451		goto bailout_error;
2452	} else {
2453		params->req_lun_id = cbe_lun->lun_id;
2454	}
2455
2456	mtx_unlock(&softc->lock);
2457
2458	be_lun->disk_stats = devstat_new_entry("cbb", params->req_lun_id,
2459					       cbe_lun->blocksize,
2460					       DEVSTAT_ALL_SUPPORTED,
2461					       cbe_lun->lun_type
2462					       | DEVSTAT_TYPE_IF_OTHER,
2463					       DEVSTAT_PRIORITY_OTHER);
2464
2465	return (retval);
2466
2467bailout_error:
2468	req->status = CTL_LUN_ERROR;
2469
2470	if (be_lun->io_taskqueue != NULL)
2471		taskqueue_free(be_lun->io_taskqueue);
2472	ctl_be_block_close(be_lun);
2473	if (be_lun->dev_path != NULL)
2474		free(be_lun->dev_path, M_CTLBLK);
2475	if (be_lun->lun_zone != NULL)
2476		uma_zdestroy(be_lun->lun_zone);
2477	ctl_free_opts(&cbe_lun->options);
2478	mtx_destroy(&be_lun->queue_lock);
2479	mtx_destroy(&be_lun->io_lock);
2480	free(be_lun, M_CTLBLK);
2481
2482	return (retval);
2483}
2484
/*
 * Handle a LUN removal request: find the LUN, disable and close it,
 * ask CTL to invalidate it, wait for the shutdown callback to mark it
 * unconfigured, then tear down all per-LUN resources.  The result is
 * reported through req->status / req->error_str; the return value is
 * always 0.
 */
static int
ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
{
	struct ctl_lun_rm_params *params;
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *cbe_lun;
	int retval;

	params = &req->reqdata.rm;

	/* Look the LUN up by its CTL-assigned ID. */
	mtx_lock(&softc->lock);
	STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
		if (be_lun->cbe_lun.lun_id == params->lun_id)
			break;
	}
	mtx_unlock(&softc->lock);
	if (be_lun == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "LUN %u is not managed by the block backend",
			 params->lun_id);
		goto bailout_error;
	}
	cbe_lun = &be_lun->cbe_lun;

	retval = ctl_disable_lun(cbe_lun);
	if (retval != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "error %d returned from ctl_disable_lun() for "
			 "LUN %d", retval, params->lun_id);
		goto bailout_error;
	}

	/* Drain outstanding I/O before closing the backing store. */
	if (be_lun->vn != NULL) {
		cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
		ctl_lun_no_media(cbe_lun);
		taskqueue_drain_all(be_lun->io_taskqueue);
		ctl_be_block_close(be_lun);
	}

	retval = ctl_invalidate_lun(cbe_lun);
	if (retval != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "error %d returned from ctl_invalidate_lun() for "
			 "LUN %d", retval, params->lun_id);
		goto bailout_error;
	}

	/*
	 * Wait for the shutdown callback to set UNCONFIGURED.  PCATCH
	 * means the sleep can be interrupted by a signal (EINTR).
	 */
	mtx_lock(&softc->lock);
	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
	while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
                retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
                if (retval == EINTR)
                        break;
        }
	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;

	if ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "interrupted waiting for LUN to be freed");
		mtx_unlock(&softc->lock);
		goto bailout_error;
	}

	STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, links);

	softc->num_luns--;
	mtx_unlock(&softc->lock);

	/* From here the LUN is unreachable; free everything it owned. */
	taskqueue_drain_all(be_lun->io_taskqueue);
	taskqueue_free(be_lun->io_taskqueue);

	if (be_lun->disk_stats != NULL)
		devstat_remove_entry(be_lun->disk_stats);

	uma_zdestroy(be_lun->lun_zone);

	ctl_free_opts(&cbe_lun->options);
	free(be_lun->dev_path, M_CTLBLK);
	mtx_destroy(&be_lun->queue_lock);
	mtx_destroy(&be_lun->io_lock);
	free(be_lun, M_CTLBLK);

	req->status = CTL_LUN_OK;
	return (0);

bailout_error:
	req->status = CTL_LUN_ERROR;
	return (0);
}
2574
/*
 * Handle a LUN modify request: update options and size, re-evaluate
 * the HA primary/secondary role, and re-open or close the backing
 * store to match the new role.  Always returns 0; the outcome is
 * reported via req->status (OK, WARNING on open/close error, or ERROR
 * if the LUN was not found).
 */
static int
ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
{
	struct ctl_lun_modify_params *params;
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *cbe_lun;
	char *value;
	uint64_t oldsize;
	int error, wasprim;

	params = &req->reqdata.modify;

	/* Look the LUN up by its CTL-assigned ID. */
	mtx_lock(&softc->lock);
	STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
		if (be_lun->cbe_lun.lun_id == params->lun_id)
			break;
	}
	mtx_unlock(&softc->lock);
	if (be_lun == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "LUN %u is not managed by the block backend",
			 params->lun_id);
		goto bailout_error;
	}
	cbe_lun = &be_lun->cbe_lun;

	if (params->lun_size_bytes != 0)
		be_lun->params.lun_size_bytes = params->lun_size_bytes;
	ctl_update_opts(&cbe_lun->options, req->num_be_args, req->kern_be_args);

	/* Recompute the HA role and notify CTL if it changed. */
	wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY);
	value = ctl_get_opt(&cbe_lun->options, "ha_role");
	if (value != NULL) {
		if (strcmp(value, "primary") == 0)
			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
		else
			cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
	else
		cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
	if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) {
		if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)
			ctl_lun_primary(cbe_lun);
		else
			ctl_lun_secondary(cbe_lun);
	}

	oldsize = be_lun->size_blocks;
	if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
	    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
		/*
		 * This node serves I/O: (re)open the backing store to
		 * pick up any size/option changes.
		 */
		if (be_lun->vn == NULL)
			error = ctl_be_block_open(be_lun, req);
		else if (vn_isdisk(be_lun->vn, &error))
			error = ctl_be_block_open_dev(be_lun, req);
		else if (be_lun->vn->v_type == VREG) {
			/* open_file expects the vnode locked. */
			vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
			error = ctl_be_block_open_file(be_lun, req);
			VOP_UNLOCK(be_lun->vn, 0);
		} else
			error = EINVAL;
		/* Sync the media-present state with the open result. */
		if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) &&
		    be_lun->vn != NULL) {
			cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
			ctl_lun_has_media(cbe_lun);
		} else if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) == 0 &&
		    be_lun->vn == NULL) {
			cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
			ctl_lun_no_media(cbe_lun);
		}
		cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
	} else {
		/* Secondary node: close the backing store if open. */
		if (be_lun->vn != NULL) {
			cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
			ctl_lun_no_media(cbe_lun);
			taskqueue_drain_all(be_lun->io_taskqueue);
			error = ctl_be_block_close(be_lun);
		} else
			error = 0;
	}
	if (be_lun->size_blocks != oldsize)
		ctl_lun_capacity_changed(cbe_lun);

	/* Tell the user the exact size we ended up using */
	params->lun_size_bytes = be_lun->size_bytes;

	req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK;
	return (0);

bailout_error:
	req->status = CTL_LUN_ERROR;
	return (0);
}
2668
2669static void
2670ctl_be_block_lun_shutdown(void *be_lun)
2671{
2672	struct ctl_be_block_lun *lun;
2673	struct ctl_be_block_softc *softc;
2674
2675	lun = (struct ctl_be_block_lun *)be_lun;
2676	softc = lun->softc;
2677
2678	mtx_lock(&softc->lock);
2679	lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED;
2680	if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2681		wakeup(lun);
2682	mtx_unlock(&softc->lock);
2683}
2684
2685static void
2686ctl_be_block_lun_config_status(void *be_lun, ctl_lun_config_status status)
2687{
2688	struct ctl_be_block_lun *lun;
2689	struct ctl_be_block_softc *softc;
2690
2691	lun = (struct ctl_be_block_lun *)be_lun;
2692	softc = lun->softc;
2693
2694	if (status == CTL_LUN_CONFIG_OK) {
2695		mtx_lock(&softc->lock);
2696		lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
2697		if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2698			wakeup(lun);
2699		mtx_unlock(&softc->lock);
2700
2701		/*
2702		 * We successfully added the LUN, attempt to enable it.
2703		 */
2704		if (ctl_enable_lun(&lun->cbe_lun) != 0) {
2705			printf("%s: ctl_enable_lun() failed!\n", __func__);
2706			if (ctl_invalidate_lun(&lun->cbe_lun) != 0) {
2707				printf("%s: ctl_invalidate_lun() failed!\n",
2708				       __func__);
2709			}
2710		}
2711
2712		return;
2713	}
2714
2715
2716	mtx_lock(&softc->lock);
2717	lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
2718	lun->flags |= CTL_BE_BLOCK_LUN_CONFIG_ERR;
2719	wakeup(lun);
2720	mtx_unlock(&softc->lock);
2721}
2722
2723
/*
 * Entry point for configuration (non-data) write commands.  Commands
 * that need to touch the backing store are queued to the worker
 * taskqueue; the rest are completed inline.
 */
static int
ctl_be_block_config_write(union ctl_io *io)
{
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *cbe_lun;
	int retval;

	DPRINTF("entered\n");

	cbe_lun = CTL_BACKEND_LUN(io);
	be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;

	retval = 0;
	switch (io->scsiio.cdb[0]) {
	case SYNCHRONIZE_CACHE:
	case SYNCHRONIZE_CACHE_16:
	case WRITE_SAME_10:
	case WRITE_SAME_16:
	case UNMAP:
		/*
		 * The upper level CTL code will filter out any CDBs with
		 * the immediate bit set and return the proper error.
		 *
		 * We don't really need to worry about what LBA range the
		 * user asked to be synced out.  When they issue a sync
		 * cache command, we'll sync out the whole thing.
		 */
		mtx_lock(&be_lun->queue_lock);
		STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
				   links);
		mtx_unlock(&be_lun->queue_lock);
		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
		break;
	case START_STOP_UNIT: {
		struct scsi_start_stop_unit *cdb;
		/*
		 * NOTE(review): req is uninitialized stack; it appears to
		 * serve only as an error-string sink for
		 * ctl_be_block_open() here — confirm no field is read.
		 */
		struct ctl_lun_req req;

		cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
		if ((cdb->how & SSS_PC_MASK) != 0) {
			/* Power-condition requests: accept and ignore. */
			ctl_set_success(&io->scsiio);
			ctl_config_write_done(io);
			break;
		}
		if (cdb->how & SSS_START) {
			/* START with LOEJ: (re)load the media. */
			if ((cdb->how & SSS_LOEJ) && be_lun->vn == NULL) {
				retval = ctl_be_block_open(be_lun, &req);
				cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
				if (retval == 0) {
					cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
					ctl_lun_has_media(cbe_lun);
				} else {
					cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
					ctl_lun_no_media(cbe_lun);
				}
			}
			ctl_start_lun(cbe_lun);
		} else {
			ctl_stop_lun(cbe_lun);
			/* STOP with LOEJ: eject and close the media. */
			if (cdb->how & SSS_LOEJ) {
				cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
				cbe_lun->flags |= CTL_LUN_FLAG_EJECTED;
				ctl_lun_ejected(cbe_lun);
				if (be_lun->vn != NULL)
					ctl_be_block_close(be_lun);
			}
		}

		ctl_set_success(&io->scsiio);
		ctl_config_write_done(io);
		break;
	}
	case PREVENT_ALLOW:
		/* Media-removal prevention is accepted as a no-op. */
		ctl_set_success(&io->scsiio);
		ctl_config_write_done(io);
		break;
	default:
		ctl_set_invalid_opcode(&io->scsiio);
		ctl_config_write_done(io);
		retval = CTL_RETVAL_COMPLETE;
		break;
	}

	return (retval);
}
2808
2809static int
2810ctl_be_block_config_read(union ctl_io *io)
2811{
2812	struct ctl_be_block_lun *be_lun;
2813	struct ctl_be_lun *cbe_lun;
2814	int retval = 0;
2815
2816	DPRINTF("entered\n");
2817
2818	cbe_lun = CTL_BACKEND_LUN(io);
2819	be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;
2820
2821	switch (io->scsiio.cdb[0]) {
2822	case SERVICE_ACTION_IN:
2823		if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) {
2824			mtx_lock(&be_lun->queue_lock);
2825			STAILQ_INSERT_TAIL(&be_lun->config_read_queue,
2826			    &io->io_hdr, links);
2827			mtx_unlock(&be_lun->queue_lock);
2828			taskqueue_enqueue(be_lun->io_taskqueue,
2829			    &be_lun->io_task);
2830			retval = CTL_RETVAL_QUEUED;
2831			break;
2832		}
2833		ctl_set_invalid_field(&io->scsiio,
2834				      /*sks_valid*/ 1,
2835				      /*command*/ 1,
2836				      /*field*/ 1,
2837				      /*bit_valid*/ 1,
2838				      /*bit*/ 4);
2839		ctl_config_read_done(io);
2840		retval = CTL_RETVAL_COMPLETE;
2841		break;
2842	default:
2843		ctl_set_invalid_opcode(&io->scsiio);
2844		ctl_config_read_done(io);
2845		retval = CTL_RETVAL_COMPLETE;
2846		break;
2847	}
2848
2849	return (retval);
2850}
2851
2852static int
2853ctl_be_block_lun_info(void *be_lun, struct sbuf *sb)
2854{
2855	struct ctl_be_block_lun *lun;
2856	int retval;
2857
2858	lun = (struct ctl_be_block_lun *)be_lun;
2859
2860	retval = sbuf_printf(sb, "\t<num_threads>");
2861	if (retval != 0)
2862		goto bailout;
2863	retval = sbuf_printf(sb, "%d", lun->num_threads);
2864	if (retval != 0)
2865		goto bailout;
2866	retval = sbuf_printf(sb, "</num_threads>\n");
2867
2868bailout:
2869	return (retval);
2870}
2871
2872static uint64_t
2873ctl_be_block_lun_attr(void *be_lun, const char *attrname)
2874{
2875	struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)be_lun;
2876
2877	if (lun->getattr == NULL)
2878		return (UINT64_MAX);
2879	return (lun->getattr(lun, attrname));
2880}
2881
2882int
2883ctl_be_block_init(void)
2884{
2885	struct ctl_be_block_softc *softc;
2886	int retval;
2887
2888	softc = &backend_block_softc;
2889	retval = 0;
2890
2891	mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF);
2892	beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
2893	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2894	STAILQ_INIT(&softc->lun_list);
2895
2896	return (retval);
2897}
2898