ctl_backend_block.c revision 274154
/*-
 * Copyright (c) 2003 Silicon Graphics International Corp.
 * Copyright (c) 2009-2011 Spectra Logic Corporation
 * Copyright (c) 2012 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by Edward Tomasz Napierala
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification.
 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
 *    substantially similar to the "NO WARRANTY" disclaimer below
 *    ("Disclaimer") and any redistribution must be conditioned upon
 *    including a substantially similar Disclaimer requirement for further
 *    binary redistribution.
 *
 * NO WARRANTY
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGES.
 *
 * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $
 */
/*
 * CAM Target Layer driver backend for block devices.
 *
 * Author: Ken Merry <ken@FreeBSD.org>
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/cam/ctl/ctl_backend_block.c 274154 2014-11-06 00:48:36Z mav $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/kthread.h>
#include <sys/bio.h>
#include <sys/fcntl.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/condvar.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/ioccom.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/endian.h>
#include <sys/uio.h>
#include <sys/buf.h>
#include <sys/taskqueue.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/mount.h>
#include <sys/disk.h>
#include <sys/filedesc.h>
#include <sys/proc.h>
#include <sys/pcpu.h>
#include <sys/module.h>
#include <sys/sdt.h>
#include <sys/devicestat.h>
#include <sys/sysctl.h>

#include <geom/geom.h>

#include <cam/cam.h>
#include <cam/scsi/scsi_all.h>
#include <cam/scsi/scsi_da.h>
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_backend.h>
#include <cam/ctl/ctl_frontend_internal.h>
#include <cam/ctl/ctl_ioctl.h>
#include <cam/ctl/ctl_scsi_all.h>
#include <cam/ctl/ctl_error.h>

/*
 * The idea here is that we'll allocate enough S/G space to hold a 1MB
 * I/O.  If we get an I/O larger than that, we'll split it.
 */
#define	CTLBLK_HALF_IO_SIZE	(512 * 1024)
#define	CTLBLK_MAX_IO_SIZE	(CTLBLK_HALF_IO_SIZE * 2)
#define	CTLBLK_MAX_SEG		MAXPHYS
#define	CTLBLK_HALF_SEGS	MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MAX_SEG, 1)
#define	CTLBLK_MAX_SEGS		(CTLBLK_HALF_SEGS * 2)
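
/*
 * Worked example (assuming the common 128kB MAXPHYS): each S/G segment
 * holds up to 128kB, so CTLBLK_HALF_SEGS is 512kB / 128kB = 4 and
 * CTLBLK_MAX_SEGS is 8.  The "half" sizes exist because compare
 * operations (CTL_LLF_COMPARE) carry two equal-sized S/G lists in a
 * single beio.
 */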

#ifdef CTLBLK_DEBUG
#define DPRINTF(fmt, args...) \
    printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
#else
#define DPRINTF(fmt, args...) do {} while(0)
#endif

#define PRIV(io)	\
    ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
#define ARGS(io)	\
    ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])
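
/*
 * PRIV() is this backend's scratch area in each ctl_io; it carries the
 * beio pointer and a running LBA count across multi-pass I/Os.  ARGS()
 * holds the LBA/length/flags that the CTL core decoded from the CDB
 * before handing the I/O to this backend.
 */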

SDT_PROVIDER_DEFINE(cbb);

typedef enum {
	CTL_BE_BLOCK_LUN_UNCONFIGURED	= 0x01,
	CTL_BE_BLOCK_LUN_CONFIG_ERR	= 0x02,
	CTL_BE_BLOCK_LUN_WAITING	= 0x04,
	CTL_BE_BLOCK_LUN_MULTI_THREAD	= 0x08
} ctl_be_block_lun_flags;

typedef enum {
	CTL_BE_BLOCK_NONE,
	CTL_BE_BLOCK_DEV,
	CTL_BE_BLOCK_FILE
} ctl_be_block_type;

struct ctl_be_block_devdata {
	struct cdev *cdev;
	struct cdevsw *csw;
	int dev_ref;
};

struct ctl_be_block_filedata {
	struct ucred *cred;
};

union ctl_be_block_bedata {
	struct ctl_be_block_devdata dev;
	struct ctl_be_block_filedata file;
};

struct ctl_be_block_io;
struct ctl_be_block_lun;

typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
			       struct ctl_be_block_io *beio);
typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun,
				  const char *attrname);

/*
 * Backend LUN structure.  There is a 1:1 mapping between a block device
 * and a backend block LUN, and between a backend block LUN and a CTL LUN.
 */
struct ctl_be_block_lun {
	struct ctl_lun_create_params params;
	struct ctl_block_disk *disk;
	char lunname[32];
	char *dev_path;
	ctl_be_block_type dev_type;
	struct vnode *vn;
	union ctl_be_block_bedata backend;
	cbb_dispatch_t dispatch;
	cbb_dispatch_t lun_flush;
	cbb_dispatch_t unmap;
	cbb_getattr_t getattr;
	uma_zone_t lun_zone;
	uint64_t size_blocks;
	uint64_t size_bytes;
	uint32_t blocksize;
	int blocksize_shift;
	uint16_t pblockexp;
	uint16_t pblockoff;
	struct ctl_be_block_softc *softc;
	struct devstat *disk_stats;
	ctl_be_block_lun_flags flags;
	STAILQ_ENTRY(ctl_be_block_lun) links;
	struct ctl_be_lun ctl_be_lun;
	struct taskqueue *io_taskqueue;
	struct task io_task;
	int num_threads;
	STAILQ_HEAD(, ctl_io_hdr) input_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
	struct mtx_padalign io_lock;
	struct mtx_padalign queue_lock;
};

/*
 * Overall softc structure for the block backend module.
 */
struct ctl_be_block_softc {
	struct mtx			 lock;
	int				 num_disks;
	STAILQ_HEAD(, ctl_block_disk)	 disk_list;
	int				 num_luns;
	STAILQ_HEAD(, ctl_be_block_lun)	 lun_list;
};

static struct ctl_be_block_softc backend_block_softc;

/*
 * Per-I/O information.
 */
struct ctl_be_block_io {
	union ctl_io			*io;
	struct ctl_sg_entry		sg_segs[CTLBLK_MAX_SEGS];
	struct iovec			xiovecs[CTLBLK_MAX_SEGS];
	int				bio_cmd;
	int				num_segs;
	int				num_bios_sent;
	int				num_bios_done;
	int				send_complete;
	int				num_errors;
	struct bintime			ds_t0;
	devstat_tag_type		ds_tag_type;
	devstat_trans_flags		ds_trans_type;
	uint64_t			io_len;
	uint64_t			io_offset;
	struct ctl_be_block_softc	*softc;
	struct ctl_be_block_lun		*lun;
	void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
};

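/*
 * num_threads is both a loader tunable and a live sysctl (CTLFLAG_RWTUN),
 * e.g. "sysctl kern.cam.ctl.block.num_threads=32".  The value is sampled
 * when a LUN is created, so changes only affect LUNs created afterwards.
 */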
static int cbb_num_threads = 14;
SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD, 0,
	    "CAM Target Layer Block Backend");
SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN,
	   &cbb_num_threads, 0, "Number of threads per backing file");

static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
static void ctl_free_beio(struct ctl_be_block_io *beio);
static void ctl_complete_beio(struct ctl_be_block_io *beio);
static int ctl_be_block_move_done(union ctl_io *io);
static void ctl_be_block_biodone(struct bio *bio);
static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
				    struct ctl_be_block_io *beio);
static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
				       struct ctl_be_block_io *beio);
static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
				   struct ctl_be_block_io *beio);
static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
				   struct ctl_be_block_io *beio);
static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
				      struct ctl_be_block_io *beio);
static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun,
					 const char *attrname);
static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
				    union ctl_io *io);
static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
				  union ctl_io *io);
static void ctl_be_block_worker(void *context, int pending);
static int ctl_be_block_submit(union ctl_io *io);
static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
				   int flag, struct thread *td);
static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
				  struct ctl_lun_req *req);
static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
				 struct ctl_lun_req *req);
static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
static int ctl_be_block_open(struct ctl_be_block_softc *softc,
			     struct ctl_be_block_lun *be_lun,
			     struct ctl_lun_req *req);
static int ctl_be_block_create(struct ctl_be_block_softc *softc,
			       struct ctl_lun_req *req);
static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
			   struct ctl_lun_req *req);
static int ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun,
				  struct ctl_lun_req *req);
static int ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun,
				 struct ctl_lun_req *req);
static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
			   struct ctl_lun_req *req);
static void ctl_be_block_lun_shutdown(void *be_lun);
static void ctl_be_block_lun_config_status(void *be_lun,
					   ctl_lun_config_status status);
static int ctl_be_block_config_write(union ctl_io *io);
static int ctl_be_block_config_read(union ctl_io *io);
static int ctl_be_block_lun_info(void *be_lun, struct sbuf *sb);
static uint64_t ctl_be_block_lun_attr(void *be_lun, const char *attrname);
int ctl_be_block_init(void);

static struct ctl_backend_driver ctl_be_block_driver =
{
	.name = "block",
	.flags = CTL_BE_FLAG_HAS_CONFIG,
	.init = ctl_be_block_init,
	.data_submit = ctl_be_block_submit,
	.data_move_done = ctl_be_block_move_done,
	.config_read = ctl_be_block_config_read,
	.config_write = ctl_be_block_config_write,
	.ioctl = ctl_be_block_ioctl,
	.lun_info = ctl_be_block_lun_info,
	.lun_attr = ctl_be_block_lun_attr
};

MALLOC_DEFINE(M_CTLBLK, "ctlblk", "Memory used for CTL block backend");
CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver);

static uma_zone_t beio_zone;

static struct ctl_be_block_io *
ctl_alloc_beio(struct ctl_be_block_softc *softc)
{
	struct ctl_be_block_io *beio;

	beio = uma_zalloc(beio_zone, M_WAITOK | M_ZERO);
	beio->softc = softc;
	return (beio);
}

static void
ctl_free_beio(struct ctl_be_block_io *beio)
{
	int duplicate_free;
	int i;

	duplicate_free = 0;

	for (i = 0; i < beio->num_segs; i++) {
		if (beio->sg_segs[i].addr == NULL)
			duplicate_free++;

		uma_zfree(beio->lun->lun_zone, beio->sg_segs[i].addr);
		beio->sg_segs[i].addr = NULL;

		/* For compare we had two equal S/G lists. */
		if (ARGS(beio->io)->flags & CTL_LLF_COMPARE) {
			uma_zfree(beio->lun->lun_zone,
			    beio->sg_segs[i + CTLBLK_HALF_SEGS].addr);
			beio->sg_segs[i + CTLBLK_HALF_SEGS].addr = NULL;
		}
	}

	if (duplicate_free > 0) {
		printf("%s: %d duplicate frees out of %d segments\n", __func__,
		       duplicate_free, beio->num_segs);
	}

	uma_zfree(beio_zone, beio);
}

static void
ctl_complete_beio(struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;

	if (beio->beio_cont != NULL) {
		beio->beio_cont(beio);
	} else {
		ctl_free_beio(beio);
		ctl_data_submit_done(io);
	}
}

static int
ctl_be_block_move_done(union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_lun *be_lun;
	struct ctl_lba_len_flags *lbalen;
#ifdef CTL_TIME_IO
	struct bintime cur_bt;
#endif
	int i;

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	be_lun = beio->lun;

	DPRINTF("entered\n");

#ifdef CTL_TIME_IO
	getbintime(&cur_bt);
	bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt);
	bintime_add(&io->io_hdr.dma_bt, &cur_bt);
	io->io_hdr.num_dmas++;
#endif
	io->scsiio.kern_rel_offset += io->scsiio.kern_data_len;

	/*
	 * We set status at this point for read commands, and write
	 * commands with errors.
	 */
	if ((io->io_hdr.port_status == 0) &&
	    ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0) &&
	    ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) {
		lbalen = ARGS(beio->io);
		if (lbalen->flags & CTL_LLF_READ) {
			ctl_set_success(&io->scsiio);
		} else if (lbalen->flags & CTL_LLF_COMPARE) {
			/* We have two data blocks ready for comparison. */
			for (i = 0; i < beio->num_segs; i++) {
				if (memcmp(beio->sg_segs[i].addr,
				    beio->sg_segs[i + CTLBLK_HALF_SEGS].addr,
				    beio->sg_segs[i].len) != 0)
					break;
			}
			if (i < beio->num_segs)
				ctl_set_sense(&io->scsiio,
				    /*current_error*/ 1,
				    /*sense_key*/ SSD_KEY_MISCOMPARE,
				    /*asc*/ 0x1D,
				    /*ascq*/ 0x00,
				    SSD_ELEM_NONE);
			else
				ctl_set_success(&io->scsiio);
		}
	}
	else if ((io->io_hdr.port_status != 0)
	      && ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0)
	      && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) {
		/*
		 * For hardware error sense keys, the sense key
		 * specific value is defined to be a retry count,
		 * but we use it to pass back an internal FETD
		 * error code.  XXX KDM Hopefully the FETD is only
		 * using 16 bits for an error code, since that's
		 * all the space we have in the sks field.
		 */
		ctl_set_internal_failure(&io->scsiio,
					 /*sks_valid*/ 1,
					 /*retry_count*/
					 io->io_hdr.port_status);
	}

	/*
	 * If this is a read, or a write with errors, it is done.
	 */
	if ((beio->bio_cmd == BIO_READ)
	 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)
	 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) {
		ctl_complete_beio(beio);
		return (0);
	}

	/*
	 * At this point, we have a write and the DMA completed
	 * successfully.  We now have to queue it to the task queue to
	 * execute the backend I/O.  That is because we do blocking
	 * memory allocations, and in the file backing case, blocking I/O.
	 * This move done routine is generally called in the SIM's
	 * interrupt context, and therefore we cannot block.
	 */
	mtx_lock(&be_lun->queue_lock);
	/*
	 * XXX KDM make sure that links is okay to use at this point.
	 * Otherwise, we either need to add another field to ctl_io_hdr,
	 * or deal with resource allocation here.
	 */
	STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links);
	mtx_unlock(&be_lun->queue_lock);

	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);

	return (0);
}

static void
ctl_be_block_biodone(struct bio *bio)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_lun *be_lun;
	union ctl_io *io;
	int error;

	beio = bio->bio_caller1;
	be_lun = beio->lun;
	io = beio->io;

	DPRINTF("entered\n");

	error = bio->bio_error;
	mtx_lock(&be_lun->io_lock);
	if (error != 0)
		beio->num_errors++;

	beio->num_bios_done++;

	/*
	 * XXX KDM will this cause WITNESS to complain?  Holding a lock
	 * during the free might cause it to complain.
	 */
	g_destroy_bio(bio);

	/*
	 * If the send complete bit isn't set, or we aren't the last I/O to
	 * complete, then we're done.
	 */
	if ((beio->send_complete == 0)
	 || (beio->num_bios_done < beio->num_bios_sent)) {
		mtx_unlock(&be_lun->io_lock);
		return;
	}

	/*
	 * At this point, we've verified that we are the last I/O to
	 * complete, so it's safe to drop the lock.
	 */
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If there are any errors from the backing device, we fail the
	 * entire I/O with a medium error.
	 */
	if (beio->num_errors > 0) {
		if (error == EOPNOTSUPP) {
			ctl_set_invalid_opcode(&io->scsiio);
		} else if (error == ENOSPC) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else if (beio->bio_cmd == BIO_FLUSH) {
			/* XXX KDM is there a better error here? */
			ctl_set_internal_failure(&io->scsiio,
						 /*sks_valid*/ 1,
						 /*retry_count*/ 0xbad2);
		} else
			ctl_set_medium_error(&io->scsiio);
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write, a flush, a delete or verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE)
	 || (beio->bio_cmd == BIO_FLUSH)
	 || (beio->bio_cmd == BIO_DELETE)
	 || (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
#ifdef CTL_TIME_IO
		getbintime(&io->io_hdr.dma_start_bt);
#endif
		ctl_datamove(io);
	}
}

static void
ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
			struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct mount *mountpoint;
	int error, lock_flags;

	DPRINTF("entered\n");

	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	(void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);

	if (MNT_SHARED_WRITES(mountpoint)
	 || ((mountpoint == NULL)
	  && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
		lock_flags = LK_SHARED;
	else
		lock_flags = LK_EXCLUSIVE;

	vn_lock(be_lun->vn, lock_flags | LK_RETRY);

	error = VOP_FSYNC(be_lun->vn, MNT_WAIT, curthread);
	VOP_UNLOCK(be_lun->vn, 0);

	vn_finished_write(mountpoint);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	if (error == 0)
		ctl_set_success(&io->scsiio);
	else {
		/* XXX KDM is there a better error here? */
		ctl_set_internal_failure(&io->scsiio,
					 /*sks_valid*/ 1,
					 /*retry_count*/ 0xbad1);
	}

	ctl_complete_beio(beio);
}

SDT_PROBE_DEFINE1(cbb, kernel, read, file_start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, write, file_start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, read, file_done, "uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, write, file_done, "uint64_t");

static void
ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
			   struct ctl_be_block_io *beio)
{
	struct ctl_be_block_filedata *file_data;
	union ctl_io *io;
	struct uio xuio;
	struct iovec *xiovec;
	int flags;
	int error, i;

	DPRINTF("entered\n");

	file_data = &be_lun->backend.file;
	io = beio->io;
	flags = 0;
	if (ARGS(io)->flags & CTL_LLF_DPO)
		flags |= IO_DIRECT;
	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
		flags |= IO_SYNC;

	bzero(&xuio, sizeof(xuio));
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0);
		xuio.uio_rw = UIO_READ;
	} else {
		SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0);
		xuio.uio_rw = UIO_WRITE;
	}
	xuio.uio_offset = beio->io_offset;
	xuio.uio_resid = beio->io_len;
	xuio.uio_segflg = UIO_SYSSPACE;
	xuio.uio_iov = beio->xiovecs;
	xuio.uio_iovcnt = beio->num_segs;
	xuio.uio_td = curthread;

	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
		xiovec->iov_base = beio->sg_segs[i].addr;
		xiovec->iov_len = beio->sg_segs[i].len;
	}

	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	if (beio->bio_cmd == BIO_READ) {
		vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);

		/*
		 * UFS pays attention to IO_DIRECT for reads.  If the
		 * DIRECTIO option is configured into the kernel, it calls
		 * ffs_rawread().  But that only works for single-segment
		 * uios with user space addresses.  In our case, with a
		 * kernel uio, it still reads into the buffer cache, but it
		 * will just try to release the buffer from the cache later
		 * on in ffs_read().
		 *
		 * ZFS does not pay attention to IO_DIRECT for reads.
		 *
		 * UFS does not pay attention to IO_SYNC for reads.
		 *
		 * ZFS pays attention to IO_SYNC (which translates into the
		 * Solaris define FRSYNC for zfs_read()) for reads.  It
		 * attempts to sync the file before reading.
		 *
		 * So, to attempt to provide some barrier semantics in the
		 * BIO_ORDERED case, set both IO_DIRECT and IO_SYNC.
		 */
		error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred);

		VOP_UNLOCK(be_lun->vn, 0);
		SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0);
	} else {
		struct mount *mountpoint;
		int lock_flags;

		(void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);

		if (MNT_SHARED_WRITES(mountpoint)
		 || ((mountpoint == NULL)
		  && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
			lock_flags = LK_SHARED;
		else
			lock_flags = LK_EXCLUSIVE;

		vn_lock(be_lun->vn, lock_flags | LK_RETRY);

		/*
		 * UFS pays attention to IO_DIRECT for writes.  The write
		 * is done asynchronously.  (Normally the write would just
		 * get put into cache.)
		 *
		 * UFS pays attention to IO_SYNC for writes.  It will
		 * attempt to write the buffer out synchronously if that
		 * flag is set.
		 *
		 * ZFS does not pay attention to IO_DIRECT for writes.
		 *
		 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC)
		 * for writes.  It will flush the transaction from the
		 * cache before returning.
		 *
		 * So if we've got the BIO_ORDERED flag set, we want
		 * IO_SYNC in either the UFS or ZFS case.
		 */
		error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred);
		VOP_UNLOCK(be_lun->vn, 0);

		vn_finished_write(mountpoint);
		SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0);
	}

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data to "MEDIUM ERROR" and
	 * return the I/O to the user.
	 */
	if (error != 0) {
		char path_str[32];

		ctl_scsi_path_string(io, path_str, sizeof(path_str));
		printf("%s%s command returned errno %d\n", path_str,
		       (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE", error);
		if (error == ENOSPC) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else
			ctl_set_medium_error(&io->scsiio);
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write or a verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE) ||
	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
#ifdef CTL_TIME_IO
		getbintime(&io->io_hdr.dma_start_bt);
#endif
		ctl_datamove(io);
	}
}

static void
ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
			   struct ctl_be_block_io *beio)
{
	struct ctl_be_block_devdata *dev_data;
	union ctl_io *io;
	struct uio xuio;
	struct iovec *xiovec;
	int flags;
	int error, i;

	DPRINTF("entered\n");

	dev_data = &be_lun->backend.dev;
	io = beio->io;
	flags = 0;
	if (ARGS(io)->flags & CTL_LLF_DPO)
		flags |= IO_DIRECT;
	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
		flags |= IO_SYNC;

	bzero(&xuio, sizeof(xuio));
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0);
		xuio.uio_rw = UIO_READ;
	} else {
		SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0);
		xuio.uio_rw = UIO_WRITE;
	}
	xuio.uio_offset = beio->io_offset;
	xuio.uio_resid = beio->io_len;
	xuio.uio_segflg = UIO_SYSSPACE;
	xuio.uio_iov = beio->xiovecs;
	xuio.uio_iovcnt = beio->num_segs;
	xuio.uio_td = curthread;

	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
		xiovec->iov_base = beio->sg_segs[i].addr;
		xiovec->iov_len = beio->sg_segs[i].len;
	}

	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	if (beio->bio_cmd == BIO_READ) {
		error = (*dev_data->csw->d_read)(dev_data->cdev, &xuio, flags);
		SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0);
	} else {
		error = (*dev_data->csw->d_write)(dev_data->cdev, &xuio, flags);
		SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0);
	}

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data to "MEDIUM ERROR" and
	 * return the I/O to the user.
	 */
	if (error != 0) {
		if (error == ENOSPC) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else
			ctl_set_medium_error(&io->scsiio);
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write or a verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE) ||
	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
#ifdef CTL_TIME_IO
		getbintime(&io->io_hdr.dma_start_bt);
#endif
		ctl_datamove(io);
	}
}

static void
ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
		       struct ctl_be_block_io *beio)
{
	struct bio *bio;
	union ctl_io *io;
	struct ctl_be_block_devdata *dev_data;

	dev_data = &be_lun->backend.dev;
	io = beio->io;

	DPRINTF("entered\n");

	/* This can't fail, it's a blocking allocation. */
	bio = g_alloc_bio();

	bio->bio_cmd	    = BIO_FLUSH;
	bio->bio_flags	   |= BIO_ORDERED;
	bio->bio_dev	    = dev_data->cdev;
	bio->bio_offset	    = 0;
	bio->bio_data	    = 0;
	bio->bio_done	    = ctl_be_block_biodone;
	bio->bio_caller1    = beio;
	bio->bio_pblkno	    = 0;

	/*
	 * We don't need to acquire the LUN lock here, because we are only
	 * sending one bio, and so there is no other context to synchronize
	 * with.
	 */
	beio->num_bios_sent = 1;
	beio->send_complete = 1;

	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	(*dev_data->csw->d_strategy)(bio);
}

static void
ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
		       struct ctl_be_block_io *beio,
		       uint64_t off, uint64_t len, int last)
{
	struct bio *bio;
	struct ctl_be_block_devdata *dev_data;
	uint64_t maxlen;

	dev_data = &be_lun->backend.dev;
	maxlen = LONG_MAX - (LONG_MAX % be_lun->blocksize);
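	/*
	 * bio_length is a signed long, so each BIO_DELETE is clamped to
	 * the largest block-size multiple that fits; e.g. with 512-byte
	 * blocks on a 64-bit system maxlen works out to LONG_MAX - 511.
	 */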
	while (len > 0) {
		bio = g_alloc_bio();
		bio->bio_cmd	    = BIO_DELETE;
		bio->bio_dev	    = dev_data->cdev;
		bio->bio_offset	    = off;
		bio->bio_length	    = MIN(len, maxlen);
		bio->bio_data	    = 0;
		bio->bio_done	    = ctl_be_block_biodone;
		bio->bio_caller1    = beio;
		bio->bio_pblkno     = off / be_lun->blocksize;

		off += bio->bio_length;
		len -= bio->bio_length;

		mtx_lock(&be_lun->io_lock);
		beio->num_bios_sent++;
		if (last && len == 0)
			beio->send_complete = 1;
		mtx_unlock(&be_lun->io_lock);

		(*dev_data->csw->d_strategy)(bio);
	}
}

static void
ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
		       struct ctl_be_block_io *beio)
{
	union ctl_io *io;
	struct ctl_be_block_devdata *dev_data;
	struct ctl_ptr_len_flags *ptrlen;
	struct scsi_unmap_desc *buf, *end;
	uint64_t len;

	dev_data = &be_lun->backend.dev;
	io = beio->io;

	DPRINTF("entered\n");

	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	if (beio->io_offset == -1) {
		beio->io_len = 0;
		ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
		buf = (struct scsi_unmap_desc *)ptrlen->ptr;
		end = buf + ptrlen->len / sizeof(*buf);
		for (; buf < end; buf++) {
			len = (uint64_t)scsi_4btoul(buf->length) *
			    be_lun->blocksize;
			beio->io_len += len;
			ctl_be_block_unmap_dev_range(be_lun, beio,
			    scsi_8btou64(buf->lba) * be_lun->blocksize, len,
			    (end - buf < 2) ? TRUE : FALSE);
		}
	} else
		ctl_be_block_unmap_dev_range(be_lun, beio,
		    beio->io_offset, beio->io_len, TRUE);
}

static void
ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
			  struct ctl_be_block_io *beio)
{
	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
	int i;
	struct bio *bio;
	struct ctl_be_block_devdata *dev_data;
	off_t cur_offset;
	int max_iosize;

	DPRINTF("entered\n");

	dev_data = &be_lun->backend.dev;

	/*
	 * We have to limit our I/O size to the maximum supported by the
	 * backend device.  Hopefully it is MAXPHYS.  If the driver doesn't
	 * set it properly, use DFLTPHYS.
	 */
	max_iosize = dev_data->cdev->si_iosize_max;
	if (max_iosize < PAGE_SIZE)
		max_iosize = DFLTPHYS;

	cur_offset = beio->io_offset;
	for (i = 0; i < beio->num_segs; i++) {
		size_t cur_size;
		uint8_t *cur_ptr;

		cur_size = beio->sg_segs[i].len;
		cur_ptr = beio->sg_segs[i].addr;

		while (cur_size > 0) {
			/* This can't fail, it's a blocking allocation. */
			bio = g_alloc_bio();

			KASSERT(bio != NULL, ("g_alloc_bio() failed!\n"));

			bio->bio_cmd = beio->bio_cmd;
			bio->bio_dev = dev_data->cdev;
			bio->bio_caller1 = beio;
			bio->bio_length = min(cur_size, max_iosize);
			bio->bio_offset = cur_offset;
			bio->bio_data = cur_ptr;
			bio->bio_done = ctl_be_block_biodone;
			bio->bio_pblkno = cur_offset / be_lun->blocksize;

			cur_offset += bio->bio_length;
			cur_ptr += bio->bio_length;
			cur_size -= bio->bio_length;

			TAILQ_INSERT_TAIL(&queue, bio, bio_queue);
			beio->num_bios_sent++;
		}
	}
	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
	beio->send_complete = 1;
	mtx_unlock(&be_lun->io_lock);

	/*
	 * Fire off all allocated requests!
	 */
	while ((bio = TAILQ_FIRST(&queue)) != NULL) {
		TAILQ_REMOVE(&queue, bio, bio_queue);
		(*dev_data->csw->d_strategy)(bio);
	}
}

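/*
 * Query a single device attribute via DIOCGATTR.  The attribute name is
 * passed through to the provider unchanged (e.g. a GEOM attribute such
 * as "GEOM::candelete"); UINT64_MAX is returned when the provider lacks
 * a d_ioctl entry point or does not implement the attribute.
 */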
static uint64_t
ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname)
{
	struct ctl_be_block_devdata	*dev_data = &be_lun->backend.dev;
	struct diocgattr_arg	arg;
	int			error;

	if (dev_data->csw == NULL || dev_data->csw->d_ioctl == NULL)
		return (UINT64_MAX);
	strlcpy(arg.name, attrname, sizeof(arg.name));
	arg.len = sizeof(arg.value.off);
	error = dev_data->csw->d_ioctl(dev_data->cdev,
	    DIOCGATTR, (caddr_t)&arg, FREAD, curthread);
	if (error != 0)
		return (UINT64_MAX);
	return (arg.value.off);
}

static void
ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio)
{
	union ctl_io *io;

	io = beio->io;
	ctl_free_beio(beio);
	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
		ctl_config_write_done(io);
		return;
	}

	ctl_be_block_config_write(io);
}

static void
ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
			    union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;
	struct ctl_lba_len_flags *lbalen;
	uint64_t len_left, lba;
	int i, seglen;
	uint8_t *buf, *end;

	DPRINTF("entered\n");

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	softc = be_lun->softc;
	lbalen = ARGS(beio->io);

	if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) ||
	    (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) {
		ctl_free_beio(beio);
		ctl_set_invalid_field(&io->scsiio,
				      /*sks_valid*/ 1,
				      /*command*/ 1,
				      /*field*/ 1,
				      /*bit_valid*/ 0,
				      /*bit*/ 0);
		ctl_config_write_done(io);
		return;
	}

	switch (io->scsiio.tag_type) {
	case CTL_TAG_ORDERED:
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		break;
	case CTL_TAG_HEAD_OF_QUEUE:
		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
		break;
	case CTL_TAG_UNTAGGED:
	case CTL_TAG_SIMPLE:
	case CTL_TAG_ACA:
	default:
		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
		break;
	}

	if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) {
		beio->io_offset = lbalen->lba * be_lun->blocksize;
		beio->io_len = (uint64_t)lbalen->len * be_lun->blocksize;
		beio->bio_cmd = BIO_DELETE;
		beio->ds_trans_type = DEVSTAT_FREE;

		be_lun->unmap(be_lun, beio);
		return;
	}

	beio->bio_cmd = BIO_WRITE;
	beio->ds_trans_type = DEVSTAT_WRITE;

	DPRINTF("WRITE SAME at LBA %jx len %u\n",
	       (uintmax_t)lbalen->lba, lbalen->len);

	len_left = (uint64_t)lbalen->len * be_lun->blocksize;
	for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {
		/*
		 * Setup the S/G entry for this chunk.
		 */
		seglen = MIN(CTLBLK_MAX_SEG, len_left);
		seglen -= seglen % be_lun->blocksize;
		beio->sg_segs[i].len = seglen;
		beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);

		DPRINTF("segment %d addr %p len %zd\n", i,
			beio->sg_segs[i].addr, beio->sg_segs[i].len);

		beio->num_segs++;
		len_left -= seglen;

		buf = beio->sg_segs[i].addr;
		end = buf + seglen;
		for (; buf < end; buf += be_lun->blocksize) {
			memcpy(buf, io->scsiio.kern_data_ptr, be_lun->blocksize);
			if (lbalen->flags & SWS_LBDATA)
				scsi_ulto4b(lbalen->lba + lba, buf);
			lba++;
		}
	}

	beio->io_offset = lbalen->lba * be_lun->blocksize;
	beio->io_len = lba * be_lun->blocksize;

	/* We cannot do it all in one run.  Adjust and schedule a rerun. */
	if (len_left > 0) {
		lbalen->lba += lba;
		lbalen->len -= lba;
		beio->beio_cont = ctl_be_block_cw_done_ws;
	}
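
	/*
	 * Example (assuming the common 128kB MAXPHYS): a single pass
	 * covers at most CTLBLK_MAX_IO_SIZE / 512 = 2048 512-byte blocks;
	 * longer WRITE SAME ranges cycle through ctl_be_block_cw_done_ws
	 * until the whole range has been written.
	 */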

	be_lun->dispatch(be_lun, beio);
}

static void
ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun,
			    union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;
	struct ctl_ptr_len_flags *ptrlen;

	DPRINTF("entered\n");

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	softc = be_lun->softc;
	ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];

	if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) {
		ctl_free_beio(beio);
		ctl_set_invalid_field(&io->scsiio,
				      /*sks_valid*/ 0,
				      /*command*/ 1,
				      /*field*/ 0,
				      /*bit_valid*/ 0,
				      /*bit*/ 0);
		ctl_config_write_done(io);
		return;
	}

	switch (io->scsiio.tag_type) {
	case CTL_TAG_ORDERED:
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		break;
	case CTL_TAG_HEAD_OF_QUEUE:
		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
		break;
	case CTL_TAG_UNTAGGED:
	case CTL_TAG_SIMPLE:
	case CTL_TAG_ACA:
	default:
		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
		break;
	}

	beio->io_len = 0;
	beio->io_offset = -1;

	beio->bio_cmd = BIO_DELETE;
	beio->ds_trans_type = DEVSTAT_FREE;

	DPRINTF("UNMAP\n");

	be_lun->unmap(be_lun, beio);
}

static void
ctl_be_block_cw_done(struct ctl_be_block_io *beio)
{
	union ctl_io *io;

	io = beio->io;
	ctl_free_beio(beio);
	ctl_config_write_done(io);
}

static void
ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
			 union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;

	DPRINTF("entered\n");

	softc = be_lun->softc;
	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	beio->beio_cont = ctl_be_block_cw_done;
	PRIV(io)->ptr = (void *)beio;

	switch (io->scsiio.cdb[0]) {
	case SYNCHRONIZE_CACHE:
	case SYNCHRONIZE_CACHE_16:
		beio->bio_cmd = BIO_FLUSH;
		beio->ds_trans_type = DEVSTAT_NO_DATA;
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		beio->io_len = 0;
		be_lun->lun_flush(be_lun, beio);
		break;
	case WRITE_SAME_10:
	case WRITE_SAME_16:
		ctl_be_block_cw_dispatch_ws(be_lun, io);
		break;
	case UNMAP:
		ctl_be_block_cw_dispatch_unmap(be_lun, io);
		break;
	default:
		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
		break;
	}
}

SDT_PROBE_DEFINE1(cbb, kernel, read, start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, write, start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, read, alloc_done, "uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, write, alloc_done, "uint64_t");

static void
ctl_be_block_next(struct ctl_be_block_io *beio)
{
	struct ctl_be_block_lun *be_lun;
	union ctl_io *io;

	io = beio->io;
	be_lun = beio->lun;
	ctl_free_beio(beio);
	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
		ctl_data_submit_done(io);
		return;
	}

	io->io_hdr.status &= ~CTL_STATUS_MASK;
	io->io_hdr.status |= CTL_STATUS_NONE;

	mtx_lock(&be_lun->queue_lock);
	/*
	 * XXX KDM make sure that links is okay to use at this point.
	 * Otherwise, we either need to add another field to ctl_io_hdr,
	 * or deal with resource allocation here.
	 */
	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
	mtx_unlock(&be_lun->queue_lock);

	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
}

static void
ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
			   union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;
	struct ctl_lba_len_flags *lbalen;
	struct ctl_ptr_len_flags *bptrlen;
	uint64_t len_left, lbas;
	int i;

	softc = be_lun->softc;

	DPRINTF("entered\n");

	lbalen = ARGS(io);
	if (lbalen->flags & CTL_LLF_WRITE) {
		SDT_PROBE(cbb, kernel, write, start, 0, 0, 0, 0, 0);
	} else {
		SDT_PROBE(cbb, kernel, read, start, 0, 0, 0, 0, 0);
	}

	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	bptrlen = PRIV(io);
	bptrlen->ptr = (void *)beio;

	switch (io->scsiio.tag_type) {
	case CTL_TAG_ORDERED:
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		break;
	case CTL_TAG_HEAD_OF_QUEUE:
		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
		break;
	case CTL_TAG_UNTAGGED:
	case CTL_TAG_SIMPLE:
	case CTL_TAG_ACA:
	default:
		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
		break;
	}

	if (lbalen->flags & CTL_LLF_WRITE) {
		beio->bio_cmd = BIO_WRITE;
		beio->ds_trans_type = DEVSTAT_WRITE;
	} else {
		beio->bio_cmd = BIO_READ;
		beio->ds_trans_type = DEVSTAT_READ;
	}

	DPRINTF("%s at LBA %jx len %u @%ju\n",
	       (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE",
	       (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len);
	if (lbalen->flags & CTL_LLF_COMPARE)
		lbas = CTLBLK_HALF_IO_SIZE;
	else
		lbas = CTLBLK_MAX_IO_SIZE;
	lbas = MIN(lbalen->len - bptrlen->len, lbas / be_lun->blocksize);
	beio->io_offset = (lbalen->lba + bptrlen->len) * be_lun->blocksize;
	beio->io_len = lbas * be_lun->blocksize;
	bptrlen->len += lbas;

	for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
		KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)",
		    i, CTLBLK_MAX_SEGS));

		/*
		 * Setup the S/G entry for this chunk.
		 */
		beio->sg_segs[i].len = min(CTLBLK_MAX_SEG, len_left);
		beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);

		DPRINTF("segment %d addr %p len %zd\n", i,
			beio->sg_segs[i].addr, beio->sg_segs[i].len);

		/* Set up second segment for compare operation. */
		if (lbalen->flags & CTL_LLF_COMPARE) {
			beio->sg_segs[i + CTLBLK_HALF_SEGS].len =
			    beio->sg_segs[i].len;
			beio->sg_segs[i + CTLBLK_HALF_SEGS].addr =
			    uma_zalloc(be_lun->lun_zone, M_WAITOK);
		}

		beio->num_segs++;
		len_left -= beio->sg_segs[i].len;
	}
	if (bptrlen->len < lbalen->len)
		beio->beio_cont = ctl_be_block_next;
	io->scsiio.be_move_done = ctl_be_block_move_done;
	/* For compare we have separate S/G lists for read and datamove. */
	if (lbalen->flags & CTL_LLF_COMPARE)
		io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS];
	else
		io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs;
	io->scsiio.kern_data_len = beio->io_len;
	io->scsiio.kern_data_resid = 0;
	io->scsiio.kern_sg_entries = beio->num_segs;
	io->io_hdr.flags |= CTL_FLAG_ALLOCATED | CTL_FLAG_KDPTR_SGLIST;

	/*
	 * For the read case, we need to read the data into our buffers and
	 * then we can send it back to the user.  For the write case, we
	 * need to get the data from the user first.
	 */
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE(cbb, kernel, read, alloc_done, 0, 0, 0, 0, 0);
		be_lun->dispatch(be_lun, beio);
	} else {
		SDT_PROBE(cbb, kernel, write, alloc_done, 0, 0, 0, 0, 0);
#ifdef CTL_TIME_IO
		getbintime(&io->io_hdr.dma_start_bt);
#endif
		ctl_datamove(io);
	}
}

static void
ctl_be_block_worker(void *context, int pending)
{
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_block_softc *softc;
	union ctl_io *io;

	be_lun = (struct ctl_be_block_lun *)context;
	softc = be_lun->softc;

	DPRINTF("entered\n");

	mtx_lock(&be_lun->queue_lock);
	for (;;) {
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
		if (io != NULL) {
			struct ctl_be_block_io *beio;

			DPRINTF("datamove queue\n");

			STAILQ_REMOVE(&be_lun->datamove_queue, &io->io_hdr,
				      ctl_io_hdr, links);

			mtx_unlock(&be_lun->queue_lock);

			beio = (struct ctl_be_block_io *)PRIV(io)->ptr;

			be_lun->dispatch(be_lun, beio);

			mtx_lock(&be_lun->queue_lock);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
		if (io != NULL) {
			DPRINTF("config write queue\n");

			STAILQ_REMOVE(&be_lun->config_write_queue, &io->io_hdr,
				      ctl_io_hdr, links);

			mtx_unlock(&be_lun->queue_lock);

			ctl_be_block_cw_dispatch(be_lun, io);

			mtx_lock(&be_lun->queue_lock);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
		if (io != NULL) {
			DPRINTF("input queue\n");

			STAILQ_REMOVE(&be_lun->input_queue, &io->io_hdr,
				      ctl_io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);

			/*
			 * We must drop the lock, since this routine and
			 * its children may sleep.
			 */
			ctl_be_block_dispatch(be_lun, io);

			mtx_lock(&be_lun->queue_lock);
			continue;
		}

		/*
		 * If we get here, there is no work left in the queues, so
		 * just break out and let the task queue go to sleep.
		 */
		break;
	}
	mtx_unlock(&be_lun->queue_lock);
}

/*
 * Entry point from CTL to the backend for I/O.  We queue everything to a
 * work thread, so this just puts the I/O on a queue and wakes up the
 * thread.
 */
static int
ctl_be_block_submit(union ctl_io *io)
{
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *ctl_be_lun;

	DPRINTF("entered\n");

	ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
		CTL_PRIV_BACKEND_LUN].ptr;
	be_lun = (struct ctl_be_block_lun *)ctl_be_lun->be_lun;

	/*
	 * Make sure we only get SCSI I/O.
	 */
	KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, ("Non-SCSI I/O (type "
		"%#x) encountered", io->io_hdr.io_type));

	PRIV(io)->len = 0;

	mtx_lock(&be_lun->queue_lock);
	/*
	 * XXX KDM make sure that links is okay to use at this point.
	 * Otherwise, we either need to add another field to ctl_io_hdr,
	 * or deal with resource allocation here.
	 */
	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
	mtx_unlock(&be_lun->queue_lock);
	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);

	return (CTL_RETVAL_COMPLETE);
}

static int
ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
			int flag, struct thread *td)
{
	struct ctl_be_block_softc *softc;
	int error;

	softc = &backend_block_softc;

	error = 0;

	switch (cmd) {
	case CTL_LUN_REQ: {
		struct ctl_lun_req *lun_req;

		lun_req = (struct ctl_lun_req *)addr;

		switch (lun_req->reqtype) {
		case CTL_LUNREQ_CREATE:
			error = ctl_be_block_create(softc, lun_req);
			break;
		case CTL_LUNREQ_RM:
			error = ctl_be_block_rm(softc, lun_req);
			break;
		case CTL_LUNREQ_MODIFY:
			error = ctl_be_block_modify(softc, lun_req);
			break;
		default:
			lun_req->status = CTL_LUN_ERROR;
			snprintf(lun_req->error_str, sizeof(lun_req->error_str),
				 "invalid LUN request type %d",
				 lun_req->reqtype);
			break;
		}
		break;
	}
	default:
		error = ENOTTY;
		break;
	}

	return (error);
}

static int
ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_block_filedata *file_data;
	struct ctl_lun_create_params *params;
	struct vattr		      vattr;
	off_t			      pss;
	int			      error;

	error = 0;
	file_data = &be_lun->backend.file;
	params = &be_lun->params;

	be_lun->dev_type = CTL_BE_BLOCK_FILE;
	be_lun->dispatch = ctl_be_block_dispatch_file;
	be_lun->lun_flush = ctl_be_block_flush_file;

	error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
	if (error != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "error calling VOP_GETATTR() for file %s",
			 be_lun->dev_path);
		return (error);
	}

	/*
	 * Verify that we have the ability to upgrade to exclusive
	 * access on this file so we can trap errors at open instead
	 * of reporting them during first access.
	 */
	if (VOP_ISLOCKED(be_lun->vn) != LK_EXCLUSIVE) {
		vn_lock(be_lun->vn, LK_UPGRADE | LK_RETRY);
		if (be_lun->vn->v_iflag & VI_DOOMED) {
			error = EBADF;
			snprintf(req->error_str, sizeof(req->error_str),
				 "error locking file %s", be_lun->dev_path);
			return (error);
		}
	}

	file_data->cred = crhold(curthread->td_ucred);
	if (params->lun_size_bytes != 0)
		be_lun->size_bytes = params->lun_size_bytes;
	else
		be_lun->size_bytes = vattr.va_size;
	/*
	 * We set the multi thread flag for file operations because all
	 * filesystems (in theory) are capable of allowing multiple readers
	 * of a file at once.  So we want to get the maximum possible
	 * concurrency.
	 */
	be_lun->flags |= CTL_BE_BLOCK_LUN_MULTI_THREAD;

	/*
	 * For files we can use any logical block size.  Prefer 512 bytes
	 * for compatibility reasons.  If the file's vattr.va_blocksize
	 * (preferred I/O block size) is bigger than and a multiple of the
	 * chosen logical block size, report it as the physical block size.
	 */
	if (params->blocksize_bytes != 0)
		be_lun->blocksize = params->blocksize_bytes;
	else
		be_lun->blocksize = 512;
	pss = vattr.va_blocksize / be_lun->blocksize;
	if ((pss > 0) && (pss * be_lun->blocksize == vattr.va_blocksize) &&
	    ((pss & (pss - 1)) == 0)) {
		be_lun->pblockexp = fls(pss) - 1;
		be_lun->pblockoff = 0;
	}
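
	/*
	 * Example: 512-byte logical blocks on a filesystem reporting a
	 * va_blocksize of 32768 give pss = 64, so the LUN advertises a
	 * physical block exponent of fls(64) - 1 = 6.
	 */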

	/*
	 * Sanity check.  The media size has to be at least one
	 * sector long.
	 */
	if (be_lun->size_bytes < be_lun->blocksize) {
		error = EINVAL;
		snprintf(req->error_str, sizeof(req->error_str),
			 "file %s size %ju < block size %u", be_lun->dev_path,
			 (uintmax_t)be_lun->size_bytes, be_lun->blocksize);
	}
	return (error);
}

static int
ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_lun_create_params *params;
	struct vattr		      vattr;
	struct cdev		     *dev;
	struct cdevsw		     *devsw;
	int			      error;
	off_t			      ps, pss, po, pos;

	params = &be_lun->params;

	be_lun->dev_type = CTL_BE_BLOCK_DEV;
	be_lun->backend.dev.cdev = be_lun->vn->v_rdev;
	be_lun->backend.dev.csw = dev_refthread(be_lun->backend.dev.cdev,
					     &be_lun->backend.dev.dev_ref);
	if (be_lun->backend.dev.csw == NULL)
		panic("Unable to retrieve device switch");
	if (strcmp(be_lun->backend.dev.csw->d_name, "zvol") == 0)
		be_lun->dispatch = ctl_be_block_dispatch_zvol;
	else
		be_lun->dispatch = ctl_be_block_dispatch_dev;
	be_lun->lun_flush = ctl_be_block_flush_dev;
	be_lun->unmap = ctl_be_block_unmap_dev;
	be_lun->getattr = ctl_be_block_getattr_dev;

	error = VOP_GETATTR(be_lun->vn, &vattr, NOCRED);
	if (error) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "error getting vnode attributes for device %s",
			 be_lun->dev_path);
		return (error);
	}

	dev = be_lun->vn->v_rdev;
	devsw = dev->si_devsw;
	if (!devsw->d_ioctl) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "no d_ioctl for device %s!",
			 be_lun->dev_path);
		return (ENODEV);
	}

	error = devsw->d_ioctl(dev, DIOCGSECTORSIZE,
			       (caddr_t)&be_lun->blocksize, FREAD,
			       curthread);
	if (error) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "error %d returned for DIOCGSECTORSIZE ioctl "
			 "on %s!", error, be_lun->dev_path);
		return (error);
	}

	/*
	 * If the user has asked for a blocksize that is greater than the
	 * backing device's blocksize, we can do it only if the blocksize
	 * the user is asking for is an even multiple of the underlying
	 * device's blocksize.
	 */
	if ((params->blocksize_bytes != 0)
	 && (params->blocksize_bytes > be_lun->blocksize)) {
		uint32_t bs_multiple, tmp_blocksize;

		bs_multiple = params->blocksize_bytes / be_lun->blocksize;

		tmp_blocksize = bs_multiple * be_lun->blocksize;

		if (tmp_blocksize == params->blocksize_bytes) {
			be_lun->blocksize = params->blocksize_bytes;
		} else {
			snprintf(req->error_str, sizeof(req->error_str),
				 "requested blocksize %u is not an even "
				 "multiple of backing device blocksize %u",
				 params->blocksize_bytes,
				 be_lun->blocksize);
			return (EINVAL);
		}
	} else if ((params->blocksize_bytes != 0)
		&& (params->blocksize_bytes != be_lun->blocksize)) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "requested blocksize %u < backing device "
			 "blocksize %u", params->blocksize_bytes,
			 be_lun->blocksize);
		return (EINVAL);
	}

	error = devsw->d_ioctl(dev, DIOCGMEDIASIZE,
			       (caddr_t)&be_lun->size_bytes, FREAD,
			       curthread);
	if (error) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "error %d returned for DIOCGMEDIASIZE "
			 "ioctl on %s!", error,
			 be_lun->dev_path);
		return (error);
	}

	if (params->lun_size_bytes != 0) {
		if (params->lun_size_bytes > be_lun->size_bytes) {
			snprintf(req->error_str, sizeof(req->error_str),
				 "requested LUN size %ju > backing device "
				 "size %ju",
				 (uintmax_t)params->lun_size_bytes,
				 (uintmax_t)be_lun->size_bytes);
			return (EINVAL);
		}

		be_lun->size_bytes = params->lun_size_bytes;
	}

	error = devsw->d_ioctl(dev, DIOCGSTRIPESIZE,
			       (caddr_t)&ps, FREAD, curthread);
	if (error)
		ps = po = 0;
	else {
		error = devsw->d_ioctl(dev, DIOCGSTRIPEOFFSET,
				       (caddr_t)&po, FREAD, curthread);
		if (error)
			po = 0;
	}
	pss = ps / be_lun->blocksize;
	pos = po / be_lun->blocksize;
	if ((pss > 0) && (pss * be_lun->blocksize == ps) && (pss >= pos) &&
	    ((pss & (pss - 1)) == 0) && (pos * be_lun->blocksize == po)) {
		be_lun->pblockexp = fls(pss) - 1;
		be_lun->pblockoff = (pss - pos) % pss;
	}
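
	/*
	 * Example: a provider reporting a 131072-byte stripe size at
	 * offset 0 with 512-byte sectors gives pss = 256 and pos = 0,
	 * so pblockexp = 8 and pblockoff = 0.
	 */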

	return (0);
}

static int
ctl_be_block_close(struct ctl_be_block_lun *be_lun)
{
	DROP_GIANT();
	if (be_lun->vn) {
		int flags = FREAD | FWRITE;

		switch (be_lun->dev_type) {
		case CTL_BE_BLOCK_DEV:
			if (be_lun->backend.dev.csw) {
				dev_relthread(be_lun->backend.dev.cdev,
					      be_lun->backend.dev.dev_ref);
				be_lun->backend.dev.csw  = NULL;
				be_lun->backend.dev.cdev = NULL;
			}
			break;
		case CTL_BE_BLOCK_FILE:
			break;
		case CTL_BE_BLOCK_NONE:
			break;
		default:
			panic("Unexpected backend type.");
			break;
		}

		(void)vn_close(be_lun->vn, flags, NOCRED, curthread);
		be_lun->vn = NULL;

		switch (be_lun->dev_type) {
		case CTL_BE_BLOCK_DEV:
			break;
		case CTL_BE_BLOCK_FILE:
			if (be_lun->backend.file.cred != NULL) {
				crfree(be_lun->backend.file.cred);
				be_lun->backend.file.cred = NULL;
			}
			break;
		case CTL_BE_BLOCK_NONE:
			break;
		default:
			panic("Unexpected backend type.");
			break;
		}
		be_lun->dev_type = CTL_BE_BLOCK_NONE;
	}
	PICKUP_GIANT();

	return (0);
}
1832
1833static int
1834ctl_be_block_open(struct ctl_be_block_softc *softc,
1835		       struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1836{
1837	struct nameidata nd;
1838	int		 flags;
1839	int		 error;
1840
1841	/*
1842	 * XXX KDM allow a read-only option?
1843	 */
1844	flags = FREAD | FWRITE;
1845	error = 0;
1846
1847	if (rootvnode == NULL) {
1848		snprintf(req->error_str, sizeof(req->error_str),
1849			 "Root filesystem is not mounted");
1850		return (1);
1851	}
1852
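	/*
	 * The request may be serviced by a kernel thread whose process
	 * has no current, root or jail directory; point any missing
	 * ones at the root vnode so the namei() lookup below has a
	 * starting point.
	 */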
1853	if (!curthread->td_proc->p_fd->fd_cdir) {
1854		curthread->td_proc->p_fd->fd_cdir = rootvnode;
1855		VREF(rootvnode);
1856	}
1857	if (!curthread->td_proc->p_fd->fd_rdir) {
1858		curthread->td_proc->p_fd->fd_rdir = rootvnode;
1859		VREF(rootvnode);
1860	}
1861	if (!curthread->td_proc->p_fd->fd_jdir) {
1862		curthread->td_proc->p_fd->fd_jdir = rootvnode;
1863		VREF(rootvnode);
1864	}
1865
1866 again:
1867	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread);
1868	error = vn_open(&nd, &flags, 0, NULL);
1869	if (error) {
1870		/*
1871		 * If the user didn't give us a fully qualified path, the
1872		 * only reasonable guess is a device name, so retry with
1873		 * "/dev/" prepended (e.g. "ada0" becomes "/dev/ada0").
1874		 * Plain files must be specified with a full path.
1875		 */
1876		if (be_lun->dev_path[0] != '/') {
1877			char *dev_path = "/dev/";
1878			char *dev_name;
1879
1880			/* Try "/dev/" + name; M_WAITOK malloc cannot fail. */
1881			dev_name = malloc(strlen(be_lun->dev_path)
1882					+ strlen(dev_path) + 1,
1883					  M_CTLBLK, M_WAITOK);
1884			sprintf(dev_name, "%s%s", dev_path,
1885				be_lun->dev_path);
1886			free(be_lun->dev_path, M_CTLBLK);
1887			be_lun->dev_path = dev_name;
1888			goto again;
1891		}
1892		snprintf(req->error_str, sizeof(req->error_str),
1893		    "error opening %s: %d", be_lun->dev_path, error);
1894		return (error);
1895	}
1896
1897	NDFREE(&nd, NDF_ONLY_PNBUF);
1898
1899	be_lun->vn = nd.ni_vp;
1900
1901	/* We only support disks and files. */
1902	if (vn_isdisk(be_lun->vn, &error)) {
1903		error = ctl_be_block_open_dev(be_lun, req);
1904	} else if (be_lun->vn->v_type == VREG) {
1905		error = ctl_be_block_open_file(be_lun, req);
1906	} else {
1907		error = EINVAL;
1908		snprintf(req->error_str, sizeof(req->error_str),
1909			 "%s is not a disk or plain file", be_lun->dev_path);
1910	}
1911	VOP_UNLOCK(be_lun->vn, 0);
1912
1913	if (error != 0) {
1914		ctl_be_block_close(be_lun);
1915		return (error);
1916	}
1917
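	/*
	 * Cache log2(blocksize) so the I/O path can convert between
	 * bytes and blocks with shifts instead of 64-bit divisions.
	 * Note that this assumes a power of 2 blocksize; fls() would
	 * quietly round a non-power-of-2 value down.
	 */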
1918	be_lun->blocksize_shift = fls(be_lun->blocksize) - 1;
1919	be_lun->size_blocks = be_lun->size_bytes >> be_lun->blocksize_shift;
1920
1921	return (0);
1922}
1923
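/*
 * Create a new LUN.  This is invoked from the CTL ioctl path in response
 * to a LUN creation request.  From userland a block-backed LUN is
 * typically created with ctladm(8), for example (hypothetical backing
 * store path):
 *
 *	ctladm create -b block -o file=/dev/zvol/tank/vol0
 */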
1924static int
1925ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
1926{
1927	struct ctl_be_block_lun *be_lun;
1928	struct ctl_lun_create_params *params;
1930	char tmpstr[32];
1931	char *value;
1932	int retval, num_threads, unmap;
1933	int tmp_num_threads;
1934
1935	params = &req->reqdata.create;
1936	retval = 0;
1937	req->status = CTL_LUN_OK;
1938
1939	num_threads = cbb_num_threads;
1940
1941	be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK);
1942
1943	be_lun->params = req->reqdata.create;
1944	be_lun->softc = softc;
1945	STAILQ_INIT(&be_lun->input_queue);
1946	STAILQ_INIT(&be_lun->config_write_queue);
1947	STAILQ_INIT(&be_lun->datamove_queue);
1948	sprintf(be_lun->lunname, "cblk%d", softc->num_luns);
1949	mtx_init(&be_lun->io_lock, "cblk io lock", NULL, MTX_DEF);
1950	mtx_init(&be_lun->queue_lock, "cblk queue lock", NULL, MTX_DEF);
1951	ctl_init_opts(&be_lun->ctl_be_lun.options,
1952	    req->num_be_args, req->kern_be_args);
1953
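	/*
	 * Per-LUN UMA zone from which the CTLBLK_MAX_SEG sized buffers
	 * for this LUN's S/G lists are allocated.
	 */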
1954	be_lun->lun_zone = uma_zcreate(be_lun->lunname, CTLBLK_MAX_SEG,
1955	    NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
1956
1957	if (be_lun->lun_zone == NULL) {
1958		snprintf(req->error_str, sizeof(req->error_str),
1959			 "error allocating UMA zone");
1960		goto bailout_error;
1961	}
1962
1963	if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
1964		be_lun->ctl_be_lun.lun_type = params->device_type;
1965	else
1966		be_lun->ctl_be_lun.lun_type = T_DIRECT;
1967
1968	if (be_lun->ctl_be_lun.lun_type == T_DIRECT) {
1969		value = ctl_get_opt(&be_lun->ctl_be_lun.options, "file");
1970		if (value == NULL) {
1971			snprintf(req->error_str, sizeof(req->error_str),
1972				 "no file argument specified");
1973			goto bailout_error;
1974		}
1975		be_lun->dev_path = strdup(value, M_CTLBLK);
1976		be_lun->blocksize = 512;
1977		be_lun->blocksize_shift = fls(be_lun->blocksize) - 1;
1978
1979		retval = ctl_be_block_open(softc, be_lun, req);
1980		if (retval != 0) {
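			/*
			 * Tolerate the open failure: the LUN is still
			 * created, but marked offline below, and the
			 * request completes with a warning.
			 */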
1981			retval = 0;
1982			req->status = CTL_LUN_WARNING;
1983		}
1984	} else {
1985		/*
1986		 * For processor devices, we don't have any size.
1987		 */
1988		be_lun->blocksize = 0;
1989		be_lun->pblockexp = 0;
1990		be_lun->pblockoff = 0;
1991		be_lun->size_blocks = 0;
1992		be_lun->size_bytes = 0;
1993		be_lun->ctl_be_lun.maxlba = 0;
1994
1995		/*
1996		 * Default to just 1 thread for processor devices.
1997		 */
1998		num_threads = 1;
1999	}
2000
2001	/*
2002	 * XXX This option search might be refactored to be combined with
2003	 * the lookups above.
2004	 */
2005	value = ctl_get_opt(&be_lun->ctl_be_lun.options, "num_threads");
2006	if (value != NULL) {
2007		tmp_num_threads = strtol(value, NULL, 0);
2008
2009		/*
2010		 * We don't let the user specify less than one
2011		 * thread, but hope he's clueful enough not to
2012		 * specify 1000 threads.
2013		 */
2014		if (tmp_num_threads < 1) {
2015			snprintf(req->error_str, sizeof(req->error_str),
2016				 "invalid number of threads %s",
2017				 value);
2018			goto bailout_error;
2019		}
2020		num_threads = tmp_num_threads;
2021	}
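	/*
	 * UNMAP defaults to on only for ZVOL-backed LUNs, where deleted
	 * blocks can actually be returned to the pool; the "unmap" LUN
	 * option overrides the default in either direction.
	 */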
2022	unmap = (be_lun->dispatch == ctl_be_block_dispatch_zvol);
2023	value = ctl_get_opt(&be_lun->ctl_be_lun.options, "unmap");
2024	if (value != NULL)
2025		unmap = (strcmp(value, "on") == 0);
2026
2027	be_lun->flags = CTL_BE_BLOCK_LUN_UNCONFIGURED;
2028	be_lun->ctl_be_lun.flags = CTL_LUN_FLAG_PRIMARY;
2029	if (be_lun->vn == NULL)
2030		be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_OFFLINE;
2031	if (unmap)
2032		be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_UNMAP;
2033	be_lun->ctl_be_lun.be_lun = be_lun;
2034	be_lun->ctl_be_lun.maxlba = (be_lun->size_blocks == 0) ?
2035	    0 : (be_lun->size_blocks - 1);
2036	be_lun->ctl_be_lun.blocksize = be_lun->blocksize;
2037	be_lun->ctl_be_lun.pblockexp = be_lun->pblockexp;
2038	be_lun->ctl_be_lun.pblockoff = be_lun->pblockoff;
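	/*
	 * ZVOL writes are transactional, so writes up to the maximum
	 * backend I/O size complete atomically; advertise that limit,
	 * in blocks, as the maximum atomic transfer length.
	 */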
2039	if (be_lun->dispatch == ctl_be_block_dispatch_zvol &&
2040	    be_lun->blocksize != 0)
2041		be_lun->ctl_be_lun.atomicblock = CTLBLK_MAX_IO_SIZE /
2042		    be_lun->blocksize;
2043	/* Tell the user the size and blocksize we ended up using */
2044	params->lun_size_bytes = be_lun->size_bytes;
2045	params->blocksize_bytes = be_lun->blocksize;
2046	if (params->flags & CTL_LUN_FLAG_ID_REQ) {
2047		be_lun->ctl_be_lun.req_lun_id = params->req_lun_id;
2048		be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_ID_REQ;
2049	} else
2050		be_lun->ctl_be_lun.req_lun_id = 0;
2051
2052	be_lun->ctl_be_lun.lun_shutdown = ctl_be_block_lun_shutdown;
2053	be_lun->ctl_be_lun.lun_config_status =
2054		ctl_be_block_lun_config_status;
2055	be_lun->ctl_be_lun.be = &ctl_be_block_driver;
2056
2057	if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
2058		snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%4d",
2059			 softc->num_luns);
2060		strncpy((char *)be_lun->ctl_be_lun.serial_num, tmpstr,
2061			ctl_min(sizeof(be_lun->ctl_be_lun.serial_num),
2062			sizeof(tmpstr)));
2063
2064		/* Tell the user what we used for a serial number */
2065		strncpy((char *)params->serial_num, tmpstr,
2066			ctl_min(sizeof(params->serial_num), sizeof(tmpstr)));
2067	} else {
2068		strncpy((char *)be_lun->ctl_be_lun.serial_num,
2069			params->serial_num,
2070			ctl_min(sizeof(be_lun->ctl_be_lun.serial_num),
2071			sizeof(params->serial_num)));
2072	}
2073	if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
2074		snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%4d", softc->num_luns);
2075		strncpy((char *)be_lun->ctl_be_lun.device_id, tmpstr,
2076			ctl_min(sizeof(be_lun->ctl_be_lun.device_id),
2077			sizeof(tmpstr)));
2078
2079		/* Tell the user what we used for a device ID */
2080		strncpy((char *)params->device_id, tmpstr,
2081			ctl_min(sizeof(params->device_id), sizeof(tmpstr)));
2082	} else {
2083		strncpy((char *)be_lun->ctl_be_lun.device_id,
2084			params->device_id,
2085			ctl_min(sizeof(be_lun->ctl_be_lun.device_id),
2086				sizeof(params->device_id)));
2087	}
2088
2089	TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun);
2090
2091	be_lun->io_taskqueue = taskqueue_create(be_lun->lunname, M_WAITOK,
2092	    taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);
2093
2094	if (be_lun->io_taskqueue == NULL) {
2095		snprintf(req->error_str, sizeof(req->error_str),
2096			 "unable to create taskqueue");
2097		goto bailout_error;
2098	}
2099
2100	/*
2101	 * Note that we start the same number of threads by default for
2102	 * both the file case and the block device case.  For the file
2103	 * case, we need multiple threads to allow concurrency, because the
2104	 * vnode interface is designed to be a blocking interface.  For the
2105	 * block device case, ZFS zvols at least will block the caller's
2106	 * context in many instances, and so we need multiple threads to
2107	 * overcome that problem.  Other block devices don't need as many
2108	 * threads, but they shouldn't cause too many problems.
2109	 *
2110	 * If the user wants to just have a single thread for a block
2111	 * device, he can specify that when the LUN is created, or change
2112	 * the tunable/sysctl to alter the default number of threads.
2113	 */
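	/*
	 * The thread count comes from the "num_threads" LUN option when
	 * one was given, for example (hypothetical backing store path):
	 *
	 *	ctladm create -b block -o file=/data/lun0 -o num_threads=32
	 *
	 * and otherwise from the cbb_num_threads tunable/sysctl.
	 */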
2114	retval = taskqueue_start_threads(&be_lun->io_taskqueue,
2115					 /*num threads*/num_threads,
2116					 /*priority*/PWAIT,
2117					 /*thread name*/
2118					 "%s taskq", be_lun->lunname);
2119
2120	if (retval != 0)
2121		goto bailout_error;
2122
2123	be_lun->num_threads = num_threads;
2124
2125	mtx_lock(&softc->lock);
2126	softc->num_luns++;
2127	STAILQ_INSERT_TAIL(&softc->lun_list, be_lun, links);
2128
2129	mtx_unlock(&softc->lock);
2130
2131	retval = ctl_add_lun(&be_lun->ctl_be_lun);
2132	if (retval != 0) {
2133		mtx_lock(&softc->lock);
2134		STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
2135			      links);
2136		softc->num_luns--;
2137		mtx_unlock(&softc->lock);
2138		snprintf(req->error_str, sizeof(req->error_str),
2139			 "ctl_add_lun() returned error %d, see dmesg for "
2140			 "details", retval);
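		/*
		 * The ioctl itself still succeeds; the failure is
		 * reported through req->status and error_str.
		 */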
2141		retval = 0;
2142		goto bailout_error;
2143	}
2144
2145	mtx_lock(&softc->lock);
2146
2147	/*
2148	 * Tell the config_status routine that we're waiting so it won't
2149	 * clean up the LUN in the event of an error.
2150	 */
2151	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2152
2153	while (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) {
2154		retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
2155		if (retval == EINTR)
2156			break;
2157	}
2158	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2159
2160	if (be_lun->flags & CTL_BE_BLOCK_LUN_CONFIG_ERR) {
2161		snprintf(req->error_str, sizeof(req->error_str),
2162			 "LUN configuration error, see dmesg for details");
2163		STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
2164			      links);
2165		softc->num_luns--;
2166		mtx_unlock(&softc->lock);
2167		goto bailout_error;
2168	} else {
2169		params->req_lun_id = be_lun->ctl_be_lun.lun_id;
2170	}
2171
2172	mtx_unlock(&softc->lock);
2173
2174	be_lun->disk_stats = devstat_new_entry("cbb", params->req_lun_id,
2175					       be_lun->blocksize,
2176					       DEVSTAT_ALL_SUPPORTED,
2177					       be_lun->ctl_be_lun.lun_type
2178					       | DEVSTAT_TYPE_IF_OTHER,
2179					       DEVSTAT_PRIORITY_OTHER);
2180
2181	return (retval);
2182
2183bailout_error:
2184	req->status = CTL_LUN_ERROR;
2185
2186	if (be_lun->io_taskqueue != NULL)
2187		taskqueue_free(be_lun->io_taskqueue);
2188	ctl_be_block_close(be_lun);
2189	if (be_lun->dev_path != NULL)
2190		free(be_lun->dev_path, M_CTLBLK);
2191	if (be_lun->lun_zone != NULL)
2192		uma_zdestroy(be_lun->lun_zone);
2193	ctl_free_opts(&be_lun->ctl_be_lun.options);
2194	mtx_destroy(&be_lun->queue_lock);
2195	mtx_destroy(&be_lun->io_lock);
2196	free(be_lun, M_CTLBLK);
2197
2198	return (retval);
2199}
2200
2201static int
2202ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2203{
2204	struct ctl_lun_rm_params *params;
2205	struct ctl_be_block_lun *be_lun;
2206	int retval;
2207
2208	params = &req->reqdata.rm;
2209
2210	mtx_lock(&softc->lock);
2211
2212	be_lun = NULL;
2213
2214	STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
2215		if (be_lun->ctl_be_lun.lun_id == params->lun_id)
2216			break;
2217	}
2218	mtx_unlock(&softc->lock);
2219
2220	if (be_lun == NULL) {
2221		snprintf(req->error_str, sizeof(req->error_str),
2222			 "LUN %u is not managed by the block backend",
2223			 params->lun_id);
2224		goto bailout_error;
2225	}
2226
2227	retval = ctl_disable_lun(&be_lun->ctl_be_lun);
2228
2229	if (retval != 0) {
2230		snprintf(req->error_str, sizeof(req->error_str),
2231			 "error %d returned from ctl_disable_lun() for "
2232			 "LUN %u", retval, params->lun_id);
2233		goto bailout_error;
2235	}
2236
2237	retval = ctl_invalidate_lun(&be_lun->ctl_be_lun);
2238	if (retval != 0) {
2239		snprintf(req->error_str, sizeof(req->error_str),
2240			 "error %d returned from ctl_invalidate_lun() for "
2241			 "LUN %u", retval, params->lun_id);
2242		goto bailout_error;
2243	}
2244
2245	mtx_lock(&softc->lock);
2246
2247	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2248
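	/*
	 * Wait for ctl_be_block_lun_shutdown() to mark the LUN
	 * unconfigured.  PCATCH lets a signal interrupt the sleep; that
	 * case is detected and reported below.
	 */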
2249	while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
2250		retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
2251		if (retval == EINTR)
2252			break;
2253	}
2254
2255	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2256
2257	if ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
2258		snprintf(req->error_str, sizeof(req->error_str),
2259			 "interrupted waiting for LUN to be freed");
2260		mtx_unlock(&softc->lock);
2261		goto bailout_error;
2262	}
2263
2264	STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, links);
2265
2266	softc->num_luns--;
2267	mtx_unlock(&softc->lock);
2268
2269	taskqueue_drain(be_lun->io_taskqueue, &be_lun->io_task);
2270
2271	taskqueue_free(be_lun->io_taskqueue);
2272
2273	ctl_be_block_close(be_lun);
2274
2275	if (be_lun->disk_stats != NULL)
2276		devstat_remove_entry(be_lun->disk_stats);
2277
2278	uma_zdestroy(be_lun->lun_zone);
2279
2280	ctl_free_opts(&be_lun->ctl_be_lun.options);
2281	free(be_lun->dev_path, M_CTLBLK);
2282	mtx_destroy(&be_lun->queue_lock);
2283	mtx_destroy(&be_lun->io_lock);
2284	free(be_lun, M_CTLBLK);
2285
2286	req->status = CTL_LUN_OK;
2287
2288	return (0);
2289
2290bailout_error:
2292	req->status = CTL_LUN_ERROR;
2293
2294	return (0);
2295}
2296
2297static int
2298ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun,
2299			 struct ctl_lun_req *req)
2300{
2301	struct vattr vattr;
2302	int error;
2303	struct ctl_lun_create_params *params = &be_lun->params;
2304
2305	if (params->lun_size_bytes != 0) {
2306		be_lun->size_bytes = params->lun_size_bytes;
2307	} else {
2308		vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
2309		error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
2310		VOP_UNLOCK(be_lun->vn, 0);
2311		if (error != 0) {
2312			snprintf(req->error_str, sizeof(req->error_str),
2313				 "error calling VOP_GETATTR() for file %s",
2314				 be_lun->dev_path);
2315			return (error);
2316		}
2317
2318		be_lun->size_bytes = vattr.va_size;
2319	}
2320
2321	return (0);
2322}
2323
2324static int
2325ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun,
2326			struct ctl_lun_req *req)
2327{
2328	struct ctl_be_block_devdata *dev_data;
2329	int error;
2330	struct ctl_lun_create_params *params = &be_lun->params;
2331	uint64_t size_bytes;
2332
2333	dev_data = &be_lun->backend.dev;
2334	if (!dev_data->csw->d_ioctl) {
2335		snprintf(req->error_str, sizeof(req->error_str),
2336			 "no d_ioctl for device %s!", be_lun->dev_path);
2337		return (ENODEV);
2338	}
2339
2340	error = dev_data->csw->d_ioctl(dev_data->cdev, DIOCGMEDIASIZE,
2341			       (caddr_t)&size_bytes, FREAD,
2342			       curthread);
2343	if (error) {
2344		snprintf(req->error_str, sizeof(req->error_str),
2345			 "error %d returned for DIOCGMEDIASIZE ioctl "
2346			 "on %s!", error, be_lun->dev_path);
2347		return (error);
2348	}
2349
2350	if (params->lun_size_bytes != 0) {
2351		if (params->lun_size_bytes > size_bytes) {
2352			snprintf(req->error_str, sizeof(req->error_str),
2353				 "requested LUN size %ju > backing device "
2354				 "size %ju",
2355				 (uintmax_t)params->lun_size_bytes,
2356				 (uintmax_t)size_bytes);
2357			return (EINVAL);
2358		}
2359
2360		be_lun->size_bytes = params->lun_size_bytes;
2361	} else {
2362		be_lun->size_bytes = size_bytes;
2363	}
2364
2365	return (0);
2366}
2367
2368static int
2369ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2370{
2371	struct ctl_lun_modify_params *params;
2372	struct ctl_be_block_lun *be_lun;
2373	uint64_t oldsize;
2374	int error;
2375
2376	params = &req->reqdata.modify;
2377
2378	mtx_lock(&softc->lock);
2379	be_lun = NULL;
2380	STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
2381		if (be_lun->ctl_be_lun.lun_id == params->lun_id)
2382			break;
2383	}
2384	mtx_unlock(&softc->lock);
2385
2386	if (be_lun == NULL) {
2387		snprintf(req->error_str, sizeof(req->error_str),
2388			 "LUN %u is not managed by the block backend",
2389			 params->lun_id);
2390		goto bailout_error;
2391	}
2392
2393	be_lun->params.lun_size_bytes = params->lun_size_bytes;
2394
2395	oldsize = be_lun->size_blocks;
2396	if (be_lun->vn == NULL)
2397		error = ctl_be_block_open(softc, be_lun, req);
2398	else if (be_lun->vn->v_type == VREG)
2399		error = ctl_be_block_modify_file(be_lun, req);
2400	else
2401		error = ctl_be_block_modify_dev(be_lun, req);
2402
	/*
	 * ctl_be_block_modify_file() and ctl_be_block_modify_dev() only
	 * update size_bytes, so recompute size_blocks before comparing
	 * with the old value; otherwise a resize of an already-open
	 * backing store would go unnoticed.
	 */
	if (error == 0)
		be_lun->size_blocks = be_lun->size_bytes >>
		    be_lun->blocksize_shift;
2403	if (error == 0 && be_lun->size_blocks != oldsize) {
2406
2407		/*
2408		 * The maximum LBA is the size - 1.
2409		 *
2410		 * XXX: Note that this field is being updated without locking,
2411		 * 	which might cause problems on 32-bit architectures.
2412		 */
2413		be_lun->ctl_be_lun.maxlba = (be_lun->size_blocks == 0) ?
2414		    0 : (be_lun->size_blocks - 1);
2415		be_lun->ctl_be_lun.blocksize = be_lun->blocksize;
2416		be_lun->ctl_be_lun.pblockexp = be_lun->pblockexp;
2417		be_lun->ctl_be_lun.pblockoff = be_lun->pblockoff;
2418		if (be_lun->dispatch == ctl_be_block_dispatch_zvol &&
2419		    be_lun->blocksize != 0)
2420			be_lun->ctl_be_lun.atomicblock = CTLBLK_MAX_IO_SIZE /
2421			    be_lun->blocksize;
2422		ctl_lun_capacity_changed(&be_lun->ctl_be_lun);
2423		if (oldsize == 0 && be_lun->size_blocks != 0)
2424			ctl_lun_online(&be_lun->ctl_be_lun);
2425	}
2426
2427	/* Tell the user the exact size we ended up using */
2428	params->lun_size_bytes = be_lun->size_bytes;
2429
2430	req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK;
2431
2432	return (0);
2433
2434bailout_error:
2435	req->status = CTL_LUN_ERROR;
2436
2437	return (0);
2438}
2439
2440static void
2441ctl_be_block_lun_shutdown(void *be_lun)
2442{
2443	struct ctl_be_block_lun *lun;
2444	struct ctl_be_block_softc *softc;
2445
2446	lun = (struct ctl_be_block_lun *)be_lun;
2447
2448	softc = lun->softc;
2449
2450	mtx_lock(&softc->lock);
2451	lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED;
2452	if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2453		wakeup(lun);
2454	mtx_unlock(&softc->lock);
2456}
2457
2458static void
2459ctl_be_block_lun_config_status(void *be_lun, ctl_lun_config_status status)
2460{
2461	struct ctl_be_block_lun *lun;
2462	struct ctl_be_block_softc *softc;
2463
2464	lun = (struct ctl_be_block_lun *)be_lun;
2465	softc = lun->softc;
2466
2467	if (status == CTL_LUN_CONFIG_OK) {
2468		mtx_lock(&softc->lock);
2469		lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
2470		if (lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2471			wakeup(lun);
2472		mtx_unlock(&softc->lock);
2473
2474		/*
2475		 * We successfully added the LUN, attempt to enable it.
2476		 */
2477		if (ctl_enable_lun(&lun->ctl_be_lun) != 0) {
2478			printf("%s: ctl_enable_lun() failed!\n", __func__);
2479			if (ctl_invalidate_lun(&lun->ctl_be_lun) != 0) {
2480				printf("%s: ctl_invalidate_lun() failed!\n",
2481				       __func__);
2482			}
2483		}
2484
2485		return;
2486	}
2487
2489	mtx_lock(&softc->lock);
2490	lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
2491	lun->flags |= CTL_BE_BLOCK_LUN_CONFIG_ERR;
2492	wakeup(lun);
2493	mtx_unlock(&softc->lock);
2494}
2495
2497static int
2498ctl_be_block_config_write(union ctl_io *io)
2499{
2500	struct ctl_be_block_lun *be_lun;
2501	struct ctl_be_lun *ctl_be_lun;
2502	int retval;
2503
2504	retval = 0;
2505
2506	DPRINTF("entered\n");
2507
2508	ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
2509		CTL_PRIV_BACKEND_LUN].ptr;
2510	be_lun = (struct ctl_be_block_lun *)ctl_be_lun->be_lun;
2511
2512	switch (io->scsiio.cdb[0]) {
2513	case SYNCHRONIZE_CACHE:
2514	case SYNCHRONIZE_CACHE_16:
2515	case WRITE_SAME_10:
2516	case WRITE_SAME_16:
2517	case UNMAP:
2518		/*
2519		 * The upper level CTL code will filter out any CDBs with
2520		 * the immediate bit set and return the proper error.
2521		 *
2522		 * We don't really need to worry about what LBA range the
2523		 * user asked to be synced out.  When they issue a sync
2524		 * cache command, we'll sync out the whole thing.
2525		 */
2526		mtx_lock(&be_lun->queue_lock);
2527		STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
2528				   links);
2529		mtx_unlock(&be_lun->queue_lock);
2530		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
2531		break;
2532	case START_STOP_UNIT: {
2533		struct scsi_start_stop_unit *cdb;
2534
2535		cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
2536
2537		if (cdb->how & SSS_START)
2538			retval = ctl_start_lun(ctl_be_lun);
2539		else {
2540			retval = ctl_stop_lun(ctl_be_lun);
2541			/*
2542			 * XXX KDM Copan-specific offline behavior.
2543			 * Figure out a reasonable way to port this?
2544			 */
2545#ifdef NEEDTOPORT
2546			if ((retval == 0)
2547			 && (cdb->byte2 & SSS_ONOFFLINE))
2548				retval = ctl_lun_offline(ctl_be_lun);
2549#endif
2550		}
2551
2552		/*
2553		 * In general, the above routines should not fail.  They
2554		 * just set state for the LUN.  So we've got something
2555		 * pretty wrong here if we can't start or stop the LUN.
2556		 */
2557		if (retval != 0) {
2558			ctl_set_internal_failure(&io->scsiio,
2559						 /*sks_valid*/ 1,
2560						 /*retry_count*/ 0xf051);
2561			retval = CTL_RETVAL_COMPLETE;
2562		} else {
2563			ctl_set_success(&io->scsiio);
2564		}
2565		ctl_config_write_done(io);
2566		break;
2567	}
2568	default:
2569		ctl_set_invalid_opcode(&io->scsiio);
2570		ctl_config_write_done(io);
2571		retval = CTL_RETVAL_COMPLETE;
2572		break;
2573	}
2574
2575	return (retval);
2577}
2578
2579static int
2580ctl_be_block_config_read(union ctl_io *io)
2581{
2582	return (0);
2583}
2584
2585static int
2586ctl_be_block_lun_info(void *be_lun, struct sbuf *sb)
2587{
2588	struct ctl_be_block_lun *lun;
2589	int retval;
2590
2591	lun = (struct ctl_be_block_lun *)be_lun;
2593
2594	retval = sbuf_printf(sb, "\t<num_threads>");
2595
2596	if (retval != 0)
2597		goto bailout;
2598
2599	retval = sbuf_printf(sb, "%d", lun->num_threads);
2600
2601	if (retval != 0)
2602		goto bailout;
2603
2604	retval = sbuf_printf(sb, "</num_threads>\n");
2605
2606bailout:
2607
2608	return (retval);
2609}
2610
2611static uint64_t
2612ctl_be_block_lun_attr(void *be_lun, const char *attrname)
2613{
2614	struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)be_lun;
2615
2616	if (lun->getattr == NULL)
2617		return (UINT64_MAX);
2618	return (lun->getattr(lun, attrname));
2619}
2620
2621int
2622ctl_be_block_init(void)
2623{
2624	struct ctl_be_block_softc *softc;
2625	int retval;
2626
2627	softc = &backend_block_softc;
2628	retval = 0;
2629
2630	mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF);
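	/* Zone for the per-I/O (beio) state structures of this backend. */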
2631	beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
2632	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2633	STAILQ_INIT(&softc->disk_list);
2634	STAILQ_INIT(&softc->lun_list);
2635
2636	return (retval);
2637}
2638