/* ctl_backend_block.c revision 269123 */
1193323Sed/*- 2193323Sed * Copyright (c) 2003 Silicon Graphics International Corp. 3353358Sdim * Copyright (c) 2009-2011 Spectra Logic Corporation 4353358Sdim * Copyright (c) 2012 The FreeBSD Foundation 5353358Sdim * All rights reserved. 6193323Sed * 7193323Sed * Portions of this software were developed by Edward Tomasz Napierala 8193323Sed * under sponsorship from the FreeBSD Foundation. 9193323Sed * 10193323Sed * Redistribution and use in source and binary forms, with or without 11193323Sed * modification, are permitted provided that the following conditions 12193323Sed * are met: 13193323Sed * 1. Redistributions of source code must retain the above copyright 14193323Sed * notice, this list of conditions, and the following disclaimer, 15193323Sed * without modification. 16261991Sdim * 2. Redistributions in binary form must reproduce at minimum a disclaimer 17218893Sdim * substantially similar to the "NO WARRANTY" disclaimer below 18309124Sdim * ("Disclaimer") and any redistribution must be conditioned upon 19276479Sdim * including a substantially similar Disclaimer requirement for further 20321369Sdim * binary redistribution. 21261991Sdim * 22321369Sdim * NO WARRANTY 23276479Sdim * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24261991Sdim * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25288943Sdim * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR 26288943Sdim * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 27288943Sdim * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28288943Sdim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29341825Sdim * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30341825Sdim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 31341825Sdim * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 32341825Sdim * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33341825Sdim * POSSIBILITY OF SUCH DAMAGES. 34341825Sdim * 35341825Sdim * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $ 36341825Sdim */ 37341825Sdim/* 38341825Sdim * CAM Target Layer driver backend for block devices. 39341825Sdim * 40341825Sdim * Author: Ken Merry <ken@FreeBSD.org> 41193323Sed */ 42360784Sdim#include <sys/cdefs.h> 43341825Sdim__FBSDID("$FreeBSD: head/sys/cam/ctl/ctl_backend_block.c 269123 2014-07-26 13:56:50Z mav $"); 44261991Sdim 45341825Sdim#include <sys/param.h> 46261991Sdim#include <sys/systm.h> 47341825Sdim#include <sys/kernel.h> 48261991Sdim#include <sys/types.h> 49341825Sdim#include <sys/kthread.h> 50261991Sdim#include <sys/bio.h> 51261991Sdim#include <sys/fcntl.h> 52193323Sed#include <sys/limits.h> 53360784Sdim#include <sys/lock.h> 54360784Sdim#include <sys/mutex.h> 55360784Sdim#include <sys/condvar.h> 56360784Sdim#include <sys/malloc.h> 57360784Sdim#include <sys/conf.h> 58360784Sdim#include <sys/ioccom.h> 59360784Sdim#include <sys/queue.h> 60360784Sdim#include <sys/sbuf.h> 61360784Sdim#include <sys/endian.h> 62360784Sdim#include <sys/uio.h> 63360784Sdim#include <sys/buf.h> 64360784Sdim#include <sys/taskqueue.h> 65360784Sdim#include <sys/vnode.h> 66360784Sdim#include <sys/namei.h> 67360784Sdim#include <sys/mount.h> 68360784Sdim#include <sys/disk.h> 69360784Sdim#include <sys/fcntl.h> 70360784Sdim#include <sys/filedesc.h> 71360784Sdim#include <sys/proc.h> 72360784Sdim#include 
<sys/pcpu.h> 73360784Sdim#include <sys/module.h> 74360784Sdim#include <sys/sdt.h> 75360784Sdim#include <sys/devicestat.h> 76360784Sdim#include <sys/sysctl.h> 77360784Sdim 78360784Sdim#include <geom/geom.h> 79360784Sdim 80360784Sdim#include <cam/cam.h> 81360784Sdim#include <cam/scsi/scsi_all.h> 82360784Sdim#include <cam/scsi/scsi_da.h> 83360784Sdim#include <cam/ctl/ctl_io.h> 84360784Sdim#include <cam/ctl/ctl.h> 85360784Sdim#include <cam/ctl/ctl_backend.h> 86360784Sdim#include <cam/ctl/ctl_frontend_internal.h> 87360784Sdim#include <cam/ctl/ctl_ioctl.h> 88360784Sdim#include <cam/ctl/ctl_scsi_all.h> 89288943Sdim#include <cam/ctl/ctl_error.h> 90288943Sdim 91353358Sdim/* 92288943Sdim * The idea here is that we'll allocate enough S/G space to hold a 1MB 93288943Sdim * I/O. If we get an I/O larger than that, we'll split it. 94288943Sdim */ 95288943Sdim#define CTLBLK_HALF_IO_SIZE (512 * 1024) 96261991Sdim#define CTLBLK_MAX_IO_SIZE (CTLBLK_HALF_IO_SIZE * 2) 97288943Sdim#define CTLBLK_MAX_SEG MAXPHYS 98353358Sdim#define CTLBLK_HALF_SEGS MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MAX_SEG, 1) 99288943Sdim#define CTLBLK_MAX_SEGS (CTLBLK_HALF_SEGS * 2) 100288943Sdim 101288943Sdim#ifdef CTLBLK_DEBUG 102288943Sdim#define DPRINTF(fmt, args...) \ 103288943Sdim printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) 104288943Sdim#else 105288943Sdim#define DPRINTF(fmt, args...) 
do {} while(0) 106288943Sdim#endif 107288943Sdim 108261991Sdim#define PRIV(io) \ 109288943Sdim ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND]) 110261991Sdim#define ARGS(io) \ 111288943Sdim ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]) 112261991Sdim 113360784SdimSDT_PROVIDER_DEFINE(cbb); 114288943Sdim 115353358Sdimtypedef enum { 116288943Sdim CTL_BE_BLOCK_LUN_UNCONFIGURED = 0x01, 117288943Sdim CTL_BE_BLOCK_LUN_CONFIG_ERR = 0x02, 118261991Sdim CTL_BE_BLOCK_LUN_WAITING = 0x04, 119360784Sdim CTL_BE_BLOCK_LUN_MULTI_THREAD = 0x08 120288943Sdim} ctl_be_block_lun_flags; 121296417Sdim 122288943Sdimtypedef enum { 123288943Sdim CTL_BE_BLOCK_NONE, 124288943Sdim CTL_BE_BLOCK_DEV, 125261991Sdim CTL_BE_BLOCK_FILE 126288943Sdim} ctl_be_block_type; 127288943Sdim 128261991Sdimstruct ctl_be_block_devdata { 129261991Sdim struct cdev *cdev; 130288943Sdim struct cdevsw *csw; 131353358Sdim int dev_ref; 132288943Sdim}; 133288943Sdim 134261991Sdimstruct ctl_be_block_filedata { 135360784Sdim struct ucred *cred; 136288943Sdim}; 137296417Sdim 138288943Sdimunion ctl_be_block_bedata { 139288943Sdim struct ctl_be_block_devdata dev; 140288943Sdim struct ctl_be_block_filedata file; 141261991Sdim}; 142288943Sdim 143288943Sdimstruct ctl_be_block_io; 144261991Sdimstruct ctl_be_block_lun; 145261991Sdim 146288943Sdimtypedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun, 147261991Sdim struct ctl_be_block_io *beio); 148341825Sdim 149261991Sdim/* 150261991Sdim * Backend LUN structure. There is a 1:1 mapping between a block device 151261991Sdim * and a backend block LUN, and between a backend block LUN and a CTL LUN. 
152261991Sdim */ 153261991Sdimstruct ctl_be_block_lun { 154261991Sdim struct ctl_block_disk *disk; 155261991Sdim char lunname[32]; 156353358Sdim char *dev_path; 157288943Sdim ctl_be_block_type dev_type; 158288943Sdim struct vnode *vn; 159288943Sdim union ctl_be_block_bedata backend; 160321369Sdim cbb_dispatch_t dispatch; 161288943Sdim cbb_dispatch_t lun_flush; 162261991Sdim cbb_dispatch_t unmap; 163288943Sdim uma_zone_t lun_zone; 164288943Sdim uint64_t size_blocks; 165353358Sdim uint64_t size_bytes; 166288943Sdim uint32_t blocksize; 167288943Sdim int blocksize_shift; 168261991Sdim uint16_t pblockexp; 169288943Sdim uint16_t pblockoff; 170353358Sdim struct ctl_be_block_softc *softc; 171288943Sdim struct devstat *disk_stats; 172288943Sdim ctl_be_block_lun_flags flags; 173288943Sdim STAILQ_ENTRY(ctl_be_block_lun) links; 174288943Sdim struct ctl_be_lun ctl_be_lun; 175288943Sdim struct taskqueue *io_taskqueue; 176288943Sdim struct task io_task; 177288943Sdim int num_threads; 178288943Sdim STAILQ_HEAD(, ctl_io_hdr) input_queue; 179261991Sdim STAILQ_HEAD(, ctl_io_hdr) config_write_queue; 180288943Sdim STAILQ_HEAD(, ctl_io_hdr) datamove_queue; 181261991Sdim struct mtx_padalign io_lock; 182360784Sdim struct mtx_padalign queue_lock; 183288943Sdim}; 184353358Sdim 185288943Sdim/* 186288943Sdim * Overall softc structure for the block backend module. 187261991Sdim */ 188360784Sdimstruct ctl_be_block_softc { 189288943Sdim struct mtx lock; 190296417Sdim int num_disks; 191288943Sdim STAILQ_HEAD(, ctl_block_disk) disk_list; 192288943Sdim int num_luns; 193288943Sdim STAILQ_HEAD(, ctl_be_block_lun) lun_list; 194261991Sdim}; 195288943Sdim 196288943Sdimstatic struct ctl_be_block_softc backend_block_softc; 197261991Sdim 198261991Sdim/* 199288943Sdim * Per-I/O information. 
200353358Sdim */ 201288943Sdimstruct ctl_be_block_io { 202288943Sdim union ctl_io *io; 203261991Sdim struct ctl_sg_entry sg_segs[CTLBLK_MAX_SEGS]; 204360784Sdim struct iovec xiovecs[CTLBLK_MAX_SEGS]; 205288943Sdim int bio_cmd; 206296417Sdim int bio_flags; 207288943Sdim int num_segs; 208288943Sdim int num_bios_sent; 209288943Sdim int num_bios_done; 210261991Sdim int send_complete; 211288943Sdim int num_errors; 212288943Sdim struct bintime ds_t0; 213261991Sdim devstat_tag_type ds_tag_type; 214261991Sdim devstat_trans_flags ds_trans_type; 215288943Sdim uint64_t io_len; 216261991Sdim uint64_t io_offset; 217341825Sdim struct ctl_be_block_softc *softc; 218288943Sdim struct ctl_be_block_lun *lun; 219288943Sdim void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */ 220288943Sdim}; 221288943Sdim 222288943Sdimstatic int cbb_num_threads = 14; 223288943SdimSYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD, 0, 224288943Sdim "CAM Target Layer Block Backend"); 225353358SdimSYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN, 226288943Sdim &cbb_num_threads, 0, "Number of threads per backing file"); 227288943Sdim 228288943Sdimstatic struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc); 229321369Sdimstatic void ctl_free_beio(struct ctl_be_block_io *beio); 230288943Sdimstatic void ctl_complete_beio(struct ctl_be_block_io *beio); 231288943Sdimstatic int ctl_be_block_move_done(union ctl_io *io); 232341825Sdimstatic void ctl_be_block_biodone(struct bio *bio); 233261991Sdimstatic void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun, 234261991Sdim struct ctl_be_block_io *beio); 235261991Sdimstatic void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun, 236261991Sdim struct ctl_be_block_io *beio); 237261991Sdimstatic void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun, 238261991Sdim struct ctl_be_block_io *beio); 239288943Sdimstatic void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun, 
240261991Sdim struct ctl_be_block_io *beio); 241261991Sdimstatic void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun, 242261991Sdim struct ctl_be_block_io *beio); 243261991Sdimstatic void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun, 244261991Sdim union ctl_io *io); 245261991Sdimstatic void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun, 246341825Sdim union ctl_io *io); 247321369Sdimstatic void ctl_be_block_worker(void *context, int pending); 248321369Sdimstatic int ctl_be_block_submit(union ctl_io *io); 249321369Sdimstatic int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, 250321369Sdim int flag, struct thread *td); 251321369Sdimstatic int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, 252321369Sdim struct ctl_lun_req *req); 253321369Sdimstatic int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, 254321369Sdim struct ctl_lun_req *req); 255341825Sdimstatic int ctl_be_block_close(struct ctl_be_block_lun *be_lun); 256321369Sdimstatic int ctl_be_block_open(struct ctl_be_block_softc *softc, 257321369Sdim struct ctl_be_block_lun *be_lun, 258321369Sdim struct ctl_lun_req *req); 259321369Sdimstatic int ctl_be_block_create(struct ctl_be_block_softc *softc, 260321369Sdim struct ctl_lun_req *req); 261341825Sdimstatic int ctl_be_block_rm(struct ctl_be_block_softc *softc, 262321369Sdim struct ctl_lun_req *req); 263321369Sdimstatic int ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun, 264321369Sdim struct ctl_lun_req *req); 265321369Sdimstatic int ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun, 266321369Sdim struct ctl_lun_req *req); 267341825Sdimstatic int ctl_be_block_modify(struct ctl_be_block_softc *softc, 268321369Sdim struct ctl_lun_req *req); 269321369Sdimstatic void ctl_be_block_lun_shutdown(void *be_lun); 270321369Sdimstatic void ctl_be_block_lun_config_status(void *be_lun, 271321369Sdim ctl_lun_config_status status); 272321369Sdimstatic int ctl_be_block_config_write(union ctl_io *io); 
273341825Sdimstatic int ctl_be_block_config_read(union ctl_io *io); 274261991Sdimstatic int ctl_be_block_lun_info(void *be_lun, struct sbuf *sb); 275261991Sdimint ctl_be_block_init(void); 276261991Sdim 277261991Sdimstatic struct ctl_backend_driver ctl_be_block_driver = 278261991Sdim{ 279261991Sdim .name = "block", 280288943Sdim .flags = CTL_BE_FLAG_HAS_CONFIG, 281261991Sdim .init = ctl_be_block_init, 282261991Sdim .data_submit = ctl_be_block_submit, 283261991Sdim .data_move_done = ctl_be_block_move_done, 284261991Sdim .config_read = ctl_be_block_config_read, 285261991Sdim .config_write = ctl_be_block_config_write, 286261991Sdim .ioctl = ctl_be_block_ioctl, 287261991Sdim .lun_info = ctl_be_block_lun_info 288261991Sdim}; 289261991Sdim 290341825SdimMALLOC_DEFINE(M_CTLBLK, "ctlblk", "Memory used for CTL block backend"); 291261991SdimCTL_BACKEND_DECLARE(cbb, ctl_be_block_driver); 292261991Sdim 293261991Sdimstatic uma_zone_t beio_zone; 294261991Sdim 295261991Sdimstatic struct ctl_be_block_io * 296261991Sdimctl_alloc_beio(struct ctl_be_block_softc *softc) 297261991Sdim{ 298276479Sdim struct ctl_be_block_io *beio; 299276479Sdim 300276479Sdim beio = uma_zalloc(beio_zone, M_WAITOK | M_ZERO); 301261991Sdim beio->softc = softc; 302261991Sdim return (beio); 303341825Sdim} 304261991Sdim 305261991Sdimstatic void 306261991Sdimctl_free_beio(struct ctl_be_block_io *beio) 307261991Sdim{ 308261991Sdim int duplicate_free; 309261991Sdim int i; 310261991Sdim 311261991Sdim duplicate_free = 0; 312261991Sdim 313261991Sdim for (i = 0; i < beio->num_segs; i++) { 314261991Sdim if (beio->sg_segs[i].addr == NULL) 315193323Sed duplicate_free++; 316193323Sed 317193323Sed uma_zfree(beio->lun->lun_zone, beio->sg_segs[i].addr); 318193323Sed beio->sg_segs[i].addr = NULL; 319321369Sdim 320314564Sdim /* For compare we had two equal S/G lists. 
*/ 321193323Sed if (ARGS(beio->io)->flags & CTL_LLF_COMPARE) { 322193323Sed uma_zfree(beio->lun->lun_zone, 323193323Sed beio->sg_segs[i + CTLBLK_HALF_SEGS].addr); 324321369Sdim beio->sg_segs[i + CTLBLK_HALF_SEGS].addr = NULL; 325314564Sdim } 326193323Sed } 327193323Sed 328193323Sed if (duplicate_free > 0) { 329321369Sdim printf("%s: %d duplicate frees out of %d segments\n", __func__, 330314564Sdim duplicate_free, beio->num_segs); 331276479Sdim } 332276479Sdim 333276479Sdim uma_zfree(beio_zone, beio); 334321369Sdim} 335314564Sdim 336206083Srdivackystatic void 337193323Sedctl_complete_beio(struct ctl_be_block_io *beio) 338206083Srdivacky{ 339314564Sdim union ctl_io *io = beio->io; 340206083Srdivacky 341193323Sed if (beio->beio_cont != NULL) { 342314564Sdim beio->beio_cont(beio); 343206083Srdivacky } else { 344193323Sed ctl_free_beio(beio); 345314564Sdim ctl_data_submit_done(io); 346206083Srdivacky } 347193323Sed} 348193323Sed 349321369Sdimstatic int 350314564Sdimctl_be_block_move_done(union ctl_io *io) 351314564Sdim{ 352309124Sdim struct ctl_be_block_io *beio; 353309124Sdim struct ctl_be_block_lun *be_lun; 354309124Sdim struct ctl_lba_len_flags *lbalen; 355309124Sdim#ifdef CTL_TIME_IO 356234353Sdim struct bintime cur_bt; 357234353Sdim#endif 358321369Sdim int i; 359314564Sdim 360314564Sdim beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 361314564Sdim be_lun = beio->lun; 362314564Sdim 363314564Sdim DPRINTF("entered\n"); 364314564Sdim 365314564Sdim#ifdef CTL_TIME_IO 366314564Sdim getbintime(&cur_bt); 367314564Sdim bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt); 368314564Sdim bintime_add(&io->io_hdr.dma_bt, &cur_bt); 369314564Sdim io->io_hdr.num_dmas++; 370314564Sdim#endif 371198090Srdivacky io->scsiio.kern_rel_offset += io->scsiio.kern_data_len; 372314564Sdim 373314564Sdim /* 374314564Sdim * We set status at this point for read commands, and write 375314564Sdim * commands with errors. 
376314564Sdim */ 377309124Sdim if ((io->io_hdr.port_status == 0) && 378206083Srdivacky ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0) && 379314564Sdim ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) { 380206083Srdivacky lbalen = ARGS(beio->io); 381206083Srdivacky if (lbalen->flags & CTL_LLF_READ) { 382314564Sdim ctl_set_success(&io->scsiio); 383206083Srdivacky } else if (lbalen->flags & CTL_LLF_COMPARE) { 384206083Srdivacky /* We have two data blocks ready for comparison. */ 385314564Sdim for (i = 0; i < beio->num_segs; i++) { 386206083Srdivacky if (memcmp(beio->sg_segs[i].addr, 387206083Srdivacky beio->sg_segs[i + CTLBLK_HALF_SEGS].addr, 388198090Srdivacky beio->sg_segs[i].len) != 0) 389309124Sdim break; 390314564Sdim } 391314564Sdim if (i < beio->num_segs) 392309124Sdim ctl_set_sense(&io->scsiio, 393309124Sdim /*current_error*/ 1, 394309124Sdim /*sense_key*/ SSD_KEY_MISCOMPARE, 395309124Sdim /*asc*/ 0x1D, 396309124Sdim /*ascq*/ 0x00, 397309124Sdim SSD_ELEM_NONE); 398309124Sdim else 399234353Sdim ctl_set_success(&io->scsiio); 400234353Sdim } 401309124Sdim } 402309124Sdim else if ((io->io_hdr.port_status != 0) 403309124Sdim && ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0) 404309124Sdim && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) { 405309124Sdim /* 406309124Sdim * For hardware error sense keys, the sense key 407309124Sdim * specific value is defined to be a retry count, 408309124Sdim * but we use it to pass back an internal FETD 409309124Sdim * error code. XXX KDM Hopefully the FETD is only 410309124Sdim * using 16 bits for an error code, since that's 411309124Sdim * all the space we have in the sks field. 412309124Sdim */ 413309124Sdim ctl_set_internal_failure(&io->scsiio, 414309124Sdim /*sks_valid*/ 1, 415309124Sdim /*retry_count*/ 416309124Sdim io->io_hdr.port_status); 417309124Sdim } 418309124Sdim 419309124Sdim /* 420309124Sdim * If this is a read, or a write with errors, it is done. 
421309124Sdim */ 422309124Sdim if ((beio->bio_cmd == BIO_READ) 423309124Sdim || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0) 424309124Sdim || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) { 425309124Sdim ctl_complete_beio(beio); 426309124Sdim return (0); 427309124Sdim } 428321369Sdim 429218893Sdim /* 430309124Sdim * At this point, we have a write and the DMA completed 431218893Sdim * successfully. We now have to queue it to the task queue to 432218893Sdim * execute the backend I/O. That is because we do blocking 433321369Sdim * memory allocations, and in the file backing case, blocking I/O. 434218893Sdim * This move done routine is generally called in the SIM's 435309124Sdim * interrupt context, and therefore we cannot block. 436218893Sdim */ 437218893Sdim mtx_lock(&be_lun->queue_lock); 438321369Sdim /* 439321369Sdim * XXX KDM make sure that links is okay to use at this point. 440321369Sdim * Otherwise, we either need to add another field to ctl_io_hdr, 441314564Sdim * or deal with resource allocation here. 
442193323Sed */ 443193323Sed STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links); 444193323Sed mtx_unlock(&be_lun->queue_lock); 445321369Sdim 446321369Sdim taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 447314564Sdim 448193323Sed return (0); 449193323Sed} 450193323Sed 451321369Sdimstatic void 452321369Sdimctl_be_block_biodone(struct bio *bio) 453314564Sdim{ 454288943Sdim struct ctl_be_block_io *beio; 455193323Sed struct ctl_be_block_lun *be_lun; 456193323Sed union ctl_io *io; 457321369Sdim int error; 458321369Sdim 459314564Sdim beio = bio->bio_caller1; 460288943Sdim be_lun = beio->lun; 461193323Sed io = beio->io; 462193323Sed 463321369Sdim DPRINTF("entered\n"); 464321369Sdim 465314564Sdim error = bio->bio_error; 466193323Sed mtx_lock(&be_lun->io_lock); 467193323Sed if (error != 0) 468193323Sed beio->num_errors++; 469321369Sdim 470314564Sdim beio->num_bios_done++; 471327952Sdim 472193323Sed /* 473193323Sed * XXX KDM will this cause WITNESS to complain? Holding a lock 474321369Sdim * during the free might cause it to complain. 475193323Sed */ 476218893Sdim g_destroy_bio(bio); 477193323Sed 478193323Sed /* 479321369Sdim * If the send complete bit isn't set, or we aren't the last I/O to 480193323Sed * complete, then we're done. 481218893Sdim */ 482193323Sed if ((beio->send_complete == 0) 483193323Sed || (beio->num_bios_done < beio->num_bios_sent)) { 484321369Sdim mtx_unlock(&be_lun->io_lock); 485193323Sed return; 486218893Sdim } 487193323Sed 488193323Sed /* 489341825Sdim * At this point, we've verified that we are the last I/O to 490288943Sdim * complete, so it's safe to drop the lock. 
491288943Sdim */ 492321369Sdim devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 493288943Sdim beio->ds_tag_type, beio->ds_trans_type, 494288943Sdim /*now*/ NULL, /*then*/&beio->ds_t0); 495288943Sdim mtx_unlock(&be_lun->io_lock); 496288943Sdim 497288943Sdim /* 498353358Sdim * If there are any errors from the backing device, we fail the 499288943Sdim * entire I/O with a medium error. 500288943Sdim */ 501288943Sdim if (beio->num_errors > 0) { 502341825Sdim if (error == EOPNOTSUPP) { 503193323Sed ctl_set_invalid_opcode(&io->scsiio); 504193323Sed } else if (beio->bio_cmd == BIO_FLUSH) { 505341825Sdim /* XXX KDM is there is a better error here? */ 506288943Sdim ctl_set_internal_failure(&io->scsiio, 507288943Sdim /*sks_valid*/ 1, 508288943Sdim /*retry_count*/ 0xbad2); 509288943Sdim } else 510288943Sdim ctl_set_medium_error(&io->scsiio); 511288943Sdim ctl_complete_beio(beio); 512288943Sdim return; 513288943Sdim } 514353358Sdim 515288943Sdim /* 516288943Sdim * If this is a write, a flush, a delete or verify, we're all done. 517288943Sdim * If this is a read, we can now send the data to the user. 
518341825Sdim */ 519193323Sed if ((beio->bio_cmd == BIO_WRITE) 520193323Sed || (beio->bio_cmd == BIO_FLUSH) 521288943Sdim || (beio->bio_cmd == BIO_DELETE) 522288943Sdim || (ARGS(io)->flags & CTL_LLF_VERIFY)) { 523288943Sdim ctl_set_success(&io->scsiio); 524288943Sdim ctl_complete_beio(beio); 525288943Sdim } else { 526360784Sdim#ifdef CTL_TIME_IO 527288943Sdim getbintime(&io->io_hdr.dma_start_bt); 528288943Sdim#endif 529288943Sdim ctl_datamove(io); 530288943Sdim } 531288943Sdim} 532288943Sdim 533288943Sdimstatic void 534288943Sdimctl_be_block_flush_file(struct ctl_be_block_lun *be_lun, 535288943Sdim struct ctl_be_block_io *beio) 536193323Sed{ 537288943Sdim union ctl_io *io = beio->io; 538288943Sdim struct mount *mountpoint; 539360784Sdim int error, lock_flags; 540288943Sdim 541193323Sed DPRINTF("entered\n"); 542288943Sdim 543288943Sdim binuptime(&beio->ds_t0); 544288943Sdim mtx_lock(&be_lun->io_lock); 545288943Sdim devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); 546288943Sdim mtx_unlock(&be_lun->io_lock); 547193323Sed 548288943Sdim (void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT); 549288943Sdim 550288943Sdim if (MNT_SHARED_WRITES(mountpoint) 551288943Sdim || ((mountpoint == NULL) 552341825Sdim && MNT_SHARED_WRITES(be_lun->vn->v_mount))) 553288943Sdim lock_flags = LK_SHARED; 554288943Sdim else 555288943Sdim lock_flags = LK_EXCLUSIVE; 556288943Sdim 557288943Sdim vn_lock(be_lun->vn, lock_flags | LK_RETRY); 558288943Sdim 559288943Sdim error = VOP_FSYNC(be_lun->vn, MNT_WAIT, curthread); 560288943Sdim VOP_UNLOCK(be_lun->vn, 0); 561193323Sed 562193323Sed vn_finished_write(mountpoint); 563360784Sdim 564360784Sdim mtx_lock(&be_lun->io_lock); 565360784Sdim devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 566360784Sdim beio->ds_tag_type, beio->ds_trans_type, 567360784Sdim /*now*/ NULL, /*then*/&beio->ds_t0); 568360784Sdim mtx_unlock(&be_lun->io_lock); 569360784Sdim 570360784Sdim if (error == 0) 571360784Sdim ctl_set_success(&io->scsiio); 
572360784Sdim else { 573321369Sdim /* XXX KDM is there is a better error here? */ 574288943Sdim ctl_set_internal_failure(&io->scsiio, 575288943Sdim /*sks_valid*/ 1, 576288943Sdim /*retry_count*/ 0xbad1); 577193323Sed } 578288943Sdim 579193323Sed ctl_complete_beio(beio); 580193323Sed} 581193323Sed 582321369SdimSDT_PROBE_DEFINE1(cbb, kernel, read, file_start, "uint64_t"); 583321369SdimSDT_PROBE_DEFINE1(cbb, kernel, write, file_start, "uint64_t"); 584193323SedSDT_PROBE_DEFINE1(cbb, kernel, read, file_done,"uint64_t"); 585193323SedSDT_PROBE_DEFINE1(cbb, kernel, write, file_done, "uint64_t"); 586261991Sdim 587193323Sedstatic void 588193323Sedctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun, 589321369Sdim struct ctl_be_block_io *beio) 590321369Sdim{ 591193323Sed struct ctl_be_block_filedata *file_data; 592261991Sdim union ctl_io *io; 593193323Sed struct uio xuio; 594193323Sed struct iovec *xiovec; 595321369Sdim int flags; 596321369Sdim int error, i; 597193323Sed 598193323Sed DPRINTF("entered\n"); 599261991Sdim 600193323Sed file_data = &be_lun->backend.file; 601193323Sed io = beio->io; 602321369Sdim flags = beio->bio_flags; 603321369Sdim 604193323Sed bzero(&xuio, sizeof(xuio)); 605261991Sdim if (beio->bio_cmd == BIO_READ) { 606193323Sed SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0); 607193323Sed xuio.uio_rw = UIO_READ; 608321369Sdim } else { 609353358Sdim SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0); 610353358Sdim xuio.uio_rw = UIO_WRITE; 611193323Sed } 612353358Sdim xuio.uio_offset = beio->io_offset; 613193323Sed xuio.uio_resid = beio->io_len; 614353358Sdim xuio.uio_segflg = UIO_SYSSPACE; 615193323Sed xuio.uio_iov = beio->xiovecs; 616193323Sed xuio.uio_iovcnt = beio->num_segs; 617193323Sed xuio.uio_td = curthread; 618193323Sed 619353358Sdim for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) { 620353358Sdim xiovec->iov_base = beio->sg_segs[i].addr; 621353358Sdim xiovec->iov_len = beio->sg_segs[i].len; 622353358Sdim } 
623321369Sdim 624193323Sed binuptime(&beio->ds_t0); 625314564Sdim mtx_lock(&be_lun->io_lock); 626314564Sdim devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); 627314564Sdim mtx_unlock(&be_lun->io_lock); 628314564Sdim 629193323Sed if (beio->bio_cmd == BIO_READ) { 630193323Sed vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 631321369Sdim 632193323Sed /* 633314564Sdim * UFS pays attention to IO_DIRECT for reads. If the 634314564Sdim * DIRECTIO option is configured into the kernel, it calls 635314564Sdim * ffs_rawread(). But that only works for single-segment 636314564Sdim * uios with user space addresses. In our case, with a 637193323Sed * kernel uio, it still reads into the buffer cache, but it 638193323Sed * will just try to release the buffer from the cache later 639321369Sdim * on in ffs_read(). 640321369Sdim * 641321369Sdim * ZFS does not pay attention to IO_DIRECT for reads. 642193323Sed * 643314564Sdim * UFS does not pay attention to IO_SYNC for reads. 644314564Sdim * 645314564Sdim * ZFS pays attention to IO_SYNC (which translates into the 646314564Sdim * Solaris define FRSYNC for zfs_read()) for reads. It 647193323Sed * attempts to sync the file before reading. 648193323Sed * 649321369Sdim * So, to attempt to provide some barrier semantics in the 650321369Sdim * BIO_ORDERED case, set both IO_DIRECT and IO_SYNC. 651321369Sdim */ 652193323Sed error = VOP_READ(be_lun->vn, &xuio, (flags & BIO_ORDERED) ? 
653314564Sdim (IO_DIRECT|IO_SYNC) : 0, file_data->cred); 654314564Sdim 655314564Sdim VOP_UNLOCK(be_lun->vn, 0); 656314564Sdim SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0); 657193323Sed } else { 658193323Sed struct mount *mountpoint; 659321369Sdim int lock_flags; 660321369Sdim 661314564Sdim (void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT); 662193323Sed 663249423Sdim if (MNT_SHARED_WRITES(mountpoint) 664296417Sdim || ((mountpoint == NULL) 665249423Sdim && MNT_SHARED_WRITES(be_lun->vn->v_mount))) 666249423Sdim lock_flags = LK_SHARED; 667249423Sdim else 668193323Sed lock_flags = LK_EXCLUSIVE; 669193323Sed 670321369Sdim vn_lock(be_lun->vn, lock_flags | LK_RETRY); 671321369Sdim 672239462Sdim /* 673193323Sed * UFS pays attention to IO_DIRECT for writes. The write 674193323Sed * is done asynchronously. (Normally the write would just 675193323Sed * get put into cache. 676193323Sed * 677193323Sed * UFS pays attention to IO_SYNC for writes. It will 678193323Sed * attempt to write the buffer out synchronously if that 679193323Sed * flag is set. 680193323Sed * 681193323Sed * ZFS does not pay attention to IO_DIRECT for writes. 682276479Sdim * 683276479Sdim * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC) 684276479Sdim * for writes. It will flush the transaction from the 685276479Sdim * cache before returning. 686276479Sdim * 687276479Sdim * So if we've got the BIO_ORDERED flag set, we want 688276479Sdim * IO_SYNC in either the UFS or ZFS case. 689314564Sdim */ 690314564Sdim error = VOP_WRITE(be_lun->vn, &xuio, (flags & BIO_ORDERED) ? 
691314564Sdim IO_SYNC : 0, file_data->cred); 692314564Sdim VOP_UNLOCK(be_lun->vn, 0); 693314564Sdim 694314564Sdim vn_finished_write(mountpoint); 695314564Sdim SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0); 696314564Sdim } 697243830Sdim 698243830Sdim mtx_lock(&be_lun->io_lock); 699193323Sed devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 700296417Sdim beio->ds_tag_type, beio->ds_trans_type, 701296417Sdim /*now*/ NULL, /*then*/&beio->ds_t0); 702296417Sdim mtx_unlock(&be_lun->io_lock); 703296417Sdim 704296417Sdim /* 705193323Sed * If we got an error, set the sense data to "MEDIUM ERROR" and 706243830Sdim * return the I/O to the user. 707309124Sdim */ 708309124Sdim if (error != 0) { 709309124Sdim char path_str[32]; 710309124Sdim 711296417Sdim ctl_scsi_path_string(io, path_str, sizeof(path_str)); 712309124Sdim /* 713309124Sdim * XXX KDM ZFS returns ENOSPC when the underlying 714309124Sdim * filesystem fills up. What kind of SCSI error should we 715309124Sdim * return for that? 716243830Sdim */ 717309124Sdim printf("%s%s command returned errno %d\n", path_str, 718314564Sdim (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE", error); 719296417Sdim ctl_set_medium_error(&io->scsiio); 720296417Sdim ctl_complete_beio(beio); 721193323Sed return; 722193323Sed } 723314564Sdim 724314564Sdim /* 725314564Sdim * If this is a write or a verify, we're all done. 726314564Sdim * If this is a read, we can now send the data to the user. 
727314564Sdim */ 728314564Sdim if ((beio->bio_cmd == BIO_WRITE) || 729314564Sdim (ARGS(io)->flags & CTL_LLF_VERIFY)) { 730327952Sdim ctl_set_success(&io->scsiio); 731327952Sdim ctl_complete_beio(beio); 732327952Sdim } else { 733327952Sdim#ifdef CTL_TIME_IO 734327952Sdim getbintime(&io->io_hdr.dma_start_bt); 735360784Sdim#endif 736360784Sdim ctl_datamove(io); 737360784Sdim } 738360784Sdim} 739314564Sdim 740309124Sdimstatic void 741309124Sdimctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun, 742309124Sdim struct ctl_be_block_io *beio) 743314564Sdim{ 744309124Sdim struct ctl_be_block_devdata *dev_data; 745309124Sdim union ctl_io *io; 746309124Sdim struct uio xuio; 747309124Sdim struct iovec *xiovec; 748309124Sdim int flags; 749309124Sdim int error, i; 750314564Sdim 751309124Sdim DPRINTF("entered\n"); 752309124Sdim 753309124Sdim dev_data = &be_lun->backend.dev; 754206124Srdivacky io = beio->io; 755206124Srdivacky flags = beio->bio_flags; 756309124Sdim 757309124Sdim bzero(&xuio, sizeof(xuio)); 758243830Sdim if (beio->bio_cmd == BIO_READ) { 759309124Sdim SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0); 760309124Sdim xuio.uio_rw = UIO_READ; 761243830Sdim } else { 762243830Sdim SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0); 763243830Sdim xuio.uio_rw = UIO_WRITE; 764309124Sdim } 765309124Sdim xuio.uio_offset = beio->io_offset; 766314564Sdim xuio.uio_resid = beio->io_len; 767309124Sdim xuio.uio_segflg = UIO_SYSSPACE; 768309124Sdim xuio.uio_iov = beio->xiovecs; 769206274Srdivacky xuio.uio_iovcnt = beio->num_segs; 770206124Srdivacky xuio.uio_td = curthread; 771206124Srdivacky 772309124Sdim for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) { 773309124Sdim xiovec->iov_base = beio->sg_segs[i].addr; 774243830Sdim xiovec->iov_len = beio->sg_segs[i].len; 775309124Sdim } 776309124Sdim 777243830Sdim binuptime(&beio->ds_t0); 778243830Sdim mtx_lock(&be_lun->io_lock); 779243830Sdim devstat_start_transaction(beio->lun->disk_stats, 
&beio->ds_t0); 780309124Sdim mtx_unlock(&be_lun->io_lock); 781309124Sdim 782296417Sdim if (beio->bio_cmd == BIO_READ) { 783296417Sdim error = (*dev_data->csw->d_read)(dev_data->cdev, &xuio, 0); 784309124Sdim SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0); 785309124Sdim } else { 786309124Sdim error = (*dev_data->csw->d_write)(dev_data->cdev, &xuio, 0); 787309124Sdim SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0); 788309124Sdim } 789309124Sdim 790309124Sdim mtx_lock(&be_lun->io_lock); 791309124Sdim devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 792309124Sdim beio->ds_tag_type, beio->ds_trans_type, 793296417Sdim /*now*/ NULL, /*then*/&beio->ds_t0); 794296417Sdim mtx_unlock(&be_lun->io_lock); 795296417Sdim 796296417Sdim /* 797296417Sdim * If we got an error, set the sense data to "MEDIUM ERROR" and 798296417Sdim * return the I/O to the user. 799296417Sdim */ 800296417Sdim if (error != 0) { 801296417Sdim ctl_set_medium_error(&io->scsiio); 802296417Sdim ctl_complete_beio(beio); 803296417Sdim return; 804296417Sdim } 805309124Sdim 806309124Sdim /* 807309124Sdim * If this is a write or a verify, we're all done. 808296417Sdim * If this is a read, we can now send the data to the user. 
809296417Sdim */ 810296417Sdim if ((beio->bio_cmd == BIO_WRITE) || 811296417Sdim (ARGS(io)->flags & CTL_LLF_VERIFY)) { 812296417Sdim ctl_set_success(&io->scsiio); 813296417Sdim ctl_complete_beio(beio); 814296417Sdim } else { 815296417Sdim#ifdef CTL_TIME_IO 816296417Sdim getbintime(&io->io_hdr.dma_start_bt); 817296417Sdim#endif 818296417Sdim ctl_datamove(io); 819296417Sdim } 820296417Sdim} 821296417Sdim 822296417Sdimstatic void 823296417Sdimctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun, 824296417Sdim struct ctl_be_block_io *beio) 825296417Sdim{ 826296417Sdim struct bio *bio; 827296417Sdim union ctl_io *io; 828296417Sdim struct ctl_be_block_devdata *dev_data; 829296417Sdim 830296417Sdim dev_data = &be_lun->backend.dev; 831296417Sdim io = beio->io; 832296417Sdim 833296417Sdim DPRINTF("entered\n"); 834296417Sdim 835296417Sdim /* This can't fail, it's a blocking allocation. */ 836296417Sdim bio = g_alloc_bio(); 837296417Sdim 838296417Sdim bio->bio_cmd = BIO_FLUSH; 839296417Sdim bio->bio_flags |= BIO_ORDERED; 840296417Sdim bio->bio_dev = dev_data->cdev; 841296417Sdim bio->bio_offset = 0; 842296417Sdim bio->bio_data = 0; 843296417Sdim bio->bio_done = ctl_be_block_biodone; 844296417Sdim bio->bio_caller1 = beio; 845296417Sdim bio->bio_pblkno = 0; 846296417Sdim 847296417Sdim /* 848296417Sdim * We don't need to acquire the LUN lock here, because we are only 849296417Sdim * sending one bio, and so there is no other context to synchronize 850309124Sdim * with. 
851309124Sdim */ 852309124Sdim beio->num_bios_sent = 1; 853309124Sdim beio->send_complete = 1; 854296417Sdim 855296417Sdim binuptime(&beio->ds_t0); 856296417Sdim mtx_lock(&be_lun->io_lock); 857296417Sdim devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 858296417Sdim mtx_unlock(&be_lun->io_lock); 859296417Sdim 860296417Sdim (*dev_data->csw->d_strategy)(bio); 861296417Sdim} 862296417Sdim 863296417Sdimstatic void 864296417Sdimctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun, 865296417Sdim struct ctl_be_block_io *beio, 866296417Sdim uint64_t off, uint64_t len, int last) 867309124Sdim{ 868280031Sdim struct bio *bio; 869360784Sdim struct ctl_be_block_devdata *dev_data; 870360784Sdim uint64_t maxlen; 871360784Sdim 872360784Sdim dev_data = &be_lun->backend.dev; 873360784Sdim maxlen = LONG_MAX - (LONG_MAX % be_lun->blocksize); 874360784Sdim while (len > 0) { 875360784Sdim bio = g_alloc_bio(); 876360784Sdim bio->bio_cmd = BIO_DELETE; 877360784Sdim bio->bio_flags |= beio->bio_flags; 878360784Sdim bio->bio_dev = dev_data->cdev; 879360784Sdim bio->bio_offset = off; 880360784Sdim bio->bio_length = MIN(len, maxlen); 881360784Sdim bio->bio_data = 0; 882360784Sdim bio->bio_done = ctl_be_block_biodone; 883360784Sdim bio->bio_caller1 = beio; 884360784Sdim bio->bio_pblkno = off / be_lun->blocksize; 885360784Sdim 886360784Sdim off += bio->bio_length; 887360784Sdim len -= bio->bio_length; 888360784Sdim 889360784Sdim mtx_lock(&be_lun->io_lock); 890360784Sdim beio->num_bios_sent++; 891360784Sdim if (last && len == 0) 892360784Sdim beio->send_complete = 1; 893360784Sdim mtx_unlock(&be_lun->io_lock); 894360784Sdim 895360784Sdim (*dev_data->csw->d_strategy)(bio); 896360784Sdim } 897360784Sdim} 898360784Sdim 899360784Sdimstatic void 900360784Sdimctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun, 901360784Sdim struct ctl_be_block_io *beio) 902360784Sdim{ 903360784Sdim union ctl_io *io; 904360784Sdim struct ctl_be_block_devdata *dev_data; 905360784Sdim struct 
ctl_ptr_len_flags *ptrlen; 906360784Sdim struct scsi_unmap_desc *buf, *end; 907360784Sdim uint64_t len; 908360784Sdim 909360784Sdim dev_data = &be_lun->backend.dev; 910360784Sdim io = beio->io; 911360784Sdim 912360784Sdim DPRINTF("entered\n"); 913360784Sdim 914360784Sdim binuptime(&beio->ds_t0); 915360784Sdim mtx_lock(&be_lun->io_lock); 916360784Sdim devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 917360784Sdim mtx_unlock(&be_lun->io_lock); 918360784Sdim 919360784Sdim if (beio->io_offset == -1) { 920360784Sdim beio->io_len = 0; 921360784Sdim ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 922360784Sdim buf = (struct scsi_unmap_desc *)ptrlen->ptr; 923360784Sdim end = buf + ptrlen->len / sizeof(*buf); 924360784Sdim for (; buf < end; buf++) { 925360784Sdim len = (uint64_t)scsi_4btoul(buf->length) * 926360784Sdim be_lun->blocksize; 927360784Sdim beio->io_len += len; 928360784Sdim ctl_be_block_unmap_dev_range(be_lun, beio, 929360784Sdim scsi_8btou64(buf->lba) * be_lun->blocksize, len, 930360784Sdim (end - buf < 2) ? TRUE : FALSE); 931360784Sdim } 932360784Sdim } else 933360784Sdim ctl_be_block_unmap_dev_range(be_lun, beio, 934360784Sdim beio->io_offset, beio->io_len, TRUE); 935360784Sdim} 936360784Sdim 937360784Sdimstatic void 938360784Sdimctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun, 939360784Sdim struct ctl_be_block_io *beio) 940360784Sdim{ 941360784Sdim TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); 942360784Sdim int i; 943360784Sdim struct bio *bio; 944360784Sdim struct ctl_be_block_devdata *dev_data; 945360784Sdim off_t cur_offset; 946360784Sdim int max_iosize; 947360784Sdim 948360784Sdim DPRINTF("entered\n"); 949360784Sdim 950360784Sdim dev_data = &be_lun->backend.dev; 951360784Sdim 952360784Sdim /* 953360784Sdim * We have to limit our I/O size to the maximum supported by the 954193323Sed * backend device. Hopefully it is MAXPHYS. If the driver doesn't 955193323Sed * set it properly, use DFLTPHYS. 
956193323Sed */ 957 max_iosize = dev_data->cdev->si_iosize_max; 958 if (max_iosize < PAGE_SIZE) 959 max_iosize = DFLTPHYS; 960 961 cur_offset = beio->io_offset; 962 for (i = 0; i < beio->num_segs; i++) { 963 size_t cur_size; 964 uint8_t *cur_ptr; 965 966 cur_size = beio->sg_segs[i].len; 967 cur_ptr = beio->sg_segs[i].addr; 968 969 while (cur_size > 0) { 970 /* This can't fail, it's a blocking allocation. */ 971 bio = g_alloc_bio(); 972 973 KASSERT(bio != NULL, ("g_alloc_bio() failed!\n")); 974 975 bio->bio_cmd = beio->bio_cmd; 976 bio->bio_flags |= beio->bio_flags; 977 bio->bio_dev = dev_data->cdev; 978 bio->bio_caller1 = beio; 979 bio->bio_length = min(cur_size, max_iosize); 980 bio->bio_offset = cur_offset; 981 bio->bio_data = cur_ptr; 982 bio->bio_done = ctl_be_block_biodone; 983 bio->bio_pblkno = cur_offset / be_lun->blocksize; 984 985 cur_offset += bio->bio_length; 986 cur_ptr += bio->bio_length; 987 cur_size -= bio->bio_length; 988 989 TAILQ_INSERT_TAIL(&queue, bio, bio_queue); 990 beio->num_bios_sent++; 991 } 992 } 993 binuptime(&beio->ds_t0); 994 mtx_lock(&be_lun->io_lock); 995 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 996 beio->send_complete = 1; 997 mtx_unlock(&be_lun->io_lock); 998 999 /* 1000 * Fire off all allocated requests! 
1001 */ 1002 while ((bio = TAILQ_FIRST(&queue)) != NULL) { 1003 TAILQ_REMOVE(&queue, bio, bio_queue); 1004 (*dev_data->csw->d_strategy)(bio); 1005 } 1006} 1007 1008static void 1009ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio) 1010{ 1011 union ctl_io *io; 1012 1013 io = beio->io; 1014 ctl_free_beio(beio); 1015 if ((io->io_hdr.flags & CTL_FLAG_ABORT) || 1016 ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && 1017 (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { 1018 ctl_config_write_done(io); 1019 return; 1020 } 1021 1022 ctl_be_block_config_write(io); 1023} 1024 1025static void 1026ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun, 1027 union ctl_io *io) 1028{ 1029 struct ctl_be_block_io *beio; 1030 struct ctl_be_block_softc *softc; 1031 struct ctl_lba_len_flags *lbalen; 1032 uint64_t len_left, lba; 1033 int i, seglen; 1034 uint8_t *buf, *end; 1035 1036 DPRINTF("entered\n"); 1037 1038 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1039 softc = be_lun->softc; 1040 lbalen = ARGS(beio->io); 1041 1042 if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP) || 1043 (lbalen->flags & SWS_UNMAP && be_lun->unmap == NULL)) { 1044 ctl_free_beio(beio); 1045 ctl_set_invalid_field(&io->scsiio, 1046 /*sks_valid*/ 1, 1047 /*command*/ 1, 1048 /*field*/ 1, 1049 /*bit_valid*/ 0, 1050 /*bit*/ 0); 1051 ctl_config_write_done(io); 1052 return; 1053 } 1054 1055 /* 1056 * If the I/O came down with an ordered or head of queue tag, set 1057 * the BIO_ORDERED attribute. For head of queue tags, that's 1058 * pretty much the best we can do. 
1059 */ 1060 if ((io->scsiio.tag_type == CTL_TAG_ORDERED) 1061 || (io->scsiio.tag_type == CTL_TAG_HEAD_OF_QUEUE)) 1062 beio->bio_flags = BIO_ORDERED; 1063 1064 switch (io->scsiio.tag_type) { 1065 case CTL_TAG_ORDERED: 1066 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1067 break; 1068 case CTL_TAG_HEAD_OF_QUEUE: 1069 beio->ds_tag_type = DEVSTAT_TAG_HEAD; 1070 break; 1071 case CTL_TAG_UNTAGGED: 1072 case CTL_TAG_SIMPLE: 1073 case CTL_TAG_ACA: 1074 default: 1075 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; 1076 break; 1077 } 1078 1079 if (lbalen->flags & SWS_UNMAP) { 1080 beio->io_offset = lbalen->lba * be_lun->blocksize; 1081 beio->io_len = (uint64_t)lbalen->len * be_lun->blocksize; 1082 beio->bio_cmd = BIO_DELETE; 1083 beio->ds_trans_type = DEVSTAT_FREE; 1084 1085 be_lun->unmap(be_lun, beio); 1086 return; 1087 } 1088 1089 beio->bio_cmd = BIO_WRITE; 1090 beio->ds_trans_type = DEVSTAT_WRITE; 1091 1092 DPRINTF("WRITE SAME at LBA %jx len %u\n", 1093 (uintmax_t)lbalen->lba, lbalen->len); 1094 1095 len_left = (uint64_t)lbalen->len * be_lun->blocksize; 1096 for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) { 1097 1098 /* 1099 * Setup the S/G entry for this chunk. 1100 */ 1101 seglen = MIN(CTLBLK_MAX_SEG, len_left); 1102 seglen -= seglen % be_lun->blocksize; 1103 beio->sg_segs[i].len = seglen; 1104 beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK); 1105 1106 DPRINTF("segment %d addr %p len %zd\n", i, 1107 beio->sg_segs[i].addr, beio->sg_segs[i].len); 1108 1109 beio->num_segs++; 1110 len_left -= seglen; 1111 1112 buf = beio->sg_segs[i].addr; 1113 end = buf + seglen; 1114 for (; buf < end; buf += be_lun->blocksize) { 1115 memcpy(buf, io->scsiio.kern_data_ptr, be_lun->blocksize); 1116 if (lbalen->flags & SWS_LBDATA) 1117 scsi_ulto4b(lbalen->lba + lba, buf); 1118 lba++; 1119 } 1120 } 1121 1122 beio->io_offset = lbalen->lba * be_lun->blocksize; 1123 beio->io_len = lba * be_lun->blocksize; 1124 1125 /* We can not do all in one run. Correct and schedule rerun. 
*/ 1126 if (len_left > 0) { 1127 lbalen->lba += lba; 1128 lbalen->len -= lba; 1129 beio->beio_cont = ctl_be_block_cw_done_ws; 1130 } 1131 1132 be_lun->dispatch(be_lun, beio); 1133} 1134 1135static void 1136ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun, 1137 union ctl_io *io) 1138{ 1139 struct ctl_be_block_io *beio; 1140 struct ctl_be_block_softc *softc; 1141 struct ctl_ptr_len_flags *ptrlen; 1142 1143 DPRINTF("entered\n"); 1144 1145 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1146 softc = be_lun->softc; 1147 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 1148 1149 if (ptrlen->flags != 0 || be_lun->unmap == NULL) { 1150 ctl_free_beio(beio); 1151 ctl_set_invalid_field(&io->scsiio, 1152 /*sks_valid*/ 0, 1153 /*command*/ 1, 1154 /*field*/ 0, 1155 /*bit_valid*/ 0, 1156 /*bit*/ 0); 1157 ctl_config_write_done(io); 1158 return; 1159 } 1160 1161 /* 1162 * If the I/O came down with an ordered or head of queue tag, set 1163 * the BIO_ORDERED attribute. For head of queue tags, that's 1164 * pretty much the best we can do. 
1165 */ 1166 if ((io->scsiio.tag_type == CTL_TAG_ORDERED) 1167 || (io->scsiio.tag_type == CTL_TAG_HEAD_OF_QUEUE)) 1168 beio->bio_flags = BIO_ORDERED; 1169 1170 switch (io->scsiio.tag_type) { 1171 case CTL_TAG_ORDERED: 1172 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1173 break; 1174 case CTL_TAG_HEAD_OF_QUEUE: 1175 beio->ds_tag_type = DEVSTAT_TAG_HEAD; 1176 break; 1177 case CTL_TAG_UNTAGGED: 1178 case CTL_TAG_SIMPLE: 1179 case CTL_TAG_ACA: 1180 default: 1181 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; 1182 break; 1183 } 1184 1185 beio->io_len = 0; 1186 beio->io_offset = -1; 1187 1188 beio->bio_cmd = BIO_DELETE; 1189 beio->ds_trans_type = DEVSTAT_FREE; 1190 1191 DPRINTF("UNMAP\n"); 1192 1193 be_lun->unmap(be_lun, beio); 1194} 1195 1196static void 1197ctl_be_block_cw_done(struct ctl_be_block_io *beio) 1198{ 1199 union ctl_io *io; 1200 1201 io = beio->io; 1202 ctl_free_beio(beio); 1203 ctl_config_write_done(io); 1204} 1205 1206static void 1207ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun, 1208 union ctl_io *io) 1209{ 1210 struct ctl_be_block_io *beio; 1211 struct ctl_be_block_softc *softc; 1212 1213 DPRINTF("entered\n"); 1214 1215 softc = be_lun->softc; 1216 beio = ctl_alloc_beio(softc); 1217 beio->io = io; 1218 beio->lun = be_lun; 1219 beio->beio_cont = ctl_be_block_cw_done; 1220 PRIV(io)->ptr = (void *)beio; 1221 1222 switch (io->scsiio.cdb[0]) { 1223 case SYNCHRONIZE_CACHE: 1224 case SYNCHRONIZE_CACHE_16: 1225 beio->bio_cmd = BIO_FLUSH; 1226 beio->ds_trans_type = DEVSTAT_NO_DATA; 1227 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1228 beio->io_len = 0; 1229 be_lun->lun_flush(be_lun, beio); 1230 break; 1231 case WRITE_SAME_10: 1232 case WRITE_SAME_16: 1233 ctl_be_block_cw_dispatch_ws(be_lun, io); 1234 break; 1235 case UNMAP: 1236 ctl_be_block_cw_dispatch_unmap(be_lun, io); 1237 break; 1238 default: 1239 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]); 1240 break; 1241 } 1242} 1243 1244SDT_PROBE_DEFINE1(cbb, kernel, read, start, "uint64_t"); 
1245SDT_PROBE_DEFINE1(cbb, kernel, write, start, "uint64_t"); 1246SDT_PROBE_DEFINE1(cbb, kernel, read, alloc_done, "uint64_t"); 1247SDT_PROBE_DEFINE1(cbb, kernel, write, alloc_done, "uint64_t"); 1248 1249static void 1250ctl_be_block_next(struct ctl_be_block_io *beio) 1251{ 1252 struct ctl_be_block_lun *be_lun; 1253 union ctl_io *io; 1254 1255 io = beio->io; 1256 be_lun = beio->lun; 1257 ctl_free_beio(beio); 1258 if ((io->io_hdr.flags & CTL_FLAG_ABORT) || 1259 ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && 1260 (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { 1261 ctl_data_submit_done(io); 1262 return; 1263 } 1264 1265 io->io_hdr.status &= ~CTL_STATUS_MASK; 1266 io->io_hdr.status |= CTL_STATUS_NONE; 1267 1268 mtx_lock(&be_lun->queue_lock); 1269 /* 1270 * XXX KDM make sure that links is okay to use at this point. 1271 * Otherwise, we either need to add another field to ctl_io_hdr, 1272 * or deal with resource allocation here. 1273 */ 1274 STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links); 1275 mtx_unlock(&be_lun->queue_lock); 1276 1277 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 1278} 1279 1280static void 1281ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun, 1282 union ctl_io *io) 1283{ 1284 struct ctl_be_block_io *beio; 1285 struct ctl_be_block_softc *softc; 1286 struct ctl_lba_len_flags *lbalen; 1287 struct ctl_ptr_len_flags *bptrlen; 1288 uint64_t len_left, lbas; 1289 int i; 1290 1291 softc = be_lun->softc; 1292 1293 DPRINTF("entered\n"); 1294 1295 lbalen = ARGS(io); 1296 if (lbalen->flags & CTL_LLF_WRITE) { 1297 SDT_PROBE(cbb, kernel, write, start, 0, 0, 0, 0, 0); 1298 } else { 1299 SDT_PROBE(cbb, kernel, read, start, 0, 0, 0, 0, 0); 1300 } 1301 1302 beio = ctl_alloc_beio(softc); 1303 beio->io = io; 1304 beio->lun = be_lun; 1305 bptrlen = PRIV(io); 1306 bptrlen->ptr = (void *)beio; 1307 1308 /* 1309 * If the I/O came down with an ordered or head of queue tag, set 1310 * the BIO_ORDERED attribute. 
For head of queue tags, that's 1311 * pretty much the best we can do. 1312 * 1313 * XXX KDM we don't have a great way to easily know about the FUA 1314 * bit right now (it is decoded in ctl_read_write(), but we don't 1315 * pass that knowledge to the backend), and in any case we would 1316 * need to determine how to handle it. 1317 */ 1318 if ((io->scsiio.tag_type == CTL_TAG_ORDERED) 1319 || (io->scsiio.tag_type == CTL_TAG_HEAD_OF_QUEUE)) 1320 beio->bio_flags = BIO_ORDERED; 1321 1322 switch (io->scsiio.tag_type) { 1323 case CTL_TAG_ORDERED: 1324 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1325 break; 1326 case CTL_TAG_HEAD_OF_QUEUE: 1327 beio->ds_tag_type = DEVSTAT_TAG_HEAD; 1328 break; 1329 case CTL_TAG_UNTAGGED: 1330 case CTL_TAG_SIMPLE: 1331 case CTL_TAG_ACA: 1332 default: 1333 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; 1334 break; 1335 } 1336 1337 if (lbalen->flags & CTL_LLF_WRITE) { 1338 beio->bio_cmd = BIO_WRITE; 1339 beio->ds_trans_type = DEVSTAT_WRITE; 1340 } else { 1341 beio->bio_cmd = BIO_READ; 1342 beio->ds_trans_type = DEVSTAT_READ; 1343 } 1344 1345 DPRINTF("%s at LBA %jx len %u @%ju\n", 1346 (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE", 1347 (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len); 1348 if (lbalen->flags & CTL_LLF_COMPARE) 1349 lbas = CTLBLK_HALF_IO_SIZE; 1350 else 1351 lbas = CTLBLK_MAX_IO_SIZE; 1352 lbas = MIN(lbalen->len - bptrlen->len, lbas / be_lun->blocksize); 1353 beio->io_offset = (lbalen->lba + bptrlen->len) * be_lun->blocksize; 1354 beio->io_len = lbas * be_lun->blocksize; 1355 bptrlen->len += lbas; 1356 1357 for (i = 0, len_left = beio->io_len; len_left > 0; i++) { 1358 KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)", 1359 i, CTLBLK_MAX_SEGS)); 1360 1361 /* 1362 * Setup the S/G entry for this chunk. 
1363 */ 1364 beio->sg_segs[i].len = min(CTLBLK_MAX_SEG, len_left); 1365 beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK); 1366 1367 DPRINTF("segment %d addr %p len %zd\n", i, 1368 beio->sg_segs[i].addr, beio->sg_segs[i].len); 1369 1370 /* Set up second segment for compare operation. */ 1371 if (lbalen->flags & CTL_LLF_COMPARE) { 1372 beio->sg_segs[i + CTLBLK_HALF_SEGS].len = 1373 beio->sg_segs[i].len; 1374 beio->sg_segs[i + CTLBLK_HALF_SEGS].addr = 1375 uma_zalloc(be_lun->lun_zone, M_WAITOK); 1376 } 1377 1378 beio->num_segs++; 1379 len_left -= beio->sg_segs[i].len; 1380 } 1381 if (bptrlen->len < lbalen->len) 1382 beio->beio_cont = ctl_be_block_next; 1383 io->scsiio.be_move_done = ctl_be_block_move_done; 1384 /* For compare we have separate S/G lists for read and datamove. */ 1385 if (lbalen->flags & CTL_LLF_COMPARE) 1386 io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS]; 1387 else 1388 io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs; 1389 io->scsiio.kern_data_len = beio->io_len; 1390 io->scsiio.kern_data_resid = 0; 1391 io->scsiio.kern_sg_entries = beio->num_segs; 1392 io->io_hdr.flags |= CTL_FLAG_ALLOCATED | CTL_FLAG_KDPTR_SGLIST; 1393 1394 /* 1395 * For the read case, we need to read the data into our buffers and 1396 * then we can send it back to the user. For the write case, we 1397 * need to get the data from the user first. 
1398 */ 1399 if (beio->bio_cmd == BIO_READ) { 1400 SDT_PROBE(cbb, kernel, read, alloc_done, 0, 0, 0, 0, 0); 1401 be_lun->dispatch(be_lun, beio); 1402 } else { 1403 SDT_PROBE(cbb, kernel, write, alloc_done, 0, 0, 0, 0, 0); 1404#ifdef CTL_TIME_IO 1405 getbintime(&io->io_hdr.dma_start_bt); 1406#endif 1407 ctl_datamove(io); 1408 } 1409} 1410 1411static void 1412ctl_be_block_worker(void *context, int pending) 1413{ 1414 struct ctl_be_block_lun *be_lun; 1415 struct ctl_be_block_softc *softc; 1416 union ctl_io *io; 1417 1418 be_lun = (struct ctl_be_block_lun *)context; 1419 softc = be_lun->softc; 1420 1421 DPRINTF("entered\n"); 1422 1423 mtx_lock(&be_lun->queue_lock); 1424 for (;;) { 1425 io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue); 1426 if (io != NULL) { 1427 struct ctl_be_block_io *beio; 1428 1429 DPRINTF("datamove queue\n"); 1430 1431 STAILQ_REMOVE(&be_lun->datamove_queue, &io->io_hdr, 1432 ctl_io_hdr, links); 1433 1434 mtx_unlock(&be_lun->queue_lock); 1435 1436 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1437 1438 be_lun->dispatch(be_lun, beio); 1439 1440 mtx_lock(&be_lun->queue_lock); 1441 continue; 1442 } 1443 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue); 1444 if (io != NULL) { 1445 1446 DPRINTF("config write queue\n"); 1447 1448 STAILQ_REMOVE(&be_lun->config_write_queue, &io->io_hdr, 1449 ctl_io_hdr, links); 1450 1451 mtx_unlock(&be_lun->queue_lock); 1452 1453 ctl_be_block_cw_dispatch(be_lun, io); 1454 1455 mtx_lock(&be_lun->queue_lock); 1456 continue; 1457 } 1458 io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue); 1459 if (io != NULL) { 1460 DPRINTF("input queue\n"); 1461 1462 STAILQ_REMOVE(&be_lun->input_queue, &io->io_hdr, 1463 ctl_io_hdr, links); 1464 mtx_unlock(&be_lun->queue_lock); 1465 1466 /* 1467 * We must drop the lock, since this routine and 1468 * its children may sleep. 
1469 */ 1470 ctl_be_block_dispatch(be_lun, io); 1471 1472 mtx_lock(&be_lun->queue_lock); 1473 continue; 1474 } 1475 1476 /* 1477 * If we get here, there is no work left in the queues, so 1478 * just break out and let the task queue go to sleep. 1479 */ 1480 break; 1481 } 1482 mtx_unlock(&be_lun->queue_lock); 1483} 1484 1485/* 1486 * Entry point from CTL to the backend for I/O. We queue everything to a 1487 * work thread, so this just puts the I/O on a queue and wakes up the 1488 * thread. 1489 */ 1490static int 1491ctl_be_block_submit(union ctl_io *io) 1492{ 1493 struct ctl_be_block_lun *be_lun; 1494 struct ctl_be_lun *ctl_be_lun; 1495 1496 DPRINTF("entered\n"); 1497 1498 ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[ 1499 CTL_PRIV_BACKEND_LUN].ptr; 1500 be_lun = (struct ctl_be_block_lun *)ctl_be_lun->be_lun; 1501 1502 /* 1503 * Make sure we only get SCSI I/O. 1504 */ 1505 KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, ("Non-SCSI I/O (type " 1506 "%#x) encountered", io->io_hdr.io_type)); 1507 1508 PRIV(io)->len = 0; 1509 1510 mtx_lock(&be_lun->queue_lock); 1511 /* 1512 * XXX KDM make sure that links is okay to use at this point. 1513 * Otherwise, we either need to add another field to ctl_io_hdr, 1514 * or deal with resource allocation here. 
1515 */ 1516 STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links); 1517 mtx_unlock(&be_lun->queue_lock); 1518 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 1519 1520 return (CTL_RETVAL_COMPLETE); 1521} 1522 1523static int 1524ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, 1525 int flag, struct thread *td) 1526{ 1527 struct ctl_be_block_softc *softc; 1528 int error; 1529 1530 softc = &backend_block_softc; 1531 1532 error = 0; 1533 1534 switch (cmd) { 1535 case CTL_LUN_REQ: { 1536 struct ctl_lun_req *lun_req; 1537 1538 lun_req = (struct ctl_lun_req *)addr; 1539 1540 switch (lun_req->reqtype) { 1541 case CTL_LUNREQ_CREATE: 1542 error = ctl_be_block_create(softc, lun_req); 1543 break; 1544 case CTL_LUNREQ_RM: 1545 error = ctl_be_block_rm(softc, lun_req); 1546 break; 1547 case CTL_LUNREQ_MODIFY: 1548 error = ctl_be_block_modify(softc, lun_req); 1549 break; 1550 default: 1551 lun_req->status = CTL_LUN_ERROR; 1552 snprintf(lun_req->error_str, sizeof(lun_req->error_str), 1553 "%s: invalid LUN request type %d", __func__, 1554 lun_req->reqtype); 1555 break; 1556 } 1557 break; 1558 } 1559 default: 1560 error = ENOTTY; 1561 break; 1562 } 1563 1564 return (error); 1565} 1566 1567static int 1568ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 1569{ 1570 struct ctl_be_block_filedata *file_data; 1571 struct ctl_lun_create_params *params; 1572 struct vattr vattr; 1573 int error; 1574 1575 error = 0; 1576 file_data = &be_lun->backend.file; 1577 params = &req->reqdata.create; 1578 1579 be_lun->dev_type = CTL_BE_BLOCK_FILE; 1580 be_lun->dispatch = ctl_be_block_dispatch_file; 1581 be_lun->lun_flush = ctl_be_block_flush_file; 1582 1583 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); 1584 if (error != 0) { 1585 snprintf(req->error_str, sizeof(req->error_str), 1586 "error calling VOP_GETATTR() for file %s", 1587 be_lun->dev_path); 1588 return (error); 1589 } 1590 1591 /* 1592 * Verify that we have the 
ability to upgrade to exclusive 1593 * access on this file so we can trap errors at open instead 1594 * of reporting them during first access. 1595 */ 1596 if (VOP_ISLOCKED(be_lun->vn) != LK_EXCLUSIVE) { 1597 vn_lock(be_lun->vn, LK_UPGRADE | LK_RETRY); 1598 if (be_lun->vn->v_iflag & VI_DOOMED) { 1599 error = EBADF; 1600 snprintf(req->error_str, sizeof(req->error_str), 1601 "error locking file %s", be_lun->dev_path); 1602 return (error); 1603 } 1604 } 1605 1606 1607 file_data->cred = crhold(curthread->td_ucred); 1608 if (params->lun_size_bytes != 0) 1609 be_lun->size_bytes = params->lun_size_bytes; 1610 else 1611 be_lun->size_bytes = vattr.va_size; 1612 /* 1613 * We set the multi thread flag for file operations because all 1614 * filesystems (in theory) are capable of allowing multiple readers 1615 * of a file at once. So we want to get the maximum possible 1616 * concurrency. 1617 */ 1618 be_lun->flags |= CTL_BE_BLOCK_LUN_MULTI_THREAD; 1619 1620 /* 1621 * XXX KDM vattr.va_blocksize may be larger than 512 bytes here. 1622 * With ZFS, it is 131072 bytes. Block sizes that large don't work 1623 * with disklabel and UFS on FreeBSD at least. Large block sizes 1624 * may not work with other OSes as well. So just export a sector 1625 * size of 512 bytes, which should work with any OS or 1626 * application. Since our backing is a file, any block size will 1627 * work fine for the backing store. 1628 */ 1629#if 0 1630 be_lun->blocksize= vattr.va_blocksize; 1631#endif 1632 if (params->blocksize_bytes != 0) 1633 be_lun->blocksize = params->blocksize_bytes; 1634 else 1635 be_lun->blocksize = 512; 1636 1637 /* 1638 * Sanity check. The media size has to be at least one 1639 * sector long. 
1640 */ 1641 if (be_lun->size_bytes < be_lun->blocksize) { 1642 error = EINVAL; 1643 snprintf(req->error_str, sizeof(req->error_str), 1644 "file %s size %ju < block size %u", be_lun->dev_path, 1645 (uintmax_t)be_lun->size_bytes, be_lun->blocksize); 1646 } 1647 return (error); 1648} 1649 1650static int 1651ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 1652{ 1653 struct ctl_lun_create_params *params; 1654 struct vattr vattr; 1655 struct cdev *dev; 1656 struct cdevsw *devsw; 1657 int error; 1658 off_t ps, pss, po, pos; 1659 1660 params = &req->reqdata.create; 1661 1662 be_lun->dev_type = CTL_BE_BLOCK_DEV; 1663 be_lun->backend.dev.cdev = be_lun->vn->v_rdev; 1664 be_lun->backend.dev.csw = dev_refthread(be_lun->backend.dev.cdev, 1665 &be_lun->backend.dev.dev_ref); 1666 if (be_lun->backend.dev.csw == NULL) 1667 panic("Unable to retrieve device switch"); 1668 if (strcmp(be_lun->backend.dev.csw->d_name, "zvol") == 0) 1669 be_lun->dispatch = ctl_be_block_dispatch_zvol; 1670 else 1671 be_lun->dispatch = ctl_be_block_dispatch_dev; 1672 be_lun->lun_flush = ctl_be_block_flush_dev; 1673 be_lun->unmap = ctl_be_block_unmap_dev; 1674 1675 error = VOP_GETATTR(be_lun->vn, &vattr, NOCRED); 1676 if (error) { 1677 snprintf(req->error_str, sizeof(req->error_str), 1678 "%s: error getting vnode attributes for device %s", 1679 __func__, be_lun->dev_path); 1680 return (error); 1681 } 1682 1683 dev = be_lun->vn->v_rdev; 1684 devsw = dev->si_devsw; 1685 if (!devsw->d_ioctl) { 1686 snprintf(req->error_str, sizeof(req->error_str), 1687 "%s: no d_ioctl for device %s!", __func__, 1688 be_lun->dev_path); 1689 return (ENODEV); 1690 } 1691 1692 error = devsw->d_ioctl(dev, DIOCGSECTORSIZE, 1693 (caddr_t)&be_lun->blocksize, FREAD, 1694 curthread); 1695 if (error) { 1696 snprintf(req->error_str, sizeof(req->error_str), 1697 "%s: error %d returned for DIOCGSECTORSIZE ioctl " 1698 "on %s!", __func__, error, be_lun->dev_path); 1699 return (error); 1700 } 1701 1702 /* 1703 * 
If the user has asked for a blocksize that is greater than the 1704 * backing device's blocksize, we can do it only if the blocksize 1705 * the user is asking for is an even multiple of the underlying 1706 * device's blocksize. 1707 */ 1708 if ((params->blocksize_bytes != 0) 1709 && (params->blocksize_bytes > be_lun->blocksize)) { 1710 uint32_t bs_multiple, tmp_blocksize; 1711 1712 bs_multiple = params->blocksize_bytes / be_lun->blocksize; 1713 1714 tmp_blocksize = bs_multiple * be_lun->blocksize; 1715 1716 if (tmp_blocksize == params->blocksize_bytes) { 1717 be_lun->blocksize = params->blocksize_bytes; 1718 } else { 1719 snprintf(req->error_str, sizeof(req->error_str), 1720 "%s: requested blocksize %u is not an even " 1721 "multiple of backing device blocksize %u", 1722 __func__, params->blocksize_bytes, 1723 be_lun->blocksize); 1724 return (EINVAL); 1725 1726 } 1727 } else if ((params->blocksize_bytes != 0) 1728 && (params->blocksize_bytes != be_lun->blocksize)) { 1729 snprintf(req->error_str, sizeof(req->error_str), 1730 "%s: requested blocksize %u < backing device " 1731 "blocksize %u", __func__, params->blocksize_bytes, 1732 be_lun->blocksize); 1733 return (EINVAL); 1734 } 1735 1736 error = devsw->d_ioctl(dev, DIOCGMEDIASIZE, 1737 (caddr_t)&be_lun->size_bytes, FREAD, 1738 curthread); 1739 if (error) { 1740 snprintf(req->error_str, sizeof(req->error_str), 1741 "%s: error %d returned for DIOCGMEDIASIZE " 1742 " ioctl on %s!", __func__, error, 1743 be_lun->dev_path); 1744 return (error); 1745 } 1746 1747 if (params->lun_size_bytes != 0) { 1748 if (params->lun_size_bytes > be_lun->size_bytes) { 1749 snprintf(req->error_str, sizeof(req->error_str), 1750 "%s: requested LUN size %ju > backing device " 1751 "size %ju", __func__, 1752 (uintmax_t)params->lun_size_bytes, 1753 (uintmax_t)be_lun->size_bytes); 1754 return (EINVAL); 1755 } 1756 1757 be_lun->size_bytes = params->lun_size_bytes; 1758 } 1759 1760 error = devsw->d_ioctl(dev, DIOCGSTRIPESIZE, 1761 (caddr_t)&ps, 
FREAD, curthread); 1762 if (error) 1763 ps = po = 0; 1764 else { 1765 error = devsw->d_ioctl(dev, DIOCGSTRIPEOFFSET, 1766 (caddr_t)&po, FREAD, curthread); 1767 if (error) 1768 po = 0; 1769 } 1770 pss = ps / be_lun->blocksize; 1771 pos = po / be_lun->blocksize; 1772 if ((pss > 0) && (pss * be_lun->blocksize == ps) && (pss >= pos) && 1773 ((pss & (pss - 1)) == 0) && (pos * be_lun->blocksize == po)) { 1774 be_lun->pblockexp = fls(pss) - 1; 1775 be_lun->pblockoff = (pss - pos) % pss; 1776 } 1777 1778 return (0); 1779} 1780 1781static int 1782ctl_be_block_close(struct ctl_be_block_lun *be_lun) 1783{ 1784 DROP_GIANT(); 1785 if (be_lun->vn) { 1786 int flags = FREAD | FWRITE; 1787 1788 switch (be_lun->dev_type) { 1789 case CTL_BE_BLOCK_DEV: 1790 if (be_lun->backend.dev.csw) { 1791 dev_relthread(be_lun->backend.dev.cdev, 1792 be_lun->backend.dev.dev_ref); 1793 be_lun->backend.dev.csw = NULL; 1794 be_lun->backend.dev.cdev = NULL; 1795 } 1796 break; 1797 case CTL_BE_BLOCK_FILE: 1798 break; 1799 case CTL_BE_BLOCK_NONE: 1800 break; 1801 default: 1802 panic("Unexpected backend type."); 1803 break; 1804 } 1805 1806 (void)vn_close(be_lun->vn, flags, NOCRED, curthread); 1807 be_lun->vn = NULL; 1808 1809 switch (be_lun->dev_type) { 1810 case CTL_BE_BLOCK_DEV: 1811 break; 1812 case CTL_BE_BLOCK_FILE: 1813 if (be_lun->backend.file.cred != NULL) { 1814 crfree(be_lun->backend.file.cred); 1815 be_lun->backend.file.cred = NULL; 1816 } 1817 break; 1818 case CTL_BE_BLOCK_NONE: 1819 break; 1820 default: 1821 panic("Unexpected backend type."); 1822 break; 1823 } 1824 } 1825 PICKUP_GIANT(); 1826 1827 return (0); 1828} 1829 1830static int 1831ctl_be_block_open(struct ctl_be_block_softc *softc, 1832 struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 1833{ 1834 struct nameidata nd; 1835 int flags; 1836 int error; 1837 1838 /* 1839 * XXX KDM allow a read-only option? 
1840 */ 1841 flags = FREAD | FWRITE; 1842 error = 0; 1843 1844 if (rootvnode == NULL) { 1845 snprintf(req->error_str, sizeof(req->error_str), 1846 "%s: Root filesystem is not mounted", __func__); 1847 return (1); 1848 } 1849 1850 if (!curthread->td_proc->p_fd->fd_cdir) { 1851 curthread->td_proc->p_fd->fd_cdir = rootvnode; 1852 VREF(rootvnode); 1853 } 1854 if (!curthread->td_proc->p_fd->fd_rdir) { 1855 curthread->td_proc->p_fd->fd_rdir = rootvnode; 1856 VREF(rootvnode); 1857 } 1858 if (!curthread->td_proc->p_fd->fd_jdir) { 1859 curthread->td_proc->p_fd->fd_jdir = rootvnode; 1860 VREF(rootvnode); 1861 } 1862 1863 again: 1864 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread); 1865 error = vn_open(&nd, &flags, 0, NULL); 1866 if (error) { 1867 /* 1868 * This is the only reasonable guess we can make as far as 1869 * path if the user doesn't give us a fully qualified path. 1870 * If they want to specify a file, they need to specify the 1871 * full path. 1872 */ 1873 if (be_lun->dev_path[0] != '/') { 1874 char *dev_path = "/dev/"; 1875 char *dev_name; 1876 1877 /* Try adding device path at beginning of name */ 1878 dev_name = malloc(strlen(be_lun->dev_path) 1879 + strlen(dev_path) + 1, 1880 M_CTLBLK, M_WAITOK); 1881 if (dev_name) { 1882 sprintf(dev_name, "%s%s", dev_path, 1883 be_lun->dev_path); 1884 free(be_lun->dev_path, M_CTLBLK); 1885 be_lun->dev_path = dev_name; 1886 goto again; 1887 } 1888 } 1889 snprintf(req->error_str, sizeof(req->error_str), 1890 "%s: error opening %s", __func__, be_lun->dev_path); 1891 return (error); 1892 } 1893 1894 NDFREE(&nd, NDF_ONLY_PNBUF); 1895 1896 be_lun->vn = nd.ni_vp; 1897 1898 /* We only support disks and files. 
*/ 1899 if (vn_isdisk(be_lun->vn, &error)) { 1900 error = ctl_be_block_open_dev(be_lun, req); 1901 } else if (be_lun->vn->v_type == VREG) { 1902 error = ctl_be_block_open_file(be_lun, req); 1903 } else { 1904 error = EINVAL; 1905 snprintf(req->error_str, sizeof(req->error_str), 1906 "%s is not a disk or plain file", be_lun->dev_path); 1907 } 1908 VOP_UNLOCK(be_lun->vn, 0); 1909 1910 if (error != 0) { 1911 ctl_be_block_close(be_lun); 1912 return (error); 1913 } 1914 1915 be_lun->blocksize_shift = fls(be_lun->blocksize) - 1; 1916 be_lun->size_blocks = be_lun->size_bytes >> be_lun->blocksize_shift; 1917 1918 return (0); 1919} 1920 1921static int 1922ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 1923{ 1924 struct ctl_be_block_lun *be_lun; 1925 struct ctl_lun_create_params *params; 1926 char num_thread_str[16]; 1927 char tmpstr[32]; 1928 char *value; 1929 int retval, num_threads, unmap; 1930 int tmp_num_threads; 1931 1932 params = &req->reqdata.create; 1933 retval = 0; 1934 1935 num_threads = cbb_num_threads; 1936 1937 be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK); 1938 1939 be_lun->softc = softc; 1940 STAILQ_INIT(&be_lun->input_queue); 1941 STAILQ_INIT(&be_lun->config_write_queue); 1942 STAILQ_INIT(&be_lun->datamove_queue); 1943 sprintf(be_lun->lunname, "cblk%d", softc->num_luns); 1944 mtx_init(&be_lun->io_lock, "cblk io lock", NULL, MTX_DEF); 1945 mtx_init(&be_lun->queue_lock, "cblk queue lock", NULL, MTX_DEF); 1946 ctl_init_opts(&be_lun->ctl_be_lun.options, 1947 req->num_be_args, req->kern_be_args); 1948 1949 be_lun->lun_zone = uma_zcreate(be_lun->lunname, CTLBLK_MAX_SEG, 1950 NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0); 1951 1952 if (be_lun->lun_zone == NULL) { 1953 snprintf(req->error_str, sizeof(req->error_str), 1954 "%s: error allocating UMA zone", __func__); 1955 goto bailout_error; 1956 } 1957 1958 if (params->flags & CTL_LUN_FLAG_DEV_TYPE) 1959 be_lun->ctl_be_lun.lun_type = params->device_type; 1960 else 
1961 be_lun->ctl_be_lun.lun_type = T_DIRECT; 1962 1963 if (be_lun->ctl_be_lun.lun_type == T_DIRECT) { 1964 value = ctl_get_opt(&be_lun->ctl_be_lun.options, "file"); 1965 if (value == NULL) { 1966 snprintf(req->error_str, sizeof(req->error_str), 1967 "%s: no file argument specified", __func__); 1968 goto bailout_error; 1969 } 1970 be_lun->dev_path = strdup(value, M_CTLBLK); 1971 1972 retval = ctl_be_block_open(softc, be_lun, req); 1973 if (retval != 0) { 1974 retval = 0; 1975 goto bailout_error; 1976 } 1977 1978 /* 1979 * Tell the user the size of the file/device. 1980 */ 1981 params->lun_size_bytes = be_lun->size_bytes; 1982 1983 /* 1984 * The maximum LBA is the size - 1. 1985 */ 1986 be_lun->ctl_be_lun.maxlba = be_lun->size_blocks - 1; 1987 } else { 1988 /* 1989 * For processor devices, we don't have any size. 1990 */ 1991 be_lun->blocksize = 0; 1992 be_lun->pblockexp = 0; 1993 be_lun->pblockoff = 0; 1994 be_lun->size_blocks = 0; 1995 be_lun->size_bytes = 0; 1996 be_lun->ctl_be_lun.maxlba = 0; 1997 params->lun_size_bytes = 0; 1998 1999 /* 2000 * Default to just 1 thread for processor devices. 2001 */ 2002 num_threads = 1; 2003 } 2004 2005 /* 2006 * XXX This searching loop might be refactored to be combined with 2007 * the loop above, 2008 */ 2009 value = ctl_get_opt(&be_lun->ctl_be_lun.options, "num_threads"); 2010 if (value != NULL) { 2011 tmp_num_threads = strtol(value, NULL, 0); 2012 2013 /* 2014 * We don't let the user specify less than one 2015 * thread, but hope he's clueful enough not to 2016 * specify 1000 threads. 
2017 */ 2018 if (tmp_num_threads < 1) { 2019 snprintf(req->error_str, sizeof(req->error_str), 2020 "%s: invalid number of threads %s", 2021 __func__, num_thread_str); 2022 goto bailout_error; 2023 } 2024 num_threads = tmp_num_threads; 2025 } 2026 unmap = 0; 2027 value = ctl_get_opt(&be_lun->ctl_be_lun.options, "unmap"); 2028 if (value != NULL && strcmp(value, "on") == 0) 2029 unmap = 1; 2030 2031 be_lun->flags = CTL_BE_BLOCK_LUN_UNCONFIGURED; 2032 be_lun->ctl_be_lun.flags = CTL_LUN_FLAG_PRIMARY; 2033 if (unmap) 2034 be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_UNMAP; 2035 be_lun->ctl_be_lun.be_lun = be_lun; 2036 be_lun->ctl_be_lun.blocksize = be_lun->blocksize; 2037 be_lun->ctl_be_lun.pblockexp = be_lun->pblockexp; 2038 be_lun->ctl_be_lun.pblockoff = be_lun->pblockoff; 2039 /* Tell the user the blocksize we ended up using */ 2040 params->blocksize_bytes = be_lun->blocksize; 2041 if (params->flags & CTL_LUN_FLAG_ID_REQ) { 2042 be_lun->ctl_be_lun.req_lun_id = params->req_lun_id; 2043 be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_ID_REQ; 2044 } else 2045 be_lun->ctl_be_lun.req_lun_id = 0; 2046 2047 be_lun->ctl_be_lun.lun_shutdown = ctl_be_block_lun_shutdown; 2048 be_lun->ctl_be_lun.lun_config_status = 2049 ctl_be_block_lun_config_status; 2050 be_lun->ctl_be_lun.be = &ctl_be_block_driver; 2051 2052 if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) { 2053 snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%4d", 2054 softc->num_luns); 2055 strncpy((char *)be_lun->ctl_be_lun.serial_num, tmpstr, 2056 ctl_min(sizeof(be_lun->ctl_be_lun.serial_num), 2057 sizeof(tmpstr))); 2058 2059 /* Tell the user what we used for a serial number */ 2060 strncpy((char *)params->serial_num, tmpstr, 2061 ctl_min(sizeof(params->serial_num), sizeof(tmpstr))); 2062 } else { 2063 strncpy((char *)be_lun->ctl_be_lun.serial_num, 2064 params->serial_num, 2065 ctl_min(sizeof(be_lun->ctl_be_lun.serial_num), 2066 sizeof(params->serial_num))); 2067 } 2068 if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) { 2069 
snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%4d", softc->num_luns); 2070 strncpy((char *)be_lun->ctl_be_lun.device_id, tmpstr, 2071 ctl_min(sizeof(be_lun->ctl_be_lun.device_id), 2072 sizeof(tmpstr))); 2073 2074 /* Tell the user what we used for a device ID */ 2075 strncpy((char *)params->device_id, tmpstr, 2076 ctl_min(sizeof(params->device_id), sizeof(tmpstr))); 2077 } else { 2078 strncpy((char *)be_lun->ctl_be_lun.device_id, 2079 params->device_id, 2080 ctl_min(sizeof(be_lun->ctl_be_lun.device_id), 2081 sizeof(params->device_id))); 2082 } 2083 2084 TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun); 2085 2086 be_lun->io_taskqueue = taskqueue_create(be_lun->lunname, M_WAITOK, 2087 taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue); 2088 2089 if (be_lun->io_taskqueue == NULL) { 2090 snprintf(req->error_str, sizeof(req->error_str), 2091 "%s: Unable to create taskqueue", __func__); 2092 goto bailout_error; 2093 } 2094 2095 /* 2096 * Note that we start the same number of threads by default for 2097 * both the file case and the block device case. For the file 2098 * case, we need multiple threads to allow concurrency, because the 2099 * vnode interface is designed to be a blocking interface. For the 2100 * block device case, ZFS zvols at least will block the caller's 2101 * context in many instances, and so we need multiple threads to 2102 * overcome that problem. Other block devices don't need as many 2103 * threads, but they shouldn't cause too many problems. 2104 * 2105 * If the user wants to just have a single thread for a block 2106 * device, he can specify that when the LUN is created, or change 2107 * the tunable/sysctl to alter the default number of threads. 
2108 */ 2109 retval = taskqueue_start_threads(&be_lun->io_taskqueue, 2110 /*num threads*/num_threads, 2111 /*priority*/PWAIT, 2112 /*thread name*/ 2113 "%s taskq", be_lun->lunname); 2114 2115 if (retval != 0) 2116 goto bailout_error; 2117 2118 be_lun->num_threads = num_threads; 2119 2120 mtx_lock(&softc->lock); 2121 softc->num_luns++; 2122 STAILQ_INSERT_TAIL(&softc->lun_list, be_lun, links); 2123 2124 mtx_unlock(&softc->lock); 2125 2126 retval = ctl_add_lun(&be_lun->ctl_be_lun); 2127 if (retval != 0) { 2128 mtx_lock(&softc->lock); 2129 STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, 2130 links); 2131 softc->num_luns--; 2132 mtx_unlock(&softc->lock); 2133 snprintf(req->error_str, sizeof(req->error_str), 2134 "%s: ctl_add_lun() returned error %d, see dmesg for " 2135 "details", __func__, retval); 2136 retval = 0; 2137 goto bailout_error; 2138 } 2139 2140 mtx_lock(&softc->lock); 2141 2142 /* 2143 * Tell the config_status routine that we're waiting so it won't 2144 * clean up the LUN in the event of an error. 
2145 */ 2146 be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING; 2147 2148 while (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) { 2149 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0); 2150 if (retval == EINTR) 2151 break; 2152 } 2153 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; 2154 2155 if (be_lun->flags & CTL_BE_BLOCK_LUN_CONFIG_ERR) { 2156 snprintf(req->error_str, sizeof(req->error_str), 2157 "%s: LUN configuration error, see dmesg for details", 2158 __func__); 2159 STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, 2160 links); 2161 softc->num_luns--; 2162 mtx_unlock(&softc->lock); 2163 goto bailout_error; 2164 } else { 2165 params->req_lun_id = be_lun->ctl_be_lun.lun_id; 2166 } 2167 2168 mtx_unlock(&softc->lock); 2169 2170 be_lun->disk_stats = devstat_new_entry("cbb", params->req_lun_id, 2171 be_lun->blocksize, 2172 DEVSTAT_ALL_SUPPORTED, 2173 be_lun->ctl_be_lun.lun_type 2174 | DEVSTAT_TYPE_IF_OTHER, 2175 DEVSTAT_PRIORITY_OTHER); 2176 2177 2178 req->status = CTL_LUN_OK; 2179 2180 return (retval); 2181 2182bailout_error: 2183 req->status = CTL_LUN_ERROR; 2184 2185 if (be_lun->io_taskqueue != NULL) 2186 taskqueue_free(be_lun->io_taskqueue); 2187 ctl_be_block_close(be_lun); 2188 if (be_lun->dev_path != NULL) 2189 free(be_lun->dev_path, M_CTLBLK); 2190 if (be_lun->lun_zone != NULL) 2191 uma_zdestroy(be_lun->lun_zone); 2192 ctl_free_opts(&be_lun->ctl_be_lun.options); 2193 mtx_destroy(&be_lun->queue_lock); 2194 mtx_destroy(&be_lun->io_lock); 2195 free(be_lun, M_CTLBLK); 2196 2197 return (retval); 2198} 2199 2200static int 2201ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2202{ 2203 struct ctl_lun_rm_params *params; 2204 struct ctl_be_block_lun *be_lun; 2205 int retval; 2206 2207 params = &req->reqdata.rm; 2208 2209 mtx_lock(&softc->lock); 2210 2211 be_lun = NULL; 2212 2213 STAILQ_FOREACH(be_lun, &softc->lun_list, links) { 2214 if (be_lun->ctl_be_lun.lun_id == params->lun_id) 2215 break; 2216 } 2217 mtx_unlock(&softc->lock); 2218 
2219 if (be_lun == NULL) { 2220 snprintf(req->error_str, sizeof(req->error_str), 2221 "%s: LUN %u is not managed by the block backend", 2222 __func__, params->lun_id); 2223 goto bailout_error; 2224 } 2225 2226 retval = ctl_disable_lun(&be_lun->ctl_be_lun); 2227 2228 if (retval != 0) { 2229 snprintf(req->error_str, sizeof(req->error_str), 2230 "%s: error %d returned from ctl_disable_lun() for " 2231 "LUN %d", __func__, retval, params->lun_id); 2232 goto bailout_error; 2233 2234 } 2235 2236 retval = ctl_invalidate_lun(&be_lun->ctl_be_lun); 2237 if (retval != 0) { 2238 snprintf(req->error_str, sizeof(req->error_str), 2239 "%s: error %d returned from ctl_invalidate_lun() for " 2240 "LUN %d", __func__, retval, params->lun_id); 2241 goto bailout_error; 2242 } 2243 2244 mtx_lock(&softc->lock); 2245 2246 be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING; 2247 2248 while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) { 2249 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0); 2250 if (retval == EINTR) 2251 break; 2252 } 2253 2254 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; 2255 2256 if ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) { 2257 snprintf(req->error_str, sizeof(req->error_str), 2258 "%s: interrupted waiting for LUN to be freed", 2259 __func__); 2260 mtx_unlock(&softc->lock); 2261 goto bailout_error; 2262 } 2263 2264 STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, links); 2265 2266 softc->num_luns--; 2267 mtx_unlock(&softc->lock); 2268 2269 taskqueue_drain(be_lun->io_taskqueue, &be_lun->io_task); 2270 2271 taskqueue_free(be_lun->io_taskqueue); 2272 2273 ctl_be_block_close(be_lun); 2274 2275 if (be_lun->disk_stats != NULL) 2276 devstat_remove_entry(be_lun->disk_stats); 2277 2278 uma_zdestroy(be_lun->lun_zone); 2279 2280 ctl_free_opts(&be_lun->ctl_be_lun.options); 2281 free(be_lun->dev_path, M_CTLBLK); 2282 mtx_destroy(&be_lun->queue_lock); 2283 mtx_destroy(&be_lun->io_lock); 2284 free(be_lun, M_CTLBLK); 2285 2286 req->status = CTL_LUN_OK; 
2287 2288 return (0); 2289 2290bailout_error: 2291 2292 req->status = CTL_LUN_ERROR; 2293 2294 return (0); 2295} 2296 2297static int 2298ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun, 2299 struct ctl_lun_req *req) 2300{ 2301 struct vattr vattr; 2302 int error; 2303 struct ctl_lun_modify_params *params; 2304 2305 params = &req->reqdata.modify; 2306 2307 if (params->lun_size_bytes != 0) { 2308 be_lun->size_bytes = params->lun_size_bytes; 2309 } else { 2310 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); 2311 if (error != 0) { 2312 snprintf(req->error_str, sizeof(req->error_str), 2313 "error calling VOP_GETATTR() for file %s", 2314 be_lun->dev_path); 2315 return (error); 2316 } 2317 2318 be_lun->size_bytes = vattr.va_size; 2319 } 2320 2321 return (0); 2322} 2323 2324static int 2325ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun, 2326 struct ctl_lun_req *req) 2327{ 2328 struct cdev *dev; 2329 struct cdevsw *devsw; 2330 int error; 2331 struct ctl_lun_modify_params *params; 2332 uint64_t size_bytes; 2333 2334 params = &req->reqdata.modify; 2335 2336 dev = be_lun->vn->v_rdev; 2337 devsw = dev->si_devsw; 2338 if (!devsw->d_ioctl) { 2339 snprintf(req->error_str, sizeof(req->error_str), 2340 "%s: no d_ioctl for device %s!", __func__, 2341 be_lun->dev_path); 2342 return (ENODEV); 2343 } 2344 2345 error = devsw->d_ioctl(dev, DIOCGMEDIASIZE, 2346 (caddr_t)&size_bytes, FREAD, 2347 curthread); 2348 if (error) { 2349 snprintf(req->error_str, sizeof(req->error_str), 2350 "%s: error %d returned for DIOCGMEDIASIZE ioctl " 2351 "on %s!", __func__, error, be_lun->dev_path); 2352 return (error); 2353 } 2354 2355 if (params->lun_size_bytes != 0) { 2356 if (params->lun_size_bytes > size_bytes) { 2357 snprintf(req->error_str, sizeof(req->error_str), 2358 "%s: requested LUN size %ju > backing device " 2359 "size %ju", __func__, 2360 (uintmax_t)params->lun_size_bytes, 2361 (uintmax_t)size_bytes); 2362 return (EINVAL); 2363 } 2364 2365 be_lun->size_bytes = 
params->lun_size_bytes; 2366 } else { 2367 be_lun->size_bytes = size_bytes; 2368 } 2369 2370 return (0); 2371} 2372 2373static int 2374ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2375{ 2376 struct ctl_lun_modify_params *params; 2377 struct ctl_be_block_lun *be_lun; 2378 int error; 2379 2380 params = &req->reqdata.modify; 2381 2382 mtx_lock(&softc->lock); 2383 2384 be_lun = NULL; 2385 2386 STAILQ_FOREACH(be_lun, &softc->lun_list, links) { 2387 if (be_lun->ctl_be_lun.lun_id == params->lun_id) 2388 break; 2389 } 2390 mtx_unlock(&softc->lock); 2391 2392 if (be_lun == NULL) { 2393 snprintf(req->error_str, sizeof(req->error_str), 2394 "%s: LUN %u is not managed by the block backend", 2395 __func__, params->lun_id); 2396 goto bailout_error; 2397 } 2398 2399 if (params->lun_size_bytes != 0) { 2400 if (params->lun_size_bytes < be_lun->blocksize) { 2401 snprintf(req->error_str, sizeof(req->error_str), 2402 "%s: LUN size %ju < blocksize %u", __func__, 2403 params->lun_size_bytes, be_lun->blocksize); 2404 goto bailout_error; 2405 } 2406 } 2407 2408 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 2409 2410 if (be_lun->vn->v_type == VREG) 2411 error = ctl_be_block_modify_file(be_lun, req); 2412 else 2413 error = ctl_be_block_modify_dev(be_lun, req); 2414 2415 VOP_UNLOCK(be_lun->vn, 0); 2416 2417 if (error != 0) 2418 goto bailout_error; 2419 2420 be_lun->size_blocks = be_lun->size_bytes >> be_lun->blocksize_shift; 2421 2422 /* 2423 * The maximum LBA is the size - 1. 2424 * 2425 * XXX: Note that this field is being updated without locking, 2426 * which might cause problems on 32-bit architectures. 
2427 */ 2428 be_lun->ctl_be_lun.maxlba = be_lun->size_blocks - 1; 2429 ctl_lun_capacity_changed(&be_lun->ctl_be_lun); 2430 2431 /* Tell the user the exact size we ended up using */ 2432 params->lun_size_bytes = be_lun->size_bytes; 2433 2434 req->status = CTL_LUN_OK; 2435 2436 return (0); 2437 2438bailout_error: 2439 req->status = CTL_LUN_ERROR; 2440 2441 return (0); 2442} 2443 2444static void 2445ctl_be_block_lun_shutdown(void *be_lun) 2446{ 2447 struct ctl_be_block_lun *lun; 2448 struct ctl_be_block_softc *softc; 2449 2450 lun = (struct ctl_be_block_lun *)be_lun; 2451 2452 softc = lun->softc; 2453 2454 mtx_lock(&softc->lock); 2455 lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED; 2456 if (lun->flags & CTL_BE_BLOCK_LUN_WAITING) 2457 wakeup(lun); 2458 mtx_unlock(&softc->lock); 2459 2460} 2461 2462static void 2463ctl_be_block_lun_config_status(void *be_lun, ctl_lun_config_status status) 2464{ 2465 struct ctl_be_block_lun *lun; 2466 struct ctl_be_block_softc *softc; 2467 2468 lun = (struct ctl_be_block_lun *)be_lun; 2469 softc = lun->softc; 2470 2471 if (status == CTL_LUN_CONFIG_OK) { 2472 mtx_lock(&softc->lock); 2473 lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED; 2474 if (lun->flags & CTL_BE_BLOCK_LUN_WAITING) 2475 wakeup(lun); 2476 mtx_unlock(&softc->lock); 2477 2478 /* 2479 * We successfully added the LUN, attempt to enable it. 
2480 */ 2481 if (ctl_enable_lun(&lun->ctl_be_lun) != 0) { 2482 printf("%s: ctl_enable_lun() failed!\n", __func__); 2483 if (ctl_invalidate_lun(&lun->ctl_be_lun) != 0) { 2484 printf("%s: ctl_invalidate_lun() failed!\n", 2485 __func__); 2486 } 2487 } 2488 2489 return; 2490 } 2491 2492 2493 mtx_lock(&softc->lock); 2494 lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED; 2495 lun->flags |= CTL_BE_BLOCK_LUN_CONFIG_ERR; 2496 wakeup(lun); 2497 mtx_unlock(&softc->lock); 2498} 2499 2500 2501static int 2502ctl_be_block_config_write(union ctl_io *io) 2503{ 2504 struct ctl_be_block_lun *be_lun; 2505 struct ctl_be_lun *ctl_be_lun; 2506 int retval; 2507 2508 retval = 0; 2509 2510 DPRINTF("entered\n"); 2511 2512 ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[ 2513 CTL_PRIV_BACKEND_LUN].ptr; 2514 be_lun = (struct ctl_be_block_lun *)ctl_be_lun->be_lun; 2515 2516 switch (io->scsiio.cdb[0]) { 2517 case SYNCHRONIZE_CACHE: 2518 case SYNCHRONIZE_CACHE_16: 2519 case WRITE_SAME_10: 2520 case WRITE_SAME_16: 2521 case UNMAP: 2522 /* 2523 * The upper level CTL code will filter out any CDBs with 2524 * the immediate bit set and return the proper error. 2525 * 2526 * We don't really need to worry about what LBA range the 2527 * user asked to be synced out. When they issue a sync 2528 * cache command, we'll sync out the whole thing. 2529 */ 2530 mtx_lock(&be_lun->queue_lock); 2531 STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr, 2532 links); 2533 mtx_unlock(&be_lun->queue_lock); 2534 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 2535 break; 2536 case START_STOP_UNIT: { 2537 struct scsi_start_stop_unit *cdb; 2538 2539 cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb; 2540 2541 if (cdb->how & SSS_START) 2542 retval = ctl_start_lun(ctl_be_lun); 2543 else { 2544 retval = ctl_stop_lun(ctl_be_lun); 2545 /* 2546 * XXX KDM Copan-specific offline behavior. 2547 * Figure out a reasonable way to port this? 
2548 */ 2549#ifdef NEEDTOPORT 2550 if ((retval == 0) 2551 && (cdb->byte2 & SSS_ONOFFLINE)) 2552 retval = ctl_lun_offline(ctl_be_lun); 2553#endif 2554 } 2555 2556 /* 2557 * In general, the above routines should not fail. They 2558 * just set state for the LUN. So we've got something 2559 * pretty wrong here if we can't start or stop the LUN. 2560 */ 2561 if (retval != 0) { 2562 ctl_set_internal_failure(&io->scsiio, 2563 /*sks_valid*/ 1, 2564 /*retry_count*/ 0xf051); 2565 retval = CTL_RETVAL_COMPLETE; 2566 } else { 2567 ctl_set_success(&io->scsiio); 2568 } 2569 ctl_config_write_done(io); 2570 break; 2571 } 2572 default: 2573 ctl_set_invalid_opcode(&io->scsiio); 2574 ctl_config_write_done(io); 2575 retval = CTL_RETVAL_COMPLETE; 2576 break; 2577 } 2578 2579 return (retval); 2580 2581} 2582 2583static int 2584ctl_be_block_config_read(union ctl_io *io) 2585{ 2586 return (0); 2587} 2588 2589static int 2590ctl_be_block_lun_info(void *be_lun, struct sbuf *sb) 2591{ 2592 struct ctl_be_block_lun *lun; 2593 int retval; 2594 2595 lun = (struct ctl_be_block_lun *)be_lun; 2596 retval = 0; 2597 2598 retval = sbuf_printf(sb, "\t<num_threads>"); 2599 2600 if (retval != 0) 2601 goto bailout; 2602 2603 retval = sbuf_printf(sb, "%d", lun->num_threads); 2604 2605 if (retval != 0) 2606 goto bailout; 2607 2608 retval = sbuf_printf(sb, "</num_threads>\n"); 2609 2610bailout: 2611 2612 return (retval); 2613} 2614 2615int 2616ctl_be_block_init(void) 2617{ 2618 struct ctl_be_block_softc *softc; 2619 int retval; 2620 2621 softc = &backend_block_softc; 2622 retval = 0; 2623 2624 mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF); 2625 beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io), 2626 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 2627 STAILQ_INIT(&softc->disk_list); 2628 STAILQ_INIT(&softc->lun_list); 2629 2630 return (retval); 2631} 2632