1229997Sken/*- 2229997Sken * Copyright (c) 2003 Silicon Graphics International Corp. 3229997Sken * Copyright (c) 2009-2011 Spectra Logic Corporation 4232604Strasz * Copyright (c) 2012 The FreeBSD Foundation 5288348Smav * Copyright (c) 2014-2015 Alexander Motin <mav@FreeBSD.org> 6229997Sken * All rights reserved. 7229997Sken * 8232604Strasz * Portions of this software were developed by Edward Tomasz Napierala 9232604Strasz * under sponsorship from the FreeBSD Foundation. 10232604Strasz * 11229997Sken * Redistribution and use in source and binary forms, with or without 12229997Sken * modification, are permitted provided that the following conditions 13229997Sken * are met: 14229997Sken * 1. Redistributions of source code must retain the above copyright 15229997Sken * notice, this list of conditions, and the following disclaimer, 16229997Sken * without modification. 17229997Sken * 2. Redistributions in binary form must reproduce at minimum a disclaimer 18229997Sken * substantially similar to the "NO WARRANTY" disclaimer below 19229997Sken * ("Disclaimer") and any redistribution must be conditioned upon 20229997Sken * including a substantially similar Disclaimer requirement for further 21229997Sken * binary redistribution. 22229997Sken * 23229997Sken * NO WARRANTY 24229997Sken * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 25229997Sken * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 26229997Sken * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR 27229997Sken * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 28229997Sken * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29229997Sken * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30229997Sken * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31229997Sken * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32229997Sken * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33229997Sken * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34229997Sken * POSSIBILITY OF SUCH DAMAGES. 35229997Sken * 36229997Sken * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $ 37229997Sken */ 38229997Sken/* 39229997Sken * CAM Target Layer driver backend for block devices. 40229997Sken * 41229997Sken * Author: Ken Merry <ken@FreeBSD.org> 42229997Sken */ 43229997Sken#include <sys/cdefs.h> 44229997Sken__FBSDID("$FreeBSD: stable/11/sys/cam/ctl/ctl_backend_block.c 361256 2020-05-19 14:42:09Z mav $"); 45229997Sken 46229997Sken#include <sys/param.h> 47229997Sken#include <sys/systm.h> 48229997Sken#include <sys/kernel.h> 49229997Sken#include <sys/types.h> 50229997Sken#include <sys/kthread.h> 51229997Sken#include <sys/bio.h> 52229997Sken#include <sys/fcntl.h> 53264274Smav#include <sys/limits.h> 54229997Sken#include <sys/lock.h> 55229997Sken#include <sys/mutex.h> 56229997Sken#include <sys/condvar.h> 57229997Sken#include <sys/malloc.h> 58229997Sken#include <sys/conf.h> 59229997Sken#include <sys/ioccom.h> 60229997Sken#include <sys/queue.h> 61229997Sken#include <sys/sbuf.h> 62229997Sken#include <sys/endian.h> 63229997Sken#include <sys/uio.h> 64229997Sken#include <sys/buf.h> 65229997Sken#include <sys/taskqueue.h> 66229997Sken#include <sys/vnode.h> 67229997Sken#include <sys/namei.h> 68229997Sken#include <sys/mount.h> 69229997Sken#include <sys/disk.h> 70229997Sken#include <sys/fcntl.h> 71229997Sken#include <sys/filedesc.h> 72275474Smav#include <sys/filio.h> 73229997Sken#include <sys/proc.h> 74229997Sken#include <sys/pcpu.h> 75229997Sken#include <sys/module.h> 76229997Sken#include <sys/sdt.h> 77229997Sken#include <sys/devicestat.h> 78229997Sken#include <sys/sysctl.h> 79361256Smav#include <sys/sx.h> 80229997Sken 81229997Sken#include <geom/geom.h> 82229997Sken 83229997Sken#include <cam/cam.h> 84229997Sken#include <cam/scsi/scsi_all.h> 85229997Sken#include <cam/scsi/scsi_da.h> 86229997Sken#include <cam/ctl/ctl_io.h> 87229997Sken#include <cam/ctl/ctl.h> 88229997Sken#include <cam/ctl/ctl_backend.h> 89229997Sken#include <cam/ctl/ctl_ioctl.h> 90287621Smav#include <cam/ctl/ctl_ha.h> 91229997Sken#include <cam/ctl/ctl_scsi_all.h> 92287621Smav#include <cam/ctl/ctl_private.h> 93229997Sken#include <cam/ctl/ctl_error.h> 94229997Sken 95229997Sken/* 96264886Smav * The idea here is that we'll allocate enough S/G space to hold a 1MB 97264886Smav * I/O. If we get an I/O larger than that, we'll split it. 98229997Sken */ 99267537Smav#define CTLBLK_HALF_IO_SIZE (512 * 1024) 100267537Smav#define CTLBLK_MAX_IO_SIZE (CTLBLK_HALF_IO_SIZE * 2) 101264886Smav#define CTLBLK_MAX_SEG MAXPHYS 102267537Smav#define CTLBLK_HALF_SEGS MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MAX_SEG, 1) 103267537Smav#define CTLBLK_MAX_SEGS (CTLBLK_HALF_SEGS * 2) 104229997Sken 105229997Sken#ifdef CTLBLK_DEBUG 106229997Sken#define DPRINTF(fmt, args...) \ 107229997Sken printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) 108229997Sken#else 109229997Sken#define DPRINTF(fmt, args...) do {} while(0) 110229997Sken#endif 111229997Sken 112267519Smav#define PRIV(io) \ 113267519Smav ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND]) 114267537Smav#define ARGS(io) \ 115267537Smav ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]) 116267519Smav 117229997SkenSDT_PROVIDER_DEFINE(cbb); 118229997Sken 119229997Skentypedef enum { 120229997Sken CTL_BE_BLOCK_LUN_UNCONFIGURED = 0x01, 121229997Sken CTL_BE_BLOCK_LUN_WAITING = 0x04, 122229997Sken} ctl_be_block_lun_flags; 123229997Sken 124229997Skentypedef enum { 125229997Sken CTL_BE_BLOCK_NONE, 126229997Sken CTL_BE_BLOCK_DEV, 127229997Sken CTL_BE_BLOCK_FILE 128229997Sken} ctl_be_block_type; 129229997Sken 130229997Skenstruct ctl_be_block_filedata { 131229997Sken struct ucred *cred; 132229997Sken}; 133229997Sken 134229997Skenunion ctl_be_block_bedata { 135229997Sken struct ctl_be_block_filedata file; 136229997Sken}; 137229997Sken 138229997Skenstruct ctl_be_block_io; 139229997Skenstruct ctl_be_block_lun; 140229997Sken 141229997Skentypedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun, 142229997Sken struct ctl_be_block_io *beio); 143274154Smavtypedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun, 144274154Smav const char *attrname); 145229997Sken 146229997Sken/* 147229997Sken * Backend LUN structure. There is a 1:1 mapping between a block device 148229997Sken * and a backend block LUN, and between a backend block LUN and a CTL LUN. 149229997Sken */ 150229997Skenstruct ctl_be_block_lun { 151272911Smav struct ctl_lun_create_params params; 152229997Sken char *dev_path; 153229997Sken ctl_be_block_type dev_type; 154229997Sken struct vnode *vn; 155229997Sken union ctl_be_block_bedata backend; 156229997Sken cbb_dispatch_t dispatch; 157229997Sken cbb_dispatch_t lun_flush; 158264274Smav cbb_dispatch_t unmap; 159275474Smav cbb_dispatch_t get_lba_status; 160274154Smav cbb_getattr_t getattr; 161229997Sken uma_zone_t lun_zone; 162229997Sken uint64_t size_blocks; 163229997Sken uint64_t size_bytes; 164229997Sken struct ctl_be_block_softc *softc; 165229997Sken struct devstat *disk_stats; 166229997Sken ctl_be_block_lun_flags flags; 167361256Smav SLIST_ENTRY(ctl_be_block_lun) links; 168287499Smav struct ctl_be_lun cbe_lun; 169229997Sken struct taskqueue *io_taskqueue; 170229997Sken struct task io_task; 171229997Sken int num_threads; 172229997Sken STAILQ_HEAD(, ctl_io_hdr) input_queue; 173275474Smav STAILQ_HEAD(, ctl_io_hdr) config_read_queue; 174229997Sken STAILQ_HEAD(, ctl_io_hdr) config_write_queue; 175229997Sken STAILQ_HEAD(, ctl_io_hdr) datamove_queue; 176267877Smav struct mtx_padalign io_lock; 177267877Smav struct mtx_padalign queue_lock; 178229997Sken}; 179229997Sken 180229997Sken/* 181229997Sken * Overall softc structure for the block backend module. 182229997Sken */ 183229997Skenstruct ctl_be_block_softc { 184361256Smav struct sx modify_lock; 185229997Sken struct mtx lock; 186313368Smav uma_zone_t beio_zone; 187229997Sken int num_luns; 188361256Smav SLIST_HEAD(, ctl_be_block_lun) lun_list; 189229997Sken}; 190229997Sken 191229997Skenstatic struct ctl_be_block_softc backend_block_softc; 192229997Sken 193229997Sken/* 194229997Sken * Per-I/O information. 195229997Sken */ 196229997Skenstruct ctl_be_block_io { 197229997Sken union ctl_io *io; 198229997Sken struct ctl_sg_entry sg_segs[CTLBLK_MAX_SEGS]; 199229997Sken struct iovec xiovecs[CTLBLK_MAX_SEGS]; 200229997Sken int bio_cmd; 201229997Sken int num_segs; 202229997Sken int num_bios_sent; 203229997Sken int num_bios_done; 204229997Sken int send_complete; 205311418Smav int first_error; 206311418Smav uint64_t first_error_offset; 207229997Sken struct bintime ds_t0; 208229997Sken devstat_tag_type ds_tag_type; 209229997Sken devstat_trans_flags ds_trans_type; 210229997Sken uint64_t io_len; 211229997Sken uint64_t io_offset; 212286353Smav int io_arg; 213229997Sken struct ctl_be_block_softc *softc; 214229997Sken struct ctl_be_block_lun *lun; 215264274Smav void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */ 216229997Sken}; 217229997Sken 218287621Smavextern struct ctl_softc *control_softc; 219287621Smav 220229997Skenstatic int cbb_num_threads = 14; 221229997SkenSYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD, 0, 222229997Sken "CAM Target Layer Block Backend"); 223267992ShselaskySYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN, 224229997Sken &cbb_num_threads, 0, "Number of threads per backing file"); 225229997Sken 226229997Skenstatic struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc); 227229997Skenstatic void ctl_free_beio(struct ctl_be_block_io *beio); 228229997Skenstatic void ctl_complete_beio(struct ctl_be_block_io *beio); 229229997Skenstatic int ctl_be_block_move_done(union ctl_io *io); 230229997Skenstatic void ctl_be_block_biodone(struct bio *bio); 231229997Skenstatic void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun, 232229997Sken struct ctl_be_block_io *beio); 233229997Skenstatic void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun, 234229997Sken struct ctl_be_block_io *beio); 235275474Smavstatic void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun, 236275474Smav struct ctl_be_block_io *beio); 237275481Smavstatic uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, 238275481Smav const char *attrname); 239229997Skenstatic void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun, 240229997Sken struct ctl_be_block_io *beio); 241264274Smavstatic void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun, 242264274Smav struct ctl_be_block_io *beio); 243229997Skenstatic void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun, 244229997Sken struct ctl_be_block_io *beio); 245274154Smavstatic uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, 246274154Smav const char *attrname); 247275474Smavstatic void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun, 248275474Smav union ctl_io *io); 249229997Skenstatic void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun, 250229997Sken union ctl_io *io); 251229997Skenstatic void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun, 252229997Sken union ctl_io *io); 253229997Skenstatic void ctl_be_block_worker(void *context, int pending); 254229997Skenstatic int ctl_be_block_submit(union ctl_io *io); 255229997Skenstatic int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, 256229997Sken int flag, struct thread *td); 257229997Skenstatic int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, 258229997Sken struct ctl_lun_req *req); 259229997Skenstatic int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, 260229997Sken struct ctl_lun_req *req); 261229997Skenstatic int ctl_be_block_close(struct ctl_be_block_lun *be_lun); 262288348Smavstatic int ctl_be_block_open(struct ctl_be_block_lun *be_lun, 263229997Sken struct ctl_lun_req *req); 264229997Skenstatic int ctl_be_block_create(struct ctl_be_block_softc *softc, 265229997Sken struct ctl_lun_req *req); 266229997Skenstatic int ctl_be_block_rm(struct ctl_be_block_softc *softc, 267229997Sken struct ctl_lun_req *req); 268232604Straszstatic int ctl_be_block_modify(struct ctl_be_block_softc *softc, 269232604Strasz struct ctl_lun_req *req); 270229997Skenstatic void ctl_be_block_lun_shutdown(void *be_lun); 271229997Skenstatic int ctl_be_block_config_write(union ctl_io *io); 272229997Skenstatic int ctl_be_block_config_read(union ctl_io *io); 273229997Skenstatic int ctl_be_block_lun_info(void *be_lun, struct sbuf *sb); 274274154Smavstatic uint64_t ctl_be_block_lun_attr(void *be_lun, const char *attrname); 275313368Smavstatic int ctl_be_block_init(void); 276313368Smavstatic int ctl_be_block_shutdown(void); 277229997Sken 278229997Skenstatic struct ctl_backend_driver ctl_be_block_driver = 279229997Sken{ 280230334Sken .name = "block", 281230334Sken .flags = CTL_BE_FLAG_HAS_CONFIG, 282230334Sken .init = ctl_be_block_init, 283313368Smav .shutdown = ctl_be_block_shutdown, 284230334Sken .data_submit = ctl_be_block_submit, 285230334Sken .data_move_done = ctl_be_block_move_done, 286230334Sken .config_read = ctl_be_block_config_read, 287230334Sken .config_write = ctl_be_block_config_write, 288230334Sken .ioctl = ctl_be_block_ioctl, 289274154Smav .lun_info = ctl_be_block_lun_info, 290274154Smav .lun_attr = ctl_be_block_lun_attr 291229997Sken}; 292229997Sken 293361256SmavMALLOC_DEFINE(M_CTLBLK, "ctlblock", "Memory used for CTL block backend"); 294229997SkenCTL_BACKEND_DECLARE(cbb, ctl_be_block_driver); 295229997Sken 296229997Skenstatic struct ctl_be_block_io * 297229997Skenctl_alloc_beio(struct ctl_be_block_softc *softc) 298229997Sken{ 299229997Sken struct ctl_be_block_io *beio; 300229997Sken 301313368Smav beio = uma_zalloc(softc->beio_zone, M_WAITOK | M_ZERO); 302264020Strasz beio->softc = softc; 303229997Sken return (beio); 304229997Sken} 305229997Sken 306229997Skenstatic void 307229997Skenctl_free_beio(struct ctl_be_block_io *beio) 308229997Sken{ 309229997Sken int duplicate_free; 310229997Sken int i; 311229997Sken 312229997Sken duplicate_free = 0; 313229997Sken 314229997Sken for (i = 0; i < beio->num_segs; i++) { 315229997Sken if (beio->sg_segs[i].addr == NULL) 316229997Sken duplicate_free++; 317229997Sken 318229997Sken uma_zfree(beio->lun->lun_zone, beio->sg_segs[i].addr); 319229997Sken beio->sg_segs[i].addr = NULL; 320267537Smav 321267537Smav /* For compare we had two equal S/G lists. */ 322267537Smav if (ARGS(beio->io)->flags & CTL_LLF_COMPARE) { 323267537Smav uma_zfree(beio->lun->lun_zone, 324267537Smav beio->sg_segs[i + CTLBLK_HALF_SEGS].addr); 325267537Smav beio->sg_segs[i + CTLBLK_HALF_SEGS].addr = NULL; 326267537Smav } 327229997Sken } 328229997Sken 329229997Sken if (duplicate_free > 0) { 330229997Sken printf("%s: %d duplicate frees out of %d segments\n", __func__, 331229997Sken duplicate_free, beio->num_segs); 332229997Sken } 333229997Sken 334313368Smav uma_zfree(beio->softc->beio_zone, beio); 335229997Sken} 336229997Sken 337229997Skenstatic void 338229997Skenctl_complete_beio(struct ctl_be_block_io *beio) 339229997Sken{ 340267877Smav union ctl_io *io = beio->io; 341229997Sken 342264274Smav if (beio->beio_cont != NULL) { 343264274Smav beio->beio_cont(beio); 344264274Smav } else { 345264274Smav ctl_free_beio(beio); 346267537Smav ctl_data_submit_done(io); 347264274Smav } 348229997Sken} 349229997Sken 350287868Smavstatic size_t 351287868Smavcmp(uint8_t *a, uint8_t *b, size_t size) 352287868Smav{ 353287868Smav size_t i; 354287868Smav 355287868Smav for (i = 0; i < size; i++) { 356287868Smav if (a[i] != b[i]) 357287868Smav break; 358287868Smav } 359287868Smav return (i); 360287868Smav} 361287868Smav 362287868Smavstatic void 363287868Smavctl_be_block_compare(union ctl_io *io) 364287868Smav{ 365287868Smav struct ctl_be_block_io *beio; 366287868Smav uint64_t off, res; 367287868Smav int i; 368287868Smav uint8_t info[8]; 369287868Smav 370287868Smav beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 371287868Smav off = 0; 372287868Smav for (i = 0; i < beio->num_segs; i++) { 373287868Smav res = cmp(beio->sg_segs[i].addr, 374287868Smav beio->sg_segs[i + CTLBLK_HALF_SEGS].addr, 375287868Smav beio->sg_segs[i].len); 376287868Smav off += res; 377287868Smav if (res < beio->sg_segs[i].len) 378287868Smav break; 379287868Smav } 380287868Smav if (i < beio->num_segs) { 381287868Smav scsi_u64to8b(off, info); 382287868Smav ctl_set_sense(&io->scsiio, /*current_error*/ 1, 383287868Smav /*sense_key*/ SSD_KEY_MISCOMPARE, 384287868Smav /*asc*/ 0x1D, /*ascq*/ 0x00, 385287868Smav /*type*/ SSD_ELEM_INFO, 386287868Smav /*size*/ sizeof(info), /*data*/ &info, 387287868Smav /*type*/ SSD_ELEM_NONE); 388287868Smav } else 389287868Smav ctl_set_success(&io->scsiio); 390287868Smav} 391287868Smav 392229997Skenstatic int 393229997Skenctl_be_block_move_done(union ctl_io *io) 394229997Sken{ 395229997Sken struct ctl_be_block_io *beio; 396229997Sken struct ctl_be_block_lun *be_lun; 397267537Smav struct ctl_lba_len_flags *lbalen; 398229997Sken#ifdef CTL_TIME_IO 399229997Sken struct bintime cur_bt; 400267537Smav#endif 401229997Sken 402267519Smav beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 403229997Sken be_lun = beio->lun; 404229997Sken 405229997Sken DPRINTF("entered\n"); 406229997Sken 407229997Sken#ifdef CTL_TIME_IO 408288215Smav getbinuptime(&cur_bt); 409229997Sken bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt); 410229997Sken bintime_add(&io->io_hdr.dma_bt, &cur_bt); 411288215Smav#endif 412229997Sken io->io_hdr.num_dmas++; 413267537Smav io->scsiio.kern_rel_offset += io->scsiio.kern_data_len; 414229997Sken 415229997Sken /* 416229997Sken * We set status at this point for read commands, and write 417229997Sken * commands with errors. 418229997Sken */ 419275058Smav if (io->io_hdr.flags & CTL_FLAG_ABORT) { 420275058Smav ; 421313364Smav } else if ((io->io_hdr.port_status != 0) && 422313364Smav ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE || 423313364Smav (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) { 424313364Smav ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1, 425313364Smav /*retry_count*/ io->io_hdr.port_status); 426313364Smav } else if (io->scsiio.kern_data_resid != 0 && 427313364Smav (io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_OUT && 428313364Smav ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE || 429313364Smav (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) { 430313364Smav ctl_set_invalid_field_ciu(&io->scsiio); 431275058Smav } else if ((io->io_hdr.port_status == 0) && 432267537Smav ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) { 433267537Smav lbalen = ARGS(beio->io); 434267537Smav if (lbalen->flags & CTL_LLF_READ) { 435267537Smav ctl_set_success(&io->scsiio); 436267537Smav } else if (lbalen->flags & CTL_LLF_COMPARE) { 437267537Smav /* We have two data blocks ready for comparison. */ 438287868Smav ctl_be_block_compare(io); 439267537Smav } 440229997Sken } 441229997Sken 442229997Sken /* 443229997Sken * If this is a read, or a write with errors, it is done. 444229997Sken */ 445229997Sken if ((beio->bio_cmd == BIO_READ) 446229997Sken || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0) 447229997Sken || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) { 448229997Sken ctl_complete_beio(beio); 449229997Sken return (0); 450229997Sken } 451229997Sken 452229997Sken /* 453229997Sken * At this point, we have a write and the DMA completed 454229997Sken * successfully. We now have to queue it to the task queue to 455229997Sken * execute the backend I/O. That is because we do blocking 456229997Sken * memory allocations, and in the file backing case, blocking I/O. 457229997Sken * This move done routine is generally called in the SIM's 458229997Sken * interrupt context, and therefore we cannot block. 459229997Sken */ 460267877Smav mtx_lock(&be_lun->queue_lock); 461229997Sken STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links); 462267877Smav mtx_unlock(&be_lun->queue_lock); 463229997Sken taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 464229997Sken 465229997Sken return (0); 466229997Sken} 467229997Sken 468229997Skenstatic void 469229997Skenctl_be_block_biodone(struct bio *bio) 470229997Sken{ 471229997Sken struct ctl_be_block_io *beio; 472229997Sken struct ctl_be_block_lun *be_lun; 473229997Sken union ctl_io *io; 474261538Smav int error; 475229997Sken 476229997Sken beio = bio->bio_caller1; 477229997Sken be_lun = beio->lun; 478229997Sken io = beio->io; 479229997Sken 480229997Sken DPRINTF("entered\n"); 481229997Sken 482261538Smav error = bio->bio_error; 483267877Smav mtx_lock(&be_lun->io_lock); 484311418Smav if (error != 0 && 485311418Smav (beio->first_error == 0 || 486311418Smav bio->bio_offset < beio->first_error_offset)) { 487311418Smav beio->first_error = error; 488311418Smav beio->first_error_offset = bio->bio_offset; 489311418Smav } 490229997Sken 491229997Sken beio->num_bios_done++; 492229997Sken 493229997Sken /* 494229997Sken * XXX KDM will this cause WITNESS to complain? Holding a lock 495229997Sken * during the free might cause it to complain. 496229997Sken */ 497229997Sken g_destroy_bio(bio); 498229997Sken 499229997Sken /* 500229997Sken * If the send complete bit isn't set, or we aren't the last I/O to 501229997Sken * complete, then we're done. 502229997Sken */ 503229997Sken if ((beio->send_complete == 0) 504229997Sken || (beio->num_bios_done < beio->num_bios_sent)) { 505267877Smav mtx_unlock(&be_lun->io_lock); 506229997Sken return; 507229997Sken } 508229997Sken 509229997Sken /* 510229997Sken * At this point, we've verified that we are the last I/O to 511229997Sken * complete, so it's safe to drop the lock. 512229997Sken */ 513267877Smav devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 514267877Smav beio->ds_tag_type, beio->ds_trans_type, 515267877Smav /*now*/ NULL, /*then*/&beio->ds_t0); 516267877Smav mtx_unlock(&be_lun->io_lock); 517229997Sken 518229997Sken /* 519229997Sken * If there are any errors from the backing device, we fail the 520229997Sken * entire I/O with a medium error. 521229997Sken */ 522311418Smav error = beio->first_error; 523311418Smav if (error != 0) { 524261538Smav if (error == EOPNOTSUPP) { 525261538Smav ctl_set_invalid_opcode(&io->scsiio); 526282565Smav } else if (error == ENOSPC || error == EDQUOT) { 527273809Smav ctl_set_space_alloc_fail(&io->scsiio); 528287760Smav } else if (error == EROFS || error == EACCES) { 529287760Smav ctl_set_hw_write_protected(&io->scsiio); 530261538Smav } else if (beio->bio_cmd == BIO_FLUSH) { 531229997Sken /* XXX KDM is there is a better error here? */ 532229997Sken ctl_set_internal_failure(&io->scsiio, 533229997Sken /*sks_valid*/ 1, 534229997Sken /*retry_count*/ 0xbad2); 535287912Smav } else { 536287912Smav ctl_set_medium_error(&io->scsiio, 537287912Smav beio->bio_cmd == BIO_READ); 538287912Smav } 539229997Sken ctl_complete_beio(beio); 540229997Sken return; 541229997Sken } 542229997Sken 543229997Sken /* 544267537Smav * If this is a write, a flush, a delete or verify, we're all done. 545229997Sken * If this is a read, we can now send the data to the user. 546229997Sken */ 547229997Sken if ((beio->bio_cmd == BIO_WRITE) 548264274Smav || (beio->bio_cmd == BIO_FLUSH) 549267537Smav || (beio->bio_cmd == BIO_DELETE) 550267537Smav || (ARGS(io)->flags & CTL_LLF_VERIFY)) { 551229997Sken ctl_set_success(&io->scsiio); 552229997Sken ctl_complete_beio(beio); 553229997Sken } else { 554275058Smav if ((ARGS(io)->flags & CTL_LLF_READ) && 555287967Smav beio->beio_cont == NULL) { 556275058Smav ctl_set_success(&io->scsiio); 557287967Smav ctl_serseq_done(io); 558287967Smav } 559229997Sken#ifdef CTL_TIME_IO 560288215Smav getbinuptime(&io->io_hdr.dma_start_bt); 561288215Smav#endif 562229997Sken ctl_datamove(io); 563229997Sken } 564229997Sken} 565229997Sken 566229997Skenstatic void 567229997Skenctl_be_block_flush_file(struct ctl_be_block_lun *be_lun, 568229997Sken struct ctl_be_block_io *beio) 569229997Sken{ 570267877Smav union ctl_io *io = beio->io; 571229997Sken struct mount *mountpoint; 572241896Skib int error, lock_flags; 573229997Sken 574229997Sken DPRINTF("entered\n"); 575229997Sken 576267877Smav binuptime(&beio->ds_t0); 577267877Smav mtx_lock(&be_lun->io_lock); 578267877Smav devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); 579267877Smav mtx_unlock(&be_lun->io_lock); 580229997Sken 581267877Smav (void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT); 582229997Sken 583288220Smav if (MNT_SHARED_WRITES(mountpoint) || 584288220Smav ((mountpoint == NULL) && MNT_SHARED_WRITES(be_lun->vn->v_mount))) 585229997Sken lock_flags = LK_SHARED; 586229997Sken else 587229997Sken lock_flags = LK_EXCLUSIVE; 588229997Sken vn_lock(be_lun->vn, lock_flags | LK_RETRY); 589286353Smav error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT, 590286353Smav curthread); 591229997Sken VOP_UNLOCK(be_lun->vn, 0); 592229997Sken 593229997Sken vn_finished_write(mountpoint); 594229997Sken 595267877Smav mtx_lock(&be_lun->io_lock); 596267877Smav devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 597267877Smav beio->ds_tag_type, beio->ds_trans_type, 598267877Smav /*now*/ NULL, /*then*/&beio->ds_t0); 599267877Smav mtx_unlock(&be_lun->io_lock); 600267877Smav 601229997Sken if (error == 0) 602229997Sken ctl_set_success(&io->scsiio); 603229997Sken else { 604229997Sken /* XXX KDM is there is a better error here? */ 605229997Sken ctl_set_internal_failure(&io->scsiio, 606229997Sken /*sks_valid*/ 1, 607229997Sken /*retry_count*/ 0xbad1); 608229997Sken } 609229997Sken 610229997Sken ctl_complete_beio(beio); 611229997Sken} 612229997Sken 613292384SmarkjSDT_PROBE_DEFINE1(cbb, , read, file_start, "uint64_t"); 614292384SmarkjSDT_PROBE_DEFINE1(cbb, , write, file_start, "uint64_t"); 615292384SmarkjSDT_PROBE_DEFINE1(cbb, , read, file_done,"uint64_t"); 616292384SmarkjSDT_PROBE_DEFINE1(cbb, , write, file_done, "uint64_t"); 617229997Sken 618229997Skenstatic void 619229997Skenctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun, 620229997Sken struct ctl_be_block_io *beio) 621229997Sken{ 622229997Sken struct ctl_be_block_filedata *file_data; 623229997Sken union ctl_io *io; 624229997Sken struct uio xuio; 625229997Sken struct iovec *xiovec; 626287875Smav size_t s; 627287875Smav int error, flags, i; 628229997Sken 629229997Sken DPRINTF("entered\n"); 630229997Sken 631229997Sken file_data = &be_lun->backend.file; 632229997Sken io = beio->io; 633271309Smav flags = 0; 634271309Smav if (ARGS(io)->flags & CTL_LLF_DPO) 635271309Smav flags |= IO_DIRECT; 636271309Smav if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA) 637271309Smav flags |= IO_SYNC; 638229997Sken 639267537Smav bzero(&xuio, sizeof(xuio)); 640229997Sken if (beio->bio_cmd == BIO_READ) { 641292384Smarkj SDT_PROBE0(cbb, , read, file_start); 642267537Smav xuio.uio_rw = UIO_READ; 643229997Sken } else { 644292384Smarkj SDT_PROBE0(cbb, , write, file_start); 645267537Smav xuio.uio_rw = UIO_WRITE; 646229997Sken } 647229997Sken xuio.uio_offset = beio->io_offset; 648229997Sken xuio.uio_resid = beio->io_len; 649229997Sken xuio.uio_segflg = UIO_SYSSPACE; 650229997Sken xuio.uio_iov = beio->xiovecs; 651229997Sken xuio.uio_iovcnt = beio->num_segs; 652229997Sken xuio.uio_td = curthread; 653229997Sken 654229997Sken for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) { 655229997Sken xiovec->iov_base = beio->sg_segs[i].addr; 656229997Sken xiovec->iov_len = beio->sg_segs[i].len; 657229997Sken } 658229997Sken 659267877Smav binuptime(&beio->ds_t0); 660267877Smav mtx_lock(&be_lun->io_lock); 661267877Smav devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); 662267877Smav mtx_unlock(&be_lun->io_lock); 663267877Smav 664229997Sken if (beio->bio_cmd == BIO_READ) { 665229997Sken vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 666229997Sken 667229997Sken /* 668229997Sken * UFS pays attention to IO_DIRECT for reads. If the 669229997Sken * DIRECTIO option is configured into the kernel, it calls 670229997Sken * ffs_rawread(). But that only works for single-segment 671229997Sken * uios with user space addresses. In our case, with a 672229997Sken * kernel uio, it still reads into the buffer cache, but it 673229997Sken * will just try to release the buffer from the cache later 674229997Sken * on in ffs_read(). 675229997Sken * 676229997Sken * ZFS does not pay attention to IO_DIRECT for reads. 677229997Sken * 678229997Sken * UFS does not pay attention to IO_SYNC for reads. 679229997Sken * 680229997Sken * ZFS pays attention to IO_SYNC (which translates into the 681229997Sken * Solaris define FRSYNC for zfs_read()) for reads. It 682229997Sken * attempts to sync the file before reading. 683229997Sken */ 684271309Smav error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred); 685229997Sken 686229997Sken VOP_UNLOCK(be_lun->vn, 0); 687292384Smarkj SDT_PROBE0(cbb, , read, file_done); 688287875Smav if (error == 0 && xuio.uio_resid > 0) { 689287875Smav /* 690287875Smav * If we red less then requested (EOF), then 691287875Smav * we should clean the rest of the buffer. 692287875Smav */ 693287875Smav s = beio->io_len - xuio.uio_resid; 694287875Smav for (i = 0; i < beio->num_segs; i++) { 695287875Smav if (s >= beio->sg_segs[i].len) { 696287875Smav s -= beio->sg_segs[i].len; 697287875Smav continue; 698287875Smav } 699287875Smav bzero((uint8_t *)beio->sg_segs[i].addr + s, 700287875Smav beio->sg_segs[i].len - s); 701287875Smav s = 0; 702287875Smav } 703287875Smav } 704229997Sken } else { 705229997Sken struct mount *mountpoint; 706229997Sken int lock_flags; 707229997Sken 708229997Sken (void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT); 709229997Sken 710288220Smav if (MNT_SHARED_WRITES(mountpoint) || ((mountpoint == NULL) 711229997Sken && MNT_SHARED_WRITES(be_lun->vn->v_mount))) 712229997Sken lock_flags = LK_SHARED; 713229997Sken else 714229997Sken lock_flags = LK_EXCLUSIVE; 715229997Sken vn_lock(be_lun->vn, lock_flags | LK_RETRY); 716229997Sken 717229997Sken /* 718229997Sken * UFS pays attention to IO_DIRECT for writes. The write 719229997Sken * is done asynchronously. (Normally the write would just 720229997Sken * get put into cache. 721229997Sken * 722229997Sken * UFS pays attention to IO_SYNC for writes. It will 723229997Sken * attempt to write the buffer out synchronously if that 724229997Sken * flag is set. 725229997Sken * 726229997Sken * ZFS does not pay attention to IO_DIRECT for writes. 727229997Sken * 728229997Sken * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC) 729229997Sken * for writes. It will flush the transaction from the 730229997Sken * cache before returning. 731229997Sken */ 732271309Smav error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred); 733229997Sken VOP_UNLOCK(be_lun->vn, 0); 734229997Sken 735229997Sken vn_finished_write(mountpoint); 736292384Smarkj SDT_PROBE0(cbb, , write, file_done); 737229997Sken } 738229997Sken 739267877Smav mtx_lock(&be_lun->io_lock); 740267877Smav devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 741267877Smav beio->ds_tag_type, beio->ds_trans_type, 742267877Smav /*now*/ NULL, /*then*/&beio->ds_t0); 743267877Smav mtx_unlock(&be_lun->io_lock); 744267877Smav 745229997Sken /* 746229997Sken * If we got an error, set the sense data to "MEDIUM ERROR" and 747229997Sken * return the I/O to the user. 748229997Sken */ 749229997Sken if (error != 0) { 750282565Smav if (error == ENOSPC || error == EDQUOT) { 751273809Smav ctl_set_space_alloc_fail(&io->scsiio); 752287760Smav } else if (error == EROFS || error == EACCES) { 753287760Smav ctl_set_hw_write_protected(&io->scsiio); 754287912Smav } else { 755287912Smav ctl_set_medium_error(&io->scsiio, 756287912Smav beio->bio_cmd == BIO_READ); 757287912Smav } 758229997Sken ctl_complete_beio(beio); 759229997Sken return; 760229997Sken } 761229997Sken 762229997Sken /* 763269122Smav * If this is a write or a verify, we're all done. 764229997Sken * If this is a read, we can now send the data to the user. 765229997Sken */ 766269122Smav if ((beio->bio_cmd == BIO_WRITE) || 767269122Smav (ARGS(io)->flags & CTL_LLF_VERIFY)) { 768229997Sken ctl_set_success(&io->scsiio); 769229997Sken ctl_complete_beio(beio); 770229997Sken } else { 771275058Smav if ((ARGS(io)->flags & CTL_LLF_READ) && 772287967Smav beio->beio_cont == NULL) { 773275058Smav ctl_set_success(&io->scsiio); 774287967Smav ctl_serseq_done(io); 775287967Smav } 776229997Sken#ifdef CTL_TIME_IO 777288215Smav getbinuptime(&io->io_hdr.dma_start_bt); 778288215Smav#endif 779229997Sken ctl_datamove(io); 780229997Sken } 781229997Sken} 782229997Sken 783229997Skenstatic void 784275474Smavctl_be_block_gls_file(struct ctl_be_block_lun *be_lun, 785275474Smav struct ctl_be_block_io *beio) 786275474Smav{ 787275474Smav union ctl_io *io = beio->io; 788275474Smav struct ctl_lba_len_flags *lbalen = ARGS(io); 789275474Smav struct scsi_get_lba_status_data *data; 790275474Smav off_t roff, off; 791275474Smav int error, status; 792275474Smav 793275474Smav DPRINTF("entered\n"); 794275474Smav 795287499Smav off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize; 796275474Smav vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 797275474Smav error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off, 798275474Smav 0, curthread->td_ucred, curthread); 799275474Smav if (error == 0 && off > roff) 800275474Smav status = 0; /* mapped up to off */ 801275474Smav else { 802275474Smav error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off, 803275474Smav 0, curthread->td_ucred, curthread); 804275474Smav if (error == 0 && off > roff) 805275474Smav status = 1; /* deallocated up to off */ 806275474Smav else { 807275474Smav status = 0; /* unknown up to the end */ 808275474Smav off = be_lun->size_bytes; 809275474Smav } 810275474Smav } 811275474Smav VOP_UNLOCK(be_lun->vn, 0); 812275474Smav 813275474Smav data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr; 814275474Smav scsi_u64to8b(lbalen->lba, data->descr[0].addr); 815287499Smav scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize - 816287499Smav lbalen->lba), data->descr[0].length); 817275474Smav data->descr[0].status = status; 818275474Smav 819275474Smav ctl_complete_beio(beio); 820275474Smav} 821275474Smav 822275481Smavstatic uint64_t 823275481Smavctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname) 824275481Smav{ 825275481Smav struct vattr vattr; 826275481Smav struct statfs statfs; 827285030Smav uint64_t val; 828275481Smav int error; 829275481Smav 830285030Smav val = UINT64_MAX; 831275481Smav if (be_lun->vn == NULL) 832285030Smav return (val); 833285030Smav vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 834275481Smav if (strcmp(attrname, "blocksused") == 0) { 835275481Smav error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); 836285030Smav if (error == 0) 837287499Smav val = vattr.va_bytes / be_lun->cbe_lun.blocksize; 838275481Smav } 839285030Smav if (strcmp(attrname, "blocksavail") == 0 && 840285030Smav (be_lun->vn->v_iflag & VI_DOOMED) == 0) { 841275481Smav error = VFS_STATFS(be_lun->vn->v_mount, &statfs); 842285030Smav if (error == 0) 843286811Smav val = statfs.f_bavail * statfs.f_bsize / 844287499Smav be_lun->cbe_lun.blocksize; 845275481Smav } 846285030Smav VOP_UNLOCK(be_lun->vn, 0); 847285030Smav return (val); 848275481Smav} 849275481Smav 850275474Smavstatic void 851269123Smavctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun, 852269123Smav struct ctl_be_block_io *beio) 853269123Smav{ 854269123Smav union ctl_io *io; 855287664Smav struct cdevsw *csw; 856287664Smav struct cdev *dev; 857269123Smav struct uio xuio; 858269123Smav struct iovec *xiovec; 859287664Smav int error, flags, i, ref; 860269123Smav 861269123Smav DPRINTF("entered\n"); 862269123Smav 863269123Smav io = beio->io; 864271309Smav flags = 0; 865271309Smav if (ARGS(io)->flags & CTL_LLF_DPO) 866271309Smav flags |= IO_DIRECT; 867271309Smav if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA) 868271309Smav flags |= IO_SYNC; 869269123Smav 870269123Smav bzero(&xuio, sizeof(xuio)); 871269123Smav if (beio->bio_cmd == BIO_READ) { 872292384Smarkj SDT_PROBE0(cbb, , read, file_start); 873269123Smav xuio.uio_rw = UIO_READ; 874269123Smav } else { 875292384Smarkj SDT_PROBE0(cbb, , write, file_start); 876269123Smav xuio.uio_rw = UIO_WRITE; 877269123Smav } 878269123Smav xuio.uio_offset = beio->io_offset; 879269123Smav xuio.uio_resid = beio->io_len; 880269123Smav xuio.uio_segflg = UIO_SYSSPACE; 881269123Smav xuio.uio_iov = beio->xiovecs; 882269123Smav xuio.uio_iovcnt = beio->num_segs; 883269123Smav xuio.uio_td = curthread; 884269123Smav 885269123Smav for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) { 886269123Smav xiovec->iov_base = beio->sg_segs[i].addr; 887269123Smav xiovec->iov_len = beio->sg_segs[i].len; 888269123Smav } 889269123Smav 890269123Smav binuptime(&beio->ds_t0); 891269123Smav mtx_lock(&be_lun->io_lock); 892269123Smav devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); 893269123Smav mtx_unlock(&be_lun->io_lock); 894269123Smav 895287664Smav csw = devvn_refthread(be_lun->vn, &dev, &ref); 896287664Smav if (csw) { 897287664Smav if (beio->bio_cmd == BIO_READ) 898287664Smav error = csw->d_read(dev, &xuio, flags); 899287664Smav else 900287664Smav error = csw->d_write(dev, &xuio, flags); 901287664Smav dev_relthread(dev, ref); 902287664Smav } else 903287664Smav error = ENXIO; 904287664Smav 905287664Smav if (beio->bio_cmd == BIO_READ) 906292384Smarkj SDT_PROBE0(cbb, , read, file_done); 907287664Smav else 908292384Smarkj SDT_PROBE0(cbb, , write, file_done); 909269123Smav 910269123Smav mtx_lock(&be_lun->io_lock); 911269123Smav devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 912269123Smav beio->ds_tag_type, beio->ds_trans_type, 913269123Smav /*now*/ NULL, /*then*/&beio->ds_t0); 914269123Smav mtx_unlock(&be_lun->io_lock); 915269123Smav 916269123Smav /* 917269123Smav * If we got an error, set the sense data to "MEDIUM ERROR" and 918269123Smav * return the I/O to the user. 919269123Smav */ 920269123Smav if (error != 0) { 921282565Smav if (error == ENOSPC || error == EDQUOT) { 922273809Smav ctl_set_space_alloc_fail(&io->scsiio); 923287760Smav } else if (error == EROFS || error == EACCES) { 924287760Smav ctl_set_hw_write_protected(&io->scsiio); 925287912Smav } else { 926287912Smav ctl_set_medium_error(&io->scsiio, 927287912Smav beio->bio_cmd == BIO_READ); 928287912Smav } 929269123Smav ctl_complete_beio(beio); 930269123Smav return; 931269123Smav } 932269123Smav 933269123Smav /* 934269123Smav * If this is a write or a verify, we're all done. 935269123Smav * If this is a read, we can now send the data to the user. 936269123Smav */ 937269123Smav if ((beio->bio_cmd == BIO_WRITE) || 938269123Smav (ARGS(io)->flags & CTL_LLF_VERIFY)) { 939269123Smav ctl_set_success(&io->scsiio); 940269123Smav ctl_complete_beio(beio); 941269123Smav } else { 942275058Smav if ((ARGS(io)->flags & CTL_LLF_READ) && 943287967Smav beio->beio_cont == NULL) { 944275058Smav ctl_set_success(&io->scsiio); 945287967Smav ctl_serseq_done(io); 946287967Smav } 947269123Smav#ifdef CTL_TIME_IO 948288215Smav getbinuptime(&io->io_hdr.dma_start_bt); 949288215Smav#endif 950269123Smav ctl_datamove(io); 951269123Smav } 952269123Smav} 953269123Smav 954269123Smavstatic void 955275474Smavctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun, 956275474Smav struct ctl_be_block_io *beio) 957275474Smav{ 958275474Smav union ctl_io *io = beio->io; 959287664Smav struct cdevsw *csw; 960287664Smav struct cdev *dev; 961275474Smav struct ctl_lba_len_flags *lbalen = ARGS(io); 962275474Smav struct scsi_get_lba_status_data *data; 963275474Smav off_t roff, off; 964287664Smav int error, ref, status; 965275474Smav 966275474Smav DPRINTF("entered\n"); 967275474Smav 968287664Smav csw = devvn_refthread(be_lun->vn, &dev, &ref); 969287664Smav if (csw == NULL) { 970287664Smav status = 0; /* unknown up to the end */ 971287664Smav off = be_lun->size_bytes; 972287664Smav goto done; 973287664Smav } 974287499Smav off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize; 975287664Smav error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD, 976287664Smav curthread); 977275474Smav if (error == 0 && off > roff) 978275474Smav status = 0; /* mapped up to off */ 979275474Smav else { 980287664Smav error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD, 981287664Smav curthread); 982275474Smav if (error == 0 && off > roff) 983275474Smav status = 1; /* deallocated up to off */ 984275474Smav else { 985275474Smav status = 0; /* unknown up to the end */ 986275474Smav off = be_lun->size_bytes; 987275474Smav } 988275474Smav } 989287664Smav dev_relthread(dev, ref); 990275474Smav 991287664Smavdone: 992275474Smav data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr; 993275474Smav scsi_u64to8b(lbalen->lba, data->descr[0].addr); 994287499Smav scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize - 995287499Smav lbalen->lba), data->descr[0].length); 996275474Smav data->descr[0].status = status; 997275474Smav 998275474Smav ctl_complete_beio(beio); 999275474Smav} 1000275474Smav 1001275474Smavstatic void 1002229997Skenctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun, 1003229997Sken struct ctl_be_block_io *beio) 1004229997Sken{ 1005229997Sken struct bio *bio; 1006287664Smav struct cdevsw *csw; 1007287664Smav struct cdev *dev; 1008287664Smav int ref; 1009229997Sken 1010229997Sken DPRINTF("entered\n"); 1011229997Sken 1012229997Sken /* This can't fail, it's a blocking allocation. */ 1013229997Sken bio = g_alloc_bio(); 1014229997Sken 1015229997Sken bio->bio_cmd = BIO_FLUSH; 1016229997Sken bio->bio_offset = 0; 1017229997Sken bio->bio_data = 0; 1018229997Sken bio->bio_done = ctl_be_block_biodone; 1019229997Sken bio->bio_caller1 = beio; 1020229997Sken bio->bio_pblkno = 0; 1021229997Sken 1022229997Sken /* 1023229997Sken * We don't need to acquire the LUN lock here, because we are only 1024229997Sken * sending one bio, and so there is no other context to synchronize 1025229997Sken * with. 1026229997Sken */ 1027229997Sken beio->num_bios_sent = 1; 1028229997Sken beio->send_complete = 1; 1029229997Sken 1030229997Sken binuptime(&beio->ds_t0); 1031267877Smav mtx_lock(&be_lun->io_lock); 1032229997Sken devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 1033267877Smav mtx_unlock(&be_lun->io_lock); 1034229997Sken 1035287664Smav csw = devvn_refthread(be_lun->vn, &dev, &ref); 1036287664Smav if (csw) { 1037287664Smav bio->bio_dev = dev; 1038287664Smav csw->d_strategy(bio); 1039287664Smav dev_relthread(dev, ref); 1040287664Smav } else { 1041287664Smav bio->bio_error = ENXIO; 1042287664Smav ctl_be_block_biodone(bio); 1043287664Smav } 1044229997Sken} 1045229997Sken 1046229997Skenstatic void 1047264274Smavctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun, 1048264274Smav struct ctl_be_block_io *beio, 1049264274Smav uint64_t off, uint64_t len, int last) 1050264274Smav{ 1051264274Smav struct bio *bio; 1052264296Smav uint64_t maxlen; 1053287664Smav struct cdevsw *csw; 1054287664Smav struct cdev *dev; 1055287664Smav int ref; 1056264274Smav 1057287664Smav csw = devvn_refthread(be_lun->vn, &dev, &ref); 1058287499Smav maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize); 1059264274Smav while (len > 0) { 1060264274Smav bio = g_alloc_bio(); 1061264274Smav bio->bio_cmd = BIO_DELETE; 1062287664Smav bio->bio_dev = dev; 1063264274Smav bio->bio_offset = off; 1064264296Smav bio->bio_length = MIN(len, maxlen); 1065264274Smav bio->bio_data = 0; 1066264274Smav bio->bio_done = ctl_be_block_biodone; 1067264274Smav bio->bio_caller1 = beio; 1068287499Smav bio->bio_pblkno = off / be_lun->cbe_lun.blocksize; 1069264274Smav 1070264274Smav off += bio->bio_length; 1071264274Smav len -= bio->bio_length; 1072264274Smav 1073267877Smav mtx_lock(&be_lun->io_lock); 1074264274Smav beio->num_bios_sent++; 1075264274Smav if (last && len == 0) 1076264274Smav beio->send_complete = 1; 1077267877Smav mtx_unlock(&be_lun->io_lock); 1078264274Smav 1079287664Smav if (csw) { 1080287664Smav csw->d_strategy(bio); 1081287664Smav } else { 1082287664Smav bio->bio_error = ENXIO; 1083287664Smav ctl_be_block_biodone(bio); 1084287664Smav } 1085264274Smav } 1086287664Smav if (csw) 1087287664Smav dev_relthread(dev, ref); 1088264274Smav} 1089264274Smav 1090264274Smavstatic void 1091264274Smavctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun, 1092264274Smav struct ctl_be_block_io *beio) 1093264274Smav{ 1094264274Smav union ctl_io *io; 1095267515Smav struct ctl_ptr_len_flags *ptrlen; 1096264274Smav struct scsi_unmap_desc *buf, *end; 1097264274Smav uint64_t len; 1098264274Smav 1099264274Smav io = beio->io; 1100264274Smav 1101264274Smav DPRINTF("entered\n"); 1102264274Smav 1103264274Smav binuptime(&beio->ds_t0); 1104267877Smav mtx_lock(&be_lun->io_lock); 1105264274Smav devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 1106267877Smav mtx_unlock(&be_lun->io_lock); 1107264274Smav 1108264274Smav if (beio->io_offset == -1) { 1109264274Smav beio->io_len = 0; 1110267515Smav ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 1111267515Smav buf = (struct scsi_unmap_desc *)ptrlen->ptr; 1112267515Smav end = buf + ptrlen->len / sizeof(*buf); 1113264274Smav for (; buf < end; buf++) { 1114264274Smav len = (uint64_t)scsi_4btoul(buf->length) * 1115287499Smav be_lun->cbe_lun.blocksize; 1116264274Smav beio->io_len += len; 1117264274Smav ctl_be_block_unmap_dev_range(be_lun, beio, 1118287499Smav scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize, 1119287499Smav len, (end - buf < 2) ? TRUE : FALSE); 1120264274Smav } 1121264274Smav } else 1122264274Smav ctl_be_block_unmap_dev_range(be_lun, beio, 1123264274Smav beio->io_offset, beio->io_len, TRUE); 1124264274Smav} 1125264274Smav 1126264274Smavstatic void 1127229997Skenctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun, 1128229997Sken struct ctl_be_block_io *beio) 1129229997Sken{ 1130267877Smav TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); 1131229997Sken struct bio *bio; 1132287664Smav struct cdevsw *csw; 1133287664Smav struct cdev *dev; 1134229997Sken off_t cur_offset; 1135287664Smav int i, max_iosize, ref; 1136229997Sken 1137229997Sken DPRINTF("entered\n"); 1138287664Smav csw = devvn_refthread(be_lun->vn, &dev, &ref); 1139229997Sken 1140229997Sken /* 1141229997Sken * We have to limit our I/O size to the maximum supported by the 1142229997Sken * backend device. Hopefully it is MAXPHYS. If the driver doesn't 1143229997Sken * set it properly, use DFLTPHYS. 1144229997Sken */ 1145287664Smav if (csw) { 1146287664Smav max_iosize = dev->si_iosize_max; 1147287664Smav if (max_iosize < PAGE_SIZE) 1148287664Smav max_iosize = DFLTPHYS; 1149287664Smav } else 1150229997Sken max_iosize = DFLTPHYS; 1151229997Sken 1152229997Sken cur_offset = beio->io_offset; 1153229997Sken for (i = 0; i < beio->num_segs; i++) { 1154229997Sken size_t cur_size; 1155229997Sken uint8_t *cur_ptr; 1156229997Sken 1157229997Sken cur_size = beio->sg_segs[i].len; 1158229997Sken cur_ptr = beio->sg_segs[i].addr; 1159229997Sken 1160229997Sken while (cur_size > 0) { 1161229997Sken /* This can't fail, it's a blocking allocation. */ 1162229997Sken bio = g_alloc_bio(); 1163229997Sken 1164229997Sken KASSERT(bio != NULL, ("g_alloc_bio() failed!\n")); 1165229997Sken 1166229997Sken bio->bio_cmd = beio->bio_cmd; 1167287664Smav bio->bio_dev = dev; 1168229997Sken bio->bio_caller1 = beio; 1169229997Sken bio->bio_length = min(cur_size, max_iosize); 1170229997Sken bio->bio_offset = cur_offset; 1171229997Sken bio->bio_data = cur_ptr; 1172229997Sken bio->bio_done = ctl_be_block_biodone; 1173287499Smav bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize; 1174229997Sken 1175229997Sken cur_offset += bio->bio_length; 1176229997Sken cur_ptr += bio->bio_length; 1177229997Sken cur_size -= bio->bio_length; 1178229997Sken 1179267877Smav TAILQ_INSERT_TAIL(&queue, bio, bio_queue); 1180229997Sken beio->num_bios_sent++; 1181229997Sken } 1182229997Sken } 1183267877Smav binuptime(&beio->ds_t0); 1184267877Smav mtx_lock(&be_lun->io_lock); 1185267877Smav devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 1186267877Smav beio->send_complete = 1; 1187267877Smav mtx_unlock(&be_lun->io_lock); 1188267877Smav 1189267877Smav /* 1190267877Smav * Fire off all allocated requests! 1191267877Smav */ 1192267877Smav while ((bio = TAILQ_FIRST(&queue)) != NULL) { 1193267877Smav TAILQ_REMOVE(&queue, bio, bio_queue); 1194287664Smav if (csw) 1195287664Smav csw->d_strategy(bio); 1196287664Smav else { 1197287664Smav bio->bio_error = ENXIO; 1198287664Smav ctl_be_block_biodone(bio); 1199287664Smav } 1200267877Smav } 1201287664Smav if (csw) 1202287664Smav dev_relthread(dev, ref); 1203229997Sken} 1204229997Sken 1205274154Smavstatic uint64_t 1206274154Smavctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname) 1207274154Smav{ 1208274154Smav struct diocgattr_arg arg; 1209287664Smav struct cdevsw *csw; 1210287664Smav struct cdev *dev; 1211287664Smav int error, ref; 1212274154Smav 1213287664Smav csw = devvn_refthread(be_lun->vn, &dev, &ref); 1214287664Smav if (csw == NULL) 1215274154Smav return (UINT64_MAX); 1216274154Smav strlcpy(arg.name, attrname, sizeof(arg.name)); 1217274154Smav arg.len = sizeof(arg.value.off); 1218287664Smav if (csw->d_ioctl) { 1219287664Smav error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD, 1220287664Smav curthread); 1221287664Smav } else 1222287664Smav error = ENODEV; 1223287664Smav dev_relthread(dev, ref); 1224274154Smav if (error != 0) 1225274154Smav return (UINT64_MAX); 1226274154Smav return (arg.value.off); 1227274154Smav} 1228274154Smav 1229229997Skenstatic void 1230286353Smavctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun, 1231286353Smav union ctl_io *io) 1232286353Smav{ 1233287499Smav struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1234286353Smav struct ctl_be_block_io *beio; 1235286353Smav struct ctl_lba_len_flags *lbalen; 1236286353Smav 1237286353Smav DPRINTF("entered\n"); 1238286353Smav beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1239286353Smav lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 1240286353Smav 1241287499Smav beio->io_len = lbalen->len * cbe_lun->blocksize; 1242287499Smav beio->io_offset = lbalen->lba * cbe_lun->blocksize; 1243286353Smav beio->io_arg = (lbalen->flags & SSC_IMMED) != 0; 1244286353Smav beio->bio_cmd = BIO_FLUSH; 1245286353Smav beio->ds_trans_type = DEVSTAT_NO_DATA; 1246286353Smav DPRINTF("SYNC\n"); 1247286353Smav be_lun->lun_flush(be_lun, beio); 1248286353Smav} 1249286353Smav 1250286353Smavstatic void 1251264274Smavctl_be_block_cw_done_ws(struct ctl_be_block_io *beio) 1252264274Smav{ 1253264274Smav union ctl_io *io; 1254264274Smav 1255264274Smav io = beio->io; 1256264274Smav ctl_free_beio(beio); 1257267641Smav if ((io->io_hdr.flags & CTL_FLAG_ABORT) || 1258267641Smav ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && 1259267641Smav (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { 1260264274Smav ctl_config_write_done(io); 1261264274Smav return; 1262264274Smav } 1263264274Smav 1264264274Smav ctl_be_block_config_write(io); 1265264274Smav} 1266264274Smav 1267264274Smavstatic void 1268264274Smavctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun, 1269264274Smav union ctl_io *io) 1270264274Smav{ 1271287499Smav struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1272264274Smav struct ctl_be_block_io *beio; 1273267515Smav struct ctl_lba_len_flags *lbalen; 1274278625Smav uint64_t len_left, lba; 1275278625Smav uint32_t pb, pbo, adj; 1276264274Smav int i, seglen; 1277264274Smav uint8_t *buf, *end; 1278264274Smav 1279264274Smav DPRINTF("entered\n"); 1280264274Smav 1281267519Smav beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1282267537Smav lbalen = ARGS(beio->io); 1283264274Smav 1284271839Smav if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) || 1285269622Smav (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) { 1286264274Smav ctl_free_beio(beio); 1287264274Smav ctl_set_invalid_field(&io->scsiio, 1288264274Smav /*sks_valid*/ 1, 1289264274Smav /*command*/ 1, 1290264274Smav /*field*/ 1, 1291264274Smav /*bit_valid*/ 0, 1292264274Smav /*bit*/ 0); 1293264274Smav ctl_config_write_done(io); 1294264274Smav return; 1295264274Smav } 1296264274Smav 1297269622Smav if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) { 1298287499Smav beio->io_offset = lbalen->lba * cbe_lun->blocksize; 1299287499Smav beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize; 1300264274Smav beio->bio_cmd = BIO_DELETE; 1301264274Smav beio->ds_trans_type = DEVSTAT_FREE; 1302264274Smav 1303264274Smav be_lun->unmap(be_lun, beio); 1304264274Smav return; 1305264274Smav } 1306264274Smav 1307264274Smav beio->bio_cmd = BIO_WRITE; 1308264274Smav beio->ds_trans_type = DEVSTAT_WRITE; 1309264274Smav 1310264274Smav DPRINTF("WRITE SAME at LBA %jx len %u\n", 1311267515Smav (uintmax_t)lbalen->lba, lbalen->len); 1312264274Smav 1313287499Smav pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp; 1314287499Smav if (be_lun->cbe_lun.pblockoff > 0) 1315287499Smav pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff; 1316278625Smav else 1317278625Smav pbo = 0; 1318287499Smav len_left = (uint64_t)lbalen->len * cbe_lun->blocksize; 1319264274Smav for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) { 1320264274Smav 1321264274Smav /* 1322264274Smav * Setup the S/G entry for this chunk. 1323264274Smav */ 1324264886Smav seglen = MIN(CTLBLK_MAX_SEG, len_left); 1325287499Smav if (pb > cbe_lun->blocksize) { 1326287499Smav adj = ((lbalen->lba + lba) * cbe_lun->blocksize + 1327278619Smav seglen - pbo) % pb; 1328278619Smav if (seglen > adj) 1329278619Smav seglen -= adj; 1330278619Smav else 1331287499Smav seglen -= seglen % cbe_lun->blocksize; 1332278619Smav } else 1333287499Smav seglen -= seglen % cbe_lun->blocksize; 1334264274Smav beio->sg_segs[i].len = seglen; 1335264274Smav beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK); 1336264274Smav 1337264274Smav DPRINTF("segment %d addr %p len %zd\n", i, 1338264274Smav beio->sg_segs[i].addr, beio->sg_segs[i].len); 1339264274Smav 1340264274Smav beio->num_segs++; 1341264274Smav len_left -= seglen; 1342264274Smav 1343264274Smav buf = beio->sg_segs[i].addr; 1344264274Smav end = buf + seglen; 1345287499Smav for (; buf < end; buf += cbe_lun->blocksize) { 1346288175Smav if (lbalen->flags & SWS_NDOB) { 1347288175Smav memset(buf, 0, cbe_lun->blocksize); 1348288175Smav } else { 1349288175Smav memcpy(buf, io->scsiio.kern_data_ptr, 1350288175Smav cbe_lun->blocksize); 1351288175Smav } 1352267515Smav if (lbalen->flags & SWS_LBDATA) 1353267515Smav scsi_ulto4b(lbalen->lba + lba, buf); 1354264274Smav lba++; 1355264274Smav } 1356264274Smav } 1357264274Smav 1358287499Smav beio->io_offset = lbalen->lba * cbe_lun->blocksize; 1359287499Smav beio->io_len = lba * cbe_lun->blocksize; 1360264274Smav 1361264274Smav /* We can not do all in one run. Correct and schedule rerun. */ 1362264274Smav if (len_left > 0) { 1363267515Smav lbalen->lba += lba; 1364267515Smav lbalen->len -= lba; 1365264274Smav beio->beio_cont = ctl_be_block_cw_done_ws; 1366264274Smav } 1367264274Smav 1368264274Smav be_lun->dispatch(be_lun, beio); 1369264274Smav} 1370264274Smav 1371264274Smavstatic void 1372264274Smavctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun, 1373264274Smav union ctl_io *io) 1374264274Smav{ 1375264274Smav struct ctl_be_block_io *beio; 1376267515Smav struct ctl_ptr_len_flags *ptrlen; 1377264274Smav 1378264274Smav DPRINTF("entered\n"); 1379264274Smav 1380267519Smav beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1381267515Smav ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 1382264274Smav 1383269622Smav if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) { 1384264274Smav ctl_free_beio(beio); 1385264274Smav ctl_set_invalid_field(&io->scsiio, 1386264274Smav /*sks_valid*/ 0, 1387264274Smav /*command*/ 1, 1388264274Smav /*field*/ 0, 1389264274Smav /*bit_valid*/ 0, 1390264274Smav /*bit*/ 0); 1391264274Smav ctl_config_write_done(io); 1392264274Smav return; 1393264274Smav } 1394264274Smav 1395264274Smav beio->io_len = 0; 1396264274Smav beio->io_offset = -1; 1397264274Smav beio->bio_cmd = BIO_DELETE; 1398264274Smav beio->ds_trans_type = DEVSTAT_FREE; 1399267515Smav DPRINTF("UNMAP\n"); 1400264274Smav be_lun->unmap(be_lun, beio); 1401264274Smav} 1402264274Smav 1403264274Smavstatic void 1404275474Smavctl_be_block_cr_done(struct ctl_be_block_io *beio) 1405275474Smav{ 1406275474Smav union ctl_io *io; 1407275474Smav 1408275474Smav io = beio->io; 1409275474Smav ctl_free_beio(beio); 1410275474Smav ctl_config_read_done(io); 1411275474Smav} 1412275474Smav 1413275474Smavstatic void 1414275474Smavctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun, 1415275474Smav union ctl_io *io) 1416275474Smav{ 1417275474Smav struct ctl_be_block_io *beio; 1418275474Smav struct ctl_be_block_softc *softc; 1419275474Smav 1420275474Smav DPRINTF("entered\n"); 1421275474Smav 1422275474Smav softc = be_lun->softc; 1423275474Smav beio = ctl_alloc_beio(softc); 1424275474Smav beio->io = io; 1425275474Smav beio->lun = be_lun; 1426275474Smav beio->beio_cont = ctl_be_block_cr_done; 1427275474Smav PRIV(io)->ptr = (void *)beio; 1428275474Smav 1429275474Smav switch (io->scsiio.cdb[0]) { 1430275474Smav case SERVICE_ACTION_IN: /* GET LBA STATUS */ 1431275474Smav beio->bio_cmd = -1; 1432275474Smav beio->ds_trans_type = DEVSTAT_NO_DATA; 1433275474Smav beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1434275474Smav beio->io_len = 0; 1435275474Smav if (be_lun->get_lba_status) 1436275474Smav be_lun->get_lba_status(be_lun, beio); 1437275474Smav else 1438275474Smav ctl_be_block_cr_done(beio); 1439275474Smav break; 1440275474Smav default: 1441275474Smav panic("Unhandled CDB type %#x", io->scsiio.cdb[0]); 1442275474Smav break; 1443275474Smav } 1444275474Smav} 1445275474Smav 1446275474Smavstatic void 1447264274Smavctl_be_block_cw_done(struct ctl_be_block_io *beio) 1448264274Smav{ 1449264274Smav union ctl_io *io; 1450264274Smav 1451264274Smav io = beio->io; 1452264274Smav ctl_free_beio(beio); 1453264274Smav ctl_config_write_done(io); 1454264274Smav} 1455264274Smav 1456264274Smavstatic void 1457229997Skenctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun, 1458229997Sken union ctl_io *io) 1459229997Sken{ 1460229997Sken struct ctl_be_block_io *beio; 1461229997Sken struct ctl_be_block_softc *softc; 1462229997Sken 1463229997Sken DPRINTF("entered\n"); 1464229997Sken 1465229997Sken softc = be_lun->softc; 1466229997Sken beio = ctl_alloc_beio(softc); 1467229997Sken beio->io = io; 1468229997Sken beio->lun = be_lun; 1469264274Smav beio->beio_cont = ctl_be_block_cw_done; 1470286353Smav switch (io->scsiio.tag_type) { 1471286353Smav case CTL_TAG_ORDERED: 1472286353Smav beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1473286353Smav break; 1474286353Smav case CTL_TAG_HEAD_OF_QUEUE: 1475286353Smav beio->ds_tag_type = DEVSTAT_TAG_HEAD; 1476286353Smav break; 1477286353Smav case CTL_TAG_UNTAGGED: 1478286353Smav case CTL_TAG_SIMPLE: 1479286353Smav case CTL_TAG_ACA: 1480286353Smav default: 1481286353Smav beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; 1482286353Smav break; 1483286353Smav } 1484267519Smav PRIV(io)->ptr = (void *)beio; 1485229997Sken 1486229997Sken switch (io->scsiio.cdb[0]) { 1487229997Sken case SYNCHRONIZE_CACHE: 1488229997Sken case SYNCHRONIZE_CACHE_16: 1489286353Smav ctl_be_block_cw_dispatch_sync(be_lun, io); 1490229997Sken break; 1491264274Smav case WRITE_SAME_10: 1492264274Smav case WRITE_SAME_16: 1493264274Smav ctl_be_block_cw_dispatch_ws(be_lun, io); 1494264274Smav break; 1495264274Smav case UNMAP: 1496264274Smav ctl_be_block_cw_dispatch_unmap(be_lun, io); 1497264274Smav break; 1498229997Sken default: 1499229997Sken panic("Unhandled CDB type %#x", io->scsiio.cdb[0]); 1500229997Sken break; 1501229997Sken } 1502229997Sken} 1503229997Sken 1504292384SmarkjSDT_PROBE_DEFINE1(cbb, , read, start, "uint64_t"); 1505292384SmarkjSDT_PROBE_DEFINE1(cbb, , write, start, "uint64_t"); 1506292384SmarkjSDT_PROBE_DEFINE1(cbb, , read, alloc_done, "uint64_t"); 1507292384SmarkjSDT_PROBE_DEFINE1(cbb, , write, alloc_done, "uint64_t"); 1508229997Sken 1509229997Skenstatic void 1510264886Smavctl_be_block_next(struct ctl_be_block_io *beio) 1511264886Smav{ 1512264886Smav struct ctl_be_block_lun *be_lun; 1513264886Smav union ctl_io *io; 1514264886Smav 1515264886Smav io = beio->io; 1516264886Smav be_lun = beio->lun; 1517264886Smav ctl_free_beio(beio); 1518267641Smav if ((io->io_hdr.flags & CTL_FLAG_ABORT) || 1519267641Smav ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && 1520267641Smav (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { 1521267537Smav ctl_data_submit_done(io); 1522264886Smav return; 1523264886Smav } 1524264886Smav 1525264886Smav io->io_hdr.status &= ~CTL_STATUS_MASK; 1526264886Smav io->io_hdr.status |= CTL_STATUS_NONE; 1527264886Smav 1528267877Smav mtx_lock(&be_lun->queue_lock); 1529264886Smav STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links); 1530267877Smav mtx_unlock(&be_lun->queue_lock); 1531264886Smav taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 1532264886Smav} 1533264886Smav 1534264886Smavstatic void 1535229997Skenctl_be_block_dispatch(struct ctl_be_block_lun *be_lun, 1536229997Sken union ctl_io *io) 1537229997Sken{ 1538287499Smav struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1539229997Sken struct ctl_be_block_io *beio; 1540229997Sken struct ctl_be_block_softc *softc; 1541267537Smav struct ctl_lba_len_flags *lbalen; 1542267519Smav struct ctl_ptr_len_flags *bptrlen; 1543267519Smav uint64_t len_left, lbas; 1544229997Sken int i; 1545229997Sken 1546229997Sken softc = be_lun->softc; 1547229997Sken 1548229997Sken DPRINTF("entered\n"); 1549229997Sken 1550267537Smav lbalen = ARGS(io); 1551267537Smav if (lbalen->flags & CTL_LLF_WRITE) { 1552292384Smarkj SDT_PROBE0(cbb, , write, start); 1553267537Smav } else { 1554292384Smarkj SDT_PROBE0(cbb, , read, start); 1555229997Sken } 1556229997Sken 1557229997Sken beio = ctl_alloc_beio(softc); 1558229997Sken beio->io = io; 1559229997Sken beio->lun = be_lun; 1560267519Smav bptrlen = PRIV(io); 1561267519Smav bptrlen->ptr = (void *)beio; 1562229997Sken 1563229997Sken switch (io->scsiio.tag_type) { 1564229997Sken case CTL_TAG_ORDERED: 1565229997Sken beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1566229997Sken break; 1567229997Sken case CTL_TAG_HEAD_OF_QUEUE: 1568229997Sken beio->ds_tag_type = DEVSTAT_TAG_HEAD; 1569229997Sken break; 1570229997Sken case CTL_TAG_UNTAGGED: 1571229997Sken case CTL_TAG_SIMPLE: 1572229997Sken case CTL_TAG_ACA: 1573229997Sken default: 1574229997Sken beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; 1575229997Sken break; 1576229997Sken } 1577229997Sken 1578267537Smav if (lbalen->flags & CTL_LLF_WRITE) { 1579267537Smav beio->bio_cmd = BIO_WRITE; 1580267537Smav beio->ds_trans_type = DEVSTAT_WRITE; 1581267537Smav } else { 1582229997Sken beio->bio_cmd = BIO_READ; 1583229997Sken beio->ds_trans_type = DEVSTAT_READ; 1584229997Sken } 1585229997Sken 1586264886Smav DPRINTF("%s at LBA %jx len %u @%ju\n", 1587229997Sken (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE", 1588267519Smav (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len); 1589267537Smav if (lbalen->flags & CTL_LLF_COMPARE) 1590267537Smav lbas = CTLBLK_HALF_IO_SIZE; 1591267537Smav else 1592267537Smav lbas = CTLBLK_MAX_IO_SIZE; 1593287499Smav lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize); 1594287499Smav beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize; 1595287499Smav beio->io_len = lbas * cbe_lun->blocksize; 1596267519Smav bptrlen->len += lbas; 1597229997Sken 1598264886Smav for (i = 0, len_left = beio->io_len; len_left > 0; i++) { 1599264886Smav KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)", 1600264886Smav i, CTLBLK_MAX_SEGS)); 1601229997Sken 1602229997Sken /* 1603229997Sken * Setup the S/G entry for this chunk. 1604229997Sken */ 1605264886Smav beio->sg_segs[i].len = min(CTLBLK_MAX_SEG, len_left); 1606229997Sken beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK); 1607229997Sken 1608229997Sken DPRINTF("segment %d addr %p len %zd\n", i, 1609229997Sken beio->sg_segs[i].addr, beio->sg_segs[i].len); 1610229997Sken 1611267537Smav /* Set up second segment for compare operation. */ 1612267537Smav if (lbalen->flags & CTL_LLF_COMPARE) { 1613267537Smav beio->sg_segs[i + CTLBLK_HALF_SEGS].len = 1614267537Smav beio->sg_segs[i].len; 1615267537Smav beio->sg_segs[i + CTLBLK_HALF_SEGS].addr = 1616267537Smav uma_zalloc(be_lun->lun_zone, M_WAITOK); 1617267537Smav } 1618267537Smav 1619229997Sken beio->num_segs++; 1620229997Sken len_left -= beio->sg_segs[i].len; 1621229997Sken } 1622267519Smav if (bptrlen->len < lbalen->len) 1623264886Smav beio->beio_cont = ctl_be_block_next; 1624264886Smav io->scsiio.be_move_done = ctl_be_block_move_done; 1625267537Smav /* For compare we have separate S/G lists for read and datamove. */ 1626267537Smav if (lbalen->flags & CTL_LLF_COMPARE) 1627267537Smav io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS]; 1628267537Smav else 1629267537Smav io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs; 1630264886Smav io->scsiio.kern_data_len = beio->io_len; 1631264886Smav io->scsiio.kern_sg_entries = beio->num_segs; 1632288020Smav io->io_hdr.flags |= CTL_FLAG_ALLOCATED; 1633229997Sken 1634229997Sken /* 1635229997Sken * For the read case, we need to read the data into our buffers and 1636229997Sken * then we can send it back to the user. For the write case, we 1637229997Sken * need to get the data from the user first. 1638229997Sken */ 1639229997Sken if (beio->bio_cmd == BIO_READ) { 1640292384Smarkj SDT_PROBE0(cbb, , read, alloc_done); 1641229997Sken be_lun->dispatch(be_lun, beio); 1642229997Sken } else { 1643292384Smarkj SDT_PROBE0(cbb, , write, alloc_done); 1644229997Sken#ifdef CTL_TIME_IO 1645288215Smav getbinuptime(&io->io_hdr.dma_start_bt); 1646288215Smav#endif 1647229997Sken ctl_datamove(io); 1648229997Sken } 1649229997Sken} 1650229997Sken 1651229997Skenstatic void 1652229997Skenctl_be_block_worker(void *context, int pending) 1653229997Sken{ 1654287670Smav struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context; 1655287670Smav struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1656229997Sken union ctl_io *io; 1657287670Smav struct ctl_be_block_io *beio; 1658229997Sken 1659229997Sken DPRINTF("entered\n"); 1660287670Smav /* 1661287670Smav * Fetch and process I/Os from all queues. If we detect LUN 1662288348Smav * CTL_LUN_FLAG_NO_MEDIA status here -- it is result of a race, 1663287670Smav * so make response maximally opaque to not confuse initiator. 1664287670Smav */ 1665229997Sken for (;;) { 1666287670Smav mtx_lock(&be_lun->queue_lock); 1667229997Sken io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue); 1668229997Sken if (io != NULL) { 1669229997Sken DPRINTF("datamove queue\n"); 1670229997Sken STAILQ_REMOVE(&be_lun->datamove_queue, &io->io_hdr, 1671229997Sken ctl_io_hdr, links); 1672267877Smav mtx_unlock(&be_lun->queue_lock); 1673267519Smav beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1674288348Smav if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { 1675287670Smav ctl_set_busy(&io->scsiio); 1676287670Smav ctl_complete_beio(beio); 1677287670Smav return; 1678287670Smav } 1679229997Sken be_lun->dispatch(be_lun, beio); 1680229997Sken continue; 1681229997Sken } 1682229997Sken io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue); 1683229997Sken if (io != NULL) { 1684229997Sken DPRINTF("config write queue\n"); 1685229997Sken STAILQ_REMOVE(&be_lun->config_write_queue, &io->io_hdr, 1686229997Sken ctl_io_hdr, links); 1687267877Smav mtx_unlock(&be_lun->queue_lock); 1688288348Smav if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { 1689287670Smav ctl_set_busy(&io->scsiio); 1690287670Smav ctl_config_write_done(io); 1691287670Smav return; 1692287670Smav } 1693229997Sken ctl_be_block_cw_dispatch(be_lun, io); 1694229997Sken continue; 1695229997Sken } 1696275474Smav io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue); 1697275474Smav if (io != NULL) { 1698275474Smav DPRINTF("config read queue\n"); 1699275474Smav STAILQ_REMOVE(&be_lun->config_read_queue, &io->io_hdr, 1700275474Smav ctl_io_hdr, links); 1701275474Smav mtx_unlock(&be_lun->queue_lock); 1702288348Smav if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { 1703287670Smav ctl_set_busy(&io->scsiio); 1704287670Smav ctl_config_read_done(io); 1705287670Smav return; 1706287670Smav } 1707275474Smav ctl_be_block_cr_dispatch(be_lun, io); 1708275474Smav continue; 1709275474Smav } 1710229997Sken io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue); 1711229997Sken if (io != NULL) { 1712229997Sken DPRINTF("input queue\n"); 1713229997Sken STAILQ_REMOVE(&be_lun->input_queue, &io->io_hdr, 1714229997Sken ctl_io_hdr, links); 1715267877Smav mtx_unlock(&be_lun->queue_lock); 1716288348Smav if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { 1717287670Smav ctl_set_busy(&io->scsiio); 1718287670Smav ctl_data_submit_done(io); 1719287670Smav return; 1720287670Smav } 1721229997Sken ctl_be_block_dispatch(be_lun, io); 1722229997Sken continue; 1723229997Sken } 1724229997Sken 1725229997Sken /* 1726229997Sken * If we get here, there is no work left in the queues, so 1727229997Sken * just break out and let the task queue go to sleep. 1728229997Sken */ 1729287670Smav mtx_unlock(&be_lun->queue_lock); 1730229997Sken break; 1731229997Sken } 1732229997Sken} 1733229997Sken 1734229997Sken/* 1735229997Sken * Entry point from CTL to the backend for I/O. We queue everything to a 1736229997Sken * work thread, so this just puts the I/O on a queue and wakes up the 1737229997Sken * thread. 1738229997Sken */ 1739229997Skenstatic int 1740229997Skenctl_be_block_submit(union ctl_io *io) 1741229997Sken{ 1742229997Sken struct ctl_be_block_lun *be_lun; 1743287499Smav struct ctl_be_lun *cbe_lun; 1744229997Sken 1745229997Sken DPRINTF("entered\n"); 1746229997Sken 1747312834Smav cbe_lun = CTL_BACKEND_LUN(io); 1748287499Smav be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun; 1749229997Sken 1750229997Sken /* 1751229997Sken * Make sure we only get SCSI I/O. 1752229997Sken */ 1753229997Sken KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, ("Non-SCSI I/O (type " 1754229997Sken "%#x) encountered", io->io_hdr.io_type)); 1755229997Sken 1756267519Smav PRIV(io)->len = 0; 1757267519Smav 1758267877Smav mtx_lock(&be_lun->queue_lock); 1759229997Sken STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links); 1760267877Smav mtx_unlock(&be_lun->queue_lock); 1761229997Sken taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 1762229997Sken 1763267514Smav return (CTL_RETVAL_COMPLETE); 1764229997Sken} 1765229997Sken 1766229997Skenstatic int 1767229997Skenctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, 1768229997Sken int flag, struct thread *td) 1769229997Sken{ 1770361256Smav struct ctl_be_block_softc *softc = &backend_block_softc; 1771229997Sken int error; 1772229997Sken 1773229997Sken error = 0; 1774229997Sken switch (cmd) { 1775229997Sken case CTL_LUN_REQ: { 1776229997Sken struct ctl_lun_req *lun_req; 1777229997Sken 1778229997Sken lun_req = (struct ctl_lun_req *)addr; 1779229997Sken 1780229997Sken switch (lun_req->reqtype) { 1781229997Sken case CTL_LUNREQ_CREATE: 1782229997Sken error = ctl_be_block_create(softc, lun_req); 1783229997Sken break; 1784229997Sken case CTL_LUNREQ_RM: 1785229997Sken error = ctl_be_block_rm(softc, lun_req); 1786229997Sken break; 1787232604Strasz case CTL_LUNREQ_MODIFY: 1788232604Strasz error = ctl_be_block_modify(softc, lun_req); 1789232604Strasz break; 1790229997Sken default: 1791229997Sken lun_req->status = CTL_LUN_ERROR; 1792229997Sken snprintf(lun_req->error_str, sizeof(lun_req->error_str), 1793272911Smav "invalid LUN request type %d", 1794229997Sken lun_req->reqtype); 1795229997Sken break; 1796229997Sken } 1797229997Sken break; 1798229997Sken } 1799229997Sken default: 1800229997Sken error = ENOTTY; 1801229997Sken break; 1802229997Sken } 1803229997Sken 1804229997Sken return (error); 1805229997Sken} 1806229997Sken 1807229997Skenstatic int 1808229997Skenctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 1809229997Sken{ 1810287499Smav struct ctl_be_lun *cbe_lun; 1811229997Sken struct ctl_be_block_filedata *file_data; 1812229997Sken struct ctl_lun_create_params *params; 1813275865Smav char *value; 1814229997Sken struct vattr vattr; 1815275865Smav off_t ps, pss, po, pos, us, uss, uo, uos; 1816229997Sken int error; 1817229997Sken 1818287499Smav cbe_lun = &be_lun->cbe_lun; 1819229997Sken file_data = &be_lun->backend.file; 1820272911Smav params = &be_lun->params; 1821229997Sken 1822229997Sken be_lun->dev_type = CTL_BE_BLOCK_FILE; 1823229997Sken be_lun->dispatch = ctl_be_block_dispatch_file; 1824229997Sken be_lun->lun_flush = ctl_be_block_flush_file; 1825275474Smav be_lun->get_lba_status = ctl_be_block_gls_file; 1826275481Smav be_lun->getattr = ctl_be_block_getattr_file; 1827287499Smav be_lun->unmap = NULL; 1828287499Smav cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP; 1829229997Sken 1830229997Sken error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); 1831229997Sken if (error != 0) { 1832229997Sken snprintf(req->error_str, sizeof(req->error_str), 1833229997Sken "error calling VOP_GETATTR() for file %s", 1834229997Sken be_lun->dev_path); 1835229997Sken return (error); 1836229997Sken } 1837229997Sken 1838229997Sken file_data->cred = crhold(curthread->td_ucred); 1839232604Strasz if (params->lun_size_bytes != 0) 1840232604Strasz be_lun->size_bytes = params->lun_size_bytes; 1841232604Strasz else 1842232604Strasz be_lun->size_bytes = vattr.va_size; 1843229997Sken 1844229997Sken /* 1845273029Smav * For files we can use any logical block size. Prefer 512 bytes 1846273029Smav * for compatibility reasons. If file's vattr.va_blocksize 1847273029Smav * (preferred I/O block size) is bigger and multiple to chosen 1848273029Smav * logical block size -- report it as physical block size. 1849229997Sken */ 1850229997Sken if (params->blocksize_bytes != 0) 1851287499Smav cbe_lun->blocksize = params->blocksize_bytes; 1852288310Smav else if (cbe_lun->lun_type == T_CDROM) 1853288310Smav cbe_lun->blocksize = 2048; 1854229997Sken else 1855287499Smav cbe_lun->blocksize = 512; 1856287499Smav be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; 1857287499Smav cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 1858287499Smav 0 : (be_lun->size_blocks - 1); 1859275865Smav 1860275865Smav us = ps = vattr.va_blocksize; 1861275865Smav uo = po = 0; 1862275865Smav 1863287499Smav value = ctl_get_opt(&cbe_lun->options, "pblocksize"); 1864275865Smav if (value != NULL) 1865275865Smav ctl_expand_number(value, &ps); 1866287499Smav value = ctl_get_opt(&cbe_lun->options, "pblockoffset"); 1867275865Smav if (value != NULL) 1868275865Smav ctl_expand_number(value, &po); 1869287499Smav pss = ps / cbe_lun->blocksize; 1870287499Smav pos = po / cbe_lun->blocksize; 1871287499Smav if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) && 1872287499Smav ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) { 1873287499Smav cbe_lun->pblockexp = fls(pss) - 1; 1874287499Smav cbe_lun->pblockoff = (pss - pos) % pss; 1875273029Smav } 1876229997Sken 1877287499Smav value = ctl_get_opt(&cbe_lun->options, "ublocksize"); 1878275865Smav if (value != NULL) 1879275865Smav ctl_expand_number(value, &us); 1880287499Smav value = ctl_get_opt(&cbe_lun->options, "ublockoffset"); 1881275865Smav if (value != NULL) 1882275865Smav ctl_expand_number(value, &uo); 1883287499Smav uss = us / cbe_lun->blocksize; 1884287499Smav uos = uo / cbe_lun->blocksize; 1885287499Smav if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) && 1886287499Smav ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) { 1887287499Smav cbe_lun->ublockexp = fls(uss) - 1; 1888287499Smav cbe_lun->ublockoff = (uss - uos) % uss; 1889275865Smav } 1890275865Smav 1891229997Sken /* 1892229997Sken * Sanity check. The media size has to be at least one 1893229997Sken * sector long. 1894229997Sken */ 1895287499Smav if (be_lun->size_bytes < cbe_lun->blocksize) { 1896229997Sken error = EINVAL; 1897229997Sken snprintf(req->error_str, sizeof(req->error_str), 1898229997Sken "file %s size %ju < block size %u", be_lun->dev_path, 1899287499Smav (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize); 1900229997Sken } 1901275920Smav 1902287499Smav cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize; 1903229997Sken return (error); 1904229997Sken} 1905229997Sken 1906229997Skenstatic int 1907229997Skenctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 1908229997Sken{ 1909287499Smav struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 1910229997Sken struct ctl_lun_create_params *params; 1911287664Smav struct cdevsw *csw; 1912229997Sken struct cdev *dev; 1913275865Smav char *value; 1914287664Smav int error, atomic, maxio, ref, unmap, tmp; 1915287221Smav off_t ps, pss, po, pos, us, uss, uo, uos, otmp; 1916229997Sken 1917272911Smav params = &be_lun->params; 1918229997Sken 1919229997Sken be_lun->dev_type = CTL_BE_BLOCK_DEV; 1920287664Smav csw = devvn_refthread(be_lun->vn, &dev, &ref); 1921287664Smav if (csw == NULL) 1922287664Smav return (ENXIO); 1923287664Smav if (strcmp(csw->d_name, "zvol") == 0) { 1924269123Smav be_lun->dispatch = ctl_be_block_dispatch_zvol; 1925275474Smav be_lun->get_lba_status = ctl_be_block_gls_zvol; 1926275920Smav atomic = maxio = CTLBLK_MAX_IO_SIZE; 1927275920Smav } else { 1928269123Smav be_lun->dispatch = ctl_be_block_dispatch_dev; 1929287499Smav be_lun->get_lba_status = NULL; 1930275920Smav atomic = 0; 1931287664Smav maxio = dev->si_iosize_max; 1932275920Smav if (maxio <= 0) 1933275920Smav maxio = DFLTPHYS; 1934275920Smav if (maxio > CTLBLK_MAX_IO_SIZE) 1935275920Smav maxio = CTLBLK_MAX_IO_SIZE; 1936275920Smav } 1937269123Smav be_lun->lun_flush = ctl_be_block_flush_dev; 1938274154Smav be_lun->getattr = ctl_be_block_getattr_dev; 1939287499Smav be_lun->unmap = ctl_be_block_unmap_dev; 1940229997Sken 1941287664Smav if (!csw->d_ioctl) { 1942287664Smav dev_relthread(dev, ref); 1943229997Sken snprintf(req->error_str, sizeof(req->error_str), 1944287664Smav "no d_ioctl for device %s!", be_lun->dev_path); 1945229997Sken return (ENODEV); 1946229997Sken } 1947229997Sken 1948287664Smav error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD, 1949229997Sken curthread); 1950229997Sken if (error) { 1951287664Smav dev_relthread(dev, ref); 1952229997Sken snprintf(req->error_str, sizeof(req->error_str), 1953272911Smav "error %d returned for DIOCGSECTORSIZE ioctl " 1954272911Smav "on %s!", error, be_lun->dev_path); 1955229997Sken return (error); 1956229997Sken } 1957229997Sken 1958229997Sken /* 1959229997Sken * If the user has asked for a blocksize that is greater than the 1960229997Sken * backing device's blocksize, we can do it only if the blocksize 1961229997Sken * the user is asking for is an even multiple of the underlying 1962229997Sken * device's blocksize. 1963229997Sken */ 1964286811Smav if ((params->blocksize_bytes != 0) && 1965286811Smav (params->blocksize_bytes >= tmp)) { 1966286811Smav if (params->blocksize_bytes % tmp == 0) { 1967287499Smav cbe_lun->blocksize = params->blocksize_bytes; 1968229997Sken } else { 1969287664Smav dev_relthread(dev, ref); 1970229997Sken snprintf(req->error_str, sizeof(req->error_str), 1971272911Smav "requested blocksize %u is not an even " 1972229997Sken "multiple of backing device blocksize %u", 1973287221Smav params->blocksize_bytes, tmp); 1974229997Sken return (EINVAL); 1975229997Sken } 1976286811Smav } else if (params->blocksize_bytes != 0) { 1977287664Smav dev_relthread(dev, ref); 1978229997Sken snprintf(req->error_str, sizeof(req->error_str), 1979272911Smav "requested blocksize %u < backing device " 1980287221Smav "blocksize %u", params->blocksize_bytes, tmp); 1981229997Sken return (EINVAL); 1982288310Smav } else if (cbe_lun->lun_type == T_CDROM) 1983288310Smav cbe_lun->blocksize = MAX(tmp, 2048); 1984288310Smav else 1985287499Smav cbe_lun->blocksize = tmp; 1986229997Sken 1987287664Smav error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD, 1988287664Smav curthread); 1989229997Sken if (error) { 1990287664Smav dev_relthread(dev, ref); 1991229997Sken snprintf(req->error_str, sizeof(req->error_str), 1992272911Smav "error %d returned for DIOCGMEDIASIZE " 1993272911Smav " ioctl on %s!", error, 1994232604Strasz be_lun->dev_path); 1995229997Sken return (error); 1996229997Sken } 1997229997Sken 1998232604Strasz if (params->lun_size_bytes != 0) { 1999287221Smav if (params->lun_size_bytes > otmp) { 2000287664Smav dev_relthread(dev, ref); 2001232604Strasz snprintf(req->error_str, sizeof(req->error_str), 2002272911Smav "requested LUN size %ju > backing device " 2003272911Smav "size %ju", 2004232604Strasz (uintmax_t)params->lun_size_bytes, 2005287221Smav (uintmax_t)otmp); 2006232604Strasz return (EINVAL); 2007232604Strasz } 2008232604Strasz 2009232604Strasz be_lun->size_bytes = params->lun_size_bytes; 2010286811Smav } else 2011287221Smav be_lun->size_bytes = otmp; 2012287499Smav be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; 2013287499Smav cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 2014287499Smav 0 : (be_lun->size_blocks - 1); 2015232604Strasz 2016287664Smav error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD, 2017287664Smav curthread); 2018264191Smav if (error) 2019264191Smav ps = po = 0; 2020264191Smav else { 2021287664Smav error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po, 2022287664Smav FREAD, curthread); 2023264191Smav if (error) 2024264191Smav po = 0; 2025264191Smav } 2026275865Smav us = ps; 2027275865Smav uo = po; 2028275865Smav 2029287499Smav value = ctl_get_opt(&cbe_lun->options, "pblocksize"); 2030275865Smav if (value != NULL) 2031275865Smav ctl_expand_number(value, &ps); 2032287499Smav value = ctl_get_opt(&cbe_lun->options, "pblockoffset"); 2033275865Smav if (value != NULL) 2034275865Smav ctl_expand_number(value, &po); 2035287499Smav pss = ps / cbe_lun->blocksize; 2036287499Smav pos = po / cbe_lun->blocksize; 2037287499Smav if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) && 2038287499Smav ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) { 2039287499Smav cbe_lun->pblockexp = fls(pss) - 1; 2040287499Smav cbe_lun->pblockoff = (pss - pos) % pss; 2041264191Smav } 2042264191Smav 2043287499Smav value = ctl_get_opt(&cbe_lun->options, "ublocksize"); 2044275865Smav if (value != NULL) 2045275865Smav ctl_expand_number(value, &us); 2046287499Smav value = ctl_get_opt(&cbe_lun->options, "ublockoffset"); 2047275865Smav if (value != NULL) 2048275865Smav ctl_expand_number(value, &uo); 2049287499Smav uss = us / cbe_lun->blocksize; 2050287499Smav uos = uo / cbe_lun->blocksize; 2051287499Smav if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) && 2052287499Smav ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) { 2053287499Smav cbe_lun->ublockexp = fls(uss) - 1; 2054287499Smav cbe_lun->ublockoff = (uss - uos) % uss; 2055275865Smav } 2056275865Smav 2057287499Smav cbe_lun->atomicblock = atomic / cbe_lun->blocksize; 2058287499Smav cbe_lun->opttxferlen = maxio / cbe_lun->blocksize; 2059278672Smav 2060278672Smav if (be_lun->dispatch == ctl_be_block_dispatch_zvol) { 2061278672Smav unmap = 1; 2062278672Smav } else { 2063278672Smav struct diocgattr_arg arg; 2064278672Smav 2065278672Smav strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name)); 2066278672Smav arg.len = sizeof(arg.value.i); 2067287664Smav error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD, 2068287664Smav curthread); 2069278672Smav unmap = (error == 0) ? arg.value.i : 0; 2070278672Smav } 2071287499Smav value = ctl_get_opt(&cbe_lun->options, "unmap"); 2072278672Smav if (value != NULL) 2073278672Smav unmap = (strcmp(value, "on") == 0); 2074278672Smav if (unmap) 2075287499Smav cbe_lun->flags |= CTL_LUN_FLAG_UNMAP; 2076287499Smav else 2077287499Smav cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP; 2078278672Smav 2079287664Smav dev_relthread(dev, ref); 2080229997Sken return (0); 2081229997Sken} 2082229997Sken 2083229997Skenstatic int 2084229997Skenctl_be_block_close(struct ctl_be_block_lun *be_lun) 2085229997Sken{ 2086287499Smav struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 2087287499Smav int flags; 2088287499Smav 2089229997Sken if (be_lun->vn) { 2090287499Smav flags = FREAD; 2091287499Smav if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0) 2092287499Smav flags |= FWRITE; 2093229997Sken (void)vn_close(be_lun->vn, flags, NOCRED, curthread); 2094229997Sken be_lun->vn = NULL; 2095229997Sken 2096229997Sken switch (be_lun->dev_type) { 2097229997Sken case CTL_BE_BLOCK_DEV: 2098229997Sken break; 2099229997Sken case CTL_BE_BLOCK_FILE: 2100229997Sken if (be_lun->backend.file.cred != NULL) { 2101229997Sken crfree(be_lun->backend.file.cred); 2102229997Sken be_lun->backend.file.cred = NULL; 2103229997Sken } 2104229997Sken break; 2105229997Sken case CTL_BE_BLOCK_NONE: 2106258871Strasz break; 2107229997Sken default: 2108289702Smav panic("Unexpected backend type %d", be_lun->dev_type); 2109229997Sken break; 2110229997Sken } 2111272911Smav be_lun->dev_type = CTL_BE_BLOCK_NONE; 2112229997Sken } 2113229997Sken return (0); 2114229997Sken} 2115229997Sken 2116229997Skenstatic int 2117288348Smavctl_be_block_open(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 2118229997Sken{ 2119287499Smav struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 2120229997Sken struct nameidata nd; 2121287499Smav char *value; 2122287499Smav int error, flags; 2123229997Sken 2124229997Sken error = 0; 2125229997Sken if (rootvnode == NULL) { 2126229997Sken snprintf(req->error_str, sizeof(req->error_str), 2127272911Smav "Root filesystem is not mounted"); 2128229997Sken return (1); 2129229997Sken } 2130285391Smjg pwd_ensure_dirs(); 2131229997Sken 2132287499Smav value = ctl_get_opt(&cbe_lun->options, "file"); 2133287499Smav if (value == NULL) { 2134287499Smav snprintf(req->error_str, sizeof(req->error_str), 2135287499Smav "no file argument specified"); 2136287499Smav return (1); 2137287499Smav } 2138287499Smav free(be_lun->dev_path, M_CTLBLK); 2139287499Smav be_lun->dev_path = strdup(value, M_CTLBLK); 2140287499Smav 2141287499Smav flags = FREAD; 2142287499Smav value = ctl_get_opt(&cbe_lun->options, "readonly"); 2143288310Smav if (value != NULL) { 2144288310Smav if (strcmp(value, "on") != 0) 2145288310Smav flags |= FWRITE; 2146288310Smav } else if (cbe_lun->lun_type == T_DIRECT) 2147287499Smav flags |= FWRITE; 2148287499Smav 2149287499Smavagain: 2150229997Sken NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread); 2151229997Sken error = vn_open(&nd, &flags, 0, NULL); 2152287499Smav if ((error == EROFS || error == EACCES) && (flags & FWRITE)) { 2153287499Smav flags &= ~FWRITE; 2154287499Smav goto again; 2155287499Smav } 2156229997Sken if (error) { 2157229997Sken /* 2158229997Sken * This is the only reasonable guess we can make as far as 2159229997Sken * path if the user doesn't give us a fully qualified path. 2160229997Sken * If they want to specify a file, they need to specify the 2161229997Sken * full path. 2162229997Sken */ 2163229997Sken if (be_lun->dev_path[0] != '/') { 2164229997Sken char *dev_name; 2165229997Sken 2166287499Smav asprintf(&dev_name, M_CTLBLK, "/dev/%s", 2167287499Smav be_lun->dev_path); 2168287499Smav free(be_lun->dev_path, M_CTLBLK); 2169287499Smav be_lun->dev_path = dev_name; 2170287499Smav goto again; 2171229997Sken } 2172229997Sken snprintf(req->error_str, sizeof(req->error_str), 2173272911Smav "error opening %s: %d", be_lun->dev_path, error); 2174229997Sken return (error); 2175229997Sken } 2176287499Smav if (flags & FWRITE) 2177287499Smav cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY; 2178287499Smav else 2179287499Smav cbe_lun->flags |= CTL_LUN_FLAG_READONLY; 2180229997Sken 2181229997Sken NDFREE(&nd, NDF_ONLY_PNBUF); 2182229997Sken be_lun->vn = nd.ni_vp; 2183229997Sken 2184229997Sken /* We only support disks and files. */ 2185229997Sken if (vn_isdisk(be_lun->vn, &error)) { 2186229997Sken error = ctl_be_block_open_dev(be_lun, req); 2187229997Sken } else if (be_lun->vn->v_type == VREG) { 2188229997Sken error = ctl_be_block_open_file(be_lun, req); 2189229997Sken } else { 2190229997Sken error = EINVAL; 2191229997Sken snprintf(req->error_str, sizeof(req->error_str), 2192258871Strasz "%s is not a disk or plain file", be_lun->dev_path); 2193229997Sken } 2194229997Sken VOP_UNLOCK(be_lun->vn, 0); 2195229997Sken 2196286811Smav if (error != 0) 2197229997Sken ctl_be_block_close(be_lun); 2198287499Smav cbe_lun->serseq = CTL_LUN_SERSEQ_OFF; 2199287499Smav if (be_lun->dispatch != ctl_be_block_dispatch_dev) 2200287499Smav cbe_lun->serseq = CTL_LUN_SERSEQ_READ; 2201287499Smav value = ctl_get_opt(&cbe_lun->options, "serseq"); 2202287499Smav if (value != NULL && strcmp(value, "on") == 0) 2203287499Smav cbe_lun->serseq = CTL_LUN_SERSEQ_ON; 2204287499Smav else if (value != NULL && strcmp(value, "read") == 0) 2205287499Smav cbe_lun->serseq = CTL_LUN_SERSEQ_READ; 2206287499Smav else if (value != NULL && strcmp(value, "off") == 0) 2207287499Smav cbe_lun->serseq = CTL_LUN_SERSEQ_OFF; 2208229997Sken return (0); 2209229997Sken} 2210229997Sken 2211229997Skenstatic int 2212229997Skenctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2213229997Sken{ 2214287499Smav struct ctl_be_lun *cbe_lun; 2215229997Sken struct ctl_be_block_lun *be_lun; 2216229997Sken struct ctl_lun_create_params *params; 2217267481Smav char num_thread_str[16]; 2218229997Sken char tmpstr[32]; 2219267481Smav char *value; 2220278672Smav int retval, num_threads; 2221267481Smav int tmp_num_threads; 2222229997Sken 2223229997Sken params = &req->reqdata.create; 2224229997Sken retval = 0; 2225272911Smav req->status = CTL_LUN_OK; 2226229997Sken 2227229997Sken be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK); 2228287499Smav cbe_lun = &be_lun->cbe_lun; 2229287499Smav cbe_lun->be_lun = be_lun; 2230272911Smav be_lun->params = req->reqdata.create; 2231229997Sken be_lun->softc = softc; 2232229997Sken STAILQ_INIT(&be_lun->input_queue); 2233275474Smav STAILQ_INIT(&be_lun->config_read_queue); 2234229997Sken STAILQ_INIT(&be_lun->config_write_queue); 2235229997Sken STAILQ_INIT(&be_lun->datamove_queue); 2236361256Smav mtx_init(&be_lun->io_lock, "ctlblock io", NULL, MTX_DEF); 2237361256Smav mtx_init(&be_lun->queue_lock, "ctlblock queue", NULL, MTX_DEF); 2238287499Smav ctl_init_opts(&cbe_lun->options, 2239268280Smav req->num_be_args, req->kern_be_args); 2240361256Smav be_lun->lun_zone = uma_zcreate("ctlblock", CTLBLK_MAX_SEG, 2241256995Smav NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0); 2242229997Sken if (be_lun->lun_zone == NULL) { 2243229997Sken snprintf(req->error_str, sizeof(req->error_str), 2244272911Smav "error allocating UMA zone"); 2245229997Sken goto bailout_error; 2246229997Sken } 2247229997Sken 2248229997Sken if (params->flags & CTL_LUN_FLAG_DEV_TYPE) 2249287499Smav cbe_lun->lun_type = params->device_type; 2250229997Sken else 2251287499Smav cbe_lun->lun_type = T_DIRECT; 2252361256Smav be_lun->flags = 0; 2253287621Smav cbe_lun->flags = 0; 2254287621Smav value = ctl_get_opt(&cbe_lun->options, "ha_role"); 2255287621Smav if (value != NULL) { 2256287621Smav if (strcmp(value, "primary") == 0) 2257287621Smav cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2258287621Smav } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF) 2259287621Smav cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2260229997Sken 2261288310Smav if (cbe_lun->lun_type == T_DIRECT || 2262288310Smav cbe_lun->lun_type == T_CDROM) { 2263286811Smav be_lun->size_bytes = params->lun_size_bytes; 2264286811Smav if (params->blocksize_bytes != 0) 2265287499Smav cbe_lun->blocksize = params->blocksize_bytes; 2266288310Smav else if (cbe_lun->lun_type == T_CDROM) 2267288310Smav cbe_lun->blocksize = 2048; 2268286811Smav else 2269287499Smav cbe_lun->blocksize = 512; 2270287499Smav be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; 2271287499Smav cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 2272287499Smav 0 : (be_lun->size_blocks - 1); 2273229997Sken 2274287621Smav if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) || 2275287621Smav control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) { 2276288348Smav retval = ctl_be_block_open(be_lun, req); 2277287621Smav if (retval != 0) { 2278287621Smav retval = 0; 2279287621Smav req->status = CTL_LUN_WARNING; 2280287621Smav } 2281229997Sken } 2282287499Smav num_threads = cbb_num_threads; 2283229997Sken } else { 2284229997Sken num_threads = 1; 2285229997Sken } 2286229997Sken 2287287499Smav value = ctl_get_opt(&cbe_lun->options, "num_threads"); 2288267481Smav if (value != NULL) { 2289267481Smav tmp_num_threads = strtol(value, NULL, 0); 2290229997Sken 2291267481Smav /* 2292267481Smav * We don't let the user specify less than one 2293267481Smav * thread, but hope he's clueful enough not to 2294267481Smav * specify 1000 threads. 2295267481Smav */ 2296267481Smav if (tmp_num_threads < 1) { 2297267481Smav snprintf(req->error_str, sizeof(req->error_str), 2298272911Smav "invalid number of threads %s", 2299272911Smav num_thread_str); 2300267481Smav goto bailout_error; 2301229997Sken } 2302267481Smav num_threads = tmp_num_threads; 2303229997Sken } 2304229997Sken 2305272911Smav if (be_lun->vn == NULL) 2306288348Smav cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2307229997Sken /* Tell the user the blocksize we ended up using */ 2308272911Smav params->lun_size_bytes = be_lun->size_bytes; 2309287499Smav params->blocksize_bytes = cbe_lun->blocksize; 2310229997Sken if (params->flags & CTL_LUN_FLAG_ID_REQ) { 2311287499Smav cbe_lun->req_lun_id = params->req_lun_id; 2312287499Smav cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ; 2313229997Sken } else 2314287499Smav cbe_lun->req_lun_id = 0; 2315229997Sken 2316287499Smav cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown; 2317287499Smav cbe_lun->be = &ctl_be_block_driver; 2318229997Sken 2319229997Sken if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) { 2320229997Sken snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%4d", 2321229997Sken softc->num_luns); 2322287499Smav strncpy((char *)cbe_lun->serial_num, tmpstr, 2323287499Smav MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr))); 2324229997Sken 2325229997Sken /* Tell the user what we used for a serial number */ 2326229997Sken strncpy((char *)params->serial_num, tmpstr, 2327275953Smav MIN(sizeof(params->serial_num), sizeof(tmpstr))); 2328229997Sken } else { 2329287499Smav strncpy((char *)cbe_lun->serial_num, params->serial_num, 2330287499Smav MIN(sizeof(cbe_lun->serial_num), 2331229997Sken sizeof(params->serial_num))); 2332229997Sken } 2333229997Sken if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) { 2334229997Sken snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%4d", softc->num_luns); 2335287499Smav strncpy((char *)cbe_lun->device_id, tmpstr, 2336287499Smav MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr))); 2337229997Sken 2338229997Sken /* Tell the user what we used for a device ID */ 2339229997Sken strncpy((char *)params->device_id, tmpstr, 2340275953Smav MIN(sizeof(params->device_id), sizeof(tmpstr))); 2341229997Sken } else { 2342287499Smav strncpy((char *)cbe_lun->device_id, params->device_id, 2343287499Smav MIN(sizeof(cbe_lun->device_id), 2344275953Smav sizeof(params->device_id))); 2345229997Sken } 2346229997Sken 2347229997Sken TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun); 2348229997Sken 2349361256Smav be_lun->io_taskqueue = taskqueue_create("ctlblocktq", M_WAITOK, 2350229997Sken taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue); 2351229997Sken 2352229997Sken if (be_lun->io_taskqueue == NULL) { 2353229997Sken snprintf(req->error_str, sizeof(req->error_str), 2354272911Smav "unable to create taskqueue"); 2355229997Sken goto bailout_error; 2356229997Sken } 2357229997Sken 2358229997Sken /* 2359229997Sken * Note that we start the same number of threads by default for 2360229997Sken * both the file case and the block device case. For the file 2361229997Sken * case, we need multiple threads to allow concurrency, because the 2362229997Sken * vnode interface is designed to be a blocking interface. For the 2363229997Sken * block device case, ZFS zvols at least will block the caller's 2364229997Sken * context in many instances, and so we need multiple threads to 2365229997Sken * overcome that problem. Other block devices don't need as many 2366229997Sken * threads, but they shouldn't cause too many problems. 2367229997Sken * 2368229997Sken * If the user wants to just have a single thread for a block 2369229997Sken * device, he can specify that when the LUN is created, or change 2370229997Sken * the tunable/sysctl to alter the default number of threads. 2371229997Sken */ 2372229997Sken retval = taskqueue_start_threads(&be_lun->io_taskqueue, 2373229997Sken /*num threads*/num_threads, 2374345007Smav /*priority*/PUSER, 2375361256Smav /*thread name*/"block"); 2376229997Sken 2377229997Sken if (retval != 0) 2378229997Sken goto bailout_error; 2379229997Sken 2380229997Sken be_lun->num_threads = num_threads; 2381229997Sken 2382287499Smav retval = ctl_add_lun(&be_lun->cbe_lun); 2383229997Sken if (retval != 0) { 2384229997Sken snprintf(req->error_str, sizeof(req->error_str), 2385272911Smav "ctl_add_lun() returned error %d, see dmesg for " 2386272911Smav "details", retval); 2387229997Sken retval = 0; 2388229997Sken goto bailout_error; 2389229997Sken } 2390229997Sken 2391361256Smav be_lun->disk_stats = devstat_new_entry("cbb", cbe_lun->lun_id, 2392287499Smav cbe_lun->blocksize, 2393229997Sken DEVSTAT_ALL_SUPPORTED, 2394287499Smav cbe_lun->lun_type 2395229997Sken | DEVSTAT_TYPE_IF_OTHER, 2396229997Sken DEVSTAT_PRIORITY_OTHER); 2397229997Sken 2398361256Smav mtx_lock(&softc->lock); 2399361256Smav softc->num_luns++; 2400361256Smav SLIST_INSERT_HEAD(&softc->lun_list, be_lun, links); 2401361256Smav mtx_unlock(&softc->lock); 2402361256Smav 2403361256Smav params->req_lun_id = cbe_lun->lun_id; 2404361256Smav 2405229997Sken return (retval); 2406229997Sken 2407229997Skenbailout_error: 2408229997Sken req->status = CTL_LUN_ERROR; 2409229997Sken 2410267429Smav if (be_lun->io_taskqueue != NULL) 2411267429Smav taskqueue_free(be_lun->io_taskqueue); 2412229997Sken ctl_be_block_close(be_lun); 2413267429Smav if (be_lun->dev_path != NULL) 2414267429Smav free(be_lun->dev_path, M_CTLBLK); 2415267429Smav if (be_lun->lun_zone != NULL) 2416267429Smav uma_zdestroy(be_lun->lun_zone); 2417287499Smav ctl_free_opts(&cbe_lun->options); 2418267877Smav mtx_destroy(&be_lun->queue_lock); 2419267877Smav mtx_destroy(&be_lun->io_lock); 2420229997Sken free(be_lun, M_CTLBLK); 2421229997Sken 2422229997Sken return (retval); 2423229997Sken} 2424229997Sken 2425229997Skenstatic int 2426229997Skenctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2427229997Sken{ 2428229997Sken struct ctl_lun_rm_params *params; 2429229997Sken struct ctl_be_block_lun *be_lun; 2430287670Smav struct ctl_be_lun *cbe_lun; 2431229997Sken int retval; 2432229997Sken 2433229997Sken params = &req->reqdata.rm; 2434229997Sken 2435361256Smav sx_xlock(&softc->modify_lock); 2436229997Sken mtx_lock(&softc->lock); 2437361256Smav SLIST_FOREACH(be_lun, &softc->lun_list, links) { 2438361256Smav if (be_lun->cbe_lun.lun_id == params->lun_id) { 2439361256Smav SLIST_REMOVE(&softc->lun_list, be_lun, 2440361256Smav ctl_be_block_lun, links); 2441361256Smav softc->num_luns--; 2442229997Sken break; 2443361256Smav } 2444229997Sken } 2445229997Sken mtx_unlock(&softc->lock); 2446361256Smav sx_xunlock(&softc->modify_lock); 2447229997Sken if (be_lun == NULL) { 2448229997Sken snprintf(req->error_str, sizeof(req->error_str), 2449272911Smav "LUN %u is not managed by the block backend", 2450272911Smav params->lun_id); 2451229997Sken goto bailout_error; 2452229997Sken } 2453287670Smav cbe_lun = &be_lun->cbe_lun; 2454229997Sken 2455287670Smav if (be_lun->vn != NULL) { 2456288348Smav cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2457288348Smav ctl_lun_no_media(cbe_lun); 2458287670Smav taskqueue_drain_all(be_lun->io_taskqueue); 2459287670Smav ctl_be_block_close(be_lun); 2460229997Sken } 2461229997Sken 2462361256Smav mtx_lock(&softc->lock); 2463361256Smav be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING; 2464361256Smav mtx_unlock(&softc->lock); 2465361256Smav 2466361256Smav retval = ctl_remove_lun(cbe_lun); 2467229997Sken if (retval != 0) { 2468229997Sken snprintf(req->error_str, sizeof(req->error_str), 2469361256Smav "error %d returned from ctl_remove_lun() for " 2470272911Smav "LUN %d", retval, params->lun_id); 2471361256Smav mtx_lock(&softc->lock); 2472361256Smav be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; 2473361256Smav mtx_unlock(&softc->lock); 2474229997Sken goto bailout_error; 2475229997Sken } 2476229997Sken 2477229997Sken mtx_lock(&softc->lock); 2478229997Sken while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) { 2479361256Smav retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblockrm", 0); 2480361256Smav if (retval == EINTR) 2481361256Smav break; 2482361256Smav } 2483229997Sken be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; 2484361256Smav if (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) { 2485229997Sken mtx_unlock(&softc->lock); 2486361256Smav free(be_lun, M_CTLBLK); 2487361256Smav } else { 2488361256Smav mtx_unlock(&softc->lock); 2489361256Smav return (EINTR); 2490229997Sken } 2491229997Sken 2492229997Sken req->status = CTL_LUN_OK; 2493229997Sken return (0); 2494229997Sken 2495229997Skenbailout_error: 2496229997Sken req->status = CTL_LUN_ERROR; 2497229997Sken return (0); 2498229997Sken} 2499229997Sken 2500232604Straszstatic int 2501232604Straszctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2502232604Strasz{ 2503232604Strasz struct ctl_lun_modify_params *params; 2504232604Strasz struct ctl_be_block_lun *be_lun; 2505287500Smav struct ctl_be_lun *cbe_lun; 2506287621Smav char *value; 2507271794Smav uint64_t oldsize; 2508287621Smav int error, wasprim; 2509232604Strasz 2510232604Strasz params = &req->reqdata.modify; 2511232604Strasz 2512361256Smav sx_xlock(&softc->modify_lock); 2513232604Strasz mtx_lock(&softc->lock); 2514361256Smav SLIST_FOREACH(be_lun, &softc->lun_list, links) { 2515287499Smav if (be_lun->cbe_lun.lun_id == params->lun_id) 2516232604Strasz break; 2517232604Strasz } 2518232604Strasz mtx_unlock(&softc->lock); 2519232604Strasz if (be_lun == NULL) { 2520232604Strasz snprintf(req->error_str, sizeof(req->error_str), 2521272911Smav "LUN %u is not managed by the block backend", 2522272911Smav params->lun_id); 2523232604Strasz goto bailout_error; 2524232604Strasz } 2525287500Smav cbe_lun = &be_lun->cbe_lun; 2526232604Strasz 2527287500Smav if (params->lun_size_bytes != 0) 2528287500Smav be_lun->params.lun_size_bytes = params->lun_size_bytes; 2529287500Smav ctl_update_opts(&cbe_lun->options, req->num_be_args, req->kern_be_args); 2530232604Strasz 2531287621Smav wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY); 2532287621Smav value = ctl_get_opt(&cbe_lun->options, "ha_role"); 2533287621Smav if (value != NULL) { 2534287621Smav if (strcmp(value, "primary") == 0) 2535287621Smav cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2536287621Smav else 2537287621Smav cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY; 2538287621Smav } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF) 2539287621Smav cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2540232604Strasz else 2541287621Smav cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY; 2542287621Smav if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) { 2543287621Smav if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) 2544287621Smav ctl_lun_primary(cbe_lun); 2545287621Smav else 2546287621Smav ctl_lun_secondary(cbe_lun); 2547287621Smav } 2548232604Strasz 2549287621Smav oldsize = be_lun->size_blocks; 2550287621Smav if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) || 2551287621Smav control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) { 2552287621Smav if (be_lun->vn == NULL) 2553288348Smav error = ctl_be_block_open(be_lun, req); 2554287621Smav else if (vn_isdisk(be_lun->vn, &error)) 2555288104Smav error = ctl_be_block_open_dev(be_lun, req); 2556289017Smav else if (be_lun->vn->v_type == VREG) { 2557289017Smav vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 2558288104Smav error = ctl_be_block_open_file(be_lun, req); 2559289017Smav VOP_UNLOCK(be_lun->vn, 0); 2560289017Smav } else 2561287621Smav error = EINVAL; 2562288348Smav if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) && 2563287621Smav be_lun->vn != NULL) { 2564288348Smav cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA; 2565288348Smav ctl_lun_has_media(cbe_lun); 2566288348Smav } else if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) == 0 && 2567288348Smav be_lun->vn == NULL) { 2568288348Smav cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2569288348Smav ctl_lun_no_media(cbe_lun); 2570287621Smav } 2571288348Smav cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED; 2572287621Smav } else { 2573287621Smav if (be_lun->vn != NULL) { 2574288348Smav cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2575288348Smav ctl_lun_no_media(cbe_lun); 2576287670Smav taskqueue_drain_all(be_lun->io_taskqueue); 2577287621Smav error = ctl_be_block_close(be_lun); 2578287621Smav } else 2579287621Smav error = 0; 2580287621Smav } 2581287499Smav if (be_lun->size_blocks != oldsize) 2582287500Smav ctl_lun_capacity_changed(cbe_lun); 2583232604Strasz 2584232604Strasz /* Tell the user the exact size we ended up using */ 2585232604Strasz params->lun_size_bytes = be_lun->size_bytes; 2586232604Strasz 2587361256Smav sx_xunlock(&softc->modify_lock); 2588272911Smav req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK; 2589232604Strasz return (0); 2590232604Strasz 2591232604Straszbailout_error: 2592361256Smav sx_xunlock(&softc->modify_lock); 2593232604Strasz req->status = CTL_LUN_ERROR; 2594232604Strasz return (0); 2595232604Strasz} 2596232604Strasz 2597229997Skenstatic void 2598361256Smavctl_be_block_lun_shutdown(void *lun) 2599229997Sken{ 2600361256Smav struct ctl_be_block_lun *be_lun = lun; 2601361256Smav struct ctl_be_block_softc *softc = be_lun->softc; 2602229997Sken 2603361256Smav taskqueue_drain_all(be_lun->io_taskqueue); 2604361256Smav taskqueue_free(be_lun->io_taskqueue); 2605361256Smav if (be_lun->disk_stats != NULL) 2606361256Smav devstat_remove_entry(be_lun->disk_stats); 2607361256Smav uma_zdestroy(be_lun->lun_zone); 2608361256Smav ctl_free_opts(&be_lun->cbe_lun.options); 2609361256Smav free(be_lun->dev_path, M_CTLBLK); 2610361256Smav mtx_destroy(&be_lun->queue_lock); 2611361256Smav mtx_destroy(&be_lun->io_lock); 2612229997Sken 2613229997Sken mtx_lock(&softc->lock); 2614361256Smav be_lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED; 2615361256Smav if (be_lun->flags & CTL_BE_BLOCK_LUN_WAITING) 2616361256Smav wakeup(be_lun); 2617361256Smav else 2618361256Smav free(be_lun, M_CTLBLK); 2619229997Sken mtx_unlock(&softc->lock); 2620229997Sken} 2621229997Sken 2622229997Skenstatic int 2623229997Skenctl_be_block_config_write(union ctl_io *io) 2624229997Sken{ 2625229997Sken struct ctl_be_block_lun *be_lun; 2626287499Smav struct ctl_be_lun *cbe_lun; 2627229997Sken int retval; 2628229997Sken 2629229997Sken DPRINTF("entered\n"); 2630229997Sken 2631312834Smav cbe_lun = CTL_BACKEND_LUN(io); 2632287499Smav be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun; 2633229997Sken 2634288220Smav retval = 0; 2635229997Sken switch (io->scsiio.cdb[0]) { 2636229997Sken case SYNCHRONIZE_CACHE: 2637229997Sken case SYNCHRONIZE_CACHE_16: 2638264274Smav case WRITE_SAME_10: 2639264274Smav case WRITE_SAME_16: 2640264274Smav case UNMAP: 2641229997Sken /* 2642229997Sken * The upper level CTL code will filter out any CDBs with 2643229997Sken * the immediate bit set and return the proper error. 2644229997Sken * 2645229997Sken * We don't really need to worry about what LBA range the 2646229997Sken * user asked to be synced out. When they issue a sync 2647229997Sken * cache command, we'll sync out the whole thing. 2648229997Sken */ 2649267877Smav mtx_lock(&be_lun->queue_lock); 2650229997Sken STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr, 2651229997Sken links); 2652267877Smav mtx_unlock(&be_lun->queue_lock); 2653229997Sken taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 2654229997Sken break; 2655229997Sken case START_STOP_UNIT: { 2656229997Sken struct scsi_start_stop_unit *cdb; 2657288348Smav struct ctl_lun_req req; 2658229997Sken 2659229997Sken cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb; 2660288369Smav if ((cdb->how & SSS_PC_MASK) != 0) { 2661288369Smav ctl_set_success(&io->scsiio); 2662288369Smav ctl_config_write_done(io); 2663288369Smav break; 2664288369Smav } 2665288348Smav if (cdb->how & SSS_START) { 2666288348Smav if ((cdb->how & SSS_LOEJ) && be_lun->vn == NULL) { 2667288348Smav retval = ctl_be_block_open(be_lun, &req); 2668288348Smav cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED; 2669288348Smav if (retval == 0) { 2670288348Smav cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA; 2671288348Smav ctl_lun_has_media(cbe_lun); 2672288348Smav } else { 2673288348Smav cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2674288348Smav ctl_lun_no_media(cbe_lun); 2675288348Smav } 2676288348Smav } 2677288348Smav ctl_start_lun(cbe_lun); 2678229997Sken } else { 2679288348Smav ctl_stop_lun(cbe_lun); 2680288348Smav if (cdb->how & SSS_LOEJ) { 2681288348Smav cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2682288348Smav cbe_lun->flags |= CTL_LUN_FLAG_EJECTED; 2683288348Smav ctl_lun_ejected(cbe_lun); 2684288348Smav if (be_lun->vn != NULL) 2685288348Smav ctl_be_block_close(be_lun); 2686288348Smav } 2687229997Sken } 2688288348Smav 2689288348Smav ctl_set_success(&io->scsiio); 2690229997Sken ctl_config_write_done(io); 2691229997Sken break; 2692229997Sken } 2693288310Smav case PREVENT_ALLOW: 2694288310Smav ctl_set_success(&io->scsiio); 2695288310Smav ctl_config_write_done(io); 2696288310Smav break; 2697229997Sken default: 2698229997Sken ctl_set_invalid_opcode(&io->scsiio); 2699229997Sken ctl_config_write_done(io); 2700229997Sken retval = CTL_RETVAL_COMPLETE; 2701229997Sken break; 2702229997Sken } 2703229997Sken 2704229997Sken return (retval); 2705229997Sken} 2706229997Sken 2707229997Skenstatic int 2708229997Skenctl_be_block_config_read(union ctl_io *io) 2709229997Sken{ 2710275474Smav struct ctl_be_block_lun *be_lun; 2711287499Smav struct ctl_be_lun *cbe_lun; 2712275474Smav int retval = 0; 2713275474Smav 2714275474Smav DPRINTF("entered\n"); 2715275474Smav 2716312834Smav cbe_lun = CTL_BACKEND_LUN(io); 2717287499Smav be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun; 2718275474Smav 2719275474Smav switch (io->scsiio.cdb[0]) { 2720275474Smav case SERVICE_ACTION_IN: 2721275474Smav if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) { 2722275474Smav mtx_lock(&be_lun->queue_lock); 2723275474Smav STAILQ_INSERT_TAIL(&be_lun->config_read_queue, 2724275474Smav &io->io_hdr, links); 2725275474Smav mtx_unlock(&be_lun->queue_lock); 2726275474Smav taskqueue_enqueue(be_lun->io_taskqueue, 2727275474Smav &be_lun->io_task); 2728275474Smav retval = CTL_RETVAL_QUEUED; 2729275474Smav break; 2730275474Smav } 2731275474Smav ctl_set_invalid_field(&io->scsiio, 2732275474Smav /*sks_valid*/ 1, 2733275474Smav /*command*/ 1, 2734275474Smav /*field*/ 1, 2735275474Smav /*bit_valid*/ 1, 2736275474Smav /*bit*/ 4); 2737275474Smav ctl_config_read_done(io); 2738275474Smav retval = CTL_RETVAL_COMPLETE; 2739275474Smav break; 2740275474Smav default: 2741275474Smav ctl_set_invalid_opcode(&io->scsiio); 2742275474Smav ctl_config_read_done(io); 2743275474Smav retval = CTL_RETVAL_COMPLETE; 2744275474Smav break; 2745275474Smav } 2746275474Smav 2747275474Smav return (retval); 2748229997Sken} 2749229997Sken 2750229997Skenstatic int 2751229997Skenctl_be_block_lun_info(void *be_lun, struct sbuf *sb) 2752229997Sken{ 2753229997Sken struct ctl_be_block_lun *lun; 2754229997Sken int retval; 2755229997Sken 2756229997Sken lun = (struct ctl_be_block_lun *)be_lun; 2757229997Sken 2758268283Smav retval = sbuf_printf(sb, "\t<num_threads>"); 2759229997Sken if (retval != 0) 2760229997Sken goto bailout; 2761229997Sken retval = sbuf_printf(sb, "%d", lun->num_threads); 2762229997Sken if (retval != 0) 2763229997Sken goto bailout; 2764268283Smav retval = sbuf_printf(sb, "</num_threads>\n"); 2765229997Sken 2766229997Skenbailout: 2767229997Sken return (retval); 2768229997Sken} 2769229997Sken 2770274154Smavstatic uint64_t 2771274154Smavctl_be_block_lun_attr(void *be_lun, const char *attrname) 2772274154Smav{ 2773274154Smav struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)be_lun; 2774274154Smav 2775274154Smav if (lun->getattr == NULL) 2776274154Smav return (UINT64_MAX); 2777274154Smav return (lun->getattr(lun, attrname)); 2778274154Smav} 2779274154Smav 2780313368Smavstatic int 2781229997Skenctl_be_block_init(void) 2782229997Sken{ 2783313368Smav struct ctl_be_block_softc *softc = &backend_block_softc; 2784229997Sken 2785361256Smav sx_init(&softc->modify_lock, "ctlblock modify"); 2786267877Smav mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF); 2787313368Smav softc->beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io), 2788264020Strasz NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 2789361256Smav SLIST_INIT(&softc->lun_list); 2790313368Smav return (0); 2791313368Smav} 2792229997Sken 2793313368Smav 2794313368Smavstatic int 2795313368Smavctl_be_block_shutdown(void) 2796313368Smav{ 2797313368Smav struct ctl_be_block_softc *softc = &backend_block_softc; 2798361256Smav struct ctl_be_block_lun *lun; 2799313368Smav 2800313368Smav mtx_lock(&softc->lock); 2801361256Smav while ((lun = SLIST_FIRST(&softc->lun_list)) != NULL) { 2802361256Smav SLIST_REMOVE_HEAD(&softc->lun_list, links); 2803361256Smav softc->num_luns--; 2804313368Smav /* 2805361256Smav * Drop our lock here. Since ctl_remove_lun() can call 2806313368Smav * back into us, this could potentially lead to a recursive 2807313368Smav * lock of the same mutex, which would cause a hang. 2808313368Smav */ 2809313368Smav mtx_unlock(&softc->lock); 2810361256Smav ctl_remove_lun(&lun->cbe_lun); 2811313368Smav mtx_lock(&softc->lock); 2812313368Smav } 2813313368Smav mtx_unlock(&softc->lock); 2814313368Smav uma_zdestroy(softc->beio_zone); 2815313368Smav mtx_destroy(&softc->lock); 2816361256Smav sx_destroy(&softc->modify_lock); 2817313368Smav return (0); 2818229997Sken} 2819