1/*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2003 Silicon Graphics International Corp. 5 * Copyright (c) 2009-2011 Spectra Logic Corporation 6 * Copyright (c) 2012 The FreeBSD Foundation 7 * Copyright (c) 2014-2015 Alexander Motin <mav@FreeBSD.org> 8 * All rights reserved. 9 * 10 * Portions of this software were developed by Edward Tomasz Napierala 11 * under sponsorship from the FreeBSD Foundation. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions, and the following disclaimer, 18 * without modification. 19 * 2. Redistributions in binary form must reproduce at minimum a disclaimer 20 * substantially similar to the "NO WARRANTY" disclaimer below 21 * ("Disclaimer") and any redistribution must be conditioned upon 22 * including a substantially similar Disclaimer requirement for further 23 * binary redistribution. 24 * 25 * NO WARRANTY 26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 27 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 28 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR 29 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 30 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 34 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 35 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGES. 
37 * 38 * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $ 39 */ 40/* 41 * CAM Target Layer driver backend for block devices. 42 * 43 * Author: Ken Merry <ken@FreeBSD.org> 44 */ 45#include <sys/cdefs.h> 46__FBSDID("$FreeBSD$"); 47 48#include <sys/param.h> 49#include <sys/systm.h> 50#include <sys/kernel.h> 51#include <sys/types.h> 52#include <sys/kthread.h> 53#include <sys/bio.h> 54#include <sys/fcntl.h> 55#include <sys/limits.h> 56#include <sys/lock.h> 57#include <sys/mutex.h> 58#include <sys/condvar.h> 59#include <sys/malloc.h> 60#include <sys/conf.h> 61#include <sys/ioccom.h> 62#include <sys/queue.h> 63#include <sys/sbuf.h> 64#include <sys/endian.h> 65#include <sys/uio.h> 66#include <sys/buf.h> 67#include <sys/taskqueue.h> 68#include <sys/vnode.h> 69#include <sys/namei.h> 70#include <sys/mount.h> 71#include <sys/disk.h> 72#include <sys/fcntl.h> 73#include <sys/filedesc.h> 74#include <sys/filio.h> 75#include <sys/proc.h> 76#include <sys/pcpu.h> 77#include <sys/module.h> 78#include <sys/sdt.h> 79#include <sys/devicestat.h> 80#include <sys/sysctl.h> 81#include <sys/nv.h> 82#include <sys/dnv.h> 83#include <sys/sx.h> 84 85#include <geom/geom.h> 86 87#include <cam/cam.h> 88#include <cam/scsi/scsi_all.h> 89#include <cam/scsi/scsi_da.h> 90#include <cam/ctl/ctl_io.h> 91#include <cam/ctl/ctl.h> 92#include <cam/ctl/ctl_backend.h> 93#include <cam/ctl/ctl_ioctl.h> 94#include <cam/ctl/ctl_ha.h> 95#include <cam/ctl/ctl_scsi_all.h> 96#include <cam/ctl/ctl_private.h> 97#include <cam/ctl/ctl_error.h> 98 99/* 100 * The idea here is that we'll allocate enough S/G space to hold a 1MB 101 * I/O. If we get an I/O larger than that, we'll split it. 
 */
#define	CTLBLK_HALF_IO_SIZE	(512 * 1024)
#define	CTLBLK_MAX_IO_SIZE	(CTLBLK_HALF_IO_SIZE * 2)
#define	CTLBLK_MAX_SEG		MIN(CTLBLK_HALF_IO_SIZE, MAXPHYS)
#define	CTLBLK_HALF_SEGS	MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MAX_SEG, 1)
#define	CTLBLK_MAX_SEGS		(CTLBLK_HALF_SEGS * 2)

#ifdef CTLBLK_DEBUG
#define	DPRINTF(fmt, args...) \
	printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
#else
#define	DPRINTF(fmt, args...) do {} while(0)
#endif

/* Per-I/O backend-private area: pointer/len/flags overlay of ctl_private. */
#define	PRIV(io)	\
    ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
/* LBA/len/flags arguments parsed by CTL for the current command. */
#define	ARGS(io)	\
    ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])

SDT_PROVIDER_DEFINE(cbb);

typedef enum {
	CTL_BE_BLOCK_LUN_UNCONFIGURED	= 0x01,
	CTL_BE_BLOCK_LUN_WAITING	= 0x04,
} ctl_be_block_lun_flags;

typedef enum {
	CTL_BE_BLOCK_NONE,
	CTL_BE_BLOCK_DEV,
	CTL_BE_BLOCK_FILE
} ctl_be_block_type;

struct ctl_be_block_filedata {
	struct ucred *cred;
};

union ctl_be_block_bedata {
	struct ctl_be_block_filedata file;
};

struct ctl_be_block_io;
struct ctl_be_block_lun;

typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
			       struct ctl_be_block_io *beio);
typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun,
				  const char *attrname);

/*
 * Backend LUN structure.  There is a 1:1 mapping between a block device
 * and a backend block LUN, and between a backend block LUN and a CTL LUN.
 */
struct ctl_be_block_lun {
	struct ctl_be_lun cbe_lun;		/* Must be first element. */
	struct ctl_lun_create_params params;
	char *dev_path;
	ctl_be_block_type dev_type;
	struct vnode *vn;
	union ctl_be_block_bedata backend;
	/* Method table, set at open time depending on dev_type. */
	cbb_dispatch_t dispatch;
	cbb_dispatch_t lun_flush;
	cbb_dispatch_t unmap;
	cbb_dispatch_t get_lba_status;
	cbb_getattr_t getattr;
	uint64_t size_blocks;
	uint64_t size_bytes;
	struct ctl_be_block_softc *softc;
	struct devstat *disk_stats;
	ctl_be_block_lun_flags flags;
	SLIST_ENTRY(ctl_be_block_lun) links;
	struct taskqueue *io_taskqueue;
	struct task io_task;
	int num_threads;
	/* Queues drained by the worker task; protected by queue_lock. */
	STAILQ_HEAD(, ctl_io_hdr) input_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_read_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
	struct mtx_padalign io_lock;
	struct mtx_padalign queue_lock;
};

/*
 * Overall softc structure for the block backend module.
 */
struct ctl_be_block_softc {
	struct sx			 modify_lock;
	struct mtx			 lock;
	int				 num_luns;
	SLIST_HEAD(, ctl_be_block_lun)	 lun_list;
	uma_zone_t			 beio_zone;
	uma_zone_t			 buf_zone;
#if (CTLBLK_MAX_SEG > 131072)
	uma_zone_t			 buf128_zone;
#endif
};

static struct ctl_be_block_softc backend_block_softc;

/*
 * Per-I/O information.
 */
struct ctl_be_block_io {
	union ctl_io			*io;
	struct ctl_sg_entry		sg_segs[CTLBLK_MAX_SEGS];
	struct iovec			xiovecs[CTLBLK_MAX_SEGS];
	int				bio_cmd;
	/* Non-zero for COMPARE: two equal half-size S/G lists are in use. */
	int				two_sglists;
	int				num_segs;
	int				num_bios_sent;
	int				num_bios_done;
	int				send_complete;
	/* First bio error seen (lowest offset wins); protected by io_lock. */
	int				first_error;
	uint64_t			first_error_offset;
	struct bintime			ds_t0;
	devstat_tag_type		ds_tag_type;
	devstat_trans_flags		ds_trans_type;
	uint64_t			io_len;
	uint64_t			io_offset;
	int				io_arg;
	struct ctl_be_block_softc	*softc;
	struct ctl_be_block_lun		*lun;
	void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
};

extern struct ctl_softc *control_softc;

static int cbb_num_threads = 32;
SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD, 0,
    "CAM Target Layer Block Backend");
SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN,
    &cbb_num_threads, 0, "Number of threads per backing file");

static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
static void ctl_free_beio(struct ctl_be_block_io *beio);
static void ctl_complete_beio(struct ctl_be_block_io *beio);
static int ctl_be_block_move_done(union ctl_io *io, bool samethr);
static void ctl_be_block_biodone(struct bio *bio);
static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
				    struct ctl_be_block_io *beio);
static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
				       struct ctl_be_block_io *beio);
static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
				  struct ctl_be_block_io *beio);
static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun,
					  const char *attrname);
static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
				   struct ctl_be_block_io *beio);
static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
				   struct ctl_be_block_io *beio);
static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
				      struct ctl_be_block_io *beio);
static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun,
					 const char *attrname);
static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
				     union ctl_io *io);
static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
				     union ctl_io *io);
static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
				  union ctl_io *io);
static void ctl_be_block_worker(void *context, int pending);
static int ctl_be_block_submit(union ctl_io *io);
static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
			      int flag, struct thread *td);
static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
				  struct ctl_lun_req *req);
static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
				 struct ctl_lun_req *req);
static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
static int ctl_be_block_open(struct ctl_be_block_lun *be_lun,
			     struct ctl_lun_req *req);
static int ctl_be_block_create(struct ctl_be_block_softc *softc,
			       struct ctl_lun_req *req);
static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
			   struct ctl_lun_req *req);
static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
			       struct ctl_lun_req *req);
static void ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun);
static int ctl_be_block_config_write(union ctl_io *io);
static int ctl_be_block_config_read(union ctl_io *io);
static int ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb);
static uint64_t ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname);
static int ctl_be_block_init(void);
static int ctl_be_block_shutdown(void);

static struct ctl_backend_driver ctl_be_block_driver =
{
	.name = "block",
	.flags = CTL_BE_FLAG_HAS_CONFIG,
	.init = ctl_be_block_init,
	.shutdown = ctl_be_block_shutdown,
	.data_submit =
 ctl_be_block_submit,
	.config_read = ctl_be_block_config_read,
	.config_write = ctl_be_block_config_write,
	.ioctl = ctl_be_block_ioctl,
	.lun_info = ctl_be_block_lun_info,
	.lun_attr = ctl_be_block_lun_attr
};

MALLOC_DEFINE(M_CTLBLK, "ctlblock", "Memory used for CTL block backend");
CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver);

/*
 * Allocate the data buffer for one S/G segment of "len" bytes.  When a
 * separate 128KB zone exists (CTLBLK_MAX_SEG > 128KB), small segments are
 * served from it to avoid wasting full-size buffers.  May sleep (M_WAITOK).
 */
static void
ctl_alloc_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg,
	      size_t len)
{

#if (CTLBLK_MAX_SEG > 131072)
	if (len <= 131072)
		sg->addr = uma_zalloc(softc->buf128_zone, M_WAITOK);
	else
#endif
		sg->addr = uma_zalloc(softc->buf_zone, M_WAITOK);
	sg->len = len;
}

/*
 * Release one S/G segment buffer back to the zone it was allocated from,
 * keyed by the segment length (must mirror ctl_alloc_seg()'s choice).
 */
static void
ctl_free_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg)
{

#if (CTLBLK_MAX_SEG > 131072)
	if (sg->len <= 131072)
		uma_zfree(softc->buf128_zone, sg->addr);
	else
#endif
		uma_zfree(softc->buf_zone, sg->addr);
}

/*
 * Allocate and zero a per-I/O tracking structure.  May sleep.
 */
static struct ctl_be_block_io *
ctl_alloc_beio(struct ctl_be_block_softc *softc)
{
	struct ctl_be_block_io *beio;

	beio = uma_zalloc(softc->beio_zone, M_WAITOK | M_ZERO);
	beio->softc = softc;
	return (beio);
}

/*
 * Free a per-I/O structure and all S/G segment buffers it owns.
 */
static void
ctl_free_beio(struct ctl_be_block_io *beio)
{
	struct ctl_be_block_softc *softc = beio->softc;
	int i;

	for (i = 0; i < beio->num_segs; i++) {
		ctl_free_seg(softc, &beio->sg_segs[i]);

		/* For compare we had two equal S/G lists. */
		if (beio->two_sglists) {
			ctl_free_seg(softc,
			    &beio->sg_segs[i + CTLBLK_HALF_SEGS]);
		}
	}

	uma_zfree(softc->beio_zone, beio);
}

/*
 * Finish a backend I/O: either hand off to the continuation callback
 * (which takes over ownership of beio) or free it and complete the
 * CTL data submission.
 */
static void
ctl_complete_beio(struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;

	if (beio->beio_cont != NULL) {
		beio->beio_cont(beio);
	} else {
		ctl_free_beio(beio);
		ctl_data_submit_done(io);
	}
}

/*
 * Byte-compare two buffers; returns the index of the first mismatching
 * byte, or "size" if the buffers are equal.
 */
static size_t
cmp(uint8_t *a, uint8_t *b, size_t size)
{
	size_t i;

	for (i = 0; i < size; i++) {
		if (a[i] != b[i])
			break;
	}
	return (i);
}

/*
 * Compare the two halves of a COMPARE command's S/G list (written data in
 * the first CTLBLK_HALF_SEGS entries, read-back data in the second half).
 * On mismatch, set MISCOMPARE sense with the byte offset in the INFO field.
 */
static void
ctl_be_block_compare(union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	uint64_t off, res;
	int i;
	uint8_t info[8];

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	off = 0;
	for (i = 0; i < beio->num_segs; i++) {
		res = cmp(beio->sg_segs[i].addr,
		    beio->sg_segs[i + CTLBLK_HALF_SEGS].addr,
		    beio->sg_segs[i].len);
		off += res;
		if (res < beio->sg_segs[i].len)
			break;
	}
	if (i < beio->num_segs) {
		scsi_u64to8b(off, info);
		ctl_set_sense(&io->scsiio, /*current_error*/ 1,
		    /*sense_key*/ SSD_KEY_MISCOMPARE,
		    /*asc*/ 0x1D, /*ascq*/ 0x00,
		    /*type*/ SSD_ELEM_INFO,
		    /*size*/ sizeof(info), /*data*/ &info,
		    /*type*/ SSD_ELEM_NONE);
	} else
		ctl_set_success(&io->scsiio);
}

/*
 * DMA-move completion callback.  For reads/compares this sets final
 * status; for writes it dispatches (or queues) the backend I/O.
 */
static int
ctl_be_block_move_done(union ctl_io *io, bool samethr)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_lun *be_lun;
	struct ctl_lba_len_flags *lbalen;

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;

	DPRINTF("entered\n");
	io->scsiio.kern_rel_offset += io->scsiio.kern_data_len;

	/*
	 * We set status at this point for read and compare commands.
	 */
	if ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0 &&
	    (io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE) {
		lbalen = ARGS(io);
		if (lbalen->flags & CTL_LLF_READ) {
			ctl_set_success(&io->scsiio);
		} else if (lbalen->flags & CTL_LLF_COMPARE) {
			/* We have two data blocks ready for comparison. */
			ctl_be_block_compare(io);
		}
	}

	/*
	 * If this is a read, or a write with errors, it is done.
	 */
	if ((beio->bio_cmd == BIO_READ)
	 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)
	 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) {
		ctl_complete_beio(beio);
		return (0);
	}

	/*
	 * At this point, we have a write and the DMA completed successfully.
	 * If we were called synchronously in the original thread then just
	 * dispatch, otherwise we now have to queue it to the task queue to
	 * execute the backend I/O.  That is because we do blocking
	 * memory allocations, and in the file backing case, blocking I/O.
	 * This move done routine is generally called in the SIM's
	 * interrupt context, and therefore we cannot block.
	 */
	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);
	if (samethr) {
		be_lun->dispatch(be_lun, beio);
	} else {
		mtx_lock(&be_lun->queue_lock);
		STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr,
		    links);
		mtx_unlock(&be_lun->queue_lock);
		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
	}
	return (0);
}

/*
 * Completion handler for every bio issued by the device-backed paths.
 * Records the first (lowest-offset) error, and when the last outstanding
 * bio finishes, ends the devstat transaction and completes or continues
 * the CTL I/O.  May run in interrupt context (GEOM done path).
 */
static void
ctl_be_block_biodone(struct bio *bio)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_lun *be_lun;
	union ctl_io *io;
	int error;

	beio = bio->bio_caller1;
	be_lun = beio->lun;
	io = beio->io;

	DPRINTF("entered\n");

	error = bio->bio_error;
	mtx_lock(&be_lun->io_lock);
	if (error != 0 &&
	    (beio->first_error == 0 ||
	     bio->bio_offset < beio->first_error_offset)) {
		beio->first_error = error;
		beio->first_error_offset = bio->bio_offset;
	}

	beio->num_bios_done++;

	/*
	 * XXX KDM will this cause WITNESS to complain?  Holding a lock
	 * during the free might cause it to complain.
	 */
	g_destroy_bio(bio);

	/*
	 * If the send complete bit isn't set, or we aren't the last I/O to
	 * complete, then we're done.
	 */
	if ((beio->send_complete == 0)
	 || (beio->num_bios_done < beio->num_bios_sent)) {
		mtx_unlock(&be_lun->io_lock);
		return;
	}

	/*
	 * At this point, we've verified that we are the last I/O to
	 * complete, so it's safe to drop the lock.
	 */
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If there are any errors from the backing device, we fail the
	 * entire I/O with a medium error.
	 */
	error = beio->first_error;
	if (error != 0) {
		/* Map the errno to an appropriate SCSI sense code. */
		if (error == EOPNOTSUPP) {
			ctl_set_invalid_opcode(&io->scsiio);
		} else if (error == ENOSPC || error == EDQUOT) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else if (error == EROFS || error == EACCES) {
			ctl_set_hw_write_protected(&io->scsiio);
		} else if (beio->bio_cmd == BIO_FLUSH) {
			/* XXX KDM is there is a better error here? */
			ctl_set_internal_failure(&io->scsiio,
						 /*sks_valid*/ 1,
						 /*retry_count*/ 0xbad2);
		} else {
			ctl_set_medium_error(&io->scsiio,
			    beio->bio_cmd == BIO_READ);
		}
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write, a flush, a delete or verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE)
	 || (beio->bio_cmd == BIO_FLUSH)
	 || (beio->bio_cmd == BIO_DELETE)
	 || (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL) {
			ctl_set_success(&io->scsiio);
			ctl_serseq_done(io);
		}
		ctl_datamove(io);
	}
}

/*
 * SYNCHRONIZE CACHE implementation for file-backed LUNs: VOP_FSYNC()
 * the backing vnode, bracketed by vn_start_write()/vn_finished_write()
 * and devstat accounting.  io_arg non-zero requests MNT_NOWAIT.
 */
static void
ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
			struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct mount *mountpoint;
	int error, lock_flags;

	DPRINTF("entered\n");

	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	(void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);

	if (MNT_SHARED_WRITES(mountpoint) ||
	    ((mountpoint == NULL) && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
		lock_flags = LK_SHARED;
	else
		lock_flags = LK_EXCLUSIVE;
	vn_lock(be_lun->vn, lock_flags | LK_RETRY);
	error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT,
	    curthread);
	VOP_UNLOCK(be_lun->vn, 0);

	vn_finished_write(mountpoint);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	if (error == 0)
		ctl_set_success(&io->scsiio);
	else {
		/* XXX KDM is there is a better error here? */
		ctl_set_internal_failure(&io->scsiio,
					 /*sks_valid*/ 1,
					 /*retry_count*/ 0xbad1);
	}

	ctl_complete_beio(beio);
}

SDT_PROBE_DEFINE1(cbb, , read, file_start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, file_start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , read, file_done,"uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, file_done, "uint64_t");

/*
 * READ/WRITE implementation for file-backed LUNs: build a kernel uio
 * over the beio's S/G segments and issue VOP_READ()/VOP_WRITE().
 * DPO maps to IO_DIRECT and FUA (writes) to IO_SYNC.
 */
static void
ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
			   struct ctl_be_block_io *beio)
{
	struct ctl_be_block_filedata *file_data;
	union ctl_io *io;
	struct uio xuio;
	struct iovec *xiovec;
	size_t s;
	int error, flags, i;

	DPRINTF("entered\n");

	file_data = &be_lun->backend.file;
	io = beio->io;
	flags = 0;
	if (ARGS(io)->flags & CTL_LLF_DPO)
		flags |= IO_DIRECT;
	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
		flags |= IO_SYNC;

	bzero(&xuio, sizeof(xuio));
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE0(cbb, , read, file_start);
		xuio.uio_rw = UIO_READ;
	} else {
		SDT_PROBE0(cbb, , write, file_start);
		xuio.uio_rw = UIO_WRITE;
	}
	xuio.uio_offset = beio->io_offset;
	xuio.uio_resid = beio->io_len;
	xuio.uio_segflg = UIO_SYSSPACE;
	xuio.uio_iov = beio->xiovecs;
	xuio.uio_iovcnt = beio->num_segs;
	xuio.uio_td = curthread;

	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
		xiovec->iov_base = beio->sg_segs[i].addr;
		xiovec->iov_len = beio->sg_segs[i].len;
	}

	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	if (beio->bio_cmd == BIO_READ) {
		vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);

		/*
		 * UFS pays attention to IO_DIRECT for reads.  If the
		 * DIRECTIO option is configured into the kernel, it calls
		 * ffs_rawread().  But that only works for single-segment
		 * uios with user space addresses.  In our case, with a
		 * kernel uio, it still reads into the buffer cache, but it
		 * will just try to release the buffer from the cache later
		 * on in ffs_read().
		 *
		 * ZFS does not pay attention to IO_DIRECT for reads.
		 *
		 * UFS does not pay attention to IO_SYNC for reads.
		 *
		 * ZFS pays attention to IO_SYNC (which translates into the
		 * Solaris define FRSYNC for zfs_read()) for reads.  It
		 * attempts to sync the file before reading.
		 */
		error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred);

		VOP_UNLOCK(be_lun->vn, 0);
		SDT_PROBE0(cbb, , read, file_done);
		if (error == 0 && xuio.uio_resid > 0) {
			/*
			 * If we read less than requested (EOF), then
			 * we should clean the rest of the buffer.
			 */
			s = beio->io_len - xuio.uio_resid;
			for (i = 0; i < beio->num_segs; i++) {
				if (s >= beio->sg_segs[i].len) {
					s -= beio->sg_segs[i].len;
					continue;
				}
				bzero((uint8_t *)beio->sg_segs[i].addr + s,
				    beio->sg_segs[i].len - s);
				s = 0;
			}
		}
	} else {
		struct mount *mountpoint;
		int lock_flags;

		(void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);

		if (MNT_SHARED_WRITES(mountpoint) || ((mountpoint == NULL)
		  && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
			lock_flags = LK_SHARED;
		else
			lock_flags = LK_EXCLUSIVE;
		vn_lock(be_lun->vn, lock_flags | LK_RETRY);

		/*
		 * UFS pays attention to IO_DIRECT for writes.  The write
		 * is done asynchronously.  (Normally the write would just
		 * get put into cache.
		 *
		 * UFS pays attention to IO_SYNC for writes.  It will
		 * attempt to write the buffer out synchronously if that
		 * flag is set.
		 *
		 * ZFS does not pay attention to IO_DIRECT for writes.
		 *
		 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC)
		 * for writes.  It will flush the transaction from the
		 * cache before returning.
		 */
		error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred);
		VOP_UNLOCK(be_lun->vn, 0);

		vn_finished_write(mountpoint);
		SDT_PROBE0(cbb, , write, file_done);
	}

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data to "MEDIUM ERROR" and
	 * return the I/O to the user.
	 */
	if (error != 0) {
		if (error == ENOSPC || error == EDQUOT) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else if (error == EROFS || error == EACCES) {
			ctl_set_hw_write_protected(&io->scsiio);
		} else {
			ctl_set_medium_error(&io->scsiio,
			    beio->bio_cmd == BIO_READ);
		}
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write or a verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE) ||
	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL) {
			ctl_set_success(&io->scsiio);
			ctl_serseq_done(io);
		}
		ctl_datamove(io);
	}
}

/*
 * GET LBA STATUS for file-backed LUNs: probe the backing file with
 * FIOSEEKHOLE/FIOSEEKDATA to report whether the range starting at the
 * requested LBA is mapped (status 0) or deallocated (status 1).
 */
static void
ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
		      struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct ctl_lba_len_flags *lbalen = ARGS(io);
	struct scsi_get_lba_status_data *data;
	off_t roff, off;
	int error, status;

	DPRINTF("entered\n");

	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
	error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off,
	    0, curthread->td_ucred, curthread);
	if (error == 0 && off > roff)
		status = 0;	/* mapped up to off */
	else {
		error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off,
		    0, curthread->td_ucred, curthread);
		if (error == 0 && off > roff)
			status = 1;	/* deallocated up to off */
		else {
			status = 0;	/* unknown up to the end */
			off = be_lun->size_bytes;
		}
	}
	VOP_UNLOCK(be_lun->vn, 0);

	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
	    lbalen->lba), data->descr[0].length);
	data->descr[0].status = status;

	ctl_complete_beio(beio);
}

/*
 * LUN attribute lookup for file-backed LUNs.  Supports "blocksused"
 * (via VOP_GETATTR va_bytes) and "blocksavail" (via VFS_STATFS), both
 * converted to LUN blocks.  Returns UINT64_MAX when unavailable.
 */
static uint64_t
ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname)
{
	struct vattr vattr;
	struct statfs statfs;
	uint64_t val;
	int error;

	val = UINT64_MAX;
	if (be_lun->vn == NULL)
		return (val);
	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
	if (strcmp(attrname, "blocksused") == 0) {
		error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
		if (error == 0)
			val = vattr.va_bytes / be_lun->cbe_lun.blocksize;
	}
	if (strcmp(attrname, "blocksavail") == 0 &&
	    (be_lun->vn->v_iflag & VI_DOOMED) == 0) {
		error = VFS_STATFS(be_lun->vn->v_mount, &statfs);
		if (error == 0)
			val = statfs.f_bavail * statfs.f_bsize /
			    be_lun->cbe_lun.blocksize;
	}
	VOP_UNLOCK(be_lun->vn, 0);
	return (val);
}

/*
 * READ/WRITE implementation for ZFS zvol-backed LUNs: like the file
 * path, but goes through the cdevsw d_read/d_write entry points under
 * a devvn_refthread() reference instead of VOP_READ/VOP_WRITE.
 */
static void
ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
			   struct ctl_be_block_io *beio)
{
	union ctl_io *io;
	struct cdevsw *csw;
	struct cdev *dev;
	struct uio xuio;
	struct iovec *xiovec;
	int error, flags, i, ref;

	DPRINTF("entered\n");

	io = beio->io;
	flags = 0;
	if (ARGS(io)->flags & CTL_LLF_DPO)
		flags |= IO_DIRECT;
	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
		flags |= IO_SYNC;

	bzero(&xuio, sizeof(xuio));
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE0(cbb, , read, file_start);
		xuio.uio_rw = UIO_READ;
	} else {
		SDT_PROBE0(cbb, , write, file_start);
		xuio.uio_rw = UIO_WRITE;
	}
	xuio.uio_offset = beio->io_offset;
	xuio.uio_resid = beio->io_len;
	xuio.uio_segflg = UIO_SYSSPACE;
	xuio.uio_iov = beio->xiovecs;
	xuio.uio_iovcnt = beio->num_segs;
	xuio.uio_td = curthread;

	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
		xiovec->iov_base = beio->sg_segs[i].addr;
		xiovec->iov_len = beio->sg_segs[i].len;
	}

	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw) {
		if (beio->bio_cmd == BIO_READ)
			error = csw->d_read(dev, &xuio, flags);
		else
			error = csw->d_write(dev, &xuio, flags);
		dev_relthread(dev, ref);
	} else
		error = ENXIO;

	if (beio->bio_cmd == BIO_READ)
		SDT_PROBE0(cbb, , read, file_done);
	else
		SDT_PROBE0(cbb, , write, file_done);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data to "MEDIUM ERROR" and
	 * return the I/O to the user.
	 */
	if (error != 0) {
		if (error == ENOSPC || error == EDQUOT) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else if (error == EROFS || error == EACCES) {
			ctl_set_hw_write_protected(&io->scsiio);
		} else {
			ctl_set_medium_error(&io->scsiio,
			    beio->bio_cmd == BIO_READ);
		}
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write or a verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE) ||
	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL) {
			ctl_set_success(&io->scsiio);
			ctl_serseq_done(io);
		}
		ctl_datamove(io);
	}
}

/*
 * GET LBA STATUS for zvol-backed LUNs: same FIOSEEKHOLE/FIOSEEKDATA
 * probing as the file path, but through the cdevsw d_ioctl entry under
 * a devvn_refthread() reference.
 */
static void
ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun,
		      struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct cdevsw *csw;
	struct cdev *dev;
	struct ctl_lba_len_flags *lbalen = ARGS(io);
	struct scsi_get_lba_status_data *data;
	off_t roff, off;
	int error, ref, status;

	DPRINTF("entered\n");

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw == NULL) {
		status = 0;	/* unknown up to the end */
		off = be_lun->size_bytes;
		goto done;
	}
	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
	error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD,
	    curthread);
	if (error == 0 && off > roff)
		status = 0;	/* mapped up to off */
	else {
		error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD,
		    curthread);
		if (error == 0 && off > roff)
			status = 1;	/* deallocated up to off */
		else {
			status = 0;	/* unknown up to the end */
			off = be_lun->size_bytes;
		}
	}
	dev_relthread(dev, ref);

done:
	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
	    lbalen->lba), data->descr[0].length);
	data->descr[0].status = status;

	ctl_complete_beio(beio);
}

/*
 * SYNCHRONIZE CACHE implementation for device-backed LUNs: issue a
 * single BIO_FLUSH to the backing device; completion is handled by
 * ctl_be_block_biodone().
 */
static void
ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
		       struct ctl_be_block_io *beio)
{
	struct bio *bio;
	struct cdevsw *csw;
	struct cdev *dev;
	int ref;

	DPRINTF("entered\n");

	/* This can't fail, it's a blocking allocation. */
	bio = g_alloc_bio();

	bio->bio_cmd	 = BIO_FLUSH;
	bio->bio_offset	 = 0;
	bio->bio_data	 = 0;
	bio->bio_done	 = ctl_be_block_biodone;
	bio->bio_caller1 = beio;
	bio->bio_pblkno	 = 0;

	/*
	 * We don't need to acquire the LUN lock here, because we are only
	 * sending one bio, and so there is no other context to synchronize
	 * with.
1019 */ 1020 beio->num_bios_sent = 1; 1021 beio->send_complete = 1; 1022 1023 binuptime(&beio->ds_t0); 1024 mtx_lock(&be_lun->io_lock); 1025 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 1026 mtx_unlock(&be_lun->io_lock); 1027 1028 csw = devvn_refthread(be_lun->vn, &dev, &ref); 1029 if (csw) { 1030 bio->bio_dev = dev; 1031 csw->d_strategy(bio); 1032 dev_relthread(dev, ref); 1033 } else { 1034 bio->bio_error = ENXIO; 1035 ctl_be_block_biodone(bio); 1036 } 1037} 1038 1039static void 1040ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun, 1041 struct ctl_be_block_io *beio, 1042 uint64_t off, uint64_t len, int last) 1043{ 1044 struct bio *bio; 1045 uint64_t maxlen; 1046 struct cdevsw *csw; 1047 struct cdev *dev; 1048 int ref; 1049 1050 csw = devvn_refthread(be_lun->vn, &dev, &ref); 1051 maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize); 1052 while (len > 0) { 1053 bio = g_alloc_bio(); 1054 bio->bio_cmd = BIO_DELETE; 1055 bio->bio_dev = dev; 1056 bio->bio_offset = off; 1057 bio->bio_length = MIN(len, maxlen); 1058 bio->bio_data = 0; 1059 bio->bio_done = ctl_be_block_biodone; 1060 bio->bio_caller1 = beio; 1061 bio->bio_pblkno = off / be_lun->cbe_lun.blocksize; 1062 1063 off += bio->bio_length; 1064 len -= bio->bio_length; 1065 1066 mtx_lock(&be_lun->io_lock); 1067 beio->num_bios_sent++; 1068 if (last && len == 0) 1069 beio->send_complete = 1; 1070 mtx_unlock(&be_lun->io_lock); 1071 1072 if (csw) { 1073 csw->d_strategy(bio); 1074 } else { 1075 bio->bio_error = ENXIO; 1076 ctl_be_block_biodone(bio); 1077 } 1078 } 1079 if (csw) 1080 dev_relthread(dev, ref); 1081} 1082 1083static void 1084ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun, 1085 struct ctl_be_block_io *beio) 1086{ 1087 union ctl_io *io; 1088 struct ctl_ptr_len_flags *ptrlen; 1089 struct scsi_unmap_desc *buf, *end; 1090 uint64_t len; 1091 1092 io = beio->io; 1093 1094 DPRINTF("entered\n"); 1095 1096 binuptime(&beio->ds_t0); 1097 mtx_lock(&be_lun->io_lock); 1098 
/*
 * READ/WRITE dispatcher for device-backed LUNs.  Converts the beio's
 * S/G list into a batch of bios (each no larger than the device's
 * si_iosize_max), queues them all locally, then marks send_complete
 * and fires them at the device in one burst so biodone accounting is
 * consistent.
 */
static void
ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
			  struct ctl_be_block_io *beio)
{
	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
	struct bio *bio;
	struct cdevsw *csw;
	struct cdev *dev;
	off_t cur_offset;
	int i, max_iosize, ref;

	DPRINTF("entered\n");
	csw = devvn_refthread(be_lun->vn, &dev, &ref);

	/*
	 * We have to limit our I/O size to the maximum supported by the
	 * backend device.
	 */
	if (csw) {
		max_iosize = dev->si_iosize_max;
		/* Guard against bogus/unset si_iosize_max. */
		if (max_iosize < PAGE_SIZE)
			max_iosize = DFLTPHYS;
	} else
		max_iosize = DFLTPHYS;

	cur_offset = beio->io_offset;
	for (i = 0; i < beio->num_segs; i++) {
		size_t cur_size;
		uint8_t *cur_ptr;

		cur_size = beio->sg_segs[i].len;
		cur_ptr = beio->sg_segs[i].addr;

		while (cur_size > 0) {
			/* This can't fail, it's a blocking allocation. */
			bio = g_alloc_bio();

			KASSERT(bio != NULL, ("g_alloc_bio() failed!\n"));

			bio->bio_cmd = beio->bio_cmd;
			bio->bio_dev = dev;
			bio->bio_caller1 = beio;
			bio->bio_length = min(cur_size, max_iosize);
			bio->bio_offset = cur_offset;
			bio->bio_data = cur_ptr;
			bio->bio_done = ctl_be_block_biodone;
			bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize;

			cur_offset += bio->bio_length;
			cur_ptr += bio->bio_length;
			cur_size -= bio->bio_length;

			TAILQ_INSERT_TAIL(&queue, bio, bio_queue);
			beio->num_bios_sent++;
		}
	}
	/* All bios are queued; publish the final count before dispatch. */
	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
	beio->send_complete = 1;
	mtx_unlock(&be_lun->io_lock);

	/*
	 * Fire off all allocated requests!
	 */
	while ((bio = TAILQ_FIRST(&queue)) != NULL) {
		TAILQ_REMOVE(&queue, bio, bio_queue);
		if (csw)
			csw->d_strategy(bio);
		else {
			bio->bio_error = ENXIO;
			ctl_be_block_biodone(bio);
		}
	}
	if (csw)
		dev_relthread(dev, ref);
}
/*
 * Fetch a named GEOM attribute (e.g. "GEOM::candelete") from the
 * backing cdev via the DIOCGATTR ioctl.  Returns the attribute value,
 * or UINT64_MAX if the device is gone, has no d_ioctl, or the ioctl
 * fails.
 */
static uint64_t
ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname)
{
	struct diocgattr_arg arg;
	struct cdevsw *csw;
	struct cdev *dev;
	int error, ref;

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw == NULL)
		return (UINT64_MAX);
	strlcpy(arg.name, attrname, sizeof(arg.name));
	arg.len = sizeof(arg.value.off);
	if (csw->d_ioctl) {
		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
		    curthread);
	} else
		error = ENODEV;
	dev_relthread(dev, ref);
	if (error != 0)
		return (UINT64_MAX);
	return (arg.value.off);
}

/*
 * Config-write dispatcher for SYNCHRONIZE CACHE: program the beio as a
 * BIO_FLUSH over the requested LBA range and hand it to the backend's
 * flush routine (lun_flush).
 */
static void
ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun,
			      union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_lba_len_flags *lbalen;

	DPRINTF("entered\n");
	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];

	beio->io_len = lbalen->len * cbe_lun->blocksize;
	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
	/* io_arg carries the IMMED bit; presumably the flush path uses it
	 * to complete the command before the flush finishes — TODO confirm. */
	beio->io_arg = (lbalen->flags & SSC_IMMED) != 0;
	beio->bio_cmd = BIO_FLUSH;
	beio->ds_trans_type = DEVSTAT_NO_DATA;
	DPRINTF("SYNC\n");
	be_lun->lun_flush(be_lun, beio);
}
/*
 * Continuation for a WRITE SAME that could not be covered by one beio:
 * free the finished beio and, unless the I/O was aborted or already
 * failed, re-enter ctl_be_block_config_write() to process the
 * remaining LBA range (the dispatcher adjusted lbalen beforehand).
 */
static void
ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio)
{
	union ctl_io *io;

	io = beio->io;
	ctl_free_beio(beio);
	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
		ctl_config_write_done(io);
		return;
	}

	ctl_be_block_config_write(io);
}

/*
 * Config-write dispatcher for WRITE SAME (10/16).  Validates the flag
 * bits; routes UNMAP/ANCHOR variants to the backend's unmap hook as a
 * BIO_DELETE; otherwise materializes the repeated block pattern into
 * S/G segments (zeros for NDOB, the transferred block otherwise,
 * optionally stamping the LBA for LBDATA) and dispatches it as a
 * BIO_WRITE.  Segment lengths are trimmed toward physical-block
 * boundaries when the physical block is larger than the logical one.
 */
static void
ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
			    union ctl_io *io)
{
	struct ctl_be_block_softc *softc = be_lun->softc;
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_lba_len_flags *lbalen;
	uint64_t len_left, lba;
	uint32_t pb, pbo, adj;
	int i, seglen;
	uint8_t *buf, *end;

	DPRINTF("entered\n");

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	lbalen = ARGS(io);

	/* Reject unsupported flag bits, and UNMAP/ANCHOR on LUNs that
	 * cannot unmap. */
	if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) ||
	    (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) {
		ctl_free_beio(beio);
		ctl_set_invalid_field(&io->scsiio,
				      /*sks_valid*/ 1,
				      /*command*/ 1,
				      /*field*/ 1,
				      /*bit_valid*/ 0,
				      /*bit*/ 0);
		ctl_config_write_done(io);
		return;
	}

	if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) {
		beio->io_offset = lbalen->lba * cbe_lun->blocksize;
		beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize;
		beio->bio_cmd = BIO_DELETE;
		beio->ds_trans_type = DEVSTAT_FREE;

		be_lun->unmap(be_lun, beio);
		return;
	}

	beio->bio_cmd = BIO_WRITE;
	beio->ds_trans_type = DEVSTAT_WRITE;

	DPRINTF("WRITE SAME at LBA %jx len %u\n",
	       (uintmax_t)lbalen->lba, lbalen->len);

	/* Physical block size and offset, in bytes. */
	pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp;
	if (be_lun->cbe_lun.pblockoff > 0)
		pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff;
	else
		pbo = 0;
	len_left = (uint64_t)lbalen->len * cbe_lun->blocksize;
	for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {

		/*
		 * Setup the S/G entry for this chunk.
		 */
		seglen = MIN(CTLBLK_MAX_SEG, len_left);
		if (pb > cbe_lun->blocksize) {
			/* Trim the segment so it ends on a physical block
			 * boundary where possible; otherwise fall back to a
			 * logical block multiple. */
			adj = ((lbalen->lba + lba) * cbe_lun->blocksize +
			    seglen - pbo) % pb;
			if (seglen > adj)
				seglen -= adj;
			else
				seglen -= seglen % cbe_lun->blocksize;
		} else
			seglen -= seglen % cbe_lun->blocksize;
		ctl_alloc_seg(softc, &beio->sg_segs[i], seglen);

		DPRINTF("segment %d addr %p len %zd\n", i,
			beio->sg_segs[i].addr, beio->sg_segs[i].len);

		beio->num_segs++;
		len_left -= seglen;

		/* Fill the segment one logical block at a time. */
		buf = beio->sg_segs[i].addr;
		end = buf + seglen;
		for (; buf < end; buf += cbe_lun->blocksize) {
			if (lbalen->flags & SWS_NDOB) {
				memset(buf, 0, cbe_lun->blocksize);
			} else {
				memcpy(buf, io->scsiio.kern_data_ptr,
				    cbe_lun->blocksize);
			}
			if (lbalen->flags & SWS_LBDATA)
				scsi_ulto4b(lbalen->lba + lba, buf);
			lba++;
		}
	}

	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
	beio->io_len = lba * cbe_lun->blocksize;

	/* We can not do all in one run. Correct and schedule rerun. */
	if (len_left > 0) {
		lbalen->lba += lba;
		lbalen->len -= lba;
		beio->beio_cont = ctl_be_block_cw_done_ws;
	}

	be_lun->dispatch(be_lun, beio);
}
/*
 * Config-write dispatcher for UNMAP.  Validates that only SU_ANCHOR is
 * set and that the backend supports unmap, then marks the beio with
 * io_offset == -1 so the backend unmap hook walks the descriptor list
 * instead of a single range.
 */
static void
ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun,
			       union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_ptr_len_flags *ptrlen;

	DPRINTF("entered\n");

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];

	if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) {
		ctl_free_beio(beio);
		ctl_set_invalid_field(&io->scsiio,
				      /*sks_valid*/ 0,
				      /*command*/ 1,
				      /*field*/ 0,
				      /*bit_valid*/ 0,
				      /*bit*/ 0);
		ctl_config_write_done(io);
		return;
	}

	beio->io_len = 0;
	beio->io_offset = -1;	/* sentinel: use the descriptor list */
	beio->bio_cmd = BIO_DELETE;
	beio->ds_trans_type = DEVSTAT_FREE;
	DPRINTF("UNMAP\n");
	be_lun->unmap(be_lun, beio);
}

/*
 * Completion for config-read requests: release the beio and finish the
 * CTL config read.
 */
static void
ctl_be_block_cr_done(struct ctl_be_block_io *beio)
{
	union ctl_io *io;

	io = beio->io;
	ctl_free_beio(beio);
	ctl_config_read_done(io);
}

/*
 * Config-read dispatcher.  Currently only SERVICE ACTION IN (GET LBA
 * STATUS) is handled; it is forwarded to the backend's get_lba_status
 * hook when one exists, otherwise completed immediately.
 */
static void
ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
			 union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;

	DPRINTF("entered\n");

	softc = be_lun->softc;
	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	beio->beio_cont = ctl_be_block_cr_done;
	PRIV(io)->ptr = (void *)beio;

	switch (io->scsiio.cdb[0]) {
	case SERVICE_ACTION_IN:		/* GET LBA STATUS */
		beio->bio_cmd = -1;
		beio->ds_trans_type = DEVSTAT_NO_DATA;
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		beio->io_len = 0;
		if (be_lun->get_lba_status)
			be_lun->get_lba_status(be_lun, beio);
		else
			ctl_be_block_cr_done(beio);
		break;
	default:
		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
		break;
	}
}
/*
 * Completion for config-write requests: release the beio and finish
 * the CTL config write.
 */
static void
ctl_be_block_cw_done(struct ctl_be_block_io *beio)
{
	union ctl_io *io;

	io = beio->io;
	ctl_free_beio(beio);
	ctl_config_write_done(io);
}

/*
 * Config-write dispatcher.  Allocates a beio, maps the SCSI tag type
 * onto a devstat tag type, and routes SYNCHRONIZE CACHE, WRITE SAME
 * and UNMAP to their specific handlers.
 */
static void
ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
			 union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;

	DPRINTF("entered\n");

	softc = be_lun->softc;
	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	beio->beio_cont = ctl_be_block_cw_done;
	switch (io->scsiio.tag_type) {
	case CTL_TAG_ORDERED:
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		break;
	case CTL_TAG_HEAD_OF_QUEUE:
		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
		break;
	case CTL_TAG_UNTAGGED:
	case CTL_TAG_SIMPLE:
	case CTL_TAG_ACA:
	default:
		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
		break;
	}
	PRIV(io)->ptr = (void *)beio;

	switch (io->scsiio.cdb[0]) {
	case SYNCHRONIZE_CACHE:
	case SYNCHRONIZE_CACHE_16:
		ctl_be_block_cw_dispatch_sync(be_lun, io);
		break;
	case WRITE_SAME_10:
	case WRITE_SAME_16:
		ctl_be_block_cw_dispatch_ws(be_lun, io);
		break;
	case UNMAP:
		ctl_be_block_cw_dispatch_unmap(be_lun, io);
		break;
	default:
		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
		break;
	}
}

/* DTrace probes marking the start and buffer-allocation points of
 * read/write dispatch. */
SDT_PROBE_DEFINE1(cbb, , read, start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , read, alloc_done, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, alloc_done, "uint64_t");

/*
 * Continuation for a data I/O that was split into multiple beios:
 * free the finished beio and, unless aborted or failed, reset the
 * status and requeue the io on the input queue so the worker picks up
 * the next chunk.
 */
static void
ctl_be_block_next(struct ctl_be_block_io *beio)
{
	struct ctl_be_block_lun *be_lun;
	union ctl_io *io;

	io = beio->io;
	be_lun = beio->lun;
	ctl_free_beio(beio);
	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
		ctl_data_submit_done(io);
		return;
	}

	io->io_hdr.status &= ~CTL_STATUS_MASK;
	io->io_hdr.status |= CTL_STATUS_NONE;

	mtx_lock(&be_lun->queue_lock);
	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
	mtx_unlock(&be_lun->queue_lock);
	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
}
/*
 * Main READ/WRITE dispatcher.  Sizes this pass of the I/O (half the
 * buffer budget for COMPARE, which needs two S/G lists; bptrlen->len
 * tracks LBAs already covered by earlier passes), allocates the S/G
 * segments, and either starts the backend read or asks CTL to move
 * write data in.  If the whole request did not fit, ctl_be_block_next
 * is installed as continuation to schedule the remainder.
 */
static void
ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
		      union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;
	struct ctl_lba_len_flags *lbalen;
	struct ctl_ptr_len_flags *bptrlen;
	uint64_t len_left, lbas;
	int i;

	softc = be_lun->softc;

	DPRINTF("entered\n");

	lbalen = ARGS(io);
	if (lbalen->flags & CTL_LLF_WRITE) {
		SDT_PROBE0(cbb, , write, start);
	} else {
		SDT_PROBE0(cbb, , read, start);
	}

	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	bptrlen = PRIV(io);
	bptrlen->ptr = (void *)beio;

	switch (io->scsiio.tag_type) {
	case CTL_TAG_ORDERED:
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		break;
	case CTL_TAG_HEAD_OF_QUEUE:
		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
		break;
	case CTL_TAG_UNTAGGED:
	case CTL_TAG_SIMPLE:
	case CTL_TAG_ACA:
	default:
		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
		break;
	}

	if (lbalen->flags & CTL_LLF_WRITE) {
		beio->bio_cmd = BIO_WRITE;
		beio->ds_trans_type = DEVSTAT_WRITE;
	} else {
		beio->bio_cmd = BIO_READ;
		beio->ds_trans_type = DEVSTAT_READ;
	}

	DPRINTF("%s at LBA %jx len %u @%ju\n",
	       (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE",
	       (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len);
	/* COMPARE keeps separate read and datamove S/G lists, so each
	 * gets half of the buffer budget. */
	if (lbalen->flags & CTL_LLF_COMPARE) {
		beio->two_sglists = 1;
		lbas = CTLBLK_HALF_IO_SIZE;
	} else {
		lbas = CTLBLK_MAX_IO_SIZE;
	}
	lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize);
	beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize;
	beio->io_len = lbas * cbe_lun->blocksize;
	bptrlen->len += lbas;

	for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
		KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)",
		    i, CTLBLK_MAX_SEGS));

		/*
		 * Setup the S/G entry for this chunk.
		 */
		ctl_alloc_seg(softc, &beio->sg_segs[i],
		    min(CTLBLK_MAX_SEG, len_left));

		DPRINTF("segment %d addr %p len %zd\n", i,
			beio->sg_segs[i].addr, beio->sg_segs[i].len);

		/* Set up second segment for compare operation. */
		if (beio->two_sglists) {
			ctl_alloc_seg(softc,
			    &beio->sg_segs[i + CTLBLK_HALF_SEGS],
			    beio->sg_segs[i].len);
		}

		beio->num_segs++;
		len_left -= beio->sg_segs[i].len;
	}
	if (bptrlen->len < lbalen->len)
		beio->beio_cont = ctl_be_block_next;
	io->scsiio.be_move_done = ctl_be_block_move_done;
	/* For compare we have separate S/G lists for read and datamove. */
	if (beio->two_sglists)
		io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS];
	else
		io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs;
	io->scsiio.kern_data_len = beio->io_len;
	io->scsiio.kern_sg_entries = beio->num_segs;
	io->io_hdr.flags |= CTL_FLAG_ALLOCATED;

	/*
	 * For the read case, we need to read the data into our buffers and
	 * then we can send it back to the user.  For the write case, we
	 * need to get the data from the user first.
	 */
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE0(cbb, , read, alloc_done);
		be_lun->dispatch(be_lun, beio);
	} else {
		SDT_PROBE0(cbb, , write, alloc_done);
		ctl_datamove(io);
	}
}
/*
 * Taskqueue worker for a LUN.  Drains the four per-LUN queues in
 * priority order — datamove, config write, config read, then new input
 * — re-checking from the top after each item so higher-priority work is
 * never starved.  Runs until all queues are empty.
 */
static void
ctl_be_block_worker(void *context, int pending)
{
	struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context;
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	union ctl_io *io;
	struct ctl_be_block_io *beio;

	DPRINTF("entered\n");
	/*
	 * Fetch and process I/Os from all queues.  If we detect LUN
	 * CTL_LUN_FLAG_NO_MEDIA status here -- it is result of a race,
	 * so make response maximally opaque to not confuse initiator.
	 */
	for (;;) {
		mtx_lock(&be_lun->queue_lock);
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
		if (io != NULL) {
			DPRINTF("datamove queue\n");
			STAILQ_REMOVE_HEAD(&be_lun->datamove_queue, links);
			mtx_unlock(&be_lun->queue_lock);
			beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
				ctl_set_busy(&io->scsiio);
				ctl_complete_beio(beio);
				continue;
			}
			be_lun->dispatch(be_lun, beio);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
		if (io != NULL) {
			DPRINTF("config write queue\n");
			STAILQ_REMOVE_HEAD(&be_lun->config_write_queue, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
				ctl_set_busy(&io->scsiio);
				ctl_config_write_done(io);
				continue;
			}
			ctl_be_block_cw_dispatch(be_lun, io);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue);
		if (io != NULL) {
			DPRINTF("config read queue\n");
			STAILQ_REMOVE_HEAD(&be_lun->config_read_queue, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
				ctl_set_busy(&io->scsiio);
				ctl_config_read_done(io);
				continue;
			}
			ctl_be_block_cr_dispatch(be_lun, io);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
		if (io != NULL) {
			DPRINTF("input queue\n");
			STAILQ_REMOVE_HEAD(&be_lun->input_queue, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
				ctl_set_busy(&io->scsiio);
				ctl_data_submit_done(io);
				continue;
			}
			ctl_be_block_dispatch(be_lun, io);
			continue;
		}

		/*
		 * If we get here, there is no work left in the queues, so
		 * just break out and let the task queue go to sleep.
		 */
		mtx_unlock(&be_lun->queue_lock);
		break;
	}
}
/*
 * Entry point from CTL to the backend for I/O.  We queue everything to a
 * work thread, so this just puts the I/O on a queue and wakes up the
 * thread.
 */
static int
ctl_be_block_submit(union ctl_io *io)
{
	struct ctl_be_block_lun *be_lun;

	DPRINTF("entered\n");

	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);

	KASSERT(io->io_hdr.io_type == CTL_IO_SCSI,
	    ("%s: unexpected I/O type %x", __func__, io->io_hdr.io_type));

	/* Reset the LBAs-already-done counter for this fresh request. */
	PRIV(io)->len = 0;

	mtx_lock(&be_lun->queue_lock);
	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
	mtx_unlock(&be_lun->queue_lock);
	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);

	return (CTL_RETVAL_COMPLETE);
}

/*
 * Character-device ioctl handler for the block backend control node.
 * Only CTL_LUN_REQ is supported; it fans out to LUN create/remove/
 * modify.  Unknown ioctls return ENOTTY; unknown LUN request types are
 * reported through lun_req->status/error_str.
 */
static int
ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
			int flag, struct thread *td)
{
	struct ctl_be_block_softc *softc = &backend_block_softc;
	int error;

	error = 0;
	switch (cmd) {
	case CTL_LUN_REQ: {
		struct ctl_lun_req *lun_req;

		lun_req = (struct ctl_lun_req *)addr;

		switch (lun_req->reqtype) {
		case CTL_LUNREQ_CREATE:
			error = ctl_be_block_create(softc, lun_req);
			break;
		case CTL_LUNREQ_RM:
			error = ctl_be_block_rm(softc, lun_req);
			break;
		case CTL_LUNREQ_MODIFY:
			error = ctl_be_block_modify(softc, lun_req);
			break;
		default:
			lun_req->status = CTL_LUN_ERROR;
			snprintf(lun_req->error_str, sizeof(lun_req->error_str),
				 "invalid LUN request type %d",
				 lun_req->reqtype);
			break;
		}
		break;
	}
	default:
		error = ENOTTY;
		break;
	}

	return (error);
}
/*
 * Configure a LUN backed by a regular file: install the file-based
 * method vectors, size the LUN from params or the file's vattr, pick a
 * logical block size, and derive physical/UNMAP block exponent and
 * offset from the "pblocksize"/"pblockoffset"/"ublocksize"/
 * "ublockoffset" options (accepted only if they are power-of-two
 * multiples of the logical block size).  Returns 0 or an errno with
 * req->error_str filled in.
 */
static int
ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun;
	struct ctl_be_block_filedata *file_data;
	struct ctl_lun_create_params *params;
	const char *value;
	struct vattr vattr;
	off_t ps, pss, po, pos, us, uss, uo, uos;
	int error;

	cbe_lun = &be_lun->cbe_lun;
	file_data = &be_lun->backend.file;
	params = &be_lun->params;

	be_lun->dev_type = CTL_BE_BLOCK_FILE;
	be_lun->dispatch = ctl_be_block_dispatch_file;
	be_lun->lun_flush = ctl_be_block_flush_file;
	be_lun->get_lba_status = ctl_be_block_gls_file;
	be_lun->getattr = ctl_be_block_getattr_file;
	be_lun->unmap = NULL;	/* files cannot unmap */
	cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;

	error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
	if (error != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "error calling VOP_GETATTR() for file %s",
			 be_lun->dev_path);
		return (error);
	}

	file_data->cred = crhold(curthread->td_ucred);
	if (params->lun_size_bytes != 0)
		be_lun->size_bytes = params->lun_size_bytes;
	else
		be_lun->size_bytes = vattr.va_size;

	/*
	 * For files we can use any logical block size.  Prefer 512 bytes
	 * for compatibility reasons.  If file's vattr.va_blocksize
	 * (preferred I/O block size) is bigger and multiple to chosen
	 * logical block size -- report it as physical block size.
	 */
	if (params->blocksize_bytes != 0)
		cbe_lun->blocksize = params->blocksize_bytes;
	else if (cbe_lun->lun_type == T_CDROM)
		cbe_lun->blocksize = 2048;
	else
		cbe_lun->blocksize = 512;
	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
	    0 : (be_lun->size_blocks - 1);

	us = ps = vattr.va_blocksize;
	uo = po = 0;

	value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
	if (value != NULL)
		ctl_expand_number(value, &ps);
	value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL);
	if (value != NULL)
		ctl_expand_number(value, &po);
	pss = ps / cbe_lun->blocksize;
	pos = po / cbe_lun->blocksize;
	/* Accept only exact, power-of-two block multiples. */
	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
		cbe_lun->pblockexp = fls(pss) - 1;
		cbe_lun->pblockoff = (pss - pos) % pss;
	}

	value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL);
	if (value != NULL)
		ctl_expand_number(value, &us);
	value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL);
	if (value != NULL)
		ctl_expand_number(value, &uo);
	uss = us / cbe_lun->blocksize;
	uos = uo / cbe_lun->blocksize;
	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
		cbe_lun->ublockexp = fls(uss) - 1;
		cbe_lun->ublockoff = (uss - uos) % uss;
	}

	/*
	 * Sanity check.  The media size has to be at least one
	 * sector long.
	 */
	if (be_lun->size_bytes < cbe_lun->blocksize) {
		error = EINVAL;
		snprintf(req->error_str, sizeof(req->error_str),
			 "file %s size %ju < block size %u", be_lun->dev_path,
			 (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize);
	}

	cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize;
	return (error);
}
/*
 * Configure a LUN backed by a character (disk) device: install
 * device-based method vectors (zvols get their own dispatch and
 * get_lba_status), validate the requested logical block size against
 * the device sector size, size the LUN from the media size, derive
 * physical/UNMAP geometry from stripe size/offset and the same nvlist
 * options as the file backend, and probe GEOM::candelete to decide
 * whether UNMAP is supported (overridable by the "unmap" option).
 * Returns 0 or an errno with req->error_str filled in; the cdev thread
 * reference is dropped on every exit path.
 */
static int
ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_lun_create_params *params;
	struct cdevsw *csw;
	struct cdev *dev;
	const char *value;
	int error, atomic, maxio, ref, unmap, tmp;
	off_t ps, pss, po, pos, us, uss, uo, uos, otmp;

	params = &be_lun->params;

	be_lun->dev_type = CTL_BE_BLOCK_DEV;
	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw == NULL)
		return (ENXIO);
	if (strcmp(csw->d_name, "zvol") == 0) {
		be_lun->dispatch = ctl_be_block_dispatch_zvol;
		be_lun->get_lba_status = ctl_be_block_gls_zvol;
		atomic = maxio = CTLBLK_MAX_IO_SIZE;
	} else {
		be_lun->dispatch = ctl_be_block_dispatch_dev;
		be_lun->get_lba_status = NULL;
		atomic = 0;
		maxio = dev->si_iosize_max;
		if (maxio <= 0)
			maxio = DFLTPHYS;
		if (maxio > CTLBLK_MAX_SEG)
			maxio = CTLBLK_MAX_SEG;
	}
	be_lun->lun_flush = ctl_be_block_flush_dev;
	be_lun->getattr = ctl_be_block_getattr_dev;
	be_lun->unmap = ctl_be_block_unmap_dev;

	if (!csw->d_ioctl) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "no d_ioctl for device %s!", be_lun->dev_path);
		return (ENODEV);
	}

	error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD,
	    curthread);
	if (error) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "error %d returned for DIOCGSECTORSIZE ioctl "
			 "on %s!", error, be_lun->dev_path);
		return (error);
	}

	/*
	 * If the user has asked for a blocksize that is greater than the
	 * backing device's blocksize, we can do it only if the blocksize
	 * the user is asking for is an even multiple of the underlying
	 * device's blocksize.
	 */
	if ((params->blocksize_bytes != 0) &&
	    (params->blocksize_bytes >= tmp)) {
		if (params->blocksize_bytes % tmp == 0) {
			cbe_lun->blocksize = params->blocksize_bytes;
		} else {
			dev_relthread(dev, ref);
			snprintf(req->error_str, sizeof(req->error_str),
				 "requested blocksize %u is not an even "
				 "multiple of backing device blocksize %u",
				 params->blocksize_bytes, tmp);
			return (EINVAL);
		}
	} else if (params->blocksize_bytes != 0) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "requested blocksize %u < backing device "
			 "blocksize %u", params->blocksize_bytes, tmp);
		return (EINVAL);
	} else if (cbe_lun->lun_type == T_CDROM)
		cbe_lun->blocksize = MAX(tmp, 2048);
	else
		cbe_lun->blocksize = tmp;

	error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD,
	    curthread);
	if (error) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "error %d returned for DIOCGMEDIASIZE "
			 " ioctl on %s!", error,
			 be_lun->dev_path);
		return (error);
	}

	if (params->lun_size_bytes != 0) {
		if (params->lun_size_bytes > otmp) {
			dev_relthread(dev, ref);
			snprintf(req->error_str, sizeof(req->error_str),
				 "requested LUN size %ju > backing device "
				 "size %ju",
				 (uintmax_t)params->lun_size_bytes,
				 (uintmax_t)otmp);
			return (EINVAL);
		}

		be_lun->size_bytes = params->lun_size_bytes;
	} else
		be_lun->size_bytes = otmp;
	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
	    0 : (be_lun->size_blocks - 1);

	/* Stripe size/offset seed the physical block geometry defaults. */
	error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD,
	    curthread);
	if (error)
		ps = po = 0;
	else {
		error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po,
		    FREAD, curthread);
		if (error)
			po = 0;
	}
	us = ps;
	uo = po;

	value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
	if (value != NULL)
		ctl_expand_number(value, &ps);
	value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL);
	if (value != NULL)
		ctl_expand_number(value, &po);
	pss = ps / cbe_lun->blocksize;
	pos = po / cbe_lun->blocksize;
	/* Accept only exact, power-of-two block multiples. */
	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
		cbe_lun->pblockexp = fls(pss) - 1;
		cbe_lun->pblockoff = (pss - pos) % pss;
	}

	value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL);
	if (value != NULL)
		ctl_expand_number(value, &us);
	value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL);
	if (value != NULL)
		ctl_expand_number(value, &uo);
	uss = us / cbe_lun->blocksize;
	uos = uo / cbe_lun->blocksize;
	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
		cbe_lun->ublockexp = fls(uss) - 1;
		cbe_lun->ublockoff = (uss - uos) % uss;
	}

	cbe_lun->atomicblock = atomic / cbe_lun->blocksize;
	cbe_lun->opttxferlen = maxio / cbe_lun->blocksize;

	if (be_lun->dispatch == ctl_be_block_dispatch_zvol) {
		unmap = 1;
	} else {
		struct diocgattr_arg arg;

		strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
		arg.len = sizeof(arg.value.i);
		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
		    curthread);
		unmap = (error == 0) ? arg.value.i : 0;
	}
	/* The "unmap" option overrides what the device reports. */
	value = dnvlist_get_string(cbe_lun->options, "unmap", NULL);
	if (value != NULL)
		unmap = (strcmp(value, "on") == 0);
	if (unmap)
		cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
	else
		cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;

	dev_relthread(dev, ref);
	return (0);
}
arg.value.i : 0; 2051 } 2052 value = dnvlist_get_string(cbe_lun->options, "unmap", NULL); 2053 if (value != NULL) 2054 unmap = (strcmp(value, "on") == 0); 2055 if (unmap) 2056 cbe_lun->flags |= CTL_LUN_FLAG_UNMAP; 2057 else 2058 cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP; 2059 2060 dev_relthread(dev, ref); 2061 return (0); 2062} 2063 2064static int 2065ctl_be_block_close(struct ctl_be_block_lun *be_lun) 2066{ 2067 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 2068 int flags; 2069 2070 if (be_lun->vn) { 2071 flags = FREAD; 2072 if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0) 2073 flags |= FWRITE; 2074 (void)vn_close(be_lun->vn, flags, NOCRED, curthread); 2075 be_lun->vn = NULL; 2076 2077 switch (be_lun->dev_type) { 2078 case CTL_BE_BLOCK_DEV: 2079 break; 2080 case CTL_BE_BLOCK_FILE: 2081 if (be_lun->backend.file.cred != NULL) { 2082 crfree(be_lun->backend.file.cred); 2083 be_lun->backend.file.cred = NULL; 2084 } 2085 break; 2086 case CTL_BE_BLOCK_NONE: 2087 break; 2088 default: 2089 panic("Unexpected backend type %d", be_lun->dev_type); 2090 break; 2091 } 2092 be_lun->dev_type = CTL_BE_BLOCK_NONE; 2093 } 2094 return (0); 2095} 2096 2097static int 2098ctl_be_block_open(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 2099{ 2100 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 2101 struct nameidata nd; 2102 const char *value; 2103 int error, flags; 2104 2105 error = 0; 2106 if (rootvnode == NULL) { 2107 snprintf(req->error_str, sizeof(req->error_str), 2108 "Root filesystem is not mounted"); 2109 return (1); 2110 } 2111 pwd_ensure_dirs(); 2112 2113 value = dnvlist_get_string(cbe_lun->options, "file", NULL); 2114 if (value == NULL) { 2115 snprintf(req->error_str, sizeof(req->error_str), 2116 "no file argument specified"); 2117 return (1); 2118 } 2119 free(be_lun->dev_path, M_CTLBLK); 2120 be_lun->dev_path = strdup(value, M_CTLBLK); 2121 2122 flags = FREAD; 2123 value = dnvlist_get_string(cbe_lun->options, "readonly", NULL); 2124 if (value != NULL) { 2125 if 
(strcmp(value, "on") != 0) 2126 flags |= FWRITE; 2127 } else if (cbe_lun->lun_type == T_DIRECT) 2128 flags |= FWRITE; 2129 2130again: 2131 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread); 2132 error = vn_open(&nd, &flags, 0, NULL); 2133 if ((error == EROFS || error == EACCES) && (flags & FWRITE)) { 2134 flags &= ~FWRITE; 2135 goto again; 2136 } 2137 if (error) { 2138 /* 2139 * This is the only reasonable guess we can make as far as 2140 * path if the user doesn't give us a fully qualified path. 2141 * If they want to specify a file, they need to specify the 2142 * full path. 2143 */ 2144 if (be_lun->dev_path[0] != '/') { 2145 char *dev_name; 2146 2147 asprintf(&dev_name, M_CTLBLK, "/dev/%s", 2148 be_lun->dev_path); 2149 free(be_lun->dev_path, M_CTLBLK); 2150 be_lun->dev_path = dev_name; 2151 goto again; 2152 } 2153 snprintf(req->error_str, sizeof(req->error_str), 2154 "error opening %s: %d", be_lun->dev_path, error); 2155 return (error); 2156 } 2157 if (flags & FWRITE) 2158 cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY; 2159 else 2160 cbe_lun->flags |= CTL_LUN_FLAG_READONLY; 2161 2162 NDFREE(&nd, NDF_ONLY_PNBUF); 2163 be_lun->vn = nd.ni_vp; 2164 2165 /* We only support disks and files. 
 */
	if (vn_isdisk(be_lun->vn, &error)) {
		error = ctl_be_block_open_dev(be_lun, req);
	} else if (be_lun->vn->v_type == VREG) {
		error = ctl_be_block_open_file(be_lun, req);
	} else {
		error = EINVAL;
		snprintf(req->error_str, sizeof(req->error_str),
		    "%s is not a disk or plain file", be_lun->dev_path);
	}
	VOP_UNLOCK(be_lun->vn, 0);

	if (error != 0)
		ctl_be_block_close(be_lun);
	/*
	 * Command serialization default: anything not going through the
	 * device dispatch path serializes reads; the "serseq" option
	 * overrides either default.
	 */
	cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
	if (be_lun->dispatch != ctl_be_block_dispatch_dev)
		cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
	value = dnvlist_get_string(cbe_lun->options, "serseq", NULL);
	if (value != NULL && strcmp(value, "on") == 0)
		cbe_lun->serseq = CTL_LUN_SERSEQ_ON;
	else if (value != NULL && strcmp(value, "read") == 0)
		cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
	else if (value != NULL && strcmp(value, "off") == 0)
		cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
	return (0);
}

/*
 * Handle a LUN create request: allocate and initialize a new block
 * backend LUN, open its backing store, and register it with CTL.
 * Returns 0 with the detailed outcome in req->status.
 */
static int
ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun;
	struct ctl_be_block_lun *be_lun;
	struct ctl_lun_create_params *params;
	char num_thread_str[16];
	char tmpstr[32];
	const char *value;
	int retval, num_threads;
	int tmp_num_threads;

	params = &req->reqdata.create;
	retval = 0;
	req->status = CTL_LUN_OK;

	be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK);
	cbe_lun = &be_lun->cbe_lun;
	be_lun->params = req->reqdata.create;
	be_lun->softc = softc;
	STAILQ_INIT(&be_lun->input_queue);
	STAILQ_INIT(&be_lun->config_read_queue);
	STAILQ_INIT(&be_lun->config_write_queue);
	STAILQ_INIT(&be_lun->datamove_queue);
	mtx_init(&be_lun->io_lock, "ctlblock io", NULL, MTX_DEF);
	mtx_init(&be_lun->queue_lock, "ctlblock queue", NULL, MTX_DEF);
	cbe_lun->options = nvlist_clone(req->args_nvl);

	if (params->flags &
CTL_LUN_FLAG_DEV_TYPE)
		cbe_lun->lun_type = params->device_type;
	else
		cbe_lun->lun_type = T_DIRECT;
	be_lun->flags = 0;
	cbe_lun->flags = 0;
	/* HA role: explicit option wins, otherwise follow the shelf state. */
	value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
	if (value != NULL) {
		if (strcmp(value, "primary") == 0)
			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;

	if (cbe_lun->lun_type == T_DIRECT ||
	    cbe_lun->lun_type == T_CDROM) {
		be_lun->size_bytes = params->lun_size_bytes;
		/* Default block size: 2048 for CD-ROM, 512 otherwise. */
		if (params->blocksize_bytes != 0)
			cbe_lun->blocksize = params->blocksize_bytes;
		else if (cbe_lun->lun_type == T_CDROM)
			cbe_lun->blocksize = 2048;
		else
			cbe_lun->blocksize = 512;
		be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
		cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
		    0 : (be_lun->size_blocks - 1);

		/*
		 * Only open the backing store now if we are primary or in
		 * serialize-only HA mode; an open failure is downgraded to
		 * a warning and the LUN is created without media.
		 */
		if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
		    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
			retval = ctl_be_block_open(be_lun, req);
			if (retval != 0) {
				retval = 0;
				req->status = CTL_LUN_WARNING;
			}
		}
		num_threads = cbb_num_threads;
	} else {
		num_threads = 1;
	}

	value = dnvlist_get_string(cbe_lun->options, "num_threads", NULL);
	if (value != NULL) {
		tmp_num_threads = strtol(value, NULL, 0);

		/*
		 * We don't let the user specify less than one
		 * thread, but hope he's clueful enough not to
		 * specify 1000 threads.
2267 */ 2268 if (tmp_num_threads < 1) { 2269 snprintf(req->error_str, sizeof(req->error_str), 2270 "invalid number of threads %s", 2271 num_thread_str); 2272 goto bailout_error; 2273 } 2274 num_threads = tmp_num_threads; 2275 } 2276 2277 if (be_lun->vn == NULL) 2278 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; 2279 /* Tell the user the blocksize we ended up using */ 2280 params->lun_size_bytes = be_lun->size_bytes; 2281 params->blocksize_bytes = cbe_lun->blocksize; 2282 if (params->flags & CTL_LUN_FLAG_ID_REQ) { 2283 cbe_lun->req_lun_id = params->req_lun_id; 2284 cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ; 2285 } else 2286 cbe_lun->req_lun_id = 0; 2287 2288 cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown; 2289 cbe_lun->be = &ctl_be_block_driver; 2290 2291 if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) { 2292 snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%04d", 2293 softc->num_luns); 2294 strncpy((char *)cbe_lun->serial_num, tmpstr, 2295 MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr))); 2296 2297 /* Tell the user what we used for a serial number */ 2298 strncpy((char *)params->serial_num, tmpstr, 2299 MIN(sizeof(params->serial_num), sizeof(tmpstr))); 2300 } else { 2301 strncpy((char *)cbe_lun->serial_num, params->serial_num, 2302 MIN(sizeof(cbe_lun->serial_num), 2303 sizeof(params->serial_num))); 2304 } 2305 if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) { 2306 snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%04d", softc->num_luns); 2307 strncpy((char *)cbe_lun->device_id, tmpstr, 2308 MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr))); 2309 2310 /* Tell the user what we used for a device ID */ 2311 strncpy((char *)params->device_id, tmpstr, 2312 MIN(sizeof(params->device_id), sizeof(tmpstr))); 2313 } else { 2314 strncpy((char *)cbe_lun->device_id, params->device_id, 2315 MIN(sizeof(cbe_lun->device_id), 2316 sizeof(params->device_id))); 2317 } 2318 2319 TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun); 2320 2321 be_lun->io_taskqueue = 
taskqueue_create("ctlblocktq", M_WAITOK,
	    taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);

	if (be_lun->io_taskqueue == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "unable to create taskqueue");
		goto bailout_error;
	}

	/*
	 * Note that we start the same number of threads by default for
	 * both the file case and the block device case. For the file
	 * case, we need multiple threads to allow concurrency, because the
	 * vnode interface is designed to be a blocking interface. For the
	 * block device case, ZFS zvols at least will block the caller's
	 * context in many instances, and so we need multiple threads to
	 * overcome that problem. Other block devices don't need as many
	 * threads, but they shouldn't cause too many problems.
	 *
	 * If the user wants to just have a single thread for a block
	 * device, he can specify that when the LUN is created, or change
	 * the tunable/sysctl to alter the default number of threads.
	 */
	retval = taskqueue_start_threads_in_proc(&be_lun->io_taskqueue,
					 /*num threads*/num_threads,
					 /*priority*/PUSER,
					 /*proc*/control_softc->ctl_proc,
					 /*thread name*/"block");

	if (retval != 0)
		goto bailout_error;

	be_lun->num_threads = num_threads;

	retval = ctl_add_lun(&be_lun->cbe_lun);
	if (retval != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "ctl_add_lun() returned error %d, see dmesg for "
			 "details", retval);
		retval = 0;
		goto bailout_error;
	}

	be_lun->disk_stats = devstat_new_entry("cbb", cbe_lun->lun_id,
					       cbe_lun->blocksize,
					       DEVSTAT_ALL_SUPPORTED,
					       cbe_lun->lun_type
					       | DEVSTAT_TYPE_IF_OTHER,
					       DEVSTAT_PRIORITY_OTHER);

	/* Publish the new LUN on the backend's list. */
	mtx_lock(&softc->lock);
	softc->num_luns++;
	SLIST_INSERT_HEAD(&softc->lun_list, be_lun, links);
	mtx_unlock(&softc->lock);

	params->req_lun_id = cbe_lun->lun_id;

	return (retval);

bailout_error:
	/* Unwind everything set up above; req->error_str is already set. */
	req->status = CTL_LUN_ERROR;

	if (be_lun->io_taskqueue != NULL)
		taskqueue_free(be_lun->io_taskqueue);
	ctl_be_block_close(be_lun);
	if (be_lun->dev_path != NULL)
		free(be_lun->dev_path, M_CTLBLK);
	nvlist_destroy(cbe_lun->options);
	mtx_destroy(&be_lun->queue_lock);
	mtx_destroy(&be_lun->io_lock);
	free(be_lun, M_CTLBLK);

	return (retval);
}

/*
 * Handle a LUN remove request: unlink the LUN from the backend list,
 * close its backing store, and wait for CTL to finish tearing it down.
 */
static int
ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
{
	struct ctl_lun_rm_params *params;
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *cbe_lun;
	int retval;

	params = &req->reqdata.rm;

	sx_xlock(&softc->modify_lock);
	mtx_lock(&softc->lock);
	/* Find the LUN by id and unlink it; be_lun is NULL if not found. */
	SLIST_FOREACH(be_lun, &softc->lun_list, links) {
		if (be_lun->cbe_lun.lun_id == params->lun_id) {
			SLIST_REMOVE(&softc->lun_list, be_lun,
			    ctl_be_block_lun, links);
			softc->num_luns--;
			break;
		}
	}
	mtx_unlock(&softc->lock);
	sx_xunlock(&softc->modify_lock);
	if (be_lun == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
		    "LUN %u is not managed by the block backend",
		    params->lun_id);
		goto bailout_error;
	}
	cbe_lun = &be_lun->cbe_lun;

	if (be_lun->vn != NULL) {
		/* Declare the media gone and quiesce outstanding I/O. */
		cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
		ctl_lun_no_media(cbe_lun);
		taskqueue_drain_all(be_lun->io_taskqueue);
		ctl_be_block_close(be_lun);
	}

	mtx_lock(&softc->lock);
	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
	mtx_unlock(&softc->lock);

	retval = ctl_remove_lun(cbe_lun);
	if (retval != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
		    "error %d returned from ctl_remove_lun() for "
		    "LUN %d", retval, params->lun_id);
		mtx_lock(&softc->lock);
		be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
		mtx_unlock(&softc->lock);
		goto bailout_error;
	}

	/*
	 * Wait for the shutdown callback to mark the LUN unconfigured.
	 * A signal (EINTR) abandons the wait, and the final free is then
	 * left to ctl_be_block_lun_shutdown().
	 */
	mtx_lock(&softc->lock);
	while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
		retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblockrm", 0);
		if (retval == EINTR)
			break;
	}
	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
	if (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) {
		mtx_unlock(&softc->lock);
		free(be_lun, M_CTLBLK);
	} else {
		mtx_unlock(&softc->lock);
		return (EINTR);
	}

	req->status = CTL_LUN_OK;
	return (0);

bailout_error:
	req->status = CTL_LUN_ERROR;
	return (0);
}

/*
 * Handle a LUN modify request: update options, HA role and/or size of
 * an existing LUN, reopening or closing the backing store as the new
 * state requires.
 */
static int
ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
{
	struct ctl_lun_modify_params *params;
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *cbe_lun;
	const char *value;
	uint64_t oldsize;
	int error, wasprim;

	params = &req->reqdata.modify;

	sx_xlock(&softc->modify_lock);
	mtx_lock(&softc->lock);
	SLIST_FOREACH(be_lun, &softc->lun_list,
links) {
		if (be_lun->cbe_lun.lun_id == params->lun_id)
			break;
	}
	mtx_unlock(&softc->lock);
	if (be_lun == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
		    "LUN %u is not managed by the block backend",
		    params->lun_id);
		goto bailout_error;
	}
	cbe_lun = &be_lun->cbe_lun;

	if (params->lun_size_bytes != 0)
		be_lun->params.lun_size_bytes = params->lun_size_bytes;

	/* Replace the option list wholesale if new arguments were given. */
	if (req->args_nvl != NULL) {
		nvlist_destroy(cbe_lun->options);
		cbe_lun->options = nvlist_clone(req->args_nvl);
	}

	/* Recompute the HA role and notify CTL if it changed. */
	wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY);
	value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
	if (value != NULL) {
		if (strcmp(value, "primary") == 0)
			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
		else
			cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
	else
		cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
	if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) {
		if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)
			ctl_lun_primary(cbe_lun);
		else
			ctl_lun_secondary(cbe_lun);
	}

	oldsize = be_lun->size_blocks;
	if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
	    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
		/* (Re)open the backing store to pick up the new state. */
		if (be_lun->vn == NULL)
			error = ctl_be_block_open(be_lun, req);
		else if (vn_isdisk(be_lun->vn, &error))
			error = ctl_be_block_open_dev(be_lun, req);
		else if (be_lun->vn->v_type == VREG) {
			vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
			error = ctl_be_block_open_file(be_lun, req);
			VOP_UNLOCK(be_lun->vn, 0);
		} else
			error = EINVAL;
		/* Propagate media arrival/departure to CTL. */
		if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) &&
		    be_lun->vn != NULL) {
			cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
			ctl_lun_has_media(cbe_lun);
		} else if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) == 0 &&
		    be_lun->vn == NULL) {
			cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
			ctl_lun_no_media(cbe_lun);
		}
		cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
	} else {
		/* Secondary role: close the backing store if it is open. */
		if (be_lun->vn != NULL) {
			cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
			ctl_lun_no_media(cbe_lun);
			taskqueue_drain_all(be_lun->io_taskqueue);
			error = ctl_be_block_close(be_lun);
		} else
			error = 0;
	}
	if (be_lun->size_blocks != oldsize)
		ctl_lun_capacity_changed(cbe_lun);

	/* Tell the user the exact size we ended up using */
	params->lun_size_bytes = be_lun->size_bytes;

	sx_xunlock(&softc->modify_lock);
	req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK;
	return (0);

bailout_error:
	sx_xunlock(&softc->modify_lock);
	req->status = CTL_LUN_ERROR;
	return (0);
}

/*
 * CTL callback invoked once a LUN has been removed: release all
 * per-LUN resources.  The structure itself is freed here unless
 * ctl_be_block_rm() is waiting on it, in which case that waiter is
 * woken up and performs the final free.
 */
static void
ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun)
{
	struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)cbe_lun;
	struct ctl_be_block_softc *softc = be_lun->softc;

	taskqueue_drain_all(be_lun->io_taskqueue);
	taskqueue_free(be_lun->io_taskqueue);
	if (be_lun->disk_stats != NULL)
		devstat_remove_entry(be_lun->disk_stats);
	nvlist_destroy(be_lun->cbe_lun.options);
	free(be_lun->dev_path, M_CTLBLK);
	mtx_destroy(&be_lun->queue_lock);
	mtx_destroy(&be_lun->io_lock);

	mtx_lock(&softc->lock);
	be_lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED;
	if (be_lun->flags & CTL_BE_BLOCK_LUN_WAITING)
		wakeup(be_lun);
	else
		free(be_lun, M_CTLBLK);
	mtx_unlock(&softc->lock);
}

/*
 * Dispatch a configuration-write CDB (cache sync, write same, unmap,
 * start/stop unit, prevent/allow) for a block-backend LUN.
 */
static int
ctl_be_block_config_write(union ctl_io *io)
{
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *cbe_lun;
	int retval;

	DPRINTF("entered\n");

	cbe_lun = CTL_BACKEND_LUN(io);
	be_lun = (struct ctl_be_block_lun *)cbe_lun;

	retval = 0;
	switch (io->scsiio.cdb[0]) {
	case SYNCHRONIZE_CACHE:
	case SYNCHRONIZE_CACHE_16:
	case WRITE_SAME_10:
	case WRITE_SAME_16:
	case UNMAP:
		/*
		 * The upper level CTL code will filter out any CDBs with
		 * the immediate bit set and return the proper error.
		 *
		 * We don't really need to worry about what LBA range the
		 * user asked to be synced out. When they issue a sync
		 * cache command, we'll sync out the whole thing.
		 */
		mtx_lock(&be_lun->queue_lock);
		STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
		    links);
		mtx_unlock(&be_lun->queue_lock);
		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
		break;
	case START_STOP_UNIT: {
		struct scsi_start_stop_unit *cdb;
		struct ctl_lun_req req;

		cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
		/* Power-condition requests are accepted but ignored. */
		if ((cdb->how & SSS_PC_MASK) != 0) {
			ctl_set_success(&io->scsiio);
			ctl_config_write_done(io);
			break;
		}
		if (cdb->how & SSS_START) {
			/* START with LOEJ: load the media by reopening. */
			if ((cdb->how & SSS_LOEJ) && be_lun->vn == NULL) {
				retval = ctl_be_block_open(be_lun, &req);
				cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
				if (retval == 0) {
					cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
					ctl_lun_has_media(cbe_lun);
				} else {
					cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
					ctl_lun_no_media(cbe_lun);
				}
			}
			ctl_start_lun(cbe_lun);
		} else {
			ctl_stop_lun(cbe_lun);
			/* STOP with LOEJ: eject and close the backing store. */
			if (cdb->how & SSS_LOEJ) {
				cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
				cbe_lun->flags |= CTL_LUN_FLAG_EJECTED;
				ctl_lun_ejected(cbe_lun);
				if (be_lun->vn != NULL)
					ctl_be_block_close(be_lun);
			}
		}

		ctl_set_success(&io->scsiio);
		ctl_config_write_done(io);
		break;
	}
	case PREVENT_ALLOW:
		ctl_set_success(&io->scsiio);
		ctl_config_write_done(io);
		break;
	default:
		ctl_set_invalid_opcode(&io->scsiio);
		ctl_config_write_done(io);
		retval = CTL_RETVAL_COMPLETE;
		break;
	}

	return (retval);
}

static int
ctl_be_block_config_read(union ctl_io *io)
{
	struct ctl_be_block_lun *be_lun;
	int retval = 0;

	DPRINTF("entered\n");

	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);

	switch (io->scsiio.cdb[0]) {
	case SERVICE_ACTION_IN:
		/* GET LBA STATUS is queued to the worker thread. */
		if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) {
			mtx_lock(&be_lun->queue_lock);
			STAILQ_INSERT_TAIL(&be_lun->config_read_queue,
			    &io->io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);
			taskqueue_enqueue(be_lun->io_taskqueue,
			    &be_lun->io_task);
			retval = CTL_RETVAL_QUEUED;
			break;
		}
		ctl_set_invalid_field(&io->scsiio,
				      /*sks_valid*/ 1,
				      /*command*/ 1,
				      /*field*/ 1,
				      /*bit_valid*/ 1,
				      /*bit*/ 4);
		ctl_config_read_done(io);
		retval = CTL_RETVAL_COMPLETE;
		break;
	default:
		ctl_set_invalid_opcode(&io->scsiio);
		ctl_config_read_done(io);
		retval = CTL_RETVAL_COMPLETE;
		break;
	}

	return (retval);
}

/*
 * Emit this LUN's backend-specific XML (currently just the worker
 * thread count) into sb.  Returns the first sbuf_printf() error, if
 * any, otherwise 0.
 */
static int
ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb)
{
	struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun;
	int retval;

	retval = sbuf_printf(sb, "\t<num_threads>");
	if (retval != 0)
		goto bailout;
	retval = sbuf_printf(sb, "%d", lun->num_threads);
	if (retval != 0)
		goto bailout;
	retval = sbuf_printf(sb, "</num_threads>\n");

bailout:
	return (retval);
}

/*
 * Query a named attribute of the LUN via the dispatch-specific getattr
 * hook; UINT64_MAX means the attribute is not supported.
 */
static uint64_t
ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname)
{
	struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun;

	if (lun->getattr == NULL)
		return (UINT64_MAX);
	return (lun->getattr(lun, attrname));
}

/*
 * Backend module initialization: set up the locks, UMA zones and LUN
 * list used by the block backend.
 */
static int
ctl_be_block_init(void)
{
	struct ctl_be_block_softc *softc = &backend_block_softc;

	sx_init(&softc->modify_lock, "ctlblock modify");
	mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF);
	softc->beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	softc->buf_zone = uma_zcreate("ctlblock", CTLBLK_MAX_SEG,
	    NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
#if (CTLBLK_MAX_SEG > 131072)
	/* Secondary zone for smaller (128KB) data buffers. */
	softc->buf128_zone = uma_zcreate("ctlblock128", 131072,
	    NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
#endif
	SLIST_INIT(&softc->lun_list);
	return (0);
}


/*
 * Backend module teardown: remove every remaining LUN, then destroy
 * the UMA zones and locks set up by ctl_be_block_init().
 */
static int
ctl_be_block_shutdown(void)
{
	struct ctl_be_block_softc *softc = &backend_block_softc;
	struct ctl_be_block_lun *lun;

	mtx_lock(&softc->lock);
	while ((lun = SLIST_FIRST(&softc->lun_list)) != NULL) {
		SLIST_REMOVE_HEAD(&softc->lun_list, links);
		softc->num_luns--;
		/*
		 * Drop our lock here. Since ctl_remove_lun() can call
		 * back into us, this could potentially lead to a recursive
		 * lock of the same mutex, which would cause a hang.
		 */
		mtx_unlock(&softc->lock);
		ctl_remove_lun(&lun->cbe_lun);
		mtx_lock(&softc->lock);
	}
	mtx_unlock(&softc->lock);
	uma_zdestroy(softc->buf_zone);
#if (CTLBLK_MAX_SEG > 131072)
	uma_zdestroy(softc->buf128_zone);
#endif
	uma_zdestroy(softc->beio_zone);
	mtx_destroy(&softc->lock);
	sx_destroy(&softc->modify_lock);
	return (0);
}