/* ctl_backend_block.c revision 275895 */
1/*- 2 * Copyright (c) 2003 Silicon Graphics International Corp. 3 * Copyright (c) 2009-2011 Spectra Logic Corporation 4 * Copyright (c) 2012 The FreeBSD Foundation 5 * All rights reserved. 6 * 7 * Portions of this software were developed by Edward Tomasz Napierala 8 * under sponsorship from the FreeBSD Foundation. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions, and the following disclaimer, 15 * without modification. 16 * 2. Redistributions in binary form must reproduce at minimum a disclaimer 17 * substantially similar to the "NO WARRANTY" disclaimer below 18 * ("Disclaimer") and any redistribution must be conditioned upon 19 * including a substantially similar Disclaimer requirement for further 20 * binary redistribution. 21 * 22 * NO WARRANTY 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR 26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 31 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 32 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 * POSSIBILITY OF SUCH DAMAGES. 34 * 35 * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $ 36 */ 37/* 38 * CAM Target Layer driver backend for block devices. 
39 * 40 * Author: Ken Merry <ken@FreeBSD.org> 41 */ 42#include <sys/cdefs.h> 43__FBSDID("$FreeBSD: stable/10/sys/cam/ctl/ctl_backend_block.c 275895 2014-12-18 08:46:53Z mav $"); 44 45#include <opt_kdtrace.h> 46 47#include <sys/param.h> 48#include <sys/systm.h> 49#include <sys/kernel.h> 50#include <sys/types.h> 51#include <sys/kthread.h> 52#include <sys/bio.h> 53#include <sys/fcntl.h> 54#include <sys/limits.h> 55#include <sys/lock.h> 56#include <sys/mutex.h> 57#include <sys/condvar.h> 58#include <sys/malloc.h> 59#include <sys/conf.h> 60#include <sys/ioccom.h> 61#include <sys/queue.h> 62#include <sys/sbuf.h> 63#include <sys/endian.h> 64#include <sys/uio.h> 65#include <sys/buf.h> 66#include <sys/taskqueue.h> 67#include <sys/vnode.h> 68#include <sys/namei.h> 69#include <sys/mount.h> 70#include <sys/disk.h> 71#include <sys/fcntl.h> 72#include <sys/filedesc.h> 73#include <sys/filio.h> 74#include <sys/proc.h> 75#include <sys/pcpu.h> 76#include <sys/module.h> 77#include <sys/sdt.h> 78#include <sys/devicestat.h> 79#include <sys/sysctl.h> 80 81#include <geom/geom.h> 82 83#include <cam/cam.h> 84#include <cam/scsi/scsi_all.h> 85#include <cam/scsi/scsi_da.h> 86#include <cam/ctl/ctl_io.h> 87#include <cam/ctl/ctl.h> 88#include <cam/ctl/ctl_backend.h> 89#include <cam/ctl/ctl_frontend_internal.h> 90#include <cam/ctl/ctl_ioctl.h> 91#include <cam/ctl/ctl_scsi_all.h> 92#include <cam/ctl/ctl_error.h> 93 94/* 95 * The idea here is that we'll allocate enough S/G space to hold a 1MB 96 * I/O. If we get an I/O larger than that, we'll split it. 97 */ 98#define CTLBLK_HALF_IO_SIZE (512 * 1024) 99#define CTLBLK_MAX_IO_SIZE (CTLBLK_HALF_IO_SIZE * 2) 100#define CTLBLK_MAX_SEG MAXPHYS 101#define CTLBLK_HALF_SEGS MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MAX_SEG, 1) 102#define CTLBLK_MAX_SEGS (CTLBLK_HALF_SEGS * 2) 103 104#ifdef CTLBLK_DEBUG 105#define DPRINTF(fmt, args...) \ 106 printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) 107#else 108#define DPRINTF(fmt, args...) 
do {} while(0) 109#endif 110 111#define PRIV(io) \ 112 ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND]) 113#define ARGS(io) \ 114 ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]) 115 116SDT_PROVIDER_DEFINE(cbb); 117 118typedef enum { 119 CTL_BE_BLOCK_LUN_UNCONFIGURED = 0x01, 120 CTL_BE_BLOCK_LUN_CONFIG_ERR = 0x02, 121 CTL_BE_BLOCK_LUN_WAITING = 0x04, 122 CTL_BE_BLOCK_LUN_MULTI_THREAD = 0x08 123} ctl_be_block_lun_flags; 124 125typedef enum { 126 CTL_BE_BLOCK_NONE, 127 CTL_BE_BLOCK_DEV, 128 CTL_BE_BLOCK_FILE 129} ctl_be_block_type; 130 131struct ctl_be_block_devdata { 132 struct cdev *cdev; 133 struct cdevsw *csw; 134 int dev_ref; 135}; 136 137struct ctl_be_block_filedata { 138 struct ucred *cred; 139}; 140 141union ctl_be_block_bedata { 142 struct ctl_be_block_devdata dev; 143 struct ctl_be_block_filedata file; 144}; 145 146struct ctl_be_block_io; 147struct ctl_be_block_lun; 148 149typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun, 150 struct ctl_be_block_io *beio); 151typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun, 152 const char *attrname); 153 154/* 155 * Backend LUN structure. There is a 1:1 mapping between a block device 156 * and a backend block LUN, and between a backend block LUN and a CTL LUN. 
157 */ 158struct ctl_be_block_lun { 159 struct ctl_lun_create_params params; 160 struct ctl_block_disk *disk; 161 char lunname[32]; 162 char *dev_path; 163 ctl_be_block_type dev_type; 164 struct vnode *vn; 165 union ctl_be_block_bedata backend; 166 cbb_dispatch_t dispatch; 167 cbb_dispatch_t lun_flush; 168 cbb_dispatch_t unmap; 169 cbb_dispatch_t get_lba_status; 170 cbb_getattr_t getattr; 171 uma_zone_t lun_zone; 172 uint64_t size_blocks; 173 uint64_t size_bytes; 174 uint32_t blocksize; 175 int blocksize_shift; 176 uint16_t pblockexp; 177 uint16_t pblockoff; 178 struct ctl_be_block_softc *softc; 179 struct devstat *disk_stats; 180 ctl_be_block_lun_flags flags; 181 STAILQ_ENTRY(ctl_be_block_lun) links; 182 struct ctl_be_lun ctl_be_lun; 183 struct taskqueue *io_taskqueue; 184 struct task io_task; 185 int num_threads; 186 STAILQ_HEAD(, ctl_io_hdr) input_queue; 187 STAILQ_HEAD(, ctl_io_hdr) config_read_queue; 188 STAILQ_HEAD(, ctl_io_hdr) config_write_queue; 189 STAILQ_HEAD(, ctl_io_hdr) datamove_queue; 190 struct mtx_padalign io_lock; 191 struct mtx_padalign queue_lock; 192}; 193 194/* 195 * Overall softc structure for the block backend module. 196 */ 197struct ctl_be_block_softc { 198 struct mtx lock; 199 int num_disks; 200 STAILQ_HEAD(, ctl_block_disk) disk_list; 201 int num_luns; 202 STAILQ_HEAD(, ctl_be_block_lun) lun_list; 203}; 204 205static struct ctl_be_block_softc backend_block_softc; 206 207/* 208 * Per-I/O information. 
209 */ 210struct ctl_be_block_io { 211 union ctl_io *io; 212 struct ctl_sg_entry sg_segs[CTLBLK_MAX_SEGS]; 213 struct iovec xiovecs[CTLBLK_MAX_SEGS]; 214 int bio_cmd; 215 int num_segs; 216 int num_bios_sent; 217 int num_bios_done; 218 int send_complete; 219 int num_errors; 220 struct bintime ds_t0; 221 devstat_tag_type ds_tag_type; 222 devstat_trans_flags ds_trans_type; 223 uint64_t io_len; 224 uint64_t io_offset; 225 struct ctl_be_block_softc *softc; 226 struct ctl_be_block_lun *lun; 227 void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */ 228}; 229 230static int cbb_num_threads = 14; 231TUNABLE_INT("kern.cam.ctl.block.num_threads", &cbb_num_threads); 232SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD, 0, 233 "CAM Target Layer Block Backend"); 234SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RW, 235 &cbb_num_threads, 0, "Number of threads per backing file"); 236 237static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc); 238static void ctl_free_beio(struct ctl_be_block_io *beio); 239static void ctl_complete_beio(struct ctl_be_block_io *beio); 240static int ctl_be_block_move_done(union ctl_io *io); 241static void ctl_be_block_biodone(struct bio *bio); 242static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun, 243 struct ctl_be_block_io *beio); 244static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun, 245 struct ctl_be_block_io *beio); 246static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun, 247 struct ctl_be_block_io *beio); 248static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, 249 const char *attrname); 250static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun, 251 struct ctl_be_block_io *beio); 252static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun, 253 struct ctl_be_block_io *beio); 254static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun, 255 struct ctl_be_block_io *beio); 
256static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, 257 const char *attrname); 258static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun, 259 union ctl_io *io); 260static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun, 261 union ctl_io *io); 262static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun, 263 union ctl_io *io); 264static void ctl_be_block_worker(void *context, int pending); 265static int ctl_be_block_submit(union ctl_io *io); 266static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, 267 int flag, struct thread *td); 268static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, 269 struct ctl_lun_req *req); 270static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, 271 struct ctl_lun_req *req); 272static int ctl_be_block_close(struct ctl_be_block_lun *be_lun); 273static int ctl_be_block_open(struct ctl_be_block_softc *softc, 274 struct ctl_be_block_lun *be_lun, 275 struct ctl_lun_req *req); 276static int ctl_be_block_create(struct ctl_be_block_softc *softc, 277 struct ctl_lun_req *req); 278static int ctl_be_block_rm(struct ctl_be_block_softc *softc, 279 struct ctl_lun_req *req); 280static int ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun, 281 struct ctl_lun_req *req); 282static int ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun, 283 struct ctl_lun_req *req); 284static int ctl_be_block_modify(struct ctl_be_block_softc *softc, 285 struct ctl_lun_req *req); 286static void ctl_be_block_lun_shutdown(void *be_lun); 287static void ctl_be_block_lun_config_status(void *be_lun, 288 ctl_lun_config_status status); 289static int ctl_be_block_config_write(union ctl_io *io); 290static int ctl_be_block_config_read(union ctl_io *io); 291static int ctl_be_block_lun_info(void *be_lun, struct sbuf *sb); 292static uint64_t ctl_be_block_lun_attr(void *be_lun, const char *attrname); 293int ctl_be_block_init(void); 294 295static struct ctl_backend_driver 
ctl_be_block_driver = 296{ 297 .name = "block", 298 .flags = CTL_BE_FLAG_HAS_CONFIG, 299 .init = ctl_be_block_init, 300 .data_submit = ctl_be_block_submit, 301 .data_move_done = ctl_be_block_move_done, 302 .config_read = ctl_be_block_config_read, 303 .config_write = ctl_be_block_config_write, 304 .ioctl = ctl_be_block_ioctl, 305 .lun_info = ctl_be_block_lun_info, 306 .lun_attr = ctl_be_block_lun_attr 307}; 308 309MALLOC_DEFINE(M_CTLBLK, "ctlblk", "Memory used for CTL block backend"); 310CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver); 311 312static uma_zone_t beio_zone; 313 314static struct ctl_be_block_io * 315ctl_alloc_beio(struct ctl_be_block_softc *softc) 316{ 317 struct ctl_be_block_io *beio; 318 319 beio = uma_zalloc(beio_zone, M_WAITOK | M_ZERO); 320 beio->softc = softc; 321 return (beio); 322} 323 324static void 325ctl_free_beio(struct ctl_be_block_io *beio) 326{ 327 int duplicate_free; 328 int i; 329 330 duplicate_free = 0; 331 332 for (i = 0; i < beio->num_segs; i++) { 333 if (beio->sg_segs[i].addr == NULL) 334 duplicate_free++; 335 336 uma_zfree(beio->lun->lun_zone, beio->sg_segs[i].addr); 337 beio->sg_segs[i].addr = NULL; 338 339 /* For compare we had two equal S/G lists. 
*/ 340 if (ARGS(beio->io)->flags & CTL_LLF_COMPARE) { 341 uma_zfree(beio->lun->lun_zone, 342 beio->sg_segs[i + CTLBLK_HALF_SEGS].addr); 343 beio->sg_segs[i + CTLBLK_HALF_SEGS].addr = NULL; 344 } 345 } 346 347 if (duplicate_free > 0) { 348 printf("%s: %d duplicate frees out of %d segments\n", __func__, 349 duplicate_free, beio->num_segs); 350 } 351 352 uma_zfree(beio_zone, beio); 353} 354 355static void 356ctl_complete_beio(struct ctl_be_block_io *beio) 357{ 358 union ctl_io *io = beio->io; 359 360 if (beio->beio_cont != NULL) { 361 beio->beio_cont(beio); 362 } else { 363 ctl_free_beio(beio); 364 ctl_data_submit_done(io); 365 } 366} 367 368static int 369ctl_be_block_move_done(union ctl_io *io) 370{ 371 struct ctl_be_block_io *beio; 372 struct ctl_be_block_lun *be_lun; 373 struct ctl_lba_len_flags *lbalen; 374#ifdef CTL_TIME_IO 375 struct bintime cur_bt; 376#endif 377 int i; 378 379 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 380 be_lun = beio->lun; 381 382 DPRINTF("entered\n"); 383 384#ifdef CTL_TIME_IO 385 getbintime(&cur_bt); 386 bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt); 387 bintime_add(&io->io_hdr.dma_bt, &cur_bt); 388 io->io_hdr.num_dmas++; 389#endif 390 io->scsiio.kern_rel_offset += io->scsiio.kern_data_len; 391 392 /* 393 * We set status at this point for read commands, and write 394 * commands with errors. 395 */ 396 if (io->io_hdr.flags & CTL_FLAG_ABORT) { 397 ; 398 } else if ((io->io_hdr.port_status == 0) && 399 ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) { 400 lbalen = ARGS(beio->io); 401 if (lbalen->flags & CTL_LLF_READ) { 402 ctl_set_success(&io->scsiio); 403 } else if (lbalen->flags & CTL_LLF_COMPARE) { 404 /* We have two data blocks ready for comparison. 
*/ 405 for (i = 0; i < beio->num_segs; i++) { 406 if (memcmp(beio->sg_segs[i].addr, 407 beio->sg_segs[i + CTLBLK_HALF_SEGS].addr, 408 beio->sg_segs[i].len) != 0) 409 break; 410 } 411 if (i < beio->num_segs) 412 ctl_set_sense(&io->scsiio, 413 /*current_error*/ 1, 414 /*sense_key*/ SSD_KEY_MISCOMPARE, 415 /*asc*/ 0x1D, 416 /*ascq*/ 0x00, 417 SSD_ELEM_NONE); 418 else 419 ctl_set_success(&io->scsiio); 420 } 421 } else if ((io->io_hdr.port_status != 0) && 422 ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE || 423 (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) { 424 /* 425 * For hardware error sense keys, the sense key 426 * specific value is defined to be a retry count, 427 * but we use it to pass back an internal FETD 428 * error code. XXX KDM Hopefully the FETD is only 429 * using 16 bits for an error code, since that's 430 * all the space we have in the sks field. 431 */ 432 ctl_set_internal_failure(&io->scsiio, 433 /*sks_valid*/ 1, 434 /*retry_count*/ 435 io->io_hdr.port_status); 436 } 437 438 /* 439 * If this is a read, or a write with errors, it is done. 440 */ 441 if ((beio->bio_cmd == BIO_READ) 442 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0) 443 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) { 444 ctl_complete_beio(beio); 445 return (0); 446 } 447 448 /* 449 * At this point, we have a write and the DMA completed 450 * successfully. We now have to queue it to the task queue to 451 * execute the backend I/O. That is because we do blocking 452 * memory allocations, and in the file backing case, blocking I/O. 453 * This move done routine is generally called in the SIM's 454 * interrupt context, and therefore we cannot block. 455 */ 456 mtx_lock(&be_lun->queue_lock); 457 /* 458 * XXX KDM make sure that links is okay to use at this point. 459 * Otherwise, we either need to add another field to ctl_io_hdr, 460 * or deal with resource allocation here. 
461 */ 462 STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links); 463 mtx_unlock(&be_lun->queue_lock); 464 465 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 466 467 return (0); 468} 469 470static void 471ctl_be_block_biodone(struct bio *bio) 472{ 473 struct ctl_be_block_io *beio; 474 struct ctl_be_block_lun *be_lun; 475 union ctl_io *io; 476 int error; 477 478 beio = bio->bio_caller1; 479 be_lun = beio->lun; 480 io = beio->io; 481 482 DPRINTF("entered\n"); 483 484 error = bio->bio_error; 485 mtx_lock(&be_lun->io_lock); 486 if (error != 0) 487 beio->num_errors++; 488 489 beio->num_bios_done++; 490 491 /* 492 * XXX KDM will this cause WITNESS to complain? Holding a lock 493 * during the free might cause it to complain. 494 */ 495 g_destroy_bio(bio); 496 497 /* 498 * If the send complete bit isn't set, or we aren't the last I/O to 499 * complete, then we're done. 500 */ 501 if ((beio->send_complete == 0) 502 || (beio->num_bios_done < beio->num_bios_sent)) { 503 mtx_unlock(&be_lun->io_lock); 504 return; 505 } 506 507 /* 508 * At this point, we've verified that we are the last I/O to 509 * complete, so it's safe to drop the lock. 510 */ 511 devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 512 beio->ds_tag_type, beio->ds_trans_type, 513 /*now*/ NULL, /*then*/&beio->ds_t0); 514 mtx_unlock(&be_lun->io_lock); 515 516 /* 517 * If there are any errors from the backing device, we fail the 518 * entire I/O with a medium error. 519 */ 520 if (beio->num_errors > 0) { 521 if (error == EOPNOTSUPP) { 522 ctl_set_invalid_opcode(&io->scsiio); 523 } else if (error == ENOSPC) { 524 ctl_set_space_alloc_fail(&io->scsiio); 525 } else if (beio->bio_cmd == BIO_FLUSH) { 526 /* XXX KDM is there is a better error here? 
*/ 527 ctl_set_internal_failure(&io->scsiio, 528 /*sks_valid*/ 1, 529 /*retry_count*/ 0xbad2); 530 } else 531 ctl_set_medium_error(&io->scsiio); 532 ctl_complete_beio(beio); 533 return; 534 } 535 536 /* 537 * If this is a write, a flush, a delete or verify, we're all done. 538 * If this is a read, we can now send the data to the user. 539 */ 540 if ((beio->bio_cmd == BIO_WRITE) 541 || (beio->bio_cmd == BIO_FLUSH) 542 || (beio->bio_cmd == BIO_DELETE) 543 || (ARGS(io)->flags & CTL_LLF_VERIFY)) { 544 ctl_set_success(&io->scsiio); 545 ctl_complete_beio(beio); 546 } else { 547 if ((ARGS(io)->flags & CTL_LLF_READ) && 548 beio->beio_cont == NULL) 549 ctl_set_success(&io->scsiio); 550#ifdef CTL_TIME_IO 551 getbintime(&io->io_hdr.dma_start_bt); 552#endif 553 ctl_datamove(io); 554 } 555} 556 557static void 558ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun, 559 struct ctl_be_block_io *beio) 560{ 561 union ctl_io *io = beio->io; 562 struct mount *mountpoint; 563 int error, lock_flags; 564 565 DPRINTF("entered\n"); 566 567 binuptime(&beio->ds_t0); 568 mtx_lock(&be_lun->io_lock); 569 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); 570 mtx_unlock(&be_lun->io_lock); 571 572 (void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT); 573 574 if (MNT_SHARED_WRITES(mountpoint) 575 || ((mountpoint == NULL) 576 && MNT_SHARED_WRITES(be_lun->vn->v_mount))) 577 lock_flags = LK_SHARED; 578 else 579 lock_flags = LK_EXCLUSIVE; 580 581 vn_lock(be_lun->vn, lock_flags | LK_RETRY); 582 583 error = VOP_FSYNC(be_lun->vn, MNT_WAIT, curthread); 584 VOP_UNLOCK(be_lun->vn, 0); 585 586 vn_finished_write(mountpoint); 587 588 mtx_lock(&be_lun->io_lock); 589 devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 590 beio->ds_tag_type, beio->ds_trans_type, 591 /*now*/ NULL, /*then*/&beio->ds_t0); 592 mtx_unlock(&be_lun->io_lock); 593 594 if (error == 0) 595 ctl_set_success(&io->scsiio); 596 else { 597 /* XXX KDM is there is a better error here? 
*/ 598 ctl_set_internal_failure(&io->scsiio, 599 /*sks_valid*/ 1, 600 /*retry_count*/ 0xbad1); 601 } 602 603 ctl_complete_beio(beio); 604} 605 606SDT_PROBE_DEFINE1(cbb, kernel, read, file_start, "uint64_t"); 607SDT_PROBE_DEFINE1(cbb, kernel, write, file_start, "uint64_t"); 608SDT_PROBE_DEFINE1(cbb, kernel, read, file_done,"uint64_t"); 609SDT_PROBE_DEFINE1(cbb, kernel, write, file_done, "uint64_t"); 610 611static void 612ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun, 613 struct ctl_be_block_io *beio) 614{ 615 struct ctl_be_block_filedata *file_data; 616 union ctl_io *io; 617 struct uio xuio; 618 struct iovec *xiovec; 619 int flags; 620 int error, i; 621 622 DPRINTF("entered\n"); 623 624 file_data = &be_lun->backend.file; 625 io = beio->io; 626 flags = 0; 627 if (ARGS(io)->flags & CTL_LLF_DPO) 628 flags |= IO_DIRECT; 629 if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA) 630 flags |= IO_SYNC; 631 632 bzero(&xuio, sizeof(xuio)); 633 if (beio->bio_cmd == BIO_READ) { 634 SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0); 635 xuio.uio_rw = UIO_READ; 636 } else { 637 SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0); 638 xuio.uio_rw = UIO_WRITE; 639 } 640 xuio.uio_offset = beio->io_offset; 641 xuio.uio_resid = beio->io_len; 642 xuio.uio_segflg = UIO_SYSSPACE; 643 xuio.uio_iov = beio->xiovecs; 644 xuio.uio_iovcnt = beio->num_segs; 645 xuio.uio_td = curthread; 646 647 for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) { 648 xiovec->iov_base = beio->sg_segs[i].addr; 649 xiovec->iov_len = beio->sg_segs[i].len; 650 } 651 652 binuptime(&beio->ds_t0); 653 mtx_lock(&be_lun->io_lock); 654 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); 655 mtx_unlock(&be_lun->io_lock); 656 657 if (beio->bio_cmd == BIO_READ) { 658 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 659 660 /* 661 * UFS pays attention to IO_DIRECT for reads. If the 662 * DIRECTIO option is configured into the kernel, it calls 663 * ffs_rawread(). 
But that only works for single-segment 664 * uios with user space addresses. In our case, with a 665 * kernel uio, it still reads into the buffer cache, but it 666 * will just try to release the buffer from the cache later 667 * on in ffs_read(). 668 * 669 * ZFS does not pay attention to IO_DIRECT for reads. 670 * 671 * UFS does not pay attention to IO_SYNC for reads. 672 * 673 * ZFS pays attention to IO_SYNC (which translates into the 674 * Solaris define FRSYNC for zfs_read()) for reads. It 675 * attempts to sync the file before reading. 676 * 677 * So, to attempt to provide some barrier semantics in the 678 * BIO_ORDERED case, set both IO_DIRECT and IO_SYNC. 679 */ 680 error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred); 681 682 VOP_UNLOCK(be_lun->vn, 0); 683 SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0); 684 } else { 685 struct mount *mountpoint; 686 int lock_flags; 687 688 (void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT); 689 690 if (MNT_SHARED_WRITES(mountpoint) 691 || ((mountpoint == NULL) 692 && MNT_SHARED_WRITES(be_lun->vn->v_mount))) 693 lock_flags = LK_SHARED; 694 else 695 lock_flags = LK_EXCLUSIVE; 696 697 vn_lock(be_lun->vn, lock_flags | LK_RETRY); 698 699 /* 700 * UFS pays attention to IO_DIRECT for writes. The write 701 * is done asynchronously. (Normally the write would just 702 * get put into cache. 703 * 704 * UFS pays attention to IO_SYNC for writes. It will 705 * attempt to write the buffer out synchronously if that 706 * flag is set. 707 * 708 * ZFS does not pay attention to IO_DIRECT for writes. 709 * 710 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC) 711 * for writes. It will flush the transaction from the 712 * cache before returning. 713 * 714 * So if we've got the BIO_ORDERED flag set, we want 715 * IO_SYNC in either the UFS or ZFS case. 
716 */ 717 error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred); 718 VOP_UNLOCK(be_lun->vn, 0); 719 720 vn_finished_write(mountpoint); 721 SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0); 722 } 723 724 mtx_lock(&be_lun->io_lock); 725 devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 726 beio->ds_tag_type, beio->ds_trans_type, 727 /*now*/ NULL, /*then*/&beio->ds_t0); 728 mtx_unlock(&be_lun->io_lock); 729 730 /* 731 * If we got an error, set the sense data to "MEDIUM ERROR" and 732 * return the I/O to the user. 733 */ 734 if (error != 0) { 735 char path_str[32]; 736 737 ctl_scsi_path_string(io, path_str, sizeof(path_str)); 738 printf("%s%s command returned errno %d\n", path_str, 739 (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE", error); 740 if (error == ENOSPC) { 741 ctl_set_space_alloc_fail(&io->scsiio); 742 } else 743 ctl_set_medium_error(&io->scsiio); 744 ctl_complete_beio(beio); 745 return; 746 } 747 748 /* 749 * If this is a write or a verify, we're all done. 750 * If this is a read, we can now send the data to the user. 
751 */ 752 if ((beio->bio_cmd == BIO_WRITE) || 753 (ARGS(io)->flags & CTL_LLF_VERIFY)) { 754 ctl_set_success(&io->scsiio); 755 ctl_complete_beio(beio); 756 } else { 757 if ((ARGS(io)->flags & CTL_LLF_READ) && 758 beio->beio_cont == NULL) 759 ctl_set_success(&io->scsiio); 760#ifdef CTL_TIME_IO 761 getbintime(&io->io_hdr.dma_start_bt); 762#endif 763 ctl_datamove(io); 764 } 765} 766 767static void 768ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun, 769 struct ctl_be_block_io *beio) 770{ 771 union ctl_io *io = beio->io; 772 struct ctl_lba_len_flags *lbalen = ARGS(io); 773 struct scsi_get_lba_status_data *data; 774 off_t roff, off; 775 int error, status; 776 777 DPRINTF("entered\n"); 778 779 off = roff = ((off_t)lbalen->lba) << be_lun->blocksize_shift; 780 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 781 error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off, 782 0, curthread->td_ucred, curthread); 783 if (error == 0 && off > roff) 784 status = 0; /* mapped up to off */ 785 else { 786 error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off, 787 0, curthread->td_ucred, curthread); 788 if (error == 0 && off > roff) 789 status = 1; /* deallocated up to off */ 790 else { 791 status = 0; /* unknown up to the end */ 792 off = be_lun->size_bytes; 793 } 794 } 795 VOP_UNLOCK(be_lun->vn, 0); 796 797 off >>= be_lun->blocksize_shift; 798 data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr; 799 scsi_u64to8b(lbalen->lba, data->descr[0].addr); 800 scsi_ulto4b(MIN(UINT32_MAX, off - lbalen->lba), 801 data->descr[0].length); 802 data->descr[0].status = status; 803 804 ctl_complete_beio(beio); 805} 806 807static uint64_t 808ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname) 809{ 810 struct vattr vattr; 811 struct statfs statfs; 812 int error; 813 814 if (be_lun->vn == NULL) 815 return (UINT64_MAX); 816 if (strcmp(attrname, "blocksused") == 0) { 817 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); 818 if (error != 0) 819 return (UINT64_MAX); 
820 return (vattr.va_bytes >> be_lun->blocksize_shift); 821 } 822 if (strcmp(attrname, "blocksavail") == 0) { 823 error = VFS_STATFS(be_lun->vn->v_mount, &statfs); 824 if (error != 0) 825 return (UINT64_MAX); 826 return ((statfs.f_bavail * statfs.f_bsize) >> 827 be_lun->blocksize_shift); 828 } 829 return (UINT64_MAX); 830} 831 832static void 833ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun, 834 struct ctl_be_block_io *beio) 835{ 836 struct ctl_be_block_devdata *dev_data; 837 union ctl_io *io; 838 struct uio xuio; 839 struct iovec *xiovec; 840 int flags; 841 int error, i; 842 843 DPRINTF("entered\n"); 844 845 dev_data = &be_lun->backend.dev; 846 io = beio->io; 847 flags = 0; 848 if (ARGS(io)->flags & CTL_LLF_DPO) 849 flags |= IO_DIRECT; 850 if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA) 851 flags |= IO_SYNC; 852 853 bzero(&xuio, sizeof(xuio)); 854 if (beio->bio_cmd == BIO_READ) { 855 SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0); 856 xuio.uio_rw = UIO_READ; 857 } else { 858 SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0); 859 xuio.uio_rw = UIO_WRITE; 860 } 861 xuio.uio_offset = beio->io_offset; 862 xuio.uio_resid = beio->io_len; 863 xuio.uio_segflg = UIO_SYSSPACE; 864 xuio.uio_iov = beio->xiovecs; 865 xuio.uio_iovcnt = beio->num_segs; 866 xuio.uio_td = curthread; 867 868 for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) { 869 xiovec->iov_base = beio->sg_segs[i].addr; 870 xiovec->iov_len = beio->sg_segs[i].len; 871 } 872 873 binuptime(&beio->ds_t0); 874 mtx_lock(&be_lun->io_lock); 875 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); 876 mtx_unlock(&be_lun->io_lock); 877 878 if (beio->bio_cmd == BIO_READ) { 879 error = (*dev_data->csw->d_read)(dev_data->cdev, &xuio, flags); 880 SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0); 881 } else { 882 error = (*dev_data->csw->d_write)(dev_data->cdev, &xuio, flags); 883 SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0); 884 } 885 
886 mtx_lock(&be_lun->io_lock); 887 devstat_end_transaction(beio->lun->disk_stats, beio->io_len, 888 beio->ds_tag_type, beio->ds_trans_type, 889 /*now*/ NULL, /*then*/&beio->ds_t0); 890 mtx_unlock(&be_lun->io_lock); 891 892 /* 893 * If we got an error, set the sense data to "MEDIUM ERROR" and 894 * return the I/O to the user. 895 */ 896 if (error != 0) { 897 if (error == ENOSPC) { 898 ctl_set_space_alloc_fail(&io->scsiio); 899 } else 900 ctl_set_medium_error(&io->scsiio); 901 ctl_complete_beio(beio); 902 return; 903 } 904 905 /* 906 * If this is a write or a verify, we're all done. 907 * If this is a read, we can now send the data to the user. 908 */ 909 if ((beio->bio_cmd == BIO_WRITE) || 910 (ARGS(io)->flags & CTL_LLF_VERIFY)) { 911 ctl_set_success(&io->scsiio); 912 ctl_complete_beio(beio); 913 } else { 914 if ((ARGS(io)->flags & CTL_LLF_READ) && 915 beio->beio_cont == NULL) 916 ctl_set_success(&io->scsiio); 917#ifdef CTL_TIME_IO 918 getbintime(&io->io_hdr.dma_start_bt); 919#endif 920 ctl_datamove(io); 921 } 922} 923 924static void 925ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun, 926 struct ctl_be_block_io *beio) 927{ 928 struct ctl_be_block_devdata *dev_data = &be_lun->backend.dev; 929 union ctl_io *io = beio->io; 930 struct ctl_lba_len_flags *lbalen = ARGS(io); 931 struct scsi_get_lba_status_data *data; 932 off_t roff, off; 933 int error, status; 934 935 DPRINTF("entered\n"); 936 937 off = roff = ((off_t)lbalen->lba) << be_lun->blocksize_shift; 938 error = (*dev_data->csw->d_ioctl)(dev_data->cdev, FIOSEEKHOLE, 939 (caddr_t)&off, FREAD, curthread); 940 if (error == 0 && off > roff) 941 status = 0; /* mapped up to off */ 942 else { 943 error = (*dev_data->csw->d_ioctl)(dev_data->cdev, FIOSEEKDATA, 944 (caddr_t)&off, FREAD, curthread); 945 if (error == 0 && off > roff) 946 status = 1; /* deallocated up to off */ 947 else { 948 status = 0; /* unknown up to the end */ 949 off = be_lun->size_bytes; 950 } 951 } 952 953 off >>= be_lun->blocksize_shift; 954 
data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr; 955 scsi_u64to8b(lbalen->lba, data->descr[0].addr); 956 scsi_ulto4b(MIN(UINT32_MAX, off - lbalen->lba), 957 data->descr[0].length); 958 data->descr[0].status = status; 959 960 ctl_complete_beio(beio); 961} 962 963static void 964ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun, 965 struct ctl_be_block_io *beio) 966{ 967 struct bio *bio; 968 union ctl_io *io; 969 struct ctl_be_block_devdata *dev_data; 970 971 dev_data = &be_lun->backend.dev; 972 io = beio->io; 973 974 DPRINTF("entered\n"); 975 976 /* This can't fail, it's a blocking allocation. */ 977 bio = g_alloc_bio(); 978 979 bio->bio_cmd = BIO_FLUSH; 980 bio->bio_flags |= BIO_ORDERED; 981 bio->bio_dev = dev_data->cdev; 982 bio->bio_offset = 0; 983 bio->bio_data = 0; 984 bio->bio_done = ctl_be_block_biodone; 985 bio->bio_caller1 = beio; 986 bio->bio_pblkno = 0; 987 988 /* 989 * We don't need to acquire the LUN lock here, because we are only 990 * sending one bio, and so there is no other context to synchronize 991 * with. 
992 */ 993 beio->num_bios_sent = 1; 994 beio->send_complete = 1; 995 996 binuptime(&beio->ds_t0); 997 mtx_lock(&be_lun->io_lock); 998 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 999 mtx_unlock(&be_lun->io_lock); 1000 1001 (*dev_data->csw->d_strategy)(bio); 1002} 1003 1004static void 1005ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun, 1006 struct ctl_be_block_io *beio, 1007 uint64_t off, uint64_t len, int last) 1008{ 1009 struct bio *bio; 1010 struct ctl_be_block_devdata *dev_data; 1011 uint64_t maxlen; 1012 1013 dev_data = &be_lun->backend.dev; 1014 maxlen = LONG_MAX - (LONG_MAX % be_lun->blocksize); 1015 while (len > 0) { 1016 bio = g_alloc_bio(); 1017 bio->bio_cmd = BIO_DELETE; 1018 bio->bio_dev = dev_data->cdev; 1019 bio->bio_offset = off; 1020 bio->bio_length = MIN(len, maxlen); 1021 bio->bio_data = 0; 1022 bio->bio_done = ctl_be_block_biodone; 1023 bio->bio_caller1 = beio; 1024 bio->bio_pblkno = off / be_lun->blocksize; 1025 1026 off += bio->bio_length; 1027 len -= bio->bio_length; 1028 1029 mtx_lock(&be_lun->io_lock); 1030 beio->num_bios_sent++; 1031 if (last && len == 0) 1032 beio->send_complete = 1; 1033 mtx_unlock(&be_lun->io_lock); 1034 1035 (*dev_data->csw->d_strategy)(bio); 1036 } 1037} 1038 1039static void 1040ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun, 1041 struct ctl_be_block_io *beio) 1042{ 1043 union ctl_io *io; 1044 struct ctl_be_block_devdata *dev_data; 1045 struct ctl_ptr_len_flags *ptrlen; 1046 struct scsi_unmap_desc *buf, *end; 1047 uint64_t len; 1048 1049 dev_data = &be_lun->backend.dev; 1050 io = beio->io; 1051 1052 DPRINTF("entered\n"); 1053 1054 binuptime(&beio->ds_t0); 1055 mtx_lock(&be_lun->io_lock); 1056 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 1057 mtx_unlock(&be_lun->io_lock); 1058 1059 if (beio->io_offset == -1) { 1060 beio->io_len = 0; 1061 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 1062 buf = (struct scsi_unmap_desc *)ptrlen->ptr; 
1063 end = buf + ptrlen->len / sizeof(*buf); 1064 for (; buf < end; buf++) { 1065 len = (uint64_t)scsi_4btoul(buf->length) * 1066 be_lun->blocksize; 1067 beio->io_len += len; 1068 ctl_be_block_unmap_dev_range(be_lun, beio, 1069 scsi_8btou64(buf->lba) * be_lun->blocksize, len, 1070 (end - buf < 2) ? TRUE : FALSE); 1071 } 1072 } else 1073 ctl_be_block_unmap_dev_range(be_lun, beio, 1074 beio->io_offset, beio->io_len, TRUE); 1075} 1076 1077static void 1078ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun, 1079 struct ctl_be_block_io *beio) 1080{ 1081 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); 1082 int i; 1083 struct bio *bio; 1084 struct ctl_be_block_devdata *dev_data; 1085 off_t cur_offset; 1086 int max_iosize; 1087 1088 DPRINTF("entered\n"); 1089 1090 dev_data = &be_lun->backend.dev; 1091 1092 /* 1093 * We have to limit our I/O size to the maximum supported by the 1094 * backend device. Hopefully it is MAXPHYS. If the driver doesn't 1095 * set it properly, use DFLTPHYS. 1096 */ 1097 max_iosize = dev_data->cdev->si_iosize_max; 1098 if (max_iosize < PAGE_SIZE) 1099 max_iosize = DFLTPHYS; 1100 1101 cur_offset = beio->io_offset; 1102 for (i = 0; i < beio->num_segs; i++) { 1103 size_t cur_size; 1104 uint8_t *cur_ptr; 1105 1106 cur_size = beio->sg_segs[i].len; 1107 cur_ptr = beio->sg_segs[i].addr; 1108 1109 while (cur_size > 0) { 1110 /* This can't fail, it's a blocking allocation. 
*/ 1111 bio = g_alloc_bio(); 1112 1113 KASSERT(bio != NULL, ("g_alloc_bio() failed!\n")); 1114 1115 bio->bio_cmd = beio->bio_cmd; 1116 bio->bio_dev = dev_data->cdev; 1117 bio->bio_caller1 = beio; 1118 bio->bio_length = min(cur_size, max_iosize); 1119 bio->bio_offset = cur_offset; 1120 bio->bio_data = cur_ptr; 1121 bio->bio_done = ctl_be_block_biodone; 1122 bio->bio_pblkno = cur_offset / be_lun->blocksize; 1123 1124 cur_offset += bio->bio_length; 1125 cur_ptr += bio->bio_length; 1126 cur_size -= bio->bio_length; 1127 1128 TAILQ_INSERT_TAIL(&queue, bio, bio_queue); 1129 beio->num_bios_sent++; 1130 } 1131 } 1132 binuptime(&beio->ds_t0); 1133 mtx_lock(&be_lun->io_lock); 1134 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 1135 beio->send_complete = 1; 1136 mtx_unlock(&be_lun->io_lock); 1137 1138 /* 1139 * Fire off all allocated requests! 1140 */ 1141 while ((bio = TAILQ_FIRST(&queue)) != NULL) { 1142 TAILQ_REMOVE(&queue, bio, bio_queue); 1143 (*dev_data->csw->d_strategy)(bio); 1144 } 1145} 1146 1147static uint64_t 1148ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname) 1149{ 1150 struct ctl_be_block_devdata *dev_data = &be_lun->backend.dev; 1151 struct diocgattr_arg arg; 1152 int error; 1153 1154 if (dev_data->csw == NULL || dev_data->csw->d_ioctl == NULL) 1155 return (UINT64_MAX); 1156 strlcpy(arg.name, attrname, sizeof(arg.name)); 1157 arg.len = sizeof(arg.value.off); 1158 error = dev_data->csw->d_ioctl(dev_data->cdev, 1159 DIOCGATTR, (caddr_t)&arg, FREAD, curthread); 1160 if (error != 0) 1161 return (UINT64_MAX); 1162 return (arg.value.off); 1163} 1164 1165static void 1166ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio) 1167{ 1168 union ctl_io *io; 1169 1170 io = beio->io; 1171 ctl_free_beio(beio); 1172 if ((io->io_hdr.flags & CTL_FLAG_ABORT) || 1173 ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && 1174 (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { 1175 ctl_config_write_done(io); 1176 return; 1177 
} 1178 1179 ctl_be_block_config_write(io); 1180} 1181 1182static void 1183ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun, 1184 union ctl_io *io) 1185{ 1186 struct ctl_be_block_io *beio; 1187 struct ctl_be_block_softc *softc; 1188 struct ctl_lba_len_flags *lbalen; 1189 uint64_t len_left, lba; 1190 int i, seglen; 1191 uint8_t *buf, *end; 1192 1193 DPRINTF("entered\n"); 1194 1195 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1196 softc = be_lun->softc; 1197 lbalen = ARGS(beio->io); 1198 1199 if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) || 1200 (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) { 1201 ctl_free_beio(beio); 1202 ctl_set_invalid_field(&io->scsiio, 1203 /*sks_valid*/ 1, 1204 /*command*/ 1, 1205 /*field*/ 1, 1206 /*bit_valid*/ 0, 1207 /*bit*/ 0); 1208 ctl_config_write_done(io); 1209 return; 1210 } 1211 1212 switch (io->scsiio.tag_type) { 1213 case CTL_TAG_ORDERED: 1214 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1215 break; 1216 case CTL_TAG_HEAD_OF_QUEUE: 1217 beio->ds_tag_type = DEVSTAT_TAG_HEAD; 1218 break; 1219 case CTL_TAG_UNTAGGED: 1220 case CTL_TAG_SIMPLE: 1221 case CTL_TAG_ACA: 1222 default: 1223 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; 1224 break; 1225 } 1226 1227 if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) { 1228 beio->io_offset = lbalen->lba * be_lun->blocksize; 1229 beio->io_len = (uint64_t)lbalen->len * be_lun->blocksize; 1230 beio->bio_cmd = BIO_DELETE; 1231 beio->ds_trans_type = DEVSTAT_FREE; 1232 1233 be_lun->unmap(be_lun, beio); 1234 return; 1235 } 1236 1237 beio->bio_cmd = BIO_WRITE; 1238 beio->ds_trans_type = DEVSTAT_WRITE; 1239 1240 DPRINTF("WRITE SAME at LBA %jx len %u\n", 1241 (uintmax_t)lbalen->lba, lbalen->len); 1242 1243 len_left = (uint64_t)lbalen->len * be_lun->blocksize; 1244 for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) { 1245 1246 /* 1247 * Setup the S/G entry for this chunk. 
1248 */ 1249 seglen = MIN(CTLBLK_MAX_SEG, len_left); 1250 seglen -= seglen % be_lun->blocksize; 1251 beio->sg_segs[i].len = seglen; 1252 beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK); 1253 1254 DPRINTF("segment %d addr %p len %zd\n", i, 1255 beio->sg_segs[i].addr, beio->sg_segs[i].len); 1256 1257 beio->num_segs++; 1258 len_left -= seglen; 1259 1260 buf = beio->sg_segs[i].addr; 1261 end = buf + seglen; 1262 for (; buf < end; buf += be_lun->blocksize) { 1263 memcpy(buf, io->scsiio.kern_data_ptr, be_lun->blocksize); 1264 if (lbalen->flags & SWS_LBDATA) 1265 scsi_ulto4b(lbalen->lba + lba, buf); 1266 lba++; 1267 } 1268 } 1269 1270 beio->io_offset = lbalen->lba * be_lun->blocksize; 1271 beio->io_len = lba * be_lun->blocksize; 1272 1273 /* We can not do all in one run. Correct and schedule rerun. */ 1274 if (len_left > 0) { 1275 lbalen->lba += lba; 1276 lbalen->len -= lba; 1277 beio->beio_cont = ctl_be_block_cw_done_ws; 1278 } 1279 1280 be_lun->dispatch(be_lun, beio); 1281} 1282 1283static void 1284ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun, 1285 union ctl_io *io) 1286{ 1287 struct ctl_be_block_io *beio; 1288 struct ctl_be_block_softc *softc; 1289 struct ctl_ptr_len_flags *ptrlen; 1290 1291 DPRINTF("entered\n"); 1292 1293 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1294 softc = be_lun->softc; 1295 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; 1296 1297 if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) { 1298 ctl_free_beio(beio); 1299 ctl_set_invalid_field(&io->scsiio, 1300 /*sks_valid*/ 0, 1301 /*command*/ 1, 1302 /*field*/ 0, 1303 /*bit_valid*/ 0, 1304 /*bit*/ 0); 1305 ctl_config_write_done(io); 1306 return; 1307 } 1308 1309 switch (io->scsiio.tag_type) { 1310 case CTL_TAG_ORDERED: 1311 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1312 break; 1313 case CTL_TAG_HEAD_OF_QUEUE: 1314 beio->ds_tag_type = DEVSTAT_TAG_HEAD; 1315 break; 1316 case CTL_TAG_UNTAGGED: 1317 case CTL_TAG_SIMPLE: 1318 
case CTL_TAG_ACA: 1319 default: 1320 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; 1321 break; 1322 } 1323 1324 beio->io_len = 0; 1325 beio->io_offset = -1; 1326 1327 beio->bio_cmd = BIO_DELETE; 1328 beio->ds_trans_type = DEVSTAT_FREE; 1329 1330 DPRINTF("UNMAP\n"); 1331 1332 be_lun->unmap(be_lun, beio); 1333} 1334 1335static void 1336ctl_be_block_cr_done(struct ctl_be_block_io *beio) 1337{ 1338 union ctl_io *io; 1339 1340 io = beio->io; 1341 ctl_free_beio(beio); 1342 ctl_config_read_done(io); 1343} 1344 1345static void 1346ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun, 1347 union ctl_io *io) 1348{ 1349 struct ctl_be_block_io *beio; 1350 struct ctl_be_block_softc *softc; 1351 1352 DPRINTF("entered\n"); 1353 1354 softc = be_lun->softc; 1355 beio = ctl_alloc_beio(softc); 1356 beio->io = io; 1357 beio->lun = be_lun; 1358 beio->beio_cont = ctl_be_block_cr_done; 1359 PRIV(io)->ptr = (void *)beio; 1360 1361 switch (io->scsiio.cdb[0]) { 1362 case SERVICE_ACTION_IN: /* GET LBA STATUS */ 1363 beio->bio_cmd = -1; 1364 beio->ds_trans_type = DEVSTAT_NO_DATA; 1365 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1366 beio->io_len = 0; 1367 if (be_lun->get_lba_status) 1368 be_lun->get_lba_status(be_lun, beio); 1369 else 1370 ctl_be_block_cr_done(beio); 1371 break; 1372 default: 1373 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]); 1374 break; 1375 } 1376} 1377 1378static void 1379ctl_be_block_cw_done(struct ctl_be_block_io *beio) 1380{ 1381 union ctl_io *io; 1382 1383 io = beio->io; 1384 ctl_free_beio(beio); 1385 ctl_config_write_done(io); 1386} 1387 1388static void 1389ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun, 1390 union ctl_io *io) 1391{ 1392 struct ctl_be_block_io *beio; 1393 struct ctl_be_block_softc *softc; 1394 1395 DPRINTF("entered\n"); 1396 1397 softc = be_lun->softc; 1398 beio = ctl_alloc_beio(softc); 1399 beio->io = io; 1400 beio->lun = be_lun; 1401 beio->beio_cont = ctl_be_block_cw_done; 1402 PRIV(io)->ptr = (void *)beio; 1403 1404 switch 
(io->scsiio.cdb[0]) { 1405 case SYNCHRONIZE_CACHE: 1406 case SYNCHRONIZE_CACHE_16: 1407 beio->bio_cmd = BIO_FLUSH; 1408 beio->ds_trans_type = DEVSTAT_NO_DATA; 1409 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1410 beio->io_len = 0; 1411 be_lun->lun_flush(be_lun, beio); 1412 break; 1413 case WRITE_SAME_10: 1414 case WRITE_SAME_16: 1415 ctl_be_block_cw_dispatch_ws(be_lun, io); 1416 break; 1417 case UNMAP: 1418 ctl_be_block_cw_dispatch_unmap(be_lun, io); 1419 break; 1420 default: 1421 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]); 1422 break; 1423 } 1424} 1425 1426SDT_PROBE_DEFINE1(cbb, kernel, read, start, "uint64_t"); 1427SDT_PROBE_DEFINE1(cbb, kernel, write, start, "uint64_t"); 1428SDT_PROBE_DEFINE1(cbb, kernel, read, alloc_done, "uint64_t"); 1429SDT_PROBE_DEFINE1(cbb, kernel, write, alloc_done, "uint64_t"); 1430 1431static void 1432ctl_be_block_next(struct ctl_be_block_io *beio) 1433{ 1434 struct ctl_be_block_lun *be_lun; 1435 union ctl_io *io; 1436 1437 io = beio->io; 1438 be_lun = beio->lun; 1439 ctl_free_beio(beio); 1440 if ((io->io_hdr.flags & CTL_FLAG_ABORT) || 1441 ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && 1442 (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { 1443 ctl_data_submit_done(io); 1444 return; 1445 } 1446 1447 io->io_hdr.status &= ~CTL_STATUS_MASK; 1448 io->io_hdr.status |= CTL_STATUS_NONE; 1449 1450 mtx_lock(&be_lun->queue_lock); 1451 /* 1452 * XXX KDM make sure that links is okay to use at this point. 1453 * Otherwise, we either need to add another field to ctl_io_hdr, 1454 * or deal with resource allocation here. 
1455 */ 1456 STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links); 1457 mtx_unlock(&be_lun->queue_lock); 1458 1459 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 1460} 1461 1462static void 1463ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun, 1464 union ctl_io *io) 1465{ 1466 struct ctl_be_block_io *beio; 1467 struct ctl_be_block_softc *softc; 1468 struct ctl_lba_len_flags *lbalen; 1469 struct ctl_ptr_len_flags *bptrlen; 1470 uint64_t len_left, lbas; 1471 int i; 1472 1473 softc = be_lun->softc; 1474 1475 DPRINTF("entered\n"); 1476 1477 lbalen = ARGS(io); 1478 if (lbalen->flags & CTL_LLF_WRITE) { 1479 SDT_PROBE(cbb, kernel, write, start, 0, 0, 0, 0, 0); 1480 } else { 1481 SDT_PROBE(cbb, kernel, read, start, 0, 0, 0, 0, 0); 1482 } 1483 1484 beio = ctl_alloc_beio(softc); 1485 beio->io = io; 1486 beio->lun = be_lun; 1487 bptrlen = PRIV(io); 1488 bptrlen->ptr = (void *)beio; 1489 1490 switch (io->scsiio.tag_type) { 1491 case CTL_TAG_ORDERED: 1492 beio->ds_tag_type = DEVSTAT_TAG_ORDERED; 1493 break; 1494 case CTL_TAG_HEAD_OF_QUEUE: 1495 beio->ds_tag_type = DEVSTAT_TAG_HEAD; 1496 break; 1497 case CTL_TAG_UNTAGGED: 1498 case CTL_TAG_SIMPLE: 1499 case CTL_TAG_ACA: 1500 default: 1501 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; 1502 break; 1503 } 1504 1505 if (lbalen->flags & CTL_LLF_WRITE) { 1506 beio->bio_cmd = BIO_WRITE; 1507 beio->ds_trans_type = DEVSTAT_WRITE; 1508 } else { 1509 beio->bio_cmd = BIO_READ; 1510 beio->ds_trans_type = DEVSTAT_READ; 1511 } 1512 1513 DPRINTF("%s at LBA %jx len %u @%ju\n", 1514 (beio->bio_cmd == BIO_READ) ? 
"READ" : "WRITE", 1515 (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len); 1516 if (lbalen->flags & CTL_LLF_COMPARE) 1517 lbas = CTLBLK_HALF_IO_SIZE; 1518 else 1519 lbas = CTLBLK_MAX_IO_SIZE; 1520 lbas = MIN(lbalen->len - bptrlen->len, lbas / be_lun->blocksize); 1521 beio->io_offset = (lbalen->lba + bptrlen->len) * be_lun->blocksize; 1522 beio->io_len = lbas * be_lun->blocksize; 1523 bptrlen->len += lbas; 1524 1525 for (i = 0, len_left = beio->io_len; len_left > 0; i++) { 1526 KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)", 1527 i, CTLBLK_MAX_SEGS)); 1528 1529 /* 1530 * Setup the S/G entry for this chunk. 1531 */ 1532 beio->sg_segs[i].len = min(CTLBLK_MAX_SEG, len_left); 1533 beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK); 1534 1535 DPRINTF("segment %d addr %p len %zd\n", i, 1536 beio->sg_segs[i].addr, beio->sg_segs[i].len); 1537 1538 /* Set up second segment for compare operation. */ 1539 if (lbalen->flags & CTL_LLF_COMPARE) { 1540 beio->sg_segs[i + CTLBLK_HALF_SEGS].len = 1541 beio->sg_segs[i].len; 1542 beio->sg_segs[i + CTLBLK_HALF_SEGS].addr = 1543 uma_zalloc(be_lun->lun_zone, M_WAITOK); 1544 } 1545 1546 beio->num_segs++; 1547 len_left -= beio->sg_segs[i].len; 1548 } 1549 if (bptrlen->len < lbalen->len) 1550 beio->beio_cont = ctl_be_block_next; 1551 io->scsiio.be_move_done = ctl_be_block_move_done; 1552 /* For compare we have separate S/G lists for read and datamove. */ 1553 if (lbalen->flags & CTL_LLF_COMPARE) 1554 io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS]; 1555 else 1556 io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs; 1557 io->scsiio.kern_data_len = beio->io_len; 1558 io->scsiio.kern_data_resid = 0; 1559 io->scsiio.kern_sg_entries = beio->num_segs; 1560 io->io_hdr.flags |= CTL_FLAG_ALLOCATED | CTL_FLAG_KDPTR_SGLIST; 1561 1562 /* 1563 * For the read case, we need to read the data into our buffers and 1564 * then we can send it back to the user. 
For the write case, we 1565 * need to get the data from the user first. 1566 */ 1567 if (beio->bio_cmd == BIO_READ) { 1568 SDT_PROBE(cbb, kernel, read, alloc_done, 0, 0, 0, 0, 0); 1569 be_lun->dispatch(be_lun, beio); 1570 } else { 1571 SDT_PROBE(cbb, kernel, write, alloc_done, 0, 0, 0, 0, 0); 1572#ifdef CTL_TIME_IO 1573 getbintime(&io->io_hdr.dma_start_bt); 1574#endif 1575 ctl_datamove(io); 1576 } 1577} 1578 1579static void 1580ctl_be_block_worker(void *context, int pending) 1581{ 1582 struct ctl_be_block_lun *be_lun; 1583 struct ctl_be_block_softc *softc; 1584 union ctl_io *io; 1585 1586 be_lun = (struct ctl_be_block_lun *)context; 1587 softc = be_lun->softc; 1588 1589 DPRINTF("entered\n"); 1590 1591 mtx_lock(&be_lun->queue_lock); 1592 for (;;) { 1593 io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue); 1594 if (io != NULL) { 1595 struct ctl_be_block_io *beio; 1596 1597 DPRINTF("datamove queue\n"); 1598 1599 STAILQ_REMOVE(&be_lun->datamove_queue, &io->io_hdr, 1600 ctl_io_hdr, links); 1601 1602 mtx_unlock(&be_lun->queue_lock); 1603 1604 beio = (struct ctl_be_block_io *)PRIV(io)->ptr; 1605 1606 be_lun->dispatch(be_lun, beio); 1607 1608 mtx_lock(&be_lun->queue_lock); 1609 continue; 1610 } 1611 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue); 1612 if (io != NULL) { 1613 DPRINTF("config write queue\n"); 1614 STAILQ_REMOVE(&be_lun->config_write_queue, &io->io_hdr, 1615 ctl_io_hdr, links); 1616 mtx_unlock(&be_lun->queue_lock); 1617 ctl_be_block_cw_dispatch(be_lun, io); 1618 mtx_lock(&be_lun->queue_lock); 1619 continue; 1620 } 1621 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue); 1622 if (io != NULL) { 1623 DPRINTF("config read queue\n"); 1624 STAILQ_REMOVE(&be_lun->config_read_queue, &io->io_hdr, 1625 ctl_io_hdr, links); 1626 mtx_unlock(&be_lun->queue_lock); 1627 ctl_be_block_cr_dispatch(be_lun, io); 1628 mtx_lock(&be_lun->queue_lock); 1629 continue; 1630 } 1631 io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue); 1632 if (io != 
NULL) { 1633 DPRINTF("input queue\n"); 1634 1635 STAILQ_REMOVE(&be_lun->input_queue, &io->io_hdr, 1636 ctl_io_hdr, links); 1637 mtx_unlock(&be_lun->queue_lock); 1638 1639 /* 1640 * We must drop the lock, since this routine and 1641 * its children may sleep. 1642 */ 1643 ctl_be_block_dispatch(be_lun, io); 1644 1645 mtx_lock(&be_lun->queue_lock); 1646 continue; 1647 } 1648 1649 /* 1650 * If we get here, there is no work left in the queues, so 1651 * just break out and let the task queue go to sleep. 1652 */ 1653 break; 1654 } 1655 mtx_unlock(&be_lun->queue_lock); 1656} 1657 1658/* 1659 * Entry point from CTL to the backend for I/O. We queue everything to a 1660 * work thread, so this just puts the I/O on a queue and wakes up the 1661 * thread. 1662 */ 1663static int 1664ctl_be_block_submit(union ctl_io *io) 1665{ 1666 struct ctl_be_block_lun *be_lun; 1667 struct ctl_be_lun *ctl_be_lun; 1668 1669 DPRINTF("entered\n"); 1670 1671 ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[ 1672 CTL_PRIV_BACKEND_LUN].ptr; 1673 be_lun = (struct ctl_be_block_lun *)ctl_be_lun->be_lun; 1674 1675 /* 1676 * Make sure we only get SCSI I/O. 1677 */ 1678 KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, ("Non-SCSI I/O (type " 1679 "%#x) encountered", io->io_hdr.io_type)); 1680 1681 PRIV(io)->len = 0; 1682 1683 mtx_lock(&be_lun->queue_lock); 1684 /* 1685 * XXX KDM make sure that links is okay to use at this point. 1686 * Otherwise, we either need to add another field to ctl_io_hdr, 1687 * or deal with resource allocation here. 
1688 */ 1689 STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links); 1690 mtx_unlock(&be_lun->queue_lock); 1691 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 1692 1693 return (CTL_RETVAL_COMPLETE); 1694} 1695 1696static int 1697ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, 1698 int flag, struct thread *td) 1699{ 1700 struct ctl_be_block_softc *softc; 1701 int error; 1702 1703 softc = &backend_block_softc; 1704 1705 error = 0; 1706 1707 switch (cmd) { 1708 case CTL_LUN_REQ: { 1709 struct ctl_lun_req *lun_req; 1710 1711 lun_req = (struct ctl_lun_req *)addr; 1712 1713 switch (lun_req->reqtype) { 1714 case CTL_LUNREQ_CREATE: 1715 error = ctl_be_block_create(softc, lun_req); 1716 break; 1717 case CTL_LUNREQ_RM: 1718 error = ctl_be_block_rm(softc, lun_req); 1719 break; 1720 case CTL_LUNREQ_MODIFY: 1721 error = ctl_be_block_modify(softc, lun_req); 1722 break; 1723 default: 1724 lun_req->status = CTL_LUN_ERROR; 1725 snprintf(lun_req->error_str, sizeof(lun_req->error_str), 1726 "invalid LUN request type %d", 1727 lun_req->reqtype); 1728 break; 1729 } 1730 break; 1731 } 1732 default: 1733 error = ENOTTY; 1734 break; 1735 } 1736 1737 return (error); 1738} 1739 1740static int 1741ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 1742{ 1743 struct ctl_be_block_filedata *file_data; 1744 struct ctl_lun_create_params *params; 1745 struct vattr vattr; 1746 off_t pss; 1747 int error; 1748 1749 error = 0; 1750 file_data = &be_lun->backend.file; 1751 params = &be_lun->params; 1752 1753 be_lun->dev_type = CTL_BE_BLOCK_FILE; 1754 be_lun->dispatch = ctl_be_block_dispatch_file; 1755 be_lun->lun_flush = ctl_be_block_flush_file; 1756 be_lun->get_lba_status = ctl_be_block_gls_file; 1757 be_lun->getattr = ctl_be_block_getattr_file; 1758 1759 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); 1760 if (error != 0) { 1761 snprintf(req->error_str, sizeof(req->error_str), 1762 "error calling VOP_GETATTR() for file %s", 
1763 be_lun->dev_path); 1764 return (error); 1765 } 1766 1767 /* 1768 * Verify that we have the ability to upgrade to exclusive 1769 * access on this file so we can trap errors at open instead 1770 * of reporting them during first access. 1771 */ 1772 if (VOP_ISLOCKED(be_lun->vn) != LK_EXCLUSIVE) { 1773 vn_lock(be_lun->vn, LK_UPGRADE | LK_RETRY); 1774 if (be_lun->vn->v_iflag & VI_DOOMED) { 1775 error = EBADF; 1776 snprintf(req->error_str, sizeof(req->error_str), 1777 "error locking file %s", be_lun->dev_path); 1778 return (error); 1779 } 1780 } 1781 1782 1783 file_data->cred = crhold(curthread->td_ucred); 1784 if (params->lun_size_bytes != 0) 1785 be_lun->size_bytes = params->lun_size_bytes; 1786 else 1787 be_lun->size_bytes = vattr.va_size; 1788 /* 1789 * We set the multi thread flag for file operations because all 1790 * filesystems (in theory) are capable of allowing multiple readers 1791 * of a file at once. So we want to get the maximum possible 1792 * concurrency. 1793 */ 1794 be_lun->flags |= CTL_BE_BLOCK_LUN_MULTI_THREAD; 1795 1796 /* 1797 * For files we can use any logical block size. Prefer 512 bytes 1798 * for compatibility reasons. If file's vattr.va_blocksize 1799 * (preferred I/O block size) is bigger and multiple to chosen 1800 * logical block size -- report it as physical block size. 1801 */ 1802 if (params->blocksize_bytes != 0) 1803 be_lun->blocksize = params->blocksize_bytes; 1804 else 1805 be_lun->blocksize = 512; 1806 pss = vattr.va_blocksize / be_lun->blocksize; 1807 if ((pss > 0) && (pss * be_lun->blocksize == vattr.va_blocksize) && 1808 ((pss & (pss - 1)) == 0)) { 1809 be_lun->pblockexp = fls(pss) - 1; 1810 be_lun->pblockoff = 0; 1811 } 1812 1813 /* 1814 * Sanity check. The media size has to be at least one 1815 * sector long. 
1816 */ 1817 if (be_lun->size_bytes < be_lun->blocksize) { 1818 error = EINVAL; 1819 snprintf(req->error_str, sizeof(req->error_str), 1820 "file %s size %ju < block size %u", be_lun->dev_path, 1821 (uintmax_t)be_lun->size_bytes, be_lun->blocksize); 1822 } 1823 return (error); 1824} 1825 1826static int 1827ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 1828{ 1829 struct ctl_lun_create_params *params; 1830 struct vattr vattr; 1831 struct cdev *dev; 1832 struct cdevsw *devsw; 1833 int error; 1834 off_t ps, pss, po, pos; 1835 1836 params = &be_lun->params; 1837 1838 be_lun->dev_type = CTL_BE_BLOCK_DEV; 1839 be_lun->backend.dev.cdev = be_lun->vn->v_rdev; 1840 be_lun->backend.dev.csw = dev_refthread(be_lun->backend.dev.cdev, 1841 &be_lun->backend.dev.dev_ref); 1842 if (be_lun->backend.dev.csw == NULL) 1843 panic("Unable to retrieve device switch"); 1844 if (strcmp(be_lun->backend.dev.csw->d_name, "zvol") == 0) { 1845 be_lun->dispatch = ctl_be_block_dispatch_zvol; 1846 be_lun->get_lba_status = ctl_be_block_gls_zvol; 1847 } else 1848 be_lun->dispatch = ctl_be_block_dispatch_dev; 1849 be_lun->lun_flush = ctl_be_block_flush_dev; 1850 be_lun->unmap = ctl_be_block_unmap_dev; 1851 be_lun->getattr = ctl_be_block_getattr_dev; 1852 1853 error = VOP_GETATTR(be_lun->vn, &vattr, NOCRED); 1854 if (error) { 1855 snprintf(req->error_str, sizeof(req->error_str), 1856 "error getting vnode attributes for device %s", 1857 be_lun->dev_path); 1858 return (error); 1859 } 1860 1861 dev = be_lun->vn->v_rdev; 1862 devsw = dev->si_devsw; 1863 if (!devsw->d_ioctl) { 1864 snprintf(req->error_str, sizeof(req->error_str), 1865 "no d_ioctl for device %s!", 1866 be_lun->dev_path); 1867 return (ENODEV); 1868 } 1869 1870 error = devsw->d_ioctl(dev, DIOCGSECTORSIZE, 1871 (caddr_t)&be_lun->blocksize, FREAD, 1872 curthread); 1873 if (error) { 1874 snprintf(req->error_str, sizeof(req->error_str), 1875 "error %d returned for DIOCGSECTORSIZE ioctl " 1876 "on %s!", error, 
be_lun->dev_path); 1877 return (error); 1878 } 1879 1880 /* 1881 * If the user has asked for a blocksize that is greater than the 1882 * backing device's blocksize, we can do it only if the blocksize 1883 * the user is asking for is an even multiple of the underlying 1884 * device's blocksize. 1885 */ 1886 if ((params->blocksize_bytes != 0) 1887 && (params->blocksize_bytes > be_lun->blocksize)) { 1888 uint32_t bs_multiple, tmp_blocksize; 1889 1890 bs_multiple = params->blocksize_bytes / be_lun->blocksize; 1891 1892 tmp_blocksize = bs_multiple * be_lun->blocksize; 1893 1894 if (tmp_blocksize == params->blocksize_bytes) { 1895 be_lun->blocksize = params->blocksize_bytes; 1896 } else { 1897 snprintf(req->error_str, sizeof(req->error_str), 1898 "requested blocksize %u is not an even " 1899 "multiple of backing device blocksize %u", 1900 params->blocksize_bytes, 1901 be_lun->blocksize); 1902 return (EINVAL); 1903 1904 } 1905 } else if ((params->blocksize_bytes != 0) 1906 && (params->blocksize_bytes != be_lun->blocksize)) { 1907 snprintf(req->error_str, sizeof(req->error_str), 1908 "requested blocksize %u < backing device " 1909 "blocksize %u", params->blocksize_bytes, 1910 be_lun->blocksize); 1911 return (EINVAL); 1912 } 1913 1914 error = devsw->d_ioctl(dev, DIOCGMEDIASIZE, 1915 (caddr_t)&be_lun->size_bytes, FREAD, 1916 curthread); 1917 if (error) { 1918 snprintf(req->error_str, sizeof(req->error_str), 1919 "error %d returned for DIOCGMEDIASIZE " 1920 " ioctl on %s!", error, 1921 be_lun->dev_path); 1922 return (error); 1923 } 1924 1925 if (params->lun_size_bytes != 0) { 1926 if (params->lun_size_bytes > be_lun->size_bytes) { 1927 snprintf(req->error_str, sizeof(req->error_str), 1928 "requested LUN size %ju > backing device " 1929 "size %ju", 1930 (uintmax_t)params->lun_size_bytes, 1931 (uintmax_t)be_lun->size_bytes); 1932 return (EINVAL); 1933 } 1934 1935 be_lun->size_bytes = params->lun_size_bytes; 1936 } 1937 1938 error = devsw->d_ioctl(dev, DIOCGSTRIPESIZE, 1939 
(caddr_t)&ps, FREAD, curthread); 1940 if (error) 1941 ps = po = 0; 1942 else { 1943 error = devsw->d_ioctl(dev, DIOCGSTRIPEOFFSET, 1944 (caddr_t)&po, FREAD, curthread); 1945 if (error) 1946 po = 0; 1947 } 1948 pss = ps / be_lun->blocksize; 1949 pos = po / be_lun->blocksize; 1950 if ((pss > 0) && (pss * be_lun->blocksize == ps) && (pss >= pos) && 1951 ((pss & (pss - 1)) == 0) && (pos * be_lun->blocksize == po)) { 1952 be_lun->pblockexp = fls(pss) - 1; 1953 be_lun->pblockoff = (pss - pos) % pss; 1954 } 1955 1956 return (0); 1957} 1958 1959static int 1960ctl_be_block_close(struct ctl_be_block_lun *be_lun) 1961{ 1962 DROP_GIANT(); 1963 if (be_lun->vn) { 1964 int flags = FREAD | FWRITE; 1965 1966 switch (be_lun->dev_type) { 1967 case CTL_BE_BLOCK_DEV: 1968 if (be_lun->backend.dev.csw) { 1969 dev_relthread(be_lun->backend.dev.cdev, 1970 be_lun->backend.dev.dev_ref); 1971 be_lun->backend.dev.csw = NULL; 1972 be_lun->backend.dev.cdev = NULL; 1973 } 1974 break; 1975 case CTL_BE_BLOCK_FILE: 1976 break; 1977 case CTL_BE_BLOCK_NONE: 1978 break; 1979 default: 1980 panic("Unexpected backend type."); 1981 break; 1982 } 1983 1984 (void)vn_close(be_lun->vn, flags, NOCRED, curthread); 1985 be_lun->vn = NULL; 1986 1987 switch (be_lun->dev_type) { 1988 case CTL_BE_BLOCK_DEV: 1989 break; 1990 case CTL_BE_BLOCK_FILE: 1991 if (be_lun->backend.file.cred != NULL) { 1992 crfree(be_lun->backend.file.cred); 1993 be_lun->backend.file.cred = NULL; 1994 } 1995 break; 1996 case CTL_BE_BLOCK_NONE: 1997 break; 1998 default: 1999 panic("Unexpected backend type."); 2000 break; 2001 } 2002 be_lun->dev_type = CTL_BE_BLOCK_NONE; 2003 } 2004 PICKUP_GIANT(); 2005 2006 return (0); 2007} 2008 2009static int 2010ctl_be_block_open(struct ctl_be_block_softc *softc, 2011 struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) 2012{ 2013 struct nameidata nd; 2014 int flags; 2015 int error; 2016 2017 /* 2018 * XXX KDM allow a read-only option? 
2019 */ 2020 flags = FREAD | FWRITE; 2021 error = 0; 2022 2023 if (rootvnode == NULL) { 2024 snprintf(req->error_str, sizeof(req->error_str), 2025 "Root filesystem is not mounted"); 2026 return (1); 2027 } 2028 2029 if (!curthread->td_proc->p_fd->fd_cdir) { 2030 curthread->td_proc->p_fd->fd_cdir = rootvnode; 2031 VREF(rootvnode); 2032 } 2033 if (!curthread->td_proc->p_fd->fd_rdir) { 2034 curthread->td_proc->p_fd->fd_rdir = rootvnode; 2035 VREF(rootvnode); 2036 } 2037 if (!curthread->td_proc->p_fd->fd_jdir) { 2038 curthread->td_proc->p_fd->fd_jdir = rootvnode; 2039 VREF(rootvnode); 2040 } 2041 2042 again: 2043 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread); 2044 error = vn_open(&nd, &flags, 0, NULL); 2045 if (error) { 2046 /* 2047 * This is the only reasonable guess we can make as far as 2048 * path if the user doesn't give us a fully qualified path. 2049 * If they want to specify a file, they need to specify the 2050 * full path. 2051 */ 2052 if (be_lun->dev_path[0] != '/') { 2053 char *dev_path = "/dev/"; 2054 char *dev_name; 2055 2056 /* Try adding device path at beginning of name */ 2057 dev_name = malloc(strlen(be_lun->dev_path) 2058 + strlen(dev_path) + 1, 2059 M_CTLBLK, M_WAITOK); 2060 if (dev_name) { 2061 sprintf(dev_name, "%s%s", dev_path, 2062 be_lun->dev_path); 2063 free(be_lun->dev_path, M_CTLBLK); 2064 be_lun->dev_path = dev_name; 2065 goto again; 2066 } 2067 } 2068 snprintf(req->error_str, sizeof(req->error_str), 2069 "error opening %s: %d", be_lun->dev_path, error); 2070 return (error); 2071 } 2072 2073 NDFREE(&nd, NDF_ONLY_PNBUF); 2074 2075 be_lun->vn = nd.ni_vp; 2076 2077 /* We only support disks and files. 
*/ 2078 if (vn_isdisk(be_lun->vn, &error)) { 2079 error = ctl_be_block_open_dev(be_lun, req); 2080 } else if (be_lun->vn->v_type == VREG) { 2081 error = ctl_be_block_open_file(be_lun, req); 2082 } else { 2083 error = EINVAL; 2084 snprintf(req->error_str, sizeof(req->error_str), 2085 "%s is not a disk or plain file", be_lun->dev_path); 2086 } 2087 VOP_UNLOCK(be_lun->vn, 0); 2088 2089 if (error != 0) { 2090 ctl_be_block_close(be_lun); 2091 return (error); 2092 } 2093 2094 be_lun->blocksize_shift = fls(be_lun->blocksize) - 1; 2095 be_lun->size_blocks = be_lun->size_bytes >> be_lun->blocksize_shift; 2096 2097 return (0); 2098} 2099 2100static int 2101ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2102{ 2103 struct ctl_be_block_lun *be_lun; 2104 struct ctl_lun_create_params *params; 2105 char num_thread_str[16]; 2106 char tmpstr[32]; 2107 char *value; 2108 int retval, num_threads, unmap; 2109 int tmp_num_threads; 2110 2111 params = &req->reqdata.create; 2112 retval = 0; 2113 req->status = CTL_LUN_OK; 2114 2115 num_threads = cbb_num_threads; 2116 2117 be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK); 2118 2119 be_lun->params = req->reqdata.create; 2120 be_lun->softc = softc; 2121 STAILQ_INIT(&be_lun->input_queue); 2122 STAILQ_INIT(&be_lun->config_read_queue); 2123 STAILQ_INIT(&be_lun->config_write_queue); 2124 STAILQ_INIT(&be_lun->datamove_queue); 2125 sprintf(be_lun->lunname, "cblk%d", softc->num_luns); 2126 mtx_init(&be_lun->io_lock, "cblk io lock", NULL, MTX_DEF); 2127 mtx_init(&be_lun->queue_lock, "cblk queue lock", NULL, MTX_DEF); 2128 ctl_init_opts(&be_lun->ctl_be_lun.options, 2129 req->num_be_args, req->kern_be_args); 2130 2131 be_lun->lun_zone = uma_zcreate(be_lun->lunname, CTLBLK_MAX_SEG, 2132 NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0); 2133 2134 if (be_lun->lun_zone == NULL) { 2135 snprintf(req->error_str, sizeof(req->error_str), 2136 "error allocating UMA zone"); 2137 goto bailout_error; 2138 } 2139 2140 if 
(params->flags & CTL_LUN_FLAG_DEV_TYPE) 2141 be_lun->ctl_be_lun.lun_type = params->device_type; 2142 else 2143 be_lun->ctl_be_lun.lun_type = T_DIRECT; 2144 2145 if (be_lun->ctl_be_lun.lun_type == T_DIRECT) { 2146 value = ctl_get_opt(&be_lun->ctl_be_lun.options, "file"); 2147 if (value == NULL) { 2148 snprintf(req->error_str, sizeof(req->error_str), 2149 "no file argument specified"); 2150 goto bailout_error; 2151 } 2152 be_lun->dev_path = strdup(value, M_CTLBLK); 2153 be_lun->blocksize = 512; 2154 be_lun->blocksize_shift = fls(be_lun->blocksize) - 1; 2155 2156 retval = ctl_be_block_open(softc, be_lun, req); 2157 if (retval != 0) { 2158 retval = 0; 2159 req->status = CTL_LUN_WARNING; 2160 } 2161 } else { 2162 /* 2163 * For processor devices, we don't have any size. 2164 */ 2165 be_lun->blocksize = 0; 2166 be_lun->pblockexp = 0; 2167 be_lun->pblockoff = 0; 2168 be_lun->size_blocks = 0; 2169 be_lun->size_bytes = 0; 2170 be_lun->ctl_be_lun.maxlba = 0; 2171 2172 /* 2173 * Default to just 1 thread for processor devices. 2174 */ 2175 num_threads = 1; 2176 } 2177 2178 /* 2179 * XXX This searching loop might be refactored to be combined with 2180 * the loop above, 2181 */ 2182 value = ctl_get_opt(&be_lun->ctl_be_lun.options, "num_threads"); 2183 if (value != NULL) { 2184 tmp_num_threads = strtol(value, NULL, 0); 2185 2186 /* 2187 * We don't let the user specify less than one 2188 * thread, but hope he's clueful enough not to 2189 * specify 1000 threads. 
2190 */ 2191 if (tmp_num_threads < 1) { 2192 snprintf(req->error_str, sizeof(req->error_str), 2193 "invalid number of threads %s", 2194 num_thread_str); 2195 goto bailout_error; 2196 } 2197 num_threads = tmp_num_threads; 2198 } 2199 unmap = (be_lun->dispatch == ctl_be_block_dispatch_zvol); 2200 value = ctl_get_opt(&be_lun->ctl_be_lun.options, "unmap"); 2201 if (value != NULL) 2202 unmap = (strcmp(value, "on") == 0); 2203 2204 be_lun->flags = CTL_BE_BLOCK_LUN_UNCONFIGURED; 2205 be_lun->ctl_be_lun.flags = CTL_LUN_FLAG_PRIMARY; 2206 if (be_lun->vn == NULL) 2207 be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_OFFLINE; 2208 if (unmap) 2209 be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_UNMAP; 2210 if (be_lun->dispatch != ctl_be_block_dispatch_dev) 2211 be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_SERSEQ_READ; 2212 be_lun->ctl_be_lun.be_lun = be_lun; 2213 be_lun->ctl_be_lun.maxlba = (be_lun->size_blocks == 0) ? 2214 0 : (be_lun->size_blocks - 1); 2215 be_lun->ctl_be_lun.blocksize = be_lun->blocksize; 2216 be_lun->ctl_be_lun.pblockexp = be_lun->pblockexp; 2217 be_lun->ctl_be_lun.pblockoff = be_lun->pblockoff; 2218 if (be_lun->dispatch == ctl_be_block_dispatch_zvol && 2219 be_lun->blocksize != 0) 2220 be_lun->ctl_be_lun.atomicblock = CTLBLK_MAX_IO_SIZE / 2221 be_lun->blocksize; 2222 /* Tell the user the blocksize we ended up using */ 2223 params->lun_size_bytes = be_lun->size_bytes; 2224 params->blocksize_bytes = be_lun->blocksize; 2225 if (params->flags & CTL_LUN_FLAG_ID_REQ) { 2226 be_lun->ctl_be_lun.req_lun_id = params->req_lun_id; 2227 be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_ID_REQ; 2228 } else 2229 be_lun->ctl_be_lun.req_lun_id = 0; 2230 2231 be_lun->ctl_be_lun.lun_shutdown = ctl_be_block_lun_shutdown; 2232 be_lun->ctl_be_lun.lun_config_status = 2233 ctl_be_block_lun_config_status; 2234 be_lun->ctl_be_lun.be = &ctl_be_block_driver; 2235 2236 if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) { 2237 snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%4d", 2238 softc->num_luns); 2239 
strncpy((char *)be_lun->ctl_be_lun.serial_num, tmpstr, 2240 ctl_min(sizeof(be_lun->ctl_be_lun.serial_num), 2241 sizeof(tmpstr))); 2242 2243 /* Tell the user what we used for a serial number */ 2244 strncpy((char *)params->serial_num, tmpstr, 2245 ctl_min(sizeof(params->serial_num), sizeof(tmpstr))); 2246 } else { 2247 strncpy((char *)be_lun->ctl_be_lun.serial_num, 2248 params->serial_num, 2249 ctl_min(sizeof(be_lun->ctl_be_lun.serial_num), 2250 sizeof(params->serial_num))); 2251 } 2252 if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) { 2253 snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%4d", softc->num_luns); 2254 strncpy((char *)be_lun->ctl_be_lun.device_id, tmpstr, 2255 ctl_min(sizeof(be_lun->ctl_be_lun.device_id), 2256 sizeof(tmpstr))); 2257 2258 /* Tell the user what we used for a device ID */ 2259 strncpy((char *)params->device_id, tmpstr, 2260 ctl_min(sizeof(params->device_id), sizeof(tmpstr))); 2261 } else { 2262 strncpy((char *)be_lun->ctl_be_lun.device_id, 2263 params->device_id, 2264 ctl_min(sizeof(be_lun->ctl_be_lun.device_id), 2265 sizeof(params->device_id))); 2266 } 2267 2268 TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun); 2269 2270 be_lun->io_taskqueue = taskqueue_create(be_lun->lunname, M_WAITOK, 2271 taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue); 2272 2273 if (be_lun->io_taskqueue == NULL) { 2274 snprintf(req->error_str, sizeof(req->error_str), 2275 "unable to create taskqueue"); 2276 goto bailout_error; 2277 } 2278 2279 /* 2280 * Note that we start the same number of threads by default for 2281 * both the file case and the block device case. For the file 2282 * case, we need multiple threads to allow concurrency, because the 2283 * vnode interface is designed to be a blocking interface. For the 2284 * block device case, ZFS zvols at least will block the caller's 2285 * context in many instances, and so we need multiple threads to 2286 * overcome that problem. 
Other block devices don't need as many 2287 * threads, but they shouldn't cause too many problems. 2288 * 2289 * If the user wants to just have a single thread for a block 2290 * device, he can specify that when the LUN is created, or change 2291 * the tunable/sysctl to alter the default number of threads. 2292 */ 2293 retval = taskqueue_start_threads(&be_lun->io_taskqueue, 2294 /*num threads*/num_threads, 2295 /*priority*/PWAIT, 2296 /*thread name*/ 2297 "%s taskq", be_lun->lunname); 2298 2299 if (retval != 0) 2300 goto bailout_error; 2301 2302 be_lun->num_threads = num_threads; 2303 2304 mtx_lock(&softc->lock); 2305 softc->num_luns++; 2306 STAILQ_INSERT_TAIL(&softc->lun_list, be_lun, links); 2307 2308 mtx_unlock(&softc->lock); 2309 2310 retval = ctl_add_lun(&be_lun->ctl_be_lun); 2311 if (retval != 0) { 2312 mtx_lock(&softc->lock); 2313 STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, 2314 links); 2315 softc->num_luns--; 2316 mtx_unlock(&softc->lock); 2317 snprintf(req->error_str, sizeof(req->error_str), 2318 "ctl_add_lun() returned error %d, see dmesg for " 2319 "details", retval); 2320 retval = 0; 2321 goto bailout_error; 2322 } 2323 2324 mtx_lock(&softc->lock); 2325 2326 /* 2327 * Tell the config_status routine that we're waiting so it won't 2328 * clean up the LUN in the event of an error. 
2329 */ 2330 be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING; 2331 2332 while (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) { 2333 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0); 2334 if (retval == EINTR) 2335 break; 2336 } 2337 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; 2338 2339 if (be_lun->flags & CTL_BE_BLOCK_LUN_CONFIG_ERR) { 2340 snprintf(req->error_str, sizeof(req->error_str), 2341 "LUN configuration error, see dmesg for details"); 2342 STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, 2343 links); 2344 softc->num_luns--; 2345 mtx_unlock(&softc->lock); 2346 goto bailout_error; 2347 } else { 2348 params->req_lun_id = be_lun->ctl_be_lun.lun_id; 2349 } 2350 2351 mtx_unlock(&softc->lock); 2352 2353 be_lun->disk_stats = devstat_new_entry("cbb", params->req_lun_id, 2354 be_lun->blocksize, 2355 DEVSTAT_ALL_SUPPORTED, 2356 be_lun->ctl_be_lun.lun_type 2357 | DEVSTAT_TYPE_IF_OTHER, 2358 DEVSTAT_PRIORITY_OTHER); 2359 2360 return (retval); 2361 2362bailout_error: 2363 req->status = CTL_LUN_ERROR; 2364 2365 if (be_lun->io_taskqueue != NULL) 2366 taskqueue_free(be_lun->io_taskqueue); 2367 ctl_be_block_close(be_lun); 2368 if (be_lun->dev_path != NULL) 2369 free(be_lun->dev_path, M_CTLBLK); 2370 if (be_lun->lun_zone != NULL) 2371 uma_zdestroy(be_lun->lun_zone); 2372 ctl_free_opts(&be_lun->ctl_be_lun.options); 2373 mtx_destroy(&be_lun->queue_lock); 2374 mtx_destroy(&be_lun->io_lock); 2375 free(be_lun, M_CTLBLK); 2376 2377 return (retval); 2378} 2379 2380static int 2381ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2382{ 2383 struct ctl_lun_rm_params *params; 2384 struct ctl_be_block_lun *be_lun; 2385 int retval; 2386 2387 params = &req->reqdata.rm; 2388 2389 mtx_lock(&softc->lock); 2390 2391 be_lun = NULL; 2392 2393 STAILQ_FOREACH(be_lun, &softc->lun_list, links) { 2394 if (be_lun->ctl_be_lun.lun_id == params->lun_id) 2395 break; 2396 } 2397 mtx_unlock(&softc->lock); 2398 2399 if (be_lun == NULL) { 2400 snprintf(req->error_str, 
sizeof(req->error_str), 2401 "LUN %u is not managed by the block backend", 2402 params->lun_id); 2403 goto bailout_error; 2404 } 2405 2406 retval = ctl_disable_lun(&be_lun->ctl_be_lun); 2407 2408 if (retval != 0) { 2409 snprintf(req->error_str, sizeof(req->error_str), 2410 "error %d returned from ctl_disable_lun() for " 2411 "LUN %d", retval, params->lun_id); 2412 goto bailout_error; 2413 2414 } 2415 2416 retval = ctl_invalidate_lun(&be_lun->ctl_be_lun); 2417 if (retval != 0) { 2418 snprintf(req->error_str, sizeof(req->error_str), 2419 "error %d returned from ctl_invalidate_lun() for " 2420 "LUN %d", retval, params->lun_id); 2421 goto bailout_error; 2422 } 2423 2424 mtx_lock(&softc->lock); 2425 2426 be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING; 2427 2428 while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) { 2429 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0); 2430 if (retval == EINTR) 2431 break; 2432 } 2433 2434 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; 2435 2436 if ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) { 2437 snprintf(req->error_str, sizeof(req->error_str), 2438 "interrupted waiting for LUN to be freed"); 2439 mtx_unlock(&softc->lock); 2440 goto bailout_error; 2441 } 2442 2443 STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, links); 2444 2445 softc->num_luns--; 2446 mtx_unlock(&softc->lock); 2447 2448 taskqueue_drain(be_lun->io_taskqueue, &be_lun->io_task); 2449 2450 taskqueue_free(be_lun->io_taskqueue); 2451 2452 ctl_be_block_close(be_lun); 2453 2454 if (be_lun->disk_stats != NULL) 2455 devstat_remove_entry(be_lun->disk_stats); 2456 2457 uma_zdestroy(be_lun->lun_zone); 2458 2459 ctl_free_opts(&be_lun->ctl_be_lun.options); 2460 free(be_lun->dev_path, M_CTLBLK); 2461 mtx_destroy(&be_lun->queue_lock); 2462 mtx_destroy(&be_lun->io_lock); 2463 free(be_lun, M_CTLBLK); 2464 2465 req->status = CTL_LUN_OK; 2466 2467 return (0); 2468 2469bailout_error: 2470 2471 req->status = CTL_LUN_ERROR; 2472 2473 return (0); 2474} 2475 
2476static int 2477ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun, 2478 struct ctl_lun_req *req) 2479{ 2480 struct vattr vattr; 2481 int error; 2482 struct ctl_lun_create_params *params = &be_lun->params; 2483 2484 if (params->lun_size_bytes != 0) { 2485 be_lun->size_bytes = params->lun_size_bytes; 2486 } else { 2487 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 2488 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); 2489 VOP_UNLOCK(be_lun->vn, 0); 2490 if (error != 0) { 2491 snprintf(req->error_str, sizeof(req->error_str), 2492 "error calling VOP_GETATTR() for file %s", 2493 be_lun->dev_path); 2494 return (error); 2495 } 2496 2497 be_lun->size_bytes = vattr.va_size; 2498 } 2499 2500 return (0); 2501} 2502 2503static int 2504ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun, 2505 struct ctl_lun_req *req) 2506{ 2507 struct ctl_be_block_devdata *dev_data; 2508 int error; 2509 struct ctl_lun_create_params *params = &be_lun->params; 2510 uint64_t size_bytes; 2511 2512 dev_data = &be_lun->backend.dev; 2513 if (!dev_data->csw->d_ioctl) { 2514 snprintf(req->error_str, sizeof(req->error_str), 2515 "no d_ioctl for device %s!", be_lun->dev_path); 2516 return (ENODEV); 2517 } 2518 2519 error = dev_data->csw->d_ioctl(dev_data->cdev, DIOCGMEDIASIZE, 2520 (caddr_t)&size_bytes, FREAD, 2521 curthread); 2522 if (error) { 2523 snprintf(req->error_str, sizeof(req->error_str), 2524 "error %d returned for DIOCGMEDIASIZE ioctl " 2525 "on %s!", error, be_lun->dev_path); 2526 return (error); 2527 } 2528 2529 if (params->lun_size_bytes != 0) { 2530 if (params->lun_size_bytes > size_bytes) { 2531 snprintf(req->error_str, sizeof(req->error_str), 2532 "requested LUN size %ju > backing device " 2533 "size %ju", 2534 (uintmax_t)params->lun_size_bytes, 2535 (uintmax_t)size_bytes); 2536 return (EINVAL); 2537 } 2538 2539 be_lun->size_bytes = params->lun_size_bytes; 2540 } else { 2541 be_lun->size_bytes = size_bytes; 2542 } 2543 2544 return (0); 2545} 2546 2547static int 
2548ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2549{ 2550 struct ctl_lun_modify_params *params; 2551 struct ctl_be_block_lun *be_lun; 2552 uint64_t oldsize; 2553 int error; 2554 2555 params = &req->reqdata.modify; 2556 2557 mtx_lock(&softc->lock); 2558 be_lun = NULL; 2559 STAILQ_FOREACH(be_lun, &softc->lun_list, links) { 2560 if (be_lun->ctl_be_lun.lun_id == params->lun_id) 2561 break; 2562 } 2563 mtx_unlock(&softc->lock); 2564 2565 if (be_lun == NULL) { 2566 snprintf(req->error_str, sizeof(req->error_str), 2567 "LUN %u is not managed by the block backend", 2568 params->lun_id); 2569 goto bailout_error; 2570 } 2571 2572 be_lun->params.lun_size_bytes = params->lun_size_bytes; 2573 2574 oldsize = be_lun->size_bytes; 2575 if (be_lun->vn == NULL) 2576 error = ctl_be_block_open(softc, be_lun, req); 2577 else if (be_lun->vn->v_type == VREG) 2578 error = ctl_be_block_modify_file(be_lun, req); 2579 else 2580 error = ctl_be_block_modify_dev(be_lun, req); 2581 2582 if (error == 0 && be_lun->size_bytes != oldsize) { 2583 be_lun->size_blocks = be_lun->size_bytes >> 2584 be_lun->blocksize_shift; 2585 2586 /* 2587 * The maximum LBA is the size - 1. 2588 * 2589 * XXX: Note that this field is being updated without locking, 2590 * which might cause problems on 32-bit architectures. 2591 */ 2592 be_lun->ctl_be_lun.maxlba = (be_lun->size_blocks == 0) ? 
2593 0 : (be_lun->size_blocks - 1); 2594 be_lun->ctl_be_lun.blocksize = be_lun->blocksize; 2595 be_lun->ctl_be_lun.pblockexp = be_lun->pblockexp; 2596 be_lun->ctl_be_lun.pblockoff = be_lun->pblockoff; 2597 if (be_lun->dispatch == ctl_be_block_dispatch_zvol && 2598 be_lun->blocksize != 0) 2599 be_lun->ctl_be_lun.atomicblock = CTLBLK_MAX_IO_SIZE / 2600 be_lun->blocksize; 2601 ctl_lun_capacity_changed(&be_lun->ctl_be_lun); 2602 if (oldsize == 0 && be_lun->size_blocks != 0) 2603 ctl_lun_online(&be_lun->ctl_be_lun); 2604 } 2605 2606 /* Tell the user the exact size we ended up using */ 2607 params->lun_size_bytes = be_lun->size_bytes; 2608 2609 req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK; 2610 2611 return (0); 2612 2613bailout_error: 2614 req->status = CTL_LUN_ERROR; 2615 2616 return (0); 2617} 2618 2619static void 2620ctl_be_block_lun_shutdown(void *be_lun) 2621{ 2622 struct ctl_be_block_lun *lun; 2623 struct ctl_be_block_softc *softc; 2624 2625 lun = (struct ctl_be_block_lun *)be_lun; 2626 2627 softc = lun->softc; 2628 2629 mtx_lock(&softc->lock); 2630 lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED; 2631 if (lun->flags & CTL_BE_BLOCK_LUN_WAITING) 2632 wakeup(lun); 2633 mtx_unlock(&softc->lock); 2634 2635} 2636 2637static void 2638ctl_be_block_lun_config_status(void *be_lun, ctl_lun_config_status status) 2639{ 2640 struct ctl_be_block_lun *lun; 2641 struct ctl_be_block_softc *softc; 2642 2643 lun = (struct ctl_be_block_lun *)be_lun; 2644 softc = lun->softc; 2645 2646 if (status == CTL_LUN_CONFIG_OK) { 2647 mtx_lock(&softc->lock); 2648 lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED; 2649 if (lun->flags & CTL_BE_BLOCK_LUN_WAITING) 2650 wakeup(lun); 2651 mtx_unlock(&softc->lock); 2652 2653 /* 2654 * We successfully added the LUN, attempt to enable it. 
2655 */ 2656 if (ctl_enable_lun(&lun->ctl_be_lun) != 0) { 2657 printf("%s: ctl_enable_lun() failed!\n", __func__); 2658 if (ctl_invalidate_lun(&lun->ctl_be_lun) != 0) { 2659 printf("%s: ctl_invalidate_lun() failed!\n", 2660 __func__); 2661 } 2662 } 2663 2664 return; 2665 } 2666 2667 2668 mtx_lock(&softc->lock); 2669 lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED; 2670 lun->flags |= CTL_BE_BLOCK_LUN_CONFIG_ERR; 2671 wakeup(lun); 2672 mtx_unlock(&softc->lock); 2673} 2674 2675 2676static int 2677ctl_be_block_config_write(union ctl_io *io) 2678{ 2679 struct ctl_be_block_lun *be_lun; 2680 struct ctl_be_lun *ctl_be_lun; 2681 int retval; 2682 2683 retval = 0; 2684 2685 DPRINTF("entered\n"); 2686 2687 ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[ 2688 CTL_PRIV_BACKEND_LUN].ptr; 2689 be_lun = (struct ctl_be_block_lun *)ctl_be_lun->be_lun; 2690 2691 switch (io->scsiio.cdb[0]) { 2692 case SYNCHRONIZE_CACHE: 2693 case SYNCHRONIZE_CACHE_16: 2694 case WRITE_SAME_10: 2695 case WRITE_SAME_16: 2696 case UNMAP: 2697 /* 2698 * The upper level CTL code will filter out any CDBs with 2699 * the immediate bit set and return the proper error. 2700 * 2701 * We don't really need to worry about what LBA range the 2702 * user asked to be synced out. When they issue a sync 2703 * cache command, we'll sync out the whole thing. 2704 */ 2705 mtx_lock(&be_lun->queue_lock); 2706 STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr, 2707 links); 2708 mtx_unlock(&be_lun->queue_lock); 2709 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 2710 break; 2711 case START_STOP_UNIT: { 2712 struct scsi_start_stop_unit *cdb; 2713 2714 cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb; 2715 2716 if (cdb->how & SSS_START) 2717 retval = ctl_start_lun(ctl_be_lun); 2718 else { 2719 retval = ctl_stop_lun(ctl_be_lun); 2720 /* 2721 * XXX KDM Copan-specific offline behavior. 2722 * Figure out a reasonable way to port this? 
2723 */ 2724#ifdef NEEDTOPORT 2725 if ((retval == 0) 2726 && (cdb->byte2 & SSS_ONOFFLINE)) 2727 retval = ctl_lun_offline(ctl_be_lun); 2728#endif 2729 } 2730 2731 /* 2732 * In general, the above routines should not fail. They 2733 * just set state for the LUN. So we've got something 2734 * pretty wrong here if we can't start or stop the LUN. 2735 */ 2736 if (retval != 0) { 2737 ctl_set_internal_failure(&io->scsiio, 2738 /*sks_valid*/ 1, 2739 /*retry_count*/ 0xf051); 2740 retval = CTL_RETVAL_COMPLETE; 2741 } else { 2742 ctl_set_success(&io->scsiio); 2743 } 2744 ctl_config_write_done(io); 2745 break; 2746 } 2747 default: 2748 ctl_set_invalid_opcode(&io->scsiio); 2749 ctl_config_write_done(io); 2750 retval = CTL_RETVAL_COMPLETE; 2751 break; 2752 } 2753 2754 return (retval); 2755} 2756 2757static int 2758ctl_be_block_config_read(union ctl_io *io) 2759{ 2760 struct ctl_be_block_lun *be_lun; 2761 struct ctl_be_lun *ctl_be_lun; 2762 int retval = 0; 2763 2764 DPRINTF("entered\n"); 2765 2766 ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[ 2767 CTL_PRIV_BACKEND_LUN].ptr; 2768 be_lun = (struct ctl_be_block_lun *)ctl_be_lun->be_lun; 2769 2770 switch (io->scsiio.cdb[0]) { 2771 case SERVICE_ACTION_IN: 2772 if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) { 2773 mtx_lock(&be_lun->queue_lock); 2774 STAILQ_INSERT_TAIL(&be_lun->config_read_queue, 2775 &io->io_hdr, links); 2776 mtx_unlock(&be_lun->queue_lock); 2777 taskqueue_enqueue(be_lun->io_taskqueue, 2778 &be_lun->io_task); 2779 retval = CTL_RETVAL_QUEUED; 2780 break; 2781 } 2782 ctl_set_invalid_field(&io->scsiio, 2783 /*sks_valid*/ 1, 2784 /*command*/ 1, 2785 /*field*/ 1, 2786 /*bit_valid*/ 1, 2787 /*bit*/ 4); 2788 ctl_config_read_done(io); 2789 retval = CTL_RETVAL_COMPLETE; 2790 break; 2791 default: 2792 ctl_set_invalid_opcode(&io->scsiio); 2793 ctl_config_read_done(io); 2794 retval = CTL_RETVAL_COMPLETE; 2795 break; 2796 } 2797 2798 return (retval); 2799} 2800 2801static int 2802ctl_be_block_lun_info(void *be_lun, struct 
sbuf *sb) 2803{ 2804 struct ctl_be_block_lun *lun; 2805 int retval; 2806 2807 lun = (struct ctl_be_block_lun *)be_lun; 2808 retval = 0; 2809 2810 retval = sbuf_printf(sb, "\t<num_threads>"); 2811 2812 if (retval != 0) 2813 goto bailout; 2814 2815 retval = sbuf_printf(sb, "%d", lun->num_threads); 2816 2817 if (retval != 0) 2818 goto bailout; 2819 2820 retval = sbuf_printf(sb, "</num_threads>\n"); 2821 2822bailout: 2823 2824 return (retval); 2825} 2826 2827static uint64_t 2828ctl_be_block_lun_attr(void *be_lun, const char *attrname) 2829{ 2830 struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)be_lun; 2831 2832 if (lun->getattr == NULL) 2833 return (UINT64_MAX); 2834 return (lun->getattr(lun, attrname)); 2835} 2836 2837int 2838ctl_be_block_init(void) 2839{ 2840 struct ctl_be_block_softc *softc; 2841 int retval; 2842 2843 softc = &backend_block_softc; 2844 retval = 0; 2845 2846 mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF); 2847 beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io), 2848 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 2849 STAILQ_INIT(&softc->disk_list); 2850 STAILQ_INIT(&softc->lun_list); 2851 2852 return (retval); 2853} 2854