/* ctl_backend_block.c — FreeBSD head, revision 287868 */
1/*- 2 * Copyright (c) 2003 Silicon Graphics International Corp. 3 * Copyright (c) 2009-2011 Spectra Logic Corporation 4 * Copyright (c) 2012 The FreeBSD Foundation 5 * All rights reserved. 6 * 7 * Portions of this software were developed by Edward Tomasz Napierala 8 * under sponsorship from the FreeBSD Foundation. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions, and the following disclaimer, 15 * without modification. 16 * 2. Redistributions in binary form must reproduce at minimum a disclaimer 17 * substantially similar to the "NO WARRANTY" disclaimer below 18 * ("Disclaimer") and any redistribution must be conditioned upon 19 * including a substantially similar Disclaimer requirement for further 20 * binary redistribution. 21 * 22 * NO WARRANTY 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR 26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 31 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 32 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 * POSSIBILITY OF SUCH DAMAGES. 34 * 35 * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $ 36 */ 37/* 38 * CAM Target Layer driver backend for block devices. 
39 * 40 * Author: Ken Merry <ken@FreeBSD.org> 41 */ 42#include <sys/cdefs.h> 43__FBSDID("$FreeBSD: head/sys/cam/ctl/ctl_backend_block.c 287868 2015-09-16 18:33:04Z mav $"); 44 45#include <sys/param.h> 46#include <sys/systm.h> 47#include <sys/kernel.h> 48#include <sys/types.h> 49#include <sys/kthread.h> 50#include <sys/bio.h> 51#include <sys/fcntl.h> 52#include <sys/limits.h> 53#include <sys/lock.h> 54#include <sys/mutex.h> 55#include <sys/condvar.h> 56#include <sys/malloc.h> 57#include <sys/conf.h> 58#include <sys/ioccom.h> 59#include <sys/queue.h> 60#include <sys/sbuf.h> 61#include <sys/endian.h> 62#include <sys/uio.h> 63#include <sys/buf.h> 64#include <sys/taskqueue.h> 65#include <sys/vnode.h> 66#include <sys/namei.h> 67#include <sys/mount.h> 68#include <sys/disk.h> 69#include <sys/fcntl.h> 70#include <sys/filedesc.h> 71#include <sys/filio.h> 72#include <sys/proc.h> 73#include <sys/pcpu.h> 74#include <sys/module.h> 75#include <sys/sdt.h> 76#include <sys/devicestat.h> 77#include <sys/sysctl.h> 78 79#include <geom/geom.h> 80 81#include <cam/cam.h> 82#include <cam/scsi/scsi_all.h> 83#include <cam/scsi/scsi_da.h> 84#include <cam/ctl/ctl_io.h> 85#include <cam/ctl/ctl.h> 86#include <cam/ctl/ctl_backend.h> 87#include <cam/ctl/ctl_ioctl.h> 88#include <cam/ctl/ctl_ha.h> 89#include <cam/ctl/ctl_scsi_all.h> 90#include <cam/ctl/ctl_private.h> 91#include <cam/ctl/ctl_error.h> 92 93/* 94 * The idea here is that we'll allocate enough S/G space to hold a 1MB 95 * I/O. If we get an I/O larger than that, we'll split it. 96 */ 97#define CTLBLK_HALF_IO_SIZE (512 * 1024) 98#define CTLBLK_MAX_IO_SIZE (CTLBLK_HALF_IO_SIZE * 2) 99#define CTLBLK_MAX_SEG MAXPHYS 100#define CTLBLK_HALF_SEGS MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MAX_SEG, 1) 101#define CTLBLK_MAX_SEGS (CTLBLK_HALF_SEGS * 2) 102 103#ifdef CTLBLK_DEBUG 104#define DPRINTF(fmt, args...) \ 105 printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) 106#else 107#define DPRINTF(fmt, args...) 
do {} while(0)
#endif

/* Accessors for the backend-private and LBA/length areas of a CTL I/O header. */
#define PRIV(io) \
    ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
#define ARGS(io) \
    ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])

SDT_PROVIDER_DEFINE(cbb);

/* Lifecycle flags for a backend block LUN. */
typedef enum {
    CTL_BE_BLOCK_LUN_UNCONFIGURED = 0x01,
    CTL_BE_BLOCK_LUN_CONFIG_ERR = 0x02,
    CTL_BE_BLOCK_LUN_WAITING = 0x04,
} ctl_be_block_lun_flags;

/* Kind of backing store behind a LUN. */
typedef enum {
    CTL_BE_BLOCK_NONE,
    CTL_BE_BLOCK_DEV,
    CTL_BE_BLOCK_FILE
} ctl_be_block_type;

struct ctl_be_block_filedata {
    struct ucred *cred;     /* credentials used for file VOP calls */
};

union ctl_be_block_bedata {
    struct ctl_be_block_filedata file;
};

struct ctl_be_block_io;
struct ctl_be_block_lun;

/* Per-backing-store I/O dispatch and attribute-query hook signatures. */
typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
                   struct ctl_be_block_io *beio);
typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun,
                  const char *attrname);

/*
 * Backend LUN structure.  There is a 1:1 mapping between a block device
 * and a backend block LUN, and between a backend block LUN and a CTL LUN.
 */
struct ctl_be_block_lun {
    struct ctl_lun_create_params params;
    char lunname[32];
    char *dev_path;
    ctl_be_block_type dev_type;
    struct vnode *vn;
    union ctl_be_block_bedata backend;
    /* Hooks selected at open time for file- vs device-backed LUNs. */
    cbb_dispatch_t dispatch;
    cbb_dispatch_t lun_flush;
    cbb_dispatch_t unmap;
    cbb_dispatch_t get_lba_status;
    cbb_getattr_t getattr;
    uma_zone_t lun_zone;
    uint64_t size_blocks;
    uint64_t size_bytes;
    struct ctl_be_block_softc *softc;
    struct devstat *disk_stats;
    ctl_be_block_lun_flags flags;
    STAILQ_ENTRY(ctl_be_block_lun) links;
    struct ctl_be_lun cbe_lun;
    struct taskqueue *io_taskqueue;
    struct task io_task;
    int num_threads;
    /* Work queues drained by the LUN's taskqueue worker. */
    STAILQ_HEAD(, ctl_io_hdr) input_queue;
    STAILQ_HEAD(, ctl_io_hdr) config_read_queue;
    STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
    STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
    struct mtx_padalign io_lock;    /* protects beio counters and devstat */
    struct mtx_padalign queue_lock; /* protects the work queues above */
};

/*
 * Overall softc structure for the block backend module.
 */
struct ctl_be_block_softc {
    struct mtx lock;
    int num_luns;
    STAILQ_HEAD(, ctl_be_block_lun) lun_list;
};

static struct ctl_be_block_softc backend_block_softc;

/*
 * Per-I/O information.
 */
struct ctl_be_block_io {
    union ctl_io *io;
    struct ctl_sg_entry sg_segs[CTLBLK_MAX_SEGS];
    struct iovec xiovecs[CTLBLK_MAX_SEGS];
    int bio_cmd;
    int num_segs;
    int num_bios_sent;
    int num_bios_done;
    int send_complete;
    int num_errors;
    struct bintime ds_t0;           /* devstat transaction start time */
    devstat_tag_type ds_tag_type;
    devstat_trans_flags ds_trans_type;
    uint64_t io_len;
    uint64_t io_offset;
    int io_arg;
    struct ctl_be_block_softc *softc;
    struct ctl_be_block_lun *lun;
    void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
};

extern struct ctl_softc *control_softc;

static int cbb_num_threads = 14;
SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD, 0,
        "CAM Target Layer Block Backend");
SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN,
       &cbb_num_threads, 0, "Number of threads per backing file");

static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
static void ctl_free_beio(struct ctl_be_block_io *beio);
static void ctl_complete_beio(struct ctl_be_block_io *beio);
static int ctl_be_block_move_done(union ctl_io *io);
static void ctl_be_block_biodone(struct bio *bio);
static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
                    struct ctl_be_block_io *beio);
static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
                       struct ctl_be_block_io *beio);
static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
                  struct ctl_be_block_io *beio);
static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun,
                      const char *attrname);
static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
                   struct ctl_be_block_io *beio);
static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
                   struct ctl_be_block_io *beio);
static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
                      struct ctl_be_block_io *beio);
static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun,
                     const char *attrname);
static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
                     union ctl_io *io);
static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
                     union ctl_io *io);
static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
                  union ctl_io *io);
static void ctl_be_block_worker(void *context, int pending);
static int ctl_be_block_submit(union ctl_io *io);
static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
                  int flag, struct thread *td);
static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
                  struct ctl_lun_req *req);
static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
                 struct ctl_lun_req *req);
static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
static int ctl_be_block_open(struct ctl_be_block_softc *softc,
                 struct ctl_be_block_lun *be_lun,
                 struct ctl_lun_req *req);
static int ctl_be_block_create(struct ctl_be_block_softc *softc,
                   struct ctl_lun_req *req);
static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
               struct ctl_lun_req *req);
static int ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun,
                    struct ctl_lun_req *req);
static int ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun,
                   struct ctl_lun_req *req);
static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
                   struct ctl_lun_req *req);
static void ctl_be_block_lun_shutdown(void *be_lun);
static void ctl_be_block_lun_config_status(void *be_lun,
                       ctl_lun_config_status status);
static int ctl_be_block_config_write(union ctl_io *io);
static int ctl_be_block_config_read(union ctl_io *io);
static int ctl_be_block_lun_info(void *be_lun, struct sbuf *sb);
static uint64_t ctl_be_block_lun_attr(void *be_lun, const char *attrname);
int ctl_be_block_init(void);

static struct ctl_backend_driver
ctl_be_block_driver =
{
    .name = "block",
    .flags = CTL_BE_FLAG_HAS_CONFIG,
    .init = ctl_be_block_init,
    .data_submit = ctl_be_block_submit,
    .data_move_done = ctl_be_block_move_done,
    .config_read = ctl_be_block_config_read,
    .config_write = ctl_be_block_config_write,
    .ioctl = ctl_be_block_ioctl,
    .lun_info = ctl_be_block_lun_info,
    .lun_attr = ctl_be_block_lun_attr
};

MALLOC_DEFINE(M_CTLBLK, "ctlblk", "Memory used for CTL block backend");
CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver);

static uma_zone_t beio_zone;

/*
 * Allocate a zeroed per-I/O tracking structure from the beio zone.
 * M_WAITOK: may sleep, never returns NULL.
 */
static struct ctl_be_block_io *
ctl_alloc_beio(struct ctl_be_block_softc *softc)
{
    struct ctl_be_block_io *beio;

    beio = uma_zalloc(beio_zone, M_WAITOK | M_ZERO);
    beio->softc = softc;
    return (beio);
}

/*
 * Free the S/G buffers attached to a beio, then the beio itself.
 * Counts (and reports) segments that were already NULL as a debugging
 * aid for double-free detection.
 */
static void
ctl_free_beio(struct ctl_be_block_io *beio)
{
    int duplicate_free;
    int i;

    duplicate_free = 0;

    for (i = 0; i < beio->num_segs; i++) {
        if (beio->sg_segs[i].addr == NULL)
            duplicate_free++;

        uma_zfree(beio->lun->lun_zone, beio->sg_segs[i].addr);
        beio->sg_segs[i].addr = NULL;

        /* For compare we had two equal S/G lists. */
        if (ARGS(beio->io)->flags & CTL_LLF_COMPARE) {
            uma_zfree(beio->lun->lun_zone,
                beio->sg_segs[i + CTLBLK_HALF_SEGS].addr);
            beio->sg_segs[i + CTLBLK_HALF_SEGS].addr = NULL;
        }
    }

    if (duplicate_free > 0) {
        printf("%s: %d duplicate frees out of %d segments\n", __func__,
               duplicate_free, beio->num_segs);
    }

    uma_zfree(beio_zone, beio);
}

/*
 * Finish a beio: run its continuation if one is set, otherwise free it
 * and complete the CTL I/O it was servicing.
 */
static void
ctl_complete_beio(struct ctl_be_block_io *beio)
{
    union ctl_io *io = beio->io;

    if (beio->beio_cont != NULL) {
        beio->beio_cont(beio);
    } else {
        ctl_free_beio(beio);
        ctl_data_submit_done(io);
    }
}

/*
 * Byte-compare two buffers; returns the index of the first differing
 * byte, or size if they are equal.
 */
static size_t
cmp(uint8_t *a, uint8_t *b, size_t size)
{
    size_t i;

    for (i = 0; i < size; i++) {
        if (a[i] != b[i])
            break;
    }
    return (i);
}

/*
 * COMPARE support: the two halves of the S/G list hold the data read
 * from media and the data sent by the initiator.  On mismatch, report
 * MISCOMPARE with the byte offset of the first difference in the
 * INFORMATION sense field.
 */
static void
ctl_be_block_compare(union ctl_io *io)
{
    struct ctl_be_block_io *beio;
    uint64_t off, res;
    int i;
    uint8_t info[8];

    beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
    off = 0;
    for (i = 0; i < beio->num_segs; i++) {
        res = cmp(beio->sg_segs[i].addr,
            beio->sg_segs[i + CTLBLK_HALF_SEGS].addr,
            beio->sg_segs[i].len);
        off += res;
        if (res < beio->sg_segs[i].len)
            break;
    }
    if (i < beio->num_segs) {
        scsi_u64to8b(off, info);
        ctl_set_sense(&io->scsiio, /*current_error*/ 1,
            /*sense_key*/ SSD_KEY_MISCOMPARE,
            /*asc*/ 0x1D, /*ascq*/ 0x00,
            /*type*/ SSD_ELEM_INFO,
            /*size*/ sizeof(info), /*data*/ &info,
            /*type*/ SSD_ELEM_NONE);
    } else
        ctl_set_success(&io->scsiio);
}

/*
 * DMA-completion callback from CTL.  Sets final status for reads (and
 * failed writes); successful writes are re-queued to the LUN taskqueue
 * because the backend I/O may block.
 */
static int
ctl_be_block_move_done(union ctl_io *io)
{
    struct ctl_be_block_io *beio;
    struct ctl_be_block_lun *be_lun;
    struct ctl_lba_len_flags *lbalen;
#ifdef CTL_TIME_IO
    struct bintime cur_bt;
#endif

    beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
    be_lun = beio->lun;

    DPRINTF("entered\n");

#ifdef CTL_TIME_IO
    getbintime(&cur_bt);
    bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt);
    bintime_add(&io->io_hdr.dma_bt, &cur_bt);
    io->io_hdr.num_dmas++;
#endif
    io->scsiio.kern_rel_offset += io->scsiio.kern_data_len;

    /*
     * We set status at this point for read commands, and write
     * commands with errors.
     */
    if (io->io_hdr.flags & CTL_FLAG_ABORT) {
        ;
    } else if ((io->io_hdr.port_status == 0) &&
        ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) {
        lbalen = ARGS(beio->io);
        if (lbalen->flags & CTL_LLF_READ) {
            ctl_set_success(&io->scsiio);
        } else if (lbalen->flags & CTL_LLF_COMPARE) {
            /* We have two data blocks ready for comparison. */
            ctl_be_block_compare(io);
        }
    } else if ((io->io_hdr.port_status != 0) &&
        ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE ||
         (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) {
        /*
         * For hardware error sense keys, the sense key
         * specific value is defined to be a retry count,
         * but we use it to pass back an internal FETD
         * error code.  XXX KDM Hopefully the FETD is only
         * using 16 bits for an error code, since that's
         * all the space we have in the sks field.
         */
        ctl_set_internal_failure(&io->scsiio,
                     /*sks_valid*/ 1,
                     /*retry_count*/
                     io->io_hdr.port_status);
    }

    /*
     * If this is a read, or a write with errors, it is done.
     */
    if ((beio->bio_cmd == BIO_READ)
     || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)
     || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) {
        ctl_complete_beio(beio);
        return (0);
    }

    /*
     * At this point, we have a write and the DMA completed
     * successfully.  We now have to queue it to the task queue to
     * execute the backend I/O.  That is because we do blocking
     * memory allocations, and in the file backing case, blocking I/O.
     * This move done routine is generally called in the SIM's
     * interrupt context, and therefore we cannot block.
     */
    mtx_lock(&be_lun->queue_lock);
    /*
     * XXX KDM make sure that links is okay to use at this point.
     * Otherwise, we either need to add another field to ctl_io_hdr,
     * or deal with resource allocation here.
     */
    STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links);
    mtx_unlock(&be_lun->queue_lock);

    taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);

    return (0);
}

/*
 * Completion callback for bios issued to a device-backed LUN.  Tallies
 * errors, and only finishes the beio once the last outstanding bio of
 * the batch has completed (send_complete set and counts equal).
 */
static void
ctl_be_block_biodone(struct bio *bio)
{
    struct ctl_be_block_io *beio;
    struct ctl_be_block_lun *be_lun;
    union ctl_io *io;
    int error;

    beio = bio->bio_caller1;
    be_lun = beio->lun;
    io = beio->io;

    DPRINTF("entered\n");

    error = bio->bio_error;
    mtx_lock(&be_lun->io_lock);
    if (error != 0)
        beio->num_errors++;

    beio->num_bios_done++;

    /*
     * XXX KDM will this cause WITNESS to complain?  Holding a lock
     * during the free might cause it to complain.
     */
    g_destroy_bio(bio);

    /*
     * If the send complete bit isn't set, or we aren't the last I/O to
     * complete, then we're done.
     */
    if ((beio->send_complete == 0)
     || (beio->num_bios_done < beio->num_bios_sent)) {
        mtx_unlock(&be_lun->io_lock);
        return;
    }

    /*
     * At this point, we've verified that we are the last I/O to
     * complete, so it's safe to drop the lock.
     */
    devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
        beio->ds_tag_type, beio->ds_trans_type,
        /*now*/ NULL, /*then*/&beio->ds_t0);
    mtx_unlock(&be_lun->io_lock);

    /*
     * If there are any errors from the backing device, we fail the
     * entire I/O with a medium error.
     */
    if (beio->num_errors > 0) {
        if (error == EOPNOTSUPP) {
            ctl_set_invalid_opcode(&io->scsiio);
        } else if (error == ENOSPC || error == EDQUOT) {
            ctl_set_space_alloc_fail(&io->scsiio);
        } else if (error == EROFS || error == EACCES) {
            ctl_set_hw_write_protected(&io->scsiio);
        } else if (beio->bio_cmd == BIO_FLUSH) {
            /* XXX KDM is there is a better error here? */
            ctl_set_internal_failure(&io->scsiio,
                         /*sks_valid*/ 1,
                         /*retry_count*/ 0xbad2);
        } else
            ctl_set_medium_error(&io->scsiio);
        ctl_complete_beio(beio);
        return;
    }

    /*
     * If this is a write, a flush, a delete or verify, we're all done.
     * If this is a read, we can now send the data to the user.
     */
    if ((beio->bio_cmd == BIO_WRITE)
     || (beio->bio_cmd == BIO_FLUSH)
     || (beio->bio_cmd == BIO_DELETE)
     || (ARGS(io)->flags & CTL_LLF_VERIFY)) {
        ctl_set_success(&io->scsiio);
        ctl_complete_beio(beio);
    } else {
        if ((ARGS(io)->flags & CTL_LLF_READ) &&
            beio->beio_cont == NULL)
            ctl_set_success(&io->scsiio);
#ifdef CTL_TIME_IO
        getbintime(&io->io_hdr.dma_start_bt);
#endif
        ctl_datamove(io);
    }
}

/*
 * SYNCHRONIZE CACHE for a file-backed LUN: fsync the backing vnode.
 * io_arg selects MNT_NOWAIT vs. MNT_WAIT semantics.
 */
static void
ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
            struct ctl_be_block_io *beio)
{
    union ctl_io *io = beio->io;
    struct mount *mountpoint;
    int error, lock_flags;

    DPRINTF("entered\n");

    binuptime(&beio->ds_t0);
    mtx_lock(&be_lun->io_lock);
    devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
    mtx_unlock(&be_lun->io_lock);

    (void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);

    /* Take the vnode shared when the filesystem permits shared writes. */
    if (MNT_SHARED_WRITES(mountpoint)
     || ((mountpoint == NULL)
      && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
        lock_flags = LK_SHARED;
    else
        lock_flags = LK_EXCLUSIVE;

    vn_lock(be_lun->vn, lock_flags | LK_RETRY);

    error = VOP_FSYNC(be_lun->vn, beio->io_arg ?
MNT_NOWAIT : MNT_WAIT,
        curthread);
    VOP_UNLOCK(be_lun->vn, 0);

    vn_finished_write(mountpoint);

    mtx_lock(&be_lun->io_lock);
    devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
        beio->ds_tag_type, beio->ds_trans_type,
        /*now*/ NULL, /*then*/&beio->ds_t0);
    mtx_unlock(&be_lun->io_lock);

    if (error == 0)
        ctl_set_success(&io->scsiio);
    else {
        /* XXX KDM is there is a better error here? */
        ctl_set_internal_failure(&io->scsiio,
                     /*sks_valid*/ 1,
                     /*retry_count*/ 0xbad1);
    }

    ctl_complete_beio(beio);
}

SDT_PROBE_DEFINE1(cbb, kernel, read, file_start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, write, file_start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, read, file_done,"uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, write, file_done, "uint64_t");

/*
 * READ/WRITE dispatch for file-backed LUNs: builds a kernel uio over the
 * beio S/G list and performs it with VOP_READ/VOP_WRITE.  DPO maps to
 * IO_DIRECT and FUA (writes) to IO_SYNC.
 */
static void
ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
               struct ctl_be_block_io *beio)
{
    struct ctl_be_block_filedata *file_data;
    union ctl_io *io;
    struct uio xuio;
    struct iovec *xiovec;
    int flags;
    int error, i;

    DPRINTF("entered\n");

    file_data = &be_lun->backend.file;
    io = beio->io;
    flags = 0;
    if (ARGS(io)->flags & CTL_LLF_DPO)
        flags |= IO_DIRECT;
    if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
        flags |= IO_SYNC;

    bzero(&xuio, sizeof(xuio));
    if (beio->bio_cmd == BIO_READ) {
        SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0);
        xuio.uio_rw = UIO_READ;
    } else {
        SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0);
        xuio.uio_rw = UIO_WRITE;
    }
    xuio.uio_offset = beio->io_offset;
    xuio.uio_resid = beio->io_len;
    xuio.uio_segflg = UIO_SYSSPACE;
    xuio.uio_iov = beio->xiovecs;
    xuio.uio_iovcnt = beio->num_segs;
    xuio.uio_td = curthread;

    /* Mirror the S/G list into the iovec array backing the uio. */
    for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
        xiovec->iov_base = beio->sg_segs[i].addr;
        xiovec->iov_len = beio->sg_segs[i].len;
    }

    binuptime(&beio->ds_t0);
    mtx_lock(&be_lun->io_lock);
    devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
    mtx_unlock(&be_lun->io_lock);

    if (beio->bio_cmd == BIO_READ) {
        vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);

        /*
         * UFS pays attention to IO_DIRECT for reads.  If the
         * DIRECTIO option is configured into the kernel, it calls
         * ffs_rawread().  But that only works for single-segment
         * uios with user space addresses.  In our case, with a
         * kernel uio, it still reads into the buffer cache, but it
         * will just try to release the buffer from the cache later
         * on in ffs_read().
         *
         * ZFS does not pay attention to IO_DIRECT for reads.
         *
         * UFS does not pay attention to IO_SYNC for reads.
         *
         * ZFS pays attention to IO_SYNC (which translates into the
         * Solaris define FRSYNC for zfs_read()) for reads.  It
         * attempts to sync the file before reading.
         */
        error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred);

        VOP_UNLOCK(be_lun->vn, 0);
        SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0);
    } else {
        struct mount *mountpoint;
        int lock_flags;

        (void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);

        if (MNT_SHARED_WRITES(mountpoint)
         || ((mountpoint == NULL)
          && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
            lock_flags = LK_SHARED;
        else
            lock_flags = LK_EXCLUSIVE;

        vn_lock(be_lun->vn, lock_flags | LK_RETRY);

        /*
         * UFS pays attention to IO_DIRECT for writes.  The write
         * is done asynchronously.  (Normally the write would just
         * get put into cache.
         *
         * UFS pays attention to IO_SYNC for writes.  It will
         * attempt to write the buffer out synchronously if that
         * flag is set.
         *
         * ZFS does not pay attention to IO_DIRECT for writes.
         *
         * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC)
         * for writes.  It will flush the transaction from the
         * cache before returning.
         */
        error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred);
        VOP_UNLOCK(be_lun->vn, 0);

        vn_finished_write(mountpoint);
        SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0);
    }

    mtx_lock(&be_lun->io_lock);
    devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
        beio->ds_tag_type, beio->ds_trans_type,
        /*now*/ NULL, /*then*/&beio->ds_t0);
    mtx_unlock(&be_lun->io_lock);

    /*
     * If we got an error, set the sense data to "MEDIUM ERROR" and
     * return the I/O to the user.
     */
    if (error != 0) {
        char path_str[32];

        ctl_scsi_path_string(io, path_str, sizeof(path_str));
        printf("%s%s command returned errno %d\n", path_str,
               (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE", error);
        if (error == ENOSPC || error == EDQUOT) {
            ctl_set_space_alloc_fail(&io->scsiio);
        } else if (error == EROFS || error == EACCES) {
            ctl_set_hw_write_protected(&io->scsiio);
        } else
            ctl_set_medium_error(&io->scsiio);
        ctl_complete_beio(beio);
        return;
    }

    /*
     * If this is a write or a verify, we're all done.
     * If this is a read, we can now send the data to the user.
     */
    if ((beio->bio_cmd == BIO_WRITE) ||
        (ARGS(io)->flags & CTL_LLF_VERIFY)) {
        ctl_set_success(&io->scsiio);
        ctl_complete_beio(beio);
    } else {
        if ((ARGS(io)->flags & CTL_LLF_READ) &&
            beio->beio_cont == NULL)
            ctl_set_success(&io->scsiio);
#ifdef CTL_TIME_IO
        getbintime(&io->io_hdr.dma_start_bt);
#endif
        ctl_datamove(io);
    }
}

/*
 * GET LBA STATUS for file-backed LUNs, implemented with the
 * FIOSEEKHOLE/FIOSEEKDATA vnode ioctls.
 */
static void
ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
              struct ctl_be_block_io *beio)
{
    union ctl_io *io = beio->io;
    struct ctl_lba_len_flags *lbalen = ARGS(io);
    struct scsi_get_lba_status_data *data;
    off_t roff, off;
    int error, status;

    DPRINTF("entered\n");

    off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
    vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
    error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off,
        0, curthread->td_ucred, curthread);
    if (error == 0 && off > roff)
        status = 0;    /* mapped up to off */
    else {
        error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off,
            0, curthread->td_ucred, curthread);
        if (error == 0 && off > roff)
            status = 1;    /* deallocated up to off */
        else {
            status = 0;    /* unknown up to the end */
            off = be_lun->size_bytes;
        }
    }
    VOP_UNLOCK(be_lun->vn, 0);

    data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
    scsi_u64to8b(lbalen->lba, data->descr[0].addr);
    scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
        lbalen->lba), data->descr[0].length);
    data->descr[0].status = status;

    ctl_complete_beio(beio);
}

/*
 * Attribute query for file-backed LUNs.  Supports "blocksused" (from
 * VOP_GETATTR va_bytes) and "blocksavail" (from VFS_STATFS); returns
 * UINT64_MAX for anything else or on error.
 */
static uint64_t
ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname)
{
    struct vattr vattr;
    struct statfs statfs;
    uint64_t val;
    int error;

    val = UINT64_MAX;
    if (be_lun->vn == NULL)
        return (val);
    vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
    if (strcmp(attrname, "blocksused") == 0) {
        error = VOP_GETATTR(be_lun->vn,
&vattr, curthread->td_ucred);
        if (error == 0)
            val = vattr.va_bytes / be_lun->cbe_lun.blocksize;
    }
    if (strcmp(attrname, "blocksavail") == 0 &&
        (be_lun->vn->v_iflag & VI_DOOMED) == 0) {
        error = VFS_STATFS(be_lun->vn->v_mount, &statfs);
        if (error == 0)
            val = statfs.f_bavail * statfs.f_bsize /
                be_lun->cbe_lun.blocksize;
    }
    VOP_UNLOCK(be_lun->vn, 0);
    return (val);
}

/*
 * READ/WRITE dispatch for ZVOL (character device) backed LUNs: builds a
 * kernel uio over the beio S/G list and hands it to the cdevsw
 * d_read/d_write entry points.
 */
static void
ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
               struct ctl_be_block_io *beio)
{
    union ctl_io *io;
    struct cdevsw *csw;
    struct cdev *dev;
    struct uio xuio;
    struct iovec *xiovec;
    int error, flags, i, ref;

    DPRINTF("entered\n");

    io = beio->io;
    flags = 0;
    if (ARGS(io)->flags & CTL_LLF_DPO)
        flags |= IO_DIRECT;
    if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
        flags |= IO_SYNC;

    bzero(&xuio, sizeof(xuio));
    if (beio->bio_cmd == BIO_READ) {
        SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0);
        xuio.uio_rw = UIO_READ;
    } else {
        SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0);
        xuio.uio_rw = UIO_WRITE;
    }
    xuio.uio_offset = beio->io_offset;
    xuio.uio_resid = beio->io_len;
    xuio.uio_segflg = UIO_SYSSPACE;
    xuio.uio_iov = beio->xiovecs;
    xuio.uio_iovcnt = beio->num_segs;
    xuio.uio_td = curthread;

    /* Mirror the S/G list into the iovec array backing the uio. */
    for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
        xiovec->iov_base = beio->sg_segs[i].addr;
        xiovec->iov_len = beio->sg_segs[i].len;
    }

    binuptime(&beio->ds_t0);
    mtx_lock(&be_lun->io_lock);
    devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
    mtx_unlock(&be_lun->io_lock);

    /* Hold a thread reference on the cdev across the I/O. */
    csw = devvn_refthread(be_lun->vn, &dev, &ref);
    if (csw) {
        if (beio->bio_cmd == BIO_READ)
            error = csw->d_read(dev, &xuio, flags);
        else
            error = csw->d_write(dev, &xuio, flags);
        dev_relthread(dev, ref);
    } else
        error = ENXIO;

    if (beio->bio_cmd == BIO_READ)
        SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0);
    else
        SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0);

    mtx_lock(&be_lun->io_lock);
    devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
        beio->ds_tag_type, beio->ds_trans_type,
        /*now*/ NULL, /*then*/&beio->ds_t0);
    mtx_unlock(&be_lun->io_lock);

    /*
     * If we got an error, set the sense data to "MEDIUM ERROR" and
     * return the I/O to the user.
     */
    if (error != 0) {
        if (error == ENOSPC || error == EDQUOT) {
            ctl_set_space_alloc_fail(&io->scsiio);
        } else if (error == EROFS || error == EACCES) {
            ctl_set_hw_write_protected(&io->scsiio);
        } else
            ctl_set_medium_error(&io->scsiio);
        ctl_complete_beio(beio);
        return;
    }

    /*
     * If this is a write or a verify, we're all done.
     * If this is a read, we can now send the data to the user.
     */
    if ((beio->bio_cmd == BIO_WRITE) ||
        (ARGS(io)->flags & CTL_LLF_VERIFY)) {
        ctl_set_success(&io->scsiio);
        ctl_complete_beio(beio);
    } else {
        if ((ARGS(io)->flags & CTL_LLF_READ) &&
            beio->beio_cont == NULL)
            ctl_set_success(&io->scsiio);
#ifdef CTL_TIME_IO
        getbintime(&io->io_hdr.dma_start_bt);
#endif
        ctl_datamove(io);
    }
}

/*
 * GET LBA STATUS for ZVOL-backed LUNs, via the device's
 * FIOSEEKHOLE/FIOSEEKDATA ioctls.
 */
static void
ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun,
              struct ctl_be_block_io *beio)
{
    union ctl_io *io = beio->io;
    struct cdevsw *csw;
    struct cdev *dev;
    struct ctl_lba_len_flags *lbalen = ARGS(io);
    struct scsi_get_lba_status_data *data;
    off_t roff, off;
    int error, ref, status;

    DPRINTF("entered\n");

    csw = devvn_refthread(be_lun->vn, &dev, &ref);
    if (csw == NULL) {
        status = 0;    /* unknown up to the end */
        off = be_lun->size_bytes;
        goto done;
    }
    off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
    error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD,
        curthread);
    if (error == 0 && off > roff)
        status = 0;    /* mapped up to off */
    else {
        error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD,
            curthread);
        if (error == 0 && off > roff)
            status = 1;    /* deallocated up to off */
        else {
            status = 0;    /* unknown up to the end */
            off = be_lun->size_bytes;
        }
    }
    dev_relthread(dev, ref);

done:
    data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
    scsi_u64to8b(lbalen->lba, data->descr[0].addr);
    scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
        lbalen->lba), data->descr[0].length);
    data->descr[0].status = status;

    ctl_complete_beio(beio);
}

/*
 * SYNCHRONIZE CACHE for device-backed LUNs: send a single BIO_FLUSH to
 * the backing device.
 */
static void
ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
               struct ctl_be_block_io *beio)
{
    struct bio *bio;
    union ctl_io *io;
    struct cdevsw *csw;
    struct cdev *dev;
    int ref;

    io = beio->io;

    DPRINTF("entered\n");

    /* This can't fail, it's a blocking allocation. */
    bio = g_alloc_bio();

    bio->bio_cmd = BIO_FLUSH;
    bio->bio_offset = 0;
    bio->bio_data = 0;
    bio->bio_done = ctl_be_block_biodone;
    bio->bio_caller1 = beio;
    bio->bio_pblkno = 0;

    /*
     * We don't need to acquire the LUN lock here, because we are only
     * sending one bio, and so there is no other context to synchronize
     * with.
1021 */ 1022 beio->num_bios_sent = 1; 1023 beio->send_complete = 1; 1024 1025 binuptime(&beio->ds_t0); 1026 mtx_lock(&be_lun->io_lock); 1027 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); 1028 mtx_unlock(&be_lun->io_lock); 1029 1030 csw = devvn_refthread(be_lun->vn, &dev, &ref); 1031 if (csw) { 1032 bio->bio_dev = dev; 1033 csw->d_strategy(bio); 1034 dev_relthread(dev, ref); 1035 } else { 1036 bio->bio_error = ENXIO; 1037 ctl_be_block_biodone(bio); 1038 } 1039} 1040 1041static void 1042ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun, 1043 struct ctl_be_block_io *beio, 1044 uint64_t off, uint64_t len, int last) 1045{ 1046 struct bio *bio; 1047 uint64_t maxlen; 1048 struct cdevsw *csw; 1049 struct cdev *dev; 1050 int ref; 1051 1052 csw = devvn_refthread(be_lun->vn, &dev, &ref); 1053 maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize); 1054 while (len > 0) { 1055 bio = g_alloc_bio(); 1056 bio->bio_cmd = BIO_DELETE; 1057 bio->bio_dev = dev; 1058 bio->bio_offset = off; 1059 bio->bio_length = MIN(len, maxlen); 1060 bio->bio_data = 0; 1061 bio->bio_done = ctl_be_block_biodone; 1062 bio->bio_caller1 = beio; 1063 bio->bio_pblkno = off / be_lun->cbe_lun.blocksize; 1064 1065 off += bio->bio_length; 1066 len -= bio->bio_length; 1067 1068 mtx_lock(&be_lun->io_lock); 1069 beio->num_bios_sent++; 1070 if (last && len == 0) 1071 beio->send_complete = 1; 1072 mtx_unlock(&be_lun->io_lock); 1073 1074 if (csw) { 1075 csw->d_strategy(bio); 1076 } else { 1077 bio->bio_error = ENXIO; 1078 ctl_be_block_biodone(bio); 1079 } 1080 } 1081 if (csw) 1082 dev_relthread(dev, ref); 1083} 1084 1085static void 1086ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun, 1087 struct ctl_be_block_io *beio) 1088{ 1089 union ctl_io *io; 1090 struct ctl_ptr_len_flags *ptrlen; 1091 struct scsi_unmap_desc *buf, *end; 1092 uint64_t len; 1093 1094 io = beio->io; 1095 1096 DPRINTF("entered\n"); 1097 1098 binuptime(&beio->ds_t0); 1099 mtx_lock(&be_lun->io_lock); 1100 
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	if (beio->io_offset == -1) {
		/* UNMAP: walk the caller-supplied descriptor list. */
		beio->io_len = 0;
		ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
		buf = (struct scsi_unmap_desc *)ptrlen->ptr;
		end = buf + ptrlen->len / sizeof(*buf);
		for (; buf < end; buf++) {
			len = (uint64_t)scsi_4btoul(buf->length) *
			    be_lun->cbe_lun.blocksize;
			beio->io_len += len;
			/* 'last' is TRUE only for the final descriptor. */
			ctl_be_block_unmap_dev_range(be_lun, beio,
			    scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize,
			    len, (end - buf < 2) ? TRUE : FALSE);
		}
	} else
		/* WRITE SAME w/UNMAP: a single contiguous range. */
		ctl_be_block_unmap_dev_range(be_lun, beio,
		    beio->io_offset, beio->io_len, TRUE);
}

/*
 * Dispatch a READ or WRITE to the backing character device.  The beio's
 * scatter/gather list is translated into one or more struct bio's, which
 * are first queued locally and then all fired at the device at once.
 */
static void
ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
			  struct ctl_be_block_io *beio)
{
	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
	struct bio *bio;
	struct cdevsw *csw;
	struct cdev *dev;
	off_t cur_offset;
	int i, max_iosize, ref;

	DPRINTF("entered\n");
	csw = devvn_refthread(be_lun->vn, &dev, &ref);

	/*
	 * We have to limit our I/O size to the maximum supported by the
	 * backend device.  Hopefully it is MAXPHYS.  If the driver doesn't
	 * set it properly, use DFLTPHYS.
	 */
	if (csw) {
		max_iosize = dev->si_iosize_max;
		if (max_iosize < PAGE_SIZE)
			max_iosize = DFLTPHYS;
	} else
		max_iosize = DFLTPHYS;

	cur_offset = beio->io_offset;
	for (i = 0; i < beio->num_segs; i++) {
		size_t cur_size;
		uint8_t *cur_ptr;

		cur_size = beio->sg_segs[i].len;
		cur_ptr = beio->sg_segs[i].addr;

		/* Split each S/G segment into max_iosize-sized bios. */
		while (cur_size > 0) {
			/* This can't fail, it's a blocking allocation. */
			bio = g_alloc_bio();

			KASSERT(bio != NULL, ("g_alloc_bio() failed!\n"));

			bio->bio_cmd = beio->bio_cmd;
			bio->bio_dev = dev;
			bio->bio_caller1 = beio;
			bio->bio_length = min(cur_size, max_iosize);
			bio->bio_offset = cur_offset;
			bio->bio_data = cur_ptr;
			bio->bio_done = ctl_be_block_biodone;
			bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize;

			cur_offset += bio->bio_length;
			cur_ptr += bio->bio_length;
			cur_size -= bio->bio_length;

			TAILQ_INSERT_TAIL(&queue, bio, bio_queue);
			beio->num_bios_sent++;
		}
	}
	binuptime(&beio->ds_t0);
	mtx_lock(&be_lun->io_lock);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
	beio->send_complete = 1;
	mtx_unlock(&be_lun->io_lock);

	/*
	 * Fire off all allocated requests!
	 */
	while ((bio = TAILQ_FIRST(&queue)) != NULL) {
		TAILQ_REMOVE(&queue, bio, bio_queue);
		if (csw)
			csw->d_strategy(bio);
		else {
			bio->bio_error = ENXIO;
			ctl_be_block_biodone(bio);
		}
	}
	if (csw)
		dev_relthread(dev, ref);
}

/*
 * Fetch a named DIOCGATTR attribute (e.g. GEOM::candelete) from the
 * backing device.  Returns UINT64_MAX if the device is gone or the
 * attribute cannot be read.
 */
static uint64_t
ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname)
{
	struct diocgattr_arg arg;
	struct cdevsw *csw;
	struct cdev *dev;
	int error, ref;

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw == NULL)
		return (UINT64_MAX);
	strlcpy(arg.name, attrname, sizeof(arg.name));
	arg.len = sizeof(arg.value.off);
	if (csw->d_ioctl) {
		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
		    curthread);
	} else
		error = ENODEV;
	dev_relthread(dev, ref);
	if (error != 0)
		return (UINT64_MAX);
	return (arg.value.off);
}

/*
 * Config-write dispatcher for SYNCHRONIZE CACHE: translate the LBA/len
 * arguments into a flush request and hand it to the backend flush method.
 */
static void
ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun,
			      union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io
	    *beio;
	struct ctl_lba_len_flags *lbalen;

	DPRINTF("entered\n");
	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];

	beio->io_len = lbalen->len * cbe_lun->blocksize;
	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
	beio->io_arg = (lbalen->flags & SSC_IMMED) != 0;	/* IMMED bit */
	beio->bio_cmd = BIO_FLUSH;
	beio->ds_trans_type = DEVSTAT_NO_DATA;
	DPRINTF("SYNC\n");
	be_lun->lun_flush(be_lun, beio);
}

/*
 * Completion hook for a WRITE SAME chunk: free the beio and, unless the
 * I/O was aborted or already failed, reschedule the remainder of the
 * request through ctl_be_block_config_write().
 */
static void
ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio)
{
	union ctl_io *io;

	io = beio->io;
	ctl_free_beio(beio);
	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
		ctl_config_write_done(io);
		return;
	}

	ctl_be_block_config_write(io);
}

/*
 * Config-write dispatcher for WRITE SAME (10/16).  Validates the flags,
 * turns the UNMAP/ANCHOR variants into a BIO_DELETE, and otherwise fills
 * a scatter/gather buffer with replicated block data for a regular write.
 * Requests larger than CTLBLK_MAX_SEGS segments are continued via
 * ctl_be_block_cw_done_ws().
 */
static void
ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
			    union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_lba_len_flags *lbalen;
	uint64_t len_left, lba;
	uint32_t pb, pbo, adj;
	int i, seglen;
	uint8_t *buf, *end;

	DPRINTF("entered\n");

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	lbalen = ARGS(beio->io);

	/* Reject unknown flags, and unmap requests the backend can't do. */
	if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) ||
	    (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) {
		ctl_free_beio(beio);
		ctl_set_invalid_field(&io->scsiio,
				      /*sks_valid*/ 1,
				      /*command*/ 1,
				      /*field*/ 1,
				      /*bit_valid*/ 0,
				      /*bit*/ 0);
		ctl_config_write_done(io);
		return;
	}

	if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) {
		beio->io_offset = lbalen->lba * cbe_lun->blocksize;
		beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize;
		beio->bio_cmd = BIO_DELETE;
		beio->ds_trans_type = DEVSTAT_FREE;

		be_lun->unmap(be_lun, beio);
		return;
	}

	beio->bio_cmd = BIO_WRITE;
	beio->ds_trans_type = DEVSTAT_WRITE;

	DPRINTF("WRITE SAME at LBA %jx len %u\n",
	       (uintmax_t)lbalen->lba, lbalen->len);

	/* Physical block size and offset, from the reported exponents. */
	pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp;
	if (be_lun->cbe_lun.pblockoff > 0)
		pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff;
	else
		pbo = 0;
	len_left = (uint64_t)lbalen->len * cbe_lun->blocksize;
	for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {

		/*
		 * Setup the S/G entry for this chunk.
		 */
		seglen = MIN(CTLBLK_MAX_SEG, len_left);
		if (pb > cbe_lun->blocksize) {
			/*
			 * Trim the segment so later chunks stay aligned
			 * to the physical block boundary where possible.
			 */
			adj = ((lbalen->lba + lba) * cbe_lun->blocksize +
			    seglen - pbo) % pb;
			if (seglen > adj)
				seglen -= adj;
			else
				seglen -= seglen % cbe_lun->blocksize;
		} else
			seglen -= seglen % cbe_lun->blocksize;
		beio->sg_segs[i].len = seglen;
		beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);

		DPRINTF("segment %d addr %p len %zd\n", i,
			beio->sg_segs[i].addr, beio->sg_segs[i].len);

		beio->num_segs++;
		len_left -= seglen;

		/* Replicate the single-block pattern across the segment. */
		buf = beio->sg_segs[i].addr;
		end = buf + seglen;
		for (; buf < end; buf += cbe_lun->blocksize) {
			memcpy(buf, io->scsiio.kern_data_ptr, cbe_lun->blocksize);
			if (lbalen->flags & SWS_LBDATA)
				scsi_ulto4b(lbalen->lba + lba, buf);
			lba++;
		}
	}

	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
	beio->io_len = lba * cbe_lun->blocksize;

	/* We can not do all in one run. Correct and schedule rerun. */
	if (len_left > 0) {
		lbalen->lba += lba;
		lbalen->len -= lba;
		beio->beio_cont = ctl_be_block_cw_done_ws;
	}

	be_lun->dispatch(be_lun, beio);
}

/*
 * Config-write dispatcher for UNMAP.  The descriptor list itself is
 * processed later by the backend unmap method; io_offset == -1 flags
 * that case.
 */
static void
ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun,
			       union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_ptr_len_flags *ptrlen;

	DPRINTF("entered\n");

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];

	if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) {
		ctl_free_beio(beio);
		ctl_set_invalid_field(&io->scsiio,
				      /*sks_valid*/ 0,
				      /*command*/ 1,
				      /*field*/ 0,
				      /*bit_valid*/ 0,
				      /*bit*/ 0);
		ctl_config_write_done(io);
		return;
	}

	beio->io_len = 0;
	beio->io_offset = -1;	/* tells unmap to use the descriptor list */
	beio->bio_cmd = BIO_DELETE;
	beio->ds_trans_type = DEVSTAT_FREE;
	DPRINTF("UNMAP\n");
	be_lun->unmap(be_lun, beio);
}

/* Completion hook for config-read requests: free the beio and finish. */
static void
ctl_be_block_cr_done(struct ctl_be_block_io *beio)
{
	union ctl_io *io;

	io = beio->io;
	ctl_free_beio(beio);
	ctl_config_read_done(io);
}

/*
 * Dispatch a config-read request (currently only GET LBA STATUS) to the
 * appropriate backend method.
 */
static void
ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
			 union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;

	DPRINTF("entered\n");

	softc = be_lun->softc;
	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	beio->beio_cont = ctl_be_block_cr_done;
	PRIV(io)->ptr = (void *)beio;

	switch (io->scsiio.cdb[0]) {
	case SERVICE_ACTION_IN:		/* GET LBA STATUS */
		beio->bio_cmd = -1;
		beio->ds_trans_type = DEVSTAT_NO_DATA;
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		beio->io_len = 0;
		if (be_lun->get_lba_status)
			be_lun->get_lba_status(be_lun, beio);
		else
			ctl_be_block_cr_done(beio);
		break;
	default:
		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
		break;
	}
}

/* Completion hook for config-write requests: free the beio and finish. */
static void
ctl_be_block_cw_done(struct ctl_be_block_io *beio)
{
	union ctl_io *io;

	io = beio->io;
	ctl_free_beio(beio);
	ctl_config_write_done(io);
}

/*
 * Dispatch a config-write request (SYNCHRONIZE CACHE, WRITE SAME, UNMAP):
 * allocate a beio, map the SCSI tag type onto a devstat tag type, and
 * route the command to the matching dispatcher above.
 */
static void
ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
			 union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;

	DPRINTF("entered\n");

	softc = be_lun->softc;
	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	beio->beio_cont = ctl_be_block_cw_done;
	switch (io->scsiio.tag_type) {
	case CTL_TAG_ORDERED:
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		break;
	case CTL_TAG_HEAD_OF_QUEUE:
		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
		break;
	case CTL_TAG_UNTAGGED:
	case CTL_TAG_SIMPLE:
	case CTL_TAG_ACA:
	default:
		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
		break;
	}
	PRIV(io)->ptr = (void *)beio;

	switch (io->scsiio.cdb[0]) {
	case SYNCHRONIZE_CACHE:
	case SYNCHRONIZE_CACHE_16:
		ctl_be_block_cw_dispatch_sync(be_lun, io);
		break;
	case WRITE_SAME_10:
	case WRITE_SAME_16:
		ctl_be_block_cw_dispatch_ws(be_lun, io);
		break;
	case UNMAP:
		ctl_be_block_cw_dispatch_unmap(be_lun, io);
		break;
	default:
		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
		break;
	}
}

SDT_PROBE_DEFINE1(cbb, kernel, read, start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, write, start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, read, alloc_done, "uint64_t");
SDT_PROBE_DEFINE1(cbb, kernel, write, alloc_done, "uint64_t");

/*
 * Continuation for a multi-chunk data transfer: free the finished beio
 * and requeue the ctl_io on the input queue so the next chunk is issued.
 */
static void
ctl_be_block_next(struct ctl_be_block_io *beio)
{
	struct ctl_be_block_lun *be_lun;
	union ctl_io *io;

	io = beio->io;
	be_lun = beio->lun;
	ctl_free_beio(beio);
	if
	    ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
		/* Aborted or already failed: don't issue the next chunk. */
		ctl_data_submit_done(io);
		return;
	}

	io->io_hdr.status &= ~CTL_STATUS_MASK;
	io->io_hdr.status |= CTL_STATUS_NONE;

	mtx_lock(&be_lun->queue_lock);
	/*
	 * XXX KDM make sure that links is okay to use at this point.
	 * Otherwise, we either need to add another field to ctl_io_hdr,
	 * or deal with resource allocation here.
	 */
	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
	mtx_unlock(&be_lun->queue_lock);

	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
}

/*
 * Dispatch a READ/WRITE/COMPARE data transfer: allocate a beio, size the
 * transfer chunk (anything larger than CTLBLK_MAX_IO_SIZE is continued
 * via ctl_be_block_next()), build the S/G list, then either issue the
 * backend read or start the datamove for a write.
 */
static void
ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
		      union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;
	struct ctl_lba_len_flags *lbalen;
	struct ctl_ptr_len_flags *bptrlen;
	uint64_t len_left, lbas;
	int i;

	softc = be_lun->softc;

	DPRINTF("entered\n");

	lbalen = ARGS(io);
	if (lbalen->flags & CTL_LLF_WRITE) {
		SDT_PROBE(cbb, kernel, write, start, 0, 0, 0, 0, 0);
	} else {
		SDT_PROBE(cbb, kernel, read, start, 0, 0, 0, 0, 0);
	}

	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	bptrlen = PRIV(io);
	bptrlen->ptr = (void *)beio;

	/* Map the SCSI tag type onto a devstat tag type. */
	switch (io->scsiio.tag_type) {
	case CTL_TAG_ORDERED:
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		break;
	case CTL_TAG_HEAD_OF_QUEUE:
		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
		break;
	case CTL_TAG_UNTAGGED:
	case CTL_TAG_SIMPLE:
	case CTL_TAG_ACA:
	default:
		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
		break;
	}

	if (lbalen->flags & CTL_LLF_WRITE) {
		beio->bio_cmd = BIO_WRITE;
		beio->ds_trans_type = DEVSTAT_WRITE;
	} else {
		beio->bio_cmd = BIO_READ;
		beio->ds_trans_type = DEVSTAT_READ;
	}

	DPRINTF("%s at LBA %jx len %u @%ju\n",
	       (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE",
	       (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len);
	/* COMPARE needs room for both halves, so halve the chunk size. */
	if (lbalen->flags & CTL_LLF_COMPARE)
		lbas = CTLBLK_HALF_IO_SIZE;
	else
		lbas = CTLBLK_MAX_IO_SIZE;
	/* bptrlen->len tracks LBAs already transferred in prior chunks. */
	lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize);
	beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize;
	beio->io_len = lbas * cbe_lun->blocksize;
	bptrlen->len += lbas;

	for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
		KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)",
		    i, CTLBLK_MAX_SEGS));

		/*
		 * Setup the S/G entry for this chunk.
		 */
		beio->sg_segs[i].len = min(CTLBLK_MAX_SEG, len_left);
		beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);

		DPRINTF("segment %d addr %p len %zd\n", i,
			beio->sg_segs[i].addr, beio->sg_segs[i].len);

		/* Set up second segment for compare operation. */
		if (lbalen->flags & CTL_LLF_COMPARE) {
			beio->sg_segs[i + CTLBLK_HALF_SEGS].len =
			    beio->sg_segs[i].len;
			beio->sg_segs[i + CTLBLK_HALF_SEGS].addr =
			    uma_zalloc(be_lun->lun_zone, M_WAITOK);
		}

		beio->num_segs++;
		len_left -= beio->sg_segs[i].len;
	}
	if (bptrlen->len < lbalen->len)
		beio->beio_cont = ctl_be_block_next;
	io->scsiio.be_move_done = ctl_be_block_move_done;
	/* For compare we have separate S/G lists for read and datamove. */
	if (lbalen->flags & CTL_LLF_COMPARE)
		io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS];
	else
		io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs;
	io->scsiio.kern_data_len = beio->io_len;
	io->scsiio.kern_data_resid = 0;
	io->scsiio.kern_sg_entries = beio->num_segs;
	io->io_hdr.flags |= CTL_FLAG_ALLOCATED | CTL_FLAG_KDPTR_SGLIST;

	/*
	 * For the read case, we need to read the data into our buffers and
	 * then we can send it back to the user.  For the write case, we
	 * need to get the data from the user first.
	 */
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE(cbb, kernel, read, alloc_done, 0, 0, 0, 0, 0);
		be_lun->dispatch(be_lun, beio);
	} else {
		SDT_PROBE(cbb, kernel, write, alloc_done, 0, 0, 0, 0, 0);
#ifdef CTL_TIME_IO
		getbintime(&io->io_hdr.dma_start_bt);
#endif
		ctl_datamove(io);
	}
}

/*
 * Task queue handler: drain the LUN's datamove, config-write,
 * config-read and input queues in that order.
 */
static void
ctl_be_block_worker(void *context, int pending)
{
	struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context;
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	union ctl_io *io;
	struct ctl_be_block_io *beio;

	DPRINTF("entered\n");
	/*
	 * Fetch and process I/Os from all queues.  If we detect LUN
	 * CTL_LUN_FLAG_OFFLINE status here -- it is result of a race,
	 * so make response maximally opaque to not confuse initiator.
	 */
	for (;;) {
		mtx_lock(&be_lun->queue_lock);
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
		if (io != NULL) {
			DPRINTF("datamove queue\n");
			STAILQ_REMOVE(&be_lun->datamove_queue, &io->io_hdr,
				      ctl_io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);
			beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
			if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) {
				ctl_set_busy(&io->scsiio);
				ctl_complete_beio(beio);
				return;
			}
			be_lun->dispatch(be_lun, beio);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
		if (io != NULL) {
			DPRINTF("config write queue\n");
			STAILQ_REMOVE(&be_lun->config_write_queue, &io->io_hdr,
				      ctl_io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) {
				ctl_set_busy(&io->scsiio);
				ctl_config_write_done(io);
				return;
			}
			ctl_be_block_cw_dispatch(be_lun, io);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue);
		if (io != NULL) {
			DPRINTF("config read queue\n");
			STAILQ_REMOVE(&be_lun->config_read_queue, &io->io_hdr,
				      ctl_io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) {
				ctl_set_busy(&io->scsiio);
				ctl_config_read_done(io);
				return;
			}
			ctl_be_block_cr_dispatch(be_lun, io);
			continue;
		}
		io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
		if (io != NULL) {
			DPRINTF("input queue\n");
			STAILQ_REMOVE(&be_lun->input_queue, &io->io_hdr,
				      ctl_io_hdr, links);
			mtx_unlock(&be_lun->queue_lock);
			if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) {
				ctl_set_busy(&io->scsiio);
				ctl_data_submit_done(io);
				return;
			}
			ctl_be_block_dispatch(be_lun, io);
			continue;
		}

		/*
		 * If we get here, there is no work left in the queues, so
		 * just break out and let the task queue go to sleep.
		 */
		mtx_unlock(&be_lun->queue_lock);
		break;
	}
}

/*
 * Entry point from CTL to the backend for I/O.  We queue everything to a
 * work thread, so this just puts the I/O on a queue and wakes up the
 * thread.
 */
static int
ctl_be_block_submit(union ctl_io *io)
{
	struct ctl_be_block_lun *be_lun;
	struct ctl_be_lun *cbe_lun;

	DPRINTF("entered\n");

	cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
		CTL_PRIV_BACKEND_LUN].ptr;
	be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;

	/*
	 * Make sure we only get SCSI I/O.
	 */
	KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, ("Non-SCSI I/O (type "
		"%#x) encountered", io->io_hdr.io_type));

	PRIV(io)->len = 0;

	mtx_lock(&be_lun->queue_lock);
	/*
	 * XXX KDM make sure that links is okay to use at this point.
	 * Otherwise, we either need to add another field to ctl_io_hdr,
	 * or deal with resource allocation here.
	 */
	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
	mtx_unlock(&be_lun->queue_lock);
	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);

	return (CTL_RETVAL_COMPLETE);
}

/*
 * Character-device ioctl entry point: handles CTL_LUN_REQ
 * create/remove/modify requests for this backend.
 */
static int
ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
		   int flag, struct thread *td)
{
	struct ctl_be_block_softc *softc;
	int error;

	softc = &backend_block_softc;

	error = 0;

	switch (cmd) {
	case CTL_LUN_REQ: {
		struct ctl_lun_req *lun_req;

		lun_req = (struct ctl_lun_req *)addr;

		switch (lun_req->reqtype) {
		case CTL_LUNREQ_CREATE:
			error = ctl_be_block_create(softc, lun_req);
			break;
		case CTL_LUNREQ_RM:
			error = ctl_be_block_rm(softc, lun_req);
			break;
		case CTL_LUNREQ_MODIFY:
			error = ctl_be_block_modify(softc, lun_req);
			break;
		default:
			lun_req->status = CTL_LUN_ERROR;
			snprintf(lun_req->error_str, sizeof(lun_req->error_str),
				 "invalid LUN request type %d",
				 lun_req->reqtype);
			break;
		}
		break;
	}
	default:
		error = ENOTTY;
		break;
	}

	return (error);
}

/*
 * Set up a LUN backed by a regular file: install the file-based backend
 * methods and derive sizes and block geometry from the vnode attributes
 * and the user-supplied options.
 */
static int
ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun;
	struct ctl_be_block_filedata *file_data;
	struct ctl_lun_create_params *params;
	char *value;
	struct vattr vattr;
	off_t ps, pss, po, pos, us, uss, uo, uos;
	int error;

	error = 0;
	cbe_lun = &be_lun->cbe_lun;
	file_data = &be_lun->backend.file;
	params = &be_lun->params;

	be_lun->dev_type = CTL_BE_BLOCK_FILE;
	be_lun->dispatch = ctl_be_block_dispatch_file;
	be_lun->lun_flush = ctl_be_block_flush_file;
	be_lun->get_lba_status = ctl_be_block_gls_file;
	be_lun->getattr = ctl_be_block_getattr_file;
	be_lun->unmap = NULL;	/* files do not support UNMAP */
	cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
	error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
	if (error != 0) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "error calling VOP_GETATTR() for file %s",
			 be_lun->dev_path);
		return (error);
	}

	/*
	 * Verify that we have the ability to upgrade to exclusive
	 * access on this file so we can trap errors at open instead
	 * of reporting them during first access.
	 */
	if (VOP_ISLOCKED(be_lun->vn) != LK_EXCLUSIVE) {
		vn_lock(be_lun->vn, LK_UPGRADE | LK_RETRY);
		if (be_lun->vn->v_iflag & VI_DOOMED) {
			error = EBADF;
			snprintf(req->error_str, sizeof(req->error_str),
				 "error locking file %s", be_lun->dev_path);
			return (error);
		}
	}

	file_data->cred = crhold(curthread->td_ucred);
	if (params->lun_size_bytes != 0)
		be_lun->size_bytes = params->lun_size_bytes;
	else
		be_lun->size_bytes = vattr.va_size;

	/*
	 * For files we can use any logical block size.  Prefer 512 bytes
	 * for compatibility reasons.  If file's vattr.va_blocksize
	 * (preferred I/O block size) is bigger and multiple to chosen
	 * logical block size -- report it as physical block size.
	 */
	if (params->blocksize_bytes != 0)
		cbe_lun->blocksize = params->blocksize_bytes;
	else
		cbe_lun->blocksize = 512;
	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
	    0 : (be_lun->size_blocks - 1);

	us = ps = vattr.va_blocksize;
	uo = po = 0;

	/*
	 * Physical block size/offset may be overridden via the
	 * "pblocksize"/"pblockoffset" LUN options; accept them only if
	 * they are blocksize-multiples and the size is a power of two.
	 */
	value = ctl_get_opt(&cbe_lun->options, "pblocksize");
	if (value != NULL)
		ctl_expand_number(value, &ps);
	value = ctl_get_opt(&cbe_lun->options, "pblockoffset");
	if (value != NULL)
		ctl_expand_number(value, &po);
	pss = ps / cbe_lun->blocksize;
	pos = po / cbe_lun->blocksize;
	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
		cbe_lun->pblockexp = fls(pss) - 1;
		cbe_lun->pblockoff = (pss - pos) % pss;
	}

	/* Same validation for the UNMAP granularity options. */
	value = ctl_get_opt(&cbe_lun->options, "ublocksize");
	if (value != NULL)
		ctl_expand_number(value, &us);
	value = ctl_get_opt(&cbe_lun->options, "ublockoffset");
	if (value != NULL)
		ctl_expand_number(value, &uo);
	uss = us / cbe_lun->blocksize;
	uos = uo / cbe_lun->blocksize;
	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
		cbe_lun->ublockexp = fls(uss) - 1;
		cbe_lun->ublockoff = (uss - uos) % uss;
	}

	/*
	 * Sanity check.  The media size has to be at least one
	 * sector long.
	 */
	if (be_lun->size_bytes < cbe_lun->blocksize) {
		error = EINVAL;
		snprintf(req->error_str, sizeof(req->error_str),
			 "file %s size %ju < block size %u", be_lun->dev_path,
			 (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize);
	}

	cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize;
	return (error);
}

/*
 * Set up a LUN backed by a character (disk) device: install device-based
 * backend methods, query geometry via disk ioctls, and apply the
 * user-supplied option overrides.
 */
static int
ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_lun_create_params *params;
	struct cdevsw *csw;
	struct cdev *dev;
	char *value;
	int error, atomic, maxio, ref, unmap, tmp;
	off_t ps, pss, po, pos, us, uss, uo, uos, otmp;

	params = &be_lun->params;

	be_lun->dev_type = CTL_BE_BLOCK_DEV;
	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw == NULL)
		return (ENXIO);
	if (strcmp(csw->d_name, "zvol") == 0) {
		/* ZFS zvols get their own dispatch/GLS methods. */
		be_lun->dispatch = ctl_be_block_dispatch_zvol;
		be_lun->get_lba_status = ctl_be_block_gls_zvol;
		atomic = maxio = CTLBLK_MAX_IO_SIZE;
	} else {
		be_lun->dispatch = ctl_be_block_dispatch_dev;
		be_lun->get_lba_status = NULL;
		atomic = 0;
		maxio = dev->si_iosize_max;
		if (maxio <= 0)
			maxio = DFLTPHYS;
		if (maxio > CTLBLK_MAX_IO_SIZE)
			maxio = CTLBLK_MAX_IO_SIZE;
	}
	be_lun->lun_flush = ctl_be_block_flush_dev;
	be_lun->getattr = ctl_be_block_getattr_dev;
	be_lun->unmap = ctl_be_block_unmap_dev;

	if (!csw->d_ioctl) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "no d_ioctl for device %s!", be_lun->dev_path);
		return (ENODEV);
	}

	error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD,
			     curthread);
	if (error) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "error %d returned for DIOCGSECTORSIZE ioctl "
			 "on %s!", error, be_lun->dev_path);
		return (error);
	}

	/*
	 * If the user has asked for a blocksize that is greater than the
	 * backing device's blocksize, we can do it only if the blocksize
	 * the user is asking for is an even multiple of the underlying
	 * device's blocksize.
	 */
	if ((params->blocksize_bytes != 0) &&
	    (params->blocksize_bytes >= tmp)) {
		if (params->blocksize_bytes % tmp == 0) {
			cbe_lun->blocksize = params->blocksize_bytes;
		} else {
			dev_relthread(dev, ref);
			snprintf(req->error_str, sizeof(req->error_str),
				 "requested blocksize %u is not an even "
				 "multiple of backing device blocksize %u",
				 params->blocksize_bytes, tmp);
			return (EINVAL);
		}
	} else if (params->blocksize_bytes != 0) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "requested blocksize %u < backing device "
			 "blocksize %u", params->blocksize_bytes, tmp);
		return (EINVAL);
	} else
		cbe_lun->blocksize = tmp;

	error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD,
			     curthread);
	if (error) {
		dev_relthread(dev, ref);
		snprintf(req->error_str, sizeof(req->error_str),
			 "error %d returned for DIOCGMEDIASIZE "
			 " ioctl on %s!", error,
			 be_lun->dev_path);
		return (error);
	}

	/* A user-requested LUN size must fit within the device. */
	if (params->lun_size_bytes != 0) {
		if (params->lun_size_bytes > otmp) {
			dev_relthread(dev, ref);
			snprintf(req->error_str, sizeof(req->error_str),
				 "requested LUN size %ju > backing device "
				 "size %ju",
				 (uintmax_t)params->lun_size_bytes,
				 (uintmax_t)otmp);
			return (EINVAL);
		}

		be_lun->size_bytes = params->lun_size_bytes;
	} else
		be_lun->size_bytes = otmp;
	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
	    0 : (be_lun->size_blocks - 1);

	/* Default physical geometry from the device's stripe size/offset. */
	error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD,
			     curthread);
	if (error)
		ps = po = 0;
	else {
		error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po,
				     FREAD, curthread);
		if (error)
			po = 0;
	}
	us = ps;
	uo = po;

	/*
	 * "pblocksize"/"pblockoffset" options may override; accept them
	 * only if they are blocksize-multiples and a power-of-two count.
	 */
	value = ctl_get_opt(&cbe_lun->options, "pblocksize");
	if (value != NULL)
		ctl_expand_number(value, &ps);
	value = ctl_get_opt(&cbe_lun->options, "pblockoffset");
	if (value != NULL)
		ctl_expand_number(value, &po);
	pss = ps / cbe_lun->blocksize;
	pos = po / cbe_lun->blocksize;
	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
		cbe_lun->pblockexp = fls(pss) - 1;
		cbe_lun->pblockoff = (pss - pos) % pss;
	}

	/* Same validation for the UNMAP granularity options. */
	value = ctl_get_opt(&cbe_lun->options, "ublocksize");
	if (value != NULL)
		ctl_expand_number(value, &us);
	value = ctl_get_opt(&cbe_lun->options, "ublockoffset");
	if (value != NULL)
		ctl_expand_number(value, &uo);
	uss = us / cbe_lun->blocksize;
	uos = uo / cbe_lun->blocksize;
	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
		cbe_lun->ublockexp = fls(uss) - 1;
		cbe_lun->ublockoff = (uss - uos) % uss;
	}

	cbe_lun->atomicblock = atomic / cbe_lun->blocksize;
	cbe_lun->opttxferlen = maxio / cbe_lun->blocksize;

	/*
	 * Decide whether UNMAP is supported: zvols always can; other
	 * devices are asked via the GEOM::candelete attribute.
	 */
	if (be_lun->dispatch == ctl_be_block_dispatch_zvol) {
		unmap = 1;
	} else {
		struct diocgattr_arg arg;

		strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
		arg.len = sizeof(arg.value.i);
		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
		    curthread);
		unmap = (error == 0) ? arg.value.i : 0;
	}
	/* The "unmap" LUN option overrides the probed capability. */
	value = ctl_get_opt(&cbe_lun->options, "unmap");
	if (value != NULL)
		unmap = (strcmp(value, "on") == 0);
	if (unmap)
		cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
	else
		cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;

	dev_relthread(dev, ref);
	return (0);
}

/*
 * Release the backing vnode and any per-type resources for a LUN.
 */
static int
ctl_be_block_close(struct ctl_be_block_lun *be_lun)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	int flags;

	if (be_lun->vn) {
		/* Close with the same flags the backing store was opened. */
		flags = FREAD;
		if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0)
			flags |= FWRITE;
		(void)vn_close(be_lun->vn, flags, NOCRED, curthread);
		be_lun->vn = NULL;

		switch (be_lun->dev_type) {
		case CTL_BE_BLOCK_DEV:
			break;
		case CTL_BE_BLOCK_FILE:
			if (be_lun->backend.file.cred != NULL) {
				crfree(be_lun->backend.file.cred);
				be_lun->backend.file.cred = NULL;
			}
			break;
		case CTL_BE_BLOCK_NONE:
			break;
		default:
			panic("Unexpected backend type.");
			break;
		}
		be_lun->dev_type = CTL_BE_BLOCK_NONE;
	}
	return (0);
}

/*
 * Open the backing store named by the "file" LUN option and configure
 * the LUN for device or file access, falling back to read-only when a
 * writable open is not possible.
 */
static int
ctl_be_block_open(struct ctl_be_block_softc *softc,
		  struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct nameidata nd;
	char *value;
	int error, flags;

	error = 0;
	if (rootvnode == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "Root filesystem is not mounted");
		return (1);
	}
	pwd_ensure_dirs();

	value = ctl_get_opt(&cbe_lun->options, "file");
	if (value == NULL) {
		snprintf(req->error_str, sizeof(req->error_str),
			 "no file argument specified");
		return (1);
	}
	free(be_lun->dev_path, M_CTLBLK);
	be_lun->dev_path = strdup(value, M_CTLBLK);

	flags = FREAD;
	value = ctl_get_opt(&cbe_lun->options, "readonly");
	if (value == NULL || strcmp(value, "on") != 0)
		flags
|= FWRITE; 2164 2165again: 2166 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread); 2167 error = vn_open(&nd, &flags, 0, NULL); 2168 if ((error == EROFS || error == EACCES) && (flags & FWRITE)) { 2169 flags &= ~FWRITE; 2170 goto again; 2171 } 2172 if (error) { 2173 /* 2174 * This is the only reasonable guess we can make as far as 2175 * path if the user doesn't give us a fully qualified path. 2176 * If they want to specify a file, they need to specify the 2177 * full path. 2178 */ 2179 if (be_lun->dev_path[0] != '/') { 2180 char *dev_name; 2181 2182 asprintf(&dev_name, M_CTLBLK, "/dev/%s", 2183 be_lun->dev_path); 2184 free(be_lun->dev_path, M_CTLBLK); 2185 be_lun->dev_path = dev_name; 2186 goto again; 2187 } 2188 snprintf(req->error_str, sizeof(req->error_str), 2189 "error opening %s: %d", be_lun->dev_path, error); 2190 return (error); 2191 } 2192 if (flags & FWRITE) 2193 cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY; 2194 else 2195 cbe_lun->flags |= CTL_LUN_FLAG_READONLY; 2196 2197 NDFREE(&nd, NDF_ONLY_PNBUF); 2198 be_lun->vn = nd.ni_vp; 2199 2200 /* We only support disks and files. 
*/ 2201 if (vn_isdisk(be_lun->vn, &error)) { 2202 error = ctl_be_block_open_dev(be_lun, req); 2203 } else if (be_lun->vn->v_type == VREG) { 2204 error = ctl_be_block_open_file(be_lun, req); 2205 } else { 2206 error = EINVAL; 2207 snprintf(req->error_str, sizeof(req->error_str), 2208 "%s is not a disk or plain file", be_lun->dev_path); 2209 } 2210 VOP_UNLOCK(be_lun->vn, 0); 2211 2212 if (error != 0) 2213 ctl_be_block_close(be_lun); 2214 cbe_lun->serseq = CTL_LUN_SERSEQ_OFF; 2215 if (be_lun->dispatch != ctl_be_block_dispatch_dev) 2216 cbe_lun->serseq = CTL_LUN_SERSEQ_READ; 2217 value = ctl_get_opt(&cbe_lun->options, "serseq"); 2218 if (value != NULL && strcmp(value, "on") == 0) 2219 cbe_lun->serseq = CTL_LUN_SERSEQ_ON; 2220 else if (value != NULL && strcmp(value, "read") == 0) 2221 cbe_lun->serseq = CTL_LUN_SERSEQ_READ; 2222 else if (value != NULL && strcmp(value, "off") == 0) 2223 cbe_lun->serseq = CTL_LUN_SERSEQ_OFF; 2224 return (0); 2225} 2226 2227static int 2228ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2229{ 2230 struct ctl_be_lun *cbe_lun; 2231 struct ctl_be_block_lun *be_lun; 2232 struct ctl_lun_create_params *params; 2233 char num_thread_str[16]; 2234 char tmpstr[32]; 2235 char *value; 2236 int retval, num_threads; 2237 int tmp_num_threads; 2238 2239 params = &req->reqdata.create; 2240 retval = 0; 2241 req->status = CTL_LUN_OK; 2242 2243 be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK); 2244 cbe_lun = &be_lun->cbe_lun; 2245 cbe_lun->be_lun = be_lun; 2246 be_lun->params = req->reqdata.create; 2247 be_lun->softc = softc; 2248 STAILQ_INIT(&be_lun->input_queue); 2249 STAILQ_INIT(&be_lun->config_read_queue); 2250 STAILQ_INIT(&be_lun->config_write_queue); 2251 STAILQ_INIT(&be_lun->datamove_queue); 2252 sprintf(be_lun->lunname, "cblk%d", softc->num_luns); 2253 mtx_init(&be_lun->io_lock, "cblk io lock", NULL, MTX_DEF); 2254 mtx_init(&be_lun->queue_lock, "cblk queue lock", NULL, MTX_DEF); 2255 
ctl_init_opts(&cbe_lun->options, 2256 req->num_be_args, req->kern_be_args); 2257 be_lun->lun_zone = uma_zcreate(be_lun->lunname, CTLBLK_MAX_SEG, 2258 NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0); 2259 if (be_lun->lun_zone == NULL) { 2260 snprintf(req->error_str, sizeof(req->error_str), 2261 "error allocating UMA zone"); 2262 goto bailout_error; 2263 } 2264 2265 if (params->flags & CTL_LUN_FLAG_DEV_TYPE) 2266 cbe_lun->lun_type = params->device_type; 2267 else 2268 cbe_lun->lun_type = T_DIRECT; 2269 be_lun->flags = CTL_BE_BLOCK_LUN_UNCONFIGURED; 2270 cbe_lun->flags = 0; 2271 value = ctl_get_opt(&cbe_lun->options, "ha_role"); 2272 if (value != NULL) { 2273 if (strcmp(value, "primary") == 0) 2274 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2275 } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF) 2276 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2277 2278 if (cbe_lun->lun_type == T_DIRECT) { 2279 be_lun->size_bytes = params->lun_size_bytes; 2280 if (params->blocksize_bytes != 0) 2281 cbe_lun->blocksize = params->blocksize_bytes; 2282 else 2283 cbe_lun->blocksize = 512; 2284 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; 2285 cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 2286 0 : (be_lun->size_blocks - 1); 2287 2288 if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) || 2289 control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) { 2290 retval = ctl_be_block_open(softc, be_lun, req); 2291 if (retval != 0) { 2292 retval = 0; 2293 req->status = CTL_LUN_WARNING; 2294 } 2295 } 2296 num_threads = cbb_num_threads; 2297 } else { 2298 num_threads = 1; 2299 } 2300 2301 /* 2302 * XXX This searching loop might be refactored to be combined with 2303 * the loop above, 2304 */ 2305 value = ctl_get_opt(&cbe_lun->options, "num_threads"); 2306 if (value != NULL) { 2307 tmp_num_threads = strtol(value, NULL, 0); 2308 2309 /* 2310 * We don't let the user specify less than one 2311 * thread, but hope he's clueful enough not to 2312 * specify 1000 threads. 
2313 */ 2314 if (tmp_num_threads < 1) { 2315 snprintf(req->error_str, sizeof(req->error_str), 2316 "invalid number of threads %s", 2317 num_thread_str); 2318 goto bailout_error; 2319 } 2320 num_threads = tmp_num_threads; 2321 } 2322 2323 if (be_lun->vn == NULL) 2324 cbe_lun->flags |= CTL_LUN_FLAG_OFFLINE; 2325 /* Tell the user the blocksize we ended up using */ 2326 params->lun_size_bytes = be_lun->size_bytes; 2327 params->blocksize_bytes = cbe_lun->blocksize; 2328 if (params->flags & CTL_LUN_FLAG_ID_REQ) { 2329 cbe_lun->req_lun_id = params->req_lun_id; 2330 cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ; 2331 } else 2332 cbe_lun->req_lun_id = 0; 2333 2334 cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown; 2335 cbe_lun->lun_config_status = ctl_be_block_lun_config_status; 2336 cbe_lun->be = &ctl_be_block_driver; 2337 2338 if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) { 2339 snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%4d", 2340 softc->num_luns); 2341 strncpy((char *)cbe_lun->serial_num, tmpstr, 2342 MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr))); 2343 2344 /* Tell the user what we used for a serial number */ 2345 strncpy((char *)params->serial_num, tmpstr, 2346 MIN(sizeof(params->serial_num), sizeof(tmpstr))); 2347 } else { 2348 strncpy((char *)cbe_lun->serial_num, params->serial_num, 2349 MIN(sizeof(cbe_lun->serial_num), 2350 sizeof(params->serial_num))); 2351 } 2352 if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) { 2353 snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%4d", softc->num_luns); 2354 strncpy((char *)cbe_lun->device_id, tmpstr, 2355 MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr))); 2356 2357 /* Tell the user what we used for a device ID */ 2358 strncpy((char *)params->device_id, tmpstr, 2359 MIN(sizeof(params->device_id), sizeof(tmpstr))); 2360 } else { 2361 strncpy((char *)cbe_lun->device_id, params->device_id, 2362 MIN(sizeof(cbe_lun->device_id), 2363 sizeof(params->device_id))); 2364 } 2365 2366 TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, 
be_lun); 2367 2368 be_lun->io_taskqueue = taskqueue_create(be_lun->lunname, M_WAITOK, 2369 taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue); 2370 2371 if (be_lun->io_taskqueue == NULL) { 2372 snprintf(req->error_str, sizeof(req->error_str), 2373 "unable to create taskqueue"); 2374 goto bailout_error; 2375 } 2376 2377 /* 2378 * Note that we start the same number of threads by default for 2379 * both the file case and the block device case. For the file 2380 * case, we need multiple threads to allow concurrency, because the 2381 * vnode interface is designed to be a blocking interface. For the 2382 * block device case, ZFS zvols at least will block the caller's 2383 * context in many instances, and so we need multiple threads to 2384 * overcome that problem. Other block devices don't need as many 2385 * threads, but they shouldn't cause too many problems. 2386 * 2387 * If the user wants to just have a single thread for a block 2388 * device, he can specify that when the LUN is created, or change 2389 * the tunable/sysctl to alter the default number of threads. 
2390 */ 2391 retval = taskqueue_start_threads(&be_lun->io_taskqueue, 2392 /*num threads*/num_threads, 2393 /*priority*/PWAIT, 2394 /*thread name*/ 2395 "%s taskq", be_lun->lunname); 2396 2397 if (retval != 0) 2398 goto bailout_error; 2399 2400 be_lun->num_threads = num_threads; 2401 2402 mtx_lock(&softc->lock); 2403 softc->num_luns++; 2404 STAILQ_INSERT_TAIL(&softc->lun_list, be_lun, links); 2405 2406 mtx_unlock(&softc->lock); 2407 2408 retval = ctl_add_lun(&be_lun->cbe_lun); 2409 if (retval != 0) { 2410 mtx_lock(&softc->lock); 2411 STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, 2412 links); 2413 softc->num_luns--; 2414 mtx_unlock(&softc->lock); 2415 snprintf(req->error_str, sizeof(req->error_str), 2416 "ctl_add_lun() returned error %d, see dmesg for " 2417 "details", retval); 2418 retval = 0; 2419 goto bailout_error; 2420 } 2421 2422 mtx_lock(&softc->lock); 2423 2424 /* 2425 * Tell the config_status routine that we're waiting so it won't 2426 * clean up the LUN in the event of an error. 
2427 */ 2428 be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING; 2429 2430 while (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) { 2431 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0); 2432 if (retval == EINTR) 2433 break; 2434 } 2435 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; 2436 2437 if (be_lun->flags & CTL_BE_BLOCK_LUN_CONFIG_ERR) { 2438 snprintf(req->error_str, sizeof(req->error_str), 2439 "LUN configuration error, see dmesg for details"); 2440 STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, 2441 links); 2442 softc->num_luns--; 2443 mtx_unlock(&softc->lock); 2444 goto bailout_error; 2445 } else { 2446 params->req_lun_id = cbe_lun->lun_id; 2447 } 2448 2449 mtx_unlock(&softc->lock); 2450 2451 be_lun->disk_stats = devstat_new_entry("cbb", params->req_lun_id, 2452 cbe_lun->blocksize, 2453 DEVSTAT_ALL_SUPPORTED, 2454 cbe_lun->lun_type 2455 | DEVSTAT_TYPE_IF_OTHER, 2456 DEVSTAT_PRIORITY_OTHER); 2457 2458 return (retval); 2459 2460bailout_error: 2461 req->status = CTL_LUN_ERROR; 2462 2463 if (be_lun->io_taskqueue != NULL) 2464 taskqueue_free(be_lun->io_taskqueue); 2465 ctl_be_block_close(be_lun); 2466 if (be_lun->dev_path != NULL) 2467 free(be_lun->dev_path, M_CTLBLK); 2468 if (be_lun->lun_zone != NULL) 2469 uma_zdestroy(be_lun->lun_zone); 2470 ctl_free_opts(&cbe_lun->options); 2471 mtx_destroy(&be_lun->queue_lock); 2472 mtx_destroy(&be_lun->io_lock); 2473 free(be_lun, M_CTLBLK); 2474 2475 return (retval); 2476} 2477 2478static int 2479ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2480{ 2481 struct ctl_lun_rm_params *params; 2482 struct ctl_be_block_lun *be_lun; 2483 struct ctl_be_lun *cbe_lun; 2484 int retval; 2485 2486 params = &req->reqdata.rm; 2487 2488 mtx_lock(&softc->lock); 2489 STAILQ_FOREACH(be_lun, &softc->lun_list, links) { 2490 if (be_lun->cbe_lun.lun_id == params->lun_id) 2491 break; 2492 } 2493 mtx_unlock(&softc->lock); 2494 2495 if (be_lun == NULL) { 2496 snprintf(req->error_str, sizeof(req->error_str), 2497 
"LUN %u is not managed by the block backend", 2498 params->lun_id); 2499 goto bailout_error; 2500 } 2501 cbe_lun = &be_lun->cbe_lun; 2502 2503 retval = ctl_disable_lun(cbe_lun); 2504 if (retval != 0) { 2505 snprintf(req->error_str, sizeof(req->error_str), 2506 "error %d returned from ctl_disable_lun() for " 2507 "LUN %d", retval, params->lun_id); 2508 goto bailout_error; 2509 } 2510 2511 if (be_lun->vn != NULL) { 2512 cbe_lun->flags |= CTL_LUN_FLAG_OFFLINE; 2513 ctl_lun_offline(cbe_lun); 2514 taskqueue_drain_all(be_lun->io_taskqueue); 2515 ctl_be_block_close(be_lun); 2516 } 2517 2518 retval = ctl_invalidate_lun(cbe_lun); 2519 if (retval != 0) { 2520 snprintf(req->error_str, sizeof(req->error_str), 2521 "error %d returned from ctl_invalidate_lun() for " 2522 "LUN %d", retval, params->lun_id); 2523 goto bailout_error; 2524 } 2525 2526 mtx_lock(&softc->lock); 2527 be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING; 2528 while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) { 2529 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0); 2530 if (retval == EINTR) 2531 break; 2532 } 2533 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; 2534 2535 if ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) { 2536 snprintf(req->error_str, sizeof(req->error_str), 2537 "interrupted waiting for LUN to be freed"); 2538 mtx_unlock(&softc->lock); 2539 goto bailout_error; 2540 } 2541 2542 STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, links); 2543 2544 softc->num_luns--; 2545 mtx_unlock(&softc->lock); 2546 2547 taskqueue_drain_all(be_lun->io_taskqueue); 2548 taskqueue_free(be_lun->io_taskqueue); 2549 2550 if (be_lun->disk_stats != NULL) 2551 devstat_remove_entry(be_lun->disk_stats); 2552 2553 uma_zdestroy(be_lun->lun_zone); 2554 2555 ctl_free_opts(&cbe_lun->options); 2556 free(be_lun->dev_path, M_CTLBLK); 2557 mtx_destroy(&be_lun->queue_lock); 2558 mtx_destroy(&be_lun->io_lock); 2559 free(be_lun, M_CTLBLK); 2560 2561 req->status = CTL_LUN_OK; 2562 2563 return (0); 2564 
2565bailout_error: 2566 2567 req->status = CTL_LUN_ERROR; 2568 2569 return (0); 2570} 2571 2572static int 2573ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun, 2574 struct ctl_lun_req *req) 2575{ 2576 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 2577 struct vattr vattr; 2578 int error; 2579 struct ctl_lun_create_params *params = &be_lun->params; 2580 2581 if (params->lun_size_bytes != 0) { 2582 be_lun->size_bytes = params->lun_size_bytes; 2583 } else { 2584 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); 2585 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); 2586 VOP_UNLOCK(be_lun->vn, 0); 2587 if (error != 0) { 2588 snprintf(req->error_str, sizeof(req->error_str), 2589 "error calling VOP_GETATTR() for file %s", 2590 be_lun->dev_path); 2591 return (error); 2592 } 2593 be_lun->size_bytes = vattr.va_size; 2594 } 2595 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; 2596 cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 2597 0 : (be_lun->size_blocks - 1); 2598 return (0); 2599} 2600 2601static int 2602ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun, 2603 struct ctl_lun_req *req) 2604{ 2605 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; 2606 struct ctl_lun_create_params *params = &be_lun->params; 2607 struct cdevsw *csw; 2608 struct cdev *dev; 2609 uint64_t size_bytes; 2610 int error, ref; 2611 2612 csw = devvn_refthread(be_lun->vn, &dev, &ref); 2613 if (csw == NULL) 2614 return (ENXIO); 2615 if (csw->d_ioctl == NULL) { 2616 dev_relthread(dev, ref); 2617 snprintf(req->error_str, sizeof(req->error_str), 2618 "no d_ioctl for device %s!", be_lun->dev_path); 2619 return (ENODEV); 2620 } 2621 2622 error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&size_bytes, FREAD, 2623 curthread); 2624 dev_relthread(dev, ref); 2625 if (error) { 2626 snprintf(req->error_str, sizeof(req->error_str), 2627 "error %d returned for DIOCGMEDIASIZE ioctl " 2628 "on %s!", error, be_lun->dev_path); 2629 return (error); 2630 } 2631 2632 if (params->lun_size_bytes 
!= 0) { 2633 if (params->lun_size_bytes > size_bytes) { 2634 snprintf(req->error_str, sizeof(req->error_str), 2635 "requested LUN size %ju > backing device " 2636 "size %ju", 2637 (uintmax_t)params->lun_size_bytes, 2638 (uintmax_t)size_bytes); 2639 return (EINVAL); 2640 } 2641 be_lun->size_bytes = params->lun_size_bytes; 2642 } else { 2643 be_lun->size_bytes = size_bytes; 2644 } 2645 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; 2646 cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 2647 0 : (be_lun->size_blocks - 1); 2648 return (0); 2649} 2650 2651static int 2652ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) 2653{ 2654 struct ctl_lun_modify_params *params; 2655 struct ctl_be_block_lun *be_lun; 2656 struct ctl_be_lun *cbe_lun; 2657 char *value; 2658 uint64_t oldsize; 2659 int error, wasprim; 2660 2661 params = &req->reqdata.modify; 2662 2663 mtx_lock(&softc->lock); 2664 STAILQ_FOREACH(be_lun, &softc->lun_list, links) { 2665 if (be_lun->cbe_lun.lun_id == params->lun_id) 2666 break; 2667 } 2668 mtx_unlock(&softc->lock); 2669 2670 if (be_lun == NULL) { 2671 snprintf(req->error_str, sizeof(req->error_str), 2672 "LUN %u is not managed by the block backend", 2673 params->lun_id); 2674 goto bailout_error; 2675 } 2676 cbe_lun = &be_lun->cbe_lun; 2677 2678 if (params->lun_size_bytes != 0) 2679 be_lun->params.lun_size_bytes = params->lun_size_bytes; 2680 ctl_update_opts(&cbe_lun->options, req->num_be_args, req->kern_be_args); 2681 2682 wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY); 2683 value = ctl_get_opt(&cbe_lun->options, "ha_role"); 2684 if (value != NULL) { 2685 if (strcmp(value, "primary") == 0) 2686 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2687 else 2688 cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY; 2689 } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF) 2690 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; 2691 else 2692 cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY; 2693 if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) { 
2694 if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) 2695 ctl_lun_primary(cbe_lun); 2696 else 2697 ctl_lun_secondary(cbe_lun); 2698 } 2699 2700 oldsize = be_lun->size_blocks; 2701 if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) || 2702 control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) { 2703 if (be_lun->vn == NULL) 2704 error = ctl_be_block_open(softc, be_lun, req); 2705 else if (vn_isdisk(be_lun->vn, &error)) 2706 error = ctl_be_block_modify_dev(be_lun, req); 2707 else if (be_lun->vn->v_type == VREG) 2708 error = ctl_be_block_modify_file(be_lun, req); 2709 else 2710 error = EINVAL; 2711 if ((cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) && 2712 be_lun->vn != NULL) { 2713 cbe_lun->flags &= ~CTL_LUN_FLAG_OFFLINE; 2714 ctl_lun_online(cbe_lun); 2715 } 2716 } else { 2717 if (be_lun->vn != NULL) { 2718 cbe_lun->flags |= CTL_LUN_FLAG_OFFLINE; 2719 ctl_lun_offline(cbe_lun); 2720 taskqueue_drain_all(be_lun->io_taskqueue); 2721 error = ctl_be_block_close(be_lun); 2722 } else 2723 error = 0; 2724 } 2725 if (be_lun->size_blocks != oldsize) 2726 ctl_lun_capacity_changed(cbe_lun); 2727 2728 /* Tell the user the exact size we ended up using */ 2729 params->lun_size_bytes = be_lun->size_bytes; 2730 2731 req->status = error ? 
CTL_LUN_WARNING : CTL_LUN_OK; 2732 return (0); 2733 2734bailout_error: 2735 req->status = CTL_LUN_ERROR; 2736 return (0); 2737} 2738 2739static void 2740ctl_be_block_lun_shutdown(void *be_lun) 2741{ 2742 struct ctl_be_block_lun *lun; 2743 struct ctl_be_block_softc *softc; 2744 2745 lun = (struct ctl_be_block_lun *)be_lun; 2746 2747 softc = lun->softc; 2748 2749 mtx_lock(&softc->lock); 2750 lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED; 2751 if (lun->flags & CTL_BE_BLOCK_LUN_WAITING) 2752 wakeup(lun); 2753 mtx_unlock(&softc->lock); 2754 2755} 2756 2757static void 2758ctl_be_block_lun_config_status(void *be_lun, ctl_lun_config_status status) 2759{ 2760 struct ctl_be_block_lun *lun; 2761 struct ctl_be_block_softc *softc; 2762 2763 lun = (struct ctl_be_block_lun *)be_lun; 2764 softc = lun->softc; 2765 2766 if (status == CTL_LUN_CONFIG_OK) { 2767 mtx_lock(&softc->lock); 2768 lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED; 2769 if (lun->flags & CTL_BE_BLOCK_LUN_WAITING) 2770 wakeup(lun); 2771 mtx_unlock(&softc->lock); 2772 2773 /* 2774 * We successfully added the LUN, attempt to enable it. 
2775 */ 2776 if (ctl_enable_lun(&lun->cbe_lun) != 0) { 2777 printf("%s: ctl_enable_lun() failed!\n", __func__); 2778 if (ctl_invalidate_lun(&lun->cbe_lun) != 0) { 2779 printf("%s: ctl_invalidate_lun() failed!\n", 2780 __func__); 2781 } 2782 } 2783 2784 return; 2785 } 2786 2787 2788 mtx_lock(&softc->lock); 2789 lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED; 2790 lun->flags |= CTL_BE_BLOCK_LUN_CONFIG_ERR; 2791 wakeup(lun); 2792 mtx_unlock(&softc->lock); 2793} 2794 2795 2796static int 2797ctl_be_block_config_write(union ctl_io *io) 2798{ 2799 struct ctl_be_block_lun *be_lun; 2800 struct ctl_be_lun *cbe_lun; 2801 int retval; 2802 2803 retval = 0; 2804 2805 DPRINTF("entered\n"); 2806 2807 cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[ 2808 CTL_PRIV_BACKEND_LUN].ptr; 2809 be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun; 2810 2811 switch (io->scsiio.cdb[0]) { 2812 case SYNCHRONIZE_CACHE: 2813 case SYNCHRONIZE_CACHE_16: 2814 case WRITE_SAME_10: 2815 case WRITE_SAME_16: 2816 case UNMAP: 2817 /* 2818 * The upper level CTL code will filter out any CDBs with 2819 * the immediate bit set and return the proper error. 2820 * 2821 * We don't really need to worry about what LBA range the 2822 * user asked to be synced out. When they issue a sync 2823 * cache command, we'll sync out the whole thing. 2824 */ 2825 mtx_lock(&be_lun->queue_lock); 2826 STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr, 2827 links); 2828 mtx_unlock(&be_lun->queue_lock); 2829 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); 2830 break; 2831 case START_STOP_UNIT: { 2832 struct scsi_start_stop_unit *cdb; 2833 2834 cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb; 2835 2836 if (cdb->how & SSS_START) 2837 retval = ctl_start_lun(cbe_lun); 2838 else { 2839 retval = ctl_stop_lun(cbe_lun); 2840 /* 2841 * XXX KDM Copan-specific offline behavior. 2842 * Figure out a reasonable way to port this? 
2843 */ 2844#ifdef NEEDTOPORT 2845 if ((retval == 0) 2846 && (cdb->byte2 & SSS_ONOFFLINE)) 2847 retval = ctl_lun_offline(cbe_lun); 2848#endif 2849 } 2850 2851 /* 2852 * In general, the above routines should not fail. They 2853 * just set state for the LUN. So we've got something 2854 * pretty wrong here if we can't start or stop the LUN. 2855 */ 2856 if (retval != 0) { 2857 ctl_set_internal_failure(&io->scsiio, 2858 /*sks_valid*/ 1, 2859 /*retry_count*/ 0xf051); 2860 retval = CTL_RETVAL_COMPLETE; 2861 } else { 2862 ctl_set_success(&io->scsiio); 2863 } 2864 ctl_config_write_done(io); 2865 break; 2866 } 2867 default: 2868 ctl_set_invalid_opcode(&io->scsiio); 2869 ctl_config_write_done(io); 2870 retval = CTL_RETVAL_COMPLETE; 2871 break; 2872 } 2873 2874 return (retval); 2875} 2876 2877static int 2878ctl_be_block_config_read(union ctl_io *io) 2879{ 2880 struct ctl_be_block_lun *be_lun; 2881 struct ctl_be_lun *cbe_lun; 2882 int retval = 0; 2883 2884 DPRINTF("entered\n"); 2885 2886 cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[ 2887 CTL_PRIV_BACKEND_LUN].ptr; 2888 be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun; 2889 2890 switch (io->scsiio.cdb[0]) { 2891 case SERVICE_ACTION_IN: 2892 if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) { 2893 mtx_lock(&be_lun->queue_lock); 2894 STAILQ_INSERT_TAIL(&be_lun->config_read_queue, 2895 &io->io_hdr, links); 2896 mtx_unlock(&be_lun->queue_lock); 2897 taskqueue_enqueue(be_lun->io_taskqueue, 2898 &be_lun->io_task); 2899 retval = CTL_RETVAL_QUEUED; 2900 break; 2901 } 2902 ctl_set_invalid_field(&io->scsiio, 2903 /*sks_valid*/ 1, 2904 /*command*/ 1, 2905 /*field*/ 1, 2906 /*bit_valid*/ 1, 2907 /*bit*/ 4); 2908 ctl_config_read_done(io); 2909 retval = CTL_RETVAL_COMPLETE; 2910 break; 2911 default: 2912 ctl_set_invalid_opcode(&io->scsiio); 2913 ctl_config_read_done(io); 2914 retval = CTL_RETVAL_COMPLETE; 2915 break; 2916 } 2917 2918 return (retval); 2919} 2920 2921static int 2922ctl_be_block_lun_info(void *be_lun, struct sbuf *sb) 
2923{ 2924 struct ctl_be_block_lun *lun; 2925 int retval; 2926 2927 lun = (struct ctl_be_block_lun *)be_lun; 2928 retval = 0; 2929 2930 retval = sbuf_printf(sb, "\t<num_threads>"); 2931 2932 if (retval != 0) 2933 goto bailout; 2934 2935 retval = sbuf_printf(sb, "%d", lun->num_threads); 2936 2937 if (retval != 0) 2938 goto bailout; 2939 2940 retval = sbuf_printf(sb, "</num_threads>\n"); 2941 2942bailout: 2943 2944 return (retval); 2945} 2946 2947static uint64_t 2948ctl_be_block_lun_attr(void *be_lun, const char *attrname) 2949{ 2950 struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)be_lun; 2951 2952 if (lun->getattr == NULL) 2953 return (UINT64_MAX); 2954 return (lun->getattr(lun, attrname)); 2955} 2956 2957int 2958ctl_be_block_init(void) 2959{ 2960 struct ctl_be_block_softc *softc; 2961 int retval; 2962 2963 softc = &backend_block_softc; 2964 retval = 0; 2965 2966 mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF); 2967 beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io), 2968 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 2969 STAILQ_INIT(&softc->lun_list); 2970 2971 return (retval); 2972} 2973