zvol.c (208047) | zvol.c (209962) |
---|---|
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE --- 58 unchanged lines hidden (view full) --- 67#include <sys/fs/zfs.h> 68#include <sys/zfs_ioctl.h> 69#include <sys/zil.h> 70#include <sys/refcount.h> 71#include <sys/zfs_znode.h> 72#include <sys/zfs_rlock.h> 73#include <sys/vdev_impl.h> 74#include <sys/zvol.h> | 1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE --- 58 unchanged lines hidden (view full) --- 67#include <sys/fs/zfs.h> 68#include <sys/zfs_ioctl.h> 69#include <sys/zil.h> 70#include <sys/refcount.h> 71#include <sys/zfs_znode.h> 72#include <sys/zfs_rlock.h> 73#include <sys/vdev_impl.h> 74#include <sys/zvol.h> |
75#include <sys/zil_impl.h> |
|
75#include <geom/geom.h> 76 77#include "zfs_namecheck.h" 78 79#define ZVOL_DUMPSIZE "dumpsize" 80 81struct g_class zfs_zvol_class = { 82 .name = "ZFS::ZVOL", --- 27 unchanged lines hidden (view full) --- 110 struct g_provider *zv_provider; /* GEOM provider */ 111 uint8_t zv_min_bs; /* minimum addressable block shift */ 112 uint8_t zv_flags; /* readonly; dumpified */ 113 objset_t *zv_objset; /* objset handle */ 114 uint32_t zv_mode; /* DS_MODE_* flags at open time */ 115 uint32_t zv_total_opens; /* total open count */ 116 zilog_t *zv_zilog; /* ZIL handle */ 117 list_t zv_extents; /* List of extents for dump */ | 76#include <geom/geom.h> 77 78#include "zfs_namecheck.h" 79 80#define ZVOL_DUMPSIZE "dumpsize" 81 82struct g_class zfs_zvol_class = { 83 .name = "ZFS::ZVOL", --- 27 unchanged lines hidden (view full) --- 111 struct g_provider *zv_provider; /* GEOM provider */ 112 uint8_t zv_min_bs; /* minimum addressable block shift */ 113 uint8_t zv_flags; /* readonly; dumpified */ 114 objset_t *zv_objset; /* objset handle */ 115 uint32_t zv_mode; /* DS_MODE_* flags at open time */ 116 uint32_t zv_total_opens; /* total open count */ 117 zilog_t *zv_zilog; /* ZIL handle */ 118 list_t zv_extents; /* List of extents for dump */ |
118 uint64_t zv_txg_assign; /* txg to assign during ZIL replay */ | |
119 znode_t zv_znode; /* for range locking */ 120 int zv_state; 121 struct bio_queue_head zv_queue; 122 struct mtx zv_queue_mtx; /* zv_queue mutex */ 123} zvol_state_t; 124 125/* 126 * zvol specific flags --- 155 unchanged lines hidden (view full) --- 282 * Otherwise we will later flush the data out via dmu_sync(). 283 */ 284ssize_t zvol_immediate_write_sz = 32768; 285 286static void 287zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t len) 288{ 289 uint32_t blocksize = zv->zv_volblocksize; | 119 znode_t zv_znode; /* for range locking */ 120 int zv_state; 121 struct bio_queue_head zv_queue; 122 struct mtx zv_queue_mtx; /* zv_queue mutex */ 123} zvol_state_t; 124 125/* 126 * zvol specific flags --- 155 unchanged lines hidden (view full) --- 282 * Otherwise we will later flush the data out via dmu_sync(). 283 */ 284ssize_t zvol_immediate_write_sz = 32768; 285 286static void 287zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t len) 288{ 289 uint32_t blocksize = zv->zv_volblocksize; |
290 zilog_t *zilog = zv->zv_zilog; |
|
290 lr_write_t *lr; 291 | 291 lr_write_t *lr; 292 |
293 if (zilog->zl_replay) { 294 dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); 295 zilog->zl_replayed_seq[dmu_tx_get_txg(tx) & TXG_MASK] = 296 zilog->zl_replaying_seq; 297 return; 298 } 299 |
|
292 while (len) { 293 ssize_t nbytes = MIN(len, blocksize - P2PHASE(off, blocksize)); 294 itx_t *itx = zil_itx_create(TX_WRITE, sizeof (*lr)); 295 296 itx->itx_wr_state = 297 len > zvol_immediate_write_sz ? WR_INDIRECT : WR_NEED_COPY; 298 itx->itx_private = zv; 299 lr = (lr_write_t *)&itx->itx_lr; 300 lr->lr_foid = ZVOL_OBJ; 301 lr->lr_offset = off; 302 lr->lr_length = nbytes; 303 lr->lr_blkoff = off - P2ALIGN_TYPED(off, blocksize, uint64_t); 304 BP_ZERO(&lr->lr_blkptr); 305 | 300 while (len) { 301 ssize_t nbytes = MIN(len, blocksize - P2PHASE(off, blocksize)); 302 itx_t *itx = zil_itx_create(TX_WRITE, sizeof (*lr)); 303 304 itx->itx_wr_state = 305 len > zvol_immediate_write_sz ? WR_INDIRECT : WR_NEED_COPY; 306 itx->itx_private = zv; 307 lr = (lr_write_t *)&itx->itx_lr; 308 lr->lr_foid = ZVOL_OBJ; 309 lr->lr_offset = off; 310 lr->lr_length = nbytes; 311 lr->lr_blkoff = off - P2ALIGN_TYPED(off, blocksize, uint64_t); 312 BP_ZERO(&lr->lr_blkptr); 313 |
306 (void) zil_itx_assign(zv->zv_zilog, itx, tx); | 314 (void) zil_itx_assign(zilog, itx, tx); |
307 len -= nbytes; 308 off += nbytes; 309 } 310} 311 312static void 313zvol_start(struct bio *bp) 314{ --- 53 unchanged lines hidden (view full) --- 368 369 while (resid != 0 && off < volsize) { 370 size_t size = MIN(resid, zvol_maxphys); /* zvol_maxphys per tx */ 371 372 if (size > volsize - off) /* don't write past the end */ 373 size = volsize - off; 374 375 if (doread) { | 315 len -= nbytes; 316 off += nbytes; 317 } 318} 319 320static void 321zvol_start(struct bio *bp) 322{ --- 53 unchanged lines hidden (view full) --- 376 377 while (resid != 0 && off < volsize) { 378 size_t size = MIN(resid, zvol_maxphys); /* zvol_maxphys per tx */ 379 380 if (size > volsize - off) /* don't write past the end */ 381 size = volsize - off; 382 383 if (doread) { |
376 error = dmu_read(os, ZVOL_OBJ, off, size, addr); | 384 error = dmu_read(os, ZVOL_OBJ, off, size, addr, 385 DMU_READ_PREFETCH); |
377 } else { 378 dmu_tx_t *tx = dmu_tx_create(os); 379 dmu_tx_hold_write(tx, ZVOL_OBJ, off, size); 380 error = dmu_tx_assign(tx, TXG_WAIT); 381 if (error) { 382 dmu_tx_abort(tx); 383 } else { 384 dmu_write(os, ZVOL_OBJ, off, size, addr, tx); --- 186 unchanged lines hidden (view full) --- 571 uint64_t off = lr->lr_offset; 572 uint64_t len = lr->lr_length; 573 dmu_tx_t *tx; 574 int error; 575 576 if (byteswap) 577 byteswap_uint64_array(lr, sizeof (*lr)); 578 | 386 } else { 387 dmu_tx_t *tx = dmu_tx_create(os); 388 dmu_tx_hold_write(tx, ZVOL_OBJ, off, size); 389 error = dmu_tx_assign(tx, TXG_WAIT); 390 if (error) { 391 dmu_tx_abort(tx); 392 } else { 393 dmu_write(os, ZVOL_OBJ, off, size, addr, tx); --- 186 unchanged lines hidden (view full) --- 580 uint64_t off = lr->lr_offset; 581 uint64_t len = lr->lr_length; 582 dmu_tx_t *tx; 583 int error; 584 585 if (byteswap) 586 byteswap_uint64_array(lr, sizeof (*lr)); 587 |
588 /* If it's a dmu_sync() block get the data and write the whole block */ 589 if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) 590 zil_get_replay_data(dmu_objset_zil(os), lr); 591 |
|
579 tx = dmu_tx_create(os); 580 dmu_tx_hold_write(tx, ZVOL_OBJ, off, len); | 592 tx = dmu_tx_create(os); 593 dmu_tx_hold_write(tx, ZVOL_OBJ, off, len); |
581 error = dmu_tx_assign(tx, zv->zv_txg_assign); | 594 error = dmu_tx_assign(tx, TXG_WAIT); |
582 if (error) { 583 dmu_tx_abort(tx); 584 } else { 585 dmu_write(os, ZVOL_OBJ, off, len, data, tx); 586 dmu_tx_commit(tx); 587 } 588 589 return (error); --- 19 unchanged lines hidden (view full) --- 609 zvol_replay_err, /* TX_REMOVE */ 610 zvol_replay_err, /* TX_RMDIR */ 611 zvol_replay_err, /* TX_LINK */ 612 zvol_replay_err, /* TX_RENAME */ 613 zvol_replay_write, /* TX_WRITE */ 614 zvol_replay_err, /* TX_TRUNCATE */ 615 zvol_replay_err, /* TX_SETATTR */ 616 zvol_replay_err, /* TX_ACL */ | 595 if (error) { 596 dmu_tx_abort(tx); 597 } else { 598 dmu_write(os, ZVOL_OBJ, off, len, data, tx); 599 dmu_tx_commit(tx); 600 } 601 602 return (error); --- 19 unchanged lines hidden (view full) --- 622 zvol_replay_err, /* TX_REMOVE */ 623 zvol_replay_err, /* TX_RMDIR */ 624 zvol_replay_err, /* TX_LINK */ 625 zvol_replay_err, /* TX_RENAME */ 626 zvol_replay_write, /* TX_WRITE */ 627 zvol_replay_err, /* TX_TRUNCATE */ 628 zvol_replay_err, /* TX_SETATTR */ 629 zvol_replay_err, /* TX_ACL */ |
630 zvol_replay_err, /* TX_CREATE_ACL */ 631 zvol_replay_err, /* TX_CREATE_ATTR */ 632 zvol_replay_err, /* TX_CREATE_ACL_ATTR */ 633 zvol_replay_err, /* TX_MKDIR_ACL */ 634 zvol_replay_err, /* TX_MKDIR_ATTR */ 635 zvol_replay_err, /* TX_MKDIR_ACL_ATTR */ 636 zvol_replay_err, /* TX_WRITE2 */ |
|
617}; 618 619/* 620 * Create a minor node (plus a whole lot more) for the specified volume. 621 */ 622int 623zvol_create_minor(const char *name, major_t maj) 624{ --- 48 unchanged lines hidden (view full) --- 673 sizeof (rl_t), offsetof(rl_t, r_node)); 674 list_create(&zv->zv_extents, sizeof (zvol_extent_t), 675 offsetof(zvol_extent_t, ze_node)); 676 /* get and cache the blocksize */ 677 error = dmu_object_info(os, ZVOL_OBJ, &doi); 678 ASSERT(error == 0); 679 zv->zv_volblocksize = doi.doi_data_block_size; 680 | 637}; 638 639/* 640 * Create a minor node (plus a whole lot more) for the specified volume. 641 */ 642int 643zvol_create_minor(const char *name, major_t maj) 644{ --- 48 unchanged lines hidden (view full) --- 693 sizeof (rl_t), offsetof(rl_t, r_node)); 694 list_create(&zv->zv_extents, sizeof (zvol_extent_t), 695 offsetof(zvol_extent_t, ze_node)); 696 /* get and cache the blocksize */ 697 error = dmu_object_info(os, ZVOL_OBJ, &doi); 698 ASSERT(error == 0); 699 zv->zv_volblocksize = doi.doi_data_block_size; 700 |
681 zil_replay(os, zv, &zv->zv_txg_assign, zvol_replay_vector, NULL); | 701 zil_replay(os, zv, zvol_replay_vector); |
682 683 /* XXX this should handle the possible i/o error */ 684 VERIFY(dsl_prop_register(dmu_objset_ds(zv->zv_objset), 685 "readonly", zvol_readonly_changed_cb, zv) == 0); 686 687 pp->private = zv; 688 g_error_provider(pp, 0); 689 --- 288 unchanged lines hidden (view full) --- 978 /* 979 * Write records come in two flavors: immediate and indirect. 980 * For small writes it's cheaper to store the data with the 981 * log record (immediate); for large writes it's cheaper to 982 * sync the data and get a pointer to it (indirect) so that 983 * we don't have to write the data twice. 984 */ 985 if (buf != NULL) /* immediate write */ | 702 703 /* XXX this should handle the possible i/o error */ 704 VERIFY(dsl_prop_register(dmu_objset_ds(zv->zv_objset), 705 "readonly", zvol_readonly_changed_cb, zv) == 0); 706 707 pp->private = zv; 708 g_error_provider(pp, 0); 709 --- 288 unchanged lines hidden (view full) --- 998 /* 999 * Write records come in two flavors: immediate and indirect. 1000 * For small writes it's cheaper to store the data with the 1001 * log record (immediate); for large writes it's cheaper to 1002 * sync the data and get a pointer to it (indirect) so that 1003 * we don't have to write the data twice. 1004 */ 1005 if (buf != NULL) /* immediate write */ |
986 return (dmu_read(os, ZVOL_OBJ, lr->lr_offset, dlen, buf)); | 1006 return (dmu_read(os, ZVOL_OBJ, lr->lr_offset, dlen, buf, 1007 DMU_READ_NO_PREFETCH)); |
987 988 zgd = (zgd_t *)kmem_alloc(sizeof (zgd_t), KM_SLEEP); 989 zgd->zgd_zilog = zv->zv_zilog; 990 zgd->zgd_bp = &lr->lr_blkptr; 991 992 /* 993 * Lock the range of the block to ensure that when the data is 994 * written out and its checksum is being calculated that no other 995 * thread can change the block. 996 */ 997 boff = P2ALIGN_TYPED(lr->lr_offset, zv->zv_volblocksize, uint64_t); 998 rl = zfs_range_lock(&zv->zv_znode, boff, zv->zv_volblocksize, 999 RL_READER); 1000 zgd->zgd_rl = rl; 1001 1002 VERIFY(0 == dmu_buf_hold(os, ZVOL_OBJ, lr->lr_offset, zgd, &db)); | 1008 1009 zgd = (zgd_t *)kmem_alloc(sizeof (zgd_t), KM_SLEEP); 1010 zgd->zgd_zilog = zv->zv_zilog; 1011 zgd->zgd_bp = &lr->lr_blkptr; 1012 1013 /* 1014 * Lock the range of the block to ensure that when the data is 1015 * written out and its checksum is being calculated that no other 1016 * thread can change the block. 1017 */ 1018 boff = P2ALIGN_TYPED(lr->lr_offset, zv->zv_volblocksize, uint64_t); 1019 rl = zfs_range_lock(&zv->zv_znode, boff, zv->zv_volblocksize, 1020 RL_READER); 1021 zgd->zgd_rl = rl; 1022 1023 VERIFY(0 == dmu_buf_hold(os, ZVOL_OBJ, lr->lr_offset, zgd, &db)); |
1024 |
|
1003 error = dmu_sync(zio, db, &lr->lr_blkptr, 1004 lr->lr_common.lrc_txg, zvol_get_done, zgd); | 1025 error = dmu_sync(zio, db, &lr->lr_blkptr, 1026 lr->lr_common.lrc_txg, zvol_get_done, zgd); |
1005 if (error == 0) | 1027 if (error == 0) { 1028 /* 1029 * dmu_sync() can compress a block of zeros to a null blkptr 1030 * but the block size still needs to be passed through to 1031 * replay. 1032 */ 1033 BP_SET_LSIZE(&lr->lr_blkptr, db->db_size); |
1006 zil_add_block(zv->zv_zilog, &lr->lr_blkptr); | 1034 zil_add_block(zv->zv_zilog, &lr->lr_blkptr); |
1035 } 1036 |
|
1007 /* 1008 * If we get EINPROGRESS, then we need to wait for a 1009 * write IO initiated by dmu_sync() to complete before 1010 * we can release this dbuf. We will finish everything 1011 * up in the zvol_get_done() callback. 1012 */ 1013 if (error == EINPROGRESS) 1014 return (0); --- 255 unchanged lines hidden --- | 1037 /* 1038 * If we get EINPROGRESS, then we need to wait for a 1039 * write IO initiated by dmu_sync() to complete before 1040 * we can release this dbuf. We will finish everything 1041 * up in the zvol_get_done() callback. 1042 */ 1043 if (error == EINPROGRESS) 1044 return (0); --- 255 unchanged lines hidden --- |