Deleted Added
full compact
zvol.c (208047) zvol.c (209962)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

--- 58 unchanged lines hidden (view full) ---

67#include <sys/fs/zfs.h>
68#include <sys/zfs_ioctl.h>
69#include <sys/zil.h>
70#include <sys/refcount.h>
71#include <sys/zfs_znode.h>
72#include <sys/zfs_rlock.h>
73#include <sys/vdev_impl.h>
74#include <sys/zvol.h>
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

--- 58 unchanged lines hidden (view full) ---

67#include <sys/fs/zfs.h>
68#include <sys/zfs_ioctl.h>
69#include <sys/zil.h>
70#include <sys/refcount.h>
71#include <sys/zfs_znode.h>
72#include <sys/zfs_rlock.h>
73#include <sys/vdev_impl.h>
74#include <sys/zvol.h>
75#include <sys/zil_impl.h>
75#include <geom/geom.h>
76
77#include "zfs_namecheck.h"
78
79#define ZVOL_DUMPSIZE "dumpsize"
80
81struct g_class zfs_zvol_class = {
82 .name = "ZFS::ZVOL",

--- 27 unchanged lines hidden (view full) ---

110 struct g_provider *zv_provider; /* GEOM provider */
111 uint8_t zv_min_bs; /* minimum addressable block shift */
112 uint8_t zv_flags; /* readonly; dumpified */
113 objset_t *zv_objset; /* objset handle */
114 uint32_t zv_mode; /* DS_MODE_* flags at open time */
115 uint32_t zv_total_opens; /* total open count */
116 zilog_t *zv_zilog; /* ZIL handle */
117 list_t zv_extents; /* List of extents for dump */
76#include <geom/geom.h>
77
78#include "zfs_namecheck.h"
79
80#define ZVOL_DUMPSIZE "dumpsize"
81
82struct g_class zfs_zvol_class = {
83 .name = "ZFS::ZVOL",

--- 27 unchanged lines hidden (view full) ---

111 struct g_provider *zv_provider; /* GEOM provider */
112 uint8_t zv_min_bs; /* minimum addressable block shift */
113 uint8_t zv_flags; /* readonly; dumpified */
114 objset_t *zv_objset; /* objset handle */
115 uint32_t zv_mode; /* DS_MODE_* flags at open time */
116 uint32_t zv_total_opens; /* total open count */
117 zilog_t *zv_zilog; /* ZIL handle */
118 list_t zv_extents; /* List of extents for dump */
118 uint64_t zv_txg_assign; /* txg to assign during ZIL replay */
119 znode_t zv_znode; /* for range locking */
120 int zv_state;
121 struct bio_queue_head zv_queue;
122 struct mtx zv_queue_mtx; /* zv_queue mutex */
123} zvol_state_t;
124
125/*
126 * zvol specific flags

--- 155 unchanged lines hidden (view full) ---

282 * Otherwise we will later flush the data out via dmu_sync().
283 */
284ssize_t zvol_immediate_write_sz = 32768;
285
286static void
287zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t len)
288{
289 uint32_t blocksize = zv->zv_volblocksize;
119 znode_t zv_znode; /* for range locking */
120 int zv_state;
121 struct bio_queue_head zv_queue;
122 struct mtx zv_queue_mtx; /* zv_queue mutex */
123} zvol_state_t;
124
125/*
126 * zvol specific flags

--- 155 unchanged lines hidden (view full) ---

282 * Otherwise we will later flush the data out via dmu_sync().
283 */
284ssize_t zvol_immediate_write_sz = 32768;
285
286static void
287zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t len)
288{
289 uint32_t blocksize = zv->zv_volblocksize;
290 zilog_t *zilog = zv->zv_zilog;
290 lr_write_t *lr;
291
291 lr_write_t *lr;
292
293 if (zilog->zl_replay) {
294 dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx);
295 zilog->zl_replayed_seq[dmu_tx_get_txg(tx) & TXG_MASK] =
296 zilog->zl_replaying_seq;
297 return;
298 }
299
292 while (len) {
293 ssize_t nbytes = MIN(len, blocksize - P2PHASE(off, blocksize));
294 itx_t *itx = zil_itx_create(TX_WRITE, sizeof (*lr));
295
296 itx->itx_wr_state =
297 len > zvol_immediate_write_sz ? WR_INDIRECT : WR_NEED_COPY;
298 itx->itx_private = zv;
299 lr = (lr_write_t *)&itx->itx_lr;
300 lr->lr_foid = ZVOL_OBJ;
301 lr->lr_offset = off;
302 lr->lr_length = nbytes;
303 lr->lr_blkoff = off - P2ALIGN_TYPED(off, blocksize, uint64_t);
304 BP_ZERO(&lr->lr_blkptr);
305
300 while (len) {
301 ssize_t nbytes = MIN(len, blocksize - P2PHASE(off, blocksize));
302 itx_t *itx = zil_itx_create(TX_WRITE, sizeof (*lr));
303
304 itx->itx_wr_state =
305 len > zvol_immediate_write_sz ? WR_INDIRECT : WR_NEED_COPY;
306 itx->itx_private = zv;
307 lr = (lr_write_t *)&itx->itx_lr;
308 lr->lr_foid = ZVOL_OBJ;
309 lr->lr_offset = off;
310 lr->lr_length = nbytes;
311 lr->lr_blkoff = off - P2ALIGN_TYPED(off, blocksize, uint64_t);
312 BP_ZERO(&lr->lr_blkptr);
313
306 (void) zil_itx_assign(zv->zv_zilog, itx, tx);
314 (void) zil_itx_assign(zilog, itx, tx);
307 len -= nbytes;
308 off += nbytes;
309 }
310}
311
312static void
313zvol_start(struct bio *bp)
314{

--- 53 unchanged lines hidden (view full) ---

368
369 while (resid != 0 && off < volsize) {
370 size_t size = MIN(resid, zvol_maxphys); /* zvol_maxphys per tx */
371
372 if (size > volsize - off) /* don't write past the end */
373 size = volsize - off;
374
375 if (doread) {
315 len -= nbytes;
316 off += nbytes;
317 }
318}
319
320static void
321zvol_start(struct bio *bp)
322{

--- 53 unchanged lines hidden (view full) ---

376
377 while (resid != 0 && off < volsize) {
378 size_t size = MIN(resid, zvol_maxphys); /* zvol_maxphys per tx */
379
380 if (size > volsize - off) /* don't write past the end */
381 size = volsize - off;
382
383 if (doread) {
376 error = dmu_read(os, ZVOL_OBJ, off, size, addr);
384 error = dmu_read(os, ZVOL_OBJ, off, size, addr,
385 DMU_READ_PREFETCH);
377 } else {
378 dmu_tx_t *tx = dmu_tx_create(os);
379 dmu_tx_hold_write(tx, ZVOL_OBJ, off, size);
380 error = dmu_tx_assign(tx, TXG_WAIT);
381 if (error) {
382 dmu_tx_abort(tx);
383 } else {
384 dmu_write(os, ZVOL_OBJ, off, size, addr, tx);

--- 186 unchanged lines hidden (view full) ---

571 uint64_t off = lr->lr_offset;
572 uint64_t len = lr->lr_length;
573 dmu_tx_t *tx;
574 int error;
575
576 if (byteswap)
577 byteswap_uint64_array(lr, sizeof (*lr));
578
386 } else {
387 dmu_tx_t *tx = dmu_tx_create(os);
388 dmu_tx_hold_write(tx, ZVOL_OBJ, off, size);
389 error = dmu_tx_assign(tx, TXG_WAIT);
390 if (error) {
391 dmu_tx_abort(tx);
392 } else {
393 dmu_write(os, ZVOL_OBJ, off, size, addr, tx);

--- 186 unchanged lines hidden (view full) ---

580 uint64_t off = lr->lr_offset;
581 uint64_t len = lr->lr_length;
582 dmu_tx_t *tx;
583 int error;
584
585 if (byteswap)
586 byteswap_uint64_array(lr, sizeof (*lr));
587
588 /* If it's a dmu_sync() block get the data and write the whole block */
589 if (lr->lr_common.lrc_reclen == sizeof (lr_write_t))
590 zil_get_replay_data(dmu_objset_zil(os), lr);
591
579 tx = dmu_tx_create(os);
580 dmu_tx_hold_write(tx, ZVOL_OBJ, off, len);
592 tx = dmu_tx_create(os);
593 dmu_tx_hold_write(tx, ZVOL_OBJ, off, len);
581 error = dmu_tx_assign(tx, zv->zv_txg_assign);
594 error = dmu_tx_assign(tx, TXG_WAIT);
582 if (error) {
583 dmu_tx_abort(tx);
584 } else {
585 dmu_write(os, ZVOL_OBJ, off, len, data, tx);
586 dmu_tx_commit(tx);
587 }
588
589 return (error);

--- 19 unchanged lines hidden (view full) ---

609 zvol_replay_err, /* TX_REMOVE */
610 zvol_replay_err, /* TX_RMDIR */
611 zvol_replay_err, /* TX_LINK */
612 zvol_replay_err, /* TX_RENAME */
613 zvol_replay_write, /* TX_WRITE */
614 zvol_replay_err, /* TX_TRUNCATE */
615 zvol_replay_err, /* TX_SETATTR */
616 zvol_replay_err, /* TX_ACL */
595 if (error) {
596 dmu_tx_abort(tx);
597 } else {
598 dmu_write(os, ZVOL_OBJ, off, len, data, tx);
599 dmu_tx_commit(tx);
600 }
601
602 return (error);

--- 19 unchanged lines hidden (view full) ---

622 zvol_replay_err, /* TX_REMOVE */
623 zvol_replay_err, /* TX_RMDIR */
624 zvol_replay_err, /* TX_LINK */
625 zvol_replay_err, /* TX_RENAME */
626 zvol_replay_write, /* TX_WRITE */
627 zvol_replay_err, /* TX_TRUNCATE */
628 zvol_replay_err, /* TX_SETATTR */
629 zvol_replay_err, /* TX_ACL */
630 zvol_replay_err, /* TX_CREATE_ACL */
631 zvol_replay_err, /* TX_CREATE_ATTR */
632 zvol_replay_err, /* TX_CREATE_ACL_ATTR */
633 zvol_replay_err, /* TX_MKDIR_ACL */
634 zvol_replay_err, /* TX_MKDIR_ATTR */
635 zvol_replay_err, /* TX_MKDIR_ACL_ATTR */
636 zvol_replay_err, /* TX_WRITE2 */
617};
618
619/*
620 * Create a minor node (plus a whole lot more) for the specified volume.
621 */
622int
623zvol_create_minor(const char *name, major_t maj)
624{

--- 48 unchanged lines hidden (view full) ---

673 sizeof (rl_t), offsetof(rl_t, r_node));
674 list_create(&zv->zv_extents, sizeof (zvol_extent_t),
675 offsetof(zvol_extent_t, ze_node));
676 /* get and cache the blocksize */
677 error = dmu_object_info(os, ZVOL_OBJ, &doi);
678 ASSERT(error == 0);
679 zv->zv_volblocksize = doi.doi_data_block_size;
680
637};
638
639/*
640 * Create a minor node (plus a whole lot more) for the specified volume.
641 */
642int
643zvol_create_minor(const char *name, major_t maj)
644{

--- 48 unchanged lines hidden (view full) ---

693 sizeof (rl_t), offsetof(rl_t, r_node));
694 list_create(&zv->zv_extents, sizeof (zvol_extent_t),
695 offsetof(zvol_extent_t, ze_node));
696 /* get and cache the blocksize */
697 error = dmu_object_info(os, ZVOL_OBJ, &doi);
698 ASSERT(error == 0);
699 zv->zv_volblocksize = doi.doi_data_block_size;
700
681 zil_replay(os, zv, &zv->zv_txg_assign, zvol_replay_vector, NULL);
701 zil_replay(os, zv, zvol_replay_vector);
682
683 /* XXX this should handle the possible i/o error */
684 VERIFY(dsl_prop_register(dmu_objset_ds(zv->zv_objset),
685 "readonly", zvol_readonly_changed_cb, zv) == 0);
686
687 pp->private = zv;
688 g_error_provider(pp, 0);
689

--- 288 unchanged lines hidden (view full) ---

978 /*
979 * Write records come in two flavors: immediate and indirect.
980 * For small writes it's cheaper to store the data with the
981 * log record (immediate); for large writes it's cheaper to
982 * sync the data and get a pointer to it (indirect) so that
983 * we don't have to write the data twice.
984 */
985 if (buf != NULL) /* immediate write */
702
703 /* XXX this should handle the possible i/o error */
704 VERIFY(dsl_prop_register(dmu_objset_ds(zv->zv_objset),
705 "readonly", zvol_readonly_changed_cb, zv) == 0);
706
707 pp->private = zv;
708 g_error_provider(pp, 0);
709

--- 288 unchanged lines hidden (view full) ---

998 /*
999 * Write records come in two flavors: immediate and indirect.
1000 * For small writes it's cheaper to store the data with the
1001 * log record (immediate); for large writes it's cheaper to
1002 * sync the data and get a pointer to it (indirect) so that
1003 * we don't have to write the data twice.
1004 */
1005 if (buf != NULL) /* immediate write */
986 return (dmu_read(os, ZVOL_OBJ, lr->lr_offset, dlen, buf));
1006 return (dmu_read(os, ZVOL_OBJ, lr->lr_offset, dlen, buf,
1007 DMU_READ_NO_PREFETCH));
987
988 zgd = (zgd_t *)kmem_alloc(sizeof (zgd_t), KM_SLEEP);
989 zgd->zgd_zilog = zv->zv_zilog;
990 zgd->zgd_bp = &lr->lr_blkptr;
991
992 /*
993 * Lock the range of the block to ensure that when the data is
994 * written out and its checksum is being calculated that no other
995 * thread can change the block.
996 */
997 boff = P2ALIGN_TYPED(lr->lr_offset, zv->zv_volblocksize, uint64_t);
998 rl = zfs_range_lock(&zv->zv_znode, boff, zv->zv_volblocksize,
999 RL_READER);
1000 zgd->zgd_rl = rl;
1001
1002 VERIFY(0 == dmu_buf_hold(os, ZVOL_OBJ, lr->lr_offset, zgd, &db));
1008
1009 zgd = (zgd_t *)kmem_alloc(sizeof (zgd_t), KM_SLEEP);
1010 zgd->zgd_zilog = zv->zv_zilog;
1011 zgd->zgd_bp = &lr->lr_blkptr;
1012
1013 /*
1014 * Lock the range of the block to ensure that when the data is
1015 * written out and its checksum is being calculated that no other
1016 * thread can change the block.
1017 */
1018 boff = P2ALIGN_TYPED(lr->lr_offset, zv->zv_volblocksize, uint64_t);
1019 rl = zfs_range_lock(&zv->zv_znode, boff, zv->zv_volblocksize,
1020 RL_READER);
1021 zgd->zgd_rl = rl;
1022
1023 VERIFY(0 == dmu_buf_hold(os, ZVOL_OBJ, lr->lr_offset, zgd, &db));
1024
1003 error = dmu_sync(zio, db, &lr->lr_blkptr,
1004 lr->lr_common.lrc_txg, zvol_get_done, zgd);
1025 error = dmu_sync(zio, db, &lr->lr_blkptr,
1026 lr->lr_common.lrc_txg, zvol_get_done, zgd);
1005 if (error == 0)
1027 if (error == 0) {
1028 /*
1029 * dmu_sync() can compress a block of zeros to a null blkptr
1030 * but the block size still needs to be passed through to
1031 * replay.
1032 */
1033 BP_SET_LSIZE(&lr->lr_blkptr, db->db_size);
1006 zil_add_block(zv->zv_zilog, &lr->lr_blkptr);
1034 zil_add_block(zv->zv_zilog, &lr->lr_blkptr);
1035 }
1036
1007 /*
1008 * If we get EINPROGRESS, then we need to wait for a
1009 * write IO initiated by dmu_sync() to complete before
1010 * we can release this dbuf. We will finish everything
1011 * up in the zvol_get_done() callback.
1012 */
1013 if (error == EINPROGRESS)
1014 return (0);

--- 255 unchanged lines hidden ---
1037 /*
1038 * If we get EINPROGRESS, then we need to wait for a
1039 * write IO initiated by dmu_sync() to complete before
1040 * we can release this dbuf. We will finish everything
1041 * up in the zvol_get_done() callback.
1042 */
1043 if (error == EINPROGRESS)
1044 return (0);

--- 255 unchanged lines hidden ---