zio.c (267992) | zio.c (268075) |
---|---|
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE --- 23 unchanged lines hidden (view full) --- 32#include <sys/vdev_impl.h> 33#include <sys/zio_impl.h> 34#include <sys/zio_compress.h> 35#include <sys/zio_checksum.h> 36#include <sys/dmu_objset.h> 37#include <sys/arc.h> 38#include <sys/ddt.h> 39#include <sys/trim_map.h> | 1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE --- 23 unchanged lines hidden (view full) --- 32#include <sys/vdev_impl.h> 33#include <sys/zio_impl.h> 34#include <sys/zio_compress.h> 35#include <sys/zio_checksum.h> 36#include <sys/dmu_objset.h> 37#include <sys/arc.h> 38#include <sys/ddt.h> 39#include <sys/trim_map.h> |
40#include <sys/blkptr.h> |
|
40#include <sys/zfeature.h> 41 42SYSCTL_DECL(_vfs_zfs); 43SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO"); 44#if defined(__amd64__) 45static int zio_use_uma = 1; 46#else 47static int zio_use_uma = 0; --- 210 unchanged lines hidden (view full) --- 258 * excess / transient data in-core during a crashdump. 259 */ 260void * 261zio_buf_alloc(size_t size) 262{ 263 size_t c = (size - 1) >> SPA_MINBLOCKSHIFT; 264 int flags = zio_exclude_metadata ? KM_NODEBUG : 0; 265 | 41#include <sys/zfeature.h> 42 43SYSCTL_DECL(_vfs_zfs); 44SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO"); 45#if defined(__amd64__) 46static int zio_use_uma = 1; 47#else 48static int zio_use_uma = 0; --- 210 unchanged lines hidden (view full) --- 259 * excess / transient data in-core during a crashdump. 260 */ 261void * 262zio_buf_alloc(size_t size) 263{ 264 size_t c = (size - 1) >> SPA_MINBLOCKSHIFT; 265 int flags = zio_exclude_metadata ? KM_NODEBUG : 0; 266 |
266 ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); | 267 ASSERT3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); |
267 268 if (zio_use_uma) 269 return (kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE)); 270 else 271 return (kmem_alloc(size, KM_SLEEP|flags)); 272} 273 274/* --- 418 unchanged lines hidden (view full) --- 693 ZIO_TYPE_WRITE, priority, flags, NULL, 0, zb, 694 ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ? 695 ZIO_DDT_CHILD_WRITE_PIPELINE : ZIO_WRITE_PIPELINE); 696 697 zio->io_ready = ready; 698 zio->io_physdone = physdone; 699 zio->io_prop = *zp; 700 | 268 269 if (zio_use_uma) 270 return (kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE)); 271 else 272 return (kmem_alloc(size, KM_SLEEP|flags)); 273} 274 275/* --- 418 unchanged lines hidden (view full) --- 694 ZIO_TYPE_WRITE, priority, flags, NULL, 0, zb, 695 ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ? 696 ZIO_DDT_CHILD_WRITE_PIPELINE : ZIO_WRITE_PIPELINE); 697 698 zio->io_ready = ready; 699 zio->io_physdone = physdone; 700 zio->io_prop = *zp; 701 |
702 /* 703 * Data can be NULL if we are going to call zio_write_override() to 704 * provide the already-allocated BP. But we may need the data to 705 * verify a dedup hit (if requested). In this case, don't try to 706 * dedup (just take the already-allocated BP verbatim). 707 */ 708 if (data == NULL && zio->io_prop.zp_dedup_verify) { 709 zio->io_prop.zp_dedup = zio->io_prop.zp_dedup_verify = B_FALSE; 710 } 711 |
|
701 return (zio); 702} 703 704zio_t * 705zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, void *data, 706 uint64_t size, zio_done_func_t *done, void *private, 707 zio_priority_t priority, enum zio_flag flags, zbookmark_t *zb) 708{ --- 23 unchanged lines hidden (view full) --- 732 zio->io_prop.zp_nopwrite = nopwrite; 733 zio->io_prop.zp_copies = copies; 734 zio->io_bp_override = bp; 735} 736 737void 738zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp) 739{ | 712 return (zio); 713} 714 715zio_t * 716zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, void *data, 717 uint64_t size, zio_done_func_t *done, void *private, 718 zio_priority_t priority, enum zio_flag flags, zbookmark_t *zb) 719{ --- 23 unchanged lines hidden (view full) --- 743 zio->io_prop.zp_nopwrite = nopwrite; 744 zio->io_prop.zp_copies = copies; 745 zio->io_bp_override = bp; 746} 747 748void 749zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp) 750{ |
751 752 /* 753 * The check for EMBEDDED is a performance optimization. We 754 * process the free here (by ignoring it) rather than 755 * putting it on the list and then processing it in zio_free_sync(). 756 */ 757 if (BP_IS_EMBEDDED(bp)) 758 return; |
|
740 metaslab_check_free(spa, bp); 741 742 /* 743 * Frees that are for the currently-syncing txg, are not going to be 744 * deferred, and which will not need to do a read (i.e. not GANG or 745 * DEDUP), can be processed immediately. Otherwise, put them on the 746 * in-memory list for later processing. 747 */ --- 9 unchanged lines hidden (view full) --- 757 758zio_t * 759zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, 760 uint64_t size, enum zio_flag flags) 761{ 762 zio_t *zio; 763 enum zio_stage stage = ZIO_FREE_PIPELINE; 764 | 759 metaslab_check_free(spa, bp); 760 761 /* 762 * Frees that are for the currently-syncing txg, are not going to be 763 * deferred, and which will not need to do a read (i.e. not GANG or 764 * DEDUP), can be processed immediately. Otherwise, put them on the 765 * in-memory list for later processing. 766 */ --- 9 unchanged lines hidden (view full) --- 776 777zio_t * 778zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, 779 uint64_t size, enum zio_flag flags) 780{ 781 zio_t *zio; 782 enum zio_stage stage = ZIO_FREE_PIPELINE; 783 |
765 dprintf_bp(bp, "freeing in txg %llu, pass %u", 766 (longlong_t)txg, spa->spa_sync_pass); 767 | |
768 ASSERT(!BP_IS_HOLE(bp)); 769 ASSERT(spa_syncing_txg(spa) == txg); 770 ASSERT(spa_sync_pass(spa) < zfs_sync_pass_deferred_free); 771 | 784 ASSERT(!BP_IS_HOLE(bp)); 785 ASSERT(spa_syncing_txg(spa) == txg); 786 ASSERT(spa_sync_pass(spa) < zfs_sync_pass_deferred_free); 787 |
788 if (BP_IS_EMBEDDED(bp)) 789 return (zio_null(pio, spa, NULL, NULL, NULL, 0)); 790 |
|
772 metaslab_check_free(spa, bp); 773 arc_freed(spa, bp); 774 775 if (zfs_trim_enabled) 776 stage |= ZIO_STAGE_ISSUE_ASYNC | ZIO_STAGE_VDEV_IO_START | 777 ZIO_STAGE_VDEV_IO_ASSESS; 778 /* 779 * GANG and DEDUP blocks can induce a read (for the gang block header, --- 13 unchanged lines hidden (view full) --- 793} 794 795zio_t * 796zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, 797 zio_done_func_t *done, void *private, enum zio_flag flags) 798{ 799 zio_t *zio; 800 | 791 metaslab_check_free(spa, bp); 792 arc_freed(spa, bp); 793 794 if (zfs_trim_enabled) 795 stage |= ZIO_STAGE_ISSUE_ASYNC | ZIO_STAGE_VDEV_IO_START | 796 ZIO_STAGE_VDEV_IO_ASSESS; 797 /* 798 * GANG and DEDUP blocks can induce a read (for the gang block header, --- 13 unchanged lines hidden (view full) --- 812} 813 814zio_t * 815zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, 816 zio_done_func_t *done, void *private, enum zio_flag flags) 817{ 818 zio_t *zio; 819 |
820 dprintf_bp(bp, "claiming in txg %llu", txg); 821 822 if (BP_IS_EMBEDDED(bp)) 823 return (zio_null(pio, spa, NULL, NULL, NULL, 0)); 824 |
|
801 /* 802 * A claim is an allocation of a specific block. Claims are needed 803 * to support immediate writes in the intent log. The issue is that 804 * immediate writes contain committed data, but in a txg that was 805 * *not* committed. Upon opening the pool after an unclean shutdown, 806 * the intent log claims all blocks that contain immediate write data 807 * so that the SPA knows they're in use. 808 * --- 208 unchanged lines hidden (view full) --- 1017zio_read_bp_init(zio_t **ziop) 1018{ 1019 zio_t *zio = *ziop; 1020 blkptr_t *bp = zio->io_bp; 1021 1022 if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF && 1023 zio->io_child_type == ZIO_CHILD_LOGICAL && 1024 !(zio->io_flags & ZIO_FLAG_RAW)) { | 825 /* 826 * A claim is an allocation of a specific block. Claims are needed 827 * to support immediate writes in the intent log. The issue is that 828 * immediate writes contain committed data, but in a txg that was 829 * *not* committed. Upon opening the pool after an unclean shutdown, 830 * the intent log claims all blocks that contain immediate write data 831 * so that the SPA knows they're in use. 832 * --- 208 unchanged lines hidden (view full) --- 1041zio_read_bp_init(zio_t **ziop) 1042{ 1043 zio_t *zio = *ziop; 1044 blkptr_t *bp = zio->io_bp; 1045 1046 if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF && 1047 zio->io_child_type == ZIO_CHILD_LOGICAL && 1048 !(zio->io_flags & ZIO_FLAG_RAW)) { |
1025 uint64_t psize = BP_GET_PSIZE(bp); | 1049 uint64_t psize = 1050 BP_IS_EMBEDDED(bp) ? BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp); |
1026 void *cbuf = zio_buf_alloc(psize); 1027 1028 zio_push_transform(zio, cbuf, psize, psize, zio_decompress); 1029 } 1030 | 1051 void *cbuf = zio_buf_alloc(psize); 1052 1053 zio_push_transform(zio, cbuf, psize, psize, zio_decompress); 1054 } 1055 |
1056 if (BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA) { 1057 zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; 1058 decode_embedded_bp_compressed(bp, zio->io_data); 1059 } else { 1060 ASSERT(!BP_IS_EMBEDDED(bp)); 1061 } 1062 |
|
1031 if (!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) && BP_GET_LEVEL(bp) == 0) 1032 zio->io_flags |= ZIO_FLAG_DONT_CACHE; 1033 1034 if (BP_GET_TYPE(bp) == DMU_OT_DDT_ZAP) 1035 zio->io_flags |= ZIO_FLAG_DONT_CACHE; 1036 1037 if (BP_GET_DEDUP(bp) && zio->io_child_type == ZIO_CHILD_LOGICAL) 1038 zio->io_pipeline = ZIO_DDT_READ_PIPELINE; --- 28 unchanged lines hidden (view full) --- 1067 1068 if (zio->io_bp_override) { 1069 ASSERT(bp->blk_birth != zio->io_txg); 1070 ASSERT(BP_GET_DEDUP(zio->io_bp_override) == 0); 1071 1072 *bp = *zio->io_bp_override; 1073 zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; 1074 | 1063 if (!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) && BP_GET_LEVEL(bp) == 0) 1064 zio->io_flags |= ZIO_FLAG_DONT_CACHE; 1065 1066 if (BP_GET_TYPE(bp) == DMU_OT_DDT_ZAP) 1067 zio->io_flags |= ZIO_FLAG_DONT_CACHE; 1068 1069 if (BP_GET_DEDUP(bp) && zio->io_child_type == ZIO_CHILD_LOGICAL) 1070 zio->io_pipeline = ZIO_DDT_READ_PIPELINE; --- 28 unchanged lines hidden (view full) --- 1099 1100 if (zio->io_bp_override) { 1101 ASSERT(bp->blk_birth != zio->io_txg); 1102 ASSERT(BP_GET_DEDUP(zio->io_bp_override) == 0); 1103 1104 *bp = *zio->io_bp_override; 1105 zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; 1106 |
1107 if (BP_IS_EMBEDDED(bp)) 1108 return (ZIO_PIPELINE_CONTINUE); 1109 |
|
1075 /* 1076 * If we've been overridden and nopwrite is set then 1077 * set the flag accordingly to indicate that a nopwrite 1078 * has already occurred. 1079 */ 1080 if (!BP_IS_HOLE(bp) && zp->zp_nopwrite) { 1081 ASSERT(!zp->zp_dedup); 1082 zio->io_flags |= ZIO_FLAG_NOPWRITE; --- 32 unchanged lines hidden (view full) --- 1115 ASSERT(zio->io_txg == spa_syncing_txg(spa)); 1116 ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); 1117 ASSERT(!BP_GET_DEDUP(bp)); 1118 1119 if (pass >= zfs_sync_pass_dont_compress) 1120 compress = ZIO_COMPRESS_OFF; 1121 1122 /* Make sure someone doesn't change their mind on overwrites */ | 1110 /* 1111 * If we've been overridden and nopwrite is set then 1112 * set the flag accordingly to indicate that a nopwrite 1113 * has already occurred. 1114 */ 1115 if (!BP_IS_HOLE(bp) && zp->zp_nopwrite) { 1116 ASSERT(!zp->zp_dedup); 1117 zio->io_flags |= ZIO_FLAG_NOPWRITE; --- 32 unchanged lines hidden (view full) --- 1150 ASSERT(zio->io_txg == spa_syncing_txg(spa)); 1151 ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); 1152 ASSERT(!BP_GET_DEDUP(bp)); 1153 1154 if (pass >= zfs_sync_pass_dont_compress) 1155 compress = ZIO_COMPRESS_OFF; 1156 1157 /* Make sure someone doesn't change their mind on overwrites */ |
1123 ASSERT(MIN(zp->zp_copies + BP_IS_GANG(bp), | 1158 ASSERT(BP_IS_EMBEDDED(bp) || MIN(zp->zp_copies + BP_IS_GANG(bp), |
1124 spa_max_replication(spa)) == BP_GET_NDVAS(bp)); 1125 } 1126 1127 if (compress != ZIO_COMPRESS_OFF) { 1128 metaslab_class_t *mc = spa_normal_class(spa); 1129 void *cbuf = zio_buf_alloc(lsize); 1130 psize = zio_compress_data(compress, zio->io_data, cbuf, lsize, 1131 (size_t)metaslab_class_get_minblocksize(mc)); 1132 if (psize == 0 || psize == lsize) { 1133 compress = ZIO_COMPRESS_OFF; 1134 zio_buf_free(cbuf, lsize); | 1159 spa_max_replication(spa)) == BP_GET_NDVAS(bp)); 1160 } 1161 1162 if (compress != ZIO_COMPRESS_OFF) { 1163 metaslab_class_t *mc = spa_normal_class(spa); 1164 void *cbuf = zio_buf_alloc(lsize); 1165 psize = zio_compress_data(compress, zio->io_data, cbuf, lsize, 1166 (size_t)metaslab_class_get_minblocksize(mc)); 1167 if (psize == 0 || psize == lsize) { 1168 compress = ZIO_COMPRESS_OFF; 1169 zio_buf_free(cbuf, lsize); |
1170 } else if (!zp->zp_dedup && psize <= BPE_PAYLOAD_SIZE && 1171 zp->zp_level == 0 && !DMU_OT_HAS_FILL(zp->zp_type) && 1172 spa_feature_is_enabled(spa, SPA_FEATURE_EMBEDDED_DATA)) { 1173 encode_embedded_bp_compressed(bp, 1174 cbuf, compress, lsize, psize); 1175 BPE_SET_ETYPE(bp, BP_EMBEDDED_TYPE_DATA); 1176 BP_SET_TYPE(bp, zio->io_prop.zp_type); 1177 BP_SET_LEVEL(bp, zio->io_prop.zp_level); 1178 zio_buf_free(cbuf, lsize); 1179 bp->blk_birth = zio->io_txg; 1180 zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; 1181 ASSERT(spa_feature_is_active(spa, 1182 SPA_FEATURE_EMBEDDED_DATA)); 1183 return (ZIO_PIPELINE_CONTINUE); |
|
1135 } else { | 1184 } else { |
1136 ASSERT(psize < lsize); 1137 zio_push_transform(zio, cbuf, psize, lsize, NULL); | 1185 /* 1186 * Round up compressed size to MINBLOCKSIZE and 1187 * zero the tail. 1188 */ 1189 size_t rounded = 1190 P2ROUNDUP(psize, (size_t)SPA_MINBLOCKSIZE); 1191 if (rounded > psize) { 1192 bzero((char *)cbuf + psize, rounded - psize); 1193 psize = rounded; 1194 } 1195 if (psize == lsize) { 1196 compress = ZIO_COMPRESS_OFF; 1197 zio_buf_free(cbuf, lsize); 1198 } else { 1199 zio_push_transform(zio, cbuf, 1200 psize, lsize, NULL); 1201 } |
1138 } 1139 } 1140 1141 /* 1142 * The final pass of spa_sync() must be all rewrites, but the first 1143 * few passes offer a trade-off: allocating blocks defers convergence, 1144 * but newly allocated blocks are sequential, so they can be written 1145 * to disk faster. Therefore, we allow the first few passes of --- 1757 unchanged lines hidden (view full) --- 2903zio_checksum_verified(zio_t *zio) 2904{ 2905 zio->io_pipeline &= ~ZIO_STAGE_CHECKSUM_VERIFY; 2906} 2907 2908/* 2909 * ========================================================================== 2910 * Error rank. Error are ranked in the order 0, ENXIO, ECKSUM, EIO, other. | 1202 } 1203 } 1204 1205 /* 1206 * The final pass of spa_sync() must be all rewrites, but the first 1207 * few passes offer a trade-off: allocating blocks defers convergence, 1208 * but newly allocated blocks are sequential, so they can be written 1209 * to disk faster. Therefore, we allow the first few passes of --- 1757 unchanged lines hidden (view full) --- 2967zio_checksum_verified(zio_t *zio) 2968{ 2969 zio->io_pipeline &= ~ZIO_STAGE_CHECKSUM_VERIFY; 2970} 2971 2972/* 2973 * ========================================================================== 2974 * Error rank. Error are ranked in the order 0, ENXIO, ECKSUM, EIO, other. |
2911 * An error of 0 indictes success. ENXIO indicates whole-device failure, | 2975 * An error of 0 indicates success. ENXIO indicates whole-device failure, |
2912 * which may be transient (e.g. unplugged) or permament. ECKSUM and EIO 2913 * indicate errors that are specific to one I/O, and most likely permanent. 2914 * Any other error is presumed to be worse because we weren't expecting it. 2915 * ========================================================================== 2916 */ 2917int 2918zio_worst_error(int e1, int e2) 2919{ --- 95 unchanged lines hidden (view full) --- 3015 zio_wait_for_children(zio, ZIO_CHILD_DDT, ZIO_WAIT_DONE) || 3016 zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_DONE)) 3017 return (ZIO_PIPELINE_STOP); 3018 3019 for (int c = 0; c < ZIO_CHILD_TYPES; c++) 3020 for (int w = 0; w < ZIO_WAIT_TYPES; w++) 3021 ASSERT(zio->io_children[c][w] == 0); 3022 | 2976 * which may be transient (e.g. unplugged) or permament. ECKSUM and EIO 2977 * indicate errors that are specific to one I/O, and most likely permanent. 2978 * Any other error is presumed to be worse because we weren't expecting it. 2979 * ========================================================================== 2980 */ 2981int 2982zio_worst_error(int e1, int e2) 2983{ --- 95 unchanged lines hidden (view full) --- 3079 zio_wait_for_children(zio, ZIO_CHILD_DDT, ZIO_WAIT_DONE) || 3080 zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_DONE)) 3081 return (ZIO_PIPELINE_STOP); 3082 3083 for (int c = 0; c < ZIO_CHILD_TYPES; c++) 3084 for (int w = 0; w < ZIO_WAIT_TYPES; w++) 3085 ASSERT(zio->io_children[c][w] == 0); 3086 |
3023 if (bp != NULL) { | 3087 if (bp != NULL && !BP_IS_EMBEDDED(bp)) { |
3024 ASSERT(bp->blk_pad[0] == 0); 3025 ASSERT(bp->blk_pad[1] == 0); 3026 ASSERT(bcmp(bp, &zio->io_bp_copy, sizeof (blkptr_t)) == 0 || 3027 (bp == zio_unique_parent(zio)->io_bp)); 3028 if (zio->io_type == ZIO_TYPE_WRITE && !BP_IS_HOLE(bp) && 3029 zio->io_bp_override == NULL && 3030 !(zio->io_flags & ZIO_FLAG_IO_REPAIR)) { 3031 ASSERT(!BP_SHOULD_BYTESWAP(bp)); --- 316 unchanged lines hidden --- | 3088 ASSERT(bp->blk_pad[0] == 0); 3089 ASSERT(bp->blk_pad[1] == 0); 3090 ASSERT(bcmp(bp, &zio->io_bp_copy, sizeof (blkptr_t)) == 0 || 3091 (bp == zio_unique_parent(zio)->io_bp)); 3092 if (zio->io_type == ZIO_TYPE_WRITE && !BP_IS_HOLE(bp) && 3093 zio->io_bp_override == NULL && 3094 !(zio->io_flags & ZIO_FLAG_IO_REPAIR)) { 3095 ASSERT(!BP_SHOULD_BYTESWAP(bp)); --- 316 unchanged lines hidden --- |