Deleted Added
full compact
zio.c (307265) zio.c (307277)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

--- 27 unchanged lines hidden (view full) ---

36#include <sys/zio_compress.h>
37#include <sys/zio_checksum.h>
38#include <sys/dmu_objset.h>
39#include <sys/arc.h>
40#include <sys/ddt.h>
41#include <sys/trim_map.h>
42#include <sys/blkptr.h>
43#include <sys/zfeature.h>
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

--- 27 unchanged lines hidden (view full) ---

36#include <sys/zio_compress.h>
37#include <sys/zio_checksum.h>
38#include <sys/dmu_objset.h>
39#include <sys/arc.h>
40#include <sys/ddt.h>
41#include <sys/trim_map.h>
42#include <sys/blkptr.h>
43#include <sys/zfeature.h>
44#include <sys/metaslab_impl.h>
44
45SYSCTL_DECL(_vfs_zfs);
46SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO");
47#if defined(__amd64__)
48static int zio_use_uma = 1;
49#else
50static int zio_use_uma = 0;
51#endif

--- 21 unchanged lines hidden (view full) ---

73 * I/O type descriptions
74 * ==========================================================================
75 */
76const char *zio_type_name[ZIO_TYPES] = {
77 "zio_null", "zio_read", "zio_write", "zio_free", "zio_claim",
78 "zio_ioctl"
79};
80
45
46SYSCTL_DECL(_vfs_zfs);
47SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO");
48#if defined(__amd64__)
49static int zio_use_uma = 1;
50#else
51static int zio_use_uma = 0;
52#endif

--- 21 unchanged lines hidden (view full) ---

74 * I/O type descriptions
75 * ==========================================================================
76 */
77const char *zio_type_name[ZIO_TYPES] = {
78 "zio_null", "zio_read", "zio_write", "zio_free", "zio_claim",
79 "zio_ioctl"
80};
81
82boolean_t zio_dva_throttle_enabled = B_TRUE;
83SYSCTL_INT(_vfs_zfs_zio, OID_AUTO, dva_throttle_enabled, CTLFLAG_RDTUN,
84 &zio_dva_throttle_enabled, 0, "");
85
81/*
82 * ==========================================================================
83 * I/O kmem caches
84 * ==========================================================================
85 */
86kmem_cache_t *zio_cache;
87kmem_cache_t *zio_link_cache;
88kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];

--- 42 unchanged lines hidden (view full) ---

131#ifdef illumos
132#ifdef ZFS_DEBUG
133int zio_buf_debug_limit = 16384;
134#else
135int zio_buf_debug_limit = 0;
136#endif
137#endif
138
86/*
87 * ==========================================================================
88 * I/O kmem caches
89 * ==========================================================================
90 */
91kmem_cache_t *zio_cache;
92kmem_cache_t *zio_link_cache;
93kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];

--- 42 unchanged lines hidden (view full) ---

136#ifdef illumos
137#ifdef ZFS_DEBUG
138int zio_buf_debug_limit = 16384;
139#else
140int zio_buf_debug_limit = 0;
141#endif
142#endif
143
144static void zio_taskq_dispatch(zio_t *, zio_taskq_type_t, boolean_t);
145
139void
140zio_init(void)
141{
142 size_t c;
143 zio_cache = kmem_cache_create("zio_cache",
144 sizeof (zio_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
145 zio_link_cache = kmem_cache_create("zio_link_cache",
146 sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0);

--- 244 unchanged lines hidden (view full) ---

391 zio->io_error = SET_ERROR(EIO);
392}
393
394/*
395 * ==========================================================================
396 * I/O parent/child relationships and pipeline interlocks
397 * ==========================================================================
398 */
146void
147zio_init(void)
148{
149 size_t c;
150 zio_cache = kmem_cache_create("zio_cache",
151 sizeof (zio_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
152 zio_link_cache = kmem_cache_create("zio_link_cache",
153 sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0);

--- 244 unchanged lines hidden (view full) ---

398 zio->io_error = SET_ERROR(EIO);
399}
400
401/*
402 * ==========================================================================
403 * I/O parent/child relationships and pipeline interlocks
404 * ==========================================================================
405 */
399/*
400 * NOTE - Callers of zio_walk_parents() and zio_walk_children() must
401 * continue calling these functions until they return NULL.
402 * Otherwise, the next caller will pick up the list walk in
403 * some indeterminate state. (Otherwise every caller would
404 * have to pass in a cookie to keep the state represented by
405 * io_walk_link, which gets annoying.)
406 */
407zio_t *
406zio_t *
408zio_walk_parents(zio_t *cio)
407zio_walk_parents(zio_t *cio, zio_link_t **zl)
409{
408{
410 zio_link_t *zl = cio->io_walk_link;
411 list_t *pl = &cio->io_parent_list;
412
409 list_t *pl = &cio->io_parent_list;
410
413 zl = (zl == NULL) ? list_head(pl) : list_next(pl, zl);
414 cio->io_walk_link = zl;
415
416 if (zl == NULL)
411 *zl = (*zl == NULL) ? list_head(pl) : list_next(pl, *zl);
412 if (*zl == NULL)
417 return (NULL);
418
413 return (NULL);
414
419 ASSERT(zl->zl_child == cio);
420 return (zl->zl_parent);
415 ASSERT((*zl)->zl_child == cio);
416 return ((*zl)->zl_parent);
421}
422
423zio_t *
417}
418
419zio_t *
424zio_walk_children(zio_t *pio)
420zio_walk_children(zio_t *pio, zio_link_t **zl)
425{
421{
426 zio_link_t *zl = pio->io_walk_link;
427 list_t *cl = &pio->io_child_list;
428
422 list_t *cl = &pio->io_child_list;
423
429 zl = (zl == NULL) ? list_head(cl) : list_next(cl, zl);
430 pio->io_walk_link = zl;
431
432 if (zl == NULL)
424 *zl = (*zl == NULL) ? list_head(cl) : list_next(cl, *zl);
425 if (*zl == NULL)
433 return (NULL);
434
426 return (NULL);
427
435 ASSERT(zl->zl_parent == pio);
436 return (zl->zl_child);
428 ASSERT((*zl)->zl_parent == pio);
429 return ((*zl)->zl_child);
437}
438
439zio_t *
440zio_unique_parent(zio_t *cio)
441{
430}
431
432zio_t *
433zio_unique_parent(zio_t *cio)
434{
442 zio_t *pio = zio_walk_parents(cio);
435 zio_link_t *zl = NULL;
436 zio_t *pio = zio_walk_parents(cio, &zl);
443
437
444 VERIFY(zio_walk_parents(cio) == NULL);
438 VERIFY3P(zio_walk_parents(cio, &zl), ==, NULL);
445 return (pio);
446}
447
448void
449zio_add_child(zio_t *pio, zio_t *cio)
450{
451 zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_SLEEP);
452

--- 52 unchanged lines hidden (view full) ---

505{
506 uint64_t *countp = &zio->io_children[child][wait];
507 boolean_t waiting = B_FALSE;
508
509 mutex_enter(&zio->io_lock);
510 ASSERT(zio->io_stall == NULL);
511 if (*countp != 0) {
512 zio->io_stage >>= 1;
439 return (pio);
440}
441
442void
443zio_add_child(zio_t *pio, zio_t *cio)
444{
445 zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_SLEEP);
446

--- 52 unchanged lines hidden (view full) ---

499{
500 uint64_t *countp = &zio->io_children[child][wait];
501 boolean_t waiting = B_FALSE;
502
503 mutex_enter(&zio->io_lock);
504 ASSERT(zio->io_stall == NULL);
505 if (*countp != 0) {
506 zio->io_stage >>= 1;
507 ASSERT3U(zio->io_stage, !=, ZIO_STAGE_OPEN);
513 zio->io_stall = countp;
514 waiting = B_TRUE;
515 }
516 mutex_exit(&zio->io_lock);
517
518 return (waiting);
519}
520

--- 7 unchanged lines hidden (view full) ---

528 if (zio->io_error && !(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE))
529 *errorp = zio_worst_error(*errorp, zio->io_error);
530 pio->io_reexecute |= zio->io_reexecute;
531 ASSERT3U(*countp, >, 0);
532
533 (*countp)--;
534
535 if (*countp == 0 && pio->io_stall == countp) {
508 zio->io_stall = countp;
509 waiting = B_TRUE;
510 }
511 mutex_exit(&zio->io_lock);
512
513 return (waiting);
514}
515

--- 7 unchanged lines hidden (view full) ---

523 if (zio->io_error && !(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE))
524 *errorp = zio_worst_error(*errorp, zio->io_error);
525 pio->io_reexecute |= zio->io_reexecute;
526 ASSERT3U(*countp, >, 0);
527
528 (*countp)--;
529
530 if (*countp == 0 && pio->io_stall == countp) {
531 zio_taskq_type_t type =
532 pio->io_stage < ZIO_STAGE_VDEV_IO_START ? ZIO_TASKQ_ISSUE :
533 ZIO_TASKQ_INTERRUPT;
536 pio->io_stall = NULL;
537 mutex_exit(&pio->io_lock);
534 pio->io_stall = NULL;
535 mutex_exit(&pio->io_lock);
538 zio_execute(pio);
536 /*
537 * Dispatch the parent zio in its own taskq so that
538 * the child can continue to make progress. This also
539 * prevents overflowing the stack when we have deeply nested
540 * parent-child relationships.
541 */
542 zio_taskq_dispatch(pio, type, B_FALSE);
539 } else {
540 mutex_exit(&pio->io_lock);
541 }
542}
543
544static void
545zio_inherit_child_errors(zio_t *zio, enum zio_child c)
546{
547 if (zio->io_child_error[c] != 0 && zio->io_error == 0)
548 zio->io_error = zio->io_child_error[c];
549}
550
543 } else {
544 mutex_exit(&pio->io_lock);
545 }
546}
547
548static void
549zio_inherit_child_errors(zio_t *zio, enum zio_child c)
550{
551 if (zio->io_child_error[c] != 0 && zio->io_error == 0)
552 zio->io_error = zio->io_child_error[c];
553}
554
555int
556zio_timestamp_compare(const void *x1, const void *x2)
557{
558 const zio_t *z1 = x1;
559 const zio_t *z2 = x2;
560
561 if (z1->io_queued_timestamp < z2->io_queued_timestamp)
562 return (-1);
563 if (z1->io_queued_timestamp > z2->io_queued_timestamp)
564 return (1);
565
566 if (z1->io_offset < z2->io_offset)
567 return (-1);
568 if (z1->io_offset > z2->io_offset)
569 return (1);
570
571 if (z1 < z2)
572 return (-1);
573 if (z1 > z2)
574 return (1);
575
576 return (0);
577}
578
551/*
552 * ==========================================================================
553 * Create the various types of I/O (read, write, free, etc)
554 * ==========================================================================
555 */
556static zio_t *
557zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
558 void *data, uint64_t size, zio_done_func_t *done, void *private,

--- 52 unchanged lines hidden (view full) ---

611 zio->io_priority = priority;
612 zio->io_vd = vd;
613 zio->io_offset = offset;
614 zio->io_orig_data = zio->io_data = data;
615 zio->io_orig_size = zio->io_size = size;
616 zio->io_orig_flags = zio->io_flags = flags;
617 zio->io_orig_stage = zio->io_stage = stage;
618 zio->io_orig_pipeline = zio->io_pipeline = pipeline;
579/*
580 * ==========================================================================
581 * Create the various types of I/O (read, write, free, etc)
582 * ==========================================================================
583 */
584static zio_t *
585zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
586 void *data, uint64_t size, zio_done_func_t *done, void *private,

--- 52 unchanged lines hidden (view full) ---

639 zio->io_priority = priority;
640 zio->io_vd = vd;
641 zio->io_offset = offset;
642 zio->io_orig_data = zio->io_data = data;
643 zio->io_orig_size = zio->io_size = size;
644 zio->io_orig_flags = zio->io_flags = flags;
645 zio->io_orig_stage = zio->io_stage = stage;
646 zio->io_orig_pipeline = zio->io_pipeline = pipeline;
647 zio->io_pipeline_trace = ZIO_STAGE_OPEN;
619
620 zio->io_state[ZIO_WAIT_READY] = (stage >= ZIO_STAGE_READY);
621 zio->io_state[ZIO_WAIT_DONE] = (stage >= ZIO_STAGE_DONE);
622
623 if (zb != NULL)
624 zio->io_bookmark = *zb;
625
626 if (pio != NULL) {

--- 181 unchanged lines hidden (view full) ---

808zio_t *
809zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, void *data,
810 uint64_t size, zio_done_func_t *done, void *private,
811 zio_priority_t priority, enum zio_flag flags, zbookmark_phys_t *zb)
812{
813 zio_t *zio;
814
815 zio = zio_create(pio, spa, txg, bp, data, size, done, private,
648
649 zio->io_state[ZIO_WAIT_READY] = (stage >= ZIO_STAGE_READY);
650 zio->io_state[ZIO_WAIT_DONE] = (stage >= ZIO_STAGE_DONE);
651
652 if (zb != NULL)
653 zio->io_bookmark = *zb;
654
655 if (pio != NULL) {

--- 181 unchanged lines hidden (view full) ---

837zio_t *
838zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, void *data,
839 uint64_t size, zio_done_func_t *done, void *private,
840 zio_priority_t priority, enum zio_flag flags, zbookmark_phys_t *zb)
841{
842 zio_t *zio;
843
844 zio = zio_create(pio, spa, txg, bp, data, size, done, private,
816 ZIO_TYPE_WRITE, priority, flags, NULL, 0, zb,
845 ZIO_TYPE_WRITE, priority, flags | ZIO_FLAG_IO_REWRITE, NULL, 0, zb,
817 ZIO_STAGE_OPEN, ZIO_REWRITE_PIPELINE);
818
819 return (zio);
820}
821
822void
823zio_write_override(zio_t *zio, blkptr_t *bp, int copies, boolean_t nopwrite)
824{

--- 104 unchanged lines hidden (view full) ---

929 */
930 ASSERT3U(spa->spa_uberblock.ub_rootbp.blk_birth, <, spa_first_txg(spa));
931 ASSERT(txg == spa_first_txg(spa) || txg == 0);
932 ASSERT(!BP_GET_DEDUP(bp) || !spa_writeable(spa)); /* zdb(1M) */
933
934 zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp),
935 done, private, ZIO_TYPE_CLAIM, ZIO_PRIORITY_NOW, flags,
936 NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_CLAIM_PIPELINE);
846 ZIO_STAGE_OPEN, ZIO_REWRITE_PIPELINE);
847
848 return (zio);
849}
850
851void
852zio_write_override(zio_t *zio, blkptr_t *bp, int copies, boolean_t nopwrite)
853{

--- 104 unchanged lines hidden (view full) ---

958 */
959 ASSERT3U(spa->spa_uberblock.ub_rootbp.blk_birth, <, spa_first_txg(spa));
960 ASSERT(txg == spa_first_txg(spa) || txg == 0);
961 ASSERT(!BP_GET_DEDUP(bp) || !spa_writeable(spa)); /* zdb(1M) */
962
963 zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp),
964 done, private, ZIO_TYPE_CLAIM, ZIO_PRIORITY_NOW, flags,
965 NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_CLAIM_PIPELINE);
966 ASSERT0(zio->io_queued_timestamp);
937
938 return (zio);
939}
940
941zio_t *
942zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd, uint64_t offset,
943 uint64_t size, zio_done_func_t *done, void *private,
944 zio_priority_t priority, enum zio_flag flags)

--- 108 unchanged lines hidden (view full) ---

1053
1054 /*
1055 * If we've decided to do a repair, the write is not speculative --
1056 * even if the original read was.
1057 */
1058 if (flags & ZIO_FLAG_IO_REPAIR)
1059 flags &= ~ZIO_FLAG_SPECULATIVE;
1060
967
968 return (zio);
969}
970
971zio_t *
972zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd, uint64_t offset,
973 uint64_t size, zio_done_func_t *done, void *private,
974 zio_priority_t priority, enum zio_flag flags)

--- 108 unchanged lines hidden (view full) ---

1083
1084 /*
1085 * If we've decided to do a repair, the write is not speculative --
1086 * even if the original read was.
1087 */
1088 if (flags & ZIO_FLAG_IO_REPAIR)
1089 flags &= ~ZIO_FLAG_SPECULATIVE;
1090
1091 /*
1092 * If we're creating a child I/O that is not associated with a
1093 * top-level vdev, then the child zio is not an allocating I/O.
1094 * If this is a retried I/O then we ignore it since we will
1095 * have already processed the original allocating I/O.
1096 */
1097 if (flags & ZIO_FLAG_IO_ALLOCATING &&
1098 (vd != vd->vdev_top || (flags & ZIO_FLAG_IO_RETRY))) {
1099 metaslab_class_t *mc = spa_normal_class(pio->io_spa);
1100
1101 ASSERT(mc->mc_alloc_throttle_enabled);
1102 ASSERT(type == ZIO_TYPE_WRITE);
1103 ASSERT(priority == ZIO_PRIORITY_ASYNC_WRITE);
1104 ASSERT(!(flags & ZIO_FLAG_IO_REPAIR));
1105 ASSERT(!(pio->io_flags & ZIO_FLAG_IO_REWRITE) ||
1106 pio->io_child_type == ZIO_CHILD_GANG);
1107
1108 flags &= ~ZIO_FLAG_IO_ALLOCATING;
1109 }
1110
1061 zio = zio_create(pio, pio->io_spa, pio->io_txg, bp, data, size,
1062 done, private, type, priority, flags, vd, offset, &pio->io_bookmark,
1063 ZIO_STAGE_VDEV_IO_START >> 1, pipeline);
1111 zio = zio_create(pio, pio->io_spa, pio->io_txg, bp, data, size,
1112 done, private, type, priority, flags, vd, offset, &pio->io_bookmark,
1113 ZIO_STAGE_VDEV_IO_START >> 1, pipeline);
1114 ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_VDEV);
1064
1065 zio->io_physdone = pio->io_physdone;
1066 if (vd->vdev_ops->vdev_op_leaf && zio->io_logical != NULL)
1067 zio->io_logical->io_phys_children++;
1068
1069 return (zio);
1070}
1071

--- 90 unchanged lines hidden (view full) ---

1162 zio->io_pipeline = ZIO_DDT_READ_PIPELINE;
1163
1164 return (ZIO_PIPELINE_CONTINUE);
1165}
1166
1167static int
1168zio_write_bp_init(zio_t *zio)
1169{
1115
1116 zio->io_physdone = pio->io_physdone;
1117 if (vd->vdev_ops->vdev_op_leaf && zio->io_logical != NULL)
1118 zio->io_logical->io_phys_children++;
1119
1120 return (zio);
1121}
1122

--- 90 unchanged lines hidden (view full) ---

1213 zio->io_pipeline = ZIO_DDT_READ_PIPELINE;
1214
1215 return (ZIO_PIPELINE_CONTINUE);
1216}
1217
1218static int
1219zio_write_bp_init(zio_t *zio)
1220{
1170 spa_t *spa = zio->io_spa;
1171 zio_prop_t *zp = &zio->io_prop;
1172 enum zio_compress compress = zp->zp_compress;
1173 blkptr_t *bp = zio->io_bp;
1174 uint64_t lsize = zio->io_size;
1175 uint64_t psize = lsize;
1176 int pass = 1;
1177
1178 /*
1179 * If our children haven't all reached the ready stage,
1180 * wait for them and then repeat this pipeline stage.
1181 */
1182 if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_READY) ||
1183 zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_READY))
1184 return (ZIO_PIPELINE_STOP);
1185
1186 if (!IO_IS_ALLOCATING(zio))
1187 return (ZIO_PIPELINE_CONTINUE);
1188
1221 if (!IO_IS_ALLOCATING(zio))
1222 return (ZIO_PIPELINE_CONTINUE);
1223
1189 if (zio->io_children_ready != NULL) {
1190 /*
1191 * Now that all our children are ready, run the callback
1192 * associated with this zio in case it wants to modify the
1193 * data to be written.
1194 */
1195 ASSERT3U(zp->zp_level, >, 0);
1196 zio->io_children_ready(zio);
1197 }
1198
1199 ASSERT(zio->io_child_type != ZIO_CHILD_DDT);
1200
1201 if (zio->io_bp_override) {
1224 ASSERT(zio->io_child_type != ZIO_CHILD_DDT);
1225
1226 if (zio->io_bp_override) {
1227 blkptr_t *bp = zio->io_bp;
1228 zio_prop_t *zp = &zio->io_prop;
1229
1202 ASSERT(bp->blk_birth != zio->io_txg);
1203 ASSERT(BP_GET_DEDUP(zio->io_bp_override) == 0);
1204
1205 *bp = *zio->io_bp_override;
1206 zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
1207
1208 if (BP_IS_EMBEDDED(bp))
1209 return (ZIO_PIPELINE_CONTINUE);
1210
1211 /*
1212 * If we've been overridden and nopwrite is set then
1213 * set the flag accordingly to indicate that a nopwrite
1214 * has already occurred.
1215 */
1216 if (!BP_IS_HOLE(bp) && zp->zp_nopwrite) {
1217 ASSERT(!zp->zp_dedup);
1230 ASSERT(bp->blk_birth != zio->io_txg);
1231 ASSERT(BP_GET_DEDUP(zio->io_bp_override) == 0);
1232
1233 *bp = *zio->io_bp_override;
1234 zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
1235
1236 if (BP_IS_EMBEDDED(bp))
1237 return (ZIO_PIPELINE_CONTINUE);
1238
1239 /*
1240 * If we've been overridden and nopwrite is set then
1241 * set the flag accordingly to indicate that a nopwrite
1242 * has already occurred.
1243 */
1244 if (!BP_IS_HOLE(bp) && zp->zp_nopwrite) {
1245 ASSERT(!zp->zp_dedup);
1246 ASSERT3U(BP_GET_CHECKSUM(bp), ==, zp->zp_checksum);
1218 zio->io_flags |= ZIO_FLAG_NOPWRITE;
1219 return (ZIO_PIPELINE_CONTINUE);
1220 }
1221
1222 ASSERT(!zp->zp_nopwrite);
1223
1224 if (BP_IS_HOLE(bp) || !zp->zp_dedup)
1225 return (ZIO_PIPELINE_CONTINUE);
1226
1227 ASSERT((zio_checksum_table[zp->zp_checksum].ci_flags &
1228 ZCHECKSUM_FLAG_DEDUP) || zp->zp_dedup_verify);
1229
1230 if (BP_GET_CHECKSUM(bp) == zp->zp_checksum) {
1231 BP_SET_DEDUP(bp, 1);
1232 zio->io_pipeline |= ZIO_STAGE_DDT_WRITE;
1233 return (ZIO_PIPELINE_CONTINUE);
1234 }
1247 zio->io_flags |= ZIO_FLAG_NOPWRITE;
1248 return (ZIO_PIPELINE_CONTINUE);
1249 }
1250
1251 ASSERT(!zp->zp_nopwrite);
1252
1253 if (BP_IS_HOLE(bp) || !zp->zp_dedup)
1254 return (ZIO_PIPELINE_CONTINUE);
1255
1256 ASSERT((zio_checksum_table[zp->zp_checksum].ci_flags &
1257 ZCHECKSUM_FLAG_DEDUP) || zp->zp_dedup_verify);
1258
1259 if (BP_GET_CHECKSUM(bp) == zp->zp_checksum) {
1260 BP_SET_DEDUP(bp, 1);
1261 zio->io_pipeline |= ZIO_STAGE_DDT_WRITE;
1262 return (ZIO_PIPELINE_CONTINUE);
1263 }
1264
1265 /*
1266 * We were unable to handle this as an override bp, treat
1267 * it as a regular write I/O.
1268 */
1235 zio->io_bp_override = NULL;
1269 zio->io_bp_override = NULL;
1236 BP_ZERO(bp);
1270 *bp = zio->io_bp_orig;
1271 zio->io_pipeline = zio->io_orig_pipeline;
1237 }
1238
1272 }
1273
1274 return (ZIO_PIPELINE_CONTINUE);
1275}
1276
1277static int
1278zio_write_compress(zio_t *zio)
1279{
1280 spa_t *spa = zio->io_spa;
1281 zio_prop_t *zp = &zio->io_prop;
1282 enum zio_compress compress = zp->zp_compress;
1283 blkptr_t *bp = zio->io_bp;
1284 uint64_t lsize = zio->io_size;
1285 uint64_t psize = lsize;
1286 int pass = 1;
1287
1288 /*
1289 * If our children haven't all reached the ready stage,
1290 * wait for them and then repeat this pipeline stage.
1291 */
1292 if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_READY) ||
1293 zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_READY))
1294 return (ZIO_PIPELINE_STOP);
1295
1296 if (!IO_IS_ALLOCATING(zio))
1297 return (ZIO_PIPELINE_CONTINUE);
1298
1299 if (zio->io_children_ready != NULL) {
1300 /*
1301 * Now that all our children are ready, run the callback
1302 * associated with this zio in case it wants to modify the
1303 * data to be written.
1304 */
1305 ASSERT3U(zp->zp_level, >, 0);
1306 zio->io_children_ready(zio);
1307 }
1308
1309 ASSERT(zio->io_child_type != ZIO_CHILD_DDT);
1310 ASSERT(zio->io_bp_override == NULL);
1311
1239 if (!BP_IS_HOLE(bp) && bp->blk_birth == zio->io_txg) {
1240 /*
1241 * We're rewriting an existing block, which means we're
1242 * working on behalf of spa_sync(). For spa_sync() to
1243 * converge, it must eventually be the case that we don't
1244 * have to allocate new blocks. But compression changes
1245 * the blocksize, which forces a reallocate, and makes
1246 * convergence take longer. Therefore, after the first

--- 51 unchanged lines hidden (view full) ---

1298 psize = lsize;
1299 } else {
1300 bzero((char *)cbuf + psize, rounded - psize);
1301 psize = rounded;
1302 zio_push_transform(zio, cbuf,
1303 psize, lsize, NULL);
1304 }
1305 }
1312 if (!BP_IS_HOLE(bp) && bp->blk_birth == zio->io_txg) {
1313 /*
1314 * We're rewriting an existing block, which means we're
1315 * working on behalf of spa_sync(). For spa_sync() to
1316 * converge, it must eventually be the case that we don't
1317 * have to allocate new blocks. But compression changes
1318 * the blocksize, which forces a reallocate, and makes
1319 * convergence take longer. Therefore, after the first

--- 51 unchanged lines hidden (view full) ---

1371 psize = lsize;
1372 } else {
1373 bzero((char *)cbuf + psize, rounded - psize);
1374 psize = rounded;
1375 zio_push_transform(zio, cbuf,
1376 psize, lsize, NULL);
1377 }
1378 }
1379
1380 /*
1381 * We were unable to handle this as an override bp, treat
1382 * it as a regular write I/O.
1383 */
1384 zio->io_bp_override = NULL;
1385 *bp = zio->io_bp_orig;
1386 zio->io_pipeline = zio->io_orig_pipeline;
1306 }
1307
1308 /*
1309 * The final pass of spa_sync() must be all rewrites, but the first
1310 * few passes offer a trade-off: allocating blocks defers convergence,
1311 * but newly allocated blocks are sequential, so they can be written
1312 * to disk faster. Therefore, we allow the first few passes of
1313 * spa_sync() to allocate new blocks, but force rewrites after that.

--- 36 unchanged lines hidden (view full) ---

1350 zio->io_pipeline = ZIO_DDT_WRITE_PIPELINE;
1351 }
1352 if (zp->zp_nopwrite) {
1353 ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL);
1354 ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REWRITE));
1355 zio->io_pipeline |= ZIO_STAGE_NOP_WRITE;
1356 }
1357 }
1387 }
1388
1389 /*
1390 * The final pass of spa_sync() must be all rewrites, but the first
1391 * few passes offer a trade-off: allocating blocks defers convergence,
1392 * but newly allocated blocks are sequential, so they can be written
1393 * to disk faster. Therefore, we allow the first few passes of
1394 * spa_sync() to allocate new blocks, but force rewrites after that.

--- 36 unchanged lines hidden (view full) ---

1431 zio->io_pipeline = ZIO_DDT_WRITE_PIPELINE;
1432 }
1433 if (zp->zp_nopwrite) {
1434 ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL);
1435 ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REWRITE));
1436 zio->io_pipeline |= ZIO_STAGE_NOP_WRITE;
1437 }
1438 }
1358
1359 return (ZIO_PIPELINE_CONTINUE);
1360}
1361
1362static int
1363zio_free_bp_init(zio_t *zio)
1364{
1365 blkptr_t *bp = zio->io_bp;
1366

--- 160 unchanged lines hidden (view full) ---

1527 */
1528static zio_pipe_stage_t *zio_pipeline[];
1529
1530void
1531zio_execute(zio_t *zio)
1532{
1533 zio->io_executor = curthread;
1534
1439 return (ZIO_PIPELINE_CONTINUE);
1440}
1441
1442static int
1443zio_free_bp_init(zio_t *zio)
1444{
1445 blkptr_t *bp = zio->io_bp;
1446

--- 160 unchanged lines hidden (view full) ---

1607 */
1608static zio_pipe_stage_t *zio_pipeline[];
1609
1610void
1611zio_execute(zio_t *zio)
1612{
1613 zio->io_executor = curthread;
1614
1615 ASSERT3U(zio->io_queued_timestamp, >, 0);
1616
1535 while (zio->io_stage < ZIO_STAGE_DONE) {
1536 enum zio_stage pipeline = zio->io_pipeline;
1537 enum zio_stage stage = zio->io_stage;
1538 int rv;
1539
1540 ASSERT(!MUTEX_HELD(&zio->io_lock));
1541 ASSERT(ISP2(stage));
1542 ASSERT(zio->io_stall == NULL);

--- 17 unchanged lines hidden (view full) ---

1560 zio_taskq_member(zio, ZIO_TASKQ_INTERRUPT)) {
1561 boolean_t cut = (stage == ZIO_STAGE_VDEV_IO_START) ?
1562 zio_requeue_io_start_cut_in_line : B_FALSE;
1563 zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, cut);
1564 return;
1565 }
1566
1567 zio->io_stage = stage;
1617 while (zio->io_stage < ZIO_STAGE_DONE) {
1618 enum zio_stage pipeline = zio->io_pipeline;
1619 enum zio_stage stage = zio->io_stage;
1620 int rv;
1621
1622 ASSERT(!MUTEX_HELD(&zio->io_lock));
1623 ASSERT(ISP2(stage));
1624 ASSERT(zio->io_stall == NULL);

--- 17 unchanged lines hidden (view full) ---

1642 zio_taskq_member(zio, ZIO_TASKQ_INTERRUPT)) {
1643 boolean_t cut = (stage == ZIO_STAGE_VDEV_IO_START) ?
1644 zio_requeue_io_start_cut_in_line : B_FALSE;
1645 zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, cut);
1646 return;
1647 }
1648
1649 zio->io_stage = stage;
1650 zio->io_pipeline_trace |= zio->io_stage;
1568 rv = zio_pipeline[highbit64(stage) - 1](zio);
1569
1570 if (rv == ZIO_PIPELINE_STOP)
1571 return;
1572
1573 ASSERT(rv == ZIO_PIPELINE_CONTINUE);
1574 }
1575}

--- 7 unchanged lines hidden (view full) ---

1583zio_wait(zio_t *zio)
1584{
1585 int error;
1586
1587 ASSERT(zio->io_stage == ZIO_STAGE_OPEN);
1588 ASSERT(zio->io_executor == NULL);
1589
1590 zio->io_waiter = curthread;
1651 rv = zio_pipeline[highbit64(stage) - 1](zio);
1652
1653 if (rv == ZIO_PIPELINE_STOP)
1654 return;
1655
1656 ASSERT(rv == ZIO_PIPELINE_CONTINUE);
1657 }
1658}

--- 7 unchanged lines hidden (view full) ---

1666zio_wait(zio_t *zio)
1667{
1668 int error;
1669
1670 ASSERT(zio->io_stage == ZIO_STAGE_OPEN);
1671 ASSERT(zio->io_executor == NULL);
1672
1673 zio->io_waiter = curthread;
1674 ASSERT0(zio->io_queued_timestamp);
1675 zio->io_queued_timestamp = gethrtime();
1591
1592 zio_execute(zio);
1593
1594 mutex_enter(&zio->io_lock);
1595 while (zio->io_executor != NULL)
1596 cv_wait(&zio->io_cv, &zio->io_lock);
1597 mutex_exit(&zio->io_lock);
1598

--- 15 unchanged lines hidden (view full) ---

1614 * We add it to the spa_async_root_zio "Godfather" I/O which
1615 * will ensure they complete prior to unloading the pool.
1616 */
1617 spa_t *spa = zio->io_spa;
1618
1619 zio_add_child(spa->spa_async_zio_root[CPU_SEQID], zio);
1620 }
1621
1676
1677 zio_execute(zio);
1678
1679 mutex_enter(&zio->io_lock);
1680 while (zio->io_executor != NULL)
1681 cv_wait(&zio->io_cv, &zio->io_lock);
1682 mutex_exit(&zio->io_lock);
1683

--- 15 unchanged lines hidden (view full) ---

1699 * We add it to the spa_async_root_zio "Godfather" I/O which
1700 * will ensure they complete prior to unloading the pool.
1701 */
1702 spa_t *spa = zio->io_spa;
1703
1704 zio_add_child(spa->spa_async_zio_root[CPU_SEQID], zio);
1705 }
1706
1707 ASSERT0(zio->io_queued_timestamp);
1708 zio->io_queued_timestamp = gethrtime();
1622 zio_execute(zio);
1623}
1624
1625/*
1626 * ==========================================================================
1627 * Reexecute or suspend/resume failed I/O
1628 * ==========================================================================
1629 */

--- 8 unchanged lines hidden (view full) ---

1638 ASSERT(pio->io_gang_leader == NULL);
1639 ASSERT(pio->io_gang_tree == NULL);
1640
1641 pio->io_flags = pio->io_orig_flags;
1642 pio->io_stage = pio->io_orig_stage;
1643 pio->io_pipeline = pio->io_orig_pipeline;
1644 pio->io_reexecute = 0;
1645 pio->io_flags |= ZIO_FLAG_REEXECUTED;
1709 zio_execute(zio);
1710}
1711
1712/*
1713 * ==========================================================================
1714 * Reexecute or suspend/resume failed I/O
1715 * ==========================================================================
1716 */

--- 8 unchanged lines hidden (view full) ---

1725 ASSERT(pio->io_gang_leader == NULL);
1726 ASSERT(pio->io_gang_tree == NULL);
1727
1728 pio->io_flags = pio->io_orig_flags;
1729 pio->io_stage = pio->io_orig_stage;
1730 pio->io_pipeline = pio->io_orig_pipeline;
1731 pio->io_reexecute = 0;
1732 pio->io_flags |= ZIO_FLAG_REEXECUTED;
1733 pio->io_pipeline_trace = 0;
1646 pio->io_error = 0;
1647 for (int w = 0; w < ZIO_WAIT_TYPES; w++)
1648 pio->io_state[w] = 0;
1649 for (int c = 0; c < ZIO_CHILD_TYPES; c++)
1650 pio->io_child_error[c] = 0;
1651
1652 if (IO_IS_ALLOCATING(pio))
1653 BP_ZERO(pio->io_bp);
1654
1655 /*
1656 * As we reexecute pio's children, new children could be created.
1657 * New children go to the head of pio's io_child_list, however,
1658 * so we will (correctly) not reexecute them. The key is that
1659 * the remainder of pio's io_child_list, from 'cio_next' onward,
1660 * cannot be affected by any side effects of reexecuting 'cio'.
1661 */
1734 pio->io_error = 0;
1735 for (int w = 0; w < ZIO_WAIT_TYPES; w++)
1736 pio->io_state[w] = 0;
1737 for (int c = 0; c < ZIO_CHILD_TYPES; c++)
1738 pio->io_child_error[c] = 0;
1739
1740 if (IO_IS_ALLOCATING(pio))
1741 BP_ZERO(pio->io_bp);
1742
1743 /*
1744 * As we reexecute pio's children, new children could be created.
1745 * New children go to the head of pio's io_child_list, however,
1746 * so we will (correctly) not reexecute them. The key is that
1747 * the remainder of pio's io_child_list, from 'cio_next' onward,
1748 * cannot be affected by any side effects of reexecuting 'cio'.
1749 */
1662 for (cio = zio_walk_children(pio); cio != NULL; cio = cio_next) {
1663 cio_next = zio_walk_children(pio);
1750 zio_link_t *zl = NULL;
1751 for (cio = zio_walk_children(pio, &zl); cio != NULL; cio = cio_next) {
1752 cio_next = zio_walk_children(pio, &zl);
1664 mutex_enter(&pio->io_lock);
1665 for (int w = 0; w < ZIO_WAIT_TYPES; w++)
1666 pio->io_children[cio->io_child_type][w]++;
1667 mutex_exit(&pio->io_lock);
1668 zio_reexecute(cio);
1669 }
1670
1671 /*
1672 * Now that all children have been reexecuted, execute the parent.
1673 * We don't reexecute "The Godfather" I/O here as it's the
1674 * responsibility of the caller to wait on him.
1675 */
1753 mutex_enter(&pio->io_lock);
1754 for (int w = 0; w < ZIO_WAIT_TYPES; w++)
1755 pio->io_children[cio->io_child_type][w]++;
1756 mutex_exit(&pio->io_lock);
1757 zio_reexecute(cio);
1758 }
1759
1760 /*
1761 * Now that all children have been reexecuted, execute the parent.
1762 * We don't reexecute "The Godfather" I/O here as it's the
1763 * responsibility of the caller to wait on him.
1764 */
1676 if (!(pio->io_flags & ZIO_FLAG_GODFATHER))
1765 if (!(pio->io_flags & ZIO_FLAG_GODFATHER)) {
1766 pio->io_queued_timestamp = gethrtime();
1677 zio_execute(pio);
1767 zio_execute(pio);
1768 }
1678}
1679
1680void
1681zio_suspend(spa_t *spa, zio_t *zio)
1682{
1683 if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_PANIC)
1684 fm_panic("Pool '%s' has encountered an uncorrectable I/O "
1685 "failure and the failure mode property for this pool "

--- 377 unchanged lines hidden (view full) ---

2063 }
2064 mutex_exit(&pio->io_lock);
2065}
2066
2067static int
2068zio_write_gang_block(zio_t *pio)
2069{
2070 spa_t *spa = pio->io_spa;
1769}
1770
1771void
1772zio_suspend(spa_t *spa, zio_t *zio)
1773{
1774 if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_PANIC)
1775 fm_panic("Pool '%s' has encountered an uncorrectable I/O "
1776 "failure and the failure mode property for this pool "

--- 377 unchanged lines hidden (view full) ---

2154 }
2155 mutex_exit(&pio->io_lock);
2156}
2157
2158static int
2159zio_write_gang_block(zio_t *pio)
2160{
2161 spa_t *spa = pio->io_spa;
2162 metaslab_class_t *mc = spa_normal_class(spa);
2071 blkptr_t *bp = pio->io_bp;
2072 zio_t *gio = pio->io_gang_leader;
2073 zio_t *zio;
2074 zio_gang_node_t *gn, **gnpp;
2075 zio_gbh_phys_t *gbh;
2076 uint64_t txg = pio->io_txg;
2077 uint64_t resid = pio->io_size;
2078 uint64_t lsize;
2079 int copies = gio->io_prop.zp_copies;
2080 int gbh_copies = MIN(copies + 1, spa_max_replication(spa));
2081 zio_prop_t zp;
2082 int error;
2083
2163 blkptr_t *bp = pio->io_bp;
2164 zio_t *gio = pio->io_gang_leader;
2165 zio_t *zio;
2166 zio_gang_node_t *gn, **gnpp;
2167 zio_gbh_phys_t *gbh;
2168 uint64_t txg = pio->io_txg;
2169 uint64_t resid = pio->io_size;
2170 uint64_t lsize;
2171 int copies = gio->io_prop.zp_copies;
2172 int gbh_copies = MIN(copies + 1, spa_max_replication(spa));
2173 zio_prop_t zp;
2174 int error;
2175
2084 error = metaslab_alloc(spa, spa_normal_class(spa), SPA_GANGBLOCKSIZE,
2085 bp, gbh_copies, txg, pio == gio ? NULL : gio->io_bp,
2086 METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER);
2176 int flags = METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER;
2177 if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
2178 ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
2179 ASSERT(!(pio->io_flags & ZIO_FLAG_NODATA));
2180
2181 flags |= METASLAB_ASYNC_ALLOC;
2182 VERIFY(refcount_held(&mc->mc_alloc_slots, pio));
2183
2184 /*
2185 * The logical zio has already placed a reservation for
2186 * 'copies' allocation slots but gang blocks may require
2187 * additional copies. These additional copies
2188 * (i.e. gbh_copies - copies) are guaranteed to succeed
2189 * since metaslab_class_throttle_reserve() always allows
2190 * additional reservations for gang blocks.
2191 */
2192 VERIFY(metaslab_class_throttle_reserve(mc, gbh_copies - copies,
2193 pio, flags));
2194 }
2195
2196 error = metaslab_alloc(spa, mc, SPA_GANGBLOCKSIZE,
2197 bp, gbh_copies, txg, pio == gio ? NULL : gio->io_bp, flags, pio);
2087 if (error) {
2198 if (error) {
2199 if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
2200 ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
2201 ASSERT(!(pio->io_flags & ZIO_FLAG_NODATA));
2202
2203 /*
2204 * If we failed to allocate the gang block header then
2205 * we remove any additional allocation reservations that
2206 * we placed here. The original reservation will
2207 * be removed when the logical I/O goes to the ready
2208 * stage.
2209 */
2210 metaslab_class_throttle_unreserve(mc,
2211 gbh_copies - copies, pio);
2212 }
2088 pio->io_error = error;
2089 return (ZIO_PIPELINE_CONTINUE);
2090 }
2091
2092 if (pio == gio) {
2093 gnpp = &gio->io_gang_tree;
2094 } else {
2095 gnpp = pio->io_private;

--- 22 unchanged lines hidden (view full) ---

2118 zp.zp_compress = ZIO_COMPRESS_OFF;
2119 zp.zp_type = DMU_OT_NONE;
2120 zp.zp_level = 0;
2121 zp.zp_copies = gio->io_prop.zp_copies;
2122 zp.zp_dedup = B_FALSE;
2123 zp.zp_dedup_verify = B_FALSE;
2124 zp.zp_nopwrite = B_FALSE;
2125
2213 pio->io_error = error;
2214 return (ZIO_PIPELINE_CONTINUE);
2215 }
2216
2217 if (pio == gio) {
2218 gnpp = &gio->io_gang_tree;
2219 } else {
2220 gnpp = pio->io_private;

--- 22 unchanged lines hidden (view full) ---

2243 zp.zp_compress = ZIO_COMPRESS_OFF;
2244 zp.zp_type = DMU_OT_NONE;
2245 zp.zp_level = 0;
2246 zp.zp_copies = gio->io_prop.zp_copies;
2247 zp.zp_dedup = B_FALSE;
2248 zp.zp_dedup_verify = B_FALSE;
2249 zp.zp_nopwrite = B_FALSE;
2250
2126 zio_nowait(zio_write(zio, spa, txg, &gbh->zg_blkptr[g],
2251 zio_t *cio = zio_write(zio, spa, txg, &gbh->zg_blkptr[g],
2127 (char *)pio->io_data + (pio->io_size - resid), lsize, &zp,
2128 zio_write_gang_member_ready, NULL, NULL, NULL,
2129 &gn->gn_child[g], pio->io_priority,
2252 (char *)pio->io_data + (pio->io_size - resid), lsize, &zp,
2253 zio_write_gang_member_ready, NULL, NULL, NULL,
2254 &gn->gn_child[g], pio->io_priority,
2130 ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark));
2255 ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark);
2256
2257 if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
2258 ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
2259 ASSERT(!(pio->io_flags & ZIO_FLAG_NODATA));
2260
2261 /*
2262 * Gang children won't throttle but we should
2263 * account for their work, so reserve an allocation
2264 * slot for them here.
2265 */
2266 VERIFY(metaslab_class_throttle_reserve(mc,
2267 zp.zp_copies, cio, flags));
2268 }
2269 zio_nowait(cio);
2131 }
2132
2133 /*
2134 * Set pio's pipeline to just wait for zio to finish.
2135 */
2136 pio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
2137
2138 zio_nowait(zio);

--- 241 unchanged lines hidden (view full) ---

2380 return;
2381
2382 ddt_enter(ddt);
2383
2384 ASSERT(dde->dde_lead_zio[p] == zio);
2385
2386 ddt_phys_fill(ddp, zio->io_bp);
2387
2270 }
2271
2272 /*
2273 * Set pio's pipeline to just wait for zio to finish.
2274 */
2275 pio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
2276
2277 zio_nowait(zio);

--- 241 unchanged lines hidden (view full) ---

2519 return;
2520
2521 ddt_enter(ddt);
2522
2523 ASSERT(dde->dde_lead_zio[p] == zio);
2524
2525 ddt_phys_fill(ddp, zio->io_bp);
2526
2388 while ((pio = zio_walk_parents(zio)) != NULL)
2527 zio_link_t *zl = NULL;
2528 while ((pio = zio_walk_parents(zio, &zl)) != NULL)
2389 ddt_bp_fill(ddp, pio->io_bp, zio->io_txg);
2390
2391 ddt_exit(ddt);
2392}
2393
2394static void
2395zio_ddt_child_write_done(zio_t *zio)
2396{

--- 4 unchanged lines hidden (view full) ---

2401
2402 ddt_enter(ddt);
2403
2404 ASSERT(ddp->ddp_refcnt == 0);
2405 ASSERT(dde->dde_lead_zio[p] == zio);
2406 dde->dde_lead_zio[p] = NULL;
2407
2408 if (zio->io_error == 0) {
2529 ddt_bp_fill(ddp, pio->io_bp, zio->io_txg);
2530
2531 ddt_exit(ddt);
2532}
2533
2534static void
2535zio_ddt_child_write_done(zio_t *zio)
2536{

--- 4 unchanged lines hidden (view full) ---

2541
2542 ddt_enter(ddt);
2543
2544 ASSERT(ddp->ddp_refcnt == 0);
2545 ASSERT(dde->dde_lead_zio[p] == zio);
2546 dde->dde_lead_zio[p] = NULL;
2547
2548 if (zio->io_error == 0) {
2409 while (zio_walk_parents(zio) != NULL)
2549 zio_link_t *zl = NULL;
2550 while (zio_walk_parents(zio, &zl) != NULL)
2410 ddt_phys_addref(ddp);
2411 } else {
2412 ddt_phys_clear(ddp);
2413 }
2414
2415 ddt_exit(ddt);
2416}
2417

--- 161 unchanged lines hidden (view full) ---

2579 return (ZIO_PIPELINE_CONTINUE);
2580}
2581
2582/*
2583 * ==========================================================================
2584 * Allocate and free blocks
2585 * ==========================================================================
2586 */
2551 ddt_phys_addref(ddp);
2552 } else {
2553 ddt_phys_clear(ddp);
2554 }
2555
2556 ddt_exit(ddt);
2557}
2558

--- 161 unchanged lines hidden (view full) ---

2720 return (ZIO_PIPELINE_CONTINUE);
2721}
2722
2723/*
2724 * ==========================================================================
2725 * Allocate and free blocks
2726 * ==========================================================================
2727 */
2728
/*
 * Select the next queued allocating zio that may proceed to allocation.
 *
 * The caller must hold spa->spa_alloc_lock (asserted below).  Only the
 * first entry of spa->spa_alloc_tree is considered.  On success that zio
 * has had an allocation-throttle reservation placed for its zp_copies,
 * has been removed from the tree, and is returned.  NULL is returned when
 * the tree is empty or when the reservation is refused; in the latter
 * case the zio is left in the tree.
 */
2729static zio_t *
2730zio_io_to_allocate(spa_t *spa)
2731{
2732 zio_t *zio;
2733
2734 ASSERT(MUTEX_HELD(&spa->spa_alloc_lock));
2735
2736 zio = avl_first(&spa->spa_alloc_tree);
2737 if (zio == NULL)
2738 return (NULL);
2739
2740 ASSERT(IO_IS_ALLOCATING(zio));
2741
2742 /*
2743 * Try to place a reservation for this zio. If we're unable to
2744 * reserve then we throttle.
2745 */
2746 if (!metaslab_class_throttle_reserve(spa_normal_class(spa),
2747 zio->io_prop.zp_copies, zio, 0)) {
2748 return (NULL);
2749 }
2750
	/* Reservation placed: dequeue the zio so it is handed out only once. */
2751 avl_remove(&spa->spa_alloc_tree, zio);
2752 ASSERT3U(zio->io_stage, <, ZIO_STAGE_DVA_ALLOCATE);
2753
2754 return (zio);
2755}
2756
2587static int
/*
 * Pipeline stage that gates allocating writes on the allocation throttle.
 *
 * Writes that are exempt (see the first test below) continue immediately.
 * Any other zio is added to spa->spa_alloc_tree and zio_io_to_allocate()
 * is asked which queued zio may allocate now:
 *  - if it is this zio, ZIO_PIPELINE_CONTINUE is returned;
 *  - if it is a different zio, that zio is dispatched to the issue taskq
 *    and ZIO_PIPELINE_STOP is returned for this one;
 *  - if it is none, ZIO_PIPELINE_STOP is returned and this zio remains
 *    queued in the tree.
 */
2757static int
2758zio_dva_throttle(zio_t *zio)
2759{
2760 spa_t *spa = zio->io_spa;
2761 zio_t *nio;
2762
	/*
	 * Bypass the throttle for sync-priority writes, when the normal
	 * class has the throttle disabled, for gang children, and for
	 * NODATA zios.
	 */
2763 if (zio->io_priority == ZIO_PRIORITY_SYNC_WRITE ||
2764 !spa_normal_class(zio->io_spa)->mc_alloc_throttle_enabled ||
2765 zio->io_child_type == ZIO_CHILD_GANG ||
2766 zio->io_flags & ZIO_FLAG_NODATA) {
2767 return (ZIO_PIPELINE_CONTINUE);
2768 }
2769
2770 ASSERT(zio->io_child_type > ZIO_CHILD_GANG);
2771
2772 ASSERT3U(zio->io_queued_timestamp, >, 0);
2773 ASSERT(zio->io_stage == ZIO_STAGE_DVA_THROTTLE);
2774
	/* Queue this zio and pick the next one to allocate under one lock hold. */
2775 mutex_enter(&spa->spa_alloc_lock);
2776
2777 ASSERT(zio->io_type == ZIO_TYPE_WRITE);
2778 avl_add(&spa->spa_alloc_tree, zio);
2779
2780 nio = zio_io_to_allocate(zio->io_spa);
2781 mutex_exit(&spa->spa_alloc_lock);
2782
	/* We were selected ourselves: keep running in this thread. */
2783 if (nio == zio)
2784 return (ZIO_PIPELINE_CONTINUE);
2785
2786 if (nio != NULL) {
		/* The selected zio is asserted to have been queued no later than us. */
2787 ASSERT3U(nio->io_queued_timestamp, <=,
2788 zio->io_queued_timestamp);
2789 ASSERT(nio->io_stage == ZIO_STAGE_DVA_THROTTLE);
2790 /*
2791 * We are passing control to a new zio so make sure that
2792 * it is processed by a different thread. We do this to
2793 * avoid stack overflows that can occur when parents are
2794 * throttled and children are making progress. We allow
2795 * it to go to the head of the taskq since it's already
2796 * been waiting.
2797 */
2798 zio_taskq_dispatch(nio, ZIO_TASKQ_ISSUE, B_TRUE);
2799 }
	/* This zio was not selected; it is still in spa_alloc_tree. */
2800 return (ZIO_PIPELINE_STOP);
2801}
2802
/*
 * Hand the next queued allocating zio, if any, to the issue taskq.
 *
 * Takes spa->spa_alloc_lock only around the call to zio_io_to_allocate();
 * the dispatch itself happens after the lock has been dropped.  Does
 * nothing when zio_io_to_allocate() returns NULL.
 */
2803void
2804zio_allocate_dispatch(spa_t *spa)
2805{
2806 zio_t *zio;
2807
2808 mutex_enter(&spa->spa_alloc_lock);
2809 zio = zio_io_to_allocate(spa);
2810 mutex_exit(&spa->spa_alloc_lock);
2811 if (zio == NULL)
2812 return;
2813
	/* The selected zio is expected to be parked at the throttle stage, error-free. */
2814 ASSERT3U(zio->io_stage, ==, ZIO_STAGE_DVA_THROTTLE);
2815 ASSERT0(zio->io_error);
2816 zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, B_TRUE);
2817}
2818
2819static int
2588zio_dva_allocate(zio_t *zio)
2589{
2590 spa_t *spa = zio->io_spa;
2591 metaslab_class_t *mc = spa_normal_class(spa);
2592 blkptr_t *bp = zio->io_bp;
2593 int error;
2594 int flags = 0;
2595
2596 if (zio->io_gang_leader == NULL) {
2597 ASSERT(zio->io_child_type > ZIO_CHILD_GANG);
2598 zio->io_gang_leader = zio;
2599 }
2600
2601 ASSERT(BP_IS_HOLE(bp));
2602 ASSERT0(BP_GET_NDVAS(bp));
2603 ASSERT3U(zio->io_prop.zp_copies, >, 0);
2604 ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa));
2605 ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp));
2606
2820zio_dva_allocate(zio_t *zio)
2821{
2822 spa_t *spa = zio->io_spa;
2823 metaslab_class_t *mc = spa_normal_class(spa);
2824 blkptr_t *bp = zio->io_bp;
2825 int error;
2826 int flags = 0;
2827
2828 if (zio->io_gang_leader == NULL) {
2829 ASSERT(zio->io_child_type > ZIO_CHILD_GANG);
2830 zio->io_gang_leader = zio;
2831 }
2832
2833 ASSERT(BP_IS_HOLE(bp));
2834 ASSERT0(BP_GET_NDVAS(bp));
2835 ASSERT3U(zio->io_prop.zp_copies, >, 0);
2836 ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa));
2837 ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp));
2838
2607 /*
2608 * The dump device does not support gang blocks so allocation on
2609 * behalf of the dump device (i.e. ZIO_FLAG_NODATA) must avoid
2610 * the "fast" gang feature.
2611 */
2612 flags |= (zio->io_flags & ZIO_FLAG_NODATA) ? METASLAB_GANG_AVOID : 0;
2613 flags |= (zio->io_flags & ZIO_FLAG_GANG_CHILD) ?
2614 METASLAB_GANG_CHILD : 0;
2839 if (zio->io_flags & ZIO_FLAG_NODATA) {
2840 flags |= METASLAB_DONT_THROTTLE;
2841 }
2842 if (zio->io_flags & ZIO_FLAG_GANG_CHILD) {
2843 flags |= METASLAB_GANG_CHILD;
2844 }
2845 if (zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE) {
2846 flags |= METASLAB_ASYNC_ALLOC;
2847 }
2848
2615 error = metaslab_alloc(spa, mc, zio->io_size, bp,
2849 error = metaslab_alloc(spa, mc, zio->io_size, bp,
2616 zio->io_prop.zp_copies, zio->io_txg, NULL, flags);
2850 zio->io_prop.zp_copies, zio->io_txg, NULL, flags, zio);
2617
2851
2618 if (error) {
2852 if (error != 0) {
2619 spa_dbgmsg(spa, "%s: metaslab allocation failure: zio %p, "
2620 "size %llu, error %d", spa_name(spa), zio, zio->io_size,
2621 error);
2622 if (error == ENOSPC && zio->io_size > SPA_MINBLOCKSIZE)
2623 return (zio_write_gang_block(zio));
2624 zio->io_error = error;
2625 }
2626

--- 48 unchanged lines hidden (view full) ---

2675int
2676zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp,
2677 uint64_t size, boolean_t use_slog)
2678{
2679 int error = 1;
2680
2681 ASSERT(txg > spa_syncing_txg(spa));
2682
2853 spa_dbgmsg(spa, "%s: metaslab allocation failure: zio %p, "
2854 "size %llu, error %d", spa_name(spa), zio, zio->io_size,
2855 error);
2856 if (error == ENOSPC && zio->io_size > SPA_MINBLOCKSIZE)
2857 return (zio_write_gang_block(zio));
2858 zio->io_error = error;
2859 }
2860

--- 48 unchanged lines hidden (view full) ---

2909int
2910zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp,
2911 uint64_t size, boolean_t use_slog)
2912{
2913 int error = 1;
2914
2915 ASSERT(txg > spa_syncing_txg(spa));
2916
2683 /*
2684 * ZIL blocks are always contiguous (i.e. not gang blocks) so we
2685 * set the METASLAB_GANG_AVOID flag so that they don't "fast gang"
2686 * when allocating them.
2687 */
2688 if (use_slog) {
2689 error = metaslab_alloc(spa, spa_log_class(spa), size,
2917 if (use_slog) {
2918 error = metaslab_alloc(spa, spa_log_class(spa), size,
2690 new_bp, 1, txg, old_bp,
2691 METASLAB_HINTBP_AVOID | METASLAB_GANG_AVOID);
2919 new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID, NULL);
2692 }
2693
2694 if (error) {
2695 error = metaslab_alloc(spa, spa_normal_class(spa), size,
2920 }
2921
2922 if (error) {
2923 error = metaslab_alloc(spa, spa_normal_class(spa), size,
2696 new_bp, 1, txg, old_bp,
2697 METASLAB_HINTBP_AVOID);
2924 new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID, NULL);
2698 }
2699
2700 if (error == 0) {
2701 BP_SET_LSIZE(new_bp, size);
2702 BP_SET_PSIZE(new_bp, size);
2703 BP_SET_COMPRESS(new_bp, ZIO_COMPRESS_OFF);
2704 BP_SET_CHECKSUM(new_bp,
2705 spa_version(spa) >= SPA_VERSION_SLIM_ZIL

--- 59 unchanged lines hidden (view full) ---

2765 }
2766
2767 if (vd->vdev_ops->vdev_op_leaf && zio->io_type == ZIO_TYPE_FREE &&
2768 zio->io_priority == ZIO_PRIORITY_NOW) {
2769 trim_map_free(vd, zio->io_offset, zio->io_size, zio->io_txg);
2770 return (ZIO_PIPELINE_CONTINUE);
2771 }
2772
2925 }
2926
2927 if (error == 0) {
2928 BP_SET_LSIZE(new_bp, size);
2929 BP_SET_PSIZE(new_bp, size);
2930 BP_SET_COMPRESS(new_bp, ZIO_COMPRESS_OFF);
2931 BP_SET_CHECKSUM(new_bp,
2932 spa_version(spa) >= SPA_VERSION_SLIM_ZIL

--- 59 unchanged lines hidden (view full) ---

2992 }
2993
2994 if (vd->vdev_ops->vdev_op_leaf && zio->io_type == ZIO_TYPE_FREE &&
2995 zio->io_priority == ZIO_PRIORITY_NOW) {
2996 trim_map_free(vd, zio->io_offset, zio->io_size, zio->io_txg);
2997 return (ZIO_PIPELINE_CONTINUE);
2998 }
2999
3000 ASSERT3P(zio->io_logical, !=, zio);
3001
2773 /*
2774 * We keep track of time-sensitive I/Os so that the scan thread
2775 * can quickly react to certain workloads. In particular, we care
2776 * about non-scrubbing, top-level reads and writes with the following
2777 * characteristics:
2778 * - synchronous writes of user data to non-slog devices
2779 * - any reads of user data
2780 * When these conditions are met, adjust the timestamp of spa_last_io

--- 402 unchanged lines hidden (view full) ---

3183 * I/O completion
3184 * ==========================================================================
3185 */
3186static int
3187zio_ready(zio_t *zio)
3188{
3189 blkptr_t *bp = zio->io_bp;
3190 zio_t *pio, *pio_next;
3002 /*
3003 * We keep track of time-sensitive I/Os so that the scan thread
3004 * can quickly react to certain workloads. In particular, we care
3005 * about non-scrubbing, top-level reads and writes with the following
3006 * characteristics:
3007 * - synchronous writes of user data to non-slog devices
3008 * - any reads of user data
3009 * When these conditions are met, adjust the timestamp of spa_last_io

--- 402 unchanged lines hidden (view full) ---

3412 * I/O completion
3413 * ==========================================================================
3414 */
3415static int
3416zio_ready(zio_t *zio)
3417{
3418 blkptr_t *bp = zio->io_bp;
3419 zio_t *pio, *pio_next;
3420 zio_link_t *zl = NULL;
3191
3192 if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_READY) ||
3193 zio_wait_for_children(zio, ZIO_CHILD_DDT, ZIO_WAIT_READY))
3194 return (ZIO_PIPELINE_STOP);
3195
3196 if (zio->io_ready) {
3197 ASSERT(IO_IS_ALLOCATING(zio));
3198 ASSERT(bp->blk_birth == zio->io_txg || BP_IS_HOLE(bp) ||
3199 (zio->io_flags & ZIO_FLAG_NOPWRITE));
3200 ASSERT(zio->io_children[ZIO_CHILD_GANG][ZIO_WAIT_READY] == 0);
3201
3202 zio->io_ready(zio);
3203 }
3204
3205 if (bp != NULL && bp != &zio->io_bp_copy)
3206 zio->io_bp_copy = *bp;
3207
3421
3422 if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_READY) ||
3423 zio_wait_for_children(zio, ZIO_CHILD_DDT, ZIO_WAIT_READY))
3424 return (ZIO_PIPELINE_STOP);
3425
3426 if (zio->io_ready) {
3427 ASSERT(IO_IS_ALLOCATING(zio));
3428 ASSERT(bp->blk_birth == zio->io_txg || BP_IS_HOLE(bp) ||
3429 (zio->io_flags & ZIO_FLAG_NOPWRITE));
3430 ASSERT(zio->io_children[ZIO_CHILD_GANG][ZIO_WAIT_READY] == 0);
3431
3432 zio->io_ready(zio);
3433 }
3434
3435 if (bp != NULL && bp != &zio->io_bp_copy)
3436 zio->io_bp_copy = *bp;
3437
3208 if (zio->io_error)
3438 if (zio->io_error != 0) {
3209 zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
3210
3439 zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
3440
3441 if (zio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
3442 ASSERT(IO_IS_ALLOCATING(zio));
3443 ASSERT(zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
3444 /*
3445 * We were unable to allocate anything, unreserve and
3446 * issue the next I/O to allocate.
3447 */
3448 metaslab_class_throttle_unreserve(
3449 spa_normal_class(zio->io_spa),
3450 zio->io_prop.zp_copies, zio);
3451 zio_allocate_dispatch(zio->io_spa);
3452 }
3453 }
3454
3211 mutex_enter(&zio->io_lock);
3212 zio->io_state[ZIO_WAIT_READY] = 1;
3455 mutex_enter(&zio->io_lock);
3456 zio->io_state[ZIO_WAIT_READY] = 1;
3213 pio = zio_walk_parents(zio);
3457 pio = zio_walk_parents(zio, &zl);
3214 mutex_exit(&zio->io_lock);
3215
3216 /*
3217 * As we notify zio's parents, new parents could be added.
3218 * New parents go to the head of zio's io_parent_list, however,
3219 * so we will (correctly) not notify them. The remainder of zio's
3220 * io_parent_list, from 'pio_next' onward, cannot change because
3221 * all parents must wait for us to be done before they can be done.
3222 */
3223 for (; pio != NULL; pio = pio_next) {
3458 mutex_exit(&zio->io_lock);
3459
3460 /*
3461 * As we notify zio's parents, new parents could be added.
3462 * New parents go to the head of zio's io_parent_list, however,
3463 * so we will (correctly) not notify them. The remainder of zio's
3464 * io_parent_list, from 'pio_next' onward, cannot change because
3465 * all parents must wait for us to be done before they can be done.
3466 */
3467 for (; pio != NULL; pio = pio_next) {
3224 pio_next = zio_walk_parents(zio);
3468 pio_next = zio_walk_parents(zio, &zl);
3225 zio_notify_parent(pio, zio, ZIO_WAIT_READY);
3226 }
3227
3228 if (zio->io_flags & ZIO_FLAG_NODATA) {
3229 if (BP_IS_GANG(bp)) {
3230 zio->io_flags &= ~ZIO_FLAG_NODATA;
3231 } else {
3232 ASSERT((uintptr_t)zio->io_data < SPA_MAXBLOCKSIZE);
3233 zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES;
3234 }
3235 }
3236
3237 if (zio_injection_enabled &&
3238 zio->io_spa->spa_syncing_txg == zio->io_txg)
3239 zio_handle_ignored_writes(zio);
3240
3241 return (ZIO_PIPELINE_CONTINUE);
3242}
3243
3469 zio_notify_parent(pio, zio, ZIO_WAIT_READY);
3470 }
3471
3472 if (zio->io_flags & ZIO_FLAG_NODATA) {
3473 if (BP_IS_GANG(bp)) {
3474 zio->io_flags &= ~ZIO_FLAG_NODATA;
3475 } else {
3476 ASSERT((uintptr_t)zio->io_data < SPA_MAXBLOCKSIZE);
3477 zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES;
3478 }
3479 }
3480
3481 if (zio_injection_enabled &&
3482 zio->io_spa->spa_syncing_txg == zio->io_txg)
3483 zio_handle_ignored_writes(zio);
3484
3485 return (ZIO_PIPELINE_CONTINUE);
3486}
3487
3488/*
3489 * Update the allocation throttle accounting.
 *
 * 'zio' is asserted to be an allocating, async-priority write of child
 * type ZIO_CHILD_VDEV whose io_vd is a top-level vdev.  The function
 * locates the parent zio that performed the allocation, decrements the
 * metaslab group allocation count charged to that parent for this vdev,
 * releases one throttle reservation held by that parent, and then calls
 * zio_allocate_dispatch() so another queued zio can proceed.
 *
 * NOTE(review): the result of zio_unique_parent() is dereferenced without
 * a NULL check; presumably every such vdev child has exactly one parent
 * -- confirm against zio_unique_parent().
3490 */
3491static void
3492zio_dva_throttle_done(zio_t *zio)
3493{
	/* lio is only referenced by the assertions below. */
3494 zio_t *lio = zio->io_logical;
3495 zio_t *pio = zio_unique_parent(zio);
3496 vdev_t *vd = zio->io_vd;
3497 int flags = METASLAB_ASYNC_ALLOC;
3498
3499 ASSERT3P(zio->io_bp, !=, NULL);
3500 ASSERT3U(zio->io_type, ==, ZIO_TYPE_WRITE);
3501 ASSERT3U(zio->io_priority, ==, ZIO_PRIORITY_ASYNC_WRITE);
3502 ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_VDEV);
3503 ASSERT(vd != NULL);
3504 ASSERT3P(vd, ==, vd->vdev_top);
3505 ASSERT(!(zio->io_flags & (ZIO_FLAG_IO_REPAIR | ZIO_FLAG_IO_RETRY)));
3506 ASSERT(zio->io_flags & ZIO_FLAG_IO_ALLOCATING);
3507 ASSERT(!(lio->io_flags & ZIO_FLAG_IO_REWRITE));
3508 ASSERT(!(lio->io_orig_flags & ZIO_FLAG_NODATA));
3509
3510 /*
3511 * Parents of gang children can have two flavors -- ones that
3512 * allocated the gang header (will have ZIO_FLAG_IO_REWRITE set)
3513 * and ones that allocated the constituent blocks. The allocation
3514 * throttle needs to know the allocating parent zio so we must find
3515 * it here.
3516 */
3517 if (pio->io_child_type == ZIO_CHILD_GANG) {
3518 /*
3519 * If our parent is a rewrite gang child then our grandparent
3520 * would have been the one that performed the allocation.
3521 */
3522 if (pio->io_flags & ZIO_FLAG_IO_REWRITE)
3523 pio = zio_unique_parent(pio);
		/* Set for any gang parent, whether or not we moved up a level. */
3524 flags |= METASLAB_GANG_CHILD;
3525 }
3526
3527 ASSERT(IO_IS_ALLOCATING(pio));
3528 ASSERT3P(zio, !=, zio->io_logical);
3529 ASSERT(zio->io_logical != NULL);
3530 ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REPAIR));
3531 ASSERT0(zio->io_flags & ZIO_FLAG_NOPWRITE);
3532
	/* The per-group decrement is made while holding the allocating parent's lock. */
3533 mutex_enter(&pio->io_lock);
3534 metaslab_group_alloc_decrement(zio->io_spa, vd->vdev_id, pio, flags);
3535 mutex_exit(&pio->io_lock);
3536
	/* Release a single reservation, held under the allocating parent's tag. */
3537 metaslab_class_throttle_unreserve(spa_normal_class(zio->io_spa),
3538 1, pio);
3539
3540 /*
3541 * Call into the pipeline to see if there is more work that
3542 * needs to be done. If there is work to be done it will be
3543 * dispatched to another taskq thread.
3544 */
3545 zio_allocate_dispatch(zio->io_spa);
3546}
3547
3244static int
3245zio_done(zio_t *zio)
3246{
3247 spa_t *spa = zio->io_spa;
3248 zio_t *lio = zio->io_logical;
3249 blkptr_t *bp = zio->io_bp;
3250 vdev_t *vd = zio->io_vd;
3251 uint64_t psize = zio->io_size;
3252 zio_t *pio, *pio_next;
3548static int
3549zio_done(zio_t *zio)
3550{
3551 spa_t *spa = zio->io_spa;
3552 zio_t *lio = zio->io_logical;
3553 blkptr_t *bp = zio->io_bp;
3554 vdev_t *vd = zio->io_vd;
3555 uint64_t psize = zio->io_size;
3556 zio_t *pio, *pio_next;
3557 metaslab_class_t *mc = spa_normal_class(spa);
3558 zio_link_t *zl = NULL;
3253
3254 /*
3255 * If our children haven't all completed,
3256 * wait for them and then repeat this pipeline stage.
3257 */
3258 if (zio_wait_for_children(zio, ZIO_CHILD_VDEV, ZIO_WAIT_DONE) ||
3259 zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_DONE) ||
3260 zio_wait_for_children(zio, ZIO_CHILD_DDT, ZIO_WAIT_DONE) ||
3261 zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_DONE))
3262 return (ZIO_PIPELINE_STOP);
3263
3559
3560 /*
3561 * If our children haven't all completed,
3562 * wait for them and then repeat this pipeline stage.
3563 */
3564 if (zio_wait_for_children(zio, ZIO_CHILD_VDEV, ZIO_WAIT_DONE) ||
3565 zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_DONE) ||
3566 zio_wait_for_children(zio, ZIO_CHILD_DDT, ZIO_WAIT_DONE) ||
3567 zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_DONE))
3568 return (ZIO_PIPELINE_STOP);
3569
3570 /*
3571 * If the allocation throttle is enabled, then update the accounting.
3572 * We only track child I/Os that are part of an allocating async
3573 * write. We must do this since the allocation is performed
3574 * by the logical I/O but the actual write is done by child I/Os.
3575 */
3576 if (zio->io_flags & ZIO_FLAG_IO_ALLOCATING &&
3577 zio->io_child_type == ZIO_CHILD_VDEV) {
3578 ASSERT(mc->mc_alloc_throttle_enabled);
3579 zio_dva_throttle_done(zio);
3580 }
3581
3582 /*
3583 * If the allocation throttle is enabled, verify that
3584 * we have decremented the refcounts for every I/O that was throttled.
3585 */
3586 if (zio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
3587 ASSERT(zio->io_type == ZIO_TYPE_WRITE);
3588 ASSERT(zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
3589 ASSERT(bp != NULL);
3590 metaslab_group_alloc_verify(spa, zio->io_bp, zio);
3591 VERIFY(refcount_not_held(&mc->mc_alloc_slots, zio));
3592 }
3593
3264 for (int c = 0; c < ZIO_CHILD_TYPES; c++)
3265 for (int w = 0; w < ZIO_WAIT_TYPES; w++)
3266 ASSERT(zio->io_children[c][w] == 0);
3267
3268 if (bp != NULL && !BP_IS_EMBEDDED(bp)) {
3269 ASSERT(bp->blk_pad[0] == 0);
3270 ASSERT(bp->blk_pad[1] == 0);
3271 ASSERT(bcmp(bp, &zio->io_bp_copy, sizeof (blkptr_t)) == 0 ||

--- 153 unchanged lines hidden (view full) ---

3425
3426 /*
3427 * "The Godfather" I/O monitors its children but is
3428 * not a true parent to them. It will track them through
3429 * the pipeline but severs its ties whenever they get into
3430 * trouble (e.g. suspended). This allows "The Godfather"
3431 * I/O to return status without blocking.
3432 */
3594 for (int c = 0; c < ZIO_CHILD_TYPES; c++)
3595 for (int w = 0; w < ZIO_WAIT_TYPES; w++)
3596 ASSERT(zio->io_children[c][w] == 0);
3597
3598 if (bp != NULL && !BP_IS_EMBEDDED(bp)) {
3599 ASSERT(bp->blk_pad[0] == 0);
3600 ASSERT(bp->blk_pad[1] == 0);
3601 ASSERT(bcmp(bp, &zio->io_bp_copy, sizeof (blkptr_t)) == 0 ||

--- 153 unchanged lines hidden (view full) ---

3755
3756 /*
3757 * "The Godfather" I/O monitors its children but is
3758 * not a true parent to them. It will track them through
3759 * the pipeline but severs its ties whenever they get into
3760 * trouble (e.g. suspended). This allows "The Godfather"
3761 * I/O to return status without blocking.
3762 */
3433 for (pio = zio_walk_parents(zio); pio != NULL; pio = pio_next) {
3434 zio_link_t *zl = zio->io_walk_link;
3435 pio_next = zio_walk_parents(zio);
3763 zl = NULL;
3764 for (pio = zio_walk_parents(zio, &zl); pio != NULL;
3765 pio = pio_next) {
3766 zio_link_t *remove_zl = zl;
3767 pio_next = zio_walk_parents(zio, &zl);
3436
3437 if ((pio->io_flags & ZIO_FLAG_GODFATHER) &&
3438 (zio->io_reexecute & ZIO_REEXECUTE_SUSPEND)) {
3768
3769 if ((pio->io_flags & ZIO_FLAG_GODFATHER) &&
3770 (zio->io_reexecute & ZIO_REEXECUTE_SUSPEND)) {
3439 zio_remove_child(pio, zio, zl);
3771 zio_remove_child(pio, zio, remove_zl);
3440 zio_notify_parent(pio, zio, ZIO_WAIT_DONE);
3441 }
3442 }
3443
3444 if ((pio = zio_unique_parent(zio)) != NULL) {
3445 /*
3446 * We're not a root i/o, so there's nothing to do
3447 * but notify our parent. Don't propagate errors

--- 47 unchanged lines hidden (view full) ---

3495 */
3496 if (zio->io_done)
3497 zio->io_done(zio);
3498
3499 mutex_enter(&zio->io_lock);
3500 zio->io_state[ZIO_WAIT_DONE] = 1;
3501 mutex_exit(&zio->io_lock);
3502
3772 zio_notify_parent(pio, zio, ZIO_WAIT_DONE);
3773 }
3774 }
3775
3776 if ((pio = zio_unique_parent(zio)) != NULL) {
3777 /*
3778 * We're not a root i/o, so there's nothing to do
3779 * but notify our parent. Don't propagate errors

--- 47 unchanged lines hidden (view full) ---

3827 */
3828 if (zio->io_done)
3829 zio->io_done(zio);
3830
3831 mutex_enter(&zio->io_lock);
3832 zio->io_state[ZIO_WAIT_DONE] = 1;
3833 mutex_exit(&zio->io_lock);
3834
3503 for (pio = zio_walk_parents(zio); pio != NULL; pio = pio_next) {
3504 zio_link_t *zl = zio->io_walk_link;
3505 pio_next = zio_walk_parents(zio);
3506 zio_remove_child(pio, zio, zl);
3835 zl = NULL;
3836 for (pio = zio_walk_parents(zio, &zl); pio != NULL; pio = pio_next) {
3837 zio_link_t *remove_zl = zl;
3838 pio_next = zio_walk_parents(zio, &zl);
3839 zio_remove_child(pio, zio, remove_zl);
3507 zio_notify_parent(pio, zio, ZIO_WAIT_DONE);
3508 }
3509
3510 if (zio->io_waiter != NULL) {
3511 mutex_enter(&zio->io_lock);
3512 zio->io_executor = NULL;
3513 cv_broadcast(&zio->io_cv);
3514 mutex_exit(&zio->io_lock);

--- 7 unchanged lines hidden (view full) ---

3522/*
3523 * ==========================================================================
3524 * I/O pipeline definition
3525 * ==========================================================================
3526 */
3527static zio_pipe_stage_t *zio_pipeline[] = {
3528 NULL,
3529 zio_read_bp_init,
3840 zio_notify_parent(pio, zio, ZIO_WAIT_DONE);
3841 }
3842
3843 if (zio->io_waiter != NULL) {
3844 mutex_enter(&zio->io_lock);
3845 zio->io_executor = NULL;
3846 cv_broadcast(&zio->io_cv);
3847 mutex_exit(&zio->io_lock);

--- 7 unchanged lines hidden (view full) ---

3855/*
3856 * ==========================================================================
3857 * I/O pipeline definition
3858 * ==========================================================================
3859 */
3860static zio_pipe_stage_t *zio_pipeline[] = {
3861 NULL,
3862 zio_read_bp_init,
3863 zio_write_bp_init,
3530 zio_free_bp_init,
3531 zio_issue_async,
3864 zio_free_bp_init,
3865 zio_issue_async,
3532 zio_write_bp_init,
3866 zio_write_compress,
3533 zio_checksum_generate,
3534 zio_nop_write,
3535 zio_ddt_read_start,
3536 zio_ddt_read_done,
3537 zio_ddt_write,
3538 zio_ddt_free,
3539 zio_gang_assemble,
3540 zio_gang_issue,
3867 zio_checksum_generate,
3868 zio_nop_write,
3869 zio_ddt_read_start,
3870 zio_ddt_read_done,
3871 zio_ddt_write,
3872 zio_ddt_free,
3873 zio_gang_assemble,
3874 zio_gang_issue,
3875 zio_dva_throttle,
3541 zio_dva_allocate,
3542 zio_dva_free,
3543 zio_dva_claim,
3544 zio_ready,
3545 zio_vdev_io_start,
3546 zio_vdev_io_done,
3547 zio_vdev_io_assess,
3548 zio_checksum_verify,

--- 127 unchanged lines hidden ---
3876 zio_dva_allocate,
3877 zio_dva_free,
3878 zio_dva_claim,
3879 zio_ready,
3880 zio_vdev_io_start,
3881 zio_vdev_io_done,
3882 zio_vdev_io_assess,
3883 zio_checksum_verify,

--- 127 unchanged lines hidden ---