zio.c (307265) | zio.c (307277) |
---|---|
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE --- 27 unchanged lines hidden (view full) --- 36#include <sys/zio_compress.h> 37#include <sys/zio_checksum.h> 38#include <sys/dmu_objset.h> 39#include <sys/arc.h> 40#include <sys/ddt.h> 41#include <sys/trim_map.h> 42#include <sys/blkptr.h> 43#include <sys/zfeature.h> | 1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE --- 27 unchanged lines hidden (view full) --- 36#include <sys/zio_compress.h> 37#include <sys/zio_checksum.h> 38#include <sys/dmu_objset.h> 39#include <sys/arc.h> 40#include <sys/ddt.h> 41#include <sys/trim_map.h> 42#include <sys/blkptr.h> 43#include <sys/zfeature.h> |
44#include <sys/metaslab_impl.h> |
|
44 45SYSCTL_DECL(_vfs_zfs); 46SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO"); 47#if defined(__amd64__) 48static int zio_use_uma = 1; 49#else 50static int zio_use_uma = 0; 51#endif --- 21 unchanged lines hidden (view full) --- 73 * I/O type descriptions 74 * ========================================================================== 75 */ 76const char *zio_type_name[ZIO_TYPES] = { 77 "zio_null", "zio_read", "zio_write", "zio_free", "zio_claim", 78 "zio_ioctl" 79}; 80 | 45 46SYSCTL_DECL(_vfs_zfs); 47SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO"); 48#if defined(__amd64__) 49static int zio_use_uma = 1; 50#else 51static int zio_use_uma = 0; 52#endif --- 21 unchanged lines hidden (view full) --- 74 * I/O type descriptions 75 * ========================================================================== 76 */ 77const char *zio_type_name[ZIO_TYPES] = { 78 "zio_null", "zio_read", "zio_write", "zio_free", "zio_claim", 79 "zio_ioctl" 80}; 81 |
82boolean_t zio_dva_throttle_enabled = B_TRUE; 83SYSCTL_INT(_vfs_zfs_zio, OID_AUTO, dva_throttle_enabled, CTLFLAG_RDTUN, 84 &zio_dva_throttle_enabled, 0, ""); 85 |
|
81/* 82 * ========================================================================== 83 * I/O kmem caches 84 * ========================================================================== 85 */ 86kmem_cache_t *zio_cache; 87kmem_cache_t *zio_link_cache; 88kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; --- 42 unchanged lines hidden (view full) --- 131#ifdef illumos 132#ifdef ZFS_DEBUG 133int zio_buf_debug_limit = 16384; 134#else 135int zio_buf_debug_limit = 0; 136#endif 137#endif 138 | 86/* 87 * ========================================================================== 88 * I/O kmem caches 89 * ========================================================================== 90 */ 91kmem_cache_t *zio_cache; 92kmem_cache_t *zio_link_cache; 93kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; --- 42 unchanged lines hidden (view full) --- 136#ifdef illumos 137#ifdef ZFS_DEBUG 138int zio_buf_debug_limit = 16384; 139#else 140int zio_buf_debug_limit = 0; 141#endif 142#endif 143 |
144static void zio_taskq_dispatch(zio_t *, zio_taskq_type_t, boolean_t); 145 |
|
139void 140zio_init(void) 141{ 142 size_t c; 143 zio_cache = kmem_cache_create("zio_cache", 144 sizeof (zio_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 145 zio_link_cache = kmem_cache_create("zio_link_cache", 146 sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0); --- 244 unchanged lines hidden (view full) --- 391 zio->io_error = SET_ERROR(EIO); 392} 393 394/* 395 * ========================================================================== 396 * I/O parent/child relationships and pipeline interlocks 397 * ========================================================================== 398 */ | 146void 147zio_init(void) 148{ 149 size_t c; 150 zio_cache = kmem_cache_create("zio_cache", 151 sizeof (zio_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 152 zio_link_cache = kmem_cache_create("zio_link_cache", 153 sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0); --- 244 unchanged lines hidden (view full) --- 398 zio->io_error = SET_ERROR(EIO); 399} 400 401/* 402 * ========================================================================== 403 * I/O parent/child relationships and pipeline interlocks 404 * ========================================================================== 405 */ |
399/* 400 * NOTE - Callers to zio_walk_parents() and zio_walk_children must 401 * continue calling these functions until they return NULL. 402 * Otherwise, the next caller will pick up the list walk in 403 * some indeterminate state. (Otherwise every caller would 404 * have to pass in a cookie to keep the state represented by 405 * io_walk_link, which gets annoying.) 406 */ | |
407zio_t * | 406zio_t * |
408zio_walk_parents(zio_t *cio) | 407zio_walk_parents(zio_t *cio, zio_link_t **zl) |
409{ | 408{ |
410 zio_link_t *zl = cio->io_walk_link; | |
411 list_t *pl = &cio->io_parent_list; 412 | 409 list_t *pl = &cio->io_parent_list; 410 |
413 zl = (zl == NULL) ? list_head(pl) : list_next(pl, zl); 414 cio->io_walk_link = zl; 415 416 if (zl == NULL) | 411 *zl = (*zl == NULL) ? list_head(pl) : list_next(pl, *zl); 412 if (*zl == NULL) |
417 return (NULL); 418 | 413 return (NULL); 414 |
419 ASSERT(zl->zl_child == cio); 420 return (zl->zl_parent); | 415 ASSERT((*zl)->zl_child == cio); 416 return ((*zl)->zl_parent); |
421} 422 423zio_t * | 417} 418 419zio_t * |
424zio_walk_children(zio_t *pio) | 420zio_walk_children(zio_t *pio, zio_link_t **zl) |
425{ | 421{ |
426 zio_link_t *zl = pio->io_walk_link; | |
427 list_t *cl = &pio->io_child_list; 428 | 422 list_t *cl = &pio->io_child_list; 423 |
429 zl = (zl == NULL) ? list_head(cl) : list_next(cl, zl); 430 pio->io_walk_link = zl; 431 432 if (zl == NULL) | 424 *zl = (*zl == NULL) ? list_head(cl) : list_next(cl, *zl); 425 if (*zl == NULL) |
433 return (NULL); 434 | 426 return (NULL); 427 |
435 ASSERT(zl->zl_parent == pio); 436 return (zl->zl_child); | 428 ASSERT((*zl)->zl_parent == pio); 429 return ((*zl)->zl_child); |
437} 438 439zio_t * 440zio_unique_parent(zio_t *cio) 441{ | 430} 431 432zio_t * 433zio_unique_parent(zio_t *cio) 434{ |
442 zio_t *pio = zio_walk_parents(cio); | 435 zio_link_t *zl = NULL; 436 zio_t *pio = zio_walk_parents(cio, &zl); |
443 | 437 |
444 VERIFY(zio_walk_parents(cio) == NULL); | 438 VERIFY3P(zio_walk_parents(cio, &zl), ==, NULL); |
445 return (pio); 446} 447 448void 449zio_add_child(zio_t *pio, zio_t *cio) 450{ 451 zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_SLEEP); 452 --- 52 unchanged lines hidden (view full) --- 505{ 506 uint64_t *countp = &zio->io_children[child][wait]; 507 boolean_t waiting = B_FALSE; 508 509 mutex_enter(&zio->io_lock); 510 ASSERT(zio->io_stall == NULL); 511 if (*countp != 0) { 512 zio->io_stage >>= 1; | 439 return (pio); 440} 441 442void 443zio_add_child(zio_t *pio, zio_t *cio) 444{ 445 zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_SLEEP); 446 --- 52 unchanged lines hidden (view full) --- 499{ 500 uint64_t *countp = &zio->io_children[child][wait]; 501 boolean_t waiting = B_FALSE; 502 503 mutex_enter(&zio->io_lock); 504 ASSERT(zio->io_stall == NULL); 505 if (*countp != 0) { 506 zio->io_stage >>= 1; |
507 ASSERT3U(zio->io_stage, !=, ZIO_STAGE_OPEN); |
|
513 zio->io_stall = countp; 514 waiting = B_TRUE; 515 } 516 mutex_exit(&zio->io_lock); 517 518 return (waiting); 519} 520 --- 7 unchanged lines hidden (view full) --- 528 if (zio->io_error && !(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE)) 529 *errorp = zio_worst_error(*errorp, zio->io_error); 530 pio->io_reexecute |= zio->io_reexecute; 531 ASSERT3U(*countp, >, 0); 532 533 (*countp)--; 534 535 if (*countp == 0 && pio->io_stall == countp) { | 508 zio->io_stall = countp; 509 waiting = B_TRUE; 510 } 511 mutex_exit(&zio->io_lock); 512 513 return (waiting); 514} 515 --- 7 unchanged lines hidden (view full) --- 523 if (zio->io_error && !(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE)) 524 *errorp = zio_worst_error(*errorp, zio->io_error); 525 pio->io_reexecute |= zio->io_reexecute; 526 ASSERT3U(*countp, >, 0); 527 528 (*countp)--; 529 530 if (*countp == 0 && pio->io_stall == countp) { |
531 zio_taskq_type_t type = 532 pio->io_stage < ZIO_STAGE_VDEV_IO_START ? ZIO_TASKQ_ISSUE : 533 ZIO_TASKQ_INTERRUPT; |
|
536 pio->io_stall = NULL; 537 mutex_exit(&pio->io_lock); | 534 pio->io_stall = NULL; 535 mutex_exit(&pio->io_lock); |
538 zio_execute(pio); | 536 /* 537 * Dispatch the parent zio in its own taskq so that 538 * the child can continue to make progress. This also 539 * prevents overflowing the stack when we have deeply nested 540 * parent-child relationships. 541 */ 542 zio_taskq_dispatch(pio, type, B_FALSE); |
539 } else { 540 mutex_exit(&pio->io_lock); 541 } 542} 543 544static void 545zio_inherit_child_errors(zio_t *zio, enum zio_child c) 546{ 547 if (zio->io_child_error[c] != 0 && zio->io_error == 0) 548 zio->io_error = zio->io_child_error[c]; 549} 550 | 543 } else { 544 mutex_exit(&pio->io_lock); 545 } 546} 547 548static void 549zio_inherit_child_errors(zio_t *zio, enum zio_child c) 550{ 551 if (zio->io_child_error[c] != 0 && zio->io_error == 0) 552 zio->io_error = zio->io_child_error[c]; 553} 554 |
555int 556zio_timestamp_compare(const void *x1, const void *x2) 557{ 558 const zio_t *z1 = x1; 559 const zio_t *z2 = x2; 560 561 if (z1->io_queued_timestamp < z2->io_queued_timestamp) 562 return (-1); 563 if (z1->io_queued_timestamp > z2->io_queued_timestamp) 564 return (1); 565 566 if (z1->io_offset < z2->io_offset) 567 return (-1); 568 if (z1->io_offset > z2->io_offset) 569 return (1); 570 571 if (z1 < z2) 572 return (-1); 573 if (z1 > z2) 574 return (1); 575 576 return (0); 577} 578 |
|
551/* 552 * ========================================================================== 553 * Create the various types of I/O (read, write, free, etc) 554 * ========================================================================== 555 */ 556static zio_t * 557zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, 558 void *data, uint64_t size, zio_done_func_t *done, void *private, --- 52 unchanged lines hidden (view full) --- 611 zio->io_priority = priority; 612 zio->io_vd = vd; 613 zio->io_offset = offset; 614 zio->io_orig_data = zio->io_data = data; 615 zio->io_orig_size = zio->io_size = size; 616 zio->io_orig_flags = zio->io_flags = flags; 617 zio->io_orig_stage = zio->io_stage = stage; 618 zio->io_orig_pipeline = zio->io_pipeline = pipeline; | 579/* 580 * ========================================================================== 581 * Create the various types of I/O (read, write, free, etc) 582 * ========================================================================== 583 */ 584static zio_t * 585zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, 586 void *data, uint64_t size, zio_done_func_t *done, void *private, --- 52 unchanged lines hidden (view full) --- 639 zio->io_priority = priority; 640 zio->io_vd = vd; 641 zio->io_offset = offset; 642 zio->io_orig_data = zio->io_data = data; 643 zio->io_orig_size = zio->io_size = size; 644 zio->io_orig_flags = zio->io_flags = flags; 645 zio->io_orig_stage = zio->io_stage = stage; 646 zio->io_orig_pipeline = zio->io_pipeline = pipeline; |
647 zio->io_pipeline_trace = ZIO_STAGE_OPEN; |
|
619 620 zio->io_state[ZIO_WAIT_READY] = (stage >= ZIO_STAGE_READY); 621 zio->io_state[ZIO_WAIT_DONE] = (stage >= ZIO_STAGE_DONE); 622 623 if (zb != NULL) 624 zio->io_bookmark = *zb; 625 626 if (pio != NULL) { --- 181 unchanged lines hidden (view full) --- 808zio_t * 809zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, void *data, 810 uint64_t size, zio_done_func_t *done, void *private, 811 zio_priority_t priority, enum zio_flag flags, zbookmark_phys_t *zb) 812{ 813 zio_t *zio; 814 815 zio = zio_create(pio, spa, txg, bp, data, size, done, private, | 648 649 zio->io_state[ZIO_WAIT_READY] = (stage >= ZIO_STAGE_READY); 650 zio->io_state[ZIO_WAIT_DONE] = (stage >= ZIO_STAGE_DONE); 651 652 if (zb != NULL) 653 zio->io_bookmark = *zb; 654 655 if (pio != NULL) { --- 181 unchanged lines hidden (view full) --- 837zio_t * 838zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, void *data, 839 uint64_t size, zio_done_func_t *done, void *private, 840 zio_priority_t priority, enum zio_flag flags, zbookmark_phys_t *zb) 841{ 842 zio_t *zio; 843 844 zio = zio_create(pio, spa, txg, bp, data, size, done, private, |
816 ZIO_TYPE_WRITE, priority, flags, NULL, 0, zb, | 845 ZIO_TYPE_WRITE, priority, flags | ZIO_FLAG_IO_REWRITE, NULL, 0, zb, |
817 ZIO_STAGE_OPEN, ZIO_REWRITE_PIPELINE); 818 819 return (zio); 820} 821 822void 823zio_write_override(zio_t *zio, blkptr_t *bp, int copies, boolean_t nopwrite) 824{ --- 104 unchanged lines hidden (view full) --- 929 */ 930 ASSERT3U(spa->spa_uberblock.ub_rootbp.blk_birth, <, spa_first_txg(spa)); 931 ASSERT(txg == spa_first_txg(spa) || txg == 0); 932 ASSERT(!BP_GET_DEDUP(bp) || !spa_writeable(spa)); /* zdb(1M) */ 933 934 zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp), 935 done, private, ZIO_TYPE_CLAIM, ZIO_PRIORITY_NOW, flags, 936 NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_CLAIM_PIPELINE); | 846 ZIO_STAGE_OPEN, ZIO_REWRITE_PIPELINE); 847 848 return (zio); 849} 850 851void 852zio_write_override(zio_t *zio, blkptr_t *bp, int copies, boolean_t nopwrite) 853{ --- 104 unchanged lines hidden (view full) --- 958 */ 959 ASSERT3U(spa->spa_uberblock.ub_rootbp.blk_birth, <, spa_first_txg(spa)); 960 ASSERT(txg == spa_first_txg(spa) || txg == 0); 961 ASSERT(!BP_GET_DEDUP(bp) || !spa_writeable(spa)); /* zdb(1M) */ 962 963 zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp), 964 done, private, ZIO_TYPE_CLAIM, ZIO_PRIORITY_NOW, flags, 965 NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_CLAIM_PIPELINE); |
966 ASSERT0(zio->io_queued_timestamp); |
|
937 938 return (zio); 939} 940 941zio_t * 942zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd, uint64_t offset, 943 uint64_t size, zio_done_func_t *done, void *private, 944 zio_priority_t priority, enum zio_flag flags) --- 108 unchanged lines hidden (view full) --- 1053 1054 /* 1055 * If we've decided to do a repair, the write is not speculative -- 1056 * even if the original read was. 1057 */ 1058 if (flags & ZIO_FLAG_IO_REPAIR) 1059 flags &= ~ZIO_FLAG_SPECULATIVE; 1060 | 967 968 return (zio); 969} 970 971zio_t * 972zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd, uint64_t offset, 973 uint64_t size, zio_done_func_t *done, void *private, 974 zio_priority_t priority, enum zio_flag flags) --- 108 unchanged lines hidden (view full) --- 1083 1084 /* 1085 * If we've decided to do a repair, the write is not speculative -- 1086 * even if the original read was. 1087 */ 1088 if (flags & ZIO_FLAG_IO_REPAIR) 1089 flags &= ~ZIO_FLAG_SPECULATIVE; 1090 |
1091 /* 1092 * If we're creating a child I/O that is not associated with a 1093 * top-level vdev, then the child zio is not an allocating I/O. 1094 * If this is a retried I/O then we ignore it since we will 1095 * have already processed the original allocating I/O. 1096 */ 1097 if (flags & ZIO_FLAG_IO_ALLOCATING && 1098 (vd != vd->vdev_top || (flags & ZIO_FLAG_IO_RETRY))) { 1099 metaslab_class_t *mc = spa_normal_class(pio->io_spa); 1100 1101 ASSERT(mc->mc_alloc_throttle_enabled); 1102 ASSERT(type == ZIO_TYPE_WRITE); 1103 ASSERT(priority == ZIO_PRIORITY_ASYNC_WRITE); 1104 ASSERT(!(flags & ZIO_FLAG_IO_REPAIR)); 1105 ASSERT(!(pio->io_flags & ZIO_FLAG_IO_REWRITE) || 1106 pio->io_child_type == ZIO_CHILD_GANG); 1107 1108 flags &= ~ZIO_FLAG_IO_ALLOCATING; 1109 } 1110 |
|
1061 zio = zio_create(pio, pio->io_spa, pio->io_txg, bp, data, size, 1062 done, private, type, priority, flags, vd, offset, &pio->io_bookmark, 1063 ZIO_STAGE_VDEV_IO_START >> 1, pipeline); | 1111 zio = zio_create(pio, pio->io_spa, pio->io_txg, bp, data, size, 1112 done, private, type, priority, flags, vd, offset, &pio->io_bookmark, 1113 ZIO_STAGE_VDEV_IO_START >> 1, pipeline); |
1114 ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_VDEV); |
|
1064 1065 zio->io_physdone = pio->io_physdone; 1066 if (vd->vdev_ops->vdev_op_leaf && zio->io_logical != NULL) 1067 zio->io_logical->io_phys_children++; 1068 1069 return (zio); 1070} 1071 --- 90 unchanged lines hidden (view full) --- 1162 zio->io_pipeline = ZIO_DDT_READ_PIPELINE; 1163 1164 return (ZIO_PIPELINE_CONTINUE); 1165} 1166 1167static int 1168zio_write_bp_init(zio_t *zio) 1169{ | 1115 1116 zio->io_physdone = pio->io_physdone; 1117 if (vd->vdev_ops->vdev_op_leaf && zio->io_logical != NULL) 1118 zio->io_logical->io_phys_children++; 1119 1120 return (zio); 1121} 1122 --- 90 unchanged lines hidden (view full) --- 1213 zio->io_pipeline = ZIO_DDT_READ_PIPELINE; 1214 1215 return (ZIO_PIPELINE_CONTINUE); 1216} 1217 1218static int 1219zio_write_bp_init(zio_t *zio) 1220{ |
1170 spa_t *spa = zio->io_spa; 1171 zio_prop_t *zp = &zio->io_prop; 1172 enum zio_compress compress = zp->zp_compress; 1173 blkptr_t *bp = zio->io_bp; 1174 uint64_t lsize = zio->io_size; 1175 uint64_t psize = lsize; 1176 int pass = 1; 1177 1178 /* 1179 * If our children haven't all reached the ready stage, 1180 * wait for them and then repeat this pipeline stage. 1181 */ 1182 if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_READY) || 1183 zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_READY)) 1184 return (ZIO_PIPELINE_STOP); 1185 | |
1186 if (!IO_IS_ALLOCATING(zio)) 1187 return (ZIO_PIPELINE_CONTINUE); 1188 | 1221 if (!IO_IS_ALLOCATING(zio)) 1222 return (ZIO_PIPELINE_CONTINUE); 1223 |
1189 if (zio->io_children_ready != NULL) { 1190 /* 1191 * Now that all our children are ready, run the callback 1192 * associated with this zio in case it wants to modify the 1193 * data to be written. 1194 */ 1195 ASSERT3U(zp->zp_level, >, 0); 1196 zio->io_children_ready(zio); 1197 } 1198 | |
1199 ASSERT(zio->io_child_type != ZIO_CHILD_DDT); 1200 1201 if (zio->io_bp_override) { | 1224 ASSERT(zio->io_child_type != ZIO_CHILD_DDT); 1225 1226 if (zio->io_bp_override) { |
1227 blkptr_t *bp = zio->io_bp; 1228 zio_prop_t *zp = &zio->io_prop; 1229 |
|
1202 ASSERT(bp->blk_birth != zio->io_txg); 1203 ASSERT(BP_GET_DEDUP(zio->io_bp_override) == 0); 1204 1205 *bp = *zio->io_bp_override; 1206 zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; 1207 1208 if (BP_IS_EMBEDDED(bp)) 1209 return (ZIO_PIPELINE_CONTINUE); 1210 1211 /* 1212 * If we've been overridden and nopwrite is set then 1213 * set the flag accordingly to indicate that a nopwrite 1214 * has already occurred. 1215 */ 1216 if (!BP_IS_HOLE(bp) && zp->zp_nopwrite) { 1217 ASSERT(!zp->zp_dedup); | 1230 ASSERT(bp->blk_birth != zio->io_txg); 1231 ASSERT(BP_GET_DEDUP(zio->io_bp_override) == 0); 1232 1233 *bp = *zio->io_bp_override; 1234 zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; 1235 1236 if (BP_IS_EMBEDDED(bp)) 1237 return (ZIO_PIPELINE_CONTINUE); 1238 1239 /* 1240 * If we've been overridden and nopwrite is set then 1241 * set the flag accordingly to indicate that a nopwrite 1242 * has already occurred. 1243 */ 1244 if (!BP_IS_HOLE(bp) && zp->zp_nopwrite) { 1245 ASSERT(!zp->zp_dedup); |
1246 ASSERT3U(BP_GET_CHECKSUM(bp), ==, zp->zp_checksum); |
|
1218 zio->io_flags |= ZIO_FLAG_NOPWRITE; 1219 return (ZIO_PIPELINE_CONTINUE); 1220 } 1221 1222 ASSERT(!zp->zp_nopwrite); 1223 1224 if (BP_IS_HOLE(bp) || !zp->zp_dedup) 1225 return (ZIO_PIPELINE_CONTINUE); 1226 1227 ASSERT((zio_checksum_table[zp->zp_checksum].ci_flags & 1228 ZCHECKSUM_FLAG_DEDUP) || zp->zp_dedup_verify); 1229 1230 if (BP_GET_CHECKSUM(bp) == zp->zp_checksum) { 1231 BP_SET_DEDUP(bp, 1); 1232 zio->io_pipeline |= ZIO_STAGE_DDT_WRITE; 1233 return (ZIO_PIPELINE_CONTINUE); 1234 } | 1247 zio->io_flags |= ZIO_FLAG_NOPWRITE; 1248 return (ZIO_PIPELINE_CONTINUE); 1249 } 1250 1251 ASSERT(!zp->zp_nopwrite); 1252 1253 if (BP_IS_HOLE(bp) || !zp->zp_dedup) 1254 return (ZIO_PIPELINE_CONTINUE); 1255 1256 ASSERT((zio_checksum_table[zp->zp_checksum].ci_flags & 1257 ZCHECKSUM_FLAG_DEDUP) || zp->zp_dedup_verify); 1258 1259 if (BP_GET_CHECKSUM(bp) == zp->zp_checksum) { 1260 BP_SET_DEDUP(bp, 1); 1261 zio->io_pipeline |= ZIO_STAGE_DDT_WRITE; 1262 return (ZIO_PIPELINE_CONTINUE); 1263 } |
1264 1265 /* 1266 * We were unable to handle this as an override bp, treat 1267 * it as a regular write I/O. 1268 */ |
|
1235 zio->io_bp_override = NULL; | 1269 zio->io_bp_override = NULL; |
1236 BP_ZERO(bp); | 1270 *bp = zio->io_bp_orig; 1271 zio->io_pipeline = zio->io_orig_pipeline; |
1237 } 1238 | 1272 } 1273 |
1274 return (ZIO_PIPELINE_CONTINUE); 1275} 1276 1277static int 1278zio_write_compress(zio_t *zio) 1279{ 1280 spa_t *spa = zio->io_spa; 1281 zio_prop_t *zp = &zio->io_prop; 1282 enum zio_compress compress = zp->zp_compress; 1283 blkptr_t *bp = zio->io_bp; 1284 uint64_t lsize = zio->io_size; 1285 uint64_t psize = lsize; 1286 int pass = 1; 1287 1288 /* 1289 * If our children haven't all reached the ready stage, 1290 * wait for them and then repeat this pipeline stage. 1291 */ 1292 if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_READY) || 1293 zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_READY)) 1294 return (ZIO_PIPELINE_STOP); 1295 1296 if (!IO_IS_ALLOCATING(zio)) 1297 return (ZIO_PIPELINE_CONTINUE); 1298 1299 if (zio->io_children_ready != NULL) { 1300 /* 1301 * Now that all our children are ready, run the callback 1302 * associated with this zio in case it wants to modify the 1303 * data to be written. 1304 */ 1305 ASSERT3U(zp->zp_level, >, 0); 1306 zio->io_children_ready(zio); 1307 } 1308 1309 ASSERT(zio->io_child_type != ZIO_CHILD_DDT); 1310 ASSERT(zio->io_bp_override == NULL); 1311 |
|
1239 if (!BP_IS_HOLE(bp) && bp->blk_birth == zio->io_txg) { 1240 /* 1241 * We're rewriting an existing block, which means we're 1242 * working on behalf of spa_sync(). For spa_sync() to 1243 * converge, it must eventually be the case that we don't 1244 * have to allocate new blocks. But compression changes 1245 * the blocksize, which forces a reallocate, and makes 1246 * convergence take longer. Therefore, after the first --- 51 unchanged lines hidden (view full) --- 1298 psize = lsize; 1299 } else { 1300 bzero((char *)cbuf + psize, rounded - psize); 1301 psize = rounded; 1302 zio_push_transform(zio, cbuf, 1303 psize, lsize, NULL); 1304 } 1305 } | 1312 if (!BP_IS_HOLE(bp) && bp->blk_birth == zio->io_txg) { 1313 /* 1314 * We're rewriting an existing block, which means we're 1315 * working on behalf of spa_sync(). For spa_sync() to 1316 * converge, it must eventually be the case that we don't 1317 * have to allocate new blocks. But compression changes 1318 * the blocksize, which forces a reallocate, and makes 1319 * convergence take longer. Therefore, after the first --- 51 unchanged lines hidden (view full) --- 1371 psize = lsize; 1372 } else { 1373 bzero((char *)cbuf + psize, rounded - psize); 1374 psize = rounded; 1375 zio_push_transform(zio, cbuf, 1376 psize, lsize, NULL); 1377 } 1378 } |
1379 1380 /* 1381 * We were unable to handle this as an override bp, treat 1382 * it as a regular write I/O. 1383 */ 1384 zio->io_bp_override = NULL; 1385 *bp = zio->io_bp_orig; 1386 zio->io_pipeline = zio->io_orig_pipeline; |
|
1306 } 1307 1308 /* 1309 * The final pass of spa_sync() must be all rewrites, but the first 1310 * few passes offer a trade-off: allocating blocks defers convergence, 1311 * but newly allocated blocks are sequential, so they can be written 1312 * to disk faster. Therefore, we allow the first few passes of 1313 * spa_sync() to allocate new blocks, but force rewrites after that. --- 36 unchanged lines hidden (view full) --- 1350 zio->io_pipeline = ZIO_DDT_WRITE_PIPELINE; 1351 } 1352 if (zp->zp_nopwrite) { 1353 ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); 1354 ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REWRITE)); 1355 zio->io_pipeline |= ZIO_STAGE_NOP_WRITE; 1356 } 1357 } | 1387 } 1388 1389 /* 1390 * The final pass of spa_sync() must be all rewrites, but the first 1391 * few passes offer a trade-off: allocating blocks defers convergence, 1392 * but newly allocated blocks are sequential, so they can be written 1393 * to disk faster. Therefore, we allow the first few passes of 1394 * spa_sync() to allocate new blocks, but force rewrites after that. --- 36 unchanged lines hidden (view full) --- 1431 zio->io_pipeline = ZIO_DDT_WRITE_PIPELINE; 1432 } 1433 if (zp->zp_nopwrite) { 1434 ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); 1435 ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REWRITE)); 1436 zio->io_pipeline |= ZIO_STAGE_NOP_WRITE; 1437 } 1438 } |
1358 | |
1359 return (ZIO_PIPELINE_CONTINUE); 1360} 1361 1362static int 1363zio_free_bp_init(zio_t *zio) 1364{ 1365 blkptr_t *bp = zio->io_bp; 1366 --- 160 unchanged lines hidden (view full) --- 1527 */ 1528static zio_pipe_stage_t *zio_pipeline[]; 1529 1530void 1531zio_execute(zio_t *zio) 1532{ 1533 zio->io_executor = curthread; 1534 | 1439 return (ZIO_PIPELINE_CONTINUE); 1440} 1441 1442static int 1443zio_free_bp_init(zio_t *zio) 1444{ 1445 blkptr_t *bp = zio->io_bp; 1446 --- 160 unchanged lines hidden (view full) --- 1607 */ 1608static zio_pipe_stage_t *zio_pipeline[]; 1609 1610void 1611zio_execute(zio_t *zio) 1612{ 1613 zio->io_executor = curthread; 1614 |
1615 ASSERT3U(zio->io_queued_timestamp, >, 0); 1616 |
|
1535 while (zio->io_stage < ZIO_STAGE_DONE) { 1536 enum zio_stage pipeline = zio->io_pipeline; 1537 enum zio_stage stage = zio->io_stage; 1538 int rv; 1539 1540 ASSERT(!MUTEX_HELD(&zio->io_lock)); 1541 ASSERT(ISP2(stage)); 1542 ASSERT(zio->io_stall == NULL); --- 17 unchanged lines hidden (view full) --- 1560 zio_taskq_member(zio, ZIO_TASKQ_INTERRUPT)) { 1561 boolean_t cut = (stage == ZIO_STAGE_VDEV_IO_START) ? 1562 zio_requeue_io_start_cut_in_line : B_FALSE; 1563 zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, cut); 1564 return; 1565 } 1566 1567 zio->io_stage = stage; | 1617 while (zio->io_stage < ZIO_STAGE_DONE) { 1618 enum zio_stage pipeline = zio->io_pipeline; 1619 enum zio_stage stage = zio->io_stage; 1620 int rv; 1621 1622 ASSERT(!MUTEX_HELD(&zio->io_lock)); 1623 ASSERT(ISP2(stage)); 1624 ASSERT(zio->io_stall == NULL); --- 17 unchanged lines hidden (view full) --- 1642 zio_taskq_member(zio, ZIO_TASKQ_INTERRUPT)) { 1643 boolean_t cut = (stage == ZIO_STAGE_VDEV_IO_START) ? 1644 zio_requeue_io_start_cut_in_line : B_FALSE; 1645 zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, cut); 1646 return; 1647 } 1648 1649 zio->io_stage = stage; |
1650 zio->io_pipeline_trace |= zio->io_stage; |
|
1568 rv = zio_pipeline[highbit64(stage) - 1](zio); 1569 1570 if (rv == ZIO_PIPELINE_STOP) 1571 return; 1572 1573 ASSERT(rv == ZIO_PIPELINE_CONTINUE); 1574 } 1575} --- 7 unchanged lines hidden (view full) --- 1583zio_wait(zio_t *zio) 1584{ 1585 int error; 1586 1587 ASSERT(zio->io_stage == ZIO_STAGE_OPEN); 1588 ASSERT(zio->io_executor == NULL); 1589 1590 zio->io_waiter = curthread; | 1651 rv = zio_pipeline[highbit64(stage) - 1](zio); 1652 1653 if (rv == ZIO_PIPELINE_STOP) 1654 return; 1655 1656 ASSERT(rv == ZIO_PIPELINE_CONTINUE); 1657 } 1658} --- 7 unchanged lines hidden (view full) --- 1666zio_wait(zio_t *zio) 1667{ 1668 int error; 1669 1670 ASSERT(zio->io_stage == ZIO_STAGE_OPEN); 1671 ASSERT(zio->io_executor == NULL); 1672 1673 zio->io_waiter = curthread; |
1674 ASSERT0(zio->io_queued_timestamp); 1675 zio->io_queued_timestamp = gethrtime(); |
|
1591 1592 zio_execute(zio); 1593 1594 mutex_enter(&zio->io_lock); 1595 while (zio->io_executor != NULL) 1596 cv_wait(&zio->io_cv, &zio->io_lock); 1597 mutex_exit(&zio->io_lock); 1598 --- 15 unchanged lines hidden (view full) --- 1614 * We add it to the spa_async_root_zio "Godfather" I/O which 1615 * will ensure they complete prior to unloading the pool. 1616 */ 1617 spa_t *spa = zio->io_spa; 1618 1619 zio_add_child(spa->spa_async_zio_root[CPU_SEQID], zio); 1620 } 1621 | 1676 1677 zio_execute(zio); 1678 1679 mutex_enter(&zio->io_lock); 1680 while (zio->io_executor != NULL) 1681 cv_wait(&zio->io_cv, &zio->io_lock); 1682 mutex_exit(&zio->io_lock); 1683 --- 15 unchanged lines hidden (view full) --- 1699 * We add it to the spa_async_root_zio "Godfather" I/O which 1700 * will ensure they complete prior to unloading the pool. 1701 */ 1702 spa_t *spa = zio->io_spa; 1703 1704 zio_add_child(spa->spa_async_zio_root[CPU_SEQID], zio); 1705 } 1706 |
1707 ASSERT0(zio->io_queued_timestamp); 1708 zio->io_queued_timestamp = gethrtime(); |
|
1622 zio_execute(zio); 1623} 1624 1625/* 1626 * ========================================================================== 1627 * Reexecute or suspend/resume failed I/O 1628 * ========================================================================== 1629 */ --- 8 unchanged lines hidden (view full) --- 1638 ASSERT(pio->io_gang_leader == NULL); 1639 ASSERT(pio->io_gang_tree == NULL); 1640 1641 pio->io_flags = pio->io_orig_flags; 1642 pio->io_stage = pio->io_orig_stage; 1643 pio->io_pipeline = pio->io_orig_pipeline; 1644 pio->io_reexecute = 0; 1645 pio->io_flags |= ZIO_FLAG_REEXECUTED; | 1709 zio_execute(zio); 1710} 1711 1712/* 1713 * ========================================================================== 1714 * Reexecute or suspend/resume failed I/O 1715 * ========================================================================== 1716 */ --- 8 unchanged lines hidden (view full) --- 1725 ASSERT(pio->io_gang_leader == NULL); 1726 ASSERT(pio->io_gang_tree == NULL); 1727 1728 pio->io_flags = pio->io_orig_flags; 1729 pio->io_stage = pio->io_orig_stage; 1730 pio->io_pipeline = pio->io_orig_pipeline; 1731 pio->io_reexecute = 0; 1732 pio->io_flags |= ZIO_FLAG_REEXECUTED; |
1733 pio->io_pipeline_trace = 0; |
|
1646 pio->io_error = 0; 1647 for (int w = 0; w < ZIO_WAIT_TYPES; w++) 1648 pio->io_state[w] = 0; 1649 for (int c = 0; c < ZIO_CHILD_TYPES; c++) 1650 pio->io_child_error[c] = 0; 1651 1652 if (IO_IS_ALLOCATING(pio)) 1653 BP_ZERO(pio->io_bp); 1654 1655 /* 1656 * As we reexecute pio's children, new children could be created. 1657 * New children go to the head of pio's io_child_list, however, 1658 * so we will (correctly) not reexecute them. The key is that 1659 * the remainder of pio's io_child_list, from 'cio_next' onward, 1660 * cannot be affected by any side effects of reexecuting 'cio'. 1661 */ | 1734 pio->io_error = 0; 1735 for (int w = 0; w < ZIO_WAIT_TYPES; w++) 1736 pio->io_state[w] = 0; 1737 for (int c = 0; c < ZIO_CHILD_TYPES; c++) 1738 pio->io_child_error[c] = 0; 1739 1740 if (IO_IS_ALLOCATING(pio)) 1741 BP_ZERO(pio->io_bp); 1742 1743 /* 1744 * As we reexecute pio's children, new children could be created. 1745 * New children go to the head of pio's io_child_list, however, 1746 * so we will (correctly) not reexecute them. The key is that 1747 * the remainder of pio's io_child_list, from 'cio_next' onward, 1748 * cannot be affected by any side effects of reexecuting 'cio'. 1749 */ |
1662 for (cio = zio_walk_children(pio); cio != NULL; cio = cio_next) { 1663 cio_next = zio_walk_children(pio); | 1750 zio_link_t *zl = NULL; 1751 for (cio = zio_walk_children(pio, &zl); cio != NULL; cio = cio_next) { 1752 cio_next = zio_walk_children(pio, &zl); |
1664 mutex_enter(&pio->io_lock); 1665 for (int w = 0; w < ZIO_WAIT_TYPES; w++) 1666 pio->io_children[cio->io_child_type][w]++; 1667 mutex_exit(&pio->io_lock); 1668 zio_reexecute(cio); 1669 } 1670 1671 /* 1672 * Now that all children have been reexecuted, execute the parent. 1673 * We don't reexecute "The Godfather" I/O here as it's the 1674 * responsibility of the caller to wait on him. 1675 */ | 1753 mutex_enter(&pio->io_lock); 1754 for (int w = 0; w < ZIO_WAIT_TYPES; w++) 1755 pio->io_children[cio->io_child_type][w]++; 1756 mutex_exit(&pio->io_lock); 1757 zio_reexecute(cio); 1758 } 1759 1760 /* 1761 * Now that all children have been reexecuted, execute the parent. 1762 * We don't reexecute "The Godfather" I/O here as it's the 1763 * responsibility of the caller to wait on him. 1764 */ |
1676 if (!(pio->io_flags & ZIO_FLAG_GODFATHER)) | 1765 if (!(pio->io_flags & ZIO_FLAG_GODFATHER)) { 1766 pio->io_queued_timestamp = gethrtime(); |
1677 zio_execute(pio); | 1767 zio_execute(pio); |
1768 } |
|
1678} 1679 1680void 1681zio_suspend(spa_t *spa, zio_t *zio) 1682{ 1683 if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_PANIC) 1684 fm_panic("Pool '%s' has encountered an uncorrectable I/O " 1685 "failure and the failure mode property for this pool " --- 377 unchanged lines hidden (view full) --- 2063 } 2064 mutex_exit(&pio->io_lock); 2065} 2066 2067static int 2068zio_write_gang_block(zio_t *pio) 2069{ 2070 spa_t *spa = pio->io_spa; | 1769} 1770 1771void 1772zio_suspend(spa_t *spa, zio_t *zio) 1773{ 1774 if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_PANIC) 1775 fm_panic("Pool '%s' has encountered an uncorrectable I/O " 1776 "failure and the failure mode property for this pool " --- 377 unchanged lines hidden (view full) --- 2154 } 2155 mutex_exit(&pio->io_lock); 2156} 2157 2158static int 2159zio_write_gang_block(zio_t *pio) 2160{ 2161 spa_t *spa = pio->io_spa; |
2162 metaslab_class_t *mc = spa_normal_class(spa); |
|
2071 blkptr_t *bp = pio->io_bp; 2072 zio_t *gio = pio->io_gang_leader; 2073 zio_t *zio; 2074 zio_gang_node_t *gn, **gnpp; 2075 zio_gbh_phys_t *gbh; 2076 uint64_t txg = pio->io_txg; 2077 uint64_t resid = pio->io_size; 2078 uint64_t lsize; 2079 int copies = gio->io_prop.zp_copies; 2080 int gbh_copies = MIN(copies + 1, spa_max_replication(spa)); 2081 zio_prop_t zp; 2082 int error; 2083 | 2163 blkptr_t *bp = pio->io_bp; 2164 zio_t *gio = pio->io_gang_leader; 2165 zio_t *zio; 2166 zio_gang_node_t *gn, **gnpp; 2167 zio_gbh_phys_t *gbh; 2168 uint64_t txg = pio->io_txg; 2169 uint64_t resid = pio->io_size; 2170 uint64_t lsize; 2171 int copies = gio->io_prop.zp_copies; 2172 int gbh_copies = MIN(copies + 1, spa_max_replication(spa)); 2173 zio_prop_t zp; 2174 int error; 2175 |
2084 error = metaslab_alloc(spa, spa_normal_class(spa), SPA_GANGBLOCKSIZE, 2085 bp, gbh_copies, txg, pio == gio ? NULL : gio->io_bp, 2086 METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER); | 2176 int flags = METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER; 2177 if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) { 2178 ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE); 2179 ASSERT(!(pio->io_flags & ZIO_FLAG_NODATA)); 2180 2181 flags |= METASLAB_ASYNC_ALLOC; 2182 VERIFY(refcount_held(&mc->mc_alloc_slots, pio)); 2183 2184 /* 2185 * The logical zio has already placed a reservation for 2186 * 'copies' allocation slots but gang blocks may require 2187 * additional copies. These additional copies 2188 * (i.e. gbh_copies - copies) are guaranteed to succeed 2189 * since metaslab_class_throttle_reserve() always allows 2190 * additional reservations for gang blocks. 2191 */ 2192 VERIFY(metaslab_class_throttle_reserve(mc, gbh_copies - copies, 2193 pio, flags)); 2194 } 2195 2196 error = metaslab_alloc(spa, mc, SPA_GANGBLOCKSIZE, 2197 bp, gbh_copies, txg, pio == gio ? NULL : gio->io_bp, flags, pio); |
2087 if (error) { | 2198 if (error) { |
2199 if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) { 2200 ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE); 2201 ASSERT(!(pio->io_flags & ZIO_FLAG_NODATA)); 2202 2203 /* 2204 * If we failed to allocate the gang block header then 2205 * we remove any additional allocation reservations that 2206 * we placed here. The original reservation will 2207 * be removed when the logical I/O goes to the ready 2208 * stage. 2209 */ 2210 metaslab_class_throttle_unreserve(mc, 2211 gbh_copies - copies, pio); 2212 } |
|
2088 pio->io_error = error; 2089 return (ZIO_PIPELINE_CONTINUE); 2090 } 2091 2092 if (pio == gio) { 2093 gnpp = &gio->io_gang_tree; 2094 } else { 2095 gnpp = pio->io_private; --- 22 unchanged lines hidden (view full) --- 2118 zp.zp_compress = ZIO_COMPRESS_OFF; 2119 zp.zp_type = DMU_OT_NONE; 2120 zp.zp_level = 0; 2121 zp.zp_copies = gio->io_prop.zp_copies; 2122 zp.zp_dedup = B_FALSE; 2123 zp.zp_dedup_verify = B_FALSE; 2124 zp.zp_nopwrite = B_FALSE; 2125 | 2213 pio->io_error = error; 2214 return (ZIO_PIPELINE_CONTINUE); 2215 } 2216 2217 if (pio == gio) { 2218 gnpp = &gio->io_gang_tree; 2219 } else { 2220 gnpp = pio->io_private; --- 22 unchanged lines hidden (view full) --- 2243 zp.zp_compress = ZIO_COMPRESS_OFF; 2244 zp.zp_type = DMU_OT_NONE; 2245 zp.zp_level = 0; 2246 zp.zp_copies = gio->io_prop.zp_copies; 2247 zp.zp_dedup = B_FALSE; 2248 zp.zp_dedup_verify = B_FALSE; 2249 zp.zp_nopwrite = B_FALSE; 2250 |
2126 zio_nowait(zio_write(zio, spa, txg, &gbh->zg_blkptr[g], | 2251 zio_t *cio = zio_write(zio, spa, txg, &gbh->zg_blkptr[g], |
2127 (char *)pio->io_data + (pio->io_size - resid), lsize, &zp, 2128 zio_write_gang_member_ready, NULL, NULL, NULL, 2129 &gn->gn_child[g], pio->io_priority, | 2252 (char *)pio->io_data + (pio->io_size - resid), lsize, &zp, 2253 zio_write_gang_member_ready, NULL, NULL, NULL, 2254 &gn->gn_child[g], pio->io_priority, |
2130 ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark)); | 2255 ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark); 2256 2257 if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) { 2258 ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE); 2259 ASSERT(!(pio->io_flags & ZIO_FLAG_NODATA)); 2260 2261 /* 2262 * Gang children won't throttle but we should 2263 * account for their work, so reserve an allocation 2264 * slot for them here. 2265 */ 2266 VERIFY(metaslab_class_throttle_reserve(mc, 2267 zp.zp_copies, cio, flags)); 2268 } 2269 zio_nowait(cio); |
2131 } 2132 2133 /* 2134 * Set pio's pipeline to just wait for zio to finish. 2135 */ 2136 pio->io_pipeline = ZIO_INTERLOCK_PIPELINE; 2137 2138 zio_nowait(zio); --- 241 unchanged lines hidden (view full) --- 2380 return; 2381 2382 ddt_enter(ddt); 2383 2384 ASSERT(dde->dde_lead_zio[p] == zio); 2385 2386 ddt_phys_fill(ddp, zio->io_bp); 2387 | 2270 } 2271 2272 /* 2273 * Set pio's pipeline to just wait for zio to finish. 2274 */ 2275 pio->io_pipeline = ZIO_INTERLOCK_PIPELINE; 2276 2277 zio_nowait(zio); --- 241 unchanged lines hidden (view full) --- 2519 return; 2520 2521 ddt_enter(ddt); 2522 2523 ASSERT(dde->dde_lead_zio[p] == zio); 2524 2525 ddt_phys_fill(ddp, zio->io_bp); 2526 |
2388 while ((pio = zio_walk_parents(zio)) != NULL) | 2527 zio_link_t *zl = NULL; 2528 while ((pio = zio_walk_parents(zio, &zl)) != NULL) |
2389 ddt_bp_fill(ddp, pio->io_bp, zio->io_txg); 2390 2391 ddt_exit(ddt); 2392} 2393 2394static void 2395zio_ddt_child_write_done(zio_t *zio) 2396{ --- 4 unchanged lines hidden (view full) --- 2401 2402 ddt_enter(ddt); 2403 2404 ASSERT(ddp->ddp_refcnt == 0); 2405 ASSERT(dde->dde_lead_zio[p] == zio); 2406 dde->dde_lead_zio[p] = NULL; 2407 2408 if (zio->io_error == 0) { | 2529 ddt_bp_fill(ddp, pio->io_bp, zio->io_txg); 2530 2531 ddt_exit(ddt); 2532} 2533 2534static void 2535zio_ddt_child_write_done(zio_t *zio) 2536{ --- 4 unchanged lines hidden (view full) --- 2541 2542 ddt_enter(ddt); 2543 2544 ASSERT(ddp->ddp_refcnt == 0); 2545 ASSERT(dde->dde_lead_zio[p] == zio); 2546 dde->dde_lead_zio[p] = NULL; 2547 2548 if (zio->io_error == 0) { |
2409 while (zio_walk_parents(zio) != NULL) | 2549 zio_link_t *zl = NULL; 2550 while (zio_walk_parents(zio, &zl) != NULL) |
2410 ddt_phys_addref(ddp); 2411 } else { 2412 ddt_phys_clear(ddp); 2413 } 2414 2415 ddt_exit(ddt); 2416} 2417 --- 161 unchanged lines hidden (view full) --- 2579 return (ZIO_PIPELINE_CONTINUE); 2580} 2581 2582/* 2583 * ========================================================================== 2584 * Allocate and free blocks 2585 * ========================================================================== 2586 */ | 2551 ddt_phys_addref(ddp); 2552 } else { 2553 ddt_phys_clear(ddp); 2554 } 2555 2556 ddt_exit(ddt); 2557} 2558 --- 161 unchanged lines hidden (view full) --- 2720 return (ZIO_PIPELINE_CONTINUE); 2721} 2722 2723/* 2724 * ========================================================================== 2725 * Allocate and free blocks 2726 * ========================================================================== 2727 */ |
2728 2729static zio_t * 2730zio_io_to_allocate(spa_t *spa) 2731{ 2732 zio_t *zio; 2733 2734 ASSERT(MUTEX_HELD(&spa->spa_alloc_lock)); 2735 2736 zio = avl_first(&spa->spa_alloc_tree); 2737 if (zio == NULL) 2738 return (NULL); 2739 2740 ASSERT(IO_IS_ALLOCATING(zio)); 2741 2742 /* 2743 * Try to place a reservation for this zio. If we're unable to 2744 * reserve then we throttle. 2745 */ 2746 if (!metaslab_class_throttle_reserve(spa_normal_class(spa), 2747 zio->io_prop.zp_copies, zio, 0)) { 2748 return (NULL); 2749 } 2750 2751 avl_remove(&spa->spa_alloc_tree, zio); 2752 ASSERT3U(zio->io_stage, <, ZIO_STAGE_DVA_ALLOCATE); 2753 2754 return (zio); 2755} 2756 |
|
2587static int | 2757static int |
2758zio_dva_throttle(zio_t *zio) 2759{ 2760 spa_t *spa = zio->io_spa; 2761 zio_t *nio; 2762 2763 if (zio->io_priority == ZIO_PRIORITY_SYNC_WRITE || 2764 !spa_normal_class(zio->io_spa)->mc_alloc_throttle_enabled || 2765 zio->io_child_type == ZIO_CHILD_GANG || 2766 zio->io_flags & ZIO_FLAG_NODATA) { 2767 return (ZIO_PIPELINE_CONTINUE); 2768 } 2769 2770 ASSERT(zio->io_child_type > ZIO_CHILD_GANG); 2771 2772 ASSERT3U(zio->io_queued_timestamp, >, 0); 2773 ASSERT(zio->io_stage == ZIO_STAGE_DVA_THROTTLE); 2774 2775 mutex_enter(&spa->spa_alloc_lock); 2776 2777 ASSERT(zio->io_type == ZIO_TYPE_WRITE); 2778 avl_add(&spa->spa_alloc_tree, zio); 2779 2780 nio = zio_io_to_allocate(zio->io_spa); 2781 mutex_exit(&spa->spa_alloc_lock); 2782 2783 if (nio == zio) 2784 return (ZIO_PIPELINE_CONTINUE); 2785 2786 if (nio != NULL) { 2787 ASSERT3U(nio->io_queued_timestamp, <=, 2788 zio->io_queued_timestamp); 2789 ASSERT(nio->io_stage == ZIO_STAGE_DVA_THROTTLE); 2790 /* 2791 * We are passing control to a new zio so make sure that 2792 * it is processed by a different thread. We do this to 2793 * avoid stack overflows that can occur when parents are 2794 * throttled and children are making progress. We allow 2795 * it to go to the head of the taskq since it's already 2796 * been waiting. 2797 */ 2798 zio_taskq_dispatch(nio, ZIO_TASKQ_ISSUE, B_TRUE); 2799 } 2800 return (ZIO_PIPELINE_STOP); 2801} 2802 2803void 2804zio_allocate_dispatch(spa_t *spa) 2805{ 2806 zio_t *zio; 2807 2808 mutex_enter(&spa->spa_alloc_lock); 2809 zio = zio_io_to_allocate(spa); 2810 mutex_exit(&spa->spa_alloc_lock); 2811 if (zio == NULL) 2812 return; 2813 2814 ASSERT3U(zio->io_stage, ==, ZIO_STAGE_DVA_THROTTLE); 2815 ASSERT0(zio->io_error); 2816 zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, B_TRUE); 2817} 2818 2819static int |
|
2588zio_dva_allocate(zio_t *zio) 2589{ 2590 spa_t *spa = zio->io_spa; 2591 metaslab_class_t *mc = spa_normal_class(spa); 2592 blkptr_t *bp = zio->io_bp; 2593 int error; 2594 int flags = 0; 2595 2596 if (zio->io_gang_leader == NULL) { 2597 ASSERT(zio->io_child_type > ZIO_CHILD_GANG); 2598 zio->io_gang_leader = zio; 2599 } 2600 2601 ASSERT(BP_IS_HOLE(bp)); 2602 ASSERT0(BP_GET_NDVAS(bp)); 2603 ASSERT3U(zio->io_prop.zp_copies, >, 0); 2604 ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa)); 2605 ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp)); 2606 | 2820zio_dva_allocate(zio_t *zio) 2821{ 2822 spa_t *spa = zio->io_spa; 2823 metaslab_class_t *mc = spa_normal_class(spa); 2824 blkptr_t *bp = zio->io_bp; 2825 int error; 2826 int flags = 0; 2827 2828 if (zio->io_gang_leader == NULL) { 2829 ASSERT(zio->io_child_type > ZIO_CHILD_GANG); 2830 zio->io_gang_leader = zio; 2831 } 2832 2833 ASSERT(BP_IS_HOLE(bp)); 2834 ASSERT0(BP_GET_NDVAS(bp)); 2835 ASSERT3U(zio->io_prop.zp_copies, >, 0); 2836 ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa)); 2837 ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp)); 2838 |
2607 /* 2608 * The dump device does not support gang blocks so allocation on 2609 * behalf of the dump device (i.e. ZIO_FLAG_NODATA) must avoid 2610 * the "fast" gang feature. 2611 */ 2612 flags |= (zio->io_flags & ZIO_FLAG_NODATA) ? METASLAB_GANG_AVOID : 0; 2613 flags |= (zio->io_flags & ZIO_FLAG_GANG_CHILD) ? 2614 METASLAB_GANG_CHILD : 0; | 2839 if (zio->io_flags & ZIO_FLAG_NODATA) { 2840 flags |= METASLAB_DONT_THROTTLE; 2841 } 2842 if (zio->io_flags & ZIO_FLAG_GANG_CHILD) { 2843 flags |= METASLAB_GANG_CHILD; 2844 } 2845 if (zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE) { 2846 flags |= METASLAB_ASYNC_ALLOC; 2847 } 2848 |
2615 error = metaslab_alloc(spa, mc, zio->io_size, bp, | 2849 error = metaslab_alloc(spa, mc, zio->io_size, bp, |
2616 zio->io_prop.zp_copies, zio->io_txg, NULL, flags); | 2850 zio->io_prop.zp_copies, zio->io_txg, NULL, flags, zio); |
2617 | 2851 |
2618 if (error) { | 2852 if (error != 0) { |
2619 spa_dbgmsg(spa, "%s: metaslab allocation failure: zio %p, " 2620 "size %llu, error %d", spa_name(spa), zio, zio->io_size, 2621 error); 2622 if (error == ENOSPC && zio->io_size > SPA_MINBLOCKSIZE) 2623 return (zio_write_gang_block(zio)); 2624 zio->io_error = error; 2625 } 2626 --- 48 unchanged lines hidden (view full) --- 2675int 2676zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp, 2677 uint64_t size, boolean_t use_slog) 2678{ 2679 int error = 1; 2680 2681 ASSERT(txg > spa_syncing_txg(spa)); 2682 | 2853 spa_dbgmsg(spa, "%s: metaslab allocation failure: zio %p, " 2854 "size %llu, error %d", spa_name(spa), zio, zio->io_size, 2855 error); 2856 if (error == ENOSPC && zio->io_size > SPA_MINBLOCKSIZE) 2857 return (zio_write_gang_block(zio)); 2858 zio->io_error = error; 2859 } 2860 --- 48 unchanged lines hidden (view full) --- 2909int 2910zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp, 2911 uint64_t size, boolean_t use_slog) 2912{ 2913 int error = 1; 2914 2915 ASSERT(txg > spa_syncing_txg(spa)); 2916 |
2683 /* 2684 * ZIL blocks are always contiguous (i.e. not gang blocks) so we 2685 * set the METASLAB_GANG_AVOID flag so that they don't "fast gang" 2686 * when allocating them. 2687 */ | |
2688 if (use_slog) { 2689 error = metaslab_alloc(spa, spa_log_class(spa), size, | 2917 if (use_slog) { 2918 error = metaslab_alloc(spa, spa_log_class(spa), size, |
2690 new_bp, 1, txg, old_bp, 2691 METASLAB_HINTBP_AVOID | METASLAB_GANG_AVOID); | 2919 new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID, NULL); |
2692 } 2693 2694 if (error) { 2695 error = metaslab_alloc(spa, spa_normal_class(spa), size, | 2920 } 2921 2922 if (error) { 2923 error = metaslab_alloc(spa, spa_normal_class(spa), size, |
2696 new_bp, 1, txg, old_bp, 2697 METASLAB_HINTBP_AVOID); | 2924 new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID, NULL); |
2698 } 2699 2700 if (error == 0) { 2701 BP_SET_LSIZE(new_bp, size); 2702 BP_SET_PSIZE(new_bp, size); 2703 BP_SET_COMPRESS(new_bp, ZIO_COMPRESS_OFF); 2704 BP_SET_CHECKSUM(new_bp, 2705 spa_version(spa) >= SPA_VERSION_SLIM_ZIL --- 59 unchanged lines hidden (view full) --- 2765 } 2766 2767 if (vd->vdev_ops->vdev_op_leaf && zio->io_type == ZIO_TYPE_FREE && 2768 zio->io_priority == ZIO_PRIORITY_NOW) { 2769 trim_map_free(vd, zio->io_offset, zio->io_size, zio->io_txg); 2770 return (ZIO_PIPELINE_CONTINUE); 2771 } 2772 | 2925 } 2926 2927 if (error == 0) { 2928 BP_SET_LSIZE(new_bp, size); 2929 BP_SET_PSIZE(new_bp, size); 2930 BP_SET_COMPRESS(new_bp, ZIO_COMPRESS_OFF); 2931 BP_SET_CHECKSUM(new_bp, 2932 spa_version(spa) >= SPA_VERSION_SLIM_ZIL --- 59 unchanged lines hidden (view full) --- 2992 } 2993 2994 if (vd->vdev_ops->vdev_op_leaf && zio->io_type == ZIO_TYPE_FREE && 2995 zio->io_priority == ZIO_PRIORITY_NOW) { 2996 trim_map_free(vd, zio->io_offset, zio->io_size, zio->io_txg); 2997 return (ZIO_PIPELINE_CONTINUE); 2998 } 2999 |
3000 ASSERT3P(zio->io_logical, !=, zio); 3001 |
|
2773 /* 2774 * We keep track of time-sensitive I/Os so that the scan thread 2775 * can quickly react to certain workloads. In particular, we care 2776 * about non-scrubbing, top-level reads and writes with the following 2777 * characteristics: 2778 * - synchronous writes of user data to non-slog devices 2779 * - any reads of user data 2780 * When these conditions are met, adjust the timestamp of spa_last_io --- 402 unchanged lines hidden (view full) --- 3183 * I/O completion 3184 * ========================================================================== 3185 */ 3186static int 3187zio_ready(zio_t *zio) 3188{ 3189 blkptr_t *bp = zio->io_bp; 3190 zio_t *pio, *pio_next; | 3002 /* 3003 * We keep track of time-sensitive I/Os so that the scan thread 3004 * can quickly react to certain workloads. In particular, we care 3005 * about non-scrubbing, top-level reads and writes with the following 3006 * characteristics: 3007 * - synchronous writes of user data to non-slog devices 3008 * - any reads of user data 3009 * When these conditions are met, adjust the timestamp of spa_last_io --- 402 unchanged lines hidden (view full) --- 3412 * I/O completion 3413 * ========================================================================== 3414 */ 3415static int 3416zio_ready(zio_t *zio) 3417{ 3418 blkptr_t *bp = zio->io_bp; 3419 zio_t *pio, *pio_next; |
3420 zio_link_t *zl = NULL; |
|
3191 3192 if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_READY) || 3193 zio_wait_for_children(zio, ZIO_CHILD_DDT, ZIO_WAIT_READY)) 3194 return (ZIO_PIPELINE_STOP); 3195 3196 if (zio->io_ready) { 3197 ASSERT(IO_IS_ALLOCATING(zio)); 3198 ASSERT(bp->blk_birth == zio->io_txg || BP_IS_HOLE(bp) || 3199 (zio->io_flags & ZIO_FLAG_NOPWRITE)); 3200 ASSERT(zio->io_children[ZIO_CHILD_GANG][ZIO_WAIT_READY] == 0); 3201 3202 zio->io_ready(zio); 3203 } 3204 3205 if (bp != NULL && bp != &zio->io_bp_copy) 3206 zio->io_bp_copy = *bp; 3207 | 3421 3422 if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_READY) || 3423 zio_wait_for_children(zio, ZIO_CHILD_DDT, ZIO_WAIT_READY)) 3424 return (ZIO_PIPELINE_STOP); 3425 3426 if (zio->io_ready) { 3427 ASSERT(IO_IS_ALLOCATING(zio)); 3428 ASSERT(bp->blk_birth == zio->io_txg || BP_IS_HOLE(bp) || 3429 (zio->io_flags & ZIO_FLAG_NOPWRITE)); 3430 ASSERT(zio->io_children[ZIO_CHILD_GANG][ZIO_WAIT_READY] == 0); 3431 3432 zio->io_ready(zio); 3433 } 3434 3435 if (bp != NULL && bp != &zio->io_bp_copy) 3436 zio->io_bp_copy = *bp; 3437 |
3208 if (zio->io_error) | 3438 if (zio->io_error != 0) { |
3209 zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; 3210 | 3439 zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; 3440 |
3441 if (zio->io_flags & ZIO_FLAG_IO_ALLOCATING) { 3442 ASSERT(IO_IS_ALLOCATING(zio)); 3443 ASSERT(zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE); 3444 /* 3445 * We were unable to allocate anything, unreserve and 3446 * issue the next I/O to allocate. 3447 */ 3448 metaslab_class_throttle_unreserve( 3449 spa_normal_class(zio->io_spa), 3450 zio->io_prop.zp_copies, zio); 3451 zio_allocate_dispatch(zio->io_spa); 3452 } 3453 } 3454 |
|
3211 mutex_enter(&zio->io_lock); 3212 zio->io_state[ZIO_WAIT_READY] = 1; | 3455 mutex_enter(&zio->io_lock); 3456 zio->io_state[ZIO_WAIT_READY] = 1; |
3213 pio = zio_walk_parents(zio); | 3457 pio = zio_walk_parents(zio, &zl); |
3214 mutex_exit(&zio->io_lock); 3215 3216 /* 3217 * As we notify zio's parents, new parents could be added. 3218 * New parents go to the head of zio's io_parent_list, however, 3219 * so we will (correctly) not notify them. The remainder of zio's 3220 * io_parent_list, from 'pio_next' onward, cannot change because 3221 * all parents must wait for us to be done before they can be done. 3222 */ 3223 for (; pio != NULL; pio = pio_next) { | 3458 mutex_exit(&zio->io_lock); 3459 3460 /* 3461 * As we notify zio's parents, new parents could be added. 3462 * New parents go to the head of zio's io_parent_list, however, 3463 * so we will (correctly) not notify them. The remainder of zio's 3464 * io_parent_list, from 'pio_next' onward, cannot change because 3465 * all parents must wait for us to be done before they can be done. 3466 */ 3467 for (; pio != NULL; pio = pio_next) { |
3224 pio_next = zio_walk_parents(zio); | 3468 pio_next = zio_walk_parents(zio, &zl); |
3225 zio_notify_parent(pio, zio, ZIO_WAIT_READY); 3226 } 3227 3228 if (zio->io_flags & ZIO_FLAG_NODATA) { 3229 if (BP_IS_GANG(bp)) { 3230 zio->io_flags &= ~ZIO_FLAG_NODATA; 3231 } else { 3232 ASSERT((uintptr_t)zio->io_data < SPA_MAXBLOCKSIZE); 3233 zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES; 3234 } 3235 } 3236 3237 if (zio_injection_enabled && 3238 zio->io_spa->spa_syncing_txg == zio->io_txg) 3239 zio_handle_ignored_writes(zio); 3240 3241 return (ZIO_PIPELINE_CONTINUE); 3242} 3243 | 3469 zio_notify_parent(pio, zio, ZIO_WAIT_READY); 3470 } 3471 3472 if (zio->io_flags & ZIO_FLAG_NODATA) { 3473 if (BP_IS_GANG(bp)) { 3474 zio->io_flags &= ~ZIO_FLAG_NODATA; 3475 } else { 3476 ASSERT((uintptr_t)zio->io_data < SPA_MAXBLOCKSIZE); 3477 zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES; 3478 } 3479 } 3480 3481 if (zio_injection_enabled && 3482 zio->io_spa->spa_syncing_txg == zio->io_txg) 3483 zio_handle_ignored_writes(zio); 3484 3485 return (ZIO_PIPELINE_CONTINUE); 3486} 3487 |
3488/* 3489 * Update the allocation throttle accounting. 3490 */ 3491static void 3492zio_dva_throttle_done(zio_t *zio) 3493{ 3494 zio_t *lio = zio->io_logical; 3495 zio_t *pio = zio_unique_parent(zio); 3496 vdev_t *vd = zio->io_vd; 3497 int flags = METASLAB_ASYNC_ALLOC; 3498 3499 ASSERT3P(zio->io_bp, !=, NULL); 3500 ASSERT3U(zio->io_type, ==, ZIO_TYPE_WRITE); 3501 ASSERT3U(zio->io_priority, ==, ZIO_PRIORITY_ASYNC_WRITE); 3502 ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_VDEV); 3503 ASSERT(vd != NULL); 3504 ASSERT3P(vd, ==, vd->vdev_top); 3505 ASSERT(!(zio->io_flags & (ZIO_FLAG_IO_REPAIR | ZIO_FLAG_IO_RETRY))); 3506 ASSERT(zio->io_flags & ZIO_FLAG_IO_ALLOCATING); 3507 ASSERT(!(lio->io_flags & ZIO_FLAG_IO_REWRITE)); 3508 ASSERT(!(lio->io_orig_flags & ZIO_FLAG_NODATA)); 3509 3510 /* 3511 * Parents of gang children can have two flavors -- ones that 3512 * allocated the gang header (will have ZIO_FLAG_IO_REWRITE set) 3513 * and ones that allocated the constituent blocks. The allocation 3514 * throttle needs to know the allocating parent zio so we must find 3515 * it here. 3516 */ 3517 if (pio->io_child_type == ZIO_CHILD_GANG) { 3518 /* 3519 * If our parent is a rewrite gang child then our grandparent 3520 * would have been the one that performed the allocation. 3521 */ 3522 if (pio->io_flags & ZIO_FLAG_IO_REWRITE) 3523 pio = zio_unique_parent(pio); 3524 flags |= METASLAB_GANG_CHILD; 3525 } 3526 3527 ASSERT(IO_IS_ALLOCATING(pio)); 3528 ASSERT3P(zio, !=, zio->io_logical); 3529 ASSERT(zio->io_logical != NULL); 3530 ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REPAIR)); 3531 ASSERT0(zio->io_flags & ZIO_FLAG_NOPWRITE); 3532 3533 mutex_enter(&pio->io_lock); 3534 metaslab_group_alloc_decrement(zio->io_spa, vd->vdev_id, pio, flags); 3535 mutex_exit(&pio->io_lock); 3536 3537 metaslab_class_throttle_unreserve(spa_normal_class(zio->io_spa), 3538 1, pio); 3539 3540 /* 3541 * Call into the pipeline to see if there is more work that 3542 * needs to be done. If there is work to be done it will be 3543 * dispatched to another taskq thread. 3544 */ 3545 zio_allocate_dispatch(zio->io_spa); 3546} 3547 |
|
3244static int 3245zio_done(zio_t *zio) 3246{ 3247 spa_t *spa = zio->io_spa; 3248 zio_t *lio = zio->io_logical; 3249 blkptr_t *bp = zio->io_bp; 3250 vdev_t *vd = zio->io_vd; 3251 uint64_t psize = zio->io_size; 3252 zio_t *pio, *pio_next; | 3548static int 3549zio_done(zio_t *zio) 3550{ 3551 spa_t *spa = zio->io_spa; 3552 zio_t *lio = zio->io_logical; 3553 blkptr_t *bp = zio->io_bp; 3554 vdev_t *vd = zio->io_vd; 3555 uint64_t psize = zio->io_size; 3556 zio_t *pio, *pio_next; |
3557 metaslab_class_t *mc = spa_normal_class(spa); 3558 zio_link_t *zl = NULL; |
|
3253 3254 /* 3255 * If our children haven't all completed, 3256 * wait for them and then repeat this pipeline stage. 3257 */ 3258 if (zio_wait_for_children(zio, ZIO_CHILD_VDEV, ZIO_WAIT_DONE) || 3259 zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_DONE) || 3260 zio_wait_for_children(zio, ZIO_CHILD_DDT, ZIO_WAIT_DONE) || 3261 zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_DONE)) 3262 return (ZIO_PIPELINE_STOP); 3263 | 3559 3560 /* 3561 * If our children haven't all completed, 3562 * wait for them and then repeat this pipeline stage. 3563 */ 3564 if (zio_wait_for_children(zio, ZIO_CHILD_VDEV, ZIO_WAIT_DONE) || 3565 zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_DONE) || 3566 zio_wait_for_children(zio, ZIO_CHILD_DDT, ZIO_WAIT_DONE) || 3567 zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_DONE)) 3568 return (ZIO_PIPELINE_STOP); 3569 |
3570 /* 3571 * If the allocation throttle is enabled, then update the accounting. 3572 * We only track child I/Os that are part of an allocating async 3573 * write. We must do this since the allocation is performed 3574 * by the logical I/O but the actual write is done by child I/Os. 3575 */ 3576 if (zio->io_flags & ZIO_FLAG_IO_ALLOCATING && 3577 zio->io_child_type == ZIO_CHILD_VDEV) { 3578 ASSERT(mc->mc_alloc_throttle_enabled); 3579 zio_dva_throttle_done(zio); 3580 } 3581 3582 /* 3583 * If the allocation throttle is enabled, verify that 3584 * we have decremented the refcounts for every I/O that was throttled. 3585 */ 3586 if (zio->io_flags & ZIO_FLAG_IO_ALLOCATING) { 3587 ASSERT(zio->io_type == ZIO_TYPE_WRITE); 3588 ASSERT(zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE); 3589 ASSERT(bp != NULL); 3590 metaslab_group_alloc_verify(spa, zio->io_bp, zio); 3591 VERIFY(refcount_not_held(&mc->mc_alloc_slots, zio)); 3592 } 3593 |
|
3264 for (int c = 0; c < ZIO_CHILD_TYPES; c++) 3265 for (int w = 0; w < ZIO_WAIT_TYPES; w++) 3266 ASSERT(zio->io_children[c][w] == 0); 3267 3268 if (bp != NULL && !BP_IS_EMBEDDED(bp)) { 3269 ASSERT(bp->blk_pad[0] == 0); 3270 ASSERT(bp->blk_pad[1] == 0); 3271 ASSERT(bcmp(bp, &zio->io_bp_copy, sizeof (blkptr_t)) == 0 || --- 153 unchanged lines hidden (view full) --- 3425 3426 /* 3427 * "The Godfather" I/O monitors its children but is 3428 * not a true parent to them. It will track them through 3429 * the pipeline but severs its ties whenever they get into 3430 * trouble (e.g. suspended). This allows "The Godfather" 3431 * I/O to return status without blocking. 3432 */ | 3594 for (int c = 0; c < ZIO_CHILD_TYPES; c++) 3595 for (int w = 0; w < ZIO_WAIT_TYPES; w++) 3596 ASSERT(zio->io_children[c][w] == 0); 3597 3598 if (bp != NULL && !BP_IS_EMBEDDED(bp)) { 3599 ASSERT(bp->blk_pad[0] == 0); 3600 ASSERT(bp->blk_pad[1] == 0); 3601 ASSERT(bcmp(bp, &zio->io_bp_copy, sizeof (blkptr_t)) == 0 || --- 153 unchanged lines hidden (view full) --- 3755 3756 /* 3757 * "The Godfather" I/O monitors its children but is 3758 * not a true parent to them. It will track them through 3759 * the pipeline but severs its ties whenever they get into 3760 * trouble (e.g. suspended). This allows "The Godfather" 3761 * I/O to return status without blocking. 3762 */ |
3433 for (pio = zio_walk_parents(zio); pio != NULL; pio = pio_next) { 3434 zio_link_t *zl = zio->io_walk_link; 3435 pio_next = zio_walk_parents(zio); | 3763 zl = NULL; 3764 for (pio = zio_walk_parents(zio, &zl); pio != NULL; 3765 pio = pio_next) { 3766 zio_link_t *remove_zl = zl; 3767 pio_next = zio_walk_parents(zio, &zl); |
3436 3437 if ((pio->io_flags & ZIO_FLAG_GODFATHER) && 3438 (zio->io_reexecute & ZIO_REEXECUTE_SUSPEND)) { | 3768 3769 if ((pio->io_flags & ZIO_FLAG_GODFATHER) && 3770 (zio->io_reexecute & ZIO_REEXECUTE_SUSPEND)) { |
3439 zio_remove_child(pio, zio, zl); | 3771 zio_remove_child(pio, zio, remove_zl); |
3440 zio_notify_parent(pio, zio, ZIO_WAIT_DONE); 3441 } 3442 } 3443 3444 if ((pio = zio_unique_parent(zio)) != NULL) { 3445 /* 3446 * We're not a root i/o, so there's nothing to do 3447 * but notify our parent. Don't propagate errors --- 47 unchanged lines hidden (view full) --- 3495 */ 3496 if (zio->io_done) 3497 zio->io_done(zio); 3498 3499 mutex_enter(&zio->io_lock); 3500 zio->io_state[ZIO_WAIT_DONE] = 1; 3501 mutex_exit(&zio->io_lock); 3502 | 3772 zio_notify_parent(pio, zio, ZIO_WAIT_DONE); 3773 } 3774 } 3775 3776 if ((pio = zio_unique_parent(zio)) != NULL) { 3777 /* 3778 * We're not a root i/o, so there's nothing to do 3779 * but notify our parent. Don't propagate errors --- 47 unchanged lines hidden (view full) --- 3827 */ 3828 if (zio->io_done) 3829 zio->io_done(zio); 3830 3831 mutex_enter(&zio->io_lock); 3832 zio->io_state[ZIO_WAIT_DONE] = 1; 3833 mutex_exit(&zio->io_lock); 3834 |
3503 for (pio = zio_walk_parents(zio); pio != NULL; pio = pio_next) { 3504 zio_link_t *zl = zio->io_walk_link; 3505 pio_next = zio_walk_parents(zio); 3506 zio_remove_child(pio, zio, zl); | 3835 zl = NULL; 3836 for (pio = zio_walk_parents(zio, &zl); pio != NULL; pio = pio_next) { 3837 zio_link_t *remove_zl = zl; 3838 pio_next = zio_walk_parents(zio, &zl); 3839 zio_remove_child(pio, zio, remove_zl); |
3507 zio_notify_parent(pio, zio, ZIO_WAIT_DONE); 3508 } 3509 3510 if (zio->io_waiter != NULL) { 3511 mutex_enter(&zio->io_lock); 3512 zio->io_executor = NULL; 3513 cv_broadcast(&zio->io_cv); 3514 mutex_exit(&zio->io_lock); --- 7 unchanged lines hidden (view full) --- 3522/* 3523 * ========================================================================== 3524 * I/O pipeline definition 3525 * ========================================================================== 3526 */ 3527static zio_pipe_stage_t *zio_pipeline[] = { 3528 NULL, 3529 zio_read_bp_init, | 3840 zio_notify_parent(pio, zio, ZIO_WAIT_DONE); 3841 } 3842 3843 if (zio->io_waiter != NULL) { 3844 mutex_enter(&zio->io_lock); 3845 zio->io_executor = NULL; 3846 cv_broadcast(&zio->io_cv); 3847 mutex_exit(&zio->io_lock); --- 7 unchanged lines hidden (view full) --- 3855/* 3856 * ========================================================================== 3857 * I/O pipeline definition 3858 * ========================================================================== 3859 */ 3860static zio_pipe_stage_t *zio_pipeline[] = { 3861 NULL, 3862 zio_read_bp_init, |
3863 zio_write_bp_init, |
|
3530 zio_free_bp_init, 3531 zio_issue_async, | 3864 zio_free_bp_init, 3865 zio_issue_async, |
3532 zio_write_bp_init, | 3866 zio_write_compress, |
3533 zio_checksum_generate, 3534 zio_nop_write, 3535 zio_ddt_read_start, 3536 zio_ddt_read_done, 3537 zio_ddt_write, 3538 zio_ddt_free, 3539 zio_gang_assemble, 3540 zio_gang_issue, | 3867 zio_checksum_generate, 3868 zio_nop_write, 3869 zio_ddt_read_start, 3870 zio_ddt_read_done, 3871 zio_ddt_write, 3872 zio_ddt_free, 3873 zio_gang_assemble, 3874 zio_gang_issue, |
3875 zio_dva_throttle, |
|
3541 zio_dva_allocate, 3542 zio_dva_free, 3543 zio_dva_claim, 3544 zio_ready, 3545 zio_vdev_io_start, 3546 zio_vdev_io_done, 3547 zio_vdev_io_assess, 3548 zio_checksum_verify, --- 127 unchanged lines hidden --- | 3876 zio_dva_allocate, 3877 zio_dva_free, 3878 zio_dva_claim, 3879 zio_ready, 3880 zio_vdev_io_start, 3881 zio_vdev_io_done, 3882 zio_vdev_io_assess, 3883 zio_checksum_verify, --- 127 unchanged lines hidden --- |