zio.c (332547) | zio.c (339034) |
---|---|
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE --- 27 unchanged lines hidden (view full) --- 36#include <sys/zio_compress.h> 37#include <sys/zio_checksum.h> 38#include <sys/dmu_objset.h> 39#include <sys/arc.h> 40#include <sys/ddt.h> 41#include <sys/trim_map.h> 42#include <sys/blkptr.h> 43#include <sys/zfeature.h> | 1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE --- 27 unchanged lines hidden (view full) --- 36#include <sys/zio_compress.h> 37#include <sys/zio_checksum.h> 38#include <sys/dmu_objset.h> 39#include <sys/arc.h> 40#include <sys/ddt.h> 41#include <sys/trim_map.h> 42#include <sys/blkptr.h> 43#include <sys/zfeature.h> |
44#include <sys/dsl_scan.h> |
|
44#include <sys/metaslab_impl.h> 45#include <sys/abd.h> 46 47SYSCTL_DECL(_vfs_zfs); 48SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO"); 49#if defined(__amd64__) 50static int zio_use_uma = 1; 51#else --- 381 unchanged lines hidden (view full) --- 433 return ((*zl)->zl_parent); 434} 435 436zio_t * 437zio_walk_children(zio_t *pio, zio_link_t **zl) 438{ 439 list_t *cl = &pio->io_child_list; 440 | 45#include <sys/metaslab_impl.h> 46#include <sys/abd.h> 47 48SYSCTL_DECL(_vfs_zfs); 49SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO"); 50#if defined(__amd64__) 51static int zio_use_uma = 1; 52#else --- 381 unchanged lines hidden (view full) --- 434 return ((*zl)->zl_parent); 435} 436 437zio_t * 438zio_walk_children(zio_t *pio, zio_link_t **zl) 439{ 440 list_t *cl = &pio->io_child_list; 441 |
442 ASSERT(MUTEX_HELD(&pio->io_lock)); 443 |
|
441 *zl = (*zl == NULL) ? list_head(cl) : list_next(cl, *zl); 442 if (*zl == NULL) 443 return (NULL); 444 445 ASSERT((*zl)->zl_parent == pio); 446 return ((*zl)->zl_child); 447} 448 --- 18 unchanged lines hidden (view full) --- 467 * Vdev I/Os can only have vdev children. 468 * The following ASSERT captures all of these constraints. 469 */ 470 ASSERT3S(cio->io_child_type, <=, pio->io_child_type); 471 472 zl->zl_parent = pio; 473 zl->zl_child = cio; 474 | 444 *zl = (*zl == NULL) ? list_head(cl) : list_next(cl, *zl); 445 if (*zl == NULL) 446 return (NULL); 447 448 ASSERT((*zl)->zl_parent == pio); 449 return ((*zl)->zl_child); 450} 451 --- 18 unchanged lines hidden (view full) --- 470 * Vdev I/Os can only have vdev children. 471 * The following ASSERT captures all of these constraints. 472 */ 473 ASSERT3S(cio->io_child_type, <=, pio->io_child_type); 474 475 zl->zl_parent = pio; 476 zl->zl_child = cio; 477 |
475 mutex_enter(&cio->io_lock); | |
476 mutex_enter(&pio->io_lock); | 478 mutex_enter(&pio->io_lock); |
479 mutex_enter(&cio->io_lock); |
|
477 478 ASSERT(pio->io_state[ZIO_WAIT_DONE] == 0); 479 480 for (int w = 0; w < ZIO_WAIT_TYPES; w++) 481 pio->io_children[cio->io_child_type][w] += !cio->io_state[w]; 482 483 list_insert_head(&pio->io_child_list, zl); 484 list_insert_head(&cio->io_parent_list, zl); 485 486 pio->io_child_count++; 487 cio->io_parent_count++; 488 | 480 481 ASSERT(pio->io_state[ZIO_WAIT_DONE] == 0); 482 483 for (int w = 0; w < ZIO_WAIT_TYPES; w++) 484 pio->io_children[cio->io_child_type][w] += !cio->io_state[w]; 485 486 list_insert_head(&pio->io_child_list, zl); 487 list_insert_head(&cio->io_parent_list, zl); 488 489 pio->io_child_count++; 490 cio->io_parent_count++; 491 |
489 mutex_exit(&pio->io_lock); | |
490 mutex_exit(&cio->io_lock); | 492 mutex_exit(&cio->io_lock); |
493 mutex_exit(&pio->io_lock); |
|
491} 492 493static void 494zio_remove_child(zio_t *pio, zio_t *cio, zio_link_t *zl) 495{ 496 ASSERT(zl->zl_parent == pio); 497 ASSERT(zl->zl_child == cio); 498 | 494} 495 496static void 497zio_remove_child(zio_t *pio, zio_t *cio, zio_link_t *zl) 498{ 499 ASSERT(zl->zl_parent == pio); 500 ASSERT(zl->zl_child == cio); 501 |
499 mutex_enter(&cio->io_lock); | |
500 mutex_enter(&pio->io_lock); | 502 mutex_enter(&pio->io_lock); |
503 mutex_enter(&cio->io_lock); |
|
501 502 list_remove(&pio->io_child_list, zl); 503 list_remove(&cio->io_parent_list, zl); 504 505 pio->io_child_count--; 506 cio->io_parent_count--; 507 | 504 505 list_remove(&pio->io_child_list, zl); 506 list_remove(&cio->io_parent_list, zl); 507 508 pio->io_child_count--; 509 cio->io_parent_count--; 510 |
508 mutex_exit(&pio->io_lock); | |
509 mutex_exit(&cio->io_lock); | 511 mutex_exit(&cio->io_lock); |
510 | 512 mutex_exit(&pio->io_lock); |
511 kmem_cache_free(zio_link_cache, zl); 512} 513 514static boolean_t 515zio_wait_for_children(zio_t *zio, uint8_t childbits, enum zio_wait_type wait) 516{ 517 boolean_t waiting = B_FALSE; 518 --- 464 unchanged lines hidden (view full) --- 983 ASSERT(spa_syncing_txg(spa) == txg); 984 ASSERT(spa_sync_pass(spa) < zfs_sync_pass_deferred_free); 985 986 if (BP_IS_EMBEDDED(bp)) 987 return (zio_null(pio, spa, NULL, NULL, NULL, 0)); 988 989 metaslab_check_free(spa, bp); 990 arc_freed(spa, bp); | 513 kmem_cache_free(zio_link_cache, zl); 514} 515 516static boolean_t 517zio_wait_for_children(zio_t *zio, uint8_t childbits, enum zio_wait_type wait) 518{ 519 boolean_t waiting = B_FALSE; 520 --- 464 unchanged lines hidden (view full) --- 985 ASSERT(spa_syncing_txg(spa) == txg); 986 ASSERT(spa_sync_pass(spa) < zfs_sync_pass_deferred_free); 987 988 if (BP_IS_EMBEDDED(bp)) 989 return (zio_null(pio, spa, NULL, NULL, NULL, 0)); 990 991 metaslab_check_free(spa, bp); 992 arc_freed(spa, bp); |
993 dsl_scan_freed(spa, bp); |
|
991 992 if (zfs_trim_enabled) 993 stage |= ZIO_STAGE_ISSUE_ASYNC | ZIO_STAGE_VDEV_IO_START | 994 ZIO_STAGE_VDEV_IO_ASSESS; 995 /* 996 * GANG and DEDUP blocks can induce a read (for the gang block header, 997 * or the DDT), so issue them asynchronously so that this thread is 998 * not tied up. --- 861 unchanged lines hidden (view full) --- 1860 /* 1861 * As we reexecute pio's children, new children could be created. 1862 * New children go to the head of pio's io_child_list, however, 1863 * so we will (correctly) not reexecute them. The key is that 1864 * the remainder of pio's io_child_list, from 'cio_next' onward, 1865 * cannot be affected by any side effects of reexecuting 'cio'. 1866 */ 1867 zio_link_t *zl = NULL; | 994 995 if (zfs_trim_enabled) 996 stage |= ZIO_STAGE_ISSUE_ASYNC | ZIO_STAGE_VDEV_IO_START | 997 ZIO_STAGE_VDEV_IO_ASSESS; 998 /* 999 * GANG and DEDUP blocks can induce a read (for the gang block header, 1000 * or the DDT), so issue them asynchronously so that this thread is 1001 * not tied up. --- 861 unchanged lines hidden (view full) --- 1863 /* 1864 * As we reexecute pio's children, new children could be created. 1865 * New children go to the head of pio's io_child_list, however, 1866 * so we will (correctly) not reexecute them. The key is that 1867 * the remainder of pio's io_child_list, from 'cio_next' onward, 1868 * cannot be affected by any side effects of reexecuting 'cio'. 1869 */ 1870 zio_link_t *zl = NULL; |
1871 mutex_enter(&pio->io_lock); |
|
1868 for (cio = zio_walk_children(pio, &zl); cio != NULL; cio = cio_next) { 1869 cio_next = zio_walk_children(pio, &zl); | 1872 for (cio = zio_walk_children(pio, &zl); cio != NULL; cio = cio_next) { 1873 cio_next = zio_walk_children(pio, &zl); |
1870 mutex_enter(&pio->io_lock); | |
1871 for (int w = 0; w < ZIO_WAIT_TYPES; w++) 1872 pio->io_children[cio->io_child_type][w]++; 1873 mutex_exit(&pio->io_lock); 1874 zio_reexecute(cio); | 1874 for (int w = 0; w < ZIO_WAIT_TYPES; w++) 1875 pio->io_children[cio->io_child_type][w]++; 1876 mutex_exit(&pio->io_lock); 1877 zio_reexecute(cio); |
1878 mutex_enter(&pio->io_lock); |
|
1875 } | 1879 } |
1880 mutex_exit(&pio->io_lock); |
|
1876 1877 /* 1878 * Now that all children have been reexecuted, execute the parent. 1879 * We don't reexecute "The Godfather" I/O here as it's the 1880 * responsibility of the caller to wait on it. 1881 */ 1882 if (!(pio->io_flags & ZIO_FLAG_GODFATHER)) { 1883 pio->io_queued_timestamp = gethrtime(); --- 1295 unchanged lines hidden (view full) --- 3179 3180 if (zio->io_vd->vdev_removing) { 3181 ASSERT(zio->io_flags & 3182 (ZIO_FLAG_PHYSICAL | ZIO_FLAG_SELF_HEAL | 3183 ZIO_FLAG_INDUCE_DAMAGE)); 3184 } 3185 } 3186 | 1881 1882 /* 1883 * Now that all children have been reexecuted, execute the parent. 1884 * We don't reexecute "The Godfather" I/O here as it's the 1885 * responsibility of the caller to wait on it. 1886 */ 1887 if (!(pio->io_flags & ZIO_FLAG_GODFATHER)) { 1888 pio->io_queued_timestamp = gethrtime(); --- 1295 unchanged lines hidden (view full) --- 3184 3185 if (zio->io_vd->vdev_removing) { 3186 ASSERT(zio->io_flags & 3187 (ZIO_FLAG_PHYSICAL | ZIO_FLAG_SELF_HEAL | 3188 ZIO_FLAG_INDUCE_DAMAGE)); 3189 } 3190 } 3191 |
3187 /* 3188 * We keep track of time-sensitive I/Os so that the scan thread 3189 * can quickly react to certain workloads. In particular, we care 3190 * about non-scrubbing, top-level reads and writes with the following 3191 * characteristics: 3192 * - synchronous writes of user data to non-slog devices 3193 * - any reads of user data 3194 * When these conditions are met, adjust the timestamp of spa_last_io 3195 * which allows the scan thread to adjust its workload accordingly. 3196 */ 3197 if (!(zio->io_flags & ZIO_FLAG_SCAN_THREAD) && zio->io_bp != NULL && 3198 vd == vd->vdev_top && !vd->vdev_islog && 3199 zio->io_bookmark.zb_objset != DMU_META_OBJSET && 3200 zio->io_txg != spa_syncing_txg(spa)) { 3201 uint64_t old = spa->spa_last_io; 3202 uint64_t new = ddi_get_lbolt64(); 3203 if (old != new) 3204 (void) atomic_cas_64(&spa->spa_last_io, old, new); 3205 } 3206 | 3192 /* 3193 * We keep track of time-sensitive I/Os so that the scan thread 3194 * can quickly react to certain workloads. In particular, we care 3195 * about non-scrubbing, top-level reads and writes with the following 3196 * characteristics: 3197 * - synchronous writes of user data to non-slog devices 3198 * - any reads of user data 3199 * When these conditions are met, adjust the timestamp of spa_last_io 3200 * which allows the scan thread to adjust its workload accordingly. 3201 */ 3202 if (!(zio->io_flags & ZIO_FLAG_SCAN_THREAD) && zio->io_bp != NULL && 3203 vd == vd->vdev_top && !vd->vdev_islog && 3204 zio->io_bookmark.zb_objset != DMU_META_OBJSET && 3205 zio->io_txg != spa_syncing_txg(spa)) { 3206 uint64_t old = spa->spa_last_io; 3207 uint64_t new = ddi_get_lbolt64(); 3208 if (old != new) 3209 (void) atomic_cas_64(&spa->spa_last_io, old, new); 3210 } |
3207 align = 1ULL << vd->vdev_top->vdev_ashift; 3208 3209 if (!(zio->io_flags & ZIO_FLAG_PHYSICAL) && 3210 P2PHASE(zio->io_size, align) != 0) { 3211 /* Transform logical writes to be a full physical block size. */ 3212 uint64_t asize = P2ROUNDUP(zio->io_size, align); 3213 abd_t *abuf = NULL; 3214 if (zio->io_type == ZIO_TYPE_READ || --- 133 unchanged lines hidden (view full) --- 3348 3349 if (unexpected_error) 3350 VERIFY(vdev_probe(vd, zio) == NULL); 3351 3352 return (ZIO_PIPELINE_CONTINUE); 3353} 3354 3355/* | 3211 align = 1ULL << vd->vdev_top->vdev_ashift; 3212 3213 if (!(zio->io_flags & ZIO_FLAG_PHYSICAL) && 3214 P2PHASE(zio->io_size, align) != 0) { 3215 /* Transform logical writes to be a full physical block size. */ 3216 uint64_t asize = P2ROUNDUP(zio->io_size, align); 3217 abd_t *abuf = NULL; 3218 if (zio->io_type == ZIO_TYPE_READ || --- 133 unchanged lines hidden (view full) --- 3352 3353 if (unexpected_error) 3354 VERIFY(vdev_probe(vd, zio) == NULL); 3355 3356 return (ZIO_PIPELINE_CONTINUE); 3357} 3358 3359/* |
3360 * This function is used to change the priority of an existing zio that is 3361 * currently in-flight. This is used by the arc to upgrade priority in the 3362 * event that a demand read is made for a block that is currently queued 3363 * as a scrub or async read IO. Otherwise, the high priority read request 3364 * would end up having to wait for the lower priority IO. 3365 */ 3366void 3367zio_change_priority(zio_t *pio, zio_priority_t priority) 3368{ 3369 zio_t *cio, *cio_next; 3370 zio_link_t *zl = NULL; 3371 3372 ASSERT3U(priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); 3373 3374 if (pio->io_vd != NULL && pio->io_vd->vdev_ops->vdev_op_leaf) { 3375 vdev_queue_change_io_priority(pio, priority); 3376 } else { 3377 pio->io_priority = priority; 3378 } 3379 3380 mutex_enter(&pio->io_lock); 3381 for (cio = zio_walk_children(pio, &zl); cio != NULL; cio = cio_next) { 3382 cio_next = zio_walk_children(pio, &zl); 3383 zio_change_priority(cio, priority); 3384 } 3385 mutex_exit(&pio->io_lock); 3386} 3387 3388/* |
|
3356 * For non-raidz ZIOs, we can just copy aside the bad data read from the 3357 * disk, and use that to finish the checksum ereport later. 3358 */ 3359static void 3360zio_vsd_default_cksum_finish(zio_cksum_report_t *zcr, 3361 const void *good_buf) 3362{ 3363 /* no processing needed */ --- 850 unchanged lines hidden --- | 3389 * For non-raidz ZIOs, we can just copy aside the bad data read from the 3390 * disk, and use that to finish the checksum ereport later. 3391 */ 3392static void 3393zio_vsd_default_cksum_finish(zio_cksum_report_t *zcr, 3394 const void *good_buf) 3395{ 3396 /* no processing needed */ --- 850 unchanged lines hidden --- |