Deleted Added
full compact
zio.c (332547) zio.c (339034)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

--- 27 unchanged lines hidden (view full) ---

36#include <sys/zio_compress.h>
37#include <sys/zio_checksum.h>
38#include <sys/dmu_objset.h>
39#include <sys/arc.h>
40#include <sys/ddt.h>
41#include <sys/trim_map.h>
42#include <sys/blkptr.h>
43#include <sys/zfeature.h>
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

--- 27 unchanged lines hidden (view full) ---

36#include <sys/zio_compress.h>
37#include <sys/zio_checksum.h>
38#include <sys/dmu_objset.h>
39#include <sys/arc.h>
40#include <sys/ddt.h>
41#include <sys/trim_map.h>
42#include <sys/blkptr.h>
43#include <sys/zfeature.h>
44#include <sys/dsl_scan.h>
44#include <sys/metaslab_impl.h>
45#include <sys/abd.h>
46
47SYSCTL_DECL(_vfs_zfs);
48SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO");
49#if defined(__amd64__)
50static int zio_use_uma = 1;
51#else

--- 381 unchanged lines hidden (view full) ---

433 return ((*zl)->zl_parent);
434}
435
436zio_t *
437zio_walk_children(zio_t *pio, zio_link_t **zl)
438{
439 list_t *cl = &pio->io_child_list;
440
45#include <sys/metaslab_impl.h>
46#include <sys/abd.h>
47
48SYSCTL_DECL(_vfs_zfs);
49SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO");
50#if defined(__amd64__)
51static int zio_use_uma = 1;
52#else

--- 381 unchanged lines hidden (view full) ---

434 return ((*zl)->zl_parent);
435}
436
/*
 * Step through pio's io_child_list one link at a time.
 *
 * *zl is the iteration cursor.  Pass a pointer to NULL to start at the head
 * of the list; each call advances *zl to the next link.  Returns the child
 * zio stored in that link, or NULL once the list is exhausted.  At that
 * point *zl is NULL again, so a further call restarts from the head.
 *
 * The caller must hold pio->io_lock (asserted below).
 */
437zio_t *
438zio_walk_children(zio_t *pio, zio_link_t **zl)
439{
440 list_t *cl = &pio->io_child_list;
441
442 ASSERT(MUTEX_HELD(&pio->io_lock));
443
441 *zl = (*zl == NULL) ? list_head(cl) : list_next(cl, *zl); /* NULL cursor means "start at head" */
442 if (*zl == NULL)
443 return (NULL); /* empty list or walked past the last link */
444
445 ASSERT((*zl)->zl_parent == pio); /* every link on this list must point back at pio */
446 return ((*zl)->zl_child);
447}
448

--- 18 unchanged lines hidden (view full) ---

467 * Vdev I/Os can only have vdev children.
468 * The following ASSERT captures all of these constraints.
469 */
470 ASSERT3S(cio->io_child_type, <=, pio->io_child_type);
471
472 zl->zl_parent = pio;
473 zl->zl_child = cio;
474
444 *zl = (*zl == NULL) ? list_head(cl) : list_next(cl, *zl);
445 if (*zl == NULL)
446 return (NULL);
447
448 ASSERT((*zl)->zl_parent == pio);
449 return ((*zl)->zl_child);
450}
451

--- 18 unchanged lines hidden (view full) ---

470 * Vdev I/Os can only have vdev children.
471 * The following ASSERT captures all of these constraints.
472 */
473 ASSERT3S(cio->io_child_type, <=, pio->io_child_type);
474
475 zl->zl_parent = pio;
476 zl->zl_child = cio;
477
475 mutex_enter(&cio->io_lock);
476 mutex_enter(&pio->io_lock);
478 mutex_enter(&pio->io_lock);
479 mutex_enter(&cio->io_lock);
477
478 ASSERT(pio->io_state[ZIO_WAIT_DONE] == 0);
479
480 for (int w = 0; w < ZIO_WAIT_TYPES; w++)
481 pio->io_children[cio->io_child_type][w] += !cio->io_state[w];
482
483 list_insert_head(&pio->io_child_list, zl);
484 list_insert_head(&cio->io_parent_list, zl);
485
486 pio->io_child_count++;
487 cio->io_parent_count++;
488
480
481 ASSERT(pio->io_state[ZIO_WAIT_DONE] == 0);
482
483 for (int w = 0; w < ZIO_WAIT_TYPES; w++)
484 pio->io_children[cio->io_child_type][w] += !cio->io_state[w];
485
486 list_insert_head(&pio->io_child_list, zl);
487 list_insert_head(&cio->io_parent_list, zl);
488
489 pio->io_child_count++;
490 cio->io_parent_count++;
491
489 mutex_exit(&pio->io_lock);
490 mutex_exit(&cio->io_lock);
492 mutex_exit(&cio->io_lock);
493 mutex_exit(&pio->io_lock);
491}
492
493static void
494zio_remove_child(zio_t *pio, zio_t *cio, zio_link_t *zl)
495{
496 ASSERT(zl->zl_parent == pio);
497 ASSERT(zl->zl_child == cio);
498
494}
495
/*
 * Unlink child cio from parent pio and free the link.
 *
 * zl must be the link that joins exactly this pair (asserted).  With both
 * zios' io_lock held, zl is removed from pio's io_child_list and from cio's
 * io_parent_list and the two counters are decremented.  After the locks are
 * dropped, zl is returned to zio_link_cache.
 *
 * NOTE(review): this listing is a flattened diff, so the body below shows
 * two revisions interleaved, each with its own line numbering.  The two
 * copies differ only in lock order: one takes cio->io_lock then
 * pio->io_lock, the other takes pio->io_lock then cio->io_lock; both
 * release in the reverse of their acquisition order.  Presumably the
 * parent-first ordering is the newer one -- confirm against the revision
 * history before relying on it.
 */
496static void
497zio_remove_child(zio_t *pio, zio_t *cio, zio_link_t *zl)
498{
499 ASSERT(zl->zl_parent == pio);
500 ASSERT(zl->zl_child == cio);
501
499 mutex_enter(&cio->io_lock);
500 mutex_enter(&pio->io_lock);
502 mutex_enter(&pio->io_lock);
503 mutex_enter(&cio->io_lock);
501
502 list_remove(&pio->io_child_list, zl);
503 list_remove(&cio->io_parent_list, zl);
504
505 pio->io_child_count--;
506 cio->io_parent_count--;
507
504
505 list_remove(&pio->io_child_list, zl);
506 list_remove(&cio->io_parent_list, zl);
507
508 pio->io_child_count--;
509 cio->io_parent_count--;
510
508 mutex_exit(&pio->io_lock);
509 mutex_exit(&cio->io_lock);
511 mutex_exit(&cio->io_lock);
510
512 mutex_exit(&pio->io_lock);
511 kmem_cache_free(zio_link_cache, zl); /* link is off both lists; freed after the locks are dropped */
512}
513
514static boolean_t
515zio_wait_for_children(zio_t *zio, uint8_t childbits, enum zio_wait_type wait)
516{
517 boolean_t waiting = B_FALSE;
518

--- 464 unchanged lines hidden (view full) ---

983 ASSERT(spa_syncing_txg(spa) == txg);
984 ASSERT(spa_sync_pass(spa) < zfs_sync_pass_deferred_free);
985
986 if (BP_IS_EMBEDDED(bp))
987 return (zio_null(pio, spa, NULL, NULL, NULL, 0));
988
989 metaslab_check_free(spa, bp);
990 arc_freed(spa, bp);
513 kmem_cache_free(zio_link_cache, zl);
514}
515
516static boolean_t
517zio_wait_for_children(zio_t *zio, uint8_t childbits, enum zio_wait_type wait)
518{
519 boolean_t waiting = B_FALSE;
520

--- 464 unchanged lines hidden (view full) ---

985 ASSERT(spa_syncing_txg(spa) == txg);
986 ASSERT(spa_sync_pass(spa) < zfs_sync_pass_deferred_free);
987
988 if (BP_IS_EMBEDDED(bp))
989 return (zio_null(pio, spa, NULL, NULL, NULL, 0));
990
991 metaslab_check_free(spa, bp);
992 arc_freed(spa, bp);
993 dsl_scan_freed(spa, bp);
991
992 if (zfs_trim_enabled)
993 stage |= ZIO_STAGE_ISSUE_ASYNC | ZIO_STAGE_VDEV_IO_START |
994 ZIO_STAGE_VDEV_IO_ASSESS;
995 /*
996 * GANG and DEDUP blocks can induce a read (for the gang block header,
997 * or the DDT), so issue them asynchronously so that this thread is
998 * not tied up.

--- 861 unchanged lines hidden (view full) ---

1860 /*
1861 * As we reexecute pio's children, new children could be created.
1862 * New children go to the head of pio's io_child_list, however,
1863 * so we will (correctly) not reexecute them. The key is that
1864 * the remainder of pio's io_child_list, from 'cio_next' onward,
1865 * cannot be affected by any side effects of reexecuting 'cio'.
1866 */
1867 zio_link_t *zl = NULL;
994
995 if (zfs_trim_enabled)
996 stage |= ZIO_STAGE_ISSUE_ASYNC | ZIO_STAGE_VDEV_IO_START |
997 ZIO_STAGE_VDEV_IO_ASSESS;
998 /*
999 * GANG and DEDUP blocks can induce a read (for the gang block header,
1000 * or the DDT), so issue them asynchronously so that this thread is
1001 * not tied up.

--- 861 unchanged lines hidden (view full) ---

1863 /*
1864 * As we reexecute pio's children, new children could be created.
1865 * New children go to the head of pio's io_child_list, however,
1866 * so we will (correctly) not reexecute them. The key is that
1867 * the remainder of pio's io_child_list, from 'cio_next' onward,
1868 * cannot be affected by any side effects of reexecuting 'cio'.
1869 */
1870 zio_link_t *zl = NULL;
1871 mutex_enter(&pio->io_lock);
1868 for (cio = zio_walk_children(pio, &zl); cio != NULL; cio = cio_next) {
1869 cio_next = zio_walk_children(pio, &zl);
1872 for (cio = zio_walk_children(pio, &zl); cio != NULL; cio = cio_next) {
1873 cio_next = zio_walk_children(pio, &zl);
1870 mutex_enter(&pio->io_lock);
1871 for (int w = 0; w < ZIO_WAIT_TYPES; w++)
1872 pio->io_children[cio->io_child_type][w]++;
1873 mutex_exit(&pio->io_lock);
1874 zio_reexecute(cio);
1874 for (int w = 0; w < ZIO_WAIT_TYPES; w++)
1875 pio->io_children[cio->io_child_type][w]++;
1876 mutex_exit(&pio->io_lock);
1877 zio_reexecute(cio);
1878 mutex_enter(&pio->io_lock);
1875 }
1879 }
1880 mutex_exit(&pio->io_lock);
1876
1877 /*
1878 * Now that all children have been reexecuted, execute the parent.
1879 * We don't reexecute "The Godfather" I/O here as it's the
1880 * responsibility of the caller to wait on it.
1881 */
1882 if (!(pio->io_flags & ZIO_FLAG_GODFATHER)) {
1883 pio->io_queued_timestamp = gethrtime();

--- 1295 unchanged lines hidden (view full) ---

3179
3180 if (zio->io_vd->vdev_removing) {
3181 ASSERT(zio->io_flags &
3182 (ZIO_FLAG_PHYSICAL | ZIO_FLAG_SELF_HEAL |
3183 ZIO_FLAG_INDUCE_DAMAGE));
3184 }
3185 }
3186
1881
1882 /*
1883 * Now that all children have been reexecuted, execute the parent.
1884 * We don't reexecute "The Godfather" I/O here as it's the
1885 * responsibility of the caller to wait on it.
1886 */
1887 if (!(pio->io_flags & ZIO_FLAG_GODFATHER)) {
1888 pio->io_queued_timestamp = gethrtime();

--- 1295 unchanged lines hidden (view full) ---

3184
3185 if (zio->io_vd->vdev_removing) {
3186 ASSERT(zio->io_flags &
3187 (ZIO_FLAG_PHYSICAL | ZIO_FLAG_SELF_HEAL |
3188 ZIO_FLAG_INDUCE_DAMAGE));
3189 }
3190 }
3191
3187 /*
3188 * We keep track of time-sensitive I/Os so that the scan thread
3189 * can quickly react to certain workloads. In particular, we care
3190 * about non-scrubbing, top-level reads and writes with the following
3191 * characteristics:
3192 * - synchronous writes of user data to non-slog devices
3193 * - any reads of user data
3194 * When these conditions are met, adjust the timestamp of spa_last_io
3195 * which allows the scan thread to adjust its workload accordingly.
3196 */
3197 if (!(zio->io_flags & ZIO_FLAG_SCAN_THREAD) && zio->io_bp != NULL &&
3198 vd == vd->vdev_top && !vd->vdev_islog &&
3199 zio->io_bookmark.zb_objset != DMU_META_OBJSET &&
3200 zio->io_txg != spa_syncing_txg(spa)) {
3201 uint64_t old = spa->spa_last_io;
3202 uint64_t new = ddi_get_lbolt64();
3203 if (old != new)
3204 (void) atomic_cas_64(&spa->spa_last_io, old, new);
3205 }
3206
3192 /*
3193 * We keep track of time-sensitive I/Os so that the scan thread
3194 * can quickly react to certain workloads. In particular, we care
3195 * about non-scrubbing, top-level reads and writes with the following
3196 * characteristics:
3197 * - synchronous writes of user data to non-slog devices
3198 * - any reads of user data
3199 * When these conditions are met, adjust the timestamp of spa_last_io
3200 * which allows the scan thread to adjust its workload accordingly.
3201 */
3202 if (!(zio->io_flags & ZIO_FLAG_SCAN_THREAD) && zio->io_bp != NULL &&
3203 vd == vd->vdev_top && !vd->vdev_islog &&
3204 zio->io_bookmark.zb_objset != DMU_META_OBJSET &&
3205 zio->io_txg != spa_syncing_txg(spa)) {
3206 uint64_t old = spa->spa_last_io;
3207 uint64_t new = ddi_get_lbolt64();
3208 if (old != new)
3209 (void) atomic_cas_64(&spa->spa_last_io, old, new);
3210 }
3207 align = 1ULL << vd->vdev_top->vdev_ashift;
3208
3209 if (!(zio->io_flags & ZIO_FLAG_PHYSICAL) &&
3210 P2PHASE(zio->io_size, align) != 0) {
3211 /* Transform logical writes to be a full physical block size. */
3212 uint64_t asize = P2ROUNDUP(zio->io_size, align);
3213 abd_t *abuf = NULL;
3214 if (zio->io_type == ZIO_TYPE_READ ||

--- 133 unchanged lines hidden (view full) ---

3348
3349 if (unexpected_error)
3350 VERIFY(vdev_probe(vd, zio) == NULL);
3351
3352 return (ZIO_PIPELINE_CONTINUE);
3353}
3354
3355/*
3211 align = 1ULL << vd->vdev_top->vdev_ashift;
3212
3213 if (!(zio->io_flags & ZIO_FLAG_PHYSICAL) &&
3214 P2PHASE(zio->io_size, align) != 0) {
3215 /* Transform logical writes to be a full physical block size. */
3216 uint64_t asize = P2ROUNDUP(zio->io_size, align);
3217 abd_t *abuf = NULL;
3218 if (zio->io_type == ZIO_TYPE_READ ||

--- 133 unchanged lines hidden (view full) ---

3352
3353 if (unexpected_error)
3354 VERIFY(vdev_probe(vd, zio) == NULL);
3355
3356 return (ZIO_PIPELINE_CONTINUE);
3357}
3358
3359/*
3360 * This function is used to change the priority of an existing zio that is
3361 * currently in-flight. This is used by the arc to upgrade priority in the
3362 * event that a demand read is made for a block that is currently queued
3363 * as a scrub or async read IO. Otherwise, the high priority read request
3364 * would end up having to wait for the lower priority IO.
 *
 * The new priority is applied to pio itself and then, recursively, to every
 * zio on pio's child list.  pio->io_lock is held for the whole walk,
 * including the recursive calls, so a parent's lock is always taken before
 * any of its children's locks.
 *
 * priority must be less than ZIO_PRIORITY_NUM_QUEUEABLE (asserted).
3365 */
3366void
3367zio_change_priority(zio_t *pio, zio_priority_t priority)
3368{
3369 zio_t *cio, *cio_next;
3370 zio_link_t *zl = NULL; /* NULL cursor: the walk starts at the head of the child list */
3371
3372 ASSERT3U(priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
3373
3374 if (pio->io_vd != NULL && pio->io_vd->vdev_ops->vdev_op_leaf) {
 /*
  * Leaf-vdev I/O: io_priority is not written here; the change is
  * handed to the vdev queue code instead (presumably because the
  * zio may be sitting in a per-priority queue -- confirm in
  * vdev_queue_change_io_priority()).
  */
3375 vdev_queue_change_io_priority(pio, priority);
3376 } else {
3377 pio->io_priority = priority;
3378 }
3379
3380 mutex_enter(&pio->io_lock); /* zio_walk_children() requires the parent's io_lock */
3381 for (cio = zio_walk_children(pio, &zl); cio != NULL; cio = cio_next) {
3382 cio_next = zio_walk_children(pio, &zl); /* advance the cursor before recursing into cio */
3383 zio_change_priority(cio, priority);
3384 }
3385 mutex_exit(&pio->io_lock);
3386}
3387
3388/*
3356 * For non-raidz ZIOs, we can just copy aside the bad data read from the
3357 * disk, and use that to finish the checksum ereport later.
3358 */
3359static void
3360zio_vsd_default_cksum_finish(zio_cksum_report_t *zcr,
3361 const void *good_buf)
3362{
3363 /* no processing needed */

--- 850 unchanged lines hidden ---
3389 * For non-raidz ZIOs, we can just copy aside the bad data read from the
3390 * disk, and use that to finish the checksum ereport later.
3391 */
3392static void
3393zio_vsd_default_cksum_finish(zio_cksum_report_t *zcr,
3394 const void *good_buf)
3395{
3396 /* no processing needed */

--- 850 unchanged lines hidden ---