Deleted Added
sdiff udiff text old ( 339034 ) new ( 339105 )
full compact
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

--- 6 unchanged lines hidden (view full) ---

15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
24 * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
25 * Copyright (c) 2014 Integros [integros.com]
26 */
27
28#include <sys/sysmacros.h>
29#include <sys/zfs_context.h>
30#include <sys/fm/fs/zfs.h>
31#include <sys/spa.h>

--- 7 unchanged lines hidden (view full) ---

39#include <sys/arc.h>
40#include <sys/ddt.h>
41#include <sys/trim_map.h>
42#include <sys/blkptr.h>
43#include <sys/zfeature.h>
44#include <sys/dsl_scan.h>
45#include <sys/metaslab_impl.h>
46#include <sys/abd.h>
47#include <sys/cityhash.h>
48
49SYSCTL_DECL(_vfs_zfs);
50SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO");
51#if defined(__amd64__)
52static int zio_use_uma = 1;
53#else
54static int zio_use_uma = 0;
55#endif

--- 2275 unchanged lines hidden (view full) ---

2331 int error;
2332
2333 int flags = METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER;
2334 if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
2335 ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
2336 ASSERT(!(pio->io_flags & ZIO_FLAG_NODATA));
2337
2338 flags |= METASLAB_ASYNC_ALLOC;
2339 VERIFY(refcount_held(&mc->mc_alloc_slots[pio->io_allocator],
2340 pio));
2341
2342 /*
2343 * The logical zio has already placed a reservation for
2344 * 'copies' allocation slots but gang blocks may require
2345 * additional copies. These additional copies
2346 * (i.e. gbh_copies - copies) are guaranteed to succeed
2347 * since metaslab_class_throttle_reserve() always allows
2348 * additional reservations for gang blocks.
2349 */
2350 VERIFY(metaslab_class_throttle_reserve(mc, gbh_copies - copies,
2351 pio->io_allocator, pio, flags));
2352 }
2353
2354 error = metaslab_alloc(spa, mc, SPA_GANGBLOCKSIZE,
2355 bp, gbh_copies, txg, pio == gio ? NULL : gio->io_bp, flags,
2356 &pio->io_alloc_list, pio, pio->io_allocator);
2357 if (error) {
2358 if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
2359 ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
2360 ASSERT(!(pio->io_flags & ZIO_FLAG_NODATA));
2361
2362 /*
2363 * If we failed to allocate the gang block header then
2364 * we remove any additional allocation reservations that
2365 * we placed here. The original reservation will
2366 * be removed when the logical I/O goes to the ready
2367 * stage.
2368 */
2369 metaslab_class_throttle_unreserve(mc,
2370 gbh_copies - copies, pio->io_allocator, pio);
2371 }
2372 pio->io_error = error;
2373 return (ZIO_PIPELINE_CONTINUE);
2374 }
2375
2376 if (pio == gio) {
2377 gnpp = &gio->io_gang_tree;
2378 } else {

--- 41 unchanged lines hidden (view full) ---

2420 ASSERT(!(pio->io_flags & ZIO_FLAG_NODATA));
2421
2422 /*
2423 * Gang children won't throttle but we should
2424 * account for their work, so reserve an allocation
2425 * slot for them here.
2426 */
2427 VERIFY(metaslab_class_throttle_reserve(mc,
2428 zp.zp_copies, cio->io_allocator, cio, flags));
2429 }
2430 zio_nowait(cio);
2431 }
2432
2433 /*
2434 * Set pio's pipeline to just wait for zio to finish.
2435 */
2436 pio->io_pipeline = ZIO_INTERLOCK_PIPELINE;

--- 473 unchanged lines hidden (view full) ---

2910
2911/*
2912 * ==========================================================================
2913 * Allocate and free blocks
2914 * ==========================================================================
2915 */
2916
2917static zio_t *
2918zio_io_to_allocate(spa_t *spa, int allocator)
2919{
2920 zio_t *zio;
2921
2922 ASSERT(MUTEX_HELD(&spa->spa_alloc_locks[allocator]));
2923
2924 zio = avl_first(&spa->spa_alloc_trees[allocator]);
2925 if (zio == NULL)
2926 return (NULL);
2927
2928 ASSERT(IO_IS_ALLOCATING(zio));
2929
2930 /*
2931 * Try to place a reservation for this zio. If we're unable to
2932 * reserve then we throttle.
2933 */
2934 ASSERT3U(zio->io_allocator, ==, allocator);
2935 if (!metaslab_class_throttle_reserve(spa_normal_class(spa),
2936 zio->io_prop.zp_copies, zio->io_allocator, zio, 0)) {
2937 return (NULL);
2938 }
2939
2940 avl_remove(&spa->spa_alloc_trees[allocator], zio);
2941 ASSERT3U(zio->io_stage, <, ZIO_STAGE_DVA_ALLOCATE);
2942
2943 return (zio);
2944}
2945
2946static int
2947zio_dva_throttle(zio_t *zio)
2948{

--- 7 unchanged lines hidden (view full) ---

2956 return (ZIO_PIPELINE_CONTINUE);
2957 }
2958
2959 ASSERT(zio->io_child_type > ZIO_CHILD_GANG);
2960
2961 ASSERT3U(zio->io_queued_timestamp, >, 0);
2962 ASSERT(zio->io_stage == ZIO_STAGE_DVA_THROTTLE);
2963
2964 zbookmark_phys_t *bm = &zio->io_bookmark;
2965 /*
2966 * We want to try to use as many allocators as possible to help improve
2967 * performance, but we also want logically adjacent IOs to be physically
2968 * adjacent to improve sequential read performance. We chunk each object
2969 * into 2^20 block regions, and then hash based on the objset, object,
2970 * level, and region to accomplish both of these goals.
2971 */
2972 zio->io_allocator = cityhash4(bm->zb_objset, bm->zb_object,
2973 bm->zb_level, bm->zb_blkid >> 20) % spa->spa_alloc_count;
2974 mutex_enter(&spa->spa_alloc_locks[zio->io_allocator]);
2975
2976 ASSERT(zio->io_type == ZIO_TYPE_WRITE);
2977 avl_add(&spa->spa_alloc_trees[zio->io_allocator], zio);
2978
2979 nio = zio_io_to_allocate(zio->io_spa, zio->io_allocator);
2980 mutex_exit(&spa->spa_alloc_locks[zio->io_allocator]);
2981
2982 if (nio == zio)
2983 return (ZIO_PIPELINE_CONTINUE);
2984
2985 if (nio != NULL) {
2986 ASSERT(nio->io_stage == ZIO_STAGE_DVA_THROTTLE);
2987 /*
2988 * We are passing control to a new zio so make sure that

--- 4 unchanged lines hidden (view full) ---

2993 * been waiting.
2994 */
2995 zio_taskq_dispatch(nio, ZIO_TASKQ_ISSUE, B_TRUE);
2996 }
2997 return (ZIO_PIPELINE_STOP);
2998}
2999
3000void
3001zio_allocate_dispatch(spa_t *spa, int allocator)
3002{
3003 zio_t *zio;
3004
3005 mutex_enter(&spa->spa_alloc_locks[allocator]);
3006 zio = zio_io_to_allocate(spa, allocator);
3007 mutex_exit(&spa->spa_alloc_locks[allocator]);
3008 if (zio == NULL)
3009 return;
3010
3011 ASSERT3U(zio->io_stage, ==, ZIO_STAGE_DVA_THROTTLE);
3012 ASSERT0(zio->io_error);
3013 zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, B_TRUE);
3014}
3015

--- 24 unchanged lines hidden (view full) ---

3040 flags |= METASLAB_GANG_CHILD;
3041 }
3042 if (zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE) {
3043 flags |= METASLAB_ASYNC_ALLOC;
3044 }
3045
3046 error = metaslab_alloc(spa, mc, zio->io_size, bp,
3047 zio->io_prop.zp_copies, zio->io_txg, NULL, flags,
3048 &zio->io_alloc_list, zio, zio->io_allocator);
3049
3050 if (error != 0) {
3051 spa_dbgmsg(spa, "%s: metaslab allocation failure: zio %p, "
3052 "size %llu, error %d", spa_name(spa), zio, zio->io_size,
3053 error);
3054 if (error == ENOSPC && zio->io_size > SPA_MINBLOCKSIZE)
3055 return (zio_write_gang_block(zio));
3056 zio->io_error = error;

--- 43 unchanged lines hidden (view full) ---

3100 }
3101 }
3102}
3103
3104/*
3105 * Try to allocate an intent log block. Return 0 on success, errno on failure.
3106 */
3107int
3108zio_alloc_zil(spa_t *spa, uint64_t objset, uint64_t txg, blkptr_t *new_bp,
3109 blkptr_t *old_bp, uint64_t size, boolean_t *slog)
3110{
3111 int error = 1;
3112 zio_alloc_list_t io_alloc_list;
3113
3114 ASSERT(txg > spa_syncing_txg(spa));
3115
3116 metaslab_trace_init(&io_alloc_list);
3117 /*
3118 * When allocating a zil block, we don't have information about
3119 * the final destination of the block except the objset it's part
3120 * of, so we just hash the objset ID to pick the allocator to get
3121 * some parallelism.
3122 */
3123 error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1,
3124 txg, old_bp, METASLAB_HINTBP_AVOID, &io_alloc_list, NULL,
3125 cityhash4(0, 0, 0, objset) % spa->spa_alloc_count);
3126 if (error == 0) {
3127 *slog = TRUE;
3128 } else {
3129 error = metaslab_alloc(spa, spa_normal_class(spa), size,
3130 new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID,
3131 &io_alloc_list, NULL, cityhash4(0, 0, 0, objset) %
3132 spa->spa_alloc_count);
3133 if (error == 0)
3134 *slog = FALSE;
3135 }
3136 metaslab_trace_fini(&io_alloc_list);
3137
3138 if (error == 0) {
3139 BP_SET_LSIZE(new_bp, size);
3140 BP_SET_PSIZE(new_bp, size);

--- 553 unchanged lines hidden (view full) ---

3694 ASSERT(IO_IS_ALLOCATING(zio));
3695 ASSERT(zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
3696 /*
3697 * We were unable to allocate anything, unreserve and
3698 * issue the next I/O to allocate.
3699 */
3700 metaslab_class_throttle_unreserve(
3701 spa_normal_class(zio->io_spa),
3702 zio->io_prop.zp_copies, zio->io_allocator, zio);
3703 zio_allocate_dispatch(zio->io_spa, zio->io_allocator);
3704 }
3705 }
3706
3707 mutex_enter(&zio->io_lock);
3708 zio->io_state[ZIO_WAIT_READY] = 1;
3709 pio = zio_walk_parents(zio, &zl);
3710 mutex_exit(&zio->io_lock);
3711

--- 66 unchanged lines hidden (view full) ---

3778
3779 ASSERT(IO_IS_ALLOCATING(pio));
3780 ASSERT3P(zio, !=, zio->io_logical);
3781 ASSERT(zio->io_logical != NULL);
3782 ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REPAIR));
3783 ASSERT0(zio->io_flags & ZIO_FLAG_NOPWRITE);
3784
3785 mutex_enter(&pio->io_lock);
3786 metaslab_group_alloc_decrement(zio->io_spa, vd->vdev_id, pio, flags,
3787 pio->io_allocator, B_TRUE);
3788 mutex_exit(&pio->io_lock);
3789
3790 metaslab_class_throttle_unreserve(spa_normal_class(zio->io_spa),
3791 1, pio->io_allocator, pio);
3792
3793 /*
3794 * Call into the pipeline to see if there is more work that
3795 * needs to be done. If there is work to be done it will be
3796 * dispatched to another taskq thread.
3797 */
3798 zio_allocate_dispatch(zio->io_spa, pio->io_allocator);
3799}
3800
3801static int
3802zio_done(zio_t *zio)
3803{
3804 spa_t *spa = zio->io_spa;
3805 zio_t *lio = zio->io_logical;
3806 blkptr_t *bp = zio->io_bp;

--- 26 unchanged lines hidden (view full) ---

3833 /*
3834 * If the allocation throttle is enabled, verify that
3835 * we have decremented the refcounts for every I/O that was throttled.
3836 */
3837 if (zio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
3838 ASSERT(zio->io_type == ZIO_TYPE_WRITE);
3839 ASSERT(zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
3840 ASSERT(bp != NULL);
3841 metaslab_group_alloc_verify(spa, zio->io_bp, zio,
3842 zio->io_allocator);
3843 VERIFY(refcount_not_held(&mc->mc_alloc_slots[zio->io_allocator],
3844 zio));
3845 }
3846
3847 for (int c = 0; c < ZIO_CHILD_TYPES; c++)
3848 for (int w = 0; w < ZIO_WAIT_TYPES; w++)
3849 ASSERT(zio->io_children[c][w] == 0);
3850
3851 if (bp != NULL && !BP_IS_EMBEDDED(bp)) {
3852 ASSERT(bp->blk_pad[0] == 0);

--- 418 unchanged lines hidden ---