1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE --- 6 unchanged lines hidden (view full) --- 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |
23 * Copyright (c) 2011, 2018 by Delphix. All rights reserved. |
24 * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. 25 * Copyright (c) 2014 Integros [integros.com] 26 */ 27 28#include <sys/sysmacros.h> 29#include <sys/zfs_context.h> 30#include <sys/fm/fs/zfs.h> 31#include <sys/spa.h> --- 7 unchanged lines hidden (view full) --- 39#include <sys/arc.h> 40#include <sys/ddt.h> 41#include <sys/trim_map.h> 42#include <sys/blkptr.h> 43#include <sys/zfeature.h> 44#include <sys/dsl_scan.h> 45#include <sys/metaslab_impl.h> 46#include <sys/abd.h> |
47#include <sys/cityhash.h> |
48 49SYSCTL_DECL(_vfs_zfs); 50SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO"); 51#if defined(__amd64__) 52static int zio_use_uma = 1; 53#else 54static int zio_use_uma = 0; 55#endif --- 2275 unchanged lines hidden (view full) --- 2331 int error; 2332 2333 int flags = METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER; 2334 if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) { 2335 ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE); 2336 ASSERT(!(pio->io_flags & ZIO_FLAG_NODATA)); 2337 2338 flags |= METASLAB_ASYNC_ALLOC; |
2339 VERIFY(refcount_held(&mc->mc_alloc_slots[pio->io_allocator], 2340 pio)); |
2341 2342 /* 2343 * The logical zio has already placed a reservation for 2344 * 'copies' allocation slots but gang blocks may require 2345 * additional copies. These additional copies 2346 * (i.e. gbh_copies - copies) are guaranteed to succeed 2347 * since metaslab_class_throttle_reserve() always allows 2348 * additional reservations for gang blocks. 2349 */ 2350 VERIFY(metaslab_class_throttle_reserve(mc, gbh_copies - copies, |
2351 pio->io_allocator, pio, flags)); |
2352 } 2353 2354 error = metaslab_alloc(spa, mc, SPA_GANGBLOCKSIZE, 2355 bp, gbh_copies, txg, pio == gio ? NULL : gio->io_bp, flags, |
2356 &pio->io_alloc_list, pio, pio->io_allocator); |
2357 if (error) { 2358 if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) { 2359 ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE); 2360 ASSERT(!(pio->io_flags & ZIO_FLAG_NODATA)); 2361 2362 /* 2363 * If we failed to allocate the gang block header then 2364 * we remove any additional allocation reservations that 2365 * we placed here. The original reservation will 2366 * be removed when the logical I/O goes to the ready 2367 * stage. 2368 */ 2369 metaslab_class_throttle_unreserve(mc, |
2370 gbh_copies - copies, pio->io_allocator, pio); |
2371 } 2372 pio->io_error = error; 2373 return (ZIO_PIPELINE_CONTINUE); 2374 } 2375 2376 if (pio == gio) { 2377 gnpp = &gio->io_gang_tree; 2378 } else { --- 41 unchanged lines hidden (view full) --- 2420 ASSERT(!(pio->io_flags & ZIO_FLAG_NODATA)); 2421 2422 /* 2423 * Gang children won't throttle but we should 2424 * account for their work, so reserve an allocation 2425 * slot for them here. 2426 */ 2427 VERIFY(metaslab_class_throttle_reserve(mc, |
2428 zp.zp_copies, cio->io_allocator, cio, flags)); |
2429 } 2430 zio_nowait(cio); 2431 } 2432 2433 /* 2434 * Set pio's pipeline to just wait for zio to finish. 2435 */ 2436 pio->io_pipeline = ZIO_INTERLOCK_PIPELINE; --- 473 unchanged lines hidden (view full) --- 2910 2911/* 2912 * ========================================================================== 2913 * Allocate and free blocks 2914 * ========================================================================== 2915 */ 2916 2917static zio_t * |
/*
 * NOTE(review): this region is a fused diff-viewer export; the decimal run
 * at the start of each line (e.g. "2918") and the trailing '|' markers are
 * rendering artifacts of the export, not C code.
 *
 * zio_io_to_allocate(spa, allocator): with this allocator's throttle lock
 * held, look at the oldest queued allocating write in the allocator's AVL
 * tree and try to reserve zp_copies allocation slots for it via
 * metaslab_class_throttle_reserve().  On success the zio is removed from
 * the tree and returned so it can be issued; on failure (or an empty tree)
 * NULL is returned and the zio, if any, stays queued for a later retry by
 * zio_allocate_dispatch().
 */
2918zio_io_to_allocate(spa_t *spa, int allocator) |
2919{ 2920	zio_t *zio; 2921 |
/* Caller must already hold this allocator's throttle lock. */
2922	ASSERT(MUTEX_HELD(&spa->spa_alloc_locks[allocator])); |
2923 |
/* Oldest queued allocating zio for this allocator, or NULL if none. */
2924	zio = avl_first(&spa->spa_alloc_trees[allocator]); |
2925	if (zio == NULL) 2926		return (NULL); 2927 2928	ASSERT(IO_IS_ALLOCATING(zio)); 2929 2930	/* 2931	 * Try to place a reservation for this zio. If we're unable to 2932	 * reserve then we throttle. 2933	 */ |
2934	ASSERT3U(zio->io_allocator, ==, allocator); |
/*
 * Reservation failed: leave the zio on the tree; it is retried the next
 * time zio_allocate_dispatch() runs after a slot is unreserved.
 */
2935	if (!metaslab_class_throttle_reserve(spa_normal_class(spa), |
2936	    zio->io_prop.zp_copies, zio->io_allocator, zio, 0)) { |
2937		return (NULL); 2938	} 2939 |
/* Reservation held: dequeue the zio and hand it back for issue. */
2940	avl_remove(&spa->spa_alloc_trees[allocator], zio); |
2941	ASSERT3U(zio->io_stage, <, ZIO_STAGE_DVA_ALLOCATE); 2942 2943	return (zio); 2944} 2945 2946static int 2947zio_dva_throttle(zio_t *zio) 2948{ --- 7 unchanged lines hidden (view full) --- 2956		return (ZIO_PIPELINE_CONTINUE); 2957	} 2958 2959	ASSERT(zio->io_child_type > ZIO_CHILD_GANG); 2960 2961	ASSERT3U(zio->io_queued_timestamp, >, 0); 2962	ASSERT(zio->io_stage == ZIO_STAGE_DVA_THROTTLE); 2963 |
2964 zbookmark_phys_t *bm = &zio->io_bookmark; 2965 /* 2966 * We want to try to use as many allocators as possible to help improve 2967 * performance, but we also want logically adjacent IOs to be physically 2968 * adjacent to improve sequential read performance. We chunk each object 2969 * into 2^20 block regions, and then hash based on the objset, object, 2970 * level, and region to accomplish both of these goals. 2971 */ 2972 zio->io_allocator = cityhash4(bm->zb_objset, bm->zb_object, 2973 bm->zb_level, bm->zb_blkid >> 20) % spa->spa_alloc_count; 2974 mutex_enter(&spa->spa_alloc_locks[zio->io_allocator]); |
2975 2976 ASSERT(zio->io_type == ZIO_TYPE_WRITE); |
2977 avl_add(&spa->spa_alloc_trees[zio->io_allocator], zio); |
2978 |
2979 nio = zio_io_to_allocate(zio->io_spa, zio->io_allocator); 2980 mutex_exit(&spa->spa_alloc_locks[zio->io_allocator]); |
2981 2982 if (nio == zio) 2983 return (ZIO_PIPELINE_CONTINUE); 2984 2985 if (nio != NULL) { 2986 ASSERT(nio->io_stage == ZIO_STAGE_DVA_THROTTLE); 2987 /* 2988 * We are passing control to a new zio so make sure that --- 4 unchanged lines hidden (view full) --- 2993 * been waiting. 2994 */ 2995 zio_taskq_dispatch(nio, ZIO_TASKQ_ISSUE, B_TRUE); 2996 } 2997 return (ZIO_PIPELINE_STOP); 2998} 2999 3000void |
/*
 * NOTE(review): fused diff-viewer export; the leading decimal runs, the
 * trailing '|' markers, and the "unchanged lines hidden" banner below are
 * rendering artifacts, not C code.
 *
 * zio_allocate_dispatch(spa, allocator): invoked after allocation slots
 * for this allocator are unreserved (see the throttle-unreserve paths
 * later in this file).  Takes the per-allocator throttle lock, pulls the
 * next reservable zio off that allocator's queue via zio_io_to_allocate(),
 * and, if one was obtained, re-dispatches it to the issue taskq so its
 * pipeline can proceed to DVA allocation.
 */
3001zio_allocate_dispatch(spa_t *spa, int allocator) |
3002{ 3003	zio_t *zio; 3004 |
/* Queue and reservation state are protected by the allocator's lock. */
3005	mutex_enter(&spa->spa_alloc_locks[allocator]); 3006	zio = zio_io_to_allocate(spa, allocator); 3007	mutex_exit(&spa->spa_alloc_locks[allocator]); |
/* NULL means the queue was empty or no reservation could be placed. */
3008	if (zio == NULL) 3009		return; 3010 3011	ASSERT3U(zio->io_stage, ==, ZIO_STAGE_DVA_THROTTLE); 3012	ASSERT0(zio->io_error); 3013	zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, B_TRUE); 3014} 3015 --- 24 unchanged lines hidden (view full) --- 3040		flags |= METASLAB_GANG_CHILD; 3041	} 3042	if (zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE) { 3043		flags |= METASLAB_ASYNC_ALLOC; 3044	} 3045 3046	error = metaslab_alloc(spa, mc, zio->io_size, bp, 3047	    zio->io_prop.zp_copies, zio->io_txg, NULL, flags, |
3048 &zio->io_alloc_list, zio, zio->io_allocator); |
3049 3050 if (error != 0) { 3051 spa_dbgmsg(spa, "%s: metaslab allocation failure: zio %p, " 3052 "size %llu, error %d", spa_name(spa), zio, zio->io_size, 3053 error); 3054 if (error == ENOSPC && zio->io_size > SPA_MINBLOCKSIZE) 3055 return (zio_write_gang_block(zio)); 3056 zio->io_error = error; --- 43 unchanged lines hidden (view full) --- 3100 } 3101 } 3102} 3103 3104/* 3105 * Try to allocate an intent log block. Return 0 on success, errno on failure. 3106 */ 3107int |
3108zio_alloc_zil(spa_t *spa, uint64_t objset, uint64_t txg, blkptr_t *new_bp, 3109 blkptr_t *old_bp, uint64_t size, boolean_t *slog) |
3110{ 3111 int error = 1; 3112 zio_alloc_list_t io_alloc_list; 3113 3114 ASSERT(txg > spa_syncing_txg(spa)); 3115 3116 metaslab_trace_init(&io_alloc_list); |
3117 /* 3118 * When allocating a zil block, we don't have information about 3119 * the final destination of the block except the objset it's part 3120 * of, so we just hash the objset ID to pick the allocator to get 3121 * some parallelism. 3122 */ |
3123 error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1, |
3124 txg, old_bp, METASLAB_HINTBP_AVOID, &io_alloc_list, NULL, 3125 cityhash4(0, 0, 0, objset) % spa->spa_alloc_count); |
3126 if (error == 0) { 3127 *slog = TRUE; 3128 } else { 3129 error = metaslab_alloc(spa, spa_normal_class(spa), size, 3130 new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID, |
3131 &io_alloc_list, NULL, cityhash4(0, 0, 0, objset) % 3132 spa->spa_alloc_count); |
3133 if (error == 0) 3134 *slog = FALSE; 3135 } 3136 metaslab_trace_fini(&io_alloc_list); 3137 3138 if (error == 0) { 3139 BP_SET_LSIZE(new_bp, size); 3140 BP_SET_PSIZE(new_bp, size); --- 553 unchanged lines hidden (view full) --- 3694 ASSERT(IO_IS_ALLOCATING(zio)); 3695 ASSERT(zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE); 3696 /* 3697 * We were unable to allocate anything, unreserve and 3698 * issue the next I/O to allocate. 3699 */ 3700 metaslab_class_throttle_unreserve( 3701 spa_normal_class(zio->io_spa), |
3702 zio->io_prop.zp_copies, zio->io_allocator, zio); 3703 zio_allocate_dispatch(zio->io_spa, zio->io_allocator); |
3704 } 3705 } 3706 3707 mutex_enter(&zio->io_lock); 3708 zio->io_state[ZIO_WAIT_READY] = 1; 3709 pio = zio_walk_parents(zio, &zl); 3710 mutex_exit(&zio->io_lock); 3711 --- 66 unchanged lines hidden (view full) --- 3778 3779 ASSERT(IO_IS_ALLOCATING(pio)); 3780 ASSERT3P(zio, !=, zio->io_logical); 3781 ASSERT(zio->io_logical != NULL); 3782 ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REPAIR)); 3783 ASSERT0(zio->io_flags & ZIO_FLAG_NOPWRITE); 3784 3785 mutex_enter(&pio->io_lock); |
3786 metaslab_group_alloc_decrement(zio->io_spa, vd->vdev_id, pio, flags, 3787 pio->io_allocator, B_TRUE); |
3788 mutex_exit(&pio->io_lock); 3789 3790 metaslab_class_throttle_unreserve(spa_normal_class(zio->io_spa), |
3791 1, pio->io_allocator, pio); |
3792 3793 /* 3794 * Call into the pipeline to see if there is more work that 3795 * needs to be done. If there is work to be done it will be 3796 * dispatched to another taskq thread. 3797 */ |
3798 zio_allocate_dispatch(zio->io_spa, pio->io_allocator); |
3799} 3800 3801static int 3802zio_done(zio_t *zio) 3803{ 3804 spa_t *spa = zio->io_spa; 3805 zio_t *lio = zio->io_logical; 3806 blkptr_t *bp = zio->io_bp; --- 26 unchanged lines hidden (view full) --- 3833 /* 3834 * If the allocation throttle is enabled, verify that 3835 * we have decremented the refcounts for every I/O that was throttled. 3836 */ 3837 if (zio->io_flags & ZIO_FLAG_IO_ALLOCATING) { 3838 ASSERT(zio->io_type == ZIO_TYPE_WRITE); 3839 ASSERT(zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE); 3840 ASSERT(bp != NULL); |
3841 metaslab_group_alloc_verify(spa, zio->io_bp, zio, 3842 zio->io_allocator); 3843 VERIFY(refcount_not_held(&mc->mc_alloc_slots[zio->io_allocator], 3844 zio)); |
3845 } 3846 3847 for (int c = 0; c < ZIO_CHILD_TYPES; c++) 3848 for (int w = 0; w < ZIO_WAIT_TYPES; w++) 3849 ASSERT(zio->io_children[c][w] == 0); 3850 3851 if (bp != NULL && !BP_IS_EMBEDDED(bp)) { 3852 ASSERT(bp->blk_pad[0] == 0); --- 418 unchanged lines hidden --- |