1/* 2 * CDDL HEADER START 3 * 4 * This file and its contents are supplied under the terms of the 5 * Common Development and Distribution License ("CDDL"), version 1.0. 6 * You may only use this file in accordance with the terms of version 7 * 1.0 of the CDDL. 8 * 9 * A full copy of the text of the CDDL should have accompanied this 10 * source. A copy of the CDDL is also available via the Internet at 11 * http://www.illumos.org/license/CDDL. 12 * 13 * CDDL HEADER END 14 */ 15/* 16 * Copyright (c) 2013, 2017 by Delphix. All rights reserved. 17 */ 18 19#include <sys/zfs_context.h> 20#include <sys/multilist.h> 21 22/* needed for spa_get_random() */ 23#include <sys/spa.h> 24 25/* 26 * This overrides the number of sublists in each multilist_t, which defaults 27 * to the number of CPUs in the system (see multilist_create()). 28 */ 29int zfs_multilist_num_sublists = 0; 30 31/* 32 * Given the object contained on the list, return a pointer to the 33 * object's multilist_node_t structure it contains. 34 */ 35static multilist_node_t * 36multilist_d2l(multilist_t *ml, void *obj) 37{ 38 return ((multilist_node_t *)((char *)obj + ml->ml_offset)); 39} 40 41/* 42 * Initialize a new mutlilist using the parameters specified. 43 * 44 * - 'size' denotes the size of the structure containing the 45 * multilist_node_t. 46 * - 'offset' denotes the byte offset of the mutlilist_node_t within 47 * the structure that contains it. 48 * - 'num' specifies the number of internal sublists to create. 49 * - 'index_func' is used to determine which sublist to insert into 50 * when the multilist_insert() function is called; as well as which 51 * sublist to remove from when multilist_remove() is called. The 52 * requirements this function must meet, are the following: 53 * 54 * - It must always return the same value when called on the same 55 * object (to ensure the object is removed from the list it was 56 * inserted into). 57 * 58 * - It must return a value in the range [0, number of sublists). 59 * The multilist_get_num_sublists() function may be used to 60 * determine the number of sublists in the multilist. 61 * 62 * Also, in order to reduce internal contention between the sublists 63 * during insertion and removal, this function should choose evenly 64 * between all available sublists when inserting. This isn't a hard 65 * requirement, but a general rule of thumb in order to garner the 66 * best multi-threaded performance out of the data structure. 67 */ 68static multilist_t * 69multilist_create_impl(size_t size, size_t offset, 70 unsigned int num, multilist_sublist_index_func_t *index_func) 71{ 72 ASSERT3U(size, >, 0); 73 ASSERT3U(size, >=, offset + sizeof (multilist_node_t)); 74 ASSERT3U(num, >, 0); 75 ASSERT3P(index_func, !=, NULL); 76 77 multilist_t *ml = kmem_alloc(sizeof (*ml), KM_SLEEP); 78 ml->ml_offset = offset; 79 ml->ml_num_sublists = num; 80 ml->ml_index_func = index_func; 81 82 ml->ml_sublists = kmem_zalloc(sizeof (multilist_sublist_t) * 83 ml->ml_num_sublists, KM_SLEEP); 84 85 ASSERT3P(ml->ml_sublists, !=, NULL); 86 87 for (int i = 0; i < ml->ml_num_sublists; i++) { 88 multilist_sublist_t *mls = &ml->ml_sublists[i]; 89 mutex_init(&mls->mls_lock, NULL, MUTEX_DEFAULT, NULL); 90 list_create(&mls->mls_list, size, offset); 91 } 92 return (ml); 93} 94 95/* 96 * Allocate a new multilist, using the default number of sublists 97 * (the number of CPUs, or at least 4, or the tunable 98 * zfs_multilist_num_sublists). 99 */ 100multilist_t * 101multilist_create(size_t size, size_t offset, 102 multilist_sublist_index_func_t *index_func) 103{ 104 int num_sublists; 105 106 if (zfs_multilist_num_sublists > 0) { 107 num_sublists = zfs_multilist_num_sublists; 108 } else { 109 num_sublists = MAX(max_ncpus, 4); 110 } 111 112 return (multilist_create_impl(size, offset, num_sublists, index_func)); 113} 114 115/* 116 * Destroy the given multilist object, and free up any memory it holds. 117 */ 118void 119multilist_destroy(multilist_t *ml) 120{ 121 ASSERT(multilist_is_empty(ml)); 122 123 for (int i = 0; i < ml->ml_num_sublists; i++) { 124 multilist_sublist_t *mls = &ml->ml_sublists[i]; 125 126 ASSERT(list_is_empty(&mls->mls_list)); 127 128 list_destroy(&mls->mls_list); 129 mutex_destroy(&mls->mls_lock); 130 } 131 132 ASSERT3P(ml->ml_sublists, !=, NULL); 133 kmem_free(ml->ml_sublists, 134 sizeof (multilist_sublist_t) * ml->ml_num_sublists); 135 136 ml->ml_num_sublists = 0; 137 ml->ml_offset = 0; 138 kmem_free(ml, sizeof (multilist_t)); 139} 140 141/* 142 * Insert the given object into the multilist. 143 * 144 * This function will insert the object specified into the sublist 145 * determined using the function given at multilist creation time. 146 * 147 * The sublist locks are automatically acquired if not already held, to 148 * ensure consistency when inserting and removing from multiple threads. 149 */ 150void 151multilist_insert(multilist_t *ml, void *obj) 152{ 153 unsigned int sublist_idx = ml->ml_index_func(ml, obj); 154 multilist_sublist_t *mls; 155 boolean_t need_lock; 156 157 DTRACE_PROBE3(multilist__insert, multilist_t *, ml, 158 unsigned int, sublist_idx, void *, obj); 159 160 ASSERT3U(sublist_idx, <, ml->ml_num_sublists); 161 162 mls = &ml->ml_sublists[sublist_idx]; 163 164 /* 165 * Note: Callers may already hold the sublist lock by calling 166 * multilist_sublist_lock(). Here we rely on MUTEX_HELD() 167 * returning TRUE if and only if the current thread holds the 168 * lock. While it's a little ugly to make the lock recursive in 169 * this way, it works and allows the calling code to be much 170 * simpler -- otherwise it would have to pass around a flag 171 * indicating that it already has the lock. 172 */ 173 need_lock = !MUTEX_HELD(&mls->mls_lock); 174 175 if (need_lock) 176 mutex_enter(&mls->mls_lock); 177 178 ASSERT(!multilist_link_active(multilist_d2l(ml, obj))); 179 180 multilist_sublist_insert_head(mls, obj); 181 182 if (need_lock) 183 mutex_exit(&mls->mls_lock); 184} 185 186/* 187 * Remove the given object from the multilist. 188 * 189 * This function will remove the object specified from the sublist 190 * determined using the function given at multilist creation time. 191 * 192 * The necessary sublist locks are automatically acquired, to ensure 193 * consistency when inserting and removing from multiple threads. 194 */ 195void 196multilist_remove(multilist_t *ml, void *obj) 197{ 198 unsigned int sublist_idx = ml->ml_index_func(ml, obj); 199 multilist_sublist_t *mls; 200 boolean_t need_lock; 201 202 DTRACE_PROBE3(multilist__remove, multilist_t *, ml, 203 unsigned int, sublist_idx, void *, obj); 204 205 ASSERT3U(sublist_idx, <, ml->ml_num_sublists); 206 207 mls = &ml->ml_sublists[sublist_idx]; 208 /* See comment in multilist_insert(). */ 209 need_lock = !MUTEX_HELD(&mls->mls_lock); 210 211 if (need_lock) 212 mutex_enter(&mls->mls_lock); 213 214 ASSERT(multilist_link_active(multilist_d2l(ml, obj))); 215 216 multilist_sublist_remove(mls, obj); 217 218 if (need_lock) 219 mutex_exit(&mls->mls_lock); 220} 221 222/* 223 * Check to see if this multilist object is empty. 224 * 225 * This will return TRUE if it finds all of the sublists of this 226 * multilist to be empty, and FALSE otherwise. Each sublist lock will be 227 * automatically acquired as necessary. 228 * 229 * If concurrent insertions and removals are occurring, the semantics 230 * of this function become a little fuzzy. Instead of locking all 231 * sublists for the entire call time of the function, each sublist is 232 * only locked as it is individually checked for emptiness. Thus, it's 233 * possible for this function to return TRUE with non-empty sublists at 234 * the time the function returns. This would be due to another thread 235 * inserting into a given sublist, after that specific sublist was check 236 * and deemed empty, but before all sublists have been checked. 237 */ 238int 239multilist_is_empty(multilist_t *ml) 240{ 241 for (int i = 0; i < ml->ml_num_sublists; i++) { 242 multilist_sublist_t *mls = &ml->ml_sublists[i]; 243 /* See comment in multilist_insert(). */ 244 boolean_t need_lock = !MUTEX_HELD(&mls->mls_lock); 245 246 if (need_lock) 247 mutex_enter(&mls->mls_lock); 248 249 if (!list_is_empty(&mls->mls_list)) { 250 if (need_lock) 251 mutex_exit(&mls->mls_lock); 252 253 return (FALSE); 254 } 255 256 if (need_lock) 257 mutex_exit(&mls->mls_lock); 258 } 259 260 return (TRUE); 261} 262 263/* Return the number of sublists composing this multilist */ 264unsigned int 265multilist_get_num_sublists(multilist_t *ml) 266{ 267 return (ml->ml_num_sublists); 268} 269 270/* Return a randomly selected, valid sublist index for this multilist */ 271unsigned int 272multilist_get_random_index(multilist_t *ml) 273{ 274 return (spa_get_random(ml->ml_num_sublists)); 275} 276 277/* Lock and return the sublist specified at the given index */ 278multilist_sublist_t * 279multilist_sublist_lock(multilist_t *ml, unsigned int sublist_idx) 280{ 281 multilist_sublist_t *mls; 282 283 ASSERT3U(sublist_idx, <, ml->ml_num_sublists); 284 mls = &ml->ml_sublists[sublist_idx]; 285 mutex_enter(&mls->mls_lock); 286 287 return (mls); 288} 289 290/* Lock and return the sublist that would be used to store the specified obj */ 291multilist_sublist_t * 292multilist_sublist_lock_obj(multilist_t *ml, void *obj) 293{ 294 return (multilist_sublist_lock(ml, ml->ml_index_func(ml, obj))); 295} 296 297void 298multilist_sublist_unlock(multilist_sublist_t *mls) 299{ 300 mutex_exit(&mls->mls_lock); 301} 302 303/* 304 * We're allowing any object to be inserted into this specific sublist, 305 * but this can lead to trouble if multilist_remove() is called to 306 * remove this object. Specifically, if calling ml_index_func on this 307 * object returns an index for sublist different than what is passed as 308 * a parameter here, any call to multilist_remove() with this newly 309 * inserted object is undefined! (the call to multilist_remove() will 310 * remove the object from a list that it isn't contained in) 311 */ 312void 313multilist_sublist_insert_head(multilist_sublist_t *mls, void *obj) 314{ 315 ASSERT(MUTEX_HELD(&mls->mls_lock)); 316 list_insert_head(&mls->mls_list, obj); 317} 318 319/* please see comment above multilist_sublist_insert_head */ 320void 321multilist_sublist_insert_tail(multilist_sublist_t *mls, void *obj) 322{ 323 ASSERT(MUTEX_HELD(&mls->mls_lock)); 324 list_insert_tail(&mls->mls_list, obj); 325} 326 327/* 328 * Move the object one element forward in the list. 329 * 330 * This function will move the given object forward in the list (towards 331 * the head) by one object. So, in essence, it will swap its position in 332 * the list with its "prev" pointer. If the given object is already at the 333 * head of the list, it cannot be moved forward any more than it already 334 * is, so no action is taken. 335 * 336 * NOTE: This function **must not** remove any object from the list other 337 * than the object given as the parameter. This is relied upon in 338 * arc_evict_state_impl(). 339 */ 340void 341multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj) 342{ 343 void *prev = list_prev(&mls->mls_list, obj); 344 345 ASSERT(MUTEX_HELD(&mls->mls_lock)); 346 ASSERT(!list_is_empty(&mls->mls_list)); 347 348 /* 'obj' must be at the head of the list, nothing to do */ 349 if (prev == NULL) 350 return; 351 352 list_remove(&mls->mls_list, obj); 353 list_insert_before(&mls->mls_list, prev, obj); 354} 355 356void 357multilist_sublist_remove(multilist_sublist_t *mls, void *obj) 358{ 359 ASSERT(MUTEX_HELD(&mls->mls_lock)); 360 list_remove(&mls->mls_list, obj); 361} 362 363int 364multilist_sublist_is_empty(multilist_sublist_t *mls) 365{ 366 ASSERT(MUTEX_HELD(&mls->mls_lock)); 367 return (list_is_empty(&mls->mls_list)); 368} 369 370int 371multilist_sublist_is_empty_idx(multilist_t *ml, unsigned int sublist_idx) 372{ 373 multilist_sublist_t *mls; 374 int empty; 375 376 ASSERT3U(sublist_idx, <, ml->ml_num_sublists); 377 mls = &ml->ml_sublists[sublist_idx]; 378 ASSERT(!MUTEX_HELD(&mls->mls_lock)); 379 mutex_enter(&mls->mls_lock); 380 empty = list_is_empty(&mls->mls_list); 381 mutex_exit(&mls->mls_lock); 382 return (empty); 383} 384 385void * 386multilist_sublist_head(multilist_sublist_t *mls) 387{ 388 ASSERT(MUTEX_HELD(&mls->mls_lock)); 389 return (list_head(&mls->mls_list)); 390} 391 392void * 393multilist_sublist_tail(multilist_sublist_t *mls) 394{ 395 ASSERT(MUTEX_HELD(&mls->mls_lock)); 396 return (list_tail(&mls->mls_list)); 397} 398 399void * 400multilist_sublist_next(multilist_sublist_t *mls, void *obj) 401{ 402 ASSERT(MUTEX_HELD(&mls->mls_lock)); 403 return (list_next(&mls->mls_list, obj)); 404} 405 406void * 407multilist_sublist_prev(multilist_sublist_t *mls, void *obj) 408{ 409 ASSERT(MUTEX_HELD(&mls->mls_lock)); 410 return (list_prev(&mls->mls_list, obj)); 411} 412 413void 414multilist_link_init(multilist_node_t *link) 415{ 416 list_link_init(link); 417} 418 419int 420multilist_link_active(multilist_node_t *link) 421{ 422 return (list_link_active(link)); 423} 424