arc.c (339114) | arc.c (339141) |
---|---|
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE --- 363 unchanged lines hidden (view full) --- 372int zfs_arc_no_grow_shift = 0; 373int zfs_arc_p_min_shift = 0; 374uint64_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */ 375u_int zfs_arc_free_target = 0; 376 377/* Absolute min for arc min / max is 16MB. */ 378static uint64_t arc_abs_min = 16 << 20; 379 | 1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE --- 363 unchanged lines hidden (view full) --- 372int zfs_arc_no_grow_shift = 0; 373int zfs_arc_p_min_shift = 0; 374uint64_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */ 375u_int zfs_arc_free_target = 0; 376 377/* Absolute min for arc min / max is 16MB. */ 378static uint64_t arc_abs_min = 16 << 20; 379 |
380/* 381 * ARC dirty data constraints for arc_tempreserve_space() throttle 382 */ 383uint_t zfs_arc_dirty_limit_percent = 50; /* total dirty data limit */ 384uint_t zfs_arc_anon_limit_percent = 25; /* anon block dirty limit */ 385uint_t zfs_arc_pool_dirty_percent = 20; /* each pool's anon allowance */ 386 |
|
380boolean_t zfs_compressed_arc_enabled = B_TRUE; 381 382static int sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS); 383static int sysctl_vfs_zfs_arc_meta_limit(SYSCTL_HANDLER_ARGS); 384static int sysctl_vfs_zfs_arc_max(SYSCTL_HANDLER_ARGS); 385static int sysctl_vfs_zfs_arc_min(SYSCTL_HANDLER_ARGS); 386static int sysctl_vfs_zfs_arc_no_grow_shift(SYSCTL_HANDLER_ARGS); 387 --- 5944 unchanged lines hidden (view full) --- 6332 (children_ready != NULL) ? arc_write_children_ready : NULL, 6333 arc_write_physdone, arc_write_done, callback, 6334 priority, zio_flags, zb); 6335 6336 return (zio); 6337} 6338 6339static int | 387boolean_t zfs_compressed_arc_enabled = B_TRUE; 388 389static int sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS); 390static int sysctl_vfs_zfs_arc_meta_limit(SYSCTL_HANDLER_ARGS); 391static int sysctl_vfs_zfs_arc_max(SYSCTL_HANDLER_ARGS); 392static int sysctl_vfs_zfs_arc_min(SYSCTL_HANDLER_ARGS); 393static int sysctl_vfs_zfs_arc_no_grow_shift(SYSCTL_HANDLER_ARGS); 394 --- 5944 unchanged lines hidden (view full) --- 6339 (children_ready != NULL) ? arc_write_children_ready : NULL, 6340 arc_write_physdone, arc_write_done, callback, 6341 priority, zio_flags, zb); 6342 6343 return (zio); 6344} 6345 6346static int |
6340arc_memory_throttle(uint64_t reserve, uint64_t txg) | 6347arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg) |
6341{ 6342#ifdef _KERNEL 6343 uint64_t available_memory = ptob(freemem); | 6348{ 6349#ifdef _KERNEL 6350 uint64_t available_memory = ptob(freemem); |
6344 static uint64_t page_load = 0; 6345 static uint64_t last_txg = 0; | |
6346 6347#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC) 6348 available_memory = 6349 MIN(available_memory, ptob(vmem_size(heap_arena, VMEM_FREE))); 6350#endif 6351 6352 if (freemem > (uint64_t)physmem * arc_lotsfree_percent / 100) 6353 return (0); 6354 | 6351 6352#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC) 6353 available_memory = 6354 MIN(available_memory, ptob(vmem_size(heap_arena, VMEM_FREE))); 6355#endif 6356 6357 if (freemem > (uint64_t)physmem * arc_lotsfree_percent / 100) 6358 return (0); 6359 |
6355 if (txg > last_txg) { 6356 last_txg = txg; 6357 page_load = 0; | 6360 if (txg > spa->spa_lowmem_last_txg) { 6361 spa->spa_lowmem_last_txg = txg; 6362 spa->spa_lowmem_page_load = 0; |
6358 } 6359 /* 6360 * If we are in pageout, we know that memory is already tight, 6361 * the arc is already going to be evicting, so we just want to 6362 * continue to let page writes occur as quickly as possible. 6363 */ 6364 if (curproc == pageproc) { | 6363 } 6364 /* 6365 * If we are in pageout, we know that memory is already tight, 6366 * the arc is already going to be evicting, so we just want to 6367 * continue to let page writes occur as quickly as possible. 6368 */ 6369 if (curproc == pageproc) { |
6365 if (page_load > MAX(ptob(minfree), available_memory) / 4) | 6370 if (spa->spa_lowmem_page_load > 6371 MAX(ptob(minfree), available_memory) / 4) |
6366 return (SET_ERROR(ERESTART)); 6367 /* Note: reserve is inflated, so we deflate */ | 6372 return (SET_ERROR(ERESTART)); 6373 /* Note: reserve is inflated, so we deflate */ |
6368 page_load += reserve / 8; | 6374 atomic_add_64(&spa->spa_lowmem_page_load, reserve / 8); |
6369 return (0); | 6375 return (0); |
6370 } else if (page_load > 0 && arc_reclaim_needed()) { | 6376 } else if (spa->spa_lowmem_page_load > 0 && arc_reclaim_needed()) { |
6371 /* memory is low, delay before restarting */ 6372 ARCSTAT_INCR(arcstat_memory_throttle_count, 1); 6373 return (SET_ERROR(EAGAIN)); 6374 } | 6377 /* memory is low, delay before restarting */ 6378 ARCSTAT_INCR(arcstat_memory_throttle_count, 1); 6379 return (SET_ERROR(EAGAIN)); 6380 } |
6375 page_load = 0; 6376#endif | 6381 spa->spa_lowmem_page_load = 0; 6382#endif /* _KERNEL */ |
6377 return (0); 6378} 6379 6380void 6381arc_tempreserve_clear(uint64_t reserve) 6382{ 6383 atomic_add_64(&arc_tempreserve, -reserve); 6384 ASSERT((int64_t)arc_tempreserve >= 0); 6385} 6386 6387int | 6383 return (0); 6384} 6385 6386void 6387arc_tempreserve_clear(uint64_t reserve) 6388{ 6389 atomic_add_64(&arc_tempreserve, -reserve); 6390 ASSERT((int64_t)arc_tempreserve >= 0); 6391} 6392 6393int |
6388arc_tempreserve_space(uint64_t reserve, uint64_t txg) | 6394arc_tempreserve_space(spa_t *spa, uint64_t reserve, uint64_t txg) |
6389{ 6390 int error; 6391 uint64_t anon_size; 6392 6393 if (reserve > arc_c/4 && !arc_no_grow) { 6394 arc_c = MIN(arc_c_max, reserve * 4); 6395 DTRACE_PROBE1(arc__set_reserve, uint64_t, arc_c); 6396 } --- 12 unchanged lines hidden (view full) --- 6409 anon_size = MAX((int64_t)(refcount_count(&arc_anon->arcs_size) - 6410 arc_loaned_bytes), 0); 6411 6412 /* 6413 * Writes will, almost always, require additional memory allocations 6414 * in order to compress/encrypt/etc the data. We therefore need to 6415 * make sure that there is sufficient available memory for this. 6416 */ | 6395{ 6396 int error; 6397 uint64_t anon_size; 6398 6399 if (reserve > arc_c/4 && !arc_no_grow) { 6400 arc_c = MIN(arc_c_max, reserve * 4); 6401 DTRACE_PROBE1(arc__set_reserve, uint64_t, arc_c); 6402 } --- 12 unchanged lines hidden (view full) --- 6415 anon_size = MAX((int64_t)(refcount_count(&arc_anon->arcs_size) - 6416 arc_loaned_bytes), 0); 6417 6418 /* 6419 * Writes will, almost always, require additional memory allocations 6420 * in order to compress/encrypt/etc the data. We therefore need to 6421 * make sure that there is sufficient available memory for this. 6422 */ |
6417 error = arc_memory_throttle(reserve, txg); | 6423 error = arc_memory_throttle(spa, reserve, txg); |
6418 if (error != 0) 6419 return (error); 6420 6421 /* 6422 * Throttle writes when the amount of dirty data in the cache 6423 * gets too large. We try to keep the cache less than half full 6424 * of dirty blocks so that our sync times don't grow too large. | 6424 if (error != 0) 6425 return (error); 6426 6427 /* 6428 * Throttle writes when the amount of dirty data in the cache 6429 * gets too large. We try to keep the cache less than half full 6430 * of dirty blocks so that our sync times don't grow too large. |
6431 * 6432 * In the case of one pool being built on another pool, we want 6433 * to make sure we don't end up throttling the lower (backing) 6434 * pool when the upper pool is the majority contributor to dirty 6435 * data. To ensure we make forward progress during throttling, we 6436 * also check the current pool's net dirty data and only throttle 6437 * if it exceeds zfs_arc_pool_dirty_percent of the anonymous dirty 6438 * data in the cache. 6439 * |
|
6425 * Note: if two requests come in concurrently, we might let them 6426 * both succeed, when one of them should fail. Not a huge deal. 6427 */ | 6440 * Note: if two requests come in concurrently, we might let them 6441 * both succeed, when one of them should fail. Not a huge deal. 6442 */ |
6443 uint64_t total_dirty = reserve + arc_tempreserve + anon_size; 6444 uint64_t spa_dirty_anon = spa_dirty_data(spa); |
|
6428 | 6445 |
6429 if (reserve + arc_tempreserve + anon_size > arc_c / 2 && 6430 anon_size > arc_c / 4) { | 6446 if (total_dirty > arc_c * zfs_arc_dirty_limit_percent / 100 && 6447 anon_size > arc_c * zfs_arc_anon_limit_percent / 100 && 6448 spa_dirty_anon > anon_size * zfs_arc_pool_dirty_percent / 100) { |
6431 uint64_t meta_esize = 6432 refcount_count(&arc_anon->arcs_esize[ARC_BUFC_METADATA]); 6433 uint64_t data_esize = 6434 refcount_count(&arc_anon->arcs_esize[ARC_BUFC_DATA]); 6435 dprintf("failing, arc_tempreserve=%lluK anon_meta=%lluK " 6436 "anon_data=%lluK tempreserve=%lluK arc_c=%lluK\n", 6437 arc_tempreserve >> 10, meta_esize >> 10, 6438 data_esize >> 10, reserve >> 10, arc_c >> 10); --- 1645 unchanged lines hidden --- | 6449 uint64_t meta_esize = 6450 refcount_count(&arc_anon->arcs_esize[ARC_BUFC_METADATA]); 6451 uint64_t data_esize = 6452 refcount_count(&arc_anon->arcs_esize[ARC_BUFC_DATA]); 6453 dprintf("failing, arc_tempreserve=%lluK anon_meta=%lluK " 6454 "anon_data=%lluK tempreserve=%lluK arc_c=%lluK\n", 6455 arc_tempreserve >> 10, meta_esize >> 10, 6456 data_esize >> 10, reserve >> 10, arc_c >> 10); --- 1645 unchanged lines hidden --- |