1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

--- 325 unchanged lines hidden (view full) ---

334 */
335int arc_no_grow_shift = 5;
336
337
338/*
339 * minimum lifespan of a prefetch block, enforced in arc_evict_hdr()
340 * using the tunables below
341 */
342static int arc_min_prefetch_lifespan;
342static int zfs_arc_min_prefetch_ms = 1;
343static int zfs_arc_min_prescient_prefetch_ms = 6;
344
345/*
346 * If this percent of memory is free, don't throttle.
347 */
348int arc_lotsfree_percent = 10;
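/*
 * Illustrative sketch (an assumption about the shape of the check, not a
 * verbatim copy of the throttle code): with the default of 10, new writes
 * are not throttled while more than 10% of physical memory remains free:
 *
 *	if (freemem > physmem * arc_lotsfree_percent / 100)
 *		return (0);
 */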
349
350static int arc_dead;
351extern boolean_t zfs_prefetch_disable;

--- 427 unchanged lines hidden (view full) ---

779 kstat_named_t arcstat_l2_write_buffer_list_iter;
780 kstat_named_t arcstat_l2_write_buffer_list_null_iter;
781 kstat_named_t arcstat_memory_throttle_count;
782 /* Not updated directly; only synced in arc_kstat_update. */
783 kstat_named_t arcstat_meta_used;
784 kstat_named_t arcstat_meta_limit;
785 kstat_named_t arcstat_meta_max;
786 kstat_named_t arcstat_meta_min;
786 kstat_named_t arcstat_sync_wait_for_async;
787 kstat_named_t arcstat_async_upgrade_sync;
788 kstat_named_t arcstat_demand_hit_predictive_prefetch;
789 kstat_named_t arcstat_demand_hit_prescient_prefetch;
790} arc_stats_t;
791
792static arc_stats_t arc_stats = {
793 { "hits", KSTAT_DATA_UINT64 },
794 { "misses", KSTAT_DATA_UINT64 },
795 { "demand_data_hits", KSTAT_DATA_UINT64 },
796 { "demand_data_misses", KSTAT_DATA_UINT64 },
797 { "demand_metadata_hits", KSTAT_DATA_UINT64 },

--- 80 unchanged lines hidden (view full) ---

878 { "l2_write_buffer_bytes_scanned", KSTAT_DATA_UINT64 },
879 { "l2_write_buffer_list_iter", KSTAT_DATA_UINT64 },
880 { "l2_write_buffer_list_null_iter", KSTAT_DATA_UINT64 },
881 { "memory_throttle_count", KSTAT_DATA_UINT64 },
882 { "arc_meta_used", KSTAT_DATA_UINT64 },
883 { "arc_meta_limit", KSTAT_DATA_UINT64 },
884 { "arc_meta_max", KSTAT_DATA_UINT64 },
885 { "arc_meta_min", KSTAT_DATA_UINT64 },
884 { "sync_wait_for_async", KSTAT_DATA_UINT64 },
886 { "async_upgrade_sync", KSTAT_DATA_UINT64 },
887 { "demand_hit_predictive_prefetch", KSTAT_DATA_UINT64 },
888 { "demand_hit_prescient_prefetch", KSTAT_DATA_UINT64 },
889};
890
891#define ARCSTAT(stat) (arc_stats.stat.value.ui64)
892
893#define ARCSTAT_INCR(stat, val) \
894 atomic_add_64(&arc_stats.stat.value.ui64, (val))
895
896#define ARCSTAT_BUMP(stat) ARCSTAT_INCR(stat, 1)
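/*
 * Usage sketch: ARCSTAT_BUMP() increments a counter by one and
 * ARCSTAT_INCR() adds an arbitrary delta, each as a single
 * atomic_add_64().  For example, as used later in this file:
 *
 *	ARCSTAT_BUMP(arcstat_hits);
 *	ARCSTAT_INCR(arcstat_l2_read_bytes, size);
 */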

--- 79 unchanged lines hidden (view full) ---

976static int arc_no_grow; /* Don't try to grow cache size */
977static uint64_t arc_tempreserve;
978static uint64_t arc_loaned_bytes;
979
980typedef struct arc_callback arc_callback_t;
981
982struct arc_callback {
983 void *acb_private;
981 arc_done_func_t *acb_done;
984 arc_read_done_func_t *acb_done;
985 arc_buf_t *acb_buf;
986 boolean_t acb_compressed;
987 zio_t *acb_zio_dummy;
988 zio_t *acb_zio_head;
989 arc_callback_t *acb_next;
990};
991
992typedef struct arc_write_callback arc_write_callback_t;
993
994struct arc_write_callback {
991 void *awcb_private;
992 arc_done_func_t *awcb_ready;
993 arc_done_func_t *awcb_children_ready;
994 arc_done_func_t *awcb_physdone;
995 arc_done_func_t *awcb_done;
996 arc_buf_t *awcb_buf;
995 void *awcb_private;
996 arc_write_done_func_t *awcb_ready;
997 arc_write_done_func_t *awcb_children_ready;
998 arc_write_done_func_t *awcb_physdone;
999 arc_write_done_func_t *awcb_done;
1000 arc_buf_t *awcb_buf;
1001};
1002
1003/*
1004 * ARC buffers are separated into multiple structs as a memory saving measure:
1005 * - Common fields struct, always defined, and embedded within it:
1006 * - L2-only fields, always allocated but undefined when not in L2ARC
1007 * - L1-only fields, only allocated when in L1ARC
1008 *

--- 223 unchanged lines hidden (view full) ---

1232#define GHOST_STATE(state) \
1233 ((state) == arc_mru_ghost || (state) == arc_mfu_ghost || \
1234 (state) == arc_l2c_only)
1235
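/*
 * A header in a ghost state no longer holds buffered data; arc_evict_hdr()
 * below asserts that such headers have no b_buf attached.
 */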
1236#define HDR_IN_HASH_TABLE(hdr) ((hdr)->b_flags & ARC_FLAG_IN_HASH_TABLE)
1237#define HDR_IO_IN_PROGRESS(hdr) ((hdr)->b_flags & ARC_FLAG_IO_IN_PROGRESS)
1238#define HDR_IO_ERROR(hdr) ((hdr)->b_flags & ARC_FLAG_IO_ERROR)
1239#define HDR_PREFETCH(hdr) ((hdr)->b_flags & ARC_FLAG_PREFETCH)
1240#define HDR_PRESCIENT_PREFETCH(hdr) \
1241 ((hdr)->b_flags & ARC_FLAG_PRESCIENT_PREFETCH)
1242#define HDR_COMPRESSION_ENABLED(hdr) \
1243 ((hdr)->b_flags & ARC_FLAG_COMPRESSED_ARC)
1244
1245#define HDR_L2CACHE(hdr) ((hdr)->b_flags & ARC_FLAG_L2CACHE)
1246#define HDR_L2_READING(hdr) \
1247 (((hdr)->b_flags & ARC_FLAG_IO_IN_PROGRESS) && \
1248 ((hdr)->b_flags & ARC_FLAG_HAS_L2HDR))
1249#define HDR_L2_WRITING(hdr) ((hdr)->b_flags & ARC_FLAG_L2_WRITING)

--- 147 unchanged lines hidden (view full) ---

1397 "size of metadata in mfu ghost state");
1398SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_data_esize, CTLFLAG_RD,
1399 &ARC_mfu_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
1400 "size of data in mfu ghost state");
1401
1402SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2c_only_size, CTLFLAG_RD,
1403 &ARC_l2c_only.arcs_size.rc_count, 0, "size of l2c_only state");
1404
1405SYSCTL_UINT(_vfs_zfs, OID_AUTO, arc_min_prefetch_ms, CTLFLAG_RW,
1406 &zfs_arc_min_prefetch_ms, 0, "Min life of prefetch block in ms");
1407SYSCTL_UINT(_vfs_zfs, OID_AUTO, arc_min_prescient_prefetch_ms, CTLFLAG_RW,
1408 &zfs_arc_min_prescient_prefetch_ms, 0, "Min life of prescient prefetched block in ms");
1409
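/*
 * Both tunables are exported read-write, so they can be adjusted at
 * runtime, for example (assuming the usual vfs.zfs sysctl tree):
 *
 *	sysctl vfs.zfs.arc_min_prefetch_ms=1
 *	sysctl vfs.zfs.arc_min_prescient_prefetch_ms=6
 */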
1410/*
1411 * L2ARC Internals
1412 */
1413struct l2arc_dev {
1414 vdev_t *l2ad_vdev; /* vdev */
1415 spa_t *l2ad_spa; /* spa */
1416 uint64_t l2ad_hand; /* next write location */
1417 uint64_t l2ad_start; /* first addr on device */

--- 2136 unchanged lines hidden (view full) ---

3554 * - arc_mfu_ghost -> arc_l2c_only
3555 * - arc_mfu_ghost -> deleted
3556 */
3557static int64_t
3558arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
3559{
3560 arc_state_t *evicted_state, *state;
3561 int64_t bytes_evicted = 0;
3562 int min_lifetime = HDR_PRESCIENT_PREFETCH(hdr) ?
3563 zfs_arc_min_prescient_prefetch_ms : zfs_arc_min_prefetch_ms;
3564
3565 ASSERT(MUTEX_HELD(hash_lock));
3566 ASSERT(HDR_HAS_L1HDR(hdr));
3567
3568 state = hdr->b_l1hdr.b_state;
3569 if (GHOST_STATE(state)) {
3570 ASSERT(!HDR_IO_IN_PROGRESS(hdr));
3571 ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);

--- 36 unchanged lines hidden (view full) ---

3608 }
3609
3610 ASSERT(state == arc_mru || state == arc_mfu);
3611 evicted_state = (state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost;
3612
3613 /* prefetch buffers have a minimum lifespan */
3614 if (HDR_IO_IN_PROGRESS(hdr) ||
3615 ((hdr->b_flags & (ARC_FLAG_PREFETCH | ARC_FLAG_INDIRECT)) &&
3603 ddi_get_lbolt() - hdr->b_l1hdr.b_arc_access <
3604 arc_min_prefetch_lifespan)) {
3616 ddi_get_lbolt() - hdr->b_l1hdr.b_arc_access < min_lifetime * hz)) {
3617 ARCSTAT_BUMP(arcstat_evict_skip);
3618 return (bytes_evicted);
3619 }
3620
3621 ASSERT0(refcount_count(&hdr->b_l1hdr.b_refcnt));
3622 while (hdr->b_l1hdr.b_buf) {
3623 arc_buf_t *buf = hdr->b_l1hdr.b_buf;
3624 if (!mutex_tryenter(&buf->b_evict_lock)) {

--- 1379 unchanged lines hidden (view full) ---

5004 /*
5005 * If this buffer is here because of a prefetch, then either:
5006 * - clear the flag if this is a "referencing" read
5007 * (any subsequent access will bump this into the MFU state).
5008 * or
5009 * - move the buffer to the head of the list if this is
5010 * another prefetch (to make it less likely to be evicted).
5011 */
5000 if (HDR_PREFETCH(hdr)) {
5012 if (HDR_PREFETCH(hdr) || HDR_PRESCIENT_PREFETCH(hdr)) {
5013 if (refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) {
5014 /* link protected by hash lock */
5015 ASSERT(multilist_link_active(
5016 &hdr->b_l1hdr.b_arc_node));
5017 } else {
5006 arc_hdr_clear_flags(hdr, ARC_FLAG_PREFETCH);
5018 arc_hdr_clear_flags(hdr,
5019 ARC_FLAG_PREFETCH |
5020 ARC_FLAG_PRESCIENT_PREFETCH);
5021 ARCSTAT_BUMP(arcstat_mru_hits);
5022 }
5023 hdr->b_l1hdr.b_arc_access = now;
5024 return;
5025 }
5026
5027 /*
5028 * This buffer has been "accessed" only once so far,

--- 14 unchanged lines hidden (view full) ---

5043 } else if (hdr->b_l1hdr.b_state == arc_mru_ghost) {
5044 arc_state_t *new_state;
5045 /*
5046 * This buffer has been "accessed" recently, but
5047 * was evicted from the cache. Move it to the
5048 * MFU state.
5049 */
5050
5037 if (HDR_PREFETCH(hdr)) {
5051 if (HDR_PREFETCH(hdr) || HDR_PRESCIENT_PREFETCH(hdr)) {
5052 new_state = arc_mru;
5039 if (refcount_count(&hdr->b_l1hdr.b_refcnt) > 0)
5040 arc_hdr_clear_flags(hdr, ARC_FLAG_PREFETCH);
5053 if (refcount_count(&hdr->b_l1hdr.b_refcnt) > 0) {
5054 arc_hdr_clear_flags(hdr,
5055 ARC_FLAG_PREFETCH |
5056 ARC_FLAG_PRESCIENT_PREFETCH);
5057 }
5058 DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, hdr);
5059 } else {
5060 new_state = arc_mfu;
5061 DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr);
5062 }
5063
5064 hdr->b_l1hdr.b_arc_access = ddi_get_lbolt();
5065 arc_change_state(new_state, hdr, hash_lock);

--- 4 unchanged lines hidden (view full) ---

5070 * This buffer has been accessed more than once and is
5071 * still in the cache. Keep it in the MFU state.
5072 *
5073 * NOTE: an add_reference() that occurred when we did
5074 * the arc_read() will have kicked this off the list.
5075 * If it was a prefetch, we will explicitly move it to
5076 * the head of the list now.
5077 */
5061 if ((HDR_PREFETCH(hdr)) != 0) {
5062 ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
5063 /* link protected by hash_lock */
5064 ASSERT(multilist_link_active(&hdr->b_l1hdr.b_arc_node));
5065 }
5078
5079 ARCSTAT_BUMP(arcstat_mfu_hits);
5080 hdr->b_l1hdr.b_arc_access = ddi_get_lbolt();
5081 } else if (hdr->b_l1hdr.b_state == arc_mfu_ghost) {
5082 arc_state_t *new_state = arc_mfu;
5083 /*
5084 * This buffer has been accessed more than once but has
5085 * been evicted from the cache. Move it back to the
5086 * MFU state.
5087 */
5088
5076 if (HDR_PREFETCH(hdr)) {
5089 if (HDR_PREFETCH(hdr) || HDR_PRESCIENT_PREFETCH(hdr)) {
5090 /*
5091 * This is a prefetch access...
5092 * move this block back to the MRU state.
5093 */
5081 ASSERT0(refcount_count(&hdr->b_l1hdr.b_refcnt));
5094 new_state = arc_mru;
5095 }
5096
5097 hdr->b_l1hdr.b_arc_access = ddi_get_lbolt();
5098 DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr);
5099 arc_change_state(new_state, hdr, hash_lock);
5100
5101 ARCSTAT_BUMP(arcstat_mfu_ghost_hits);

--- 50 unchanged lines hidden (view full) ---

5152 arc_access(hdr, hash_lock);
5153 mutex_exit(hash_lock);
5154
5155 ARCSTAT_BUMP(arcstat_hits);
5156 ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr),
5157 demand, prefetch, !HDR_ISTYPE_METADATA(hdr), data, metadata, hits);
5158}
5159
5148/* a generic arc_done_func_t which you can use */
5160/* a generic arc_read_done_func_t which you can use */
5161/* ARGSUSED */
5162void
5151arc_bcopy_func(zio_t *zio, arc_buf_t *buf, void *arg)
5163arc_bcopy_func(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
5164 arc_buf_t *buf, void *arg)
5165{
5153 if (zio == NULL || zio->io_error == 0)
5154 bcopy(buf->b_data, arg, arc_buf_size(buf));
5166 if (buf == NULL)
5167 return;
5168
5169 bcopy(buf->b_data, arg, arc_buf_size(buf));
5170 arc_buf_destroy(buf, arg);
5171}
5172
5158/* a generic arc_done_func_t */
5173/* a generic arc_read_done_func_t */
5174/* ARGSUSED */
5175void
5160arc_getbuf_func(zio_t *zio, arc_buf_t *buf, void *arg)
5176arc_getbuf_func(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
5177 arc_buf_t *buf, void *arg)
5178{
5179 arc_buf_t **bufp = arg;
5163 if (zio && zio->io_error) {
5164 arc_buf_destroy(buf, arg);
5180
5181 if (buf == NULL) {
5182 *bufp = NULL;
5183 } else {
5184 *bufp = buf;
5185 ASSERT(buf->b_data);
5186 }
5187}
5188
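/*
 * Illustrative caller sketch (an assumption about typical usage, not code
 * from this file): with the arc_read_done_func_t signature, a caller that
 * wants the data synchronously can pair arc_getbuf_func with ARC_FLAG_WAIT
 * and pick the buffer up through a local arc_buf_t pointer:
 *
 *	arc_buf_t *abuf = NULL;
 *	arc_flags_t aflags = ARC_FLAG_WAIT;
 *	int err;
 *
 *	err = arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
 *	    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, zb);
 *	if (err != 0 || abuf == NULL)
 *		return (err);
 *	(consume abuf->b_data, then drop the hold)
 *	arc_buf_destroy(abuf, &abuf);
 */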
5189static void

--- 15 unchanged lines hidden (view full) ---

5205static void
5206arc_read_done(zio_t *zio)
5207{
5208 arc_buf_hdr_t *hdr = zio->io_private;
5209 kmutex_t *hash_lock = NULL;
5210 arc_callback_t *callback_list;
5211 arc_callback_t *acb;
5212 boolean_t freeable = B_FALSE;
5196 boolean_t no_zio_error = (zio->io_error == 0);
5213
5214 /*
5215 * The hdr was inserted into hash-table and removed from lists
5216 * prior to starting I/O. We should find this header, since
5217 * it's in the hash table, and it should be legit since it's
5218 * not possible to evict it during the I/O. The only possible
5219 * reason for it not to be found is if we were freed during the
5220 * read.

--- 9 unchanged lines hidden (view full) ---

5230 &hash_lock);
5231
5232 ASSERT((found == hdr &&
5233 DVA_EQUAL(&hdr->b_dva, BP_IDENTITY(zio->io_bp))) ||
5234 (found == hdr && HDR_L2_READING(hdr)));
5235 ASSERT3P(hash_lock, !=, NULL);
5236 }
5237
5222 if (no_zio_error) {
5238 if (zio->io_error == 0) {
5239 /* byteswap if necessary */
5240 if (BP_SHOULD_BYTESWAP(zio->io_bp)) {
5241 if (BP_GET_LEVEL(zio->io_bp) > 0) {
5242 hdr->b_l1hdr.b_byteswap = DMU_BSWAP_UINT64;
5243 } else {
5244 hdr->b_l1hdr.b_byteswap =
5245 DMU_OT_BYTESWAP(BP_GET_TYPE(zio->io_bp));
5246 }

--- 4 unchanged lines hidden (view full) ---

5251
5252 arc_hdr_clear_flags(hdr, ARC_FLAG_L2_EVICTED);
5253 if (l2arc_noprefetch && HDR_PREFETCH(hdr))
5254 arc_hdr_clear_flags(hdr, ARC_FLAG_L2CACHE);
5255
5256 callback_list = hdr->b_l1hdr.b_acb;
5257 ASSERT3P(callback_list, !=, NULL);
5258
5243 if (hash_lock && no_zio_error && hdr->b_l1hdr.b_state == arc_anon) {
5259 if (hash_lock && zio->io_error == 0 &&
5260 hdr->b_l1hdr.b_state == arc_anon) {
5261 /*
5262 * Only call arc_access on anonymous buffers. This is because
5263 * if we've issued an I/O for an evicted buffer, we've already
5264 * called arc_access (to prevent any simultaneous readers from
5265 * getting confused).
5266 */
5267 arc_access(hdr, hash_lock);
5268 }

--- 4 unchanged lines hidden (view full) ---

5273 * passed in. The implementation of arc_buf_alloc_impl() ensures that we
5274 * aren't needlessly decompressing the data multiple times.
5275 */
5276 int callback_cnt = 0;
5277 for (acb = callback_list; acb != NULL; acb = acb->acb_next) {
5278 if (!acb->acb_done)
5279 continue;
5280
5264 /* This is a demand read since prefetches don't use callbacks */
5281 callback_cnt++;
5282
5283 if (zio->io_error != 0)
5284 continue;
5285
5286 int error = arc_buf_alloc_impl(hdr, acb->acb_private,
5268 acb->acb_compressed, no_zio_error, &acb->acb_buf);
5269 if (no_zio_error) {
5270 zio->io_error = error;
5287 acb->acb_compressed,
5288 B_TRUE, &acb->acb_buf);
5289 if (error != 0) {
5290 arc_buf_destroy(acb->acb_buf, acb->acb_private);
5291 acb->acb_buf = NULL;
5292 }
5293
5294 if (zio->io_error == 0)
5295 zio->io_error = error;
5296 }
5297 hdr->b_l1hdr.b_acb = NULL;
5298 arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS);
5299 if (callback_cnt == 0) {
5300 ASSERT(HDR_PREFETCH(hdr));
5301 ASSERT0(hdr->b_l1hdr.b_bufcnt);
5302 ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
5303 }
5304
5305 ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt) ||
5306 callback_list != NULL);
5307
5284 if (no_zio_error) {
5308 if (zio->io_error == 0) {
5309 arc_hdr_verify(hdr, zio->io_bp);
5310 } else {
5311 arc_hdr_set_flags(hdr, ARC_FLAG_IO_ERROR);
5312 if (hdr->b_l1hdr.b_state != arc_anon)
5313 arc_change_state(arc_anon, hdr, hash_lock);
5314 if (HDR_IN_HASH_TABLE(hdr))
5315 buf_hash_remove(hdr);
5316 freeable = refcount_is_zero(&hdr->b_l1hdr.b_refcnt);

--- 16 unchanged lines hidden (view full) ---

5333 * in the cache).
5334 */
5335 ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
5336 freeable = refcount_is_zero(&hdr->b_l1hdr.b_refcnt);
5337 }
5338
5339 /* execute each callback and free its structure */
5340 while ((acb = callback_list) != NULL) {
5317 if (acb->acb_done)
5318 acb->acb_done(zio, acb->acb_buf, acb->acb_private);
5341 if (acb->acb_done) {
5342 acb->acb_done(zio, &zio->io_bookmark, zio->io_bp,
5343 acb->acb_buf, acb->acb_private);
5344 }
5345
5346 if (acb->acb_zio_dummy != NULL) {
5347 acb->acb_zio_dummy->io_error = zio->io_error;
5348 zio_nowait(acb->acb_zio_dummy);
5349 }
5350
5351 callback_list = acb->acb_next;
5352 kmem_free(acb, sizeof (arc_callback_t));

--- 17 unchanged lines hidden (view full) ---

5370 * results); or, if this is a read with a "done" func, add a record
5371 * to the read to invoke the "done" func when the read completes,
5372 * and return; or just return.
5373 *
5374 * arc_read_done() will invoke all the requested "done" functions
5375 * for readers of this block.
5376 */
5377int
5352arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
5378arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_read_done_func_t *done,
5379 void *private, zio_priority_t priority, int zio_flags,
5380 arc_flags_t *arc_flags, const zbookmark_phys_t *zb)
5381{
5382 arc_buf_hdr_t *hdr = NULL;
5383 kmutex_t *hash_lock = NULL;
5384 zio_t *rzio;
5385 uint64_t guid = spa_load_guid(spa);
5386 boolean_t compressed_read = (zio_flags & ZIO_FLAG_RAW) != 0;
5361
5387 int rc = 0;
5388
5389 ASSERT(!BP_IS_EMBEDDED(bp) ||
5390 BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA);
5391
5392top:
5393 if (!BP_IS_EMBEDDED(bp)) {
5394 /*
5395 * Embedded BP's have no DVA and require no I/O to "read".
5396 * Create an anonymous arc buf to back it.
5397 */
5398 hdr = buf_hash_find(guid, bp, &hash_lock);
5399 }
5400
5401 if (hdr != NULL && HDR_HAS_L1HDR(hdr) && hdr->b_l1hdr.b_pabd != NULL) {
5402 arc_buf_t *buf = NULL;
5403 *arc_flags |= ARC_FLAG_CACHED;
5404
5405 if (HDR_IO_IN_PROGRESS(hdr)) {
5406 zio_t *head_zio = hdr->b_l1hdr.b_acb->acb_zio_head;
5407
5408 ASSERT3P(head_zio, !=, NULL);
5409 if ((hdr->b_flags & ARC_FLAG_PRIO_ASYNC_READ) &&
5410 priority == ZIO_PRIORITY_SYNC_READ) {
5411 /*
5383 * This sync read must wait for an
5384 * in-progress async read (e.g. a predictive
5385 * prefetch). Async reads are queued
5386 * separately at the vdev_queue layer, so
5387 * this is a form of priority inversion.
5388 * Ideally, we would "inherit" the demand
5389 * i/o's priority by moving the i/o from
5390 * the async queue to the synchronous queue,
5391 * but there is currently no mechanism to do
5392 * so. Track this so that we can evaluate
5393 * the magnitude of this potential performance
5394 * problem.
5395 *
5396 * Note that if the prefetch i/o is already
5397 * active (has been issued to the device),
5398 * the prefetch improved performance, because
5399 * we issued it sooner than we would have
5400 * without the prefetch.
5412 * This is a sync read that needs to wait for
5413 * an in-flight async read. Request that the
5414 * zio have its priority upgraded.
5415 */
5402 DTRACE_PROBE1(arc__sync__wait__for__async,
5416 zio_change_priority(head_zio, priority);
5417 DTRACE_PROBE1(arc__async__upgrade__sync,
5418 arc_buf_hdr_t *, hdr);
5404 ARCSTAT_BUMP(arcstat_sync_wait_for_async);
5419 ARCSTAT_BUMP(arcstat_async_upgrade_sync);
5420 }
5421 if (hdr->b_flags & ARC_FLAG_PREDICTIVE_PREFETCH) {
5422 arc_hdr_clear_flags(hdr,
5423 ARC_FLAG_PREDICTIVE_PREFETCH);
5424 }
5425
5426 if (*arc_flags & ARC_FLAG_WAIT) {
5427 cv_wait(&hdr->b_l1hdr.b_cv, hash_lock);

--- 10 unchanged lines hidden (view full) ---

5438 acb->acb_done = done;
5439 acb->acb_private = private;
5440 acb->acb_compressed = compressed_read;
5441 if (pio != NULL)
5442 acb->acb_zio_dummy = zio_null(pio,
5443 spa, NULL, NULL, NULL, zio_flags);
5444
5445 ASSERT3P(acb->acb_done, !=, NULL);
5446 acb->acb_zio_head = head_zio;
5447 acb->acb_next = hdr->b_l1hdr.b_acb;
5448 hdr->b_l1hdr.b_acb = acb;
5449 mutex_exit(hash_lock);
5450 return (0);
5451 }
5452 mutex_exit(hash_lock);
5453 return (0);
5454 }

--- 11 unchanged lines hidden (view full) ---

5466 DTRACE_PROBE1(
5467 arc__demand__hit__predictive__prefetch,
5468 arc_buf_hdr_t *, hdr);
5469 ARCSTAT_BUMP(
5470 arcstat_demand_hit_predictive_prefetch);
5471 arc_hdr_clear_flags(hdr,
5472 ARC_FLAG_PREDICTIVE_PREFETCH);
5473 }
5458 ASSERT(!BP_IS_EMBEDDED(bp) || !BP_IS_HOLE(bp));
5474
5475 if (hdr->b_flags & ARC_FLAG_PRESCIENT_PREFETCH) {
5476 ARCSTAT_BUMP(
5477 arcstat_demand_hit_prescient_prefetch);
5478 arc_hdr_clear_flags(hdr,
5479 ARC_FLAG_PRESCIENT_PREFETCH);
5480 }
5481
5482 ASSERT(!BP_IS_EMBEDDED(bp) || !BP_IS_HOLE(bp));
5483 /* Get a buf with the desired data in it. */
5461 VERIFY0(arc_buf_alloc_impl(hdr, private,
5462 compressed_read, B_TRUE, &buf));
5484 rc = arc_buf_alloc_impl(hdr, private,
5485 compressed_read, B_TRUE, &buf);
5486 if (rc != 0) {
5487 arc_buf_destroy(buf, private);
5488 buf = NULL;
5489 }
5490 ASSERT((zio_flags & ZIO_FLAG_SPECULATIVE) ||
5491 rc == 0 || rc != ENOENT);
5492 } else if (*arc_flags & ARC_FLAG_PREFETCH &&
5493 refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) {
5494 arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH);
5495 }
5496 DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr);
5497 arc_access(hdr, hash_lock);
5498 if (*arc_flags & ARC_FLAG_PRESCIENT_PREFETCH)
5499 arc_hdr_set_flags(hdr, ARC_FLAG_PRESCIENT_PREFETCH);
5500 if (*arc_flags & ARC_FLAG_L2CACHE)
5501 arc_hdr_set_flags(hdr, ARC_FLAG_L2CACHE);
5502 mutex_exit(hash_lock);
5503 ARCSTAT_BUMP(arcstat_hits);
5504 ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr),
5505 demand, prefetch, !HDR_ISTYPE_METADATA(hdr),
5506 data, metadata, hits);
5507
5508 if (done)
5478 done(NULL, buf, private);
5509 done(NULL, zb, bp, buf, private);
5510 } else {
5511 uint64_t lsize = BP_GET_LSIZE(bp);
5512 uint64_t psize = BP_GET_PSIZE(bp);
5513 arc_callback_t *acb;
5514 vdev_t *vd = NULL;
5515 uint64_t addr = 0;
5516 boolean_t devw = B_FALSE;
5517 uint64_t size;

--- 57 unchanged lines hidden (view full) ---

5575 * the uncompressed data.
5576 */
5577 if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF) {
5578 zio_flags |= ZIO_FLAG_RAW;
5579 }
5580
5581 if (*arc_flags & ARC_FLAG_PREFETCH)
5582 arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH);
5583 if (*arc_flags & ARC_FLAG_PRESCIENT_PREFETCH)
5584 arc_hdr_set_flags(hdr, ARC_FLAG_PRESCIENT_PREFETCH);
5585
5586 if (*arc_flags & ARC_FLAG_L2CACHE)
5587 arc_hdr_set_flags(hdr, ARC_FLAG_L2CACHE);
5588 if (BP_GET_LEVEL(bp) > 0)
5589 arc_hdr_set_flags(hdr, ARC_FLAG_INDIRECT);
5590 if (*arc_flags & ARC_FLAG_PREDICTIVE_PREFETCH)
5591 arc_hdr_set_flags(hdr, ARC_FLAG_PREDICTIVE_PREFETCH);
5592 ASSERT(!GHOST_STATE(hdr->b_l1hdr.b_state));
5593

--- 13 unchanged lines hidden (view full) ---

5607 /*
5608 * Lock out L2ARC device removal.
5609 */
5610 if (vdev_is_dead(vd) ||
5611 !spa_config_tryenter(spa, SCL_L2ARC, vd, RW_READER))
5612 vd = NULL;
5613 }
5614
5581 if (priority == ZIO_PRIORITY_ASYNC_READ)
5615 /*
5616 * We count both async reads and scrub IOs as asynchronous so
5617 * that both can be upgraded in the event of a cache hit while
5618 * the read IO is still in-flight.
5619 */
5620 if (priority == ZIO_PRIORITY_ASYNC_READ ||
5621 priority == ZIO_PRIORITY_SCRUB)
5622 arc_hdr_set_flags(hdr, ARC_FLAG_PRIO_ASYNC_READ);
5623 else
5624 arc_hdr_clear_flags(hdr, ARC_FLAG_PRIO_ASYNC_READ);
5625
5586 if (hash_lock != NULL)
5587 mutex_exit(hash_lock);
5588
5626 /*
5627 * At this point, we have a level 1 cache miss. Try again in
5628 * L2ARC if possible.
5629 */
5630 ASSERT3U(HDR_GET_LSIZE(hdr), ==, lsize);
5631
5632 DTRACE_PROBE4(arc__miss, arc_buf_hdr_t *, hdr, blkptr_t *, bp,
5633 uint64_t, lsize, zbookmark_phys_t *, zb);

--- 64 unchanged lines hidden (view full) ---

5698 rzio = zio_read_phys(pio, vd, addr,
5699 asize, abd,
5700 ZIO_CHECKSUM_OFF,
5701 l2arc_read_done, cb, priority,
5702 zio_flags | ZIO_FLAG_DONT_CACHE |
5703 ZIO_FLAG_CANFAIL |
5704 ZIO_FLAG_DONT_PROPAGATE |
5705 ZIO_FLAG_DONT_RETRY, B_FALSE);
5706 acb->acb_zio_head = rzio;
5707
5708 if (hash_lock != NULL)
5709 mutex_exit(hash_lock);
5710
5711 DTRACE_PROBE2(l2arc__read, vdev_t *, vd,
5712 zio_t *, rzio);
5713 ARCSTAT_INCR(arcstat_l2_read_bytes, size);
5714
5715 if (*arc_flags & ARC_FLAG_NOWAIT) {
5716 zio_nowait(rzio);
5717 return (0);
5718 }
5719
5720 ASSERT(*arc_flags & ARC_FLAG_WAIT);
5721 if (zio_wait(rzio) == 0)
5722 return (0);
5723
5724 /* l2arc read error; goto zio_read() */
5725 if (hash_lock != NULL)
5726 mutex_enter(hash_lock);
5727 } else {
5728 DTRACE_PROBE1(l2arc__miss,
5729 arc_buf_hdr_t *, hdr);
5730 ARCSTAT_BUMP(arcstat_l2_misses);
5731 if (HDR_L2_WRITING(hdr))
5732 ARCSTAT_BUMP(arcstat_l2_rw_clash);
5733 spa_config_exit(spa, SCL_L2ARC, vd);
5734 }

--- 4 unchanged lines hidden (view full) ---

5739 DTRACE_PROBE1(l2arc__miss,
5740 arc_buf_hdr_t *, hdr);
5741 ARCSTAT_BUMP(arcstat_l2_misses);
5742 }
5743 }
5744
5745 rzio = zio_read(pio, spa, bp, hdr->b_l1hdr.b_pabd, size,
5746 arc_read_done, hdr, priority, zio_flags, zb);
5747 acb->acb_zio_head = rzio;
5748
5749 if (hash_lock != NULL)
5750 mutex_exit(hash_lock);
5751
5752 if (*arc_flags & ARC_FLAG_WAIT)
5753 return (zio_wait(rzio));
5754
5755 ASSERT(*arc_flags & ARC_FLAG_NOWAIT);
5756 zio_nowait(rzio);
5757 }
5758 return (0);
5759}

--- 474 unchanged lines hidden (view full) ---

6234 callback->awcb_done(zio, buf, callback->awcb_private);
6235
6236 abd_put(zio->io_abd);
6237 kmem_free(callback, sizeof (arc_write_callback_t));
6238}
6239
6240zio_t *
6241arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, arc_buf_t *buf,
6194 boolean_t l2arc, const zio_prop_t *zp, arc_done_func_t *ready,
6195 arc_done_func_t *children_ready, arc_done_func_t *physdone,
6196 arc_done_func_t *done, void *private, zio_priority_t priority,
6242 boolean_t l2arc, const zio_prop_t *zp, arc_write_done_func_t *ready,
6243 arc_write_done_func_t *children_ready, arc_write_done_func_t *physdone,
6244 arc_write_done_func_t *done, void *private, zio_priority_t priority,
6245 int zio_flags, const zbookmark_phys_t *zb)
6246{
6247 arc_buf_hdr_t *hdr = buf->b_hdr;
6248 arc_write_callback_t *callback;
6249 zio_t *zio;
6250 zio_prop_t localprop = *zp;
6251
6252 ASSERT3P(ready, !=, NULL);

--- 411 unchanged lines hidden (view full) ---

6664
6665 mutex_init(&arc_reclaim_lock, NULL, MUTEX_DEFAULT, NULL);
6666 cv_init(&arc_reclaim_thread_cv, NULL, CV_DEFAULT, NULL);
6667 cv_init(&arc_reclaim_waiters_cv, NULL, CV_DEFAULT, NULL);
6668
6669 mutex_init(&arc_dnlc_evicts_lock, NULL, MUTEX_DEFAULT, NULL);
6670 cv_init(&arc_dnlc_evicts_cv, NULL, CV_DEFAULT, NULL);
6671
6624 /* Convert seconds to clock ticks */
6625 arc_min_prefetch_lifespan = 1 * hz;
6626
6672 /* set min cache to 1/32 of all memory, or arc_abs_min, whichever is more */
6673 arc_c_min = MAX(allmem / 32, arc_abs_min);
6674 /* set max to 5/8 of all memory, or all but 1GB, whichever is more */
6675 if (allmem >= 1 << 30)
6676 arc_c_max = allmem - (1 << 30);
6677 else
6678 arc_c_max = arc_c_min;
6679 arc_c_max = MAX(allmem * 5 / 8, arc_c_max);
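/*
 * Worked example (illustrative): with 16 GB of usable memory and the
 * usual small arc_abs_min, arc_c_min = MAX(16 GB / 32, arc_abs_min) =
 * 512 MB, and arc_c_max = MAX(16 GB * 5 / 8, 16 GB - 1 GB) =
 * MAX(10 GB, 15 GB) = 15 GB.
 */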

--- 1376 unchanged lines hidden ---