Lines Matching refs:slab

6 #include "slab-depot.h"
42 * get_lock() - Get the lock object for a slab journal block by sequence number.
54 static bool is_slab_open(struct vdo_slab *slab)
56 return (!vdo_is_state_quiescing(&slab->state) &&
57 !vdo_is_state_quiescent(&slab->state));
64 * Return: true if there are no entry waiters, or if the slab is unrecovered.
68 return ((journal->slab->status != VDO_SLAB_REBUILDING) &&
111 * @journal: The slab journal for the block.
129 * is_slab_journal_blank() - Check whether a slab's journal is blank.
131 * A slab journal is blank if it has never had any entries recorded in it.
133 * Return: true if the slab's journal has never been modified.
135 static bool is_slab_journal_blank(const struct vdo_slab *slab)
137 return ((slab->journal.tail == 1) &&
138 (slab->journal.tail_header.entry_count == 0));
142 * mark_slab_journal_dirty() - Put a slab journal on the dirty ring of its allocator in the correct
145 * @lock: The recovery journal lock held by the slab journal.
150 struct list_head *dirty_list = &journal->slab->allocator->dirty_slab_journals;
152 VDO_ASSERT_LOG_ONLY(journal->recovery_lock == 0, "slab journal was clean");
169 static void check_if_slab_drained(struct vdo_slab *slab)
172 struct slab_journal *journal = &slab->journal;
175 if (!vdo_is_state_draining(&slab->state) ||
181 (slab->active_count > 0))
184 /* When not suspending or recovering, the slab must be clean. */
185 code = vdo_get_admin_state_code(&slab->state);
186 read_only = vdo_is_read_only(slab->allocator->depot->vdo);
188 vdo_waitq_has_waiters(&slab->dirty_blocks) &&
193 vdo_finish_draining_with_result(&slab->state,
200 * compute_fullness_hint() - Translate a slab's free block count into a 'fullness hint' that can be
288 * handle_write_error() - Handle an error writing a slab summary block.
311 * launch_write() - Write a slab summary block unless it is currently out for writing.
336 * Flush before writing to ensure that the slab journal tail blocks and reference updates
338 * encounter a slab summary update that refers to a slab journal tail block that has not
339 * actually been written. In such cases, the slab journal referenced will be treated as
340 * empty, causing any data within the slab which predates the existing recovery journal
351 * update_slab_summary_entry() - Update the entry for a slab.
352 * @slab: The slab whose entry is to be updated.
354 * @tail_block_offset: The offset of the slab journal's tail block.
356 * @is_clean: Whether the slab is clean.
359 static void update_slab_summary_entry(struct vdo_slab *slab, struct vdo_waiter *waiter,
364 u8 index = slab->slab_number / VDO_SLAB_SUMMARY_ENTRIES_PER_BLOCK;
365 struct block_allocator *allocator = slab->allocator;
383 entry = &allocator->summary_entries[slab->slab_number];
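
Lines 364 and 383 together show how a slab's summary entry is addressed: the entries for all of an allocator's slabs live in one flat array indexed by slab number, while the on-disk summary block that must be rewritten is found by integer division. A minimal standalone sketch of that addressing (the entries-per-block constant is an assumed illustrative value, not the kernel's):

#include <stdint.h>
#include <stddef.h>

/* Assumed value for illustration; the kernel derives it from the summary block layout. */
#define ENTRIES_PER_SUMMARY_BLOCK 64u

struct summary_entry_sketch {
        uint8_t tail_block_offset;
        uint8_t fullness_hint;
};

/* Which summary block must be rewritten when this slab's entry changes (as at line 364). */
static size_t summary_block_index(uint32_t slab_number)
{
        return slab_number / ENTRIES_PER_SUMMARY_BLOCK;
}

/* The entry itself is addressed directly by slab number in the flat array (as at line 383). */
static struct summary_entry_sketch *summary_entry_for(struct summary_entry_sketch *entries,
                                                      uint32_t slab_number)
{
        return &entries[slab_number];
}
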
403 check_if_slab_drained(journal->slab);
417 return_vio_to_pool(journal->slab->allocator->vio_pool,
440 journal->slab->allocator->thread_id);
461 * reap_slab_journal() - Conduct a reap on a slab journal to reclaim unreferenced blocks.
462 * @journal: The slab journal.
473 if ((journal->slab->status != VDO_SLAB_REBUILT) ||
474 !vdo_is_state_normal(&journal->slab->state) ||
475 vdo_is_read_only(journal->slab->allocator->depot->vdo)) {
485 * slab summary, which has the sequence number just before the tail.
499 * It is never safe to reap a slab journal block without first issuing a flush, regardless
501 * reference block write which released the locks allowing the slab journal to reap may not
502 * be persisted. Although slab summary writes will eventually issue flushes, multiple slab
503 * journal block writes can be issued while previous slab summary updates have not yet been
504 * made. Even though those slab journal block writes will be ignored if the slab summary
505 * update is not persisted, they may still overwrite the to-be-reaped slab journal block
509 acquire_vio_from_pool(journal->slab->allocator->vio_pool,
514 * adjust_slab_journal_block_reference() - Adjust the reference count for a slab journal block.
515 * @journal: The slab journal.
519 * Note that when the adjustment is negative, the slab journal will be reaped.
530 if (journal->slab->status == VDO_SLAB_REPLAYING) {
539 "adjustment %d of lock count %u for slab journal block %llu must not underflow",
550 * release_journal_locks() - Callback invoked after a slab summary update completes.
551 * @waiter: The slab summary waiter that has just been notified.
571 vdo_log_error_strerror(result, "failed slab summary update %llu",
576 vdo_enter_read_only_mode(journal->slab->allocator->depot->vdo, result);
577 check_if_slab_drained(journal->slab);
594 zone_count_t zone_number = journal->slab->allocator->zone_number;
614 /* Check if the slab summary needs to be updated again. */
619 * update_tail_block_location() - Update the tail block location in the slab summary, if necessary.
620 * @journal: The slab journal that is updating its tail block location.
625 struct vdo_slab *slab = journal->slab;
628 vdo_is_read_only(journal->slab->allocator->depot->vdo) ||
630 check_if_slab_drained(slab);
634 if (slab->status != VDO_SLAB_REBUILT) {
635 u8 hint = slab->allocator->summary_entries[slab->slab_number].fullness_hint;
637 free_block_count = ((block_count_t) hint) << slab->allocator->depot->hint_shift;
639 free_block_count = slab->free_blocks;
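
Lines 634-639 are the decode side of the fullness hint named at line 200: for a slab that has not been rebuilt, the stored hint is shifted left by the depot's hint_shift to approximate its free block count, while a rebuilt slab reports its exact free_blocks. A hedged sketch of the matching encode/decode pair (the kernel's exact rounding on the encode step may differ):

#include <stdint.h>

typedef uint64_t block_count_t;

/* Encode: drop low bits so the hint fits in a byte; keep zero meaning "no free blocks". */
static uint8_t compute_fullness_hint_sketch(block_count_t free_blocks, unsigned int hint_shift)
{
        block_count_t hint;

        if (free_blocks == 0)
                return 0;

        hint = free_blocks >> hint_shift;
        return (hint == 0) ? 1 : (uint8_t) hint;   /* never round "some free blocks" down to 0 */
}

/* Decode: the approximation used when a slab has not been rebuilt (line 637). */
static block_count_t free_blocks_from_hint(uint8_t hint, unsigned int hint_shift)
{
        return ((block_count_t) hint) << hint_shift;
}
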
646 * Update slab summary as dirty.
648 * slab have been written to the layer. Therefore, indicate that the ref counts must be
651 update_slab_summary_entry(slab, &journal->slab_summary_waiter,
657 * reopen_slab_journal() - Reopen a slab's journal by emptying it and then adding pending entries.
659 static void reopen_slab_journal(struct vdo_slab *slab)
661 struct slab_journal *journal = &slab->journal;
701 return_vio_to_pool(journal->slab->allocator->vio_pool, vdo_forget(pooled));
705 vdo_log_error_strerror(result, "cannot write slab journal block %llu",
707 vdo_enter_read_only_mode(journal->slab->allocator->depot->vdo, result);
708 check_if_slab_drained(journal->slab);
732 continue_vio_after_io(vio, complete_write, journal->slab->allocator->thread_id);
736 * write_slab_journal_block() - Write a slab journal block.
771 block_number = journal->slab->journal_origin +
776 * This block won't be read in recovery until the slab summary is updated to refer to it.
777 * The slab summary update does a flush which is sufficient to protect us from corruption
778 * due to out of order slab journal, reference block, or block map writes.
788 operation = vdo_get_admin_state_code(&journal->slab->state);
790 vdo_finish_operation(&journal->slab->state,
791 (vdo_is_read_only(journal->slab->allocator->depot->vdo) ?
800 * commit_tail() - Commit the tail block of the slab journal.
813 if (vdo_is_read_only(journal->slab->allocator->depot->vdo) ||
832 acquire_vio_from_pool(journal->slab->allocator->vio_pool,
837 * encode_slab_journal_entry() - Encode a slab journal entry.
840 * @sbn: The slab block number of the entry to encode.
878 * increment and decrement entries in the same slab journal. In order to distinguish the two
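
Lines 837-893 concern packing slab journal entries: each entry records a slab block number together with whether it is an increment or a decrement, so both kinds can share the same journal. A hedged sketch of one such packing, assuming a 23-bit offset plus a one-bit increment flag in three bytes; the kernel's actual bit layout may differ:

#include <stdint.h>
#include <stdbool.h>

typedef uint32_t slab_block_number;

/* Pack a slab block number and an increment flag into three little-endian bytes. */
static void encode_entry_sketch(uint8_t packed[3], slab_block_number sbn, bool increment)
{
        packed[0] = (uint8_t) (sbn & 0xff);
        packed[1] = (uint8_t) ((sbn >> 8) & 0xff);
        packed[2] = (uint8_t) (((sbn >> 16) & 0x7f) | (increment ? 0x80 : 0x00));
}

static slab_block_number decode_entry_sbn_sketch(const uint8_t packed[3])
{
        return (slab_block_number) packed[0] |
               ((slab_block_number) packed[1] << 8) |
               (((slab_block_number) packed[2] & 0x7f) << 16);
}

static bool decode_entry_is_increment_sketch(const uint8_t packed[3])
{
        return (packed[2] & 0x80) != 0;
}
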
893 * add_entry() - Actually add an entry to the slab journal, potentially firing off a write if a
895 * @journal: The slab journal to append to.
918 vdo_enter_read_only_mode(journal->slab->allocator->depot->vdo, result);
927 vdo_enter_read_only_mode(journal->slab->allocator->depot->vdo,
934 pbn - journal->slab->start, operation, increment);
946 * vdo_attempt_replay_into_slab() - Replay a recovery journal entry into a slab's journal.
947 * @slab: The slab to play into.
957 bool vdo_attempt_replay_into_slab(struct vdo_slab *slab, physical_block_number_t pbn,
962 struct slab_journal *journal = &slab->journal;
980 vdo_start_operation_with_waiter(&journal->slab->state,
989 * threshold keeps us from having more entries than fit in a slab journal; hence we
996 if (journal->slab->status == VDO_SLAB_REBUILT)
997 journal->slab->status = VDO_SLAB_REPLAYING;
1014 /** finish_summary_update() - A waiter callback that resets the writing state of a slab. */
1017 struct vdo_slab *slab = container_of(waiter, struct vdo_slab, summary_waiter);
1020 slab->active_count--;
1023 vdo_log_error_strerror(result, "failed to update slab summary");
1024 vdo_enter_read_only_mode(slab->allocator->depot->vdo, result);
1027 check_if_slab_drained(slab);
1036 * @context: The parent slab of the block.
1043 struct vdo_slab *slab = context;
1045 if (vdo_is_read_only(slab->allocator->depot->vdo))
1048 slab->active_count++;
1051 acquire_vio_from_pool(slab->allocator->vio_pool, waiter);
1054 static void save_dirty_reference_blocks(struct vdo_slab *slab)
1056 vdo_waitq_notify_all_waiters(&slab->dirty_blocks,
1057 launch_reference_block_write, slab);
1058 check_if_slab_drained(slab);
1071 struct vdo_slab *slab = block->slab;
1074 slab->active_count--;
1076 /* Release the slab journal lock. */
1077 adjust_slab_journal_block_reference(&slab->journal,
1079 return_vio_to_pool(slab->allocator->vio_pool, pooled);
1082 * We can't clear the is_writing flag earlier as releasing the slab journal lock may cause
1088 check_if_slab_drained(slab);
1094 vdo_waitq_enqueue_waiter(&block->slab->dirty_blocks, &block->waiter);
1095 if (vdo_is_state_draining(&slab->state)) {
1097 save_dirty_reference_blocks(slab);
1104 * Mark the slab as clean in the slab summary if there are no dirty or writing blocks
1107 if ((slab->active_count > 0) || vdo_waitq_has_waiters(&slab->dirty_blocks)) {
1108 check_if_slab_drained(slab);
1112 offset = slab->allocator->summary_entries[slab->slab_number].tail_block_offset;
1113 slab->active_count++;
1114 slab->summary_waiter.callback = finish_summary_update;
1115 update_slab_summary_entry(slab, &slab->summary_waiter, offset,
1116 true, true, slab->free_blocks);
1127 size_t block_index = block - block->slab->reference_blocks;
1129 return &block->slab->counters[block_index * COUNTS_PER_BLOCK];
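
Lines 1127-1129 show that each reference_block covers one contiguous COUNTS_PER_BLOCK-sized slice of the slab's single counters array, and that the slice is recovered from the block's position in slab->reference_blocks by pointer subtraction. A standalone sketch of that mapping (the COUNTS_PER_BLOCK value here is assumed for illustration):

#include <stdint.h>
#include <stddef.h>

typedef uint8_t vdo_refcount_t;

/* Assumed for illustration; the kernel computes this from the on-disk sector layout. */
#define COUNTS_PER_BLOCK 4096u

struct reference_block_sketch {
        unsigned int allocated_count;   /* per-block bookkeeping, as in the real structure */
};

/* Given the block's position within the slab's block array, return its counter slice. */
static vdo_refcount_t *counters_for_block(vdo_refcount_t *slab_counters,
                                          struct reference_block_sketch *blocks,
                                          struct reference_block_sketch *block)
{
        size_t block_index = (size_t) (block - blocks);   /* pointer subtraction, as at line 1127 */

        return &slab_counters[block_index * COUNTS_PER_BLOCK];
}
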
1144 vdo_pack_journal_point(&block->slab->slab_journal_point, &commit_point);
1157 thread_id_t thread_id = block->slab->allocator->thread_id;
1170 struct vdo_slab *slab = ((struct reference_block *) completion->parent)->slab;
1173 return_vio_to_pool(slab->allocator->vio_pool, vio_as_pooled_vio(vio));
1174 slab->active_count--;
1175 vdo_enter_read_only_mode(slab->allocator->depot->vdo, result);
1176 check_if_slab_drained(slab);
1195 block_offset = (block - block->slab->reference_blocks);
1196 pbn = (block->slab->ref_counts_origin + block_offset);
1208 * Flush before writing to ensure that the recovery journal and slab journal entries which
1212 WRITE_ONCE(block->slab->allocator->ref_counts_statistics.blocks_written,
1213 block->slab->allocator->ref_counts_statistics.blocks_written + 1);
1223 struct vdo_slab *slab = journal->slab;
1224 block_count_t write_count = vdo_waitq_num_waiters(&slab->dirty_blocks);
1230 /* The slab journal is over the first threshold, schedule some reference block writes. */
1239 vdo_waitq_notify_next_waiter(&slab->dirty_blocks,
1240 launch_reference_block_write, slab);
1274 vdo_waitq_enqueue_waiter(&block->slab->dirty_blocks, &block->waiter);
1280 static struct reference_block * __must_check get_reference_block(struct vdo_slab *slab,
1283 return &slab->reference_blocks[index / COUNTS_PER_BLOCK];
1287 * slab_block_number_from_pbn() - Determine the index within the slab of a particular physical
1289 * @slab: The slab.
1291 * @slab_block_number_ptr: A pointer to the slab block number.
1295 static int __must_check slab_block_number_from_pbn(struct vdo_slab *slab,
1301 if (pbn < slab->start)
1304 slab_block_number = pbn - slab->start;
1305 if (slab_block_number >= slab->allocator->depot->slab_config.data_blocks)
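
Lines 1301-1305 are the entire translation from a physical block number to a slab-relative index: reject anything before the slab's start, subtract the start, and reject anything at or beyond the slab's data block count. A self-contained sketch of the same range checks (returning a bool instead of a VDO status code):

#include <stdint.h>
#include <stdbool.h>

typedef uint64_t physical_block_number_t;
typedef uint32_t slab_block_number;

static bool slab_block_number_from_pbn_sketch(physical_block_number_t pbn,
                                              physical_block_number_t slab_start,
                                              slab_block_number data_blocks,
                                              slab_block_number *sbn_out)
{
        physical_block_number_t relative;

        if (pbn < slab_start)
                return false;                 /* before this slab's data region */

        relative = pbn - slab_start;
        if (relative >= data_blocks)
                return false;                 /* past the slab's data blocks */

        *sbn_out = (slab_block_number) relative;
        return true;
}
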
1314 * @slab: The slab to query.
1318 static int __must_check get_reference_counter(struct vdo_slab *slab,
1323 int result = slab_block_number_from_pbn(slab, pbn, &index);
1328 *counter_ptr = &slab->counters[index];
1333 static unsigned int calculate_slab_priority(struct vdo_slab *slab)
1335 block_count_t free_blocks = slab->free_blocks;
1336 unsigned int unopened_slab_priority = slab->allocator->unopened_slab_priority;
1360 if (is_slab_journal_blank(slab))
1368 * Slabs are essentially prioritized by an approximation of the number of free blocks in the slab
1372 static void prioritize_slab(struct vdo_slab *slab)
1374 VDO_ASSERT_LOG_ONLY(list_empty(&slab->allocq_entry),
1375 "a slab must not already be on a ring when prioritizing");
1376 slab->priority = calculate_slab_priority(slab);
1377 vdo_priority_table_enqueue(slab->allocator->prioritized_slabs,
1378 slab->priority, &slab->allocq_entry);
1382 * adjust_free_block_count() - Adjust the free block count and (if needed) reprioritize the slab.
1385 static void adjust_free_block_count(struct vdo_slab *slab, bool incremented)
1387 struct block_allocator *allocator = slab->allocator;
1392 /* The open slab doesn't need to be reprioritized until it is closed. */
1393 if (slab == allocator->open_slab)
1397 if (slab->priority == calculate_slab_priority(slab))
1401 * Reprioritize the slab to reflect the new free block count by removing it from the table
1404 vdo_priority_table_remove(allocator->prioritized_slabs, &slab->allocq_entry);
1405 prioritize_slab(slab);
1410 * @slab: The slab which owns the block.
1420 static int increment_for_data(struct vdo_slab *slab, struct reference_block *block,
1430 slab->free_blocks--;
1432 adjust_free_block_count(slab, false);
1444 "Incrementing a block already having 254 references (slab %u, offset %u)",
1445 slab->slab_number, block_number);
1457 * @slab: The slab which owns the block.
1468 static int decrement_for_data(struct vdo_slab *slab, struct reference_block *block,
1477 "Decrementing free block at offset %u in slab %u",
1478 block_number, slab->slab_number);
1499 slab->free_blocks++;
1501 adjust_free_block_count(slab, true);
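
Lines 1430-1501 show the bookkeeping around a data-block reference change: an increment out of the empty state consumes a free block (and may reprioritize the slab), a decrement back to empty returns one, decrementing an already-free block is an error, and a counter already at 254 cannot be incremented. A simplified sketch of that state handling, assuming the usual VDO encoding of 0 for an empty counter and 254 as the maximum; provisional references and journaling are omitted:

#include <stdint.h>
#include <stdbool.h>

typedef uint8_t vdo_refcount_t;
typedef uint64_t block_count_t;

enum {
        EMPTY_REFCOUNT = 0,      /* assumed to match EMPTY_REFERENCE_COUNT */
        MAX_REFCOUNT = 254,      /* matches the 254-reference limit at line 1444 */
};

/* Returns false on an invalid transition; the kernel logs and enters read-only mode instead. */
static bool adjust_data_refcount_sketch(vdo_refcount_t *counter, bool increment,
                                        block_count_t *free_blocks)
{
        if (increment) {
                if (*counter == MAX_REFCOUNT)
                        return false;            /* would overflow the per-block counter */
                if (*counter == EMPTY_REFCOUNT)
                        (*free_blocks)--;        /* block goes from free to in use */
                (*counter)++;
        } else {
                if (*counter == EMPTY_REFCOUNT)
                        return false;            /* decrementing an already-free block */
                (*counter)--;
                if (*counter == EMPTY_REFCOUNT)
                        (*free_blocks)++;        /* block is free again */
        }
        return true;
}
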
1515 * @slab: The slab which owns the block.
1531 static int increment_for_block_map(struct vdo_slab *slab, struct reference_block *block,
1541 "Incrementing unallocated block map block (slab %u, offset %u)",
1542 slab->slab_number, block_number);
1547 slab->free_blocks--;
1549 adjust_free_block_count(slab, false);
1556 "Block map block had provisional reference during replay (slab %u, offset %u)",
1557 slab->slab_number, block_number);
1566 "Incrementing a block map block which is already referenced %u times (slab %u, offset %u)",
1567 *counter_ptr, slab->slab_number,
1579 * @slab: The slab which owns the block.
1582 * @slab_journal_point: The slab journal point at which this update is journaled.
1585 * @adjust_block_count: Whether to update the slab's free block count.
1591 static int update_reference_count(struct vdo_slab *slab, struct reference_block *block,
1598 vdo_refcount_t *counter_ptr = &slab->counters[block_number];
1603 result = decrement_for_data(slab, block, block_number, old_status,
1611 result = increment_for_data(slab, block, block_number, old_status,
1614 result = increment_for_block_map(slab, block, block_number, old_status,
1623 slab->slab_journal_point = *slab_journal_point;
1628 static int __must_check adjust_reference_count(struct vdo_slab *slab,
1637 if (!is_slab_open(slab))
1640 result = slab_block_number_from_pbn(slab, updater->zpbn.pbn, &block_number);
1644 block = get_reference_block(slab, block_number);
1645 result = update_reference_count(slab, block, block_number, slab_journal_point,
1654 * This block is already dirty and a slab journal entry has been made for it since
1655 * the last time it was clean. We must release the per-entry slab journal lock for
1659 "Reference count adjustments need slab journal points.");
1663 adjust_slab_journal_block_reference(&slab->journal, entry_lock, -1);
1668 * This may be the first time we are applying an update for which there is a slab journal
1670 * slab journal lock to an uncommitted reference block lock, if there is a per-entry lock.
1682 * add_entry_from_waiter() - Add an entry to the slab journal.
1684 * @context: The slab journal to make an entry in.
1687 * another entry in the slab journal. Implements waiter_callback_fn.
1710 zone_count_t zone_number = journal->slab->allocator->zone_number;
1726 if (journal->slab->status != VDO_SLAB_REBUILT) {
1728 * If the slab is unrecovered, scrubbing will take care of the count since the
1735 /* Now that an entry has been made in the slab journal, update the counter. */
1736 result = adjust_reference_count(journal->slab, updater,
1766 * By processing the queue in order, we ensure that slab journal entries are made in the same order
1781 (journal->slab->status == VDO_SLAB_REBUILDING)) {
1811 /* If the slab is over the blocking threshold, make the vio wait. */
1815 save_dirty_reference_blocks(journal->slab);
1824 * Check if the on-disk slab journal is full. Because of the blocking and
1841 save_dirty_reference_blocks(journal->slab);
1853 struct vdo_slab *slab = journal->slab;
1857 * This is the first entry in this slab journal, ever. Dirty all of
1864 for (i = 0; i < slab->reference_block_count; i++) {
1865 slab->reference_blocks[i].slab_journal_lock = 1;
1866 dirty_block(&slab->reference_blocks[i]);
1870 slab->reference_block_count);
1881 if (vdo_is_state_draining(&journal->slab->state) &&
1882 !vdo_is_state_suspending(&journal->slab->state) &&
1889 * first reference block of a slab.
1891 static void reset_search_cursor(struct vdo_slab *slab)
1893 struct search_cursor *cursor = &slab->search_cursor;
1898 cursor->end_index = min_t(u32, COUNTS_PER_BLOCK, slab->block_count);
1903 * a slab,
1909 static bool advance_search_cursor(struct vdo_slab *slab)
1911 struct search_cursor *cursor = &slab->search_cursor;
1918 reset_search_cursor(slab);
1928 cursor->end_index = slab->block_count;
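
Lines 1891-1928 maintain the allocation search cursor: each reference block covers a window of counters, end_index is capped at COUNTS_PER_BLOCK except for the final (possibly short) block, and stepping past the last block wraps the cursor and reports that the whole slab has been searched. A compact sketch of the advance step (window arithmetic only; the real cursor also carries reference-block pointers, and the COUNTS_PER_BLOCK value is assumed):

#include <stdint.h>
#include <stdbool.h>

#define COUNTS_PER_BLOCK 4096u   /* assumed value for illustration */

struct cursor_sketch {
        uint32_t block;        /* current reference block index */
        uint32_t last_block;   /* last reference block index in the slab */
        uint32_t index;        /* first counter of the current window */
        uint32_t end_index;    /* one past the last counter of the current window */
};

/* Step to the next reference block's window; return false once the slab is exhausted. */
static bool advance_cursor_sketch(struct cursor_sketch *c, uint32_t slab_block_count)
{
        if (c->block == c->last_block)
                return false;             /* end of slab; the kernel also resets the cursor here (line 1918) */

        c->block++;
        c->index = c->end_index;
        c->end_index = (c->block == c->last_block) ? slab_block_count
                                                   : c->end_index + COUNTS_PER_BLOCK;
        return true;
}
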
1948 struct vdo_slab *slab = vdo_get_slab(depot, pbn);
1954 result = slab_block_number_from_pbn(slab, pbn, &block_number);
1958 block = get_reference_block(slab, block_number);
1959 result = update_reference_count(slab, block, block_number, NULL,
1969 * replay_reference_count_change() - Replay the reference count adjustment from a slab journal
1971 * @slab: The slab.
1972 * @entry_point: The slab journal point for the entry.
1973 * @entry: The slab journal entry being replayed.
1979 static int replay_reference_count_change(struct vdo_slab *slab,
1984 struct reference_block *block = get_reference_block(slab, entry.sbn);
1997 result = update_reference_count(slab, block, entry.sbn, entry_point,
2040 * @slab: The slab whose counters are to be scanned.
2047 static bool find_free_block(const struct vdo_slab *slab, slab_block_number *index_ptr)
2050 slab_block_number next_index = slab->search_cursor.index;
2051 slab_block_number end_index = slab->search_cursor.end_index;
2052 u8 *next_counter = &slab->counters[next_index];
2053 u8 *end_counter = &slab->counters[end_index];
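
Lines 2047-2053 set up the scan for a free (zero) reference counter between the cursor's index and end_index. A simplified byte-at-a-time sketch of that scan:

#include <stdint.h>
#include <stdbool.h>

typedef uint8_t vdo_refcount_t;
typedef uint32_t slab_block_number;

#define EMPTY_REFCOUNT 0   /* assumed to match EMPTY_REFERENCE_COUNT */

/* Scan counters[next_index, end_index) for a free block, as find_free_block() does. */
static bool find_free_block_sketch(const vdo_refcount_t *counters,
                                   slab_block_number next_index,
                                   slab_block_number end_index,
                                   slab_block_number *index_out)
{
        for (; next_index < end_index; next_index++) {
                if (counters[next_index] == EMPTY_REFCOUNT) {
                        *index_out = next_index;
                        return true;
                }
        }
        return false;
}

On success the caller makes the block provisionally referenced; as lines 3154-3156 show, it also bumps the cursor index past the hit so the next allocation resumes where this one stopped, and the returned physical block is slab->start plus the free index.
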
2099 * @slab: The slab to search.
2104 static bool search_current_reference_block(const struct vdo_slab *slab,
2108 return ((slab->search_cursor.block->allocated_count < COUNTS_PER_BLOCK) &&
2109 find_free_block(slab, free_index_ptr));
2114 * @slab: The slab to search.
2123 static bool search_reference_blocks(struct vdo_slab *slab,
2127 if (search_current_reference_block(slab, free_index_ptr))
2130 /* Search each reference block up to the end of the slab. */
2131 while (advance_search_cursor(slab)) {
2132 if (search_current_reference_block(slab, free_index_ptr))
2142 static void make_provisional_reference(struct vdo_slab *slab,
2145 struct reference_block *block = get_reference_block(slab, block_number);
2151 slab->counters[block_number] = PROVISIONAL_REFERENCE_COUNT;
2155 slab->free_blocks--;
2159 * dirty_all_reference_blocks() - Mark all reference count blocks in a slab as dirty.
2161 static void dirty_all_reference_blocks(struct vdo_slab *slab)
2165 for (i = 0; i < slab->reference_block_count; i++)
2166 dirty_block(&slab->reference_blocks[i]);
2203 struct vdo_slab *slab = block->slab;
2213 if (vdo_before_journal_point(&slab->slab_journal_point,
2215 slab->slab_journal_point = block->commit_points[i];
2220 size_t block_index = block - block->slab->reference_blocks;
2222 vdo_log_warning("Torn write detected in sector %u of reference block %zu of slab %u",
2223 i, block_index, block->slab->slab_number);
2243 struct vdo_slab *slab = block->slab;
2246 return_vio_to_pool(slab->allocator->vio_pool, pooled);
2247 slab->active_count--;
2250 slab->free_blocks -= block->allocated_count;
2251 check_if_slab_drained(slab);
2260 block->slab->allocator->thread_id);
2275 size_t block_offset = (block - block->slab->reference_blocks);
2278 vdo_submit_metadata_vio(vio, block->slab->ref_counts_origin + block_offset,
2284 * load_reference_blocks() - Load a slab's reference blocks from the underlying storage into a
2287 static void load_reference_blocks(struct vdo_slab *slab)
2291 slab->free_blocks = slab->block_count;
2292 slab->active_count = slab->reference_block_count;
2293 for (i = 0; i < slab->reference_block_count; i++) {
2294 struct vdo_waiter *waiter = &slab->reference_blocks[i].waiter;
2297 acquire_vio_from_pool(slab->allocator->vio_pool, waiter);
2307 static void drain_slab(struct vdo_slab *slab)
2311 const struct admin_state_code *state = vdo_get_admin_state_code(&slab->state);
2318 commit_tail(&slab->journal);
2320 if ((state == VDO_ADMIN_STATE_RECOVERING) || (slab->counters == NULL))
2324 load = slab->allocator->summary_entries[slab->slab_number].load_ref_counts;
2327 load_reference_blocks(slab);
2333 dirty_all_reference_blocks(slab);
2338 * Write out the counters if the slab has written them before, or it has any
2339 * non-zero reference counts, or there are any slab journal blocks.
2341 block_count_t data_blocks = slab->allocator->depot->slab_config.data_blocks;
2343 if (load || (slab->free_blocks != data_blocks) ||
2344 !is_slab_journal_blank(slab)) {
2345 dirty_all_reference_blocks(slab);
2349 save = (slab->status == VDO_SLAB_REBUILT);
2351 vdo_finish_draining_with_result(&slab->state, VDO_SUCCESS);
2356 save_dirty_reference_blocks(slab);
2359 static int allocate_slab_counters(struct vdo_slab *slab)
2364 result = VDO_ASSERT(slab->reference_blocks == NULL,
2366 slab->slab_number);
2370 result = vdo_allocate(slab->reference_block_count, struct reference_block,
2371 __func__, &slab->reference_blocks);
2376 * Allocate such that the runt slab has a full-length memory array, plus a little padding
2379 bytes = (slab->reference_block_count * COUNTS_PER_BLOCK) + (2 * BYTES_PER_WORD);
2381 &slab->counters);
2383 vdo_free(vdo_forget(slab->reference_blocks));
2387 slab->search_cursor.first_block = slab->reference_blocks;
2388 slab->search_cursor.last_block = &slab->reference_blocks[slab->reference_block_count - 1];
2389 reset_search_cursor(slab);
2391 for (index = 0; index < slab->reference_block_count; index++) {
2392 slab->reference_blocks[index] = (struct reference_block) {
2393 .slab = slab,
2400 static int allocate_counters_if_clean(struct vdo_slab *slab)
2402 if (vdo_is_state_clean_load(&slab->state))
2403 return allocate_slab_counters(slab);
2412 struct vdo_slab *slab = journal->slab;
2420 (header.nonce == slab->allocator->nonce)) {
2424 * If the slab is clean, this implies the slab journal is empty, so advance the
2427 journal->head = (slab->allocator->summary_entries[slab->slab_number].is_dirty ?
2433 return_vio_to_pool(slab->allocator->vio_pool, vio_as_pooled_vio(vio));
2434 vdo_finish_loading_with_result(&slab->state, allocate_counters_if_clean(slab));
2443 journal->slab->allocator->thread_id);
2453 return_vio_to_pool(journal->slab->allocator->vio_pool, vio_as_pooled_vio(vio));
2454 vdo_finish_loading_with_result(&journal->slab->state, result);
2458 * read_slab_journal_tail() - Read the slab journal tail block by using a vio acquired from the vio
2463 * This is the success callback from acquire_vio_from_pool() when loading a slab journal.
2469 struct vdo_slab *slab = journal->slab;
2473 slab->allocator->summary_entries[slab->slab_number].tail_block_offset;
2484 vio->completion.callback_thread_id = slab->allocator->thread_id;
2485 vdo_submit_metadata_vio(vio, slab->journal_origin + tail_block,
2491 * load_slab_journal() - Load a slab's journal by reading the journal's tail.
2493 static void load_slab_journal(struct vdo_slab *slab)
2495 struct slab_journal *journal = &slab->journal;
2498 last_commit_point = slab->allocator->summary_entries[slab->slab_number].tail_block_offset;
2500 !slab->allocator->summary_entries[slab->slab_number].load_ref_counts) {
2502 * This slab claims that it has a tail block at (journal->size - 1), but a head of
2508 "Scrubbing threshold protects against reads of unwritten slab journal blocks");
2509 vdo_finish_loading_with_result(&slab->state,
2510 allocate_counters_if_clean(slab));
2515 acquire_vio_from_pool(slab->allocator->vio_pool, &journal->resource_waiter);
2518 static void register_slab_for_scrubbing(struct vdo_slab *slab, bool high_priority)
2520 struct slab_scrubber *scrubber = &slab->allocator->scrubber;
2522 VDO_ASSERT_LOG_ONLY((slab->status != VDO_SLAB_REBUILT),
2523 "slab to be scrubbed is unrecovered");
2525 if (slab->status != VDO_SLAB_REQUIRES_SCRUBBING)
2528 list_del_init(&slab->allocq_entry);
2529 if (!slab->was_queued_for_scrubbing) {
2531 slab->was_queued_for_scrubbing = true;
2535 slab->status = VDO_SLAB_REQUIRES_HIGH_PRIORITY_SCRUBBING;
2536 list_add_tail(&slab->allocq_entry, &scrubber->high_priority_slabs);
2540 list_add_tail(&slab->allocq_entry, &scrubber->slabs);
2543 /* Queue a slab for allocation or scrubbing. */
2544 static void queue_slab(struct vdo_slab *slab)
2546 struct block_allocator *allocator = slab->allocator;
2550 VDO_ASSERT_LOG_ONLY(list_empty(&slab->allocq_entry),
2551 "a requeued slab must not already be on a ring");
2556 free_blocks = slab->free_blocks;
2558 "rebuilt slab %u must have a valid free block count (has %llu, expected maximum %llu)",
2559 slab->slab_number, (unsigned long long) free_blocks,
2566 if (slab->status != VDO_SLAB_REBUILT) {
2567 register_slab_for_scrubbing(slab, false);
2571 if (!vdo_is_state_resuming(&slab->state)) {
2573 * If the slab is resuming, we've already accounted for it here, so don't do it
2575 * FIXME: under what situation would the slab be resuming here?
2579 if (!is_slab_journal_blank(slab)) {
2586 reopen_slab_journal(slab);
2588 prioritize_slab(slab);
2592 * initiate_slab_action() - Initiate a slab action.
2598 struct vdo_slab *slab = container_of(state, struct vdo_slab, state);
2604 slab->status = VDO_SLAB_REBUILDING;
2606 drain_slab(slab);
2607 check_if_slab_drained(slab);
2612 load_slab_journal(slab);
2617 queue_slab(slab);
2626 * get_next_slab() - Get the next slab to scrub.
2627 * @scrubber: The slab scrubber.
2629 * Return: The next slab to scrub or NULL if there are none.
2633 struct vdo_slab *slab;
2635 slab = list_first_entry_or_null(&scrubber->high_priority_slabs,
2637 if (slab != NULL)
2638 return slab;
2725 * slab_scrubbed() - Notify the scrubber that a slab has been scrubbed.
2726 * @completion: The slab rebuild completion.
2734 struct vdo_slab *slab = scrubber->slab;
2736 slab->status = VDO_SLAB_REBUILT;
2737 queue_slab(slab);
2738 reopen_slab_journal(slab);
2745 * @scrubber: The slab scrubber.
2755 * handle_scrubber_error() - Handle errors while rebuilding a slab.
2756 * @completion: The slab rebuild completion.
2772 * @slab: The slab to apply the entries to.
2778 sequence_number_t block_number, struct vdo_slab *slab)
2785 slab_block_number max_sbn = slab->end - slab->start;
2794 "vdo_slab journal entry (%llu, %u) had invalid offset %u in slab (size %u blocks)",
2800 result = replay_reference_count_change(slab, &entry_point, entry);
2803 "vdo_slab journal entry (%llu, %u) (%s of offset %u) could not be applied in slab %u",
2807 entry.sbn, slab->slab_number);
2817 * apply_journal_entries() - Find the relevant vio of the slab journal and apply all valid entries.
2827 struct vdo_slab *slab = scrubber->slab;
2828 struct slab_journal *journal = &slab->journal;
2841 struct journal_point ref_counts_point = slab->slab_journal_point;
2853 if ((header.nonce != slab->allocator->nonce) ||
2860 vdo_log_error("vdo_slab journal block for slab %u was invalid",
2861 slab->slab_number);
2866 result = apply_block_entries(block, header.entry_count, sequence, slab);
2885 "Refcounts are not more accurate than the slab journal");
2893 slab->allocator->thread_id, completion->parent);
2894 vdo_start_operation_with_waiter(&slab->state,
2905 scrubber->slab->allocator->thread_id);
2909 * start_scrubbing() - Read the current slab's journal from disk now that it has been flushed.
2918 struct vdo_slab *slab = scrubber->slab;
2920 if (!slab->allocator->summary_entries[slab->slab_number].is_dirty) {
2925 vdo_submit_metadata_vio(&scrubber->vio, slab->journal_origin,
2931 * scrub_next_slab() - Scrub the next slab if there is one.
2937 struct vdo_slab *slab;
2950 slab = get_next_slab(scrubber);
2951 if ((slab == NULL) ||
2960 list_del_init(&slab->allocq_entry);
2961 scrubber->slab = slab;
2963 slab->allocator->thread_id, completion->parent);
2964 vdo_start_operation_with_waiter(&slab->state, VDO_ADMIN_STATE_SCRUBBING,
3001 struct vdo_slab *slab)
3004 allocator->last_slab = slab->slab_number;
3010 * @start: The number of the slab to start iterating from.
3011 * @end: The number of the last slab which may be returned.
3012 * @stride: The difference in slab number between successive slabs.
3040 * next_slab() - Get the next slab from a slab_iterator and advance the iterator
3043 * Return: The next slab or NULL if the iterator is exhausted.
3047 struct vdo_slab *slab = iterator->next;
3049 if ((slab == NULL) || (slab->slab_number < iterator->end + iterator->stride))
3052 iterator->next = iterator->slabs[slab->slab_number - iterator->stride];
3054 return slab;
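
Lines 3040-3054 implement a backwards, strided iterator over an allocator's slabs: iteration stops once stepping back by stride would go below end, otherwise it jumps to the slab stride entries earlier in the depot's slab array. A standalone sketch of the same iteration:

#include <stdint.h>
#include <stddef.h>

struct slab_sketch {
        uint32_t slab_number;
};

struct slab_iterator_sketch {
        struct slab_sketch **slabs;    /* depot's slab array, indexed by slab number */
        struct slab_sketch *next;      /* next slab to return, or NULL when exhausted */
        uint32_t end;                  /* lowest slab number that may be returned */
        uint32_t stride;               /* step between successive slabs (the zone count) */
};

/* Return the current slab and step the iterator back by one stride. */
static struct slab_sketch *next_slab_sketch(struct slab_iterator_sketch *it)
{
        struct slab_sketch *slab = it->next;

        if ((slab == NULL) || (slab->slab_number < it->end + it->stride))
                it->next = NULL;                                   /* no earlier slab in this zone */
        else
                it->next = it->slabs[slab->slab_number - it->stride];

        return slab;
}
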
3060 * This callback is invoked on all vios waiting to make slab journal entries after the VDO has gone
3087 struct vdo_slab *slab = next_slab(&iterator);
3089 vdo_waitq_notify_all_waiters(&slab->journal.entry_waiters,
3090 abort_waiter, &slab->journal);
3091 check_if_slab_drained(slab);
3100 * @slab: The slab which contains the block.
3106 int vdo_acquire_provisional_reference(struct vdo_slab *slab, physical_block_number_t pbn,
3115 if (!is_slab_open(slab))
3118 result = slab_block_number_from_pbn(slab, pbn, &block_number);
3122 if (slab->counters[block_number] == EMPTY_REFERENCE_COUNT) {
3123 make_provisional_reference(slab, block_number);
3129 adjust_free_block_count(slab, false);
3134 static int __must_check allocate_slab_block(struct vdo_slab *slab,
3139 if (!is_slab_open(slab))
3142 if (!search_reference_blocks(slab, &free_index))
3145 VDO_ASSERT_LOG_ONLY((slab->counters[free_index] == EMPTY_REFERENCE_COUNT),
3147 make_provisional_reference(slab, free_index);
3148 adjust_free_block_count(slab, false);
3154 slab->search_cursor.index = (free_index + 1);
3156 *block_number_ptr = slab->start + free_index;
3161 * open_slab() - Prepare a slab to be allocated from.
3162 * @slab: The slab.
3164 static void open_slab(struct vdo_slab *slab)
3166 reset_search_cursor(slab);
3167 if (is_slab_journal_blank(slab)) {
3168 WRITE_ONCE(slab->allocator->statistics.slabs_opened,
3169 slab->allocator->statistics.slabs_opened + 1);
3170 dirty_all_reference_blocks(slab);
3172 WRITE_ONCE(slab->allocator->statistics.slabs_reopened,
3173 slab->allocator->statistics.slabs_reopened + 1);
3176 slab->allocator->open_slab = slab;
3191 /* Try to allocate the next block in the currently open slab. */
3196 /* Put the exhausted open slab back into the priority table. */
3200 /* Remove the highest priority slab from the priority table and make it the open slab. */
3205 * Try allocating again. If we're out of space immediately after opening a slab, then every
3206 * slab must be fully allocated.
3212 * vdo_enqueue_clean_slab_waiter() - Wait for a clean slab.
3233 * vdo_modify_reference_count() - Modify the reference count of a block by first making a slab
3242 struct vdo_slab *slab = vdo_get_slab(completion->vdo->depot, updater->zpbn.pbn);
3244 if (!is_slab_open(slab)) {
3254 vdo_waitq_enqueue_waiter(&slab->journal.entry_waiters, &updater->waiter);
3255 if ((slab->status != VDO_SLAB_REBUILT) && requires_reaping(&slab->journal))
3256 register_slab_for_scrubbing(slab, true);
3258 add_entries(&slab->journal);
3317 /* Inform the slab actor that an action has finished on some slab; used by apply_to_slabs(). */
3351 * Since we are going to dequeue all of the slabs, the open slab will become invalid, so
3366 struct vdo_slab *slab = next_slab(&iterator);
3368 list_del_init(&slab->allocq_entry);
3370 vdo_start_operation_with_waiter(&slab->state, operation,
3413 /* erase_next_slab_journal() - Erase the next slab journal. */
3416 struct vdo_slab *slab;
3427 slab = next_slab(&allocator->slabs_to_erase);
3428 pbn = slab->journal_origin - depot->vdo->geometry.bio_offset;
3470 * vdo_notify_slab_journals_are_recovered() - Inform a block allocator that its slab journals have
3533 struct vdo_slab *slab;
3538 slab = depot->slabs[current_slab_status.slab_number];
3541 (!allocator->summary_entries[slab->slab_number].load_ref_counts &&
3543 queue_slab(slab);
3547 slab->status = VDO_SLAB_REQUIRES_SCRUBBING;
3548 journal = &slab->journal;
3552 register_slab_for_scrubbing(slab, high_priority);
3585 struct vdo_slab *slab = next_slab(&iterator);
3586 struct slab_journal *journal = &slab->journal;
3588 if (slab->reference_blocks != NULL) {
3590 vdo_log_info("slab %u: P%u, %llu free", slab->slab_number,
3591 slab->priority,
3592 (unsigned long long) slab->free_blocks);
3594 vdo_log_info("slab %u: status %s", slab->slab_number,
3595 status_to_string(slab->status));
3598 vdo_log_info(" slab journal: entry_waiters=%zu waiting_to_commit=%s updating_slab_summary=%s head=%llu unreapable=%llu tail=%llu next_commit=%llu summarized=%llu last_summarized=%llu recovery_lock=%llu dirty=%s",
3615 if (slab->counters != NULL) {
3617 vdo_log_info(" slab: free=%u/%u blocks=%u dirty=%zu active=%zu journal@(%llu,%u)",
3618 slab->free_blocks, slab->block_count,
3619 slab->reference_block_count,
3620 vdo_waitq_num_waiters(&slab->dirty_blocks),
3621 slab->active_count,
3622 (unsigned long long) slab->slab_journal_point.sequence_number,
3623 slab->slab_journal_point.entry_count);
3645 static void free_slab(struct vdo_slab *slab)
3647 if (slab == NULL)
3650 list_del(&slab->allocq_entry);
3651 vdo_free(vdo_forget(slab->journal.block));
3652 vdo_free(vdo_forget(slab->journal.locks));
3653 vdo_free(vdo_forget(slab->counters));
3654 vdo_free(vdo_forget(slab->reference_blocks));
3655 vdo_free(slab);
3658 static int initialize_slab_journal(struct vdo_slab *slab)
3660 struct slab_journal *journal = &slab->journal;
3661 const struct slab_config *slab_config = &slab->allocator->depot->slab_config;
3674 journal->slab = slab;
3681 journal->events = &slab->allocator->slab_journal_statistics;
3682 journal->recovery_journal = slab->allocator->depot->vdo->recovery_journal;
3699 journal->tail_header.nonce = slab->allocator->nonce;
3706 * make_slab() - Construct a new, empty slab.
3708 * in the slab.
3709 * @allocator: The block allocator to which the slab belongs.
3710 * @slab_number: The slab number of the slab.
3711 * @is_new: true if this slab is being allocated as part of a resize.
3712 * @slab_ptr: A pointer to receive the new slab.
3722 struct vdo_slab *slab;
3725 result = vdo_allocate(1, struct vdo_slab, __func__, &slab);
3729 *slab = (struct vdo_slab) {
3742 INIT_LIST_HEAD(&slab->allocq_entry);
3744 result = initialize_slab_journal(slab);
3746 free_slab(slab);
3751 vdo_set_admin_state_code(&slab->state, VDO_ADMIN_STATE_NEW);
3752 result = allocate_slab_counters(slab);
3754 free_slab(slab);
3758 vdo_set_admin_state_code(&slab->state, VDO_ADMIN_STATE_NORMAL_OPERATION);
3761 *slab_ptr = slab;
3766 * allocate_slabs() - Allocate a new slab pointer array.
3770 * Any existing slab pointers will be copied into the new array, and slabs will be allocated as
3783 "slab pointer array", &depot->new_slabs);
3841 * release_recovery_journal_lock() - Request the slab journal to release the recovery journal lock
3843 * @journal: The slab journal.
3854 "slab journal recovery lock is not older than the recovery journal head");
3859 vdo_is_read_only(journal->slab->allocator->depot->vdo))
3924 * initialize_slab_scrubber() - Initialize an allocator's slab scrubber.
4054 * This sets the free block threshold for preferring to open an unopened slab to the binary
4055 * floor of 3/4ths the total number of data blocks in a slab, which will generally evaluate
4056 * to about half the slab size.
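
That comment works out to a simple calculation; for example, with 8192 data blocks per slab, 3/4 of the total is 6144 and its binary floor (the largest power of two not exceeding it) is 4096, i.e. about half the slab. A hedged sketch of the threshold computation (the kernel's exact expression, via an integer log2, may differ):

#include <stdint.h>

typedef uint64_t block_count_t;

/* Largest power of two not exceeding n (the "binary floor"); n must be non-zero. */
static block_count_t binary_floor(block_count_t n)
{
        block_count_t floor = 1;

        while (((floor << 1) <= n) && ((floor << 1) != 0))
                floor <<= 1;
        return floor;
}

/* e.g. data_blocks = 8192: 3/4 is 6144, binary floor is 4096 -- about half the slab. */
static block_count_t unopened_slab_threshold_sketch(block_count_t data_blocks)
{
        return binary_floor((data_blocks * 3) / 4);
}
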
4113 "%u physical zones exceeds slab count %u",
4132 struct vdo_slab *slab = depot->new_slabs[i];
4134 register_slab_with_allocator(slab->allocator, slab);
4146 * vdo_decode_slab_depot() - Make a slab depot and configure it with the state read from the super
4148 * @state: The slab depot state from the super block.
4150 * @summary_partition: The partition which holds the slab summary.
4165 * requires that the slab size be a power of two.
4171 "slab size must be a power of two");
4215 * vdo_free_slab_depot() - Destroy a slab depot.
4253 * vdo_record_slab_depot() - Record the state of a slab depot for encoding into the super block.
4262 * tool and is now being saved. We did not load and combine the slab summary, so we still
4304 * get_slab_number() - Get the number of the slab that contains a specified block.
4305 * @depot: The slab depot.
4307 * @slab_number_ptr: A pointer to hold the slab number.
4329 * vdo_get_slab() - Get the slab object for the slab that contains a specified block.
4330 * @depot: The slab depot.
4335 * Return: The slab containing the block, or NULL if the block number is the zero block or
4358 * @depot: The slab depot.
4367 struct vdo_slab *slab = vdo_get_slab(depot, pbn);
4371 if ((slab == NULL) || (slab->status != VDO_SLAB_REBUILT))
4374 result = get_reference_counter(slab, pbn, &counter_ptr);
4406 * @depot: The slab depot.
4430 * @depot: The slab depot.
4442 * finish_combining_zones() - Clean up after saving out the combined slab summary.
4471 * update every zone to the correct values for every slab.
4507 * finish_loading_summary() - Finish loading slab summary data.
4510 * Combines the slab summary data from all the previously written zones and copies the combined
4580 * vdo_load_slab_depot() - Asynchronously load any slab depot state that isn't included in the
4619 * vdo_prepare_slab_depot_to_allocate() - Prepare the slab depot to come online and start
4639 * vdo_update_slab_depot_size() - Update the slab depot to reflect its new size in memory.
4650 * vdo_prepare_to_grow_slab_depot() - Allocate new memory needed for a resize of a slab depot to
4669 "New slab depot partition doesn't change origin");
4727 struct vdo_slab *slab = depot->new_slabs[i];
4729 if (slab->allocator == allocator)
4730 register_slab_with_allocator(allocator, slab);
4751 * stop_scrubbing() - Tell the scrubber to stop scrubbing after it finishes the slab it is
4836 * vdo_drain_slab_depot() - Drain all slab depot I/O.
4928 * vdo_resume_slab_depot() - Resume a suspended slab depot.
4988 * @depot: The slab depot.
5014 * @depot: The slab depot.
5035 * get_slab_journal_statistics() - Get the aggregated slab journal statistics for the depot.
5036 * @depot: The slab depot.
5038 * Return: The aggregated statistics for all slab journals in the depot.
5064 * slab depot.
5065 * @depot: The slab depot.
5090 * vdo_dump_slab_depot() - Dump the slab depot, in a thread-unsafe fashion.
5091 * @depot: The slab depot.
5095 vdo_log_info("vdo slab depot");