1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright 2023 Red Hat
4 */
5
6#include "encodings.h"
7
8#include <linux/log2.h>
9
10#include "logger.h"
11#include "memory-alloc.h"
12#include "permassert.h"
13
14#include "constants.h"
15#include "status-codes.h"
16#include "types.h"
17
/** The maximum logical space is 4 petabytes, which is 1 terablock. */
static const block_count_t MAXIMUM_VDO_LOGICAL_BLOCKS = 1024ULL * 1024 * 1024 * 1024;

/** The maximum physical space is 256 terabytes, which is 64 gigablocks. */
static const block_count_t MAXIMUM_VDO_PHYSICAL_BLOCKS = 1024ULL * 1024 * 1024 * 64;

/*
 * The fixed parts of the on-disk geometry block: the magic string, the encoded header, and the
 * trailing CRC-32 checksum. The variable-size volume geometry payload sits between the header
 * and the checksum on disk; this struct is used only for size arithmetic, not for direct
 * overlay of the block.
 */
struct geometry_block {
	char magic_number[VDO_GEOMETRY_MAGIC_NUMBER_SIZE];
	struct packed_header header;
	u32 checksum;
} __packed;
29
/* The header for the current (5.0) geometry block format. */
static const struct header GEOMETRY_BLOCK_HEADER_5_0 = {
	.id = VDO_GEOMETRY_BLOCK,
	.version = {
		.major_version = 5,
		.minor_version = 0,
	},
	/*
	 * Note: this size isn't just the payload size following the header, like it is everywhere
	 * else in VDO.
	 */
	.size = sizeof(struct geometry_block) + sizeof(struct volume_geometry),
};

/* The header for the older (4.0) geometry block format, still accepted when reading. */
static const struct header GEOMETRY_BLOCK_HEADER_4_0 = {
	.id = VDO_GEOMETRY_BLOCK,
	.version = {
		.major_version = 4,
		.minor_version = 0,
	},
	/*
	 * Note: this size isn't just the payload size following the header, like it is everywhere
	 * else in VDO.
	 */
	.size = sizeof(struct geometry_block) + sizeof(struct volume_geometry_4_0),
};

/* The magic string at the start of a geometry block; the +1 leaves room for a trailing NUL. */
const u8 VDO_GEOMETRY_MAGIC_NUMBER[VDO_GEOMETRY_MAGIC_NUMBER_SIZE + 1] = "dmvdo001";
57
/*
 * The expected packed size of a block_map_page_header: three u64 fields followed by four
 * single-byte fields (checked with a BUILD_BUG_ON in vdo_validate_block_map_page()).
 */
#define PAGE_HEADER_4_1_SIZE (8 + 8 + 8 + 1 + 1 + 1 + 1)

/* The on-disk version stamped into every block map page. */
static const struct version_number BLOCK_MAP_4_1 = {
	.major_version = 4,
	.minor_version = 1,
};

/* The header for the saved block map component state. */
const struct header VDO_BLOCK_MAP_HEADER_2_0 = {
	.id = VDO_BLOCK_MAP,
	.version = {
		.major_version = 2,
		.minor_version = 0,
	},
	.size = sizeof(struct block_map_state_2_0),
};

/* The header for the saved recovery journal component state. */
const struct header VDO_RECOVERY_JOURNAL_HEADER_7_0 = {
	.id = VDO_RECOVERY_JOURNAL,
	.version = {
			.major_version = 7,
			.minor_version = 0,
		},
	.size = sizeof(struct recovery_journal_state_7_0),
};

/* The header for the saved slab depot component state. */
const struct header VDO_SLAB_DEPOT_HEADER_2_0 = {
	.id = VDO_SLAB_DEPOT,
	.version = {
		.major_version = 2,
		.minor_version = 0,
	},
	.size = sizeof(struct slab_depot_state_2_0),
};

/* The header for the saved layout: the fixed fields plus one entry per partition. */
static const struct header VDO_LAYOUT_HEADER_3_0 = {
	.id = VDO_LAYOUT,
	.version = {
		.major_version = 3,
		.minor_version = 0,
	},
	.size = sizeof(struct layout_3_0) + (sizeof(struct partition_3_0) * VDO_PARTITION_COUNT),
};

/*
 * The partitions every valid layout must contain (NOTE(review): the consumer of this table is
 * outside this chunk -- presumably layout decode/validation).
 */
static const enum partition_id REQUIRED_PARTITIONS[] = {
	VDO_BLOCK_MAP_PARTITION,
	VDO_SLAB_DEPOT_PARTITION,
	VDO_RECOVERY_JOURNAL_PARTITION,
	VDO_SLAB_SUMMARY_PARTITION,
};

/*
 * The current version for the data encoded in the super block. This must be changed any time there
 * is a change to encoding of the component data of any VDO component.
 */
static const struct version_number VDO_COMPONENT_DATA_41_0 = {
	.major_version = 41,
	.minor_version = 0,
};

/* The version of the on-disk volume format as a whole. */
const struct version_number VDO_VOLUME_VERSION_67_0 = {
	.major_version = 67,
	.minor_version = 0,
};

/* The header for the super block. */
static const struct header SUPER_BLOCK_HEADER_12_0 = {
	.id = VDO_SUPER_BLOCK,
	.version = {
			.major_version = 12,
			.minor_version = 0,
		},

	/* This is the minimum size, if the super block contains no components. */
	.size = VDO_SUPER_BLOCK_FIXED_SIZE - VDO_ENCODED_HEADER_SIZE,
};
132
133/**
134 * validate_version() - Check whether a version matches an expected version.
135 * @expected_version: The expected version.
136 * @actual_version: The version being validated.
137 * @component_name: The name of the component or the calling function (for error logging).
138 *
139 * Logs an error describing a mismatch.
140 *
141 * Return: VDO_SUCCESS             if the versions are the same,
142 *         VDO_UNSUPPORTED_VERSION if the versions don't match.
143 */
144static int __must_check validate_version(struct version_number expected_version,
145					 struct version_number actual_version,
146					 const char *component_name)
147{
148	if (!vdo_are_same_version(expected_version, actual_version)) {
149		return vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
150					      "%s version mismatch, expected %d.%d, got %d.%d",
151					      component_name,
152					      expected_version.major_version,
153					      expected_version.minor_version,
154					      actual_version.major_version,
155					      actual_version.minor_version);
156	}
157
158	return VDO_SUCCESS;
159}
160
161/**
162 * vdo_validate_header() - Check whether a header matches expectations.
163 * @expected_header: The expected header.
164 * @actual_header: The header being validated.
165 * @exact_size: If true, the size fields of the two headers must be the same, otherwise it is
166 *              required that actual_header.size >= expected_header.size.
167 * @name: The name of the component or the calling function (for error logging).
168 *
169 * Logs an error describing the first mismatch found.
170 *
171 * Return: VDO_SUCCESS             if the header meets expectations,
172 *         VDO_INCORRECT_COMPONENT if the component ids don't match,
173 *         VDO_UNSUPPORTED_VERSION if the versions or sizes don't match.
174 */
175int vdo_validate_header(const struct header *expected_header,
176			const struct header *actual_header, bool exact_size,
177			const char *name)
178{
179	int result;
180
181	if (expected_header->id != actual_header->id) {
182		return vdo_log_error_strerror(VDO_INCORRECT_COMPONENT,
183					      "%s ID mismatch, expected %d, got %d",
184					      name, expected_header->id,
185					      actual_header->id);
186	}
187
188	result = validate_version(expected_header->version, actual_header->version,
189				  name);
190	if (result != VDO_SUCCESS)
191		return result;
192
193	if ((expected_header->size > actual_header->size) ||
194	    (exact_size && (expected_header->size < actual_header->size))) {
195		return vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
196					      "%s size mismatch, expected %zu, got %zu",
197					      name, expected_header->size,
198					      actual_header->size);
199	}
200
201	return VDO_SUCCESS;
202}
203
204static void encode_version_number(u8 *buffer, size_t *offset,
205				  struct version_number version)
206{
207	struct packed_version_number packed = vdo_pack_version_number(version);
208
209	memcpy(buffer + *offset, &packed, sizeof(packed));
210	*offset += sizeof(packed);
211}
212
213void vdo_encode_header(u8 *buffer, size_t *offset, const struct header *header)
214{
215	struct packed_header packed = vdo_pack_header(header);
216
217	memcpy(buffer + *offset, &packed, sizeof(packed));
218	*offset += sizeof(packed);
219}
220
221static void decode_version_number(u8 *buffer, size_t *offset,
222				  struct version_number *version)
223{
224	struct packed_version_number packed;
225
226	memcpy(&packed, buffer + *offset, sizeof(packed));
227	*offset += sizeof(packed);
228	*version = vdo_unpack_version_number(packed);
229}
230
231void vdo_decode_header(u8 *buffer, size_t *offset, struct header *header)
232{
233	struct packed_header packed;
234
235	memcpy(&packed, buffer + *offset, sizeof(packed));
236	*offset += sizeof(packed);
237
238	*header = vdo_unpack_header(&packed);
239}
240
/**
 * decode_volume_geometry() - Decode the on-disk representation of a volume geometry from a buffer.
 * @buffer: A buffer to decode from.
 * @offset: The offset in the buffer at which to decode; advanced past the decoded fields.
 * @geometry: The structure to receive the decoded fields.
 * @version: The geometry block version to decode.
 *
 * The read order here defines the on-disk layout; do not reorder these reads.
 */
static void decode_volume_geometry(u8 *buffer, size_t *offset,
				   struct volume_geometry *geometry, u32 version)
{
	u32 unused, mem;
	enum volume_region_id id;
	nonce_t nonce;
	block_count_t bio_offset = 0;
	bool sparse;

	/* This is for backwards compatibility. */
	decode_u32_le(buffer, offset, &unused);
	geometry->unused = unused;

	decode_u64_le(buffer, offset, &nonce);
	geometry->nonce = nonce;

	memcpy((unsigned char *) &geometry->uuid, buffer + *offset, sizeof(uuid_t));
	*offset += sizeof(uuid_t);

	/* The bio offset field only exists in version 5 and later; older formats imply 0. */
	if (version > 4)
		decode_u64_le(buffer, offset, &bio_offset);
	geometry->bio_offset = bio_offset;

	for (id = 0; id < VDO_VOLUME_REGION_COUNT; id++) {
		physical_block_number_t start_block;
		enum volume_region_id saved_id;

		/*
		 * NOTE(review): decode_u32_le() writes through &saved_id, which assumes
		 * enum volume_region_id is exactly 32 bits wide -- confirm against
		 * decode_u32_le()'s parameter type.
		 */
		decode_u32_le(buffer, offset, &saved_id);
		decode_u64_le(buffer, offset, &start_block);

		geometry->regions[id] = (struct volume_region) {
			.id = saved_id,
			.start_block = start_block,
		};
	}

	decode_u32_le(buffer, offset, &mem);
	/*
	 * Skip a 32-bit field that is decoded into nothing (NOTE(review): presumably an
	 * obsolete index configuration field -- confirm against the on-disk format docs).
	 */
	*offset += sizeof(u32);
	sparse = buffer[(*offset)++];

	geometry->index_config = (struct index_config) {
		.mem = mem,
		.sparse = sparse,
	};
}
293
/**
 * vdo_parse_geometry_block() - Decode and validate an encoded geometry block.
 * @block: The encoded geometry block.
 * @geometry: The structure to receive the decoded fields.
 *
 * Return: VDO_SUCCESS on success; VDO_BAD_MAGIC, VDO_CHECKSUM_MISMATCH, or a header
 *         validation error otherwise.
 */
int __must_check vdo_parse_geometry_block(u8 *block, struct volume_geometry *geometry)
{
	u32 checksum, saved_checksum;
	struct header header;
	size_t offset = 0;
	int result;

	/* The block must begin with the magic string; reject anything else outright. */
	if (memcmp(block, VDO_GEOMETRY_MAGIC_NUMBER, VDO_GEOMETRY_MAGIC_NUMBER_SIZE) != 0)
		return VDO_BAD_MAGIC;
	offset += VDO_GEOMETRY_MAGIC_NUMBER_SIZE;

	/* Versions 4 and earlier use the 4.0 header; everything newer validates against 5.0. */
	vdo_decode_header(block, &offset, &header);
	if (header.version.major_version <= 4) {
		result = vdo_validate_header(&GEOMETRY_BLOCK_HEADER_4_0, &header,
					     true, __func__);
	} else {
		result = vdo_validate_header(&GEOMETRY_BLOCK_HEADER_5_0, &header,
					     true, __func__);
	}
	if (result != VDO_SUCCESS)
		return result;

	decode_volume_geometry(block, &offset, geometry, header.version.major_version);

	/* The header's size counts everything up to and including the trailing checksum. */
	result = VDO_ASSERT(header.size == offset + sizeof(u32),
			    "should have decoded up to the geometry checksum");
	if (result != VDO_SUCCESS)
		return result;

	/* Decode and verify the checksum, which covers all bytes that precede it. */
	checksum = vdo_crc32(block, offset);
	decode_u32_le(block, &offset, &saved_checksum);

	return ((checksum == saved_checksum) ? VDO_SUCCESS : VDO_CHECKSUM_MISMATCH);
}
334
335struct block_map_page *vdo_format_block_map_page(void *buffer, nonce_t nonce,
336						 physical_block_number_t pbn,
337						 bool initialized)
338{
339	struct block_map_page *page = buffer;
340
341	memset(buffer, 0, VDO_BLOCK_SIZE);
342	page->version = vdo_pack_version_number(BLOCK_MAP_4_1);
343	page->header.nonce = __cpu_to_le64(nonce);
344	page->header.pbn = __cpu_to_le64(pbn);
345	page->header.initialized = initialized;
346	return page;
347}
348
349enum block_map_page_validity vdo_validate_block_map_page(struct block_map_page *page,
350							 nonce_t nonce,
351							 physical_block_number_t pbn)
352{
353	BUILD_BUG_ON(sizeof(struct block_map_page_header) != PAGE_HEADER_4_1_SIZE);
354
355	if (!vdo_are_same_version(BLOCK_MAP_4_1,
356				  vdo_unpack_version_number(page->version)) ||
357	    !page->header.initialized || (nonce != __le64_to_cpu(page->header.nonce)))
358		return VDO_BLOCK_MAP_PAGE_INVALID;
359
360	if (pbn != vdo_get_block_map_page_pbn(page))
361		return VDO_BLOCK_MAP_PAGE_BAD;
362
363	return VDO_BLOCK_MAP_PAGE_VALID;
364}
365
366static int decode_block_map_state_2_0(u8 *buffer, size_t *offset,
367				      struct block_map_state_2_0 *state)
368{
369	size_t initial_offset;
370	block_count_t flat_page_count, root_count;
371	physical_block_number_t flat_page_origin, root_origin;
372	struct header header;
373	int result;
374
375	vdo_decode_header(buffer, offset, &header);
376	result = vdo_validate_header(&VDO_BLOCK_MAP_HEADER_2_0, &header, true, __func__);
377	if (result != VDO_SUCCESS)
378		return result;
379
380	initial_offset = *offset;
381
382	decode_u64_le(buffer, offset, &flat_page_origin);
383	result = VDO_ASSERT(flat_page_origin == VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN,
384			    "Flat page origin must be %u (recorded as %llu)",
385			    VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN,
386			    (unsigned long long) state->flat_page_origin);
387	if (result != VDO_SUCCESS)
388		return result;
389
390	decode_u64_le(buffer, offset, &flat_page_count);
391	result = VDO_ASSERT(flat_page_count == 0,
392			    "Flat page count must be 0 (recorded as %llu)",
393			    (unsigned long long) state->flat_page_count);
394	if (result != VDO_SUCCESS)
395		return result;
396
397	decode_u64_le(buffer, offset, &root_origin);
398	decode_u64_le(buffer, offset, &root_count);
399
400	result = VDO_ASSERT(VDO_BLOCK_MAP_HEADER_2_0.size == *offset - initial_offset,
401			    "decoded block map component size must match header size");
402	if (result != VDO_SUCCESS)
403		return result;
404
405	*state = (struct block_map_state_2_0) {
406		.flat_page_origin = flat_page_origin,
407		.flat_page_count = flat_page_count,
408		.root_origin = root_origin,
409		.root_count = root_count,
410	};
411
412	return VDO_SUCCESS;
413}
414
415static void encode_block_map_state_2_0(u8 *buffer, size_t *offset,
416				       struct block_map_state_2_0 state)
417{
418	size_t initial_offset;
419
420	vdo_encode_header(buffer, offset, &VDO_BLOCK_MAP_HEADER_2_0);
421
422	initial_offset = *offset;
423	encode_u64_le(buffer, offset, state.flat_page_origin);
424	encode_u64_le(buffer, offset, state.flat_page_count);
425	encode_u64_le(buffer, offset, state.root_origin);
426	encode_u64_le(buffer, offset, state.root_count);
427
428	VDO_ASSERT_LOG_ONLY(VDO_BLOCK_MAP_HEADER_2_0.size == *offset - initial_offset,
429			    "encoded block map component size must match header size");
430}
431
/**
 * vdo_compute_new_forest_pages() - Compute the number of pages which must be allocated at each
 *                                  level in order to grow the forest to a new number of entries.
 * @root_count: The number of roots in the block map tree.
 * @old_sizes: The current size of each level, or NULL if the forest is brand new.
 * @entries: The new number of entries the block map must address.
 * @new_sizes: Receives the new size of each level.
 *
 * Return: The total number of non-leaf pages required.
 */
block_count_t vdo_compute_new_forest_pages(root_count_t root_count,
					   struct boundary *old_sizes,
					   block_count_t entries,
					   struct boundary *new_sizes)
{
	/* Always plan for at least one leaf page, even for an empty map. */
	page_count_t leaf_pages = max(vdo_compute_block_map_page_count(entries), 1U);
	page_count_t level_size = DIV_ROUND_UP(leaf_pages, root_count);
	block_count_t total_pages = 0;
	height_t height;

	for (height = 0; height < VDO_BLOCK_MAP_TREE_HEIGHT; height++) {
		block_count_t new_pages;

		/* Each interior level needs one page per ENTRIES_PER_PAGE pages below it. */
		level_size = DIV_ROUND_UP(level_size, VDO_BLOCK_MAP_ENTRIES_PER_PAGE);
		new_sizes->levels[height] = level_size;
		new_pages = level_size;
		/* Only count pages beyond what each root's tree already has. */
		if (old_sizes != NULL)
			new_pages -= old_sizes->levels[height];
		total_pages += (new_pages * root_count);
	}

	return total_pages;
}
462
/**
 * encode_recovery_journal_state_7_0() - Encode the state of a recovery journal into a buffer.
 * @buffer: The buffer to encode into.
 * @offset: The offset in the buffer; advanced past the encoded state.
 * @state: The recovery journal state to encode.
 *
 * The write order defines the on-disk format and must match
 * decode_recovery_journal_state_7_0().
 */
static void encode_recovery_journal_state_7_0(u8 *buffer, size_t *offset,
					      struct recovery_journal_state_7_0 state)
{
	size_t initial_offset;

	vdo_encode_header(buffer, offset, &VDO_RECOVERY_JOURNAL_HEADER_7_0);

	initial_offset = *offset;
	encode_u64_le(buffer, offset, state.journal_start);
	encode_u64_le(buffer, offset, state.logical_blocks_used);
	encode_u64_le(buffer, offset, state.block_map_data_blocks);

	VDO_ASSERT_LOG_ONLY(VDO_RECOVERY_JOURNAL_HEADER_7_0.size == *offset - initial_offset,
			    "encoded recovery journal component size must match header size");
}
483
484/**
485 * decode_recovery_journal_state_7_0() - Decode the state of a recovery journal saved in a buffer.
486 * @buffer: The buffer containing the saved state.
487 * @state: A pointer to a recovery journal state to hold the result of a successful decode.
488 *
489 * Return: VDO_SUCCESS or an error code.
490 */
491static int __must_check decode_recovery_journal_state_7_0(u8 *buffer, size_t *offset,
492							  struct recovery_journal_state_7_0 *state)
493{
494	struct header header;
495	int result;
496	size_t initial_offset;
497	sequence_number_t journal_start;
498	block_count_t logical_blocks_used, block_map_data_blocks;
499
500	vdo_decode_header(buffer, offset, &header);
501	result = vdo_validate_header(&VDO_RECOVERY_JOURNAL_HEADER_7_0, &header, true,
502				     __func__);
503	if (result != VDO_SUCCESS)
504		return result;
505
506	initial_offset = *offset;
507	decode_u64_le(buffer, offset, &journal_start);
508	decode_u64_le(buffer, offset, &logical_blocks_used);
509	decode_u64_le(buffer, offset, &block_map_data_blocks);
510
511	result = VDO_ASSERT(VDO_RECOVERY_JOURNAL_HEADER_7_0.size == *offset - initial_offset,
512			    "decoded recovery journal component size must match header size");
513	if (result != VDO_SUCCESS)
514		return result;
515
516	*state = (struct recovery_journal_state_7_0) {
517		.journal_start = journal_start,
518		.logical_blocks_used = logical_blocks_used,
519		.block_map_data_blocks = block_map_data_blocks,
520	};
521
522	return VDO_SUCCESS;
523}
524
525/**
526 * vdo_get_journal_operation_name() - Get the name of a journal operation.
527 * @operation: The operation to name.
528 *
529 * Return: The name of the operation.
530 */
531const char *vdo_get_journal_operation_name(enum journal_operation operation)
532{
533	switch (operation) {
534	case VDO_JOURNAL_DATA_REMAPPING:
535		return "data remapping";
536
537	case VDO_JOURNAL_BLOCK_MAP_REMAPPING:
538		return "block map remapping";
539
540	default:
541		return "unknown journal operation";
542	}
543}
544
545/**
546 * encode_slab_depot_state_2_0() - Encode the state of a slab depot into a buffer.
547 */
548static void encode_slab_depot_state_2_0(u8 *buffer, size_t *offset,
549					struct slab_depot_state_2_0 state)
550{
551	size_t initial_offset;
552
553	vdo_encode_header(buffer, offset, &VDO_SLAB_DEPOT_HEADER_2_0);
554
555	initial_offset = *offset;
556	encode_u64_le(buffer, offset, state.slab_config.slab_blocks);
557	encode_u64_le(buffer, offset, state.slab_config.data_blocks);
558	encode_u64_le(buffer, offset, state.slab_config.reference_count_blocks);
559	encode_u64_le(buffer, offset, state.slab_config.slab_journal_blocks);
560	encode_u64_le(buffer, offset, state.slab_config.slab_journal_flushing_threshold);
561	encode_u64_le(buffer, offset, state.slab_config.slab_journal_blocking_threshold);
562	encode_u64_le(buffer, offset, state.slab_config.slab_journal_scrubbing_threshold);
563	encode_u64_le(buffer, offset, state.first_block);
564	encode_u64_le(buffer, offset, state.last_block);
565	buffer[(*offset)++] = state.zone_count;
566
567	VDO_ASSERT_LOG_ONLY(VDO_SLAB_DEPOT_HEADER_2_0.size == *offset - initial_offset,
568			    "encoded block map component size must match header size");
569}
570
571/**
572 * decode_slab_depot_state_2_0() - Decode slab depot component state version 2.0 from a buffer.
573 *
574 * Return: VDO_SUCCESS or an error code.
575 */
576static int decode_slab_depot_state_2_0(u8 *buffer, size_t *offset,
577				       struct slab_depot_state_2_0 *state)
578{
579	struct header header;
580	int result;
581	size_t initial_offset;
582	struct slab_config slab_config;
583	block_count_t count;
584	physical_block_number_t first_block, last_block;
585	zone_count_t zone_count;
586
587	vdo_decode_header(buffer, offset, &header);
588	result = vdo_validate_header(&VDO_SLAB_DEPOT_HEADER_2_0, &header, true,
589				     __func__);
590	if (result != VDO_SUCCESS)
591		return result;
592
593	initial_offset = *offset;
594	decode_u64_le(buffer, offset, &count);
595	slab_config.slab_blocks = count;
596
597	decode_u64_le(buffer, offset, &count);
598	slab_config.data_blocks = count;
599
600	decode_u64_le(buffer, offset, &count);
601	slab_config.reference_count_blocks = count;
602
603	decode_u64_le(buffer, offset, &count);
604	slab_config.slab_journal_blocks = count;
605
606	decode_u64_le(buffer, offset, &count);
607	slab_config.slab_journal_flushing_threshold = count;
608
609	decode_u64_le(buffer, offset, &count);
610	slab_config.slab_journal_blocking_threshold = count;
611
612	decode_u64_le(buffer, offset, &count);
613	slab_config.slab_journal_scrubbing_threshold = count;
614
615	decode_u64_le(buffer, offset, &first_block);
616	decode_u64_le(buffer, offset, &last_block);
617	zone_count = buffer[(*offset)++];
618
619	result = VDO_ASSERT(VDO_SLAB_DEPOT_HEADER_2_0.size == *offset - initial_offset,
620			    "decoded slab depot component size must match header size");
621	if (result != VDO_SUCCESS)
622		return result;
623
624	*state = (struct slab_depot_state_2_0) {
625		.slab_config = slab_config,
626		.first_block = first_block,
627		.last_block = last_block,
628		.zone_count = zone_count,
629	};
630
631	return VDO_SUCCESS;
632}
633
/**
 * vdo_configure_slab_depot() - Configure the slab depot.
 * @partition: The slab depot partition
 * @slab_config: The configuration of a single slab.
 * @zone_count: The number of zones the depot will use.
 * @state: The state structure to be configured.
 *
 * Configures the slab_depot for the specified storage capacity, finding the number of data blocks
 * that will fit and still leave room for the depot metadata, then return the saved state for that
 * configuration.
 *
 * Return: VDO_SUCCESS or an error code.
 */
int vdo_configure_slab_depot(const struct partition *partition,
			     struct slab_config slab_config, zone_count_t zone_count,
			     struct slab_depot_state_2_0 *state)
{
	block_count_t total_slab_blocks, total_data_blocks;
	size_t slab_count;
	physical_block_number_t last_block;
	block_count_t slab_size = slab_config.slab_blocks;

	/* block_count is the partition's capacity; first_block is where it starts on disk. */
	vdo_log_debug("slabDepot %s(block_count=%llu, first_block=%llu, slab_size=%llu, zone_count=%u)",
		      __func__, (unsigned long long) partition->count,
		      (unsigned long long) partition->offset,
		      (unsigned long long) slab_size, zone_count);

	/* We do not allow runt slabs, so we waste up to a slab's worth. */
	slab_count = (partition->count / slab_size);
	if (slab_count == 0)
		return VDO_NO_SPACE;

	if (slab_count > MAX_VDO_SLABS)
		return VDO_TOO_MANY_SLABS;

	total_slab_blocks = slab_count * slab_config.slab_blocks;
	total_data_blocks = slab_count * slab_config.data_blocks;
	last_block = partition->offset + total_slab_blocks;

	*state = (struct slab_depot_state_2_0) {
		.slab_config = slab_config,
		.first_block = partition->offset,
		.last_block = last_block,
		.zone_count = zone_count,
	};

	/* left_over is the partition tail too small to hold a whole slab. */
	vdo_log_debug("slab_depot last_block=%llu, total_data_blocks=%llu, slab_count=%zu, left_over=%llu",
		      (unsigned long long) last_block,
		      (unsigned long long) total_data_blocks, slab_count,
		      (unsigned long long) (partition->count - (last_block - partition->offset)));

	return VDO_SUCCESS;
}
687
/**
 * vdo_configure_slab() - Measure and initialize the configuration to use for each slab.
 * @slab_size: The number of blocks per slab.
 * @slab_journal_blocks: The number of blocks for the slab journal.
 * @slab_config: The slab configuration to initialize.
 *
 * Return: VDO_SUCCESS or an error code (VDO_BAD_CONFIGURATION if the journal or metadata
 *         would not fit in the slab).
 */
int vdo_configure_slab(block_count_t slab_size, block_count_t slab_journal_blocks,
		       struct slab_config *slab_config)
{
	block_count_t ref_blocks, meta_blocks, data_blocks;
	block_count_t flushing_threshold, remaining, blocking_threshold;
	block_count_t minimal_extra_space, scrubbing_threshold;

	if (slab_journal_blocks >= slab_size)
		return VDO_BAD_CONFIGURATION;

	/*
	 * This calculation should technically be a recurrence, but the total number of metadata
	 * blocks is currently less than a single block of ref_counts, so we'd gain at most one
	 * data block in each slab with more iteration.
	 */
	ref_blocks = vdo_get_saved_reference_count_size(slab_size - slab_journal_blocks);
	meta_blocks = (ref_blocks + slab_journal_blocks);

	/* Make sure test code hasn't configured slabs to be too small. */
	if (meta_blocks >= slab_size)
		return VDO_BAD_CONFIGURATION;

	/*
	 * If the slab size is very small, assume this must be a unit test and override the number
	 * of data blocks to be a power of two (wasting blocks in the slab). Many tests need their
	 * data_blocks fields to be the exact capacity of the configured volume, and that used to
	 * fall out since they use a power of two for the number of data blocks, the slab size was
	 * a power of two, and every block in a slab was a data block.
	 *
	 * TODO: Try to figure out some way of structuring testParameters and unit tests so this
	 * hack isn't needed without having to edit several unit tests every time the metadata size
	 * changes by one block.
	 */
	data_blocks = slab_size - meta_blocks;
	if ((slab_size < 1024) && !is_power_of_2(data_blocks))
		data_blocks = ((block_count_t) 1 << ilog2(data_blocks));

	/*
	 * Configure the slab journal thresholds. The flush threshold is 168 of 224 blocks in
	 * production, or 3/4ths, so we use this ratio for all sizes.
	 */
	flushing_threshold = ((slab_journal_blocks * 3) + 3) / 4;
	/*
	 * The blocking threshold should be far enough from the flushing threshold to not produce
	 * delays, but far enough from the end of the journal to allow multiple successive recovery
	 * failures.
	 */
	remaining = slab_journal_blocks - flushing_threshold;
	blocking_threshold = flushing_threshold + ((remaining * 5) / 7);
	/* The scrubbing threshold should be at least 2048 entries before the end of the journal. */
	minimal_extra_space = 1 + (MAXIMUM_VDO_USER_VIOS / VDO_SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK);
	scrubbing_threshold = blocking_threshold;
	if (slab_journal_blocks > minimal_extra_space)
		scrubbing_threshold = slab_journal_blocks - minimal_extra_space;
	/* The blocking threshold must never exceed the scrubbing threshold. */
	if (blocking_threshold > scrubbing_threshold)
		blocking_threshold = scrubbing_threshold;

	*slab_config = (struct slab_config) {
		.slab_blocks = slab_size,
		.data_blocks = data_blocks,
		.reference_count_blocks = ref_blocks,
		.slab_journal_blocks = slab_journal_blocks,
		.slab_journal_flushing_threshold = flushing_threshold,
		.slab_journal_blocking_threshold = blocking_threshold,
		.slab_journal_scrubbing_threshold = scrubbing_threshold};
	return VDO_SUCCESS;
}
763
764/**
765 * vdo_decode_slab_journal_entry() - Decode a slab journal entry.
766 * @block: The journal block holding the entry.
767 * @entry_count: The number of the entry.
768 *
769 * Return: The decoded entry.
770 */
771struct slab_journal_entry vdo_decode_slab_journal_entry(struct packed_slab_journal_block *block,
772							journal_entry_count_t entry_count)
773{
774	struct slab_journal_entry entry =
775		vdo_unpack_slab_journal_entry(&block->payload.entries[entry_count]);
776
777	if (block->header.has_block_map_increments &&
778	    ((block->payload.full_entries.entry_types[entry_count / 8] &
779	      ((u8) 1 << (entry_count % 8))) != 0))
780		entry.operation = VDO_JOURNAL_BLOCK_MAP_REMAPPING;
781
782	return entry;
783}
784
785/**
786 * allocate_partition() - Allocate a partition and add it to a layout.
787 * @layout: The layout containing the partition.
788 * @id: The id of the partition.
789 * @offset: The offset into the layout at which the partition begins.
790 * @size: The size of the partition in blocks.
791 *
792 * Return: VDO_SUCCESS or an error.
793 */
794static int allocate_partition(struct layout *layout, u8 id,
795			      physical_block_number_t offset, block_count_t size)
796{
797	struct partition *partition;
798	int result;
799
800	result = vdo_allocate(1, struct partition, __func__, &partition);
801	if (result != VDO_SUCCESS)
802		return result;
803
804	partition->id = id;
805	partition->offset = offset;
806	partition->count = size;
807	partition->next = layout->head;
808	layout->head = partition;
809
810	return VDO_SUCCESS;
811}
812
813/**
814 * make_partition() - Create a new partition from the beginning or end of the unused space in a
815 *                    layout.
816 * @layout: The layout.
817 * @id: The id of the partition to make.
818 * @size: The number of blocks to carve out; if 0, all remaining space will be used.
819 * @beginning: True if the partition should start at the beginning of the unused space.
820 *
821 * Return: A success or error code, particularly VDO_NO_SPACE if there are fewer than size blocks
822 *         remaining.
823 */
824static int __must_check make_partition(struct layout *layout, enum partition_id id,
825				       block_count_t size, bool beginning)
826{
827	int result;
828	physical_block_number_t offset;
829	block_count_t free_blocks = layout->last_free - layout->first_free;
830
831	if (size == 0) {
832		if (free_blocks == 0)
833			return VDO_NO_SPACE;
834		size = free_blocks;
835	} else if (size > free_blocks) {
836		return VDO_NO_SPACE;
837	}
838
839	result = vdo_get_partition(layout, id, NULL);
840	if (result != VDO_UNKNOWN_PARTITION)
841		return VDO_PARTITION_EXISTS;
842
843	offset = beginning ? layout->first_free : (layout->last_free - size);
844
845	result = allocate_partition(layout, id, offset, size);
846	if (result != VDO_SUCCESS)
847		return result;
848
849	layout->num_partitions++;
850	if (beginning)
851		layout->first_free += size;
852	else
853		layout->last_free = layout->last_free - size;
854
855	return VDO_SUCCESS;
856}
857
858/**
859 * vdo_initialize_layout() - Lay out the partitions of a vdo.
860 * @size: The entire size of the vdo.
861 * @origin: The start of the layout on the underlying storage in blocks.
862 * @block_map_blocks: The size of the block map partition.
863 * @journal_blocks: The size of the journal partition.
864 * @summary_blocks: The size of the slab summary partition.
865 * @layout: The layout to initialize.
866 *
867 * Return: VDO_SUCCESS or an error.
868 */
869int vdo_initialize_layout(block_count_t size, physical_block_number_t offset,
870			  block_count_t block_map_blocks, block_count_t journal_blocks,
871			  block_count_t summary_blocks, struct layout *layout)
872{
873	int result;
874	block_count_t necessary_size =
875		(offset + block_map_blocks + journal_blocks + summary_blocks);
876
877	if (necessary_size > size)
878		return vdo_log_error_strerror(VDO_NO_SPACE,
879					      "Not enough space to make a VDO");
880
881	*layout = (struct layout) {
882		.start = offset,
883		.size = size,
884		.first_free = offset,
885		.last_free = size,
886		.num_partitions = 0,
887		.head = NULL,
888	};
889
890	result = make_partition(layout, VDO_BLOCK_MAP_PARTITION, block_map_blocks, true);
891	if (result != VDO_SUCCESS) {
892		vdo_uninitialize_layout(layout);
893		return result;
894	}
895
896	result = make_partition(layout, VDO_SLAB_SUMMARY_PARTITION, summary_blocks,
897				false);
898	if (result != VDO_SUCCESS) {
899		vdo_uninitialize_layout(layout);
900		return result;
901	}
902
903	result = make_partition(layout, VDO_RECOVERY_JOURNAL_PARTITION, journal_blocks,
904				false);
905	if (result != VDO_SUCCESS) {
906		vdo_uninitialize_layout(layout);
907		return result;
908	}
909
910	result = make_partition(layout, VDO_SLAB_DEPOT_PARTITION, 0, true);
911	if (result != VDO_SUCCESS)
912		vdo_uninitialize_layout(layout);
913
914	return result;
915}
916
917/**
918 * vdo_uninitialize_layout() - Clean up a layout.
919 * @layout: The layout to clean up.
920 *
921 * All partitions created by this layout become invalid pointers.
922 */
923void vdo_uninitialize_layout(struct layout *layout)
924{
925	while (layout->head != NULL) {
926		struct partition *part = layout->head;
927
928		layout->head = part->next;
929		vdo_free(part);
930	}
931
932	memset(layout, 0, sizeof(struct layout));
933}
934
935/**
936 * vdo_get_partition() - Get a partition by id.
937 * @layout: The layout from which to get a partition.
938 * @id: The id of the partition.
939 * @partition_ptr: A pointer to hold the partition.
940 *
941 * Return: VDO_SUCCESS or an error.
942 */
943int vdo_get_partition(struct layout *layout, enum partition_id id,
944		      struct partition **partition_ptr)
945{
946	struct partition *partition;
947
948	for (partition = layout->head; partition != NULL; partition = partition->next) {
949		if (partition->id == id) {
950			if (partition_ptr != NULL)
951				*partition_ptr = partition;
952			return VDO_SUCCESS;
953		}
954	}
955
956	return VDO_UNKNOWN_PARTITION;
957}
958
959/**
960 * vdo_get_known_partition() - Get a partition by id from a validated layout.
961 * @layout: The layout from which to get a partition.
962 * @id: The id of the partition.
963 *
964 * Return: the partition
965 */
966struct partition *vdo_get_known_partition(struct layout *layout, enum partition_id id)
967{
968	struct partition *partition;
969	int result = vdo_get_partition(layout, id, &partition);
970
971	VDO_ASSERT_LOG_ONLY(result == VDO_SUCCESS, "layout has expected partition: %u", id);
972
973	return partition;
974}
975
976static void encode_layout(u8 *buffer, size_t *offset, const struct layout *layout)
977{
978	const struct partition *partition;
979	size_t initial_offset;
980	struct header header = VDO_LAYOUT_HEADER_3_0;
981
982	BUILD_BUG_ON(sizeof(enum partition_id) != sizeof(u8));
983	VDO_ASSERT_LOG_ONLY(layout->num_partitions <= U8_MAX,
984			    "layout partition count must fit in a byte");
985
986	vdo_encode_header(buffer, offset, &header);
987
988	initial_offset = *offset;
989	encode_u64_le(buffer, offset, layout->first_free);
990	encode_u64_le(buffer, offset, layout->last_free);
991	buffer[(*offset)++] = layout->num_partitions;
992
993	VDO_ASSERT_LOG_ONLY(sizeof(struct layout_3_0) == *offset - initial_offset,
994			    "encoded size of a layout header must match structure");
995
996	for (partition = layout->head; partition != NULL; partition = partition->next) {
997		buffer[(*offset)++] = partition->id;
998		encode_u64_le(buffer, offset, partition->offset);
999		/* This field only exists for backwards compatibility */
1000		encode_u64_le(buffer, offset, 0);
1001		encode_u64_le(buffer, offset, partition->count);
1002	}
1003
1004	VDO_ASSERT_LOG_ONLY(header.size == *offset - initial_offset,
1005			    "encoded size of a layout must match header size");
1006}
1007
/**
 * decode_layout() - Decode an on-disk layout and rebuild its partition list.
 * @buffer: The buffer being decoded.
 * @offset: In/out cursor into the buffer; advanced past the layout.
 * @start: The physical block at which the data region begins.
 * @size: The expected total size of the layout in blocks.
 * @layout: The layout to fill in; owns the allocated partitions on success.
 *
 * On any failure after partitions have been allocated, the layout is
 * uninitialized again, so the caller need not clean up.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int decode_layout(u8 *buffer, size_t *offset, physical_block_number_t start,
			 block_count_t size, struct layout *layout)
{
	struct header header;
	struct layout_3_0 layout_header;
	struct partition *partition;
	size_t initial_offset;
	physical_block_number_t first_free, last_free;
	u8 partition_count;
	u8 i;
	int result;

	vdo_decode_header(buffer, offset, &header);
	/* Layout is variable size, so only do a minimum size check here. */
	result = vdo_validate_header(&VDO_LAYOUT_HEADER_3_0, &header, false, __func__);
	if (result != VDO_SUCCESS)
		return result;

	initial_offset = *offset;
	decode_u64_le(buffer, offset, &first_free);
	decode_u64_le(buffer, offset, &last_free);
	partition_count = buffer[(*offset)++];
	layout_header = (struct layout_3_0) {
		.first_free = first_free,
		.last_free = last_free,
		.partition_count = partition_count,
	};

	/* Guard against the fixed-size header struct drifting from the wire format. */
	result = VDO_ASSERT(sizeof(struct layout_3_0) == *offset - initial_offset,
			    "decoded size of a layout header must match structure");
	if (result != VDO_SUCCESS)
		return result;

	layout->start = start;
	layout->size = size;
	layout->first_free = layout_header.first_free;
	layout->last_free = layout_header.last_free;
	layout->num_partitions = layout_header.partition_count;

	/* No partitions allocated yet, so a plain return is safe here. */
	if (layout->num_partitions > VDO_PARTITION_COUNT) {
		return vdo_log_error_strerror(VDO_UNKNOWN_PARTITION,
					      "layout has extra partitions");
	}

	for (i = 0; i < layout->num_partitions; i++) {
		u8 id;
		u64 partition_offset, count;

		id = buffer[(*offset)++];
		decode_u64_le(buffer, offset, &partition_offset);
		/* Skip an obsolete 8-byte field kept only for on-disk compatibility. */
		*offset += sizeof(u64);
		decode_u64_le(buffer, offset, &count);

		result = allocate_partition(layout, id, partition_offset, count);
		if (result != VDO_SUCCESS) {
			vdo_uninitialize_layout(layout);
			return result;
		}
	}

	/* Validate that the layout has all (and only) the required partitions */
	for (i = 0; i < VDO_PARTITION_COUNT; i++) {
		result = vdo_get_partition(layout, REQUIRED_PARTITIONS[i], &partition);
		if (result != VDO_SUCCESS) {
			vdo_uninitialize_layout(layout);
			return vdo_log_error_strerror(result,
						      "layout is missing required partition %u",
						      REQUIRED_PARTITIONS[i]);
		}

		/* Accumulate sizes to verify the partitions tile the region exactly. */
		start += partition->count;
	}

	if (start != size) {
		vdo_uninitialize_layout(layout);
		return vdo_log_error_strerror(UDS_BAD_STATE,
					      "partitions do not cover the layout");
	}

	return VDO_SUCCESS;
}
1089
1090/**
1091 * pack_vdo_config() - Convert a vdo_config to its packed on-disk representation.
1092 * @config: The vdo config to convert.
1093 *
1094 * Return: The platform-independent representation of the config.
1095 */
1096static struct packed_vdo_config pack_vdo_config(struct vdo_config config)
1097{
1098	return (struct packed_vdo_config) {
1099		.logical_blocks = __cpu_to_le64(config.logical_blocks),
1100		.physical_blocks = __cpu_to_le64(config.physical_blocks),
1101		.slab_size = __cpu_to_le64(config.slab_size),
1102		.recovery_journal_size = __cpu_to_le64(config.recovery_journal_size),
1103		.slab_journal_blocks = __cpu_to_le64(config.slab_journal_blocks),
1104	};
1105}
1106
1107/**
1108 * pack_vdo_component() - Convert a vdo_component to its packed on-disk representation.
1109 * @component: The VDO component data to convert.
1110 *
1111 * Return: The platform-independent representation of the component.
1112 */
1113static struct packed_vdo_component_41_0 pack_vdo_component(const struct vdo_component component)
1114{
1115	return (struct packed_vdo_component_41_0) {
1116		.state = __cpu_to_le32(component.state),
1117		.complete_recoveries = __cpu_to_le64(component.complete_recoveries),
1118		.read_only_recoveries = __cpu_to_le64(component.read_only_recoveries),
1119		.config = pack_vdo_config(component.config),
1120		.nonce = __cpu_to_le64(component.nonce),
1121	};
1122}
1123
1124static void encode_vdo_component(u8 *buffer, size_t *offset,
1125				 struct vdo_component component)
1126{
1127	struct packed_vdo_component_41_0 packed;
1128
1129	encode_version_number(buffer, offset, VDO_COMPONENT_DATA_41_0);
1130	packed = pack_vdo_component(component);
1131	memcpy(buffer + *offset, &packed, sizeof(packed));
1132	*offset += sizeof(packed);
1133}
1134
1135/**
1136 * unpack_vdo_config() - Convert a packed_vdo_config to its native in-memory representation.
1137 * @config: The packed vdo config to convert.
1138 *
1139 * Return: The native in-memory representation of the vdo config.
1140 */
1141static struct vdo_config unpack_vdo_config(struct packed_vdo_config config)
1142{
1143	return (struct vdo_config) {
1144		.logical_blocks = __le64_to_cpu(config.logical_blocks),
1145		.physical_blocks = __le64_to_cpu(config.physical_blocks),
1146		.slab_size = __le64_to_cpu(config.slab_size),
1147		.recovery_journal_size = __le64_to_cpu(config.recovery_journal_size),
1148		.slab_journal_blocks = __le64_to_cpu(config.slab_journal_blocks),
1149	};
1150}
1151
1152/**
1153 * unpack_vdo_component_41_0() - Convert a packed_vdo_component_41_0 to its native in-memory
1154 *				 representation.
1155 * @component: The packed vdo component data to convert.
1156 *
1157 * Return: The native in-memory representation of the component.
1158 */
1159static struct vdo_component unpack_vdo_component_41_0(struct packed_vdo_component_41_0 component)
1160{
1161	return (struct vdo_component) {
1162		.state = __le32_to_cpu(component.state),
1163		.complete_recoveries = __le64_to_cpu(component.complete_recoveries),
1164		.read_only_recoveries = __le64_to_cpu(component.read_only_recoveries),
1165		.config = unpack_vdo_config(component.config),
1166		.nonce = __le64_to_cpu(component.nonce),
1167	};
1168}
1169
1170/**
1171 * decode_vdo_component() - Decode the component data for the vdo itself out of the super block.
1172 *
1173 * Return: VDO_SUCCESS or an error.
1174 */
1175static int decode_vdo_component(u8 *buffer, size_t *offset, struct vdo_component *component)
1176{
1177	struct version_number version;
1178	struct packed_vdo_component_41_0 packed;
1179	int result;
1180
1181	decode_version_number(buffer, offset, &version);
1182	result = validate_version(version, VDO_COMPONENT_DATA_41_0,
1183				  "VDO component data");
1184	if (result != VDO_SUCCESS)
1185		return result;
1186
1187	memcpy(&packed, buffer + *offset, sizeof(packed));
1188	*offset += sizeof(packed);
1189	*component = unpack_vdo_component_41_0(packed);
1190	return VDO_SUCCESS;
1191}
1192
1193/**
1194 * vdo_validate_config() - Validate constraints on a VDO config.
1195 * @config: The VDO config.
1196 * @physical_block_count: The minimum block count of the underlying storage.
1197 * @logical_block_count: The expected logical size of the VDO, or 0 if the logical size may be
1198 *			 unspecified.
1199 *
1200 * Return: A success or error code.
1201 */
int vdo_validate_config(const struct vdo_config *config,
			block_count_t physical_block_count,
			block_count_t logical_block_count)
{
	struct slab_config slab_config;
	int result;

	/* Slab geometry: non-zero, power-of-two, bounded size. */
	result = VDO_ASSERT(config->slab_size > 0, "slab size unspecified");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(is_power_of_2(config->slab_size),
			    "slab size must be a power of two");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(config->slab_size <= (1 << MAX_VDO_SLAB_BITS),
			    "slab size must be less than or equal to 2^%d",
			    MAX_VDO_SLAB_BITS);
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(config->slab_journal_blocks >= MINIMUM_VDO_SLAB_JOURNAL_BLOCKS,
			    "slab journal size meets minimum size");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(config->slab_journal_blocks <= config->slab_size,
			    "slab journal size is within expected bound");
	if (result != VDO_SUCCESS)
		return result;

	/* Derive the full slab config to make sure the geometry is usable. */
	result = vdo_configure_slab(config->slab_size, config->slab_journal_blocks,
				    &slab_config);
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT((slab_config.data_blocks >= 1),
			    "slab must be able to hold at least one block");
	if (result != VDO_SUCCESS)
		return result;

	/* Physical size: non-zero, within the supported maximum, and matching storage. */
	result = VDO_ASSERT(config->physical_blocks > 0, "physical blocks unspecified");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(config->physical_blocks <= MAXIMUM_VDO_PHYSICAL_BLOCKS,
			    "physical block count %llu exceeds maximum %llu",
			    (unsigned long long) config->physical_blocks,
			    (unsigned long long) MAXIMUM_VDO_PHYSICAL_BLOCKS);
	if (result != VDO_SUCCESS)
		/*
		 * NOTE(review): unlike every other check here, this one maps
		 * the assertion result to VDO_OUT_OF_RANGE rather than
		 * returning it directly — presumably to give callers a more
		 * specific error code; confirm this asymmetry is intentional.
		 */
		return VDO_OUT_OF_RANGE;

	if (physical_block_count != config->physical_blocks) {
		vdo_log_error("A physical size of %llu blocks was specified, not the %llu blocks configured in the vdo super block",
			      (unsigned long long) physical_block_count,
			      (unsigned long long) config->physical_blocks);
		return VDO_PARAMETER_MISMATCH;
	}

	/* Logical size checks only apply when the caller specified one (> 0). */
	if (logical_block_count > 0) {
		result = VDO_ASSERT((config->logical_blocks > 0),
				    "logical blocks unspecified");
		if (result != VDO_SUCCESS)
			return result;

		if (logical_block_count != config->logical_blocks) {
			vdo_log_error("A logical size of %llu blocks was specified, but that differs from the %llu blocks configured in the vdo super block",
				      (unsigned long long) logical_block_count,
				      (unsigned long long) config->logical_blocks);
			return VDO_PARAMETER_MISMATCH;
		}
	}

	result = VDO_ASSERT(config->logical_blocks <= MAXIMUM_VDO_LOGICAL_BLOCKS,
			    "logical blocks too large");
	if (result != VDO_SUCCESS)
		return result;

	/* Recovery journal: non-zero, power-of-two size. */
	result = VDO_ASSERT(config->recovery_journal_size > 0,
			    "recovery journal size unspecified");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(is_power_of_2(config->recovery_journal_size),
			    "recovery journal size must be a power of two");
	if (result != VDO_SUCCESS)
		return result;

	return result;
}
1293
1294/**
1295 * vdo_destroy_component_states() - Clean up any allocations in a vdo_component_states.
1296 * @states: The component states to destroy.
1297 */
1298void vdo_destroy_component_states(struct vdo_component_states *states)
1299{
1300	if (states == NULL)
1301		return;
1302
1303	vdo_uninitialize_layout(&states->layout);
1304}
1305
1306/**
1307 * decode_components() - Decode the components now that we know the component data is a version we
1308 *                       understand.
1309 * @buffer: The buffer being decoded.
1310 * @offset: The offset to start decoding from.
1311 * @geometry: The vdo geometry
1312 * @states: An object to hold the successfully decoded state.
1313 *
1314 * Return: VDO_SUCCESS or an error.
1315 */
1316static int __must_check decode_components(u8 *buffer, size_t *offset,
1317					  struct volume_geometry *geometry,
1318					  struct vdo_component_states *states)
1319{
1320	int result;
1321
1322	decode_vdo_component(buffer, offset, &states->vdo);
1323
1324	result = decode_layout(buffer, offset, vdo_get_data_region_start(*geometry) + 1,
1325			       states->vdo.config.physical_blocks, &states->layout);
1326	if (result != VDO_SUCCESS)
1327		return result;
1328
1329	result = decode_recovery_journal_state_7_0(buffer, offset,
1330						   &states->recovery_journal);
1331	if (result != VDO_SUCCESS)
1332		return result;
1333
1334	result = decode_slab_depot_state_2_0(buffer, offset, &states->slab_depot);
1335	if (result != VDO_SUCCESS)
1336		return result;
1337
1338	result = decode_block_map_state_2_0(buffer, offset, &states->block_map);
1339	if (result != VDO_SUCCESS)
1340		return result;
1341
1342	VDO_ASSERT_LOG_ONLY(*offset == VDO_COMPONENT_DATA_OFFSET + VDO_COMPONENT_DATA_SIZE,
1343			    "All decoded component data was used");
1344	return VDO_SUCCESS;
1345}
1346
1347/**
1348 * vdo_decode_component_states() - Decode the payload of a super block.
1349 * @buffer: The buffer containing the encoded super block contents.
1350 * @geometry: The vdo geometry
1351 * @states: A pointer to hold the decoded states.
1352 *
1353 * Return: VDO_SUCCESS or an error.
1354 */
1355int vdo_decode_component_states(u8 *buffer, struct volume_geometry *geometry,
1356				struct vdo_component_states *states)
1357{
1358	int result;
1359	size_t offset = VDO_COMPONENT_DATA_OFFSET;
1360
1361	/* This is for backwards compatibility. */
1362	decode_u32_le(buffer, &offset, &states->unused);
1363
1364	/* Check the VDO volume version */
1365	decode_version_number(buffer, &offset, &states->volume_version);
1366	result = validate_version(VDO_VOLUME_VERSION_67_0, states->volume_version,
1367				  "volume");
1368	if (result != VDO_SUCCESS)
1369		return result;
1370
1371	result = decode_components(buffer, &offset, geometry, states);
1372	if (result != VDO_SUCCESS)
1373		vdo_uninitialize_layout(&states->layout);
1374
1375	return result;
1376}
1377
1378/**
1379 * vdo_validate_component_states() - Validate the decoded super block configuration.
1380 * @states: The state decoded from the super block.
1381 * @geometry_nonce: The nonce from the geometry block.
1382 * @physical_size: The minimum block count of the underlying storage.
1383 * @logical_size: The expected logical size of the VDO, or 0 if the logical size may be
1384 *                unspecified.
1385 *
1386 * Return: VDO_SUCCESS or an error if the configuration is invalid.
1387 */
1388int vdo_validate_component_states(struct vdo_component_states *states,
1389				  nonce_t geometry_nonce, block_count_t physical_size,
1390				  block_count_t logical_size)
1391{
1392	if (geometry_nonce != states->vdo.nonce) {
1393		return vdo_log_error_strerror(VDO_BAD_NONCE,
1394					      "Geometry nonce %llu does not match superblock nonce %llu",
1395					      (unsigned long long) geometry_nonce,
1396					      (unsigned long long) states->vdo.nonce);
1397	}
1398
1399	return vdo_validate_config(&states->vdo.config, physical_size, logical_size);
1400}
1401
1402/**
1403 * vdo_encode_component_states() - Encode the state of all vdo components in the super block.
1404 */
static void vdo_encode_component_states(u8 *buffer, size_t *offset,
					const struct vdo_component_states *states)
{
	/* This is for backwards compatibility. */
	encode_u32_le(buffer, offset, states->unused);
	encode_version_number(buffer, offset, states->volume_version);
	/* Encode each component in the fixed order the decoder expects. */
	encode_vdo_component(buffer, offset, states->vdo);
	encode_layout(buffer, offset, &states->layout);
	encode_recovery_journal_state_7_0(buffer, offset, states->recovery_journal);
	encode_slab_depot_state_2_0(buffer, offset, states->slab_depot);
	encode_block_map_state_2_0(buffer, offset, states->block_map);

	/* The component data region has a fixed size; every byte must be written. */
	VDO_ASSERT_LOG_ONLY(*offset == VDO_COMPONENT_DATA_OFFSET + VDO_COMPONENT_DATA_SIZE,
			    "All super block component data was encoded");
}
1420
1421/**
1422 * vdo_encode_super_block() - Encode a super block into its on-disk representation.
1423 */
1424void vdo_encode_super_block(u8 *buffer, struct vdo_component_states *states)
1425{
1426	u32 checksum;
1427	struct header header = SUPER_BLOCK_HEADER_12_0;
1428	size_t offset = 0;
1429
1430	header.size += VDO_COMPONENT_DATA_SIZE;
1431	vdo_encode_header(buffer, &offset, &header);
1432	vdo_encode_component_states(buffer, &offset, states);
1433
1434	checksum = vdo_crc32(buffer, offset);
1435	encode_u32_le(buffer, &offset, checksum);
1436
1437	/*
1438	 * Even though the buffer is a full block, to avoid the potential corruption from a torn
1439	 * write, the entire encoding must fit in the first sector.
1440	 */
1441	VDO_ASSERT_LOG_ONLY(offset <= VDO_SECTOR_SIZE,
1442			    "entire superblock must fit in one sector");
1443}
1444
1445/**
1446 * vdo_decode_super_block() - Decode a super block from its on-disk representation.
1447 */
1448int vdo_decode_super_block(u8 *buffer)
1449{
1450	struct header header;
1451	int result;
1452	u32 checksum, saved_checksum;
1453	size_t offset = 0;
1454
1455	/* Decode and validate the header. */
1456	vdo_decode_header(buffer, &offset, &header);
1457	result = vdo_validate_header(&SUPER_BLOCK_HEADER_12_0, &header, false, __func__);
1458	if (result != VDO_SUCCESS)
1459		return result;
1460
1461	if (header.size > VDO_COMPONENT_DATA_SIZE + sizeof(u32)) {
1462		/*
1463		 * We can't check release version or checksum until we know the content size, so we
1464		 * have to assume a version mismatch on unexpected values.
1465		 */
1466		return vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
1467					      "super block contents too large: %zu",
1468					      header.size);
1469	}
1470
1471	/* Skip past the component data for now, to verify the checksum. */
1472	offset += VDO_COMPONENT_DATA_SIZE;
1473
1474	checksum = vdo_crc32(buffer, offset);
1475	decode_u32_le(buffer, &offset, &saved_checksum);
1476
1477	result = VDO_ASSERT(offset == VDO_SUPER_BLOCK_FIXED_SIZE + VDO_COMPONENT_DATA_SIZE,
1478			    "must have decoded entire superblock payload");
1479	if (result != VDO_SUCCESS)
1480		return result;
1481
1482	return ((checksum != saved_checksum) ? VDO_CHECKSUM_MISMATCH : VDO_SUCCESS);
1483}
1484