1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 * Copyright 2023 Red Hat
4 */
5
6#ifndef VDO_ENCODINGS_H
7#define VDO_ENCODINGS_H
8
9#include <linux/blk_types.h>
10#include <linux/crc32.h>
11#include <linux/limits.h>
12#include <linux/uuid.h>
13
14#include "numeric.h"
15
16#include "constants.h"
17#include "types.h"
18
/*
 * An in-memory representation of a version number for versioned structures on disk.
 *
 * A version number consists of two portions, a major version and a minor version. Any format
 * change which does not require an explicit upgrade step from the previous version should
 * increment the minor version. Any format change which either requires an explicit upgrade step,
 * or is wholly incompatible (i.e. cannot be upgraded to), should increment the major version, and
 * set the minor version to 0.
 */
struct version_number {
	/* Bumped for changes needing an explicit upgrade step, or that are incompatible */
	u32 major_version;
	/* Bumped for changes needing no explicit upgrade step; reset to 0 on a major bump */
	u32 minor_version;
};
32
/*
 * A packed, machine-independent, on-disk representation of a version_number. Both fields are
 * stored in little-endian byte order.
 */
struct packed_version_number {
	/* Little-endian encoding of version_number.major_version */
	__le32 major_version;
	/* Little-endian encoding of version_number.minor_version */
	__le32 minor_version;
} __packed;
41
/*
 * The registry of component ids for use in headers. Each id names one kind of versioned
 * component; it is the value carried in the id field of a struct header.
 */
#define VDO_SUPER_BLOCK 0
#define VDO_LAYOUT 1
#define VDO_RECOVERY_JOURNAL 2
#define VDO_SLAB_DEPOT 3
#define VDO_BLOCK_MAP 4
#define VDO_GEOMETRY_BLOCK 5
49
/*
 * The header for versioned data stored on disk. This is the native in-memory form; struct
 * packed_header is the corresponding on-disk encoding.
 */
struct header {
	u32 id; /* The component this is a header for (one of the component ids) */
	struct version_number version; /* The version of the data format */
	size_t size; /* The size of the data following this header */
};
56
/*
 * A packed, machine-independent, on-disk representation of a component header. All integer
 * fields are little-endian.
 */
struct packed_header {
	/* The component id */
	__le32 id;
	/* The packed version of the data format */
	struct packed_version_number version;
	/* The size of the data following the header; always 64 bits wide on disk */
	__le64 size;
} __packed;
63
/* Constants describing the geometry block. */
enum {
	/* NOTE(review): presumably the block number at which the geometry block lives - confirm */
	VDO_GEOMETRY_BLOCK_LOCATION = 0,
	/* Length of the geometry magic number, excluding the extra byte in its array */
	VDO_GEOMETRY_MAGIC_NUMBER_SIZE = 8,
	/* The geometry block version used by default */
	VDO_DEFAULT_GEOMETRY_BLOCK_VERSION = 5,
};
69
/*
 * The index configuration stored as part of a volume geometry. The struct is packed, so there
 * is no padding between the fields.
 */
struct index_config {
	/* NOTE(review): presumably the index memory setting; units are not visible here - confirm */
	u32 mem;
	/* Not used; occupies space in the packed layout */
	u32 unused;
	/* NOTE(review): presumably whether the index is sparse - confirm */
	bool sparse;
} __packed;
75
/* The ids of the regions of a volume; also used to index volume_geometry.regions[]. */
enum volume_region_id {
	VDO_INDEX_REGION = 0,
	VDO_DATA_REGION = 1,
	/* Not a region: the number of region ids defined above */
	VDO_VOLUME_REGION_COUNT,
};
81
/* One region of a volume: which region it is and where it starts. Packed. */
struct volume_region {
	/* The ID of the region */
	enum volume_region_id id;
	/*
	 * The absolute starting offset on the device. The region continues until the next region
	 * begins, so no explicit length is stored.
	 */
	physical_block_number_t start_block;
} __packed;
91
/*
 * The geometry of a volume: its identity (nonce and uuid), the bio offset, the table of
 * regions, and the index configuration. Packed, so there is no padding between fields.
 */
struct volume_geometry {
	/* For backwards compatibility */
	u32 unused;
	/* The nonce of this volume */
	nonce_t nonce;
	/* The uuid of this volume */
	uuid_t uuid;
	/* The block offset to be applied to bios */
	block_count_t bio_offset;
	/* The regions in ID order (indexed by enum volume_region_id) */
	struct volume_region regions[VDO_VOLUME_REGION_COUNT];
	/* The index config */
	struct index_config index_config;
} __packed;
106
/*
 * This volume geometry struct is used for sizing only. It has the same fields as struct
 * volume_geometry except that it has no bio_offset.
 */
struct volume_geometry_4_0 {
	/* For backwards compatibility */
	u32 unused;
	/* The nonce of this volume */
	nonce_t nonce;
	/* The uuid of this volume */
	uuid_t uuid;
	/* The regions in ID order */
	struct volume_region regions[VDO_VOLUME_REGION_COUNT];
	/* The index config */
	struct index_config index_config;
} __packed;
120
/*
 * The geometry block magic number, defined elsewhere. The array is one byte longer than
 * VDO_GEOMETRY_MAGIC_NUMBER_SIZE. NOTE(review): presumably room for a NUL terminator - confirm.
 */
extern const u8 VDO_GEOMETRY_MAGIC_NUMBER[VDO_GEOMETRY_MAGIC_NUMBER_SIZE + 1];
122
/**
 * DOC: Block map entries
 *
 * The entry for each logical block in the block map is encoded into five bytes, which saves space
 * in both the on-disk and in-memory layouts. It consists of the 36 low-order bits of a
 * physical_block_number_t (addressing 256 terabytes with a 4KB block size) and a 4-bit encoding of
 * a block_mapping_state.
 *
 * Of the 8 high bits of the 5-byte structure:
 *
 * Bits 7..4: The four highest bits of the 36-bit physical block number
 * Bits 3..0: The 4-bit block_mapping_state
 *
 * The following 4 bytes are the low order bytes of the physical block number, in little-endian
 * order.
 *
 * Conversion functions to and from a data location are provided:
 * vdo_pack_block_map_entry() and vdo_unpack_block_map_entry().
 */
struct block_map_entry {
	/*
	 * The declaration order of the two nibbles depends on the byte order, with the intent
	 * that the layout described above is the same on every platform.
	 */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	unsigned mapping_state : 4;
	unsigned pbn_high_nibble : 4;
#else
	unsigned pbn_high_nibble : 4;
	unsigned mapping_state : 4;
#endif

	/* The 32 low-order bits of the physical block number, little-endian */
	__le32 pbn_low_word;
} __packed;
152
/* The packed header found at the start of each block map page (after the version). */
struct block_map_page_header {
	/* Little-endian nonce; NOTE(review): presumably that of the owning VDO - confirm */
	__le64 nonce;
	/* Little-endian physical block number recorded for this page */
	__le64 pbn;

	/* May be non-zero on disk */
	u8 unused_long_word[8];

	/* Whether this page has been written twice to disk */
	bool initialized;

	/* Always zero on disk */
	u8 unused_byte1;

	/* May be non-zero on disk */
	u8 unused_byte2;
	u8 unused_byte3;
} __packed;
170
/*
 * The packed layout of a block map page: a version, the page header, and then the entries,
 * declared as a flexible array so the entry count is not fixed by this type.
 */
struct block_map_page {
	struct packed_version_number version;
	struct block_map_page_header header;
	struct block_map_entry entries[];
} __packed;
176
/* The possible results of validating a block map page (see vdo_validate_block_map_page()). */
enum block_map_page_validity {
	VDO_BLOCK_MAP_PAGE_VALID,
	VDO_BLOCK_MAP_PAGE_INVALID,
	/* Valid page found in the wrong location on disk */
	VDO_BLOCK_MAP_PAGE_BAD,
};
183
/*
 * The packed block map state (format 2.0). Its size contributes to
 * BLOCK_MAP_COMPONENT_ENCODED_SIZE.
 */
struct block_map_state_2_0 {
	/* NOTE(review): presumably the first block and count of a flat page area - confirm */
	physical_block_number_t flat_page_origin;
	block_count_t flat_page_count;
	/* NOTE(review): presumably the first block and count of the tree roots - confirm */
	physical_block_number_t root_origin;
	block_count_t root_count;
} __packed;
190
/*
 * One page number per level of the block map tree. NOTE(review): presumably the number of
 * pages at each level (see vdo_compute_new_forest_pages()) - confirm.
 */
struct boundary {
	page_number_t levels[VDO_BLOCK_MAP_TREE_HEIGHT];
};
194
/* The header for the 2.0 block map state; defined elsewhere. */
extern const struct header VDO_BLOCK_MAP_HEADER_2_0;
196
/*
 * The state of the recovery journal as encoded in the VDO super block (format 7.0). Packed;
 * its size contributes to RECOVERY_JOURNAL_COMPONENT_ENCODED_SIZE.
 */
struct recovery_journal_state_7_0 {
	/* Sequence number to start the journal */
	sequence_number_t journal_start;
	/* Number of logical blocks used by VDO */
	block_count_t logical_blocks_used;
	/* Number of block map pages allocated */
	block_count_t block_map_data_blocks;
} __packed;
206
/* The header for the 7.0 recovery journal state; defined elsewhere. */
extern const struct header VDO_RECOVERY_JOURNAL_HEADER_7_0;
208
/* A count of journal entries; 16 bits wide, matching the __le16 entry counts stored on disk. */
typedef u16 journal_entry_count_t;
210
/*
 * The unpacked form of a recovery journal entry. It identifies a block map slot (a block map
 * page and a slot within it) and records two data locations for that slot: the mapping being
 * stored in the slot, and the mapping that was stored there before. The journal entry also
 * stores an operation code that says whether the mapping is for a logical block or for the
 * block map tree itself.
 */
struct recovery_journal_entry {
	/* The block map page and slot being updated */
	struct block_map_slot slot;
	/* The location that will be stored in the slot */
	struct data_location mapping;
	/* The location that was stored in the slot */
	struct data_location unmapping;
	/* The kind of operation this entry records */
	enum journal_operation operation;
};
224
/* The packed, on-disk representation of a recovery journal entry (16 bytes, per the layout below). */
struct packed_recovery_journal_entry {
	/*
	 * In little-endian bit order:
	 * Bits 15..12: The four highest bits of the 36-bit physical block number of the block map
	 *              tree page
	 * Bits 11..2: The 10-bit block map page slot number, split into slot_low (its 6 low
	 *             bits) and slot_high (its 4 high bits)
	 * Bits 1..0: The journal_operation of the entry (this actually only requires 1 bit, but
	 *            it is convenient to keep the extra bit as part of this field).
	 */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	unsigned operation : 2;
	unsigned slot_low : 6;
	unsigned slot_high : 4;
	unsigned pbn_high_nibble : 4;
#else
	unsigned slot_low : 6;
	unsigned operation : 2;
	unsigned pbn_high_nibble : 4;
	unsigned slot_high : 4;
#endif

	/*
	 * Bits 47..16: The 32 low-order bits of the block map page PBN, in little-endian byte
	 * order
	 */
	__le32 pbn_low_word;

	/*
	 * Bits 87..48: The five-byte block map entry encoding the location that will be stored in
	 * the block map page slot
	 */
	struct block_map_entry mapping;

	/*
	 * Bits 127..88: The five-byte block map entry encoding the location that was stored in the
	 * block map page slot
	 */
	struct block_map_entry unmapping;
} __packed;
265
/*
 * The packed, on-disk representation of an old format recovery journal entry. Unlike the
 * current format, it carries a single block map entry rather than separate mapping and
 * unmapping entries.
 */
struct packed_recovery_journal_entry_1 {
	/*
	 * In little-endian bit order:
	 * Bits 15..12: The four highest bits of the 36-bit physical block number of the block map
	 *              tree page
	 * Bits 11..2: The 10-bit block map page slot number
	 * Bits 1..0: The 2-bit journal_operation of the entry
	 */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	unsigned operation : 2;
	unsigned slot_low : 6;
	unsigned slot_high : 4;
	unsigned pbn_high_nibble : 4;
#else
	unsigned slot_low : 6;
	unsigned operation : 2;
	unsigned pbn_high_nibble : 4;
	unsigned slot_high : 4;
#endif

	/*
	 * Bits 47..16: The 32 low-order bits of the block map page PBN, in little-endian byte
	 * order
	 */
	__le32 pbn_low_word;

	/*
	 * Bits 87..48: The five-byte block map entry encoding the location that was or will be
	 * stored in the block map page slot
	 */
	struct block_map_entry block_map_entry;
} __packed;
300
/*
 * Journal operation codes. NOTE(review): judging by the "_1" suffix, presumably the values of
 * the 2-bit operation field in the old format entry (packed_recovery_journal_entry_1) - confirm.
 */
enum journal_operation_1 {
	VDO_JOURNAL_DATA_DECREMENT = 0,
	VDO_JOURNAL_DATA_INCREMENT = 1,
	VDO_JOURNAL_BLOCK_MAP_DECREMENT = 2,
	VDO_JOURNAL_BLOCK_MAP_INCREMENT = 3,
} __packed;
307
/*
 * The unpacked, in-memory form of a recovery journal block header. struct packed_journal_header
 * is the corresponding on-disk encoding.
 */
struct recovery_block_header {
	sequence_number_t block_map_head; /* Block map head sequence number */
	sequence_number_t slab_journal_head; /* Slab journal head seq. number */
	sequence_number_t sequence_number; /* Sequence number for this block */
	nonce_t nonce; /* A given VDO instance's nonce */
	block_count_t logical_blocks_used; /* Logical blocks in use */
	block_count_t block_map_data_blocks; /* Allocated block map pages */
	journal_entry_count_t entry_count; /* Number of entries written */
	u8 check_byte; /* The protection check byte */
	u8 recovery_count; /* Number of recoveries completed */
	enum vdo_metadata_type metadata_type; /* Metadata type */
};
320
/*
 * The packed, on-disk representation of a recovery journal block header. All fields are kept in
 * little-endian byte order. Note that the field order here differs from that of the unpacked
 * struct recovery_block_header.
 */
struct packed_journal_header {
	/* Block map head 64-bit sequence number */
	__le64 block_map_head;

	/* Slab journal head 64-bit sequence number */
	__le64 slab_journal_head;

	/* The 64-bit sequence number for this block */
	__le64 sequence_number;

	/* A given VDO instance's 64-bit nonce */
	__le64 nonce;

	/*
	 * 8-bit metadata type (should always be one for the recovery journal).
	 * NOTE(review): vdo_is_valid_recovery_journal_sector() also tests the unpacked type
	 * against VDO_METADATA_RECOVERY_JOURNAL_2, so "always one" may predate the newer format -
	 * confirm.
	 */
	u8 metadata_type;

	/* 16-bit count of the entries encoded in the block */
	__le16 entry_count;

	/* 64-bit count of the logical blocks used when this block was opened */
	__le64 logical_blocks_used;

	/* 64-bit count of the block map blocks used when this block was opened */
	__le64 block_map_data_blocks;

	/* The protection check byte */
	u8 check_byte;

	/* The number of recoveries completed */
	u8 recovery_count;
} __packed;
356
/*
 * The packed, on-disk layout of one sector of a recovery journal block: a three-byte sector
 * header followed by the entries. The check byte and recovery count are compared against the
 * block header by vdo_is_valid_recovery_journal_sector().
 */
struct packed_journal_sector {
	/* The protection check byte */
	u8 check_byte;

	/* The number of recoveries completed */
	u8 recovery_count;

	/* The number of entries in this sector */
	u8 entry_count;

	/* Journal entries for this sector (flexible array; the count is entry_count) */
	struct packed_recovery_journal_entry entries[];
} __packed;
370
/* Entry capacities of recovery journal sectors and blocks, for the current and v1 formats. */
enum {
	/*
	 * The number of entries in each sector (except the last) when filled: whatever fits in a
	 * sector after the sector header
	 */
	RECOVERY_JOURNAL_ENTRIES_PER_SECTOR =
		((VDO_SECTOR_SIZE - sizeof(struct packed_journal_sector)) /
		 sizeof(struct packed_recovery_journal_entry)),
	/*
	 * Seven sectors' worth of entries. NOTE(review): presumably the remaining sector of the
	 * block holds the block header - confirm.
	 */
	RECOVERY_JOURNAL_ENTRIES_PER_BLOCK = RECOVERY_JOURNAL_ENTRIES_PER_SECTOR * 7,
	/* The number of entries in a v1 recovery journal block. */
	RECOVERY_JOURNAL_1_ENTRIES_PER_BLOCK = 311,
	/* The number of entries in each v1 sector (except the last) when filled */
	RECOVERY_JOURNAL_1_ENTRIES_PER_SECTOR =
		((VDO_SECTOR_SIZE - sizeof(struct packed_journal_sector)) /
		 sizeof(struct packed_recovery_journal_entry_1)),
	/* The number of entries in the last sector when a block is full */
	RECOVERY_JOURNAL_1_ENTRIES_IN_LAST_SECTOR =
		(RECOVERY_JOURNAL_1_ENTRIES_PER_BLOCK % RECOVERY_JOURNAL_1_ENTRIES_PER_SECTOR),
};
387
/*
 * A type representing a reference count of a block. It is a single byte; see
 * EMPTY_REFERENCE_COUNT, MAXIMUM_REFERENCE_COUNT, and PROVISIONAL_REFERENCE_COUNT for the
 * special values.
 */
typedef u8 vdo_refcount_t;
390
/* The absolute position of an entry in a recovery journal or slab journal. */
struct journal_point {
	/* The sequence number of the journal block */
	sequence_number_t sequence_number;
	/* The entry count locating the entry within that block */
	journal_entry_count_t entry_count;
};
396
/* A packed, platform-independent encoding of a struct journal_point in 64 bits. */
struct packed_journal_point {
	/*
	 * The packed representation is the little-endian 64-bit representation of the low-order 48
	 * bits of the sequence number, shifted up 16 bits, or'ed with the 16-bit entry count.
	 *
	 * Very long-term, the top 16 bits of the sequence number may not always be zero, as this
	 * encoding assumes--see BZ 1523240.
	 */
	__le64 encoded_point;
} __packed;
408
/*
 * Special vdo_refcount_t values. All three fit in the single byte of a vdo_refcount_t.
 * NOTE(review): the semantics below are read off the names - confirm against the users.
 */
#define EMPTY_REFERENCE_COUNT 0
enum {
	/* Presumably the largest ordinary reference count */
	MAXIMUM_REFERENCE_COUNT = 254,
	/* Presumably marks a provisional reference rather than a real count */
	PROVISIONAL_REFERENCE_COUNT = 255,
};
415
/* The reference count capacity of a reference block sector and of a whole reference block. */
enum {
	/* The counts that fit in a sector after its packed_journal_point commit point */
	COUNTS_PER_SECTOR =
		((VDO_SECTOR_SIZE - sizeof(struct packed_journal_point)) / sizeof(vdo_refcount_t)),
	/* Every sector of a block holds counts */
	COUNTS_PER_BLOCK = COUNTS_PER_SECTOR * VDO_SECTORS_PER_BLOCK,
};
421
/*
 * The format of each sector of a reference_block on disk: a packed journal point followed by
 * as many reference counts as fill the rest of the sector.
 */
struct packed_reference_sector {
	struct packed_journal_point commit_point;
	vdo_refcount_t counts[COUNTS_PER_SECTOR];
} __packed;
427
/* A reference block on disk: one packed_reference_sector for each sector of the block. */
struct packed_reference_block {
	struct packed_reference_sector sectors[VDO_SECTORS_PER_BLOCK];
};
431
/*
 * The packed slab depot state (format 2.0). Its size contributes to
 * SLAB_DEPOT_COMPONENT_ENCODED_SIZE.
 */
struct slab_depot_state_2_0 {
	/* The slab configuration */
	struct slab_config slab_config;
	/* NOTE(review): presumably the bounds of the blocks managed by the depot - confirm */
	physical_block_number_t first_block;
	physical_block_number_t last_block;
	/* A zone count; NOTE(review): presumably the number of physical zones - confirm */
	zone_count_t zone_count;
} __packed;
438
/* The header for the 2.0 slab depot state; defined elsewhere. */
extern const struct header VDO_SLAB_DEPOT_HEADER_2_0;
440
/*
 * Slab journal blocks may have one of two formats, depending upon whether or not any of the
 * entries in the block are block map increments. Since the steady state for a VDO is that all of
 * the necessary block map pages will be allocated, most slab journal blocks will have only data
 * entries. Such blocks can hold more entries, hence the two formats.
 */
447
/* A single slab journal entry, in its unpacked in-memory form */
struct slab_journal_entry {
	/* The slab block number the entry applies to */
	slab_block_number sbn;
	/* The journal operation being recorded */
	enum journal_operation operation;
	/* NOTE(review): presumably true for an increment and false for a decrement - confirm */
	bool increment;
};
454
/*
 * A single slab journal entry in its on-disk form. The three offset fields together hold a
 * 23-bit offset (8 + 8 + 7 bits); the remaining bit is the increment flag.
 */
typedef struct {
	/* The low 8 bits of the offset */
	u8 offset_low8;
	/* The middle 8 bits of the offset */
	u8 offset_mid8;

	/* The high 7 bits of the offset and the increment flag; order depends on byte order */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	unsigned offset_high7 : 7;
	unsigned increment : 1;
#else
	unsigned increment : 1;
	unsigned offset_high7 : 7;
#endif
} __packed packed_slab_journal_entry;
468
/*
 * The unpacked representation of the header of a slab journal block. struct
 * packed_slab_journal_block_header is the corresponding on-disk encoding.
 */
struct slab_journal_block_header {
	/* Sequence number for head of journal */
	sequence_number_t head;
	/* Sequence number for this block */
	sequence_number_t sequence_number;
	/* The nonce for a given VDO instance */
	nonce_t nonce;
	/* Recovery journal point for last entry */
	struct journal_point recovery_point;
	/* Metadata type */
	enum vdo_metadata_type metadata_type;
	/* Whether this block contains block map increments */
	bool has_block_map_increments;
	/* The number of entries in the block */
	journal_entry_count_t entry_count;
};
486
/*
 * The packed, on-disk representation of a slab journal block header. All fields are kept in
 * little-endian byte order. Its size determines VDO_SLAB_JOURNAL_PAYLOAD_SIZE.
 */
struct packed_slab_journal_block_header {
	/* 64-bit sequence number for head of journal */
	__le64 head;
	/* 64-bit sequence number for this block */
	__le64 sequence_number;
	/* Recovery journal point for the last entry, packed into 64 bits */
	struct packed_journal_point recovery_point;
	/* The 64-bit nonce for a given VDO instance */
	__le64 nonce;
	/* 8-bit metadata type (should always be two, for the slab journal) */
	u8 metadata_type;
	/* Whether this block contains block map increments */
	bool has_block_map_increments;
	/* 16-bit count of the entries encoded in the block */
	__le16 entry_count;
} __packed;
507
/* Sizes and entry capacities for the two slab journal block formats. */
enum {
	/* The bytes of a block left over after the packed block header */
	VDO_SLAB_JOURNAL_PAYLOAD_SIZE =
		VDO_BLOCK_SIZE - sizeof(struct packed_slab_journal_block_header),
	/*
	 * The capacity of a block with block map increments. The payload size in bits is divided
	 * by 25 bits per entry; presumably that is the 24-bit packed entry plus one bit in the
	 * entry_types bit map.
	 */
	VDO_SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK = (VDO_SLAB_JOURNAL_PAYLOAD_SIZE * 8) / 25,
	/* The bytes needed for a bit map with one bit per full entry (rounded up) */
	VDO_SLAB_JOURNAL_ENTRY_TYPES_SIZE =
		((VDO_SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK - 1) / 8) + 1,
	/* The capacity of a block holding only packed entries and no bit map */
	VDO_SLAB_JOURNAL_ENTRIES_PER_BLOCK =
		(VDO_SLAB_JOURNAL_PAYLOAD_SIZE / sizeof(packed_slab_journal_entry)),
};
517
/*
 * The payload of a slab journal block which has block map increments: the packed entries
 * followed by a bit map with one bit per entry.
 */
struct full_slab_journal_entries {
	/* The entries themselves */
	packed_slab_journal_entry entries[VDO_SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK];
	/* The bit map indicating which entries are block map increments */
	u8 entry_types[VDO_SLAB_JOURNAL_ENTRY_TYPES_SIZE];
} __packed;
525
/*
 * The payload of a slab journal block. The members overlay the same bytes; which of the two
 * entry formats applies is indicated by has_block_map_increments in the block header.
 */
typedef union {
	/* Entries which include block map increments */
	struct full_slab_journal_entries full_entries;
	/* Entries which are only data updates */
	packed_slab_journal_entry entries[VDO_SLAB_JOURNAL_ENTRIES_PER_BLOCK];
	/* Ensure the payload fills to the end of the block */
	u8 space[VDO_SLAB_JOURNAL_PAYLOAD_SIZE];
} __packed slab_journal_payload;
534
/* A whole slab journal block on disk: the packed header followed by the payload. */
struct packed_slab_journal_block {
	struct packed_slab_journal_block_header header;
	slab_journal_payload payload;
} __packed;
539
/* The offset of a slab journal tail block within the slab journal; a single byte. */
typedef u8 tail_block_offset_t;
542
/*
 * A packed slab summary entry: one byte of tail block offset followed by 8 bits of bit-fields.
 * The bit-field declaration order depends on the byte order, with the intent that the bit
 * positions noted below are the same on every platform.
 */
struct slab_summary_entry {
	/* Bits 7..0: The offset of the tail block within the slab journal */
	tail_block_offset_t tail_block_offset;

#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	/* Bits 13..8: A hint about the fullness of the slab */
	unsigned int fullness_hint : 6;
	/* Bit 14: Whether the ref_counts must be loaded from the layer */
	unsigned int load_ref_counts : 1;
	/* Bit 15: The believed cleanliness of this slab */
	unsigned int is_dirty : 1;
#else
	/* Bit 15: The believed cleanliness of this slab */
	unsigned int is_dirty : 1;
	/* Bit 14: Whether the ref_counts must be loaded from the layer */
	unsigned int load_ref_counts : 1;
	/* Bits 13..8: A hint about the fullness of the slab */
	unsigned int fullness_hint : 6;
#endif
} __packed;
563
/* Constants describing the slab summary. */
enum {
	/* The width of slab_summary_entry.fullness_hint */
	VDO_SLAB_SUMMARY_FULLNESS_HINT_BITS = 6,
	/* The number of summary entries that fit in one block */
	VDO_SLAB_SUMMARY_ENTRIES_PER_BLOCK = VDO_BLOCK_SIZE / sizeof(struct slab_summary_entry),
	/* The blocks needed per zone to hold an entry for each of MAX_VDO_SLABS slabs */
	VDO_SLAB_SUMMARY_BLOCKS_PER_ZONE = MAX_VDO_SLABS / VDO_SLAB_SUMMARY_ENTRIES_PER_BLOCK,
	/* The total summary blocks across MAX_VDO_PHYSICAL_ZONES zones */
	VDO_SLAB_SUMMARY_BLOCKS = VDO_SLAB_SUMMARY_BLOCKS_PER_ZONE * MAX_VDO_PHYSICAL_ZONES,
};
570
/*
 * The in-memory description of how the underlying storage is partitioned. The partitions form
 * a singly linked list starting at head.
 */
struct layout {
	/* NOTE(review): presumably the first block covered by the layout - confirm */
	physical_block_number_t start;
	/* NOTE(review): presumably the number of blocks covered by the layout - confirm */
	block_count_t size;
	/* NOTE(review): presumably the bounds of the space not yet given to a partition - confirm */
	physical_block_number_t first_free;
	physical_block_number_t last_free;
	/* The number of partitions in the list */
	size_t num_partitions;
	/* The first partition in the list */
	struct partition *head;
};
579
/* One partition of a layout, in its in-memory form; a node in the layout's linked list. */
struct partition {
	enum partition_id id; /* The id of this partition */
	physical_block_number_t offset; /* The offset into the layout of this partition */
	block_count_t count; /* The number of blocks in the partition */
	struct partition *next; /* A pointer to the next partition in the layout */
};
586
/*
 * The packed layout record (format 3.0). Its size is one term of VDO_LAYOUT_ENCODED_SIZE; the
 * field names mirror first_free and last_free of struct layout, with the partition count held
 * in a single byte.
 */
struct layout_3_0 {
	physical_block_number_t first_free;
	physical_block_number_t last_free;
	u8 partition_count;
} __packed;
592
/*
 * The packed partition record (format 3.0). VDO_LAYOUT_ENCODED_SIZE accounts for
 * VDO_PARTITION_COUNT of these.
 */
struct partition_3_0 {
	enum partition_id id;
	physical_block_number_t offset;
	physical_block_number_t base; /* unused but retained for backwards compatibility */
	block_count_t count;
} __packed;
599
/*
 * The configuration of the VDO service, in its in-memory form. struct packed_vdo_config is the
 * corresponding on-disk encoding.
 */
struct vdo_config {
	block_count_t logical_blocks; /* number of logical blocks */
	block_count_t physical_blocks; /* number of physical blocks */
	block_count_t slab_size; /* number of blocks in a slab */
	block_count_t recovery_journal_size; /* number of recovery journal blocks */
	block_count_t slab_journal_blocks; /* number of slab journal blocks */
};
610
/*
 * This is the structure that captures the vdo fields saved as a super block component. It is
 * the in-memory form; struct packed_vdo_component_41_0 carries the same fields on disk.
 */
struct vdo_component {
	/* The state of the VDO */
	enum vdo_state state;
	/* NOTE(review): presumably the number of recoveries completed - confirm */
	u64 complete_recoveries;
	/* NOTE(review): presumably the number of recoveries from read-only mode - confirm */
	u64 read_only_recoveries;
	/* The configuration of the VDO */
	struct vdo_config config;
	/* The nonce of the VDO */
	nonce_t nonce;
};
619
/*
 * A packed, machine-independent, on-disk representation of the vdo_config in the VDO component
 * data in the super block. Each field is the little-endian 64-bit form of the vdo_config field
 * of the same name.
 */
struct packed_vdo_config {
	__le64 logical_blocks;
	__le64 physical_blocks;
	__le64 slab_size;
	__le64 recovery_journal_size;
	__le64 slab_journal_blocks;
} __packed;
631
/*
 * A packed, machine-independent, on-disk representation of version 41.0 of the VDO component data
 * in the super block. All integer fields are little-endian; the state is stored in 32 bits.
 */
struct packed_vdo_component_41_0 {
	__le32 state;
	__le64 complete_recoveries;
	__le64 read_only_recoveries;
	struct packed_vdo_config config;
	__le64 nonce;
} __packed;
643
/*
 * The version of the on-disk format of a VDO volume. This should be incremented any time the
 * on-disk representation of any VDO structure changes. Changes which require only online upgrade
 * steps should increment the minor version. Changes which require an offline upgrade or which
 * cannot be upgraded to at all should increment the major version and set the minor version to 0.
 *
 * The constant itself is defined elsewhere.
 */
extern const struct version_number VDO_VOLUME_VERSION_67_0;
651
/* The encoded sizes, in bytes, of the headers and components stored in the super block. */
enum {
	/* The size of a component header on disk */
	VDO_ENCODED_HEADER_SIZE = sizeof(struct packed_header),
	/* Each of the next three is a header followed by the component's packed state */
	BLOCK_MAP_COMPONENT_ENCODED_SIZE =
		VDO_ENCODED_HEADER_SIZE + sizeof(struct block_map_state_2_0),
	RECOVERY_JOURNAL_COMPONENT_ENCODED_SIZE =
		VDO_ENCODED_HEADER_SIZE + sizeof(struct recovery_journal_state_7_0),
	SLAB_DEPOT_COMPONENT_ENCODED_SIZE =
		VDO_ENCODED_HEADER_SIZE + sizeof(struct slab_depot_state_2_0),
	/* The number of partition records counted in an encoded layout */
	VDO_PARTITION_COUNT = 4,
	/* A header, the layout record, and one record per partition */
	VDO_LAYOUT_ENCODED_SIZE = (VDO_ENCODED_HEADER_SIZE +
				   sizeof(struct layout_3_0) +
				   (sizeof(struct partition_3_0) * VDO_PARTITION_COUNT)),
	/*
	 * A header plus one u32. NOTE(review): presumably the u32 is a checksum of the super
	 * block - confirm.
	 */
	VDO_SUPER_BLOCK_FIXED_SIZE = VDO_ENCODED_HEADER_SIZE + sizeof(u32),
	/* What remains of one sector after the fixed part of the super block */
	VDO_MAX_COMPONENT_DATA_SIZE = VDO_SECTOR_SIZE - VDO_SUPER_BLOCK_FIXED_SIZE,
	/* The VDO component is preceded by a packed version number rather than a full header */
	VDO_COMPONENT_ENCODED_SIZE =
		(sizeof(struct packed_version_number) + sizeof(struct packed_vdo_component_41_0)),
	/* The component data begins immediately after the super block header */
	VDO_COMPONENT_DATA_OFFSET = VDO_ENCODED_HEADER_SIZE,
	/*
	 * The total component data: a u32 and a packed version number (matching the unused and
	 * volume_version fields of struct vdo_component_states), then every encoded component
	 */
	VDO_COMPONENT_DATA_SIZE = (sizeof(u32) +
				   sizeof(struct packed_version_number) +
				   VDO_COMPONENT_ENCODED_SIZE +
				   VDO_LAYOUT_ENCODED_SIZE +
				   RECOVERY_JOURNAL_COMPONENT_ENCODED_SIZE +
				   SLAB_DEPOT_COMPONENT_ENCODED_SIZE +
				   BLOCK_MAP_COMPONENT_ENCODED_SIZE),
};
677
/*
 * The entirety of the component data encoded in the VDO super block, gathered into one
 * in-memory structure.
 */
struct vdo_component_states {
	/* For backwards compatibility */
	u32 unused;

	/* The VDO volume version */
	struct version_number volume_version;

	/* Components */
	struct vdo_component vdo;
	struct block_map_state_2_0 block_map;
	struct recovery_journal_state_7_0 recovery_journal;
	struct slab_depot_state_2_0 slab_depot;

	/* Our partitioning of the underlying storage */
	struct layout layout;
};
695
696/**
697 * vdo_are_same_version() - Check whether two version numbers are the same.
698 * @version_a: The first version.
699 * @version_b: The second version.
700 *
701 * Return: true if the two versions are the same.
702 */
703static inline bool vdo_are_same_version(struct version_number version_a,
704					struct version_number version_b)
705{
706	return ((version_a.major_version == version_b.major_version) &&
707		(version_a.minor_version == version_b.minor_version));
708}
709
710/**
711 * vdo_is_upgradable_version() - Check whether an actual version is upgradable to an expected
712 *                               version.
713 * @expected_version: The expected version.
714 * @actual_version: The version being validated.
715 *
716 * An actual version is upgradable if its major number is expected but its minor number differs,
717 * and the expected version's minor number is greater than the actual version's minor number.
718 *
719 * Return: true if the actual version is upgradable.
720 */
721static inline bool vdo_is_upgradable_version(struct version_number expected_version,
722					     struct version_number actual_version)
723{
724	return ((expected_version.major_version == actual_version.major_version) &&
725		(expected_version.minor_version > actual_version.minor_version));
726}
727
/*
 * Check an actual header against an expected one; defined elsewhere. The result must be
 * checked by the caller. NOTE(review): presumably exact_size selects whether the sizes must
 * match exactly and component_name is used when reporting a mismatch - confirm against the
 * definition.
 */
int __must_check vdo_validate_header(const struct header *expected_header,
				     const struct header *actual_header, bool exact_size,
				     const char *component_name);
731
/*
 * Encode a header into, or decode a header from, a buffer; both are defined elsewhere.
 * NOTE(review): presumably *offset is the position in the buffer at which to work and is
 * advanced past the header - confirm against the definitions.
 */
void vdo_encode_header(u8 *buffer, size_t *offset, const struct header *header);
void vdo_decode_header(u8 *buffer, size_t *offset, struct header *header);
734
735/**
736 * vdo_pack_version_number() - Convert a version_number to its packed on-disk representation.
737 * @version: The version number to convert.
738 *
739 * Return: the platform-independent representation of the version
740 */
741static inline struct packed_version_number vdo_pack_version_number(struct version_number version)
742{
743	return (struct packed_version_number) {
744		.major_version = __cpu_to_le32(version.major_version),
745		.minor_version = __cpu_to_le32(version.minor_version),
746	};
747}
748
749/**
750 * vdo_unpack_version_number() - Convert a packed_version_number to its native in-memory
751 *                               representation.
752 * @version: The version number to convert.
753 *
754 * Return: The platform-independent representation of the version.
755 */
756static inline struct version_number vdo_unpack_version_number(struct packed_version_number version)
757{
758	return (struct version_number) {
759		.major_version = __le32_to_cpu(version.major_version),
760		.minor_version = __le32_to_cpu(version.minor_version),
761	};
762}
763
764/**
765 * vdo_pack_header() - Convert a component header to its packed on-disk representation.
766 * @header: The header to convert.
767 *
768 * Return: the platform-independent representation of the header
769 */
770static inline struct packed_header vdo_pack_header(const struct header *header)
771{
772	return (struct packed_header) {
773		.id = __cpu_to_le32(header->id),
774		.version = vdo_pack_version_number(header->version),
775		.size = __cpu_to_le64(header->size),
776	};
777}
778
779/**
780 * vdo_unpack_header() - Convert a packed_header to its native in-memory representation.
781 * @header: The header to convert.
782 *
783 * Return: The platform-independent representation of the version.
784 */
785static inline struct header vdo_unpack_header(const struct packed_header *header)
786{
787	return (struct header) {
788		.id = __le32_to_cpu(header->id),
789		.version = vdo_unpack_version_number(header->version),
790		.size = __le64_to_cpu(header->size),
791	};
792}
793
794/**
795 * vdo_get_index_region_start() - Get the start of the index region from a geometry.
796 * @geometry: The geometry.
797 *
798 * Return: The start of the index region.
799 */
800static inline physical_block_number_t __must_check
801vdo_get_index_region_start(struct volume_geometry geometry)
802{
803	return geometry.regions[VDO_INDEX_REGION].start_block;
804}
805
806/**
807 * vdo_get_data_region_start() - Get the start of the data region from a geometry.
808 * @geometry: The geometry.
809 *
810 * Return: The start of the data region.
811 */
812static inline physical_block_number_t __must_check
813vdo_get_data_region_start(struct volume_geometry geometry)
814{
815	return geometry.regions[VDO_DATA_REGION].start_block;
816}
817
818/**
819 * vdo_get_index_region_size() - Get the size of the index region from a geometry.
820 * @geometry: The geometry.
821 *
822 * Return: The size of the index region.
823 */
824static inline physical_block_number_t __must_check
825vdo_get_index_region_size(struct volume_geometry geometry)
826{
827	return vdo_get_data_region_start(geometry) -
828		vdo_get_index_region_start(geometry);
829}
830
/*
 * Parse a geometry block into a volume_geometry; defined elsewhere. The result must be checked
 * by the caller. NOTE(review): presumably block points at the raw contents of the geometry
 * block and *geometry is filled in on success - confirm against the definition.
 */
int __must_check vdo_parse_geometry_block(unsigned char *block,
					  struct volume_geometry *geometry);
833
834static inline bool vdo_is_state_compressed(const enum block_mapping_state mapping_state)
835{
836	return (mapping_state > VDO_MAPPING_STATE_UNCOMPRESSED);
837}
838
839static inline struct block_map_entry
840vdo_pack_block_map_entry(physical_block_number_t pbn, enum block_mapping_state mapping_state)
841{
842	return (struct block_map_entry) {
843		.mapping_state = (mapping_state & 0x0F),
844		.pbn_high_nibble = ((pbn >> 32) & 0x0F),
845		.pbn_low_word = __cpu_to_le32(pbn & UINT_MAX),
846	};
847}
848
849static inline struct data_location vdo_unpack_block_map_entry(const struct block_map_entry *entry)
850{
851	physical_block_number_t low32 = __le32_to_cpu(entry->pbn_low_word);
852	physical_block_number_t high4 = entry->pbn_high_nibble;
853
854	return (struct data_location) {
855		.pbn = ((high4 << 32) | low32),
856		.state = entry->mapping_state,
857	};
858}
859
860static inline bool vdo_is_mapped_location(const struct data_location *location)
861{
862	return (location->state != VDO_MAPPING_STATE_UNMAPPED);
863}
864
865static inline bool vdo_is_valid_location(const struct data_location *location)
866{
867	if (location->pbn == VDO_ZERO_BLOCK)
868		return !vdo_is_state_compressed(location->state);
869	else
870		return vdo_is_mapped_location(location);
871}
872
873static inline physical_block_number_t __must_check
874vdo_get_block_map_page_pbn(const struct block_map_page *page)
875{
876	return __le64_to_cpu(page->header.pbn);
877}
878
/*
 * Format a buffer as a block map page; defined elsewhere. NOTE(review): presumably nonce, pbn,
 * and initialized are recorded in the page header and the returned pointer refers to the
 * formatted buffer - confirm against the definition.
 */
struct block_map_page *vdo_format_block_map_page(void *buffer, nonce_t nonce,
						 physical_block_number_t pbn,
						 bool initialized);
882
/*
 * Validate a block map page against a nonce and a physical block number; defined elsewhere.
 * The result, one of the enum block_map_page_validity values, must be checked by the caller.
 */
enum block_map_page_validity __must_check vdo_validate_block_map_page(struct block_map_page *page,
								      nonce_t nonce,
								      physical_block_number_t pbn);
886
887static inline page_count_t vdo_compute_block_map_page_count(block_count_t entries)
888{
889	return DIV_ROUND_UP(entries, VDO_BLOCK_MAP_ENTRIES_PER_PAGE);
890}
891
/*
 * Compute a count of new forest pages; defined elsewhere. The result must be used by the
 * caller. NOTE(review): presumably old_sizes describes the current per-level page counts,
 * new_sizes receives the per-level counts needed for the given number of entries, and the
 * return value is the number of additional pages - confirm against the definition.
 */
block_count_t __must_check vdo_compute_new_forest_pages(root_count_t root_count,
							struct boundary *old_sizes,
							block_count_t entries,
							struct boundary *new_sizes);
896
897/**
898 * vdo_pack_recovery_journal_entry() - Return the packed, on-disk representation of a recovery
899 *                                     journal entry.
900 * @entry: The journal entry to pack.
901 *
902 * Return: The packed representation of the journal entry.
903 */
904static inline struct packed_recovery_journal_entry
905vdo_pack_recovery_journal_entry(const struct recovery_journal_entry *entry)
906{
907	return (struct packed_recovery_journal_entry) {
908		.operation = entry->operation,
909		.slot_low = entry->slot.slot & 0x3F,
910		.slot_high = (entry->slot.slot >> 6) & 0x0F,
911		.pbn_high_nibble = (entry->slot.pbn >> 32) & 0x0F,
912		.pbn_low_word = __cpu_to_le32(entry->slot.pbn & UINT_MAX),
913		.mapping = vdo_pack_block_map_entry(entry->mapping.pbn,
914						    entry->mapping.state),
915		.unmapping = vdo_pack_block_map_entry(entry->unmapping.pbn,
916						      entry->unmapping.state),
917	};
918}
919
920/**
921 * vdo_unpack_recovery_journal_entry() - Unpack the on-disk representation of a recovery journal
922 *                                       entry.
923 * @entry: The recovery journal entry to unpack.
924 *
925 * Return: The unpacked entry.
926 */
927static inline struct recovery_journal_entry
928vdo_unpack_recovery_journal_entry(const struct packed_recovery_journal_entry *entry)
929{
930	physical_block_number_t low32 = __le32_to_cpu(entry->pbn_low_word);
931	physical_block_number_t high4 = entry->pbn_high_nibble;
932
933	return (struct recovery_journal_entry) {
934		.operation = entry->operation,
935		.slot = {
936			.pbn = ((high4 << 32) | low32),
937			.slot = (entry->slot_low | (entry->slot_high << 6)),
938		},
939		.mapping = vdo_unpack_block_map_entry(&entry->mapping),
940		.unmapping = vdo_unpack_block_map_entry(&entry->unmapping),
941	};
942}
943
/*
 * Prototype only; no definition appears in this part of the header. The function maps an
 * enum journal_operation to a constant string; the result is marked __must_check.
 */
const char * __must_check vdo_get_journal_operation_name(enum journal_operation operation);
945
946/**
947 * vdo_is_valid_recovery_journal_sector() - Determine whether the header of the given sector could
948 *                                          describe a valid sector for the given journal block
949 *                                          header.
950 * @header: The unpacked block header to compare against.
951 * @sector: The packed sector to check.
952 * @sector_number: The number of the sector being checked.
953 *
954 * Return: true if the sector matches the block header.
955 */
956static inline bool __must_check
957vdo_is_valid_recovery_journal_sector(const struct recovery_block_header *header,
958				     const struct packed_journal_sector *sector,
959				     u8 sector_number)
960{
961	if ((header->check_byte != sector->check_byte) ||
962	    (header->recovery_count != sector->recovery_count))
963		return false;
964
965	if (header->metadata_type == VDO_METADATA_RECOVERY_JOURNAL_2)
966		return sector->entry_count <= RECOVERY_JOURNAL_ENTRIES_PER_SECTOR;
967
968	if (sector_number == 7)
969		return sector->entry_count <= RECOVERY_JOURNAL_1_ENTRIES_IN_LAST_SECTOR;
970
971	return sector->entry_count <= RECOVERY_JOURNAL_1_ENTRIES_PER_SECTOR;
972}
973
974/**
975 * vdo_compute_recovery_journal_block_number() - Compute the physical block number of the recovery
976 *                                               journal block which would have a given sequence
977 *                                               number.
978 * @journal_size: The size of the journal.
979 * @sequence_number: The sequence number.
980 *
981 * Return: The pbn of the journal block which would the specified sequence number.
982 */
983static inline physical_block_number_t __must_check
984vdo_compute_recovery_journal_block_number(block_count_t journal_size,
985					  sequence_number_t sequence_number)
986{
987	/*
988	 * Since journal size is a power of two, the block number modulus can just be extracted
989	 * from the low-order bits of the sequence.
990	 */
991	return (sequence_number & (journal_size - 1));
992}
993
994/**
995 * vdo_get_journal_block_sector() - Find the recovery journal sector from the block header and
996 *                                  sector number.
997 * @header: The header of the recovery journal block.
998 * @sector_number: The index of the sector (1-based).
999 *
1000 * Return: A packed recovery journal sector.
1001 */
1002static inline struct packed_journal_sector * __must_check
1003vdo_get_journal_block_sector(struct packed_journal_header *header, int sector_number)
1004{
1005	char *sector_data = ((char *) header) + (VDO_SECTOR_SIZE * sector_number);
1006
1007	return (struct packed_journal_sector *) sector_data;
1008}
1009
1010/**
1011 * vdo_pack_recovery_block_header() - Generate the packed representation of a recovery block
1012 *                                    header.
1013 * @header: The header containing the values to encode.
1014 * @packed: The header into which to pack the values.
1015 */
1016static inline void vdo_pack_recovery_block_header(const struct recovery_block_header *header,
1017						  struct packed_journal_header *packed)
1018{
1019	*packed = (struct packed_journal_header) {
1020		.block_map_head = __cpu_to_le64(header->block_map_head),
1021		.slab_journal_head = __cpu_to_le64(header->slab_journal_head),
1022		.sequence_number = __cpu_to_le64(header->sequence_number),
1023		.nonce = __cpu_to_le64(header->nonce),
1024		.logical_blocks_used = __cpu_to_le64(header->logical_blocks_used),
1025		.block_map_data_blocks = __cpu_to_le64(header->block_map_data_blocks),
1026		.entry_count = __cpu_to_le16(header->entry_count),
1027		.check_byte = header->check_byte,
1028		.recovery_count = header->recovery_count,
1029		.metadata_type = header->metadata_type,
1030	};
1031}
1032
1033/**
1034 * vdo_unpack_recovery_block_header() - Decode the packed representation of a recovery block
1035 *                                      header.
1036 * @packed: The packed header to decode.
1037 *
1038 * Return: The unpacked header.
1039 */
1040static inline struct recovery_block_header
1041vdo_unpack_recovery_block_header(const struct packed_journal_header *packed)
1042{
1043	return (struct recovery_block_header) {
1044		.block_map_head = __le64_to_cpu(packed->block_map_head),
1045		.slab_journal_head = __le64_to_cpu(packed->slab_journal_head),
1046		.sequence_number = __le64_to_cpu(packed->sequence_number),
1047		.nonce = __le64_to_cpu(packed->nonce),
1048		.logical_blocks_used = __le64_to_cpu(packed->logical_blocks_used),
1049		.block_map_data_blocks = __le64_to_cpu(packed->block_map_data_blocks),
1050		.entry_count = __le16_to_cpu(packed->entry_count),
1051		.check_byte = packed->check_byte,
1052		.recovery_count = packed->recovery_count,
1053		.metadata_type = packed->metadata_type,
1054	};
1055}
1056
1057/**
1058 * vdo_compute_slab_count() - Compute the number of slabs a depot with given parameters would have.
1059 * @first_block: PBN of the first data block.
1060 * @last_block: PBN of the last data block.
1061 * @slab_size_shift: Exponent for the number of blocks per slab.
1062 *
1063 * Return: The number of slabs.
1064 */
1065static inline slab_count_t vdo_compute_slab_count(physical_block_number_t first_block,
1066						  physical_block_number_t last_block,
1067						  unsigned int slab_size_shift)
1068{
1069	return (slab_count_t) ((last_block - first_block) >> slab_size_shift);
1070}
1071
/*
 * Prototype only; no definition appears in this part of the header. Takes a partition, a slab
 * configuration (passed by value), and a zone count, along with a pointer to a
 * struct slab_depot_state_2_0. The int result is marked __must_check.
 * NOTE(review): presumably @state is filled in on success and the result is a status code --
 * confirm against the definition.
 */
int __must_check vdo_configure_slab_depot(const struct partition *partition,
					  struct slab_config slab_config,
					  zone_count_t zone_count,
					  struct slab_depot_state_2_0 *state);

/*
 * Prototype only; no definition appears in this part of the header. Takes a slab size and a slab
 * journal block count, along with a pointer to a struct slab_config. The int result is marked
 * __must_check.
 * NOTE(review): presumably @slab_config is filled in on success and the result is a status code --
 * confirm against the definition.
 */
int __must_check vdo_configure_slab(block_count_t slab_size,
				    block_count_t slab_journal_blocks,
				    struct slab_config *slab_config);
1080
1081/**
1082 * vdo_get_saved_reference_count_size() - Get the number of blocks required to save a reference
1083 *                                        counts state covering the specified number of data
1084 *                                        blocks.
1085 * @block_count: The number of physical data blocks that can be referenced.
1086 *
1087 * Return: The number of blocks required to save reference counts with the given block count.
1088 */
1089static inline block_count_t vdo_get_saved_reference_count_size(block_count_t block_count)
1090{
1091	return DIV_ROUND_UP(block_count, COUNTS_PER_BLOCK);
1092}
1093
1094/**
1095 * vdo_get_slab_journal_start_block() - Get the physical block number of the start of the slab
1096 *                                      journal relative to the start block allocator partition.
1097 * @slab_config: The slab configuration of the VDO.
1098 * @origin: The first block of the slab.
1099 */
1100static inline physical_block_number_t __must_check
1101vdo_get_slab_journal_start_block(const struct slab_config *slab_config,
1102				 physical_block_number_t origin)
1103{
1104	return origin + slab_config->data_blocks + slab_config->reference_count_blocks;
1105}
1106
1107/**
1108 * vdo_advance_journal_point() - Move the given journal point forward by one entry.
1109 * @point: The journal point to adjust.
1110 * @entries_per_block: The number of entries in one full block.
1111 */
1112static inline void vdo_advance_journal_point(struct journal_point *point,
1113					     journal_entry_count_t entries_per_block)
1114{
1115	point->entry_count++;
1116	if (point->entry_count == entries_per_block) {
1117		point->sequence_number++;
1118		point->entry_count = 0;
1119	}
1120}
1121
1122/**
1123 * vdo_before_journal_point() - Check whether the first point precedes the second point.
1124 * @first: The first journal point.
1125 * @second: The second journal point.
1126 *
1127 * Return: true if the first point precedes the second point.
1128 */
1129static inline bool vdo_before_journal_point(const struct journal_point *first,
1130					    const struct journal_point *second)
1131{
1132	return ((first->sequence_number < second->sequence_number) ||
1133		((first->sequence_number == second->sequence_number) &&
1134		 (first->entry_count < second->entry_count)));
1135}
1136
1137/**
1138 * vdo_pack_journal_point() - Encode the journal location represented by a
1139 *                            journal_point into a packed_journal_point.
1140 * @unpacked: The unpacked input point.
1141 * @packed: The packed output point.
1142 */
1143static inline void vdo_pack_journal_point(const struct journal_point *unpacked,
1144					  struct packed_journal_point *packed)
1145{
1146	packed->encoded_point =
1147		__cpu_to_le64((unpacked->sequence_number << 16) | unpacked->entry_count);
1148}
1149
1150/**
1151 * vdo_unpack_journal_point() - Decode the journal location represented by a packed_journal_point
1152 *                              into a journal_point.
1153 * @packed: The packed input point.
1154 * @unpacked: The unpacked output point.
1155 */
1156static inline void vdo_unpack_journal_point(const struct packed_journal_point *packed,
1157					    struct journal_point *unpacked)
1158{
1159	u64 native = __le64_to_cpu(packed->encoded_point);
1160
1161	unpacked->sequence_number = (native >> 16);
1162	unpacked->entry_count = (native & 0xffff);
1163}
1164
1165/**
1166 * vdo_pack_slab_journal_block_header() - Generate the packed representation of a slab block
1167 *                                        header.
1168 * @header: The header containing the values to encode.
1169 * @packed: The header into which to pack the values.
1170 */
1171static inline void
1172vdo_pack_slab_journal_block_header(const struct slab_journal_block_header *header,
1173				   struct packed_slab_journal_block_header *packed)
1174{
1175	packed->head = __cpu_to_le64(header->head);
1176	packed->sequence_number = __cpu_to_le64(header->sequence_number);
1177	packed->nonce = __cpu_to_le64(header->nonce);
1178	packed->entry_count = __cpu_to_le16(header->entry_count);
1179	packed->metadata_type = header->metadata_type;
1180	packed->has_block_map_increments = header->has_block_map_increments;
1181
1182	vdo_pack_journal_point(&header->recovery_point, &packed->recovery_point);
1183}
1184
1185/**
1186 * vdo_unpack_slab_journal_block_header() - Decode the packed representation of a slab block
1187 *                                          header.
1188 * @packed: The packed header to decode.
1189 * @header: The header into which to unpack the values.
1190 */
1191static inline void
1192vdo_unpack_slab_journal_block_header(const struct packed_slab_journal_block_header *packed,
1193				     struct slab_journal_block_header *header)
1194{
1195	*header = (struct slab_journal_block_header) {
1196		.head = __le64_to_cpu(packed->head),
1197		.sequence_number = __le64_to_cpu(packed->sequence_number),
1198		.nonce = __le64_to_cpu(packed->nonce),
1199		.entry_count = __le16_to_cpu(packed->entry_count),
1200		.metadata_type = packed->metadata_type,
1201		.has_block_map_increments = packed->has_block_map_increments,
1202	};
1203	vdo_unpack_journal_point(&packed->recovery_point, &header->recovery_point);
1204}
1205
1206/**
1207 * vdo_pack_slab_journal_entry() - Generate the packed encoding of a slab journal entry.
1208 * @packed: The entry into which to pack the values.
1209 * @sbn: The slab block number of the entry to encode.
1210 * @is_increment: The increment flag.
1211 */
1212static inline void vdo_pack_slab_journal_entry(packed_slab_journal_entry *packed,
1213					       slab_block_number sbn, bool is_increment)
1214{
1215	packed->offset_low8 = (sbn & 0x0000FF);
1216	packed->offset_mid8 = (sbn & 0x00FF00) >> 8;
1217	packed->offset_high7 = (sbn & 0x7F0000) >> 16;
1218	packed->increment = is_increment ? 1 : 0;
1219}
1220
1221/**
1222 * vdo_unpack_slab_journal_entry() - Decode the packed representation of a slab journal entry.
1223 * @packed: The packed entry to decode.
1224 *
1225 * Return: The decoded slab journal entry.
1226 */
1227static inline struct slab_journal_entry __must_check
1228vdo_unpack_slab_journal_entry(const packed_slab_journal_entry *packed)
1229{
1230	struct slab_journal_entry entry;
1231
1232	entry.sbn = packed->offset_high7;
1233	entry.sbn <<= 8;
1234	entry.sbn |= packed->offset_mid8;
1235	entry.sbn <<= 8;
1236	entry.sbn |= packed->offset_low8;
1237	entry.operation = VDO_JOURNAL_DATA_REMAPPING;
1238	entry.increment = packed->increment;
1239	return entry;
1240}
1241
/*
 * Prototype only; no definition appears in this part of the header. Takes a packed slab journal
 * block and a journal_entry_count_t, and returns a struct slab_journal_entry by value; the result
 * is marked __must_check.
 * NOTE(review): presumably @entry_count selects which entry of @block to decode -- confirm against
 * the definition.
 */
struct slab_journal_entry __must_check
vdo_decode_slab_journal_entry(struct packed_slab_journal_block *block,
			      journal_entry_count_t entry_count);
1245
1246/**
1247 * vdo_get_slab_summary_hint_shift() - Compute the shift for slab summary hints.
1248 * @slab_size_shift: Exponent for the number of blocks per slab.
1249 *
1250 * Return: The hint shift.
1251 */
1252static inline u8 __must_check vdo_get_slab_summary_hint_shift(unsigned int slab_size_shift)
1253{
1254	return ((slab_size_shift > VDO_SLAB_SUMMARY_FULLNESS_HINT_BITS) ?
1255		(slab_size_shift - VDO_SLAB_SUMMARY_FULLNESS_HINT_BITS) :
1256		0);
1257}
1258
/*
 * Layout prototypes; none of these functions is defined in this part of the header.
 *
 * vdo_initialize_layout() takes an overall size, a starting offset, and block counts for the block
 * map, journal, and summary, together with a pointer to a struct layout. The int result is marked
 * __must_check.
 * NOTE(review): presumably @layout is filled in on success and the result is a status code --
 * confirm against the definition.
 */
int __must_check vdo_initialize_layout(block_count_t size,
				       physical_block_number_t offset,
				       block_count_t block_map_blocks,
				       block_count_t journal_blocks,
				       block_count_t summary_blocks,
				       struct layout *layout);

/* NOTE(review): presumably the counterpart of vdo_initialize_layout() -- confirm. */
void vdo_uninitialize_layout(struct layout *layout);

/*
 * Looks up a partition by id. The partition is handed back through @partition_ptr and the int
 * result is marked __must_check.
 * NOTE(review): presumably the result is a status code reporting whether the id was found --
 * confirm against the definition.
 */
int __must_check vdo_get_partition(struct layout *layout, enum partition_id id,
				   struct partition **partition_ptr);

/*
 * Variant of the lookup which returns the partition pointer directly rather than a status.
 * NOTE(review): the name suggests the id must already be known to exist in @layout -- confirm how
 * a missing partition is handled.
 */
struct partition * __must_check vdo_get_known_partition(struct layout *layout,
							enum partition_id id);
1273
/*
 * Configuration and component-state prototypes; none of these functions is defined in this part
 * of the header.
 *
 * vdo_validate_config() takes a vdo_config plus a physical and a logical block count. Unlike its
 * neighbours, its int result is not marked __must_check.
 * NOTE(review): presumably the counts are the expected sizes to check the config against --
 * confirm against the definition.
 */
int vdo_validate_config(const struct vdo_config *config,
			block_count_t physical_block_count,
			block_count_t logical_block_count);

/* NOTE(review): presumably releases whatever vdo_decode_component_states() set up -- confirm. */
void vdo_destroy_component_states(struct vdo_component_states *states);

/*
 * Takes a byte buffer and a volume geometry, together with a pointer to a
 * struct vdo_component_states. The int result is marked __must_check.
 * NOTE(review): presumably @states is populated from @buffer -- confirm against the definition.
 */
int __must_check vdo_decode_component_states(u8 *buffer,
					     struct volume_geometry *geometry,
					     struct vdo_component_states *states);

/*
 * Takes a set of component states plus a geometry nonce, a physical size, and a logical size. The
 * int result is marked __must_check.
 * NOTE(review): presumably the states are checked for consistency with those three values --
 * confirm against the definition.
 */
int __must_check vdo_validate_component_states(struct vdo_component_states *states,
					       nonce_t geometry_nonce,
					       block_count_t physical_size,
					       block_count_t logical_size);
1288
/*
 * Super block prototypes; neither function is defined in this part of the header. Both operate on
 * a caller-supplied byte buffer. The encoder also takes the component states and returns nothing;
 * the decoder returns an int marked __must_check.
 * NOTE(review): the required buffer size is not visible here -- confirm against the definitions.
 */
void vdo_encode_super_block(u8 *buffer, struct vdo_component_states *states);
int __must_check vdo_decode_super_block(u8 *buffer);
1291
1292/* We start with 0L and postcondition with ~0L to match our historical usage in userspace. */
1293static inline u32 vdo_crc32(const void *buf, unsigned long len)
1294{
1295	return (crc32(0L, buf, len) ^ ~0L);
1296}
1297
1298#endif /* VDO_ENCODINGS_H */
1299