1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright 2023 Red Hat
4 */
5
6#include "index-layout.h"
7
8#include <linux/random.h>
9
10#include "logger.h"
11#include "memory-alloc.h"
12#include "murmurhash3.h"
13#include "numeric.h"
14#include "time-utils.h"
15
16#include "config.h"
17#include "open-chapter.h"
18#include "volume-index.h"
19
20/*
 * The UDS layout on storage media is divided into a number of fixed-size regions, the sizes of
 * which are computed when the index is created. Every header and region begins on a 4K block
 * boundary. Save regions are further sub-divided into regions of their own.
24 *
 * Each region has a kind and an instance number. Some kinds only have one instance and therefore
 * use RL_SOLE_INSTANCE (65535, which is -1 as a 16-bit value and is shown as -1 in the diagrams
 * below) as the instance number. The RL_KIND_INDEX used to use instances to represent
 * sub-indices; now, however, there is only ever one sub-index and therefore one instance.
 * The RL_KIND_VOLUME_INDEX uses instances to record which zone is being saved.
29 *
30 * Every region header has a type and version.
31 *
32 *     +-+-+---------+--------+--------+-+
33 *     | | |   I N D E X  0   101, 0   | |
34 *     |H|C+---------+--------+--------+S|
35 *     |D|f| Volume  | Save   | Save   |e|
36 *     |R|g| Region  | Region | Region |a|
37 *     | | | 201, -1 | 202, 0 | 202, 1 |l|
 *     +-+-+---------+--------+--------+-+
39 *
40 * The header contains the encoded region layout table as well as some index configuration data.
41 * The sub-index region and its subdivisions are maintained in the same table.
42 *
43 * There are two save regions to preserve the old state in case saving the new state is incomplete.
44 * They are used in alternation. Each save region is further divided into sub-regions.
45 *
46 *     +-+-----+------+------+-----+-----+
47 *     |H| IPM | MI   | MI   |     | OC  |
48 *     |D|     | zone | zone | ... |     |
49 *     |R| 301 | 302  | 302  |     | 303 |
50 *     | | -1  |  0   |  1   |     | -1  |
51 *     +-+-----+------+------+-----+-----+
52 *
53 * The header contains the encoded region layout table as well as index state data for that save.
54 * Each save also has a unique nonce.
55 */
56
/* Size in bytes of the magic label stored in the super block data */
#define MAGIC_SIZE 32
/* Size in bytes of the raw data that is hashed to produce the primary nonce */
#define NONCE_INFO_SIZE 32
/* Number of save regions created in a new layout */
#define MAX_SAVES 2
60
/* The kind of a layout region, stored in the kind field of struct layout_region. */
enum region_kind {
	/* Used for the free space of a save region */
	RL_KIND_EMPTY = 0,
	/* The header block of the layout or of a save region */
	RL_KIND_HEADER = 1,
	/* The configuration block of the layout */
	RL_KIND_CONFIG = 100,
	/* The sub-index, which contains the volume and the save regions */
	RL_KIND_INDEX = 101,
	/* The final block of the layout */
	RL_KIND_SEAL = 102,
	/* The volume region within the sub-index */
	RL_KIND_VOLUME = 201,
	/* A save region within the sub-index */
	RL_KIND_SAVE = 202,
	/* The index page map within a save region */
	RL_KIND_INDEX_PAGE_MAP = 301,
	/* One zone of the volume index within a save region */
	RL_KIND_VOLUME_INDEX = 302,
	/* The open chapter within a save region */
	RL_KIND_OPEN_CHAPTER = 303,
};
73
/*
 * The type of a region table, stored in the type field of struct region_header. Some region
 * types are historical and are no longer used.
 */
enum region_type {
	RH_TYPE_FREE = 0, /* unused */
	/* The region table in the layout header, which carries the super block data */
	RH_TYPE_SUPER = 1,
	/* The region table of a save slot that holds a save (zone_count > 0) */
	RH_TYPE_SAVE = 2,
	RH_TYPE_CHECKPOINT = 3, /* unused */
	/* The region table of an empty save slot (zone_count == 0) */
	RH_TYPE_UNSAVED = 4,
};
82
/*
 * The instance number for regions that only ever have one instance. Instance numbers are stored
 * in a u16, so this is the 16-bit representation of -1.
 */
#define RL_SOLE_INSTANCE 65535
84
85/*
86 * Super block version 2 is the first released version.
87 *
88 * Super block version 3 is the normal version used from RHEL 8.2 onwards.
89 *
90 * Super block versions 4 through 6 were incremental development versions and
91 * are not supported.
92 *
 * Super block version 7 is used for volumes which have been reduced in size by one chapter in
 * order to make room to prepend LVM metadata to a volume originally created without LVM. This
 * allows the index to retain most of its deduplication records.
96 */
/*
 * SUPER_VERSION_CURRENT is the version written into newly generated super blocks.
 * NOTE(review): the minimum and maximum are presumably the bounds accepted when loading an
 * existing super block; that check is not in this part of the file, so confirm there.
 */
#define SUPER_VERSION_MINIMUM 3
#define SUPER_VERSION_CURRENT 3
#define SUPER_VERSION_MAXIMUM 7
100
/*
 * The label copied into the magic_label field of new super blocks. The text is exactly
 * MAGIC_SIZE characters long, so the array holds no terminating NUL.
 */
static const u8 LAYOUT_MAGIC[MAGIC_SIZE] = "*ALBIREO*SINGLE*FILE*LAYOUT*001*";
/* The value encoded at the start of every region table */
static const u64 REGION_MAGIC = 0x416c6252676e3031; /* 'AlbRgn01' */
103
/* The header of a region table, encoded ahead of the table's layout_region entries. */
struct region_header {
	/* Set to REGION_MAGIC when a table is built */
	u64 magic;
	/* The number of blocks in the region that the table describes */
	u64 region_blocks;
	/* One of the enum region_type values */
	u16 type;
	/* Currently always version 1 */
	u16 version;
	/* The number of layout_region entries in the table */
	u16 region_count;
	/* The size in bytes of the data that is encoded after the region entries */
	u16 payload;
};
113
/* The location, size, and identity of a single region of the layout. */
struct layout_region {
	/* The number of the first block of the region */
	u64 start_block;
	/* The number of blocks in the region */
	u64 block_count;
	/* Always encoded as zero */
	u32 __unused;
	/* One of the enum region_kind values */
	u16 kind;
	/* The instance number, or RL_SOLE_INSTANCE */
	u16 instance;
};
121
/* An in-memory region table: a header followed by header.region_count regions. */
struct region_table {
	/* The buffer size used to encode the header, the regions, and the payload together */
	size_t encoded_size;
	struct region_header header;
	struct layout_region regions[];
};
127
/* The data identifying a single index save, encoded in the header of its save region. */
struct index_save_data {
	/* The time of the save in milliseconds; zero means the slot holds no valid save */
	u64 timestamp;
	/* The nonce generated from the volume nonce and the other save data */
	u64 nonce;
	/* Currently always version 1 */
	u32 version;
	/* Always encoded as zero */
	u32 unused__;
};
135
/* The version marker encoded at the start of the index state data in a save header. */
struct index_state_version {
	s32 signature;
	s32 version_id;
};
140
/* The version marker written ahead of the state data of every save that has zones. */
static const struct index_state_version INDEX_STATE_VERSION_301 = {
	.signature  = -1,
	.version_id = 301,
};
145
/* The index state recorded with a save, copied to and from the corresponding uds_index fields. */
struct index_state_data301 {
	/* When writing, INDEX_STATE_VERSION_301 is encoded rather than this field */
	struct index_state_version version;
	/* The newest virtual chapter of the index */
	u64 newest_chapter;
	/* The oldest virtual chapter of the index */
	u64 oldest_chapter;
	/* The last_save value of the index */
	u64 last_save;
	/* The final two fields are encoded together as a single zero u64 */
	u32 unused;
	u32 padding;
};
154
/* The layout and state of one save slot. */
struct index_save_layout {
	/* The number of volume index zones in the save; zero marks an empty slot */
	unsigned int zone_count;
	/* The entire save region */
	struct layout_region index_save;
	/* The first block of the save region, holding the region table and save data */
	struct layout_region header;
	/* The index page map, immediately after the header */
	struct layout_region index_page_map;
	/* Whatever blocks of the save region are not assigned to another sub-region */
	struct layout_region free_space;
	/* One region per volume index zone; only the first zone_count entries are used */
	struct layout_region volume_index_zones[MAX_ZONES];
	/* The open chapter, after the volume index zones */
	struct layout_region open_chapter;
	struct index_save_data save_data;
	struct index_state_data301 state_data;
};
166
/* The layout of the sub-index: the volume followed by the save regions. */
struct sub_index_layout {
	/* A nonce derived from the super block nonce and the sub-index start block */
	u64 nonce;
	/* The entire sub-index region */
	struct layout_region sub_index;
	/* The volume, at the start of the sub-index */
	struct layout_region volume;
	/* An array of save slots, one for each save region */
	struct index_save_layout *saves;
};
173
/* The super block data, encoded as the payload of the layout header's region table. */
struct super_block_data {
	/* A copy of LAYOUT_MAGIC */
	u8 magic_label[MAGIC_SIZE];
	/* The unique data that is hashed to produce the nonce */
	u8 nonce_info[NONCE_INFO_SIZE];
	/* The primary nonce of the layout */
	u64 nonce;
	/* The super block version */
	u32 version;
	/* The size of a block in bytes */
	u32 block_size;
	/* The number of sub-indices; set to 1 for new layouts */
	u16 index_count;
	/* The number of save regions */
	u16 max_saves;
	/* Padding reflects a blank field on permanent storage */
	u8 padding[4];
	/* The number of blocks in the open chapter region of each save */
	u64 open_chapter_blocks;
	/* The number of blocks in the index page map region of each save */
	u64 page_map_blocks;
	/* The final two fields are only encoded for converted (version 7) super blocks */
	u64 volume_offset;
	/* Subtracted from a region's start block when opening its reader or writer */
	u64 start_offset;
};
189
/* The complete layout of an index on storage. */
struct index_layout {
	/* The factory used to create buffered readers and writers for the regions */
	struct io_factory *factory;
	/* NOTE(review): presumably the size of the underlying storage; not used in this section */
	size_t factory_size;
	/* The offset in bytes of the layout; divided by the block size to find the first block */
	off_t offset;
	struct super_block_data super;
	/* The first block of the layout, holding the region table and super block data */
	struct layout_region header;
	/* The block holding the index configuration */
	struct layout_region config;
	struct sub_index_layout index;
	/* The block following the sub-index */
	struct layout_region seal;
	/* The total number of blocks in the layout */
	u64 total_blocks;
};
201
/* The sizes of the parts of a layout, as computed by compute_sizes(). */
struct save_layout_sizes {
	/* The number of save regions */
	unsigned int save_count;
	/* The size of a block in bytes */
	size_t block_size;
	/* The number of blocks in the volume */
	u64 volume_blocks;
	/* The number of blocks needed to save the volume index */
	u64 volume_index_blocks;
	/* The number of blocks needed to save the index page map */
	u64 page_map_blocks;
	/* The number of blocks needed to save the open chapter */
	u64 open_chapter_blocks;
	/* The number of blocks in one save region, including its header block */
	u64 save_blocks;
	/* The number of blocks in the volume and all of the save regions */
	u64 sub_index_blocks;
	/* The number of blocks in the entire layout */
	u64 total_blocks;
	/* The size of the entire layout in bytes */
	size_t total_size;
};
214
215static inline bool is_converted_super_block(struct super_block_data *super)
216{
217	return super->version == 7;
218}
219
220static int __must_check compute_sizes(const struct uds_configuration *config,
221				      struct save_layout_sizes *sls)
222{
223	int result;
224	struct index_geometry *geometry = config->geometry;
225
226	memset(sls, 0, sizeof(*sls));
227	sls->save_count = MAX_SAVES;
228	sls->block_size = UDS_BLOCK_SIZE;
229	sls->volume_blocks = geometry->bytes_per_volume / sls->block_size;
230
231	result = uds_compute_volume_index_save_blocks(config, sls->block_size,
232						      &sls->volume_index_blocks);
233	if (result != UDS_SUCCESS)
234		return vdo_log_error_strerror(result, "cannot compute index save size");
235
236	sls->page_map_blocks =
237		DIV_ROUND_UP(uds_compute_index_page_map_save_size(geometry),
238			     sls->block_size);
239	sls->open_chapter_blocks =
240		DIV_ROUND_UP(uds_compute_saved_open_chapter_size(geometry),
241			     sls->block_size);
242	sls->save_blocks =
243		1 + (sls->volume_index_blocks + sls->page_map_blocks + sls->open_chapter_blocks);
244	sls->sub_index_blocks = sls->volume_blocks + (sls->save_count * sls->save_blocks);
245	sls->total_blocks = 3 + sls->sub_index_blocks;
246	sls->total_size = sls->total_blocks * sls->block_size;
247
248	return UDS_SUCCESS;
249}
250
251int uds_compute_index_size(const struct uds_parameters *parameters, u64 *index_size)
252{
253	int result;
254	struct uds_configuration *index_config;
255	struct save_layout_sizes sizes;
256
257	if (index_size == NULL) {
258		vdo_log_error("Missing output size pointer");
259		return -EINVAL;
260	}
261
262	result = uds_make_configuration(parameters, &index_config);
263	if (result != UDS_SUCCESS) {
264		vdo_log_error_strerror(result, "cannot compute index size");
265		return uds_status_to_errno(result);
266	}
267
268	result = compute_sizes(index_config, &sizes);
269	uds_free_configuration(index_config);
270	if (result != UDS_SUCCESS)
271		return uds_status_to_errno(result);
272
273	*index_size = sizes.total_size;
274	return UDS_SUCCESS;
275}
276
277/* Create unique data using the current time and a pseudorandom number. */
278static void create_unique_nonce_data(u8 *buffer)
279{
280	ktime_t now = current_time_ns(CLOCK_REALTIME);
281	u32 rand;
282	size_t offset = 0;
283
284	get_random_bytes(&rand, sizeof(u32));
285	memcpy(buffer + offset, &now, sizeof(now));
286	offset += sizeof(now);
287	memcpy(buffer + offset, &rand, sizeof(rand));
288	offset += sizeof(rand);
289	while (offset < NONCE_INFO_SIZE) {
290		size_t len = min(NONCE_INFO_SIZE - offset, offset);
291
292		memcpy(buffer + offset, buffer, len);
293		offset += len;
294	}
295}
296
297static u64 hash_stuff(u64 start, const void *data, size_t len)
298{
299	u32 seed = start ^ (start >> 27);
300	u8 hash_buffer[16];
301
302	murmurhash3_128(data, len, seed, hash_buffer);
303	return get_unaligned_le64(hash_buffer + 4);
304}
305
306/* Generate a primary nonce from the provided data. */
307static u64 generate_primary_nonce(const void *data, size_t len)
308{
309	return hash_stuff(0xa1b1e0fc, data, len);
310}
311
312/*
313 * Deterministically generate a secondary nonce from an existing nonce and some arbitrary data by
314 * hashing the original nonce and the data to produce a new nonce.
315 */
316static u64 generate_secondary_nonce(u64 nonce, const void *data, size_t len)
317{
318	return hash_stuff(nonce + 1, data, len);
319}
320
321static int __must_check open_layout_reader(struct index_layout *layout,
322					   struct layout_region *lr, off_t offset,
323					   struct buffered_reader **reader_ptr)
324{
325	return uds_make_buffered_reader(layout->factory, lr->start_block + offset,
326					lr->block_count, reader_ptr);
327}
328
329static int open_region_reader(struct index_layout *layout, struct layout_region *region,
330			      struct buffered_reader **reader_ptr)
331{
332	return open_layout_reader(layout, region, -layout->super.start_offset,
333				  reader_ptr);
334}
335
336static int __must_check open_layout_writer(struct index_layout *layout,
337					   struct layout_region *lr, off_t offset,
338					   struct buffered_writer **writer_ptr)
339{
340	return uds_make_buffered_writer(layout->factory, lr->start_block + offset,
341					lr->block_count, writer_ptr);
342}
343
344static int open_region_writer(struct index_layout *layout, struct layout_region *region,
345			      struct buffered_writer **writer_ptr)
346{
347	return open_layout_writer(layout, region, -layout->super.start_offset,
348				  writer_ptr);
349}
350
351static void generate_super_block_data(struct save_layout_sizes *sls,
352				      struct super_block_data *super)
353{
354	memset(super, 0, sizeof(*super));
355	memcpy(super->magic_label, LAYOUT_MAGIC, MAGIC_SIZE);
356	create_unique_nonce_data(super->nonce_info);
357
358	super->nonce = generate_primary_nonce(super->nonce_info,
359					      sizeof(super->nonce_info));
360	super->version = SUPER_VERSION_CURRENT;
361	super->block_size = sls->block_size;
362	super->index_count = 1;
363	super->max_saves = sls->save_count;
364	super->open_chapter_blocks = sls->open_chapter_blocks;
365	super->page_map_blocks = sls->page_map_blocks;
366	super->volume_offset = 0;
367	super->start_offset = 0;
368}
369
370static void define_sub_index_nonce(struct index_layout *layout)
371{
372	struct sub_index_nonce_data {
373		u64 offset;
374		u16 index_id;
375	};
376	struct sub_index_layout *sil = &layout->index;
377	u64 primary_nonce = layout->super.nonce;
378	u8 buffer[sizeof(struct sub_index_nonce_data)] = { 0 };
379	size_t offset = 0;
380
381	encode_u64_le(buffer, &offset, sil->sub_index.start_block);
382	encode_u16_le(buffer, &offset, 0);
383	sil->nonce = generate_secondary_nonce(primary_nonce, buffer, sizeof(buffer));
384	if (sil->nonce == 0) {
385		sil->nonce = generate_secondary_nonce(~primary_nonce + 1, buffer,
386						      sizeof(buffer));
387	}
388}
389
390static void setup_sub_index(struct index_layout *layout, u64 start_block,
391			    struct save_layout_sizes *sls)
392{
393	struct sub_index_layout *sil = &layout->index;
394	u64 next_block = start_block;
395	unsigned int i;
396
397	sil->sub_index = (struct layout_region) {
398		.start_block = start_block,
399		.block_count = sls->sub_index_blocks,
400		.kind = RL_KIND_INDEX,
401		.instance = 0,
402	};
403
404	sil->volume = (struct layout_region) {
405		.start_block = next_block,
406		.block_count = sls->volume_blocks,
407		.kind = RL_KIND_VOLUME,
408		.instance = RL_SOLE_INSTANCE,
409	};
410
411	next_block += sls->volume_blocks;
412
413	for (i = 0; i < sls->save_count; i++) {
414		sil->saves[i].index_save = (struct layout_region) {
415			.start_block = next_block,
416			.block_count = sls->save_blocks,
417			.kind = RL_KIND_SAVE,
418			.instance = i,
419		};
420
421		next_block += sls->save_blocks;
422	}
423
424	define_sub_index_nonce(layout);
425}
426
427static void initialize_layout(struct index_layout *layout, struct save_layout_sizes *sls)
428{
429	u64 next_block = layout->offset / sls->block_size;
430
431	layout->total_blocks = sls->total_blocks;
432	generate_super_block_data(sls, &layout->super);
433	layout->header = (struct layout_region) {
434		.start_block = next_block++,
435		.block_count = 1,
436		.kind = RL_KIND_HEADER,
437		.instance = RL_SOLE_INSTANCE,
438	};
439
440	layout->config = (struct layout_region) {
441		.start_block = next_block++,
442		.block_count = 1,
443		.kind = RL_KIND_CONFIG,
444		.instance = RL_SOLE_INSTANCE,
445	};
446
447	setup_sub_index(layout, next_block, sls);
448	next_block += sls->sub_index_blocks;
449
450	layout->seal = (struct layout_region) {
451		.start_block = next_block,
452		.block_count = 1,
453		.kind = RL_KIND_SEAL,
454		.instance = RL_SOLE_INSTANCE,
455	};
456}
457
458static int __must_check make_index_save_region_table(struct index_save_layout *isl,
459						     struct region_table **table_ptr)
460{
461	int result;
462	unsigned int z;
463	struct region_table *table;
464	struct layout_region *lr;
465	u16 region_count;
466	size_t payload;
467	size_t type;
468
469	if (isl->zone_count > 0) {
470		/*
471		 * Normal save regions: header, page map, volume index zones,
472		 * open chapter, and possibly free space.
473		 */
474		region_count = 3 + isl->zone_count;
475		if (isl->free_space.block_count > 0)
476			region_count++;
477
478		payload = sizeof(isl->save_data) + sizeof(isl->state_data);
479		type = RH_TYPE_SAVE;
480	} else {
481		/* Empty save regions: header, page map, free space. */
482		region_count = 3;
483		payload = sizeof(isl->save_data);
484		type = RH_TYPE_UNSAVED;
485	}
486
487	result = vdo_allocate_extended(struct region_table, region_count,
488				       struct layout_region,
489				       "layout region table for ISL", &table);
490	if (result != VDO_SUCCESS)
491		return result;
492
493	lr = &table->regions[0];
494	*lr++ = isl->header;
495	*lr++ = isl->index_page_map;
496	for (z = 0; z < isl->zone_count; z++)
497		*lr++ = isl->volume_index_zones[z];
498
499	if (isl->zone_count > 0)
500		*lr++ = isl->open_chapter;
501
502	if (isl->free_space.block_count > 0)
503		*lr++ = isl->free_space;
504
505	table->header = (struct region_header) {
506		.magic = REGION_MAGIC,
507		.region_blocks = isl->index_save.block_count,
508		.type = type,
509		.version = 1,
510		.region_count = region_count,
511		.payload = payload,
512	};
513
514	table->encoded_size = (sizeof(struct region_header) + payload +
515			       region_count * sizeof(struct layout_region));
516	*table_ptr = table;
517	return UDS_SUCCESS;
518}
519
520static void encode_region_table(u8 *buffer, size_t *offset, struct region_table *table)
521{
522	unsigned int i;
523
524	encode_u64_le(buffer, offset, REGION_MAGIC);
525	encode_u64_le(buffer, offset, table->header.region_blocks);
526	encode_u16_le(buffer, offset, table->header.type);
527	encode_u16_le(buffer, offset, table->header.version);
528	encode_u16_le(buffer, offset, table->header.region_count);
529	encode_u16_le(buffer, offset, table->header.payload);
530
531	for (i = 0; i < table->header.region_count; i++) {
532		encode_u64_le(buffer, offset, table->regions[i].start_block);
533		encode_u64_le(buffer, offset, table->regions[i].block_count);
534		encode_u32_le(buffer, offset, 0);
535		encode_u16_le(buffer, offset, table->regions[i].kind);
536		encode_u16_le(buffer, offset, table->regions[i].instance);
537	}
538}
539
540static int __must_check write_index_save_header(struct index_save_layout *isl,
541						struct region_table *table,
542						struct buffered_writer *writer)
543{
544	int result;
545	u8 *buffer;
546	size_t offset = 0;
547
548	result = vdo_allocate(table->encoded_size, u8, "index save data", &buffer);
549	if (result != VDO_SUCCESS)
550		return result;
551
552	encode_region_table(buffer, &offset, table);
553	encode_u64_le(buffer, &offset, isl->save_data.timestamp);
554	encode_u64_le(buffer, &offset, isl->save_data.nonce);
555	encode_u32_le(buffer, &offset, isl->save_data.version);
556	encode_u32_le(buffer, &offset, 0);
557	if (isl->zone_count > 0) {
558		encode_u32_le(buffer, &offset, INDEX_STATE_VERSION_301.signature);
559		encode_u32_le(buffer, &offset, INDEX_STATE_VERSION_301.version_id);
560		encode_u64_le(buffer, &offset, isl->state_data.newest_chapter);
561		encode_u64_le(buffer, &offset, isl->state_data.oldest_chapter);
562		encode_u64_le(buffer, &offset, isl->state_data.last_save);
563		encode_u64_le(buffer, &offset, 0);
564	}
565
566	result = uds_write_to_buffered_writer(writer, buffer, offset);
567	vdo_free(buffer);
568	if (result != UDS_SUCCESS)
569		return result;
570
571	return uds_flush_buffered_writer(writer);
572}
573
574static int write_index_save_layout(struct index_layout *layout,
575				   struct index_save_layout *isl)
576{
577	int result;
578	struct region_table *table;
579	struct buffered_writer *writer;
580
581	result = make_index_save_region_table(isl, &table);
582	if (result != UDS_SUCCESS)
583		return result;
584
585	result = open_region_writer(layout, &isl->header, &writer);
586	if (result != UDS_SUCCESS) {
587		vdo_free(table);
588		return result;
589	}
590
591	result = write_index_save_header(isl, table, writer);
592	vdo_free(table);
593	uds_free_buffered_writer(writer);
594
595	return result;
596}
597
598static void reset_index_save_layout(struct index_save_layout *isl, u64 page_map_blocks)
599{
600	u64 free_blocks;
601	u64 next_block = isl->index_save.start_block;
602
603	isl->zone_count = 0;
604	memset(&isl->save_data, 0, sizeof(isl->save_data));
605
606	isl->header = (struct layout_region) {
607		.start_block = next_block++,
608		.block_count = 1,
609		.kind = RL_KIND_HEADER,
610		.instance = RL_SOLE_INSTANCE,
611	};
612
613	isl->index_page_map = (struct layout_region) {
614		.start_block = next_block,
615		.block_count = page_map_blocks,
616		.kind = RL_KIND_INDEX_PAGE_MAP,
617		.instance = RL_SOLE_INSTANCE,
618	};
619
620	next_block += page_map_blocks;
621
622	free_blocks = isl->index_save.block_count - page_map_blocks - 1;
623	isl->free_space = (struct layout_region) {
624		.start_block = next_block,
625		.block_count = free_blocks,
626		.kind = RL_KIND_EMPTY,
627		.instance = RL_SOLE_INSTANCE,
628	};
629}
630
631static int __must_check invalidate_old_save(struct index_layout *layout,
632					    struct index_save_layout *isl)
633{
634	reset_index_save_layout(isl, layout->super.page_map_blocks);
635	return write_index_save_layout(layout, isl);
636}
637
638static int discard_index_state_data(struct index_layout *layout)
639{
640	int result;
641	int saved_result = UDS_SUCCESS;
642	unsigned int i;
643
644	for (i = 0; i < layout->super.max_saves; i++) {
645		result = invalidate_old_save(layout, &layout->index.saves[i]);
646		if (result != UDS_SUCCESS)
647			saved_result = result;
648	}
649
650	if (saved_result != UDS_SUCCESS) {
651		return vdo_log_error_strerror(result,
652					      "%s: cannot destroy all index saves",
653					      __func__);
654	}
655
656	return UDS_SUCCESS;
657}
658
659static int __must_check make_layout_region_table(struct index_layout *layout,
660						 struct region_table **table_ptr)
661{
662	int result;
663	unsigned int i;
664	/* Regions: header, config, index, volume, saves, seal */
665	u16 region_count = 5 + layout->super.max_saves;
666	u16 payload;
667	struct region_table *table;
668	struct layout_region *lr;
669
670	result = vdo_allocate_extended(struct region_table, region_count,
671				       struct layout_region, "layout region table",
672				       &table);
673	if (result != VDO_SUCCESS)
674		return result;
675
676	lr = &table->regions[0];
677	*lr++ = layout->header;
678	*lr++ = layout->config;
679	*lr++ = layout->index.sub_index;
680	*lr++ = layout->index.volume;
681
682	for (i = 0; i < layout->super.max_saves; i++)
683		*lr++ = layout->index.saves[i].index_save;
684
685	*lr++ = layout->seal;
686
687	if (is_converted_super_block(&layout->super)) {
688		payload = sizeof(struct super_block_data);
689	} else {
690		payload = (sizeof(struct super_block_data) -
691			   sizeof(layout->super.volume_offset) -
692			   sizeof(layout->super.start_offset));
693	}
694
695	table->header = (struct region_header) {
696		.magic = REGION_MAGIC,
697		.region_blocks = layout->total_blocks,
698		.type = RH_TYPE_SUPER,
699		.version = 1,
700		.region_count = region_count,
701		.payload = payload,
702	};
703
704	table->encoded_size = (sizeof(struct region_header) + payload +
705			       region_count * sizeof(struct layout_region));
706	*table_ptr = table;
707	return UDS_SUCCESS;
708}
709
710static int __must_check write_layout_header(struct index_layout *layout,
711					    struct region_table *table,
712					    struct buffered_writer *writer)
713{
714	int result;
715	u8 *buffer;
716	size_t offset = 0;
717
718	result = vdo_allocate(table->encoded_size, u8, "layout data", &buffer);
719	if (result != VDO_SUCCESS)
720		return result;
721
722	encode_region_table(buffer, &offset, table);
723	memcpy(buffer + offset, &layout->super.magic_label, MAGIC_SIZE);
724	offset += MAGIC_SIZE;
725	memcpy(buffer + offset, &layout->super.nonce_info, NONCE_INFO_SIZE);
726	offset += NONCE_INFO_SIZE;
727	encode_u64_le(buffer, &offset, layout->super.nonce);
728	encode_u32_le(buffer, &offset, layout->super.version);
729	encode_u32_le(buffer, &offset, layout->super.block_size);
730	encode_u16_le(buffer, &offset, layout->super.index_count);
731	encode_u16_le(buffer, &offset, layout->super.max_saves);
732	encode_u32_le(buffer, &offset, 0);
733	encode_u64_le(buffer, &offset, layout->super.open_chapter_blocks);
734	encode_u64_le(buffer, &offset, layout->super.page_map_blocks);
735
736	if (is_converted_super_block(&layout->super)) {
737		encode_u64_le(buffer, &offset, layout->super.volume_offset);
738		encode_u64_le(buffer, &offset, layout->super.start_offset);
739	}
740
741	result = uds_write_to_buffered_writer(writer, buffer, offset);
742	vdo_free(buffer);
743	if (result != UDS_SUCCESS)
744		return result;
745
746	return uds_flush_buffered_writer(writer);
747}
748
749static int __must_check write_uds_index_config(struct index_layout *layout,
750					       struct uds_configuration *config,
751					       off_t offset)
752{
753	int result;
754	struct buffered_writer *writer = NULL;
755
756	result = open_layout_writer(layout, &layout->config, offset, &writer);
757	if (result != UDS_SUCCESS)
758		return vdo_log_error_strerror(result, "failed to open config region");
759
760	result = uds_write_config_contents(writer, config, layout->super.version);
761	if (result != UDS_SUCCESS) {
762		uds_free_buffered_writer(writer);
763		return vdo_log_error_strerror(result, "failed to write config region");
764	}
765
766	result = uds_flush_buffered_writer(writer);
767	if (result != UDS_SUCCESS) {
768		uds_free_buffered_writer(writer);
769		return vdo_log_error_strerror(result, "cannot flush config writer");
770	}
771
772	uds_free_buffered_writer(writer);
773	return UDS_SUCCESS;
774}
775
776static int __must_check save_layout(struct index_layout *layout, off_t offset)
777{
778	int result;
779	struct buffered_writer *writer = NULL;
780	struct region_table *table;
781
782	result = make_layout_region_table(layout, &table);
783	if (result != UDS_SUCCESS)
784		return result;
785
786	result = open_layout_writer(layout, &layout->header, offset, &writer);
787	if (result != UDS_SUCCESS) {
788		vdo_free(table);
789		return result;
790	}
791
792	result = write_layout_header(layout, table, writer);
793	vdo_free(table);
794	uds_free_buffered_writer(writer);
795
796	return result;
797}
798
799static int create_index_layout(struct index_layout *layout, struct uds_configuration *config)
800{
801	int result;
802	struct save_layout_sizes sizes;
803
804	result = compute_sizes(config, &sizes);
805	if (result != UDS_SUCCESS)
806		return result;
807
808	result = vdo_allocate(sizes.save_count, struct index_save_layout, __func__,
809			      &layout->index.saves);
810	if (result != VDO_SUCCESS)
811		return result;
812
813	initialize_layout(layout, &sizes);
814
815	result = discard_index_state_data(layout);
816	if (result != UDS_SUCCESS)
817		return result;
818
819	result = write_uds_index_config(layout, config, 0);
820	if (result != UDS_SUCCESS)
821		return result;
822
823	return save_layout(layout, 0);
824}
825
826static u64 generate_index_save_nonce(u64 volume_nonce, struct index_save_layout *isl)
827{
828	struct save_nonce_data {
829		struct index_save_data data;
830		u64 offset;
831	} nonce_data;
832	u8 buffer[sizeof(nonce_data)];
833	size_t offset = 0;
834
835	encode_u64_le(buffer, &offset, isl->save_data.timestamp);
836	encode_u64_le(buffer, &offset, 0);
837	encode_u32_le(buffer, &offset, isl->save_data.version);
838	encode_u32_le(buffer, &offset, 0U);
839	encode_u64_le(buffer, &offset, isl->index_save.start_block);
840	VDO_ASSERT_LOG_ONLY(offset == sizeof(nonce_data),
841			    "%zu bytes encoded of %zu expected",
842			    offset, sizeof(nonce_data));
843	return generate_secondary_nonce(volume_nonce, buffer, sizeof(buffer));
844}
845
846static u64 validate_index_save_layout(struct index_save_layout *isl, u64 volume_nonce)
847{
848	if ((isl->zone_count == 0) || (isl->save_data.timestamp == 0))
849		return 0;
850
851	if (isl->save_data.nonce != generate_index_save_nonce(volume_nonce, isl))
852		return 0;
853
854	return isl->save_data.timestamp;
855}
856
857static int find_latest_uds_index_save_slot(struct index_layout *layout,
858					   struct index_save_layout **isl_ptr)
859{
860	struct index_save_layout *latest = NULL;
861	struct index_save_layout *isl;
862	unsigned int i;
863	u64 save_time = 0;
864	u64 latest_time = 0;
865
866	for (i = 0; i < layout->super.max_saves; i++) {
867		isl = &layout->index.saves[i];
868		save_time = validate_index_save_layout(isl, layout->index.nonce);
869		if (save_time > latest_time) {
870			latest = isl;
871			latest_time = save_time;
872		}
873	}
874
875	if (latest == NULL) {
876		vdo_log_error("No valid index save found");
877		return UDS_INDEX_NOT_SAVED_CLEANLY;
878	}
879
880	*isl_ptr = latest;
881	return UDS_SUCCESS;
882}
883
884int uds_discard_open_chapter(struct index_layout *layout)
885{
886	int result;
887	struct index_save_layout *isl;
888	struct buffered_writer *writer;
889
890	result = find_latest_uds_index_save_slot(layout, &isl);
891	if (result != UDS_SUCCESS)
892		return result;
893
894	result = open_region_writer(layout, &isl->open_chapter, &writer);
895	if (result != UDS_SUCCESS)
896		return result;
897
898	result = uds_write_to_buffered_writer(writer, NULL, UDS_BLOCK_SIZE);
899	if (result != UDS_SUCCESS) {
900		uds_free_buffered_writer(writer);
901		return result;
902	}
903
904	result = uds_flush_buffered_writer(writer);
905	uds_free_buffered_writer(writer);
906	return result;
907}
908
909int uds_load_index_state(struct index_layout *layout, struct uds_index *index)
910{
911	int result;
912	unsigned int zone;
913	struct index_save_layout *isl;
914	struct buffered_reader *readers[MAX_ZONES];
915
916	result = find_latest_uds_index_save_slot(layout, &isl);
917	if (result != UDS_SUCCESS)
918		return result;
919
920	index->newest_virtual_chapter = isl->state_data.newest_chapter;
921	index->oldest_virtual_chapter = isl->state_data.oldest_chapter;
922	index->last_save = isl->state_data.last_save;
923
924	result = open_region_reader(layout, &isl->open_chapter, &readers[0]);
925	if (result != UDS_SUCCESS)
926		return result;
927
928	result = uds_load_open_chapter(index, readers[0]);
929	uds_free_buffered_reader(readers[0]);
930	if (result != UDS_SUCCESS)
931		return result;
932
933	for (zone = 0; zone < isl->zone_count; zone++) {
934		result = open_region_reader(layout, &isl->volume_index_zones[zone],
935					    &readers[zone]);
936		if (result != UDS_SUCCESS) {
937			for (; zone > 0; zone--)
938				uds_free_buffered_reader(readers[zone - 1]);
939
940			return result;
941		}
942	}
943
944	result = uds_load_volume_index(index->volume_index, readers, isl->zone_count);
945	for (zone = 0; zone < isl->zone_count; zone++)
946		uds_free_buffered_reader(readers[zone]);
947	if (result != UDS_SUCCESS)
948		return result;
949
950	result = open_region_reader(layout, &isl->index_page_map, &readers[0]);
951	if (result != UDS_SUCCESS)
952		return result;
953
954	result = uds_read_index_page_map(index->volume->index_page_map, readers[0]);
955	uds_free_buffered_reader(readers[0]);
956
957	return result;
958}
959
960static struct index_save_layout *select_oldest_index_save_layout(struct index_layout *layout)
961{
962	struct index_save_layout *oldest = NULL;
963	struct index_save_layout *isl;
964	unsigned int i;
965	u64 save_time = 0;
966	u64 oldest_time = 0;
967
968	for (i = 0; i < layout->super.max_saves; i++) {
969		isl = &layout->index.saves[i];
970		save_time = validate_index_save_layout(isl, layout->index.nonce);
971		if (oldest == NULL || save_time < oldest_time) {
972			oldest = isl;
973			oldest_time = save_time;
974		}
975	}
976
977	return oldest;
978}
979
980static void instantiate_index_save_layout(struct index_save_layout *isl,
981					  struct super_block_data *super,
982					  u64 volume_nonce, unsigned int zone_count)
983{
984	unsigned int z;
985	u64 next_block;
986	u64 free_blocks;
987	u64 volume_index_blocks;
988
989	isl->zone_count = zone_count;
990	memset(&isl->save_data, 0, sizeof(isl->save_data));
991	isl->save_data.timestamp = ktime_to_ms(current_time_ns(CLOCK_REALTIME));
992	isl->save_data.version = 1;
993	isl->save_data.nonce = generate_index_save_nonce(volume_nonce, isl);
994
995	next_block = isl->index_save.start_block;
996	isl->header = (struct layout_region) {
997		.start_block = next_block++,
998		.block_count = 1,
999		.kind = RL_KIND_HEADER,
1000		.instance = RL_SOLE_INSTANCE,
1001	};
1002
1003	isl->index_page_map = (struct layout_region) {
1004		.start_block = next_block,
1005		.block_count = super->page_map_blocks,
1006		.kind = RL_KIND_INDEX_PAGE_MAP,
1007		.instance = RL_SOLE_INSTANCE,
1008	};
1009	next_block += super->page_map_blocks;
1010
1011	free_blocks = (isl->index_save.block_count - 1 -
1012		       super->page_map_blocks -
1013		       super->open_chapter_blocks);
1014	volume_index_blocks = free_blocks / isl->zone_count;
1015	for (z = 0; z < isl->zone_count; z++) {
1016		isl->volume_index_zones[z] = (struct layout_region) {
1017			.start_block = next_block,
1018			.block_count = volume_index_blocks,
1019			.kind = RL_KIND_VOLUME_INDEX,
1020			.instance = z,
1021		};
1022
1023		next_block += volume_index_blocks;
1024		free_blocks -= volume_index_blocks;
1025	}
1026
1027	isl->open_chapter = (struct layout_region) {
1028		.start_block = next_block,
1029		.block_count = super->open_chapter_blocks,
1030		.kind = RL_KIND_OPEN_CHAPTER,
1031		.instance = RL_SOLE_INSTANCE,
1032	};
1033
1034	next_block += super->open_chapter_blocks;
1035
1036	isl->free_space = (struct layout_region) {
1037		.start_block = next_block,
1038		.block_count = free_blocks,
1039		.kind = RL_KIND_EMPTY,
1040		.instance = RL_SOLE_INSTANCE,
1041	};
1042}
1043
1044static int setup_uds_index_save_slot(struct index_layout *layout,
1045				     unsigned int zone_count,
1046				     struct index_save_layout **isl_ptr)
1047{
1048	int result;
1049	struct index_save_layout *isl;
1050
1051	isl = select_oldest_index_save_layout(layout);
1052	result = invalidate_old_save(layout, isl);
1053	if (result != UDS_SUCCESS)
1054		return result;
1055
1056	instantiate_index_save_layout(isl, &layout->super, layout->index.nonce,
1057				      zone_count);
1058
1059	*isl_ptr = isl;
1060	return UDS_SUCCESS;
1061}
1062
1063static void cancel_uds_index_save(struct index_save_layout *isl)
1064{
1065	memset(&isl->save_data, 0, sizeof(isl->save_data));
1066	memset(&isl->state_data, 0, sizeof(isl->state_data));
1067	isl->zone_count = 0;
1068}
1069
1070int uds_save_index_state(struct index_layout *layout, struct uds_index *index)
1071{
1072	int result;
1073	unsigned int zone;
1074	struct index_save_layout *isl;
1075	struct buffered_writer *writers[MAX_ZONES];
1076
1077	result = setup_uds_index_save_slot(layout, index->zone_count, &isl);
1078	if (result != UDS_SUCCESS)
1079		return result;
1080
1081	isl->state_data	= (struct index_state_data301) {
1082		.newest_chapter = index->newest_virtual_chapter,
1083		.oldest_chapter = index->oldest_virtual_chapter,
1084		.last_save = index->last_save,
1085	};
1086
1087	result = open_region_writer(layout, &isl->open_chapter, &writers[0]);
1088	if (result != UDS_SUCCESS) {
1089		cancel_uds_index_save(isl);
1090		return result;
1091	}
1092
1093	result = uds_save_open_chapter(index, writers[0]);
1094	uds_free_buffered_writer(writers[0]);
1095	if (result != UDS_SUCCESS) {
1096		cancel_uds_index_save(isl);
1097		return result;
1098	}
1099
1100	for (zone = 0; zone < index->zone_count; zone++) {
1101		result = open_region_writer(layout, &isl->volume_index_zones[zone],
1102					    &writers[zone]);
1103		if (result != UDS_SUCCESS) {
1104			for (; zone > 0; zone--)
1105				uds_free_buffered_writer(writers[zone - 1]);
1106
1107			cancel_uds_index_save(isl);
1108			return result;
1109		}
1110	}
1111
1112	result = uds_save_volume_index(index->volume_index, writers, index->zone_count);
1113	for (zone = 0; zone < index->zone_count; zone++)
1114		uds_free_buffered_writer(writers[zone]);
1115	if (result != UDS_SUCCESS) {
1116		cancel_uds_index_save(isl);
1117		return result;
1118	}
1119
1120	result = open_region_writer(layout, &isl->index_page_map, &writers[0]);
1121	if (result != UDS_SUCCESS) {
1122		cancel_uds_index_save(isl);
1123		return result;
1124	}
1125
1126	result = uds_write_index_page_map(index->volume->index_page_map, writers[0]);
1127	uds_free_buffered_writer(writers[0]);
1128	if (result != UDS_SUCCESS) {
1129		cancel_uds_index_save(isl);
1130		return result;
1131	}
1132
1133	return write_index_save_layout(layout, isl);
1134}
1135
1136static int __must_check load_region_table(struct buffered_reader *reader,
1137					  struct region_table **table_ptr)
1138{
1139	int result;
1140	unsigned int i;
1141	struct region_header header;
1142	struct region_table *table;
1143	u8 buffer[sizeof(struct region_header)];
1144	size_t offset = 0;
1145
1146	result = uds_read_from_buffered_reader(reader, buffer, sizeof(buffer));
1147	if (result != UDS_SUCCESS)
1148		return vdo_log_error_strerror(result, "cannot read region table header");
1149
1150	decode_u64_le(buffer, &offset, &header.magic);
1151	decode_u64_le(buffer, &offset, &header.region_blocks);
1152	decode_u16_le(buffer, &offset, &header.type);
1153	decode_u16_le(buffer, &offset, &header.version);
1154	decode_u16_le(buffer, &offset, &header.region_count);
1155	decode_u16_le(buffer, &offset, &header.payload);
1156
1157	if (header.magic != REGION_MAGIC)
1158		return UDS_NO_INDEX;
1159
1160	if (header.version != 1) {
1161		return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1162					      "unknown region table version %hu",
1163					      header.version);
1164	}
1165
1166	result = vdo_allocate_extended(struct region_table, header.region_count,
1167				       struct layout_region,
1168				       "single file layout region table", &table);
1169	if (result != VDO_SUCCESS)
1170		return result;
1171
1172	table->header = header;
1173	for (i = 0; i < header.region_count; i++) {
1174		u8 region_buffer[sizeof(struct layout_region)];
1175
1176		offset = 0;
1177		result = uds_read_from_buffered_reader(reader, region_buffer,
1178						       sizeof(region_buffer));
1179		if (result != UDS_SUCCESS) {
1180			vdo_free(table);
1181			return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1182						      "cannot read region table layouts");
1183		}
1184
1185		decode_u64_le(region_buffer, &offset, &table->regions[i].start_block);
1186		decode_u64_le(region_buffer, &offset, &table->regions[i].block_count);
1187		offset += sizeof(u32);
1188		decode_u16_le(region_buffer, &offset, &table->regions[i].kind);
1189		decode_u16_le(region_buffer, &offset, &table->regions[i].instance);
1190	}
1191
1192	*table_ptr = table;
1193	return UDS_SUCCESS;
1194}
1195
1196static int __must_check read_super_block_data(struct buffered_reader *reader,
1197					      struct index_layout *layout,
1198					      size_t saved_size)
1199{
1200	int result;
1201	struct super_block_data *super = &layout->super;
1202	u8 *buffer;
1203	size_t offset = 0;
1204
1205	result = vdo_allocate(saved_size, u8, "super block data", &buffer);
1206	if (result != VDO_SUCCESS)
1207		return result;
1208
1209	result = uds_read_from_buffered_reader(reader, buffer, saved_size);
1210	if (result != UDS_SUCCESS) {
1211		vdo_free(buffer);
1212		return vdo_log_error_strerror(result, "cannot read region table header");
1213	}
1214
1215	memcpy(&super->magic_label, buffer, MAGIC_SIZE);
1216	offset += MAGIC_SIZE;
1217	memcpy(&super->nonce_info, buffer + offset, NONCE_INFO_SIZE);
1218	offset += NONCE_INFO_SIZE;
1219	decode_u64_le(buffer, &offset, &super->nonce);
1220	decode_u32_le(buffer, &offset, &super->version);
1221	decode_u32_le(buffer, &offset, &super->block_size);
1222	decode_u16_le(buffer, &offset, &super->index_count);
1223	decode_u16_le(buffer, &offset, &super->max_saves);
1224	offset += sizeof(u32);
1225	decode_u64_le(buffer, &offset, &super->open_chapter_blocks);
1226	decode_u64_le(buffer, &offset, &super->page_map_blocks);
1227
1228	if (is_converted_super_block(super)) {
1229		decode_u64_le(buffer, &offset, &super->volume_offset);
1230		decode_u64_le(buffer, &offset, &super->start_offset);
1231	} else {
1232		super->volume_offset = 0;
1233		super->start_offset = 0;
1234	}
1235
1236	vdo_free(buffer);
1237
1238	if (memcmp(super->magic_label, LAYOUT_MAGIC, MAGIC_SIZE) != 0)
1239		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1240					      "unknown superblock magic label");
1241
1242	if ((super->version < SUPER_VERSION_MINIMUM) ||
1243	    (super->version == 4) || (super->version == 5) || (super->version == 6) ||
1244	    (super->version > SUPER_VERSION_MAXIMUM)) {
1245		return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1246					      "unknown superblock version number %u",
1247					      super->version);
1248	}
1249
1250	if (super->volume_offset < super->start_offset) {
1251		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1252					      "inconsistent offsets (start %llu, volume %llu)",
1253					      (unsigned long long) super->start_offset,
1254					      (unsigned long long) super->volume_offset);
1255	}
1256
1257	/* Sub-indexes are no longer used but the layout retains this field. */
1258	if (super->index_count != 1) {
1259		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1260					      "invalid subindex count %u",
1261					      super->index_count);
1262	}
1263
1264	if (generate_primary_nonce(super->nonce_info, sizeof(super->nonce_info)) != super->nonce) {
1265		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1266					      "inconsistent superblock nonce");
1267	}
1268
1269	return UDS_SUCCESS;
1270}
1271
1272static int __must_check verify_region(struct layout_region *lr, u64 start_block,
1273				      enum region_kind kind, unsigned int instance)
1274{
1275	if (lr->start_block != start_block)
1276		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1277					      "incorrect layout region offset");
1278
1279	if (lr->kind != kind)
1280		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1281					      "incorrect layout region kind");
1282
1283	if (lr->instance != instance) {
1284		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1285					      "incorrect layout region instance");
1286	}
1287
1288	return UDS_SUCCESS;
1289}
1290
1291static int __must_check verify_sub_index(struct index_layout *layout, u64 start_block,
1292					 struct region_table *table)
1293{
1294	int result;
1295	unsigned int i;
1296	struct sub_index_layout *sil = &layout->index;
1297	u64 next_block = start_block;
1298
1299	sil->sub_index = table->regions[2];
1300	result = verify_region(&sil->sub_index, next_block, RL_KIND_INDEX, 0);
1301	if (result != UDS_SUCCESS)
1302		return result;
1303
1304	define_sub_index_nonce(layout);
1305
1306	sil->volume = table->regions[3];
1307	result = verify_region(&sil->volume, next_block, RL_KIND_VOLUME,
1308			       RL_SOLE_INSTANCE);
1309	if (result != UDS_SUCCESS)
1310		return result;
1311
1312	next_block += sil->volume.block_count + layout->super.volume_offset;
1313
1314	for (i = 0; i < layout->super.max_saves; i++) {
1315		sil->saves[i].index_save = table->regions[i + 4];
1316		result = verify_region(&sil->saves[i].index_save, next_block,
1317				       RL_KIND_SAVE, i);
1318		if (result != UDS_SUCCESS)
1319			return result;
1320
1321		next_block += sil->saves[i].index_save.block_count;
1322	}
1323
1324	next_block -= layout->super.volume_offset;
1325	if (next_block != start_block + sil->sub_index.block_count) {
1326		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1327					      "sub index region does not span all saves");
1328	}
1329
1330	return UDS_SUCCESS;
1331}
1332
1333static int __must_check reconstitute_layout(struct index_layout *layout,
1334					    struct region_table *table, u64 first_block)
1335{
1336	int result;
1337	u64 next_block = first_block;
1338
1339	result = vdo_allocate(layout->super.max_saves, struct index_save_layout,
1340			      __func__, &layout->index.saves);
1341	if (result != VDO_SUCCESS)
1342		return result;
1343
1344	layout->total_blocks = table->header.region_blocks;
1345
1346	layout->header = table->regions[0];
1347	result = verify_region(&layout->header, next_block++, RL_KIND_HEADER,
1348			       RL_SOLE_INSTANCE);
1349	if (result != UDS_SUCCESS)
1350		return result;
1351
1352	layout->config = table->regions[1];
1353	result = verify_region(&layout->config, next_block++, RL_KIND_CONFIG,
1354			       RL_SOLE_INSTANCE);
1355	if (result != UDS_SUCCESS)
1356		return result;
1357
1358	result = verify_sub_index(layout, next_block, table);
1359	if (result != UDS_SUCCESS)
1360		return result;
1361
1362	next_block += layout->index.sub_index.block_count;
1363
1364	layout->seal = table->regions[table->header.region_count - 1];
1365	result = verify_region(&layout->seal, next_block + layout->super.volume_offset,
1366			       RL_KIND_SEAL, RL_SOLE_INSTANCE);
1367	if (result != UDS_SUCCESS)
1368		return result;
1369
1370	if (++next_block != (first_block + layout->total_blocks)) {
1371		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1372					      "layout table does not span total blocks");
1373	}
1374
1375	return UDS_SUCCESS;
1376}
1377
1378static int __must_check load_super_block(struct index_layout *layout, size_t block_size,
1379					 u64 first_block, struct buffered_reader *reader)
1380{
1381	int result;
1382	struct region_table *table = NULL;
1383	struct super_block_data *super = &layout->super;
1384
1385	result = load_region_table(reader, &table);
1386	if (result != UDS_SUCCESS)
1387		return result;
1388
1389	if (table->header.type != RH_TYPE_SUPER) {
1390		vdo_free(table);
1391		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1392					      "not a superblock region table");
1393	}
1394
1395	result = read_super_block_data(reader, layout, table->header.payload);
1396	if (result != UDS_SUCCESS) {
1397		vdo_free(table);
1398		return vdo_log_error_strerror(result, "unknown superblock format");
1399	}
1400
1401	if (super->block_size != block_size) {
1402		vdo_free(table);
1403		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1404					      "superblock saved block_size %u differs from supplied block_size %zu",
1405					      super->block_size, block_size);
1406	}
1407
1408	first_block -= (super->volume_offset - super->start_offset);
1409	result = reconstitute_layout(layout, table, first_block);
1410	vdo_free(table);
1411	return result;
1412}
1413
1414static int __must_check read_index_save_data(struct buffered_reader *reader,
1415					     struct index_save_layout *isl,
1416					     size_t saved_size)
1417{
1418	int result;
1419	struct index_state_version file_version;
1420	u8 buffer[sizeof(struct index_save_data) + sizeof(struct index_state_data301)];
1421	size_t offset = 0;
1422
1423	if (saved_size != sizeof(buffer)) {
1424		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1425					      "unexpected index save data size %zu",
1426					      saved_size);
1427	}
1428
1429	result = uds_read_from_buffered_reader(reader, buffer, sizeof(buffer));
1430	if (result != UDS_SUCCESS)
1431		return vdo_log_error_strerror(result, "cannot read index save data");
1432
1433	decode_u64_le(buffer, &offset, &isl->save_data.timestamp);
1434	decode_u64_le(buffer, &offset, &isl->save_data.nonce);
1435	decode_u32_le(buffer, &offset, &isl->save_data.version);
1436	offset += sizeof(u32);
1437
1438	if (isl->save_data.version > 1) {
1439		return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1440					      "unknown index save version number %u",
1441					      isl->save_data.version);
1442	}
1443
1444	decode_s32_le(buffer, &offset, &file_version.signature);
1445	decode_s32_le(buffer, &offset, &file_version.version_id);
1446
1447	if ((file_version.signature != INDEX_STATE_VERSION_301.signature) ||
1448	    (file_version.version_id != INDEX_STATE_VERSION_301.version_id)) {
1449		return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1450					      "index state version %d,%d is unsupported",
1451					      file_version.signature,
1452					      file_version.version_id);
1453	}
1454
1455	decode_u64_le(buffer, &offset, &isl->state_data.newest_chapter);
1456	decode_u64_le(buffer, &offset, &isl->state_data.oldest_chapter);
1457	decode_u64_le(buffer, &offset, &isl->state_data.last_save);
1458	/* Skip past some historical fields that are now unused */
1459	offset += sizeof(u32) + sizeof(u32);
1460	return UDS_SUCCESS;
1461}
1462
1463static int __must_check reconstruct_index_save(struct index_save_layout *isl,
1464					       struct region_table *table)
1465{
1466	int result;
1467	unsigned int z;
1468	struct layout_region *last_region;
1469	u64 next_block = isl->index_save.start_block;
1470	u64 last_block = next_block + isl->index_save.block_count;
1471
1472	isl->zone_count = table->header.region_count - 3;
1473
1474	last_region = &table->regions[table->header.region_count - 1];
1475	if (last_region->kind == RL_KIND_EMPTY) {
1476		isl->free_space = *last_region;
1477		isl->zone_count--;
1478	} else {
1479		isl->free_space = (struct layout_region) {
1480			.start_block = last_block,
1481			.block_count = 0,
1482			.kind = RL_KIND_EMPTY,
1483			.instance = RL_SOLE_INSTANCE,
1484		};
1485	}
1486
1487	isl->header = table->regions[0];
1488	result = verify_region(&isl->header, next_block++, RL_KIND_HEADER,
1489			       RL_SOLE_INSTANCE);
1490	if (result != UDS_SUCCESS)
1491		return result;
1492
1493	isl->index_page_map = table->regions[1];
1494	result = verify_region(&isl->index_page_map, next_block, RL_KIND_INDEX_PAGE_MAP,
1495			       RL_SOLE_INSTANCE);
1496	if (result != UDS_SUCCESS)
1497		return result;
1498
1499	next_block += isl->index_page_map.block_count;
1500
1501	for (z = 0; z < isl->zone_count; z++) {
1502		isl->volume_index_zones[z] = table->regions[z + 2];
1503		result = verify_region(&isl->volume_index_zones[z], next_block,
1504				       RL_KIND_VOLUME_INDEX, z);
1505		if (result != UDS_SUCCESS)
1506			return result;
1507
1508		next_block += isl->volume_index_zones[z].block_count;
1509	}
1510
1511	isl->open_chapter = table->regions[isl->zone_count + 2];
1512	result = verify_region(&isl->open_chapter, next_block, RL_KIND_OPEN_CHAPTER,
1513			       RL_SOLE_INSTANCE);
1514	if (result != UDS_SUCCESS)
1515		return result;
1516
1517	next_block += isl->open_chapter.block_count;
1518
1519	result = verify_region(&isl->free_space, next_block, RL_KIND_EMPTY,
1520			       RL_SOLE_INSTANCE);
1521	if (result != UDS_SUCCESS)
1522		return result;
1523
1524	next_block += isl->free_space.block_count;
1525	if (next_block != last_block) {
1526		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1527					      "index save layout table incomplete");
1528	}
1529
1530	return UDS_SUCCESS;
1531}
1532
1533static int __must_check load_index_save(struct index_save_layout *isl,
1534					struct buffered_reader *reader,
1535					unsigned int instance)
1536{
1537	int result;
1538	struct region_table *table = NULL;
1539
1540	result = load_region_table(reader, &table);
1541	if (result != UDS_SUCCESS) {
1542		return vdo_log_error_strerror(result, "cannot read index save %u header",
1543					      instance);
1544	}
1545
1546	if (table->header.region_blocks != isl->index_save.block_count) {
1547		u64 region_blocks = table->header.region_blocks;
1548
1549		vdo_free(table);
1550		return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1551					      "unexpected index save %u region block count %llu",
1552					      instance,
1553					      (unsigned long long) region_blocks);
1554	}
1555
1556	if (table->header.type == RH_TYPE_UNSAVED) {
1557		vdo_free(table);
1558		reset_index_save_layout(isl, 0);
1559		return UDS_SUCCESS;
1560	}
1561
1562
1563	if (table->header.type != RH_TYPE_SAVE) {
1564		vdo_log_error_strerror(UDS_CORRUPT_DATA,
1565				       "unexpected index save %u header type %u",
1566				       instance, table->header.type);
1567		vdo_free(table);
1568		return UDS_CORRUPT_DATA;
1569	}
1570
1571	result = read_index_save_data(reader, isl, table->header.payload);
1572	if (result != UDS_SUCCESS) {
1573		vdo_free(table);
1574		return vdo_log_error_strerror(result,
1575					      "unknown index save %u data format",
1576					      instance);
1577	}
1578
1579	result = reconstruct_index_save(isl, table);
1580	vdo_free(table);
1581	if (result != UDS_SUCCESS) {
1582		return vdo_log_error_strerror(result, "cannot reconstruct index save %u",
1583					      instance);
1584	}
1585
1586	return UDS_SUCCESS;
1587}
1588
1589static int __must_check load_sub_index_regions(struct index_layout *layout)
1590{
1591	int result;
1592	unsigned int j;
1593	struct index_save_layout *isl;
1594	struct buffered_reader *reader;
1595
1596	for (j = 0; j < layout->super.max_saves; j++) {
1597		isl = &layout->index.saves[j];
1598		result = open_region_reader(layout, &isl->index_save, &reader);
1599
1600		if (result != UDS_SUCCESS) {
1601			vdo_log_error_strerror(result,
1602					       "cannot get reader for index 0 save %u",
1603					       j);
1604			return result;
1605		}
1606
1607		result = load_index_save(isl, reader, j);
1608		uds_free_buffered_reader(reader);
1609		if (result != UDS_SUCCESS) {
1610			/* Another save slot might be valid. */
1611			reset_index_save_layout(isl, 0);
1612			continue;
1613		}
1614	}
1615
1616	return UDS_SUCCESS;
1617}
1618
1619static int __must_check verify_uds_index_config(struct index_layout *layout,
1620						struct uds_configuration *config)
1621{
1622	int result;
1623	struct buffered_reader *reader = NULL;
1624	u64 offset;
1625
1626	offset = layout->super.volume_offset - layout->super.start_offset;
1627	result = open_layout_reader(layout, &layout->config, offset, &reader);
1628	if (result != UDS_SUCCESS)
1629		return vdo_log_error_strerror(result, "failed to open config reader");
1630
1631	result = uds_validate_config_contents(reader, config);
1632	if (result != UDS_SUCCESS) {
1633		uds_free_buffered_reader(reader);
1634		return vdo_log_error_strerror(result, "failed to read config region");
1635	}
1636
1637	uds_free_buffered_reader(reader);
1638	return UDS_SUCCESS;
1639}
1640
1641static int load_index_layout(struct index_layout *layout, struct uds_configuration *config)
1642{
1643	int result;
1644	struct buffered_reader *reader;
1645
1646	result = uds_make_buffered_reader(layout->factory,
1647					  layout->offset / UDS_BLOCK_SIZE, 1, &reader);
1648	if (result != UDS_SUCCESS)
1649		return vdo_log_error_strerror(result, "unable to read superblock");
1650
1651	result = load_super_block(layout, UDS_BLOCK_SIZE,
1652				  layout->offset / UDS_BLOCK_SIZE, reader);
1653	uds_free_buffered_reader(reader);
1654	if (result != UDS_SUCCESS)
1655		return result;
1656
1657	result = verify_uds_index_config(layout, config);
1658	if (result != UDS_SUCCESS)
1659		return result;
1660
1661	return load_sub_index_regions(layout);
1662}
1663
1664static int create_layout_factory(struct index_layout *layout,
1665				 const struct uds_configuration *config)
1666{
1667	int result;
1668	size_t writable_size;
1669	struct io_factory *factory = NULL;
1670
1671	result = uds_make_io_factory(config->bdev, &factory);
1672	if (result != UDS_SUCCESS)
1673		return result;
1674
1675	writable_size = uds_get_writable_size(factory) & -UDS_BLOCK_SIZE;
1676	if (writable_size < config->size + config->offset) {
1677		uds_put_io_factory(factory);
1678		vdo_log_error("index storage (%zu) is smaller than the requested size %zu",
1679			      writable_size, config->size + config->offset);
1680		return -ENOSPC;
1681	}
1682
1683	layout->factory = factory;
1684	layout->factory_size = (config->size > 0) ? config->size : writable_size;
1685	layout->offset = config->offset;
1686	return UDS_SUCCESS;
1687}
1688
1689int uds_make_index_layout(struct uds_configuration *config, bool new_layout,
1690			  struct index_layout **layout_ptr)
1691{
1692	int result;
1693	struct index_layout *layout = NULL;
1694	struct save_layout_sizes sizes;
1695
1696	result = compute_sizes(config, &sizes);
1697	if (result != UDS_SUCCESS)
1698		return result;
1699
1700	result = vdo_allocate(1, struct index_layout, __func__, &layout);
1701	if (result != VDO_SUCCESS)
1702		return result;
1703
1704	result = create_layout_factory(layout, config);
1705	if (result != UDS_SUCCESS) {
1706		uds_free_index_layout(layout);
1707		return result;
1708	}
1709
1710	if (layout->factory_size < sizes.total_size) {
1711		vdo_log_error("index storage (%zu) is smaller than the required size %llu",
1712			      layout->factory_size,
1713			      (unsigned long long) sizes.total_size);
1714		uds_free_index_layout(layout);
1715		return -ENOSPC;
1716	}
1717
1718	if (new_layout)
1719		result = create_index_layout(layout, config);
1720	else
1721		result = load_index_layout(layout, config);
1722	if (result != UDS_SUCCESS) {
1723		uds_free_index_layout(layout);
1724		return result;
1725	}
1726
1727	*layout_ptr = layout;
1728	return UDS_SUCCESS;
1729}
1730
1731void uds_free_index_layout(struct index_layout *layout)
1732{
1733	if (layout == NULL)
1734		return;
1735
1736	vdo_free(layout->index.saves);
1737	if (layout->factory != NULL)
1738		uds_put_io_factory(layout->factory);
1739
1740	vdo_free(layout);
1741}
1742
1743int uds_replace_index_layout_storage(struct index_layout *layout,
1744				     struct block_device *bdev)
1745{
1746	return uds_replace_storage(layout->factory, bdev);
1747}
1748
1749/* Obtain a dm_bufio_client for the volume region. */
1750int uds_open_volume_bufio(struct index_layout *layout, size_t block_size,
1751			  unsigned int reserved_buffers,
1752			  struct dm_bufio_client **client_ptr)
1753{
1754	off_t offset = (layout->index.volume.start_block +
1755			layout->super.volume_offset -
1756			layout->super.start_offset);
1757
1758	return uds_make_bufio(layout->factory, offset, block_size, reserved_buffers,
1759			      client_ptr);
1760}
1761
1762u64 uds_get_volume_nonce(struct index_layout *layout)
1763{
1764	return layout->index.nonce;
1765}
1766