// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2023 Red Hat
 */
#include "delta-index.h"

#include <linux/bitops.h>
#include <linux/bits.h>
#include <linux/compiler.h>
#include <linux/limits.h>
#include <linux/log2.h>

#include "cpu.h"
#include "errors.h"
#include "logger.h"
#include "memory-alloc.h"
#include "numeric.h"
#include "permassert.h"
#include "string-utils.h"
#include "time-utils.h"

#include "config.h"
#include "indexer.h"

/*
 * The entries in a delta index could be stored in a single delta list, but to reduce search times
 * and update costs it uses multiple delta lists. These lists are stored in a single chunk of
 * memory managed by the delta_zone structure. The delta_zone can move the data around within its
 * memory, so the location of each delta list is recorded as a bit offset into the memory. Because
 * the volume index can contain over a million delta lists, we want to be efficient with the size
 * of the delta list header information. This information is encoded into 16 bytes per list. The
 * volume index delta list memory can easily exceed 4 gigabits, so a 64 bit value is needed to
 * address the memory. The volume index delta lists average around 6 kilobits, so 16 bits are
 * sufficient to store the size of a delta list.
 *
 * Each delta list is stored as a bit stream. Within the delta list encoding, bits and bytes are
 * numbered in little endian order. Within a byte, bit 0 is the least significant bit (0x1), and
 * bit 7 is the most significant bit (0x80). Within a bit stream, bit 7 is the most significant bit
 * of byte 0, and bit 8 is the least significant bit of byte 1. Within a byte array, a byte's
 * number corresponds to its index in the array.
 *
 * A standard delta list entry is stored as a fixed length payload (the value) followed by a
 * variable length key (the delta). A collision entry is used when two block names have the same
 * delta list address. A collision entry always follows a standard entry for the hash with which it
 * collides, and is encoded with DELTA == 0 with an additional 256-bit field at the end,
 * containing the full block name. An entry with a delta of 0 at the beginning of a delta list
 * indicates a normal entry.
 *
 * The delta in each entry is encoded with a variable-length Huffman code to minimize the memory
 * used by small deltas. The Huffman code is specified by three parameters, which can be computed
 * from the desired mean delta when the index is full. (See compute_coding_constants() for
 * details.)
 *
 * The bit field utilities used to read and write delta entries assume that it is possible to read
 * some bytes beyond the end of the bit field, so a delta_zone memory allocation is guarded by two
 * invalid delta lists to prevent reading outside the delta_zone memory. The valid delta lists are
 * numbered 1 to N, and the guard lists are numbered 0 and N+1. The functions that decode the bit
 * stream include a step that skips over bits set to 0 until the first 1 bit is found. A corrupted
 * delta list could cause this step to run off the end of the delta_zone memory, so as extra
 * protection against this happening, the tail guard list is set to all ones.
 *
 * The delta_index supports two different forms. The mutable form is created by
 * uds_initialize_delta_index(), and is used for the volume index and for open chapter indexes. The
 * immutable form is created by uds_initialize_delta_index_page(), and is used for closed (and
 * cached) chapter index pages. The immutable form does not allocate delta list headers or
 * temporary offsets, and thus is somewhat more memory efficient.
 */
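
/*
 * As a small worked example of the numbering above: bit 11 of a bit stream lives in byte 1 of
 * the byte array, at bit position 3 within that byte (mask 0x08), because bits are assigned to
 * bytes eight at a time in little endian order.
 */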

/*
 * This is the largest field size supported by get_field() and set_field(). Any field that is
 * larger is not guaranteed to fit in a single byte-aligned u32.
 */
#define MAX_FIELD_BITS ((sizeof(u32) - 1) * BITS_PER_BYTE + 1)

/*
 * This is the largest field size supported by get_big_field() and set_big_field(). Any field that
 * is larger is not guaranteed to fit in a single byte-aligned u64.
 */
#define MAX_BIG_FIELD_BITS ((sizeof(u64) - 1) * BITS_PER_BYTE + 1)

/*
 * This is the number of guard bytes needed at the end of the memory byte array when using the bit
 * utilities. These utilities call get_big_field() and set_big_field(), which can access up to 7
 * bytes beyond the end of the desired field. The definition is written to make it clear how this
 * value is derived.
 */
#define POST_FIELD_GUARD_BYTES (sizeof(u64) - 1)

/* The number of guard bits that are needed in the tail guard list */
#define GUARD_BITS (POST_FIELD_GUARD_BYTES * BITS_PER_BYTE)

/*
 * The maximum size of a single delta list in bytes. We count guard bytes in this value because a
 * buffer of this size can be used with move_bits().
 */
#define DELTA_LIST_MAX_BYTE_COUNT					\
	((U16_MAX + BITS_PER_BYTE) / BITS_PER_BYTE + POST_FIELD_GUARD_BYTES)

/* The number of extra bytes and bits needed to store a collision entry */
#define COLLISION_BYTES UDS_RECORD_NAME_SIZE
#define COLLISION_BITS (COLLISION_BYTES * BITS_PER_BYTE)

/*
 * Immutable delta lists are packed into pages containing a header that encodes the delta list
 * information into 19 bits per list (64KB bit offset).
 */
#define IMMUTABLE_HEADER_SIZE 19
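
/*
 * A 64KB page holds 64 * 1024 * 8 = 524288 bit positions, so 19 bits are just enough to record
 * any bit offset within the page.
 */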

/*
 * Constants and structures for the saved delta index. "DI" is for delta_index, and -##### is a
 * number to increment when the format of the data changes.
 */
#define MAGIC_SIZE 8

static const char DELTA_INDEX_MAGIC[] = "DI-00002";

struct delta_index_header {
	char magic[MAGIC_SIZE];
	u32 zone_number;
	u32 zone_count;
	u32 first_list;
	u32 list_count;
	u64 record_count;
	u64 collision_count;
};

/*
 * Header data used for immutable delta index pages. This data is followed by the delta list offset
 * table.
 */
struct delta_page_header {
	/* Externally-defined nonce */
	u64 nonce;
	/* The virtual chapter number */
	u64 virtual_chapter_number;
	/* Index of the first delta list on the page */
	u16 first_list;
	/* Number of delta lists on the page */
	u16 list_count;
} __packed;

static inline u64 get_delta_list_byte_start(const struct delta_list *delta_list)
{
	return delta_list->start / BITS_PER_BYTE;
}

static inline u16 get_delta_list_byte_size(const struct delta_list *delta_list)
{
	unsigned int bit_offset = delta_list->start % BITS_PER_BYTE;

	return BITS_TO_BYTES(bit_offset + delta_list->size);
}
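
/*
 * For example, a delta list with start == 12 and size == 7 begins in byte 1 of the zone memory
 * (bit offset 4 within that byte) and occupies BITS_TO_BYTES(4 + 7) == 2 bytes.
 */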

static void rebalance_delta_zone(const struct delta_zone *delta_zone, u32 first,
				 u32 last)
{
	struct delta_list *delta_list;
	u64 new_start;

	if (first == last) {
		/* Only one list is moving, and we know there is space. */
		delta_list = &delta_zone->delta_lists[first];
		new_start = delta_zone->new_offsets[first];
		if (delta_list->start != new_start) {
			u64 source;
			u64 destination;

			source = get_delta_list_byte_start(delta_list);
			delta_list->start = new_start;
			destination = get_delta_list_byte_start(delta_list);
			memmove(delta_zone->memory + destination,
				delta_zone->memory + source,
				get_delta_list_byte_size(delta_list));
		}
	} else {
		/*
		 * There is more than one list. Divide the problem in half, and use recursive calls
		 * to process each half. Note that after this computation, first <= middle, and
		 * middle < last.
		 */
		u32 middle = (first + last) / 2;

		delta_list = &delta_zone->delta_lists[middle];
		new_start = delta_zone->new_offsets[middle];

		/*
		 * The direction that our middle list is moving determines which half of the
		 * problem must be processed first.
		 */
		if (new_start > delta_list->start) {
			rebalance_delta_zone(delta_zone, middle + 1, last);
			rebalance_delta_zone(delta_zone, first, middle);
		} else {
			rebalance_delta_zone(delta_zone, first, middle);
			rebalance_delta_zone(delta_zone, middle + 1, last);
		}
	}
}

static inline size_t get_zone_memory_size(unsigned int zone_count, size_t memory_size)
{
	/* Round up so that each zone is a multiple of 64K in size. */
	size_t ALLOC_BOUNDARY = 64 * 1024;

	return (memory_size / zone_count + ALLOC_BOUNDARY - 1) & -ALLOC_BOUNDARY;
}
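
/*
 * For example, splitting 1000000 bytes of memory across 3 zones gives 1000000 / 3 = 333333 bytes
 * per zone, which rounds up to 393216 bytes (6 * 64K) for each zone.
 */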

void uds_reset_delta_index(const struct delta_index *delta_index)
{
	unsigned int z;

	/*
	 * Initialize all delta lists to be empty. We keep 2 extra delta list descriptors, one
	 * before the first real entry and one after so that we don't need to bounds check the
	 * array access when calculating preceding and following gap sizes.
	 */
	for (z = 0; z < delta_index->zone_count; z++) {
		u64 list_bits;
		u64 spacing;
		u64 offset;
		unsigned int i;
		struct delta_zone *zone = &delta_index->delta_zones[z];
		struct delta_list *delta_lists = zone->delta_lists;

		/* Zeroing the delta list headers initializes the head guard list correctly. */
		memset(delta_lists, 0,
		       (zone->list_count + 2) * sizeof(struct delta_list));

		/* Set all the bits in the end guard list. */
		list_bits = (u64) zone->size * BITS_PER_BYTE - GUARD_BITS;
		delta_lists[zone->list_count + 1].start = list_bits;
		delta_lists[zone->list_count + 1].size = GUARD_BITS;
		memset(zone->memory + (list_bits / BITS_PER_BYTE), ~0,
		       POST_FIELD_GUARD_BYTES);

		/* Evenly space out the real delta lists by setting regular offsets. */
		spacing = list_bits / zone->list_count;
		offset = spacing / 2;
		for (i = 1; i <= zone->list_count; i++) {
			delta_lists[i].start = offset;
			offset += spacing;
		}

		/* Update the statistics. */
		zone->discard_count += zone->record_count;
		zone->record_count = 0;
		zone->collision_count = 0;
	}
}

/*
 * Compute the Huffman coding parameters for the given mean delta. The Huffman code is specified
 * by three parameters:
 *
 *  MINBITS   The number of bits in the smallest code
 *  BASE      The number of values coded using a code of length MINBITS
 *  INCR      The number of values coded by using one additional bit
 *
 * These parameters are related by this equation:
 *
 *	BASE + INCR == 1 << MINBITS
 *
 * The math for the Huffman code of an exponential distribution says that
 *
 *	INCR = log(2) * MEAN_DELTA
 *
 * Then use the smallest MINBITS value so that
 *
 *	(1 << MINBITS) > INCR
 *
 * And then
 *
 *	BASE = (1 << MINBITS) - INCR
 *
 * Now the index can generate a code such that
 * - The first BASE values code using MINBITS bits.
 * - The next INCR values code using MINBITS+1 bits.
 * - The next INCR values code using MINBITS+2 bits.
 * - (and so on).
 */
static void compute_coding_constants(u32 mean_delta, u16 *min_bits, u32 *min_keys, u32 *incr_keys)
{
	/*
	 * We want to compute the rounded value of log(2) * mean_delta. Since we cannot always use
	 * floating point, use a really good integer approximation.
	 */
	*incr_keys = (836158UL * mean_delta + 603160UL) / 1206321UL;
	*min_bits = bits_per(*incr_keys + 1);
	*min_keys = (1 << *min_bits) - *incr_keys;
}
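
/*
 * A quick sanity check of the math above, assuming a mean delta of 4096: the integer
 * approximation gives INCR = 2839 (roughly log(2) * 4096), so MINBITS = 12 and
 * BASE = (1 << 12) - 2839 = 1257, and BASE + INCR == 1 << MINBITS as required.
 */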

void uds_uninitialize_delta_index(struct delta_index *delta_index)
{
	unsigned int z;

	if (delta_index->delta_zones == NULL)
		return;

	for (z = 0; z < delta_index->zone_count; z++) {
		vdo_free(vdo_forget(delta_index->delta_zones[z].new_offsets));
		vdo_free(vdo_forget(delta_index->delta_zones[z].delta_lists));
		vdo_free(vdo_forget(delta_index->delta_zones[z].memory));
	}

	vdo_free(delta_index->delta_zones);
	memset(delta_index, 0, sizeof(struct delta_index));
}

static int initialize_delta_zone(struct delta_zone *delta_zone, size_t size,
				 u32 first_list, u32 list_count, u32 mean_delta,
				 u32 payload_bits, u8 tag)
{
	int result;

	result = vdo_allocate(size, u8, "delta list", &delta_zone->memory);
	if (result != VDO_SUCCESS)
		return result;

	result = vdo_allocate(list_count + 2, u64, "delta list temp",
			      &delta_zone->new_offsets);
	if (result != VDO_SUCCESS)
		return result;

	/* Allocate the delta lists. */
	result = vdo_allocate(list_count + 2, struct delta_list, "delta lists",
			      &delta_zone->delta_lists);
	if (result != VDO_SUCCESS)
		return result;

	compute_coding_constants(mean_delta, &delta_zone->min_bits,
				 &delta_zone->min_keys, &delta_zone->incr_keys);
	delta_zone->value_bits = payload_bits;
	delta_zone->buffered_writer = NULL;
	delta_zone->size = size;
	delta_zone->rebalance_time = 0;
	delta_zone->rebalance_count = 0;
	delta_zone->record_count = 0;
	delta_zone->collision_count = 0;
	delta_zone->discard_count = 0;
	delta_zone->overflow_count = 0;
	delta_zone->first_list = first_list;
	delta_zone->list_count = list_count;
	delta_zone->tag = tag;

	return UDS_SUCCESS;
}

int uds_initialize_delta_index(struct delta_index *delta_index, unsigned int zone_count,
			       u32 list_count, u32 mean_delta, u32 payload_bits,
			       size_t memory_size, u8 tag)
{
	int result;
	unsigned int z;
	size_t zone_memory;

	result = vdo_allocate(zone_count, struct delta_zone, "Delta Index Zones",
			      &delta_index->delta_zones);
	if (result != VDO_SUCCESS)
		return result;

	delta_index->zone_count = zone_count;
	delta_index->list_count = list_count;
	delta_index->lists_per_zone = DIV_ROUND_UP(list_count, zone_count);
	delta_index->memory_size = 0;
	delta_index->mutable = true;
	delta_index->tag = tag;

	for (z = 0; z < zone_count; z++) {
		u32 lists_in_zone = delta_index->lists_per_zone;
		u32 first_list_in_zone = z * lists_in_zone;

		if (z == zone_count - 1) {
			/*
			 * The last zone gets fewer lists if zone_count doesn't evenly divide
			 * list_count. We'll have an underflow if the assertion below doesn't hold.
			 */
			if (delta_index->list_count <= first_list_in_zone) {
				uds_uninitialize_delta_index(delta_index);
				return vdo_log_error_strerror(UDS_INVALID_ARGUMENT,
							      "%u delta lists not enough for %u zones",
							      list_count, zone_count);
			}
			lists_in_zone = delta_index->list_count - first_list_in_zone;
		}

		zone_memory = get_zone_memory_size(zone_count, memory_size);
		result = initialize_delta_zone(&delta_index->delta_zones[z], zone_memory,
					       first_list_in_zone, lists_in_zone,
					       mean_delta, payload_bits, tag);
		if (result != UDS_SUCCESS) {
			uds_uninitialize_delta_index(delta_index);
			return result;
		}

		delta_index->memory_size +=
			(sizeof(struct delta_zone) + zone_memory +
			 (lists_in_zone + 2) * (sizeof(struct delta_list) + sizeof(u64)));
	}

	uds_reset_delta_index(delta_index);
	return UDS_SUCCESS;
}

/* Read a bit field from an arbitrary bit boundary. */
static inline u32 get_field(const u8 *memory, u64 offset, u8 size)
{
	const void *addr = memory + offset / BITS_PER_BYTE;

	return (get_unaligned_le32(addr) >> (offset % BITS_PER_BYTE)) & ((1 << size) - 1);
}

/* Write a bit field to an arbitrary bit boundary. */
static inline void set_field(u32 value, u8 *memory, u64 offset, u8 size)
{
	void *addr = memory + offset / BITS_PER_BYTE;
	int shift = offset % BITS_PER_BYTE;
	u32 data = get_unaligned_le32(addr);

	data &= ~(((1 << size) - 1) << shift);
	data |= value << shift;
	put_unaligned_le32(data, addr);
}
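
/*
 * An illustrative example (ignoring the guard bytes a real caller must provide): if memory[0] is
 * 0xab and memory[1] is 0xcd, then get_field(memory, 4, 8) returns 0xda, the high nibble of byte
 * 0 joined with the low nibble of byte 1.
 */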

/* Get the bit offset to the immutable delta list header. */
static inline u32 get_immutable_header_offset(u32 list_number)
{
	return sizeof(struct delta_page_header) * BITS_PER_BYTE +
		list_number * IMMUTABLE_HEADER_SIZE;
}
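
/*
 * In summary, an immutable delta index page consists of the fixed delta_page_header, followed by
 * list_count + 1 offsets of IMMUTABLE_HEADER_SIZE bits each (the extra offset marks the end of
 * the last list), followed by the delta list bit streams. See uds_pack_delta_index_page().
 */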

/* Get the bit offset to the start of the immutable delta list bit stream. */
static inline u32 get_immutable_start(const u8 *memory, u32 list_number)
{
	return get_field(memory, get_immutable_header_offset(list_number),
			 IMMUTABLE_HEADER_SIZE);
}

/* Set the bit offset to the start of the immutable delta list bit stream. */
static inline void set_immutable_start(u8 *memory, u32 list_number, u32 start)
{
	set_field(start, memory, get_immutable_header_offset(list_number),
		  IMMUTABLE_HEADER_SIZE);
}

static bool verify_delta_index_page(u64 nonce, u16 list_count, u64 expected_nonce,
				    u8 *memory, size_t memory_size)
{
	unsigned int i;

	/*
	 * Verify the nonce. A mismatch can happen here during rebuild if we haven't written the
	 * entire volume at least once.
	 */
	if (nonce != expected_nonce)
		return false;

	/* Verify that the number of delta lists can fit in the page. */
	if (list_count > ((memory_size - sizeof(struct delta_page_header)) *
			  BITS_PER_BYTE / IMMUTABLE_HEADER_SIZE))
		return false;

	/*
	 * Verify that the first delta list is immediately after the last delta
	 * list header.
	 */
	if (get_immutable_start(memory, 0) != get_immutable_header_offset(list_count + 1))
		return false;

	/* Verify that the lists are in the correct order. */
	for (i = 0; i < list_count; i++) {
		if (get_immutable_start(memory, i) > get_immutable_start(memory, i + 1))
			return false;
	}

	/*
	 * Verify that the last list ends on the page, and that there is room
	 * for the post-field guard bits.
	 */
	if (get_immutable_start(memory, list_count) >
	    (memory_size - POST_FIELD_GUARD_BYTES) * BITS_PER_BYTE)
		return false;

	/* Verify that the guard bytes are correctly set to all ones. */
	for (i = 0; i < POST_FIELD_GUARD_BYTES; i++) {
		if (memory[memory_size - POST_FIELD_GUARD_BYTES + i] != (u8) ~0)
			return false;
	}

	/* All verifications passed. */
	return true;
}

/* Initialize a delta index page to refer to a supplied page. */
int uds_initialize_delta_index_page(struct delta_index_page *delta_index_page,
				    u64 expected_nonce, u32 mean_delta, u32 payload_bits,
				    u8 *memory, size_t memory_size)
{
	u64 nonce;
	u64 vcn;
	u64 first_list;
	u64 list_count;
	struct delta_page_header *header = (struct delta_page_header *) memory;
	struct delta_zone *delta_zone = &delta_index_page->delta_zone;
	const u8 *nonce_addr = (const u8 *) &header->nonce;
	const u8 *vcn_addr = (const u8 *) &header->virtual_chapter_number;
	const u8 *first_list_addr = (const u8 *) &header->first_list;
	const u8 *list_count_addr = (const u8 *) &header->list_count;

	/* First assume that the header is little endian. */
	nonce = get_unaligned_le64(nonce_addr);
	vcn = get_unaligned_le64(vcn_addr);
	first_list = get_unaligned_le16(first_list_addr);
	list_count = get_unaligned_le16(list_count_addr);
	if (!verify_delta_index_page(nonce, list_count, expected_nonce, memory,
				     memory_size)) {
		/* If that fails, try big endian. */
		nonce = get_unaligned_be64(nonce_addr);
		vcn = get_unaligned_be64(vcn_addr);
		first_list = get_unaligned_be16(first_list_addr);
		list_count = get_unaligned_be16(list_count_addr);
		if (!verify_delta_index_page(nonce, list_count, expected_nonce, memory,
					     memory_size)) {
			/*
			 * Both attempts failed. Do not log this as an error, because it can happen
			 * during a rebuild if we haven't written the entire volume at least once.
			 */
			return UDS_CORRUPT_DATA;
		}
	}

	delta_index_page->delta_index.delta_zones = delta_zone;
	delta_index_page->delta_index.zone_count = 1;
	delta_index_page->delta_index.list_count = list_count;
	delta_index_page->delta_index.lists_per_zone = list_count;
	delta_index_page->delta_index.mutable = false;
	delta_index_page->delta_index.tag = 'p';
	delta_index_page->virtual_chapter_number = vcn;
	delta_index_page->lowest_list_number = first_list;
	delta_index_page->highest_list_number = first_list + list_count - 1;

	compute_coding_constants(mean_delta, &delta_zone->min_bits,
				 &delta_zone->min_keys, &delta_zone->incr_keys);
	delta_zone->value_bits = payload_bits;
	delta_zone->memory = memory;
	delta_zone->delta_lists = NULL;
	delta_zone->new_offsets = NULL;
	delta_zone->buffered_writer = NULL;
	delta_zone->size = memory_size;
	delta_zone->rebalance_time = 0;
	delta_zone->rebalance_count = 0;
	delta_zone->record_count = 0;
	delta_zone->collision_count = 0;
	delta_zone->discard_count = 0;
	delta_zone->overflow_count = 0;
	delta_zone->first_list = 0;
	delta_zone->list_count = list_count;
	delta_zone->tag = 'p';

	return UDS_SUCCESS;
}

/* Read a large bit field from an arbitrary bit boundary. */
static inline u64 get_big_field(const u8 *memory, u64 offset, u8 size)
{
	const void *addr = memory + offset / BITS_PER_BYTE;

	return (get_unaligned_le64(addr) >> (offset % BITS_PER_BYTE)) & ((1UL << size) - 1);
}

/* Write a large bit field to an arbitrary bit boundary. */
static inline void set_big_field(u64 value, u8 *memory, u64 offset, u8 size)
{
	void *addr = memory + offset / BITS_PER_BYTE;
	u8 shift = offset % BITS_PER_BYTE;
	u64 data = get_unaligned_le64(addr);

	data &= ~(((1UL << size) - 1) << shift);
	data |= value << shift;
	put_unaligned_le64(data, addr);
}

/* Set a sequence of bits to all zeros. */
static inline void set_zero(u8 *memory, u64 offset, u32 size)
{
	if (size > 0) {
		u8 *addr = memory + offset / BITS_PER_BYTE;
		u8 shift = offset % BITS_PER_BYTE;
		u32 count = size + shift > BITS_PER_BYTE ? (u32) BITS_PER_BYTE - shift : size;

		*addr++ &= ~(((1 << count) - 1) << shift);
		for (size -= count; size > BITS_PER_BYTE; size -= BITS_PER_BYTE)
			*addr++ = 0;

		if (size > 0)
			*addr &= 0xFF << size;
	}
}

/*
 * Move several bits from a higher to a lower address, moving the lower addressed bits first. The
 * size and memory offsets are measured in bits.
 */
static void move_bits_down(const u8 *from, u64 from_offset, u8 *to, u64 to_offset, u32 size)
{
	const u8 *source;
	u8 *destination;
	u8 offset;
	u8 count;
	u64 field;

	/* Start by moving one field that ends on a destination int boundary. */
	count = (MAX_BIG_FIELD_BITS - ((to_offset + MAX_BIG_FIELD_BITS) % BITS_PER_TYPE(u32)));
	field = get_big_field(from, from_offset, count);
	set_big_field(field, to, to_offset, count);
	from_offset += count;
	to_offset += count;
	size -= count;

	/* Now do the main loop to copy 32 bit chunks that are int-aligned at the destination. */
	offset = from_offset % BITS_PER_TYPE(u32);
	source = from + (from_offset - offset) / BITS_PER_BYTE;
	destination = to + to_offset / BITS_PER_BYTE;
	while (size > MAX_BIG_FIELD_BITS) {
		put_unaligned_le32(get_unaligned_le64(source) >> offset, destination);
		source += sizeof(u32);
		destination += sizeof(u32);
		from_offset += BITS_PER_TYPE(u32);
		to_offset += BITS_PER_TYPE(u32);
		size -= BITS_PER_TYPE(u32);
	}

	/* Finish up by moving any remaining bits. */
	if (size > 0) {
		field = get_big_field(from, from_offset, size);
		set_big_field(field, to, to_offset, size);
	}
}

/*
 * Move several bits from a lower to a higher address, moving the higher addressed bits first. The
 * size and memory offsets are measured in bits.
 */
static void move_bits_up(const u8 *from, u64 from_offset, u8 *to, u64 to_offset, u32 size)
{
	const u8 *source;
	u8 *destination;
	u8 offset;
	u8 count;
	u64 field;

	/* Start by moving one field that begins on a destination int boundary. */
	count = (to_offset + size) % BITS_PER_TYPE(u32);
	if (count > 0) {
		size -= count;
		field = get_big_field(from, from_offset + size, count);
		set_big_field(field, to, to_offset + size, count);
	}

	/* Now do the main loop to copy 32 bit chunks that are int-aligned at the destination. */
	offset = (from_offset + size) % BITS_PER_TYPE(u32);
	source = from + (from_offset + size - offset) / BITS_PER_BYTE;
	destination = to + (to_offset + size) / BITS_PER_BYTE;
	while (size > MAX_BIG_FIELD_BITS) {
		source -= sizeof(u32);
		destination -= sizeof(u32);
		size -= BITS_PER_TYPE(u32);
		put_unaligned_le32(get_unaligned_le64(source) >> offset, destination);
	}

	/* Finish up by moving any remaining bits. */
	if (size > 0) {
		field = get_big_field(from, from_offset, size);
		set_big_field(field, to, to_offset, size);
	}
}

/*
 * Move bits from one field to another. When the fields overlap, behave as if we first move all the
 * bits from the source to a temporary value, and then move all the bits from the temporary value
 * to the destination. The size and memory offsets are measured in bits.
 */
static void move_bits(const u8 *from, u64 from_offset, u8 *to, u64 to_offset, u32 size)
{
	u64 field;

	/* A small move doesn't require special handling. */
	if (size <= MAX_BIG_FIELD_BITS) {
		if (size > 0) {
			field = get_big_field(from, from_offset, size);
			set_big_field(field, to, to_offset, size);
		}

		return;
	}

	if (from_offset > to_offset)
		move_bits_down(from, from_offset, to, to_offset, size);
	else
		move_bits_up(from, from_offset, to, to_offset, size);
}

/*
 * Pack delta lists from a mutable delta index into an immutable delta index page. A range of delta
 * lists (starting with a specified list index) is copied from the mutable delta index into a
 * memory page used in the immutable index. The number of lists copied onto the page is returned in
 * list_count.
 */
int uds_pack_delta_index_page(const struct delta_index *delta_index, u64 header_nonce,
			      u8 *memory, size_t memory_size, u64 virtual_chapter_number,
			      u32 first_list, u32 *list_count)
{
	const struct delta_zone *delta_zone;
	struct delta_list *delta_lists;
	u32 max_lists;
	u32 n_lists = 0;
	u32 offset;
	u32 i;
	int free_bits;
	int bits;
	struct delta_page_header *header;

	delta_zone = &delta_index->delta_zones[0];
	delta_lists = &delta_zone->delta_lists[first_list + 1];
	max_lists = delta_index->list_count - first_list;

	/*
	 * Compute how many lists will fit on the page. Subtract the size of the fixed header, one
	 * delta list offset, and the guard bytes from the page size to determine how much space is
	 * available for delta lists.
	 */
	free_bits = memory_size * BITS_PER_BYTE;
	free_bits -= get_immutable_header_offset(1);
	free_bits -= GUARD_BITS;
	if (free_bits < IMMUTABLE_HEADER_SIZE) {
		/* This page is too small to store any delta lists. */
		return vdo_log_error_strerror(UDS_OVERFLOW,
					      "Chapter Index Page of %zu bytes is too small",
					      memory_size);
	}

	while (n_lists < max_lists) {
		/* Each list requires a delta list offset and the list data. */
		bits = IMMUTABLE_HEADER_SIZE + delta_lists[n_lists].size;
		if (bits > free_bits)
			break;

		n_lists++;
		free_bits -= bits;
	}

	*list_count = n_lists;

	header = (struct delta_page_header *) memory;
	put_unaligned_le64(header_nonce, (u8 *) &header->nonce);
	put_unaligned_le64(virtual_chapter_number,
			   (u8 *) &header->virtual_chapter_number);
	put_unaligned_le16(first_list, (u8 *) &header->first_list);
	put_unaligned_le16(n_lists, (u8 *) &header->list_count);

	/* Construct the delta list offset table. */
	offset = get_immutable_header_offset(n_lists + 1);
	set_immutable_start(memory, 0, offset);
	for (i = 0; i < n_lists; i++) {
		offset += delta_lists[i].size;
		set_immutable_start(memory, i + 1, offset);
	}

	/* Copy the delta list data onto the memory page. */
	for (i = 0; i < n_lists; i++) {
		move_bits(delta_zone->memory, delta_lists[i].start, memory,
			  get_immutable_start(memory, i), delta_lists[i].size);
	}

	/* Set all the bits in the guard bytes. */
	memset(memory + memory_size - POST_FIELD_GUARD_BYTES, ~0,
	       POST_FIELD_GUARD_BYTES);
	return UDS_SUCCESS;
}

/* Compute the new offsets of the delta lists. */
static void compute_new_list_offsets(struct delta_zone *delta_zone, u32 growing_index,
				     size_t growing_size, size_t used_space)
{
	size_t spacing;
	u32 i;
	struct delta_list *delta_lists = delta_zone->delta_lists;
	u32 tail_guard_index = delta_zone->list_count + 1;

	spacing = (delta_zone->size - used_space) / delta_zone->list_count;
	delta_zone->new_offsets[0] = 0;
	for (i = 0; i <= delta_zone->list_count; i++) {
		delta_zone->new_offsets[i + 1] =
			(delta_zone->new_offsets[i] +
			 get_delta_list_byte_size(&delta_lists[i]) + spacing);
		delta_zone->new_offsets[i] *= BITS_PER_BYTE;
		delta_zone->new_offsets[i] += delta_lists[i].start % BITS_PER_BYTE;
		if (i == 0)
			delta_zone->new_offsets[i + 1] -= spacing / 2;
		if (i + 1 == growing_index)
			delta_zone->new_offsets[i + 1] += growing_size;
	}

	delta_zone->new_offsets[tail_guard_index] =
		(delta_zone->size * BITS_PER_BYTE - delta_lists[tail_guard_index].size);
}

static void rebalance_lists(struct delta_zone *delta_zone)
{
	struct delta_list *delta_lists;
	u32 i;
	size_t used_space = 0;

	/* Extend and balance memory to receive the delta lists */
	delta_lists = delta_zone->delta_lists;
	for (i = 0; i <= delta_zone->list_count + 1; i++)
		used_space += get_delta_list_byte_size(&delta_lists[i]);

	compute_new_list_offsets(delta_zone, 0, 0, used_space);
	for (i = 1; i <= delta_zone->list_count + 1; i++)
		delta_lists[i].start = delta_zone->new_offsets[i];
}

/* Start restoring a delta index from multiple input streams. */
int uds_start_restoring_delta_index(struct delta_index *delta_index,
				    struct buffered_reader **buffered_readers,
				    unsigned int reader_count)
{
	int result;
	unsigned int zone_count = reader_count;
	u64 record_count = 0;
	u64 collision_count = 0;
	u32 first_list[MAX_ZONES];
	u32 list_count[MAX_ZONES];
	unsigned int z;
	u32 list_next = 0;
	const struct delta_zone *delta_zone;

	/* Read and validate each header. */
	for (z = 0; z < zone_count; z++) {
		struct delta_index_header header;
		u8 buffer[sizeof(struct delta_index_header)];
		size_t offset = 0;

		result = uds_read_from_buffered_reader(buffered_readers[z], buffer,
						       sizeof(buffer));
		if (result != UDS_SUCCESS) {
			return vdo_log_warning_strerror(result,
							"failed to read delta index header");
		}

		memcpy(&header.magic, buffer, MAGIC_SIZE);
		offset += MAGIC_SIZE;
		decode_u32_le(buffer, &offset, &header.zone_number);
		decode_u32_le(buffer, &offset, &header.zone_count);
		decode_u32_le(buffer, &offset, &header.first_list);
		decode_u32_le(buffer, &offset, &header.list_count);
		decode_u64_le(buffer, &offset, &header.record_count);
		decode_u64_le(buffer, &offset, &header.collision_count);

		result = VDO_ASSERT(offset == sizeof(struct delta_index_header),
				    "%zu bytes decoded of %zu expected", offset,
				    sizeof(struct delta_index_header));
		if (result != VDO_SUCCESS) {
			return vdo_log_warning_strerror(result,
							"failed to read delta index header");
		}

		if (memcmp(header.magic, DELTA_INDEX_MAGIC, MAGIC_SIZE) != 0) {
			return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
							"delta index file has bad magic number");
		}

		if (zone_count != header.zone_count) {
			return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
							"delta index files contain mismatched zone counts (%u,%u)",
							zone_count, header.zone_count);
		}

		if (header.zone_number != z) {
			return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
							"delta index zone %u found in slot %u",
							header.zone_number, z);
		}

		first_list[z] = header.first_list;
		list_count[z] = header.list_count;
		record_count += header.record_count;
		collision_count += header.collision_count;

		if (first_list[z] != list_next) {
			return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
							"delta index file for zone %u starts with list %u instead of list %u",
							z, first_list[z], list_next);
		}

		list_next += list_count[z];
	}

	if (list_next != delta_index->list_count) {
		return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
						"delta index files contain %u delta lists instead of %u delta lists",
						list_next, delta_index->list_count);
	}

	if (collision_count > record_count) {
		return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
						"delta index files contain %llu collisions and %llu records",
						(unsigned long long) collision_count,
						(unsigned long long) record_count);
	}

	uds_reset_delta_index(delta_index);
	delta_index->delta_zones[0].record_count = record_count;
	delta_index->delta_zones[0].collision_count = collision_count;

	/* Read the delta lists and distribute them to the proper zones. */
	for (z = 0; z < zone_count; z++) {
		u32 i;

		delta_index->load_lists[z] = 0;
		for (i = 0; i < list_count[z]; i++) {
			u16 delta_list_size;
			u32 list_number;
			unsigned int zone_number;
			u8 size_data[sizeof(u16)];

			result = uds_read_from_buffered_reader(buffered_readers[z],
							       size_data,
							       sizeof(size_data));
			if (result != UDS_SUCCESS) {
				return vdo_log_warning_strerror(result,
								"failed to read delta index size");
			}

			delta_list_size = get_unaligned_le16(size_data);
			if (delta_list_size > 0)
				delta_index->load_lists[z] += 1;

			list_number = first_list[z] + i;
			zone_number = list_number / delta_index->lists_per_zone;
			delta_zone = &delta_index->delta_zones[zone_number];
			list_number -= delta_zone->first_list;
			delta_zone->delta_lists[list_number + 1].size = delta_list_size;
		}
	}

	/* Prepare each zone to start receiving the delta list data. */
	for (z = 0; z < delta_index->zone_count; z++)
		rebalance_lists(&delta_index->delta_zones[z]);

	return UDS_SUCCESS;
}

static int restore_delta_list_to_zone(struct delta_zone *delta_zone,
				      const struct delta_list_save_info *save_info,
				      const u8 *data)
{
	struct delta_list *delta_list;
	u16 bit_count;
	u16 byte_count;
	u32 list_number = save_info->index - delta_zone->first_list;

	if (list_number >= delta_zone->list_count) {
		return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
						"invalid delta list number %u not in range [%u,%u)",
						save_info->index, delta_zone->first_list,
						delta_zone->first_list + delta_zone->list_count);
	}

	delta_list = &delta_zone->delta_lists[list_number + 1];
	if (delta_list->size == 0) {
		return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
						"unexpected delta list number %u",
						save_info->index);
	}

	bit_count = delta_list->size + save_info->bit_offset;
	byte_count = BITS_TO_BYTES(bit_count);
	if (save_info->byte_count != byte_count) {
		return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
						"unexpected delta list size %u != %u",
						save_info->byte_count, byte_count);
	}

	move_bits(data, save_info->bit_offset, delta_zone->memory, delta_list->start,
		  delta_list->size);
	return UDS_SUCCESS;
}

static int restore_delta_list_data(struct delta_index *delta_index, unsigned int load_zone,
				   struct buffered_reader *buffered_reader, u8 *data)
{
	int result;
	struct delta_list_save_info save_info;
	u8 buffer[sizeof(struct delta_list_save_info)];
	unsigned int new_zone;

	result = uds_read_from_buffered_reader(buffered_reader, buffer, sizeof(buffer));
	if (result != UDS_SUCCESS) {
		return vdo_log_warning_strerror(result,
						"failed to read delta list data");
	}

	save_info = (struct delta_list_save_info) {
		.tag = buffer[0],
		.bit_offset = buffer[1],
		.byte_count = get_unaligned_le16(&buffer[2]),
		.index = get_unaligned_le32(&buffer[4]),
	};

	if ((save_info.bit_offset >= BITS_PER_BYTE) ||
	    (save_info.byte_count > DELTA_LIST_MAX_BYTE_COUNT)) {
		return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
						"corrupt delta list data");
	}

	/* Make sure the data is intended for this delta index. */
	if (save_info.tag != delta_index->tag)
		return UDS_CORRUPT_DATA;

	if (save_info.index >= delta_index->list_count) {
		return vdo_log_warning_strerror(UDS_CORRUPT_DATA,
						"invalid delta list number %u of %u",
						save_info.index,
						delta_index->list_count);
	}

	result = uds_read_from_buffered_reader(buffered_reader, data,
					       save_info.byte_count);
	if (result != UDS_SUCCESS) {
		return vdo_log_warning_strerror(result,
						"failed to read delta list data");
	}

	delta_index->load_lists[load_zone] -= 1;
	new_zone = save_info.index / delta_index->lists_per_zone;
	return restore_delta_list_to_zone(&delta_index->delta_zones[new_zone],
					  &save_info, data);
}

/* Restore delta lists from saved data. */
int uds_finish_restoring_delta_index(struct delta_index *delta_index,
				     struct buffered_reader **buffered_readers,
				     unsigned int reader_count)
{
	int result;
	int saved_result = UDS_SUCCESS;
	unsigned int z;
	u8 *data;

	result = vdo_allocate(DELTA_LIST_MAX_BYTE_COUNT, u8, __func__, &data);
	if (result != VDO_SUCCESS)
		return result;

	for (z = 0; z < reader_count; z++) {
		while (delta_index->load_lists[z] > 0) {
			result = restore_delta_list_data(delta_index, z,
							 buffered_readers[z], data);
			if (result != UDS_SUCCESS) {
				saved_result = result;
				break;
			}
		}
	}

	vdo_free(data);
	return saved_result;
}

int uds_check_guard_delta_lists(struct buffered_reader **buffered_readers,
				unsigned int reader_count)
{
	int result;
	unsigned int z;
	u8 buffer[sizeof(struct delta_list_save_info)];

	for (z = 0; z < reader_count; z++) {
		result = uds_read_from_buffered_reader(buffered_readers[z], buffer,
						       sizeof(buffer));
		if (result != UDS_SUCCESS)
			return result;

		if (buffer[0] != 'z')
			return UDS_CORRUPT_DATA;
	}

	return UDS_SUCCESS;
}

static int flush_delta_list(struct delta_zone *zone, u32 flush_index)
{
	struct delta_list *delta_list;
	u8 buffer[sizeof(struct delta_list_save_info)];
	int result;

	delta_list = &zone->delta_lists[flush_index + 1];

	buffer[0] = zone->tag;
	buffer[1] = delta_list->start % BITS_PER_BYTE;
	put_unaligned_le16(get_delta_list_byte_size(delta_list), &buffer[2]);
	put_unaligned_le32(zone->first_list + flush_index, &buffer[4]);

	result = uds_write_to_buffered_writer(zone->buffered_writer, buffer,
					      sizeof(buffer));
	if (result != UDS_SUCCESS) {
		vdo_log_warning_strerror(result, "failed to write delta list memory");
		return result;
	}

	result = uds_write_to_buffered_writer(zone->buffered_writer,
					      zone->memory + get_delta_list_byte_start(delta_list),
					      get_delta_list_byte_size(delta_list));
	if (result != UDS_SUCCESS)
		vdo_log_warning_strerror(result, "failed to write delta list memory");

	return result;
}

/* Start saving a delta index zone to a buffered output stream. */
int uds_start_saving_delta_index(const struct delta_index *delta_index,
				 unsigned int zone_number,
				 struct buffered_writer *buffered_writer)
{
	int result;
	u32 i;
	struct delta_zone *delta_zone;
	u8 buffer[sizeof(struct delta_index_header)];
	size_t offset = 0;

	delta_zone = &delta_index->delta_zones[zone_number];
	memcpy(buffer, DELTA_INDEX_MAGIC, MAGIC_SIZE);
	offset += MAGIC_SIZE;
	encode_u32_le(buffer, &offset, zone_number);
	encode_u32_le(buffer, &offset, delta_index->zone_count);
	encode_u32_le(buffer, &offset, delta_zone->first_list);
	encode_u32_le(buffer, &offset, delta_zone->list_count);
	encode_u64_le(buffer, &offset, delta_zone->record_count);
	encode_u64_le(buffer, &offset, delta_zone->collision_count);

	result = VDO_ASSERT(offset == sizeof(struct delta_index_header),
			    "%zu bytes encoded of %zu expected", offset,
			    sizeof(struct delta_index_header));
	if (result != VDO_SUCCESS)
		return result;

	result = uds_write_to_buffered_writer(buffered_writer, buffer, offset);
	if (result != UDS_SUCCESS)
		return vdo_log_warning_strerror(result,
						"failed to write delta index header");

	for (i = 0; i < delta_zone->list_count; i++) {
		u8 data[sizeof(u16)];
		struct delta_list *delta_list;

		delta_list = &delta_zone->delta_lists[i + 1];
		put_unaligned_le16(delta_list->size, data);
		result = uds_write_to_buffered_writer(buffered_writer, data,
						      sizeof(data));
		if (result != UDS_SUCCESS)
			return vdo_log_warning_strerror(result,
							"failed to write delta list size");
	}

	delta_zone->buffered_writer = buffered_writer;
	return UDS_SUCCESS;
}

int uds_finish_saving_delta_index(const struct delta_index *delta_index,
				  unsigned int zone_number)
{
	int result;
	int first_error = UDS_SUCCESS;
	u32 i;
	struct delta_zone *delta_zone;
	struct delta_list *delta_list;

	delta_zone = &delta_index->delta_zones[zone_number];
	for (i = 0; i < delta_zone->list_count; i++) {
		delta_list = &delta_zone->delta_lists[i + 1];
		if (delta_list->size > 0) {
			result = flush_delta_list(delta_zone, i);
			if ((result != UDS_SUCCESS) && (first_error == UDS_SUCCESS))
				first_error = result;
		}
	}

	delta_zone->buffered_writer = NULL;
	return first_error;
}

int uds_write_guard_delta_list(struct buffered_writer *buffered_writer)
{
	int result;
	u8 buffer[sizeof(struct delta_list_save_info)];

	memset(buffer, 0, sizeof(struct delta_list_save_info));
	buffer[0] = 'z';

	result = uds_write_to_buffered_writer(buffered_writer, buffer, sizeof(buffer));
	if (result != UDS_SUCCESS)
		vdo_log_warning_strerror(result, "failed to write guard delta list");

	return UDS_SUCCESS;
}

size_t uds_compute_delta_index_save_bytes(u32 list_count, size_t memory_size)
{
	/* A single zone will use at least as much memory as any other zone count. */
	return (sizeof(struct delta_index_header) +
		list_count * (sizeof(struct delta_list_save_info) + 1) +
		get_zone_memory_size(1, memory_size));
}

static int assert_not_at_end(const struct delta_index_entry *delta_entry)
{
	int result = VDO_ASSERT(!delta_entry->at_end,
				"operation is invalid because the list entry is at the end of the delta list");
	if (result != VDO_SUCCESS)
		result = UDS_BAD_STATE;

	return result;
}

/*
 * Prepare to search for an entry in the specified delta list.
 *
 * This is always the first function to be called when dealing with delta index entries. It is
 * always followed by calls to uds_next_delta_index_entry() to iterate through a delta list. The
 * fields of the delta_index_entry argument will be set up for iteration, but will not contain an
 * entry from the list.
 */
int uds_start_delta_index_search(const struct delta_index *delta_index, u32 list_number,
				 u32 key, struct delta_index_entry *delta_entry)
{
	int result;
	unsigned int zone_number;
	struct delta_zone *delta_zone;
	struct delta_list *delta_list;

	result = VDO_ASSERT((list_number < delta_index->list_count),
			    "Delta list number (%u) is out of range (%u)", list_number,
			    delta_index->list_count);
	if (result != VDO_SUCCESS)
		return UDS_CORRUPT_DATA;

	zone_number = list_number / delta_index->lists_per_zone;
	delta_zone = &delta_index->delta_zones[zone_number];
	list_number -= delta_zone->first_list;
	result = VDO_ASSERT((list_number < delta_zone->list_count),
			    "Delta list number (%u) is out of range (%u) for zone (%u)",
			    list_number, delta_zone->list_count, zone_number);
	if (result != VDO_SUCCESS)
		return UDS_CORRUPT_DATA;

	if (delta_index->mutable) {
		delta_list = &delta_zone->delta_lists[list_number + 1];
	} else {
		u32 end_offset;

		/*
		 * Translate the immutable delta list header into a temporary
		 * full delta list header.
		 */
		delta_list = &delta_entry->temp_delta_list;
		delta_list->start = get_immutable_start(delta_zone->memory, list_number);
		end_offset = get_immutable_start(delta_zone->memory, list_number + 1);
		delta_list->size = end_offset - delta_list->start;
		delta_list->save_key = 0;
		delta_list->save_offset = 0;
	}

	if (key > delta_list->save_key) {
		delta_entry->key = delta_list->save_key;
		delta_entry->offset = delta_list->save_offset;
	} else {
		delta_entry->key = 0;
		delta_entry->offset = 0;
		if (key == 0) {
			/*
			 * This usually means we're about to walk the entire delta list, so get all
			 * of it into the CPU cache.
			 */
			uds_prefetch_range(&delta_zone->memory[delta_list->start / BITS_PER_BYTE],
					   delta_list->size / BITS_PER_BYTE, false);
		}
	}

	delta_entry->at_end = false;
	delta_entry->delta_zone = delta_zone;
	delta_entry->delta_list = delta_list;
	delta_entry->entry_bits = 0;
	delta_entry->is_collision = false;
	delta_entry->list_number = list_number;
	delta_entry->list_overflow = false;
	delta_entry->value_bits = delta_zone->value_bits;
	return UDS_SUCCESS;
}

static inline u64 get_delta_entry_offset(const struct delta_index_entry *delta_entry)
{
	return delta_entry->delta_list->start + delta_entry->offset;
}

/*
 * Decode a delta index entry delta value. The delta_index_entry basically describes the previous
 * list entry, and has had its offset field changed to point to the subsequent entry. We decode the
 * bit stream and update the delta_list_entry to describe the entry.
 */
static inline void decode_delta(struct delta_index_entry *delta_entry)
{
	int key_bits;
	u32 delta;
	const struct delta_zone *delta_zone = delta_entry->delta_zone;
	const u8 *memory = delta_zone->memory;
	u64 delta_offset = get_delta_entry_offset(delta_entry) + delta_entry->value_bits;
	const u8 *addr = memory + delta_offset / BITS_PER_BYTE;
	int offset = delta_offset % BITS_PER_BYTE;
	u32 data = get_unaligned_le32(addr) >> offset;

	addr += sizeof(u32);
	key_bits = delta_zone->min_bits;
	delta = data & ((1 << key_bits) - 1);
	if (delta >= delta_zone->min_keys) {
		data >>= key_bits;
		if (data == 0) {
			key_bits = sizeof(u32) * BITS_PER_BYTE - offset;
			while ((data = get_unaligned_le32(addr)) == 0) {
				addr += sizeof(u32);
				key_bits += sizeof(u32) * BITS_PER_BYTE;
			}
		}
		key_bits += ffs(data);
		delta += ((key_bits - delta_zone->min_bits - 1) * delta_zone->incr_keys);
	}
	delta_entry->delta = delta;
	delta_entry->key += delta;

	/* Check for a collision, a delta of zero after the start. */
	if (unlikely((delta == 0) && (delta_entry->offset > 0))) {
		delta_entry->is_collision = true;
		delta_entry->entry_bits = delta_entry->value_bits + key_bits + COLLISION_BITS;
	} else {
		delta_entry->is_collision = false;
		delta_entry->entry_bits = delta_entry->value_bits + key_bits;
	}
}
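
/*
 * To summarize the key encoding shared by decode_delta() and encode_delta(): a delta smaller
 * than min_keys is stored directly in min_bits bits. A larger delta is stored as
 * t1 = ((delta - min_keys) % incr_keys) + min_keys in min_bits bits, followed by
 * t2 = (delta - min_keys) / incr_keys zero bits and a terminating 1 bit, so each additional zero
 * bit adds incr_keys to the decoded delta.
 */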

noinline int uds_next_delta_index_entry(struct delta_index_entry *delta_entry)
{
	int result;
	const struct delta_list *delta_list;
	u32 next_offset;
	u16 size;

	result = assert_not_at_end(delta_entry);
	if (result != UDS_SUCCESS)
		return result;

	delta_list = delta_entry->delta_list;
	delta_entry->offset += delta_entry->entry_bits;
	size = delta_list->size;
	if (unlikely(delta_entry->offset >= size)) {
		delta_entry->at_end = true;
		delta_entry->delta = 0;
		delta_entry->is_collision = false;
		result = VDO_ASSERT((delta_entry->offset == size),
				    "next offset past end of delta list");
		if (result != VDO_SUCCESS)
			result = UDS_CORRUPT_DATA;

		return result;
	}

	decode_delta(delta_entry);

	next_offset = delta_entry->offset + delta_entry->entry_bits;
	if (next_offset > size) {
		/*
		 * This is not an assertion because uds_validate_chapter_index_page() wants to
		 * handle this error.
		 */
		vdo_log_warning("Decoded past the end of the delta list");
		return UDS_CORRUPT_DATA;
	}

	return UDS_SUCCESS;
}

int uds_remember_delta_index_offset(const struct delta_index_entry *delta_entry)
{
	int result;
	struct delta_list *delta_list = delta_entry->delta_list;

	result = VDO_ASSERT(!delta_entry->is_collision, "entry is not a collision");
	if (result != VDO_SUCCESS)
		return result;

	delta_list->save_key = delta_entry->key - delta_entry->delta;
	delta_list->save_offset = delta_entry->offset;
	return UDS_SUCCESS;
}

static void set_delta(struct delta_index_entry *delta_entry, u32 delta)
{
	const struct delta_zone *delta_zone = delta_entry->delta_zone;
	u32 key_bits = (delta_zone->min_bits +
			((delta_zone->incr_keys - delta_zone->min_keys + delta) /
			 delta_zone->incr_keys));

	delta_entry->delta = delta;
	delta_entry->entry_bits = delta_entry->value_bits + key_bits;
}
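
/*
 * Note that the key_bits computation in set_delta() matches the encoded length: deltas below
 * min_keys use exactly min_bits bits, and each full incr_keys step beyond that adds one more bit
 * (another zero bit written by encode_delta()).
 */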
1412
1413static void get_collision_name(const struct delta_index_entry *entry, u8 *name)
1414{
1415	u64 offset = get_delta_entry_offset(entry) + entry->entry_bits - COLLISION_BITS;
1416	const u8 *addr = entry->delta_zone->memory + offset / BITS_PER_BYTE;
1417	int size = COLLISION_BYTES;
1418	int shift = offset % BITS_PER_BYTE;
1419
1420	while (--size >= 0)
1421		*name++ = get_unaligned_le16(addr++) >> shift;
1422}
1423
1424static void set_collision_name(const struct delta_index_entry *entry, const u8 *name)
1425{
1426	u64 offset = get_delta_entry_offset(entry) + entry->entry_bits - COLLISION_BITS;
1427	u8 *addr = entry->delta_zone->memory + offset / BITS_PER_BYTE;
1428	int size = COLLISION_BYTES;
1429	int shift = offset % BITS_PER_BYTE;
1430	u16 mask = ~((u16) 0xFF << shift);
1431	u16 data;
1432
1433	while (--size >= 0) {
1434		data = (get_unaligned_le16(addr) & mask) | (*name++ << shift);
1435		put_unaligned_le16(data, addr++);
1436	}
1437}
1438
1439int uds_get_delta_index_entry(const struct delta_index *delta_index, u32 list_number,
1440			      u32 key, const u8 *name,
1441			      struct delta_index_entry *delta_entry)
1442{
1443	int result;
1444
1445	result = uds_start_delta_index_search(delta_index, list_number, key,
1446					      delta_entry);
1447	if (result != UDS_SUCCESS)
1448		return result;
1449
1450	do {
1451		result = uds_next_delta_index_entry(delta_entry);
1452		if (result != UDS_SUCCESS)
1453			return result;
1454	} while (!delta_entry->at_end && (key > delta_entry->key));
1455
1456	result = uds_remember_delta_index_offset(delta_entry);
1457	if (result != UDS_SUCCESS)
1458		return result;
1459
1460	if (!delta_entry->at_end && (key == delta_entry->key)) {
1461		struct delta_index_entry collision_entry = *delta_entry;
1462
1463		for (;;) {
1464			u8 full_name[COLLISION_BYTES];
1465
1466			result = uds_next_delta_index_entry(&collision_entry);
1467			if (result != UDS_SUCCESS)
1468				return result;
1469
1470			if (collision_entry.at_end || !collision_entry.is_collision)
1471				break;
1472
1473			get_collision_name(&collision_entry, full_name);
1474			if (memcmp(full_name, name, COLLISION_BYTES) == 0) {
1475				*delta_entry = collision_entry;
1476				break;
1477			}
1478		}
1479	}
1480
1481	return UDS_SUCCESS;
1482}
1483
1484int uds_get_delta_entry_collision(const struct delta_index_entry *delta_entry, u8 *name)
1485{
1486	int result;
1487
1488	result = assert_not_at_end(delta_entry);
1489	if (result != UDS_SUCCESS)
1490		return result;
1491
1492	result = VDO_ASSERT(delta_entry->is_collision,
1493			    "Cannot get full block name from a non-collision delta index entry");
1494	if (result != VDO_SUCCESS)
1495		return UDS_BAD_STATE;
1496
1497	get_collision_name(delta_entry, name);
1498	return UDS_SUCCESS;
1499}
1500
1501u32 uds_get_delta_entry_value(const struct delta_index_entry *delta_entry)
1502{
1503	return get_field(delta_entry->delta_zone->memory,
1504			 get_delta_entry_offset(delta_entry), delta_entry->value_bits);
1505}
1506
1507static int assert_mutable_entry(const struct delta_index_entry *delta_entry)
1508{
1509	int result = VDO_ASSERT((delta_entry->delta_list != &delta_entry->temp_delta_list),
1510			        "delta index is mutable");
1511	if (result != VDO_SUCCESS)
1512		result = UDS_BAD_STATE;
1513
1514	return result;
1515}
1516
1517int uds_set_delta_entry_value(const struct delta_index_entry *delta_entry, u32 value)
1518{
1519	int result;
1520	u32 value_mask = (1 << delta_entry->value_bits) - 1;
1521
1522	result = assert_mutable_entry(delta_entry);
1523	if (result != UDS_SUCCESS)
1524		return result;
1525
1526	result = assert_not_at_end(delta_entry);
1527	if (result != UDS_SUCCESS)
1528		return result;
1529
1530	result = VDO_ASSERT((value & value_mask) == value,
1531			    "Value (%u) being set in a delta index is too large (must fit in %u bits)",
1532			    value, delta_entry->value_bits);
1533	if (result != VDO_SUCCESS)
1534		return UDS_INVALID_ARGUMENT;
1535
1536	set_field(value, delta_entry->delta_zone->memory,
1537		  get_delta_entry_offset(delta_entry), delta_entry->value_bits);
1538	return UDS_SUCCESS;
1539}
1540
1541/*
1542 * Extend the memory used by the delta lists by adding growing_size bytes before the list indicated
1543 * by growing_index, then rebalancing the lists in the new chunk.
1544 */
1545static int extend_delta_zone(struct delta_zone *delta_zone, u32 growing_index,
1546			     size_t growing_size)
1547{
1548	ktime_t start_time;
1549	ktime_t end_time;
1550	struct delta_list *delta_lists;
1551	u32 i;
1552	size_t used_space;
1553
1554
1555	/* Calculate the amount of space that is or will be in use. */
1556	start_time = current_time_ns(CLOCK_MONOTONIC);
1557	delta_lists = delta_zone->delta_lists;
1558	used_space = growing_size;
1559	for (i = 0; i <= delta_zone->list_count + 1; i++)
1560		used_space += get_delta_list_byte_size(&delta_lists[i]);
1561
1562	if (delta_zone->size < used_space)
1563		return UDS_OVERFLOW;
1564
1565	/* Compute the new offsets of the delta lists. */
1566	compute_new_list_offsets(delta_zone, growing_index, growing_size, used_space);
1567
1568	/*
1569	 * When we rebalance the delta list, we will include the end guard list in the rebalancing.
1570	 * It contains the end guard data, which must be copied.
1571	 */
1572	rebalance_delta_zone(delta_zone, 1, delta_zone->list_count + 1);
1573	end_time = current_time_ns(CLOCK_MONOTONIC);
1574	delta_zone->rebalance_count++;
1575	delta_zone->rebalance_time += ktime_sub(end_time, start_time);
1576	return UDS_SUCCESS;
1577}
1578
static int insert_bits(struct delta_index_entry *delta_entry, u16 size)
{
	u64 free_before;
	u64 free_after;
	u64 source;
	u64 destination;
	u32 count;
	bool before_flag;
	u8 *memory;
	struct delta_zone *delta_zone = delta_entry->delta_zone;
	struct delta_list *delta_list = delta_entry->delta_list;
	/* Compute bits in use before and after the inserted bits. */
	u32 total_size = delta_list->size;
	u32 before_size = delta_entry->offset;
	u32 after_size = total_size - delta_entry->offset;

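	/* A delta list may not grow past U16_MAX bits; count a failed insertion as an overflow. */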
	if (total_size + size > U16_MAX) {
		delta_entry->list_overflow = true;
		delta_zone->overflow_count++;
		return UDS_OVERFLOW;
	}

	/* Compute bits available before and after the delta list. */
	free_before = (delta_list[0].start - (delta_list[-1].start + delta_list[-1].size));
	free_after = (delta_list[1].start - (delta_list[0].start + delta_list[0].size));

	if ((size <= free_before) && (size <= free_after)) {
		/*
		 * We have enough space to use either before or after the list. Select the smaller
		 * amount of data. If it is exactly the same, try to take from the larger amount of
		 * free space.
		 */
		if (before_size < after_size)
			before_flag = true;
		else if (after_size < before_size)
			before_flag = false;
		else
			before_flag = free_before > free_after;
	} else if (size <= free_before) {
		/* There is space before but not after. */
		before_flag = true;
	} else if (size <= free_after) {
		/* There is space after but not before. */
		before_flag = false;
	} else {
		/*
		 * Neither of the surrounding spaces is large enough for this request. Extend
		 * and/or rebalance the delta list memory, choosing to move the least amount of
		 * data.
		 */
		int result;
		u32 growing_index = delta_entry->list_number + 1;

		before_flag = before_size < after_size;
		if (!before_flag)
			growing_index++;
		result = extend_delta_zone(delta_zone, growing_index,
					   BITS_TO_BYTES(size));
		if (result != UDS_SUCCESS)
			return result;
	}

	delta_list->size += size;
	if (before_flag) {
		source = delta_list->start;
		destination = source - size;
		delta_list->start -= size;
		count = before_size;
	} else {
		source = delta_list->start + delta_entry->offset;
		destination = source + size;
		count = after_size;
	}

	memory = delta_zone->memory;
	move_bits(memory, source, memory, destination, count);
	return UDS_SUCCESS;
}

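/*
 * Encode the delta of the current entry immediately after its value field. A delta smaller than
 * min_keys is stored directly in a min_bits field. A larger delta is stored as a remainder in the
 * min_bits field followed by a unary quotient: t2 zero bits terminated by a 1 bit. For
 * illustration (hypothetical constants): with min_bits = 10, min_keys = 314, and incr_keys = 710,
 * a delta of 2000 gives temp = 1686, so t1 = 580 is written in the 10-bit field, followed by two
 * zero bits (t2 = 2) and the terminating 1 bit.
 */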
static void encode_delta(const struct delta_index_entry *delta_entry)
{
	u32 temp;
	u32 t1;
	u32 t2;
	u64 offset;
	const struct delta_zone *delta_zone = delta_entry->delta_zone;
	u8 *memory = delta_zone->memory;

	offset = get_delta_entry_offset(delta_entry) + delta_entry->value_bits;
	if (delta_entry->delta < delta_zone->min_keys) {
		set_field(delta_entry->delta, memory, offset, delta_zone->min_bits);
		return;
	}

	temp = delta_entry->delta - delta_zone->min_keys;
	t1 = (temp % delta_zone->incr_keys) + delta_zone->min_keys;
	t2 = temp / delta_zone->incr_keys;
	set_field(t1, memory, offset, delta_zone->min_bits);
	set_zero(memory, offset + delta_zone->min_bits, t2);
	set_field(1, memory, offset + delta_zone->min_bits + t2, 1);
}

static void encode_entry(const struct delta_index_entry *delta_entry, u32 value,
			 const u8 *name)
{
	u8 *memory = delta_entry->delta_zone->memory;
	u64 offset = get_delta_entry_offset(delta_entry);

	set_field(value, memory, offset, delta_entry->value_bits);
	encode_delta(delta_entry);
	if (name != NULL)
		set_collision_name(delta_entry, name);
}

/*
 * Create a new entry in the delta index. If the entry is a collision, the full 256 bit name must
 * be provided.
 */
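/*
 * The caller positions delta_entry with a prior search: either at the end of the list or at the
 * entry that will immediately follow the new one. When a name is supplied, the new entry is a
 * collision entry inserted directly after the entry at the current position, which must have the
 * same key.
 */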
int uds_put_delta_index_entry(struct delta_index_entry *delta_entry, u32 key, u32 value,
			      const u8 *name)
{
	int result;
	struct delta_zone *delta_zone;

	result = assert_mutable_entry(delta_entry);
	if (result != UDS_SUCCESS)
		return result;

	if (delta_entry->is_collision) {
		/*
		 * The caller wants us to insert a collision entry onto a collision entry. This
		 * happens when we find a collision and attempt to add the name again to the index.
		 * This is normally a fatal error unless we are replaying a closed chapter while we
		 * are rebuilding a volume index.
		 */
		return UDS_DUPLICATE_NAME;
	}

	if (delta_entry->offset < delta_entry->delta_list->save_offset) {
		/*
		 * The saved entry offset is after the new entry and will no longer be valid, so
		 * replace it with the insertion point.
		 */
		result = uds_remember_delta_index_offset(delta_entry);
		if (result != UDS_SUCCESS)
			return result;
	}

	if (name != NULL) {
		/* Insert a collision entry which is placed after this entry. */
		result = assert_not_at_end(delta_entry);
		if (result != UDS_SUCCESS)
			return result;

		result = VDO_ASSERT((key == delta_entry->key),
				    "incorrect key for collision entry");
		if (result != VDO_SUCCESS)
			return result;

		delta_entry->offset += delta_entry->entry_bits;
		set_delta(delta_entry, 0);
		delta_entry->is_collision = true;
		delta_entry->entry_bits += COLLISION_BITS;
		result = insert_bits(delta_entry, delta_entry->entry_bits);
	} else if (delta_entry->at_end) {
		/* Insert a new entry at the end of the delta list. */
		result = VDO_ASSERT((key >= delta_entry->key), "key past end of list");
		if (result != VDO_SUCCESS)
			return result;

		set_delta(delta_entry, key - delta_entry->key);
		delta_entry->key = key;
		delta_entry->at_end = false;
		result = insert_bits(delta_entry, delta_entry->entry_bits);
	} else {
		u16 old_entry_size;
		u16 additional_size;
		struct delta_index_entry next_entry;
		u32 next_value;

		/*
		 * Insert a new entry which requires the delta in the following entry to be
		 * updated.
		 */
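		/*
		 * For illustration (hypothetical keys): if the list holds keys 40 and 60, the
		 * entry for 60 carries a delta of 20. Inserting key 55 here gives the new entry
		 * a delta of 15 and rewrites the following entry's delta as 5.
		 */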
		result = VDO_ASSERT((key < delta_entry->key),
				    "key precedes following entry");
		if (result != VDO_SUCCESS)
			return result;

		result = VDO_ASSERT((key >= delta_entry->key - delta_entry->delta),
				    "key affects following entry's delta");
		if (result != VDO_SUCCESS)
			return result;

		old_entry_size = delta_entry->entry_bits;
		next_entry = *delta_entry;
		next_value = uds_get_delta_entry_value(&next_entry);
		set_delta(delta_entry, key - (delta_entry->key - delta_entry->delta));
		delta_entry->key = key;
		set_delta(&next_entry, next_entry.key - key);
		next_entry.offset += delta_entry->entry_bits;
		/* The two new entries are always bigger than the single entry being replaced. */
		additional_size = (delta_entry->entry_bits +
				   next_entry.entry_bits - old_entry_size);
		result = insert_bits(delta_entry, additional_size);
		if (result != UDS_SUCCESS)
			return result;

		encode_entry(&next_entry, next_value, NULL);
	}

	if (result != UDS_SUCCESS)
		return result;

	encode_entry(delta_entry, value, name);
	delta_zone = delta_entry->delta_zone;
	delta_zone->record_count++;
	delta_zone->collision_count += delta_entry->is_collision ? 1 : 0;
	return UDS_SUCCESS;
}

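/*
 * Close the gap left by a deleted entry by sliding 'size' bits of the list over it, moving
 * whichever side of the list holds less data.
 */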
static void delete_bits(const struct delta_index_entry *delta_entry, int size)
{
	u64 source;
	u64 destination;
	u32 count;
	bool before_flag;
	struct delta_list *delta_list = delta_entry->delta_list;
	u8 *memory = delta_entry->delta_zone->memory;
	/* Compute bits retained before and after the deleted bits. */
	u32 total_size = delta_list->size;
	u32 before_size = delta_entry->offset;
	u32 after_size = total_size - delta_entry->offset - size;

	/*
	 * Determine whether to add to the available space either before or after the delta list.
	 * We prefer to move the least amount of data. If it is exactly the same, try to add to the
	 * smaller amount of free space.
	 */
	if (before_size < after_size) {
		before_flag = true;
	} else if (after_size < before_size) {
		before_flag = false;
	} else {
		u64 free_before =
			(delta_list[0].start - (delta_list[-1].start + delta_list[-1].size));
		u64 free_after =
			(delta_list[1].start - (delta_list[0].start + delta_list[0].size));

		before_flag = (free_before < free_after);
	}

	delta_list->size -= size;
	if (before_flag) {
		source = delta_list->start;
		destination = source + size;
		delta_list->start += size;
		count = before_size;
	} else {
		destination = delta_list->start + delta_entry->offset;
		source = destination + size;
		count = after_size;
	}

	move_bits(memory, source, memory, destination, count);
}

int uds_remove_delta_index_entry(struct delta_index_entry *delta_entry)
{
	int result;
	struct delta_index_entry next_entry;
	struct delta_zone *delta_zone;
	struct delta_list *delta_list;

	result = assert_mutable_entry(delta_entry);
	if (result != UDS_SUCCESS)
		return result;

	next_entry = *delta_entry;
	result = uds_next_delta_index_entry(&next_entry);
	if (result != UDS_SUCCESS)
		return result;

	delta_zone = delta_entry->delta_zone;

	if (delta_entry->is_collision) {
		/* This is a collision entry, so just remove it. */
		delete_bits(delta_entry, delta_entry->entry_bits);
		next_entry.offset = delta_entry->offset;
		delta_zone->collision_count -= 1;
	} else if (next_entry.at_end) {
		/* This entry is at the end of the list, so just remove it. */
		delete_bits(delta_entry, delta_entry->entry_bits);
		next_entry.key -= delta_entry->delta;
		next_entry.offset = delta_entry->offset;
	} else {
		/* The delta in the next entry needs to be updated. */
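		/*
		 * For illustration (hypothetical keys): with entries for keys 40, 55, and 60,
		 * removing the entry for 55 (delta 15) leaves the entry for 60 with a delta of
		 * 20, the sum of the two original deltas (15 + 5).
		 */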
		u32 next_value = uds_get_delta_entry_value(&next_entry);
		u16 old_size = delta_entry->entry_bits + next_entry.entry_bits;

		if (next_entry.is_collision) {
			next_entry.is_collision = false;
			delta_zone->collision_count -= 1;
		}

		set_delta(&next_entry, delta_entry->delta + next_entry.delta);
		next_entry.offset = delta_entry->offset;
		/* The one new entry is always smaller than the two entries being replaced. */
		delete_bits(delta_entry, old_size - next_entry.entry_bits);
		encode_entry(&next_entry, next_value, NULL);
	}

	delta_zone->record_count--;
	delta_zone->discard_count++;
	*delta_entry = next_entry;

	delta_list = delta_entry->delta_list;
	if (delta_entry->offset < delta_list->save_offset) {
		/* The saved entry offset is no longer valid. */
		delta_list->save_key = 0;
		delta_list->save_offset = 0;
	}

	return UDS_SUCCESS;
}

void uds_get_delta_index_stats(const struct delta_index *delta_index,
			       struct delta_index_stats *stats)
{
	unsigned int z;
	const struct delta_zone *delta_zone;

	memset(stats, 0, sizeof(struct delta_index_stats));
	for (z = 0; z < delta_index->zone_count; z++) {
		delta_zone = &delta_index->delta_zones[z];
		stats->rebalance_time += delta_zone->rebalance_time;
		stats->rebalance_count += delta_zone->rebalance_count;
		stats->record_count += delta_zone->record_count;
		stats->collision_count += delta_zone->collision_count;
		stats->discard_count += delta_zone->discard_count;
		stats->overflow_count += delta_zone->overflow_count;
		stats->list_count += delta_zone->list_count;
	}
}

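/*
 * Estimate the number of bits needed to store entry_count entries, assuming each delta costs
 * about min_bits + 1.5 bits on average. For illustration (hypothetical parameters): 1000 entries
 * with payload_bits = 8 and min_bits = 10 come to 1000 * (8 + 10 + 1) + 1000 / 2 = 19500 bits.
 */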
size_t uds_compute_delta_index_size(u32 entry_count, u32 mean_delta, u32 payload_bits)
{
	u16 min_bits;
	u32 incr_keys;
	u32 min_keys;

	compute_coding_constants(mean_delta, &min_bits, &min_keys, &incr_keys);
	/* On average, each delta is encoded into about min_bits + 1.5 bits. */
	return entry_count * (payload_bits + min_bits + 1) + entry_count / 2;
}

u32 uds_get_delta_index_page_count(u32 entry_count, u32 list_count, u32 mean_delta,
				   u32 payload_bits, size_t bytes_per_page)
{
	unsigned int bits_per_delta_list;
	unsigned int bits_per_page;
	size_t bits_per_index;

	/* Compute the expected number of bits needed for all the entries. */
	bits_per_index = uds_compute_delta_index_size(entry_count, mean_delta,
						      payload_bits);
	bits_per_delta_list = bits_per_index / list_count;

	/* Add in the immutable delta list headers. */
	bits_per_index += list_count * IMMUTABLE_HEADER_SIZE;
	/* Compute the number of usable bits on an immutable index page. */
	bits_per_page = ((bytes_per_page - sizeof(struct delta_page_header)) * BITS_PER_BYTE);
	/*
	 * Reduce the bits per page by one immutable delta list header and one delta list to
	 * account for internal fragmentation.
	 */
	bits_per_page -= IMMUTABLE_HEADER_SIZE + bits_per_delta_list;
	/* Now compute the number of pages needed. */
	return DIV_ROUND_UP(bits_per_index, bits_per_page);
}

void uds_log_delta_index_entry(struct delta_index_entry *delta_entry)
{
	vdo_log_ratelimit(vdo_log_info,
			  "List 0x%X Key 0x%X Offset 0x%X%s%s List_size 0x%X%s",
			  delta_entry->list_number, delta_entry->key,
			  delta_entry->offset, delta_entry->at_end ? " end" : "",
			  delta_entry->is_collision ? " collision" : "",
			  delta_entry->delta_list->size,
			  delta_entry->list_overflow ? " overflow" : "");
	delta_entry->list_overflow = false;
}
