// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2011 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */

#include "dm-space-map-common.h"
#include "dm-transaction-manager.h"
#include "dm-btree-internal.h"
#include "dm-persistent-data-internal.h"

#include <linux/bitops.h>
#include <linux/device-mapper.h>

#define DM_MSG_PREFIX "space map common"

/*----------------------------------------------------------------*/

/*
 * Index validator.
 */
#define INDEX_CSUM_XOR 160478

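/*
 * The checksum covers everything after the csum field itself (hence the
 * block_size - sizeof(__le32) below).  Each block type uses its own XOR
 * seed, so a block of one type that is read back as another should fail
 * validation rather than be silently misinterpreted.
 */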
static void index_prepare_for_write(struct dm_block_validator *v,
				    struct dm_block *b,
				    size_t block_size)
{
	struct disk_metadata_index *mi_le = dm_block_data(b);

	mi_le->blocknr = cpu_to_le64(dm_block_location(b));
	mi_le->csum = cpu_to_le32(dm_bm_checksum(&mi_le->padding,
						 block_size - sizeof(__le32),
						 INDEX_CSUM_XOR));
}

static int index_check(struct dm_block_validator *v,
		       struct dm_block *b,
		       size_t block_size)
{
	struct disk_metadata_index *mi_le = dm_block_data(b);
	__le32 csum_disk;

	if (dm_block_location(b) != le64_to_cpu(mi_le->blocknr)) {
		DMERR_LIMIT("%s failed: blocknr %llu != wanted %llu", __func__,
			    le64_to_cpu(mi_le->blocknr), dm_block_location(b));
		return -ENOTBLK;
	}

	csum_disk = cpu_to_le32(dm_bm_checksum(&mi_le->padding,
					       block_size - sizeof(__le32),
					       INDEX_CSUM_XOR));
	if (csum_disk != mi_le->csum) {
		DMERR_LIMIT("%s failed: csum %u != wanted %u", __func__,
			    le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum));
		return -EILSEQ;
	}

	return 0;
}

static struct dm_block_validator index_validator = {
	.name = "index",
	.prepare_for_write = index_prepare_for_write,
	.check = index_check
};

/*----------------------------------------------------------------*/

/*
 * Bitmap validator
 */
#define BITMAP_CSUM_XOR 240779

static void dm_bitmap_prepare_for_write(struct dm_block_validator *v,
					struct dm_block *b,
					size_t block_size)
{
	struct disk_bitmap_header *disk_header = dm_block_data(b);

	disk_header->blocknr = cpu_to_le64(dm_block_location(b));
	disk_header->csum = cpu_to_le32(dm_bm_checksum(&disk_header->not_used,
						       block_size - sizeof(__le32),
						       BITMAP_CSUM_XOR));
}

static int dm_bitmap_check(struct dm_block_validator *v,
			   struct dm_block *b,
			   size_t block_size)
{
	struct disk_bitmap_header *disk_header = dm_block_data(b);
	__le32 csum_disk;

	if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) {
		DMERR_LIMIT("bitmap check failed: blocknr %llu != wanted %llu",
			    le64_to_cpu(disk_header->blocknr), dm_block_location(b));
		return -ENOTBLK;
	}

	csum_disk = cpu_to_le32(dm_bm_checksum(&disk_header->not_used,
					       block_size - sizeof(__le32),
					       BITMAP_CSUM_XOR));
	if (csum_disk != disk_header->csum) {
		DMERR_LIMIT("bitmap check failed: csum %u != wanted %u",
			    le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum));
		return -EILSEQ;
	}

	return 0;
}

static struct dm_block_validator dm_sm_bitmap_validator = {
	.name = "sm_bitmap",
	.prepare_for_write = dm_bitmap_prepare_for_write,
	.check = dm_bitmap_check,
};

/*----------------------------------------------------------------*/

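/*
 * Each block managed by the space map has a 2-bit reference count entry,
 * packed 32 entries to a 64-bit little-endian word.  Entry i within a
 * word is addressed as little-endian bits 2i (high bit) and 2i + 1 (low
 * bit); see sm_lookup_bitmap() below.  Values 0-2 are the count itself;
 * 3 means the real count lives in the overflow btree, keyed by block.
 */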
#define ENTRIES_PER_WORD 32
#define ENTRIES_SHIFT	5

static void *dm_bitmap_data(struct dm_block *b)
{
	return dm_block_data(b) + sizeof(struct disk_bitmap_header);
}

#define WORD_MASK_HIGH 0xAAAAAAAAAAAAAAAAULL

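/*
 * Returns non-zero iff every 2-bit entry in the word containing entry b
 * is in use (non-zero).  (bits + WORD_MASK_HIGH + 1) equals
 * bits - 0x5555555555555555 modulo 2^64, so this is the classic
 * "(v - 0x5555...) & ~v & 0xAAAA..." test for a zero 2-bit field,
 * negated.
 */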
static unsigned int dm_bitmap_word_used(void *addr, unsigned int b)
{
	__le64 *words_le = addr;
	__le64 *w_le = words_le + (b >> ENTRIES_SHIFT);

	uint64_t bits = le64_to_cpu(*w_le);
	uint64_t mask = (bits + WORD_MASK_HIGH + 1) & WORD_MASK_HIGH;

	return !(~bits & mask);
}

static unsigned int sm_lookup_bitmap(void *addr, unsigned int b)
{
	__le64 *words_le = addr;
	__le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
	unsigned int hi, lo;

	b = (b & (ENTRIES_PER_WORD - 1)) << 1;
	hi = !!test_bit_le(b, (void *) w_le);
	lo = !!test_bit_le(b + 1, (void *) w_le);
	return (hi << 1) | lo;
}

static void sm_set_bitmap(void *addr, unsigned int b, unsigned int val)
{
	__le64 *words_le = addr;
	__le64 *w_le = words_le + (b >> ENTRIES_SHIFT);

	b = (b & (ENTRIES_PER_WORD - 1)) << 1;

	if (val & 2)
		__set_bit_le(b, (void *) w_le);
	else
		__clear_bit_le(b, (void *) w_le);

	if (val & 1)
		__set_bit_le(b + 1, (void *) w_le);
	else
		__clear_bit_le(b + 1, (void *) w_le);
}

static int sm_find_free(void *addr, unsigned int begin, unsigned int end,
			unsigned int *result)
{
	while (begin < end) {
		if (!(begin & (ENTRIES_PER_WORD - 1)) &&
		    dm_bitmap_word_used(addr, begin)) {
			begin += ENTRIES_PER_WORD;
			continue;
		}

		if (!sm_lookup_bitmap(addr, begin)) {
			*result = begin;
			return 0;
		}

		begin++;
	}

	return -ENOSPC;
}

/*----------------------------------------------------------------*/

static int sm_ll_init(struct ll_disk *ll, struct dm_transaction_manager *tm)
{
	memset(ll, 0, sizeof(struct ll_disk));

	ll->tm = tm;

	ll->bitmap_info.tm = tm;
	ll->bitmap_info.levels = 1;

	/*
	 * Because the new bitmap blocks are created via a shadow
	 * operation, the old entry has already had its reference count
	 * decremented and we don't need the btree to do any bookkeeping.
	 */
	ll->bitmap_info.value_type.size = sizeof(struct disk_index_entry);
	ll->bitmap_info.value_type.inc = NULL;
	ll->bitmap_info.value_type.dec = NULL;
	ll->bitmap_info.value_type.equal = NULL;

	ll->ref_count_info.tm = tm;
	ll->ref_count_info.levels = 1;
	ll->ref_count_info.value_type.size = sizeof(uint32_t);
	ll->ref_count_info.value_type.inc = NULL;
	ll->ref_count_info.value_type.dec = NULL;
	ll->ref_count_info.value_type.equal = NULL;

	ll->block_size = dm_bm_block_size(dm_tm_get_bm(tm));

	if (ll->block_size > (1 << 30)) {
		DMERR("block size too big to hold bitmaps");
		return -EINVAL;
	}

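	/*
	 * Four 2-bit entries per byte.  With, say, a 4096 byte metadata
	 * block and the 16 byte disk_bitmap_header, that's
	 * (4096 - 16) * 4 = 16320 entries per bitmap block.
	 */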
	ll->entries_per_block = (ll->block_size - sizeof(struct disk_bitmap_header)) *
		ENTRIES_PER_BYTE;
	ll->nr_blocks = 0;
	ll->bitmap_root = 0;
	ll->ref_count_root = 0;
	ll->bitmap_index_changed = false;

	return 0;
}

int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks)
{
	int r;
	dm_block_t i, nr_blocks, nr_indexes;
	unsigned int old_blocks, blocks;

	nr_blocks = ll->nr_blocks + extra_blocks;
	old_blocks = dm_sector_div_up(ll->nr_blocks, ll->entries_per_block);
	blocks = dm_sector_div_up(nr_blocks, ll->entries_per_block);

	nr_indexes = dm_sector_div_up(nr_blocks, ll->entries_per_block);
	if (nr_indexes > ll->max_entries(ll)) {
		DMERR("space map too large");
		return -EINVAL;
	}

	/*
	 * We need to set this before the dm_tm_new_block() call below.
	 */
	ll->nr_blocks = nr_blocks;
	for (i = old_blocks; i < blocks; i++) {
		struct dm_block *b;
		struct disk_index_entry idx;

		r = dm_tm_new_block(ll->tm, &dm_sm_bitmap_validator, &b);
		if (r < 0)
			return r;

		idx.blocknr = cpu_to_le64(dm_block_location(b));

		dm_tm_unlock(ll->tm, b);

		idx.nr_free = cpu_to_le32(ll->entries_per_block);
		idx.none_free_before = 0;

		r = ll->save_ie(ll, i, &idx);
		if (r < 0)
			return r;
	}

	return 0;
}

int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result)
{
	int r;
	dm_block_t index = b;
	struct disk_index_entry ie_disk;
	struct dm_block *blk;

	if (b >= ll->nr_blocks) {
		DMERR_LIMIT("metadata block out of bounds");
		return -EINVAL;
	}

	b = do_div(index, ll->entries_per_block);
	r = ll->load_ie(ll, index, &ie_disk);
	if (r < 0)
		return r;

	r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
			    &dm_sm_bitmap_validator, &blk);
	if (r < 0)
		return r;

	*result = sm_lookup_bitmap(dm_bitmap_data(blk), b);

	dm_tm_unlock(ll->tm, blk);

	return 0;
}

static int sm_ll_lookup_big_ref_count(struct ll_disk *ll, dm_block_t b,
				      uint32_t *result)
{
	__le32 le_rc;
	int r;

	r = dm_btree_lookup(&ll->ref_count_info, ll->ref_count_root, &b, &le_rc);
	if (r < 0)
		return r;

	*result = le32_to_cpu(le_rc);

	return r;
}

int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result)
{
	int r = sm_ll_lookup_bitmap(ll, b, result);

	if (r)
		return r;

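	/* a bitmap entry of 3 means the count lives in the overflow btree */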
	if (*result != 3)
		return r;

	return sm_ll_lookup_big_ref_count(ll, b, result);
}

int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
			  dm_block_t end, dm_block_t *result)
{
	int r;
	struct disk_index_entry ie_disk;
	dm_block_t i, index_begin = begin;
	dm_block_t index_end = dm_sector_div_up(end, ll->entries_per_block);

	/*
	 * FIXME: Use shifts
	 */
	begin = do_div(index_begin, ll->entries_per_block);
	end = do_div(end, ll->entries_per_block);
	if (end == 0)
		end = ll->entries_per_block;

	for (i = index_begin; i < index_end; i++, begin = 0) {
		struct dm_block *blk;
		unsigned int position;
		uint32_t bit_end;

		r = ll->load_ie(ll, i, &ie_disk);
		if (r < 0)
			return r;

		if (le32_to_cpu(ie_disk.nr_free) == 0)
			continue;

		r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
				    &dm_sm_bitmap_validator, &blk);
		if (r < 0)
			return r;

		bit_end = (i == index_end - 1) ? end : ll->entries_per_block;

		r = sm_find_free(dm_bitmap_data(blk),
				 max_t(unsigned int, begin, le32_to_cpu(ie_disk.none_free_before)),
				 bit_end, &position);
		if (r == -ENOSPC) {
			/*
			 * This might happen because we started searching
			 * part way through the bitmap.
			 */
			dm_tm_unlock(ll->tm, blk);
			continue;
		}

		dm_tm_unlock(ll->tm, blk);

		*result = i * ll->entries_per_block + (dm_block_t) position;
		return 0;
	}

	return -ENOSPC;
}

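/*
 * Finds a block that is free in both the current transaction (new_ll)
 * and the previous, committed one (old_ll), so it can be reused without
 * damaging the last committed transaction.
 */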
int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll,
				 dm_block_t begin, dm_block_t end, dm_block_t *b)
{
	int r;
	uint32_t count;

	do {
		r = sm_ll_find_free_block(new_ll, begin, new_ll->nr_blocks, b);
		if (r)
			break;

		/* double check this block wasn't used in the old transaction */
		if (*b >= old_ll->nr_blocks)
			count = 0;
		else {
			r = sm_ll_lookup(old_ll, *b, &count);
			if (r)
				break;

			if (count)
				begin = *b + 1;
		}
	} while (count);

	return r;
}

/*----------------------------------------------------------------*/

int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
		 uint32_t ref_count, int32_t *nr_allocations)
{
	int r;
	uint32_t bit, old;
	struct dm_block *nb;
	dm_block_t index = b;
	struct disk_index_entry ie_disk;
	void *bm_le;
	int inc;

	bit = do_div(index, ll->entries_per_block);
	r = ll->load_ie(ll, index, &ie_disk);
	if (r < 0)
		return r;

	r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ie_disk.blocknr),
			       &dm_sm_bitmap_validator, &nb, &inc);
	if (r < 0) {
		DMERR("dm_tm_shadow_block() failed");
		return r;
	}
	ie_disk.blocknr = cpu_to_le64(dm_block_location(nb));
	bm_le = dm_bitmap_data(nb);

	old = sm_lookup_bitmap(bm_le, bit);
	if (old > 2) {
		r = sm_ll_lookup_big_ref_count(ll, b, &old);
		if (r < 0) {
			dm_tm_unlock(ll->tm, nb);
			return r;
		}
	}

	if (r) {
		dm_tm_unlock(ll->tm, nb);
		return r;
	}

	if (ref_count <= 2) {
		sm_set_bitmap(bm_le, bit, ref_count);
		dm_tm_unlock(ll->tm, nb);

		if (old > 2) {
			r = dm_btree_remove(&ll->ref_count_info,
					    ll->ref_count_root,
					    &b, &ll->ref_count_root);
			if (r)
				return r;
		}

	} else {
		__le32 le_rc = cpu_to_le32(ref_count);

		sm_set_bitmap(bm_le, bit, 3);
		dm_tm_unlock(ll->tm, nb);

		__dm_bless_for_disk(&le_rc);
		r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
				    &b, &le_rc, &ll->ref_count_root);
		if (r < 0) {
			DMERR("ref count insert failed");
			return r;
		}
	}

	if (ref_count && !old) {
		*nr_allocations = 1;
		ll->nr_allocated++;
		le32_add_cpu(&ie_disk.nr_free, -1);
		if (le32_to_cpu(ie_disk.none_free_before) == bit)
			ie_disk.none_free_before = cpu_to_le32(bit + 1);

	} else if (old && !ref_count) {
		*nr_allocations = -1;
		ll->nr_allocated--;
		le32_add_cpu(&ie_disk.nr_free, 1);
		ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit));
	} else
		*nr_allocations = 0;

	return ll->save_ie(ll, index, &ie_disk);
}

/*----------------------------------------------------------------*/

/*
 * Holds useful intermediate results for the range based inc and dec
 * operations.
 */
struct inc_context {
	struct disk_index_entry ie_disk;
	struct dm_block *bitmap_block;
	void *bitmap;

	struct dm_block *overflow_leaf;
};

static inline void init_inc_context(struct inc_context *ic)
{
	ic->bitmap_block = NULL;
	ic->bitmap = NULL;
	ic->overflow_leaf = NULL;
}

static inline void exit_inc_context(struct ll_disk *ll, struct inc_context *ic)
{
	if (ic->bitmap_block)
		dm_tm_unlock(ll->tm, ic->bitmap_block);
	if (ic->overflow_leaf)
		dm_tm_unlock(ll->tm, ic->overflow_leaf);
}

static inline void reset_inc_context(struct ll_disk *ll, struct inc_context *ic)
{
	exit_inc_context(ll, ic);
	init_inc_context(ic);
}

/*
 * Confirms a btree node contains a particular key at an index.
 */
static bool contains_key(struct btree_node *n, uint64_t key, int index)
{
	return index >= 0 &&
		index < le32_to_cpu(n->header.nr_entries) &&
		le64_to_cpu(n->keys[index]) == key;
}

static int __sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
{
	int r;
	int index;
	struct btree_node *n;
	__le32 *v_ptr;
	uint32_t rc;

	/*
	 * bitmap_block needs to be unlocked because getting the
	 * overflow_leaf may need to allocate, and thus use the space map.
	 */
	reset_inc_context(ll, ic);

	r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
				     b, &index, &ll->ref_count_root, &ic->overflow_leaf);
	if (r < 0)
		return r;

	n = dm_block_data(ic->overflow_leaf);

	if (!contains_key(n, b, index)) {
		DMERR("overflow btree is missing an entry");
		return -EINVAL;
	}

	v_ptr = value_ptr(n, index);
	rc = le32_to_cpu(*v_ptr) + 1;
	*v_ptr = cpu_to_le32(rc);

	return 0;
}

static int sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
{
	int index;
	struct btree_node *n;
	__le32 *v_ptr;
	uint32_t rc;

	/*
	 * Do we already have the correct overflow leaf?
	 */
	if (ic->overflow_leaf) {
		n = dm_block_data(ic->overflow_leaf);
		index = lower_bound(n, b);
		if (contains_key(n, b, index)) {
			v_ptr = value_ptr(n, index);
			rc = le32_to_cpu(*v_ptr) + 1;
			*v_ptr = cpu_to_le32(rc);

			return 0;
		}
	}

	return __sm_ll_inc_overflow(ll, b, ic);
}

static inline int shadow_bitmap(struct ll_disk *ll, struct inc_context *ic)
{
	int r, inc;

	r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ic->ie_disk.blocknr),
			       &dm_sm_bitmap_validator, &ic->bitmap_block, &inc);
	if (r < 0) {
		DMERR("dm_tm_shadow_block() failed");
		return r;
	}
	ic->ie_disk.blocknr = cpu_to_le64(dm_block_location(ic->bitmap_block));
	ic->bitmap = dm_bitmap_data(ic->bitmap_block);
	return 0;
}

/*
 * Once shadow_bitmap has been called, which always happens at the start of
 * inc/dec, we can reopen the bitmap with a simple write lock, rather than
 * re-calling dm_tm_shadow_block().
 */
static inline int ensure_bitmap(struct ll_disk *ll, struct inc_context *ic)
{
	if (!ic->bitmap_block) {
		int r = dm_bm_write_lock(dm_tm_get_bm(ll->tm), le64_to_cpu(ic->ie_disk.blocknr),
					 &dm_sm_bitmap_validator, &ic->bitmap_block);
		if (r) {
			DMERR("unable to re-get write lock for bitmap");
			return r;
		}
		ic->bitmap = dm_bitmap_data(ic->bitmap_block);
	}

	return 0;
}

/*
 * Loops round incrementing entries in a single bitmap.
 */
static inline int sm_ll_inc_bitmap(struct ll_disk *ll, dm_block_t b,
				   uint32_t bit, uint32_t bit_end,
				   int32_t *nr_allocations, dm_block_t *new_b,
				   struct inc_context *ic)
{
	int r;
	__le32 le_rc;
	uint32_t old;

	for (; bit != bit_end; bit++, b++) {
		/*
		 * We only need to drop the bitmap if we need to find a new btree
		 * leaf for the overflow.  So if it was dropped last iteration,
		 * we now re-get it.
		 */
		r = ensure_bitmap(ll, ic);
		if (r)
			return r;

		old = sm_lookup_bitmap(ic->bitmap, bit);
		switch (old) {
		case 0:
			/* inc bitmap, adjust nr_allocated */
			sm_set_bitmap(ic->bitmap, bit, 1);
			(*nr_allocations)++;
			ll->nr_allocated++;
			le32_add_cpu(&ic->ie_disk.nr_free, -1);
			if (le32_to_cpu(ic->ie_disk.none_free_before) == bit)
				ic->ie_disk.none_free_before = cpu_to_le32(bit + 1);
			break;

		case 1:
			/* inc bitmap */
			sm_set_bitmap(ic->bitmap, bit, 2);
			break;

		case 2:
			/* inc bitmap and insert into overflow */
			sm_set_bitmap(ic->bitmap, bit, 3);
			reset_inc_context(ll, ic);

			le_rc = cpu_to_le32(3);
			__dm_bless_for_disk(&le_rc);
			r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
					    &b, &le_rc, &ll->ref_count_root);
			if (r < 0) {
				DMERR("ref count insert failed");
				return r;
			}
			break;

		default:
			/*
			 * inc within the overflow tree only.
			 */
			r = sm_ll_inc_overflow(ll, b, ic);
			if (r < 0)
				return r;
		}
	}

	*new_b = b;
	return 0;
}

/*
 * Finds a bitmap that contains entries in the block range, and increments
 * them.
 */
static int __sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
		       int32_t *nr_allocations, dm_block_t *new_b)
{
	int r;
	struct inc_context ic;
	uint32_t bit, bit_end;
	dm_block_t index = b;

	init_inc_context(&ic);

	bit = do_div(index, ll->entries_per_block);
	r = ll->load_ie(ll, index, &ic.ie_disk);
	if (r < 0)
		return r;

	r = shadow_bitmap(ll, &ic);
	if (r)
		return r;

	bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
	r = sm_ll_inc_bitmap(ll, b, bit, bit_end, nr_allocations, new_b, &ic);

	exit_inc_context(ll, &ic);

	if (r)
		return r;

	return ll->save_ie(ll, index, &ic.ie_disk);
}

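/*
 * Increments the counts for blocks [b, e).  Each call to __sm_ll_inc()
 * handles at most one bitmap block's worth of entries and advances b to
 * the first block it did not process.
 */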
int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
	      int32_t *nr_allocations)
{
	*nr_allocations = 0;
	while (b != e) {
		int r = __sm_ll_inc(ll, b, e, nr_allocations, &b);

		if (r)
			return r;
	}

	return 0;
}

/*----------------------------------------------------------------*/

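/*
 * The overflow btree holds counts of 3 and above.  When a decrement takes
 * a stored count of 3 down, the result (2) fits in the bitmap again, so
 * the overflow entry is deleted and the caller rewrites the bitmap entry
 * to 2.
 */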
static int __sm_ll_del_overflow(struct ll_disk *ll, dm_block_t b,
				struct inc_context *ic)
{
	reset_inc_context(ll, ic);
	return dm_btree_remove(&ll->ref_count_info, ll->ref_count_root,
			       &b, &ll->ref_count_root);
}

static int __sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
				struct inc_context *ic, uint32_t *old_rc)
{
	int r;
	int index = -1;
	struct btree_node *n;
	__le32 *v_ptr;
	uint32_t rc;

	reset_inc_context(ll, ic);
	r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
				     b, &index, &ll->ref_count_root, &ic->overflow_leaf);
	if (r < 0)
		return r;

	n = dm_block_data(ic->overflow_leaf);

	if (!contains_key(n, b, index)) {
		DMERR("overflow btree is missing an entry");
		return -EINVAL;
	}

	v_ptr = value_ptr(n, index);
	rc = le32_to_cpu(*v_ptr);
	*old_rc = rc;

	if (rc == 3)
		return __sm_ll_del_overflow(ll, b, ic);

	rc--;
	*v_ptr = cpu_to_le32(rc);
	return 0;
}

static int sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
			      struct inc_context *ic, uint32_t *old_rc)
{
	/*
	 * Do we already have the correct overflow leaf?
	 */
	if (ic->overflow_leaf) {
		int index;
		struct btree_node *n;
		__le32 *v_ptr;
		uint32_t rc;

		n = dm_block_data(ic->overflow_leaf);
		index = lower_bound(n, b);
		if (contains_key(n, b, index)) {
			v_ptr = value_ptr(n, index);
			rc = le32_to_cpu(*v_ptr);
			*old_rc = rc;

			if (rc > 3) {
				rc--;
				*v_ptr = cpu_to_le32(rc);
				return 0;
			} else {
				return __sm_ll_del_overflow(ll, b, ic);
			}
		}
	}

	return __sm_ll_dec_overflow(ll, b, ic, old_rc);
}

/*
 * Loops round decrementing entries in a single bitmap.
 */
static inline int sm_ll_dec_bitmap(struct ll_disk *ll, dm_block_t b,
				   uint32_t bit, uint32_t bit_end,
				   struct inc_context *ic,
				   int32_t *nr_allocations, dm_block_t *new_b)
{
	int r;
	uint32_t old;

	for (; bit != bit_end; bit++, b++) {
		/*
		 * We only need to drop the bitmap if we need to find a new btree
		 * leaf for the overflow.  So if it was dropped last iteration,
		 * we now re-get it.
		 */
		r = ensure_bitmap(ll, ic);
		if (r)
			return r;

		old = sm_lookup_bitmap(ic->bitmap, bit);
		switch (old) {
		case 0:
			DMERR("unable to decrement block");
			return -EINVAL;

		case 1:
			/* dec bitmap */
			sm_set_bitmap(ic->bitmap, bit, 0);
			(*nr_allocations)--;
			ll->nr_allocated--;
			le32_add_cpu(&ic->ie_disk.nr_free, 1);
			ic->ie_disk.none_free_before =
				cpu_to_le32(min(le32_to_cpu(ic->ie_disk.none_free_before), bit));
			break;

		case 2:
			/* dec bitmap */
			sm_set_bitmap(ic->bitmap, bit, 1);
			break;

		case 3:
			r = sm_ll_dec_overflow(ll, b, ic, &old);
			if (r < 0)
				return r;

			if (old == 3) {
				r = ensure_bitmap(ll, ic);
				if (r)
					return r;

				sm_set_bitmap(ic->bitmap, bit, 2);
			}
			break;
		}
	}

	*new_b = b;
	return 0;
}

static int __sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
		       int32_t *nr_allocations, dm_block_t *new_b)
{
	int r;
	uint32_t bit, bit_end;
	struct inc_context ic;
	dm_block_t index = b;

	init_inc_context(&ic);

	bit = do_div(index, ll->entries_per_block);
	r = ll->load_ie(ll, index, &ic.ie_disk);
	if (r < 0)
		return r;

	r = shadow_bitmap(ll, &ic);
	if (r)
		return r;

	bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
	r = sm_ll_dec_bitmap(ll, b, bit, bit_end, &ic, nr_allocations, new_b);
	exit_inc_context(ll, &ic);

	if (r)
		return r;

	return ll->save_ie(ll, index, &ic.ie_disk);
}

int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
	      int32_t *nr_allocations)
{
	*nr_allocations = 0;
	while (b != e) {
		int r = __sm_ll_dec(ll, b, e, nr_allocations, &b);

		if (r)
			return r;
	}

	return 0;
}

/*----------------------------------------------------------------*/

int sm_ll_commit(struct ll_disk *ll)
{
	int r = 0;

	if (ll->bitmap_index_changed) {
		r = ll->commit(ll);
		if (!r)
			ll->bitmap_index_changed = false;
	}

	return r;
}

/*----------------------------------------------------------------*/

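/*
 * Metadata space map: all the index entries fit in a single metadata
 * block (ll->mi_le), which is shadowed and rewritten wholesale by
 * metadata_ll_commit().
 */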
static int metadata_ll_load_ie(struct ll_disk *ll, dm_block_t index,
			       struct disk_index_entry *ie)
{
	memcpy(ie, ll->mi_le.index + index, sizeof(*ie));
	return 0;
}

static int metadata_ll_save_ie(struct ll_disk *ll, dm_block_t index,
			       struct disk_index_entry *ie)
{
	ll->bitmap_index_changed = true;
	memcpy(ll->mi_le.index + index, ie, sizeof(*ie));
	return 0;
}

static int metadata_ll_init_index(struct ll_disk *ll)
{
	int r;
	struct dm_block *b;

	r = dm_tm_new_block(ll->tm, &index_validator, &b);
	if (r < 0)
		return r;

	ll->bitmap_root = dm_block_location(b);

	dm_tm_unlock(ll->tm, b);

	return 0;
}

static int metadata_ll_open(struct ll_disk *ll)
{
	int r;
	struct dm_block *block;

	r = dm_tm_read_lock(ll->tm, ll->bitmap_root,
			    &index_validator, &block);
	if (r)
		return r;

	memcpy(&ll->mi_le, dm_block_data(block), sizeof(ll->mi_le));
	dm_tm_unlock(ll->tm, block);

	return 0;
}

static dm_block_t metadata_ll_max_entries(struct ll_disk *ll)
{
	return MAX_METADATA_BITMAPS;
}

static int metadata_ll_commit(struct ll_disk *ll)
{
	int r, inc;
	struct dm_block *b;

	r = dm_tm_shadow_block(ll->tm, ll->bitmap_root, &index_validator, &b, &inc);
	if (r)
		return r;

	memcpy(dm_block_data(b), &ll->mi_le, sizeof(ll->mi_le));
	ll->bitmap_root = dm_block_location(b);

	dm_tm_unlock(ll->tm, b);

	return 0;
}

int sm_ll_new_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm)
{
	int r;

	r = sm_ll_init(ll, tm);
	if (r < 0)
		return r;

	ll->load_ie = metadata_ll_load_ie;
	ll->save_ie = metadata_ll_save_ie;
	ll->init_index = metadata_ll_init_index;
	ll->open_index = metadata_ll_open;
	ll->max_entries = metadata_ll_max_entries;
	ll->commit = metadata_ll_commit;

	ll->nr_blocks = 0;
	ll->nr_allocated = 0;

	r = ll->init_index(ll);
	if (r < 0)
		return r;

	r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
	if (r < 0)
		return r;

	return 0;
}

int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm,
			void *root_le, size_t len)
{
	int r;
	struct disk_sm_root smr;

	if (len < sizeof(struct disk_sm_root)) {
		DMERR("sm_metadata root too small");
		return -ENOMEM;
	}

	/*
	 * We don't know the alignment of the root_le buffer, so we need to
	 * copy it into a new, aligned structure.
	 */
	memcpy(&smr, root_le, sizeof(smr));

	r = sm_ll_init(ll, tm);
	if (r < 0)
		return r;

	ll->load_ie = metadata_ll_load_ie;
	ll->save_ie = metadata_ll_save_ie;
	ll->init_index = metadata_ll_init_index;
	ll->open_index = metadata_ll_open;
	ll->max_entries = metadata_ll_max_entries;
	ll->commit = metadata_ll_commit;

	ll->nr_blocks = le64_to_cpu(smr.nr_blocks);
	ll->nr_allocated = le64_to_cpu(smr.nr_allocated);
	ll->bitmap_root = le64_to_cpu(smr.bitmap_root);
	ll->ref_count_root = le64_to_cpu(smr.ref_count_root);

	return ll->open_index(ll);
}

/*----------------------------------------------------------------*/

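/*
 * Disk space map: too many index entries to fit in one block, so they
 * are kept in a btree (bitmap_root), fronted by a small direct-mapped
 * write-back cache (ll->ie_cache) to cut down on btree operations.
 * Dirty entries are flushed on collision and at commit.
 */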
static inline int ie_cache_writeback(struct ll_disk *ll, struct ie_cache *iec)
{
	iec->dirty = false;
	__dm_bless_for_disk(iec->ie);
	return dm_btree_insert(&ll->bitmap_info, ll->bitmap_root,
			       &iec->index, &iec->ie, &ll->bitmap_root);
}

static inline unsigned int hash_index(dm_block_t index)
{
	return dm_hash_block(index, IE_CACHE_MASK);
}

static int disk_ll_load_ie(struct ll_disk *ll, dm_block_t index,
			   struct disk_index_entry *ie)
{
	int r;
	unsigned int h = hash_index(index);
	struct ie_cache *iec = ll->ie_cache + h;

	if (iec->valid) {
		if (iec->index == index) {
			memcpy(ie, &iec->ie, sizeof(*ie));
			return 0;
		}

		if (iec->dirty) {
			r = ie_cache_writeback(ll, iec);
			if (r)
				return r;
		}
	}

	r = dm_btree_lookup(&ll->bitmap_info, ll->bitmap_root, &index, ie);
	if (!r) {
		iec->valid = true;
		iec->dirty = false;
		iec->index = index;
		memcpy(&iec->ie, ie, sizeof(*ie));
	}

	return r;
}

static int disk_ll_save_ie(struct ll_disk *ll, dm_block_t index,
			   struct disk_index_entry *ie)
{
	int r;
	unsigned int h = hash_index(index);
	struct ie_cache *iec = ll->ie_cache + h;

	ll->bitmap_index_changed = true;
	if (iec->valid) {
		if (iec->index == index) {
			memcpy(&iec->ie, ie, sizeof(*ie));
			iec->dirty = true;
			return 0;
		}

		if (iec->dirty) {
			r = ie_cache_writeback(ll, iec);
			if (r)
				return r;
		}
	}

	iec->valid = true;
	iec->dirty = true;
	iec->index = index;
	memcpy(&iec->ie, ie, sizeof(*ie));
	return 0;
}

static int disk_ll_init_index(struct ll_disk *ll)
{
	unsigned int i;

	for (i = 0; i < IE_CACHE_SIZE; i++) {
		struct ie_cache *iec = ll->ie_cache + i;

		iec->valid = false;
		iec->dirty = false;
	}
	return dm_btree_empty(&ll->bitmap_info, &ll->bitmap_root);
}

static int disk_ll_open(struct ll_disk *ll)
{
	return 0;
}

static dm_block_t disk_ll_max_entries(struct ll_disk *ll)
{
	return -1ULL;
}

static int disk_ll_commit(struct ll_disk *ll)
{
	int r = 0;
	unsigned int i;

	for (i = 0; i < IE_CACHE_SIZE; i++) {
		struct ie_cache *iec = ll->ie_cache + i;

		if (iec->valid && iec->dirty)
			r = ie_cache_writeback(ll, iec);
	}

	return r;
}

int sm_ll_new_disk(struct ll_disk *ll, struct dm_transaction_manager *tm)
{
	int r;

	r = sm_ll_init(ll, tm);
	if (r < 0)
		return r;

	ll->load_ie = disk_ll_load_ie;
	ll->save_ie = disk_ll_save_ie;
	ll->init_index = disk_ll_init_index;
	ll->open_index = disk_ll_open;
	ll->max_entries = disk_ll_max_entries;
	ll->commit = disk_ll_commit;

	ll->nr_blocks = 0;
	ll->nr_allocated = 0;

	r = ll->init_index(ll);
	if (r < 0)
		return r;

	r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
	if (r < 0)
		return r;

	return 0;
}

int sm_ll_open_disk(struct ll_disk *ll, struct dm_transaction_manager *tm,
		    void *root_le, size_t len)
{
	int r;
	struct disk_sm_root *smr = root_le;

	if (len < sizeof(struct disk_sm_root)) {
		DMERR("sm_disk root too small");
		return -ENOMEM;
	}

	r = sm_ll_init(ll, tm);
	if (r < 0)
		return r;

	ll->load_ie = disk_ll_load_ie;
	ll->save_ie = disk_ll_save_ie;
	ll->init_index = disk_ll_init_index;
	ll->open_index = disk_ll_open;
	ll->max_entries = disk_ll_max_entries;
	ll->commit = disk_ll_commit;

	ll->nr_blocks = le64_to_cpu(smr->nr_blocks);
	ll->nr_allocated = le64_to_cpu(smr->nr_allocated);
	ll->bitmap_root = le64_to_cpu(smr->bitmap_root);
	ll->ref_count_root = le64_to_cpu(smr->ref_count_root);

	return ll->open_index(ll);
}

/*----------------------------------------------------------------*/
