1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2012 Red Hat, Inc.
4 *
5 * This file is released under the GPL.
6 */
7
8#include "dm-array.h"
9#include "dm-space-map.h"
10#include "dm-transaction-manager.h"
11
12#include <linux/export.h>
13#include <linux/device-mapper.h>
14
15#define DM_MSG_PREFIX "array"
16
17/*----------------------------------------------------------------*/
18
19/*
20 * The array is implemented as a fully populated btree, which points to
21 * blocks that contain the packed values.  This is more space efficient
22 * than just using a btree since we don't store 1 key per value.
23 */
24struct array_block {
25	__le32 csum;
26	__le32 max_entries;
27	__le32 nr_entries;
28	__le32 value_size;
29	__le64 blocknr; /* Block this node is supposed to live in. */
30} __packed;
31
32/*----------------------------------------------------------------*/
33
34/*
35 * Validator methods.  As usual we calculate a checksum, and also write the
36 * block location into the header (paranoia about ssds remapping areas by
37 * mistake).
38 */
39#define CSUM_XOR 595846735
40
41static void array_block_prepare_for_write(struct dm_block_validator *v,
42					  struct dm_block *b,
43					  size_t size_of_block)
44{
45	struct array_block *bh_le = dm_block_data(b);
46
47	bh_le->blocknr = cpu_to_le64(dm_block_location(b));
48	bh_le->csum = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries,
49						 size_of_block - sizeof(__le32),
50						 CSUM_XOR));
51}
52
53static int array_block_check(struct dm_block_validator *v,
54			     struct dm_block *b,
55			     size_t size_of_block)
56{
57	struct array_block *bh_le = dm_block_data(b);
58	__le32 csum_disk;
59
60	if (dm_block_location(b) != le64_to_cpu(bh_le->blocknr)) {
61		DMERR_LIMIT("%s failed: blocknr %llu != wanted %llu", __func__,
62			    (unsigned long long) le64_to_cpu(bh_le->blocknr),
63			    (unsigned long long) dm_block_location(b));
64		return -ENOTBLK;
65	}
66
67	csum_disk = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries,
68					       size_of_block - sizeof(__le32),
69					       CSUM_XOR));
70	if (csum_disk != bh_le->csum) {
71		DMERR_LIMIT("%s failed: csum %u != wanted %u", __func__,
72			    (unsigned int) le32_to_cpu(csum_disk),
73			    (unsigned int) le32_to_cpu(bh_le->csum));
74		return -EILSEQ;
75	}
76
77	return 0;
78}
79
80static struct dm_block_validator array_validator = {
81	.name = "array",
82	.prepare_for_write = array_block_prepare_for_write,
83	.check = array_block_check
84};
85
86/*----------------------------------------------------------------*/
87
88/*
89 * Functions for manipulating the array blocks.
90 */
91
92/*
93 * Returns a pointer to a value within an array block.
94 *
95 * index - The index into _this_ specific block.
96 */
97static void *element_at(struct dm_array_info *info, struct array_block *ab,
98			unsigned int index)
99{
100	unsigned char *entry = (unsigned char *) (ab + 1);
101
102	entry += index * info->value_type.size;
103
104	return entry;
105}
106
107/*
108 * Utility function that calls one of the value_type methods on every value
109 * in an array block.
110 */
111static void on_entries(struct dm_array_info *info, struct array_block *ab,
112		       void (*fn)(void *, const void *, unsigned int))
113{
114	unsigned int nr_entries = le32_to_cpu(ab->nr_entries);
115
116	fn(info->value_type.context, element_at(info, ab, 0), nr_entries);
117}
118
119/*
120 * Increment every value in an array block.
121 */
122static void inc_ablock_entries(struct dm_array_info *info, struct array_block *ab)
123{
124	struct dm_btree_value_type *vt = &info->value_type;
125
126	if (vt->inc)
127		on_entries(info, ab, vt->inc);
128}
129
130/*
131 * Decrement every value in an array block.
132 */
133static void dec_ablock_entries(struct dm_array_info *info, struct array_block *ab)
134{
135	struct dm_btree_value_type *vt = &info->value_type;
136
137	if (vt->dec)
138		on_entries(info, ab, vt->dec);
139}
140
141/*
142 * Each array block can hold this many values.
143 */
144static uint32_t calc_max_entries(size_t value_size, size_t size_of_block)
145{
146	return (size_of_block - sizeof(struct array_block)) / value_size;
147}
148
149/*
150 * Allocate a new array block.  The caller will need to unlock block.
151 */
152static int alloc_ablock(struct dm_array_info *info, size_t size_of_block,
153			uint32_t max_entries,
154			struct dm_block **block, struct array_block **ab)
155{
156	int r;
157
158	r = dm_tm_new_block(info->btree_info.tm, &array_validator, block);
159	if (r)
160		return r;
161
162	(*ab) = dm_block_data(*block);
163	(*ab)->max_entries = cpu_to_le32(max_entries);
164	(*ab)->nr_entries = cpu_to_le32(0);
165	(*ab)->value_size = cpu_to_le32(info->value_type.size);
166
167	return 0;
168}
169
170/*
171 * Pad an array block out with a particular value.  Every instance will
172 * cause an increment of the value_type.  new_nr must always be more than
173 * the current number of entries.
174 */
175static void fill_ablock(struct dm_array_info *info, struct array_block *ab,
176			const void *value, unsigned int new_nr)
177{
178	uint32_t nr_entries, delta, i;
179	struct dm_btree_value_type *vt = &info->value_type;
180
181	BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
182	BUG_ON(new_nr < le32_to_cpu(ab->nr_entries));
183
184	nr_entries = le32_to_cpu(ab->nr_entries);
185	delta = new_nr - nr_entries;
186	if (vt->inc)
187		vt->inc(vt->context, value, delta);
188	for (i = nr_entries; i < new_nr; i++)
189		memcpy(element_at(info, ab, i), value, vt->size);
190	ab->nr_entries = cpu_to_le32(new_nr);
191}
192
193/*
194 * Remove some entries from the back of an array block.  Every value
195 * removed will be decremented.  new_nr must be <= the current number of
196 * entries.
197 */
198static void trim_ablock(struct dm_array_info *info, struct array_block *ab,
199			unsigned int new_nr)
200{
201	uint32_t nr_entries, delta;
202	struct dm_btree_value_type *vt = &info->value_type;
203
204	BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
205	BUG_ON(new_nr > le32_to_cpu(ab->nr_entries));
206
207	nr_entries = le32_to_cpu(ab->nr_entries);
208	delta = nr_entries - new_nr;
209	if (vt->dec)
210		vt->dec(vt->context, element_at(info, ab, new_nr - 1), delta);
211	ab->nr_entries = cpu_to_le32(new_nr);
212}
213
214/*
215 * Read locks a block, and coerces it to an array block.  The caller must
216 * unlock 'block' when finished.
217 */
218static int get_ablock(struct dm_array_info *info, dm_block_t b,
219		      struct dm_block **block, struct array_block **ab)
220{
221	int r;
222
223	r = dm_tm_read_lock(info->btree_info.tm, b, &array_validator, block);
224	if (r)
225		return r;
226
227	*ab = dm_block_data(*block);
228	return 0;
229}
230
231/*
232 * Unlocks an array block.
233 */
234static void unlock_ablock(struct dm_array_info *info, struct dm_block *block)
235{
236	dm_tm_unlock(info->btree_info.tm, block);
237}
238
239/*----------------------------------------------------------------*/
240
241/*
242 * Btree manipulation.
243 */
244
245/*
246 * Looks up an array block in the btree, and then read locks it.
247 *
248 * index is the index of the index of the array_block, (ie. the array index
249 * / max_entries).
250 */
251static int lookup_ablock(struct dm_array_info *info, dm_block_t root,
252			 unsigned int index, struct dm_block **block,
253			 struct array_block **ab)
254{
255	int r;
256	uint64_t key = index;
257	__le64 block_le;
258
259	r = dm_btree_lookup(&info->btree_info, root, &key, &block_le);
260	if (r)
261		return r;
262
263	return get_ablock(info, le64_to_cpu(block_le), block, ab);
264}
265
266/*
267 * Insert an array block into the btree.  The block is _not_ unlocked.
268 */
269static int insert_ablock(struct dm_array_info *info, uint64_t index,
270			 struct dm_block *block, dm_block_t *root)
271{
272	__le64 block_le = cpu_to_le64(dm_block_location(block));
273
274	__dm_bless_for_disk(block_le);
275	return dm_btree_insert(&info->btree_info, *root, &index, &block_le, root);
276}
277
278/*----------------------------------------------------------------*/
279
280static int __shadow_ablock(struct dm_array_info *info, dm_block_t b,
281			   struct dm_block **block, struct array_block **ab)
282{
283	int inc;
284	int r = dm_tm_shadow_block(info->btree_info.tm, b,
285				   &array_validator, block, &inc);
286	if (r)
287		return r;
288
289	*ab = dm_block_data(*block);
290	if (inc)
291		inc_ablock_entries(info, *ab);
292
293	return 0;
294}
295
296/*
297 * The shadow op will often be a noop.  Only insert if it really
298 * copied data.
299 */
300static int __reinsert_ablock(struct dm_array_info *info, unsigned int index,
301			     struct dm_block *block, dm_block_t b,
302			     dm_block_t *root)
303{
304	int r = 0;
305
306	if (dm_block_location(block) != b) {
307		/*
308		 * dm_tm_shadow_block will have already decremented the old
309		 * block, but it is still referenced by the btree.  We
310		 * increment to stop the insert decrementing it below zero
311		 * when overwriting the old value.
312		 */
313		dm_tm_inc(info->btree_info.tm, b);
314		r = insert_ablock(info, index, block, root);
315	}
316
317	return r;
318}
319
320/*
321 * Looks up an array block in the btree.  Then shadows it, and updates the
322 * btree to point to this new shadow.  'root' is an input/output parameter
323 * for both the current root block, and the new one.
324 */
325static int shadow_ablock(struct dm_array_info *info, dm_block_t *root,
326			 unsigned int index, struct dm_block **block,
327			 struct array_block **ab)
328{
329	int r;
330	uint64_t key = index;
331	dm_block_t b;
332	__le64 block_le;
333
334	r = dm_btree_lookup(&info->btree_info, *root, &key, &block_le);
335	if (r)
336		return r;
337	b = le64_to_cpu(block_le);
338
339	r = __shadow_ablock(info, b, block, ab);
340	if (r)
341		return r;
342
343	return __reinsert_ablock(info, index, *block, b, root);
344}
345
346/*
347 * Allocate an new array block, and fill it with some values.
348 */
349static int insert_new_ablock(struct dm_array_info *info, size_t size_of_block,
350			     uint32_t max_entries,
351			     unsigned int block_index, uint32_t nr,
352			     const void *value, dm_block_t *root)
353{
354	int r;
355	struct dm_block *block;
356	struct array_block *ab;
357
358	r = alloc_ablock(info, size_of_block, max_entries, &block, &ab);
359	if (r)
360		return r;
361
362	fill_ablock(info, ab, value, nr);
363	r = insert_ablock(info, block_index, block, root);
364	unlock_ablock(info, block);
365
366	return r;
367}
368
369static int insert_full_ablocks(struct dm_array_info *info, size_t size_of_block,
370			       unsigned int begin_block, unsigned int end_block,
371			       unsigned int max_entries, const void *value,
372			       dm_block_t *root)
373{
374	int r = 0;
375
376	for (; !r && begin_block != end_block; begin_block++)
377		r = insert_new_ablock(info, size_of_block, max_entries, begin_block, max_entries, value, root);
378
379	return r;
380}
381
382/*
383 * There are a bunch of functions involved with resizing an array.  This
384 * structure holds information that commonly needed by them.  Purely here
385 * to reduce parameter count.
386 */
387struct resize {
388	/*
389	 * Describes the array.
390	 */
391	struct dm_array_info *info;
392
393	/*
394	 * The current root of the array.  This gets updated.
395	 */
396	dm_block_t root;
397
398	/*
399	 * Metadata block size.  Used to calculate the nr entries in an
400	 * array block.
401	 */
402	size_t size_of_block;
403
404	/*
405	 * Maximum nr entries in an array block.
406	 */
407	unsigned int max_entries;
408
409	/*
410	 * nr of completely full blocks in the array.
411	 *
412	 * 'old' refers to before the resize, 'new' after.
413	 */
414	unsigned int old_nr_full_blocks, new_nr_full_blocks;
415
416	/*
417	 * Number of entries in the final block.  0 iff only full blocks in
418	 * the array.
419	 */
420	unsigned int old_nr_entries_in_last_block, new_nr_entries_in_last_block;
421
422	/*
423	 * The default value used when growing the array.
424	 */
425	const void *value;
426};
427
428/*
429 * Removes a consecutive set of array blocks from the btree.  The values
430 * in block are decremented as a side effect of the btree remove.
431 *
432 * begin_index - the index of the first array block to remove.
433 * end_index - the one-past-the-end value.  ie. this block is not removed.
434 */
435static int drop_blocks(struct resize *resize, unsigned int begin_index,
436		       unsigned int end_index)
437{
438	int r;
439
440	while (begin_index != end_index) {
441		uint64_t key = begin_index++;
442
443		r = dm_btree_remove(&resize->info->btree_info, resize->root,
444				    &key, &resize->root);
445		if (r)
446			return r;
447	}
448
449	return 0;
450}
451
452/*
453 * Calculates how many blocks are needed for the array.
454 */
455static unsigned int total_nr_blocks_needed(unsigned int nr_full_blocks,
456				       unsigned int nr_entries_in_last_block)
457{
458	return nr_full_blocks + (nr_entries_in_last_block ? 1 : 0);
459}
460
461/*
462 * Shrink an array.
463 */
464static int shrink(struct resize *resize)
465{
466	int r;
467	unsigned int begin, end;
468	struct dm_block *block;
469	struct array_block *ab;
470
471	/*
472	 * Lose some blocks from the back?
473	 */
474	if (resize->new_nr_full_blocks < resize->old_nr_full_blocks) {
475		begin = total_nr_blocks_needed(resize->new_nr_full_blocks,
476					       resize->new_nr_entries_in_last_block);
477		end = total_nr_blocks_needed(resize->old_nr_full_blocks,
478					     resize->old_nr_entries_in_last_block);
479
480		r = drop_blocks(resize, begin, end);
481		if (r)
482			return r;
483	}
484
485	/*
486	 * Trim the new tail block
487	 */
488	if (resize->new_nr_entries_in_last_block) {
489		r = shadow_ablock(resize->info, &resize->root,
490				  resize->new_nr_full_blocks, &block, &ab);
491		if (r)
492			return r;
493
494		trim_ablock(resize->info, ab, resize->new_nr_entries_in_last_block);
495		unlock_ablock(resize->info, block);
496	}
497
498	return 0;
499}
500
501/*
502 * Grow an array.
503 */
504static int grow_extend_tail_block(struct resize *resize, uint32_t new_nr_entries)
505{
506	int r;
507	struct dm_block *block;
508	struct array_block *ab;
509
510	r = shadow_ablock(resize->info, &resize->root,
511			  resize->old_nr_full_blocks, &block, &ab);
512	if (r)
513		return r;
514
515	fill_ablock(resize->info, ab, resize->value, new_nr_entries);
516	unlock_ablock(resize->info, block);
517
518	return r;
519}
520
521static int grow_add_tail_block(struct resize *resize)
522{
523	return insert_new_ablock(resize->info, resize->size_of_block,
524				 resize->max_entries,
525				 resize->new_nr_full_blocks,
526				 resize->new_nr_entries_in_last_block,
527				 resize->value, &resize->root);
528}
529
530static int grow_needs_more_blocks(struct resize *resize)
531{
532	int r;
533	unsigned int old_nr_blocks = resize->old_nr_full_blocks;
534
535	if (resize->old_nr_entries_in_last_block > 0) {
536		old_nr_blocks++;
537
538		r = grow_extend_tail_block(resize, resize->max_entries);
539		if (r)
540			return r;
541	}
542
543	r = insert_full_ablocks(resize->info, resize->size_of_block,
544				old_nr_blocks,
545				resize->new_nr_full_blocks,
546				resize->max_entries, resize->value,
547				&resize->root);
548	if (r)
549		return r;
550
551	if (resize->new_nr_entries_in_last_block)
552		r = grow_add_tail_block(resize);
553
554	return r;
555}
556
557static int grow(struct resize *resize)
558{
559	if (resize->new_nr_full_blocks > resize->old_nr_full_blocks)
560		return grow_needs_more_blocks(resize);
561
562	else if (resize->old_nr_entries_in_last_block)
563		return grow_extend_tail_block(resize, resize->new_nr_entries_in_last_block);
564
565	else
566		return grow_add_tail_block(resize);
567}
568
569/*----------------------------------------------------------------*/
570
571/*
572 * These are the value_type functions for the btree elements, which point
573 * to array blocks.
574 */
575static void block_inc(void *context, const void *value, unsigned int count)
576{
577	const __le64 *block_le = value;
578	struct dm_array_info *info = context;
579	unsigned int i;
580
581	for (i = 0; i < count; i++, block_le++)
582		dm_tm_inc(info->btree_info.tm, le64_to_cpu(*block_le));
583}
584
585static void __block_dec(void *context, const void *value)
586{
587	int r;
588	uint64_t b;
589	__le64 block_le;
590	uint32_t ref_count;
591	struct dm_block *block;
592	struct array_block *ab;
593	struct dm_array_info *info = context;
594
595	memcpy(&block_le, value, sizeof(block_le));
596	b = le64_to_cpu(block_le);
597
598	r = dm_tm_ref(info->btree_info.tm, b, &ref_count);
599	if (r) {
600		DMERR_LIMIT("couldn't get reference count for block %llu",
601			    (unsigned long long) b);
602		return;
603	}
604
605	if (ref_count == 1) {
606		/*
607		 * We're about to drop the last reference to this ablock.
608		 * So we need to decrement the ref count of the contents.
609		 */
610		r = get_ablock(info, b, &block, &ab);
611		if (r) {
612			DMERR_LIMIT("couldn't get array block %llu",
613				    (unsigned long long) b);
614			return;
615		}
616
617		dec_ablock_entries(info, ab);
618		unlock_ablock(info, block);
619	}
620
621	dm_tm_dec(info->btree_info.tm, b);
622}
623
624static void block_dec(void *context, const void *value, unsigned int count)
625{
626	unsigned int i;
627
628	for (i = 0; i < count; i++, value += sizeof(__le64))
629		__block_dec(context, value);
630}
631
632static int block_equal(void *context, const void *value1, const void *value2)
633{
634	return !memcmp(value1, value2, sizeof(__le64));
635}
636
637/*----------------------------------------------------------------*/
638
639void dm_array_info_init(struct dm_array_info *info,
640			struct dm_transaction_manager *tm,
641			struct dm_btree_value_type *vt)
642{
643	struct dm_btree_value_type *bvt = &info->btree_info.value_type;
644
645	memcpy(&info->value_type, vt, sizeof(info->value_type));
646	info->btree_info.tm = tm;
647	info->btree_info.levels = 1;
648
649	bvt->context = info;
650	bvt->size = sizeof(__le64);
651	bvt->inc = block_inc;
652	bvt->dec = block_dec;
653	bvt->equal = block_equal;
654}
655EXPORT_SYMBOL_GPL(dm_array_info_init);
656
657int dm_array_empty(struct dm_array_info *info, dm_block_t *root)
658{
659	return dm_btree_empty(&info->btree_info, root);
660}
661EXPORT_SYMBOL_GPL(dm_array_empty);
662
663static int array_resize(struct dm_array_info *info, dm_block_t root,
664			uint32_t old_size, uint32_t new_size,
665			const void *value, dm_block_t *new_root)
666{
667	int r;
668	struct resize resize;
669
670	if (old_size == new_size) {
671		*new_root = root;
672		return 0;
673	}
674
675	resize.info = info;
676	resize.root = root;
677	resize.size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
678	resize.max_entries = calc_max_entries(info->value_type.size,
679					      resize.size_of_block);
680
681	resize.old_nr_full_blocks = old_size / resize.max_entries;
682	resize.old_nr_entries_in_last_block = old_size % resize.max_entries;
683	resize.new_nr_full_blocks = new_size / resize.max_entries;
684	resize.new_nr_entries_in_last_block = new_size % resize.max_entries;
685	resize.value = value;
686
687	r = ((new_size > old_size) ? grow : shrink)(&resize);
688	if (r)
689		return r;
690
691	*new_root = resize.root;
692	return 0;
693}
694
695int dm_array_resize(struct dm_array_info *info, dm_block_t root,
696		    uint32_t old_size, uint32_t new_size,
697		    const void *value, dm_block_t *new_root)
698	__dm_written_to_disk(value)
699{
700	int r = array_resize(info, root, old_size, new_size, value, new_root);
701
702	__dm_unbless_for_disk(value);
703	return r;
704}
705EXPORT_SYMBOL_GPL(dm_array_resize);
706
707static int populate_ablock_with_values(struct dm_array_info *info, struct array_block *ab,
708				       value_fn fn, void *context,
709				       unsigned int base, unsigned int new_nr)
710{
711	int r;
712	unsigned int i;
713	struct dm_btree_value_type *vt = &info->value_type;
714
715	BUG_ON(le32_to_cpu(ab->nr_entries));
716	BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
717
718	for (i = 0; i < new_nr; i++) {
719		r = fn(base + i, element_at(info, ab, i), context);
720		if (r)
721			return r;
722
723		if (vt->inc)
724			vt->inc(vt->context, element_at(info, ab, i), 1);
725	}
726
727	ab->nr_entries = cpu_to_le32(new_nr);
728	return 0;
729}
730
731int dm_array_new(struct dm_array_info *info, dm_block_t *root,
732		 uint32_t size, value_fn fn, void *context)
733{
734	int r;
735	struct dm_block *block;
736	struct array_block *ab;
737	unsigned int block_index, end_block, size_of_block, max_entries;
738
739	r = dm_array_empty(info, root);
740	if (r)
741		return r;
742
743	size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
744	max_entries = calc_max_entries(info->value_type.size, size_of_block);
745	end_block = dm_div_up(size, max_entries);
746
747	for (block_index = 0; block_index != end_block; block_index++) {
748		r = alloc_ablock(info, size_of_block, max_entries, &block, &ab);
749		if (r)
750			break;
751
752		r = populate_ablock_with_values(info, ab, fn, context,
753						block_index * max_entries,
754						min(max_entries, size));
755		if (r) {
756			unlock_ablock(info, block);
757			break;
758		}
759
760		r = insert_ablock(info, block_index, block, root);
761		unlock_ablock(info, block);
762		if (r)
763			break;
764
765		size -= max_entries;
766	}
767
768	return r;
769}
770EXPORT_SYMBOL_GPL(dm_array_new);
771
772int dm_array_del(struct dm_array_info *info, dm_block_t root)
773{
774	return dm_btree_del(&info->btree_info, root);
775}
776EXPORT_SYMBOL_GPL(dm_array_del);
777
778int dm_array_get_value(struct dm_array_info *info, dm_block_t root,
779		       uint32_t index, void *value_le)
780{
781	int r;
782	struct dm_block *block;
783	struct array_block *ab;
784	size_t size_of_block;
785	unsigned int entry, max_entries;
786
787	size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
788	max_entries = calc_max_entries(info->value_type.size, size_of_block);
789
790	r = lookup_ablock(info, root, index / max_entries, &block, &ab);
791	if (r)
792		return r;
793
794	entry = index % max_entries;
795	if (entry >= le32_to_cpu(ab->nr_entries))
796		r = -ENODATA;
797	else
798		memcpy(value_le, element_at(info, ab, entry),
799		       info->value_type.size);
800
801	unlock_ablock(info, block);
802	return r;
803}
804EXPORT_SYMBOL_GPL(dm_array_get_value);
805
806static int array_set_value(struct dm_array_info *info, dm_block_t root,
807			   uint32_t index, const void *value, dm_block_t *new_root)
808{
809	int r;
810	struct dm_block *block;
811	struct array_block *ab;
812	size_t size_of_block;
813	unsigned int max_entries;
814	unsigned int entry;
815	void *old_value;
816	struct dm_btree_value_type *vt = &info->value_type;
817
818	size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm));
819	max_entries = calc_max_entries(info->value_type.size, size_of_block);
820
821	r = shadow_ablock(info, &root, index / max_entries, &block, &ab);
822	if (r)
823		return r;
824	*new_root = root;
825
826	entry = index % max_entries;
827	if (entry >= le32_to_cpu(ab->nr_entries)) {
828		r = -ENODATA;
829		goto out;
830	}
831
832	old_value = element_at(info, ab, entry);
833	if (vt->dec &&
834	    (!vt->equal || !vt->equal(vt->context, old_value, value))) {
835		vt->dec(vt->context, old_value, 1);
836		if (vt->inc)
837			vt->inc(vt->context, value, 1);
838	}
839
840	memcpy(old_value, value, info->value_type.size);
841
842out:
843	unlock_ablock(info, block);
844	return r;
845}
846
847int dm_array_set_value(struct dm_array_info *info, dm_block_t root,
848		 uint32_t index, const void *value, dm_block_t *new_root)
849	__dm_written_to_disk(value)
850{
851	int r;
852
853	r = array_set_value(info, root, index, value, new_root);
854	__dm_unbless_for_disk(value);
855	return r;
856}
857EXPORT_SYMBOL_GPL(dm_array_set_value);
858
859struct walk_info {
860	struct dm_array_info *info;
861	int (*fn)(void *context, uint64_t key, void *leaf);
862	void *context;
863};
864
865static int walk_ablock(void *context, uint64_t *keys, void *leaf)
866{
867	struct walk_info *wi = context;
868
869	int r;
870	unsigned int i;
871	__le64 block_le;
872	unsigned int nr_entries, max_entries;
873	struct dm_block *block;
874	struct array_block *ab;
875
876	memcpy(&block_le, leaf, sizeof(block_le));
877	r = get_ablock(wi->info, le64_to_cpu(block_le), &block, &ab);
878	if (r)
879		return r;
880
881	max_entries = le32_to_cpu(ab->max_entries);
882	nr_entries = le32_to_cpu(ab->nr_entries);
883	for (i = 0; i < nr_entries; i++) {
884		r = wi->fn(wi->context, keys[0] * max_entries + i,
885			   element_at(wi->info, ab, i));
886
887		if (r)
888			break;
889	}
890
891	unlock_ablock(wi->info, block);
892	return r;
893}
894
895int dm_array_walk(struct dm_array_info *info, dm_block_t root,
896		  int (*fn)(void *, uint64_t key, void *leaf),
897		  void *context)
898{
899	struct walk_info wi;
900
901	wi.info = info;
902	wi.fn = fn;
903	wi.context = context;
904
905	return dm_btree_walk(&info->btree_info, root, walk_ablock, &wi);
906}
907EXPORT_SYMBOL_GPL(dm_array_walk);
908
909/*----------------------------------------------------------------*/
910
911static int load_ablock(struct dm_array_cursor *c)
912{
913	int r;
914	__le64 value_le;
915	uint64_t key;
916
917	if (c->block)
918		unlock_ablock(c->info, c->block);
919
920	c->block = NULL;
921	c->ab = NULL;
922	c->index = 0;
923
924	r = dm_btree_cursor_get_value(&c->cursor, &key, &value_le);
925	if (r) {
926		DMERR("dm_btree_cursor_get_value failed");
927		dm_btree_cursor_end(&c->cursor);
928
929	} else {
930		r = get_ablock(c->info, le64_to_cpu(value_le), &c->block, &c->ab);
931		if (r) {
932			DMERR("get_ablock failed");
933			dm_btree_cursor_end(&c->cursor);
934		}
935	}
936
937	return r;
938}
939
940int dm_array_cursor_begin(struct dm_array_info *info, dm_block_t root,
941			  struct dm_array_cursor *c)
942{
943	int r;
944
945	memset(c, 0, sizeof(*c));
946	c->info = info;
947	r = dm_btree_cursor_begin(&info->btree_info, root, true, &c->cursor);
948	if (r) {
949		DMERR("couldn't create btree cursor");
950		return r;
951	}
952
953	return load_ablock(c);
954}
955EXPORT_SYMBOL_GPL(dm_array_cursor_begin);
956
957void dm_array_cursor_end(struct dm_array_cursor *c)
958{
959	if (c->block) {
960		unlock_ablock(c->info, c->block);
961		dm_btree_cursor_end(&c->cursor);
962	}
963}
964EXPORT_SYMBOL_GPL(dm_array_cursor_end);
965
966int dm_array_cursor_next(struct dm_array_cursor *c)
967{
968	int r;
969
970	if (!c->block)
971		return -ENODATA;
972
973	c->index++;
974
975	if (c->index >= le32_to_cpu(c->ab->nr_entries)) {
976		r = dm_btree_cursor_next(&c->cursor);
977		if (r)
978			return r;
979
980		r = load_ablock(c);
981		if (r)
982			return r;
983	}
984
985	return 0;
986}
987EXPORT_SYMBOL_GPL(dm_array_cursor_next);
988
989int dm_array_cursor_skip(struct dm_array_cursor *c, uint32_t count)
990{
991	int r;
992
993	do {
994		uint32_t remaining = le32_to_cpu(c->ab->nr_entries) - c->index;
995
996		if (count < remaining) {
997			c->index += count;
998			return 0;
999		}
1000
1001		count -= remaining;
1002		r = dm_array_cursor_next(c);
1003
1004	} while (!r);
1005
1006	return r;
1007}
1008EXPORT_SYMBOL_GPL(dm_array_cursor_skip);
1009
1010void dm_array_cursor_get_value(struct dm_array_cursor *c, void **value_le)
1011{
1012	*value_le = element_at(c->info, c->ab, c->index);
1013}
1014EXPORT_SYMBOL_GPL(dm_array_cursor_get_value);
1015
1016/*----------------------------------------------------------------*/
1017