1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2015 Facebook.  All rights reserved.
4 */
5
6#include <linux/kernel.h>
7#include <linux/sched/mm.h>
8#include "messages.h"
9#include "ctree.h"
10#include "disk-io.h"
11#include "locking.h"
12#include "free-space-tree.h"
13#include "transaction.h"
14#include "block-group.h"
15#include "fs.h"
16#include "accessors.h"
17#include "extent-tree.h"
18#include "root-tree.h"
19
20static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
21					struct btrfs_block_group *block_group,
22					struct btrfs_path *path);
23
24static struct btrfs_root *btrfs_free_space_root(
25				struct btrfs_block_group *block_group)
26{
27	struct btrfs_key key = {
28		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
29		.type = BTRFS_ROOT_ITEM_KEY,
30		.offset = 0,
31	};
32
33	if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2))
34		key.offset = block_group->global_root_id;
35	return btrfs_global_root(block_group->fs_info, &key);
36}
37
38void set_free_space_tree_thresholds(struct btrfs_block_group *cache)
39{
40	u32 bitmap_range;
41	size_t bitmap_size;
42	u64 num_bitmaps, total_bitmap_size;
43
44	if (WARN_ON(cache->length == 0))
45		btrfs_warn(cache->fs_info, "block group %llu length is zero",
46			   cache->start);
47
48	/*
49	 * We convert to bitmaps when the disk space required for using extents
50	 * exceeds that required for using bitmaps.
51	 */
52	bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
53	num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range);
54	bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
55	total_bitmap_size = num_bitmaps * bitmap_size;
56	cache->bitmap_high_thresh = div_u64(total_bitmap_size,
57					    sizeof(struct btrfs_item));
58
59	/*
60	 * We allow for a small buffer between the high threshold and low
61	 * threshold to avoid thrashing back and forth between the two formats.
62	 */
63	if (cache->bitmap_high_thresh > 100)
64		cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
65	else
66		cache->bitmap_low_thresh = 0;
67}
68
69static int add_new_free_space_info(struct btrfs_trans_handle *trans,
70				   struct btrfs_block_group *block_group,
71				   struct btrfs_path *path)
72{
73	struct btrfs_root *root = btrfs_free_space_root(block_group);
74	struct btrfs_free_space_info *info;
75	struct btrfs_key key;
76	struct extent_buffer *leaf;
77	int ret;
78
79	key.objectid = block_group->start;
80	key.type = BTRFS_FREE_SPACE_INFO_KEY;
81	key.offset = block_group->length;
82
83	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
84	if (ret)
85		goto out;
86
87	leaf = path->nodes[0];
88	info = btrfs_item_ptr(leaf, path->slots[0],
89			      struct btrfs_free_space_info);
90	btrfs_set_free_space_extent_count(leaf, info, 0);
91	btrfs_set_free_space_flags(leaf, info, 0);
92	btrfs_mark_buffer_dirty(trans, leaf);
93
94	ret = 0;
95out:
96	btrfs_release_path(path);
97	return ret;
98}
99
100EXPORT_FOR_TESTS
101struct btrfs_free_space_info *search_free_space_info(
102		struct btrfs_trans_handle *trans,
103		struct btrfs_block_group *block_group,
104		struct btrfs_path *path, int cow)
105{
106	struct btrfs_fs_info *fs_info = block_group->fs_info;
107	struct btrfs_root *root = btrfs_free_space_root(block_group);
108	struct btrfs_key key;
109	int ret;
110
111	key.objectid = block_group->start;
112	key.type = BTRFS_FREE_SPACE_INFO_KEY;
113	key.offset = block_group->length;
114
115	ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
116	if (ret < 0)
117		return ERR_PTR(ret);
118	if (ret != 0) {
119		btrfs_warn(fs_info, "missing free space info for %llu",
120			   block_group->start);
121		ASSERT(0);
122		return ERR_PTR(-ENOENT);
123	}
124
125	return btrfs_item_ptr(path->nodes[0], path->slots[0],
126			      struct btrfs_free_space_info);
127}
128
129/*
130 * btrfs_search_slot() but we're looking for the greatest key less than the
131 * passed key.
132 */
133static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
134				  struct btrfs_root *root,
135				  struct btrfs_key *key, struct btrfs_path *p,
136				  int ins_len, int cow)
137{
138	int ret;
139
140	ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
141	if (ret < 0)
142		return ret;
143
144	if (ret == 0) {
145		ASSERT(0);
146		return -EIO;
147	}
148
149	if (p->slots[0] == 0) {
150		ASSERT(0);
151		return -EIO;
152	}
153	p->slots[0]--;
154
155	return 0;
156}
157
158static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info,
159					 u64 size)
160{
161	return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE);
162}
163
164static unsigned long *alloc_bitmap(u32 bitmap_size)
165{
166	unsigned long *ret;
167	unsigned int nofs_flag;
168	u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long));
169
170	/*
171	 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
172	 * into the filesystem as the free space bitmap can be modified in the
173	 * critical section of a transaction commit.
174	 *
175	 * TODO: push the memalloc_nofs_{save,restore}() to the caller where we
176	 * know that recursion is unsafe.
177	 */
178	nofs_flag = memalloc_nofs_save();
179	ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL);
180	memalloc_nofs_restore(nofs_flag);
181	return ret;
182}
183
184static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
185{
186	u8 *p = ((u8 *)map) + BIT_BYTE(start);
187	const unsigned int size = start + len;
188	int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
189	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
190
191	while (len - bits_to_set >= 0) {
192		*p |= mask_to_set;
193		len -= bits_to_set;
194		bits_to_set = BITS_PER_BYTE;
195		mask_to_set = ~0;
196		p++;
197	}
198	if (len) {
199		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
200		*p |= mask_to_set;
201	}
202}
203
204EXPORT_FOR_TESTS
205int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
206				  struct btrfs_block_group *block_group,
207				  struct btrfs_path *path)
208{
209	struct btrfs_fs_info *fs_info = trans->fs_info;
210	struct btrfs_root *root = btrfs_free_space_root(block_group);
211	struct btrfs_free_space_info *info;
212	struct btrfs_key key, found_key;
213	struct extent_buffer *leaf;
214	unsigned long *bitmap;
215	char *bitmap_cursor;
216	u64 start, end;
217	u64 bitmap_range, i;
218	u32 bitmap_size, flags, expected_extent_count;
219	u32 extent_count = 0;
220	int done = 0, nr;
221	int ret;
222
223	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
224	bitmap = alloc_bitmap(bitmap_size);
225	if (!bitmap) {
226		ret = -ENOMEM;
227		goto out;
228	}
229
230	start = block_group->start;
231	end = block_group->start + block_group->length;
232
233	key.objectid = end - 1;
234	key.type = (u8)-1;
235	key.offset = (u64)-1;
236
237	while (!done) {
238		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
239		if (ret)
240			goto out;
241
242		leaf = path->nodes[0];
243		nr = 0;
244		path->slots[0]++;
245		while (path->slots[0] > 0) {
246			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
247
248			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
249				ASSERT(found_key.objectid == block_group->start);
250				ASSERT(found_key.offset == block_group->length);
251				done = 1;
252				break;
253			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
254				u64 first, last;
255
256				ASSERT(found_key.objectid >= start);
257				ASSERT(found_key.objectid < end);
258				ASSERT(found_key.objectid + found_key.offset <= end);
259
260				first = div_u64(found_key.objectid - start,
261						fs_info->sectorsize);
262				last = div_u64(found_key.objectid + found_key.offset - start,
263					       fs_info->sectorsize);
264				le_bitmap_set(bitmap, first, last - first);
265
266				extent_count++;
267				nr++;
268				path->slots[0]--;
269			} else {
270				ASSERT(0);
271			}
272		}
273
274		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
275		if (ret)
276			goto out;
277		btrfs_release_path(path);
278	}
279
280	info = search_free_space_info(trans, block_group, path, 1);
281	if (IS_ERR(info)) {
282		ret = PTR_ERR(info);
283		goto out;
284	}
285	leaf = path->nodes[0];
286	flags = btrfs_free_space_flags(leaf, info);
287	flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
288	btrfs_set_free_space_flags(leaf, info, flags);
289	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
290	btrfs_mark_buffer_dirty(trans, leaf);
291	btrfs_release_path(path);
292
293	if (extent_count != expected_extent_count) {
294		btrfs_err(fs_info,
295			  "incorrect extent count for %llu; counted %u, expected %u",
296			  block_group->start, extent_count,
297			  expected_extent_count);
298		ASSERT(0);
299		ret = -EIO;
300		goto out;
301	}
302
303	bitmap_cursor = (char *)bitmap;
304	bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
305	i = start;
306	while (i < end) {
307		unsigned long ptr;
308		u64 extent_size;
309		u32 data_size;
310
311		extent_size = min(end - i, bitmap_range);
312		data_size = free_space_bitmap_size(fs_info, extent_size);
313
314		key.objectid = i;
315		key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
316		key.offset = extent_size;
317
318		ret = btrfs_insert_empty_item(trans, root, path, &key,
319					      data_size);
320		if (ret)
321			goto out;
322
323		leaf = path->nodes[0];
324		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
325		write_extent_buffer(leaf, bitmap_cursor, ptr,
326				    data_size);
327		btrfs_mark_buffer_dirty(trans, leaf);
328		btrfs_release_path(path);
329
330		i += extent_size;
331		bitmap_cursor += data_size;
332	}
333
334	ret = 0;
335out:
336	kvfree(bitmap);
337	if (ret)
338		btrfs_abort_transaction(trans, ret);
339	return ret;
340}
341
342EXPORT_FOR_TESTS
343int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
344				  struct btrfs_block_group *block_group,
345				  struct btrfs_path *path)
346{
347	struct btrfs_fs_info *fs_info = trans->fs_info;
348	struct btrfs_root *root = btrfs_free_space_root(block_group);
349	struct btrfs_free_space_info *info;
350	struct btrfs_key key, found_key;
351	struct extent_buffer *leaf;
352	unsigned long *bitmap;
353	u64 start, end;
354	u32 bitmap_size, flags, expected_extent_count;
355	unsigned long nrbits, start_bit, end_bit;
356	u32 extent_count = 0;
357	int done = 0, nr;
358	int ret;
359
360	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
361	bitmap = alloc_bitmap(bitmap_size);
362	if (!bitmap) {
363		ret = -ENOMEM;
364		goto out;
365	}
366
367	start = block_group->start;
368	end = block_group->start + block_group->length;
369
370	key.objectid = end - 1;
371	key.type = (u8)-1;
372	key.offset = (u64)-1;
373
374	while (!done) {
375		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
376		if (ret)
377			goto out;
378
379		leaf = path->nodes[0];
380		nr = 0;
381		path->slots[0]++;
382		while (path->slots[0] > 0) {
383			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
384
385			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
386				ASSERT(found_key.objectid == block_group->start);
387				ASSERT(found_key.offset == block_group->length);
388				done = 1;
389				break;
390			} else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
391				unsigned long ptr;
392				char *bitmap_cursor;
393				u32 bitmap_pos, data_size;
394
395				ASSERT(found_key.objectid >= start);
396				ASSERT(found_key.objectid < end);
397				ASSERT(found_key.objectid + found_key.offset <= end);
398
399				bitmap_pos = div_u64(found_key.objectid - start,
400						     fs_info->sectorsize *
401						     BITS_PER_BYTE);
402				bitmap_cursor = ((char *)bitmap) + bitmap_pos;
403				data_size = free_space_bitmap_size(fs_info,
404								found_key.offset);
405
406				ptr = btrfs_item_ptr_offset(leaf, path->slots[0] - 1);
407				read_extent_buffer(leaf, bitmap_cursor, ptr,
408						   data_size);
409
410				nr++;
411				path->slots[0]--;
412			} else {
413				ASSERT(0);
414			}
415		}
416
417		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
418		if (ret)
419			goto out;
420		btrfs_release_path(path);
421	}
422
423	info = search_free_space_info(trans, block_group, path, 1);
424	if (IS_ERR(info)) {
425		ret = PTR_ERR(info);
426		goto out;
427	}
428	leaf = path->nodes[0];
429	flags = btrfs_free_space_flags(leaf, info);
430	flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
431	btrfs_set_free_space_flags(leaf, info, flags);
432	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
433	btrfs_mark_buffer_dirty(trans, leaf);
434	btrfs_release_path(path);
435
436	nrbits = block_group->length >> block_group->fs_info->sectorsize_bits;
437	start_bit = find_next_bit_le(bitmap, nrbits, 0);
438
439	while (start_bit < nrbits) {
440		end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
441		ASSERT(start_bit < end_bit);
442
443		key.objectid = start + start_bit * block_group->fs_info->sectorsize;
444		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
445		key.offset = (end_bit - start_bit) * block_group->fs_info->sectorsize;
446
447		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
448		if (ret)
449			goto out;
450		btrfs_release_path(path);
451
452		extent_count++;
453
454		start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
455	}
456
457	if (extent_count != expected_extent_count) {
458		btrfs_err(fs_info,
459			  "incorrect extent count for %llu; counted %u, expected %u",
460			  block_group->start, extent_count,
461			  expected_extent_count);
462		ASSERT(0);
463		ret = -EIO;
464		goto out;
465	}
466
467	ret = 0;
468out:
469	kvfree(bitmap);
470	if (ret)
471		btrfs_abort_transaction(trans, ret);
472	return ret;
473}
474
475static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
476					  struct btrfs_block_group *block_group,
477					  struct btrfs_path *path,
478					  int new_extents)
479{
480	struct btrfs_free_space_info *info;
481	u32 flags;
482	u32 extent_count;
483	int ret = 0;
484
485	if (new_extents == 0)
486		return 0;
487
488	info = search_free_space_info(trans, block_group, path, 1);
489	if (IS_ERR(info)) {
490		ret = PTR_ERR(info);
491		goto out;
492	}
493	flags = btrfs_free_space_flags(path->nodes[0], info);
494	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
495
496	extent_count += new_extents;
497	btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count);
498	btrfs_mark_buffer_dirty(trans, path->nodes[0]);
499	btrfs_release_path(path);
500
501	if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
502	    extent_count > block_group->bitmap_high_thresh) {
503		ret = convert_free_space_to_bitmaps(trans, block_group, path);
504	} else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
505		   extent_count < block_group->bitmap_low_thresh) {
506		ret = convert_free_space_to_extents(trans, block_group, path);
507	}
508
509out:
510	return ret;
511}
512
513EXPORT_FOR_TESTS
514int free_space_test_bit(struct btrfs_block_group *block_group,
515			struct btrfs_path *path, u64 offset)
516{
517	struct extent_buffer *leaf;
518	struct btrfs_key key;
519	u64 found_start, found_end;
520	unsigned long ptr, i;
521
522	leaf = path->nodes[0];
523	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
524	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
525
526	found_start = key.objectid;
527	found_end = key.objectid + key.offset;
528	ASSERT(offset >= found_start && offset < found_end);
529
530	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
531	i = div_u64(offset - found_start,
532		    block_group->fs_info->sectorsize);
533	return !!extent_buffer_test_bit(leaf, ptr, i);
534}
535
536static void free_space_set_bits(struct btrfs_trans_handle *trans,
537				struct btrfs_block_group *block_group,
538				struct btrfs_path *path, u64 *start, u64 *size,
539				int bit)
540{
541	struct btrfs_fs_info *fs_info = block_group->fs_info;
542	struct extent_buffer *leaf;
543	struct btrfs_key key;
544	u64 end = *start + *size;
545	u64 found_start, found_end;
546	unsigned long ptr, first, last;
547
548	leaf = path->nodes[0];
549	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
550	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
551
552	found_start = key.objectid;
553	found_end = key.objectid + key.offset;
554	ASSERT(*start >= found_start && *start < found_end);
555	ASSERT(end > found_start);
556
557	if (end > found_end)
558		end = found_end;
559
560	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
561	first = (*start - found_start) >> fs_info->sectorsize_bits;
562	last = (end - found_start) >> fs_info->sectorsize_bits;
563	if (bit)
564		extent_buffer_bitmap_set(leaf, ptr, first, last - first);
565	else
566		extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
567	btrfs_mark_buffer_dirty(trans, leaf);
568
569	*size -= end - *start;
570	*start = end;
571}
572
573/*
574 * We can't use btrfs_next_item() in modify_free_space_bitmap() because
575 * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
576 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
577 * looking for.
578 */
579static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
580				  struct btrfs_root *root, struct btrfs_path *p)
581{
582	struct btrfs_key key;
583
584	if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
585		p->slots[0]++;
586		return 0;
587	}
588
589	btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
590	btrfs_release_path(p);
591
592	key.objectid += key.offset;
593	key.type = (u8)-1;
594	key.offset = (u64)-1;
595
596	return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
597}
598
599/*
600 * If remove is 1, then we are removing free space, thus clearing bits in the
601 * bitmap. If remove is 0, then we are adding free space, thus setting bits in
602 * the bitmap.
603 */
604static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
605				    struct btrfs_block_group *block_group,
606				    struct btrfs_path *path,
607				    u64 start, u64 size, int remove)
608{
609	struct btrfs_root *root = btrfs_free_space_root(block_group);
610	struct btrfs_key key;
611	u64 end = start + size;
612	u64 cur_start, cur_size;
613	int prev_bit, next_bit;
614	int new_extents;
615	int ret;
616
617	/*
618	 * Read the bit for the block immediately before the extent of space if
619	 * that block is within the block group.
620	 */
621	if (start > block_group->start) {
622		u64 prev_block = start - block_group->fs_info->sectorsize;
623
624		key.objectid = prev_block;
625		key.type = (u8)-1;
626		key.offset = (u64)-1;
627
628		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
629		if (ret)
630			goto out;
631
632		prev_bit = free_space_test_bit(block_group, path, prev_block);
633
634		/* The previous block may have been in the previous bitmap. */
635		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
636		if (start >= key.objectid + key.offset) {
637			ret = free_space_next_bitmap(trans, root, path);
638			if (ret)
639				goto out;
640		}
641	} else {
642		key.objectid = start;
643		key.type = (u8)-1;
644		key.offset = (u64)-1;
645
646		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
647		if (ret)
648			goto out;
649
650		prev_bit = -1;
651	}
652
653	/*
654	 * Iterate over all of the bitmaps overlapped by the extent of space,
655	 * clearing/setting bits as required.
656	 */
657	cur_start = start;
658	cur_size = size;
659	while (1) {
660		free_space_set_bits(trans, block_group, path, &cur_start, &cur_size,
661				    !remove);
662		if (cur_size == 0)
663			break;
664		ret = free_space_next_bitmap(trans, root, path);
665		if (ret)
666			goto out;
667	}
668
669	/*
670	 * Read the bit for the block immediately after the extent of space if
671	 * that block is within the block group.
672	 */
673	if (end < block_group->start + block_group->length) {
674		/* The next block may be in the next bitmap. */
675		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
676		if (end >= key.objectid + key.offset) {
677			ret = free_space_next_bitmap(trans, root, path);
678			if (ret)
679				goto out;
680		}
681
682		next_bit = free_space_test_bit(block_group, path, end);
683	} else {
684		next_bit = -1;
685	}
686
687	if (remove) {
688		new_extents = -1;
689		if (prev_bit == 1) {
690			/* Leftover on the left. */
691			new_extents++;
692		}
693		if (next_bit == 1) {
694			/* Leftover on the right. */
695			new_extents++;
696		}
697	} else {
698		new_extents = 1;
699		if (prev_bit == 1) {
700			/* Merging with neighbor on the left. */
701			new_extents--;
702		}
703		if (next_bit == 1) {
704			/* Merging with neighbor on the right. */
705			new_extents--;
706		}
707	}
708
709	btrfs_release_path(path);
710	ret = update_free_space_extent_count(trans, block_group, path,
711					     new_extents);
712
713out:
714	return ret;
715}
716
717static int remove_free_space_extent(struct btrfs_trans_handle *trans,
718				    struct btrfs_block_group *block_group,
719				    struct btrfs_path *path,
720				    u64 start, u64 size)
721{
722	struct btrfs_root *root = btrfs_free_space_root(block_group);
723	struct btrfs_key key;
724	u64 found_start, found_end;
725	u64 end = start + size;
726	int new_extents = -1;
727	int ret;
728
729	key.objectid = start;
730	key.type = (u8)-1;
731	key.offset = (u64)-1;
732
733	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
734	if (ret)
735		goto out;
736
737	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
738
739	ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
740
741	found_start = key.objectid;
742	found_end = key.objectid + key.offset;
743	ASSERT(start >= found_start && end <= found_end);
744
745	/*
746	 * Okay, now that we've found the free space extent which contains the
747	 * free space that we are removing, there are four cases:
748	 *
749	 * 1. We're using the whole extent: delete the key we found and
750	 * decrement the free space extent count.
751	 * 2. We are using part of the extent starting at the beginning: delete
752	 * the key we found and insert a new key representing the leftover at
753	 * the end. There is no net change in the number of extents.
754	 * 3. We are using part of the extent ending at the end: delete the key
755	 * we found and insert a new key representing the leftover at the
756	 * beginning. There is no net change in the number of extents.
757	 * 4. We are using part of the extent in the middle: delete the key we
758	 * found and insert two new keys representing the leftovers on each
759	 * side. Where we used to have one extent, we now have two, so increment
760	 * the extent count. We may need to convert the block group to bitmaps
761	 * as a result.
762	 */
763
764	/* Delete the existing key (cases 1-4). */
765	ret = btrfs_del_item(trans, root, path);
766	if (ret)
767		goto out;
768
769	/* Add a key for leftovers at the beginning (cases 3 and 4). */
770	if (start > found_start) {
771		key.objectid = found_start;
772		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
773		key.offset = start - found_start;
774
775		btrfs_release_path(path);
776		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
777		if (ret)
778			goto out;
779		new_extents++;
780	}
781
782	/* Add a key for leftovers at the end (cases 2 and 4). */
783	if (end < found_end) {
784		key.objectid = end;
785		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
786		key.offset = found_end - end;
787
788		btrfs_release_path(path);
789		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
790		if (ret)
791			goto out;
792		new_extents++;
793	}
794
795	btrfs_release_path(path);
796	ret = update_free_space_extent_count(trans, block_group, path,
797					     new_extents);
798
799out:
800	return ret;
801}
802
803EXPORT_FOR_TESTS
804int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
805				  struct btrfs_block_group *block_group,
806				  struct btrfs_path *path, u64 start, u64 size)
807{
808	struct btrfs_free_space_info *info;
809	u32 flags;
810	int ret;
811
812	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
813		ret = __add_block_group_free_space(trans, block_group, path);
814		if (ret)
815			return ret;
816	}
817
818	info = search_free_space_info(NULL, block_group, path, 0);
819	if (IS_ERR(info))
820		return PTR_ERR(info);
821	flags = btrfs_free_space_flags(path->nodes[0], info);
822	btrfs_release_path(path);
823
824	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
825		return modify_free_space_bitmap(trans, block_group, path,
826						start, size, 1);
827	} else {
828		return remove_free_space_extent(trans, block_group, path,
829						start, size);
830	}
831}
832
833int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
834				u64 start, u64 size)
835{
836	struct btrfs_block_group *block_group;
837	struct btrfs_path *path;
838	int ret;
839
840	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
841		return 0;
842
843	path = btrfs_alloc_path();
844	if (!path) {
845		ret = -ENOMEM;
846		goto out;
847	}
848
849	block_group = btrfs_lookup_block_group(trans->fs_info, start);
850	if (!block_group) {
851		ASSERT(0);
852		ret = -ENOENT;
853		goto out;
854	}
855
856	mutex_lock(&block_group->free_space_lock);
857	ret = __remove_from_free_space_tree(trans, block_group, path, start,
858					    size);
859	mutex_unlock(&block_group->free_space_lock);
860
861	btrfs_put_block_group(block_group);
862out:
863	btrfs_free_path(path);
864	if (ret)
865		btrfs_abort_transaction(trans, ret);
866	return ret;
867}
868
869static int add_free_space_extent(struct btrfs_trans_handle *trans,
870				 struct btrfs_block_group *block_group,
871				 struct btrfs_path *path,
872				 u64 start, u64 size)
873{
874	struct btrfs_root *root = btrfs_free_space_root(block_group);
875	struct btrfs_key key, new_key;
876	u64 found_start, found_end;
877	u64 end = start + size;
878	int new_extents = 1;
879	int ret;
880
881	/*
882	 * We are adding a new extent of free space, but we need to merge
883	 * extents. There are four cases here:
884	 *
885	 * 1. The new extent does not have any immediate neighbors to merge
886	 * with: add the new key and increment the free space extent count. We
887	 * may need to convert the block group to bitmaps as a result.
888	 * 2. The new extent has an immediate neighbor before it: remove the
889	 * previous key and insert a new key combining both of them. There is no
890	 * net change in the number of extents.
891	 * 3. The new extent has an immediate neighbor after it: remove the next
892	 * key and insert a new key combining both of them. There is no net
893	 * change in the number of extents.
894	 * 4. The new extent has immediate neighbors on both sides: remove both
895	 * of the keys and insert a new key combining all of them. Where we used
896	 * to have two extents, we now have one, so decrement the extent count.
897	 */
898
899	new_key.objectid = start;
900	new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
901	new_key.offset = size;
902
903	/* Search for a neighbor on the left. */
904	if (start == block_group->start)
905		goto right;
906	key.objectid = start - 1;
907	key.type = (u8)-1;
908	key.offset = (u64)-1;
909
910	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
911	if (ret)
912		goto out;
913
914	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
915
916	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
917		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
918		btrfs_release_path(path);
919		goto right;
920	}
921
922	found_start = key.objectid;
923	found_end = key.objectid + key.offset;
924	ASSERT(found_start >= block_group->start &&
925	       found_end > block_group->start);
926	ASSERT(found_start < start && found_end <= start);
927
928	/*
929	 * Delete the neighbor on the left and absorb it into the new key (cases
930	 * 2 and 4).
931	 */
932	if (found_end == start) {
933		ret = btrfs_del_item(trans, root, path);
934		if (ret)
935			goto out;
936		new_key.objectid = found_start;
937		new_key.offset += key.offset;
938		new_extents--;
939	}
940	btrfs_release_path(path);
941
942right:
943	/* Search for a neighbor on the right. */
944	if (end == block_group->start + block_group->length)
945		goto insert;
946	key.objectid = end;
947	key.type = (u8)-1;
948	key.offset = (u64)-1;
949
950	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
951	if (ret)
952		goto out;
953
954	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
955
956	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
957		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
958		btrfs_release_path(path);
959		goto insert;
960	}
961
962	found_start = key.objectid;
963	found_end = key.objectid + key.offset;
964	ASSERT(found_start >= block_group->start &&
965	       found_end > block_group->start);
966	ASSERT((found_start < start && found_end <= start) ||
967	       (found_start >= end && found_end > end));
968
969	/*
970	 * Delete the neighbor on the right and absorb it into the new key
971	 * (cases 3 and 4).
972	 */
973	if (found_start == end) {
974		ret = btrfs_del_item(trans, root, path);
975		if (ret)
976			goto out;
977		new_key.offset += key.offset;
978		new_extents--;
979	}
980	btrfs_release_path(path);
981
982insert:
983	/* Insert the new key (cases 1-4). */
984	ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
985	if (ret)
986		goto out;
987
988	btrfs_release_path(path);
989	ret = update_free_space_extent_count(trans, block_group, path,
990					     new_extents);
991
992out:
993	return ret;
994}
995
996EXPORT_FOR_TESTS
997int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
998			     struct btrfs_block_group *block_group,
999			     struct btrfs_path *path, u64 start, u64 size)
1000{
1001	struct btrfs_free_space_info *info;
1002	u32 flags;
1003	int ret;
1004
1005	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
1006		ret = __add_block_group_free_space(trans, block_group, path);
1007		if (ret)
1008			return ret;
1009	}
1010
1011	info = search_free_space_info(NULL, block_group, path, 0);
1012	if (IS_ERR(info))
1013		return PTR_ERR(info);
1014	flags = btrfs_free_space_flags(path->nodes[0], info);
1015	btrfs_release_path(path);
1016
1017	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
1018		return modify_free_space_bitmap(trans, block_group, path,
1019						start, size, 0);
1020	} else {
1021		return add_free_space_extent(trans, block_group, path, start,
1022					     size);
1023	}
1024}
1025
1026int add_to_free_space_tree(struct btrfs_trans_handle *trans,
1027			   u64 start, u64 size)
1028{
1029	struct btrfs_block_group *block_group;
1030	struct btrfs_path *path;
1031	int ret;
1032
1033	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1034		return 0;
1035
1036	path = btrfs_alloc_path();
1037	if (!path) {
1038		ret = -ENOMEM;
1039		goto out;
1040	}
1041
1042	block_group = btrfs_lookup_block_group(trans->fs_info, start);
1043	if (!block_group) {
1044		ASSERT(0);
1045		ret = -ENOENT;
1046		goto out;
1047	}
1048
1049	mutex_lock(&block_group->free_space_lock);
1050	ret = __add_to_free_space_tree(trans, block_group, path, start, size);
1051	mutex_unlock(&block_group->free_space_lock);
1052
1053	btrfs_put_block_group(block_group);
1054out:
1055	btrfs_free_path(path);
1056	if (ret)
1057		btrfs_abort_transaction(trans, ret);
1058	return ret;
1059}
1060
1061/*
1062 * Populate the free space tree by walking the extent tree. Operations on the
1063 * extent tree that happen as a result of writes to the free space tree will go
1064 * through the normal add/remove hooks.
1065 */
1066static int populate_free_space_tree(struct btrfs_trans_handle *trans,
1067				    struct btrfs_block_group *block_group)
1068{
1069	struct btrfs_root *extent_root;
1070	struct btrfs_path *path, *path2;
1071	struct btrfs_key key;
1072	u64 start, end;
1073	int ret;
1074
1075	path = btrfs_alloc_path();
1076	if (!path)
1077		return -ENOMEM;
1078	path->reada = READA_FORWARD;
1079
1080	path2 = btrfs_alloc_path();
1081	if (!path2) {
1082		btrfs_free_path(path);
1083		return -ENOMEM;
1084	}
1085
1086	ret = add_new_free_space_info(trans, block_group, path2);
1087	if (ret)
1088		goto out;
1089
1090	mutex_lock(&block_group->free_space_lock);
1091
1092	/*
1093	 * Iterate through all of the extent and metadata items in this block
1094	 * group, adding the free space between them and the free space at the
1095	 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
1096	 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
1097	 * contained in.
1098	 */
1099	key.objectid = block_group->start;
1100	key.type = BTRFS_EXTENT_ITEM_KEY;
1101	key.offset = 0;
1102
1103	extent_root = btrfs_extent_root(trans->fs_info, key.objectid);
1104	ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
1105	if (ret < 0)
1106		goto out_locked;
1107	ASSERT(ret == 0);
1108
1109	start = block_group->start;
1110	end = block_group->start + block_group->length;
1111	while (1) {
1112		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1113
1114		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
1115		    key.type == BTRFS_METADATA_ITEM_KEY) {
1116			if (key.objectid >= end)
1117				break;
1118
1119			if (start < key.objectid) {
1120				ret = __add_to_free_space_tree(trans,
1121							       block_group,
1122							       path2, start,
1123							       key.objectid -
1124							       start);
1125				if (ret)
1126					goto out_locked;
1127			}
1128			start = key.objectid;
1129			if (key.type == BTRFS_METADATA_ITEM_KEY)
1130				start += trans->fs_info->nodesize;
1131			else
1132				start += key.offset;
1133		} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
1134			if (key.objectid != block_group->start)
1135				break;
1136		}
1137
1138		ret = btrfs_next_item(extent_root, path);
1139		if (ret < 0)
1140			goto out_locked;
1141		if (ret)
1142			break;
1143	}
1144	if (start < end) {
1145		ret = __add_to_free_space_tree(trans, block_group, path2,
1146					       start, end - start);
1147		if (ret)
1148			goto out_locked;
1149	}
1150
1151	ret = 0;
1152out_locked:
1153	mutex_unlock(&block_group->free_space_lock);
1154out:
1155	btrfs_free_path(path2);
1156	btrfs_free_path(path);
1157	return ret;
1158}
1159
1160int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
1161{
1162	struct btrfs_trans_handle *trans;
1163	struct btrfs_root *tree_root = fs_info->tree_root;
1164	struct btrfs_root *free_space_root;
1165	struct btrfs_block_group *block_group;
1166	struct rb_node *node;
1167	int ret;
1168
1169	trans = btrfs_start_transaction(tree_root, 0);
1170	if (IS_ERR(trans))
1171		return PTR_ERR(trans);
1172
1173	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1174	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1175	free_space_root = btrfs_create_tree(trans,
1176					    BTRFS_FREE_SPACE_TREE_OBJECTID);
1177	if (IS_ERR(free_space_root)) {
1178		ret = PTR_ERR(free_space_root);
1179		btrfs_abort_transaction(trans, ret);
1180		btrfs_end_transaction(trans);
1181		goto out_clear;
1182	}
1183	ret = btrfs_global_root_insert(free_space_root);
1184	if (ret) {
1185		btrfs_put_root(free_space_root);
1186		btrfs_abort_transaction(trans, ret);
1187		btrfs_end_transaction(trans);
1188		goto out_clear;
1189	}
1190
1191	node = rb_first_cached(&fs_info->block_group_cache_tree);
1192	while (node) {
1193		block_group = rb_entry(node, struct btrfs_block_group,
1194				       cache_node);
1195		ret = populate_free_space_tree(trans, block_group);
1196		if (ret) {
1197			btrfs_abort_transaction(trans, ret);
1198			btrfs_end_transaction(trans);
1199			goto out_clear;
1200		}
1201		node = rb_next(node);
1202	}
1203
1204	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1205	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1206	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1207	ret = btrfs_commit_transaction(trans);
1208
1209	/*
1210	 * Now that we've committed the transaction any reading of our commit
1211	 * root will be safe, so we can cache from the free space tree now.
1212	 */
1213	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1214	return ret;
1215
1216out_clear:
1217	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1218	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1219	return ret;
1220}
1221
1222static int clear_free_space_tree(struct btrfs_trans_handle *trans,
1223				 struct btrfs_root *root)
1224{
1225	struct btrfs_path *path;
1226	struct btrfs_key key;
1227	int nr;
1228	int ret;
1229
1230	path = btrfs_alloc_path();
1231	if (!path)
1232		return -ENOMEM;
1233
1234	key.objectid = 0;
1235	key.type = 0;
1236	key.offset = 0;
1237
1238	while (1) {
1239		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1240		if (ret < 0)
1241			goto out;
1242
1243		nr = btrfs_header_nritems(path->nodes[0]);
1244		if (!nr)
1245			break;
1246
1247		path->slots[0] = 0;
1248		ret = btrfs_del_items(trans, root, path, 0, nr);
1249		if (ret)
1250			goto out;
1251
1252		btrfs_release_path(path);
1253	}
1254
1255	ret = 0;
1256out:
1257	btrfs_free_path(path);
1258	return ret;
1259}
1260
1261int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
1262{
1263	struct btrfs_trans_handle *trans;
1264	struct btrfs_root *tree_root = fs_info->tree_root;
1265	struct btrfs_key key = {
1266		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
1267		.type = BTRFS_ROOT_ITEM_KEY,
1268		.offset = 0,
1269	};
1270	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
1271	int ret;
1272
1273	trans = btrfs_start_transaction(tree_root, 0);
1274	if (IS_ERR(trans))
1275		return PTR_ERR(trans);
1276
1277	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1278	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1279
1280	ret = clear_free_space_tree(trans, free_space_root);
1281	if (ret) {
1282		btrfs_abort_transaction(trans, ret);
1283		btrfs_end_transaction(trans);
1284		return ret;
1285	}
1286
1287	ret = btrfs_del_root(trans, &free_space_root->root_key);
1288	if (ret) {
1289		btrfs_abort_transaction(trans, ret);
1290		btrfs_end_transaction(trans);
1291		return ret;
1292	}
1293
1294	btrfs_global_root_delete(free_space_root);
1295
1296	spin_lock(&fs_info->trans_lock);
1297	list_del(&free_space_root->dirty_list);
1298	spin_unlock(&fs_info->trans_lock);
1299
1300	btrfs_tree_lock(free_space_root->node);
1301	btrfs_clear_buffer_dirty(trans, free_space_root->node);
1302	btrfs_tree_unlock(free_space_root->node);
1303	btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
1304			      free_space_root->node, 0, 1);
1305
1306	btrfs_put_root(free_space_root);
1307
1308	return btrfs_commit_transaction(trans);
1309}
1310
1311int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
1312{
1313	struct btrfs_trans_handle *trans;
1314	struct btrfs_key key = {
1315		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
1316		.type = BTRFS_ROOT_ITEM_KEY,
1317		.offset = 0,
1318	};
1319	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
1320	struct rb_node *node;
1321	int ret;
1322
1323	trans = btrfs_start_transaction(free_space_root, 1);
1324	if (IS_ERR(trans))
1325		return PTR_ERR(trans);
1326
1327	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1328	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1329
1330	ret = clear_free_space_tree(trans, free_space_root);
1331	if (ret) {
1332		btrfs_abort_transaction(trans, ret);
1333		btrfs_end_transaction(trans);
1334		return ret;
1335	}
1336
1337	node = rb_first_cached(&fs_info->block_group_cache_tree);
1338	while (node) {
1339		struct btrfs_block_group *block_group;
1340
1341		block_group = rb_entry(node, struct btrfs_block_group,
1342				       cache_node);
1343		ret = populate_free_space_tree(trans, block_group);
1344		if (ret) {
1345			btrfs_abort_transaction(trans, ret);
1346			btrfs_end_transaction(trans);
1347			return ret;
1348		}
1349		node = rb_next(node);
1350	}
1351
1352	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1353	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1354	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1355
1356	ret = btrfs_commit_transaction(trans);
1357	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1358	return ret;
1359}
1360
1361static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
1362					struct btrfs_block_group *block_group,
1363					struct btrfs_path *path)
1364{
1365	int ret;
1366
1367	clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags);
1368
1369	ret = add_new_free_space_info(trans, block_group, path);
1370	if (ret)
1371		return ret;
1372
1373	return __add_to_free_space_tree(trans, block_group, path,
1374					block_group->start,
1375					block_group->length);
1376}
1377
1378int add_block_group_free_space(struct btrfs_trans_handle *trans,
1379			       struct btrfs_block_group *block_group)
1380{
1381	struct btrfs_fs_info *fs_info = trans->fs_info;
1382	struct btrfs_path *path = NULL;
1383	int ret = 0;
1384
1385	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
1386		return 0;
1387
1388	mutex_lock(&block_group->free_space_lock);
1389	if (!test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags))
1390		goto out;
1391
1392	path = btrfs_alloc_path();
1393	if (!path) {
1394		ret = -ENOMEM;
1395		goto out;
1396	}
1397
1398	ret = __add_block_group_free_space(trans, block_group, path);
1399
1400out:
1401	btrfs_free_path(path);
1402	mutex_unlock(&block_group->free_space_lock);
1403	if (ret)
1404		btrfs_abort_transaction(trans, ret);
1405	return ret;
1406}
1407
1408int remove_block_group_free_space(struct btrfs_trans_handle *trans,
1409				  struct btrfs_block_group *block_group)
1410{
1411	struct btrfs_root *root = btrfs_free_space_root(block_group);
1412	struct btrfs_path *path;
1413	struct btrfs_key key, found_key;
1414	struct extent_buffer *leaf;
1415	u64 start, end;
1416	int done = 0, nr;
1417	int ret;
1418
1419	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1420		return 0;
1421
1422	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
1423		/* We never added this block group to the free space tree. */
1424		return 0;
1425	}
1426
1427	path = btrfs_alloc_path();
1428	if (!path) {
1429		ret = -ENOMEM;
1430		goto out;
1431	}
1432
1433	start = block_group->start;
1434	end = block_group->start + block_group->length;
1435
1436	key.objectid = end - 1;
1437	key.type = (u8)-1;
1438	key.offset = (u64)-1;
1439
1440	while (!done) {
1441		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
1442		if (ret)
1443			goto out;
1444
1445		leaf = path->nodes[0];
1446		nr = 0;
1447		path->slots[0]++;
1448		while (path->slots[0] > 0) {
1449			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
1450
1451			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
1452				ASSERT(found_key.objectid == block_group->start);
1453				ASSERT(found_key.offset == block_group->length);
1454				done = 1;
1455				nr++;
1456				path->slots[0]--;
1457				break;
1458			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
1459				   found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
1460				ASSERT(found_key.objectid >= start);
1461				ASSERT(found_key.objectid < end);
1462				ASSERT(found_key.objectid + found_key.offset <= end);
1463				nr++;
1464				path->slots[0]--;
1465			} else {
1466				ASSERT(0);
1467			}
1468		}
1469
1470		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
1471		if (ret)
1472			goto out;
1473		btrfs_release_path(path);
1474	}
1475
1476	ret = 0;
1477out:
1478	btrfs_free_path(path);
1479	if (ret)
1480		btrfs_abort_transaction(trans, ret);
1481	return ret;
1482}
1483
1484static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
1485				   struct btrfs_path *path,
1486				   u32 expected_extent_count)
1487{
1488	struct btrfs_block_group *block_group;
1489	struct btrfs_fs_info *fs_info;
1490	struct btrfs_root *root;
1491	struct btrfs_key key;
1492	int prev_bit = 0, bit;
1493	/* Initialize to silence GCC. */
1494	u64 extent_start = 0;
1495	u64 end, offset;
1496	u64 total_found = 0;
1497	u32 extent_count = 0;
1498	int ret;
1499
1500	block_group = caching_ctl->block_group;
1501	fs_info = block_group->fs_info;
1502	root = btrfs_free_space_root(block_group);
1503
1504	end = block_group->start + block_group->length;
1505
1506	while (1) {
1507		ret = btrfs_next_item(root, path);
1508		if (ret < 0)
1509			goto out;
1510		if (ret)
1511			break;
1512
1513		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1514
1515		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1516			break;
1517
1518		ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
1519		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1520
1521		offset = key.objectid;
1522		while (offset < key.objectid + key.offset) {
1523			bit = free_space_test_bit(block_group, path, offset);
1524			if (prev_bit == 0 && bit == 1) {
1525				extent_start = offset;
1526			} else if (prev_bit == 1 && bit == 0) {
1527				u64 space_added;
1528
1529				ret = btrfs_add_new_free_space(block_group,
1530							       extent_start,
1531							       offset,
1532							       &space_added);
1533				if (ret)
1534					goto out;
1535				total_found += space_added;
1536				if (total_found > CACHING_CTL_WAKE_UP) {
1537					total_found = 0;
1538					wake_up(&caching_ctl->wait);
1539				}
1540				extent_count++;
1541			}
1542			prev_bit = bit;
1543			offset += fs_info->sectorsize;
1544		}
1545	}
1546	if (prev_bit == 1) {
1547		ret = btrfs_add_new_free_space(block_group, extent_start, end, NULL);
1548		if (ret)
1549			goto out;
1550		extent_count++;
1551	}
1552
1553	if (extent_count != expected_extent_count) {
1554		btrfs_err(fs_info,
1555			  "incorrect extent count for %llu; counted %u, expected %u",
1556			  block_group->start, extent_count,
1557			  expected_extent_count);
1558		ASSERT(0);
1559		ret = -EIO;
1560		goto out;
1561	}
1562
1563	ret = 0;
1564out:
1565	return ret;
1566}
1567
1568static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
1569				   struct btrfs_path *path,
1570				   u32 expected_extent_count)
1571{
1572	struct btrfs_block_group *block_group;
1573	struct btrfs_fs_info *fs_info;
1574	struct btrfs_root *root;
1575	struct btrfs_key key;
1576	u64 end;
1577	u64 total_found = 0;
1578	u32 extent_count = 0;
1579	int ret;
1580
1581	block_group = caching_ctl->block_group;
1582	fs_info = block_group->fs_info;
1583	root = btrfs_free_space_root(block_group);
1584
1585	end = block_group->start + block_group->length;
1586
1587	while (1) {
1588		u64 space_added;
1589
1590		ret = btrfs_next_item(root, path);
1591		if (ret < 0)
1592			goto out;
1593		if (ret)
1594			break;
1595
1596		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1597
1598		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1599			break;
1600
1601		ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
1602		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1603
1604		ret = btrfs_add_new_free_space(block_group, key.objectid,
1605					       key.objectid + key.offset,
1606					       &space_added);
1607		if (ret)
1608			goto out;
1609		total_found += space_added;
1610		if (total_found > CACHING_CTL_WAKE_UP) {
1611			total_found = 0;
1612			wake_up(&caching_ctl->wait);
1613		}
1614		extent_count++;
1615	}
1616
1617	if (extent_count != expected_extent_count) {
1618		btrfs_err(fs_info,
1619			  "incorrect extent count for %llu; counted %u, expected %u",
1620			  block_group->start, extent_count,
1621			  expected_extent_count);
1622		ASSERT(0);
1623		ret = -EIO;
1624		goto out;
1625	}
1626
1627	ret = 0;
1628out:
1629	return ret;
1630}
1631
1632int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
1633{
1634	struct btrfs_block_group *block_group;
1635	struct btrfs_free_space_info *info;
1636	struct btrfs_path *path;
1637	u32 extent_count, flags;
1638	int ret;
1639
1640	block_group = caching_ctl->block_group;
1641
1642	path = btrfs_alloc_path();
1643	if (!path)
1644		return -ENOMEM;
1645
1646	/*
1647	 * Just like caching_thread() doesn't want to deadlock on the extent
1648	 * tree, we don't want to deadlock on the free space tree.
1649	 */
1650	path->skip_locking = 1;
1651	path->search_commit_root = 1;
1652	path->reada = READA_FORWARD;
1653
1654	info = search_free_space_info(NULL, block_group, path, 0);
1655	if (IS_ERR(info)) {
1656		ret = PTR_ERR(info);
1657		goto out;
1658	}
1659	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
1660	flags = btrfs_free_space_flags(path->nodes[0], info);
1661
1662	/*
1663	 * We left path pointing to the free space info item, so now
1664	 * load_free_space_foo can just iterate through the free space tree from
1665	 * there.
1666	 */
1667	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
1668		ret = load_free_space_bitmaps(caching_ctl, path, extent_count);
1669	else
1670		ret = load_free_space_extents(caching_ctl, path, extent_count);
1671
1672out:
1673	btrfs_free_path(path);
1674	return ret;
1675}
1676