free-space-cache.c revision 98caf953
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2008 Red Hat.  All rights reserved.
4 */
5
6#include <linux/pagemap.h>
7#include <linux/sched.h>
8#include <linux/sched/signal.h>
9#include <linux/slab.h>
10#include <linux/math64.h>
11#include <linux/ratelimit.h>
12#include <linux/error-injection.h>
13#include <linux/sched/mm.h>
14#include "misc.h"
15#include "ctree.h"
16#include "free-space-cache.h"
17#include "transaction.h"
18#include "disk-io.h"
19#include "extent_io.h"
20#include "volumes.h"
21#include "space-info.h"
22#include "delalloc-space.h"
23#include "block-group.h"
24#include "discard.h"
25
26#define BITS_PER_BITMAP		(PAGE_SIZE * 8UL)
27#define MAX_CACHE_BYTES_PER_GIG	SZ_64K
28#define FORCE_EXTENT_THRESHOLD	SZ_1M
29
30struct btrfs_trim_range {
31	u64 start;
32	u64 bytes;
33	struct list_head list;
34};
35
36static int link_free_space(struct btrfs_free_space_ctl *ctl,
37			   struct btrfs_free_space *info);
38static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
39			      struct btrfs_free_space *info);
40static int search_bitmap(struct btrfs_free_space_ctl *ctl,
41			 struct btrfs_free_space *bitmap_info, u64 *offset,
42			 u64 *bytes, bool for_alloc);
43static void free_bitmap(struct btrfs_free_space_ctl *ctl,
44			struct btrfs_free_space *bitmap_info);
45static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
46			      struct btrfs_free_space *info, u64 offset,
47			      u64 bytes);
48
49static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
50					       struct btrfs_path *path,
51					       u64 offset)
52{
53	struct btrfs_fs_info *fs_info = root->fs_info;
54	struct btrfs_key key;
55	struct btrfs_key location;
56	struct btrfs_disk_key disk_key;
57	struct btrfs_free_space_header *header;
58	struct extent_buffer *leaf;
59	struct inode *inode = NULL;
60	unsigned nofs_flag;
61	int ret;
62
63	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
64	key.offset = offset;
65	key.type = 0;
66
67	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
68	if (ret < 0)
69		return ERR_PTR(ret);
70	if (ret > 0) {
71		btrfs_release_path(path);
72		return ERR_PTR(-ENOENT);
73	}
74
75	leaf = path->nodes[0];
76	header = btrfs_item_ptr(leaf, path->slots[0],
77				struct btrfs_free_space_header);
78	btrfs_free_space_key(leaf, header, &disk_key);
79	btrfs_disk_key_to_cpu(&location, &disk_key);
80	btrfs_release_path(path);
81
82	/*
83	 * We are often under a trans handle at this point, so we need to make
84	 * sure NOFS is set to keep us from deadlocking.
85	 */
86	nofs_flag = memalloc_nofs_save();
87	inode = btrfs_iget_path(fs_info->sb, location.objectid, root, path);
88	btrfs_release_path(path);
89	memalloc_nofs_restore(nofs_flag);
90	if (IS_ERR(inode))
91		return inode;
92
93	mapping_set_gfp_mask(inode->i_mapping,
94			mapping_gfp_constraint(inode->i_mapping,
95			~(__GFP_FS | __GFP_HIGHMEM)));
96
97	return inode;
98}
99
100struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
101		struct btrfs_path *path)
102{
103	struct btrfs_fs_info *fs_info = block_group->fs_info;
104	struct inode *inode = NULL;
105	u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
106
107	spin_lock(&block_group->lock);
108	if (block_group->inode)
109		inode = igrab(block_group->inode);
110	spin_unlock(&block_group->lock);
111	if (inode)
112		return inode;
113
114	inode = __lookup_free_space_inode(fs_info->tree_root, path,
115					  block_group->start);
116	if (IS_ERR(inode))
117		return inode;
118
119	spin_lock(&block_group->lock);
120	if (!((BTRFS_I(inode)->flags & flags) == flags)) {
121		btrfs_info(fs_info, "Old style space inode found, converting.");
122		BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
123			BTRFS_INODE_NODATACOW;
124		block_group->disk_cache_state = BTRFS_DC_CLEAR;
125	}
126
127	if (!block_group->iref) {
128		block_group->inode = igrab(inode);
129		block_group->iref = 1;
130	}
131	spin_unlock(&block_group->lock);
132
133	return inode;
134}
135
136static int __create_free_space_inode(struct btrfs_root *root,
137				     struct btrfs_trans_handle *trans,
138				     struct btrfs_path *path,
139				     u64 ino, u64 offset)
140{
141	struct btrfs_key key;
142	struct btrfs_disk_key disk_key;
143	struct btrfs_free_space_header *header;
144	struct btrfs_inode_item *inode_item;
145	struct extent_buffer *leaf;
146	/* We inline CRCs for the free disk space cache */
147	const u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC |
148			  BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
149	int ret;
150
151	ret = btrfs_insert_empty_inode(trans, root, path, ino);
152	if (ret)
153		return ret;
154
155	leaf = path->nodes[0];
156	inode_item = btrfs_item_ptr(leaf, path->slots[0],
157				    struct btrfs_inode_item);
158	btrfs_item_key(leaf, &disk_key, path->slots[0]);
159	memzero_extent_buffer(leaf, (unsigned long)inode_item,
160			     sizeof(*inode_item));
161	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
162	btrfs_set_inode_size(leaf, inode_item, 0);
163	btrfs_set_inode_nbytes(leaf, inode_item, 0);
164	btrfs_set_inode_uid(leaf, inode_item, 0);
165	btrfs_set_inode_gid(leaf, inode_item, 0);
166	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
167	btrfs_set_inode_flags(leaf, inode_item, flags);
168	btrfs_set_inode_nlink(leaf, inode_item, 1);
169	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
170	btrfs_set_inode_block_group(leaf, inode_item, offset);
171	btrfs_mark_buffer_dirty(leaf);
172	btrfs_release_path(path);
173
174	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
175	key.offset = offset;
176	key.type = 0;
177	ret = btrfs_insert_empty_item(trans, root, path, &key,
178				      sizeof(struct btrfs_free_space_header));
179	if (ret < 0) {
180		btrfs_release_path(path);
181		return ret;
182	}
183
184	leaf = path->nodes[0];
185	header = btrfs_item_ptr(leaf, path->slots[0],
186				struct btrfs_free_space_header);
187	memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header));
188	btrfs_set_free_space_key(leaf, header, &disk_key);
189	btrfs_mark_buffer_dirty(leaf);
190	btrfs_release_path(path);
191
192	return 0;
193}
194
195int create_free_space_inode(struct btrfs_trans_handle *trans,
196			    struct btrfs_block_group *block_group,
197			    struct btrfs_path *path)
198{
199	int ret;
200	u64 ino;
201
202	ret = btrfs_get_free_objectid(trans->fs_info->tree_root, &ino);
203	if (ret < 0)
204		return ret;
205
206	return __create_free_space_inode(trans->fs_info->tree_root, trans, path,
207					 ino, block_group->start);
208}
209
210/*
211 * inode is an optional sink: if it is NULL, btrfs_remove_free_space_inode
212 * handles lookup, otherwise it takes ownership and iputs the inode.
213 * Don't reuse an inode pointer after passing it into this function.
214 */
215int btrfs_remove_free_space_inode(struct btrfs_trans_handle *trans,
216				  struct inode *inode,
217				  struct btrfs_block_group *block_group)
218{
219	struct btrfs_path *path;
220	struct btrfs_key key;
221	int ret = 0;
222
223	path = btrfs_alloc_path();
224	if (!path)
225		return -ENOMEM;
226
227	if (!inode)
228		inode = lookup_free_space_inode(block_group, path);
229	if (IS_ERR(inode)) {
230		if (PTR_ERR(inode) != -ENOENT)
231			ret = PTR_ERR(inode);
232		goto out;
233	}
234	ret = btrfs_orphan_add(trans, BTRFS_I(inode));
235	if (ret) {
236		btrfs_add_delayed_iput(inode);
237		goto out;
238	}
239	clear_nlink(inode);
240	/* One for the block group's ref */
241	spin_lock(&block_group->lock);
242	if (block_group->iref) {
243		block_group->iref = 0;
244		block_group->inode = NULL;
245		spin_unlock(&block_group->lock);
246		iput(inode);
247	} else {
248		spin_unlock(&block_group->lock);
249	}
250	/* One for the lookup ref */
251	btrfs_add_delayed_iput(inode);
252
253	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
254	key.type = 0;
255	key.offset = block_group->start;
256	ret = btrfs_search_slot(trans, trans->fs_info->tree_root, &key, path,
257				-1, 1);
258	if (ret) {
259		if (ret > 0)
260			ret = 0;
261		goto out;
262	}
263	ret = btrfs_del_item(trans, trans->fs_info->tree_root, path);
264out:
265	btrfs_free_path(path);
266	return ret;
267}
268
269int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
270				       struct btrfs_block_rsv *rsv)
271{
272	u64 needed_bytes;
273	int ret;
274
275	/* 1 for slack space, 1 for updating the inode */
276	needed_bytes = btrfs_calc_insert_metadata_size(fs_info, 1) +
277		btrfs_calc_metadata_size(fs_info, 1);
278
279	spin_lock(&rsv->lock);
280	if (rsv->reserved < needed_bytes)
281		ret = -ENOSPC;
282	else
283		ret = 0;
284	spin_unlock(&rsv->lock);
285	return ret;
286}
287
288int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
289				    struct btrfs_block_group *block_group,
290				    struct inode *inode)
291{
292	struct btrfs_root *root = BTRFS_I(inode)->root;
293	int ret = 0;
294	bool locked = false;
295
296	if (block_group) {
297		struct btrfs_path *path = btrfs_alloc_path();
298
299		if (!path) {
300			ret = -ENOMEM;
301			goto fail;
302		}
303		locked = true;
304		mutex_lock(&trans->transaction->cache_write_mutex);
305		if (!list_empty(&block_group->io_list)) {
306			list_del_init(&block_group->io_list);
307
308			btrfs_wait_cache_io(trans, block_group, path);
309			btrfs_put_block_group(block_group);
310		}
311
312		/*
313		 * Now that we've truncated the cache away, it's no longer
314		 * set up or written out.
315		 */
316		spin_lock(&block_group->lock);
317		block_group->disk_cache_state = BTRFS_DC_CLEAR;
318		spin_unlock(&block_group->lock);
319		btrfs_free_path(path);
320	}
321
322	btrfs_i_size_write(BTRFS_I(inode), 0);
323	truncate_pagecache(inode, 0);
324
325	/*
326	 * We skip the throttling logic for free space cache inodes, so we don't
327	 * need to check for -EAGAIN.
328	 */
329	ret = btrfs_truncate_inode_items(trans, root, BTRFS_I(inode),
330					 0, BTRFS_EXTENT_DATA_KEY, NULL);
331	if (ret)
332		goto fail;
333
334	ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
335
336fail:
337	if (locked)
338		mutex_unlock(&trans->transaction->cache_write_mutex);
339	if (ret)
340		btrfs_abort_transaction(trans, ret);
341
342	return ret;
343}
344
345static void readahead_cache(struct inode *inode)
346{
347	struct file_ra_state ra;
348	unsigned long last_index;
349
350	file_ra_state_init(&ra, inode->i_mapping);
351	last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
352
353	page_cache_sync_readahead(inode->i_mapping, &ra, NULL, 0, last_index);
354}
355
356static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
357		       int write)
358{
359	int num_pages;
360
361	num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
362
363	/* Make sure we can fit our crcs and generation into the first page */
364	if (write && (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE)
365		return -ENOSPC;
366
367	memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));
368
369	io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS);
370	if (!io_ctl->pages)
371		return -ENOMEM;
372
373	io_ctl->num_pages = num_pages;
374	io_ctl->fs_info = btrfs_sb(inode->i_sb);
375	io_ctl->inode = inode;
376
377	return 0;
378}
379ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO);
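
/*
 * Illustrative sketch (not part of free-space-cache.c): the io_ctl_init()
 * check above requires one u32 crc per page plus a u64 generation to fit in
 * the first page.  This standalone userspace program works out the limit
 * that implies for the cache file size; the 4 KiB page size is an assumption
 * made only for the example.  Build with: cc -o ioctl_limit ioctl_limit.c
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint64_t page_size = 4096;	/* assumed PAGE_SIZE */

	/* Largest num_pages with num_pages * sizeof(u32) + sizeof(u64) <= PAGE_SIZE */
	uint64_t max_pages = (page_size - sizeof(uint64_t)) / sizeof(uint32_t);
	uint64_t max_bytes = max_pages * page_size;

	printf("max pages: %llu, max cache file size: %llu bytes\n",
	       (unsigned long long)max_pages, (unsigned long long)max_bytes);
	return 0;
}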
380
381static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
382{
383	kfree(io_ctl->pages);
384	io_ctl->pages = NULL;
385}
386
387static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl)
388{
389	if (io_ctl->cur) {
390		io_ctl->cur = NULL;
391		io_ctl->orig = NULL;
392	}
393}
394
395static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear)
396{
397	ASSERT(io_ctl->index < io_ctl->num_pages);
398	io_ctl->page = io_ctl->pages[io_ctl->index++];
399	io_ctl->cur = page_address(io_ctl->page);
400	io_ctl->orig = io_ctl->cur;
401	io_ctl->size = PAGE_SIZE;
402	if (clear)
403		clear_page(io_ctl->cur);
404}
405
406static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
407{
408	int i;
409
410	io_ctl_unmap_page(io_ctl);
411
412	for (i = 0; i < io_ctl->num_pages; i++) {
413		if (io_ctl->pages[i]) {
414			ClearPageChecked(io_ctl->pages[i]);
415			unlock_page(io_ctl->pages[i]);
416			put_page(io_ctl->pages[i]);
417		}
418	}
419}
420
421static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, bool uptodate)
422{
423	struct page *page;
424	struct inode *inode = io_ctl->inode;
425	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
426	int i;
427
428	for (i = 0; i < io_ctl->num_pages; i++) {
429		int ret;
430
431		page = find_or_create_page(inode->i_mapping, i, mask);
432		if (!page) {
433			io_ctl_drop_pages(io_ctl);
434			return -ENOMEM;
435		}
436
437		ret = set_page_extent_mapped(page);
438		if (ret < 0) {
439			unlock_page(page);
440			put_page(page);
441			io_ctl_drop_pages(io_ctl);
442			return ret;
443		}
444
445		io_ctl->pages[i] = page;
446		if (uptodate && !PageUptodate(page)) {
447			btrfs_readpage(NULL, page);
448			lock_page(page);
449			if (page->mapping != inode->i_mapping) {
450				btrfs_err(BTRFS_I(inode)->root->fs_info,
451					  "free space cache page truncated");
452				io_ctl_drop_pages(io_ctl);
453				return -EIO;
454			}
455			if (!PageUptodate(page)) {
456				btrfs_err(BTRFS_I(inode)->root->fs_info,
457					   "error reading free space cache");
458				io_ctl_drop_pages(io_ctl);
459				return -EIO;
460			}
461		}
462	}
463
464	for (i = 0; i < io_ctl->num_pages; i++)
465		clear_page_dirty_for_io(io_ctl->pages[i]);
466
467	return 0;
468}
469
470static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
471{
472	io_ctl_map_page(io_ctl, 1);
473
474	/*
475	 * Skip the csum area.  The first page starts with one u32 crc per
476	 * page, followed by a le64 generation, and the entries come after that.
477	 */
478	io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
479	io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
480
481	put_unaligned_le64(generation, io_ctl->cur);
482	io_ctl->cur += sizeof(u64);
483}
484
485static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
486{
487	u64 cache_gen;
488
489	/*
490	 * Skip the crc area.  The first page starts with one u32 crc per page,
491	 * followed by the le64 generation that we read here.
492	 */
493	io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
494	io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
495
496	cache_gen = get_unaligned_le64(io_ctl->cur);
497	if (cache_gen != generation) {
498		btrfs_err_rl(io_ctl->fs_info,
499			"space cache generation (%llu) does not match inode (%llu)",
500				cache_gen, generation);
501		io_ctl_unmap_page(io_ctl);
502		return -EIO;
503	}
504	io_ctl->cur += sizeof(u64);
505	return 0;
506}
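
/*
 * Illustrative sketch (not part of free-space-cache.c): the first-page layout
 * implied by io_ctl_set_generation()/io_ctl_check_generation() above -- an
 * array of one u32 crc per page, then a le64 generation, then the entries.
 * The helper names below are made up for the example.
 */
#include <stdio.h>
#include <stdint.h>

static uint64_t generation_offset(uint64_t num_pages)
{
	return sizeof(uint32_t) * num_pages;	/* crc array comes first */
}

static uint64_t first_entry_offset(uint64_t num_pages)
{
	return generation_offset(num_pages) + sizeof(uint64_t);
}

int main(void)
{
	uint64_t num_pages = 16;	/* example: a 16 page cache file */

	printf("generation at byte %llu, first entry at byte %llu\n",
	       (unsigned long long)generation_offset(num_pages),
	       (unsigned long long)first_entry_offset(num_pages));
	return 0;
}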
507
508static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index)
509{
510	u32 *tmp;
511	u32 crc = ~(u32)0;
512	unsigned offset = 0;
513
514	if (index == 0)
515		offset = sizeof(u32) * io_ctl->num_pages;
516
517	crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
518	btrfs_crc32c_final(crc, (u8 *)&crc);
519	io_ctl_unmap_page(io_ctl);
520	tmp = page_address(io_ctl->pages[0]);
521	tmp += index;
522	*tmp = crc;
523}
524
525static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
526{
527	u32 *tmp, val;
528	u32 crc = ~(u32)0;
529	unsigned offset = 0;
530
531	if (index == 0)
532		offset = sizeof(u32) * io_ctl->num_pages;
533
534	tmp = page_address(io_ctl->pages[0]);
535	tmp += index;
536	val = *tmp;
537
538	io_ctl_map_page(io_ctl, 0);
539	crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
540	btrfs_crc32c_final(crc, (u8 *)&crc);
541	if (val != crc) {
542		btrfs_err_rl(io_ctl->fs_info,
543			"csum mismatch on free space cache");
544		io_ctl_unmap_page(io_ctl);
545		return -EIO;
546	}
547
548	return 0;
549}
550
551static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes,
552			    void *bitmap)
553{
554	struct btrfs_free_space_entry *entry;
555
556	if (!io_ctl->cur)
557		return -ENOSPC;
558
559	entry = io_ctl->cur;
560	put_unaligned_le64(offset, &entry->offset);
561	put_unaligned_le64(bytes, &entry->bytes);
562	entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
563		BTRFS_FREE_SPACE_EXTENT;
564	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
565	io_ctl->size -= sizeof(struct btrfs_free_space_entry);
566
567	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
568		return 0;
569
570	io_ctl_set_crc(io_ctl, io_ctl->index - 1);
571
572	/* No more pages to map */
573	if (io_ctl->index >= io_ctl->num_pages)
574		return 0;
575
576	/* map the next page */
577	io_ctl_map_page(io_ctl, 1);
578	return 0;
579}
580
581static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap)
582{
583	if (!io_ctl->cur)
584		return -ENOSPC;
585
586	/*
587	 * If we aren't at the start of the current page, unmap this one and
588	 * map the next one if there is any left.
589	 */
590	if (io_ctl->cur != io_ctl->orig) {
591		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
592		if (io_ctl->index >= io_ctl->num_pages)
593			return -ENOSPC;
594		io_ctl_map_page(io_ctl, 0);
595	}
596
597	copy_page(io_ctl->cur, bitmap);
598	io_ctl_set_crc(io_ctl, io_ctl->index - 1);
599	if (io_ctl->index < io_ctl->num_pages)
600		io_ctl_map_page(io_ctl, 0);
601	return 0;
602}
603
604static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl)
605{
606	/*
607	 * If we're not on the boundary we know we've modified the page and we
608	 * need to crc the page.
609	 */
610	if (io_ctl->cur != io_ctl->orig)
611		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
612	else
613		io_ctl_unmap_page(io_ctl);
614
615	while (io_ctl->index < io_ctl->num_pages) {
616		io_ctl_map_page(io_ctl, 1);
617		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
618	}
619}
620
621static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl,
622			    struct btrfs_free_space *entry, u8 *type)
623{
624	struct btrfs_free_space_entry *e;
625	int ret;
626
627	if (!io_ctl->cur) {
628		ret = io_ctl_check_crc(io_ctl, io_ctl->index);
629		if (ret)
630			return ret;
631	}
632
633	e = io_ctl->cur;
634	entry->offset = get_unaligned_le64(&e->offset);
635	entry->bytes = get_unaligned_le64(&e->bytes);
636	*type = e->type;
637	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
638	io_ctl->size -= sizeof(struct btrfs_free_space_entry);
639
640	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
641		return 0;
642
643	io_ctl_unmap_page(io_ctl);
644
645	return 0;
646}
647
648static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
649			      struct btrfs_free_space *entry)
650{
651	int ret;
652
653	ret = io_ctl_check_crc(io_ctl, io_ctl->index);
654	if (ret)
655		return ret;
656
657	copy_page(entry->bitmap, io_ctl->cur);
658	io_ctl_unmap_page(io_ctl);
659
660	return 0;
661}
662
663static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
664{
665	struct btrfs_block_group *block_group = ctl->private;
666	u64 max_bytes;
667	u64 bitmap_bytes;
668	u64 extent_bytes;
669	u64 size = block_group->length;
670	u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
671	u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
672
673	max_bitmaps = max_t(u64, max_bitmaps, 1);
674
675	ASSERT(ctl->total_bitmaps <= max_bitmaps);
676
677	/*
678	 * We are trying to keep the total amount of memory used per 1GiB of
679	 * space to be MAX_CACHE_BYTES_PER_GIG.  However, with a reclamation
680	 * mechanism of pulling extents >= FORCE_EXTENT_THRESHOLD out of
681	 * bitmaps, we may end up using more memory than this.
682	 */
683	if (size < SZ_1G)
684		max_bytes = MAX_CACHE_BYTES_PER_GIG;
685	else
686		max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G);
687
688	bitmap_bytes = ctl->total_bitmaps * ctl->unit;
689
690	/*
691	 * The byte budget for extent entries is whatever is left over after the
692	 * bitmaps, but never more than half of max_bytes.
693	 */
694	extent_bytes = max_bytes - bitmap_bytes;
695	extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1);
696
697	ctl->extents_thresh =
698		div_u64(extent_bytes, sizeof(struct btrfs_free_space));
699}
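
/*
 * Illustrative sketch (not part of free-space-cache.c): a userspace rerun of
 * the recalculate_thresholds() arithmetic above for an example block group.
 * The 4 KiB unit, the 32 byte entry size standing in for
 * sizeof(struct btrfs_free_space), and all the example sizes are assumptions
 * made only for this demonstration.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint64_t SZ_64K = 64ULL * 1024;
	const uint64_t SZ_1G = 1024ULL * 1024 * 1024;
	const uint64_t entry_size = 32;		/* assumed in-memory entry size */
	uint64_t size = 8 * SZ_1G;		/* example 8 GiB block group */
	uint64_t unit = 4096;			/* ctl->unit, also the per-bitmap cost */
	uint64_t total_bitmaps = 4;		/* example bitmap count */

	uint64_t max_bytes = (size < SZ_1G) ? SZ_64K : SZ_64K * (size / SZ_1G);
	uint64_t bitmap_bytes = total_bitmaps * unit;
	uint64_t extent_bytes = max_bytes - bitmap_bytes;

	if (extent_bytes > max_bytes / 2)
		extent_bytes = max_bytes / 2;

	printf("extents_thresh = %llu extent entries\n",
	       (unsigned long long)(extent_bytes / entry_size));
	return 0;
}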
700
701static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
702				   struct btrfs_free_space_ctl *ctl,
703				   struct btrfs_path *path, u64 offset)
704{
705	struct btrfs_fs_info *fs_info = root->fs_info;
706	struct btrfs_free_space_header *header;
707	struct extent_buffer *leaf;
708	struct btrfs_io_ctl io_ctl;
709	struct btrfs_key key;
710	struct btrfs_free_space *e, *n;
711	LIST_HEAD(bitmaps);
712	u64 num_entries;
713	u64 num_bitmaps;
714	u64 generation;
715	u8 type;
716	int ret = 0;
717
718	/* Nothing in the space cache, goodbye */
719	if (!i_size_read(inode))
720		return 0;
721
722	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
723	key.offset = offset;
724	key.type = 0;
725
726	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
727	if (ret < 0)
728		return 0;
729	else if (ret > 0) {
730		btrfs_release_path(path);
731		return 0;
732	}
733
734	ret = -1;
735
736	leaf = path->nodes[0];
737	header = btrfs_item_ptr(leaf, path->slots[0],
738				struct btrfs_free_space_header);
739	num_entries = btrfs_free_space_entries(leaf, header);
740	num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
741	generation = btrfs_free_space_generation(leaf, header);
742	btrfs_release_path(path);
743
744	if (!BTRFS_I(inode)->generation) {
745		btrfs_info(fs_info,
746			   "the free space cache file (%llu) is invalid, skip it",
747			   offset);
748		return 0;
749	}
750
751	if (BTRFS_I(inode)->generation != generation) {
752		btrfs_err(fs_info,
753			  "free space inode generation (%llu) did not match free space cache generation (%llu)",
754			  BTRFS_I(inode)->generation, generation);
755		return 0;
756	}
757
758	if (!num_entries)
759		return 0;
760
761	ret = io_ctl_init(&io_ctl, inode, 0);
762	if (ret)
763		return ret;
764
765	readahead_cache(inode);
766
767	ret = io_ctl_prepare_pages(&io_ctl, true);
768	if (ret)
769		goto out;
770
771	ret = io_ctl_check_crc(&io_ctl, 0);
772	if (ret)
773		goto free_cache;
774
775	ret = io_ctl_check_generation(&io_ctl, generation);
776	if (ret)
777		goto free_cache;
778
779	while (num_entries) {
780		e = kmem_cache_zalloc(btrfs_free_space_cachep,
781				      GFP_NOFS);
782		if (!e) {
783			ret = -ENOMEM;
784			goto free_cache;
785		}
786
787		ret = io_ctl_read_entry(&io_ctl, e, &type);
788		if (ret) {
789			kmem_cache_free(btrfs_free_space_cachep, e);
790			goto free_cache;
791		}
792
793		if (!e->bytes) {
794			ret = -1;
795			kmem_cache_free(btrfs_free_space_cachep, e);
796			goto free_cache;
797		}
798
799		if (type == BTRFS_FREE_SPACE_EXTENT) {
800			spin_lock(&ctl->tree_lock);
801			ret = link_free_space(ctl, e);
802			spin_unlock(&ctl->tree_lock);
803			if (ret) {
804				btrfs_err(fs_info,
805					"Duplicate entries in free space cache, dumping");
806				kmem_cache_free(btrfs_free_space_cachep, e);
807				goto free_cache;
808			}
809		} else {
810			ASSERT(num_bitmaps);
811			num_bitmaps--;
812			e->bitmap = kmem_cache_zalloc(
813					btrfs_free_space_bitmap_cachep, GFP_NOFS);
814			if (!e->bitmap) {
815				ret = -ENOMEM;
816				kmem_cache_free(
817					btrfs_free_space_cachep, e);
818				goto free_cache;
819			}
820			spin_lock(&ctl->tree_lock);
821			ret = link_free_space(ctl, e);
822			ctl->total_bitmaps++;
823			recalculate_thresholds(ctl);
824			spin_unlock(&ctl->tree_lock);
825			if (ret) {
826				btrfs_err(fs_info,
827					"Duplicate entries in free space cache, dumping");
828				kmem_cache_free(btrfs_free_space_cachep, e);
829				goto free_cache;
830			}
831			list_add_tail(&e->list, &bitmaps);
832		}
833
834		num_entries--;
835	}
836
837	io_ctl_unmap_page(&io_ctl);
838
839	/*
840	 * The bitmap data is stored after all the entries in the cache file,
841	 * so read each bitmap's contents now that its entry has been linked.
842	 */
843	list_for_each_entry_safe(e, n, &bitmaps, list) {
844		list_del_init(&e->list);
845		ret = io_ctl_read_bitmap(&io_ctl, e);
846		if (ret)
847			goto free_cache;
848	}
849
850	io_ctl_drop_pages(&io_ctl);
851	ret = 1;
852out:
853	io_ctl_free(&io_ctl);
854	return ret;
855free_cache:
856	io_ctl_drop_pages(&io_ctl);
857	__btrfs_remove_free_space_cache(ctl);
858	goto out;
859}
860
861static int copy_free_space_cache(struct btrfs_block_group *block_group,
862				 struct btrfs_free_space_ctl *ctl)
863{
864	struct btrfs_free_space *info;
865	struct rb_node *n;
866	int ret = 0;
867
868	while (!ret && (n = rb_first(&ctl->free_space_offset)) != NULL) {
869		info = rb_entry(n, struct btrfs_free_space, offset_index);
870		if (!info->bitmap) {
871			unlink_free_space(ctl, info);
872			ret = btrfs_add_free_space(block_group, info->offset,
873						   info->bytes);
874			kmem_cache_free(btrfs_free_space_cachep, info);
875		} else {
876			u64 offset = info->offset;
877			u64 bytes = ctl->unit;
878
879			while (search_bitmap(ctl, info, &offset, &bytes,
880					     false) == 0) {
881				ret = btrfs_add_free_space(block_group, offset,
882							   bytes);
883				if (ret)
884					break;
885				bitmap_clear_bits(ctl, info, offset, bytes);
886				offset = info->offset;
887				bytes = ctl->unit;
888			}
889			free_bitmap(ctl, info);
890		}
891		cond_resched();
892	}
893	return ret;
894}
895
896int load_free_space_cache(struct btrfs_block_group *block_group)
897{
898	struct btrfs_fs_info *fs_info = block_group->fs_info;
899	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
900	struct btrfs_free_space_ctl tmp_ctl = {};
901	struct inode *inode;
902	struct btrfs_path *path;
903	int ret = 0;
904	bool matched;
905	u64 used = block_group->used;
906
907	/*
908	 * Because we could potentially discard our loaded free space, we want
909	 * to load everything into a temporary structure first, and then if it's
910	 * valid copy it all into the actual free space ctl.
911	 */
912	btrfs_init_free_space_ctl(block_group, &tmp_ctl);
913
914	/*
915	 * If this block group has been marked to be cleared for one reason or
916	 * another then we can't trust the on disk cache, so just return.
917	 */
918	spin_lock(&block_group->lock);
919	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
920		spin_unlock(&block_group->lock);
921		return 0;
922	}
923	spin_unlock(&block_group->lock);
924
925	path = btrfs_alloc_path();
926	if (!path)
927		return 0;
928	path->search_commit_root = 1;
929	path->skip_locking = 1;
930
931	/*
932	 * We must pass a path with search_commit_root set to btrfs_iget in
933	 * order to avoid a deadlock when allocating extents for the tree root.
934	 *
935	 * When we are COWing an extent buffer from the tree root, when looking
936	 * for a free extent, at extent-tree.c:find_free_extent(), we can find
937	 * block group without its free space cache loaded. When we find one
938	 * a block group without its free space cache loaded. When we find one
939	 * cache's inode item from the root tree. If this inode item is located
940	 * in the same leaf that we started COWing before, then we end up in
941	 * deadlock on the extent buffer (trying to read lock it when we
942	 * previously write locked it).
943	 *
944	 * It's safe to read the inode item using the commit root because
945	 * block groups, once loaded, stay in memory forever (until they are
946	 * removed) as well as their space caches once loaded. New block groups
947	 * once created get their ->cached field set to BTRFS_CACHE_FINISHED so
948	 * we will never try to read their inode item while the fs is mounted.
949	 */
950	inode = lookup_free_space_inode(block_group, path);
951	if (IS_ERR(inode)) {
952		btrfs_free_path(path);
953		return 0;
954	}
955
956	/* We may have converted the inode and made the cache invalid. */
957	spin_lock(&block_group->lock);
958	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
959		spin_unlock(&block_group->lock);
960		btrfs_free_path(path);
961		goto out;
962	}
963	spin_unlock(&block_group->lock);
964
965	ret = __load_free_space_cache(fs_info->tree_root, inode, &tmp_ctl,
966				      path, block_group->start);
967	btrfs_free_path(path);
968	if (ret <= 0)
969		goto out;
970
971	matched = (tmp_ctl.free_space == (block_group->length - used -
972					  block_group->bytes_super));
973
974	if (matched) {
975		ret = copy_free_space_cache(block_group, &tmp_ctl);
976		/*
977		 * copy_free_space_cache() returns 0 on success, but a successful
978		 * load must be reported as 1, so convert it here.
979		 */
980		if (ret == 0)
981			ret = 1;
982	} else {
983		__btrfs_remove_free_space_cache(&tmp_ctl);
984		btrfs_warn(fs_info,
985			   "block group %llu has wrong amount of free space",
986			   block_group->start);
987		ret = -1;
988	}
989out:
990	if (ret < 0) {
991		/* This cache is bogus, make sure it gets cleared */
992		spin_lock(&block_group->lock);
993		block_group->disk_cache_state = BTRFS_DC_CLEAR;
994		spin_unlock(&block_group->lock);
995		ret = 0;
996
997		btrfs_warn(fs_info,
998			   "failed to load free space cache for block group %llu, rebuilding it now",
999			   block_group->start);
1000	}
1001
1002	spin_lock(&ctl->tree_lock);
1003	btrfs_discard_update_discardable(block_group);
1004	spin_unlock(&ctl->tree_lock);
1005	iput(inode);
1006	return ret;
1007}
1008
1009static noinline_for_stack
1010int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl,
1011			      struct btrfs_free_space_ctl *ctl,
1012			      struct btrfs_block_group *block_group,
1013			      int *entries, int *bitmaps,
1014			      struct list_head *bitmap_list)
1015{
1016	int ret;
1017	struct btrfs_free_cluster *cluster = NULL;
1018	struct btrfs_free_cluster *cluster_locked = NULL;
1019	struct rb_node *node = rb_first(&ctl->free_space_offset);
1020	struct btrfs_trim_range *trim_entry;
1021
1022	/* Get the cluster for this block_group if it exists */
1023	if (block_group && !list_empty(&block_group->cluster_list)) {
1024		cluster = list_entry(block_group->cluster_list.next,
1025				     struct btrfs_free_cluster,
1026				     block_group_list);
1027	}
1028
1029	if (!node && cluster) {
1030		cluster_locked = cluster;
1031		spin_lock(&cluster_locked->lock);
1032		node = rb_first(&cluster->root);
1033		cluster = NULL;
1034	}
1035
1036	/* Write out the extent entries */
1037	while (node) {
1038		struct btrfs_free_space *e;
1039
1040		e = rb_entry(node, struct btrfs_free_space, offset_index);
1041		*entries += 1;
1042
1043		ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes,
1044				       e->bitmap);
1045		if (ret)
1046			goto fail;
1047
1048		if (e->bitmap) {
1049			list_add_tail(&e->list, bitmap_list);
1050			*bitmaps += 1;
1051		}
1052		node = rb_next(node);
1053		if (!node && cluster) {
1054			node = rb_first(&cluster->root);
1055			cluster_locked = cluster;
1056			spin_lock(&cluster_locked->lock);
1057			cluster = NULL;
1058		}
1059	}
1060	if (cluster_locked) {
1061		spin_unlock(&cluster_locked->lock);
1062		cluster_locked = NULL;
1063	}
1064
1065	/*
1066	 * Make sure we don't miss any range that was removed from our rbtree
1067	 * because trimming is running. Otherwise after a umount+mount (or crash
1068	 * after committing the transaction) we would leak free space and get
1069	 * an inconsistent free space cache report from fsck.
1070	 */
1071	list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) {
1072		ret = io_ctl_add_entry(io_ctl, trim_entry->start,
1073				       trim_entry->bytes, NULL);
1074		if (ret)
1075			goto fail;
1076		*entries += 1;
1077	}
1078
1079	return 0;
1080fail:
1081	if (cluster_locked)
1082		spin_unlock(&cluster_locked->lock);
1083	return -ENOSPC;
1084}
1085
1086static noinline_for_stack int
1087update_cache_item(struct btrfs_trans_handle *trans,
1088		  struct btrfs_root *root,
1089		  struct inode *inode,
1090		  struct btrfs_path *path, u64 offset,
1091		  int entries, int bitmaps)
1092{
1093	struct btrfs_key key;
1094	struct btrfs_free_space_header *header;
1095	struct extent_buffer *leaf;
1096	int ret;
1097
1098	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
1099	key.offset = offset;
1100	key.type = 0;
1101
1102	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1103	if (ret < 0) {
1104		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
1105				 EXTENT_DELALLOC, 0, 0, NULL);
1106		goto fail;
1107	}
1108	leaf = path->nodes[0];
1109	if (ret > 0) {
1110		struct btrfs_key found_key;
1111		ASSERT(path->slots[0]);
1112		path->slots[0]--;
1113		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1114		if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
1115		    found_key.offset != offset) {
1116			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
1117					 inode->i_size - 1, EXTENT_DELALLOC, 0,
1118					 0, NULL);
1119			btrfs_release_path(path);
1120			goto fail;
1121		}
1122	}
1123
1124	BTRFS_I(inode)->generation = trans->transid;
1125	header = btrfs_item_ptr(leaf, path->slots[0],
1126				struct btrfs_free_space_header);
1127	btrfs_set_free_space_entries(leaf, header, entries);
1128	btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
1129	btrfs_set_free_space_generation(leaf, header, trans->transid);
1130	btrfs_mark_buffer_dirty(leaf);
1131	btrfs_release_path(path);
1132
1133	return 0;
1134
1135fail:
1136	return -1;
1137}
1138
1139static noinline_for_stack int write_pinned_extent_entries(
1140			    struct btrfs_trans_handle *trans,
1141			    struct btrfs_block_group *block_group,
1142			    struct btrfs_io_ctl *io_ctl,
1143			    int *entries)
1144{
1145	u64 start, extent_start, extent_end, len;
1146	struct extent_io_tree *unpin = NULL;
1147	int ret;
1148
1149	if (!block_group)
1150		return 0;
1151
1152	/*
1153	 * We want to add any pinned extents to our free space cache
1154	 * so we don't leak the space
1155	 *
1156	 * We shouldn't have switched the pinned extents yet so this is the
1157	 * right one
1158	 */
1159	unpin = &trans->transaction->pinned_extents;
1160
1161	start = block_group->start;
1162
1163	while (start < block_group->start + block_group->length) {
1164		ret = find_first_extent_bit(unpin, start,
1165					    &extent_start, &extent_end,
1166					    EXTENT_DIRTY, NULL);
1167		if (ret)
1168			return 0;
1169
1170		/* This pinned extent is out of our range */
1171		if (extent_start >= block_group->start + block_group->length)
1172			return 0;
1173
1174		extent_start = max(extent_start, start);
1175		extent_end = min(block_group->start + block_group->length,
1176				 extent_end + 1);
1177		len = extent_end - extent_start;
1178
1179		*entries += 1;
1180		ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL);
1181		if (ret)
1182			return -ENOSPC;
1183
1184		start = extent_end;
1185	}
1186
1187	return 0;
1188}
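
/*
 * Illustrative sketch (not part of free-space-cache.c): the interval clamping
 * that write_pinned_extent_entries() above applies to each pinned extent --
 * intersect it with the block group's [start, start + length) range, treating
 * extent_end as inclusive.  All example values are made up.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t bg_start = 1ULL << 30;				/* 1 GiB */
	uint64_t bg_len = 256ULL << 20;				/* 256 MiB */
	uint64_t extent_start = bg_start - (4ULL << 20);	/* begins before the bg */
	uint64_t extent_end = bg_start + (8ULL << 20);		/* inclusive end */

	uint64_t start = extent_start > bg_start ? extent_start : bg_start;
	uint64_t end = (extent_end + 1 < bg_start + bg_len) ?
		       extent_end + 1 : bg_start + bg_len;

	printf("pinned entry: offset %llu, len %llu\n",
	       (unsigned long long)start, (unsigned long long)(end - start));
	return 0;
}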
1189
1190static noinline_for_stack int
1191write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list)
1192{
1193	struct btrfs_free_space *entry, *next;
1194	int ret;
1195
1196	/* Write out the bitmaps */
1197	list_for_each_entry_safe(entry, next, bitmap_list, list) {
1198		ret = io_ctl_add_bitmap(io_ctl, entry->bitmap);
1199		if (ret)
1200			return -ENOSPC;
1201		list_del_init(&entry->list);
1202	}
1203
1204	return 0;
1205}
1206
1207static int flush_dirty_cache(struct inode *inode)
1208{
1209	int ret;
1210
1211	ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
1212	if (ret)
1213		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
1214				 EXTENT_DELALLOC, 0, 0, NULL);
1215
1216	return ret;
1217}
1218
1219static void noinline_for_stack
1220cleanup_bitmap_list(struct list_head *bitmap_list)
1221{
1222	struct btrfs_free_space *entry, *next;
1223
1224	list_for_each_entry_safe(entry, next, bitmap_list, list)
1225		list_del_init(&entry->list);
1226}
1227
1228static void noinline_for_stack
1229cleanup_write_cache_enospc(struct inode *inode,
1230			   struct btrfs_io_ctl *io_ctl,
1231			   struct extent_state **cached_state)
1232{
1233	io_ctl_drop_pages(io_ctl);
1234	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1235			     i_size_read(inode) - 1, cached_state);
1236}
1237
1238static int __btrfs_wait_cache_io(struct btrfs_root *root,
1239				 struct btrfs_trans_handle *trans,
1240				 struct btrfs_block_group *block_group,
1241				 struct btrfs_io_ctl *io_ctl,
1242				 struct btrfs_path *path, u64 offset)
1243{
1244	int ret;
1245	struct inode *inode = io_ctl->inode;
1246
1247	if (!inode)
1248		return 0;
1249
1250	/* Flush the dirty pages in the cache file. */
1251	ret = flush_dirty_cache(inode);
1252	if (ret)
1253		goto out;
1254
1255	/* Update the cache item to tell everyone this cache file is valid. */
1256	ret = update_cache_item(trans, root, inode, path, offset,
1257				io_ctl->entries, io_ctl->bitmaps);
1258out:
1259	if (ret) {
1260		invalidate_inode_pages2(inode->i_mapping);
1261		BTRFS_I(inode)->generation = 0;
1262		if (block_group)
1263			btrfs_debug(root->fs_info,
1264	  "failed to write free space cache for block group %llu error %d",
1265				  block_group->start, ret);
1266	}
1267	btrfs_update_inode(trans, root, BTRFS_I(inode));
1268
1269	if (block_group) {
1270		/* the dirty list is protected by the dirty_bgs_lock */
1271		spin_lock(&trans->transaction->dirty_bgs_lock);
1272
1273		/* the disk_cache_state is protected by the block group lock */
1274		spin_lock(&block_group->lock);
1275
1276		/*
1277		 * only mark this as written if we didn't get put back on
1278		 * the dirty list while waiting for IO.   Otherwise our
1279		 * cache state won't be right, and we won't get written again
1280		 */
1281		if (!ret && list_empty(&block_group->dirty_list))
1282			block_group->disk_cache_state = BTRFS_DC_WRITTEN;
1283		else if (ret)
1284			block_group->disk_cache_state = BTRFS_DC_ERROR;
1285
1286		spin_unlock(&block_group->lock);
1287		spin_unlock(&trans->transaction->dirty_bgs_lock);
1288		io_ctl->inode = NULL;
1289		iput(inode);
1290	}
1291
1292	return ret;
1293
1294}
1295
1296int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
1297			struct btrfs_block_group *block_group,
1298			struct btrfs_path *path)
1299{
1300	return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans,
1301				     block_group, &block_group->io_ctl,
1302				     path, block_group->start);
1303}
1304
1305/**
1306 * Write out cached info to an inode
1307 *
1308 * @root:        root the inode belongs to
1309 * @inode:       free space inode we are writing out
1310 * @ctl:         free space cache we are going to write out
1311 * @block_group: block_group for this cache if it belongs to a block_group
1312 * @io_ctl:      holds context for the io
1313 * @trans:       the trans handle
1314 *
1315 * This function writes out a free space cache struct to disk for quick recovery
1316 * on mount.  This will return 0 if it was successful in writing the cache out,
1317 * or an errno if it was not.
1318 */
1319static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1320				   struct btrfs_free_space_ctl *ctl,
1321				   struct btrfs_block_group *block_group,
1322				   struct btrfs_io_ctl *io_ctl,
1323				   struct btrfs_trans_handle *trans)
1324{
1325	struct extent_state *cached_state = NULL;
1326	LIST_HEAD(bitmap_list);
1327	int entries = 0;
1328	int bitmaps = 0;
1329	int ret;
1330	int must_iput = 0;
1331
1332	if (!i_size_read(inode))
1333		return -EIO;
1334
1335	WARN_ON(io_ctl->pages);
1336	ret = io_ctl_init(io_ctl, inode, 1);
1337	if (ret)
1338		return ret;
1339
1340	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) {
1341		down_write(&block_group->data_rwsem);
1342		spin_lock(&block_group->lock);
1343		if (block_group->delalloc_bytes) {
1344			block_group->disk_cache_state = BTRFS_DC_WRITTEN;
1345			spin_unlock(&block_group->lock);
1346			up_write(&block_group->data_rwsem);
1347			BTRFS_I(inode)->generation = 0;
1348			ret = 0;
1349			must_iput = 1;
1350			goto out;
1351		}
1352		spin_unlock(&block_group->lock);
1353	}
1354
1355	/* Lock all pages first so we can lock the extent safely. */
1356	ret = io_ctl_prepare_pages(io_ctl, false);
1357	if (ret)
1358		goto out_unlock;
1359
1360	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
1361			 &cached_state);
1362
1363	io_ctl_set_generation(io_ctl, trans->transid);
1364
1365	mutex_lock(&ctl->cache_writeout_mutex);
1366	/* Write out the extent entries in the free space cache */
1367	spin_lock(&ctl->tree_lock);
1368	ret = write_cache_extent_entries(io_ctl, ctl,
1369					 block_group, &entries, &bitmaps,
1370					 &bitmap_list);
1371	if (ret)
1372		goto out_nospc_locked;
1373
1374	/*
1375	 * Some space freed in the current transaction is still pinned; it will
1376	 * be added to the free space cache only after the transaction is
1377	 * committed, so we must not lose it here.
1378	 *
1379	 * If this changes while we are working we'll get added back to
1380	 * the dirty list and redo it.  No locking needed
1381	 */
1382	ret = write_pinned_extent_entries(trans, block_group, io_ctl, &entries);
1383	if (ret)
1384		goto out_nospc_locked;
1385
1386	/*
1387	 * At last, we write out all the bitmaps and keep cache_writeout_mutex
1388	 * locked while doing it because a concurrent trim can be manipulating
1389	 * or freeing the bitmap.
1390	 */
1391	ret = write_bitmap_entries(io_ctl, &bitmap_list);
1392	spin_unlock(&ctl->tree_lock);
1393	mutex_unlock(&ctl->cache_writeout_mutex);
1394	if (ret)
1395		goto out_nospc;
1396
1397	/* Zero out the rest of the pages just to make sure */
1398	io_ctl_zero_remaining_pages(io_ctl);
1399
1400	/* Everything is written out, now we dirty the pages in the file. */
1401	ret = btrfs_dirty_pages(BTRFS_I(inode), io_ctl->pages,
1402				io_ctl->num_pages, 0, i_size_read(inode),
1403				&cached_state, false);
1404	if (ret)
1405		goto out_nospc;
1406
1407	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
1408		up_write(&block_group->data_rwsem);
1409	/*
1410	 * Release the pages and unlock the extent; we will flush
1411	 * them out later.
1412	 */
1413	io_ctl_drop_pages(io_ctl);
1414	io_ctl_free(io_ctl);
1415
1416	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1417			     i_size_read(inode) - 1, &cached_state);
1418
1419	/*
1420	 * At this point the pages are under IO and we're done here.  The
1421	 * caller is responsible for waiting on them and updating the
1422	 * cache and the inode.
1423	 */
1424	io_ctl->entries = entries;
1425	io_ctl->bitmaps = bitmaps;
1426
1427	ret = btrfs_fdatawrite_range(inode, 0, (u64)-1);
1428	if (ret)
1429		goto out;
1430
1431	return 0;
1432
1433out_nospc_locked:
1434	cleanup_bitmap_list(&bitmap_list);
1435	spin_unlock(&ctl->tree_lock);
1436	mutex_unlock(&ctl->cache_writeout_mutex);
1437
1438out_nospc:
1439	cleanup_write_cache_enospc(inode, io_ctl, &cached_state);
1440
1441out_unlock:
1442	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
1443		up_write(&block_group->data_rwsem);
1444
1445out:
1446	io_ctl->inode = NULL;
1447	io_ctl_free(io_ctl);
1448	if (ret) {
1449		invalidate_inode_pages2(inode->i_mapping);
1450		BTRFS_I(inode)->generation = 0;
1451	}
1452	btrfs_update_inode(trans, root, BTRFS_I(inode));
1453	if (must_iput)
1454		iput(inode);
1455	return ret;
1456}
1457
1458int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
1459			  struct btrfs_block_group *block_group,
1460			  struct btrfs_path *path)
1461{
1462	struct btrfs_fs_info *fs_info = trans->fs_info;
1463	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1464	struct inode *inode;
1465	int ret = 0;
1466
1467	spin_lock(&block_group->lock);
1468	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
1469		spin_unlock(&block_group->lock);
1470		return 0;
1471	}
1472	spin_unlock(&block_group->lock);
1473
1474	inode = lookup_free_space_inode(block_group, path);
1475	if (IS_ERR(inode))
1476		return 0;
1477
1478	ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl,
1479				block_group, &block_group->io_ctl, trans);
1480	if (ret) {
1481		btrfs_debug(fs_info,
1482	  "failed to write free space cache for block group %llu error %d",
1483			  block_group->start, ret);
1484		spin_lock(&block_group->lock);
1485		block_group->disk_cache_state = BTRFS_DC_ERROR;
1486		spin_unlock(&block_group->lock);
1487
1488		block_group->io_ctl.inode = NULL;
1489		iput(inode);
1490	}
1491
1492	/*
1493	 * if ret == 0 the caller is expected to call btrfs_wait_cache_io
1494	 * to wait for IO and put the inode
1495	 */
1496
1497	return ret;
1498}
1499
1500static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
1501					  u64 offset)
1502{
1503	ASSERT(offset >= bitmap_start);
1504	offset -= bitmap_start;
1505	return (unsigned long)(div_u64(offset, unit));
1506}
1507
1508static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
1509{
1510	return (unsigned long)(div_u64(bytes, unit));
1511}
1512
1513static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
1514				   u64 offset)
1515{
1516	u64 bitmap_start;
1517	u64 bytes_per_bitmap;
1518
1519	bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
1520	bitmap_start = offset - ctl->start;
1521	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
1522	bitmap_start *= bytes_per_bitmap;
1523	bitmap_start += ctl->start;
1524
1525	return bitmap_start;
1526}
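
/*
 * Illustrative sketch (not part of free-space-cache.c): the arithmetic done
 * by offset_to_bit(), bytes_to_bits() and offset_to_bitmap() above, rerun in
 * userspace.  The 4 KiB unit, the 4 KiB page (so 32768 bits per bitmap) and
 * the example offsets are assumptions made only for this demonstration.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint64_t unit = 4096;			/* ctl->unit */
	const uint64_t bits_per_bitmap = 4096 * 8;	/* BITS_PER_BITMAP */
	const uint64_t ctl_start = 1024ULL * 1024 * 1024;	/* block group start */
	uint64_t offset = ctl_start + 300ULL * 1024 * 1024;	/* some free space */

	/* offset_to_bitmap(): round down to the covering bitmap's start. */
	uint64_t bytes_per_bitmap = bits_per_bitmap * unit;
	uint64_t bitmap_start = ((offset - ctl_start) / bytes_per_bitmap) *
				bytes_per_bitmap + ctl_start;

	/* offset_to_bit(): bit index of this offset within that bitmap. */
	uint64_t bit = (offset - bitmap_start) / unit;

	printf("bitmap spans %llu bytes, starts at %llu, offset maps to bit %llu\n",
	       (unsigned long long)bytes_per_bitmap,
	       (unsigned long long)bitmap_start, (unsigned long long)bit);
	return 0;
}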
1527
1528static int tree_insert_offset(struct rb_root *root, u64 offset,
1529			      struct rb_node *node, int bitmap)
1530{
1531	struct rb_node **p = &root->rb_node;
1532	struct rb_node *parent = NULL;
1533	struct btrfs_free_space *info;
1534
1535	while (*p) {
1536		parent = *p;
1537		info = rb_entry(parent, struct btrfs_free_space, offset_index);
1538
1539		if (offset < info->offset) {
1540			p = &(*p)->rb_left;
1541		} else if (offset > info->offset) {
1542			p = &(*p)->rb_right;
1543		} else {
1544			/*
1545			 * we could have a bitmap entry and an extent entry
1546			 * share the same offset.  If this is the case, we want
1547			 * the extent entry to always be found first if we do a
1548			 * linear search through the tree, since we want to have
1549			 * the quickest allocation time, and allocating from an
1550			 * extent is faster than allocating from a bitmap.  So
1551			 * if we're inserting a bitmap and we find an entry at
1552			 * this offset, we want to go right, or after this entry
1553			 * logically.  If we are inserting an extent and we've
1554			 * found a bitmap, we want to go left, or before
1555			 * logically.
1556			 */
1557			if (bitmap) {
1558				if (info->bitmap) {
1559					WARN_ON_ONCE(1);
1560					return -EEXIST;
1561				}
1562				p = &(*p)->rb_right;
1563			} else {
1564				if (!info->bitmap) {
1565					WARN_ON_ONCE(1);
1566					return -EEXIST;
1567				}
1568				p = &(*p)->rb_left;
1569			}
1570		}
1571	}
1572
1573	rb_link_node(node, parent, p);
1574	rb_insert_color(node, root);
1575
1576	return 0;
1577}
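
/*
 * Illustrative sketch (not part of free-space-cache.c): the tie-break that
 * tree_insert_offset() above encodes in its descent -- at equal offsets an
 * extent entry sorts before a bitmap entry, so a linear walk meets the
 * cheaper-to-allocate-from extent entry first.  The struct and comparator
 * below are made up for the example.
 */
#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

struct toy_entry {
	uint64_t offset;
	bool is_bitmap;
};

/* < 0: a sorts before b, > 0: a sorts after b, 0: duplicate (kernel: -EEXIST) */
static int toy_entry_cmp(const struct toy_entry *a, const struct toy_entry *b)
{
	if (a->offset != b->offset)
		return a->offset < b->offset ? -1 : 1;
	if (a->is_bitmap == b->is_bitmap)
		return 0;
	return a->is_bitmap ? 1 : -1;
}

int main(void)
{
	struct toy_entry extent = { 4096, false };
	struct toy_entry bitmap = { 4096, true };

	printf("extent vs bitmap at the same offset: %d\n",
	       toy_entry_cmp(&extent, &bitmap));	/* -1: extent comes first */
	return 0;
}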
1578
1579/*
1580 * searches the tree for the given offset.
1581 *
1582 * fuzzy - If this is set, then we are trying to make an allocation, and we
1583 * just want a section that is at least 'bytes' in size and comes at or after
1584 * the given offset.
1585 */
1586static struct btrfs_free_space *
1587tree_search_offset(struct btrfs_free_space_ctl *ctl,
1588		   u64 offset, int bitmap_only, int fuzzy)
1589{
1590	struct rb_node *n = ctl->free_space_offset.rb_node;
1591	struct btrfs_free_space *entry, *prev = NULL;
1592
1593	/* find entry that is closest to the 'offset' */
1594	while (1) {
1595		if (!n) {
1596			entry = NULL;
1597			break;
1598		}
1599
1600		entry = rb_entry(n, struct btrfs_free_space, offset_index);
1601		prev = entry;
1602
1603		if (offset < entry->offset)
1604			n = n->rb_left;
1605		else if (offset > entry->offset)
1606			n = n->rb_right;
1607		else
1608			break;
1609	}
1610
1611	if (bitmap_only) {
1612		if (!entry)
1613			return NULL;
1614		if (entry->bitmap)
1615			return entry;
1616
1617		/*
1618		 * A bitmap entry and an extent entry may share the same offset;
1619		 * in that case, the bitmap entry comes after the extent entry.
1620		 */
1621		n = rb_next(n);
1622		if (!n)
1623			return NULL;
1624		entry = rb_entry(n, struct btrfs_free_space, offset_index);
1625		if (entry->offset != offset)
1626			return NULL;
1627
1628		WARN_ON(!entry->bitmap);
1629		return entry;
1630	} else if (entry) {
1631		if (entry->bitmap) {
1632			/*
1633			 * if previous extent entry covers the offset,
1634			 * we should return it instead of the bitmap entry
1635			 */
1636			n = rb_prev(&entry->offset_index);
1637			if (n) {
1638				prev = rb_entry(n, struct btrfs_free_space,
1639						offset_index);
1640				if (!prev->bitmap &&
1641				    prev->offset + prev->bytes > offset)
1642					entry = prev;
1643			}
1644		}
1645		return entry;
1646	}
1647
1648	if (!prev)
1649		return NULL;
1650
1651	/* find last entry before the 'offset' */
1652	entry = prev;
1653	if (entry->offset > offset) {
1654		n = rb_prev(&entry->offset_index);
1655		if (n) {
1656			entry = rb_entry(n, struct btrfs_free_space,
1657					offset_index);
1658			ASSERT(entry->offset <= offset);
1659		} else {
1660			if (fuzzy)
1661				return entry;
1662			else
1663				return NULL;
1664		}
1665	}
1666
1667	if (entry->bitmap) {
1668		n = rb_prev(&entry->offset_index);
1669		if (n) {
1670			prev = rb_entry(n, struct btrfs_free_space,
1671					offset_index);
1672			if (!prev->bitmap &&
1673			    prev->offset + prev->bytes > offset)
1674				return prev;
1675		}
1676		if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
1677			return entry;
1678	} else if (entry->offset + entry->bytes > offset)
1679		return entry;
1680
1681	if (!fuzzy)
1682		return NULL;
1683
1684	while (1) {
1685		if (entry->bitmap) {
1686			if (entry->offset + BITS_PER_BITMAP *
1687			    ctl->unit > offset)
1688				break;
1689		} else {
1690			if (entry->offset + entry->bytes > offset)
1691				break;
1692		}
1693
1694		n = rb_next(&entry->offset_index);
1695		if (!n)
1696			return NULL;
1697		entry = rb_entry(n, struct btrfs_free_space, offset_index);
1698	}
1699	return entry;
1700}
1701
1702static inline void
1703__unlink_free_space(struct btrfs_free_space_ctl *ctl,
1704		    struct btrfs_free_space *info)
1705{
1706	rb_erase(&info->offset_index, &ctl->free_space_offset);
1707	ctl->free_extents--;
1708
1709	if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
1710		ctl->discardable_extents[BTRFS_STAT_CURR]--;
1711		ctl->discardable_bytes[BTRFS_STAT_CURR] -= info->bytes;
1712	}
1713}
1714
1715static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
1716			      struct btrfs_free_space *info)
1717{
1718	__unlink_free_space(ctl, info);
1719	ctl->free_space -= info->bytes;
1720}
1721
1722static int link_free_space(struct btrfs_free_space_ctl *ctl,
1723			   struct btrfs_free_space *info)
1724{
1725	int ret = 0;
1726
1727	ASSERT(info->bytes || info->bitmap);
1728	ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
1729				 &info->offset_index, (info->bitmap != NULL));
1730	if (ret)
1731		return ret;
1732
1733	if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
1734		ctl->discardable_extents[BTRFS_STAT_CURR]++;
1735		ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
1736	}
1737
1738	ctl->free_space += info->bytes;
1739	ctl->free_extents++;
1740	return ret;
1741}
1742
1743static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1744				       struct btrfs_free_space *info,
1745				       u64 offset, u64 bytes)
1746{
1747	unsigned long start, count, end;
1748	int extent_delta = -1;
1749
1750	start = offset_to_bit(info->offset, ctl->unit, offset);
1751	count = bytes_to_bits(bytes, ctl->unit);
1752	end = start + count;
1753	ASSERT(end <= BITS_PER_BITMAP);
1754
1755	bitmap_clear(info->bitmap, start, count);
1756
1757	info->bytes -= bytes;
1758	if (info->max_extent_size > ctl->unit)
1759		info->max_extent_size = 0;
1760
1761	if (start && test_bit(start - 1, info->bitmap))
1762		extent_delta++;
1763
1764	if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
1765		extent_delta++;
1766
1767	info->bitmap_extents += extent_delta;
1768	if (!btrfs_free_space_trimmed(info)) {
1769		ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
1770		ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
1771	}
1772}
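
/*
 * Illustrative sketch (not part of free-space-cache.c): why the extent_delta
 * bookkeeping in __bitmap_clear_bits() above works.  Clearing a run of set
 * bits removes one free extent, but if the bit just before (or just after)
 * the cleared range is still set, a leftover extent survives on that side.
 * This toy program checks the rule against a brute-force run count.
 */
#include <stdio.h>
#include <stdbool.h>

#define NBITS 16

static int count_runs(const bool *b)
{
	int runs = 0;

	for (int i = 0; i < NBITS; i++)
		if (b[i] && (i == 0 || !b[i - 1]))
			runs++;
	return runs;
}

int main(void)
{
	bool bits[NBITS] = { false };
	int start = 4, count = 3;	/* clear bits 4..6 out of the middle */
	int before, delta = -1;

	/* One free extent covering bits 2..9. */
	for (int i = 2; i <= 9; i++)
		bits[i] = true;
	before = count_runs(bits);

	/* The neighbour-bit rule used by __bitmap_clear_bits(). */
	if (start > 0 && bits[start - 1])
		delta++;
	if (start + count < NBITS && bits[start + count])
		delta++;

	for (int i = start; i < start + count; i++)
		bits[i] = false;

	printf("runs: %d -> %d, predicted delta %+d\n",
	       before, count_runs(bits), delta);
	return 0;
}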
1773
1774static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1775			      struct btrfs_free_space *info, u64 offset,
1776			      u64 bytes)
1777{
1778	__bitmap_clear_bits(ctl, info, offset, bytes);
1779	ctl->free_space -= bytes;
1780}
1781
1782static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1783			    struct btrfs_free_space *info, u64 offset,
1784			    u64 bytes)
1785{
1786	unsigned long start, count, end;
1787	int extent_delta = 1;
1788
1789	start = offset_to_bit(info->offset, ctl->unit, offset);
1790	count = bytes_to_bits(bytes, ctl->unit);
1791	end = start + count;
1792	ASSERT(end <= BITS_PER_BITMAP);
1793
1794	bitmap_set(info->bitmap, start, count);
1795
1796	info->bytes += bytes;
1797	ctl->free_space += bytes;
1798
1799	if (start && test_bit(start - 1, info->bitmap))
1800		extent_delta--;
1801
1802	if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
1803		extent_delta--;
1804
1805	info->bitmap_extents += extent_delta;
1806	if (!btrfs_free_space_trimmed(info)) {
1807		ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
1808		ctl->discardable_bytes[BTRFS_STAT_CURR] += bytes;
1809	}
1810}
1811
1812/*
1813 * If we cannot find a suitable extent, use *bytes to record the size of
1814 * the largest free extent we did find.
1815 */
1816static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1817			 struct btrfs_free_space *bitmap_info, u64 *offset,
1818			 u64 *bytes, bool for_alloc)
1819{
1820	unsigned long found_bits = 0;
1821	unsigned long max_bits = 0;
1822	unsigned long bits, i;
1823	unsigned long next_zero;
1824	unsigned long extent_bits;
1825
1826	/*
1827	 * Skip searching the bitmap if we don't have a contiguous section that
1828	 * is large enough for this allocation.
1829	 */
1830	if (for_alloc &&
1831	    bitmap_info->max_extent_size &&
1832	    bitmap_info->max_extent_size < *bytes) {
1833		*bytes = bitmap_info->max_extent_size;
1834		return -1;
1835	}
1836
1837	i = offset_to_bit(bitmap_info->offset, ctl->unit,
1838			  max_t(u64, *offset, bitmap_info->offset));
1839	bits = bytes_to_bits(*bytes, ctl->unit);
1840
1841	for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
1842		if (for_alloc && bits == 1) {
1843			found_bits = 1;
1844			break;
1845		}
1846		next_zero = find_next_zero_bit(bitmap_info->bitmap,
1847					       BITS_PER_BITMAP, i);
1848		extent_bits = next_zero - i;
1849		if (extent_bits >= bits) {
1850			found_bits = extent_bits;
1851			break;
1852		} else if (extent_bits > max_bits) {
1853			max_bits = extent_bits;
1854		}
1855		i = next_zero;
1856	}
1857
1858	if (found_bits) {
1859		*offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1860		*bytes = (u64)(found_bits) * ctl->unit;
1861		return 0;
1862	}
1863
1864	*bytes = (u64)(max_bits) * ctl->unit;
1865	bitmap_info->max_extent_size = *bytes;
1866	return -1;
1867}
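
/*
 * Illustrative sketch (not part of free-space-cache.c): the core scan that
 * search_bitmap() above performs -- hop from each set bit to the next clear
 * bit, stop at the first run that is long enough, and otherwise remember the
 * longest run seen.  Toy standalone version over a bool array; the bitmap
 * contents are made up for the example.
 */
#include <stdio.h>
#include <stdbool.h>

#define NBITS 24

/* Return the start of the first run of at least want set bits, else -1. */
static int find_run(const bool *bits, int want, int *longest)
{
	*longest = 0;
	for (int i = 0; i < NBITS; ) {
		if (!bits[i]) {
			i++;
			continue;
		}
		int next_zero = i;	/* mirrors find_next_zero_bit() */
		while (next_zero < NBITS && bits[next_zero])
			next_zero++;
		int run = next_zero - i;
		if (run >= want)
			return i;
		if (run > *longest)
			*longest = run;
		i = next_zero;
	}
	return -1;
}

int main(void)
{
	bool bits[NBITS] = { false };
	int longest, start;

	/* Free runs at bits 1..3 (3 bits) and 8..15 (8 bits). */
	for (int i = 1; i <= 3; i++)
		bits[i] = true;
	for (int i = 8; i <= 15; i++)
		bits[i] = true;

	start = find_run(bits, 5, &longest);
	printf("want 5 bits: run starts at bit %d\n", start);		/* 8 */

	start = find_run(bits, 10, &longest);
	printf("want 10 bits: %d, longest run seen was %d\n", start, longest);
	return 0;
}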
1868
1869static inline u64 get_max_extent_size(struct btrfs_free_space *entry)
1870{
1871	if (entry->bitmap)
1872		return entry->max_extent_size;
1873	return entry->bytes;
1874}
1875
1876/* Cache the size of the max extent in bytes */
1877static struct btrfs_free_space *
1878find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
1879		unsigned long align, u64 *max_extent_size)
1880{
1881	struct btrfs_free_space *entry;
1882	struct rb_node *node;
1883	u64 tmp;
1884	u64 align_off;
1885	int ret;
1886
1887	if (!ctl->free_space_offset.rb_node)
1888		goto out;
1889
1890	entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1891	if (!entry)
1892		goto out;
1893
1894	for (node = &entry->offset_index; node; node = rb_next(node)) {
1895		entry = rb_entry(node, struct btrfs_free_space, offset_index);
1896		if (entry->bytes < *bytes) {
1897			*max_extent_size = max(get_max_extent_size(entry),
1898					       *max_extent_size);
1899			continue;
1900		}
1901
1902		/* make sure the space returned is big enough
1903		 * to match our requested alignment
1904		 */
1905		if (*bytes >= align) {
1906			tmp = entry->offset - ctl->start + align - 1;
1907			tmp = div64_u64(tmp, align);
1908			tmp = tmp * align + ctl->start;
1909			align_off = tmp - entry->offset;
1910		} else {
1911			align_off = 0;
1912			tmp = entry->offset;
1913		}
1914
1915		if (entry->bytes < *bytes + align_off) {
1916			*max_extent_size = max(get_max_extent_size(entry),
1917					       *max_extent_size);
1918			continue;
1919		}
1920
1921		if (entry->bitmap) {
1922			u64 size = *bytes;
1923
1924			ret = search_bitmap(ctl, entry, &tmp, &size, true);
1925			if (!ret) {
1926				*offset = tmp;
1927				*bytes = size;
1928				return entry;
1929			} else {
1930				*max_extent_size =
1931					max(get_max_extent_size(entry),
1932					    *max_extent_size);
1933			}
1934			continue;
1935		}
1936
1937		*offset = tmp;
1938		*bytes = entry->bytes - align_off;
1939		return entry;
1940	}
1941out:
1942	return NULL;
1943}
1944
1945static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
1946			   struct btrfs_free_space *info, u64 offset)
1947{
1948	info->offset = offset_to_bitmap(ctl, offset);
1949	info->bytes = 0;
1950	info->bitmap_extents = 0;
1951	INIT_LIST_HEAD(&info->list);
1952	link_free_space(ctl, info);
1953	ctl->total_bitmaps++;
1954	recalculate_thresholds(ctl);
1955}
1956
1957static void free_bitmap(struct btrfs_free_space_ctl *ctl,
1958			struct btrfs_free_space *bitmap_info)
1959{
1960	/*
1961	 * Normally when this is called, the bitmap is completely empty. However,
1962	 * if we are blowing up the free space cache for one reason or another
1963	 * via __btrfs_remove_free_space_cache(), then it may not be empty and
1964	 * we may leave stats on the table.
1965	 */
1966	if (bitmap_info->bytes && !btrfs_free_space_trimmed(bitmap_info)) {
1967		ctl->discardable_extents[BTRFS_STAT_CURR] -=
1968			bitmap_info->bitmap_extents;
1969		ctl->discardable_bytes[BTRFS_STAT_CURR] -= bitmap_info->bytes;
1970
1971	}
1972	unlink_free_space(ctl, bitmap_info);
1973	kmem_cache_free(btrfs_free_space_bitmap_cachep, bitmap_info->bitmap);
1974	kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1975	ctl->total_bitmaps--;
1976	recalculate_thresholds(ctl);
1977}
1978
1979static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
1980			      struct btrfs_free_space *bitmap_info,
1981			      u64 *offset, u64 *bytes)
1982{
1983	u64 end;
1984	u64 search_start, search_bytes;
1985	int ret;
1986
1987again:
1988	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
1989
1990	/*
1991	 * We need to search for bits in this bitmap.  We could only cover some
1992	 * of the extent in this bitmap thanks to how we add space, so we need
1993	 * to search for as much of it as we can and clear that amount, and then
1994	 * go searching for the next bit.
1995	 */
1996	search_start = *offset;
1997	search_bytes = ctl->unit;
1998	search_bytes = min(search_bytes, end - search_start + 1);
1999	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
2000			    false);
2001	if (ret < 0 || search_start != *offset)
2002		return -EINVAL;
2003
2004	/* We may have found more bits than what we need */
2005	search_bytes = min(search_bytes, *bytes);
2006
2007	/* Cannot clear past the end of the bitmap */
2008	search_bytes = min(search_bytes, end - search_start + 1);
2009
2010	bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
2011	*offset += search_bytes;
2012	*bytes -= search_bytes;
2013
2014	if (*bytes) {
2015		struct rb_node *next = rb_next(&bitmap_info->offset_index);
2016		if (!bitmap_info->bytes)
2017			free_bitmap(ctl, bitmap_info);
2018
2019		/*
2020		 * no entry after this bitmap, but we still have bytes to
2021		 * remove, so something has gone wrong.
2022		 */
2023		if (!next)
2024			return -EINVAL;
2025
2026		bitmap_info = rb_entry(next, struct btrfs_free_space,
2027				       offset_index);
2028
2029		/*
2030		 * if the next entry isn't a bitmap we need to return to let the
2031		 * extent stuff do its work.
2032		 */
2033		if (!bitmap_info->bitmap)
2034			return -EAGAIN;
2035
2036		/*
2037		 * Ok the next item is a bitmap, but it may not actually hold
2038		 * the information for the rest of this free space stuff, so
2039		 * look for it, and if we don't find it return so we can try
2040		 * everything over again.
2041		 */
2042		search_start = *offset;
2043		search_bytes = ctl->unit;
2044		ret = search_bitmap(ctl, bitmap_info, &search_start,
2045				    &search_bytes, false);
2046		if (ret < 0 || search_start != *offset)
2047			return -EAGAIN;
2048
2049		goto again;
2050	} else if (!bitmap_info->bytes)
2051		free_bitmap(ctl, bitmap_info);
2052
2053	return 0;
2054}
2055
2056static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
2057			       struct btrfs_free_space *info, u64 offset,
2058			       u64 bytes, enum btrfs_trim_state trim_state)
2059{
2060	u64 bytes_to_set = 0;
2061	u64 end;
2062
2063	/*
2064	 * This is a tradeoff to make bitmap trim state minimal.  We mark the
2065	 * whole bitmap untrimmed if at any point we add untrimmed regions.
2066	 */
2067	if (trim_state == BTRFS_TRIM_STATE_UNTRIMMED) {
2068		if (btrfs_free_space_trimmed(info)) {
2069			ctl->discardable_extents[BTRFS_STAT_CURR] +=
2070				info->bitmap_extents;
2071			ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
2072		}
2073		info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2074	}
2075
2076	end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
2077
2078	bytes_to_set = min(end - offset, bytes);
2079
2080	bitmap_set_bits(ctl, info, offset, bytes_to_set);
2081
2082	/*
2083	 * We set some bytes, so we have no idea what the max extent size is
2084	 * anymore.
2085	 */
2086	info->max_extent_size = 0;
2087
2088	return bytes_to_set;
2089
2090}
2091
2092static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
2093		      struct btrfs_free_space *info)
2094{
2095	struct btrfs_block_group *block_group = ctl->private;
2096	struct btrfs_fs_info *fs_info = block_group->fs_info;
2097	bool forced = false;
2098
2099#ifdef CONFIG_BTRFS_DEBUG
2100	if (btrfs_should_fragment_free_space(block_group))
2101		forced = true;
2102#endif
2103
2104	/* This is a way to reclaim large regions from the bitmaps. */
2105	if (!forced && info->bytes >= FORCE_EXTENT_THRESHOLD)
2106		return false;
2107
2108	/*
2109	 * If we are below the extents threshold then we can add this as an
2110	 * extent, and don't have to deal with the bitmap
2111	 */
2112	if (!forced && ctl->free_extents < ctl->extents_thresh) {
2113		/*
2114		 * If this block group has some small extents we don't want to
2115		 * use up all of our free slots in the cache with them; we want
2116		 * to reserve them for larger extents.  However, if we have
2117		 * plenty of cache left then go ahead and add them, no sense in
2118		 * adding the overhead of a bitmap if we don't have to.
2119		 */
2120		if (info->bytes <= fs_info->sectorsize * 8) {
2121			if (ctl->free_extents * 3 <= ctl->extents_thresh)
2122				return false;
2123		} else {
2124			return false;
2125		}
2126	}
2127
2128	/*
2129	 * The original block groups from mkfs can be really small, like 8
2130	 * megabytes, so don't bother with a bitmap for those entries.  However
2131	 * some block groups can be smaller than what a bitmap would cover but
2132	 * are still large enough that they could overflow the 32k memory limit,
2133	 * so still allow those block groups to have a bitmap
2134	 * entry.
2135	 */
2136	if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->length)
2137		return false;
2138
2139	return true;
2140}
2141
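/*
 * Editor's note -- worked example for the use_bitmap() heuristics above,
 * assuming a 4K sectorsize and 4K pages (illustration only, not part of the
 * original source).  One bitmap covers BITS_PER_BITMAP * ctl->unit =
 * 32768 * 4K = 128M, so the final check refuses bitmaps for block groups
 * smaller than 64M.  The "small extent" cutoff is sectorsize * 8 = 32K:
 * while no more than a third of the extent slots are in use, extents of up
 * to 32K stay as plain extent entries instead of being pushed into a bitmap.
 */
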
2142static const struct btrfs_free_space_op free_space_op = {
2143	.use_bitmap		= use_bitmap,
2144};
2145
2146static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
2147			      struct btrfs_free_space *info)
2148{
2149	struct btrfs_free_space *bitmap_info;
2150	struct btrfs_block_group *block_group = NULL;
2151	int added = 0;
2152	u64 bytes, offset, bytes_added;
2153	enum btrfs_trim_state trim_state;
2154	int ret;
2155
2156	bytes = info->bytes;
2157	offset = info->offset;
2158	trim_state = info->trim_state;
2159
2160	if (!ctl->op->use_bitmap(ctl, info))
2161		return 0;
2162
2163	if (ctl->op == &free_space_op)
2164		block_group = ctl->private;
2165again:
2166	/*
2167	 * Since we link bitmaps right into the cluster we need to see if we
2168	 * have a cluster here, and if so, and it has our bitmap, we need to add
2169	 * the free space to that bitmap.
2170	 */
2171	if (block_group && !list_empty(&block_group->cluster_list)) {
2172		struct btrfs_free_cluster *cluster;
2173		struct rb_node *node;
2174		struct btrfs_free_space *entry;
2175
2176		cluster = list_entry(block_group->cluster_list.next,
2177				     struct btrfs_free_cluster,
2178				     block_group_list);
2179		spin_lock(&cluster->lock);
2180		node = rb_first(&cluster->root);
2181		if (!node) {
2182			spin_unlock(&cluster->lock);
2183			goto no_cluster_bitmap;
2184		}
2185
2186		entry = rb_entry(node, struct btrfs_free_space, offset_index);
2187		if (!entry->bitmap) {
2188			spin_unlock(&cluster->lock);
2189			goto no_cluster_bitmap;
2190		}
2191
2192		if (entry->offset == offset_to_bitmap(ctl, offset)) {
2193			bytes_added = add_bytes_to_bitmap(ctl, entry, offset,
2194							  bytes, trim_state);
2195			bytes -= bytes_added;
2196			offset += bytes_added;
2197		}
2198		spin_unlock(&cluster->lock);
2199		if (!bytes) {
2200			ret = 1;
2201			goto out;
2202		}
2203	}
2204
2205no_cluster_bitmap:
2206	bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
2207					 1, 0);
2208	if (!bitmap_info) {
2209		ASSERT(added == 0);
2210		goto new_bitmap;
2211	}
2212
2213	bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
2214					  trim_state);
2215	bytes -= bytes_added;
2216	offset += bytes_added;
2217	added = 0;
2218
2219	if (!bytes) {
2220		ret = 1;
2221		goto out;
2222	} else
2223		goto again;
2224
2225new_bitmap:
2226	if (info && info->bitmap) {
2227		add_new_bitmap(ctl, info, offset);
2228		added = 1;
2229		info = NULL;
2230		goto again;
2231	} else {
2232		spin_unlock(&ctl->tree_lock);
2233
2234		/* no pre-allocated info, allocate a new one */
2235		if (!info) {
2236			info = kmem_cache_zalloc(btrfs_free_space_cachep,
2237						 GFP_NOFS);
2238			if (!info) {
2239				spin_lock(&ctl->tree_lock);
2240				ret = -ENOMEM;
2241				goto out;
2242			}
2243		}
2244
2245		/* allocate the bitmap */
2246		info->bitmap = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep,
2247						 GFP_NOFS);
2248		info->trim_state = BTRFS_TRIM_STATE_TRIMMED;
2249		spin_lock(&ctl->tree_lock);
2250		if (!info->bitmap) {
2251			ret = -ENOMEM;
2252			goto out;
2253		}
2254		goto again;
2255	}
2256
2257out:
2258	if (info) {
2259		if (info->bitmap)
2260			kmem_cache_free(btrfs_free_space_bitmap_cachep,
2261					info->bitmap);
2262		kmem_cache_free(btrfs_free_space_cachep, info);
2263	}
2264
2265	return ret;
2266}
2267
2268/*
2269 * Free space merging rules:
2270 *  1) Merge trimmed areas together
2271 *  2) Let untrimmed areas coalesce with trimmed areas
2272 *  3) Always pull neighboring regions from bitmaps
2273 *
2274 * The above rules are for when we merge free space based on btrfs_trim_state.
2275 * Rules 2 and 3 are subtle because they are suboptimal, but are done for the
2276 * same reason: to promote larger extent regions which makes life easier for
2277 * find_free_extent().  Rule 2 enables coalescing based on the common path
2278 * being the return of free space from btrfs_finish_extent_commit().  So when
2279 * free space is trimmed, rule 2 prevents aggregating a trimmed new region
2280 * with untrimmed regions in the rb_tree.  Rule 3 is purely to obtain larger extents
2281 * and provide find_free_extent() with the largest extents possible hoping for
2282 * the reuse path.
2283 */
2284static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
2285			  struct btrfs_free_space *info, bool update_stat)
2286{
2287	struct btrfs_free_space *left_info = NULL;
2288	struct btrfs_free_space *right_info;
2289	bool merged = false;
2290	u64 offset = info->offset;
2291	u64 bytes = info->bytes;
2292	const bool is_trimmed = btrfs_free_space_trimmed(info);
2293
2294	/*
2295	 * First we want to see if there is free space adjacent to the range we
2296	 * are adding; if there is, remove that struct and add a new one to
2297	 * cover the entire range.
2298	 */
2299	right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
2300	if (right_info && rb_prev(&right_info->offset_index))
2301		left_info = rb_entry(rb_prev(&right_info->offset_index),
2302				     struct btrfs_free_space, offset_index);
2303	else if (!right_info)
2304		left_info = tree_search_offset(ctl, offset - 1, 0, 0);
2305
2306	/* See try_merge_free_space() comment. */
2307	if (right_info && !right_info->bitmap &&
2308	    (!is_trimmed || btrfs_free_space_trimmed(right_info))) {
2309		if (update_stat)
2310			unlink_free_space(ctl, right_info);
2311		else
2312			__unlink_free_space(ctl, right_info);
2313		info->bytes += right_info->bytes;
2314		kmem_cache_free(btrfs_free_space_cachep, right_info);
2315		merged = true;
2316	}
2317
2318	/* See try_merge_free_space() comment. */
2319	if (left_info && !left_info->bitmap &&
2320	    left_info->offset + left_info->bytes == offset &&
2321	    (!is_trimmed || btrfs_free_space_trimmed(left_info))) {
2322		if (update_stat)
2323			unlink_free_space(ctl, left_info);
2324		else
2325			__unlink_free_space(ctl, left_info);
2326		info->offset = left_info->offset;
2327		info->bytes += left_info->bytes;
2328		kmem_cache_free(btrfs_free_space_cachep, left_info);
2329		merged = true;
2330	}
2331
2332	return merged;
2333}
2334
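/*
 * Editor's note -- illustrative example of the merge rules above, not part
 * of the original source.  Adding an untrimmed extent [8M, 9M) next to an
 * existing trimmed extent [7M, 8M) coalesces (rule 2); the merged entry
 * [7M, 9M) keeps the new info's trim_state and so becomes untrimmed, which
 * may cost a redundant discard but yields a larger extent.  Adding a trimmed
 * extent next to an untrimmed neighbour does not merge, so untrimmed space
 * is never hidden inside an entry that claims to be trimmed.
 */
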
2335static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl,
2336				     struct btrfs_free_space *info,
2337				     bool update_stat)
2338{
2339	struct btrfs_free_space *bitmap;
2340	unsigned long i;
2341	unsigned long j;
2342	const u64 end = info->offset + info->bytes;
2343	const u64 bitmap_offset = offset_to_bitmap(ctl, end);
2344	u64 bytes;
2345
2346	bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0);
2347	if (!bitmap)
2348		return false;
2349
2350	i = offset_to_bit(bitmap->offset, ctl->unit, end);
2351	j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i);
2352	if (j == i)
2353		return false;
2354	bytes = (j - i) * ctl->unit;
2355	info->bytes += bytes;
2356
2357	/* See try_merge_free_space() comment. */
2358	if (!btrfs_free_space_trimmed(bitmap))
2359		info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2360
2361	if (update_stat)
2362		bitmap_clear_bits(ctl, bitmap, end, bytes);
2363	else
2364		__bitmap_clear_bits(ctl, bitmap, end, bytes);
2365
2366	if (!bitmap->bytes)
2367		free_bitmap(ctl, bitmap);
2368
2369	return true;
2370}
2371
2372static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl,
2373				       struct btrfs_free_space *info,
2374				       bool update_stat)
2375{
2376	struct btrfs_free_space *bitmap;
2377	u64 bitmap_offset;
2378	unsigned long i;
2379	unsigned long j;
2380	unsigned long prev_j;
2381	u64 bytes;
2382
2383	bitmap_offset = offset_to_bitmap(ctl, info->offset);
2384	/* If we're on a boundary, try the previous logical bitmap. */
2385	if (bitmap_offset == info->offset) {
2386		if (info->offset == 0)
2387			return false;
2388		bitmap_offset = offset_to_bitmap(ctl, info->offset - 1);
2389	}
2390
2391	bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0);
2392	if (!bitmap)
2393		return false;
2394
2395	i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1;
2396	j = 0;
2397	prev_j = (unsigned long)-1;
2398	for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) {
2399		if (j > i)
2400			break;
2401		prev_j = j;
2402	}
2403	if (prev_j == i)
2404		return false;
2405
2406	if (prev_j == (unsigned long)-1)
2407		bytes = (i + 1) * ctl->unit;
2408	else
2409		bytes = (i - prev_j) * ctl->unit;
2410
2411	info->offset -= bytes;
2412	info->bytes += bytes;
2413
2414	/* See try_merge_free_space() comment. */
2415	if (!btrfs_free_space_trimmed(bitmap))
2416		info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2417
2418	if (update_stat)
2419		bitmap_clear_bits(ctl, bitmap, info->offset, bytes);
2420	else
2421		__bitmap_clear_bits(ctl, bitmap, info->offset, bytes);
2422
2423	if (!bitmap->bytes)
2424		free_bitmap(ctl, bitmap);
2425
2426	return true;
2427}
2428
2429/*
2430 * We prefer always to allocate from extent entries, both for clustered and
2431 * non-clustered allocation requests. So when attempting to add a new extent
2432 * entry, try to see if there's adjacent free space in bitmap entries, and if
2433 * there is, migrate that space from the bitmaps to the extent.
2434 * Like this we get better chances of satisfying space allocation requests
2435 * because we attempt to satisfy them based on a single cache entry, and never
2436 * on 2 or more entries - even if the entries represent a contiguous free space
2437 * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry
2438 * ends).
2439 */
2440static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl,
2441			      struct btrfs_free_space *info,
2442			      bool update_stat)
2443{
2444	/*
2445	 * Only work with disconnected entries, as we can change their offset,
2446	 * and they must be extent entries.
2447	 */
2448	ASSERT(!info->bitmap);
2449	ASSERT(RB_EMPTY_NODE(&info->offset_index));
2450
2451	if (ctl->total_bitmaps > 0) {
2452		bool stole_end;
2453		bool stole_front = false;
2454
2455		stole_end = steal_from_bitmap_to_end(ctl, info, update_stat);
2456		if (ctl->total_bitmaps > 0)
2457			stole_front = steal_from_bitmap_to_front(ctl, info,
2458								 update_stat);
2459
2460		if (stole_end || stole_front)
2461			try_merge_free_space(ctl, info, update_stat);
2462	}
2463}
2464
2465int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
2466			   struct btrfs_free_space_ctl *ctl,
2467			   u64 offset, u64 bytes,
2468			   enum btrfs_trim_state trim_state)
2469{
2470	struct btrfs_block_group *block_group = ctl->private;
2471	struct btrfs_free_space *info;
2472	int ret = 0;
2473	u64 filter_bytes = bytes;
2474
2475	ASSERT(!btrfs_is_zoned(fs_info));
2476
2477	info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
2478	if (!info)
2479		return -ENOMEM;
2480
2481	info->offset = offset;
2482	info->bytes = bytes;
2483	info->trim_state = trim_state;
2484	RB_CLEAR_NODE(&info->offset_index);
2485
2486	spin_lock(&ctl->tree_lock);
2487
2488	if (try_merge_free_space(ctl, info, true))
2489		goto link;
2490
2491	/*
2492	 * There was no extent directly to the left or right of this new
2493	 * extent, so we know we're going to have to allocate a new extent;
2494	 * before we do that, see if we need to drop this into a bitmap.
2495	 */
2496	ret = insert_into_bitmap(ctl, info);
2497	if (ret < 0) {
2498		goto out;
2499	} else if (ret) {
2500		ret = 0;
2501		goto out;
2502	}
2503link:
2504	/*
2505	 * Only steal free space from adjacent bitmaps if we're sure we're not
2506	 * going to add the new free space to existing bitmap entries - because
2507	 * that would mean unnecessary work that would be reverted. Therefore
2508	 * attempt to steal space from bitmaps if we're adding an extent entry.
2509	 */
2510	steal_from_bitmap(ctl, info, true);
2511
2512	filter_bytes = max(filter_bytes, info->bytes);
2513
2514	ret = link_free_space(ctl, info);
2515	if (ret)
2516		kmem_cache_free(btrfs_free_space_cachep, info);
2517out:
2518	btrfs_discard_update_discardable(block_group);
2519	spin_unlock(&ctl->tree_lock);
2520
2521	if (ret) {
2522		btrfs_crit(fs_info, "unable to add free space: %d", ret);
2523		ASSERT(ret != -EEXIST);
2524	}
2525
2526	if (trim_state != BTRFS_TRIM_STATE_TRIMMED) {
2527		btrfs_discard_check_filter(block_group, filter_bytes);
2528		btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
2529	}
2530
2531	return ret;
2532}
2533
2534static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
2535					u64 bytenr, u64 size, bool used)
2536{
2537	struct btrfs_fs_info *fs_info = block_group->fs_info;
2538	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2539	u64 offset = bytenr - block_group->start;
2540	u64 to_free, to_unusable;
2541
2542	spin_lock(&ctl->tree_lock);
2543	if (!used)
2544		to_free = size;
2545	else if (offset >= block_group->alloc_offset)
2546		to_free = size;
2547	else if (offset + size <= block_group->alloc_offset)
2548		to_free = 0;
2549	else
2550		to_free = offset + size - block_group->alloc_offset;
2551	to_unusable = size - to_free;
2552
2553	ctl->free_space += to_free;
2554	/*
2555	 * If the block group is read-only, we should account freed space into
2556	 * bytes_readonly.
2557	 */
2558	if (!block_group->ro)
2559		block_group->zone_unusable += to_unusable;
2560	spin_unlock(&ctl->tree_lock);
2561	if (!used) {
2562		spin_lock(&block_group->lock);
2563		block_group->alloc_offset -= size;
2564		spin_unlock(&block_group->lock);
2565	}
2566
2567	/* The whole block group is now unusable. Mark it as unused and reclaim. */
2568	if (block_group->zone_unusable == block_group->length) {
2569		btrfs_mark_bg_unused(block_group);
2570	} else if (block_group->zone_unusable >=
2571		   div_factor_fine(block_group->length,
2572				   fs_info->bg_reclaim_threshold)) {
2573		btrfs_mark_bg_to_reclaim(block_group);
2574	}
2575
2576	return 0;
2577}
2578
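/*
 * Editor's note -- illustrative sketch of the zoned accounting above, not
 * part of the original source.  The split at the allocation pointer boils
 * down to (hypothetical helper name):
 *
 *	static u64 zoned_bytes_to_free(u64 offset, u64 size, u64 alloc_offset,
 *				       bool used)
 *	{
 *		if (!used || offset >= alloc_offset)
 *			return size;	// whole region beyond the write pointer
 *		if (offset + size <= alloc_offset)
 *			return 0;	// whole region behind the write pointer
 *		return offset + size - alloc_offset;	// straddles it
 *	}
 *
 * For example, freeing a used 4M region at offset 8M while alloc_offset is
 * 10M gives to_free = 2M and to_unusable = 2M: the 2M behind the write
 * pointer of a sequential zone cannot be reused until the zone is reset.
 */
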
2579int btrfs_add_free_space(struct btrfs_block_group *block_group,
2580			 u64 bytenr, u64 size)
2581{
2582	enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2583
2584	if (btrfs_is_zoned(block_group->fs_info))
2585		return __btrfs_add_free_space_zoned(block_group, bytenr, size,
2586						    true);
2587
2588	if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC))
2589		trim_state = BTRFS_TRIM_STATE_TRIMMED;
2590
2591	return __btrfs_add_free_space(block_group->fs_info,
2592				      block_group->free_space_ctl,
2593				      bytenr, size, trim_state);
2594}
2595
2596int btrfs_add_free_space_unused(struct btrfs_block_group *block_group,
2597				u64 bytenr, u64 size)
2598{
2599	if (btrfs_is_zoned(block_group->fs_info))
2600		return __btrfs_add_free_space_zoned(block_group, bytenr, size,
2601						    false);
2602
2603	return btrfs_add_free_space(block_group, bytenr, size);
2604}
2605
2606/*
2607 * This is a subtle distinction because when adding free space back in general,
2608 * we want it to be added as untrimmed for async. But in the case where we add
2609 * it on loading of a block group, we want to consider it trimmed.
2610 */
2611int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group,
2612				       u64 bytenr, u64 size)
2613{
2614	enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2615
2616	if (btrfs_is_zoned(block_group->fs_info))
2617		return __btrfs_add_free_space_zoned(block_group, bytenr, size,
2618						    true);
2619
2620	if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC) ||
2621	    btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
2622		trim_state = BTRFS_TRIM_STATE_TRIMMED;
2623
2624	return __btrfs_add_free_space(block_group->fs_info,
2625				      block_group->free_space_ctl,
2626				      bytenr, size, trim_state);
2627}
2628
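/*
 * Editor's note (summary of the three add helpers above, not part of the
 * original source): btrfs_add_free_space() marks the space trimmed only
 * under DISCARD_SYNC, btrfs_add_free_space_async_trimmed() also treats
 * DISCARD_ASYNC mounts as trimmed (what the block group loading path wants),
 * and btrfs_add_free_space_unused() passes used == false to the zoned
 * helper so the allocation offset is rewound instead of the space being
 * accounted as zone_unusable.
 */
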
2629int btrfs_remove_free_space(struct btrfs_block_group *block_group,
2630			    u64 offset, u64 bytes)
2631{
2632	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2633	struct btrfs_free_space *info;
2634	int ret;
2635	bool re_search = false;
2636
2637	if (btrfs_is_zoned(block_group->fs_info)) {
2638		/*
2639		 * This can happen with conventional zones when replaying the log.
2640		 * Since the allocation info of tree-log nodes is not recorded
2641		 * in the extent tree, calculate_alloc_pointer() fails to
2642		 * advance the allocation pointer past the last allocated tree-log
2643		 * node blocks.
2644		 *
2645		 * This function is called from
2646		 * btrfs_pin_extent_for_log_replay() when replaying the log.
2647		 * Advance the pointer so we do not overwrite the tree-log nodes.
2648		 */
2649		if (block_group->alloc_offset < offset + bytes)
2650			block_group->alloc_offset = offset + bytes;
2651		return 0;
2652	}
2653
2654	spin_lock(&ctl->tree_lock);
2655
2656again:
2657	ret = 0;
2658	if (!bytes)
2659		goto out_lock;
2660
2661	info = tree_search_offset(ctl, offset, 0, 0);
2662	if (!info) {
2663		/*
2664		 * Oops, we didn't find an extent that matched the space we
2665		 * wanted to remove; look for a bitmap instead.
2666		 */
2667		info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
2668					  1, 0);
2669		if (!info) {
2670			/*
2671			 * If we found a partial bit of our free space in a
2672			 * bitmap but then couldn't find the other part this may
2673			 * be a problem, so WARN about it.
2674			 */
2675			WARN_ON(re_search);
2676			goto out_lock;
2677		}
2678	}
2679
2680	re_search = false;
2681	if (!info->bitmap) {
2682		unlink_free_space(ctl, info);
2683		if (offset == info->offset) {
2684			u64 to_free = min(bytes, info->bytes);
2685
2686			info->bytes -= to_free;
2687			info->offset += to_free;
2688			if (info->bytes) {
2689				ret = link_free_space(ctl, info);
2690				WARN_ON(ret);
2691			} else {
2692				kmem_cache_free(btrfs_free_space_cachep, info);
2693			}
2694
2695			offset += to_free;
2696			bytes -= to_free;
2697			goto again;
2698		} else {
2699			u64 old_end = info->bytes + info->offset;
2700
2701			info->bytes = offset - info->offset;
2702			ret = link_free_space(ctl, info);
2703			WARN_ON(ret);
2704			if (ret)
2705				goto out_lock;
2706
2707			/* Not enough bytes in this entry to satisfy us */
2708			if (old_end < offset + bytes) {
2709				bytes -= old_end - offset;
2710				offset = old_end;
2711				goto again;
2712			} else if (old_end == offset + bytes) {
2713				/* all done */
2714				goto out_lock;
2715			}
2716			spin_unlock(&ctl->tree_lock);
2717
2718			ret = __btrfs_add_free_space(block_group->fs_info, ctl,
2719						     offset + bytes,
2720						     old_end - (offset + bytes),
2721						     info->trim_state);
2722			WARN_ON(ret);
2723			goto out;
2724		}
2725	}
2726
2727	ret = remove_from_bitmap(ctl, info, &offset, &bytes);
2728	if (ret == -EAGAIN) {
2729		re_search = true;
2730		goto again;
2731	}
2732out_lock:
2733	btrfs_discard_update_discardable(block_group);
2734	spin_unlock(&ctl->tree_lock);
2735out:
2736	return ret;
2737}
2738
2739void btrfs_dump_free_space(struct btrfs_block_group *block_group,
2740			   u64 bytes)
2741{
2742	struct btrfs_fs_info *fs_info = block_group->fs_info;
2743	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2744	struct btrfs_free_space *info;
2745	struct rb_node *n;
2746	int count = 0;
2747
2748	/*
2749	 * Zoned btrfs does not use the free space tree or clusters. Just print
2750	 * out the free space after the allocation offset.
2751	 */
2752	if (btrfs_is_zoned(fs_info)) {
2753		btrfs_info(fs_info, "free space %llu",
2754			   block_group->length - block_group->alloc_offset);
2755		return;
2756	}
2757
2758	spin_lock(&ctl->tree_lock);
2759	for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
2760		info = rb_entry(n, struct btrfs_free_space, offset_index);
2761		if (info->bytes >= bytes && !block_group->ro)
2762			count++;
2763		btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s",
2764			   info->offset, info->bytes,
2765		       (info->bitmap) ? "yes" : "no");
2766	}
2767	spin_unlock(&ctl->tree_lock);
2768	btrfs_info(fs_info, "block group has cluster?: %s",
2769	       list_empty(&block_group->cluster_list) ? "no" : "yes");
2770	btrfs_info(fs_info,
2771		   "%d blocks of free space at or bigger than bytes is", count);
2772}
2773
2774void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group,
2775			       struct btrfs_free_space_ctl *ctl)
2776{
2777	struct btrfs_fs_info *fs_info = block_group->fs_info;
2778
2779	spin_lock_init(&ctl->tree_lock);
2780	ctl->unit = fs_info->sectorsize;
2781	ctl->start = block_group->start;
2782	ctl->private = block_group;
2783	ctl->op = &free_space_op;
2784	INIT_LIST_HEAD(&ctl->trimming_ranges);
2785	mutex_init(&ctl->cache_writeout_mutex);
2786
2787	/*
2788	 * We only want to have 32k of RAM per block group for keeping
2789	 * track of free space, and if we pass 1/2 of that we want to
2790	 * start converting things over to using bitmaps.
2791	 */
2792	ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space);
2793}
2794
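/*
 * Editor's note -- worked arithmetic for the threshold above, not part of
 * the original source.  extents_thresh = 16K / sizeof(struct
 * btrfs_free_space); with an entry size of roughly 80 bytes on 64-bit
 * builds (an assumption, the exact size depends on the config) that is on
 * the order of 200 extent entries per block group before use_bitmap()
 * starts steering new space into bitmaps.
 */
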
2795/*
2796 * for a given cluster, put all of its extents back into the free
2797 * space cache.  If the block group passed doesn't match the block group
2798 * pointed to by the cluster, someone else raced in and freed the
2799 * cluster already.  In that case, we just return without changing anything
2800 */
2801static void __btrfs_return_cluster_to_free_space(
2802			     struct btrfs_block_group *block_group,
2803			     struct btrfs_free_cluster *cluster)
2804{
2805	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2806	struct btrfs_free_space *entry;
2807	struct rb_node *node;
2808
2809	spin_lock(&cluster->lock);
2810	if (cluster->block_group != block_group) {
2811		spin_unlock(&cluster->lock);
2812		return;
2813	}
2814
2815	cluster->block_group = NULL;
2816	cluster->window_start = 0;
2817	list_del_init(&cluster->block_group_list);
2818
2819	node = rb_first(&cluster->root);
2820	while (node) {
2821		bool bitmap;
2822
2823		entry = rb_entry(node, struct btrfs_free_space, offset_index);
2824		node = rb_next(&entry->offset_index);
2825		rb_erase(&entry->offset_index, &cluster->root);
2826		RB_CLEAR_NODE(&entry->offset_index);
2827
2828		bitmap = (entry->bitmap != NULL);
2829		if (!bitmap) {
2830			/* Merging treats extents as if they were new */
2831			if (!btrfs_free_space_trimmed(entry)) {
2832				ctl->discardable_extents[BTRFS_STAT_CURR]--;
2833				ctl->discardable_bytes[BTRFS_STAT_CURR] -=
2834					entry->bytes;
2835			}
2836
2837			try_merge_free_space(ctl, entry, false);
2838			steal_from_bitmap(ctl, entry, false);
2839
2840			/* As we insert directly, update these statistics */
2841			if (!btrfs_free_space_trimmed(entry)) {
2842				ctl->discardable_extents[BTRFS_STAT_CURR]++;
2843				ctl->discardable_bytes[BTRFS_STAT_CURR] +=
2844					entry->bytes;
2845			}
2846		}
2847		tree_insert_offset(&ctl->free_space_offset,
2848				   entry->offset, &entry->offset_index, bitmap);
2849	}
2850	cluster->root = RB_ROOT;
2851	spin_unlock(&cluster->lock);
2852	btrfs_put_block_group(block_group);
2853}
2854
2855static void __btrfs_remove_free_space_cache_locked(
2856				struct btrfs_free_space_ctl *ctl)
2857{
2858	struct btrfs_free_space *info;
2859	struct rb_node *node;
2860
2861	while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
2862		info = rb_entry(node, struct btrfs_free_space, offset_index);
2863		if (!info->bitmap) {
2864			unlink_free_space(ctl, info);
2865			kmem_cache_free(btrfs_free_space_cachep, info);
2866		} else {
2867			free_bitmap(ctl, info);
2868		}
2869
2870		cond_resched_lock(&ctl->tree_lock);
2871	}
2872}
2873
2874void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
2875{
2876	spin_lock(&ctl->tree_lock);
2877	__btrfs_remove_free_space_cache_locked(ctl);
2878	if (ctl->private)
2879		btrfs_discard_update_discardable(ctl->private);
2880	spin_unlock(&ctl->tree_lock);
2881}
2882
2883void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group)
2884{
2885	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2886	struct btrfs_free_cluster *cluster;
2887	struct list_head *head;
2888
2889	spin_lock(&ctl->tree_lock);
2890	while ((head = block_group->cluster_list.next) !=
2891	       &block_group->cluster_list) {
2892		cluster = list_entry(head, struct btrfs_free_cluster,
2893				     block_group_list);
2894
2895		WARN_ON(cluster->block_group != block_group);
2896		__btrfs_return_cluster_to_free_space(block_group, cluster);
2897
2898		cond_resched_lock(&ctl->tree_lock);
2899	}
2900	__btrfs_remove_free_space_cache_locked(ctl);
2901	btrfs_discard_update_discardable(block_group);
2902	spin_unlock(&ctl->tree_lock);
2903
2904}
2905
2906/**
2907 * btrfs_is_free_space_trimmed - see if everything is trimmed
2908 * @block_group: block_group of interest
2909 *
2910 * Walk @block_group's free space rb_tree to determine if everything is trimmed.
2911 */
2912bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group)
2913{
2914	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2915	struct btrfs_free_space *info;
2916	struct rb_node *node;
2917	bool ret = true;
2918
2919	spin_lock(&ctl->tree_lock);
2920	node = rb_first(&ctl->free_space_offset);
2921
2922	while (node) {
2923		info = rb_entry(node, struct btrfs_free_space, offset_index);
2924
2925		if (!btrfs_free_space_trimmed(info)) {
2926			ret = false;
2927			break;
2928		}
2929
2930		node = rb_next(node);
2931	}
2932
2933	spin_unlock(&ctl->tree_lock);
2934	return ret;
2935}
2936
2937u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
2938			       u64 offset, u64 bytes, u64 empty_size,
2939			       u64 *max_extent_size)
2940{
2941	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2942	struct btrfs_discard_ctl *discard_ctl =
2943					&block_group->fs_info->discard_ctl;
2944	struct btrfs_free_space *entry = NULL;
2945	u64 bytes_search = bytes + empty_size;
2946	u64 ret = 0;
2947	u64 align_gap = 0;
2948	u64 align_gap_len = 0;
2949	enum btrfs_trim_state align_gap_trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2950
2951	ASSERT(!btrfs_is_zoned(block_group->fs_info));
2952
2953	spin_lock(&ctl->tree_lock);
2954	entry = find_free_space(ctl, &offset, &bytes_search,
2955				block_group->full_stripe_len, max_extent_size);
2956	if (!entry)
2957		goto out;
2958
2959	ret = offset;
2960	if (entry->bitmap) {
2961		bitmap_clear_bits(ctl, entry, offset, bytes);
2962
2963		if (!btrfs_free_space_trimmed(entry))
2964			atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
2965
2966		if (!entry->bytes)
2967			free_bitmap(ctl, entry);
2968	} else {
2969		unlink_free_space(ctl, entry);
2970		align_gap_len = offset - entry->offset;
2971		align_gap = entry->offset;
2972		align_gap_trim_state = entry->trim_state;
2973
2974		if (!btrfs_free_space_trimmed(entry))
2975			atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
2976
2977		entry->offset = offset + bytes;
2978		WARN_ON(entry->bytes < bytes + align_gap_len);
2979
2980		entry->bytes -= bytes + align_gap_len;
2981		if (!entry->bytes)
2982			kmem_cache_free(btrfs_free_space_cachep, entry);
2983		else
2984			link_free_space(ctl, entry);
2985	}
2986out:
2987	btrfs_discard_update_discardable(block_group);
2988	spin_unlock(&ctl->tree_lock);
2989
2990	if (align_gap_len)
2991		__btrfs_add_free_space(block_group->fs_info, ctl,
2992				       align_gap, align_gap_len,
2993				       align_gap_trim_state);
2994	return ret;
2995}
2996
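/*
 * Editor's note -- illustrative example for btrfs_find_space_for_alloc()
 * above, not part of the original source.  If the chosen extent entry starts
 * at 100K but the stripe-aligned offset handed back is 128K, the skipped 28K
 * head is remembered as align_gap/align_gap_len and returned to
 * __btrfs_add_free_space() only after the tree lock is dropped, with its
 * original trim state preserved.
 */
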
2997/*
2998 * given a cluster, put all of its extents back into the free space
2999 * cache.  If a block group is passed, this function will only free
3000 * a cluster that belongs to the passed block group.
3001 *
3002 * Otherwise, it'll get a reference on the block group pointed to by the
3003 * cluster and remove the cluster from it.
3004 */
3005void btrfs_return_cluster_to_free_space(
3006			       struct btrfs_block_group *block_group,
3007			       struct btrfs_free_cluster *cluster)
3008{
3009	struct btrfs_free_space_ctl *ctl;
3010
3011	/* first, get a safe pointer to the block group */
3012	spin_lock(&cluster->lock);
3013	if (!block_group) {
3014		block_group = cluster->block_group;
3015		if (!block_group) {
3016			spin_unlock(&cluster->lock);
3017			return;
3018		}
3019	} else if (cluster->block_group != block_group) {
3020		/* someone else has already freed it don't redo their work */
3021		spin_unlock(&cluster->lock);
3022		return;
3023	}
3024	btrfs_get_block_group(block_group);
3025	spin_unlock(&cluster->lock);
3026
3027	ctl = block_group->free_space_ctl;
3028
3029	/* now return any extents the cluster had on it */
3030	spin_lock(&ctl->tree_lock);
3031	__btrfs_return_cluster_to_free_space(block_group, cluster);
3032	spin_unlock(&ctl->tree_lock);
3033
3034	btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, block_group);
3035
3036	/* finally drop our ref */
3037	btrfs_put_block_group(block_group);
3038}
3039
3040static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group,
3041				   struct btrfs_free_cluster *cluster,
3042				   struct btrfs_free_space *entry,
3043				   u64 bytes, u64 min_start,
3044				   u64 *max_extent_size)
3045{
3046	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3047	int err;
3048	u64 search_start = cluster->window_start;
3049	u64 search_bytes = bytes;
3050	u64 ret = 0;
3051
3052	search_start = min_start;
3053	search_bytes = bytes;
3054
3055	err = search_bitmap(ctl, entry, &search_start, &search_bytes, true);
3056	if (err) {
3057		*max_extent_size = max(get_max_extent_size(entry),
3058				       *max_extent_size);
3059		return 0;
3060	}
3061
3062	ret = search_start;
3063	__bitmap_clear_bits(ctl, entry, ret, bytes);
3064
3065	return ret;
3066}
3067
3068/*
3069 * given a cluster, try to allocate 'bytes' from it, returns 0
3070 * if it couldn't find anything suitably large, or a logical disk offset
3071 * if things worked out
3072 */
3073u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group,
3074			     struct btrfs_free_cluster *cluster, u64 bytes,
3075			     u64 min_start, u64 *max_extent_size)
3076{
3077	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3078	struct btrfs_discard_ctl *discard_ctl =
3079					&block_group->fs_info->discard_ctl;
3080	struct btrfs_free_space *entry = NULL;
3081	struct rb_node *node;
3082	u64 ret = 0;
3083
3084	ASSERT(!btrfs_is_zoned(block_group->fs_info));
3085
3086	spin_lock(&cluster->lock);
3087	if (bytes > cluster->max_size)
3088		goto out;
3089
3090	if (cluster->block_group != block_group)
3091		goto out;
3092
3093	node = rb_first(&cluster->root);
3094	if (!node)
3095		goto out;
3096
3097	entry = rb_entry(node, struct btrfs_free_space, offset_index);
3098	while (1) {
3099		if (entry->bytes < bytes)
3100			*max_extent_size = max(get_max_extent_size(entry),
3101					       *max_extent_size);
3102
3103		if (entry->bytes < bytes ||
3104		    (!entry->bitmap && entry->offset < min_start)) {
3105			node = rb_next(&entry->offset_index);
3106			if (!node)
3107				break;
3108			entry = rb_entry(node, struct btrfs_free_space,
3109					 offset_index);
3110			continue;
3111		}
3112
3113		if (entry->bitmap) {
3114			ret = btrfs_alloc_from_bitmap(block_group,
3115						      cluster, entry, bytes,
3116						      cluster->window_start,
3117						      max_extent_size);
3118			if (ret == 0) {
3119				node = rb_next(&entry->offset_index);
3120				if (!node)
3121					break;
3122				entry = rb_entry(node, struct btrfs_free_space,
3123						 offset_index);
3124				continue;
3125			}
3126			cluster->window_start += bytes;
3127		} else {
3128			ret = entry->offset;
3129
3130			entry->offset += bytes;
3131			entry->bytes -= bytes;
3132		}
3133
3134		break;
3135	}
3136out:
3137	spin_unlock(&cluster->lock);
3138
3139	if (!ret)
3140		return 0;
3141
3142	spin_lock(&ctl->tree_lock);
3143
3144	if (!btrfs_free_space_trimmed(entry))
3145		atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
3146
3147	ctl->free_space -= bytes;
3148	if (!entry->bitmap && !btrfs_free_space_trimmed(entry))
3149		ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
3150
3151	spin_lock(&cluster->lock);
3152	if (entry->bytes == 0) {
3153		rb_erase(&entry->offset_index, &cluster->root);
3154		ctl->free_extents--;
3155		if (entry->bitmap) {
3156			kmem_cache_free(btrfs_free_space_bitmap_cachep,
3157					entry->bitmap);
3158			ctl->total_bitmaps--;
3159			recalculate_thresholds(ctl);
3160		} else if (!btrfs_free_space_trimmed(entry)) {
3161			ctl->discardable_extents[BTRFS_STAT_CURR]--;
3162		}
3163		kmem_cache_free(btrfs_free_space_cachep, entry);
3164	}
3165
3166	spin_unlock(&cluster->lock);
3167	spin_unlock(&ctl->tree_lock);
3168
3169	return ret;
3170}
3171
3172static int btrfs_bitmap_cluster(struct btrfs_block_group *block_group,
3173				struct btrfs_free_space *entry,
3174				struct btrfs_free_cluster *cluster,
3175				u64 offset, u64 bytes,
3176				u64 cont1_bytes, u64 min_bytes)
3177{
3178	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3179	unsigned long next_zero;
3180	unsigned long i;
3181	unsigned long want_bits;
3182	unsigned long min_bits;
3183	unsigned long found_bits;
3184	unsigned long max_bits = 0;
3185	unsigned long start = 0;
3186	unsigned long total_found = 0;
3187	int ret;
3188
3189	i = offset_to_bit(entry->offset, ctl->unit,
3190			  max_t(u64, offset, entry->offset));
3191	want_bits = bytes_to_bits(bytes, ctl->unit);
3192	min_bits = bytes_to_bits(min_bytes, ctl->unit);
3193
3194	/*
3195	 * Don't bother looking for a cluster in this bitmap if it's heavily
3196	 * fragmented.
3197	 */
3198	if (entry->max_extent_size &&
3199	    entry->max_extent_size < cont1_bytes)
3200		return -ENOSPC;
3201again:
3202	found_bits = 0;
3203	for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) {
3204		next_zero = find_next_zero_bit(entry->bitmap,
3205					       BITS_PER_BITMAP, i);
3206		if (next_zero - i >= min_bits) {
3207			found_bits = next_zero - i;
3208			if (found_bits > max_bits)
3209				max_bits = found_bits;
3210			break;
3211		}
3212		if (next_zero - i > max_bits)
3213			max_bits = next_zero - i;
3214		i = next_zero;
3215	}
3216
3217	if (!found_bits) {
3218		entry->max_extent_size = (u64)max_bits * ctl->unit;
3219		return -ENOSPC;
3220	}
3221
3222	if (!total_found) {
3223		start = i;
3224		cluster->max_size = 0;
3225	}
3226
3227	total_found += found_bits;
3228
3229	if (cluster->max_size < found_bits * ctl->unit)
3230		cluster->max_size = found_bits * ctl->unit;
3231
3232	if (total_found < want_bits || cluster->max_size < cont1_bytes) {
3233		i = next_zero + 1;
3234		goto again;
3235	}
3236
3237	cluster->window_start = start * ctl->unit + entry->offset;
3238	rb_erase(&entry->offset_index, &ctl->free_space_offset);
3239	ret = tree_insert_offset(&cluster->root, entry->offset,
3240				 &entry->offset_index, 1);
3241	ASSERT(!ret); /* -EEXIST; Logic error */
3242
3243	trace_btrfs_setup_cluster(block_group, cluster,
3244				  total_found * ctl->unit, 1);
3245	return 0;
3246}
3247
3248/*
3249 * This searches the block group for just extents to fill the cluster with.
3250 * Try to find a cluster with at least bytes total bytes, at least one
3251 * extent of cont1_bytes, and other clusters of at least min_bytes.
3252 */
3253static noinline int
3254setup_cluster_no_bitmap(struct btrfs_block_group *block_group,
3255			struct btrfs_free_cluster *cluster,
3256			struct list_head *bitmaps, u64 offset, u64 bytes,
3257			u64 cont1_bytes, u64 min_bytes)
3258{
3259	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3260	struct btrfs_free_space *first = NULL;
3261	struct btrfs_free_space *entry = NULL;
3262	struct btrfs_free_space *last;
3263	struct rb_node *node;
3264	u64 window_free;
3265	u64 max_extent;
3266	u64 total_size = 0;
3267
3268	entry = tree_search_offset(ctl, offset, 0, 1);
3269	if (!entry)
3270		return -ENOSPC;
3271
3272	/*
3273	 * We don't want bitmaps, so just move along until we find a normal
3274	 * extent entry.
3275	 */
3276	while (entry->bitmap || entry->bytes < min_bytes) {
3277		if (entry->bitmap && list_empty(&entry->list))
3278			list_add_tail(&entry->list, bitmaps);
3279		node = rb_next(&entry->offset_index);
3280		if (!node)
3281			return -ENOSPC;
3282		entry = rb_entry(node, struct btrfs_free_space, offset_index);
3283	}
3284
3285	window_free = entry->bytes;
3286	max_extent = entry->bytes;
3287	first = entry;
3288	last = entry;
3289
3290	for (node = rb_next(&entry->offset_index); node;
3291	     node = rb_next(&entry->offset_index)) {
3292		entry = rb_entry(node, struct btrfs_free_space, offset_index);
3293
3294		if (entry->bitmap) {
3295			if (list_empty(&entry->list))
3296				list_add_tail(&entry->list, bitmaps);
3297			continue;
3298		}
3299
3300		if (entry->bytes < min_bytes)
3301			continue;
3302
3303		last = entry;
3304		window_free += entry->bytes;
3305		if (entry->bytes > max_extent)
3306			max_extent = entry->bytes;
3307	}
3308
3309	if (window_free < bytes || max_extent < cont1_bytes)
3310		return -ENOSPC;
3311
3312	cluster->window_start = first->offset;
3313
3314	node = &first->offset_index;
3315
3316	/*
3317	 * now we've found our entries, pull them out of the free space
3318	 * cache and put them into the cluster rbtree
3319	 */
3320	do {
3321		int ret;
3322
3323		entry = rb_entry(node, struct btrfs_free_space, offset_index);
3324		node = rb_next(&entry->offset_index);
3325		if (entry->bitmap || entry->bytes < min_bytes)
3326			continue;
3327
3328		rb_erase(&entry->offset_index, &ctl->free_space_offset);
3329		ret = tree_insert_offset(&cluster->root, entry->offset,
3330					 &entry->offset_index, 0);
3331		total_size += entry->bytes;
3332		ASSERT(!ret); /* -EEXIST; Logic error */
3333	} while (node && entry != last);
3334
3335	cluster->max_size = max_extent;
3336	trace_btrfs_setup_cluster(block_group, cluster, total_size, 0);
3337	return 0;
3338}
3339
3340/*
3341 * This specifically looks for bitmaps that may work in the cluster; we assume
3342 * that we have already failed to find extents that will work.
3343 */
3344static noinline int
3345setup_cluster_bitmap(struct btrfs_block_group *block_group,
3346		     struct btrfs_free_cluster *cluster,
3347		     struct list_head *bitmaps, u64 offset, u64 bytes,
3348		     u64 cont1_bytes, u64 min_bytes)
3349{
3350	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3351	struct btrfs_free_space *entry = NULL;
3352	int ret = -ENOSPC;
3353	u64 bitmap_offset = offset_to_bitmap(ctl, offset);
3354
3355	if (ctl->total_bitmaps == 0)
3356		return -ENOSPC;
3357
3358	/*
3359	 * The bitmap that covers offset won't be in the list unless offset
3360	 * is just its start offset.
3361	 */
3362	if (!list_empty(bitmaps))
3363		entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
3364
3365	if (!entry || entry->offset != bitmap_offset) {
3366		entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
3367		if (entry && list_empty(&entry->list))
3368			list_add(&entry->list, bitmaps);
3369	}
3370
3371	list_for_each_entry(entry, bitmaps, list) {
3372		if (entry->bytes < bytes)
3373			continue;
3374		ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
3375					   bytes, cont1_bytes, min_bytes);
3376		if (!ret)
3377			return 0;
3378	}
3379
3380	/*
3381	 * The bitmaps list has all the bitmaps that record free space
3382	 * starting after offset, so no more search is required.
3383	 */
3384	return -ENOSPC;
3385}
3386
3387/*
3388 * Here we try to find a cluster of blocks in a block group.  The goal
3389 * is to find at least bytes+empty_size.
3390 * We might not find them all in one contiguous area.
3391 *
3392 * Returns zero and sets up the cluster if things worked out, otherwise
3393 * it returns -ENOSPC.
3394 */
3395int btrfs_find_space_cluster(struct btrfs_block_group *block_group,
3396			     struct btrfs_free_cluster *cluster,
3397			     u64 offset, u64 bytes, u64 empty_size)
3398{
3399	struct btrfs_fs_info *fs_info = block_group->fs_info;
3400	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3401	struct btrfs_free_space *entry, *tmp;
3402	LIST_HEAD(bitmaps);
3403	u64 min_bytes;
3404	u64 cont1_bytes;
3405	int ret;
3406
3407	/*
3408	 * Choose the minimum extent size we'll require for this
3409	 * cluster.  For SSD_SPREAD, don't allow any fragmentation.
3410	 * For metadata, allow allocations with smaller extents.  For
3411	 * data, keep it dense.
3412	 */
3413	if (btrfs_test_opt(fs_info, SSD_SPREAD)) {
3414		cont1_bytes = min_bytes = bytes + empty_size;
3415	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
3416		cont1_bytes = bytes;
3417		min_bytes = fs_info->sectorsize;
3418	} else {
3419		cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
3420		min_bytes = fs_info->sectorsize;
3421	}
3422
3423	spin_lock(&ctl->tree_lock);
3424
3425	/*
3426	 * If we know we don't have enough space to make a cluster don't even
3427	 * bother doing all the work to try and find one.
3428	 */
3429	if (ctl->free_space < bytes) {
3430		spin_unlock(&ctl->tree_lock);
3431		return -ENOSPC;
3432	}
3433
3434	spin_lock(&cluster->lock);
3435
3436	/* someone already found a cluster, hooray */
3437	if (cluster->block_group) {
3438		ret = 0;
3439		goto out;
3440	}
3441
3442	trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
3443				 min_bytes);
3444
3445	ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
3446				      bytes + empty_size,
3447				      cont1_bytes, min_bytes);
3448	if (ret)
3449		ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
3450					   offset, bytes + empty_size,
3451					   cont1_bytes, min_bytes);
3452
3453	/* Clear our temporary list */
3454	list_for_each_entry_safe(entry, tmp, &bitmaps, list)
3455		list_del_init(&entry->list);
3456
3457	if (!ret) {
3458		btrfs_get_block_group(block_group);
3459		list_add_tail(&cluster->block_group_list,
3460			      &block_group->cluster_list);
3461		cluster->block_group = block_group;
3462	} else {
3463		trace_btrfs_failed_cluster_setup(block_group);
3464	}
3465out:
3466	spin_unlock(&cluster->lock);
3467	spin_unlock(&ctl->tree_lock);
3468
3469	return ret;
3470}
3471
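/*
 * Editor's note -- worked example for the cluster sizing in
 * btrfs_find_space_cluster(), not part of the original source.  For a data
 * block group with bytes = 1M and empty_size = 7M on a 4K sectorsize
 * filesystem, cont1_bytes = max(1M, 8M >> 2) = 2M and min_bytes = 4K: the
 * window must total at least 8M, must contain one 2M contiguous run, and
 * may otherwise be stitched together from extents as small as 4K.
 */
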
3472/*
3473 * simple code to zero out a cluster
3474 */
3475void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
3476{
3477	spin_lock_init(&cluster->lock);
3478	spin_lock_init(&cluster->refill_lock);
3479	cluster->root = RB_ROOT;
3480	cluster->max_size = 0;
3481	cluster->fragmented = false;
3482	INIT_LIST_HEAD(&cluster->block_group_list);
3483	cluster->block_group = NULL;
3484}
3485
3486static int do_trimming(struct btrfs_block_group *block_group,
3487		       u64 *total_trimmed, u64 start, u64 bytes,
3488		       u64 reserved_start, u64 reserved_bytes,
3489		       enum btrfs_trim_state reserved_trim_state,
3490		       struct btrfs_trim_range *trim_entry)
3491{
3492	struct btrfs_space_info *space_info = block_group->space_info;
3493	struct btrfs_fs_info *fs_info = block_group->fs_info;
3494	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3495	int ret;
3496	int update = 0;
3497	const u64 end = start + bytes;
3498	const u64 reserved_end = reserved_start + reserved_bytes;
3499	enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
3500	u64 trimmed = 0;
3501
3502	spin_lock(&space_info->lock);
3503	spin_lock(&block_group->lock);
3504	if (!block_group->ro) {
3505		block_group->reserved += reserved_bytes;
3506		space_info->bytes_reserved += reserved_bytes;
3507		update = 1;
3508	}
3509	spin_unlock(&block_group->lock);
3510	spin_unlock(&space_info->lock);
3511
3512	ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed);
3513	if (!ret) {
3514		*total_trimmed += trimmed;
3515		trim_state = BTRFS_TRIM_STATE_TRIMMED;
3516	}
3517
3518	mutex_lock(&ctl->cache_writeout_mutex);
3519	if (reserved_start < start)
3520		__btrfs_add_free_space(fs_info, ctl, reserved_start,
3521				       start - reserved_start,
3522				       reserved_trim_state);
3523	if (start + bytes < reserved_start + reserved_bytes)
3524		__btrfs_add_free_space(fs_info, ctl, end, reserved_end - end,
3525				       reserved_trim_state);
3526	__btrfs_add_free_space(fs_info, ctl, start, bytes, trim_state);
3527	list_del(&trim_entry->list);
3528	mutex_unlock(&ctl->cache_writeout_mutex);
3529
3530	if (update) {
3531		spin_lock(&space_info->lock);
3532		spin_lock(&block_group->lock);
3533		if (block_group->ro)
3534			space_info->bytes_readonly += reserved_bytes;
3535		block_group->reserved -= reserved_bytes;
3536		space_info->bytes_reserved -= reserved_bytes;
3537		spin_unlock(&block_group->lock);
3538		spin_unlock(&space_info->lock);
3539	}
3540
3541	return ret;
3542}
3543
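/*
 * Editor's note -- illustrative example for do_trimming() above, not part
 * of the original source.  The reserved region and the region actually
 * discarded may differ: with reserved [0, 1M) and a trim of [256K, 768K),
 * the untouched head [0, 256K) and tail [768K, 1M) are re-added with their
 * previous trim state, while the 512K that went through
 * btrfs_discard_extent() is re-added as trimmed (or untrimmed if the
 * discard failed).
 */
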
3544/*
3545 * If @async is set, then we will trim 1 region and return.
3546 */
3547static int trim_no_bitmap(struct btrfs_block_group *block_group,
3548			  u64 *total_trimmed, u64 start, u64 end, u64 minlen,
3549			  bool async)
3550{
3551	struct btrfs_discard_ctl *discard_ctl =
3552					&block_group->fs_info->discard_ctl;
3553	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3554	struct btrfs_free_space *entry;
3555	struct rb_node *node;
3556	int ret = 0;
3557	u64 extent_start;
3558	u64 extent_bytes;
3559	enum btrfs_trim_state extent_trim_state;
3560	u64 bytes;
3561	const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);
3562
3563	while (start < end) {
3564		struct btrfs_trim_range trim_entry;
3565
3566		mutex_lock(&ctl->cache_writeout_mutex);
3567		spin_lock(&ctl->tree_lock);
3568
3569		if (ctl->free_space < minlen)
3570			goto out_unlock;
3571
3572		entry = tree_search_offset(ctl, start, 0, 1);
3573		if (!entry)
3574			goto out_unlock;
3575
3576		/* Skip bitmaps and, if async, already trimmed entries */
3577		while (entry->bitmap ||
3578		       (async && btrfs_free_space_trimmed(entry))) {
3579			node = rb_next(&entry->offset_index);
3580			if (!node)
3581				goto out_unlock;
3582			entry = rb_entry(node, struct btrfs_free_space,
3583					 offset_index);
3584		}
3585
3586		if (entry->offset >= end)
3587			goto out_unlock;
3588
3589		extent_start = entry->offset;
3590		extent_bytes = entry->bytes;
3591		extent_trim_state = entry->trim_state;
3592		if (async) {
3593			start = entry->offset;
3594			bytes = entry->bytes;
3595			if (bytes < minlen) {
3596				spin_unlock(&ctl->tree_lock);
3597				mutex_unlock(&ctl->cache_writeout_mutex);
3598				goto next;
3599			}
3600			unlink_free_space(ctl, entry);
3601			/*
3602			 * Let bytes = BTRFS_MAX_DISCARD_SIZE + X.
3603			 * If X < BTRFS_ASYNC_DISCARD_MIN_FILTER, we won't trim
3604			 * X when we come back around.  So trim it now.
3605			 */
3606			if (max_discard_size &&
3607			    bytes >= (max_discard_size +
3608				      BTRFS_ASYNC_DISCARD_MIN_FILTER)) {
3609				bytes = max_discard_size;
3610				extent_bytes = max_discard_size;
3611				entry->offset += max_discard_size;
3612				entry->bytes -= max_discard_size;
3613				link_free_space(ctl, entry);
3614			} else {
3615				kmem_cache_free(btrfs_free_space_cachep, entry);
3616			}
3617		} else {
3618			start = max(start, extent_start);
3619			bytes = min(extent_start + extent_bytes, end) - start;
3620			if (bytes < minlen) {
3621				spin_unlock(&ctl->tree_lock);
3622				mutex_unlock(&ctl->cache_writeout_mutex);
3623				goto next;
3624			}
3625
3626			unlink_free_space(ctl, entry);
3627			kmem_cache_free(btrfs_free_space_cachep, entry);
3628		}
3629
3630		spin_unlock(&ctl->tree_lock);
3631		trim_entry.start = extent_start;
3632		trim_entry.bytes = extent_bytes;
3633		list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
3634		mutex_unlock(&ctl->cache_writeout_mutex);
3635
3636		ret = do_trimming(block_group, total_trimmed, start, bytes,
3637				  extent_start, extent_bytes, extent_trim_state,
3638				  &trim_entry);
3639		if (ret) {
3640			block_group->discard_cursor = start + bytes;
3641			break;
3642		}
3643next:
3644		start += bytes;
3645		block_group->discard_cursor = start;
3646		if (async && *total_trimmed)
3647			break;
3648
3649		if (fatal_signal_pending(current)) {
3650			ret = -ERESTARTSYS;
3651			break;
3652		}
3653
3654		cond_resched();
3655	}
3656
3657	return ret;
3658
3659out_unlock:
3660	block_group->discard_cursor = btrfs_block_group_end(block_group);
3661	spin_unlock(&ctl->tree_lock);
3662	mutex_unlock(&ctl->cache_writeout_mutex);
3663
3664	return ret;
3665}
3666
3667/*
3668 * If we break out of trimming a bitmap prematurely, we should reset the
3669 * trimming bit.  In a rather contrived case, it's possible to race here so
3670 * reset the state to BTRFS_TRIM_STATE_UNTRIMMED.
3671 *
3672 * start = start of bitmap
3673 * end = near end of bitmap
3674 *
3675 * Thread 1:			Thread 2:
3676 * trim_bitmaps(start)
3677 *				trim_bitmaps(end)
3678 *				end_trimming_bitmap()
3679 * reset_trimming_bitmap()
3680 */
3681static void reset_trimming_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset)
3682{
3683	struct btrfs_free_space *entry;
3684
3685	spin_lock(&ctl->tree_lock);
3686	entry = tree_search_offset(ctl, offset, 1, 0);
3687	if (entry) {
3688		if (btrfs_free_space_trimmed(entry)) {
3689			ctl->discardable_extents[BTRFS_STAT_CURR] +=
3690				entry->bitmap_extents;
3691			ctl->discardable_bytes[BTRFS_STAT_CURR] += entry->bytes;
3692		}
3693		entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
3694	}
3695
3696	spin_unlock(&ctl->tree_lock);
3697}
3698
3699static void end_trimming_bitmap(struct btrfs_free_space_ctl *ctl,
3700				struct btrfs_free_space *entry)
3701{
3702	if (btrfs_free_space_trimming_bitmap(entry)) {
3703		entry->trim_state = BTRFS_TRIM_STATE_TRIMMED;
3704		ctl->discardable_extents[BTRFS_STAT_CURR] -=
3705			entry->bitmap_extents;
3706		ctl->discardable_bytes[BTRFS_STAT_CURR] -= entry->bytes;
3707	}
3708}
3709
3710/*
3711 * If @async is set, then we will trim 1 region and return.
3712 */
3713static int trim_bitmaps(struct btrfs_block_group *block_group,
3714			u64 *total_trimmed, u64 start, u64 end, u64 minlen,
3715			u64 maxlen, bool async)
3716{
3717	struct btrfs_discard_ctl *discard_ctl =
3718					&block_group->fs_info->discard_ctl;
3719	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3720	struct btrfs_free_space *entry;
3721	int ret = 0;
3722	int ret2;
3723	u64 bytes;
3724	u64 offset = offset_to_bitmap(ctl, start);
3725	const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);
3726
3727	while (offset < end) {
3728		bool next_bitmap = false;
3729		struct btrfs_trim_range trim_entry;
3730
3731		mutex_lock(&ctl->cache_writeout_mutex);
3732		spin_lock(&ctl->tree_lock);
3733
3734		if (ctl->free_space < minlen) {
3735			block_group->discard_cursor =
3736				btrfs_block_group_end(block_group);
3737			spin_unlock(&ctl->tree_lock);
3738			mutex_unlock(&ctl->cache_writeout_mutex);
3739			break;
3740		}
3741
3742		entry = tree_search_offset(ctl, offset, 1, 0);
3743		/*
3744		 * Bitmaps are marked trimmed lossily now to prevent constant
3745		 * discarding of the same bitmap (the reason why we are bound
3746		 * by the filters).  So, retrim the block group bitmaps when we
3747		 * are preparing to punt to the unused_bgs list.  This uses
3748		 * @minlen to determine if we are in BTRFS_DISCARD_INDEX_UNUSED
3749		 * which is the only discard index which sets minlen to 0.
3750		 */
3751		if (!entry || (async && minlen && start == offset &&
3752			       btrfs_free_space_trimmed(entry))) {
3753			spin_unlock(&ctl->tree_lock);
3754			mutex_unlock(&ctl->cache_writeout_mutex);
3755			next_bitmap = true;
3756			goto next;
3757		}
3758
3759		/*
3760		 * Async discard bitmap trimming begins by setting the start to
3761		 * key.objectid, and offset_to_bitmap() aligns to the
3762		 * start of the bitmap.  This lets us know we are fully
3763		 * scanning the bitmap rather than only some portion of it.
3764		 */
3765		if (start == offset)
3766			entry->trim_state = BTRFS_TRIM_STATE_TRIMMING;
3767
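		/*
		 * Look for the next free region of at least @minlen bytes at
		 * or after @start within this bitmap; on success @start and
		 * @bytes are updated to describe the region found.
		 */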
3768		bytes = minlen;
3769		ret2 = search_bitmap(ctl, entry, &start, &bytes, false);
3770		if (ret2 || start >= end) {
3771			/*
3772			 * We lossily consider a bitmap trimmed if we only skip
3773			 * over regions <= BTRFS_ASYNC_DISCARD_MIN_FILTER.
3774			 */
3775			if (ret2 && minlen <= BTRFS_ASYNC_DISCARD_MIN_FILTER)
3776				end_trimming_bitmap(ctl, entry);
3777			else
3778				entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
3779			spin_unlock(&ctl->tree_lock);
3780			mutex_unlock(&ctl->cache_writeout_mutex);
3781			next_bitmap = true;
3782			goto next;
3783		}
3784
3785		/*
3786		 * We already trimmed a region, but are using the locking above
3787		 * to reset the trim_state.
3788		 */
3789		if (async && *total_trimmed) {
3790			spin_unlock(&ctl->tree_lock);
3791			mutex_unlock(&ctl->cache_writeout_mutex);
3792			goto out;
3793		}
3794
3795		bytes = min(bytes, end - start);
3796		if (bytes < minlen || (async && maxlen && bytes > maxlen)) {
3797			spin_unlock(&ctl->tree_lock);
3798			mutex_unlock(&ctl->cache_writeout_mutex);
3799			goto next;
3800		}
3801
3802		/*
3803		 * Let bytes = @max_discard_size + X.
3804		 * If X < @minlen, we won't trim X when we come back around.
3805		 * So trim it now.  We differ here from trimming extents as we
3806		 * don't keep individual state per bit.
3807		 */
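		/*
		 * Worked example (sizes assumed for illustration): with
		 * max_discard_size = 64M and minlen = 4K, a 64M+8K region is
		 * clamped to 64M and the 8K tail is trimmed on the next pass,
		 * while a 64M+2K region is trimmed whole, because a 2K
		 * leftover could never pass the minlen filter on its own.
		 */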
3808		if (async &&
3809		    max_discard_size &&
3810		    bytes > (max_discard_size + minlen))
3811			bytes = max_discard_size;
3812
3813		bitmap_clear_bits(ctl, entry, start, bytes);
3814		if (entry->bytes == 0)
3815			free_bitmap(ctl, entry);
3816
3817		spin_unlock(&ctl->tree_lock);
3818		trim_entry.start = start;
3819		trim_entry.bytes = bytes;
3820		list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
3821		mutex_unlock(&ctl->cache_writeout_mutex);
3822
3823		ret = do_trimming(block_group, total_trimmed, start, bytes,
3824				  start, bytes, 0, &trim_entry);
3825		if (ret) {
3826			reset_trimming_bitmap(ctl, offset);
3827			block_group->discard_cursor =
3828				btrfs_block_group_end(block_group);
3829			break;
3830		}
3831next:
3832		if (next_bitmap) {
3833			offset += BITS_PER_BITMAP * ctl->unit;
3834			start = offset;
3835		} else {
3836			start += bytes;
3837		}
3838		block_group->discard_cursor = start;
3839
3840		if (fatal_signal_pending(current)) {
3841			if (start != offset)
3842				reset_trimming_bitmap(ctl, offset);
3843			ret = -ERESTARTSYS;
3844			break;
3845		}
3846
3847		cond_resched();
3848	}
3849
3850	if (offset >= end)
3851		block_group->discard_cursor = end;
3852
3853out:
3854	return ret;
3855}
3856
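/*
 * Trim the whole [start, end) range of a block group, extents and bitmaps
 * alike, in one synchronous pass (e.g. on behalf of the FITRIM ioctl).  The
 * async discard paths use the _extents/_bitmaps variants below instead.
 */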
3857int btrfs_trim_block_group(struct btrfs_block_group *block_group,
3858			   u64 *trimmed, u64 start, u64 end, u64 minlen)
3859{
3860	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3861	int ret;
3862	u64 rem = 0;
3863
3864	ASSERT(!btrfs_is_zoned(block_group->fs_info));
3865
3866	*trimmed = 0;
3867
3868	spin_lock(&block_group->lock);
3869	if (block_group->removed) {
3870		spin_unlock(&block_group->lock);
3871		return 0;
3872	}
3873	btrfs_freeze_block_group(block_group);
3874	spin_unlock(&block_group->lock);
3875
3876	ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, false);
3877	if (ret)
3878		goto out;
3879
3880	ret = trim_bitmaps(block_group, trimmed, start, end, minlen, 0, false);
3881	div64_u64_rem(end, BITS_PER_BITMAP * ctl->unit, &rem);
3882	/* If we ended in the middle of a bitmap, reset the trimming flag */
3883	if (rem)
3884		reset_trimming_bitmap(ctl, offset_to_bitmap(ctl, end));
3885out:
3886	btrfs_unfreeze_block_group(block_group);
3887	return ret;
3888}
3889
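/*
 * Trim only the free extent entries of a block group.  Together with the
 * bitmaps variant below this lets the async discard worker split its work
 * into smaller, resumable steps.
 */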
3890int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group,
3891				   u64 *trimmed, u64 start, u64 end, u64 minlen,
3892				   bool async)
3893{
3894	int ret;
3895
3896	*trimmed = 0;
3897
3898	spin_lock(&block_group->lock);
3899	if (block_group->removed) {
3900		spin_unlock(&block_group->lock);
3901		return 0;
3902	}
3903	btrfs_freeze_block_group(block_group);
3904	spin_unlock(&block_group->lock);
3905
3906	ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async);
3907	btrfs_unfreeze_block_group(block_group);
3908
3909	return ret;
3910}
3911
3912int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
3913				   u64 *trimmed, u64 start, u64 end, u64 minlen,
3914				   u64 maxlen, bool async)
3915{
3916	int ret;
3917
3918	*trimmed = 0;
3919
3920	spin_lock(&block_group->lock);
3921	if (block_group->removed) {
3922		spin_unlock(&block_group->lock);
3923		return 0;
3924	}
3925	btrfs_freeze_block_group(block_group);
3926	spin_unlock(&block_group->lock);
3927
3928	ret = trim_bitmaps(block_group, trimmed, start, end, minlen, maxlen,
3929			   async);
3930
3931	btrfs_unfreeze_block_group(block_group);
3932
3933	return ret;
3934}
3935
3936bool btrfs_free_space_cache_v1_active(struct btrfs_fs_info *fs_info)
3937{
3938	return btrfs_super_cache_generation(fs_info->super_copy);
3939}
3940
3941static int cleanup_free_space_cache_v1(struct btrfs_fs_info *fs_info,
3942				       struct btrfs_trans_handle *trans)
3943{
3944	struct btrfs_block_group *block_group;
3945	struct rb_node *node;
3946	int ret = 0;
3947
3948	btrfs_info(fs_info, "cleaning free space cache v1");
3949
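	/*
	 * Walk every block group and delete its v1 free space cache inode;
	 * nothing here touches the v2 free space tree.
	 */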
3950	node = rb_first(&fs_info->block_group_cache_tree);
3951	while (node) {
3952		block_group = rb_entry(node, struct btrfs_block_group, cache_node);
3953		ret = btrfs_remove_free_space_inode(trans, NULL, block_group);
3954		if (ret)
3955			goto out;
3956		node = rb_next(node);
3957	}
3958out:
3959	return ret;
3960}
3961
3962int btrfs_set_free_space_cache_v1_active(struct btrfs_fs_info *fs_info, bool active)
3963{
3964	struct btrfs_trans_handle *trans;
3965	int ret;
3966
3967	/*
3968	 * update_super_roots will appropriately set or unset
3969	 * super_copy->cache_generation based on SPACE_CACHE and
3970	 * BTRFS_FS_CLEANUP_SPACE_CACHE_V1. For this reason, we need a
3971	 * transaction commit whether we are enabling space cache v1 and don't
3972	 * have any other work to do, or are disabling it and removing free
3973	 * space inodes.
3974	 */
3975	trans = btrfs_start_transaction(fs_info->tree_root, 0);
3976	if (IS_ERR(trans))
3977		return PTR_ERR(trans);
3978
3979	if (!active) {
3980		set_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags);
3981		ret = cleanup_free_space_cache_v1(fs_info, trans);
3982		if (ret) {
3983			btrfs_abort_transaction(trans, ret);
3984			btrfs_end_transaction(trans);
3985			goto out;
3986		}
3987	}
3988
3989	ret = btrfs_commit_transaction(trans);
3990out:
3991	clear_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags);
3992
3993	return ret;
3994}
3995
3996#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
3997/*
3998 * Use this if you need to make a bitmap or extent entry specifically.  It
3999 * doesn't do any of the merging that add_free_space does; it acts a lot like
4000 * how the free space cache loading code works, so you can get really weird
4001 * configurations.
4002 */
4003int test_add_free_space_entry(struct btrfs_block_group *cache,
4004			      u64 offset, u64 bytes, bool bitmap)
4005{
4006	struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
4007	struct btrfs_free_space *info = NULL, *bitmap_info;
4008	void *map = NULL;
4009	enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_TRIMMED;
4010	u64 bytes_added;
4011	int ret;
4012
4013again:
4014	if (!info) {
4015		info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
4016		if (!info)
4017			return -ENOMEM;
4018	}
4019
4020	if (!bitmap) {
4021		spin_lock(&ctl->tree_lock);
4022		info->offset = offset;
4023		info->bytes = bytes;
4024		info->max_extent_size = 0;
4025		ret = link_free_space(ctl, info);
4026		spin_unlock(&ctl->tree_lock);
4027		if (ret)
4028			kmem_cache_free(btrfs_free_space_cachep, info);
4029		return ret;
4030	}
4031
4032	if (!map) {
4033		map = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep, GFP_NOFS);
4034		if (!map) {
4035			kmem_cache_free(btrfs_free_space_cachep, info);
4036			return -ENOMEM;
4037		}
4038	}
4039
4040	spin_lock(&ctl->tree_lock);
4041	bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
4042					 1, 0);
4043	if (!bitmap_info) {
4044		info->bitmap = map;
4045		map = NULL;
4046		add_new_bitmap(ctl, info, offset);
4047		bitmap_info = info;
4048		info = NULL;
4049	}
4050
4051	bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
4052					  trim_state);
4053
4054	bytes -= bytes_added;
4055	offset += bytes_added;
4056	spin_unlock(&ctl->tree_lock);
4057
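	/*
	 * add_bytes_to_bitmap() only fills up to the end of a single bitmap,
	 * so loop back around if the requested range spills over into the
	 * next one (allocating a fresh info/map there as needed).
	 */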
4058	if (bytes)
4059		goto again;
4060
4061	if (info)
4062		kmem_cache_free(btrfs_free_space_cachep, info);
4063	if (map)
4064		kmem_cache_free(btrfs_free_space_bitmap_cachep, map);
4065	return 0;
4066}
4067
4068/*
4069 * Check whether the given range is in the free space cache.  This is really
4070 * just used to check for the absence of space, so if there is any free space
4071 * in the range at all we will return 1.
4072 */
4073int test_check_exists(struct btrfs_block_group *cache,
4074		      u64 offset, u64 bytes)
4075{
4076	struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
4077	struct btrfs_free_space *info;
4078	int ret = 0;
4079
4080	spin_lock(&ctl->tree_lock);
4081	info = tree_search_offset(ctl, offset, 0, 0);
4082	if (!info) {
4083		info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
4084					  1, 0);
4085		if (!info)
4086			goto out;
4087	}
4088
4089have_info:
4090	if (info->bitmap) {
4091		u64 bit_off, bit_bytes;
4092		struct rb_node *n;
4093		struct btrfs_free_space *tmp;
4094
4095		bit_off = offset;
4096		bit_bytes = ctl->unit;
4097		ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false);
4098		if (!ret) {
4099			if (bit_off == offset) {
4100				ret = 1;
4101				goto out;
4102			} else if (bit_off > offset &&
4103				   offset + bytes > bit_off) {
4104				ret = 1;
4105				goto out;
4106			}
4107		}
4108
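		/*
		 * No hit inside this bitmap; walk backwards over neighbouring
		 * entries and re-check (via have_info) any that overlap
		 * [offset, offset + bytes).
		 */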
4109		n = rb_prev(&info->offset_index);
4110		while (n) {
4111			tmp = rb_entry(n, struct btrfs_free_space,
4112				       offset_index);
4113			if (tmp->offset + tmp->bytes < offset)
4114				break;
4115			if (offset + bytes < tmp->offset) {
4116				n = rb_prev(&tmp->offset_index);
4117				continue;
4118			}
4119			info = tmp;
4120			goto have_info;
4121		}
4122
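		/*
		 * Same idea going forwards: stop once entries begin past the
		 * end of the range, otherwise re-check any overlapping entry.
		 */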
4123		n = rb_next(&info->offset_index);
4124		while (n) {
4125			tmp = rb_entry(n, struct btrfs_free_space,
4126				       offset_index);
4127			if (offset + bytes < tmp->offset)
4128				break;
4129			if (tmp->offset + tmp->bytes < offset) {
4130				n = rb_next(&tmp->offset_index);
4131				continue;
4132			}
4133			info = tmp;
4134			goto have_info;
4135		}
4136
4137		ret = 0;
4138		goto out;
4139	}
4140
4141	if (info->offset == offset) {
4142		ret = 1;
4143		goto out;
4144	}
4145
4146	if (offset > info->offset && offset < info->offset + info->bytes)
4147		ret = 1;
4148out:
4149	spin_unlock(&ctl->tree_lock);
4150	return ret;
4151}
4152#endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */
4153