free-space-cache.c revision 77233c2d
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2008 Red Hat.  All rights reserved.
4 */
5
6#include <linux/pagemap.h>
7#include <linux/sched.h>
8#include <linux/sched/signal.h>
9#include <linux/slab.h>
10#include <linux/math64.h>
11#include <linux/ratelimit.h>
12#include <linux/error-injection.h>
13#include <linux/sched/mm.h>
14#include "misc.h"
15#include "ctree.h"
16#include "free-space-cache.h"
17#include "transaction.h"
18#include "disk-io.h"
19#include "extent_io.h"
20#include "volumes.h"
21#include "space-info.h"
22#include "delalloc-space.h"
23#include "block-group.h"
24#include "discard.h"
25
26#define BITS_PER_BITMAP		(PAGE_SIZE * 8UL)
27#define MAX_CACHE_BYTES_PER_GIG	SZ_64K
28#define FORCE_EXTENT_THRESHOLD	SZ_1M
29
30struct btrfs_trim_range {
31	u64 start;
32	u64 bytes;
33	struct list_head list;
34};
35
36static int link_free_space(struct btrfs_free_space_ctl *ctl,
37			   struct btrfs_free_space *info);
38static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
39			      struct btrfs_free_space *info);
40static int search_bitmap(struct btrfs_free_space_ctl *ctl,
41			 struct btrfs_free_space *bitmap_info, u64 *offset,
42			 u64 *bytes, bool for_alloc);
43static void free_bitmap(struct btrfs_free_space_ctl *ctl,
44			struct btrfs_free_space *bitmap_info);
45static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
46			      struct btrfs_free_space *info, u64 offset,
47			      u64 bytes);
48
49static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
50					       struct btrfs_path *path,
51					       u64 offset)
52{
53	struct btrfs_fs_info *fs_info = root->fs_info;
54	struct btrfs_key key;
55	struct btrfs_key location;
56	struct btrfs_disk_key disk_key;
57	struct btrfs_free_space_header *header;
58	struct extent_buffer *leaf;
59	struct inode *inode = NULL;
60	unsigned nofs_flag;
61	int ret;
62
63	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
64	key.offset = offset;
65	key.type = 0;
66
67	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
68	if (ret < 0)
69		return ERR_PTR(ret);
70	if (ret > 0) {
71		btrfs_release_path(path);
72		return ERR_PTR(-ENOENT);
73	}
74
75	leaf = path->nodes[0];
76	header = btrfs_item_ptr(leaf, path->slots[0],
77				struct btrfs_free_space_header);
78	btrfs_free_space_key(leaf, header, &disk_key);
79	btrfs_disk_key_to_cpu(&location, &disk_key);
80	btrfs_release_path(path);
81
82	/*
83	 * We are often under a trans handle at this point, so we need to make
84	 * sure NOFS is set to keep us from deadlocking.
85	 */
86	nofs_flag = memalloc_nofs_save();
87	inode = btrfs_iget_path(fs_info->sb, location.objectid, root, path);
88	btrfs_release_path(path);
89	memalloc_nofs_restore(nofs_flag);
90	if (IS_ERR(inode))
91		return inode;
92
93	mapping_set_gfp_mask(inode->i_mapping,
94			mapping_gfp_constraint(inode->i_mapping,
95			~(__GFP_FS | __GFP_HIGHMEM)));
96
97	return inode;
98}
99
100struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
101		struct btrfs_path *path)
102{
103	struct btrfs_fs_info *fs_info = block_group->fs_info;
104	struct inode *inode = NULL;
105	u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
106
107	spin_lock(&block_group->lock);
108	if (block_group->inode)
109		inode = igrab(block_group->inode);
110	spin_unlock(&block_group->lock);
111	if (inode)
112		return inode;
113
114	inode = __lookup_free_space_inode(fs_info->tree_root, path,
115					  block_group->start);
116	if (IS_ERR(inode))
117		return inode;
118
119	spin_lock(&block_group->lock);
120	if (!((BTRFS_I(inode)->flags & flags) == flags)) {
121		btrfs_info(fs_info, "Old style space inode found, converting.");
122		BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
123			BTRFS_INODE_NODATACOW;
124		block_group->disk_cache_state = BTRFS_DC_CLEAR;
125	}
126
127	if (!block_group->iref) {
128		block_group->inode = igrab(inode);
129		block_group->iref = 1;
130	}
131	spin_unlock(&block_group->lock);
132
133	return inode;
134}
135
136static int __create_free_space_inode(struct btrfs_root *root,
137				     struct btrfs_trans_handle *trans,
138				     struct btrfs_path *path,
139				     u64 ino, u64 offset)
140{
141	struct btrfs_key key;
142	struct btrfs_disk_key disk_key;
143	struct btrfs_free_space_header *header;
144	struct btrfs_inode_item *inode_item;
145	struct extent_buffer *leaf;
146	/* We inline CRCs for the free disk space cache */
147	const u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC |
148			  BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
149	int ret;
150
151	ret = btrfs_insert_empty_inode(trans, root, path, ino);
152	if (ret)
153		return ret;
154
155	leaf = path->nodes[0];
156	inode_item = btrfs_item_ptr(leaf, path->slots[0],
157				    struct btrfs_inode_item);
158	btrfs_item_key(leaf, &disk_key, path->slots[0]);
159	memzero_extent_buffer(leaf, (unsigned long)inode_item,
160			     sizeof(*inode_item));
161	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
162	btrfs_set_inode_size(leaf, inode_item, 0);
163	btrfs_set_inode_nbytes(leaf, inode_item, 0);
164	btrfs_set_inode_uid(leaf, inode_item, 0);
165	btrfs_set_inode_gid(leaf, inode_item, 0);
166	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
167	btrfs_set_inode_flags(leaf, inode_item, flags);
168	btrfs_set_inode_nlink(leaf, inode_item, 1);
169	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
170	btrfs_set_inode_block_group(leaf, inode_item, offset);
171	btrfs_mark_buffer_dirty(leaf);
172	btrfs_release_path(path);
173
174	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
175	key.offset = offset;
176	key.type = 0;
177	ret = btrfs_insert_empty_item(trans, root, path, &key,
178				      sizeof(struct btrfs_free_space_header));
179	if (ret < 0) {
180		btrfs_release_path(path);
181		return ret;
182	}
183
184	leaf = path->nodes[0];
185	header = btrfs_item_ptr(leaf, path->slots[0],
186				struct btrfs_free_space_header);
187	memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header));
188	btrfs_set_free_space_key(leaf, header, &disk_key);
189	btrfs_mark_buffer_dirty(leaf);
190	btrfs_release_path(path);
191
192	return 0;
193}
194
195int create_free_space_inode(struct btrfs_trans_handle *trans,
196			    struct btrfs_block_group *block_group,
197			    struct btrfs_path *path)
198{
199	int ret;
200	u64 ino;
201
202	ret = btrfs_get_free_objectid(trans->fs_info->tree_root, &ino);
203	if (ret < 0)
204		return ret;
205
206	return __create_free_space_inode(trans->fs_info->tree_root, trans, path,
207					 ino, block_group->start);
208}
209
210/*
211 * inode is an optional sink: if it is NULL, btrfs_remove_free_space_inode
212 * handles lookup, otherwise it takes ownership and iputs the inode.
213 * Don't reuse an inode pointer after passing it into this function.
214 */
215int btrfs_remove_free_space_inode(struct btrfs_trans_handle *trans,
216				  struct inode *inode,
217				  struct btrfs_block_group *block_group)
218{
219	struct btrfs_path *path;
220	struct btrfs_key key;
221	int ret = 0;
222
223	path = btrfs_alloc_path();
224	if (!path)
225		return -ENOMEM;
226
227	if (!inode)
228		inode = lookup_free_space_inode(block_group, path);
229	if (IS_ERR(inode)) {
230		if (PTR_ERR(inode) != -ENOENT)
231			ret = PTR_ERR(inode);
232		goto out;
233	}
234	ret = btrfs_orphan_add(trans, BTRFS_I(inode));
235	if (ret) {
236		btrfs_add_delayed_iput(inode);
237		goto out;
238	}
239	clear_nlink(inode);
240	/* One for the block groups ref */
241	spin_lock(&block_group->lock);
242	if (block_group->iref) {
243		block_group->iref = 0;
244		block_group->inode = NULL;
245		spin_unlock(&block_group->lock);
246		iput(inode);
247	} else {
248		spin_unlock(&block_group->lock);
249	}
250	/* One for the lookup ref */
251	btrfs_add_delayed_iput(inode);
252
253	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
254	key.type = 0;
255	key.offset = block_group->start;
256	ret = btrfs_search_slot(trans, trans->fs_info->tree_root, &key, path,
257				-1, 1);
258	if (ret) {
259		if (ret > 0)
260			ret = 0;
261		goto out;
262	}
263	ret = btrfs_del_item(trans, trans->fs_info->tree_root, path);
264out:
265	btrfs_free_path(path);
266	return ret;
267}
268
269int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
270				       struct btrfs_block_rsv *rsv)
271{
272	u64 needed_bytes;
273	int ret;
274
275	/* 1 for slack space, 1 for updating the inode */
276	needed_bytes = btrfs_calc_insert_metadata_size(fs_info, 1) +
277		btrfs_calc_metadata_size(fs_info, 1);
278
279	spin_lock(&rsv->lock);
280	if (rsv->reserved < needed_bytes)
281		ret = -ENOSPC;
282	else
283		ret = 0;
284	spin_unlock(&rsv->lock);
285	return ret;
286}
287
288int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
289				    struct btrfs_block_group *block_group,
290				    struct inode *inode)
291{
292	struct btrfs_root *root = BTRFS_I(inode)->root;
293	int ret = 0;
294	bool locked = false;
295
296	if (block_group) {
297		struct btrfs_path *path = btrfs_alloc_path();
298
299		if (!path) {
300			ret = -ENOMEM;
301			goto fail;
302		}
303		locked = true;
304		mutex_lock(&trans->transaction->cache_write_mutex);
305		if (!list_empty(&block_group->io_list)) {
306			list_del_init(&block_group->io_list);
307
308			btrfs_wait_cache_io(trans, block_group, path);
309			btrfs_put_block_group(block_group);
310		}
311
312		/*
313		 * now that we've truncated the cache away, its no longer
314		 * setup or written
315		 */
316		spin_lock(&block_group->lock);
317		block_group->disk_cache_state = BTRFS_DC_CLEAR;
318		spin_unlock(&block_group->lock);
319		btrfs_free_path(path);
320	}
321
322	btrfs_i_size_write(BTRFS_I(inode), 0);
323	truncate_pagecache(inode, 0);
324
325	/*
326	 * We skip the throttling logic for free space cache inodes, so we don't
327	 * need to check for -EAGAIN.
328	 */
329	ret = btrfs_truncate_inode_items(trans, root, BTRFS_I(inode),
330					 0, BTRFS_EXTENT_DATA_KEY, NULL);
331	if (ret)
332		goto fail;
333
334	ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
335
336fail:
337	if (locked)
338		mutex_unlock(&trans->transaction->cache_write_mutex);
339	if (ret)
340		btrfs_abort_transaction(trans, ret);
341
342	return ret;
343}
344
345static void readahead_cache(struct inode *inode)
346{
347	struct file_ra_state ra;
348	unsigned long last_index;
349
350	file_ra_state_init(&ra, inode->i_mapping);
351	last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
352
353	page_cache_sync_readahead(inode->i_mapping, &ra, NULL, 0, last_index);
354}
355
356static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
357		       int write)
358{
359	int num_pages;
360
361	num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
362
363	/* Make sure we can fit our crcs and generation into the first page */
364	if (write && (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE)
365		return -ENOSPC;
366
367	memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));
368
369	io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS);
370	if (!io_ctl->pages)
371		return -ENOMEM;
372
373	io_ctl->num_pages = num_pages;
374	io_ctl->fs_info = btrfs_sb(inode->i_sb);
375	io_ctl->inode = inode;
376
377	return 0;
378}
379ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO);
380
381static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
382{
383	kfree(io_ctl->pages);
384	io_ctl->pages = NULL;
385}
386
387static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl)
388{
389	if (io_ctl->cur) {
390		io_ctl->cur = NULL;
391		io_ctl->orig = NULL;
392	}
393}
394
395static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear)
396{
397	ASSERT(io_ctl->index < io_ctl->num_pages);
398	io_ctl->page = io_ctl->pages[io_ctl->index++];
399	io_ctl->cur = page_address(io_ctl->page);
400	io_ctl->orig = io_ctl->cur;
401	io_ctl->size = PAGE_SIZE;
402	if (clear)
403		clear_page(io_ctl->cur);
404}
405
406static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
407{
408	int i;
409
410	io_ctl_unmap_page(io_ctl);
411
412	for (i = 0; i < io_ctl->num_pages; i++) {
413		if (io_ctl->pages[i]) {
414			ClearPageChecked(io_ctl->pages[i]);
415			unlock_page(io_ctl->pages[i]);
416			put_page(io_ctl->pages[i]);
417		}
418	}
419}
420
421static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, bool uptodate)
422{
423	struct page *page;
424	struct inode *inode = io_ctl->inode;
425	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
426	int i;
427
428	for (i = 0; i < io_ctl->num_pages; i++) {
429		int ret;
430
431		page = find_or_create_page(inode->i_mapping, i, mask);
432		if (!page) {
433			io_ctl_drop_pages(io_ctl);
434			return -ENOMEM;
435		}
436
437		ret = set_page_extent_mapped(page);
438		if (ret < 0) {
439			unlock_page(page);
440			put_page(page);
441			io_ctl_drop_pages(io_ctl);
442			return ret;
443		}
444
445		io_ctl->pages[i] = page;
446		if (uptodate && !PageUptodate(page)) {
447			btrfs_readpage(NULL, page);
448			lock_page(page);
449			if (page->mapping != inode->i_mapping) {
450				btrfs_err(BTRFS_I(inode)->root->fs_info,
451					  "free space cache page truncated");
452				io_ctl_drop_pages(io_ctl);
453				return -EIO;
454			}
455			if (!PageUptodate(page)) {
456				btrfs_err(BTRFS_I(inode)->root->fs_info,
457					   "error reading free space cache");
458				io_ctl_drop_pages(io_ctl);
459				return -EIO;
460			}
461		}
462	}
463
464	for (i = 0; i < io_ctl->num_pages; i++)
465		clear_page_dirty_for_io(io_ctl->pages[i]);
466
467	return 0;
468}
469
470static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
471{
472	io_ctl_map_page(io_ctl, 1);
473
474	/*
475	 * Skip the csum areas.  If we don't check crcs then we just have a
476	 * 64bit chunk at the front of the first page.
477	 */
478	io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
479	io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
480
481	put_unaligned_le64(generation, io_ctl->cur);
482	io_ctl->cur += sizeof(u64);
483}
484
485static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
486{
487	u64 cache_gen;
488
489	/*
490	 * Skip the crc area.  If we don't check crcs then we just have a 64bit
491	 * chunk at the front of the first page.
492	 */
493	io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
494	io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
495
496	cache_gen = get_unaligned_le64(io_ctl->cur);
497	if (cache_gen != generation) {
498		btrfs_err_rl(io_ctl->fs_info,
499			"space cache generation (%llu) does not match inode (%llu)",
500				cache_gen, generation);
501		io_ctl_unmap_page(io_ctl);
502		return -EIO;
503	}
504	io_ctl->cur += sizeof(u64);
505	return 0;
506}
507
508static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index)
509{
510	u32 *tmp;
511	u32 crc = ~(u32)0;
512	unsigned offset = 0;
513
514	if (index == 0)
515		offset = sizeof(u32) * io_ctl->num_pages;
516
517	crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
518	btrfs_crc32c_final(crc, (u8 *)&crc);
519	io_ctl_unmap_page(io_ctl);
520	tmp = page_address(io_ctl->pages[0]);
521	tmp += index;
522	*tmp = crc;
523}
524
525static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
526{
527	u32 *tmp, val;
528	u32 crc = ~(u32)0;
529	unsigned offset = 0;
530
531	if (index == 0)
532		offset = sizeof(u32) * io_ctl->num_pages;
533
534	tmp = page_address(io_ctl->pages[0]);
535	tmp += index;
536	val = *tmp;
537
538	io_ctl_map_page(io_ctl, 0);
539	crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
540	btrfs_crc32c_final(crc, (u8 *)&crc);
541	if (val != crc) {
542		btrfs_err_rl(io_ctl->fs_info,
543			"csum mismatch on free space cache");
544		io_ctl_unmap_page(io_ctl);
545		return -EIO;
546	}
547
548	return 0;
549}
550
551static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes,
552			    void *bitmap)
553{
554	struct btrfs_free_space_entry *entry;
555
556	if (!io_ctl->cur)
557		return -ENOSPC;
558
559	entry = io_ctl->cur;
560	put_unaligned_le64(offset, &entry->offset);
561	put_unaligned_le64(bytes, &entry->bytes);
562	entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
563		BTRFS_FREE_SPACE_EXTENT;
564	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
565	io_ctl->size -= sizeof(struct btrfs_free_space_entry);
566
567	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
568		return 0;
569
570	io_ctl_set_crc(io_ctl, io_ctl->index - 1);
571
572	/* No more pages to map */
573	if (io_ctl->index >= io_ctl->num_pages)
574		return 0;
575
576	/* map the next page */
577	io_ctl_map_page(io_ctl, 1);
578	return 0;
579}
580
581static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap)
582{
583	if (!io_ctl->cur)
584		return -ENOSPC;
585
586	/*
587	 * If we aren't at the start of the current page, unmap this one and
588	 * map the next one if there is any left.
589	 */
590	if (io_ctl->cur != io_ctl->orig) {
591		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
592		if (io_ctl->index >= io_ctl->num_pages)
593			return -ENOSPC;
594		io_ctl_map_page(io_ctl, 0);
595	}
596
597	copy_page(io_ctl->cur, bitmap);
598	io_ctl_set_crc(io_ctl, io_ctl->index - 1);
599	if (io_ctl->index < io_ctl->num_pages)
600		io_ctl_map_page(io_ctl, 0);
601	return 0;
602}
603
604static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl)
605{
606	/*
607	 * If we're not on the boundary we know we've modified the page and we
608	 * need to crc the page.
609	 */
610	if (io_ctl->cur != io_ctl->orig)
611		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
612	else
613		io_ctl_unmap_page(io_ctl);
614
615	while (io_ctl->index < io_ctl->num_pages) {
616		io_ctl_map_page(io_ctl, 1);
617		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
618	}
619}
620
621static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl,
622			    struct btrfs_free_space *entry, u8 *type)
623{
624	struct btrfs_free_space_entry *e;
625	int ret;
626
627	if (!io_ctl->cur) {
628		ret = io_ctl_check_crc(io_ctl, io_ctl->index);
629		if (ret)
630			return ret;
631	}
632
633	e = io_ctl->cur;
634	entry->offset = get_unaligned_le64(&e->offset);
635	entry->bytes = get_unaligned_le64(&e->bytes);
636	*type = e->type;
637	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
638	io_ctl->size -= sizeof(struct btrfs_free_space_entry);
639
640	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
641		return 0;
642
643	io_ctl_unmap_page(io_ctl);
644
645	return 0;
646}
647
648static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
649			      struct btrfs_free_space *entry)
650{
651	int ret;
652
653	ret = io_ctl_check_crc(io_ctl, io_ctl->index);
654	if (ret)
655		return ret;
656
657	copy_page(entry->bitmap, io_ctl->cur);
658	io_ctl_unmap_page(io_ctl);
659
660	return 0;
661}
662
663static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
664{
665	struct btrfs_block_group *block_group = ctl->private;
666	u64 max_bytes;
667	u64 bitmap_bytes;
668	u64 extent_bytes;
669	u64 size = block_group->length;
670	u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
671	u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
672
673	max_bitmaps = max_t(u64, max_bitmaps, 1);
674
675	ASSERT(ctl->total_bitmaps <= max_bitmaps);
676
677	/*
678	 * We are trying to keep the total amount of memory used per 1GiB of
679	 * space to be MAX_CACHE_BYTES_PER_GIG.  However, with a reclamation
680	 * mechanism of pulling extents >= FORCE_EXTENT_THRESHOLD out of
681	 * bitmaps, we may end up using more memory than this.
682	 */
683	if (size < SZ_1G)
684		max_bytes = MAX_CACHE_BYTES_PER_GIG;
685	else
686		max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G);
687
688	bitmap_bytes = ctl->total_bitmaps * ctl->unit;
689
690	/*
691	 * we want the extent entry threshold to always be at most 1/2 the max
692	 * bytes we can have, or whatever is less than that.
693	 */
694	extent_bytes = max_bytes - bitmap_bytes;
695	extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1);
696
697	ctl->extents_thresh =
698		div_u64(extent_bytes, sizeof(struct btrfs_free_space));
699}
700
701static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
702				   struct btrfs_free_space_ctl *ctl,
703				   struct btrfs_path *path, u64 offset)
704{
705	struct btrfs_fs_info *fs_info = root->fs_info;
706	struct btrfs_free_space_header *header;
707	struct extent_buffer *leaf;
708	struct btrfs_io_ctl io_ctl;
709	struct btrfs_key key;
710	struct btrfs_free_space *e, *n;
711	LIST_HEAD(bitmaps);
712	u64 num_entries;
713	u64 num_bitmaps;
714	u64 generation;
715	u8 type;
716	int ret = 0;
717
718	/* Nothing in the space cache, goodbye */
719	if (!i_size_read(inode))
720		return 0;
721
722	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
723	key.offset = offset;
724	key.type = 0;
725
726	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
727	if (ret < 0)
728		return 0;
729	else if (ret > 0) {
730		btrfs_release_path(path);
731		return 0;
732	}
733
734	ret = -1;
735
736	leaf = path->nodes[0];
737	header = btrfs_item_ptr(leaf, path->slots[0],
738				struct btrfs_free_space_header);
739	num_entries = btrfs_free_space_entries(leaf, header);
740	num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
741	generation = btrfs_free_space_generation(leaf, header);
742	btrfs_release_path(path);
743
744	if (!BTRFS_I(inode)->generation) {
745		btrfs_info(fs_info,
746			   "the free space cache file (%llu) is invalid, skip it",
747			   offset);
748		return 0;
749	}
750
751	if (BTRFS_I(inode)->generation != generation) {
752		btrfs_err(fs_info,
753			  "free space inode generation (%llu) did not match free space cache generation (%llu)",
754			  BTRFS_I(inode)->generation, generation);
755		return 0;
756	}
757
758	if (!num_entries)
759		return 0;
760
761	ret = io_ctl_init(&io_ctl, inode, 0);
762	if (ret)
763		return ret;
764
765	readahead_cache(inode);
766
767	ret = io_ctl_prepare_pages(&io_ctl, true);
768	if (ret)
769		goto out;
770
771	ret = io_ctl_check_crc(&io_ctl, 0);
772	if (ret)
773		goto free_cache;
774
775	ret = io_ctl_check_generation(&io_ctl, generation);
776	if (ret)
777		goto free_cache;
778
779	while (num_entries) {
780		e = kmem_cache_zalloc(btrfs_free_space_cachep,
781				      GFP_NOFS);
782		if (!e) {
783			ret = -ENOMEM;
784			goto free_cache;
785		}
786
787		ret = io_ctl_read_entry(&io_ctl, e, &type);
788		if (ret) {
789			kmem_cache_free(btrfs_free_space_cachep, e);
790			goto free_cache;
791		}
792
793		if (!e->bytes) {
794			ret = -1;
795			kmem_cache_free(btrfs_free_space_cachep, e);
796			goto free_cache;
797		}
798
799		if (type == BTRFS_FREE_SPACE_EXTENT) {
800			spin_lock(&ctl->tree_lock);
801			ret = link_free_space(ctl, e);
802			spin_unlock(&ctl->tree_lock);
803			if (ret) {
804				btrfs_err(fs_info,
805					"Duplicate entries in free space cache, dumping");
806				kmem_cache_free(btrfs_free_space_cachep, e);
807				goto free_cache;
808			}
809		} else {
810			ASSERT(num_bitmaps);
811			num_bitmaps--;
812			e->bitmap = kmem_cache_zalloc(
813					btrfs_free_space_bitmap_cachep, GFP_NOFS);
814			if (!e->bitmap) {
815				ret = -ENOMEM;
816				kmem_cache_free(
817					btrfs_free_space_cachep, e);
818				goto free_cache;
819			}
820			spin_lock(&ctl->tree_lock);
821			ret = link_free_space(ctl, e);
822			ctl->total_bitmaps++;
823			recalculate_thresholds(ctl);
824			spin_unlock(&ctl->tree_lock);
825			if (ret) {
826				btrfs_err(fs_info,
827					"Duplicate entries in free space cache, dumping");
828				kmem_cache_free(btrfs_free_space_cachep, e);
829				goto free_cache;
830			}
831			list_add_tail(&e->list, &bitmaps);
832		}
833
834		num_entries--;
835	}
836
837	io_ctl_unmap_page(&io_ctl);
838
839	/*
840	 * We add the bitmaps at the end of the entries in order that
841	 * the bitmap entries are added to the cache.
842	 */
843	list_for_each_entry_safe(e, n, &bitmaps, list) {
844		list_del_init(&e->list);
845		ret = io_ctl_read_bitmap(&io_ctl, e);
846		if (ret)
847			goto free_cache;
848	}
849
850	io_ctl_drop_pages(&io_ctl);
851	ret = 1;
852out:
853	io_ctl_free(&io_ctl);
854	return ret;
855free_cache:
856	io_ctl_drop_pages(&io_ctl);
857	__btrfs_remove_free_space_cache(ctl);
858	goto out;
859}
860
861static int copy_free_space_cache(struct btrfs_block_group *block_group,
862				 struct btrfs_free_space_ctl *ctl)
863{
864	struct btrfs_free_space *info;
865	struct rb_node *n;
866	int ret = 0;
867
868	while (!ret && (n = rb_first(&ctl->free_space_offset)) != NULL) {
869		info = rb_entry(n, struct btrfs_free_space, offset_index);
870		if (!info->bitmap) {
871			unlink_free_space(ctl, info);
872			ret = btrfs_add_free_space(block_group, info->offset,
873						   info->bytes);
874			kmem_cache_free(btrfs_free_space_cachep, info);
875		} else {
876			u64 offset = info->offset;
877			u64 bytes = ctl->unit;
878
879			while (search_bitmap(ctl, info, &offset, &bytes,
880					     false) == 0) {
881				ret = btrfs_add_free_space(block_group, offset,
882							   bytes);
883				if (ret)
884					break;
885				bitmap_clear_bits(ctl, info, offset, bytes);
886				offset = info->offset;
887				bytes = ctl->unit;
888			}
889			free_bitmap(ctl, info);
890		}
891		cond_resched();
892	}
893	return ret;
894}
895
896int load_free_space_cache(struct btrfs_block_group *block_group)
897{
898	struct btrfs_fs_info *fs_info = block_group->fs_info;
899	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
900	struct btrfs_free_space_ctl tmp_ctl = {};
901	struct inode *inode;
902	struct btrfs_path *path;
903	int ret = 0;
904	bool matched;
905	u64 used = block_group->used;
906
907	/*
908	 * Because we could potentially discard our loaded free space, we want
909	 * to load everything into a temporary structure first, and then if it's
910	 * valid copy it all into the actual free space ctl.
911	 */
912	btrfs_init_free_space_ctl(block_group, &tmp_ctl);
913
914	/*
915	 * If this block group has been marked to be cleared for one reason or
916	 * another then we can't trust the on disk cache, so just return.
917	 */
918	spin_lock(&block_group->lock);
919	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
920		spin_unlock(&block_group->lock);
921		return 0;
922	}
923	spin_unlock(&block_group->lock);
924
925	path = btrfs_alloc_path();
926	if (!path)
927		return 0;
928	path->search_commit_root = 1;
929	path->skip_locking = 1;
930
931	/*
932	 * We must pass a path with search_commit_root set to btrfs_iget in
933	 * order to avoid a deadlock when allocating extents for the tree root.
934	 *
935	 * When we are COWing an extent buffer from the tree root, when looking
936	 * for a free extent, at extent-tree.c:find_free_extent(), we can find
937	 * block group without its free space cache loaded. When we find one
938	 * we must load its space cache which requires reading its free space
939	 * cache's inode item from the root tree. If this inode item is located
940	 * in the same leaf that we started COWing before, then we end up in
941	 * deadlock on the extent buffer (trying to read lock it when we
942	 * previously write locked it).
943	 *
944	 * It's safe to read the inode item using the commit root because
945	 * block groups, once loaded, stay in memory forever (until they are
946	 * removed) as well as their space caches once loaded. New block groups
947	 * once created get their ->cached field set to BTRFS_CACHE_FINISHED so
948	 * we will never try to read their inode item while the fs is mounted.
949	 */
950	inode = lookup_free_space_inode(block_group, path);
951	if (IS_ERR(inode)) {
952		btrfs_free_path(path);
953		return 0;
954	}
955
956	/* We may have converted the inode and made the cache invalid. */
957	spin_lock(&block_group->lock);
958	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
959		spin_unlock(&block_group->lock);
960		btrfs_free_path(path);
961		goto out;
962	}
963	spin_unlock(&block_group->lock);
964
965	ret = __load_free_space_cache(fs_info->tree_root, inode, &tmp_ctl,
966				      path, block_group->start);
967	btrfs_free_path(path);
968	if (ret <= 0)
969		goto out;
970
971	matched = (tmp_ctl.free_space == (block_group->length - used -
972					  block_group->bytes_super));
973
974	if (matched) {
975		ret = copy_free_space_cache(block_group, &tmp_ctl);
976		/*
977		 * ret == 1 means we successfully loaded the free space cache,
978		 * so we need to re-set it here.
979		 */
980		if (ret == 0)
981			ret = 1;
982	} else {
983		__btrfs_remove_free_space_cache(&tmp_ctl);
984		btrfs_warn(fs_info,
985			   "block group %llu has wrong amount of free space",
986			   block_group->start);
987		ret = -1;
988	}
989out:
990	if (ret < 0) {
991		/* This cache is bogus, make sure it gets cleared */
992		spin_lock(&block_group->lock);
993		block_group->disk_cache_state = BTRFS_DC_CLEAR;
994		spin_unlock(&block_group->lock);
995		ret = 0;
996
997		btrfs_warn(fs_info,
998			   "failed to load free space cache for block group %llu, rebuilding it now",
999			   block_group->start);
1000	}
1001
1002	spin_lock(&ctl->tree_lock);
1003	btrfs_discard_update_discardable(block_group);
1004	spin_unlock(&ctl->tree_lock);
1005	iput(inode);
1006	return ret;
1007}
1008
1009static noinline_for_stack
1010int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl,
1011			      struct btrfs_free_space_ctl *ctl,
1012			      struct btrfs_block_group *block_group,
1013			      int *entries, int *bitmaps,
1014			      struct list_head *bitmap_list)
1015{
1016	int ret;
1017	struct btrfs_free_cluster *cluster = NULL;
1018	struct btrfs_free_cluster *cluster_locked = NULL;
1019	struct rb_node *node = rb_first(&ctl->free_space_offset);
1020	struct btrfs_trim_range *trim_entry;
1021
1022	/* Get the cluster for this block_group if it exists */
1023	if (block_group && !list_empty(&block_group->cluster_list)) {
1024		cluster = list_entry(block_group->cluster_list.next,
1025				     struct btrfs_free_cluster,
1026				     block_group_list);
1027	}
1028
1029	if (!node && cluster) {
1030		cluster_locked = cluster;
1031		spin_lock(&cluster_locked->lock);
1032		node = rb_first(&cluster->root);
1033		cluster = NULL;
1034	}
1035
1036	/* Write out the extent entries */
1037	while (node) {
1038		struct btrfs_free_space *e;
1039
1040		e = rb_entry(node, struct btrfs_free_space, offset_index);
1041		*entries += 1;
1042
1043		ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes,
1044				       e->bitmap);
1045		if (ret)
1046			goto fail;
1047
1048		if (e->bitmap) {
1049			list_add_tail(&e->list, bitmap_list);
1050			*bitmaps += 1;
1051		}
1052		node = rb_next(node);
1053		if (!node && cluster) {
1054			node = rb_first(&cluster->root);
1055			cluster_locked = cluster;
1056			spin_lock(&cluster_locked->lock);
1057			cluster = NULL;
1058		}
1059	}
1060	if (cluster_locked) {
1061		spin_unlock(&cluster_locked->lock);
1062		cluster_locked = NULL;
1063	}
1064
1065	/*
1066	 * Make sure we don't miss any range that was removed from our rbtree
1067	 * because trimming is running. Otherwise after a umount+mount (or crash
1068	 * after committing the transaction) we would leak free space and get
1069	 * an inconsistent free space cache report from fsck.
1070	 */
1071	list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) {
1072		ret = io_ctl_add_entry(io_ctl, trim_entry->start,
1073				       trim_entry->bytes, NULL);
1074		if (ret)
1075			goto fail;
1076		*entries += 1;
1077	}
1078
1079	return 0;
1080fail:
1081	if (cluster_locked)
1082		spin_unlock(&cluster_locked->lock);
1083	return -ENOSPC;
1084}
1085
1086static noinline_for_stack int
1087update_cache_item(struct btrfs_trans_handle *trans,
1088		  struct btrfs_root *root,
1089		  struct inode *inode,
1090		  struct btrfs_path *path, u64 offset,
1091		  int entries, int bitmaps)
1092{
1093	struct btrfs_key key;
1094	struct btrfs_free_space_header *header;
1095	struct extent_buffer *leaf;
1096	int ret;
1097
1098	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
1099	key.offset = offset;
1100	key.type = 0;
1101
1102	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1103	if (ret < 0) {
1104		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
1105				 EXTENT_DELALLOC, 0, 0, NULL);
1106		goto fail;
1107	}
1108	leaf = path->nodes[0];
1109	if (ret > 0) {
1110		struct btrfs_key found_key;
1111		ASSERT(path->slots[0]);
1112		path->slots[0]--;
1113		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1114		if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
1115		    found_key.offset != offset) {
1116			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
1117					 inode->i_size - 1, EXTENT_DELALLOC, 0,
1118					 0, NULL);
1119			btrfs_release_path(path);
1120			goto fail;
1121		}
1122	}
1123
1124	BTRFS_I(inode)->generation = trans->transid;
1125	header = btrfs_item_ptr(leaf, path->slots[0],
1126				struct btrfs_free_space_header);
1127	btrfs_set_free_space_entries(leaf, header, entries);
1128	btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
1129	btrfs_set_free_space_generation(leaf, header, trans->transid);
1130	btrfs_mark_buffer_dirty(leaf);
1131	btrfs_release_path(path);
1132
1133	return 0;
1134
1135fail:
1136	return -1;
1137}
1138
1139static noinline_for_stack int write_pinned_extent_entries(
1140			    struct btrfs_trans_handle *trans,
1141			    struct btrfs_block_group *block_group,
1142			    struct btrfs_io_ctl *io_ctl,
1143			    int *entries)
1144{
1145	u64 start, extent_start, extent_end, len;
1146	struct extent_io_tree *unpin = NULL;
1147	int ret;
1148
1149	if (!block_group)
1150		return 0;
1151
1152	/*
1153	 * We want to add any pinned extents to our free space cache
1154	 * so we don't leak the space
1155	 *
1156	 * We shouldn't have switched the pinned extents yet so this is the
1157	 * right one
1158	 */
1159	unpin = &trans->transaction->pinned_extents;
1160
1161	start = block_group->start;
1162
1163	while (start < block_group->start + block_group->length) {
1164		ret = find_first_extent_bit(unpin, start,
1165					    &extent_start, &extent_end,
1166					    EXTENT_DIRTY, NULL);
1167		if (ret)
1168			return 0;
1169
1170		/* This pinned extent is out of our range */
1171		if (extent_start >= block_group->start + block_group->length)
1172			return 0;
1173
1174		extent_start = max(extent_start, start);
1175		extent_end = min(block_group->start + block_group->length,
1176				 extent_end + 1);
1177		len = extent_end - extent_start;
1178
1179		*entries += 1;
1180		ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL);
1181		if (ret)
1182			return -ENOSPC;
1183
1184		start = extent_end;
1185	}
1186
1187	return 0;
1188}
1189
1190static noinline_for_stack int
1191write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list)
1192{
1193	struct btrfs_free_space *entry, *next;
1194	int ret;
1195
1196	/* Write out the bitmaps */
1197	list_for_each_entry_safe(entry, next, bitmap_list, list) {
1198		ret = io_ctl_add_bitmap(io_ctl, entry->bitmap);
1199		if (ret)
1200			return -ENOSPC;
1201		list_del_init(&entry->list);
1202	}
1203
1204	return 0;
1205}
1206
1207static int flush_dirty_cache(struct inode *inode)
1208{
1209	int ret;
1210
1211	ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
1212	if (ret)
1213		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
1214				 EXTENT_DELALLOC, 0, 0, NULL);
1215
1216	return ret;
1217}
1218
1219static void noinline_for_stack
1220cleanup_bitmap_list(struct list_head *bitmap_list)
1221{
1222	struct btrfs_free_space *entry, *next;
1223
1224	list_for_each_entry_safe(entry, next, bitmap_list, list)
1225		list_del_init(&entry->list);
1226}
1227
1228static void noinline_for_stack
1229cleanup_write_cache_enospc(struct inode *inode,
1230			   struct btrfs_io_ctl *io_ctl,
1231			   struct extent_state **cached_state)
1232{
1233	io_ctl_drop_pages(io_ctl);
1234	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1235			     i_size_read(inode) - 1, cached_state);
1236}
1237
1238static int __btrfs_wait_cache_io(struct btrfs_root *root,
1239				 struct btrfs_trans_handle *trans,
1240				 struct btrfs_block_group *block_group,
1241				 struct btrfs_io_ctl *io_ctl,
1242				 struct btrfs_path *path, u64 offset)
1243{
1244	int ret;
1245	struct inode *inode = io_ctl->inode;
1246
1247	if (!inode)
1248		return 0;
1249
1250	/* Flush the dirty pages in the cache file. */
1251	ret = flush_dirty_cache(inode);
1252	if (ret)
1253		goto out;
1254
1255	/* Update the cache item to tell everyone this cache file is valid. */
1256	ret = update_cache_item(trans, root, inode, path, offset,
1257				io_ctl->entries, io_ctl->bitmaps);
1258out:
1259	if (ret) {
1260		invalidate_inode_pages2(inode->i_mapping);
1261		BTRFS_I(inode)->generation = 0;
1262		if (block_group)
1263			btrfs_debug(root->fs_info,
1264	  "failed to write free space cache for block group %llu error %d",
1265				  block_group->start, ret);
1266	}
1267	btrfs_update_inode(trans, root, BTRFS_I(inode));
1268
1269	if (block_group) {
1270		/* the dirty list is protected by the dirty_bgs_lock */
1271		spin_lock(&trans->transaction->dirty_bgs_lock);
1272
1273		/* the disk_cache_state is protected by the block group lock */
1274		spin_lock(&block_group->lock);
1275
1276		/*
1277		 * only mark this as written if we didn't get put back on
1278		 * the dirty list while waiting for IO.   Otherwise our
1279		 * cache state won't be right, and we won't get written again
1280		 */
1281		if (!ret && list_empty(&block_group->dirty_list))
1282			block_group->disk_cache_state = BTRFS_DC_WRITTEN;
1283		else if (ret)
1284			block_group->disk_cache_state = BTRFS_DC_ERROR;
1285
1286		spin_unlock(&block_group->lock);
1287		spin_unlock(&trans->transaction->dirty_bgs_lock);
1288		io_ctl->inode = NULL;
1289		iput(inode);
1290	}
1291
1292	return ret;
1293
1294}
1295
1296int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
1297			struct btrfs_block_group *block_group,
1298			struct btrfs_path *path)
1299{
1300	return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans,
1301				     block_group, &block_group->io_ctl,
1302				     path, block_group->start);
1303}
1304
1305/**
1306 * Write out cached info to an inode
1307 *
1308 * @root:        root the inode belongs to
1309 * @inode:       freespace inode we are writing out
1310 * @ctl:         free space cache we are going to write out
1311 * @block_group: block_group for this cache if it belongs to a block_group
1312 * @io_ctl:      holds context for the io
1313 * @trans:       the trans handle
1314 *
1315 * This function writes out a free space cache struct to disk for quick recovery
1316 * on mount.  This will return 0 if it was successful in writing the cache out,
1317 * or an errno if it was not.
1318 */
1319static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1320				   struct btrfs_free_space_ctl *ctl,
1321				   struct btrfs_block_group *block_group,
1322				   struct btrfs_io_ctl *io_ctl,
1323				   struct btrfs_trans_handle *trans)
1324{
1325	struct extent_state *cached_state = NULL;
1326	LIST_HEAD(bitmap_list);
1327	int entries = 0;
1328	int bitmaps = 0;
1329	int ret;
1330	int must_iput = 0;
1331
1332	if (!i_size_read(inode))
1333		return -EIO;
1334
1335	WARN_ON(io_ctl->pages);
1336	ret = io_ctl_init(io_ctl, inode, 1);
1337	if (ret)
1338		return ret;
1339
1340	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) {
1341		down_write(&block_group->data_rwsem);
1342		spin_lock(&block_group->lock);
1343		if (block_group->delalloc_bytes) {
1344			block_group->disk_cache_state = BTRFS_DC_WRITTEN;
1345			spin_unlock(&block_group->lock);
1346			up_write(&block_group->data_rwsem);
1347			BTRFS_I(inode)->generation = 0;
1348			ret = 0;
1349			must_iput = 1;
1350			goto out;
1351		}
1352		spin_unlock(&block_group->lock);
1353	}
1354
1355	/* Lock all pages first so we can lock the extent safely. */
1356	ret = io_ctl_prepare_pages(io_ctl, false);
1357	if (ret)
1358		goto out_unlock;
1359
1360	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
1361			 &cached_state);
1362
1363	io_ctl_set_generation(io_ctl, trans->transid);
1364
1365	mutex_lock(&ctl->cache_writeout_mutex);
1366	/* Write out the extent entries in the free space cache */
1367	spin_lock(&ctl->tree_lock);
1368	ret = write_cache_extent_entries(io_ctl, ctl,
1369					 block_group, &entries, &bitmaps,
1370					 &bitmap_list);
1371	if (ret)
1372		goto out_nospc_locked;
1373
1374	/*
1375	 * Some spaces that are freed in the current transaction are pinned,
1376	 * they will be added into free space cache after the transaction is
1377	 * committed, we shouldn't lose them.
1378	 *
1379	 * If this changes while we are working we'll get added back to
1380	 * the dirty list and redo it.  No locking needed
1381	 */
1382	ret = write_pinned_extent_entries(trans, block_group, io_ctl, &entries);
1383	if (ret)
1384		goto out_nospc_locked;
1385
1386	/*
1387	 * At last, we write out all the bitmaps and keep cache_writeout_mutex
1388	 * locked while doing it because a concurrent trim can be manipulating
1389	 * or freeing the bitmap.
1390	 */
1391	ret = write_bitmap_entries(io_ctl, &bitmap_list);
1392	spin_unlock(&ctl->tree_lock);
1393	mutex_unlock(&ctl->cache_writeout_mutex);
1394	if (ret)
1395		goto out_nospc;
1396
1397	/* Zero out the rest of the pages just to make sure */
1398	io_ctl_zero_remaining_pages(io_ctl);
1399
1400	/* Everything is written out, now we dirty the pages in the file. */
1401	ret = btrfs_dirty_pages(BTRFS_I(inode), io_ctl->pages,
1402				io_ctl->num_pages, 0, i_size_read(inode),
1403				&cached_state, false);
1404	if (ret)
1405		goto out_nospc;
1406
1407	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
1408		up_write(&block_group->data_rwsem);
1409	/*
1410	 * Release the pages and unlock the extent, we will flush
1411	 * them out later
1412	 */
1413	io_ctl_drop_pages(io_ctl);
1414	io_ctl_free(io_ctl);
1415
1416	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1417			     i_size_read(inode) - 1, &cached_state);
1418
1419	/*
1420	 * at this point the pages are under IO and we're happy,
1421	 * The caller is responsible for waiting on them and updating
1422	 * the cache and the inode
1423	 */
1424	io_ctl->entries = entries;
1425	io_ctl->bitmaps = bitmaps;
1426
1427	ret = btrfs_fdatawrite_range(inode, 0, (u64)-1);
1428	if (ret)
1429		goto out;
1430
1431	return 0;
1432
1433out_nospc_locked:
1434	cleanup_bitmap_list(&bitmap_list);
1435	spin_unlock(&ctl->tree_lock);
1436	mutex_unlock(&ctl->cache_writeout_mutex);
1437
1438out_nospc:
1439	cleanup_write_cache_enospc(inode, io_ctl, &cached_state);
1440
1441out_unlock:
1442	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
1443		up_write(&block_group->data_rwsem);
1444
1445out:
1446	io_ctl->inode = NULL;
1447	io_ctl_free(io_ctl);
1448	if (ret) {
1449		invalidate_inode_pages2(inode->i_mapping);
1450		BTRFS_I(inode)->generation = 0;
1451	}
1452	btrfs_update_inode(trans, root, BTRFS_I(inode));
1453	if (must_iput)
1454		iput(inode);
1455	return ret;
1456}
1457
1458int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
1459			  struct btrfs_block_group *block_group,
1460			  struct btrfs_path *path)
1461{
1462	struct btrfs_fs_info *fs_info = trans->fs_info;
1463	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1464	struct inode *inode;
1465	int ret = 0;
1466
1467	spin_lock(&block_group->lock);
1468	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
1469		spin_unlock(&block_group->lock);
1470		return 0;
1471	}
1472	spin_unlock(&block_group->lock);
1473
1474	inode = lookup_free_space_inode(block_group, path);
1475	if (IS_ERR(inode))
1476		return 0;
1477
1478	ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl,
1479				block_group, &block_group->io_ctl, trans);
1480	if (ret) {
1481		btrfs_debug(fs_info,
1482	  "failed to write free space cache for block group %llu error %d",
1483			  block_group->start, ret);
1484		spin_lock(&block_group->lock);
1485		block_group->disk_cache_state = BTRFS_DC_ERROR;
1486		spin_unlock(&block_group->lock);
1487
1488		block_group->io_ctl.inode = NULL;
1489		iput(inode);
1490	}
1491
1492	/*
1493	 * if ret == 0 the caller is expected to call btrfs_wait_cache_io
1494	 * to wait for IO and put the inode
1495	 */
1496
1497	return ret;
1498}
1499
1500static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
1501					  u64 offset)
1502{
1503	ASSERT(offset >= bitmap_start);
1504	offset -= bitmap_start;
1505	return (unsigned long)(div_u64(offset, unit));
1506}
1507
1508static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
1509{
1510	return (unsigned long)(div_u64(bytes, unit));
1511}
1512
1513static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
1514				   u64 offset)
1515{
1516	u64 bitmap_start;
1517	u64 bytes_per_bitmap;
1518
1519	bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
1520	bitmap_start = offset - ctl->start;
1521	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
1522	bitmap_start *= bytes_per_bitmap;
1523	bitmap_start += ctl->start;
1524
1525	return bitmap_start;
1526}
1527
1528static int tree_insert_offset(struct rb_root *root, u64 offset,
1529			      struct rb_node *node, int bitmap)
1530{
1531	struct rb_node **p = &root->rb_node;
1532	struct rb_node *parent = NULL;
1533	struct btrfs_free_space *info;
1534
1535	while (*p) {
1536		parent = *p;
1537		info = rb_entry(parent, struct btrfs_free_space, offset_index);
1538
1539		if (offset < info->offset) {
1540			p = &(*p)->rb_left;
1541		} else if (offset > info->offset) {
1542			p = &(*p)->rb_right;
1543		} else {
1544			/*
1545			 * we could have a bitmap entry and an extent entry
1546			 * share the same offset.  If this is the case, we want
1547			 * the extent entry to always be found first if we do a
1548			 * linear search through the tree, since we want to have
1549			 * the quickest allocation time, and allocating from an
1550			 * extent is faster than allocating from a bitmap.  So
1551			 * if we're inserting a bitmap and we find an entry at
1552			 * this offset, we want to go right, or after this entry
1553			 * logically.  If we are inserting an extent and we've
1554			 * found a bitmap, we want to go left, or before
1555			 * logically.
1556			 */
1557			if (bitmap) {
1558				if (info->bitmap) {
1559					WARN_ON_ONCE(1);
1560					return -EEXIST;
1561				}
1562				p = &(*p)->rb_right;
1563			} else {
1564				if (!info->bitmap) {
1565					WARN_ON_ONCE(1);
1566					return -EEXIST;
1567				}
1568				p = &(*p)->rb_left;
1569			}
1570		}
1571	}
1572
1573	rb_link_node(node, parent, p);
1574	rb_insert_color(node, root);
1575
1576	return 0;
1577}
1578
1579/*
1580 * searches the tree for the given offset.
1581 *
1582 * fuzzy - If this is set, then we are trying to make an allocation, and we just
1583 * want a section that has at least bytes size and comes at or after the given
1584 * offset.
1585 */
1586static struct btrfs_free_space *
1587tree_search_offset(struct btrfs_free_space_ctl *ctl,
1588		   u64 offset, int bitmap_only, int fuzzy)
1589{
1590	struct rb_node *n = ctl->free_space_offset.rb_node;
1591	struct btrfs_free_space *entry, *prev = NULL;
1592
1593	/* find entry that is closest to the 'offset' */
1594	while (1) {
1595		if (!n) {
1596			entry = NULL;
1597			break;
1598		}
1599
1600		entry = rb_entry(n, struct btrfs_free_space, offset_index);
1601		prev = entry;
1602
1603		if (offset < entry->offset)
1604			n = n->rb_left;
1605		else if (offset > entry->offset)
1606			n = n->rb_right;
1607		else
1608			break;
1609	}
1610
1611	if (bitmap_only) {
1612		if (!entry)
1613			return NULL;
1614		if (entry->bitmap)
1615			return entry;
1616
1617		/*
1618		 * bitmap entry and extent entry may share same offset,
1619		 * in that case, bitmap entry comes after extent entry.
1620		 */
1621		n = rb_next(n);
1622		if (!n)
1623			return NULL;
1624		entry = rb_entry(n, struct btrfs_free_space, offset_index);
1625		if (entry->offset != offset)
1626			return NULL;
1627
1628		WARN_ON(!entry->bitmap);
1629		return entry;
1630	} else if (entry) {
1631		if (entry->bitmap) {
1632			/*
1633			 * if previous extent entry covers the offset,
1634			 * we should return it instead of the bitmap entry
1635			 */
1636			n = rb_prev(&entry->offset_index);
1637			if (n) {
1638				prev = rb_entry(n, struct btrfs_free_space,
1639						offset_index);
1640				if (!prev->bitmap &&
1641				    prev->offset + prev->bytes > offset)
1642					entry = prev;
1643			}
1644		}
1645		return entry;
1646	}
1647
1648	if (!prev)
1649		return NULL;
1650
1651	/* find last entry before the 'offset' */
1652	entry = prev;
1653	if (entry->offset > offset) {
1654		n = rb_prev(&entry->offset_index);
1655		if (n) {
1656			entry = rb_entry(n, struct btrfs_free_space,
1657					offset_index);
1658			ASSERT(entry->offset <= offset);
1659		} else {
1660			if (fuzzy)
1661				return entry;
1662			else
1663				return NULL;
1664		}
1665	}
1666
1667	if (entry->bitmap) {
1668		n = rb_prev(&entry->offset_index);
1669		if (n) {
1670			prev = rb_entry(n, struct btrfs_free_space,
1671					offset_index);
1672			if (!prev->bitmap &&
1673			    prev->offset + prev->bytes > offset)
1674				return prev;
1675		}
1676		if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
1677			return entry;
1678	} else if (entry->offset + entry->bytes > offset)
1679		return entry;
1680
1681	if (!fuzzy)
1682		return NULL;
1683
1684	while (1) {
1685		if (entry->bitmap) {
1686			if (entry->offset + BITS_PER_BITMAP *
1687			    ctl->unit > offset)
1688				break;
1689		} else {
1690			if (entry->offset + entry->bytes > offset)
1691				break;
1692		}
1693
1694		n = rb_next(&entry->offset_index);
1695		if (!n)
1696			return NULL;
1697		entry = rb_entry(n, struct btrfs_free_space, offset_index);
1698	}
1699	return entry;
1700}
1701
1702static inline void
1703__unlink_free_space(struct btrfs_free_space_ctl *ctl,
1704		    struct btrfs_free_space *info)
1705{
1706	rb_erase(&info->offset_index, &ctl->free_space_offset);
1707	ctl->free_extents--;
1708
1709	if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
1710		ctl->discardable_extents[BTRFS_STAT_CURR]--;
1711		ctl->discardable_bytes[BTRFS_STAT_CURR] -= info->bytes;
1712	}
1713}
1714
1715static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
1716			      struct btrfs_free_space *info)
1717{
1718	__unlink_free_space(ctl, info);
1719	ctl->free_space -= info->bytes;
1720}
1721
1722static int link_free_space(struct btrfs_free_space_ctl *ctl,
1723			   struct btrfs_free_space *info)
1724{
1725	int ret = 0;
1726
1727	ASSERT(info->bytes || info->bitmap);
1728	ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
1729				 &info->offset_index, (info->bitmap != NULL));
1730	if (ret)
1731		return ret;
1732
1733	if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
1734		ctl->discardable_extents[BTRFS_STAT_CURR]++;
1735		ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
1736	}
1737
1738	ctl->free_space += info->bytes;
1739	ctl->free_extents++;
1740	return ret;
1741}
1742
1743static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1744				       struct btrfs_free_space *info,
1745				       u64 offset, u64 bytes)
1746{
1747	unsigned long start, count, end;
1748	int extent_delta = -1;
1749
1750	start = offset_to_bit(info->offset, ctl->unit, offset);
1751	count = bytes_to_bits(bytes, ctl->unit);
1752	end = start + count;
1753	ASSERT(end <= BITS_PER_BITMAP);
1754
1755	bitmap_clear(info->bitmap, start, count);
1756
1757	info->bytes -= bytes;
1758	if (info->max_extent_size > ctl->unit)
1759		info->max_extent_size = 0;
1760
1761	if (start && test_bit(start - 1, info->bitmap))
1762		extent_delta++;
1763
1764	if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
1765		extent_delta++;
1766
1767	info->bitmap_extents += extent_delta;
1768	if (!btrfs_free_space_trimmed(info)) {
1769		ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
1770		ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
1771	}
1772}
1773
1774static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1775			      struct btrfs_free_space *info, u64 offset,
1776			      u64 bytes)
1777{
1778	__bitmap_clear_bits(ctl, info, offset, bytes);
1779	ctl->free_space -= bytes;
1780}
1781
1782static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1783			    struct btrfs_free_space *info, u64 offset,
1784			    u64 bytes)
1785{
1786	unsigned long start, count, end;
1787	int extent_delta = 1;
1788
1789	start = offset_to_bit(info->offset, ctl->unit, offset);
1790	count = bytes_to_bits(bytes, ctl->unit);
1791	end = start + count;
1792	ASSERT(end <= BITS_PER_BITMAP);
1793
1794	bitmap_set(info->bitmap, start, count);
1795
1796	info->bytes += bytes;
1797	ctl->free_space += bytes;
1798
1799	if (start && test_bit(start - 1, info->bitmap))
1800		extent_delta--;
1801
1802	if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
1803		extent_delta--;
1804
1805	info->bitmap_extents += extent_delta;
1806	if (!btrfs_free_space_trimmed(info)) {
1807		ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
1808		ctl->discardable_bytes[BTRFS_STAT_CURR] += bytes;
1809	}
1810}
1811
1812/*
1813 * If we can not find suitable extent, we will use bytes to record
1814 * the size of the max extent.
1815 */
1816static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1817			 struct btrfs_free_space *bitmap_info, u64 *offset,
1818			 u64 *bytes, bool for_alloc)
1819{
1820	unsigned long found_bits = 0;
1821	unsigned long max_bits = 0;
1822	unsigned long bits, i;
1823	unsigned long next_zero;
1824	unsigned long extent_bits;
1825
1826	/*
1827	 * Skip searching the bitmap if we don't have a contiguous section that
1828	 * is large enough for this allocation.
1829	 */
1830	if (for_alloc &&
1831	    bitmap_info->max_extent_size &&
1832	    bitmap_info->max_extent_size < *bytes) {
1833		*bytes = bitmap_info->max_extent_size;
1834		return -1;
1835	}
1836
1837	i = offset_to_bit(bitmap_info->offset, ctl->unit,
1838			  max_t(u64, *offset, bitmap_info->offset));
1839	bits = bytes_to_bits(*bytes, ctl->unit);
1840
1841	for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
1842		if (for_alloc && bits == 1) {
1843			found_bits = 1;
1844			break;
1845		}
1846		next_zero = find_next_zero_bit(bitmap_info->bitmap,
1847					       BITS_PER_BITMAP, i);
1848		extent_bits = next_zero - i;
1849		if (extent_bits >= bits) {
1850			found_bits = extent_bits;
1851			break;
1852		} else if (extent_bits > max_bits) {
1853			max_bits = extent_bits;
1854		}
1855		i = next_zero;
1856	}
1857
1858	if (found_bits) {
1859		*offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1860		*bytes = (u64)(found_bits) * ctl->unit;
1861		return 0;
1862	}
1863
1864	*bytes = (u64)(max_bits) * ctl->unit;
1865	bitmap_info->max_extent_size = *bytes;
1866	return -1;
1867}
1868
1869static inline u64 get_max_extent_size(struct btrfs_free_space *entry)
1870{
1871	if (entry->bitmap)
1872		return entry->max_extent_size;
1873	return entry->bytes;
1874}
1875
1876/* Cache the size of the max extent in bytes */
1877static struct btrfs_free_space *
1878find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
1879		unsigned long align, u64 *max_extent_size)
1880{
1881	struct btrfs_free_space *entry;
1882	struct rb_node *node;
1883	u64 tmp;
1884	u64 align_off;
1885	int ret;
1886
1887	if (!ctl->free_space_offset.rb_node)
1888		goto out;
1889
1890	entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1891	if (!entry)
1892		goto out;
1893
1894	for (node = &entry->offset_index; node; node = rb_next(node)) {
1895		entry = rb_entry(node, struct btrfs_free_space, offset_index);
1896		if (entry->bytes < *bytes) {
1897			*max_extent_size = max(get_max_extent_size(entry),
1898					       *max_extent_size);
1899			continue;
1900		}
1901
1902		/* make sure the space returned is big enough
1903		 * to match our requested alignment
1904		 */
1905		if (*bytes >= align) {
1906			tmp = entry->offset - ctl->start + align - 1;
1907			tmp = div64_u64(tmp, align);
1908			tmp = tmp * align + ctl->start;
1909			align_off = tmp - entry->offset;
1910		} else {
1911			align_off = 0;
1912			tmp = entry->offset;
1913		}
1914
1915		if (entry->bytes < *bytes + align_off) {
1916			*max_extent_size = max(get_max_extent_size(entry),
1917					       *max_extent_size);
1918			continue;
1919		}
1920
1921		if (entry->bitmap) {
1922			u64 size = *bytes;
1923
1924			ret = search_bitmap(ctl, entry, &tmp, &size, true);
1925			if (!ret) {
1926				*offset = tmp;
1927				*bytes = size;
1928				return entry;
1929			} else {
1930				*max_extent_size =
1931					max(get_max_extent_size(entry),
1932					    *max_extent_size);
1933			}
1934			continue;
1935		}
1936
1937		*offset = tmp;
1938		*bytes = entry->bytes - align_off;
1939		return entry;
1940	}
1941out:
1942	return NULL;
1943}
1944
1945static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
1946			   struct btrfs_free_space *info, u64 offset)
1947{
1948	info->offset = offset_to_bitmap(ctl, offset);
1949	info->bytes = 0;
1950	info->bitmap_extents = 0;
1951	INIT_LIST_HEAD(&info->list);
1952	link_free_space(ctl, info);
1953	ctl->total_bitmaps++;
1954	recalculate_thresholds(ctl);
1955}
1956
1957static void free_bitmap(struct btrfs_free_space_ctl *ctl,
1958			struct btrfs_free_space *bitmap_info)
1959{
1960	/*
1961	 * Normally when this is called, the bitmap is completely empty. However,
1962	 * if we are blowing up the free space cache for one reason or another
1963	 * via __btrfs_remove_free_space_cache(), then it may not be freed and
1964	 * we may leave stats on the table.
1965	 */
1966	if (bitmap_info->bytes && !btrfs_free_space_trimmed(bitmap_info)) {
1967		ctl->discardable_extents[BTRFS_STAT_CURR] -=
1968			bitmap_info->bitmap_extents;
1969		ctl->discardable_bytes[BTRFS_STAT_CURR] -= bitmap_info->bytes;
1970
1971	}
1972	unlink_free_space(ctl, bitmap_info);
1973	kmem_cache_free(btrfs_free_space_bitmap_cachep, bitmap_info->bitmap);
1974	kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1975	ctl->total_bitmaps--;
1976	recalculate_thresholds(ctl);
1977}
1978
1979static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
1980			      struct btrfs_free_space *bitmap_info,
1981			      u64 *offset, u64 *bytes)
1982{
1983	u64 end;
1984	u64 search_start, search_bytes;
1985	int ret;
1986
1987again:
1988	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
1989
1990	/*
1991	 * We need to search for bits in this bitmap.  We could only cover some
1992	 * of the extent in this bitmap thanks to how we add space, so we need
1993	 * to search for as much as it as we can and clear that amount, and then
1994	 * go searching for the next bit.
1995	 */
1996	search_start = *offset;
1997	search_bytes = ctl->unit;
1998	search_bytes = min(search_bytes, end - search_start + 1);
1999	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
2000			    false);
2001	if (ret < 0 || search_start != *offset)
2002		return -EINVAL;
2003
2004	/* We may have found more bits than what we need */
2005	search_bytes = min(search_bytes, *bytes);
2006
2007	/* Cannot clear past the end of the bitmap */
2008	search_bytes = min(search_bytes, end - search_start + 1);
2009
2010	bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
2011	*offset += search_bytes;
2012	*bytes -= search_bytes;
2013
2014	if (*bytes) {
2015		struct rb_node *next = rb_next(&bitmap_info->offset_index);
2016		if (!bitmap_info->bytes)
2017			free_bitmap(ctl, bitmap_info);
2018
2019		/*
2020		 * no entry after this bitmap, but we still have bytes to
2021		 * remove, so something has gone wrong.
2022		 */
2023		if (!next)
2024			return -EINVAL;
2025
2026		bitmap_info = rb_entry(next, struct btrfs_free_space,
2027				       offset_index);
2028
2029		/*
2030		 * if the next entry isn't a bitmap we need to return to let the
2031		 * extent stuff do its work.
2032		 */
2033		if (!bitmap_info->bitmap)
2034			return -EAGAIN;
2035
2036		/*
2037		 * Ok the next item is a bitmap, but it may not actually hold
2038		 * the information for the rest of this free space stuff, so
2039		 * look for it, and if we don't find it return so we can try
2040		 * everything over again.
2041		 */
2042		search_start = *offset;
2043		search_bytes = ctl->unit;
2044		ret = search_bitmap(ctl, bitmap_info, &search_start,
2045				    &search_bytes, false);
2046		if (ret < 0 || search_start != *offset)
2047			return -EAGAIN;
2048
2049		goto again;
2050	} else if (!bitmap_info->bytes)
2051		free_bitmap(ctl, bitmap_info);
2052
2053	return 0;
2054}
2055
2056static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
2057			       struct btrfs_free_space *info, u64 offset,
2058			       u64 bytes, enum btrfs_trim_state trim_state)
2059{
2060	u64 bytes_to_set = 0;
2061	u64 end;
2062
2063	/*
2064	 * This is a tradeoff to make bitmap trim state minimal.  We mark the
2065	 * whole bitmap untrimmed if at any point we add untrimmed regions.
2066	 */
2067	if (trim_state == BTRFS_TRIM_STATE_UNTRIMMED) {
2068		if (btrfs_free_space_trimmed(info)) {
2069			ctl->discardable_extents[BTRFS_STAT_CURR] +=
2070				info->bitmap_extents;
2071			ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
2072		}
2073		info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2074	}
2075
2076	end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
2077
2078	bytes_to_set = min(end - offset, bytes);
2079
2080	bitmap_set_bits(ctl, info, offset, bytes_to_set);
2081
2082	/*
2083	 * We set some bytes, we have no idea what the max extent size is
2084	 * anymore.
2085	 */
2086	info->max_extent_size = 0;
2087
2088	return bytes_to_set;
2089
2090}
2091
2092static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
2093		      struct btrfs_free_space *info)
2094{
2095	struct btrfs_block_group *block_group = ctl->private;
2096	struct btrfs_fs_info *fs_info = block_group->fs_info;
2097	bool forced = false;
2098
2099#ifdef CONFIG_BTRFS_DEBUG
2100	if (btrfs_should_fragment_free_space(block_group))
2101		forced = true;
2102#endif
2103
2104	/* This is a way to reclaim large regions from the bitmaps. */
2105	if (!forced && info->bytes >= FORCE_EXTENT_THRESHOLD)
2106		return false;
2107
2108	/*
2109	 * If we are below the extents threshold then we can add this as an
2110	 * extent, and don't have to deal with the bitmap
2111	 */
2112	if (!forced && ctl->free_extents < ctl->extents_thresh) {
2113		/*
2114		 * If this block group has some small extents we don't want to
2115		 * use up all of our free slots in the cache with them, we want
2116		 * to reserve them to larger extents, however if we have plenty
2117		 * of cache left then go ahead an dadd them, no sense in adding
2118		 * the overhead of a bitmap if we don't have to.
2119		 */
2120		if (info->bytes <= fs_info->sectorsize * 8) {
2121			if (ctl->free_extents * 3 <= ctl->extents_thresh)
2122				return false;
2123		} else {
2124			return false;
2125		}
2126	}
2127
2128	/*
2129	 * The original block groups from mkfs can be really small, like 8
2130	 * megabytes, so don't bother with a bitmap for those entries.  However
2131	 * some block groups can be smaller than what a bitmap would cover but
2132	 * are still large enough that they could overflow the 32k memory limit,
2133	 * so allow those block groups to still be allowed to have a bitmap
2134	 * entry.
2135	 */
2136	if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->length)
2137		return false;
2138
2139	return true;
2140}
2141
2142static const struct btrfs_free_space_op free_space_op = {
2143	.use_bitmap		= use_bitmap,
2144};
2145
2146static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
2147			      struct btrfs_free_space *info)
2148{
2149	struct btrfs_free_space *bitmap_info;
2150	struct btrfs_block_group *block_group = NULL;
2151	int added = 0;
2152	u64 bytes, offset, bytes_added;
2153	enum btrfs_trim_state trim_state;
2154	int ret;
2155
2156	bytes = info->bytes;
2157	offset = info->offset;
2158	trim_state = info->trim_state;
2159
2160	if (!ctl->op->use_bitmap(ctl, info))
2161		return 0;
2162
2163	if (ctl->op == &free_space_op)
2164		block_group = ctl->private;
2165again:
2166	/*
2167	 * Since we link bitmaps right into the cluster we need to see if we
2168	 * have a cluster here, and if so and it has our bitmap we need to add
2169	 * the free space to that bitmap.
2170	 */
2171	if (block_group && !list_empty(&block_group->cluster_list)) {
2172		struct btrfs_free_cluster *cluster;
2173		struct rb_node *node;
2174		struct btrfs_free_space *entry;
2175
2176		cluster = list_entry(block_group->cluster_list.next,
2177				     struct btrfs_free_cluster,
2178				     block_group_list);
2179		spin_lock(&cluster->lock);
2180		node = rb_first(&cluster->root);
2181		if (!node) {
2182			spin_unlock(&cluster->lock);
2183			goto no_cluster_bitmap;
2184		}
2185
2186		entry = rb_entry(node, struct btrfs_free_space, offset_index);
2187		if (!entry->bitmap) {
2188			spin_unlock(&cluster->lock);
2189			goto no_cluster_bitmap;
2190		}
2191
2192		if (entry->offset == offset_to_bitmap(ctl, offset)) {
2193			bytes_added = add_bytes_to_bitmap(ctl, entry, offset,
2194							  bytes, trim_state);
2195			bytes -= bytes_added;
2196			offset += bytes_added;
2197		}
2198		spin_unlock(&cluster->lock);
2199		if (!bytes) {
2200			ret = 1;
2201			goto out;
2202		}
2203	}
2204
2205no_cluster_bitmap:
2206	bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
2207					 1, 0);
2208	if (!bitmap_info) {
2209		ASSERT(added == 0);
2210		goto new_bitmap;
2211	}
2212
2213	bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
2214					  trim_state);
2215	bytes -= bytes_added;
2216	offset += bytes_added;
2217	added = 0;
2218
2219	if (!bytes) {
2220		ret = 1;
2221		goto out;
2222	} else
2223		goto again;
2224
2225new_bitmap:
2226	if (info && info->bitmap) {
2227		add_new_bitmap(ctl, info, offset);
2228		added = 1;
2229		info = NULL;
2230		goto again;
2231	} else {
2232		spin_unlock(&ctl->tree_lock);
2233
2234		/* no pre-allocated info, allocate a new one */
2235		if (!info) {
2236			info = kmem_cache_zalloc(btrfs_free_space_cachep,
2237						 GFP_NOFS);
2238			if (!info) {
2239				spin_lock(&ctl->tree_lock);
2240				ret = -ENOMEM;
2241				goto out;
2242			}
2243		}
2244
2245		/* allocate the bitmap */
2246		info->bitmap = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep,
2247						 GFP_NOFS);
2248		info->trim_state = BTRFS_TRIM_STATE_TRIMMED;
2249		spin_lock(&ctl->tree_lock);
2250		if (!info->bitmap) {
2251			ret = -ENOMEM;
2252			goto out;
2253		}
2254		goto again;
2255	}
2256
2257out:
2258	if (info) {
2259		if (info->bitmap)
2260			kmem_cache_free(btrfs_free_space_bitmap_cachep,
2261					info->bitmap);
2262		kmem_cache_free(btrfs_free_space_cachep, info);
2263	}
2264
2265	return ret;
2266}
2267
2268/*
2269 * Free space merging rules:
2270 *  1) Merge trimmed areas together
2271 *  2) Let untrimmed areas coalesce with trimmed areas
2272 *  3) Always pull neighboring regions from bitmaps
2273 *
2274 * The above rules are for when we merge free space based on btrfs_trim_state.
2275 * Rules 2 and 3 are subtle because they are suboptimal, but are done for the
2276 * same reason: to promote larger extent regions which makes life easier for
2277 * find_free_extent().  Rule 2 enables coalescing based on the common path
2278 * being returning free space from btrfs_finish_extent_commit().  So when free
2279 * space is trimmed, it will prevent aggregating trimmed new region and
2280 * untrimmed regions in the rb_tree.  Rule 3 is purely to obtain larger extents
2281 * and provide find_free_extent() with the largest extents possible hoping for
2282 * the reuse path.
2283 */
2284static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
2285			  struct btrfs_free_space *info, bool update_stat)
2286{
2287	struct btrfs_free_space *left_info = NULL;
2288	struct btrfs_free_space *right_info;
2289	bool merged = false;
2290	u64 offset = info->offset;
2291	u64 bytes = info->bytes;
2292	const bool is_trimmed = btrfs_free_space_trimmed(info);
2293
2294	/*
2295	 * first we want to see if there is free space adjacent to the range we
2296	 * are adding, if there is remove that struct and add a new one to
2297	 * cover the entire range
2298	 */
2299	right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
2300	if (right_info && rb_prev(&right_info->offset_index))
2301		left_info = rb_entry(rb_prev(&right_info->offset_index),
2302				     struct btrfs_free_space, offset_index);
2303	else if (!right_info)
2304		left_info = tree_search_offset(ctl, offset - 1, 0, 0);
2305
2306	/* See try_merge_free_space() comment. */
2307	if (right_info && !right_info->bitmap &&
2308	    (!is_trimmed || btrfs_free_space_trimmed(right_info))) {
2309		if (update_stat)
2310			unlink_free_space(ctl, right_info);
2311		else
2312			__unlink_free_space(ctl, right_info);
2313		info->bytes += right_info->bytes;
2314		kmem_cache_free(btrfs_free_space_cachep, right_info);
2315		merged = true;
2316	}
2317
2318	/* See try_merge_free_space() comment. */
2319	if (left_info && !left_info->bitmap &&
2320	    left_info->offset + left_info->bytes == offset &&
2321	    (!is_trimmed || btrfs_free_space_trimmed(left_info))) {
2322		if (update_stat)
2323			unlink_free_space(ctl, left_info);
2324		else
2325			__unlink_free_space(ctl, left_info);
2326		info->offset = left_info->offset;
2327		info->bytes += left_info->bytes;
2328		kmem_cache_free(btrfs_free_space_cachep, left_info);
2329		merged = true;
2330	}
2331
2332	return merged;
2333}
2334
2335static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl,
2336				     struct btrfs_free_space *info,
2337				     bool update_stat)
2338{
2339	struct btrfs_free_space *bitmap;
2340	unsigned long i;
2341	unsigned long j;
2342	const u64 end = info->offset + info->bytes;
2343	const u64 bitmap_offset = offset_to_bitmap(ctl, end);
2344	u64 bytes;
2345
2346	bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0);
2347	if (!bitmap)
2348		return false;
2349
2350	i = offset_to_bit(bitmap->offset, ctl->unit, end);
2351	j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i);
2352	if (j == i)
2353		return false;
2354	bytes = (j - i) * ctl->unit;
2355	info->bytes += bytes;
2356
2357	/* See try_merge_free_space() comment. */
2358	if (!btrfs_free_space_trimmed(bitmap))
2359		info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2360
2361	if (update_stat)
2362		bitmap_clear_bits(ctl, bitmap, end, bytes);
2363	else
2364		__bitmap_clear_bits(ctl, bitmap, end, bytes);
2365
2366	if (!bitmap->bytes)
2367		free_bitmap(ctl, bitmap);
2368
2369	return true;
2370}
2371
2372static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl,
2373				       struct btrfs_free_space *info,
2374				       bool update_stat)
2375{
2376	struct btrfs_free_space *bitmap;
2377	u64 bitmap_offset;
2378	unsigned long i;
2379	unsigned long j;
2380	unsigned long prev_j;
2381	u64 bytes;
2382
2383	bitmap_offset = offset_to_bitmap(ctl, info->offset);
2384	/* If we're on a boundary, try the previous logical bitmap. */
2385	if (bitmap_offset == info->offset) {
2386		if (info->offset == 0)
2387			return false;
2388		bitmap_offset = offset_to_bitmap(ctl, info->offset - 1);
2389	}
2390
2391	bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0);
2392	if (!bitmap)
2393		return false;
2394
2395	i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1;
2396	j = 0;
2397	prev_j = (unsigned long)-1;
2398	for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) {
2399		if (j > i)
2400			break;
2401		prev_j = j;
2402	}
2403	if (prev_j == i)
2404		return false;
2405
2406	if (prev_j == (unsigned long)-1)
2407		bytes = (i + 1) * ctl->unit;
2408	else
2409		bytes = (i - prev_j) * ctl->unit;
2410
2411	info->offset -= bytes;
2412	info->bytes += bytes;
2413
2414	/* See try_merge_free_space() comment. */
2415	if (!btrfs_free_space_trimmed(bitmap))
2416		info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2417
2418	if (update_stat)
2419		bitmap_clear_bits(ctl, bitmap, info->offset, bytes);
2420	else
2421		__bitmap_clear_bits(ctl, bitmap, info->offset, bytes);
2422
2423	if (!bitmap->bytes)
2424		free_bitmap(ctl, bitmap);
2425
2426	return true;
2427}
2428
2429/*
2430 * We prefer always to allocate from extent entries, both for clustered and
2431 * non-clustered allocation requests. So when attempting to add a new extent
2432 * entry, try to see if there's adjacent free space in bitmap entries, and if
2433 * there is, migrate that space from the bitmaps to the extent.
2434 * Like this we get better chances of satisfying space allocation requests
2435 * because we attempt to satisfy them based on a single cache entry, and never
2436 * on 2 or more entries - even if the entries represent a contiguous free space
2437 * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry
2438 * ends).
2439 */
2440static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl,
2441			      struct btrfs_free_space *info,
2442			      bool update_stat)
2443{
2444	/*
2445	 * Only work with disconnected entries, as we can change their offset,
2446	 * and must be extent entries.
2447	 */
2448	ASSERT(!info->bitmap);
2449	ASSERT(RB_EMPTY_NODE(&info->offset_index));
2450
2451	if (ctl->total_bitmaps > 0) {
2452		bool stole_end;
2453		bool stole_front = false;
2454
2455		stole_end = steal_from_bitmap_to_end(ctl, info, update_stat);
2456		if (ctl->total_bitmaps > 0)
2457			stole_front = steal_from_bitmap_to_front(ctl, info,
2458								 update_stat);
2459
2460		if (stole_end || stole_front)
2461			try_merge_free_space(ctl, info, update_stat);
2462	}
2463}
2464
2465int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
2466			   struct btrfs_free_space_ctl *ctl,
2467			   u64 offset, u64 bytes,
2468			   enum btrfs_trim_state trim_state)
2469{
2470	struct btrfs_block_group *block_group = ctl->private;
2471	struct btrfs_free_space *info;
2472	int ret = 0;
2473	u64 filter_bytes = bytes;
2474
2475	ASSERT(!btrfs_is_zoned(fs_info));
2476
2477	info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
2478	if (!info)
2479		return -ENOMEM;
2480
2481	info->offset = offset;
2482	info->bytes = bytes;
2483	info->trim_state = trim_state;
2484	RB_CLEAR_NODE(&info->offset_index);
2485
2486	spin_lock(&ctl->tree_lock);
2487
2488	if (try_merge_free_space(ctl, info, true))
2489		goto link;
2490
2491	/*
2492	 * There was no extent directly to the left or right of this new
2493	 * extent then we know we're going to have to allocate a new extent, so
2494	 * before we do that see if we need to drop this into a bitmap
2495	 */
2496	ret = insert_into_bitmap(ctl, info);
2497	if (ret < 0) {
2498		goto out;
2499	} else if (ret) {
2500		ret = 0;
2501		goto out;
2502	}
2503link:
2504	/*
2505	 * Only steal free space from adjacent bitmaps if we're sure we're not
2506	 * going to add the new free space to existing bitmap entries - because
2507	 * that would mean unnecessary work that would be reverted. Therefore
2508	 * attempt to steal space from bitmaps if we're adding an extent entry.
2509	 */
2510	steal_from_bitmap(ctl, info, true);
2511
2512	filter_bytes = max(filter_bytes, info->bytes);
2513
2514	ret = link_free_space(ctl, info);
2515	if (ret)
2516		kmem_cache_free(btrfs_free_space_cachep, info);
2517out:
2518	btrfs_discard_update_discardable(block_group);
2519	spin_unlock(&ctl->tree_lock);
2520
2521	if (ret) {
2522		btrfs_crit(fs_info, "unable to add free space :%d", ret);
2523		ASSERT(ret != -EEXIST);
2524	}
2525
2526	if (trim_state != BTRFS_TRIM_STATE_TRIMMED) {
2527		btrfs_discard_check_filter(block_group, filter_bytes);
2528		btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
2529	}
2530
2531	return ret;
2532}
2533
2534static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
2535					u64 bytenr, u64 size, bool used)
2536{
2537	struct btrfs_fs_info *fs_info = block_group->fs_info;
2538	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2539	u64 offset = bytenr - block_group->start;
2540	u64 to_free, to_unusable;
2541	const int bg_reclaim_threshold = READ_ONCE(fs_info->bg_reclaim_threshold);
2542
2543	spin_lock(&ctl->tree_lock);
2544	if (!used)
2545		to_free = size;
2546	else if (offset >= block_group->alloc_offset)
2547		to_free = size;
2548	else if (offset + size <= block_group->alloc_offset)
2549		to_free = 0;
2550	else
2551		to_free = offset + size - block_group->alloc_offset;
2552	to_unusable = size - to_free;
2553
2554	ctl->free_space += to_free;
2555	/*
2556	 * If the block group is read-only, we should account freed space into
2557	 * bytes_readonly.
2558	 */
2559	if (!block_group->ro)
2560		block_group->zone_unusable += to_unusable;
2561	spin_unlock(&ctl->tree_lock);
2562	if (!used) {
2563		spin_lock(&block_group->lock);
2564		block_group->alloc_offset -= size;
2565		spin_unlock(&block_group->lock);
2566	}
2567
2568	/* All the region is now unusable. Mark it as unused and reclaim */
2569	if (block_group->zone_unusable == block_group->length) {
2570		btrfs_mark_bg_unused(block_group);
2571	} else if (bg_reclaim_threshold &&
2572		   block_group->zone_unusable >=
2573		   div_factor_fine(block_group->length, bg_reclaim_threshold)) {
2574		btrfs_mark_bg_to_reclaim(block_group);
2575	}
2576
2577	return 0;
2578}
2579
2580int btrfs_add_free_space(struct btrfs_block_group *block_group,
2581			 u64 bytenr, u64 size)
2582{
2583	enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2584
2585	if (btrfs_is_zoned(block_group->fs_info))
2586		return __btrfs_add_free_space_zoned(block_group, bytenr, size,
2587						    true);
2588
2589	if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC))
2590		trim_state = BTRFS_TRIM_STATE_TRIMMED;
2591
2592	return __btrfs_add_free_space(block_group->fs_info,
2593				      block_group->free_space_ctl,
2594				      bytenr, size, trim_state);
2595}
2596
2597int btrfs_add_free_space_unused(struct btrfs_block_group *block_group,
2598				u64 bytenr, u64 size)
2599{
2600	if (btrfs_is_zoned(block_group->fs_info))
2601		return __btrfs_add_free_space_zoned(block_group, bytenr, size,
2602						    false);
2603
2604	return btrfs_add_free_space(block_group, bytenr, size);
2605}
2606
2607/*
2608 * This is a subtle distinction because when adding free space back in general,
2609 * we want it to be added as untrimmed for async. But in the case where we add
2610 * it on loading of a block group, we want to consider it trimmed.
2611 */
2612int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group,
2613				       u64 bytenr, u64 size)
2614{
2615	enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2616
2617	if (btrfs_is_zoned(block_group->fs_info))
2618		return __btrfs_add_free_space_zoned(block_group, bytenr, size,
2619						    true);
2620
2621	if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC) ||
2622	    btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
2623		trim_state = BTRFS_TRIM_STATE_TRIMMED;
2624
2625	return __btrfs_add_free_space(block_group->fs_info,
2626				      block_group->free_space_ctl,
2627				      bytenr, size, trim_state);
2628}
2629
2630int btrfs_remove_free_space(struct btrfs_block_group *block_group,
2631			    u64 offset, u64 bytes)
2632{
2633	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2634	struct btrfs_free_space *info;
2635	int ret;
2636	bool re_search = false;
2637
2638	if (btrfs_is_zoned(block_group->fs_info)) {
2639		/*
2640		 * This can happen with conventional zones when replaying log.
2641		 * Since the allocation info of tree-log nodes are not recorded
2642		 * to the extent-tree, calculate_alloc_pointer() failed to
2643		 * advance the allocation pointer after last allocated tree log
2644		 * node blocks.
2645		 *
2646		 * This function is called from
2647		 * btrfs_pin_extent_for_log_replay() when replaying the log.
2648		 * Advance the pointer not to overwrite the tree-log nodes.
2649		 */
2650		if (block_group->alloc_offset < offset + bytes)
2651			block_group->alloc_offset = offset + bytes;
2652		return 0;
2653	}
2654
2655	spin_lock(&ctl->tree_lock);
2656
2657again:
2658	ret = 0;
2659	if (!bytes)
2660		goto out_lock;
2661
2662	info = tree_search_offset(ctl, offset, 0, 0);
2663	if (!info) {
2664		/*
2665		 * oops didn't find an extent that matched the space we wanted
2666		 * to remove, look for a bitmap instead
2667		 */
2668		info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
2669					  1, 0);
2670		if (!info) {
2671			/*
2672			 * If we found a partial bit of our free space in a
2673			 * bitmap but then couldn't find the other part this may
2674			 * be a problem, so WARN about it.
2675			 */
2676			WARN_ON(re_search);
2677			goto out_lock;
2678		}
2679	}
2680
2681	re_search = false;
2682	if (!info->bitmap) {
2683		unlink_free_space(ctl, info);
2684		if (offset == info->offset) {
2685			u64 to_free = min(bytes, info->bytes);
2686
2687			info->bytes -= to_free;
2688			info->offset += to_free;
2689			if (info->bytes) {
2690				ret = link_free_space(ctl, info);
2691				WARN_ON(ret);
2692			} else {
2693				kmem_cache_free(btrfs_free_space_cachep, info);
2694			}
2695
2696			offset += to_free;
2697			bytes -= to_free;
2698			goto again;
2699		} else {
2700			u64 old_end = info->bytes + info->offset;
2701
2702			info->bytes = offset - info->offset;
2703			ret = link_free_space(ctl, info);
2704			WARN_ON(ret);
2705			if (ret)
2706				goto out_lock;
2707
2708			/* Not enough bytes in this entry to satisfy us */
2709			if (old_end < offset + bytes) {
2710				bytes -= old_end - offset;
2711				offset = old_end;
2712				goto again;
2713			} else if (old_end == offset + bytes) {
2714				/* all done */
2715				goto out_lock;
2716			}
2717			spin_unlock(&ctl->tree_lock);
2718
2719			ret = __btrfs_add_free_space(block_group->fs_info, ctl,
2720						     offset + bytes,
2721						     old_end - (offset + bytes),
2722						     info->trim_state);
2723			WARN_ON(ret);
2724			goto out;
2725		}
2726	}
2727
2728	ret = remove_from_bitmap(ctl, info, &offset, &bytes);
2729	if (ret == -EAGAIN) {
2730		re_search = true;
2731		goto again;
2732	}
2733out_lock:
2734	btrfs_discard_update_discardable(block_group);
2735	spin_unlock(&ctl->tree_lock);
2736out:
2737	return ret;
2738}
2739
2740void btrfs_dump_free_space(struct btrfs_block_group *block_group,
2741			   u64 bytes)
2742{
2743	struct btrfs_fs_info *fs_info = block_group->fs_info;
2744	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2745	struct btrfs_free_space *info;
2746	struct rb_node *n;
2747	int count = 0;
2748
2749	/*
2750	 * Zoned btrfs does not use free space tree and cluster. Just print
2751	 * out the free space after the allocation offset.
2752	 */
2753	if (btrfs_is_zoned(fs_info)) {
2754		btrfs_info(fs_info, "free space %llu",
2755			   block_group->length - block_group->alloc_offset);
2756		return;
2757	}
2758
2759	spin_lock(&ctl->tree_lock);
2760	for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
2761		info = rb_entry(n, struct btrfs_free_space, offset_index);
2762		if (info->bytes >= bytes && !block_group->ro)
2763			count++;
2764		btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s",
2765			   info->offset, info->bytes,
2766		       (info->bitmap) ? "yes" : "no");
2767	}
2768	spin_unlock(&ctl->tree_lock);
2769	btrfs_info(fs_info, "block group has cluster?: %s",
2770	       list_empty(&block_group->cluster_list) ? "no" : "yes");
2771	btrfs_info(fs_info,
2772		   "%d blocks of free space at or bigger than bytes is", count);
2773}
2774
2775void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group,
2776			       struct btrfs_free_space_ctl *ctl)
2777{
2778	struct btrfs_fs_info *fs_info = block_group->fs_info;
2779
2780	spin_lock_init(&ctl->tree_lock);
2781	ctl->unit = fs_info->sectorsize;
2782	ctl->start = block_group->start;
2783	ctl->private = block_group;
2784	ctl->op = &free_space_op;
2785	INIT_LIST_HEAD(&ctl->trimming_ranges);
2786	mutex_init(&ctl->cache_writeout_mutex);
2787
2788	/*
2789	 * we only want to have 32k of ram per block group for keeping
2790	 * track of free space, and if we pass 1/2 of that we want to
2791	 * start converting things over to using bitmaps
2792	 */
2793	ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space);
2794}
2795
2796/*
2797 * for a given cluster, put all of its extents back into the free
2798 * space cache.  If the block group passed doesn't match the block group
2799 * pointed to by the cluster, someone else raced in and freed the
2800 * cluster already.  In that case, we just return without changing anything
2801 */
2802static void __btrfs_return_cluster_to_free_space(
2803			     struct btrfs_block_group *block_group,
2804			     struct btrfs_free_cluster *cluster)
2805{
2806	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2807	struct btrfs_free_space *entry;
2808	struct rb_node *node;
2809
2810	spin_lock(&cluster->lock);
2811	if (cluster->block_group != block_group) {
2812		spin_unlock(&cluster->lock);
2813		return;
2814	}
2815
2816	cluster->block_group = NULL;
2817	cluster->window_start = 0;
2818	list_del_init(&cluster->block_group_list);
2819
2820	node = rb_first(&cluster->root);
2821	while (node) {
2822		bool bitmap;
2823
2824		entry = rb_entry(node, struct btrfs_free_space, offset_index);
2825		node = rb_next(&entry->offset_index);
2826		rb_erase(&entry->offset_index, &cluster->root);
2827		RB_CLEAR_NODE(&entry->offset_index);
2828
2829		bitmap = (entry->bitmap != NULL);
2830		if (!bitmap) {
2831			/* Merging treats extents as if they were new */
2832			if (!btrfs_free_space_trimmed(entry)) {
2833				ctl->discardable_extents[BTRFS_STAT_CURR]--;
2834				ctl->discardable_bytes[BTRFS_STAT_CURR] -=
2835					entry->bytes;
2836			}
2837
2838			try_merge_free_space(ctl, entry, false);
2839			steal_from_bitmap(ctl, entry, false);
2840
2841			/* As we insert directly, update these statistics */
2842			if (!btrfs_free_space_trimmed(entry)) {
2843				ctl->discardable_extents[BTRFS_STAT_CURR]++;
2844				ctl->discardable_bytes[BTRFS_STAT_CURR] +=
2845					entry->bytes;
2846			}
2847		}
2848		tree_insert_offset(&ctl->free_space_offset,
2849				   entry->offset, &entry->offset_index, bitmap);
2850	}
2851	cluster->root = RB_ROOT;
2852	spin_unlock(&cluster->lock);
2853	btrfs_put_block_group(block_group);
2854}
2855
2856static void __btrfs_remove_free_space_cache_locked(
2857				struct btrfs_free_space_ctl *ctl)
2858{
2859	struct btrfs_free_space *info;
2860	struct rb_node *node;
2861
2862	while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
2863		info = rb_entry(node, struct btrfs_free_space, offset_index);
2864		if (!info->bitmap) {
2865			unlink_free_space(ctl, info);
2866			kmem_cache_free(btrfs_free_space_cachep, info);
2867		} else {
2868			free_bitmap(ctl, info);
2869		}
2870
2871		cond_resched_lock(&ctl->tree_lock);
2872	}
2873}
2874
2875void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
2876{
2877	spin_lock(&ctl->tree_lock);
2878	__btrfs_remove_free_space_cache_locked(ctl);
2879	if (ctl->private)
2880		btrfs_discard_update_discardable(ctl->private);
2881	spin_unlock(&ctl->tree_lock);
2882}
2883
2884void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group)
2885{
2886	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2887	struct btrfs_free_cluster *cluster;
2888	struct list_head *head;
2889
2890	spin_lock(&ctl->tree_lock);
2891	while ((head = block_group->cluster_list.next) !=
2892	       &block_group->cluster_list) {
2893		cluster = list_entry(head, struct btrfs_free_cluster,
2894				     block_group_list);
2895
2896		WARN_ON(cluster->block_group != block_group);
2897		__btrfs_return_cluster_to_free_space(block_group, cluster);
2898
2899		cond_resched_lock(&ctl->tree_lock);
2900	}
2901	__btrfs_remove_free_space_cache_locked(ctl);
2902	btrfs_discard_update_discardable(block_group);
2903	spin_unlock(&ctl->tree_lock);
2904
2905}
2906
2907/**
2908 * btrfs_is_free_space_trimmed - see if everything is trimmed
2909 * @block_group: block_group of interest
2910 *
2911 * Walk @block_group's free space rb_tree to determine if everything is trimmed.
2912 */
2913bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group)
2914{
2915	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2916	struct btrfs_free_space *info;
2917	struct rb_node *node;
2918	bool ret = true;
2919
2920	spin_lock(&ctl->tree_lock);
2921	node = rb_first(&ctl->free_space_offset);
2922
2923	while (node) {
2924		info = rb_entry(node, struct btrfs_free_space, offset_index);
2925
2926		if (!btrfs_free_space_trimmed(info)) {
2927			ret = false;
2928			break;
2929		}
2930
2931		node = rb_next(node);
2932	}
2933
2934	spin_unlock(&ctl->tree_lock);
2935	return ret;
2936}
2937
2938u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
2939			       u64 offset, u64 bytes, u64 empty_size,
2940			       u64 *max_extent_size)
2941{
2942	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2943	struct btrfs_discard_ctl *discard_ctl =
2944					&block_group->fs_info->discard_ctl;
2945	struct btrfs_free_space *entry = NULL;
2946	u64 bytes_search = bytes + empty_size;
2947	u64 ret = 0;
2948	u64 align_gap = 0;
2949	u64 align_gap_len = 0;
2950	enum btrfs_trim_state align_gap_trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2951
2952	ASSERT(!btrfs_is_zoned(block_group->fs_info));
2953
2954	spin_lock(&ctl->tree_lock);
2955	entry = find_free_space(ctl, &offset, &bytes_search,
2956				block_group->full_stripe_len, max_extent_size);
2957	if (!entry)
2958		goto out;
2959
2960	ret = offset;
2961	if (entry->bitmap) {
2962		bitmap_clear_bits(ctl, entry, offset, bytes);
2963
2964		if (!btrfs_free_space_trimmed(entry))
2965			atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
2966
2967		if (!entry->bytes)
2968			free_bitmap(ctl, entry);
2969	} else {
2970		unlink_free_space(ctl, entry);
2971		align_gap_len = offset - entry->offset;
2972		align_gap = entry->offset;
2973		align_gap_trim_state = entry->trim_state;
2974
2975		if (!btrfs_free_space_trimmed(entry))
2976			atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
2977
2978		entry->offset = offset + bytes;
2979		WARN_ON(entry->bytes < bytes + align_gap_len);
2980
2981		entry->bytes -= bytes + align_gap_len;
2982		if (!entry->bytes)
2983			kmem_cache_free(btrfs_free_space_cachep, entry);
2984		else
2985			link_free_space(ctl, entry);
2986	}
2987out:
2988	btrfs_discard_update_discardable(block_group);
2989	spin_unlock(&ctl->tree_lock);
2990
2991	if (align_gap_len)
2992		__btrfs_add_free_space(block_group->fs_info, ctl,
2993				       align_gap, align_gap_len,
2994				       align_gap_trim_state);
2995	return ret;
2996}
2997
2998/*
2999 * given a cluster, put all of its extents back into the free space
3000 * cache.  If a block group is passed, this function will only free
3001 * a cluster that belongs to the passed block group.
3002 *
3003 * Otherwise, it'll get a reference on the block group pointed to by the
3004 * cluster and remove the cluster from it.
3005 */
3006void btrfs_return_cluster_to_free_space(
3007			       struct btrfs_block_group *block_group,
3008			       struct btrfs_free_cluster *cluster)
3009{
3010	struct btrfs_free_space_ctl *ctl;
3011
3012	/* first, get a safe pointer to the block group */
3013	spin_lock(&cluster->lock);
3014	if (!block_group) {
3015		block_group = cluster->block_group;
3016		if (!block_group) {
3017			spin_unlock(&cluster->lock);
3018			return;
3019		}
3020	} else if (cluster->block_group != block_group) {
3021		/* someone else has already freed it don't redo their work */
3022		spin_unlock(&cluster->lock);
3023		return;
3024	}
3025	btrfs_get_block_group(block_group);
3026	spin_unlock(&cluster->lock);
3027
3028	ctl = block_group->free_space_ctl;
3029
3030	/* now return any extents the cluster had on it */
3031	spin_lock(&ctl->tree_lock);
3032	__btrfs_return_cluster_to_free_space(block_group, cluster);
3033	spin_unlock(&ctl->tree_lock);
3034
3035	btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, block_group);
3036
3037	/* finally drop our ref */
3038	btrfs_put_block_group(block_group);
3039}
3040
3041static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group,
3042				   struct btrfs_free_cluster *cluster,
3043				   struct btrfs_free_space *entry,
3044				   u64 bytes, u64 min_start,
3045				   u64 *max_extent_size)
3046{
3047	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3048	int err;
3049	u64 search_start = cluster->window_start;
3050	u64 search_bytes = bytes;
3051	u64 ret = 0;
3052
3053	search_start = min_start;
3054	search_bytes = bytes;
3055
3056	err = search_bitmap(ctl, entry, &search_start, &search_bytes, true);
3057	if (err) {
3058		*max_extent_size = max(get_max_extent_size(entry),
3059				       *max_extent_size);
3060		return 0;
3061	}
3062
3063	ret = search_start;
3064	__bitmap_clear_bits(ctl, entry, ret, bytes);
3065
3066	return ret;
3067}
3068
3069/*
3070 * given a cluster, try to allocate 'bytes' from it, returns 0
3071 * if it couldn't find anything suitably large, or a logical disk offset
3072 * if things worked out
3073 */
3074u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group,
3075			     struct btrfs_free_cluster *cluster, u64 bytes,
3076			     u64 min_start, u64 *max_extent_size)
3077{
3078	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3079	struct btrfs_discard_ctl *discard_ctl =
3080					&block_group->fs_info->discard_ctl;
3081	struct btrfs_free_space *entry = NULL;
3082	struct rb_node *node;
3083	u64 ret = 0;
3084
3085	ASSERT(!btrfs_is_zoned(block_group->fs_info));
3086
3087	spin_lock(&cluster->lock);
3088	if (bytes > cluster->max_size)
3089		goto out;
3090
3091	if (cluster->block_group != block_group)
3092		goto out;
3093
3094	node = rb_first(&cluster->root);
3095	if (!node)
3096		goto out;
3097
3098	entry = rb_entry(node, struct btrfs_free_space, offset_index);
3099	while (1) {
3100		if (entry->bytes < bytes)
3101			*max_extent_size = max(get_max_extent_size(entry),
3102					       *max_extent_size);
3103
3104		if (entry->bytes < bytes ||
3105		    (!entry->bitmap && entry->offset < min_start)) {
3106			node = rb_next(&entry->offset_index);
3107			if (!node)
3108				break;
3109			entry = rb_entry(node, struct btrfs_free_space,
3110					 offset_index);
3111			continue;
3112		}
3113
3114		if (entry->bitmap) {
3115			ret = btrfs_alloc_from_bitmap(block_group,
3116						      cluster, entry, bytes,
3117						      cluster->window_start,
3118						      max_extent_size);
3119			if (ret == 0) {
3120				node = rb_next(&entry->offset_index);
3121				if (!node)
3122					break;
3123				entry = rb_entry(node, struct btrfs_free_space,
3124						 offset_index);
3125				continue;
3126			}
3127			cluster->window_start += bytes;
3128		} else {
3129			ret = entry->offset;
3130
3131			entry->offset += bytes;
3132			entry->bytes -= bytes;
3133		}
3134
3135		break;
3136	}
3137out:
3138	spin_unlock(&cluster->lock);
3139
3140	if (!ret)
3141		return 0;
3142
3143	spin_lock(&ctl->tree_lock);
3144
3145	if (!btrfs_free_space_trimmed(entry))
3146		atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
3147
3148	ctl->free_space -= bytes;
3149	if (!entry->bitmap && !btrfs_free_space_trimmed(entry))
3150		ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
3151
3152	spin_lock(&cluster->lock);
3153	if (entry->bytes == 0) {
3154		rb_erase(&entry->offset_index, &cluster->root);
3155		ctl->free_extents--;
3156		if (entry->bitmap) {
3157			kmem_cache_free(btrfs_free_space_bitmap_cachep,
3158					entry->bitmap);
3159			ctl->total_bitmaps--;
3160			recalculate_thresholds(ctl);
3161		} else if (!btrfs_free_space_trimmed(entry)) {
3162			ctl->discardable_extents[BTRFS_STAT_CURR]--;
3163		}
3164		kmem_cache_free(btrfs_free_space_cachep, entry);
3165	}
3166
3167	spin_unlock(&cluster->lock);
3168	spin_unlock(&ctl->tree_lock);
3169
3170	return ret;
3171}
3172
3173static int btrfs_bitmap_cluster(struct btrfs_block_group *block_group,
3174				struct btrfs_free_space *entry,
3175				struct btrfs_free_cluster *cluster,
3176				u64 offset, u64 bytes,
3177				u64 cont1_bytes, u64 min_bytes)
3178{
3179	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3180	unsigned long next_zero;
3181	unsigned long i;
3182	unsigned long want_bits;
3183	unsigned long min_bits;
3184	unsigned long found_bits;
3185	unsigned long max_bits = 0;
3186	unsigned long start = 0;
3187	unsigned long total_found = 0;
3188	int ret;
3189
3190	i = offset_to_bit(entry->offset, ctl->unit,
3191			  max_t(u64, offset, entry->offset));
3192	want_bits = bytes_to_bits(bytes, ctl->unit);
3193	min_bits = bytes_to_bits(min_bytes, ctl->unit);
3194
3195	/*
3196	 * Don't bother looking for a cluster in this bitmap if it's heavily
3197	 * fragmented.
3198	 */
3199	if (entry->max_extent_size &&
3200	    entry->max_extent_size < cont1_bytes)
3201		return -ENOSPC;
3202again:
3203	found_bits = 0;
3204	for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) {
3205		next_zero = find_next_zero_bit(entry->bitmap,
3206					       BITS_PER_BITMAP, i);
3207		if (next_zero - i >= min_bits) {
3208			found_bits = next_zero - i;
3209			if (found_bits > max_bits)
3210				max_bits = found_bits;
3211			break;
3212		}
3213		if (next_zero - i > max_bits)
3214			max_bits = next_zero - i;
3215		i = next_zero;
3216	}
3217
3218	if (!found_bits) {
3219		entry->max_extent_size = (u64)max_bits * ctl->unit;
3220		return -ENOSPC;
3221	}
3222
3223	if (!total_found) {
3224		start = i;
3225		cluster->max_size = 0;
3226	}
3227
3228	total_found += found_bits;
3229
3230	if (cluster->max_size < found_bits * ctl->unit)
3231		cluster->max_size = found_bits * ctl->unit;
3232
3233	if (total_found < want_bits || cluster->max_size < cont1_bytes) {
3234		i = next_zero + 1;
3235		goto again;
3236	}
3237
3238	cluster->window_start = start * ctl->unit + entry->offset;
3239	rb_erase(&entry->offset_index, &ctl->free_space_offset);
3240	ret = tree_insert_offset(&cluster->root, entry->offset,
3241				 &entry->offset_index, 1);
3242	ASSERT(!ret); /* -EEXIST; Logic error */
3243
3244	trace_btrfs_setup_cluster(block_group, cluster,
3245				  total_found * ctl->unit, 1);
3246	return 0;
3247}
3248
3249/*
3250 * This searches the block group for just extents to fill the cluster with.
3251 * Try to find a cluster with at least bytes total bytes, at least one
3252 * extent of cont1_bytes, and other clusters of at least min_bytes.
3253 */
3254static noinline int
3255setup_cluster_no_bitmap(struct btrfs_block_group *block_group,
3256			struct btrfs_free_cluster *cluster,
3257			struct list_head *bitmaps, u64 offset, u64 bytes,
3258			u64 cont1_bytes, u64 min_bytes)
3259{
3260	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3261	struct btrfs_free_space *first = NULL;
3262	struct btrfs_free_space *entry = NULL;
3263	struct btrfs_free_space *last;
3264	struct rb_node *node;
3265	u64 window_free;
3266	u64 max_extent;
3267	u64 total_size = 0;
3268
3269	entry = tree_search_offset(ctl, offset, 0, 1);
3270	if (!entry)
3271		return -ENOSPC;
3272
3273	/*
3274	 * We don't want bitmaps, so just move along until we find a normal
3275	 * extent entry.
3276	 */
3277	while (entry->bitmap || entry->bytes < min_bytes) {
3278		if (entry->bitmap && list_empty(&entry->list))
3279			list_add_tail(&entry->list, bitmaps);
3280		node = rb_next(&entry->offset_index);
3281		if (!node)
3282			return -ENOSPC;
3283		entry = rb_entry(node, struct btrfs_free_space, offset_index);
3284	}
3285
3286	window_free = entry->bytes;
3287	max_extent = entry->bytes;
3288	first = entry;
3289	last = entry;
3290
3291	for (node = rb_next(&entry->offset_index); node;
3292	     node = rb_next(&entry->offset_index)) {
3293		entry = rb_entry(node, struct btrfs_free_space, offset_index);
3294
3295		if (entry->bitmap) {
3296			if (list_empty(&entry->list))
3297				list_add_tail(&entry->list, bitmaps);
3298			continue;
3299		}
3300
3301		if (entry->bytes < min_bytes)
3302			continue;
3303
3304		last = entry;
3305		window_free += entry->bytes;
3306		if (entry->bytes > max_extent)
3307			max_extent = entry->bytes;
3308	}
3309
3310	if (window_free < bytes || max_extent < cont1_bytes)
3311		return -ENOSPC;
3312
3313	cluster->window_start = first->offset;
3314
3315	node = &first->offset_index;
3316
3317	/*
3318	 * now we've found our entries, pull them out of the free space
3319	 * cache and put them into the cluster rbtree
3320	 */
3321	do {
3322		int ret;
3323
3324		entry = rb_entry(node, struct btrfs_free_space, offset_index);
3325		node = rb_next(&entry->offset_index);
3326		if (entry->bitmap || entry->bytes < min_bytes)
3327			continue;
3328
3329		rb_erase(&entry->offset_index, &ctl->free_space_offset);
3330		ret = tree_insert_offset(&cluster->root, entry->offset,
3331					 &entry->offset_index, 0);
3332		total_size += entry->bytes;
3333		ASSERT(!ret); /* -EEXIST; Logic error */
3334	} while (node && entry != last);
3335
3336	cluster->max_size = max_extent;
3337	trace_btrfs_setup_cluster(block_group, cluster, total_size, 0);
3338	return 0;
3339}
3340
3341/*
3342 * This specifically looks for bitmaps that may work in the cluster, we assume
3343 * that we have already failed to find extents that will work.
3344 */
3345static noinline int
3346setup_cluster_bitmap(struct btrfs_block_group *block_group,
3347		     struct btrfs_free_cluster *cluster,
3348		     struct list_head *bitmaps, u64 offset, u64 bytes,
3349		     u64 cont1_bytes, u64 min_bytes)
3350{
3351	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3352	struct btrfs_free_space *entry = NULL;
3353	int ret = -ENOSPC;
3354	u64 bitmap_offset = offset_to_bitmap(ctl, offset);
3355
3356	if (ctl->total_bitmaps == 0)
3357		return -ENOSPC;
3358
3359	/*
3360	 * The bitmap that covers offset won't be in the list unless offset
3361	 * is just its start offset.
3362	 */
3363	if (!list_empty(bitmaps))
3364		entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
3365
3366	if (!entry || entry->offset != bitmap_offset) {
3367		entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
3368		if (entry && list_empty(&entry->list))
3369			list_add(&entry->list, bitmaps);
3370	}
3371
3372	list_for_each_entry(entry, bitmaps, list) {
3373		if (entry->bytes < bytes)
3374			continue;
3375		ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
3376					   bytes, cont1_bytes, min_bytes);
3377		if (!ret)
3378			return 0;
3379	}
3380
3381	/*
3382	 * The bitmaps list has all the bitmaps that record free space
3383	 * starting after offset, so no more search is required.
3384	 */
3385	return -ENOSPC;
3386}
3387
3388/*
3389 * here we try to find a cluster of blocks in a block group.  The goal
3390 * is to find at least bytes+empty_size.
3391 * We might not find them all in one contiguous area.
3392 *
3393 * returns zero and sets up cluster if things worked out, otherwise
3394 * it returns -enospc
3395 */
3396int btrfs_find_space_cluster(struct btrfs_block_group *block_group,
3397			     struct btrfs_free_cluster *cluster,
3398			     u64 offset, u64 bytes, u64 empty_size)
3399{
3400	struct btrfs_fs_info *fs_info = block_group->fs_info;
3401	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3402	struct btrfs_free_space *entry, *tmp;
3403	LIST_HEAD(bitmaps);
3404	u64 min_bytes;
3405	u64 cont1_bytes;
3406	int ret;
3407
3408	/*
3409	 * Choose the minimum extent size we'll require for this
3410	 * cluster.  For SSD_SPREAD, don't allow any fragmentation.
3411	 * For metadata, allow allocates with smaller extents.  For
3412	 * data, keep it dense.
3413	 */
3414	if (btrfs_test_opt(fs_info, SSD_SPREAD)) {
3415		cont1_bytes = min_bytes = bytes + empty_size;
3416	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
3417		cont1_bytes = bytes;
3418		min_bytes = fs_info->sectorsize;
3419	} else {
3420		cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
3421		min_bytes = fs_info->sectorsize;
3422	}
3423
3424	spin_lock(&ctl->tree_lock);
3425
3426	/*
3427	 * If we know we don't have enough space to make a cluster don't even
3428	 * bother doing all the work to try and find one.
3429	 */
3430	if (ctl->free_space < bytes) {
3431		spin_unlock(&ctl->tree_lock);
3432		return -ENOSPC;
3433	}
3434
3435	spin_lock(&cluster->lock);
3436
3437	/* someone already found a cluster, hooray */
3438	if (cluster->block_group) {
3439		ret = 0;
3440		goto out;
3441	}
3442
3443	trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
3444				 min_bytes);
3445
3446	ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
3447				      bytes + empty_size,
3448				      cont1_bytes, min_bytes);
3449	if (ret)
3450		ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
3451					   offset, bytes + empty_size,
3452					   cont1_bytes, min_bytes);
3453
3454	/* Clear our temporary list */
3455	list_for_each_entry_safe(entry, tmp, &bitmaps, list)
3456		list_del_init(&entry->list);
3457
3458	if (!ret) {
3459		btrfs_get_block_group(block_group);
3460		list_add_tail(&cluster->block_group_list,
3461			      &block_group->cluster_list);
3462		cluster->block_group = block_group;
3463	} else {
3464		trace_btrfs_failed_cluster_setup(block_group);
3465	}
3466out:
3467	spin_unlock(&cluster->lock);
3468	spin_unlock(&ctl->tree_lock);
3469
3470	return ret;
3471}
3472
3473/*
3474 * simple code to zero out a cluster
3475 */
3476void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
3477{
3478	spin_lock_init(&cluster->lock);
3479	spin_lock_init(&cluster->refill_lock);
3480	cluster->root = RB_ROOT;
3481	cluster->max_size = 0;
3482	cluster->fragmented = false;
3483	INIT_LIST_HEAD(&cluster->block_group_list);
3484	cluster->block_group = NULL;
3485}
3486
3487static int do_trimming(struct btrfs_block_group *block_group,
3488		       u64 *total_trimmed, u64 start, u64 bytes,
3489		       u64 reserved_start, u64 reserved_bytes,
3490		       enum btrfs_trim_state reserved_trim_state,
3491		       struct btrfs_trim_range *trim_entry)
3492{
3493	struct btrfs_space_info *space_info = block_group->space_info;
3494	struct btrfs_fs_info *fs_info = block_group->fs_info;
3495	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3496	int ret;
3497	int update = 0;
3498	const u64 end = start + bytes;
3499	const u64 reserved_end = reserved_start + reserved_bytes;
3500	enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
3501	u64 trimmed = 0;
3502
3503	spin_lock(&space_info->lock);
3504	spin_lock(&block_group->lock);
3505	if (!block_group->ro) {
3506		block_group->reserved += reserved_bytes;
3507		space_info->bytes_reserved += reserved_bytes;
3508		update = 1;
3509	}
3510	spin_unlock(&block_group->lock);
3511	spin_unlock(&space_info->lock);
3512
3513	ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed);
3514	if (!ret) {
3515		*total_trimmed += trimmed;
3516		trim_state = BTRFS_TRIM_STATE_TRIMMED;
3517	}
3518
3519	mutex_lock(&ctl->cache_writeout_mutex);
3520	if (reserved_start < start)
3521		__btrfs_add_free_space(fs_info, ctl, reserved_start,
3522				       start - reserved_start,
3523				       reserved_trim_state);
3524	if (start + bytes < reserved_start + reserved_bytes)
3525		__btrfs_add_free_space(fs_info, ctl, end, reserved_end - end,
3526				       reserved_trim_state);
3527	__btrfs_add_free_space(fs_info, ctl, start, bytes, trim_state);
3528	list_del(&trim_entry->list);
3529	mutex_unlock(&ctl->cache_writeout_mutex);
3530
3531	if (update) {
3532		spin_lock(&space_info->lock);
3533		spin_lock(&block_group->lock);
3534		if (block_group->ro)
3535			space_info->bytes_readonly += reserved_bytes;
3536		block_group->reserved -= reserved_bytes;
3537		space_info->bytes_reserved -= reserved_bytes;
3538		spin_unlock(&block_group->lock);
3539		spin_unlock(&space_info->lock);
3540	}
3541
3542	return ret;
3543}
3544
3545/*
3546 * If @async is set, then we will trim 1 region and return.
3547 */
3548static int trim_no_bitmap(struct btrfs_block_group *block_group,
3549			  u64 *total_trimmed, u64 start, u64 end, u64 minlen,
3550			  bool async)
3551{
3552	struct btrfs_discard_ctl *discard_ctl =
3553					&block_group->fs_info->discard_ctl;
3554	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3555	struct btrfs_free_space *entry;
3556	struct rb_node *node;
3557	int ret = 0;
3558	u64 extent_start;
3559	u64 extent_bytes;
3560	enum btrfs_trim_state extent_trim_state;
3561	u64 bytes;
3562	const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);
3563
3564	while (start < end) {
3565		struct btrfs_trim_range trim_entry;
3566
3567		mutex_lock(&ctl->cache_writeout_mutex);
3568		spin_lock(&ctl->tree_lock);
3569
3570		if (ctl->free_space < minlen)
3571			goto out_unlock;
3572
3573		entry = tree_search_offset(ctl, start, 0, 1);
3574		if (!entry)
3575			goto out_unlock;
3576
3577		/* Skip bitmaps and if async, already trimmed entries */
3578		while (entry->bitmap ||
3579		       (async && btrfs_free_space_trimmed(entry))) {
3580			node = rb_next(&entry->offset_index);
3581			if (!node)
3582				goto out_unlock;
3583			entry = rb_entry(node, struct btrfs_free_space,
3584					 offset_index);
3585		}
3586
3587		if (entry->offset >= end)
3588			goto out_unlock;
3589
3590		extent_start = entry->offset;
3591		extent_bytes = entry->bytes;
3592		extent_trim_state = entry->trim_state;
3593		if (async) {
3594			start = entry->offset;
3595			bytes = entry->bytes;
3596			if (bytes < minlen) {
3597				spin_unlock(&ctl->tree_lock);
3598				mutex_unlock(&ctl->cache_writeout_mutex);
3599				goto next;
3600			}
3601			unlink_free_space(ctl, entry);
3602			/*
3603			 * Let bytes = BTRFS_MAX_DISCARD_SIZE + X.
3604			 * If X < BTRFS_ASYNC_DISCARD_MIN_FILTER, we won't trim
3605			 * X when we come back around.  So trim it now.
3606			 */
3607			if (max_discard_size &&
3608			    bytes >= (max_discard_size +
3609				      BTRFS_ASYNC_DISCARD_MIN_FILTER)) {
3610				bytes = max_discard_size;
3611				extent_bytes = max_discard_size;
3612				entry->offset += max_discard_size;
3613				entry->bytes -= max_discard_size;
3614				link_free_space(ctl, entry);
3615			} else {
3616				kmem_cache_free(btrfs_free_space_cachep, entry);
3617			}
3618		} else {
3619			start = max(start, extent_start);
3620			bytes = min(extent_start + extent_bytes, end) - start;
3621			if (bytes < minlen) {
3622				spin_unlock(&ctl->tree_lock);
3623				mutex_unlock(&ctl->cache_writeout_mutex);
3624				goto next;
3625			}
3626
3627			unlink_free_space(ctl, entry);
3628			kmem_cache_free(btrfs_free_space_cachep, entry);
3629		}
3630
3631		spin_unlock(&ctl->tree_lock);
3632		trim_entry.start = extent_start;
3633		trim_entry.bytes = extent_bytes;
3634		list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
3635		mutex_unlock(&ctl->cache_writeout_mutex);
3636
3637		ret = do_trimming(block_group, total_trimmed, start, bytes,
3638				  extent_start, extent_bytes, extent_trim_state,
3639				  &trim_entry);
3640		if (ret) {
3641			block_group->discard_cursor = start + bytes;
3642			break;
3643		}
3644next:
3645		start += bytes;
3646		block_group->discard_cursor = start;
3647		if (async && *total_trimmed)
3648			break;
3649
3650		if (fatal_signal_pending(current)) {
3651			ret = -ERESTARTSYS;
3652			break;
3653		}
3654
3655		cond_resched();
3656	}
3657
3658	return ret;
3659
3660out_unlock:
3661	block_group->discard_cursor = btrfs_block_group_end(block_group);
3662	spin_unlock(&ctl->tree_lock);
3663	mutex_unlock(&ctl->cache_writeout_mutex);
3664
3665	return ret;
3666}
3667
3668/*
3669 * If we break out of trimming a bitmap prematurely, we should reset the
3670 * trimming bit.  In a rather contrieved case, it's possible to race here so
3671 * reset the state to BTRFS_TRIM_STATE_UNTRIMMED.
3672 *
3673 * start = start of bitmap
3674 * end = near end of bitmap
3675 *
3676 * Thread 1:			Thread 2:
3677 * trim_bitmaps(start)
3678 *				trim_bitmaps(end)
3679 *				end_trimming_bitmap()
3680 * reset_trimming_bitmap()
3681 */
3682static void reset_trimming_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset)
3683{
3684	struct btrfs_free_space *entry;
3685
3686	spin_lock(&ctl->tree_lock);
3687	entry = tree_search_offset(ctl, offset, 1, 0);
3688	if (entry) {
3689		if (btrfs_free_space_trimmed(entry)) {
3690			ctl->discardable_extents[BTRFS_STAT_CURR] +=
3691				entry->bitmap_extents;
3692			ctl->discardable_bytes[BTRFS_STAT_CURR] += entry->bytes;
3693		}
3694		entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
3695	}
3696
3697	spin_unlock(&ctl->tree_lock);
3698}
3699
3700static void end_trimming_bitmap(struct btrfs_free_space_ctl *ctl,
3701				struct btrfs_free_space *entry)
3702{
3703	if (btrfs_free_space_trimming_bitmap(entry)) {
3704		entry->trim_state = BTRFS_TRIM_STATE_TRIMMED;
3705		ctl->discardable_extents[BTRFS_STAT_CURR] -=
3706			entry->bitmap_extents;
3707		ctl->discardable_bytes[BTRFS_STAT_CURR] -= entry->bytes;
3708	}
3709}
3710
3711/*
3712 * If @async is set, then we will trim 1 region and return.
3713 */
3714static int trim_bitmaps(struct btrfs_block_group *block_group,
3715			u64 *total_trimmed, u64 start, u64 end, u64 minlen,
3716			u64 maxlen, bool async)
3717{
3718	struct btrfs_discard_ctl *discard_ctl =
3719					&block_group->fs_info->discard_ctl;
3720	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3721	struct btrfs_free_space *entry;
3722	int ret = 0;
3723	int ret2;
3724	u64 bytes;
3725	u64 offset = offset_to_bitmap(ctl, start);
3726	const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);
3727
3728	while (offset < end) {
3729		bool next_bitmap = false;
3730		struct btrfs_trim_range trim_entry;
3731
3732		mutex_lock(&ctl->cache_writeout_mutex);
3733		spin_lock(&ctl->tree_lock);
3734
3735		if (ctl->free_space < minlen) {
3736			block_group->discard_cursor =
3737				btrfs_block_group_end(block_group);
3738			spin_unlock(&ctl->tree_lock);
3739			mutex_unlock(&ctl->cache_writeout_mutex);
3740			break;
3741		}
3742
3743		entry = tree_search_offset(ctl, offset, 1, 0);
3744		/*
3745		 * Bitmaps are marked trimmed lossily now to prevent constant
3746		 * discarding of the same bitmap (the reason why we are bound
3747		 * by the filters).  So, retrim the block group bitmaps when we
3748		 * are preparing to punt to the unused_bgs list.  This uses
3749		 * @minlen to determine if we are in BTRFS_DISCARD_INDEX_UNUSED
3750		 * which is the only discard index which sets minlen to 0.
3751		 */
3752		if (!entry || (async && minlen && start == offset &&
3753			       btrfs_free_space_trimmed(entry))) {
3754			spin_unlock(&ctl->tree_lock);
3755			mutex_unlock(&ctl->cache_writeout_mutex);
3756			next_bitmap = true;
3757			goto next;
3758		}
3759
3760		/*
3761		 * Async discard bitmap trimming begins at by setting the start
3762		 * to be key.objectid and the offset_to_bitmap() aligns to the
3763		 * start of the bitmap.  This lets us know we are fully
3764		 * scanning the bitmap rather than only some portion of it.
3765		 */
3766		if (start == offset)
3767			entry->trim_state = BTRFS_TRIM_STATE_TRIMMING;
3768
3769		bytes = minlen;
3770		ret2 = search_bitmap(ctl, entry, &start, &bytes, false);
3771		if (ret2 || start >= end) {
3772			/*
3773			 * We lossily consider a bitmap trimmed if we only skip
3774			 * over regions <= BTRFS_ASYNC_DISCARD_MIN_FILTER.
3775			 */
3776			if (ret2 && minlen <= BTRFS_ASYNC_DISCARD_MIN_FILTER)
3777				end_trimming_bitmap(ctl, entry);
3778			else
3779				entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
3780			spin_unlock(&ctl->tree_lock);
3781			mutex_unlock(&ctl->cache_writeout_mutex);
3782			next_bitmap = true;
3783			goto next;
3784		}
3785
3786		/*
3787		 * We already trimmed a region, but are using the locking above
3788		 * to reset the trim_state.
3789		 */
3790		if (async && *total_trimmed) {
3791			spin_unlock(&ctl->tree_lock);
3792			mutex_unlock(&ctl->cache_writeout_mutex);
3793			goto out;
3794		}
3795
3796		bytes = min(bytes, end - start);
3797		if (bytes < minlen || (async && maxlen && bytes > maxlen)) {
3798			spin_unlock(&ctl->tree_lock);
3799			mutex_unlock(&ctl->cache_writeout_mutex);
3800			goto next;
3801		}
3802
3803		/*
3804		 * Let bytes = BTRFS_MAX_DISCARD_SIZE + X.
3805		 * If X < @minlen, we won't trim X when we come back around.
3806		 * So trim it now.  We differ here from trimming extents as we
3807		 * don't keep individual state per bit.
3808		 */
3809		if (async &&
3810		    max_discard_size &&
3811		    bytes > (max_discard_size + minlen))
3812			bytes = max_discard_size;
3813
3814		bitmap_clear_bits(ctl, entry, start, bytes);
3815		if (entry->bytes == 0)
3816			free_bitmap(ctl, entry);
3817
3818		spin_unlock(&ctl->tree_lock);
3819		trim_entry.start = start;
3820		trim_entry.bytes = bytes;
3821		list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
3822		mutex_unlock(&ctl->cache_writeout_mutex);
3823
3824		ret = do_trimming(block_group, total_trimmed, start, bytes,
3825				  start, bytes, 0, &trim_entry);
3826		if (ret) {
3827			reset_trimming_bitmap(ctl, offset);
3828			block_group->discard_cursor =
3829				btrfs_block_group_end(block_group);
3830			break;
3831		}
3832next:
3833		if (next_bitmap) {
3834			offset += BITS_PER_BITMAP * ctl->unit;
3835			start = offset;
3836		} else {
3837			start += bytes;
3838		}
3839		block_group->discard_cursor = start;
3840
3841		if (fatal_signal_pending(current)) {
3842			if (start != offset)
3843				reset_trimming_bitmap(ctl, offset);
3844			ret = -ERESTARTSYS;
3845			break;
3846		}
3847
3848		cond_resched();
3849	}
3850
3851	if (offset >= end)
3852		block_group->discard_cursor = end;
3853
3854out:
3855	return ret;
3856}
3857
3858int btrfs_trim_block_group(struct btrfs_block_group *block_group,
3859			   u64 *trimmed, u64 start, u64 end, u64 minlen)
3860{
3861	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3862	int ret;
3863	u64 rem = 0;
3864
3865	ASSERT(!btrfs_is_zoned(block_group->fs_info));
3866
3867	*trimmed = 0;
3868
3869	spin_lock(&block_group->lock);
3870	if (block_group->removed) {
3871		spin_unlock(&block_group->lock);
3872		return 0;
3873	}
3874	btrfs_freeze_block_group(block_group);
3875	spin_unlock(&block_group->lock);
3876
3877	ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, false);
3878	if (ret)
3879		goto out;
3880
3881	ret = trim_bitmaps(block_group, trimmed, start, end, minlen, 0, false);
3882	div64_u64_rem(end, BITS_PER_BITMAP * ctl->unit, &rem);
3883	/* If we ended in the middle of a bitmap, reset the trimming flag */
3884	if (rem)
3885		reset_trimming_bitmap(ctl, offset_to_bitmap(ctl, end));
3886out:
3887	btrfs_unfreeze_block_group(block_group);
3888	return ret;
3889}
3890
3891int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group,
3892				   u64 *trimmed, u64 start, u64 end, u64 minlen,
3893				   bool async)
3894{
3895	int ret;
3896
3897	*trimmed = 0;
3898
3899	spin_lock(&block_group->lock);
3900	if (block_group->removed) {
3901		spin_unlock(&block_group->lock);
3902		return 0;
3903	}
3904	btrfs_freeze_block_group(block_group);
3905	spin_unlock(&block_group->lock);
3906
3907	ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async);
3908	btrfs_unfreeze_block_group(block_group);
3909
3910	return ret;
3911}
3912
3913int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
3914				   u64 *trimmed, u64 start, u64 end, u64 minlen,
3915				   u64 maxlen, bool async)
3916{
3917	int ret;
3918
3919	*trimmed = 0;
3920
3921	spin_lock(&block_group->lock);
3922	if (block_group->removed) {
3923		spin_unlock(&block_group->lock);
3924		return 0;
3925	}
3926	btrfs_freeze_block_group(block_group);
3927	spin_unlock(&block_group->lock);
3928
3929	ret = trim_bitmaps(block_group, trimmed, start, end, minlen, maxlen,
3930			   async);
3931
3932	btrfs_unfreeze_block_group(block_group);
3933
3934	return ret;
3935}
3936
3937bool btrfs_free_space_cache_v1_active(struct btrfs_fs_info *fs_info)
3938{
3939	return btrfs_super_cache_generation(fs_info->super_copy);
3940}
3941
3942static int cleanup_free_space_cache_v1(struct btrfs_fs_info *fs_info,
3943				       struct btrfs_trans_handle *trans)
3944{
3945	struct btrfs_block_group *block_group;
3946	struct rb_node *node;
3947	int ret = 0;
3948
3949	btrfs_info(fs_info, "cleaning free space cache v1");
3950
3951	node = rb_first(&fs_info->block_group_cache_tree);
3952	while (node) {
3953		block_group = rb_entry(node, struct btrfs_block_group, cache_node);
3954		ret = btrfs_remove_free_space_inode(trans, NULL, block_group);
3955		if (ret)
3956			goto out;
3957		node = rb_next(node);
3958	}
3959out:
3960	return ret;
3961}
3962
3963int btrfs_set_free_space_cache_v1_active(struct btrfs_fs_info *fs_info, bool active)
3964{
3965	struct btrfs_trans_handle *trans;
3966	int ret;
3967
3968	/*
3969	 * update_super_roots will appropriately set or unset
3970	 * super_copy->cache_generation based on SPACE_CACHE and
3971	 * BTRFS_FS_CLEANUP_SPACE_CACHE_V1. For this reason, we need a
3972	 * transaction commit whether we are enabling space cache v1 and don't
3973	 * have any other work to do, or are disabling it and removing free
3974	 * space inodes.
3975	 */
3976	trans = btrfs_start_transaction(fs_info->tree_root, 0);
3977	if (IS_ERR(trans))
3978		return PTR_ERR(trans);
3979
3980	if (!active) {
3981		set_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags);
3982		ret = cleanup_free_space_cache_v1(fs_info, trans);
3983		if (ret) {
3984			btrfs_abort_transaction(trans, ret);
3985			btrfs_end_transaction(trans);
3986			goto out;
3987		}
3988	}
3989
3990	ret = btrfs_commit_transaction(trans);
3991out:
3992	clear_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags);
3993
3994	return ret;
3995}
3996
3997#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
3998/*
3999 * Use this if you need to make a bitmap or extent entry specifically, it
4000 * doesn't do any of the merging that add_free_space does, this acts a lot like
4001 * how the free space cache loading stuff works, so you can get really weird
4002 * configurations.
4003 */
4004int test_add_free_space_entry(struct btrfs_block_group *cache,
4005			      u64 offset, u64 bytes, bool bitmap)
4006{
4007	struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
4008	struct btrfs_free_space *info = NULL, *bitmap_info;
4009	void *map = NULL;
4010	enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_TRIMMED;
4011	u64 bytes_added;
4012	int ret;
4013
4014again:
4015	if (!info) {
4016		info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
4017		if (!info)
4018			return -ENOMEM;
4019	}
4020
4021	if (!bitmap) {
4022		spin_lock(&ctl->tree_lock);
4023		info->offset = offset;
4024		info->bytes = bytes;
4025		info->max_extent_size = 0;
4026		ret = link_free_space(ctl, info);
4027		spin_unlock(&ctl->tree_lock);
4028		if (ret)
4029			kmem_cache_free(btrfs_free_space_cachep, info);
4030		return ret;
4031	}
4032
4033	if (!map) {
4034		map = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep, GFP_NOFS);
4035		if (!map) {
4036			kmem_cache_free(btrfs_free_space_cachep, info);
4037			return -ENOMEM;
4038		}
4039	}
4040
4041	spin_lock(&ctl->tree_lock);
4042	bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
4043					 1, 0);
4044	if (!bitmap_info) {
4045		info->bitmap = map;
4046		map = NULL;
4047		add_new_bitmap(ctl, info, offset);
4048		bitmap_info = info;
4049		info = NULL;
4050	}
4051
4052	bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
4053					  trim_state);
4054
4055	bytes -= bytes_added;
4056	offset += bytes_added;
4057	spin_unlock(&ctl->tree_lock);
4058
4059	if (bytes)
4060		goto again;
4061
4062	if (info)
4063		kmem_cache_free(btrfs_free_space_cachep, info);
4064	if (map)
4065		kmem_cache_free(btrfs_free_space_bitmap_cachep, map);
4066	return 0;
4067}
4068
4069/*
4070 * Checks to see if the given range is in the free space cache.  This is really
4071 * just used to check the absence of space, so if there is free space in the
4072 * range at all we will return 1.
4073 */
4074int test_check_exists(struct btrfs_block_group *cache,
4075		      u64 offset, u64 bytes)
4076{
4077	struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
4078	struct btrfs_free_space *info;
4079	int ret = 0;
4080
4081	spin_lock(&ctl->tree_lock);
4082	info = tree_search_offset(ctl, offset, 0, 0);
4083	if (!info) {
4084		info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
4085					  1, 0);
4086		if (!info)
4087			goto out;
4088	}
4089
4090have_info:
4091	if (info->bitmap) {
4092		u64 bit_off, bit_bytes;
4093		struct rb_node *n;
4094		struct btrfs_free_space *tmp;
4095
4096		bit_off = offset;
4097		bit_bytes = ctl->unit;
4098		ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false);
4099		if (!ret) {
4100			if (bit_off == offset) {
4101				ret = 1;
4102				goto out;
4103			} else if (bit_off > offset &&
4104				   offset + bytes > bit_off) {
4105				ret = 1;
4106				goto out;
4107			}
4108		}
4109
4110		n = rb_prev(&info->offset_index);
4111		while (n) {
4112			tmp = rb_entry(n, struct btrfs_free_space,
4113				       offset_index);
4114			if (tmp->offset + tmp->bytes < offset)
4115				break;
4116			if (offset + bytes < tmp->offset) {
4117				n = rb_prev(&tmp->offset_index);
4118				continue;
4119			}
4120			info = tmp;
4121			goto have_info;
4122		}
4123
4124		n = rb_next(&info->offset_index);
4125		while (n) {
4126			tmp = rb_entry(n, struct btrfs_free_space,
4127				       offset_index);
4128			if (offset + bytes < tmp->offset)
4129				break;
4130			if (tmp->offset + tmp->bytes < offset) {
4131				n = rb_next(&tmp->offset_index);
4132				continue;
4133			}
4134			info = tmp;
4135			goto have_info;
4136		}
4137
4138		ret = 0;
4139		goto out;
4140	}
4141
4142	if (info->offset == offset) {
4143		ret = 1;
4144		goto out;
4145	}
4146
4147	if (offset > info->offset && offset < info->offset + info->bytes)
4148		ret = 1;
4149out:
4150	spin_unlock(&ctl->tree_lock);
4151	return ret;
4152}
4153#endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */
4154