1// SPDX-License-Identifier: GPL-2.0+
2#include <common.h>
3#include <fs_internal.h>
4#include <log.h>
5#include <uuid.h>
6#include <memalign.h>
7#include "kernel-shared/btrfs_tree.h"
8#include "common/rbtree-utils.h"
9#include "disk-io.h"
10#include "ctree.h"
11#include "btrfs.h"
12#include "volumes.h"
13#include "extent-io.h"
14#include "crypto/hash.h"
15
16/* specified errno for check_tree_block */
17#define BTRFS_BAD_BYTENR		(-1)
18#define BTRFS_BAD_FSID			(-2)
19#define BTRFS_BAD_LEVEL			(-3)
20#define BTRFS_BAD_NRITEMS		(-4)
21
22/* Calculate max possible nritems for a leaf/node */
23static u32 max_nritems(u8 level, u32 nodesize)
24{
25
26	if (level == 0)
27		return ((nodesize - sizeof(struct btrfs_header)) /
28			sizeof(struct btrfs_item));
29	return ((nodesize - sizeof(struct btrfs_header)) /
30		sizeof(struct btrfs_key_ptr));
31}
32
33static int check_tree_block(struct btrfs_fs_info *fs_info,
34			    struct extent_buffer *buf)
35{
36
37	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
38	u32 nodesize = fs_info->nodesize;
39	bool fsid_match = false;
40	int ret = BTRFS_BAD_FSID;
41
42	if (buf->start != btrfs_header_bytenr(buf))
43		return BTRFS_BAD_BYTENR;
44	if (btrfs_header_level(buf) >= BTRFS_MAX_LEVEL)
45		return BTRFS_BAD_LEVEL;
46	if (btrfs_header_nritems(buf) > max_nritems(btrfs_header_level(buf),
47						    nodesize))
48		return BTRFS_BAD_NRITEMS;
49
50	/* Only leaf can be empty */
51	if (btrfs_header_nritems(buf) == 0 &&
52	    btrfs_header_level(buf) != 0)
53		return BTRFS_BAD_NRITEMS;
54
55	while (fs_devices) {
56		/*
57		 * Checking the incompat flag is only valid for the current
58		 * fs. For seed devices it's forbidden to have their uuid
59		 * changed so reading ->fsid in this case is fine
60		 */
61		if (fs_devices == fs_info->fs_devices &&
62		    btrfs_fs_incompat(fs_info, METADATA_UUID))
63			fsid_match = !memcmp_extent_buffer(buf,
64						   fs_devices->metadata_uuid,
65						   btrfs_header_fsid(),
66						   BTRFS_FSID_SIZE);
67		else
68			fsid_match = !memcmp_extent_buffer(buf,
69						    fs_devices->fsid,
70						    btrfs_header_fsid(),
71						    BTRFS_FSID_SIZE);
72
73
74		if (fsid_match) {
75			ret = 0;
76			break;
77		}
78		fs_devices = fs_devices->seed;
79	}
80	return ret;
81}
82
83static void print_tree_block_error(struct btrfs_fs_info *fs_info,
84				struct extent_buffer *eb,
85				int err)
86{
87	char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'};
88	char found_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'};
89	u8 buf[BTRFS_UUID_SIZE];
90
91	if (!err)
92		return;
93
94	fprintf(stderr, "bad tree block %llu, ", eb->start);
95	switch (err) {
96	case BTRFS_BAD_FSID:
97		read_extent_buffer(eb, buf, btrfs_header_fsid(),
98				   BTRFS_UUID_SIZE);
99		uuid_unparse(buf, found_uuid);
100		uuid_unparse(fs_info->fs_devices->metadata_uuid, fs_uuid);
101		fprintf(stderr, "fsid mismatch, want=%s, have=%s\n",
102			fs_uuid, found_uuid);
103		break;
104	case BTRFS_BAD_BYTENR:
105		fprintf(stderr, "bytenr mismatch, want=%llu, have=%llu\n",
106			eb->start, btrfs_header_bytenr(eb));
107		break;
108	case BTRFS_BAD_LEVEL:
109		fprintf(stderr, "bad level, %u > %d\n",
110			btrfs_header_level(eb), BTRFS_MAX_LEVEL);
111		break;
112	case BTRFS_BAD_NRITEMS:
113		fprintf(stderr, "invalid nr_items: %u\n",
114			btrfs_header_nritems(eb));
115		break;
116	}
117}
118
119int btrfs_csum_data(u16 csum_type, const u8 *data, u8 *out, size_t len)
120{
121	memset(out, 0, BTRFS_CSUM_SIZE);
122
123	switch (csum_type) {
124	case BTRFS_CSUM_TYPE_CRC32:
125		return hash_crc32c(data, len, out);
126	case BTRFS_CSUM_TYPE_XXHASH:
127		return hash_xxhash(data, len, out);
128	case BTRFS_CSUM_TYPE_SHA256:
129		return hash_sha256(data, len, out);
130	case BTRFS_CSUM_TYPE_BLAKE2:
131		return hash_blake2(data, len, out);
132	default:
133		printf("Unknown csum type %d\n", csum_type);
134		return -EINVAL;
135	}
136}
137
138/*
139 * Check if the super is valid:
140 * - nodesize/sectorsize - minimum, maximum, alignment
141 * - tree block starts   - alignment
142 * - number of devices   - something sane
143 * - sys array size      - maximum
144 */
145static int btrfs_check_super(struct btrfs_super_block *sb)
146{
147	u8 result[BTRFS_CSUM_SIZE];
148	u16 csum_type;
149	int csum_size;
150	u8 *metadata_uuid;
151
152	if (btrfs_super_magic(sb) != BTRFS_MAGIC)
153		return -EIO;
154
155	csum_type = btrfs_super_csum_type(sb);
156	if (csum_type >= btrfs_super_num_csums()) {
157		error("unsupported checksum algorithm %u", csum_type);
158		return -EIO;
159	}
160	csum_size = btrfs_super_csum_size(sb);
161
162	btrfs_csum_data(csum_type, (u8 *)sb + BTRFS_CSUM_SIZE,
163			result, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
164
165	if (memcmp(result, sb->csum, csum_size)) {
166		error("superblock checksum mismatch");
167		return -EIO;
168	}
169	if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
170		error("tree_root level too big: %d >= %d",
171			btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
172		goto error_out;
173	}
174	if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) {
175		error("chunk_root level too big: %d >= %d",
176			btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL);
177		goto error_out;
178	}
179	if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) {
180		error("log_root level too big: %d >= %d",
181			btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL);
182		goto error_out;
183	}
184
185	if (!IS_ALIGNED(btrfs_super_root(sb), 4096)) {
186		error("tree_root block unaligned: %llu", btrfs_super_root(sb));
187		goto error_out;
188	}
189	if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096)) {
190		error("chunk_root block unaligned: %llu",
191			btrfs_super_chunk_root(sb));
192		goto error_out;
193	}
194	if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096)) {
195		error("log_root block unaligned: %llu",
196			btrfs_super_log_root(sb));
197		goto error_out;
198	}
199	if (btrfs_super_nodesize(sb) < 4096) {
200		error("nodesize too small: %u < 4096",
201			btrfs_super_nodesize(sb));
202		goto error_out;
203	}
204	if (!IS_ALIGNED(btrfs_super_nodesize(sb), 4096)) {
205		error("nodesize unaligned: %u", btrfs_super_nodesize(sb));
206		goto error_out;
207	}
208	if (btrfs_super_sectorsize(sb) < 4096) {
209		error("sectorsize too small: %u < 4096",
210			btrfs_super_sectorsize(sb));
211		goto error_out;
212	}
213	if (!IS_ALIGNED(btrfs_super_sectorsize(sb), 4096)) {
214		error("sectorsize unaligned: %u", btrfs_super_sectorsize(sb));
215		goto error_out;
216	}
217	if (btrfs_super_total_bytes(sb) == 0) {
218		error("invalid total_bytes 0");
219		goto error_out;
220	}
221	if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) {
222		error("invalid bytes_used %llu", btrfs_super_bytes_used(sb));
223		goto error_out;
224	}
225	if ((btrfs_super_stripesize(sb) != 4096)
226		&& (btrfs_super_stripesize(sb) != btrfs_super_sectorsize(sb))) {
227		error("invalid stripesize %u", btrfs_super_stripesize(sb));
228		goto error_out;
229	}
230
231	if (btrfs_super_incompat_flags(sb) & BTRFS_FEATURE_INCOMPAT_METADATA_UUID)
232		metadata_uuid = sb->metadata_uuid;
233	else
234		metadata_uuid = sb->fsid;
235
236	if (memcmp(metadata_uuid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) {
237		char fsid[BTRFS_UUID_UNPARSED_SIZE];
238		char dev_fsid[BTRFS_UUID_UNPARSED_SIZE];
239
240		uuid_unparse(sb->metadata_uuid, fsid);
241		uuid_unparse(sb->dev_item.fsid, dev_fsid);
242		error("dev_item UUID does not match fsid: %s != %s",
243			dev_fsid, fsid);
244		goto error_out;
245	}
246
247	/*
248	 * Hint to catch really bogus numbers, bitflips or so
249	 */
250	if (btrfs_super_num_devices(sb) > (1UL << 31)) {
251		error("suspicious number of devices: %llu",
252			btrfs_super_num_devices(sb));
253	}
254
255	if (btrfs_super_num_devices(sb) == 0) {
256		error("number of devices is 0");
257		goto error_out;
258	}
259
260	/*
261	 * Obvious sys_chunk_array corruptions, it must hold at least one key
262	 * and one chunk
263	 */
264	if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
265		error("system chunk array too big %u > %u",
266		      btrfs_super_sys_array_size(sb),
267		      BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
268		goto error_out;
269	}
270	if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
271			+ sizeof(struct btrfs_chunk)) {
272		error("system chunk array too small %u < %zu",
273		      btrfs_super_sys_array_size(sb),
274		      sizeof(struct btrfs_disk_key) +
275		      sizeof(struct btrfs_chunk));
276		goto error_out;
277	}
278
279	return 0;
280
281error_out:
282	error("superblock checksum matches but it has invalid members");
283	return -EIO;
284}
285
286/*
287 * btrfs_read_dev_super - read a valid primary superblock from a block device
288 * @desc,@part:	file descriptor of the device
289 * @sb:		buffer where the superblock is going to be read in
290 *
291 * Unlike the btrfs-progs/kernel version, here we ony care about the first
292 * super block, thus it's much simpler.
293 */
294int btrfs_read_dev_super(struct blk_desc *desc, struct disk_partition *part,
295			 struct btrfs_super_block *sb)
296{
297	ALLOC_CACHE_ALIGN_BUFFER(char, tmp, BTRFS_SUPER_INFO_SIZE);
298	struct btrfs_super_block *buf = (struct btrfs_super_block *)tmp;
299	int ret;
300
301	ret = __btrfs_devread(desc, part, tmp, BTRFS_SUPER_INFO_SIZE,
302			      BTRFS_SUPER_INFO_OFFSET);
303	if (ret < BTRFS_SUPER_INFO_SIZE)
304		return -EIO;
305
306	if (btrfs_super_bytenr(buf) != BTRFS_SUPER_INFO_OFFSET)
307		return -EIO;
308
309	if (btrfs_check_super(buf))
310		return -EIO;
311
312	memcpy(sb, buf, BTRFS_SUPER_INFO_SIZE);
313	return 0;
314}
315
316static int __csum_tree_block_size(struct extent_buffer *buf, u16 csum_size,
317				  int verify, int silent, u16 csum_type)
318{
319	u8 result[BTRFS_CSUM_SIZE];
320	u32 len;
321
322	len = buf->len - BTRFS_CSUM_SIZE;
323	btrfs_csum_data(csum_type, (u8 *)buf->data + BTRFS_CSUM_SIZE,
324			result, len);
325
326	if (verify) {
327		if (memcmp_extent_buffer(buf, result, 0, csum_size)) {
328			/* FIXME: format */
329			if (!silent)
330				printk("checksum verify failed on %llu found %08X wanted %08X\n",
331				       (unsigned long long)buf->start,
332				       result[0],
333				       buf->data[0]);
334			return 1;
335		}
336	} else {
337		write_extent_buffer(buf, result, 0, csum_size);
338	}
339	return 0;
340}
341
342int csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, int verify,
343			 u16 csum_type)
344{
345	return __csum_tree_block_size(buf, csum_size, verify, 0, csum_type);
346}
347
348static int csum_tree_block(struct btrfs_fs_info *fs_info,
349			   struct extent_buffer *buf, int verify)
350{
351	u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
352	u16 csum_type = btrfs_super_csum_type(fs_info->super_copy);
353
354	return csum_tree_block_size(buf, csum_size, verify, csum_type);
355}
356
357struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
358					    u64 bytenr, u32 blocksize)
359{
360	return find_extent_buffer(&fs_info->extent_cache,
361				  bytenr, blocksize);
362}
363
364struct extent_buffer* btrfs_find_create_tree_block(
365		struct btrfs_fs_info *fs_info, u64 bytenr)
366{
367	return alloc_extent_buffer(fs_info, bytenr, fs_info->nodesize);
368}
369
370static int verify_parent_transid(struct extent_io_tree *io_tree,
371				 struct extent_buffer *eb, u64 parent_transid,
372				 int ignore)
373{
374	int ret;
375
376	if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
377		return 0;
378
379	if (extent_buffer_uptodate(eb) &&
380	    btrfs_header_generation(eb) == parent_transid) {
381		ret = 0;
382		goto out;
383	}
384	printk("parent transid verify failed on %llu wanted %llu found %llu\n",
385	       (unsigned long long)eb->start,
386	       (unsigned long long)parent_transid,
387	       (unsigned long long)btrfs_header_generation(eb));
388	if (ignore) {
389		eb->flags |= EXTENT_BAD_TRANSID;
390		printk("Ignoring transid failure\n");
391		return 0;
392	}
393
394	ret = 1;
395out:
396	clear_extent_buffer_uptodate(eb);
397	return ret;
398
399}
400
401int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror)
402{
403	unsigned long offset = 0;
404	struct btrfs_multi_bio *multi = NULL;
405	struct btrfs_device *device;
406	int ret = 0;
407	u64 read_len;
408	unsigned long bytes_left = eb->len;
409
410	while (bytes_left) {
411		read_len = bytes_left;
412		device = NULL;
413
414		ret = btrfs_map_block(info, READ, eb->start + offset,
415				      &read_len, &multi, mirror, NULL);
416		if (ret) {
417			printk("Couldn't map the block %Lu\n", eb->start + offset);
418			kfree(multi);
419			return -EIO;
420		}
421		device = multi->stripes[0].dev;
422
423		if (!device->desc || !device->part) {
424			kfree(multi);
425			return -EIO;
426		}
427
428		if (read_len > bytes_left)
429			read_len = bytes_left;
430
431		ret = read_extent_from_disk(device->desc, device->part,
432					    multi->stripes[0].physical, eb,
433					    offset, read_len);
434		kfree(multi);
435		multi = NULL;
436
437		if (ret)
438			return -EIO;
439		offset += read_len;
440		bytes_left -= read_len;
441	}
442	return 0;
443}
444
445struct extent_buffer* read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
446		u64 parent_transid)
447{
448	int ret;
449	struct extent_buffer *eb;
450	u64 best_transid = 0;
451	u32 sectorsize = fs_info->sectorsize;
452	int mirror_num = 1;
453	int good_mirror = 0;
454	int candidate_mirror = 0;
455	int num_copies;
456	int ignore = 0;
457
458	/*
459	 * Don't even try to create tree block for unaligned tree block
460	 * bytenr.
461	 * Such unaligned tree block will free overlapping extent buffer,
462	 * causing use-after-free bugs for fuzzed images.
463	 */
464	if (bytenr < sectorsize || !IS_ALIGNED(bytenr, sectorsize)) {
465		error("tree block bytenr %llu is not aligned to sectorsize %u",
466		      bytenr, sectorsize);
467		return ERR_PTR(-EIO);
468	}
469
470	eb = btrfs_find_create_tree_block(fs_info, bytenr);
471	if (!eb)
472		return ERR_PTR(-ENOMEM);
473
474	if (btrfs_buffer_uptodate(eb, parent_transid))
475		return eb;
476
477	num_copies = btrfs_num_copies(fs_info, eb->start, eb->len);
478	while (1) {
479		ret = read_whole_eb(fs_info, eb, mirror_num);
480		if (ret == 0 && csum_tree_block(fs_info, eb, 1) == 0 &&
481		    check_tree_block(fs_info, eb) == 0 &&
482		    verify_parent_transid(&fs_info->extent_cache, eb,
483					  parent_transid, ignore) == 0) {
484			/*
485			 * check_tree_block() is less strict to allow btrfs
486			 * check to get raw eb with bad key order and fix it.
487			 * But we still need to try to get a good copy if
488			 * possible, or bad key order can go into tools like
489			 * btrfs ins dump-tree.
490			 */
491			if (btrfs_header_level(eb))
492				ret = btrfs_check_node(fs_info, NULL, eb);
493			else
494				ret = btrfs_check_leaf(fs_info, NULL, eb);
495			if (!ret || candidate_mirror == mirror_num) {
496				btrfs_set_buffer_uptodate(eb);
497				return eb;
498			}
499			if (candidate_mirror <= 0)
500				candidate_mirror = mirror_num;
501		}
502		if (ignore) {
503			if (candidate_mirror > 0) {
504				mirror_num = candidate_mirror;
505				continue;
506			}
507			if (check_tree_block(fs_info, eb))
508				print_tree_block_error(fs_info, eb,
509						check_tree_block(fs_info, eb));
510			else
511				fprintf(stderr, "Csum didn't match\n");
512			ret = -EIO;
513			break;
514		}
515		if (num_copies == 1) {
516			ignore = 1;
517			continue;
518		}
519		if (btrfs_header_generation(eb) > best_transid) {
520			best_transid = btrfs_header_generation(eb);
521			good_mirror = mirror_num;
522		}
523		mirror_num++;
524		if (mirror_num > num_copies) {
525			if (candidate_mirror > 0)
526				mirror_num = candidate_mirror;
527			else
528				mirror_num = good_mirror;
529			ignore = 1;
530			continue;
531		}
532	}
533	/*
534	 * We failed to read this tree block, it be should deleted right now
535	 * to avoid stale cache populate the cache.
536	 */
537	free_extent_buffer(eb);
538	return ERR_PTR(ret);
539}
540
541int read_extent_data(struct btrfs_fs_info *fs_info, char *data, u64 logical,
542		     u64 *len, int mirror)
543{
544	u64 orig_len = *len;
545	u64 cur = logical;
546	struct btrfs_multi_bio *multi = NULL;
547	struct btrfs_device *device;
548	int ret = 0;
549
550	while (cur < logical + orig_len) {
551		u64 cur_len = logical + orig_len - cur;
552
553		ret = btrfs_map_block(fs_info, READ, cur, &cur_len, &multi,
554				      mirror, NULL);
555		if (ret) {
556			error("Couldn't map the block %llu", cur);
557			goto err;
558		}
559		device = multi->stripes[0].dev;
560		if (!device->desc || !device->part) {
561			error("devid %llu is missing", device->devid);
562			ret = -EIO;
563			goto err;
564		}
565		ret = __btrfs_devread(device->desc, device->part,
566				data + (cur - logical), cur_len,
567				multi->stripes[0].physical);
568		if (ret != cur_len) {
569			error("read failed on devid %llu physical %llu",
570			      device->devid, multi->stripes[0].physical);
571			ret = -EIO;
572			goto err;
573		}
574		cur += cur_len;
575		ret = 0;
576	}
577err:
578	kfree(multi);
579	return ret;
580}
581
582void btrfs_setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
583		      u64 objectid)
584{
585	root->node = NULL;
586	root->track_dirty = 0;
587
588	root->fs_info = fs_info;
589	root->objectid = objectid;
590	root->last_trans = 0;
591	root->last_inode_alloc = 0;
592
593	memset(&root->root_key, 0, sizeof(root->root_key));
594	memset(&root->root_item, 0, sizeof(root->root_item));
595	root->root_key.objectid = objectid;
596}
597
598static int find_and_setup_root(struct btrfs_root *tree_root,
599			       struct btrfs_fs_info *fs_info,
600			       u64 objectid, struct btrfs_root *root)
601{
602	int ret;
603	u64 generation;
604
605	btrfs_setup_root(root, fs_info, objectid);
606	ret = btrfs_find_last_root(tree_root, objectid,
607				   &root->root_item, &root->root_key);
608	if (ret)
609		return ret;
610
611	generation = btrfs_root_generation(&root->root_item);
612	root->node = read_tree_block(fs_info,
613			btrfs_root_bytenr(&root->root_item), generation);
614	if (!extent_buffer_uptodate(root->node))
615		return -EIO;
616
617	return 0;
618}
619
620int btrfs_free_fs_root(struct btrfs_root *root)
621{
622	if (root->node)
623		free_extent_buffer(root->node);
624	kfree(root);
625	return 0;
626}
627
628static void __free_fs_root(struct rb_node *node)
629{
630	struct btrfs_root *root;
631
632	root = container_of(node, struct btrfs_root, rb_node);
633	btrfs_free_fs_root(root);
634}
635
636FREE_RB_BASED_TREE(fs_roots, __free_fs_root);
637
638struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info,
639					       struct btrfs_key *location)
640{
641	struct btrfs_root *root;
642	struct btrfs_root *tree_root = fs_info->tree_root;
643	struct btrfs_path *path;
644	struct extent_buffer *l;
645	u64 generation;
646	int ret = 0;
647
648	root = calloc(1, sizeof(*root));
649	if (!root)
650		return ERR_PTR(-ENOMEM);
651	if (location->offset == (u64)-1) {
652		ret = find_and_setup_root(tree_root, fs_info,
653					  location->objectid, root);
654		if (ret) {
655			free(root);
656			return ERR_PTR(ret);
657		}
658		goto insert;
659	}
660
661	btrfs_setup_root(root, fs_info,
662			 location->objectid);
663
664	path = btrfs_alloc_path();
665	if (!path) {
666		free(root);
667		return ERR_PTR(-ENOMEM);
668	}
669
670	ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
671	if (ret != 0) {
672		if (ret > 0)
673			ret = -ENOENT;
674		goto out;
675	}
676	l = path->nodes[0];
677	read_extent_buffer(l, &root->root_item,
678	       btrfs_item_ptr_offset(l, path->slots[0]),
679	       sizeof(root->root_item));
680	memcpy(&root->root_key, location, sizeof(*location));
681
682	/* If this root is already an orphan, no need to read */
683	if (btrfs_root_refs(&root->root_item) == 0) {
684		ret = -ENOENT;
685		goto out;
686	}
687	ret = 0;
688out:
689	btrfs_free_path(path);
690	if (ret) {
691		free(root);
692		return ERR_PTR(ret);
693	}
694	generation = btrfs_root_generation(&root->root_item);
695	root->node = read_tree_block(fs_info,
696			btrfs_root_bytenr(&root->root_item), generation);
697	if (!extent_buffer_uptodate(root->node)) {
698		free(root);
699		return ERR_PTR(-EIO);
700	}
701insert:
702	root->ref_cows = 1;
703	return root;
704}
705
706static int btrfs_fs_roots_compare_objectids(struct rb_node *node,
707					    void *data)
708{
709	u64 objectid = *((u64 *)data);
710	struct btrfs_root *root;
711
712	root = rb_entry(node, struct btrfs_root, rb_node);
713	if (objectid > root->objectid)
714		return 1;
715	else if (objectid < root->objectid)
716		return -1;
717	else
718		return 0;
719}
720
721int btrfs_fs_roots_compare_roots(struct rb_node *node1, struct rb_node *node2)
722{
723	struct btrfs_root *root;
724
725	root = rb_entry(node2, struct btrfs_root, rb_node);
726	return btrfs_fs_roots_compare_objectids(node1, (void *)&root->objectid);
727}
728
729struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
730				      struct btrfs_key *location)
731{
732	struct btrfs_root *root;
733	struct rb_node *node;
734	int ret;
735	u64 objectid = location->objectid;
736
737	if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
738		return fs_info->tree_root;
739	if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID)
740		return fs_info->chunk_root;
741	if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
742		return fs_info->csum_root;
743	BUG_ON(location->objectid == BTRFS_TREE_RELOC_OBJECTID);
744
745	node = rb_search(&fs_info->fs_root_tree, (void *)&objectid,
746			 btrfs_fs_roots_compare_objectids, NULL);
747	if (node)
748		return container_of(node, struct btrfs_root, rb_node);
749
750	root = btrfs_read_fs_root_no_cache(fs_info, location);
751	if (IS_ERR(root))
752		return root;
753
754	ret = rb_insert(&fs_info->fs_root_tree, &root->rb_node,
755			btrfs_fs_roots_compare_roots);
756	BUG_ON(ret);
757	return root;
758}
759
760void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
761{
762	free(fs_info->tree_root);
763	free(fs_info->chunk_root);
764	free(fs_info->csum_root);
765	free(fs_info->super_copy);
766	free(fs_info);
767}
768
769struct btrfs_fs_info *btrfs_new_fs_info(void)
770{
771	struct btrfs_fs_info *fs_info;
772
773	fs_info = calloc(1, sizeof(struct btrfs_fs_info));
774	if (!fs_info)
775		return NULL;
776
777	fs_info->tree_root = calloc(1, sizeof(struct btrfs_root));
778	fs_info->chunk_root = calloc(1, sizeof(struct btrfs_root));
779	fs_info->csum_root = calloc(1, sizeof(struct btrfs_root));
780	fs_info->super_copy = calloc(1, BTRFS_SUPER_INFO_SIZE);
781
782	if (!fs_info->tree_root || !fs_info->chunk_root ||
783	    !fs_info->csum_root || !fs_info->super_copy)
784		goto free_all;
785
786	extent_io_tree_init(&fs_info->extent_cache);
787
788	fs_info->fs_root_tree = RB_ROOT;
789	cache_tree_init(&fs_info->mapping_tree.cache_tree);
790
791	return fs_info;
792free_all:
793	btrfs_free_fs_info(fs_info);
794	return NULL;
795}
796
797static int setup_root_or_create_block(struct btrfs_fs_info *fs_info,
798				      struct btrfs_root *info_root,
799				      u64 objectid, char *str)
800{
801	struct btrfs_root *root = fs_info->tree_root;
802	int ret;
803
804	ret = find_and_setup_root(root, fs_info, objectid, info_root);
805	if (ret) {
806		error("could not setup %s tree", str);
807		return -EIO;
808	}
809
810	return 0;
811}
812
813static int get_default_subvolume(struct btrfs_fs_info *fs_info,
814				 struct btrfs_key *key_ret)
815{
816	struct btrfs_root *root = fs_info->tree_root;
817	struct btrfs_dir_item *dir_item;
818	struct btrfs_path path;
819	int ret = 0;
820
821	btrfs_init_path(&path);
822
823	dir_item = btrfs_lookup_dir_item(NULL, root, &path,
824					 BTRFS_ROOT_TREE_DIR_OBJECTID,
825					 "default", 7, 0);
826	if (IS_ERR(dir_item)) {
827		ret = PTR_ERR(dir_item);
828		goto out;
829	}
830
831	btrfs_dir_item_key_to_cpu(path.nodes[0], dir_item, key_ret);
832out:
833	btrfs_release_path(&path);
834	return ret;
835}
836
837int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info)
838{
839	struct btrfs_super_block *sb = fs_info->super_copy;
840	struct btrfs_root *root;
841	struct btrfs_key key;
842	u64 root_tree_bytenr;
843	u64 generation;
844	int ret;
845
846	root = fs_info->tree_root;
847	btrfs_setup_root(root, fs_info, BTRFS_ROOT_TREE_OBJECTID);
848	generation = btrfs_super_generation(sb);
849
850	root_tree_bytenr = btrfs_super_root(sb);
851
852	root->node = read_tree_block(fs_info, root_tree_bytenr, generation);
853	if (!extent_buffer_uptodate(root->node)) {
854		fprintf(stderr, "Couldn't read tree root\n");
855		return -EIO;
856	}
857
858	ret = setup_root_or_create_block(fs_info, fs_info->csum_root,
859					 BTRFS_CSUM_TREE_OBJECTID, "csum");
860	if (ret)
861		return ret;
862	fs_info->csum_root->track_dirty = 1;
863
864	fs_info->last_trans_committed = generation;
865
866	ret = get_default_subvolume(fs_info, &key);
867	if (ret) {
868		/*
869		 * The default dir item isn't there. Linux kernel behaviour is
870		 * to silently use the top-level subvolume in this case.
871		 */
872		key.objectid = BTRFS_FS_TREE_OBJECTID;
873		key.type = BTRFS_ROOT_ITEM_KEY;
874		key.offset = (u64)-1;
875	}
876
877	fs_info->fs_root = btrfs_read_fs_root(fs_info, &key);
878
879	if (IS_ERR(fs_info->fs_root))
880		return -EIO;
881	return 0;
882}
883
884void btrfs_release_all_roots(struct btrfs_fs_info *fs_info)
885{
886	if (fs_info->csum_root)
887		free_extent_buffer(fs_info->csum_root->node);
888	if (fs_info->tree_root)
889		free_extent_buffer(fs_info->tree_root->node);
890	if (fs_info->chunk_root)
891		free_extent_buffer(fs_info->chunk_root->node);
892}
893
894static void free_map_lookup(struct cache_extent *ce)
895{
896	struct map_lookup *map;
897
898	map = container_of(ce, struct map_lookup, ce);
899	kfree(map);
900}
901
902FREE_EXTENT_CACHE_BASED_TREE(mapping_cache, free_map_lookup);
903
904void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info)
905{
906	free_mapping_cache_tree(&fs_info->mapping_tree.cache_tree);
907	extent_io_tree_cleanup(&fs_info->extent_cache);
908}
909
910static int btrfs_scan_fs_devices(struct blk_desc *desc,
911				 struct disk_partition *part,
912				 struct btrfs_fs_devices **fs_devices)
913{
914	u64 total_devs;
915	int ret;
916
917	if (round_up(BTRFS_SUPER_INFO_SIZE + BTRFS_SUPER_INFO_OFFSET,
918		     desc->blksz) > (part->size << desc->log2blksz)) {
919		log_debug("superblock end %u is larger than device size " LBAFU,
920			  BTRFS_SUPER_INFO_SIZE + BTRFS_SUPER_INFO_OFFSET,
921			  part->size << desc->log2blksz);
922		return -EINVAL;
923	}
924
925	ret = btrfs_scan_one_device(desc, part, fs_devices, &total_devs);
926	if (ret) {
927		/*
928		 * Avoid showing this when probing for a possible Btrfs
929		 *
930		 * fprintf(stderr, "No valid Btrfs found\n");
931		 */
932		return ret;
933	}
934	return 0;
935}
936
937int btrfs_check_fs_compatibility(struct btrfs_super_block *sb)
938{
939	u64 features;
940
941	features = btrfs_super_incompat_flags(sb) &
942		   ~BTRFS_FEATURE_INCOMPAT_SUPP;
943	if (features) {
944		printk("couldn't open because of unsupported "
945		       "option features (%llx).\n",
946		       (unsigned long long)features);
947		return -ENOTSUPP;
948	}
949
950	features = btrfs_super_incompat_flags(sb);
951	if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) {
952		features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
953		btrfs_set_super_incompat_flags(sb, features);
954	}
955
956	return 0;
957}
958
959static int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info)
960{
961	struct btrfs_super_block *sb = fs_info->super_copy;
962	u64 chunk_root_bytenr;
963	u64 generation;
964	int ret;
965
966	btrfs_setup_root(fs_info->chunk_root, fs_info,
967			BTRFS_CHUNK_TREE_OBJECTID);
968
969	ret = btrfs_read_sys_array(fs_info);
970	if (ret)
971		return ret;
972
973	generation = btrfs_super_chunk_root_generation(sb);
974	chunk_root_bytenr = btrfs_super_chunk_root(sb);
975
976	fs_info->chunk_root->node = read_tree_block(fs_info,
977						    chunk_root_bytenr,
978						    generation);
979	if (!extent_buffer_uptodate(fs_info->chunk_root->node)) {
980		error("cannot read chunk root");
981		return -EIO;
982	}
983
984	ret = btrfs_read_chunk_tree(fs_info);
985	if (ret) {
986		fprintf(stderr, "Couldn't read chunk tree\n");
987		return ret;
988	}
989	return 0;
990}
991
992struct btrfs_fs_info *open_ctree_fs_info(struct blk_desc *desc,
993					 struct disk_partition *part)
994{
995	struct btrfs_fs_info *fs_info;
996	struct btrfs_super_block *disk_super;
997	struct btrfs_fs_devices *fs_devices = NULL;
998	struct extent_buffer *eb;
999	int ret;
1000
1001	fs_info = btrfs_new_fs_info();
1002	if (!fs_info) {
1003		fprintf(stderr, "Failed to allocate memory for fs_info\n");
1004		return NULL;
1005	}
1006
1007	ret = btrfs_scan_fs_devices(desc, part, &fs_devices);
1008	if (ret)
1009		goto out;
1010
1011	fs_info->fs_devices = fs_devices;
1012
1013	ret = btrfs_open_devices(fs_devices);
1014	if (ret)
1015		goto out;
1016
1017	disk_super = fs_info->super_copy;
1018	ret = btrfs_read_dev_super(desc, part, disk_super);
1019	if (ret) {
1020		debug("No valid btrfs found\n");
1021		goto out_devices;
1022	}
1023
1024	if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_CHANGING_FSID) {
1025		fprintf(stderr, "ERROR: Filesystem UUID change in progress\n");
1026		goto out_devices;
1027	}
1028
1029	ASSERT(!memcmp(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE));
1030	if (btrfs_fs_incompat(fs_info, METADATA_UUID))
1031		ASSERT(!memcmp(disk_super->metadata_uuid,
1032			       fs_devices->metadata_uuid, BTRFS_FSID_SIZE));
1033
1034	fs_info->sectorsize = btrfs_super_sectorsize(disk_super);
1035	fs_info->nodesize = btrfs_super_nodesize(disk_super);
1036	fs_info->stripesize = btrfs_super_stripesize(disk_super);
1037
1038	ret = btrfs_check_fs_compatibility(fs_info->super_copy);
1039	if (ret)
1040		goto out_devices;
1041
1042	ret = btrfs_setup_chunk_tree_and_device_map(fs_info);
1043	if (ret)
1044		goto out_chunk;
1045
1046	/* Chunk tree root is unable to read, return directly */
1047	if (!fs_info->chunk_root)
1048		return fs_info;
1049
1050	eb = fs_info->chunk_root->node;
1051	read_extent_buffer(eb, fs_info->chunk_tree_uuid,
1052			   btrfs_header_chunk_tree_uuid(eb),
1053			   BTRFS_UUID_SIZE);
1054
1055	ret = btrfs_setup_all_roots(fs_info);
1056	if (ret)
1057		goto out_chunk;
1058
1059	return fs_info;
1060
1061out_chunk:
1062	btrfs_release_all_roots(fs_info);
1063	btrfs_cleanup_all_caches(fs_info);
1064out_devices:
1065	btrfs_close_devices(fs_devices);
1066out:
1067	btrfs_free_fs_info(fs_info);
1068	return NULL;
1069}
1070
1071int close_ctree_fs_info(struct btrfs_fs_info *fs_info)
1072{
1073	int ret;
1074
1075	free_fs_roots_tree(&fs_info->fs_root_tree);
1076
1077	btrfs_release_all_roots(fs_info);
1078	ret = btrfs_close_devices(fs_info->fs_devices);
1079	btrfs_cleanup_all_caches(fs_info);
1080	btrfs_free_fs_info(fs_info);
1081	return ret;
1082}
1083
1084int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
1085{
1086	int ret;
1087
1088	ret = extent_buffer_uptodate(buf);
1089	if (!ret)
1090		return ret;
1091
1092	ret = verify_parent_transid(&buf->fs_info->extent_cache, buf,
1093				    parent_transid, 1);
1094	return !ret;
1095}
1096
1097int btrfs_set_buffer_uptodate(struct extent_buffer *eb)
1098{
1099	return set_extent_buffer_uptodate(eb);
1100}
1101