// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "bbpos.h"
#include "alloc_background.h"
#include "backpointers.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
#include "checksum.h"
#include "error.h"

#include <linux/mm.h>

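/*
 * Check whether the extent or btree node key @k has a non-cached pointer that
 * maps to @bucket with a backpointer identical to @bp - i.e. whether @bp
 * still matches the key it points to.
 */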
static bool extent_matches_bp(struct bch_fs *c,
			      enum btree_id btree_id, unsigned level,
			      struct bkey_s_c k,
			      struct bpos bucket,
			      struct bch_backpointer bp)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;

	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		struct bpos bucket2;
		struct bch_backpointer bp2;

		if (p.ptr.cached)
			continue;

		bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bucket2, &bp2);
		if (bpos_eq(bucket, bucket2) &&
		    !memcmp(&bp, &bp2, sizeof(bp)))
			return true;
	}

	return false;
}

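/*
 * Validate a KEY_TYPE_backpointer key: check that its position encodes a
 * valid bucket and offset on the device it refers to. Backpointers for
 * devices that no longer exist are left alone here and cleaned up by fsck.
 */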
int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
			     enum bkey_invalid_flags flags,
			     struct printbuf *err)
{
	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);

	/* these will be caught by fsck */
	if (!bch2_dev_exists2(c, bp.k->p.inode))
		return 0;

	struct bch_dev *ca = bch_dev_bkey_exists(c, bp.k->p.inode);
	struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);
	int ret = 0;

	bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size ||
			 !bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
			 c, err,
			 backpointer_bucket_offset_wrong,
			 "backpointer bucket_offset wrong");
fsck_err:
	return ret;
}

void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp)
{
	prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=",
	       bch2_btree_id_str(bp->btree_id),
	       bp->level,
	       (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT),
	       (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
	       bp->bucket_len);
	bch2_bpos_to_text(out, bp->pos);
}

void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
{
	if (bch2_dev_exists2(c, k.k->p.inode)) {
		prt_str(out, "bucket=");
		bch2_bpos_to_text(out, bp_pos_to_bucket(c, k.k->p));
		prt_str(out, " ");
	}

	bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v);
}

void bch2_backpointer_swab(struct bkey_s k)
{
	struct bkey_s_backpointer bp = bkey_s_to_backpointer(k);

	bp.v->bucket_offset	= swab40(bp.v->bucket_offset);
	bp.v->bucket_len	= swab32(bp.v->bucket_len);
	bch2_bpos_swab(&bp.v->pos);
}

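/*
 * Report a mismatch between an extent and the backpointers btree: an existing
 * backpointer where we expected none on insert, or a missing/mismatched one
 * on delete. Only treated as a filesystem inconsistency once the
 * extents_to_backpointers fsck pass has already run.
 */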
static noinline int backpointer_mod_err(struct btree_trans *trans,
					struct bch_backpointer bp,
					struct bkey_s_c bp_k,
					struct bkey_s_c orig_k,
					bool insert)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;

	if (insert) {
		prt_printf(&buf, "existing backpointer found when inserting ");
		bch2_backpointer_to_text(&buf, &bp);
		prt_newline(&buf);
		printbuf_indent_add(&buf, 2);

		prt_printf(&buf, "found ");
		bch2_bkey_val_to_text(&buf, c, bp_k);
		prt_newline(&buf);

		prt_printf(&buf, "for ");
		bch2_bkey_val_to_text(&buf, c, orig_k);

		bch_err(c, "%s", buf.buf);
	} else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
		prt_printf(&buf, "backpointer not found when deleting");
		prt_newline(&buf);
		printbuf_indent_add(&buf, 2);

		prt_printf(&buf, "searching for ");
		bch2_backpointer_to_text(&buf, &bp);
		prt_newline(&buf);

		prt_printf(&buf, "got ");
		bch2_bkey_val_to_text(&buf, c, bp_k);
		prt_newline(&buf);

		prt_printf(&buf, "for ");
		bch2_bkey_val_to_text(&buf, c, orig_k);

		bch_err(c, "%s", buf.buf);
	}

	printbuf_exit(&buf);

	if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers)
		return bch2_inconsistent_error(c) ? -BCH_ERR_erofs_unfixed_errors : 0;

	return 0;
}

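/*
 * Insert or delete the backpointer for @bucket directly, bypassing the btree
 * write buffer: verify that the existing key (or lack of one) matches what
 * the caller expects before applying the update.
 */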
int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
				struct bpos bucket,
				struct bch_backpointer bp,
				struct bkey_s_c orig_k,
				bool insert)
{
	struct btree_iter bp_iter;
	struct bkey_s_c k;
	struct bkey_i_backpointer *bp_k;
	int ret;

	bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer));
	ret = PTR_ERR_OR_ZERO(bp_k);
	if (ret)
		return ret;

	bkey_backpointer_init(&bp_k->k_i);
	bp_k->k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
	bp_k->v = bp;

	if (!insert) {
		bp_k->k.type = KEY_TYPE_deleted;
		set_bkey_val_u64s(&bp_k->k, 0);
	}

	k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
			       bp_k->k.p,
			       BTREE_ITER_INTENT|
			       BTREE_ITER_SLOTS|
			       BTREE_ITER_WITH_UPDATES);
	ret = bkey_err(k);
	if (ret)
		goto err;

	if (insert
	    ? k.k->type
	    : (k.k->type != KEY_TYPE_backpointer ||
	       memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) {
		ret = backpointer_mod_err(trans, bp, k, orig_k, insert);
		if (ret)
			goto err;
	}

	ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0);
err:
	bch2_trans_iter_exit(trans, &bp_iter);
	return ret;
}

/*
 * Find the next backpointer >= *bp_pos:
 */
int bch2_get_next_backpointer(struct btree_trans *trans,
			      struct bpos bucket, int gen,
			      struct bpos *bp_pos,
			      struct bch_backpointer *bp,
			      unsigned iter_flags)
{
	struct bch_fs *c = trans->c;
	struct bpos bp_end_pos = bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0);
	struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL };
	struct bkey_s_c k;
	int ret = 0;

	if (bpos_ge(*bp_pos, bp_end_pos))
		goto done;

	if (gen >= 0) {
		k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
				       bucket, BTREE_ITER_CACHED|iter_flags);
		ret = bkey_err(k);
		if (ret)
			goto out;

		if (k.k->type != KEY_TYPE_alloc_v4 ||
		    bkey_s_c_to_alloc_v4(k).v->gen != gen)
			goto done;
	}

	*bp_pos = bpos_max(*bp_pos, bucket_pos_to_bp(c, bucket, 0));

	for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers,
				     *bp_pos, iter_flags, k, ret) {
		if (bpos_ge(k.k->p, bp_end_pos))
			break;

		*bp_pos = k.k->p;
		*bp = *bkey_s_c_to_backpointer(k).v;
		goto out;
	}
done:
	*bp_pos = SPOS_MAX;
out:
	bch2_trans_iter_exit(trans, &bp_iter);
	bch2_trans_iter_exit(trans, &alloc_iter);
	return ret;
}

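/*
 * Complain about a backpointer whose target couldn't be found. Only a real
 * inconsistency when the btree write buffer is disabled for backpointers;
 * otherwise the backpointer may simply have been deleted and not yet flushed.
 */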
static void backpointer_not_found(struct btree_trans *trans,
				  struct bpos bp_pos,
				  struct bch_backpointer bp,
				  struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	struct bpos bucket = bp_pos_to_bucket(c, bp_pos);

	/*
	 * If we're using the btree write buffer, the backpointer we were
	 * looking at may have already been deleted - failure to find what it
	 * pointed to is not an error:
	 */
	if (likely(!bch2_backpointers_no_use_write_buffer))
		return;

	prt_printf(&buf, "backpointer doesn't match %s it points to:\n  ",
		   bp.level ? "btree node" : "extent");
	prt_printf(&buf, "bucket: ");
	bch2_bpos_to_text(&buf, bucket);
	prt_printf(&buf, "\n  ");

	prt_printf(&buf, "backpointer pos: ");
	bch2_bpos_to_text(&buf, bp_pos);
	prt_printf(&buf, "\n  ");

	bch2_backpointer_to_text(&buf, &bp);
	prt_printf(&buf, "\n  ");
	bch2_bkey_val_to_text(&buf, c, k);
	if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers)
		bch_err_ratelimited(c, "%s", buf.buf);
	else
		bch2_trans_inconsistent(trans, "%s", buf.buf);

	printbuf_exit(&buf);
}

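/*
 * Look up the extent (or, for bp.level != 0, the btree node key) that a
 * backpointer points to. Returns bkey_s_c_null, not an error, if the target
 * wasn't found - see backpointer_not_found().
 */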
struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
					 struct btree_iter *iter,
					 struct bpos bp_pos,
					 struct bch_backpointer bp,
					 unsigned iter_flags)
{
	if (likely(!bp.level)) {
		struct bch_fs *c = trans->c;
		struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
		struct bkey_s_c k;

		bch2_trans_node_iter_init(trans, iter,
					  bp.btree_id,
					  bp.pos,
					  0, 0,
					  iter_flags);
		k = bch2_btree_iter_peek_slot(iter);
		if (bkey_err(k)) {
			bch2_trans_iter_exit(trans, iter);
			return k;
		}

		if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
			return k;

		bch2_trans_iter_exit(trans, iter);
		backpointer_not_found(trans, bp_pos, bp, k);
		return bkey_s_c_null;
	} else {
		struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp);

		if (IS_ERR_OR_NULL(b)) {
			bch2_trans_iter_exit(trans, iter);
			return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null;
		}
		return bkey_i_to_s_c(&b->key);
	}
}

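/*
 * Look up the btree node that a backpointer points to. Returns
 * -BCH_ERR_backpointer_to_overwritten_btree_node if the node found is part of
 * a pending interior update (btree_node_will_make_reachable()), or NULL if no
 * matching node was found.
 */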
struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
					struct btree_iter *iter,
					struct bpos bp_pos,
					struct bch_backpointer bp)
{
	struct bch_fs *c = trans->c;
	struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
	struct btree *b;

	BUG_ON(!bp.level);

	bch2_trans_node_iter_init(trans, iter,
				  bp.btree_id,
				  bp.pos,
				  0,
				  bp.level - 1,
				  0);
	b = bch2_btree_iter_peek_node(iter);
	if (IS_ERR_OR_NULL(b))
		goto err;

	BUG_ON(b->c.level != bp.level - 1);

	if (extent_matches_bp(c, bp.btree_id, bp.level,
			      bkey_i_to_s_c(&b->key),
			      bucket, bp))
		return b;

	if (btree_node_will_make_reachable(b)) {
		b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
	} else {
		backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key));
		b = NULL;
	}
err:
	bch2_trans_iter_exit(trans, iter);
	return b;
}

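/*
 * Check a single backpointer key: delete it if it refers to a device that no
 * longer exists or to a bucket without an alloc_v4 key.
 */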
static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter,
					struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter alloc_iter = { NULL };
	struct bkey_s_c alloc_k;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c,
			backpointer_to_missing_device,
			"backpointer for missing device:\n%s",
			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
		ret = bch2_btree_delete_at(trans, bp_iter, 0);
		goto out;
	}

	alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
				     bp_pos_to_bucket(c, k.k->p), 0);
	ret = bkey_err(alloc_k);
	if (ret)
		goto out;

	if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c,
			backpointer_to_missing_alloc,
			"backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
			alloc_iter.pos.inode, alloc_iter.pos.offset,
			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
		ret = bch2_btree_delete_at(trans, bp_iter, 0);
		goto out;
	}
out:
fsck_err:
	bch2_trans_iter_exit(trans, &alloc_iter);
	printbuf_exit(&buf);
	return ret;
}

/* verify that every backpointer has a corresponding alloc key */
int bch2_check_btree_backpointers(struct bch_fs *c)
{
	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter,
			BTREE_ID_backpointers, POS_MIN, 0, k,
			NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
		  bch2_check_btree_backpointer(trans, &iter, k)));
	bch_err_fn(c, ret);
	return ret;
}

static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
{
	return bpos_eq(l.k->p, r.k->p) &&
		bkey_bytes(l.k) == bkey_bytes(r.k) &&
		!memcmp(l.v, r.v, bkey_val_bytes(l.k));
}

struct extents_to_bp_state {
	struct bpos	bucket_start;
	struct bpos	bucket_end;
	struct bkey_buf last_flushed;
};

static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree,
			       struct bkey_s_c extent, unsigned dev)
{
	struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent);
	int ret = PTR_ERR_OR_ZERO(n);
	if (ret)
		return ret;

	bch2_bkey_drop_device(bkey_i_to_s(n), dev);
	return bch2_btree_insert_trans(trans, btree, n, 0);
}

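/*
 * Two extents claim the same bucket space; read @extent's data from @dev and
 * verify its checksum. Returns 1 (after dropping @dev from the extent) if the
 * checksum is bad, 0 if it verifies or can't be checked, negative on error.
 */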
static int check_extent_checksum(struct btree_trans *trans,
				 enum btree_id btree, struct bkey_s_c extent,
				 enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	struct printbuf buf = PRINTBUF;
	void *data_buf = NULL;
	struct bio *bio = NULL;
	size_t bytes;
	int ret = 0;

	if (bkey_is_btree_ptr(extent.k))
		return false;

	bkey_for_each_ptr_decode(extent.k, ptrs, p, entry)
		if (p.ptr.dev == dev)
			goto found;
	BUG();
found:
	if (!p.crc.csum_type)
		return false;

	bytes = p.crc.compressed_size << 9;

	struct bch_dev *ca = bch_dev_bkey_exists(c, dev);
	if (!bch2_dev_get_ioref(ca, READ))
		return false;

	data_buf = kvmalloc(bytes, GFP_KERNEL);
	if (!data_buf) {
		ret = -ENOMEM;
		goto err;
	}

	bio = bio_alloc(ca->disk_sb.bdev, buf_pages(data_buf, bytes), REQ_OP_READ, GFP_KERNEL);
	bio->bi_iter.bi_sector = p.ptr.offset;
	bch2_bio_map(bio, data_buf, bytes);
	ret = submit_bio_wait(bio);
	if (ret)
		goto err;

	prt_str(&buf, "extents pointing to same space, but first extent checksum bad:");
	prt_printf(&buf, "\n  %s ", bch2_btree_id_str(btree));
	bch2_bkey_val_to_text(&buf, c, extent);
	prt_printf(&buf, "\n  %s ", bch2_btree_id_str(o_btree));
	bch2_bkey_val_to_text(&buf, c, extent2);

	struct nonce nonce = extent_nonce(extent.k->version, p.crc);
	struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes);
	if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum),
			c, dup_backpointer_to_bad_csum_extent,
			"%s", buf.buf))
		ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1;
fsck_err:
err:
	if (bio)
		bio_put(bio);
	kvfree(data_buf);
	percpu_ref_put(&ca->io_ref);
	printbuf_exit(&buf);
	return ret;
}

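/*
 * Verify that the backpointer for @orig_k's pointer into @bucket exists and
 * matches @bp, creating it if fsck confirms it's missing. If the slot is
 * occupied by a backpointer to a different extent, work out which of the two
 * extents is bogus and drop the device from it.
 */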
static int check_bp_exists(struct btree_trans *trans,
			   struct extents_to_bp_state *s,
			   struct bpos bucket,
			   struct bch_backpointer bp,
			   struct bkey_s_c orig_k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter bp_iter = {};
	struct btree_iter other_extent_iter = {};
	struct printbuf buf = PRINTBUF;
	struct bkey_s_c bp_k;
	struct bkey_buf tmp;
	int ret;

	bch2_bkey_buf_init(&tmp);

	if (!bch2_dev_bucket_exists(c, bucket)) {
		prt_str(&buf, "extent for nonexistent device:bucket ");
		bch2_bpos_to_text(&buf, bucket);
		prt_str(&buf, "\n  ");
		bch2_bkey_val_to_text(&buf, c, orig_k);
		bch_err(c, "%s", buf.buf);
		ret = -BCH_ERR_fsck_repair_unimplemented;
		goto err;
	}

	if (bpos_lt(bucket, s->bucket_start) ||
	    bpos_gt(bucket, s->bucket_end))
		return 0;

	bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
				  bucket_pos_to_bp(c, bucket, bp.bucket_offset),
				  0);
	ret = bkey_err(bp_k);
	if (ret)
		goto err;

	if (bp_k.k->type != KEY_TYPE_backpointer ||
	    memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
		bch2_bkey_buf_reassemble(&tmp, c, orig_k);

		if (!bkey_and_val_eq(orig_k, bkey_i_to_s_c(s->last_flushed.k))) {
			if (bp.level) {
				bch2_trans_unlock(trans);
				bch2_btree_interior_updates_flush(c);
			}

			ret = bch2_btree_write_buffer_flush_sync(trans);
			if (ret)
				goto err;

			bch2_bkey_buf_copy(&s->last_flushed, c, tmp.k);
			ret = -BCH_ERR_transaction_restart_write_buffer_flush;
			goto out;
		}

		goto check_existing_bp;
	}
out:
err:
fsck_err:
	bch2_trans_iter_exit(trans, &other_extent_iter);
	bch2_trans_iter_exit(trans, &bp_iter);
	bch2_bkey_buf_exit(&tmp, c);
	printbuf_exit(&buf);
	return ret;
check_existing_bp:
	/* Do we have a backpointer for a different extent? */
	if (bp_k.k->type != KEY_TYPE_backpointer)
		goto missing;

	struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(bp_k).v;

	struct bkey_s_c other_extent =
		bch2_backpointer_get_key(trans, &other_extent_iter, bp_k.k->p, other_bp, 0);
	ret = bkey_err(other_extent);
	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
		ret = 0;
	if (ret)
		goto err;

	if (!other_extent.k)
		goto missing;

	if (bch2_extents_match(orig_k, other_extent)) {
		printbuf_reset(&buf);
		prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n  ");
		bch2_bkey_val_to_text(&buf, c, orig_k);
		prt_str(&buf, "\n  ");
		bch2_bkey_val_to_text(&buf, c, other_extent);
		bch_err(c, "%s", buf.buf);

		if (other_extent.k->size <= orig_k.k->size) {
			ret = drop_dev_and_update(trans, other_bp.btree_id, other_extent, bucket.inode);
			if (ret)
				goto err;
			goto out;
		} else {
			ret = drop_dev_and_update(trans, bp.btree_id, orig_k, bucket.inode);
			if (ret)
				goto err;
			goto missing;
		}
	}

	ret = check_extent_checksum(trans, other_bp.btree_id, other_extent, bp.btree_id, orig_k, bucket.inode);
	if (ret < 0)
		goto err;
	if (ret) {
		ret = 0;
		goto missing;
	}

	ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.btree_id, other_extent, bucket.inode);
	if (ret < 0)
		goto err;
	if (ret) {
		ret = 0;
		goto out;
	}

	printbuf_reset(&buf);
	prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n  ", bucket.inode);
	bch2_bkey_val_to_text(&buf, c, orig_k);
	prt_str(&buf, "\n  ");
	bch2_bkey_val_to_text(&buf, c, other_extent);
	bch_err(c, "%s", buf.buf);
	ret = -BCH_ERR_fsck_repair_unimplemented;
	goto err;
missing:
	printbuf_reset(&buf);
	prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
	       bch2_btree_id_str(bp.btree_id), bp.level);
	bch2_bkey_val_to_text(&buf, c, orig_k);
	prt_printf(&buf, "\n  got:   ");
	bch2_bkey_val_to_text(&buf, c, bp_k);

	struct bkey_i_backpointer n_bp_k;
	bkey_backpointer_init(&n_bp_k.k_i);
	n_bp_k.k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
	n_bp_k.v = bp;
	prt_printf(&buf, "\n  want:  ");
	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&n_bp_k.k_i));

	if (fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf))
		ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true);

	goto out;
}

static int check_extent_to_backpointers(struct btree_trans *trans,
					struct extents_to_bp_state *s,
					enum btree_id btree, unsigned level,
					struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs;
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	int ret;

	ptrs = bch2_bkey_ptrs_c(k);
	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		struct bpos bucket_pos;
		struct bch_backpointer bp;

		if (p.ptr.cached)
			continue;

		bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bucket_pos, &bp);

		ret = check_bp_exists(trans, s, bucket_pos, bp, k);
		if (ret)
			return ret;
	}

	return 0;
}

static int check_btree_root_to_backpointers(struct btree_trans *trans,
					    struct extents_to_bp_state *s,
					    enum btree_id btree_id,
					    int *level)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct btree *b;
	struct bkey_s_c k;
	int ret;
retry:
	bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN,
				  0, bch2_btree_id_root(c, btree_id)->b->c.level, 0);
	b = bch2_btree_iter_peek_node(&iter);
	ret = PTR_ERR_OR_ZERO(b);
	if (ret)
		goto err;

	if (b != btree_node_root(c, b)) {
		bch2_trans_iter_exit(trans, &iter);
		goto retry;
	}

	*level = b->c.level;

	k = bkey_i_to_s_c(&b->key);
	ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
{
	return (struct bbpos) {
		.btree	= bp.btree_id,
		.pos	= bp.pos,
	};
}

static u64 mem_may_pin_bytes(struct bch_fs *c)
{
	struct sysinfo i;
	si_meminfo(&i);

	u64 mem_bytes = i.totalram * i.mem_unit;
	return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100);
}

static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
{
	return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size);
}

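/*
 * Walk the btrees selected by @btree_leaf_mask and @btree_interior_mask from
 * @start, pinning nodes in the btree node cache until the fsck memory budget
 * is used up; *end is set to the position where the budget ran out, so the
 * caller can run the check in multiple passes.
 */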
static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
					u64 btree_leaf_mask,
					u64 btree_interior_mask,
					struct bbpos start, struct bbpos *end)
{
	struct bch_fs *c = trans->c;
	s64 mem_may_pin = mem_may_pin_bytes(c);
	int ret = 0;

	btree_interior_mask |= btree_leaf_mask;

	c->btree_cache.pinned_nodes_leaf_mask		= btree_leaf_mask;
	c->btree_cache.pinned_nodes_interior_mask	= btree_interior_mask;
	c->btree_cache.pinned_nodes_start		= start;
	c->btree_cache.pinned_nodes_end			= *end = BBPOS_MAX;

	for (enum btree_id btree = start.btree;
	     btree < BTREE_ID_NR && !ret;
	     btree++) {
		unsigned depth = (BIT_ULL(btree) & btree_leaf_mask) ? 0 : 1;
		struct btree_iter iter;
		struct btree *b;

		if (!(BIT_ULL(btree) & btree_leaf_mask) &&
		    !(BIT_ULL(btree) & btree_interior_mask))
			continue;

		__for_each_btree_node(trans, iter, btree,
				      btree == start.btree ? start.pos : POS_MIN,
				      0, depth, BTREE_ITER_PREFETCH, b, ret) {
			mem_may_pin -= btree_buf_bytes(b);
			if (mem_may_pin <= 0) {
				c->btree_cache.pinned_nodes_end = *end =
					BBPOS(btree, b->key.k.p);
				bch2_trans_iter_exit(trans, &iter);
				return 0;
			}
		}
		bch2_trans_iter_exit(trans, &iter);
	}

	return ret;
}

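/*
 * One pass of the extents -> backpointers check: walk every btree with
 * pointers from the root down, verifying that each pointer landing in
 * [s->bucket_start, s->bucket_end] has a matching backpointer.
 */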
static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
						   struct extents_to_bp_state *s)
{
	struct bch_fs *c = trans->c;
	int ret = 0;

	for (enum btree_id btree_id = 0;
	     btree_id < btree_id_nr_alive(c);
	     btree_id++) {
		int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1;

		ret = commit_do(trans, NULL, NULL,
				BCH_TRANS_COMMIT_no_enospc,
				check_btree_root_to_backpointers(trans, s, btree_id, &level));
		if (ret)
			return ret;

		while (level >= depth) {
			struct btree_iter iter;
			bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0,
						  level,
						  BTREE_ITER_PREFETCH);
			while (1) {
				bch2_trans_begin(trans);

				struct bkey_s_c k = bch2_btree_iter_peek(&iter);
				if (!k.k)
					break;
				ret = bkey_err(k) ?:
					check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
					bch2_trans_commit(trans, NULL, NULL,
							  BCH_TRANS_COMMIT_no_enospc);
				if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
					ret = 0;
					continue;
				}
				if (ret)
					break;
				if (bpos_eq(iter.pos, SPOS_MAX))
					break;
				bch2_btree_iter_advance(&iter);
			}
			bch2_trans_iter_exit(trans, &iter);

			if (ret)
				return ret;

			--level;
		}
	}

	return 0;
}

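/*
 * Check that every extent and btree node pointer has a backpointer, pinning
 * the backpointers btree in memory; if it doesn't fit in the fsck memory
 * budget, the bucket range is split across multiple passes.
 */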
int bch2_check_extents_to_backpointers(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct extents_to_bp_state s = { .bucket_start = POS_MIN };
	int ret;

	bch2_bkey_buf_init(&s.last_flushed);
	bkey_init(&s.last_flushed.k->k);

	while (1) {
		struct bbpos end;
		ret = bch2_get_btree_in_memory_pos(trans,
				BIT_ULL(BTREE_ID_backpointers),
				BIT_ULL(BTREE_ID_backpointers),
				BBPOS(BTREE_ID_backpointers, s.bucket_start), &end);
		if (ret)
			break;

		s.bucket_end = end.pos;

		if ( bpos_eq(s.bucket_start, POS_MIN) &&
		    !bpos_eq(s.bucket_end, SPOS_MAX))
			bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
				    __func__, btree_nodes_fit_in_ram(c));

		if (!bpos_eq(s.bucket_start, POS_MIN) ||
		    !bpos_eq(s.bucket_end, SPOS_MAX)) {
			struct printbuf buf = PRINTBUF;

			prt_str(&buf, "check_extents_to_backpointers(): ");
			bch2_bpos_to_text(&buf, s.bucket_start);
			prt_str(&buf, "-");
			bch2_bpos_to_text(&buf, s.bucket_end);

			bch_verbose(c, "%s", buf.buf);
			printbuf_exit(&buf);
		}

		ret = bch2_check_extents_to_backpointers_pass(trans, &s);
		if (ret || bpos_eq(s.bucket_end, SPOS_MAX))
			break;

		s.bucket_start = bpos_successor(s.bucket_end);
	}
	bch2_trans_put(trans);
	bch2_bkey_buf_exit(&s.last_flushed, c);

	c->btree_cache.pinned_nodes_leaf_mask = 0;
	c->btree_cache.pinned_nodes_interior_mask = 0;

	bch_err_fn(c, ret);
	return ret;
}

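/*
 * Check that a single backpointer within [start, end] points to a live extent
 * or btree node; flush the btree write buffer and retry once before deleting
 * a backpointer whose target is missing.
 */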
static int check_one_backpointer(struct btree_trans *trans,
				 struct bbpos start,
				 struct bbpos end,
				 struct bkey_s_c_backpointer bp,
				 struct bpos *last_flushed_pos)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bbpos pos = bp_to_bbpos(*bp.v);
	struct bkey_s_c k;
	struct printbuf buf = PRINTBUF;
	int ret;

	if (bbpos_cmp(pos, start) < 0 ||
	    bbpos_cmp(pos, end) > 0)
		return 0;

	k = bch2_backpointer_get_key(trans, &iter, bp.k->p, *bp.v, 0);
	ret = bkey_err(k);
	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
		return 0;
	if (ret)
		return ret;

	if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) {
		*last_flushed_pos = bp.k->p;
		ret = bch2_btree_write_buffer_flush_sync(trans) ?:
			-BCH_ERR_transaction_restart_write_buffer_flush;
		goto out;
	}

	if (fsck_err_on(!k.k, c,
			backpointer_to_missing_ptr,
			"backpointer for missing %s\n  %s",
			bp.v->level ? "btree node" : "extent",
			(bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
		ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
		goto out;
	}
out:
fsck_err:
	bch2_trans_iter_exit(trans, &iter);
	printbuf_exit(&buf);
	return ret;
}

static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
						   struct bbpos start,
						   struct bbpos end)
{
	struct bpos last_flushed_pos = SPOS_MAX;

	return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
				  POS_MIN, BTREE_ITER_PREFETCH, k,
				  NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
		check_one_backpointer(trans, start, end,
				      bkey_s_c_to_backpointer(k),
				      &last_flushed_pos));
}

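/*
 * The reverse check: verify that every backpointer points to a live extent or
 * btree node, splitting the work into multiple passes if the extents and
 * reflink btrees don't fit in the fsck memory budget.
 */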
int bch2_check_backpointers_to_extents(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
	int ret;

	while (1) {
		ret = bch2_get_btree_in_memory_pos(trans,
						   BIT_ULL(BTREE_ID_extents)|
						   BIT_ULL(BTREE_ID_reflink),
						   ~0ULL,
						   start, &end);
		if (ret)
			break;

		if (!bbpos_cmp(start, BBPOS_MIN) &&
		    bbpos_cmp(end, BBPOS_MAX))
			bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass",
				    __func__, btree_nodes_fit_in_ram(c));

		if (bbpos_cmp(start, BBPOS_MIN) ||
		    bbpos_cmp(end, BBPOS_MAX)) {
			struct printbuf buf = PRINTBUF;

			prt_str(&buf, "check_backpointers_to_extents(): ");
			bch2_bbpos_to_text(&buf, start);
			prt_str(&buf, "-");
			bch2_bbpos_to_text(&buf, end);

			bch_verbose(c, "%s", buf.buf);
			printbuf_exit(&buf);
		}

		ret = bch2_check_backpointers_to_extents_pass(trans, start, end);
		if (ret || !bbpos_cmp(end, BBPOS_MAX))
			break;

		start = bbpos_successor(end);
	}
	bch2_trans_put(trans);

	c->btree_cache.pinned_nodes_leaf_mask = 0;
	c->btree_cache.pinned_nodes_interior_mask = 0;

	bch_err_fn(c, ret);
	return ret;
}