1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6#include "xfs.h"
7#include "xfs_fs.h"
8#include "xfs_shared.h"
9#include "xfs_format.h"
10#include "xfs_trans_resv.h"
11#include "xfs_mount.h"
12#include "xfs_defer.h"
13#include "xfs_btree.h"
14#include "xfs_btree_staging.h"
15#include "xfs_bit.h"
16#include "xfs_log_format.h"
17#include "xfs_trans.h"
18#include "xfs_sb.h"
19#include "xfs_inode.h"
20#include "xfs_inode_fork.h"
21#include "xfs_alloc.h"
22#include "xfs_rtalloc.h"
23#include "xfs_bmap.h"
24#include "xfs_bmap_util.h"
25#include "xfs_bmap_btree.h"
26#include "xfs_rmap.h"
27#include "xfs_rmap_btree.h"
28#include "xfs_refcount.h"
29#include "xfs_quota.h"
30#include "xfs_ialloc.h"
31#include "xfs_ag.h"
32#include "xfs_reflink.h"
33#include "scrub/xfs_scrub.h"
34#include "scrub/scrub.h"
35#include "scrub/common.h"
36#include "scrub/btree.h"
37#include "scrub/trace.h"
38#include "scrub/repair.h"
39#include "scrub/bitmap.h"
40#include "scrub/fsb_bitmap.h"
41#include "scrub/xfile.h"
42#include "scrub/xfarray.h"
43#include "scrub/newbt.h"
44#include "scrub/reap.h"
45
46/*
47 * Inode Fork Block Mapping (BMBT) Repair
48 * ======================================
49 *
50 * Gather all the rmap records for the inode and fork we're fixing, reset the
51 * incore fork, then recreate the btree.
52 */
53
/*
 * State of the scan that decides whether this file needs the REFLINK iflag
 * once the repair completes.
 */
enum reflink_scan_state {
	RLS_IRRELEVANT = -1,	/* not applicable to this file */
	RLS_UNKNOWN,		/* shared extent scans required */
	RLS_SET_IFLAG,		/* iflag must be set */
};
59
/* Context for rebuilding one fork's block mapping structure. */
struct xrep_bmap {
	/* Old bmbt blocks */
	struct xfsb_bitmap	old_bmbt_blocks;

	/* New fork. */
	struct xrep_newbt	new_bmapbt;

	/* List of new bmap records. */
	struct xfarray		*bmap_records;

	struct xfs_scrub	*sc;

	/* How many blocks did we find allocated to this file? */
	xfs_rfsblock_t		nblocks;

	/* How many bmbt blocks did we find for this fork? */
	xfs_rfsblock_t		old_bmbt_block_count;

	/* get_records()'s position in the bmap record array. */
	xfarray_idx_t		array_cur;

	/* How many real (non-hole, non-delalloc) mappings do we have? */
	uint64_t		real_mappings;

	/* Which fork are we fixing? */
	int			whichfork;

	/* Should the REFLINK flag be set when the repair is over? */
	enum reflink_scan_state	reflink_scan;

	/* Do we allow unwritten extents? */
	bool			allow_unwritten;
};
93
94/* Is this space extent shared?  Flag the inode if it is. */
95STATIC int
96xrep_bmap_discover_shared(
97	struct xrep_bmap	*rb,
98	xfs_fsblock_t		startblock,
99	xfs_filblks_t		blockcount)
100{
101	struct xfs_scrub	*sc = rb->sc;
102	xfs_agblock_t		agbno;
103	xfs_agblock_t		fbno;
104	xfs_extlen_t		flen;
105	int			error;
106
107	agbno = XFS_FSB_TO_AGBNO(sc->mp, startblock);
108	error = xfs_refcount_find_shared(sc->sa.refc_cur, agbno, blockcount,
109			&fbno, &flen, false);
110	if (error)
111		return error;
112
113	if (fbno != NULLAGBLOCK)
114		rb->reflink_scan = RLS_SET_IFLAG;
115
116	return 0;
117}
118
/*
 * Remember this reverse-mapping as a series of bmap records.
 *
 * A reverse mapping can cover more blocks than a single bmbt record allows,
 * so the mapping is split into XFS_MAX_BMBT_EXTLEN-sized pieces, each of
 * which is validated and appended to rb->bmap_records.  Returns 0 or a
 * negative errno.
 */
STATIC int
xrep_bmap_from_rmap(
	struct xrep_bmap	*rb,
	xfs_fileoff_t		startoff,
	xfs_fsblock_t		startblock,
	xfs_filblks_t		blockcount,
	bool			unwritten)
{
	struct xfs_bmbt_irec	irec = {
		.br_startoff	= startoff,
		.br_startblock	= startblock,
		.br_state	= unwritten ? XFS_EXT_UNWRITTEN : XFS_EXT_NORM,
	};
	struct xfs_bmbt_rec	rbe;
	struct xfs_scrub	*sc = rb->sc;
	int			error = 0;

	/*
	 * If we're repairing the data fork of a non-reflinked regular file on
	 * a reflink filesystem, we need to figure out if this space extent is
	 * shared.
	 */
	if (rb->reflink_scan == RLS_UNKNOWN && !unwritten) {
		error = xrep_bmap_discover_shared(rb, startblock, blockcount);
		if (error)
			return error;
	}

	do {
		xfs_failaddr_t	fa;

		/* Clamp each piece to the maximum bmbt extent length. */
		irec.br_blockcount = min_t(xfs_filblks_t, blockcount,
				XFS_MAX_BMBT_EXTLEN);

		fa = xfs_bmap_validate_extent(sc->ip, rb->whichfork, &irec);
		if (fa)
			return -EFSCORRUPTED;

		/* Stash the mapping in ondisk record format. */
		xfs_bmbt_disk_set_all(&rbe, &irec);

		trace_xrep_bmap_found(sc->ip, rb->whichfork, &irec);

		/* Bail out if a fatal signal is pending. */
		if (xchk_should_terminate(sc, &error))
			return error;

		error = xfarray_append(rb->bmap_records, &rbe);
		if (error)
			return error;

		rb->real_mappings++;

		/* Advance to the remainder of the mapping, if any. */
		irec.br_startblock += irec.br_blockcount;
		irec.br_startoff += irec.br_blockcount;
		blockcount -= irec.br_blockcount;
	} while (blockcount > 0);

	return 0;
}
178
/*
 * Check for any obvious errors or conflicts in the file mapping.
 *
 * Cross-reference a candidate rmap record against the AG's free space and
 * inode btrees.  Returns -EFSCORRUPTED if the record cannot describe part of
 * this fork, or a negative errno if a btree query fails.
 */
STATIC int
xrep_bmap_check_fork_rmap(
	struct xrep_bmap		*rb,
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec)
{
	struct xfs_scrub		*sc = rb->sc;
	enum xbtree_recpacking		outcome;
	int				error;

	/*
	 * Data extents for rt files are never stored on the data device, but
	 * everything else (xattrs, bmbt blocks) can be.
	 */
	if (XFS_IS_REALTIME_INODE(sc->ip) &&
	    !(rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)))
		return -EFSCORRUPTED;

	/* Check that this is within the AG. */
	if (!xfs_verify_agbext(cur->bc_ag.pag, rec->rm_startblock,
				rec->rm_blockcount))
		return -EFSCORRUPTED;

	/* Check the file offset range. */
	if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) &&
	    !xfs_verify_fileext(sc->mp, rec->rm_offset, rec->rm_blockcount))
		return -EFSCORRUPTED;

	/* No contradictory flags. */
	if ((rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)) &&
	    (rec->rm_flags & XFS_RMAP_UNWRITTEN))
		return -EFSCORRUPTED;

	/* Make sure this isn't free space. */
	error = xfs_alloc_has_records(sc->sa.bno_cur, rec->rm_startblock,
			rec->rm_blockcount, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	/* Must not be an inode chunk. */
	error = xfs_ialloc_has_inodes_at_extent(sc->sa.ino_cur,
			rec->rm_startblock, rec->rm_blockcount, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	return 0;
}
231
/*
 * Record extents that belong to this inode's fork.
 *
 * Callback for xfs_rmap_query_all: filters the rmap records down to the ones
 * owned by this inode, accumulates the block count so di_nblocks can be
 * reset later, and stashes the mappings (or old bmbt block locations) for
 * the fork being repaired.
 */
STATIC int
xrep_bmap_walk_rmap(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{
	struct xrep_bmap		*rb = priv;
	struct xfs_mount		*mp = cur->bc_mp;
	xfs_fsblock_t			fsbno;
	int				error = 0;

	/* Bail out if a fatal signal is pending. */
	if (xchk_should_terminate(rb->sc, &error))
		return error;

	/* Skip rmap records for other owners. */
	if (rec->rm_owner != rb->sc->ip->i_ino)
		return 0;

	error = xrep_bmap_check_fork_rmap(rb, cur, rec);
	if (error)
		return error;

	/*
	 * Record all blocks allocated to this file even if the extent isn't
	 * for the fork we're rebuilding so that we can reset di_nblocks later.
	 */
	rb->nblocks += rec->rm_blockcount;

	/* If this rmap isn't for the fork we want, we're done. */
	if (rb->whichfork == XFS_DATA_FORK &&
	    (rec->rm_flags & XFS_RMAP_ATTR_FORK))
		return 0;
	if (rb->whichfork == XFS_ATTR_FORK &&
	    !(rec->rm_flags & XFS_RMAP_ATTR_FORK))
		return 0;

	/* Reject unwritten extents if we don't allow those. */
	if ((rec->rm_flags & XFS_RMAP_UNWRITTEN) && !rb->allow_unwritten)
		return -EFSCORRUPTED;

	fsbno = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno,
			rec->rm_startblock);

	/* Track the old bmbt blocks separately so they can be reaped later. */
	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) {
		rb->old_bmbt_block_count += rec->rm_blockcount;
		return xfsb_bitmap_set(&rb->old_bmbt_blocks, fsbno,
				rec->rm_blockcount);
	}

	return xrep_bmap_from_rmap(rb, rec->rm_offset, fsbno,
			rec->rm_blockcount,
			rec->rm_flags & XFS_RMAP_UNWRITTEN);
}
285
286/*
287 * Compare two block mapping records.  We want to sort in order of increasing
288 * file offset.
289 */
290static int
291xrep_bmap_extent_cmp(
292	const void			*a,
293	const void			*b)
294{
295	const struct xfs_bmbt_rec	*ba = a;
296	const struct xfs_bmbt_rec	*bb = b;
297	xfs_fileoff_t			ao = xfs_bmbt_disk_get_startoff(ba);
298	xfs_fileoff_t			bo = xfs_bmbt_disk_get_startoff(bb);
299
300	if (ao > bo)
301		return 1;
302	else if (ao < bo)
303		return -1;
304	return 0;
305}
306
/*
 * Sort the bmap extents by fork offset or else the records will be in the
 * wrong order.  Ensure there are no overlaps in the file offset ranges.
 * Returns 0, -EFSCORRUPTED if any two records overlap, or a negative errno.
 */
STATIC int
xrep_bmap_sort_records(
	struct xrep_bmap	*rb)
{
	struct xfs_bmbt_irec	irec;
	xfs_fileoff_t		next_off = 0;	/* first offset not yet claimed */
	xfarray_idx_t		array_cur;
	int			error;

	error = xfarray_sort(rb->bmap_records, xrep_bmap_extent_cmp,
			XFARRAY_SORT_KILLABLE);
	if (error)
		return error;

	/* Walk the now-sorted records looking for overlapping mappings. */
	foreach_xfarray_idx(rb->bmap_records, array_cur) {
		struct xfs_bmbt_rec	rec;

		/* Bail out if a fatal signal is pending. */
		if (xchk_should_terminate(rb->sc, &error))
			return error;

		error = xfarray_load(rb->bmap_records, array_cur, &rec);
		if (error)
			return error;

		xfs_bmbt_disk_get_all(&rec, &irec);

		/* Overlapping mappings mean the collected records are bad. */
		if (irec.br_startoff < next_off)
			return -EFSCORRUPTED;

		next_off = irec.br_startoff + irec.br_blockcount;
	}

	return 0;
}
345
346/* Scan one AG for reverse mappings that we can turn into extent maps. */
347STATIC int
348xrep_bmap_scan_ag(
349	struct xrep_bmap	*rb,
350	struct xfs_perag	*pag)
351{
352	struct xfs_scrub	*sc = rb->sc;
353	int			error;
354
355	error = xrep_ag_init(sc, pag, &sc->sa);
356	if (error)
357		return error;
358
359	error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_bmap_walk_rmap, rb);
360	xchk_ag_free(sc, &sc->sa);
361	return error;
362}
363
/*
 * Find the delalloc extents from the old incore extent tree.
 *
 * Delalloc reservations live only in the incore extent tree, so copy them
 * from the existing fork into the record array so that they survive the
 * repair.
 */
STATIC int
xrep_bmap_find_delalloc(
	struct xrep_bmap	*rb)
{
	struct xfs_bmbt_irec	irec;
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_rec	rbe;
	struct xfs_inode	*ip = rb->sc->ip;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, rb->whichfork);
	int			error = 0;

	/*
	 * Skip this scan if we don't expect to find delayed allocation
	 * reservations in this fork.
	 */
	if (rb->whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0)
		return 0;

	for_each_xfs_iext(ifp, &icur, &irec) {
		/* Only delalloc mappings have a null startblock. */
		if (!isnullstartblock(irec.br_startblock))
			continue;

		xfs_bmbt_disk_set_all(&rbe, &irec);

		trace_xrep_bmap_found(ip, rb->whichfork, &irec);

		/* Bail out if a fatal signal is pending. */
		if (xchk_should_terminate(rb->sc, &error))
			return error;

		error = xfarray_append(rb->bmap_records, &rbe);
		if (error)
			return error;
	}

	return 0;
}
401
/*
 * Collect block mappings for this fork of this inode and decide if we have
 * enough space to rebuild.  Caller is responsible for cleaning up the list if
 * anything goes wrong.
 */
STATIC int
xrep_bmap_find_mappings(
	struct xrep_bmap	*rb)
{
	struct xfs_scrub	*sc = rb->sc;
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno;
	int			error = 0;

	/* Iterate the rmaps for extents. */
	for_each_perag(sc->mp, agno, pag) {
		error = xrep_bmap_scan_ag(rb, pag);
		if (error) {
			/* Drop the perag reference before breaking out. */
			xfs_perag_rele(pag);
			return error;
		}
	}

	/* Recover the delalloc reservations from the old incore fork. */
	return xrep_bmap_find_delalloc(rb);
}
427
/*
 * Retrieve real extent mappings for bulk loading the bmap btree.
 *
 * Bulk-loader callback: copy the next nr_wanted sorted records into the new
 * btree block, skipping delalloc records (null startblock) since only real
 * mappings go in the ondisk btree.  Returns the number of records loaded or
 * a negative errno.
 */
STATIC int
xrep_bmap_get_records(
	struct xfs_btree_cur	*cur,
	unsigned int		idx,
	struct xfs_btree_block	*block,
	unsigned int		nr_wanted,
	void			*priv)
{
	struct xfs_bmbt_rec	rec;
	struct xfs_bmbt_irec	*irec = &cur->bc_rec.b;
	struct xrep_bmap	*rb = priv;
	union xfs_btree_rec	*block_rec;
	unsigned int		loaded;
	int			error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		/* Advance past delalloc records to the next real mapping. */
		do {
			error = xfarray_load(rb->bmap_records, rb->array_cur++,
					&rec);
			if (error)
				return error;

			xfs_bmbt_disk_get_all(&rec, irec);
		} while (isnullstartblock(irec->br_startblock));

		/* Format the record (now in bc_rec.b) into the new block. */
		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}
460
461/* Feed one of the new btree blocks to the bulk loader. */
462STATIC int
463xrep_bmap_claim_block(
464	struct xfs_btree_cur	*cur,
465	union xfs_btree_ptr	*ptr,
466	void			*priv)
467{
468	struct xrep_bmap        *rb = priv;
469
470	return xrep_newbt_claim_block(cur, &rb->new_bmapbt, ptr);
471}
472
473/* Figure out how much space we need to create the incore btree root block. */
474STATIC size_t
475xrep_bmap_iroot_size(
476	struct xfs_btree_cur	*cur,
477	unsigned int		level,
478	unsigned int		nr_this_level,
479	void			*priv)
480{
481	ASSERT(level > 0);
482
483	return XFS_BMAP_BROOT_SPACE_CALC(cur->bc_mp, nr_this_level);
484}
485
/*
 * Update the inode counters.
 *
 * Set the REFLINK iflag if the scan decided it is needed, then correct
 * i_nblocks and the quota block count to account for the extents found in
 * the rmapbt and the difference between the old and new bmbt sizes.
 * Always returns 0.
 */
STATIC int
xrep_bmap_reset_counters(
	struct xrep_bmap	*rb)
{
	struct xfs_scrub	*sc = rb->sc;
	struct xbtree_ifakeroot	*ifake = &rb->new_bmapbt.ifake;
	int64_t			delta;

	if (rb->reflink_scan == RLS_SET_IFLAG)
		sc->ip->i_diflags2 |= XFS_DIFLAG2_REFLINK;

	/*
	 * Update the inode block counts to reflect the extents we found in the
	 * rmapbt.
	 */
	delta = ifake->if_blocks - rb->old_bmbt_block_count;
	sc->ip->i_nblocks = rb->nblocks + delta;
	xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);

	/*
	 * Adjust the quota counts by the difference in size between the old
	 * and new bmbt.
	 */
	xfs_trans_mod_dquot_byino(sc->tp, sc->ip, XFS_TRANS_DQ_BCOUNT, delta);
	return 0;
}
513
/*
 * Create a new iext tree and load it with block mappings.  If the inode is
 * in extents format, that's all we need to do to commit the new mappings.
 * If it is in btree format, this takes care of preloading the incore tree.
 */
STATIC int
xrep_bmap_extents_load(
	struct xrep_bmap	*rb)
{
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	irec;
	struct xfs_ifork	*ifp = rb->new_bmapbt.ifake.if_fork;
	xfarray_idx_t		array_cur;
	int			error;

	/* The fake fork must start out empty. */
	ASSERT(ifp->if_bytes == 0);

	/* Add all the mappings (incl. delalloc) to the incore extent tree. */
	xfs_iext_first(ifp, &icur);
	foreach_xfarray_idx(rb->bmap_records, array_cur) {
		struct xfs_bmbt_rec	rec;

		error = xfarray_load(rb->bmap_records, array_cur, &rec);
		if (error)
			return error;

		xfs_bmbt_disk_get_all(&rec, &irec);

		xfs_iext_insert_raw(ifp, &icur, &irec);
		/* Delalloc records (null startblock) don't count as extents. */
		if (!isnullstartblock(irec.br_startblock))
			ifp->if_nextents++;

		xfs_iext_next(ifp, &icur);
	}

	/* Check that the fork can express this many extents. */
	return xrep_ino_ensure_extent_count(rb->sc, rb->whichfork,
			ifp->if_nextents);
}
552
/*
 * Reserve new btree blocks, bulk load the bmap records into the ondisk btree,
 * and load the incore extent tree.
 */
STATIC int
xrep_bmap_btree_load(
	struct xrep_bmap	*rb,
	struct xfs_btree_cur	*bmap_cur)
{
	struct xfs_scrub	*sc = rb->sc;
	int			error;

	/* Compute how many blocks we'll need. */
	error = xfs_btree_bload_compute_geometry(bmap_cur,
			&rb->new_bmapbt.bload, rb->real_mappings);
	if (error)
		return error;

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		return error;

	/*
	 * Guess how many blocks we're going to need to rebuild an entire bmap
	 * from the number of extents we found, and pump up our transaction to
	 * have sufficient block reservation.  We're allowed to exceed file
	 * quota to repair inconsistent metadata.
	 */
	error = xfs_trans_reserve_more_inode(sc->tp, sc->ip,
			rb->new_bmapbt.bload.nr_blocks, 0, true);
	if (error)
		return error;

	/* Reserve the space we'll need for the new btree. */
	error = xrep_newbt_alloc_blocks(&rb->new_bmapbt,
			rb->new_bmapbt.bload.nr_blocks);
	if (error)
		return error;

	/* Add all observed bmap records. */
	rb->array_cur = XFARRAY_CURSOR_INIT;
	error = xfs_btree_bload(bmap_cur, &rb->new_bmapbt.bload, rb);
	if (error)
		return error;

	/*
	 * Load the new bmap records into the new incore extent tree to
	 * preserve delalloc reservations for regular files.  The directory
	 * code loads the extent tree during xfs_dir_open and assumes
	 * thereafter that it remains loaded, so we must not violate that
	 * assumption.
	 */
	return xrep_bmap_extents_load(rb);
}
607
/*
 * Use the collected bmap information to stage a new bmap fork.  If this is
 * successful we'll return with the new fork information logged to the repair
 * transaction but not yet committed.  The caller must ensure that the inode
 * is joined to the transaction; the inode will be joined to a clean
 * transaction when the function returns.
 */
STATIC int
xrep_bmap_build_new_fork(
	struct xrep_bmap	*rb)
{
	struct xfs_owner_info	oinfo;
	struct xfs_scrub	*sc = rb->sc;
	struct xfs_btree_cur	*bmap_cur;
	struct xbtree_ifakeroot	*ifake = &rb->new_bmapbt.ifake;
	int			error;

	/* Sort the records and reject overlapping mappings. */
	error = xrep_bmap_sort_records(rb);
	if (error)
		return error;

	/*
	 * Prepare to construct the new fork by initializing the new btree
	 * structure and creating a fake ifork in the ifakeroot structure.
	 */
	xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, rb->whichfork);
	error = xrep_newbt_init_inode(&rb->new_bmapbt, sc, rb->whichfork,
			&oinfo);
	if (error)
		return error;

	rb->new_bmapbt.bload.get_records = xrep_bmap_get_records;
	rb->new_bmapbt.bload.claim_block = xrep_bmap_claim_block;
	rb->new_bmapbt.bload.iroot_size = xrep_bmap_iroot_size;

	/*
	 * Allocate a new bmap btree cursor for reloading an inode block mapping
	 * data structure.
	 */
	bmap_cur = xfs_bmbt_init_cursor(sc->mp, NULL, sc->ip, XFS_STAGING_FORK);
	xfs_btree_stage_ifakeroot(bmap_cur, ifake);

	/*
	 * Figure out the size and format of the new fork, then fill it with
	 * all the bmap records we've found.  Join the inode to the transaction
	 * so that we can roll the transaction while holding the inode locked.
	 */
	if (rb->real_mappings <= XFS_IFORK_MAXEXT(sc->ip, rb->whichfork)) {
		ifake->if_fork->if_format = XFS_DINODE_FMT_EXTENTS;
		error = xrep_bmap_extents_load(rb);
	} else {
		ifake->if_fork->if_format = XFS_DINODE_FMT_BTREE;
		error = xrep_bmap_btree_load(rb, bmap_cur);
	}
	if (error)
		goto err_cur;

	/*
	 * Install the new fork in the inode.  After this point the old mapping
	 * data are no longer accessible and the new tree is live.  We delete
	 * the cursor immediately after committing the staged root because the
	 * staged fork might be in extents format.
	 */
	xfs_bmbt_commit_staged_btree(bmap_cur, sc->tp, rb->whichfork);
	xfs_btree_del_cursor(bmap_cur, 0);

	/* Reset the inode counters now that we've changed the fork. */
	error = xrep_bmap_reset_counters(rb);
	if (error)
		goto err_newbt;

	/* Dispose of any unused blocks and the accounting information. */
	error = xrep_newbt_commit(&rb->new_bmapbt);
	if (error)
		return error;

	/* Detach the inode from the dirty transaction by rolling it. */
	return xrep_roll_trans(sc);

err_cur:
	/* NOTE(review): bmap_cur is always non-NULL here; check is defensive. */
	if (bmap_cur)
		xfs_btree_del_cursor(bmap_cur, error);
err_newbt:
	xrep_newbt_cancel(&rb->new_bmapbt);
	return error;
}
693
694/*
695 * Now that we've logged the new inode btree, invalidate all of the old blocks
696 * and free them, if there were any.
697 */
698STATIC int
699xrep_bmap_remove_old_tree(
700	struct xrep_bmap	*rb)
701{
702	struct xfs_scrub	*sc = rb->sc;
703	struct xfs_owner_info	oinfo;
704
705	/* Free the old bmbt blocks if they're not in use. */
706	xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, rb->whichfork);
707	return xrep_reap_fsblocks(sc, &rb->old_bmbt_blocks, &oinfo);
708}
709
/*
 * Check for garbage inputs.  Returns -ECANCELED if there's nothing to do,
 * -EOPNOTSUPP if the repair isn't supported for this configuration, or
 * -EFSCORRUPTED/-EINVAL for impossible fork states.
 */
STATIC int
xrep_bmap_check_inputs(
	struct xfs_scrub	*sc,
	int			whichfork)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(sc->ip, whichfork);

	ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK);

	/* The repair rebuilds mappings from the reverse mapping btree. */
	if (!xfs_has_rmapbt(sc->mp))
		return -EOPNOTSUPP;

	/* No fork means nothing to rebuild. */
	if (!ifp)
		return -ECANCELED;

	/*
	 * We only know how to repair extent mappings, which is to say that we
	 * only support extents and btree fork format.  Repairs to a local
	 * format fork require a higher level repair function, so we do not
	 * have any work to do here.
	 */
	switch (ifp->if_format) {
	case XFS_DINODE_FMT_DEV:
	case XFS_DINODE_FMT_LOCAL:
	case XFS_DINODE_FMT_UUID:
		return -ECANCELED;
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		break;
	default:
		return -EFSCORRUPTED;
	}

	/* The remaining checks only apply to data forks. */
	if (whichfork == XFS_ATTR_FORK)
		return 0;

	/* Only files, symlinks, and directories get to have data forks. */
	switch (VFS_I(sc->ip)->i_mode & S_IFMT) {
	case S_IFREG:
	case S_IFDIR:
	case S_IFLNK:
		/* ok */
		break;
	default:
		return -EINVAL;
	}

	/* Don't know how to rebuild realtime data forks. */
	if (XFS_IS_REALTIME_INODE(sc->ip))
		return -EOPNOTSUPP;

	return 0;
}
765
766/* Set up the initial state of the reflink scan. */
767static inline enum reflink_scan_state
768xrep_bmap_init_reflink_scan(
769	struct xfs_scrub	*sc,
770	int			whichfork)
771{
772	/* cannot share on non-reflink filesystem */
773	if (!xfs_has_reflink(sc->mp))
774		return RLS_IRRELEVANT;
775
776	/* preserve flag if it's already set */
777	if (xfs_is_reflink_inode(sc->ip))
778		return RLS_SET_IFLAG;
779
780	/* can only share regular files */
781	if (!S_ISREG(VFS_I(sc->ip)->i_mode))
782		return RLS_IRRELEVANT;
783
784	/* cannot share attr fork extents */
785	if (whichfork != XFS_DATA_FORK)
786		return RLS_IRRELEVANT;
787
788	/* cannot share realtime extents */
789	if (XFS_IS_REALTIME_INODE(sc->ip))
790		return RLS_IRRELEVANT;
791
792	return RLS_UNKNOWN;
793}
794
/*
 * Repair an inode fork.
 *
 * Gather all mappings for the given fork from the rmapbt (plus delalloc
 * reservations from the incore fork), build a replacement fork, and reap the
 * old bmbt blocks.  Returns 0 if the fork was repaired or needed no repair,
 * or a negative errno.
 */
int
xrep_bmap(
	struct xfs_scrub	*sc,
	int			whichfork,
	bool			allow_unwritten)
{
	struct xrep_bmap	*rb;
	char			*descr;
	unsigned int		max_bmbt_recs;
	bool			large_extcount;
	int			error = 0;

	error = xrep_bmap_check_inputs(sc, whichfork);
	if (error == -ECANCELED)
		return 0;	/* nothing to repair for this fork */
	if (error)
		return error;

	rb = kzalloc(sizeof(struct xrep_bmap), XCHK_GFP_FLAGS);
	if (!rb)
		return -ENOMEM;
	rb->sc = sc;
	rb->whichfork = whichfork;
	rb->reflink_scan = xrep_bmap_init_reflink_scan(sc, whichfork);
	rb->allow_unwritten = allow_unwritten;

	/* Set up enough storage to handle the max records for this fork. */
	large_extcount = xfs_has_large_extent_counts(sc->mp);
	max_bmbt_recs = xfs_iext_max_nextents(large_extcount, whichfork);
	descr = xchk_xfile_ino_descr(sc, "%s fork mapping records",
			whichfork == XFS_DATA_FORK ? "data" : "attr");
	error = xfarray_create(descr, max_bmbt_recs,
			sizeof(struct xfs_bmbt_rec), &rb->bmap_records);
	kfree(descr);
	if (error)
		goto out_rb;

	/* Collect all reverse mappings for this fork's extents. */
	xfsb_bitmap_init(&rb->old_bmbt_blocks);
	error = xrep_bmap_find_mappings(rb);
	if (error)
		goto out_bitmap;

	xfs_trans_ijoin(sc->tp, sc->ip, 0);

	/* Rebuild the bmap information. */
	error = xrep_bmap_build_new_fork(rb);
	if (error)
		goto out_bitmap;

	/* Kill the old tree. */
	error = xrep_bmap_remove_old_tree(rb);
	if (error)
		goto out_bitmap;

	/* Success falls through to the same cleanup as the error paths. */
out_bitmap:
	xfsb_bitmap_destroy(&rb->old_bmbt_blocks);
	xfarray_destroy(rb->bmap_records);
out_rb:
	kfree(rb);
	return error;
}
858
/*
 * Repair an inode's data fork.  Unwritten extents are valid in data forks,
 * so allow them.
 */
int
xrep_bmap_data(
	struct xfs_scrub	*sc)
{
	return xrep_bmap(sc, XFS_DATA_FORK, true);
}
866
/*
 * Repair an inode's attr fork.  Attr forks never contain unwritten extents,
 * so reject them as corruption.
 */
int
xrep_bmap_attr(
	struct xfs_scrub	*sc)
{
	return xrep_bmap(sc, XFS_ATTR_FORK, false);
}
874