1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Copyright (c) 2021-2024 Oracle.  All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6#include "xfs.h"
7#include "xfs_fs.h"
8#include "xfs_shared.h"
9#include "xfs_format.h"
10#include "xfs_trans_resv.h"
11#include "xfs_mount.h"
12#include "xfs_log_format.h"
13#include "xfs_trans.h"
14#include "xfs_inode.h"
15#include "xfs_ialloc.h"
16#include "xfs_quota.h"
17#include "xfs_bmap.h"
18#include "xfs_bmap_btree.h"
19#include "xfs_trans_space.h"
20#include "xfs_dir2.h"
21#include "xfs_exchrange.h"
22#include "xfs_exchmaps.h"
23#include "xfs_defer.h"
24#include "xfs_symlink_remote.h"
25#include "scrub/scrub.h"
26#include "scrub/common.h"
27#include "scrub/repair.h"
28#include "scrub/trace.h"
29#include "scrub/tempfile.h"
30#include "scrub/tempexch.h"
31#include "scrub/xfile.h"
32
33/*
34 * Create a temporary file for reconstructing metadata, with the intention of
35 * atomically exchanging the temporary file's contents with the file that's
36 * being repaired.
37 */
38int
39xrep_tempfile_create(
40	struct xfs_scrub	*sc,
41	uint16_t		mode)
42{
43	struct xfs_mount	*mp = sc->mp;
44	struct xfs_trans	*tp = NULL;
45	struct xfs_dquot	*udqp = NULL;
46	struct xfs_dquot	*gdqp = NULL;
47	struct xfs_dquot	*pdqp = NULL;
48	struct xfs_trans_res	*tres;
49	struct xfs_inode	*dp = mp->m_rootip;
50	xfs_ino_t		ino;
51	unsigned int		resblks;
52	bool			is_dir = S_ISDIR(mode);
53	int			error;
54
55	if (xfs_is_shutdown(mp))
56		return -EIO;
57	if (xfs_is_readonly(mp))
58		return -EROFS;
59
60	ASSERT(sc->tp == NULL);
61	ASSERT(sc->tempip == NULL);
62
63	/*
64	 * Make sure that we have allocated dquot(s) on disk.  The temporary
65	 * inode should be completely root owned so that we don't fail due to
66	 * quota limits.
67	 */
68	error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
69			XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp);
70	if (error)
71		return error;
72
73	if (is_dir) {
74		resblks = xfs_mkdir_space_res(mp, 0);
75		tres = &M_RES(mp)->tr_mkdir;
76	} else {
77		resblks = XFS_IALLOC_SPACE_RES(mp);
78		tres = &M_RES(mp)->tr_create_tmpfile;
79	}
80
81	error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
82			&tp);
83	if (error)
84		goto out_release_dquots;
85
86	/* Allocate inode, set up directory. */
87	error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
88	if (error)
89		goto out_trans_cancel;
90	error = xfs_init_new_inode(&nop_mnt_idmap, tp, dp, ino, mode, 0, 0,
91			0, false, &sc->tempip);
92	if (error)
93		goto out_trans_cancel;
94
95	/* Change the ownership of the inode to root. */
96	VFS_I(sc->tempip)->i_uid = GLOBAL_ROOT_UID;
97	VFS_I(sc->tempip)->i_gid = GLOBAL_ROOT_GID;
98	sc->tempip->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT);
99	xfs_trans_log_inode(tp, sc->tempip, XFS_ILOG_CORE);
100
101	/*
102	 * Mark our temporary file as private so that LSMs and the ACL code
103	 * don't try to add their own metadata or reason about these files.
104	 * The file should never be exposed to userspace.
105	 */
106	VFS_I(sc->tempip)->i_flags |= S_PRIVATE;
107	VFS_I(sc->tempip)->i_opflags &= ~IOP_XATTR;
108
109	if (is_dir) {
110		error = xfs_dir_init(tp, sc->tempip, dp);
111		if (error)
112			goto out_trans_cancel;
113	} else if (S_ISLNK(VFS_I(sc->tempip)->i_mode)) {
114		/*
115		 * Initialize the temporary symlink with a meaningless target
116		 * that won't trip the verifiers.  Repair must rewrite the
117		 * target with meaningful content before swapping with the file
118		 * being repaired.  A single-byte target will not write a
119		 * remote target block, so the owner is irrelevant.
120		 */
121		error = xfs_symlink_write_target(tp, sc->tempip,
122				sc->tempip->i_ino, ".", 1, 0, 0);
123		if (error)
124			goto out_trans_cancel;
125	}
126
127	/*
128	 * Attach the dquot(s) to the inodes and modify them incore.
129	 * These ids of the inode couldn't have changed since the new
130	 * inode has been locked ever since it was created.
131	 */
132	xfs_qm_vop_create_dqattach(tp, sc->tempip, udqp, gdqp, pdqp);
133
134	/*
135	 * Put our temp file on the unlinked list so it's purged automatically.
136	 * All file-based metadata being reconstructed using this file must be
137	 * atomically exchanged with the original file because the contents
138	 * here will be purged when the inode is dropped or log recovery cleans
139	 * out the unlinked list.
140	 */
141	error = xfs_iunlink(tp, sc->tempip);
142	if (error)
143		goto out_trans_cancel;
144
145	error = xfs_trans_commit(tp);
146	if (error)
147		goto out_release_inode;
148
149	trace_xrep_tempfile_create(sc);
150
151	xfs_qm_dqrele(udqp);
152	xfs_qm_dqrele(gdqp);
153	xfs_qm_dqrele(pdqp);
154
155	/* Finish setting up the incore / vfs context. */
156	xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
157	xfs_setup_iops(sc->tempip);
158	xfs_finish_inode_setup(sc->tempip);
159
160	sc->temp_ilock_flags = 0;
161	return error;
162
163out_trans_cancel:
164	xfs_trans_cancel(tp);
165out_release_inode:
166	/*
167	 * Wait until after the current transaction is aborted to finish the
168	 * setup of the inode and release the inode.  This prevents recursive
169	 * transactions and deadlocks from xfs_inactive.
170	 */
171	if (sc->tempip) {
172		xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
173		xfs_finish_inode_setup(sc->tempip);
174		xchk_irele(sc, sc->tempip);
175	}
176out_release_dquots:
177	xfs_qm_dqrele(udqp);
178	xfs_qm_dqrele(gdqp);
179	xfs_qm_dqrele(pdqp);
180
181	return error;
182}
183
184/* Take IOLOCK_EXCL on the temporary file, maybe. */
185bool
186xrep_tempfile_iolock_nowait(
187	struct xfs_scrub	*sc)
188{
189	if (xfs_ilock_nowait(sc->tempip, XFS_IOLOCK_EXCL)) {
190		sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;
191		return true;
192	}
193
194	return false;
195}
196
/*
 * Take the temporary file's IOLOCK while holding a different inode's IOLOCK.
 * In theory nobody else should hold the tempfile's IOLOCK, but we use trylock
 * to avoid deadlocks and lockdep complaints.
 *
 * Returns 0 once the lock is held, or the error reported by
 * xchk_should_terminate() if the scrub is asked to stop while we wait.
 */
int
xrep_tempfile_iolock_polled(
	struct xfs_scrub	*sc)
{
	int			error = 0;

	for (;;) {
		if (xrep_tempfile_iolock_nowait(sc))
			return 0;

		/* Give up if the scrub operation has been told to stop. */
		if (xchk_should_terminate(sc, &error))
			return error;

		/* Back off briefly before the next attempt. */
		delay(1);
	}
}
216
217/* Release IOLOCK_EXCL on the temporary file. */
218void
219xrep_tempfile_iounlock(
220	struct xfs_scrub	*sc)
221{
222	xfs_iunlock(sc->tempip, XFS_IOLOCK_EXCL);
223	sc->temp_ilock_flags &= ~XFS_IOLOCK_EXCL;
224}
225
226/* Prepare the temporary file for metadata updates by grabbing ILOCK_EXCL. */
227void
228xrep_tempfile_ilock(
229	struct xfs_scrub	*sc)
230{
231	sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
232	xfs_ilock(sc->tempip, XFS_ILOCK_EXCL);
233}
234
235/* Try to grab ILOCK_EXCL on the temporary file. */
236bool
237xrep_tempfile_ilock_nowait(
238	struct xfs_scrub	*sc)
239{
240	if (xfs_ilock_nowait(sc->tempip, XFS_ILOCK_EXCL)) {
241		sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
242		return true;
243	}
244
245	return false;
246}
247
248/* Unlock ILOCK_EXCL on the temporary file after an update. */
249void
250xrep_tempfile_iunlock(
251	struct xfs_scrub	*sc)
252{
253	xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
254	sc->temp_ilock_flags &= ~XFS_ILOCK_EXCL;
255}
256
257/*
258 * Begin the process of making changes to both the file being scrubbed and
259 * the temporary file by taking ILOCK_EXCL on both.
260 */
261void
262xrep_tempfile_ilock_both(
263	struct xfs_scrub	*sc)
264{
265	xfs_lock_two_inodes(sc->ip, XFS_ILOCK_EXCL, sc->tempip, XFS_ILOCK_EXCL);
266	sc->ilock_flags |= XFS_ILOCK_EXCL;
267	sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
268}
269
270/* Unlock ILOCK_EXCL on both files. */
271void
272xrep_tempfile_iunlock_both(
273	struct xfs_scrub	*sc)
274{
275	xrep_tempfile_iunlock(sc);
276	xchk_iunlock(sc, XFS_ILOCK_EXCL);
277}
278
279/* Release the temporary file. */
280void
281xrep_tempfile_rele(
282	struct xfs_scrub	*sc)
283{
284	if (!sc->tempip)
285		return;
286
287	if (sc->temp_ilock_flags) {
288		xfs_iunlock(sc->tempip, sc->temp_ilock_flags);
289		sc->temp_ilock_flags = 0;
290	}
291
292	xchk_irele(sc, sc->tempip);
293	sc->tempip = NULL;
294}
295
296/*
297 * Make sure that the given range of the data fork of the temporary file is
298 * mapped to written blocks.  The caller must ensure that both inodes are
299 * joined to the transaction.
300 */
301int
302xrep_tempfile_prealloc(
303	struct xfs_scrub	*sc,
304	xfs_fileoff_t		off,
305	xfs_filblks_t		len)
306{
307	struct xfs_bmbt_irec	map;
308	xfs_fileoff_t		end = off + len;
309	int			error;
310
311	ASSERT(sc->tempip != NULL);
312	ASSERT(!XFS_NOT_DQATTACHED(sc->mp, sc->tempip));
313
314	for (; off < end; off = map.br_startoff + map.br_blockcount) {
315		int		nmaps = 1;
316
317		/*
318		 * If we have a real extent mapping this block then we're
319		 * in ok shape.
320		 */
321		error = xfs_bmapi_read(sc->tempip, off, end - off, &map, &nmaps,
322				XFS_DATA_FORK);
323		if (error)
324			return error;
325		if (nmaps == 0) {
326			ASSERT(nmaps != 0);
327			return -EFSCORRUPTED;
328		}
329
330		if (xfs_bmap_is_written_extent(&map))
331			continue;
332
333		/*
334		 * If we find a delalloc reservation then something is very
335		 * very wrong.  Bail out.
336		 */
337		if (map.br_startblock == DELAYSTARTBLOCK)
338			return -EFSCORRUPTED;
339
340		/*
341		 * Make sure this block has a real zeroed extent allocated to
342		 * it.
343		 */
344		nmaps = 1;
345		error = xfs_bmapi_write(sc->tp, sc->tempip, off, end - off,
346				XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, 0, &map,
347				&nmaps);
348		if (error)
349			return error;
350		if (nmaps != 1)
351			return -EFSCORRUPTED;
352
353		trace_xrep_tempfile_prealloc(sc, XFS_DATA_FORK, &map);
354
355		/* Commit new extent and all deferred work. */
356		error = xfs_defer_finish(&sc->tp);
357		if (error)
358			return error;
359	}
360
361	return 0;
362}
363
364/*
365 * Write data to each block of a file.  The given range of the tempfile's data
366 * fork must already be populated with written extents.
367 */
368int
369xrep_tempfile_copyin(
370	struct xfs_scrub	*sc,
371	xfs_fileoff_t		off,
372	xfs_filblks_t		len,
373	xrep_tempfile_copyin_fn	prep_fn,
374	void			*data)
375{
376	LIST_HEAD(buffers_list);
377	struct xfs_mount	*mp = sc->mp;
378	struct xfs_buf		*bp;
379	xfs_fileoff_t		flush_mask;
380	xfs_fileoff_t		end = off + len;
381	loff_t			pos = XFS_FSB_TO_B(mp, off);
382	int			error = 0;
383
384	ASSERT(S_ISREG(VFS_I(sc->tempip)->i_mode));
385
386	/* Flush buffers to disk every 512K */
387	flush_mask = XFS_B_TO_FSBT(mp, (1U << 19)) - 1;
388
389	for (; off < end; off++, pos += mp->m_sb.sb_blocksize) {
390		struct xfs_bmbt_irec	map;
391		int			nmaps = 1;
392
393		/* Read block mapping for this file block. */
394		error = xfs_bmapi_read(sc->tempip, off, 1, &map, &nmaps, 0);
395		if (error)
396			goto out_err;
397		if (nmaps == 0 || !xfs_bmap_is_written_extent(&map)) {
398			error = -EFSCORRUPTED;
399			goto out_err;
400		}
401
402		/* Get the metadata buffer for this offset in the file. */
403		error = xfs_trans_get_buf(sc->tp, mp->m_ddev_targp,
404				XFS_FSB_TO_DADDR(mp, map.br_startblock),
405				mp->m_bsize, 0, &bp);
406		if (error)
407			goto out_err;
408
409		trace_xrep_tempfile_copyin(sc, XFS_DATA_FORK, &map);
410
411		/* Read in a block's worth of data from the xfile. */
412		error = prep_fn(sc, bp, data);
413		if (error) {
414			xfs_trans_brelse(sc->tp, bp);
415			goto out_err;
416		}
417
418		/* Queue buffer, and flush if we have too much dirty data. */
419		xfs_buf_delwri_queue_here(bp, &buffers_list);
420		xfs_trans_brelse(sc->tp, bp);
421
422		if (!(off & flush_mask)) {
423			error = xfs_buf_delwri_submit(&buffers_list);
424			if (error)
425				goto out_err;
426		}
427	}
428
429	/*
430	 * Write the new blocks to disk.  If the ordered list isn't empty after
431	 * that, then something went wrong and we have to fail.  This should
432	 * never happen, but we'll check anyway.
433	 */
434	error = xfs_buf_delwri_submit(&buffers_list);
435	if (error)
436		goto out_err;
437
438	if (!list_empty(&buffers_list)) {
439		ASSERT(list_empty(&buffers_list));
440		error = -EIO;
441		goto out_err;
442	}
443
444	return 0;
445
446out_err:
447	xfs_buf_delwri_cancel(&buffers_list);
448	return error;
449}
450
451/*
452 * Set the temporary file's size.  Caller must join the tempfile to the scrub
453 * transaction and is responsible for adjusting block mappings as needed.
454 */
455int
456xrep_tempfile_set_isize(
457	struct xfs_scrub	*sc,
458	unsigned long long	isize)
459{
460	if (sc->tempip->i_disk_size == isize)
461		return 0;
462
463	sc->tempip->i_disk_size = isize;
464	i_size_write(VFS_I(sc->tempip), isize);
465	return xrep_tempfile_roll_trans(sc);
466}
467
468/*
469 * Roll a repair transaction involving the temporary file.  Caller must join
470 * both the temporary file and the file being scrubbed to the transaction.
471 * This function return with both inodes joined to a new scrub transaction,
472 * or the usual negative errno.
473 */
474int
475xrep_tempfile_roll_trans(
476	struct xfs_scrub	*sc)
477{
478	int			error;
479
480	xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE);
481	error = xrep_roll_trans(sc);
482	if (error)
483		return error;
484
485	xfs_trans_ijoin(sc->tp, sc->tempip, 0);
486	return 0;
487}
488
489/*
490 * Fill out the mapping exchange request in preparation for atomically
491 * committing the contents of a metadata file that we've rebuilt in the temp
492 * file.
493 */
494STATIC int
495xrep_tempexch_prep_request(
496	struct xfs_scrub	*sc,
497	int			whichfork,
498	struct xrep_tempexch	*tx)
499{
500	struct xfs_exchmaps_req	*req = &tx->req;
501
502	memset(tx, 0, sizeof(struct xrep_tempexch));
503
504	/* COW forks don't exist on disk. */
505	if (whichfork == XFS_COW_FORK) {
506		ASSERT(0);
507		return -EINVAL;
508	}
509
510	/* Both files should have the relevant forks. */
511	if (!xfs_ifork_ptr(sc->ip, whichfork) ||
512	    !xfs_ifork_ptr(sc->tempip, whichfork)) {
513		ASSERT(xfs_ifork_ptr(sc->ip, whichfork) != NULL);
514		ASSERT(xfs_ifork_ptr(sc->tempip, whichfork) != NULL);
515		return -EINVAL;
516	}
517
518	/* Exchange all mappings in both forks. */
519	req->ip1 = sc->tempip;
520	req->ip2 = sc->ip;
521	req->startoff1 = 0;
522	req->startoff2 = 0;
523	switch (whichfork) {
524	case XFS_ATTR_FORK:
525		req->flags |= XFS_EXCHMAPS_ATTR_FORK;
526		break;
527	case XFS_DATA_FORK:
528		/* Always exchange sizes when exchanging data fork mappings. */
529		req->flags |= XFS_EXCHMAPS_SET_SIZES;
530		break;
531	}
532	req->blockcount = XFS_MAX_FILEOFF;
533
534	return 0;
535}
536
537/*
538 * Fill out the mapping exchange resource estimation structures in preparation
539 * for exchanging the contents of a metadata file that we've rebuilt in the
540 * temp file.  Caller must hold IOLOCK_EXCL but not ILOCK_EXCL on both files.
541 */
542STATIC int
543xrep_tempexch_estimate(
544	struct xfs_scrub	*sc,
545	struct xrep_tempexch	*tx)
546{
547	struct xfs_exchmaps_req	*req = &tx->req;
548	struct xfs_ifork	*ifp;
549	struct xfs_ifork	*tifp;
550	int			whichfork = xfs_exchmaps_reqfork(req);
551	int			state = 0;
552
553	/*
554	 * The exchmaps code only knows how to exchange file fork space
555	 * mappings.  Any fork data in local format must be promoted to a
556	 * single block before the exchange can take place.
557	 */
558	ifp = xfs_ifork_ptr(sc->ip, whichfork);
559	if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
560		state |= 1;
561
562	tifp = xfs_ifork_ptr(sc->tempip, whichfork);
563	if (tifp->if_format == XFS_DINODE_FMT_LOCAL)
564		state |= 2;
565
566	switch (state) {
567	case 0:
568		/* Both files have mapped extents; use the regular estimate. */
569		return xfs_exchrange_estimate(req);
570	case 1:
571		/*
572		 * The file being repaired is in local format, but the temp
573		 * file has mapped extents.  To perform the exchange, the file
574		 * being repaired must have its shorform data converted to an
575		 * ondisk block so that the forks will be in extents format.
576		 * We need one resblk for the conversion; the number of
577		 * exchanges is (worst case) the temporary file's extent count
578		 * plus the block we converted.
579		 */
580		req->ip1_bcount = sc->tempip->i_nblocks;
581		req->ip2_bcount = 1;
582		req->nr_exchanges = 1 + tifp->if_nextents;
583		req->resblks = 1;
584		break;
585	case 2:
586		/*
587		 * The temporary file is in local format, but the file being
588		 * repaired has mapped extents.  To perform the exchange, the
589		 * temp file must have its shortform data converted to an
590		 * ondisk block, and the fork changed to extents format.  We
591		 * need one resblk for the conversion; the number of exchanges
592		 * is (worst case) the extent count of the file being repaired
593		 * plus the block we converted.
594		 */
595		req->ip1_bcount = 1;
596		req->ip2_bcount = sc->ip->i_nblocks;
597		req->nr_exchanges = 1 + ifp->if_nextents;
598		req->resblks = 1;
599		break;
600	case 3:
601		/*
602		 * Both forks are in local format.  To perform the exchange,
603		 * both files must have their shortform data converted to
604		 * fsblocks, and both forks must be converted to extents
605		 * format.  We need two resblks for the two conversions, and
606		 * the number of exchanges is 1 since there's only one block at
607		 * fileoff 0.  Presumably, the caller could not exchange the
608		 * two inode fork areas directly.
609		 */
610		req->ip1_bcount = 1;
611		req->ip2_bcount = 1;
612		req->nr_exchanges = 1;
613		req->resblks = 2;
614		break;
615	}
616
617	return xfs_exchmaps_estimate_overhead(req);
618}
619
620/*
621 * Obtain a quota reservation to make sure we don't hit EDQUOT.  We can skip
622 * this if quota enforcement is disabled or if both inodes' dquots are the
623 * same.  The qretry structure must be initialized to zeroes before the first
624 * call to this function.
625 */
626STATIC int
627xrep_tempexch_reserve_quota(
628	struct xfs_scrub		*sc,
629	const struct xrep_tempexch	*tx)
630{
631	struct xfs_trans		*tp = sc->tp;
632	const struct xfs_exchmaps_req	*req = &tx->req;
633	int64_t				ddelta, rdelta;
634	int				error;
635
636	/*
637	 * Don't bother with a quota reservation if we're not enforcing them
638	 * or the two inodes have the same dquots.
639	 */
640	if (!XFS_IS_QUOTA_ON(tp->t_mountp) || req->ip1 == req->ip2 ||
641	    (req->ip1->i_udquot == req->ip2->i_udquot &&
642	     req->ip1->i_gdquot == req->ip2->i_gdquot &&
643	     req->ip1->i_pdquot == req->ip2->i_pdquot))
644		return 0;
645
646	/*
647	 * Quota reservation for each file comes from two sources.  First, we
648	 * need to account for any net gain in mapped blocks during the
649	 * exchange.  Second, we need reservation for the gross gain in mapped
650	 * blocks so that we don't trip over any quota block reservation
651	 * assertions.  We must reserve the gross gain because the quota code
652	 * subtracts from bcount the number of blocks that we unmap; it does
653	 * not add that quantity back to the quota block reservation.
654	 */
655	ddelta = max_t(int64_t, 0, req->ip2_bcount - req->ip1_bcount);
656	rdelta = max_t(int64_t, 0, req->ip2_rtbcount - req->ip1_rtbcount);
657	error = xfs_trans_reserve_quota_nblks(tp, req->ip1,
658			ddelta + req->ip1_bcount, rdelta + req->ip1_rtbcount,
659			true);
660	if (error)
661		return error;
662
663	ddelta = max_t(int64_t, 0, req->ip1_bcount - req->ip2_bcount);
664	rdelta = max_t(int64_t, 0, req->ip1_rtbcount - req->ip2_rtbcount);
665	return xfs_trans_reserve_quota_nblks(tp, req->ip2,
666			ddelta + req->ip2_bcount, rdelta + req->ip2_rtbcount,
667			true);
668}
669
670/*
671 * Prepare an existing transaction for an atomic file contents exchange.
672 *
673 * This function fills out the mapping exchange request and resource estimation
674 * structures in preparation for exchanging the contents of a metadata file
675 * that has been rebuilt in the temp file.  Next, it reserves space and quota
676 * for the transaction.
677 *
678 * The caller must hold ILOCK_EXCL of the scrub target file and the temporary
679 * file.  The caller must join both inodes to the transaction with no unlock
680 * flags, and is responsible for dropping both ILOCKs when appropriate.  Only
681 * use this when those ILOCKs cannot be dropped.
682 */
683int
684xrep_tempexch_trans_reserve(
685	struct xfs_scrub	*sc,
686	int			whichfork,
687	struct xrep_tempexch	*tx)
688{
689	int			error;
690
691	ASSERT(sc->tp != NULL);
692	xfs_assert_ilocked(sc->ip, XFS_ILOCK_EXCL);
693	xfs_assert_ilocked(sc->tempip, XFS_ILOCK_EXCL);
694
695	error = xrep_tempexch_prep_request(sc, whichfork, tx);
696	if (error)
697		return error;
698
699	error = xfs_exchmaps_estimate(&tx->req);
700	if (error)
701		return error;
702
703	error = xfs_trans_reserve_more(sc->tp, tx->req.resblks, 0);
704	if (error)
705		return error;
706
707	return xrep_tempexch_reserve_quota(sc, tx);
708}
709
710/*
711 * Create a new transaction for a file contents exchange.
712 *
713 * This function fills out the mapping excahange request and resource
714 * estimation structures in preparation for exchanging the contents of a
715 * metadata file that has been rebuilt in the temp file.  Next, it reserves
716 * space, takes ILOCK_EXCL of both inodes, joins them to the transaction and
717 * reserves quota for the transaction.
718 *
719 * The caller is responsible for dropping both ILOCKs when appropriate.
720 */
721int
722xrep_tempexch_trans_alloc(
723	struct xfs_scrub	*sc,
724	int			whichfork,
725	struct xrep_tempexch	*tx)
726{
727	unsigned int		flags = 0;
728	int			error;
729
730	ASSERT(sc->tp == NULL);
731	ASSERT(xfs_has_exchange_range(sc->mp));
732
733	error = xrep_tempexch_prep_request(sc, whichfork, tx);
734	if (error)
735		return error;
736
737	error = xrep_tempexch_estimate(sc, tx);
738	if (error)
739		return error;
740
741	if (xfs_has_lazysbcount(sc->mp))
742		flags |= XFS_TRANS_RES_FDBLKS;
743
744	error = xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
745			tx->req.resblks, 0, flags, &sc->tp);
746	if (error)
747		return error;
748
749	sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
750	sc->ilock_flags |= XFS_ILOCK_EXCL;
751	xfs_exchrange_ilock(sc->tp, sc->ip, sc->tempip);
752
753	return xrep_tempexch_reserve_quota(sc, tx);
754}
755
756/*
757 * Exchange file mappings (and hence file contents) between the file being
758 * repaired and the temporary file.  Returns with both inodes locked and joined
759 * to a clean scrub transaction.
760 */
761int
762xrep_tempexch_contents(
763	struct xfs_scrub	*sc,
764	struct xrep_tempexch	*tx)
765{
766	int			error;
767
768	ASSERT(xfs_has_exchange_range(sc->mp));
769
770	xfs_exchange_mappings(sc->tp, &tx->req);
771	error = xfs_defer_finish(&sc->tp);
772	if (error)
773		return error;
774
775	/*
776	 * If we exchanged the ondisk sizes of two metadata files, we must
777	 * exchanged the incore sizes as well.
778	 */
779	if (tx->req.flags & XFS_EXCHMAPS_SET_SIZES) {
780		loff_t	temp;
781
782		temp = i_size_read(VFS_I(sc->ip));
783		i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip)));
784		i_size_write(VFS_I(sc->tempip), temp);
785	}
786
787	return 0;
788}
789
790/*
791 * Write local format data from one of the temporary file's forks into the same
792 * fork of file being repaired, and exchange the file sizes, if appropriate.
793 * Caller must ensure that the file being repaired has enough fork space to
794 * hold all the bytes.
795 */
796void
797xrep_tempfile_copyout_local(
798	struct xfs_scrub	*sc,
799	int			whichfork)
800{
801	struct xfs_ifork	*temp_ifp;
802	struct xfs_ifork	*ifp;
803	unsigned int		ilog_flags = XFS_ILOG_CORE;
804
805	temp_ifp = xfs_ifork_ptr(sc->tempip, whichfork);
806	ifp = xfs_ifork_ptr(sc->ip, whichfork);
807
808	ASSERT(temp_ifp != NULL);
809	ASSERT(ifp != NULL);
810	ASSERT(temp_ifp->if_format == XFS_DINODE_FMT_LOCAL);
811	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
812
813	switch (whichfork) {
814	case XFS_DATA_FORK:
815		ASSERT(sc->tempip->i_disk_size <=
816					xfs_inode_data_fork_size(sc->ip));
817		break;
818	case XFS_ATTR_FORK:
819		ASSERT(sc->tempip->i_forkoff >= sc->ip->i_forkoff);
820		break;
821	default:
822		ASSERT(0);
823		return;
824	}
825
826	/* Recreate @sc->ip's incore fork (ifp) with data from temp_ifp. */
827	xfs_idestroy_fork(ifp);
828	xfs_init_local_fork(sc->ip, whichfork, temp_ifp->if_data,
829			temp_ifp->if_bytes);
830
831	if (whichfork == XFS_DATA_FORK) {
832		i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip)));
833		sc->ip->i_disk_size = sc->tempip->i_disk_size;
834	}
835
836	ilog_flags |= xfs_ilog_fdata(whichfork);
837	xfs_trans_log_inode(sc->tp, sc->ip, ilog_flags);
838}
839
840/* Decide if a given XFS inode is a temporary file for a repair. */
841bool
842xrep_is_tempfile(
843	const struct xfs_inode	*ip)
844{
845	const struct inode	*inode = &ip->i_vnode;
846
847	if (IS_PRIVATE(inode) && !(inode->i_opflags & IOP_XATTR))
848		return true;
849
850	return false;
851}
852