1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Copyright (c) 2018-2024 Oracle.  All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6#include "xfs.h"
7#include "xfs_fs.h"
8#include "xfs_shared.h"
9#include "xfs_format.h"
10#include "xfs_trans_resv.h"
11#include "xfs_mount.h"
12#include "xfs_defer.h"
13#include "xfs_btree.h"
14#include "xfs_bit.h"
15#include "xfs_log_format.h"
16#include "xfs_trans.h"
17#include "xfs_sb.h"
18#include "xfs_inode.h"
19#include "xfs_da_format.h"
20#include "xfs_da_btree.h"
21#include "xfs_dir2.h"
22#include "xfs_attr.h"
23#include "xfs_attr_leaf.h"
24#include "xfs_attr_sf.h"
25#include "xfs_attr_remote.h"
26#include "xfs_bmap.h"
27#include "xfs_bmap_util.h"
28#include "xfs_exchmaps.h"
29#include "xfs_exchrange.h"
30#include "xfs_acl.h"
31#include "xfs_parent.h"
32#include "scrub/xfs_scrub.h"
33#include "scrub/scrub.h"
34#include "scrub/common.h"
35#include "scrub/trace.h"
36#include "scrub/repair.h"
37#include "scrub/tempfile.h"
38#include "scrub/tempexch.h"
39#include "scrub/xfile.h"
40#include "scrub/xfarray.h"
41#include "scrub/xfblob.h"
42#include "scrub/attr.h"
43#include "scrub/reap.h"
44#include "scrub/attr_repair.h"
45
46/*
47 * Extended Attribute Repair
48 * =========================
49 *
 * We repair extended attributes by reading the attr leaf blocks looking for
 * attribute entries that look salvageable (name passes verifiers, value can
 * be retrieved, etc).  Each extended attribute worth salvaging is stashed in
53 * memory, and the stashed entries are periodically replayed into a temporary
54 * file to constrain memory use.  Batching the construction of the temporary
55 * extended attribute structure in this fashion reduces lock cycling of the
56 * file being repaired and the temporary file.
57 *
58 * When salvaging completes, the remaining stashed attributes are replayed to
59 * the temporary file.  An atomic file contents exchange is used to commit the
60 * new xattr blocks to the file being repaired.  This will disrupt attrmulti
61 * cursors.
62 */
63
/*
 * In-memory record describing one salvaged extended attribute.  The name and
 * value bytes themselves live in the xattr_blobs store and are referenced by
 * cookie; this record only carries the cookies, the flags, and the lengths.
 * Records of this type are appended to the xattr_records array.
 */
struct xrep_xattr_key {
	/* Cookie for retrieval of the xattr name. */
	xfblob_cookie		name_cookie;

	/* Cookie for retrieval of the xattr value. */
	xfblob_cookie		value_cookie;

	/*
	 * XFS_ATTR_* flags.  The salvage code masks the ondisk flags with
	 * XFS_ATTR_NSP_ONDISK_MASK before storing them here.
	 */
	int			flags;

	/* Length of the value and name. */
	uint32_t		valuelen;
	uint16_t		namelen;
};
78
/*
 * Stash up to 8 pages of attrs in xattr_records/xattr_blobs before we write
 * them to the temp file.  The limit is compared against the combined byte
 * usage of both stores.
 */
#define XREP_XATTR_MAX_STASH_BYTES	(PAGE_SIZE * 8)
84
/*
 * Working state for one extended attribute repair: the staging stores for
 * salvaged attrs, the stashed parent pointer updates, and the bookkeeping
 * needed to exchange the rebuilt attr fork into the file being repaired.
 */
struct xrep_xattr {
	/* Scrub context that owns this repair. */
	struct xfs_scrub	*sc;

	/* Information for exchanging attr fork mappings at the end. */
	struct xrep_tempexch	tx;

	/* xattr keys */
	struct xfarray		*xattr_records;

	/* xattr values */
	struct xfblob		*xattr_blobs;

	/* Number of attributes that we are salvaging. */
	unsigned long long	attrs_found;

	/*
	 * Can we flush stashed attrs to the tempfile?  Cleared by a full
	 * reset, after which nothing is flushed until the very end.
	 */
	bool			can_flush;

	/* Did the live update fail, and hence the repair is now out of date? */
	bool			live_update_aborted;

	/* Lock protecting parent pointer updates */
	struct mutex		lock;

	/* Fixed-size array of xrep_xattr_pptr structures. */
	struct xfarray		*pptr_recs;

	/* Blobs containing parent pointer names. */
	struct xfblob		*pptr_names;

	/* Hook to capture parent pointer updates. */
	struct xfs_dir_hook	dhook;

	/* Scratch buffer for capturing parent pointers. */
	struct xfs_da_args	pptr_args;

	/* Name buffer */
	struct xfs_name		xname;
	char			namebuf[MAXNAMELEN];
};
125
/*
 * Action codes for stashed parent pointer updates; one of these is stored in
 * the action field of struct xrep_xattr_pptr.
 */

/* Create a parent pointer in the tempfile. */
#define XREP_XATTR_PPTR_ADD	(1)

/* Remove a parent pointer from the tempfile. */
#define XREP_XATTR_PPTR_REMOVE	(2)
131
/*
 * A stashed parent pointer update.  The name bytes are kept in a blob store
 * and referenced by cookie; the record itself carries the parent pointer
 * record, the name length, and the action to replay.
 */
struct xrep_xattr_pptr {
	/* Cookie for retrieval of the pptr name. */
	xfblob_cookie		name_cookie;

	/* Parent pointer record. */
	struct xfs_parent_rec	pptr_rec;

	/* Length of the pptr name. */
	uint8_t			namelen;

	/* XREP_XATTR_PPTR_{ADD,REMOVE} */
	uint8_t			action;
};
146
147/* Set up to recreate the extended attributes. */
148int
149xrep_setup_xattr(
150	struct xfs_scrub	*sc)
151{
152	if (xfs_has_parent(sc->mp))
153		xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
154
155	return xrep_tempfile_create(sc, S_IFREG);
156}
157
158/*
159 * Decide if we want to salvage this attribute.  We don't bother with
160 * incomplete or oversized keys or values.  The @value parameter can be null
161 * for remote attrs.
162 */
163STATIC int
164xrep_xattr_want_salvage(
165	struct xrep_xattr	*rx,
166	unsigned int		attr_flags,
167	const void		*name,
168	int			namelen,
169	const void		*value,
170	int			valuelen)
171{
172	if (attr_flags & XFS_ATTR_INCOMPLETE)
173		return false;
174	if (namelen > XATTR_NAME_MAX || namelen <= 0)
175		return false;
176	if (!xfs_attr_namecheck(attr_flags, name, namelen))
177		return false;
178	if (valuelen > XATTR_SIZE_MAX || valuelen < 0)
179		return false;
180	if (attr_flags & XFS_ATTR_PARENT)
181		return xfs_parent_valuecheck(rx->sc->mp, value, valuelen);
182
183	return true;
184}
185
186/* Allocate an in-core record to hold xattrs while we rebuild the xattr data. */
187STATIC int
188xrep_xattr_salvage_key(
189	struct xrep_xattr	*rx,
190	int			flags,
191	unsigned char		*name,
192	int			namelen,
193	unsigned char		*value,
194	int			valuelen)
195{
196	struct xrep_xattr_key	key = {
197		.valuelen	= valuelen,
198		.flags		= flags & XFS_ATTR_NSP_ONDISK_MASK,
199	};
200	unsigned int		i = 0;
201	int			error = 0;
202
203	if (xchk_should_terminate(rx->sc, &error))
204		return error;
205
206	/*
207	 * Truncate the name to the first character that would trip namecheck.
208	 * If we no longer have a name after that, ignore this attribute.
209	 */
210	if (flags & XFS_ATTR_PARENT) {
211		key.namelen = namelen;
212
213		trace_xrep_xattr_salvage_pptr(rx->sc->ip, flags, name,
214				key.namelen, value, valuelen);
215	} else {
216		while (i < namelen && name[i] != 0)
217			i++;
218		if (i == 0)
219			return 0;
220		key.namelen = i;
221
222		trace_xrep_xattr_salvage_rec(rx->sc->ip, flags, name,
223				key.namelen, valuelen);
224	}
225
226	error = xfblob_store(rx->xattr_blobs, &key.name_cookie, name,
227			key.namelen);
228	if (error)
229		return error;
230
231	error = xfblob_store(rx->xattr_blobs, &key.value_cookie, value,
232			key.valuelen);
233	if (error)
234		return error;
235
236	error = xfarray_append(rx->xattr_records, &key);
237	if (error)
238		return error;
239
240	rx->attrs_found++;
241	return 0;
242}
243
244/*
245 * Record a shortform extended attribute key & value for later reinsertion
246 * into the inode.
247 */
248STATIC int
249xrep_xattr_salvage_sf_attr(
250	struct xrep_xattr		*rx,
251	struct xfs_attr_sf_hdr		*hdr,
252	struct xfs_attr_sf_entry	*sfe)
253{
254	struct xfs_scrub		*sc = rx->sc;
255	struct xchk_xattr_buf		*ab = sc->buf;
256	unsigned char			*name = sfe->nameval;
257	unsigned char			*value = &sfe->nameval[sfe->namelen];
258
259	if (!xchk_xattr_set_map(sc, ab->usedmap, (char *)name - (char *)hdr,
260			sfe->namelen))
261		return 0;
262
263	if (!xchk_xattr_set_map(sc, ab->usedmap, (char *)value - (char *)hdr,
264			sfe->valuelen))
265		return 0;
266
267	if (!xrep_xattr_want_salvage(rx, sfe->flags, sfe->nameval,
268			sfe->namelen, value, sfe->valuelen))
269		return 0;
270
271	return xrep_xattr_salvage_key(rx, sfe->flags, sfe->nameval,
272			sfe->namelen, value, sfe->valuelen);
273}
274
275/*
276 * Record a local format extended attribute key & value for later reinsertion
277 * into the inode.
278 */
279STATIC int
280xrep_xattr_salvage_local_attr(
281	struct xrep_xattr		*rx,
282	struct xfs_attr_leaf_entry	*ent,
283	unsigned int			nameidx,
284	const char			*buf_end,
285	struct xfs_attr_leaf_name_local	*lentry)
286{
287	struct xchk_xattr_buf		*ab = rx->sc->buf;
288	unsigned char			*value;
289	unsigned int			valuelen;
290	unsigned int			namesize;
291
292	/*
293	 * Decode the leaf local entry format.  If something seems wrong, we
294	 * junk the attribute.
295	 */
296	value = &lentry->nameval[lentry->namelen];
297	valuelen = be16_to_cpu(lentry->valuelen);
298	namesize = xfs_attr_leaf_entsize_local(lentry->namelen, valuelen);
299	if ((char *)lentry + namesize > buf_end)
300		return 0;
301	if (!xrep_xattr_want_salvage(rx, ent->flags, lentry->nameval,
302			lentry->namelen, value, valuelen))
303		return 0;
304	if (!xchk_xattr_set_map(rx->sc, ab->usedmap, nameidx, namesize))
305		return 0;
306
307	/* Try to save this attribute. */
308	return xrep_xattr_salvage_key(rx, ent->flags, lentry->nameval,
309			lentry->namelen, value, valuelen);
310}
311
312/*
313 * Record a remote format extended attribute key & value for later reinsertion
314 * into the inode.
315 */
316STATIC int
317xrep_xattr_salvage_remote_attr(
318	struct xrep_xattr		*rx,
319	struct xfs_attr_leaf_entry	*ent,
320	unsigned int			nameidx,
321	const char			*buf_end,
322	struct xfs_attr_leaf_name_remote *rentry,
323	unsigned int			ent_idx,
324	struct xfs_buf			*leaf_bp)
325{
326	struct xchk_xattr_buf		*ab = rx->sc->buf;
327	struct xfs_da_args		args = {
328		.trans			= rx->sc->tp,
329		.dp			= rx->sc->ip,
330		.index			= ent_idx,
331		.geo			= rx->sc->mp->m_attr_geo,
332		.owner			= rx->sc->ip->i_ino,
333		.attr_filter		= ent->flags & XFS_ATTR_NSP_ONDISK_MASK,
334		.namelen		= rentry->namelen,
335		.name			= rentry->name,
336		.value			= ab->value,
337		.valuelen		= be32_to_cpu(rentry->valuelen),
338	};
339	unsigned int			namesize;
340	int				error;
341
342	/*
343	 * Decode the leaf remote entry format.  If something seems wrong, we
344	 * junk the attribute.  Note that we should never find a zero-length
345	 * remote attribute value.
346	 */
347	namesize = xfs_attr_leaf_entsize_remote(rentry->namelen);
348	if ((char *)rentry + namesize > buf_end)
349		return 0;
350	if (args.valuelen == 0 ||
351	    !xrep_xattr_want_salvage(rx, ent->flags, rentry->name,
352			rentry->namelen, NULL, args.valuelen))
353		return 0;
354	if (!xchk_xattr_set_map(rx->sc, ab->usedmap, nameidx, namesize))
355		return 0;
356
357	/*
358	 * Enlarge the buffer (if needed) to hold the value that we're trying
359	 * to salvage from the old extended attribute data.
360	 */
361	error = xchk_setup_xattr_buf(rx->sc, args.valuelen);
362	if (error == -ENOMEM)
363		error = -EDEADLOCK;
364	if (error)
365		return error;
366
367	/* Look up the remote value and stash it for reconstruction. */
368	error = xfs_attr3_leaf_getvalue(leaf_bp, &args);
369	if (error || args.rmtblkno == 0)
370		goto err_free;
371
372	error = xfs_attr_rmtval_get(&args);
373	if (error)
374		goto err_free;
375
376	/* Try to save this attribute. */
377	error = xrep_xattr_salvage_key(rx, ent->flags, rentry->name,
378			rentry->namelen, ab->value, args.valuelen);
379err_free:
380	/* remote value was garbage, junk it */
381	if (error == -EFSBADCRC || error == -EFSCORRUPTED)
382		error = 0;
383	return error;
384}
385
386/* Extract every xattr key that we can from this attr fork block. */
387STATIC int
388xrep_xattr_recover_leaf(
389	struct xrep_xattr		*rx,
390	struct xfs_buf			*bp)
391{
392	struct xfs_attr3_icleaf_hdr	leafhdr;
393	struct xfs_scrub		*sc = rx->sc;
394	struct xfs_mount		*mp = sc->mp;
395	struct xfs_attr_leafblock	*leaf;
396	struct xfs_attr_leaf_name_local	*lentry;
397	struct xfs_attr_leaf_name_remote *rentry;
398	struct xfs_attr_leaf_entry	*ent;
399	struct xfs_attr_leaf_entry	*entries;
400	struct xchk_xattr_buf		*ab = rx->sc->buf;
401	char				*buf_end;
402	size_t				off;
403	unsigned int			nameidx;
404	unsigned int			hdrsize;
405	int				i;
406	int				error = 0;
407
408	bitmap_zero(ab->usedmap, mp->m_attr_geo->blksize);
409
410	/* Check the leaf header */
411	leaf = bp->b_addr;
412	xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
413	hdrsize = xfs_attr3_leaf_hdr_size(leaf);
414	xchk_xattr_set_map(sc, ab->usedmap, 0, hdrsize);
415	entries = xfs_attr3_leaf_entryp(leaf);
416
417	buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize;
418	for (i = 0, ent = entries; i < leafhdr.count; ent++, i++) {
419		if (xchk_should_terminate(sc, &error))
420			return error;
421
422		/* Skip key if it conflicts with something else? */
423		off = (char *)ent - (char *)leaf;
424		if (!xchk_xattr_set_map(sc, ab->usedmap, off,
425				sizeof(xfs_attr_leaf_entry_t)))
426			continue;
427
428		/* Check the name information. */
429		nameidx = be16_to_cpu(ent->nameidx);
430		if (nameidx < leafhdr.firstused ||
431		    nameidx >= mp->m_attr_geo->blksize)
432			continue;
433
434		if (ent->flags & XFS_ATTR_LOCAL) {
435			lentry = xfs_attr3_leaf_name_local(leaf, i);
436			error = xrep_xattr_salvage_local_attr(rx, ent, nameidx,
437					buf_end, lentry);
438		} else {
439			rentry = xfs_attr3_leaf_name_remote(leaf, i);
440			error = xrep_xattr_salvage_remote_attr(rx, ent, nameidx,
441					buf_end, rentry, i, bp);
442		}
443		if (error)
444			return error;
445	}
446
447	return 0;
448}
449
450/* Try to recover shortform attrs. */
451STATIC int
452xrep_xattr_recover_sf(
453	struct xrep_xattr		*rx)
454{
455	struct xfs_scrub		*sc = rx->sc;
456	struct xchk_xattr_buf		*ab = sc->buf;
457	struct xfs_attr_sf_hdr		*hdr;
458	struct xfs_attr_sf_entry	*sfe;
459	struct xfs_attr_sf_entry	*next;
460	struct xfs_ifork		*ifp;
461	unsigned char			*end;
462	int				i;
463	int				error = 0;
464
465	ifp = xfs_ifork_ptr(rx->sc->ip, XFS_ATTR_FORK);
466	hdr = ifp->if_data;
467
468	bitmap_zero(ab->usedmap, ifp->if_bytes);
469	end = (unsigned char *)ifp->if_data + ifp->if_bytes;
470	xchk_xattr_set_map(sc, ab->usedmap, 0, sizeof(*hdr));
471
472	sfe = xfs_attr_sf_firstentry(hdr);
473	if ((unsigned char *)sfe > end)
474		return 0;
475
476	for (i = 0; i < hdr->count; i++) {
477		if (xchk_should_terminate(sc, &error))
478			return error;
479
480		next = xfs_attr_sf_nextentry(sfe);
481		if ((unsigned char *)next > end)
482			break;
483
484		if (xchk_xattr_set_map(sc, ab->usedmap,
485				(char *)sfe - (char *)hdr,
486				sizeof(struct xfs_attr_sf_entry))) {
487			/*
488			 * No conflicts with the sf entry; let's save this
489			 * attribute.
490			 */
491			error = xrep_xattr_salvage_sf_attr(rx, hdr, sfe);
492			if (error)
493				return error;
494		}
495
496		sfe = next;
497	}
498
499	return 0;
500}
501
502/*
503 * Try to return a buffer of xattr data for a given physical extent.
504 *
505 * Because the buffer cache get function complains if it finds a buffer
506 * matching the block number but not matching the length, we must be careful to
507 * look for incore buffers (up to the maximum length of a remote value) that
508 * could be hiding anywhere in the physical range.  If we find an incore
509 * buffer, we can pass that to the caller.  Optionally, read a single block and
510 * pass that back.
511 *
512 * Note the subtlety that remote attr value blocks for which there is no incore
513 * buffer will be passed to the callback one block at a time.  These buffers
514 * will not have any ops attached and must be staled to prevent aliasing with
515 * multiblock buffers once we drop the ILOCK.
516 */
517STATIC int
518xrep_xattr_find_buf(
519	struct xfs_mount	*mp,
520	xfs_fsblock_t		fsbno,
521	xfs_extlen_t		max_len,
522	bool			can_read,
523	struct xfs_buf		**bpp)
524{
525	struct xrep_bufscan	scan = {
526		.daddr		= XFS_FSB_TO_DADDR(mp, fsbno),
527		.max_sectors	= xrep_bufscan_max_sectors(mp, max_len),
528		.daddr_step	= XFS_FSB_TO_BB(mp, 1),
529	};
530	struct xfs_buf		*bp;
531
532	while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) {
533		*bpp = bp;
534		return 0;
535	}
536
537	if (!can_read) {
538		*bpp = NULL;
539		return 0;
540	}
541
542	return xfs_buf_read(mp->m_ddev_targp, scan.daddr, XFS_FSB_TO_BB(mp, 1),
543			XBF_TRYLOCK, bpp, NULL);
544}
545
546/*
547 * Deal with a buffer that we found during our walk of the attr fork.
548 *
549 * Attribute leaf and node blocks are simple -- they're a single block, so we
550 * can walk them one at a time and we never have to worry about discontiguous
551 * multiblock buffers like we do for directories.
552 *
553 * Unfortunately, remote attr blocks add a lot of complexity here.  Each disk
554 * block is totally self contained, in the sense that the v5 header provides no
555 * indication that there could be more data in the next block.  The incore
556 * buffers can span multiple blocks, though they never cross extent records.
557 * However, they don't necessarily start or end on an extent record boundary.
558 * Therefore, we need a special buffer find function to walk the buffer cache
559 * for us.
560 *
561 * The caller must hold the ILOCK on the file being repaired.  We use
562 * XBF_TRYLOCK here to skip any locked buffer on the assumption that we don't
563 * own the block and don't want to hang the system on a potentially garbage
564 * buffer.
565 */
566STATIC int
567xrep_xattr_recover_block(
568	struct xrep_xattr	*rx,
569	xfs_dablk_t		dabno,
570	xfs_fsblock_t		fsbno,
571	xfs_extlen_t		max_len,
572	xfs_extlen_t		*actual_len)
573{
574	struct xfs_da_blkinfo	*info;
575	struct xfs_buf		*bp;
576	int			error;
577
578	error = xrep_xattr_find_buf(rx->sc->mp, fsbno, max_len, true, &bp);
579	if (error)
580		return error;
581	info = bp->b_addr;
582	*actual_len = XFS_BB_TO_FSB(rx->sc->mp, bp->b_length);
583
584	trace_xrep_xattr_recover_leafblock(rx->sc->ip, dabno,
585			be16_to_cpu(info->magic));
586
587	/*
588	 * If the buffer has the right magic number for an attr leaf block and
589	 * passes a structure check (we don't care about checksums), salvage
590	 * as much as we can from the block. */
591	if (info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC) &&
592	    xrep_buf_verify_struct(bp, &xfs_attr3_leaf_buf_ops) &&
593	    xfs_attr3_leaf_header_check(bp, rx->sc->ip->i_ino) == NULL)
594		error = xrep_xattr_recover_leaf(rx, bp);
595
596	/*
597	 * If the buffer didn't already have buffer ops set, it was read in by
598	 * the _find_buf function and could very well be /part/ of a multiblock
599	 * remote block.  Mark it stale so that it doesn't hang around in
600	 * memory to cause problems.
601	 */
602	if (bp->b_ops == NULL)
603		xfs_buf_stale(bp);
604
605	xfs_buf_relse(bp);
606	return error;
607}
608
609/* Insert one xattr key/value. */
610STATIC int
611xrep_xattr_insert_rec(
612	struct xrep_xattr		*rx,
613	const struct xrep_xattr_key	*key)
614{
615	struct xfs_da_args		args = {
616		.dp			= rx->sc->tempip,
617		.attr_filter		= key->flags,
618		.namelen		= key->namelen,
619		.valuelen		= key->valuelen,
620		.owner			= rx->sc->ip->i_ino,
621		.geo			= rx->sc->mp->m_attr_geo,
622		.whichfork		= XFS_ATTR_FORK,
623		.op_flags		= XFS_DA_OP_OKNOENT,
624	};
625	struct xchk_xattr_buf		*ab = rx->sc->buf;
626	int				error;
627
628	/*
629	 * Grab pointers to the scrub buffer so that we can use them to insert
630	 * attrs into the temp file.
631	 */
632	args.name = ab->name;
633	args.value = ab->value;
634
635	/*
636	 * The attribute name is stored near the end of the in-core buffer,
637	 * though we reserve one more byte to ensure null termination.
638	 */
639	ab->name[XATTR_NAME_MAX] = 0;
640
641	error = xfblob_load(rx->xattr_blobs, key->name_cookie, ab->name,
642			key->namelen);
643	if (error)
644		return error;
645
646	error = xfblob_free(rx->xattr_blobs, key->name_cookie);
647	if (error)
648		return error;
649
650	error = xfblob_load(rx->xattr_blobs, key->value_cookie, args.value,
651			key->valuelen);
652	if (error)
653		return error;
654
655	error = xfblob_free(rx->xattr_blobs, key->value_cookie);
656	if (error)
657		return error;
658
659	ab->name[key->namelen] = 0;
660
661	if (key->flags & XFS_ATTR_PARENT) {
662		trace_xrep_xattr_insert_pptr(rx->sc->tempip, key->flags,
663				ab->name, key->namelen, ab->value,
664				key->valuelen);
665		args.op_flags |= XFS_DA_OP_LOGGED;
666	} else {
667		trace_xrep_xattr_insert_rec(rx->sc->tempip, key->flags,
668				ab->name, key->namelen, key->valuelen);
669	}
670
671	/*
672	 * xfs_attr_set creates and commits its own transaction.  If the attr
673	 * already exists, we'll just drop it during the rebuild.
674	 */
675	xfs_attr_sethash(&args);
676	error = xfs_attr_set(&args, XFS_ATTRUPDATE_CREATE, false);
677	if (error == -EEXIST)
678		error = 0;
679
680	return error;
681}
682
683/*
684 * Periodically flush salvaged attributes to the temporary file.  This is done
685 * to reduce the memory requirements of the xattr rebuild because files can
686 * contain millions of attributes.
687 */
688STATIC int
689xrep_xattr_flush_stashed(
690	struct xrep_xattr	*rx)
691{
692	xfarray_idx_t		array_cur;
693	int			error;
694
695	/*
696	 * Entering this function, the scrub context has a reference to the
697	 * inode being repaired, the temporary file, and a scrub transaction
698	 * that we use during xattr salvaging to avoid livelocking if there
699	 * are cycles in the xattr structures.  We hold ILOCK_EXCL on both
700	 * the inode being repaired, though it is not ijoined to the scrub
701	 * transaction.
702	 *
703	 * To constrain kernel memory use, we occasionally flush salvaged
704	 * xattrs from the xfarray and xfblob structures into the temporary
705	 * file in preparation for exchanging the xattr structures at the end.
706	 * Updating the temporary file requires a transaction, so we commit the
707	 * scrub transaction and drop the two ILOCKs so that xfs_attr_set can
708	 * allocate whatever transaction it wants.
709	 *
710	 * We still hold IOLOCK_EXCL on the inode being repaired, which
711	 * prevents anyone from modifying the damaged xattr data while we
712	 * repair it.
713	 */
714	error = xrep_trans_commit(rx->sc);
715	if (error)
716		return error;
717	xchk_iunlock(rx->sc, XFS_ILOCK_EXCL);
718
719	/*
720	 * Take the IOLOCK of the temporary file while we modify xattrs.  This
721	 * isn't strictly required because the temporary file is never revealed
722	 * to userspace, but we follow the same locking rules.  We still hold
723	 * sc->ip's IOLOCK.
724	 */
725	error = xrep_tempfile_iolock_polled(rx->sc);
726	if (error)
727		return error;
728
729	/* Add all the salvaged attrs to the temporary file. */
730	foreach_xfarray_idx(rx->xattr_records, array_cur) {
731		struct xrep_xattr_key	key;
732
733		error = xfarray_load(rx->xattr_records, array_cur, &key);
734		if (error)
735			return error;
736
737		error = xrep_xattr_insert_rec(rx, &key);
738		if (error)
739			return error;
740	}
741
742	/* Empty out both arrays now that we've added the entries. */
743	xfarray_truncate(rx->xattr_records);
744	xfblob_truncate(rx->xattr_blobs);
745
746	xrep_tempfile_iounlock(rx->sc);
747
748	/* Recreate the salvage transaction and relock the inode. */
749	error = xchk_trans_alloc(rx->sc, 0);
750	if (error)
751		return error;
752	xchk_ilock(rx->sc, XFS_ILOCK_EXCL);
753	return 0;
754}
755
756/* Decide if we've stashed too much xattr data in memory. */
757static inline bool
758xrep_xattr_want_flush_stashed(
759	struct xrep_xattr	*rx)
760{
761	unsigned long long	bytes;
762
763	if (!rx->can_flush)
764		return false;
765
766	bytes = xfarray_bytes(rx->xattr_records) +
767		xfblob_bytes(rx->xattr_blobs);
768	return bytes > XREP_XATTR_MAX_STASH_BYTES;
769}
770
771/*
772 * Did we observe rename changing parent pointer xattrs while we were flushing
773 * salvaged attrs?
774 */
775static inline bool
776xrep_xattr_saw_pptr_conflict(
777	struct xrep_xattr	*rx)
778{
779	bool			ret;
780
781	ASSERT(rx->can_flush);
782
783	if (!xfs_has_parent(rx->sc->mp))
784		return false;
785
786	xfs_assert_ilocked(rx->sc->ip, XFS_ILOCK_EXCL);
787
788	mutex_lock(&rx->lock);
789	ret = xfarray_bytes(rx->pptr_recs) > 0;
790	mutex_unlock(&rx->lock);
791
792	return ret;
793}
794
795/*
796 * Reset the entire repair state back to initial conditions, now that we've
797 * detected a parent pointer update to the attr structure while we were
798 * flushing salvaged attrs.  See the locking notes in dir_repair.c for more
799 * information on why this is all necessary.
800 */
801STATIC int
802xrep_xattr_full_reset(
803	struct xrep_xattr	*rx)
804{
805	struct xfs_scrub	*sc = rx->sc;
806	struct xfs_attr_sf_hdr	*hdr;
807	struct xfs_ifork	*ifp = &sc->tempip->i_af;
808	int			error;
809
810	trace_xrep_xattr_full_reset(sc->ip, sc->tempip);
811
812	/* The temporary file's data fork had better not be in btree format. */
813	if (sc->tempip->i_df.if_format == XFS_DINODE_FMT_BTREE) {
814		ASSERT(0);
815		return -EIO;
816	}
817
818	/*
819	 * We begin in transaction context with sc->ip ILOCKed but not joined
820	 * to the transaction.  To reset to the initial state, we must hold
821	 * sc->ip's ILOCK to prevent rename from updating parent pointer
822	 * information and the tempfile's ILOCK to clear its contents.
823	 */
824	xchk_iunlock(rx->sc, XFS_ILOCK_EXCL);
825	xrep_tempfile_ilock_both(sc);
826	xfs_trans_ijoin(sc->tp, sc->ip, 0);
827	xfs_trans_ijoin(sc->tp, sc->tempip, 0);
828
829	/*
830	 * Free all the blocks of the attr fork of the temp file, and reset
831	 * it back to local format.
832	 */
833	if (xfs_ifork_has_extents(&sc->tempip->i_af)) {
834		error = xrep_reap_ifork(sc, sc->tempip, XFS_ATTR_FORK);
835		if (error)
836			return error;
837
838		ASSERT(ifp->if_bytes == 0);
839		ifp->if_format = XFS_DINODE_FMT_LOCAL;
840		xfs_idata_realloc(sc->tempip, sizeof(*hdr), XFS_ATTR_FORK);
841	}
842
843	/* Reinitialize the attr fork to an empty shortform structure. */
844	hdr = ifp->if_data;
845	memset(hdr, 0, sizeof(*hdr));
846	hdr->totsize = cpu_to_be16(sizeof(*hdr));
847	xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE | XFS_ILOG_ADATA);
848
849	/*
850	 * Roll this transaction to commit our reset ondisk.  The tempfile
851	 * should no longer be joined to the transaction, so we drop its ILOCK.
852	 * This should leave us in transaction context with sc->ip ILOCKed but
853	 * not joined to the transaction.
854	 */
855	error = xrep_roll_trans(sc);
856	if (error)
857		return error;
858	xrep_tempfile_iunlock(sc);
859
860	/*
861	 * Erase any accumulated parent pointer updates now that we've erased
862	 * the tempfile's attr fork.  We're resetting the entire repair state
863	 * back to where we were initially, except now we won't flush salvaged
864	 * xattrs until the very end.
865	 */
866	mutex_lock(&rx->lock);
867	xfarray_truncate(rx->pptr_recs);
868	xfblob_truncate(rx->pptr_names);
869	mutex_unlock(&rx->lock);
870
871	rx->can_flush = false;
872	rx->attrs_found = 0;
873
874	ASSERT(xfarray_bytes(rx->xattr_records) == 0);
875	ASSERT(xfblob_bytes(rx->xattr_blobs) == 0);
876	return 0;
877}
878
879/* Extract as many attribute keys and values as we can. */
880STATIC int
881xrep_xattr_recover(
882	struct xrep_xattr	*rx)
883{
884	struct xfs_bmbt_irec	got;
885	struct xfs_scrub	*sc = rx->sc;
886	struct xfs_da_geometry	*geo = sc->mp->m_attr_geo;
887	xfs_fileoff_t		offset;
888	xfs_extlen_t		len;
889	xfs_dablk_t		dabno;
890	int			nmap;
891	int			error;
892
893restart:
894	/*
895	 * Iterate each xattr leaf block in the attr fork to scan them for any
896	 * attributes that we might salvage.
897	 */
898	for (offset = 0;
899	     offset < XFS_MAX_FILEOFF;
900	     offset = got.br_startoff + got.br_blockcount) {
901		nmap = 1;
902		error = xfs_bmapi_read(sc->ip, offset, XFS_MAX_FILEOFF - offset,
903				&got, &nmap, XFS_BMAPI_ATTRFORK);
904		if (error)
905			return error;
906		if (nmap != 1)
907			return -EFSCORRUPTED;
908		if (!xfs_bmap_is_written_extent(&got))
909			continue;
910
911		for (dabno = round_up(got.br_startoff, geo->fsbcount);
912		     dabno < got.br_startoff + got.br_blockcount;
913		     dabno += len) {
914			xfs_fileoff_t	curr_offset = dabno - got.br_startoff;
915			xfs_extlen_t	maxlen;
916
917			if (xchk_should_terminate(rx->sc, &error))
918				return error;
919
920			maxlen = min_t(xfs_filblks_t, INT_MAX,
921					got.br_blockcount - curr_offset);
922			error = xrep_xattr_recover_block(rx, dabno,
923					curr_offset + got.br_startblock,
924					maxlen, &len);
925			if (error)
926				return error;
927
928			if (xrep_xattr_want_flush_stashed(rx)) {
929				error = xrep_xattr_flush_stashed(rx);
930				if (error)
931					return error;
932
933				if (xrep_xattr_saw_pptr_conflict(rx)) {
934					error = xrep_xattr_full_reset(rx);
935					if (error)
936						return error;
937
938					goto restart;
939				}
940			}
941		}
942	}
943
944	return 0;
945}
946
947/*
948 * Reset the extended attribute fork to a state where we can start re-adding
949 * the salvaged attributes.
950 */
951STATIC int
952xrep_xattr_fork_remove(
953	struct xfs_scrub	*sc,
954	struct xfs_inode	*ip)
955{
956	struct xfs_attr_sf_hdr	*hdr;
957	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_ATTR_FORK);
958
959	/*
960	 * If the data fork is in btree format, we can't change di_forkoff
961	 * because we could run afoul of the rule that the data fork isn't
962	 * supposed to be in btree format if there's enough space in the fork
963	 * that it could have used extents format.  Instead, reinitialize the
964	 * attr fork to have a shortform structure with zero attributes.
965	 */
966	if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE) {
967		ifp->if_format = XFS_DINODE_FMT_LOCAL;
968		hdr = xfs_idata_realloc(ip, (int)sizeof(*hdr) - ifp->if_bytes,
969				XFS_ATTR_FORK);
970		hdr->count = 0;
971		hdr->totsize = cpu_to_be16(sizeof(*hdr));
972		xfs_trans_log_inode(sc->tp, ip,
973				XFS_ILOG_CORE | XFS_ILOG_ADATA);
974		return 0;
975	}
976
977	/* If we still have attr fork extents, something's wrong. */
978	if (ifp->if_nextents != 0) {
979		struct xfs_iext_cursor	icur;
980		struct xfs_bmbt_irec	irec;
981		unsigned int		i = 0;
982
983		xfs_emerg(sc->mp,
984	"inode 0x%llx attr fork still has %llu attr extents, format %d?!",
985				ip->i_ino, ifp->if_nextents, ifp->if_format);
986		for_each_xfs_iext(ifp, &icur, &irec) {
987			xfs_err(sc->mp,
988	"[%u]: startoff %llu startblock %llu blockcount %llu state %u",
989					i++, irec.br_startoff,
990					irec.br_startblock, irec.br_blockcount,
991					irec.br_state);
992		}
993		ASSERT(0);
994		return -EFSCORRUPTED;
995	}
996
997	xfs_attr_fork_remove(ip, sc->tp);
998	return 0;
999}
1000
1001/*
1002 * Free all the attribute fork blocks of the file being repaired and delete the
1003 * fork.  The caller must ILOCK the scrub file and join it to the transaction.
1004 * This function returns with the inode joined to a clean transaction.
1005 */
1006int
1007xrep_xattr_reset_fork(
1008	struct xfs_scrub	*sc)
1009{
1010	int			error;
1011
1012	trace_xrep_xattr_reset_fork(sc->ip, sc->ip);
1013
1014	/* Unmap all the attr blocks. */
1015	if (xfs_ifork_has_extents(&sc->ip->i_af)) {
1016		error = xrep_reap_ifork(sc, sc->ip, XFS_ATTR_FORK);
1017		if (error)
1018			return error;
1019	}
1020
1021	error = xrep_xattr_fork_remove(sc, sc->ip);
1022	if (error)
1023		return error;
1024
1025	return xfs_trans_roll_inode(&sc->tp, sc->ip);
1026}
1027
1028/*
1029 * Free all the attribute fork blocks of the temporary file and delete the attr
1030 * fork.  The caller must ILOCK the tempfile and join it to the transaction.
1031 * This function returns with the inode joined to a clean scrub transaction.
1032 */
1033int
1034xrep_xattr_reset_tempfile_fork(
1035	struct xfs_scrub	*sc)
1036{
1037	int			error;
1038
1039	trace_xrep_xattr_reset_fork(sc->ip, sc->tempip);
1040
1041	/*
1042	 * Wipe out the attr fork of the temp file so that regular inode
1043	 * inactivation won't trip over the corrupt attr fork.
1044	 */
1045	if (xfs_ifork_has_extents(&sc->tempip->i_af)) {
1046		error = xrep_reap_ifork(sc, sc->tempip, XFS_ATTR_FORK);
1047		if (error)
1048			return error;
1049	}
1050
1051	return xrep_xattr_fork_remove(sc, sc->tempip);
1052}
1053
1054/*
1055 * Find all the extended attributes for this inode by scraping them out of the
1056 * attribute key blocks by hand, and flushing them into the temp file.
1057 * When we're done, free the staging memory before exchanging the xattr
1058 * structures to reduce memory usage.
1059 */
1060STATIC int
1061xrep_xattr_salvage_attributes(
1062	struct xrep_xattr	*rx)
1063{
1064	struct xfs_inode	*ip = rx->sc->ip;
1065	int			error;
1066
1067	/* Short format xattrs are easy! */
1068	if (rx->sc->ip->i_af.if_format == XFS_DINODE_FMT_LOCAL) {
1069		error = xrep_xattr_recover_sf(rx);
1070		if (error)
1071			return error;
1072
1073		return xrep_xattr_flush_stashed(rx);
1074	}
1075
1076	/*
1077	 * For non-inline xattr structures, the salvage function scans the
1078	 * buffer cache looking for potential attr leaf blocks.  The scan
1079	 * requires the ability to lock any buffer found and runs independently
1080	 * of any transaction <-> buffer item <-> buffer linkage.  Therefore,
1081	 * roll the transaction to ensure there are no buffers joined.  We hold
1082	 * the ILOCK independently of the transaction.
1083	 */
1084	error = xfs_trans_roll(&rx->sc->tp);
1085	if (error)
1086		return error;
1087
1088	error = xfs_iread_extents(rx->sc->tp, ip, XFS_ATTR_FORK);
1089	if (error)
1090		return error;
1091
1092	error = xrep_xattr_recover(rx);
1093	if (error)
1094		return error;
1095
1096	return xrep_xattr_flush_stashed(rx);
1097}
1098
1099/*
1100 * Add this stashed incore parent pointer to the temporary file.  The caller
1101 * must hold the tempdir's IOLOCK, must not hold any ILOCKs, and must not be in
1102 * transaction context.
1103 */
1104STATIC int
1105xrep_xattr_replay_pptr_update(
1106	struct xrep_xattr		*rx,
1107	const struct xfs_name		*xname,
1108	struct xrep_xattr_pptr		*pptr)
1109{
1110	struct xfs_scrub		*sc = rx->sc;
1111	int				error;
1112
1113	switch (pptr->action) {
1114	case XREP_XATTR_PPTR_ADD:
1115		/* Create parent pointer. */
1116		trace_xrep_xattr_replay_parentadd(sc->tempip, xname,
1117				&pptr->pptr_rec);
1118
1119		error = xfs_parent_set(sc->tempip, sc->ip->i_ino, xname,
1120				&pptr->pptr_rec, &rx->pptr_args);
1121		ASSERT(error != -EEXIST);
1122		return error;
1123	case XREP_XATTR_PPTR_REMOVE:
1124		/* Remove parent pointer. */
1125		trace_xrep_xattr_replay_parentremove(sc->tempip, xname,
1126				&pptr->pptr_rec);
1127
1128		error = xfs_parent_unset(sc->tempip, sc->ip->i_ino, xname,
1129				&pptr->pptr_rec, &rx->pptr_args);
1130		ASSERT(error != -ENOATTR);
1131		return error;
1132	}
1133
1134	ASSERT(0);
1135	return -EIO;
1136}
1137
/*
 * Flush stashed parent pointer updates that have been recorded by the scanner.
 * This is done to reduce the memory requirements of the xattr rebuild, since
 * files can have a lot of hardlinks and the fs can be busy.
 *
 * Each stashed record and its name are loaded while holding rx->lock.  The
 * lock is dropped around the replay of each update and retaken before the
 * loop advances to the next record.  Once every record has been replayed,
 * both staging structures are truncated.
 *
 * Caller must not hold transactions or ILOCKs.  Caller must hold the tempfile
 * IOLOCK.
 *
 * Returns 0 or a negative errno.  rx->lock is not held on return.  If an
 * error occurs, the staging structures are not truncated.
 */
STATIC int
xrep_xattr_replay_pptr_updates(
	struct xrep_xattr	*rx)
{
	xfarray_idx_t		array_cur;
	int			error;

	mutex_lock(&rx->lock);
	foreach_xfarray_idx(rx->pptr_recs, array_cur) {
		struct xrep_xattr_pptr	pptr;

		/* Copy the stashed record onto the stack. */
		error = xfarray_load(rx->pptr_recs, array_cur, &pptr);
		if (error)
			goto out_unlock;

		/* Load the dirent name for this record into rx->xname. */
		error = xfblob_loadname(rx->pptr_names, pptr.name_cookie,
				&rx->xname, pptr.namelen);
		if (error)
			goto out_unlock;

		/*
		 * Drop the lock across the replay.
		 * NOTE(review): presumably this lets the live update hook,
		 * which takes rx->lock, keep stashing updates meanwhile --
		 * confirm.
		 */
		mutex_unlock(&rx->lock);

		error = xrep_xattr_replay_pptr_update(rx, &rx->xname, &pptr);
		if (error)
			return error;	/* rx->lock is not held here */

		mutex_lock(&rx->lock);
	}

	/* Empty out both arrays now that we've added the entries. */
	xfarray_truncate(rx->pptr_recs);
	xfblob_truncate(rx->pptr_names);
	mutex_unlock(&rx->lock);
	return 0;
out_unlock:
	/* Load failures happen with rx->lock held, so release it here. */
	mutex_unlock(&rx->lock);
	return error;
}
1183
1184/*
1185 * Remember that we want to create a parent pointer in the tempfile.  These
1186 * stashed actions will be replayed later.
1187 */
1188STATIC int
1189xrep_xattr_stash_parentadd(
1190	struct xrep_xattr	*rx,
1191	const struct xfs_name	*name,
1192	const struct xfs_inode	*dp)
1193{
1194	struct xrep_xattr_pptr	pptr = {
1195		.action		= XREP_XATTR_PPTR_ADD,
1196		.namelen	= name->len,
1197	};
1198	int			error;
1199
1200	trace_xrep_xattr_stash_parentadd(rx->sc->tempip, dp, name);
1201
1202	xfs_inode_to_parent_rec(&pptr.pptr_rec, dp);
1203	error = xfblob_storename(rx->pptr_names, &pptr.name_cookie, name);
1204	if (error)
1205		return error;
1206
1207	return xfarray_append(rx->pptr_recs, &pptr);
1208}
1209
1210/*
1211 * Remember that we want to remove a parent pointer from the tempfile.  These
1212 * stashed actions will be replayed later.
1213 */
1214STATIC int
1215xrep_xattr_stash_parentremove(
1216	struct xrep_xattr	*rx,
1217	const struct xfs_name	*name,
1218	const struct xfs_inode	*dp)
1219{
1220	struct xrep_xattr_pptr	pptr = {
1221		.action		= XREP_XATTR_PPTR_REMOVE,
1222		.namelen	= name->len,
1223	};
1224	int			error;
1225
1226	trace_xrep_xattr_stash_parentremove(rx->sc->tempip, dp, name);
1227
1228	xfs_inode_to_parent_rec(&pptr.pptr_rec, dp);
1229	error = xfblob_storename(rx->pptr_names, &pptr.name_cookie, name);
1230	if (error)
1231		return error;
1232
1233	return xfarray_append(rx->pptr_recs, &pptr);
1234}
1235
1236/*
1237 * Capture dirent updates being made by other threads.  We will have to replay
1238 * the parent pointer updates before exchanging attr forks.
1239 */
1240STATIC int
1241xrep_xattr_live_dirent_update(
1242	struct notifier_block		*nb,
1243	unsigned long			action,
1244	void				*data)
1245{
1246	struct xfs_dir_update_params	*p = data;
1247	struct xrep_xattr		*rx;
1248	struct xfs_scrub		*sc;
1249	int				error;
1250
1251	rx = container_of(nb, struct xrep_xattr, dhook.dirent_hook.nb);
1252	sc = rx->sc;
1253
1254	/*
1255	 * This thread updated a dirent that points to the file that we're
1256	 * repairing, so stash the update for replay against the temporary
1257	 * file.
1258	 */
1259	if (p->ip->i_ino != sc->ip->i_ino)
1260		return NOTIFY_DONE;
1261
1262	mutex_lock(&rx->lock);
1263	if (p->delta > 0)
1264		error = xrep_xattr_stash_parentadd(rx, p->name, p->dp);
1265	else
1266		error = xrep_xattr_stash_parentremove(rx, p->name, p->dp);
1267	if (error)
1268		rx->live_update_aborted = true;
1269	mutex_unlock(&rx->lock);
1270	return NOTIFY_DONE;
1271}
1272
1273/*
1274 * Prepare both inodes' attribute forks for an exchange.  Promote the tempfile
1275 * from short format to leaf format, and if the file being repaired has a short
1276 * format attr fork, turn it into an empty extent list.
1277 */
1278STATIC int
1279xrep_xattr_swap_prep(
1280	struct xfs_scrub	*sc,
1281	bool			temp_local,
1282	bool			ip_local)
1283{
1284	int			error;
1285
1286	/*
1287	 * If the tempfile's attributes are in shortform format, convert that
1288	 * to a single leaf extent so that we can use the atomic mapping
1289	 * exchange.
1290	 */
1291	if (temp_local) {
1292		struct xfs_da_args	args = {
1293			.dp		= sc->tempip,
1294			.geo		= sc->mp->m_attr_geo,
1295			.whichfork	= XFS_ATTR_FORK,
1296			.trans		= sc->tp,
1297			.total		= 1,
1298			.owner		= sc->ip->i_ino,
1299		};
1300
1301		error = xfs_attr_shortform_to_leaf(&args);
1302		if (error)
1303			return error;
1304
1305		/*
1306		 * Roll the deferred log items to get us back to a clean
1307		 * transaction.
1308		 */
1309		error = xfs_defer_finish(&sc->tp);
1310		if (error)
1311			return error;
1312	}
1313
1314	/*
1315	 * If the file being repaired had a shortform attribute fork, convert
1316	 * that to an empty extent list in preparation for the atomic mapping
1317	 * exchange.
1318	 */
1319	if (ip_local) {
1320		struct xfs_ifork	*ifp;
1321
1322		ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK);
1323
1324		xfs_idestroy_fork(ifp);
1325		ifp->if_format = XFS_DINODE_FMT_EXTENTS;
1326		ifp->if_nextents = 0;
1327		ifp->if_bytes = 0;
1328		ifp->if_data = NULL;
1329		ifp->if_height = 0;
1330
1331		xfs_trans_log_inode(sc->tp, sc->ip,
1332				XFS_ILOG_CORE | XFS_ILOG_ADATA);
1333	}
1334
1335	return 0;
1336}
1337
1338/* Exchange the temporary file's attribute fork with the one being repaired. */
1339int
1340xrep_xattr_swap(
1341	struct xfs_scrub	*sc,
1342	struct xrep_tempexch	*tx)
1343{
1344	bool			ip_local, temp_local;
1345	int			error = 0;
1346
1347	ip_local = sc->ip->i_af.if_format == XFS_DINODE_FMT_LOCAL;
1348	temp_local = sc->tempip->i_af.if_format == XFS_DINODE_FMT_LOCAL;
1349
1350	/*
1351	 * If the both files have a local format attr fork and the rebuilt
1352	 * xattr data would fit in the repaired file's attr fork, just copy
1353	 * the contents from the tempfile and declare ourselves done.
1354	 */
1355	if (ip_local && temp_local) {
1356		int	forkoff;
1357		int	newsize;
1358
1359		newsize = xfs_attr_sf_totsize(sc->tempip);
1360		forkoff = xfs_attr_shortform_bytesfit(sc->ip, newsize);
1361		if (forkoff > 0) {
1362			sc->ip->i_forkoff = forkoff;
1363			xrep_tempfile_copyout_local(sc, XFS_ATTR_FORK);
1364			return 0;
1365		}
1366	}
1367
1368	/* Otherwise, make sure both attr forks are in block-mapping mode. */
1369	error = xrep_xattr_swap_prep(sc, temp_local, ip_local);
1370	if (error)
1371		return error;
1372
1373	return xrep_tempexch_contents(sc, tx);
1374}
1375
1376/*
1377 * Finish replaying stashed parent pointer updates, allocate a transaction for
1378 * exchanging extent mappings, and take the ILOCKs of both files before we
1379 * commit the new extended attribute structure.
1380 */
1381STATIC int
1382xrep_xattr_finalize_tempfile(
1383	struct xrep_xattr	*rx)
1384{
1385	struct xfs_scrub	*sc = rx->sc;
1386	int			error;
1387
1388	if (!xfs_has_parent(sc->mp))
1389		return xrep_tempexch_trans_alloc(sc, XFS_ATTR_FORK, &rx->tx);
1390
1391	/*
1392	 * Repair relies on the ILOCK to quiesce all possible xattr updates.
1393	 * Replay all queued parent pointer updates into the tempfile before
1394	 * exchanging the contents, even if that means dropping the ILOCKs and
1395	 * the transaction.
1396	 */
1397	do {
1398		error = xrep_xattr_replay_pptr_updates(rx);
1399		if (error)
1400			return error;
1401
1402		error = xrep_tempexch_trans_alloc(sc, XFS_ATTR_FORK, &rx->tx);
1403		if (error)
1404			return error;
1405
1406		if (xfarray_length(rx->pptr_recs) == 0)
1407			break;
1408
1409		xchk_trans_cancel(sc);
1410		xrep_tempfile_iunlock_both(sc);
1411	} while (!xchk_should_terminate(sc, &error));
1412	return error;
1413}
1414
/*
 * Exchange the new extended attribute data (which we created in the tempfile)
 * with the file being repaired.
 *
 * If no attributes were salvaged, the attr fork of the file is simply reset.
 * Otherwise the repair transaction is committed, the stashed parent pointer
 * updates are replayed, the attr forks are exchanged, and the tempfile's attr
 * fork (which then holds the old attr blocks) is reset.  In both cases the
 * cached ACLs of the file are invalidated on success.
 *
 * Returns 0 or a negative errno.
 *
 * NOTE(review): the error returns after xrep_tempfile_iolock_polled() do not
 * release the tempfile locks or the transaction here; presumably the scrub
 * teardown code takes care of that -- confirm.
 */
STATIC int
xrep_xattr_rebuild_tree(
	struct xrep_xattr	*rx)
{
	struct xfs_scrub	*sc = rx->sc;
	int			error;

	/*
	 * If we didn't find any attributes to salvage, repair the file by
	 * zapping its attr fork.
	 */
	if (rx->attrs_found == 0) {
		/* Resetting the fork needs the inode joined to the transaction. */
		xfs_trans_ijoin(sc->tp, sc->ip, 0);
		error = xrep_xattr_reset_fork(sc);
		if (error)
			return error;

		/* No tempfile exchange needed; skip to the ACL invalidation. */
		goto forget_acls;
	}

	trace_xrep_xattr_rebuild_tree(sc->ip, sc->tempip);

	/*
	 * Commit the repair transaction and drop the ILOCKs so that we can use
	 * the atomic file content exchange helper functions to compute the
	 * correct resource reservations.
	 *
	 * We still hold IOLOCK_EXCL (aka i_rwsem) which will prevent xattr
	 * modifications, but there's nothing to prevent userspace from reading
	 * the attributes until we're ready for the exchange operation.  Reads
	 * will return -EIO without shutting down the fs, so we're ok with
	 * that.
	 */
	error = xrep_trans_commit(sc);
	if (error)
		return error;

	xchk_iunlock(sc, XFS_ILOCK_EXCL);

	/*
	 * Take the IOLOCK on the temporary file so that we can run xattr
	 * operations with the same locks held as we would for a normal file.
	 * We still hold sc->ip's IOLOCK.
	 */
	error = xrep_tempfile_iolock_polled(rx->sc);
	if (error)
		return error;

	/*
	 * Allocate transaction, lock inodes, and make sure that we've replayed
	 * all the stashed parent pointer updates to the temp file.  After this
	 * point, we're ready to exchange attr fork mappings.
	 */
	error = xrep_xattr_finalize_tempfile(rx);
	if (error)
		return error;

	/*
	 * Exchange the blocks mapped by the tempfile's attr fork with the file
	 * being repaired.  The old attr blocks will then be attached to the
	 * tempfile, so reap its attr fork.
	 */
	error = xrep_xattr_swap(sc, &rx->tx);
	if (error)
		return error;

	error = xrep_xattr_reset_tempfile_fork(sc);
	if (error)
		return error;

	/*
	 * Roll to get a transaction without any inodes joined to it.  Then we
	 * can drop the tempfile's ILOCK and IOLOCK before doing more work on
	 * the scrub target file.
	 */
	error = xfs_trans_roll(&sc->tp);
	if (error)
		return error;

	xrep_tempfile_iunlock(sc);
	xrep_tempfile_iounlock(sc);

forget_acls:
	/* Invalidate cached ACLs now that we've reloaded all the xattrs. */
	xfs_forget_acl(VFS_I(sc->ip), SGI_ACL_FILE);
	xfs_forget_acl(VFS_I(sc->ip), SGI_ACL_DEFAULT);
	return 0;
}
1507
1508/* Tear down all the incore scan stuff we created. */
1509STATIC void
1510xrep_xattr_teardown(
1511	struct xrep_xattr	*rx)
1512{
1513	if (xfs_has_parent(rx->sc->mp))
1514		xfs_dir_hook_del(rx->sc->mp, &rx->dhook);
1515	if (rx->pptr_names)
1516		xfblob_destroy(rx->pptr_names);
1517	if (rx->pptr_recs)
1518		xfarray_destroy(rx->pptr_recs);
1519	xfblob_destroy(rx->xattr_blobs);
1520	xfarray_destroy(rx->xattr_records);
1521	mutex_destroy(&rx->lock);
1522	kfree(rx);
1523}
1524
/*
 * Set up the filesystem scan so we can regenerate extended attributes.
 *
 * Allocates and initializes the repair context, the xattr value buffer, and
 * the staging structures for salvaged attributes.  If the filesystem has
 * parent pointers, the staging structures for stashed parent pointer updates
 * are created as well and the directory update hook is installed.
 *
 * On success, returns 0 and stores the new context in *rxp.  On failure,
 * everything that was set up here is torn down again, *rxp is left untouched,
 * and a negative errno is returned.
 */
STATIC int
xrep_xattr_setup_scan(
	struct xfs_scrub	*sc,
	struct xrep_xattr	**rxp)
{
	struct xrep_xattr	*rx;
	char			*descr;
	int			max_len;
	int			error;

	rx = kzalloc(sizeof(struct xrep_xattr), XCHK_GFP_FLAGS);
	if (!rx)
		return -ENOMEM;
	rx->sc = sc;
	rx->can_flush = true;
	/* Names loaded from the staging blob land in the embedded buffer. */
	rx->xname.name = rx->namebuf;

	mutex_init(&rx->lock);

	/*
	 * Allocate enough memory to handle loading local attr values from the
	 * xfblob data while flushing stashed attrs to the temporary file.
	 * We only realloc the buffer when salvaging remote attr values.
	 */
	max_len = xfs_attr_leaf_entsize_local_max(sc->mp->m_attr_geo->blksize);
	error = xchk_setup_xattr_buf(rx->sc, max_len);
	/*
	 * NOTE(review): -ENOMEM is reported as -EDEADLOCK here; presumably
	 * the scrub core treats that as a request to retry -- confirm.
	 */
	if (error == -ENOMEM)
		error = -EDEADLOCK;
	if (error)
		goto out_rx;

	/* Set up some staging for salvaged attribute keys and values */
	descr = xchk_xfile_ino_descr(sc, "xattr keys");
	error = xfarray_create(descr, 0, sizeof(struct xrep_xattr_key),
			&rx->xattr_records);
	/* The description string is not needed after the create call. */
	kfree(descr);
	if (error)
		goto out_rx;

	descr = xchk_xfile_ino_descr(sc, "xattr names");
	error = xfblob_create(descr, &rx->xattr_blobs);
	kfree(descr);
	if (error)
		goto out_keys;

	/* Parent pointer staging and the live update hook. */
	if (xfs_has_parent(sc->mp)) {
		ASSERT(sc->flags & XCHK_FSGATES_DIRENTS);

		descr = xchk_xfile_ino_descr(sc,
				"xattr retained parent pointer entries");
		error = xfarray_create(descr, 0,
				sizeof(struct xrep_xattr_pptr),
				&rx->pptr_recs);
		kfree(descr);
		if (error)
			goto out_values;

		descr = xchk_xfile_ino_descr(sc,
				"xattr retained parent pointer names");
		error = xfblob_create(descr, &rx->pptr_names);
		kfree(descr);
		if (error)
			goto out_pprecs;

		/* Install the hook last, once the staging exists. */
		xfs_dir_hook_setup(&rx->dhook, xrep_xattr_live_dirent_update);
		error = xfs_dir_hook_add(sc->mp, &rx->dhook);
		if (error)
			goto out_ppnames;
	}

	*rxp = rx;
	return 0;

	/* Error unwinding, in reverse order of construction. */
out_ppnames:
	xfblob_destroy(rx->pptr_names);
out_pprecs:
	xfarray_destroy(rx->pptr_recs);
out_values:
	xfblob_destroy(rx->xattr_blobs);
out_keys:
	xfarray_destroy(rx->xattr_records);
out_rx:
	mutex_destroy(&rx->lock);
	kfree(rx);
	return error;
}
1611
1612/*
1613 * Repair the extended attribute metadata.
1614 *
1615 * XXX: Remote attribute value buffers encompass the entire (up to 64k) buffer.
1616 * The buffer cache in XFS can't handle aliased multiblock buffers, so this
1617 * might misbehave if the attr fork is crosslinked with other filesystem
1618 * metadata.
1619 */
1620int
1621xrep_xattr(
1622	struct xfs_scrub	*sc)
1623{
1624	struct xrep_xattr	*rx = NULL;
1625	int			error;
1626
1627	if (!xfs_inode_hasattr(sc->ip))
1628		return -ENOENT;
1629
1630	/* The rmapbt is required to reap the old attr fork. */
1631	if (!xfs_has_rmapbt(sc->mp))
1632		return -EOPNOTSUPP;
1633	/* We require atomic file exchange range to rebuild anything. */
1634	if (!xfs_has_exchange_range(sc->mp))
1635		return -EOPNOTSUPP;
1636
1637	error = xrep_xattr_setup_scan(sc, &rx);
1638	if (error)
1639		return error;
1640
1641	ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL);
1642
1643	error = xrep_xattr_salvage_attributes(rx);
1644	if (error)
1645		goto out_scan;
1646
1647	if (rx->live_update_aborted) {
1648		error = -EIO;
1649		goto out_scan;
1650	}
1651
1652	/* Last chance to abort before we start committing fixes. */
1653	if (xchk_should_terminate(sc, &error))
1654		goto out_scan;
1655
1656	error = xrep_xattr_rebuild_tree(rx);
1657	if (error)
1658		goto out_scan;
1659
1660out_scan:
1661	xrep_xattr_teardown(rx);
1662	return error;
1663}
1664