1// SPDX-License-Identifier: GPL-2.0+
2/*
3 * Copyright (C) 2019 Oracle.  All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 */
6#include "xfs.h"
7#include "xfs_fs.h"
8#include "xfs_shared.h"
9#include "xfs_format.h"
10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h"
12#include "xfs_mount.h"
13#include "xfs_inode.h"
14#include "xfs_trace.h"
15#include "xfs_health.h"
16#include "xfs_ag.h"
17#include "xfs_btree.h"
18#include "xfs_da_format.h"
19#include "xfs_da_btree.h"
20#include "xfs_quota_defs.h"
21
22/*
23 * Warn about metadata corruption that we detected but haven't fixed, and
24 * make sure we're not sitting on anything that would get in the way of
25 * recovery.
26 */
27void
28xfs_health_unmount(
29	struct xfs_mount	*mp)
30{
31	struct xfs_perag	*pag;
32	xfs_agnumber_t		agno;
33	unsigned int		sick = 0;
34	unsigned int		checked = 0;
35	bool			warn = false;
36
37	if (xfs_is_shutdown(mp))
38		return;
39
40	/* Measure AG corruption levels. */
41	for_each_perag(mp, agno, pag) {
42		xfs_ag_measure_sickness(pag, &sick, &checked);
43		if (sick) {
44			trace_xfs_ag_unfixed_corruption(mp, agno, sick);
45			warn = true;
46		}
47	}
48
49	/* Measure realtime volume corruption levels. */
50	xfs_rt_measure_sickness(mp, &sick, &checked);
51	if (sick) {
52		trace_xfs_rt_unfixed_corruption(mp, sick);
53		warn = true;
54	}
55
56	/*
57	 * Measure fs corruption and keep the sample around for the warning.
58	 * See the note below for why we exempt FS_COUNTERS.
59	 */
60	xfs_fs_measure_sickness(mp, &sick, &checked);
61	if (sick & ~XFS_SICK_FS_COUNTERS) {
62		trace_xfs_fs_unfixed_corruption(mp, sick);
63		warn = true;
64	}
65
66	if (warn) {
67		xfs_warn(mp,
68"Uncorrected metadata errors detected; please run xfs_repair.");
69
70		/*
71		 * We discovered uncorrected metadata problems at some point
72		 * during this filesystem mount and have advised the
73		 * administrator to run repair once the unmount completes.
74		 *
75		 * However, we must be careful -- when FSCOUNTERS are flagged
76		 * unhealthy, the unmount procedure omits writing the clean
77		 * unmount record to the log so that the next mount will run
78		 * recovery and recompute the summary counters.  In other
79		 * words, we leave a dirty log to get the counters fixed.
80		 *
81		 * Unfortunately, xfs_repair cannot recover dirty logs, so if
82		 * there were filesystem problems, FSCOUNTERS was flagged, and
83		 * the administrator takes our advice to run xfs_repair,
84		 * they'll have to zap the log before repairing structures.
85		 * We don't really want to encourage this, so we mark the
86		 * FSCOUNTERS healthy so that a subsequent repair run won't see
87		 * a dirty log.
88		 */
89		if (sick & XFS_SICK_FS_COUNTERS)
90			xfs_fs_mark_healthy(mp, XFS_SICK_FS_COUNTERS);
91	}
92}
93
94/* Mark unhealthy per-fs metadata. */
95void
96xfs_fs_mark_sick(
97	struct xfs_mount	*mp,
98	unsigned int		mask)
99{
100	ASSERT(!(mask & ~XFS_SICK_FS_ALL));
101	trace_xfs_fs_mark_sick(mp, mask);
102
103	spin_lock(&mp->m_sb_lock);
104	mp->m_fs_sick |= mask;
105	spin_unlock(&mp->m_sb_lock);
106}
107
108/* Mark per-fs metadata as having been checked and found unhealthy by fsck. */
109void
110xfs_fs_mark_corrupt(
111	struct xfs_mount	*mp,
112	unsigned int		mask)
113{
114	ASSERT(!(mask & ~XFS_SICK_FS_ALL));
115	trace_xfs_fs_mark_corrupt(mp, mask);
116
117	spin_lock(&mp->m_sb_lock);
118	mp->m_fs_sick |= mask;
119	mp->m_fs_checked |= mask;
120	spin_unlock(&mp->m_sb_lock);
121}
122
123/* Mark a per-fs metadata healed. */
124void
125xfs_fs_mark_healthy(
126	struct xfs_mount	*mp,
127	unsigned int		mask)
128{
129	ASSERT(!(mask & ~XFS_SICK_FS_ALL));
130	trace_xfs_fs_mark_healthy(mp, mask);
131
132	spin_lock(&mp->m_sb_lock);
133	mp->m_fs_sick &= ~mask;
134	if (!(mp->m_fs_sick & XFS_SICK_FS_PRIMARY))
135		mp->m_fs_sick &= ~XFS_SICK_FS_SECONDARY;
136	mp->m_fs_checked |= mask;
137	spin_unlock(&mp->m_sb_lock);
138}
139
140/* Sample which per-fs metadata are unhealthy. */
141void
142xfs_fs_measure_sickness(
143	struct xfs_mount	*mp,
144	unsigned int		*sick,
145	unsigned int		*checked)
146{
147	spin_lock(&mp->m_sb_lock);
148	*sick = mp->m_fs_sick;
149	*checked = mp->m_fs_checked;
150	spin_unlock(&mp->m_sb_lock);
151}
152
153/* Mark unhealthy realtime metadata. */
154void
155xfs_rt_mark_sick(
156	struct xfs_mount	*mp,
157	unsigned int		mask)
158{
159	ASSERT(!(mask & ~XFS_SICK_RT_ALL));
160	trace_xfs_rt_mark_sick(mp, mask);
161
162	spin_lock(&mp->m_sb_lock);
163	mp->m_rt_sick |= mask;
164	spin_unlock(&mp->m_sb_lock);
165}
166
167/* Mark realtime metadata as having been checked and found unhealthy by fsck. */
168void
169xfs_rt_mark_corrupt(
170	struct xfs_mount	*mp,
171	unsigned int		mask)
172{
173	ASSERT(!(mask & ~XFS_SICK_RT_ALL));
174	trace_xfs_rt_mark_corrupt(mp, mask);
175
176	spin_lock(&mp->m_sb_lock);
177	mp->m_rt_sick |= mask;
178	mp->m_rt_checked |= mask;
179	spin_unlock(&mp->m_sb_lock);
180}
181
182/* Mark a realtime metadata healed. */
183void
184xfs_rt_mark_healthy(
185	struct xfs_mount	*mp,
186	unsigned int		mask)
187{
188	ASSERT(!(mask & ~XFS_SICK_RT_ALL));
189	trace_xfs_rt_mark_healthy(mp, mask);
190
191	spin_lock(&mp->m_sb_lock);
192	mp->m_rt_sick &= ~mask;
193	if (!(mp->m_rt_sick & XFS_SICK_RT_PRIMARY))
194		mp->m_rt_sick &= ~XFS_SICK_RT_SECONDARY;
195	mp->m_rt_checked |= mask;
196	spin_unlock(&mp->m_sb_lock);
197}
198
199/* Sample which realtime metadata are unhealthy. */
200void
201xfs_rt_measure_sickness(
202	struct xfs_mount	*mp,
203	unsigned int		*sick,
204	unsigned int		*checked)
205{
206	spin_lock(&mp->m_sb_lock);
207	*sick = mp->m_rt_sick;
208	*checked = mp->m_rt_checked;
209	spin_unlock(&mp->m_sb_lock);
210}
211
212/* Mark unhealthy per-ag metadata given a raw AG number. */
213void
214xfs_agno_mark_sick(
215	struct xfs_mount	*mp,
216	xfs_agnumber_t		agno,
217	unsigned int		mask)
218{
219	struct xfs_perag	*pag = xfs_perag_get(mp, agno);
220
221	/* per-ag structure not set up yet? */
222	if (!pag)
223		return;
224
225	xfs_ag_mark_sick(pag, mask);
226	xfs_perag_put(pag);
227}
228
229/* Mark unhealthy per-ag metadata. */
230void
231xfs_ag_mark_sick(
232	struct xfs_perag	*pag,
233	unsigned int		mask)
234{
235	ASSERT(!(mask & ~XFS_SICK_AG_ALL));
236	trace_xfs_ag_mark_sick(pag->pag_mount, pag->pag_agno, mask);
237
238	spin_lock(&pag->pag_state_lock);
239	pag->pag_sick |= mask;
240	spin_unlock(&pag->pag_state_lock);
241}
242
243/* Mark per-ag metadata as having been checked and found unhealthy by fsck. */
244void
245xfs_ag_mark_corrupt(
246	struct xfs_perag	*pag,
247	unsigned int		mask)
248{
249	ASSERT(!(mask & ~XFS_SICK_AG_ALL));
250	trace_xfs_ag_mark_corrupt(pag->pag_mount, pag->pag_agno, mask);
251
252	spin_lock(&pag->pag_state_lock);
253	pag->pag_sick |= mask;
254	pag->pag_checked |= mask;
255	spin_unlock(&pag->pag_state_lock);
256}
257
258/* Mark per-ag metadata ok. */
259void
260xfs_ag_mark_healthy(
261	struct xfs_perag	*pag,
262	unsigned int		mask)
263{
264	ASSERT(!(mask & ~XFS_SICK_AG_ALL));
265	trace_xfs_ag_mark_healthy(pag->pag_mount, pag->pag_agno, mask);
266
267	spin_lock(&pag->pag_state_lock);
268	pag->pag_sick &= ~mask;
269	if (!(pag->pag_sick & XFS_SICK_AG_PRIMARY))
270		pag->pag_sick &= ~XFS_SICK_AG_SECONDARY;
271	pag->pag_checked |= mask;
272	spin_unlock(&pag->pag_state_lock);
273}
274
275/* Sample which per-ag metadata are unhealthy. */
276void
277xfs_ag_measure_sickness(
278	struct xfs_perag	*pag,
279	unsigned int		*sick,
280	unsigned int		*checked)
281{
282	spin_lock(&pag->pag_state_lock);
283	*sick = pag->pag_sick;
284	*checked = pag->pag_checked;
285	spin_unlock(&pag->pag_state_lock);
286}
287
288/* Mark the unhealthy parts of an inode. */
289void
290xfs_inode_mark_sick(
291	struct xfs_inode	*ip,
292	unsigned int		mask)
293{
294	ASSERT(!(mask & ~XFS_SICK_INO_ALL));
295	trace_xfs_inode_mark_sick(ip, mask);
296
297	spin_lock(&ip->i_flags_lock);
298	ip->i_sick |= mask;
299	spin_unlock(&ip->i_flags_lock);
300
301	/*
302	 * Keep this inode around so we don't lose the sickness report.  Scrub
303	 * grabs inodes with DONTCACHE assuming that most inode are ok, which
304	 * is not the case here.
305	 */
306	spin_lock(&VFS_I(ip)->i_lock);
307	VFS_I(ip)->i_state &= ~I_DONTCACHE;
308	spin_unlock(&VFS_I(ip)->i_lock);
309}
310
311/* Mark inode metadata as having been checked and found unhealthy by fsck. */
312void
313xfs_inode_mark_corrupt(
314	struct xfs_inode	*ip,
315	unsigned int		mask)
316{
317	ASSERT(!(mask & ~XFS_SICK_INO_ALL));
318	trace_xfs_inode_mark_corrupt(ip, mask);
319
320	spin_lock(&ip->i_flags_lock);
321	ip->i_sick |= mask;
322	ip->i_checked |= mask;
323	spin_unlock(&ip->i_flags_lock);
324
325	/*
326	 * Keep this inode around so we don't lose the sickness report.  Scrub
327	 * grabs inodes with DONTCACHE assuming that most inode are ok, which
328	 * is not the case here.
329	 */
330	spin_lock(&VFS_I(ip)->i_lock);
331	VFS_I(ip)->i_state &= ~I_DONTCACHE;
332	spin_unlock(&VFS_I(ip)->i_lock);
333}
334
335/* Mark parts of an inode healed. */
336void
337xfs_inode_mark_healthy(
338	struct xfs_inode	*ip,
339	unsigned int		mask)
340{
341	ASSERT(!(mask & ~XFS_SICK_INO_ALL));
342	trace_xfs_inode_mark_healthy(ip, mask);
343
344	spin_lock(&ip->i_flags_lock);
345	ip->i_sick &= ~mask;
346	if (!(ip->i_sick & XFS_SICK_INO_PRIMARY))
347		ip->i_sick &= ~XFS_SICK_INO_SECONDARY;
348	ip->i_checked |= mask;
349	spin_unlock(&ip->i_flags_lock);
350}
351
352/* Sample which parts of an inode are unhealthy. */
353void
354xfs_inode_measure_sickness(
355	struct xfs_inode	*ip,
356	unsigned int		*sick,
357	unsigned int		*checked)
358{
359	spin_lock(&ip->i_flags_lock);
360	*sick = ip->i_sick;
361	*checked = ip->i_checked;
362	spin_unlock(&ip->i_flags_lock);
363}
364
/*
 * Mappings between internal sick masks and ioctl sick masks.  The lookup
 * tables in this file are arrays of these pairs, each ended by an entry
 * whose sick_mask is zero.
 */

struct ioctl_sick_map {
	unsigned int		sick_mask;	/* internal XFS_SICK_* flag */
	unsigned int		ioctl_mask;	/* flag reported via ioctl */
};
371
372static const struct ioctl_sick_map fs_map[] = {
373	{ XFS_SICK_FS_COUNTERS,	XFS_FSOP_GEOM_SICK_COUNTERS},
374	{ XFS_SICK_FS_UQUOTA,	XFS_FSOP_GEOM_SICK_UQUOTA },
375	{ XFS_SICK_FS_GQUOTA,	XFS_FSOP_GEOM_SICK_GQUOTA },
376	{ XFS_SICK_FS_PQUOTA,	XFS_FSOP_GEOM_SICK_PQUOTA },
377	{ XFS_SICK_FS_QUOTACHECK, XFS_FSOP_GEOM_SICK_QUOTACHECK },
378	{ XFS_SICK_FS_NLINKS,	XFS_FSOP_GEOM_SICK_NLINKS },
379	{ 0, 0 },
380};
381
382static const struct ioctl_sick_map rt_map[] = {
383	{ XFS_SICK_RT_BITMAP,	XFS_FSOP_GEOM_SICK_RT_BITMAP },
384	{ XFS_SICK_RT_SUMMARY,	XFS_FSOP_GEOM_SICK_RT_SUMMARY },
385	{ 0, 0 },
386};
387
388static inline void
389xfgeo_health_tick(
390	struct xfs_fsop_geom		*geo,
391	unsigned int			sick,
392	unsigned int			checked,
393	const struct ioctl_sick_map	*m)
394{
395	if (checked & m->sick_mask)
396		geo->checked |= m->ioctl_mask;
397	if (sick & m->sick_mask)
398		geo->sick |= m->ioctl_mask;
399}
400
401/* Fill out fs geometry health info. */
402void
403xfs_fsop_geom_health(
404	struct xfs_mount		*mp,
405	struct xfs_fsop_geom		*geo)
406{
407	const struct ioctl_sick_map	*m;
408	unsigned int			sick;
409	unsigned int			checked;
410
411	geo->sick = 0;
412	geo->checked = 0;
413
414	xfs_fs_measure_sickness(mp, &sick, &checked);
415	for (m = fs_map; m->sick_mask; m++)
416		xfgeo_health_tick(geo, sick, checked, m);
417
418	xfs_rt_measure_sickness(mp, &sick, &checked);
419	for (m = rt_map; m->sick_mask; m++)
420		xfgeo_health_tick(geo, sick, checked, m);
421}
422
423static const struct ioctl_sick_map ag_map[] = {
424	{ XFS_SICK_AG_SB,	XFS_AG_GEOM_SICK_SB },
425	{ XFS_SICK_AG_AGF,	XFS_AG_GEOM_SICK_AGF },
426	{ XFS_SICK_AG_AGFL,	XFS_AG_GEOM_SICK_AGFL },
427	{ XFS_SICK_AG_AGI,	XFS_AG_GEOM_SICK_AGI },
428	{ XFS_SICK_AG_BNOBT,	XFS_AG_GEOM_SICK_BNOBT },
429	{ XFS_SICK_AG_CNTBT,	XFS_AG_GEOM_SICK_CNTBT },
430	{ XFS_SICK_AG_INOBT,	XFS_AG_GEOM_SICK_INOBT },
431	{ XFS_SICK_AG_FINOBT,	XFS_AG_GEOM_SICK_FINOBT },
432	{ XFS_SICK_AG_RMAPBT,	XFS_AG_GEOM_SICK_RMAPBT },
433	{ XFS_SICK_AG_REFCNTBT,	XFS_AG_GEOM_SICK_REFCNTBT },
434	{ XFS_SICK_AG_INODES,	XFS_AG_GEOM_SICK_INODES },
435	{ 0, 0 },
436};
437
438/* Fill out ag geometry health info. */
439void
440xfs_ag_geom_health(
441	struct xfs_perag		*pag,
442	struct xfs_ag_geometry		*ageo)
443{
444	const struct ioctl_sick_map	*m;
445	unsigned int			sick;
446	unsigned int			checked;
447
448	ageo->ag_sick = 0;
449	ageo->ag_checked = 0;
450
451	xfs_ag_measure_sickness(pag, &sick, &checked);
452	for (m = ag_map; m->sick_mask; m++) {
453		if (checked & m->sick_mask)
454			ageo->ag_checked |= m->ioctl_mask;
455		if (sick & m->sick_mask)
456			ageo->ag_sick |= m->ioctl_mask;
457	}
458}
459
460static const struct ioctl_sick_map ino_map[] = {
461	{ XFS_SICK_INO_CORE,	XFS_BS_SICK_INODE },
462	{ XFS_SICK_INO_BMBTD,	XFS_BS_SICK_BMBTD },
463	{ XFS_SICK_INO_BMBTA,	XFS_BS_SICK_BMBTA },
464	{ XFS_SICK_INO_BMBTC,	XFS_BS_SICK_BMBTC },
465	{ XFS_SICK_INO_DIR,	XFS_BS_SICK_DIR },
466	{ XFS_SICK_INO_XATTR,	XFS_BS_SICK_XATTR },
467	{ XFS_SICK_INO_SYMLINK,	XFS_BS_SICK_SYMLINK },
468	{ XFS_SICK_INO_PARENT,	XFS_BS_SICK_PARENT },
469	{ XFS_SICK_INO_BMBTD_ZAPPED,	XFS_BS_SICK_BMBTD },
470	{ XFS_SICK_INO_BMBTA_ZAPPED,	XFS_BS_SICK_BMBTA },
471	{ XFS_SICK_INO_DIR_ZAPPED,	XFS_BS_SICK_DIR },
472	{ XFS_SICK_INO_SYMLINK_ZAPPED,	XFS_BS_SICK_SYMLINK },
473	{ 0, 0 },
474};
475
476/* Fill out bulkstat health info. */
477void
478xfs_bulkstat_health(
479	struct xfs_inode		*ip,
480	struct xfs_bulkstat		*bs)
481{
482	const struct ioctl_sick_map	*m;
483	unsigned int			sick;
484	unsigned int			checked;
485
486	bs->bs_sick = 0;
487	bs->bs_checked = 0;
488
489	xfs_inode_measure_sickness(ip, &sick, &checked);
490	for (m = ino_map; m->sick_mask; m++) {
491		if (checked & m->sick_mask)
492			bs->bs_checked |= m->ioctl_mask;
493		if (sick & m->sick_mask)
494			bs->bs_sick |= m->ioctl_mask;
495	}
496}
497
498/* Mark a block mapping sick. */
499void
500xfs_bmap_mark_sick(
501	struct xfs_inode	*ip,
502	int			whichfork)
503{
504	unsigned int		mask;
505
506	switch (whichfork) {
507	case XFS_DATA_FORK:
508		mask = XFS_SICK_INO_BMBTD;
509		break;
510	case XFS_ATTR_FORK:
511		mask = XFS_SICK_INO_BMBTA;
512		break;
513	case XFS_COW_FORK:
514		mask = XFS_SICK_INO_BMBTC;
515		break;
516	default:
517		ASSERT(0);
518		return;
519	}
520
521	xfs_inode_mark_sick(ip, mask);
522}
523
524/* Record observations of btree corruption with the health tracking system. */
525void
526xfs_btree_mark_sick(
527	struct xfs_btree_cur		*cur)
528{
529	switch (cur->bc_ops->type) {
530	case XFS_BTREE_TYPE_MEM:
531		/* no health state tracking for ephemeral btrees */
532		return;
533	case XFS_BTREE_TYPE_AG:
534		ASSERT(cur->bc_ops->sick_mask);
535		xfs_ag_mark_sick(cur->bc_ag.pag, cur->bc_ops->sick_mask);
536		return;
537	case XFS_BTREE_TYPE_INODE:
538		if (xfs_btree_is_bmap(cur->bc_ops)) {
539			xfs_bmap_mark_sick(cur->bc_ino.ip,
540					   cur->bc_ino.whichfork);
541			return;
542		}
543		fallthrough;
544	default:
545		ASSERT(0);
546		return;
547	}
548}
549
550/*
551 * Record observations of dir/attr btree corruption with the health tracking
552 * system.
553 */
554void
555xfs_dirattr_mark_sick(
556	struct xfs_inode	*ip,
557	int			whichfork)
558{
559	unsigned int		mask;
560
561	switch (whichfork) {
562	case XFS_DATA_FORK:
563		mask = XFS_SICK_INO_DIR;
564		break;
565	case XFS_ATTR_FORK:
566		mask = XFS_SICK_INO_XATTR;
567		break;
568	default:
569		ASSERT(0);
570		return;
571	}
572
573	xfs_inode_mark_sick(ip, mask);
574}
575
576/*
577 * Record observations of dir/attr btree corruption with the health tracking
578 * system.
579 */
580void
581xfs_da_mark_sick(
582	struct xfs_da_args	*args)
583{
584	xfs_dirattr_mark_sick(args->dp, args->whichfork);
585}
586