1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
4 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
5 */
6
7#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
8
9#include <linux/spinlock.h>
10#include <linux/completion.h>
11#include <linux/buffer_head.h>
12#include <linux/kthread.h>
13#include <linux/crc32.h>
14#include <linux/gfs2_ondisk.h>
15#include <linux/delay.h>
16#include <linux/uaccess.h>
17
18#include "gfs2.h"
19#include "incore.h"
20#include "glock.h"
21#include "glops.h"
22#include "log.h"
23#include "lops.h"
24#include "recovery.h"
25#include "rgrp.h"
26#include "super.h"
27#include "util.h"
28
/*
 * File-scope slab cache and mempool handles for GFS2 objects. They are only
 * defined here; nothing in the visible part of this file allocates from or
 * initializes them.
 * NOTE(review): presumably set up during module initialization elsewhere -
 * confirm against the module init code.
 */
struct kmem_cache *gfs2_glock_cachep __read_mostly;
struct kmem_cache *gfs2_glock_aspace_cachep __read_mostly;
struct kmem_cache *gfs2_inode_cachep __read_mostly;
struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
struct kmem_cache *gfs2_rgrpd_cachep __read_mostly;
struct kmem_cache *gfs2_quotad_cachep __read_mostly;
struct kmem_cache *gfs2_qadata_cachep __read_mostly;
struct kmem_cache *gfs2_trans_cachep __read_mostly;
mempool_t *gfs2_page_pool __read_mostly;
38
/**
 * gfs2_assert_i - Report a failed fatal assertion
 * @sdp: The GFS2 superblock
 *
 * Logs "fatal assertion failed" through fs_emerg() and does nothing else;
 * it neither withdraws nor panics by itself.
 */
void gfs2_assert_i(struct gfs2_sbd *sdp)
{
	fs_emerg(sdp, "fatal assertion failed\n");
}
43
44/**
45 * check_journal_clean - Make sure a journal is clean for a spectator mount
46 * @sdp: The GFS2 superblock
47 * @jd: The journal descriptor
48 * @verbose: Show more prints in the log
49 *
50 * Returns: 0 if the journal is clean or locked, else an error
51 */
52int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
53			bool verbose)
54{
55	int error;
56	struct gfs2_holder j_gh;
57	struct gfs2_log_header_host head;
58	struct gfs2_inode *ip;
59
60	ip = GFS2_I(jd->jd_inode);
61	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP |
62				   GL_EXACT | GL_NOCACHE, &j_gh);
63	if (error) {
64		if (verbose)
65			fs_err(sdp, "Error %d locking journal for spectator "
66			       "mount.\n", error);
67		return -EPERM;
68	}
69	error = gfs2_jdesc_check(jd);
70	if (error) {
71		if (verbose)
72			fs_err(sdp, "Error checking journal for spectator "
73			       "mount.\n");
74		goto out_unlock;
75	}
76	error = gfs2_find_jhead(jd, &head, false);
77	if (error) {
78		if (verbose)
79			fs_err(sdp, "Error parsing journal for spectator "
80			       "mount.\n");
81		goto out_unlock;
82	}
83	if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
84		error = -EPERM;
85		if (verbose)
86			fs_err(sdp, "jid=%u: Journal is dirty, so the first "
87			       "mounter must not be a spectator.\n",
88			       jd->jd_jid);
89	}
90
91out_unlock:
92	gfs2_glock_dq_uninit(&j_gh);
93	return error;
94}
95
96/**
97 * gfs2_freeze_lock_shared - hold the freeze glock
98 * @sdp: the superblock
99 */
100int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp)
101{
102	int error;
103
104	error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED,
105				   LM_FLAG_NOEXP | GL_EXACT,
106				   &sdp->sd_freeze_gh);
107	if (error)
108		fs_err(sdp, "can't lock the freeze glock: %d\n", error);
109	return error;
110}
111
/**
 * gfs2_freeze_unlock - release a freeze glock holder if it is in use
 * @freeze_gh: the holder to dequeue and uninitialize
 *
 * Does nothing when @freeze_gh is not an initialized holder.
 */
void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh)
{
	if (!gfs2_holder_initialized(freeze_gh))
		return;

	gfs2_glock_dq_uninit(freeze_gh);
}
117
118static void signal_our_withdraw(struct gfs2_sbd *sdp)
119{
120	struct gfs2_glock *live_gl = sdp->sd_live_gh.gh_gl;
121	struct inode *inode;
122	struct gfs2_inode *ip;
123	struct gfs2_glock *i_gl;
124	u64 no_formal_ino;
125	int ret = 0;
126	int tries;
127
128	if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) || !sdp->sd_jdesc)
129		return;
130
131	gfs2_ail_drain(sdp); /* frees all transactions */
132	inode = sdp->sd_jdesc->jd_inode;
133	ip = GFS2_I(inode);
134	i_gl = ip->i_gl;
135	no_formal_ino = ip->i_no_formal_ino;
136
137	/* Prevent any glock dq until withdraw recovery is complete */
138	set_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
139	/*
140	 * Don't tell dlm we're bailing until we have no more buffers in the
141	 * wind. If journal had an IO error, the log code should just purge
142	 * the outstanding buffers rather than submitting new IO. Making the
143	 * file system read-only will flush the journal, etc.
144	 *
145	 * During a normal unmount, gfs2_make_fs_ro calls gfs2_log_shutdown
146	 * which clears SDF_JOURNAL_LIVE. In a withdraw, we must not write
147	 * any UNMOUNT log header, so we can't call gfs2_log_shutdown, and
148	 * therefore we need to clear SDF_JOURNAL_LIVE manually.
149	 */
150	clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
151	if (!sb_rdonly(sdp->sd_vfs)) {
152		bool locked = mutex_trylock(&sdp->sd_freeze_mutex);
153
154		wake_up(&sdp->sd_logd_waitq);
155		wake_up(&sdp->sd_quota_wait);
156
157		wait_event_timeout(sdp->sd_log_waitq,
158				   gfs2_log_is_empty(sdp),
159				   HZ * 5);
160
161		sdp->sd_vfs->s_flags |= SB_RDONLY;
162
163		if (locked)
164			mutex_unlock(&sdp->sd_freeze_mutex);
165
166		/*
167		 * Dequeue any pending non-system glock holders that can no
168		 * longer be granted because the file system is withdrawn.
169		 */
170		gfs2_gl_dq_holders(sdp);
171	}
172
173	if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */
174		if (!ret)
175			ret = -EIO;
176		clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
177		goto skip_recovery;
178	}
179	/*
180	 * Drop the glock for our journal so another node can recover it.
181	 */
182	if (gfs2_holder_initialized(&sdp->sd_journal_gh)) {
183		gfs2_glock_dq_wait(&sdp->sd_journal_gh);
184		gfs2_holder_uninit(&sdp->sd_journal_gh);
185	}
186	sdp->sd_jinode_gh.gh_flags |= GL_NOCACHE;
187	gfs2_glock_dq(&sdp->sd_jinode_gh);
188	gfs2_thaw_freeze_initiator(sdp->sd_vfs);
189	wait_on_bit(&i_gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE);
190
191	/*
192	 * holder_uninit to force glock_put, to force dlm to let go
193	 */
194	gfs2_holder_uninit(&sdp->sd_jinode_gh);
195
196	/*
197	 * Note: We need to be careful here:
198	 * Our iput of jd_inode will evict it. The evict will dequeue its
199	 * glock, but the glock dq will wait for the withdraw unless we have
200	 * exception code in glock_dq.
201	 */
202	iput(inode);
203	sdp->sd_jdesc->jd_inode = NULL;
204	/*
205	 * Wait until the journal inode's glock is freed. This allows try locks
206	 * on other nodes to be successful, otherwise we remain the owner of
207	 * the glock as far as dlm is concerned.
208	 */
209	if (i_gl->gl_ops->go_free) {
210		set_bit(GLF_FREEING, &i_gl->gl_flags);
211		wait_on_bit(&i_gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE);
212	}
213
214	/*
215	 * Dequeue the "live" glock, but keep a reference so it's never freed.
216	 */
217	gfs2_glock_hold(live_gl);
218	gfs2_glock_dq_wait(&sdp->sd_live_gh);
219	/*
220	 * We enqueue the "live" glock in EX so that all other nodes
221	 * get a demote request and act on it. We don't really want the
222	 * lock in EX, so we send a "try" lock with 1CB to produce a callback.
223	 */
224	fs_warn(sdp, "Requesting recovery of jid %d.\n",
225		sdp->sd_lockstruct.ls_jid);
226	gfs2_holder_reinit(LM_ST_EXCLUSIVE,
227			   LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | GL_NOPID,
228			   &sdp->sd_live_gh);
229	msleep(GL_GLOCK_MAX_HOLD);
230	/*
231	 * This will likely fail in a cluster, but succeed standalone:
232	 */
233	ret = gfs2_glock_nq(&sdp->sd_live_gh);
234
235	/*
236	 * If we actually got the "live" lock in EX mode, there are no other
237	 * nodes available to replay our journal. So we try to replay it
238	 * ourselves. We hold the "live" glock to prevent other mounters
239	 * during recovery, then just dequeue it and reacquire it in our
240	 * normal SH mode. Just in case the problem that caused us to
241	 * withdraw prevents us from recovering our journal (e.g. io errors
242	 * and such) we still check if the journal is clean before proceeding
243	 * but we may wait forever until another mounter does the recovery.
244	 */
245	if (ret == 0) {
246		fs_warn(sdp, "No other mounters found. Trying to recover our "
247			"own journal jid %d.\n", sdp->sd_lockstruct.ls_jid);
248		if (gfs2_recover_journal(sdp->sd_jdesc, 1))
249			fs_warn(sdp, "Unable to recover our journal jid %d.\n",
250				sdp->sd_lockstruct.ls_jid);
251		gfs2_glock_dq_wait(&sdp->sd_live_gh);
252		gfs2_holder_reinit(LM_ST_SHARED,
253				   LM_FLAG_NOEXP | GL_EXACT | GL_NOPID,
254				   &sdp->sd_live_gh);
255		gfs2_glock_nq(&sdp->sd_live_gh);
256	}
257
258	gfs2_glock_queue_put(live_gl); /* drop extra reference we acquired */
259	clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
260
261	/*
262	 * At this point our journal is evicted, so we need to get a new inode
263	 * for it. Once done, we need to call gfs2_find_jhead which
264	 * calls gfs2_map_journal_extents to map it for us again.
265	 *
266	 * Note that we don't really want it to look up a FREE block. The
267	 * GFS2_BLKST_FREE simply overrides a block check in gfs2_inode_lookup
268	 * which would otherwise fail because it requires grabbing an rgrp
269	 * glock, which would fail with -EIO because we're withdrawing.
270	 */
271	inode = gfs2_inode_lookup(sdp->sd_vfs, DT_UNKNOWN,
272				  sdp->sd_jdesc->jd_no_addr, no_formal_ino,
273				  GFS2_BLKST_FREE);
274	if (IS_ERR(inode)) {
275		fs_warn(sdp, "Reprocessing of jid %d failed with %ld.\n",
276			sdp->sd_lockstruct.ls_jid, PTR_ERR(inode));
277		goto skip_recovery;
278	}
279	sdp->sd_jdesc->jd_inode = inode;
280	d_mark_dontcache(inode);
281
282	/*
283	 * Now wait until recovery is complete.
284	 */
285	for (tries = 0; tries < 10; tries++) {
286		ret = check_journal_clean(sdp, sdp->sd_jdesc, false);
287		if (!ret)
288			break;
289		msleep(HZ);
290		fs_warn(sdp, "Waiting for journal recovery jid %d.\n",
291			sdp->sd_lockstruct.ls_jid);
292	}
293skip_recovery:
294	if (!ret)
295		fs_warn(sdp, "Journal recovery complete for jid %d.\n",
296			sdp->sd_lockstruct.ls_jid);
297	else
298		fs_warn(sdp, "Journal recovery skipped for jid %d until next "
299			"mount.\n", sdp->sd_lockstruct.ls_jid);
300	fs_warn(sdp, "Glock dequeues delayed: %lu\n", sdp->sd_glock_dqs_held);
301	sdp->sd_glock_dqs_held = 0;
302	wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY);
303}
304
305void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...)
306{
307	struct va_format vaf;
308	va_list args;
309
310	if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
311	    test_bit(SDF_WITHDRAWN, &sdp->sd_flags))
312		return;
313
314	va_start(args, fmt);
315	vaf.fmt = fmt;
316	vaf.va = &args;
317	fs_err(sdp, "%pV", &vaf);
318	va_end(args);
319}
320
/**
 * gfs2_withdraw - Withdraw the filesystem, or panic, per the errors= policy
 * @sdp: The GFS2 superblock
 *
 * With sd_args.ar_errors == GFS2_ERRORS_WITHDRAW, the first caller to set
 * SDF_WITHDRAWN performs the withdraw; any other caller that finds the flag
 * already set waits for SDF_WITHDRAW_IN_PROG to clear and then returns.
 * With GFS2_ERRORS_PANIC, panic() is called instead.
 *
 * Returns: -1 on every path that returns.
 */
int gfs2_withdraw(struct gfs2_sbd *sdp)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	const struct lm_lockops *lm = ls->ls_ops;

	if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) {
		unsigned long old = READ_ONCE(sdp->sd_flags), new;

		/*
		 * Set SDF_WITHDRAWN and SDF_WITHDRAW_IN_PROG in one atomic
		 * step so that exactly one caller becomes the withdrawer.
		 */
		do {
			if (old & BIT(SDF_WITHDRAWN)) {
				/* Someone else got there first: wait for them */
				wait_on_bit(&sdp->sd_flags,
					    SDF_WITHDRAW_IN_PROG,
					    TASK_UNINTERRUPTIBLE);
				return -1;
			}
			new = old | BIT(SDF_WITHDRAWN) | BIT(SDF_WITHDRAW_IN_PROG);
		} while (unlikely(!try_cmpxchg(&sdp->sd_flags, &old, new)));

		fs_err(sdp, "about to withdraw this file system\n");
		/* With the debug mount argument set, crash here instead */
		BUG_ON(sdp->sd_args.ar_debug);

		signal_our_withdraw(sdp);

		kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);

		/*
		 * Only for lock_dlm: block until sd_wdack is completed.
		 * NOTE(review): presumably completed when the withdraw is
		 * acknowledged from outside this file - confirm.
		 */
		if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
			wait_for_completion(&sdp->sd_wdack);

		if (lm->lm_unmount) {
			fs_err(sdp, "telling LM to unmount\n");
			lm->lm_unmount(sdp);
		}
		set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
		fs_err(sdp, "File system withdrawn\n");
		dump_stack();
		/*
		 * Clear the in-progress bit, then wake the callers parked in
		 * wait_on_bit() above; the barrier orders the clear before
		 * the wakeup.
		 */
		clear_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags);
		smp_mb__after_atomic();
		wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG);
	}

	if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
		panic("GFS2: fsid=%s: panic requested\n", sdp->sd_fsname);

	return -1;
}
366
367/*
368 * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false
369 */
370
371void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
372			    const char *function, char *file, unsigned int line,
373			    bool delayed)
374{
375	if (gfs2_withdrawing_or_withdrawn(sdp))
376		return;
377
378	fs_err(sdp,
379	       "fatal: assertion \"%s\" failed\n"
380	       "   function = %s, file = %s, line = %u\n",
381	       assertion, function, file, line);
382
383	/*
384	 * If errors=panic was specified on mount, it won't help to delay the
385	 * withdraw.
386	 */
387	if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
388		delayed = false;
389
390	if (delayed)
391		gfs2_withdraw_delayed(sdp);
392	else
393		gfs2_withdraw(sdp);
394	dump_stack();
395}
396
397/*
398 * gfs2_assert_warn_i - Print a message to the console if @assertion is false
399 */
400
401void gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
402			const char *function, char *file, unsigned int line)
403{
404	if (time_before(jiffies,
405			sdp->sd_last_warning +
406			gfs2_tune_get(sdp, gt_complain_secs) * HZ))
407		return;
408
409	if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW)
410		fs_warn(sdp, "warning: assertion \"%s\" failed at function = %s, file = %s, line = %u\n",
411			assertion, function, file, line);
412
413	if (sdp->sd_args.ar_debug)
414		BUG();
415	else
416		dump_stack();
417
418	if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
419		panic("GFS2: fsid=%s: warning: assertion \"%s\" failed\n"
420		      "GFS2: fsid=%s:   function = %s, file = %s, line = %u\n",
421		      sdp->sd_fsname, assertion,
422		      sdp->sd_fsname, function, file, line);
423
424	sdp->sd_last_warning = jiffies;
425}
426
/**
 * gfs2_consist_i - Flag a filesystem consistency error and withdraw
 * @sdp: The GFS2 superblock
 * @function: Name of the function that detected the error
 * @file: Source file that detected the error
 * @line: Source line that detected the error
 *
 * Logs the location through gfs2_lm() and then calls gfs2_withdraw().
 */
void gfs2_consist_i(struct gfs2_sbd *sdp, const char *function,
		    char *file, unsigned int line)
{
	gfs2_lm(sdp, "fatal: filesystem consistency error - function = %s, file = %s, line = %u\n",
		function, file, line);

	gfs2_withdraw(sdp);
}
439
440/*
441 * gfs2_consist_inode_i - Flag an inode consistency error and withdraw
442 */
443
444void gfs2_consist_inode_i(struct gfs2_inode *ip,
445			  const char *function, char *file, unsigned int line)
446{
447	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
448
449	gfs2_lm(sdp,
450		"fatal: filesystem consistency error\n"
451		"  inode = %llu %llu\n"
452		"  function = %s, file = %s, line = %u\n",
453		(unsigned long long)ip->i_no_formal_ino,
454		(unsigned long long)ip->i_no_addr,
455		function, file, line);
456	gfs2_dump_glock(NULL, ip->i_gl, 1);
457	gfs2_withdraw(sdp);
458}
459
460/*
461 * gfs2_consist_rgrpd_i - Flag a RG consistency error and withdraw
462 */
463
464void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd,
465			  const char *function, char *file, unsigned int line)
466{
467	struct gfs2_sbd *sdp = rgd->rd_sbd;
468	char fs_id_buf[sizeof(sdp->sd_fsname) + 7];
469
470	sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
471	gfs2_rgrp_dump(NULL, rgd, fs_id_buf);
472	gfs2_lm(sdp,
473		"fatal: filesystem consistency error\n"
474		"  RG = %llu\n"
475		"  function = %s, file = %s, line = %u\n",
476		(unsigned long long)rgd->rd_addr,
477		function, file, line);
478	gfs2_dump_glock(NULL, rgd->rd_gl, 1);
479	gfs2_withdraw(sdp);
480}
481
482/*
483 * gfs2_meta_check_ii - Flag a magic number consistency error and withdraw
484 * Returns: -1 if this call withdrew the machine,
485 *          -2 if it was already withdrawn
486 */
487
488int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
489		       const char *type, const char *function, char *file,
490		       unsigned int line)
491{
492	int me;
493
494	gfs2_lm(sdp,
495		"fatal: invalid metadata block\n"
496		"  bh = %llu (%s)\n"
497		"  function = %s, file = %s, line = %u\n",
498		(unsigned long long)bh->b_blocknr, type,
499		function, file, line);
500	me = gfs2_withdraw(sdp);
501	return (me) ? -1 : -2;
502}
503
504/*
505 * gfs2_metatype_check_ii - Flag a metadata type consistency error and withdraw
506 * Returns: -1 if this call withdrew the machine,
507 *          -2 if it was already withdrawn
508 */
509
510int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
511			   u16 type, u16 t, const char *function,
512			   char *file, unsigned int line)
513{
514	int me;
515
516	gfs2_lm(sdp,
517		"fatal: invalid metadata block\n"
518		"  bh = %llu (type: exp=%u, found=%u)\n"
519		"  function = %s, file = %s, line = %u\n",
520		(unsigned long long)bh->b_blocknr, type, t,
521		function, file, line);
522	me = gfs2_withdraw(sdp);
523	return (me) ? -1 : -2;
524}
525
/**
 * gfs2_io_error_i - Flag an I/O error and withdraw
 * @sdp: The GFS2 superblock
 * @function: Name of the function that detected the error
 * @file: Source file that detected the error
 * @line: Source line that detected the error
 *
 * Returns: the return value of gfs2_withdraw(), which as defined in this
 * file is -1 on every path.
 */
int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file,
		    unsigned int line)
{
	int rc;

	gfs2_lm(sdp,
		"fatal: I/O error\n"
		"  function = %s, file = %s, line = %u\n",
		function, file, line);

	rc = gfs2_withdraw(sdp);
	return rc;
}
541
542/*
543 * gfs2_io_error_bh_i - Flag a buffer I/O error
544 * @withdraw: withdraw the filesystem
545 */
546
547void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
548			const char *function, char *file, unsigned int line,
549			bool withdraw)
550{
551	if (gfs2_withdrawing_or_withdrawn(sdp))
552		return;
553
554	fs_err(sdp, "fatal: I/O error\n"
555	       "  block = %llu\n"
556	       "  function = %s, file = %s, line = %u\n",
557	       (unsigned long long)bh->b_blocknr, function, file, line);
558	if (withdraw)
559		gfs2_withdraw(sdp);
560}
561
562