1/*
2 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/slab.h>
11#include <linux/spinlock.h>
12#include <linux/completion.h>
13#include <linux/buffer_head.h>
14#include <linux/gfs2_ondisk.h>
15#include <linux/crc32.h>
16#include <linux/lm_interface.h>
17
18#include "gfs2.h"
19#include "incore.h"
20#include "bmap.h"
21#include "glock.h"
22#include "glops.h"
23#include "lm.h"
24#include "lops.h"
25#include "meta_io.h"
26#include "recovery.h"
27#include "super.h"
28#include "util.h"
29#include "dir.h"
30
31int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
32			   struct buffer_head **bh)
33{
34	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
35	struct gfs2_glock *gl = ip->i_gl;
36	int new = 0;
37	u64 dblock;
38	u32 extlen;
39	int error;
40
41	error = gfs2_extent_map(&ip->i_inode, blk, &new, &dblock, &extlen);
42	if (error)
43		return error;
44	if (!dblock) {
45		gfs2_consist_inode(ip);
46		return -EIO;
47	}
48
49	*bh = gfs2_meta_ra(gl, dblock, extlen);
50
51	return error;
52}
53
54int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where)
55{
56	struct list_head *head = &sdp->sd_revoke_list;
57	struct gfs2_revoke_replay *rr;
58	int found = 0;
59
60	list_for_each_entry(rr, head, rr_list) {
61		if (rr->rr_blkno == blkno) {
62			found = 1;
63			break;
64		}
65	}
66
67	if (found) {
68		rr->rr_where = where;
69		return 0;
70	}
71
72	rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_KERNEL);
73	if (!rr)
74		return -ENOMEM;
75
76	rr->rr_blkno = blkno;
77	rr->rr_where = where;
78	list_add(&rr->rr_list, head);
79
80	return 1;
81}
82
83int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where)
84{
85	struct gfs2_revoke_replay *rr;
86	int wrap, a, b, revoke;
87	int found = 0;
88
89	list_for_each_entry(rr, &sdp->sd_revoke_list, rr_list) {
90		if (rr->rr_blkno == blkno) {
91			found = 1;
92			break;
93		}
94	}
95
96	if (!found)
97		return 0;
98
99	wrap = (rr->rr_where < sdp->sd_replay_tail);
100	a = (sdp->sd_replay_tail < where);
101	b = (where < rr->rr_where);
102	revoke = (wrap) ? (a || b) : (a && b);
103
104	return revoke;
105}
106
107void gfs2_revoke_clean(struct gfs2_sbd *sdp)
108{
109	struct list_head *head = &sdp->sd_revoke_list;
110	struct gfs2_revoke_replay *rr;
111
112	while (!list_empty(head)) {
113		rr = list_entry(head->next, struct gfs2_revoke_replay, rr_list);
114		list_del(&rr->rr_list);
115		kfree(rr);
116	}
117}
118
119/**
120 * get_log_header - read the log header for a given segment
121 * @jd: the journal
122 * @blk: the block to look at
123 * @lh: the log header to return
124 *
125 * Read the log header for a given segement in a given journal.  Do a few
126 * sanity checks on it.
127 *
128 * Returns: 0 on success,
129 *          1 if the header was invalid or incomplete,
130 *          errno on error
131 */
132
133static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
134			  struct gfs2_log_header_host *head)
135{
136	struct buffer_head *bh;
137	struct gfs2_log_header_host lh;
138	const u32 nothing = 0;
139	u32 hash;
140	int error;
141
142	error = gfs2_replay_read_block(jd, blk, &bh);
143	if (error)
144		return error;
145
146	hash = crc32_le((u32)~0, bh->b_data, sizeof(struct gfs2_log_header) -
147					     sizeof(u32));
148	hash = crc32_le(hash, (unsigned char const *)&nothing, sizeof(nothing));
149	hash ^= (u32)~0;
150	gfs2_log_header_in(&lh, bh->b_data);
151	brelse(bh);
152
153	if (lh.lh_header.mh_magic != GFS2_MAGIC ||
154	    lh.lh_header.mh_type != GFS2_METATYPE_LH ||
155	    lh.lh_blkno != blk || lh.lh_hash != hash)
156		return 1;
157
158	*head = lh;
159
160	return 0;
161}
162
163/**
164 * find_good_lh - find a good log header
165 * @jd: the journal
166 * @blk: the segment to start searching from
167 * @lh: the log header to fill in
168 * @forward: if true search forward in the log, else search backward
169 *
170 * Call get_log_header() to get a log header for a segment, but if the
171 * segment is bad, either scan forward or backward until we find a good one.
172 *
173 * Returns: errno
174 */
175
176static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
177			struct gfs2_log_header_host *head)
178{
179	unsigned int orig_blk = *blk;
180	int error;
181
182	for (;;) {
183		error = get_log_header(jd, *blk, head);
184		if (error <= 0)
185			return error;
186
187		if (++*blk == jd->jd_blocks)
188			*blk = 0;
189
190		if (*blk == orig_blk) {
191			gfs2_consist_inode(GFS2_I(jd->jd_inode));
192			return -EIO;
193		}
194	}
195}
196
197/**
198 * jhead_scan - make sure we've found the head of the log
199 * @jd: the journal
200 * @head: this is filled in with the log descriptor of the head
201 *
202 * At this point, seg and lh should be either the head of the log or just
203 * before.  Scan forward until we find the head.
204 *
205 * Returns: errno
206 */
207
208static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
209{
210	unsigned int blk = head->lh_blkno;
211	struct gfs2_log_header_host lh;
212	int error;
213
214	for (;;) {
215		if (++blk == jd->jd_blocks)
216			blk = 0;
217
218		error = get_log_header(jd, blk, &lh);
219		if (error < 0)
220			return error;
221		if (error == 1)
222			continue;
223
224		if (lh.lh_sequence == head->lh_sequence) {
225			gfs2_consist_inode(GFS2_I(jd->jd_inode));
226			return -EIO;
227		}
228		if (lh.lh_sequence < head->lh_sequence)
229			break;
230
231		*head = lh;
232	}
233
234	return 0;
235}
236
237/**
238 * gfs2_find_jhead - find the head of a log
239 * @jd: the journal
240 * @head: the log descriptor for the head of the log is returned here
241 *
242 * Do a binary search of a journal and find the valid log entry with the
243 * highest sequence number.  (i.e. the log head)
244 *
245 * Returns: errno
246 */
247
248int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
249{
250	struct gfs2_log_header_host lh_1, lh_m;
251	u32 blk_1, blk_2, blk_m;
252	int error;
253
254	blk_1 = 0;
255	blk_2 = jd->jd_blocks - 1;
256
257	for (;;) {
258		blk_m = (blk_1 + blk_2) / 2;
259
260		error = find_good_lh(jd, &blk_1, &lh_1);
261		if (error)
262			return error;
263
264		error = find_good_lh(jd, &blk_m, &lh_m);
265		if (error)
266			return error;
267
268		if (blk_1 == blk_m || blk_m == blk_2)
269			break;
270
271		if (lh_1.lh_sequence <= lh_m.lh_sequence)
272			blk_1 = blk_m;
273		else
274			blk_2 = blk_m;
275	}
276
277	error = jhead_scan(jd, &lh_1);
278	if (error)
279		return error;
280
281	*head = lh_1;
282
283	return error;
284}
285
286/**
287 * foreach_descriptor - go through the active part of the log
288 * @jd: the journal
289 * @start: the first log header in the active region
290 * @end: the last log header (don't process the contents of this entry))
291 *
292 * Call a given function once for every log descriptor in the active
293 * portion of the log.
294 *
295 * Returns: errno
296 */
297
298static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
299			      unsigned int end, int pass)
300{
301	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
302	struct buffer_head *bh;
303	struct gfs2_log_descriptor *ld;
304	int error = 0;
305	u32 length;
306	__be64 *ptr;
307	unsigned int offset = sizeof(struct gfs2_log_descriptor);
308	offset += sizeof(__be64) - 1;
309	offset &= ~(sizeof(__be64) - 1);
310
311	while (start != end) {
312		error = gfs2_replay_read_block(jd, start, &bh);
313		if (error)
314			return error;
315		if (gfs2_meta_check(sdp, bh)) {
316			brelse(bh);
317			return -EIO;
318		}
319		ld = (struct gfs2_log_descriptor *)bh->b_data;
320		length = be32_to_cpu(ld->ld_length);
321
322		if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
323			struct gfs2_log_header_host lh;
324			error = get_log_header(jd, start, &lh);
325			if (!error) {
326				gfs2_replay_incr_blk(sdp, &start);
327				brelse(bh);
328				continue;
329			}
330			if (error == 1) {
331				gfs2_consist_inode(GFS2_I(jd->jd_inode));
332				error = -EIO;
333			}
334			brelse(bh);
335			return error;
336		} else if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LD)) {
337			brelse(bh);
338			return -EIO;
339		}
340		ptr = (__be64 *)(bh->b_data + offset);
341		error = lops_scan_elements(jd, start, ld, ptr, pass);
342		if (error) {
343			brelse(bh);
344			return error;
345		}
346
347		while (length--)
348			gfs2_replay_incr_blk(sdp, &start);
349
350		brelse(bh);
351	}
352
353	return 0;
354}
355
356/**
357 * clean_journal - mark a dirty journal as being clean
358 * @sdp: the filesystem
359 * @jd: the journal
360 * @gl: the journal's glock
361 * @head: the head journal to start from
362 *
363 * Returns: errno
364 */
365
366static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
367{
368	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
369	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
370	unsigned int lblock;
371	struct gfs2_log_header *lh;
372	u32 hash;
373	struct buffer_head *bh;
374	int error;
375	struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
376
377	lblock = head->lh_blkno;
378	gfs2_replay_incr_blk(sdp, &lblock);
379	bh_map.b_size = 1 << ip->i_inode.i_blkbits;
380	error = gfs2_block_map(&ip->i_inode, lblock, 0, &bh_map);
381	if (error)
382		return error;
383	if (!bh_map.b_blocknr) {
384		gfs2_consist_inode(ip);
385		return -EIO;
386	}
387
388	bh = sb_getblk(sdp->sd_vfs, bh_map.b_blocknr);
389	lock_buffer(bh);
390	memset(bh->b_data, 0, bh->b_size);
391	set_buffer_uptodate(bh);
392	clear_buffer_dirty(bh);
393	unlock_buffer(bh);
394
395	lh = (struct gfs2_log_header *)bh->b_data;
396	memset(lh, 0, sizeof(struct gfs2_log_header));
397	lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
398	lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
399	lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
400	lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1);
401	lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT);
402	lh->lh_blkno = cpu_to_be32(lblock);
403	hash = gfs2_disk_hash((const char *)lh, sizeof(struct gfs2_log_header));
404	lh->lh_hash = cpu_to_be32(hash);
405
406	set_buffer_dirty(bh);
407	if (sync_dirty_buffer(bh))
408		gfs2_io_error_bh(sdp, bh);
409	brelse(bh);
410
411	return error;
412}
413
/**
 * gfs2_recover_journal - recover a given journal
 * @jd: the struct gfs2_jdesc describing the journal
 *
 * Acquire the journal's lock, check to see if the journal is clean, and
 * do recovery if necessary.
 *
 * When @jd is not this node's own journal (its jid differs from
 * sd_lockstruct.ls_jid), the journal glock is tried exclusively and the
 * journal inode glock is taken shared before the journal is examined.  A
 * journal whose head is not flagged GFS2_LOG_HEAD_UNMOUNT is replayed in
 * two passes under a shared hold on the transaction glock and then marked
 * clean with clean_journal().  The outcome is reported through
 * gfs2_lm_recovery_done().
 *
 * Returns: 0 on success or when the journal lock was busy, -EROFS if the
 *          journal needs replay but the filesystem is read-only, or
 *          another errno on failure
 */

int gfs2_recover_journal(struct gfs2_jdesc *jd)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_log_header_host head;
	struct gfs2_holder j_gh, ji_gh, t_gh;
	unsigned long t;
	int ro = 0;
	unsigned int pass;
	int error;

	if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
		fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n",
			jd->jd_jid);

		/* Acquire the journal lock so we can do recovery */

		error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops,
					  LM_ST_EXCLUSIVE,
					  LM_FLAG_NOEXP | LM_FLAG_TRY | GL_NOCACHE,
					  &j_gh);
		switch (error) {
		case 0:
			break;

		case GLR_TRYFAILED:
			/* A busy lock is not an error for the caller */
			fs_info(sdp, "jid=%u: Busy\n", jd->jd_jid);
			error = 0;
			/* fall through */

		default:
			goto fail;
		};

		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
					   LM_FLAG_NOEXP, &ji_gh);
		if (error)
			goto fail_gunlock_j;
	} else {
		fs_info(sdp, "jid=%u, already locked for use\n", jd->jd_jid);
	}

	fs_info(sdp, "jid=%u: Looking at journal...\n", jd->jd_jid);

	error = gfs2_jdesc_check(jd);
	if (error)
		goto fail_gunlock_ji;

	error = gfs2_find_jhead(jd, &head);
	if (error)
		goto fail_gunlock_ji;

	/* Replay is only needed when the head lacks the unmount flag */
	if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
		fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
			jd->jd_jid);

		/* Start time, used for the "replayed in" message below */
		t = jiffies;

		/* Acquire a shared hold on the transaction lock */

		error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
					   LM_FLAG_NOEXP | LM_FLAG_PRIORITY |
					   GL_NOCANCEL | GL_NOCACHE, &t_gh);
		if (error)
			goto fail_gunlock_ji;

		/*
		 * Decide whether replay is permitted: if SDF_JOURNAL_CHECKED
		 * is set, SDF_JOURNAL_LIVE must be set too; otherwise the
		 * superblock must not be mounted MS_RDONLY.
		 */
		if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
			if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
				ro = 1;
		} else {
			if (sdp->sd_vfs->s_flags & MS_RDONLY)
				ro = 1;
		}

		if (ro) {
			fs_warn(sdp, "jid=%u: Can't replay: read-only FS\n",
				jd->jd_jid);
			error = -EROFS;
			goto fail_gunlock_tr;
		}

		fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid);

		/* Two passes over the active region, from tail to head */
		for (pass = 0; pass < 2; pass++) {
			lops_before_scan(jd, &head, pass);
			error = foreach_descriptor(jd, head.lh_tail,
						   head.lh_blkno, pass);
			/* Called even on error, with the error handed in */
			lops_after_scan(jd, error, pass);
			if (error)
				goto fail_gunlock_tr;
		}

		error = clean_journal(jd, &head);
		if (error)
			goto fail_gunlock_tr;

		gfs2_glock_dq_uninit(&t_gh);
		/* Elapsed time in whole seconds, rounded up */
		t = DIV_ROUND_UP(jiffies - t, HZ);
		fs_info(sdp, "jid=%u: Journal replayed in %lus\n",
			jd->jd_jid, t);
	}

	/* ji_gh and j_gh were only taken for a journal other than our own */
	if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
		gfs2_glock_dq_uninit(&ji_gh);

	gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);

	if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
		gfs2_glock_dq_uninit(&j_gh);

	fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
	return 0;

	/* Error unwinding: release holders in the reverse order of acquisition */
fail_gunlock_tr:
	gfs2_glock_dq_uninit(&t_gh);
fail_gunlock_ji:
	if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
		gfs2_glock_dq_uninit(&ji_gh);
		/* Only jumped to from the branch where j_gh was acquired */
fail_gunlock_j:
		gfs2_glock_dq_uninit(&j_gh);
	}

	fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");

fail:
	gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
	return error;
}
551
552/**
553 * gfs2_check_journals - Recover any dirty journals
554 * @sdp: the filesystem
555 *
556 */
557
558void gfs2_check_journals(struct gfs2_sbd *sdp)
559{
560	struct gfs2_jdesc *jd;
561
562	for (;;) {
563		jd = gfs2_jdesc_find_dirty(sdp);
564		if (!jd)
565			break;
566
567		if (jd != sdp->sd_jdesc)
568			gfs2_recover_journal(jd);
569	}
570}
571