super.c revision a28dc123
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
4 * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
5 */
6
7#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
8
9#include <linux/bio.h>
10#include <linux/sched/signal.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/statfs.h>
16#include <linux/seq_file.h>
17#include <linux/mount.h>
18#include <linux/kthread.h>
19#include <linux/delay.h>
20#include <linux/gfs2_ondisk.h>
21#include <linux/crc32.h>
22#include <linux/time.h>
23#include <linux/wait.h>
24#include <linux/writeback.h>
25#include <linux/backing-dev.h>
26#include <linux/kernel.h>
27
28#include "gfs2.h"
29#include "incore.h"
30#include "bmap.h"
31#include "dir.h"
32#include "glock.h"
33#include "glops.h"
34#include "inode.h"
35#include "log.h"
36#include "meta_io.h"
37#include "quota.h"
38#include "recovery.h"
39#include "rgrp.h"
40#include "super.h"
41#include "trans.h"
42#include "util.h"
43#include "sys.h"
44#include "xattr.h"
45#include "lops.h"
46
/*
 * Three-way verdict on the fate of a dinode.
 * NOTE(review): the code that produces and consumes these values is not
 * part of this section of the file; meanings are inferred from the names.
 */
enum dinode_demise {
	SHOULD_DELETE_DINODE = 0,
	SHOULD_NOT_DELETE_DINODE = 1,
	SHOULD_DEFER_EVICTION = 2,
};
52
53/**
54 * gfs2_jindex_free - Clear all the journal index information
55 * @sdp: The GFS2 superblock
56 *
57 */
58
59void gfs2_jindex_free(struct gfs2_sbd *sdp)
60{
61	struct list_head list;
62	struct gfs2_jdesc *jd;
63
64	spin_lock(&sdp->sd_jindex_spin);
65	list_add(&list, &sdp->sd_jindex_list);
66	list_del_init(&sdp->sd_jindex_list);
67	sdp->sd_journals = 0;
68	spin_unlock(&sdp->sd_jindex_spin);
69
70	sdp->sd_jdesc = NULL;
71	while (!list_empty(&list)) {
72		jd = list_first_entry(&list, struct gfs2_jdesc, jd_list);
73		gfs2_free_journal_extents(jd);
74		list_del(&jd->jd_list);
75		iput(jd->jd_inode);
76		jd->jd_inode = NULL;
77		kfree(jd);
78	}
79}
80
81static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid)
82{
83	struct gfs2_jdesc *jd;
84
85	list_for_each_entry(jd, head, jd_list) {
86		if (jd->jd_jid == jid)
87			return jd;
88	}
89	return NULL;
90}
91
92struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid)
93{
94	struct gfs2_jdesc *jd;
95
96	spin_lock(&sdp->sd_jindex_spin);
97	jd = jdesc_find_i(&sdp->sd_jindex_list, jid);
98	spin_unlock(&sdp->sd_jindex_spin);
99
100	return jd;
101}
102
103int gfs2_jdesc_check(struct gfs2_jdesc *jd)
104{
105	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
106	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
107	u64 size = i_size_read(jd->jd_inode);
108
109	if (gfs2_check_internal_file_size(jd->jd_inode, 8 << 20, BIT(30)))
110		return -EIO;
111
112	jd->jd_blocks = size >> sdp->sd_sb.sb_bsize_shift;
113
114	if (gfs2_write_alloc_required(ip, 0, size)) {
115		gfs2_consist_inode(ip);
116		return -EIO;
117	}
118
119	return 0;
120}
121
122/**
123 * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one
124 * @sdp: the filesystem
125 *
126 * Returns: errno
127 */
128
129int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
130{
131	struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
132	struct gfs2_glock *j_gl = ip->i_gl;
133	struct gfs2_log_header_host head;
134	int error;
135
136	j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
137	if (gfs2_withdrawn(sdp))
138		return -EIO;
139
140	error = gfs2_find_jhead(sdp->sd_jdesc, &head, false);
141	if (error || gfs2_withdrawn(sdp))
142		return error;
143
144	if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
145		gfs2_consist(sdp);
146		return -EIO;
147	}
148
149	/*  Initialize some head of the log stuff  */
150	sdp->sd_log_sequence = head.lh_sequence + 1;
151	gfs2_log_pointers_init(sdp, head.lh_blkno);
152
153	error = gfs2_quota_init(sdp);
154	if (!error && !gfs2_withdrawn(sdp))
155		set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
156	return error;
157}
158
159void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
160{
161	const struct gfs2_statfs_change *str = buf;
162
163	sc->sc_total = be64_to_cpu(str->sc_total);
164	sc->sc_free = be64_to_cpu(str->sc_free);
165	sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
166}
167
168void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
169{
170	struct gfs2_statfs_change *str = buf;
171
172	str->sc_total = cpu_to_be64(sc->sc_total);
173	str->sc_free = cpu_to_be64(sc->sc_free);
174	str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
175}
176
177int gfs2_statfs_init(struct gfs2_sbd *sdp)
178{
179	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
180	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
181	struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
182	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
183	struct buffer_head *m_bh, *l_bh;
184	struct gfs2_holder gh;
185	int error;
186
187	error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
188				   &gh);
189	if (error)
190		return error;
191
192	error = gfs2_meta_inode_buffer(m_ip, &m_bh);
193	if (error)
194		goto out;
195
196	if (sdp->sd_args.ar_spectator) {
197		spin_lock(&sdp->sd_statfs_spin);
198		gfs2_statfs_change_in(m_sc, m_bh->b_data +
199				      sizeof(struct gfs2_dinode));
200		spin_unlock(&sdp->sd_statfs_spin);
201	} else {
202		error = gfs2_meta_inode_buffer(l_ip, &l_bh);
203		if (error)
204			goto out_m_bh;
205
206		spin_lock(&sdp->sd_statfs_spin);
207		gfs2_statfs_change_in(m_sc, m_bh->b_data +
208				      sizeof(struct gfs2_dinode));
209		gfs2_statfs_change_in(l_sc, l_bh->b_data +
210				      sizeof(struct gfs2_dinode));
211		spin_unlock(&sdp->sd_statfs_spin);
212
213		brelse(l_bh);
214	}
215
216out_m_bh:
217	brelse(m_bh);
218out:
219	gfs2_glock_dq_uninit(&gh);
220	return 0;
221}
222
223void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
224			s64 dinodes)
225{
226	struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
227	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
228	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
229	struct buffer_head *l_bh;
230	s64 x, y;
231	int need_sync = 0;
232	int error;
233
234	error = gfs2_meta_inode_buffer(l_ip, &l_bh);
235	if (error)
236		return;
237
238	gfs2_trans_add_meta(l_ip->i_gl, l_bh);
239
240	spin_lock(&sdp->sd_statfs_spin);
241	l_sc->sc_total += total;
242	l_sc->sc_free += free;
243	l_sc->sc_dinodes += dinodes;
244	gfs2_statfs_change_out(l_sc, l_bh->b_data + sizeof(struct gfs2_dinode));
245	if (sdp->sd_args.ar_statfs_percent) {
246		x = 100 * l_sc->sc_free;
247		y = m_sc->sc_free * sdp->sd_args.ar_statfs_percent;
248		if (x >= y || x <= -y)
249			need_sync = 1;
250	}
251	spin_unlock(&sdp->sd_statfs_spin);
252
253	brelse(l_bh);
254	if (need_sync)
255		gfs2_wake_up_statfs(sdp);
256}
257
258void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
259		   struct buffer_head *l_bh)
260{
261	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
262	struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
263	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
264	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
265
266	gfs2_trans_add_meta(l_ip->i_gl, l_bh);
267	gfs2_trans_add_meta(m_ip->i_gl, m_bh);
268
269	spin_lock(&sdp->sd_statfs_spin);
270	m_sc->sc_total += l_sc->sc_total;
271	m_sc->sc_free += l_sc->sc_free;
272	m_sc->sc_dinodes += l_sc->sc_dinodes;
273	memset(l_sc, 0, sizeof(struct gfs2_statfs_change));
274	memset(l_bh->b_data + sizeof(struct gfs2_dinode),
275	       0, sizeof(struct gfs2_statfs_change));
276	gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));
277	spin_unlock(&sdp->sd_statfs_spin);
278}
279
280int gfs2_statfs_sync(struct super_block *sb, int type)
281{
282	struct gfs2_sbd *sdp = sb->s_fs_info;
283	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
284	struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
285	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
286	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
287	struct gfs2_holder gh;
288	struct buffer_head *m_bh, *l_bh;
289	int error;
290
291	error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
292				   &gh);
293	if (error)
294		goto out;
295
296	error = gfs2_meta_inode_buffer(m_ip, &m_bh);
297	if (error)
298		goto out_unlock;
299
300	spin_lock(&sdp->sd_statfs_spin);
301	gfs2_statfs_change_in(m_sc, m_bh->b_data +
302			      sizeof(struct gfs2_dinode));
303	if (!l_sc->sc_total && !l_sc->sc_free && !l_sc->sc_dinodes) {
304		spin_unlock(&sdp->sd_statfs_spin);
305		goto out_bh;
306	}
307	spin_unlock(&sdp->sd_statfs_spin);
308
309	error = gfs2_meta_inode_buffer(l_ip, &l_bh);
310	if (error)
311		goto out_bh;
312
313	error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
314	if (error)
315		goto out_bh2;
316
317	update_statfs(sdp, m_bh, l_bh);
318	sdp->sd_statfs_force_sync = 0;
319
320	gfs2_trans_end(sdp);
321
322out_bh2:
323	brelse(l_bh);
324out_bh:
325	brelse(m_bh);
326out_unlock:
327	gfs2_glock_dq_uninit(&gh);
328out:
329	return error;
330}
331
332struct lfcc {
333	struct list_head list;
334	struct gfs2_holder gh;
335};
336
337/**
338 * gfs2_lock_fs_check_clean - Stop all writes to the FS and check that all
339 *                            journals are clean
340 * @sdp: the file system
341 *
342 * Returns: errno
343 */
344
345static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp)
346{
347	struct gfs2_inode *ip;
348	struct gfs2_jdesc *jd;
349	struct lfcc *lfcc;
350	LIST_HEAD(list);
351	struct gfs2_log_header_host lh;
352	int error;
353
354	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
355		lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL);
356		if (!lfcc) {
357			error = -ENOMEM;
358			goto out;
359		}
360		ip = GFS2_I(jd->jd_inode);
361		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &lfcc->gh);
362		if (error) {
363			kfree(lfcc);
364			goto out;
365		}
366		list_add(&lfcc->list, &list);
367	}
368
369	error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_EXCLUSIVE,
370				   LM_FLAG_NOEXP, &sdp->sd_freeze_gh);
371	if (error)
372		goto out;
373
374	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
375		error = gfs2_jdesc_check(jd);
376		if (error)
377			break;
378		error = gfs2_find_jhead(jd, &lh, false);
379		if (error)
380			break;
381		if (!(lh.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
382			error = -EBUSY;
383			break;
384		}
385	}
386
387	if (error)
388		gfs2_freeze_unlock(&sdp->sd_freeze_gh);
389
390out:
391	while (!list_empty(&list)) {
392		lfcc = list_first_entry(&list, struct lfcc, list);
393		list_del(&lfcc->list);
394		gfs2_glock_dq_uninit(&lfcc->gh);
395		kfree(lfcc);
396	}
397	return error;
398}
399
400void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
401{
402	struct gfs2_dinode *str = buf;
403
404	str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
405	str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
406	str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
407	str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
408	str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
409	str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
410	str->di_uid = cpu_to_be32(i_uid_read(&ip->i_inode));
411	str->di_gid = cpu_to_be32(i_gid_read(&ip->i_inode));
412	str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
413	str->di_size = cpu_to_be64(i_size_read(&ip->i_inode));
414	str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
415	str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
416	str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
417	str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
418
419	str->di_goal_meta = cpu_to_be64(ip->i_goal);
420	str->di_goal_data = cpu_to_be64(ip->i_goal);
421	str->di_generation = cpu_to_be64(ip->i_generation);
422
423	str->di_flags = cpu_to_be32(ip->i_diskflags);
424	str->di_height = cpu_to_be16(ip->i_height);
425	str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
426					     !(ip->i_diskflags & GFS2_DIF_EXHASH) ?
427					     GFS2_FORMAT_DE : 0);
428	str->di_depth = cpu_to_be16(ip->i_depth);
429	str->di_entries = cpu_to_be32(ip->i_entries);
430
431	str->di_eattr = cpu_to_be64(ip->i_eattr);
432	str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
433	str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
434	str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
435}
436
437/**
438 * gfs2_write_inode - Make sure the inode is stable on the disk
439 * @inode: The inode
440 * @wbc: The writeback control structure
441 *
442 * Returns: errno
443 */
444
445static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
446{
447	struct gfs2_inode *ip = GFS2_I(inode);
448	struct gfs2_sbd *sdp = GFS2_SB(inode);
449	struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl);
450	struct backing_dev_info *bdi = inode_to_bdi(metamapping->host);
451	int ret = 0;
452	bool flush_all = (wbc->sync_mode == WB_SYNC_ALL || gfs2_is_jdata(ip));
453
454	if (flush_all)
455		gfs2_log_flush(GFS2_SB(inode), ip->i_gl,
456			       GFS2_LOG_HEAD_FLUSH_NORMAL |
457			       GFS2_LFC_WRITE_INODE);
458	if (bdi->wb.dirty_exceeded)
459		gfs2_ail1_flush(sdp, wbc);
460	else
461		filemap_fdatawrite(metamapping);
462	if (flush_all)
463		ret = filemap_fdatawait(metamapping);
464	if (ret)
465		mark_inode_dirty_sync(inode);
466	else {
467		spin_lock(&inode->i_lock);
468		if (!(inode->i_flags & I_DIRTY))
469			gfs2_ordered_del_inode(ip);
470		spin_unlock(&inode->i_lock);
471	}
472	return ret;
473}
474
475/**
476 * gfs2_dirty_inode - check for atime updates
477 * @inode: The inode in question
478 * @flags: The type of dirty
479 *
480 * Unfortunately it can be called under any combination of inode
481 * glock and transaction lock, so we have to check carefully.
482 *
483 * At the moment this deals only with atime - it should be possible
484 * to expand that role in future, once a review of the locking has
485 * been carried out.
486 */
487
488static void gfs2_dirty_inode(struct inode *inode, int flags)
489{
490	struct gfs2_inode *ip = GFS2_I(inode);
491	struct gfs2_sbd *sdp = GFS2_SB(inode);
492	struct buffer_head *bh;
493	struct gfs2_holder gh;
494	int need_unlock = 0;
495	int need_endtrans = 0;
496	int ret;
497
498	if (unlikely(gfs2_withdrawn(sdp)))
499		return;
500	if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
501		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
502		if (ret) {
503			fs_err(sdp, "dirty_inode: glock %d\n", ret);
504			gfs2_dump_glock(NULL, ip->i_gl, true);
505			return;
506		}
507		need_unlock = 1;
508	} else if (WARN_ON_ONCE(ip->i_gl->gl_state != LM_ST_EXCLUSIVE))
509		return;
510
511	if (current->journal_info == NULL) {
512		ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
513		if (ret) {
514			fs_err(sdp, "dirty_inode: gfs2_trans_begin %d\n", ret);
515			goto out;
516		}
517		need_endtrans = 1;
518	}
519
520	ret = gfs2_meta_inode_buffer(ip, &bh);
521	if (ret == 0) {
522		gfs2_trans_add_meta(ip->i_gl, bh);
523		gfs2_dinode_out(ip, bh->b_data);
524		brelse(bh);
525	}
526
527	if (need_endtrans)
528		gfs2_trans_end(sdp);
529out:
530	if (need_unlock)
531		gfs2_glock_dq_uninit(&gh);
532}
533
534/**
535 * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
536 * @sdp: the filesystem
537 *
538 * Returns: errno
539 */
540
541void gfs2_make_fs_ro(struct gfs2_sbd *sdp)
542{
543	int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
544
545	gfs2_flush_delete_work(sdp);
546	if (!log_write_allowed && current == sdp->sd_quotad_process)
547		fs_warn(sdp, "The quotad daemon is withdrawing.\n");
548	else if (sdp->sd_quotad_process)
549		kthread_stop(sdp->sd_quotad_process);
550	sdp->sd_quotad_process = NULL;
551
552	if (!log_write_allowed && current == sdp->sd_logd_process)
553		fs_warn(sdp, "The logd daemon is withdrawing.\n");
554	else if (sdp->sd_logd_process)
555		kthread_stop(sdp->sd_logd_process);
556	sdp->sd_logd_process = NULL;
557
558	if (log_write_allowed) {
559		gfs2_quota_sync(sdp->sd_vfs, 0);
560		gfs2_statfs_sync(sdp->sd_vfs, 0);
561
562		gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
563			       GFS2_LFC_MAKE_FS_RO);
564		wait_event_timeout(sdp->sd_log_waitq,
565				   gfs2_log_is_empty(sdp),
566				   HZ * 5);
567		gfs2_assert_warn(sdp, gfs2_log_is_empty(sdp));
568	} else {
569		wait_event_timeout(sdp->sd_log_waitq,
570				   gfs2_log_is_empty(sdp),
571				   HZ * 5);
572	}
573	gfs2_quota_cleanup(sdp);
574
575	if (!log_write_allowed)
576		sdp->sd_vfs->s_flags |= SB_RDONLY;
577}
578
579/**
580 * gfs2_put_super - Unmount the filesystem
581 * @sb: The VFS superblock
582 *
583 */
584
585static void gfs2_put_super(struct super_block *sb)
586{
587	struct gfs2_sbd *sdp = sb->s_fs_info;
588	struct gfs2_jdesc *jd;
589
590	/* No more recovery requests */
591	set_bit(SDF_NORECOVERY, &sdp->sd_flags);
592	smp_mb();
593
594	/* Wait on outstanding recovery */
595restart:
596	spin_lock(&sdp->sd_jindex_spin);
597	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
598		if (!test_bit(JDF_RECOVERY, &jd->jd_flags))
599			continue;
600		spin_unlock(&sdp->sd_jindex_spin);
601		wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
602			    TASK_UNINTERRUPTIBLE);
603		goto restart;
604	}
605	spin_unlock(&sdp->sd_jindex_spin);
606
607	if (!sb_rdonly(sb)) {
608		gfs2_make_fs_ro(sdp);
609	}
610	WARN_ON(gfs2_withdrawing(sdp));
611
612	/*  At this point, we're through modifying the disk  */
613
614	/*  Release stuff  */
615
616	iput(sdp->sd_jindex);
617	iput(sdp->sd_statfs_inode);
618	iput(sdp->sd_rindex);
619	iput(sdp->sd_quota_inode);
620
621	gfs2_glock_put(sdp->sd_rename_gl);
622	gfs2_glock_put(sdp->sd_freeze_gl);
623
624	if (!sdp->sd_args.ar_spectator) {
625		if (gfs2_holder_initialized(&sdp->sd_journal_gh))
626			gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
627		if (gfs2_holder_initialized(&sdp->sd_jinode_gh))
628			gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
629		gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
630		gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
631		free_local_statfs_inodes(sdp);
632		iput(sdp->sd_qc_inode);
633	}
634
635	gfs2_glock_dq_uninit(&sdp->sd_live_gh);
636	gfs2_clear_rgrpd(sdp);
637	gfs2_jindex_free(sdp);
638	/*  Take apart glock structures and buffer lists  */
639	gfs2_gl_hash_clear(sdp);
640	truncate_inode_pages_final(&sdp->sd_aspace);
641	gfs2_delete_debugfs_file(sdp);
642	/*  Unmount the locking protocol  */
643	gfs2_lm_unmount(sdp);
644
645	/*  At this point, we're through participating in the lockspace  */
646	gfs2_sys_fs_del(sdp);
647	free_sbd(sdp);
648}
649
650/**
651 * gfs2_sync_fs - sync the filesystem
652 * @sb: the superblock
653 * @wait: true to wait for completion
654 *
655 * Flushes the log to disk.
656 */
657
658static int gfs2_sync_fs(struct super_block *sb, int wait)
659{
660	struct gfs2_sbd *sdp = sb->s_fs_info;
661
662	gfs2_quota_sync(sb, -1);
663	if (wait)
664		gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
665			       GFS2_LFC_SYNC_FS);
666	return sdp->sd_log_error;
667}
668
669void gfs2_freeze_func(struct work_struct *work)
670{
671	int error;
672	struct gfs2_holder freeze_gh;
673	struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_freeze_work);
674	struct super_block *sb = sdp->sd_vfs;
675
676	atomic_inc(&sb->s_active);
677	error = gfs2_freeze_lock(sdp, &freeze_gh, 0);
678	if (error) {
679		gfs2_assert_withdraw(sdp, 0);
680	} else {
681		atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
682		error = thaw_super(sb);
683		if (error) {
684			fs_info(sdp, "GFS2: couldn't thaw filesystem: %d\n",
685				error);
686			gfs2_assert_withdraw(sdp, 0);
687		}
688		gfs2_freeze_unlock(&freeze_gh);
689	}
690	deactivate_super(sb);
691	clear_bit_unlock(SDF_FS_FROZEN, &sdp->sd_flags);
692	wake_up_bit(&sdp->sd_flags, SDF_FS_FROZEN);
693	return;
694}
695
696/**
697 * gfs2_freeze - prevent further writes to the filesystem
698 * @sb: the VFS structure for the filesystem
699 *
700 */
701
702static int gfs2_freeze(struct super_block *sb)
703{
704	struct gfs2_sbd *sdp = sb->s_fs_info;
705	int error;
706
707	mutex_lock(&sdp->sd_freeze_mutex);
708	if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) {
709		error = -EBUSY;
710		goto out;
711	}
712
713	for (;;) {
714		if (gfs2_withdrawn(sdp)) {
715			error = -EINVAL;
716			goto out;
717		}
718
719		error = gfs2_lock_fs_check_clean(sdp);
720		if (!error)
721			break;
722
723		if (error == -EBUSY)
724			fs_err(sdp, "waiting for recovery before freeze\n");
725		else if (error == -EIO) {
726			fs_err(sdp, "Fatal IO error: cannot freeze gfs2 due "
727			       "to recovery error.\n");
728			goto out;
729		} else {
730			fs_err(sdp, "error freezing FS: %d\n", error);
731		}
732		fs_err(sdp, "retrying...\n");
733		msleep(1000);
734	}
735	set_bit(SDF_FS_FROZEN, &sdp->sd_flags);
736out:
737	mutex_unlock(&sdp->sd_freeze_mutex);
738	return error;
739}
740
741/**
742 * gfs2_unfreeze - reallow writes to the filesystem
743 * @sb: the VFS structure for the filesystem
744 *
745 */
746
747static int gfs2_unfreeze(struct super_block *sb)
748{
749	struct gfs2_sbd *sdp = sb->s_fs_info;
750
751	mutex_lock(&sdp->sd_freeze_mutex);
752	if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN ||
753	    !gfs2_holder_initialized(&sdp->sd_freeze_gh)) {
754		mutex_unlock(&sdp->sd_freeze_mutex);
755		return -EINVAL;
756	}
757
758	gfs2_freeze_unlock(&sdp->sd_freeze_gh);
759	mutex_unlock(&sdp->sd_freeze_mutex);
760	return wait_on_bit(&sdp->sd_flags, SDF_FS_FROZEN, TASK_INTERRUPTIBLE);
761}
762
763/**
764 * statfs_slow_fill - fill in the sg for a given RG
765 * @rgd: the RG
766 * @sc: the sc structure
767 *
768 * Returns: 0 on success, -ESTALE if the LVB is invalid
769 */
770
771static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
772			    struct gfs2_statfs_change_host *sc)
773{
774	gfs2_rgrp_verify(rgd);
775	sc->sc_total += rgd->rd_data;
776	sc->sc_free += rgd->rd_free;
777	sc->sc_dinodes += rgd->rd_dinodes;
778	return 0;
779}
780
781/**
782 * gfs2_statfs_slow - Stat a filesystem using asynchronous locking
783 * @sdp: the filesystem
784 * @sc: the sc info that will be returned
785 *
786 * Any error (other than a signal) will cause this routine to fall back
787 * to the synchronous version.
788 *
789 * FIXME: This really shouldn't busy wait like this.
790 *
791 * Returns: errno
792 */
793
794static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
795{
796	struct gfs2_rgrpd *rgd_next;
797	struct gfs2_holder *gha, *gh;
798	unsigned int slots = 64;
799	unsigned int x;
800	int done;
801	int error = 0, err;
802
803	memset(sc, 0, sizeof(struct gfs2_statfs_change_host));
804	gha = kmalloc_array(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
805	if (!gha)
806		return -ENOMEM;
807	for (x = 0; x < slots; x++)
808		gfs2_holder_mark_uninitialized(gha + x);
809
810	rgd_next = gfs2_rgrpd_get_first(sdp);
811
812	for (;;) {
813		done = 1;
814
815		for (x = 0; x < slots; x++) {
816			gh = gha + x;
817
818			if (gfs2_holder_initialized(gh) && gfs2_glock_poll(gh)) {
819				err = gfs2_glock_wait(gh);
820				if (err) {
821					gfs2_holder_uninit(gh);
822					error = err;
823				} else {
824					if (!error) {
825						struct gfs2_rgrpd *rgd =
826							gfs2_glock2rgrp(gh->gh_gl);
827
828						error = statfs_slow_fill(rgd, sc);
829					}
830					gfs2_glock_dq_uninit(gh);
831				}
832			}
833
834			if (gfs2_holder_initialized(gh))
835				done = 0;
836			else if (rgd_next && !error) {
837				error = gfs2_glock_nq_init(rgd_next->rd_gl,
838							   LM_ST_SHARED,
839							   GL_ASYNC,
840							   gh);
841				rgd_next = gfs2_rgrpd_get_next(rgd_next);
842				done = 0;
843			}
844
845			if (signal_pending(current))
846				error = -ERESTARTSYS;
847		}
848
849		if (done)
850			break;
851
852		yield();
853	}
854
855	kfree(gha);
856	return error;
857}
858
859/**
860 * gfs2_statfs_i - Do a statfs
861 * @sdp: the filesystem
862 * @sc: the sc structure
863 *
864 * Returns: errno
865 */
866
867static int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
868{
869	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
870	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
871
872	spin_lock(&sdp->sd_statfs_spin);
873
874	*sc = *m_sc;
875	sc->sc_total += l_sc->sc_total;
876	sc->sc_free += l_sc->sc_free;
877	sc->sc_dinodes += l_sc->sc_dinodes;
878
879	spin_unlock(&sdp->sd_statfs_spin);
880
881	if (sc->sc_free < 0)
882		sc->sc_free = 0;
883	if (sc->sc_free > sc->sc_total)
884		sc->sc_free = sc->sc_total;
885	if (sc->sc_dinodes < 0)
886		sc->sc_dinodes = 0;
887
888	return 0;
889}
890
891/**
892 * gfs2_statfs - Gather and return stats about the filesystem
893 * @dentry: The name of the link
894 * @buf: The buffer
895 *
896 * Returns: 0 on success or error code
897 */
898
899static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
900{
901	struct super_block *sb = dentry->d_sb;
902	struct gfs2_sbd *sdp = sb->s_fs_info;
903	struct gfs2_statfs_change_host sc;
904	int error;
905
906	error = gfs2_rindex_update(sdp);
907	if (error)
908		return error;
909
910	if (gfs2_tune_get(sdp, gt_statfs_slow))
911		error = gfs2_statfs_slow(sdp, &sc);
912	else
913		error = gfs2_statfs_i(sdp, &sc);
914
915	if (error)
916		return error;
917
918	buf->f_type = GFS2_MAGIC;
919	buf->f_bsize = sdp->sd_sb.sb_bsize;
920	buf->f_blocks = sc.sc_total;
921	buf->f_bfree = sc.sc_free;
922	buf->f_bavail = sc.sc_free;
923	buf->f_files = sc.sc_dinodes + sc.sc_free;
924	buf->f_ffree = sc.sc_free;
925	buf->f_namelen = GFS2_FNAMESIZE;
926
927	return 0;
928}
929
930/**
931 * gfs2_drop_inode - Drop an inode (test for remote unlink)
932 * @inode: The inode to drop
933 *
934 * If we've received a callback on an iopen lock then it's because a
935 * remote node tried to deallocate the inode but failed due to this node
936 * still having the inode open. Here we mark the link count zero
937 * since we know that it must have reached zero if the GLF_DEMOTE flag
938 * is set on the iopen glock. If we didn't do a disk read since the
939 * remote node removed the final link then we might otherwise miss
940 * this event. This check ensures that this node will deallocate the
941 * inode's blocks, or alternatively pass the baton on to another
942 * node for later deallocation.
943 */
944
945static int gfs2_drop_inode(struct inode *inode)
946{
947	struct gfs2_inode *ip = GFS2_I(inode);
948
949	if (!test_bit(GIF_FREE_VFS_INODE, &ip->i_flags) &&
950	    inode->i_nlink &&
951	    gfs2_holder_initialized(&ip->i_iopen_gh)) {
952		struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
953		if (test_bit(GLF_DEMOTE, &gl->gl_flags))
954			clear_nlink(inode);
955	}
956
957	/*
958	 * When under memory pressure when an inode's link count has dropped to
959	 * zero, defer deleting the inode to the delete workqueue.  This avoids
960	 * calling into DLM under memory pressure, which can deadlock.
961	 */
962	if (!inode->i_nlink &&
963	    unlikely(current->flags & PF_MEMALLOC) &&
964	    gfs2_holder_initialized(&ip->i_iopen_gh)) {
965		struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
966
967		gfs2_glock_hold(gl);
968		if (!gfs2_queue_delete_work(gl, 0))
969			gfs2_glock_queue_put(gl);
970		return false;
971	}
972
973	return generic_drop_inode(inode);
974}
975
976static int is_ancestor(const struct dentry *d1, const struct dentry *d2)
977{
978	do {
979		if (d1 == d2)
980			return 1;
981		d1 = d1->d_parent;
982	} while (!IS_ROOT(d1));
983	return 0;
984}
985
986/**
987 * gfs2_show_options - Show mount options for /proc/mounts
988 * @s: seq_file structure
989 * @root: root of this (sub)tree
990 *
991 * Returns: 0 on success or error code
992 */
993
994static int gfs2_show_options(struct seq_file *s, struct dentry *root)
995{
996	struct gfs2_sbd *sdp = root->d_sb->s_fs_info;
997	struct gfs2_args *args = &sdp->sd_args;
998	int val;
999
1000	if (is_ancestor(root, sdp->sd_master_dir))
1001		seq_puts(s, ",meta");
1002	if (args->ar_lockproto[0])
1003		seq_show_option(s, "lockproto", args->ar_lockproto);
1004	if (args->ar_locktable[0])
1005		seq_show_option(s, "locktable", args->ar_locktable);
1006	if (args->ar_hostdata[0])
1007		seq_show_option(s, "hostdata", args->ar_hostdata);
1008	if (args->ar_spectator)
1009		seq_puts(s, ",spectator");
1010	if (args->ar_localflocks)
1011		seq_puts(s, ",localflocks");
1012	if (args->ar_debug)
1013		seq_puts(s, ",debug");
1014	if (args->ar_posix_acl)
1015		seq_puts(s, ",acl");
1016	if (args->ar_quota != GFS2_QUOTA_DEFAULT) {
1017		char *state;
1018		switch (args->ar_quota) {
1019		case GFS2_QUOTA_OFF:
1020			state = "off";
1021			break;
1022		case GFS2_QUOTA_ACCOUNT:
1023			state = "account";
1024			break;
1025		case GFS2_QUOTA_ON:
1026			state = "on";
1027			break;
1028		default:
1029			state = "unknown";
1030			break;
1031		}
1032		seq_printf(s, ",quota=%s", state);
1033	}
1034	if (args->ar_suiddir)
1035		seq_puts(s, ",suiddir");
1036	if (args->ar_data != GFS2_DATA_DEFAULT) {
1037		char *state;
1038		switch (args->ar_data) {
1039		case GFS2_DATA_WRITEBACK:
1040			state = "writeback";
1041			break;
1042		case GFS2_DATA_ORDERED:
1043			state = "ordered";
1044			break;
1045		default:
1046			state = "unknown";
1047			break;
1048		}
1049		seq_printf(s, ",data=%s", state);
1050	}
1051	if (args->ar_discard)
1052		seq_puts(s, ",discard");
1053	val = sdp->sd_tune.gt_logd_secs;
1054	if (val != 30)
1055		seq_printf(s, ",commit=%d", val);
1056	val = sdp->sd_tune.gt_statfs_quantum;
1057	if (val != 30)
1058		seq_printf(s, ",statfs_quantum=%d", val);
1059	else if (sdp->sd_tune.gt_statfs_slow)
1060		seq_puts(s, ",statfs_quantum=0");
1061	val = sdp->sd_tune.gt_quota_quantum;
1062	if (val != 60)
1063		seq_printf(s, ",quota_quantum=%d", val);
1064	if (args->ar_statfs_percent)
1065		seq_printf(s, ",statfs_percent=%d", args->ar_statfs_percent);
1066	if (args->ar_errors != GFS2_ERRORS_DEFAULT) {
1067		const char *state;
1068
1069		switch (args->ar_errors) {
1070		case GFS2_ERRORS_WITHDRAW:
1071			state = "withdraw";
1072			break;
1073		case GFS2_ERRORS_PANIC:
1074			state = "panic";
1075			break;
1076		default:
1077			state = "unknown";
1078			break;
1079		}
1080		seq_printf(s, ",errors=%s", state);
1081	}
1082	if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
1083		seq_puts(s, ",nobarrier");
1084	if (test_bit(SDF_DEMOTE, &sdp->sd_flags))
1085		seq_puts(s, ",demote_interface_used");
1086	if (args->ar_rgrplvb)
1087		seq_puts(s, ",rgrplvb");
1088	if (args->ar_loccookie)
1089		seq_puts(s, ",loccookie");
1090	return 0;
1091}
1092
1093static void gfs2_final_release_pages(struct gfs2_inode *ip)
1094{
1095	struct inode *inode = &ip->i_inode;
1096	struct gfs2_glock *gl = ip->i_gl;
1097
1098	truncate_inode_pages(gfs2_glock2aspace(ip->i_gl), 0);
1099	truncate_inode_pages(&inode->i_data, 0);
1100
1101	if (atomic_read(&gl->gl_revokes) == 0) {
1102		clear_bit(GLF_LFLUSH, &gl->gl_flags);
1103		clear_bit(GLF_DIRTY, &gl->gl_flags);
1104	}
1105}
1106
1107static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
1108{
1109	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1110	struct gfs2_rgrpd *rgd;
1111	struct gfs2_holder gh;
1112	int error;
1113
1114	if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
1115		gfs2_consist_inode(ip);
1116		return -EIO;
1117	}
1118
1119	error = gfs2_rindex_update(sdp);
1120	if (error)
1121		return error;
1122
1123	error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1124	if (error)
1125		return error;
1126
1127	rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
1128	if (!rgd) {
1129		gfs2_consist_inode(ip);
1130		error = -EIO;
1131		goto out_qs;
1132	}
1133
1134	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
1135				   LM_FLAG_NODE_SCOPE, &gh);
1136	if (error)
1137		goto out_qs;
1138
1139	error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA,
1140				 sdp->sd_jdesc->jd_blocks);
1141	if (error)
1142		goto out_rg_gunlock;
1143
1144	gfs2_free_di(rgd, ip);
1145
1146	gfs2_final_release_pages(ip);
1147
1148	gfs2_trans_end(sdp);
1149
1150out_rg_gunlock:
1151	gfs2_glock_dq_uninit(&gh);
1152out_qs:
1153	gfs2_quota_unhold(ip);
1154	return error;
1155}
1156
1157/**
1158 * gfs2_glock_put_eventually
1159 * @gl:	The glock to put
1160 *
1161 * When under memory pressure, trigger a deferred glock put to make sure we
1162 * won't call into DLM and deadlock.  Otherwise, put the glock directly.
1163 */
1164
1165static void gfs2_glock_put_eventually(struct gfs2_glock *gl)
1166{
1167	if (current->flags & PF_MEMALLOC)
1168		gfs2_glock_queue_put(gl);
1169	else
1170		gfs2_glock_put(gl);
1171}
1172
1173static bool gfs2_upgrade_iopen_glock(struct inode *inode)
1174{
1175	struct gfs2_inode *ip = GFS2_I(inode);
1176	struct gfs2_sbd *sdp = GFS2_SB(inode);
1177	struct gfs2_holder *gh = &ip->i_iopen_gh;
1178	long timeout = 5 * HZ;
1179	int error;
1180
1181	gh->gh_flags |= GL_NOCACHE;
1182	gfs2_glock_dq_wait(gh);
1183
1184	/*
1185	 * If there are no other lock holders, we'll get the lock immediately.
1186	 * Otherwise, the other nodes holding the lock will be notified about
1187	 * our locking request.  If they don't have the inode open, they'll
1188	 * evict the cached inode and release the lock.  Otherwise, if they
1189	 * poke the inode glock, we'll take this as an indication that they
1190	 * still need the iopen glock and that they'll take care of deleting
1191	 * the inode when they're done.  As a last resort, if another node
1192	 * keeps holding the iopen glock without showing any activity on the
1193	 * inode glock, we'll eventually time out.
1194	 *
1195	 * Note that we're passing the LM_FLAG_TRY_1CB flag to the first
1196	 * locking request as an optimization to notify lock holders as soon as
1197	 * possible.  Without that flag, they'd be notified implicitly by the
1198	 * second locking request.
1199	 */
1200
1201	gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, gh);
1202	error = gfs2_glock_nq(gh);
1203	if (error != GLR_TRYFAILED)
1204		return !error;
1205
1206	gfs2_holder_reinit(LM_ST_EXCLUSIVE, GL_ASYNC | GL_NOCACHE, gh);
1207	error = gfs2_glock_nq(gh);
1208	if (error)
1209		return false;
1210
1211	timeout = wait_event_interruptible_timeout(sdp->sd_async_glock_wait,
1212		!test_bit(HIF_WAIT, &gh->gh_iflags) ||
1213		test_bit(GLF_DEMOTE, &ip->i_gl->gl_flags),
1214		timeout);
1215	if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) {
1216		gfs2_glock_dq(gh);
1217		return false;
1218	}
1219	return true;
1220}
1221
1222/**
1223 * evict_should_delete - determine whether the inode is eligible for deletion
1224 * @inode: The inode to evict
1225 * @gh: The glock holder structure
1226 *
1227 * This function determines whether the evicted inode is eligible to be deleted
1228 * and locks the inode glock.
1229 *
1230 * Returns: the fate of the dinode
1231 */
1232static enum dinode_demise evict_should_delete(struct inode *inode,
1233					      struct gfs2_holder *gh)
1234{
1235	struct gfs2_inode *ip = GFS2_I(inode);
1236	struct super_block *sb = inode->i_sb;
1237	struct gfs2_sbd *sdp = sb->s_fs_info;
1238	int ret;
1239
1240	if (test_bit(GIF_ALLOC_FAILED, &ip->i_flags)) {
1241		BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));
1242		goto should_delete;
1243	}
1244
1245	if (test_bit(GIF_DEFERRED_DELETE, &ip->i_flags))
1246		return SHOULD_DEFER_EVICTION;
1247
1248	/* Deletes should never happen under memory pressure anymore.  */
1249	if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
1250		return SHOULD_DEFER_EVICTION;
1251
1252	/* Must not read inode block until block type has been verified */
1253	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, gh);
1254	if (unlikely(ret)) {
1255		glock_clear_object(ip->i_iopen_gh.gh_gl, ip);
1256		ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
1257		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
1258		return SHOULD_DEFER_EVICTION;
1259	}
1260
1261	if (gfs2_inode_already_deleted(ip->i_gl, ip->i_no_formal_ino))
1262		return SHOULD_NOT_DELETE_DINODE;
1263	ret = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED);
1264	if (ret)
1265		return SHOULD_NOT_DELETE_DINODE;
1266
1267	if (test_bit(GIF_INVALID, &ip->i_flags)) {
1268		ret = gfs2_inode_refresh(ip);
1269		if (ret)
1270			return SHOULD_NOT_DELETE_DINODE;
1271	}
1272
1273	/*
1274	 * The inode may have been recreated in the meantime.
1275	 */
1276	if (inode->i_nlink)
1277		return SHOULD_NOT_DELETE_DINODE;
1278
1279should_delete:
1280	if (gfs2_holder_initialized(&ip->i_iopen_gh) &&
1281	    test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
1282		if (!gfs2_upgrade_iopen_glock(inode)) {
1283			gfs2_holder_uninit(&ip->i_iopen_gh);
1284			return SHOULD_NOT_DELETE_DINODE;
1285		}
1286	}
1287	return SHOULD_DELETE_DINODE;
1288}
1289
1290/**
1291 * evict_unlinked_inode - delete the pieces of an unlinked evicted inode
1292 * @inode: The inode to evict
1293 */
1294static int evict_unlinked_inode(struct inode *inode)
1295{
1296	struct gfs2_inode *ip = GFS2_I(inode);
1297	int ret;
1298
1299	if (S_ISDIR(inode->i_mode) &&
1300	    (ip->i_diskflags & GFS2_DIF_EXHASH)) {
1301		ret = gfs2_dir_exhash_dealloc(ip);
1302		if (ret)
1303			goto out;
1304	}
1305
1306	if (ip->i_eattr) {
1307		ret = gfs2_ea_dealloc(ip);
1308		if (ret)
1309			goto out;
1310	}
1311
1312	if (!gfs2_is_stuffed(ip)) {
1313		ret = gfs2_file_dealloc(ip);
1314		if (ret)
1315			goto out;
1316	}
1317
1318	/* We're about to clear the bitmap for the dinode, but as soon as we
1319	   do, gfs2_create_inode can create another inode at the same block
1320	   location and try to set gl_object again. We clear gl_object here so
1321	   that subsequent inode creates don't see an old gl_object. */
1322	glock_clear_object(ip->i_gl, ip);
1323	ret = gfs2_dinode_dealloc(ip);
1324	gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino);
1325out:
1326	return ret;
1327}
1328
1329/*
1330 * evict_linked_inode - evict an inode whose dinode has not been unlinked
1331 * @inode: The inode to evict
1332 */
1333static int evict_linked_inode(struct inode *inode)
1334{
1335	struct super_block *sb = inode->i_sb;
1336	struct gfs2_sbd *sdp = sb->s_fs_info;
1337	struct gfs2_inode *ip = GFS2_I(inode);
1338	struct address_space *metamapping;
1339	int ret;
1340
1341	gfs2_log_flush(sdp, ip->i_gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
1342		       GFS2_LFC_EVICT_INODE);
1343	metamapping = gfs2_glock2aspace(ip->i_gl);
1344	if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) {
1345		filemap_fdatawrite(metamapping);
1346		filemap_fdatawait(metamapping);
1347	}
1348	write_inode_now(inode, 1);
1349	gfs2_ail_flush(ip->i_gl, 0);
1350
1351	ret = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
1352	if (ret)
1353		return ret;
1354
1355	/* Needs to be done before glock release & also in a transaction */
1356	truncate_inode_pages(&inode->i_data, 0);
1357	truncate_inode_pages(metamapping, 0);
1358	gfs2_trans_end(sdp);
1359	return 0;
1360}
1361
/**
 * gfs2_evict_inode - Remove an inode from cache
 * @inode: The inode to evict
 *
 * There are three cases to consider:
 * 1. i_nlink == 0, we are final opener (and must deallocate)
 * 2. i_nlink == 0, we are not the final opener (and cannot deallocate)
 * 3. i_nlink > 0
 *
 * If the fs is read only, then we have to treat all cases as per #3
 * since we are unable to do any deallocation. The inode will be
 * deallocated by the next read/write node to attempt an allocation
 * in the same resource group.
 *
 * We have to (at the moment) hold the inode's main lock to cover
 * the gap between unlocking the shared lock on the iopen lock and
 * taking the exclusive lock. I'd rather do a shared -> exclusive
 * conversion on the iopen lock, but we can change that later. This
 * is safe, just less efficient.
 *
 * Whatever path is taken (except for GIF_FREE_VFS_INODE inodes, which are
 * only cleared), the function ends with the common teardown at "out":
 * final page-cache truncation, reservation and ordered-list cleanup,
 * clear_inode(), and release of the inode glock and the iopen holder.
 */

static void gfs2_evict_inode(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	struct gfs2_sbd *sdp = sb->s_fs_info;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder gh;
	int ret;

	/* Inodes flagged GIF_FREE_VFS_INODE get no GFS2 teardown at all. */
	if (test_bit(GIF_FREE_VFS_INODE, &ip->i_flags)) {
		clear_inode(inode);
		return;
	}

	/* Still linked, or read-only fs: nothing to delete, just tear down. */
	if (inode->i_nlink || sb_rdonly(sb))
		goto out;

	/*
	 * Mark gh uninitialized so that we can tell below whether
	 * evict_should_delete() actually acquired the inode glock through it.
	 */
	gfs2_holder_mark_uninitialized(&gh);
	/* ret first carries the enum dinode_demise verdict ... */
	ret = evict_should_delete(inode, &gh);
	if (ret == SHOULD_DEFER_EVICTION)
		goto out;
	/* ... and is then reused for the error code of the chosen path. */
	if (ret == SHOULD_DELETE_DINODE)
		ret = evict_unlinked_inode(inode);
	else
		ret = evict_linked_inode(inode);

	if (gfs2_rs_active(&ip->i_res))
		gfs2_rs_deltree(&ip->i_res);

	/* Drop the inode glock only if evict_should_delete() took it. */
	if (gfs2_holder_initialized(&gh)) {
		glock_clear_object(ip->i_gl, ip);
		gfs2_glock_dq_uninit(&gh);
	}
	/* GLR_TRYFAILED and -EROFS are not worth a warning. */
	if (ret && ret != GLR_TRYFAILED && ret != -EROFS)
		fs_warn(sdp, "gfs2_evict_inode: %d\n", ret);
out:
	truncate_inode_pages_final(&inode->i_data);
	/* No quota references are expected to remain at this point. */
	if (ip->i_qadata)
		gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0);
	gfs2_rs_delete(ip, NULL);
	gfs2_ordered_del_inode(ip);
	clear_inode(inode);
	gfs2_dir_hash_inval(ip);
	if (ip->i_gl) {
		glock_clear_object(ip->i_gl, ip);
		/* Wait for GIF_GLOP_PENDING to clear before letting go. */
		wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
		gfs2_glock_add_to_lru(ip->i_gl);
		gfs2_glock_put_eventually(ip->i_gl);
		ip->i_gl = NULL;
	}
	if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
		struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;

		glock_clear_object(gl, ip);
		/* Only dequeue if the holder currently holds the glock. */
		if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
			ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
			gfs2_glock_dq(&ip->i_iopen_gh);
		}
		/*
		 * NOTE(review): presumably gfs2_holder_uninit() drops the
		 * holder's glock reference; the extra hold keeps gl valid so
		 * that the final put can go through
		 * gfs2_glock_put_eventually() -- confirm against glock.c.
		 */
		gfs2_glock_hold(gl);
		gfs2_holder_uninit(&ip->i_iopen_gh);
		gfs2_glock_put_eventually(gl);
	}
}
1445
1446static struct inode *gfs2_alloc_inode(struct super_block *sb)
1447{
1448	struct gfs2_inode *ip;
1449
1450	ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
1451	if (!ip)
1452		return NULL;
1453	ip->i_flags = 0;
1454	ip->i_gl = NULL;
1455	gfs2_holder_mark_uninitialized(&ip->i_iopen_gh);
1456	memset(&ip->i_res, 0, sizeof(ip->i_res));
1457	RB_CLEAR_NODE(&ip->i_res.rs_node);
1458	ip->i_rahead = 0;
1459	return &ip->i_inode;
1460}
1461
1462static void gfs2_free_inode(struct inode *inode)
1463{
1464	kmem_cache_free(gfs2_inode_cachep, GFS2_I(inode));
1465}
1466
1467extern void free_local_statfs_inodes(struct gfs2_sbd *sdp)
1468{
1469	struct local_statfs_inode *lsi, *safe;
1470
1471	/* Run through the statfs inodes list to iput and free memory */
1472	list_for_each_entry_safe(lsi, safe, &sdp->sd_sc_inodes_list, si_list) {
1473		if (lsi->si_jid == sdp->sd_jdesc->jd_jid)
1474			sdp->sd_sc_inode = NULL; /* belongs to this node */
1475		if (lsi->si_sc_inode)
1476			iput(lsi->si_sc_inode);
1477		list_del(&lsi->si_list);
1478		kfree(lsi);
1479	}
1480}
1481
1482extern struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp,
1483					     unsigned int index)
1484{
1485	struct local_statfs_inode *lsi;
1486
1487	/* Return the local (per node) statfs inode in the
1488	 * sdp->sd_sc_inodes_list corresponding to the 'index'. */
1489	list_for_each_entry(lsi, &sdp->sd_sc_inodes_list, si_list) {
1490		if (lsi->si_jid == index)
1491			return lsi->si_sc_inode;
1492	}
1493	return NULL;
1494}
1495
1496const struct super_operations gfs2_super_ops = {
1497	.alloc_inode		= gfs2_alloc_inode,
1498	.free_inode		= gfs2_free_inode,
1499	.write_inode		= gfs2_write_inode,
1500	.dirty_inode		= gfs2_dirty_inode,
1501	.evict_inode		= gfs2_evict_inode,
1502	.put_super		= gfs2_put_super,
1503	.sync_fs		= gfs2_sync_fs,
1504	.freeze_super		= gfs2_freeze,
1505	.thaw_super		= gfs2_unfreeze,
1506	.statfs			= gfs2_statfs,
1507	.drop_inode		= gfs2_drop_inode,
1508	.show_options		= gfs2_show_options,
1509};
1510
1511