1/*
2 *  linux/fs/ext3/super.c
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 *  from
10 *
11 *  linux/fs/minix/inode.c
12 *
13 *  Copyright (C) 1991, 1992  Linus Torvalds
14 *
15 *  Big-endian to little-endian byte-swapping/bitmaps by
16 *        David S. Miller (davem@caip.rutgers.edu), 1995
17 */
18
19#include <linux/module.h>
20#include <linux/string.h>
21#include <linux/fs.h>
22#include <linux/time.h>
23#include <linux/jbd.h>
24#include <linux/ext3_fs.h>
25#include <linux/ext3_jbd.h>
26#include <linux/slab.h>
27#include <linux/init.h>
28#include <linux/blkdev.h>
29#include <linux/parser.h>
30#include <linux/smp_lock.h>
31#include <linux/buffer_head.h>
32#include <linux/vfs.h>
33#include <linux/random.h>
34#include <linux/mount.h>
35#include <linux/namei.h>
36#include <linux/quotaops.h>
37#include <linux/seq_file.h>
38
39#include <asm/uaccess.h>
40
41#include "xattr.h"
42#include "acl.h"
43#include "namei.h"
44
/*
 * Prototypes for file-local (static) routines that are referenced before
 * the point at which they are defined.
 */
static int ext3_load_journal(struct super_block *,
			     struct ext3_super_block *,
			     unsigned long journal_devnum);
static int ext3_create_journal(struct super_block *,
			       struct ext3_super_block *,
			       unsigned int);
static void ext3_commit_super(struct super_block *sb,
			      struct ext3_super_block *es,
			      int sync);
static void ext3_mark_recovery_complete(struct super_block *sb,
					struct ext3_super_block *es);
static void ext3_clear_journal_err(struct super_block *sb,
				   struct ext3_super_block *es);
static int ext3_sync_fs(struct super_block *sb, int wait);
static const char *ext3_decode_error(struct super_block *sb, int errno,
				     char nbuf[16]);
static int ext3_remount(struct super_block *sb, int *flags, char *data);
static int ext3_statfs(struct dentry *dentry, struct kstatfs *buf);
static void ext3_unlockfs(struct super_block *sb);
static void ext3_write_super(struct super_block *sb);
static void ext3_write_super_lockfs(struct super_block *sb);
64
65/*
66 * Wrappers for journal_start/end.
67 *
68 * The only special thing we need to do here is to make sure that all
69 * journal_end calls result in the superblock being marked dirty, so
70 * that sync() will call the filesystem's write_super callback if
71 * appropriate.
72 */
73handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
74{
75	journal_t *journal;
76
77	if (sb->s_flags & MS_RDONLY)
78		return ERR_PTR(-EROFS);
79
80	/* Special case here: if the journal has aborted behind our
81	 * backs (eg. EIO in the commit thread), then we still need to
82	 * take the FS itself readonly cleanly. */
83	journal = EXT3_SB(sb)->s_journal;
84	if (is_journal_aborted(journal)) {
85		ext3_abort(sb, __FUNCTION__,
86			   "Detected aborted journal");
87		return ERR_PTR(-EROFS);
88	}
89
90	return journal_start(journal, nblocks);
91}
92
93/*
94 * The only special thing we need to do here is to make sure that all
95 * journal_stop calls result in the superblock being marked dirty, so
96 * that sync() will call the filesystem's write_super callback if
97 * appropriate.
98 */
99int __ext3_journal_stop(const char *where, handle_t *handle)
100{
101	struct super_block *sb;
102	int err;
103	int rc;
104
105	sb = handle->h_transaction->t_journal->j_private;
106	err = handle->h_err;
107	rc = journal_stop(handle);
108
109	if (!err)
110		err = rc;
111	if (err)
112		__ext3_std_error(sb, where, err);
113	return err;
114}
115
116void ext3_journal_abort_handle(const char *caller, const char *err_fn,
117		struct buffer_head *bh, handle_t *handle, int err)
118{
119	char nbuf[16];
120	const char *errstr = ext3_decode_error(NULL, err, nbuf);
121
122	if (bh)
123		BUFFER_TRACE(bh, "abort");
124
125	if (!handle->h_err)
126		handle->h_err = err;
127
128	if (is_handle_aborted(handle))
129		return;
130
131	printk(KERN_ERR "%s: aborting transaction: %s in %s\n",
132	       caller, errstr, err_fn);
133
134	journal_abort_handle(handle);
135}
136
137/* Deal with the reporting of failure conditions on a filesystem such as
138 * inconsistencies detected or read IO failures.
139 *
140 * On ext2, we can store the error state of the filesystem in the
141 * superblock.  That is not possible on ext3, because we may have other
142 * write ordering constraints on the superblock which prevent us from
143 * writing it out straight away; and given that the journal is about to
144 * be aborted, we can't rely on the current, or future, transactions to
145 * write out the superblock safely.
146 *
147 * We'll just use the journal_abort() error code to record an error in
148 * the journal instead.  On recovery, the journal will compain about
149 * that error until we've noted it down and cleared it.
150 */
151
152static void ext3_handle_error(struct super_block *sb)
153{
154	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
155
156	EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
157	es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
158
159	if (sb->s_flags & MS_RDONLY)
160		return;
161
162	if (!test_opt (sb, ERRORS_CONT)) {
163		journal_t *journal = EXT3_SB(sb)->s_journal;
164
165		EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
166		if (journal)
167			journal_abort(journal, -EIO);
168	}
169	if (test_opt (sb, ERRORS_RO)) {
170		printk (KERN_CRIT "Remounting filesystem read-only\n");
171		sb->s_flags |= MS_RDONLY;
172	}
173	ext3_commit_super(sb, es, 1);
174	if (test_opt(sb, ERRORS_PANIC))
175		panic("EXT3-fs (device %s): panic forced after error\n",
176			sb->s_id);
177}
178
179void ext3_error (struct super_block * sb, const char * function,
180		 const char * fmt, ...)
181{
182	va_list args;
183
184	va_start(args, fmt);
185	printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function);
186	vprintk(fmt, args);
187	printk("\n");
188	va_end(args);
189
190	ext3_handle_error(sb);
191}
192
193static const char *ext3_decode_error(struct super_block * sb, int errno,
194				     char nbuf[16])
195{
196	char *errstr = NULL;
197
198	switch (errno) {
199	case -EIO:
200		errstr = "IO failure";
201		break;
202	case -ENOMEM:
203		errstr = "Out of memory";
204		break;
205	case -EROFS:
206		if (!sb || EXT3_SB(sb)->s_journal->j_flags & JFS_ABORT)
207			errstr = "Journal has aborted";
208		else
209			errstr = "Readonly filesystem";
210		break;
211	default:
212		/* If the caller passed in an extra buffer for unknown
213		 * errors, textualise them now.  Else we just return
214		 * NULL. */
215		if (nbuf) {
216			/* Check for truncated error codes... */
217			if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
218				errstr = nbuf;
219		}
220		break;
221	}
222
223	return errstr;
224}
225
226/* __ext3_std_error decodes expected errors from journaling functions
227 * automatically and invokes the appropriate error response.  */
228
229void __ext3_std_error (struct super_block * sb, const char * function,
230		       int errno)
231{
232	char nbuf[16];
233	const char *errstr;
234
235	/* Special case: if the error is EROFS, and we're not already
236	 * inside a transaction, then there's really no point in logging
237	 * an error. */
238	if (errno == -EROFS && journal_current_handle() == NULL &&
239	    (sb->s_flags & MS_RDONLY))
240		return;
241
242	errstr = ext3_decode_error(sb, errno, nbuf);
243	printk (KERN_CRIT "EXT3-fs error (device %s) in %s: %s\n",
244		sb->s_id, function, errstr);
245
246	ext3_handle_error(sb);
247}
248
249/*
250 * ext3_abort is a much stronger failure handler than ext3_error.  The
251 * abort function may be used to deal with unrecoverable failures such
252 * as journal IO errors or ENOMEM at a critical moment in log management.
253 *
254 * We unconditionally force the filesystem into an ABORT|READONLY state,
255 * unless the error response on the fs has been set to panic in which
256 * case we take the easy way out and panic immediately.
257 */
258
259void ext3_abort (struct super_block * sb, const char * function,
260		 const char * fmt, ...)
261{
262	va_list args;
263
264	printk (KERN_CRIT "ext3_abort called.\n");
265
266	va_start(args, fmt);
267	printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function);
268	vprintk(fmt, args);
269	printk("\n");
270	va_end(args);
271
272	if (test_opt(sb, ERRORS_PANIC))
273		panic("EXT3-fs panic from previous error\n");
274
275	if (sb->s_flags & MS_RDONLY)
276		return;
277
278	printk(KERN_CRIT "Remounting filesystem read-only\n");
279	EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
280	sb->s_flags |= MS_RDONLY;
281	EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
282	journal_abort(EXT3_SB(sb)->s_journal, -EIO);
283}
284
285void ext3_warning (struct super_block * sb, const char * function,
286		   const char * fmt, ...)
287{
288	va_list args;
289
290	va_start(args, fmt);
291	printk(KERN_WARNING "EXT3-fs warning (device %s): %s: ",
292	       sb->s_id, function);
293	vprintk(fmt, args);
294	printk("\n");
295	va_end(args);
296}
297
298void ext3_update_dynamic_rev(struct super_block *sb)
299{
300	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
301
302	if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV)
303		return;
304
305	ext3_warning(sb, __FUNCTION__,
306		     "updating to rev %d because of new feature flag, "
307		     "running e2fsck is recommended",
308		     EXT3_DYNAMIC_REV);
309
310	es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO);
311	es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE);
312	es->s_rev_level = cpu_to_le32(EXT3_DYNAMIC_REV);
313	/* leave es->s_feature_*compat flags alone */
314	/* es->s_uuid will be set by e2fsck if empty */
315
316	/*
317	 * The rest of the superblock fields should be zero, and if not it
318	 * means they are likely already in use, so leave them alone.  We
319	 * can leave it up to e2fsck to clean up any inconsistencies there.
320	 */
321}
322
323/*
324 * Open the external journal device
325 */
326static struct block_device *ext3_blkdev_get(dev_t dev)
327{
328	struct block_device *bdev;
329	char b[BDEVNAME_SIZE];
330
331	bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
332	if (IS_ERR(bdev))
333		goto fail;
334	return bdev;
335
336fail:
337	printk(KERN_ERR "EXT3: failed to open journal device %s: %ld\n",
338			__bdevname(dev, b), PTR_ERR(bdev));
339	return NULL;
340}
341
/*
 * Release the journal device @bdev: drop the claim with bd_release() and
 * then put the device.  Returns the result of blkdev_put().
 */
static int ext3_blkdev_put(struct block_device *bdev)
{
	int ret;

	bd_release(bdev);
	ret = blkdev_put(bdev);
	return ret;
}
350
351static int ext3_blkdev_remove(struct ext3_sb_info *sbi)
352{
353	struct block_device *bdev;
354	int ret = -ENODEV;
355
356	bdev = sbi->journal_bdev;
357	if (bdev) {
358		ret = ext3_blkdev_put(bdev);
359		sbi->journal_bdev = NULL;
360	}
361	return ret;
362}
363
364static inline struct inode *orphan_list_entry(struct list_head *l)
365{
366	return &list_entry(l, struct ext3_inode_info, i_orphan)->vfs_inode;
367}
368
369static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi)
370{
371	struct list_head *l;
372
373	printk(KERN_ERR "sb orphan head is %d\n",
374	       le32_to_cpu(sbi->s_es->s_last_orphan));
375
376	printk(KERN_ERR "sb_info orphan list:\n");
377	list_for_each(l, &sbi->s_orphan) {
378		struct inode *inode = orphan_list_entry(l);
379		printk(KERN_ERR "  "
380		       "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
381		       inode->i_sb->s_id, inode->i_ino, inode,
382		       inode->i_mode, inode->i_nlink,
383		       NEXT_ORPHAN(inode));
384	}
385}
386
387static void ext3_put_super (struct super_block * sb)
388{
389	struct ext3_sb_info *sbi = EXT3_SB(sb);
390	struct ext3_super_block *es = sbi->s_es;
391	int i;
392
393	ext3_xattr_put_super(sb);
394	journal_destroy(sbi->s_journal);
395	if (!(sb->s_flags & MS_RDONLY)) {
396		EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
397		es->s_state = cpu_to_le16(sbi->s_mount_state);
398		BUFFER_TRACE(sbi->s_sbh, "marking dirty");
399		mark_buffer_dirty(sbi->s_sbh);
400		ext3_commit_super(sb, es, 1);
401	}
402
403	for (i = 0; i < sbi->s_gdb_count; i++)
404		brelse(sbi->s_group_desc[i]);
405	kfree(sbi->s_group_desc);
406	percpu_counter_destroy(&sbi->s_freeblocks_counter);
407	percpu_counter_destroy(&sbi->s_freeinodes_counter);
408	percpu_counter_destroy(&sbi->s_dirs_counter);
409	brelse(sbi->s_sbh);
410#ifdef CONFIG_QUOTA
411	for (i = 0; i < MAXQUOTAS; i++)
412		kfree(sbi->s_qf_names[i]);
413#endif
414
415	/* Debugging code just in case the in-memory inode orphan list
416	 * isn't empty.  The on-disk one can be non-empty if we've
417	 * detected an error and taken the fs readonly, but the
418	 * in-memory list had better be clean by this point. */
419	if (!list_empty(&sbi->s_orphan))
420		dump_orphan_list(sb, sbi);
421	J_ASSERT(list_empty(&sbi->s_orphan));
422
423	invalidate_bdev(sb->s_bdev);
424	if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
425		/*
426		 * Invalidate the journal device's buffers.  We don't want them
427		 * floating about in memory - the physical journal device may
428		 * hotswapped, and it breaks the `ro-after' testing code.
429		 */
430		sync_blockdev(sbi->journal_bdev);
431		invalidate_bdev(sbi->journal_bdev);
432		ext3_blkdev_remove(sbi);
433	}
434	sb->s_fs_info = NULL;
435	kfree(sbi);
436	return;
437}
438
439static struct kmem_cache *ext3_inode_cachep;
440
441/*
442 * Called inside transaction, so use GFP_NOFS
443 */
444static struct inode *ext3_alloc_inode(struct super_block *sb)
445{
446	struct ext3_inode_info *ei;
447
448	ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS);
449	if (!ei)
450		return NULL;
451#ifdef CONFIG_EXT3_FS_POSIX_ACL
452	ei->i_acl = EXT3_ACL_NOT_CACHED;
453	ei->i_default_acl = EXT3_ACL_NOT_CACHED;
454#endif
455	ei->i_block_alloc_info = NULL;
456	ei->vfs_inode.i_version = 1;
457	return &ei->vfs_inode;
458}
459
460static void ext3_destroy_inode(struct inode *inode)
461{
462	kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
463}
464
465static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
466{
467	struct ext3_inode_info *ei = (struct ext3_inode_info *) foo;
468
469	INIT_LIST_HEAD(&ei->i_orphan);
470#ifdef CONFIG_EXT3_FS_XATTR
471	init_rwsem(&ei->xattr_sem);
472#endif
473	mutex_init(&ei->truncate_mutex);
474	inode_init_once(&ei->vfs_inode);
475}
476
477static int init_inodecache(void)
478{
479	ext3_inode_cachep = kmem_cache_create("ext3_inode_cache",
480					     sizeof(struct ext3_inode_info),
481					     0, (SLAB_RECLAIM_ACCOUNT|
482						SLAB_MEM_SPREAD),
483					     init_once, NULL);
484	if (ext3_inode_cachep == NULL)
485		return -ENOMEM;
486	return 0;
487}
488
489static void destroy_inodecache(void)
490{
491	kmem_cache_destroy(ext3_inode_cachep);
492}
493
494static void ext3_clear_inode(struct inode *inode)
495{
496	struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info;
497#ifdef CONFIG_EXT3_FS_POSIX_ACL
498	if (EXT3_I(inode)->i_acl &&
499			EXT3_I(inode)->i_acl != EXT3_ACL_NOT_CACHED) {
500		posix_acl_release(EXT3_I(inode)->i_acl);
501		EXT3_I(inode)->i_acl = EXT3_ACL_NOT_CACHED;
502	}
503	if (EXT3_I(inode)->i_default_acl &&
504			EXT3_I(inode)->i_default_acl != EXT3_ACL_NOT_CACHED) {
505		posix_acl_release(EXT3_I(inode)->i_default_acl);
506		EXT3_I(inode)->i_default_acl = EXT3_ACL_NOT_CACHED;
507	}
508#endif
509	ext3_discard_reservation(inode);
510	EXT3_I(inode)->i_block_alloc_info = NULL;
511	if (unlikely(rsv))
512		kfree(rsv);
513}
514
/*
 * Append the quota related mount options of @sb to @seq.  Emits nothing
 * when the kernel is built without CONFIG_QUOTA.
 */
static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
	struct ext3_sb_info *sbi = EXT3_SB(sb);
	const char *usr_file = sbi->s_qf_names[USRQUOTA];
	const char *grp_file = sbi->s_qf_names[GRPQUOTA];

	/* Journalled quota: format and quota file names. */
	if (sbi->s_jquota_fmt) {
		const char *fmt_name;

		if (sbi->s_jquota_fmt == QFMT_VFS_OLD)
			fmt_name = "vfsold";
		else
			fmt_name = "vfsv0";
		seq_printf(seq, ",jqfmt=%s", fmt_name);
	}

	if (usr_file)
		seq_printf(seq, ",usrjquota=%s", usr_file);

	if (grp_file)
		seq_printf(seq, ",grpjquota=%s", grp_file);

	/* Plain quota mount flags. */
	if (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA)
		seq_puts(seq, ",usrquota");

	if (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)
		seq_puts(seq, ",grpquota");
#endif
}
537
538static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
539{
540	struct super_block *sb = vfs->mnt_sb;
541
542	if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA)
543		seq_puts(seq, ",data=journal");
544	else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA)
545		seq_puts(seq, ",data=ordered");
546	else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
547		seq_puts(seq, ",data=writeback");
548
549	ext3_show_quota_options(seq, sb);
550
551	return 0;
552}
553
554
555static struct dentry *ext3_get_dentry(struct super_block *sb, void *vobjp)
556{
557	__u32 *objp = vobjp;
558	unsigned long ino = objp[0];
559	__u32 generation = objp[1];
560	struct inode *inode;
561	struct dentry *result;
562
563	if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO)
564		return ERR_PTR(-ESTALE);
565	if (ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count))
566		return ERR_PTR(-ESTALE);
567
568	/* iget isn't really right if the inode is currently unallocated!!
569	 *
570	 * ext3_read_inode will return a bad_inode if the inode had been
571	 * deleted, so we should be safe.
572	 *
573	 * Currently we don't know the generation for parent directory, so
574	 * a generation of 0 means "accept any"
575	 */
576	inode = iget(sb, ino);
577	if (inode == NULL)
578		return ERR_PTR(-ENOMEM);
579	if (is_bad_inode(inode) ||
580	    (generation && inode->i_generation != generation)) {
581		iput(inode);
582		return ERR_PTR(-ESTALE);
583	}
584	/* now to find a dentry.
585	 * If possible, get a well-connected one
586	 */
587	result = d_alloc_anon(inode);
588	if (!result) {
589		iput(inode);
590		return ERR_PTR(-ENOMEM);
591	}
592	return result;
593}
594
595#ifdef CONFIG_QUOTA
/* Human readable name of quota type @t, used in diagnostics. */
#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
/*
 * Pick the identifier <on>USRJQUOTA or <on>GRPJQUOTA according to quota
 * type @t.  @on must be a bare identifier prefix: it is pasted directly
 * onto the suffix.  (Pasting onto a parenthesised "(on)" would glue ")"
 * to an identifier, which is not a valid preprocessing token.)
 */
#define QTYPE2MOPT(on, t) \
	((t) == USRQUOTA ? (on##USRJQUOTA) : (on##GRPJQUOTA))
598
599static int ext3_dquot_initialize(struct inode *inode, int type);
600static int ext3_dquot_drop(struct inode *inode);
601static int ext3_write_dquot(struct dquot *dquot);
602static int ext3_acquire_dquot(struct dquot *dquot);
603static int ext3_release_dquot(struct dquot *dquot);
604static int ext3_mark_dquot_dirty(struct dquot *dquot);
605static int ext3_write_info(struct super_block *sb, int type);
606static int ext3_quota_on(struct super_block *sb, int type, int format_id, char *path);
607static int ext3_quota_on_mount(struct super_block *sb, int type);
608static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
609			       size_t len, loff_t off);
610static ssize_t ext3_quota_write(struct super_block *sb, int type,
611				const char *data, size_t len, loff_t off);
612
613static struct dquot_operations ext3_quota_operations = {
614	.initialize	= ext3_dquot_initialize,
615	.drop		= ext3_dquot_drop,
616	.alloc_space	= dquot_alloc_space,
617	.alloc_inode	= dquot_alloc_inode,
618	.free_space	= dquot_free_space,
619	.free_inode	= dquot_free_inode,
620	.transfer	= dquot_transfer,
621	.write_dquot	= ext3_write_dquot,
622	.acquire_dquot	= ext3_acquire_dquot,
623	.release_dquot	= ext3_release_dquot,
624	.mark_dirty	= ext3_mark_dquot_dirty,
625	.write_info	= ext3_write_info
626};
627
628static struct quotactl_ops ext3_qctl_operations = {
629	.quota_on	= ext3_quota_on,
630	.quota_off	= vfs_quota_off,
631	.quota_sync	= vfs_quota_sync,
632	.get_info	= vfs_get_dqinfo,
633	.set_info	= vfs_set_dqinfo,
634	.get_dqblk	= vfs_get_dqblk,
635	.set_dqblk	= vfs_set_dqblk
636};
637#endif
638
639static const struct super_operations ext3_sops = {
640	.alloc_inode	= ext3_alloc_inode,
641	.destroy_inode	= ext3_destroy_inode,
642	.read_inode	= ext3_read_inode,
643	.write_inode	= ext3_write_inode,
644	.dirty_inode	= ext3_dirty_inode,
645	.delete_inode	= ext3_delete_inode,
646	.put_super	= ext3_put_super,
647	.write_super	= ext3_write_super,
648	.sync_fs	= ext3_sync_fs,
649	.write_super_lockfs = ext3_write_super_lockfs,
650	.unlockfs	= ext3_unlockfs,
651	.statfs		= ext3_statfs,
652	.remount_fs	= ext3_remount,
653	.clear_inode	= ext3_clear_inode,
654	.show_options	= ext3_show_options,
655#ifdef CONFIG_QUOTA
656	.quota_read	= ext3_quota_read,
657	.quota_write	= ext3_quota_write,
658#endif
659};
660
661static struct export_operations ext3_export_ops = {
662	.get_parent = ext3_get_parent,
663	.get_dentry = ext3_get_dentry,
664};
665
/*
 * Token identifiers for the mount options understood by parse_options().
 * The numeric values are implied by the order of the enumerators.
 */
enum {
	Opt_bsd_df,
	Opt_minix_df,
	Opt_grpid,
	Opt_nogrpid,
	Opt_resgid,
	Opt_resuid,
	Opt_sb,
	Opt_err_cont,
	Opt_err_panic,
	Opt_err_ro,
	Opt_nouid32,
	Opt_nocheck,
	Opt_debug,
	Opt_oldalloc,
	Opt_orlov,
	Opt_user_xattr,
	Opt_nouser_xattr,
	Opt_acl,
	Opt_noacl,
	Opt_reservation,
	Opt_noreservation,
	Opt_noload,
	Opt_nobh,
	Opt_bh,
	Opt_commit,
	Opt_journal_update,
	Opt_journal_inum,
	Opt_journal_dev,
	Opt_abort,
	Opt_data_journal,
	Opt_data_ordered,
	Opt_data_writeback,
	Opt_usrjquota,
	Opt_grpjquota,
	Opt_offusrjquota,
	Opt_offgrpjquota,
	Opt_jqfmt_vfsold,
	Opt_jqfmt_vfsv0,
	Opt_quota,
	Opt_noquota,
	Opt_ignore,
	Opt_barrier,
	Opt_err,
	Opt_resize,
	Opt_usrquota,
	Opt_grpquota
};
679
680static match_table_t tokens = {
681	{Opt_bsd_df, "bsddf"},
682	{Opt_minix_df, "minixdf"},
683	{Opt_grpid, "grpid"},
684	{Opt_grpid, "bsdgroups"},
685	{Opt_nogrpid, "nogrpid"},
686	{Opt_nogrpid, "sysvgroups"},
687	{Opt_resgid, "resgid=%u"},
688	{Opt_resuid, "resuid=%u"},
689	{Opt_sb, "sb=%u"},
690	{Opt_err_cont, "errors=continue"},
691	{Opt_err_panic, "errors=panic"},
692	{Opt_err_ro, "errors=remount-ro"},
693	{Opt_nouid32, "nouid32"},
694	{Opt_nocheck, "nocheck"},
695	{Opt_nocheck, "check=none"},
696	{Opt_debug, "debug"},
697	{Opt_oldalloc, "oldalloc"},
698	{Opt_orlov, "orlov"},
699	{Opt_user_xattr, "user_xattr"},
700	{Opt_nouser_xattr, "nouser_xattr"},
701	{Opt_acl, "acl"},
702	{Opt_noacl, "noacl"},
703	{Opt_reservation, "reservation"},
704	{Opt_noreservation, "noreservation"},
705	{Opt_noload, "noload"},
706	{Opt_nobh, "nobh"},
707	{Opt_bh, "bh"},
708	{Opt_commit, "commit=%u"},
709	{Opt_journal_update, "journal=update"},
710	{Opt_journal_inum, "journal=%u"},
711	{Opt_journal_dev, "journal_dev=%u"},
712	{Opt_abort, "abort"},
713	{Opt_data_journal, "data=journal"},
714	{Opt_data_ordered, "data=ordered"},
715	{Opt_data_writeback, "data=writeback"},
716	{Opt_offusrjquota, "usrjquota="},
717	{Opt_usrjquota, "usrjquota=%s"},
718	{Opt_offgrpjquota, "grpjquota="},
719	{Opt_grpjquota, "grpjquota=%s"},
720	{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
721	{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
722	{Opt_grpquota, "grpquota"},
723	{Opt_noquota, "noquota"},
724	{Opt_quota, "quota"},
725	{Opt_usrquota, "usrquota"},
726	{Opt_barrier, "barrier=%u"},
727	{Opt_err, NULL},
728	{Opt_resize, "resize"},
729};
730
731static ext3_fsblk_t get_sb_block(void **data)
732{
733	ext3_fsblk_t	sb_block;
734	char		*options = (char *) *data;
735
736	if (!options || strncmp(options, "sb=", 3) != 0)
737		return 1;	/* Default location */
738	options += 3;
739	/*todo: use simple_strtoll with >32bit ext3 */
740	sb_block = simple_strtoul(options, &options, 0);
741	if (*options && *options != ',') {
742		printk("EXT3-fs: Invalid sb specification: %s\n",
743		       (char *) *data);
744		return 1;
745	}
746	if (*options == ',')
747		options++;
748	*data = (void *) options;
749	return sb_block;
750}
751
752static int parse_options (char *options, struct super_block *sb,
753			  unsigned int *inum, unsigned long *journal_devnum,
754			  ext3_fsblk_t *n_blocks_count, int is_remount)
755{
756	struct ext3_sb_info *sbi = EXT3_SB(sb);
757	char * p;
758	substring_t args[MAX_OPT_ARGS];
759	int data_opt = 0;
760	int option;
761#ifdef CONFIG_QUOTA
762	int qtype;
763	char *qname;
764#endif
765
766	if (!options)
767		return 1;
768
769	while ((p = strsep (&options, ",")) != NULL) {
770		int token;
771		if (!*p)
772			continue;
773
774		token = match_token(p, tokens, args);
775		switch (token) {
776		case Opt_bsd_df:
777			clear_opt (sbi->s_mount_opt, MINIX_DF);
778			break;
779		case Opt_minix_df:
780			set_opt (sbi->s_mount_opt, MINIX_DF);
781			break;
782		case Opt_grpid:
783			set_opt (sbi->s_mount_opt, GRPID);
784			break;
785		case Opt_nogrpid:
786			clear_opt (sbi->s_mount_opt, GRPID);
787			break;
788		case Opt_resuid:
789			if (match_int(&args[0], &option))
790				return 0;
791			sbi->s_resuid = option;
792			break;
793		case Opt_resgid:
794			if (match_int(&args[0], &option))
795				return 0;
796			sbi->s_resgid = option;
797			break;
798		case Opt_sb:
799			/* handled by get_sb_block() instead of here */
800			/* *sb_block = match_int(&args[0]); */
801			break;
802		case Opt_err_panic:
803			clear_opt (sbi->s_mount_opt, ERRORS_CONT);
804			clear_opt (sbi->s_mount_opt, ERRORS_RO);
805			set_opt (sbi->s_mount_opt, ERRORS_PANIC);
806			break;
807		case Opt_err_ro:
808			clear_opt (sbi->s_mount_opt, ERRORS_CONT);
809			clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
810			set_opt (sbi->s_mount_opt, ERRORS_RO);
811			break;
812		case Opt_err_cont:
813			clear_opt (sbi->s_mount_opt, ERRORS_RO);
814			clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
815			set_opt (sbi->s_mount_opt, ERRORS_CONT);
816			break;
817		case Opt_nouid32:
818			set_opt (sbi->s_mount_opt, NO_UID32);
819			break;
820		case Opt_nocheck:
821			clear_opt (sbi->s_mount_opt, CHECK);
822			break;
823		case Opt_debug:
824			set_opt (sbi->s_mount_opt, DEBUG);
825			break;
826		case Opt_oldalloc:
827			set_opt (sbi->s_mount_opt, OLDALLOC);
828			break;
829		case Opt_orlov:
830			clear_opt (sbi->s_mount_opt, OLDALLOC);
831			break;
832#ifdef CONFIG_EXT3_FS_XATTR
833		case Opt_user_xattr:
834			set_opt (sbi->s_mount_opt, XATTR_USER);
835			break;
836		case Opt_nouser_xattr:
837			clear_opt (sbi->s_mount_opt, XATTR_USER);
838			break;
839#else
840		case Opt_user_xattr:
841		case Opt_nouser_xattr:
842			printk("EXT3 (no)user_xattr options not supported\n");
843			break;
844#endif
845#ifdef CONFIG_EXT3_FS_POSIX_ACL
846		case Opt_acl:
847			set_opt(sbi->s_mount_opt, POSIX_ACL);
848			break;
849		case Opt_noacl:
850			clear_opt(sbi->s_mount_opt, POSIX_ACL);
851			break;
852#else
853		case Opt_acl:
854		case Opt_noacl:
855			printk("EXT3 (no)acl options not supported\n");
856			break;
857#endif
858		case Opt_reservation:
859			set_opt(sbi->s_mount_opt, RESERVATION);
860			break;
861		case Opt_noreservation:
862			clear_opt(sbi->s_mount_opt, RESERVATION);
863			break;
864		case Opt_journal_update:
865			/* Eventually we will want to be able to create
866			   a journal file here.  For now, only allow the
867			   user to specify an existing inode to be the
868			   journal file. */
869			if (is_remount) {
870				printk(KERN_ERR "EXT3-fs: cannot specify "
871				       "journal on remount\n");
872				return 0;
873			}
874			set_opt (sbi->s_mount_opt, UPDATE_JOURNAL);
875			break;
876		case Opt_journal_inum:
877			if (is_remount) {
878				printk(KERN_ERR "EXT3-fs: cannot specify "
879				       "journal on remount\n");
880				return 0;
881			}
882			if (match_int(&args[0], &option))
883				return 0;
884			*inum = option;
885			break;
886		case Opt_journal_dev:
887			if (is_remount) {
888				printk(KERN_ERR "EXT3-fs: cannot specify "
889				       "journal on remount\n");
890				return 0;
891			}
892			if (match_int(&args[0], &option))
893				return 0;
894			*journal_devnum = option;
895			break;
896		case Opt_noload:
897			set_opt (sbi->s_mount_opt, NOLOAD);
898			break;
899		case Opt_commit:
900			if (match_int(&args[0], &option))
901				return 0;
902			if (option < 0)
903				return 0;
904			if (option == 0)
905				option = JBD_DEFAULT_MAX_COMMIT_AGE;
906			sbi->s_commit_interval = HZ * option;
907			break;
908		case Opt_data_journal:
909			data_opt = EXT3_MOUNT_JOURNAL_DATA;
910			goto datacheck;
911		case Opt_data_ordered:
912			data_opt = EXT3_MOUNT_ORDERED_DATA;
913			goto datacheck;
914		case Opt_data_writeback:
915			data_opt = EXT3_MOUNT_WRITEBACK_DATA;
916		datacheck:
917			if (is_remount) {
918				if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS)
919						!= data_opt) {
920					printk(KERN_ERR
921						"EXT3-fs: cannot change data "
922						"mode on remount\n");
923					return 0;
924				}
925			} else {
926				sbi->s_mount_opt &= ~EXT3_MOUNT_DATA_FLAGS;
927				sbi->s_mount_opt |= data_opt;
928			}
929			break;
930#ifdef CONFIG_QUOTA
931		case Opt_usrjquota:
932			qtype = USRQUOTA;
933			goto set_qf_name;
934		case Opt_grpjquota:
935			qtype = GRPQUOTA;
936set_qf_name:
937			if (sb_any_quota_enabled(sb)) {
938				printk(KERN_ERR
939					"EXT3-fs: Cannot change journalled "
940					"quota options when quota turned on.\n");
941				return 0;
942			}
943			qname = match_strdup(&args[0]);
944			if (!qname) {
945				printk(KERN_ERR
946					"EXT3-fs: not enough memory for "
947					"storing quotafile name.\n");
948				return 0;
949			}
950			if (sbi->s_qf_names[qtype] &&
951			    strcmp(sbi->s_qf_names[qtype], qname)) {
952				printk(KERN_ERR
953					"EXT3-fs: %s quota file already "
954					"specified.\n", QTYPE2NAME(qtype));
955				kfree(qname);
956				return 0;
957			}
958			sbi->s_qf_names[qtype] = qname;
959			if (strchr(sbi->s_qf_names[qtype], '/')) {
960				printk(KERN_ERR
961					"EXT3-fs: quotafile must be on "
962					"filesystem root.\n");
963				kfree(sbi->s_qf_names[qtype]);
964				sbi->s_qf_names[qtype] = NULL;
965				return 0;
966			}
967			set_opt(sbi->s_mount_opt, QUOTA);
968			break;
969		case Opt_offusrjquota:
970			qtype = USRQUOTA;
971			goto clear_qf_name;
972		case Opt_offgrpjquota:
973			qtype = GRPQUOTA;
974clear_qf_name:
975			if (sb_any_quota_enabled(sb)) {
976				printk(KERN_ERR "EXT3-fs: Cannot change "
977					"journalled quota options when "
978					"quota turned on.\n");
979				return 0;
980			}
981			/*
982			 * The space will be released later when all options
983			 * are confirmed to be correct
984			 */
985			sbi->s_qf_names[qtype] = NULL;
986			break;
987		case Opt_jqfmt_vfsold:
988			sbi->s_jquota_fmt = QFMT_VFS_OLD;
989			break;
990		case Opt_jqfmt_vfsv0:
991			sbi->s_jquota_fmt = QFMT_VFS_V0;
992			break;
993		case Opt_quota:
994		case Opt_usrquota:
995			set_opt(sbi->s_mount_opt, QUOTA);
996			set_opt(sbi->s_mount_opt, USRQUOTA);
997			break;
998		case Opt_grpquota:
999			set_opt(sbi->s_mount_opt, QUOTA);
1000			set_opt(sbi->s_mount_opt, GRPQUOTA);
1001			break;
1002		case Opt_noquota:
1003			if (sb_any_quota_enabled(sb)) {
1004				printk(KERN_ERR "EXT3-fs: Cannot change quota "
1005					"options when quota turned on.\n");
1006				return 0;
1007			}
1008			clear_opt(sbi->s_mount_opt, QUOTA);
1009			clear_opt(sbi->s_mount_opt, USRQUOTA);
1010			clear_opt(sbi->s_mount_opt, GRPQUOTA);
1011			break;
1012#else
1013		case Opt_quota:
1014		case Opt_usrquota:
1015		case Opt_grpquota:
1016		case Opt_usrjquota:
1017		case Opt_grpjquota:
1018		case Opt_offusrjquota:
1019		case Opt_offgrpjquota:
1020		case Opt_jqfmt_vfsold:
1021		case Opt_jqfmt_vfsv0:
1022			printk(KERN_ERR
1023				"EXT3-fs: journalled quota options not "
1024				"supported.\n");
1025			break;
1026		case Opt_noquota:
1027			break;
1028#endif
1029		case Opt_abort:
1030			set_opt(sbi->s_mount_opt, ABORT);
1031			break;
1032		case Opt_barrier:
1033			if (match_int(&args[0], &option))
1034				return 0;
1035			if (option)
1036				set_opt(sbi->s_mount_opt, BARRIER);
1037			else
1038				clear_opt(sbi->s_mount_opt, BARRIER);
1039			break;
1040		case Opt_ignore:
1041			break;
1042		case Opt_resize:
1043			if (!is_remount) {
1044				printk("EXT3-fs: resize option only available "
1045					"for remount\n");
1046				return 0;
1047			}
1048			if (match_int(&args[0], &option) != 0)
1049				return 0;
1050			*n_blocks_count = option;
1051			break;
1052		case Opt_nobh:
1053			set_opt(sbi->s_mount_opt, NOBH);
1054			break;
1055		case Opt_bh:
1056			clear_opt(sbi->s_mount_opt, NOBH);
1057			break;
1058		default:
1059			printk (KERN_ERR
1060				"EXT3-fs: Unrecognized mount option \"%s\" "
1061				"or missing value\n", p);
1062			return 0;
1063		}
1064	}
1065#ifdef CONFIG_QUOTA
1066	if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
1067		if ((sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) &&
1068		     sbi->s_qf_names[USRQUOTA])
1069			clear_opt(sbi->s_mount_opt, USRQUOTA);
1070
1071		if ((sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) &&
1072		     sbi->s_qf_names[GRPQUOTA])
1073			clear_opt(sbi->s_mount_opt, GRPQUOTA);
1074
1075		if ((sbi->s_qf_names[USRQUOTA] &&
1076				(sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)) ||
1077		    (sbi->s_qf_names[GRPQUOTA] &&
1078				(sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA))) {
1079			printk(KERN_ERR "EXT3-fs: old and new quota "
1080					"format mixing.\n");
1081			return 0;
1082		}
1083
1084		if (!sbi->s_jquota_fmt) {
1085			printk(KERN_ERR "EXT3-fs: journalled quota format "
1086					"not specified.\n");
1087			return 0;
1088		}
1089	} else {
1090		if (sbi->s_jquota_fmt) {
1091			printk(KERN_ERR "EXT3-fs: journalled quota format "
1092					"specified with no journalling "
1093					"enabled.\n");
1094			return 0;
1095		}
1096	}
1097#endif
1098	return 1;
1099}
1100
1101static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
1102			    int read_only)
1103{
1104	struct ext3_sb_info *sbi = EXT3_SB(sb);
1105	int res = 0;
1106
1107	if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) {
1108		printk (KERN_ERR "EXT3-fs warning: revision level too high, "
1109			"forcing read-only mode\n");
1110		res = MS_RDONLY;
1111	}
1112	if (read_only)
1113		return res;
1114	if (!(sbi->s_mount_state & EXT3_VALID_FS))
1115		printk (KERN_WARNING "EXT3-fs warning: mounting unchecked fs, "
1116			"running e2fsck is recommended\n");
1117	else if ((sbi->s_mount_state & EXT3_ERROR_FS))
1118		printk (KERN_WARNING
1119			"EXT3-fs warning: mounting fs with errors, "
1120			"running e2fsck is recommended\n");
1121	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
1122		 le16_to_cpu(es->s_mnt_count) >=
1123		 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
1124		printk (KERN_WARNING
1125			"EXT3-fs warning: maximal mount count reached, "
1126			"running e2fsck is recommended\n");
1127	else if (le32_to_cpu(es->s_checkinterval) &&
1128		(le32_to_cpu(es->s_lastcheck) +
1129			le32_to_cpu(es->s_checkinterval) <= get_seconds()))
1130		printk (KERN_WARNING
1131			"EXT3-fs warning: checktime reached, "
1132			"running e2fsck is recommended\n");
1133	if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1134		es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT);
1135	es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1);
1136	es->s_mtime = cpu_to_le32(get_seconds());
1137	ext3_update_dynamic_rev(sb);
1138	EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
1139
1140	ext3_commit_super(sb, es, 1);
1141	if (test_opt(sb, DEBUG))
1142		printk(KERN_INFO "[EXT3 FS bs=%lu, gc=%lu, "
1143				"bpg=%lu, ipg=%lu, mo=%04lx]\n",
1144			sb->s_blocksize,
1145			sbi->s_groups_count,
1146			EXT3_BLOCKS_PER_GROUP(sb),
1147			EXT3_INODES_PER_GROUP(sb),
1148			sbi->s_mount_opt);
1149
1150	printk(KERN_INFO "EXT3 FS on %s, ", sb->s_id);
1151	if (EXT3_SB(sb)->s_journal->j_inode == NULL) {
1152		char b[BDEVNAME_SIZE];
1153
1154		printk("external journal on %s\n",
1155			bdevname(EXT3_SB(sb)->s_journal->j_dev, b));
1156	} else {
1157		printk("internal journal\n");
1158	}
1159	return res;
1160}
1161
1162/* Called at mount-time, super-block is locked */
1163static int ext3_check_descriptors (struct super_block * sb)
1164{
1165	struct ext3_sb_info *sbi = EXT3_SB(sb);
1166	ext3_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
1167	ext3_fsblk_t last_block;
1168	struct ext3_group_desc * gdp = NULL;
1169	int desc_block = 0;
1170	int i;
1171
1172	ext3_debug ("Checking group descriptors");
1173
1174	for (i = 0; i < sbi->s_groups_count; i++)
1175	{
1176		if (i == sbi->s_groups_count - 1)
1177			last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1;
1178		else
1179			last_block = first_block +
1180				(EXT3_BLOCKS_PER_GROUP(sb) - 1);
1181
1182		if ((i % EXT3_DESC_PER_BLOCK(sb)) == 0)
1183			gdp = (struct ext3_group_desc *)
1184					sbi->s_group_desc[desc_block++]->b_data;
1185		if (le32_to_cpu(gdp->bg_block_bitmap) < first_block ||
1186		    le32_to_cpu(gdp->bg_block_bitmap) > last_block)
1187		{
1188			ext3_error (sb, "ext3_check_descriptors",
1189				    "Block bitmap for group %d"
1190				    " not in group (block %lu)!",
1191				    i, (unsigned long)
1192					le32_to_cpu(gdp->bg_block_bitmap));
1193			return 0;
1194		}
1195		if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block ||
1196		    le32_to_cpu(gdp->bg_inode_bitmap) > last_block)
1197		{
1198			ext3_error (sb, "ext3_check_descriptors",
1199				    "Inode bitmap for group %d"
1200				    " not in group (block %lu)!",
1201				    i, (unsigned long)
1202					le32_to_cpu(gdp->bg_inode_bitmap));
1203			return 0;
1204		}
1205		if (le32_to_cpu(gdp->bg_inode_table) < first_block ||
1206		    le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group >
1207		    last_block)
1208		{
1209			ext3_error (sb, "ext3_check_descriptors",
1210				    "Inode table for group %d"
1211				    " not in group (block %lu)!",
1212				    i, (unsigned long)
1213					le32_to_cpu(gdp->bg_inode_table));
1214			return 0;
1215		}
1216		first_block += EXT3_BLOCKS_PER_GROUP(sb);
1217		gdp++;
1218	}
1219
1220	sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb));
1221	sbi->s_es->s_free_inodes_count=cpu_to_le32(ext3_count_free_inodes(sb));
1222	return 1;
1223}
1224
1225
1226/* ext3_orphan_cleanup() walks a singly-linked list of inodes (starting at
1227 * the superblock) which were deleted from all directories, but held open by
1228 * a process at the time of a crash.  We walk the list and try to delete these
1229 * inodes at recovery time (only with a read-write filesystem).
1230 *
1231 * In order to keep the orphan inode chain consistent during traversal (in
1232 * case of crash during recovery), we link each inode into the superblock
1233 * orphan list_head and handle it the same way as an inode deletion during
1234 * normal operation (which journals the operations for us).
1235 *
1236 * We only do an iget() and an iput() on each inode, which is very safe if we
1237 * accidentally point at an in-use or already deleted inode.  The worst that
1238 * can happen in this case is that we get a "bit already cleared" message from
1239 * ext3_free_inode().  The only reason we would point at a wrong inode is if
1240 * e2fsck was run on this filesystem, and it must have already done the orphan
1241 * inode cleanup for us, so we can safely abort without any further action.
1242 */
1243static void ext3_orphan_cleanup (struct super_block * sb,
1244				 struct ext3_super_block * es)
1245{
1246	unsigned int s_flags = sb->s_flags;
1247	int nr_orphans = 0, nr_truncates = 0;
1248#ifdef CONFIG_QUOTA
1249	int i;
1250#endif
1251	if (!es->s_last_orphan) {
1252		jbd_debug(4, "no orphan inodes to clean up\n");
1253		return;
1254	}
1255
1256	if (bdev_read_only(sb->s_bdev)) {
1257		printk(KERN_ERR "EXT3-fs: write access "
1258			"unavailable, skipping orphan cleanup.\n");
1259		return;
1260	}
1261
1262	if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) {
1263		if (es->s_last_orphan)
1264			jbd_debug(1, "Errors on filesystem, "
1265				  "clearing orphan list.\n");
1266		es->s_last_orphan = 0;
1267		jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
1268		return;
1269	}
1270
1271	if (s_flags & MS_RDONLY) {
1272		printk(KERN_INFO "EXT3-fs: %s: orphan cleanup on readonly fs\n",
1273		       sb->s_id);
1274		sb->s_flags &= ~MS_RDONLY;
1275	}
1276#ifdef CONFIG_QUOTA
1277	/* Needed for iput() to work correctly and not trash data */
1278	sb->s_flags |= MS_ACTIVE;
1279	/* Turn on quotas so that they are updated correctly */
1280	for (i = 0; i < MAXQUOTAS; i++) {
1281		if (EXT3_SB(sb)->s_qf_names[i]) {
1282			int ret = ext3_quota_on_mount(sb, i);
1283			if (ret < 0)
1284				printk(KERN_ERR
1285					"EXT3-fs: Cannot turn on journalled "
1286					"quota: error %d\n", ret);
1287		}
1288	}
1289#endif
1290
1291	while (es->s_last_orphan) {
1292		struct inode *inode;
1293
1294		if (!(inode =
1295		      ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) {
1296			es->s_last_orphan = 0;
1297			break;
1298		}
1299
1300		list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan);
1301		DQUOT_INIT(inode);
1302		if (inode->i_nlink) {
1303			printk(KERN_DEBUG
1304				"%s: truncating inode %lu to %Ld bytes\n",
1305				__FUNCTION__, inode->i_ino, inode->i_size);
1306			jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
1307				  inode->i_ino, inode->i_size);
1308			ext3_truncate(inode);
1309			nr_truncates++;
1310		} else {
1311			printk(KERN_DEBUG
1312				"%s: deleting unreferenced inode %lu\n",
1313				__FUNCTION__, inode->i_ino);
1314			jbd_debug(2, "deleting unreferenced inode %lu\n",
1315				  inode->i_ino);
1316			nr_orphans++;
1317		}
1318		iput(inode);  /* The delete magic happens here! */
1319	}
1320
1321#define PLURAL(x) (x), ((x)==1) ? "" : "s"
1322
1323	if (nr_orphans)
1324		printk(KERN_INFO "EXT3-fs: %s: %d orphan inode%s deleted\n",
1325		       sb->s_id, PLURAL(nr_orphans));
1326	if (nr_truncates)
1327		printk(KERN_INFO "EXT3-fs: %s: %d truncate%s cleaned up\n",
1328		       sb->s_id, PLURAL(nr_truncates));
1329#ifdef CONFIG_QUOTA
1330	/* Turn quotas off */
1331	for (i = 0; i < MAXQUOTAS; i++) {
1332		if (sb_dqopt(sb)->files[i])
1333			vfs_quota_off(sb, i);
1334	}
1335#endif
1336	sb->s_flags = s_flags; /* Restore MS_RDONLY status */
1337}
1338
1339/*
1340 * Maximal file size.  There is a direct, and {,double-,triple-}indirect
1341 * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks.
1342 * We need to be 1 filesystem block less than the 2^32 sector limit.
1343 */
1344static loff_t ext3_max_size(int bits)
1345{
1346	loff_t res = EXT3_NDIR_BLOCKS;
1347	/* This constant is calculated to be the largest file size for a
1348	 * dense, 4k-blocksize file such that the total number of
1349	 * sectors in the file, including data and all indirect blocks,
1350	 * does not exceed 2^32. */
1351	const loff_t upper_limit = 0x1ff7fffd000LL;
1352
1353	res += 1LL << (bits-2);
1354	res += 1LL << (2*(bits-2));
1355	res += 1LL << (3*(bits-2));
1356	res <<= bits;
1357	if (res > upper_limit)
1358		res = upper_limit;
1359	return res;
1360}
1361
1362static ext3_fsblk_t descriptor_loc(struct super_block *sb,
1363				    ext3_fsblk_t logic_sb_block,
1364				    int nr)
1365{
1366	struct ext3_sb_info *sbi = EXT3_SB(sb);
1367	unsigned long bg, first_meta_bg;
1368	int has_super = 0;
1369
1370	first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
1371
1372	if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) ||
1373	    nr < first_meta_bg)
1374		return (logic_sb_block + nr + 1);
1375	bg = sbi->s_desc_per_block * nr;
1376	if (ext3_bg_has_super(sb, bg))
1377		has_super = 1;
1378	return (has_super + ext3_group_first_block_no(sb, bg));
1379}
1380
1381
1382static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1383{
1384	struct buffer_head * bh;
1385	struct ext3_super_block *es = NULL;
1386	struct ext3_sb_info *sbi;
1387	ext3_fsblk_t block;
1388	ext3_fsblk_t sb_block = get_sb_block(&data);
1389	ext3_fsblk_t logic_sb_block;
1390	unsigned long offset = 0;
1391	unsigned int journal_inum = 0;
1392	unsigned long journal_devnum = 0;
1393	unsigned long def_mount_opts;
1394	struct inode *root;
1395	int blocksize;
1396	int hblock;
1397	int db_count;
1398	int i;
1399	int needs_recovery;
1400	__le32 features;
1401
1402	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
1403	if (!sbi)
1404		return -ENOMEM;
1405	sb->s_fs_info = sbi;
1406	sbi->s_mount_opt = 0;
1407	sbi->s_resuid = EXT3_DEF_RESUID;
1408	sbi->s_resgid = EXT3_DEF_RESGID;
1409
1410	unlock_kernel();
1411
1412	blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE);
1413	if (!blocksize) {
1414		printk(KERN_ERR "EXT3-fs: unable to set blocksize\n");
1415		goto out_fail;
1416	}
1417
1418	/*
1419	 * The ext3 superblock will not be buffer aligned for other than 1kB
1420	 * block sizes.  We need to calculate the offset from buffer start.
1421	 */
1422	if (blocksize != EXT3_MIN_BLOCK_SIZE) {
1423		logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
1424		offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
1425	} else {
1426		logic_sb_block = sb_block;
1427	}
1428
1429	if (!(bh = sb_bread(sb, logic_sb_block))) {
1430		printk (KERN_ERR "EXT3-fs: unable to read superblock\n");
1431		goto out_fail;
1432	}
1433	/*
1434	 * Note: s_es must be initialized as soon as possible because
1435	 *       some ext3 macro-instructions depend on its value
1436	 */
1437	es = (struct ext3_super_block *) (((char *)bh->b_data) + offset);
1438	sbi->s_es = es;
1439	sb->s_magic = le16_to_cpu(es->s_magic);
1440	if (sb->s_magic != EXT3_SUPER_MAGIC)
1441		goto cantfind_ext3;
1442
1443	/* Set defaults before we parse the mount options */
1444	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
1445	if (def_mount_opts & EXT3_DEFM_DEBUG)
1446		set_opt(sbi->s_mount_opt, DEBUG);
1447	if (def_mount_opts & EXT3_DEFM_BSDGROUPS)
1448		set_opt(sbi->s_mount_opt, GRPID);
1449	if (def_mount_opts & EXT3_DEFM_UID16)
1450		set_opt(sbi->s_mount_opt, NO_UID32);
1451#ifdef CONFIG_EXT3_FS_XATTR
1452	if (def_mount_opts & EXT3_DEFM_XATTR_USER)
1453		set_opt(sbi->s_mount_opt, XATTR_USER);
1454#endif
1455#ifdef CONFIG_EXT3_FS_POSIX_ACL
1456	if (def_mount_opts & EXT3_DEFM_ACL)
1457		set_opt(sbi->s_mount_opt, POSIX_ACL);
1458#endif
1459	if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA)
1460		sbi->s_mount_opt |= EXT3_MOUNT_JOURNAL_DATA;
1461	else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED)
1462		sbi->s_mount_opt |= EXT3_MOUNT_ORDERED_DATA;
1463	else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_WBACK)
1464		sbi->s_mount_opt |= EXT3_MOUNT_WRITEBACK_DATA;
1465
1466	if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC)
1467		set_opt(sbi->s_mount_opt, ERRORS_PANIC);
1468	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_RO)
1469		set_opt(sbi->s_mount_opt, ERRORS_RO);
1470	else
1471		set_opt(sbi->s_mount_opt, ERRORS_CONT);
1472
1473	sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
1474	sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
1475
1476	set_opt(sbi->s_mount_opt, RESERVATION);
1477
1478	if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
1479			    NULL, 0))
1480		goto failed_mount;
1481
1482	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
1483		((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
1484
1485	if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV &&
1486	    (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) ||
1487	     EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
1488	     EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U)))
1489		printk(KERN_WARNING
1490		       "EXT3-fs warning: feature flags set on rev 0 fs, "
1491		       "running e2fsck is recommended\n");
1492	/*
1493	 * Check feature flags regardless of the revision level, since we
1494	 * previously didn't change the revision level when setting the flags,
1495	 * so there is a chance incompat flags are set on a rev 0 filesystem.
1496	 */
1497	features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP);
1498	if (features) {
1499		printk(KERN_ERR "EXT3-fs: %s: couldn't mount because of "
1500		       "unsupported optional features (%x).\n",
1501		       sb->s_id, le32_to_cpu(features));
1502		goto failed_mount;
1503	}
1504	features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP);
1505	if (!(sb->s_flags & MS_RDONLY) && features) {
1506		printk(KERN_ERR "EXT3-fs: %s: couldn't mount RDWR because of "
1507		       "unsupported optional features (%x).\n",
1508		       sb->s_id, le32_to_cpu(features));
1509		goto failed_mount;
1510	}
1511	blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
1512
1513	if (blocksize < EXT3_MIN_BLOCK_SIZE ||
1514	    blocksize > EXT3_MAX_BLOCK_SIZE) {
1515		printk(KERN_ERR
1516		       "EXT3-fs: Unsupported filesystem blocksize %d on %s.\n",
1517		       blocksize, sb->s_id);
1518		goto failed_mount;
1519	}
1520
1521	hblock = bdev_hardsect_size(sb->s_bdev);
1522	if (sb->s_blocksize != blocksize) {
1523		/*
1524		 * Make sure the blocksize for the filesystem is larger
1525		 * than the hardware sectorsize for the machine.
1526		 */
1527		if (blocksize < hblock) {
1528			printk(KERN_ERR "EXT3-fs: blocksize %d too small for "
1529			       "device blocksize %d.\n", blocksize, hblock);
1530			goto failed_mount;
1531		}
1532
1533		brelse (bh);
1534		sb_set_blocksize(sb, blocksize);
1535		logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
1536		offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
1537		bh = sb_bread(sb, logic_sb_block);
1538		if (!bh) {
1539			printk(KERN_ERR
1540			       "EXT3-fs: Can't read superblock on 2nd try.\n");
1541			goto failed_mount;
1542		}
1543		es = (struct ext3_super_block *)(((char *)bh->b_data) + offset);
1544		sbi->s_es = es;
1545		if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
1546			printk (KERN_ERR
1547				"EXT3-fs: Magic mismatch, very weird !\n");
1548			goto failed_mount;
1549		}
1550	}
1551
1552	sb->s_maxbytes = ext3_max_size(sb->s_blocksize_bits);
1553
1554	if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV) {
1555		sbi->s_inode_size = EXT3_GOOD_OLD_INODE_SIZE;
1556		sbi->s_first_ino = EXT3_GOOD_OLD_FIRST_INO;
1557	} else {
1558		sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
1559		sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
1560		if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) ||
1561		    (sbi->s_inode_size & (sbi->s_inode_size - 1)) ||
1562		    (sbi->s_inode_size > blocksize)) {
1563			printk (KERN_ERR
1564				"EXT3-fs: unsupported inode size: %d\n",
1565				sbi->s_inode_size);
1566			goto failed_mount;
1567		}
1568	}
1569	sbi->s_frag_size = EXT3_MIN_FRAG_SIZE <<
1570				   le32_to_cpu(es->s_log_frag_size);
1571	if (blocksize != sbi->s_frag_size) {
1572		printk(KERN_ERR
1573		       "EXT3-fs: fragsize %lu != blocksize %u (unsupported)\n",
1574		       sbi->s_frag_size, blocksize);
1575		goto failed_mount;
1576	}
1577	sbi->s_frags_per_block = 1;
1578	sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
1579	sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
1580	sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
1581	if (EXT3_INODE_SIZE(sb) == 0)
1582		goto cantfind_ext3;
1583	sbi->s_inodes_per_block = blocksize / EXT3_INODE_SIZE(sb);
1584	if (sbi->s_inodes_per_block == 0)
1585		goto cantfind_ext3;
1586	sbi->s_itb_per_group = sbi->s_inodes_per_group /
1587					sbi->s_inodes_per_block;
1588	sbi->s_desc_per_block = blocksize / sizeof(struct ext3_group_desc);
1589	sbi->s_sbh = bh;
1590	sbi->s_mount_state = le16_to_cpu(es->s_state);
1591	sbi->s_addr_per_block_bits = ilog2(EXT3_ADDR_PER_BLOCK(sb));
1592	sbi->s_desc_per_block_bits = ilog2(EXT3_DESC_PER_BLOCK(sb));
1593	for (i=0; i < 4; i++)
1594		sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
1595	sbi->s_def_hash_version = es->s_def_hash_version;
1596
1597	if (sbi->s_blocks_per_group > blocksize * 8) {
1598		printk (KERN_ERR
1599			"EXT3-fs: #blocks per group too big: %lu\n",
1600			sbi->s_blocks_per_group);
1601		goto failed_mount;
1602	}
1603	if (sbi->s_frags_per_group > blocksize * 8) {
1604		printk (KERN_ERR
1605			"EXT3-fs: #fragments per group too big: %lu\n",
1606			sbi->s_frags_per_group);
1607		goto failed_mount;
1608	}
1609	if (sbi->s_inodes_per_group > blocksize * 8) {
1610		printk (KERN_ERR
1611			"EXT3-fs: #inodes per group too big: %lu\n",
1612			sbi->s_inodes_per_group);
1613		goto failed_mount;
1614	}
1615
1616	if (le32_to_cpu(es->s_blocks_count) >
1617		    (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
1618		printk(KERN_ERR "EXT3-fs: filesystem on %s:"
1619			" too large to mount safely\n", sb->s_id);
1620		if (sizeof(sector_t) < 8)
1621			printk(KERN_WARNING "EXT3-fs: CONFIG_LBD not "
1622					"enabled\n");
1623		goto failed_mount;
1624	}
1625
1626	if (EXT3_BLOCKS_PER_GROUP(sb) == 0)
1627		goto cantfind_ext3;
1628	sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
1629			       le32_to_cpu(es->s_first_data_block) - 1)
1630				       / EXT3_BLOCKS_PER_GROUP(sb)) + 1;
1631	db_count = (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) /
1632		   EXT3_DESC_PER_BLOCK(sb);
1633	sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
1634				    GFP_KERNEL);
1635	if (sbi->s_group_desc == NULL) {
1636		printk (KERN_ERR "EXT3-fs: not enough memory\n");
1637		goto failed_mount;
1638	}
1639
1640	bgl_lock_init(&sbi->s_blockgroup_lock);
1641
1642	for (i = 0; i < db_count; i++) {
1643		block = descriptor_loc(sb, logic_sb_block, i);
1644		sbi->s_group_desc[i] = sb_bread(sb, block);
1645		if (!sbi->s_group_desc[i]) {
1646			printk (KERN_ERR "EXT3-fs: "
1647				"can't read group descriptor %d\n", i);
1648			db_count = i;
1649			goto failed_mount2;
1650		}
1651	}
1652	if (!ext3_check_descriptors (sb)) {
1653		printk(KERN_ERR "EXT3-fs: group descriptors corrupted!\n");
1654		goto failed_mount2;
1655	}
1656	sbi->s_gdb_count = db_count;
1657	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
1658	spin_lock_init(&sbi->s_next_gen_lock);
1659
1660	percpu_counter_init(&sbi->s_freeblocks_counter,
1661		ext3_count_free_blocks(sb));
1662	percpu_counter_init(&sbi->s_freeinodes_counter,
1663		ext3_count_free_inodes(sb));
1664	percpu_counter_init(&sbi->s_dirs_counter,
1665		ext3_count_dirs(sb));
1666
1667	/* per fileystem reservation list head & lock */
1668	spin_lock_init(&sbi->s_rsv_window_lock);
1669	sbi->s_rsv_window_root = RB_ROOT;
1670	/* Add a single, static dummy reservation to the start of the
1671	 * reservation window list --- it gives us a placeholder for
1672	 * append-at-start-of-list which makes the allocation logic
1673	 * _much_ simpler. */
1674	sbi->s_rsv_window_head.rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
1675	sbi->s_rsv_window_head.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
1676	sbi->s_rsv_window_head.rsv_alloc_hit = 0;
1677	sbi->s_rsv_window_head.rsv_goal_size = 0;
1678	ext3_rsv_window_add(sb, &sbi->s_rsv_window_head);
1679
1680	/*
1681	 * set up enough so that it can read an inode
1682	 */
1683	sb->s_op = &ext3_sops;
1684	sb->s_export_op = &ext3_export_ops;
1685	sb->s_xattr = ext3_xattr_handlers;
1686#ifdef CONFIG_QUOTA
1687	sb->s_qcop = &ext3_qctl_operations;
1688	sb->dq_op = &ext3_quota_operations;
1689#endif
1690	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
1691
1692	sb->s_root = NULL;
1693
1694	needs_recovery = (es->s_last_orphan != 0 ||
1695			  EXT3_HAS_INCOMPAT_FEATURE(sb,
1696				    EXT3_FEATURE_INCOMPAT_RECOVER));
1697
1698	/*
1699	 * The first inode we look at is the journal inode.  Don't try
1700	 * root first: it may be modified in the journal!
1701	 */
1702	if (!test_opt(sb, NOLOAD) &&
1703	    EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
1704		if (ext3_load_journal(sb, es, journal_devnum))
1705			goto failed_mount3;
1706	} else if (journal_inum) {
1707		if (ext3_create_journal(sb, es, journal_inum))
1708			goto failed_mount3;
1709	} else {
1710		if (!silent)
1711			printk (KERN_ERR
1712				"ext3: No journal on filesystem on %s\n",
1713				sb->s_id);
1714		goto failed_mount3;
1715	}
1716
1717	/* We have now updated the journal if required, so we can
1718	 * validate the data journaling mode. */
1719	switch (test_opt(sb, DATA_FLAGS)) {
1720	case 0:
1721		/* No mode set, assume a default based on the journal
1722                   capabilities: ORDERED_DATA if the journal can
1723                   cope, else JOURNAL_DATA */
1724		if (journal_check_available_features
1725		    (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE))
1726			set_opt(sbi->s_mount_opt, ORDERED_DATA);
1727		else
1728			set_opt(sbi->s_mount_opt, JOURNAL_DATA);
1729		break;
1730
1731	case EXT3_MOUNT_ORDERED_DATA:
1732	case EXT3_MOUNT_WRITEBACK_DATA:
1733		if (!journal_check_available_features
1734		    (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) {
1735			printk(KERN_ERR "EXT3-fs: Journal does not support "
1736			       "requested data journaling mode\n");
1737			goto failed_mount4;
1738		}
1739	default:
1740		break;
1741	}
1742
1743	if (test_opt(sb, NOBH)) {
1744		if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) {
1745			printk(KERN_WARNING "EXT3-fs: Ignoring nobh option - "
1746				"its supported only with writeback mode\n");
1747			clear_opt(sbi->s_mount_opt, NOBH);
1748		}
1749	}
1750	/*
1751	 * The journal_load will have done any necessary log recovery,
1752	 * so we can safely mount the rest of the filesystem now.
1753	 */
1754
1755	root = iget(sb, EXT3_ROOT_INO);
1756	sb->s_root = d_alloc_root(root);
1757	if (!sb->s_root) {
1758		printk(KERN_ERR "EXT3-fs: get root inode failed\n");
1759		iput(root);
1760		goto failed_mount4;
1761	}
1762	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
1763		dput(sb->s_root);
1764		sb->s_root = NULL;
1765		printk(KERN_ERR "EXT3-fs: corrupt root inode, run e2fsck\n");
1766		goto failed_mount4;
1767	}
1768
1769	ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
1770	/*
1771	 * akpm: core read_super() calls in here with the superblock locked.
1772	 * That deadlocks, because orphan cleanup needs to lock the superblock
1773	 * in numerous places.  Here we just pop the lock - it's relatively
1774	 * harmless, because we are now ready to accept write_super() requests,
1775	 * and aviro says that's the only reason for hanging onto the
1776	 * superblock lock.
1777	 */
1778	EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS;
1779	ext3_orphan_cleanup(sb, es);
1780	EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
1781	if (needs_recovery)
1782		printk (KERN_INFO "EXT3-fs: recovery complete.\n");
1783	ext3_mark_recovery_complete(sb, es);
1784	printk (KERN_INFO "EXT3-fs: mounted filesystem with %s data mode.\n",
1785		test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
1786		test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
1787		"writeback");
1788
1789	lock_kernel();
1790	return 0;
1791
1792cantfind_ext3:
1793	if (!silent)
1794		printk(KERN_ERR "VFS: Can't find ext3 filesystem on dev %s.\n",
1795		       sb->s_id);
1796	goto failed_mount;
1797
1798failed_mount4:
1799	journal_destroy(sbi->s_journal);
1800failed_mount3:
1801	percpu_counter_destroy(&sbi->s_freeblocks_counter);
1802	percpu_counter_destroy(&sbi->s_freeinodes_counter);
1803	percpu_counter_destroy(&sbi->s_dirs_counter);
1804failed_mount2:
1805	for (i = 0; i < db_count; i++)
1806		brelse(sbi->s_group_desc[i]);
1807	kfree(sbi->s_group_desc);
1808failed_mount:
1809#ifdef CONFIG_QUOTA
1810	for (i = 0; i < MAXQUOTAS; i++)
1811		kfree(sbi->s_qf_names[i]);
1812#endif
1813	ext3_blkdev_remove(sbi);
1814	brelse(bh);
1815out_fail:
1816	sb->s_fs_info = NULL;
1817	kfree(sbi);
1818	lock_kernel();
1819	return -EINVAL;
1820}
1821
1822/*
1823 * Setup any per-fs journal parameters now.  We'll do this both on
1824 * initial mount, once the journal has been initialised but before we've
1825 * done any recovery; and again on any subsequent remount.
1826 */
1827static void ext3_init_journal_params(struct super_block *sb, journal_t *journal)
1828{
1829	struct ext3_sb_info *sbi = EXT3_SB(sb);
1830
1831	if (sbi->s_commit_interval)
1832		journal->j_commit_interval = sbi->s_commit_interval;
1833	/* We could also set up an ext3-specific default for the commit
1834	 * interval here, but for now we'll just fall back to the jbd
1835	 * default. */
1836
1837	spin_lock(&journal->j_state_lock);
1838	if (test_opt(sb, BARRIER))
1839		journal->j_flags |= JFS_BARRIER;
1840	else
1841		journal->j_flags &= ~JFS_BARRIER;
1842	spin_unlock(&journal->j_state_lock);
1843}
1844
1845static journal_t *ext3_get_journal(struct super_block *sb,
1846				   unsigned int journal_inum)
1847{
1848	struct inode *journal_inode;
1849	journal_t *journal;
1850
1851	/* First, test for the existence of a valid inode on disk.  Bad
1852	 * things happen if we iget() an unused inode, as the subsequent
1853	 * iput() will try to delete it. */
1854
1855	journal_inode = iget(sb, journal_inum);
1856	if (!journal_inode) {
1857		printk(KERN_ERR "EXT3-fs: no journal found.\n");
1858		return NULL;
1859	}
1860	if (!journal_inode->i_nlink) {
1861		make_bad_inode(journal_inode);
1862		iput(journal_inode);
1863		printk(KERN_ERR "EXT3-fs: journal inode is deleted.\n");
1864		return NULL;
1865	}
1866
1867	jbd_debug(2, "Journal inode found at %p: %Ld bytes\n",
1868		  journal_inode, journal_inode->i_size);
1869	if (is_bad_inode(journal_inode) || !S_ISREG(journal_inode->i_mode)) {
1870		printk(KERN_ERR "EXT3-fs: invalid journal inode.\n");
1871		iput(journal_inode);
1872		return NULL;
1873	}
1874
1875	journal = journal_init_inode(journal_inode);
1876	if (!journal) {
1877		printk(KERN_ERR "EXT3-fs: Could not load journal inode\n");
1878		iput(journal_inode);
1879		return NULL;
1880	}
1881	journal->j_private = sb;
1882	ext3_init_journal_params(sb, journal);
1883	return journal;
1884}
1885
1886static journal_t *ext3_get_dev_journal(struct super_block *sb,
1887				       dev_t j_dev)
1888{
1889	struct buffer_head * bh;
1890	journal_t *journal;
1891	ext3_fsblk_t start;
1892	ext3_fsblk_t len;
1893	int hblock, blocksize;
1894	ext3_fsblk_t sb_block;
1895	unsigned long offset;
1896	struct ext3_super_block * es;
1897	struct block_device *bdev;
1898
1899	bdev = ext3_blkdev_get(j_dev);
1900	if (bdev == NULL)
1901		return NULL;
1902
1903	if (bd_claim(bdev, sb)) {
1904		printk(KERN_ERR
1905		        "EXT3: failed to claim external journal device.\n");
1906		blkdev_put(bdev);
1907		return NULL;
1908	}
1909
1910	blocksize = sb->s_blocksize;
1911	hblock = bdev_hardsect_size(bdev);
1912	if (blocksize < hblock) {
1913		printk(KERN_ERR
1914			"EXT3-fs: blocksize too small for journal device.\n");
1915		goto out_bdev;
1916	}
1917
1918	sb_block = EXT3_MIN_BLOCK_SIZE / blocksize;
1919	offset = EXT3_MIN_BLOCK_SIZE % blocksize;
1920	set_blocksize(bdev, blocksize);
1921	if (!(bh = __bread(bdev, sb_block, blocksize))) {
1922		printk(KERN_ERR "EXT3-fs: couldn't read superblock of "
1923		       "external journal\n");
1924		goto out_bdev;
1925	}
1926
1927	es = (struct ext3_super_block *) (((char *)bh->b_data) + offset);
1928	if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) ||
1929	    !(le32_to_cpu(es->s_feature_incompat) &
1930	      EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) {
1931		printk(KERN_ERR "EXT3-fs: external journal has "
1932					"bad superblock\n");
1933		brelse(bh);
1934		goto out_bdev;
1935	}
1936
1937	if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
1938		printk(KERN_ERR "EXT3-fs: journal UUID does not match\n");
1939		brelse(bh);
1940		goto out_bdev;
1941	}
1942
1943	len = le32_to_cpu(es->s_blocks_count);
1944	start = sb_block + 1;
1945	brelse(bh);	/* we're done with the superblock */
1946
1947	journal = journal_init_dev(bdev, sb->s_bdev,
1948					start, len, blocksize);
1949	if (!journal) {
1950		printk(KERN_ERR "EXT3-fs: failed to create device journal\n");
1951		goto out_bdev;
1952	}
1953	journal->j_private = sb;
1954	ll_rw_block(READ, 1, &journal->j_sb_buffer);
1955	wait_on_buffer(journal->j_sb_buffer);
1956	if (!buffer_uptodate(journal->j_sb_buffer)) {
1957		printk(KERN_ERR "EXT3-fs: I/O error on journal device\n");
1958		goto out_journal;
1959	}
1960	if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
1961		printk(KERN_ERR "EXT3-fs: External journal has more than one "
1962					"user (unsupported) - %d\n",
1963			be32_to_cpu(journal->j_superblock->s_nr_users));
1964		goto out_journal;
1965	}
1966	EXT3_SB(sb)->journal_bdev = bdev;
1967	ext3_init_journal_params(sb, journal);
1968	return journal;
1969out_journal:
1970	journal_destroy(journal);
1971out_bdev:
1972	ext3_blkdev_put(bdev);
1973	return NULL;
1974}
1975
1976static int ext3_load_journal(struct super_block *sb,
1977			     struct ext3_super_block *es,
1978			     unsigned long journal_devnum)
1979{
1980	journal_t *journal;
1981	unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
1982	dev_t journal_dev;
1983	int err = 0;
1984	int really_read_only;
1985
1986	if (journal_devnum &&
1987	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
1988		printk(KERN_INFO "EXT3-fs: external journal device major/minor "
1989			"numbers have changed\n");
1990		journal_dev = new_decode_dev(journal_devnum);
1991	} else
1992		journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
1993
1994	really_read_only = bdev_read_only(sb->s_bdev);
1995
1996	/*
1997	 * Are we loading a blank journal or performing recovery after a
1998	 * crash?  For recovery, we need to check in advance whether we
1999	 * can get read-write access to the device.
2000	 */
2001
2002	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) {
2003		if (sb->s_flags & MS_RDONLY) {
2004			printk(KERN_INFO "EXT3-fs: INFO: recovery "
2005					"required on readonly filesystem.\n");
2006			if (really_read_only) {
2007				printk(KERN_ERR "EXT3-fs: write access "
2008					"unavailable, cannot proceed.\n");
2009				return -EROFS;
2010			}
2011			printk (KERN_INFO "EXT3-fs: write access will "
2012					"be enabled during recovery.\n");
2013		}
2014	}
2015
2016	if (journal_inum && journal_dev) {
2017		printk(KERN_ERR "EXT3-fs: filesystem has both journal "
2018		       "and inode journals!\n");
2019		return -EINVAL;
2020	}
2021
2022	if (journal_inum) {
2023		if (!(journal = ext3_get_journal(sb, journal_inum)))
2024			return -EINVAL;
2025	} else {
2026		if (!(journal = ext3_get_dev_journal(sb, journal_dev)))
2027			return -EINVAL;
2028	}
2029
2030	if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
2031		err = journal_update_format(journal);
2032		if (err)  {
2033			printk(KERN_ERR "EXT3-fs: error updating journal.\n");
2034			journal_destroy(journal);
2035			return err;
2036		}
2037	}
2038
2039	if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER))
2040		err = journal_wipe(journal, !really_read_only);
2041	if (!err)
2042		err = journal_load(journal);
2043
2044	if (err) {
2045		printk(KERN_ERR "EXT3-fs: error loading journal.\n");
2046		journal_destroy(journal);
2047		return err;
2048	}
2049
2050	EXT3_SB(sb)->s_journal = journal;
2051	ext3_clear_journal_err(sb, es);
2052
2053	if (journal_devnum &&
2054	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
2055		es->s_journal_dev = cpu_to_le32(journal_devnum);
2056		sb->s_dirt = 1;
2057
2058		/* Make sure we flush the recovery flag to disk. */
2059		ext3_commit_super(sb, es, 1);
2060	}
2061
2062	return 0;
2063}
2064
2065static int ext3_create_journal(struct super_block * sb,
2066			       struct ext3_super_block * es,
2067			       unsigned int journal_inum)
2068{
2069	journal_t *journal;
2070
2071	if (sb->s_flags & MS_RDONLY) {
2072		printk(KERN_ERR "EXT3-fs: readonly filesystem when trying to "
2073				"create journal.\n");
2074		return -EROFS;
2075	}
2076
2077	if (!(journal = ext3_get_journal(sb, journal_inum)))
2078		return -EINVAL;
2079
2080	printk(KERN_INFO "EXT3-fs: creating new journal on inode %u\n",
2081	       journal_inum);
2082
2083	if (journal_create(journal)) {
2084		printk(KERN_ERR "EXT3-fs: error creating journal.\n");
2085		journal_destroy(journal);
2086		return -EIO;
2087	}
2088
2089	EXT3_SB(sb)->s_journal = journal;
2090
2091	ext3_update_dynamic_rev(sb);
2092	EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
2093	EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL);
2094
2095	es->s_journal_inum = cpu_to_le32(journal_inum);
2096	sb->s_dirt = 1;
2097
2098	/* Make sure we flush the recovery flag to disk. */
2099	ext3_commit_super(sb, es, 1);
2100
2101	return 0;
2102}
2103
2104static void ext3_commit_super (struct super_block * sb,
2105			       struct ext3_super_block * es,
2106			       int sync)
2107{
2108	struct buffer_head *sbh = EXT3_SB(sb)->s_sbh;
2109
2110	if (!sbh)
2111		return;
2112	es->s_wtime = cpu_to_le32(get_seconds());
2113	es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb));
2114	es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb));
2115	BUFFER_TRACE(sbh, "marking dirty");
2116	mark_buffer_dirty(sbh);
2117	if (sync)
2118		sync_dirty_buffer(sbh);
2119}
2120
2121
2122/*
2123 * Have we just finished recovery?  If so, and if we are mounting (or
2124 * remounting) the filesystem readonly, then we will end up with a
2125 * consistent fs on disk.  Record that fact.
2126 */
2127static void ext3_mark_recovery_complete(struct super_block * sb,
2128					struct ext3_super_block * es)
2129{
2130	journal_t *journal = EXT3_SB(sb)->s_journal;
2131
2132	journal_lock_updates(journal);
2133	journal_flush(journal);
2134	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
2135	    sb->s_flags & MS_RDONLY) {
2136		EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
2137		sb->s_dirt = 0;
2138		ext3_commit_super(sb, es, 1);
2139	}
2140	journal_unlock_updates(journal);
2141}
2142
2143/*
2144 * If we are mounting (or read-write remounting) a filesystem whose journal
2145 * has recorded an error from a previous lifetime, move that error to the
2146 * main filesystem now.
2147 */
2148static void ext3_clear_journal_err(struct super_block * sb,
2149				   struct ext3_super_block * es)
2150{
2151	journal_t *journal;
2152	int j_errno;
2153	const char *errstr;
2154
2155	journal = EXT3_SB(sb)->s_journal;
2156
2157	/*
2158	 * Now check for any error status which may have been recorded in the
2159	 * journal by a prior ext3_error() or ext3_abort()
2160	 */
2161
2162	j_errno = journal_errno(journal);
2163	if (j_errno) {
2164		char nbuf[16];
2165
2166		errstr = ext3_decode_error(sb, j_errno, nbuf);
2167		ext3_warning(sb, __FUNCTION__, "Filesystem error recorded "
2168			     "from previous mount: %s", errstr);
2169		ext3_warning(sb, __FUNCTION__, "Marking fs in need of "
2170			     "filesystem check.");
2171
2172		EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
2173		es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
2174		ext3_commit_super (sb, es, 1);
2175
2176		journal_clear_err(journal);
2177	}
2178}
2179
2180/*
2181 * Force the running and committing transactions to commit,
2182 * and wait on the commit.
2183 */
2184int ext3_force_commit(struct super_block *sb)
2185{
2186	journal_t *journal;
2187	int ret;
2188
2189	if (sb->s_flags & MS_RDONLY)
2190		return 0;
2191
2192	journal = EXT3_SB(sb)->s_journal;
2193	sb->s_dirt = 0;
2194	ret = ext3_journal_force_commit(journal);
2195	return ret;
2196}
2197
2198/*
2199 * Ext3 always journals updates to the superblock itself, so we don't
2200 * have to propagate any other updates to the superblock on disk at this
2201 * point.  Just start an async writeback to get the buffers on their way
2202 * to the disk.
2203 *
2204 * This implicitly triggers the writebehind on sync().
2205 */
2206
2207static void ext3_write_super (struct super_block * sb)
2208{
2209	if (mutex_trylock(&sb->s_lock) != 0)
2210		BUG();
2211	sb->s_dirt = 0;
2212}
2213
2214static int ext3_sync_fs(struct super_block *sb, int wait)
2215{
2216	tid_t target;
2217
2218	sb->s_dirt = 0;
2219	if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
2220		if (wait)
2221			log_wait_commit(EXT3_SB(sb)->s_journal, target);
2222	}
2223	return 0;
2224}
2225
2226/*
2227 * LVM calls this function before a (read-only) snapshot is created.  This
2228 * gives us a chance to flush the journal completely and mark the fs clean.
2229 */
2230static void ext3_write_super_lockfs(struct super_block *sb)
2231{
2232	sb->s_dirt = 0;
2233
2234	if (!(sb->s_flags & MS_RDONLY)) {
2235		journal_t *journal = EXT3_SB(sb)->s_journal;
2236
2237		/* Now we set up the journal barrier. */
2238		journal_lock_updates(journal);
2239		journal_flush(journal);
2240
2241		/* Journal blocked and flushed, clear needs_recovery flag. */
2242		EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
2243		ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
2244	}
2245}
2246
2247/*
2248 * Called by LVM after the snapshot is done.  We need to reset the RECOVER
2249 * flag here, even though the filesystem is not technically dirty yet.
2250 */
2251static void ext3_unlockfs(struct super_block *sb)
2252{
2253	if (!(sb->s_flags & MS_RDONLY)) {
2254		lock_super(sb);
2255		/* Reser the needs_recovery flag before the fs is unlocked. */
2256		EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
2257		ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
2258		unlock_super(sb);
2259		journal_unlock_updates(EXT3_SB(sb)->s_journal);
2260	}
2261}
2262
2263static int ext3_remount (struct super_block * sb, int * flags, char * data)
2264{
2265	struct ext3_super_block * es;
2266	struct ext3_sb_info *sbi = EXT3_SB(sb);
2267	ext3_fsblk_t n_blocks_count = 0;
2268	unsigned long old_sb_flags;
2269	struct ext3_mount_options old_opts;
2270	int err;
2271#ifdef CONFIG_QUOTA
2272	int i;
2273#endif
2274
2275	/* Store the original options */
2276	old_sb_flags = sb->s_flags;
2277	old_opts.s_mount_opt = sbi->s_mount_opt;
2278	old_opts.s_resuid = sbi->s_resuid;
2279	old_opts.s_resgid = sbi->s_resgid;
2280	old_opts.s_commit_interval = sbi->s_commit_interval;
2281#ifdef CONFIG_QUOTA
2282	old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
2283	for (i = 0; i < MAXQUOTAS; i++)
2284		old_opts.s_qf_names[i] = sbi->s_qf_names[i];
2285#endif
2286
2287	/*
2288	 * Allow the "check" option to be passed as a remount option.
2289	 */
2290	if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) {
2291		err = -EINVAL;
2292		goto restore_opts;
2293	}
2294
2295	if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
2296		ext3_abort(sb, __FUNCTION__, "Abort forced by user");
2297
2298	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2299		((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
2300
2301	es = sbi->s_es;
2302
2303	ext3_init_journal_params(sb, sbi->s_journal);
2304
2305	if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
2306		n_blocks_count > le32_to_cpu(es->s_blocks_count)) {
2307		if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) {
2308			err = -EROFS;
2309			goto restore_opts;
2310		}
2311
2312		if (*flags & MS_RDONLY) {
2313			/*
2314			 * First of all, the unconditional stuff we have to do
2315			 * to disable replay of the journal when we next remount
2316			 */
2317			sb->s_flags |= MS_RDONLY;
2318
2319			/*
2320			 * OK, test if we are remounting a valid rw partition
2321			 * readonly, and if so set the rdonly flag and then
2322			 * mark the partition as valid again.
2323			 */
2324			if (!(es->s_state & cpu_to_le16(EXT3_VALID_FS)) &&
2325			    (sbi->s_mount_state & EXT3_VALID_FS))
2326				es->s_state = cpu_to_le16(sbi->s_mount_state);
2327
2328			ext3_mark_recovery_complete(sb, es);
2329		} else {
2330			__le32 ret;
2331			if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb,
2332					~EXT3_FEATURE_RO_COMPAT_SUPP))) {
2333				printk(KERN_WARNING "EXT3-fs: %s: couldn't "
2334				       "remount RDWR because of unsupported "
2335				       "optional features (%x).\n",
2336				       sb->s_id, le32_to_cpu(ret));
2337				err = -EROFS;
2338				goto restore_opts;
2339			}
2340
2341			/*
2342			 * If we have an unprocessed orphan list hanging
2343			 * around from a previously readonly bdev mount,
2344			 * require a full umount/remount for now.
2345			 */
2346			if (es->s_last_orphan) {
2347				printk(KERN_WARNING "EXT3-fs: %s: couldn't "
2348				       "remount RDWR because of unprocessed "
2349				       "orphan inode list.  Please "
2350				       "umount/remount instead.\n",
2351				       sb->s_id);
2352				err = -EINVAL;
2353				goto restore_opts;
2354			}
2355
2356			/*
2357			 * Mounting a RDONLY partition read-write, so reread
2358			 * and store the current valid flag.  (It may have
2359			 * been changed by e2fsck since we originally mounted
2360			 * the partition.)
2361			 */
2362			ext3_clear_journal_err(sb, es);
2363			sbi->s_mount_state = le16_to_cpu(es->s_state);
2364			if ((err = ext3_group_extend(sb, es, n_blocks_count)))
2365				goto restore_opts;
2366			if (!ext3_setup_super (sb, es, 0))
2367				sb->s_flags &= ~MS_RDONLY;
2368		}
2369	}
2370#ifdef CONFIG_QUOTA
2371	/* Release old quota file names */
2372	for (i = 0; i < MAXQUOTAS; i++)
2373		if (old_opts.s_qf_names[i] &&
2374		    old_opts.s_qf_names[i] != sbi->s_qf_names[i])
2375			kfree(old_opts.s_qf_names[i]);
2376#endif
2377	return 0;
2378restore_opts:
2379	sb->s_flags = old_sb_flags;
2380	sbi->s_mount_opt = old_opts.s_mount_opt;
2381	sbi->s_resuid = old_opts.s_resuid;
2382	sbi->s_resgid = old_opts.s_resgid;
2383	sbi->s_commit_interval = old_opts.s_commit_interval;
2384#ifdef CONFIG_QUOTA
2385	sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
2386	for (i = 0; i < MAXQUOTAS; i++) {
2387		if (sbi->s_qf_names[i] &&
2388		    old_opts.s_qf_names[i] != sbi->s_qf_names[i])
2389			kfree(sbi->s_qf_names[i]);
2390		sbi->s_qf_names[i] = old_opts.s_qf_names[i];
2391	}
2392#endif
2393	return err;
2394}
2395
2396static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
2397{
2398	struct super_block *sb = dentry->d_sb;
2399	struct ext3_sb_info *sbi = EXT3_SB(sb);
2400	struct ext3_super_block *es = sbi->s_es;
2401	ext3_fsblk_t overhead;
2402	int i;
2403	u64 fsid;
2404
2405	if (test_opt (sb, MINIX_DF))
2406		overhead = 0;
2407	else {
2408		unsigned long ngroups;
2409		ngroups = EXT3_SB(sb)->s_groups_count;
2410		smp_rmb();
2411
2412		/*
2413		 * Compute the overhead (FS structures)
2414		 */
2415
2416		/*
2417		 * All of the blocks before first_data_block are
2418		 * overhead
2419		 */
2420		overhead = le32_to_cpu(es->s_first_data_block);
2421
2422		/*
2423		 * Add the overhead attributed to the superblock and
2424		 * block group descriptors.  If the sparse superblocks
2425		 * feature is turned on, then not all groups have this.
2426		 */
2427		for (i = 0; i < ngroups; i++) {
2428			overhead += ext3_bg_has_super(sb, i) +
2429				ext3_bg_num_gdb(sb, i);
2430			cond_resched();
2431		}
2432
2433		/*
2434		 * Every block group has an inode bitmap, a block
2435		 * bitmap, and an inode table.
2436		 */
2437		overhead += (ngroups * (2 + EXT3_SB(sb)->s_itb_per_group));
2438	}
2439
2440	buf->f_type = EXT3_SUPER_MAGIC;
2441	buf->f_bsize = sb->s_blocksize;
2442	buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead;
2443	buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter);
2444	buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count);
2445	if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count))
2446		buf->f_bavail = 0;
2447	buf->f_files = le32_to_cpu(es->s_inodes_count);
2448	buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter);
2449	buf->f_namelen = EXT3_NAME_LEN;
2450	fsid = le64_to_cpup((void *)es->s_uuid) ^
2451	       le64_to_cpup((void *)es->s_uuid + sizeof(u64));
2452	buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
2453	buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
2454	return 0;
2455}
2456
2457/* Helper function for writing quotas on sync - we need to start transaction before quota file
 * is locked for write. Otherwise there are possible deadlocks:
2459 * Process 1                         Process 2
2460 * ext3_create()                     quota_sync()
2461 *   journal_start()                   write_dquot()
2462 *   DQUOT_INIT()                        down(dqio_mutex)
2463 *     down(dqio_mutex)                    journal_start()
2464 *
2465 */
2466
2467#ifdef CONFIG_QUOTA
2468
2469static inline struct inode *dquot_to_inode(struct dquot *dquot)
2470{
2471	return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
2472}
2473
2474static int ext3_dquot_initialize(struct inode *inode, int type)
2475{
2476	handle_t *handle;
2477	int ret, err;
2478
2479	/* We may create quota structure so we need to reserve enough blocks */
2480	handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS(inode->i_sb));
2481	if (IS_ERR(handle))
2482		return PTR_ERR(handle);
2483	ret = dquot_initialize(inode, type);
2484	err = ext3_journal_stop(handle);
2485	if (!ret)
2486		ret = err;
2487	return ret;
2488}
2489
2490static int ext3_dquot_drop(struct inode *inode)
2491{
2492	handle_t *handle;
2493	int ret, err;
2494
2495	/* We may delete quota structure so we need to reserve enough blocks */
2496	handle = ext3_journal_start(inode, 2*EXT3_QUOTA_DEL_BLOCKS(inode->i_sb));
2497	if (IS_ERR(handle))
2498		return PTR_ERR(handle);
2499	ret = dquot_drop(inode);
2500	err = ext3_journal_stop(handle);
2501	if (!ret)
2502		ret = err;
2503	return ret;
2504}
2505
2506static int ext3_write_dquot(struct dquot *dquot)
2507{
2508	int ret, err;
2509	handle_t *handle;
2510	struct inode *inode;
2511
2512	inode = dquot_to_inode(dquot);
2513	handle = ext3_journal_start(inode,
2514					EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
2515	if (IS_ERR(handle))
2516		return PTR_ERR(handle);
2517	ret = dquot_commit(dquot);
2518	err = ext3_journal_stop(handle);
2519	if (!ret)
2520		ret = err;
2521	return ret;
2522}
2523
2524static int ext3_acquire_dquot(struct dquot *dquot)
2525{
2526	int ret, err;
2527	handle_t *handle;
2528
2529	handle = ext3_journal_start(dquot_to_inode(dquot),
2530					EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb));
2531	if (IS_ERR(handle))
2532		return PTR_ERR(handle);
2533	ret = dquot_acquire(dquot);
2534	err = ext3_journal_stop(handle);
2535	if (!ret)
2536		ret = err;
2537	return ret;
2538}
2539
2540static int ext3_release_dquot(struct dquot *dquot)
2541{
2542	int ret, err;
2543	handle_t *handle;
2544
2545	handle = ext3_journal_start(dquot_to_inode(dquot),
2546					EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb));
2547	if (IS_ERR(handle))
2548		return PTR_ERR(handle);
2549	ret = dquot_release(dquot);
2550	err = ext3_journal_stop(handle);
2551	if (!ret)
2552		ret = err;
2553	return ret;
2554}
2555
2556static int ext3_mark_dquot_dirty(struct dquot *dquot)
2557{
2558	/* Are we journalling quotas? */
2559	if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
2560	    EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
2561		dquot_mark_dquot_dirty(dquot);
2562		return ext3_write_dquot(dquot);
2563	} else {
2564		return dquot_mark_dquot_dirty(dquot);
2565	}
2566}
2567
2568static int ext3_write_info(struct super_block *sb, int type)
2569{
2570	int ret, err;
2571	handle_t *handle;
2572
2573	/* Data block + inode block */
2574	handle = ext3_journal_start(sb->s_root->d_inode, 2);
2575	if (IS_ERR(handle))
2576		return PTR_ERR(handle);
2577	ret = dquot_commit_info(sb, type);
2578	err = ext3_journal_stop(handle);
2579	if (!ret)
2580		ret = err;
2581	return ret;
2582}
2583
2584/*
2585 * Turn on quotas during mount time - we need to find
2586 * the quota file and such...
2587 */
2588static int ext3_quota_on_mount(struct super_block *sb, int type)
2589{
2590	return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type],
2591			EXT3_SB(sb)->s_jquota_fmt, type);
2592}
2593
2594/*
2595 * Standard function to be called on quota_on
2596 */
2597static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2598			 char *path)
2599{
2600	int err;
2601	struct nameidata nd;
2602
2603	if (!test_opt(sb, QUOTA))
2604		return -EINVAL;
2605	/* Not journalling quota? */
2606	if (!EXT3_SB(sb)->s_qf_names[USRQUOTA] &&
2607	    !EXT3_SB(sb)->s_qf_names[GRPQUOTA])
2608		return vfs_quota_on(sb, type, format_id, path);
2609	err = path_lookup(path, LOOKUP_FOLLOW, &nd);
2610	if (err)
2611		return err;
2612	/* Quotafile not on the same filesystem? */
2613	if (nd.mnt->mnt_sb != sb) {
2614		path_release(&nd);
2615		return -EXDEV;
2616	}
2617	/* Quotafile not of fs root? */
2618	if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode)
2619		printk(KERN_WARNING
2620			"EXT3-fs: Quota file not on filesystem root. "
2621			"Journalled quota will not work.\n");
2622	path_release(&nd);
2623	return vfs_quota_on(sb, type, format_id, path);
2624}
2625
2626/* Read data from quotafile - avoid pagecache and such because we cannot afford
2627 * acquiring the locks... As quota files are never truncated and quota code
2628 * itself serializes the operations (and noone else should touch the files)
2629 * we don't have to be afraid of races */
2630static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
2631			       size_t len, loff_t off)
2632{
2633	struct inode *inode = sb_dqopt(sb)->files[type];
2634	sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
2635	int err = 0;
2636	int offset = off & (sb->s_blocksize - 1);
2637	int tocopy;
2638	size_t toread;
2639	struct buffer_head *bh;
2640	loff_t i_size = i_size_read(inode);
2641
2642	if (off > i_size)
2643		return 0;
2644	if (off+len > i_size)
2645		len = i_size-off;
2646	toread = len;
2647	while (toread > 0) {
2648		tocopy = sb->s_blocksize - offset < toread ?
2649				sb->s_blocksize - offset : toread;
2650		bh = ext3_bread(NULL, inode, blk, 0, &err);
2651		if (err)
2652			return err;
2653		if (!bh)	/* A hole? */
2654			memset(data, 0, tocopy);
2655		else
2656			memcpy(data, bh->b_data+offset, tocopy);
2657		brelse(bh);
2658		offset = 0;
2659		toread -= tocopy;
2660		data += tocopy;
2661		blk++;
2662	}
2663	return len;
2664}
2665
2666/* Write to quotafile (we know the transaction is already started and has
2667 * enough credits) */
2668static ssize_t ext3_quota_write(struct super_block *sb, int type,
2669				const char *data, size_t len, loff_t off)
2670{
2671	struct inode *inode = sb_dqopt(sb)->files[type];
2672	sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
2673	int err = 0;
2674	int offset = off & (sb->s_blocksize - 1);
2675	int tocopy;
2676	int journal_quota = EXT3_SB(sb)->s_qf_names[type] != NULL;
2677	size_t towrite = len;
2678	struct buffer_head *bh;
2679	handle_t *handle = journal_current_handle();
2680
2681	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
2682	while (towrite > 0) {
2683		tocopy = sb->s_blocksize - offset < towrite ?
2684				sb->s_blocksize - offset : towrite;
2685		bh = ext3_bread(handle, inode, blk, 1, &err);
2686		if (!bh)
2687			goto out;
2688		if (journal_quota) {
2689			err = ext3_journal_get_write_access(handle, bh);
2690			if (err) {
2691				brelse(bh);
2692				goto out;
2693			}
2694		}
2695		lock_buffer(bh);
2696		memcpy(bh->b_data+offset, data, tocopy);
2697		flush_dcache_page(bh->b_page);
2698		unlock_buffer(bh);
2699		if (journal_quota)
2700			err = ext3_journal_dirty_metadata(handle, bh);
2701		else {
2702			/* Always do at least ordered writes for quotas */
2703			err = ext3_journal_dirty_data(handle, bh);
2704			mark_buffer_dirty(bh);
2705		}
2706		brelse(bh);
2707		if (err)
2708			goto out;
2709		offset = 0;
2710		towrite -= tocopy;
2711		data += tocopy;
2712		blk++;
2713	}
2714out:
2715	if (len == towrite)
2716		return err;
2717	if (inode->i_size < off+len-towrite) {
2718		i_size_write(inode, off+len-towrite);
2719		EXT3_I(inode)->i_disksize = inode->i_size;
2720	}
2721	inode->i_version++;
2722	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
2723	ext3_mark_inode_dirty(handle, inode);
2724	mutex_unlock(&inode->i_mutex);
2725	return len - towrite;
2726}
2727
2728#endif
2729
2730static int ext3_get_sb(struct file_system_type *fs_type,
2731	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
2732{
2733	return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt);
2734}
2735
/*
 * Filesystem type descriptor for "ext3".  It is registered by
 * init_ext3_fs() and unregistered by exit_ext3_fs().
 */
static struct file_system_type ext3_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "ext3",
	.get_sb		= ext3_get_sb,
	.kill_sb	= kill_block_super,
	.fs_flags	= FS_REQUIRES_DEV,
};
2743
2744static int __init init_ext3_fs(void)
2745{
2746	int err = init_ext3_xattr();
2747	if (err)
2748		return err;
2749	err = init_inodecache();
2750	if (err)
2751		goto out1;
2752        err = register_filesystem(&ext3_fs_type);
2753	if (err)
2754		goto out;
2755	return 0;
2756out:
2757	destroy_inodecache();
2758out1:
2759	exit_ext3_xattr();
2760	return err;
2761}
2762
/*
 * Module exit: undo init_ext3_fs() in reverse order - unregister the
 * filesystem type, destroy the inode cache, then shut down the xattr
 * layer.
 */
static void __exit exit_ext3_fs(void)
{
	unregister_filesystem(&ext3_fs_type);
	destroy_inodecache();
	exit_ext3_xattr();
}
2769
/* Module metadata, followed by the init/exit hooks defined above. */
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
MODULE_LICENSE("GPL");
module_init(init_ext3_fs)
module_exit(exit_ext3_fs)
2775